src/amd/addrlib/core/addrelemlib.cpp

   1 /*
   2  * Copyright © 2014 Advanced Micro Devices, Inc.
   3  * All Rights Reserved.
   4  *
   5  * Permission is hereby granted, free of charge, to any person obtaining
   6  * a copy of this software and associated documentation files (the
   7  * "Software"), to deal in the Software without restriction, including
   8  * without limitation the rights to use, copy, modify, merge, publish,
   9  * distribute, sub license, and/or sell copies of the Software, and to
  10  * permit persons to whom the Software is furnished to do so, subject to
  11  * the following conditions:
  12  *
  13  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  14  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
  15  * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  16  * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
  17  * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  18  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
  20  * USE OR OTHER DEALINGS IN THE SOFTWARE.
  21  *
  22  * The above copyright notice and this permission notice (including the
  23  * next paragraph) shall be included in all copies or substantial portions
  24  * of the Software.
  25  */
  26
  27 /**
  28 ***************************************************************************************************
  29 * @file  addrelemlib.cpp
  30 * @brief Contains the class implementation for element/pixel related functions
  31 ***************************************************************************************************
  32 */
  33
  34 #include "addrelemlib.h"
  35 #include "addrlib.h"
  36
  37
  38 /**
  39 ***************************************************************************************************
  40 *   AddrElemLib::AddrElemLib
  41 *
  42 *   @brief
  43 *       constructor
  44 *
  45 *   @return
  46 *       N/A
  47 ***************************************************************************************************
  48 */
  49 AddrElemLib::AddrElemLib(
  50     AddrLib* const pAddrLib) :  ///< [in] Parent addrlib instance pointer
  51     AddrObject(pAddrLib->GetClient()),
  52     m_pAddrLib(pAddrLib)
  53 {
  54     switch (m_pAddrLib->GetAddrChipFamily())
  55     {
  56         case ADDR_CHIP_FAMILY_R6XX:
  57             m_depthPlanarType = ADDR_DEPTH_PLANAR_R600;
  58             m_fp16ExportNorm = 0;
  59             break;
  60         case ADDR_CHIP_FAMILY_R7XX:
  61             m_depthPlanarType = ADDR_DEPTH_PLANAR_R600;
  62             m_fp16ExportNorm = 1;
  63             break;
  64         case ADDR_CHIP_FAMILY_R8XX:
  65         case ADDR_CHIP_FAMILY_NI: // Same as 8xx
  66             m_depthPlanarType = ADDR_DEPTH_PLANAR_R800;
  67             m_fp16ExportNorm = 1;
  68             break;
  69         default:
  70             m_fp16ExportNorm = 1;
  71             m_depthPlanarType = ADDR_DEPTH_PLANAR_R800;
  72     }
  73
  74     m_configFlags.value = 0;
  75 }
  76
  77 /**
  78 ***************************************************************************************************
  79 *   AddrElemLib::~AddrElemLib
  80 *
  81 *   @brief
  82 *       destructor
  83 *
  84 *   @return
  85 *       N/A
  86 ***************************************************************************************************
  87 */
  88 AddrElemLib::~AddrElemLib()
  89 {
  90 }
  91
  92 /**
  93 ***************************************************************************************************
  94 *   AddrElemLib::Create
  95 *
  96 *   @brief
  97 *       Creates and initializes AddrLib object.
  98 *
  99 *   @return
 100 *       Returns point to ADDR_CREATEINFO if successful.
 101 ***************************************************************************************************
 102 */
 103 AddrElemLib* AddrElemLib::Create(
 104     const AddrLib* const        pAddrLib)   ///< [in] Pointer of parent AddrLib instance
 105 {
 106     AddrElemLib* pElemLib = NULL;
 107
 108     if (pAddrLib)
 109     {
 110         pElemLib = new(pAddrLib->GetClient()) AddrElemLib(const_cast<AddrLib* const>(pAddrLib));
 111     }
 112
 113     return pElemLib;
 114 }
 115
 116 /**************************************************************************************************
 117 *   AddrElemLib::Flt32sToInt32s
 118 *
 119 *   @brief
 120 *       Convert a ADDR_FLT_32 value to Int32 value
 121 *
 122 *   @return
 123 *       N/A
 124 ***************************************************************************************************
 125 */
 126 VOID AddrElemLib::Flt32sToInt32s(
 127     ADDR_FLT_32     value,      ///< [in] ADDR_FLT_32 value
 128     UINT_32         bits,       ///< [in] nubmer of bits in value
 129     AddrNumberType  numberType, ///< [in] the type of number
 130     UINT_32*        pResult)    ///< [out] Int32 value
 131 {
 132     UINT_8 round = 128;    //ADDR_ROUND_BY_HALF
 133     UINT_32 uscale;
 134     UINT_32 sign;
 135
 136     //convert each component to an INT_32
 137     switch ( numberType )
 138     {
 139         case ADDR_NO_NUMBER:    //fall through
 140         case ADDR_ZERO:         //fall through
 141         case ADDR_ONE:          //fall through
 142         case ADDR_EPSILON:      //fall through
 143             return;        // these are zero-bit components, so don't set result
 144
 145         case ADDR_UINT_BITS:            // unsigned integer bit field, clamped to range
 146             uscale = (1<<bits) - 1;
 147             if (bits == 32)               // special case unsigned 32-bit int
 148             {
 149                 *pResult = value.i;
 150             }
 151             else
 152             {
 153                 if ((value.i < 0) || (value.u > uscale))
 154                 {
 155                     *pResult = uscale;
 156                 }
 157                 else
 158                 {
 159                     *pResult = value.i;
 160                 }
 161                 return;
 162             }
 163
 164         // The algorithm used in the DB and TX differs at one value for 24-bit unorms
 165         case ADDR_UNORM_R6XXDB:        // unsigned repeating fraction
 166             if ((bits==24) && (value.i == 0x33000000))
 167             {
 168                 *pResult = 1;
 169                 return;
 170             }              // Else treat like ADDR_UNORM_R6XX
 171
 172         case ADDR_UNORM_R6XX:            // unsigned repeating fraction
 173             if (value.f <= 0)
 174             {
 175                 *pResult = 0;            // first clamp to [0..1]
 176             }
 177             else
 178             {
 179                 if (value.f >= 1)
 180                 {
 181                      *pResult = (1<<bits) - 1;
 182                 }
 183                 else
 184                 {
 185                     if ((value.i | 0x87FFFFFF) == 0xFFFFFFFF)
 186                     {
 187                         *pResult = 0;                        // NaN, so force to 0
 188                     }
 189
 190                     #if 0 // floating point version for documentation
 191                     else
 192                     {
 193                         FLOAT f = value.f * ((1<<bits) - 1);
 194                         *pResult = static_cast<INT_32>(f + (round/256.0f));
 195                     }
 196                     #endif
 197                     else
 198                     {
 199                         ADDR_FLT_32 scaled;
 200                         ADDR_FLT_32 shifted;
 201                         UINT_64 truncated, rounded;
 202                         UINT_32 altShift;
 203                         UINT_32 mask = (1 << bits) - 1;
 204                         UINT_32 half = 1 << (bits - 1);
 205                         UINT_32 mant24 = (value.i & 0x7FFFFF) + 0x800000;
 206                         UINT_64 temp = mant24 - (mant24>>bits) -
 207                             static_cast<INT_32>((mant24 & mask) > half);
 208                         UINT_32 exp8 = value.i >> 23;
 209                         UINT_32 shift = 126 - exp8 + 24 - bits;
 210                         UINT_64 final;
 211
 212                         if (shift >= 32) // This is zero, even with maximum dither add
 213                         {
 214                             final = 0;
 215                         }
 216                         else
 217                         {
 218                             final = ((temp<<8) + (static_cast<UINT_64>(round)<<shift)) >> (shift+8);
 219                         }
 220                         //ADDR_EXIT( *pResult == final,
 221                         //    ("Float %x converted to %d-bit Unorm %x != bitwise %x",
 222                         //     value.u, bits, (UINT_32)*pResult, (UINT_32)final) );
 223                         if (final > mask)
 224                         {
 225                             final = mask;
 226                         }
 227
 228                         scaled.f  = value.f * ((1<<bits) - 1);
 229                         shifted.f = (scaled.f * 256);
 230                         truncated = ((shifted.i&0x7FFFFF) + (INT_64)0x800000) << 8;
 231                         altShift  = 126 + 24 + 8 - ((shifted.i>>23)&0xFF);
 232                         truncated = (altShift > 60) ? 0 : truncated >> altShift;
 233                         rounded   = static_cast<INT_32>((round + truncated) >> 8);
 234                         //if (rounded > ((1<<bits) - 1))
 235                         //    rounded = ((1<<bits) - 1);
 236                         *pResult = static_cast<INT_32>(rounded); //(INT_32)final;
 237                     }
 238                 }
 239             }
 240
 241             return;
 242
 243         case ADDR_S8FLOAT32:    // 32-bit IEEE float, passes through NaN values
 244             *pResult = value.i;
 245             return;
 246
 247         // @@ FIX ROUNDING in this code, fix the denorm case
 248         case ADDR_U4FLOATC:         // Unsigned float, 4-bit exponent. bias 15, clamped [0..1]
 249             sign = (value.i >> 31) & 1;
 250             if ((value.i&0x7F800000) == 0x7F800000)    // If NaN or INF:
 251             {
 252                 if ((value.i&0x007FFFFF) != 0)             // then if NaN
 253                 {
 254                     *pResult = 0;                       // return 0
 255                 }
 256                 else
 257                 {
 258                     *pResult = (sign)?0:0xF00000;           // else +INF->+1, -INF->0
 259                 }
 260                 return;
 261             }
 262             if (value.f <= 0)
 263             {
 264                 *pResult = 0;
 265             }
 266             else
 267             {
 268                 if (value.f>=1)
 269                 {
 270                     *pResult = 0xF << (bits-4);
 271                 }
 272                 else
 273                 {
 274                     if ((value.i>>23) > 112 )
 275                     {
 276                         // 24-bit float: normalized
 277                         // value.i += 1 << (22-bits+4);
 278                         // round the IEEE mantissa to mantissa size
 279                         // @@ NOTE: add code to support rounding
 280                         value.u &= 0x7FFFFFF;             // mask off high 4 exponent bits
 281                         *pResult = value.i >> (23-bits+4);// shift off unused mantissa bits
 282                     }
 283                     else
 284                     {
 285                         // 24-bit float: denormalized
 286                         value.f = value.f / (1<<28) / (1<<28);
 287                         value.f = value.f / (1<<28) / (1<<28);    // convert to IEEE denorm
 288                         // value.i += 1 << (22-bits+4);
 289                         // round the IEEE mantissa to mantissa size
 290                         // @@ NOTE: add code to support rounding
 291                         *pResult = value.i >> (23-bits+4);    // shift off unused mantissa bits
 292                     }
 293                 }
 294             }
 295
 296             return;
 297
 298         default:                    // invalid number mode
 299             //ADDR_EXIT(0, ("Invalid AddrNumber %d", numberType) );
 300             break;
 301
 302     }
 303 }
 304
 305 /**
 306 ***************************************************************************************************
 307 *   AddrElemLib::Int32sToPixel
 308 *
 309 *   @brief
 310 *       Pack 32-bit integer values into an uncompressed pixel,
 311 *       in the proper order
 312 *
 313 *   @return
 314 *       N/A
 315 *
 316 *   @note
 317 *       This entry point packes four 32-bit integer values into
 318 *       an uncompressed pixel. The pixel values are specifies in
 319 *       standard order, e.g. depth/stencil. This routine asserts
 320 *       if called on compressed pixel.
 321 ***************************************************************************************************
 322 */
 323 VOID AddrElemLib::Int32sToPixel(
 324     UINT_32              numComps,      ///< [in] number of components
 325     UINT_32*             pComps,        ///< [in] compnents
 326     UINT_32*             pCompBits,     ///< [in] total bits in each component
 327     UINT_32*             pCompStart,    ///< [in] the first bit position of each component
 328     ADDR_COMPONENT_FLAGS properties,    ///< [in] properties about byteAligned, exportNorm
 329     UINT_32              resultBits,    ///< [in] result bits: total bpp after decompression
 330     UINT_8*              pPixel)        ///< [out] a depth/stencil pixel value
 331 {
 332     UINT_32 i;
 333     UINT_32 j;
 334     UINT_32 start;
 335     UINT_32 size;
 336     UINT_32 byte;
 337     UINT_32 value = 0;
 338     UINT_32 compMask;
 339     UINT_32 elemMask=0;
 340     UINT_32 elementXor = 0;  // address xor when reading bytes from elements
 341
 342
 343     // @@ NOTE: assert if called on a compressed format!
 344
 345     if (properties.byteAligned)    // Components are all byte-sized
 346     {
 347         for (i = 0; i < numComps; i++)        // Then for each component
 348         {
 349             // Copy the bytes of the component into the element
 350             start = pCompStart[i] / 8;
 351             size  = pCompBits[i]  / 8;
 352             for (j = 0; j < size; j++)
 353             {
 354                 pPixel[(j+start)^elementXor] = static_cast<UINT_8>(pComps[i] >> (8*j));
 355             }
 356         }
 357     }
 358     else                        // Element is 32-bits or less, components are bit fields
 359     {
 360         // First, extract each component in turn and combine it into a 32-bit value
 361         for (i = 0; i < numComps; i++)
 362         {
 363             compMask = (1 << pCompBits[i]) - 1;
 364             elemMask |= compMask << pCompStart[i];
 365             value |= (pComps[i] & compMask) << pCompStart[i];
 366         }
 367
 368         // Mext, copy the masked value into the element
 369         size = (resultBits + 7) / 8;
 370         for (i = 0; i < size; i++)
 371         {
 372             byte = pPixel[i^elementXor] & ~(elemMask >> (8*i));
 373             pPixel[i^elementXor] = static_cast<UINT_8>(byte | ((elemMask & value) >> (8*i)));
 374         }
 375     }
 376 }
 377
 378 /**
 379 ***************************************************************************************************
 380 *   Flt32ToDepthPixel
 381 *
 382 *   @brief
 383 *       Convert a FLT_32 value to a depth/stencil pixel value
 384 *
 385 *   @return
 386 *       N/A
 387 ***************************************************************************************************
 388 */
 389 VOID AddrElemLib::Flt32ToDepthPixel(
 390     AddrDepthFormat     format,     ///< [in] Depth format
 391     const ADDR_FLT_32   comps[2],   ///< [in] two components of depth
 392     UINT_8*             pPixel      ///< [out] depth pixel value
 393     ) const
 394 {
 395     UINT_32 i;
 396     UINT_32 values[2];
 397     ADDR_COMPONENT_FLAGS properties;    // byteAligned, exportNorm
 398     UINT_32 resultBits = 0;             // result bits: total bits per pixel after decompression
 399
 400     ADDR_PIXEL_FORMATINFO fmt;
 401
 402     // get type for each component
 403     PixGetDepthCompInfo(format, &fmt);
 404
 405     //initialize properties
 406     properties.byteAligned = TRUE;
 407     properties.exportNorm  = TRUE;
 408     properties.floatComp   = FALSE;
 409
 410     //set properties and result bits
 411     for (i = 0; i < 2; i++)
 412     {
 413         if ((fmt.compBit[i] & 7) || (fmt.compStart[i] & 7))
 414         {
 415             properties.byteAligned = FALSE;
 416         }
 417
 418         if (resultBits < fmt.compStart[i] + fmt.compBit[i])
 419         {
 420             resultBits = fmt.compStart[i] + fmt.compBit[i];
 421         }
 422
 423         // Clear ADDR_EXPORT_NORM if can't be represented as 11-bit or smaller [-1..+1] format
 424         if (fmt.compBit[i] > 11 || fmt.numType[i] >= ADDR_USCALED)
 425         {
 426             properties.exportNorm = FALSE;
 427         }
 428
 429         // Mark if there are any floating point components
 430         if ((fmt.numType[i] == ADDR_U4FLOATC) || (fmt.numType[i] >= ADDR_S8FLOAT) )
 431         {
 432             properties.floatComp = TRUE;
 433         }
 434     }
 435
 436     // Convert the two input floats to integer values
 437     for (i = 0; i < 2; i++)
 438     {
 439         Flt32sToInt32s(comps[i], fmt.compBit[i], fmt.numType[i], &values[i]);
 440     }
 441
 442     // Then pack the two integer components, in the proper order
 443     Int32sToPixel(2, values, fmt.compBit, fmt.compStart, properties, resultBits, pPixel );
 444
 445 }
 446
 447 /**
 448 ***************************************************************************************************
 449 *   Flt32ToColorPixel
 450 *
 451 *   @brief
 452 *       Convert a FLT_32 value to a red/green/blue/alpha pixel value
 453 *
 454 *   @return
 455 *       N/A
 456 ***************************************************************************************************
 457 */
 458 VOID AddrElemLib::Flt32ToColorPixel(
 459     AddrColorFormat     format,     ///< [in] Color format
 460     AddrSurfaceNumber   surfNum,    ///< [in] Surface number
 461     AddrSurfaceSwap     surfSwap,   ///< [in] Surface swap
 462     const ADDR_FLT_32   comps[4],   ///< [in] four components of color
 463     UINT_8*             pPixel      ///< [out] a red/green/blue/alpha pixel value
 464     ) const
 465 {
 466     ADDR_PIXEL_FORMATINFO pixelInfo;
 467
 468     UINT_32 i;
 469     UINT_32 values[4];
 470     ADDR_COMPONENT_FLAGS properties;    // byteAligned, exportNorm
 471     UINT_32 resultBits = 0;             // result bits: total bits per pixel after decompression
 472
 473     memset(&pixelInfo, 0, sizeof(ADDR_PIXEL_FORMATINFO));
 474
 475     PixGetColorCompInfo(format, surfNum, surfSwap, &pixelInfo);
 476
 477     //initialize properties
 478     properties.byteAligned = TRUE;
 479     properties.exportNorm  = TRUE;
 480     properties.floatComp   = FALSE;
 481
 482     //set properties and result bits
 483     for (i = 0; i < 4; i++)
 484     {
 485         if ( (pixelInfo.compBit[i] & 7) || (pixelInfo.compStart[i] & 7) )
 486         {
 487             properties.byteAligned = FALSE;
 488         }
 489
 490         if (resultBits < pixelInfo.compStart[i] + pixelInfo.compBit[i])
 491         {
 492             resultBits = pixelInfo.compStart[i] + pixelInfo.compBit[i];
 493         }
 494
 495         if (m_fp16ExportNorm)
 496         {
 497             // Clear ADDR_EXPORT_NORM if can't be represented as 11-bit or smaller [-1..+1] format
 498             // or if it's not FP and <=16 bits
 499             if (((pixelInfo.compBit[i] > 11) || (pixelInfo.numType[i] >= ADDR_USCALED))
 500                 && (pixelInfo.numType[i] !=ADDR_U4FLOATC))
 501             {
 502                 properties.exportNorm = FALSE;
 503             }
 504         }
 505         else
 506         {
 507             // Clear ADDR_EXPORT_NORM if can't be represented as 11-bit or smaller [-1..+1] format
 508             if (pixelInfo.compBit[i] > 11 || pixelInfo.numType[i] >= ADDR_USCALED)
 509             {
 510                 properties.exportNorm = FALSE;
 511             }
 512         }
 513
 514         // Mark if there are any floating point components
 515         if ( (pixelInfo.numType[i] == ADDR_U4FLOATC) ||
 516              (pixelInfo.numType[i] >= ADDR_S8FLOAT) )
 517         {
 518             properties.floatComp = TRUE;
 519         }
 520     }
 521
 522     // Convert the four input floats to integer values
 523     for (i = 0; i < 4; i++)
 524     {
 525         Flt32sToInt32s(comps[i], pixelInfo.compBit[i], pixelInfo.numType[i], &values[i]);
 526     }
 527
 528     // Then pack the four integer components, in the proper order
 529     Int32sToPixel(4, values, &pixelInfo.compBit[0], &pixelInfo.compStart[0],
 530                   properties, resultBits, pPixel);
 531 }
 532
 533 /**
 534 ***************************************************************************************************
 535 *   AddrElemLib::GetCompType
 536 *
 537 *   @brief
 538 *       Fill per component info
 539 *
 540 *   @return
 541 *       N/A
 542 *
 543 ***************************************************************************************************
 544 */
 545 VOID AddrElemLib::GetCompType(
 546     AddrColorFormat         format,     ///< [in] surface format
 547     AddrSurfaceNumber       numType,  ///< [in] number type
 548     ADDR_PIXEL_FORMATINFO*  pInfo)       ///< [in][out] per component info out
 549 {
 550     BOOL_32 handled = FALSE;
 551
 552     // Floating point formats override the number format
 553     switch (format)
 554     {
 555         case ADDR_COLOR_16_FLOAT:            // fall through for all pure floating point format
 556         case ADDR_COLOR_16_16_FLOAT:
 557         case ADDR_COLOR_16_16_16_16_FLOAT:
 558         case ADDR_COLOR_32_FLOAT:
 559         case ADDR_COLOR_32_32_FLOAT:
 560         case ADDR_COLOR_32_32_32_32_FLOAT:
 561         case ADDR_COLOR_10_11_11_FLOAT:
 562         case ADDR_COLOR_11_11_10_FLOAT:
 563             numType = ADDR_NUMBER_FLOAT;
 564             break;
 565             // Special handling for the depth formats
 566         case ADDR_COLOR_8_24:                // fall through for these 2 similar format
 567         case ADDR_COLOR_24_8:
 568             for (UINT_32 c = 0; c < 4; c++)
 569             {
 570                 if (pInfo->compBit[c] == 8)
 571                 {
 572                     pInfo->numType[c] = ADDR_UINT_BITS;
 573                 }
 574                 else if (pInfo->compBit[c]  == 24)
 575                 {
 576                     pInfo->numType[c] = ADDR_UNORM_R6XX;
 577                 }
 578                 else
 579                 {
 580                     pInfo->numType[c] = ADDR_NO_NUMBER;
 581                 }
 582             }
 583             handled = TRUE;
 584             break;
 585         case ADDR_COLOR_8_24_FLOAT:          // fall through for these 3 similar format
 586         case ADDR_COLOR_24_8_FLOAT:
 587         case ADDR_COLOR_X24_8_32_FLOAT:
 588             for (UINT_32 c = 0; c < 4; c++)
 589             {
 590                 if (pInfo->compBit[c] == 8)
 591                 {
 592                     pInfo->numType[c] = ADDR_UINT_BITS;
 593                 }
 594                 else if (pInfo->compBit[c] == 24)
 595                 {
 596                     pInfo->numType[c] = ADDR_U4FLOATC;
 597                 }
 598                 else if (pInfo->compBit[c] == 32)
 599                 {
 600                     pInfo->numType[c] = ADDR_S8FLOAT32;
 601                 }
 602                 else
 603                 {
 604                     pInfo->numType[c] = ADDR_NO_NUMBER;
 605                 }
 606             }
 607             handled = TRUE;
 608             break;
 609         default:
 610             break;
 611     }
 612
 613     if (!handled)
 614     {
 615         for (UINT_32 c = 0; c < 4; c++)
 616         {
 617             // Assign a number type for each component
 618             AddrSurfaceNumber cnum;
 619
 620             // First handle default component values
 621             if (pInfo->compBit[c] == 0)
 622             {
 623                 if (c < 3)
 624                 {
 625                     pInfo->numType[c] = ADDR_ZERO;      // Default is zero for RGB
 626                 }
 627                 else if (numType == ADDR_NUMBER_UINT || numType == ADDR_NUMBER_SINT)
 628                 {
 629                     pInfo->numType[c] = ADDR_EPSILON;   // Alpha INT_32 bits default is 0x01
 630                 }
 631                 else
 632                 {
 633                     pInfo->numType[c] = ADDR_ONE;       // Alpha normal default is float 1.0
 634                 }
 635                 continue;
 636             }
 637             // Now handle small components
 638             else if (pInfo->compBit[c] == 1)
 639             {
 640                 if (numType == ADDR_NUMBER_UINT || numType == ADDR_NUMBER_SINT)
 641                 {
 642                     cnum = ADDR_NUMBER_UINT;
 643                 }
 644                 else
 645                 {
 646                     cnum = ADDR_NUMBER_UNORM;
 647                 }
 648             }
 649             else
 650             {
 651                 cnum = numType;
 652             }
 653
 654             // If no default, set the number type fom num, compbits, and architecture
 655             switch (cnum)
 656             {
 657                 case ADDR_NUMBER_SRGB:
 658                     pInfo->numType[c] = (c < 3) ? ADDR_GAMMA8_R6XX : ADDR_UNORM_R6XX;
 659                     break;
 660                 case ADDR_NUMBER_UNORM:
 661                     pInfo->numType[c] = ADDR_UNORM_R6XX;
 662                     break;
 663                 case ADDR_NUMBER_SNORM:
 664                     pInfo->numType[c] = ADDR_SNORM_R6XX;
 665                     break;
 666                 case ADDR_NUMBER_USCALED:
 667                     pInfo->numType[c] = ADDR_USCALED;  // @@ Do we need separate Pele routine?
 668                     break;
 669                 case ADDR_NUMBER_SSCALED:
 670                     pInfo->numType[c] = ADDR_SSCALED;  // @@ Do we need separate Pele routine?
 671                     break;
 672                 case ADDR_NUMBER_FLOAT:
 673                     if (pInfo->compBit[c] == 32)
 674                     {
 675                         pInfo->numType[c] = ADDR_S8FLOAT32;
 676                     }
 677                     else if (pInfo->compBit[c] == 16)
 678                     {
 679                         pInfo->numType[c] = ADDR_S5FLOAT;
 680                     }
 681                     else if (pInfo->compBit[c] >= 10)
 682                     {
 683                         pInfo->numType[c] = ADDR_U5FLOAT;
 684                     }
 685                     else
 686                     {
 687                         ADDR_ASSERT_ALWAYS();
 688                     }
 689                     break;
 690                 case ADDR_NUMBER_SINT:
 691                     pInfo->numType[c] = ADDR_SINT_BITS;
 692                     break;
 693                 case ADDR_NUMBER_UINT:
 694                     pInfo->numType[c] = ADDR_UINT_BITS;
 695                     break;
 696
 697                 default:
 698                     ADDR_ASSERT(!"Invalid number type");
 699                     pInfo->numType[c] = ADDR_NO_NUMBER;
 700                     break;
 701              }
 702         }
 703     }
 704 }
 705
 706 /**
 707 ***************************************************************************************************
 708 *   AddrElemLib::GetCompSwap
 709 *
 710 *   @brief
 711 *       Get components swapped for color surface
 712 *
 713 *   @return
 714 *       N/A
 715 *
 716 ***************************************************************************************************
 717 */
 718 VOID AddrElemLib::GetCompSwap(
 719     AddrSurfaceSwap         swap,   ///< [in] swap mode
 720     ADDR_PIXEL_FORMATINFO*  pInfo)  ///< [in/out] output per component info
 721 {
 722     switch (pInfo->comps)
 723     {
 724         case 4:
 725             switch (swap)
 726             {
 727                 case ADDR_SWAP_ALT:
 728                     SwapComps( 0, 2, pInfo );
 729                     break;    // BGRA
 730                 case ADDR_SWAP_STD_REV:
 731                     SwapComps( 0, 3, pInfo );
 732                     SwapComps( 1, 2, pInfo );
 733                     break;    // ABGR
 734                 case ADDR_SWAP_ALT_REV:
 735                     SwapComps( 0, 3, pInfo );
 736                     SwapComps( 0, 2, pInfo );
 737                     SwapComps( 0, 1, pInfo );
 738                     break;    // ARGB
 739                 default:
 740                     break;
 741             }
 742             break;
 743         case 3:
 744             switch (swap)
 745             {
 746                 case ADDR_SWAP_ALT_REV:
 747                     SwapComps( 0, 3, pInfo );
 748                     SwapComps( 0, 2, pInfo );
 749                     break;    // AGR
 750                 case ADDR_SWAP_STD_REV:
 751                     SwapComps( 0, 2, pInfo );
 752                     break;    // BGR
 753                 case ADDR_SWAP_ALT:
 754                     SwapComps( 2, 3, pInfo );
 755                     break;    // RGA
 756                 default:
 757                     break;    // RGB
 758             }
 759             break;
 760         case 2:
 761             switch (swap)
 762             {
 763                 case ADDR_SWAP_ALT_REV:
 764                     SwapComps( 0, 1, pInfo );
 765                     SwapComps( 1, 3, pInfo );
 766                     break;    // AR
 767                 case ADDR_SWAP_STD_REV:
 768                     SwapComps( 0, 1, pInfo );
 769                     break;    // GR
 770                 case ADDR_SWAP_ALT:
 771                     SwapComps( 1, 3, pInfo );
 772                     break;    // RA
 773                 default:
 774                     break;    // RG
 775             }
 776             break;
 777         case 1:
 778             switch (swap)
 779             {
 780                 case ADDR_SWAP_ALT_REV:
 781                     SwapComps( 0, 3, pInfo );
 782                     break;    // A
 783                 case ADDR_SWAP_STD_REV:
 784                     SwapComps( 0, 2, pInfo );
 785                     break;    // B
 786                 case ADDR_SWAP_ALT:
 787                     SwapComps( 0, 1, pInfo );
 788                     break;    // G
 789                 default:
 790                     break;    // R
 791             }
 792             break;
 793     }
 794 }
 795
 796 /**
 797 ***************************************************************************************************
 798 *   AddrElemLib::GetCompSwap
 799 *
 800 *   @brief
 801 *       Get components swapped for color surface
 802 *
 803 *   @return
 804 *       N/A
 805 *
 806 ***************************************************************************************************
 807 */
 808 VOID AddrElemLib::SwapComps(
 809     UINT_32                 c0,     ///< [in] component index 0
 810     UINT_32                 c1,     ///< [in] component index 1
 811     ADDR_PIXEL_FORMATINFO*  pInfo)  ///< [in/out] output per component info
 812 {
 813     UINT_32 start;
 814     UINT_32 bits;
 815
 816     start = pInfo->compStart[c0];
 817     pInfo->compStart[c0] = pInfo->compStart[c1];
 818     pInfo->compStart[c1] = start;
 819
 820     bits  = pInfo->compBit[c0];
 821     pInfo->compBit[c0] = pInfo->compBit[c1];
 822     pInfo->compBit[c1] = bits;
 823 }
 824
 825 /**
 826 ***************************************************************************************************
 827 *   AddrElemLib::PixGetColorCompInfo
 828 *
 829 *   @brief
 830 *       Get per component info for color surface
 831 *
 832 *   @return
 833 *       N/A
 834 *
 835 ***************************************************************************************************
 836 */
 837 VOID AddrElemLib::PixGetColorCompInfo(
 838     AddrColorFormat         format, ///< [in] surface format, read from register
 839     AddrSurfaceNumber       number, ///< [in] pixel number type
 840     AddrSurfaceSwap         swap,   ///< [in] component swap mode
 841     ADDR_PIXEL_FORMATINFO*  pInfo   ///< [out] output per component info
 842     ) const
 843 {
 844     // 1. Get componet bits
 845     switch (format)
 846     {
 847         case ADDR_COLOR_8:
 848             GetCompBits(8, 0, 0, 0, pInfo);
 849             break;
 850         case ADDR_COLOR_1_5_5_5:
 851             GetCompBits(5, 5, 5, 1, pInfo);
 852             break;
 853         case ADDR_COLOR_5_6_5:
 854             GetCompBits(8, 6, 5, 0, pInfo);
 855             break;
 856         case ADDR_COLOR_6_5_5:
 857             GetCompBits(5, 5, 6, 0, pInfo);
 858             break;
 859         case ADDR_COLOR_8_8:
 860             GetCompBits(8, 8, 0, 0, pInfo);
 861             break;
 862         case ADDR_COLOR_4_4_4_4:
 863             GetCompBits(4, 4, 4, 4, pInfo);
 864             break;
 865         case ADDR_COLOR_16:
 866             GetCompBits(16, 0, 0, 0, pInfo);
 867             break;
 868         case ADDR_COLOR_8_8_8_8:
 869             GetCompBits(8, 8, 8, 8, pInfo);
 870             break;
 871         case ADDR_COLOR_2_10_10_10:
 872             GetCompBits(10, 10, 10, 2, pInfo);
 873             break;
 874         case ADDR_COLOR_10_11_11:
 875             GetCompBits(11, 11, 10, 0, pInfo);
 876             break;
 877         case ADDR_COLOR_11_11_10:
 878             GetCompBits(10, 11, 11, 0, pInfo);
 879             break;
 880         case ADDR_COLOR_16_16:
 881             GetCompBits(16, 16, 0, 0, pInfo);
 882             break;
 883         case ADDR_COLOR_16_16_16_16:
 884             GetCompBits(16, 16, 16, 16, pInfo);
 885             break;
 886         case ADDR_COLOR_16_FLOAT:
 887             GetCompBits(16, 0, 0, 0, pInfo);
 888             break;
 889         case ADDR_COLOR_16_16_FLOAT:
 890             GetCompBits(16, 16, 0, 0, pInfo);
 891             break;
 892         case ADDR_COLOR_32_FLOAT:
 893             GetCompBits(32, 0, 0, 0, pInfo);
 894             break;
 895         case ADDR_COLOR_32_32_FLOAT:
 896             GetCompBits(32, 32, 0, 0, pInfo);
 897             break;
 898         case ADDR_COLOR_16_16_16_16_FLOAT:
 899             GetCompBits(16, 16, 16, 16, pInfo);
 900             break;
 901         case ADDR_COLOR_32_32_32_32_FLOAT:
 902             GetCompBits(32, 32, 32, 32, pInfo);
 903             break;
 904
 905         case ADDR_COLOR_32:
 906             GetCompBits(32, 0, 0, 0, pInfo);
 907             break;
 908         case ADDR_COLOR_32_32:
 909             GetCompBits(32, 32, 0, 0, pInfo);
 910             break;
 911         case ADDR_COLOR_32_32_32_32:
 912             GetCompBits(32, 32, 32, 32, pInfo);
 913             break;
 914         case ADDR_COLOR_10_10_10_2:
 915             GetCompBits(2, 10, 10, 10, pInfo);
 916             break;
 917         case ADDR_COLOR_10_11_11_FLOAT:
 918             GetCompBits(11, 11, 10, 0, pInfo);
 919             break;
 920         case ADDR_COLOR_11_11_10_FLOAT:
 921             GetCompBits(10, 11, 11, 0, pInfo);
 922             break;
 923         case ADDR_COLOR_5_5_5_1:
 924             GetCompBits(1, 5, 5, 5, pInfo);
 925             break;
 926         case ADDR_COLOR_3_3_2:
 927             GetCompBits(2, 3, 3, 0, pInfo);
 928             break;
 929         case ADDR_COLOR_4_4:
 930             GetCompBits(4, 4, 0, 0, pInfo);
 931             break;
 932         case ADDR_COLOR_8_24:
 933         case ADDR_COLOR_8_24_FLOAT:  // same bit count, fall through
 934             GetCompBits(24, 8, 0, 0, pInfo);
 935             break;
 936         case ADDR_COLOR_24_8:
 937         case ADDR_COLOR_24_8_FLOAT:  // same bit count, fall through
 938             GetCompBits(8, 24, 0, 0, pInfo);
 939             break;
 940         case ADDR_COLOR_X24_8_32_FLOAT:
 941             GetCompBits(32, 8, 0, 0, pInfo);
 942             break;
 943
 944         case ADDR_COLOR_INVALID:
 945             GetCompBits(0, 0, 0, 0, pInfo);
 946             break;
 947         default:
 948             ADDR_ASSERT(0);
 949             GetCompBits(0, 0, 0, 0, pInfo);
 950             break;
 951     }
 952
 953     // 2. Get component number type
 954
 955     GetCompType(format, number, pInfo);
 956
 957     // 3. Swap components if needed
 958
 959     GetCompSwap(swap, pInfo);
 960 }
 961
 962 /**
 963 ***************************************************************************************************
 964 *   AddrElemLib::PixGetDepthCompInfo
 965 *
 966 *   @brief
 967 *       Get per component info for depth surface
 968 *
 969 *   @return
 970 *       N/A
 971 *
 972 ***************************************************************************************************
 973 */
 974 VOID AddrElemLib::PixGetDepthCompInfo(
 975     AddrDepthFormat         format,     ///< [in] surface format, read from register
 976     ADDR_PIXEL_FORMATINFO*  pInfo       ///< [out] output per component bits and type
 977     ) const
 978 {
 979     if (m_depthPlanarType == ADDR_DEPTH_PLANAR_R800)
 980     {
 981         if (format == ADDR_DEPTH_8_24_FLOAT)
 982         {
 983             format = ADDR_DEPTH_X24_8_32_FLOAT; // Use this format to represent R800's D24FS8
 984         }
 985
 986         if (format == ADDR_DEPTH_X8_24_FLOAT)
 987         {
 988             format = ADDR_DEPTH_32_FLOAT;
 989         }
 990     }
 991
 992     switch (format)
 993     {
 994         case ADDR_DEPTH_16:
 995             GetCompBits(16, 0, 0, 0, pInfo);
 996             break;
 997         case ADDR_DEPTH_8_24:
 998         case ADDR_DEPTH_8_24_FLOAT:      // similar format, fall through
 999             GetCompBits(24, 8, 0, 0, pInfo);
1000             break;
1001         case ADDR_DEPTH_X8_24:
1002         case ADDR_DEPTH_X8_24_FLOAT:     // similar format, fall through
1003             GetCompBits(24, 0, 0, 0, pInfo);
1004             break;
1005         case ADDR_DEPTH_32_FLOAT:
1006             GetCompBits(32, 0, 0, 0, pInfo);
1007             break;
1008         case ADDR_DEPTH_X24_8_32_FLOAT:
1009             GetCompBits(32, 8, 0, 0, pInfo);
1010             break;
1011         case ADDR_DEPTH_INVALID:
1012             GetCompBits(0, 0, 0, 0, pInfo);
1013             break;
1014         default:
1015             ADDR_ASSERT(0);
1016             GetCompBits(0, 0, 0, 0, pInfo);
1017             break;
1018     }
1019
1020     switch (format)
1021     {
1022         case ADDR_DEPTH_16:
1023             pInfo->numType [0] = ADDR_UNORM_R6XX;
1024             pInfo->numType [1] = ADDR_ZERO;
1025             break;
1026         case ADDR_DEPTH_8_24:
1027             pInfo->numType [0] = ADDR_UNORM_R6XXDB;
1028             pInfo->numType [1] = ADDR_UINT_BITS;
1029             break;
1030         case ADDR_DEPTH_8_24_FLOAT:
1031             pInfo->numType [0] = ADDR_U4FLOATC;
1032             pInfo->numType [1] = ADDR_UINT_BITS;
1033             break;
1034         case ADDR_DEPTH_X8_24:
1035             pInfo->numType [0] = ADDR_UNORM_R6XXDB;
1036             pInfo->numType [1] = ADDR_ZERO;
1037             break;
1038         case ADDR_DEPTH_X8_24_FLOAT:
1039             pInfo->numType [0] = ADDR_U4FLOATC;
1040             pInfo->numType [1] = ADDR_ZERO;
1041             break;
1042         case ADDR_DEPTH_32_FLOAT:
1043             pInfo->numType [0] = ADDR_S8FLOAT32;
1044             pInfo->numType [1] = ADDR_ZERO;
1045             break;
1046         case ADDR_DEPTH_X24_8_32_FLOAT:
1047             pInfo->numType [0] = ADDR_S8FLOAT32;
1048             pInfo->numType [1] = ADDR_UINT_BITS;
1049             break;
1050         default:
1051             pInfo->numType [0] = ADDR_NO_NUMBER;
1052             pInfo->numType [1] = ADDR_NO_NUMBER;
1053             break;
1054     }
1055
1056     pInfo->numType [2] = ADDR_NO_NUMBER;
1057     pInfo->numType [3] = ADDR_NO_NUMBER;
1058 }
1059
1060 /**
1061 ***************************************************************************************************
1062 *   AddrElemLib::PixGetExportNorm
1063 *
1064 *   @brief
1065 *       Check if fp16 export norm can be enabled.
1066 *
1067 *   @return
1068 *       TRUE if this can be enabled.
1069 *
1070 ***************************************************************************************************
1071 */
1072 BOOL_32 AddrElemLib::PixGetExportNorm(
1073     AddrColorFormat     colorFmt,       ///< [in] surface format, read from register
1074     AddrSurfaceNumber   numberFmt,      ///< [in] pixel number type
1075     AddrSurfaceSwap     swap            ///< [in] components swap type
1076     ) const
1077 {
1078     BOOL_32 enabled = TRUE;
1079
1080     ADDR_PIXEL_FORMATINFO formatInfo;
1081
1082     PixGetColorCompInfo(colorFmt, numberFmt, swap, &formatInfo);
1083
1084     for (UINT_32 c = 0; c < 4; c++)
1085     {
1086         if (m_fp16ExportNorm)
1087         {
1088             if (((formatInfo.compBit[c] > 11) || (formatInfo.numType[c] > ADDR_USCALED)) &&
1089                 (formatInfo.numType[c] != ADDR_U4FLOATC)    &&
1090                 (formatInfo.numType[c] != ADDR_S5FLOAT)     &&
1091                 (formatInfo.numType[c] != ADDR_S5FLOATM)    &&
1092                 (formatInfo.numType[c] != ADDR_U5FLOAT)     &&
1093                 (formatInfo.numType[c] != ADDR_U3FLOATM))
1094             {
1095                 enabled = FALSE;
1096                 break;
1097             }
1098         }
1099         else
1100         {
1101             if ((formatInfo.compBit[c] > 11) || (formatInfo.numType[c] > ADDR_USCALED))
1102             {
1103                 enabled = FALSE;
1104                 break;
1105             }
1106         }
1107     }
1108
1109     return enabled;
1110 }
1111
1112 /**
1113 ***************************************************************************************************
1114 *   AddrElemLib::AdjustSurfaceInfo
1115 *
1116 *   @brief
1117 *       Adjust bpp/base pitch/width/height according to elemMode and expandX/Y
1118 *
1119 *   @return
1120 *       N/A
1121 ***************************************************************************************************
1122 */
1123 VOID AddrElemLib::AdjustSurfaceInfo(
1124     AddrElemMode    elemMode,       ///< [in] element mode
1125     UINT_32         expandX,        ///< [in] decompression expansion factor in X
1126     UINT_32         expandY,        ///< [in] decompression expansion factor in Y
1127     UINT_32*        pBpp,           ///< [in/out] bpp
1128     UINT_32*        pBasePitch,     ///< [in/out] base pitch
1129     UINT_32*        pWidth,         ///< [in/out] width
1130     UINT_32*        pHeight)        ///< [in/out] height
1131 {
1132     UINT_32 packedBits;
1133     UINT_32 basePitch;
1134     UINT_32 width;
1135     UINT_32 height;
1136     UINT_32 bpp;
1137     BOOL_32 bBCnFormat = FALSE;
1138
1139     ADDR_ASSERT(pBpp != NULL);
1140     ADDR_ASSERT(pWidth != NULL && pHeight != NULL && pBasePitch != NULL);
1141
1142     if (pBpp)
1143     {
1144         bpp = *pBpp;
1145
1146         switch (elemMode)
1147         {
1148             case ADDR_EXPANDED:
1149                 packedBits = bpp / expandX / expandY;
1150                 break;
1151             case ADDR_PACKED_STD: // Different bit order
1152             case ADDR_PACKED_REV:
1153                 packedBits = bpp * expandX * expandY;
1154                 break;
1155             case ADDR_PACKED_GBGR:
1156             case ADDR_PACKED_BGRG:
1157                 packedBits = bpp; // 32-bit packed ==> 2 32-bit result
1158                 break;
1159             case ADDR_PACKED_BC1: // Fall through
1160             case ADDR_PACKED_BC4:
1161                 packedBits = 64;
1162                 bBCnFormat = TRUE;
1163                 break;
1164             case ADDR_PACKED_BC2: // Fall through
1165             case ADDR_PACKED_BC3: // Fall through
1166             case ADDR_PACKED_BC5: // Fall through
1167                 bBCnFormat = TRUE;
1168                 packedBits = 128;
1169                 break;
1170             case ADDR_ROUND_BY_HALF:  // Fall through
1171             case ADDR_ROUND_TRUNCATE: // Fall through
1172             case ADDR_ROUND_DITHER:   // Fall through
1173             case ADDR_UNCOMPRESSED:
1174                 packedBits = bpp;
1175                 break;
1176             default:
1177                 packedBits = bpp;
1178                 ADDR_ASSERT_ALWAYS();
1179                 break;
1180         }
1181
1182         *pBpp = packedBits;
1183     }
1184
1185     if (pWidth && pHeight && pBasePitch)
1186     {
1187         basePitch = *pBasePitch;
1188         width     = *pWidth;
1189         height    = *pHeight;
1190
1191         if ((expandX > 1) || (expandY > 1))
1192         {
1193             if (elemMode == ADDR_EXPANDED)
1194             {
1195                 basePitch *= expandX;
1196                 width     *= expandX;
1197                 height    *= expandY;
1198             }
1199             else
1200             {
1201                 // Evergreen family workaround
1202                 if (bBCnFormat && (m_pAddrLib->GetAddrChipFamily() == ADDR_CHIP_FAMILY_R8XX))
1203                 {
1204                     // For BCn we now pad it to POW2 at the beginning so it is safe to
1205                     // divide by 4 directly
1206                     basePitch = basePitch / expandX;
1207                     width     = width  / expandX;
1208                     height    = height / expandY;
1209 #if DEBUG
1210                     width     = (width == 0) ? 1 : width;
1211                     height    = (height == 0) ? 1 : height;
1212
1213                     if ((*pWidth > PowTwoAlign(width, 8) * expandX) ||
1214                         (*pHeight > PowTwoAlign(height, 8) * expandY)) // 8 is 1D tiling alignment
1215                     {
1216                         // if this assertion is hit we may have issues if app samples
1217                         // rightmost/bottommost pixels
1218                         ADDR_ASSERT_ALWAYS();
1219                     }
1220 #endif
1221                 }
1222                 else // Not BCn format we still keep old way (FMT_1? No real test yet)
1223                 {
1224                     basePitch = (basePitch + expandX - 1) / expandX;
1225                     width     = (width + expandX - 1) / expandX;
1226                     height    = (height + expandY - 1) / expandY;
1227                 }
1228             }
1229
1230             *pBasePitch = basePitch; // 0 is legal value for base pitch.
1231             *pWidth     = (width == 0) ? 1 : width;
1232             *pHeight    = (height == 0) ? 1 : height;
1233         } //if (pWidth && pHeight && pBasePitch)
1234     }
1235 }
1236
1237 /**
1238 ***************************************************************************************************
1239 *   AddrElemLib::RestoreSurfaceInfo
1240 *
1241 *   @brief
1242 *       Reverse operation of AdjustSurfaceInfo
1243 *
1244 *   @return
1245 *       N/A
1246 ***************************************************************************************************
1247 */
1248 VOID AddrElemLib::RestoreSurfaceInfo(
1249     AddrElemMode    elemMode,       ///< [in] element mode
1250     UINT_32         expandX,        ///< [in] decompression expansion factor in X
1251     UINT_32         expandY,        ///< [out] decompression expansion factor in Y
1252     UINT_32*        pBpp,           ///< [in/out] bpp
1253     UINT_32*        pWidth,         ///< [in/out] width
1254     UINT_32*        pHeight)        ///< [in/out] height
1255 {
1256     UINT_32 originalBits;
1257     UINT_32 width;
1258     UINT_32 height;
1259     UINT_32 bpp;
1260
1261     ADDR_ASSERT(pBpp != NULL);
1262     ADDR_ASSERT(pWidth != NULL && pHeight != NULL);
1263
1264     if (pBpp)
1265     {
1266         bpp = *pBpp;
1267
1268         switch (elemMode)
1269         {
1270         case ADDR_EXPANDED:
1271             originalBits = bpp * expandX * expandY;
1272             break;
1273         case ADDR_PACKED_STD: // Different bit order
1274         case ADDR_PACKED_REV:
1275             originalBits = bpp / expandX / expandY;
1276             break;
1277         case ADDR_PACKED_GBGR:
1278         case ADDR_PACKED_BGRG:
1279             originalBits = bpp; // 32-bit packed ==> 2 32-bit result
1280             break;
1281         case ADDR_PACKED_BC1: // Fall through
1282         case ADDR_PACKED_BC4:
1283             originalBits = 64;
1284             break;
1285         case ADDR_PACKED_BC2: // Fall through
1286         case ADDR_PACKED_BC3: // Fall through
1287             case ADDR_PACKED_BC5:
1288             originalBits = 128;
1289             break;
1290         case ADDR_ROUND_BY_HALF:  // Fall through
1291         case ADDR_ROUND_TRUNCATE: // Fall through
1292         case ADDR_ROUND_DITHER:   // Fall through
1293         case ADDR_UNCOMPRESSED:
1294             originalBits = bpp;
1295             break;
1296         default:
1297             originalBits = bpp;
1298             ADDR_ASSERT_ALWAYS();
1299             break;
1300         }
1301
1302         *pBpp = originalBits;
1303     }
1304
1305     if (pWidth && pHeight)
1306     {
1307         width    = *pWidth;
1308         height   = *pHeight;
1309
1310         if ((expandX > 1) || (expandY > 1))
1311         {
1312             if (elemMode == ADDR_EXPANDED)
1313             {
1314                 width /= expandX;
1315                 height /= expandY;
1316             }
1317             else
1318             {
1319                 width *= expandX;
1320                 height *= expandY;
1321             }
1322         }
1323
1324         *pWidth  = (width == 0) ? 1 : width;
1325         *pHeight = (height == 0) ? 1 : height;
1326     }
1327 }
1328
1329 /**
1330 ***************************************************************************************************
1331 *   AddrElemLib::GetBitsPerPixel
1332 *
1333 *   @brief
1334 *       Compute the total bits per element according to a format
1335 *       code. For compressed formats, this is not the same as
1336 *       the number of bits per decompressed element.
1337 *
1338 *   @return
1339 *       Bits per pixel
1340 ***************************************************************************************************
1341 */
1342 UINT_32 AddrElemLib::GetBitsPerPixel(
1343     AddrFormat          format,         ///< [in] surface format code
1344     AddrElemMode*       pElemMode,      ///< [out] element mode
1345     UINT_32*            pExpandX,       ///< [out] decompression expansion factor in X
1346     UINT_32*            pExpandY,       ///< [out] decompression expansion factor in Y
1347     UINT_32*            pUnusedBits)    ///< [out] bits unused
1348 {
1349     UINT_32 bpp;
1350     UINT_32 expandX = 1;
1351     UINT_32 expandY = 1;
1352     UINT_32 bitUnused = 0;
1353     AddrElemMode elemMode = ADDR_UNCOMPRESSED; // default value
1354
1355     switch (format)
1356     {
1357         case ADDR_FMT_8:
1358             bpp = 8;
1359             break;
1360         case ADDR_FMT_1_5_5_5:
1361         case ADDR_FMT_5_6_5:
1362         case ADDR_FMT_6_5_5:
1363         case ADDR_FMT_8_8:
1364         case ADDR_FMT_4_4_4_4:
1365         case ADDR_FMT_16:
1366         case ADDR_FMT_16_FLOAT:
1367             bpp = 16;
1368             break;
1369         case ADDR_FMT_GB_GR: // treat as FMT_8_8
1370             elemMode = ADDR_PACKED_GBGR;
1371             bpp = 16;
1372             break;
1373         case ADDR_FMT_BG_RG: // treat as FMT_8_8
1374             elemMode = ADDR_PACKED_BGRG;
1375             bpp = 16;
1376             break;
1377         case ADDR_FMT_8_8_8_8:
1378         case ADDR_FMT_2_10_10_10:
1379         case ADDR_FMT_10_11_11:
1380         case ADDR_FMT_11_11_10:
1381         case ADDR_FMT_16_16:
1382         case ADDR_FMT_16_16_FLOAT:
1383         case ADDR_FMT_32:
1384         case ADDR_FMT_32_FLOAT:
1385         case ADDR_FMT_24_8:
1386         case ADDR_FMT_24_8_FLOAT:
1387             bpp = 32;
1388             break;
1389         case ADDR_FMT_16_16_16_16:
1390         case ADDR_FMT_16_16_16_16_FLOAT:
1391         case ADDR_FMT_32_32:
1392         case ADDR_FMT_32_32_FLOAT:
1393         case ADDR_FMT_CTX1:
1394             bpp = 64;
1395             break;
1396         case ADDR_FMT_32_32_32_32:
1397         case ADDR_FMT_32_32_32_32_FLOAT:
1398             bpp = 128;
1399             break;
1400         case ADDR_FMT_INVALID:
1401             bpp = 0;
1402             break;
1403         case ADDR_FMT_1_REVERSED:
1404             elemMode = ADDR_PACKED_REV;
1405             expandX = 8;
1406             bpp = 1;
1407             break;
1408         case ADDR_FMT_1:
1409             elemMode = ADDR_PACKED_STD;
1410             expandX = 8;
1411             bpp = 1;
1412             break;
1413         case ADDR_FMT_4_4:
1414         case ADDR_FMT_3_3_2:
1415             bpp = 8;
1416             break;
1417         case ADDR_FMT_5_5_5_1:
1418             bpp = 16;
1419             break;
1420         case ADDR_FMT_32_AS_8:
1421         case ADDR_FMT_32_AS_8_8:
1422         case ADDR_FMT_8_24:
1423         case ADDR_FMT_8_24_FLOAT:
1424         case ADDR_FMT_10_10_10_2:
1425         case ADDR_FMT_10_11_11_FLOAT:
1426         case ADDR_FMT_11_11_10_FLOAT:
1427         case ADDR_FMT_5_9_9_9_SHAREDEXP:
1428             bpp = 32;
1429             break;
1430         case ADDR_FMT_X24_8_32_FLOAT:
1431             bpp = 64;
1432             bitUnused = 24;
1433             break;
1434         case ADDR_FMT_8_8_8:
1435             elemMode = ADDR_EXPANDED;
1436             bpp = 24;//@@ 8;      // read 3 elements per pixel
1437             expandX = 3;
1438             break;
1439         case ADDR_FMT_16_16_16:
1440         case ADDR_FMT_16_16_16_FLOAT:
1441             elemMode = ADDR_EXPANDED;
1442             bpp = 48;//@@ 16;      // read 3 elements per pixel
1443             expandX = 3;
1444             break;
1445         case ADDR_FMT_32_32_32_FLOAT:
1446         case ADDR_FMT_32_32_32:
1447             elemMode = ADDR_EXPANDED;
1448             expandX = 3;
1449             bpp = 96;//@@ 32;      // read 3 elements per pixel
1450             break;
1451         case ADDR_FMT_BC1:
1452             elemMode = ADDR_PACKED_BC1;
1453             expandX = 4;
1454             expandY = 4;
1455             bpp = 64;
1456             break;
1457         case ADDR_FMT_BC4:
1458             elemMode = ADDR_PACKED_BC4;
1459             expandX = 4;
1460             expandY = 4;
1461             bpp = 64;
1462             break;
1463         case ADDR_FMT_BC2:
1464             elemMode = ADDR_PACKED_BC2;
1465             expandX = 4;
1466             expandY = 4;
1467             bpp = 128;
1468             break;
1469         case ADDR_FMT_BC3:
1470             elemMode = ADDR_PACKED_BC3;
1471             expandX = 4;
1472             expandY = 4;
1473             bpp = 128;
1474             break;
1475         case ADDR_FMT_BC5:
1476         case ADDR_FMT_BC6: // reuse ADDR_PACKED_BC5
1477         case ADDR_FMT_BC7: // reuse ADDR_PACKED_BC5
1478             elemMode = ADDR_PACKED_BC5;
1479             expandX = 4;
1480             expandY = 4;
1481             bpp = 128;
1482             break;
1483         default:
1484             bpp = 0;
1485             ADDR_ASSERT_ALWAYS();
1486             break;
1487             // @@ or should this be an error?
1488     }
1489
1490     SafeAssign(pExpandX, expandX);
1491     SafeAssign(pExpandY, expandY);
1492     SafeAssign(pUnusedBits, bitUnused);
1493     SafeAssign(reinterpret_cast<UINT_32*>(pElemMode), elemMode);
1494
1495     return bpp;
1496 }
1497
1498 /**
1499 ***************************************************************************************************
1500 *   AddrElemLib::GetCompBits
1501 *
1502 *   @brief
1503 *       Set each component's bit size and bit start. And set element mode and number type
1504 *
1505 *   @return
1506 *       N/A
1507 ***************************************************************************************************
1508 */
1509 VOID AddrElemLib::GetCompBits(
1510     UINT_32 c0,                     ///< [in] bits of component 0
1511     UINT_32 c1,                     ///< [in] bits of component 1
1512     UINT_32 c2,                     ///< [in] bits of component 2
1513     UINT_32 c3,                     ///< [in] bits of component 3
1514     ADDR_PIXEL_FORMATINFO* pInfo,   ///< [out] per component info out
1515     AddrElemMode elemMode)          ///< [in] element mode
1516 {
1517     pInfo->comps = 0;
1518
1519     pInfo->compBit[0] = c0;
1520     pInfo->compBit[1] = c1;
1521     pInfo->compBit[2] = c2;
1522     pInfo->compBit[3] = c3;
1523
1524     pInfo->compStart[0] = 0;
1525     pInfo->compStart[1] = c0;
1526     pInfo->compStart[2] = c0+c1;
1527     pInfo->compStart[3] = c0+c1+c2;
1528
1529     pInfo->elemMode = elemMode;
1530     // still needed since component swap may depend on number of components
1531     for (INT i=0; i<4; i++)
1532     {
1533         if (pInfo->compBit[i] == 0)
1534         {
1535             pInfo->compStart[i]  = 0;       // all null components start at bit 0
1536             pInfo->numType[i] = ADDR_NO_NUMBER; // and have no number type
1537         }
1538         else
1539         {
1540             pInfo->comps++;
1541         }
1542     }
1543 }
1544
1545 /**
1546 ***************************************************************************************************
1547 *   AddrElemLib::GetCompBits
1548 *
1549 *   @brief
1550 *       Set the clear color (or clear depth/stencil) for a surface
1551 *
1552 *   @note
1553 *       If clearColor is zero, a default clear value is used in place of comps[4].
1554 *       If float32 is set, full precision is used, else the mantissa is reduced to 12-bits
1555 *
1556 *   @return
1557 *       N/A
1558 ***************************************************************************************************
1559 */
1560 VOID AddrElemLib::SetClearComps(
1561     ADDR_FLT_32 comps[4],   ///< [in/out] components
1562     BOOL_32 clearColor,     ///< [in] TRUE if clear color is set (CLEAR_COLOR)
1563     BOOL_32 float32)        ///< [in] TRUE if float32 component (BLEND_FLOAT32)
1564 {
1565     INT_32 i;
1566
1567     // Use default clearvalues if clearColor is disabled
1568     if (clearColor == FALSE)
1569     {
1570         for (i=0; i<3; i++)
1571         {
1572             comps[i].f = 0.0;
1573         }
1574         comps[3].f = 1.0;
1575     }
1576
1577     // Otherwise use the (modified) clear value
1578     else
1579     {
1580         for (i=0; i<4; i++)
1581         {   // If full precision, use clear value unchanged
1582             if (float32)
1583             {
1584                 // Do nothing
1585                 //comps[i] = comps[i];
1586             }
1587             // Else if it is a NaN, use the standard NaN value
1588             else if ((comps[i].u & 0x7FFFFFFF) > 0x7F800000)
1589             {
1590                 comps[i].u = 0xFFC00000;
1591             }
1592             // Else reduce the mantissa precision
1593             else
1594             {
1595                 comps[i].u = comps[i].u & 0xFFFFF000;
1596             }
1597         }
1598     }
1599 }
1600
1601 /**
1602 ***************************************************************************************************
1603 *   AddrElemLib::IsBlockCompressed
1604 *
1605 *   @brief
1606 *       TRUE if this is block compressed format
1607 *
1608 *   @note
1609 *
1610 *   @return
1611 *       BOOL_32
1612 ***************************************************************************************************
1613 */
1614 BOOL_32 AddrElemLib::IsBlockCompressed(
1615     AddrFormat format)  ///< [in] Format
1616 {
1617     return format >= ADDR_FMT_BC1 && format <= ADDR_FMT_BC7;
1618 }
1619
1620
1621 /**
1622 ***************************************************************************************************
1623 *   AddrElemLib::IsCompressed
1624 *
1625 *   @brief
1626 *       TRUE if this is block compressed format or 1 bit format
1627 *
1628 *   @note
1629 *
1630 *   @return
1631 *       BOOL_32
1632 ***************************************************************************************************
1633 */
1634 BOOL_32 AddrElemLib::IsCompressed(
1635     AddrFormat format)  ///< [in] Format
1636 {
1637     return IsBlockCompressed(format) || format == ADDR_FMT_BC1 || format == ADDR_FMT_BC7;
1638 }
1639
1640 /**
1641 ***************************************************************************************************
1642 *   AddrElemLib::IsExpand3x
1643 *
1644 *   @brief
1645 *       TRUE if this is 3x expand format
1646 *
1647 *   @note
1648 *
1649 *   @return
1650 *       BOOL_32
1651 ***************************************************************************************************
1652 */
1653 BOOL_32 AddrElemLib::IsExpand3x(
1654     AddrFormat format)  ///< [in] Format
1655 {
1656     BOOL_32 is3x = FALSE;
1657
1658     switch (format)
1659     {
1660         case ADDR_FMT_8_8_8:
1661         case ADDR_FMT_16_16_16:
1662         case ADDR_FMT_16_16_16_FLOAT:
1663         case ADDR_FMT_32_32_32:
1664         case ADDR_FMT_32_32_32_FLOAT:
1665             is3x = TRUE;
1666             break;
1667         default:
1668             break;
1669     }
1670
1671     return is3x;
1672 }
1673
1674