src/mesa/main/texcompress_fxt1.c

   1 /*
   2  * Mesa 3-D graphics library
   3  * Version:  6.5
   4  *
   5  * Copyright (C) 1999-2005  Brian Paul   All Rights Reserved.
   6  *
   7  * Permission is hereby granted, free of charge, to any person obtaining a
   8  * copy of this software and associated documentation files (the "Software"),
   9  * to deal in the Software without restriction, including without limitation
  10  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  11  * and/or sell copies of the Software, and to permit persons to whom the
  12  * Software is furnished to do so, subject to the following conditions:
  13  *
  14  * The above copyright notice and this permission notice shall be included
  15  * in all copies or substantial portions of the Software.
  16  *
  17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  18  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  20  * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
  21  * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  22  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  23  */
  24
  25
  26 /**
  27  * \file texcompress_fxt1.c
  28  * GL_EXT_texture_compression_fxt1 support.
  29  */
  30
  31
  32 #include "glheader.h"
  33 #include "imports.h"
  34 #include "colormac.h"
  35 #include "context.h"
  36 #include "convolve.h"
  37 #include "image.h"
  38 #include "texcompress.h"
  39 #include "texformat.h"
  40 #include "texstore.h"
  41
  42
/*
 * Forward declaration of the FXT1 image encoder (static, so the definition
 * lives elsewhere in this file).  The texstore functions below call it with
 * the source image size, the number of components per source texel (3 or 4),
 * the source pixels and their row stride, and the destination buffer and
 * its row stride.
 */
static void
fxt1_encode (GLuint width, GLuint height, GLint comps,
             const void *source, GLint srcRowStride,
             void *dest, GLint destRowStride);

/*
 * Forward declaration of the single-texel FXT1 decoder (static, so the
 * definition lives elsewhere in this file).  The fetch functions below call
 * it with the image data pointer, the image row stride and the texel
 * coordinates (i, j); the decoded components are written to rgba.
 */
static void
fxt1_decode_1 (const void *texture, GLint stride,
               GLint i, GLint j, GLchan *rgba);
  51
  52
/**
 * Called during context initialization.
 *
 * The body currently does nothing; the context argument is only referenced
 * to avoid an unused-parameter warning.
 *
 * \param ctx  the GL context being initialized (unused)
 */
void
_mesa_init_texture_fxt1( GLcontext *ctx )
{
   (void) ctx;
}
  61
  62
  63 /**
  64  * Called via TexFormat->StoreImage to store an RGB_FXT1 texture.
  65  */
  66 static GLboolean
  67 texstore_rgb_fxt1(STORE_PARAMS)
  68 {
  69    const GLchan *pixels;
  70    GLint srcRowStride;
  71    GLubyte *dst;
  72    const GLint texWidth = dstRowStride * 8 / 16; /* a bit of a hack */
  73    const GLchan *tempImage = NULL;
  74
  75    ASSERT(dstFormat == &_mesa_texformat_rgb_fxt1);
  76    ASSERT(dstXoffset % 8 == 0);
  77    ASSERT(dstYoffset % 4 == 0);
  78    ASSERT(dstZoffset     == 0);
  79    (void) dstZoffset; (void) dstImageStride;
  80
  81    if (srcFormat != GL_RGB ||
  82        srcType != CHAN_TYPE ||
  83        ctx->_ImageTransferState ||
  84        srcPacking->SwapBytes) {
  85       /* convert image to RGB/GLchan */
  86       tempImage = _mesa_make_temp_chan_image(ctx, dims,
  87                                              baseInternalFormat,
  88                                              dstFormat->BaseFormat,
  89                                              srcWidth, srcHeight, srcDepth,
  90                                              srcFormat, srcType, srcAddr,
  91                                              srcPacking);
  92       if (!tempImage)
  93          return GL_FALSE; /* out of memory */
  94       _mesa_adjust_image_for_convolution(ctx, dims, &srcWidth, &srcHeight);
  95       pixels = tempImage;
  96       srcRowStride = 3 * srcWidth;
  97       srcFormat = GL_RGB;
  98    }
  99    else {
 100       pixels = (const GLchan *) srcAddr;
 101       srcRowStride = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat,
 102                                             srcType) / sizeof(GLchan);
 103    }
 104
 105    dst = _mesa_compressed_image_address(dstXoffset, dstYoffset, 0,
 106                                         GL_COMPRESSED_RGB_FXT1_3DFX,
 107                                         texWidth, (GLubyte *) dstAddr);
 108
 109    fxt1_encode(srcWidth, srcHeight, 3, pixels, srcRowStride,
 110                dst, dstRowStride);
 111
 112    if (tempImage)
 113       _mesa_free((void*) tempImage);
 114
 115    return GL_TRUE;
 116 }
 117
 118
 119 /**
 120  * Called via TexFormat->StoreImage to store an RGBA_FXT1 texture.
 121  */
 122 static GLboolean
 123 texstore_rgba_fxt1(STORE_PARAMS)
 124 {
 125    const GLchan *pixels;
 126    GLint srcRowStride;
 127    GLubyte *dst;
 128    GLint texWidth = dstRowStride * 8 / 16; /* a bit of a hack */
 129    const GLchan *tempImage = NULL;
 130
 131    ASSERT(dstFormat == &_mesa_texformat_rgba_fxt1);
 132    ASSERT(dstXoffset % 8 == 0);
 133    ASSERT(dstYoffset % 4 == 0);
 134    ASSERT(dstZoffset     == 0);
 135    (void) dstZoffset; (void) dstImageStride;
 136
 137    if (srcFormat != GL_RGBA ||
 138        srcType != CHAN_TYPE ||
 139        ctx->_ImageTransferState ||
 140        srcPacking->SwapBytes) {
 141       /* convert image to RGBA/GLchan */
 142       tempImage = _mesa_make_temp_chan_image(ctx, dims,
 143                                              baseInternalFormat,
 144                                              dstFormat->BaseFormat,
 145                                              srcWidth, srcHeight, srcDepth,
 146                                              srcFormat, srcType, srcAddr,
 147                                              srcPacking);
 148       if (!tempImage)
 149          return GL_FALSE; /* out of memory */
 150       _mesa_adjust_image_for_convolution(ctx, dims, &srcWidth, &srcHeight);
 151       pixels = tempImage;
 152       srcRowStride = 4 * srcWidth;
 153       srcFormat = GL_RGBA;
 154    }
 155    else {
 156       pixels = (const GLchan *) srcAddr;
 157       srcRowStride = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat,
 158                                             srcType) / sizeof(GLchan);
 159    }
 160
 161    dst = _mesa_compressed_image_address(dstXoffset, dstYoffset, 0,
 162                                         GL_COMPRESSED_RGBA_FXT1_3DFX,
 163                                         texWidth, (GLubyte *) dstAddr);
 164
 165    fxt1_encode(srcWidth, srcHeight, 4, pixels, srcRowStride,
 166                dst, dstRowStride);
 167
 168    if (tempImage)
 169       _mesa_free((void*) tempImage);
 170
 171    return GL_TRUE;
 172 }
 173
 174
 175 static void
 176 fetch_texel_2d_rgba_fxt1( const struct gl_texture_image *texImage,
 177                           GLint i, GLint j, GLint k, GLchan *texel )
 178 {
 179    (void) k;
 180    fxt1_decode_1(texImage->Data, texImage->RowStride, i, j, texel);
 181 }
 182
 183
 184 static void
 185 fetch_texel_2d_f_rgba_fxt1( const struct gl_texture_image *texImage,
 186                             GLint i, GLint j, GLint k, GLfloat *texel )
 187 {
 188    /* just sample as GLchan and convert to float here */
 189    GLchan rgba[4];
 190    (void) k;
 191    fxt1_decode_1(texImage->Data, texImage->RowStride, i, j, rgba);
 192    texel[RCOMP] = CHAN_TO_FLOAT(rgba[RCOMP]);
 193    texel[GCOMP] = CHAN_TO_FLOAT(rgba[GCOMP]);
 194    texel[BCOMP] = CHAN_TO_FLOAT(rgba[BCOMP]);
 195    texel[ACOMP] = CHAN_TO_FLOAT(rgba[ACOMP]);
 196 }
 197
 198
 199 static void
 200 fetch_texel_2d_rgb_fxt1( const struct gl_texture_image *texImage,
 201                          GLint i, GLint j, GLint k, GLchan *texel )
 202 {
 203    (void) k;
 204    fxt1_decode_1(texImage->Data, texImage->RowStride, i, j, texel);
 205    texel[ACOMP] = 255;
 206 }
 207
 208
 209 static void
 210 fetch_texel_2d_f_rgb_fxt1( const struct gl_texture_image *texImage,
 211                            GLint i, GLint j, GLint k, GLfloat *texel )
 212 {
 213    /* just sample as GLchan and convert to float here */
 214    GLchan rgba[4];
 215    (void) k;
 216    fxt1_decode_1(texImage->Data, texImage->RowStride, i, j, rgba);
 217    texel[RCOMP] = CHAN_TO_FLOAT(rgba[RCOMP]);
 218    texel[GCOMP] = CHAN_TO_FLOAT(rgba[GCOMP]);
 219    texel[BCOMP] = CHAN_TO_FLOAT(rgba[BCOMP]);
 220    texel[ACOMP] = 1.0F;
 221 }
 222
 223
 224
/**
 * Texture format descriptor for RGB FXT1.
 *
 * Hooks up texstore_rgb_fxt1() as the store function and the two 2D RGB
 * fetch functions defined above; the 1D and 3D fetch slots are NULL.
 * The per-channel bit counts are approximations and TexelBytes is 0.
 */
const struct gl_texture_format _mesa_texformat_rgb_fxt1 = {
   MESA_FORMAT_RGB_FXT1,                /* MesaFormat */
   GL_RGB,                              /* BaseFormat */
   GL_UNSIGNED_NORMALIZED_ARB,          /* DataType */
   4, /*approx*/                        /* RedBits */
   4, /*approx*/                        /* GreenBits */
   4, /*approx*/                        /* BlueBits */
   0,                                   /* AlphaBits */
   0,                                   /* LuminanceBits */
   0,                                   /* IntensityBits */
   0,                                   /* IndexBits */
   0,                                   /* DepthBits */
   0,                                   /* TexelBytes */
   texstore_rgb_fxt1,                   /* StoreTexImageFunc */
   NULL, /*impossible*/                 /* FetchTexel1D */
   fetch_texel_2d_rgb_fxt1,             /* FetchTexel2D */
   NULL, /*impossible*/                 /* FetchTexel3D */
   NULL, /*impossible*/                 /* FetchTexel1Df */
   fetch_texel_2d_f_rgb_fxt1,           /* FetchTexel2Df */
   NULL, /*impossible*/                 /* FetchTexel3Df */
};
 246
/**
 * Texture format descriptor for RGBA FXT1.
 *
 * Hooks up texstore_rgba_fxt1() as the store function and the two 2D RGBA
 * fetch functions defined above; the 1D and 3D fetch slots are NULL.
 * The per-channel bit counts are approximations and TexelBytes is 0.
 */
const struct gl_texture_format _mesa_texformat_rgba_fxt1 = {
   MESA_FORMAT_RGBA_FXT1,               /* MesaFormat */
   GL_RGBA,                             /* BaseFormat */
   GL_UNSIGNED_NORMALIZED_ARB,          /* DataType */
   4, /*approx*/                        /* RedBits */
   4, /*approx*/                        /* GreenBits */
   4, /*approx*/                        /* BlueBits */
   1, /*approx*/                        /* AlphaBits */
   0,                                   /* LuminanceBits */
   0,                                   /* IntensityBits */
   0,                                   /* IndexBits */
   0,                                   /* DepthBits */
   0,                                   /* TexelBytes */
   texstore_rgba_fxt1,                  /* StoreTexImageFunc */
   NULL, /*impossible*/                 /* FetchTexel1D */
   fetch_texel_2d_rgba_fxt1,            /* FetchTexel2D */
   NULL, /*impossible*/                 /* FetchTexel3D */
   NULL, /*impossible*/                 /* FetchTexel1Df */
   fetch_texel_2d_f_rgba_fxt1,          /* FetchTexel2Df */
   NULL, /*impossible*/                 /* FetchTexel3Df */
};
 268
 269
 270 /***************************************************************************\
 271  * FXT1 encoder
 272  *
 273  * The encoder was built by reversing the decoder,
 274  * and is vaguely based on Texus2 by 3dfx. Note that this code
 275  * is merely a proof of concept, since it is highly UNoptimized;
 276  * moreover, it is sub-optimal due to initial conditions passed
 277  * to Lloyd's algorithm (the interpolation modes are even worse).
 278 \***************************************************************************/
 279
 280
/* Sizes and tuning constants shared by the encoder routines below. */
#define MAX_COMP 4 /* ever needed maximum number of components in texel */
#define MAX_VECT 4 /* ever needed maximum number of base vectors to find */
#define N_TEXELS 32 /* number of texels in a block (always 32) */
#define LL_N_REP 50 /* number of iterations in lloyd's vq */
#define LL_RMS_D 10 /* fault tolerance (maximum delta) */
#define LL_RMS_E 255 /* fault tolerance (maximum error) */
#define ALPHA_TS 2 /* alpha threshold: (255 - ALPHA_TS) deemed opaque */
/* True when the GLuint read through v is zero.  The encoder applies it to
 * GLubyte[MAX_COMP] texels, where (assuming a 4-byte GLuint) it means all
 * four components are zero, i.e. the texel is transparent black. */
#define ISTBLACK(v) (*((GLuint *)(v)) == 0)
 289
 290
 291 /*
 292  * Define a 64-bit unsigned integer type and macros
 293  */
 294 #if defined(__GNUC__) && !defined(__cplusplus)
 295
 296 #define FX64_NATIVE 1
 297
 298 typedef unsigned long long Fx64;
 299
 300 #define FX64_MOV32(a, b) a = b
 301 #define FX64_OR32(a, b)  a |= b
 302 #define FX64_SHL(a, c)   a <<= c
 303
 304 #else  /* !__GNUC__ */
 305
 306 #define FX64_NATIVE 0
 307
 308 typedef struct {
 309    GLuint lo, hi;
 310 } Fx64;
 311
 312 #define FX64_MOV32(a, b) a.lo = b
 313 #define FX64_OR32(a, b)  a.lo |= b
 314
 315 #define FX64_SHL(a, c)                                 \
 316    do {                                                \
 317        if ((c) >= 32) {                                \
 318           a.hi = a.lo << ((c) - 32);                   \
 319           a.lo = 0;                                    \
 320        } else {                                        \
 321           a.hi = (a.hi << (c)) | (a.lo >> (32 - (c))); \
 322           a.lo <<= (c);                                \
 323        }                                               \
 324    } while (0)
 325
 326 #endif /* !__GNUC__ */
 327
 328
/* Per-component weight used by MAKEIVEC; currently the same for every i. */
#define F(i) (GLfloat)1 /* can be used to obtain an oblong metric: 0.30 / 0.59 / 0.11 */
/* When non-zero, CALCCDOT clamps its result to the range [0, NV]. */
#define SAFECDOT 1 /* for paranoids */

/*
 * MAKEIVEC: build the interpolation vector IV and offset B for the segment
 * V0 -> V1, such that CALCCDOT (dot(V, IV) + B, truncated to integer)
 * yields 0 for V == V0 and NV for V == V1; the 0.5 added to B turns the
 * truncation into rounding.
 *
 * NV is the highest index, NC the number of components.  The macro uses a
 * variable named i that must be declared by the caller.  It divides by the
 * squared length of (V1 - V0), so V0 and V1 must differ; the callers
 * visible in this file only invoke it when their two endpoint indices
 * differ.
 */
#define MAKEIVEC(NV, NC, IV, B, V0, V1)  \
   do {                                  \
      /* compute interpolation vector */ \
      GLfloat d2 = 0.0F;                 \
      GLfloat rd2;                       \
                                         \
      for (i = 0; i < NC; i++) {         \
         IV[i] = (V1[i] - V0[i]) * F(i); \
         d2 += IV[i] * IV[i];            \
      }                                  \
      rd2 = (GLfloat)NV / d2;            \
      B = 0;                             \
      for (i = 0; i < NC; i++) {         \
         IV[i] *= F(i);                  \
         B -= IV[i] * V0[i];             \
         IV[i] *= rd2;                   \
      }                                  \
      B = B * rd2 + 0.5f;                \
   } while (0)

/*
 * CALCCDOT: project vector V onto the interpolation vector IV / offset B
 * produced by MAKEIVEC and store the resulting index in TEXEL.  With
 * SAFECDOT enabled the index is clamped to [0, NV].  Like MAKEIVEC, it
 * uses the caller's variable i as loop counter.
 */
#define CALCCDOT(TEXEL, NV, NC, IV, B, V)\
   do {                                  \
      GLfloat dot = 0.0F;                \
      for (i = 0; i < NC; i++) {         \
         dot += V[i] * IV[i];            \
      }                                  \
      TEXEL = (GLint)(dot + B);          \
      if (SAFECDOT) {                    \
         if (TEXEL < 0) {                \
            TEXEL = 0;                   \
         } else if (TEXEL > NV) {        \
            TEXEL = NV;                  \
         }                               \
      }                                  \
   } while (0)
 367
 368
 369 static GLint
 370 fxt1_bestcol (GLfloat vec[][MAX_COMP], GLint nv,
 371               GLubyte input[MAX_COMP], GLint nc)
 372 {
 373    GLint i, j, best = -1;
 374    GLfloat err = 1e9; /* big enough */
 375
 376    for (j = 0; j < nv; j++) {
 377       GLfloat e = 0.0F;
 378       for (i = 0; i < nc; i++) {
 379          e += (vec[j][i] - input[i]) * (vec[j][i] - input[i]);
 380       }
 381       if (e < err) {
 382          err = e;
 383          best = j;
 384       }
 385    }
 386
 387    return best;
 388 }
 389
 390
 391 static GLint
 392 fxt1_worst (GLfloat vec[MAX_COMP],
 393             GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
 394 {
 395    GLint i, k, worst = -1;
 396    GLfloat err = -1.0F; /* small enough */
 397
 398    for (k = 0; k < n; k++) {
 399       GLfloat e = 0.0F;
 400       for (i = 0; i < nc; i++) {
 401          e += (vec[i] - input[k][i]) * (vec[i] - input[k][i]);
 402       }
 403       if (e > err) {
 404          err = e;
 405          worst = k;
 406       }
 407    }
 408
 409    return worst;
 410 }
 411
 412
 413 static GLint
 414 fxt1_variance (GLdouble variance[MAX_COMP],
 415                GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
 416 {
 417    GLint i, k, best = 0;
 418    GLint sx, sx2;
 419    GLdouble var, maxvar = -1; /* small enough */
 420    GLdouble teenth = 1.0 / n;
 421
 422    for (i = 0; i < nc; i++) {
 423       sx = sx2 = 0;
 424       for (k = 0; k < n; k++) {
 425          GLint t = input[k][i];
 426          sx += t;
 427          sx2 += t * t;
 428       }
 429       var = sx2 * teenth - sx * sx * teenth * teenth;
 430       if (maxvar < var) {
 431          maxvar = var;
 432          best = i;
 433       }
 434       if (variance) {
 435          variance[i] = var;
 436       }
 437    }
 438
 439    return best;
 440 }
 441
 442
 443 static GLint
 444 fxt1_choose (GLfloat vec[][MAX_COMP], GLint nv,
 445              GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
 446 {
 447 #if 0
 448    /* Choose colors from a grid.
 449     */
 450    GLint i, j;
 451
 452    for (j = 0; j < nv; j++) {
 453       GLint m = j * (n - 1) / (nv - 1);
 454       for (i = 0; i < nc; i++) {
 455          vec[j][i] = input[m][i];
 456       }
 457    }
 458 #else
 459    /* Our solution here is to find the darkest and brightest colors in
 460     * the 8x4 tile and use those as the two representative colors.
 461     * There are probably better algorithms to use (histogram-based).
 462     */
 463    GLint i, j, k;
 464    GLint minSum = 2000; /* big enough */
 465    GLint maxSum = -1; /* small enough */
 466    GLint minCol = 0; /* phoudoin: silent compiler! */
 467    GLint maxCol = 0; /* phoudoin: silent compiler! */
 468
 469    struct {
 470       GLint flag;
 471       GLint key;
 472       GLint freq;
 473       GLint idx;
 474    } hist[N_TEXELS];
 475    GLint lenh = 0;
 476
 477    memset(hist, 0, sizeof(hist));
 478
 479    for (k = 0; k < n; k++) {
 480       GLint l;
 481       GLint key = 0;
 482       GLint sum = 0;
 483       for (i = 0; i < nc; i++) {
 484          key <<= 8;
 485          key |= input[k][i];
 486          sum += input[k][i];
 487       }
 488       for (l = 0; l < n; l++) {
 489          if (!hist[l].flag) {
 490             /* alloc new slot */
 491             hist[l].flag = !0;
 492             hist[l].key = key;
 493             hist[l].freq = 1;
 494             hist[l].idx = k;
 495             lenh = l + 1;
 496             break;
 497          } else if (hist[l].key == key) {
 498             hist[l].freq++;
 499             break;
 500          }
 501       }
 502       if (minSum > sum) {
 503          minSum = sum;
 504          minCol = k;
 505       }
 506       if (maxSum < sum) {
 507          maxSum = sum;
 508          maxCol = k;
 509       }
 510    }
 511
 512    if (lenh <= nv) {
 513       for (j = 0; j < lenh; j++) {
 514          for (i = 0; i < nc; i++) {
 515             vec[j][i] = (GLfloat)input[hist[j].idx][i];
 516          }
 517       }
 518       for (; j < nv; j++) {
 519          for (i = 0; i < nc; i++) {
 520             vec[j][i] = vec[0][i];
 521          }
 522       }
 523       return 0;
 524    }
 525
 526    for (j = 0; j < nv; j++) {
 527       for (i = 0; i < nc; i++) {
 528          vec[j][i] = ((nv - 1 - j) * input[minCol][i] + j * input[maxCol][i] + (nv - 1) / 2) / (GLfloat)(nv - 1);
 529       }
 530    }
 531 #endif
 532
 533    return !0;
 534 }
 535
 536
 537 static GLint
 538 fxt1_lloyd (GLfloat vec[][MAX_COMP], GLint nv,
 539             GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
 540 {
 541    /* Use the generalized lloyd's algorithm for VQ:
 542     *     find 4 color vectors.
 543     *
 544     *     for each sample color
 545     *         sort to nearest vector.
 546     *
 547     *     replace each vector with the centroid of it's matching colors.
 548     *
 549     *     repeat until RMS doesn't improve.
 550     *
 551     *     if a color vector has no samples, or becomes the same as another
 552     *     vector, replace it with the color which is farthest from a sample.
 553     *
 554     * vec[][MAX_COMP]           initial vectors and resulting colors
 555     * nv                        number of resulting colors required
 556     * input[N_TEXELS][MAX_COMP] input texels
 557     * nc                        number of components in input / vec
 558     * n                         number of input samples
 559     */
 560
 561    GLint sum[MAX_VECT][MAX_COMP]; /* used to accumulate closest texels */
 562    GLint cnt[MAX_VECT]; /* how many times a certain vector was chosen */
 563    GLfloat error, lasterror = 1e9;
 564
 565    GLint i, j, k, rep;
 566
 567    /* the quantizer */
 568    for (rep = 0; rep < LL_N_REP; rep++) {
 569       /* reset sums & counters */
 570       for (j = 0; j < nv; j++) {
 571          for (i = 0; i < nc; i++) {
 572             sum[j][i] = 0;
 573          }
 574          cnt[j] = 0;
 575       }
 576       error = 0;
 577
 578       /* scan whole block */
 579       for (k = 0; k < n; k++) {
 580 #if 1
 581          GLint best = -1;
 582          GLfloat err = 1e9; /* big enough */
 583          /* determine best vector */
 584          for (j = 0; j < nv; j++) {
 585             GLfloat e = (vec[j][0] - input[k][0]) * (vec[j][0] - input[k][0]) +
 586                       (vec[j][1] - input[k][1]) * (vec[j][1] - input[k][1]) +
 587                       (vec[j][2] - input[k][2]) * (vec[j][2] - input[k][2]);
 588             if (nc == 4) {
 589                e += (vec[j][3] - input[k][3]) * (vec[j][3] - input[k][3]);
 590             }
 591             if (e < err) {
 592                err = e;
 593                best = j;
 594             }
 595          }
 596 #else
 597          GLint best = fxt1_bestcol(vec, nv, input[k], nc, &err);
 598 #endif
 599          /* add in closest color */
 600          for (i = 0; i < nc; i++) {
 601             sum[best][i] += input[k][i];
 602          }
 603          /* mark this vector as used */
 604          cnt[best]++;
 605          /* accumulate error */
 606          error += err;
 607       }
 608
 609       /* check RMS */
 610       if ((error < LL_RMS_E) ||
 611           ((error < lasterror) && ((lasterror - error) < LL_RMS_D))) {
 612          return !0; /* good match */
 613       }
 614       lasterror = error;
 615
 616       /* move each vector to the barycenter of its closest colors */
 617       for (j = 0; j < nv; j++) {
 618          if (cnt[j]) {
 619             GLfloat div = 1.0F / cnt[j];
 620             for (i = 0; i < nc; i++) {
 621                vec[j][i] = div * sum[j][i];
 622             }
 623          } else {
 624             /* this vec has no samples or is identical with a previous vec */
 625             GLint worst = fxt1_worst(vec[j], input, nc, n);
 626             for (i = 0; i < nc; i++) {
 627                vec[j][i] = input[worst][i];
 628             }
 629          }
 630       }
 631    }
 632
 633    return 0; /* could not converge fast enough */
 634 }
 635
 636
 637 static void
 638 fxt1_quantize_CHROMA (GLuint *cc,
 639                       GLubyte input[N_TEXELS][MAX_COMP])
 640 {
 641    const GLint n_vect = 4; /* 4 base vectors to find */
 642    const GLint n_comp = 3; /* 3 components: R, G, B */
 643    GLfloat vec[MAX_VECT][MAX_COMP];
 644    GLint i, j, k;
 645    Fx64 hi; /* high quadword */
 646    GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
 647
 648    if (fxt1_choose(vec, n_vect, input, n_comp, N_TEXELS) != 0) {
 649       fxt1_lloyd(vec, n_vect, input, n_comp, N_TEXELS);
 650    }
 651
 652    FX64_MOV32(hi, 4); /* cc-chroma = "010" + unused bit */
 653    for (j = n_vect - 1; j >= 0; j--) {
 654       for (i = 0; i < n_comp; i++) {
 655          /* add in colors */
 656          FX64_SHL(hi, 5);
 657          FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
 658       }
 659    }
 660    ((Fx64 *)cc)[1] = hi;
 661
 662    lohi = lolo = 0;
 663    /* right microtile */
 664    for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) {
 665       lohi <<= 2;
 666       lohi |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
 667    }
 668    /* left microtile */
 669    for (; k >= 0; k--) {
 670       lolo <<= 2;
 671       lolo |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
 672    }
 673    cc[1] = lohi;
 674    cc[0] = lolo;
 675 }
 676
 677
 678 static void
 679 fxt1_quantize_ALPHA0 (GLuint *cc,
 680                       GLubyte input[N_TEXELS][MAX_COMP],
 681                       GLubyte reord[N_TEXELS][MAX_COMP], GLint n)
 682 {
 683    const GLint n_vect = 3; /* 3 base vectors to find */
 684    const GLint n_comp = 4; /* 4 components: R, G, B, A */
 685    GLfloat vec[MAX_VECT][MAX_COMP];
 686    GLint i, j, k;
 687    Fx64 hi; /* high quadword */
 688    GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
 689
 690    /* the last vector indicates zero */
 691    for (i = 0; i < n_comp; i++) {
 692       vec[n_vect][i] = 0;
 693    }
 694
 695    /* the first n texels in reord are guaranteed to be non-zero */
 696    if (fxt1_choose(vec, n_vect, reord, n_comp, n) != 0) {
 697       fxt1_lloyd(vec, n_vect, reord, n_comp, n);
 698    }
 699
 700    FX64_MOV32(hi, 6); /* alpha = "011" + lerp = 0 */
 701    for (j = n_vect - 1; j >= 0; j--) {
 702       /* add in alphas */
 703       FX64_SHL(hi, 5);
 704       FX64_OR32(hi, (GLuint)(vec[j][ACOMP] / 8.0F));
 705    }
 706    for (j = n_vect - 1; j >= 0; j--) {
 707       for (i = 0; i < n_comp - 1; i++) {
 708          /* add in colors */
 709          FX64_SHL(hi, 5);
 710          FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
 711       }
 712    }
 713    ((Fx64 *)cc)[1] = hi;
 714
 715    lohi = lolo = 0;
 716    /* right microtile */
 717    for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) {
 718       lohi <<= 2;
 719       lohi |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
 720    }
 721    /* left microtile */
 722    for (; k >= 0; k--) {
 723       lolo <<= 2;
 724       lolo |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
 725    }
 726    cc[1] = lohi;
 727    cc[0] = lolo;
 728 }
 729
 730
/*
 * Encode one block in ALPHA mode with interpolation (lerp = 1).
 *
 * Three RGBA vectors are stored.  vec[1] is shared by both microtiles:
 * the left microtile (texels 0..15) interpolates between vec[0] and
 * vec[1], the right one (texels 16..31) between vec[2] and vec[1].  Each
 * texel gets a 2-bit index in [0, 3] along its microtile's segment.
 *
 * cc     destination block; cc[0] / cc[1] receive the left / right texel
 *        indices, the high quadword receives mode bits, alphas and colors
 * input  the block's texels
 *
 * NOTE(review): the shared vector is computed with a division by
 * (sumL + sumR), which is zero if every component of every texel is zero;
 * presumably callers never select this mode for such a block -- confirm.
 */
static void
fxt1_quantize_ALPHA1 (GLuint *cc,
                      GLubyte input[N_TEXELS][MAX_COMP])
{
   const GLint n_vect = 3; /* highest vector number in each microtile */
   const GLint n_comp = 4; /* 4 components: R, G, B, A */
   GLfloat vec[1 + 1 + 1][MAX_COMP]; /* 1.5 extrema for each sub-block */
   GLfloat b, iv[MAX_COMP]; /* interpolation vector */
   GLint i, j, k;
   Fx64 hi; /* high quadword */
   GLuint lohi, lolo; /* low quadword: hi dword, lo dword */

   GLint minSum;
   GLint maxSum;
   GLint minColL = 0, maxColL = 0;
   GLint minColR = 0, maxColR = 0;
   GLint sumL = 0, sumR = 0;

   /* Our solution here is to find the darkest and brightest colors in
    * the 4x4 tile and use those as the two representative colors.
    * There are probably better algorithms to use (histogram-based).
    */
   /* left microtile: texels 0 .. N_TEXELS/2 - 1 */
   minSum = 2000; /* big enough */
   maxSum = -1; /* small enough */
   for (k = 0; k < N_TEXELS / 2; k++) {
      GLint sum = 0;
      for (i = 0; i < n_comp; i++) {
         sum += input[k][i];
      }
      if (minSum > sum) {
         minSum = sum;
         minColL = k;
      }
      if (maxSum < sum) {
         maxSum = sum;
         maxColL = k;
      }
      sumL += sum;
   }
   /* right microtile: k continues from N_TEXELS/2 */
   minSum = 2000; /* big enough */
   maxSum = -1; /* small enough */
   for (; k < N_TEXELS; k++) {
      GLint sum = 0;
      for (i = 0; i < n_comp; i++) {
         sum += input[k][i];
      }
      if (minSum > sum) {
         minSum = sum;
         minColR = k;
      }
      if (maxSum < sum) {
         maxSum = sum;
         maxColR = k;
      }
      sumR += sum;
   }

   /* choose the common vector (yuck!) */
   {
      GLint j1, j2;
      GLint v1 = 0, v2 = 0;
      GLfloat err = 1e9; /* big enough */
      GLfloat tv[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
      for (i = 0; i < n_comp; i++) {
         tv[0][i] = input[minColL][i];
         tv[1][i] = input[maxColL][i];
         tv[2][i] = input[minColR][i];
         tv[3][i] = input[maxColR][i];
      }
      /* find the closest pair made of one left and one right extreme */
      for (j1 = 0; j1 < 2; j1++) {
         for (j2 = 2; j2 < 4; j2++) {
            GLfloat e = 0.0F;
            for (i = 0; i < n_comp; i++) {
               e += (tv[j1][i] - tv[j2][i]) * (tv[j1][i] - tv[j2][i]);
            }
            if (e < err) {
               err = e;
               v1 = j1;
               v2 = j2;
            }
         }
      }
      /* vec[0] / vec[2] are the remaining left / right extremes (1 - v1
       * and 5 - v2 select "the other one"); vec[1] is the closest pair
       * averaged, weighted by each microtile's component sum.
       */
      for (i = 0; i < n_comp; i++) {
         vec[0][i] = tv[1 - v1][i];
         vec[1][i] = (tv[v1][i] * sumL + tv[v2][i] * sumR) / (sumL + sumR);
         vec[2][i] = tv[5 - v2][i];
      }
   }

   /* left microtile */
   cc[0] = 0; /* stays 0 when the microtile has a single extreme */
   if (minColL != maxColL) {
      /* compute interpolation vector */
      MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);

      /* add in texels */
      lolo = 0;
      for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
         GLint texel;
         /* interpolate color */
         CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
         /* add in texel */
         lolo <<= 2;
         lolo |= texel;
      }

      cc[0] = lolo;
   }

   /* right microtile */
   cc[1] = 0; /* stays 0 when the microtile has a single extreme */
   if (minColR != maxColR) {
      /* compute interpolation vector */
      MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[1]);

      /* add in texels */
      lohi = 0;
      for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
         GLint texel;
         /* interpolate color */
         CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
         /* add in texel */
         lohi <<= 2;
         lohi |= texel;
      }

      cc[1] = lohi;
   }

   /* high quadword: mode bits, then alphas of vec[2..0], then their
    * R, G, B values, 5 bits per field
    */
   FX64_MOV32(hi, 7); /* alpha = "011" + lerp = 1 */
   for (j = n_vect - 1; j >= 0; j--) {
      /* add in alphas */
      FX64_SHL(hi, 5);
      FX64_OR32(hi, (GLuint)(vec[j][ACOMP] / 8.0F));
   }
   for (j = n_vect - 1; j >= 0; j--) {
      for (i = 0; i < n_comp - 1; i++) {
         /* add in colors */
         FX64_SHL(hi, 5);
         FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
      }
   }
   ((Fx64 *)cc)[1] = hi;
}
 875
 876
/*
 * Encode one block in HI mode: two RGB endpoint colors and a 3-bit index
 * per texel.
 *
 * The endpoints are the darkest and brightest (smallest / largest R+G+B
 * sum) of the first n entries of reord.  cc[3] holds the brightest color
 * followed by the darkest one, 5 bits per component, leaving the two top
 * bits zero (cc-hi = "00").  The 32 texel indices occupy the low 96 bits
 * (cc[0..2]): index 0 maps to the darkest endpoint, n_vect (6) to the
 * brightest, and n_vect + 1 (7) marks a transparent black texel.
 *
 * cc     destination block
 * input  the block's texels, used for the per-texel indices
 * reord  reordered texels, used to pick the endpoints
 * n      number of entries of reord to examine
 *
 * NOTE(review): the indices are OR-ed in through a GLuint pointer formed
 * from a byte offset, which may be unaligned and looks like it assumes a
 * little-endian layout -- confirm for the targeted platforms.
 */
static void
fxt1_quantize_HI (GLuint *cc,
                  GLubyte input[N_TEXELS][MAX_COMP],
                  GLubyte reord[N_TEXELS][MAX_COMP], GLint n)
{
   const GLint n_vect = 6; /* highest vector number */
   const GLint n_comp = 3; /* 3 components: R, G, B */
   GLfloat b = 0.0F;       /* phoudoin: silent compiler! */
   GLfloat iv[MAX_COMP];   /* interpolation vector */
   GLint i, k;
   GLuint hihi; /* high quadword: hi dword */

   GLint minSum = 2000; /* big enough */
   GLint maxSum = -1; /* small enough */
   GLint minCol = 0; /* phoudoin: silent compiler! */
   GLint maxCol = 0; /* phoudoin: silent compiler! */

   /* Our solution here is to find the darkest and brightest colors in
    * the 8x4 tile and use those as the two representative colors.
    * There are probably better algorithms to use (histogram-based).
    */
   for (k = 0; k < n; k++) {
      GLint sum = 0;
      for (i = 0; i < n_comp; i++) {
         sum += reord[k][i];
      }
      if (minSum > sum) {
         minSum = sum;
         minCol = k;
      }
      if (maxSum < sum) {
         maxSum = sum;
         maxCol = k;
      }
   }

   hihi = 0; /* cc-hi = "00" */
   for (i = 0; i < n_comp; i++) {
      /* add in colors */
      hihi <<= 5;
      hihi |= reord[maxCol][i] >> 3;
   }
   for (i = 0; i < n_comp; i++) {
      /* add in colors */
      hihi <<= 5;
      hihi |= reord[minCol][i] >> 3;
   }
   cc[3] = hihi;
   /* clear the index area; indices are OR-ed in below */
   cc[0] = cc[1] = cc[2] = 0;

   /* compute interpolation vector */
   if (minCol != maxCol) {
      MAKEIVEC(n_vect, n_comp, iv, b, reord[minCol], reord[maxCol]);
   }

   /* add in texels */
   for (k = N_TEXELS - 1; k >= 0; k--) {
      GLint t = k * 3; /* bit offset of this texel's 3-bit index */
      GLuint *kk = (GLuint *)((char *)cc + t / 8); /* containing byte */
      GLint texel = n_vect + 1; /* transparent black */

      if (!ISTBLACK(input[k])) {
         /* with a single endpoint nothing is OR-ed in: index stays 0 */
         if (minCol != maxCol) {
            /* interpolate color */
            CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
            /* add in texel */
            kk[0] |= texel << (t & 7);
         }
      } else {
         /* add in texel */
         kk[0] |= texel << (t & 7);
      }
   }
}
 951
 952
 953 static void
 954 fxt1_quantize_MIXED1 (GLuint *cc,
 955                       GLubyte input[N_TEXELS][MAX_COMP])
 956 {
 957    const GLint n_vect = 2; /* highest vector number in each microtile */
 958    const GLint n_comp = 3; /* 3 components: R, G, B */
 959    GLubyte vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
 960    GLfloat b, iv[MAX_COMP]; /* interpolation vector */
 961    GLint i, j, k;
 962    Fx64 hi; /* high quadword */
 963    GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
 964
 965    GLint minSum;
 966    GLint maxSum;
 967    GLint minColL = 0, maxColL = -1;
 968    GLint minColR = 0, maxColR = -1;
 969
 970    /* Our solution here is to find the darkest and brightest colors in
 971     * the 4x4 tile and use those as the two representative colors.
 972     * There are probably better algorithms to use (histogram-based).
 973     */
 974    minSum = 2000; /* big enough */
 975    maxSum = -1; /* small enough */
 976    for (k = 0; k < N_TEXELS / 2; k++) {
 977       if (!ISTBLACK(input[k])) {
 978          GLint sum = 0;
 979          for (i = 0; i < n_comp; i++) {
 980             sum += input[k][i];
 981          }
 982          if (minSum > sum) {
 983             minSum = sum;
 984             minColL = k;
 985          }
 986          if (maxSum < sum) {
 987             maxSum = sum;
 988             maxColL = k;
 989          }
 990       }
 991    }
 992    minSum = 2000; /* big enough */
 993    maxSum = -1; /* small enough */
 994    for (; k < N_TEXELS; k++) {
 995       if (!ISTBLACK(input[k])) {
 996          GLint sum = 0;
 997          for (i = 0; i < n_comp; i++) {
 998             sum += input[k][i];
 999          }
1000          if (minSum > sum) {
1001             minSum = sum;
1002             minColR = k;
1003          }
1004          if (maxSum < sum) {
1005             maxSum = sum;
1006             maxColR = k;
1007          }
1008       }
1009    }
1010
1011    /* left microtile */
1012    if (maxColL == -1) {
1013       /* all transparent black */
1014       cc[0] = ~0u;
1015       for (i = 0; i < n_comp; i++) {
1016          vec[0][i] = 0;
1017          vec[1][i] = 0;
1018       }
1019    } else {
1020       cc[0] = 0;
1021       for (i = 0; i < n_comp; i++) {
1022          vec[0][i] = input[minColL][i];
1023          vec[1][i] = input[maxColL][i];
1024       }
1025       if (minColL != maxColL) {
1026          /* compute interpolation vector */
1027          MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
1028
1029          /* add in texels */
1030          lolo = 0;
1031          for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
1032             GLint texel = n_vect + 1; /* transparent black */
1033             if (!ISTBLACK(input[k])) {
1034                /* interpolate color */
1035                CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
1036             }
1037             /* add in texel */
1038             lolo <<= 2;
1039             lolo |= texel;
1040          }
1041          cc[0] = lolo;
1042       }
1043    }
1044
1045    /* right microtile */
1046    if (maxColR == -1) {
1047       /* all transparent black */
1048       cc[1] = ~0u;
1049       for (i = 0; i < n_comp; i++) {
1050          vec[2][i] = 0;
1051          vec[3][i] = 0;
1052       }
1053    } else {
1054       cc[1] = 0;
1055       for (i = 0; i < n_comp; i++) {
1056          vec[2][i] = input[minColR][i];
1057          vec[3][i] = input[maxColR][i];
1058       }
1059       if (minColR != maxColR) {
1060          /* compute interpolation vector */
1061          MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);
1062
1063          /* add in texels */
1064          lohi = 0;
1065          for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
1066             GLint texel = n_vect + 1; /* transparent black */
1067             if (!ISTBLACK(input[k])) {
1068                /* interpolate color */
1069                CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
1070             }
1071             /* add in texel */
1072             lohi <<= 2;
1073             lohi |= texel;
1074          }
1075          cc[1] = lohi;
1076       }
1077    }
1078
1079    FX64_MOV32(hi, 9 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
1080    for (j = 2 * 2 - 1; j >= 0; j--) {
1081       for (i = 0; i < n_comp; i++) {
1082          /* add in colors */
1083          FX64_SHL(hi, 5);
1084          FX64_OR32(hi, vec[j][i] >> 3);
1085       }
1086    }
1087    ((Fx64 *)cc)[1] = hi;
1088 }
1089
1090
1091 static void
1092 fxt1_quantize_MIXED0 (GLuint *cc,
1093                       GLubyte input[N_TEXELS][MAX_COMP])
1094 {
1095    const GLint n_vect = 3; /* highest vector number in each microtile */
1096    const GLint n_comp = 3; /* 3 components: R, G, B */
1097    GLubyte vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
1098    GLfloat b, iv[MAX_COMP]; /* interpolation vector */
1099    GLint i, j, k;
1100    Fx64 hi; /* high quadword */
1101    GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
1102
1103    GLint minColL = 0, maxColL = 0;
1104    GLint minColR = 0, maxColR = 0;
1105 #if 0
1106    GLint minSum;
1107    GLint maxSum;
1108
1109    /* Our solution here is to find the darkest and brightest colors in
1110     * the 4x4 tile and use those as the two representative colors.
1111     * There are probably better algorithms to use (histogram-based).
1112     */
1113    minSum = 2000; /* big enough */
1114    maxSum = -1; /* small enough */
1115    for (k = 0; k < N_TEXELS / 2; k++) {
1116       GLint sum = 0;
1117       for (i = 0; i < n_comp; i++) {
1118          sum += input[k][i];
1119       }
1120       if (minSum > sum) {
1121          minSum = sum;
1122          minColL = k;
1123       }
1124       if (maxSum < sum) {
1125          maxSum = sum;
1126          maxColL = k;
1127       }
1128    }
1129    minSum = 2000; /* big enough */
1130    maxSum = -1; /* small enough */
1131    for (; k < N_TEXELS; k++) {
1132       GLint sum = 0;
1133       for (i = 0; i < n_comp; i++) {
1134          sum += input[k][i];
1135       }
1136       if (minSum > sum) {
1137          minSum = sum;
1138          minColR = k;
1139       }
1140       if (maxSum < sum) {
1141          maxSum = sum;
1142          maxColR = k;
1143       }
1144    }
1145 #else
1146    GLint minVal;
1147    GLint maxVal;
1148    GLint maxVarL = fxt1_variance(NULL, input, n_comp, N_TEXELS / 2);
1149    GLint maxVarR = fxt1_variance(NULL, &input[N_TEXELS / 2], n_comp, N_TEXELS / 2);
1150
1151    /* Scan the channel with max variance for lo & hi
1152     * and use those as the two representative colors.
1153     */
1154    minVal = 2000; /* big enough */
1155    maxVal = -1; /* small enough */
1156    for (k = 0; k < N_TEXELS / 2; k++) {
1157       GLint t = input[k][maxVarL];
1158       if (minVal > t) {
1159          minVal = t;
1160          minColL = k;
1161       }
1162       if (maxVal < t) {
1163          maxVal = t;
1164          maxColL = k;
1165       }
1166    }
1167    minVal = 2000; /* big enough */
1168    maxVal = -1; /* small enough */
1169    for (; k < N_TEXELS; k++) {
1170       GLint t = input[k][maxVarR];
1171       if (minVal > t) {
1172          minVal = t;
1173          minColR = k;
1174       }
1175       if (maxVal < t) {
1176          maxVal = t;
1177          maxColR = k;
1178       }
1179    }
1180 #endif
1181
1182    /* left microtile */
1183    cc[0] = 0;
1184    for (i = 0; i < n_comp; i++) {
1185       vec[0][i] = input[minColL][i];
1186       vec[1][i] = input[maxColL][i];
1187    }
1188    if (minColL != maxColL) {
1189       /* compute interpolation vector */
1190       MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
1191
1192       /* add in texels */
1193       lolo = 0;
1194       for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
1195          GLint texel;
1196          /* interpolate color */
1197          CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
1198          /* add in texel */
1199          lolo <<= 2;
1200          lolo |= texel;
1201       }
1202
1203       /* funky encoding for LSB of green */
1204       if ((GLint)((lolo >> 1) & 1) != (((vec[1][GCOMP] ^ vec[0][GCOMP]) >> 2) & 1)) {
1205          for (i = 0; i < n_comp; i++) {
1206             vec[1][i] = input[minColL][i];
1207             vec[0][i] = input[maxColL][i];
1208          }
1209          lolo = ~lolo;
1210       }
1211
1212       cc[0] = lolo;
1213    }
1214
1215    /* right microtile */
1216    cc[1] = 0;
1217    for (i = 0; i < n_comp; i++) {
1218       vec[2][i] = input[minColR][i];
1219       vec[3][i] = input[maxColR][i];
1220    }
1221    if (minColR != maxColR) {
1222       /* compute interpolation vector */
1223       MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);
1224
1225       /* add in texels */
1226       lohi = 0;
1227       for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
1228          GLint texel;
1229          /* interpolate color */
1230          CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
1231          /* add in texel */
1232          lohi <<= 2;
1233          lohi |= texel;
1234       }
1235
1236       /* funky encoding for LSB of green */
1237       if ((GLint)((lohi >> 1) & 1) != (((vec[3][GCOMP] ^ vec[2][GCOMP]) >> 2) & 1)) {
1238          for (i = 0; i < n_comp; i++) {
1239             vec[3][i] = input[minColR][i];
1240             vec[2][i] = input[maxColR][i];
1241          }
1242          lohi = ~lohi;
1243       }
1244
1245       cc[1] = lohi;
1246    }
1247
1248    FX64_MOV32(hi, 8 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
1249    for (j = 2 * 2 - 1; j >= 0; j--) {
1250       for (i = 0; i < n_comp; i++) {
1251          /* add in colors */
1252          FX64_SHL(hi, 5);
1253          FX64_OR32(hi, vec[j][i] >> 3);
1254       }
1255    }
1256    ((Fx64 *)cc)[1] = hi;
1257 }
1258
1259
1260 static void
1261 fxt1_quantize (GLuint *cc, const GLubyte *lines[], GLint comps)
1262 {
1263    GLint trualpha;
1264    GLubyte reord[N_TEXELS][MAX_COMP];
1265
1266    GLubyte input[N_TEXELS][MAX_COMP];
1267    GLint i, k, l;
1268
1269    if (comps == 3) {
1270       /* make the whole block opaque */
1271       memset(input, -1, sizeof(input));
1272    }
1273
1274    /* 8 texels each line */
1275    for (l = 0; l < 4; l++) {
1276       for (k = 0; k < 4; k++) {
1277          for (i = 0; i < comps; i++) {
1278             input[k + l * 4][i] = *lines[l]++;
1279          }
1280       }
1281       for (; k < 8; k++) {
1282          for (i = 0; i < comps; i++) {
1283             input[k + l * 4 + 12][i] = *lines[l]++;
1284          }
1285       }
1286    }
1287
1288    /* block layout:
1289     * 00, 01, 02, 03, 08, 09, 0a, 0b
1290     * 10, 11, 12, 13, 18, 19, 1a, 1b
1291     * 04, 05, 06, 07, 0c, 0d, 0e, 0f
1292     * 14, 15, 16, 17, 1c, 1d, 1e, 1f
1293     */
1294
1295    /* [dBorca]
1296     * stupidity flows forth from this
1297     */
1298    l = N_TEXELS;
1299    trualpha = 0;
1300    if (comps == 4) {
1301       /* skip all transparent black texels */
1302       l = 0;
1303       for (k = 0; k < N_TEXELS; k++) {
1304          /* test all components against 0 */
1305          if (!ISTBLACK(input[k])) {
1306             /* texel is not transparent black */
1307             COPY_4UBV(reord[l], input[k]);
1308             if (reord[l][ACOMP] < (255 - ALPHA_TS)) {
1309                /* non-opaque texel */
1310                trualpha = !0;
1311             }
1312             l++;
1313          }
1314       }
1315    }
1316
1317 #if 0
1318    if (trualpha) {
1319       fxt1_quantize_ALPHA0(cc, input, reord, l);
1320    } else if (l == 0) {
1321       cc[0] = cc[1] = cc[2] = -1;
1322       cc[3] = 0;
1323    } else if (l < N_TEXELS) {
1324       fxt1_quantize_HI(cc, input, reord, l);
1325    } else {
1326       fxt1_quantize_CHROMA(cc, input);
1327    }
1328    (void)fxt1_quantize_ALPHA1;
1329    (void)fxt1_quantize_MIXED1;
1330    (void)fxt1_quantize_MIXED0;
1331 #else
1332    if (trualpha) {
1333       fxt1_quantize_ALPHA1(cc, input);
1334    } else if (l == 0) {
1335       cc[0] = cc[1] = cc[2] = ~0u;
1336       cc[3] = 0;
1337    } else if (l < N_TEXELS) {
1338       fxt1_quantize_MIXED1(cc, input);
1339    } else {
1340       fxt1_quantize_MIXED0(cc, input);
1341    }
1342    (void)fxt1_quantize_ALPHA0;
1343    (void)fxt1_quantize_HI;
1344    (void)fxt1_quantize_CHROMA;
1345 #endif
1346 }
1347
1348
1349 static void
1350 fxt1_encode (GLuint width, GLuint height, GLint comps,
1351              const void *source, GLint srcRowStride,
1352              void *dest, GLint destRowStride)
1353 {
1354    GLuint x, y;
1355    const GLubyte *data;
1356    GLuint *encoded = (GLuint *)dest;
1357    void *newSource = NULL;
1358
1359    assert(comps == 3 || comps == 4);
1360
1361    /* Replicate image if width is not M8 or height is not M4 */
1362    if ((width & 7) | (height & 3)) {
1363       GLint newWidth = (width + 7) & ~7;
1364       GLint newHeight = (height + 3) & ~3;
1365       newSource = _mesa_malloc(comps * newWidth * newHeight * sizeof(GLchan));
1366       if (!newSource) {
1367          GET_CURRENT_CONTEXT(ctx);
1368          _mesa_error(ctx, GL_OUT_OF_MEMORY, "texture compression");
1369          goto cleanUp;
1370       }
1371       _mesa_upscale_teximage2d(width, height, newWidth, newHeight,
1372                                comps, (const GLchan *) source,
1373                                srcRowStride, (GLchan *) newSource);
1374       source = newSource;
1375       width = newWidth;
1376       height = newHeight;
1377       srcRowStride = comps * newWidth;
1378    }
1379
1380    /* convert from 16/32-bit channels to GLubyte if needed */
1381    if (CHAN_TYPE != GL_UNSIGNED_BYTE) {
1382       const GLuint n = width * height * comps;
1383       const GLchan *src = (const GLchan *) source;
1384       GLubyte *dest = (GLubyte *) _mesa_malloc(n * sizeof(GLubyte));
1385       GLuint i;
1386       if (!dest) {
1387          GET_CURRENT_CONTEXT(ctx);
1388          _mesa_error(ctx, GL_OUT_OF_MEMORY, "texture compression");
1389          goto cleanUp;
1390       }
1391       for (i = 0; i < n; i++) {
1392          dest[i] = CHAN_TO_UBYTE(src[i]);
1393       }
1394       if (newSource != NULL) {
1395          _mesa_free(newSource);
1396       }
1397       newSource = dest;  /* we'll free this buffer before returning */
1398       source = dest;  /* the new, GLubyte incoming image */
1399    }
1400
1401    data = (const GLubyte *) source;
1402    destRowStride = (destRowStride - width * 2) / 4;
1403    for (y = 0; y < height; y += 4) {
1404       GLuint offs = 0 + (y + 0) * srcRowStride;
1405       for (x = 0; x < width; x += 8) {
1406          const GLubyte *lines[4];
1407          lines[0] = &data[offs];
1408          lines[1] = lines[0] + srcRowStride;
1409          lines[2] = lines[1] + srcRowStride;
1410          lines[3] = lines[2] + srcRowStride;
1411          offs += 8 * comps;
1412          fxt1_quantize(encoded, lines, comps);
1413          /* 128 bits per 8x4 block */
1414          encoded += 4;
1415       }
1416       encoded += destRowStride;
1417    }
1418
1419  cleanUp:
1420    if (newSource != NULL) {
1421       _mesa_free(newSource);
1422    }
1423 }
1424
1425
1426 /***************************************************************************\
1427  * FXT1 decoder
1428  *
1429  * The decoder is based on GL_3DFX_texture_compression_FXT1
1430  * specification and serves as a concept for the encoder.
1431 \***************************************************************************/
1432
1433
1434 /* lookup table for scaling 5 bit colors up to 8 bits */
1435 static const GLubyte _rgb_scale_5[] = {
1436    0,   8,   16,  25,  33,  41,  49,  58,
1437    66,  74,  82,  90,  99,  107, 115, 123,
1438    132, 140, 148, 156, 165, 173, 181, 189,
1439    197, 206, 214, 222, 230, 239, 247, 255
1440 };
1441
1442 /* lookup table for scaling 6 bit colors up to 8 bits */
1443 static const GLubyte _rgb_scale_6[] = {
1444    0,   4,   8,   12,  16,  20,  24,  28,
1445    32,  36,  40,  45,  49,  53,  57,  61,
1446    65,  69,  73,  77,  81,  85,  89,  93,
1447    97,  101, 105, 109, 113, 117, 121, 125,
1448    130, 134, 138, 142, 146, 150, 154, 158,
1449    162, 166, 170, 174, 178, 182, 186, 190,
1450    194, 198, 202, 206, 210, 215, 219, 223,
1451    227, 231, 235, 239, 243, 247, 251, 255
1452 };
1453
1454
/* CC_SEL: view the block as 32-bit words and shift bit number `which` down
 * to bit 0.  Bits above it are NOT masked off; callers mask what they need.
 * The block is only read, so the cast keeps it const.
 */
#define CC_SEL(cc, which) (((const GLuint *)(cc))[(which) / 32] >> ((which) & 31))
/* UP5: expand the low 5 bits of c to an 8-bit value */
#define UP5(c) (_rgb_scale_5[(c) & 31])
/* UP6: expand the 6-bit value made of the low 5 bits of c and bit 0 of b */
#define UP6(c, b) (_rgb_scale_6[(((c) & 31) << 1) | ((b) & 1)])
/* LERP: rounded integer blend, step t of n between c0 (t == 0) and c1
 * (t == n).  The whole expansion is parenthesized so the macro can be used
 * as an operand of a larger expression.
 */
#define LERP(n, t, c0, c1) ((((n) - (t)) * (c0) + (t) * (c1) + (n) / 2) / (n))
1459
1460
1461 static void
1462 fxt1_decode_1HI (const GLubyte *code, GLint t, GLchan *rgba)
1463 {
1464    const GLuint *cc;
1465
1466    t *= 3;
1467    cc = (const GLuint *)(code + t / 8);
1468    t = (cc[0] >> (t & 7)) & 7;
1469
1470    if (t == 7) {
1471       rgba[RCOMP] = rgba[GCOMP] = rgba[BCOMP] = rgba[ACOMP] = 0;
1472    } else {
1473       GLubyte r, g, b;
1474       cc = (const GLuint *)(code + 12);
1475       if (t == 0) {
1476          b = UP5(CC_SEL(cc, 0));
1477          g = UP5(CC_SEL(cc, 5));
1478          r = UP5(CC_SEL(cc, 10));
1479       } else if (t == 6) {
1480          b = UP5(CC_SEL(cc, 15));
1481          g = UP5(CC_SEL(cc, 20));
1482          r = UP5(CC_SEL(cc, 25));
1483       } else {
1484          b = LERP(6, t, UP5(CC_SEL(cc, 0)), UP5(CC_SEL(cc, 15)));
1485          g = LERP(6, t, UP5(CC_SEL(cc, 5)), UP5(CC_SEL(cc, 20)));
1486          r = LERP(6, t, UP5(CC_SEL(cc, 10)), UP5(CC_SEL(cc, 25)));
1487       }
1488       rgba[RCOMP] = UBYTE_TO_CHAN(r);
1489       rgba[GCOMP] = UBYTE_TO_CHAN(g);
1490       rgba[BCOMP] = UBYTE_TO_CHAN(b);
1491       rgba[ACOMP] = CHAN_MAX;
1492    }
1493 }
1494
1495
1496 static void
1497 fxt1_decode_1CHROMA (const GLubyte *code, GLint t, GLchan *rgba)
1498 {
1499    const GLuint *cc;
1500    GLuint kk;
1501
1502    cc = (const GLuint *)code;
1503    if (t & 16) {
1504       cc++;
1505       t &= 15;
1506    }
1507    t = (cc[0] >> (t * 2)) & 3;
1508
1509    t *= 15;
1510    cc = (const GLuint *)(code + 8 + t / 8);
1511    kk = cc[0] >> (t & 7);
1512    rgba[BCOMP] = UBYTE_TO_CHAN( UP5(kk) );
1513    rgba[GCOMP] = UBYTE_TO_CHAN( UP5(kk >> 5) );
1514    rgba[RCOMP] = UBYTE_TO_CHAN( UP5(kk >> 10) );
1515    rgba[ACOMP] = CHAN_MAX;
1516 }
1517
1518
1519 static void
1520 fxt1_decode_1MIXED (const GLubyte *code, GLint t, GLchan *rgba)
1521 {
1522    const GLuint *cc;
1523    GLuint col[2][3];
1524    GLint glsb, selb;
1525
1526    cc = (const GLuint *)code;
1527    if (t & 16) {
1528       t &= 15;
1529       t = (cc[1] >> (t * 2)) & 3;
1530       /* col 2 */
1531       col[0][BCOMP] = (*(const GLuint *)(code + 11)) >> 6;
1532       col[0][GCOMP] = CC_SEL(cc, 99);
1533       col[0][RCOMP] = CC_SEL(cc, 104);
1534       /* col 3 */
1535       col[1][BCOMP] = CC_SEL(cc, 109);
1536       col[1][GCOMP] = CC_SEL(cc, 114);
1537       col[1][RCOMP] = CC_SEL(cc, 119);
1538       glsb = CC_SEL(cc, 126);
1539       selb = CC_SEL(cc, 33);
1540    } else {
1541       t = (cc[0] >> (t * 2)) & 3;
1542       /* col 0 */
1543       col[0][BCOMP] = CC_SEL(cc, 64);
1544       col[0][GCOMP] = CC_SEL(cc, 69);
1545       col[0][RCOMP] = CC_SEL(cc, 74);
1546       /* col 1 */
1547       col[1][BCOMP] = CC_SEL(cc, 79);
1548       col[1][GCOMP] = CC_SEL(cc, 84);
1549       col[1][RCOMP] = CC_SEL(cc, 89);
1550       glsb = CC_SEL(cc, 125);
1551       selb = CC_SEL(cc, 1);
1552    }
1553
1554    if (CC_SEL(cc, 124) & 1) {
1555       /* alpha[0] == 1 */
1556
1557       if (t == 3) {
1558          /* zero */
1559          rgba[RCOMP] = rgba[BCOMP] = rgba[GCOMP] = rgba[ACOMP] = 0;
1560       } else {
1561          GLubyte r, g, b;
1562          if (t == 0) {
1563             b = UP5(col[0][BCOMP]);
1564             g = UP5(col[0][GCOMP]);
1565             r = UP5(col[0][RCOMP]);
1566          } else if (t == 2) {
1567             b = UP5(col[1][BCOMP]);
1568             g = UP6(col[1][GCOMP], glsb);
1569             r = UP5(col[1][RCOMP]);
1570          } else {
1571             b = (UP5(col[0][BCOMP]) + UP5(col[1][BCOMP])) / 2;
1572             g = (UP5(col[0][GCOMP]) + UP6(col[1][GCOMP], glsb)) / 2;
1573             r = (UP5(col[0][RCOMP]) + UP5(col[1][RCOMP])) / 2;
1574          }
1575          rgba[RCOMP] = UBYTE_TO_CHAN(r);
1576          rgba[GCOMP] = UBYTE_TO_CHAN(g);
1577          rgba[BCOMP] = UBYTE_TO_CHAN(b);
1578          rgba[ACOMP] = CHAN_MAX;
1579       }
1580    } else {
1581       /* alpha[0] == 0 */
1582       GLubyte r, g, b;
1583       if (t == 0) {
1584          b = UP5(col[0][BCOMP]);
1585          g = UP6(col[0][GCOMP], glsb ^ selb);
1586          r = UP5(col[0][RCOMP]);
1587       } else if (t == 3) {
1588          b = UP5(col[1][BCOMP]);
1589          g = UP6(col[1][GCOMP], glsb);
1590          r = UP5(col[1][RCOMP]);
1591       } else {
1592          b = LERP(3, t, UP5(col[0][BCOMP]), UP5(col[1][BCOMP]));
1593          g = LERP(3, t, UP6(col[0][GCOMP], glsb ^ selb),
1594                         UP6(col[1][GCOMP], glsb));
1595          r = LERP(3, t, UP5(col[0][RCOMP]), UP5(col[1][RCOMP]));
1596       }
1597       rgba[RCOMP] = UBYTE_TO_CHAN(r);
1598       rgba[GCOMP] = UBYTE_TO_CHAN(g);
1599       rgba[BCOMP] = UBYTE_TO_CHAN(b);
1600       rgba[ACOMP] = CHAN_MAX;
1601    }
1602 }
1603
1604
1605 static void
1606 fxt1_decode_1ALPHA (const GLubyte *code, GLint t, GLchan *rgba)
1607 {
1608    const GLuint *cc;
1609    GLubyte r, g, b, a;
1610
1611    cc = (const GLuint *)code;
1612    if (CC_SEL(cc, 124) & 1) {
1613       /* lerp == 1 */
1614       GLuint col0[4];
1615
1616       if (t & 16) {
1617          t &= 15;
1618          t = (cc[1] >> (t * 2)) & 3;
1619          /* col 2 */
1620          col0[BCOMP] = (*(const GLuint *)(code + 11)) >> 6;
1621          col0[GCOMP] = CC_SEL(cc, 99);
1622          col0[RCOMP] = CC_SEL(cc, 104);
1623          col0[ACOMP] = CC_SEL(cc, 119);
1624       } else {
1625          t = (cc[0] >> (t * 2)) & 3;
1626          /* col 0 */
1627          col0[BCOMP] = CC_SEL(cc, 64);
1628          col0[GCOMP] = CC_SEL(cc, 69);
1629          col0[RCOMP] = CC_SEL(cc, 74);
1630          col0[ACOMP] = CC_SEL(cc, 109);
1631       }
1632
1633       if (t == 0) {
1634          b = UP5(col0[BCOMP]);
1635          g = UP5(col0[GCOMP]);
1636          r = UP5(col0[RCOMP]);
1637          a = UP5(col0[ACOMP]);
1638       } else if (t == 3) {
1639          b = UP5(CC_SEL(cc, 79));
1640          g = UP5(CC_SEL(cc, 84));
1641          r = UP5(CC_SEL(cc, 89));
1642          a = UP5(CC_SEL(cc, 114));
1643       } else {
1644          b = LERP(3, t, UP5(col0[BCOMP]), UP5(CC_SEL(cc, 79)));
1645          g = LERP(3, t, UP5(col0[GCOMP]), UP5(CC_SEL(cc, 84)));
1646          r = LERP(3, t, UP5(col0[RCOMP]), UP5(CC_SEL(cc, 89)));
1647          a = LERP(3, t, UP5(col0[ACOMP]), UP5(CC_SEL(cc, 114)));
1648       }
1649    } else {
1650       /* lerp == 0 */
1651
1652       if (t & 16) {
1653          cc++;
1654          t &= 15;
1655       }
1656       t = (cc[0] >> (t * 2)) & 3;
1657
1658       if (t == 3) {
1659          /* zero */
1660          r = g = b = 0;
1661       } else {
1662          GLuint kk;
1663          cc = (const GLuint *)code;
1664          a = UP5(cc[3] >> (t * 5 + 13));
1665          t *= 15;
1666          cc = (const GLuint *)(code + 8 + t / 8);
1667          kk = cc[0] >> (t & 7);
1668          b = UP5(kk);
1669          g = UP5(kk >> 5);
1670          r = UP5(kk >> 10);
1671       }
1672    }
1673    rgba[RCOMP] = UBYTE_TO_CHAN(r);
1674    rgba[GCOMP] = UBYTE_TO_CHAN(g);
1675    rgba[BCOMP] = UBYTE_TO_CHAN(b);
1676    rgba[ACOMP] = UBYTE_TO_CHAN(a);
1677 }
1678
1679
1680 static void
1681 fxt1_decode_1 (const void *texture, GLint stride, /* in pixels */
1682                GLint i, GLint j, GLchan *rgba)
1683 {
1684    static void (*decode_1[]) (const GLubyte *, GLint, GLchan *) = {
1685       fxt1_decode_1HI,     /* cc-high   = "00?" */
1686       fxt1_decode_1HI,     /* cc-high   = "00?" */
1687       fxt1_decode_1CHROMA, /* cc-chroma = "010" */
1688       fxt1_decode_1ALPHA,  /* alpha     = "011" */
1689       fxt1_decode_1MIXED,  /* mixed     = "1??" */
1690       fxt1_decode_1MIXED,  /* mixed     = "1??" */
1691       fxt1_decode_1MIXED,  /* mixed     = "1??" */
1692       fxt1_decode_1MIXED   /* mixed     = "1??" */
1693    };
1694
1695    const GLubyte *code = (const GLubyte *)texture +
1696                          ((j / 4) * (stride / 8) + (i / 8)) * 16;
1697    GLint mode = CC_SEL(code, 125);
1698    GLint t = i & 7;
1699
1700    if (t & 4) {
1701       t += 12;
1702    }
1703    t += (j & 3) * 4;
1704
1705    decode_1[mode](code, t, rgba);
1706 }