src/mesa/main/texcompress_fxt1.c

   1 /*
   2  * Mesa 3-D graphics library
   3  * Version:  6.1
   4  *
   5  * Copyright (C) 1999-2004  Brian Paul   All Rights Reserved.
   6  *
   7  * Permission is hereby granted, free of charge, to any person obtaining a
   8  * copy of this software and associated documentation files (the "Software"),
   9  * to deal in the Software without restriction, including without limitation
  10  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  11  * and/or sell copies of the Software, and to permit persons to whom the
  12  * Software is furnished to do so, subject to the following conditions:
  13  *
  14  * The above copyright notice and this permission notice shall be included
  15  * in all copies or substantial portions of the Software.
  16  *
  17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  18  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  20  * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
  21  * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  22  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  23  */
  24
  25
  26 /**
  27  * \file texcompress_fxt1.c
  28  * GL_EXT_texture_compression_fxt1 support.
  29  */
  30
  31
  32 #include "glheader.h"
  33 #include "imports.h"
  34 #include "colormac.h"
  35 #include "context.h"
  36 #include "convolve.h"
  37 #include "image.h"
  38 #include "texcompress.h"
  39 #include "texformat.h"
  40 #include "texstore.h"
  41
   42
      /*
       * Prototypes for the FXT1 codec entry points used by the texstore and
       * fetch-texel functions below; their definitions are not in this part
       * of the file.
       *
       * fxt1_encode: called below with the source width/height, a component
       * count of 3 (RGB) or 4 (RGBA), the source pixels with their row
       * stride (the callers compute it in GLchan units), and the destination
       * address with dstRowStride.  The int result is ignored by the callers
       * in this file.
       * NOTE(review): the unit of destRowStride and the meaning of the
       * return value are not visible here -- confirm against the definition.
       */
   43 int
   44 fxt1_encode (unsigned int width, unsigned int height, int comps,
   45              const void *source, int srcRowStride,
   46              void *dest, int destRowStride);
      /*
       * fxt1_decode_1: called below with the texture data pointer, its row
       * stride and the texel coordinates (i, j); callers read the decoded
       * components back from rgba[] (indexed with RCOMP..ACOMP).
       */
   47 void
   48 fxt1_decode_1 (const void *texture, int stride,
   49                int i, int j, unsigned char *rgba);
  50
  51
  52 /**
  53  * Called during context initialization.
  54  */
  55 void
  56 _mesa_init_texture_fxt1( GLcontext *ctx )
  57 {
  58    (void) ctx;
  59 }
  60
  61
  62 /**
  63  * Called via TexFormat->StoreImage to store an RGB_FXT1 texture.
  64  */
  65 static GLboolean
  66 texstore_rgb_fxt1(STORE_PARAMS)
  67 {
  68    const GLchan *pixels;
  69    GLint srcRowStride;
  70    GLubyte *dst;
  71    const GLint texWidth = dstRowStride * 8 / 16; /* a bit of a hack */
  72    const GLchan *tempImage = NULL;
  73
  74    ASSERT(dstFormat == &_mesa_texformat_rgb_fxt1);
  75    ASSERT(dstXoffset % 8 == 0);
  76    ASSERT(dstYoffset % 4 == 0);
  77    ASSERT(dstZoffset     == 0);
  78    (void) dstZoffset; (void) dstImageStride;
  79
  80    if (srcFormat != GL_RGB ||
  81        srcType != CHAN_TYPE ||
  82        ctx->_ImageTransferState ||
  83        srcPacking->SwapBytes) {
  84       /* convert image to RGB/GLchan */
  85       tempImage = _mesa_make_temp_chan_image(ctx, dims,
  86                                              baseInternalFormat,
  87                                              dstFormat->BaseFormat,
  88                                              srcWidth, srcHeight, srcDepth,
  89                                              srcFormat, srcType, srcAddr,
  90                                              srcPacking);
  91       if (!tempImage)
  92          return GL_FALSE; /* out of memory */
  93       _mesa_adjust_image_for_convolution(ctx, dims, &srcWidth, &srcHeight);
  94       pixels = tempImage;
  95       srcRowStride = 3 * srcWidth;
  96       srcFormat = GL_RGB;
  97    }
  98    else {
  99       pixels = (const GLchan *) srcAddr;
 100       srcRowStride = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat,
 101                                             srcType) / sizeof(GLchan);
 102    }
 103
 104    dst = _mesa_compressed_image_address(dstXoffset, dstYoffset, 0,
 105                                         GL_COMPRESSED_RGB_FXT1_3DFX,
 106                                         texWidth, (GLubyte *) dstAddr);
 107
 108    fxt1_encode(srcWidth, srcHeight, 3, pixels, srcRowStride,
 109                dst, dstRowStride);
 110
 111    if (tempImage)
 112       _mesa_free((void*) tempImage);
 113
 114    return GL_TRUE;
 115 }
 116
 117
 118 /**
 119  * Called via TexFormat->StoreImage to store an RGBA_FXT1 texture.
 120  */
 121 static GLboolean
 122 texstore_rgba_fxt1(STORE_PARAMS)
 123 {
 124    const GLchan *pixels;
 125    GLint srcRowStride;
 126    GLubyte *dst;
 127    GLint texWidth = dstRowStride * 8 / 16; /* a bit of a hack */
 128    const GLchan *tempImage = NULL;
 129
 130    ASSERT(dstFormat == &_mesa_texformat_rgba_fxt1);
 131    ASSERT(dstXoffset % 8 == 0);
 132    ASSERT(dstYoffset % 4 == 0);
 133    ASSERT(dstZoffset     == 0);
 134    (void) dstZoffset; (void) dstImageStride;
 135
 136    if (srcFormat != GL_RGBA ||
 137        srcType != CHAN_TYPE ||
 138        ctx->_ImageTransferState ||
 139        srcPacking->SwapBytes) {
 140       /* convert image to RGBA/GLchan */
 141       tempImage = _mesa_make_temp_chan_image(ctx, dims,
 142                                              baseInternalFormat,
 143                                              dstFormat->BaseFormat,
 144                                              srcWidth, srcHeight, srcDepth,
 145                                              srcFormat, srcType, srcAddr,
 146                                              srcPacking);
 147       if (!tempImage)
 148          return GL_FALSE; /* out of memory */
 149       _mesa_adjust_image_for_convolution(ctx, dims, &srcWidth, &srcHeight);
 150       pixels = tempImage;
 151       srcRowStride = 4 * srcWidth;
 152       srcFormat = GL_RGBA;
 153    }
 154    else {
 155       pixels = (const GLchan *) srcAddr;
 156       srcRowStride = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat,
 157                                             srcType) / sizeof(GLchan);
 158    }
 159
 160    dst = _mesa_compressed_image_address(dstXoffset, dstYoffset, 0,
 161                                         GL_COMPRESSED_RGBA_FXT1_3DFX,
 162                                         texWidth, (GLubyte *) dstAddr);
 163
 164    fxt1_encode(srcWidth, srcHeight, 4, pixels, srcRowStride,
 165                dst, dstRowStride);
 166
 167    if (tempImage)
 168       _mesa_free((void*) tempImage);
 169
 170    return GL_TRUE;
 171 }
 172
 173
 174 static void
 175 fetch_texel_2d_rgba_fxt1( const struct gl_texture_image *texImage,
 176                           GLint i, GLint j, GLint k, GLchan *texel )
 177 {
 178    (void) k;
 179    fxt1_decode_1(texImage->Data, texImage->RowStride, i, j, texel);
 180 }
 181
 182
 183 static void
 184 fetch_texel_2d_f_rgba_fxt1( const struct gl_texture_image *texImage,
 185                             GLint i, GLint j, GLint k, GLfloat *texel )
 186 {
 187    /* just sample as GLchan and convert to float here */
 188    GLchan rgba[4];
 189    (void) k;
 190    fxt1_decode_1(texImage->Data, texImage->RowStride, i, j, rgba);
 191    texel[RCOMP] = CHAN_TO_FLOAT(rgba[RCOMP]);
 192    texel[GCOMP] = CHAN_TO_FLOAT(rgba[GCOMP]);
 193    texel[BCOMP] = CHAN_TO_FLOAT(rgba[BCOMP]);
 194    texel[ACOMP] = CHAN_TO_FLOAT(rgba[ACOMP]);
 195 }
 196
 197
 198 static void
 199 fetch_texel_2d_rgb_fxt1( const struct gl_texture_image *texImage,
 200                          GLint i, GLint j, GLint k, GLchan *texel )
 201 {
 202    (void) k;
 203    fxt1_decode_1(texImage->Data, texImage->RowStride, i, j, texel);
 204    texel[ACOMP] = 255;
 205 }
 206
 207
 208 static void
 209 fetch_texel_2d_f_rgb_fxt1( const struct gl_texture_image *texImage,
 210                            GLint i, GLint j, GLint k, GLfloat *texel )
 211 {
 212    /* just sample as GLchan and convert to float here */
 213    GLchan rgba[4];
 214    (void) k;
 215    fxt1_decode_1(texImage->Data, texImage->RowStride, i, j, rgba);
 216    texel[RCOMP] = CHAN_TO_FLOAT(rgba[RCOMP]);
 217    texel[GCOMP] = CHAN_TO_FLOAT(rgba[GCOMP]);
 218    texel[BCOMP] = CHAN_TO_FLOAT(rgba[BCOMP]);
 219    texel[ACOMP] = 1.0;
 220 }
 221
 222
  223
      /**
       * Texture format descriptor for MESA_FORMAT_RGB_FXT1 (base format GL_RGB).
       *
       * The per-component bit counts are approximations (the format is
       * block-compressed), there is no alpha, and TexelBytes is 0.  Images
       * are stored through texstore_rgb_fxt1; only the 2D fetch slots are
       * filled in, the 1D and 3D slots are NULL.
       */
  224 const struct gl_texture_format _mesa_texformat_rgb_fxt1 = {
  225    MESA_FORMAT_RGB_FXT1,                /* MesaFormat */
  226    GL_RGB,                              /* BaseFormat */
  227    GL_UNSIGNED_NORMALIZED_ARB,          /* DataType */
  228    4, /*approx*/                        /* RedBits */
  229    4, /*approx*/                        /* GreenBits */
  230    4, /*approx*/                        /* BlueBits */
  231    0,                                   /* AlphaBits */
  232    0,                                   /* LuminanceBits */
  233    0,                                   /* IntensityBits */
  234    0,                                   /* IndexBits */
  235    0,                                   /* DepthBits */
  236    0,                                   /* TexelBytes */
  237    texstore_rgb_fxt1,                   /* StoreTexImageFunc */
  238    NULL, /*impossible*/                 /* FetchTexel1D */
  239    fetch_texel_2d_rgb_fxt1,             /* FetchTexel2D */
  240    NULL, /*impossible*/                 /* FetchTexel3D */
  241    NULL, /*impossible*/                 /* FetchTexel1Df */
  242    fetch_texel_2d_f_rgb_fxt1,           /* FetchTexel2Df */
  243    NULL, /*impossible*/                 /* FetchTexel3Df */
  244 };
  245
      /**
       * Texture format descriptor for MESA_FORMAT_RGBA_FXT1 (base format GL_RGBA).
       *
       * The per-component bit counts are approximations (the format is
       * block-compressed) and TexelBytes is 0.  Images are stored through
       * texstore_rgba_fxt1; only the 2D fetch slots are filled in, the 1D
       * and 3D slots are NULL.
       */
  246 const struct gl_texture_format _mesa_texformat_rgba_fxt1 = {
  247    MESA_FORMAT_RGBA_FXT1,               /* MesaFormat */
  248    GL_RGBA,                             /* BaseFormat */
  249    GL_UNSIGNED_NORMALIZED_ARB,          /* DataType */
  250    4, /*approx*/                        /* RedBits */
  251    4, /*approx*/                        /* GreenBits */
  252    4, /*approx*/                        /* BlueBits */
  253    1, /*approx*/                        /* AlphaBits */
  254    0,                                   /* LuminanceBits */
  255    0,                                   /* IntensityBits */
  256    0,                                   /* IndexBits */
  257    0,                                   /* DepthBits */
  258    0,                                   /* TexelBytes */
  259    texstore_rgba_fxt1,                  /* StoreTexImageFunc */
  260    NULL, /*impossible*/                 /* FetchTexel1D */
  261    fetch_texel_2d_rgba_fxt1,            /* FetchTexel2D */
  262    NULL, /*impossible*/                 /* FetchTexel3D */
  263    NULL, /*impossible*/                 /* FetchTexel1Df */
  264    fetch_texel_2d_f_rgba_fxt1,          /* FetchTexel2Df */
  265    NULL, /*impossible*/                 /* FetchTexel3Df */
  266 };
 267
 268
 269 /***************************************************************************\
 270  * FXT1 encoder
 271  *
 272  * The encoder was built by reversing the decoder,
 273  * and is vaguely based on Texus2 by 3dfx. Note that this code
 274  * is merely a proof of concept, since it is highly UNoptimized;
 275  * moreover, it is sub-optimal due to initial conditions passed
 276  * to Lloyd's algorithm (the interpolation modes are even worse).
 277 \***************************************************************************/
 278
 279
 280 #define MAX_COMP 4 /* ever needed maximum number of components in texel */
 281 #define MAX_VECT 4 /* ever needed maximum number of base vectors to find */
 282 #define N_TEXELS 32 /* number of texels in a block (always 32) */
 283 #define LL_N_REP 50 /* number of iterations in lloyd's vq */
 284 #define LL_RMS_D 10 /* fault tolerance (maximum delta) */
 285 #define LL_RMS_E 255 /* fault tolerance (maximum error) */
 286 #define ALPHA_TS 2 /* alpha threshold: (255 - ALPHA_TS) deemed opaque */
 287 #define ISTBLACK(v) (*((unsigned long *)(v)) == 0)
 288
 289
 290 #ifdef __GNUC__
 291
 292 #define FX64_NATIVE 1
 293
 294 typedef unsigned long long Fx64;
 295
 296 #define FX64_MOV32(a, b) a = b
 297 #define FX64_OR32(a, b)  a |= b
 298 #define FX64_SHL(a, c)   a <<= c
 299
 300 #else  /* !__GNUC__ */
 301
 302 #define FX64_NATIVE 0
 303
 304 typedef struct {
 305         unsigned long lo, hi;
 306 } Fx64;
 307
 308 #define FX64_MOV32(a, b) a.lo = b
 309 #define FX64_OR32(a, b)  a.lo |= b
 310
 311 #define FX64_SHL(a, c)                                 \
 312    do {                                                \
 313        if ((c) >= 32) {                                \
 314           a.hi = a.lo << ((c) - 32);                   \
 315           a.lo = 0;                                    \
 316        } else {                                        \
 317           a.hi = (a.hi << (c)) | (a.lo >> (32 - (c))); \
 318           a.lo <<= (c);                                \
 319        }                                               \
 320    } while (0)
 321
 322 #endif /* !__GNUC__ */
 323
 324
 325 #define F(i) 1 /* can be used to obtain an oblong metric: 0.30 / 0.59 / 0.11 */
 326 #define SAFECDOT 1 /* for paranoids */
 327
 328 #define MAKEIVEC(NV, NC, IV, B, V0, V1)  \
 329    do {                                  \
 330       /* compute interpolation vector */ \
 331       float d2 = 0;                      \
 332       float rd2;                         \
 333                                          \
 334       for (i = 0; i < NC; i++) {         \
 335          IV[i] = (V1[i] - V0[i]) * F(i); \
 336          d2 += IV[i] * IV[i];            \
 337       }                                  \
 338       rd2 = (float)NV / d2;              \
 339       B = 0;                             \
 340       for (i = 0; i < NC; i++) {         \
 341          IV[i] *= F(i);                  \
 342          B -= IV[i] * V0[i];             \
 343          IV[i] *= rd2;                   \
 344       }                                  \
 345       B = B * rd2 + 0.5f;                \
 346    } while (0)
 347
 348 #define CALCCDOT(TEXEL, NV, NC, IV, B, V)\
 349    do {                                  \
 350       float dot = 0;                     \
 351       for (i = 0; i < NC; i++) {         \
 352          dot += V[i] * IV[i];            \
 353       }                                  \
 354       TEXEL = (int)(dot + B);            \
 355       if (SAFECDOT) {                    \
 356          if (TEXEL < 0) {                \
 357             TEXEL = 0;                   \
 358          } else if (TEXEL > NV) {        \
 359             TEXEL = NV;                  \
 360          }                               \
 361       }                                  \
 362    } while (0)
 363
 364
 365 static int
 366 fxt1_bestcol (float vec[][MAX_COMP], int nv,
 367               unsigned char input[MAX_COMP], int nc)
 368 {
 369    int i, j, best = -1;
 370    float err = 1e9; /* big enough */
 371
 372    for (j = 0; j < nv; j++) {
 373       float e = 0;
 374       for (i = 0; i < nc; i++) {
 375          e += (vec[j][i] - input[i]) * (vec[j][i] - input[i]);
 376       }
 377       if (e < err) {
 378          err = e;
 379          best = j;
 380       }
 381    }
 382
 383    return best;
 384 }
 385
 386
 387 static int
 388 fxt1_worst (float vec[MAX_COMP],
 389             unsigned char input[N_TEXELS][MAX_COMP], int nc, int n)
 390 {
 391    int i, k, worst = -1;
 392    float err = -1; /* small enough */
 393
 394    for (k = 0; k < n; k++) {
 395       float e = 0;
 396       for (i = 0; i < nc; i++) {
 397          e += (vec[i] - input[k][i]) * (vec[i] - input[k][i]);
 398       }
 399       if (e > err) {
 400          err = e;
 401          worst = k;
 402       }
 403    }
 404
 405    return worst;
 406 }
 407
 408
 409 static int
 410 fxt1_variance (double variance[MAX_COMP],
 411                unsigned char input[N_TEXELS][MAX_COMP], int nc, int n)
 412 {
 413    int i, k, best = 0;
 414    int sx, sx2;
 415    double var, maxvar = -1; /* small enough */
 416    double teenth = 1.0 / n;
 417
 418    for (i = 0; i < nc; i++) {
 419       sx = sx2 = 0;
 420       for (k = 0; k < n; k++) {
 421          int t = input[k][i];
 422          sx += t;
 423          sx2 += t * t;
 424       }
 425       var = sx2 * teenth - sx * sx * teenth * teenth;
 426       if (maxvar < var) {
 427          maxvar = var;
 428          best = i;
 429       }
 430       if (variance) {
 431          variance[i] = var;
 432       }
 433    }
 434
 435    return best;
 436 }
 437
 438
 439 static int
 440 fxt1_choose (float vec[][MAX_COMP], int nv,
 441             unsigned char input[N_TEXELS][MAX_COMP], int nc, int n)
 442 {
 443 #if 0
 444    /* Choose colors from a grid.
 445     */
 446    int i, j;
 447
 448    for (j = 0; j < nv; j++) {
 449       int m = j * (n - 1) / (nv - 1);
 450       for (i = 0; i < nc; i++) {
 451          vec[j][i] = input[m][i];
 452       }
 453    }
 454 #else
 455    /* Our solution here is to find the darkest and brightest colors in
 456     * the 8x4 tile and use those as the two representative colors.
 457     * There are probably better algorithms to use (histogram-based).
 458     */
 459    int i, j, k;
 460    int minSum = 2000; /* big enough */
 461    int maxSum = -1; /* small enough */
 462    int minCol = 0; /* phoudoin: silent compiler! */
 463    int maxCol = 0; /* phoudoin: silent compiler! */
 464
 465    struct {
 466       int flag;
 467       int key;
 468       int freq;
 469       int idx;
 470    } hist[N_TEXELS];
 471    int lenh = 0;
 472
 473    memset(hist, 0, sizeof(hist));
 474
 475    for (k = 0; k < n; k++) {
 476       int l;
 477       int key = 0;
 478       int sum = 0;
 479       for (i = 0; i < nc; i++) {
 480          key <<= 8;
 481          key |= input[k][i];
 482          sum += input[k][i];
 483       }
 484       for (l = 0; l < n; l++) {
 485          if (!hist[l].flag) {
 486             /* alloc new slot */
 487             hist[l].flag = !0;
 488             hist[l].key = key;
 489             hist[l].freq = 1;
 490             hist[l].idx = k;
 491             lenh = l + 1;
 492             break;
 493          } else if (hist[l].key == key) {
 494             hist[l].freq++;
 495             break;
 496          }
 497       }
 498       if (minSum > sum) {
 499          minSum = sum;
 500          minCol = k;
 501       }
 502       if (maxSum < sum) {
 503          maxSum = sum;
 504          maxCol = k;
 505       }
 506    }
 507
 508    if (lenh <= nv) {
 509       for (j = 0; j < lenh; j++) {
 510          for (i = 0; i < nc; i++) {
 511             vec[j][i] = (float)input[hist[j].idx][i];
 512          }
 513       }
 514       for (; j < nv; j++) {
 515          for (i = 0; i < nc; i++) {
 516             vec[j][i] = vec[0][i];
 517          }
 518       }
 519       return 0;
 520    }
 521
 522    for (j = 0; j < nv; j++) {
 523       for (i = 0; i < nc; i++) {
 524          vec[j][i] = ((nv - 1 - j) * input[minCol][i] + j * input[maxCol][i] + (nv - 1) / 2) / (nv - 1);
 525       }
 526    }
 527 #endif
 528
 529    return !0;
 530 }
 531
 532
 533 static int
 534 fxt1_lloyd (float vec[][MAX_COMP], int nv,
 535             unsigned char input[N_TEXELS][MAX_COMP], int nc, int n)
 536 {
 537    /* Use the generalized lloyd's algorithm for VQ:
 538     *     find 4 color vectors.
 539     *
 540     *     for each sample color
 541     *         sort to nearest vector.
 542     *
 543     *     replace each vector with the centroid of it's matching colors.
 544     *
 545     *     repeat until RMS doesn't improve.
 546     *
 547     *     if a color vector has no samples, or becomes the same as another
 548     *     vector, replace it with the color which is farthest from a sample.
 549     *
 550     * vec[][MAX_COMP]           initial vectors and resulting colors
 551     * nv                        number of resulting colors required
 552     * input[N_TEXELS][MAX_COMP] input texels
 553     * nc                        number of components in input / vec
 554     * n                         number of input samples
 555     */
 556
 557    int sum[MAX_VECT][MAX_COMP]; /* used to accumulate closest texels */
 558    int cnt[MAX_VECT]; /* how many times a certain vector was chosen */
 559    float error, lasterror = 1e9;
 560
 561    int i, j, k, rep;
 562
 563    /* the quantizer */
 564    for (rep = 0; rep < LL_N_REP; rep++) {
 565       /* reset sums & counters */
 566       for (j = 0; j < nv; j++) {
 567          for (i = 0; i < nc; i++) {
 568             sum[j][i] = 0;
 569          }
 570          cnt[j] = 0;
 571       }
 572       error = 0;
 573
 574       /* scan whole block */
 575       for (k = 0; k < n; k++) {
 576 #if 1
 577          int best = -1;
 578          float err = 1e9; /* big enough */
 579          /* determine best vector */
 580          for (j = 0; j < nv; j++) {
 581             float e = (vec[j][0] - input[k][0]) * (vec[j][0] - input[k][0]) +
 582                       (vec[j][1] - input[k][1]) * (vec[j][1] - input[k][1]) +
 583                       (vec[j][2] - input[k][2]) * (vec[j][2] - input[k][2]);
 584             if (nc == 4) {
 585                e += (vec[j][3] - input[k][3]) * (vec[j][3] - input[k][3]);
 586             }
 587             if (e < err) {
 588                err = e;
 589                best = j;
 590             }
 591          }
 592 #else
 593          int best = fxt1_bestcol(vec, nv, input[k], nc, &err);
 594 #endif
 595          /* add in closest color */
 596          for (i = 0; i < nc; i++) {
 597             sum[best][i] += input[k][i];
 598          }
 599          /* mark this vector as used */
 600          cnt[best]++;
 601          /* accumulate error */
 602          error += err;
 603       }
 604
 605       /* check RMS */
 606       if ((error < LL_RMS_E) ||
 607           ((error < lasterror) && ((lasterror - error) < LL_RMS_D))) {
 608          return !0; /* good match */
 609       }
 610       lasterror = error;
 611
 612       /* move each vector to the barycenter of its closest colors */
 613       for (j = 0; j < nv; j++) {
 614          if (cnt[j]) {
 615             float div = 1.0F / cnt[j];
 616             for (i = 0; i < nc; i++) {
 617                vec[j][i] = div * sum[j][i];
 618             }
 619          } else {
 620             /* this vec has no samples or is identical with a previous vec */
 621             int worst = fxt1_worst(vec[j], input, nc, n);
 622             for (i = 0; i < nc; i++) {
 623                vec[j][i] = input[worst][i];
 624             }
 625          }
 626       }
 627    }
 628
 629    return 0; /* could not converge fast enough */
 630 }
 631
 632
 633 static void
 634 fxt1_quantize_CHROMA (unsigned long *cc,
 635                       unsigned char input[N_TEXELS][MAX_COMP])
 636 {
 637    const int n_vect = 4; /* 4 base vectors to find */
 638    const int n_comp = 3; /* 3 components: R, G, B */
 639    float vec[MAX_VECT][MAX_COMP];
 640    int i, j, k;
 641    Fx64 hi; /* high quadword */
 642    unsigned long lohi, lolo; /* low quadword: hi dword, lo dword */
 643
 644    if (fxt1_choose(vec, n_vect, input, n_comp, N_TEXELS) != 0) {
 645       fxt1_lloyd(vec, n_vect, input, n_comp, N_TEXELS);
 646    }
 647
 648    FX64_MOV32(hi, 4); /* cc-chroma = "010" + unused bit */
 649    for (j = n_vect - 1; j >= 0; j--) {
 650       for (i = 0; i < n_comp; i++) {
 651          /* add in colors */
 652          FX64_SHL(hi, 5);
 653          FX64_OR32(hi, (unsigned int)(vec[j][i] / 8.0));
 654       }
 655    }
 656    ((Fx64 *)cc)[1] = hi;
 657
 658    lohi = lolo = 0;
 659    /* right microtile */
 660    for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) {
 661       lohi <<= 2;
 662       lohi |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
 663    }
 664    /* left microtile */
 665    for (; k >= 0; k--) {
 666       lolo <<= 2;
 667       lolo |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
 668    }
 669    cc[1] = lohi;
 670    cc[0] = lolo;
 671 }
 672
 673
 674 static void
 675 fxt1_quantize_ALPHA0 (unsigned long *cc,
 676                       unsigned char input[N_TEXELS][MAX_COMP],
 677                       unsigned char reord[N_TEXELS][MAX_COMP], int n)
 678 {
 679    const int n_vect = 3; /* 3 base vectors to find */
 680    const int n_comp = 4; /* 4 components: R, G, B, A */
 681    float vec[MAX_VECT][MAX_COMP];
 682    int i, j, k;
 683    Fx64 hi; /* high quadword */
 684    unsigned long lohi, lolo; /* low quadword: hi dword, lo dword */
 685
 686    /* the last vector indicates zero */
 687    for (i = 0; i < n_comp; i++) {
 688       vec[n_vect][i] = 0;
 689    }
 690
 691    /* the first n texels in reord are guaranteed to be non-zero */
 692    if (fxt1_choose(vec, n_vect, reord, n_comp, n) != 0) {
 693       fxt1_lloyd(vec, n_vect, reord, n_comp, n);
 694    }
 695
 696    FX64_MOV32(hi, 6); /* alpha = "011" + lerp = 0 */
 697    for (j = n_vect - 1; j >= 0; j--) {
 698       /* add in alphas */
 699       FX64_SHL(hi, 5);
 700       FX64_OR32(hi, (unsigned int)(vec[j][ACOMP] / 8.0));
 701    }
 702    for (j = n_vect - 1; j >= 0; j--) {
 703       for (i = 0; i < n_comp - 1; i++) {
 704          /* add in colors */
 705          FX64_SHL(hi, 5);
 706          FX64_OR32(hi, (unsigned int)(vec[j][i] / 8.0));
 707       }
 708    }
 709    ((Fx64 *)cc)[1] = hi;
 710
 711    lohi = lolo = 0;
 712    /* right microtile */
 713    for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) {
 714       lohi <<= 2;
 715       lohi |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
 716    }
 717    /* left microtile */
 718    for (; k >= 0; k--) {
 719       lolo <<= 2;
 720       lolo |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
 721    }
 722    cc[1] = lohi;
 723    cc[0] = lolo;
 724 }
 725
 726
 727 static void
 728 fxt1_quantize_ALPHA1 (unsigned long *cc,
 729                       unsigned char input[N_TEXELS][MAX_COMP])
 730 {
 731    const int n_vect = 3; /* highest vector number in each microtile */
 732    const int n_comp = 4; /* 4 components: R, G, B, A */
 733    float vec[1 + 1 + 1][MAX_COMP]; /* 1.5 extrema for each sub-block */
 734    float b, iv[MAX_COMP]; /* interpolation vector */
 735    int i, j, k;
 736    Fx64 hi; /* high quadword */
 737    unsigned long lohi, lolo; /* low quadword: hi dword, lo dword */
 738
 739    int minSum;
 740    int maxSum;
 741    int minColL = 0, maxColL = 0;
 742    int minColR = 0, maxColR = 0;
 743    int sumL = 0, sumR = 0;
 744
 745    /* Our solution here is to find the darkest and brightest colors in
 746     * the 4x4 tile and use those as the two representative colors.
 747     * There are probably better algorithms to use (histogram-based).
 748     */
 749    minSum = 2000; /* big enough */
 750    maxSum = -1; /* small enough */
 751    for (k = 0; k < N_TEXELS / 2; k++) {
 752       int sum = 0;
 753       for (i = 0; i < n_comp; i++) {
 754          sum += input[k][i];
 755       }
 756       if (minSum > sum) {
 757          minSum = sum;
 758          minColL = k;
 759       }
 760       if (maxSum < sum) {
 761          maxSum = sum;
 762          maxColL = k;
 763       }
 764       sumL += sum;
 765    }
 766    minSum = 2000; /* big enough */
 767    maxSum = -1; /* small enough */
 768    for (; k < N_TEXELS; k++) {
 769       int sum = 0;
 770       for (i = 0; i < n_comp; i++) {
 771          sum += input[k][i];
 772       }
 773       if (minSum > sum) {
 774          minSum = sum;
 775          minColR = k;
 776       }
 777       if (maxSum < sum) {
 778          maxSum = sum;
 779          maxColR = k;
 780       }
 781       sumR += sum;
 782    }
 783
 784    /* choose the common vector (yuck!) */
 785 {
 786    int j1, j2;
 787    int v1 = 0, v2 = 0;
 788    float err = 1e9; /* big enough */
 789    float tv[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
 790    for (i = 0; i < n_comp; i++) {
 791       tv[0][i] = input[minColL][i];
 792       tv[1][i] = input[maxColL][i];
 793       tv[2][i] = input[minColR][i];
 794       tv[3][i] = input[maxColR][i];
 795    }
 796    for (j1 = 0; j1 < 2; j1++) {
 797       for (j2 = 2; j2 < 4; j2++) {
 798           float e = 0;
 799           for (i = 0; i < n_comp; i++) {
 800              e += (tv[j1][i] - tv[j2][i]) * (tv[j1][i] - tv[j2][i]);
 801           }
 802           if (e < err) {
 803              err = e;
 804              v1 = j1;
 805              v2 = j2;
 806           }
 807       }
 808    }
 809    for (i = 0; i < n_comp; i++) {
 810       vec[0][i] = tv[1 - v1][i];
 811       vec[1][i] = (tv[v1][i] * sumL + tv[v2][i] * sumR) / (sumL + sumR);
 812       vec[2][i] = tv[5 - v2][i];
 813    }
 814 }
 815
 816    /* left microtile */
 817    cc[0] = 0;
 818    if (minColL != maxColL) {
 819       /* compute interpolation vector */
 820       MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
 821
 822       /* add in texels */
 823       lolo = 0;
 824       for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
 825          int texel;
 826          /* interpolate color */
 827          CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
 828          /* add in texel */
 829          lolo <<= 2;
 830          lolo |= texel;
 831       }
 832
 833       cc[0] = lolo;
 834    }
 835
 836    /* right microtile */
 837    cc[1] = 0;
 838    if (minColR != maxColR) {
 839       /* compute interpolation vector */
 840       MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[1]);
 841
 842       /* add in texels */
 843       lohi = 0;
 844       for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
 845          int texel;
 846          /* interpolate color */
 847          CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
 848          /* add in texel */
 849          lohi <<= 2;
 850          lohi |= texel;
 851       }
 852
 853       cc[1] = lohi;
 854    }
 855
 856    FX64_MOV32(hi, 7); /* alpha = "011" + lerp = 1 */
 857    for (j = n_vect - 1; j >= 0; j--) {
 858       /* add in alphas */
 859       FX64_SHL(hi, 5);
 860       FX64_OR32(hi, (unsigned int)(vec[j][ACOMP] / 8.0));
 861    }
 862    for (j = n_vect - 1; j >= 0; j--) {
 863       for (i = 0; i < n_comp - 1; i++) {
 864          /* add in colors */
 865          FX64_SHL(hi, 5);
 866          FX64_OR32(hi, (unsigned int)(vec[j][i] / 8.0));
 867       }
 868    }
 869    ((Fx64 *)cc)[1] = hi;
 870 }
 871
 872
 873 static void
 874 fxt1_quantize_HI (unsigned long *cc,
 875                   unsigned char input[N_TEXELS][MAX_COMP],
 876                   unsigned char reord[N_TEXELS][MAX_COMP], int n)
 877 {
 878    const int n_vect = 6; /* highest vector number */
 879    const int n_comp = 3; /* 3 components: R, G, B */
 880    float b = 0.0;        /* phoudoin: silent compiler! */
 881    float iv[MAX_COMP];   /* interpolation vector */
 882    int i, k;
 883    unsigned long hihi; /* high quadword: hi dword */
 884
 885    int minSum = 2000; /* big enough */
 886    int maxSum = -1; /* small enough */
 887    int minCol = 0; /* phoudoin: silent compiler! */
 888    int maxCol = 0; /* phoudoin: silent compiler! */
 889
 890    /* Our solution here is to find the darkest and brightest colors in
 891     * the 8x4 tile and use those as the two representative colors.
 892     * There are probably better algorithms to use (histogram-based).
 893     */
 894    for (k = 0; k < n; k++) {
 895       int sum = 0;
 896       for (i = 0; i < n_comp; i++) {
 897          sum += reord[k][i];
 898       }
 899       if (minSum > sum) {
 900          minSum = sum;
 901          minCol = k;
 902       }
 903       if (maxSum < sum) {
 904          maxSum = sum;
 905          maxCol = k;
 906       }
 907    }
 908
 909    hihi = 0; /* cc-hi = "00" */
 910    for (i = 0; i < n_comp; i++) {
 911       /* add in colors */
 912       hihi <<= 5;
 913       hihi |= reord[maxCol][i] >> 3;
 914    }
 915    for (i = 0; i < n_comp; i++) {
 916       /* add in colors */
 917       hihi <<= 5;
 918       hihi |= reord[minCol][i] >> 3;
 919    }
 920    cc[3] = hihi;
 921    cc[0] = cc[1] = cc[2] = 0;
 922
 923    /* compute interpolation vector */
 924    if (minCol != maxCol) {
 925       MAKEIVEC(n_vect, n_comp, iv, b, reord[minCol], reord[maxCol]);
 926    }
 927
 928    /* add in texels */
 929    for (k = N_TEXELS - 1; k >= 0; k--) {
 930       int t = k * 3;
 931       unsigned long *kk = (unsigned long *)((unsigned long)cc + t / 8);
 932       int texel = n_vect + 1; /* transparent black */
 933
 934       if (!ISTBLACK(input[k])) {
 935          if (minCol != maxCol) {
 936             /* interpolate color */
 937             CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
 938             /* add in texel */
 939             kk[0] |= texel << (t & 7);
 940          }
 941       } else {
 942          /* add in texel */
 943          kk[0] |= texel << (t & 7);
 944       }
 945    }
 946 }
 947
 948
 949 static void
 950 fxt1_quantize_MIXED1 (unsigned long *cc,
 951                       unsigned char input[N_TEXELS][MAX_COMP])
 952 {
 953    const int n_vect = 2; /* highest vector number in each microtile */
 954    const int n_comp = 3; /* 3 components: R, G, B */
 955    unsigned char vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
 956    float b, iv[MAX_COMP]; /* interpolation vector */
 957    int i, j, k;
 958    Fx64 hi; /* high quadword */
 959    unsigned long lohi, lolo; /* low quadword: hi dword, lo dword */
 960
 961    int minSum;
 962    int maxSum;
 963    int minColL = 0, maxColL = -1;
 964    int minColR = 0, maxColR = -1;
 965
 966    /* Our solution here is to find the darkest and brightest colors in
 967     * the 4x4 tile and use those as the two representative colors.
 968     * There are probably better algorithms to use (histogram-based).
 969     */
 970    minSum = 2000; /* big enough */
 971    maxSum = -1; /* small enough */
 972    for (k = 0; k < N_TEXELS / 2; k++) {
 973       if (!ISTBLACK(input[k])) {
 974          int sum = 0;
 975          for (i = 0; i < n_comp; i++) {
 976             sum += input[k][i];
 977          }
 978          if (minSum > sum) {
 979             minSum = sum;
 980             minColL = k;
 981          }
 982          if (maxSum < sum) {
 983             maxSum = sum;
 984             maxColL = k;
 985          }
 986       }
 987    }
 988    minSum = 2000; /* big enough */
 989    maxSum = -1; /* small enough */
 990    for (; k < N_TEXELS; k++) {
 991       if (!ISTBLACK(input[k])) {
 992          int sum = 0;
 993          for (i = 0; i < n_comp; i++) {
 994             sum += input[k][i];
 995          }
 996          if (minSum > sum) {
 997             minSum = sum;
 998             minColR = k;
 999          }
1000          if (maxSum < sum) {
1001             maxSum = sum;
1002             maxColR = k;
1003          }
1004       }
1005    }
1006
1007    /* left microtile */
1008    if (maxColL == -1) {
1009       /* all transparent black */
1010       cc[0] = -1;
1011       for (i = 0; i < n_comp; i++) {
1012          vec[0][i] = 0;
1013          vec[1][i] = 0;
1014       }
1015    } else {
1016       cc[0] = 0;
1017       for (i = 0; i < n_comp; i++) {
1018          vec[0][i] = input[minColL][i];
1019          vec[1][i] = input[maxColL][i];
1020       }
1021       if (minColL != maxColL) {
1022          /* compute interpolation vector */
1023          MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
1024
1025          /* add in texels */
1026          lolo = 0;
1027          for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
1028             int texel = n_vect + 1; /* transparent black */
1029             if (!ISTBLACK(input[k])) {
1030                /* interpolate color */
1031                CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
1032             }
1033             /* add in texel */
1034             lolo <<= 2;
1035             lolo |= texel;
1036          }
1037          cc[0] = lolo;
1038       }
1039    }
1040
1041    /* right microtile */
1042    if (maxColR == -1) {
1043       /* all transparent black */
1044       cc[1] = -1;
1045       for (i = 0; i < n_comp; i++) {
1046          vec[2][i] = 0;
1047          vec[3][i] = 0;
1048       }
1049    } else {
1050       cc[1] = 0;
1051       for (i = 0; i < n_comp; i++) {
1052          vec[2][i] = input[minColR][i];
1053          vec[3][i] = input[maxColR][i];
1054       }
1055       if (minColR != maxColR) {
1056          /* compute interpolation vector */
1057          MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);
1058
1059          /* add in texels */
1060          lohi = 0;
1061          for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
1062             int texel = n_vect + 1; /* transparent black */
1063             if (!ISTBLACK(input[k])) {
1064                /* interpolate color */
1065                CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
1066             }
1067             /* add in texel */
1068             lohi <<= 2;
1069             lohi |= texel;
1070          }
1071          cc[1] = lohi;
1072       }
1073    }
1074
1075    FX64_MOV32(hi, 9 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
1076    for (j = 2 * 2 - 1; j >= 0; j--) {
1077       for (i = 0; i < n_comp; i++) {
1078          /* add in colors */
1079          FX64_SHL(hi, 5);
1080          FX64_OR32(hi, vec[j][i] >> 3);
1081       }
1082    }
1083    ((Fx64 *)cc)[1] = hi;
1084 }
1085
1086
1087 static void
1088 fxt1_quantize_MIXED0 (unsigned long *cc,
1089                       unsigned char input[N_TEXELS][MAX_COMP])
1090 {
1091    const int n_vect = 3; /* highest vector number in each microtile */
1092    const int n_comp = 3; /* 3 components: R, G, B */
1093    unsigned char vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
1094    float b, iv[MAX_COMP]; /* interpolation vector */
1095    int i, j, k;
1096    Fx64 hi; /* high quadword */
1097    unsigned long lohi, lolo; /* low quadword: hi dword, lo dword */
1098
1099    int minColL = 0, maxColL = 0;
1100    int minColR = 0, maxColR = 0;
1101 #if 0
1102    int minSum;
1103    int maxSum;
1104
1105    /* Our solution here is to find the darkest and brightest colors in
1106     * the 4x4 tile and use those as the two representative colors.
1107     * There are probably better algorithms to use (histogram-based).
1108     */
1109    minSum = 2000; /* big enough */
1110    maxSum = -1; /* small enough */
1111    for (k = 0; k < N_TEXELS / 2; k++) {
1112       int sum = 0;
1113       for (i = 0; i < n_comp; i++) {
1114          sum += input[k][i];
1115       }
1116       if (minSum > sum) {
1117          minSum = sum;
1118          minColL = k;
1119       }
1120       if (maxSum < sum) {
1121          maxSum = sum;
1122          maxColL = k;
1123       }
1124    }
1125    minSum = 2000; /* big enough */
1126    maxSum = -1; /* small enough */
1127    for (; k < N_TEXELS; k++) {
1128       int sum = 0;
1129       for (i = 0; i < n_comp; i++) {
1130          sum += input[k][i];
1131       }
1132       if (minSum > sum) {
1133          minSum = sum;
1134          minColR = k;
1135       }
1136       if (maxSum < sum) {
1137          maxSum = sum;
1138          maxColR = k;
1139       }
1140    }
1141 #else
1142    int minVal;
1143    int maxVal;
1144    int maxVarL = fxt1_variance(NULL, input, n_comp, N_TEXELS / 2);
1145    int maxVarR = fxt1_variance(NULL, &input[N_TEXELS / 2], n_comp, N_TEXELS / 2);
1146
1147    /* Scan the channel with max variance for lo & hi
1148     * and use those as the two representative colors.
1149     */
1150    minVal = 2000; /* big enough */
1151    maxVal = -1; /* small enough */
1152    for (k = 0; k < N_TEXELS / 2; k++) {
1153       int t = input[k][maxVarL];
1154       if (minVal > t) {
1155          minVal = t;
1156          minColL = k;
1157       }
1158       if (maxVal < t) {
1159          maxVal = t;
1160          maxColL = k;
1161       }
1162    }
1163    minVal = 2000; /* big enough */
1164    maxVal = -1; /* small enough */
1165    for (; k < N_TEXELS; k++) {
1166       int t = input[k][maxVarR];
1167       if (minVal > t) {
1168          minVal = t;
1169          minColR = k;
1170       }
1171       if (maxVal < t) {
1172          maxVal = t;
1173          maxColR = k;
1174       }
1175    }
1176 #endif
1177
1178    /* left microtile */
1179    cc[0] = 0;
1180    for (i = 0; i < n_comp; i++) {
1181       vec[0][i] = input[minColL][i];
1182       vec[1][i] = input[maxColL][i];
1183    }
1184    if (minColL != maxColL) {
1185       /* compute interpolation vector */
1186       MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
1187
1188       /* add in texels */
1189       lolo = 0;
1190       for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
1191          int texel;
1192          /* interpolate color */
1193          CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
1194          /* add in texel */
1195          lolo <<= 2;
1196          lolo |= texel;
1197       }
1198
1199       /* funky encoding for LSB of green */
1200       if ((int)((lolo >> 1) & 1) != (((vec[1][GCOMP] ^ vec[0][GCOMP]) >> 2) & 1)) {
1201          for (i = 0; i < n_comp; i++) {
1202             vec[1][i] = input[minColL][i];
1203             vec[0][i] = input[maxColL][i];
1204          }
1205          lolo = ~lolo;
1206       }
1207
1208       cc[0] = lolo;
1209    }
1210
1211    /* right microtile */
1212    cc[1] = 0;
1213    for (i = 0; i < n_comp; i++) {
1214       vec[2][i] = input[minColR][i];
1215       vec[3][i] = input[maxColR][i];
1216    }
1217    if (minColR != maxColR) {
1218       /* compute interpolation vector */
1219       MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);
1220
1221       /* add in texels */
1222       lohi = 0;
1223       for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
1224          int texel;
1225          /* interpolate color */
1226          CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
1227          /* add in texel */
1228          lohi <<= 2;
1229          lohi |= texel;
1230       }
1231
1232       /* funky encoding for LSB of green */
1233       if ((int)((lohi >> 1) & 1) != (((vec[3][GCOMP] ^ vec[2][GCOMP]) >> 2) & 1)) {
1234          for (i = 0; i < n_comp; i++) {
1235             vec[3][i] = input[minColR][i];
1236             vec[2][i] = input[maxColR][i];
1237          }
1238          lohi = ~lohi;
1239       }
1240
1241       cc[1] = lohi;
1242    }
1243
1244    FX64_MOV32(hi, 8 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
1245    for (j = 2 * 2 - 1; j >= 0; j--) {
1246       for (i = 0; i < n_comp; i++) {
1247          /* add in colors */
1248          FX64_SHL(hi, 5);
1249          FX64_OR32(hi, vec[j][i] >> 3);
1250       }
1251    }
1252    ((Fx64 *)cc)[1] = hi;
1253 }
1254
1255
1256 static void
1257 fxt1_quantize (unsigned long *cc, const unsigned char *lines[], int comps)
1258 {
1259    int trualpha;
1260    unsigned char reord[N_TEXELS][MAX_COMP];
1261
1262    unsigned char input[N_TEXELS][MAX_COMP];
1263    int i, k, l;
1264
1265    if (comps == 3) {
1266       /* make the whole block opaque */
1267       memset(input, -1, sizeof(input));
1268    }
1269
1270    /* 8 texels each line */
1271    for (l = 0; l < 4; l++) {
1272       for (k = 0; k < 4; k++) {
1273          for (i = 0; i < comps; i++) {
1274             input[k + l * 4][i] = *lines[l]++;
1275          }
1276       }
1277       for (; k < 8; k++) {
1278          for (i = 0; i < comps; i++) {
1279             input[k + l * 4 + 12][i] = *lines[l]++;
1280          }
1281       }
1282    }
1283
1284    /* block layout:
1285     * 00, 01, 02, 03, 08, 09, 0a, 0b
1286     * 10, 11, 12, 13, 18, 19, 1a, 1b
1287     * 04, 05, 06, 07, 0c, 0d, 0e, 0f
1288     * 14, 15, 16, 17, 1c, 1d, 1e, 1f
1289     */
1290
1291    /* [dBorca]
1292     * stupidity flows forth from this
1293     */
1294    l = N_TEXELS;
1295    trualpha = 0;
1296    if (comps == 4) {
1297       /* skip all transparent black texels */
1298       l = 0;
1299       for (k = 0; k < N_TEXELS; k++) {
1300          /* test all components against 0 */
1301          if (!ISTBLACK(input[k])) {
1302             /* texel is not transparent black */
1303             COPY_4UBV(reord[l], input[k]);
1304             if (reord[l][ACOMP] < (255 - ALPHA_TS)) {
1305                /* non-opaque texel */
1306                trualpha = !0;
1307             }
1308             l++;
1309          }
1310       }
1311    }
1312
1313 #if 0
1314    if (trualpha) {
1315       fxt1_quantize_ALPHA0(cc, input, reord, l);
1316    } else if (l == 0) {
1317       cc[0] = cc[1] = cc[2] = -1;
1318       cc[3] = 0;
1319    } else if (l < N_TEXELS) {
1320       fxt1_quantize_HI(cc, input, reord, l);
1321    } else {
1322       fxt1_quantize_CHROMA(cc, input);
1323    }
1324    (void)fxt1_quantize_ALPHA1;
1325    (void)fxt1_quantize_MIXED1;
1326    (void)fxt1_quantize_MIXED0;
1327 #else
1328    if (trualpha) {
1329       fxt1_quantize_ALPHA1(cc, input);
1330    } else if (l == 0) {
1331       cc[0] = cc[1] = cc[2] = -1;
1332       cc[3] = 0;
1333    } else if (l < N_TEXELS) {
1334       fxt1_quantize_MIXED1(cc, input);
1335    } else {
1336       fxt1_quantize_MIXED0(cc, input);
1337    }
1338    (void)fxt1_quantize_ALPHA0;
1339    (void)fxt1_quantize_HI;
1340    (void)fxt1_quantize_CHROMA;
1341 #endif
1342 }
1343
1344
/**
 * Compress an image to FXT1, one 128-bit block per 8x4 tile.
 *
 * If the width is not a multiple of 8 or the height not a multiple of 4,
 * the image is first enlarged to the next such size into a temporary
 * buffer by _mesa_upscale_teximage2d().
 *
 * \param width, height  size of the source image in texels
 * \param comps          components (bytes) per source texel
 * \param source         source image
 * \param srcRowStride   distance between source rows, in bytes
 * \param dest           destination for the compressed blocks
 * \param destRowStride  distance between rows of blocks in \p dest
 *                       (presumably in bytes -- confirm against callers)
 * \return 0 on success, -1 if the temporary buffer cannot be allocated
 *         (nothing is written to \p dest in that case)
 *
 * NOTE(review): the output pointer advances by four unsigned longs per
 * 128-bit block, i.e. a 32-bit unsigned long is assumed.
 */
int
fxt1_encode (unsigned int width, unsigned int height, int comps,
             const void *source, int srcRowStride,
             void *dest, int destRowStride)
{
   unsigned int x, y;
   const unsigned char *data;
   unsigned long *encoded = dest;
   unsigned char *newSource = NULL;

   /* Replicate image if width is not M8 or height is not M4 */
   if ((width & 7) | (height & 3)) {
      int newWidth = (width + 7) & ~7;
      int newHeight = (height + 3) & ~3;
      /* one byte per component; the size is computed in size_t */
      newSource = malloc((size_t)comps * newWidth * newHeight
                         * sizeof(unsigned char));
      if (newSource == NULL) {
         return -1;
      }
      _mesa_upscale_teximage2d(width, height, newWidth, newHeight,
                          comps, source, srcRowStride, newSource);
      source = newSource;
      width = newWidth;
      height = newHeight;
      srcRowStride = comps * newWidth;
   }

   data = source;
   /* padding after each row of blocks, converted to output words */
   destRowStride = (destRowStride - width * 2) / 4;
   for (y = 0; y < height; y += 4) {
      unsigned int offs = 0 + (y + 0) * srcRowStride;
      for (x = 0; x < width; x += 8) {
         const unsigned char *lines[4];
         lines[0] = &data[offs];
         lines[1] = lines[0] + srcRowStride;
         lines[2] = lines[1] + srcRowStride;
         lines[3] = lines[2] + srcRowStride;
         offs += 8 * comps;
         fxt1_quantize(encoded, lines, comps);
         /* 128 bits per 8x4 block */
         encoded += 4;
      }
      encoded += destRowStride;
   }

   /* free(NULL) is a no-op, so no guard is needed */
   free(newSource);

   return 0;
}
1392
1393
1394 /***************************************************************************\
1395  * FXT1 decoder
1396  *
1397  * The decoder is based on GL_3DFX_texture_compression_FXT1
1398  * specification and serves as a concept for the encoder.
1399 \***************************************************************************/
1400
1401
/* lookup table for scaling 5 bit colors up to 8 bits:
 * entry i is i * 255 / 31, rounded to nearest (read-only)
 */
static const unsigned char _rgb_scale_5[32] = {
   0,   8,   16,  25,  33,  41,  49,  58,
   66,  74,  82,  90,  99,  107, 115, 123,
   132, 140, 148, 156, 165, 173, 181, 189,
   197, 206, 214, 222, 230, 239, 247, 255
};
1409
/* lookup table for scaling 6 bit colors up to 8 bits:
 * entry i is i * 255 / 63, rounded to nearest (read-only)
 */
static const unsigned char _rgb_scale_6[64] = {
   0,   4,   8,   12,  16,  20,  24,  28,
   32,  36,  40,  45,  49,  53,  57,  61,
   65,  69,  73,  77,  81,  85,  89,  93,
   97,  101, 105, 109, 113, 117, 121, 125,
   130, 134, 138, 142, 146, 150, 154, 158,
   162, 166, 170, 174, 178, 182, 186, 190,
   194, 198, 202, 206, 210, 215, 219, 223,
   227, 231, 235, 239, 243, 247, 251, 255
};
1421
1422
/*
 * Decoder helpers.
 *
 * CC_SEL(cc, which)   the block word holding bit `which`, shifted so that
 *                     bit becomes bit 0; the caller masks the result.
 *                     NOTE(review): indexes by which / 32, i.e. assumes a
 *                     32-bit unsigned long.
 * UP5(c)              low 5 bits of c scaled to 8 bits
 * UP6(c, b)           low 5 bits of c with bit 0 of b appended, scaled from
 *                     6 to 8 bits
 * LERP(n, t, c0, c1)  c0 + (c1 - c0) * t / n with rounding, in integer
 *                     arithmetic; fully parenthesized so it can be used
 *                     inside a larger expression
 * ZERO_4UBV(v)        clear the 4 bytes at v; done bytewise so it never
 *                     touches more than 4 bytes and has no alignment
 *                     requirement
 */
#define CC_SEL(cc, which) (((unsigned long *)(cc))[(which) / 32] >> ((which) & 31))
#define UP5(c) (_rgb_scale_5[(c) & 31])
#define UP6(c, b) (_rgb_scale_6[(((c) & 31) << 1) | ((b) & 1)])
#define LERP(n, t, c0, c1) ((((n) - (t)) * (c0) + (t) * (c1) + (n) / 2) / (n))
#define ZERO_4UBV(v) ((void) memset((v), 0, 4))
1428
1429
1430 static void
1431 fxt1_decode_1HI (unsigned char *code, int t, unsigned char *rgba)
1432 {
1433    const unsigned long *cc;
1434
1435    t *= 3;
1436    cc = (unsigned long *)(code + t / 8);
1437    t = (cc[0] >> (t & 7)) & 7;
1438
1439    if (t == 7) {
1440       ZERO_4UBV(rgba);
1441    } else {
1442       cc = (unsigned long *)(code + 12);
1443       if (t == 0) {
1444          rgba[BCOMP] = UP5(CC_SEL(cc, 0));
1445          rgba[GCOMP] = UP5(CC_SEL(cc, 5));
1446          rgba[RCOMP] = UP5(CC_SEL(cc, 10));
1447       } else if (t == 6) {
1448          rgba[BCOMP] = UP5(CC_SEL(cc, 15));
1449          rgba[GCOMP] = UP5(CC_SEL(cc, 20));
1450          rgba[RCOMP] = UP5(CC_SEL(cc, 25));
1451       } else {
1452          rgba[BCOMP] = LERP(6, t, UP5(CC_SEL(cc, 0)), UP5(CC_SEL(cc, 15)));
1453          rgba[GCOMP] = LERP(6, t, UP5(CC_SEL(cc, 5)), UP5(CC_SEL(cc, 20)));
1454          rgba[RCOMP] = LERP(6, t, UP5(CC_SEL(cc, 10)), UP5(CC_SEL(cc, 25)));
1455       }
1456       rgba[ACOMP] = 255;
1457    }
1458 }
1459
1460
1461 static void
1462 fxt1_decode_1CHROMA (unsigned char *code, int t, unsigned char *rgba)
1463 {
1464    const unsigned long *cc;
1465    unsigned long kk;
1466
1467    cc = (unsigned long *)code;
1468    if (t & 16) {
1469       cc++;
1470       t &= 15;
1471    }
1472    t = (cc[0] >> (t * 2)) & 3;
1473
1474    t *= 15;
1475    cc = (unsigned long *)(code + 8 + t / 8);
1476    kk = cc[0] >> (t & 7);
1477    rgba[BCOMP] = UP5(kk);
1478    rgba[GCOMP] = UP5(kk >> 5);
1479    rgba[RCOMP] = UP5(kk >> 10);
1480    rgba[ACOMP] = 255;
1481 }
1482
1483
1484 static void
1485 fxt1_decode_1MIXED (unsigned char *code, int t, unsigned char *rgba)
1486 {
1487    const unsigned long *cc;
1488    unsigned int col[2][3];
1489    int glsb, selb;
1490
1491    cc = (unsigned long *)code;
1492    if (t & 16) {
1493       t &= 15;
1494       t = (cc[1] >> (t * 2)) & 3;
1495       /* col 2 */
1496       col[0][BCOMP] = (*(unsigned long *)(code + 11)) >> 6;
1497       col[0][GCOMP] = CC_SEL(cc, 99);
1498       col[0][RCOMP] = CC_SEL(cc, 104);
1499       /* col 3 */
1500       col[1][BCOMP] = CC_SEL(cc, 109);
1501       col[1][GCOMP] = CC_SEL(cc, 114);
1502       col[1][RCOMP] = CC_SEL(cc, 119);
1503       glsb = CC_SEL(cc, 126);
1504       selb = CC_SEL(cc, 33);
1505    } else {
1506       t = (cc[0] >> (t * 2)) & 3;
1507       /* col 0 */
1508       col[0][BCOMP] = CC_SEL(cc, 64);
1509       col[0][GCOMP] = CC_SEL(cc, 69);
1510       col[0][RCOMP] = CC_SEL(cc, 74);
1511       /* col 1 */
1512       col[1][BCOMP] = CC_SEL(cc, 79);
1513       col[1][GCOMP] = CC_SEL(cc, 84);
1514       col[1][RCOMP] = CC_SEL(cc, 89);
1515       glsb = CC_SEL(cc, 125);
1516       selb = CC_SEL(cc, 1);
1517    }
1518
1519    if (CC_SEL(cc, 124) & 1) {
1520       /* alpha[0] == 1 */
1521
1522       if (t == 3) {
1523          ZERO_4UBV(rgba);
1524       } else {
1525          if (t == 0) {
1526             rgba[BCOMP] = UP5(col[0][BCOMP]);
1527             rgba[GCOMP] = UP5(col[0][GCOMP]);
1528             rgba[RCOMP] = UP5(col[0][RCOMP]);
1529          } else if (t == 2) {
1530             rgba[BCOMP] = UP5(col[1][BCOMP]);
1531             rgba[GCOMP] = UP6(col[1][GCOMP], glsb);
1532             rgba[RCOMP] = UP5(col[1][RCOMP]);
1533          } else {
1534             rgba[BCOMP] = (UP5(col[0][BCOMP]) + UP5(col[1][BCOMP])) / 2;
1535             rgba[GCOMP] = (UP5(col[0][GCOMP]) + UP6(col[1][GCOMP], glsb)) / 2;
1536             rgba[RCOMP] = (UP5(col[0][RCOMP]) + UP5(col[1][RCOMP])) / 2;
1537          }
1538          rgba[ACOMP] = 255;
1539       }
1540    } else {
1541       /* alpha[0] == 0 */
1542
1543       if (t == 0) {
1544          rgba[BCOMP] = UP5(col[0][BCOMP]);
1545          rgba[GCOMP] = UP6(col[0][GCOMP], glsb ^ selb);
1546          rgba[RCOMP] = UP5(col[0][RCOMP]);
1547       } else if (t == 3) {
1548          rgba[BCOMP] = UP5(col[1][BCOMP]);
1549          rgba[GCOMP] = UP6(col[1][GCOMP], glsb);
1550          rgba[RCOMP] = UP5(col[1][RCOMP]);
1551       } else {
1552          rgba[BCOMP] = LERP(3, t, UP5(col[0][BCOMP]), UP5(col[1][BCOMP]));
1553          rgba[GCOMP] = LERP(3, t, UP6(col[0][GCOMP], glsb ^ selb),
1554                                   UP6(col[1][GCOMP], glsb));
1555          rgba[RCOMP] = LERP(3, t, UP5(col[0][RCOMP]), UP5(col[1][RCOMP]));
1556       }
1557       rgba[ACOMP] = 255;
1558    }
1559 }
1560
1561
1562 static void
1563 fxt1_decode_1ALPHA (unsigned char *code, int t, unsigned char *rgba)
1564 {
1565    const unsigned long *cc;
1566
1567    cc = (unsigned long *)code;
1568    if (CC_SEL(cc, 124) & 1) {
1569       /* lerp == 1 */
1570       unsigned int col0[4];
1571
1572       if (t & 16) {
1573          t &= 15;
1574          t = (cc[1] >> (t * 2)) & 3;
1575          /* col 2 */
1576          col0[BCOMP] = (*(unsigned long *)(code + 11)) >> 6;
1577          col0[GCOMP] = CC_SEL(cc, 99);
1578          col0[RCOMP] = CC_SEL(cc, 104);
1579          col0[ACOMP] = CC_SEL(cc, 119);
1580       } else {
1581          t = (cc[0] >> (t * 2)) & 3;
1582          /* col 0 */
1583          col0[BCOMP] = CC_SEL(cc, 64);
1584          col0[GCOMP] = CC_SEL(cc, 69);
1585          col0[RCOMP] = CC_SEL(cc, 74);
1586          col0[ACOMP] = CC_SEL(cc, 109);
1587       }
1588
1589       if (t == 0) {
1590          rgba[BCOMP] = UP5(col0[BCOMP]);
1591          rgba[GCOMP] = UP5(col0[GCOMP]);
1592          rgba[RCOMP] = UP5(col0[RCOMP]);
1593          rgba[ACOMP] = UP5(col0[ACOMP]);
1594       } else if (t == 3) {
1595          rgba[BCOMP] = UP5(CC_SEL(cc, 79));
1596          rgba[GCOMP] = UP5(CC_SEL(cc, 84));
1597          rgba[RCOMP] = UP5(CC_SEL(cc, 89));
1598          rgba[ACOMP] = UP5(CC_SEL(cc, 114));
1599       } else {
1600          rgba[BCOMP] = LERP(3, t, UP5(col0[BCOMP]), UP5(CC_SEL(cc, 79)));
1601          rgba[GCOMP] = LERP(3, t, UP5(col0[GCOMP]), UP5(CC_SEL(cc, 84)));
1602          rgba[RCOMP] = LERP(3, t, UP5(col0[RCOMP]), UP5(CC_SEL(cc, 89)));
1603          rgba[ACOMP] = LERP(3, t, UP5(col0[ACOMP]), UP5(CC_SEL(cc, 114)));
1604       }
1605    } else {
1606       /* lerp == 0 */
1607
1608       if (t & 16) {
1609          cc++;
1610          t &= 15;
1611       }
1612       t = (cc[0] >> (t * 2)) & 3;
1613
1614       if (t == 3) {
1615          ZERO_4UBV(rgba);
1616       } else {
1617          unsigned long kk;
1618          cc = (unsigned long *)code;
1619          rgba[ACOMP] = UP5(cc[3] >> (t * 5 + 13));
1620          t *= 15;
1621          cc = (unsigned long *)(code + 8 + t / 8);
1622          kk = cc[0] >> (t & 7);
1623          rgba[BCOMP] = UP5(kk);
1624          rgba[GCOMP] = UP5(kk >> 5);
1625          rgba[RCOMP] = UP5(kk >> 10);
1626       }
1627    }
1628 }
1629
1630
1631 void
1632 fxt1_decode_1 (const void *texture, int stride, /* in pixels */
1633                int i, int j, unsigned char *rgba)
1634 {
1635    static void (*decode_1[]) (unsigned char *, int, unsigned char *) = {
1636       fxt1_decode_1HI,     /* cc-high   = "00?" */
1637       fxt1_decode_1HI,     /* cc-high   = "00?" */
1638       fxt1_decode_1CHROMA, /* cc-chroma = "010" */
1639       fxt1_decode_1ALPHA,  /* alpha     = "011" */
1640       fxt1_decode_1MIXED,  /* mixed     = "1??" */
1641       fxt1_decode_1MIXED,  /* mixed     = "1??" */
1642       fxt1_decode_1MIXED,  /* mixed     = "1??" */
1643       fxt1_decode_1MIXED   /* mixed     = "1??" */
1644    };
1645
1646    unsigned char *code = (unsigned char *)texture +
1647                          ((j / 4) * (stride / 8) + (i / 8)) * 16;
1648    int mode = CC_SEL((unsigned long *)code, 125);
1649    int t = i & 7;
1650
1651    if (t & 4) {
1652       t += 12;
1653    }
1654    t += (j & 3) * 4;
1655
1656    decode_1[mode](code, t, rgba);
1657 }