src/mesa/main/texcompress_fxt1.c

   1 /*
   2  * Mesa 3-D graphics library
   3  * Version:  7.1
   4  *
   5  * Copyright (C) 1999-2008  Brian Paul   All Rights Reserved.
   6  *
   7  * Permission is hereby granted, free of charge, to any person obtaining a
   8  * copy of this software and associated documentation files (the "Software"),
   9  * to deal in the Software without restriction, including without limitation
  10  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  11  * and/or sell copies of the Software, and to permit persons to whom the
  12  * Software is furnished to do so, subject to the following conditions:
  13  *
  14  * The above copyright notice and this permission notice shall be included
  15  * in all copies or substantial portions of the Software.
  16  *
  17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  18  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  20  * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
  21  * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  22  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  23  */
  24
  25
/**
 * \file texcompress_fxt1.c
 * GL_3DFX_texture_compression_FXT1 support.
 */
  30
  31
  32 #include "glheader.h"
  33 #include "imports.h"
  34 #include "colormac.h"
  35 #include "context.h"
  36 #include "convolve.h"
  37 #include "image.h"
  38 #include "mipmap.h"
  39 #include "texcompress.h"
  40 #include "texcompress_fxt1.h"
  41 #include "texformat.h"
  42 #include "texstore.h"
  43
  44
/*
 * Forward declarations of the block encoder and the single-texel decoder
 * that the texstore and texel-fetch functions below call.
 */

/* Encoder.  The texstore functions in this file call it with the source
 * image size, the number of components per source texel (3 or 4), the
 * source pixels with their row stride counted in GLchan elements, and
 * the destination address with the destination row stride.
 */
static void
fxt1_encode (GLuint width, GLuint height, GLint comps,
             const void *source, GLint srcRowStride,
             void *dest, GLint destRowStride);

/* Decoder.  The fetch functions in this file call it with the texture
 * image data, the image's RowStride and the (i, j) texel coordinates;
 * the decoded texel is written to rgba.
 */
void
fxt1_decode_1 (const void *texture, GLint stride,
               GLint i, GLint j, GLchan *rgba);
  53
  54
  55 /**
  56  * Called during context initialization.
  57  */
  58 void
  59 _mesa_init_texture_fxt1( GLcontext *ctx )
  60 {
  61    (void) ctx;
  62 }
  63
  64
  65 /**
  66  * Store user's image in rgb_fxt1 format.
  67  */
  68 GLboolean
  69 _mesa_texstore_rgb_fxt1(TEXSTORE_PARAMS)
  70 {
  71    const GLchan *pixels;
  72    GLint srcRowStride;
  73    GLubyte *dst;
  74    const GLint texWidth = dstRowStride * 8 / 16; /* a bit of a hack */
  75    const GLchan *tempImage = NULL;
  76
  77    ASSERT(dstFormat == MESA_FORMAT_RGB_FXT1);
  78    ASSERT(dstXoffset % 8 == 0);
  79    ASSERT(dstYoffset % 4 == 0);
  80    ASSERT(dstZoffset     == 0);
  81    (void) dstZoffset;
  82    (void) dstImageOffsets;
  83
  84    if (srcFormat != GL_RGB ||
  85        srcType != CHAN_TYPE ||
  86        ctx->_ImageTransferState ||
  87        srcPacking->SwapBytes) {
  88       /* convert image to RGB/GLchan */
  89       tempImage = _mesa_make_temp_chan_image(ctx, dims,
  90                                              baseInternalFormat,
  91                                              _mesa_get_format_base_format(dstFormat),
  92                                              srcWidth, srcHeight, srcDepth,
  93                                              srcFormat, srcType, srcAddr,
  94                                              srcPacking);
  95       if (!tempImage)
  96          return GL_FALSE; /* out of memory */
  97       _mesa_adjust_image_for_convolution(ctx, dims, &srcWidth, &srcHeight);
  98       pixels = tempImage;
  99       srcRowStride = 3 * srcWidth;
 100       srcFormat = GL_RGB;
 101    }
 102    else {
 103       pixels = (const GLchan *) srcAddr;
 104       srcRowStride = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat,
 105                                             srcType) / sizeof(GLchan);
 106    }
 107
 108    dst = _mesa_compressed_image_address(dstXoffset, dstYoffset, 0,
 109                                         dstFormat,
 110                                         texWidth, (GLubyte *) dstAddr);
 111
 112    fxt1_encode(srcWidth, srcHeight, 3, pixels, srcRowStride,
 113                dst, dstRowStride);
 114
 115    if (tempImage)
 116       _mesa_free((void*) tempImage);
 117
 118    return GL_TRUE;
 119 }
 120
 121
 122 /**
 123  * Store user's image in rgba_fxt1 format.
 124  */
 125 GLboolean
 126 _mesa_texstore_rgba_fxt1(TEXSTORE_PARAMS)
 127 {
 128    const GLchan *pixels;
 129    GLint srcRowStride;
 130    GLubyte *dst;
 131    GLint texWidth = dstRowStride * 8 / 16; /* a bit of a hack */
 132    const GLchan *tempImage = NULL;
 133
 134    ASSERT(dstFormat == MESA_FORMAT_RGBA_FXT1);
 135    ASSERT(dstXoffset % 8 == 0);
 136    ASSERT(dstYoffset % 4 == 0);
 137    ASSERT(dstZoffset     == 0);
 138    (void) dstZoffset;
 139    (void) dstImageOffsets;
 140
 141    if (srcFormat != GL_RGBA ||
 142        srcType != CHAN_TYPE ||
 143        ctx->_ImageTransferState ||
 144        srcPacking->SwapBytes) {
 145       /* convert image to RGBA/GLchan */
 146       tempImage = _mesa_make_temp_chan_image(ctx, dims,
 147                                              baseInternalFormat,
 148                                              _mesa_get_format_base_format(dstFormat),
 149                                              srcWidth, srcHeight, srcDepth,
 150                                              srcFormat, srcType, srcAddr,
 151                                              srcPacking);
 152       if (!tempImage)
 153          return GL_FALSE; /* out of memory */
 154       _mesa_adjust_image_for_convolution(ctx, dims, &srcWidth, &srcHeight);
 155       pixels = tempImage;
 156       srcRowStride = 4 * srcWidth;
 157       srcFormat = GL_RGBA;
 158    }
 159    else {
 160       pixels = (const GLchan *) srcAddr;
 161       srcRowStride = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat,
 162                                             srcType) / sizeof(GLchan);
 163    }
 164
 165    dst = _mesa_compressed_image_address(dstXoffset, dstYoffset, 0,
 166                                         dstFormat,
 167                                         texWidth, (GLubyte *) dstAddr);
 168
 169    fxt1_encode(srcWidth, srcHeight, 4, pixels, srcRowStride,
 170                dst, dstRowStride);
 171
 172    if (tempImage)
 173       _mesa_free((void*) tempImage);
 174
 175    return GL_TRUE;
 176 }
 177
 178
 179 void
 180 _mesa_fetch_texel_2d_rgba_fxt1( const struct gl_texture_image *texImage,
 181                                 GLint i, GLint j, GLint k, GLchan *texel )
 182 {
 183    (void) k;
 184    fxt1_decode_1(texImage->Data, texImage->RowStride, i, j, texel);
 185 }
 186
 187
 188 void
 189 _mesa_fetch_texel_2d_f_rgba_fxt1( const struct gl_texture_image *texImage,
 190                                   GLint i, GLint j, GLint k, GLfloat *texel )
 191 {
 192    /* just sample as GLchan and convert to float here */
 193    GLchan rgba[4];
 194    (void) k;
 195    fxt1_decode_1(texImage->Data, texImage->RowStride, i, j, rgba);
 196    texel[RCOMP] = CHAN_TO_FLOAT(rgba[RCOMP]);
 197    texel[GCOMP] = CHAN_TO_FLOAT(rgba[GCOMP]);
 198    texel[BCOMP] = CHAN_TO_FLOAT(rgba[BCOMP]);
 199    texel[ACOMP] = CHAN_TO_FLOAT(rgba[ACOMP]);
 200 }
 201
 202
 203 void
 204 _mesa_fetch_texel_2d_rgb_fxt1( const struct gl_texture_image *texImage,
 205                                GLint i, GLint j, GLint k, GLchan *texel )
 206 {
 207    (void) k;
 208    fxt1_decode_1(texImage->Data, texImage->RowStride, i, j, texel);
 209    texel[ACOMP] = 255;
 210 }
 211
 212
 213 void
 214 _mesa_fetch_texel_2d_f_rgb_fxt1( const struct gl_texture_image *texImage,
 215                                  GLint i, GLint j, GLint k, GLfloat *texel )
 216 {
 217    /* just sample as GLchan and convert to float here */
 218    GLchan rgba[4];
 219    (void) k;
 220    fxt1_decode_1(texImage->Data, texImage->RowStride, i, j, rgba);
 221    texel[RCOMP] = CHAN_TO_FLOAT(rgba[RCOMP]);
 222    texel[GCOMP] = CHAN_TO_FLOAT(rgba[GCOMP]);
 223    texel[BCOMP] = CHAN_TO_FLOAT(rgba[BCOMP]);
 224    texel[ACOMP] = 1.0F;
 225 }
 226
 227
 228
/*
 * Format descriptor for RGB FXT1 textures.  The initializers are
 * positional; the comment on each line names the field it fills.
 * The per-channel bit counts are approximations (the data is block
 * compressed) and TexelBytes is 0.
 */
const struct gl_texture_format _mesa_texformat_rgb_fxt1 = {
   MESA_FORMAT_RGB_FXT1,                /* MesaFormat */
   GL_RGB,                              /* BaseFormat */
   GL_UNSIGNED_NORMALIZED_ARB,          /* DataType */
   4, /*approx*/                        /* RedBits */
   4, /*approx*/                        /* GreenBits */
   4, /*approx*/                        /* BlueBits */
   0,                                   /* AlphaBits */
   0,                                   /* LuminanceBits */
   0,                                   /* IntensityBits */
   0,                                   /* IndexBits */
   0,                                   /* DepthBits */
   0,                                   /* StencilBits */
   0                                    /* TexelBytes */
};
 244
/*
 * Format descriptor for RGBA FXT1 textures.  The initializers are
 * positional; the comment on each line names the field it fills.
 * The per-channel bit counts are approximations (the data is block
 * compressed) and TexelBytes is 0.
 */
const struct gl_texture_format _mesa_texformat_rgba_fxt1 = {
   MESA_FORMAT_RGBA_FXT1,               /* MesaFormat */
   GL_RGBA,                             /* BaseFormat */
   GL_UNSIGNED_NORMALIZED_ARB,          /* DataType */
   4, /*approx*/                        /* RedBits */
   4, /*approx*/                        /* GreenBits */
   4, /*approx*/                        /* BlueBits */
   1, /*approx*/                        /* AlphaBits */
   0,                                   /* LuminanceBits */
   0,                                   /* IntensityBits */
   0,                                   /* IndexBits */
   0,                                   /* DepthBits */
   0,                                   /* StencilBits */
   0                                    /* TexelBytes */
};
 260
 261
 262 /***************************************************************************\
 263  * FXT1 encoder
 264  *
 265  * The encoder was built by reversing the decoder,
 266  * and is vaguely based on Texus2 by 3dfx. Note that this code
 267  * is merely a proof of concept, since it is highly UNoptimized;
 268  * moreover, it is sub-optimal due to initial conditions passed
 269  * to Lloyd's algorithm (the interpolation modes are even worse).
 270 \***************************************************************************/
 271
 272
/* Sizes and tuning constants shared by the encoder functions. */
#define MAX_COMP 4 /* ever needed maximum number of components in texel */
#define MAX_VECT 4 /* ever needed maximum number of base vectors to find */
#define N_TEXELS 32 /* number of texels in a block (always 32) */
#define LL_N_REP 50 /* number of iterations in lloyd's vq */
#define LL_RMS_D 10 /* fault tolerance (maximum delta) */
#define LL_RMS_E 255 /* fault tolerance (maximum error) */
#define ALPHA_TS 2 /* alpha threshold: (255 - ALPHA_TS) deemed opaque */
/* True when the GLuint read at v is zero, i.e. when the texel at v is
 * "transparent black" (all components 0).
 * NOTE(review): this reads byte data through a GLuint pointer, so it
 * assumes v is suitably aligned and spans sizeof(GLuint) bytes -- confirm
 * for any new caller.
 */
#define ISTBLACK(v) (*((GLuint *)(v)) == 0)
 281
 282
/*
 * Define a 64-bit unsigned integer type and macros
 *
 * Fx64 accumulates the high quadword of a block: FX64_MOV32 loads a
 * 32-bit value, FX64_SHL shifts left by c bits and FX64_OR32 ORs a
 * 32-bit value into the low bits.  FX64_NATIVE tells which of the two
 * implementations below is in use.
 */
#if 1

/* native implementation: Fx64 is a plain uint64_t */
#define FX64_NATIVE 1

typedef uint64_t Fx64;

#define FX64_MOV32(a, b) a = b
#define FX64_OR32(a, b)  a |= b
#define FX64_SHL(a, c)   a <<= c

#else

/* emulation with two 32-bit halves (currently disabled by the #if 1) */
#define FX64_NATIVE 0

typedef struct {
   GLuint lo, hi;
} Fx64;

/* note: only the low half is assigned; the high half is left as it is */
#define FX64_MOV32(a, b) a.lo = b
#define FX64_OR32(a, b)  a.lo |= b

/* shift the lo/hi pair left by c bits, carrying bits from lo into hi */
#define FX64_SHL(a, c)                                 \
   do {                                                \
       if ((c) >= 32) {                                \
          a.hi = a.lo << ((c) - 32);                   \
          a.lo = 0;                                    \
       } else {                                        \
          a.hi = (a.hi << (c)) | (a.lo >> (32 - (c))); \
          a.lo <<= (c);                                \
       }                                               \
   } while (0)

#endif
 319
 320
/* per-component weight of the color metric; currently always 1 */
#define F(i) (GLfloat)1 /* can be used to obtain an oblong metric: 0.30 / 0.59 / 0.11 */
/* when non-zero, CALCCDOT clamps its result to [0, NV] */
#define SAFECDOT 1 /* for paranoids */

/*
 * Build the interpolation vector IV and bias B for the segment V0 -> V1
 * so that, for a color V, dot(V, IV) + B is the position of V projected
 * on that segment, scaled so that V0 maps to 0.5 and V1 to NV + 0.5.
 *
 * NV = highest index, NC = number of components.
 * Uses the variable `i` of the enclosing scope as loop counter.
 * Divides by the squared length of (V1 - V0): the caller has to make
 * sure that V0 and V1 differ.
 */
#define MAKEIVEC(NV, NC, IV, B, V0, V1)  \
   do {                                  \
      /* compute interpolation vector */ \
      GLfloat d2 = 0.0F;                 \
      GLfloat rd2;                       \
                                         \
      for (i = 0; i < NC; i++) {         \
         IV[i] = (V1[i] - V0[i]) * F(i); \
         d2 += IV[i] * IV[i];            \
      }                                  \
      rd2 = (GLfloat)NV / d2;            \
      B = 0;                             \
      for (i = 0; i < NC; i++) {         \
         IV[i] *= F(i);                  \
         B -= IV[i] * V0[i];             \
         IV[i] *= rd2;                   \
      }                                  \
      B = B * rd2 + 0.5f;                \
   } while (0)

/*
 * Compute the index of color V along the segment described by IV and B
 * (see MAKEIVEC): TEXEL = (GLint)(dot(V, IV) + B), clamped to [0, NV]
 * when SAFECDOT is set.
 * Uses the variable `i` of the enclosing scope as loop counter.
 */
#define CALCCDOT(TEXEL, NV, NC, IV, B, V)\
   do {                                  \
      GLfloat dot = 0.0F;                \
      for (i = 0; i < NC; i++) {         \
         dot += V[i] * IV[i];            \
      }                                  \
      TEXEL = (GLint)(dot + B);          \
      if (SAFECDOT) {                    \
         if (TEXEL < 0) {                \
            TEXEL = 0;                   \
         } else if (TEXEL > NV) {        \
            TEXEL = NV;                  \
         }                               \
      }                                  \
   } while (0)
 359
 360
 361 static GLint
 362 fxt1_bestcol (GLfloat vec[][MAX_COMP], GLint nv,
 363               GLubyte input[MAX_COMP], GLint nc)
 364 {
 365    GLint i, j, best = -1;
 366    GLfloat err = 1e9; /* big enough */
 367
 368    for (j = 0; j < nv; j++) {
 369       GLfloat e = 0.0F;
 370       for (i = 0; i < nc; i++) {
 371          e += (vec[j][i] - input[i]) * (vec[j][i] - input[i]);
 372       }
 373       if (e < err) {
 374          err = e;
 375          best = j;
 376       }
 377    }
 378
 379    return best;
 380 }
 381
 382
 383 static GLint
 384 fxt1_worst (GLfloat vec[MAX_COMP],
 385             GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
 386 {
 387    GLint i, k, worst = -1;
 388    GLfloat err = -1.0F; /* small enough */
 389
 390    for (k = 0; k < n; k++) {
 391       GLfloat e = 0.0F;
 392       for (i = 0; i < nc; i++) {
 393          e += (vec[i] - input[k][i]) * (vec[i] - input[k][i]);
 394       }
 395       if (e > err) {
 396          err = e;
 397          worst = k;
 398       }
 399    }
 400
 401    return worst;
 402 }
 403
 404
 405 static GLint
 406 fxt1_variance (GLdouble variance[MAX_COMP],
 407                GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
 408 {
 409    GLint i, k, best = 0;
 410    GLint sx, sx2;
 411    GLdouble var, maxvar = -1; /* small enough */
 412    GLdouble teenth = 1.0 / n;
 413
 414    for (i = 0; i < nc; i++) {
 415       sx = sx2 = 0;
 416       for (k = 0; k < n; k++) {
 417          GLint t = input[k][i];
 418          sx += t;
 419          sx2 += t * t;
 420       }
 421       var = sx2 * teenth - sx * sx * teenth * teenth;
 422       if (maxvar < var) {
 423          maxvar = var;
 424          best = i;
 425       }
 426       if (variance) {
 427          variance[i] = var;
 428       }
 429    }
 430
 431    return best;
 432 }
 433
 434
 435 static GLint
 436 fxt1_choose (GLfloat vec[][MAX_COMP], GLint nv,
 437              GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
 438 {
 439 #if 0
 440    /* Choose colors from a grid.
 441     */
 442    GLint i, j;
 443
 444    for (j = 0; j < nv; j++) {
 445       GLint m = j * (n - 1) / (nv - 1);
 446       for (i = 0; i < nc; i++) {
 447          vec[j][i] = input[m][i];
 448       }
 449    }
 450 #else
 451    /* Our solution here is to find the darkest and brightest colors in
 452     * the 8x4 tile and use those as the two representative colors.
 453     * There are probably better algorithms to use (histogram-based).
 454     */
 455    GLint i, j, k;
 456    GLint minSum = 2000; /* big enough */
 457    GLint maxSum = -1; /* small enough */
 458    GLint minCol = 0; /* phoudoin: silent compiler! */
 459    GLint maxCol = 0; /* phoudoin: silent compiler! */
 460
 461    struct {
 462       GLint flag;
 463       GLint key;
 464       GLint freq;
 465       GLint idx;
 466    } hist[N_TEXELS];
 467    GLint lenh = 0;
 468
 469    _mesa_memset(hist, 0, sizeof(hist));
 470
 471    for (k = 0; k < n; k++) {
 472       GLint l;
 473       GLint key = 0;
 474       GLint sum = 0;
 475       for (i = 0; i < nc; i++) {
 476          key <<= 8;
 477          key |= input[k][i];
 478          sum += input[k][i];
 479       }
 480       for (l = 0; l < n; l++) {
 481          if (!hist[l].flag) {
 482             /* alloc new slot */
 483             hist[l].flag = !0;
 484             hist[l].key = key;
 485             hist[l].freq = 1;
 486             hist[l].idx = k;
 487             lenh = l + 1;
 488             break;
 489          } else if (hist[l].key == key) {
 490             hist[l].freq++;
 491             break;
 492          }
 493       }
 494       if (minSum > sum) {
 495          minSum = sum;
 496          minCol = k;
 497       }
 498       if (maxSum < sum) {
 499          maxSum = sum;
 500          maxCol = k;
 501       }
 502    }
 503
 504    if (lenh <= nv) {
 505       for (j = 0; j < lenh; j++) {
 506          for (i = 0; i < nc; i++) {
 507             vec[j][i] = (GLfloat)input[hist[j].idx][i];
 508          }
 509       }
 510       for (; j < nv; j++) {
 511          for (i = 0; i < nc; i++) {
 512             vec[j][i] = vec[0][i];
 513          }
 514       }
 515       return 0;
 516    }
 517
 518    for (j = 0; j < nv; j++) {
 519       for (i = 0; i < nc; i++) {
 520          vec[j][i] = ((nv - 1 - j) * input[minCol][i] + j * input[maxCol][i] + (nv - 1) / 2) / (GLfloat)(nv - 1);
 521       }
 522    }
 523 #endif
 524
 525    return !0;
 526 }
 527
 528
 529 static GLint
 530 fxt1_lloyd (GLfloat vec[][MAX_COMP], GLint nv,
 531             GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
 532 {
 533    /* Use the generalized lloyd's algorithm for VQ:
 534     *     find 4 color vectors.
 535     *
 536     *     for each sample color
 537     *         sort to nearest vector.
 538     *
 539     *     replace each vector with the centroid of it's matching colors.
 540     *
 541     *     repeat until RMS doesn't improve.
 542     *
 543     *     if a color vector has no samples, or becomes the same as another
 544     *     vector, replace it with the color which is farthest from a sample.
 545     *
 546     * vec[][MAX_COMP]           initial vectors and resulting colors
 547     * nv                        number of resulting colors required
 548     * input[N_TEXELS][MAX_COMP] input texels
 549     * nc                        number of components in input / vec
 550     * n                         number of input samples
 551     */
 552
 553    GLint sum[MAX_VECT][MAX_COMP]; /* used to accumulate closest texels */
 554    GLint cnt[MAX_VECT]; /* how many times a certain vector was chosen */
 555    GLfloat error, lasterror = 1e9;
 556
 557    GLint i, j, k, rep;
 558
 559    /* the quantizer */
 560    for (rep = 0; rep < LL_N_REP; rep++) {
 561       /* reset sums & counters */
 562       for (j = 0; j < nv; j++) {
 563          for (i = 0; i < nc; i++) {
 564             sum[j][i] = 0;
 565          }
 566          cnt[j] = 0;
 567       }
 568       error = 0;
 569
 570       /* scan whole block */
 571       for (k = 0; k < n; k++) {
 572 #if 1
 573          GLint best = -1;
 574          GLfloat err = 1e9; /* big enough */
 575          /* determine best vector */
 576          for (j = 0; j < nv; j++) {
 577             GLfloat e = (vec[j][0] - input[k][0]) * (vec[j][0] - input[k][0]) +
 578                       (vec[j][1] - input[k][1]) * (vec[j][1] - input[k][1]) +
 579                       (vec[j][2] - input[k][2]) * (vec[j][2] - input[k][2]);
 580             if (nc == 4) {
 581                e += (vec[j][3] - input[k][3]) * (vec[j][3] - input[k][3]);
 582             }
 583             if (e < err) {
 584                err = e;
 585                best = j;
 586             }
 587          }
 588 #else
 589          GLint best = fxt1_bestcol(vec, nv, input[k], nc, &err);
 590 #endif
 591          /* add in closest color */
 592          for (i = 0; i < nc; i++) {
 593             sum[best][i] += input[k][i];
 594          }
 595          /* mark this vector as used */
 596          cnt[best]++;
 597          /* accumulate error */
 598          error += err;
 599       }
 600
 601       /* check RMS */
 602       if ((error < LL_RMS_E) ||
 603           ((error < lasterror) && ((lasterror - error) < LL_RMS_D))) {
 604          return !0; /* good match */
 605       }
 606       lasterror = error;
 607
 608       /* move each vector to the barycenter of its closest colors */
 609       for (j = 0; j < nv; j++) {
 610          if (cnt[j]) {
 611             GLfloat div = 1.0F / cnt[j];
 612             for (i = 0; i < nc; i++) {
 613                vec[j][i] = div * sum[j][i];
 614             }
 615          } else {
 616             /* this vec has no samples or is identical with a previous vec */
 617             GLint worst = fxt1_worst(vec[j], input, nc, n);
 618             for (i = 0; i < nc; i++) {
 619                vec[j][i] = input[worst][i];
 620             }
 621          }
 622       }
 623    }
 624
 625    return 0; /* could not converge fast enough */
 626 }
 627
 628
 629 static void
 630 fxt1_quantize_CHROMA (GLuint *cc,
 631                       GLubyte input[N_TEXELS][MAX_COMP])
 632 {
 633    const GLint n_vect = 4; /* 4 base vectors to find */
 634    const GLint n_comp = 3; /* 3 components: R, G, B */
 635    GLfloat vec[MAX_VECT][MAX_COMP];
 636    GLint i, j, k;
 637    Fx64 hi; /* high quadword */
 638    GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
 639
 640    if (fxt1_choose(vec, n_vect, input, n_comp, N_TEXELS) != 0) {
 641       fxt1_lloyd(vec, n_vect, input, n_comp, N_TEXELS);
 642    }
 643
 644    FX64_MOV32(hi, 4); /* cc-chroma = "010" + unused bit */
 645    for (j = n_vect - 1; j >= 0; j--) {
 646       for (i = 0; i < n_comp; i++) {
 647          /* add in colors */
 648          FX64_SHL(hi, 5);
 649          FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
 650       }
 651    }
 652    ((Fx64 *)cc)[1] = hi;
 653
 654    lohi = lolo = 0;
 655    /* right microtile */
 656    for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) {
 657       lohi <<= 2;
 658       lohi |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
 659    }
 660    /* left microtile */
 661    for (; k >= 0; k--) {
 662       lolo <<= 2;
 663       lolo |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
 664    }
 665    cc[1] = lohi;
 666    cc[0] = lolo;
 667 }
 668
 669
 670 static void
 671 fxt1_quantize_ALPHA0 (GLuint *cc,
 672                       GLubyte input[N_TEXELS][MAX_COMP],
 673                       GLubyte reord[N_TEXELS][MAX_COMP], GLint n)
 674 {
 675    const GLint n_vect = 3; /* 3 base vectors to find */
 676    const GLint n_comp = 4; /* 4 components: R, G, B, A */
 677    GLfloat vec[MAX_VECT][MAX_COMP];
 678    GLint i, j, k;
 679    Fx64 hi; /* high quadword */
 680    GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
 681
 682    /* the last vector indicates zero */
 683    for (i = 0; i < n_comp; i++) {
 684       vec[n_vect][i] = 0;
 685    }
 686
 687    /* the first n texels in reord are guaranteed to be non-zero */
 688    if (fxt1_choose(vec, n_vect, reord, n_comp, n) != 0) {
 689       fxt1_lloyd(vec, n_vect, reord, n_comp, n);
 690    }
 691
 692    FX64_MOV32(hi, 6); /* alpha = "011" + lerp = 0 */
 693    for (j = n_vect - 1; j >= 0; j--) {
 694       /* add in alphas */
 695       FX64_SHL(hi, 5);
 696       FX64_OR32(hi, (GLuint)(vec[j][ACOMP] / 8.0F));
 697    }
 698    for (j = n_vect - 1; j >= 0; j--) {
 699       for (i = 0; i < n_comp - 1; i++) {
 700          /* add in colors */
 701          FX64_SHL(hi, 5);
 702          FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
 703       }
 704    }
 705    ((Fx64 *)cc)[1] = hi;
 706
 707    lohi = lolo = 0;
 708    /* right microtile */
 709    for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) {
 710       lohi <<= 2;
 711       lohi |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
 712    }
 713    /* left microtile */
 714    for (; k >= 0; k--) {
 715       lolo <<= 2;
 716       lolo |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
 717    }
 718    cc[1] = lohi;
 719    cc[0] = lolo;
 720 }
 721
 722
 723 static void
 724 fxt1_quantize_ALPHA1 (GLuint *cc,
 725                       GLubyte input[N_TEXELS][MAX_COMP])
 726 {
 727    const GLint n_vect = 3; /* highest vector number in each microtile */
 728    const GLint n_comp = 4; /* 4 components: R, G, B, A */
 729    GLfloat vec[1 + 1 + 1][MAX_COMP]; /* 1.5 extrema for each sub-block */
 730    GLfloat b, iv[MAX_COMP]; /* interpolation vector */
 731    GLint i, j, k;
 732    Fx64 hi; /* high quadword */
 733    GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
 734
 735    GLint minSum;
 736    GLint maxSum;
 737    GLint minColL = 0, maxColL = 0;
 738    GLint minColR = 0, maxColR = 0;
 739    GLint sumL = 0, sumR = 0;
 740    GLint nn_comp;
 741    /* Our solution here is to find the darkest and brightest colors in
 742     * the 4x4 tile and use those as the two representative colors.
 743     * There are probably better algorithms to use (histogram-based).
 744     */
 745    nn_comp = n_comp;
 746    while ((minColL == maxColL) && nn_comp) {
 747        minSum = 2000; /* big enough */
 748        maxSum = -1; /* small enough */
 749        for (k = 0; k < N_TEXELS / 2; k++) {
 750            GLint sum = 0;
 751            for (i = 0; i < nn_comp; i++) {
 752                sum += input[k][i];
 753            }
 754            if (minSum > sum) {
 755                minSum = sum;
 756                minColL = k;
 757            }
 758            if (maxSum < sum) {
 759                maxSum = sum;
 760                maxColL = k;
 761            }
 762            sumL += sum;
 763        }
 764
 765        nn_comp--;
 766    }
 767
 768    nn_comp = n_comp;
 769    while ((minColR == maxColR) && nn_comp) {
 770        minSum = 2000; /* big enough */
 771        maxSum = -1; /* small enough */
 772        for (k = N_TEXELS / 2; k < N_TEXELS; k++) {
 773            GLint sum = 0;
 774            for (i = 0; i < nn_comp; i++) {
 775                sum += input[k][i];
 776            }
 777            if (minSum > sum) {
 778                minSum = sum;
 779                minColR = k;
 780            }
 781            if (maxSum < sum) {
 782                maxSum = sum;
 783                maxColR = k;
 784            }
 785            sumR += sum;
 786        }
 787
 788        nn_comp--;
 789    }
 790
 791    /* choose the common vector (yuck!) */
 792    {
 793       GLint j1, j2;
 794       GLint v1 = 0, v2 = 0;
 795       GLfloat err = 1e9; /* big enough */
 796       GLfloat tv[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
 797       for (i = 0; i < n_comp; i++) {
 798          tv[0][i] = input[minColL][i];
 799          tv[1][i] = input[maxColL][i];
 800          tv[2][i] = input[minColR][i];
 801          tv[3][i] = input[maxColR][i];
 802       }
 803       for (j1 = 0; j1 < 2; j1++) {
 804          for (j2 = 2; j2 < 4; j2++) {
 805             GLfloat e = 0.0F;
 806             for (i = 0; i < n_comp; i++) {
 807                e += (tv[j1][i] - tv[j2][i]) * (tv[j1][i] - tv[j2][i]);
 808             }
 809             if (e < err) {
 810                err = e;
 811                v1 = j1;
 812                v2 = j2;
 813             }
 814          }
 815       }
 816       for (i = 0; i < n_comp; i++) {
 817          vec[0][i] = tv[1 - v1][i];
 818          vec[1][i] = (tv[v1][i] * sumL + tv[v2][i] * sumR) / (sumL + sumR);
 819          vec[2][i] = tv[5 - v2][i];
 820       }
 821    }
 822
 823    /* left microtile */
 824    cc[0] = 0;
 825    if (minColL != maxColL) {
 826       /* compute interpolation vector */
 827       MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
 828
 829       /* add in texels */
 830       lolo = 0;
 831       for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
 832          GLint texel;
 833          /* interpolate color */
 834          CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
 835          /* add in texel */
 836          lolo <<= 2;
 837          lolo |= texel;
 838       }
 839
 840       cc[0] = lolo;
 841    }
 842
 843    /* right microtile */
 844    cc[1] = 0;
 845    if (minColR != maxColR) {
 846       /* compute interpolation vector */
 847       MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[1]);
 848
 849       /* add in texels */
 850       lohi = 0;
 851       for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
 852          GLint texel;
 853          /* interpolate color */
 854          CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
 855          /* add in texel */
 856          lohi <<= 2;
 857          lohi |= texel;
 858       }
 859
 860       cc[1] = lohi;
 861    }
 862
 863    FX64_MOV32(hi, 7); /* alpha = "011" + lerp = 1 */
 864    for (j = n_vect - 1; j >= 0; j--) {
 865       /* add in alphas */
 866       FX64_SHL(hi, 5);
 867       FX64_OR32(hi, (GLuint)(vec[j][ACOMP] / 8.0F));
 868    }
 869    for (j = n_vect - 1; j >= 0; j--) {
 870       for (i = 0; i < n_comp - 1; i++) {
 871          /* add in colors */
 872          FX64_SHL(hi, 5);
 873          FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
 874       }
 875    }
 876    ((Fx64 *)cc)[1] = hi;
 877 }
 878
 879
 880 static void
 881 fxt1_quantize_HI (GLuint *cc,
 882                   GLubyte input[N_TEXELS][MAX_COMP],
 883                   GLubyte reord[N_TEXELS][MAX_COMP], GLint n)
 884 {
 885    const GLint n_vect = 6; /* highest vector number */
 886    const GLint n_comp = 3; /* 3 components: R, G, B */
 887    GLfloat b = 0.0F;       /* phoudoin: silent compiler! */
 888    GLfloat iv[MAX_COMP];   /* interpolation vector */
 889    GLint i, k;
 890    GLuint hihi; /* high quadword: hi dword */
 891
 892    GLint minSum = 2000; /* big enough */
 893    GLint maxSum = -1; /* small enough */
 894    GLint minCol = 0; /* phoudoin: silent compiler! */
 895    GLint maxCol = 0; /* phoudoin: silent compiler! */
 896
 897    /* Our solution here is to find the darkest and brightest colors in
 898     * the 8x4 tile and use those as the two representative colors.
 899     * There are probably better algorithms to use (histogram-based).
 900     */
 901    for (k = 0; k < n; k++) {
 902       GLint sum = 0;
 903       for (i = 0; i < n_comp; i++) {
 904          sum += reord[k][i];
 905       }
 906       if (minSum > sum) {
 907          minSum = sum;
 908          minCol = k;
 909       }
 910       if (maxSum < sum) {
 911          maxSum = sum;
 912          maxCol = k;
 913       }
 914    }
 915
 916    hihi = 0; /* cc-hi = "00" */
 917    for (i = 0; i < n_comp; i++) {
 918       /* add in colors */
 919       hihi <<= 5;
 920       hihi |= reord[maxCol][i] >> 3;
 921    }
 922    for (i = 0; i < n_comp; i++) {
 923       /* add in colors */
 924       hihi <<= 5;
 925       hihi |= reord[minCol][i] >> 3;
 926    }
 927    cc[3] = hihi;
 928    cc[0] = cc[1] = cc[2] = 0;
 929
 930    /* compute interpolation vector */
 931    if (minCol != maxCol) {
 932       MAKEIVEC(n_vect, n_comp, iv, b, reord[minCol], reord[maxCol]);
 933    }
 934
 935    /* add in texels */
 936    for (k = N_TEXELS - 1; k >= 0; k--) {
 937       GLint t = k * 3;
 938       GLuint *kk = (GLuint *)((char *)cc + t / 8);
 939       GLint texel = n_vect + 1; /* transparent black */
 940
 941       if (!ISTBLACK(input[k])) {
 942          if (minCol != maxCol) {
 943             /* interpolate color */
 944             CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
 945             /* add in texel */
 946             kk[0] |= texel << (t & 7);
 947          }
 948       } else {
 949          /* add in texel */
 950          kk[0] |= texel << (t & 7);
 951       }
 952    }
 953 }
 954
 955
 956 static void
 957 fxt1_quantize_MIXED1 (GLuint *cc,
 958                       GLubyte input[N_TEXELS][MAX_COMP])
 959 {
 960    const GLint n_vect = 2; /* highest vector number in each microtile */
 961    const GLint n_comp = 3; /* 3 components: R, G, B */
 962    GLubyte vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
 963    GLfloat b, iv[MAX_COMP]; /* interpolation vector */
 964    GLint i, j, k;
 965    Fx64 hi; /* high quadword */
 966    GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
 967
 968    GLint minSum;
 969    GLint maxSum;
 970    GLint minColL = 0, maxColL = -1;
 971    GLint minColR = 0, maxColR = -1;
 972
 973    /* Our solution here is to find the darkest and brightest colors in
 974     * the 4x4 tile and use those as the two representative colors.
 975     * There are probably better algorithms to use (histogram-based).
 976     */
 977    minSum = 2000; /* big enough */
 978    maxSum = -1; /* small enough */
 979    for (k = 0; k < N_TEXELS / 2; k++) {
 980       if (!ISTBLACK(input[k])) {
 981          GLint sum = 0;
 982          for (i = 0; i < n_comp; i++) {
 983             sum += input[k][i];
 984          }
 985          if (minSum > sum) {
 986             minSum = sum;
 987             minColL = k;
 988          }
 989          if (maxSum < sum) {
 990             maxSum = sum;
 991             maxColL = k;
 992          }
 993       }
 994    }
 995    minSum = 2000; /* big enough */
 996    maxSum = -1; /* small enough */
 997    for (; k < N_TEXELS; k++) {
 998       if (!ISTBLACK(input[k])) {
 999          GLint sum = 0;
1000          for (i = 0; i < n_comp; i++) {
1001             sum += input[k][i];
1002          }
1003          if (minSum > sum) {
1004             minSum = sum;
1005             minColR = k;
1006          }
1007          if (maxSum < sum) {
1008             maxSum = sum;
1009             maxColR = k;
1010          }
1011       }
1012    }
1013
1014    /* left microtile */
1015    if (maxColL == -1) {
1016       /* all transparent black */
1017       cc[0] = ~0u;
1018       for (i = 0; i < n_comp; i++) {
1019          vec[0][i] = 0;
1020          vec[1][i] = 0;
1021       }
1022    } else {
1023       cc[0] = 0;
1024       for (i = 0; i < n_comp; i++) {
1025          vec[0][i] = input[minColL][i];
1026          vec[1][i] = input[maxColL][i];
1027       }
1028       if (minColL != maxColL) {
1029          /* compute interpolation vector */
1030          MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
1031
1032          /* add in texels */
1033          lolo = 0;
1034          for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
1035             GLint texel = n_vect + 1; /* transparent black */
1036             if (!ISTBLACK(input[k])) {
1037                /* interpolate color */
1038                CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
1039             }
1040             /* add in texel */
1041             lolo <<= 2;
1042             lolo |= texel;
1043          }
1044          cc[0] = lolo;
1045       }
1046    }
1047
1048    /* right microtile */
1049    if (maxColR == -1) {
1050       /* all transparent black */
1051       cc[1] = ~0u;
1052       for (i = 0; i < n_comp; i++) {
1053          vec[2][i] = 0;
1054          vec[3][i] = 0;
1055       }
1056    } else {
1057       cc[1] = 0;
1058       for (i = 0; i < n_comp; i++) {
1059          vec[2][i] = input[minColR][i];
1060          vec[3][i] = input[maxColR][i];
1061       }
1062       if (minColR != maxColR) {
1063          /* compute interpolation vector */
1064          MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);
1065
1066          /* add in texels */
1067          lohi = 0;
1068          for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
1069             GLint texel = n_vect + 1; /* transparent black */
1070             if (!ISTBLACK(input[k])) {
1071                /* interpolate color */
1072                CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
1073             }
1074             /* add in texel */
1075             lohi <<= 2;
1076             lohi |= texel;
1077          }
1078          cc[1] = lohi;
1079       }
1080    }
1081
1082    FX64_MOV32(hi, 9 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
1083    for (j = 2 * 2 - 1; j >= 0; j--) {
1084       for (i = 0; i < n_comp; i++) {
1085          /* add in colors */
1086          FX64_SHL(hi, 5);
1087          FX64_OR32(hi, vec[j][i] >> 3);
1088       }
1089    }
1090    ((Fx64 *)cc)[1] = hi;
1091 }
1092
1093
1094 static void
1095 fxt1_quantize_MIXED0 (GLuint *cc,
1096                       GLubyte input[N_TEXELS][MAX_COMP])
1097 {
1098    const GLint n_vect = 3; /* highest vector number in each microtile */
1099    const GLint n_comp = 3; /* 3 components: R, G, B */
1100    GLubyte vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
1101    GLfloat b, iv[MAX_COMP]; /* interpolation vector */
1102    GLint i, j, k;
1103    Fx64 hi; /* high quadword */
1104    GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
1105
1106    GLint minColL = 0, maxColL = 0;
1107    GLint minColR = 0, maxColR = 0;
1108 #if 0
1109    GLint minSum;
1110    GLint maxSum;
1111
1112    /* Our solution here is to find the darkest and brightest colors in
1113     * the 4x4 tile and use those as the two representative colors.
1114     * There are probably better algorithms to use (histogram-based).
1115     */
1116    minSum = 2000; /* big enough */
1117    maxSum = -1; /* small enough */
1118    for (k = 0; k < N_TEXELS / 2; k++) {
1119       GLint sum = 0;
1120       for (i = 0; i < n_comp; i++) {
1121          sum += input[k][i];
1122       }
1123       if (minSum > sum) {
1124          minSum = sum;
1125          minColL = k;
1126       }
1127       if (maxSum < sum) {
1128          maxSum = sum;
1129          maxColL = k;
1130       }
1131    }
1132    minSum = 2000; /* big enough */
1133    maxSum = -1; /* small enough */
1134    for (; k < N_TEXELS; k++) {
1135       GLint sum = 0;
1136       for (i = 0; i < n_comp; i++) {
1137          sum += input[k][i];
1138       }
1139       if (minSum > sum) {
1140          minSum = sum;
1141          minColR = k;
1142       }
1143       if (maxSum < sum) {
1144          maxSum = sum;
1145          maxColR = k;
1146       }
1147    }
1148 #else
1149    GLint minVal;
1150    GLint maxVal;
1151    GLint maxVarL = fxt1_variance(NULL, input, n_comp, N_TEXELS / 2);
1152    GLint maxVarR = fxt1_variance(NULL, &input[N_TEXELS / 2], n_comp, N_TEXELS / 2);
1153
1154    /* Scan the channel with max variance for lo & hi
1155     * and use those as the two representative colors.
1156     */
1157    minVal = 2000; /* big enough */
1158    maxVal = -1; /* small enough */
1159    for (k = 0; k < N_TEXELS / 2; k++) {
1160       GLint t = input[k][maxVarL];
1161       if (minVal > t) {
1162          minVal = t;
1163          minColL = k;
1164       }
1165       if (maxVal < t) {
1166          maxVal = t;
1167          maxColL = k;
1168       }
1169    }
1170    minVal = 2000; /* big enough */
1171    maxVal = -1; /* small enough */
1172    for (; k < N_TEXELS; k++) {
1173       GLint t = input[k][maxVarR];
1174       if (minVal > t) {
1175          minVal = t;
1176          minColR = k;
1177       }
1178       if (maxVal < t) {
1179          maxVal = t;
1180          maxColR = k;
1181       }
1182    }
1183 #endif
1184
1185    /* left microtile */
1186    cc[0] = 0;
1187    for (i = 0; i < n_comp; i++) {
1188       vec[0][i] = input[minColL][i];
1189       vec[1][i] = input[maxColL][i];
1190    }
1191    if (minColL != maxColL) {
1192       /* compute interpolation vector */
1193       MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
1194
1195       /* add in texels */
1196       lolo = 0;
1197       for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
1198          GLint texel;
1199          /* interpolate color */
1200          CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
1201          /* add in texel */
1202          lolo <<= 2;
1203          lolo |= texel;
1204       }
1205
1206       /* funky encoding for LSB of green */
1207       if ((GLint)((lolo >> 1) & 1) != (((vec[1][GCOMP] ^ vec[0][GCOMP]) >> 2) & 1)) {
1208          for (i = 0; i < n_comp; i++) {
1209             vec[1][i] = input[minColL][i];
1210             vec[0][i] = input[maxColL][i];
1211          }
1212          lolo = ~lolo;
1213       }
1214
1215       cc[0] = lolo;
1216    }
1217
1218    /* right microtile */
1219    cc[1] = 0;
1220    for (i = 0; i < n_comp; i++) {
1221       vec[2][i] = input[minColR][i];
1222       vec[3][i] = input[maxColR][i];
1223    }
1224    if (minColR != maxColR) {
1225       /* compute interpolation vector */
1226       MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);
1227
1228       /* add in texels */
1229       lohi = 0;
1230       for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
1231          GLint texel;
1232          /* interpolate color */
1233          CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
1234          /* add in texel */
1235          lohi <<= 2;
1236          lohi |= texel;
1237       }
1238
1239       /* funky encoding for LSB of green */
1240       if ((GLint)((lohi >> 1) & 1) != (((vec[3][GCOMP] ^ vec[2][GCOMP]) >> 2) & 1)) {
1241          for (i = 0; i < n_comp; i++) {
1242             vec[3][i] = input[minColR][i];
1243             vec[2][i] = input[maxColR][i];
1244          }
1245          lohi = ~lohi;
1246       }
1247
1248       cc[1] = lohi;
1249    }
1250
1251    FX64_MOV32(hi, 8 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
1252    for (j = 2 * 2 - 1; j >= 0; j--) {
1253       for (i = 0; i < n_comp; i++) {
1254          /* add in colors */
1255          FX64_SHL(hi, 5);
1256          FX64_OR32(hi, vec[j][i] >> 3);
1257       }
1258    }
1259    ((Fx64 *)cc)[1] = hi;
1260 }
1261
1262
1263 static void
1264 fxt1_quantize (GLuint *cc, const GLubyte *lines[], GLint comps)
1265 {
1266    GLint trualpha;
1267    GLubyte reord[N_TEXELS][MAX_COMP];
1268
1269    GLubyte input[N_TEXELS][MAX_COMP];
1270    GLint i, k, l;
1271
1272    if (comps == 3) {
1273       /* make the whole block opaque */
1274       _mesa_memset(input, -1, sizeof(input));
1275    }
1276
1277    /* 8 texels each line */
1278    for (l = 0; l < 4; l++) {
1279       for (k = 0; k < 4; k++) {
1280          for (i = 0; i < comps; i++) {
1281             input[k + l * 4][i] = *lines[l]++;
1282          }
1283       }
1284       for (; k < 8; k++) {
1285          for (i = 0; i < comps; i++) {
1286             input[k + l * 4 + 12][i] = *lines[l]++;
1287          }
1288       }
1289    }
1290
1291    /* block layout:
1292     * 00, 01, 02, 03, 08, 09, 0a, 0b
1293     * 10, 11, 12, 13, 18, 19, 1a, 1b
1294     * 04, 05, 06, 07, 0c, 0d, 0e, 0f
1295     * 14, 15, 16, 17, 1c, 1d, 1e, 1f
1296     */
1297
1298    /* [dBorca]
1299     * stupidity flows forth from this
1300     */
1301    l = N_TEXELS;
1302    trualpha = 0;
1303    if (comps == 4) {
1304       /* skip all transparent black texels */
1305       l = 0;
1306       for (k = 0; k < N_TEXELS; k++) {
1307          /* test all components against 0 */
1308          if (!ISTBLACK(input[k])) {
1309             /* texel is not transparent black */
1310             COPY_4UBV(reord[l], input[k]);
1311             if (reord[l][ACOMP] < (255 - ALPHA_TS)) {
1312                /* non-opaque texel */
1313                trualpha = !0;
1314             }
1315             l++;
1316          }
1317       }
1318    }
1319
1320 #if 0
1321    if (trualpha) {
1322       fxt1_quantize_ALPHA0(cc, input, reord, l);
1323    } else if (l == 0) {
1324       cc[0] = cc[1] = cc[2] = -1;
1325       cc[3] = 0;
1326    } else if (l < N_TEXELS) {
1327       fxt1_quantize_HI(cc, input, reord, l);
1328    } else {
1329       fxt1_quantize_CHROMA(cc, input);
1330    }
1331    (void)fxt1_quantize_ALPHA1;
1332    (void)fxt1_quantize_MIXED1;
1333    (void)fxt1_quantize_MIXED0;
1334 #else
1335    if (trualpha) {
1336       fxt1_quantize_ALPHA1(cc, input);
1337    } else if (l == 0) {
1338       cc[0] = cc[1] = cc[2] = ~0u;
1339       cc[3] = 0;
1340    } else if (l < N_TEXELS) {
1341       fxt1_quantize_MIXED1(cc, input);
1342    } else {
1343       fxt1_quantize_MIXED0(cc, input);
1344    }
1345    (void)fxt1_quantize_ALPHA0;
1346    (void)fxt1_quantize_HI;
1347    (void)fxt1_quantize_CHROMA;
1348 #endif
1349 }
1350
1351
1352 static void
1353 fxt1_encode (GLuint width, GLuint height, GLint comps,
1354              const void *source, GLint srcRowStride,
1355              void *dest, GLint destRowStride)
1356 {
1357    GLuint x, y;
1358    const GLubyte *data;
1359    GLuint *encoded = (GLuint *)dest;
1360    void *newSource = NULL;
1361
1362    assert(comps == 3 || comps == 4);
1363
1364    /* Replicate image if width is not M8 or height is not M4 */
1365    if ((width & 7) | (height & 3)) {
1366       GLint newWidth = (width + 7) & ~7;
1367       GLint newHeight = (height + 3) & ~3;
1368       newSource = _mesa_malloc(comps * newWidth * newHeight * sizeof(GLchan));
1369       if (!newSource) {
1370          GET_CURRENT_CONTEXT(ctx);
1371          _mesa_error(ctx, GL_OUT_OF_MEMORY, "texture compression");
1372          goto cleanUp;
1373       }
1374       _mesa_upscale_teximage2d(width, height, newWidth, newHeight,
1375                                comps, (const GLchan *) source,
1376                                srcRowStride, (GLchan *) newSource);
1377       source = newSource;
1378       width = newWidth;
1379       height = newHeight;
1380       srcRowStride = comps * newWidth;
1381    }
1382
1383    /* convert from 16/32-bit channels to GLubyte if needed */
1384    if (CHAN_TYPE != GL_UNSIGNED_BYTE) {
1385       const GLuint n = width * height * comps;
1386       const GLchan *src = (const GLchan *) source;
1387       GLubyte *dest = (GLubyte *) _mesa_malloc(n * sizeof(GLubyte));
1388       GLuint i;
1389       if (!dest) {
1390          GET_CURRENT_CONTEXT(ctx);
1391          _mesa_error(ctx, GL_OUT_OF_MEMORY, "texture compression");
1392          goto cleanUp;
1393       }
1394       for (i = 0; i < n; i++) {
1395          dest[i] = CHAN_TO_UBYTE(src[i]);
1396       }
1397       if (newSource != NULL) {
1398          _mesa_free(newSource);
1399       }
1400       newSource = dest;  /* we'll free this buffer before returning */
1401       source = dest;  /* the new, GLubyte incoming image */
1402    }
1403
1404    data = (const GLubyte *) source;
1405    destRowStride = (destRowStride - width * 2) / 4;
1406    for (y = 0; y < height; y += 4) {
1407       GLuint offs = 0 + (y + 0) * srcRowStride;
1408       for (x = 0; x < width; x += 8) {
1409          const GLubyte *lines[4];
1410          lines[0] = &data[offs];
1411          lines[1] = lines[0] + srcRowStride;
1412          lines[2] = lines[1] + srcRowStride;
1413          lines[3] = lines[2] + srcRowStride;
1414          offs += 8 * comps;
1415          fxt1_quantize(encoded, lines, comps);
1416          /* 128 bits per 8x4 block */
1417          encoded += 4;
1418       }
1419       encoded += destRowStride;
1420    }
1421
1422  cleanUp:
1423    if (newSource != NULL) {
1424       _mesa_free(newSource);
1425    }
1426 }
1427
1428
1429 /***************************************************************************\
1430  * FXT1 decoder
1431  *
1432  * The decoder is based on GL_3DFX_texture_compression_FXT1
1433  * specification and serves as a concept for the encoder.
1434 \***************************************************************************/
1435
1436
1437 /* lookup table for scaling 5 bit colors up to 8 bits */
1438 static const GLubyte _rgb_scale_5[] = {
1439    0,   8,   16,  25,  33,  41,  49,  58,
1440    66,  74,  82,  90,  99,  107, 115, 123,
1441    132, 140, 148, 156, 165, 173, 181, 189,
1442    197, 206, 214, 222, 230, 239, 247, 255
1443 };
1444
1445 /* lookup table for scaling 6 bit colors up to 8 bits */
1446 static const GLubyte _rgb_scale_6[] = {
1447    0,   4,   8,   12,  16,  20,  24,  28,
1448    32,  36,  40,  45,  49,  53,  57,  61,
1449    65,  69,  73,  77,  81,  85,  89,  93,
1450    97,  101, 105, 109, 113, 117, 121, 125,
1451    130, 134, 138, 142, 146, 150, 154, 158,
1452    162, 166, 170, 174, 178, 182, 186, 190,
1453    194, 198, 202, 206, 210, 215, 219, 223,
1454    227, 231, 235, 239, 243, 247, 251, 255
1455 };
1456
1457
/* CC_SEL: the 128-bit block viewed as GLuint words, shifted so that bit
 *         number `which' of the block ends up in bit 0 (upper bits are
 *         not masked off).
 * UP5:    expand the low 5 bits of c to 8 bits.
 * UP6:    expand the low 5 bits of c, extended by the extra low bit b,
 *         from 6 to 8 bits.
 * LERP:   integer blend of c0 and c1 at position t of n, rounded to
 *         nearest; the whole expansion is parenthesized so the macro can
 *         be used inside larger expressions.
 */
#define CC_SEL(cc, which) (((GLuint *)(cc))[(which) / 32] >> ((which) & 31))
#define UP5(c) _rgb_scale_5[(c) & 31]
#define UP6(c, b) _rgb_scale_6[(((c) & 31) << 1) | ((b) & 1)]
#define LERP(n, t, c0, c1) ((((n) - (t)) * (c0) + (t) * (c1) + (n) / 2) / (n))
1462
1463
1464 static void
1465 fxt1_decode_1HI (const GLubyte *code, GLint t, GLchan *rgba)
1466 {
1467    const GLuint *cc;
1468
1469    t *= 3;
1470    cc = (const GLuint *)(code + t / 8);
1471    t = (cc[0] >> (t & 7)) & 7;
1472
1473    if (t == 7) {
1474       rgba[RCOMP] = rgba[GCOMP] = rgba[BCOMP] = rgba[ACOMP] = 0;
1475    } else {
1476       GLubyte r, g, b;
1477       cc = (const GLuint *)(code + 12);
1478       if (t == 0) {
1479          b = UP5(CC_SEL(cc, 0));
1480          g = UP5(CC_SEL(cc, 5));
1481          r = UP5(CC_SEL(cc, 10));
1482       } else if (t == 6) {
1483          b = UP5(CC_SEL(cc, 15));
1484          g = UP5(CC_SEL(cc, 20));
1485          r = UP5(CC_SEL(cc, 25));
1486       } else {
1487          b = LERP(6, t, UP5(CC_SEL(cc, 0)), UP5(CC_SEL(cc, 15)));
1488          g = LERP(6, t, UP5(CC_SEL(cc, 5)), UP5(CC_SEL(cc, 20)));
1489          r = LERP(6, t, UP5(CC_SEL(cc, 10)), UP5(CC_SEL(cc, 25)));
1490       }
1491       rgba[RCOMP] = UBYTE_TO_CHAN(r);
1492       rgba[GCOMP] = UBYTE_TO_CHAN(g);
1493       rgba[BCOMP] = UBYTE_TO_CHAN(b);
1494       rgba[ACOMP] = CHAN_MAX;
1495    }
1496 }
1497
1498
1499 static void
1500 fxt1_decode_1CHROMA (const GLubyte *code, GLint t, GLchan *rgba)
1501 {
1502    const GLuint *cc;
1503    GLuint kk;
1504
1505    cc = (const GLuint *)code;
1506    if (t & 16) {
1507       cc++;
1508       t &= 15;
1509    }
1510    t = (cc[0] >> (t * 2)) & 3;
1511
1512    t *= 15;
1513    cc = (const GLuint *)(code + 8 + t / 8);
1514    kk = cc[0] >> (t & 7);
1515    rgba[BCOMP] = UBYTE_TO_CHAN( UP5(kk) );
1516    rgba[GCOMP] = UBYTE_TO_CHAN( UP5(kk >> 5) );
1517    rgba[RCOMP] = UBYTE_TO_CHAN( UP5(kk >> 10) );
1518    rgba[ACOMP] = CHAN_MAX;
1519 }
1520
1521
1522 static void
1523 fxt1_decode_1MIXED (const GLubyte *code, GLint t, GLchan *rgba)
1524 {
1525    const GLuint *cc;
1526    GLuint col[2][3];
1527    GLint glsb, selb;
1528
1529    cc = (const GLuint *)code;
1530    if (t & 16) {
1531       t &= 15;
1532       t = (cc[1] >> (t * 2)) & 3;
1533       /* col 2 */
1534       col[0][BCOMP] = (*(const GLuint *)(code + 11)) >> 6;
1535       col[0][GCOMP] = CC_SEL(cc, 99);
1536       col[0][RCOMP] = CC_SEL(cc, 104);
1537       /* col 3 */
1538       col[1][BCOMP] = CC_SEL(cc, 109);
1539       col[1][GCOMP] = CC_SEL(cc, 114);
1540       col[1][RCOMP] = CC_SEL(cc, 119);
1541       glsb = CC_SEL(cc, 126);
1542       selb = CC_SEL(cc, 33);
1543    } else {
1544       t = (cc[0] >> (t * 2)) & 3;
1545       /* col 0 */
1546       col[0][BCOMP] = CC_SEL(cc, 64);
1547       col[0][GCOMP] = CC_SEL(cc, 69);
1548       col[0][RCOMP] = CC_SEL(cc, 74);
1549       /* col 1 */
1550       col[1][BCOMP] = CC_SEL(cc, 79);
1551       col[1][GCOMP] = CC_SEL(cc, 84);
1552       col[1][RCOMP] = CC_SEL(cc, 89);
1553       glsb = CC_SEL(cc, 125);
1554       selb = CC_SEL(cc, 1);
1555    }
1556
1557    if (CC_SEL(cc, 124) & 1) {
1558       /* alpha[0] == 1 */
1559
1560       if (t == 3) {
1561          /* zero */
1562          rgba[RCOMP] = rgba[BCOMP] = rgba[GCOMP] = rgba[ACOMP] = 0;
1563       } else {
1564          GLubyte r, g, b;
1565          if (t == 0) {
1566             b = UP5(col[0][BCOMP]);
1567             g = UP5(col[0][GCOMP]);
1568             r = UP5(col[0][RCOMP]);
1569          } else if (t == 2) {
1570             b = UP5(col[1][BCOMP]);
1571             g = UP6(col[1][GCOMP], glsb);
1572             r = UP5(col[1][RCOMP]);
1573          } else {
1574             b = (UP5(col[0][BCOMP]) + UP5(col[1][BCOMP])) / 2;
1575             g = (UP5(col[0][GCOMP]) + UP6(col[1][GCOMP], glsb)) / 2;
1576             r = (UP5(col[0][RCOMP]) + UP5(col[1][RCOMP])) / 2;
1577          }
1578          rgba[RCOMP] = UBYTE_TO_CHAN(r);
1579          rgba[GCOMP] = UBYTE_TO_CHAN(g);
1580          rgba[BCOMP] = UBYTE_TO_CHAN(b);
1581          rgba[ACOMP] = CHAN_MAX;
1582       }
1583    } else {
1584       /* alpha[0] == 0 */
1585       GLubyte r, g, b;
1586       if (t == 0) {
1587          b = UP5(col[0][BCOMP]);
1588          g = UP6(col[0][GCOMP], glsb ^ selb);
1589          r = UP5(col[0][RCOMP]);
1590       } else if (t == 3) {
1591          b = UP5(col[1][BCOMP]);
1592          g = UP6(col[1][GCOMP], glsb);
1593          r = UP5(col[1][RCOMP]);
1594       } else {
1595          b = LERP(3, t, UP5(col[0][BCOMP]), UP5(col[1][BCOMP]));
1596          g = LERP(3, t, UP6(col[0][GCOMP], glsb ^ selb),
1597                         UP6(col[1][GCOMP], glsb));
1598          r = LERP(3, t, UP5(col[0][RCOMP]), UP5(col[1][RCOMP]));
1599       }
1600       rgba[RCOMP] = UBYTE_TO_CHAN(r);
1601       rgba[GCOMP] = UBYTE_TO_CHAN(g);
1602       rgba[BCOMP] = UBYTE_TO_CHAN(b);
1603       rgba[ACOMP] = CHAN_MAX;
1604    }
1605 }
1606
1607
1608 static void
1609 fxt1_decode_1ALPHA (const GLubyte *code, GLint t, GLchan *rgba)
1610 {
1611    const GLuint *cc;
1612    GLubyte r, g, b, a;
1613
1614    cc = (const GLuint *)code;
1615    if (CC_SEL(cc, 124) & 1) {
1616       /* lerp == 1 */
1617       GLuint col0[4];
1618
1619       if (t & 16) {
1620          t &= 15;
1621          t = (cc[1] >> (t * 2)) & 3;
1622          /* col 2 */
1623          col0[BCOMP] = (*(const GLuint *)(code + 11)) >> 6;
1624          col0[GCOMP] = CC_SEL(cc, 99);
1625          col0[RCOMP] = CC_SEL(cc, 104);
1626          col0[ACOMP] = CC_SEL(cc, 119);
1627       } else {
1628          t = (cc[0] >> (t * 2)) & 3;
1629          /* col 0 */
1630          col0[BCOMP] = CC_SEL(cc, 64);
1631          col0[GCOMP] = CC_SEL(cc, 69);
1632          col0[RCOMP] = CC_SEL(cc, 74);
1633          col0[ACOMP] = CC_SEL(cc, 109);
1634       }
1635
1636       if (t == 0) {
1637          b = UP5(col0[BCOMP]);
1638          g = UP5(col0[GCOMP]);
1639          r = UP5(col0[RCOMP]);
1640          a = UP5(col0[ACOMP]);
1641       } else if (t == 3) {
1642          b = UP5(CC_SEL(cc, 79));
1643          g = UP5(CC_SEL(cc, 84));
1644          r = UP5(CC_SEL(cc, 89));
1645          a = UP5(CC_SEL(cc, 114));
1646       } else {
1647          b = LERP(3, t, UP5(col0[BCOMP]), UP5(CC_SEL(cc, 79)));
1648          g = LERP(3, t, UP5(col0[GCOMP]), UP5(CC_SEL(cc, 84)));
1649          r = LERP(3, t, UP5(col0[RCOMP]), UP5(CC_SEL(cc, 89)));
1650          a = LERP(3, t, UP5(col0[ACOMP]), UP5(CC_SEL(cc, 114)));
1651       }
1652    } else {
1653       /* lerp == 0 */
1654
1655       if (t & 16) {
1656          cc++;
1657          t &= 15;
1658       }
1659       t = (cc[0] >> (t * 2)) & 3;
1660
1661       if (t == 3) {
1662          /* zero */
1663          r = g = b = a = 0;
1664       } else {
1665          GLuint kk;
1666          cc = (const GLuint *)code;
1667          a = UP5(cc[3] >> (t * 5 + 13));
1668          t *= 15;
1669          cc = (const GLuint *)(code + 8 + t / 8);
1670          kk = cc[0] >> (t & 7);
1671          b = UP5(kk);
1672          g = UP5(kk >> 5);
1673          r = UP5(kk >> 10);
1674       }
1675    }
1676    rgba[RCOMP] = UBYTE_TO_CHAN(r);
1677    rgba[GCOMP] = UBYTE_TO_CHAN(g);
1678    rgba[BCOMP] = UBYTE_TO_CHAN(b);
1679    rgba[ACOMP] = UBYTE_TO_CHAN(a);
1680 }
1681
1682
1683 void
1684 fxt1_decode_1 (const void *texture, GLint stride, /* in pixels */
1685                GLint i, GLint j, GLchan *rgba)
1686 {
1687    static void (*decode_1[]) (const GLubyte *, GLint, GLchan *) = {
1688       fxt1_decode_1HI,     /* cc-high   = "00?" */
1689       fxt1_decode_1HI,     /* cc-high   = "00?" */
1690       fxt1_decode_1CHROMA, /* cc-chroma = "010" */
1691       fxt1_decode_1ALPHA,  /* alpha     = "011" */
1692       fxt1_decode_1MIXED,  /* mixed     = "1??" */
1693       fxt1_decode_1MIXED,  /* mixed     = "1??" */
1694       fxt1_decode_1MIXED,  /* mixed     = "1??" */
1695       fxt1_decode_1MIXED   /* mixed     = "1??" */
1696    };
1697
1698    const GLubyte *code = (const GLubyte *)texture +
1699                          ((j / 4) * (stride / 8) + (i / 8)) * 16;
1700    GLint mode = CC_SEL(code, 125);
1701    GLint t = i & 7;
1702
1703    if (t & 4) {
1704       t += 12;
1705    }
1706    t += (j & 3) * 4;
1707
1708    decode_1[mode](code, t, rgba);
1709 }