src/mesa/main/texcompress_fxt1.c

   1 /*
   2  * Mesa 3-D graphics library
   3  * Version:  6.5
   4  *
   5  * Copyright (C) 1999-2005  Brian Paul   All Rights Reserved.
   6  *
   7  * Permission is hereby granted, free of charge, to any person obtaining a
   8  * copy of this software and associated documentation files (the "Software"),
   9  * to deal in the Software without restriction, including without limitation
  10  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  11  * and/or sell copies of the Software, and to permit persons to whom the
  12  * Software is furnished to do so, subject to the following conditions:
  13  *
  14  * The above copyright notice and this permission notice shall be included
  15  * in all copies or substantial portions of the Software.
  16  *
  17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  18  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  20  * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
  21  * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  22  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  23  */
  24
  25
  26 /**
  27  * \file texcompress_fxt1.c
 * GL_3DFX_texture_compression_FXT1 support.
  29  */
  30
  31
  32 #include "glheader.h"
  33 #include "imports.h"
  34 #include "colormac.h"
  35 #include "context.h"
  36 #include "convolve.h"
  37 #include "image.h"
  38 #include "mipmap.h"
  39 #include "texcompress.h"
  40 #include "texformat.h"
  41 #include "texstore.h"
  42
  43
/*
 * Forward declaration of the FXT1 encoder used by the texstore functions
 * below (its definition is not in this part of the file).
 *
 * At the visible call sites it is given the source image size, the number
 * of components per source texel (3 for RGB, 4 for RGBA), the source
 * pixels with their row stride counted in GLchan units, and the
 * destination address with the destination row stride.
 */
static void
fxt1_encode (GLuint width, GLuint height, GLint comps,
             const void *source, GLint srcRowStride,
             void *dest, GLint destRowStride);
  48
/*
 * Forward declaration of the single-texel FXT1 decoder (non-static; its
 * definition is not in this part of the file).
 *
 * The fetch_texel_2d_* functions below call it with the texture image
 * data, the image row stride, the texel coordinates (i, j) and a GLchan
 * array that receives the decoded color.
 */
void
fxt1_decode_1 (const void *texture, GLint stride,
               GLint i, GLint j, GLchan *rgba);
  52
  53
/**
 * Called during context initialization.
 *
 * Currently a no-op: the function performs no FXT1 setup and only
 * references \p ctx so that the parameter is not reported as unused.
 *
 * \param ctx  the GL context being initialized (unused).
 */
void
_mesa_init_texture_fxt1( GLcontext *ctx )
{
   (void) ctx;
}
  62
  63
  64 /**
  65  * Called via TexFormat->StoreImage to store an RGB_FXT1 texture.
  66  */
  67 static GLboolean
  68 texstore_rgb_fxt1(TEXSTORE_PARAMS)
  69 {
  70    const GLchan *pixels;
  71    GLint srcRowStride;
  72    GLubyte *dst;
  73    const GLint texWidth = dstRowStride * 8 / 16; /* a bit of a hack */
  74    const GLchan *tempImage = NULL;
  75
  76    ASSERT(dstFormat == &_mesa_texformat_rgb_fxt1);
  77    ASSERT(dstXoffset % 8 == 0);
  78    ASSERT(dstYoffset % 4 == 0);
  79    ASSERT(dstZoffset     == 0);
  80    (void) dstZoffset;
  81    (void) dstImageOffsets;
  82
  83    if (srcFormat != GL_RGB ||
  84        srcType != CHAN_TYPE ||
  85        ctx->_ImageTransferState ||
  86        srcPacking->SwapBytes) {
  87       /* convert image to RGB/GLchan */
  88       tempImage = _mesa_make_temp_chan_image(ctx, dims,
  89                                              baseInternalFormat,
  90                                              dstFormat->BaseFormat,
  91                                              srcWidth, srcHeight, srcDepth,
  92                                              srcFormat, srcType, srcAddr,
  93                                              srcPacking);
  94       if (!tempImage)
  95          return GL_FALSE; /* out of memory */
  96       _mesa_adjust_image_for_convolution(ctx, dims, &srcWidth, &srcHeight);
  97       pixels = tempImage;
  98       srcRowStride = 3 * srcWidth;
  99       srcFormat = GL_RGB;
 100    }
 101    else {
 102       pixels = (const GLchan *) srcAddr;
 103       srcRowStride = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat,
 104                                             srcType) / sizeof(GLchan);
 105    }
 106
 107    dst = _mesa_compressed_image_address(dstXoffset, dstYoffset, 0,
 108                                         GL_COMPRESSED_RGB_FXT1_3DFX,
 109                                         texWidth, (GLubyte *) dstAddr);
 110
 111    fxt1_encode(srcWidth, srcHeight, 3, pixels, srcRowStride,
 112                dst, dstRowStride);
 113
 114    if (tempImage)
 115       _mesa_free((void*) tempImage);
 116
 117    return GL_TRUE;
 118 }
 119
 120
 121 /**
 122  * Called via TexFormat->StoreImage to store an RGBA_FXT1 texture.
 123  */
 124 static GLboolean
 125 texstore_rgba_fxt1(TEXSTORE_PARAMS)
 126 {
 127    const GLchan *pixels;
 128    GLint srcRowStride;
 129    GLubyte *dst;
 130    GLint texWidth = dstRowStride * 8 / 16; /* a bit of a hack */
 131    const GLchan *tempImage = NULL;
 132
 133    ASSERT(dstFormat == &_mesa_texformat_rgba_fxt1);
 134    ASSERT(dstXoffset % 8 == 0);
 135    ASSERT(dstYoffset % 4 == 0);
 136    ASSERT(dstZoffset     == 0);
 137    (void) dstZoffset;
 138    (void) dstImageOffsets;
 139
 140    if (srcFormat != GL_RGBA ||
 141        srcType != CHAN_TYPE ||
 142        ctx->_ImageTransferState ||
 143        srcPacking->SwapBytes) {
 144       /* convert image to RGBA/GLchan */
 145       tempImage = _mesa_make_temp_chan_image(ctx, dims,
 146                                              baseInternalFormat,
 147                                              dstFormat->BaseFormat,
 148                                              srcWidth, srcHeight, srcDepth,
 149                                              srcFormat, srcType, srcAddr,
 150                                              srcPacking);
 151       if (!tempImage)
 152          return GL_FALSE; /* out of memory */
 153       _mesa_adjust_image_for_convolution(ctx, dims, &srcWidth, &srcHeight);
 154       pixels = tempImage;
 155       srcRowStride = 4 * srcWidth;
 156       srcFormat = GL_RGBA;
 157    }
 158    else {
 159       pixels = (const GLchan *) srcAddr;
 160       srcRowStride = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat,
 161                                             srcType) / sizeof(GLchan);
 162    }
 163
 164    dst = _mesa_compressed_image_address(dstXoffset, dstYoffset, 0,
 165                                         GL_COMPRESSED_RGBA_FXT1_3DFX,
 166                                         texWidth, (GLubyte *) dstAddr);
 167
 168    fxt1_encode(srcWidth, srcHeight, 4, pixels, srcRowStride,
 169                dst, dstRowStride);
 170
 171    if (tempImage)
 172       _mesa_free((void*) tempImage);
 173
 174    return GL_TRUE;
 175 }
 176
 177
 178 static void
 179 fetch_texel_2d_rgba_fxt1( const struct gl_texture_image *texImage,
 180                           GLint i, GLint j, GLint k, GLchan *texel )
 181 {
 182    (void) k;
 183    fxt1_decode_1(texImage->Data, texImage->RowStride, i, j, texel);
 184 }
 185
 186
 187 static void
 188 fetch_texel_2d_f_rgba_fxt1( const struct gl_texture_image *texImage,
 189                             GLint i, GLint j, GLint k, GLfloat *texel )
 190 {
 191    /* just sample as GLchan and convert to float here */
 192    GLchan rgba[4];
 193    (void) k;
 194    fxt1_decode_1(texImage->Data, texImage->RowStride, i, j, rgba);
 195    texel[RCOMP] = CHAN_TO_FLOAT(rgba[RCOMP]);
 196    texel[GCOMP] = CHAN_TO_FLOAT(rgba[GCOMP]);
 197    texel[BCOMP] = CHAN_TO_FLOAT(rgba[BCOMP]);
 198    texel[ACOMP] = CHAN_TO_FLOAT(rgba[ACOMP]);
 199 }
 200
 201
 202 static void
 203 fetch_texel_2d_rgb_fxt1( const struct gl_texture_image *texImage,
 204                          GLint i, GLint j, GLint k, GLchan *texel )
 205 {
 206    (void) k;
 207    fxt1_decode_1(texImage->Data, texImage->RowStride, i, j, texel);
 208    texel[ACOMP] = 255;
 209 }
 210
 211
 212 static void
 213 fetch_texel_2d_f_rgb_fxt1( const struct gl_texture_image *texImage,
 214                            GLint i, GLint j, GLint k, GLfloat *texel )
 215 {
 216    /* just sample as GLchan and convert to float here */
 217    GLchan rgba[4];
 218    (void) k;
 219    fxt1_decode_1(texImage->Data, texImage->RowStride, i, j, rgba);
 220    texel[RCOMP] = CHAN_TO_FLOAT(rgba[RCOMP]);
 221    texel[GCOMP] = CHAN_TO_FLOAT(rgba[GCOMP]);
 222    texel[BCOMP] = CHAN_TO_FLOAT(rgba[BCOMP]);
 223    texel[ACOMP] = 1.0F;
 224 }
 225
 226
 227
/**
 * Texture format descriptor for RGB FXT1.
 *
 * The initializer is positional; the comment on each line names the member
 * it is meant to fill.  The per-channel bit counts are approximations
 * because the format is block compressed, and TexelBytes is given as 0.
 * Only the 2D fetch callbacks are provided; the 1D and 3D slots are NULL
 * ("impossible"), and no StoreTexel callback is supplied.
 */
const struct gl_texture_format _mesa_texformat_rgb_fxt1 = {
   MESA_FORMAT_RGB_FXT1,                /* MesaFormat */
   GL_RGB,                              /* BaseFormat */
   GL_UNSIGNED_NORMALIZED_ARB,          /* DataType */
   4, /*approx*/                        /* RedBits */
   4, /*approx*/                        /* GreenBits */
   4, /*approx*/                        /* BlueBits */
   0,                                   /* AlphaBits */
   0,                                   /* LuminanceBits */
   0,                                   /* IntensityBits */
   0,                                   /* IndexBits */
   0,                                   /* DepthBits */
   0,                                   /* StencilBits */
   0,                                   /* TexelBytes */
   texstore_rgb_fxt1,                   /* StoreTexImageFunc */
   NULL, /*impossible*/                 /* FetchTexel1D */
   fetch_texel_2d_rgb_fxt1,             /* FetchTexel2D */
   NULL, /*impossible*/                 /* FetchTexel3D */
   NULL, /*impossible*/                 /* FetchTexel1Df */
   fetch_texel_2d_f_rgb_fxt1,           /* FetchTexel2Df */
   NULL, /*impossible*/                 /* FetchTexel3Df */
   NULL                                 /* StoreTexel */
};
 251
/**
 * Texture format descriptor for RGBA FXT1.
 *
 * The initializer is positional; the comment on each line names the member
 * it is meant to fill.  The per-channel bit counts (including the single
 * alpha bit) are approximations because the format is block compressed,
 * and TexelBytes is given as 0.  Only the 2D fetch callbacks are provided;
 * the 1D and 3D slots are NULL ("impossible"), and no StoreTexel callback
 * is supplied.
 */
const struct gl_texture_format _mesa_texformat_rgba_fxt1 = {
   MESA_FORMAT_RGBA_FXT1,               /* MesaFormat */
   GL_RGBA,                             /* BaseFormat */
   GL_UNSIGNED_NORMALIZED_ARB,          /* DataType */
   4, /*approx*/                        /* RedBits */
   4, /*approx*/                        /* GreenBits */
   4, /*approx*/                        /* BlueBits */
   1, /*approx*/                        /* AlphaBits */
   0,                                   /* LuminanceBits */
   0,                                   /* IntensityBits */
   0,                                   /* IndexBits */
   0,                                   /* DepthBits */
   0,                                   /* StencilBits */
   0,                                   /* TexelBytes */
   texstore_rgba_fxt1,                  /* StoreTexImageFunc */
   NULL, /*impossible*/                 /* FetchTexel1D */
   fetch_texel_2d_rgba_fxt1,            /* FetchTexel2D */
   NULL, /*impossible*/                 /* FetchTexel3D */
   NULL, /*impossible*/                 /* FetchTexel1Df */
   fetch_texel_2d_f_rgba_fxt1,          /* FetchTexel2Df */
   NULL, /*impossible*/                 /* FetchTexel3Df */
   NULL                                 /* StoreTexel */
};
 275
 276
 277 /***************************************************************************\
 278  * FXT1 encoder
 279  *
 280  * The encoder was built by reversing the decoder,
 281  * and is vaguely based on Texus2 by 3dfx. Note that this code
 282  * is merely a proof of concept, since it is highly UNoptimized;
 283  * moreover, it is sub-optimal due to initial conditions passed
 284  * to Lloyd's algorithm (the interpolation modes are even worse).
 285 \***************************************************************************/
 286
 287
 288 #define MAX_COMP 4 /* ever needed maximum number of components in texel */
 289 #define MAX_VECT 4 /* ever needed maximum number of base vectors to find */
 290 #define N_TEXELS 32 /* number of texels in a block (always 32) */
 291 #define LL_N_REP 50 /* number of iterations in lloyd's vq */
 292 #define LL_RMS_D 10 /* fault tolerance (maximum delta) */
 293 #define LL_RMS_E 255 /* fault tolerance (maximum error) */
 294 #define ALPHA_TS 2 /* alpha threshold: (255 - ALPHA_TS) deemed opaque */
 295 #define ISTBLACK(v) (*((GLuint *)(v)) == 0)
 296
 297
 298 /*
 299  * Define a 64-bit unsigned integer type and macros
 300  */
 301 #if defined(__GNUC__) && !defined(__cplusplus)
 302
 303 #define FX64_NATIVE 1
 304
 305 typedef unsigned long long Fx64;
 306
 307 #define FX64_MOV32(a, b) a = b
 308 #define FX64_OR32(a, b)  a |= b
 309 #define FX64_SHL(a, c)   a <<= c
 310
 311 #else  /* !__GNUC__ */
 312
 313 #define FX64_NATIVE 0
 314
 315 typedef struct {
 316    GLuint lo, hi;
 317 } Fx64;
 318
 319 #define FX64_MOV32(a, b) a.lo = b
 320 #define FX64_OR32(a, b)  a.lo |= b
 321
 322 #define FX64_SHL(a, c)                                 \
 323    do {                                                \
 324        if ((c) >= 32) {                                \
 325           a.hi = a.lo << ((c) - 32);                   \
 326           a.lo = 0;                                    \
 327        } else {                                        \
 328           a.hi = (a.hi << (c)) | (a.lo >> (32 - (c))); \
 329           a.lo <<= (c);                                \
 330        }                                               \
 331    } while (0)
 332
 333 #endif /* !__GNUC__ */
 334
 335
 336 #define F(i) (GLfloat)1 /* can be used to obtain an oblong metric: 0.30 / 0.59 / 0.11 */
 337 #define SAFECDOT 1 /* for paranoids */
 338
 339 #define MAKEIVEC(NV, NC, IV, B, V0, V1)  \
 340    do {                                  \
 341       /* compute interpolation vector */ \
 342       GLfloat d2 = 0.0F;                 \
 343       GLfloat rd2;                       \
 344                                          \
 345       for (i = 0; i < NC; i++) {         \
 346          IV[i] = (V1[i] - V0[i]) * F(i); \
 347          d2 += IV[i] * IV[i];            \
 348       }                                  \
 349       rd2 = (GLfloat)NV / d2;            \
 350       B = 0;                             \
 351       for (i = 0; i < NC; i++) {         \
 352          IV[i] *= F(i);                  \
 353          B -= IV[i] * V0[i];             \
 354          IV[i] *= rd2;                   \
 355       }                                  \
 356       B = B * rd2 + 0.5f;                \
 357    } while (0)
 358
 359 #define CALCCDOT(TEXEL, NV, NC, IV, B, V)\
 360    do {                                  \
 361       GLfloat dot = 0.0F;                \
 362       for (i = 0; i < NC; i++) {         \
 363          dot += V[i] * IV[i];            \
 364       }                                  \
 365       TEXEL = (GLint)(dot + B);          \
 366       if (SAFECDOT) {                    \
 367          if (TEXEL < 0) {                \
 368             TEXEL = 0;                   \
 369          } else if (TEXEL > NV) {        \
 370             TEXEL = NV;                  \
 371          }                               \
 372       }                                  \
 373    } while (0)
 374
 375
 376 static GLint
 377 fxt1_bestcol (GLfloat vec[][MAX_COMP], GLint nv,
 378               GLubyte input[MAX_COMP], GLint nc)
 379 {
 380    GLint i, j, best = -1;
 381    GLfloat err = 1e9; /* big enough */
 382
 383    for (j = 0; j < nv; j++) {
 384       GLfloat e = 0.0F;
 385       for (i = 0; i < nc; i++) {
 386          e += (vec[j][i] - input[i]) * (vec[j][i] - input[i]);
 387       }
 388       if (e < err) {
 389          err = e;
 390          best = j;
 391       }
 392    }
 393
 394    return best;
 395 }
 396
 397
 398 static GLint
 399 fxt1_worst (GLfloat vec[MAX_COMP],
 400             GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
 401 {
 402    GLint i, k, worst = -1;
 403    GLfloat err = -1.0F; /* small enough */
 404
 405    for (k = 0; k < n; k++) {
 406       GLfloat e = 0.0F;
 407       for (i = 0; i < nc; i++) {
 408          e += (vec[i] - input[k][i]) * (vec[i] - input[k][i]);
 409       }
 410       if (e > err) {
 411          err = e;
 412          worst = k;
 413       }
 414    }
 415
 416    return worst;
 417 }
 418
 419
 420 static GLint
 421 fxt1_variance (GLdouble variance[MAX_COMP],
 422                GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
 423 {
 424    GLint i, k, best = 0;
 425    GLint sx, sx2;
 426    GLdouble var, maxvar = -1; /* small enough */
 427    GLdouble teenth = 1.0 / n;
 428
 429    for (i = 0; i < nc; i++) {
 430       sx = sx2 = 0;
 431       for (k = 0; k < n; k++) {
 432          GLint t = input[k][i];
 433          sx += t;
 434          sx2 += t * t;
 435       }
 436       var = sx2 * teenth - sx * sx * teenth * teenth;
 437       if (maxvar < var) {
 438          maxvar = var;
 439          best = i;
 440       }
 441       if (variance) {
 442          variance[i] = var;
 443       }
 444    }
 445
 446    return best;
 447 }
 448
 449
 450 static GLint
 451 fxt1_choose (GLfloat vec[][MAX_COMP], GLint nv,
 452              GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
 453 {
 454 #if 0
 455    /* Choose colors from a grid.
 456     */
 457    GLint i, j;
 458
 459    for (j = 0; j < nv; j++) {
 460       GLint m = j * (n - 1) / (nv - 1);
 461       for (i = 0; i < nc; i++) {
 462          vec[j][i] = input[m][i];
 463       }
 464    }
 465 #else
 466    /* Our solution here is to find the darkest and brightest colors in
 467     * the 8x4 tile and use those as the two representative colors.
 468     * There are probably better algorithms to use (histogram-based).
 469     */
 470    GLint i, j, k;
 471    GLint minSum = 2000; /* big enough */
 472    GLint maxSum = -1; /* small enough */
 473    GLint minCol = 0; /* phoudoin: silent compiler! */
 474    GLint maxCol = 0; /* phoudoin: silent compiler! */
 475
 476    struct {
 477       GLint flag;
 478       GLint key;
 479       GLint freq;
 480       GLint idx;
 481    } hist[N_TEXELS];
 482    GLint lenh = 0;
 483
 484    _mesa_memset(hist, 0, sizeof(hist));
 485
 486    for (k = 0; k < n; k++) {
 487       GLint l;
 488       GLint key = 0;
 489       GLint sum = 0;
 490       for (i = 0; i < nc; i++) {
 491          key <<= 8;
 492          key |= input[k][i];
 493          sum += input[k][i];
 494       }
 495       for (l = 0; l < n; l++) {
 496          if (!hist[l].flag) {
 497             /* alloc new slot */
 498             hist[l].flag = !0;
 499             hist[l].key = key;
 500             hist[l].freq = 1;
 501             hist[l].idx = k;
 502             lenh = l + 1;
 503             break;
 504          } else if (hist[l].key == key) {
 505             hist[l].freq++;
 506             break;
 507          }
 508       }
 509       if (minSum > sum) {
 510          minSum = sum;
 511          minCol = k;
 512       }
 513       if (maxSum < sum) {
 514          maxSum = sum;
 515          maxCol = k;
 516       }
 517    }
 518
 519    if (lenh <= nv) {
 520       for (j = 0; j < lenh; j++) {
 521          for (i = 0; i < nc; i++) {
 522             vec[j][i] = (GLfloat)input[hist[j].idx][i];
 523          }
 524       }
 525       for (; j < nv; j++) {
 526          for (i = 0; i < nc; i++) {
 527             vec[j][i] = vec[0][i];
 528          }
 529       }
 530       return 0;
 531    }
 532
 533    for (j = 0; j < nv; j++) {
 534       for (i = 0; i < nc; i++) {
 535          vec[j][i] = ((nv - 1 - j) * input[minCol][i] + j * input[maxCol][i] + (nv - 1) / 2) / (GLfloat)(nv - 1);
 536       }
 537    }
 538 #endif
 539
 540    return !0;
 541 }
 542
 543
 544 static GLint
 545 fxt1_lloyd (GLfloat vec[][MAX_COMP], GLint nv,
 546             GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
 547 {
 548    /* Use the generalized lloyd's algorithm for VQ:
 549     *     find 4 color vectors.
 550     *
 551     *     for each sample color
 552     *         sort to nearest vector.
 553     *
 554     *     replace each vector with the centroid of it's matching colors.
 555     *
 556     *     repeat until RMS doesn't improve.
 557     *
 558     *     if a color vector has no samples, or becomes the same as another
 559     *     vector, replace it with the color which is farthest from a sample.
 560     *
 561     * vec[][MAX_COMP]           initial vectors and resulting colors
 562     * nv                        number of resulting colors required
 563     * input[N_TEXELS][MAX_COMP] input texels
 564     * nc                        number of components in input / vec
 565     * n                         number of input samples
 566     */
 567
 568    GLint sum[MAX_VECT][MAX_COMP]; /* used to accumulate closest texels */
 569    GLint cnt[MAX_VECT]; /* how many times a certain vector was chosen */
 570    GLfloat error, lasterror = 1e9;
 571
 572    GLint i, j, k, rep;
 573
 574    /* the quantizer */
 575    for (rep = 0; rep < LL_N_REP; rep++) {
 576       /* reset sums & counters */
 577       for (j = 0; j < nv; j++) {
 578          for (i = 0; i < nc; i++) {
 579             sum[j][i] = 0;
 580          }
 581          cnt[j] = 0;
 582       }
 583       error = 0;
 584
 585       /* scan whole block */
 586       for (k = 0; k < n; k++) {
 587 #if 1
 588          GLint best = -1;
 589          GLfloat err = 1e9; /* big enough */
 590          /* determine best vector */
 591          for (j = 0; j < nv; j++) {
 592             GLfloat e = (vec[j][0] - input[k][0]) * (vec[j][0] - input[k][0]) +
 593                       (vec[j][1] - input[k][1]) * (vec[j][1] - input[k][1]) +
 594                       (vec[j][2] - input[k][2]) * (vec[j][2] - input[k][2]);
 595             if (nc == 4) {
 596                e += (vec[j][3] - input[k][3]) * (vec[j][3] - input[k][3]);
 597             }
 598             if (e < err) {
 599                err = e;
 600                best = j;
 601             }
 602          }
 603 #else
 604          GLint best = fxt1_bestcol(vec, nv, input[k], nc, &err);
 605 #endif
 606          /* add in closest color */
 607          for (i = 0; i < nc; i++) {
 608             sum[best][i] += input[k][i];
 609          }
 610          /* mark this vector as used */
 611          cnt[best]++;
 612          /* accumulate error */
 613          error += err;
 614       }
 615
 616       /* check RMS */
 617       if ((error < LL_RMS_E) ||
 618           ((error < lasterror) && ((lasterror - error) < LL_RMS_D))) {
 619          return !0; /* good match */
 620       }
 621       lasterror = error;
 622
 623       /* move each vector to the barycenter of its closest colors */
 624       for (j = 0; j < nv; j++) {
 625          if (cnt[j]) {
 626             GLfloat div = 1.0F / cnt[j];
 627             for (i = 0; i < nc; i++) {
 628                vec[j][i] = div * sum[j][i];
 629             }
 630          } else {
 631             /* this vec has no samples or is identical with a previous vec */
 632             GLint worst = fxt1_worst(vec[j], input, nc, n);
 633             for (i = 0; i < nc; i++) {
 634                vec[j][i] = input[worst][i];
 635             }
 636          }
 637       }
 638    }
 639
 640    return 0; /* could not converge fast enough */
 641 }
 642
 643
 644 static void
 645 fxt1_quantize_CHROMA (GLuint *cc,
 646                       GLubyte input[N_TEXELS][MAX_COMP])
 647 {
 648    const GLint n_vect = 4; /* 4 base vectors to find */
 649    const GLint n_comp = 3; /* 3 components: R, G, B */
 650    GLfloat vec[MAX_VECT][MAX_COMP];
 651    GLint i, j, k;
 652    Fx64 hi; /* high quadword */
 653    GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
 654
 655    if (fxt1_choose(vec, n_vect, input, n_comp, N_TEXELS) != 0) {
 656       fxt1_lloyd(vec, n_vect, input, n_comp, N_TEXELS);
 657    }
 658
 659    FX64_MOV32(hi, 4); /* cc-chroma = "010" + unused bit */
 660    for (j = n_vect - 1; j >= 0; j--) {
 661       for (i = 0; i < n_comp; i++) {
 662          /* add in colors */
 663          FX64_SHL(hi, 5);
 664          FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
 665       }
 666    }
 667    ((Fx64 *)cc)[1] = hi;
 668
 669    lohi = lolo = 0;
 670    /* right microtile */
 671    for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) {
 672       lohi <<= 2;
 673       lohi |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
 674    }
 675    /* left microtile */
 676    for (; k >= 0; k--) {
 677       lolo <<= 2;
 678       lolo |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
 679    }
 680    cc[1] = lohi;
 681    cc[0] = lolo;
 682 }
 683
 684
 685 static void
 686 fxt1_quantize_ALPHA0 (GLuint *cc,
 687                       GLubyte input[N_TEXELS][MAX_COMP],
 688                       GLubyte reord[N_TEXELS][MAX_COMP], GLint n)
 689 {
 690    const GLint n_vect = 3; /* 3 base vectors to find */
 691    const GLint n_comp = 4; /* 4 components: R, G, B, A */
 692    GLfloat vec[MAX_VECT][MAX_COMP];
 693    GLint i, j, k;
 694    Fx64 hi; /* high quadword */
 695    GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
 696
 697    /* the last vector indicates zero */
 698    for (i = 0; i < n_comp; i++) {
 699       vec[n_vect][i] = 0;
 700    }
 701
 702    /* the first n texels in reord are guaranteed to be non-zero */
 703    if (fxt1_choose(vec, n_vect, reord, n_comp, n) != 0) {
 704       fxt1_lloyd(vec, n_vect, reord, n_comp, n);
 705    }
 706
 707    FX64_MOV32(hi, 6); /* alpha = "011" + lerp = 0 */
 708    for (j = n_vect - 1; j >= 0; j--) {
 709       /* add in alphas */
 710       FX64_SHL(hi, 5);
 711       FX64_OR32(hi, (GLuint)(vec[j][ACOMP] / 8.0F));
 712    }
 713    for (j = n_vect - 1; j >= 0; j--) {
 714       for (i = 0; i < n_comp - 1; i++) {
 715          /* add in colors */
 716          FX64_SHL(hi, 5);
 717          FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
 718       }
 719    }
 720    ((Fx64 *)cc)[1] = hi;
 721
 722    lohi = lolo = 0;
 723    /* right microtile */
 724    for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) {
 725       lohi <<= 2;
 726       lohi |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
 727    }
 728    /* left microtile */
 729    for (; k >= 0; k--) {
 730       lolo <<= 2;
 731       lolo |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
 732    }
 733    cc[1] = lohi;
 734    cc[0] = lolo;
 735 }
 736
 737
 738 static void
 739 fxt1_quantize_ALPHA1 (GLuint *cc,
 740                       GLubyte input[N_TEXELS][MAX_COMP])
 741 {
 742    const GLint n_vect = 3; /* highest vector number in each microtile */
 743    const GLint n_comp = 4; /* 4 components: R, G, B, A */
 744    GLfloat vec[1 + 1 + 1][MAX_COMP]; /* 1.5 extrema for each sub-block */
 745    GLfloat b, iv[MAX_COMP]; /* interpolation vector */
 746    GLint i, j, k;
 747    Fx64 hi; /* high quadword */
 748    GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
 749
 750    GLint minSum;
 751    GLint maxSum;
 752    GLint minColL = 0, maxColL = 0;
 753    GLint minColR = 0, maxColR = 0;
 754    GLint sumL = 0, sumR = 0;
 755
 756    /* Our solution here is to find the darkest and brightest colors in
 757     * the 4x4 tile and use those as the two representative colors.
 758     * There are probably better algorithms to use (histogram-based).
 759     */
 760    minSum = 2000; /* big enough */
 761    maxSum = -1; /* small enough */
 762    for (k = 0; k < N_TEXELS / 2; k++) {
 763       GLint sum = 0;
 764       for (i = 0; i < n_comp; i++) {
 765          sum += input[k][i];
 766       }
 767       if (minSum > sum) {
 768          minSum = sum;
 769          minColL = k;
 770       }
 771       if (maxSum < sum) {
 772          maxSum = sum;
 773          maxColL = k;
 774       }
 775       sumL += sum;
 776    }
 777    minSum = 2000; /* big enough */
 778    maxSum = -1; /* small enough */
 779    for (; k < N_TEXELS; k++) {
 780       GLint sum = 0;
 781       for (i = 0; i < n_comp; i++) {
 782          sum += input[k][i];
 783       }
 784       if (minSum > sum) {
 785          minSum = sum;
 786          minColR = k;
 787       }
 788       if (maxSum < sum) {
 789          maxSum = sum;
 790          maxColR = k;
 791       }
 792       sumR += sum;
 793    }
 794
 795    /* choose the common vector (yuck!) */
 796    {
 797       GLint j1, j2;
 798       GLint v1 = 0, v2 = 0;
 799       GLfloat err = 1e9; /* big enough */
 800       GLfloat tv[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
 801       for (i = 0; i < n_comp; i++) {
 802          tv[0][i] = input[minColL][i];
 803          tv[1][i] = input[maxColL][i];
 804          tv[2][i] = input[minColR][i];
 805          tv[3][i] = input[maxColR][i];
 806       }
 807       for (j1 = 0; j1 < 2; j1++) {
 808          for (j2 = 2; j2 < 4; j2++) {
 809             GLfloat e = 0.0F;
 810             for (i = 0; i < n_comp; i++) {
 811                e += (tv[j1][i] - tv[j2][i]) * (tv[j1][i] - tv[j2][i]);
 812             }
 813             if (e < err) {
 814                err = e;
 815                v1 = j1;
 816                v2 = j2;
 817             }
 818          }
 819       }
 820       for (i = 0; i < n_comp; i++) {
 821          vec[0][i] = tv[1 - v1][i];
 822          vec[1][i] = (tv[v1][i] * sumL + tv[v2][i] * sumR) / (sumL + sumR);
 823          vec[2][i] = tv[5 - v2][i];
 824       }
 825    }
 826
 827    /* left microtile */
 828    cc[0] = 0;
 829    if (minColL != maxColL) {
 830       /* compute interpolation vector */
 831       MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
 832
 833       /* add in texels */
 834       lolo = 0;
 835       for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
 836          GLint texel;
 837          /* interpolate color */
 838          CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
 839          /* add in texel */
 840          lolo <<= 2;
 841          lolo |= texel;
 842       }
 843
 844       cc[0] = lolo;
 845    }
 846
 847    /* right microtile */
 848    cc[1] = 0;
 849    if (minColR != maxColR) {
 850       /* compute interpolation vector */
 851       MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[1]);
 852
 853       /* add in texels */
 854       lohi = 0;
 855       for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
 856          GLint texel;
 857          /* interpolate color */
 858          CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
 859          /* add in texel */
 860          lohi <<= 2;
 861          lohi |= texel;
 862       }
 863
 864       cc[1] = lohi;
 865    }
 866
 867    FX64_MOV32(hi, 7); /* alpha = "011" + lerp = 1 */
 868    for (j = n_vect - 1; j >= 0; j--) {
 869       /* add in alphas */
 870       FX64_SHL(hi, 5);
 871       FX64_OR32(hi, (GLuint)(vec[j][ACOMP] / 8.0F));
 872    }
 873    for (j = n_vect - 1; j >= 0; j--) {
 874       for (i = 0; i < n_comp - 1; i++) {
 875          /* add in colors */
 876          FX64_SHL(hi, 5);
 877          FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
 878       }
 879    }
 880    ((Fx64 *)cc)[1] = hi;
 881 }
 882
 883
 884 static void
 885 fxt1_quantize_HI (GLuint *cc,
 886                   GLubyte input[N_TEXELS][MAX_COMP],
 887                   GLubyte reord[N_TEXELS][MAX_COMP], GLint n)
 888 {
 889    const GLint n_vect = 6; /* highest vector number */
 890    const GLint n_comp = 3; /* 3 components: R, G, B */
 891    GLfloat b = 0.0F;       /* phoudoin: silent compiler! */
 892    GLfloat iv[MAX_COMP];   /* interpolation vector */
 893    GLint i, k;
 894    GLuint hihi; /* high quadword: hi dword */
 895
 896    GLint minSum = 2000; /* big enough */
 897    GLint maxSum = -1; /* small enough */
 898    GLint minCol = 0; /* phoudoin: silent compiler! */
 899    GLint maxCol = 0; /* phoudoin: silent compiler! */
 900
 901    /* Our solution here is to find the darkest and brightest colors in
 902     * the 8x4 tile and use those as the two representative colors.
 903     * There are probably better algorithms to use (histogram-based).
 904     */
 905    for (k = 0; k < n; k++) {
 906       GLint sum = 0;
 907       for (i = 0; i < n_comp; i++) {
 908          sum += reord[k][i];
 909       }
 910       if (minSum > sum) {
 911          minSum = sum;
 912          minCol = k;
 913       }
 914       if (maxSum < sum) {
 915          maxSum = sum;
 916          maxCol = k;
 917       }
 918    }
 919
 920    hihi = 0; /* cc-hi = "00" */
 921    for (i = 0; i < n_comp; i++) {
 922       /* add in colors */
 923       hihi <<= 5;
 924       hihi |= reord[maxCol][i] >> 3;
 925    }
 926    for (i = 0; i < n_comp; i++) {
 927       /* add in colors */
 928       hihi <<= 5;
 929       hihi |= reord[minCol][i] >> 3;
 930    }
 931    cc[3] = hihi;
 932    cc[0] = cc[1] = cc[2] = 0;
 933
 934    /* compute interpolation vector */
 935    if (minCol != maxCol) {
 936       MAKEIVEC(n_vect, n_comp, iv, b, reord[minCol], reord[maxCol]);
 937    }
 938
 939    /* add in texels */
 940    for (k = N_TEXELS - 1; k >= 0; k--) {
 941       GLint t = k * 3;
 942       GLuint *kk = (GLuint *)((char *)cc + t / 8);
 943       GLint texel = n_vect + 1; /* transparent black */
 944
 945       if (!ISTBLACK(input[k])) {
 946          if (minCol != maxCol) {
 947             /* interpolate color */
 948             CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
 949             /* add in texel */
 950             kk[0] |= texel << (t & 7);
 951          }
 952       } else {
 953          /* add in texel */
 954          kk[0] |= texel << (t & 7);
 955       }
 956    }
 957 }
 958
 959
 960 static void
 961 fxt1_quantize_MIXED1 (GLuint *cc,
 962                       GLubyte input[N_TEXELS][MAX_COMP])
 963 {
 964    const GLint n_vect = 2; /* highest vector number in each microtile */
 965    const GLint n_comp = 3; /* 3 components: R, G, B */
 966    GLubyte vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
 967    GLfloat b, iv[MAX_COMP]; /* interpolation vector */
 968    GLint i, j, k;
 969    Fx64 hi; /* high quadword */
 970    GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
 971
 972    GLint minSum;
 973    GLint maxSum;
 974    GLint minColL = 0, maxColL = -1;
 975    GLint minColR = 0, maxColR = -1;
 976
 977    /* Our solution here is to find the darkest and brightest colors in
 978     * the 4x4 tile and use those as the two representative colors.
 979     * There are probably better algorithms to use (histogram-based).
 980     */
 981    minSum = 2000; /* big enough */
 982    maxSum = -1; /* small enough */
 983    for (k = 0; k < N_TEXELS / 2; k++) {
 984       if (!ISTBLACK(input[k])) {
 985          GLint sum = 0;
 986          for (i = 0; i < n_comp; i++) {
 987             sum += input[k][i];
 988          }
 989          if (minSum > sum) {
 990             minSum = sum;
 991             minColL = k;
 992          }
 993          if (maxSum < sum) {
 994             maxSum = sum;
 995             maxColL = k;
 996          }
 997       }
 998    }
 999    minSum = 2000; /* big enough */
1000    maxSum = -1; /* small enough */
1001    for (; k < N_TEXELS; k++) {
1002       if (!ISTBLACK(input[k])) {
1003          GLint sum = 0;
1004          for (i = 0; i < n_comp; i++) {
1005             sum += input[k][i];
1006          }
1007          if (minSum > sum) {
1008             minSum = sum;
1009             minColR = k;
1010          }
1011          if (maxSum < sum) {
1012             maxSum = sum;
1013             maxColR = k;
1014          }
1015       }
1016    }
1017
1018    /* left microtile */
1019    if (maxColL == -1) {
1020       /* all transparent black */
1021       cc[0] = ~0u;
1022       for (i = 0; i < n_comp; i++) {
1023          vec[0][i] = 0;
1024          vec[1][i] = 0;
1025       }
1026    } else {
1027       cc[0] = 0;
1028       for (i = 0; i < n_comp; i++) {
1029          vec[0][i] = input[minColL][i];
1030          vec[1][i] = input[maxColL][i];
1031       }
1032       if (minColL != maxColL) {
1033          /* compute interpolation vector */
1034          MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
1035
1036          /* add in texels */
1037          lolo = 0;
1038          for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
1039             GLint texel = n_vect + 1; /* transparent black */
1040             if (!ISTBLACK(input[k])) {
1041                /* interpolate color */
1042                CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
1043             }
1044             /* add in texel */
1045             lolo <<= 2;
1046             lolo |= texel;
1047          }
1048          cc[0] = lolo;
1049       }
1050    }
1051
1052    /* right microtile */
1053    if (maxColR == -1) {
1054       /* all transparent black */
1055       cc[1] = ~0u;
1056       for (i = 0; i < n_comp; i++) {
1057          vec[2][i] = 0;
1058          vec[3][i] = 0;
1059       }
1060    } else {
1061       cc[1] = 0;
1062       for (i = 0; i < n_comp; i++) {
1063          vec[2][i] = input[minColR][i];
1064          vec[3][i] = input[maxColR][i];
1065       }
1066       if (minColR != maxColR) {
1067          /* compute interpolation vector */
1068          MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);
1069
1070          /* add in texels */
1071          lohi = 0;
1072          for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
1073             GLint texel = n_vect + 1; /* transparent black */
1074             if (!ISTBLACK(input[k])) {
1075                /* interpolate color */
1076                CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
1077             }
1078             /* add in texel */
1079             lohi <<= 2;
1080             lohi |= texel;
1081          }
1082          cc[1] = lohi;
1083       }
1084    }
1085
1086    FX64_MOV32(hi, 9 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
1087    for (j = 2 * 2 - 1; j >= 0; j--) {
1088       for (i = 0; i < n_comp; i++) {
1089          /* add in colors */
1090          FX64_SHL(hi, 5);
1091          FX64_OR32(hi, vec[j][i] >> 3);
1092       }
1093    }
1094    ((Fx64 *)cc)[1] = hi;
1095 }
1096
1097
1098 static void
1099 fxt1_quantize_MIXED0 (GLuint *cc,
1100                       GLubyte input[N_TEXELS][MAX_COMP])
1101 {
1102    const GLint n_vect = 3; /* highest vector number in each microtile */
1103    const GLint n_comp = 3; /* 3 components: R, G, B */
1104    GLubyte vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
1105    GLfloat b, iv[MAX_COMP]; /* interpolation vector */
1106    GLint i, j, k;
1107    Fx64 hi; /* high quadword */
1108    GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
1109
1110    GLint minColL = 0, maxColL = 0;
1111    GLint minColR = 0, maxColR = 0;
1112 #if 0
1113    GLint minSum;
1114    GLint maxSum;
1115
1116    /* Our solution here is to find the darkest and brightest colors in
1117     * the 4x4 tile and use those as the two representative colors.
1118     * There are probably better algorithms to use (histogram-based).
1119     */
1120    minSum = 2000; /* big enough */
1121    maxSum = -1; /* small enough */
1122    for (k = 0; k < N_TEXELS / 2; k++) {
1123       GLint sum = 0;
1124       for (i = 0; i < n_comp; i++) {
1125          sum += input[k][i];
1126       }
1127       if (minSum > sum) {
1128          minSum = sum;
1129          minColL = k;
1130       }
1131       if (maxSum < sum) {
1132          maxSum = sum;
1133          maxColL = k;
1134       }
1135    }
1136    minSum = 2000; /* big enough */
1137    maxSum = -1; /* small enough */
1138    for (; k < N_TEXELS; k++) {
1139       GLint sum = 0;
1140       for (i = 0; i < n_comp; i++) {
1141          sum += input[k][i];
1142       }
1143       if (minSum > sum) {
1144          minSum = sum;
1145          minColR = k;
1146       }
1147       if (maxSum < sum) {
1148          maxSum = sum;
1149          maxColR = k;
1150       }
1151    }
1152 #else
1153    GLint minVal;
1154    GLint maxVal;
1155    GLint maxVarL = fxt1_variance(NULL, input, n_comp, N_TEXELS / 2);
1156    GLint maxVarR = fxt1_variance(NULL, &input[N_TEXELS / 2], n_comp, N_TEXELS / 2);
1157
1158    /* Scan the channel with max variance for lo & hi
1159     * and use those as the two representative colors.
1160     */
1161    minVal = 2000; /* big enough */
1162    maxVal = -1; /* small enough */
1163    for (k = 0; k < N_TEXELS / 2; k++) {
1164       GLint t = input[k][maxVarL];
1165       if (minVal > t) {
1166          minVal = t;
1167          minColL = k;
1168       }
1169       if (maxVal < t) {
1170          maxVal = t;
1171          maxColL = k;
1172       }
1173    }
1174    minVal = 2000; /* big enough */
1175    maxVal = -1; /* small enough */
1176    for (; k < N_TEXELS; k++) {
1177       GLint t = input[k][maxVarR];
1178       if (minVal > t) {
1179          minVal = t;
1180          minColR = k;
1181       }
1182       if (maxVal < t) {
1183          maxVal = t;
1184          maxColR = k;
1185       }
1186    }
1187 #endif
1188
1189    /* left microtile */
1190    cc[0] = 0;
1191    for (i = 0; i < n_comp; i++) {
1192       vec[0][i] = input[minColL][i];
1193       vec[1][i] = input[maxColL][i];
1194    }
1195    if (minColL != maxColL) {
1196       /* compute interpolation vector */
1197       MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
1198
1199       /* add in texels */
1200       lolo = 0;
1201       for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
1202          GLint texel;
1203          /* interpolate color */
1204          CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
1205          /* add in texel */
1206          lolo <<= 2;
1207          lolo |= texel;
1208       }
1209
1210       /* funky encoding for LSB of green */
1211       if ((GLint)((lolo >> 1) & 1) != (((vec[1][GCOMP] ^ vec[0][GCOMP]) >> 2) & 1)) {
1212          for (i = 0; i < n_comp; i++) {
1213             vec[1][i] = input[minColL][i];
1214             vec[0][i] = input[maxColL][i];
1215          }
1216          lolo = ~lolo;
1217       }
1218
1219       cc[0] = lolo;
1220    }
1221
1222    /* right microtile */
1223    cc[1] = 0;
1224    for (i = 0; i < n_comp; i++) {
1225       vec[2][i] = input[minColR][i];
1226       vec[3][i] = input[maxColR][i];
1227    }
1228    if (minColR != maxColR) {
1229       /* compute interpolation vector */
1230       MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);
1231
1232       /* add in texels */
1233       lohi = 0;
1234       for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
1235          GLint texel;
1236          /* interpolate color */
1237          CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
1238          /* add in texel */
1239          lohi <<= 2;
1240          lohi |= texel;
1241       }
1242
1243       /* funky encoding for LSB of green */
1244       if ((GLint)((lohi >> 1) & 1) != (((vec[3][GCOMP] ^ vec[2][GCOMP]) >> 2) & 1)) {
1245          for (i = 0; i < n_comp; i++) {
1246             vec[3][i] = input[minColR][i];
1247             vec[2][i] = input[maxColR][i];
1248          }
1249          lohi = ~lohi;
1250       }
1251
1252       cc[1] = lohi;
1253    }
1254
1255    FX64_MOV32(hi, 8 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
1256    for (j = 2 * 2 - 1; j >= 0; j--) {
1257       for (i = 0; i < n_comp; i++) {
1258          /* add in colors */
1259          FX64_SHL(hi, 5);
1260          FX64_OR32(hi, vec[j][i] >> 3);
1261       }
1262    }
1263    ((Fx64 *)cc)[1] = hi;
1264 }
1265
1266
1267 static void
1268 fxt1_quantize (GLuint *cc, const GLubyte *lines[], GLint comps)
1269 {
1270    GLint trualpha;
1271    GLubyte reord[N_TEXELS][MAX_COMP];
1272
1273    GLubyte input[N_TEXELS][MAX_COMP];
1274    GLint i, k, l;
1275
1276    if (comps == 3) {
1277       /* make the whole block opaque */
1278       _mesa_memset(input, -1, sizeof(input));
1279    }
1280
1281    /* 8 texels each line */
1282    for (l = 0; l < 4; l++) {
1283       for (k = 0; k < 4; k++) {
1284          for (i = 0; i < comps; i++) {
1285             input[k + l * 4][i] = *lines[l]++;
1286          }
1287       }
1288       for (; k < 8; k++) {
1289          for (i = 0; i < comps; i++) {
1290             input[k + l * 4 + 12][i] = *lines[l]++;
1291          }
1292       }
1293    }
1294
1295    /* block layout:
1296     * 00, 01, 02, 03, 08, 09, 0a, 0b
1297     * 10, 11, 12, 13, 18, 19, 1a, 1b
1298     * 04, 05, 06, 07, 0c, 0d, 0e, 0f
1299     * 14, 15, 16, 17, 1c, 1d, 1e, 1f
1300     */
1301
1302    /* [dBorca]
1303     * stupidity flows forth from this
1304     */
1305    l = N_TEXELS;
1306    trualpha = 0;
1307    if (comps == 4) {
1308       /* skip all transparent black texels */
1309       l = 0;
1310       for (k = 0; k < N_TEXELS; k++) {
1311          /* test all components against 0 */
1312          if (!ISTBLACK(input[k])) {
1313             /* texel is not transparent black */
1314             COPY_4UBV(reord[l], input[k]);
1315             if (reord[l][ACOMP] < (255 - ALPHA_TS)) {
1316                /* non-opaque texel */
1317                trualpha = !0;
1318             }
1319             l++;
1320          }
1321       }
1322    }
1323
1324 #if 0
1325    if (trualpha) {
1326       fxt1_quantize_ALPHA0(cc, input, reord, l);
1327    } else if (l == 0) {
1328       cc[0] = cc[1] = cc[2] = -1;
1329       cc[3] = 0;
1330    } else if (l < N_TEXELS) {
1331       fxt1_quantize_HI(cc, input, reord, l);
1332    } else {
1333       fxt1_quantize_CHROMA(cc, input);
1334    }
1335    (void)fxt1_quantize_ALPHA1;
1336    (void)fxt1_quantize_MIXED1;
1337    (void)fxt1_quantize_MIXED0;
1338 #else
1339    if (trualpha) {
1340       fxt1_quantize_ALPHA1(cc, input);
1341    } else if (l == 0) {
1342       cc[0] = cc[1] = cc[2] = ~0u;
1343       cc[3] = 0;
1344    } else if (l < N_TEXELS) {
1345       fxt1_quantize_MIXED1(cc, input);
1346    } else {
1347       fxt1_quantize_MIXED0(cc, input);
1348    }
1349    (void)fxt1_quantize_ALPHA0;
1350    (void)fxt1_quantize_HI;
1351    (void)fxt1_quantize_CHROMA;
1352 #endif
1353 }
1354
1355
1356 static void
1357 fxt1_encode (GLuint width, GLuint height, GLint comps,
1358              const void *source, GLint srcRowStride,
1359              void *dest, GLint destRowStride)
1360 {
1361    GLuint x, y;
1362    const GLubyte *data;
1363    GLuint *encoded = (GLuint *)dest;
1364    void *newSource = NULL;
1365
1366    assert(comps == 3 || comps == 4);
1367
1368    /* Replicate image if width is not M8 or height is not M4 */
1369    if ((width & 7) | (height & 3)) {
1370       GLint newWidth = (width + 7) & ~7;
1371       GLint newHeight = (height + 3) & ~3;
1372       newSource = _mesa_malloc(comps * newWidth * newHeight * sizeof(GLchan));
1373       if (!newSource) {
1374          GET_CURRENT_CONTEXT(ctx);
1375          _mesa_error(ctx, GL_OUT_OF_MEMORY, "texture compression");
1376          goto cleanUp;
1377       }
1378       _mesa_upscale_teximage2d(width, height, newWidth, newHeight,
1379                                comps, (const GLchan *) source,
1380                                srcRowStride, (GLchan *) newSource);
1381       source = newSource;
1382       width = newWidth;
1383       height = newHeight;
1384       srcRowStride = comps * newWidth;
1385    }
1386
1387    /* convert from 16/32-bit channels to GLubyte if needed */
1388    if (CHAN_TYPE != GL_UNSIGNED_BYTE) {
1389       const GLuint n = width * height * comps;
1390       const GLchan *src = (const GLchan *) source;
1391       GLubyte *dest = (GLubyte *) _mesa_malloc(n * sizeof(GLubyte));
1392       GLuint i;
1393       if (!dest) {
1394          GET_CURRENT_CONTEXT(ctx);
1395          _mesa_error(ctx, GL_OUT_OF_MEMORY, "texture compression");
1396          goto cleanUp;
1397       }
1398       for (i = 0; i < n; i++) {
1399          dest[i] = CHAN_TO_UBYTE(src[i]);
1400       }
1401       if (newSource != NULL) {
1402          _mesa_free(newSource);
1403       }
1404       newSource = dest;  /* we'll free this buffer before returning */
1405       source = dest;  /* the new, GLubyte incoming image */
1406    }
1407
1408    data = (const GLubyte *) source;
1409    destRowStride = (destRowStride - width * 2) / 4;
1410    for (y = 0; y < height; y += 4) {
1411       GLuint offs = 0 + (y + 0) * srcRowStride;
1412       for (x = 0; x < width; x += 8) {
1413          const GLubyte *lines[4];
1414          lines[0] = &data[offs];
1415          lines[1] = lines[0] + srcRowStride;
1416          lines[2] = lines[1] + srcRowStride;
1417          lines[3] = lines[2] + srcRowStride;
1418          offs += 8 * comps;
1419          fxt1_quantize(encoded, lines, comps);
1420          /* 128 bits per 8x4 block */
1421          encoded += 4;
1422       }
1423       encoded += destRowStride;
1424    }
1425
1426  cleanUp:
1427    if (newSource != NULL) {
1428       _mesa_free(newSource);
1429    }
1430 }
1431
1432
1433 /***************************************************************************\
1434  * FXT1 decoder
1435  *
1436  * The decoder is based on GL_3DFX_texture_compression_FXT1
1437  * specification and serves as a concept for the encoder.
1438 \***************************************************************************/
1439
1440
1441 /* lookup table for scaling 5 bit colors up to 8 bits */
1442 static const GLubyte _rgb_scale_5[] = {
1443    0,   8,   16,  25,  33,  41,  49,  58,
1444    66,  74,  82,  90,  99,  107, 115, 123,
1445    132, 140, 148, 156, 165, 173, 181, 189,
1446    197, 206, 214, 222, 230, 239, 247, 255
1447 };
1448
1449 /* lookup table for scaling 6 bit colors up to 8 bits */
1450 static const GLubyte _rgb_scale_6[] = {
1451    0,   4,   8,   12,  16,  20,  24,  28,
1452    32,  36,  40,  45,  49,  53,  57,  61,
1453    65,  69,  73,  77,  81,  85,  89,  93,
1454    97,  101, 105, 109, 113, 117, 121, 125,
1455    130, 134, 138, 142, 146, 150, 154, 158,
1456    162, 166, 170, 174, 178, 182, 186, 190,
1457    194, 198, 202, 206, 210, 215, 219, 223,
1458    227, 231, 235, 239, 243, 247, 251, 255
1459 };
1460
1461
/* CC_SEL: view the block as 32-bit words and shift bit number `which` down
 * to bit 0.  The result is not masked; callers mask it themselves or go
 * through UP5/UP6.  The cast keeps the pointee const.
 */
#define CC_SEL(cc, which) (((const GLuint *)(cc))[(which) / 32] >> ((which) & 31))
/* UP5: expand the low 5 bits of c to an 8-bit value. */
#define UP5(c) _rgb_scale_5[(c) & 31]
/* UP6: expand the 6-bit value made of the low 5 bits of c and LSB b. */
#define UP6(c, b) _rgb_scale_6[(((c) & 31) << 1) | ((b) & 1)]
/* LERP: blend c0 (t == 0) towards c1 (t == n) in n steps, rounding to
 * nearest.  Fully parenthesized so it can sit inside larger expressions.
 */
#define LERP(n, t, c0, c1) ((((n) - (t)) * (c0) + (t) * (c1) + (n) / 2) / (n))
1466
1467
1468 static void
1469 fxt1_decode_1HI (const GLubyte *code, GLint t, GLchan *rgba)
1470 {
1471    const GLuint *cc;
1472
1473    t *= 3;
1474    cc = (const GLuint *)(code + t / 8);
1475    t = (cc[0] >> (t & 7)) & 7;
1476
1477    if (t == 7) {
1478       rgba[RCOMP] = rgba[GCOMP] = rgba[BCOMP] = rgba[ACOMP] = 0;
1479    } else {
1480       GLubyte r, g, b;
1481       cc = (const GLuint *)(code + 12);
1482       if (t == 0) {
1483          b = UP5(CC_SEL(cc, 0));
1484          g = UP5(CC_SEL(cc, 5));
1485          r = UP5(CC_SEL(cc, 10));
1486       } else if (t == 6) {
1487          b = UP5(CC_SEL(cc, 15));
1488          g = UP5(CC_SEL(cc, 20));
1489          r = UP5(CC_SEL(cc, 25));
1490       } else {
1491          b = LERP(6, t, UP5(CC_SEL(cc, 0)), UP5(CC_SEL(cc, 15)));
1492          g = LERP(6, t, UP5(CC_SEL(cc, 5)), UP5(CC_SEL(cc, 20)));
1493          r = LERP(6, t, UP5(CC_SEL(cc, 10)), UP5(CC_SEL(cc, 25)));
1494       }
1495       rgba[RCOMP] = UBYTE_TO_CHAN(r);
1496       rgba[GCOMP] = UBYTE_TO_CHAN(g);
1497       rgba[BCOMP] = UBYTE_TO_CHAN(b);
1498       rgba[ACOMP] = CHAN_MAX;
1499    }
1500 }
1501
1502
1503 static void
1504 fxt1_decode_1CHROMA (const GLubyte *code, GLint t, GLchan *rgba)
1505 {
1506    const GLuint *cc;
1507    GLuint kk;
1508
1509    cc = (const GLuint *)code;
1510    if (t & 16) {
1511       cc++;
1512       t &= 15;
1513    }
1514    t = (cc[0] >> (t * 2)) & 3;
1515
1516    t *= 15;
1517    cc = (const GLuint *)(code + 8 + t / 8);
1518    kk = cc[0] >> (t & 7);
1519    rgba[BCOMP] = UBYTE_TO_CHAN( UP5(kk) );
1520    rgba[GCOMP] = UBYTE_TO_CHAN( UP5(kk >> 5) );
1521    rgba[RCOMP] = UBYTE_TO_CHAN( UP5(kk >> 10) );
1522    rgba[ACOMP] = CHAN_MAX;
1523 }
1524
1525
1526 static void
1527 fxt1_decode_1MIXED (const GLubyte *code, GLint t, GLchan *rgba)
1528 {
1529    const GLuint *cc;
1530    GLuint col[2][3];
1531    GLint glsb, selb;
1532
1533    cc = (const GLuint *)code;
1534    if (t & 16) {
1535       t &= 15;
1536       t = (cc[1] >> (t * 2)) & 3;
1537       /* col 2 */
1538       col[0][BCOMP] = (*(const GLuint *)(code + 11)) >> 6;
1539       col[0][GCOMP] = CC_SEL(cc, 99);
1540       col[0][RCOMP] = CC_SEL(cc, 104);
1541       /* col 3 */
1542       col[1][BCOMP] = CC_SEL(cc, 109);
1543       col[1][GCOMP] = CC_SEL(cc, 114);
1544       col[1][RCOMP] = CC_SEL(cc, 119);
1545       glsb = CC_SEL(cc, 126);
1546       selb = CC_SEL(cc, 33);
1547    } else {
1548       t = (cc[0] >> (t * 2)) & 3;
1549       /* col 0 */
1550       col[0][BCOMP] = CC_SEL(cc, 64);
1551       col[0][GCOMP] = CC_SEL(cc, 69);
1552       col[0][RCOMP] = CC_SEL(cc, 74);
1553       /* col 1 */
1554       col[1][BCOMP] = CC_SEL(cc, 79);
1555       col[1][GCOMP] = CC_SEL(cc, 84);
1556       col[1][RCOMP] = CC_SEL(cc, 89);
1557       glsb = CC_SEL(cc, 125);
1558       selb = CC_SEL(cc, 1);
1559    }
1560
1561    if (CC_SEL(cc, 124) & 1) {
1562       /* alpha[0] == 1 */
1563
1564       if (t == 3) {
1565          /* zero */
1566          rgba[RCOMP] = rgba[BCOMP] = rgba[GCOMP] = rgba[ACOMP] = 0;
1567       } else {
1568          GLubyte r, g, b;
1569          if (t == 0) {
1570             b = UP5(col[0][BCOMP]);
1571             g = UP5(col[0][GCOMP]);
1572             r = UP5(col[0][RCOMP]);
1573          } else if (t == 2) {
1574             b = UP5(col[1][BCOMP]);
1575             g = UP6(col[1][GCOMP], glsb);
1576             r = UP5(col[1][RCOMP]);
1577          } else {
1578             b = (UP5(col[0][BCOMP]) + UP5(col[1][BCOMP])) / 2;
1579             g = (UP5(col[0][GCOMP]) + UP6(col[1][GCOMP], glsb)) / 2;
1580             r = (UP5(col[0][RCOMP]) + UP5(col[1][RCOMP])) / 2;
1581          }
1582          rgba[RCOMP] = UBYTE_TO_CHAN(r);
1583          rgba[GCOMP] = UBYTE_TO_CHAN(g);
1584          rgba[BCOMP] = UBYTE_TO_CHAN(b);
1585          rgba[ACOMP] = CHAN_MAX;
1586       }
1587    } else {
1588       /* alpha[0] == 0 */
1589       GLubyte r, g, b;
1590       if (t == 0) {
1591          b = UP5(col[0][BCOMP]);
1592          g = UP6(col[0][GCOMP], glsb ^ selb);
1593          r = UP5(col[0][RCOMP]);
1594       } else if (t == 3) {
1595          b = UP5(col[1][BCOMP]);
1596          g = UP6(col[1][GCOMP], glsb);
1597          r = UP5(col[1][RCOMP]);
1598       } else {
1599          b = LERP(3, t, UP5(col[0][BCOMP]), UP5(col[1][BCOMP]));
1600          g = LERP(3, t, UP6(col[0][GCOMP], glsb ^ selb),
1601                         UP6(col[1][GCOMP], glsb));
1602          r = LERP(3, t, UP5(col[0][RCOMP]), UP5(col[1][RCOMP]));
1603       }
1604       rgba[RCOMP] = UBYTE_TO_CHAN(r);
1605       rgba[GCOMP] = UBYTE_TO_CHAN(g);
1606       rgba[BCOMP] = UBYTE_TO_CHAN(b);
1607       rgba[ACOMP] = CHAN_MAX;
1608    }
1609 }
1610
1611
1612 static void
1613 fxt1_decode_1ALPHA (const GLubyte *code, GLint t, GLchan *rgba)
1614 {
1615    const GLuint *cc;
1616    GLubyte r, g, b, a;
1617
1618    cc = (const GLuint *)code;
1619    if (CC_SEL(cc, 124) & 1) {
1620       /* lerp == 1 */
1621       GLuint col0[4];
1622
1623       if (t & 16) {
1624          t &= 15;
1625          t = (cc[1] >> (t * 2)) & 3;
1626          /* col 2 */
1627          col0[BCOMP] = (*(const GLuint *)(code + 11)) >> 6;
1628          col0[GCOMP] = CC_SEL(cc, 99);
1629          col0[RCOMP] = CC_SEL(cc, 104);
1630          col0[ACOMP] = CC_SEL(cc, 119);
1631       } else {
1632          t = (cc[0] >> (t * 2)) & 3;
1633          /* col 0 */
1634          col0[BCOMP] = CC_SEL(cc, 64);
1635          col0[GCOMP] = CC_SEL(cc, 69);
1636          col0[RCOMP] = CC_SEL(cc, 74);
1637          col0[ACOMP] = CC_SEL(cc, 109);
1638       }
1639
1640       if (t == 0) {
1641          b = UP5(col0[BCOMP]);
1642          g = UP5(col0[GCOMP]);
1643          r = UP5(col0[RCOMP]);
1644          a = UP5(col0[ACOMP]);
1645       } else if (t == 3) {
1646          b = UP5(CC_SEL(cc, 79));
1647          g = UP5(CC_SEL(cc, 84));
1648          r = UP5(CC_SEL(cc, 89));
1649          a = UP5(CC_SEL(cc, 114));
1650       } else {
1651          b = LERP(3, t, UP5(col0[BCOMP]), UP5(CC_SEL(cc, 79)));
1652          g = LERP(3, t, UP5(col0[GCOMP]), UP5(CC_SEL(cc, 84)));
1653          r = LERP(3, t, UP5(col0[RCOMP]), UP5(CC_SEL(cc, 89)));
1654          a = LERP(3, t, UP5(col0[ACOMP]), UP5(CC_SEL(cc, 114)));
1655       }
1656    } else {
1657       /* lerp == 0 */
1658
1659       if (t & 16) {
1660          cc++;
1661          t &= 15;
1662       }
1663       t = (cc[0] >> (t * 2)) & 3;
1664
1665       if (t == 3) {
1666          /* zero */
1667          r = g = b = a = 0;
1668       } else {
1669          GLuint kk;
1670          cc = (const GLuint *)code;
1671          a = UP5(cc[3] >> (t * 5 + 13));
1672          t *= 15;
1673          cc = (const GLuint *)(code + 8 + t / 8);
1674          kk = cc[0] >> (t & 7);
1675          b = UP5(kk);
1676          g = UP5(kk >> 5);
1677          r = UP5(kk >> 10);
1678       }
1679    }
1680    rgba[RCOMP] = UBYTE_TO_CHAN(r);
1681    rgba[GCOMP] = UBYTE_TO_CHAN(g);
1682    rgba[BCOMP] = UBYTE_TO_CHAN(b);
1683    rgba[ACOMP] = UBYTE_TO_CHAN(a);
1684 }
1685
1686
1687 void
1688 fxt1_decode_1 (const void *texture, GLint stride, /* in pixels */
1689                GLint i, GLint j, GLchan *rgba)
1690 {
1691    static void (*decode_1[]) (const GLubyte *, GLint, GLchan *) = {
1692       fxt1_decode_1HI,     /* cc-high   = "00?" */
1693       fxt1_decode_1HI,     /* cc-high   = "00?" */
1694       fxt1_decode_1CHROMA, /* cc-chroma = "010" */
1695       fxt1_decode_1ALPHA,  /* alpha     = "011" */
1696       fxt1_decode_1MIXED,  /* mixed     = "1??" */
1697       fxt1_decode_1MIXED,  /* mixed     = "1??" */
1698       fxt1_decode_1MIXED,  /* mixed     = "1??" */
1699       fxt1_decode_1MIXED   /* mixed     = "1??" */
1700    };
1701
1702    const GLubyte *code = (const GLubyte *)texture +
1703                          ((j / 4) * (stride / 8) + (i / 8)) * 16;
1704    GLint mode = CC_SEL(code, 125);
1705    GLint t = i & 7;
1706
1707    if (t & 4) {
1708       t += 12;
1709    }
1710    t += (j & 3) * 4;
1711
1712    decode_1[mode](code, t, rgba);
1713 }