src/mesa/main/texcompress_fxt1.c

   1 /*
   2  * Mesa 3-D graphics library
   3  * Version:  7.1
   4  *
   5  * Copyright (C) 1999-2008  Brian Paul   All Rights Reserved.
   6  *
   7  * Permission is hereby granted, free of charge, to any person obtaining a
   8  * copy of this software and associated documentation files (the "Software"),
   9  * to deal in the Software without restriction, including without limitation
  10  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  11  * and/or sell copies of the Software, and to permit persons to whom the
  12  * Software is furnished to do so, subject to the following conditions:
  13  *
  14  * The above copyright notice and this permission notice shall be included
  15  * in all copies or substantial portions of the Software.
  16  *
  17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  18  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  20  * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
  21  * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  22  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  23  */
  24
  25
/**
 * \file texcompress_fxt1.c
 * GL_3DFX_texture_compression_FXT1 support.
 */
  30
  31
  32 #include "glheader.h"
  33 #include "imports.h"
  34 #include "colormac.h"
  35 #include "context.h"
  36 #include "convolve.h"
  37 #include "image.h"
  38 #include "mipmap.h"
  39 #include "texcompress.h"
  40 #include "texcompress_fxt1.h"
  41 #include "texformat.h"
  42 #include "texstore.h"
  43
  44
  45 static void
  46 fxt1_encode (GLuint width, GLuint height, GLint comps,
  47              const void *source, GLint srcRowStride,
  48              void *dest, GLint destRowStride);
  49
  50 void
  51 fxt1_decode_1 (const void *texture, GLint stride,
  52                GLint i, GLint j, GLchan *rgba);
  53
  54
  55 /**
  56  * Called during context initialization.
  57  */
  58 void
  59 _mesa_init_texture_fxt1( GLcontext *ctx )
  60 {
  61    (void) ctx;
  62 }
  63
  64
  65 /**
  66  * Called via TexFormat->StoreImage to store an RGB_FXT1 texture.
  67  */
  68 GLboolean
  69 _mesa_texstore_rgb_fxt1(TEXSTORE_PARAMS)
  70 {
  71    const GLchan *pixels;
  72    GLint srcRowStride;
  73    GLubyte *dst;
  74    const GLint texWidth = dstRowStride * 8 / 16; /* a bit of a hack */
  75    const GLchan *tempImage = NULL;
  76
  77    ASSERT(dstFormat == &_mesa_texformat_rgb_fxt1);
  78    ASSERT(dstXoffset % 8 == 0);
  79    ASSERT(dstYoffset % 4 == 0);
  80    ASSERT(dstZoffset     == 0);
  81    (void) dstZoffset;
  82    (void) dstImageOffsets;
  83
  84    if (srcFormat != GL_RGB ||
  85        srcType != CHAN_TYPE ||
  86        ctx->_ImageTransferState ||
  87        srcPacking->SwapBytes) {
  88       /* convert image to RGB/GLchan */
  89       tempImage = _mesa_make_temp_chan_image(ctx, dims,
  90                                              baseInternalFormat,
  91                                              dstFormat->BaseFormat,
  92                                              srcWidth, srcHeight, srcDepth,
  93                                              srcFormat, srcType, srcAddr,
  94                                              srcPacking);
  95       if (!tempImage)
  96          return GL_FALSE; /* out of memory */
  97       _mesa_adjust_image_for_convolution(ctx, dims, &srcWidth, &srcHeight);
  98       pixels = tempImage;
  99       srcRowStride = 3 * srcWidth;
 100       srcFormat = GL_RGB;
 101    }
 102    else {
 103       pixels = (const GLchan *) srcAddr;
 104       srcRowStride = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat,
 105                                             srcType) / sizeof(GLchan);
 106    }
 107
 108    dst = _mesa_compressed_image_address(dstXoffset, dstYoffset, 0,
 109                                         dstFormat->MesaFormat,
 110                                         texWidth, (GLubyte *) dstAddr);
 111
 112    fxt1_encode(srcWidth, srcHeight, 3, pixels, srcRowStride,
 113                dst, dstRowStride);
 114
 115    if (tempImage)
 116       _mesa_free((void*) tempImage);
 117
 118    return GL_TRUE;
 119 }
 120
 121
 122 /**
 123  * Called via TexFormat->StoreImage to store an RGBA_FXT1 texture.
 124  */
 125 GLboolean
 126 _mesa_texstore_rgba_fxt1(TEXSTORE_PARAMS)
 127 {
 128    const GLchan *pixels;
 129    GLint srcRowStride;
 130    GLubyte *dst;
 131    GLint texWidth = dstRowStride * 8 / 16; /* a bit of a hack */
 132    const GLchan *tempImage = NULL;
 133
 134    ASSERT(dstFormat == &_mesa_texformat_rgba_fxt1);
 135    ASSERT(dstXoffset % 8 == 0);
 136    ASSERT(dstYoffset % 4 == 0);
 137    ASSERT(dstZoffset     == 0);
 138    (void) dstZoffset;
 139    (void) dstImageOffsets;
 140
 141    if (srcFormat != GL_RGBA ||
 142        srcType != CHAN_TYPE ||
 143        ctx->_ImageTransferState ||
 144        srcPacking->SwapBytes) {
 145       /* convert image to RGBA/GLchan */
 146       tempImage = _mesa_make_temp_chan_image(ctx, dims,
 147                                              baseInternalFormat,
 148                                              dstFormat->BaseFormat,
 149                                              srcWidth, srcHeight, srcDepth,
 150                                              srcFormat, srcType, srcAddr,
 151                                              srcPacking);
 152       if (!tempImage)
 153          return GL_FALSE; /* out of memory */
 154       _mesa_adjust_image_for_convolution(ctx, dims, &srcWidth, &srcHeight);
 155       pixels = tempImage;
 156       srcRowStride = 4 * srcWidth;
 157       srcFormat = GL_RGBA;
 158    }
 159    else {
 160       pixels = (const GLchan *) srcAddr;
 161       srcRowStride = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat,
 162                                             srcType) / sizeof(GLchan);
 163    }
 164
 165    dst = _mesa_compressed_image_address(dstXoffset, dstYoffset, 0,
 166                                         dstFormat->MesaFormat,
 167                                         texWidth, (GLubyte *) dstAddr);
 168
 169    fxt1_encode(srcWidth, srcHeight, 4, pixels, srcRowStride,
 170                dst, dstRowStride);
 171
 172    if (tempImage)
 173       _mesa_free((void*) tempImage);
 174
 175    return GL_TRUE;
 176 }
 177
 178
 179 void
 180 _mesa_fetch_texel_2d_rgba_fxt1( const struct gl_texture_image *texImage,
 181                                 GLint i, GLint j, GLint k, GLchan *texel )
 182 {
 183    (void) k;
 184    fxt1_decode_1(texImage->Data, texImage->RowStride, i, j, texel);
 185 }
 186
 187
 188 void
 189 _mesa_fetch_texel_2d_f_rgba_fxt1( const struct gl_texture_image *texImage,
 190                                   GLint i, GLint j, GLint k, GLfloat *texel )
 191 {
 192    /* just sample as GLchan and convert to float here */
 193    GLchan rgba[4];
 194    (void) k;
 195    fxt1_decode_1(texImage->Data, texImage->RowStride, i, j, rgba);
 196    texel[RCOMP] = CHAN_TO_FLOAT(rgba[RCOMP]);
 197    texel[GCOMP] = CHAN_TO_FLOAT(rgba[GCOMP]);
 198    texel[BCOMP] = CHAN_TO_FLOAT(rgba[BCOMP]);
 199    texel[ACOMP] = CHAN_TO_FLOAT(rgba[ACOMP]);
 200 }
 201
 202
 203 void
 204 _mesa_fetch_texel_2d_rgb_fxt1( const struct gl_texture_image *texImage,
 205                                GLint i, GLint j, GLint k, GLchan *texel )
 206 {
 207    (void) k;
 208    fxt1_decode_1(texImage->Data, texImage->RowStride, i, j, texel);
 209    texel[ACOMP] = 255;
 210 }
 211
 212
 213 void
 214 _mesa_fetch_texel_2d_f_rgb_fxt1( const struct gl_texture_image *texImage,
 215                                  GLint i, GLint j, GLint k, GLfloat *texel )
 216 {
 217    /* just sample as GLchan and convert to float here */
 218    GLchan rgba[4];
 219    (void) k;
 220    fxt1_decode_1(texImage->Data, texImage->RowStride, i, j, rgba);
 221    texel[RCOMP] = CHAN_TO_FLOAT(rgba[RCOMP]);
 222    texel[GCOMP] = CHAN_TO_FLOAT(rgba[GCOMP]);
 223    texel[BCOMP] = CHAN_TO_FLOAT(rgba[BCOMP]);
 224    texel[ACOMP] = 1.0F;
 225 }
 226
 227
 228
 229 const struct gl_texture_format _mesa_texformat_rgb_fxt1 = {
 230    MESA_FORMAT_RGB_FXT1,                /* MesaFormat */
 231    GL_RGB,                              /* BaseFormat */
 232    GL_UNSIGNED_NORMALIZED_ARB,          /* DataType */
 233    4, /*approx*/                        /* RedBits */
 234    4, /*approx*/                        /* GreenBits */
 235    4, /*approx*/                        /* BlueBits */
 236    0,                                   /* AlphaBits */
 237    0,                                   /* LuminanceBits */
 238    0,                                   /* IntensityBits */
 239    0,                                   /* IndexBits */
 240    0,                                   /* DepthBits */
 241    0,                                   /* StencilBits */
 242    0,                                   /* TexelBytes */
 243    _mesa_texstore_rgb_fxt1,             /* StoreTexImageFunc */
 244    NULL, /*impossible*/                 /* FetchTexel1D */
 245    _mesa_fetch_texel_2d_rgb_fxt1,               /* FetchTexel2D */
 246    NULL, /*impossible*/                 /* FetchTexel3D */
 247    NULL, /*impossible*/                 /* FetchTexel1Df */
 248    _mesa_fetch_texel_2d_f_rgb_fxt1,             /* FetchTexel2Df */
 249    NULL, /*impossible*/                 /* FetchTexel3Df */
 250    NULL                                 /* StoreTexel */
 251 };
 252
 253 const struct gl_texture_format _mesa_texformat_rgba_fxt1 = {
 254    MESA_FORMAT_RGBA_FXT1,               /* MesaFormat */
 255    GL_RGBA,                             /* BaseFormat */
 256    GL_UNSIGNED_NORMALIZED_ARB,          /* DataType */
 257    4, /*approx*/                        /* RedBits */
 258    4, /*approx*/                        /* GreenBits */
 259    4, /*approx*/                        /* BlueBits */
 260    1, /*approx*/                        /* AlphaBits */
 261    0,                                   /* LuminanceBits */
 262    0,                                   /* IntensityBits */
 263    0,                                   /* IndexBits */
 264    0,                                   /* DepthBits */
 265    0,                                   /* StencilBits */
 266    0,                                   /* TexelBytes */
 267    _mesa_texstore_rgba_fxt1,            /* StoreTexImageFunc */
 268    NULL, /*impossible*/                 /* FetchTexel1D */
 269    _mesa_fetch_texel_2d_rgba_fxt1,              /* FetchTexel2D */
 270    NULL, /*impossible*/                 /* FetchTexel3D */
 271    NULL, /*impossible*/                 /* FetchTexel1Df */
 272    _mesa_fetch_texel_2d_f_rgba_fxt1,            /* FetchTexel2Df */
 273    NULL, /*impossible*/                 /* FetchTexel3Df */
 274    NULL                                 /* StoreTexel */
 275 };
 276
 277
 278 /***************************************************************************\
 279  * FXT1 encoder
 280  *
 281  * The encoder was built by reversing the decoder,
 282  * and is vaguely based on Texus2 by 3dfx. Note that this code
 283  * is merely a proof of concept, since it is highly UNoptimized;
 284  * moreover, it is sub-optimal due to initial conditions passed
 285  * to Lloyd's algorithm (the interpolation modes are even worse).
 286 \***************************************************************************/
 287
 288
 289 #define MAX_COMP 4 /* ever needed maximum number of components in texel */
 290 #define MAX_VECT 4 /* ever needed maximum number of base vectors to find */
 291 #define N_TEXELS 32 /* number of texels in a block (always 32) */
 292 #define LL_N_REP 50 /* number of iterations in lloyd's vq */
 293 #define LL_RMS_D 10 /* fault tolerance (maximum delta) */
 294 #define LL_RMS_E 255 /* fault tolerance (maximum error) */
 295 #define ALPHA_TS 2 /* alpha threshold: (255 - ALPHA_TS) deemed opaque */
 296 #define ISTBLACK(v) (*((GLuint *)(v)) == 0)
 297
 298
 299 /*
 300  * Define a 64-bit unsigned integer type and macros
 301  */
 302 #if 1
 303
 304 #define FX64_NATIVE 1
 305
 306 typedef uint64_t Fx64;
 307
 308 #define FX64_MOV32(a, b) a = b
 309 #define FX64_OR32(a, b)  a |= b
 310 #define FX64_SHL(a, c)   a <<= c
 311
 312 #else
 313
 314 #define FX64_NATIVE 0
 315
 316 typedef struct {
 317    GLuint lo, hi;
 318 } Fx64;
 319
 320 #define FX64_MOV32(a, b) a.lo = b
 321 #define FX64_OR32(a, b)  a.lo |= b
 322
 323 #define FX64_SHL(a, c)                                 \
 324    do {                                                \
 325        if ((c) >= 32) {                                \
 326           a.hi = a.lo << ((c) - 32);                   \
 327           a.lo = 0;                                    \
 328        } else {                                        \
 329           a.hi = (a.hi << (c)) | (a.lo >> (32 - (c))); \
 330           a.lo <<= (c);                                \
 331        }                                               \
 332    } while (0)
 333
 334 #endif
 335
 336
 337 #define F(i) (GLfloat)1 /* can be used to obtain an oblong metric: 0.30 / 0.59 / 0.11 */
 338 #define SAFECDOT 1 /* for paranoids */
 339
 340 #define MAKEIVEC(NV, NC, IV, B, V0, V1)  \
 341    do {                                  \
 342       /* compute interpolation vector */ \
 343       GLfloat d2 = 0.0F;                 \
 344       GLfloat rd2;                       \
 345                                          \
 346       for (i = 0; i < NC; i++) {         \
 347          IV[i] = (V1[i] - V0[i]) * F(i); \
 348          d2 += IV[i] * IV[i];            \
 349       }                                  \
 350       rd2 = (GLfloat)NV / d2;            \
 351       B = 0;                             \
 352       for (i = 0; i < NC; i++) {         \
 353          IV[i] *= F(i);                  \
 354          B -= IV[i] * V0[i];             \
 355          IV[i] *= rd2;                   \
 356       }                                  \
 357       B = B * rd2 + 0.5f;                \
 358    } while (0)
 359
 360 #define CALCCDOT(TEXEL, NV, NC, IV, B, V)\
 361    do {                                  \
 362       GLfloat dot = 0.0F;                \
 363       for (i = 0; i < NC; i++) {         \
 364          dot += V[i] * IV[i];            \
 365       }                                  \
 366       TEXEL = (GLint)(dot + B);          \
 367       if (SAFECDOT) {                    \
 368          if (TEXEL < 0) {                \
 369             TEXEL = 0;                   \
 370          } else if (TEXEL > NV) {        \
 371             TEXEL = NV;                  \
 372          }                               \
 373       }                                  \
 374    } while (0)
 375
 376
 377 static GLint
 378 fxt1_bestcol (GLfloat vec[][MAX_COMP], GLint nv,
 379               GLubyte input[MAX_COMP], GLint nc)
 380 {
 381    GLint i, j, best = -1;
 382    GLfloat err = 1e9; /* big enough */
 383
 384    for (j = 0; j < nv; j++) {
 385       GLfloat e = 0.0F;
 386       for (i = 0; i < nc; i++) {
 387          e += (vec[j][i] - input[i]) * (vec[j][i] - input[i]);
 388       }
 389       if (e < err) {
 390          err = e;
 391          best = j;
 392       }
 393    }
 394
 395    return best;
 396 }
 397
 398
 399 static GLint
 400 fxt1_worst (GLfloat vec[MAX_COMP],
 401             GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
 402 {
 403    GLint i, k, worst = -1;
 404    GLfloat err = -1.0F; /* small enough */
 405
 406    for (k = 0; k < n; k++) {
 407       GLfloat e = 0.0F;
 408       for (i = 0; i < nc; i++) {
 409          e += (vec[i] - input[k][i]) * (vec[i] - input[k][i]);
 410       }
 411       if (e > err) {
 412          err = e;
 413          worst = k;
 414       }
 415    }
 416
 417    return worst;
 418 }
 419
 420
 421 static GLint
 422 fxt1_variance (GLdouble variance[MAX_COMP],
 423                GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
 424 {
 425    GLint i, k, best = 0;
 426    GLint sx, sx2;
 427    GLdouble var, maxvar = -1; /* small enough */
 428    GLdouble teenth = 1.0 / n;
 429
 430    for (i = 0; i < nc; i++) {
 431       sx = sx2 = 0;
 432       for (k = 0; k < n; k++) {
 433          GLint t = input[k][i];
 434          sx += t;
 435          sx2 += t * t;
 436       }
 437       var = sx2 * teenth - sx * sx * teenth * teenth;
 438       if (maxvar < var) {
 439          maxvar = var;
 440          best = i;
 441       }
 442       if (variance) {
 443          variance[i] = var;
 444       }
 445    }
 446
 447    return best;
 448 }
 449
 450
 451 static GLint
 452 fxt1_choose (GLfloat vec[][MAX_COMP], GLint nv,
 453              GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
 454 {
 455 #if 0
 456    /* Choose colors from a grid.
 457     */
 458    GLint i, j;
 459
 460    for (j = 0; j < nv; j++) {
 461       GLint m = j * (n - 1) / (nv - 1);
 462       for (i = 0; i < nc; i++) {
 463          vec[j][i] = input[m][i];
 464       }
 465    }
 466 #else
 467    /* Our solution here is to find the darkest and brightest colors in
 468     * the 8x4 tile and use those as the two representative colors.
 469     * There are probably better algorithms to use (histogram-based).
 470     */
 471    GLint i, j, k;
 472    GLint minSum = 2000; /* big enough */
 473    GLint maxSum = -1; /* small enough */
 474    GLint minCol = 0; /* phoudoin: silent compiler! */
 475    GLint maxCol = 0; /* phoudoin: silent compiler! */
 476
 477    struct {
 478       GLint flag;
 479       GLint key;
 480       GLint freq;
 481       GLint idx;
 482    } hist[N_TEXELS];
 483    GLint lenh = 0;
 484
 485    _mesa_memset(hist, 0, sizeof(hist));
 486
 487    for (k = 0; k < n; k++) {
 488       GLint l;
 489       GLint key = 0;
 490       GLint sum = 0;
 491       for (i = 0; i < nc; i++) {
 492          key <<= 8;
 493          key |= input[k][i];
 494          sum += input[k][i];
 495       }
 496       for (l = 0; l < n; l++) {
 497          if (!hist[l].flag) {
 498             /* alloc new slot */
 499             hist[l].flag = !0;
 500             hist[l].key = key;
 501             hist[l].freq = 1;
 502             hist[l].idx = k;
 503             lenh = l + 1;
 504             break;
 505          } else if (hist[l].key == key) {
 506             hist[l].freq++;
 507             break;
 508          }
 509       }
 510       if (minSum > sum) {
 511          minSum = sum;
 512          minCol = k;
 513       }
 514       if (maxSum < sum) {
 515          maxSum = sum;
 516          maxCol = k;
 517       }
 518    }
 519
 520    if (lenh <= nv) {
 521       for (j = 0; j < lenh; j++) {
 522          for (i = 0; i < nc; i++) {
 523             vec[j][i] = (GLfloat)input[hist[j].idx][i];
 524          }
 525       }
 526       for (; j < nv; j++) {
 527          for (i = 0; i < nc; i++) {
 528             vec[j][i] = vec[0][i];
 529          }
 530       }
 531       return 0;
 532    }
 533
 534    for (j = 0; j < nv; j++) {
 535       for (i = 0; i < nc; i++) {
 536          vec[j][i] = ((nv - 1 - j) * input[minCol][i] + j * input[maxCol][i] + (nv - 1) / 2) / (GLfloat)(nv - 1);
 537       }
 538    }
 539 #endif
 540
 541    return !0;
 542 }
 543
 544
 545 static GLint
 546 fxt1_lloyd (GLfloat vec[][MAX_COMP], GLint nv,
 547             GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
 548 {
 549    /* Use the generalized lloyd's algorithm for VQ:
 550     *     find 4 color vectors.
 551     *
 552     *     for each sample color
 553     *         sort to nearest vector.
 554     *
 555     *     replace each vector with the centroid of it's matching colors.
 556     *
 557     *     repeat until RMS doesn't improve.
 558     *
 559     *     if a color vector has no samples, or becomes the same as another
 560     *     vector, replace it with the color which is farthest from a sample.
 561     *
 562     * vec[][MAX_COMP]           initial vectors and resulting colors
 563     * nv                        number of resulting colors required
 564     * input[N_TEXELS][MAX_COMP] input texels
 565     * nc                        number of components in input / vec
 566     * n                         number of input samples
 567     */
 568
 569    GLint sum[MAX_VECT][MAX_COMP]; /* used to accumulate closest texels */
 570    GLint cnt[MAX_VECT]; /* how many times a certain vector was chosen */
 571    GLfloat error, lasterror = 1e9;
 572
 573    GLint i, j, k, rep;
 574
 575    /* the quantizer */
 576    for (rep = 0; rep < LL_N_REP; rep++) {
 577       /* reset sums & counters */
 578       for (j = 0; j < nv; j++) {
 579          for (i = 0; i < nc; i++) {
 580             sum[j][i] = 0;
 581          }
 582          cnt[j] = 0;
 583       }
 584       error = 0;
 585
 586       /* scan whole block */
 587       for (k = 0; k < n; k++) {
 588 #if 1
 589          GLint best = -1;
 590          GLfloat err = 1e9; /* big enough */
 591          /* determine best vector */
 592          for (j = 0; j < nv; j++) {
 593             GLfloat e = (vec[j][0] - input[k][0]) * (vec[j][0] - input[k][0]) +
 594                       (vec[j][1] - input[k][1]) * (vec[j][1] - input[k][1]) +
 595                       (vec[j][2] - input[k][2]) * (vec[j][2] - input[k][2]);
 596             if (nc == 4) {
 597                e += (vec[j][3] - input[k][3]) * (vec[j][3] - input[k][3]);
 598             }
 599             if (e < err) {
 600                err = e;
 601                best = j;
 602             }
 603          }
 604 #else
 605          GLint best = fxt1_bestcol(vec, nv, input[k], nc, &err);
 606 #endif
 607          /* add in closest color */
 608          for (i = 0; i < nc; i++) {
 609             sum[best][i] += input[k][i];
 610          }
 611          /* mark this vector as used */
 612          cnt[best]++;
 613          /* accumulate error */
 614          error += err;
 615       }
 616
 617       /* check RMS */
 618       if ((error < LL_RMS_E) ||
 619           ((error < lasterror) && ((lasterror - error) < LL_RMS_D))) {
 620          return !0; /* good match */
 621       }
 622       lasterror = error;
 623
 624       /* move each vector to the barycenter of its closest colors */
 625       for (j = 0; j < nv; j++) {
 626          if (cnt[j]) {
 627             GLfloat div = 1.0F / cnt[j];
 628             for (i = 0; i < nc; i++) {
 629                vec[j][i] = div * sum[j][i];
 630             }
 631          } else {
 632             /* this vec has no samples or is identical with a previous vec */
 633             GLint worst = fxt1_worst(vec[j], input, nc, n);
 634             for (i = 0; i < nc; i++) {
 635                vec[j][i] = input[worst][i];
 636             }
 637          }
 638       }
 639    }
 640
 641    return 0; /* could not converge fast enough */
 642 }
 643
 644
 645 static void
 646 fxt1_quantize_CHROMA (GLuint *cc,
 647                       GLubyte input[N_TEXELS][MAX_COMP])
 648 {
 649    const GLint n_vect = 4; /* 4 base vectors to find */
 650    const GLint n_comp = 3; /* 3 components: R, G, B */
 651    GLfloat vec[MAX_VECT][MAX_COMP];
 652    GLint i, j, k;
 653    Fx64 hi; /* high quadword */
 654    GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
 655
 656    if (fxt1_choose(vec, n_vect, input, n_comp, N_TEXELS) != 0) {
 657       fxt1_lloyd(vec, n_vect, input, n_comp, N_TEXELS);
 658    }
 659
 660    FX64_MOV32(hi, 4); /* cc-chroma = "010" + unused bit */
 661    for (j = n_vect - 1; j >= 0; j--) {
 662       for (i = 0; i < n_comp; i++) {
 663          /* add in colors */
 664          FX64_SHL(hi, 5);
 665          FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
 666       }
 667    }
 668    ((Fx64 *)cc)[1] = hi;
 669
 670    lohi = lolo = 0;
 671    /* right microtile */
 672    for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) {
 673       lohi <<= 2;
 674       lohi |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
 675    }
 676    /* left microtile */
 677    for (; k >= 0; k--) {
 678       lolo <<= 2;
 679       lolo |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
 680    }
 681    cc[1] = lohi;
 682    cc[0] = lolo;
 683 }
 684
 685
 686 static void
 687 fxt1_quantize_ALPHA0 (GLuint *cc,
 688                       GLubyte input[N_TEXELS][MAX_COMP],
 689                       GLubyte reord[N_TEXELS][MAX_COMP], GLint n)
 690 {
 691    const GLint n_vect = 3; /* 3 base vectors to find */
 692    const GLint n_comp = 4; /* 4 components: R, G, B, A */
 693    GLfloat vec[MAX_VECT][MAX_COMP];
 694    GLint i, j, k;
 695    Fx64 hi; /* high quadword */
 696    GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
 697
 698    /* the last vector indicates zero */
 699    for (i = 0; i < n_comp; i++) {
 700       vec[n_vect][i] = 0;
 701    }
 702
 703    /* the first n texels in reord are guaranteed to be non-zero */
 704    if (fxt1_choose(vec, n_vect, reord, n_comp, n) != 0) {
 705       fxt1_lloyd(vec, n_vect, reord, n_comp, n);
 706    }
 707
 708    FX64_MOV32(hi, 6); /* alpha = "011" + lerp = 0 */
 709    for (j = n_vect - 1; j >= 0; j--) {
 710       /* add in alphas */
 711       FX64_SHL(hi, 5);
 712       FX64_OR32(hi, (GLuint)(vec[j][ACOMP] / 8.0F));
 713    }
 714    for (j = n_vect - 1; j >= 0; j--) {
 715       for (i = 0; i < n_comp - 1; i++) {
 716          /* add in colors */
 717          FX64_SHL(hi, 5);
 718          FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
 719       }
 720    }
 721    ((Fx64 *)cc)[1] = hi;
 722
 723    lohi = lolo = 0;
 724    /* right microtile */
 725    for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) {
 726       lohi <<= 2;
 727       lohi |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
 728    }
 729    /* left microtile */
 730    for (; k >= 0; k--) {
 731       lolo <<= 2;
 732       lolo |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
 733    }
 734    cc[1] = lohi;
 735    cc[0] = lolo;
 736 }
 737
 738
 739 static void
 740 fxt1_quantize_ALPHA1 (GLuint *cc,
 741                       GLubyte input[N_TEXELS][MAX_COMP])
 742 {
 743    const GLint n_vect = 3; /* highest vector number in each microtile */
 744    const GLint n_comp = 4; /* 4 components: R, G, B, A */
 745    GLfloat vec[1 + 1 + 1][MAX_COMP]; /* 1.5 extrema for each sub-block */
 746    GLfloat b, iv[MAX_COMP]; /* interpolation vector */
 747    GLint i, j, k;
 748    Fx64 hi; /* high quadword */
 749    GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
 750
 751    GLint minSum;
 752    GLint maxSum;
 753    GLint minColL = 0, maxColL = 0;
 754    GLint minColR = 0, maxColR = 0;
 755    GLint sumL = 0, sumR = 0;
 756    GLint nn_comp;
 757    /* Our solution here is to find the darkest and brightest colors in
 758     * the 4x4 tile and use those as the two representative colors.
 759     * There are probably better algorithms to use (histogram-based).
 760     */
 761    nn_comp = n_comp;
 762    while ((minColL == maxColL) && nn_comp) {
 763        minSum = 2000; /* big enough */
 764        maxSum = -1; /* small enough */
 765        for (k = 0; k < N_TEXELS / 2; k++) {
 766            GLint sum = 0;
 767            for (i = 0; i < nn_comp; i++) {
 768                sum += input[k][i];
 769            }
 770            if (minSum > sum) {
 771                minSum = sum;
 772                minColL = k;
 773            }
 774            if (maxSum < sum) {
 775                maxSum = sum;
 776                maxColL = k;
 777            }
 778            sumL += sum;
 779        }
 780
 781        nn_comp--;
 782    }
 783
 784    nn_comp = n_comp;
 785    while ((minColR == maxColR) && nn_comp) {
 786        minSum = 2000; /* big enough */
 787        maxSum = -1; /* small enough */
 788        for (k = N_TEXELS / 2; k < N_TEXELS; k++) {
 789            GLint sum = 0;
 790            for (i = 0; i < nn_comp; i++) {
 791                sum += input[k][i];
 792            }
 793            if (minSum > sum) {
 794                minSum = sum;
 795                minColR = k;
 796            }
 797            if (maxSum < sum) {
 798                maxSum = sum;
 799                maxColR = k;
 800            }
 801            sumR += sum;
 802        }
 803
 804        nn_comp--;
 805    }
 806
 807    /* choose the common vector (yuck!) */
 808    {
 809       GLint j1, j2;
 810       GLint v1 = 0, v2 = 0;
 811       GLfloat err = 1e9; /* big enough */
 812       GLfloat tv[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
 813       for (i = 0; i < n_comp; i++) {
 814          tv[0][i] = input[minColL][i];
 815          tv[1][i] = input[maxColL][i];
 816          tv[2][i] = input[minColR][i];
 817          tv[3][i] = input[maxColR][i];
 818       }
 819       for (j1 = 0; j1 < 2; j1++) {
 820          for (j2 = 2; j2 < 4; j2++) {
 821             GLfloat e = 0.0F;
 822             for (i = 0; i < n_comp; i++) {
 823                e += (tv[j1][i] - tv[j2][i]) * (tv[j1][i] - tv[j2][i]);
 824             }
 825             if (e < err) {
 826                err = e;
 827                v1 = j1;
 828                v2 = j2;
 829             }
 830          }
 831       }
 832       for (i = 0; i < n_comp; i++) {
 833          vec[0][i] = tv[1 - v1][i];
 834          vec[1][i] = (tv[v1][i] * sumL + tv[v2][i] * sumR) / (sumL + sumR);
 835          vec[2][i] = tv[5 - v2][i];
 836       }
 837    }
 838
 839    /* left microtile */
 840    cc[0] = 0;
 841    if (minColL != maxColL) {
 842       /* compute interpolation vector */
 843       MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
 844
 845       /* add in texels */
 846       lolo = 0;
 847       for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
 848          GLint texel;
 849          /* interpolate color */
 850          CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
 851          /* add in texel */
 852          lolo <<= 2;
 853          lolo |= texel;
 854       }
 855
 856       cc[0] = lolo;
 857    }
 858
 859    /* right microtile */
 860    cc[1] = 0;
 861    if (minColR != maxColR) {
 862       /* compute interpolation vector */
 863       MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[1]);
 864
 865       /* add in texels */
 866       lohi = 0;
 867       for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
 868          GLint texel;
 869          /* interpolate color */
 870          CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
 871          /* add in texel */
 872          lohi <<= 2;
 873          lohi |= texel;
 874       }
 875
 876       cc[1] = lohi;
 877    }
 878
 879    FX64_MOV32(hi, 7); /* alpha = "011" + lerp = 1 */
 880    for (j = n_vect - 1; j >= 0; j--) {
 881       /* add in alphas */
 882       FX64_SHL(hi, 5);
 883       FX64_OR32(hi, (GLuint)(vec[j][ACOMP] / 8.0F));
 884    }
 885    for (j = n_vect - 1; j >= 0; j--) {
 886       for (i = 0; i < n_comp - 1; i++) {
 887          /* add in colors */
 888          FX64_SHL(hi, 5);
 889          FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
 890       }
 891    }
 892    ((Fx64 *)cc)[1] = hi;
 893 }
 894
 895
 896 static void
 897 fxt1_quantize_HI (GLuint *cc,
 898                   GLubyte input[N_TEXELS][MAX_COMP],
 899                   GLubyte reord[N_TEXELS][MAX_COMP], GLint n)
 900 {
 901    const GLint n_vect = 6; /* highest vector number */
 902    const GLint n_comp = 3; /* 3 components: R, G, B */
 903    GLfloat b = 0.0F;       /* phoudoin: silent compiler! */
 904    GLfloat iv[MAX_COMP];   /* interpolation vector */
 905    GLint i, k;
 906    GLuint hihi; /* high quadword: hi dword */
 907
 908    GLint minSum = 2000; /* big enough */
 909    GLint maxSum = -1; /* small enough */
 910    GLint minCol = 0; /* phoudoin: silent compiler! */
 911    GLint maxCol = 0; /* phoudoin: silent compiler! */
 912
 913    /* Our solution here is to find the darkest and brightest colors in
 914     * the 8x4 tile and use those as the two representative colors.
 915     * There are probably better algorithms to use (histogram-based).
 916     */
 917    for (k = 0; k < n; k++) {
 918       GLint sum = 0;
 919       for (i = 0; i < n_comp; i++) {
 920          sum += reord[k][i];
 921       }
 922       if (minSum > sum) {
 923          minSum = sum;
 924          minCol = k;
 925       }
 926       if (maxSum < sum) {
 927          maxSum = sum;
 928          maxCol = k;
 929       }
 930    }
 931
 932    hihi = 0; /* cc-hi = "00" */
 933    for (i = 0; i < n_comp; i++) {
 934       /* add in colors */
 935       hihi <<= 5;
 936       hihi |= reord[maxCol][i] >> 3;
 937    }
 938    for (i = 0; i < n_comp; i++) {
 939       /* add in colors */
 940       hihi <<= 5;
 941       hihi |= reord[minCol][i] >> 3;
 942    }
 943    cc[3] = hihi;
 944    cc[0] = cc[1] = cc[2] = 0;
 945
 946    /* compute interpolation vector */
 947    if (minCol != maxCol) {
 948       MAKEIVEC(n_vect, n_comp, iv, b, reord[minCol], reord[maxCol]);
 949    }
 950
 951    /* add in texels */
 952    for (k = N_TEXELS - 1; k >= 0; k--) {
 953       GLint t = k * 3;
 954       GLuint *kk = (GLuint *)((char *)cc + t / 8);
 955       GLint texel = n_vect + 1; /* transparent black */
 956
 957       if (!ISTBLACK(input[k])) {
 958          if (minCol != maxCol) {
 959             /* interpolate color */
 960             CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
 961             /* add in texel */
 962             kk[0] |= texel << (t & 7);
 963          }
 964       } else {
 965          /* add in texel */
 966          kk[0] |= texel << (t & 7);
 967       }
 968    }
 969 }
 970
 971
 972 static void
 973 fxt1_quantize_MIXED1 (GLuint *cc,
 974                       GLubyte input[N_TEXELS][MAX_COMP])
 975 {
 976    const GLint n_vect = 2; /* highest vector number in each microtile */
 977    const GLint n_comp = 3; /* 3 components: R, G, B */
 978    GLubyte vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
 979    GLfloat b, iv[MAX_COMP]; /* interpolation vector */
 980    GLint i, j, k;
 981    Fx64 hi; /* high quadword */
 982    GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
 983
 984    GLint minSum;
 985    GLint maxSum;
 986    GLint minColL = 0, maxColL = -1;
 987    GLint minColR = 0, maxColR = -1;
 988
 989    /* Our solution here is to find the darkest and brightest colors in
 990     * the 4x4 tile and use those as the two representative colors.
 991     * There are probably better algorithms to use (histogram-based).
 992     */
 993    minSum = 2000; /* big enough */
 994    maxSum = -1; /* small enough */
 995    for (k = 0; k < N_TEXELS / 2; k++) {
 996       if (!ISTBLACK(input[k])) {
 997          GLint sum = 0;
 998          for (i = 0; i < n_comp; i++) {
 999             sum += input[k][i];
1000          }
1001          if (minSum > sum) {
1002             minSum = sum;
1003             minColL = k;
1004          }
1005          if (maxSum < sum) {
1006             maxSum = sum;
1007             maxColL = k;
1008          }
1009       }
1010    }
1011    minSum = 2000; /* big enough */
1012    maxSum = -1; /* small enough */
1013    for (; k < N_TEXELS; k++) {
1014       if (!ISTBLACK(input[k])) {
1015          GLint sum = 0;
1016          for (i = 0; i < n_comp; i++) {
1017             sum += input[k][i];
1018          }
1019          if (minSum > sum) {
1020             minSum = sum;
1021             minColR = k;
1022          }
1023          if (maxSum < sum) {
1024             maxSum = sum;
1025             maxColR = k;
1026          }
1027       }
1028    }
1029
1030    /* left microtile */
1031    if (maxColL == -1) {
1032       /* all transparent black */
1033       cc[0] = ~0u;
1034       for (i = 0; i < n_comp; i++) {
1035          vec[0][i] = 0;
1036          vec[1][i] = 0;
1037       }
1038    } else {
1039       cc[0] = 0;
1040       for (i = 0; i < n_comp; i++) {
1041          vec[0][i] = input[minColL][i];
1042          vec[1][i] = input[maxColL][i];
1043       }
1044       if (minColL != maxColL) {
1045          /* compute interpolation vector */
1046          MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
1047
1048          /* add in texels */
1049          lolo = 0;
1050          for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
1051             GLint texel = n_vect + 1; /* transparent black */
1052             if (!ISTBLACK(input[k])) {
1053                /* interpolate color */
1054                CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
1055             }
1056             /* add in texel */
1057             lolo <<= 2;
1058             lolo |= texel;
1059          }
1060          cc[0] = lolo;
1061       }
1062    }
1063
1064    /* right microtile */
1065    if (maxColR == -1) {
1066       /* all transparent black */
1067       cc[1] = ~0u;
1068       for (i = 0; i < n_comp; i++) {
1069          vec[2][i] = 0;
1070          vec[3][i] = 0;
1071       }
1072    } else {
1073       cc[1] = 0;
1074       for (i = 0; i < n_comp; i++) {
1075          vec[2][i] = input[minColR][i];
1076          vec[3][i] = input[maxColR][i];
1077       }
1078       if (minColR != maxColR) {
1079          /* compute interpolation vector */
1080          MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);
1081
1082          /* add in texels */
1083          lohi = 0;
1084          for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
1085             GLint texel = n_vect + 1; /* transparent black */
1086             if (!ISTBLACK(input[k])) {
1087                /* interpolate color */
1088                CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
1089             }
1090             /* add in texel */
1091             lohi <<= 2;
1092             lohi |= texel;
1093          }
1094          cc[1] = lohi;
1095       }
1096    }
1097
1098    FX64_MOV32(hi, 9 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
1099    for (j = 2 * 2 - 1; j >= 0; j--) {
1100       for (i = 0; i < n_comp; i++) {
1101          /* add in colors */
1102          FX64_SHL(hi, 5);
1103          FX64_OR32(hi, vec[j][i] >> 3);
1104       }
1105    }
1106    ((Fx64 *)cc)[1] = hi;
1107 }
1108
1109
1110 static void
1111 fxt1_quantize_MIXED0 (GLuint *cc,
1112                       GLubyte input[N_TEXELS][MAX_COMP])
1113 {
1114    const GLint n_vect = 3; /* highest vector number in each microtile */
1115    const GLint n_comp = 3; /* 3 components: R, G, B */
1116    GLubyte vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
1117    GLfloat b, iv[MAX_COMP]; /* interpolation vector */
1118    GLint i, j, k;
1119    Fx64 hi; /* high quadword */
1120    GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
1121
1122    GLint minColL = 0, maxColL = 0;
1123    GLint minColR = 0, maxColR = 0;
1124 #if 0
1125    GLint minSum;
1126    GLint maxSum;
1127
1128    /* Our solution here is to find the darkest and brightest colors in
1129     * the 4x4 tile and use those as the two representative colors.
1130     * There are probably better algorithms to use (histogram-based).
1131     */
1132    minSum = 2000; /* big enough */
1133    maxSum = -1; /* small enough */
1134    for (k = 0; k < N_TEXELS / 2; k++) {
1135       GLint sum = 0;
1136       for (i = 0; i < n_comp; i++) {
1137          sum += input[k][i];
1138       }
1139       if (minSum > sum) {
1140          minSum = sum;
1141          minColL = k;
1142       }
1143       if (maxSum < sum) {
1144          maxSum = sum;
1145          maxColL = k;
1146       }
1147    }
1148    minSum = 2000; /* big enough */
1149    maxSum = -1; /* small enough */
1150    for (; k < N_TEXELS; k++) {
1151       GLint sum = 0;
1152       for (i = 0; i < n_comp; i++) {
1153          sum += input[k][i];
1154       }
1155       if (minSum > sum) {
1156          minSum = sum;
1157          minColR = k;
1158       }
1159       if (maxSum < sum) {
1160          maxSum = sum;
1161          maxColR = k;
1162       }
1163    }
1164 #else
1165    GLint minVal;
1166    GLint maxVal;
1167    GLint maxVarL = fxt1_variance(NULL, input, n_comp, N_TEXELS / 2);
1168    GLint maxVarR = fxt1_variance(NULL, &input[N_TEXELS / 2], n_comp, N_TEXELS / 2);
1169
1170    /* Scan the channel with max variance for lo & hi
1171     * and use those as the two representative colors.
1172     */
1173    minVal = 2000; /* big enough */
1174    maxVal = -1; /* small enough */
1175    for (k = 0; k < N_TEXELS / 2; k++) {
1176       GLint t = input[k][maxVarL];
1177       if (minVal > t) {
1178          minVal = t;
1179          minColL = k;
1180       }
1181       if (maxVal < t) {
1182          maxVal = t;
1183          maxColL = k;
1184       }
1185    }
1186    minVal = 2000; /* big enough */
1187    maxVal = -1; /* small enough */
1188    for (; k < N_TEXELS; k++) {
1189       GLint t = input[k][maxVarR];
1190       if (minVal > t) {
1191          minVal = t;
1192          minColR = k;
1193       }
1194       if (maxVal < t) {
1195          maxVal = t;
1196          maxColR = k;
1197       }
1198    }
1199 #endif
1200
1201    /* left microtile */
1202    cc[0] = 0;
1203    for (i = 0; i < n_comp; i++) {
1204       vec[0][i] = input[minColL][i];
1205       vec[1][i] = input[maxColL][i];
1206    }
1207    if (minColL != maxColL) {
1208       /* compute interpolation vector */
1209       MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
1210
1211       /* add in texels */
1212       lolo = 0;
1213       for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
1214          GLint texel;
1215          /* interpolate color */
1216          CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
1217          /* add in texel */
1218          lolo <<= 2;
1219          lolo |= texel;
1220       }
1221
1222       /* funky encoding for LSB of green */
1223       if ((GLint)((lolo >> 1) & 1) != (((vec[1][GCOMP] ^ vec[0][GCOMP]) >> 2) & 1)) {
1224          for (i = 0; i < n_comp; i++) {
1225             vec[1][i] = input[minColL][i];
1226             vec[0][i] = input[maxColL][i];
1227          }
1228          lolo = ~lolo;
1229       }
1230
1231       cc[0] = lolo;
1232    }
1233
1234    /* right microtile */
1235    cc[1] = 0;
1236    for (i = 0; i < n_comp; i++) {
1237       vec[2][i] = input[minColR][i];
1238       vec[3][i] = input[maxColR][i];
1239    }
1240    if (minColR != maxColR) {
1241       /* compute interpolation vector */
1242       MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);
1243
1244       /* add in texels */
1245       lohi = 0;
1246       for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
1247          GLint texel;
1248          /* interpolate color */
1249          CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
1250          /* add in texel */
1251          lohi <<= 2;
1252          lohi |= texel;
1253       }
1254
1255       /* funky encoding for LSB of green */
1256       if ((GLint)((lohi >> 1) & 1) != (((vec[3][GCOMP] ^ vec[2][GCOMP]) >> 2) & 1)) {
1257          for (i = 0; i < n_comp; i++) {
1258             vec[3][i] = input[minColR][i];
1259             vec[2][i] = input[maxColR][i];
1260          }
1261          lohi = ~lohi;
1262       }
1263
1264       cc[1] = lohi;
1265    }
1266
1267    FX64_MOV32(hi, 8 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
1268    for (j = 2 * 2 - 1; j >= 0; j--) {
1269       for (i = 0; i < n_comp; i++) {
1270          /* add in colors */
1271          FX64_SHL(hi, 5);
1272          FX64_OR32(hi, vec[j][i] >> 3);
1273       }
1274    }
1275    ((Fx64 *)cc)[1] = hi;
1276 }
1277
1278
1279 static void
1280 fxt1_quantize (GLuint *cc, const GLubyte *lines[], GLint comps)
1281 {
1282    GLint trualpha;
1283    GLubyte reord[N_TEXELS][MAX_COMP];
1284
1285    GLubyte input[N_TEXELS][MAX_COMP];
1286    GLint i, k, l;
1287
1288    if (comps == 3) {
1289       /* make the whole block opaque */
1290       _mesa_memset(input, -1, sizeof(input));
1291    }
1292
1293    /* 8 texels each line */
1294    for (l = 0; l < 4; l++) {
1295       for (k = 0; k < 4; k++) {
1296          for (i = 0; i < comps; i++) {
1297             input[k + l * 4][i] = *lines[l]++;
1298          }
1299       }
1300       for (; k < 8; k++) {
1301          for (i = 0; i < comps; i++) {
1302             input[k + l * 4 + 12][i] = *lines[l]++;
1303          }
1304       }
1305    }
1306
1307    /* block layout:
1308     * 00, 01, 02, 03, 08, 09, 0a, 0b
1309     * 10, 11, 12, 13, 18, 19, 1a, 1b
1310     * 04, 05, 06, 07, 0c, 0d, 0e, 0f
1311     * 14, 15, 16, 17, 1c, 1d, 1e, 1f
1312     */
1313
1314    /* [dBorca]
1315     * stupidity flows forth from this
1316     */
1317    l = N_TEXELS;
1318    trualpha = 0;
1319    if (comps == 4) {
1320       /* skip all transparent black texels */
1321       l = 0;
1322       for (k = 0; k < N_TEXELS; k++) {
1323          /* test all components against 0 */
1324          if (!ISTBLACK(input[k])) {
1325             /* texel is not transparent black */
1326             COPY_4UBV(reord[l], input[k]);
1327             if (reord[l][ACOMP] < (255 - ALPHA_TS)) {
1328                /* non-opaque texel */
1329                trualpha = !0;
1330             }
1331             l++;
1332          }
1333       }
1334    }
1335
1336 #if 0
1337    if (trualpha) {
1338       fxt1_quantize_ALPHA0(cc, input, reord, l);
1339    } else if (l == 0) {
1340       cc[0] = cc[1] = cc[2] = -1;
1341       cc[3] = 0;
1342    } else if (l < N_TEXELS) {
1343       fxt1_quantize_HI(cc, input, reord, l);
1344    } else {
1345       fxt1_quantize_CHROMA(cc, input);
1346    }
1347    (void)fxt1_quantize_ALPHA1;
1348    (void)fxt1_quantize_MIXED1;
1349    (void)fxt1_quantize_MIXED0;
1350 #else
1351    if (trualpha) {
1352       fxt1_quantize_ALPHA1(cc, input);
1353    } else if (l == 0) {
1354       cc[0] = cc[1] = cc[2] = ~0u;
1355       cc[3] = 0;
1356    } else if (l < N_TEXELS) {
1357       fxt1_quantize_MIXED1(cc, input);
1358    } else {
1359       fxt1_quantize_MIXED0(cc, input);
1360    }
1361    (void)fxt1_quantize_ALPHA0;
1362    (void)fxt1_quantize_HI;
1363    (void)fxt1_quantize_CHROMA;
1364 #endif
1365 }
1366
1367
1368 static void
1369 fxt1_encode (GLuint width, GLuint height, GLint comps,
1370              const void *source, GLint srcRowStride,
1371              void *dest, GLint destRowStride)
1372 {
1373    GLuint x, y;
1374    const GLubyte *data;
1375    GLuint *encoded = (GLuint *)dest;
1376    void *newSource = NULL;
1377
1378    assert(comps == 3 || comps == 4);
1379
1380    /* Replicate image if width is not M8 or height is not M4 */
1381    if ((width & 7) | (height & 3)) {
1382       GLint newWidth = (width + 7) & ~7;
1383       GLint newHeight = (height + 3) & ~3;
1384       newSource = _mesa_malloc(comps * newWidth * newHeight * sizeof(GLchan));
1385       if (!newSource) {
1386          GET_CURRENT_CONTEXT(ctx);
1387          _mesa_error(ctx, GL_OUT_OF_MEMORY, "texture compression");
1388          goto cleanUp;
1389       }
1390       _mesa_upscale_teximage2d(width, height, newWidth, newHeight,
1391                                comps, (const GLchan *) source,
1392                                srcRowStride, (GLchan *) newSource);
1393       source = newSource;
1394       width = newWidth;
1395       height = newHeight;
1396       srcRowStride = comps * newWidth;
1397    }
1398
1399    /* convert from 16/32-bit channels to GLubyte if needed */
1400    if (CHAN_TYPE != GL_UNSIGNED_BYTE) {
1401       const GLuint n = width * height * comps;
1402       const GLchan *src = (const GLchan *) source;
1403       GLubyte *dest = (GLubyte *) _mesa_malloc(n * sizeof(GLubyte));
1404       GLuint i;
1405       if (!dest) {
1406          GET_CURRENT_CONTEXT(ctx);
1407          _mesa_error(ctx, GL_OUT_OF_MEMORY, "texture compression");
1408          goto cleanUp;
1409       }
1410       for (i = 0; i < n; i++) {
1411          dest[i] = CHAN_TO_UBYTE(src[i]);
1412       }
1413       if (newSource != NULL) {
1414          _mesa_free(newSource);
1415       }
1416       newSource = dest;  /* we'll free this buffer before returning */
1417       source = dest;  /* the new, GLubyte incoming image */
1418    }
1419
1420    data = (const GLubyte *) source;
1421    destRowStride = (destRowStride - width * 2) / 4;
1422    for (y = 0; y < height; y += 4) {
1423       GLuint offs = 0 + (y + 0) * srcRowStride;
1424       for (x = 0; x < width; x += 8) {
1425          const GLubyte *lines[4];
1426          lines[0] = &data[offs];
1427          lines[1] = lines[0] + srcRowStride;
1428          lines[2] = lines[1] + srcRowStride;
1429          lines[3] = lines[2] + srcRowStride;
1430          offs += 8 * comps;
1431          fxt1_quantize(encoded, lines, comps);
1432          /* 128 bits per 8x4 block */
1433          encoded += 4;
1434       }
1435       encoded += destRowStride;
1436    }
1437
1438  cleanUp:
1439    if (newSource != NULL) {
1440       _mesa_free(newSource);
1441    }
1442 }
1443
1444
1445 /***************************************************************************\
1446  * FXT1 decoder
1447  *
1448  * The decoder is based on GL_3DFX_texture_compression_FXT1
1449  * specification and serves as a concept for the encoder.
1450 \***************************************************************************/
1451
1452
1453 /* lookup table for scaling 5 bit colors up to 8 bits */
1454 static const GLubyte _rgb_scale_5[] = {
1455    0,   8,   16,  25,  33,  41,  49,  58,
1456    66,  74,  82,  90,  99,  107, 115, 123,
1457    132, 140, 148, 156, 165, 173, 181, 189,
1458    197, 206, 214, 222, 230, 239, 247, 255
1459 };
1460
1461 /* lookup table for scaling 6 bit colors up to 8 bits */
1462 static const GLubyte _rgb_scale_6[] = {
1463    0,   4,   8,   12,  16,  20,  24,  28,
1464    32,  36,  40,  45,  49,  53,  57,  61,
1465    65,  69,  73,  77,  81,  85,  89,  93,
1466    97,  101, 105, 109, 113, 117, 121, 125,
1467    130, 134, 138, 142, 146, 150, 154, 158,
1468    162, 166, 170, 174, 178, 182, 186, 190,
1469    194, 198, 202, 206, 210, 215, 219, 223,
1470    227, 231, 235, 239, 243, 247, 251, 255
1471 };
1472
1473
/* CC_SEL: view `cc` as an array of 32-bit words, pick the word containing
 * bit number `which` and shift it right so that bit lands at position 0.
 * Bits of the following word are NOT pulled in, and the result is not
 * masked; callers mask it (UP5/UP6 do so themselves).  The block is only
 * read, hence the const-qualified cast.
 */
#define CC_SEL(cc, which) (((const GLuint *)(cc))[(which) / 32] >> ((which) & 31))
/* UP5: expand the low 5 bits of `c` to an 8-bit value. */
#define UP5(c) _rgb_scale_5[(c) & 31]
/* UP6: expand the 6-bit value made of the low 5 bits of `c` followed by
 * the low bit of `b` (as least significant bit) to an 8-bit value.
 */
#define UP6(c, b) _rgb_scale_6[(((c) & 31) << 1) | ((b) & 1)]
/* LERP: rounded integer interpolation, step `t` of `n`, from c0 (t == 0)
 * to c1 (t == n).  The whole replacement list is parenthesised so that the
 * macro can be embedded in a larger expression safely.
 */
#define LERP(n, t, c0, c1) ((((n) - (t)) * (c0) + (t) * (c1) + (n) / 2) / (n))
1478
1479
1480 static void
1481 fxt1_decode_1HI (const GLubyte *code, GLint t, GLchan *rgba)
1482 {
1483    const GLuint *cc;
1484
1485    t *= 3;
1486    cc = (const GLuint *)(code + t / 8);
1487    t = (cc[0] >> (t & 7)) & 7;
1488
1489    if (t == 7) {
1490       rgba[RCOMP] = rgba[GCOMP] = rgba[BCOMP] = rgba[ACOMP] = 0;
1491    } else {
1492       GLubyte r, g, b;
1493       cc = (const GLuint *)(code + 12);
1494       if (t == 0) {
1495          b = UP5(CC_SEL(cc, 0));
1496          g = UP5(CC_SEL(cc, 5));
1497          r = UP5(CC_SEL(cc, 10));
1498       } else if (t == 6) {
1499          b = UP5(CC_SEL(cc, 15));
1500          g = UP5(CC_SEL(cc, 20));
1501          r = UP5(CC_SEL(cc, 25));
1502       } else {
1503          b = LERP(6, t, UP5(CC_SEL(cc, 0)), UP5(CC_SEL(cc, 15)));
1504          g = LERP(6, t, UP5(CC_SEL(cc, 5)), UP5(CC_SEL(cc, 20)));
1505          r = LERP(6, t, UP5(CC_SEL(cc, 10)), UP5(CC_SEL(cc, 25)));
1506       }
1507       rgba[RCOMP] = UBYTE_TO_CHAN(r);
1508       rgba[GCOMP] = UBYTE_TO_CHAN(g);
1509       rgba[BCOMP] = UBYTE_TO_CHAN(b);
1510       rgba[ACOMP] = CHAN_MAX;
1511    }
1512 }
1513
1514
1515 static void
1516 fxt1_decode_1CHROMA (const GLubyte *code, GLint t, GLchan *rgba)
1517 {
1518    const GLuint *cc;
1519    GLuint kk;
1520
1521    cc = (const GLuint *)code;
1522    if (t & 16) {
1523       cc++;
1524       t &= 15;
1525    }
1526    t = (cc[0] >> (t * 2)) & 3;
1527
1528    t *= 15;
1529    cc = (const GLuint *)(code + 8 + t / 8);
1530    kk = cc[0] >> (t & 7);
1531    rgba[BCOMP] = UBYTE_TO_CHAN( UP5(kk) );
1532    rgba[GCOMP] = UBYTE_TO_CHAN( UP5(kk >> 5) );
1533    rgba[RCOMP] = UBYTE_TO_CHAN( UP5(kk >> 10) );
1534    rgba[ACOMP] = CHAN_MAX;
1535 }
1536
1537
1538 static void
1539 fxt1_decode_1MIXED (const GLubyte *code, GLint t, GLchan *rgba)
1540 {
1541    const GLuint *cc;
1542    GLuint col[2][3];
1543    GLint glsb, selb;
1544
1545    cc = (const GLuint *)code;
1546    if (t & 16) {
1547       t &= 15;
1548       t = (cc[1] >> (t * 2)) & 3;
1549       /* col 2 */
1550       col[0][BCOMP] = (*(const GLuint *)(code + 11)) >> 6;
1551       col[0][GCOMP] = CC_SEL(cc, 99);
1552       col[0][RCOMP] = CC_SEL(cc, 104);
1553       /* col 3 */
1554       col[1][BCOMP] = CC_SEL(cc, 109);
1555       col[1][GCOMP] = CC_SEL(cc, 114);
1556       col[1][RCOMP] = CC_SEL(cc, 119);
1557       glsb = CC_SEL(cc, 126);
1558       selb = CC_SEL(cc, 33);
1559    } else {
1560       t = (cc[0] >> (t * 2)) & 3;
1561       /* col 0 */
1562       col[0][BCOMP] = CC_SEL(cc, 64);
1563       col[0][GCOMP] = CC_SEL(cc, 69);
1564       col[0][RCOMP] = CC_SEL(cc, 74);
1565       /* col 1 */
1566       col[1][BCOMP] = CC_SEL(cc, 79);
1567       col[1][GCOMP] = CC_SEL(cc, 84);
1568       col[1][RCOMP] = CC_SEL(cc, 89);
1569       glsb = CC_SEL(cc, 125);
1570       selb = CC_SEL(cc, 1);
1571    }
1572
1573    if (CC_SEL(cc, 124) & 1) {
1574       /* alpha[0] == 1 */
1575
1576       if (t == 3) {
1577          /* zero */
1578          rgba[RCOMP] = rgba[BCOMP] = rgba[GCOMP] = rgba[ACOMP] = 0;
1579       } else {
1580          GLubyte r, g, b;
1581          if (t == 0) {
1582             b = UP5(col[0][BCOMP]);
1583             g = UP5(col[0][GCOMP]);
1584             r = UP5(col[0][RCOMP]);
1585          } else if (t == 2) {
1586             b = UP5(col[1][BCOMP]);
1587             g = UP6(col[1][GCOMP], glsb);
1588             r = UP5(col[1][RCOMP]);
1589          } else {
1590             b = (UP5(col[0][BCOMP]) + UP5(col[1][BCOMP])) / 2;
1591             g = (UP5(col[0][GCOMP]) + UP6(col[1][GCOMP], glsb)) / 2;
1592             r = (UP5(col[0][RCOMP]) + UP5(col[1][RCOMP])) / 2;
1593          }
1594          rgba[RCOMP] = UBYTE_TO_CHAN(r);
1595          rgba[GCOMP] = UBYTE_TO_CHAN(g);
1596          rgba[BCOMP] = UBYTE_TO_CHAN(b);
1597          rgba[ACOMP] = CHAN_MAX;
1598       }
1599    } else {
1600       /* alpha[0] == 0 */
1601       GLubyte r, g, b;
1602       if (t == 0) {
1603          b = UP5(col[0][BCOMP]);
1604          g = UP6(col[0][GCOMP], glsb ^ selb);
1605          r = UP5(col[0][RCOMP]);
1606       } else if (t == 3) {
1607          b = UP5(col[1][BCOMP]);
1608          g = UP6(col[1][GCOMP], glsb);
1609          r = UP5(col[1][RCOMP]);
1610       } else {
1611          b = LERP(3, t, UP5(col[0][BCOMP]), UP5(col[1][BCOMP]));
1612          g = LERP(3, t, UP6(col[0][GCOMP], glsb ^ selb),
1613                         UP6(col[1][GCOMP], glsb));
1614          r = LERP(3, t, UP5(col[0][RCOMP]), UP5(col[1][RCOMP]));
1615       }
1616       rgba[RCOMP] = UBYTE_TO_CHAN(r);
1617       rgba[GCOMP] = UBYTE_TO_CHAN(g);
1618       rgba[BCOMP] = UBYTE_TO_CHAN(b);
1619       rgba[ACOMP] = CHAN_MAX;
1620    }
1621 }
1622
1623
1624 static void
1625 fxt1_decode_1ALPHA (const GLubyte *code, GLint t, GLchan *rgba)
1626 {
1627    const GLuint *cc;
1628    GLubyte r, g, b, a;
1629
1630    cc = (const GLuint *)code;
1631    if (CC_SEL(cc, 124) & 1) {
1632       /* lerp == 1 */
1633       GLuint col0[4];
1634
1635       if (t & 16) {
1636          t &= 15;
1637          t = (cc[1] >> (t * 2)) & 3;
1638          /* col 2 */
1639          col0[BCOMP] = (*(const GLuint *)(code + 11)) >> 6;
1640          col0[GCOMP] = CC_SEL(cc, 99);
1641          col0[RCOMP] = CC_SEL(cc, 104);
1642          col0[ACOMP] = CC_SEL(cc, 119);
1643       } else {
1644          t = (cc[0] >> (t * 2)) & 3;
1645          /* col 0 */
1646          col0[BCOMP] = CC_SEL(cc, 64);
1647          col0[GCOMP] = CC_SEL(cc, 69);
1648          col0[RCOMP] = CC_SEL(cc, 74);
1649          col0[ACOMP] = CC_SEL(cc, 109);
1650       }
1651
1652       if (t == 0) {
1653          b = UP5(col0[BCOMP]);
1654          g = UP5(col0[GCOMP]);
1655          r = UP5(col0[RCOMP]);
1656          a = UP5(col0[ACOMP]);
1657       } else if (t == 3) {
1658          b = UP5(CC_SEL(cc, 79));
1659          g = UP5(CC_SEL(cc, 84));
1660          r = UP5(CC_SEL(cc, 89));
1661          a = UP5(CC_SEL(cc, 114));
1662       } else {
1663          b = LERP(3, t, UP5(col0[BCOMP]), UP5(CC_SEL(cc, 79)));
1664          g = LERP(3, t, UP5(col0[GCOMP]), UP5(CC_SEL(cc, 84)));
1665          r = LERP(3, t, UP5(col0[RCOMP]), UP5(CC_SEL(cc, 89)));
1666          a = LERP(3, t, UP5(col0[ACOMP]), UP5(CC_SEL(cc, 114)));
1667       }
1668    } else {
1669       /* lerp == 0 */
1670
1671       if (t & 16) {
1672          cc++;
1673          t &= 15;
1674       }
1675       t = (cc[0] >> (t * 2)) & 3;
1676
1677       if (t == 3) {
1678          /* zero */
1679          r = g = b = a = 0;
1680       } else {
1681          GLuint kk;
1682          cc = (const GLuint *)code;
1683          a = UP5(cc[3] >> (t * 5 + 13));
1684          t *= 15;
1685          cc = (const GLuint *)(code + 8 + t / 8);
1686          kk = cc[0] >> (t & 7);
1687          b = UP5(kk);
1688          g = UP5(kk >> 5);
1689          r = UP5(kk >> 10);
1690       }
1691    }
1692    rgba[RCOMP] = UBYTE_TO_CHAN(r);
1693    rgba[GCOMP] = UBYTE_TO_CHAN(g);
1694    rgba[BCOMP] = UBYTE_TO_CHAN(b);
1695    rgba[ACOMP] = UBYTE_TO_CHAN(a);
1696 }
1697
1698
1699 void
1700 fxt1_decode_1 (const void *texture, GLint stride, /* in pixels */
1701                GLint i, GLint j, GLchan *rgba)
1702 {
1703    static void (*decode_1[]) (const GLubyte *, GLint, GLchan *) = {
1704       fxt1_decode_1HI,     /* cc-high   = "00?" */
1705       fxt1_decode_1HI,     /* cc-high   = "00?" */
1706       fxt1_decode_1CHROMA, /* cc-chroma = "010" */
1707       fxt1_decode_1ALPHA,  /* alpha     = "011" */
1708       fxt1_decode_1MIXED,  /* mixed     = "1??" */
1709       fxt1_decode_1MIXED,  /* mixed     = "1??" */
1710       fxt1_decode_1MIXED,  /* mixed     = "1??" */
1711       fxt1_decode_1MIXED   /* mixed     = "1??" */
1712    };
1713
1714    const GLubyte *code = (const GLubyte *)texture +
1715                          ((j / 4) * (stride / 8) + (i / 8)) * 16;
1716    GLint mode = CC_SEL(code, 125);
1717    GLint t = i & 7;
1718
1719    if (t & 4) {
1720       t += 12;
1721    }
1722    t += (j & 3) * 4;
1723
1724    decode_1[mode](code, t, rgba);
1725 }