src/mesa/main/texcompress_fxt1.c

   1 /*
   2  * Mesa 3-D graphics library
   3  * Version:  6.5
   4  *
   5  * Copyright (C) 1999-2005  Brian Paul   All Rights Reserved.
   6  *
   7  * Permission is hereby granted, free of charge, to any person obtaining a
   8  * copy of this software and associated documentation files (the "Software"),
   9  * to deal in the Software without restriction, including without limitation
  10  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  11  * and/or sell copies of the Software, and to permit persons to whom the
  12  * Software is furnished to do so, subject to the following conditions:
  13  *
  14  * The above copyright notice and this permission notice shall be included
  15  * in all copies or substantial portions of the Software.
  16  *
  17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  18  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  20  * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
  21  * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  22  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  23  */
  24
  25
  26 /**
  27  * \file texcompress_fxt1.c
 * GL_3DFX_texture_compression_FXT1 support.
  29  */
  30
  31
  32 #include "glheader.h"
  33 #include "imports.h"
  34 #include "colormac.h"
  35 #include "context.h"
  36 #include "convolve.h"
  37 #include "image.h"
  38 #include "mipmap.h"
  39 #include "texcompress.h"
  40 #include "texformat.h"
  41 #include "texstore.h"
  42
  43
/*
 * Forward declaration of the FXT1 encoder.
 * The texstore functions in this file call it with comps = 3 (RGB) or
 * 4 (RGBA), a pointer to GLchan pixels, a source row stride expressed in
 * GLchan units, and the destination address / row stride of the
 * compressed image.
 */
static void
fxt1_encode (GLuint width, GLuint height, GLint comps,
             const void *source, GLint srcRowStride,
             void *dest, GLint destRowStride);

/*
 * Forward declaration of the single-texel decoder (not static).
 * The fetch_texel_* functions in this file call it with the texture
 * image's Data and RowStride, the texel coordinates (i, j) and a
 * 4-component GLchan output array.
 */
void
fxt1_decode_1 (const void *texture, GLint stride,
               GLint i, GLint j, GLchan *rgba);
  52
  53
/**
 * Called during context initialization.
 *
 * Currently a no-op: the context argument is only referenced to
 * silence unused-parameter warnings.
 */
void
_mesa_init_texture_fxt1( GLcontext *ctx )
{
   (void) ctx;
}
  62
  63
  64 /**
  65  * Called via TexFormat->StoreImage to store an RGB_FXT1 texture.
  66  */
  67 static GLboolean
  68 texstore_rgb_fxt1(TEXSTORE_PARAMS)
  69 {
  70    const GLchan *pixels;
  71    GLint srcRowStride;
  72    GLubyte *dst;
  73    const GLint texWidth = dstRowStride * 8 / 16; /* a bit of a hack */
  74    const GLchan *tempImage = NULL;
  75
  76    ASSERT(dstFormat == &_mesa_texformat_rgb_fxt1);
  77    ASSERT(dstXoffset % 8 == 0);
  78    ASSERT(dstYoffset % 4 == 0);
  79    ASSERT(dstZoffset     == 0);
  80    (void) dstZoffset;
  81    (void) dstImageOffsets;
  82
  83    if (srcFormat != GL_RGB ||
  84        srcType != CHAN_TYPE ||
  85        ctx->_ImageTransferState ||
  86        srcPacking->SwapBytes) {
  87       /* convert image to RGB/GLchan */
  88       tempImage = _mesa_make_temp_chan_image(ctx, dims,
  89                                              baseInternalFormat,
  90                                              dstFormat->BaseFormat,
  91                                              srcWidth, srcHeight, srcDepth,
  92                                              srcFormat, srcType, srcAddr,
  93                                              srcPacking);
  94       if (!tempImage)
  95          return GL_FALSE; /* out of memory */
  96       _mesa_adjust_image_for_convolution(ctx, dims, &srcWidth, &srcHeight);
  97       pixels = tempImage;
  98       srcRowStride = 3 * srcWidth;
  99       srcFormat = GL_RGB;
 100    }
 101    else {
 102       pixels = (const GLchan *) srcAddr;
 103       srcRowStride = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat,
 104                                             srcType) / sizeof(GLchan);
 105    }
 106
 107    dst = _mesa_compressed_image_address(dstXoffset, dstYoffset, 0,
 108                                         dstFormat->MesaFormat,
 109                                         texWidth, (GLubyte *) dstAddr);
 110
 111    fxt1_encode(srcWidth, srcHeight, 3, pixels, srcRowStride,
 112                dst, dstRowStride);
 113
 114    if (tempImage)
 115       _mesa_free((void*) tempImage);
 116
 117    return GL_TRUE;
 118 }
 119
 120
 121 /**
 122  * Called via TexFormat->StoreImage to store an RGBA_FXT1 texture.
 123  */
 124 static GLboolean
 125 texstore_rgba_fxt1(TEXSTORE_PARAMS)
 126 {
 127    const GLchan *pixels;
 128    GLint srcRowStride;
 129    GLubyte *dst;
 130    GLint texWidth = dstRowStride * 8 / 16; /* a bit of a hack */
 131    const GLchan *tempImage = NULL;
 132
 133    ASSERT(dstFormat == &_mesa_texformat_rgba_fxt1);
 134    ASSERT(dstXoffset % 8 == 0);
 135    ASSERT(dstYoffset % 4 == 0);
 136    ASSERT(dstZoffset     == 0);
 137    (void) dstZoffset;
 138    (void) dstImageOffsets;
 139
 140    if (srcFormat != GL_RGBA ||
 141        srcType != CHAN_TYPE ||
 142        ctx->_ImageTransferState ||
 143        srcPacking->SwapBytes) {
 144       /* convert image to RGBA/GLchan */
 145       tempImage = _mesa_make_temp_chan_image(ctx, dims,
 146                                              baseInternalFormat,
 147                                              dstFormat->BaseFormat,
 148                                              srcWidth, srcHeight, srcDepth,
 149                                              srcFormat, srcType, srcAddr,
 150                                              srcPacking);
 151       if (!tempImage)
 152          return GL_FALSE; /* out of memory */
 153       _mesa_adjust_image_for_convolution(ctx, dims, &srcWidth, &srcHeight);
 154       pixels = tempImage;
 155       srcRowStride = 4 * srcWidth;
 156       srcFormat = GL_RGBA;
 157    }
 158    else {
 159       pixels = (const GLchan *) srcAddr;
 160       srcRowStride = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat,
 161                                             srcType) / sizeof(GLchan);
 162    }
 163
 164    dst = _mesa_compressed_image_address(dstXoffset, dstYoffset, 0,
 165                                         dstFormat->MesaFormat,
 166                                         texWidth, (GLubyte *) dstAddr);
 167
 168    fxt1_encode(srcWidth, srcHeight, 4, pixels, srcRowStride,
 169                dst, dstRowStride);
 170
 171    if (tempImage)
 172       _mesa_free((void*) tempImage);
 173
 174    return GL_TRUE;
 175 }
 176
 177
/**
 * FetchTexel2D entry of the RGBA_FXT1 format table: decode the texel at
 * coordinates (i, j) into the GLchan array \p texel.
 * The third coordinate \p k is ignored.
 */
static void
fetch_texel_2d_rgba_fxt1( const struct gl_texture_image *texImage,
                          GLint i, GLint j, GLint k, GLchan *texel )
{
   (void) k;
   fxt1_decode_1(texImage->Data, texImage->RowStride, i, j, texel);
}
 185
 186
 187 static void
 188 fetch_texel_2d_f_rgba_fxt1( const struct gl_texture_image *texImage,
 189                             GLint i, GLint j, GLint k, GLfloat *texel )
 190 {
 191    /* just sample as GLchan and convert to float here */
 192    GLchan rgba[4];
 193    (void) k;
 194    fxt1_decode_1(texImage->Data, texImage->RowStride, i, j, rgba);
 195    texel[RCOMP] = CHAN_TO_FLOAT(rgba[RCOMP]);
 196    texel[GCOMP] = CHAN_TO_FLOAT(rgba[GCOMP]);
 197    texel[BCOMP] = CHAN_TO_FLOAT(rgba[BCOMP]);
 198    texel[ACOMP] = CHAN_TO_FLOAT(rgba[ACOMP]);
 199 }
 200
 201
/**
 * FetchTexel2D entry of the RGB_FXT1 format table: decode the texel at
 * coordinates (i, j) into \p texel and overwrite its alpha channel.
 * The third coordinate \p k is ignored.
 */
static void
fetch_texel_2d_rgb_fxt1( const struct gl_texture_image *texImage,
                         GLint i, GLint j, GLint k, GLchan *texel )
{
   (void) k;
   fxt1_decode_1(texImage->Data, texImage->RowStride, i, j, texel);
   /* NOTE(review): the literal 255 is only "fully opaque" if GLchan is
    * 8 bits wide - confirm against CHAN_MAX for other channel sizes. */
   texel[ACOMP] = 255;
}
 210
 211
 212 static void
 213 fetch_texel_2d_f_rgb_fxt1( const struct gl_texture_image *texImage,
 214                            GLint i, GLint j, GLint k, GLfloat *texel )
 215 {
 216    /* just sample as GLchan and convert to float here */
 217    GLchan rgba[4];
 218    (void) k;
 219    fxt1_decode_1(texImage->Data, texImage->RowStride, i, j, rgba);
 220    texel[RCOMP] = CHAN_TO_FLOAT(rgba[RCOMP]);
 221    texel[GCOMP] = CHAN_TO_FLOAT(rgba[GCOMP]);
 222    texel[BCOMP] = CHAN_TO_FLOAT(rgba[BCOMP]);
 223    texel[ACOMP] = 1.0F;
 224 }
 225
 226
 227
/**
 * Format table entry for RGB FXT1 textures.
 * Channel sizes are approximations, TexelBytes is 0, and only the 2D
 * fetch functions are provided; the 1D/3D fetchers and StoreTexel are
 * NULL.
 */
const struct gl_texture_format _mesa_texformat_rgb_fxt1 = {
   MESA_FORMAT_RGB_FXT1,                /* MesaFormat */
   GL_RGB,                              /* BaseFormat */
   GL_UNSIGNED_NORMALIZED_ARB,          /* DataType */
   4, /*approx*/                        /* RedBits */
   4, /*approx*/                        /* GreenBits */
   4, /*approx*/                        /* BlueBits */
   0,                                   /* AlphaBits */
   0,                                   /* LuminanceBits */
   0,                                   /* IntensityBits */
   0,                                   /* IndexBits */
   0,                                   /* DepthBits */
   0,                                   /* StencilBits */
   0,                                   /* TexelBytes */
   texstore_rgb_fxt1,                   /* StoreTexImageFunc */
   NULL, /*impossible*/                 /* FetchTexel1D */
   fetch_texel_2d_rgb_fxt1,             /* FetchTexel2D */
   NULL, /*impossible*/                 /* FetchTexel3D */
   NULL, /*impossible*/                 /* FetchTexel1Df */
   fetch_texel_2d_f_rgb_fxt1,           /* FetchTexel2Df */
   NULL, /*impossible*/                 /* FetchTexel3Df */
   NULL                                 /* StoreTexel */
};
 251
/**
 * Format table entry for RGBA FXT1 textures.
 * Channel sizes are approximations, TexelBytes is 0, and only the 2D
 * fetch functions are provided; the 1D/3D fetchers and StoreTexel are
 * NULL.
 */
const struct gl_texture_format _mesa_texformat_rgba_fxt1 = {
   MESA_FORMAT_RGBA_FXT1,               /* MesaFormat */
   GL_RGBA,                             /* BaseFormat */
   GL_UNSIGNED_NORMALIZED_ARB,          /* DataType */
   4, /*approx*/                        /* RedBits */
   4, /*approx*/                        /* GreenBits */
   4, /*approx*/                        /* BlueBits */
   1, /*approx*/                        /* AlphaBits */
   0,                                   /* LuminanceBits */
   0,                                   /* IntensityBits */
   0,                                   /* IndexBits */
   0,                                   /* DepthBits */
   0,                                   /* StencilBits */
   0,                                   /* TexelBytes */
   texstore_rgba_fxt1,                  /* StoreTexImageFunc */
   NULL, /*impossible*/                 /* FetchTexel1D */
   fetch_texel_2d_rgba_fxt1,            /* FetchTexel2D */
   NULL, /*impossible*/                 /* FetchTexel3D */
   NULL, /*impossible*/                 /* FetchTexel1Df */
   fetch_texel_2d_f_rgba_fxt1,          /* FetchTexel2Df */
   NULL, /*impossible*/                 /* FetchTexel3Df */
   NULL                                 /* StoreTexel */
};
 275
 276
 277 /***************************************************************************\
 278  * FXT1 encoder
 279  *
 280  * The encoder was built by reversing the decoder,
 281  * and is vaguely based on Texus2 by 3dfx. Note that this code
 282  * is merely a proof of concept, since it is highly UNoptimized;
 283  * moreover, it is sub-optimal due to initial conditions passed
 284  * to Lloyd's algorithm (the interpolation modes are even worse).
 285 \***************************************************************************/
 286
 287
/* Encoder tuning constants and array bounds. */
#define MAX_COMP 4 /* ever needed maximum number of components in texel */
#define MAX_VECT 4 /* ever needed maximum number of base vectors to find */
#define N_TEXELS 32 /* number of texels in a block (always 32) */
#define LL_N_REP 50 /* number of iterations in lloyd's vq */
#define LL_RMS_D 10 /* fault tolerance (maximum delta) */
#define LL_RMS_E 255 /* fault tolerance (maximum error) */
#define ALPHA_TS 2 /* alpha threshold: (255 - ALPHA_TS) deemed opaque */
/* True when the GLuint read from address v is zero, i.e. all bytes of a
 * 4-byte texel are 0.  The pointer is reinterpreted as GLuint *. */
#define ISTBLACK(v) (*((GLuint *)(v)) == 0)
 296
 297
 298 /*
 299  * Define a 64-bit unsigned integer type and macros
 300  */
 301 #if defined(__GNUC__) && !defined(__cplusplus)
 302
 303 #define FX64_NATIVE 1
 304
 305 #ifdef __MINGW32__
 306 typedef unsigned long Fx64;
 307 #else
 308 typedef unsigned long long Fx64;
 309 #endif
 310
 311
 312 #define FX64_MOV32(a, b) a = b
 313 #define FX64_OR32(a, b)  a |= b
 314 #define FX64_SHL(a, c)   a <<= c
 315
 316 #else  /* !__GNUC__ */
 317
 318 #define FX64_NATIVE 0
 319
 320 typedef struct {
 321    GLuint lo, hi;
 322 } Fx64;
 323
 324 #define FX64_MOV32(a, b) a.lo = b
 325 #define FX64_OR32(a, b)  a.lo |= b
 326
 327 #define FX64_SHL(a, c)                                 \
 328    do {                                                \
 329        if ((c) >= 32) {                                \
 330           a.hi = a.lo << ((c) - 32);                   \
 331           a.lo = 0;                                    \
 332        } else {                                        \
 333           a.hi = (a.hi << (c)) | (a.lo >> (32 - (c))); \
 334           a.lo <<= (c);                                \
 335        }                                               \
 336    } while (0)
 337
 338 #endif /* !__GNUC__ */
 339
 340
/* Per-component weight applied by MAKEIVEC; currently the constant 1. */
#define F(i) (GLfloat)1 /* can be used to obtain an oblong metric: 0.30 / 0.59 / 0.11 */
/* When non-zero, CALCCDOT clamps its result to [0, NV]. */
#define SAFECDOT 1 /* for paranoids */

/*
 * MAKEIVEC(NV, NC, IV, B, V0, V1)
 *
 * Build the projection used to map a color onto the segment V0..V1:
 * afterwards dot(V, IV) + B is 0.5 for V == V0 and NV + 0.5 for V == V1,
 * so truncating to an integer yields an index in 0..NV.
 * IV receives NC floats and B the offset.
 *
 * The macro uses a variable named `i` from the enclosing scope as its
 * loop counter.  It divides by the squared length of (V1 - V0), so the
 * two endpoints must differ.
 */
#define MAKEIVEC(NV, NC, IV, B, V0, V1)  \
   do {                                  \
      /* compute interpolation vector */ \
      GLfloat d2 = 0.0F;                 \
      GLfloat rd2;                       \
                                         \
      for (i = 0; i < NC; i++) {         \
         IV[i] = (V1[i] - V0[i]) * F(i); \
         d2 += IV[i] * IV[i];            \
      }                                  \
      rd2 = (GLfloat)NV / d2;            \
      B = 0;                             \
      for (i = 0; i < NC; i++) {         \
         IV[i] *= F(i);                  \
         B -= IV[i] * V0[i];             \
         IV[i] *= rd2;                   \
      }                                  \
      B = B * rd2 + 0.5f;                \
   } while (0)

/*
 * CALCCDOT(TEXEL, NV, NC, IV, B, V)
 *
 * Store (GLint)(dot(V, IV) + B) in TEXEL, where IV and B come from
 * MAKEIVEC.  With SAFECDOT enabled the result is clamped to [0, NV].
 * Like MAKEIVEC, this uses the enclosing scope's `i` as loop counter.
 */
#define CALCCDOT(TEXEL, NV, NC, IV, B, V)\
   do {                                  \
      GLfloat dot = 0.0F;                \
      for (i = 0; i < NC; i++) {         \
         dot += V[i] * IV[i];            \
      }                                  \
      TEXEL = (GLint)(dot + B);          \
      if (SAFECDOT) {                    \
         if (TEXEL < 0) {                \
            TEXEL = 0;                   \
         } else if (TEXEL > NV) {        \
            TEXEL = NV;                  \
         }                               \
      }                                  \
   } while (0)
 379
 380
 381 static GLint
 382 fxt1_bestcol (GLfloat vec[][MAX_COMP], GLint nv,
 383               GLubyte input[MAX_COMP], GLint nc)
 384 {
 385    GLint i, j, best = -1;
 386    GLfloat err = 1e9; /* big enough */
 387
 388    for (j = 0; j < nv; j++) {
 389       GLfloat e = 0.0F;
 390       for (i = 0; i < nc; i++) {
 391          e += (vec[j][i] - input[i]) * (vec[j][i] - input[i]);
 392       }
 393       if (e < err) {
 394          err = e;
 395          best = j;
 396       }
 397    }
 398
 399    return best;
 400 }
 401
 402
 403 static GLint
 404 fxt1_worst (GLfloat vec[MAX_COMP],
 405             GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
 406 {
 407    GLint i, k, worst = -1;
 408    GLfloat err = -1.0F; /* small enough */
 409
 410    for (k = 0; k < n; k++) {
 411       GLfloat e = 0.0F;
 412       for (i = 0; i < nc; i++) {
 413          e += (vec[i] - input[k][i]) * (vec[i] - input[k][i]);
 414       }
 415       if (e > err) {
 416          err = e;
 417          worst = k;
 418       }
 419    }
 420
 421    return worst;
 422 }
 423
 424
 425 static GLint
 426 fxt1_variance (GLdouble variance[MAX_COMP],
 427                GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
 428 {
 429    GLint i, k, best = 0;
 430    GLint sx, sx2;
 431    GLdouble var, maxvar = -1; /* small enough */
 432    GLdouble teenth = 1.0 / n;
 433
 434    for (i = 0; i < nc; i++) {
 435       sx = sx2 = 0;
 436       for (k = 0; k < n; k++) {
 437          GLint t = input[k][i];
 438          sx += t;
 439          sx2 += t * t;
 440       }
 441       var = sx2 * teenth - sx * sx * teenth * teenth;
 442       if (maxvar < var) {
 443          maxvar = var;
 444          best = i;
 445       }
 446       if (variance) {
 447          variance[i] = var;
 448       }
 449    }
 450
 451    return best;
 452 }
 453
 454
 455 static GLint
 456 fxt1_choose (GLfloat vec[][MAX_COMP], GLint nv,
 457              GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
 458 {
 459 #if 0
 460    /* Choose colors from a grid.
 461     */
 462    GLint i, j;
 463
 464    for (j = 0; j < nv; j++) {
 465       GLint m = j * (n - 1) / (nv - 1);
 466       for (i = 0; i < nc; i++) {
 467          vec[j][i] = input[m][i];
 468       }
 469    }
 470 #else
 471    /* Our solution here is to find the darkest and brightest colors in
 472     * the 8x4 tile and use those as the two representative colors.
 473     * There are probably better algorithms to use (histogram-based).
 474     */
 475    GLint i, j, k;
 476    GLint minSum = 2000; /* big enough */
 477    GLint maxSum = -1; /* small enough */
 478    GLint minCol = 0; /* phoudoin: silent compiler! */
 479    GLint maxCol = 0; /* phoudoin: silent compiler! */
 480
 481    struct {
 482       GLint flag;
 483       GLint key;
 484       GLint freq;
 485       GLint idx;
 486    } hist[N_TEXELS];
 487    GLint lenh = 0;
 488
 489    _mesa_memset(hist, 0, sizeof(hist));
 490
 491    for (k = 0; k < n; k++) {
 492       GLint l;
 493       GLint key = 0;
 494       GLint sum = 0;
 495       for (i = 0; i < nc; i++) {
 496          key <<= 8;
 497          key |= input[k][i];
 498          sum += input[k][i];
 499       }
 500       for (l = 0; l < n; l++) {
 501          if (!hist[l].flag) {
 502             /* alloc new slot */
 503             hist[l].flag = !0;
 504             hist[l].key = key;
 505             hist[l].freq = 1;
 506             hist[l].idx = k;
 507             lenh = l + 1;
 508             break;
 509          } else if (hist[l].key == key) {
 510             hist[l].freq++;
 511             break;
 512          }
 513       }
 514       if (minSum > sum) {
 515          minSum = sum;
 516          minCol = k;
 517       }
 518       if (maxSum < sum) {
 519          maxSum = sum;
 520          maxCol = k;
 521       }
 522    }
 523
 524    if (lenh <= nv) {
 525       for (j = 0; j < lenh; j++) {
 526          for (i = 0; i < nc; i++) {
 527             vec[j][i] = (GLfloat)input[hist[j].idx][i];
 528          }
 529       }
 530       for (; j < nv; j++) {
 531          for (i = 0; i < nc; i++) {
 532             vec[j][i] = vec[0][i];
 533          }
 534       }
 535       return 0;
 536    }
 537
 538    for (j = 0; j < nv; j++) {
 539       for (i = 0; i < nc; i++) {
 540          vec[j][i] = ((nv - 1 - j) * input[minCol][i] + j * input[maxCol][i] + (nv - 1) / 2) / (GLfloat)(nv - 1);
 541       }
 542    }
 543 #endif
 544
 545    return !0;
 546 }
 547
 548
 549 static GLint
 550 fxt1_lloyd (GLfloat vec[][MAX_COMP], GLint nv,
 551             GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
 552 {
 553    /* Use the generalized lloyd's algorithm for VQ:
 554     *     find 4 color vectors.
 555     *
 556     *     for each sample color
 557     *         sort to nearest vector.
 558     *
 559     *     replace each vector with the centroid of it's matching colors.
 560     *
 561     *     repeat until RMS doesn't improve.
 562     *
 563     *     if a color vector has no samples, or becomes the same as another
 564     *     vector, replace it with the color which is farthest from a sample.
 565     *
 566     * vec[][MAX_COMP]           initial vectors and resulting colors
 567     * nv                        number of resulting colors required
 568     * input[N_TEXELS][MAX_COMP] input texels
 569     * nc                        number of components in input / vec
 570     * n                         number of input samples
 571     */
 572
 573    GLint sum[MAX_VECT][MAX_COMP]; /* used to accumulate closest texels */
 574    GLint cnt[MAX_VECT]; /* how many times a certain vector was chosen */
 575    GLfloat error, lasterror = 1e9;
 576
 577    GLint i, j, k, rep;
 578
 579    /* the quantizer */
 580    for (rep = 0; rep < LL_N_REP; rep++) {
 581       /* reset sums & counters */
 582       for (j = 0; j < nv; j++) {
 583          for (i = 0; i < nc; i++) {
 584             sum[j][i] = 0;
 585          }
 586          cnt[j] = 0;
 587       }
 588       error = 0;
 589
 590       /* scan whole block */
 591       for (k = 0; k < n; k++) {
 592 #if 1
 593          GLint best = -1;
 594          GLfloat err = 1e9; /* big enough */
 595          /* determine best vector */
 596          for (j = 0; j < nv; j++) {
 597             GLfloat e = (vec[j][0] - input[k][0]) * (vec[j][0] - input[k][0]) +
 598                       (vec[j][1] - input[k][1]) * (vec[j][1] - input[k][1]) +
 599                       (vec[j][2] - input[k][2]) * (vec[j][2] - input[k][2]);
 600             if (nc == 4) {
 601                e += (vec[j][3] - input[k][3]) * (vec[j][3] - input[k][3]);
 602             }
 603             if (e < err) {
 604                err = e;
 605                best = j;
 606             }
 607          }
 608 #else
 609          GLint best = fxt1_bestcol(vec, nv, input[k], nc, &err);
 610 #endif
 611          /* add in closest color */
 612          for (i = 0; i < nc; i++) {
 613             sum[best][i] += input[k][i];
 614          }
 615          /* mark this vector as used */
 616          cnt[best]++;
 617          /* accumulate error */
 618          error += err;
 619       }
 620
 621       /* check RMS */
 622       if ((error < LL_RMS_E) ||
 623           ((error < lasterror) && ((lasterror - error) < LL_RMS_D))) {
 624          return !0; /* good match */
 625       }
 626       lasterror = error;
 627
 628       /* move each vector to the barycenter of its closest colors */
 629       for (j = 0; j < nv; j++) {
 630          if (cnt[j]) {
 631             GLfloat div = 1.0F / cnt[j];
 632             for (i = 0; i < nc; i++) {
 633                vec[j][i] = div * sum[j][i];
 634             }
 635          } else {
 636             /* this vec has no samples or is identical with a previous vec */
 637             GLint worst = fxt1_worst(vec[j], input, nc, n);
 638             for (i = 0; i < nc; i++) {
 639                vec[j][i] = input[worst][i];
 640             }
 641          }
 642       }
 643    }
 644
 645    return 0; /* could not converge fast enough */
 646 }
 647
 648
 649 static void
 650 fxt1_quantize_CHROMA (GLuint *cc,
 651                       GLubyte input[N_TEXELS][MAX_COMP])
 652 {
 653    const GLint n_vect = 4; /* 4 base vectors to find */
 654    const GLint n_comp = 3; /* 3 components: R, G, B */
 655    GLfloat vec[MAX_VECT][MAX_COMP];
 656    GLint i, j, k;
 657    Fx64 hi; /* high quadword */
 658    GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
 659
 660    if (fxt1_choose(vec, n_vect, input, n_comp, N_TEXELS) != 0) {
 661       fxt1_lloyd(vec, n_vect, input, n_comp, N_TEXELS);
 662    }
 663
 664    FX64_MOV32(hi, 4); /* cc-chroma = "010" + unused bit */
 665    for (j = n_vect - 1; j >= 0; j--) {
 666       for (i = 0; i < n_comp; i++) {
 667          /* add in colors */
 668          FX64_SHL(hi, 5);
 669          FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
 670       }
 671    }
 672    ((Fx64 *)cc)[1] = hi;
 673
 674    lohi = lolo = 0;
 675    /* right microtile */
 676    for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) {
 677       lohi <<= 2;
 678       lohi |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
 679    }
 680    /* left microtile */
 681    for (; k >= 0; k--) {
 682       lolo <<= 2;
 683       lolo |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
 684    }
 685    cc[1] = lohi;
 686    cc[0] = lolo;
 687 }
 688
 689
 690 static void
 691 fxt1_quantize_ALPHA0 (GLuint *cc,
 692                       GLubyte input[N_TEXELS][MAX_COMP],
 693                       GLubyte reord[N_TEXELS][MAX_COMP], GLint n)
 694 {
 695    const GLint n_vect = 3; /* 3 base vectors to find */
 696    const GLint n_comp = 4; /* 4 components: R, G, B, A */
 697    GLfloat vec[MAX_VECT][MAX_COMP];
 698    GLint i, j, k;
 699    Fx64 hi; /* high quadword */
 700    GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
 701
 702    /* the last vector indicates zero */
 703    for (i = 0; i < n_comp; i++) {
 704       vec[n_vect][i] = 0;
 705    }
 706
 707    /* the first n texels in reord are guaranteed to be non-zero */
 708    if (fxt1_choose(vec, n_vect, reord, n_comp, n) != 0) {
 709       fxt1_lloyd(vec, n_vect, reord, n_comp, n);
 710    }
 711
 712    FX64_MOV32(hi, 6); /* alpha = "011" + lerp = 0 */
 713    for (j = n_vect - 1; j >= 0; j--) {
 714       /* add in alphas */
 715       FX64_SHL(hi, 5);
 716       FX64_OR32(hi, (GLuint)(vec[j][ACOMP] / 8.0F));
 717    }
 718    for (j = n_vect - 1; j >= 0; j--) {
 719       for (i = 0; i < n_comp - 1; i++) {
 720          /* add in colors */
 721          FX64_SHL(hi, 5);
 722          FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
 723       }
 724    }
 725    ((Fx64 *)cc)[1] = hi;
 726
 727    lohi = lolo = 0;
 728    /* right microtile */
 729    for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) {
 730       lohi <<= 2;
 731       lohi |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
 732    }
 733    /* left microtile */
 734    for (; k >= 0; k--) {
 735       lolo <<= 2;
 736       lolo |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
 737    }
 738    cc[1] = lohi;
 739    cc[0] = lolo;
 740 }
 741
 742
 743 static void
 744 fxt1_quantize_ALPHA1 (GLuint *cc,
 745                       GLubyte input[N_TEXELS][MAX_COMP])
 746 {
 747    const GLint n_vect = 3; /* highest vector number in each microtile */
 748    const GLint n_comp = 4; /* 4 components: R, G, B, A */
 749    GLfloat vec[1 + 1 + 1][MAX_COMP]; /* 1.5 extrema for each sub-block */
 750    GLfloat b, iv[MAX_COMP]; /* interpolation vector */
 751    GLint i, j, k;
 752    Fx64 hi; /* high quadword */
 753    GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
 754
 755    GLint minSum;
 756    GLint maxSum;
 757    GLint minColL = 0, maxColL = 0;
 758    GLint minColR = 0, maxColR = 0;
 759    GLint sumL = 0, sumR = 0;
 760    GLint nn_comp;
 761    /* Our solution here is to find the darkest and brightest colors in
 762     * the 4x4 tile and use those as the two representative colors.
 763     * There are probably better algorithms to use (histogram-based).
 764     */
 765    nn_comp = n_comp;
 766    while ((minColL == maxColL) && nn_comp) {
 767        minSum = 2000; /* big enough */
 768        maxSum = -1; /* small enough */
 769        for (k = 0; k < N_TEXELS / 2; k++) {
 770            GLint sum = 0;
 771            for (i = 0; i < nn_comp; i++) {
 772                sum += input[k][i];
 773            }
 774            if (minSum > sum) {
 775                minSum = sum;
 776                minColL = k;
 777            }
 778            if (maxSum < sum) {
 779                maxSum = sum;
 780                maxColL = k;
 781            }
 782            sumL += sum;
 783        }
 784
 785        nn_comp--;
 786    }
 787
 788    nn_comp = n_comp;
 789    while ((minColR == maxColR) && nn_comp) {
 790        minSum = 2000; /* big enough */
 791        maxSum = -1; /* small enough */
 792        for (k = N_TEXELS / 2; k < N_TEXELS; k++) {
 793            GLint sum = 0;
 794            for (i = 0; i < nn_comp; i++) {
 795                sum += input[k][i];
 796            }
 797            if (minSum > sum) {
 798                minSum = sum;
 799                minColR = k;
 800            }
 801            if (maxSum < sum) {
 802                maxSum = sum;
 803                maxColR = k;
 804            }
 805            sumR += sum;
 806        }
 807
 808        nn_comp--;
 809    }
 810
 811    /* choose the common vector (yuck!) */
 812    {
 813       GLint j1, j2;
 814       GLint v1 = 0, v2 = 0;
 815       GLfloat err = 1e9; /* big enough */
 816       GLfloat tv[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
 817       for (i = 0; i < n_comp; i++) {
 818          tv[0][i] = input[minColL][i];
 819          tv[1][i] = input[maxColL][i];
 820          tv[2][i] = input[minColR][i];
 821          tv[3][i] = input[maxColR][i];
 822       }
 823       for (j1 = 0; j1 < 2; j1++) {
 824          for (j2 = 2; j2 < 4; j2++) {
 825             GLfloat e = 0.0F;
 826             for (i = 0; i < n_comp; i++) {
 827                e += (tv[j1][i] - tv[j2][i]) * (tv[j1][i] - tv[j2][i]);
 828             }
 829             if (e < err) {
 830                err = e;
 831                v1 = j1;
 832                v2 = j2;
 833             }
 834          }
 835       }
 836       for (i = 0; i < n_comp; i++) {
 837          vec[0][i] = tv[1 - v1][i];
 838          vec[1][i] = (tv[v1][i] * sumL + tv[v2][i] * sumR) / (sumL + sumR);
 839          vec[2][i] = tv[5 - v2][i];
 840       }
 841    }
 842
 843    /* left microtile */
 844    cc[0] = 0;
 845    if (minColL != maxColL) {
 846       /* compute interpolation vector */
 847       MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
 848
 849       /* add in texels */
 850       lolo = 0;
 851       for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
 852          GLint texel;
 853          /* interpolate color */
 854          CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
 855          /* add in texel */
 856          lolo <<= 2;
 857          lolo |= texel;
 858       }
 859
 860       cc[0] = lolo;
 861    }
 862
 863    /* right microtile */
 864    cc[1] = 0;
 865    if (minColR != maxColR) {
 866       /* compute interpolation vector */
 867       MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[1]);
 868
 869       /* add in texels */
 870       lohi = 0;
 871       for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
 872          GLint texel;
 873          /* interpolate color */
 874          CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
 875          /* add in texel */
 876          lohi <<= 2;
 877          lohi |= texel;
 878       }
 879
 880       cc[1] = lohi;
 881    }
 882
 883    FX64_MOV32(hi, 7); /* alpha = "011" + lerp = 1 */
 884    for (j = n_vect - 1; j >= 0; j--) {
 885       /* add in alphas */
 886       FX64_SHL(hi, 5);
 887       FX64_OR32(hi, (GLuint)(vec[j][ACOMP] / 8.0F));
 888    }
 889    for (j = n_vect - 1; j >= 0; j--) {
 890       for (i = 0; i < n_comp - 1; i++) {
 891          /* add in colors */
 892          FX64_SHL(hi, 5);
 893          FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
 894       }
 895    }
 896    ((Fx64 *)cc)[1] = hi;
 897 }
 898
 899
 900 static void
 901 fxt1_quantize_HI (GLuint *cc,
 902                   GLubyte input[N_TEXELS][MAX_COMP],
 903                   GLubyte reord[N_TEXELS][MAX_COMP], GLint n)
 904 {
 905    const GLint n_vect = 6; /* highest vector number */
 906    const GLint n_comp = 3; /* 3 components: R, G, B */
 907    GLfloat b = 0.0F;       /* phoudoin: silent compiler! */
 908    GLfloat iv[MAX_COMP];   /* interpolation vector */
 909    GLint i, k;
 910    GLuint hihi; /* high quadword: hi dword */
 911
 912    GLint minSum = 2000; /* big enough */
 913    GLint maxSum = -1; /* small enough */
 914    GLint minCol = 0; /* phoudoin: silent compiler! */
 915    GLint maxCol = 0; /* phoudoin: silent compiler! */
 916
 917    /* Our solution here is to find the darkest and brightest colors in
 918     * the 8x4 tile and use those as the two representative colors.
 919     * There are probably better algorithms to use (histogram-based).
 920     */
 921    for (k = 0; k < n; k++) {
 922       GLint sum = 0;
 923       for (i = 0; i < n_comp; i++) {
 924          sum += reord[k][i];
 925       }
 926       if (minSum > sum) {
 927          minSum = sum;
 928          minCol = k;
 929       }
 930       if (maxSum < sum) {
 931          maxSum = sum;
 932          maxCol = k;
 933       }
 934    }
 935
 936    hihi = 0; /* cc-hi = "00" */
 937    for (i = 0; i < n_comp; i++) {
 938       /* add in colors */
 939       hihi <<= 5;
 940       hihi |= reord[maxCol][i] >> 3;
 941    }
 942    for (i = 0; i < n_comp; i++) {
 943       /* add in colors */
 944       hihi <<= 5;
 945       hihi |= reord[minCol][i] >> 3;
 946    }
 947    cc[3] = hihi;
 948    cc[0] = cc[1] = cc[2] = 0;
 949
 950    /* compute interpolation vector */
 951    if (minCol != maxCol) {
 952       MAKEIVEC(n_vect, n_comp, iv, b, reord[minCol], reord[maxCol]);
 953    }
 954
 955    /* add in texels */
 956    for (k = N_TEXELS - 1; k >= 0; k--) {
 957       GLint t = k * 3;
 958       GLuint *kk = (GLuint *)((char *)cc + t / 8);
 959       GLint texel = n_vect + 1; /* transparent black */
 960
 961       if (!ISTBLACK(input[k])) {
 962          if (minCol != maxCol) {
 963             /* interpolate color */
 964             CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
 965             /* add in texel */
 966             kk[0] |= texel << (t & 7);
 967          }
 968       } else {
 969          /* add in texel */
 970          kk[0] |= texel << (t & 7);
 971       }
 972    }
 973 }
 974
 975
 976 static void
 977 fxt1_quantize_MIXED1 (GLuint *cc,
 978                       GLubyte input[N_TEXELS][MAX_COMP])
 979 {
 980    const GLint n_vect = 2; /* highest vector number in each microtile */
 981    const GLint n_comp = 3; /* 3 components: R, G, B */
 982    GLubyte vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
 983    GLfloat b, iv[MAX_COMP]; /* interpolation vector */
 984    GLint i, j, k;
 985    Fx64 hi; /* high quadword */
 986    GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
 987
 988    GLint minSum;
 989    GLint maxSum;
 990    GLint minColL = 0, maxColL = -1;
 991    GLint minColR = 0, maxColR = -1;
 992
 993    /* Our solution here is to find the darkest and brightest colors in
 994     * the 4x4 tile and use those as the two representative colors.
 995     * There are probably better algorithms to use (histogram-based).
 996     */
 997    minSum = 2000; /* big enough */
 998    maxSum = -1; /* small enough */
 999    for (k = 0; k < N_TEXELS / 2; k++) {
1000       if (!ISTBLACK(input[k])) {
1001          GLint sum = 0;
1002          for (i = 0; i < n_comp; i++) {
1003             sum += input[k][i];
1004          }
1005          if (minSum > sum) {
1006             minSum = sum;
1007             minColL = k;
1008          }
1009          if (maxSum < sum) {
1010             maxSum = sum;
1011             maxColL = k;
1012          }
1013       }
1014    }
1015    minSum = 2000; /* big enough */
1016    maxSum = -1; /* small enough */
1017    for (; k < N_TEXELS; k++) {
1018       if (!ISTBLACK(input[k])) {
1019          GLint sum = 0;
1020          for (i = 0; i < n_comp; i++) {
1021             sum += input[k][i];
1022          }
1023          if (minSum > sum) {
1024             minSum = sum;
1025             minColR = k;
1026          }
1027          if (maxSum < sum) {
1028             maxSum = sum;
1029             maxColR = k;
1030          }
1031       }
1032    }
1033
1034    /* left microtile */
1035    if (maxColL == -1) {
1036       /* all transparent black */
1037       cc[0] = ~0u;
1038       for (i = 0; i < n_comp; i++) {
1039          vec[0][i] = 0;
1040          vec[1][i] = 0;
1041       }
1042    } else {
1043       cc[0] = 0;
1044       for (i = 0; i < n_comp; i++) {
1045          vec[0][i] = input[minColL][i];
1046          vec[1][i] = input[maxColL][i];
1047       }
1048       if (minColL != maxColL) {
1049          /* compute interpolation vector */
1050          MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
1051
1052          /* add in texels */
1053          lolo = 0;
1054          for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
1055             GLint texel = n_vect + 1; /* transparent black */
1056             if (!ISTBLACK(input[k])) {
1057                /* interpolate color */
1058                CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
1059             }
1060             /* add in texel */
1061             lolo <<= 2;
1062             lolo |= texel;
1063          }
1064          cc[0] = lolo;
1065       }
1066    }
1067
1068    /* right microtile */
1069    if (maxColR == -1) {
1070       /* all transparent black */
1071       cc[1] = ~0u;
1072       for (i = 0; i < n_comp; i++) {
1073          vec[2][i] = 0;
1074          vec[3][i] = 0;
1075       }
1076    } else {
1077       cc[1] = 0;
1078       for (i = 0; i < n_comp; i++) {
1079          vec[2][i] = input[minColR][i];
1080          vec[3][i] = input[maxColR][i];
1081       }
1082       if (minColR != maxColR) {
1083          /* compute interpolation vector */
1084          MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);
1085
1086          /* add in texels */
1087          lohi = 0;
1088          for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
1089             GLint texel = n_vect + 1; /* transparent black */
1090             if (!ISTBLACK(input[k])) {
1091                /* interpolate color */
1092                CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
1093             }
1094             /* add in texel */
1095             lohi <<= 2;
1096             lohi |= texel;
1097          }
1098          cc[1] = lohi;
1099       }
1100    }
1101
1102    FX64_MOV32(hi, 9 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
1103    for (j = 2 * 2 - 1; j >= 0; j--) {
1104       for (i = 0; i < n_comp; i++) {
1105          /* add in colors */
1106          FX64_SHL(hi, 5);
1107          FX64_OR32(hi, vec[j][i] >> 3);
1108       }
1109    }
1110    ((Fx64 *)cc)[1] = hi;
1111 }
1112
1113
1114 static void
1115 fxt1_quantize_MIXED0 (GLuint *cc,
1116                       GLubyte input[N_TEXELS][MAX_COMP])
1117 {
1118    const GLint n_vect = 3; /* highest vector number in each microtile */
1119    const GLint n_comp = 3; /* 3 components: R, G, B */
1120    GLubyte vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
1121    GLfloat b, iv[MAX_COMP]; /* interpolation vector */
1122    GLint i, j, k;
1123    Fx64 hi; /* high quadword */
1124    GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
1125
1126    GLint minColL = 0, maxColL = 0;
1127    GLint minColR = 0, maxColR = 0;
1128 #if 0
1129    GLint minSum;
1130    GLint maxSum;
1131
1132    /* Our solution here is to find the darkest and brightest colors in
1133     * the 4x4 tile and use those as the two representative colors.
1134     * There are probably better algorithms to use (histogram-based).
1135     */
1136    minSum = 2000; /* big enough */
1137    maxSum = -1; /* small enough */
1138    for (k = 0; k < N_TEXELS / 2; k++) {
1139       GLint sum = 0;
1140       for (i = 0; i < n_comp; i++) {
1141          sum += input[k][i];
1142       }
1143       if (minSum > sum) {
1144          minSum = sum;
1145          minColL = k;
1146       }
1147       if (maxSum < sum) {
1148          maxSum = sum;
1149          maxColL = k;
1150       }
1151    }
1152    minSum = 2000; /* big enough */
1153    maxSum = -1; /* small enough */
1154    for (; k < N_TEXELS; k++) {
1155       GLint sum = 0;
1156       for (i = 0; i < n_comp; i++) {
1157          sum += input[k][i];
1158       }
1159       if (minSum > sum) {
1160          minSum = sum;
1161          minColR = k;
1162       }
1163       if (maxSum < sum) {
1164          maxSum = sum;
1165          maxColR = k;
1166       }
1167    }
1168 #else
1169    GLint minVal;
1170    GLint maxVal;
1171    GLint maxVarL = fxt1_variance(NULL, input, n_comp, N_TEXELS / 2);
1172    GLint maxVarR = fxt1_variance(NULL, &input[N_TEXELS / 2], n_comp, N_TEXELS / 2);
1173
1174    /* Scan the channel with max variance for lo & hi
1175     * and use those as the two representative colors.
1176     */
1177    minVal = 2000; /* big enough */
1178    maxVal = -1; /* small enough */
1179    for (k = 0; k < N_TEXELS / 2; k++) {
1180       GLint t = input[k][maxVarL];
1181       if (minVal > t) {
1182          minVal = t;
1183          minColL = k;
1184       }
1185       if (maxVal < t) {
1186          maxVal = t;
1187          maxColL = k;
1188       }
1189    }
1190    minVal = 2000; /* big enough */
1191    maxVal = -1; /* small enough */
1192    for (; k < N_TEXELS; k++) {
1193       GLint t = input[k][maxVarR];
1194       if (minVal > t) {
1195          minVal = t;
1196          minColR = k;
1197       }
1198       if (maxVal < t) {
1199          maxVal = t;
1200          maxColR = k;
1201       }
1202    }
1203 #endif
1204
1205    /* left microtile */
1206    cc[0] = 0;
1207    for (i = 0; i < n_comp; i++) {
1208       vec[0][i] = input[minColL][i];
1209       vec[1][i] = input[maxColL][i];
1210    }
1211    if (minColL != maxColL) {
1212       /* compute interpolation vector */
1213       MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
1214
1215       /* add in texels */
1216       lolo = 0;
1217       for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
1218          GLint texel;
1219          /* interpolate color */
1220          CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
1221          /* add in texel */
1222          lolo <<= 2;
1223          lolo |= texel;
1224       }
1225
1226       /* funky encoding for LSB of green */
1227       if ((GLint)((lolo >> 1) & 1) != (((vec[1][GCOMP] ^ vec[0][GCOMP]) >> 2) & 1)) {
1228          for (i = 0; i < n_comp; i++) {
1229             vec[1][i] = input[minColL][i];
1230             vec[0][i] = input[maxColL][i];
1231          }
1232          lolo = ~lolo;
1233       }
1234
1235       cc[0] = lolo;
1236    }
1237
1238    /* right microtile */
1239    cc[1] = 0;
1240    for (i = 0; i < n_comp; i++) {
1241       vec[2][i] = input[minColR][i];
1242       vec[3][i] = input[maxColR][i];
1243    }
1244    if (minColR != maxColR) {
1245       /* compute interpolation vector */
1246       MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);
1247
1248       /* add in texels */
1249       lohi = 0;
1250       for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
1251          GLint texel;
1252          /* interpolate color */
1253          CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
1254          /* add in texel */
1255          lohi <<= 2;
1256          lohi |= texel;
1257       }
1258
1259       /* funky encoding for LSB of green */
1260       if ((GLint)((lohi >> 1) & 1) != (((vec[3][GCOMP] ^ vec[2][GCOMP]) >> 2) & 1)) {
1261          for (i = 0; i < n_comp; i++) {
1262             vec[3][i] = input[minColR][i];
1263             vec[2][i] = input[maxColR][i];
1264          }
1265          lohi = ~lohi;
1266       }
1267
1268       cc[1] = lohi;
1269    }
1270
1271    FX64_MOV32(hi, 8 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
1272    for (j = 2 * 2 - 1; j >= 0; j--) {
1273       for (i = 0; i < n_comp; i++) {
1274          /* add in colors */
1275          FX64_SHL(hi, 5);
1276          FX64_OR32(hi, vec[j][i] >> 3);
1277       }
1278    }
1279    ((Fx64 *)cc)[1] = hi;
1280 }
1281
1282
1283 static void
1284 fxt1_quantize (GLuint *cc, const GLubyte *lines[], GLint comps)
1285 {
1286    GLint trualpha;
1287    GLubyte reord[N_TEXELS][MAX_COMP];
1288
1289    GLubyte input[N_TEXELS][MAX_COMP];
1290    GLint i, k, l;
1291
1292    if (comps == 3) {
1293       /* make the whole block opaque */
1294       _mesa_memset(input, -1, sizeof(input));
1295    }
1296
1297    /* 8 texels each line */
1298    for (l = 0; l < 4; l++) {
1299       for (k = 0; k < 4; k++) {
1300          for (i = 0; i < comps; i++) {
1301             input[k + l * 4][i] = *lines[l]++;
1302          }
1303       }
1304       for (; k < 8; k++) {
1305          for (i = 0; i < comps; i++) {
1306             input[k + l * 4 + 12][i] = *lines[l]++;
1307          }
1308       }
1309    }
1310
1311    /* block layout:
1312     * 00, 01, 02, 03, 08, 09, 0a, 0b
1313     * 10, 11, 12, 13, 18, 19, 1a, 1b
1314     * 04, 05, 06, 07, 0c, 0d, 0e, 0f
1315     * 14, 15, 16, 17, 1c, 1d, 1e, 1f
1316     */
1317
1318    /* [dBorca]
1319     * stupidity flows forth from this
1320     */
1321    l = N_TEXELS;
1322    trualpha = 0;
1323    if (comps == 4) {
1324       /* skip all transparent black texels */
1325       l = 0;
1326       for (k = 0; k < N_TEXELS; k++) {
1327          /* test all components against 0 */
1328          if (!ISTBLACK(input[k])) {
1329             /* texel is not transparent black */
1330             COPY_4UBV(reord[l], input[k]);
1331             if (reord[l][ACOMP] < (255 - ALPHA_TS)) {
1332                /* non-opaque texel */
1333                trualpha = !0;
1334             }
1335             l++;
1336          }
1337       }
1338    }
1339
1340 #if 0
1341    if (trualpha) {
1342       fxt1_quantize_ALPHA0(cc, input, reord, l);
1343    } else if (l == 0) {
1344       cc[0] = cc[1] = cc[2] = -1;
1345       cc[3] = 0;
1346    } else if (l < N_TEXELS) {
1347       fxt1_quantize_HI(cc, input, reord, l);
1348    } else {
1349       fxt1_quantize_CHROMA(cc, input);
1350    }
1351    (void)fxt1_quantize_ALPHA1;
1352    (void)fxt1_quantize_MIXED1;
1353    (void)fxt1_quantize_MIXED0;
1354 #else
1355    if (trualpha) {
1356       fxt1_quantize_ALPHA1(cc, input);
1357    } else if (l == 0) {
1358       cc[0] = cc[1] = cc[2] = ~0u;
1359       cc[3] = 0;
1360    } else if (l < N_TEXELS) {
1361       fxt1_quantize_MIXED1(cc, input);
1362    } else {
1363       fxt1_quantize_MIXED0(cc, input);
1364    }
1365    (void)fxt1_quantize_ALPHA0;
1366    (void)fxt1_quantize_HI;
1367    (void)fxt1_quantize_CHROMA;
1368 #endif
1369 }
1370
1371
1372 static void
1373 fxt1_encode (GLuint width, GLuint height, GLint comps,
1374              const void *source, GLint srcRowStride,
1375              void *dest, GLint destRowStride)
1376 {
1377    GLuint x, y;
1378    const GLubyte *data;
1379    GLuint *encoded = (GLuint *)dest;
1380    void *newSource = NULL;
1381
1382    assert(comps == 3 || comps == 4);
1383
1384    /* Replicate image if width is not M8 or height is not M4 */
1385    if ((width & 7) | (height & 3)) {
1386       GLint newWidth = (width + 7) & ~7;
1387       GLint newHeight = (height + 3) & ~3;
1388       newSource = _mesa_malloc(comps * newWidth * newHeight * sizeof(GLchan));
1389       if (!newSource) {
1390          GET_CURRENT_CONTEXT(ctx);
1391          _mesa_error(ctx, GL_OUT_OF_MEMORY, "texture compression");
1392          goto cleanUp;
1393       }
1394       _mesa_upscale_teximage2d(width, height, newWidth, newHeight,
1395                                comps, (const GLchan *) source,
1396                                srcRowStride, (GLchan *) newSource);
1397       source = newSource;
1398       width = newWidth;
1399       height = newHeight;
1400       srcRowStride = comps * newWidth;
1401    }
1402
1403    /* convert from 16/32-bit channels to GLubyte if needed */
1404    if (CHAN_TYPE != GL_UNSIGNED_BYTE) {
1405       const GLuint n = width * height * comps;
1406       const GLchan *src = (const GLchan *) source;
1407       GLubyte *dest = (GLubyte *) _mesa_malloc(n * sizeof(GLubyte));
1408       GLuint i;
1409       if (!dest) {
1410          GET_CURRENT_CONTEXT(ctx);
1411          _mesa_error(ctx, GL_OUT_OF_MEMORY, "texture compression");
1412          goto cleanUp;
1413       }
1414       for (i = 0; i < n; i++) {
1415          dest[i] = CHAN_TO_UBYTE(src[i]);
1416       }
1417       if (newSource != NULL) {
1418          _mesa_free(newSource);
1419       }
1420       newSource = dest;  /* we'll free this buffer before returning */
1421       source = dest;  /* the new, GLubyte incoming image */
1422    }
1423
1424    data = (const GLubyte *) source;
1425    destRowStride = (destRowStride - width * 2) / 4;
1426    for (y = 0; y < height; y += 4) {
1427       GLuint offs = 0 + (y + 0) * srcRowStride;
1428       for (x = 0; x < width; x += 8) {
1429          const GLubyte *lines[4];
1430          lines[0] = &data[offs];
1431          lines[1] = lines[0] + srcRowStride;
1432          lines[2] = lines[1] + srcRowStride;
1433          lines[3] = lines[2] + srcRowStride;
1434          offs += 8 * comps;
1435          fxt1_quantize(encoded, lines, comps);
1436          /* 128 bits per 8x4 block */
1437          encoded += 4;
1438       }
1439       encoded += destRowStride;
1440    }
1441
1442  cleanUp:
1443    if (newSource != NULL) {
1444       _mesa_free(newSource);
1445    }
1446 }
1447
1448
1449 /***************************************************************************\
1450  * FXT1 decoder
1451  *
1452  * The decoder is based on GL_3DFX_texture_compression_FXT1
1453  * specification and serves as a concept for the encoder.
1454 \***************************************************************************/
1455
1456
1457 /* lookup table for scaling 5 bit colors up to 8 bits */
1458 static const GLubyte _rgb_scale_5[] = {
1459    0,   8,   16,  25,  33,  41,  49,  58,
1460    66,  74,  82,  90,  99,  107, 115, 123,
1461    132, 140, 148, 156, 165, 173, 181, 189,
1462    197, 206, 214, 222, 230, 239, 247, 255
1463 };
1464
1465 /* lookup table for scaling 6 bit colors up to 8 bits */
1466 static const GLubyte _rgb_scale_6[] = {
1467    0,   4,   8,   12,  16,  20,  24,  28,
1468    32,  36,  40,  45,  49,  53,  57,  61,
1469    65,  69,  73,  77,  81,  85,  89,  93,
1470    97,  101, 105, 109, 113, 117, 121, 125,
1471    130, 134, 138, 142, 146, 150, 154, 158,
1472    162, 166, 170, 174, 178, 182, 186, 190,
1473    194, 198, 202, 206, 210, 215, 219, 223,
1474    227, 231, 235, 239, 243, 247, 251, 255
1475 };
1476
1477
/* CC_SEL: view cc as an array of 32-bit words and shift the word holding
 *         bit number `which' so that this bit becomes bit 0; higher bits
 *         of the word are NOT masked off.  The block is only read, so the
 *         cast keeps it const.
 * UP5:    expand the low 5 bits of c to an 8-bit value.
 * UP6:    expand the 6-bit value built from the low 5 bits of c and the
 *         low bit of b (as least significant bit) to an 8-bit value.
 * LERP:   rounded integer interpolation (c0 * (n - t) + c1 * t) / n.
 *         The whole expansion is parenthesized so it can be used safely
 *         inside larger expressions.
 */
#define CC_SEL(cc, which) (((const GLuint *)(cc))[(which) / 32] >> ((which) & 31))
#define UP5(c) _rgb_scale_5[(c) & 31]
#define UP6(c, b) _rgb_scale_6[(((c) & 31) << 1) | ((b) & 1)]
#define LERP(n, t, c0, c1) \
   ((((n) - (t)) * (c0) + (t) * (c1) + (n) / 2) / (n))
1482
1483
1484 static void
1485 fxt1_decode_1HI (const GLubyte *code, GLint t, GLchan *rgba)
1486 {
1487    const GLuint *cc;
1488
1489    t *= 3;
1490    cc = (const GLuint *)(code + t / 8);
1491    t = (cc[0] >> (t & 7)) & 7;
1492
1493    if (t == 7) {
1494       rgba[RCOMP] = rgba[GCOMP] = rgba[BCOMP] = rgba[ACOMP] = 0;
1495    } else {
1496       GLubyte r, g, b;
1497       cc = (const GLuint *)(code + 12);
1498       if (t == 0) {
1499          b = UP5(CC_SEL(cc, 0));
1500          g = UP5(CC_SEL(cc, 5));
1501          r = UP5(CC_SEL(cc, 10));
1502       } else if (t == 6) {
1503          b = UP5(CC_SEL(cc, 15));
1504          g = UP5(CC_SEL(cc, 20));
1505          r = UP5(CC_SEL(cc, 25));
1506       } else {
1507          b = LERP(6, t, UP5(CC_SEL(cc, 0)), UP5(CC_SEL(cc, 15)));
1508          g = LERP(6, t, UP5(CC_SEL(cc, 5)), UP5(CC_SEL(cc, 20)));
1509          r = LERP(6, t, UP5(CC_SEL(cc, 10)), UP5(CC_SEL(cc, 25)));
1510       }
1511       rgba[RCOMP] = UBYTE_TO_CHAN(r);
1512       rgba[GCOMP] = UBYTE_TO_CHAN(g);
1513       rgba[BCOMP] = UBYTE_TO_CHAN(b);
1514       rgba[ACOMP] = CHAN_MAX;
1515    }
1516 }
1517
1518
1519 static void
1520 fxt1_decode_1CHROMA (const GLubyte *code, GLint t, GLchan *rgba)
1521 {
1522    const GLuint *cc;
1523    GLuint kk;
1524
1525    cc = (const GLuint *)code;
1526    if (t & 16) {
1527       cc++;
1528       t &= 15;
1529    }
1530    t = (cc[0] >> (t * 2)) & 3;
1531
1532    t *= 15;
1533    cc = (const GLuint *)(code + 8 + t / 8);
1534    kk = cc[0] >> (t & 7);
1535    rgba[BCOMP] = UBYTE_TO_CHAN( UP5(kk) );
1536    rgba[GCOMP] = UBYTE_TO_CHAN( UP5(kk >> 5) );
1537    rgba[RCOMP] = UBYTE_TO_CHAN( UP5(kk >> 10) );
1538    rgba[ACOMP] = CHAN_MAX;
1539 }
1540
1541
1542 static void
1543 fxt1_decode_1MIXED (const GLubyte *code, GLint t, GLchan *rgba)
1544 {
1545    const GLuint *cc;
1546    GLuint col[2][3];
1547    GLint glsb, selb;
1548
1549    cc = (const GLuint *)code;
1550    if (t & 16) {
1551       t &= 15;
1552       t = (cc[1] >> (t * 2)) & 3;
1553       /* col 2 */
1554       col[0][BCOMP] = (*(const GLuint *)(code + 11)) >> 6;
1555       col[0][GCOMP] = CC_SEL(cc, 99);
1556       col[0][RCOMP] = CC_SEL(cc, 104);
1557       /* col 3 */
1558       col[1][BCOMP] = CC_SEL(cc, 109);
1559       col[1][GCOMP] = CC_SEL(cc, 114);
1560       col[1][RCOMP] = CC_SEL(cc, 119);
1561       glsb = CC_SEL(cc, 126);
1562       selb = CC_SEL(cc, 33);
1563    } else {
1564       t = (cc[0] >> (t * 2)) & 3;
1565       /* col 0 */
1566       col[0][BCOMP] = CC_SEL(cc, 64);
1567       col[0][GCOMP] = CC_SEL(cc, 69);
1568       col[0][RCOMP] = CC_SEL(cc, 74);
1569       /* col 1 */
1570       col[1][BCOMP] = CC_SEL(cc, 79);
1571       col[1][GCOMP] = CC_SEL(cc, 84);
1572       col[1][RCOMP] = CC_SEL(cc, 89);
1573       glsb = CC_SEL(cc, 125);
1574       selb = CC_SEL(cc, 1);
1575    }
1576
1577    if (CC_SEL(cc, 124) & 1) {
1578       /* alpha[0] == 1 */
1579
1580       if (t == 3) {
1581          /* zero */
1582          rgba[RCOMP] = rgba[BCOMP] = rgba[GCOMP] = rgba[ACOMP] = 0;
1583       } else {
1584          GLubyte r, g, b;
1585          if (t == 0) {
1586             b = UP5(col[0][BCOMP]);
1587             g = UP5(col[0][GCOMP]);
1588             r = UP5(col[0][RCOMP]);
1589          } else if (t == 2) {
1590             b = UP5(col[1][BCOMP]);
1591             g = UP6(col[1][GCOMP], glsb);
1592             r = UP5(col[1][RCOMP]);
1593          } else {
1594             b = (UP5(col[0][BCOMP]) + UP5(col[1][BCOMP])) / 2;
1595             g = (UP5(col[0][GCOMP]) + UP6(col[1][GCOMP], glsb)) / 2;
1596             r = (UP5(col[0][RCOMP]) + UP5(col[1][RCOMP])) / 2;
1597          }
1598          rgba[RCOMP] = UBYTE_TO_CHAN(r);
1599          rgba[GCOMP] = UBYTE_TO_CHAN(g);
1600          rgba[BCOMP] = UBYTE_TO_CHAN(b);
1601          rgba[ACOMP] = CHAN_MAX;
1602       }
1603    } else {
1604       /* alpha[0] == 0 */
1605       GLubyte r, g, b;
1606       if (t == 0) {
1607          b = UP5(col[0][BCOMP]);
1608          g = UP6(col[0][GCOMP], glsb ^ selb);
1609          r = UP5(col[0][RCOMP]);
1610       } else if (t == 3) {
1611          b = UP5(col[1][BCOMP]);
1612          g = UP6(col[1][GCOMP], glsb);
1613          r = UP5(col[1][RCOMP]);
1614       } else {
1615          b = LERP(3, t, UP5(col[0][BCOMP]), UP5(col[1][BCOMP]));
1616          g = LERP(3, t, UP6(col[0][GCOMP], glsb ^ selb),
1617                         UP6(col[1][GCOMP], glsb));
1618          r = LERP(3, t, UP5(col[0][RCOMP]), UP5(col[1][RCOMP]));
1619       }
1620       rgba[RCOMP] = UBYTE_TO_CHAN(r);
1621       rgba[GCOMP] = UBYTE_TO_CHAN(g);
1622       rgba[BCOMP] = UBYTE_TO_CHAN(b);
1623       rgba[ACOMP] = CHAN_MAX;
1624    }
1625 }
1626
1627
1628 static void
1629 fxt1_decode_1ALPHA (const GLubyte *code, GLint t, GLchan *rgba)
1630 {
1631    const GLuint *cc;
1632    GLubyte r, g, b, a;
1633
1634    cc = (const GLuint *)code;
1635    if (CC_SEL(cc, 124) & 1) {
1636       /* lerp == 1 */
1637       GLuint col0[4];
1638
1639       if (t & 16) {
1640          t &= 15;
1641          t = (cc[1] >> (t * 2)) & 3;
1642          /* col 2 */
1643          col0[BCOMP] = (*(const GLuint *)(code + 11)) >> 6;
1644          col0[GCOMP] = CC_SEL(cc, 99);
1645          col0[RCOMP] = CC_SEL(cc, 104);
1646          col0[ACOMP] = CC_SEL(cc, 119);
1647       } else {
1648          t = (cc[0] >> (t * 2)) & 3;
1649          /* col 0 */
1650          col0[BCOMP] = CC_SEL(cc, 64);
1651          col0[GCOMP] = CC_SEL(cc, 69);
1652          col0[RCOMP] = CC_SEL(cc, 74);
1653          col0[ACOMP] = CC_SEL(cc, 109);
1654       }
1655
1656       if (t == 0) {
1657          b = UP5(col0[BCOMP]);
1658          g = UP5(col0[GCOMP]);
1659          r = UP5(col0[RCOMP]);
1660          a = UP5(col0[ACOMP]);
1661       } else if (t == 3) {
1662          b = UP5(CC_SEL(cc, 79));
1663          g = UP5(CC_SEL(cc, 84));
1664          r = UP5(CC_SEL(cc, 89));
1665          a = UP5(CC_SEL(cc, 114));
1666       } else {
1667          b = LERP(3, t, UP5(col0[BCOMP]), UP5(CC_SEL(cc, 79)));
1668          g = LERP(3, t, UP5(col0[GCOMP]), UP5(CC_SEL(cc, 84)));
1669          r = LERP(3, t, UP5(col0[RCOMP]), UP5(CC_SEL(cc, 89)));
1670          a = LERP(3, t, UP5(col0[ACOMP]), UP5(CC_SEL(cc, 114)));
1671       }
1672    } else {
1673       /* lerp == 0 */
1674
1675       if (t & 16) {
1676          cc++;
1677          t &= 15;
1678       }
1679       t = (cc[0] >> (t * 2)) & 3;
1680
1681       if (t == 3) {
1682          /* zero */
1683          r = g = b = a = 0;
1684       } else {
1685          GLuint kk;
1686          cc = (const GLuint *)code;
1687          a = UP5(cc[3] >> (t * 5 + 13));
1688          t *= 15;
1689          cc = (const GLuint *)(code + 8 + t / 8);
1690          kk = cc[0] >> (t & 7);
1691          b = UP5(kk);
1692          g = UP5(kk >> 5);
1693          r = UP5(kk >> 10);
1694       }
1695    }
1696    rgba[RCOMP] = UBYTE_TO_CHAN(r);
1697    rgba[GCOMP] = UBYTE_TO_CHAN(g);
1698    rgba[BCOMP] = UBYTE_TO_CHAN(b);
1699    rgba[ACOMP] = UBYTE_TO_CHAN(a);
1700 }
1701
1702
1703 void
1704 fxt1_decode_1 (const void *texture, GLint stride, /* in pixels */
1705                GLint i, GLint j, GLchan *rgba)
1706 {
1707    static void (*decode_1[]) (const GLubyte *, GLint, GLchan *) = {
1708       fxt1_decode_1HI,     /* cc-high   = "00?" */
1709       fxt1_decode_1HI,     /* cc-high   = "00?" */
1710       fxt1_decode_1CHROMA, /* cc-chroma = "010" */
1711       fxt1_decode_1ALPHA,  /* alpha     = "011" */
1712       fxt1_decode_1MIXED,  /* mixed     = "1??" */
1713       fxt1_decode_1MIXED,  /* mixed     = "1??" */
1714       fxt1_decode_1MIXED,  /* mixed     = "1??" */
1715       fxt1_decode_1MIXED   /* mixed     = "1??" */
1716    };
1717
1718    const GLubyte *code = (const GLubyte *)texture +
1719                          ((j / 4) * (stride / 8) + (i / 8)) * 16;
1720    GLint mode = CC_SEL(code, 125);
1721    GLint t = i & 7;
1722
1723    if (t & 4) {
1724       t += 12;
1725    }
1726    t += (j & 3) * 4;
1727
1728    decode_1[mode](code, t, rgba);
1729 }