src/mesa/main/texcompress_fxt1.c

   1 /*
   2  * Mesa 3-D graphics library
   3  *
   4  * Copyright (C) 1999-2008  Brian Paul   All Rights Reserved.
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a
   7  * copy of this software and associated documentation files (the "Software"),
   8  * to deal in the Software without restriction, including without limitation
   9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  10  * and/or sell copies of the Software, and to permit persons to whom the
  11  * Software is furnished to do so, subject to the following conditions:
  12  *
  13  * The above copyright notice and this permission notice shall be included
  14  * in all copies or substantial portions of the Software.
  15  *
  16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
  20  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  21  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  22  * OTHER DEALINGS IN THE SOFTWARE.
  23  */
  24
  25
  26 /**
  27  * \file texcompress_fxt1.c
  28  * GL_3DFX_texture_compression_FXT1 support.
  29  */
  30
  31
  32 #include "glheader.h"
  33 #include "imports.h"
  34 #include "colormac.h"
  35 #include "image.h"
  36 #include "macros.h"
  37 #include "mipmap.h"
  38 #include "texcompress.h"
  39 #include "texcompress_fxt1.h"
  40 #include "texstore.h"
  41
  42
  43 static void
  44 fxt1_encode (GLuint width, GLuint height, GLint comps,
  45              const void *source, GLint srcRowStride,
  46              void *dest, GLint destRowStride);
  47
  48 static void
  49 fxt1_decode_1 (const void *texture, GLint stride,
  50                GLint i, GLint j, GLubyte *rgba);
  51
  52
  53 /**
  54  * Store user's image in rgb_fxt1 format.
  55  */
  56 GLboolean
  57 _mesa_texstore_rgb_fxt1(TEXSTORE_PARAMS)
  58 {
  59    const GLubyte *pixels;
  60    GLint srcRowStride;
  61    GLubyte *dst;
  62    const GLubyte *tempImage = NULL;
  63
  64    ASSERT(dstFormat == MESA_FORMAT_RGB_FXT1);
  65
  66    if (srcFormat != GL_RGB ||
  67        srcType != GL_UNSIGNED_BYTE ||
  68        ctx->_ImageTransferState ||
  69        srcPacking->RowLength != srcWidth ||
  70        srcPacking->SwapBytes) {
  71       /* convert image to RGB/GLubyte */
  72       GLubyte *tempImageSlices[1];
  73       int rgbRowStride = 3 * srcWidth * sizeof(GLubyte);
  74       tempImage = malloc(srcWidth * srcHeight * 3 * sizeof(GLubyte));
  75       if (!tempImage)
  76          return GL_FALSE; /* out of memory */
  77       tempImageSlices[0] = (GLubyte *) tempImage;
  78       _mesa_texstore(ctx, dims,
  79                      baseInternalFormat,
  80                      MESA_FORMAT_RGB_UNORM8,
  81                      rgbRowStride, tempImageSlices,
  82                      srcWidth, srcHeight, srcDepth,
  83                      srcFormat, srcType, srcAddr,
  84                      srcPacking);
  85       pixels = tempImage;
  86       srcRowStride = 3 * srcWidth;
  87       srcFormat = GL_RGB;
  88    }
  89    else {
  90       pixels = _mesa_image_address2d(srcPacking, srcAddr, srcWidth, srcHeight,
  91                                      srcFormat, srcType, 0, 0);
  92
  93       srcRowStride = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat,
  94                                             srcType) / sizeof(GLubyte);
  95    }
  96
  97    dst = dstSlices[0];
  98
  99    fxt1_encode(srcWidth, srcHeight, 3, pixels, srcRowStride,
 100                dst, dstRowStride);
 101
 102    free((void*) tempImage);
 103
 104    return GL_TRUE;
 105 }
 106
 107
 108 /**
 109  * Store user's image in rgba_fxt1 format.
 110  */
 111 GLboolean
 112 _mesa_texstore_rgba_fxt1(TEXSTORE_PARAMS)
 113 {
 114    const GLubyte *pixels;
 115    GLint srcRowStride;
 116    GLubyte *dst;
 117    const GLubyte *tempImage = NULL;
 118
 119    ASSERT(dstFormat == MESA_FORMAT_RGBA_FXT1);
 120
 121    if (srcFormat != GL_RGBA ||
 122        srcType != GL_UNSIGNED_BYTE ||
 123        ctx->_ImageTransferState ||
 124        srcPacking->SwapBytes) {
 125       /* convert image to RGBA/GLubyte */
 126       GLubyte *tempImageSlices[1];
 127       int rgbaRowStride = 4 * srcWidth * sizeof(GLubyte);
 128       tempImage = malloc(srcWidth * srcHeight * 4 * sizeof(GLubyte));
 129       if (!tempImage)
 130          return GL_FALSE; /* out of memory */
 131       tempImageSlices[0] = (GLubyte *) tempImage;
 132       _mesa_texstore(ctx, dims,
 133                      baseInternalFormat,
 134                      MESA_FORMAT_R8G8B8A8_UNORM,
 135                      rgbaRowStride, tempImageSlices,
 136                      srcWidth, srcHeight, srcDepth,
 137                      srcFormat, srcType, srcAddr,
 138                      srcPacking);
 139       pixels = tempImage;
 140       srcRowStride = 4 * srcWidth;
 141       srcFormat = GL_RGBA;
 142    }
 143    else {
 144       pixels = _mesa_image_address2d(srcPacking, srcAddr, srcWidth, srcHeight,
 145                                      srcFormat, srcType, 0, 0);
 146
 147       srcRowStride = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat,
 148                                             srcType) / sizeof(GLubyte);
 149    }
 150
 151    dst = dstSlices[0];
 152
 153    fxt1_encode(srcWidth, srcHeight, 4, pixels, srcRowStride,
 154                dst, dstRowStride);
 155
 156    free((void*) tempImage);
 157
 158    return GL_TRUE;
 159 }
 160
 161
 162 /***************************************************************************\
 163  * FXT1 encoder
 164  *
 165  * The encoder was built by reversing the decoder,
 166  * and is vaguely based on Texus2 by 3dfx. Note that this code
 167  * is merely a proof of concept, since it is highly UNoptimized;
 168  * moreover, it is sub-optimal due to initial conditions passed
 169  * to Lloyd's algorithm (the interpolation modes are even worse).
 170 \***************************************************************************/
 171
 172
 173 #define MAX_COMP 4 /* ever needed maximum number of components in texel */
 174 #define MAX_VECT 4 /* ever needed maximum number of base vectors to find */
 175 #define N_TEXELS 32 /* number of texels in a block (always 32) */
 176 #define LL_N_REP 50 /* number of iterations in lloyd's vq */
 177 #define LL_RMS_D 10 /* fault tolerance (maximum delta) */
 178 #define LL_RMS_E 255 /* fault tolerance (maximum error) */
 179 #define ALPHA_TS 2 /* alpha threshold: (255 - ALPHA_TS) deemed opaque */
 180 #define ISTBLACK(v) (*((GLuint *)(v)) == 0)
 181
 182
 183 /*
 184  * Define a 64-bit unsigned integer type and macros
 185  */
 186 #if 1
 187
 188 #define FX64_NATIVE 1
 189
 190 typedef uint64_t Fx64;
 191
 192 #define FX64_MOV32(a, b) a = b
 193 #define FX64_OR32(a, b)  a |= b
 194 #define FX64_SHL(a, c)   a <<= c
 195
 196 #else
 197
 198 #define FX64_NATIVE 0
 199
 200 typedef struct {
 201    GLuint lo, hi;
 202 } Fx64;
 203
 204 #define FX64_MOV32(a, b) a.lo = b
 205 #define FX64_OR32(a, b)  a.lo |= b
 206
 207 #define FX64_SHL(a, c)                                 \
 208    do {                                                \
 209        if ((c) >= 32) {                                \
 210           a.hi = a.lo << ((c) - 32);                   \
 211           a.lo = 0;                                    \
 212        } else {                                        \
 213           a.hi = (a.hi << (c)) | (a.lo >> (32 - (c))); \
 214           a.lo <<= (c);                                \
 215        }                                               \
 216    } while (0)
 217
 218 #endif
 219
 220
 221 #define F(i) (GLfloat)1 /* can be used to obtain an oblong metric: 0.30 / 0.59 / 0.11 */
 222 #define SAFECDOT 1 /* for paranoids */
 223
 224 #define MAKEIVEC(NV, NC, IV, B, V0, V1)  \
 225    do {                                  \
 226       /* compute interpolation vector */ \
 227       GLfloat d2 = 0.0F;                 \
 228       GLfloat rd2;                       \
 229                                          \
 230       for (i = 0; i < NC; i++) {         \
 231          IV[i] = (V1[i] - V0[i]) * F(i); \
 232          d2 += IV[i] * IV[i];            \
 233       }                                  \
 234       rd2 = (GLfloat)NV / d2;            \
 235       B = 0;                             \
 236       for (i = 0; i < NC; i++) {         \
 237          IV[i] *= F(i);                  \
 238          B -= IV[i] * V0[i];             \
 239          IV[i] *= rd2;                   \
 240       }                                  \
 241       B = B * rd2 + 0.5f;                \
 242    } while (0)
 243
 244 #define CALCCDOT(TEXEL, NV, NC, IV, B, V)\
 245    do {                                  \
 246       GLfloat dot = 0.0F;                \
 247       for (i = 0; i < NC; i++) {         \
 248          dot += V[i] * IV[i];            \
 249       }                                  \
 250       TEXEL = (GLint)(dot + B);          \
 251       if (SAFECDOT) {                    \
 252          if (TEXEL < 0) {                \
 253             TEXEL = 0;                   \
 254          } else if (TEXEL > NV) {        \
 255             TEXEL = NV;                  \
 256          }                               \
 257       }                                  \
 258    } while (0)
 259
 260
 261 static GLint
 262 fxt1_bestcol (GLfloat vec[][MAX_COMP], GLint nv,
 263               GLubyte input[MAX_COMP], GLint nc)
 264 {
 265    GLint i, j, best = -1;
 266    GLfloat err = 1e9; /* big enough */
 267
 268    for (j = 0; j < nv; j++) {
 269       GLfloat e = 0.0F;
 270       for (i = 0; i < nc; i++) {
 271          e += (vec[j][i] - input[i]) * (vec[j][i] - input[i]);
 272       }
 273       if (e < err) {
 274          err = e;
 275          best = j;
 276       }
 277    }
 278
 279    return best;
 280 }
 281
 282
 283 static GLint
 284 fxt1_worst (GLfloat vec[MAX_COMP],
 285             GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
 286 {
 287    GLint i, k, worst = -1;
 288    GLfloat err = -1.0F; /* small enough */
 289
 290    for (k = 0; k < n; k++) {
 291       GLfloat e = 0.0F;
 292       for (i = 0; i < nc; i++) {
 293          e += (vec[i] - input[k][i]) * (vec[i] - input[k][i]);
 294       }
 295       if (e > err) {
 296          err = e;
 297          worst = k;
 298       }
 299    }
 300
 301    return worst;
 302 }
 303
 304
 305 static GLint
 306 fxt1_variance (GLdouble variance[MAX_COMP],
 307                GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
 308 {
 309    GLint i, k, best = 0;
 310    GLint sx, sx2;
 311    GLdouble var, maxvar = -1; /* small enough */
 312    GLdouble teenth = 1.0 / n;
 313
 314    for (i = 0; i < nc; i++) {
 315       sx = sx2 = 0;
 316       for (k = 0; k < n; k++) {
 317          GLint t = input[k][i];
 318          sx += t;
 319          sx2 += t * t;
 320       }
 321       var = sx2 * teenth - sx * sx * teenth * teenth;
 322       if (maxvar < var) {
 323          maxvar = var;
 324          best = i;
 325       }
 326       if (variance) {
 327          variance[i] = var;
 328       }
 329    }
 330
 331    return best;
 332 }
 333
 334
 335 static GLint
 336 fxt1_choose (GLfloat vec[][MAX_COMP], GLint nv,
 337              GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
 338 {
 339 #if 0
 340    /* Choose colors from a grid.
 341     */
 342    GLint i, j;
 343
 344    for (j = 0; j < nv; j++) {
 345       GLint m = j * (n - 1) / (nv - 1);
 346       for (i = 0; i < nc; i++) {
 347          vec[j][i] = input[m][i];
 348       }
 349    }
 350 #else
 351    /* Our solution here is to find the darkest and brightest colors in
 352     * the 8x4 tile and use those as the two representative colors.
 353     * There are probably better algorithms to use (histogram-based).
 354     */
 355    GLint i, j, k;
 356    GLint minSum = 2000; /* big enough */
 357    GLint maxSum = -1; /* small enough */
 358    GLint minCol = 0; /* phoudoin: silent compiler! */
 359    GLint maxCol = 0; /* phoudoin: silent compiler! */
 360
 361    struct {
 362       GLint flag;
 363       GLint key;
 364       GLint freq;
 365       GLint idx;
 366    } hist[N_TEXELS];
 367    GLint lenh = 0;
 368
 369    memset(hist, 0, sizeof(hist));
 370
 371    for (k = 0; k < n; k++) {
 372       GLint l;
 373       GLint key = 0;
 374       GLint sum = 0;
 375       for (i = 0; i < nc; i++) {
 376          key <<= 8;
 377          key |= input[k][i];
 378          sum += input[k][i];
 379       }
 380       for (l = 0; l < n; l++) {
 381          if (!hist[l].flag) {
 382             /* alloc new slot */
 383             hist[l].flag = !0;
 384             hist[l].key = key;
 385             hist[l].freq = 1;
 386             hist[l].idx = k;
 387             lenh = l + 1;
 388             break;
 389          } else if (hist[l].key == key) {
 390             hist[l].freq++;
 391             break;
 392          }
 393       }
 394       if (minSum > sum) {
 395          minSum = sum;
 396          minCol = k;
 397       }
 398       if (maxSum < sum) {
 399          maxSum = sum;
 400          maxCol = k;
 401       }
 402    }
 403
 404    if (lenh <= nv) {
 405       for (j = 0; j < lenh; j++) {
 406          for (i = 0; i < nc; i++) {
 407             vec[j][i] = (GLfloat)input[hist[j].idx][i];
 408          }
 409       }
 410       for (; j < nv; j++) {
 411          for (i = 0; i < nc; i++) {
 412             vec[j][i] = vec[0][i];
 413          }
 414       }
 415       return 0;
 416    }
 417
 418    for (j = 0; j < nv; j++) {
 419       for (i = 0; i < nc; i++) {
 420          vec[j][i] = ((nv - 1 - j) * input[minCol][i] + j * input[maxCol][i] + (nv - 1) / 2) / (GLfloat)(nv - 1);
 421       }
 422    }
 423 #endif
 424
 425    return !0;
 426 }
 427
 428
 429 static GLint
 430 fxt1_lloyd (GLfloat vec[][MAX_COMP], GLint nv,
 431             GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
 432 {
 433    /* Use the generalized lloyd's algorithm for VQ:
 434     *     find 4 color vectors.
 435     *
 436     *     for each sample color
 437     *         sort to nearest vector.
 438     *
 439     *     replace each vector with the centroid of its matching colors.
 440     *
 441     *     repeat until RMS doesn't improve.
 442     *
 443     *     if a color vector has no samples, or becomes the same as another
 444     *     vector, replace it with the color which is farthest from a sample.
 445     *
 446     * vec[][MAX_COMP]           initial vectors and resulting colors
 447     * nv                        number of resulting colors required
 448     * input[N_TEXELS][MAX_COMP] input texels
 449     * nc                        number of components in input / vec
 450     * n                         number of input samples
 451     */
 452
 453    GLint sum[MAX_VECT][MAX_COMP]; /* used to accumulate closest texels */
 454    GLint cnt[MAX_VECT]; /* how many times a certain vector was chosen */
 455    GLfloat error, lasterror = 1e9;
 456
 457    GLint i, j, k, rep;
 458
 459    /* the quantizer */
 460    for (rep = 0; rep < LL_N_REP; rep++) {
 461       /* reset sums & counters */
 462       for (j = 0; j < nv; j++) {
 463          for (i = 0; i < nc; i++) {
 464             sum[j][i] = 0;
 465          }
 466          cnt[j] = 0;
 467       }
 468       error = 0;
 469
 470       /* scan whole block */
 471       for (k = 0; k < n; k++) {
 472 #if 1
 473          GLint best = -1;
 474          GLfloat err = 1e9; /* big enough */
 475          /* determine best vector */
 476          for (j = 0; j < nv; j++) {
 477             GLfloat e = (vec[j][0] - input[k][0]) * (vec[j][0] - input[k][0]) +
 478                       (vec[j][1] - input[k][1]) * (vec[j][1] - input[k][1]) +
 479                       (vec[j][2] - input[k][2]) * (vec[j][2] - input[k][2]);
 480             if (nc == 4) {
 481                e += (vec[j][3] - input[k][3]) * (vec[j][3] - input[k][3]);
 482             }
 483             if (e < err) {
 484                err = e;
 485                best = j;
 486             }
 487          }
 488 #else
 489          GLint best = fxt1_bestcol(vec, nv, input[k], nc, &err);
 490 #endif
 491          assert(best >= 0);
 492          /* add in closest color */
 493          for (i = 0; i < nc; i++) {
 494             sum[best][i] += input[k][i];
 495          }
 496          /* mark this vector as used */
 497          cnt[best]++;
 498          /* accumulate error */
 499          error += err;
 500       }
 501
 502       /* check RMS */
 503       if ((error < LL_RMS_E) ||
 504           ((error < lasterror) && ((lasterror - error) < LL_RMS_D))) {
 505          return !0; /* good match */
 506       }
 507       lasterror = error;
 508
 509       /* move each vector to the barycenter of its closest colors */
 510       for (j = 0; j < nv; j++) {
 511          if (cnt[j]) {
 512             GLfloat div = 1.0F / cnt[j];
 513             for (i = 0; i < nc; i++) {
 514                vec[j][i] = div * sum[j][i];
 515             }
 516          } else {
 517             /* this vec has no samples or is identical with a previous vec */
 518             GLint worst = fxt1_worst(vec[j], input, nc, n);
 519             for (i = 0; i < nc; i++) {
 520                vec[j][i] = input[worst][i];
 521             }
 522          }
 523       }
 524    }
 525
 526    return 0; /* could not converge fast enough */
 527 }
 528
 529
 530 static void
 531 fxt1_quantize_CHROMA (GLuint *cc,
 532                       GLubyte input[N_TEXELS][MAX_COMP])
 533 {
 534    const GLint n_vect = 4; /* 4 base vectors to find */
 535    const GLint n_comp = 3; /* 3 components: R, G, B */
 536    GLfloat vec[MAX_VECT][MAX_COMP];
 537    GLint i, j, k;
 538    Fx64 hi; /* high quadword */
 539    GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
 540
 541    if (fxt1_choose(vec, n_vect, input, n_comp, N_TEXELS) != 0) {
 542       fxt1_lloyd(vec, n_vect, input, n_comp, N_TEXELS);
 543    }
 544
 545    FX64_MOV32(hi, 4); /* cc-chroma = "010" + unused bit */
 546    for (j = n_vect - 1; j >= 0; j--) {
 547       for (i = 0; i < n_comp; i++) {
 548          /* add in colors */
 549          FX64_SHL(hi, 5);
 550          FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
 551       }
 552    }
 553    ((Fx64 *)cc)[1] = hi;
 554
 555    lohi = lolo = 0;
 556    /* right microtile */
 557    for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) {
 558       lohi <<= 2;
 559       lohi |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
 560    }
 561    /* left microtile */
 562    for (; k >= 0; k--) {
 563       lolo <<= 2;
 564       lolo |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
 565    }
 566    cc[1] = lohi;
 567    cc[0] = lolo;
 568 }
 569
 570
 571 static void
 572 fxt1_quantize_ALPHA0 (GLuint *cc,
 573                       GLubyte input[N_TEXELS][MAX_COMP],
 574                       GLubyte reord[N_TEXELS][MAX_COMP], GLint n)
 575 {
 576    const GLint n_vect = 3; /* 3 base vectors to find */
 577    const GLint n_comp = 4; /* 4 components: R, G, B, A */
 578    GLfloat vec[MAX_VECT][MAX_COMP];
 579    GLint i, j, k;
 580    Fx64 hi; /* high quadword */
 581    GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
 582
 583    /* the last vector indicates zero */
 584    for (i = 0; i < n_comp; i++) {
 585       vec[n_vect][i] = 0;
 586    }
 587
 588    /* the first n texels in reord are guaranteed to be non-zero */
 589    if (fxt1_choose(vec, n_vect, reord, n_comp, n) != 0) {
 590       fxt1_lloyd(vec, n_vect, reord, n_comp, n);
 591    }
 592
 593    FX64_MOV32(hi, 6); /* alpha = "011" + lerp = 0 */
 594    for (j = n_vect - 1; j >= 0; j--) {
 595       /* add in alphas */
 596       FX64_SHL(hi, 5);
 597       FX64_OR32(hi, (GLuint)(vec[j][ACOMP] / 8.0F));
 598    }
 599    for (j = n_vect - 1; j >= 0; j--) {
 600       for (i = 0; i < n_comp - 1; i++) {
 601          /* add in colors */
 602          FX64_SHL(hi, 5);
 603          FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
 604       }
 605    }
 606    ((Fx64 *)cc)[1] = hi;
 607
 608    lohi = lolo = 0;
 609    /* right microtile */
 610    for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) {
 611       lohi <<= 2;
 612       lohi |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
 613    }
 614    /* left microtile */
 615    for (; k >= 0; k--) {
 616       lolo <<= 2;
 617       lolo |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
 618    }
 619    cc[1] = lohi;
 620    cc[0] = lolo;
 621 }
 622
 623
 624 static void
 625 fxt1_quantize_ALPHA1 (GLuint *cc,
 626                       GLubyte input[N_TEXELS][MAX_COMP])
 627 {
 628    const GLint n_vect = 3; /* highest vector number in each microtile */
 629    const GLint n_comp = 4; /* 4 components: R, G, B, A */
 630    GLfloat vec[1 + 1 + 1][MAX_COMP]; /* 1.5 extrema for each sub-block */
 631    GLfloat b, iv[MAX_COMP]; /* interpolation vector */
 632    GLint i, j, k;
 633    Fx64 hi; /* high quadword */
 634    GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
 635
 636    GLint minSum;
 637    GLint maxSum;
 638    GLint minColL = 0, maxColL = 0;
 639    GLint minColR = 0, maxColR = 0;
 640    GLint sumL = 0, sumR = 0;
 641    GLint nn_comp;
 642    /* Our solution here is to find the darkest and brightest colors in
 643     * the 4x4 tile and use those as the two representative colors.
 644     * There are probably better algorithms to use (histogram-based).
 645     */
 646    nn_comp = n_comp;
 647    while ((minColL == maxColL) && nn_comp) {
 648        minSum = 2000; /* big enough */
 649        maxSum = -1; /* small enough */
 650        for (k = 0; k < N_TEXELS / 2; k++) {
 651            GLint sum = 0;
 652            for (i = 0; i < nn_comp; i++) {
 653                sum += input[k][i];
 654            }
 655            if (minSum > sum) {
 656                minSum = sum;
 657                minColL = k;
 658            }
 659            if (maxSum < sum) {
 660                maxSum = sum;
 661                maxColL = k;
 662            }
 663            sumL += sum;
 664        }
 665
 666        nn_comp--;
 667    }
 668
 669    nn_comp = n_comp;
 670    while ((minColR == maxColR) && nn_comp) {
 671        minSum = 2000; /* big enough */
 672        maxSum = -1; /* small enough */
 673        for (k = N_TEXELS / 2; k < N_TEXELS; k++) {
 674            GLint sum = 0;
 675            for (i = 0; i < nn_comp; i++) {
 676                sum += input[k][i];
 677            }
 678            if (minSum > sum) {
 679                minSum = sum;
 680                minColR = k;
 681            }
 682            if (maxSum < sum) {
 683                maxSum = sum;
 684                maxColR = k;
 685            }
 686            sumR += sum;
 687        }
 688
 689        nn_comp--;
 690    }
 691
 692    /* choose the common vector (yuck!) */
 693    {
 694       GLint j1, j2;
 695       GLint v1 = 0, v2 = 0;
 696       GLfloat err = 1e9; /* big enough */
 697       GLfloat tv[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
 698       for (i = 0; i < n_comp; i++) {
 699          tv[0][i] = input[minColL][i];
 700          tv[1][i] = input[maxColL][i];
 701          tv[2][i] = input[minColR][i];
 702          tv[3][i] = input[maxColR][i];
 703       }
 704       for (j1 = 0; j1 < 2; j1++) {
 705          for (j2 = 2; j2 < 4; j2++) {
 706             GLfloat e = 0.0F;
 707             for (i = 0; i < n_comp; i++) {
 708                e += (tv[j1][i] - tv[j2][i]) * (tv[j1][i] - tv[j2][i]);
 709             }
 710             if (e < err) {
 711                err = e;
 712                v1 = j1;
 713                v2 = j2;
 714             }
 715          }
 716       }
 717       for (i = 0; i < n_comp; i++) {
 718          vec[0][i] = tv[1 - v1][i];
 719          vec[1][i] = (tv[v1][i] * sumL + tv[v2][i] * sumR) / (sumL + sumR);
 720          vec[2][i] = tv[5 - v2][i];
 721       }
 722    }
 723
 724    /* left microtile */
 725    cc[0] = 0;
 726    if (minColL != maxColL) {
 727       /* compute interpolation vector */
 728       MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
 729
 730       /* add in texels */
 731       lolo = 0;
 732       for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
 733          GLint texel;
 734          /* interpolate color */
 735          CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
 736          /* add in texel */
 737          lolo <<= 2;
 738          lolo |= texel;
 739       }
 740
 741       cc[0] = lolo;
 742    }
 743
 744    /* right microtile */
 745    cc[1] = 0;
 746    if (minColR != maxColR) {
 747       /* compute interpolation vector */
 748       MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[1]);
 749
 750       /* add in texels */
 751       lohi = 0;
 752       for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
 753          GLint texel;
 754          /* interpolate color */
 755          CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
 756          /* add in texel */
 757          lohi <<= 2;
 758          lohi |= texel;
 759       }
 760
 761       cc[1] = lohi;
 762    }
 763
 764    FX64_MOV32(hi, 7); /* alpha = "011" + lerp = 1 */
 765    for (j = n_vect - 1; j >= 0; j--) {
 766       /* add in alphas */
 767       FX64_SHL(hi, 5);
 768       FX64_OR32(hi, (GLuint)(vec[j][ACOMP] / 8.0F));
 769    }
 770    for (j = n_vect - 1; j >= 0; j--) {
 771       for (i = 0; i < n_comp - 1; i++) {
 772          /* add in colors */
 773          FX64_SHL(hi, 5);
 774          FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
 775       }
 776    }
 777    ((Fx64 *)cc)[1] = hi;
 778 }
 779
 780
 781 static void
 782 fxt1_quantize_HI (GLuint *cc,
 783                   GLubyte input[N_TEXELS][MAX_COMP],
 784                   GLubyte reord[N_TEXELS][MAX_COMP], GLint n)
 785 {
 786    const GLint n_vect = 6; /* highest vector number */
 787    const GLint n_comp = 3; /* 3 components: R, G, B */
 788    GLfloat b = 0.0F;       /* phoudoin: silent compiler! */
 789    GLfloat iv[MAX_COMP];   /* interpolation vector */
 790    GLint i, k;
 791    GLuint hihi; /* high quadword: hi dword */
 792
 793    GLint minSum = 2000; /* big enough */
 794    GLint maxSum = -1; /* small enough */
 795    GLint minCol = 0; /* phoudoin: silent compiler! */
 796    GLint maxCol = 0; /* phoudoin: silent compiler! */
 797
 798    /* Our solution here is to find the darkest and brightest colors in
 799     * the 8x4 tile and use those as the two representative colors.
 800     * There are probably better algorithms to use (histogram-based).
 801     */
 802    for (k = 0; k < n; k++) {
 803       GLint sum = 0;
 804       for (i = 0; i < n_comp; i++) {
 805          sum += reord[k][i];
 806       }
 807       if (minSum > sum) {
 808          minSum = sum;
 809          minCol = k;
 810       }
 811       if (maxSum < sum) {
 812          maxSum = sum;
 813          maxCol = k;
 814       }
 815    }
 816
 817    hihi = 0; /* cc-hi = "00" */
 818    for (i = 0; i < n_comp; i++) {
 819       /* add in colors */
 820       hihi <<= 5;
 821       hihi |= reord[maxCol][i] >> 3;
 822    }
 823    for (i = 0; i < n_comp; i++) {
 824       /* add in colors */
 825       hihi <<= 5;
 826       hihi |= reord[minCol][i] >> 3;
 827    }
 828    cc[3] = hihi;
 829    cc[0] = cc[1] = cc[2] = 0;
 830
 831    /* compute interpolation vector */
 832    if (minCol != maxCol) {
 833       MAKEIVEC(n_vect, n_comp, iv, b, reord[minCol], reord[maxCol]);
 834    }
 835
 836    /* add in texels */
 837    for (k = N_TEXELS - 1; k >= 0; k--) {
 838       GLint t = k * 3;
 839       GLuint *kk = (GLuint *)((char *)cc + t / 8);
 840       GLint texel = n_vect + 1; /* transparent black */
 841
 842       if (!ISTBLACK(input[k])) {
 843          if (minCol != maxCol) {
 844             /* interpolate color */
 845             CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
 846             /* add in texel */
 847             kk[0] |= texel << (t & 7);
 848          }
 849       } else {
 850          /* add in texel */
 851          kk[0] |= texel << (t & 7);
 852       }
 853    }
 854 }
 855
 856
 857 static void
 858 fxt1_quantize_MIXED1 (GLuint *cc,
 859                       GLubyte input[N_TEXELS][MAX_COMP])
 860 {
 861    const GLint n_vect = 2; /* highest vector number in each microtile */
 862    const GLint n_comp = 3; /* 3 components: R, G, B */
 863    GLubyte vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
 864    GLfloat b, iv[MAX_COMP]; /* interpolation vector */
 865    GLint i, j, k;
 866    Fx64 hi; /* high quadword */
 867    GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
 868
 869    GLint minSum;
 870    GLint maxSum;
 871    GLint minColL = 0, maxColL = -1;
 872    GLint minColR = 0, maxColR = -1;
 873
 874    /* Our solution here is to find the darkest and brightest colors in
 875     * the 4x4 tile and use those as the two representative colors.
 876     * There are probably better algorithms to use (histogram-based).
 877     */
 878    minSum = 2000; /* big enough */
 879    maxSum = -1; /* small enough */
 880    for (k = 0; k < N_TEXELS / 2; k++) {
 881       if (!ISTBLACK(input[k])) {
 882          GLint sum = 0;
 883          for (i = 0; i < n_comp; i++) {
 884             sum += input[k][i];
 885          }
 886          if (minSum > sum) {
 887             minSum = sum;
 888             minColL = k;
 889          }
 890          if (maxSum < sum) {
 891             maxSum = sum;
 892             maxColL = k;
 893          }
 894       }
 895    }
 896    minSum = 2000; /* big enough */
 897    maxSum = -1; /* small enough */
 898    for (; k < N_TEXELS; k++) {
 899       if (!ISTBLACK(input[k])) {
 900          GLint sum = 0;
 901          for (i = 0; i < n_comp; i++) {
 902             sum += input[k][i];
 903          }
 904          if (minSum > sum) {
 905             minSum = sum;
 906             minColR = k;
 907          }
 908          if (maxSum < sum) {
 909             maxSum = sum;
 910             maxColR = k;
 911          }
 912       }
 913    }
 914
 915    /* left microtile */
 916    if (maxColL == -1) {
 917       /* all transparent black */
 918       cc[0] = ~0u;
 919       for (i = 0; i < n_comp; i++) {
 920          vec[0][i] = 0;
 921          vec[1][i] = 0;
 922       }
 923    } else {
 924       cc[0] = 0;
 925       for (i = 0; i < n_comp; i++) {
 926          vec[0][i] = input[minColL][i];
 927          vec[1][i] = input[maxColL][i];
 928       }
 929       if (minColL != maxColL) {
 930          /* compute interpolation vector */
 931          MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
 932
 933          /* add in texels */
 934          lolo = 0;
 935          for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
 936             GLint texel = n_vect + 1; /* transparent black */
 937             if (!ISTBLACK(input[k])) {
 938                /* interpolate color */
 939                CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
 940             }
 941             /* add in texel */
 942             lolo <<= 2;
 943             lolo |= texel;
 944          }
 945          cc[0] = lolo;
 946       }
 947    }
 948
 949    /* right microtile */
 950    if (maxColR == -1) {
 951       /* all transparent black */
 952       cc[1] = ~0u;
 953       for (i = 0; i < n_comp; i++) {
 954          vec[2][i] = 0;
 955          vec[3][i] = 0;
 956       }
 957    } else {
 958       cc[1] = 0;
 959       for (i = 0; i < n_comp; i++) {
 960          vec[2][i] = input[minColR][i];
 961          vec[3][i] = input[maxColR][i];
 962       }
 963       if (minColR != maxColR) {
 964          /* compute interpolation vector */
 965          MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);
 966
 967          /* add in texels */
 968          lohi = 0;
 969          for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
 970             GLint texel = n_vect + 1; /* transparent black */
 971             if (!ISTBLACK(input[k])) {
 972                /* interpolate color */
 973                CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
 974             }
 975             /* add in texel */
 976             lohi <<= 2;
 977             lohi |= texel;
 978          }
 979          cc[1] = lohi;
 980       }
 981    }
 982
 983    FX64_MOV32(hi, 9 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
 984    for (j = 2 * 2 - 1; j >= 0; j--) {
 985       for (i = 0; i < n_comp; i++) {
 986          /* add in colors */
 987          FX64_SHL(hi, 5);
 988          FX64_OR32(hi, vec[j][i] >> 3);
 989       }
 990    }
 991    ((Fx64 *)cc)[1] = hi;
 992 }
 993
 994
/**
 * Quantize one block of N_TEXELS texels into the "mixed" FXT1 format with
 * the alpha bit clear (four-level interpolation, no transparent index).
 *
 * The block is handled as two microtiles: texels [0, N_TEXELS / 2) form the
 * left one and texels [N_TEXELS / 2, N_TEXELS) the right one.  Each
 * microtile gets two endpoint colors and one 2-bit selector per texel.
 *
 * fxt1_quantize() selects this routine when no texel is transparent black
 * and no texel has an alpha value below the (255 - ALPHA_TS) threshold.
 *
 * \param cc     destination block: cc[0] receives the left selectors, cc[1]
 *               the right selectors, and the Fx64 at ((Fx64 *)cc)[1] the
 *               packed endpoint colors and control bits.
 * \param input  the block's texels; only the first three components of each
 *               texel are read here.
 */
static void
fxt1_quantize_MIXED0 (GLuint *cc,
                      GLubyte input[N_TEXELS][MAX_COMP])
{
   const GLint n_vect = 3; /* highest vector number in each microtile */
   const GLint n_comp = 3; /* 3 components: R, G, B */
   GLubyte vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
   GLfloat b, iv[MAX_COMP]; /* interpolation vector */
   GLint i, j, k;
   Fx64 hi; /* high quadword */
   GLuint lohi, lolo; /* low quadword: hi dword, lo dword */

   /* texel indices of the chosen endpoints in the left/right microtile */
   GLint minColL = 0, maxColL = 0;
   GLint minColR = 0, maxColR = 0;
#if 0
   /* Disabled alternative: pick the endpoints by component sum. */
   GLint minSum;
   GLint maxSum;

   /* Our solution here is to find the darkest and brightest colors in
    * the 4x4 tile and use those as the two representative colors.
    * There are probably better algorithms to use (histogram-based).
    */
   minSum = 2000; /* big enough */
   maxSum = -1; /* small enough */
   for (k = 0; k < N_TEXELS / 2; k++) {
      GLint sum = 0;
      for (i = 0; i < n_comp; i++) {
         sum += input[k][i];
      }
      if (minSum > sum) {
         minSum = sum;
         minColL = k;
      }
      if (maxSum < sum) {
         maxSum = sum;
         maxColL = k;
      }
   }
   minSum = 2000; /* big enough */
   maxSum = -1; /* small enough */
   for (; k < N_TEXELS; k++) {
      GLint sum = 0;
      for (i = 0; i < n_comp; i++) {
         sum += input[k][i];
      }
      if (minSum > sum) {
         minSum = sum;
         minColR = k;
      }
      if (maxSum < sum) {
         maxSum = sum;
         maxColR = k;
      }
   }
#else
   GLint minVal;
   GLint maxVal;
   /* NOTE(review): fxt1_variance() is defined outside this chunk; its
    * return value is used below as a component index, presumably the
    * channel with the largest variance in the microtile -- confirm.
    */
   GLint maxVarL = fxt1_variance(NULL, input, n_comp, N_TEXELS / 2);
   GLint maxVarR = fxt1_variance(NULL, &input[N_TEXELS / 2], n_comp, N_TEXELS / 2);

   /* Scan the channel with max variance for lo & hi
    * and use those as the two representative colors.
    */
   minVal = 2000; /* big enough */
   maxVal = -1; /* small enough */
   for (k = 0; k < N_TEXELS / 2; k++) {
      GLint t = input[k][maxVarL];
      if (minVal > t) {
         minVal = t;
         minColL = k;
      }
      if (maxVal < t) {
         maxVal = t;
         maxColL = k;
      }
   }
   /* k continues from N_TEXELS / 2: same scan for the right microtile */
   minVal = 2000; /* big enough */
   maxVal = -1; /* small enough */
   for (; k < N_TEXELS; k++) {
      GLint t = input[k][maxVarR];
      if (minVal > t) {
         minVal = t;
         minColR = k;
      }
      if (maxVal < t) {
         maxVal = t;
         maxColR = k;
      }
   }
#endif

   /* left microtile */
   /* selectors stay all-zero when both endpoints are the same texel */
   cc[0] = 0;
   for (i = 0; i < n_comp; i++) {
      vec[0][i] = input[minColL][i];
      vec[1][i] = input[maxColL][i];
   }
   if (minColL != maxColL) {
      /* compute interpolation vector */
      MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);

      /* add in texels */
      /* walk from the last texel down so texel 0 lands in the low 2 bits */
      lolo = 0;
      for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
         GLint texel;
         /* interpolate color */
         CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
         /* add in texel */
         lolo <<= 2;
         lolo |= texel;
      }

      /* funky encoding for LSB of green */
      /* The decoder rebuilds the extra green bit of color 0 as
       * glsb ^ (bit 1 of the selector word), where glsb is taken from
       * vec[1].  If bit 1 of lolo disagrees with the XOR of the two
       * endpoints' green bit 2, swap the endpoints and complement every
       * selector (s -> 3 - s), which flips bit 1 and keeps the texels
       * pointing at the same colors.
       */
      if ((GLint)((lolo >> 1) & 1) != (((vec[1][GCOMP] ^ vec[0][GCOMP]) >> 2) & 1)) {
         for (i = 0; i < n_comp; i++) {
            vec[1][i] = input[minColL][i];
            vec[0][i] = input[maxColL][i];
         }
         lolo = ~lolo;
      }

      cc[0] = lolo;
   }

   /* right microtile */
   /* same procedure as the left microtile, using vec[2]/vec[3] and cc[1] */
   cc[1] = 0;
   for (i = 0; i < n_comp; i++) {
      vec[2][i] = input[minColR][i];
      vec[3][i] = input[maxColR][i];
   }
   if (minColR != maxColR) {
      /* compute interpolation vector */
      MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);

      /* add in texels */
      lohi = 0;
      for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
         GLint texel;
         /* interpolate color */
         CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
         /* add in texel */
         lohi <<= 2;
         lohi |= texel;
      }

      /* funky encoding for LSB of green */
      if ((GLint)((lohi >> 1) & 1) != (((vec[3][GCOMP] ^ vec[2][GCOMP]) >> 2) & 1)) {
         for (i = 0; i < n_comp; i++) {
            vec[3][i] = input[minColR][i];
            vec[2][i] = input[maxColR][i];
         }
         lohi = ~lohi;
      }

      cc[1] = lohi;
   }

   /* Seed the four control bits.  After the 12 five-bit shifts below they
    * end up on top of the quadword: 8 = mixed mode, bit 2 = green bit 2 of
    * vec[3], bit 1 = green bit 2 of vec[1], bit 0 = alpha flag (clear).
    */
   FX64_MOV32(hi, 8 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* mixed = "1", alpha = "0" */
   /* push vec[3] first so that vec[0] ends up in the lowest bits */
   for (j = 2 * 2 - 1; j >= 0; j--) {
      for (i = 0; i < n_comp; i++) {
         /* add in colors */
         FX64_SHL(hi, 5);
         FX64_OR32(hi, vec[j][i] >> 3);
      }
   }
   ((Fx64 *)cc)[1] = hi;
}
1162
1163
1164 static void
1165 fxt1_quantize (GLuint *cc, const GLubyte *lines[], GLint comps)
1166 {
1167    GLint trualpha;
1168    GLubyte reord[N_TEXELS][MAX_COMP];
1169
1170    GLubyte input[N_TEXELS][MAX_COMP];
1171    GLint i, k, l;
1172
1173    if (comps == 3) {
1174       /* make the whole block opaque */
1175       memset(input, -1, sizeof(input));
1176    }
1177
1178    /* 8 texels each line */
1179    for (l = 0; l < 4; l++) {
1180       for (k = 0; k < 4; k++) {
1181          for (i = 0; i < comps; i++) {
1182             input[k + l * 4][i] = *lines[l]++;
1183          }
1184       }
1185       for (; k < 8; k++) {
1186          for (i = 0; i < comps; i++) {
1187             input[k + l * 4 + 12][i] = *lines[l]++;
1188          }
1189       }
1190    }
1191
1192    /* block layout:
1193     * 00, 01, 02, 03, 08, 09, 0a, 0b
1194     * 10, 11, 12, 13, 18, 19, 1a, 1b
1195     * 04, 05, 06, 07, 0c, 0d, 0e, 0f
1196     * 14, 15, 16, 17, 1c, 1d, 1e, 1f
1197     */
1198
1199    /* [dBorca]
1200     * stupidity flows forth from this
1201     */
1202    l = N_TEXELS;
1203    trualpha = 0;
1204    if (comps == 4) {
1205       /* skip all transparent black texels */
1206       l = 0;
1207       for (k = 0; k < N_TEXELS; k++) {
1208          /* test all components against 0 */
1209          if (!ISTBLACK(input[k])) {
1210             /* texel is not transparent black */
1211             COPY_4UBV(reord[l], input[k]);
1212             if (reord[l][ACOMP] < (255 - ALPHA_TS)) {
1213                /* non-opaque texel */
1214                trualpha = !0;
1215             }
1216             l++;
1217          }
1218       }
1219    }
1220
1221 #if 0
1222    if (trualpha) {
1223       fxt1_quantize_ALPHA0(cc, input, reord, l);
1224    } else if (l == 0) {
1225       cc[0] = cc[1] = cc[2] = -1;
1226       cc[3] = 0;
1227    } else if (l < N_TEXELS) {
1228       fxt1_quantize_HI(cc, input, reord, l);
1229    } else {
1230       fxt1_quantize_CHROMA(cc, input);
1231    }
1232    (void)fxt1_quantize_ALPHA1;
1233    (void)fxt1_quantize_MIXED1;
1234    (void)fxt1_quantize_MIXED0;
1235 #else
1236    if (trualpha) {
1237       fxt1_quantize_ALPHA1(cc, input);
1238    } else if (l == 0) {
1239       cc[0] = cc[1] = cc[2] = ~0u;
1240       cc[3] = 0;
1241    } else if (l < N_TEXELS) {
1242       fxt1_quantize_MIXED1(cc, input);
1243    } else {
1244       fxt1_quantize_MIXED0(cc, input);
1245    }
1246    (void)fxt1_quantize_ALPHA0;
1247    (void)fxt1_quantize_HI;
1248    (void)fxt1_quantize_CHROMA;
1249 #endif
1250 }
1251
1252
1253
1254 /**
1255  * Upscale an image by replication, not (typical) stretching.
1256  * We use this when the image width or height is less than a
1257  * certain size (4, 8) and we need to upscale an image.
1258  */
1259 static void
1260 upscale_teximage2d(GLsizei inWidth, GLsizei inHeight,
1261                    GLsizei outWidth, GLsizei outHeight,
1262                    GLint comps, const GLubyte *src, GLint srcRowStride,
1263                    GLubyte *dest )
1264 {
1265    GLint i, j, k;
1266
1267    ASSERT(outWidth >= inWidth);
1268    ASSERT(outHeight >= inHeight);
1269 #if 0
1270    ASSERT(inWidth == 1 || inWidth == 2 || inHeight == 1 || inHeight == 2);
1271    ASSERT((outWidth & 3) == 0);
1272    ASSERT((outHeight & 3) == 0);
1273 #endif
1274
1275    for (i = 0; i < outHeight; i++) {
1276       const GLint ii = i % inHeight;
1277       for (j = 0; j < outWidth; j++) {
1278          const GLint jj = j % inWidth;
1279          for (k = 0; k < comps; k++) {
1280             dest[(i * outWidth + j) * comps + k]
1281                = src[ii * srcRowStride + jj * comps + k];
1282          }
1283       }
1284    }
1285 }
1286
1287
1288 static void
1289 fxt1_encode (GLuint width, GLuint height, GLint comps,
1290              const void *source, GLint srcRowStride,
1291              void *dest, GLint destRowStride)
1292 {
1293    GLuint x, y;
1294    const GLubyte *data;
1295    GLuint *encoded = (GLuint *)dest;
1296    void *newSource = NULL;
1297
1298    assert(comps == 3 || comps == 4);
1299
1300    /* Replicate image if width is not M8 or height is not M4 */
1301    if ((width & 7) | (height & 3)) {
1302       GLint newWidth = (width + 7) & ~7;
1303       GLint newHeight = (height + 3) & ~3;
1304       newSource = malloc(comps * newWidth * newHeight * sizeof(GLubyte));
1305       if (!newSource) {
1306          GET_CURRENT_CONTEXT(ctx);
1307          _mesa_error(ctx, GL_OUT_OF_MEMORY, "texture compression");
1308          goto cleanUp;
1309       }
1310       upscale_teximage2d(width, height, newWidth, newHeight,
1311                          comps, (const GLubyte *) source,
1312                          srcRowStride, (GLubyte *) newSource);
1313       source = newSource;
1314       width = newWidth;
1315       height = newHeight;
1316       srcRowStride = comps * newWidth;
1317    }
1318
1319    data = (const GLubyte *) source;
1320    destRowStride = (destRowStride - width * 2) / 4;
1321    for (y = 0; y < height; y += 4) {
1322       GLuint offs = 0 + (y + 0) * srcRowStride;
1323       for (x = 0; x < width; x += 8) {
1324          const GLubyte *lines[4];
1325          lines[0] = &data[offs];
1326          lines[1] = lines[0] + srcRowStride;
1327          lines[2] = lines[1] + srcRowStride;
1328          lines[3] = lines[2] + srcRowStride;
1329          offs += 8 * comps;
1330          fxt1_quantize(encoded, lines, comps);
1331          /* 128 bits per 8x4 block */
1332          encoded += 4;
1333       }
1334       encoded += destRowStride;
1335    }
1336
1337  cleanUp:
1338    free(newSource);
1339 }
1340
1341
1342 /***************************************************************************\
1343  * FXT1 decoder
1344  *
1345  * The decoder is based on GL_3DFX_texture_compression_FXT1
1346  * specification and serves as a concept for the encoder.
1347 \***************************************************************************/
1348
1349
/* lookup table for scaling 5 bit colors up to 8 bits
 *
 * Indexed by the 5-bit value c (0..31); each entry equals c * 255 / 31
 * rounded to the nearest integer, so 0 maps to 0 and 31 maps to 255.
 */
static const GLubyte _rgb_scale_5[] = {
   0,   8,   16,  25,  33,  41,  49,  58,
   66,  74,  82,  90,  99,  107, 115, 123,
   132, 140, 148, 156, 165, 173, 181, 189,
   197, 206, 214, 222, 230, 239, 247, 255
};
1357
/* lookup table for scaling 6 bit colors up to 8 bits
 *
 * Indexed by the 6-bit value c (0..63); each entry equals c * 255 / 63
 * rounded to the nearest integer, so 0 maps to 0 and 63 maps to 255.
 */
static const GLubyte _rgb_scale_6[] = {
   0,   4,   8,   12,  16,  20,  24,  28,
   32,  36,  40,  45,  49,  53,  57,  61,
   65,  69,  73,  77,  81,  85,  89,  93,
   97,  101, 105, 109, 113, 117, 121, 125,
   130, 134, 138, 142, 146, 150, 154, 158,
   162, 166, 170, 174, 178, 182, 186, 190,
   194, 198, 202, 206, 210, 215, 219, 223,
   227, 231, 235, 239, 243, 247, 251, 255
};
1369
1370
/*
 * Decoder helpers.
 *
 * CC_SEL(cc, which)  - view cc as an array of 32-bit words and return the
 *                      word holding bit number `which`, shifted so that bit
 *                      sits at position 0.  Higher bits are NOT masked off.
 * UP5(c)             - expand the low 5 bits of c to an 8-bit value.
 * UP6(c, b)          - expand the 6-bit value made of the low 5 bits of c
 *                      followed by the low bit of b to an 8-bit value.
 * LERP(n, t, c0, c1) - rounded integer interpolation: step t of n on the
 *                      way from c0 (t == 0) to c1 (t == n).
 *
 * Every expansion is fully parenthesized so the macros can be used inside
 * larger expressions, and CC_SEL keeps the const qualifier of its argument.
 */
#define CC_SEL(cc, which) (((const GLuint *)(cc))[(which) / 32] >> ((which) & 31))
#define UP5(c) (_rgb_scale_5[(c) & 31])
#define UP6(c, b) (_rgb_scale_6[(((c) & 31) << 1) | ((b) & 1)])
#define LERP(n, t, c0, c1) ((((n) - (t)) * (c0) + (t) * (c1) + (n) / 2) / (n))
1375
1376
1377 static void
1378 fxt1_decode_1HI (const GLubyte *code, GLint t, GLubyte *rgba)
1379 {
1380    const GLuint *cc;
1381
1382    t *= 3;
1383    cc = (const GLuint *)(code + t / 8);
1384    t = (cc[0] >> (t & 7)) & 7;
1385
1386    if (t == 7) {
1387       rgba[RCOMP] = rgba[GCOMP] = rgba[BCOMP] = rgba[ACOMP] = 0;
1388    } else {
1389       GLubyte r, g, b;
1390       cc = (const GLuint *)(code + 12);
1391       if (t == 0) {
1392          b = UP5(CC_SEL(cc, 0));
1393          g = UP5(CC_SEL(cc, 5));
1394          r = UP5(CC_SEL(cc, 10));
1395       } else if (t == 6) {
1396          b = UP5(CC_SEL(cc, 15));
1397          g = UP5(CC_SEL(cc, 20));
1398          r = UP5(CC_SEL(cc, 25));
1399       } else {
1400          b = LERP(6, t, UP5(CC_SEL(cc, 0)), UP5(CC_SEL(cc, 15)));
1401          g = LERP(6, t, UP5(CC_SEL(cc, 5)), UP5(CC_SEL(cc, 20)));
1402          r = LERP(6, t, UP5(CC_SEL(cc, 10)), UP5(CC_SEL(cc, 25)));
1403       }
1404       rgba[RCOMP] = r;
1405       rgba[GCOMP] = g;
1406       rgba[BCOMP] = b;
1407       rgba[ACOMP] = 255;
1408    }
1409 }
1410
1411
1412 static void
1413 fxt1_decode_1CHROMA (const GLubyte *code, GLint t, GLubyte *rgba)
1414 {
1415    const GLuint *cc;
1416    GLuint kk;
1417
1418    cc = (const GLuint *)code;
1419    if (t & 16) {
1420       cc++;
1421       t &= 15;
1422    }
1423    t = (cc[0] >> (t * 2)) & 3;
1424
1425    t *= 15;
1426    cc = (const GLuint *)(code + 8 + t / 8);
1427    kk = cc[0] >> (t & 7);
1428    rgba[BCOMP] = UP5(kk);
1429    rgba[GCOMP] = UP5(kk >> 5);
1430    rgba[RCOMP] = UP5(kk >> 10);
1431    rgba[ACOMP] = 255;
1432 }
1433
1434
1435 static void
1436 fxt1_decode_1MIXED (const GLubyte *code, GLint t, GLubyte *rgba)
1437 {
1438    const GLuint *cc;
1439    GLuint col[2][3];
1440    GLint glsb, selb;
1441
1442    cc = (const GLuint *)code;
1443    if (t & 16) {
1444       t &= 15;
1445       t = (cc[1] >> (t * 2)) & 3;
1446       /* col 2 */
1447       col[0][BCOMP] = (*(const GLuint *)(code + 11)) >> 6;
1448       col[0][GCOMP] = CC_SEL(cc, 99);
1449       col[0][RCOMP] = CC_SEL(cc, 104);
1450       /* col 3 */
1451       col[1][BCOMP] = CC_SEL(cc, 109);
1452       col[1][GCOMP] = CC_SEL(cc, 114);
1453       col[1][RCOMP] = CC_SEL(cc, 119);
1454       glsb = CC_SEL(cc, 126);
1455       selb = CC_SEL(cc, 33);
1456    } else {
1457       t = (cc[0] >> (t * 2)) & 3;
1458       /* col 0 */
1459       col[0][BCOMP] = CC_SEL(cc, 64);
1460       col[0][GCOMP] = CC_SEL(cc, 69);
1461       col[0][RCOMP] = CC_SEL(cc, 74);
1462       /* col 1 */
1463       col[1][BCOMP] = CC_SEL(cc, 79);
1464       col[1][GCOMP] = CC_SEL(cc, 84);
1465       col[1][RCOMP] = CC_SEL(cc, 89);
1466       glsb = CC_SEL(cc, 125);
1467       selb = CC_SEL(cc, 1);
1468    }
1469
1470    if (CC_SEL(cc, 124) & 1) {
1471       /* alpha[0] == 1 */
1472
1473       if (t == 3) {
1474          /* zero */
1475          rgba[RCOMP] = rgba[BCOMP] = rgba[GCOMP] = rgba[ACOMP] = 0;
1476       } else {
1477          GLubyte r, g, b;
1478          if (t == 0) {
1479             b = UP5(col[0][BCOMP]);
1480             g = UP5(col[0][GCOMP]);
1481             r = UP5(col[0][RCOMP]);
1482          } else if (t == 2) {
1483             b = UP5(col[1][BCOMP]);
1484             g = UP6(col[1][GCOMP], glsb);
1485             r = UP5(col[1][RCOMP]);
1486          } else {
1487             b = (UP5(col[0][BCOMP]) + UP5(col[1][BCOMP])) / 2;
1488             g = (UP5(col[0][GCOMP]) + UP6(col[1][GCOMP], glsb)) / 2;
1489             r = (UP5(col[0][RCOMP]) + UP5(col[1][RCOMP])) / 2;
1490          }
1491          rgba[RCOMP] = r;
1492          rgba[GCOMP] = g;
1493          rgba[BCOMP] = b;
1494          rgba[ACOMP] = 255;
1495       }
1496    } else {
1497       /* alpha[0] == 0 */
1498       GLubyte r, g, b;
1499       if (t == 0) {
1500          b = UP5(col[0][BCOMP]);
1501          g = UP6(col[0][GCOMP], glsb ^ selb);
1502          r = UP5(col[0][RCOMP]);
1503       } else if (t == 3) {
1504          b = UP5(col[1][BCOMP]);
1505          g = UP6(col[1][GCOMP], glsb);
1506          r = UP5(col[1][RCOMP]);
1507       } else {
1508          b = LERP(3, t, UP5(col[0][BCOMP]), UP5(col[1][BCOMP]));
1509          g = LERP(3, t, UP6(col[0][GCOMP], glsb ^ selb),
1510                         UP6(col[1][GCOMP], glsb));
1511          r = LERP(3, t, UP5(col[0][RCOMP]), UP5(col[1][RCOMP]));
1512       }
1513       rgba[RCOMP] = r;
1514       rgba[GCOMP] = g;
1515       rgba[BCOMP] = b;
1516       rgba[ACOMP] = 255;
1517    }
1518 }
1519
1520
1521 static void
1522 fxt1_decode_1ALPHA (const GLubyte *code, GLint t, GLubyte *rgba)
1523 {
1524    const GLuint *cc;
1525    GLubyte r, g, b, a;
1526
1527    cc = (const GLuint *)code;
1528    if (CC_SEL(cc, 124) & 1) {
1529       /* lerp == 1 */
1530       GLuint col0[4];
1531
1532       if (t & 16) {
1533          t &= 15;
1534          t = (cc[1] >> (t * 2)) & 3;
1535          /* col 2 */
1536          col0[BCOMP] = (*(const GLuint *)(code + 11)) >> 6;
1537          col0[GCOMP] = CC_SEL(cc, 99);
1538          col0[RCOMP] = CC_SEL(cc, 104);
1539          col0[ACOMP] = CC_SEL(cc, 119);
1540       } else {
1541          t = (cc[0] >> (t * 2)) & 3;
1542          /* col 0 */
1543          col0[BCOMP] = CC_SEL(cc, 64);
1544          col0[GCOMP] = CC_SEL(cc, 69);
1545          col0[RCOMP] = CC_SEL(cc, 74);
1546          col0[ACOMP] = CC_SEL(cc, 109);
1547       }
1548
1549       if (t == 0) {
1550          b = UP5(col0[BCOMP]);
1551          g = UP5(col0[GCOMP]);
1552          r = UP5(col0[RCOMP]);
1553          a = UP5(col0[ACOMP]);
1554       } else if (t == 3) {
1555          b = UP5(CC_SEL(cc, 79));
1556          g = UP5(CC_SEL(cc, 84));
1557          r = UP5(CC_SEL(cc, 89));
1558          a = UP5(CC_SEL(cc, 114));
1559       } else {
1560          b = LERP(3, t, UP5(col0[BCOMP]), UP5(CC_SEL(cc, 79)));
1561          g = LERP(3, t, UP5(col0[GCOMP]), UP5(CC_SEL(cc, 84)));
1562          r = LERP(3, t, UP5(col0[RCOMP]), UP5(CC_SEL(cc, 89)));
1563          a = LERP(3, t, UP5(col0[ACOMP]), UP5(CC_SEL(cc, 114)));
1564       }
1565    } else {
1566       /* lerp == 0 */
1567
1568       if (t & 16) {
1569          cc++;
1570          t &= 15;
1571       }
1572       t = (cc[0] >> (t * 2)) & 3;
1573
1574       if (t == 3) {
1575          /* zero */
1576          r = g = b = a = 0;
1577       } else {
1578          GLuint kk;
1579          cc = (const GLuint *)code;
1580          a = UP5(cc[3] >> (t * 5 + 13));
1581          t *= 15;
1582          cc = (const GLuint *)(code + 8 + t / 8);
1583          kk = cc[0] >> (t & 7);
1584          b = UP5(kk);
1585          g = UP5(kk >> 5);
1586          r = UP5(kk >> 10);
1587       }
1588    }
1589    rgba[RCOMP] = r;
1590    rgba[GCOMP] = g;
1591    rgba[BCOMP] = b;
1592    rgba[ACOMP] = a;
1593 }
1594
1595
1596 static void
1597 fxt1_decode_1 (const void *texture, GLint stride, /* in pixels */
1598                GLint i, GLint j, GLubyte *rgba)
1599 {
1600    static void (*decode_1[]) (const GLubyte *, GLint, GLubyte *) = {
1601       fxt1_decode_1HI,     /* cc-high   = "00?" */
1602       fxt1_decode_1HI,     /* cc-high   = "00?" */
1603       fxt1_decode_1CHROMA, /* cc-chroma = "010" */
1604       fxt1_decode_1ALPHA,  /* alpha     = "011" */
1605       fxt1_decode_1MIXED,  /* mixed     = "1??" */
1606       fxt1_decode_1MIXED,  /* mixed     = "1??" */
1607       fxt1_decode_1MIXED,  /* mixed     = "1??" */
1608       fxt1_decode_1MIXED   /* mixed     = "1??" */
1609    };
1610
1611    const GLubyte *code = (const GLubyte *)texture +
1612                          ((j / 4) * (stride / 8) + (i / 8)) * 16;
1613    GLint mode = CC_SEL(code, 125);
1614    GLint t = i & 7;
1615
1616    if (t & 4) {
1617       t += 12;
1618    }
1619    t += (j & 3) * 4;
1620
1621    decode_1[mode](code, t, rgba);
1622 }
1623
1624
1625
1626
1627 static void
1628 fetch_rgb_fxt1(const GLubyte *map,
1629                GLint rowStride, GLint i, GLint j, GLfloat *texel)
1630 {
1631    GLubyte rgba[4];
1632    fxt1_decode_1(map, rowStride, i, j, rgba);
1633    texel[RCOMP] = UBYTE_TO_FLOAT(rgba[RCOMP]);
1634    texel[GCOMP] = UBYTE_TO_FLOAT(rgba[GCOMP]);
1635    texel[BCOMP] = UBYTE_TO_FLOAT(rgba[BCOMP]);
1636    texel[ACOMP] = 1.0F;
1637 }
1638
1639
1640 static void
1641 fetch_rgba_fxt1(const GLubyte *map,
1642                 GLint rowStride, GLint i, GLint j, GLfloat *texel)
1643 {
1644    GLubyte rgba[4];
1645    fxt1_decode_1(map, rowStride, i, j, rgba);
1646    texel[RCOMP] = UBYTE_TO_FLOAT(rgba[RCOMP]);
1647    texel[GCOMP] = UBYTE_TO_FLOAT(rgba[GCOMP]);
1648    texel[BCOMP] = UBYTE_TO_FLOAT(rgba[BCOMP]);
1649    texel[ACOMP] = UBYTE_TO_FLOAT(rgba[ACOMP]);
1650 }
1651
1652
1653 compressed_fetch_func
1654 _mesa_get_fxt_fetch_func(mesa_format format)
1655 {
1656    switch (format) {
1657    case MESA_FORMAT_RGB_FXT1:
1658       return fetch_rgb_fxt1;
1659    case MESA_FORMAT_RGBA_FXT1:
1660       return fetch_rgba_fxt1;
1661    default:
1662       return NULL;
1663    }
1664 }