src/mesa/main/texcompress_fxt1.c

   1 /*
   2  * Mesa 3-D graphics library
   3  *
   4  * Copyright (C) 1999-2008  Brian Paul   All Rights Reserved.
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a
   7  * copy of this software and associated documentation files (the "Software"),
   8  * to deal in the Software without restriction, including without limitation
   9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  10  * and/or sell copies of the Software, and to permit persons to whom the
  11  * Software is furnished to do so, subject to the following conditions:
  12  *
  13  * The above copyright notice and this permission notice shall be included
  14  * in all copies or substantial portions of the Software.
  15  *
  16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
  20  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  21  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  22  * OTHER DEALINGS IN THE SOFTWARE.
  23  */
  24
  25
  26 /**
  27  * \file texcompress_fxt1.c
  28  * GL_3DFX_texture_compression_FXT1 support.
  29  */
  30
  31
  32 #include "errors.h"
  33 #include "glheader.h"
  34 #include "imports.h"
  35 #include "image.h"
  36 #include "macros.h"
  37 #include "mipmap.h"
  38 #include "texcompress.h"
  39 #include "texcompress_fxt1.h"
  40 #include "texstore.h"
  41 #include "mtypes.h"
  42
  43
  44 static void
  45 fxt1_encode (GLuint width, GLuint height, GLint comps,
  46              const void *source, GLint srcRowStride,
  47              void *dest, GLint destRowStride);
  48
  49 static void
  50 fxt1_decode_1 (const void *texture, GLint stride,
  51                GLint i, GLint j, GLubyte *rgba);
  52
  53
  54 /**
  55  * Store user's image in rgb_fxt1 format.
  56  */
  57 GLboolean
  58 _mesa_texstore_rgb_fxt1(TEXSTORE_PARAMS)
  59 {
  60    const GLubyte *pixels;
  61    GLint srcRowStride;
  62    GLubyte *dst;
  63    const GLubyte *tempImage = NULL;
  64
  65    assert(dstFormat == MESA_FORMAT_RGB_FXT1);
  66
  67    if (srcFormat != GL_RGB ||
  68        srcType != GL_UNSIGNED_BYTE ||
  69        ctx->_ImageTransferState ||
  70        ALIGN(srcPacking->RowLength, srcPacking->Alignment) != srcWidth ||
  71        srcPacking->SwapBytes) {
  72       /* convert image to RGB/GLubyte */
  73       GLubyte *tempImageSlices[1];
  74       int rgbRowStride = 3 * srcWidth * sizeof(GLubyte);
  75       tempImage = malloc(srcWidth * srcHeight * 3 * sizeof(GLubyte));
  76       if (!tempImage)
  77          return GL_FALSE; /* out of memory */
  78       tempImageSlices[0] = (GLubyte *) tempImage;
  79       _mesa_texstore(ctx, dims,
  80                      baseInternalFormat,
  81                      MESA_FORMAT_RGB_UNORM8,
  82                      rgbRowStride, tempImageSlices,
  83                      srcWidth, srcHeight, srcDepth,
  84                      srcFormat, srcType, srcAddr,
  85                      srcPacking);
  86       pixels = tempImage;
  87       srcRowStride = 3 * srcWidth;
  88       srcFormat = GL_RGB;
  89    }
  90    else {
  91       pixels = _mesa_image_address2d(srcPacking, srcAddr, srcWidth, srcHeight,
  92                                      srcFormat, srcType, 0, 0);
  93
  94       srcRowStride = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat,
  95                                             srcType) / sizeof(GLubyte);
  96    }
  97
  98    dst = dstSlices[0];
  99
 100    fxt1_encode(srcWidth, srcHeight, 3, pixels, srcRowStride,
 101                dst, dstRowStride);
 102
 103    free((void*) tempImage);
 104
 105    return GL_TRUE;
 106 }
 107
 108
 109 /**
 110  * Store user's image in rgba_fxt1 format.
 111  */
 112 GLboolean
 113 _mesa_texstore_rgba_fxt1(TEXSTORE_PARAMS)
 114 {
 115    const GLubyte *pixels;
 116    GLint srcRowStride;
 117    GLubyte *dst;
 118    const GLubyte *tempImage = NULL;
 119
 120    assert(dstFormat == MESA_FORMAT_RGBA_FXT1);
 121
 122    if (srcFormat != GL_RGBA ||
 123        srcType != GL_UNSIGNED_BYTE ||
 124        ctx->_ImageTransferState ||
 125        srcPacking->SwapBytes) {
 126       /* convert image to RGBA/GLubyte */
 127       GLubyte *tempImageSlices[1];
 128       int rgbaRowStride = 4 * srcWidth * sizeof(GLubyte);
 129       tempImage = malloc(srcWidth * srcHeight * 4 * sizeof(GLubyte));
 130       if (!tempImage)
 131          return GL_FALSE; /* out of memory */
 132       tempImageSlices[0] = (GLubyte *) tempImage;
 133       _mesa_texstore(ctx, dims,
 134                      baseInternalFormat,
 135                      _mesa_little_endian() ? MESA_FORMAT_R8G8B8A8_UNORM
 136                                            : MESA_FORMAT_A8B8G8R8_UNORM,
 137                      rgbaRowStride, tempImageSlices,
 138                      srcWidth, srcHeight, srcDepth,
 139                      srcFormat, srcType, srcAddr,
 140                      srcPacking);
 141       pixels = tempImage;
 142       srcRowStride = 4 * srcWidth;
 143       srcFormat = GL_RGBA;
 144    }
 145    else {
 146       pixels = _mesa_image_address2d(srcPacking, srcAddr, srcWidth, srcHeight,
 147                                      srcFormat, srcType, 0, 0);
 148
 149       srcRowStride = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat,
 150                                             srcType) / sizeof(GLubyte);
 151    }
 152
 153    dst = dstSlices[0];
 154
 155    fxt1_encode(srcWidth, srcHeight, 4, pixels, srcRowStride,
 156                dst, dstRowStride);
 157
 158    free((void*) tempImage);
 159
 160    return GL_TRUE;
 161 }
 162
 163
 164 /***************************************************************************\
 165  * FXT1 encoder
 166  *
 167  * The encoder was built by reversing the decoder,
 168  * and is vaguely based on Texus2 by 3dfx. Note that this code
 169  * is merely a proof of concept, since it is highly UNoptimized;
 170  * moreover, it is sub-optimal due to initial conditions passed
 171  * to Lloyd's algorithm (the interpolation modes are even worse).
 172 \***************************************************************************/
 173
 174
 175 #define MAX_COMP 4 /* ever needed maximum number of components in texel */
 176 #define MAX_VECT 4 /* ever needed maximum number of base vectors to find */
 177 #define N_TEXELS 32 /* number of texels in a block (always 32) */
 178 #define LL_N_REP 50 /* number of iterations in lloyd's vq */
 179 #define LL_RMS_D 10 /* fault tolerance (maximum delta) */
 180 #define LL_RMS_E 255 /* fault tolerance (maximum error) */
 181 #define ALPHA_TS 2 /* alpha threshold: (255 - ALPHA_TS) deemed opaque */
 182 static const GLuint zero = 0;
 183 #define ISTBLACK(v) (memcmp(&(v), &zero, sizeof(zero)) == 0)
 184
 185 /*
 186  * Define a 64-bit unsigned integer type and macros
 187  */
 188 #if 1
 189
 190 #define FX64_NATIVE 1
 191
 192 typedef uint64_t Fx64;
 193
 194 #define FX64_MOV32(a, b) a = b
 195 #define FX64_OR32(a, b)  a |= b
 196 #define FX64_SHL(a, c)   a <<= c
 197
 198 #else
 199
 200 #define FX64_NATIVE 0
 201
 202 typedef struct {
 203    GLuint lo, hi;
 204 } Fx64;
 205
 206 #define FX64_MOV32(a, b) a.lo = b
 207 #define FX64_OR32(a, b)  a.lo |= b
 208
 209 #define FX64_SHL(a, c)                                 \
 210    do {                                                \
 211        if ((c) >= 32) {                                \
 212           a.hi = a.lo << ((c) - 32);                   \
 213           a.lo = 0;                                    \
 214        } else {                                        \
 215           a.hi = (a.hi << (c)) | (a.lo >> (32 - (c))); \
 216           a.lo <<= (c);                                \
 217        }                                               \
 218    } while (0)
 219
 220 #endif
 221
 222
 223 #define F(i) (GLfloat)1 /* can be used to obtain an oblong metric: 0.30 / 0.59 / 0.11 */
 224 #define SAFECDOT 1 /* for paranoids */
 225
 226 #define MAKEIVEC(NV, NC, IV, B, V0, V1)  \
 227    do {                                  \
 228       /* compute interpolation vector */ \
 229       GLfloat d2 = 0.0F;                 \
 230       GLfloat rd2;                       \
 231                                          \
 232       for (i = 0; i < NC; i++) {         \
 233          IV[i] = (V1[i] - V0[i]) * F(i); \
 234          d2 += IV[i] * IV[i];            \
 235       }                                  \
 236       rd2 = (GLfloat)NV / d2;            \
 237       B = 0;                             \
 238       for (i = 0; i < NC; i++) {         \
 239          IV[i] *= F(i);                  \
 240          B -= IV[i] * V0[i];             \
 241          IV[i] *= rd2;                   \
 242       }                                  \
 243       B = B * rd2 + 0.5f;                \
 244    } while (0)
 245
 246 #define CALCCDOT(TEXEL, NV, NC, IV, B, V)\
 247    do {                                  \
 248       GLfloat dot = 0.0F;                \
 249       for (i = 0; i < NC; i++) {         \
 250          dot += V[i] * IV[i];            \
 251       }                                  \
 252       TEXEL = (GLint)(dot + B);          \
 253       if (SAFECDOT) {                    \
 254          if (TEXEL < 0) {                \
 255             TEXEL = 0;                   \
 256          } else if (TEXEL > NV) {        \
 257             TEXEL = NV;                  \
 258          }                               \
 259       }                                  \
 260    } while (0)
 261
 262
 263 static GLint
 264 fxt1_bestcol (GLfloat vec[][MAX_COMP], GLint nv,
 265               GLubyte input[MAX_COMP], GLint nc)
 266 {
 267    GLint i, j, best = -1;
 268    GLfloat err = 1e9; /* big enough */
 269
 270    for (j = 0; j < nv; j++) {
 271       GLfloat e = 0.0F;
 272       for (i = 0; i < nc; i++) {
 273          e += (vec[j][i] - input[i]) * (vec[j][i] - input[i]);
 274       }
 275       if (e < err) {
 276          err = e;
 277          best = j;
 278       }
 279    }
 280
 281    return best;
 282 }
 283
 284
 285 static GLint
 286 fxt1_worst (GLfloat vec[MAX_COMP],
 287             GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
 288 {
 289    GLint i, k, worst = -1;
 290    GLfloat err = -1.0F; /* small enough */
 291
 292    for (k = 0; k < n; k++) {
 293       GLfloat e = 0.0F;
 294       for (i = 0; i < nc; i++) {
 295          e += (vec[i] - input[k][i]) * (vec[i] - input[k][i]);
 296       }
 297       if (e > err) {
 298          err = e;
 299          worst = k;
 300       }
 301    }
 302
 303    return worst;
 304 }
 305
 306
 307 static GLint
 308 fxt1_variance (GLdouble variance[MAX_COMP],
 309                GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
 310 {
 311    GLint i, k, best = 0;
 312    GLint sx, sx2;
 313    GLdouble var, maxvar = -1; /* small enough */
 314    GLdouble teenth = 1.0 / n;
 315
 316    for (i = 0; i < nc; i++) {
 317       sx = sx2 = 0;
 318       for (k = 0; k < n; k++) {
 319          GLint t = input[k][i];
 320          sx += t;
 321          sx2 += t * t;
 322       }
 323       var = sx2 * teenth - sx * sx * teenth * teenth;
 324       if (maxvar < var) {
 325          maxvar = var;
 326          best = i;
 327       }
 328       if (variance) {
 329          variance[i] = var;
 330       }
 331    }
 332
 333    return best;
 334 }
 335
 336
 337 static GLint
 338 fxt1_choose (GLfloat vec[][MAX_COMP], GLint nv,
 339              GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
 340 {
 341 #if 0
 342    /* Choose colors from a grid.
 343     */
 344    GLint i, j;
 345
 346    for (j = 0; j < nv; j++) {
 347       GLint m = j * (n - 1) / (nv - 1);
 348       for (i = 0; i < nc; i++) {
 349          vec[j][i] = input[m][i];
 350       }
 351    }
 352 #else
 353    /* Our solution here is to find the darkest and brightest colors in
 354     * the 8x4 tile and use those as the two representative colors.
 355     * There are probably better algorithms to use (histogram-based).
 356     */
 357    GLint i, j, k;
 358    GLint minSum = 2000; /* big enough */
 359    GLint maxSum = -1; /* small enough */
 360    GLint minCol = 0; /* phoudoin: silent compiler! */
 361    GLint maxCol = 0; /* phoudoin: silent compiler! */
 362
 363    struct {
 364       GLint flag;
 365       GLint key;
 366       GLint freq;
 367       GLint idx;
 368    } hist[N_TEXELS];
 369    GLint lenh = 0;
 370
 371    memset(hist, 0, sizeof(hist));
 372
 373    for (k = 0; k < n; k++) {
 374       GLint l;
 375       GLint key = 0;
 376       GLint sum = 0;
 377       for (i = 0; i < nc; i++) {
 378          key <<= 8;
 379          key |= input[k][i];
 380          sum += input[k][i];
 381       }
 382       for (l = 0; l < n; l++) {
 383          if (!hist[l].flag) {
 384             /* alloc new slot */
 385             hist[l].flag = !0;
 386             hist[l].key = key;
 387             hist[l].freq = 1;
 388             hist[l].idx = k;
 389             lenh = l + 1;
 390             break;
 391          } else if (hist[l].key == key) {
 392             hist[l].freq++;
 393             break;
 394          }
 395       }
 396       if (minSum > sum) {
 397          minSum = sum;
 398          minCol = k;
 399       }
 400       if (maxSum < sum) {
 401          maxSum = sum;
 402          maxCol = k;
 403       }
 404    }
 405
 406    if (lenh <= nv) {
 407       for (j = 0; j < lenh; j++) {
 408          for (i = 0; i < nc; i++) {
 409             vec[j][i] = (GLfloat)input[hist[j].idx][i];
 410          }
 411       }
 412       for (; j < nv; j++) {
 413          for (i = 0; i < nc; i++) {
 414             vec[j][i] = vec[0][i];
 415          }
 416       }
 417       return 0;
 418    }
 419
 420    for (j = 0; j < nv; j++) {
 421       for (i = 0; i < nc; i++) {
 422          vec[j][i] = ((nv - 1 - j) * input[minCol][i] + j * input[maxCol][i] + (nv - 1) / 2) / (GLfloat)(nv - 1);
 423       }
 424    }
 425 #endif
 426
 427    return !0;
 428 }
 429
 430
 431 static GLint
 432 fxt1_lloyd (GLfloat vec[][MAX_COMP], GLint nv,
 433             GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
 434 {
 435    /* Use the generalized lloyd's algorithm for VQ:
 436     *     find 4 color vectors.
 437     *
 438     *     for each sample color
 439     *         sort to nearest vector.
 440     *
 441     *     replace each vector with the centroid of its matching colors.
 442     *
 443     *     repeat until RMS doesn't improve.
 444     *
 445     *     if a color vector has no samples, or becomes the same as another
 446     *     vector, replace it with the color which is farthest from a sample.
 447     *
 448     * vec[][MAX_COMP]           initial vectors and resulting colors
 449     * nv                        number of resulting colors required
 450     * input[N_TEXELS][MAX_COMP] input texels
 451     * nc                        number of components in input / vec
 452     * n                         number of input samples
 453     */
 454
 455    GLint sum[MAX_VECT][MAX_COMP]; /* used to accumulate closest texels */
 456    GLint cnt[MAX_VECT]; /* how many times a certain vector was chosen */
 457    GLfloat error, lasterror = 1e9;
 458
 459    GLint i, j, k, rep;
 460
 461    /* the quantizer */
 462    for (rep = 0; rep < LL_N_REP; rep++) {
 463       /* reset sums & counters */
 464       for (j = 0; j < nv; j++) {
 465          for (i = 0; i < nc; i++) {
 466             sum[j][i] = 0;
 467          }
 468          cnt[j] = 0;
 469       }
 470       error = 0;
 471
 472       /* scan whole block */
 473       for (k = 0; k < n; k++) {
 474 #if 1
 475          GLint best = -1;
 476          GLfloat err = 1e9; /* big enough */
 477          /* determine best vector */
 478          for (j = 0; j < nv; j++) {
 479             GLfloat e = (vec[j][0] - input[k][0]) * (vec[j][0] - input[k][0]) +
 480                       (vec[j][1] - input[k][1]) * (vec[j][1] - input[k][1]) +
 481                       (vec[j][2] - input[k][2]) * (vec[j][2] - input[k][2]);
 482             if (nc == 4) {
 483                e += (vec[j][3] - input[k][3]) * (vec[j][3] - input[k][3]);
 484             }
 485             if (e < err) {
 486                err = e;
 487                best = j;
 488             }
 489          }
 490 #else
 491          GLint best = fxt1_bestcol(vec, nv, input[k], nc, &err);
 492 #endif
 493          assert(best >= 0);
 494          /* add in closest color */
 495          for (i = 0; i < nc; i++) {
 496             sum[best][i] += input[k][i];
 497          }
 498          /* mark this vector as used */
 499          cnt[best]++;
 500          /* accumulate error */
 501          error += err;
 502       }
 503
 504       /* check RMS */
 505       if ((error < LL_RMS_E) ||
 506           ((error < lasterror) && ((lasterror - error) < LL_RMS_D))) {
 507          return !0; /* good match */
 508       }
 509       lasterror = error;
 510
 511       /* move each vector to the barycenter of its closest colors */
 512       for (j = 0; j < nv; j++) {
 513          if (cnt[j]) {
 514             GLfloat div = 1.0F / cnt[j];
 515             for (i = 0; i < nc; i++) {
 516                vec[j][i] = div * sum[j][i];
 517             }
 518          } else {
 519             /* this vec has no samples or is identical with a previous vec */
 520             GLint worst = fxt1_worst(vec[j], input, nc, n);
 521             for (i = 0; i < nc; i++) {
 522                vec[j][i] = input[worst][i];
 523             }
 524          }
 525       }
 526    }
 527
 528    return 0; /* could not converge fast enough */
 529 }
 530
 531
 532 static void
 533 fxt1_quantize_CHROMA (GLuint *cc,
 534                       GLubyte input[N_TEXELS][MAX_COMP])
 535 {
 536    const GLint n_vect = 4; /* 4 base vectors to find */
 537    const GLint n_comp = 3; /* 3 components: R, G, B */
 538    GLfloat vec[MAX_VECT][MAX_COMP];
 539    GLint i, j, k;
 540    Fx64 hi; /* high quadword */
 541    GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
 542
 543    if (fxt1_choose(vec, n_vect, input, n_comp, N_TEXELS) != 0) {
 544       fxt1_lloyd(vec, n_vect, input, n_comp, N_TEXELS);
 545    }
 546
 547    FX64_MOV32(hi, 4); /* cc-chroma = "010" + unused bit */
 548    for (j = n_vect - 1; j >= 0; j--) {
 549       for (i = 0; i < n_comp; i++) {
 550          /* add in colors */
 551          FX64_SHL(hi, 5);
 552          FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
 553       }
 554    }
 555    ((Fx64 *)cc)[1] = hi;
 556
 557    lohi = lolo = 0;
 558    /* right microtile */
 559    for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) {
 560       lohi <<= 2;
 561       lohi |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
 562    }
 563    /* left microtile */
 564    for (; k >= 0; k--) {
 565       lolo <<= 2;
 566       lolo |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
 567    }
 568    cc[1] = lohi;
 569    cc[0] = lolo;
 570 }
 571
 572
 573 static void
 574 fxt1_quantize_ALPHA0 (GLuint *cc,
 575                       GLubyte input[N_TEXELS][MAX_COMP],
 576                       GLubyte reord[N_TEXELS][MAX_COMP], GLint n)
 577 {
 578    const GLint n_vect = 3; /* 3 base vectors to find */
 579    const GLint n_comp = 4; /* 4 components: R, G, B, A */
 580    GLfloat vec[MAX_VECT][MAX_COMP];
 581    GLint i, j, k;
 582    Fx64 hi; /* high quadword */
 583    GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
 584
 585    /* the last vector indicates zero */
 586    for (i = 0; i < n_comp; i++) {
 587       vec[n_vect][i] = 0;
 588    }
 589
 590    /* the first n texels in reord are guaranteed to be non-zero */
 591    if (fxt1_choose(vec, n_vect, reord, n_comp, n) != 0) {
 592       fxt1_lloyd(vec, n_vect, reord, n_comp, n);
 593    }
 594
 595    FX64_MOV32(hi, 6); /* alpha = "011" + lerp = 0 */
 596    for (j = n_vect - 1; j >= 0; j--) {
 597       /* add in alphas */
 598       FX64_SHL(hi, 5);
 599       FX64_OR32(hi, (GLuint)(vec[j][ACOMP] / 8.0F));
 600    }
 601    for (j = n_vect - 1; j >= 0; j--) {
 602       for (i = 0; i < n_comp - 1; i++) {
 603          /* add in colors */
 604          FX64_SHL(hi, 5);
 605          FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
 606       }
 607    }
 608    ((Fx64 *)cc)[1] = hi;
 609
 610    lohi = lolo = 0;
 611    /* right microtile */
 612    for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) {
 613       lohi <<= 2;
 614       lohi |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
 615    }
 616    /* left microtile */
 617    for (; k >= 0; k--) {
 618       lolo <<= 2;
 619       lolo |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
 620    }
 621    cc[1] = lohi;
 622    cc[0] = lolo;
 623 }
 624
 625
 626 static void
 627 fxt1_quantize_ALPHA1 (GLuint *cc,
 628                       GLubyte input[N_TEXELS][MAX_COMP])
 629 {
 630    const GLint n_vect = 3; /* highest vector number in each microtile */
 631    const GLint n_comp = 4; /* 4 components: R, G, B, A */
 632    GLfloat vec[1 + 1 + 1][MAX_COMP]; /* 1.5 extrema for each sub-block */
 633    GLfloat b, iv[MAX_COMP]; /* interpolation vector */
 634    GLint i, j, k;
 635    Fx64 hi; /* high quadword */
 636    GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
 637
 638    GLint minSum;
 639    GLint maxSum;
 640    GLint minColL = 0, maxColL = 0;
 641    GLint minColR = 0, maxColR = 0;
 642    GLint sumL = 0, sumR = 0;
 643    GLint nn_comp;
 644    /* Our solution here is to find the darkest and brightest colors in
 645     * the 4x4 tile and use those as the two representative colors.
 646     * There are probably better algorithms to use (histogram-based).
 647     */
 648    nn_comp = n_comp;
 649    while ((minColL == maxColL) && nn_comp) {
 650        minSum = 2000; /* big enough */
 651        maxSum = -1; /* small enough */
 652        for (k = 0; k < N_TEXELS / 2; k++) {
 653            GLint sum = 0;
 654            for (i = 0; i < nn_comp; i++) {
 655                sum += input[k][i];
 656            }
 657            if (minSum > sum) {
 658                minSum = sum;
 659                minColL = k;
 660            }
 661            if (maxSum < sum) {
 662                maxSum = sum;
 663                maxColL = k;
 664            }
 665            sumL += sum;
 666        }
 667
 668        nn_comp--;
 669    }
 670
 671    nn_comp = n_comp;
 672    while ((minColR == maxColR) && nn_comp) {
 673        minSum = 2000; /* big enough */
 674        maxSum = -1; /* small enough */
 675        for (k = N_TEXELS / 2; k < N_TEXELS; k++) {
 676            GLint sum = 0;
 677            for (i = 0; i < nn_comp; i++) {
 678                sum += input[k][i];
 679            }
 680            if (minSum > sum) {
 681                minSum = sum;
 682                minColR = k;
 683            }
 684            if (maxSum < sum) {
 685                maxSum = sum;
 686                maxColR = k;
 687            }
 688            sumR += sum;
 689        }
 690
 691        nn_comp--;
 692    }
 693
 694    /* choose the common vector (yuck!) */
 695    {
 696       GLint j1, j2;
 697       GLint v1 = 0, v2 = 0;
 698       GLfloat err = 1e9; /* big enough */
 699       GLfloat tv[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
 700       for (i = 0; i < n_comp; i++) {
 701          tv[0][i] = input[minColL][i];
 702          tv[1][i] = input[maxColL][i];
 703          tv[2][i] = input[minColR][i];
 704          tv[3][i] = input[maxColR][i];
 705       }
 706       for (j1 = 0; j1 < 2; j1++) {
 707          for (j2 = 2; j2 < 4; j2++) {
 708             GLfloat e = 0.0F;
 709             for (i = 0; i < n_comp; i++) {
 710                e += (tv[j1][i] - tv[j2][i]) * (tv[j1][i] - tv[j2][i]);
 711             }
 712             if (e < err) {
 713                err = e;
 714                v1 = j1;
 715                v2 = j2;
 716             }
 717          }
 718       }
 719       for (i = 0; i < n_comp; i++) {
 720          vec[0][i] = tv[1 - v1][i];
 721          vec[1][i] = (tv[v1][i] * sumL + tv[v2][i] * sumR) / (sumL + sumR);
 722          vec[2][i] = tv[5 - v2][i];
 723       }
 724    }
 725
 726    /* left microtile */
 727    cc[0] = 0;
 728    if (minColL != maxColL) {
 729       /* compute interpolation vector */
 730       MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
 731
 732       /* add in texels */
 733       lolo = 0;
 734       for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
 735          GLint texel;
 736          /* interpolate color */
 737          CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
 738          /* add in texel */
 739          lolo <<= 2;
 740          lolo |= texel;
 741       }
 742
 743       cc[0] = lolo;
 744    }
 745
 746    /* right microtile */
 747    cc[1] = 0;
 748    if (minColR != maxColR) {
 749       /* compute interpolation vector */
 750       MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[1]);
 751
 752       /* add in texels */
 753       lohi = 0;
 754       for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
 755          GLint texel;
 756          /* interpolate color */
 757          CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
 758          /* add in texel */
 759          lohi <<= 2;
 760          lohi |= texel;
 761       }
 762
 763       cc[1] = lohi;
 764    }
 765
 766    FX64_MOV32(hi, 7); /* alpha = "011" + lerp = 1 */
 767    for (j = n_vect - 1; j >= 0; j--) {
 768       /* add in alphas */
 769       FX64_SHL(hi, 5);
 770       FX64_OR32(hi, (GLuint)(vec[j][ACOMP] / 8.0F));
 771    }
 772    for (j = n_vect - 1; j >= 0; j--) {
 773       for (i = 0; i < n_comp - 1; i++) {
 774          /* add in colors */
 775          FX64_SHL(hi, 5);
 776          FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
 777       }
 778    }
 779    ((Fx64 *)cc)[1] = hi;
 780 }
 781
 782
 783 static void
 784 fxt1_quantize_HI (GLuint *cc,
 785                   GLubyte input[N_TEXELS][MAX_COMP],
 786                   GLubyte reord[N_TEXELS][MAX_COMP], GLint n)
 787 {
 788    const GLint n_vect = 6; /* highest vector number */
 789    const GLint n_comp = 3; /* 3 components: R, G, B */
 790    GLfloat b = 0.0F;       /* phoudoin: silent compiler! */
 791    GLfloat iv[MAX_COMP];   /* interpolation vector */
 792    GLint i, k;
 793    GLuint hihi; /* high quadword: hi dword */
 794
 795    GLint minSum = 2000; /* big enough */
 796    GLint maxSum = -1; /* small enough */
 797    GLint minCol = 0; /* phoudoin: silent compiler! */
 798    GLint maxCol = 0; /* phoudoin: silent compiler! */
 799
 800    /* Our solution here is to find the darkest and brightest colors in
 801     * the 8x4 tile and use those as the two representative colors.
 802     * There are probably better algorithms to use (histogram-based).
 803     */
 804    for (k = 0; k < n; k++) {
 805       GLint sum = 0;
 806       for (i = 0; i < n_comp; i++) {
 807          sum += reord[k][i];
 808       }
 809       if (minSum > sum) {
 810          minSum = sum;
 811          minCol = k;
 812       }
 813       if (maxSum < sum) {
 814          maxSum = sum;
 815          maxCol = k;
 816       }
 817    }
 818
 819    hihi = 0; /* cc-hi = "00" */
 820    for (i = 0; i < n_comp; i++) {
 821       /* add in colors */
 822       hihi <<= 5;
 823       hihi |= reord[maxCol][i] >> 3;
 824    }
 825    for (i = 0; i < n_comp; i++) {
 826       /* add in colors */
 827       hihi <<= 5;
 828       hihi |= reord[minCol][i] >> 3;
 829    }
 830    cc[3] = hihi;
 831    cc[0] = cc[1] = cc[2] = 0;
 832
 833    /* compute interpolation vector */
 834    if (minCol != maxCol) {
 835       MAKEIVEC(n_vect, n_comp, iv, b, reord[minCol], reord[maxCol]);
 836    }
 837
 838    /* add in texels */
 839    for (k = N_TEXELS - 1; k >= 0; k--) {
 840       GLint t = k * 3;
 841       GLuint *kk = (GLuint *)((char *)cc + t / 8);
 842       GLint texel = n_vect + 1; /* transparent black */
 843
 844       if (!ISTBLACK(input[k])) {
 845          if (minCol != maxCol) {
 846             /* interpolate color */
 847             CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
 848             /* add in texel */
 849             kk[0] |= texel << (t & 7);
 850          }
 851       } else {
 852          /* add in texel */
 853          kk[0] |= texel << (t & 7);
 854       }
 855    }
 856 }
 857
 858
 859 static void
 860 fxt1_quantize_MIXED1 (GLuint *cc,
 861                       GLubyte input[N_TEXELS][MAX_COMP])
 862 {
 863    const GLint n_vect = 2; /* highest vector number in each microtile */
 864    const GLint n_comp = 3; /* 3 components: R, G, B */
 865    GLubyte vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
 866    GLfloat b, iv[MAX_COMP]; /* interpolation vector */
 867    GLint i, j, k;
 868    Fx64 hi; /* high quadword */
 869    GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
 870
 871    GLint minSum;
 872    GLint maxSum;
 873    GLint minColL = 0, maxColL = -1;
 874    GLint minColR = 0, maxColR = -1;
 875
 876    /* Our solution here is to find the darkest and brightest colors in
 877     * the 4x4 tile and use those as the two representative colors.
 878     * There are probably better algorithms to use (histogram-based).
 879     */
 880    minSum = 2000; /* big enough */
 881    maxSum = -1; /* small enough */
 882    for (k = 0; k < N_TEXELS / 2; k++) {
 883       if (!ISTBLACK(input[k])) {
 884          GLint sum = 0;
 885          for (i = 0; i < n_comp; i++) {
 886             sum += input[k][i];
 887          }
 888          if (minSum > sum) {
 889             minSum = sum;
 890             minColL = k;
 891          }
 892          if (maxSum < sum) {
 893             maxSum = sum;
 894             maxColL = k;
 895          }
 896       }
 897    }
 898    minSum = 2000; /* big enough */
 899    maxSum = -1; /* small enough */
 900    for (; k < N_TEXELS; k++) {
 901       if (!ISTBLACK(input[k])) {
 902          GLint sum = 0;
 903          for (i = 0; i < n_comp; i++) {
 904             sum += input[k][i];
 905          }
 906          if (minSum > sum) {
 907             minSum = sum;
 908             minColR = k;
 909          }
 910          if (maxSum < sum) {
 911             maxSum = sum;
 912             maxColR = k;
 913          }
 914       }
 915    }
 916
 917    /* left microtile */
 918    if (maxColL == -1) {
 919       /* all transparent black */
 920       cc[0] = ~0u;
 921       for (i = 0; i < n_comp; i++) {
 922          vec[0][i] = 0;
 923          vec[1][i] = 0;
 924       }
 925    } else {
 926       cc[0] = 0;
 927       for (i = 0; i < n_comp; i++) {
 928          vec[0][i] = input[minColL][i];
 929          vec[1][i] = input[maxColL][i];
 930       }
 931       if (minColL != maxColL) {
 932          /* compute interpolation vector */
 933          MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
 934
 935          /* add in texels */
 936          lolo = 0;
 937          for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
 938             GLint texel = n_vect + 1; /* transparent black */
 939             if (!ISTBLACK(input[k])) {
 940                /* interpolate color */
 941                CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
 942             }
 943             /* add in texel */
 944             lolo <<= 2;
 945             lolo |= texel;
 946          }
 947          cc[0] = lolo;
 948       }
 949    }
 950
 951    /* right microtile */
 952    if (maxColR == -1) {
 953       /* all transparent black */
 954       cc[1] = ~0u;
 955       for (i = 0; i < n_comp; i++) {
 956          vec[2][i] = 0;
 957          vec[3][i] = 0;
 958       }
 959    } else {
 960       cc[1] = 0;
 961       for (i = 0; i < n_comp; i++) {
 962          vec[2][i] = input[minColR][i];
 963          vec[3][i] = input[maxColR][i];
 964       }
 965       if (minColR != maxColR) {
 966          /* compute interpolation vector */
 967          MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);
 968
 969          /* add in texels */
 970          lohi = 0;
 971          for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
 972             GLint texel = n_vect + 1; /* transparent black */
 973             if (!ISTBLACK(input[k])) {
 974                /* interpolate color */
 975                CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
 976             }
 977             /* add in texel */
 978             lohi <<= 2;
 979             lohi |= texel;
 980          }
 981          cc[1] = lohi;
 982       }
 983    }
 984
 985    FX64_MOV32(hi, 9 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
 986    for (j = 2 * 2 - 1; j >= 0; j--) {
 987       for (i = 0; i < n_comp; i++) {
 988          /* add in colors */
 989          FX64_SHL(hi, 5);
 990          FX64_OR32(hi, vec[j][i] >> 3);
 991       }
 992    }
 993    ((Fx64 *)cc)[1] = hi;
 994 }
 995
 996
 997 static void
 998 fxt1_quantize_MIXED0 (GLuint *cc,
 999                       GLubyte input[N_TEXELS][MAX_COMP])
1000 {
1001    const GLint n_vect = 3; /* highest vector number in each microtile */
1002    const GLint n_comp = 3; /* 3 components: R, G, B */
1003    GLubyte vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
1004    GLfloat b, iv[MAX_COMP]; /* interpolation vector */
1005    GLint i, j, k;
1006    Fx64 hi; /* high quadword */
1007    GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
1008
1009    GLint minColL = 0, maxColL = 0;
1010    GLint minColR = 0, maxColR = 0;
1011 #if 0
1012    GLint minSum;
1013    GLint maxSum;
1014
1015    /* Our solution here is to find the darkest and brightest colors in
1016     * the 4x4 tile and use those as the two representative colors.
1017     * There are probably better algorithms to use (histogram-based).
1018     */
1019    minSum = 2000; /* big enough */
1020    maxSum = -1; /* small enough */
1021    for (k = 0; k < N_TEXELS / 2; k++) {
1022       GLint sum = 0;
1023       for (i = 0; i < n_comp; i++) {
1024          sum += input[k][i];
1025       }
1026       if (minSum > sum) {
1027          minSum = sum;
1028          minColL = k;
1029       }
1030       if (maxSum < sum) {
1031          maxSum = sum;
1032          maxColL = k;
1033       }
1034    }
1035    minSum = 2000; /* big enough */
1036    maxSum = -1; /* small enough */
1037    for (; k < N_TEXELS; k++) {
1038       GLint sum = 0;
1039       for (i = 0; i < n_comp; i++) {
1040          sum += input[k][i];
1041       }
1042       if (minSum > sum) {
1043          minSum = sum;
1044          minColR = k;
1045       }
1046       if (maxSum < sum) {
1047          maxSum = sum;
1048          maxColR = k;
1049       }
1050    }
1051 #else
1052    GLint minVal;
1053    GLint maxVal;
1054    GLint maxVarL = fxt1_variance(NULL, input, n_comp, N_TEXELS / 2);
1055    GLint maxVarR = fxt1_variance(NULL, &input[N_TEXELS / 2], n_comp, N_TEXELS / 2);
1056
1057    /* Scan the channel with max variance for lo & hi
1058     * and use those as the two representative colors.
1059     */
1060    minVal = 2000; /* big enough */
1061    maxVal = -1; /* small enough */
1062    for (k = 0; k < N_TEXELS / 2; k++) {
1063       GLint t = input[k][maxVarL];
1064       if (minVal > t) {
1065          minVal = t;
1066          minColL = k;
1067       }
1068       if (maxVal < t) {
1069          maxVal = t;
1070          maxColL = k;
1071       }
1072    }
1073    minVal = 2000; /* big enough */
1074    maxVal = -1; /* small enough */
1075    for (; k < N_TEXELS; k++) {
1076       GLint t = input[k][maxVarR];
1077       if (minVal > t) {
1078          minVal = t;
1079          minColR = k;
1080       }
1081       if (maxVal < t) {
1082          maxVal = t;
1083          maxColR = k;
1084       }
1085    }
1086 #endif
1087
1088    /* left microtile */
1089    cc[0] = 0;
1090    for (i = 0; i < n_comp; i++) {
1091       vec[0][i] = input[minColL][i];
1092       vec[1][i] = input[maxColL][i];
1093    }
1094    if (minColL != maxColL) {
1095       /* compute interpolation vector */
1096       MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
1097
1098       /* add in texels */
1099       lolo = 0;
1100       for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
1101          GLint texel;
1102          /* interpolate color */
1103          CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
1104          /* add in texel */
1105          lolo <<= 2;
1106          lolo |= texel;
1107       }
1108
1109       /* funky encoding for LSB of green */
1110       if ((GLint)((lolo >> 1) & 1) != (((vec[1][GCOMP] ^ vec[0][GCOMP]) >> 2) & 1)) {
1111          for (i = 0; i < n_comp; i++) {
1112             vec[1][i] = input[minColL][i];
1113             vec[0][i] = input[maxColL][i];
1114          }
1115          lolo = ~lolo;
1116       }
1117
1118       cc[0] = lolo;
1119    }
1120
1121    /* right microtile */
1122    cc[1] = 0;
1123    for (i = 0; i < n_comp; i++) {
1124       vec[2][i] = input[minColR][i];
1125       vec[3][i] = input[maxColR][i];
1126    }
1127    if (minColR != maxColR) {
1128       /* compute interpolation vector */
1129       MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);
1130
1131       /* add in texels */
1132       lohi = 0;
1133       for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
1134          GLint texel;
1135          /* interpolate color */
1136          CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
1137          /* add in texel */
1138          lohi <<= 2;
1139          lohi |= texel;
1140       }
1141
1142       /* funky encoding for LSB of green */
1143       if ((GLint)((lohi >> 1) & 1) != (((vec[3][GCOMP] ^ vec[2][GCOMP]) >> 2) & 1)) {
1144          for (i = 0; i < n_comp; i++) {
1145             vec[3][i] = input[minColR][i];
1146             vec[2][i] = input[maxColR][i];
1147          }
1148          lohi = ~lohi;
1149       }
1150
1151       cc[1] = lohi;
1152    }
1153
1154    FX64_MOV32(hi, 8 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
1155    for (j = 2 * 2 - 1; j >= 0; j--) {
1156       for (i = 0; i < n_comp; i++) {
1157          /* add in colors */
1158          FX64_SHL(hi, 5);
1159          FX64_OR32(hi, vec[j][i] >> 3);
1160       }
1161    }
1162    ((Fx64 *)cc)[1] = hi;
1163 }
1164
1165
1166 static void
1167 fxt1_quantize (GLuint *cc, const GLubyte *lines[], GLint comps)
1168 {
1169    GLint trualpha;
1170    GLubyte reord[N_TEXELS][MAX_COMP];
1171
1172    GLubyte input[N_TEXELS][MAX_COMP];
1173    GLint i, k, l;
1174
1175    if (comps == 3) {
1176       /* make the whole block opaque */
1177       memset(input, -1, sizeof(input));
1178    }
1179
1180    /* 8 texels each line */
1181    for (l = 0; l < 4; l++) {
1182       for (k = 0; k < 4; k++) {
1183          for (i = 0; i < comps; i++) {
1184             input[k + l * 4][i] = *lines[l]++;
1185          }
1186       }
1187       for (; k < 8; k++) {
1188          for (i = 0; i < comps; i++) {
1189             input[k + l * 4 + 12][i] = *lines[l]++;
1190          }
1191       }
1192    }
1193
1194    /* block layout:
1195     * 00, 01, 02, 03, 08, 09, 0a, 0b
1196     * 10, 11, 12, 13, 18, 19, 1a, 1b
1197     * 04, 05, 06, 07, 0c, 0d, 0e, 0f
1198     * 14, 15, 16, 17, 1c, 1d, 1e, 1f
1199     */
1200
1201    /* [dBorca]
1202     * stupidity flows forth from this
1203     */
1204    l = N_TEXELS;
1205    trualpha = 0;
1206    if (comps == 4) {
1207       /* skip all transparent black texels */
1208       l = 0;
1209       for (k = 0; k < N_TEXELS; k++) {
1210          /* test all components against 0 */
1211          if (!ISTBLACK(input[k])) {
1212             /* texel is not transparent black */
1213             COPY_4UBV(reord[l], input[k]);
1214             if (reord[l][ACOMP] < (255 - ALPHA_TS)) {
1215                /* non-opaque texel */
1216                trualpha = !0;
1217             }
1218             l++;
1219          }
1220       }
1221    }
1222
1223 #if 0
1224    if (trualpha) {
1225       fxt1_quantize_ALPHA0(cc, input, reord, l);
1226    } else if (l == 0) {
1227       cc[0] = cc[1] = cc[2] = -1;
1228       cc[3] = 0;
1229    } else if (l < N_TEXELS) {
1230       fxt1_quantize_HI(cc, input, reord, l);
1231    } else {
1232       fxt1_quantize_CHROMA(cc, input);
1233    }
1234    (void)fxt1_quantize_ALPHA1;
1235    (void)fxt1_quantize_MIXED1;
1236    (void)fxt1_quantize_MIXED0;
1237 #else
1238    if (trualpha) {
1239       fxt1_quantize_ALPHA1(cc, input);
1240    } else if (l == 0) {
1241       cc[0] = cc[1] = cc[2] = ~0u;
1242       cc[3] = 0;
1243    } else if (l < N_TEXELS) {
1244       fxt1_quantize_MIXED1(cc, input);
1245    } else {
1246       fxt1_quantize_MIXED0(cc, input);
1247    }
1248    (void)fxt1_quantize_ALPHA0;
1249    (void)fxt1_quantize_HI;
1250    (void)fxt1_quantize_CHROMA;
1251 #endif
1252 }
1253
1254
1255
1256 /**
1257  * Upscale an image by replication, not (typical) stretching.
1258  * We use this when the image width or height is less than a
1259  * certain size (4, 8) and we need to upscale an image.
1260  */
1261 static void
1262 upscale_teximage2d(GLsizei inWidth, GLsizei inHeight,
1263                    GLsizei outWidth, GLsizei outHeight,
1264                    GLint comps, const GLubyte *src, GLint srcRowStride,
1265                    GLubyte *dest )
1266 {
1267    GLint i, j, k;
1268
1269    assert(outWidth >= inWidth);
1270    assert(outHeight >= inHeight);
1271 #if 0
1272    assert(inWidth == 1 || inWidth == 2 || inHeight == 1 || inHeight == 2);
1273    assert((outWidth & 3) == 0);
1274    assert((outHeight & 3) == 0);
1275 #endif
1276
1277    for (i = 0; i < outHeight; i++) {
1278       const GLint ii = i % inHeight;
1279       for (j = 0; j < outWidth; j++) {
1280          const GLint jj = j % inWidth;
1281          for (k = 0; k < comps; k++) {
1282             dest[(i * outWidth + j) * comps + k]
1283                = src[ii * srcRowStride + jj * comps + k];
1284          }
1285       }
1286    }
1287 }
1288
1289
1290 static void
1291 fxt1_encode (GLuint width, GLuint height, GLint comps,
1292              const void *source, GLint srcRowStride,
1293              void *dest, GLint destRowStride)
1294 {
1295    GLuint x, y;
1296    const GLubyte *data;
1297    GLuint *encoded = (GLuint *)dest;
1298    void *newSource = NULL;
1299
1300    assert(comps == 3 || comps == 4);
1301
1302    /* Replicate image if width is not M8 or height is not M4 */
1303    if ((width & 7) | (height & 3)) {
1304       GLint newWidth = (width + 7) & ~7;
1305       GLint newHeight = (height + 3) & ~3;
1306       newSource = malloc(comps * newWidth * newHeight * sizeof(GLubyte));
1307       if (!newSource) {
1308          GET_CURRENT_CONTEXT(ctx);
1309          _mesa_error(ctx, GL_OUT_OF_MEMORY, "texture compression");
1310          goto cleanUp;
1311       }
1312       upscale_teximage2d(width, height, newWidth, newHeight,
1313                          comps, (const GLubyte *) source,
1314                          srcRowStride, (GLubyte *) newSource);
1315       source = newSource;
1316       width = newWidth;
1317       height = newHeight;
1318       srcRowStride = comps * newWidth;
1319    }
1320
1321    data = (const GLubyte *) source;
1322    destRowStride = (destRowStride - width * 2) / 4;
1323    for (y = 0; y < height; y += 4) {
1324       GLuint offs = 0 + (y + 0) * srcRowStride;
1325       for (x = 0; x < width; x += 8) {
1326          const GLubyte *lines[4];
1327          lines[0] = &data[offs];
1328          lines[1] = lines[0] + srcRowStride;
1329          lines[2] = lines[1] + srcRowStride;
1330          lines[3] = lines[2] + srcRowStride;
1331          offs += 8 * comps;
1332          fxt1_quantize(encoded, lines, comps);
1333          /* 128 bits per 8x4 block */
1334          encoded += 4;
1335       }
1336       encoded += destRowStride;
1337    }
1338
1339  cleanUp:
1340    free(newSource);
1341 }
1342
1343
1344 /***************************************************************************\
1345  * FXT1 decoder
1346  *
1347  * The decoder is based on GL_3DFX_texture_compression_FXT1
1348  * specification and serves as a concept for the encoder.
1349 \***************************************************************************/
1350
1351
1352 /* lookup table for scaling 5 bit colors up to 8 bits */
1353 static const GLubyte _rgb_scale_5[] = {
1354    0,   8,   16,  25,  33,  41,  49,  58,
1355    66,  74,  82,  90,  99,  107, 115, 123,
1356    132, 140, 148, 156, 165, 173, 181, 189,
1357    197, 206, 214, 222, 230, 239, 247, 255
1358 };
1359
1360 /* lookup table for scaling 6 bit colors up to 8 bits */
1361 static const GLubyte _rgb_scale_6[] = {
1362    0,   4,   8,   12,  16,  20,  24,  28,
1363    32,  36,  40,  45,  49,  53,  57,  61,
1364    65,  69,  73,  77,  81,  85,  89,  93,
1365    97,  101, 105, 109, 113, 117, 121, 125,
1366    130, 134, 138, 142, 146, 150, 154, 158,
1367    162, 166, 170, 174, 178, 182, 186, 190,
1368    194, 198, 202, 206, 210, 215, 219, 223,
1369    227, 231, 235, 239, 243, 247, 251, 255
1370 };
1371
1372
/* Bits of the block starting at bit `which`: the 32-bit word containing
 * that bit, shifted so the bit lands at position 0.  The result is NOT
 * masked and never crosses a word boundary; callers mask what they need.
 */
#define CC_SEL(cc, which) (((const GLuint *)(cc))[(which) / 32] >> ((which) & 31))
/* Expand the low 5 bits of c to an 8-bit color value. */
#define UP5(c) (_rgb_scale_5[(c) & 31])
/* Expand a 6-bit value (low 5 bits of c as the high part, bit 0 of b as the
 * LSB) to an 8-bit color value.
 */
#define UP6(c, b) (_rgb_scale_6[(((c) & 31) << 1) | ((b) & 1)])
/* Rounded integer interpolation: step t of n between c0 (t == 0) and
 * c1 (t == n).  The whole expansion is parenthesized so the macro can be
 * used as an operand of a larger expression.
 */
#define LERP(n, t, c0, c1) ((((n) - (t)) * (c0) + (t) * (c1) + (n) / 2) / (n))
1377
1378
1379 static void
1380 fxt1_decode_1HI (const GLubyte *code, GLint t, GLubyte *rgba)
1381 {
1382    const GLuint *cc;
1383
1384    t *= 3;
1385    cc = (const GLuint *)(code + t / 8);
1386    t = (cc[0] >> (t & 7)) & 7;
1387
1388    if (t == 7) {
1389       rgba[RCOMP] = rgba[GCOMP] = rgba[BCOMP] = rgba[ACOMP] = 0;
1390    } else {
1391       GLubyte r, g, b;
1392       cc = (const GLuint *)(code + 12);
1393       if (t == 0) {
1394          b = UP5(CC_SEL(cc, 0));
1395          g = UP5(CC_SEL(cc, 5));
1396          r = UP5(CC_SEL(cc, 10));
1397       } else if (t == 6) {
1398          b = UP5(CC_SEL(cc, 15));
1399          g = UP5(CC_SEL(cc, 20));
1400          r = UP5(CC_SEL(cc, 25));
1401       } else {
1402          b = LERP(6, t, UP5(CC_SEL(cc, 0)), UP5(CC_SEL(cc, 15)));
1403          g = LERP(6, t, UP5(CC_SEL(cc, 5)), UP5(CC_SEL(cc, 20)));
1404          r = LERP(6, t, UP5(CC_SEL(cc, 10)), UP5(CC_SEL(cc, 25)));
1405       }
1406       rgba[RCOMP] = r;
1407       rgba[GCOMP] = g;
1408       rgba[BCOMP] = b;
1409       rgba[ACOMP] = 255;
1410    }
1411 }
1412
1413
1414 static void
1415 fxt1_decode_1CHROMA (const GLubyte *code, GLint t, GLubyte *rgba)
1416 {
1417    const GLuint *cc;
1418    GLuint kk;
1419
1420    cc = (const GLuint *)code;
1421    if (t & 16) {
1422       cc++;
1423       t &= 15;
1424    }
1425    t = (cc[0] >> (t * 2)) & 3;
1426
1427    t *= 15;
1428    cc = (const GLuint *)(code + 8 + t / 8);
1429    kk = cc[0] >> (t & 7);
1430    rgba[BCOMP] = UP5(kk);
1431    rgba[GCOMP] = UP5(kk >> 5);
1432    rgba[RCOMP] = UP5(kk >> 10);
1433    rgba[ACOMP] = 255;
1434 }
1435
1436
1437 static void
1438 fxt1_decode_1MIXED (const GLubyte *code, GLint t, GLubyte *rgba)
1439 {
1440    const GLuint *cc;
1441    GLuint col[2][3];
1442    GLint glsb, selb;
1443
1444    cc = (const GLuint *)code;
1445    if (t & 16) {
1446       t &= 15;
1447       t = (cc[1] >> (t * 2)) & 3;
1448       /* col 2 */
1449       col[0][BCOMP] = (*(const GLuint *)(code + 11)) >> 6;
1450       col[0][GCOMP] = CC_SEL(cc, 99);
1451       col[0][RCOMP] = CC_SEL(cc, 104);
1452       /* col 3 */
1453       col[1][BCOMP] = CC_SEL(cc, 109);
1454       col[1][GCOMP] = CC_SEL(cc, 114);
1455       col[1][RCOMP] = CC_SEL(cc, 119);
1456       glsb = CC_SEL(cc, 126);
1457       selb = CC_SEL(cc, 33);
1458    } else {
1459       t = (cc[0] >> (t * 2)) & 3;
1460       /* col 0 */
1461       col[0][BCOMP] = CC_SEL(cc, 64);
1462       col[0][GCOMP] = CC_SEL(cc, 69);
1463       col[0][RCOMP] = CC_SEL(cc, 74);
1464       /* col 1 */
1465       col[1][BCOMP] = CC_SEL(cc, 79);
1466       col[1][GCOMP] = CC_SEL(cc, 84);
1467       col[1][RCOMP] = CC_SEL(cc, 89);
1468       glsb = CC_SEL(cc, 125);
1469       selb = CC_SEL(cc, 1);
1470    }
1471
1472    if (CC_SEL(cc, 124) & 1) {
1473       /* alpha[0] == 1 */
1474
1475       if (t == 3) {
1476          /* zero */
1477          rgba[RCOMP] = rgba[BCOMP] = rgba[GCOMP] = rgba[ACOMP] = 0;
1478       } else {
1479          GLubyte r, g, b;
1480          if (t == 0) {
1481             b = UP5(col[0][BCOMP]);
1482             g = UP5(col[0][GCOMP]);
1483             r = UP5(col[0][RCOMP]);
1484          } else if (t == 2) {
1485             b = UP5(col[1][BCOMP]);
1486             g = UP6(col[1][GCOMP], glsb);
1487             r = UP5(col[1][RCOMP]);
1488          } else {
1489             b = (UP5(col[0][BCOMP]) + UP5(col[1][BCOMP])) / 2;
1490             g = (UP5(col[0][GCOMP]) + UP6(col[1][GCOMP], glsb)) / 2;
1491             r = (UP5(col[0][RCOMP]) + UP5(col[1][RCOMP])) / 2;
1492          }
1493          rgba[RCOMP] = r;
1494          rgba[GCOMP] = g;
1495          rgba[BCOMP] = b;
1496          rgba[ACOMP] = 255;
1497       }
1498    } else {
1499       /* alpha[0] == 0 */
1500       GLubyte r, g, b;
1501       if (t == 0) {
1502          b = UP5(col[0][BCOMP]);
1503          g = UP6(col[0][GCOMP], glsb ^ selb);
1504          r = UP5(col[0][RCOMP]);
1505       } else if (t == 3) {
1506          b = UP5(col[1][BCOMP]);
1507          g = UP6(col[1][GCOMP], glsb);
1508          r = UP5(col[1][RCOMP]);
1509       } else {
1510          b = LERP(3, t, UP5(col[0][BCOMP]), UP5(col[1][BCOMP]));
1511          g = LERP(3, t, UP6(col[0][GCOMP], glsb ^ selb),
1512                         UP6(col[1][GCOMP], glsb));
1513          r = LERP(3, t, UP5(col[0][RCOMP]), UP5(col[1][RCOMP]));
1514       }
1515       rgba[RCOMP] = r;
1516       rgba[GCOMP] = g;
1517       rgba[BCOMP] = b;
1518       rgba[ACOMP] = 255;
1519    }
1520 }
1521
1522
1523 static void
1524 fxt1_decode_1ALPHA (const GLubyte *code, GLint t, GLubyte *rgba)
1525 {
1526    const GLuint *cc;
1527    GLubyte r, g, b, a;
1528
1529    cc = (const GLuint *)code;
1530    if (CC_SEL(cc, 124) & 1) {
1531       /* lerp == 1 */
1532       GLuint col0[4];
1533
1534       if (t & 16) {
1535          t &= 15;
1536          t = (cc[1] >> (t * 2)) & 3;
1537          /* col 2 */
1538          col0[BCOMP] = (*(const GLuint *)(code + 11)) >> 6;
1539          col0[GCOMP] = CC_SEL(cc, 99);
1540          col0[RCOMP] = CC_SEL(cc, 104);
1541          col0[ACOMP] = CC_SEL(cc, 119);
1542       } else {
1543          t = (cc[0] >> (t * 2)) & 3;
1544          /* col 0 */
1545          col0[BCOMP] = CC_SEL(cc, 64);
1546          col0[GCOMP] = CC_SEL(cc, 69);
1547          col0[RCOMP] = CC_SEL(cc, 74);
1548          col0[ACOMP] = CC_SEL(cc, 109);
1549       }
1550
1551       if (t == 0) {
1552          b = UP5(col0[BCOMP]);
1553          g = UP5(col0[GCOMP]);
1554          r = UP5(col0[RCOMP]);
1555          a = UP5(col0[ACOMP]);
1556       } else if (t == 3) {
1557          b = UP5(CC_SEL(cc, 79));
1558          g = UP5(CC_SEL(cc, 84));
1559          r = UP5(CC_SEL(cc, 89));
1560          a = UP5(CC_SEL(cc, 114));
1561       } else {
1562          b = LERP(3, t, UP5(col0[BCOMP]), UP5(CC_SEL(cc, 79)));
1563          g = LERP(3, t, UP5(col0[GCOMP]), UP5(CC_SEL(cc, 84)));
1564          r = LERP(3, t, UP5(col0[RCOMP]), UP5(CC_SEL(cc, 89)));
1565          a = LERP(3, t, UP5(col0[ACOMP]), UP5(CC_SEL(cc, 114)));
1566       }
1567    } else {
1568       /* lerp == 0 */
1569
1570       if (t & 16) {
1571          cc++;
1572          t &= 15;
1573       }
1574       t = (cc[0] >> (t * 2)) & 3;
1575
1576       if (t == 3) {
1577          /* zero */
1578          r = g = b = a = 0;
1579       } else {
1580          GLuint kk;
1581          cc = (const GLuint *)code;
1582          a = UP5(cc[3] >> (t * 5 + 13));
1583          t *= 15;
1584          cc = (const GLuint *)(code + 8 + t / 8);
1585          kk = cc[0] >> (t & 7);
1586          b = UP5(kk);
1587          g = UP5(kk >> 5);
1588          r = UP5(kk >> 10);
1589       }
1590    }
1591    rgba[RCOMP] = r;
1592    rgba[GCOMP] = g;
1593    rgba[BCOMP] = b;
1594    rgba[ACOMP] = a;
1595 }
1596
1597
1598 static void
1599 fxt1_decode_1 (const void *texture, GLint stride, /* in pixels */
1600                GLint i, GLint j, GLubyte *rgba)
1601 {
1602    static void (*decode_1[]) (const GLubyte *, GLint, GLubyte *) = {
1603       fxt1_decode_1HI,     /* cc-high   = "00?" */
1604       fxt1_decode_1HI,     /* cc-high   = "00?" */
1605       fxt1_decode_1CHROMA, /* cc-chroma = "010" */
1606       fxt1_decode_1ALPHA,  /* alpha     = "011" */
1607       fxt1_decode_1MIXED,  /* mixed     = "1??" */
1608       fxt1_decode_1MIXED,  /* mixed     = "1??" */
1609       fxt1_decode_1MIXED,  /* mixed     = "1??" */
1610       fxt1_decode_1MIXED   /* mixed     = "1??" */
1611    };
1612
1613    const GLubyte *code = (const GLubyte *)texture +
1614                          ((j / 4) * (stride / 8) + (i / 8)) * 16;
1615    GLint mode = CC_SEL(code, 125);
1616    GLint t = i & 7;
1617
1618    if (t & 4) {
1619       t += 12;
1620    }
1621    t += (j & 3) * 4;
1622
1623    decode_1[mode](code, t, rgba);
1624 }
1625
1626
1627
1628
1629 static void
1630 fetch_rgb_fxt1(const GLubyte *map,
1631                GLint rowStride, GLint i, GLint j, GLfloat *texel)
1632 {
1633    GLubyte rgba[4];
1634    fxt1_decode_1(map, rowStride, i, j, rgba);
1635    texel[RCOMP] = UBYTE_TO_FLOAT(rgba[RCOMP]);
1636    texel[GCOMP] = UBYTE_TO_FLOAT(rgba[GCOMP]);
1637    texel[BCOMP] = UBYTE_TO_FLOAT(rgba[BCOMP]);
1638    texel[ACOMP] = 1.0F;
1639 }
1640
1641
1642 static void
1643 fetch_rgba_fxt1(const GLubyte *map,
1644                 GLint rowStride, GLint i, GLint j, GLfloat *texel)
1645 {
1646    GLubyte rgba[4];
1647    fxt1_decode_1(map, rowStride, i, j, rgba);
1648    texel[RCOMP] = UBYTE_TO_FLOAT(rgba[RCOMP]);
1649    texel[GCOMP] = UBYTE_TO_FLOAT(rgba[GCOMP]);
1650    texel[BCOMP] = UBYTE_TO_FLOAT(rgba[BCOMP]);
1651    texel[ACOMP] = UBYTE_TO_FLOAT(rgba[ACOMP]);
1652 }
1653
1654
1655 compressed_fetch_func
1656 _mesa_get_fxt_fetch_func(mesa_format format)
1657 {
1658    switch (format) {
1659    case MESA_FORMAT_RGB_FXT1:
1660       return fetch_rgb_fxt1;
1661    case MESA_FORMAT_RGBA_FXT1:
1662       return fetch_rgba_fxt1;
1663    default:
1664       return NULL;
1665    }
1666 }