src/mesa/main/texcompress_fxt1.c

   1 /*
   2  * Mesa 3-D graphics library
   3  * Version:  7.1
   4  *
   5  * Copyright (C) 1999-2008  Brian Paul   All Rights Reserved.
   6  *
   7  * Permission is hereby granted, free of charge, to any person obtaining a
   8  * copy of this software and associated documentation files (the "Software"),
   9  * to deal in the Software without restriction, including without limitation
  10  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  11  * and/or sell copies of the Software, and to permit persons to whom the
  12  * Software is furnished to do so, subject to the following conditions:
  13  *
  14  * The above copyright notice and this permission notice shall be included
  15  * in all copies or substantial portions of the Software.
  16  *
  17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  18  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  20  * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
  21  * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  22  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  23  */
  24
  25
  26 /**
  27  * \file texcompress_fxt1.c
  28  * GL_3DFX_texture_compression_FXT1 support.
  29  */
  30
  31
#include "glheader.h"
#include "imports.h"
#include "colormac.h"
#include "image.h"
#include "macros.h"
#include "mfeatures.h"
#include "mipmap.h"
#include "texcompress.h"
#include "texcompress_fxt1.h"
#include "texstore.h"
#include "swrast/s_context.h"

#include <string.h>
  43
  44
  45 #if FEATURE_texture_fxt1
  46
  47
  48 static void
  49 fxt1_encode (GLuint width, GLuint height, GLint comps,
  50              const void *source, GLint srcRowStride,
  51              void *dest, GLint destRowStride);
  52
  53 void
  54 fxt1_decode_1 (const void *texture, GLint stride,
  55                GLint i, GLint j, GLubyte *rgba);
  56
  57
  58 /**
  59  * Store user's image in rgb_fxt1 format.
  60  */
  61 GLboolean
  62 _mesa_texstore_rgb_fxt1(TEXSTORE_PARAMS)
  63 {
  64    const GLubyte *pixels;
  65    GLint srcRowStride;
  66    GLubyte *dst;
  67    const GLint texWidth = dstRowStride * 8 / 16; /* a bit of a hack */
  68    const GLubyte *tempImage = NULL;
  69
  70    ASSERT(dstFormat == MESA_FORMAT_RGB_FXT1);
  71    ASSERT(dstXoffset % 8 == 0);
  72    ASSERT(dstYoffset % 4 == 0);
  73    ASSERT(dstZoffset     == 0);
  74    (void) dstZoffset;
  75
  76    if (srcFormat != GL_RGB ||
  77        srcType != GL_UNSIGNED_BYTE ||
  78        ctx->_ImageTransferState ||
  79        srcPacking->RowLength != srcWidth ||
  80        srcPacking->SwapBytes) {
  81       /* convert image to RGB/GLubyte */
  82       tempImage = _mesa_make_temp_ubyte_image(ctx, dims,
  83                                              baseInternalFormat,
  84                                              _mesa_get_format_base_format(dstFormat),
  85                                              srcWidth, srcHeight, srcDepth,
  86                                              srcFormat, srcType, srcAddr,
  87                                              srcPacking);
  88       if (!tempImage)
  89          return GL_FALSE; /* out of memory */
  90       pixels = tempImage;
  91       srcRowStride = 3 * srcWidth;
  92       srcFormat = GL_RGB;
  93    }
  94    else {
  95       pixels = _mesa_image_address2d(srcPacking, srcAddr, srcWidth, srcHeight,
  96                                      srcFormat, srcType, 0, 0);
  97
  98       srcRowStride = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat,
  99                                             srcType) / sizeof(GLubyte);
 100    }
 101
 102    dst = _mesa_compressed_image_address(dstXoffset, dstYoffset, 0,
 103                                         dstFormat,
 104                                         texWidth, dstSlices[0]);
 105
 106    fxt1_encode(srcWidth, srcHeight, 3, pixels, srcRowStride,
 107                dst, dstRowStride);
 108
 109    if (tempImage)
 110       free((void*) tempImage);
 111
 112    return GL_TRUE;
 113 }
 114
 115
 116 /**
 117  * Store user's image in rgba_fxt1 format.
 118  */
 119 GLboolean
 120 _mesa_texstore_rgba_fxt1(TEXSTORE_PARAMS)
 121 {
 122    const GLubyte *pixels;
 123    GLint srcRowStride;
 124    GLubyte *dst;
 125    GLint texWidth = dstRowStride * 8 / 16; /* a bit of a hack */
 126    const GLubyte *tempImage = NULL;
 127
 128    ASSERT(dstFormat == MESA_FORMAT_RGBA_FXT1);
 129    ASSERT(dstXoffset % 8 == 0);
 130    ASSERT(dstYoffset % 4 == 0);
 131    ASSERT(dstZoffset     == 0);
 132    (void) dstZoffset;
 133
 134    if (srcFormat != GL_RGBA ||
 135        srcType != GL_UNSIGNED_BYTE ||
 136        ctx->_ImageTransferState ||
 137        srcPacking->SwapBytes) {
 138       /* convert image to RGBA/GLubyte */
 139       tempImage = _mesa_make_temp_ubyte_image(ctx, dims,
 140                                              baseInternalFormat,
 141                                              _mesa_get_format_base_format(dstFormat),
 142                                              srcWidth, srcHeight, srcDepth,
 143                                              srcFormat, srcType, srcAddr,
 144                                              srcPacking);
 145       if (!tempImage)
 146          return GL_FALSE; /* out of memory */
 147       pixels = tempImage;
 148       srcRowStride = 4 * srcWidth;
 149       srcFormat = GL_RGBA;
 150    }
 151    else {
 152       pixels = _mesa_image_address2d(srcPacking, srcAddr, srcWidth, srcHeight,
 153                                      srcFormat, srcType, 0, 0);
 154
 155       srcRowStride = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat,
 156                                             srcType) / sizeof(GLubyte);
 157    }
 158
 159    dst = _mesa_compressed_image_address(dstXoffset, dstYoffset, 0,
 160                                         dstFormat,
 161                                         texWidth, dstSlices[0]);
 162
 163    fxt1_encode(srcWidth, srcHeight, 4, pixels, srcRowStride,
 164                dst, dstRowStride);
 165
 166    if (tempImage)
 167       free((void*) tempImage);
 168
 169    return GL_TRUE;
 170 }
 171
 172
 173 void
 174 _mesa_fetch_texel_2d_f_rgba_fxt1( const struct swrast_texture_image *texImage,
 175                                   GLint i, GLint j, GLint k, GLfloat *texel )
 176 {
 177    /* just sample as GLubyte and convert to float here */
 178    GLubyte rgba[4];
 179    (void) k;
 180    fxt1_decode_1(texImage->Base.Data, texImage->Base.RowStride, i, j, rgba);
 181    texel[RCOMP] = UBYTE_TO_FLOAT(rgba[RCOMP]);
 182    texel[GCOMP] = UBYTE_TO_FLOAT(rgba[GCOMP]);
 183    texel[BCOMP] = UBYTE_TO_FLOAT(rgba[BCOMP]);
 184    texel[ACOMP] = UBYTE_TO_FLOAT(rgba[ACOMP]);
 185 }
 186
 187
 188 void
 189 _mesa_fetch_texel_2d_f_rgb_fxt1( const struct swrast_texture_image *texImage,
 190                                  GLint i, GLint j, GLint k, GLfloat *texel )
 191 {
 192    /* just sample as GLubyte and convert to float here */
 193    GLubyte rgba[4];
 194    (void) k;
 195    fxt1_decode_1(texImage->Base.Data, texImage->Base.RowStride, i, j, rgba);
 196    texel[RCOMP] = UBYTE_TO_FLOAT(rgba[RCOMP]);
 197    texel[GCOMP] = UBYTE_TO_FLOAT(rgba[GCOMP]);
 198    texel[BCOMP] = UBYTE_TO_FLOAT(rgba[BCOMP]);
 199    texel[ACOMP] = 1.0F;
 200 }
 201
 202
 203
 204 /***************************************************************************\
 205  * FXT1 encoder
 206  *
 207  * The encoder was built by reversing the decoder,
 208  * and is vaguely based on Texus2 by 3dfx. Note that this code
 209  * is merely a proof of concept, since it is highly UNoptimized;
 210  * moreover, it is sub-optimal due to initial conditions passed
 211  * to Lloyd's algorithm (the interpolation modes are even worse).
 212 \***************************************************************************/
 213
 214
 215 #define MAX_COMP 4 /* ever needed maximum number of components in texel */
 216 #define MAX_VECT 4 /* ever needed maximum number of base vectors to find */
 217 #define N_TEXELS 32 /* number of texels in a block (always 32) */
 218 #define LL_N_REP 50 /* number of iterations in lloyd's vq */
 219 #define LL_RMS_D 10 /* fault tolerance (maximum delta) */
 220 #define LL_RMS_E 255 /* fault tolerance (maximum error) */
 221 #define ALPHA_TS 2 /* alpha threshold: (255 - ALPHA_TS) deemed opaque */
 222 #define ISTBLACK(v) (*((GLuint *)(v)) == 0)
 223
 224
 225 /*
 226  * Define a 64-bit unsigned integer type and macros
 227  */
 228 #if 1
 229
 230 #define FX64_NATIVE 1
 231
 232 typedef uint64_t Fx64;
 233
 234 #define FX64_MOV32(a, b) a = b
 235 #define FX64_OR32(a, b)  a |= b
 236 #define FX64_SHL(a, c)   a <<= c
 237
 238 #else
 239
 240 #define FX64_NATIVE 0
 241
 242 typedef struct {
 243    GLuint lo, hi;
 244 } Fx64;
 245
 246 #define FX64_MOV32(a, b) a.lo = b
 247 #define FX64_OR32(a, b)  a.lo |= b
 248
 249 #define FX64_SHL(a, c)                                 \
 250    do {                                                \
 251        if ((c) >= 32) {                                \
 252           a.hi = a.lo << ((c) - 32);                   \
 253           a.lo = 0;                                    \
 254        } else {                                        \
 255           a.hi = (a.hi << (c)) | (a.lo >> (32 - (c))); \
 256           a.lo <<= (c);                                \
 257        }                                               \
 258    } while (0)
 259
 260 #endif
 261
 262
 263 #define F(i) (GLfloat)1 /* can be used to obtain an oblong metric: 0.30 / 0.59 / 0.11 */
 264 #define SAFECDOT 1 /* for paranoids */
 265
 266 #define MAKEIVEC(NV, NC, IV, B, V0, V1)  \
 267    do {                                  \
 268       /* compute interpolation vector */ \
 269       GLfloat d2 = 0.0F;                 \
 270       GLfloat rd2;                       \
 271                                          \
 272       for (i = 0; i < NC; i++) {         \
 273          IV[i] = (V1[i] - V0[i]) * F(i); \
 274          d2 += IV[i] * IV[i];            \
 275       }                                  \
 276       rd2 = (GLfloat)NV / d2;            \
 277       B = 0;                             \
 278       for (i = 0; i < NC; i++) {         \
 279          IV[i] *= F(i);                  \
 280          B -= IV[i] * V0[i];             \
 281          IV[i] *= rd2;                   \
 282       }                                  \
 283       B = B * rd2 + 0.5f;                \
 284    } while (0)
 285
 286 #define CALCCDOT(TEXEL, NV, NC, IV, B, V)\
 287    do {                                  \
 288       GLfloat dot = 0.0F;                \
 289       for (i = 0; i < NC; i++) {         \
 290          dot += V[i] * IV[i];            \
 291       }                                  \
 292       TEXEL = (GLint)(dot + B);          \
 293       if (SAFECDOT) {                    \
 294          if (TEXEL < 0) {                \
 295             TEXEL = 0;                   \
 296          } else if (TEXEL > NV) {        \
 297             TEXEL = NV;                  \
 298          }                               \
 299       }                                  \
 300    } while (0)
 301
 302
 303 static GLint
 304 fxt1_bestcol (GLfloat vec[][MAX_COMP], GLint nv,
 305               GLubyte input[MAX_COMP], GLint nc)
 306 {
 307    GLint i, j, best = -1;
 308    GLfloat err = 1e9; /* big enough */
 309
 310    for (j = 0; j < nv; j++) {
 311       GLfloat e = 0.0F;
 312       for (i = 0; i < nc; i++) {
 313          e += (vec[j][i] - input[i]) * (vec[j][i] - input[i]);
 314       }
 315       if (e < err) {
 316          err = e;
 317          best = j;
 318       }
 319    }
 320
 321    return best;
 322 }
 323
 324
 325 static GLint
 326 fxt1_worst (GLfloat vec[MAX_COMP],
 327             GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
 328 {
 329    GLint i, k, worst = -1;
 330    GLfloat err = -1.0F; /* small enough */
 331
 332    for (k = 0; k < n; k++) {
 333       GLfloat e = 0.0F;
 334       for (i = 0; i < nc; i++) {
 335          e += (vec[i] - input[k][i]) * (vec[i] - input[k][i]);
 336       }
 337       if (e > err) {
 338          err = e;
 339          worst = k;
 340       }
 341    }
 342
 343    return worst;
 344 }
 345
 346
 347 static GLint
 348 fxt1_variance (GLdouble variance[MAX_COMP],
 349                GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
 350 {
 351    GLint i, k, best = 0;
 352    GLint sx, sx2;
 353    GLdouble var, maxvar = -1; /* small enough */
 354    GLdouble teenth = 1.0 / n;
 355
 356    for (i = 0; i < nc; i++) {
 357       sx = sx2 = 0;
 358       for (k = 0; k < n; k++) {
 359          GLint t = input[k][i];
 360          sx += t;
 361          sx2 += t * t;
 362       }
 363       var = sx2 * teenth - sx * sx * teenth * teenth;
 364       if (maxvar < var) {
 365          maxvar = var;
 366          best = i;
 367       }
 368       if (variance) {
 369          variance[i] = var;
 370       }
 371    }
 372
 373    return best;
 374 }
 375
 376
 377 static GLint
 378 fxt1_choose (GLfloat vec[][MAX_COMP], GLint nv,
 379              GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
 380 {
 381 #if 0
 382    /* Choose colors from a grid.
 383     */
 384    GLint i, j;
 385
 386    for (j = 0; j < nv; j++) {
 387       GLint m = j * (n - 1) / (nv - 1);
 388       for (i = 0; i < nc; i++) {
 389          vec[j][i] = input[m][i];
 390       }
 391    }
 392 #else
 393    /* Our solution here is to find the darkest and brightest colors in
 394     * the 8x4 tile and use those as the two representative colors.
 395     * There are probably better algorithms to use (histogram-based).
 396     */
 397    GLint i, j, k;
 398    GLint minSum = 2000; /* big enough */
 399    GLint maxSum = -1; /* small enough */
 400    GLint minCol = 0; /* phoudoin: silent compiler! */
 401    GLint maxCol = 0; /* phoudoin: silent compiler! */
 402
 403    struct {
 404       GLint flag;
 405       GLint key;
 406       GLint freq;
 407       GLint idx;
 408    } hist[N_TEXELS];
 409    GLint lenh = 0;
 410
 411    memset(hist, 0, sizeof(hist));
 412
 413    for (k = 0; k < n; k++) {
 414       GLint l;
 415       GLint key = 0;
 416       GLint sum = 0;
 417       for (i = 0; i < nc; i++) {
 418          key <<= 8;
 419          key |= input[k][i];
 420          sum += input[k][i];
 421       }
 422       for (l = 0; l < n; l++) {
 423          if (!hist[l].flag) {
 424             /* alloc new slot */
 425             hist[l].flag = !0;
 426             hist[l].key = key;
 427             hist[l].freq = 1;
 428             hist[l].idx = k;
 429             lenh = l + 1;
 430             break;
 431          } else if (hist[l].key == key) {
 432             hist[l].freq++;
 433             break;
 434          }
 435       }
 436       if (minSum > sum) {
 437          minSum = sum;
 438          minCol = k;
 439       }
 440       if (maxSum < sum) {
 441          maxSum = sum;
 442          maxCol = k;
 443       }
 444    }
 445
 446    if (lenh <= nv) {
 447       for (j = 0; j < lenh; j++) {
 448          for (i = 0; i < nc; i++) {
 449             vec[j][i] = (GLfloat)input[hist[j].idx][i];
 450          }
 451       }
 452       for (; j < nv; j++) {
 453          for (i = 0; i < nc; i++) {
 454             vec[j][i] = vec[0][i];
 455          }
 456       }
 457       return 0;
 458    }
 459
 460    for (j = 0; j < nv; j++) {
 461       for (i = 0; i < nc; i++) {
 462          vec[j][i] = ((nv - 1 - j) * input[minCol][i] + j * input[maxCol][i] + (nv - 1) / 2) / (GLfloat)(nv - 1);
 463       }
 464    }
 465 #endif
 466
 467    return !0;
 468 }
 469
 470
 471 static GLint
 472 fxt1_lloyd (GLfloat vec[][MAX_COMP], GLint nv,
 473             GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
 474 {
 475    /* Use the generalized lloyd's algorithm for VQ:
 476     *     find 4 color vectors.
 477     *
 478     *     for each sample color
 479     *         sort to nearest vector.
 480     *
 481     *     replace each vector with the centroid of its matching colors.
 482     *
 483     *     repeat until RMS doesn't improve.
 484     *
 485     *     if a color vector has no samples, or becomes the same as another
 486     *     vector, replace it with the color which is farthest from a sample.
 487     *
 488     * vec[][MAX_COMP]           initial vectors and resulting colors
 489     * nv                        number of resulting colors required
 490     * input[N_TEXELS][MAX_COMP] input texels
 491     * nc                        number of components in input / vec
 492     * n                         number of input samples
 493     */
 494
 495    GLint sum[MAX_VECT][MAX_COMP]; /* used to accumulate closest texels */
 496    GLint cnt[MAX_VECT]; /* how many times a certain vector was chosen */
 497    GLfloat error, lasterror = 1e9;
 498
 499    GLint i, j, k, rep;
 500
 501    /* the quantizer */
 502    for (rep = 0; rep < LL_N_REP; rep++) {
 503       /* reset sums & counters */
 504       for (j = 0; j < nv; j++) {
 505          for (i = 0; i < nc; i++) {
 506             sum[j][i] = 0;
 507          }
 508          cnt[j] = 0;
 509       }
 510       error = 0;
 511
 512       /* scan whole block */
 513       for (k = 0; k < n; k++) {
 514 #if 1
 515          GLint best = -1;
 516          GLfloat err = 1e9; /* big enough */
 517          /* determine best vector */
 518          for (j = 0; j < nv; j++) {
 519             GLfloat e = (vec[j][0] - input[k][0]) * (vec[j][0] - input[k][0]) +
 520                       (vec[j][1] - input[k][1]) * (vec[j][1] - input[k][1]) +
 521                       (vec[j][2] - input[k][2]) * (vec[j][2] - input[k][2]);
 522             if (nc == 4) {
 523                e += (vec[j][3] - input[k][3]) * (vec[j][3] - input[k][3]);
 524             }
 525             if (e < err) {
 526                err = e;
 527                best = j;
 528             }
 529          }
 530 #else
 531          GLint best = fxt1_bestcol(vec, nv, input[k], nc, &err);
 532 #endif
 533          assert(best >= 0);
 534          /* add in closest color */
 535          for (i = 0; i < nc; i++) {
 536             sum[best][i] += input[k][i];
 537          }
 538          /* mark this vector as used */
 539          cnt[best]++;
 540          /* accumulate error */
 541          error += err;
 542       }
 543
 544       /* check RMS */
 545       if ((error < LL_RMS_E) ||
 546           ((error < lasterror) && ((lasterror - error) < LL_RMS_D))) {
 547          return !0; /* good match */
 548       }
 549       lasterror = error;
 550
 551       /* move each vector to the barycenter of its closest colors */
 552       for (j = 0; j < nv; j++) {
 553          if (cnt[j]) {
 554             GLfloat div = 1.0F / cnt[j];
 555             for (i = 0; i < nc; i++) {
 556                vec[j][i] = div * sum[j][i];
 557             }
 558          } else {
 559             /* this vec has no samples or is identical with a previous vec */
 560             GLint worst = fxt1_worst(vec[j], input, nc, n);
 561             for (i = 0; i < nc; i++) {
 562                vec[j][i] = input[worst][i];
 563             }
 564          }
 565       }
 566    }
 567
 568    return 0; /* could not converge fast enough */
 569 }
 570
 571
 572 static void
 573 fxt1_quantize_CHROMA (GLuint *cc,
 574                       GLubyte input[N_TEXELS][MAX_COMP])
 575 {
 576    const GLint n_vect = 4; /* 4 base vectors to find */
 577    const GLint n_comp = 3; /* 3 components: R, G, B */
 578    GLfloat vec[MAX_VECT][MAX_COMP];
 579    GLint i, j, k;
 580    Fx64 hi; /* high quadword */
 581    GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
 582
 583    if (fxt1_choose(vec, n_vect, input, n_comp, N_TEXELS) != 0) {
 584       fxt1_lloyd(vec, n_vect, input, n_comp, N_TEXELS);
 585    }
 586
 587    FX64_MOV32(hi, 4); /* cc-chroma = "010" + unused bit */
 588    for (j = n_vect - 1; j >= 0; j--) {
 589       for (i = 0; i < n_comp; i++) {
 590          /* add in colors */
 591          FX64_SHL(hi, 5);
 592          FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
 593       }
 594    }
 595    ((Fx64 *)cc)[1] = hi;
 596
 597    lohi = lolo = 0;
 598    /* right microtile */
 599    for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) {
 600       lohi <<= 2;
 601       lohi |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
 602    }
 603    /* left microtile */
 604    for (; k >= 0; k--) {
 605       lolo <<= 2;
 606       lolo |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
 607    }
 608    cc[1] = lohi;
 609    cc[0] = lolo;
 610 }
 611
 612
 613 static void
 614 fxt1_quantize_ALPHA0 (GLuint *cc,
 615                       GLubyte input[N_TEXELS][MAX_COMP],
 616                       GLubyte reord[N_TEXELS][MAX_COMP], GLint n)
 617 {
 618    const GLint n_vect = 3; /* 3 base vectors to find */
 619    const GLint n_comp = 4; /* 4 components: R, G, B, A */
 620    GLfloat vec[MAX_VECT][MAX_COMP];
 621    GLint i, j, k;
 622    Fx64 hi; /* high quadword */
 623    GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
 624
 625    /* the last vector indicates zero */
 626    for (i = 0; i < n_comp; i++) {
 627       vec[n_vect][i] = 0;
 628    }
 629
 630    /* the first n texels in reord are guaranteed to be non-zero */
 631    if (fxt1_choose(vec, n_vect, reord, n_comp, n) != 0) {
 632       fxt1_lloyd(vec, n_vect, reord, n_comp, n);
 633    }
 634
 635    FX64_MOV32(hi, 6); /* alpha = "011" + lerp = 0 */
 636    for (j = n_vect - 1; j >= 0; j--) {
 637       /* add in alphas */
 638       FX64_SHL(hi, 5);
 639       FX64_OR32(hi, (GLuint)(vec[j][ACOMP] / 8.0F));
 640    }
 641    for (j = n_vect - 1; j >= 0; j--) {
 642       for (i = 0; i < n_comp - 1; i++) {
 643          /* add in colors */
 644          FX64_SHL(hi, 5);
 645          FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
 646       }
 647    }
 648    ((Fx64 *)cc)[1] = hi;
 649
 650    lohi = lolo = 0;
 651    /* right microtile */
 652    for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) {
 653       lohi <<= 2;
 654       lohi |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
 655    }
 656    /* left microtile */
 657    for (; k >= 0; k--) {
 658       lolo <<= 2;
 659       lolo |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
 660    }
 661    cc[1] = lohi;
 662    cc[0] = lolo;
 663 }
 664
 665
 666 static void
 667 fxt1_quantize_ALPHA1 (GLuint *cc,
 668                       GLubyte input[N_TEXELS][MAX_COMP])
 669 {
 670    const GLint n_vect = 3; /* highest vector number in each microtile */
 671    const GLint n_comp = 4; /* 4 components: R, G, B, A */
 672    GLfloat vec[1 + 1 + 1][MAX_COMP]; /* 1.5 extrema for each sub-block */
 673    GLfloat b, iv[MAX_COMP]; /* interpolation vector */
 674    GLint i, j, k;
 675    Fx64 hi; /* high quadword */
 676    GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
 677
 678    GLint minSum;
 679    GLint maxSum;
 680    GLint minColL = 0, maxColL = 0;
 681    GLint minColR = 0, maxColR = 0;
 682    GLint sumL = 0, sumR = 0;
 683    GLint nn_comp;
 684    /* Our solution here is to find the darkest and brightest colors in
 685     * the 4x4 tile and use those as the two representative colors.
 686     * There are probably better algorithms to use (histogram-based).
 687     */
 688    nn_comp = n_comp;
 689    while ((minColL == maxColL) && nn_comp) {
 690        minSum = 2000; /* big enough */
 691        maxSum = -1; /* small enough */
 692        for (k = 0; k < N_TEXELS / 2; k++) {
 693            GLint sum = 0;
 694            for (i = 0; i < nn_comp; i++) {
 695                sum += input[k][i];
 696            }
 697            if (minSum > sum) {
 698                minSum = sum;
 699                minColL = k;
 700            }
 701            if (maxSum < sum) {
 702                maxSum = sum;
 703                maxColL = k;
 704            }
 705            sumL += sum;
 706        }
 707
 708        nn_comp--;
 709    }
 710
 711    nn_comp = n_comp;
 712    while ((minColR == maxColR) && nn_comp) {
 713        minSum = 2000; /* big enough */
 714        maxSum = -1; /* small enough */
 715        for (k = N_TEXELS / 2; k < N_TEXELS; k++) {
 716            GLint sum = 0;
 717            for (i = 0; i < nn_comp; i++) {
 718                sum += input[k][i];
 719            }
 720            if (minSum > sum) {
 721                minSum = sum;
 722                minColR = k;
 723            }
 724            if (maxSum < sum) {
 725                maxSum = sum;
 726                maxColR = k;
 727            }
 728            sumR += sum;
 729        }
 730
 731        nn_comp--;
 732    }
 733
 734    /* choose the common vector (yuck!) */
 735    {
 736       GLint j1, j2;
 737       GLint v1 = 0, v2 = 0;
 738       GLfloat err = 1e9; /* big enough */
 739       GLfloat tv[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
 740       for (i = 0; i < n_comp; i++) {
 741          tv[0][i] = input[minColL][i];
 742          tv[1][i] = input[maxColL][i];
 743          tv[2][i] = input[minColR][i];
 744          tv[3][i] = input[maxColR][i];
 745       }
 746       for (j1 = 0; j1 < 2; j1++) {
 747          for (j2 = 2; j2 < 4; j2++) {
 748             GLfloat e = 0.0F;
 749             for (i = 0; i < n_comp; i++) {
 750                e += (tv[j1][i] - tv[j2][i]) * (tv[j1][i] - tv[j2][i]);
 751             }
 752             if (e < err) {
 753                err = e;
 754                v1 = j1;
 755                v2 = j2;
 756             }
 757          }
 758       }
 759       for (i = 0; i < n_comp; i++) {
 760          vec[0][i] = tv[1 - v1][i];
 761          vec[1][i] = (tv[v1][i] * sumL + tv[v2][i] * sumR) / (sumL + sumR);
 762          vec[2][i] = tv[5 - v2][i];
 763       }
 764    }
 765
 766    /* left microtile */
 767    cc[0] = 0;
 768    if (minColL != maxColL) {
 769       /* compute interpolation vector */
 770       MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
 771
 772       /* add in texels */
 773       lolo = 0;
 774       for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
 775          GLint texel;
 776          /* interpolate color */
 777          CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
 778          /* add in texel */
 779          lolo <<= 2;
 780          lolo |= texel;
 781       }
 782
 783       cc[0] = lolo;
 784    }
 785
 786    /* right microtile */
 787    cc[1] = 0;
 788    if (minColR != maxColR) {
 789       /* compute interpolation vector */
 790       MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[1]);
 791
 792       /* add in texels */
 793       lohi = 0;
 794       for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
 795          GLint texel;
 796          /* interpolate color */
 797          CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
 798          /* add in texel */
 799          lohi <<= 2;
 800          lohi |= texel;
 801       }
 802
 803       cc[1] = lohi;
 804    }
 805
 806    FX64_MOV32(hi, 7); /* alpha = "011" + lerp = 1 */
 807    for (j = n_vect - 1; j >= 0; j--) {
 808       /* add in alphas */
 809       FX64_SHL(hi, 5);
 810       FX64_OR32(hi, (GLuint)(vec[j][ACOMP] / 8.0F));
 811    }
 812    for (j = n_vect - 1; j >= 0; j--) {
 813       for (i = 0; i < n_comp - 1; i++) {
 814          /* add in colors */
 815          FX64_SHL(hi, 5);
 816          FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
 817       }
 818    }
 819    ((Fx64 *)cc)[1] = hi;
 820 }
 821
 822
 823 static void
 824 fxt1_quantize_HI (GLuint *cc,
 825                   GLubyte input[N_TEXELS][MAX_COMP],
 826                   GLubyte reord[N_TEXELS][MAX_COMP], GLint n)
 827 {
 828    const GLint n_vect = 6; /* highest vector number */
 829    const GLint n_comp = 3; /* 3 components: R, G, B */
 830    GLfloat b = 0.0F;       /* phoudoin: silent compiler! */
 831    GLfloat iv[MAX_COMP];   /* interpolation vector */
 832    GLint i, k;
 833    GLuint hihi; /* high quadword: hi dword */
 834
 835    GLint minSum = 2000; /* big enough */
 836    GLint maxSum = -1; /* small enough */
 837    GLint minCol = 0; /* phoudoin: silent compiler! */
 838    GLint maxCol = 0; /* phoudoin: silent compiler! */
 839
 840    /* Our solution here is to find the darkest and brightest colors in
 841     * the 8x4 tile and use those as the two representative colors.
 842     * There are probably better algorithms to use (histogram-based).
 843     */
 844    for (k = 0; k < n; k++) {
 845       GLint sum = 0;
 846       for (i = 0; i < n_comp; i++) {
 847          sum += reord[k][i];
 848       }
 849       if (minSum > sum) {
 850          minSum = sum;
 851          minCol = k;
 852       }
 853       if (maxSum < sum) {
 854          maxSum = sum;
 855          maxCol = k;
 856       }
 857    }
 858
 859    hihi = 0; /* cc-hi = "00" */
 860    for (i = 0; i < n_comp; i++) {
 861       /* add in colors */
 862       hihi <<= 5;
 863       hihi |= reord[maxCol][i] >> 3;
 864    }
 865    for (i = 0; i < n_comp; i++) {
 866       /* add in colors */
 867       hihi <<= 5;
 868       hihi |= reord[minCol][i] >> 3;
 869    }
 870    cc[3] = hihi;
 871    cc[0] = cc[1] = cc[2] = 0;
 872
 873    /* compute interpolation vector */
 874    if (minCol != maxCol) {
 875       MAKEIVEC(n_vect, n_comp, iv, b, reord[minCol], reord[maxCol]);
 876    }
 877
 878    /* add in texels */
 879    for (k = N_TEXELS - 1; k >= 0; k--) {
 880       GLint t = k * 3;
 881       GLuint *kk = (GLuint *)((char *)cc + t / 8);
 882       GLint texel = n_vect + 1; /* transparent black */
 883
 884       if (!ISTBLACK(input[k])) {
 885          if (minCol != maxCol) {
 886             /* interpolate color */
 887             CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
 888             /* add in texel */
 889             kk[0] |= texel << (t & 7);
 890          }
 891       } else {
 892          /* add in texel */
 893          kk[0] |= texel << (t & 7);
 894       }
 895    }
 896 }
 897
 898
 899 static void
 900 fxt1_quantize_MIXED1 (GLuint *cc,
 901                       GLubyte input[N_TEXELS][MAX_COMP])
 902 {
 903    const GLint n_vect = 2; /* highest vector number in each microtile */
 904    const GLint n_comp = 3; /* 3 components: R, G, B */
 905    GLubyte vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
 906    GLfloat b, iv[MAX_COMP]; /* interpolation vector */
 907    GLint i, j, k;
 908    Fx64 hi; /* high quadword */
 909    GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
 910
 911    GLint minSum;
 912    GLint maxSum;
 913    GLint minColL = 0, maxColL = -1;
 914    GLint minColR = 0, maxColR = -1;
 915
 916    /* Our solution here is to find the darkest and brightest colors in
 917     * the 4x4 tile and use those as the two representative colors.
 918     * There are probably better algorithms to use (histogram-based).
 919     */
 920    minSum = 2000; /* big enough */
 921    maxSum = -1; /* small enough */
 922    for (k = 0; k < N_TEXELS / 2; k++) {
 923       if (!ISTBLACK(input[k])) {
 924          GLint sum = 0;
 925          for (i = 0; i < n_comp; i++) {
 926             sum += input[k][i];
 927          }
 928          if (minSum > sum) {
 929             minSum = sum;
 930             minColL = k;
 931          }
 932          if (maxSum < sum) {
 933             maxSum = sum;
 934             maxColL = k;
 935          }
 936       }
 937    }
 938    minSum = 2000; /* big enough */
 939    maxSum = -1; /* small enough */
 940    for (; k < N_TEXELS; k++) {
 941       if (!ISTBLACK(input[k])) {
 942          GLint sum = 0;
 943          for (i = 0; i < n_comp; i++) {
 944             sum += input[k][i];
 945          }
 946          if (minSum > sum) {
 947             minSum = sum;
 948             minColR = k;
 949          }
 950          if (maxSum < sum) {
 951             maxSum = sum;
 952             maxColR = k;
 953          }
 954       }
 955    }
 956
 957    /* left microtile */
 958    if (maxColL == -1) {
 959       /* all transparent black */
 960       cc[0] = ~0u;
 961       for (i = 0; i < n_comp; i++) {
 962          vec[0][i] = 0;
 963          vec[1][i] = 0;
 964       }
 965    } else {
 966       cc[0] = 0;
 967       for (i = 0; i < n_comp; i++) {
 968          vec[0][i] = input[minColL][i];
 969          vec[1][i] = input[maxColL][i];
 970       }
 971       if (minColL != maxColL) {
 972          /* compute interpolation vector */
 973          MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
 974
 975          /* add in texels */
 976          lolo = 0;
 977          for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
 978             GLint texel = n_vect + 1; /* transparent black */
 979             if (!ISTBLACK(input[k])) {
 980                /* interpolate color */
 981                CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
 982             }
 983             /* add in texel */
 984             lolo <<= 2;
 985             lolo |= texel;
 986          }
 987          cc[0] = lolo;
 988       }
 989    }
 990
 991    /* right microtile */
 992    if (maxColR == -1) {
 993       /* all transparent black */
 994       cc[1] = ~0u;
 995       for (i = 0; i < n_comp; i++) {
 996          vec[2][i] = 0;
 997          vec[3][i] = 0;
 998       }
 999    } else {
1000       cc[1] = 0;
1001       for (i = 0; i < n_comp; i++) {
1002          vec[2][i] = input[minColR][i];
1003          vec[3][i] = input[maxColR][i];
1004       }
1005       if (minColR != maxColR) {
1006          /* compute interpolation vector */
1007          MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);
1008
1009          /* add in texels */
1010          lohi = 0;
1011          for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
1012             GLint texel = n_vect + 1; /* transparent black */
1013             if (!ISTBLACK(input[k])) {
1014                /* interpolate color */
1015                CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
1016             }
1017             /* add in texel */
1018             lohi <<= 2;
1019             lohi |= texel;
1020          }
1021          cc[1] = lohi;
1022       }
1023    }
1024
1025    FX64_MOV32(hi, 9 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
1026    for (j = 2 * 2 - 1; j >= 0; j--) {
1027       for (i = 0; i < n_comp; i++) {
1028          /* add in colors */
1029          FX64_SHL(hi, 5);
1030          FX64_OR32(hi, vec[j][i] >> 3);
1031       }
1032    }
1033    ((Fx64 *)cc)[1] = hi;
1034 }
1035
1036
1037 static void
1038 fxt1_quantize_MIXED0 (GLuint *cc,
1039                       GLubyte input[N_TEXELS][MAX_COMP])
1040 {
1041    const GLint n_vect = 3; /* highest vector number in each microtile */
1042    const GLint n_comp = 3; /* 3 components: R, G, B */
1043    GLubyte vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
1044    GLfloat b, iv[MAX_COMP]; /* interpolation vector */
1045    GLint i, j, k;
1046    Fx64 hi; /* high quadword */
1047    GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
1048
1049    GLint minColL = 0, maxColL = 0;
1050    GLint minColR = 0, maxColR = 0;
1051 #if 0
1052    GLint minSum;
1053    GLint maxSum;
1054
1055    /* Our solution here is to find the darkest and brightest colors in
1056     * the 4x4 tile and use those as the two representative colors.
1057     * There are probably better algorithms to use (histogram-based).
1058     */
1059    minSum = 2000; /* big enough */
1060    maxSum = -1; /* small enough */
1061    for (k = 0; k < N_TEXELS / 2; k++) {
1062       GLint sum = 0;
1063       for (i = 0; i < n_comp; i++) {
1064          sum += input[k][i];
1065       }
1066       if (minSum > sum) {
1067          minSum = sum;
1068          minColL = k;
1069       }
1070       if (maxSum < sum) {
1071          maxSum = sum;
1072          maxColL = k;
1073       }
1074    }
1075    minSum = 2000; /* big enough */
1076    maxSum = -1; /* small enough */
1077    for (; k < N_TEXELS; k++) {
1078       GLint sum = 0;
1079       for (i = 0; i < n_comp; i++) {
1080          sum += input[k][i];
1081       }
1082       if (minSum > sum) {
1083          minSum = sum;
1084          minColR = k;
1085       }
1086       if (maxSum < sum) {
1087          maxSum = sum;
1088          maxColR = k;
1089       }
1090    }
1091 #else
1092    GLint minVal;
1093    GLint maxVal;
1094    GLint maxVarL = fxt1_variance(NULL, input, n_comp, N_TEXELS / 2);
1095    GLint maxVarR = fxt1_variance(NULL, &input[N_TEXELS / 2], n_comp, N_TEXELS / 2);
1096
1097    /* Scan the channel with max variance for lo & hi
1098     * and use those as the two representative colors.
1099     */
1100    minVal = 2000; /* big enough */
1101    maxVal = -1; /* small enough */
1102    for (k = 0; k < N_TEXELS / 2; k++) {
1103       GLint t = input[k][maxVarL];
1104       if (minVal > t) {
1105          minVal = t;
1106          minColL = k;
1107       }
1108       if (maxVal < t) {
1109          maxVal = t;
1110          maxColL = k;
1111       }
1112    }
1113    minVal = 2000; /* big enough */
1114    maxVal = -1; /* small enough */
1115    for (; k < N_TEXELS; k++) {
1116       GLint t = input[k][maxVarR];
1117       if (minVal > t) {
1118          minVal = t;
1119          minColR = k;
1120       }
1121       if (maxVal < t) {
1122          maxVal = t;
1123          maxColR = k;
1124       }
1125    }
1126 #endif
1127
1128    /* left microtile */
1129    cc[0] = 0;
1130    for (i = 0; i < n_comp; i++) {
1131       vec[0][i] = input[minColL][i];
1132       vec[1][i] = input[maxColL][i];
1133    }
1134    if (minColL != maxColL) {
1135       /* compute interpolation vector */
1136       MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
1137
1138       /* add in texels */
1139       lolo = 0;
1140       for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
1141          GLint texel;
1142          /* interpolate color */
1143          CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
1144          /* add in texel */
1145          lolo <<= 2;
1146          lolo |= texel;
1147       }
1148
1149       /* funky encoding for LSB of green */
1150       if ((GLint)((lolo >> 1) & 1) != (((vec[1][GCOMP] ^ vec[0][GCOMP]) >> 2) & 1)) {
1151          for (i = 0; i < n_comp; i++) {
1152             vec[1][i] = input[minColL][i];
1153             vec[0][i] = input[maxColL][i];
1154          }
1155          lolo = ~lolo;
1156       }
1157
1158       cc[0] = lolo;
1159    }
1160
1161    /* right microtile */
1162    cc[1] = 0;
1163    for (i = 0; i < n_comp; i++) {
1164       vec[2][i] = input[minColR][i];
1165       vec[3][i] = input[maxColR][i];
1166    }
1167    if (minColR != maxColR) {
1168       /* compute interpolation vector */
1169       MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);
1170
1171       /* add in texels */
1172       lohi = 0;
1173       for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
1174          GLint texel;
1175          /* interpolate color */
1176          CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
1177          /* add in texel */
1178          lohi <<= 2;
1179          lohi |= texel;
1180       }
1181
1182       /* funky encoding for LSB of green */
1183       if ((GLint)((lohi >> 1) & 1) != (((vec[3][GCOMP] ^ vec[2][GCOMP]) >> 2) & 1)) {
1184          for (i = 0; i < n_comp; i++) {
1185             vec[3][i] = input[minColR][i];
1186             vec[2][i] = input[maxColR][i];
1187          }
1188          lohi = ~lohi;
1189       }
1190
1191       cc[1] = lohi;
1192    }
1193
1194    FX64_MOV32(hi, 8 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
1195    for (j = 2 * 2 - 1; j >= 0; j--) {
1196       for (i = 0; i < n_comp; i++) {
1197          /* add in colors */
1198          FX64_SHL(hi, 5);
1199          FX64_OR32(hi, vec[j][i] >> 3);
1200       }
1201    }
1202    ((Fx64 *)cc)[1] = hi;
1203 }
1204
1205
1206 static void
1207 fxt1_quantize (GLuint *cc, const GLubyte *lines[], GLint comps)
1208 {
1209    GLint trualpha;
1210    GLubyte reord[N_TEXELS][MAX_COMP];
1211
1212    GLubyte input[N_TEXELS][MAX_COMP];
1213    GLint i, k, l;
1214
1215    if (comps == 3) {
1216       /* make the whole block opaque */
1217       memset(input, -1, sizeof(input));
1218    }
1219
1220    /* 8 texels each line */
1221    for (l = 0; l < 4; l++) {
1222       for (k = 0; k < 4; k++) {
1223          for (i = 0; i < comps; i++) {
1224             input[k + l * 4][i] = *lines[l]++;
1225          }
1226       }
1227       for (; k < 8; k++) {
1228          for (i = 0; i < comps; i++) {
1229             input[k + l * 4 + 12][i] = *lines[l]++;
1230          }
1231       }
1232    }
1233
1234    /* block layout:
1235     * 00, 01, 02, 03, 08, 09, 0a, 0b
1236     * 10, 11, 12, 13, 18, 19, 1a, 1b
1237     * 04, 05, 06, 07, 0c, 0d, 0e, 0f
1238     * 14, 15, 16, 17, 1c, 1d, 1e, 1f
1239     */
1240
1241    /* [dBorca]
1242     * stupidity flows forth from this
1243     */
1244    l = N_TEXELS;
1245    trualpha = 0;
1246    if (comps == 4) {
1247       /* skip all transparent black texels */
1248       l = 0;
1249       for (k = 0; k < N_TEXELS; k++) {
1250          /* test all components against 0 */
1251          if (!ISTBLACK(input[k])) {
1252             /* texel is not transparent black */
1253             COPY_4UBV(reord[l], input[k]);
1254             if (reord[l][ACOMP] < (255 - ALPHA_TS)) {
1255                /* non-opaque texel */
1256                trualpha = !0;
1257             }
1258             l++;
1259          }
1260       }
1261    }
1262
1263 #if 0
1264    if (trualpha) {
1265       fxt1_quantize_ALPHA0(cc, input, reord, l);
1266    } else if (l == 0) {
1267       cc[0] = cc[1] = cc[2] = -1;
1268       cc[3] = 0;
1269    } else if (l < N_TEXELS) {
1270       fxt1_quantize_HI(cc, input, reord, l);
1271    } else {
1272       fxt1_quantize_CHROMA(cc, input);
1273    }
1274    (void)fxt1_quantize_ALPHA1;
1275    (void)fxt1_quantize_MIXED1;
1276    (void)fxt1_quantize_MIXED0;
1277 #else
1278    if (trualpha) {
1279       fxt1_quantize_ALPHA1(cc, input);
1280    } else if (l == 0) {
1281       cc[0] = cc[1] = cc[2] = ~0u;
1282       cc[3] = 0;
1283    } else if (l < N_TEXELS) {
1284       fxt1_quantize_MIXED1(cc, input);
1285    } else {
1286       fxt1_quantize_MIXED0(cc, input);
1287    }
1288    (void)fxt1_quantize_ALPHA0;
1289    (void)fxt1_quantize_HI;
1290    (void)fxt1_quantize_CHROMA;
1291 #endif
1292 }
1293
1294
1295
1296 /**
1297  * Upscale an image by replication, not (typical) stretching.
1298  * We use this when the image width or height is less than a
1299  * certain size (4, 8) and we need to upscale an image.
1300  */
1301 static void
1302 upscale_teximage2d(GLsizei inWidth, GLsizei inHeight,
1303                    GLsizei outWidth, GLsizei outHeight,
1304                    GLint comps, const GLubyte *src, GLint srcRowStride,
1305                    GLubyte *dest )
1306 {
1307    GLint i, j, k;
1308
1309    ASSERT(outWidth >= inWidth);
1310    ASSERT(outHeight >= inHeight);
1311 #if 0
1312    ASSERT(inWidth == 1 || inWidth == 2 || inHeight == 1 || inHeight == 2);
1313    ASSERT((outWidth & 3) == 0);
1314    ASSERT((outHeight & 3) == 0);
1315 #endif
1316
1317    for (i = 0; i < outHeight; i++) {
1318       const GLint ii = i % inHeight;
1319       for (j = 0; j < outWidth; j++) {
1320          const GLint jj = j % inWidth;
1321          for (k = 0; k < comps; k++) {
1322             dest[(i * outWidth + j) * comps + k]
1323                = src[ii * srcRowStride + jj * comps + k];
1324          }
1325       }
1326    }
1327 }
1328
1329
1330 static void
1331 fxt1_encode (GLuint width, GLuint height, GLint comps,
1332              const void *source, GLint srcRowStride,
1333              void *dest, GLint destRowStride)
1334 {
1335    GLuint x, y;
1336    const GLubyte *data;
1337    GLuint *encoded = (GLuint *)dest;
1338    void *newSource = NULL;
1339
1340    assert(comps == 3 || comps == 4);
1341
1342    /* Replicate image if width is not M8 or height is not M4 */
1343    if ((width & 7) | (height & 3)) {
1344       GLint newWidth = (width + 7) & ~7;
1345       GLint newHeight = (height + 3) & ~3;
1346       newSource = malloc(comps * newWidth * newHeight * sizeof(GLubyte));
1347       if (!newSource) {
1348          GET_CURRENT_CONTEXT(ctx);
1349          _mesa_error(ctx, GL_OUT_OF_MEMORY, "texture compression");
1350          goto cleanUp;
1351       }
1352       upscale_teximage2d(width, height, newWidth, newHeight,
1353                          comps, (const GLubyte *) source,
1354                          srcRowStride, (GLubyte *) newSource);
1355       source = newSource;
1356       width = newWidth;
1357       height = newHeight;
1358       srcRowStride = comps * newWidth;
1359    }
1360
1361    data = (const GLubyte *) source;
1362    destRowStride = (destRowStride - width * 2) / 4;
1363    for (y = 0; y < height; y += 4) {
1364       GLuint offs = 0 + (y + 0) * srcRowStride;
1365       for (x = 0; x < width; x += 8) {
1366          const GLubyte *lines[4];
1367          lines[0] = &data[offs];
1368          lines[1] = lines[0] + srcRowStride;
1369          lines[2] = lines[1] + srcRowStride;
1370          lines[3] = lines[2] + srcRowStride;
1371          offs += 8 * comps;
1372          fxt1_quantize(encoded, lines, comps);
1373          /* 128 bits per 8x4 block */
1374          encoded += 4;
1375       }
1376       encoded += destRowStride;
1377    }
1378
1379  cleanUp:
1380    if (newSource != NULL) {
1381       free(newSource);
1382    }
1383 }
1384
1385
1386 /***************************************************************************\
1387  * FXT1 decoder
1388  *
1389  * The decoder is based on GL_3DFX_texture_compression_FXT1
1390  * specification and serves as a concept for the encoder.
1391 \***************************************************************************/
1392
1393
1394 /* lookup table for scaling 5 bit colors up to 8 bits */
1395 static const GLubyte _rgb_scale_5[] = {
1396    0,   8,   16,  25,  33,  41,  49,  58,
1397    66,  74,  82,  90,  99,  107, 115, 123,
1398    132, 140, 148, 156, 165, 173, 181, 189,
1399    197, 206, 214, 222, 230, 239, 247, 255
1400 };
1401
1402 /* lookup table for scaling 6 bit colors up to 8 bits */
1403 static const GLubyte _rgb_scale_6[] = {
1404    0,   4,   8,   12,  16,  20,  24,  28,
1405    32,  36,  40,  45,  49,  53,  57,  61,
1406    65,  69,  73,  77,  81,  85,  89,  93,
1407    97,  101, 105, 109, 113, 117, 121, 125,
1408    130, 134, 138, 142, 146, 150, 154, 158,
1409    162, 166, 170, 174, 178, 182, 186, 190,
1410    194, 198, 202, 206, 210, 215, 219, 223,
1411    227, 231, 235, 239, 243, 247, 251, 255
1412 };
1413
1414
/* CC_SEL: view cc as an array of dwords and return the dword containing bit
 *         number `which`, shifted right so that this bit becomes bit 0.
 *         Bits above the wanted field are not masked off, and a field that
 *         crosses a dword boundary cannot be fetched this way.  The data is
 *         only read, so the cast keeps it const.
 */
#define CC_SEL(cc, which) (((const GLuint *)(cc))[(which) / 32] >> ((which) & 31))
/* UP5: expand the low 5 bits of c to an 8-bit value */
#define UP5(c) (_rgb_scale_5[(c) & 31])
/* UP6: expand the 6-bit value made of the low 5 bits of c followed by the
 *      low bit of b to an 8-bit value
 */
#define UP6(c, b) (_rgb_scale_6[(((c) & 31) << 1) | ((b) & 1)])
/* LERP: rounded interpolation, t/n of the way from c0 to c1.  Enclosed in
 *       parentheses so it can be used inside a larger expression.
 */
#define LERP(n, t, c0, c1) ((((n) - (t)) * (c0) + (t) * (c1) + (n) / 2) / (n))
1419
1420
1421 static void
1422 fxt1_decode_1HI (const GLubyte *code, GLint t, GLubyte *rgba)
1423 {
1424    const GLuint *cc;
1425
1426    t *= 3;
1427    cc = (const GLuint *)(code + t / 8);
1428    t = (cc[0] >> (t & 7)) & 7;
1429
1430    if (t == 7) {
1431       rgba[RCOMP] = rgba[GCOMP] = rgba[BCOMP] = rgba[ACOMP] = 0;
1432    } else {
1433       GLubyte r, g, b;
1434       cc = (const GLuint *)(code + 12);
1435       if (t == 0) {
1436          b = UP5(CC_SEL(cc, 0));
1437          g = UP5(CC_SEL(cc, 5));
1438          r = UP5(CC_SEL(cc, 10));
1439       } else if (t == 6) {
1440          b = UP5(CC_SEL(cc, 15));
1441          g = UP5(CC_SEL(cc, 20));
1442          r = UP5(CC_SEL(cc, 25));
1443       } else {
1444          b = LERP(6, t, UP5(CC_SEL(cc, 0)), UP5(CC_SEL(cc, 15)));
1445          g = LERP(6, t, UP5(CC_SEL(cc, 5)), UP5(CC_SEL(cc, 20)));
1446          r = LERP(6, t, UP5(CC_SEL(cc, 10)), UP5(CC_SEL(cc, 25)));
1447       }
1448       rgba[RCOMP] = r;
1449       rgba[GCOMP] = g;
1450       rgba[BCOMP] = b;
1451       rgba[ACOMP] = 255;
1452    }
1453 }
1454
1455
1456 static void
1457 fxt1_decode_1CHROMA (const GLubyte *code, GLint t, GLubyte *rgba)
1458 {
1459    const GLuint *cc;
1460    GLuint kk;
1461
1462    cc = (const GLuint *)code;
1463    if (t & 16) {
1464       cc++;
1465       t &= 15;
1466    }
1467    t = (cc[0] >> (t * 2)) & 3;
1468
1469    t *= 15;
1470    cc = (const GLuint *)(code + 8 + t / 8);
1471    kk = cc[0] >> (t & 7);
1472    rgba[BCOMP] = UP5(kk);
1473    rgba[GCOMP] = UP5(kk >> 5);
1474    rgba[RCOMP] = UP5(kk >> 10);
1475    rgba[ACOMP] = 255;
1476 }
1477
1478
1479 static void
1480 fxt1_decode_1MIXED (const GLubyte *code, GLint t, GLubyte *rgba)
1481 {
1482    const GLuint *cc;
1483    GLuint col[2][3];
1484    GLint glsb, selb;
1485
1486    cc = (const GLuint *)code;
1487    if (t & 16) {
1488       t &= 15;
1489       t = (cc[1] >> (t * 2)) & 3;
1490       /* col 2 */
1491       col[0][BCOMP] = (*(const GLuint *)(code + 11)) >> 6;
1492       col[0][GCOMP] = CC_SEL(cc, 99);
1493       col[0][RCOMP] = CC_SEL(cc, 104);
1494       /* col 3 */
1495       col[1][BCOMP] = CC_SEL(cc, 109);
1496       col[1][GCOMP] = CC_SEL(cc, 114);
1497       col[1][RCOMP] = CC_SEL(cc, 119);
1498       glsb = CC_SEL(cc, 126);
1499       selb = CC_SEL(cc, 33);
1500    } else {
1501       t = (cc[0] >> (t * 2)) & 3;
1502       /* col 0 */
1503       col[0][BCOMP] = CC_SEL(cc, 64);
1504       col[0][GCOMP] = CC_SEL(cc, 69);
1505       col[0][RCOMP] = CC_SEL(cc, 74);
1506       /* col 1 */
1507       col[1][BCOMP] = CC_SEL(cc, 79);
1508       col[1][GCOMP] = CC_SEL(cc, 84);
1509       col[1][RCOMP] = CC_SEL(cc, 89);
1510       glsb = CC_SEL(cc, 125);
1511       selb = CC_SEL(cc, 1);
1512    }
1513
1514    if (CC_SEL(cc, 124) & 1) {
1515       /* alpha[0] == 1 */
1516
1517       if (t == 3) {
1518          /* zero */
1519          rgba[RCOMP] = rgba[BCOMP] = rgba[GCOMP] = rgba[ACOMP] = 0;
1520       } else {
1521          GLubyte r, g, b;
1522          if (t == 0) {
1523             b = UP5(col[0][BCOMP]);
1524             g = UP5(col[0][GCOMP]);
1525             r = UP5(col[0][RCOMP]);
1526          } else if (t == 2) {
1527             b = UP5(col[1][BCOMP]);
1528             g = UP6(col[1][GCOMP], glsb);
1529             r = UP5(col[1][RCOMP]);
1530          } else {
1531             b = (UP5(col[0][BCOMP]) + UP5(col[1][BCOMP])) / 2;
1532             g = (UP5(col[0][GCOMP]) + UP6(col[1][GCOMP], glsb)) / 2;
1533             r = (UP5(col[0][RCOMP]) + UP5(col[1][RCOMP])) / 2;
1534          }
1535          rgba[RCOMP] = r;
1536          rgba[GCOMP] = g;
1537          rgba[BCOMP] = b;
1538          rgba[ACOMP] = 255;
1539       }
1540    } else {
1541       /* alpha[0] == 0 */
1542       GLubyte r, g, b;
1543       if (t == 0) {
1544          b = UP5(col[0][BCOMP]);
1545          g = UP6(col[0][GCOMP], glsb ^ selb);
1546          r = UP5(col[0][RCOMP]);
1547       } else if (t == 3) {
1548          b = UP5(col[1][BCOMP]);
1549          g = UP6(col[1][GCOMP], glsb);
1550          r = UP5(col[1][RCOMP]);
1551       } else {
1552          b = LERP(3, t, UP5(col[0][BCOMP]), UP5(col[1][BCOMP]));
1553          g = LERP(3, t, UP6(col[0][GCOMP], glsb ^ selb),
1554                         UP6(col[1][GCOMP], glsb));
1555          r = LERP(3, t, UP5(col[0][RCOMP]), UP5(col[1][RCOMP]));
1556       }
1557       rgba[RCOMP] = r;
1558       rgba[GCOMP] = g;
1559       rgba[BCOMP] = b;
1560       rgba[ACOMP] = 255;
1561    }
1562 }
1563
1564
1565 static void
1566 fxt1_decode_1ALPHA (const GLubyte *code, GLint t, GLubyte *rgba)
1567 {
1568    const GLuint *cc;
1569    GLubyte r, g, b, a;
1570
1571    cc = (const GLuint *)code;
1572    if (CC_SEL(cc, 124) & 1) {
1573       /* lerp == 1 */
1574       GLuint col0[4];
1575
1576       if (t & 16) {
1577          t &= 15;
1578          t = (cc[1] >> (t * 2)) & 3;
1579          /* col 2 */
1580          col0[BCOMP] = (*(const GLuint *)(code + 11)) >> 6;
1581          col0[GCOMP] = CC_SEL(cc, 99);
1582          col0[RCOMP] = CC_SEL(cc, 104);
1583          col0[ACOMP] = CC_SEL(cc, 119);
1584       } else {
1585          t = (cc[0] >> (t * 2)) & 3;
1586          /* col 0 */
1587          col0[BCOMP] = CC_SEL(cc, 64);
1588          col0[GCOMP] = CC_SEL(cc, 69);
1589          col0[RCOMP] = CC_SEL(cc, 74);
1590          col0[ACOMP] = CC_SEL(cc, 109);
1591       }
1592
1593       if (t == 0) {
1594          b = UP5(col0[BCOMP]);
1595          g = UP5(col0[GCOMP]);
1596          r = UP5(col0[RCOMP]);
1597          a = UP5(col0[ACOMP]);
1598       } else if (t == 3) {
1599          b = UP5(CC_SEL(cc, 79));
1600          g = UP5(CC_SEL(cc, 84));
1601          r = UP5(CC_SEL(cc, 89));
1602          a = UP5(CC_SEL(cc, 114));
1603       } else {
1604          b = LERP(3, t, UP5(col0[BCOMP]), UP5(CC_SEL(cc, 79)));
1605          g = LERP(3, t, UP5(col0[GCOMP]), UP5(CC_SEL(cc, 84)));
1606          r = LERP(3, t, UP5(col0[RCOMP]), UP5(CC_SEL(cc, 89)));
1607          a = LERP(3, t, UP5(col0[ACOMP]), UP5(CC_SEL(cc, 114)));
1608       }
1609    } else {
1610       /* lerp == 0 */
1611
1612       if (t & 16) {
1613          cc++;
1614          t &= 15;
1615       }
1616       t = (cc[0] >> (t * 2)) & 3;
1617
1618       if (t == 3) {
1619          /* zero */
1620          r = g = b = a = 0;
1621       } else {
1622          GLuint kk;
1623          cc = (const GLuint *)code;
1624          a = UP5(cc[3] >> (t * 5 + 13));
1625          t *= 15;
1626          cc = (const GLuint *)(code + 8 + t / 8);
1627          kk = cc[0] >> (t & 7);
1628          b = UP5(kk);
1629          g = UP5(kk >> 5);
1630          r = UP5(kk >> 10);
1631       }
1632    }
1633    rgba[RCOMP] = r;
1634    rgba[GCOMP] = g;
1635    rgba[BCOMP] = b;
1636    rgba[ACOMP] = a;
1637 }
1638
1639
1640 void
1641 fxt1_decode_1 (const void *texture, GLint stride, /* in pixels */
1642                GLint i, GLint j, GLubyte *rgba)
1643 {
1644    static void (*decode_1[]) (const GLubyte *, GLint, GLubyte *) = {
1645       fxt1_decode_1HI,     /* cc-high   = "00?" */
1646       fxt1_decode_1HI,     /* cc-high   = "00?" */
1647       fxt1_decode_1CHROMA, /* cc-chroma = "010" */
1648       fxt1_decode_1ALPHA,  /* alpha     = "011" */
1649       fxt1_decode_1MIXED,  /* mixed     = "1??" */
1650       fxt1_decode_1MIXED,  /* mixed     = "1??" */
1651       fxt1_decode_1MIXED,  /* mixed     = "1??" */
1652       fxt1_decode_1MIXED   /* mixed     = "1??" */
1653    };
1654
1655    const GLubyte *code = (const GLubyte *)texture +
1656                          ((j / 4) * (stride / 8) + (i / 8)) * 16;
1657    GLint mode = CC_SEL(code, 125);
1658    GLint t = i & 7;
1659
1660    if (t & 4) {
1661       t += 12;
1662    }
1663    t += (j & 3) * 4;
1664
1665    decode_1[mode](code, t, rgba);
1666 }
1667
1668
1669 #endif /* FEATURE_texture_fxt1 */