src/mesa/main/texcompress_fxt1.c

   1 /*
   2  * Mesa 3-D graphics library
   3  * Version:  7.1
   4  *
   5  * Copyright (C) 1999-2008  Brian Paul   All Rights Reserved.
   6  *
   7  * Permission is hereby granted, free of charge, to any person obtaining a
   8  * copy of this software and associated documentation files (the "Software"),
   9  * to deal in the Software without restriction, including without limitation
  10  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  11  * and/or sell copies of the Software, and to permit persons to whom the
  12  * Software is furnished to do so, subject to the following conditions:
  13  *
  14  * The above copyright notice and this permission notice shall be included
  15  * in all copies or substantial portions of the Software.
  16  *
  17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  18  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  20  * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
  21  * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  22  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  23  */
  24
  25
  26 /**
  27  * \file texcompress_fxt1.c
  28  * GL_3DFX_texture_compression_FXT1 support.
  29  */
  30
  31
  32 #include "glheader.h"
  33 #include "imports.h"
  34 #include "colormac.h"
  35 #include "image.h"
  36 #include "macros.h"
  37 #include "mfeatures.h"
  38 #include "mipmap.h"
  39 #include "texcompress.h"
  40 #include "texcompress_fxt1.h"
  41 #include "texstore.h"
  42 #include "swrast/s_context.h"
  43
  44
  45 #if FEATURE_texture_fxt1
  46
  47
/*
 * Forward declaration of the FXT1 encoder.  The texstore functions below
 * call it with the source image width and height, the number of components
 * per source texel (3 for RGB, 4 for RGBA), the source pixels with their row
 * stride, and the destination address with its row stride.
 * (Presumably defined further down in this file.)
 */
static void
fxt1_encode (GLuint width, GLuint height, GLint comps,
             const void *source, GLint srcRowStride,
             void *dest, GLint destRowStride);

/*
 * Forward declaration of the single-texel decoder.  The texel fetch
 * functions below call it with the image data pointer, the image row stride
 * and the texel coordinates (i, j), then read the four components it stores
 * in rgba[].
 */
void
fxt1_decode_1 (const void *texture, GLint stride,
               GLint i, GLint j, GLubyte *rgba);
  56
  57
  58 /**
  59  * Store user's image in rgb_fxt1 format.
  60  */
  61 GLboolean
  62 _mesa_texstore_rgb_fxt1(TEXSTORE_PARAMS)
  63 {
  64    const GLubyte *pixels;
  65    GLint srcRowStride;
  66    GLubyte *dst;
  67    const GLint texWidth = dstRowStride * 8 / 16; /* a bit of a hack */
  68    const GLubyte *tempImage = NULL;
  69
  70    ASSERT(dstFormat == MESA_FORMAT_RGB_FXT1);
  71    ASSERT(dstXoffset % 8 == 0);
  72    ASSERT(dstYoffset % 4 == 0);
  73    ASSERT(dstZoffset     == 0);
  74    (void) dstZoffset;
  75    (void) dstImageOffsets;
  76
  77    if (srcFormat != GL_RGB ||
  78        srcType != GL_UNSIGNED_BYTE ||
  79        ctx->_ImageTransferState ||
  80        srcPacking->SwapBytes) {
  81       /* convert image to RGB/GLubyte */
  82       tempImage = _mesa_make_temp_ubyte_image(ctx, dims,
  83                                              baseInternalFormat,
  84                                              _mesa_get_format_base_format(dstFormat),
  85                                              srcWidth, srcHeight, srcDepth,
  86                                              srcFormat, srcType, srcAddr,
  87                                              srcPacking);
  88       if (!tempImage)
  89          return GL_FALSE; /* out of memory */
  90       pixels = tempImage;
  91       srcRowStride = 3 * srcWidth;
  92       srcFormat = GL_RGB;
  93    }
  94    else {
  95       pixels = (const GLubyte *) srcAddr;
  96       srcRowStride = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat,
  97                                             srcType) / sizeof(GLubyte);
  98    }
  99
 100    dst = _mesa_compressed_image_address(dstXoffset, dstYoffset, 0,
 101                                         dstFormat,
 102                                         texWidth, (GLubyte *) dstAddr);
 103
 104    fxt1_encode(srcWidth, srcHeight, 3, pixels, srcRowStride,
 105                dst, dstRowStride);
 106
 107    if (tempImage)
 108       free((void*) tempImage);
 109
 110    return GL_TRUE;
 111 }
 112
 113
 114 /**
 115  * Store user's image in rgba_fxt1 format.
 116  */
 117 GLboolean
 118 _mesa_texstore_rgba_fxt1(TEXSTORE_PARAMS)
 119 {
 120    const GLubyte *pixels;
 121    GLint srcRowStride;
 122    GLubyte *dst;
 123    GLint texWidth = dstRowStride * 8 / 16; /* a bit of a hack */
 124    const GLubyte *tempImage = NULL;
 125
 126    ASSERT(dstFormat == MESA_FORMAT_RGBA_FXT1);
 127    ASSERT(dstXoffset % 8 == 0);
 128    ASSERT(dstYoffset % 4 == 0);
 129    ASSERT(dstZoffset     == 0);
 130    (void) dstZoffset;
 131    (void) dstImageOffsets;
 132
 133    if (srcFormat != GL_RGBA ||
 134        srcType != GL_UNSIGNED_BYTE ||
 135        ctx->_ImageTransferState ||
 136        srcPacking->SwapBytes) {
 137       /* convert image to RGBA/GLubyte */
 138       tempImage = _mesa_make_temp_ubyte_image(ctx, dims,
 139                                              baseInternalFormat,
 140                                              _mesa_get_format_base_format(dstFormat),
 141                                              srcWidth, srcHeight, srcDepth,
 142                                              srcFormat, srcType, srcAddr,
 143                                              srcPacking);
 144       if (!tempImage)
 145          return GL_FALSE; /* out of memory */
 146       pixels = tempImage;
 147       srcRowStride = 4 * srcWidth;
 148       srcFormat = GL_RGBA;
 149    }
 150    else {
 151       pixels = (const GLubyte *) srcAddr;
 152       srcRowStride = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat,
 153                                             srcType) / sizeof(GLubyte);
 154    }
 155
 156    dst = _mesa_compressed_image_address(dstXoffset, dstYoffset, 0,
 157                                         dstFormat,
 158                                         texWidth, (GLubyte *) dstAddr);
 159
 160    fxt1_encode(srcWidth, srcHeight, 4, pixels, srcRowStride,
 161                dst, dstRowStride);
 162
 163    if (tempImage)
 164       free((void*) tempImage);
 165
 166    return GL_TRUE;
 167 }
 168
 169
 170 void
 171 _mesa_fetch_texel_2d_f_rgba_fxt1( const struct swrast_texture_image *texImage,
 172                                   GLint i, GLint j, GLint k, GLfloat *texel )
 173 {
 174    /* just sample as GLubyte and convert to float here */
 175    GLubyte rgba[4];
 176    (void) k;
 177    fxt1_decode_1(texImage->Base.Data, texImage->Base.RowStride, i, j, rgba);
 178    texel[RCOMP] = UBYTE_TO_FLOAT(rgba[RCOMP]);
 179    texel[GCOMP] = UBYTE_TO_FLOAT(rgba[GCOMP]);
 180    texel[BCOMP] = UBYTE_TO_FLOAT(rgba[BCOMP]);
 181    texel[ACOMP] = UBYTE_TO_FLOAT(rgba[ACOMP]);
 182 }
 183
 184
 185 void
 186 _mesa_fetch_texel_2d_f_rgb_fxt1( const struct swrast_texture_image *texImage,
 187                                  GLint i, GLint j, GLint k, GLfloat *texel )
 188 {
 189    /* just sample as GLubyte and convert to float here */
 190    GLubyte rgba[4];
 191    (void) k;
 192    fxt1_decode_1(texImage->Base.Data, texImage->Base.RowStride, i, j, rgba);
 193    texel[RCOMP] = UBYTE_TO_FLOAT(rgba[RCOMP]);
 194    texel[GCOMP] = UBYTE_TO_FLOAT(rgba[GCOMP]);
 195    texel[BCOMP] = UBYTE_TO_FLOAT(rgba[BCOMP]);
 196    texel[ACOMP] = 1.0F;
 197 }
 198
 199
 200
 201 /***************************************************************************\
 202  * FXT1 encoder
 203  *
 204  * The encoder was built by reversing the decoder,
 205  * and is vaguely based on Texus2 by 3dfx. Note that this code
 206  * is merely a proof of concept, since it is highly UNoptimized;
 207  * moreover, it is sub-optimal due to initial conditions passed
 208  * to Lloyd's algorithm (the interpolation modes are even worse).
 209 \***************************************************************************/
 210
 211
/* Sizes and tuning constants shared by the encoder routines below. */
#define MAX_COMP 4 /* maximum number of components per texel ever needed */
#define MAX_VECT 4 /* maximum number of base vectors ever searched for */
#define N_TEXELS 32 /* number of texels in a block (always 32) */
#define LL_N_REP 50 /* upper bound on the iterations of Lloyd's VQ */
#define LL_RMS_D 10 /* fault tolerance (maximum delta) */
#define LL_RMS_E 255 /* fault tolerance (maximum error) */
#define ALPHA_TS 2 /* alpha threshold: (255 - ALPHA_TS) deemed opaque */
/* True when the GLuint read at address v is zero.  The encoder applies it
 * to 4-byte texels to detect "transparent black".
 * NOTE(review): v is dereferenced through a GLuint pointer, so it is
 * presumably expected to be suitably aligned -- confirm. */
#define ISTBLACK(v) (*((GLuint *)(v)) == 0)
 220
 221
 222 /*
 223  * Define a 64-bit unsigned integer type and macros
 224  */
 225 #if 1
 226
 227 #define FX64_NATIVE 1
 228
 229 typedef uint64_t Fx64;
 230
 231 #define FX64_MOV32(a, b) a = b
 232 #define FX64_OR32(a, b)  a |= b
 233 #define FX64_SHL(a, c)   a <<= c
 234
 235 #else
 236
 237 #define FX64_NATIVE 0
 238
 239 typedef struct {
 240    GLuint lo, hi;
 241 } Fx64;
 242
 243 #define FX64_MOV32(a, b) a.lo = b
 244 #define FX64_OR32(a, b)  a.lo |= b
 245
 246 #define FX64_SHL(a, c)                                 \
 247    do {                                                \
 248        if ((c) >= 32) {                                \
 249           a.hi = a.lo << ((c) - 32);                   \
 250           a.lo = 0;                                    \
 251        } else {                                        \
 252           a.hi = (a.hi << (c)) | (a.lo >> (32 - (c))); \
 253           a.lo <<= (c);                                \
 254        }                                               \
 255    } while (0)
 256
 257 #endif
 258
 259
/* Per-component weight applied by MAKEIVEC.  As defined here the argument
 * is ignored and every component gets weight 1. */
#define F(i) (GLfloat)1 /* can be used to obtain an oblong metric: 0.30 / 0.59 / 0.11 */
/* When non-zero, CALCCDOT clamps its result to the range [0, NV]. */
#define SAFECDOT 1 /* for paranoids */

/*
 * MAKEIVEC(NV, NC, IV, B, V0, V1)
 *
 * Build the interpolation vector IV[0..NC-1] and the bias B for the line
 * running from colour V0 to colour V1, scaled so that CALCCDOT yields an
 * index near 0 for V0 and near NV for V1 (B includes +0.5 so that the
 * truncation in CALCCDOT rounds to nearest).
 *
 * The macro uses a variable named `i' from the enclosing scope as its loop
 * counter.  It divides by the squared distance between V0 and V1, so the
 * two colours must differ.
 */
#define MAKEIVEC(NV, NC, IV, B, V0, V1)  \
   do {                                  \
      /* compute interpolation vector */ \
      GLfloat d2 = 0.0F;                 \
      GLfloat rd2;                       \
                                         \
      for (i = 0; i < NC; i++) {         \
         IV[i] = (V1[i] - V0[i]) * F(i); \
         d2 += IV[i] * IV[i];            \
      }                                  \
      rd2 = (GLfloat)NV / d2;            \
      B = 0;                             \
      for (i = 0; i < NC; i++) {         \
         IV[i] *= F(i);                  \
         B -= IV[i] * V0[i];             \
         IV[i] *= rd2;                   \
      }                                  \
      B = B * rd2 + 0.5f;                \
   } while (0)

/*
 * CALCCDOT(TEXEL, NV, NC, IV, B, V)
 *
 * Project colour V onto the interpolation vector prepared by MAKEIVEC and
 * store the resulting index in TEXEL: the dot product of V and IV plus B,
 * truncated to GLint and, when SAFECDOT is set, clamped to [0, NV].
 * Like MAKEIVEC it uses the enclosing scope's `i' as loop counter.
 */
#define CALCCDOT(TEXEL, NV, NC, IV, B, V)\
   do {                                  \
      GLfloat dot = 0.0F;                \
      for (i = 0; i < NC; i++) {         \
         dot += V[i] * IV[i];            \
      }                                  \
      TEXEL = (GLint)(dot + B);          \
      if (SAFECDOT) {                    \
         if (TEXEL < 0) {                \
            TEXEL = 0;                   \
         } else if (TEXEL > NV) {        \
            TEXEL = NV;                  \
         }                               \
      }                                  \
   } while (0)
 298
 299
 300 static GLint
 301 fxt1_bestcol (GLfloat vec[][MAX_COMP], GLint nv,
 302               GLubyte input[MAX_COMP], GLint nc)
 303 {
 304    GLint i, j, best = -1;
 305    GLfloat err = 1e9; /* big enough */
 306
 307    for (j = 0; j < nv; j++) {
 308       GLfloat e = 0.0F;
 309       for (i = 0; i < nc; i++) {
 310          e += (vec[j][i] - input[i]) * (vec[j][i] - input[i]);
 311       }
 312       if (e < err) {
 313          err = e;
 314          best = j;
 315       }
 316    }
 317
 318    return best;
 319 }
 320
 321
 322 static GLint
 323 fxt1_worst (GLfloat vec[MAX_COMP],
 324             GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
 325 {
 326    GLint i, k, worst = -1;
 327    GLfloat err = -1.0F; /* small enough */
 328
 329    for (k = 0; k < n; k++) {
 330       GLfloat e = 0.0F;
 331       for (i = 0; i < nc; i++) {
 332          e += (vec[i] - input[k][i]) * (vec[i] - input[k][i]);
 333       }
 334       if (e > err) {
 335          err = e;
 336          worst = k;
 337       }
 338    }
 339
 340    return worst;
 341 }
 342
 343
 344 static GLint
 345 fxt1_variance (GLdouble variance[MAX_COMP],
 346                GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
 347 {
 348    GLint i, k, best = 0;
 349    GLint sx, sx2;
 350    GLdouble var, maxvar = -1; /* small enough */
 351    GLdouble teenth = 1.0 / n;
 352
 353    for (i = 0; i < nc; i++) {
 354       sx = sx2 = 0;
 355       for (k = 0; k < n; k++) {
 356          GLint t = input[k][i];
 357          sx += t;
 358          sx2 += t * t;
 359       }
 360       var = sx2 * teenth - sx * sx * teenth * teenth;
 361       if (maxvar < var) {
 362          maxvar = var;
 363          best = i;
 364       }
 365       if (variance) {
 366          variance[i] = var;
 367       }
 368    }
 369
 370    return best;
 371 }
 372
 373
 374 static GLint
 375 fxt1_choose (GLfloat vec[][MAX_COMP], GLint nv,
 376              GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
 377 {
 378 #if 0
 379    /* Choose colors from a grid.
 380     */
 381    GLint i, j;
 382
 383    for (j = 0; j < nv; j++) {
 384       GLint m = j * (n - 1) / (nv - 1);
 385       for (i = 0; i < nc; i++) {
 386          vec[j][i] = input[m][i];
 387       }
 388    }
 389 #else
 390    /* Our solution here is to find the darkest and brightest colors in
 391     * the 8x4 tile and use those as the two representative colors.
 392     * There are probably better algorithms to use (histogram-based).
 393     */
 394    GLint i, j, k;
 395    GLint minSum = 2000; /* big enough */
 396    GLint maxSum = -1; /* small enough */
 397    GLint minCol = 0; /* phoudoin: silent compiler! */
 398    GLint maxCol = 0; /* phoudoin: silent compiler! */
 399
 400    struct {
 401       GLint flag;
 402       GLint key;
 403       GLint freq;
 404       GLint idx;
 405    } hist[N_TEXELS];
 406    GLint lenh = 0;
 407
 408    memset(hist, 0, sizeof(hist));
 409
 410    for (k = 0; k < n; k++) {
 411       GLint l;
 412       GLint key = 0;
 413       GLint sum = 0;
 414       for (i = 0; i < nc; i++) {
 415          key <<= 8;
 416          key |= input[k][i];
 417          sum += input[k][i];
 418       }
 419       for (l = 0; l < n; l++) {
 420          if (!hist[l].flag) {
 421             /* alloc new slot */
 422             hist[l].flag = !0;
 423             hist[l].key = key;
 424             hist[l].freq = 1;
 425             hist[l].idx = k;
 426             lenh = l + 1;
 427             break;
 428          } else if (hist[l].key == key) {
 429             hist[l].freq++;
 430             break;
 431          }
 432       }
 433       if (minSum > sum) {
 434          minSum = sum;
 435          minCol = k;
 436       }
 437       if (maxSum < sum) {
 438          maxSum = sum;
 439          maxCol = k;
 440       }
 441    }
 442
 443    if (lenh <= nv) {
 444       for (j = 0; j < lenh; j++) {
 445          for (i = 0; i < nc; i++) {
 446             vec[j][i] = (GLfloat)input[hist[j].idx][i];
 447          }
 448       }
 449       for (; j < nv; j++) {
 450          for (i = 0; i < nc; i++) {
 451             vec[j][i] = vec[0][i];
 452          }
 453       }
 454       return 0;
 455    }
 456
 457    for (j = 0; j < nv; j++) {
 458       for (i = 0; i < nc; i++) {
 459          vec[j][i] = ((nv - 1 - j) * input[minCol][i] + j * input[maxCol][i] + (nv - 1) / 2) / (GLfloat)(nv - 1);
 460       }
 461    }
 462 #endif
 463
 464    return !0;
 465 }
 466
 467
 468 static GLint
 469 fxt1_lloyd (GLfloat vec[][MAX_COMP], GLint nv,
 470             GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
 471 {
 472    /* Use the generalized lloyd's algorithm for VQ:
 473     *     find 4 color vectors.
 474     *
 475     *     for each sample color
 476     *         sort to nearest vector.
 477     *
 478     *     replace each vector with the centroid of its matching colors.
 479     *
 480     *     repeat until RMS doesn't improve.
 481     *
 482     *     if a color vector has no samples, or becomes the same as another
 483     *     vector, replace it with the color which is farthest from a sample.
 484     *
 485     * vec[][MAX_COMP]           initial vectors and resulting colors
 486     * nv                        number of resulting colors required
 487     * input[N_TEXELS][MAX_COMP] input texels
 488     * nc                        number of components in input / vec
 489     * n                         number of input samples
 490     */
 491
 492    GLint sum[MAX_VECT][MAX_COMP]; /* used to accumulate closest texels */
 493    GLint cnt[MAX_VECT]; /* how many times a certain vector was chosen */
 494    GLfloat error, lasterror = 1e9;
 495
 496    GLint i, j, k, rep;
 497
 498    /* the quantizer */
 499    for (rep = 0; rep < LL_N_REP; rep++) {
 500       /* reset sums & counters */
 501       for (j = 0; j < nv; j++) {
 502          for (i = 0; i < nc; i++) {
 503             sum[j][i] = 0;
 504          }
 505          cnt[j] = 0;
 506       }
 507       error = 0;
 508
 509       /* scan whole block */
 510       for (k = 0; k < n; k++) {
 511 #if 1
 512          GLint best = -1;
 513          GLfloat err = 1e9; /* big enough */
 514          /* determine best vector */
 515          for (j = 0; j < nv; j++) {
 516             GLfloat e = (vec[j][0] - input[k][0]) * (vec[j][0] - input[k][0]) +
 517                       (vec[j][1] - input[k][1]) * (vec[j][1] - input[k][1]) +
 518                       (vec[j][2] - input[k][2]) * (vec[j][2] - input[k][2]);
 519             if (nc == 4) {
 520                e += (vec[j][3] - input[k][3]) * (vec[j][3] - input[k][3]);
 521             }
 522             if (e < err) {
 523                err = e;
 524                best = j;
 525             }
 526          }
 527 #else
 528          GLint best = fxt1_bestcol(vec, nv, input[k], nc, &err);
 529 #endif
 530          assert(best >= 0);
 531          /* add in closest color */
 532          for (i = 0; i < nc; i++) {
 533             sum[best][i] += input[k][i];
 534          }
 535          /* mark this vector as used */
 536          cnt[best]++;
 537          /* accumulate error */
 538          error += err;
 539       }
 540
 541       /* check RMS */
 542       if ((error < LL_RMS_E) ||
 543           ((error < lasterror) && ((lasterror - error) < LL_RMS_D))) {
 544          return !0; /* good match */
 545       }
 546       lasterror = error;
 547
 548       /* move each vector to the barycenter of its closest colors */
 549       for (j = 0; j < nv; j++) {
 550          if (cnt[j]) {
 551             GLfloat div = 1.0F / cnt[j];
 552             for (i = 0; i < nc; i++) {
 553                vec[j][i] = div * sum[j][i];
 554             }
 555          } else {
 556             /* this vec has no samples or is identical with a previous vec */
 557             GLint worst = fxt1_worst(vec[j], input, nc, n);
 558             for (i = 0; i < nc; i++) {
 559                vec[j][i] = input[worst][i];
 560             }
 561          }
 562       }
 563    }
 564
 565    return 0; /* could not converge fast enough */
 566 }
 567
 568
 569 static void
 570 fxt1_quantize_CHROMA (GLuint *cc,
 571                       GLubyte input[N_TEXELS][MAX_COMP])
 572 {
 573    const GLint n_vect = 4; /* 4 base vectors to find */
 574    const GLint n_comp = 3; /* 3 components: R, G, B */
 575    GLfloat vec[MAX_VECT][MAX_COMP];
 576    GLint i, j, k;
 577    Fx64 hi; /* high quadword */
 578    GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
 579
 580    if (fxt1_choose(vec, n_vect, input, n_comp, N_TEXELS) != 0) {
 581       fxt1_lloyd(vec, n_vect, input, n_comp, N_TEXELS);
 582    }
 583
 584    FX64_MOV32(hi, 4); /* cc-chroma = "010" + unused bit */
 585    for (j = n_vect - 1; j >= 0; j--) {
 586       for (i = 0; i < n_comp; i++) {
 587          /* add in colors */
 588          FX64_SHL(hi, 5);
 589          FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
 590       }
 591    }
 592    ((Fx64 *)cc)[1] = hi;
 593
 594    lohi = lolo = 0;
 595    /* right microtile */
 596    for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) {
 597       lohi <<= 2;
 598       lohi |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
 599    }
 600    /* left microtile */
 601    for (; k >= 0; k--) {
 602       lolo <<= 2;
 603       lolo |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
 604    }
 605    cc[1] = lohi;
 606    cc[0] = lolo;
 607 }
 608
 609
 610 static void
 611 fxt1_quantize_ALPHA0 (GLuint *cc,
 612                       GLubyte input[N_TEXELS][MAX_COMP],
 613                       GLubyte reord[N_TEXELS][MAX_COMP], GLint n)
 614 {
 615    const GLint n_vect = 3; /* 3 base vectors to find */
 616    const GLint n_comp = 4; /* 4 components: R, G, B, A */
 617    GLfloat vec[MAX_VECT][MAX_COMP];
 618    GLint i, j, k;
 619    Fx64 hi; /* high quadword */
 620    GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
 621
 622    /* the last vector indicates zero */
 623    for (i = 0; i < n_comp; i++) {
 624       vec[n_vect][i] = 0;
 625    }
 626
 627    /* the first n texels in reord are guaranteed to be non-zero */
 628    if (fxt1_choose(vec, n_vect, reord, n_comp, n) != 0) {
 629       fxt1_lloyd(vec, n_vect, reord, n_comp, n);
 630    }
 631
 632    FX64_MOV32(hi, 6); /* alpha = "011" + lerp = 0 */
 633    for (j = n_vect - 1; j >= 0; j--) {
 634       /* add in alphas */
 635       FX64_SHL(hi, 5);
 636       FX64_OR32(hi, (GLuint)(vec[j][ACOMP] / 8.0F));
 637    }
 638    for (j = n_vect - 1; j >= 0; j--) {
 639       for (i = 0; i < n_comp - 1; i++) {
 640          /* add in colors */
 641          FX64_SHL(hi, 5);
 642          FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
 643       }
 644    }
 645    ((Fx64 *)cc)[1] = hi;
 646
 647    lohi = lolo = 0;
 648    /* right microtile */
 649    for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) {
 650       lohi <<= 2;
 651       lohi |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
 652    }
 653    /* left microtile */
 654    for (; k >= 0; k--) {
 655       lolo <<= 2;
 656       lolo |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
 657    }
 658    cc[1] = lohi;
 659    cc[0] = lolo;
 660 }
 661
 662
/**
 * Encode one block in the interpolated alpha mode.
 *
 * Three RGBA vectors are stored: vec[0] is used only by the left half of
 * the block (texels 0..15), vec[2] only by the right half (texels 16..31),
 * and vec[1] is shared by both.  Each texel is stored as a 2-bit index in
 * the range [0, n_vect] along the line from its half's private vector to
 * the shared one.
 *
 * \param cc     output block.  cc[0] receives the indices of the left
 *               half, cc[1] those of the right half; the upper 64 bits
 *               receive the mode bits, the three alphas and the three
 *               colours, 5 bits per component.
 * \param input  the N_TEXELS RGBA texels of the block
 */
static void
fxt1_quantize_ALPHA1 (GLuint *cc,
                      GLubyte input[N_TEXELS][MAX_COMP])
{
   const GLint n_vect = 3; /* highest vector number in each microtile */
   const GLint n_comp = 4; /* 4 components: R, G, B, A */
   GLfloat vec[1 + 1 + 1][MAX_COMP]; /* 1.5 extrema for each sub-block */
   GLfloat b, iv[MAX_COMP]; /* interpolation vector */
   GLint i, j, k;
   Fx64 hi; /* high quadword */
   GLuint lohi, lolo; /* low quadword: hi dword, lo dword */

   GLint minSum;
   GLint maxSum;
   GLint minColL = 0, maxColL = 0;
   GLint minColR = 0, maxColR = 0;
   GLint sumL = 0, sumR = 0;
   GLint nn_comp;
   /* Our solution here is to find the darkest and brightest colors in
    * the 4x4 tile and use those as the two representative colors.
    * There are probably better algorithms to use (histogram-based).
    */
   /* Left half.  If summing over all components finds no two texels with
    * different sums, retry with one component fewer, down to a single one.
    * NOTE(review): sumL is not reset between retries, so it accumulates
    * the sums of every pass that ran -- confirm this is intended.
    */
   nn_comp = n_comp;
   while ((minColL == maxColL) && nn_comp) {
       minSum = 2000; /* big enough */
       maxSum = -1; /* small enough */
       for (k = 0; k < N_TEXELS / 2; k++) {
           GLint sum = 0;
           for (i = 0; i < nn_comp; i++) {
               sum += input[k][i];
           }
           if (minSum > sum) {
               minSum = sum;
               minColL = k;
           }
           if (maxSum < sum) {
               maxSum = sum;
               maxColL = k;
           }
           sumL += sum;
       }

       nn_comp--;
   }

   /* Right half: same search over texels 16..31, accumulating into sumR. */
   nn_comp = n_comp;
   while ((minColR == maxColR) && nn_comp) {
       minSum = 2000; /* big enough */
       maxSum = -1; /* small enough */
       for (k = N_TEXELS / 2; k < N_TEXELS; k++) {
           GLint sum = 0;
           for (i = 0; i < nn_comp; i++) {
               sum += input[k][i];
           }
           if (minSum > sum) {
               minSum = sum;
               minColR = k;
           }
           if (maxSum < sum) {
               maxSum = sum;
               maxColR = k;
           }
           sumR += sum;
       }

       nn_comp--;
   }

   /* choose the common vector (yuck!) */
   {
      GLint j1, j2;
      GLint v1 = 0, v2 = 0;
      GLfloat err = 1e9; /* big enough */
      GLfloat tv[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
      /* tv[0], tv[1]: left min/max; tv[2], tv[3]: right min/max */
      for (i = 0; i < n_comp; i++) {
         tv[0][i] = input[minColL][i];
         tv[1][i] = input[maxColL][i];
         tv[2][i] = input[minColR][i];
         tv[3][i] = input[maxColR][i];
      }
      /* find the closest (left extremum, right extremum) pair */
      for (j1 = 0; j1 < 2; j1++) {
         for (j2 = 2; j2 < 4; j2++) {
            GLfloat e = 0.0F;
            for (i = 0; i < n_comp; i++) {
               e += (tv[j1][i] - tv[j2][i]) * (tv[j1][i] - tv[j2][i]);
            }
            if (e < err) {
               err = e;
               v1 = j1;
               v2 = j2;
            }
         }
      }
      /* vec[0] / vec[2]: the left / right extremum that was NOT paired
       * (1 - v1 and 5 - v2 select the other member of each pair);
       * vec[1]: the paired extrema blended with weights sumL and sumR.
       * NOTE(review): the blend divides by (sumL + sumR) with no guard
       * against zero -- presumably callers never pass an all-zero block;
       * confirm.
       */
      for (i = 0; i < n_comp; i++) {
         vec[0][i] = tv[1 - v1][i];
         vec[1][i] = (tv[v1][i] * sumL + tv[v2][i] * sumR) / (sumL + sumR);
         vec[2][i] = tv[5 - v2][i];
      }
   }

   /* left microtile */
   /* indices stay all zero when the search found no two differing texels */
   cc[0] = 0;
   if (minColL != maxColL) {
      /* compute interpolation vector */
      MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);

      /* add in texels */
      lolo = 0;
      for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
         GLint texel;
         /* interpolate color */
         CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
         /* add in texel */
         lolo <<= 2;
         lolo |= texel;
      }

      cc[0] = lolo;
   }

   /* right microtile */
   /* interpolates from vec[2] towards the shared vec[1] */
   cc[1] = 0;
   if (minColR != maxColR) {
      /* compute interpolation vector */
      MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[1]);

      /* add in texels */
      lohi = 0;
      for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
         GLint texel;
         /* interpolate color */
         CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
         /* add in texel */
         lohi <<= 2;
         lohi |= texel;
      }

      cc[1] = lohi;
   }

   /* pack mode bits, then alphas, then colours (last vector first) */
   FX64_MOV32(hi, 7); /* alpha = "011" + lerp = 1 */
   for (j = n_vect - 1; j >= 0; j--) {
      /* add in alphas */
      FX64_SHL(hi, 5);
      FX64_OR32(hi, (GLuint)(vec[j][ACOMP] / 8.0F));
   }
   for (j = n_vect - 1; j >= 0; j--) {
      for (i = 0; i < n_comp - 1; i++) {
         /* add in colors */
         FX64_SHL(hi, 5);
         FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
      }
   }
   ((Fx64 *)cc)[1] = hi;
}
 818
 819
 820 static void
 821 fxt1_quantize_HI (GLuint *cc,
 822                   GLubyte input[N_TEXELS][MAX_COMP],
 823                   GLubyte reord[N_TEXELS][MAX_COMP], GLint n)
 824 {
 825    const GLint n_vect = 6; /* highest vector number */
 826    const GLint n_comp = 3; /* 3 components: R, G, B */
 827    GLfloat b = 0.0F;       /* phoudoin: silent compiler! */
 828    GLfloat iv[MAX_COMP];   /* interpolation vector */
 829    GLint i, k;
 830    GLuint hihi; /* high quadword: hi dword */
 831
 832    GLint minSum = 2000; /* big enough */
 833    GLint maxSum = -1; /* small enough */
 834    GLint minCol = 0; /* phoudoin: silent compiler! */
 835    GLint maxCol = 0; /* phoudoin: silent compiler! */
 836
 837    /* Our solution here is to find the darkest and brightest colors in
 838     * the 8x4 tile and use those as the two representative colors.
 839     * There are probably better algorithms to use (histogram-based).
 840     */
 841    for (k = 0; k < n; k++) {
 842       GLint sum = 0;
 843       for (i = 0; i < n_comp; i++) {
 844          sum += reord[k][i];
 845       }
 846       if (minSum > sum) {
 847          minSum = sum;
 848          minCol = k;
 849       }
 850       if (maxSum < sum) {
 851          maxSum = sum;
 852          maxCol = k;
 853       }
 854    }
 855
 856    hihi = 0; /* cc-hi = "00" */
 857    for (i = 0; i < n_comp; i++) {
 858       /* add in colors */
 859       hihi <<= 5;
 860       hihi |= reord[maxCol][i] >> 3;
 861    }
 862    for (i = 0; i < n_comp; i++) {
 863       /* add in colors */
 864       hihi <<= 5;
 865       hihi |= reord[minCol][i] >> 3;
 866    }
 867    cc[3] = hihi;
 868    cc[0] = cc[1] = cc[2] = 0;
 869
 870    /* compute interpolation vector */
 871    if (minCol != maxCol) {
 872       MAKEIVEC(n_vect, n_comp, iv, b, reord[minCol], reord[maxCol]);
 873    }
 874
 875    /* add in texels */
 876    for (k = N_TEXELS - 1; k >= 0; k--) {
 877       GLint t = k * 3;
 878       GLuint *kk = (GLuint *)((char *)cc + t / 8);
 879       GLint texel = n_vect + 1; /* transparent black */
 880
 881       if (!ISTBLACK(input[k])) {
 882          if (minCol != maxCol) {
 883             /* interpolate color */
 884             CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
 885             /* add in texel */
 886             kk[0] |= texel << (t & 7);
 887          }
 888       } else {
 889          /* add in texel */
 890          kk[0] |= texel << (t & 7);
 891       }
 892    }
 893 }
 894
 895
 896 static void
 897 fxt1_quantize_MIXED1 (GLuint *cc,
 898                       GLubyte input[N_TEXELS][MAX_COMP])
 899 {
 900    const GLint n_vect = 2; /* highest vector number in each microtile */
 901    const GLint n_comp = 3; /* 3 components: R, G, B */
 902    GLubyte vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
 903    GLfloat b, iv[MAX_COMP]; /* interpolation vector */
 904    GLint i, j, k;
 905    Fx64 hi; /* high quadword */
 906    GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
 907
 908    GLint minSum;
 909    GLint maxSum;
 910    GLint minColL = 0, maxColL = -1;
 911    GLint minColR = 0, maxColR = -1;
 912
 913    /* Our solution here is to find the darkest and brightest colors in
 914     * the 4x4 tile and use those as the two representative colors.
 915     * There are probably better algorithms to use (histogram-based).
 916     */
 917    minSum = 2000; /* big enough */
 918    maxSum = -1; /* small enough */
 919    for (k = 0; k < N_TEXELS / 2; k++) {
 920       if (!ISTBLACK(input[k])) {
 921          GLint sum = 0;
 922          for (i = 0; i < n_comp; i++) {
 923             sum += input[k][i];
 924          }
 925          if (minSum > sum) {
 926             minSum = sum;
 927             minColL = k;
 928          }
 929          if (maxSum < sum) {
 930             maxSum = sum;
 931             maxColL = k;
 932          }
 933       }
 934    }
 935    minSum = 2000; /* big enough */
 936    maxSum = -1; /* small enough */
 937    for (; k < N_TEXELS; k++) {
 938       if (!ISTBLACK(input[k])) {
 939          GLint sum = 0;
 940          for (i = 0; i < n_comp; i++) {
 941             sum += input[k][i];
 942          }
 943          if (minSum > sum) {
 944             minSum = sum;
 945             minColR = k;
 946          }
 947          if (maxSum < sum) {
 948             maxSum = sum;
 949             maxColR = k;
 950          }
 951       }
 952    }
 953
 954    /* left microtile */
 955    if (maxColL == -1) {
 956       /* all transparent black */
 957       cc[0] = ~0u;
 958       for (i = 0; i < n_comp; i++) {
 959          vec[0][i] = 0;
 960          vec[1][i] = 0;
 961       }
 962    } else {
 963       cc[0] = 0;
 964       for (i = 0; i < n_comp; i++) {
 965          vec[0][i] = input[minColL][i];
 966          vec[1][i] = input[maxColL][i];
 967       }
 968       if (minColL != maxColL) {
 969          /* compute interpolation vector */
 970          MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
 971
 972          /* add in texels */
 973          lolo = 0;
 974          for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
 975             GLint texel = n_vect + 1; /* transparent black */
 976             if (!ISTBLACK(input[k])) {
 977                /* interpolate color */
 978                CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
 979             }
 980             /* add in texel */
 981             lolo <<= 2;
 982             lolo |= texel;
 983          }
 984          cc[0] = lolo;
 985       }
 986    }
 987
 988    /* right microtile */
 989    if (maxColR == -1) {
 990       /* all transparent black */
 991       cc[1] = ~0u;
 992       for (i = 0; i < n_comp; i++) {
 993          vec[2][i] = 0;
 994          vec[3][i] = 0;
 995       }
 996    } else {
 997       cc[1] = 0;
 998       for (i = 0; i < n_comp; i++) {
 999          vec[2][i] = input[minColR][i];
1000          vec[3][i] = input[maxColR][i];
1001       }
1002       if (minColR != maxColR) {
1003          /* compute interpolation vector */
1004          MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);
1005
1006          /* add in texels */
1007          lohi = 0;
1008          for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
1009             GLint texel = n_vect + 1; /* transparent black */
1010             if (!ISTBLACK(input[k])) {
1011                /* interpolate color */
1012                CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
1013             }
1014             /* add in texel */
1015             lohi <<= 2;
1016             lohi |= texel;
1017          }
1018          cc[1] = lohi;
1019       }
1020    }
1021
1022    FX64_MOV32(hi, 9 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
1023    for (j = 2 * 2 - 1; j >= 0; j--) {
1024       for (i = 0; i < n_comp; i++) {
1025          /* add in colors */
1026          FX64_SHL(hi, 5);
1027          FX64_OR32(hi, vec[j][i] >> 3);
1028       }
1029    }
1030    ((Fx64 *)cc)[1] = hi;
1031 }
1032
1033
1034 static void
1035 fxt1_quantize_MIXED0 (GLuint *cc,
1036                       GLubyte input[N_TEXELS][MAX_COMP])
1037 {
1038    const GLint n_vect = 3; /* highest vector number in each microtile */
1039    const GLint n_comp = 3; /* 3 components: R, G, B */
1040    GLubyte vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
1041    GLfloat b, iv[MAX_COMP]; /* interpolation vector */
1042    GLint i, j, k;
1043    Fx64 hi; /* high quadword */
1044    GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
1045
1046    GLint minColL = 0, maxColL = 0;
1047    GLint minColR = 0, maxColR = 0;
1048 #if 0
1049    GLint minSum;
1050    GLint maxSum;
1051
1052    /* Our solution here is to find the darkest and brightest colors in
1053     * the 4x4 tile and use those as the two representative colors.
1054     * There are probably better algorithms to use (histogram-based).
1055     */
1056    minSum = 2000; /* big enough */
1057    maxSum = -1; /* small enough */
1058    for (k = 0; k < N_TEXELS / 2; k++) {
1059       GLint sum = 0;
1060       for (i = 0; i < n_comp; i++) {
1061          sum += input[k][i];
1062       }
1063       if (minSum > sum) {
1064          minSum = sum;
1065          minColL = k;
1066       }
1067       if (maxSum < sum) {
1068          maxSum = sum;
1069          maxColL = k;
1070       }
1071    }
1072    minSum = 2000; /* big enough */
1073    maxSum = -1; /* small enough */
1074    for (; k < N_TEXELS; k++) {
1075       GLint sum = 0;
1076       for (i = 0; i < n_comp; i++) {
1077          sum += input[k][i];
1078       }
1079       if (minSum > sum) {
1080          minSum = sum;
1081          minColR = k;
1082       }
1083       if (maxSum < sum) {
1084          maxSum = sum;
1085          maxColR = k;
1086       }
1087    }
1088 #else
1089    GLint minVal;
1090    GLint maxVal;
1091    GLint maxVarL = fxt1_variance(NULL, input, n_comp, N_TEXELS / 2);
1092    GLint maxVarR = fxt1_variance(NULL, &input[N_TEXELS / 2], n_comp, N_TEXELS / 2);
1093
1094    /* Scan the channel with max variance for lo & hi
1095     * and use those as the two representative colors.
1096     */
1097    minVal = 2000; /* big enough */
1098    maxVal = -1; /* small enough */
1099    for (k = 0; k < N_TEXELS / 2; k++) {
1100       GLint t = input[k][maxVarL];
1101       if (minVal > t) {
1102          minVal = t;
1103          minColL = k;
1104       }
1105       if (maxVal < t) {
1106          maxVal = t;
1107          maxColL = k;
1108       }
1109    }
1110    minVal = 2000; /* big enough */
1111    maxVal = -1; /* small enough */
1112    for (; k < N_TEXELS; k++) {
1113       GLint t = input[k][maxVarR];
1114       if (minVal > t) {
1115          minVal = t;
1116          minColR = k;
1117       }
1118       if (maxVal < t) {
1119          maxVal = t;
1120          maxColR = k;
1121       }
1122    }
1123 #endif
1124
1125    /* left microtile */
1126    cc[0] = 0;
1127    for (i = 0; i < n_comp; i++) {
1128       vec[0][i] = input[minColL][i];
1129       vec[1][i] = input[maxColL][i];
1130    }
1131    if (minColL != maxColL) {
1132       /* compute interpolation vector */
1133       MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
1134
1135       /* add in texels */
1136       lolo = 0;
1137       for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
1138          GLint texel;
1139          /* interpolate color */
1140          CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
1141          /* add in texel */
1142          lolo <<= 2;
1143          lolo |= texel;
1144       }
1145
1146       /* funky encoding for LSB of green */
1147       if ((GLint)((lolo >> 1) & 1) != (((vec[1][GCOMP] ^ vec[0][GCOMP]) >> 2) & 1)) {
1148          for (i = 0; i < n_comp; i++) {
1149             vec[1][i] = input[minColL][i];
1150             vec[0][i] = input[maxColL][i];
1151          }
1152          lolo = ~lolo;
1153       }
1154
1155       cc[0] = lolo;
1156    }
1157
1158    /* right microtile */
1159    cc[1] = 0;
1160    for (i = 0; i < n_comp; i++) {
1161       vec[2][i] = input[minColR][i];
1162       vec[3][i] = input[maxColR][i];
1163    }
1164    if (minColR != maxColR) {
1165       /* compute interpolation vector */
1166       MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);
1167
1168       /* add in texels */
1169       lohi = 0;
1170       for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
1171          GLint texel;
1172          /* interpolate color */
1173          CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
1174          /* add in texel */
1175          lohi <<= 2;
1176          lohi |= texel;
1177       }
1178
1179       /* funky encoding for LSB of green */
1180       if ((GLint)((lohi >> 1) & 1) != (((vec[3][GCOMP] ^ vec[2][GCOMP]) >> 2) & 1)) {
1181          for (i = 0; i < n_comp; i++) {
1182             vec[3][i] = input[minColR][i];
1183             vec[2][i] = input[maxColR][i];
1184          }
1185          lohi = ~lohi;
1186       }
1187
1188       cc[1] = lohi;
1189    }
1190
1191    FX64_MOV32(hi, 8 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
1192    for (j = 2 * 2 - 1; j >= 0; j--) {
1193       for (i = 0; i < n_comp; i++) {
1194          /* add in colors */
1195          FX64_SHL(hi, 5);
1196          FX64_OR32(hi, vec[j][i] >> 3);
1197       }
1198    }
1199    ((Fx64 *)cc)[1] = hi;
1200 }
1201
1202
1203 static void
1204 fxt1_quantize (GLuint *cc, const GLubyte *lines[], GLint comps)
1205 {
1206    GLint trualpha;
1207    GLubyte reord[N_TEXELS][MAX_COMP];
1208
1209    GLubyte input[N_TEXELS][MAX_COMP];
1210    GLint i, k, l;
1211
1212    if (comps == 3) {
1213       /* make the whole block opaque */
1214       memset(input, -1, sizeof(input));
1215    }
1216
1217    /* 8 texels each line */
1218    for (l = 0; l < 4; l++) {
1219       for (k = 0; k < 4; k++) {
1220          for (i = 0; i < comps; i++) {
1221             input[k + l * 4][i] = *lines[l]++;
1222          }
1223       }
1224       for (; k < 8; k++) {
1225          for (i = 0; i < comps; i++) {
1226             input[k + l * 4 + 12][i] = *lines[l]++;
1227          }
1228       }
1229    }
1230
1231    /* block layout:
1232     * 00, 01, 02, 03, 08, 09, 0a, 0b
1233     * 10, 11, 12, 13, 18, 19, 1a, 1b
1234     * 04, 05, 06, 07, 0c, 0d, 0e, 0f
1235     * 14, 15, 16, 17, 1c, 1d, 1e, 1f
1236     */
1237
1238    /* [dBorca]
1239     * stupidity flows forth from this
1240     */
1241    l = N_TEXELS;
1242    trualpha = 0;
1243    if (comps == 4) {
1244       /* skip all transparent black texels */
1245       l = 0;
1246       for (k = 0; k < N_TEXELS; k++) {
1247          /* test all components against 0 */
1248          if (!ISTBLACK(input[k])) {
1249             /* texel is not transparent black */
1250             COPY_4UBV(reord[l], input[k]);
1251             if (reord[l][ACOMP] < (255 - ALPHA_TS)) {
1252                /* non-opaque texel */
1253                trualpha = !0;
1254             }
1255             l++;
1256          }
1257       }
1258    }
1259
1260 #if 0
1261    if (trualpha) {
1262       fxt1_quantize_ALPHA0(cc, input, reord, l);
1263    } else if (l == 0) {
1264       cc[0] = cc[1] = cc[2] = -1;
1265       cc[3] = 0;
1266    } else if (l < N_TEXELS) {
1267       fxt1_quantize_HI(cc, input, reord, l);
1268    } else {
1269       fxt1_quantize_CHROMA(cc, input);
1270    }
1271    (void)fxt1_quantize_ALPHA1;
1272    (void)fxt1_quantize_MIXED1;
1273    (void)fxt1_quantize_MIXED0;
1274 #else
1275    if (trualpha) {
1276       fxt1_quantize_ALPHA1(cc, input);
1277    } else if (l == 0) {
1278       cc[0] = cc[1] = cc[2] = ~0u;
1279       cc[3] = 0;
1280    } else if (l < N_TEXELS) {
1281       fxt1_quantize_MIXED1(cc, input);
1282    } else {
1283       fxt1_quantize_MIXED0(cc, input);
1284    }
1285    (void)fxt1_quantize_ALPHA0;
1286    (void)fxt1_quantize_HI;
1287    (void)fxt1_quantize_CHROMA;
1288 #endif
1289 }
1290
1291
1292
1293 /**
1294  * Upscale an image by replication, not (typical) stretching.
1295  * We use this when the image width or height is less than a
1296  * certain size (4, 8) and we need to upscale an image.
1297  */
1298 static void
1299 upscale_teximage2d(GLsizei inWidth, GLsizei inHeight,
1300                    GLsizei outWidth, GLsizei outHeight,
1301                    GLint comps, const GLubyte *src, GLint srcRowStride,
1302                    GLubyte *dest )
1303 {
1304    GLint i, j, k;
1305
1306    ASSERT(outWidth >= inWidth);
1307    ASSERT(outHeight >= inHeight);
1308 #if 0
1309    ASSERT(inWidth == 1 || inWidth == 2 || inHeight == 1 || inHeight == 2);
1310    ASSERT((outWidth & 3) == 0);
1311    ASSERT((outHeight & 3) == 0);
1312 #endif
1313
1314    for (i = 0; i < outHeight; i++) {
1315       const GLint ii = i % inHeight;
1316       for (j = 0; j < outWidth; j++) {
1317          const GLint jj = j % inWidth;
1318          for (k = 0; k < comps; k++) {
1319             dest[(i * outWidth + j) * comps + k]
1320                = src[ii * srcRowStride + jj * comps + k];
1321          }
1322       }
1323    }
1324 }
1325
1326
1327 static void
1328 fxt1_encode (GLuint width, GLuint height, GLint comps,
1329              const void *source, GLint srcRowStride,
1330              void *dest, GLint destRowStride)
1331 {
1332    GLuint x, y;
1333    const GLubyte *data;
1334    GLuint *encoded = (GLuint *)dest;
1335    void *newSource = NULL;
1336
1337    assert(comps == 3 || comps == 4);
1338
1339    /* Replicate image if width is not M8 or height is not M4 */
1340    if ((width & 7) | (height & 3)) {
1341       GLint newWidth = (width + 7) & ~7;
1342       GLint newHeight = (height + 3) & ~3;
1343       newSource = malloc(comps * newWidth * newHeight * sizeof(GLubyte));
1344       if (!newSource) {
1345          GET_CURRENT_CONTEXT(ctx);
1346          _mesa_error(ctx, GL_OUT_OF_MEMORY, "texture compression");
1347          goto cleanUp;
1348       }
1349       upscale_teximage2d(width, height, newWidth, newHeight,
1350                          comps, (const GLubyte *) source,
1351                          srcRowStride, (GLubyte *) newSource);
1352       source = newSource;
1353       width = newWidth;
1354       height = newHeight;
1355       srcRowStride = comps * newWidth;
1356    }
1357
1358    data = (const GLubyte *) source;
1359    destRowStride = (destRowStride - width * 2) / 4;
1360    for (y = 0; y < height; y += 4) {
1361       GLuint offs = 0 + (y + 0) * srcRowStride;
1362       for (x = 0; x < width; x += 8) {
1363          const GLubyte *lines[4];
1364          lines[0] = &data[offs];
1365          lines[1] = lines[0] + srcRowStride;
1366          lines[2] = lines[1] + srcRowStride;
1367          lines[3] = lines[2] + srcRowStride;
1368          offs += 8 * comps;
1369          fxt1_quantize(encoded, lines, comps);
1370          /* 128 bits per 8x4 block */
1371          encoded += 4;
1372       }
1373       encoded += destRowStride;
1374    }
1375
1376  cleanUp:
1377    if (newSource != NULL) {
1378       free(newSource);
1379    }
1380 }
1381
1382
1383 /***************************************************************************\
1384  * FXT1 decoder
1385  *
1386  * The decoder is based on GL_3DFX_texture_compression_FXT1
1387  * specification and serves as a concept for the encoder.
1388 \***************************************************************************/
1389
1390
1391 /* lookup table for scaling 5 bit colors up to 8 bits */
1392 static const GLubyte _rgb_scale_5[] = {
1393    0,   8,   16,  25,  33,  41,  49,  58,
1394    66,  74,  82,  90,  99,  107, 115, 123,
1395    132, 140, 148, 156, 165, 173, 181, 189,
1396    197, 206, 214, 222, 230, 239, 247, 255
1397 };
1398
1399 /* lookup table for scaling 6 bit colors up to 8 bits */
1400 static const GLubyte _rgb_scale_6[] = {
1401    0,   4,   8,   12,  16,  20,  24,  28,
1402    32,  36,  40,  45,  49,  53,  57,  61,
1403    65,  69,  73,  77,  81,  85,  89,  93,
1404    97,  101, 105, 109, 113, 117, 121, 125,
1405    130, 134, 138, 142, 146, 150, 154, 158,
1406    162, 166, 170, 174, 178, 182, 186, 190,
1407    194, 198, 202, 206, 210, 215, 219, 223,
1408    227, 231, 235, 239, 243, 247, 251, 255
1409 };
1410
1411
/* Treat cc as an array of 32-bit words and shift bit number `which` of the
 * block down to bit 0.  Higher bits are NOT masked off; callers mask.
 */
#define CC_SEL(cc, which) (((const GLuint *)(cc))[(which) / 32] >> ((which) & 31))
/* Expand the low 5 bits of c to an 8-bit color. */
#define UP5(c) (_rgb_scale_5[(c) & 31])
/* Expand the low 5 bits of c plus one extra low-order bit b to 8 bits. */
#define UP6(c, b) (_rgb_scale_6[(((c) & 31) << 1) | ((b) & 1)])
/* Rounded integer interpolation: t = 0 yields c0, t = n yields c1.
 * The whole expansion is parenthesized so it can be used as an operand.
 */
#define LERP(n, t, c0, c1) ((((n) - (t)) * (c0) + (t) * (c1) + (n) / 2) / (n))
1416
1417
1418 static void
1419 fxt1_decode_1HI (const GLubyte *code, GLint t, GLubyte *rgba)
1420 {
1421    const GLuint *cc;
1422
1423    t *= 3;
1424    cc = (const GLuint *)(code + t / 8);
1425    t = (cc[0] >> (t & 7)) & 7;
1426
1427    if (t == 7) {
1428       rgba[RCOMP] = rgba[GCOMP] = rgba[BCOMP] = rgba[ACOMP] = 0;
1429    } else {
1430       GLubyte r, g, b;
1431       cc = (const GLuint *)(code + 12);
1432       if (t == 0) {
1433          b = UP5(CC_SEL(cc, 0));
1434          g = UP5(CC_SEL(cc, 5));
1435          r = UP5(CC_SEL(cc, 10));
1436       } else if (t == 6) {
1437          b = UP5(CC_SEL(cc, 15));
1438          g = UP5(CC_SEL(cc, 20));
1439          r = UP5(CC_SEL(cc, 25));
1440       } else {
1441          b = LERP(6, t, UP5(CC_SEL(cc, 0)), UP5(CC_SEL(cc, 15)));
1442          g = LERP(6, t, UP5(CC_SEL(cc, 5)), UP5(CC_SEL(cc, 20)));
1443          r = LERP(6, t, UP5(CC_SEL(cc, 10)), UP5(CC_SEL(cc, 25)));
1444       }
1445       rgba[RCOMP] = r;
1446       rgba[GCOMP] = g;
1447       rgba[BCOMP] = b;
1448       rgba[ACOMP] = 255;
1449    }
1450 }
1451
1452
1453 static void
1454 fxt1_decode_1CHROMA (const GLubyte *code, GLint t, GLubyte *rgba)
1455 {
1456    const GLuint *cc;
1457    GLuint kk;
1458
1459    cc = (const GLuint *)code;
1460    if (t & 16) {
1461       cc++;
1462       t &= 15;
1463    }
1464    t = (cc[0] >> (t * 2)) & 3;
1465
1466    t *= 15;
1467    cc = (const GLuint *)(code + 8 + t / 8);
1468    kk = cc[0] >> (t & 7);
1469    rgba[BCOMP] = UP5(kk);
1470    rgba[GCOMP] = UP5(kk >> 5);
1471    rgba[RCOMP] = UP5(kk >> 10);
1472    rgba[ACOMP] = 255;
1473 }
1474
1475
1476 static void
1477 fxt1_decode_1MIXED (const GLubyte *code, GLint t, GLubyte *rgba)
1478 {
1479    const GLuint *cc;
1480    GLuint col[2][3];
1481    GLint glsb, selb;
1482
1483    cc = (const GLuint *)code;
1484    if (t & 16) {
1485       t &= 15;
1486       t = (cc[1] >> (t * 2)) & 3;
1487       /* col 2 */
1488       col[0][BCOMP] = (*(const GLuint *)(code + 11)) >> 6;
1489       col[0][GCOMP] = CC_SEL(cc, 99);
1490       col[0][RCOMP] = CC_SEL(cc, 104);
1491       /* col 3 */
1492       col[1][BCOMP] = CC_SEL(cc, 109);
1493       col[1][GCOMP] = CC_SEL(cc, 114);
1494       col[1][RCOMP] = CC_SEL(cc, 119);
1495       glsb = CC_SEL(cc, 126);
1496       selb = CC_SEL(cc, 33);
1497    } else {
1498       t = (cc[0] >> (t * 2)) & 3;
1499       /* col 0 */
1500       col[0][BCOMP] = CC_SEL(cc, 64);
1501       col[0][GCOMP] = CC_SEL(cc, 69);
1502       col[0][RCOMP] = CC_SEL(cc, 74);
1503       /* col 1 */
1504       col[1][BCOMP] = CC_SEL(cc, 79);
1505       col[1][GCOMP] = CC_SEL(cc, 84);
1506       col[1][RCOMP] = CC_SEL(cc, 89);
1507       glsb = CC_SEL(cc, 125);
1508       selb = CC_SEL(cc, 1);
1509    }
1510
1511    if (CC_SEL(cc, 124) & 1) {
1512       /* alpha[0] == 1 */
1513
1514       if (t == 3) {
1515          /* zero */
1516          rgba[RCOMP] = rgba[BCOMP] = rgba[GCOMP] = rgba[ACOMP] = 0;
1517       } else {
1518          GLubyte r, g, b;
1519          if (t == 0) {
1520             b = UP5(col[0][BCOMP]);
1521             g = UP5(col[0][GCOMP]);
1522             r = UP5(col[0][RCOMP]);
1523          } else if (t == 2) {
1524             b = UP5(col[1][BCOMP]);
1525             g = UP6(col[1][GCOMP], glsb);
1526             r = UP5(col[1][RCOMP]);
1527          } else {
1528             b = (UP5(col[0][BCOMP]) + UP5(col[1][BCOMP])) / 2;
1529             g = (UP5(col[0][GCOMP]) + UP6(col[1][GCOMP], glsb)) / 2;
1530             r = (UP5(col[0][RCOMP]) + UP5(col[1][RCOMP])) / 2;
1531          }
1532          rgba[RCOMP] = r;
1533          rgba[GCOMP] = g;
1534          rgba[BCOMP] = b;
1535          rgba[ACOMP] = 255;
1536       }
1537    } else {
1538       /* alpha[0] == 0 */
1539       GLubyte r, g, b;
1540       if (t == 0) {
1541          b = UP5(col[0][BCOMP]);
1542          g = UP6(col[0][GCOMP], glsb ^ selb);
1543          r = UP5(col[0][RCOMP]);
1544       } else if (t == 3) {
1545          b = UP5(col[1][BCOMP]);
1546          g = UP6(col[1][GCOMP], glsb);
1547          r = UP5(col[1][RCOMP]);
1548       } else {
1549          b = LERP(3, t, UP5(col[0][BCOMP]), UP5(col[1][BCOMP]));
1550          g = LERP(3, t, UP6(col[0][GCOMP], glsb ^ selb),
1551                         UP6(col[1][GCOMP], glsb));
1552          r = LERP(3, t, UP5(col[0][RCOMP]), UP5(col[1][RCOMP]));
1553       }
1554       rgba[RCOMP] = r;
1555       rgba[GCOMP] = g;
1556       rgba[BCOMP] = b;
1557       rgba[ACOMP] = 255;
1558    }
1559 }
1560
1561
1562 static void
1563 fxt1_decode_1ALPHA (const GLubyte *code, GLint t, GLubyte *rgba)
1564 {
1565    const GLuint *cc;
1566    GLubyte r, g, b, a;
1567
1568    cc = (const GLuint *)code;
1569    if (CC_SEL(cc, 124) & 1) {
1570       /* lerp == 1 */
1571       GLuint col0[4];
1572
1573       if (t & 16) {
1574          t &= 15;
1575          t = (cc[1] >> (t * 2)) & 3;
1576          /* col 2 */
1577          col0[BCOMP] = (*(const GLuint *)(code + 11)) >> 6;
1578          col0[GCOMP] = CC_SEL(cc, 99);
1579          col0[RCOMP] = CC_SEL(cc, 104);
1580          col0[ACOMP] = CC_SEL(cc, 119);
1581       } else {
1582          t = (cc[0] >> (t * 2)) & 3;
1583          /* col 0 */
1584          col0[BCOMP] = CC_SEL(cc, 64);
1585          col0[GCOMP] = CC_SEL(cc, 69);
1586          col0[RCOMP] = CC_SEL(cc, 74);
1587          col0[ACOMP] = CC_SEL(cc, 109);
1588       }
1589
1590       if (t == 0) {
1591          b = UP5(col0[BCOMP]);
1592          g = UP5(col0[GCOMP]);
1593          r = UP5(col0[RCOMP]);
1594          a = UP5(col0[ACOMP]);
1595       } else if (t == 3) {
1596          b = UP5(CC_SEL(cc, 79));
1597          g = UP5(CC_SEL(cc, 84));
1598          r = UP5(CC_SEL(cc, 89));
1599          a = UP5(CC_SEL(cc, 114));
1600       } else {
1601          b = LERP(3, t, UP5(col0[BCOMP]), UP5(CC_SEL(cc, 79)));
1602          g = LERP(3, t, UP5(col0[GCOMP]), UP5(CC_SEL(cc, 84)));
1603          r = LERP(3, t, UP5(col0[RCOMP]), UP5(CC_SEL(cc, 89)));
1604          a = LERP(3, t, UP5(col0[ACOMP]), UP5(CC_SEL(cc, 114)));
1605       }
1606    } else {
1607       /* lerp == 0 */
1608
1609       if (t & 16) {
1610          cc++;
1611          t &= 15;
1612       }
1613       t = (cc[0] >> (t * 2)) & 3;
1614
1615       if (t == 3) {
1616          /* zero */
1617          r = g = b = a = 0;
1618       } else {
1619          GLuint kk;
1620          cc = (const GLuint *)code;
1621          a = UP5(cc[3] >> (t * 5 + 13));
1622          t *= 15;
1623          cc = (const GLuint *)(code + 8 + t / 8);
1624          kk = cc[0] >> (t & 7);
1625          b = UP5(kk);
1626          g = UP5(kk >> 5);
1627          r = UP5(kk >> 10);
1628       }
1629    }
1630    rgba[RCOMP] = r;
1631    rgba[GCOMP] = g;
1632    rgba[BCOMP] = b;
1633    rgba[ACOMP] = a;
1634 }
1635
1636
1637 void
1638 fxt1_decode_1 (const void *texture, GLint stride, /* in pixels */
1639                GLint i, GLint j, GLubyte *rgba)
1640 {
1641    static void (*decode_1[]) (const GLubyte *, GLint, GLubyte *) = {
1642       fxt1_decode_1HI,     /* cc-high   = "00?" */
1643       fxt1_decode_1HI,     /* cc-high   = "00?" */
1644       fxt1_decode_1CHROMA, /* cc-chroma = "010" */
1645       fxt1_decode_1ALPHA,  /* alpha     = "011" */
1646       fxt1_decode_1MIXED,  /* mixed     = "1??" */
1647       fxt1_decode_1MIXED,  /* mixed     = "1??" */
1648       fxt1_decode_1MIXED,  /* mixed     = "1??" */
1649       fxt1_decode_1MIXED   /* mixed     = "1??" */
1650    };
1651
1652    const GLubyte *code = (const GLubyte *)texture +
1653                          ((j / 4) * (stride / 8) + (i / 8)) * 16;
1654    GLint mode = CC_SEL(code, 125);
1655    GLint t = i & 7;
1656
1657    if (t & 4) {
1658       t += 12;
1659    }
1660    t += (j & 3) * 4;
1661
1662    decode_1[mode](code, t, rgba);
1663 }
1664
1665
1666 #endif /* FEATURE_texture_fxt1 */