src/mesa/main/texcompress_fxt1.c

   1 /*
   2  * Mesa 3-D graphics library
   3  * Version:  7.1
   4  *
   5  * Copyright (C) 1999-2008  Brian Paul   All Rights Reserved.
   6  *
   7  * Permission is hereby granted, free of charge, to any person obtaining a
   8  * copy of this software and associated documentation files (the "Software"),
   9  * to deal in the Software without restriction, including without limitation
  10  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  11  * and/or sell copies of the Software, and to permit persons to whom the
  12  * Software is furnished to do so, subject to the following conditions:
  13  *
  14  * The above copyright notice and this permission notice shall be included
  15  * in all copies or substantial portions of the Software.
  16  *
  17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  18  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  20  * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
  21  * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  22  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  23  */
  24
  25
  26 /**
  27  * \file texcompress_fxt1.c
  28  * GL_3DFX_texture_compression_FXT1 support.
  29  */
  30
  31
  32 #include "glheader.h"
  33 #include "imports.h"
  34 #include "colormac.h"
  35 #include "image.h"
  36 #include "macros.h"
  37 #include "mfeatures.h"
  38 #include "mipmap.h"
  39 #include "texcompress.h"
  40 #include "texcompress_fxt1.h"
  41 #include "texstore.h"
  42 #include "swrast/s_context.h"
  43
  44
  45 #if FEATURE_texture_fxt1
  46
  47
  48 static void
  49 fxt1_encode (GLuint width, GLuint height, GLint comps,
  50              const void *source, GLint srcRowStride,
  51              void *dest, GLint destRowStride);
  52
  53 static void
  54 fxt1_decode_1 (const void *texture, GLint stride,
  55                GLint i, GLint j, GLubyte *rgba);
  56
  57
  58 /**
  59  * Store user's image in rgb_fxt1 format.
  60  */
  61 GLboolean
  62 _mesa_texstore_rgb_fxt1(TEXSTORE_PARAMS)
  63 {
  64    const GLubyte *pixels;
  65    GLint srcRowStride;
  66    GLubyte *dst;
  67    const GLubyte *tempImage = NULL;
  68
  69    ASSERT(dstFormat == MESA_FORMAT_RGB_FXT1);
  70
  71    if (srcFormat != GL_RGB ||
  72        srcType != GL_UNSIGNED_BYTE ||
  73        ctx->_ImageTransferState ||
  74        srcPacking->RowLength != srcWidth ||
  75        srcPacking->SwapBytes) {
  76       /* convert image to RGB/GLubyte */
  77       tempImage = _mesa_make_temp_ubyte_image(ctx, dims,
  78                                              baseInternalFormat,
  79                                              _mesa_get_format_base_format(dstFormat),
  80                                              srcWidth, srcHeight, srcDepth,
  81                                              srcFormat, srcType, srcAddr,
  82                                              srcPacking);
  83       if (!tempImage)
  84          return GL_FALSE; /* out of memory */
  85       pixels = tempImage;
  86       srcRowStride = 3 * srcWidth;
  87       srcFormat = GL_RGB;
  88    }
  89    else {
  90       pixels = _mesa_image_address2d(srcPacking, srcAddr, srcWidth, srcHeight,
  91                                      srcFormat, srcType, 0, 0);
  92
  93       srcRowStride = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat,
  94                                             srcType) / sizeof(GLubyte);
  95    }
  96
  97    dst = dstSlices[0];
  98
  99    fxt1_encode(srcWidth, srcHeight, 3, pixels, srcRowStride,
 100                dst, dstRowStride);
 101
 102    free((void*) tempImage);
 103
 104    return GL_TRUE;
 105 }
 106
 107
 108 /**
 109  * Store user's image in rgba_fxt1 format.
 110  */
 111 GLboolean
 112 _mesa_texstore_rgba_fxt1(TEXSTORE_PARAMS)
 113 {
 114    const GLubyte *pixels;
 115    GLint srcRowStride;
 116    GLubyte *dst;
 117    const GLubyte *tempImage = NULL;
 118
 119    ASSERT(dstFormat == MESA_FORMAT_RGBA_FXT1);
 120
 121    if (srcFormat != GL_RGBA ||
 122        srcType != GL_UNSIGNED_BYTE ||
 123        ctx->_ImageTransferState ||
 124        srcPacking->SwapBytes) {
 125       /* convert image to RGBA/GLubyte */
 126       tempImage = _mesa_make_temp_ubyte_image(ctx, dims,
 127                                              baseInternalFormat,
 128                                              _mesa_get_format_base_format(dstFormat),
 129                                              srcWidth, srcHeight, srcDepth,
 130                                              srcFormat, srcType, srcAddr,
 131                                              srcPacking);
 132       if (!tempImage)
 133          return GL_FALSE; /* out of memory */
 134       pixels = tempImage;
 135       srcRowStride = 4 * srcWidth;
 136       srcFormat = GL_RGBA;
 137    }
 138    else {
 139       pixels = _mesa_image_address2d(srcPacking, srcAddr, srcWidth, srcHeight,
 140                                      srcFormat, srcType, 0, 0);
 141
 142       srcRowStride = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat,
 143                                             srcType) / sizeof(GLubyte);
 144    }
 145
 146    dst = dstSlices[0];
 147
 148    fxt1_encode(srcWidth, srcHeight, 4, pixels, srcRowStride,
 149                dst, dstRowStride);
 150
 151    free((void*) tempImage);
 152
 153    return GL_TRUE;
 154 }
 155
 156
 157 void
 158 _mesa_fetch_texel_2d_f_rgba_fxt1( const struct swrast_texture_image *texImage,
 159                                   GLint i, GLint j, GLint k, GLfloat *texel )
 160 {
 161    /* just sample as GLubyte and convert to float here */
 162    GLubyte rgba[4];
 163    (void) k;
 164    fxt1_decode_1(texImage->Map, texImage->RowStride, i, j, rgba);
 165    texel[RCOMP] = UBYTE_TO_FLOAT(rgba[RCOMP]);
 166    texel[GCOMP] = UBYTE_TO_FLOAT(rgba[GCOMP]);
 167    texel[BCOMP] = UBYTE_TO_FLOAT(rgba[BCOMP]);
 168    texel[ACOMP] = UBYTE_TO_FLOAT(rgba[ACOMP]);
 169 }
 170
 171
 172 void
 173 _mesa_fetch_texel_2d_f_rgb_fxt1( const struct swrast_texture_image *texImage,
 174                                  GLint i, GLint j, GLint k, GLfloat *texel )
 175 {
 176    /* just sample as GLubyte and convert to float here */
 177    GLubyte rgba[4];
 178    (void) k;
 179    fxt1_decode_1(texImage->Map, texImage->RowStride, i, j, rgba);
 180    texel[RCOMP] = UBYTE_TO_FLOAT(rgba[RCOMP]);
 181    texel[GCOMP] = UBYTE_TO_FLOAT(rgba[GCOMP]);
 182    texel[BCOMP] = UBYTE_TO_FLOAT(rgba[BCOMP]);
 183    texel[ACOMP] = 1.0F;
 184 }
 185
 186
 187
 188 /***************************************************************************\
 189  * FXT1 encoder
 190  *
 191  * The encoder was built by reversing the decoder,
 192  * and is vaguely based on Texus2 by 3dfx. Note that this code
 193  * is merely a proof of concept, since it is highly UNoptimized;
 194  * moreover, it is sub-optimal due to initial conditions passed
 195  * to Lloyd's algorithm (the interpolation modes are even worse).
 196 \***************************************************************************/
 197
 198
 199 #define MAX_COMP 4 /* ever needed maximum number of components in texel */
 200 #define MAX_VECT 4 /* ever needed maximum number of base vectors to find */
 201 #define N_TEXELS 32 /* number of texels in a block (always 32) */
 202 #define LL_N_REP 50 /* number of iterations in lloyd's vq */
 203 #define LL_RMS_D 10 /* fault tolerance (maximum delta) */
 204 #define LL_RMS_E 255 /* fault tolerance (maximum error) */
 205 #define ALPHA_TS 2 /* alpha threshold: (255 - ALPHA_TS) deemed opaque */
 206 #define ISTBLACK(v) (*((GLuint *)(v)) == 0)
 207
 208
 209 /*
 210  * Define a 64-bit unsigned integer type and macros
 211  */
 212 #if 1
 213
 214 #define FX64_NATIVE 1
 215
 216 typedef uint64_t Fx64;
 217
 218 #define FX64_MOV32(a, b) a = b
 219 #define FX64_OR32(a, b)  a |= b
 220 #define FX64_SHL(a, c)   a <<= c
 221
 222 #else
 223
 224 #define FX64_NATIVE 0
 225
 226 typedef struct {
 227    GLuint lo, hi;
 228 } Fx64;
 229
 230 #define FX64_MOV32(a, b) a.lo = b
 231 #define FX64_OR32(a, b)  a.lo |= b
 232
 233 #define FX64_SHL(a, c)                                 \
 234    do {                                                \
 235        if ((c) >= 32) {                                \
 236           a.hi = a.lo << ((c) - 32);                   \
 237           a.lo = 0;                                    \
 238        } else {                                        \
 239           a.hi = (a.hi << (c)) | (a.lo >> (32 - (c))); \
 240           a.lo <<= (c);                                \
 241        }                                               \
 242    } while (0)
 243
 244 #endif
 245
 246
 247 #define F(i) (GLfloat)1 /* can be used to obtain an oblong metric: 0.30 / 0.59 / 0.11 */
 248 #define SAFECDOT 1 /* for paranoids */
 249
 250 #define MAKEIVEC(NV, NC, IV, B, V0, V1)  \
 251    do {                                  \
 252       /* compute interpolation vector */ \
 253       GLfloat d2 = 0.0F;                 \
 254       GLfloat rd2;                       \
 255                                          \
 256       for (i = 0; i < NC; i++) {         \
 257          IV[i] = (V1[i] - V0[i]) * F(i); \
 258          d2 += IV[i] * IV[i];            \
 259       }                                  \
 260       rd2 = (GLfloat)NV / d2;            \
 261       B = 0;                             \
 262       for (i = 0; i < NC; i++) {         \
 263          IV[i] *= F(i);                  \
 264          B -= IV[i] * V0[i];             \
 265          IV[i] *= rd2;                   \
 266       }                                  \
 267       B = B * rd2 + 0.5f;                \
 268    } while (0)
 269
 270 #define CALCCDOT(TEXEL, NV, NC, IV, B, V)\
 271    do {                                  \
 272       GLfloat dot = 0.0F;                \
 273       for (i = 0; i < NC; i++) {         \
 274          dot += V[i] * IV[i];            \
 275       }                                  \
 276       TEXEL = (GLint)(dot + B);          \
 277       if (SAFECDOT) {                    \
 278          if (TEXEL < 0) {                \
 279             TEXEL = 0;                   \
 280          } else if (TEXEL > NV) {        \
 281             TEXEL = NV;                  \
 282          }                               \
 283       }                                  \
 284    } while (0)
 285
 286
 287 static GLint
 288 fxt1_bestcol (GLfloat vec[][MAX_COMP], GLint nv,
 289               GLubyte input[MAX_COMP], GLint nc)
 290 {
 291    GLint i, j, best = -1;
 292    GLfloat err = 1e9; /* big enough */
 293
 294    for (j = 0; j < nv; j++) {
 295       GLfloat e = 0.0F;
 296       for (i = 0; i < nc; i++) {
 297          e += (vec[j][i] - input[i]) * (vec[j][i] - input[i]);
 298       }
 299       if (e < err) {
 300          err = e;
 301          best = j;
 302       }
 303    }
 304
 305    return best;
 306 }
 307
 308
 309 static GLint
 310 fxt1_worst (GLfloat vec[MAX_COMP],
 311             GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
 312 {
 313    GLint i, k, worst = -1;
 314    GLfloat err = -1.0F; /* small enough */
 315
 316    for (k = 0; k < n; k++) {
 317       GLfloat e = 0.0F;
 318       for (i = 0; i < nc; i++) {
 319          e += (vec[i] - input[k][i]) * (vec[i] - input[k][i]);
 320       }
 321       if (e > err) {
 322          err = e;
 323          worst = k;
 324       }
 325    }
 326
 327    return worst;
 328 }
 329
 330
 331 static GLint
 332 fxt1_variance (GLdouble variance[MAX_COMP],
 333                GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
 334 {
 335    GLint i, k, best = 0;
 336    GLint sx, sx2;
 337    GLdouble var, maxvar = -1; /* small enough */
 338    GLdouble teenth = 1.0 / n;
 339
 340    for (i = 0; i < nc; i++) {
 341       sx = sx2 = 0;
 342       for (k = 0; k < n; k++) {
 343          GLint t = input[k][i];
 344          sx += t;
 345          sx2 += t * t;
 346       }
 347       var = sx2 * teenth - sx * sx * teenth * teenth;
 348       if (maxvar < var) {
 349          maxvar = var;
 350          best = i;
 351       }
 352       if (variance) {
 353          variance[i] = var;
 354       }
 355    }
 356
 357    return best;
 358 }
 359
 360
 361 static GLint
 362 fxt1_choose (GLfloat vec[][MAX_COMP], GLint nv,
 363              GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
 364 {
 365 #if 0
 366    /* Choose colors from a grid.
 367     */
 368    GLint i, j;
 369
 370    for (j = 0; j < nv; j++) {
 371       GLint m = j * (n - 1) / (nv - 1);
 372       for (i = 0; i < nc; i++) {
 373          vec[j][i] = input[m][i];
 374       }
 375    }
 376 #else
 377    /* Our solution here is to find the darkest and brightest colors in
 378     * the 8x4 tile and use those as the two representative colors.
 379     * There are probably better algorithms to use (histogram-based).
 380     */
 381    GLint i, j, k;
 382    GLint minSum = 2000; /* big enough */
 383    GLint maxSum = -1; /* small enough */
 384    GLint minCol = 0; /* phoudoin: silent compiler! */
 385    GLint maxCol = 0; /* phoudoin: silent compiler! */
 386
 387    struct {
 388       GLint flag;
 389       GLint key;
 390       GLint freq;
 391       GLint idx;
 392    } hist[N_TEXELS];
 393    GLint lenh = 0;
 394
 395    memset(hist, 0, sizeof(hist));
 396
 397    for (k = 0; k < n; k++) {
 398       GLint l;
 399       GLint key = 0;
 400       GLint sum = 0;
 401       for (i = 0; i < nc; i++) {
 402          key <<= 8;
 403          key |= input[k][i];
 404          sum += input[k][i];
 405       }
 406       for (l = 0; l < n; l++) {
 407          if (!hist[l].flag) {
 408             /* alloc new slot */
 409             hist[l].flag = !0;
 410             hist[l].key = key;
 411             hist[l].freq = 1;
 412             hist[l].idx = k;
 413             lenh = l + 1;
 414             break;
 415          } else if (hist[l].key == key) {
 416             hist[l].freq++;
 417             break;
 418          }
 419       }
 420       if (minSum > sum) {
 421          minSum = sum;
 422          minCol = k;
 423       }
 424       if (maxSum < sum) {
 425          maxSum = sum;
 426          maxCol = k;
 427       }
 428    }
 429
 430    if (lenh <= nv) {
 431       for (j = 0; j < lenh; j++) {
 432          for (i = 0; i < nc; i++) {
 433             vec[j][i] = (GLfloat)input[hist[j].idx][i];
 434          }
 435       }
 436       for (; j < nv; j++) {
 437          for (i = 0; i < nc; i++) {
 438             vec[j][i] = vec[0][i];
 439          }
 440       }
 441       return 0;
 442    }
 443
 444    for (j = 0; j < nv; j++) {
 445       for (i = 0; i < nc; i++) {
 446          vec[j][i] = ((nv - 1 - j) * input[minCol][i] + j * input[maxCol][i] + (nv - 1) / 2) / (GLfloat)(nv - 1);
 447       }
 448    }
 449 #endif
 450
 451    return !0;
 452 }
 453
 454
 455 static GLint
 456 fxt1_lloyd (GLfloat vec[][MAX_COMP], GLint nv,
 457             GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
 458 {
 459    /* Use the generalized lloyd's algorithm for VQ:
 460     *     find 4 color vectors.
 461     *
 462     *     for each sample color
 463     *         sort to nearest vector.
 464     *
 465     *     replace each vector with the centroid of its matching colors.
 466     *
 467     *     repeat until RMS doesn't improve.
 468     *
 469     *     if a color vector has no samples, or becomes the same as another
 470     *     vector, replace it with the color which is farthest from a sample.
 471     *
 472     * vec[][MAX_COMP]           initial vectors and resulting colors
 473     * nv                        number of resulting colors required
 474     * input[N_TEXELS][MAX_COMP] input texels
 475     * nc                        number of components in input / vec
 476     * n                         number of input samples
 477     */
 478
 479    GLint sum[MAX_VECT][MAX_COMP]; /* used to accumulate closest texels */
 480    GLint cnt[MAX_VECT]; /* how many times a certain vector was chosen */
 481    GLfloat error, lasterror = 1e9;
 482
 483    GLint i, j, k, rep;
 484
 485    /* the quantizer */
 486    for (rep = 0; rep < LL_N_REP; rep++) {
 487       /* reset sums & counters */
 488       for (j = 0; j < nv; j++) {
 489          for (i = 0; i < nc; i++) {
 490             sum[j][i] = 0;
 491          }
 492          cnt[j] = 0;
 493       }
 494       error = 0;
 495
 496       /* scan whole block */
 497       for (k = 0; k < n; k++) {
 498 #if 1
 499          GLint best = -1;
 500          GLfloat err = 1e9; /* big enough */
 501          /* determine best vector */
 502          for (j = 0; j < nv; j++) {
 503             GLfloat e = (vec[j][0] - input[k][0]) * (vec[j][0] - input[k][0]) +
 504                       (vec[j][1] - input[k][1]) * (vec[j][1] - input[k][1]) +
 505                       (vec[j][2] - input[k][2]) * (vec[j][2] - input[k][2]);
 506             if (nc == 4) {
 507                e += (vec[j][3] - input[k][3]) * (vec[j][3] - input[k][3]);
 508             }
 509             if (e < err) {
 510                err = e;
 511                best = j;
 512             }
 513          }
 514 #else
 515          GLint best = fxt1_bestcol(vec, nv, input[k], nc, &err);
 516 #endif
 517          assert(best >= 0);
 518          /* add in closest color */
 519          for (i = 0; i < nc; i++) {
 520             sum[best][i] += input[k][i];
 521          }
 522          /* mark this vector as used */
 523          cnt[best]++;
 524          /* accumulate error */
 525          error += err;
 526       }
 527
 528       /* check RMS */
 529       if ((error < LL_RMS_E) ||
 530           ((error < lasterror) && ((lasterror - error) < LL_RMS_D))) {
 531          return !0; /* good match */
 532       }
 533       lasterror = error;
 534
 535       /* move each vector to the barycenter of its closest colors */
 536       for (j = 0; j < nv; j++) {
 537          if (cnt[j]) {
 538             GLfloat div = 1.0F / cnt[j];
 539             for (i = 0; i < nc; i++) {
 540                vec[j][i] = div * sum[j][i];
 541             }
 542          } else {
 543             /* this vec has no samples or is identical with a previous vec */
 544             GLint worst = fxt1_worst(vec[j], input, nc, n);
 545             for (i = 0; i < nc; i++) {
 546                vec[j][i] = input[worst][i];
 547             }
 548          }
 549       }
 550    }
 551
 552    return 0; /* could not converge fast enough */
 553 }
 554
 555
 556 static void
 557 fxt1_quantize_CHROMA (GLuint *cc,
 558                       GLubyte input[N_TEXELS][MAX_COMP])
 559 {
 560    const GLint n_vect = 4; /* 4 base vectors to find */
 561    const GLint n_comp = 3; /* 3 components: R, G, B */
 562    GLfloat vec[MAX_VECT][MAX_COMP];
 563    GLint i, j, k;
 564    Fx64 hi; /* high quadword */
 565    GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
 566
 567    if (fxt1_choose(vec, n_vect, input, n_comp, N_TEXELS) != 0) {
 568       fxt1_lloyd(vec, n_vect, input, n_comp, N_TEXELS);
 569    }
 570
 571    FX64_MOV32(hi, 4); /* cc-chroma = "010" + unused bit */
 572    for (j = n_vect - 1; j >= 0; j--) {
 573       for (i = 0; i < n_comp; i++) {
 574          /* add in colors */
 575          FX64_SHL(hi, 5);
 576          FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
 577       }
 578    }
 579    ((Fx64 *)cc)[1] = hi;
 580
 581    lohi = lolo = 0;
 582    /* right microtile */
 583    for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) {
 584       lohi <<= 2;
 585       lohi |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
 586    }
 587    /* left microtile */
 588    for (; k >= 0; k--) {
 589       lolo <<= 2;
 590       lolo |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
 591    }
 592    cc[1] = lohi;
 593    cc[0] = lolo;
 594 }
 595
 596
 597 static void
 598 fxt1_quantize_ALPHA0 (GLuint *cc,
 599                       GLubyte input[N_TEXELS][MAX_COMP],
 600                       GLubyte reord[N_TEXELS][MAX_COMP], GLint n)
 601 {
 602    const GLint n_vect = 3; /* 3 base vectors to find */
 603    const GLint n_comp = 4; /* 4 components: R, G, B, A */
 604    GLfloat vec[MAX_VECT][MAX_COMP];
 605    GLint i, j, k;
 606    Fx64 hi; /* high quadword */
 607    GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
 608
 609    /* the last vector indicates zero */
 610    for (i = 0; i < n_comp; i++) {
 611       vec[n_vect][i] = 0;
 612    }
 613
 614    /* the first n texels in reord are guaranteed to be non-zero */
 615    if (fxt1_choose(vec, n_vect, reord, n_comp, n) != 0) {
 616       fxt1_lloyd(vec, n_vect, reord, n_comp, n);
 617    }
 618
 619    FX64_MOV32(hi, 6); /* alpha = "011" + lerp = 0 */
 620    for (j = n_vect - 1; j >= 0; j--) {
 621       /* add in alphas */
 622       FX64_SHL(hi, 5);
 623       FX64_OR32(hi, (GLuint)(vec[j][ACOMP] / 8.0F));
 624    }
 625    for (j = n_vect - 1; j >= 0; j--) {
 626       for (i = 0; i < n_comp - 1; i++) {
 627          /* add in colors */
 628          FX64_SHL(hi, 5);
 629          FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
 630       }
 631    }
 632    ((Fx64 *)cc)[1] = hi;
 633
 634    lohi = lolo = 0;
 635    /* right microtile */
 636    for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) {
 637       lohi <<= 2;
 638       lohi |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
 639    }
 640    /* left microtile */
 641    for (; k >= 0; k--) {
 642       lolo <<= 2;
 643       lolo |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
 644    }
 645    cc[1] = lohi;
 646    cc[0] = lolo;
 647 }
 648
 649
 650 static void
 651 fxt1_quantize_ALPHA1 (GLuint *cc,
 652                       GLubyte input[N_TEXELS][MAX_COMP])
 653 {
 654    const GLint n_vect = 3; /* highest vector number in each microtile */
 655    const GLint n_comp = 4; /* 4 components: R, G, B, A */
 656    GLfloat vec[1 + 1 + 1][MAX_COMP]; /* 1.5 extrema for each sub-block */
 657    GLfloat b, iv[MAX_COMP]; /* interpolation vector */
 658    GLint i, j, k;
 659    Fx64 hi; /* high quadword */
 660    GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
 661
 662    GLint minSum;
 663    GLint maxSum;
 664    GLint minColL = 0, maxColL = 0;
 665    GLint minColR = 0, maxColR = 0;
 666    GLint sumL = 0, sumR = 0;
 667    GLint nn_comp;
 668    /* Our solution here is to find the darkest and brightest colors in
 669     * the 4x4 tile and use those as the two representative colors.
 670     * There are probably better algorithms to use (histogram-based).
 671     */
 672    nn_comp = n_comp;
 673    while ((minColL == maxColL) && nn_comp) {
 674        minSum = 2000; /* big enough */
 675        maxSum = -1; /* small enough */
 676        for (k = 0; k < N_TEXELS / 2; k++) {
 677            GLint sum = 0;
 678            for (i = 0; i < nn_comp; i++) {
 679                sum += input[k][i];
 680            }
 681            if (minSum > sum) {
 682                minSum = sum;
 683                minColL = k;
 684            }
 685            if (maxSum < sum) {
 686                maxSum = sum;
 687                maxColL = k;
 688            }
 689            sumL += sum;
 690        }
 691
 692        nn_comp--;
 693    }
 694
 695    nn_comp = n_comp;
 696    while ((minColR == maxColR) && nn_comp) {
 697        minSum = 2000; /* big enough */
 698        maxSum = -1; /* small enough */
 699        for (k = N_TEXELS / 2; k < N_TEXELS; k++) {
 700            GLint sum = 0;
 701            for (i = 0; i < nn_comp; i++) {
 702                sum += input[k][i];
 703            }
 704            if (minSum > sum) {
 705                minSum = sum;
 706                minColR = k;
 707            }
 708            if (maxSum < sum) {
 709                maxSum = sum;
 710                maxColR = k;
 711            }
 712            sumR += sum;
 713        }
 714
 715        nn_comp--;
 716    }
 717
 718    /* choose the common vector (yuck!) */
 719    {
 720       GLint j1, j2;
 721       GLint v1 = 0, v2 = 0;
 722       GLfloat err = 1e9; /* big enough */
 723       GLfloat tv[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
 724       for (i = 0; i < n_comp; i++) {
 725          tv[0][i] = input[minColL][i];
 726          tv[1][i] = input[maxColL][i];
 727          tv[2][i] = input[minColR][i];
 728          tv[3][i] = input[maxColR][i];
 729       }
 730       for (j1 = 0; j1 < 2; j1++) {
 731          for (j2 = 2; j2 < 4; j2++) {
 732             GLfloat e = 0.0F;
 733             for (i = 0; i < n_comp; i++) {
 734                e += (tv[j1][i] - tv[j2][i]) * (tv[j1][i] - tv[j2][i]);
 735             }
 736             if (e < err) {
 737                err = e;
 738                v1 = j1;
 739                v2 = j2;
 740             }
 741          }
 742       }
 743       for (i = 0; i < n_comp; i++) {
 744          vec[0][i] = tv[1 - v1][i];
 745          vec[1][i] = (tv[v1][i] * sumL + tv[v2][i] * sumR) / (sumL + sumR);
 746          vec[2][i] = tv[5 - v2][i];
 747       }
 748    }
 749
 750    /* left microtile */
 751    cc[0] = 0;
 752    if (minColL != maxColL) {
 753       /* compute interpolation vector */
 754       MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
 755
 756       /* add in texels */
 757       lolo = 0;
 758       for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
 759          GLint texel;
 760          /* interpolate color */
 761          CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
 762          /* add in texel */
 763          lolo <<= 2;
 764          lolo |= texel;
 765       }
 766
 767       cc[0] = lolo;
 768    }
 769
 770    /* right microtile */
 771    cc[1] = 0;
 772    if (minColR != maxColR) {
 773       /* compute interpolation vector */
 774       MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[1]);
 775
 776       /* add in texels */
 777       lohi = 0;
 778       for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
 779          GLint texel;
 780          /* interpolate color */
 781          CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
 782          /* add in texel */
 783          lohi <<= 2;
 784          lohi |= texel;
 785       }
 786
 787       cc[1] = lohi;
 788    }
 789
 790    FX64_MOV32(hi, 7); /* alpha = "011" + lerp = 1 */
 791    for (j = n_vect - 1; j >= 0; j--) {
 792       /* add in alphas */
 793       FX64_SHL(hi, 5);
 794       FX64_OR32(hi, (GLuint)(vec[j][ACOMP] / 8.0F));
 795    }
 796    for (j = n_vect - 1; j >= 0; j--) {
 797       for (i = 0; i < n_comp - 1; i++) {
 798          /* add in colors */
 799          FX64_SHL(hi, 5);
 800          FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
 801       }
 802    }
 803    ((Fx64 *)cc)[1] = hi;
 804 }
 805
 806
 807 static void
 808 fxt1_quantize_HI (GLuint *cc,
 809                   GLubyte input[N_TEXELS][MAX_COMP],
 810                   GLubyte reord[N_TEXELS][MAX_COMP], GLint n)
 811 {
 812    const GLint n_vect = 6; /* highest vector number */
 813    const GLint n_comp = 3; /* 3 components: R, G, B */
 814    GLfloat b = 0.0F;       /* phoudoin: silent compiler! */
 815    GLfloat iv[MAX_COMP];   /* interpolation vector */
 816    GLint i, k;
 817    GLuint hihi; /* high quadword: hi dword */
 818
 819    GLint minSum = 2000; /* big enough */
 820    GLint maxSum = -1; /* small enough */
 821    GLint minCol = 0; /* phoudoin: silent compiler! */
 822    GLint maxCol = 0; /* phoudoin: silent compiler! */
 823
 824    /* Our solution here is to find the darkest and brightest colors in
 825     * the 8x4 tile and use those as the two representative colors.
 826     * There are probably better algorithms to use (histogram-based).
 827     */
 828    for (k = 0; k < n; k++) {
 829       GLint sum = 0;
 830       for (i = 0; i < n_comp; i++) {
 831          sum += reord[k][i];
 832       }
 833       if (minSum > sum) {
 834          minSum = sum;
 835          minCol = k;
 836       }
 837       if (maxSum < sum) {
 838          maxSum = sum;
 839          maxCol = k;
 840       }
 841    }
 842
 843    hihi = 0; /* cc-hi = "00" */
 844    for (i = 0; i < n_comp; i++) {
 845       /* add in colors */
 846       hihi <<= 5;
 847       hihi |= reord[maxCol][i] >> 3;
 848    }
 849    for (i = 0; i < n_comp; i++) {
 850       /* add in colors */
 851       hihi <<= 5;
 852       hihi |= reord[minCol][i] >> 3;
 853    }
 854    cc[3] = hihi;
 855    cc[0] = cc[1] = cc[2] = 0;
 856
 857    /* compute interpolation vector */
 858    if (minCol != maxCol) {
 859       MAKEIVEC(n_vect, n_comp, iv, b, reord[minCol], reord[maxCol]);
 860    }
 861
 862    /* add in texels */
 863    for (k = N_TEXELS - 1; k >= 0; k--) {
 864       GLint t = k * 3;
 865       GLuint *kk = (GLuint *)((char *)cc + t / 8);
 866       GLint texel = n_vect + 1; /* transparent black */
 867
 868       if (!ISTBLACK(input[k])) {
 869          if (minCol != maxCol) {
 870             /* interpolate color */
 871             CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
 872             /* add in texel */
 873             kk[0] |= texel << (t & 7);
 874          }
 875       } else {
 876          /* add in texel */
 877          kk[0] |= texel << (t & 7);
 878       }
 879    }
 880 }
 881
 882
 883 static void
 884 fxt1_quantize_MIXED1 (GLuint *cc,
 885                       GLubyte input[N_TEXELS][MAX_COMP])
 886 {
 887    const GLint n_vect = 2; /* highest vector number in each microtile */
 888    const GLint n_comp = 3; /* 3 components: R, G, B */
 889    GLubyte vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
 890    GLfloat b, iv[MAX_COMP]; /* interpolation vector */
 891    GLint i, j, k;
 892    Fx64 hi; /* high quadword */
 893    GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
 894
 895    GLint minSum;
 896    GLint maxSum;
 897    GLint minColL = 0, maxColL = -1;
 898    GLint minColR = 0, maxColR = -1;
 899
 900    /* Our solution here is to find the darkest and brightest colors in
 901     * the 4x4 tile and use those as the two representative colors.
 902     * There are probably better algorithms to use (histogram-based).
 903     */
 904    minSum = 2000; /* big enough */
 905    maxSum = -1; /* small enough */
 906    for (k = 0; k < N_TEXELS / 2; k++) {
 907       if (!ISTBLACK(input[k])) {
 908          GLint sum = 0;
 909          for (i = 0; i < n_comp; i++) {
 910             sum += input[k][i];
 911          }
 912          if (minSum > sum) {
 913             minSum = sum;
 914             minColL = k;
 915          }
 916          if (maxSum < sum) {
 917             maxSum = sum;
 918             maxColL = k;
 919          }
 920       }
 921    }
 922    minSum = 2000; /* big enough */
 923    maxSum = -1; /* small enough */
 924    for (; k < N_TEXELS; k++) {
 925       if (!ISTBLACK(input[k])) {
 926          GLint sum = 0;
 927          for (i = 0; i < n_comp; i++) {
 928             sum += input[k][i];
 929          }
 930          if (minSum > sum) {
 931             minSum = sum;
 932             minColR = k;
 933          }
 934          if (maxSum < sum) {
 935             maxSum = sum;
 936             maxColR = k;
 937          }
 938       }
 939    }
 940
 941    /* left microtile */
 942    if (maxColL == -1) {
 943       /* all transparent black */
 944       cc[0] = ~0u;
 945       for (i = 0; i < n_comp; i++) {
 946          vec[0][i] = 0;
 947          vec[1][i] = 0;
 948       }
 949    } else {
 950       cc[0] = 0;
 951       for (i = 0; i < n_comp; i++) {
 952          vec[0][i] = input[minColL][i];
 953          vec[1][i] = input[maxColL][i];
 954       }
 955       if (minColL != maxColL) {
 956          /* compute interpolation vector */
 957          MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
 958
 959          /* add in texels */
 960          lolo = 0;
 961          for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
 962             GLint texel = n_vect + 1; /* transparent black */
 963             if (!ISTBLACK(input[k])) {
 964                /* interpolate color */
 965                CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
 966             }
 967             /* add in texel */
 968             lolo <<= 2;
 969             lolo |= texel;
 970          }
 971          cc[0] = lolo;
 972       }
 973    }
 974
 975    /* right microtile */
 976    if (maxColR == -1) {
 977       /* all transparent black */
 978       cc[1] = ~0u;
 979       for (i = 0; i < n_comp; i++) {
 980          vec[2][i] = 0;
 981          vec[3][i] = 0;
 982       }
 983    } else {
 984       cc[1] = 0;
 985       for (i = 0; i < n_comp; i++) {
 986          vec[2][i] = input[minColR][i];
 987          vec[3][i] = input[maxColR][i];
 988       }
 989       if (minColR != maxColR) {
 990          /* compute interpolation vector */
 991          MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);
 992
 993          /* add in texels */
 994          lohi = 0;
 995          for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
 996             GLint texel = n_vect + 1; /* transparent black */
 997             if (!ISTBLACK(input[k])) {
 998                /* interpolate color */
 999                CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
1000             }
1001             /* add in texel */
1002             lohi <<= 2;
1003             lohi |= texel;
1004          }
1005          cc[1] = lohi;
1006       }
1007    }
1008
1009    FX64_MOV32(hi, 9 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
1010    for (j = 2 * 2 - 1; j >= 0; j--) {
1011       for (i = 0; i < n_comp; i++) {
1012          /* add in colors */
1013          FX64_SHL(hi, 5);
1014          FX64_OR32(hi, vec[j][i] >> 3);
1015       }
1016    }
1017    ((Fx64 *)cc)[1] = hi;
1018 }
1019
1020
/**
 * Encode one block of N_TEXELS texels in FXT1 "mixed" format with the
 * alpha bit clear (no transparent texels).
 *
 * The block is split in two microtiles: texels [0, N_TEXELS/2) are the
 * left one and texels [N_TEXELS/2, N_TEXELS) the right one.  Each
 * microtile gets two endpoint colors taken from its own texels and one
 * 2-bit selector per texel (n_vect == 3, i.e. selectors 0..3).
 *
 * fxt1_quantize() selects this encoder when none of the texels is
 * transparent black and none is translucent.
 *
 * \param cc     output block.  cc[0] receives the selectors of the left
 *               microtile, cc[1] those of the right microtile, and the
 *               second 64-bit half receives the mode bits and the four
 *               endpoint colors.
 * \param input  the texels of the block, in block order.
 */
static void
fxt1_quantize_MIXED0 (GLuint *cc,
                      GLubyte input[N_TEXELS][MAX_COMP])
{
   const GLint n_vect = 3; /* highest vector number in each microtile */
   const GLint n_comp = 3; /* 3 components: R, G, B */
   GLubyte vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
   GLfloat b, iv[MAX_COMP]; /* interpolation vector */
   GLint i, j, k;
   Fx64 hi; /* high quadword */
   GLuint lohi, lolo; /* low quadword: hi dword, lo dword */

   /* indices (into input[]) of the texels chosen as endpoints */
   GLint minColL = 0, maxColL = 0;
   GLint minColR = 0, maxColR = 0;
#if 0
   /* Disabled alternative: pick endpoints by component sum. */
   GLint minSum;
   GLint maxSum;

   /* Our solution here is to find the darkest and brightest colors in
    * the 4x4 tile and use those as the two representative colors.
    * There are probably better algorithms to use (histogram-based).
    */
   minSum = 2000; /* big enough */
   maxSum = -1; /* small enough */
   for (k = 0; k < N_TEXELS / 2; k++) {
      GLint sum = 0;
      for (i = 0; i < n_comp; i++) {
         sum += input[k][i];
      }
      if (minSum > sum) {
         minSum = sum;
         minColL = k;
      }
      if (maxSum < sum) {
         maxSum = sum;
         maxColL = k;
      }
   }
   minSum = 2000; /* big enough */
   maxSum = -1; /* small enough */
   for (; k < N_TEXELS; k++) {
      GLint sum = 0;
      for (i = 0; i < n_comp; i++) {
         sum += input[k][i];
      }
      if (minSum > sum) {
         minSum = sum;
         minColR = k;
      }
      if (maxSum < sum) {
         maxSum = sum;
         maxColR = k;
      }
   }
#else
   GLint minVal;
   GLint maxVal;
   /* NOTE(review): fxt1_variance() is defined outside this view; its
    * result is used below as a component index, presumably the channel
    * with the largest variance in the given microtile -- confirm.
    */
   GLint maxVarL = fxt1_variance(NULL, input, n_comp, N_TEXELS / 2);
   GLint maxVarR = fxt1_variance(NULL, &input[N_TEXELS / 2], n_comp, N_TEXELS / 2);

   /* Scan the channel with max variance for lo & hi
    * and use those as the two representative colors.
    */
   minVal = 2000; /* big enough */
   maxVal = -1; /* small enough */
   for (k = 0; k < N_TEXELS / 2; k++) {
      GLint t = input[k][maxVarL];
      if (minVal > t) {
         minVal = t;
         minColL = k;
      }
      if (maxVal < t) {
         maxVal = t;
         maxColL = k;
      }
   }
   minVal = 2000; /* big enough */
   maxVal = -1; /* small enough */
   /* k continues where the previous loop stopped: right microtile */
   for (; k < N_TEXELS; k++) {
      GLint t = input[k][maxVarR];
      if (minVal > t) {
         minVal = t;
         minColR = k;
      }
      if (maxVal < t) {
         maxVal = t;
         maxColR = k;
      }
   }
#endif

   /* left microtile */
   cc[0] = 0; /* stays 0 (every selector 0) when both endpoints are the same texel */
   for (i = 0; i < n_comp; i++) {
      vec[0][i] = input[minColL][i];
      vec[1][i] = input[maxColL][i];
   }
   if (minColL != maxColL) {
      /* compute interpolation vector */
      /* NOTE(review): MAKEIVEC and CALCCDOT are macros defined outside
       * this view; presumably MAKEIVEC fills iv/b and CALCCDOT uses them
       * to turn a texel color into a selector in 0..n_vect -- confirm.
       */
      MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);

      /* add in texels */
      /* Walk from the last texel to the first so that, after all the
       * shifts, texel 0 occupies the two lowest bits of lolo.
       */
      lolo = 0;
      for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
         GLint texel;
         /* interpolate color */
         CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
         /* add in texel */
         lolo <<= 2;
         lolo |= texel;
      }

      /* funky encoding for LSB of green */
      /* Bit 1 of lolo (high selector bit of texel 0) must equal the XOR
       * of bit 2 of the two endpoints' green values: the decoder reads
       * that selector bit back and XORs it with the stored green LSB to
       * obtain the first endpoint's green LSB.  When it does not match,
       * swap the endpoints and invert every selector (s -> 3 - s), which
       * describes the same colors but flips the bit.
       */
      if ((GLint)((lolo >> 1) & 1) != (((vec[1][GCOMP] ^ vec[0][GCOMP]) >> 2) & 1)) {
         for (i = 0; i < n_comp; i++) {
            vec[1][i] = input[minColL][i];
            vec[0][i] = input[maxColL][i];
         }
         lolo = ~lolo;
      }

      cc[0] = lolo;
   }

   /* right microtile */
   cc[1] = 0; /* stays 0 (every selector 0) when both endpoints are the same texel */
   for (i = 0; i < n_comp; i++) {
      vec[2][i] = input[minColR][i];
      vec[3][i] = input[maxColR][i];
   }
   if (minColR != maxColR) {
      /* compute interpolation vector */
      MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);

      /* add in texels */
      /* last texel first, so texel N_TEXELS/2 lands in the two lowest bits */
      lohi = 0;
      for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
         GLint texel;
         /* interpolate color */
         CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
         /* add in texel */
         lohi <<= 2;
         lohi |= texel;
      }

      /* funky encoding for LSB of green */
      /* same parity rule as for the left microtile, on bit 1 of lohi */
      if ((GLint)((lohi >> 1) & 1) != (((vec[3][GCOMP] ^ vec[2][GCOMP]) >> 2) & 1)) {
         for (i = 0; i < n_comp; i++) {
            vec[3][i] = input[minColR][i];
            vec[2][i] = input[maxColR][i];
         }
         lohi = ~lohi;
      }

      cc[1] = lohi;
   }

   /* Top four bits of the block, most significant first:
    *   8                      -> mode = mixed ("1")
    *   vec[3][GCOMP] & 4      -> extra green bit of the right microtile
    *   (vec[1][GCOMP]>>1) & 2 -> extra green bit of the left microtile
    *   bit 0 left clear       -> alpha = 0
    * (bit 2 of an 8-bit green value is the bit just below the five
    * bits kept by the ">> 3" in the loop that follows).
    */
   FX64_MOV32(hi, 8 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* mixed = "1", alpha = 0 */
   /* Append the four endpoints as 5-bit fields: vec[3] is added first and
    * therefore ends up in the most significant position, vec[0] in the
    * least significant one.
    */
   for (j = 2 * 2 - 1; j >= 0; j--) {
      for (i = 0; i < n_comp; i++) {
         /* add in colors */
         FX64_SHL(hi, 5);
         FX64_OR32(hi, vec[j][i] >> 3);
      }
   }
   /* store into the second 64-bit half of the block (cc[2], cc[3]) */
   ((Fx64 *)cc)[1] = hi;
}
1188
1189
1190 static void
1191 fxt1_quantize (GLuint *cc, const GLubyte *lines[], GLint comps)
1192 {
1193    GLint trualpha;
1194    GLubyte reord[N_TEXELS][MAX_COMP];
1195
1196    GLubyte input[N_TEXELS][MAX_COMP];
1197    GLint i, k, l;
1198
1199    if (comps == 3) {
1200       /* make the whole block opaque */
1201       memset(input, -1, sizeof(input));
1202    }
1203
1204    /* 8 texels each line */
1205    for (l = 0; l < 4; l++) {
1206       for (k = 0; k < 4; k++) {
1207          for (i = 0; i < comps; i++) {
1208             input[k + l * 4][i] = *lines[l]++;
1209          }
1210       }
1211       for (; k < 8; k++) {
1212          for (i = 0; i < comps; i++) {
1213             input[k + l * 4 + 12][i] = *lines[l]++;
1214          }
1215       }
1216    }
1217
1218    /* block layout:
1219     * 00, 01, 02, 03, 08, 09, 0a, 0b
1220     * 10, 11, 12, 13, 18, 19, 1a, 1b
1221     * 04, 05, 06, 07, 0c, 0d, 0e, 0f
1222     * 14, 15, 16, 17, 1c, 1d, 1e, 1f
1223     */
1224
1225    /* [dBorca]
1226     * stupidity flows forth from this
1227     */
1228    l = N_TEXELS;
1229    trualpha = 0;
1230    if (comps == 4) {
1231       /* skip all transparent black texels */
1232       l = 0;
1233       for (k = 0; k < N_TEXELS; k++) {
1234          /* test all components against 0 */
1235          if (!ISTBLACK(input[k])) {
1236             /* texel is not transparent black */
1237             COPY_4UBV(reord[l], input[k]);
1238             if (reord[l][ACOMP] < (255 - ALPHA_TS)) {
1239                /* non-opaque texel */
1240                trualpha = !0;
1241             }
1242             l++;
1243          }
1244       }
1245    }
1246
1247 #if 0
1248    if (trualpha) {
1249       fxt1_quantize_ALPHA0(cc, input, reord, l);
1250    } else if (l == 0) {
1251       cc[0] = cc[1] = cc[2] = -1;
1252       cc[3] = 0;
1253    } else if (l < N_TEXELS) {
1254       fxt1_quantize_HI(cc, input, reord, l);
1255    } else {
1256       fxt1_quantize_CHROMA(cc, input);
1257    }
1258    (void)fxt1_quantize_ALPHA1;
1259    (void)fxt1_quantize_MIXED1;
1260    (void)fxt1_quantize_MIXED0;
1261 #else
1262    if (trualpha) {
1263       fxt1_quantize_ALPHA1(cc, input);
1264    } else if (l == 0) {
1265       cc[0] = cc[1] = cc[2] = ~0u;
1266       cc[3] = 0;
1267    } else if (l < N_TEXELS) {
1268       fxt1_quantize_MIXED1(cc, input);
1269    } else {
1270       fxt1_quantize_MIXED0(cc, input);
1271    }
1272    (void)fxt1_quantize_ALPHA0;
1273    (void)fxt1_quantize_HI;
1274    (void)fxt1_quantize_CHROMA;
1275 #endif
1276 }
1277
1278
1279
1280 /**
1281  * Upscale an image by replication, not (typical) stretching.
1282  * We use this when the image width or height is less than a
1283  * certain size (4, 8) and we need to upscale an image.
1284  */
1285 static void
1286 upscale_teximage2d(GLsizei inWidth, GLsizei inHeight,
1287                    GLsizei outWidth, GLsizei outHeight,
1288                    GLint comps, const GLubyte *src, GLint srcRowStride,
1289                    GLubyte *dest )
1290 {
1291    GLint i, j, k;
1292
1293    ASSERT(outWidth >= inWidth);
1294    ASSERT(outHeight >= inHeight);
1295 #if 0
1296    ASSERT(inWidth == 1 || inWidth == 2 || inHeight == 1 || inHeight == 2);
1297    ASSERT((outWidth & 3) == 0);
1298    ASSERT((outHeight & 3) == 0);
1299 #endif
1300
1301    for (i = 0; i < outHeight; i++) {
1302       const GLint ii = i % inHeight;
1303       for (j = 0; j < outWidth; j++) {
1304          const GLint jj = j % inWidth;
1305          for (k = 0; k < comps; k++) {
1306             dest[(i * outWidth + j) * comps + k]
1307                = src[ii * srcRowStride + jj * comps + k];
1308          }
1309       }
1310    }
1311 }
1312
1313
1314 static void
1315 fxt1_encode (GLuint width, GLuint height, GLint comps,
1316              const void *source, GLint srcRowStride,
1317              void *dest, GLint destRowStride)
1318 {
1319    GLuint x, y;
1320    const GLubyte *data;
1321    GLuint *encoded = (GLuint *)dest;
1322    void *newSource = NULL;
1323
1324    assert(comps == 3 || comps == 4);
1325
1326    /* Replicate image if width is not M8 or height is not M4 */
1327    if ((width & 7) | (height & 3)) {
1328       GLint newWidth = (width + 7) & ~7;
1329       GLint newHeight = (height + 3) & ~3;
1330       newSource = malloc(comps * newWidth * newHeight * sizeof(GLubyte));
1331       if (!newSource) {
1332          GET_CURRENT_CONTEXT(ctx);
1333          _mesa_error(ctx, GL_OUT_OF_MEMORY, "texture compression");
1334          goto cleanUp;
1335       }
1336       upscale_teximage2d(width, height, newWidth, newHeight,
1337                          comps, (const GLubyte *) source,
1338                          srcRowStride, (GLubyte *) newSource);
1339       source = newSource;
1340       width = newWidth;
1341       height = newHeight;
1342       srcRowStride = comps * newWidth;
1343    }
1344
1345    data = (const GLubyte *) source;
1346    destRowStride = (destRowStride - width * 2) / 4;
1347    for (y = 0; y < height; y += 4) {
1348       GLuint offs = 0 + (y + 0) * srcRowStride;
1349       for (x = 0; x < width; x += 8) {
1350          const GLubyte *lines[4];
1351          lines[0] = &data[offs];
1352          lines[1] = lines[0] + srcRowStride;
1353          lines[2] = lines[1] + srcRowStride;
1354          lines[3] = lines[2] + srcRowStride;
1355          offs += 8 * comps;
1356          fxt1_quantize(encoded, lines, comps);
1357          /* 128 bits per 8x4 block */
1358          encoded += 4;
1359       }
1360       encoded += destRowStride;
1361    }
1362
1363  cleanUp:
1364    free(newSource);
1365 }
1366
1367
1368 /***************************************************************************\
1369  * FXT1 decoder
1370  *
1371  * The decoder is based on GL_3DFX_texture_compression_FXT1
1372  * specification and serves as a concept for the encoder.
1373 \***************************************************************************/
1374
1375
/* Lookup table for scaling 5 bit colors up to 8 bits.
 * Entry i holds i * 255 / 31 rounded to the nearest integer, so 0 maps
 * to 0 and 31 maps to 255.  The table has 32 entries; the index must be
 * in the range 0..31.
 */
static const GLubyte _rgb_scale_5[] = {
   0,   8,   16,  25,  33,  41,  49,  58,
   66,  74,  82,  90,  99,  107, 115, 123,
   132, 140, 148, 156, 165, 173, 181, 189,
   197, 206, 214, 222, 230, 239, 247, 255
};
1383
/* Lookup table for scaling 6 bit colors up to 8 bits.
 * Entry i holds i * 255 / 63 rounded to the nearest integer, so 0 maps
 * to 0 and 63 maps to 255.  The table has 64 entries; the index must be
 * in the range 0..63.
 */
static const GLubyte _rgb_scale_6[] = {
   0,   4,   8,   12,  16,  20,  24,  28,
   32,  36,  40,  45,  49,  53,  57,  61,
   65,  69,  73,  77,  81,  85,  89,  93,
   97,  101, 105, 109, 113, 117, 121, 125,
   130, 134, 138, 142, 146, 150, 154, 158,
   162, 166, 170, 174, 178, 182, 186, 190,
   194, 198, 202, 206, 210, 215, 219, 223,
   227, 231, 235, 239, 243, 247, 251, 255
};
1395
1396
/* Decoder helper macros.
 *
 * CC_SEL(cc, which)  - view cc as an array of 32-bit words, pick the word
 *                      that contains bit number `which` and shift it right
 *                      so that bit becomes bit 0.  Higher bits are NOT
 *                      masked off, and a field that straddles a 32-bit
 *                      word boundary cannot be fetched this way.
 * UP5(c)             - expand the low 5 bits of c to an 8-bit value.
 * UP6(c, b)          - expand to 8 bits the 6-bit value made of the low
 *                      5 bits of c followed by the low bit of b.
 * LERP(n, t, c0, c1) - rounded integer interpolation between c0 (t == 0)
 *                      and c1 (t == n).  The expansion is parenthesized
 *                      as a whole so it can be used inside larger
 *                      expressions.
 */
#define CC_SEL(cc, which) (((const GLuint *)(cc))[(which) / 32] >> ((which) & 31))
#define UP5(c) _rgb_scale_5[(c) & 31]
#define UP6(c, b) _rgb_scale_6[(((c) & 31) << 1) | ((b) & 1)]
#define LERP(n, t, c0, c1) ((((n) - (t)) * (c0) + (t) * (c1) + (n) / 2) / (n))
1401
1402
1403 static void
1404 fxt1_decode_1HI (const GLubyte *code, GLint t, GLubyte *rgba)
1405 {
1406    const GLuint *cc;
1407
1408    t *= 3;
1409    cc = (const GLuint *)(code + t / 8);
1410    t = (cc[0] >> (t & 7)) & 7;
1411
1412    if (t == 7) {
1413       rgba[RCOMP] = rgba[GCOMP] = rgba[BCOMP] = rgba[ACOMP] = 0;
1414    } else {
1415       GLubyte r, g, b;
1416       cc = (const GLuint *)(code + 12);
1417       if (t == 0) {
1418          b = UP5(CC_SEL(cc, 0));
1419          g = UP5(CC_SEL(cc, 5));
1420          r = UP5(CC_SEL(cc, 10));
1421       } else if (t == 6) {
1422          b = UP5(CC_SEL(cc, 15));
1423          g = UP5(CC_SEL(cc, 20));
1424          r = UP5(CC_SEL(cc, 25));
1425       } else {
1426          b = LERP(6, t, UP5(CC_SEL(cc, 0)), UP5(CC_SEL(cc, 15)));
1427          g = LERP(6, t, UP5(CC_SEL(cc, 5)), UP5(CC_SEL(cc, 20)));
1428          r = LERP(6, t, UP5(CC_SEL(cc, 10)), UP5(CC_SEL(cc, 25)));
1429       }
1430       rgba[RCOMP] = r;
1431       rgba[GCOMP] = g;
1432       rgba[BCOMP] = b;
1433       rgba[ACOMP] = 255;
1434    }
1435 }
1436
1437
1438 static void
1439 fxt1_decode_1CHROMA (const GLubyte *code, GLint t, GLubyte *rgba)
1440 {
1441    const GLuint *cc;
1442    GLuint kk;
1443
1444    cc = (const GLuint *)code;
1445    if (t & 16) {
1446       cc++;
1447       t &= 15;
1448    }
1449    t = (cc[0] >> (t * 2)) & 3;
1450
1451    t *= 15;
1452    cc = (const GLuint *)(code + 8 + t / 8);
1453    kk = cc[0] >> (t & 7);
1454    rgba[BCOMP] = UP5(kk);
1455    rgba[GCOMP] = UP5(kk >> 5);
1456    rgba[RCOMP] = UP5(kk >> 10);
1457    rgba[ACOMP] = 255;
1458 }
1459
1460
1461 static void
1462 fxt1_decode_1MIXED (const GLubyte *code, GLint t, GLubyte *rgba)
1463 {
1464    const GLuint *cc;
1465    GLuint col[2][3];
1466    GLint glsb, selb;
1467
1468    cc = (const GLuint *)code;
1469    if (t & 16) {
1470       t &= 15;
1471       t = (cc[1] >> (t * 2)) & 3;
1472       /* col 2 */
1473       col[0][BCOMP] = (*(const GLuint *)(code + 11)) >> 6;
1474       col[0][GCOMP] = CC_SEL(cc, 99);
1475       col[0][RCOMP] = CC_SEL(cc, 104);
1476       /* col 3 */
1477       col[1][BCOMP] = CC_SEL(cc, 109);
1478       col[1][GCOMP] = CC_SEL(cc, 114);
1479       col[1][RCOMP] = CC_SEL(cc, 119);
1480       glsb = CC_SEL(cc, 126);
1481       selb = CC_SEL(cc, 33);
1482    } else {
1483       t = (cc[0] >> (t * 2)) & 3;
1484       /* col 0 */
1485       col[0][BCOMP] = CC_SEL(cc, 64);
1486       col[0][GCOMP] = CC_SEL(cc, 69);
1487       col[0][RCOMP] = CC_SEL(cc, 74);
1488       /* col 1 */
1489       col[1][BCOMP] = CC_SEL(cc, 79);
1490       col[1][GCOMP] = CC_SEL(cc, 84);
1491       col[1][RCOMP] = CC_SEL(cc, 89);
1492       glsb = CC_SEL(cc, 125);
1493       selb = CC_SEL(cc, 1);
1494    }
1495
1496    if (CC_SEL(cc, 124) & 1) {
1497       /* alpha[0] == 1 */
1498
1499       if (t == 3) {
1500          /* zero */
1501          rgba[RCOMP] = rgba[BCOMP] = rgba[GCOMP] = rgba[ACOMP] = 0;
1502       } else {
1503          GLubyte r, g, b;
1504          if (t == 0) {
1505             b = UP5(col[0][BCOMP]);
1506             g = UP5(col[0][GCOMP]);
1507             r = UP5(col[0][RCOMP]);
1508          } else if (t == 2) {
1509             b = UP5(col[1][BCOMP]);
1510             g = UP6(col[1][GCOMP], glsb);
1511             r = UP5(col[1][RCOMP]);
1512          } else {
1513             b = (UP5(col[0][BCOMP]) + UP5(col[1][BCOMP])) / 2;
1514             g = (UP5(col[0][GCOMP]) + UP6(col[1][GCOMP], glsb)) / 2;
1515             r = (UP5(col[0][RCOMP]) + UP5(col[1][RCOMP])) / 2;
1516          }
1517          rgba[RCOMP] = r;
1518          rgba[GCOMP] = g;
1519          rgba[BCOMP] = b;
1520          rgba[ACOMP] = 255;
1521       }
1522    } else {
1523       /* alpha[0] == 0 */
1524       GLubyte r, g, b;
1525       if (t == 0) {
1526          b = UP5(col[0][BCOMP]);
1527          g = UP6(col[0][GCOMP], glsb ^ selb);
1528          r = UP5(col[0][RCOMP]);
1529       } else if (t == 3) {
1530          b = UP5(col[1][BCOMP]);
1531          g = UP6(col[1][GCOMP], glsb);
1532          r = UP5(col[1][RCOMP]);
1533       } else {
1534          b = LERP(3, t, UP5(col[0][BCOMP]), UP5(col[1][BCOMP]));
1535          g = LERP(3, t, UP6(col[0][GCOMP], glsb ^ selb),
1536                         UP6(col[1][GCOMP], glsb));
1537          r = LERP(3, t, UP5(col[0][RCOMP]), UP5(col[1][RCOMP]));
1538       }
1539       rgba[RCOMP] = r;
1540       rgba[GCOMP] = g;
1541       rgba[BCOMP] = b;
1542       rgba[ACOMP] = 255;
1543    }
1544 }
1545
1546
1547 static void
1548 fxt1_decode_1ALPHA (const GLubyte *code, GLint t, GLubyte *rgba)
1549 {
1550    const GLuint *cc;
1551    GLubyte r, g, b, a;
1552
1553    cc = (const GLuint *)code;
1554    if (CC_SEL(cc, 124) & 1) {
1555       /* lerp == 1 */
1556       GLuint col0[4];
1557
1558       if (t & 16) {
1559          t &= 15;
1560          t = (cc[1] >> (t * 2)) & 3;
1561          /* col 2 */
1562          col0[BCOMP] = (*(const GLuint *)(code + 11)) >> 6;
1563          col0[GCOMP] = CC_SEL(cc, 99);
1564          col0[RCOMP] = CC_SEL(cc, 104);
1565          col0[ACOMP] = CC_SEL(cc, 119);
1566       } else {
1567          t = (cc[0] >> (t * 2)) & 3;
1568          /* col 0 */
1569          col0[BCOMP] = CC_SEL(cc, 64);
1570          col0[GCOMP] = CC_SEL(cc, 69);
1571          col0[RCOMP] = CC_SEL(cc, 74);
1572          col0[ACOMP] = CC_SEL(cc, 109);
1573       }
1574
1575       if (t == 0) {
1576          b = UP5(col0[BCOMP]);
1577          g = UP5(col0[GCOMP]);
1578          r = UP5(col0[RCOMP]);
1579          a = UP5(col0[ACOMP]);
1580       } else if (t == 3) {
1581          b = UP5(CC_SEL(cc, 79));
1582          g = UP5(CC_SEL(cc, 84));
1583          r = UP5(CC_SEL(cc, 89));
1584          a = UP5(CC_SEL(cc, 114));
1585       } else {
1586          b = LERP(3, t, UP5(col0[BCOMP]), UP5(CC_SEL(cc, 79)));
1587          g = LERP(3, t, UP5(col0[GCOMP]), UP5(CC_SEL(cc, 84)));
1588          r = LERP(3, t, UP5(col0[RCOMP]), UP5(CC_SEL(cc, 89)));
1589          a = LERP(3, t, UP5(col0[ACOMP]), UP5(CC_SEL(cc, 114)));
1590       }
1591    } else {
1592       /* lerp == 0 */
1593
1594       if (t & 16) {
1595          cc++;
1596          t &= 15;
1597       }
1598       t = (cc[0] >> (t * 2)) & 3;
1599
1600       if (t == 3) {
1601          /* zero */
1602          r = g = b = a = 0;
1603       } else {
1604          GLuint kk;
1605          cc = (const GLuint *)code;
1606          a = UP5(cc[3] >> (t * 5 + 13));
1607          t *= 15;
1608          cc = (const GLuint *)(code + 8 + t / 8);
1609          kk = cc[0] >> (t & 7);
1610          b = UP5(kk);
1611          g = UP5(kk >> 5);
1612          r = UP5(kk >> 10);
1613       }
1614    }
1615    rgba[RCOMP] = r;
1616    rgba[GCOMP] = g;
1617    rgba[BCOMP] = b;
1618    rgba[ACOMP] = a;
1619 }
1620
1621
1622 static void
1623 fxt1_decode_1 (const void *texture, GLint stride, /* in pixels */
1624                GLint i, GLint j, GLubyte *rgba)
1625 {
1626    static void (*decode_1[]) (const GLubyte *, GLint, GLubyte *) = {
1627       fxt1_decode_1HI,     /* cc-high   = "00?" */
1628       fxt1_decode_1HI,     /* cc-high   = "00?" */
1629       fxt1_decode_1CHROMA, /* cc-chroma = "010" */
1630       fxt1_decode_1ALPHA,  /* alpha     = "011" */
1631       fxt1_decode_1MIXED,  /* mixed     = "1??" */
1632       fxt1_decode_1MIXED,  /* mixed     = "1??" */
1633       fxt1_decode_1MIXED,  /* mixed     = "1??" */
1634       fxt1_decode_1MIXED   /* mixed     = "1??" */
1635    };
1636
1637    const GLubyte *code = (const GLubyte *)texture +
1638                          ((j / 4) * (stride / 8) + (i / 8)) * 16;
1639    GLint mode = CC_SEL(code, 125);
1640    GLint t = i & 7;
1641
1642    if (t & 4) {
1643       t += 12;
1644    }
1645    t += (j & 3) * 4;
1646
1647    decode_1[mode](code, t, rgba);
1648 }
1649
1650
1651 #endif /* FEATURE_texture_fxt1 */