src/mesa/main/texcompress_fxt1.c

   1 /*
   2  * Mesa 3-D graphics library
   3  * Version:  7.1
   4  *
   5  * Copyright (C) 1999-2008  Brian Paul   All Rights Reserved.
   6  *
   7  * Permission is hereby granted, free of charge, to any person obtaining a
   8  * copy of this software and associated documentation files (the "Software"),
   9  * to deal in the Software without restriction, including without limitation
  10  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  11  * and/or sell copies of the Software, and to permit persons to whom the
  12  * Software is furnished to do so, subject to the following conditions:
  13  *
  14  * The above copyright notice and this permission notice shall be included
  15  * in all copies or substantial portions of the Software.
  16  *
  17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  18  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  20  * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
  21  * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  22  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  23  */
  24
  25
  26 /**
  27  * \file texcompress_fxt1.c
  28  * GL_3DFX_texture_compression_FXT1 support.
  29  */
  30
  31
  32 #include "glheader.h"
  33 #include "imports.h"
  34 #include "colormac.h"
  35 #include "image.h"
  36 #include "macros.h"
  37 #include "mfeatures.h"
  38 #include "mipmap.h"
  39 #include "texcompress.h"
  40 #include "texcompress_fxt1.h"
  41 #include "texstore.h"
  42 #include "swrast/s_context.h"
  43
  44
  45 static void
  46 fxt1_encode (GLuint width, GLuint height, GLint comps,
  47              const void *source, GLint srcRowStride,
  48              void *dest, GLint destRowStride);
  49
  50 static void
  51 fxt1_decode_1 (const void *texture, GLint stride,
  52                GLint i, GLint j, GLubyte *rgba);
  53
  54
  55 /**
  56  * Store user's image in rgb_fxt1 format.
  57  */
  58 GLboolean
  59 _mesa_texstore_rgb_fxt1(TEXSTORE_PARAMS)
  60 {
  61    const GLubyte *pixels;
  62    GLint srcRowStride;
  63    GLubyte *dst;
  64    const GLubyte *tempImage = NULL;
  65
  66    ASSERT(dstFormat == MESA_FORMAT_RGB_FXT1);
  67
  68    if (srcFormat != GL_RGB ||
  69        srcType != GL_UNSIGNED_BYTE ||
  70        ctx->_ImageTransferState ||
  71        srcPacking->RowLength != srcWidth ||
  72        srcPacking->SwapBytes) {
  73       /* convert image to RGB/GLubyte */
  74       tempImage = _mesa_make_temp_ubyte_image(ctx, dims,
  75                                              baseInternalFormat,
  76                                              _mesa_get_format_base_format(dstFormat),
  77                                              srcWidth, srcHeight, srcDepth,
  78                                              srcFormat, srcType, srcAddr,
  79                                              srcPacking);
  80       if (!tempImage)
  81          return GL_FALSE; /* out of memory */
  82       pixels = tempImage;
  83       srcRowStride = 3 * srcWidth;
  84       srcFormat = GL_RGB;
  85    }
  86    else {
  87       pixels = _mesa_image_address2d(srcPacking, srcAddr, srcWidth, srcHeight,
  88                                      srcFormat, srcType, 0, 0);
  89
  90       srcRowStride = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat,
  91                                             srcType) / sizeof(GLubyte);
  92    }
  93
  94    dst = dstSlices[0];
  95
  96    fxt1_encode(srcWidth, srcHeight, 3, pixels, srcRowStride,
  97                dst, dstRowStride);
  98
  99    free((void*) tempImage);
 100
 101    return GL_TRUE;
 102 }
 103
 104
 105 /**
 106  * Store user's image in rgba_fxt1 format.
 107  */
 108 GLboolean
 109 _mesa_texstore_rgba_fxt1(TEXSTORE_PARAMS)
 110 {
 111    const GLubyte *pixels;
 112    GLint srcRowStride;
 113    GLubyte *dst;
 114    const GLubyte *tempImage = NULL;
 115
 116    ASSERT(dstFormat == MESA_FORMAT_RGBA_FXT1);
 117
 118    if (srcFormat != GL_RGBA ||
 119        srcType != GL_UNSIGNED_BYTE ||
 120        ctx->_ImageTransferState ||
 121        srcPacking->SwapBytes) {
 122       /* convert image to RGBA/GLubyte */
 123       tempImage = _mesa_make_temp_ubyte_image(ctx, dims,
 124                                              baseInternalFormat,
 125                                              _mesa_get_format_base_format(dstFormat),
 126                                              srcWidth, srcHeight, srcDepth,
 127                                              srcFormat, srcType, srcAddr,
 128                                              srcPacking);
 129       if (!tempImage)
 130          return GL_FALSE; /* out of memory */
 131       pixels = tempImage;
 132       srcRowStride = 4 * srcWidth;
 133       srcFormat = GL_RGBA;
 134    }
 135    else {
 136       pixels = _mesa_image_address2d(srcPacking, srcAddr, srcWidth, srcHeight,
 137                                      srcFormat, srcType, 0, 0);
 138
 139       srcRowStride = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat,
 140                                             srcType) / sizeof(GLubyte);
 141    }
 142
 143    dst = dstSlices[0];
 144
 145    fxt1_encode(srcWidth, srcHeight, 4, pixels, srcRowStride,
 146                dst, dstRowStride);
 147
 148    free((void*) tempImage);
 149
 150    return GL_TRUE;
 151 }
 152
 153
 154 void
 155 _mesa_fetch_texel_2d_f_rgba_fxt1( const struct swrast_texture_image *texImage,
 156                                   GLint i, GLint j, GLint k, GLfloat *texel )
 157 {
 158    /* just sample as GLubyte and convert to float here */
 159    GLubyte rgba[4];
 160    (void) k;
 161    fxt1_decode_1(texImage->Map, texImage->RowStride, i, j, rgba);
 162    texel[RCOMP] = UBYTE_TO_FLOAT(rgba[RCOMP]);
 163    texel[GCOMP] = UBYTE_TO_FLOAT(rgba[GCOMP]);
 164    texel[BCOMP] = UBYTE_TO_FLOAT(rgba[BCOMP]);
 165    texel[ACOMP] = UBYTE_TO_FLOAT(rgba[ACOMP]);
 166 }
 167
 168
 169 void
 170 _mesa_fetch_texel_2d_f_rgb_fxt1( const struct swrast_texture_image *texImage,
 171                                  GLint i, GLint j, GLint k, GLfloat *texel )
 172 {
 173    /* just sample as GLubyte and convert to float here */
 174    GLubyte rgba[4];
 175    (void) k;
 176    fxt1_decode_1(texImage->Map, texImage->RowStride, i, j, rgba);
 177    texel[RCOMP] = UBYTE_TO_FLOAT(rgba[RCOMP]);
 178    texel[GCOMP] = UBYTE_TO_FLOAT(rgba[GCOMP]);
 179    texel[BCOMP] = UBYTE_TO_FLOAT(rgba[BCOMP]);
 180    texel[ACOMP] = 1.0F;
 181 }
 182
 183
 184
 185 /***************************************************************************\
 186  * FXT1 encoder
 187  *
 188  * The encoder was built by reversing the decoder,
 189  * and is vaguely based on Texus2 by 3dfx. Note that this code
 190  * is merely a proof of concept, since it is highly UNoptimized;
 191  * moreover, it is sub-optimal due to initial conditions passed
 192  * to Lloyd's algorithm (the interpolation modes are even worse).
 193 \***************************************************************************/
 194
 195
 196 #define MAX_COMP 4 /* ever needed maximum number of components in texel */
 197 #define MAX_VECT 4 /* ever needed maximum number of base vectors to find */
 198 #define N_TEXELS 32 /* number of texels in a block (always 32) */
 199 #define LL_N_REP 50 /* number of iterations in lloyd's vq */
 200 #define LL_RMS_D 10 /* fault tolerance (maximum delta) */
 201 #define LL_RMS_E 255 /* fault tolerance (maximum error) */
 202 #define ALPHA_TS 2 /* alpha threshold: (255 - ALPHA_TS) deemed opaque */
 203 #define ISTBLACK(v) (*((GLuint *)(v)) == 0)
 204
 205
 206 /*
 207  * Define a 64-bit unsigned integer type and macros
 208  */
 209 #if 1
 210
 211 #define FX64_NATIVE 1
 212
 213 typedef uint64_t Fx64;
 214
 215 #define FX64_MOV32(a, b) a = b
 216 #define FX64_OR32(a, b)  a |= b
 217 #define FX64_SHL(a, c)   a <<= c
 218
 219 #else
 220
 221 #define FX64_NATIVE 0
 222
 223 typedef struct {
 224    GLuint lo, hi;
 225 } Fx64;
 226
 227 #define FX64_MOV32(a, b) a.lo = b
 228 #define FX64_OR32(a, b)  a.lo |= b
 229
 230 #define FX64_SHL(a, c)                                 \
 231    do {                                                \
 232        if ((c) >= 32) {                                \
 233           a.hi = a.lo << ((c) - 32);                   \
 234           a.lo = 0;                                    \
 235        } else {                                        \
 236           a.hi = (a.hi << (c)) | (a.lo >> (32 - (c))); \
 237           a.lo <<= (c);                                \
 238        }                                               \
 239    } while (0)
 240
 241 #endif
 242
 243
 244 #define F(i) (GLfloat)1 /* can be used to obtain an oblong metric: 0.30 / 0.59 / 0.11 */
 245 #define SAFECDOT 1 /* for paranoids */
 246
 247 #define MAKEIVEC(NV, NC, IV, B, V0, V1)  \
 248    do {                                  \
 249       /* compute interpolation vector */ \
 250       GLfloat d2 = 0.0F;                 \
 251       GLfloat rd2;                       \
 252                                          \
 253       for (i = 0; i < NC; i++) {         \
 254          IV[i] = (V1[i] - V0[i]) * F(i); \
 255          d2 += IV[i] * IV[i];            \
 256       }                                  \
 257       rd2 = (GLfloat)NV / d2;            \
 258       B = 0;                             \
 259       for (i = 0; i < NC; i++) {         \
 260          IV[i] *= F(i);                  \
 261          B -= IV[i] * V0[i];             \
 262          IV[i] *= rd2;                   \
 263       }                                  \
 264       B = B * rd2 + 0.5f;                \
 265    } while (0)
 266
 267 #define CALCCDOT(TEXEL, NV, NC, IV, B, V)\
 268    do {                                  \
 269       GLfloat dot = 0.0F;                \
 270       for (i = 0; i < NC; i++) {         \
 271          dot += V[i] * IV[i];            \
 272       }                                  \
 273       TEXEL = (GLint)(dot + B);          \
 274       if (SAFECDOT) {                    \
 275          if (TEXEL < 0) {                \
 276             TEXEL = 0;                   \
 277          } else if (TEXEL > NV) {        \
 278             TEXEL = NV;                  \
 279          }                               \
 280       }                                  \
 281    } while (0)
 282
 283
 284 static GLint
 285 fxt1_bestcol (GLfloat vec[][MAX_COMP], GLint nv,
 286               GLubyte input[MAX_COMP], GLint nc)
 287 {
 288    GLint i, j, best = -1;
 289    GLfloat err = 1e9; /* big enough */
 290
 291    for (j = 0; j < nv; j++) {
 292       GLfloat e = 0.0F;
 293       for (i = 0; i < nc; i++) {
 294          e += (vec[j][i] - input[i]) * (vec[j][i] - input[i]);
 295       }
 296       if (e < err) {
 297          err = e;
 298          best = j;
 299       }
 300    }
 301
 302    return best;
 303 }
 304
 305
 306 static GLint
 307 fxt1_worst (GLfloat vec[MAX_COMP],
 308             GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
 309 {
 310    GLint i, k, worst = -1;
 311    GLfloat err = -1.0F; /* small enough */
 312
 313    for (k = 0; k < n; k++) {
 314       GLfloat e = 0.0F;
 315       for (i = 0; i < nc; i++) {
 316          e += (vec[i] - input[k][i]) * (vec[i] - input[k][i]);
 317       }
 318       if (e > err) {
 319          err = e;
 320          worst = k;
 321       }
 322    }
 323
 324    return worst;
 325 }
 326
 327
 328 static GLint
 329 fxt1_variance (GLdouble variance[MAX_COMP],
 330                GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
 331 {
 332    GLint i, k, best = 0;
 333    GLint sx, sx2;
 334    GLdouble var, maxvar = -1; /* small enough */
 335    GLdouble teenth = 1.0 / n;
 336
 337    for (i = 0; i < nc; i++) {
 338       sx = sx2 = 0;
 339       for (k = 0; k < n; k++) {
 340          GLint t = input[k][i];
 341          sx += t;
 342          sx2 += t * t;
 343       }
 344       var = sx2 * teenth - sx * sx * teenth * teenth;
 345       if (maxvar < var) {
 346          maxvar = var;
 347          best = i;
 348       }
 349       if (variance) {
 350          variance[i] = var;
 351       }
 352    }
 353
 354    return best;
 355 }
 356
 357
 358 static GLint
 359 fxt1_choose (GLfloat vec[][MAX_COMP], GLint nv,
 360              GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
 361 {
 362 #if 0
 363    /* Choose colors from a grid.
 364     */
 365    GLint i, j;
 366
 367    for (j = 0; j < nv; j++) {
 368       GLint m = j * (n - 1) / (nv - 1);
 369       for (i = 0; i < nc; i++) {
 370          vec[j][i] = input[m][i];
 371       }
 372    }
 373 #else
 374    /* Our solution here is to find the darkest and brightest colors in
 375     * the 8x4 tile and use those as the two representative colors.
 376     * There are probably better algorithms to use (histogram-based).
 377     */
 378    GLint i, j, k;
 379    GLint minSum = 2000; /* big enough */
 380    GLint maxSum = -1; /* small enough */
 381    GLint minCol = 0; /* phoudoin: silent compiler! */
 382    GLint maxCol = 0; /* phoudoin: silent compiler! */
 383
 384    struct {
 385       GLint flag;
 386       GLint key;
 387       GLint freq;
 388       GLint idx;
 389    } hist[N_TEXELS];
 390    GLint lenh = 0;
 391
 392    memset(hist, 0, sizeof(hist));
 393
 394    for (k = 0; k < n; k++) {
 395       GLint l;
 396       GLint key = 0;
 397       GLint sum = 0;
 398       for (i = 0; i < nc; i++) {
 399          key <<= 8;
 400          key |= input[k][i];
 401          sum += input[k][i];
 402       }
 403       for (l = 0; l < n; l++) {
 404          if (!hist[l].flag) {
 405             /* alloc new slot */
 406             hist[l].flag = !0;
 407             hist[l].key = key;
 408             hist[l].freq = 1;
 409             hist[l].idx = k;
 410             lenh = l + 1;
 411             break;
 412          } else if (hist[l].key == key) {
 413             hist[l].freq++;
 414             break;
 415          }
 416       }
 417       if (minSum > sum) {
 418          minSum = sum;
 419          minCol = k;
 420       }
 421       if (maxSum < sum) {
 422          maxSum = sum;
 423          maxCol = k;
 424       }
 425    }
 426
 427    if (lenh <= nv) {
 428       for (j = 0; j < lenh; j++) {
 429          for (i = 0; i < nc; i++) {
 430             vec[j][i] = (GLfloat)input[hist[j].idx][i];
 431          }
 432       }
 433       for (; j < nv; j++) {
 434          for (i = 0; i < nc; i++) {
 435             vec[j][i] = vec[0][i];
 436          }
 437       }
 438       return 0;
 439    }
 440
 441    for (j = 0; j < nv; j++) {
 442       for (i = 0; i < nc; i++) {
 443          vec[j][i] = ((nv - 1 - j) * input[minCol][i] + j * input[maxCol][i] + (nv - 1) / 2) / (GLfloat)(nv - 1);
 444       }
 445    }
 446 #endif
 447
 448    return !0;
 449 }
 450
 451
 452 static GLint
 453 fxt1_lloyd (GLfloat vec[][MAX_COMP], GLint nv,
 454             GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
 455 {
 456    /* Use the generalized lloyd's algorithm for VQ:
 457     *     find 4 color vectors.
 458     *
 459     *     for each sample color
 460     *         sort to nearest vector.
 461     *
 462     *     replace each vector with the centroid of its matching colors.
 463     *
 464     *     repeat until RMS doesn't improve.
 465     *
 466     *     if a color vector has no samples, or becomes the same as another
 467     *     vector, replace it with the color which is farthest from a sample.
 468     *
 469     * vec[][MAX_COMP]           initial vectors and resulting colors
 470     * nv                        number of resulting colors required
 471     * input[N_TEXELS][MAX_COMP] input texels
 472     * nc                        number of components in input / vec
 473     * n                         number of input samples
 474     */
 475
 476    GLint sum[MAX_VECT][MAX_COMP]; /* used to accumulate closest texels */
 477    GLint cnt[MAX_VECT]; /* how many times a certain vector was chosen */
 478    GLfloat error, lasterror = 1e9;
 479
 480    GLint i, j, k, rep;
 481
 482    /* the quantizer */
 483    for (rep = 0; rep < LL_N_REP; rep++) {
 484       /* reset sums & counters */
 485       for (j = 0; j < nv; j++) {
 486          for (i = 0; i < nc; i++) {
 487             sum[j][i] = 0;
 488          }
 489          cnt[j] = 0;
 490       }
 491       error = 0;
 492
 493       /* scan whole block */
 494       for (k = 0; k < n; k++) {
 495 #if 1
 496          GLint best = -1;
 497          GLfloat err = 1e9; /* big enough */
 498          /* determine best vector */
 499          for (j = 0; j < nv; j++) {
 500             GLfloat e = (vec[j][0] - input[k][0]) * (vec[j][0] - input[k][0]) +
 501                       (vec[j][1] - input[k][1]) * (vec[j][1] - input[k][1]) +
 502                       (vec[j][2] - input[k][2]) * (vec[j][2] - input[k][2]);
 503             if (nc == 4) {
 504                e += (vec[j][3] - input[k][3]) * (vec[j][3] - input[k][3]);
 505             }
 506             if (e < err) {
 507                err = e;
 508                best = j;
 509             }
 510          }
 511 #else
 512          GLint best = fxt1_bestcol(vec, nv, input[k], nc, &err);
 513 #endif
 514          assert(best >= 0);
 515          /* add in closest color */
 516          for (i = 0; i < nc; i++) {
 517             sum[best][i] += input[k][i];
 518          }
 519          /* mark this vector as used */
 520          cnt[best]++;
 521          /* accumulate error */
 522          error += err;
 523       }
 524
 525       /* check RMS */
 526       if ((error < LL_RMS_E) ||
 527           ((error < lasterror) && ((lasterror - error) < LL_RMS_D))) {
 528          return !0; /* good match */
 529       }
 530       lasterror = error;
 531
 532       /* move each vector to the barycenter of its closest colors */
 533       for (j = 0; j < nv; j++) {
 534          if (cnt[j]) {
 535             GLfloat div = 1.0F / cnt[j];
 536             for (i = 0; i < nc; i++) {
 537                vec[j][i] = div * sum[j][i];
 538             }
 539          } else {
 540             /* this vec has no samples or is identical with a previous vec */
 541             GLint worst = fxt1_worst(vec[j], input, nc, n);
 542             for (i = 0; i < nc; i++) {
 543                vec[j][i] = input[worst][i];
 544             }
 545          }
 546       }
 547    }
 548
 549    return 0; /* could not converge fast enough */
 550 }
 551
 552
 553 static void
 554 fxt1_quantize_CHROMA (GLuint *cc,
 555                       GLubyte input[N_TEXELS][MAX_COMP])
 556 {
 557    const GLint n_vect = 4; /* 4 base vectors to find */
 558    const GLint n_comp = 3; /* 3 components: R, G, B */
 559    GLfloat vec[MAX_VECT][MAX_COMP];
 560    GLint i, j, k;
 561    Fx64 hi; /* high quadword */
 562    GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
 563
 564    if (fxt1_choose(vec, n_vect, input, n_comp, N_TEXELS) != 0) {
 565       fxt1_lloyd(vec, n_vect, input, n_comp, N_TEXELS);
 566    }
 567
 568    FX64_MOV32(hi, 4); /* cc-chroma = "010" + unused bit */
 569    for (j = n_vect - 1; j >= 0; j--) {
 570       for (i = 0; i < n_comp; i++) {
 571          /* add in colors */
 572          FX64_SHL(hi, 5);
 573          FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
 574       }
 575    }
 576    ((Fx64 *)cc)[1] = hi;
 577
 578    lohi = lolo = 0;
 579    /* right microtile */
 580    for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) {
 581       lohi <<= 2;
 582       lohi |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
 583    }
 584    /* left microtile */
 585    for (; k >= 0; k--) {
 586       lolo <<= 2;
 587       lolo |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
 588    }
 589    cc[1] = lohi;
 590    cc[0] = lolo;
 591 }
 592
 593
 594 static void
 595 fxt1_quantize_ALPHA0 (GLuint *cc,
 596                       GLubyte input[N_TEXELS][MAX_COMP],
 597                       GLubyte reord[N_TEXELS][MAX_COMP], GLint n)
 598 {
 599    const GLint n_vect = 3; /* 3 base vectors to find */
 600    const GLint n_comp = 4; /* 4 components: R, G, B, A */
 601    GLfloat vec[MAX_VECT][MAX_COMP];
 602    GLint i, j, k;
 603    Fx64 hi; /* high quadword */
 604    GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
 605
 606    /* the last vector indicates zero */
 607    for (i = 0; i < n_comp; i++) {
 608       vec[n_vect][i] = 0;
 609    }
 610
 611    /* the first n texels in reord are guaranteed to be non-zero */
 612    if (fxt1_choose(vec, n_vect, reord, n_comp, n) != 0) {
 613       fxt1_lloyd(vec, n_vect, reord, n_comp, n);
 614    }
 615
 616    FX64_MOV32(hi, 6); /* alpha = "011" + lerp = 0 */
 617    for (j = n_vect - 1; j >= 0; j--) {
 618       /* add in alphas */
 619       FX64_SHL(hi, 5);
 620       FX64_OR32(hi, (GLuint)(vec[j][ACOMP] / 8.0F));
 621    }
 622    for (j = n_vect - 1; j >= 0; j--) {
 623       for (i = 0; i < n_comp - 1; i++) {
 624          /* add in colors */
 625          FX64_SHL(hi, 5);
 626          FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
 627       }
 628    }
 629    ((Fx64 *)cc)[1] = hi;
 630
 631    lohi = lolo = 0;
 632    /* right microtile */
 633    for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) {
 634       lohi <<= 2;
 635       lohi |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
 636    }
 637    /* left microtile */
 638    for (; k >= 0; k--) {
 639       lolo <<= 2;
 640       lolo |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
 641    }
 642    cc[1] = lohi;
 643    cc[0] = lolo;
 644 }
 645
 646
 647 static void
 648 fxt1_quantize_ALPHA1 (GLuint *cc,
 649                       GLubyte input[N_TEXELS][MAX_COMP])
 650 {
 651    const GLint n_vect = 3; /* highest vector number in each microtile */
 652    const GLint n_comp = 4; /* 4 components: R, G, B, A */
 653    GLfloat vec[1 + 1 + 1][MAX_COMP]; /* 1.5 extrema for each sub-block */
 654    GLfloat b, iv[MAX_COMP]; /* interpolation vector */
 655    GLint i, j, k;
 656    Fx64 hi; /* high quadword */
 657    GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
 658
 659    GLint minSum;
 660    GLint maxSum;
 661    GLint minColL = 0, maxColL = 0;
 662    GLint minColR = 0, maxColR = 0;
 663    GLint sumL = 0, sumR = 0;
 664    GLint nn_comp;
 665    /* Our solution here is to find the darkest and brightest colors in
 666     * the 4x4 tile and use those as the two representative colors.
 667     * There are probably better algorithms to use (histogram-based).
 668     */
 669    nn_comp = n_comp;
 670    while ((minColL == maxColL) && nn_comp) {
 671        minSum = 2000; /* big enough */
 672        maxSum = -1; /* small enough */
 673        for (k = 0; k < N_TEXELS / 2; k++) {
 674            GLint sum = 0;
 675            for (i = 0; i < nn_comp; i++) {
 676                sum += input[k][i];
 677            }
 678            if (minSum > sum) {
 679                minSum = sum;
 680                minColL = k;
 681            }
 682            if (maxSum < sum) {
 683                maxSum = sum;
 684                maxColL = k;
 685            }
 686            sumL += sum;
 687        }
 688
 689        nn_comp--;
 690    }
 691
 692    nn_comp = n_comp;
 693    while ((minColR == maxColR) && nn_comp) {
 694        minSum = 2000; /* big enough */
 695        maxSum = -1; /* small enough */
 696        for (k = N_TEXELS / 2; k < N_TEXELS; k++) {
 697            GLint sum = 0;
 698            for (i = 0; i < nn_comp; i++) {
 699                sum += input[k][i];
 700            }
 701            if (minSum > sum) {
 702                minSum = sum;
 703                minColR = k;
 704            }
 705            if (maxSum < sum) {
 706                maxSum = sum;
 707                maxColR = k;
 708            }
 709            sumR += sum;
 710        }
 711
 712        nn_comp--;
 713    }
 714
 715    /* choose the common vector (yuck!) */
 716    {
 717       GLint j1, j2;
 718       GLint v1 = 0, v2 = 0;
 719       GLfloat err = 1e9; /* big enough */
 720       GLfloat tv[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
 721       for (i = 0; i < n_comp; i++) {
 722          tv[0][i] = input[minColL][i];
 723          tv[1][i] = input[maxColL][i];
 724          tv[2][i] = input[minColR][i];
 725          tv[3][i] = input[maxColR][i];
 726       }
 727       for (j1 = 0; j1 < 2; j1++) {
 728          for (j2 = 2; j2 < 4; j2++) {
 729             GLfloat e = 0.0F;
 730             for (i = 0; i < n_comp; i++) {
 731                e += (tv[j1][i] - tv[j2][i]) * (tv[j1][i] - tv[j2][i]);
 732             }
 733             if (e < err) {
 734                err = e;
 735                v1 = j1;
 736                v2 = j2;
 737             }
 738          }
 739       }
 740       for (i = 0; i < n_comp; i++) {
 741          vec[0][i] = tv[1 - v1][i];
 742          vec[1][i] = (tv[v1][i] * sumL + tv[v2][i] * sumR) / (sumL + sumR);
 743          vec[2][i] = tv[5 - v2][i];
 744       }
 745    }
 746
 747    /* left microtile */
 748    cc[0] = 0;
 749    if (minColL != maxColL) {
 750       /* compute interpolation vector */
 751       MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
 752
 753       /* add in texels */
 754       lolo = 0;
 755       for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
 756          GLint texel;
 757          /* interpolate color */
 758          CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
 759          /* add in texel */
 760          lolo <<= 2;
 761          lolo |= texel;
 762       }
 763
 764       cc[0] = lolo;
 765    }
 766
 767    /* right microtile */
 768    cc[1] = 0;
 769    if (minColR != maxColR) {
 770       /* compute interpolation vector */
 771       MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[1]);
 772
 773       /* add in texels */
 774       lohi = 0;
 775       for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
 776          GLint texel;
 777          /* interpolate color */
 778          CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
 779          /* add in texel */
 780          lohi <<= 2;
 781          lohi |= texel;
 782       }
 783
 784       cc[1] = lohi;
 785    }
 786
 787    FX64_MOV32(hi, 7); /* alpha = "011" + lerp = 1 */
 788    for (j = n_vect - 1; j >= 0; j--) {
 789       /* add in alphas */
 790       FX64_SHL(hi, 5);
 791       FX64_OR32(hi, (GLuint)(vec[j][ACOMP] / 8.0F));
 792    }
 793    for (j = n_vect - 1; j >= 0; j--) {
 794       for (i = 0; i < n_comp - 1; i++) {
 795          /* add in colors */
 796          FX64_SHL(hi, 5);
 797          FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
 798       }
 799    }
 800    ((Fx64 *)cc)[1] = hi;
 801 }
 802
 803
 804 static void
 805 fxt1_quantize_HI (GLuint *cc,
 806                   GLubyte input[N_TEXELS][MAX_COMP],
 807                   GLubyte reord[N_TEXELS][MAX_COMP], GLint n)
 808 {
 809    const GLint n_vect = 6; /* highest vector number */
 810    const GLint n_comp = 3; /* 3 components: R, G, B */
 811    GLfloat b = 0.0F;       /* phoudoin: silent compiler! */
 812    GLfloat iv[MAX_COMP];   /* interpolation vector */
 813    GLint i, k;
 814    GLuint hihi; /* high quadword: hi dword */
 815
 816    GLint minSum = 2000; /* big enough */
 817    GLint maxSum = -1; /* small enough */
 818    GLint minCol = 0; /* phoudoin: silent compiler! */
 819    GLint maxCol = 0; /* phoudoin: silent compiler! */
 820
 821    /* Our solution here is to find the darkest and brightest colors in
 822     * the 8x4 tile and use those as the two representative colors.
 823     * There are probably better algorithms to use (histogram-based).
 824     */
 825    for (k = 0; k < n; k++) {
 826       GLint sum = 0;
 827       for (i = 0; i < n_comp; i++) {
 828          sum += reord[k][i];
 829       }
 830       if (minSum > sum) {
 831          minSum = sum;
 832          minCol = k;
 833       }
 834       if (maxSum < sum) {
 835          maxSum = sum;
 836          maxCol = k;
 837       }
 838    }
 839
 840    hihi = 0; /* cc-hi = "00" */
 841    for (i = 0; i < n_comp; i++) {
 842       /* add in colors */
 843       hihi <<= 5;
 844       hihi |= reord[maxCol][i] >> 3;
 845    }
 846    for (i = 0; i < n_comp; i++) {
 847       /* add in colors */
 848       hihi <<= 5;
 849       hihi |= reord[minCol][i] >> 3;
 850    }
 851    cc[3] = hihi;
 852    cc[0] = cc[1] = cc[2] = 0;
 853
 854    /* compute interpolation vector */
 855    if (minCol != maxCol) {
 856       MAKEIVEC(n_vect, n_comp, iv, b, reord[minCol], reord[maxCol]);
 857    }
 858
 859    /* add in texels */
 860    for (k = N_TEXELS - 1; k >= 0; k--) {
 861       GLint t = k * 3;
 862       GLuint *kk = (GLuint *)((char *)cc + t / 8);
 863       GLint texel = n_vect + 1; /* transparent black */
 864
 865       if (!ISTBLACK(input[k])) {
 866          if (minCol != maxCol) {
 867             /* interpolate color */
 868             CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
 869             /* add in texel */
 870             kk[0] |= texel << (t & 7);
 871          }
 872       } else {
 873          /* add in texel */
 874          kk[0] |= texel << (t & 7);
 875       }
 876    }
 877 }
 878
 879
 880 static void
 881 fxt1_quantize_MIXED1 (GLuint *cc,
 882                       GLubyte input[N_TEXELS][MAX_COMP])
 883 {
 884    const GLint n_vect = 2; /* highest vector number in each microtile */
 885    const GLint n_comp = 3; /* 3 components: R, G, B */
 886    GLubyte vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
 887    GLfloat b, iv[MAX_COMP]; /* interpolation vector */
 888    GLint i, j, k;
 889    Fx64 hi; /* high quadword */
 890    GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
 891
 892    GLint minSum;
 893    GLint maxSum;
 894    GLint minColL = 0, maxColL = -1;
 895    GLint minColR = 0, maxColR = -1;
 896
 897    /* Our solution here is to find the darkest and brightest colors in
 898     * the 4x4 tile and use those as the two representative colors.
 899     * There are probably better algorithms to use (histogram-based).
 900     */
 901    minSum = 2000; /* big enough */
 902    maxSum = -1; /* small enough */
 903    for (k = 0; k < N_TEXELS / 2; k++) {
 904       if (!ISTBLACK(input[k])) {
 905          GLint sum = 0;
 906          for (i = 0; i < n_comp; i++) {
 907             sum += input[k][i];
 908          }
 909          if (minSum > sum) {
 910             minSum = sum;
 911             minColL = k;
 912          }
 913          if (maxSum < sum) {
 914             maxSum = sum;
 915             maxColL = k;
 916          }
 917       }
 918    }
 919    minSum = 2000; /* big enough */
 920    maxSum = -1; /* small enough */
 921    for (; k < N_TEXELS; k++) {
 922       if (!ISTBLACK(input[k])) {
 923          GLint sum = 0;
 924          for (i = 0; i < n_comp; i++) {
 925             sum += input[k][i];
 926          }
 927          if (minSum > sum) {
 928             minSum = sum;
 929             minColR = k;
 930          }
 931          if (maxSum < sum) {
 932             maxSum = sum;
 933             maxColR = k;
 934          }
 935       }
 936    }
 937
 938    /* left microtile */
 939    if (maxColL == -1) {
 940       /* all transparent black */
 941       cc[0] = ~0u;
 942       for (i = 0; i < n_comp; i++) {
 943          vec[0][i] = 0;
 944          vec[1][i] = 0;
 945       }
 946    } else {
 947       cc[0] = 0;
 948       for (i = 0; i < n_comp; i++) {
 949          vec[0][i] = input[minColL][i];
 950          vec[1][i] = input[maxColL][i];
 951       }
 952       if (minColL != maxColL) {
 953          /* compute interpolation vector */
 954          MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
 955
 956          /* add in texels */
 957          lolo = 0;
 958          for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
 959             GLint texel = n_vect + 1; /* transparent black */
 960             if (!ISTBLACK(input[k])) {
 961                /* interpolate color */
 962                CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
 963             }
 964             /* add in texel */
 965             lolo <<= 2;
 966             lolo |= texel;
 967          }
 968          cc[0] = lolo;
 969       }
 970    }
 971
 972    /* right microtile */
 973    if (maxColR == -1) {
 974       /* all transparent black */
 975       cc[1] = ~0u;
 976       for (i = 0; i < n_comp; i++) {
 977          vec[2][i] = 0;
 978          vec[3][i] = 0;
 979       }
 980    } else {
 981       cc[1] = 0;
 982       for (i = 0; i < n_comp; i++) {
 983          vec[2][i] = input[minColR][i];
 984          vec[3][i] = input[maxColR][i];
 985       }
 986       if (minColR != maxColR) {
 987          /* compute interpolation vector */
 988          MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);
 989
 990          /* add in texels */
 991          lohi = 0;
 992          for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
 993             GLint texel = n_vect + 1; /* transparent black */
 994             if (!ISTBLACK(input[k])) {
 995                /* interpolate color */
 996                CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
 997             }
 998             /* add in texel */
 999             lohi <<= 2;
1000             lohi |= texel;
1001          }
1002          cc[1] = lohi;
1003       }
1004    }
1005
1006    FX64_MOV32(hi, 9 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
1007    for (j = 2 * 2 - 1; j >= 0; j--) {
1008       for (i = 0; i < n_comp; i++) {
1009          /* add in colors */
1010          FX64_SHL(hi, 5);
1011          FX64_OR32(hi, vec[j][i] >> 3);
1012       }
1013    }
1014    ((Fx64 *)cc)[1] = hi;
1015 }
1016
1017
1018 static void
1019 fxt1_quantize_MIXED0 (GLuint *cc,
1020                       GLubyte input[N_TEXELS][MAX_COMP])
1021 {
1022    const GLint n_vect = 3; /* highest vector number in each microtile */
1023    const GLint n_comp = 3; /* 3 components: R, G, B */
1024    GLubyte vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
1025    GLfloat b, iv[MAX_COMP]; /* interpolation vector */
1026    GLint i, j, k;
1027    Fx64 hi; /* high quadword */
1028    GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
1029
1030    GLint minColL = 0, maxColL = 0;
1031    GLint minColR = 0, maxColR = 0;
1032 #if 0
1033    GLint minSum;
1034    GLint maxSum;
1035
1036    /* Our solution here is to find the darkest and brightest colors in
1037     * the 4x4 tile and use those as the two representative colors.
1038     * There are probably better algorithms to use (histogram-based).
1039     */
1040    minSum = 2000; /* big enough */
1041    maxSum = -1; /* small enough */
1042    for (k = 0; k < N_TEXELS / 2; k++) {
1043       GLint sum = 0;
1044       for (i = 0; i < n_comp; i++) {
1045          sum += input[k][i];
1046       }
1047       if (minSum > sum) {
1048          minSum = sum;
1049          minColL = k;
1050       }
1051       if (maxSum < sum) {
1052          maxSum = sum;
1053          maxColL = k;
1054       }
1055    }
1056    minSum = 2000; /* big enough */
1057    maxSum = -1; /* small enough */
1058    for (; k < N_TEXELS; k++) {
1059       GLint sum = 0;
1060       for (i = 0; i < n_comp; i++) {
1061          sum += input[k][i];
1062       }
1063       if (minSum > sum) {
1064          minSum = sum;
1065          minColR = k;
1066       }
1067       if (maxSum < sum) {
1068          maxSum = sum;
1069          maxColR = k;
1070       }
1071    }
1072 #else
1073    GLint minVal;
1074    GLint maxVal;
1075    GLint maxVarL = fxt1_variance(NULL, input, n_comp, N_TEXELS / 2);
1076    GLint maxVarR = fxt1_variance(NULL, &input[N_TEXELS / 2], n_comp, N_TEXELS / 2);
1077
1078    /* Scan the channel with max variance for lo & hi
1079     * and use those as the two representative colors.
1080     */
1081    minVal = 2000; /* big enough */
1082    maxVal = -1; /* small enough */
1083    for (k = 0; k < N_TEXELS / 2; k++) {
1084       GLint t = input[k][maxVarL];
1085       if (minVal > t) {
1086          minVal = t;
1087          minColL = k;
1088       }
1089       if (maxVal < t) {
1090          maxVal = t;
1091          maxColL = k;
1092       }
1093    }
1094    minVal = 2000; /* big enough */
1095    maxVal = -1; /* small enough */
1096    for (; k < N_TEXELS; k++) {
1097       GLint t = input[k][maxVarR];
1098       if (minVal > t) {
1099          minVal = t;
1100          minColR = k;
1101       }
1102       if (maxVal < t) {
1103          maxVal = t;
1104          maxColR = k;
1105       }
1106    }
1107 #endif
1108
1109    /* left microtile */
1110    cc[0] = 0;
1111    for (i = 0; i < n_comp; i++) {
1112       vec[0][i] = input[minColL][i];
1113       vec[1][i] = input[maxColL][i];
1114    }
1115    if (minColL != maxColL) {
1116       /* compute interpolation vector */
1117       MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
1118
1119       /* add in texels */
1120       lolo = 0;
1121       for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
1122          GLint texel;
1123          /* interpolate color */
1124          CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
1125          /* add in texel */
1126          lolo <<= 2;
1127          lolo |= texel;
1128       }
1129
1130       /* funky encoding for LSB of green */
1131       if ((GLint)((lolo >> 1) & 1) != (((vec[1][GCOMP] ^ vec[0][GCOMP]) >> 2) & 1)) {
1132          for (i = 0; i < n_comp; i++) {
1133             vec[1][i] = input[minColL][i];
1134             vec[0][i] = input[maxColL][i];
1135          }
1136          lolo = ~lolo;
1137       }
1138
1139       cc[0] = lolo;
1140    }
1141
1142    /* right microtile */
1143    cc[1] = 0;
1144    for (i = 0; i < n_comp; i++) {
1145       vec[2][i] = input[minColR][i];
1146       vec[3][i] = input[maxColR][i];
1147    }
1148    if (minColR != maxColR) {
1149       /* compute interpolation vector */
1150       MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);
1151
1152       /* add in texels */
1153       lohi = 0;
1154       for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
1155          GLint texel;
1156          /* interpolate color */
1157          CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
1158          /* add in texel */
1159          lohi <<= 2;
1160          lohi |= texel;
1161       }
1162
1163       /* funky encoding for LSB of green */
1164       if ((GLint)((lohi >> 1) & 1) != (((vec[3][GCOMP] ^ vec[2][GCOMP]) >> 2) & 1)) {
1165          for (i = 0; i < n_comp; i++) {
1166             vec[3][i] = input[minColR][i];
1167             vec[2][i] = input[maxColR][i];
1168          }
1169          lohi = ~lohi;
1170       }
1171
1172       cc[1] = lohi;
1173    }
1174
1175    FX64_MOV32(hi, 8 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
1176    for (j = 2 * 2 - 1; j >= 0; j--) {
1177       for (i = 0; i < n_comp; i++) {
1178          /* add in colors */
1179          FX64_SHL(hi, 5);
1180          FX64_OR32(hi, vec[j][i] >> 3);
1181       }
1182    }
1183    ((Fx64 *)cc)[1] = hi;
1184 }
1185
1186
1187 static void
1188 fxt1_quantize (GLuint *cc, const GLubyte *lines[], GLint comps)
1189 {
1190    GLint trualpha;
1191    GLubyte reord[N_TEXELS][MAX_COMP];
1192
1193    GLubyte input[N_TEXELS][MAX_COMP];
1194    GLint i, k, l;
1195
1196    if (comps == 3) {
1197       /* make the whole block opaque */
1198       memset(input, -1, sizeof(input));
1199    }
1200
1201    /* 8 texels each line */
1202    for (l = 0; l < 4; l++) {
1203       for (k = 0; k < 4; k++) {
1204          for (i = 0; i < comps; i++) {
1205             input[k + l * 4][i] = *lines[l]++;
1206          }
1207       }
1208       for (; k < 8; k++) {
1209          for (i = 0; i < comps; i++) {
1210             input[k + l * 4 + 12][i] = *lines[l]++;
1211          }
1212       }
1213    }
1214
1215    /* block layout:
1216     * 00, 01, 02, 03, 08, 09, 0a, 0b
1217     * 10, 11, 12, 13, 18, 19, 1a, 1b
1218     * 04, 05, 06, 07, 0c, 0d, 0e, 0f
1219     * 14, 15, 16, 17, 1c, 1d, 1e, 1f
1220     */
1221
1222    /* [dBorca]
1223     * stupidity flows forth from this
1224     */
1225    l = N_TEXELS;
1226    trualpha = 0;
1227    if (comps == 4) {
1228       /* skip all transparent black texels */
1229       l = 0;
1230       for (k = 0; k < N_TEXELS; k++) {
1231          /* test all components against 0 */
1232          if (!ISTBLACK(input[k])) {
1233             /* texel is not transparent black */
1234             COPY_4UBV(reord[l], input[k]);
1235             if (reord[l][ACOMP] < (255 - ALPHA_TS)) {
1236                /* non-opaque texel */
1237                trualpha = !0;
1238             }
1239             l++;
1240          }
1241       }
1242    }
1243
1244 #if 0
1245    if (trualpha) {
1246       fxt1_quantize_ALPHA0(cc, input, reord, l);
1247    } else if (l == 0) {
1248       cc[0] = cc[1] = cc[2] = -1;
1249       cc[3] = 0;
1250    } else if (l < N_TEXELS) {
1251       fxt1_quantize_HI(cc, input, reord, l);
1252    } else {
1253       fxt1_quantize_CHROMA(cc, input);
1254    }
1255    (void)fxt1_quantize_ALPHA1;
1256    (void)fxt1_quantize_MIXED1;
1257    (void)fxt1_quantize_MIXED0;
1258 #else
1259    if (trualpha) {
1260       fxt1_quantize_ALPHA1(cc, input);
1261    } else if (l == 0) {
1262       cc[0] = cc[1] = cc[2] = ~0u;
1263       cc[3] = 0;
1264    } else if (l < N_TEXELS) {
1265       fxt1_quantize_MIXED1(cc, input);
1266    } else {
1267       fxt1_quantize_MIXED0(cc, input);
1268    }
1269    (void)fxt1_quantize_ALPHA0;
1270    (void)fxt1_quantize_HI;
1271    (void)fxt1_quantize_CHROMA;
1272 #endif
1273 }
1274
1275
1276
1277 /**
1278  * Upscale an image by replication, not (typical) stretching.
1279  * We use this when the image width or height is less than a
1280  * certain size (4, 8) and we need to upscale an image.
1281  */
1282 static void
1283 upscale_teximage2d(GLsizei inWidth, GLsizei inHeight,
1284                    GLsizei outWidth, GLsizei outHeight,
1285                    GLint comps, const GLubyte *src, GLint srcRowStride,
1286                    GLubyte *dest )
1287 {
1288    GLint i, j, k;
1289
1290    ASSERT(outWidth >= inWidth);
1291    ASSERT(outHeight >= inHeight);
1292 #if 0
1293    ASSERT(inWidth == 1 || inWidth == 2 || inHeight == 1 || inHeight == 2);
1294    ASSERT((outWidth & 3) == 0);
1295    ASSERT((outHeight & 3) == 0);
1296 #endif
1297
1298    for (i = 0; i < outHeight; i++) {
1299       const GLint ii = i % inHeight;
1300       for (j = 0; j < outWidth; j++) {
1301          const GLint jj = j % inWidth;
1302          for (k = 0; k < comps; k++) {
1303             dest[(i * outWidth + j) * comps + k]
1304                = src[ii * srcRowStride + jj * comps + k];
1305          }
1306       }
1307    }
1308 }
1309
1310
1311 static void
1312 fxt1_encode (GLuint width, GLuint height, GLint comps,
1313              const void *source, GLint srcRowStride,
1314              void *dest, GLint destRowStride)
1315 {
1316    GLuint x, y;
1317    const GLubyte *data;
1318    GLuint *encoded = (GLuint *)dest;
1319    void *newSource = NULL;
1320
1321    assert(comps == 3 || comps == 4);
1322
1323    /* Replicate image if width is not M8 or height is not M4 */
1324    if ((width & 7) | (height & 3)) {
1325       GLint newWidth = (width + 7) & ~7;
1326       GLint newHeight = (height + 3) & ~3;
1327       newSource = malloc(comps * newWidth * newHeight * sizeof(GLubyte));
1328       if (!newSource) {
1329          GET_CURRENT_CONTEXT(ctx);
1330          _mesa_error(ctx, GL_OUT_OF_MEMORY, "texture compression");
1331          goto cleanUp;
1332       }
1333       upscale_teximage2d(width, height, newWidth, newHeight,
1334                          comps, (const GLubyte *) source,
1335                          srcRowStride, (GLubyte *) newSource);
1336       source = newSource;
1337       width = newWidth;
1338       height = newHeight;
1339       srcRowStride = comps * newWidth;
1340    }
1341
1342    data = (const GLubyte *) source;
1343    destRowStride = (destRowStride - width * 2) / 4;
1344    for (y = 0; y < height; y += 4) {
1345       GLuint offs = 0 + (y + 0) * srcRowStride;
1346       for (x = 0; x < width; x += 8) {
1347          const GLubyte *lines[4];
1348          lines[0] = &data[offs];
1349          lines[1] = lines[0] + srcRowStride;
1350          lines[2] = lines[1] + srcRowStride;
1351          lines[3] = lines[2] + srcRowStride;
1352          offs += 8 * comps;
1353          fxt1_quantize(encoded, lines, comps);
1354          /* 128 bits per 8x4 block */
1355          encoded += 4;
1356       }
1357       encoded += destRowStride;
1358    }
1359
1360  cleanUp:
1361    free(newSource);
1362 }
1363
1364
1365 /***************************************************************************\
1366  * FXT1 decoder
1367  *
1368  * The decoder is based on GL_3DFX_texture_compression_FXT1
1369  * specification and serves as a concept for the encoder.
1370 \***************************************************************************/
1371
1372
1373 /* lookup table for scaling 5 bit colors up to 8 bits */
1374 static const GLubyte _rgb_scale_5[] = {
1375    0,   8,   16,  25,  33,  41,  49,  58,
1376    66,  74,  82,  90,  99,  107, 115, 123,
1377    132, 140, 148, 156, 165, 173, 181, 189,
1378    197, 206, 214, 222, 230, 239, 247, 255
1379 };
1380
1381 /* lookup table for scaling 6 bit colors up to 8 bits */
1382 static const GLubyte _rgb_scale_6[] = {
1383    0,   4,   8,   12,  16,  20,  24,  28,
1384    32,  36,  40,  45,  49,  53,  57,  61,
1385    65,  69,  73,  77,  81,  85,  89,  93,
1386    97,  101, 105, 109, 113, 117, 121, 125,
1387    130, 134, 138, 142, 146, 150, 154, 158,
1388    162, 166, 170, 174, 178, 182, 186, 190,
1389    194, 198, 202, 206, 210, 215, 219, 223,
1390    227, 231, 235, 239, 243, 247, 251, 255
1391 };
1392
1393
/* CC_SEL: the block viewed as 32-bit words; yields the word holding bit
 * `which`, shifted so that bit becomes bit 0.  The result is not masked and
 * never pulls bits in from the following word.
 */
#define CC_SEL(cc, which) (((const GLuint *)(cc))[(which) / 32] >> ((which) & 31))
/* UP5: expand the low 5 bits of c to an 8-bit value */
#define UP5(c) _rgb_scale_5[(c) & 31]
/* UP6: expand 5 bits of c plus one extra low-order bit b to 8 bits */
#define UP6(c, b) _rgb_scale_6[(((c) & 31) << 1) | ((b) & 1)]
/* LERP: step t of n from c0 to c1, rounded to nearest.  The expansion is
 * fully parenthesized so it can be used inside a larger expression.
 */
#define LERP(n, t, c0, c1) ((((n) - (t)) * (c0) + (t) * (c1) + (n) / 2) / (n))
1398
1399
1400 static void
1401 fxt1_decode_1HI (const GLubyte *code, GLint t, GLubyte *rgba)
1402 {
1403    const GLuint *cc;
1404
1405    t *= 3;
1406    cc = (const GLuint *)(code + t / 8);
1407    t = (cc[0] >> (t & 7)) & 7;
1408
1409    if (t == 7) {
1410       rgba[RCOMP] = rgba[GCOMP] = rgba[BCOMP] = rgba[ACOMP] = 0;
1411    } else {
1412       GLubyte r, g, b;
1413       cc = (const GLuint *)(code + 12);
1414       if (t == 0) {
1415          b = UP5(CC_SEL(cc, 0));
1416          g = UP5(CC_SEL(cc, 5));
1417          r = UP5(CC_SEL(cc, 10));
1418       } else if (t == 6) {
1419          b = UP5(CC_SEL(cc, 15));
1420          g = UP5(CC_SEL(cc, 20));
1421          r = UP5(CC_SEL(cc, 25));
1422       } else {
1423          b = LERP(6, t, UP5(CC_SEL(cc, 0)), UP5(CC_SEL(cc, 15)));
1424          g = LERP(6, t, UP5(CC_SEL(cc, 5)), UP5(CC_SEL(cc, 20)));
1425          r = LERP(6, t, UP5(CC_SEL(cc, 10)), UP5(CC_SEL(cc, 25)));
1426       }
1427       rgba[RCOMP] = r;
1428       rgba[GCOMP] = g;
1429       rgba[BCOMP] = b;
1430       rgba[ACOMP] = 255;
1431    }
1432 }
1433
1434
1435 static void
1436 fxt1_decode_1CHROMA (const GLubyte *code, GLint t, GLubyte *rgba)
1437 {
1438    const GLuint *cc;
1439    GLuint kk;
1440
1441    cc = (const GLuint *)code;
1442    if (t & 16) {
1443       cc++;
1444       t &= 15;
1445    }
1446    t = (cc[0] >> (t * 2)) & 3;
1447
1448    t *= 15;
1449    cc = (const GLuint *)(code + 8 + t / 8);
1450    kk = cc[0] >> (t & 7);
1451    rgba[BCOMP] = UP5(kk);
1452    rgba[GCOMP] = UP5(kk >> 5);
1453    rgba[RCOMP] = UP5(kk >> 10);
1454    rgba[ACOMP] = 255;
1455 }
1456
1457
1458 static void
1459 fxt1_decode_1MIXED (const GLubyte *code, GLint t, GLubyte *rgba)
1460 {
1461    const GLuint *cc;
1462    GLuint col[2][3];
1463    GLint glsb, selb;
1464
1465    cc = (const GLuint *)code;
1466    if (t & 16) {
1467       t &= 15;
1468       t = (cc[1] >> (t * 2)) & 3;
1469       /* col 2 */
1470       col[0][BCOMP] = (*(const GLuint *)(code + 11)) >> 6;
1471       col[0][GCOMP] = CC_SEL(cc, 99);
1472       col[0][RCOMP] = CC_SEL(cc, 104);
1473       /* col 3 */
1474       col[1][BCOMP] = CC_SEL(cc, 109);
1475       col[1][GCOMP] = CC_SEL(cc, 114);
1476       col[1][RCOMP] = CC_SEL(cc, 119);
1477       glsb = CC_SEL(cc, 126);
1478       selb = CC_SEL(cc, 33);
1479    } else {
1480       t = (cc[0] >> (t * 2)) & 3;
1481       /* col 0 */
1482       col[0][BCOMP] = CC_SEL(cc, 64);
1483       col[0][GCOMP] = CC_SEL(cc, 69);
1484       col[0][RCOMP] = CC_SEL(cc, 74);
1485       /* col 1 */
1486       col[1][BCOMP] = CC_SEL(cc, 79);
1487       col[1][GCOMP] = CC_SEL(cc, 84);
1488       col[1][RCOMP] = CC_SEL(cc, 89);
1489       glsb = CC_SEL(cc, 125);
1490       selb = CC_SEL(cc, 1);
1491    }
1492
1493    if (CC_SEL(cc, 124) & 1) {
1494       /* alpha[0] == 1 */
1495
1496       if (t == 3) {
1497          /* zero */
1498          rgba[RCOMP] = rgba[BCOMP] = rgba[GCOMP] = rgba[ACOMP] = 0;
1499       } else {
1500          GLubyte r, g, b;
1501          if (t == 0) {
1502             b = UP5(col[0][BCOMP]);
1503             g = UP5(col[0][GCOMP]);
1504             r = UP5(col[0][RCOMP]);
1505          } else if (t == 2) {
1506             b = UP5(col[1][BCOMP]);
1507             g = UP6(col[1][GCOMP], glsb);
1508             r = UP5(col[1][RCOMP]);
1509          } else {
1510             b = (UP5(col[0][BCOMP]) + UP5(col[1][BCOMP])) / 2;
1511             g = (UP5(col[0][GCOMP]) + UP6(col[1][GCOMP], glsb)) / 2;
1512             r = (UP5(col[0][RCOMP]) + UP5(col[1][RCOMP])) / 2;
1513          }
1514          rgba[RCOMP] = r;
1515          rgba[GCOMP] = g;
1516          rgba[BCOMP] = b;
1517          rgba[ACOMP] = 255;
1518       }
1519    } else {
1520       /* alpha[0] == 0 */
1521       GLubyte r, g, b;
1522       if (t == 0) {
1523          b = UP5(col[0][BCOMP]);
1524          g = UP6(col[0][GCOMP], glsb ^ selb);
1525          r = UP5(col[0][RCOMP]);
1526       } else if (t == 3) {
1527          b = UP5(col[1][BCOMP]);
1528          g = UP6(col[1][GCOMP], glsb);
1529          r = UP5(col[1][RCOMP]);
1530       } else {
1531          b = LERP(3, t, UP5(col[0][BCOMP]), UP5(col[1][BCOMP]));
1532          g = LERP(3, t, UP6(col[0][GCOMP], glsb ^ selb),
1533                         UP6(col[1][GCOMP], glsb));
1534          r = LERP(3, t, UP5(col[0][RCOMP]), UP5(col[1][RCOMP]));
1535       }
1536       rgba[RCOMP] = r;
1537       rgba[GCOMP] = g;
1538       rgba[BCOMP] = b;
1539       rgba[ACOMP] = 255;
1540    }
1541 }
1542
1543
1544 static void
1545 fxt1_decode_1ALPHA (const GLubyte *code, GLint t, GLubyte *rgba)
1546 {
1547    const GLuint *cc;
1548    GLubyte r, g, b, a;
1549
1550    cc = (const GLuint *)code;
1551    if (CC_SEL(cc, 124) & 1) {
1552       /* lerp == 1 */
1553       GLuint col0[4];
1554
1555       if (t & 16) {
1556          t &= 15;
1557          t = (cc[1] >> (t * 2)) & 3;
1558          /* col 2 */
1559          col0[BCOMP] = (*(const GLuint *)(code + 11)) >> 6;
1560          col0[GCOMP] = CC_SEL(cc, 99);
1561          col0[RCOMP] = CC_SEL(cc, 104);
1562          col0[ACOMP] = CC_SEL(cc, 119);
1563       } else {
1564          t = (cc[0] >> (t * 2)) & 3;
1565          /* col 0 */
1566          col0[BCOMP] = CC_SEL(cc, 64);
1567          col0[GCOMP] = CC_SEL(cc, 69);
1568          col0[RCOMP] = CC_SEL(cc, 74);
1569          col0[ACOMP] = CC_SEL(cc, 109);
1570       }
1571
1572       if (t == 0) {
1573          b = UP5(col0[BCOMP]);
1574          g = UP5(col0[GCOMP]);
1575          r = UP5(col0[RCOMP]);
1576          a = UP5(col0[ACOMP]);
1577       } else if (t == 3) {
1578          b = UP5(CC_SEL(cc, 79));
1579          g = UP5(CC_SEL(cc, 84));
1580          r = UP5(CC_SEL(cc, 89));
1581          a = UP5(CC_SEL(cc, 114));
1582       } else {
1583          b = LERP(3, t, UP5(col0[BCOMP]), UP5(CC_SEL(cc, 79)));
1584          g = LERP(3, t, UP5(col0[GCOMP]), UP5(CC_SEL(cc, 84)));
1585          r = LERP(3, t, UP5(col0[RCOMP]), UP5(CC_SEL(cc, 89)));
1586          a = LERP(3, t, UP5(col0[ACOMP]), UP5(CC_SEL(cc, 114)));
1587       }
1588    } else {
1589       /* lerp == 0 */
1590
1591       if (t & 16) {
1592          cc++;
1593          t &= 15;
1594       }
1595       t = (cc[0] >> (t * 2)) & 3;
1596
1597       if (t == 3) {
1598          /* zero */
1599          r = g = b = a = 0;
1600       } else {
1601          GLuint kk;
1602          cc = (const GLuint *)code;
1603          a = UP5(cc[3] >> (t * 5 + 13));
1604          t *= 15;
1605          cc = (const GLuint *)(code + 8 + t / 8);
1606          kk = cc[0] >> (t & 7);
1607          b = UP5(kk);
1608          g = UP5(kk >> 5);
1609          r = UP5(kk >> 10);
1610       }
1611    }
1612    rgba[RCOMP] = r;
1613    rgba[GCOMP] = g;
1614    rgba[BCOMP] = b;
1615    rgba[ACOMP] = a;
1616 }
1617
1618
1619 static void
1620 fxt1_decode_1 (const void *texture, GLint stride, /* in pixels */
1621                GLint i, GLint j, GLubyte *rgba)
1622 {
1623    static void (*decode_1[]) (const GLubyte *, GLint, GLubyte *) = {
1624       fxt1_decode_1HI,     /* cc-high   = "00?" */
1625       fxt1_decode_1HI,     /* cc-high   = "00?" */
1626       fxt1_decode_1CHROMA, /* cc-chroma = "010" */
1627       fxt1_decode_1ALPHA,  /* alpha     = "011" */
1628       fxt1_decode_1MIXED,  /* mixed     = "1??" */
1629       fxt1_decode_1MIXED,  /* mixed     = "1??" */
1630       fxt1_decode_1MIXED,  /* mixed     = "1??" */
1631       fxt1_decode_1MIXED   /* mixed     = "1??" */
1632    };
1633
1634    const GLubyte *code = (const GLubyte *)texture +
1635                          ((j / 4) * (stride / 8) + (i / 8)) * 16;
1636    GLint mode = CC_SEL(code, 125);
1637    GLint t = i & 7;
1638
1639    if (t & 4) {
1640       t += 12;
1641    }
1642    t += (j & 3) * 4;
1643
1644    decode_1[mode](code, t, rgba);
1645 }