src/mesa/main/texcompress_fxt1.c

   1 /*
   2  * Mesa 3-D graphics library
   3  * Version:  7.1
   4  *
   5  * Copyright (C) 1999-2008  Brian Paul   All Rights Reserved.
   6  *
   7  * Permission is hereby granted, free of charge, to any person obtaining a
   8  * copy of this software and associated documentation files (the "Software"),
   9  * to deal in the Software without restriction, including without limitation
  10  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  11  * and/or sell copies of the Software, and to permit persons to whom the
  12  * Software is furnished to do so, subject to the following conditions:
  13  *
  14  * The above copyright notice and this permission notice shall be included
  15  * in all copies or substantial portions of the Software.
  16  *
  17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  18  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  20  * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
  21  * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  22  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  23  */
  24
  25
  26 /**
  27  * \file texcompress_fxt1.c
  28  * GL_3DFX_texture_compression_FXT1 support.
  29  */
  30
  31
  32 #include "glheader.h"
  33 #include "imports.h"
  34 #include "colormac.h"
  35 #include "image.h"
  36 #include "macros.h"
  37 #include "mfeatures.h"
  38 #include "mipmap.h"
  39 #include "texcompress.h"
  40 #include "texcompress_fxt1.h"
  41 #include "texstore.h"
  42
  43
  44 #if FEATURE_texture_fxt1
  45
  46
     /*
      * Forward declaration of the FXT1 block encoder.  In this file it is
      * called by the texstore functions with the source image size, the
      * number of components per texel (3 or 4), the source pixels with their
      * row stride, and the destination pointer with its row stride.
      * NOTE(review): the definition lies outside the part of the file shown
      * here; units of the strides should be confirmed against it.
      */
  47 static void
  48 fxt1_encode (GLuint width, GLuint height, GLint comps,
  49              const void *source, GLint srcRowStride,
  50              void *dest, GLint destRowStride);
  51
     /*
      * Forward declaration of the single-texel decoder.  The fetch-texel
      * functions in this file pass it the image data pointer, the image
      * row stride and the texel coordinates (i, j), then read four GLchan
      * values back from rgba.
      * NOTE(review): declared without "static"; presumably also used from
      * other translation units -- confirm before changing linkage.
      */
  52 void
  53 fxt1_decode_1 (const void *texture, GLint stride,
  54                GLint i, GLint j, GLchan *rgba);
  55
  56
  57 /**
  58  * Store user's image in rgb_fxt1 format.
  59  */
  60 GLboolean
  61 _mesa_texstore_rgb_fxt1(TEXSTORE_PARAMS)
  62 {
  63    const GLchan *pixels;
  64    GLint srcRowStride;
  65    GLubyte *dst;
  66    const GLint texWidth = dstRowStride * 8 / 16; /* a bit of a hack */
  67    const GLchan *tempImage = NULL;
  68
  69    ASSERT(dstFormat == MESA_FORMAT_RGB_FXT1);
  70    ASSERT(dstXoffset % 8 == 0);
  71    ASSERT(dstYoffset % 4 == 0);
  72    ASSERT(dstZoffset     == 0);
  73    (void) dstZoffset;
  74    (void) dstImageOffsets;
  75
  76    if (srcFormat != GL_RGB ||
  77        srcType != CHAN_TYPE ||
  78        ctx->_ImageTransferState ||
  79        srcPacking->SwapBytes) {
  80       /* convert image to RGB/GLchan */
  81       tempImage = _mesa_make_temp_chan_image(ctx, dims,
  82                                              baseInternalFormat,
  83                                              _mesa_get_format_base_format(dstFormat),
  84                                              srcWidth, srcHeight, srcDepth,
  85                                              srcFormat, srcType, srcAddr,
  86                                              srcPacking);
  87       if (!tempImage)
  88          return GL_FALSE; /* out of memory */
  89       pixels = tempImage;
  90       srcRowStride = 3 * srcWidth;
  91       srcFormat = GL_RGB;
  92    }
  93    else {
  94       pixels = (const GLchan *) srcAddr;
  95       srcRowStride = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat,
  96                                             srcType) / sizeof(GLchan);
  97    }
  98
  99    dst = _mesa_compressed_image_address(dstXoffset, dstYoffset, 0,
 100                                         dstFormat,
 101                                         texWidth, (GLubyte *) dstAddr);
 102
 103    fxt1_encode(srcWidth, srcHeight, 3, pixels, srcRowStride,
 104                dst, dstRowStride);
 105
 106    if (tempImage)
 107       free((void*) tempImage);
 108
 109    return GL_TRUE;
 110 }
 111
 112
 113 /**
 114  * Store user's image in rgba_fxt1 format.
 115  */
 116 GLboolean
 117 _mesa_texstore_rgba_fxt1(TEXSTORE_PARAMS)
 118 {
 119    const GLchan *pixels;
 120    GLint srcRowStride;
 121    GLubyte *dst;
 122    GLint texWidth = dstRowStride * 8 / 16; /* a bit of a hack */
 123    const GLchan *tempImage = NULL;
 124
 125    ASSERT(dstFormat == MESA_FORMAT_RGBA_FXT1);
 126    ASSERT(dstXoffset % 8 == 0);
 127    ASSERT(dstYoffset % 4 == 0);
 128    ASSERT(dstZoffset     == 0);
 129    (void) dstZoffset;
 130    (void) dstImageOffsets;
 131
 132    if (srcFormat != GL_RGBA ||
 133        srcType != CHAN_TYPE ||
 134        ctx->_ImageTransferState ||
 135        srcPacking->SwapBytes) {
 136       /* convert image to RGBA/GLchan */
 137       tempImage = _mesa_make_temp_chan_image(ctx, dims,
 138                                              baseInternalFormat,
 139                                              _mesa_get_format_base_format(dstFormat),
 140                                              srcWidth, srcHeight, srcDepth,
 141                                              srcFormat, srcType, srcAddr,
 142                                              srcPacking);
 143       if (!tempImage)
 144          return GL_FALSE; /* out of memory */
 145       pixels = tempImage;
 146       srcRowStride = 4 * srcWidth;
 147       srcFormat = GL_RGBA;
 148    }
 149    else {
 150       pixels = (const GLchan *) srcAddr;
 151       srcRowStride = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat,
 152                                             srcType) / sizeof(GLchan);
 153    }
 154
 155    dst = _mesa_compressed_image_address(dstXoffset, dstYoffset, 0,
 156                                         dstFormat,
 157                                         texWidth, (GLubyte *) dstAddr);
 158
 159    fxt1_encode(srcWidth, srcHeight, 4, pixels, srcRowStride,
 160                dst, dstRowStride);
 161
 162    if (tempImage)
 163       free((void*) tempImage);
 164
 165    return GL_TRUE;
 166 }
 167
 168
 169 void
 170 _mesa_fetch_texel_2d_f_rgba_fxt1( const struct gl_texture_image *texImage,
 171                                   GLint i, GLint j, GLint k, GLfloat *texel )
 172 {
 173    /* just sample as GLchan and convert to float here */
 174    GLchan rgba[4];
 175    (void) k;
 176    fxt1_decode_1(texImage->Data, texImage->RowStride, i, j, rgba);
 177    texel[RCOMP] = CHAN_TO_FLOAT(rgba[RCOMP]);
 178    texel[GCOMP] = CHAN_TO_FLOAT(rgba[GCOMP]);
 179    texel[BCOMP] = CHAN_TO_FLOAT(rgba[BCOMP]);
 180    texel[ACOMP] = CHAN_TO_FLOAT(rgba[ACOMP]);
 181 }
 182
 183
 184 void
 185 _mesa_fetch_texel_2d_f_rgb_fxt1( const struct gl_texture_image *texImage,
 186                                  GLint i, GLint j, GLint k, GLfloat *texel )
 187 {
 188    /* just sample as GLchan and convert to float here */
 189    GLchan rgba[4];
 190    (void) k;
 191    fxt1_decode_1(texImage->Data, texImage->RowStride, i, j, rgba);
 192    texel[RCOMP] = CHAN_TO_FLOAT(rgba[RCOMP]);
 193    texel[GCOMP] = CHAN_TO_FLOAT(rgba[GCOMP]);
 194    texel[BCOMP] = CHAN_TO_FLOAT(rgba[BCOMP]);
 195    texel[ACOMP] = 1.0F;
 196 }
 197
 198
 199
 200 /***************************************************************************\
 201  * FXT1 encoder
 202  *
 203  * The encoder was built by reversing the decoder,
 204  * and is vaguely based on Texus2 by 3dfx. Note that this code
 205  * is merely a proof of concept, since it is highly UNoptimized;
 206  * moreover, it is sub-optimal due to initial conditions passed
 207  * to Lloyd's algorithm (the interpolation modes are even worse).
 208 \***************************************************************************/
 209
 210
 211 #define MAX_COMP 4 /* ever needed maximum number of components in texel */
 212 #define MAX_VECT 4 /* ever needed maximum number of base vectors to find */
 213 #define N_TEXELS 32 /* number of texels in a block (always 32) */
 214 #define LL_N_REP 50 /* number of iterations in lloyd's vq */
 215 #define LL_RMS_D 10 /* fault tolerance (maximum delta) */
 216 #define LL_RMS_E 255 /* fault tolerance (maximum error) */
 217 #define ALPHA_TS 2 /* alpha threshold: (255 - ALPHA_TS) deemed opaque */
 218 #define ISTBLACK(v) (*((GLuint *)(v)) == 0)
 219
 220
 221 /*
 222  * Define a 64-bit unsigned integer type and macros
 223  */
 224 #if 1
 225
 226 #define FX64_NATIVE 1
 227
 228 typedef uint64_t Fx64;
 229
 230 #define FX64_MOV32(a, b) a = b
 231 #define FX64_OR32(a, b)  a |= b
 232 #define FX64_SHL(a, c)   a <<= c
 233
 234 #else
 235
 236 #define FX64_NATIVE 0
 237
 238 typedef struct {
 239    GLuint lo, hi;
 240 } Fx64;
 241
 242 #define FX64_MOV32(a, b) a.lo = b
 243 #define FX64_OR32(a, b)  a.lo |= b
 244
 245 #define FX64_SHL(a, c)                                 \
 246    do {                                                \
 247        if ((c) >= 32) {                                \
 248           a.hi = a.lo << ((c) - 32);                   \
 249           a.lo = 0;                                    \
 250        } else {                                        \
 251           a.hi = (a.hi << (c)) | (a.lo >> (32 - (c))); \
 252           a.lo <<= (c);                                \
 253        }                                               \
 254    } while (0)
 255
 256 #endif
 257
 258
 259 #define F(i) (GLfloat)1 /* can be used to obtain an oblong metric: 0.30 / 0.59 / 0.11 */
 260 #define SAFECDOT 1 /* for paranoids */
 261
 262 #define MAKEIVEC(NV, NC, IV, B, V0, V1)  \
 263    do {                                  \
 264       /* compute interpolation vector */ \
 265       GLfloat d2 = 0.0F;                 \
 266       GLfloat rd2;                       \
 267                                          \
 268       for (i = 0; i < NC; i++) {         \
 269          IV[i] = (V1[i] - V0[i]) * F(i); \
 270          d2 += IV[i] * IV[i];            \
 271       }                                  \
 272       rd2 = (GLfloat)NV / d2;            \
 273       B = 0;                             \
 274       for (i = 0; i < NC; i++) {         \
 275          IV[i] *= F(i);                  \
 276          B -= IV[i] * V0[i];             \
 277          IV[i] *= rd2;                   \
 278       }                                  \
 279       B = B * rd2 + 0.5f;                \
 280    } while (0)
 281
 282 #define CALCCDOT(TEXEL, NV, NC, IV, B, V)\
 283    do {                                  \
 284       GLfloat dot = 0.0F;                \
 285       for (i = 0; i < NC; i++) {         \
 286          dot += V[i] * IV[i];            \
 287       }                                  \
 288       TEXEL = (GLint)(dot + B);          \
 289       if (SAFECDOT) {                    \
 290          if (TEXEL < 0) {                \
 291             TEXEL = 0;                   \
 292          } else if (TEXEL > NV) {        \
 293             TEXEL = NV;                  \
 294          }                               \
 295       }                                  \
 296    } while (0)
 297
 298
 299 static GLint
 300 fxt1_bestcol (GLfloat vec[][MAX_COMP], GLint nv,
 301               GLubyte input[MAX_COMP], GLint nc)
 302 {
 303    GLint i, j, best = -1;
 304    GLfloat err = 1e9; /* big enough */
 305
 306    for (j = 0; j < nv; j++) {
 307       GLfloat e = 0.0F;
 308       for (i = 0; i < nc; i++) {
 309          e += (vec[j][i] - input[i]) * (vec[j][i] - input[i]);
 310       }
 311       if (e < err) {
 312          err = e;
 313          best = j;
 314       }
 315    }
 316
 317    return best;
 318 }
 319
 320
 321 static GLint
 322 fxt1_worst (GLfloat vec[MAX_COMP],
 323             GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
 324 {
 325    GLint i, k, worst = -1;
 326    GLfloat err = -1.0F; /* small enough */
 327
 328    for (k = 0; k < n; k++) {
 329       GLfloat e = 0.0F;
 330       for (i = 0; i < nc; i++) {
 331          e += (vec[i] - input[k][i]) * (vec[i] - input[k][i]);
 332       }
 333       if (e > err) {
 334          err = e;
 335          worst = k;
 336       }
 337    }
 338
 339    return worst;
 340 }
 341
 342
 343 static GLint
 344 fxt1_variance (GLdouble variance[MAX_COMP],
 345                GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
 346 {
 347    GLint i, k, best = 0;
 348    GLint sx, sx2;
 349    GLdouble var, maxvar = -1; /* small enough */
 350    GLdouble teenth = 1.0 / n;
 351
 352    for (i = 0; i < nc; i++) {
 353       sx = sx2 = 0;
 354       for (k = 0; k < n; k++) {
 355          GLint t = input[k][i];
 356          sx += t;
 357          sx2 += t * t;
 358       }
 359       var = sx2 * teenth - sx * sx * teenth * teenth;
 360       if (maxvar < var) {
 361          maxvar = var;
 362          best = i;
 363       }
 364       if (variance) {
 365          variance[i] = var;
 366       }
 367    }
 368
 369    return best;
 370 }
 371
 372
 373 static GLint
 374 fxt1_choose (GLfloat vec[][MAX_COMP], GLint nv,
 375              GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
 376 {
 377 #if 0
 378    /* Choose colors from a grid.
 379     */
 380    GLint i, j;
 381
 382    for (j = 0; j < nv; j++) {
 383       GLint m = j * (n - 1) / (nv - 1);
 384       for (i = 0; i < nc; i++) {
 385          vec[j][i] = input[m][i];
 386       }
 387    }
 388 #else
 389    /* Our solution here is to find the darkest and brightest colors in
 390     * the 8x4 tile and use those as the two representative colors.
 391     * There are probably better algorithms to use (histogram-based).
 392     */
 393    GLint i, j, k;
 394    GLint minSum = 2000; /* big enough */
 395    GLint maxSum = -1; /* small enough */
 396    GLint minCol = 0; /* phoudoin: silent compiler! */
 397    GLint maxCol = 0; /* phoudoin: silent compiler! */
 398
 399    struct {
 400       GLint flag;
 401       GLint key;
 402       GLint freq;
 403       GLint idx;
 404    } hist[N_TEXELS];
 405    GLint lenh = 0;
 406
 407    memset(hist, 0, sizeof(hist));
 408
 409    for (k = 0; k < n; k++) {
 410       GLint l;
 411       GLint key = 0;
 412       GLint sum = 0;
 413       for (i = 0; i < nc; i++) {
 414          key <<= 8;
 415          key |= input[k][i];
 416          sum += input[k][i];
 417       }
 418       for (l = 0; l < n; l++) {
 419          if (!hist[l].flag) {
 420             /* alloc new slot */
 421             hist[l].flag = !0;
 422             hist[l].key = key;
 423             hist[l].freq = 1;
 424             hist[l].idx = k;
 425             lenh = l + 1;
 426             break;
 427          } else if (hist[l].key == key) {
 428             hist[l].freq++;
 429             break;
 430          }
 431       }
 432       if (minSum > sum) {
 433          minSum = sum;
 434          minCol = k;
 435       }
 436       if (maxSum < sum) {
 437          maxSum = sum;
 438          maxCol = k;
 439       }
 440    }
 441
 442    if (lenh <= nv) {
 443       for (j = 0; j < lenh; j++) {
 444          for (i = 0; i < nc; i++) {
 445             vec[j][i] = (GLfloat)input[hist[j].idx][i];
 446          }
 447       }
 448       for (; j < nv; j++) {
 449          for (i = 0; i < nc; i++) {
 450             vec[j][i] = vec[0][i];
 451          }
 452       }
 453       return 0;
 454    }
 455
 456    for (j = 0; j < nv; j++) {
 457       for (i = 0; i < nc; i++) {
 458          vec[j][i] = ((nv - 1 - j) * input[minCol][i] + j * input[maxCol][i] + (nv - 1) / 2) / (GLfloat)(nv - 1);
 459       }
 460    }
 461 #endif
 462
 463    return !0;
 464 }
 465
 466
 467 static GLint
 468 fxt1_lloyd (GLfloat vec[][MAX_COMP], GLint nv,
 469             GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
 470 {
 471    /* Use the generalized lloyd's algorithm for VQ:
 472     *     find 4 color vectors.
 473     *
 474     *     for each sample color
 475     *         sort to nearest vector.
 476     *
 477     *     replace each vector with the centroid of its matching colors.
 478     *
 479     *     repeat until RMS doesn't improve.
 480     *
 481     *     if a color vector has no samples, or becomes the same as another
 482     *     vector, replace it with the color which is farthest from a sample.
 483     *
 484     * vec[][MAX_COMP]           initial vectors and resulting colors
 485     * nv                        number of resulting colors required
 486     * input[N_TEXELS][MAX_COMP] input texels
 487     * nc                        number of components in input / vec
 488     * n                         number of input samples
 489     */
 490
 491    GLint sum[MAX_VECT][MAX_COMP]; /* used to accumulate closest texels */
 492    GLint cnt[MAX_VECT]; /* how many times a certain vector was chosen */
 493    GLfloat error, lasterror = 1e9;
 494
 495    GLint i, j, k, rep;
 496
 497    /* the quantizer */
 498    for (rep = 0; rep < LL_N_REP; rep++) {
 499       /* reset sums & counters */
 500       for (j = 0; j < nv; j++) {
 501          for (i = 0; i < nc; i++) {
 502             sum[j][i] = 0;
 503          }
 504          cnt[j] = 0;
 505       }
 506       error = 0;
 507
 508       /* scan whole block */
 509       for (k = 0; k < n; k++) {
 510 #if 1
 511          GLint best = -1;
 512          GLfloat err = 1e9; /* big enough */
 513          /* determine best vector */
 514          for (j = 0; j < nv; j++) {
 515             GLfloat e = (vec[j][0] - input[k][0]) * (vec[j][0] - input[k][0]) +
 516                       (vec[j][1] - input[k][1]) * (vec[j][1] - input[k][1]) +
 517                       (vec[j][2] - input[k][2]) * (vec[j][2] - input[k][2]);
 518             if (nc == 4) {
 519                e += (vec[j][3] - input[k][3]) * (vec[j][3] - input[k][3]);
 520             }
 521             if (e < err) {
 522                err = e;
 523                best = j;
 524             }
 525          }
 526 #else
 527          GLint best = fxt1_bestcol(vec, nv, input[k], nc, &err);
 528 #endif
 529          assert(best >= 0);
 530          /* add in closest color */
 531          for (i = 0; i < nc; i++) {
 532             sum[best][i] += input[k][i];
 533          }
 534          /* mark this vector as used */
 535          cnt[best]++;
 536          /* accumulate error */
 537          error += err;
 538       }
 539
 540       /* check RMS */
 541       if ((error < LL_RMS_E) ||
 542           ((error < lasterror) && ((lasterror - error) < LL_RMS_D))) {
 543          return !0; /* good match */
 544       }
 545       lasterror = error;
 546
 547       /* move each vector to the barycenter of its closest colors */
 548       for (j = 0; j < nv; j++) {
 549          if (cnt[j]) {
 550             GLfloat div = 1.0F / cnt[j];
 551             for (i = 0; i < nc; i++) {
 552                vec[j][i] = div * sum[j][i];
 553             }
 554          } else {
 555             /* this vec has no samples or is identical with a previous vec */
 556             GLint worst = fxt1_worst(vec[j], input, nc, n);
 557             for (i = 0; i < nc; i++) {
 558                vec[j][i] = input[worst][i];
 559             }
 560          }
 561       }
 562    }
 563
 564    return 0; /* could not converge fast enough */
 565 }
 566
 567
 568 static void
 569 fxt1_quantize_CHROMA (GLuint *cc,
 570                       GLubyte input[N_TEXELS][MAX_COMP])
 571 {
 572    const GLint n_vect = 4; /* 4 base vectors to find */
 573    const GLint n_comp = 3; /* 3 components: R, G, B */
 574    GLfloat vec[MAX_VECT][MAX_COMP];
 575    GLint i, j, k;
 576    Fx64 hi; /* high quadword */
 577    GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
 578
 579    if (fxt1_choose(vec, n_vect, input, n_comp, N_TEXELS) != 0) {
 580       fxt1_lloyd(vec, n_vect, input, n_comp, N_TEXELS);
 581    }
 582
 583    FX64_MOV32(hi, 4); /* cc-chroma = "010" + unused bit */
 584    for (j = n_vect - 1; j >= 0; j--) {
 585       for (i = 0; i < n_comp; i++) {
 586          /* add in colors */
 587          FX64_SHL(hi, 5);
 588          FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
 589       }
 590    }
 591    ((Fx64 *)cc)[1] = hi;
 592
 593    lohi = lolo = 0;
 594    /* right microtile */
 595    for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) {
 596       lohi <<= 2;
 597       lohi |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
 598    }
 599    /* left microtile */
 600    for (; k >= 0; k--) {
 601       lolo <<= 2;
 602       lolo |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
 603    }
 604    cc[1] = lohi;
 605    cc[0] = lolo;
 606 }
 607
 608
 609 static void
 610 fxt1_quantize_ALPHA0 (GLuint *cc,
 611                       GLubyte input[N_TEXELS][MAX_COMP],
 612                       GLubyte reord[N_TEXELS][MAX_COMP], GLint n)
 613 {
 614    const GLint n_vect = 3; /* 3 base vectors to find */
 615    const GLint n_comp = 4; /* 4 components: R, G, B, A */
 616    GLfloat vec[MAX_VECT][MAX_COMP];
 617    GLint i, j, k;
 618    Fx64 hi; /* high quadword */
 619    GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
 620
 621    /* the last vector indicates zero */
 622    for (i = 0; i < n_comp; i++) {
 623       vec[n_vect][i] = 0;
 624    }
 625
 626    /* the first n texels in reord are guaranteed to be non-zero */
 627    if (fxt1_choose(vec, n_vect, reord, n_comp, n) != 0) {
 628       fxt1_lloyd(vec, n_vect, reord, n_comp, n);
 629    }
 630
 631    FX64_MOV32(hi, 6); /* alpha = "011" + lerp = 0 */
 632    for (j = n_vect - 1; j >= 0; j--) {
 633       /* add in alphas */
 634       FX64_SHL(hi, 5);
 635       FX64_OR32(hi, (GLuint)(vec[j][ACOMP] / 8.0F));
 636    }
 637    for (j = n_vect - 1; j >= 0; j--) {
 638       for (i = 0; i < n_comp - 1; i++) {
 639          /* add in colors */
 640          FX64_SHL(hi, 5);
 641          FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
 642       }
 643    }
 644    ((Fx64 *)cc)[1] = hi;
 645
 646    lohi = lolo = 0;
 647    /* right microtile */
 648    for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) {
 649       lohi <<= 2;
 650       lohi |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
 651    }
 652    /* left microtile */
 653    for (; k >= 0; k--) {
 654       lolo <<= 2;
 655       lolo |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
 656    }
 657    cc[1] = lohi;
 658    cc[0] = lolo;
 659 }
 660
 661
 662 static void
 663 fxt1_quantize_ALPHA1 (GLuint *cc,
 664                       GLubyte input[N_TEXELS][MAX_COMP])
 665 {
 666    const GLint n_vect = 3; /* highest vector number in each microtile */
 667    const GLint n_comp = 4; /* 4 components: R, G, B, A */
 668    GLfloat vec[1 + 1 + 1][MAX_COMP]; /* 1.5 extrema for each sub-block */
 669    GLfloat b, iv[MAX_COMP]; /* interpolation vector */
 670    GLint i, j, k;
 671    Fx64 hi; /* high quadword */
 672    GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
 673
 674    GLint minSum;
 675    GLint maxSum;
 676    GLint minColL = 0, maxColL = 0;
 677    GLint minColR = 0, maxColR = 0;
 678    GLint sumL = 0, sumR = 0;
 679    GLint nn_comp;
 680    /* Our solution here is to find the darkest and brightest colors in
 681     * the 4x4 tile and use those as the two representative colors.
 682     * There are probably better algorithms to use (histogram-based).
 683     */
 684    nn_comp = n_comp;
 685    while ((minColL == maxColL) && nn_comp) {
 686        minSum = 2000; /* big enough */
 687        maxSum = -1; /* small enough */
 688        for (k = 0; k < N_TEXELS / 2; k++) {
 689            GLint sum = 0;
 690            for (i = 0; i < nn_comp; i++) {
 691                sum += input[k][i];
 692            }
 693            if (minSum > sum) {
 694                minSum = sum;
 695                minColL = k;
 696            }
 697            if (maxSum < sum) {
 698                maxSum = sum;
 699                maxColL = k;
 700            }
 701            sumL += sum;
 702        }
 703
 704        nn_comp--;
 705    }
 706
 707    nn_comp = n_comp;
 708    while ((minColR == maxColR) && nn_comp) {
 709        minSum = 2000; /* big enough */
 710        maxSum = -1; /* small enough */
 711        for (k = N_TEXELS / 2; k < N_TEXELS; k++) {
 712            GLint sum = 0;
 713            for (i = 0; i < nn_comp; i++) {
 714                sum += input[k][i];
 715            }
 716            if (minSum > sum) {
 717                minSum = sum;
 718                minColR = k;
 719            }
 720            if (maxSum < sum) {
 721                maxSum = sum;
 722                maxColR = k;
 723            }
 724            sumR += sum;
 725        }
 726
 727        nn_comp--;
 728    }
 729
 730    /* choose the common vector (yuck!) */
 731    {
 732       GLint j1, j2;
 733       GLint v1 = 0, v2 = 0;
 734       GLfloat err = 1e9; /* big enough */
 735       GLfloat tv[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
 736       for (i = 0; i < n_comp; i++) {
 737          tv[0][i] = input[minColL][i];
 738          tv[1][i] = input[maxColL][i];
 739          tv[2][i] = input[minColR][i];
 740          tv[3][i] = input[maxColR][i];
 741       }
 742       for (j1 = 0; j1 < 2; j1++) {
 743          for (j2 = 2; j2 < 4; j2++) {
 744             GLfloat e = 0.0F;
 745             for (i = 0; i < n_comp; i++) {
 746                e += (tv[j1][i] - tv[j2][i]) * (tv[j1][i] - tv[j2][i]);
 747             }
 748             if (e < err) {
 749                err = e;
 750                v1 = j1;
 751                v2 = j2;
 752             }
 753          }
 754       }
 755       for (i = 0; i < n_comp; i++) {
 756          vec[0][i] = tv[1 - v1][i];
 757          vec[1][i] = (tv[v1][i] * sumL + tv[v2][i] * sumR) / (sumL + sumR);
 758          vec[2][i] = tv[5 - v2][i];
 759       }
 760    }
 761
 762    /* left microtile */
 763    cc[0] = 0;
 764    if (minColL != maxColL) {
 765       /* compute interpolation vector */
 766       MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
 767
 768       /* add in texels */
 769       lolo = 0;
 770       for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
 771          GLint texel;
 772          /* interpolate color */
 773          CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
 774          /* add in texel */
 775          lolo <<= 2;
 776          lolo |= texel;
 777       }
 778
 779       cc[0] = lolo;
 780    }
 781
 782    /* right microtile */
 783    cc[1] = 0;
 784    if (minColR != maxColR) {
 785       /* compute interpolation vector */
 786       MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[1]);
 787
 788       /* add in texels */
 789       lohi = 0;
 790       for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
 791          GLint texel;
 792          /* interpolate color */
 793          CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
 794          /* add in texel */
 795          lohi <<= 2;
 796          lohi |= texel;
 797       }
 798
 799       cc[1] = lohi;
 800    }
 801
 802    FX64_MOV32(hi, 7); /* alpha = "011" + lerp = 1 */
 803    for (j = n_vect - 1; j >= 0; j--) {
 804       /* add in alphas */
 805       FX64_SHL(hi, 5);
 806       FX64_OR32(hi, (GLuint)(vec[j][ACOMP] / 8.0F));
 807    }
 808    for (j = n_vect - 1; j >= 0; j--) {
 809       for (i = 0; i < n_comp - 1; i++) {
 810          /* add in colors */
 811          FX64_SHL(hi, 5);
 812          FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
 813       }
 814    }
 815    ((Fx64 *)cc)[1] = hi;
 816 }
 817
 818
 819 static void
 820 fxt1_quantize_HI (GLuint *cc,
 821                   GLubyte input[N_TEXELS][MAX_COMP],
 822                   GLubyte reord[N_TEXELS][MAX_COMP], GLint n)
 823 {
 824    const GLint n_vect = 6; /* highest vector number */
 825    const GLint n_comp = 3; /* 3 components: R, G, B */
 826    GLfloat b = 0.0F;       /* phoudoin: silent compiler! */
 827    GLfloat iv[MAX_COMP];   /* interpolation vector */
 828    GLint i, k;
 829    GLuint hihi; /* high quadword: hi dword */
 830
 831    GLint minSum = 2000; /* big enough */
 832    GLint maxSum = -1; /* small enough */
 833    GLint minCol = 0; /* phoudoin: silent compiler! */
 834    GLint maxCol = 0; /* phoudoin: silent compiler! */
 835
 836    /* Our solution here is to find the darkest and brightest colors in
 837     * the 8x4 tile and use those as the two representative colors.
 838     * There are probably better algorithms to use (histogram-based).
 839     */
 840    for (k = 0; k < n; k++) {
 841       GLint sum = 0;
 842       for (i = 0; i < n_comp; i++) {
 843          sum += reord[k][i];
 844       }
 845       if (minSum > sum) {
 846          minSum = sum;
 847          minCol = k;
 848       }
 849       if (maxSum < sum) {
 850          maxSum = sum;
 851          maxCol = k;
 852       }
 853    }
 854
 855    hihi = 0; /* cc-hi = "00" */
 856    for (i = 0; i < n_comp; i++) {
 857       /* add in colors */
 858       hihi <<= 5;
 859       hihi |= reord[maxCol][i] >> 3;
 860    }
 861    for (i = 0; i < n_comp; i++) {
 862       /* add in colors */
 863       hihi <<= 5;
 864       hihi |= reord[minCol][i] >> 3;
 865    }
 866    cc[3] = hihi;
 867    cc[0] = cc[1] = cc[2] = 0;
 868
 869    /* compute interpolation vector */
 870    if (minCol != maxCol) {
 871       MAKEIVEC(n_vect, n_comp, iv, b, reord[minCol], reord[maxCol]);
 872    }
 873
 874    /* add in texels */
 875    for (k = N_TEXELS - 1; k >= 0; k--) {
 876       GLint t = k * 3;
 877       GLuint *kk = (GLuint *)((char *)cc + t / 8);
 878       GLint texel = n_vect + 1; /* transparent black */
 879
 880       if (!ISTBLACK(input[k])) {
 881          if (minCol != maxCol) {
 882             /* interpolate color */
 883             CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
 884             /* add in texel */
 885             kk[0] |= texel << (t & 7);
 886          }
 887       } else {
 888          /* add in texel */
 889          kk[0] |= texel << (t & 7);
 890       }
 891    }
 892 }
 893
 894
 895 static void
 896 fxt1_quantize_MIXED1 (GLuint *cc,
 897                       GLubyte input[N_TEXELS][MAX_COMP])
 898 {
 899    const GLint n_vect = 2; /* highest vector number in each microtile */
 900    const GLint n_comp = 3; /* 3 components: R, G, B */
 901    GLubyte vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
 902    GLfloat b, iv[MAX_COMP]; /* interpolation vector */
 903    GLint i, j, k;
 904    Fx64 hi; /* high quadword */
 905    GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
 906
 907    GLint minSum;
 908    GLint maxSum;
 909    GLint minColL = 0, maxColL = -1;
 910    GLint minColR = 0, maxColR = -1;
 911
 912    /* Our solution here is to find the darkest and brightest colors in
 913     * the 4x4 tile and use those as the two representative colors.
 914     * There are probably better algorithms to use (histogram-based).
 915     */
 916    minSum = 2000; /* big enough */
 917    maxSum = -1; /* small enough */
 918    for (k = 0; k < N_TEXELS / 2; k++) {
 919       if (!ISTBLACK(input[k])) {
 920          GLint sum = 0;
 921          for (i = 0; i < n_comp; i++) {
 922             sum += input[k][i];
 923          }
 924          if (minSum > sum) {
 925             minSum = sum;
 926             minColL = k;
 927          }
 928          if (maxSum < sum) {
 929             maxSum = sum;
 930             maxColL = k;
 931          }
 932       }
 933    }
 934    minSum = 2000; /* big enough */
 935    maxSum = -1; /* small enough */
 936    for (; k < N_TEXELS; k++) {
 937       if (!ISTBLACK(input[k])) {
 938          GLint sum = 0;
 939          for (i = 0; i < n_comp; i++) {
 940             sum += input[k][i];
 941          }
 942          if (minSum > sum) {
 943             minSum = sum;
 944             minColR = k;
 945          }
 946          if (maxSum < sum) {
 947             maxSum = sum;
 948             maxColR = k;
 949          }
 950       }
 951    }
 952
 953    /* left microtile */
 954    if (maxColL == -1) {
 955       /* all transparent black */
 956       cc[0] = ~0u;
 957       for (i = 0; i < n_comp; i++) {
 958          vec[0][i] = 0;
 959          vec[1][i] = 0;
 960       }
 961    } else {
 962       cc[0] = 0;
 963       for (i = 0; i < n_comp; i++) {
 964          vec[0][i] = input[minColL][i];
 965          vec[1][i] = input[maxColL][i];
 966       }
 967       if (minColL != maxColL) {
 968          /* compute interpolation vector */
 969          MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
 970
 971          /* add in texels */
 972          lolo = 0;
 973          for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
 974             GLint texel = n_vect + 1; /* transparent black */
 975             if (!ISTBLACK(input[k])) {
 976                /* interpolate color */
 977                CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
 978             }
 979             /* add in texel */
 980             lolo <<= 2;
 981             lolo |= texel;
 982          }
 983          cc[0] = lolo;
 984       }
 985    }
 986
 987    /* right microtile */
 988    if (maxColR == -1) {
 989       /* all transparent black */
 990       cc[1] = ~0u;
 991       for (i = 0; i < n_comp; i++) {
 992          vec[2][i] = 0;
 993          vec[3][i] = 0;
 994       }
 995    } else {
 996       cc[1] = 0;
 997       for (i = 0; i < n_comp; i++) {
 998          vec[2][i] = input[minColR][i];
 999          vec[3][i] = input[maxColR][i];
1000       }
1001       if (minColR != maxColR) {
1002          /* compute interpolation vector */
1003          MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);
1004
1005          /* add in texels */
1006          lohi = 0;
1007          for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
1008             GLint texel = n_vect + 1; /* transparent black */
1009             if (!ISTBLACK(input[k])) {
1010                /* interpolate color */
1011                CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
1012             }
1013             /* add in texel */
1014             lohi <<= 2;
1015             lohi |= texel;
1016          }
1017          cc[1] = lohi;
1018       }
1019    }
1020
1021    FX64_MOV32(hi, 9 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
1022    for (j = 2 * 2 - 1; j >= 0; j--) {
1023       for (i = 0; i < n_comp; i++) {
1024          /* add in colors */
1025          FX64_SHL(hi, 5);
1026          FX64_OR32(hi, vec[j][i] >> 3);
1027       }
1028    }
1029    ((Fx64 *)cc)[1] = hi;
1030 }
1031
1032
1033 static void
1034 fxt1_quantize_MIXED0 (GLuint *cc,
1035                       GLubyte input[N_TEXELS][MAX_COMP])
1036 {
1037    const GLint n_vect = 3; /* highest vector number in each microtile */
1038    const GLint n_comp = 3; /* 3 components: R, G, B */
1039    GLubyte vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
1040    GLfloat b, iv[MAX_COMP]; /* interpolation vector */
1041    GLint i, j, k;
1042    Fx64 hi; /* high quadword */
1043    GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
1044
1045    GLint minColL = 0, maxColL = 0;
1046    GLint minColR = 0, maxColR = 0;
1047 #if 0
1048    GLint minSum;
1049    GLint maxSum;
1050
1051    /* Our solution here is to find the darkest and brightest colors in
1052     * the 4x4 tile and use those as the two representative colors.
1053     * There are probably better algorithms to use (histogram-based).
1054     */
1055    minSum = 2000; /* big enough */
1056    maxSum = -1; /* small enough */
1057    for (k = 0; k < N_TEXELS / 2; k++) {
1058       GLint sum = 0;
1059       for (i = 0; i < n_comp; i++) {
1060          sum += input[k][i];
1061       }
1062       if (minSum > sum) {
1063          minSum = sum;
1064          minColL = k;
1065       }
1066       if (maxSum < sum) {
1067          maxSum = sum;
1068          maxColL = k;
1069       }
1070    }
1071    minSum = 2000; /* big enough */
1072    maxSum = -1; /* small enough */
1073    for (; k < N_TEXELS; k++) {
1074       GLint sum = 0;
1075       for (i = 0; i < n_comp; i++) {
1076          sum += input[k][i];
1077       }
1078       if (minSum > sum) {
1079          minSum = sum;
1080          minColR = k;
1081       }
1082       if (maxSum < sum) {
1083          maxSum = sum;
1084          maxColR = k;
1085       }
1086    }
1087 #else
1088    GLint minVal;
1089    GLint maxVal;
1090    GLint maxVarL = fxt1_variance(NULL, input, n_comp, N_TEXELS / 2);
1091    GLint maxVarR = fxt1_variance(NULL, &input[N_TEXELS / 2], n_comp, N_TEXELS / 2);
1092
1093    /* Scan the channel with max variance for lo & hi
1094     * and use those as the two representative colors.
1095     */
1096    minVal = 2000; /* big enough */
1097    maxVal = -1; /* small enough */
1098    for (k = 0; k < N_TEXELS / 2; k++) {
1099       GLint t = input[k][maxVarL];
1100       if (minVal > t) {
1101          minVal = t;
1102          minColL = k;
1103       }
1104       if (maxVal < t) {
1105          maxVal = t;
1106          maxColL = k;
1107       }
1108    }
1109    minVal = 2000; /* big enough */
1110    maxVal = -1; /* small enough */
1111    for (; k < N_TEXELS; k++) {
1112       GLint t = input[k][maxVarR];
1113       if (minVal > t) {
1114          minVal = t;
1115          minColR = k;
1116       }
1117       if (maxVal < t) {
1118          maxVal = t;
1119          maxColR = k;
1120       }
1121    }
1122 #endif
1123
1124    /* left microtile */
1125    cc[0] = 0;
1126    for (i = 0; i < n_comp; i++) {
1127       vec[0][i] = input[minColL][i];
1128       vec[1][i] = input[maxColL][i];
1129    }
1130    if (minColL != maxColL) {
1131       /* compute interpolation vector */
1132       MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
1133
1134       /* add in texels */
1135       lolo = 0;
1136       for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
1137          GLint texel;
1138          /* interpolate color */
1139          CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
1140          /* add in texel */
1141          lolo <<= 2;
1142          lolo |= texel;
1143       }
1144
1145       /* funky encoding for LSB of green */
1146       if ((GLint)((lolo >> 1) & 1) != (((vec[1][GCOMP] ^ vec[0][GCOMP]) >> 2) & 1)) {
1147          for (i = 0; i < n_comp; i++) {
1148             vec[1][i] = input[minColL][i];
1149             vec[0][i] = input[maxColL][i];
1150          }
1151          lolo = ~lolo;
1152       }
1153
1154       cc[0] = lolo;
1155    }
1156
1157    /* right microtile */
1158    cc[1] = 0;
1159    for (i = 0; i < n_comp; i++) {
1160       vec[2][i] = input[minColR][i];
1161       vec[3][i] = input[maxColR][i];
1162    }
1163    if (minColR != maxColR) {
1164       /* compute interpolation vector */
1165       MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);
1166
1167       /* add in texels */
1168       lohi = 0;
1169       for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
1170          GLint texel;
1171          /* interpolate color */
1172          CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
1173          /* add in texel */
1174          lohi <<= 2;
1175          lohi |= texel;
1176       }
1177
1178       /* funky encoding for LSB of green */
1179       if ((GLint)((lohi >> 1) & 1) != (((vec[3][GCOMP] ^ vec[2][GCOMP]) >> 2) & 1)) {
1180          for (i = 0; i < n_comp; i++) {
1181             vec[3][i] = input[minColR][i];
1182             vec[2][i] = input[maxColR][i];
1183          }
1184          lohi = ~lohi;
1185       }
1186
1187       cc[1] = lohi;
1188    }
1189
1190    FX64_MOV32(hi, 8 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
1191    for (j = 2 * 2 - 1; j >= 0; j--) {
1192       for (i = 0; i < n_comp; i++) {
1193          /* add in colors */
1194          FX64_SHL(hi, 5);
1195          FX64_OR32(hi, vec[j][i] >> 3);
1196       }
1197    }
1198    ((Fx64 *)cc)[1] = hi;
1199 }
1200
1201
1202 static void
1203 fxt1_quantize (GLuint *cc, const GLubyte *lines[], GLint comps)
1204 {
1205    GLint trualpha;
1206    GLubyte reord[N_TEXELS][MAX_COMP];
1207
1208    GLubyte input[N_TEXELS][MAX_COMP];
1209    GLint i, k, l;
1210
1211    if (comps == 3) {
1212       /* make the whole block opaque */
1213       memset(input, -1, sizeof(input));
1214    }
1215
1216    /* 8 texels each line */
1217    for (l = 0; l < 4; l++) {
1218       for (k = 0; k < 4; k++) {
1219          for (i = 0; i < comps; i++) {
1220             input[k + l * 4][i] = *lines[l]++;
1221          }
1222       }
1223       for (; k < 8; k++) {
1224          for (i = 0; i < comps; i++) {
1225             input[k + l * 4 + 12][i] = *lines[l]++;
1226          }
1227       }
1228    }
1229
1230    /* block layout:
1231     * 00, 01, 02, 03, 08, 09, 0a, 0b
1232     * 10, 11, 12, 13, 18, 19, 1a, 1b
1233     * 04, 05, 06, 07, 0c, 0d, 0e, 0f
1234     * 14, 15, 16, 17, 1c, 1d, 1e, 1f
1235     */
1236
1237    /* [dBorca]
1238     * stupidity flows forth from this
1239     */
1240    l = N_TEXELS;
1241    trualpha = 0;
1242    if (comps == 4) {
1243       /* skip all transparent black texels */
1244       l = 0;
1245       for (k = 0; k < N_TEXELS; k++) {
1246          /* test all components against 0 */
1247          if (!ISTBLACK(input[k])) {
1248             /* texel is not transparent black */
1249             COPY_4UBV(reord[l], input[k]);
1250             if (reord[l][ACOMP] < (255 - ALPHA_TS)) {
1251                /* non-opaque texel */
1252                trualpha = !0;
1253             }
1254             l++;
1255          }
1256       }
1257    }
1258
1259 #if 0
1260    if (trualpha) {
1261       fxt1_quantize_ALPHA0(cc, input, reord, l);
1262    } else if (l == 0) {
1263       cc[0] = cc[1] = cc[2] = -1;
1264       cc[3] = 0;
1265    } else if (l < N_TEXELS) {
1266       fxt1_quantize_HI(cc, input, reord, l);
1267    } else {
1268       fxt1_quantize_CHROMA(cc, input);
1269    }
1270    (void)fxt1_quantize_ALPHA1;
1271    (void)fxt1_quantize_MIXED1;
1272    (void)fxt1_quantize_MIXED0;
1273 #else
1274    if (trualpha) {
1275       fxt1_quantize_ALPHA1(cc, input);
1276    } else if (l == 0) {
1277       cc[0] = cc[1] = cc[2] = ~0u;
1278       cc[3] = 0;
1279    } else if (l < N_TEXELS) {
1280       fxt1_quantize_MIXED1(cc, input);
1281    } else {
1282       fxt1_quantize_MIXED0(cc, input);
1283    }
1284    (void)fxt1_quantize_ALPHA0;
1285    (void)fxt1_quantize_HI;
1286    (void)fxt1_quantize_CHROMA;
1287 #endif
1288 }
1289
1290
1291 static void
1292 fxt1_encode (GLuint width, GLuint height, GLint comps,
1293              const void *source, GLint srcRowStride,
1294              void *dest, GLint destRowStride)
1295 {
1296    GLuint x, y;
1297    const GLubyte *data;
1298    GLuint *encoded = (GLuint *)dest;
1299    void *newSource = NULL;
1300
1301    assert(comps == 3 || comps == 4);
1302
1303    /* Replicate image if width is not M8 or height is not M4 */
1304    if ((width & 7) | (height & 3)) {
1305       GLint newWidth = (width + 7) & ~7;
1306       GLint newHeight = (height + 3) & ~3;
1307       newSource = malloc(comps * newWidth * newHeight * sizeof(GLchan));
1308       if (!newSource) {
1309          GET_CURRENT_CONTEXT(ctx);
1310          _mesa_error(ctx, GL_OUT_OF_MEMORY, "texture compression");
1311          goto cleanUp;
1312       }
1313       _mesa_upscale_teximage2d(width, height, newWidth, newHeight,
1314                                comps, (const GLchan *) source,
1315                                srcRowStride, (GLchan *) newSource);
1316       source = newSource;
1317       width = newWidth;
1318       height = newHeight;
1319       srcRowStride = comps * newWidth;
1320    }
1321
1322    /* convert from 16/32-bit channels to GLubyte if needed */
1323    if (CHAN_TYPE != GL_UNSIGNED_BYTE) {
1324       const GLuint n = width * height * comps;
1325       const GLchan *src = (const GLchan *) source;
1326       GLubyte *dest = (GLubyte *) malloc(n * sizeof(GLubyte));
1327       GLuint i;
1328       if (!dest) {
1329          GET_CURRENT_CONTEXT(ctx);
1330          _mesa_error(ctx, GL_OUT_OF_MEMORY, "texture compression");
1331          goto cleanUp;
1332       }
1333       for (i = 0; i < n; i++) {
1334          dest[i] = CHAN_TO_UBYTE(src[i]);
1335       }
1336       if (newSource != NULL) {
1337          free(newSource);
1338       }
1339       newSource = dest;  /* we'll free this buffer before returning */
1340       source = dest;  /* the new, GLubyte incoming image */
1341    }
1342
1343    data = (const GLubyte *) source;
1344    destRowStride = (destRowStride - width * 2) / 4;
1345    for (y = 0; y < height; y += 4) {
1346       GLuint offs = 0 + (y + 0) * srcRowStride;
1347       for (x = 0; x < width; x += 8) {
1348          const GLubyte *lines[4];
1349          lines[0] = &data[offs];
1350          lines[1] = lines[0] + srcRowStride;
1351          lines[2] = lines[1] + srcRowStride;
1352          lines[3] = lines[2] + srcRowStride;
1353          offs += 8 * comps;
1354          fxt1_quantize(encoded, lines, comps);
1355          /* 128 bits per 8x4 block */
1356          encoded += 4;
1357       }
1358       encoded += destRowStride;
1359    }
1360
1361  cleanUp:
1362    if (newSource != NULL) {
1363       free(newSource);
1364    }
1365 }
1366
1367
1368 /***************************************************************************\
1369  * FXT1 decoder
1370  *
1371  * The decoder is based on GL_3DFX_texture_compression_FXT1
1372  * specification and serves as a concept for the encoder.
1373 \***************************************************************************/
1374
1375
1376 /* lookup table for scaling 5 bit colors up to 8 bits */
1377 static const GLubyte _rgb_scale_5[] = {
1378    0,   8,   16,  25,  33,  41,  49,  58,
1379    66,  74,  82,  90,  99,  107, 115, 123,
1380    132, 140, 148, 156, 165, 173, 181, 189,
1381    197, 206, 214, 222, 230, 239, 247, 255
1382 };
1383
1384 /* lookup table for scaling 6 bit colors up to 8 bits */
1385 static const GLubyte _rgb_scale_6[] = {
1386    0,   4,   8,   12,  16,  20,  24,  28,
1387    32,  36,  40,  45,  49,  53,  57,  61,
1388    65,  69,  73,  77,  81,  85,  89,  93,
1389    97,  101, 105, 109, 113, 117, 121, 125,
1390    130, 134, 138, 142, 146, 150, 154, 158,
1391    162, 166, 170, 174, 178, 182, 186, 190,
1392    194, 198, 202, 206, 210, 215, 219, 223,
1393    227, 231, 235, 239, 243, 247, 251, 255
1394 };
1395
1396
/* CC_SEL: view `cc' as an array of dwords and return the dword that holds
 * bit number `which', shifted right so that this bit becomes bit 0.  Bits
 * beyond the end of that dword are not fetched; callers mask the result.
 */
#define CC_SEL(cc, which) (((const GLuint *)(cc))[(which) / 32] >> ((which) & 31))
/* UP5: expand the low 5 bits of `c' to an 8-bit value */
#define UP5(c) (_rgb_scale_5[(c) & 31])
/* UP6: expand a 6-bit value made of the low 5 bits of `c' followed by the
 * low bit of `b' as least significant bit
 */
#define UP6(c, b) (_rgb_scale_6[(((c) & 31) << 1) | ((b) & 1)])
/* LERP: rounded integer interpolation, c0 at t == 0 and c1 at t == n */
#define LERP(n, t, c0, c1) ((((n) - (t)) * (c0) + (t) * (c1) + (n) / 2) / (n))
1401
1402
1403 static void
1404 fxt1_decode_1HI (const GLubyte *code, GLint t, GLchan *rgba)
1405 {
1406    const GLuint *cc;
1407
1408    t *= 3;
1409    cc = (const GLuint *)(code + t / 8);
1410    t = (cc[0] >> (t & 7)) & 7;
1411
1412    if (t == 7) {
1413       rgba[RCOMP] = rgba[GCOMP] = rgba[BCOMP] = rgba[ACOMP] = 0;
1414    } else {
1415       GLubyte r, g, b;
1416       cc = (const GLuint *)(code + 12);
1417       if (t == 0) {
1418          b = UP5(CC_SEL(cc, 0));
1419          g = UP5(CC_SEL(cc, 5));
1420          r = UP5(CC_SEL(cc, 10));
1421       } else if (t == 6) {
1422          b = UP5(CC_SEL(cc, 15));
1423          g = UP5(CC_SEL(cc, 20));
1424          r = UP5(CC_SEL(cc, 25));
1425       } else {
1426          b = LERP(6, t, UP5(CC_SEL(cc, 0)), UP5(CC_SEL(cc, 15)));
1427          g = LERP(6, t, UP5(CC_SEL(cc, 5)), UP5(CC_SEL(cc, 20)));
1428          r = LERP(6, t, UP5(CC_SEL(cc, 10)), UP5(CC_SEL(cc, 25)));
1429       }
1430       rgba[RCOMP] = UBYTE_TO_CHAN(r);
1431       rgba[GCOMP] = UBYTE_TO_CHAN(g);
1432       rgba[BCOMP] = UBYTE_TO_CHAN(b);
1433       rgba[ACOMP] = CHAN_MAX;
1434    }
1435 }
1436
1437
1438 static void
1439 fxt1_decode_1CHROMA (const GLubyte *code, GLint t, GLchan *rgba)
1440 {
1441    const GLuint *cc;
1442    GLuint kk;
1443
1444    cc = (const GLuint *)code;
1445    if (t & 16) {
1446       cc++;
1447       t &= 15;
1448    }
1449    t = (cc[0] >> (t * 2)) & 3;
1450
1451    t *= 15;
1452    cc = (const GLuint *)(code + 8 + t / 8);
1453    kk = cc[0] >> (t & 7);
1454    rgba[BCOMP] = UBYTE_TO_CHAN( UP5(kk) );
1455    rgba[GCOMP] = UBYTE_TO_CHAN( UP5(kk >> 5) );
1456    rgba[RCOMP] = UBYTE_TO_CHAN( UP5(kk >> 10) );
1457    rgba[ACOMP] = CHAN_MAX;
1458 }
1459
1460
1461 static void
1462 fxt1_decode_1MIXED (const GLubyte *code, GLint t, GLchan *rgba)
1463 {
1464    const GLuint *cc;
1465    GLuint col[2][3];
1466    GLint glsb, selb;
1467
1468    cc = (const GLuint *)code;
1469    if (t & 16) {
1470       t &= 15;
1471       t = (cc[1] >> (t * 2)) & 3;
1472       /* col 2 */
1473       col[0][BCOMP] = (*(const GLuint *)(code + 11)) >> 6;
1474       col[0][GCOMP] = CC_SEL(cc, 99);
1475       col[0][RCOMP] = CC_SEL(cc, 104);
1476       /* col 3 */
1477       col[1][BCOMP] = CC_SEL(cc, 109);
1478       col[1][GCOMP] = CC_SEL(cc, 114);
1479       col[1][RCOMP] = CC_SEL(cc, 119);
1480       glsb = CC_SEL(cc, 126);
1481       selb = CC_SEL(cc, 33);
1482    } else {
1483       t = (cc[0] >> (t * 2)) & 3;
1484       /* col 0 */
1485       col[0][BCOMP] = CC_SEL(cc, 64);
1486       col[0][GCOMP] = CC_SEL(cc, 69);
1487       col[0][RCOMP] = CC_SEL(cc, 74);
1488       /* col 1 */
1489       col[1][BCOMP] = CC_SEL(cc, 79);
1490       col[1][GCOMP] = CC_SEL(cc, 84);
1491       col[1][RCOMP] = CC_SEL(cc, 89);
1492       glsb = CC_SEL(cc, 125);
1493       selb = CC_SEL(cc, 1);
1494    }
1495
1496    if (CC_SEL(cc, 124) & 1) {
1497       /* alpha[0] == 1 */
1498
1499       if (t == 3) {
1500          /* zero */
1501          rgba[RCOMP] = rgba[BCOMP] = rgba[GCOMP] = rgba[ACOMP] = 0;
1502       } else {
1503          GLubyte r, g, b;
1504          if (t == 0) {
1505             b = UP5(col[0][BCOMP]);
1506             g = UP5(col[0][GCOMP]);
1507             r = UP5(col[0][RCOMP]);
1508          } else if (t == 2) {
1509             b = UP5(col[1][BCOMP]);
1510             g = UP6(col[1][GCOMP], glsb);
1511             r = UP5(col[1][RCOMP]);
1512          } else {
1513             b = (UP5(col[0][BCOMP]) + UP5(col[1][BCOMP])) / 2;
1514             g = (UP5(col[0][GCOMP]) + UP6(col[1][GCOMP], glsb)) / 2;
1515             r = (UP5(col[0][RCOMP]) + UP5(col[1][RCOMP])) / 2;
1516          }
1517          rgba[RCOMP] = UBYTE_TO_CHAN(r);
1518          rgba[GCOMP] = UBYTE_TO_CHAN(g);
1519          rgba[BCOMP] = UBYTE_TO_CHAN(b);
1520          rgba[ACOMP] = CHAN_MAX;
1521       }
1522    } else {
1523       /* alpha[0] == 0 */
1524       GLubyte r, g, b;
1525       if (t == 0) {
1526          b = UP5(col[0][BCOMP]);
1527          g = UP6(col[0][GCOMP], glsb ^ selb);
1528          r = UP5(col[0][RCOMP]);
1529       } else if (t == 3) {
1530          b = UP5(col[1][BCOMP]);
1531          g = UP6(col[1][GCOMP], glsb);
1532          r = UP5(col[1][RCOMP]);
1533       } else {
1534          b = LERP(3, t, UP5(col[0][BCOMP]), UP5(col[1][BCOMP]));
1535          g = LERP(3, t, UP6(col[0][GCOMP], glsb ^ selb),
1536                         UP6(col[1][GCOMP], glsb));
1537          r = LERP(3, t, UP5(col[0][RCOMP]), UP5(col[1][RCOMP]));
1538       }
1539       rgba[RCOMP] = UBYTE_TO_CHAN(r);
1540       rgba[GCOMP] = UBYTE_TO_CHAN(g);
1541       rgba[BCOMP] = UBYTE_TO_CHAN(b);
1542       rgba[ACOMP] = CHAN_MAX;
1543    }
1544 }
1545
1546
1547 static void
1548 fxt1_decode_1ALPHA (const GLubyte *code, GLint t, GLchan *rgba)
1549 {
1550    const GLuint *cc;
1551    GLubyte r, g, b, a;
1552
1553    cc = (const GLuint *)code;
1554    if (CC_SEL(cc, 124) & 1) {
1555       /* lerp == 1 */
1556       GLuint col0[4];
1557
1558       if (t & 16) {
1559          t &= 15;
1560          t = (cc[1] >> (t * 2)) & 3;
1561          /* col 2 */
1562          col0[BCOMP] = (*(const GLuint *)(code + 11)) >> 6;
1563          col0[GCOMP] = CC_SEL(cc, 99);
1564          col0[RCOMP] = CC_SEL(cc, 104);
1565          col0[ACOMP] = CC_SEL(cc, 119);
1566       } else {
1567          t = (cc[0] >> (t * 2)) & 3;
1568          /* col 0 */
1569          col0[BCOMP] = CC_SEL(cc, 64);
1570          col0[GCOMP] = CC_SEL(cc, 69);
1571          col0[RCOMP] = CC_SEL(cc, 74);
1572          col0[ACOMP] = CC_SEL(cc, 109);
1573       }
1574
1575       if (t == 0) {
1576          b = UP5(col0[BCOMP]);
1577          g = UP5(col0[GCOMP]);
1578          r = UP5(col0[RCOMP]);
1579          a = UP5(col0[ACOMP]);
1580       } else if (t == 3) {
1581          b = UP5(CC_SEL(cc, 79));
1582          g = UP5(CC_SEL(cc, 84));
1583          r = UP5(CC_SEL(cc, 89));
1584          a = UP5(CC_SEL(cc, 114));
1585       } else {
1586          b = LERP(3, t, UP5(col0[BCOMP]), UP5(CC_SEL(cc, 79)));
1587          g = LERP(3, t, UP5(col0[GCOMP]), UP5(CC_SEL(cc, 84)));
1588          r = LERP(3, t, UP5(col0[RCOMP]), UP5(CC_SEL(cc, 89)));
1589          a = LERP(3, t, UP5(col0[ACOMP]), UP5(CC_SEL(cc, 114)));
1590       }
1591    } else {
1592       /* lerp == 0 */
1593
1594       if (t & 16) {
1595          cc++;
1596          t &= 15;
1597       }
1598       t = (cc[0] >> (t * 2)) & 3;
1599
1600       if (t == 3) {
1601          /* zero */
1602          r = g = b = a = 0;
1603       } else {
1604          GLuint kk;
1605          cc = (const GLuint *)code;
1606          a = UP5(cc[3] >> (t * 5 + 13));
1607          t *= 15;
1608          cc = (const GLuint *)(code + 8 + t / 8);
1609          kk = cc[0] >> (t & 7);
1610          b = UP5(kk);
1611          g = UP5(kk >> 5);
1612          r = UP5(kk >> 10);
1613       }
1614    }
1615    rgba[RCOMP] = UBYTE_TO_CHAN(r);
1616    rgba[GCOMP] = UBYTE_TO_CHAN(g);
1617    rgba[BCOMP] = UBYTE_TO_CHAN(b);
1618    rgba[ACOMP] = UBYTE_TO_CHAN(a);
1619 }
1620
1621
1622 void
1623 fxt1_decode_1 (const void *texture, GLint stride, /* in pixels */
1624                GLint i, GLint j, GLchan *rgba)
1625 {
1626    static void (*decode_1[]) (const GLubyte *, GLint, GLchan *) = {
1627       fxt1_decode_1HI,     /* cc-high   = "00?" */
1628       fxt1_decode_1HI,     /* cc-high   = "00?" */
1629       fxt1_decode_1CHROMA, /* cc-chroma = "010" */
1630       fxt1_decode_1ALPHA,  /* alpha     = "011" */
1631       fxt1_decode_1MIXED,  /* mixed     = "1??" */
1632       fxt1_decode_1MIXED,  /* mixed     = "1??" */
1633       fxt1_decode_1MIXED,  /* mixed     = "1??" */
1634       fxt1_decode_1MIXED   /* mixed     = "1??" */
1635    };
1636
1637    const GLubyte *code = (const GLubyte *)texture +
1638                          ((j / 4) * (stride / 8) + (i / 8)) * 16;
1639    GLint mode = CC_SEL(code, 125);
1640    GLint t = i & 7;
1641
1642    if (t & 4) {
1643       t += 12;
1644    }
1645    t += (j & 3) * 4;
1646
1647    decode_1[mode](code, t, rgba);
1648 }
1649
1650
1651 #endif /* FEATURE_texture_fxt1 */