src/mesa/main/texcompress_fxt1.c

   1 /*
   2  * Mesa 3-D graphics library
   3  * Version:  7.1
   4  *
   5  * Copyright (C) 1999-2008  Brian Paul   All Rights Reserved.
   6  *
   7  * Permission is hereby granted, free of charge, to any person obtaining a
   8  * copy of this software and associated documentation files (the "Software"),
   9  * to deal in the Software without restriction, including without limitation
  10  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  11  * and/or sell copies of the Software, and to permit persons to whom the
  12  * Software is furnished to do so, subject to the following conditions:
  13  *
  14  * The above copyright notice and this permission notice shall be included
  15  * in all copies or substantial portions of the Software.
  16  *
  17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  18  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  20  * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
  21  * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  22  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  23  */
  24
  25
  26 /**
  27  * \file texcompress_fxt1.c
  28  * GL_EXT_texture_compression_fxt1 support.
  29  */
  30
  31
  32 #include "glheader.h"
  33 #include "imports.h"
  34 #include "colormac.h"
  35 #include "context.h"
  36 #include "convolve.h"
  37 #include "image.h"
  38 #include "mipmap.h"
  39 #include "texcompress.h"
  40 #include "texcompress_fxt1.h"
  41 #include "texstore.h"
  42
  43
  44 #if FEATURE_texture_fxt1
  45
  46
/*
 * Forward declaration of the FXT1 encoder used by the texstore functions
 * below; they pass it the source size, the number of components per texel
 * (3 or 4), the source texels with their row stride, and the destination
 * address with its row stride.
 * NOTE(review): the definition is not part of this excerpt -- the units of
 * the two strides should be confirmed against it.
 */
static void
fxt1_encode (GLuint width, GLuint height, GLint comps,
             const void *source, GLint srcRowStride,
             void *dest, GLint destRowStride);

/*
 * Forward declaration of the single-texel decoder used by the fetch
 * functions below; they pass the image data, its row stride and the texel
 * coordinates (i, j), and read four GLchan components back from rgba.
 */
void
fxt1_decode_1 (const void *texture, GLint stride,
               GLint i, GLint j, GLchan *rgba);
  55
  56
  57 /**
  58  * Store user's image in rgb_fxt1 format.
  59  */
  60 GLboolean
  61 _mesa_texstore_rgb_fxt1(TEXSTORE_PARAMS)
  62 {
  63    const GLchan *pixels;
  64    GLint srcRowStride;
  65    GLubyte *dst;
  66    const GLint texWidth = dstRowStride * 8 / 16; /* a bit of a hack */
  67    const GLchan *tempImage = NULL;
  68
  69    ASSERT(dstFormat == MESA_FORMAT_RGB_FXT1);
  70    ASSERT(dstXoffset % 8 == 0);
  71    ASSERT(dstYoffset % 4 == 0);
  72    ASSERT(dstZoffset     == 0);
  73    (void) dstZoffset;
  74    (void) dstImageOffsets;
  75
  76    if (srcFormat != GL_RGB ||
  77        srcType != CHAN_TYPE ||
  78        ctx->_ImageTransferState ||
  79        srcPacking->SwapBytes) {
  80       /* convert image to RGB/GLchan */
  81       tempImage = _mesa_make_temp_chan_image(ctx, dims,
  82                                              baseInternalFormat,
  83                                              _mesa_get_format_base_format(dstFormat),
  84                                              srcWidth, srcHeight, srcDepth,
  85                                              srcFormat, srcType, srcAddr,
  86                                              srcPacking);
  87       if (!tempImage)
  88          return GL_FALSE; /* out of memory */
  89       _mesa_adjust_image_for_convolution(ctx, dims, &srcWidth, &srcHeight);
  90       pixels = tempImage;
  91       srcRowStride = 3 * srcWidth;
  92       srcFormat = GL_RGB;
  93    }
  94    else {
  95       pixels = (const GLchan *) srcAddr;
  96       srcRowStride = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat,
  97                                             srcType) / sizeof(GLchan);
  98    }
  99
 100    dst = _mesa_compressed_image_address(dstXoffset, dstYoffset, 0,
 101                                         dstFormat,
 102                                         texWidth, (GLubyte *) dstAddr);
 103
 104    fxt1_encode(srcWidth, srcHeight, 3, pixels, srcRowStride,
 105                dst, dstRowStride);
 106
 107    if (tempImage)
 108       _mesa_free((void*) tempImage);
 109
 110    return GL_TRUE;
 111 }
 112
 113
 114 /**
 115  * Store user's image in rgba_fxt1 format.
 116  */
 117 GLboolean
 118 _mesa_texstore_rgba_fxt1(TEXSTORE_PARAMS)
 119 {
 120    const GLchan *pixels;
 121    GLint srcRowStride;
 122    GLubyte *dst;
 123    GLint texWidth = dstRowStride * 8 / 16; /* a bit of a hack */
 124    const GLchan *tempImage = NULL;
 125
 126    ASSERT(dstFormat == MESA_FORMAT_RGBA_FXT1);
 127    ASSERT(dstXoffset % 8 == 0);
 128    ASSERT(dstYoffset % 4 == 0);
 129    ASSERT(dstZoffset     == 0);
 130    (void) dstZoffset;
 131    (void) dstImageOffsets;
 132
 133    if (srcFormat != GL_RGBA ||
 134        srcType != CHAN_TYPE ||
 135        ctx->_ImageTransferState ||
 136        srcPacking->SwapBytes) {
 137       /* convert image to RGBA/GLchan */
 138       tempImage = _mesa_make_temp_chan_image(ctx, dims,
 139                                              baseInternalFormat,
 140                                              _mesa_get_format_base_format(dstFormat),
 141                                              srcWidth, srcHeight, srcDepth,
 142                                              srcFormat, srcType, srcAddr,
 143                                              srcPacking);
 144       if (!tempImage)
 145          return GL_FALSE; /* out of memory */
 146       _mesa_adjust_image_for_convolution(ctx, dims, &srcWidth, &srcHeight);
 147       pixels = tempImage;
 148       srcRowStride = 4 * srcWidth;
 149       srcFormat = GL_RGBA;
 150    }
 151    else {
 152       pixels = (const GLchan *) srcAddr;
 153       srcRowStride = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat,
 154                                             srcType) / sizeof(GLchan);
 155    }
 156
 157    dst = _mesa_compressed_image_address(dstXoffset, dstYoffset, 0,
 158                                         dstFormat,
 159                                         texWidth, (GLubyte *) dstAddr);
 160
 161    fxt1_encode(srcWidth, srcHeight, 4, pixels, srcRowStride,
 162                dst, dstRowStride);
 163
 164    if (tempImage)
 165       _mesa_free((void*) tempImage);
 166
 167    return GL_TRUE;
 168 }
 169
 170
 171 void
 172 _mesa_fetch_texel_2d_f_rgba_fxt1( const struct gl_texture_image *texImage,
 173                                   GLint i, GLint j, GLint k, GLfloat *texel )
 174 {
 175    /* just sample as GLchan and convert to float here */
 176    GLchan rgba[4];
 177    (void) k;
 178    fxt1_decode_1(texImage->Data, texImage->RowStride, i, j, rgba);
 179    texel[RCOMP] = CHAN_TO_FLOAT(rgba[RCOMP]);
 180    texel[GCOMP] = CHAN_TO_FLOAT(rgba[GCOMP]);
 181    texel[BCOMP] = CHAN_TO_FLOAT(rgba[BCOMP]);
 182    texel[ACOMP] = CHAN_TO_FLOAT(rgba[ACOMP]);
 183 }
 184
 185
 186 void
 187 _mesa_fetch_texel_2d_f_rgb_fxt1( const struct gl_texture_image *texImage,
 188                                  GLint i, GLint j, GLint k, GLfloat *texel )
 189 {
 190    /* just sample as GLchan and convert to float here */
 191    GLchan rgba[4];
 192    (void) k;
 193    fxt1_decode_1(texImage->Data, texImage->RowStride, i, j, rgba);
 194    texel[RCOMP] = CHAN_TO_FLOAT(rgba[RCOMP]);
 195    texel[GCOMP] = CHAN_TO_FLOAT(rgba[GCOMP]);
 196    texel[BCOMP] = CHAN_TO_FLOAT(rgba[BCOMP]);
 197    texel[ACOMP] = 1.0F;
 198 }
 199
 200
 201
 202 /***************************************************************************\
 203  * FXT1 encoder
 204  *
 205  * The encoder was built by reversing the decoder,
 206  * and is vaguely based on Texus2 by 3dfx. Note that this code
 207  * is merely a proof of concept, since it is highly UNoptimized;
 208  * moreover, it is sub-optimal due to initial conditions passed
 209  * to Lloyd's algorithm (the interpolation modes are even worse).
 210 \***************************************************************************/
 211
 212
/* Limits and tuning constants used by the encoder routines. */
#define MAX_COMP 4 /* ever needed maximum number of components in texel */
#define MAX_VECT 4 /* ever needed maximum number of base vectors to find */
#define N_TEXELS 32 /* number of texels in a block (always 32) */
#define LL_N_REP 50 /* number of iterations in lloyd's vq */
#define LL_RMS_D 10 /* fault tolerance (maximum delta) */
#define LL_RMS_E 255 /* fault tolerance (maximum error) */
#define ALPHA_TS 2 /* alpha threshold: (255 - ALPHA_TS) deemed opaque */
/*
 * True when the GLuint-sized word at v is zero; the encoder uses it to
 * recognise a "transparent black" texel.
 * NOTE(review): v is read through a GLuint pointer, so the macro relies on
 * v being suitably aligned for GLuint and on the texel occupying exactly one
 * GLuint -- confirm for every caller.
 */
#define ISTBLACK(v) (*((GLuint *)(v)) == 0)
 221
 222
 223 /*
 224  * Define a 64-bit unsigned integer type and macros
 225  */
#if 1

/* Native variant: Fx64 is a plain 64-bit integer. */
#define FX64_NATIVE 1

typedef uint64_t Fx64;

/* a = b, a |= b and a <<= c on the 64-bit value (statement macros). */
#define FX64_MOV32(a, b) a = b
#define FX64_OR32(a, b)  a |= b
#define FX64_SHL(a, c)   a <<= c

#else

/* Fallback variant (currently disabled): Fx64 is a pair of 32-bit halves. */
#define FX64_NATIVE 0

typedef struct {
   GLuint lo, hi;
} Fx64;

/*
 * Set / OR a 32-bit value into the low half.
 * NOTE(review): FX64_MOV32 leaves the hi half untouched here, whereas the
 * native variant overwrites the whole value.
 */
#define FX64_MOV32(a, b) a.lo = b
#define FX64_OR32(a, b)  a.lo |= b

/*
 * Shift the 64-bit pair left by c bits, carrying bits from lo into hi.
 * NOTE(review): for c == 0 the else branch evaluates a.lo >> 32.
 */
#define FX64_SHL(a, c)                                 \
   do {                                                \
       if ((c) >= 32) {                                \
          a.hi = a.lo << ((c) - 32);                   \
          a.lo = 0;                                    \
       } else {                                        \
          a.hi = (a.hi << (c)) | (a.lo >> (32 - (c))); \
          a.lo <<= (c);                                \
       }                                               \
   } while (0)

#endif
 259
 260
/* Weight applied to component i by MAKEIVEC; currently 1 for every component. */
#define F(i) (GLfloat)1 /* can be used to obtain an oblong metric: 0.30 / 0.59 / 0.11 */
/* When non-zero, CALCCDOT clamps its result to the range [0, NV]. */
#define SAFECDOT 1 /* for paranoids */
 263
/*
 * Build the interpolation vector IV and offset B for the line from V0 to V1
 * (NC components each), scaled so that dot(V, IV) + B equals 0.5 at V0 and
 * NV + 0.5 at V1.  CALCCDOT truncates that value to obtain an index in
 * 0..NV.
 *
 * The expansion uses a variable named i from the enclosing scope as its loop
 * counter.  If V0 and V1 are equal, d2 is zero and the division by d2 is a
 * division by zero, so callers must only use it for distinct endpoints.
 */
#define MAKEIVEC(NV, NC, IV, B, V0, V1)  \
   do {                                  \
      /* compute interpolation vector */ \
      GLfloat d2 = 0.0F;                 \
      GLfloat rd2;                       \
                                         \
      for (i = 0; i < NC; i++) {         \
         IV[i] = (V1[i] - V0[i]) * F(i); \
         d2 += IV[i] * IV[i];            \
      }                                  \
      rd2 = (GLfloat)NV / d2;            \
      B = 0;                             \
      for (i = 0; i < NC; i++) {         \
         IV[i] *= F(i);                  \
         B -= IV[i] * V0[i];             \
         IV[i] *= rd2;                   \
      }                                  \
      B = B * rd2 + 0.5f;                \
   } while (0)
 283
/*
 * Compute the interpolation index of color V (NC components) from the
 * vector IV and offset B produced by MAKEIVEC: TEXEL receives
 * dot(V, IV) + B truncated to GLint and, when SAFECDOT is set, clamped to
 * the range [0, NV].
 *
 * The expansion uses a variable named i from the enclosing scope as its loop
 * counter.
 */
#define CALCCDOT(TEXEL, NV, NC, IV, B, V)\
   do {                                  \
      GLfloat dot = 0.0F;                \
      for (i = 0; i < NC; i++) {         \
         dot += V[i] * IV[i];            \
      }                                  \
      TEXEL = (GLint)(dot + B);          \
      if (SAFECDOT) {                    \
         if (TEXEL < 0) {                \
            TEXEL = 0;                   \
         } else if (TEXEL > NV) {        \
            TEXEL = NV;                  \
         }                               \
      }                                  \
   } while (0)
 299
 300
 301 static GLint
 302 fxt1_bestcol (GLfloat vec[][MAX_COMP], GLint nv,
 303               GLubyte input[MAX_COMP], GLint nc)
 304 {
 305    GLint i, j, best = -1;
 306    GLfloat err = 1e9; /* big enough */
 307
 308    for (j = 0; j < nv; j++) {
 309       GLfloat e = 0.0F;
 310       for (i = 0; i < nc; i++) {
 311          e += (vec[j][i] - input[i]) * (vec[j][i] - input[i]);
 312       }
 313       if (e < err) {
 314          err = e;
 315          best = j;
 316       }
 317    }
 318
 319    return best;
 320 }
 321
 322
 323 static GLint
 324 fxt1_worst (GLfloat vec[MAX_COMP],
 325             GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
 326 {
 327    GLint i, k, worst = -1;
 328    GLfloat err = -1.0F; /* small enough */
 329
 330    for (k = 0; k < n; k++) {
 331       GLfloat e = 0.0F;
 332       for (i = 0; i < nc; i++) {
 333          e += (vec[i] - input[k][i]) * (vec[i] - input[k][i]);
 334       }
 335       if (e > err) {
 336          err = e;
 337          worst = k;
 338       }
 339    }
 340
 341    return worst;
 342 }
 343
 344
 345 static GLint
 346 fxt1_variance (GLdouble variance[MAX_COMP],
 347                GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
 348 {
 349    GLint i, k, best = 0;
 350    GLint sx, sx2;
 351    GLdouble var, maxvar = -1; /* small enough */
 352    GLdouble teenth = 1.0 / n;
 353
 354    for (i = 0; i < nc; i++) {
 355       sx = sx2 = 0;
 356       for (k = 0; k < n; k++) {
 357          GLint t = input[k][i];
 358          sx += t;
 359          sx2 += t * t;
 360       }
 361       var = sx2 * teenth - sx * sx * teenth * teenth;
 362       if (maxvar < var) {
 363          maxvar = var;
 364          best = i;
 365       }
 366       if (variance) {
 367          variance[i] = var;
 368       }
 369    }
 370
 371    return best;
 372 }
 373
 374
 375 static GLint
 376 fxt1_choose (GLfloat vec[][MAX_COMP], GLint nv,
 377              GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
 378 {
 379 #if 0
 380    /* Choose colors from a grid.
 381     */
 382    GLint i, j;
 383
 384    for (j = 0; j < nv; j++) {
 385       GLint m = j * (n - 1) / (nv - 1);
 386       for (i = 0; i < nc; i++) {
 387          vec[j][i] = input[m][i];
 388       }
 389    }
 390 #else
 391    /* Our solution here is to find the darkest and brightest colors in
 392     * the 8x4 tile and use those as the two representative colors.
 393     * There are probably better algorithms to use (histogram-based).
 394     */
 395    GLint i, j, k;
 396    GLint minSum = 2000; /* big enough */
 397    GLint maxSum = -1; /* small enough */
 398    GLint minCol = 0; /* phoudoin: silent compiler! */
 399    GLint maxCol = 0; /* phoudoin: silent compiler! */
 400
 401    struct {
 402       GLint flag;
 403       GLint key;
 404       GLint freq;
 405       GLint idx;
 406    } hist[N_TEXELS];
 407    GLint lenh = 0;
 408
 409    _mesa_memset(hist, 0, sizeof(hist));
 410
 411    for (k = 0; k < n; k++) {
 412       GLint l;
 413       GLint key = 0;
 414       GLint sum = 0;
 415       for (i = 0; i < nc; i++) {
 416          key <<= 8;
 417          key |= input[k][i];
 418          sum += input[k][i];
 419       }
 420       for (l = 0; l < n; l++) {
 421          if (!hist[l].flag) {
 422             /* alloc new slot */
 423             hist[l].flag = !0;
 424             hist[l].key = key;
 425             hist[l].freq = 1;
 426             hist[l].idx = k;
 427             lenh = l + 1;
 428             break;
 429          } else if (hist[l].key == key) {
 430             hist[l].freq++;
 431             break;
 432          }
 433       }
 434       if (minSum > sum) {
 435          minSum = sum;
 436          minCol = k;
 437       }
 438       if (maxSum < sum) {
 439          maxSum = sum;
 440          maxCol = k;
 441       }
 442    }
 443
 444    if (lenh <= nv) {
 445       for (j = 0; j < lenh; j++) {
 446          for (i = 0; i < nc; i++) {
 447             vec[j][i] = (GLfloat)input[hist[j].idx][i];
 448          }
 449       }
 450       for (; j < nv; j++) {
 451          for (i = 0; i < nc; i++) {
 452             vec[j][i] = vec[0][i];
 453          }
 454       }
 455       return 0;
 456    }
 457
 458    for (j = 0; j < nv; j++) {
 459       for (i = 0; i < nc; i++) {
 460          vec[j][i] = ((nv - 1 - j) * input[minCol][i] + j * input[maxCol][i] + (nv - 1) / 2) / (GLfloat)(nv - 1);
 461       }
 462    }
 463 #endif
 464
 465    return !0;
 466 }
 467
 468
 469 static GLint
 470 fxt1_lloyd (GLfloat vec[][MAX_COMP], GLint nv,
 471             GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
 472 {
 473    /* Use the generalized lloyd's algorithm for VQ:
 474     *     find 4 color vectors.
 475     *
 476     *     for each sample color
 477     *         sort to nearest vector.
 478     *
 479     *     replace each vector with the centroid of it's matching colors.
 480     *
 481     *     repeat until RMS doesn't improve.
 482     *
 483     *     if a color vector has no samples, or becomes the same as another
 484     *     vector, replace it with the color which is farthest from a sample.
 485     *
 486     * vec[][MAX_COMP]           initial vectors and resulting colors
 487     * nv                        number of resulting colors required
 488     * input[N_TEXELS][MAX_COMP] input texels
 489     * nc                        number of components in input / vec
 490     * n                         number of input samples
 491     */
 492
 493    GLint sum[MAX_VECT][MAX_COMP]; /* used to accumulate closest texels */
 494    GLint cnt[MAX_VECT]; /* how many times a certain vector was chosen */
 495    GLfloat error, lasterror = 1e9;
 496
 497    GLint i, j, k, rep;
 498
 499    /* the quantizer */
 500    for (rep = 0; rep < LL_N_REP; rep++) {
 501       /* reset sums & counters */
 502       for (j = 0; j < nv; j++) {
 503          for (i = 0; i < nc; i++) {
 504             sum[j][i] = 0;
 505          }
 506          cnt[j] = 0;
 507       }
 508       error = 0;
 509
 510       /* scan whole block */
 511       for (k = 0; k < n; k++) {
 512 #if 1
 513          GLint best = -1;
 514          GLfloat err = 1e9; /* big enough */
 515          /* determine best vector */
 516          for (j = 0; j < nv; j++) {
 517             GLfloat e = (vec[j][0] - input[k][0]) * (vec[j][0] - input[k][0]) +
 518                       (vec[j][1] - input[k][1]) * (vec[j][1] - input[k][1]) +
 519                       (vec[j][2] - input[k][2]) * (vec[j][2] - input[k][2]);
 520             if (nc == 4) {
 521                e += (vec[j][3] - input[k][3]) * (vec[j][3] - input[k][3]);
 522             }
 523             if (e < err) {
 524                err = e;
 525                best = j;
 526             }
 527          }
 528 #else
 529          GLint best = fxt1_bestcol(vec, nv, input[k], nc, &err);
 530 #endif
 531          /* add in closest color */
 532          for (i = 0; i < nc; i++) {
 533             sum[best][i] += input[k][i];
 534          }
 535          /* mark this vector as used */
 536          cnt[best]++;
 537          /* accumulate error */
 538          error += err;
 539       }
 540
 541       /* check RMS */
 542       if ((error < LL_RMS_E) ||
 543           ((error < lasterror) && ((lasterror - error) < LL_RMS_D))) {
 544          return !0; /* good match */
 545       }
 546       lasterror = error;
 547
 548       /* move each vector to the barycenter of its closest colors */
 549       for (j = 0; j < nv; j++) {
 550          if (cnt[j]) {
 551             GLfloat div = 1.0F / cnt[j];
 552             for (i = 0; i < nc; i++) {
 553                vec[j][i] = div * sum[j][i];
 554             }
 555          } else {
 556             /* this vec has no samples or is identical with a previous vec */
 557             GLint worst = fxt1_worst(vec[j], input, nc, n);
 558             for (i = 0; i < nc; i++) {
 559                vec[j][i] = input[worst][i];
 560             }
 561          }
 562       }
 563    }
 564
 565    return 0; /* could not converge fast enough */
 566 }
 567
 568
 569 static void
 570 fxt1_quantize_CHROMA (GLuint *cc,
 571                       GLubyte input[N_TEXELS][MAX_COMP])
 572 {
 573    const GLint n_vect = 4; /* 4 base vectors to find */
 574    const GLint n_comp = 3; /* 3 components: R, G, B */
 575    GLfloat vec[MAX_VECT][MAX_COMP];
 576    GLint i, j, k;
 577    Fx64 hi; /* high quadword */
 578    GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
 579
 580    if (fxt1_choose(vec, n_vect, input, n_comp, N_TEXELS) != 0) {
 581       fxt1_lloyd(vec, n_vect, input, n_comp, N_TEXELS);
 582    }
 583
 584    FX64_MOV32(hi, 4); /* cc-chroma = "010" + unused bit */
 585    for (j = n_vect - 1; j >= 0; j--) {
 586       for (i = 0; i < n_comp; i++) {
 587          /* add in colors */
 588          FX64_SHL(hi, 5);
 589          FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
 590       }
 591    }
 592    ((Fx64 *)cc)[1] = hi;
 593
 594    lohi = lolo = 0;
 595    /* right microtile */
 596    for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) {
 597       lohi <<= 2;
 598       lohi |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
 599    }
 600    /* left microtile */
 601    for (; k >= 0; k--) {
 602       lolo <<= 2;
 603       lolo |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
 604    }
 605    cc[1] = lohi;
 606    cc[0] = lolo;
 607 }
 608
 609
 610 static void
 611 fxt1_quantize_ALPHA0 (GLuint *cc,
 612                       GLubyte input[N_TEXELS][MAX_COMP],
 613                       GLubyte reord[N_TEXELS][MAX_COMP], GLint n)
 614 {
 615    const GLint n_vect = 3; /* 3 base vectors to find */
 616    const GLint n_comp = 4; /* 4 components: R, G, B, A */
 617    GLfloat vec[MAX_VECT][MAX_COMP];
 618    GLint i, j, k;
 619    Fx64 hi; /* high quadword */
 620    GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
 621
 622    /* the last vector indicates zero */
 623    for (i = 0; i < n_comp; i++) {
 624       vec[n_vect][i] = 0;
 625    }
 626
 627    /* the first n texels in reord are guaranteed to be non-zero */
 628    if (fxt1_choose(vec, n_vect, reord, n_comp, n) != 0) {
 629       fxt1_lloyd(vec, n_vect, reord, n_comp, n);
 630    }
 631
 632    FX64_MOV32(hi, 6); /* alpha = "011" + lerp = 0 */
 633    for (j = n_vect - 1; j >= 0; j--) {
 634       /* add in alphas */
 635       FX64_SHL(hi, 5);
 636       FX64_OR32(hi, (GLuint)(vec[j][ACOMP] / 8.0F));
 637    }
 638    for (j = n_vect - 1; j >= 0; j--) {
 639       for (i = 0; i < n_comp - 1; i++) {
 640          /* add in colors */
 641          FX64_SHL(hi, 5);
 642          FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
 643       }
 644    }
 645    ((Fx64 *)cc)[1] = hi;
 646
 647    lohi = lolo = 0;
 648    /* right microtile */
 649    for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) {
 650       lohi <<= 2;
 651       lohi |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
 652    }
 653    /* left microtile */
 654    for (; k >= 0; k--) {
 655       lolo <<= 2;
 656       lolo |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
 657    }
 658    cc[1] = lohi;
 659    cc[0] = lolo;
 660 }
 661
 662
 663 static void
 664 fxt1_quantize_ALPHA1 (GLuint *cc,
 665                       GLubyte input[N_TEXELS][MAX_COMP])
 666 {
 667    const GLint n_vect = 3; /* highest vector number in each microtile */
 668    const GLint n_comp = 4; /* 4 components: R, G, B, A */
 669    GLfloat vec[1 + 1 + 1][MAX_COMP]; /* 1.5 extrema for each sub-block */
 670    GLfloat b, iv[MAX_COMP]; /* interpolation vector */
 671    GLint i, j, k;
 672    Fx64 hi; /* high quadword */
 673    GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
 674
 675    GLint minSum;
 676    GLint maxSum;
 677    GLint minColL = 0, maxColL = 0;
 678    GLint minColR = 0, maxColR = 0;
 679    GLint sumL = 0, sumR = 0;
 680    GLint nn_comp;
 681    /* Our solution here is to find the darkest and brightest colors in
 682     * the 4x4 tile and use those as the two representative colors.
 683     * There are probably better algorithms to use (histogram-based).
 684     */
 685    nn_comp = n_comp;
 686    while ((minColL == maxColL) && nn_comp) {
 687        minSum = 2000; /* big enough */
 688        maxSum = -1; /* small enough */
 689        for (k = 0; k < N_TEXELS / 2; k++) {
 690            GLint sum = 0;
 691            for (i = 0; i < nn_comp; i++) {
 692                sum += input[k][i];
 693            }
 694            if (minSum > sum) {
 695                minSum = sum;
 696                minColL = k;
 697            }
 698            if (maxSum < sum) {
 699                maxSum = sum;
 700                maxColL = k;
 701            }
 702            sumL += sum;
 703        }
 704
 705        nn_comp--;
 706    }
 707
 708    nn_comp = n_comp;
 709    while ((minColR == maxColR) && nn_comp) {
 710        minSum = 2000; /* big enough */
 711        maxSum = -1; /* small enough */
 712        for (k = N_TEXELS / 2; k < N_TEXELS; k++) {
 713            GLint sum = 0;
 714            for (i = 0; i < nn_comp; i++) {
 715                sum += input[k][i];
 716            }
 717            if (minSum > sum) {
 718                minSum = sum;
 719                minColR = k;
 720            }
 721            if (maxSum < sum) {
 722                maxSum = sum;
 723                maxColR = k;
 724            }
 725            sumR += sum;
 726        }
 727
 728        nn_comp--;
 729    }
 730
 731    /* choose the common vector (yuck!) */
 732    {
 733       GLint j1, j2;
 734       GLint v1 = 0, v2 = 0;
 735       GLfloat err = 1e9; /* big enough */
 736       GLfloat tv[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
 737       for (i = 0; i < n_comp; i++) {
 738          tv[0][i] = input[minColL][i];
 739          tv[1][i] = input[maxColL][i];
 740          tv[2][i] = input[minColR][i];
 741          tv[3][i] = input[maxColR][i];
 742       }
 743       for (j1 = 0; j1 < 2; j1++) {
 744          for (j2 = 2; j2 < 4; j2++) {
 745             GLfloat e = 0.0F;
 746             for (i = 0; i < n_comp; i++) {
 747                e += (tv[j1][i] - tv[j2][i]) * (tv[j1][i] - tv[j2][i]);
 748             }
 749             if (e < err) {
 750                err = e;
 751                v1 = j1;
 752                v2 = j2;
 753             }
 754          }
 755       }
 756       for (i = 0; i < n_comp; i++) {
 757          vec[0][i] = tv[1 - v1][i];
 758          vec[1][i] = (tv[v1][i] * sumL + tv[v2][i] * sumR) / (sumL + sumR);
 759          vec[2][i] = tv[5 - v2][i];
 760       }
 761    }
 762
 763    /* left microtile */
 764    cc[0] = 0;
 765    if (minColL != maxColL) {
 766       /* compute interpolation vector */
 767       MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
 768
 769       /* add in texels */
 770       lolo = 0;
 771       for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
 772          GLint texel;
 773          /* interpolate color */
 774          CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
 775          /* add in texel */
 776          lolo <<= 2;
 777          lolo |= texel;
 778       }
 779
 780       cc[0] = lolo;
 781    }
 782
 783    /* right microtile */
 784    cc[1] = 0;
 785    if (minColR != maxColR) {
 786       /* compute interpolation vector */
 787       MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[1]);
 788
 789       /* add in texels */
 790       lohi = 0;
 791       for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
 792          GLint texel;
 793          /* interpolate color */
 794          CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
 795          /* add in texel */
 796          lohi <<= 2;
 797          lohi |= texel;
 798       }
 799
 800       cc[1] = lohi;
 801    }
 802
 803    FX64_MOV32(hi, 7); /* alpha = "011" + lerp = 1 */
 804    for (j = n_vect - 1; j >= 0; j--) {
 805       /* add in alphas */
 806       FX64_SHL(hi, 5);
 807       FX64_OR32(hi, (GLuint)(vec[j][ACOMP] / 8.0F));
 808    }
 809    for (j = n_vect - 1; j >= 0; j--) {
 810       for (i = 0; i < n_comp - 1; i++) {
 811          /* add in colors */
 812          FX64_SHL(hi, 5);
 813          FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
 814       }
 815    }
 816    ((Fx64 *)cc)[1] = hi;
 817 }
 818
 819
 820 static void
 821 fxt1_quantize_HI (GLuint *cc,
 822                   GLubyte input[N_TEXELS][MAX_COMP],
 823                   GLubyte reord[N_TEXELS][MAX_COMP], GLint n)
 824 {
 825    const GLint n_vect = 6; /* highest vector number */
 826    const GLint n_comp = 3; /* 3 components: R, G, B */
 827    GLfloat b = 0.0F;       /* phoudoin: silent compiler! */
 828    GLfloat iv[MAX_COMP];   /* interpolation vector */
 829    GLint i, k;
 830    GLuint hihi; /* high quadword: hi dword */
 831
 832    GLint minSum = 2000; /* big enough */
 833    GLint maxSum = -1; /* small enough */
 834    GLint minCol = 0; /* phoudoin: silent compiler! */
 835    GLint maxCol = 0; /* phoudoin: silent compiler! */
 836
 837    /* Our solution here is to find the darkest and brightest colors in
 838     * the 8x4 tile and use those as the two representative colors.
 839     * There are probably better algorithms to use (histogram-based).
 840     */
 841    for (k = 0; k < n; k++) {
 842       GLint sum = 0;
 843       for (i = 0; i < n_comp; i++) {
 844          sum += reord[k][i];
 845       }
 846       if (minSum > sum) {
 847          minSum = sum;
 848          minCol = k;
 849       }
 850       if (maxSum < sum) {
 851          maxSum = sum;
 852          maxCol = k;
 853       }
 854    }
 855
 856    hihi = 0; /* cc-hi = "00" */
 857    for (i = 0; i < n_comp; i++) {
 858       /* add in colors */
 859       hihi <<= 5;
 860       hihi |= reord[maxCol][i] >> 3;
 861    }
 862    for (i = 0; i < n_comp; i++) {
 863       /* add in colors */
 864       hihi <<= 5;
 865       hihi |= reord[minCol][i] >> 3;
 866    }
 867    cc[3] = hihi;
 868    cc[0] = cc[1] = cc[2] = 0;
 869
 870    /* compute interpolation vector */
 871    if (minCol != maxCol) {
 872       MAKEIVEC(n_vect, n_comp, iv, b, reord[minCol], reord[maxCol]);
 873    }
 874
 875    /* add in texels */
 876    for (k = N_TEXELS - 1; k >= 0; k--) {
 877       GLint t = k * 3;
 878       GLuint *kk = (GLuint *)((char *)cc + t / 8);
 879       GLint texel = n_vect + 1; /* transparent black */
 880
 881       if (!ISTBLACK(input[k])) {
 882          if (minCol != maxCol) {
 883             /* interpolate color */
 884             CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
 885             /* add in texel */
 886             kk[0] |= texel << (t & 7);
 887          }
 888       } else {
 889          /* add in texel */
 890          kk[0] |= texel << (t & 7);
 891       }
 892    }
 893 }
 894
 895
 896 static void
 897 fxt1_quantize_MIXED1 (GLuint *cc,
 898                       GLubyte input[N_TEXELS][MAX_COMP])
 899 {
 900    const GLint n_vect = 2; /* highest vector number in each microtile */
 901    const GLint n_comp = 3; /* 3 components: R, G, B */
 902    GLubyte vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
 903    GLfloat b, iv[MAX_COMP]; /* interpolation vector */
 904    GLint i, j, k;
 905    Fx64 hi; /* high quadword */
 906    GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
 907
 908    GLint minSum;
 909    GLint maxSum;
 910    GLint minColL = 0, maxColL = -1;
 911    GLint minColR = 0, maxColR = -1;
 912
 913    /* Our solution here is to find the darkest and brightest colors in
 914     * the 4x4 tile and use those as the two representative colors.
 915     * There are probably better algorithms to use (histogram-based).
 916     */
 917    minSum = 2000; /* big enough */
 918    maxSum = -1; /* small enough */
 919    for (k = 0; k < N_TEXELS / 2; k++) {
 920       if (!ISTBLACK(input[k])) {
 921          GLint sum = 0;
 922          for (i = 0; i < n_comp; i++) {
 923             sum += input[k][i];
 924          }
 925          if (minSum > sum) {
 926             minSum = sum;
 927             minColL = k;
 928          }
 929          if (maxSum < sum) {
 930             maxSum = sum;
 931             maxColL = k;
 932          }
 933       }
 934    }
 935    minSum = 2000; /* big enough */
 936    maxSum = -1; /* small enough */
 937    for (; k < N_TEXELS; k++) {
 938       if (!ISTBLACK(input[k])) {
 939          GLint sum = 0;
 940          for (i = 0; i < n_comp; i++) {
 941             sum += input[k][i];
 942          }
 943          if (minSum > sum) {
 944             minSum = sum;
 945             minColR = k;
 946          }
 947          if (maxSum < sum) {
 948             maxSum = sum;
 949             maxColR = k;
 950          }
 951       }
 952    }
 953
 954    /* left microtile */
 955    if (maxColL == -1) {
 956       /* all transparent black */
 957       cc[0] = ~0u;
 958       for (i = 0; i < n_comp; i++) {
 959          vec[0][i] = 0;
 960          vec[1][i] = 0;
 961       }
 962    } else {
 963       cc[0] = 0;
 964       for (i = 0; i < n_comp; i++) {
 965          vec[0][i] = input[minColL][i];
 966          vec[1][i] = input[maxColL][i];
 967       }
 968       if (minColL != maxColL) {
 969          /* compute interpolation vector */
 970          MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
 971
 972          /* add in texels */
 973          lolo = 0;
 974          for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
 975             GLint texel = n_vect + 1; /* transparent black */
 976             if (!ISTBLACK(input[k])) {
 977                /* interpolate color */
 978                CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
 979             }
 980             /* add in texel */
 981             lolo <<= 2;
 982             lolo |= texel;
 983          }
 984          cc[0] = lolo;
 985       }
 986    }
 987
 988    /* right microtile */
 989    if (maxColR == -1) {
 990       /* all transparent black */
 991       cc[1] = ~0u;
 992       for (i = 0; i < n_comp; i++) {
 993          vec[2][i] = 0;
 994          vec[3][i] = 0;
 995       }
 996    } else {
 997       cc[1] = 0;
 998       for (i = 0; i < n_comp; i++) {
 999          vec[2][i] = input[minColR][i];
1000          vec[3][i] = input[maxColR][i];
1001       }
1002       if (minColR != maxColR) {
1003          /* compute interpolation vector */
1004          MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);
1005
1006          /* add in texels */
1007          lohi = 0;
1008          for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
1009             GLint texel = n_vect + 1; /* transparent black */
1010             if (!ISTBLACK(input[k])) {
1011                /* interpolate color */
1012                CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
1013             }
1014             /* add in texel */
1015             lohi <<= 2;
1016             lohi |= texel;
1017          }
1018          cc[1] = lohi;
1019       }
1020    }
1021
1022    FX64_MOV32(hi, 9 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
1023    for (j = 2 * 2 - 1; j >= 0; j--) {
1024       for (i = 0; i < n_comp; i++) {
1025          /* add in colors */
1026          FX64_SHL(hi, 5);
1027          FX64_OR32(hi, vec[j][i] >> 3);
1028       }
1029    }
1030    ((Fx64 *)cc)[1] = hi;
1031 }
1032
1033
1034 static void
1035 fxt1_quantize_MIXED0 (GLuint *cc,
1036                       GLubyte input[N_TEXELS][MAX_COMP])
1037 {
1038    const GLint n_vect = 3; /* highest vector number in each microtile */
1039    const GLint n_comp = 3; /* 3 components: R, G, B */
1040    GLubyte vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
1041    GLfloat b, iv[MAX_COMP]; /* interpolation vector */
1042    GLint i, j, k;
1043    Fx64 hi; /* high quadword */
1044    GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
1045
1046    GLint minColL = 0, maxColL = 0;
1047    GLint minColR = 0, maxColR = 0;
1048 #if 0
1049    GLint minSum;
1050    GLint maxSum;
1051
1052    /* Our solution here is to find the darkest and brightest colors in
1053     * the 4x4 tile and use those as the two representative colors.
1054     * There are probably better algorithms to use (histogram-based).
1055     */
1056    minSum = 2000; /* big enough */
1057    maxSum = -1; /* small enough */
1058    for (k = 0; k < N_TEXELS / 2; k++) {
1059       GLint sum = 0;
1060       for (i = 0; i < n_comp; i++) {
1061          sum += input[k][i];
1062       }
1063       if (minSum > sum) {
1064          minSum = sum;
1065          minColL = k;
1066       }
1067       if (maxSum < sum) {
1068          maxSum = sum;
1069          maxColL = k;
1070       }
1071    }
1072    minSum = 2000; /* big enough */
1073    maxSum = -1; /* small enough */
1074    for (; k < N_TEXELS; k++) {
1075       GLint sum = 0;
1076       for (i = 0; i < n_comp; i++) {
1077          sum += input[k][i];
1078       }
1079       if (minSum > sum) {
1080          minSum = sum;
1081          minColR = k;
1082       }
1083       if (maxSum < sum) {
1084          maxSum = sum;
1085          maxColR = k;
1086       }
1087    }
1088 #else
1089    GLint minVal;
1090    GLint maxVal;
1091    GLint maxVarL = fxt1_variance(NULL, input, n_comp, N_TEXELS / 2);
1092    GLint maxVarR = fxt1_variance(NULL, &input[N_TEXELS / 2], n_comp, N_TEXELS / 2);
1093
1094    /* Scan the channel with max variance for lo & hi
1095     * and use those as the two representative colors.
1096     */
1097    minVal = 2000; /* big enough */
1098    maxVal = -1; /* small enough */
1099    for (k = 0; k < N_TEXELS / 2; k++) {
1100       GLint t = input[k][maxVarL];
1101       if (minVal > t) {
1102          minVal = t;
1103          minColL = k;
1104       }
1105       if (maxVal < t) {
1106          maxVal = t;
1107          maxColL = k;
1108       }
1109    }
1110    minVal = 2000; /* big enough */
1111    maxVal = -1; /* small enough */
1112    for (; k < N_TEXELS; k++) {
1113       GLint t = input[k][maxVarR];
1114       if (minVal > t) {
1115          minVal = t;
1116          minColR = k;
1117       }
1118       if (maxVal < t) {
1119          maxVal = t;
1120          maxColR = k;
1121       }
1122    }
1123 #endif
1124
1125    /* left microtile */
1126    cc[0] = 0;
1127    for (i = 0; i < n_comp; i++) {
1128       vec[0][i] = input[minColL][i];
1129       vec[1][i] = input[maxColL][i];
1130    }
1131    if (minColL != maxColL) {
1132       /* compute interpolation vector */
1133       MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
1134
1135       /* add in texels */
1136       lolo = 0;
1137       for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
1138          GLint texel;
1139          /* interpolate color */
1140          CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
1141          /* add in texel */
1142          lolo <<= 2;
1143          lolo |= texel;
1144       }
1145
1146       /* funky encoding for LSB of green */
1147       if ((GLint)((lolo >> 1) & 1) != (((vec[1][GCOMP] ^ vec[0][GCOMP]) >> 2) & 1)) {
1148          for (i = 0; i < n_comp; i++) {
1149             vec[1][i] = input[minColL][i];
1150             vec[0][i] = input[maxColL][i];
1151          }
1152          lolo = ~lolo;
1153       }
1154
1155       cc[0] = lolo;
1156    }
1157
1158    /* right microtile */
1159    cc[1] = 0;
1160    for (i = 0; i < n_comp; i++) {
1161       vec[2][i] = input[minColR][i];
1162       vec[3][i] = input[maxColR][i];
1163    }
1164    if (minColR != maxColR) {
1165       /* compute interpolation vector */
1166       MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);
1167
1168       /* add in texels */
1169       lohi = 0;
1170       for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
1171          GLint texel;
1172          /* interpolate color */
1173          CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
1174          /* add in texel */
1175          lohi <<= 2;
1176          lohi |= texel;
1177       }
1178
1179       /* funky encoding for LSB of green */
1180       if ((GLint)((lohi >> 1) & 1) != (((vec[3][GCOMP] ^ vec[2][GCOMP]) >> 2) & 1)) {
1181          for (i = 0; i < n_comp; i++) {
1182             vec[3][i] = input[minColR][i];
1183             vec[2][i] = input[maxColR][i];
1184          }
1185          lohi = ~lohi;
1186       }
1187
1188       cc[1] = lohi;
1189    }
1190
1191    FX64_MOV32(hi, 8 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
1192    for (j = 2 * 2 - 1; j >= 0; j--) {
1193       for (i = 0; i < n_comp; i++) {
1194          /* add in colors */
1195          FX64_SHL(hi, 5);
1196          FX64_OR32(hi, vec[j][i] >> 3);
1197       }
1198    }
1199    ((Fx64 *)cc)[1] = hi;
1200 }
1201
1202
1203 static void
1204 fxt1_quantize (GLuint *cc, const GLubyte *lines[], GLint comps)
1205 {
1206    GLint trualpha;
1207    GLubyte reord[N_TEXELS][MAX_COMP];
1208
1209    GLubyte input[N_TEXELS][MAX_COMP];
1210    GLint i, k, l;
1211
1212    if (comps == 3) {
1213       /* make the whole block opaque */
1214       _mesa_memset(input, -1, sizeof(input));
1215    }
1216
1217    /* 8 texels each line */
1218    for (l = 0; l < 4; l++) {
1219       for (k = 0; k < 4; k++) {
1220          for (i = 0; i < comps; i++) {
1221             input[k + l * 4][i] = *lines[l]++;
1222          }
1223       }
1224       for (; k < 8; k++) {
1225          for (i = 0; i < comps; i++) {
1226             input[k + l * 4 + 12][i] = *lines[l]++;
1227          }
1228       }
1229    }
1230
1231    /* block layout:
1232     * 00, 01, 02, 03, 08, 09, 0a, 0b
1233     * 10, 11, 12, 13, 18, 19, 1a, 1b
1234     * 04, 05, 06, 07, 0c, 0d, 0e, 0f
1235     * 14, 15, 16, 17, 1c, 1d, 1e, 1f
1236     */
1237
1238    /* [dBorca]
1239     * stupidity flows forth from this
1240     */
1241    l = N_TEXELS;
1242    trualpha = 0;
1243    if (comps == 4) {
1244       /* skip all transparent black texels */
1245       l = 0;
1246       for (k = 0; k < N_TEXELS; k++) {
1247          /* test all components against 0 */
1248          if (!ISTBLACK(input[k])) {
1249             /* texel is not transparent black */
1250             COPY_4UBV(reord[l], input[k]);
1251             if (reord[l][ACOMP] < (255 - ALPHA_TS)) {
1252                /* non-opaque texel */
1253                trualpha = !0;
1254             }
1255             l++;
1256          }
1257       }
1258    }
1259
1260 #if 0
1261    if (trualpha) {
1262       fxt1_quantize_ALPHA0(cc, input, reord, l);
1263    } else if (l == 0) {
1264       cc[0] = cc[1] = cc[2] = -1;
1265       cc[3] = 0;
1266    } else if (l < N_TEXELS) {
1267       fxt1_quantize_HI(cc, input, reord, l);
1268    } else {
1269       fxt1_quantize_CHROMA(cc, input);
1270    }
1271    (void)fxt1_quantize_ALPHA1;
1272    (void)fxt1_quantize_MIXED1;
1273    (void)fxt1_quantize_MIXED0;
1274 #else
1275    if (trualpha) {
1276       fxt1_quantize_ALPHA1(cc, input);
1277    } else if (l == 0) {
1278       cc[0] = cc[1] = cc[2] = ~0u;
1279       cc[3] = 0;
1280    } else if (l < N_TEXELS) {
1281       fxt1_quantize_MIXED1(cc, input);
1282    } else {
1283       fxt1_quantize_MIXED0(cc, input);
1284    }
1285    (void)fxt1_quantize_ALPHA0;
1286    (void)fxt1_quantize_HI;
1287    (void)fxt1_quantize_CHROMA;
1288 #endif
1289 }
1290
1291
1292 static void
1293 fxt1_encode (GLuint width, GLuint height, GLint comps,
1294              const void *source, GLint srcRowStride,
1295              void *dest, GLint destRowStride)
1296 {
1297    GLuint x, y;
1298    const GLubyte *data;
1299    GLuint *encoded = (GLuint *)dest;
1300    void *newSource = NULL;
1301
1302    assert(comps == 3 || comps == 4);
1303
1304    /* Replicate image if width is not M8 or height is not M4 */
1305    if ((width & 7) | (height & 3)) {
1306       GLint newWidth = (width + 7) & ~7;
1307       GLint newHeight = (height + 3) & ~3;
1308       newSource = _mesa_malloc(comps * newWidth * newHeight * sizeof(GLchan));
1309       if (!newSource) {
1310          GET_CURRENT_CONTEXT(ctx);
1311          _mesa_error(ctx, GL_OUT_OF_MEMORY, "texture compression");
1312          goto cleanUp;
1313       }
1314       _mesa_upscale_teximage2d(width, height, newWidth, newHeight,
1315                                comps, (const GLchan *) source,
1316                                srcRowStride, (GLchan *) newSource);
1317       source = newSource;
1318       width = newWidth;
1319       height = newHeight;
1320       srcRowStride = comps * newWidth;
1321    }
1322
1323    /* convert from 16/32-bit channels to GLubyte if needed */
1324    if (CHAN_TYPE != GL_UNSIGNED_BYTE) {
1325       const GLuint n = width * height * comps;
1326       const GLchan *src = (const GLchan *) source;
1327       GLubyte *dest = (GLubyte *) _mesa_malloc(n * sizeof(GLubyte));
1328       GLuint i;
1329       if (!dest) {
1330          GET_CURRENT_CONTEXT(ctx);
1331          _mesa_error(ctx, GL_OUT_OF_MEMORY, "texture compression");
1332          goto cleanUp;
1333       }
1334       for (i = 0; i < n; i++) {
1335          dest[i] = CHAN_TO_UBYTE(src[i]);
1336       }
1337       if (newSource != NULL) {
1338          _mesa_free(newSource);
1339       }
1340       newSource = dest;  /* we'll free this buffer before returning */
1341       source = dest;  /* the new, GLubyte incoming image */
1342    }
1343
1344    data = (const GLubyte *) source;
1345    destRowStride = (destRowStride - width * 2) / 4;
1346    for (y = 0; y < height; y += 4) {
1347       GLuint offs = 0 + (y + 0) * srcRowStride;
1348       for (x = 0; x < width; x += 8) {
1349          const GLubyte *lines[4];
1350          lines[0] = &data[offs];
1351          lines[1] = lines[0] + srcRowStride;
1352          lines[2] = lines[1] + srcRowStride;
1353          lines[3] = lines[2] + srcRowStride;
1354          offs += 8 * comps;
1355          fxt1_quantize(encoded, lines, comps);
1356          /* 128 bits per 8x4 block */
1357          encoded += 4;
1358       }
1359       encoded += destRowStride;
1360    }
1361
1362  cleanUp:
1363    if (newSource != NULL) {
1364       _mesa_free(newSource);
1365    }
1366 }
1367
1368
1369 /***************************************************************************\
1370  * FXT1 decoder
1371  *
1372  * The decoder is based on GL_3DFX_texture_compression_FXT1
1373  * specification and serves as a concept for the encoder.
1374 \***************************************************************************/
1375
1376
1377 /* lookup table for scaling 5 bit colors up to 8 bits */
1378 static const GLubyte _rgb_scale_5[] = {
1379    0,   8,   16,  25,  33,  41,  49,  58,
1380    66,  74,  82,  90,  99,  107, 115, 123,
1381    132, 140, 148, 156, 165, 173, 181, 189,
1382    197, 206, 214, 222, 230, 239, 247, 255
1383 };
1384
1385 /* lookup table for scaling 6 bit colors up to 8 bits */
1386 static const GLubyte _rgb_scale_6[] = {
1387    0,   4,   8,   12,  16,  20,  24,  28,
1388    32,  36,  40,  45,  49,  53,  57,  61,
1389    65,  69,  73,  77,  81,  85,  89,  93,
1390    97,  101, 105, 109, 113, 117, 121, 125,
1391    130, 134, 138, 142, 146, 150, 154, 158,
1392    162, 166, 170, 174, 178, 182, 186, 190,
1393    194, 198, 202, 206, 210, 215, 219, 223,
1394    227, 231, 235, 239, 243, 247, 251, 255
1395 };
1396
1397
/* CC_SEL: the block viewed as an array of 32-bit words, shifted so that
 *         bit number "which" of the block becomes bit 0 of the result
 *         (higher bits of that word remain; callers mask as needed).
 * UP5:    expand the low 5 bits of c to 8 bits.
 * UP6:    expand the 6-bit value made of the low 5 bits of c followed by
 *         the low bit of b to 8 bits.
 * LERP:   rounded integer interpolation, t/n of the way from c0 to c1.
 */
#define CC_SEL(cc, which) (((const GLuint *)(cc))[(which) / 32] >> ((which) & 31))
#define UP5(c) (_rgb_scale_5[(c) & 31])
#define UP6(c, b) (_rgb_scale_6[(((c) & 31) << 1) | ((b) & 1)])
#define LERP(n, t, c0, c1) ((((n) - (t)) * (c0) + (t) * (c1) + (n) / 2) / (n))
1402
1403
1404 static void
1405 fxt1_decode_1HI (const GLubyte *code, GLint t, GLchan *rgba)
1406 {
1407    const GLuint *cc;
1408
1409    t *= 3;
1410    cc = (const GLuint *)(code + t / 8);
1411    t = (cc[0] >> (t & 7)) & 7;
1412
1413    if (t == 7) {
1414       rgba[RCOMP] = rgba[GCOMP] = rgba[BCOMP] = rgba[ACOMP] = 0;
1415    } else {
1416       GLubyte r, g, b;
1417       cc = (const GLuint *)(code + 12);
1418       if (t == 0) {
1419          b = UP5(CC_SEL(cc, 0));
1420          g = UP5(CC_SEL(cc, 5));
1421          r = UP5(CC_SEL(cc, 10));
1422       } else if (t == 6) {
1423          b = UP5(CC_SEL(cc, 15));
1424          g = UP5(CC_SEL(cc, 20));
1425          r = UP5(CC_SEL(cc, 25));
1426       } else {
1427          b = LERP(6, t, UP5(CC_SEL(cc, 0)), UP5(CC_SEL(cc, 15)));
1428          g = LERP(6, t, UP5(CC_SEL(cc, 5)), UP5(CC_SEL(cc, 20)));
1429          r = LERP(6, t, UP5(CC_SEL(cc, 10)), UP5(CC_SEL(cc, 25)));
1430       }
1431       rgba[RCOMP] = UBYTE_TO_CHAN(r);
1432       rgba[GCOMP] = UBYTE_TO_CHAN(g);
1433       rgba[BCOMP] = UBYTE_TO_CHAN(b);
1434       rgba[ACOMP] = CHAN_MAX;
1435    }
1436 }
1437
1438
1439 static void
1440 fxt1_decode_1CHROMA (const GLubyte *code, GLint t, GLchan *rgba)
1441 {
1442    const GLuint *cc;
1443    GLuint kk;
1444
1445    cc = (const GLuint *)code;
1446    if (t & 16) {
1447       cc++;
1448       t &= 15;
1449    }
1450    t = (cc[0] >> (t * 2)) & 3;
1451
1452    t *= 15;
1453    cc = (const GLuint *)(code + 8 + t / 8);
1454    kk = cc[0] >> (t & 7);
1455    rgba[BCOMP] = UBYTE_TO_CHAN( UP5(kk) );
1456    rgba[GCOMP] = UBYTE_TO_CHAN( UP5(kk >> 5) );
1457    rgba[RCOMP] = UBYTE_TO_CHAN( UP5(kk >> 10) );
1458    rgba[ACOMP] = CHAN_MAX;
1459 }
1460
1461
1462 static void
1463 fxt1_decode_1MIXED (const GLubyte *code, GLint t, GLchan *rgba)
1464 {
1465    const GLuint *cc;
1466    GLuint col[2][3];
1467    GLint glsb, selb;
1468
1469    cc = (const GLuint *)code;
1470    if (t & 16) {
1471       t &= 15;
1472       t = (cc[1] >> (t * 2)) & 3;
1473       /* col 2 */
1474       col[0][BCOMP] = (*(const GLuint *)(code + 11)) >> 6;
1475       col[0][GCOMP] = CC_SEL(cc, 99);
1476       col[0][RCOMP] = CC_SEL(cc, 104);
1477       /* col 3 */
1478       col[1][BCOMP] = CC_SEL(cc, 109);
1479       col[1][GCOMP] = CC_SEL(cc, 114);
1480       col[1][RCOMP] = CC_SEL(cc, 119);
1481       glsb = CC_SEL(cc, 126);
1482       selb = CC_SEL(cc, 33);
1483    } else {
1484       t = (cc[0] >> (t * 2)) & 3;
1485       /* col 0 */
1486       col[0][BCOMP] = CC_SEL(cc, 64);
1487       col[0][GCOMP] = CC_SEL(cc, 69);
1488       col[0][RCOMP] = CC_SEL(cc, 74);
1489       /* col 1 */
1490       col[1][BCOMP] = CC_SEL(cc, 79);
1491       col[1][GCOMP] = CC_SEL(cc, 84);
1492       col[1][RCOMP] = CC_SEL(cc, 89);
1493       glsb = CC_SEL(cc, 125);
1494       selb = CC_SEL(cc, 1);
1495    }
1496
1497    if (CC_SEL(cc, 124) & 1) {
1498       /* alpha[0] == 1 */
1499
1500       if (t == 3) {
1501          /* zero */
1502          rgba[RCOMP] = rgba[BCOMP] = rgba[GCOMP] = rgba[ACOMP] = 0;
1503       } else {
1504          GLubyte r, g, b;
1505          if (t == 0) {
1506             b = UP5(col[0][BCOMP]);
1507             g = UP5(col[0][GCOMP]);
1508             r = UP5(col[0][RCOMP]);
1509          } else if (t == 2) {
1510             b = UP5(col[1][BCOMP]);
1511             g = UP6(col[1][GCOMP], glsb);
1512             r = UP5(col[1][RCOMP]);
1513          } else {
1514             b = (UP5(col[0][BCOMP]) + UP5(col[1][BCOMP])) / 2;
1515             g = (UP5(col[0][GCOMP]) + UP6(col[1][GCOMP], glsb)) / 2;
1516             r = (UP5(col[0][RCOMP]) + UP5(col[1][RCOMP])) / 2;
1517          }
1518          rgba[RCOMP] = UBYTE_TO_CHAN(r);
1519          rgba[GCOMP] = UBYTE_TO_CHAN(g);
1520          rgba[BCOMP] = UBYTE_TO_CHAN(b);
1521          rgba[ACOMP] = CHAN_MAX;
1522       }
1523    } else {
1524       /* alpha[0] == 0 */
1525       GLubyte r, g, b;
1526       if (t == 0) {
1527          b = UP5(col[0][BCOMP]);
1528          g = UP6(col[0][GCOMP], glsb ^ selb);
1529          r = UP5(col[0][RCOMP]);
1530       } else if (t == 3) {
1531          b = UP5(col[1][BCOMP]);
1532          g = UP6(col[1][GCOMP], glsb);
1533          r = UP5(col[1][RCOMP]);
1534       } else {
1535          b = LERP(3, t, UP5(col[0][BCOMP]), UP5(col[1][BCOMP]));
1536          g = LERP(3, t, UP6(col[0][GCOMP], glsb ^ selb),
1537                         UP6(col[1][GCOMP], glsb));
1538          r = LERP(3, t, UP5(col[0][RCOMP]), UP5(col[1][RCOMP]));
1539       }
1540       rgba[RCOMP] = UBYTE_TO_CHAN(r);
1541       rgba[GCOMP] = UBYTE_TO_CHAN(g);
1542       rgba[BCOMP] = UBYTE_TO_CHAN(b);
1543       rgba[ACOMP] = CHAN_MAX;
1544    }
1545 }
1546
1547
1548 static void
1549 fxt1_decode_1ALPHA (const GLubyte *code, GLint t, GLchan *rgba)
1550 {
1551    const GLuint *cc;
1552    GLubyte r, g, b, a;
1553
1554    cc = (const GLuint *)code;
1555    if (CC_SEL(cc, 124) & 1) {
1556       /* lerp == 1 */
1557       GLuint col0[4];
1558
1559       if (t & 16) {
1560          t &= 15;
1561          t = (cc[1] >> (t * 2)) & 3;
1562          /* col 2 */
1563          col0[BCOMP] = (*(const GLuint *)(code + 11)) >> 6;
1564          col0[GCOMP] = CC_SEL(cc, 99);
1565          col0[RCOMP] = CC_SEL(cc, 104);
1566          col0[ACOMP] = CC_SEL(cc, 119);
1567       } else {
1568          t = (cc[0] >> (t * 2)) & 3;
1569          /* col 0 */
1570          col0[BCOMP] = CC_SEL(cc, 64);
1571          col0[GCOMP] = CC_SEL(cc, 69);
1572          col0[RCOMP] = CC_SEL(cc, 74);
1573          col0[ACOMP] = CC_SEL(cc, 109);
1574       }
1575
1576       if (t == 0) {
1577          b = UP5(col0[BCOMP]);
1578          g = UP5(col0[GCOMP]);
1579          r = UP5(col0[RCOMP]);
1580          a = UP5(col0[ACOMP]);
1581       } else if (t == 3) {
1582          b = UP5(CC_SEL(cc, 79));
1583          g = UP5(CC_SEL(cc, 84));
1584          r = UP5(CC_SEL(cc, 89));
1585          a = UP5(CC_SEL(cc, 114));
1586       } else {
1587          b = LERP(3, t, UP5(col0[BCOMP]), UP5(CC_SEL(cc, 79)));
1588          g = LERP(3, t, UP5(col0[GCOMP]), UP5(CC_SEL(cc, 84)));
1589          r = LERP(3, t, UP5(col0[RCOMP]), UP5(CC_SEL(cc, 89)));
1590          a = LERP(3, t, UP5(col0[ACOMP]), UP5(CC_SEL(cc, 114)));
1591       }
1592    } else {
1593       /* lerp == 0 */
1594
1595       if (t & 16) {
1596          cc++;
1597          t &= 15;
1598       }
1599       t = (cc[0] >> (t * 2)) & 3;
1600
1601       if (t == 3) {
1602          /* zero */
1603          r = g = b = a = 0;
1604       } else {
1605          GLuint kk;
1606          cc = (const GLuint *)code;
1607          a = UP5(cc[3] >> (t * 5 + 13));
1608          t *= 15;
1609          cc = (const GLuint *)(code + 8 + t / 8);
1610          kk = cc[0] >> (t & 7);
1611          b = UP5(kk);
1612          g = UP5(kk >> 5);
1613          r = UP5(kk >> 10);
1614       }
1615    }
1616    rgba[RCOMP] = UBYTE_TO_CHAN(r);
1617    rgba[GCOMP] = UBYTE_TO_CHAN(g);
1618    rgba[BCOMP] = UBYTE_TO_CHAN(b);
1619    rgba[ACOMP] = UBYTE_TO_CHAN(a);
1620 }
1621
1622
1623 void
1624 fxt1_decode_1 (const void *texture, GLint stride, /* in pixels */
1625                GLint i, GLint j, GLchan *rgba)
1626 {
1627    static void (*decode_1[]) (const GLubyte *, GLint, GLchan *) = {
1628       fxt1_decode_1HI,     /* cc-high   = "00?" */
1629       fxt1_decode_1HI,     /* cc-high   = "00?" */
1630       fxt1_decode_1CHROMA, /* cc-chroma = "010" */
1631       fxt1_decode_1ALPHA,  /* alpha     = "011" */
1632       fxt1_decode_1MIXED,  /* mixed     = "1??" */
1633       fxt1_decode_1MIXED,  /* mixed     = "1??" */
1634       fxt1_decode_1MIXED,  /* mixed     = "1??" */
1635       fxt1_decode_1MIXED   /* mixed     = "1??" */
1636    };
1637
1638    const GLubyte *code = (const GLubyte *)texture +
1639                          ((j / 4) * (stride / 8) + (i / 8)) * 16;
1640    GLint mode = CC_SEL(code, 125);
1641    GLint t = i & 7;
1642
1643    if (t & 4) {
1644       t += 12;
1645    }
1646    t += (j & 3) * 4;
1647
1648    decode_1[mode](code, t, rgba);
1649 }
1650
1651
1652 #endif /* FEATURE_texture_fxt1 */