src/mesa/main/texcompress_fxt1.c

   1 /*
   2  * Mesa 3-D graphics library
   3  * Version:  7.1
   4  *
   5  * Copyright (C) 1999-2008  Brian Paul   All Rights Reserved.
   6  *
   7  * Permission is hereby granted, free of charge, to any person obtaining a
   8  * copy of this software and associated documentation files (the "Software"),
   9  * to deal in the Software without restriction, including without limitation
  10  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  11  * and/or sell copies of the Software, and to permit persons to whom the
  12  * Software is furnished to do so, subject to the following conditions:
  13  *
  14  * The above copyright notice and this permission notice shall be included
  15  * in all copies or substantial portions of the Software.
  16  *
  17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  18  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  20  * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
  21  * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  22  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  23  */
  24
  25
  26 /**
  27  * \file texcompress_fxt1.c
  28  * GL_EXT_texture_compression_fxt1 support.
  29  */
  30
  31
  32 #include "glheader.h"
  33 #include "imports.h"
  34 #include "colormac.h"
  35 #include "context.h"
  36 #include "convolve.h"
  37 #include "image.h"
  38 #include "mipmap.h"
  39 #include "texcompress.h"
  40 #include "texcompress_fxt1.h"
  41 #include "texstore.h"
  42
  43
  44 #if FEATURE_texture_fxt1
  45
  46
/* Forward declaration of the FXT1 encoder that the texstore functions
 * below call; its definition is not in this part of the file.
 */
static void
fxt1_encode (GLuint width, GLuint height, GLint comps,
             const void *source, GLint srcRowStride,
             void *dest, GLint destRowStride);

/* Forward declaration of the single-texel decoder that the fetch functions
 * below call.  Note that, unlike fxt1_encode, it is not declared static.
 */
void
fxt1_decode_1 (const void *texture, GLint stride,
               GLint i, GLint j, GLchan *rgba);
  55
  56
  57 /**
  58  * Store user's image in rgb_fxt1 format.
  59  */
  60 GLboolean
  61 _mesa_texstore_rgb_fxt1(TEXSTORE_PARAMS)
  62 {
  63    const GLchan *pixels;
  64    GLint srcRowStride;
  65    GLubyte *dst;
  66    const GLint texWidth = dstRowStride * 8 / 16; /* a bit of a hack */
  67    const GLchan *tempImage = NULL;
  68
  69    ASSERT(dstFormat == MESA_FORMAT_RGB_FXT1);
  70    ASSERT(dstXoffset % 8 == 0);
  71    ASSERT(dstYoffset % 4 == 0);
  72    ASSERT(dstZoffset     == 0);
  73    (void) dstZoffset;
  74    (void) dstImageOffsets;
  75
  76    if (srcFormat != GL_RGB ||
  77        srcType != CHAN_TYPE ||
  78        ctx->_ImageTransferState ||
  79        srcPacking->SwapBytes) {
  80       /* convert image to RGB/GLchan */
  81       tempImage = _mesa_make_temp_chan_image(ctx, dims,
  82                                              baseInternalFormat,
  83                                              _mesa_get_format_base_format(dstFormat),
  84                                              srcWidth, srcHeight, srcDepth,
  85                                              srcFormat, srcType, srcAddr,
  86                                              srcPacking);
  87       if (!tempImage)
  88          return GL_FALSE; /* out of memory */
  89       _mesa_adjust_image_for_convolution(ctx, dims, &srcWidth, &srcHeight);
  90       pixels = tempImage;
  91       srcRowStride = 3 * srcWidth;
  92       srcFormat = GL_RGB;
  93    }
  94    else {
  95       pixels = (const GLchan *) srcAddr;
  96       srcRowStride = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat,
  97                                             srcType) / sizeof(GLchan);
  98    }
  99
 100    dst = _mesa_compressed_image_address(dstXoffset, dstYoffset, 0,
 101                                         dstFormat,
 102                                         texWidth, (GLubyte *) dstAddr);
 103
 104    fxt1_encode(srcWidth, srcHeight, 3, pixels, srcRowStride,
 105                dst, dstRowStride);
 106
 107    if (tempImage)
 108       free((void*) tempImage);
 109
 110    return GL_TRUE;
 111 }
 112
 113
 114 /**
 115  * Store user's image in rgba_fxt1 format.
 116  */
 117 GLboolean
 118 _mesa_texstore_rgba_fxt1(TEXSTORE_PARAMS)
 119 {
 120    const GLchan *pixels;
 121    GLint srcRowStride;
 122    GLubyte *dst;
 123    GLint texWidth = dstRowStride * 8 / 16; /* a bit of a hack */
 124    const GLchan *tempImage = NULL;
 125
 126    ASSERT(dstFormat == MESA_FORMAT_RGBA_FXT1);
 127    ASSERT(dstXoffset % 8 == 0);
 128    ASSERT(dstYoffset % 4 == 0);
 129    ASSERT(dstZoffset     == 0);
 130    (void) dstZoffset;
 131    (void) dstImageOffsets;
 132
 133    if (srcFormat != GL_RGBA ||
 134        srcType != CHAN_TYPE ||
 135        ctx->_ImageTransferState ||
 136        srcPacking->SwapBytes) {
 137       /* convert image to RGBA/GLchan */
 138       tempImage = _mesa_make_temp_chan_image(ctx, dims,
 139                                              baseInternalFormat,
 140                                              _mesa_get_format_base_format(dstFormat),
 141                                              srcWidth, srcHeight, srcDepth,
 142                                              srcFormat, srcType, srcAddr,
 143                                              srcPacking);
 144       if (!tempImage)
 145          return GL_FALSE; /* out of memory */
 146       _mesa_adjust_image_for_convolution(ctx, dims, &srcWidth, &srcHeight);
 147       pixels = tempImage;
 148       srcRowStride = 4 * srcWidth;
 149       srcFormat = GL_RGBA;
 150    }
 151    else {
 152       pixels = (const GLchan *) srcAddr;
 153       srcRowStride = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat,
 154                                             srcType) / sizeof(GLchan);
 155    }
 156
 157    dst = _mesa_compressed_image_address(dstXoffset, dstYoffset, 0,
 158                                         dstFormat,
 159                                         texWidth, (GLubyte *) dstAddr);
 160
 161    fxt1_encode(srcWidth, srcHeight, 4, pixels, srcRowStride,
 162                dst, dstRowStride);
 163
 164    if (tempImage)
 165       free((void*) tempImage);
 166
 167    return GL_TRUE;
 168 }
 169
 170
 171 void
 172 _mesa_fetch_texel_2d_f_rgba_fxt1( const struct gl_texture_image *texImage,
 173                                   GLint i, GLint j, GLint k, GLfloat *texel )
 174 {
 175    /* just sample as GLchan and convert to float here */
 176    GLchan rgba[4];
 177    (void) k;
 178    fxt1_decode_1(texImage->Data, texImage->RowStride, i, j, rgba);
 179    texel[RCOMP] = CHAN_TO_FLOAT(rgba[RCOMP]);
 180    texel[GCOMP] = CHAN_TO_FLOAT(rgba[GCOMP]);
 181    texel[BCOMP] = CHAN_TO_FLOAT(rgba[BCOMP]);
 182    texel[ACOMP] = CHAN_TO_FLOAT(rgba[ACOMP]);
 183 }
 184
 185
 186 void
 187 _mesa_fetch_texel_2d_f_rgb_fxt1( const struct gl_texture_image *texImage,
 188                                  GLint i, GLint j, GLint k, GLfloat *texel )
 189 {
 190    /* just sample as GLchan and convert to float here */
 191    GLchan rgba[4];
 192    (void) k;
 193    fxt1_decode_1(texImage->Data, texImage->RowStride, i, j, rgba);
 194    texel[RCOMP] = CHAN_TO_FLOAT(rgba[RCOMP]);
 195    texel[GCOMP] = CHAN_TO_FLOAT(rgba[GCOMP]);
 196    texel[BCOMP] = CHAN_TO_FLOAT(rgba[BCOMP]);
 197    texel[ACOMP] = 1.0F;
 198 }
 199
 200
 201
 202 /***************************************************************************\
 203  * FXT1 encoder
 204  *
 205  * The encoder was built by reversing the decoder,
 206  * and is vaguely based on Texus2 by 3dfx. Note that this code
 207  * is merely a proof of concept, since it is highly UNoptimized;
 208  * moreover, it is sub-optimal due to initial conditions passed
 209  * to Lloyd's algorithm (the interpolation modes are even worse).
 210 \***************************************************************************/
 211
 212
 213 #define MAX_COMP 4 /* ever needed maximum number of components in texel */
 214 #define MAX_VECT 4 /* ever needed maximum number of base vectors to find */
 215 #define N_TEXELS 32 /* number of texels in a block (always 32) */
 216 #define LL_N_REP 50 /* number of iterations in lloyd's vq */
 217 #define LL_RMS_D 10 /* fault tolerance (maximum delta) */
 218 #define LL_RMS_E 255 /* fault tolerance (maximum error) */
 219 #define ALPHA_TS 2 /* alpha threshold: (255 - ALPHA_TS) deemed opaque */
 220 #define ISTBLACK(v) (*((GLuint *)(v)) == 0)
 221
 222
 223 /*
 224  * Define a 64-bit unsigned integer type and macros
 225  */
 226 #if 1
 227
 228 #define FX64_NATIVE 1
 229
 230 typedef uint64_t Fx64;
 231
 232 #define FX64_MOV32(a, b) a = b
 233 #define FX64_OR32(a, b)  a |= b
 234 #define FX64_SHL(a, c)   a <<= c
 235
 236 #else
 237
 238 #define FX64_NATIVE 0
 239
 240 typedef struct {
 241    GLuint lo, hi;
 242 } Fx64;
 243
 244 #define FX64_MOV32(a, b) a.lo = b
 245 #define FX64_OR32(a, b)  a.lo |= b
 246
 247 #define FX64_SHL(a, c)                                 \
 248    do {                                                \
 249        if ((c) >= 32) {                                \
 250           a.hi = a.lo << ((c) - 32);                   \
 251           a.lo = 0;                                    \
 252        } else {                                        \
 253           a.hi = (a.hi << (c)) | (a.lo >> (32 - (c))); \
 254           a.lo <<= (c);                                \
 255        }                                               \
 256    } while (0)
 257
 258 #endif
 259
 260
/* Per-component weight used by MAKEIVEC; currently the same for all i. */
#define F(i) (GLfloat)1 /* can be used to obtain an oblong metric: 0.30 / 0.59 / 0.11 */
/* When non-zero, CALCCDOT clamps its result to the range [0, NV]. */
#define SAFECDOT 1 /* for paranoids */

/*
 * Build the interpolation vector IV and bias B for the segment V0 -> V1,
 * such that (dot(V, IV) + B) evaluates to 0.5 for V == V0 and to NV + 0.5
 * for V == V1.  NC is the number of components.
 *
 * The macro uses a variable named 'i' from the enclosing scope as its loop
 * counter, and it writes to IV and B.
 *
 * NOTE(review): d2 is zero when V0 and V1 are equal, which makes the
 * division below a division by zero; the callers visible here only invoke
 * the macro after a "min index != max index" check -- confirm that this
 * is sufficient for every caller.
 */
#define MAKEIVEC(NV, NC, IV, B, V0, V1)  \
   do {                                  \
      /* compute interpolation vector */ \
      GLfloat d2 = 0.0F;                 \
      GLfloat rd2;                       \
                                         \
      for (i = 0; i < NC; i++) {         \
         IV[i] = (V1[i] - V0[i]) * F(i); \
         d2 += IV[i] * IV[i];            \
      }                                  \
      rd2 = (GLfloat)NV / d2;            \
      B = 0;                             \
      for (i = 0; i < NC; i++) {         \
         IV[i] *= F(i);                  \
         B -= IV[i] * V0[i];             \
         IV[i] *= rd2;                   \
      }                                  \
      B = B * rd2 + 0.5f;                \
   } while (0)

/*
 * Compute the interpolation index of color V: TEXEL = (GLint)(dot(V, IV) + B),
 * clamped to [0, NV] when SAFECDOT is enabled.  IV and B are the values
 * produced by MAKEIVEC.
 *
 * Like MAKEIVEC, this macro uses a variable named 'i' from the enclosing
 * scope as its loop counter.
 */
#define CALCCDOT(TEXEL, NV, NC, IV, B, V)\
   do {                                  \
      GLfloat dot = 0.0F;                \
      for (i = 0; i < NC; i++) {         \
         dot += V[i] * IV[i];            \
      }                                  \
      TEXEL = (GLint)(dot + B);          \
      if (SAFECDOT) {                    \
         if (TEXEL < 0) {                \
            TEXEL = 0;                   \
         } else if (TEXEL > NV) {        \
            TEXEL = NV;                  \
         }                               \
      }                                  \
   } while (0)
 299
 300
 301 static GLint
 302 fxt1_bestcol (GLfloat vec[][MAX_COMP], GLint nv,
 303               GLubyte input[MAX_COMP], GLint nc)
 304 {
 305    GLint i, j, best = -1;
 306    GLfloat err = 1e9; /* big enough */
 307
 308    for (j = 0; j < nv; j++) {
 309       GLfloat e = 0.0F;
 310       for (i = 0; i < nc; i++) {
 311          e += (vec[j][i] - input[i]) * (vec[j][i] - input[i]);
 312       }
 313       if (e < err) {
 314          err = e;
 315          best = j;
 316       }
 317    }
 318
 319    return best;
 320 }
 321
 322
 323 static GLint
 324 fxt1_worst (GLfloat vec[MAX_COMP],
 325             GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
 326 {
 327    GLint i, k, worst = -1;
 328    GLfloat err = -1.0F; /* small enough */
 329
 330    for (k = 0; k < n; k++) {
 331       GLfloat e = 0.0F;
 332       for (i = 0; i < nc; i++) {
 333          e += (vec[i] - input[k][i]) * (vec[i] - input[k][i]);
 334       }
 335       if (e > err) {
 336          err = e;
 337          worst = k;
 338       }
 339    }
 340
 341    return worst;
 342 }
 343
 344
 345 static GLint
 346 fxt1_variance (GLdouble variance[MAX_COMP],
 347                GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
 348 {
 349    GLint i, k, best = 0;
 350    GLint sx, sx2;
 351    GLdouble var, maxvar = -1; /* small enough */
 352    GLdouble teenth = 1.0 / n;
 353
 354    for (i = 0; i < nc; i++) {
 355       sx = sx2 = 0;
 356       for (k = 0; k < n; k++) {
 357          GLint t = input[k][i];
 358          sx += t;
 359          sx2 += t * t;
 360       }
 361       var = sx2 * teenth - sx * sx * teenth * teenth;
 362       if (maxvar < var) {
 363          maxvar = var;
 364          best = i;
 365       }
 366       if (variance) {
 367          variance[i] = var;
 368       }
 369    }
 370
 371    return best;
 372 }
 373
 374
 375 static GLint
 376 fxt1_choose (GLfloat vec[][MAX_COMP], GLint nv,
 377              GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
 378 {
 379 #if 0
 380    /* Choose colors from a grid.
 381     */
 382    GLint i, j;
 383
 384    for (j = 0; j < nv; j++) {
 385       GLint m = j * (n - 1) / (nv - 1);
 386       for (i = 0; i < nc; i++) {
 387          vec[j][i] = input[m][i];
 388       }
 389    }
 390 #else
 391    /* Our solution here is to find the darkest and brightest colors in
 392     * the 8x4 tile and use those as the two representative colors.
 393     * There are probably better algorithms to use (histogram-based).
 394     */
 395    GLint i, j, k;
 396    GLint minSum = 2000; /* big enough */
 397    GLint maxSum = -1; /* small enough */
 398    GLint minCol = 0; /* phoudoin: silent compiler! */
 399    GLint maxCol = 0; /* phoudoin: silent compiler! */
 400
 401    struct {
 402       GLint flag;
 403       GLint key;
 404       GLint freq;
 405       GLint idx;
 406    } hist[N_TEXELS];
 407    GLint lenh = 0;
 408
 409    memset(hist, 0, sizeof(hist));
 410
 411    for (k = 0; k < n; k++) {
 412       GLint l;
 413       GLint key = 0;
 414       GLint sum = 0;
 415       for (i = 0; i < nc; i++) {
 416          key <<= 8;
 417          key |= input[k][i];
 418          sum += input[k][i];
 419       }
 420       for (l = 0; l < n; l++) {
 421          if (!hist[l].flag) {
 422             /* alloc new slot */
 423             hist[l].flag = !0;
 424             hist[l].key = key;
 425             hist[l].freq = 1;
 426             hist[l].idx = k;
 427             lenh = l + 1;
 428             break;
 429          } else if (hist[l].key == key) {
 430             hist[l].freq++;
 431             break;
 432          }
 433       }
 434       if (minSum > sum) {
 435          minSum = sum;
 436          minCol = k;
 437       }
 438       if (maxSum < sum) {
 439          maxSum = sum;
 440          maxCol = k;
 441       }
 442    }
 443
 444    if (lenh <= nv) {
 445       for (j = 0; j < lenh; j++) {
 446          for (i = 0; i < nc; i++) {
 447             vec[j][i] = (GLfloat)input[hist[j].idx][i];
 448          }
 449       }
 450       for (; j < nv; j++) {
 451          for (i = 0; i < nc; i++) {
 452             vec[j][i] = vec[0][i];
 453          }
 454       }
 455       return 0;
 456    }
 457
 458    for (j = 0; j < nv; j++) {
 459       for (i = 0; i < nc; i++) {
 460          vec[j][i] = ((nv - 1 - j) * input[minCol][i] + j * input[maxCol][i] + (nv - 1) / 2) / (GLfloat)(nv - 1);
 461       }
 462    }
 463 #endif
 464
 465    return !0;
 466 }
 467
 468
 469 static GLint
 470 fxt1_lloyd (GLfloat vec[][MAX_COMP], GLint nv,
 471             GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
 472 {
 473    /* Use the generalized lloyd's algorithm for VQ:
 474     *     find 4 color vectors.
 475     *
 476     *     for each sample color
 477     *         sort to nearest vector.
 478     *
 479     *     replace each vector with the centroid of its matching colors.
 480     *
 481     *     repeat until RMS doesn't improve.
 482     *
 483     *     if a color vector has no samples, or becomes the same as another
 484     *     vector, replace it with the color which is farthest from a sample.
 485     *
 486     * vec[][MAX_COMP]           initial vectors and resulting colors
 487     * nv                        number of resulting colors required
 488     * input[N_TEXELS][MAX_COMP] input texels
 489     * nc                        number of components in input / vec
 490     * n                         number of input samples
 491     */
 492
 493    GLint sum[MAX_VECT][MAX_COMP]; /* used to accumulate closest texels */
 494    GLint cnt[MAX_VECT]; /* how many times a certain vector was chosen */
 495    GLfloat error, lasterror = 1e9;
 496
 497    GLint i, j, k, rep;
 498
 499    /* the quantizer */
 500    for (rep = 0; rep < LL_N_REP; rep++) {
 501       /* reset sums & counters */
 502       for (j = 0; j < nv; j++) {
 503          for (i = 0; i < nc; i++) {
 504             sum[j][i] = 0;
 505          }
 506          cnt[j] = 0;
 507       }
 508       error = 0;
 509
 510       /* scan whole block */
 511       for (k = 0; k < n; k++) {
 512 #if 1
 513          GLint best = -1;
 514          GLfloat err = 1e9; /* big enough */
 515          /* determine best vector */
 516          for (j = 0; j < nv; j++) {
 517             GLfloat e = (vec[j][0] - input[k][0]) * (vec[j][0] - input[k][0]) +
 518                       (vec[j][1] - input[k][1]) * (vec[j][1] - input[k][1]) +
 519                       (vec[j][2] - input[k][2]) * (vec[j][2] - input[k][2]);
 520             if (nc == 4) {
 521                e += (vec[j][3] - input[k][3]) * (vec[j][3] - input[k][3]);
 522             }
 523             if (e < err) {
 524                err = e;
 525                best = j;
 526             }
 527          }
 528 #else
 529          GLint best = fxt1_bestcol(vec, nv, input[k], nc, &err);
 530 #endif
 531          assert(best >= 0);
 532          /* add in closest color */
 533          for (i = 0; i < nc; i++) {
 534             sum[best][i] += input[k][i];
 535          }
 536          /* mark this vector as used */
 537          cnt[best]++;
 538          /* accumulate error */
 539          error += err;
 540       }
 541
 542       /* check RMS */
 543       if ((error < LL_RMS_E) ||
 544           ((error < lasterror) && ((lasterror - error) < LL_RMS_D))) {
 545          return !0; /* good match */
 546       }
 547       lasterror = error;
 548
 549       /* move each vector to the barycenter of its closest colors */
 550       for (j = 0; j < nv; j++) {
 551          if (cnt[j]) {
 552             GLfloat div = 1.0F / cnt[j];
 553             for (i = 0; i < nc; i++) {
 554                vec[j][i] = div * sum[j][i];
 555             }
 556          } else {
 557             /* this vec has no samples or is identical with a previous vec */
 558             GLint worst = fxt1_worst(vec[j], input, nc, n);
 559             for (i = 0; i < nc; i++) {
 560                vec[j][i] = input[worst][i];
 561             }
 562          }
 563       }
 564    }
 565
 566    return 0; /* could not converge fast enough */
 567 }
 568
 569
 570 static void
 571 fxt1_quantize_CHROMA (GLuint *cc,
 572                       GLubyte input[N_TEXELS][MAX_COMP])
 573 {
 574    const GLint n_vect = 4; /* 4 base vectors to find */
 575    const GLint n_comp = 3; /* 3 components: R, G, B */
 576    GLfloat vec[MAX_VECT][MAX_COMP];
 577    GLint i, j, k;
 578    Fx64 hi; /* high quadword */
 579    GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
 580
 581    if (fxt1_choose(vec, n_vect, input, n_comp, N_TEXELS) != 0) {
 582       fxt1_lloyd(vec, n_vect, input, n_comp, N_TEXELS);
 583    }
 584
 585    FX64_MOV32(hi, 4); /* cc-chroma = "010" + unused bit */
 586    for (j = n_vect - 1; j >= 0; j--) {
 587       for (i = 0; i < n_comp; i++) {
 588          /* add in colors */
 589          FX64_SHL(hi, 5);
 590          FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
 591       }
 592    }
 593    ((Fx64 *)cc)[1] = hi;
 594
 595    lohi = lolo = 0;
 596    /* right microtile */
 597    for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) {
 598       lohi <<= 2;
 599       lohi |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
 600    }
 601    /* left microtile */
 602    for (; k >= 0; k--) {
 603       lolo <<= 2;
 604       lolo |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
 605    }
 606    cc[1] = lohi;
 607    cc[0] = lolo;
 608 }
 609
 610
 611 static void
 612 fxt1_quantize_ALPHA0 (GLuint *cc,
 613                       GLubyte input[N_TEXELS][MAX_COMP],
 614                       GLubyte reord[N_TEXELS][MAX_COMP], GLint n)
 615 {
 616    const GLint n_vect = 3; /* 3 base vectors to find */
 617    const GLint n_comp = 4; /* 4 components: R, G, B, A */
 618    GLfloat vec[MAX_VECT][MAX_COMP];
 619    GLint i, j, k;
 620    Fx64 hi; /* high quadword */
 621    GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
 622
 623    /* the last vector indicates zero */
 624    for (i = 0; i < n_comp; i++) {
 625       vec[n_vect][i] = 0;
 626    }
 627
 628    /* the first n texels in reord are guaranteed to be non-zero */
 629    if (fxt1_choose(vec, n_vect, reord, n_comp, n) != 0) {
 630       fxt1_lloyd(vec, n_vect, reord, n_comp, n);
 631    }
 632
 633    FX64_MOV32(hi, 6); /* alpha = "011" + lerp = 0 */
 634    for (j = n_vect - 1; j >= 0; j--) {
 635       /* add in alphas */
 636       FX64_SHL(hi, 5);
 637       FX64_OR32(hi, (GLuint)(vec[j][ACOMP] / 8.0F));
 638    }
 639    for (j = n_vect - 1; j >= 0; j--) {
 640       for (i = 0; i < n_comp - 1; i++) {
 641          /* add in colors */
 642          FX64_SHL(hi, 5);
 643          FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
 644       }
 645    }
 646    ((Fx64 *)cc)[1] = hi;
 647
 648    lohi = lolo = 0;
 649    /* right microtile */
 650    for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) {
 651       lohi <<= 2;
 652       lohi |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
 653    }
 654    /* left microtile */
 655    for (; k >= 0; k--) {
 656       lolo <<= 2;
 657       lolo |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
 658    }
 659    cc[1] = lohi;
 660    cc[0] = lolo;
 661 }
 662
 663
/**
 * Encode one block of 32 texels in alpha mode with lerp = 1.
 *
 * Three RGBA vectors are stored: vec[0] is an endpoint used only by the
 * left microtile (texels 0..15), vec[2] an endpoint used only by the right
 * microtile (texels 16..31), and vec[1] an endpoint shared by both.  Each
 * texel gets a 2-bit index obtained by projecting its color onto the
 * segment between its microtile's two endpoints.
 *
 * \param cc     receives the 128-bit encoded block (4 GLuints)
 * \param input  the 32 texels of the block
 */
static void
fxt1_quantize_ALPHA1 (GLuint *cc,
                      GLubyte input[N_TEXELS][MAX_COMP])
{
   const GLint n_vect = 3; /* highest vector number in each microtile */
   const GLint n_comp = 4; /* 4 components: R, G, B, A */
   GLfloat vec[1 + 1 + 1][MAX_COMP]; /* 1.5 extrema for each sub-block */
   GLfloat b, iv[MAX_COMP]; /* interpolation vector */
   GLint i, j, k;
   Fx64 hi; /* high quadword */
   GLuint lohi, lolo; /* low quadword: hi dword, lo dword */

   GLint minSum;
   GLint maxSum;
   GLint minColL = 0, maxColL = 0;
   GLint minColR = 0, maxColR = 0;
   GLint sumL = 0, sumR = 0;
   GLint nn_comp;
   /* Our solution here is to find the darkest and brightest colors in
    * the 4x4 tile and use those as the two representative colors.
    * There are probably better algorithms to use (histogram-based).
    */
   /* Left microtile.  If the search over all components yields the same
    * texel as darkest and brightest, it is repeated with one component
    * less, until the two differ or no component is left.
    * NOTE(review): sumL is not reset between retries, so it accumulates
    * the sums of every pass -- confirm this weighting is intended.
    */
   nn_comp = n_comp;
   while ((minColL == maxColL) && nn_comp) {
       minSum = 2000; /* big enough */
       maxSum = -1; /* small enough */
       for (k = 0; k < N_TEXELS / 2; k++) {
           GLint sum = 0;
           for (i = 0; i < nn_comp; i++) {
               sum += input[k][i];
           }
           if (minSum > sum) {
               minSum = sum;
               minColL = k;
           }
           if (maxSum < sum) {
               maxSum = sum;
               maxColL = k;
           }
           sumL += sum;
       }

       nn_comp--;
   }

   /* Right microtile: same search over texels 16..31. */
   nn_comp = n_comp;
   while ((minColR == maxColR) && nn_comp) {
       minSum = 2000; /* big enough */
       maxSum = -1; /* small enough */
       for (k = N_TEXELS / 2; k < N_TEXELS; k++) {
           GLint sum = 0;
           for (i = 0; i < nn_comp; i++) {
               sum += input[k][i];
           }
           if (minSum > sum) {
               minSum = sum;
               minColR = k;
           }
           if (maxSum < sum) {
               maxSum = sum;
               maxColR = k;
           }
           sumR += sum;
       }

       nn_comp--;
   }

   /* choose the common vector (yuck!) */
   {
      GLint j1, j2;
      GLint v1 = 0, v2 = 0;
      GLfloat err = 1e9; /* big enough */
      GLfloat tv[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
      /* tv[0], tv[1]: left min / max;  tv[2], tv[3]: right min / max */
      for (i = 0; i < n_comp; i++) {
         tv[0][i] = input[minColL][i];
         tv[1][i] = input[maxColL][i];
         tv[2][i] = input[minColR][i];
         tv[3][i] = input[maxColR][i];
      }
      /* find the (left extreme, right extreme) pair that is closest
       * together; v1 / v2 are their indices in tv
       */
      for (j1 = 0; j1 < 2; j1++) {
         for (j2 = 2; j2 < 4; j2++) {
            GLfloat e = 0.0F;
            for (i = 0; i < n_comp; i++) {
               e += (tv[j1][i] - tv[j2][i]) * (tv[j1][i] - tv[j2][i]);
            }
            if (e < err) {
               err = e;
               v1 = j1;
               v2 = j2;
            }
         }
      }
      /* vec[0]: the other left extreme (1 - v1);
       * vec[1]: blend of the chosen pair, weighted by sumL and sumR;
       * vec[2]: the other right extreme (5 - v2 maps 2 <-> 3).
       * NOTE(review): if sumL + sumR is 0 (every summed component is
       * zero) the blend divides by zero -- confirm callers never pass
       * such a block.
       */
      for (i = 0; i < n_comp; i++) {
         vec[0][i] = tv[1 - v1][i];
         vec[1][i] = (tv[v1][i] * sumL + tv[v2][i] * sumR) / (sumL + sumR);
         vec[2][i] = tv[5 - v2][i];
      }
   }

   /* left microtile */
   /* all indices stay 0 when the left half had no distinct extremes */
   cc[0] = 0;
   if (minColL != maxColL) {
      /* compute interpolation vector */
      MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);

      /* add in texels */
      lolo = 0;
      for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
         GLint texel;
         /* interpolate color */
         CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
         /* add in texel */
         lolo <<= 2;
         lolo |= texel;
      }

      cc[0] = lolo;
   }

   /* right microtile */
   /* all indices stay 0 when the right half had no distinct extremes */
   cc[1] = 0;
   if (minColR != maxColR) {
      /* compute interpolation vector */
      MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[1]);

      /* add in texels */
      lohi = 0;
      for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
         GLint texel;
         /* interpolate color */
         CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
         /* add in texel */
         lohi <<= 2;
         lohi |= texel;
      }

      cc[1] = lohi;
   }

   /* pack the mode bits, then the three alphas, then the three colors,
    * 5 bits per value, into the high quadword
    */
   FX64_MOV32(hi, 7); /* alpha = "011" + lerp = 1 */
   for (j = n_vect - 1; j >= 0; j--) {
      /* add in alphas */
      FX64_SHL(hi, 5);
      FX64_OR32(hi, (GLuint)(vec[j][ACOMP] / 8.0F));
   }
   for (j = n_vect - 1; j >= 0; j--) {
      for (i = 0; i < n_comp - 1; i++) {
         /* add in colors */
         FX64_SHL(hi, 5);
         FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
      }
   }
   ((Fx64 *)cc)[1] = hi;
}
 819
 820
 821 static void
 822 fxt1_quantize_HI (GLuint *cc,
 823                   GLubyte input[N_TEXELS][MAX_COMP],
 824                   GLubyte reord[N_TEXELS][MAX_COMP], GLint n)
 825 {
 826    const GLint n_vect = 6; /* highest vector number */
 827    const GLint n_comp = 3; /* 3 components: R, G, B */
 828    GLfloat b = 0.0F;       /* phoudoin: silent compiler! */
 829    GLfloat iv[MAX_COMP];   /* interpolation vector */
 830    GLint i, k;
 831    GLuint hihi; /* high quadword: hi dword */
 832
 833    GLint minSum = 2000; /* big enough */
 834    GLint maxSum = -1; /* small enough */
 835    GLint minCol = 0; /* phoudoin: silent compiler! */
 836    GLint maxCol = 0; /* phoudoin: silent compiler! */
 837
 838    /* Our solution here is to find the darkest and brightest colors in
 839     * the 8x4 tile and use those as the two representative colors.
 840     * There are probably better algorithms to use (histogram-based).
 841     */
 842    for (k = 0; k < n; k++) {
 843       GLint sum = 0;
 844       for (i = 0; i < n_comp; i++) {
 845          sum += reord[k][i];
 846       }
 847       if (minSum > sum) {
 848          minSum = sum;
 849          minCol = k;
 850       }
 851       if (maxSum < sum) {
 852          maxSum = sum;
 853          maxCol = k;
 854       }
 855    }
 856
 857    hihi = 0; /* cc-hi = "00" */
 858    for (i = 0; i < n_comp; i++) {
 859       /* add in colors */
 860       hihi <<= 5;
 861       hihi |= reord[maxCol][i] >> 3;
 862    }
 863    for (i = 0; i < n_comp; i++) {
 864       /* add in colors */
 865       hihi <<= 5;
 866       hihi |= reord[minCol][i] >> 3;
 867    }
 868    cc[3] = hihi;
 869    cc[0] = cc[1] = cc[2] = 0;
 870
 871    /* compute interpolation vector */
 872    if (minCol != maxCol) {
 873       MAKEIVEC(n_vect, n_comp, iv, b, reord[minCol], reord[maxCol]);
 874    }
 875
 876    /* add in texels */
 877    for (k = N_TEXELS - 1; k >= 0; k--) {
 878       GLint t = k * 3;
 879       GLuint *kk = (GLuint *)((char *)cc + t / 8);
 880       GLint texel = n_vect + 1; /* transparent black */
 881
 882       if (!ISTBLACK(input[k])) {
 883          if (minCol != maxCol) {
 884             /* interpolate color */
 885             CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
 886             /* add in texel */
 887             kk[0] |= texel << (t & 7);
 888          }
 889       } else {
 890          /* add in texel */
 891          kk[0] |= texel << (t & 7);
 892       }
 893    }
 894 }
 895
 896
 897 static void
 898 fxt1_quantize_MIXED1 (GLuint *cc,
 899                       GLubyte input[N_TEXELS][MAX_COMP])
 900 {
 901    const GLint n_vect = 2; /* highest vector number in each microtile */
 902    const GLint n_comp = 3; /* 3 components: R, G, B */
 903    GLubyte vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
 904    GLfloat b, iv[MAX_COMP]; /* interpolation vector */
 905    GLint i, j, k;
 906    Fx64 hi; /* high quadword */
 907    GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
 908
 909    GLint minSum;
 910    GLint maxSum;
 911    GLint minColL = 0, maxColL = -1;
 912    GLint minColR = 0, maxColR = -1;
 913
 914    /* Our solution here is to find the darkest and brightest colors in
 915     * the 4x4 tile and use those as the two representative colors.
 916     * There are probably better algorithms to use (histogram-based).
 917     */
 918    minSum = 2000; /* big enough */
 919    maxSum = -1; /* small enough */
 920    for (k = 0; k < N_TEXELS / 2; k++) {
 921       if (!ISTBLACK(input[k])) {
 922          GLint sum = 0;
 923          for (i = 0; i < n_comp; i++) {
 924             sum += input[k][i];
 925          }
 926          if (minSum > sum) {
 927             minSum = sum;
 928             minColL = k;
 929          }
 930          if (maxSum < sum) {
 931             maxSum = sum;
 932             maxColL = k;
 933          }
 934       }
 935    }
 936    minSum = 2000; /* big enough */
 937    maxSum = -1; /* small enough */
 938    for (; k < N_TEXELS; k++) {
 939       if (!ISTBLACK(input[k])) {
 940          GLint sum = 0;
 941          for (i = 0; i < n_comp; i++) {
 942             sum += input[k][i];
 943          }
 944          if (minSum > sum) {
 945             minSum = sum;
 946             minColR = k;
 947          }
 948          if (maxSum < sum) {
 949             maxSum = sum;
 950             maxColR = k;
 951          }
 952       }
 953    }
 954
 955    /* left microtile */
 956    if (maxColL == -1) {
 957       /* all transparent black */
 958       cc[0] = ~0u;
 959       for (i = 0; i < n_comp; i++) {
 960          vec[0][i] = 0;
 961          vec[1][i] = 0;
 962       }
 963    } else {
 964       cc[0] = 0;
 965       for (i = 0; i < n_comp; i++) {
 966          vec[0][i] = input[minColL][i];
 967          vec[1][i] = input[maxColL][i];
 968       }
 969       if (minColL != maxColL) {
 970          /* compute interpolation vector */
 971          MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
 972
 973          /* add in texels */
 974          lolo = 0;
 975          for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
 976             GLint texel = n_vect + 1; /* transparent black */
 977             if (!ISTBLACK(input[k])) {
 978                /* interpolate color */
 979                CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
 980             }
 981             /* add in texel */
 982             lolo <<= 2;
 983             lolo |= texel;
 984          }
 985          cc[0] = lolo;
 986       }
 987    }
 988
 989    /* right microtile */
 990    if (maxColR == -1) {
 991       /* all transparent black */
 992       cc[1] = ~0u;
 993       for (i = 0; i < n_comp; i++) {
 994          vec[2][i] = 0;
 995          vec[3][i] = 0;
 996       }
 997    } else {
 998       cc[1] = 0;
 999       for (i = 0; i < n_comp; i++) {
1000          vec[2][i] = input[minColR][i];
1001          vec[3][i] = input[maxColR][i];
1002       }
1003       if (minColR != maxColR) {
1004          /* compute interpolation vector */
1005          MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);
1006
1007          /* add in texels */
1008          lohi = 0;
1009          for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
1010             GLint texel = n_vect + 1; /* transparent black */
1011             if (!ISTBLACK(input[k])) {
1012                /* interpolate color */
1013                CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
1014             }
1015             /* add in texel */
1016             lohi <<= 2;
1017             lohi |= texel;
1018          }
1019          cc[1] = lohi;
1020       }
1021    }
1022
1023    FX64_MOV32(hi, 9 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
1024    for (j = 2 * 2 - 1; j >= 0; j--) {
1025       for (i = 0; i < n_comp; i++) {
1026          /* add in colors */
1027          FX64_SHL(hi, 5);
1028          FX64_OR32(hi, vec[j][i] >> 3);
1029       }
1030    }
1031    ((Fx64 *)cc)[1] = hi;
1032 }
1033
1034
1035 static void
1036 fxt1_quantize_MIXED0 (GLuint *cc,
1037                       GLubyte input[N_TEXELS][MAX_COMP])
1038 {
1039    const GLint n_vect = 3; /* highest vector number in each microtile */
1040    const GLint n_comp = 3; /* 3 components: R, G, B */
1041    GLubyte vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
1042    GLfloat b, iv[MAX_COMP]; /* interpolation vector */
1043    GLint i, j, k;
1044    Fx64 hi; /* high quadword */
1045    GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
1046
1047    GLint minColL = 0, maxColL = 0;
1048    GLint minColR = 0, maxColR = 0;
1049 #if 0
1050    GLint minSum;
1051    GLint maxSum;
1052
1053    /* Our solution here is to find the darkest and brightest colors in
1054     * the 4x4 tile and use those as the two representative colors.
1055     * There are probably better algorithms to use (histogram-based).
1056     */
1057    minSum = 2000; /* big enough */
1058    maxSum = -1; /* small enough */
1059    for (k = 0; k < N_TEXELS / 2; k++) {
1060       GLint sum = 0;
1061       for (i = 0; i < n_comp; i++) {
1062          sum += input[k][i];
1063       }
1064       if (minSum > sum) {
1065          minSum = sum;
1066          minColL = k;
1067       }
1068       if (maxSum < sum) {
1069          maxSum = sum;
1070          maxColL = k;
1071       }
1072    }
1073    minSum = 2000; /* big enough */
1074    maxSum = -1; /* small enough */
1075    for (; k < N_TEXELS; k++) {
1076       GLint sum = 0;
1077       for (i = 0; i < n_comp; i++) {
1078          sum += input[k][i];
1079       }
1080       if (minSum > sum) {
1081          minSum = sum;
1082          minColR = k;
1083       }
1084       if (maxSum < sum) {
1085          maxSum = sum;
1086          maxColR = k;
1087       }
1088    }
1089 #else
1090    GLint minVal;
1091    GLint maxVal;
1092    GLint maxVarL = fxt1_variance(NULL, input, n_comp, N_TEXELS / 2);
1093    GLint maxVarR = fxt1_variance(NULL, &input[N_TEXELS / 2], n_comp, N_TEXELS / 2);
1094
1095    /* Scan the channel with max variance for lo & hi
1096     * and use those as the two representative colors.
1097     */
1098    minVal = 2000; /* big enough */
1099    maxVal = -1; /* small enough */
1100    for (k = 0; k < N_TEXELS / 2; k++) {
1101       GLint t = input[k][maxVarL];
1102       if (minVal > t) {
1103          minVal = t;
1104          minColL = k;
1105       }
1106       if (maxVal < t) {
1107          maxVal = t;
1108          maxColL = k;
1109       }
1110    }
1111    minVal = 2000; /* big enough */
1112    maxVal = -1; /* small enough */
1113    for (; k < N_TEXELS; k++) {
1114       GLint t = input[k][maxVarR];
1115       if (minVal > t) {
1116          minVal = t;
1117          minColR = k;
1118       }
1119       if (maxVal < t) {
1120          maxVal = t;
1121          maxColR = k;
1122       }
1123    }
1124 #endif
1125
1126    /* left microtile */
1127    cc[0] = 0;
1128    for (i = 0; i < n_comp; i++) {
1129       vec[0][i] = input[minColL][i];
1130       vec[1][i] = input[maxColL][i];
1131    }
1132    if (minColL != maxColL) {
1133       /* compute interpolation vector */
1134       MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
1135
1136       /* add in texels */
1137       lolo = 0;
1138       for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
1139          GLint texel;
1140          /* interpolate color */
1141          CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
1142          /* add in texel */
1143          lolo <<= 2;
1144          lolo |= texel;
1145       }
1146
1147       /* funky encoding for LSB of green */
1148       if ((GLint)((lolo >> 1) & 1) != (((vec[1][GCOMP] ^ vec[0][GCOMP]) >> 2) & 1)) {
1149          for (i = 0; i < n_comp; i++) {
1150             vec[1][i] = input[minColL][i];
1151             vec[0][i] = input[maxColL][i];
1152          }
1153          lolo = ~lolo;
1154       }
1155
1156       cc[0] = lolo;
1157    }
1158
1159    /* right microtile */
1160    cc[1] = 0;
1161    for (i = 0; i < n_comp; i++) {
1162       vec[2][i] = input[minColR][i];
1163       vec[3][i] = input[maxColR][i];
1164    }
1165    if (minColR != maxColR) {
1166       /* compute interpolation vector */
1167       MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);
1168
1169       /* add in texels */
1170       lohi = 0;
1171       for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
1172          GLint texel;
1173          /* interpolate color */
1174          CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
1175          /* add in texel */
1176          lohi <<= 2;
1177          lohi |= texel;
1178       }
1179
1180       /* funky encoding for LSB of green */
1181       if ((GLint)((lohi >> 1) & 1) != (((vec[3][GCOMP] ^ vec[2][GCOMP]) >> 2) & 1)) {
1182          for (i = 0; i < n_comp; i++) {
1183             vec[3][i] = input[minColR][i];
1184             vec[2][i] = input[maxColR][i];
1185          }
1186          lohi = ~lohi;
1187       }
1188
1189       cc[1] = lohi;
1190    }
1191
1192    FX64_MOV32(hi, 8 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
1193    for (j = 2 * 2 - 1; j >= 0; j--) {
1194       for (i = 0; i < n_comp; i++) {
1195          /* add in colors */
1196          FX64_SHL(hi, 5);
1197          FX64_OR32(hi, vec[j][i] >> 3);
1198       }
1199    }
1200    ((Fx64 *)cc)[1] = hi;
1201 }
1202
1203
1204 static void
1205 fxt1_quantize (GLuint *cc, const GLubyte *lines[], GLint comps)
1206 {
1207    GLint trualpha;
1208    GLubyte reord[N_TEXELS][MAX_COMP];
1209
1210    GLubyte input[N_TEXELS][MAX_COMP];
1211    GLint i, k, l;
1212
1213    if (comps == 3) {
1214       /* make the whole block opaque */
1215       memset(input, -1, sizeof(input));
1216    }
1217
1218    /* 8 texels each line */
1219    for (l = 0; l < 4; l++) {
1220       for (k = 0; k < 4; k++) {
1221          for (i = 0; i < comps; i++) {
1222             input[k + l * 4][i] = *lines[l]++;
1223          }
1224       }
1225       for (; k < 8; k++) {
1226          for (i = 0; i < comps; i++) {
1227             input[k + l * 4 + 12][i] = *lines[l]++;
1228          }
1229       }
1230    }
1231
1232    /* block layout:
1233     * 00, 01, 02, 03, 08, 09, 0a, 0b
1234     * 10, 11, 12, 13, 18, 19, 1a, 1b
1235     * 04, 05, 06, 07, 0c, 0d, 0e, 0f
1236     * 14, 15, 16, 17, 1c, 1d, 1e, 1f
1237     */
1238
1239    /* [dBorca]
1240     * stupidity flows forth from this
1241     */
1242    l = N_TEXELS;
1243    trualpha = 0;
1244    if (comps == 4) {
1245       /* skip all transparent black texels */
1246       l = 0;
1247       for (k = 0; k < N_TEXELS; k++) {
1248          /* test all components against 0 */
1249          if (!ISTBLACK(input[k])) {
1250             /* texel is not transparent black */
1251             COPY_4UBV(reord[l], input[k]);
1252             if (reord[l][ACOMP] < (255 - ALPHA_TS)) {
1253                /* non-opaque texel */
1254                trualpha = !0;
1255             }
1256             l++;
1257          }
1258       }
1259    }
1260
1261 #if 0
1262    if (trualpha) {
1263       fxt1_quantize_ALPHA0(cc, input, reord, l);
1264    } else if (l == 0) {
1265       cc[0] = cc[1] = cc[2] = -1;
1266       cc[3] = 0;
1267    } else if (l < N_TEXELS) {
1268       fxt1_quantize_HI(cc, input, reord, l);
1269    } else {
1270       fxt1_quantize_CHROMA(cc, input);
1271    }
1272    (void)fxt1_quantize_ALPHA1;
1273    (void)fxt1_quantize_MIXED1;
1274    (void)fxt1_quantize_MIXED0;
1275 #else
1276    if (trualpha) {
1277       fxt1_quantize_ALPHA1(cc, input);
1278    } else if (l == 0) {
1279       cc[0] = cc[1] = cc[2] = ~0u;
1280       cc[3] = 0;
1281    } else if (l < N_TEXELS) {
1282       fxt1_quantize_MIXED1(cc, input);
1283    } else {
1284       fxt1_quantize_MIXED0(cc, input);
1285    }
1286    (void)fxt1_quantize_ALPHA0;
1287    (void)fxt1_quantize_HI;
1288    (void)fxt1_quantize_CHROMA;
1289 #endif
1290 }
1291
1292
1293 static void
1294 fxt1_encode (GLuint width, GLuint height, GLint comps,
1295              const void *source, GLint srcRowStride,
1296              void *dest, GLint destRowStride)
1297 {
1298    GLuint x, y;
1299    const GLubyte *data;
1300    GLuint *encoded = (GLuint *)dest;
1301    void *newSource = NULL;
1302
1303    assert(comps == 3 || comps == 4);
1304
1305    /* Replicate image if width is not M8 or height is not M4 */
1306    if ((width & 7) | (height & 3)) {
1307       GLint newWidth = (width + 7) & ~7;
1308       GLint newHeight = (height + 3) & ~3;
1309       newSource = malloc(comps * newWidth * newHeight * sizeof(GLchan));
1310       if (!newSource) {
1311          GET_CURRENT_CONTEXT(ctx);
1312          _mesa_error(ctx, GL_OUT_OF_MEMORY, "texture compression");
1313          goto cleanUp;
1314       }
1315       _mesa_upscale_teximage2d(width, height, newWidth, newHeight,
1316                                comps, (const GLchan *) source,
1317                                srcRowStride, (GLchan *) newSource);
1318       source = newSource;
1319       width = newWidth;
1320       height = newHeight;
1321       srcRowStride = comps * newWidth;
1322    }
1323
1324    /* convert from 16/32-bit channels to GLubyte if needed */
1325    if (CHAN_TYPE != GL_UNSIGNED_BYTE) {
1326       const GLuint n = width * height * comps;
1327       const GLchan *src = (const GLchan *) source;
1328       GLubyte *dest = (GLubyte *) malloc(n * sizeof(GLubyte));
1329       GLuint i;
1330       if (!dest) {
1331          GET_CURRENT_CONTEXT(ctx);
1332          _mesa_error(ctx, GL_OUT_OF_MEMORY, "texture compression");
1333          goto cleanUp;
1334       }
1335       for (i = 0; i < n; i++) {
1336          dest[i] = CHAN_TO_UBYTE(src[i]);
1337       }
1338       if (newSource != NULL) {
1339          free(newSource);
1340       }
1341       newSource = dest;  /* we'll free this buffer before returning */
1342       source = dest;  /* the new, GLubyte incoming image */
1343    }
1344
1345    data = (const GLubyte *) source;
1346    destRowStride = (destRowStride - width * 2) / 4;
1347    for (y = 0; y < height; y += 4) {
1348       GLuint offs = 0 + (y + 0) * srcRowStride;
1349       for (x = 0; x < width; x += 8) {
1350          const GLubyte *lines[4];
1351          lines[0] = &data[offs];
1352          lines[1] = lines[0] + srcRowStride;
1353          lines[2] = lines[1] + srcRowStride;
1354          lines[3] = lines[2] + srcRowStride;
1355          offs += 8 * comps;
1356          fxt1_quantize(encoded, lines, comps);
1357          /* 128 bits per 8x4 block */
1358          encoded += 4;
1359       }
1360       encoded += destRowStride;
1361    }
1362
1363  cleanUp:
1364    if (newSource != NULL) {
1365       free(newSource);
1366    }
1367 }
1368
1369
1370 /***************************************************************************\
1371  * FXT1 decoder
1372  *
1373  * The decoder is based on GL_3DFX_texture_compression_FXT1
1374  * specification and serves as a concept for the encoder.
1375 \***************************************************************************/
1376
1377
1378 /* lookup table for scaling 5 bit colors up to 8 bits */
1379 static const GLubyte _rgb_scale_5[] = {
1380    0,   8,   16,  25,  33,  41,  49,  58,
1381    66,  74,  82,  90,  99,  107, 115, 123,
1382    132, 140, 148, 156, 165, 173, 181, 189,
1383    197, 206, 214, 222, 230, 239, 247, 255
1384 };
1385
1386 /* lookup table for scaling 6 bit colors up to 8 bits */
1387 static const GLubyte _rgb_scale_6[] = {
1388    0,   4,   8,   12,  16,  20,  24,  28,
1389    32,  36,  40,  45,  49,  53,  57,  61,
1390    65,  69,  73,  77,  81,  85,  89,  93,
1391    97,  101, 105, 109, 113, 117, 121, 125,
1392    130, 134, 138, 142, 146, 150, 154, 158,
1393    162, 166, 170, 174, 178, 182, 186, 190,
1394    194, 198, 202, 206, 210, 215, 219, 223,
1395    227, 231, 235, 239, 243, 247, 251, 255
1396 };
1397
1398
/* CC_SEL: the block viewed as GLuint words, shifted so that bit `which`
 * of the block becomes bit 0.  The result is NOT masked; callers mask it
 * themselves (UP5/UP6 do so).  A field must not straddle a word boundary.
 */
#define CC_SEL(cc, which) (((const GLuint *)(cc))[(which) / 32] >> ((which) & 31))
/* UP5: expand the low 5 bits of c to an 8-bit value */
#define UP5(c) _rgb_scale_5[(c) & 31]
/* UP6: expand the low 5 bits of c plus one extra low bit b to 8 bits */
#define UP6(c, b) _rgb_scale_6[(((c) & 31) << 1) | ((b) & 1)]
/* LERP: rounded integer blend, step t of n, from c0 (t == 0) to c1 (t == n) */
#define LERP(n, t, c0, c1) ((((n) - (t)) * (c0) + (t) * (c1) + (n) / 2) / (n))
1403
1404
1405 static void
1406 fxt1_decode_1HI (const GLubyte *code, GLint t, GLchan *rgba)
1407 {
1408    const GLuint *cc;
1409
1410    t *= 3;
1411    cc = (const GLuint *)(code + t / 8);
1412    t = (cc[0] >> (t & 7)) & 7;
1413
1414    if (t == 7) {
1415       rgba[RCOMP] = rgba[GCOMP] = rgba[BCOMP] = rgba[ACOMP] = 0;
1416    } else {
1417       GLubyte r, g, b;
1418       cc = (const GLuint *)(code + 12);
1419       if (t == 0) {
1420          b = UP5(CC_SEL(cc, 0));
1421          g = UP5(CC_SEL(cc, 5));
1422          r = UP5(CC_SEL(cc, 10));
1423       } else if (t == 6) {
1424          b = UP5(CC_SEL(cc, 15));
1425          g = UP5(CC_SEL(cc, 20));
1426          r = UP5(CC_SEL(cc, 25));
1427       } else {
1428          b = LERP(6, t, UP5(CC_SEL(cc, 0)), UP5(CC_SEL(cc, 15)));
1429          g = LERP(6, t, UP5(CC_SEL(cc, 5)), UP5(CC_SEL(cc, 20)));
1430          r = LERP(6, t, UP5(CC_SEL(cc, 10)), UP5(CC_SEL(cc, 25)));
1431       }
1432       rgba[RCOMP] = UBYTE_TO_CHAN(r);
1433       rgba[GCOMP] = UBYTE_TO_CHAN(g);
1434       rgba[BCOMP] = UBYTE_TO_CHAN(b);
1435       rgba[ACOMP] = CHAN_MAX;
1436    }
1437 }
1438
1439
1440 static void
1441 fxt1_decode_1CHROMA (const GLubyte *code, GLint t, GLchan *rgba)
1442 {
1443    const GLuint *cc;
1444    GLuint kk;
1445
1446    cc = (const GLuint *)code;
1447    if (t & 16) {
1448       cc++;
1449       t &= 15;
1450    }
1451    t = (cc[0] >> (t * 2)) & 3;
1452
1453    t *= 15;
1454    cc = (const GLuint *)(code + 8 + t / 8);
1455    kk = cc[0] >> (t & 7);
1456    rgba[BCOMP] = UBYTE_TO_CHAN( UP5(kk) );
1457    rgba[GCOMP] = UBYTE_TO_CHAN( UP5(kk >> 5) );
1458    rgba[RCOMP] = UBYTE_TO_CHAN( UP5(kk >> 10) );
1459    rgba[ACOMP] = CHAN_MAX;
1460 }
1461
1462
1463 static void
1464 fxt1_decode_1MIXED (const GLubyte *code, GLint t, GLchan *rgba)
1465 {
1466    const GLuint *cc;
1467    GLuint col[2][3];
1468    GLint glsb, selb;
1469
1470    cc = (const GLuint *)code;
1471    if (t & 16) {
1472       t &= 15;
1473       t = (cc[1] >> (t * 2)) & 3;
1474       /* col 2 */
1475       col[0][BCOMP] = (*(const GLuint *)(code + 11)) >> 6;
1476       col[0][GCOMP] = CC_SEL(cc, 99);
1477       col[0][RCOMP] = CC_SEL(cc, 104);
1478       /* col 3 */
1479       col[1][BCOMP] = CC_SEL(cc, 109);
1480       col[1][GCOMP] = CC_SEL(cc, 114);
1481       col[1][RCOMP] = CC_SEL(cc, 119);
1482       glsb = CC_SEL(cc, 126);
1483       selb = CC_SEL(cc, 33);
1484    } else {
1485       t = (cc[0] >> (t * 2)) & 3;
1486       /* col 0 */
1487       col[0][BCOMP] = CC_SEL(cc, 64);
1488       col[0][GCOMP] = CC_SEL(cc, 69);
1489       col[0][RCOMP] = CC_SEL(cc, 74);
1490       /* col 1 */
1491       col[1][BCOMP] = CC_SEL(cc, 79);
1492       col[1][GCOMP] = CC_SEL(cc, 84);
1493       col[1][RCOMP] = CC_SEL(cc, 89);
1494       glsb = CC_SEL(cc, 125);
1495       selb = CC_SEL(cc, 1);
1496    }
1497
1498    if (CC_SEL(cc, 124) & 1) {
1499       /* alpha[0] == 1 */
1500
1501       if (t == 3) {
1502          /* zero */
1503          rgba[RCOMP] = rgba[BCOMP] = rgba[GCOMP] = rgba[ACOMP] = 0;
1504       } else {
1505          GLubyte r, g, b;
1506          if (t == 0) {
1507             b = UP5(col[0][BCOMP]);
1508             g = UP5(col[0][GCOMP]);
1509             r = UP5(col[0][RCOMP]);
1510          } else if (t == 2) {
1511             b = UP5(col[1][BCOMP]);
1512             g = UP6(col[1][GCOMP], glsb);
1513             r = UP5(col[1][RCOMP]);
1514          } else {
1515             b = (UP5(col[0][BCOMP]) + UP5(col[1][BCOMP])) / 2;
1516             g = (UP5(col[0][GCOMP]) + UP6(col[1][GCOMP], glsb)) / 2;
1517             r = (UP5(col[0][RCOMP]) + UP5(col[1][RCOMP])) / 2;
1518          }
1519          rgba[RCOMP] = UBYTE_TO_CHAN(r);
1520          rgba[GCOMP] = UBYTE_TO_CHAN(g);
1521          rgba[BCOMP] = UBYTE_TO_CHAN(b);
1522          rgba[ACOMP] = CHAN_MAX;
1523       }
1524    } else {
1525       /* alpha[0] == 0 */
1526       GLubyte r, g, b;
1527       if (t == 0) {
1528          b = UP5(col[0][BCOMP]);
1529          g = UP6(col[0][GCOMP], glsb ^ selb);
1530          r = UP5(col[0][RCOMP]);
1531       } else if (t == 3) {
1532          b = UP5(col[1][BCOMP]);
1533          g = UP6(col[1][GCOMP], glsb);
1534          r = UP5(col[1][RCOMP]);
1535       } else {
1536          b = LERP(3, t, UP5(col[0][BCOMP]), UP5(col[1][BCOMP]));
1537          g = LERP(3, t, UP6(col[0][GCOMP], glsb ^ selb),
1538                         UP6(col[1][GCOMP], glsb));
1539          r = LERP(3, t, UP5(col[0][RCOMP]), UP5(col[1][RCOMP]));
1540       }
1541       rgba[RCOMP] = UBYTE_TO_CHAN(r);
1542       rgba[GCOMP] = UBYTE_TO_CHAN(g);
1543       rgba[BCOMP] = UBYTE_TO_CHAN(b);
1544       rgba[ACOMP] = CHAN_MAX;
1545    }
1546 }
1547
1548
1549 static void
1550 fxt1_decode_1ALPHA (const GLubyte *code, GLint t, GLchan *rgba)
1551 {
1552    const GLuint *cc;
1553    GLubyte r, g, b, a;
1554
1555    cc = (const GLuint *)code;
1556    if (CC_SEL(cc, 124) & 1) {
1557       /* lerp == 1 */
1558       GLuint col0[4];
1559
1560       if (t & 16) {
1561          t &= 15;
1562          t = (cc[1] >> (t * 2)) & 3;
1563          /* col 2 */
1564          col0[BCOMP] = (*(const GLuint *)(code + 11)) >> 6;
1565          col0[GCOMP] = CC_SEL(cc, 99);
1566          col0[RCOMP] = CC_SEL(cc, 104);
1567          col0[ACOMP] = CC_SEL(cc, 119);
1568       } else {
1569          t = (cc[0] >> (t * 2)) & 3;
1570          /* col 0 */
1571          col0[BCOMP] = CC_SEL(cc, 64);
1572          col0[GCOMP] = CC_SEL(cc, 69);
1573          col0[RCOMP] = CC_SEL(cc, 74);
1574          col0[ACOMP] = CC_SEL(cc, 109);
1575       }
1576
1577       if (t == 0) {
1578          b = UP5(col0[BCOMP]);
1579          g = UP5(col0[GCOMP]);
1580          r = UP5(col0[RCOMP]);
1581          a = UP5(col0[ACOMP]);
1582       } else if (t == 3) {
1583          b = UP5(CC_SEL(cc, 79));
1584          g = UP5(CC_SEL(cc, 84));
1585          r = UP5(CC_SEL(cc, 89));
1586          a = UP5(CC_SEL(cc, 114));
1587       } else {
1588          b = LERP(3, t, UP5(col0[BCOMP]), UP5(CC_SEL(cc, 79)));
1589          g = LERP(3, t, UP5(col0[GCOMP]), UP5(CC_SEL(cc, 84)));
1590          r = LERP(3, t, UP5(col0[RCOMP]), UP5(CC_SEL(cc, 89)));
1591          a = LERP(3, t, UP5(col0[ACOMP]), UP5(CC_SEL(cc, 114)));
1592       }
1593    } else {
1594       /* lerp == 0 */
1595
1596       if (t & 16) {
1597          cc++;
1598          t &= 15;
1599       }
1600       t = (cc[0] >> (t * 2)) & 3;
1601
1602       if (t == 3) {
1603          /* zero */
1604          r = g = b = a = 0;
1605       } else {
1606          GLuint kk;
1607          cc = (const GLuint *)code;
1608          a = UP5(cc[3] >> (t * 5 + 13));
1609          t *= 15;
1610          cc = (const GLuint *)(code + 8 + t / 8);
1611          kk = cc[0] >> (t & 7);
1612          b = UP5(kk);
1613          g = UP5(kk >> 5);
1614          r = UP5(kk >> 10);
1615       }
1616    }
1617    rgba[RCOMP] = UBYTE_TO_CHAN(r);
1618    rgba[GCOMP] = UBYTE_TO_CHAN(g);
1619    rgba[BCOMP] = UBYTE_TO_CHAN(b);
1620    rgba[ACOMP] = UBYTE_TO_CHAN(a);
1621 }
1622
1623
1624 void
1625 fxt1_decode_1 (const void *texture, GLint stride, /* in pixels */
1626                GLint i, GLint j, GLchan *rgba)
1627 {
1628    static void (*decode_1[]) (const GLubyte *, GLint, GLchan *) = {
1629       fxt1_decode_1HI,     /* cc-high   = "00?" */
1630       fxt1_decode_1HI,     /* cc-high   = "00?" */
1631       fxt1_decode_1CHROMA, /* cc-chroma = "010" */
1632       fxt1_decode_1ALPHA,  /* alpha     = "011" */
1633       fxt1_decode_1MIXED,  /* mixed     = "1??" */
1634       fxt1_decode_1MIXED,  /* mixed     = "1??" */
1635       fxt1_decode_1MIXED,  /* mixed     = "1??" */
1636       fxt1_decode_1MIXED   /* mixed     = "1??" */
1637    };
1638
1639    const GLubyte *code = (const GLubyte *)texture +
1640                          ((j / 4) * (stride / 8) + (i / 8)) * 16;
1641    GLint mode = CC_SEL(code, 125);
1642    GLint t = i & 7;
1643
1644    if (t & 4) {
1645       t += 12;
1646    }
1647    t += (j & 3) * 4;
1648
1649    decode_1[mode](code, t, rgba);
1650 }
1651
1652
1653 #endif /* FEATURE_texture_fxt1 */