src/mesa/main/texcompress_fxt1.c

   1 /*
   2  * Mesa 3-D graphics library
   3  * Version:  7.1
   4  *
   5  * Copyright (C) 1999-2008  Brian Paul   All Rights Reserved.
   6  *
   7  * Permission is hereby granted, free of charge, to any person obtaining a
   8  * copy of this software and associated documentation files (the "Software"),
   9  * to deal in the Software without restriction, including without limitation
  10  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  11  * and/or sell copies of the Software, and to permit persons to whom the
  12  * Software is furnished to do so, subject to the following conditions:
  13  *
  14  * The above copyright notice and this permission notice shall be included
  15  * in all copies or substantial portions of the Software.
  16  *
  17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  18  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  20  * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
  21  * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  22  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  23  */
  24
  25
  26 /**
  27  * \file texcompress_fxt1.c
  28  * GL_EXT_texture_compression_fxt1 support.
  29  */
  30
  31
#include <string.h>

#include "glheader.h"
#include "imports.h"
#include "colormac.h"
#include "image.h"
#include "macros.h"
#include "mipmap.h"
#include "texcompress.h"
#include "texcompress_fxt1.h"
#include "texstore.h"
  41
  42
  43 #if FEATURE_texture_fxt1
  44
  45
/*
 * Forward declaration of the FXT1 encoder used by the texstore functions
 * below.  They call it with the image size in texels, the number of
 * components per source texel (3 for RGB, 4 for RGBA), the GLchan source
 * pixels with their row stride expressed in GLchan units, and the
 * destination address with the destination row stride.
 */
static void
fxt1_encode (GLuint width, GLuint height, GLint comps,
             const void *source, GLint srcRowStride,
             void *dest, GLint destRowStride);

/*
 * Forward declaration of the single-texel decoder used by the texel fetch
 * functions below: they pass the texture image data, its row stride and
 * the texel coordinates (i, j), and read back four GLchan components
 * from rgba.
 */
void
fxt1_decode_1 (const void *texture, GLint stride,
               GLint i, GLint j, GLchan *rgba);
  54
  55
  56 /**
  57  * Store user's image in rgb_fxt1 format.
  58  */
  59 GLboolean
  60 _mesa_texstore_rgb_fxt1(TEXSTORE_PARAMS)
  61 {
  62    const GLchan *pixels;
  63    GLint srcRowStride;
  64    GLubyte *dst;
  65    const GLint texWidth = dstRowStride * 8 / 16; /* a bit of a hack */
  66    const GLchan *tempImage = NULL;
  67
  68    ASSERT(dstFormat == MESA_FORMAT_RGB_FXT1);
  69    ASSERT(dstXoffset % 8 == 0);
  70    ASSERT(dstYoffset % 4 == 0);
  71    ASSERT(dstZoffset     == 0);
  72    (void) dstZoffset;
  73    (void) dstImageOffsets;
  74
  75    if (srcFormat != GL_RGB ||
  76        srcType != CHAN_TYPE ||
  77        ctx->_ImageTransferState ||
  78        srcPacking->SwapBytes) {
  79       /* convert image to RGB/GLchan */
  80       tempImage = _mesa_make_temp_chan_image(ctx, dims,
  81                                              baseInternalFormat,
  82                                              _mesa_get_format_base_format(dstFormat),
  83                                              srcWidth, srcHeight, srcDepth,
  84                                              srcFormat, srcType, srcAddr,
  85                                              srcPacking);
  86       if (!tempImage)
  87          return GL_FALSE; /* out of memory */
  88       pixels = tempImage;
  89       srcRowStride = 3 * srcWidth;
  90       srcFormat = GL_RGB;
  91    }
  92    else {
  93       pixels = (const GLchan *) srcAddr;
  94       srcRowStride = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat,
  95                                             srcType) / sizeof(GLchan);
  96    }
  97
  98    dst = _mesa_compressed_image_address(dstXoffset, dstYoffset, 0,
  99                                         dstFormat,
 100                                         texWidth, (GLubyte *) dstAddr);
 101
 102    fxt1_encode(srcWidth, srcHeight, 3, pixels, srcRowStride,
 103                dst, dstRowStride);
 104
 105    if (tempImage)
 106       free((void*) tempImage);
 107
 108    return GL_TRUE;
 109 }
 110
 111
 112 /**
 113  * Store user's image in rgba_fxt1 format.
 114  */
 115 GLboolean
 116 _mesa_texstore_rgba_fxt1(TEXSTORE_PARAMS)
 117 {
 118    const GLchan *pixels;
 119    GLint srcRowStride;
 120    GLubyte *dst;
 121    GLint texWidth = dstRowStride * 8 / 16; /* a bit of a hack */
 122    const GLchan *tempImage = NULL;
 123
 124    ASSERT(dstFormat == MESA_FORMAT_RGBA_FXT1);
 125    ASSERT(dstXoffset % 8 == 0);
 126    ASSERT(dstYoffset % 4 == 0);
 127    ASSERT(dstZoffset     == 0);
 128    (void) dstZoffset;
 129    (void) dstImageOffsets;
 130
 131    if (srcFormat != GL_RGBA ||
 132        srcType != CHAN_TYPE ||
 133        ctx->_ImageTransferState ||
 134        srcPacking->SwapBytes) {
 135       /* convert image to RGBA/GLchan */
 136       tempImage = _mesa_make_temp_chan_image(ctx, dims,
 137                                              baseInternalFormat,
 138                                              _mesa_get_format_base_format(dstFormat),
 139                                              srcWidth, srcHeight, srcDepth,
 140                                              srcFormat, srcType, srcAddr,
 141                                              srcPacking);
 142       if (!tempImage)
 143          return GL_FALSE; /* out of memory */
 144       pixels = tempImage;
 145       srcRowStride = 4 * srcWidth;
 146       srcFormat = GL_RGBA;
 147    }
 148    else {
 149       pixels = (const GLchan *) srcAddr;
 150       srcRowStride = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat,
 151                                             srcType) / sizeof(GLchan);
 152    }
 153
 154    dst = _mesa_compressed_image_address(dstXoffset, dstYoffset, 0,
 155                                         dstFormat,
 156                                         texWidth, (GLubyte *) dstAddr);
 157
 158    fxt1_encode(srcWidth, srcHeight, 4, pixels, srcRowStride,
 159                dst, dstRowStride);
 160
 161    if (tempImage)
 162       free((void*) tempImage);
 163
 164    return GL_TRUE;
 165 }
 166
 167
 168 void
 169 _mesa_fetch_texel_2d_f_rgba_fxt1( const struct gl_texture_image *texImage,
 170                                   GLint i, GLint j, GLint k, GLfloat *texel )
 171 {
 172    /* just sample as GLchan and convert to float here */
 173    GLchan rgba[4];
 174    (void) k;
 175    fxt1_decode_1(texImage->Data, texImage->RowStride, i, j, rgba);
 176    texel[RCOMP] = CHAN_TO_FLOAT(rgba[RCOMP]);
 177    texel[GCOMP] = CHAN_TO_FLOAT(rgba[GCOMP]);
 178    texel[BCOMP] = CHAN_TO_FLOAT(rgba[BCOMP]);
 179    texel[ACOMP] = CHAN_TO_FLOAT(rgba[ACOMP]);
 180 }
 181
 182
 183 void
 184 _mesa_fetch_texel_2d_f_rgb_fxt1( const struct gl_texture_image *texImage,
 185                                  GLint i, GLint j, GLint k, GLfloat *texel )
 186 {
 187    /* just sample as GLchan and convert to float here */
 188    GLchan rgba[4];
 189    (void) k;
 190    fxt1_decode_1(texImage->Data, texImage->RowStride, i, j, rgba);
 191    texel[RCOMP] = CHAN_TO_FLOAT(rgba[RCOMP]);
 192    texel[GCOMP] = CHAN_TO_FLOAT(rgba[GCOMP]);
 193    texel[BCOMP] = CHAN_TO_FLOAT(rgba[BCOMP]);
 194    texel[ACOMP] = 1.0F;
 195 }
 196
 197
 198
 199 /***************************************************************************\
 200  * FXT1 encoder
 201  *
 202  * The encoder was built by reversing the decoder,
 203  * and is vaguely based on Texus2 by 3dfx. Note that this code
 204  * is merely a proof of concept, since it is highly UNoptimized;
 205  * moreover, it is sub-optimal due to initial conditions passed
 206  * to Lloyd's algorithm (the interpolation modes are even worse).
 207 \***************************************************************************/
 208
 209
 210 #define MAX_COMP 4 /* ever needed maximum number of components in texel */
 211 #define MAX_VECT 4 /* ever needed maximum number of base vectors to find */
 212 #define N_TEXELS 32 /* number of texels in a block (always 32) */
 213 #define LL_N_REP 50 /* number of iterations in lloyd's vq */
 214 #define LL_RMS_D 10 /* fault tolerance (maximum delta) */
 215 #define LL_RMS_E 255 /* fault tolerance (maximum error) */
 216 #define ALPHA_TS 2 /* alpha threshold: (255 - ALPHA_TS) deemed opaque */
 217 #define ISTBLACK(v) (*((GLuint *)(v)) == 0)
 218
 219
 220 /*
 221  * Define a 64-bit unsigned integer type and macros
 222  */
 223 #if 1
 224
 225 #define FX64_NATIVE 1
 226
 227 typedef uint64_t Fx64;
 228
 229 #define FX64_MOV32(a, b) a = b
 230 #define FX64_OR32(a, b)  a |= b
 231 #define FX64_SHL(a, c)   a <<= c
 232
 233 #else
 234
 235 #define FX64_NATIVE 0
 236
 237 typedef struct {
 238    GLuint lo, hi;
 239 } Fx64;
 240
 241 #define FX64_MOV32(a, b) a.lo = b
 242 #define FX64_OR32(a, b)  a.lo |= b
 243
 244 #define FX64_SHL(a, c)                                 \
 245    do {                                                \
 246        if ((c) >= 32) {                                \
 247           a.hi = a.lo << ((c) - 32);                   \
 248           a.lo = 0;                                    \
 249        } else {                                        \
 250           a.hi = (a.hi << (c)) | (a.lo >> (32 - (c))); \
 251           a.lo <<= (c);                                \
 252        }                                               \
 253    } while (0)
 254
 255 #endif
 256
 257
 258 #define F(i) (GLfloat)1 /* can be used to obtain an oblong metric: 0.30 / 0.59 / 0.11 */
 259 #define SAFECDOT 1 /* for paranoids */
 260
 261 #define MAKEIVEC(NV, NC, IV, B, V0, V1)  \
 262    do {                                  \
 263       /* compute interpolation vector */ \
 264       GLfloat d2 = 0.0F;                 \
 265       GLfloat rd2;                       \
 266                                          \
 267       for (i = 0; i < NC; i++) {         \
 268          IV[i] = (V1[i] - V0[i]) * F(i); \
 269          d2 += IV[i] * IV[i];            \
 270       }                                  \
 271       rd2 = (GLfloat)NV / d2;            \
 272       B = 0;                             \
 273       for (i = 0; i < NC; i++) {         \
 274          IV[i] *= F(i);                  \
 275          B -= IV[i] * V0[i];             \
 276          IV[i] *= rd2;                   \
 277       }                                  \
 278       B = B * rd2 + 0.5f;                \
 279    } while (0)
 280
 281 #define CALCCDOT(TEXEL, NV, NC, IV, B, V)\
 282    do {                                  \
 283       GLfloat dot = 0.0F;                \
 284       for (i = 0; i < NC; i++) {         \
 285          dot += V[i] * IV[i];            \
 286       }                                  \
 287       TEXEL = (GLint)(dot + B);          \
 288       if (SAFECDOT) {                    \
 289          if (TEXEL < 0) {                \
 290             TEXEL = 0;                   \
 291          } else if (TEXEL > NV) {        \
 292             TEXEL = NV;                  \
 293          }                               \
 294       }                                  \
 295    } while (0)
 296
 297
 298 static GLint
 299 fxt1_bestcol (GLfloat vec[][MAX_COMP], GLint nv,
 300               GLubyte input[MAX_COMP], GLint nc)
 301 {
 302    GLint i, j, best = -1;
 303    GLfloat err = 1e9; /* big enough */
 304
 305    for (j = 0; j < nv; j++) {
 306       GLfloat e = 0.0F;
 307       for (i = 0; i < nc; i++) {
 308          e += (vec[j][i] - input[i]) * (vec[j][i] - input[i]);
 309       }
 310       if (e < err) {
 311          err = e;
 312          best = j;
 313       }
 314    }
 315
 316    return best;
 317 }
 318
 319
 320 static GLint
 321 fxt1_worst (GLfloat vec[MAX_COMP],
 322             GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
 323 {
 324    GLint i, k, worst = -1;
 325    GLfloat err = -1.0F; /* small enough */
 326
 327    for (k = 0; k < n; k++) {
 328       GLfloat e = 0.0F;
 329       for (i = 0; i < nc; i++) {
 330          e += (vec[i] - input[k][i]) * (vec[i] - input[k][i]);
 331       }
 332       if (e > err) {
 333          err = e;
 334          worst = k;
 335       }
 336    }
 337
 338    return worst;
 339 }
 340
 341
 342 static GLint
 343 fxt1_variance (GLdouble variance[MAX_COMP],
 344                GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
 345 {
 346    GLint i, k, best = 0;
 347    GLint sx, sx2;
 348    GLdouble var, maxvar = -1; /* small enough */
 349    GLdouble teenth = 1.0 / n;
 350
 351    for (i = 0; i < nc; i++) {
 352       sx = sx2 = 0;
 353       for (k = 0; k < n; k++) {
 354          GLint t = input[k][i];
 355          sx += t;
 356          sx2 += t * t;
 357       }
 358       var = sx2 * teenth - sx * sx * teenth * teenth;
 359       if (maxvar < var) {
 360          maxvar = var;
 361          best = i;
 362       }
 363       if (variance) {
 364          variance[i] = var;
 365       }
 366    }
 367
 368    return best;
 369 }
 370
 371
 372 static GLint
 373 fxt1_choose (GLfloat vec[][MAX_COMP], GLint nv,
 374              GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
 375 {
 376 #if 0
 377    /* Choose colors from a grid.
 378     */
 379    GLint i, j;
 380
 381    for (j = 0; j < nv; j++) {
 382       GLint m = j * (n - 1) / (nv - 1);
 383       for (i = 0; i < nc; i++) {
 384          vec[j][i] = input[m][i];
 385       }
 386    }
 387 #else
 388    /* Our solution here is to find the darkest and brightest colors in
 389     * the 8x4 tile and use those as the two representative colors.
 390     * There are probably better algorithms to use (histogram-based).
 391     */
 392    GLint i, j, k;
 393    GLint minSum = 2000; /* big enough */
 394    GLint maxSum = -1; /* small enough */
 395    GLint minCol = 0; /* phoudoin: silent compiler! */
 396    GLint maxCol = 0; /* phoudoin: silent compiler! */
 397
 398    struct {
 399       GLint flag;
 400       GLint key;
 401       GLint freq;
 402       GLint idx;
 403    } hist[N_TEXELS];
 404    GLint lenh = 0;
 405
 406    memset(hist, 0, sizeof(hist));
 407
 408    for (k = 0; k < n; k++) {
 409       GLint l;
 410       GLint key = 0;
 411       GLint sum = 0;
 412       for (i = 0; i < nc; i++) {
 413          key <<= 8;
 414          key |= input[k][i];
 415          sum += input[k][i];
 416       }
 417       for (l = 0; l < n; l++) {
 418          if (!hist[l].flag) {
 419             /* alloc new slot */
 420             hist[l].flag = !0;
 421             hist[l].key = key;
 422             hist[l].freq = 1;
 423             hist[l].idx = k;
 424             lenh = l + 1;
 425             break;
 426          } else if (hist[l].key == key) {
 427             hist[l].freq++;
 428             break;
 429          }
 430       }
 431       if (minSum > sum) {
 432          minSum = sum;
 433          minCol = k;
 434       }
 435       if (maxSum < sum) {
 436          maxSum = sum;
 437          maxCol = k;
 438       }
 439    }
 440
 441    if (lenh <= nv) {
 442       for (j = 0; j < lenh; j++) {
 443          for (i = 0; i < nc; i++) {
 444             vec[j][i] = (GLfloat)input[hist[j].idx][i];
 445          }
 446       }
 447       for (; j < nv; j++) {
 448          for (i = 0; i < nc; i++) {
 449             vec[j][i] = vec[0][i];
 450          }
 451       }
 452       return 0;
 453    }
 454
 455    for (j = 0; j < nv; j++) {
 456       for (i = 0; i < nc; i++) {
 457          vec[j][i] = ((nv - 1 - j) * input[minCol][i] + j * input[maxCol][i] + (nv - 1) / 2) / (GLfloat)(nv - 1);
 458       }
 459    }
 460 #endif
 461
 462    return !0;
 463 }
 464
 465
 466 static GLint
 467 fxt1_lloyd (GLfloat vec[][MAX_COMP], GLint nv,
 468             GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
 469 {
 470    /* Use the generalized lloyd's algorithm for VQ:
 471     *     find 4 color vectors.
 472     *
 473     *     for each sample color
 474     *         sort to nearest vector.
 475     *
 476     *     replace each vector with the centroid of its matching colors.
 477     *
 478     *     repeat until RMS doesn't improve.
 479     *
 480     *     if a color vector has no samples, or becomes the same as another
 481     *     vector, replace it with the color which is farthest from a sample.
 482     *
 483     * vec[][MAX_COMP]           initial vectors and resulting colors
 484     * nv                        number of resulting colors required
 485     * input[N_TEXELS][MAX_COMP] input texels
 486     * nc                        number of components in input / vec
 487     * n                         number of input samples
 488     */
 489
 490    GLint sum[MAX_VECT][MAX_COMP]; /* used to accumulate closest texels */
 491    GLint cnt[MAX_VECT]; /* how many times a certain vector was chosen */
 492    GLfloat error, lasterror = 1e9;
 493
 494    GLint i, j, k, rep;
 495
 496    /* the quantizer */
 497    for (rep = 0; rep < LL_N_REP; rep++) {
 498       /* reset sums & counters */
 499       for (j = 0; j < nv; j++) {
 500          for (i = 0; i < nc; i++) {
 501             sum[j][i] = 0;
 502          }
 503          cnt[j] = 0;
 504       }
 505       error = 0;
 506
 507       /* scan whole block */
 508       for (k = 0; k < n; k++) {
 509 #if 1
 510          GLint best = -1;
 511          GLfloat err = 1e9; /* big enough */
 512          /* determine best vector */
 513          for (j = 0; j < nv; j++) {
 514             GLfloat e = (vec[j][0] - input[k][0]) * (vec[j][0] - input[k][0]) +
 515                       (vec[j][1] - input[k][1]) * (vec[j][1] - input[k][1]) +
 516                       (vec[j][2] - input[k][2]) * (vec[j][2] - input[k][2]);
 517             if (nc == 4) {
 518                e += (vec[j][3] - input[k][3]) * (vec[j][3] - input[k][3]);
 519             }
 520             if (e < err) {
 521                err = e;
 522                best = j;
 523             }
 524          }
 525 #else
 526          GLint best = fxt1_bestcol(vec, nv, input[k], nc, &err);
 527 #endif
 528          assert(best >= 0);
 529          /* add in closest color */
 530          for (i = 0; i < nc; i++) {
 531             sum[best][i] += input[k][i];
 532          }
 533          /* mark this vector as used */
 534          cnt[best]++;
 535          /* accumulate error */
 536          error += err;
 537       }
 538
 539       /* check RMS */
 540       if ((error < LL_RMS_E) ||
 541           ((error < lasterror) && ((lasterror - error) < LL_RMS_D))) {
 542          return !0; /* good match */
 543       }
 544       lasterror = error;
 545
 546       /* move each vector to the barycenter of its closest colors */
 547       for (j = 0; j < nv; j++) {
 548          if (cnt[j]) {
 549             GLfloat div = 1.0F / cnt[j];
 550             for (i = 0; i < nc; i++) {
 551                vec[j][i] = div * sum[j][i];
 552             }
 553          } else {
 554             /* this vec has no samples or is identical with a previous vec */
 555             GLint worst = fxt1_worst(vec[j], input, nc, n);
 556             for (i = 0; i < nc; i++) {
 557                vec[j][i] = input[worst][i];
 558             }
 559          }
 560       }
 561    }
 562
 563    return 0; /* could not converge fast enough */
 564 }
 565
 566
 567 static void
 568 fxt1_quantize_CHROMA (GLuint *cc,
 569                       GLubyte input[N_TEXELS][MAX_COMP])
 570 {
 571    const GLint n_vect = 4; /* 4 base vectors to find */
 572    const GLint n_comp = 3; /* 3 components: R, G, B */
 573    GLfloat vec[MAX_VECT][MAX_COMP];
 574    GLint i, j, k;
 575    Fx64 hi; /* high quadword */
 576    GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
 577
 578    if (fxt1_choose(vec, n_vect, input, n_comp, N_TEXELS) != 0) {
 579       fxt1_lloyd(vec, n_vect, input, n_comp, N_TEXELS);
 580    }
 581
 582    FX64_MOV32(hi, 4); /* cc-chroma = "010" + unused bit */
 583    for (j = n_vect - 1; j >= 0; j--) {
 584       for (i = 0; i < n_comp; i++) {
 585          /* add in colors */
 586          FX64_SHL(hi, 5);
 587          FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
 588       }
 589    }
 590    ((Fx64 *)cc)[1] = hi;
 591
 592    lohi = lolo = 0;
 593    /* right microtile */
 594    for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) {
 595       lohi <<= 2;
 596       lohi |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
 597    }
 598    /* left microtile */
 599    for (; k >= 0; k--) {
 600       lolo <<= 2;
 601       lolo |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
 602    }
 603    cc[1] = lohi;
 604    cc[0] = lolo;
 605 }
 606
 607
 608 static void
 609 fxt1_quantize_ALPHA0 (GLuint *cc,
 610                       GLubyte input[N_TEXELS][MAX_COMP],
 611                       GLubyte reord[N_TEXELS][MAX_COMP], GLint n)
 612 {
 613    const GLint n_vect = 3; /* 3 base vectors to find */
 614    const GLint n_comp = 4; /* 4 components: R, G, B, A */
 615    GLfloat vec[MAX_VECT][MAX_COMP];
 616    GLint i, j, k;
 617    Fx64 hi; /* high quadword */
 618    GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
 619
 620    /* the last vector indicates zero */
 621    for (i = 0; i < n_comp; i++) {
 622       vec[n_vect][i] = 0;
 623    }
 624
 625    /* the first n texels in reord are guaranteed to be non-zero */
 626    if (fxt1_choose(vec, n_vect, reord, n_comp, n) != 0) {
 627       fxt1_lloyd(vec, n_vect, reord, n_comp, n);
 628    }
 629
 630    FX64_MOV32(hi, 6); /* alpha = "011" + lerp = 0 */
 631    for (j = n_vect - 1; j >= 0; j--) {
 632       /* add in alphas */
 633       FX64_SHL(hi, 5);
 634       FX64_OR32(hi, (GLuint)(vec[j][ACOMP] / 8.0F));
 635    }
 636    for (j = n_vect - 1; j >= 0; j--) {
 637       for (i = 0; i < n_comp - 1; i++) {
 638          /* add in colors */
 639          FX64_SHL(hi, 5);
 640          FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
 641       }
 642    }
 643    ((Fx64 *)cc)[1] = hi;
 644
 645    lohi = lolo = 0;
 646    /* right microtile */
 647    for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) {
 648       lohi <<= 2;
 649       lohi |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
 650    }
 651    /* left microtile */
 652    for (; k >= 0; k--) {
 653       lolo <<= 2;
 654       lolo |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
 655    }
 656    cc[1] = lohi;
 657    cc[0] = lolo;
 658 }
 659
 660
 661 static void
 662 fxt1_quantize_ALPHA1 (GLuint *cc,
 663                       GLubyte input[N_TEXELS][MAX_COMP])
 664 {
 665    const GLint n_vect = 3; /* highest vector number in each microtile */
 666    const GLint n_comp = 4; /* 4 components: R, G, B, A */
 667    GLfloat vec[1 + 1 + 1][MAX_COMP]; /* 1.5 extrema for each sub-block */
 668    GLfloat b, iv[MAX_COMP]; /* interpolation vector */
 669    GLint i, j, k;
 670    Fx64 hi; /* high quadword */
 671    GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
 672
 673    GLint minSum;
 674    GLint maxSum;
 675    GLint minColL = 0, maxColL = 0;
 676    GLint minColR = 0, maxColR = 0;
 677    GLint sumL = 0, sumR = 0;
 678    GLint nn_comp;
 679    /* Our solution here is to find the darkest and brightest colors in
 680     * the 4x4 tile and use those as the two representative colors.
 681     * There are probably better algorithms to use (histogram-based).
 682     */
 683    nn_comp = n_comp;
 684    while ((minColL == maxColL) && nn_comp) {
 685        minSum = 2000; /* big enough */
 686        maxSum = -1; /* small enough */
 687        for (k = 0; k < N_TEXELS / 2; k++) {
 688            GLint sum = 0;
 689            for (i = 0; i < nn_comp; i++) {
 690                sum += input[k][i];
 691            }
 692            if (minSum > sum) {
 693                minSum = sum;
 694                minColL = k;
 695            }
 696            if (maxSum < sum) {
 697                maxSum = sum;
 698                maxColL = k;
 699            }
 700            sumL += sum;
 701        }
 702
 703        nn_comp--;
 704    }
 705
 706    nn_comp = n_comp;
 707    while ((minColR == maxColR) && nn_comp) {
 708        minSum = 2000; /* big enough */
 709        maxSum = -1; /* small enough */
 710        for (k = N_TEXELS / 2; k < N_TEXELS; k++) {
 711            GLint sum = 0;
 712            for (i = 0; i < nn_comp; i++) {
 713                sum += input[k][i];
 714            }
 715            if (minSum > sum) {
 716                minSum = sum;
 717                minColR = k;
 718            }
 719            if (maxSum < sum) {
 720                maxSum = sum;
 721                maxColR = k;
 722            }
 723            sumR += sum;
 724        }
 725
 726        nn_comp--;
 727    }
 728
 729    /* choose the common vector (yuck!) */
 730    {
 731       GLint j1, j2;
 732       GLint v1 = 0, v2 = 0;
 733       GLfloat err = 1e9; /* big enough */
 734       GLfloat tv[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
 735       for (i = 0; i < n_comp; i++) {
 736          tv[0][i] = input[minColL][i];
 737          tv[1][i] = input[maxColL][i];
 738          tv[2][i] = input[minColR][i];
 739          tv[3][i] = input[maxColR][i];
 740       }
 741       for (j1 = 0; j1 < 2; j1++) {
 742          for (j2 = 2; j2 < 4; j2++) {
 743             GLfloat e = 0.0F;
 744             for (i = 0; i < n_comp; i++) {
 745                e += (tv[j1][i] - tv[j2][i]) * (tv[j1][i] - tv[j2][i]);
 746             }
 747             if (e < err) {
 748                err = e;
 749                v1 = j1;
 750                v2 = j2;
 751             }
 752          }
 753       }
 754       for (i = 0; i < n_comp; i++) {
 755          vec[0][i] = tv[1 - v1][i];
 756          vec[1][i] = (tv[v1][i] * sumL + tv[v2][i] * sumR) / (sumL + sumR);
 757          vec[2][i] = tv[5 - v2][i];
 758       }
 759    }
 760
 761    /* left microtile */
 762    cc[0] = 0;
 763    if (minColL != maxColL) {
 764       /* compute interpolation vector */
 765       MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
 766
 767       /* add in texels */
 768       lolo = 0;
 769       for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
 770          GLint texel;
 771          /* interpolate color */
 772          CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
 773          /* add in texel */
 774          lolo <<= 2;
 775          lolo |= texel;
 776       }
 777
 778       cc[0] = lolo;
 779    }
 780
 781    /* right microtile */
 782    cc[1] = 0;
 783    if (minColR != maxColR) {
 784       /* compute interpolation vector */
 785       MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[1]);
 786
 787       /* add in texels */
 788       lohi = 0;
 789       for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
 790          GLint texel;
 791          /* interpolate color */
 792          CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
 793          /* add in texel */
 794          lohi <<= 2;
 795          lohi |= texel;
 796       }
 797
 798       cc[1] = lohi;
 799    }
 800
 801    FX64_MOV32(hi, 7); /* alpha = "011" + lerp = 1 */
 802    for (j = n_vect - 1; j >= 0; j--) {
 803       /* add in alphas */
 804       FX64_SHL(hi, 5);
 805       FX64_OR32(hi, (GLuint)(vec[j][ACOMP] / 8.0F));
 806    }
 807    for (j = n_vect - 1; j >= 0; j--) {
 808       for (i = 0; i < n_comp - 1; i++) {
 809          /* add in colors */
 810          FX64_SHL(hi, 5);
 811          FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
 812       }
 813    }
 814    ((Fx64 *)cc)[1] = hi;
 815 }
 816
 817
 818 static void
 819 fxt1_quantize_HI (GLuint *cc,
 820                   GLubyte input[N_TEXELS][MAX_COMP],
 821                   GLubyte reord[N_TEXELS][MAX_COMP], GLint n)
 822 {
 823    const GLint n_vect = 6; /* highest vector number */
 824    const GLint n_comp = 3; /* 3 components: R, G, B */
 825    GLfloat b = 0.0F;       /* phoudoin: silent compiler! */
 826    GLfloat iv[MAX_COMP];   /* interpolation vector */
 827    GLint i, k;
 828    GLuint hihi; /* high quadword: hi dword */
 829
 830    GLint minSum = 2000; /* big enough */
 831    GLint maxSum = -1; /* small enough */
 832    GLint minCol = 0; /* phoudoin: silent compiler! */
 833    GLint maxCol = 0; /* phoudoin: silent compiler! */
 834
 835    /* Our solution here is to find the darkest and brightest colors in
 836     * the 8x4 tile and use those as the two representative colors.
 837     * There are probably better algorithms to use (histogram-based).
 838     */
 839    for (k = 0; k < n; k++) {
 840       GLint sum = 0;
 841       for (i = 0; i < n_comp; i++) {
 842          sum += reord[k][i];
 843       }
 844       if (minSum > sum) {
 845          minSum = sum;
 846          minCol = k;
 847       }
 848       if (maxSum < sum) {
 849          maxSum = sum;
 850          maxCol = k;
 851       }
 852    }
 853
 854    hihi = 0; /* cc-hi = "00" */
 855    for (i = 0; i < n_comp; i++) {
 856       /* add in colors */
 857       hihi <<= 5;
 858       hihi |= reord[maxCol][i] >> 3;
 859    }
 860    for (i = 0; i < n_comp; i++) {
 861       /* add in colors */
 862       hihi <<= 5;
 863       hihi |= reord[minCol][i] >> 3;
 864    }
 865    cc[3] = hihi;
 866    cc[0] = cc[1] = cc[2] = 0;
 867
 868    /* compute interpolation vector */
 869    if (minCol != maxCol) {
 870       MAKEIVEC(n_vect, n_comp, iv, b, reord[minCol], reord[maxCol]);
 871    }
 872
 873    /* add in texels */
 874    for (k = N_TEXELS - 1; k >= 0; k--) {
 875       GLint t = k * 3;
 876       GLuint *kk = (GLuint *)((char *)cc + t / 8);
 877       GLint texel = n_vect + 1; /* transparent black */
 878
 879       if (!ISTBLACK(input[k])) {
 880          if (minCol != maxCol) {
 881             /* interpolate color */
 882             CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
 883             /* add in texel */
 884             kk[0] |= texel << (t & 7);
 885          }
 886       } else {
 887          /* add in texel */
 888          kk[0] |= texel << (t & 7);
 889       }
 890    }
 891 }
 892
 893
 894 static void
 895 fxt1_quantize_MIXED1 (GLuint *cc,
 896                       GLubyte input[N_TEXELS][MAX_COMP])
 897 {
 898    const GLint n_vect = 2; /* highest vector number in each microtile */
 899    const GLint n_comp = 3; /* 3 components: R, G, B */
 900    GLubyte vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
 901    GLfloat b, iv[MAX_COMP]; /* interpolation vector */
 902    GLint i, j, k;
 903    Fx64 hi; /* high quadword */
 904    GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
 905
 906    GLint minSum;
 907    GLint maxSum;
 908    GLint minColL = 0, maxColL = -1;
 909    GLint minColR = 0, maxColR = -1;
 910
 911    /* Our solution here is to find the darkest and brightest colors in
 912     * the 4x4 tile and use those as the two representative colors.
 913     * There are probably better algorithms to use (histogram-based).
 914     */
 915    minSum = 2000; /* big enough */
 916    maxSum = -1; /* small enough */
 917    for (k = 0; k < N_TEXELS / 2; k++) {
 918       if (!ISTBLACK(input[k])) {
 919          GLint sum = 0;
 920          for (i = 0; i < n_comp; i++) {
 921             sum += input[k][i];
 922          }
 923          if (minSum > sum) {
 924             minSum = sum;
 925             minColL = k;
 926          }
 927          if (maxSum < sum) {
 928             maxSum = sum;
 929             maxColL = k;
 930          }
 931       }
 932    }
 933    minSum = 2000; /* big enough */
 934    maxSum = -1; /* small enough */
 935    for (; k < N_TEXELS; k++) {
 936       if (!ISTBLACK(input[k])) {
 937          GLint sum = 0;
 938          for (i = 0; i < n_comp; i++) {
 939             sum += input[k][i];
 940          }
 941          if (minSum > sum) {
 942             minSum = sum;
 943             minColR = k;
 944          }
 945          if (maxSum < sum) {
 946             maxSum = sum;
 947             maxColR = k;
 948          }
 949       }
 950    }
 951
 952    /* left microtile */
 953    if (maxColL == -1) {
 954       /* all transparent black */
 955       cc[0] = ~0u;
 956       for (i = 0; i < n_comp; i++) {
 957          vec[0][i] = 0;
 958          vec[1][i] = 0;
 959       }
 960    } else {
 961       cc[0] = 0;
 962       for (i = 0; i < n_comp; i++) {
 963          vec[0][i] = input[minColL][i];
 964          vec[1][i] = input[maxColL][i];
 965       }
 966       if (minColL != maxColL) {
 967          /* compute interpolation vector */
 968          MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
 969
 970          /* add in texels */
 971          lolo = 0;
 972          for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
 973             GLint texel = n_vect + 1; /* transparent black */
 974             if (!ISTBLACK(input[k])) {
 975                /* interpolate color */
 976                CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
 977             }
 978             /* add in texel */
 979             lolo <<= 2;
 980             lolo |= texel;
 981          }
 982          cc[0] = lolo;
 983       }
 984    }
 985
 986    /* right microtile */
 987    if (maxColR == -1) {
 988       /* all transparent black */
 989       cc[1] = ~0u;
 990       for (i = 0; i < n_comp; i++) {
 991          vec[2][i] = 0;
 992          vec[3][i] = 0;
 993       }
 994    } else {
 995       cc[1] = 0;
 996       for (i = 0; i < n_comp; i++) {
 997          vec[2][i] = input[minColR][i];
 998          vec[3][i] = input[maxColR][i];
 999       }
1000       if (minColR != maxColR) {
1001          /* compute interpolation vector */
1002          MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);
1003
1004          /* add in texels */
1005          lohi = 0;
1006          for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
1007             GLint texel = n_vect + 1; /* transparent black */
1008             if (!ISTBLACK(input[k])) {
1009                /* interpolate color */
1010                CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
1011             }
1012             /* add in texel */
1013             lohi <<= 2;
1014             lohi |= texel;
1015          }
1016          cc[1] = lohi;
1017       }
1018    }
1019
1020    FX64_MOV32(hi, 9 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
1021    for (j = 2 * 2 - 1; j >= 0; j--) {
1022       for (i = 0; i < n_comp; i++) {
1023          /* add in colors */
1024          FX64_SHL(hi, 5);
1025          FX64_OR32(hi, vec[j][i] >> 3);
1026       }
1027    }
1028    ((Fx64 *)cc)[1] = hi;
1029 }
1030
1031
/**
 * Quantize one fully opaque block in MIXED mode with the alpha flag clear.
 *
 * Each half of the block (a "microtile" of N_TEXELS / 2 texels) gets two RGB
 * endpoints and every texel a 2-bit index 0..n_vect between them.  The
 * endpoints are the texels with the lowest and highest value in the channel
 * whose index fxt1_variance returns (per the comment below, the channel with
 * maximum variance).
 *
 * \param cc     output code word; cc[0]/cc[1] receive the left/right index
 *               words, the upper quadword receives endpoints and mode bits
 * \param input  block texels; entries [0, N_TEXELS / 2) form the left
 *               microtile, the remainder the right one
 *
 * NOTE(review): MAKEIVEC and CALCCDOT are macros defined outside this
 * function; they may depend on the local `i` - confirm before renaming it.
 */
static void
fxt1_quantize_MIXED0 (GLuint *cc,
                      GLubyte input[N_TEXELS][MAX_COMP])
{
   const GLint n_vect = 3; /* highest vector number in each microtile */
   const GLint n_comp = 3; /* 3 components: R, G, B */
   GLubyte vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
   GLfloat b, iv[MAX_COMP]; /* interpolation vector */
   GLint i, j, k;
   Fx64 hi; /* high quadword */
   GLuint lohi, lolo; /* low quadword: hi dword, lo dword */

   GLint minColL = 0, maxColL = 0;
   GLint minColR = 0, maxColR = 0;
#if 0
   /* Disabled alternative: pick endpoints by R+G+B sum. */
   GLint minSum;
   GLint maxSum;

   /* Our solution here is to find the darkest and brightest colors in
    * the 4x4 tile and use those as the two representative colors.
    * There are probably better algorithms to use (histogram-based).
    */
   minSum = 2000; /* big enough */
   maxSum = -1; /* small enough */
   for (k = 0; k < N_TEXELS / 2; k++) {
      GLint sum = 0;
      for (i = 0; i < n_comp; i++) {
         sum += input[k][i];
      }
      if (minSum > sum) {
         minSum = sum;
         minColL = k;
      }
      if (maxSum < sum) {
         maxSum = sum;
         maxColL = k;
      }
   }
   minSum = 2000; /* big enough */
   maxSum = -1; /* small enough */
   for (; k < N_TEXELS; k++) {
      GLint sum = 0;
      for (i = 0; i < n_comp; i++) {
         sum += input[k][i];
      }
      if (minSum > sum) {
         minSum = sum;
         minColR = k;
      }
      if (maxSum < sum) {
         maxSum = sum;
         maxColR = k;
      }
   }
#else
   GLint minVal;
   GLint maxVal;
   /* channel index to scan in each microtile */
   GLint maxVarL = fxt1_variance(NULL, input, n_comp, N_TEXELS / 2);
   GLint maxVarR = fxt1_variance(NULL, &input[N_TEXELS / 2], n_comp, N_TEXELS / 2);

   /* Scan the channel with max variance for lo & hi
    * and use those as the two representative colors.
    */
   minVal = 2000; /* big enough */
   maxVal = -1; /* small enough */
   for (k = 0; k < N_TEXELS / 2; k++) {
      GLint t = input[k][maxVarL];
      if (minVal > t) {
         minVal = t;
         minColL = k;
      }
      if (maxVal < t) {
         maxVal = t;
         maxColL = k;
      }
   }
   minVal = 2000; /* big enough */
   maxVal = -1; /* small enough */
   /* k continues from N_TEXELS / 2: right microtile */
   for (; k < N_TEXELS; k++) {
      GLint t = input[k][maxVarR];
      if (minVal > t) {
         minVal = t;
         minColR = k;
      }
      if (maxVal < t) {
         maxVal = t;
         maxColR = k;
      }
   }
#endif

   /* left microtile */
   cc[0] = 0; /* stays 0 (all texels use endpoint 0) if the endpoints coincide */
   for (i = 0; i < n_comp; i++) {
      vec[0][i] = input[minColL][i];
      vec[1][i] = input[maxColL][i];
   }
   if (minColL != maxColL) {
      /* compute interpolation vector */
      MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);

      /* add in texels, last texel first so texel 0 lands in the low bits */
      lolo = 0;
      for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
         GLint texel;
         /* interpolate color */
         CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
         /* add in texel */
         lolo <<= 2;
         lolo |= texel;
      }

      /* funky encoding for LSB of green:
       * fxt1_decode_1MIXED rebuilds the 6th green bit of endpoint 0 as
       * glsb ^ selb, where selb is bit 1 of this index word and glsb is the
       * stored 6th green bit of endpoint 1.  If bit 1 does not already equal
       * the XOR of the two endpoints' bit-2 green values, swap the endpoints
       * and complement the indices (index n becomes 3 - n), which flips
       * bit 1 while leaving the XOR unchanged.
       */
      if ((GLint)((lolo >> 1) & 1) != (((vec[1][GCOMP] ^ vec[0][GCOMP]) >> 2) & 1)) {
         for (i = 0; i < n_comp; i++) {
            vec[1][i] = input[minColL][i];
            vec[0][i] = input[maxColL][i];
         }
         lolo = ~lolo;
      }

      cc[0] = lolo;
   }

   /* right microtile */
   cc[1] = 0; /* stays 0 if the endpoints coincide */
   for (i = 0; i < n_comp; i++) {
      vec[2][i] = input[minColR][i];
      vec[3][i] = input[maxColR][i];
   }
   if (minColR != maxColR) {
      /* compute interpolation vector */
      MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);

      /* add in texels, last texel first */
      lohi = 0;
      for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
         GLint texel;
         /* interpolate color */
         CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
         /* add in texel */
         lohi <<= 2;
         lohi |= texel;
      }

      /* funky encoding for LSB of green: same endpoint swap as for the
       * left microtile, applied to vec[2] / vec[3] and this index word
       */
      if ((GLint)((lohi >> 1) & 1) != (((vec[3][GCOMP] ^ vec[2][GCOMP]) >> 2) & 1)) {
         for (i = 0; i < n_comp; i++) {
            vec[3][i] = input[minColR][i];
            vec[2][i] = input[maxColR][i];
         }
         lohi = ~lohi;
      }

      cc[1] = lohi;
   }

   /* Top nibble of the block: 8 = mixed mode with the alpha flag (bit 0)
    * clear, plus bit 2 of the green channel of vec[3] / vec[1] as the extra
    * green LSBs.  The twelve 5-bit color fields shifted in below push this
    * nibble up by 60 bits (assuming FX64_SHL / FX64_OR32 behave as named).
    */
   FX64_MOV32(hi, 8 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
   for (j = 2 * 2 - 1; j >= 0; j--) {
      for (i = 0; i < n_comp; i++) {
         /* add in colors, keeping the top 5 bits of each component */
         FX64_SHL(hi, 5);
         FX64_OR32(hi, vec[j][i] >> 3);
      }
   }
   ((Fx64 *)cc)[1] = hi;
}
1199
1200
1201 static void
1202 fxt1_quantize (GLuint *cc, const GLubyte *lines[], GLint comps)
1203 {
1204    GLint trualpha;
1205    GLubyte reord[N_TEXELS][MAX_COMP];
1206
1207    GLubyte input[N_TEXELS][MAX_COMP];
1208    GLint i, k, l;
1209
1210    if (comps == 3) {
1211       /* make the whole block opaque */
1212       memset(input, -1, sizeof(input));
1213    }
1214
1215    /* 8 texels each line */
1216    for (l = 0; l < 4; l++) {
1217       for (k = 0; k < 4; k++) {
1218          for (i = 0; i < comps; i++) {
1219             input[k + l * 4][i] = *lines[l]++;
1220          }
1221       }
1222       for (; k < 8; k++) {
1223          for (i = 0; i < comps; i++) {
1224             input[k + l * 4 + 12][i] = *lines[l]++;
1225          }
1226       }
1227    }
1228
1229    /* block layout:
1230     * 00, 01, 02, 03, 08, 09, 0a, 0b
1231     * 10, 11, 12, 13, 18, 19, 1a, 1b
1232     * 04, 05, 06, 07, 0c, 0d, 0e, 0f
1233     * 14, 15, 16, 17, 1c, 1d, 1e, 1f
1234     */
1235
1236    /* [dBorca]
1237     * stupidity flows forth from this
1238     */
1239    l = N_TEXELS;
1240    trualpha = 0;
1241    if (comps == 4) {
1242       /* skip all transparent black texels */
1243       l = 0;
1244       for (k = 0; k < N_TEXELS; k++) {
1245          /* test all components against 0 */
1246          if (!ISTBLACK(input[k])) {
1247             /* texel is not transparent black */
1248             COPY_4UBV(reord[l], input[k]);
1249             if (reord[l][ACOMP] < (255 - ALPHA_TS)) {
1250                /* non-opaque texel */
1251                trualpha = !0;
1252             }
1253             l++;
1254          }
1255       }
1256    }
1257
1258 #if 0
1259    if (trualpha) {
1260       fxt1_quantize_ALPHA0(cc, input, reord, l);
1261    } else if (l == 0) {
1262       cc[0] = cc[1] = cc[2] = -1;
1263       cc[3] = 0;
1264    } else if (l < N_TEXELS) {
1265       fxt1_quantize_HI(cc, input, reord, l);
1266    } else {
1267       fxt1_quantize_CHROMA(cc, input);
1268    }
1269    (void)fxt1_quantize_ALPHA1;
1270    (void)fxt1_quantize_MIXED1;
1271    (void)fxt1_quantize_MIXED0;
1272 #else
1273    if (trualpha) {
1274       fxt1_quantize_ALPHA1(cc, input);
1275    } else if (l == 0) {
1276       cc[0] = cc[1] = cc[2] = ~0u;
1277       cc[3] = 0;
1278    } else if (l < N_TEXELS) {
1279       fxt1_quantize_MIXED1(cc, input);
1280    } else {
1281       fxt1_quantize_MIXED0(cc, input);
1282    }
1283    (void)fxt1_quantize_ALPHA0;
1284    (void)fxt1_quantize_HI;
1285    (void)fxt1_quantize_CHROMA;
1286 #endif
1287 }
1288
1289
1290 static void
1291 fxt1_encode (GLuint width, GLuint height, GLint comps,
1292              const void *source, GLint srcRowStride,
1293              void *dest, GLint destRowStride)
1294 {
1295    GLuint x, y;
1296    const GLubyte *data;
1297    GLuint *encoded = (GLuint *)dest;
1298    void *newSource = NULL;
1299
1300    assert(comps == 3 || comps == 4);
1301
1302    /* Replicate image if width is not M8 or height is not M4 */
1303    if ((width & 7) | (height & 3)) {
1304       GLint newWidth = (width + 7) & ~7;
1305       GLint newHeight = (height + 3) & ~3;
1306       newSource = malloc(comps * newWidth * newHeight * sizeof(GLchan));
1307       if (!newSource) {
1308          GET_CURRENT_CONTEXT(ctx);
1309          _mesa_error(ctx, GL_OUT_OF_MEMORY, "texture compression");
1310          goto cleanUp;
1311       }
1312       _mesa_upscale_teximage2d(width, height, newWidth, newHeight,
1313                                comps, (const GLchan *) source,
1314                                srcRowStride, (GLchan *) newSource);
1315       source = newSource;
1316       width = newWidth;
1317       height = newHeight;
1318       srcRowStride = comps * newWidth;
1319    }
1320
1321    /* convert from 16/32-bit channels to GLubyte if needed */
1322    if (CHAN_TYPE != GL_UNSIGNED_BYTE) {
1323       const GLuint n = width * height * comps;
1324       const GLchan *src = (const GLchan *) source;
1325       GLubyte *dest = (GLubyte *) malloc(n * sizeof(GLubyte));
1326       GLuint i;
1327       if (!dest) {
1328          GET_CURRENT_CONTEXT(ctx);
1329          _mesa_error(ctx, GL_OUT_OF_MEMORY, "texture compression");
1330          goto cleanUp;
1331       }
1332       for (i = 0; i < n; i++) {
1333          dest[i] = CHAN_TO_UBYTE(src[i]);
1334       }
1335       if (newSource != NULL) {
1336          free(newSource);
1337       }
1338       newSource = dest;  /* we'll free this buffer before returning */
1339       source = dest;  /* the new, GLubyte incoming image */
1340    }
1341
1342    data = (const GLubyte *) source;
1343    destRowStride = (destRowStride - width * 2) / 4;
1344    for (y = 0; y < height; y += 4) {
1345       GLuint offs = 0 + (y + 0) * srcRowStride;
1346       for (x = 0; x < width; x += 8) {
1347          const GLubyte *lines[4];
1348          lines[0] = &data[offs];
1349          lines[1] = lines[0] + srcRowStride;
1350          lines[2] = lines[1] + srcRowStride;
1351          lines[3] = lines[2] + srcRowStride;
1352          offs += 8 * comps;
1353          fxt1_quantize(encoded, lines, comps);
1354          /* 128 bits per 8x4 block */
1355          encoded += 4;
1356       }
1357       encoded += destRowStride;
1358    }
1359
1360  cleanUp:
1361    if (newSource != NULL) {
1362       free(newSource);
1363    }
1364 }
1365
1366
1367 /***************************************************************************\
1368  * FXT1 decoder
1369  *
1370  * The decoder is based on GL_3DFX_texture_compression_FXT1
1371  * specification and serves as a concept for the encoder.
1372 \***************************************************************************/
1373
1374
/* lookup table for scaling 5 bit colors up to 8 bits:
 * entry i holds i * 255 / 31 rounded to the nearest integer
 * (indexed through the UP5() macro)
 */
static const GLubyte _rgb_scale_5[] = {
   0,   8,   16,  25,  33,  41,  49,  58,
   66,  74,  82,  90,  99,  107, 115, 123,
   132, 140, 148, 156, 165, 173, 181, 189,
   197, 206, 214, 222, 230, 239, 247, 255
};

/* lookup table for scaling 6 bit colors up to 8 bits:
 * entry i holds i * 255 / 63 rounded to the nearest integer
 * (indexed through the UP6() macro)
 */
static const GLubyte _rgb_scale_6[] = {
   0,   4,   8,   12,  16,  20,  24,  28,
   32,  36,  40,  45,  49,  53,  57,  61,
   65,  69,  73,  77,  81,  85,  89,  93,
   97,  101, 105, 109, 113, 117, 121, 125,
   130, 134, 138, 142, 146, 150, 154, 158,
   162, 166, 170, 174, 178, 182, 186, 190,
   194, 198, 202, 206, 210, 215, 219, 223,
   227, 231, 235, 239, 243, 247, 251, 255
};
1394
1395
/* CC_SEL: view `cc` as an array of dwords and return the dword containing
 * bit `which`, shifted so that bit sits at position 0.  The result is not
 * masked; callers (or UP5 / UP6) strip the unwanted upper bits.
 */
#define CC_SEL(cc, which) (((const GLuint *)(cc))[(which) / 32] >> ((which) & 31))
/* UP5: expand the low 5 bits of `c` to an 8-bit value */
#define UP5(c) _rgb_scale_5[(c) & 31]
/* UP6: expand the low 5 bits of `c` plus the extra LSB `b` to an 8-bit value */
#define UP6(c, b) _rgb_scale_6[(((c) & 31) << 1) | ((b) & 1)]
/* LERP: value t/n of the way from c0 to c1, rounded to nearest.  The whole
 * expansion is parenthesized so the macro is safe inside larger expressions.
 */
#define LERP(n, t, c0, c1) ((((n) - (t)) * (c0) + (t) * (c1) + (n) / 2) / (n))
1400
1401
1402 static void
1403 fxt1_decode_1HI (const GLubyte *code, GLint t, GLchan *rgba)
1404 {
1405    const GLuint *cc;
1406
1407    t *= 3;
1408    cc = (const GLuint *)(code + t / 8);
1409    t = (cc[0] >> (t & 7)) & 7;
1410
1411    if (t == 7) {
1412       rgba[RCOMP] = rgba[GCOMP] = rgba[BCOMP] = rgba[ACOMP] = 0;
1413    } else {
1414       GLubyte r, g, b;
1415       cc = (const GLuint *)(code + 12);
1416       if (t == 0) {
1417          b = UP5(CC_SEL(cc, 0));
1418          g = UP5(CC_SEL(cc, 5));
1419          r = UP5(CC_SEL(cc, 10));
1420       } else if (t == 6) {
1421          b = UP5(CC_SEL(cc, 15));
1422          g = UP5(CC_SEL(cc, 20));
1423          r = UP5(CC_SEL(cc, 25));
1424       } else {
1425          b = LERP(6, t, UP5(CC_SEL(cc, 0)), UP5(CC_SEL(cc, 15)));
1426          g = LERP(6, t, UP5(CC_SEL(cc, 5)), UP5(CC_SEL(cc, 20)));
1427          r = LERP(6, t, UP5(CC_SEL(cc, 10)), UP5(CC_SEL(cc, 25)));
1428       }
1429       rgba[RCOMP] = UBYTE_TO_CHAN(r);
1430       rgba[GCOMP] = UBYTE_TO_CHAN(g);
1431       rgba[BCOMP] = UBYTE_TO_CHAN(b);
1432       rgba[ACOMP] = CHAN_MAX;
1433    }
1434 }
1435
1436
1437 static void
1438 fxt1_decode_1CHROMA (const GLubyte *code, GLint t, GLchan *rgba)
1439 {
1440    const GLuint *cc;
1441    GLuint kk;
1442
1443    cc = (const GLuint *)code;
1444    if (t & 16) {
1445       cc++;
1446       t &= 15;
1447    }
1448    t = (cc[0] >> (t * 2)) & 3;
1449
1450    t *= 15;
1451    cc = (const GLuint *)(code + 8 + t / 8);
1452    kk = cc[0] >> (t & 7);
1453    rgba[BCOMP] = UBYTE_TO_CHAN( UP5(kk) );
1454    rgba[GCOMP] = UBYTE_TO_CHAN( UP5(kk >> 5) );
1455    rgba[RCOMP] = UBYTE_TO_CHAN( UP5(kk >> 10) );
1456    rgba[ACOMP] = CHAN_MAX;
1457 }
1458
1459
1460 static void
1461 fxt1_decode_1MIXED (const GLubyte *code, GLint t, GLchan *rgba)
1462 {
1463    const GLuint *cc;
1464    GLuint col[2][3];
1465    GLint glsb, selb;
1466
1467    cc = (const GLuint *)code;
1468    if (t & 16) {
1469       t &= 15;
1470       t = (cc[1] >> (t * 2)) & 3;
1471       /* col 2 */
1472       col[0][BCOMP] = (*(const GLuint *)(code + 11)) >> 6;
1473       col[0][GCOMP] = CC_SEL(cc, 99);
1474       col[0][RCOMP] = CC_SEL(cc, 104);
1475       /* col 3 */
1476       col[1][BCOMP] = CC_SEL(cc, 109);
1477       col[1][GCOMP] = CC_SEL(cc, 114);
1478       col[1][RCOMP] = CC_SEL(cc, 119);
1479       glsb = CC_SEL(cc, 126);
1480       selb = CC_SEL(cc, 33);
1481    } else {
1482       t = (cc[0] >> (t * 2)) & 3;
1483       /* col 0 */
1484       col[0][BCOMP] = CC_SEL(cc, 64);
1485       col[0][GCOMP] = CC_SEL(cc, 69);
1486       col[0][RCOMP] = CC_SEL(cc, 74);
1487       /* col 1 */
1488       col[1][BCOMP] = CC_SEL(cc, 79);
1489       col[1][GCOMP] = CC_SEL(cc, 84);
1490       col[1][RCOMP] = CC_SEL(cc, 89);
1491       glsb = CC_SEL(cc, 125);
1492       selb = CC_SEL(cc, 1);
1493    }
1494
1495    if (CC_SEL(cc, 124) & 1) {
1496       /* alpha[0] == 1 */
1497
1498       if (t == 3) {
1499          /* zero */
1500          rgba[RCOMP] = rgba[BCOMP] = rgba[GCOMP] = rgba[ACOMP] = 0;
1501       } else {
1502          GLubyte r, g, b;
1503          if (t == 0) {
1504             b = UP5(col[0][BCOMP]);
1505             g = UP5(col[0][GCOMP]);
1506             r = UP5(col[0][RCOMP]);
1507          } else if (t == 2) {
1508             b = UP5(col[1][BCOMP]);
1509             g = UP6(col[1][GCOMP], glsb);
1510             r = UP5(col[1][RCOMP]);
1511          } else {
1512             b = (UP5(col[0][BCOMP]) + UP5(col[1][BCOMP])) / 2;
1513             g = (UP5(col[0][GCOMP]) + UP6(col[1][GCOMP], glsb)) / 2;
1514             r = (UP5(col[0][RCOMP]) + UP5(col[1][RCOMP])) / 2;
1515          }
1516          rgba[RCOMP] = UBYTE_TO_CHAN(r);
1517          rgba[GCOMP] = UBYTE_TO_CHAN(g);
1518          rgba[BCOMP] = UBYTE_TO_CHAN(b);
1519          rgba[ACOMP] = CHAN_MAX;
1520       }
1521    } else {
1522       /* alpha[0] == 0 */
1523       GLubyte r, g, b;
1524       if (t == 0) {
1525          b = UP5(col[0][BCOMP]);
1526          g = UP6(col[0][GCOMP], glsb ^ selb);
1527          r = UP5(col[0][RCOMP]);
1528       } else if (t == 3) {
1529          b = UP5(col[1][BCOMP]);
1530          g = UP6(col[1][GCOMP], glsb);
1531          r = UP5(col[1][RCOMP]);
1532       } else {
1533          b = LERP(3, t, UP5(col[0][BCOMP]), UP5(col[1][BCOMP]));
1534          g = LERP(3, t, UP6(col[0][GCOMP], glsb ^ selb),
1535                         UP6(col[1][GCOMP], glsb));
1536          r = LERP(3, t, UP5(col[0][RCOMP]), UP5(col[1][RCOMP]));
1537       }
1538       rgba[RCOMP] = UBYTE_TO_CHAN(r);
1539       rgba[GCOMP] = UBYTE_TO_CHAN(g);
1540       rgba[BCOMP] = UBYTE_TO_CHAN(b);
1541       rgba[ACOMP] = CHAN_MAX;
1542    }
1543 }
1544
1545
1546 static void
1547 fxt1_decode_1ALPHA (const GLubyte *code, GLint t, GLchan *rgba)
1548 {
1549    const GLuint *cc;
1550    GLubyte r, g, b, a;
1551
1552    cc = (const GLuint *)code;
1553    if (CC_SEL(cc, 124) & 1) {
1554       /* lerp == 1 */
1555       GLuint col0[4];
1556
1557       if (t & 16) {
1558          t &= 15;
1559          t = (cc[1] >> (t * 2)) & 3;
1560          /* col 2 */
1561          col0[BCOMP] = (*(const GLuint *)(code + 11)) >> 6;
1562          col0[GCOMP] = CC_SEL(cc, 99);
1563          col0[RCOMP] = CC_SEL(cc, 104);
1564          col0[ACOMP] = CC_SEL(cc, 119);
1565       } else {
1566          t = (cc[0] >> (t * 2)) & 3;
1567          /* col 0 */
1568          col0[BCOMP] = CC_SEL(cc, 64);
1569          col0[GCOMP] = CC_SEL(cc, 69);
1570          col0[RCOMP] = CC_SEL(cc, 74);
1571          col0[ACOMP] = CC_SEL(cc, 109);
1572       }
1573
1574       if (t == 0) {
1575          b = UP5(col0[BCOMP]);
1576          g = UP5(col0[GCOMP]);
1577          r = UP5(col0[RCOMP]);
1578          a = UP5(col0[ACOMP]);
1579       } else if (t == 3) {
1580          b = UP5(CC_SEL(cc, 79));
1581          g = UP5(CC_SEL(cc, 84));
1582          r = UP5(CC_SEL(cc, 89));
1583          a = UP5(CC_SEL(cc, 114));
1584       } else {
1585          b = LERP(3, t, UP5(col0[BCOMP]), UP5(CC_SEL(cc, 79)));
1586          g = LERP(3, t, UP5(col0[GCOMP]), UP5(CC_SEL(cc, 84)));
1587          r = LERP(3, t, UP5(col0[RCOMP]), UP5(CC_SEL(cc, 89)));
1588          a = LERP(3, t, UP5(col0[ACOMP]), UP5(CC_SEL(cc, 114)));
1589       }
1590    } else {
1591       /* lerp == 0 */
1592
1593       if (t & 16) {
1594          cc++;
1595          t &= 15;
1596       }
1597       t = (cc[0] >> (t * 2)) & 3;
1598
1599       if (t == 3) {
1600          /* zero */
1601          r = g = b = a = 0;
1602       } else {
1603          GLuint kk;
1604          cc = (const GLuint *)code;
1605          a = UP5(cc[3] >> (t * 5 + 13));
1606          t *= 15;
1607          cc = (const GLuint *)(code + 8 + t / 8);
1608          kk = cc[0] >> (t & 7);
1609          b = UP5(kk);
1610          g = UP5(kk >> 5);
1611          r = UP5(kk >> 10);
1612       }
1613    }
1614    rgba[RCOMP] = UBYTE_TO_CHAN(r);
1615    rgba[GCOMP] = UBYTE_TO_CHAN(g);
1616    rgba[BCOMP] = UBYTE_TO_CHAN(b);
1617    rgba[ACOMP] = UBYTE_TO_CHAN(a);
1618 }
1619
1620
1621 void
1622 fxt1_decode_1 (const void *texture, GLint stride, /* in pixels */
1623                GLint i, GLint j, GLchan *rgba)
1624 {
1625    static void (*decode_1[]) (const GLubyte *, GLint, GLchan *) = {
1626       fxt1_decode_1HI,     /* cc-high   = "00?" */
1627       fxt1_decode_1HI,     /* cc-high   = "00?" */
1628       fxt1_decode_1CHROMA, /* cc-chroma = "010" */
1629       fxt1_decode_1ALPHA,  /* alpha     = "011" */
1630       fxt1_decode_1MIXED,  /* mixed     = "1??" */
1631       fxt1_decode_1MIXED,  /* mixed     = "1??" */
1632       fxt1_decode_1MIXED,  /* mixed     = "1??" */
1633       fxt1_decode_1MIXED   /* mixed     = "1??" */
1634    };
1635
1636    const GLubyte *code = (const GLubyte *)texture +
1637                          ((j / 4) * (stride / 8) + (i / 8)) * 16;
1638    GLint mode = CC_SEL(code, 125);
1639    GLint t = i & 7;
1640
1641    if (t & 4) {
1642       t += 12;
1643    }
1644    t += (j & 3) * 4;
1645
1646    decode_1[mode](code, t, rgba);
1647 }
1648
1649
1650 #endif /* FEATURE_texture_fxt1 */