src/mesa/main/texcompress_fxt1.c

   1 /*
   2  * Mesa 3-D graphics library
   3  *
   4  * Copyright (C) 1999-2008  Brian Paul   All Rights Reserved.
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a
   7  * copy of this software and associated documentation files (the "Software"),
   8  * to deal in the Software without restriction, including without limitation
   9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  10  * and/or sell copies of the Software, and to permit persons to whom the
  11  * Software is furnished to do so, subject to the following conditions:
  12  *
  13  * The above copyright notice and this permission notice shall be included
  14  * in all copies or substantial portions of the Software.
  15  *
  16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
  20  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  21  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  22  * OTHER DEALINGS IN THE SOFTWARE.
  23  */
  24
  25
  26 /**
  27  * \file texcompress_fxt1.c
  28  * GL_3DFX_texture_compression_FXT1 support.
  29  */
  30
  31
  32 #include "glheader.h"
  33 #include "imports.h"
  34 #include "image.h"
  35 #include "macros.h"
  36 #include "mipmap.h"
  37 #include "texcompress.h"
  38 #include "texcompress_fxt1.h"
  39 #include "texstore.h"
  40
  41
  42 static void
  43 fxt1_encode (GLuint width, GLuint height, GLint comps,
  44              const void *source, GLint srcRowStride,
  45              void *dest, GLint destRowStride);
  46
  47 static void
  48 fxt1_decode_1 (const void *texture, GLint stride,
  49                GLint i, GLint j, GLubyte *rgba);
  50
  51
  52 /**
  53  * Store user's image in rgb_fxt1 format.
  54  */
  55 GLboolean
  56 _mesa_texstore_rgb_fxt1(TEXSTORE_PARAMS)
  57 {
  58    const GLubyte *pixels;
  59    GLint srcRowStride;
  60    GLubyte *dst;
  61    const GLubyte *tempImage = NULL;
  62
  63    assert(dstFormat == MESA_FORMAT_RGB_FXT1);
  64
  65    if (srcFormat != GL_RGB ||
  66        srcType != GL_UNSIGNED_BYTE ||
  67        ctx->_ImageTransferState ||
  68        srcPacking->RowLength != srcWidth ||
  69        srcPacking->SwapBytes) {
  70       /* convert image to RGB/GLubyte */
  71       GLubyte *tempImageSlices[1];
  72       int rgbRowStride = 3 * srcWidth * sizeof(GLubyte);
  73       tempImage = malloc(srcWidth * srcHeight * 3 * sizeof(GLubyte));
  74       if (!tempImage)
  75          return GL_FALSE; /* out of memory */
  76       tempImageSlices[0] = (GLubyte *) tempImage;
  77       _mesa_texstore(ctx, dims,
  78                      baseInternalFormat,
  79                      MESA_FORMAT_RGB_UNORM8,
  80                      rgbRowStride, tempImageSlices,
  81                      srcWidth, srcHeight, srcDepth,
  82                      srcFormat, srcType, srcAddr,
  83                      srcPacking);
  84       pixels = tempImage;
  85       srcRowStride = 3 * srcWidth;
  86       srcFormat = GL_RGB;
  87    }
  88    else {
  89       pixels = _mesa_image_address2d(srcPacking, srcAddr, srcWidth, srcHeight,
  90                                      srcFormat, srcType, 0, 0);
  91
  92       srcRowStride = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat,
  93                                             srcType) / sizeof(GLubyte);
  94    }
  95
  96    dst = dstSlices[0];
  97
  98    fxt1_encode(srcWidth, srcHeight, 3, pixels, srcRowStride,
  99                dst, dstRowStride);
 100
 101    free((void*) tempImage);
 102
 103    return GL_TRUE;
 104 }
 105
 106
 107 /**
 108  * Store user's image in rgba_fxt1 format.
 109  */
 110 GLboolean
 111 _mesa_texstore_rgba_fxt1(TEXSTORE_PARAMS)
 112 {
 113    const GLubyte *pixels;
 114    GLint srcRowStride;
 115    GLubyte *dst;
 116    const GLubyte *tempImage = NULL;
 117
 118    assert(dstFormat == MESA_FORMAT_RGBA_FXT1);
 119
 120    if (srcFormat != GL_RGBA ||
 121        srcType != GL_UNSIGNED_BYTE ||
 122        ctx->_ImageTransferState ||
 123        srcPacking->SwapBytes) {
 124       /* convert image to RGBA/GLubyte */
 125       GLubyte *tempImageSlices[1];
 126       int rgbaRowStride = 4 * srcWidth * sizeof(GLubyte);
 127       tempImage = malloc(srcWidth * srcHeight * 4 * sizeof(GLubyte));
 128       if (!tempImage)
 129          return GL_FALSE; /* out of memory */
 130       tempImageSlices[0] = (GLubyte *) tempImage;
 131       _mesa_texstore(ctx, dims,
 132                      baseInternalFormat,
 133                      MESA_FORMAT_R8G8B8A8_UNORM,
 134                      rgbaRowStride, tempImageSlices,
 135                      srcWidth, srcHeight, srcDepth,
 136                      srcFormat, srcType, srcAddr,
 137                      srcPacking);
 138       pixels = tempImage;
 139       srcRowStride = 4 * srcWidth;
 140       srcFormat = GL_RGBA;
 141    }
 142    else {
 143       pixels = _mesa_image_address2d(srcPacking, srcAddr, srcWidth, srcHeight,
 144                                      srcFormat, srcType, 0, 0);
 145
 146       srcRowStride = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat,
 147                                             srcType) / sizeof(GLubyte);
 148    }
 149
 150    dst = dstSlices[0];
 151
 152    fxt1_encode(srcWidth, srcHeight, 4, pixels, srcRowStride,
 153                dst, dstRowStride);
 154
 155    free((void*) tempImage);
 156
 157    return GL_TRUE;
 158 }
 159
 160
 161 /***************************************************************************\
 162  * FXT1 encoder
 163  *
 164  * The encoder was built by reversing the decoder,
 165  * and is vaguely based on Texus2 by 3dfx. Note that this code
 166  * is merely a proof of concept, since it is highly UNoptimized;
 167  * moreover, it is sub-optimal due to initial conditions passed
 168  * to Lloyd's algorithm (the interpolation modes are even worse).
 169 \***************************************************************************/
 170
 171
 172 #define MAX_COMP 4 /* ever needed maximum number of components in texel */
 173 #define MAX_VECT 4 /* ever needed maximum number of base vectors to find */
 174 #define N_TEXELS 32 /* number of texels in a block (always 32) */
 175 #define LL_N_REP 50 /* number of iterations in lloyd's vq */
 176 #define LL_RMS_D 10 /* fault tolerance (maximum delta) */
 177 #define LL_RMS_E 255 /* fault tolerance (maximum error) */
 178 #define ALPHA_TS 2 /* alpha threshold: (255 - ALPHA_TS) deemed opaque */
 179 #define ISTBLACK(v) (*((GLuint *)(v)) == 0)
 180
 181
 182 /*
 183  * Define a 64-bit unsigned integer type and macros
 184  */
 185 #if 1
 186
 187 #define FX64_NATIVE 1
 188
 189 typedef uint64_t Fx64;
 190
 191 #define FX64_MOV32(a, b) a = b
 192 #define FX64_OR32(a, b)  a |= b
 193 #define FX64_SHL(a, c)   a <<= c
 194
 195 #else
 196
 197 #define FX64_NATIVE 0
 198
 199 typedef struct {
 200    GLuint lo, hi;
 201 } Fx64;
 202
 203 #define FX64_MOV32(a, b) a.lo = b
 204 #define FX64_OR32(a, b)  a.lo |= b
 205
 206 #define FX64_SHL(a, c)                                 \
 207    do {                                                \
 208        if ((c) >= 32) {                                \
 209           a.hi = a.lo << ((c) - 32);                   \
 210           a.lo = 0;                                    \
 211        } else {                                        \
 212           a.hi = (a.hi << (c)) | (a.lo >> (32 - (c))); \
 213           a.lo <<= (c);                                \
 214        }                                               \
 215    } while (0)
 216
 217 #endif
 218
 219
 220 #define F(i) (GLfloat)1 /* can be used to obtain an oblong metric: 0.30 / 0.59 / 0.11 */
 221 #define SAFECDOT 1 /* for paranoids */
 222
 223 #define MAKEIVEC(NV, NC, IV, B, V0, V1)  \
 224    do {                                  \
 225       /* compute interpolation vector */ \
 226       GLfloat d2 = 0.0F;                 \
 227       GLfloat rd2;                       \
 228                                          \
 229       for (i = 0; i < NC; i++) {         \
 230          IV[i] = (V1[i] - V0[i]) * F(i); \
 231          d2 += IV[i] * IV[i];            \
 232       }                                  \
 233       rd2 = (GLfloat)NV / d2;            \
 234       B = 0;                             \
 235       for (i = 0; i < NC; i++) {         \
 236          IV[i] *= F(i);                  \
 237          B -= IV[i] * V0[i];             \
 238          IV[i] *= rd2;                   \
 239       }                                  \
 240       B = B * rd2 + 0.5f;                \
 241    } while (0)
 242
 243 #define CALCCDOT(TEXEL, NV, NC, IV, B, V)\
 244    do {                                  \
 245       GLfloat dot = 0.0F;                \
 246       for (i = 0; i < NC; i++) {         \
 247          dot += V[i] * IV[i];            \
 248       }                                  \
 249       TEXEL = (GLint)(dot + B);          \
 250       if (SAFECDOT) {                    \
 251          if (TEXEL < 0) {                \
 252             TEXEL = 0;                   \
 253          } else if (TEXEL > NV) {        \
 254             TEXEL = NV;                  \
 255          }                               \
 256       }                                  \
 257    } while (0)
 258
 259
 260 static GLint
 261 fxt1_bestcol (GLfloat vec[][MAX_COMP], GLint nv,
 262               GLubyte input[MAX_COMP], GLint nc)
 263 {
 264    GLint i, j, best = -1;
 265    GLfloat err = 1e9; /* big enough */
 266
 267    for (j = 0; j < nv; j++) {
 268       GLfloat e = 0.0F;
 269       for (i = 0; i < nc; i++) {
 270          e += (vec[j][i] - input[i]) * (vec[j][i] - input[i]);
 271       }
 272       if (e < err) {
 273          err = e;
 274          best = j;
 275       }
 276    }
 277
 278    return best;
 279 }
 280
 281
 282 static GLint
 283 fxt1_worst (GLfloat vec[MAX_COMP],
 284             GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
 285 {
 286    GLint i, k, worst = -1;
 287    GLfloat err = -1.0F; /* small enough */
 288
 289    for (k = 0; k < n; k++) {
 290       GLfloat e = 0.0F;
 291       for (i = 0; i < nc; i++) {
 292          e += (vec[i] - input[k][i]) * (vec[i] - input[k][i]);
 293       }
 294       if (e > err) {
 295          err = e;
 296          worst = k;
 297       }
 298    }
 299
 300    return worst;
 301 }
 302
 303
 304 static GLint
 305 fxt1_variance (GLdouble variance[MAX_COMP],
 306                GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
 307 {
 308    GLint i, k, best = 0;
 309    GLint sx, sx2;
 310    GLdouble var, maxvar = -1; /* small enough */
 311    GLdouble teenth = 1.0 / n;
 312
 313    for (i = 0; i < nc; i++) {
 314       sx = sx2 = 0;
 315       for (k = 0; k < n; k++) {
 316          GLint t = input[k][i];
 317          sx += t;
 318          sx2 += t * t;
 319       }
 320       var = sx2 * teenth - sx * sx * teenth * teenth;
 321       if (maxvar < var) {
 322          maxvar = var;
 323          best = i;
 324       }
 325       if (variance) {
 326          variance[i] = var;
 327       }
 328    }
 329
 330    return best;
 331 }
 332
 333
 334 static GLint
 335 fxt1_choose (GLfloat vec[][MAX_COMP], GLint nv,
 336              GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
 337 {
 338 #if 0
 339    /* Choose colors from a grid.
 340     */
 341    GLint i, j;
 342
 343    for (j = 0; j < nv; j++) {
 344       GLint m = j * (n - 1) / (nv - 1);
 345       for (i = 0; i < nc; i++) {
 346          vec[j][i] = input[m][i];
 347       }
 348    }
 349 #else
 350    /* Our solution here is to find the darkest and brightest colors in
 351     * the 8x4 tile and use those as the two representative colors.
 352     * There are probably better algorithms to use (histogram-based).
 353     */
 354    GLint i, j, k;
 355    GLint minSum = 2000; /* big enough */
 356    GLint maxSum = -1; /* small enough */
 357    GLint minCol = 0; /* phoudoin: silent compiler! */
 358    GLint maxCol = 0; /* phoudoin: silent compiler! */
 359
 360    struct {
 361       GLint flag;
 362       GLint key;
 363       GLint freq;
 364       GLint idx;
 365    } hist[N_TEXELS];
 366    GLint lenh = 0;
 367
 368    memset(hist, 0, sizeof(hist));
 369
 370    for (k = 0; k < n; k++) {
 371       GLint l;
 372       GLint key = 0;
 373       GLint sum = 0;
 374       for (i = 0; i < nc; i++) {
 375          key <<= 8;
 376          key |= input[k][i];
 377          sum += input[k][i];
 378       }
 379       for (l = 0; l < n; l++) {
 380          if (!hist[l].flag) {
 381             /* alloc new slot */
 382             hist[l].flag = !0;
 383             hist[l].key = key;
 384             hist[l].freq = 1;
 385             hist[l].idx = k;
 386             lenh = l + 1;
 387             break;
 388          } else if (hist[l].key == key) {
 389             hist[l].freq++;
 390             break;
 391          }
 392       }
 393       if (minSum > sum) {
 394          minSum = sum;
 395          minCol = k;
 396       }
 397       if (maxSum < sum) {
 398          maxSum = sum;
 399          maxCol = k;
 400       }
 401    }
 402
 403    if (lenh <= nv) {
 404       for (j = 0; j < lenh; j++) {
 405          for (i = 0; i < nc; i++) {
 406             vec[j][i] = (GLfloat)input[hist[j].idx][i];
 407          }
 408       }
 409       for (; j < nv; j++) {
 410          for (i = 0; i < nc; i++) {
 411             vec[j][i] = vec[0][i];
 412          }
 413       }
 414       return 0;
 415    }
 416
 417    for (j = 0; j < nv; j++) {
 418       for (i = 0; i < nc; i++) {
 419          vec[j][i] = ((nv - 1 - j) * input[minCol][i] + j * input[maxCol][i] + (nv - 1) / 2) / (GLfloat)(nv - 1);
 420       }
 421    }
 422 #endif
 423
 424    return !0;
 425 }
 426
 427
 428 static GLint
 429 fxt1_lloyd (GLfloat vec[][MAX_COMP], GLint nv,
 430             GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
 431 {
 432    /* Use the generalized lloyd's algorithm for VQ:
 433     *     find 4 color vectors.
 434     *
 435     *     for each sample color
 436     *         sort to nearest vector.
 437     *
 438     *     replace each vector with the centroid of its matching colors.
 439     *
 440     *     repeat until RMS doesn't improve.
 441     *
 442     *     if a color vector has no samples, or becomes the same as another
 443     *     vector, replace it with the color which is farthest from a sample.
 444     *
 445     * vec[][MAX_COMP]           initial vectors and resulting colors
 446     * nv                        number of resulting colors required
 447     * input[N_TEXELS][MAX_COMP] input texels
 448     * nc                        number of components in input / vec
 449     * n                         number of input samples
 450     */
 451
 452    GLint sum[MAX_VECT][MAX_COMP]; /* used to accumulate closest texels */
 453    GLint cnt[MAX_VECT]; /* how many times a certain vector was chosen */
 454    GLfloat error, lasterror = 1e9;
 455
 456    GLint i, j, k, rep;
 457
 458    /* the quantizer */
 459    for (rep = 0; rep < LL_N_REP; rep++) {
 460       /* reset sums & counters */
 461       for (j = 0; j < nv; j++) {
 462          for (i = 0; i < nc; i++) {
 463             sum[j][i] = 0;
 464          }
 465          cnt[j] = 0;
 466       }
 467       error = 0;
 468
 469       /* scan whole block */
 470       for (k = 0; k < n; k++) {
 471 #if 1
 472          GLint best = -1;
 473          GLfloat err = 1e9; /* big enough */
 474          /* determine best vector */
 475          for (j = 0; j < nv; j++) {
 476             GLfloat e = (vec[j][0] - input[k][0]) * (vec[j][0] - input[k][0]) +
 477                       (vec[j][1] - input[k][1]) * (vec[j][1] - input[k][1]) +
 478                       (vec[j][2] - input[k][2]) * (vec[j][2] - input[k][2]);
 479             if (nc == 4) {
 480                e += (vec[j][3] - input[k][3]) * (vec[j][3] - input[k][3]);
 481             }
 482             if (e < err) {
 483                err = e;
 484                best = j;
 485             }
 486          }
 487 #else
 488          GLint best = fxt1_bestcol(vec, nv, input[k], nc, &err);
 489 #endif
 490          assert(best >= 0);
 491          /* add in closest color */
 492          for (i = 0; i < nc; i++) {
 493             sum[best][i] += input[k][i];
 494          }
 495          /* mark this vector as used */
 496          cnt[best]++;
 497          /* accumulate error */
 498          error += err;
 499       }
 500
 501       /* check RMS */
 502       if ((error < LL_RMS_E) ||
 503           ((error < lasterror) && ((lasterror - error) < LL_RMS_D))) {
 504          return !0; /* good match */
 505       }
 506       lasterror = error;
 507
 508       /* move each vector to the barycenter of its closest colors */
 509       for (j = 0; j < nv; j++) {
 510          if (cnt[j]) {
 511             GLfloat div = 1.0F / cnt[j];
 512             for (i = 0; i < nc; i++) {
 513                vec[j][i] = div * sum[j][i];
 514             }
 515          } else {
 516             /* this vec has no samples or is identical with a previous vec */
 517             GLint worst = fxt1_worst(vec[j], input, nc, n);
 518             for (i = 0; i < nc; i++) {
 519                vec[j][i] = input[worst][i];
 520             }
 521          }
 522       }
 523    }
 524
 525    return 0; /* could not converge fast enough */
 526 }
 527
 528
 529 static void
 530 fxt1_quantize_CHROMA (GLuint *cc,
 531                       GLubyte input[N_TEXELS][MAX_COMP])
 532 {
 533    const GLint n_vect = 4; /* 4 base vectors to find */
 534    const GLint n_comp = 3; /* 3 components: R, G, B */
 535    GLfloat vec[MAX_VECT][MAX_COMP];
 536    GLint i, j, k;
 537    Fx64 hi; /* high quadword */
 538    GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
 539
 540    if (fxt1_choose(vec, n_vect, input, n_comp, N_TEXELS) != 0) {
 541       fxt1_lloyd(vec, n_vect, input, n_comp, N_TEXELS);
 542    }
 543
 544    FX64_MOV32(hi, 4); /* cc-chroma = "010" + unused bit */
 545    for (j = n_vect - 1; j >= 0; j--) {
 546       for (i = 0; i < n_comp; i++) {
 547          /* add in colors */
 548          FX64_SHL(hi, 5);
 549          FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
 550       }
 551    }
 552    ((Fx64 *)cc)[1] = hi;
 553
 554    lohi = lolo = 0;
 555    /* right microtile */
 556    for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) {
 557       lohi <<= 2;
 558       lohi |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
 559    }
 560    /* left microtile */
 561    for (; k >= 0; k--) {
 562       lolo <<= 2;
 563       lolo |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
 564    }
 565    cc[1] = lohi;
 566    cc[0] = lolo;
 567 }
 568
 569
 570 static void
 571 fxt1_quantize_ALPHA0 (GLuint *cc,
 572                       GLubyte input[N_TEXELS][MAX_COMP],
 573                       GLubyte reord[N_TEXELS][MAX_COMP], GLint n)
 574 {
 575    const GLint n_vect = 3; /* 3 base vectors to find */
 576    const GLint n_comp = 4; /* 4 components: R, G, B, A */
 577    GLfloat vec[MAX_VECT][MAX_COMP];
 578    GLint i, j, k;
 579    Fx64 hi; /* high quadword */
 580    GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
 581
 582    /* the last vector indicates zero */
 583    for (i = 0; i < n_comp; i++) {
 584       vec[n_vect][i] = 0;
 585    }
 586
 587    /* the first n texels in reord are guaranteed to be non-zero */
 588    if (fxt1_choose(vec, n_vect, reord, n_comp, n) != 0) {
 589       fxt1_lloyd(vec, n_vect, reord, n_comp, n);
 590    }
 591
 592    FX64_MOV32(hi, 6); /* alpha = "011" + lerp = 0 */
 593    for (j = n_vect - 1; j >= 0; j--) {
 594       /* add in alphas */
 595       FX64_SHL(hi, 5);
 596       FX64_OR32(hi, (GLuint)(vec[j][ACOMP] / 8.0F));
 597    }
 598    for (j = n_vect - 1; j >= 0; j--) {
 599       for (i = 0; i < n_comp - 1; i++) {
 600          /* add in colors */
 601          FX64_SHL(hi, 5);
 602          FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
 603       }
 604    }
 605    ((Fx64 *)cc)[1] = hi;
 606
 607    lohi = lolo = 0;
 608    /* right microtile */
 609    for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) {
 610       lohi <<= 2;
 611       lohi |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
 612    }
 613    /* left microtile */
 614    for (; k >= 0; k--) {
 615       lolo <<= 2;
 616       lolo |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
 617    }
 618    cc[1] = lohi;
 619    cc[0] = lolo;
 620 }
 621
 622
 623 static void
 624 fxt1_quantize_ALPHA1 (GLuint *cc,
 625                       GLubyte input[N_TEXELS][MAX_COMP])
 626 {
 627    const GLint n_vect = 3; /* highest vector number in each microtile */
 628    const GLint n_comp = 4; /* 4 components: R, G, B, A */
 629    GLfloat vec[1 + 1 + 1][MAX_COMP]; /* 1.5 extrema for each sub-block */
 630    GLfloat b, iv[MAX_COMP]; /* interpolation vector */
 631    GLint i, j, k;
 632    Fx64 hi; /* high quadword */
 633    GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
 634
 635    GLint minSum;
 636    GLint maxSum;
 637    GLint minColL = 0, maxColL = 0;
 638    GLint minColR = 0, maxColR = 0;
 639    GLint sumL = 0, sumR = 0;
 640    GLint nn_comp;
 641    /* Our solution here is to find the darkest and brightest colors in
 642     * the 4x4 tile and use those as the two representative colors.
 643     * There are probably better algorithms to use (histogram-based).
 644     */
 645    nn_comp = n_comp;
 646    while ((minColL == maxColL) && nn_comp) {
 647        minSum = 2000; /* big enough */
 648        maxSum = -1; /* small enough */
 649        for (k = 0; k < N_TEXELS / 2; k++) {
 650            GLint sum = 0;
 651            for (i = 0; i < nn_comp; i++) {
 652                sum += input[k][i];
 653            }
 654            if (minSum > sum) {
 655                minSum = sum;
 656                minColL = k;
 657            }
 658            if (maxSum < sum) {
 659                maxSum = sum;
 660                maxColL = k;
 661            }
 662            sumL += sum;
 663        }
 664
 665        nn_comp--;
 666    }
 667
 668    nn_comp = n_comp;
 669    while ((minColR == maxColR) && nn_comp) {
 670        minSum = 2000; /* big enough */
 671        maxSum = -1; /* small enough */
 672        for (k = N_TEXELS / 2; k < N_TEXELS; k++) {
 673            GLint sum = 0;
 674            for (i = 0; i < nn_comp; i++) {
 675                sum += input[k][i];
 676            }
 677            if (minSum > sum) {
 678                minSum = sum;
 679                minColR = k;
 680            }
 681            if (maxSum < sum) {
 682                maxSum = sum;
 683                maxColR = k;
 684            }
 685            sumR += sum;
 686        }
 687
 688        nn_comp--;
 689    }
 690
 691    /* choose the common vector (yuck!) */
 692    {
 693       GLint j1, j2;
 694       GLint v1 = 0, v2 = 0;
 695       GLfloat err = 1e9; /* big enough */
 696       GLfloat tv[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
 697       for (i = 0; i < n_comp; i++) {
 698          tv[0][i] = input[minColL][i];
 699          tv[1][i] = input[maxColL][i];
 700          tv[2][i] = input[minColR][i];
 701          tv[3][i] = input[maxColR][i];
 702       }
 703       for (j1 = 0; j1 < 2; j1++) {
 704          for (j2 = 2; j2 < 4; j2++) {
 705             GLfloat e = 0.0F;
 706             for (i = 0; i < n_comp; i++) {
 707                e += (tv[j1][i] - tv[j2][i]) * (tv[j1][i] - tv[j2][i]);
 708             }
 709             if (e < err) {
 710                err = e;
 711                v1 = j1;
 712                v2 = j2;
 713             }
 714          }
 715       }
 716       for (i = 0; i < n_comp; i++) {
 717          vec[0][i] = tv[1 - v1][i];
 718          vec[1][i] = (tv[v1][i] * sumL + tv[v2][i] * sumR) / (sumL + sumR);
 719          vec[2][i] = tv[5 - v2][i];
 720       }
 721    }
 722
 723    /* left microtile */
 724    cc[0] = 0;
 725    if (minColL != maxColL) {
 726       /* compute interpolation vector */
 727       MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
 728
 729       /* add in texels */
 730       lolo = 0;
 731       for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
 732          GLint texel;
 733          /* interpolate color */
 734          CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
 735          /* add in texel */
 736          lolo <<= 2;
 737          lolo |= texel;
 738       }
 739
 740       cc[0] = lolo;
 741    }
 742
 743    /* right microtile */
 744    cc[1] = 0;
 745    if (minColR != maxColR) {
 746       /* compute interpolation vector */
 747       MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[1]);
 748
 749       /* add in texels */
 750       lohi = 0;
 751       for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
 752          GLint texel;
 753          /* interpolate color */
 754          CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
 755          /* add in texel */
 756          lohi <<= 2;
 757          lohi |= texel;
 758       }
 759
 760       cc[1] = lohi;
 761    }
 762
 763    FX64_MOV32(hi, 7); /* alpha = "011" + lerp = 1 */
 764    for (j = n_vect - 1; j >= 0; j--) {
 765       /* add in alphas */
 766       FX64_SHL(hi, 5);
 767       FX64_OR32(hi, (GLuint)(vec[j][ACOMP] / 8.0F));
 768    }
 769    for (j = n_vect - 1; j >= 0; j--) {
 770       for (i = 0; i < n_comp - 1; i++) {
 771          /* add in colors */
 772          FX64_SHL(hi, 5);
 773          FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
 774       }
 775    }
 776    ((Fx64 *)cc)[1] = hi;
 777 }
 778
 779
 780 static void
 781 fxt1_quantize_HI (GLuint *cc,
 782                   GLubyte input[N_TEXELS][MAX_COMP],
 783                   GLubyte reord[N_TEXELS][MAX_COMP], GLint n)
 784 {
 785    const GLint n_vect = 6; /* highest vector number */
 786    const GLint n_comp = 3; /* 3 components: R, G, B */
 787    GLfloat b = 0.0F;       /* phoudoin: silent compiler! */
 788    GLfloat iv[MAX_COMP];   /* interpolation vector */
 789    GLint i, k;
 790    GLuint hihi; /* high quadword: hi dword */
 791
 792    GLint minSum = 2000; /* big enough */
 793    GLint maxSum = -1; /* small enough */
 794    GLint minCol = 0; /* phoudoin: silent compiler! */
 795    GLint maxCol = 0; /* phoudoin: silent compiler! */
 796
 797    /* Our solution here is to find the darkest and brightest colors in
 798     * the 8x4 tile and use those as the two representative colors.
 799     * There are probably better algorithms to use (histogram-based).
 800     */
 801    for (k = 0; k < n; k++) {
 802       GLint sum = 0;
 803       for (i = 0; i < n_comp; i++) {
 804          sum += reord[k][i];
 805       }
 806       if (minSum > sum) {
 807          minSum = sum;
 808          minCol = k;
 809       }
 810       if (maxSum < sum) {
 811          maxSum = sum;
 812          maxCol = k;
 813       }
 814    }
 815
 816    hihi = 0; /* cc-hi = "00" */
 817    for (i = 0; i < n_comp; i++) {
 818       /* add in colors */
 819       hihi <<= 5;
 820       hihi |= reord[maxCol][i] >> 3;
 821    }
 822    for (i = 0; i < n_comp; i++) {
 823       /* add in colors */
 824       hihi <<= 5;
 825       hihi |= reord[minCol][i] >> 3;
 826    }
 827    cc[3] = hihi;
 828    cc[0] = cc[1] = cc[2] = 0;
 829
 830    /* compute interpolation vector */
 831    if (minCol != maxCol) {
 832       MAKEIVEC(n_vect, n_comp, iv, b, reord[minCol], reord[maxCol]);
 833    }
 834
 835    /* add in texels */
 836    for (k = N_TEXELS - 1; k >= 0; k--) {
 837       GLint t = k * 3;
 838       GLuint *kk = (GLuint *)((char *)cc + t / 8);
 839       GLint texel = n_vect + 1; /* transparent black */
 840
 841       if (!ISTBLACK(input[k])) {
 842          if (minCol != maxCol) {
 843             /* interpolate color */
 844             CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
 845             /* add in texel */
 846             kk[0] |= texel << (t & 7);
 847          }
 848       } else {
 849          /* add in texel */
 850          kk[0] |= texel << (t & 7);
 851       }
 852    }
 853 }
 854
 855
 856 static void
 857 fxt1_quantize_MIXED1 (GLuint *cc,
 858                       GLubyte input[N_TEXELS][MAX_COMP])
 859 {
 860    const GLint n_vect = 2; /* highest vector number in each microtile */
 861    const GLint n_comp = 3; /* 3 components: R, G, B */
 862    GLubyte vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
 863    GLfloat b, iv[MAX_COMP]; /* interpolation vector */
 864    GLint i, j, k;
 865    Fx64 hi; /* high quadword */
 866    GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
 867
 868    GLint minSum;
 869    GLint maxSum;
 870    GLint minColL = 0, maxColL = -1;
 871    GLint minColR = 0, maxColR = -1;
 872
 873    /* Our solution here is to find the darkest and brightest colors in
 874     * the 4x4 tile and use those as the two representative colors.
 875     * There are probably better algorithms to use (histogram-based).
 876     */
 877    minSum = 2000; /* big enough */
 878    maxSum = -1; /* small enough */
 879    for (k = 0; k < N_TEXELS / 2; k++) {
 880       if (!ISTBLACK(input[k])) {
 881          GLint sum = 0;
 882          for (i = 0; i < n_comp; i++) {
 883             sum += input[k][i];
 884          }
 885          if (minSum > sum) {
 886             minSum = sum;
 887             minColL = k;
 888          }
 889          if (maxSum < sum) {
 890             maxSum = sum;
 891             maxColL = k;
 892          }
 893       }
 894    }
 895    minSum = 2000; /* big enough */
 896    maxSum = -1; /* small enough */
 897    for (; k < N_TEXELS; k++) {
 898       if (!ISTBLACK(input[k])) {
 899          GLint sum = 0;
 900          for (i = 0; i < n_comp; i++) {
 901             sum += input[k][i];
 902          }
 903          if (minSum > sum) {
 904             minSum = sum;
 905             minColR = k;
 906          }
 907          if (maxSum < sum) {
 908             maxSum = sum;
 909             maxColR = k;
 910          }
 911       }
 912    }
 913
 914    /* left microtile */
 915    if (maxColL == -1) {
 916       /* all transparent black */
 917       cc[0] = ~0u;
 918       for (i = 0; i < n_comp; i++) {
 919          vec[0][i] = 0;
 920          vec[1][i] = 0;
 921       }
 922    } else {
 923       cc[0] = 0;
 924       for (i = 0; i < n_comp; i++) {
 925          vec[0][i] = input[minColL][i];
 926          vec[1][i] = input[maxColL][i];
 927       }
 928       if (minColL != maxColL) {
 929          /* compute interpolation vector */
 930          MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
 931
 932          /* add in texels */
 933          lolo = 0;
 934          for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
 935             GLint texel = n_vect + 1; /* transparent black */
 936             if (!ISTBLACK(input[k])) {
 937                /* interpolate color */
 938                CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
 939             }
 940             /* add in texel */
 941             lolo <<= 2;
 942             lolo |= texel;
 943          }
 944          cc[0] = lolo;
 945       }
 946    }
 947
 948    /* right microtile */
 949    if (maxColR == -1) {
 950       /* all transparent black */
 951       cc[1] = ~0u;
 952       for (i = 0; i < n_comp; i++) {
 953          vec[2][i] = 0;
 954          vec[3][i] = 0;
 955       }
 956    } else {
 957       cc[1] = 0;
 958       for (i = 0; i < n_comp; i++) {
 959          vec[2][i] = input[minColR][i];
 960          vec[3][i] = input[maxColR][i];
 961       }
 962       if (minColR != maxColR) {
 963          /* compute interpolation vector */
 964          MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);
 965
 966          /* add in texels */
 967          lohi = 0;
 968          for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
 969             GLint texel = n_vect + 1; /* transparent black */
 970             if (!ISTBLACK(input[k])) {
 971                /* interpolate color */
 972                CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
 973             }
 974             /* add in texel */
 975             lohi <<= 2;
 976             lohi |= texel;
 977          }
 978          cc[1] = lohi;
 979       }
 980    }
 981
 982    FX64_MOV32(hi, 9 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
 983    for (j = 2 * 2 - 1; j >= 0; j--) {
 984       for (i = 0; i < n_comp; i++) {
 985          /* add in colors */
 986          FX64_SHL(hi, 5);
 987          FX64_OR32(hi, vec[j][i] >> 3);
 988       }
 989    }
 990    ((Fx64 *)cc)[1] = hi;
 991 }
 992
 993
/**
 * Quantize one tile in "mixed" mode with the alpha flag cleared.
 *
 * The tile is handled as two microtiles: the first N_TEXELS / 2 entries of
 * \p input (left) and the remaining entries (right).  For each microtile two
 * representative colors are picked and every texel is replaced by a 2-bit
 * index along the line between them.
 *
 * \param cc     output block: cc[0] receives the left microtile's indices,
 *               cc[1] the right microtile's indices, and the upper 64 bits
 *               (written through an Fx64 view of cc) receive the four
 *               endpoint colors plus the 4 control bits.
 * \param input  texels of the tile; only the first n_comp (3) components of
 *               each texel are read here.
 */
static void
fxt1_quantize_MIXED0 (GLuint *cc,
                      GLubyte input[N_TEXELS][MAX_COMP])
{
   const GLint n_vect = 3; /* highest vector number in each microtile */
   const GLint n_comp = 3; /* 3 components: R, G, B */
   GLubyte vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
   GLfloat b, iv[MAX_COMP]; /* interpolation vector */
   GLint i, j, k;
   Fx64 hi; /* high quadword */
   GLuint lohi, lolo; /* low quadword: hi dword, lo dword */

   /* indices (into input[]) of the two representative texels per microtile */
   GLint minColL = 0, maxColL = 0;
   GLint minColR = 0, maxColR = 0;
#if 0
   /* Disabled alternative: pick the extrema by component sum (brightness). */
   GLint minSum;
   GLint maxSum;

   /* Our solution here is to find the darkest and brightest colors in
    * the 4x4 tile and use those as the two representative colors.
    * There are probably better algorithms to use (histogram-based).
    */
   minSum = 2000; /* big enough */
   maxSum = -1; /* small enough */
   for (k = 0; k < N_TEXELS / 2; k++) {
      GLint sum = 0;
      for (i = 0; i < n_comp; i++) {
         sum += input[k][i];
      }
      if (minSum > sum) {
         minSum = sum;
         minColL = k;
      }
      if (maxSum < sum) {
         maxSum = sum;
         maxColL = k;
      }
   }
   minSum = 2000; /* big enough */
   maxSum = -1; /* small enough */
   for (; k < N_TEXELS; k++) {
      GLint sum = 0;
      for (i = 0; i < n_comp; i++) {
         sum += input[k][i];
      }
      if (minSum > sum) {
         minSum = sum;
         minColR = k;
      }
      if (maxSum < sum) {
         maxSum = sum;
         maxColR = k;
      }
   }
#else
   GLint minVal;
   GLint maxVal;
   /* fxt1_variance() is defined elsewhere; its result is used below as a
    * component index (presumably the channel with the largest variance).
    */
   GLint maxVarL = fxt1_variance(NULL, input, n_comp, N_TEXELS / 2);
   GLint maxVarR = fxt1_variance(NULL, &input[N_TEXELS / 2], n_comp, N_TEXELS / 2);

   /* Scan the channel with max variance for lo & hi
    * and use those as the two representative colors.
    */
   minVal = 2000; /* big enough */
   maxVal = -1; /* small enough */
   for (k = 0; k < N_TEXELS / 2; k++) {
      GLint t = input[k][maxVarL];
      if (minVal > t) {
         minVal = t;
         minColL = k;
      }
      if (maxVal < t) {
         maxVal = t;
         maxColL = k;
      }
   }
   minVal = 2000; /* big enough */
   maxVal = -1; /* small enough */
   /* k continues from N_TEXELS / 2: scan the right microtile */
   for (; k < N_TEXELS; k++) {
      GLint t = input[k][maxVarR];
      if (minVal > t) {
         minVal = t;
         minColR = k;
      }
      if (maxVal < t) {
         maxVal = t;
         maxColR = k;
      }
   }
#endif

   /* left microtile */
   cc[0] = 0; /* stays 0 (every texel selects vec[0]) if both extrema coincide */
   for (i = 0; i < n_comp; i++) {
      vec[0][i] = input[minColL][i];
      vec[1][i] = input[maxColL][i];
   }
   if (minColL != maxColL) {
      /* compute interpolation vector */
      MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);

      /* add in texels */
      lolo = 0;
      /* walk backwards so that texel 0 ends up in the two lowest bits */
      for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
         GLint texel;
         /* interpolate color */
         CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
         /* add in texel */
         lolo <<= 2;
         lolo |= texel;
      }

      /* funky encoding for LSB of green:
       * bit 1 of the index word must equal the XOR of the two endpoints'
       * green bit 2 (the first bit dropped by the ">> 3" packing below).
       * The decoder in this file rebuilds the 6th green bit of color 0 as
       * "glsb ^ selb", with selb read from that same index bit.  If the
       * relation does not hold, swap the endpoints and complement the
       * index word (each 2-bit index n becomes 3 - n), which selects the
       * same colors but flips the bit.
       */
      if ((GLint)((lolo >> 1) & 1) != (((vec[1][GCOMP] ^ vec[0][GCOMP]) >> 2) & 1)) {
         for (i = 0; i < n_comp; i++) {
            vec[1][i] = input[minColL][i];
            vec[0][i] = input[maxColL][i];
         }
         lolo = ~lolo;
      }

      cc[0] = lolo;
   }

   /* right microtile */
   cc[1] = 0; /* stays 0 if both extrema coincide */
   for (i = 0; i < n_comp; i++) {
      vec[2][i] = input[minColR][i];
      vec[3][i] = input[maxColR][i];
   }
   if (minColR != maxColR) {
      /* compute interpolation vector */
      MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);

      /* add in texels */
      lohi = 0;
      /* backwards again: the first texel of the right half lands in bits 0-1 */
      for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
         GLint texel;
         /* interpolate color */
         CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
         /* add in texel */
         lohi <<= 2;
         lohi |= texel;
      }

      /* funky encoding for LSB of green (same rule as the left microtile) */
      if ((GLint)((lohi >> 1) & 1) != (((vec[3][GCOMP] ^ vec[2][GCOMP]) >> 2) & 1)) {
         for (i = 0; i < n_comp; i++) {
            vec[3][i] = input[minColR][i];
            vec[2][i] = input[maxColR][i];
         }
         lohi = ~lohi;
      }

      cc[1] = lohi;
   }

   /* Seed the high quadword with its 4 control bits, then push the four
    * endpoint colors in below them (4 colors * 3 components * 5 bits).
    *   8                         : mixed mode = "1"; bit 0 left clear, alpha = 0
    *   vec[3][GCOMP] & 4         : green LSB of the right microtile's 2nd color
    *   (vec[1][GCOMP] >> 1) & 2  : green LSB of the left microtile's 2nd color
    */
   FX64_MOV32(hi, 8 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* mixed = "1", alpha = 0 */
   for (j = 2 * 2 - 1; j >= 0; j--) {
      for (i = 0; i < n_comp; i++) {
         /* add in colors: keep the 5 most significant bits of each component */
         FX64_SHL(hi, 5);
         FX64_OR32(hi, vec[j][i] >> 3);
      }
   }
   ((Fx64 *)cc)[1] = hi;
}
1161
1162
1163 static void
1164 fxt1_quantize (GLuint *cc, const GLubyte *lines[], GLint comps)
1165 {
1166    GLint trualpha;
1167    GLubyte reord[N_TEXELS][MAX_COMP];
1168
1169    GLubyte input[N_TEXELS][MAX_COMP];
1170    GLint i, k, l;
1171
1172    if (comps == 3) {
1173       /* make the whole block opaque */
1174       memset(input, -1, sizeof(input));
1175    }
1176
1177    /* 8 texels each line */
1178    for (l = 0; l < 4; l++) {
1179       for (k = 0; k < 4; k++) {
1180          for (i = 0; i < comps; i++) {
1181             input[k + l * 4][i] = *lines[l]++;
1182          }
1183       }
1184       for (; k < 8; k++) {
1185          for (i = 0; i < comps; i++) {
1186             input[k + l * 4 + 12][i] = *lines[l]++;
1187          }
1188       }
1189    }
1190
1191    /* block layout:
1192     * 00, 01, 02, 03, 08, 09, 0a, 0b
1193     * 10, 11, 12, 13, 18, 19, 1a, 1b
1194     * 04, 05, 06, 07, 0c, 0d, 0e, 0f
1195     * 14, 15, 16, 17, 1c, 1d, 1e, 1f
1196     */
1197
1198    /* [dBorca]
1199     * stupidity flows forth from this
1200     */
1201    l = N_TEXELS;
1202    trualpha = 0;
1203    if (comps == 4) {
1204       /* skip all transparent black texels */
1205       l = 0;
1206       for (k = 0; k < N_TEXELS; k++) {
1207          /* test all components against 0 */
1208          if (!ISTBLACK(input[k])) {
1209             /* texel is not transparent black */
1210             COPY_4UBV(reord[l], input[k]);
1211             if (reord[l][ACOMP] < (255 - ALPHA_TS)) {
1212                /* non-opaque texel */
1213                trualpha = !0;
1214             }
1215             l++;
1216          }
1217       }
1218    }
1219
1220 #if 0
1221    if (trualpha) {
1222       fxt1_quantize_ALPHA0(cc, input, reord, l);
1223    } else if (l == 0) {
1224       cc[0] = cc[1] = cc[2] = -1;
1225       cc[3] = 0;
1226    } else if (l < N_TEXELS) {
1227       fxt1_quantize_HI(cc, input, reord, l);
1228    } else {
1229       fxt1_quantize_CHROMA(cc, input);
1230    }
1231    (void)fxt1_quantize_ALPHA1;
1232    (void)fxt1_quantize_MIXED1;
1233    (void)fxt1_quantize_MIXED0;
1234 #else
1235    if (trualpha) {
1236       fxt1_quantize_ALPHA1(cc, input);
1237    } else if (l == 0) {
1238       cc[0] = cc[1] = cc[2] = ~0u;
1239       cc[3] = 0;
1240    } else if (l < N_TEXELS) {
1241       fxt1_quantize_MIXED1(cc, input);
1242    } else {
1243       fxt1_quantize_MIXED0(cc, input);
1244    }
1245    (void)fxt1_quantize_ALPHA0;
1246    (void)fxt1_quantize_HI;
1247    (void)fxt1_quantize_CHROMA;
1248 #endif
1249 }
1250
1251
1252
1253 /**
1254  * Upscale an image by replication, not (typical) stretching.
1255  * We use this when the image width or height is less than a
1256  * certain size (4, 8) and we need to upscale an image.
1257  */
1258 static void
1259 upscale_teximage2d(GLsizei inWidth, GLsizei inHeight,
1260                    GLsizei outWidth, GLsizei outHeight,
1261                    GLint comps, const GLubyte *src, GLint srcRowStride,
1262                    GLubyte *dest )
1263 {
1264    GLint i, j, k;
1265
1266    assert(outWidth >= inWidth);
1267    assert(outHeight >= inHeight);
1268 #if 0
1269    assert(inWidth == 1 || inWidth == 2 || inHeight == 1 || inHeight == 2);
1270    assert((outWidth & 3) == 0);
1271    assert((outHeight & 3) == 0);
1272 #endif
1273
1274    for (i = 0; i < outHeight; i++) {
1275       const GLint ii = i % inHeight;
1276       for (j = 0; j < outWidth; j++) {
1277          const GLint jj = j % inWidth;
1278          for (k = 0; k < comps; k++) {
1279             dest[(i * outWidth + j) * comps + k]
1280                = src[ii * srcRowStride + jj * comps + k];
1281          }
1282       }
1283    }
1284 }
1285
1286
1287 static void
1288 fxt1_encode (GLuint width, GLuint height, GLint comps,
1289              const void *source, GLint srcRowStride,
1290              void *dest, GLint destRowStride)
1291 {
1292    GLuint x, y;
1293    const GLubyte *data;
1294    GLuint *encoded = (GLuint *)dest;
1295    void *newSource = NULL;
1296
1297    assert(comps == 3 || comps == 4);
1298
1299    /* Replicate image if width is not M8 or height is not M4 */
1300    if ((width & 7) | (height & 3)) {
1301       GLint newWidth = (width + 7) & ~7;
1302       GLint newHeight = (height + 3) & ~3;
1303       newSource = malloc(comps * newWidth * newHeight * sizeof(GLubyte));
1304       if (!newSource) {
1305          GET_CURRENT_CONTEXT(ctx);
1306          _mesa_error(ctx, GL_OUT_OF_MEMORY, "texture compression");
1307          goto cleanUp;
1308       }
1309       upscale_teximage2d(width, height, newWidth, newHeight,
1310                          comps, (const GLubyte *) source,
1311                          srcRowStride, (GLubyte *) newSource);
1312       source = newSource;
1313       width = newWidth;
1314       height = newHeight;
1315       srcRowStride = comps * newWidth;
1316    }
1317
1318    data = (const GLubyte *) source;
1319    destRowStride = (destRowStride - width * 2) / 4;
1320    for (y = 0; y < height; y += 4) {
1321       GLuint offs = 0 + (y + 0) * srcRowStride;
1322       for (x = 0; x < width; x += 8) {
1323          const GLubyte *lines[4];
1324          lines[0] = &data[offs];
1325          lines[1] = lines[0] + srcRowStride;
1326          lines[2] = lines[1] + srcRowStride;
1327          lines[3] = lines[2] + srcRowStride;
1328          offs += 8 * comps;
1329          fxt1_quantize(encoded, lines, comps);
1330          /* 128 bits per 8x4 block */
1331          encoded += 4;
1332       }
1333       encoded += destRowStride;
1334    }
1335
1336  cleanUp:
1337    free(newSource);
1338 }
1339
1340
1341 /***************************************************************************\
1342  * FXT1 decoder
1343  *
1344  * The decoder is based on GL_3DFX_texture_compression_FXT1
1345  * specification and serves as a concept for the encoder.
1346 \***************************************************************************/
1347
1348
1349 /* lookup table for scaling 5 bit colors up to 8 bits */
1350 static const GLubyte _rgb_scale_5[] = {
1351    0,   8,   16,  25,  33,  41,  49,  58,
1352    66,  74,  82,  90,  99,  107, 115, 123,
1353    132, 140, 148, 156, 165, 173, 181, 189,
1354    197, 206, 214, 222, 230, 239, 247, 255
1355 };
1356
1357 /* lookup table for scaling 6 bit colors up to 8 bits */
1358 static const GLubyte _rgb_scale_6[] = {
1359    0,   4,   8,   12,  16,  20,  24,  28,
1360    32,  36,  40,  45,  49,  53,  57,  61,
1361    65,  69,  73,  77,  81,  85,  89,  93,
1362    97,  101, 105, 109, 113, 117, 121, 125,
1363    130, 134, 138, 142, 146, 150, 154, 158,
1364    162, 166, 170, 174, 178, 182, 186, 190,
1365    194, 198, 202, 206, 210, 215, 219, 223,
1366    227, 231, 235, 239, 243, 247, 251, 255
1367 };
1368
1369
/*
 * Bit-field helpers for a 128-bit block viewed as four 32-bit words.
 *
 * CC_SEL(cc, which)   - the word holding bit `which`, shifted right so that
 *                       this bit lands in position 0.  Higher bits are NOT
 *                       masked off (callers mask with UP5/UP6 or "& 1"), and
 *                       a field that crosses a word boundary cannot be
 *                       fetched this way.
 * UP5(c)              - expand the low 5 bits of c to 8 bits.
 * UP6(c, b)           - expand the 6-bit value built from the low 5 bits of
 *                       c (upper part) and the low bit of b (LSB) to 8 bits.
 * LERP(n, t, c0, c1)  - rounded integer interpolation, t/n of the way from
 *                       c0 to c1.
 *
 * Every expansion is fully parenthesized so the macros can be embedded in
 * larger expressions; CC_SEL keeps the const qualifier of the data it reads.
 */
#define CC_SEL(cc, which) (((const GLuint *)(cc))[(which) / 32] >> ((which) & 31))
#define UP5(c) (_rgb_scale_5[(c) & 31])
#define UP6(c, b) (_rgb_scale_6[(((c) & 31) << 1) | ((b) & 1)])
#define LERP(n, t, c0, c1) ((((n) - (t)) * (c0) + (t) * (c1) + (n) / 2) / (n))
1374
1375
1376 static void
1377 fxt1_decode_1HI (const GLubyte *code, GLint t, GLubyte *rgba)
1378 {
1379    const GLuint *cc;
1380
1381    t *= 3;
1382    cc = (const GLuint *)(code + t / 8);
1383    t = (cc[0] >> (t & 7)) & 7;
1384
1385    if (t == 7) {
1386       rgba[RCOMP] = rgba[GCOMP] = rgba[BCOMP] = rgba[ACOMP] = 0;
1387    } else {
1388       GLubyte r, g, b;
1389       cc = (const GLuint *)(code + 12);
1390       if (t == 0) {
1391          b = UP5(CC_SEL(cc, 0));
1392          g = UP5(CC_SEL(cc, 5));
1393          r = UP5(CC_SEL(cc, 10));
1394       } else if (t == 6) {
1395          b = UP5(CC_SEL(cc, 15));
1396          g = UP5(CC_SEL(cc, 20));
1397          r = UP5(CC_SEL(cc, 25));
1398       } else {
1399          b = LERP(6, t, UP5(CC_SEL(cc, 0)), UP5(CC_SEL(cc, 15)));
1400          g = LERP(6, t, UP5(CC_SEL(cc, 5)), UP5(CC_SEL(cc, 20)));
1401          r = LERP(6, t, UP5(CC_SEL(cc, 10)), UP5(CC_SEL(cc, 25)));
1402       }
1403       rgba[RCOMP] = r;
1404       rgba[GCOMP] = g;
1405       rgba[BCOMP] = b;
1406       rgba[ACOMP] = 255;
1407    }
1408 }
1409
1410
1411 static void
1412 fxt1_decode_1CHROMA (const GLubyte *code, GLint t, GLubyte *rgba)
1413 {
1414    const GLuint *cc;
1415    GLuint kk;
1416
1417    cc = (const GLuint *)code;
1418    if (t & 16) {
1419       cc++;
1420       t &= 15;
1421    }
1422    t = (cc[0] >> (t * 2)) & 3;
1423
1424    t *= 15;
1425    cc = (const GLuint *)(code + 8 + t / 8);
1426    kk = cc[0] >> (t & 7);
1427    rgba[BCOMP] = UP5(kk);
1428    rgba[GCOMP] = UP5(kk >> 5);
1429    rgba[RCOMP] = UP5(kk >> 10);
1430    rgba[ACOMP] = 255;
1431 }
1432
1433
1434 static void
1435 fxt1_decode_1MIXED (const GLubyte *code, GLint t, GLubyte *rgba)
1436 {
1437    const GLuint *cc;
1438    GLuint col[2][3];
1439    GLint glsb, selb;
1440
1441    cc = (const GLuint *)code;
1442    if (t & 16) {
1443       t &= 15;
1444       t = (cc[1] >> (t * 2)) & 3;
1445       /* col 2 */
1446       col[0][BCOMP] = (*(const GLuint *)(code + 11)) >> 6;
1447       col[0][GCOMP] = CC_SEL(cc, 99);
1448       col[0][RCOMP] = CC_SEL(cc, 104);
1449       /* col 3 */
1450       col[1][BCOMP] = CC_SEL(cc, 109);
1451       col[1][GCOMP] = CC_SEL(cc, 114);
1452       col[1][RCOMP] = CC_SEL(cc, 119);
1453       glsb = CC_SEL(cc, 126);
1454       selb = CC_SEL(cc, 33);
1455    } else {
1456       t = (cc[0] >> (t * 2)) & 3;
1457       /* col 0 */
1458       col[0][BCOMP] = CC_SEL(cc, 64);
1459       col[0][GCOMP] = CC_SEL(cc, 69);
1460       col[0][RCOMP] = CC_SEL(cc, 74);
1461       /* col 1 */
1462       col[1][BCOMP] = CC_SEL(cc, 79);
1463       col[1][GCOMP] = CC_SEL(cc, 84);
1464       col[1][RCOMP] = CC_SEL(cc, 89);
1465       glsb = CC_SEL(cc, 125);
1466       selb = CC_SEL(cc, 1);
1467    }
1468
1469    if (CC_SEL(cc, 124) & 1) {
1470       /* alpha[0] == 1 */
1471
1472       if (t == 3) {
1473          /* zero */
1474          rgba[RCOMP] = rgba[BCOMP] = rgba[GCOMP] = rgba[ACOMP] = 0;
1475       } else {
1476          GLubyte r, g, b;
1477          if (t == 0) {
1478             b = UP5(col[0][BCOMP]);
1479             g = UP5(col[0][GCOMP]);
1480             r = UP5(col[0][RCOMP]);
1481          } else if (t == 2) {
1482             b = UP5(col[1][BCOMP]);
1483             g = UP6(col[1][GCOMP], glsb);
1484             r = UP5(col[1][RCOMP]);
1485          } else {
1486             b = (UP5(col[0][BCOMP]) + UP5(col[1][BCOMP])) / 2;
1487             g = (UP5(col[0][GCOMP]) + UP6(col[1][GCOMP], glsb)) / 2;
1488             r = (UP5(col[0][RCOMP]) + UP5(col[1][RCOMP])) / 2;
1489          }
1490          rgba[RCOMP] = r;
1491          rgba[GCOMP] = g;
1492          rgba[BCOMP] = b;
1493          rgba[ACOMP] = 255;
1494       }
1495    } else {
1496       /* alpha[0] == 0 */
1497       GLubyte r, g, b;
1498       if (t == 0) {
1499          b = UP5(col[0][BCOMP]);
1500          g = UP6(col[0][GCOMP], glsb ^ selb);
1501          r = UP5(col[0][RCOMP]);
1502       } else if (t == 3) {
1503          b = UP5(col[1][BCOMP]);
1504          g = UP6(col[1][GCOMP], glsb);
1505          r = UP5(col[1][RCOMP]);
1506       } else {
1507          b = LERP(3, t, UP5(col[0][BCOMP]), UP5(col[1][BCOMP]));
1508          g = LERP(3, t, UP6(col[0][GCOMP], glsb ^ selb),
1509                         UP6(col[1][GCOMP], glsb));
1510          r = LERP(3, t, UP5(col[0][RCOMP]), UP5(col[1][RCOMP]));
1511       }
1512       rgba[RCOMP] = r;
1513       rgba[GCOMP] = g;
1514       rgba[BCOMP] = b;
1515       rgba[ACOMP] = 255;
1516    }
1517 }
1518
1519
1520 static void
1521 fxt1_decode_1ALPHA (const GLubyte *code, GLint t, GLubyte *rgba)
1522 {
1523    const GLuint *cc;
1524    GLubyte r, g, b, a;
1525
1526    cc = (const GLuint *)code;
1527    if (CC_SEL(cc, 124) & 1) {
1528       /* lerp == 1 */
1529       GLuint col0[4];
1530
1531       if (t & 16) {
1532          t &= 15;
1533          t = (cc[1] >> (t * 2)) & 3;
1534          /* col 2 */
1535          col0[BCOMP] = (*(const GLuint *)(code + 11)) >> 6;
1536          col0[GCOMP] = CC_SEL(cc, 99);
1537          col0[RCOMP] = CC_SEL(cc, 104);
1538          col0[ACOMP] = CC_SEL(cc, 119);
1539       } else {
1540          t = (cc[0] >> (t * 2)) & 3;
1541          /* col 0 */
1542          col0[BCOMP] = CC_SEL(cc, 64);
1543          col0[GCOMP] = CC_SEL(cc, 69);
1544          col0[RCOMP] = CC_SEL(cc, 74);
1545          col0[ACOMP] = CC_SEL(cc, 109);
1546       }
1547
1548       if (t == 0) {
1549          b = UP5(col0[BCOMP]);
1550          g = UP5(col0[GCOMP]);
1551          r = UP5(col0[RCOMP]);
1552          a = UP5(col0[ACOMP]);
1553       } else if (t == 3) {
1554          b = UP5(CC_SEL(cc, 79));
1555          g = UP5(CC_SEL(cc, 84));
1556          r = UP5(CC_SEL(cc, 89));
1557          a = UP5(CC_SEL(cc, 114));
1558       } else {
1559          b = LERP(3, t, UP5(col0[BCOMP]), UP5(CC_SEL(cc, 79)));
1560          g = LERP(3, t, UP5(col0[GCOMP]), UP5(CC_SEL(cc, 84)));
1561          r = LERP(3, t, UP5(col0[RCOMP]), UP5(CC_SEL(cc, 89)));
1562          a = LERP(3, t, UP5(col0[ACOMP]), UP5(CC_SEL(cc, 114)));
1563       }
1564    } else {
1565       /* lerp == 0 */
1566
1567       if (t & 16) {
1568          cc++;
1569          t &= 15;
1570       }
1571       t = (cc[0] >> (t * 2)) & 3;
1572
1573       if (t == 3) {
1574          /* zero */
1575          r = g = b = a = 0;
1576       } else {
1577          GLuint kk;
1578          cc = (const GLuint *)code;
1579          a = UP5(cc[3] >> (t * 5 + 13));
1580          t *= 15;
1581          cc = (const GLuint *)(code + 8 + t / 8);
1582          kk = cc[0] >> (t & 7);
1583          b = UP5(kk);
1584          g = UP5(kk >> 5);
1585          r = UP5(kk >> 10);
1586       }
1587    }
1588    rgba[RCOMP] = r;
1589    rgba[GCOMP] = g;
1590    rgba[BCOMP] = b;
1591    rgba[ACOMP] = a;
1592 }
1593
1594
1595 static void
1596 fxt1_decode_1 (const void *texture, GLint stride, /* in pixels */
1597                GLint i, GLint j, GLubyte *rgba)
1598 {
1599    static void (*decode_1[]) (const GLubyte *, GLint, GLubyte *) = {
1600       fxt1_decode_1HI,     /* cc-high   = "00?" */
1601       fxt1_decode_1HI,     /* cc-high   = "00?" */
1602       fxt1_decode_1CHROMA, /* cc-chroma = "010" */
1603       fxt1_decode_1ALPHA,  /* alpha     = "011" */
1604       fxt1_decode_1MIXED,  /* mixed     = "1??" */
1605       fxt1_decode_1MIXED,  /* mixed     = "1??" */
1606       fxt1_decode_1MIXED,  /* mixed     = "1??" */
1607       fxt1_decode_1MIXED   /* mixed     = "1??" */
1608    };
1609
1610    const GLubyte *code = (const GLubyte *)texture +
1611                          ((j / 4) * (stride / 8) + (i / 8)) * 16;
1612    GLint mode = CC_SEL(code, 125);
1613    GLint t = i & 7;
1614
1615    if (t & 4) {
1616       t += 12;
1617    }
1618    t += (j & 3) * 4;
1619
1620    decode_1[mode](code, t, rgba);
1621 }
1622
1623
1624
1625
1626 static void
1627 fetch_rgb_fxt1(const GLubyte *map,
1628                GLint rowStride, GLint i, GLint j, GLfloat *texel)
1629 {
1630    GLubyte rgba[4];
1631    fxt1_decode_1(map, rowStride, i, j, rgba);
1632    texel[RCOMP] = UBYTE_TO_FLOAT(rgba[RCOMP]);
1633    texel[GCOMP] = UBYTE_TO_FLOAT(rgba[GCOMP]);
1634    texel[BCOMP] = UBYTE_TO_FLOAT(rgba[BCOMP]);
1635    texel[ACOMP] = 1.0F;
1636 }
1637
1638
1639 static void
1640 fetch_rgba_fxt1(const GLubyte *map,
1641                 GLint rowStride, GLint i, GLint j, GLfloat *texel)
1642 {
1643    GLubyte rgba[4];
1644    fxt1_decode_1(map, rowStride, i, j, rgba);
1645    texel[RCOMP] = UBYTE_TO_FLOAT(rgba[RCOMP]);
1646    texel[GCOMP] = UBYTE_TO_FLOAT(rgba[GCOMP]);
1647    texel[BCOMP] = UBYTE_TO_FLOAT(rgba[BCOMP]);
1648    texel[ACOMP] = UBYTE_TO_FLOAT(rgba[ACOMP]);
1649 }
1650
1651
1652 compressed_fetch_func
1653 _mesa_get_fxt_fetch_func(mesa_format format)
1654 {
1655    switch (format) {
1656    case MESA_FORMAT_RGB_FXT1:
1657       return fetch_rgb_fxt1;
1658    case MESA_FORMAT_RGBA_FXT1:
1659       return fetch_rgba_fxt1;
1660    default:
1661       return NULL;
1662    }
1663 }