src/mesa/main/texcompress_fxt1.c

   1 /*
   2  * Mesa 3-D graphics library
   3  *
   4  * Copyright (C) 1999-2008  Brian Paul   All Rights Reserved.
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a
   7  * copy of this software and associated documentation files (the "Software"),
   8  * to deal in the Software without restriction, including without limitation
   9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  10  * and/or sell copies of the Software, and to permit persons to whom the
  11  * Software is furnished to do so, subject to the following conditions:
  12  *
  13  * The above copyright notice and this permission notice shall be included
  14  * in all copies or substantial portions of the Software.
  15  *
  16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
  20  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  21  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  22  * OTHER DEALINGS IN THE SOFTWARE.
  23  */
  24
  25
  26 /**
  27  * \file texcompress_fxt1.c
  28  * GL_3DFX_texture_compression_FXT1 support.
  29  */
  30
  31
  32 #include "glheader.h"
  33 #include "imports.h"
  34 #include "colormac.h"
  35 #include "image.h"
  36 #include "macros.h"
  37 #include "mipmap.h"
  38 #include "texcompress.h"
  39 #include "texcompress_fxt1.h"
  40 #include "texstore.h"
  41
  42
/*
 * Prototype only; the definition is not in this part of the file.
 * The texstore functions below call it as
 *    fxt1_encode(srcWidth, srcHeight, comps, pixels, srcRowStride,
 *                dst, dstRowStride)
 * with comps == 3 for RGB input and comps == 4 for RGBA input.
 */
static void
fxt1_encode (GLuint width, GLuint height, GLint comps,
             const void *source, GLint srcRowStride,
             void *dest, GLint destRowStride);

/*
 * Prototype only; the definition is not in this part of the file.
 * NOTE(review): judging by the name and signature this decodes the single
 * texel at (i, j) of an FXT1 image into rgba[] -- confirm against the
 * definition.
 */
static void
fxt1_decode_1 (const void *texture, GLint stride,
               GLint i, GLint j, GLubyte *rgba);
  51
  52
  53 /**
  54  * Store user's image in rgb_fxt1 format.
  55  */
  56 GLboolean
  57 _mesa_texstore_rgb_fxt1(TEXSTORE_PARAMS)
  58 {
  59    const GLubyte *pixels;
  60    GLint srcRowStride;
  61    GLubyte *dst;
  62    const GLubyte *tempImage = NULL;
  63
  64    ASSERT(dstFormat == MESA_FORMAT_RGB_FXT1);
  65
  66    if (srcFormat != GL_RGB ||
  67        srcType != GL_UNSIGNED_BYTE ||
  68        ctx->_ImageTransferState ||
  69        srcPacking->RowLength != srcWidth ||
  70        srcPacking->SwapBytes) {
  71       /* convert image to RGB/GLubyte */
  72       tempImage = _mesa_make_temp_ubyte_image(ctx, dims,
  73                                              baseInternalFormat,
  74                                              _mesa_get_format_base_format(dstFormat),
  75                                              srcWidth, srcHeight, srcDepth,
  76                                              srcFormat, srcType, srcAddr,
  77                                              srcPacking);
  78       if (!tempImage)
  79          return GL_FALSE; /* out of memory */
  80       pixels = tempImage;
  81       srcRowStride = 3 * srcWidth;
  82       srcFormat = GL_RGB;
  83    }
  84    else {
  85       pixels = _mesa_image_address2d(srcPacking, srcAddr, srcWidth, srcHeight,
  86                                      srcFormat, srcType, 0, 0);
  87
  88       srcRowStride = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat,
  89                                             srcType) / sizeof(GLubyte);
  90    }
  91
  92    dst = dstSlices[0];
  93
  94    fxt1_encode(srcWidth, srcHeight, 3, pixels, srcRowStride,
  95                dst, dstRowStride);
  96
  97    free((void*) tempImage);
  98
  99    return GL_TRUE;
 100 }
 101
 102
 103 /**
 104  * Store user's image in rgba_fxt1 format.
 105  */
 106 GLboolean
 107 _mesa_texstore_rgba_fxt1(TEXSTORE_PARAMS)
 108 {
 109    const GLubyte *pixels;
 110    GLint srcRowStride;
 111    GLubyte *dst;
 112    const GLubyte *tempImage = NULL;
 113
 114    ASSERT(dstFormat == MESA_FORMAT_RGBA_FXT1);
 115
 116    if (srcFormat != GL_RGBA ||
 117        srcType != GL_UNSIGNED_BYTE ||
 118        ctx->_ImageTransferState ||
 119        srcPacking->SwapBytes) {
 120       /* convert image to RGBA/GLubyte */
 121       tempImage = _mesa_make_temp_ubyte_image(ctx, dims,
 122                                              baseInternalFormat,
 123                                              _mesa_get_format_base_format(dstFormat),
 124                                              srcWidth, srcHeight, srcDepth,
 125                                              srcFormat, srcType, srcAddr,
 126                                              srcPacking);
 127       if (!tempImage)
 128          return GL_FALSE; /* out of memory */
 129       pixels = tempImage;
 130       srcRowStride = 4 * srcWidth;
 131       srcFormat = GL_RGBA;
 132    }
 133    else {
 134       pixels = _mesa_image_address2d(srcPacking, srcAddr, srcWidth, srcHeight,
 135                                      srcFormat, srcType, 0, 0);
 136
 137       srcRowStride = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat,
 138                                             srcType) / sizeof(GLubyte);
 139    }
 140
 141    dst = dstSlices[0];
 142
 143    fxt1_encode(srcWidth, srcHeight, 4, pixels, srcRowStride,
 144                dst, dstRowStride);
 145
 146    free((void*) tempImage);
 147
 148    return GL_TRUE;
 149 }
 150
 151
 152 /***************************************************************************\
 153  * FXT1 encoder
 154  *
 155  * The encoder was built by reversing the decoder,
 156  * and is vaguely based on Texus2 by 3dfx. Note that this code
 157  * is merely a proof of concept, since it is highly UNoptimized;
 158  * moreover, it is sub-optimal due to initial conditions passed
 159  * to Lloyd's algorithm (the interpolation modes are even worse).
 160 \***************************************************************************/
 161
 162
 163 #define MAX_COMP 4 /* ever needed maximum number of components in texel */
 164 #define MAX_VECT 4 /* ever needed maximum number of base vectors to find */
 165 #define N_TEXELS 32 /* number of texels in a block (always 32) */
 166 #define LL_N_REP 50 /* number of iterations in lloyd's vq */
 167 #define LL_RMS_D 10 /* fault tolerance (maximum delta) */
 168 #define LL_RMS_E 255 /* fault tolerance (maximum error) */
 169 #define ALPHA_TS 2 /* alpha threshold: (255 - ALPHA_TS) deemed opaque */
 170 #define ISTBLACK(v) (*((GLuint *)(v)) == 0)
 171
 172
/*
 * Define a 64-bit unsigned integer type (Fx64) and the three helper
 * macros the quantizers use to assemble the upper 64 bits of a block:
 *
 *   FX64_MOV32(a, b)  load the 32-bit value b into a
 *   FX64_OR32(a, b)   OR the 32-bit value b into the low bits of a
 *   FX64_SHL(a, c)    shift a left by c bits
 *
 * The macro arguments are not parenthesized and the native versions expand
 * to plain assignment expressions, so pass simple lvalues / expressions.
 *
 * The "#if 1" hard-selects the native uint64_t implementation; the
 * two-word struct emulation in the #else branch is compiled out.
 */
#if 1

#define FX64_NATIVE 1

typedef uint64_t Fx64;

#define FX64_MOV32(a, b) a = b
#define FX64_OR32(a, b)  a |= b
#define FX64_SHL(a, c)   a <<= c

#else

#define FX64_NATIVE 0

/* 64-bit value emulated as two 32-bit halves */
typedef struct {
   GLuint lo, hi;
} Fx64;

/*
 * NOTE(review): FX64_MOV32 assigns only .lo, so .hi keeps whatever value
 * it had before; the following FX64_SHL with c < 32 then reads that .hi.
 */
#define FX64_MOV32(a, b) a.lo = b
#define FX64_OR32(a, b)  a.lo |= b

/*
 * Shift across the two halves.  For c >= 32 the low word moves entirely
 * into the high word.  NOTE(review): c == 0 would evaluate
 * a.lo >> 32 in the else branch; the visible callers only pass 5.
 */
#define FX64_SHL(a, c)                                 \
   do {                                                \
       if ((c) >= 32) {                                \
          a.hi = a.lo << ((c) - 32);                   \
          a.lo = 0;                                    \
       } else {                                        \
          a.hi = (a.hi << (c)) | (a.lo >> (32 - (c))); \
          a.lo <<= (c);                                \
       }                                               \
   } while (0)

#endif
 209
 210
/* Per-component weight used by MAKEIVEC; currently 1 for every component. */
#define F(i) (GLfloat)1 /* can be used to obtain an oblong metric: 0.30 / 0.59 / 0.11 */
/* When non-zero, CALCCDOT clamps its result to [0, NV]. */
#define SAFECDOT 1 /* for paranoids */

/*
 * Build the interpolation vector IV and offset B for the segment V0 -> V1
 * so that CALCCDOT can map a color to an index in 0..NV along that segment.
 *
 * After the macro runs:
 *    IV[i] = (V1[i] - V0[i]) * F(i) * F(i) * NV / d2
 *    B     = -(sum of unscaled IV[i] * V0[i]) * NV / d2 + 0.5
 * where d2 is the sum of squares of (V1[i] - V0[i]) * F(i).
 *
 * The loop counter "i" is NOT declared here: it must be an integer
 * variable already in scope at the point of use.
 * If V0 and V1 are identical over the NC components, d2 is 0 and the
 * division yields a non-finite rd2; callers are expected to avoid that.
 */
#define MAKEIVEC(NV, NC, IV, B, V0, V1)  \
   do {                                  \
      /* compute interpolation vector */ \
      GLfloat d2 = 0.0F;                 \
      GLfloat rd2;                       \
                                         \
      for (i = 0; i < NC; i++) {         \
         IV[i] = (V1[i] - V0[i]) * F(i); \
         d2 += IV[i] * IV[i];            \
      }                                  \
      rd2 = (GLfloat)NV / d2;            \
      B = 0;                             \
      for (i = 0; i < NC; i++) {         \
         IV[i] *= F(i);                  \
         B -= IV[i] * V0[i];             \
         IV[i] *= rd2;                   \
      }                                  \
      B = B * rd2 + 0.5f;                \
   } while (0)

/*
 * Compute the interpolation index of color V: TEXEL = (GLint)(V . IV + B),
 * i.e. the dot product plus offset truncated by the integer cast (B
 * already contains the +0.5 rounding term).  With SAFECDOT enabled the
 * result is clamped to the range [0, NV].
 *
 * Like MAKEIVEC, this uses the caller's loop counter "i".
 */
#define CALCCDOT(TEXEL, NV, NC, IV, B, V)\
   do {                                  \
      GLfloat dot = 0.0F;                \
      for (i = 0; i < NC; i++) {         \
         dot += V[i] * IV[i];            \
      }                                  \
      TEXEL = (GLint)(dot + B);          \
      if (SAFECDOT) {                    \
         if (TEXEL < 0) {                \
            TEXEL = 0;                   \
         } else if (TEXEL > NV) {        \
            TEXEL = NV;                  \
         }                               \
      }                                  \
   } while (0)
 249
 250
 251 static GLint
 252 fxt1_bestcol (GLfloat vec[][MAX_COMP], GLint nv,
 253               GLubyte input[MAX_COMP], GLint nc)
 254 {
 255    GLint i, j, best = -1;
 256    GLfloat err = 1e9; /* big enough */
 257
 258    for (j = 0; j < nv; j++) {
 259       GLfloat e = 0.0F;
 260       for (i = 0; i < nc; i++) {
 261          e += (vec[j][i] - input[i]) * (vec[j][i] - input[i]);
 262       }
 263       if (e < err) {
 264          err = e;
 265          best = j;
 266       }
 267    }
 268
 269    return best;
 270 }
 271
 272
 273 static GLint
 274 fxt1_worst (GLfloat vec[MAX_COMP],
 275             GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
 276 {
 277    GLint i, k, worst = -1;
 278    GLfloat err = -1.0F; /* small enough */
 279
 280    for (k = 0; k < n; k++) {
 281       GLfloat e = 0.0F;
 282       for (i = 0; i < nc; i++) {
 283          e += (vec[i] - input[k][i]) * (vec[i] - input[k][i]);
 284       }
 285       if (e > err) {
 286          err = e;
 287          worst = k;
 288       }
 289    }
 290
 291    return worst;
 292 }
 293
 294
 295 static GLint
 296 fxt1_variance (GLdouble variance[MAX_COMP],
 297                GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
 298 {
 299    GLint i, k, best = 0;
 300    GLint sx, sx2;
 301    GLdouble var, maxvar = -1; /* small enough */
 302    GLdouble teenth = 1.0 / n;
 303
 304    for (i = 0; i < nc; i++) {
 305       sx = sx2 = 0;
 306       for (k = 0; k < n; k++) {
 307          GLint t = input[k][i];
 308          sx += t;
 309          sx2 += t * t;
 310       }
 311       var = sx2 * teenth - sx * sx * teenth * teenth;
 312       if (maxvar < var) {
 313          maxvar = var;
 314          best = i;
 315       }
 316       if (variance) {
 317          variance[i] = var;
 318       }
 319    }
 320
 321    return best;
 322 }
 323
 324
 325 static GLint
 326 fxt1_choose (GLfloat vec[][MAX_COMP], GLint nv,
 327              GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
 328 {
 329 #if 0
 330    /* Choose colors from a grid.
 331     */
 332    GLint i, j;
 333
 334    for (j = 0; j < nv; j++) {
 335       GLint m = j * (n - 1) / (nv - 1);
 336       for (i = 0; i < nc; i++) {
 337          vec[j][i] = input[m][i];
 338       }
 339    }
 340 #else
 341    /* Our solution here is to find the darkest and brightest colors in
 342     * the 8x4 tile and use those as the two representative colors.
 343     * There are probably better algorithms to use (histogram-based).
 344     */
 345    GLint i, j, k;
 346    GLint minSum = 2000; /* big enough */
 347    GLint maxSum = -1; /* small enough */
 348    GLint minCol = 0; /* phoudoin: silent compiler! */
 349    GLint maxCol = 0; /* phoudoin: silent compiler! */
 350
 351    struct {
 352       GLint flag;
 353       GLint key;
 354       GLint freq;
 355       GLint idx;
 356    } hist[N_TEXELS];
 357    GLint lenh = 0;
 358
 359    memset(hist, 0, sizeof(hist));
 360
 361    for (k = 0; k < n; k++) {
 362       GLint l;
 363       GLint key = 0;
 364       GLint sum = 0;
 365       for (i = 0; i < nc; i++) {
 366          key <<= 8;
 367          key |= input[k][i];
 368          sum += input[k][i];
 369       }
 370       for (l = 0; l < n; l++) {
 371          if (!hist[l].flag) {
 372             /* alloc new slot */
 373             hist[l].flag = !0;
 374             hist[l].key = key;
 375             hist[l].freq = 1;
 376             hist[l].idx = k;
 377             lenh = l + 1;
 378             break;
 379          } else if (hist[l].key == key) {
 380             hist[l].freq++;
 381             break;
 382          }
 383       }
 384       if (minSum > sum) {
 385          minSum = sum;
 386          minCol = k;
 387       }
 388       if (maxSum < sum) {
 389          maxSum = sum;
 390          maxCol = k;
 391       }
 392    }
 393
 394    if (lenh <= nv) {
 395       for (j = 0; j < lenh; j++) {
 396          for (i = 0; i < nc; i++) {
 397             vec[j][i] = (GLfloat)input[hist[j].idx][i];
 398          }
 399       }
 400       for (; j < nv; j++) {
 401          for (i = 0; i < nc; i++) {
 402             vec[j][i] = vec[0][i];
 403          }
 404       }
 405       return 0;
 406    }
 407
 408    for (j = 0; j < nv; j++) {
 409       for (i = 0; i < nc; i++) {
 410          vec[j][i] = ((nv - 1 - j) * input[minCol][i] + j * input[maxCol][i] + (nv - 1) / 2) / (GLfloat)(nv - 1);
 411       }
 412    }
 413 #endif
 414
 415    return !0;
 416 }
 417
 418
 419 static GLint
 420 fxt1_lloyd (GLfloat vec[][MAX_COMP], GLint nv,
 421             GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
 422 {
 423    /* Use the generalized lloyd's algorithm for VQ:
 424     *     find 4 color vectors.
 425     *
 426     *     for each sample color
 427     *         sort to nearest vector.
 428     *
 429     *     replace each vector with the centroid of its matching colors.
 430     *
 431     *     repeat until RMS doesn't improve.
 432     *
 433     *     if a color vector has no samples, or becomes the same as another
 434     *     vector, replace it with the color which is farthest from a sample.
 435     *
 436     * vec[][MAX_COMP]           initial vectors and resulting colors
 437     * nv                        number of resulting colors required
 438     * input[N_TEXELS][MAX_COMP] input texels
 439     * nc                        number of components in input / vec
 440     * n                         number of input samples
 441     */
 442
 443    GLint sum[MAX_VECT][MAX_COMP]; /* used to accumulate closest texels */
 444    GLint cnt[MAX_VECT]; /* how many times a certain vector was chosen */
 445    GLfloat error, lasterror = 1e9;
 446
 447    GLint i, j, k, rep;
 448
 449    /* the quantizer */
 450    for (rep = 0; rep < LL_N_REP; rep++) {
 451       /* reset sums & counters */
 452       for (j = 0; j < nv; j++) {
 453          for (i = 0; i < nc; i++) {
 454             sum[j][i] = 0;
 455          }
 456          cnt[j] = 0;
 457       }
 458       error = 0;
 459
 460       /* scan whole block */
 461       for (k = 0; k < n; k++) {
 462 #if 1
 463          GLint best = -1;
 464          GLfloat err = 1e9; /* big enough */
 465          /* determine best vector */
 466          for (j = 0; j < nv; j++) {
 467             GLfloat e = (vec[j][0] - input[k][0]) * (vec[j][0] - input[k][0]) +
 468                       (vec[j][1] - input[k][1]) * (vec[j][1] - input[k][1]) +
 469                       (vec[j][2] - input[k][2]) * (vec[j][2] - input[k][2]);
 470             if (nc == 4) {
 471                e += (vec[j][3] - input[k][3]) * (vec[j][3] - input[k][3]);
 472             }
 473             if (e < err) {
 474                err = e;
 475                best = j;
 476             }
 477          }
 478 #else
 479          GLint best = fxt1_bestcol(vec, nv, input[k], nc, &err);
 480 #endif
 481          assert(best >= 0);
 482          /* add in closest color */
 483          for (i = 0; i < nc; i++) {
 484             sum[best][i] += input[k][i];
 485          }
 486          /* mark this vector as used */
 487          cnt[best]++;
 488          /* accumulate error */
 489          error += err;
 490       }
 491
 492       /* check RMS */
 493       if ((error < LL_RMS_E) ||
 494           ((error < lasterror) && ((lasterror - error) < LL_RMS_D))) {
 495          return !0; /* good match */
 496       }
 497       lasterror = error;
 498
 499       /* move each vector to the barycenter of its closest colors */
 500       for (j = 0; j < nv; j++) {
 501          if (cnt[j]) {
 502             GLfloat div = 1.0F / cnt[j];
 503             for (i = 0; i < nc; i++) {
 504                vec[j][i] = div * sum[j][i];
 505             }
 506          } else {
 507             /* this vec has no samples or is identical with a previous vec */
 508             GLint worst = fxt1_worst(vec[j], input, nc, n);
 509             for (i = 0; i < nc; i++) {
 510                vec[j][i] = input[worst][i];
 511             }
 512          }
 513       }
 514    }
 515
 516    return 0; /* could not converge fast enough */
 517 }
 518
 519
 520 static void
 521 fxt1_quantize_CHROMA (GLuint *cc,
 522                       GLubyte input[N_TEXELS][MAX_COMP])
 523 {
 524    const GLint n_vect = 4; /* 4 base vectors to find */
 525    const GLint n_comp = 3; /* 3 components: R, G, B */
 526    GLfloat vec[MAX_VECT][MAX_COMP];
 527    GLint i, j, k;
 528    Fx64 hi; /* high quadword */
 529    GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
 530
 531    if (fxt1_choose(vec, n_vect, input, n_comp, N_TEXELS) != 0) {
 532       fxt1_lloyd(vec, n_vect, input, n_comp, N_TEXELS);
 533    }
 534
 535    FX64_MOV32(hi, 4); /* cc-chroma = "010" + unused bit */
 536    for (j = n_vect - 1; j >= 0; j--) {
 537       for (i = 0; i < n_comp; i++) {
 538          /* add in colors */
 539          FX64_SHL(hi, 5);
 540          FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
 541       }
 542    }
 543    ((Fx64 *)cc)[1] = hi;
 544
 545    lohi = lolo = 0;
 546    /* right microtile */
 547    for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) {
 548       lohi <<= 2;
 549       lohi |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
 550    }
 551    /* left microtile */
 552    for (; k >= 0; k--) {
 553       lolo <<= 2;
 554       lolo |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
 555    }
 556    cc[1] = lohi;
 557    cc[0] = lolo;
 558 }
 559
 560
 561 static void
 562 fxt1_quantize_ALPHA0 (GLuint *cc,
 563                       GLubyte input[N_TEXELS][MAX_COMP],
 564                       GLubyte reord[N_TEXELS][MAX_COMP], GLint n)
 565 {
 566    const GLint n_vect = 3; /* 3 base vectors to find */
 567    const GLint n_comp = 4; /* 4 components: R, G, B, A */
 568    GLfloat vec[MAX_VECT][MAX_COMP];
 569    GLint i, j, k;
 570    Fx64 hi; /* high quadword */
 571    GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
 572
 573    /* the last vector indicates zero */
 574    for (i = 0; i < n_comp; i++) {
 575       vec[n_vect][i] = 0;
 576    }
 577
 578    /* the first n texels in reord are guaranteed to be non-zero */
 579    if (fxt1_choose(vec, n_vect, reord, n_comp, n) != 0) {
 580       fxt1_lloyd(vec, n_vect, reord, n_comp, n);
 581    }
 582
 583    FX64_MOV32(hi, 6); /* alpha = "011" + lerp = 0 */
 584    for (j = n_vect - 1; j >= 0; j--) {
 585       /* add in alphas */
 586       FX64_SHL(hi, 5);
 587       FX64_OR32(hi, (GLuint)(vec[j][ACOMP] / 8.0F));
 588    }
 589    for (j = n_vect - 1; j >= 0; j--) {
 590       for (i = 0; i < n_comp - 1; i++) {
 591          /* add in colors */
 592          FX64_SHL(hi, 5);
 593          FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
 594       }
 595    }
 596    ((Fx64 *)cc)[1] = hi;
 597
 598    lohi = lolo = 0;
 599    /* right microtile */
 600    for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) {
 601       lohi <<= 2;
 602       lohi |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
 603    }
 604    /* left microtile */
 605    for (; k >= 0; k--) {
 606       lolo <<= 2;
 607       lolo |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
 608    }
 609    cc[1] = lohi;
 610    cc[0] = lolo;
 611 }
 612
 613
/**
 * Encode one block of texels in CC_ALPHA mode with interpolation
 * (lerp = 1).
 *
 * For each half of the block (texels 0..15 and 16..31) the darkest and
 * brightest texels are located.  The two halves share one vector
 * (vec[1]); vec[0] and vec[2] are the remaining extrema of the left and
 * right half.  Each texel stores a 2-bit position between its half's own
 * extremum and the shared vector.
 *
 * \param cc     output block: cc[0] / cc[1] receive the texel indices of
 *               the left / right half, the upper 64 bits receive the mode
 *               bits, three alphas and three colors (5 bits per component)
 * \param input  the N_TEXELS texels of the block
 */
static void
fxt1_quantize_ALPHA1 (GLuint *cc,
                      GLubyte input[N_TEXELS][MAX_COMP])
{
   const GLint n_vect = 3; /* highest vector number in each microtile */
   const GLint n_comp = 4; /* 4 components: R, G, B, A */
   GLfloat vec[1 + 1 + 1][MAX_COMP]; /* 1.5 extrema for each sub-block */
   GLfloat b, iv[MAX_COMP]; /* interpolation vector */
   GLint i, j, k;
   Fx64 hi; /* high quadword */
   GLuint lohi, lolo; /* low quadword: hi dword, lo dword */

   GLint minSum;
   GLint maxSum;
   GLint minColL = 0, maxColL = 0;
   GLint minColR = 0, maxColR = 0;
   GLint sumL = 0, sumR = 0;
   GLint nn_comp;
   /* Our solution here is to find the darkest and brightest colors in
    * the 4x4 tile and use those as the two representative colors.
    * There are probably better algorithms to use (histogram-based).
    */
   /* Left half.  If summing all components cannot tell any two texels
    * apart (min and max index stay equal), retry with one component
    * fewer, until some distinction is found or no components remain.
    * Note that sumL is not reset between retries, so it accumulates the
    * sums of every attempt.
    */
   nn_comp = n_comp;
   while ((minColL == maxColL) && nn_comp) {
       minSum = 2000; /* big enough */
       maxSum = -1; /* small enough */
       for (k = 0; k < N_TEXELS / 2; k++) {
           GLint sum = 0;
           for (i = 0; i < nn_comp; i++) {
               sum += input[k][i];
           }
           if (minSum > sum) {
               minSum = sum;
               minColL = k;
           }
           if (maxSum < sum) {
               maxSum = sum;
               maxColL = k;
           }
           sumL += sum;
       }

       nn_comp--;
   }

   /* Right half, same procedure (sumR accumulates the same way). */
   nn_comp = n_comp;
   while ((minColR == maxColR) && nn_comp) {
       minSum = 2000; /* big enough */
       maxSum = -1; /* small enough */
       for (k = N_TEXELS / 2; k < N_TEXELS; k++) {
           GLint sum = 0;
           for (i = 0; i < nn_comp; i++) {
               sum += input[k][i];
           }
           if (minSum > sum) {
               minSum = sum;
               minColR = k;
           }
           if (maxSum < sum) {
               maxSum = sum;
               maxColR = k;
           }
           sumR += sum;
       }

       nn_comp--;
   }

   /* choose the common vector (yuck!) */
   {
      GLint j1, j2;
      GLint v1 = 0, v2 = 0;
      GLfloat err = 1e9; /* big enough */
      GLfloat tv[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
      /* tv[0..1] = left min / max, tv[2..3] = right min / max */
      for (i = 0; i < n_comp; i++) {
         tv[0][i] = input[minColL][i];
         tv[1][i] = input[maxColL][i];
         tv[2][i] = input[minColR][i];
         tv[3][i] = input[maxColR][i];
      }
      /* find the closest (left extremum, right extremum) pair */
      for (j1 = 0; j1 < 2; j1++) {
         for (j2 = 2; j2 < 4; j2++) {
            GLfloat e = 0.0F;
            for (i = 0; i < n_comp; i++) {
               e += (tv[j1][i] - tv[j2][i]) * (tv[j1][i] - tv[j2][i]);
            }
            if (e < err) {
               err = e;
               v1 = j1;
               v2 = j2;
            }
         }
      }
      /* vec[0] = the other left extremum (1 - v1 flips 0 <-> 1),
       * vec[1] = blend of the closest pair weighted by sumL / sumR,
       * vec[2] = the other right extremum (5 - v2 flips 2 <-> 3).
       * NOTE(review): if every summed component of every texel is 0,
       * sumL + sumR is 0 and the blend divides by zero -- confirm that
       * the caller never selects this mode for such a block.
       */
      for (i = 0; i < n_comp; i++) {
         vec[0][i] = tv[1 - v1][i];
         vec[1][i] = (tv[v1][i] * sumL + tv[v2][i] * sumR) / (sumL + sumR);
         vec[2][i] = tv[5 - v2][i];
      }
   }

   /* left microtile */
   /* stays 0 (every texel at index 0) if the half has no distinct extrema */
   cc[0] = 0;
   if (minColL != maxColL) {
      /* compute interpolation vector */
      MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);

      /* add in texels */
      lolo = 0;
      for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
         GLint texel;
         /* interpolate color */
         CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
         /* add in texel */
         lolo <<= 2;
         lolo |= texel;
      }

      cc[0] = lolo;
   }

   /* right microtile */
   /* stays 0 (every texel at index 0) if the half has no distinct extrema */
   cc[1] = 0;
   if (minColR != maxColR) {
      /* compute interpolation vector */
      MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[1]);

      /* add in texels */
      lohi = 0;
      for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
         GLint texel;
         /* interpolate color */
         CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
         /* add in texel */
         lohi <<= 2;
         lohi |= texel;
      }

      cc[1] = lohi;
   }

   FX64_MOV32(hi, 7); /* alpha = "011" + lerp = 1 */
   for (j = n_vect - 1; j >= 0; j--) {
      /* add in alphas */
      FX64_SHL(hi, 5);
      FX64_OR32(hi, (GLuint)(vec[j][ACOMP] / 8.0F));
   }
   for (j = n_vect - 1; j >= 0; j--) {
      for (i = 0; i < n_comp - 1; i++) {
         /* add in colors */
         FX64_SHL(hi, 5);
         FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
      }
   }
   ((Fx64 *)cc)[1] = hi;
}
 769
 770
 771 static void
 772 fxt1_quantize_HI (GLuint *cc,
 773                   GLubyte input[N_TEXELS][MAX_COMP],
 774                   GLubyte reord[N_TEXELS][MAX_COMP], GLint n)
 775 {
 776    const GLint n_vect = 6; /* highest vector number */
 777    const GLint n_comp = 3; /* 3 components: R, G, B */
 778    GLfloat b = 0.0F;       /* phoudoin: silent compiler! */
 779    GLfloat iv[MAX_COMP];   /* interpolation vector */
 780    GLint i, k;
 781    GLuint hihi; /* high quadword: hi dword */
 782
 783    GLint minSum = 2000; /* big enough */
 784    GLint maxSum = -1; /* small enough */
 785    GLint minCol = 0; /* phoudoin: silent compiler! */
 786    GLint maxCol = 0; /* phoudoin: silent compiler! */
 787
 788    /* Our solution here is to find the darkest and brightest colors in
 789     * the 8x4 tile and use those as the two representative colors.
 790     * There are probably better algorithms to use (histogram-based).
 791     */
 792    for (k = 0; k < n; k++) {
 793       GLint sum = 0;
 794       for (i = 0; i < n_comp; i++) {
 795          sum += reord[k][i];
 796       }
 797       if (minSum > sum) {
 798          minSum = sum;
 799          minCol = k;
 800       }
 801       if (maxSum < sum) {
 802          maxSum = sum;
 803          maxCol = k;
 804       }
 805    }
 806
 807    hihi = 0; /* cc-hi = "00" */
 808    for (i = 0; i < n_comp; i++) {
 809       /* add in colors */
 810       hihi <<= 5;
 811       hihi |= reord[maxCol][i] >> 3;
 812    }
 813    for (i = 0; i < n_comp; i++) {
 814       /* add in colors */
 815       hihi <<= 5;
 816       hihi |= reord[minCol][i] >> 3;
 817    }
 818    cc[3] = hihi;
 819    cc[0] = cc[1] = cc[2] = 0;
 820
 821    /* compute interpolation vector */
 822    if (minCol != maxCol) {
 823       MAKEIVEC(n_vect, n_comp, iv, b, reord[minCol], reord[maxCol]);
 824    }
 825
 826    /* add in texels */
 827    for (k = N_TEXELS - 1; k >= 0; k--) {
 828       GLint t = k * 3;
 829       GLuint *kk = (GLuint *)((char *)cc + t / 8);
 830       GLint texel = n_vect + 1; /* transparent black */
 831
 832       if (!ISTBLACK(input[k])) {
 833          if (minCol != maxCol) {
 834             /* interpolate color */
 835             CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
 836             /* add in texel */
 837             kk[0] |= texel << (t & 7);
 838          }
 839       } else {
 840          /* add in texel */
 841          kk[0] |= texel << (t & 7);
 842       }
 843    }
 844 }
 845
 846
/**
 * Encode one block of texels in CC_MIXED mode with transparency.
 *
 * Each half of the block (texels 0..15 and 16..31) stores its darkest
 * and brightest non-zero texel as a color pair.  Texels get a 2-bit
 * index: 0..2 is the position between the pair, 3 marks an all-zero
 * (transparent black) texel.
 *
 * \param cc     output block: cc[0] / cc[1] receive the texel indices of
 *               the left / right half, the upper 64 bits receive the mode
 *               bits and the four colors (5 bits per component)
 * \param input  the N_TEXELS texels of the block
 */
static void
fxt1_quantize_MIXED1 (GLuint *cc,
                      GLubyte input[N_TEXELS][MAX_COMP])
{
   const GLint n_vect = 2; /* highest vector number in each microtile */
   const GLint n_comp = 3; /* 3 components: R, G, B */
   GLubyte vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
   GLfloat b, iv[MAX_COMP]; /* interpolation vector */
   GLint i, j, k;
   Fx64 hi; /* high quadword */
   GLuint lohi, lolo; /* low quadword: hi dword, lo dword */

   GLint minSum;
   GLint maxSum;
   /* max index -1 means "no non-zero texel seen in this half" */
   GLint minColL = 0, maxColL = -1;
   GLint minColR = 0, maxColR = -1;

   /* Our solution here is to find the darkest and brightest colors in
    * the 4x4 tile and use those as the two representative colors.
    * There are probably better algorithms to use (histogram-based).
    */
   /* left half: all-zero texels are skipped */
   minSum = 2000; /* big enough */
   maxSum = -1; /* small enough */
   for (k = 0; k < N_TEXELS / 2; k++) {
      if (!ISTBLACK(input[k])) {
         GLint sum = 0;
         for (i = 0; i < n_comp; i++) {
            sum += input[k][i];
         }
         if (minSum > sum) {
            minSum = sum;
            minColL = k;
         }
         if (maxSum < sum) {
            maxSum = sum;
            maxColL = k;
         }
      }
   }
   /* right half: k continues from N_TEXELS / 2 */
   minSum = 2000; /* big enough */
   maxSum = -1; /* small enough */
   for (; k < N_TEXELS; k++) {
      if (!ISTBLACK(input[k])) {
         GLint sum = 0;
         for (i = 0; i < n_comp; i++) {
            sum += input[k][i];
         }
         if (minSum > sum) {
            minSum = sum;
            minColR = k;
         }
         if (maxSum < sum) {
            maxSum = sum;
            maxColR = k;
         }
      }
   }

   /* left microtile */
   if (maxColL == -1) {
      /* all transparent black */
      cc[0] = ~0u;
      for (i = 0; i < n_comp; i++) {
         vec[0][i] = 0;
         vec[1][i] = 0;
      }
   } else {
      /* NOTE(review): when all non-zero texels share one brightness
       * (minColL == maxColL) cc[0] stays 0, so any all-zero texels in
       * this half are stored with index 0 rather than 3 -- confirm
       * whether that is intended.
       */
      cc[0] = 0;
      for (i = 0; i < n_comp; i++) {
         vec[0][i] = input[minColL][i];
         vec[1][i] = input[maxColL][i];
      }
      if (minColL != maxColL) {
         /* compute interpolation vector */
         MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);

         /* add in texels */
         lolo = 0;
         for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
            GLint texel = n_vect + 1; /* transparent black */
            if (!ISTBLACK(input[k])) {
               /* interpolate color */
               CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
            }
            /* add in texel */
            lolo <<= 2;
            lolo |= texel;
         }
         cc[0] = lolo;
      }
   }

   /* right microtile */
   if (maxColR == -1) {
      /* all transparent black */
      cc[1] = ~0u;
      for (i = 0; i < n_comp; i++) {
         vec[2][i] = 0;
         vec[3][i] = 0;
      }
   } else {
      /* same single-brightness caveat as for the left microtile */
      cc[1] = 0;
      for (i = 0; i < n_comp; i++) {
         vec[2][i] = input[minColR][i];
         vec[3][i] = input[maxColR][i];
      }
      if (minColR != maxColR) {
         /* compute interpolation vector */
         MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);

         /* add in texels */
         lohi = 0;
         for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
            GLint texel = n_vect + 1; /* transparent black */
            if (!ISTBLACK(input[k])) {
               /* interpolate color */
               CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
            }
            /* add in texel */
            lohi <<= 2;
            lohi |= texel;
         }
         cc[1] = lohi;
      }
   }

   /* Header nibble: bits 3 and 0 are always set (the constant 9), bit 2
    * is bit 2 of vec[3]'s green value and bit 1 is bit 2 of vec[1]'s
    * green value -- in both cases the first bit dropped by the ">> 3"
    * used when the colors are packed below.
    */
   FX64_MOV32(hi, 9 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
   for (j = 2 * 2 - 1; j >= 0; j--) {
      for (i = 0; i < n_comp; i++) {
         /* add in colors */
         FX64_SHL(hi, 5);
         FX64_OR32(hi, vec[j][i] >> 3);
      }
   }
   ((Fx64 *)cc)[1] = hi;
}
 983
 984
 985 static void
 986 fxt1_quantize_MIXED0 (GLuint *cc,
 987                       GLubyte input[N_TEXELS][MAX_COMP])
 988 {
 989    const GLint n_vect = 3; /* highest vector number in each microtile */
 990    const GLint n_comp = 3; /* 3 components: R, G, B */
 991    GLubyte vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
 992    GLfloat b, iv[MAX_COMP]; /* interpolation vector */
 993    GLint i, j, k;
 994    Fx64 hi; /* high quadword */
 995    GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
 996
 997    GLint minColL = 0, maxColL = 0;
 998    GLint minColR = 0, maxColR = 0;
 999 #if 0
1000    GLint minSum;
1001    GLint maxSum;
1002
1003    /* Our solution here is to find the darkest and brightest colors in
1004     * the 4x4 tile and use those as the two representative colors.
1005     * There are probably better algorithms to use (histogram-based).
1006     */
1007    minSum = 2000; /* big enough */
1008    maxSum = -1; /* small enough */
1009    for (k = 0; k < N_TEXELS / 2; k++) {
1010       GLint sum = 0;
1011       for (i = 0; i < n_comp; i++) {
1012          sum += input[k][i];
1013       }
1014       if (minSum > sum) {
1015          minSum = sum;
1016          minColL = k;
1017       }
1018       if (maxSum < sum) {
1019          maxSum = sum;
1020          maxColL = k;
1021       }
1022    }
1023    minSum = 2000; /* big enough */
1024    maxSum = -1; /* small enough */
1025    for (; k < N_TEXELS; k++) {
1026       GLint sum = 0;
1027       for (i = 0; i < n_comp; i++) {
1028          sum += input[k][i];
1029       }
1030       if (minSum > sum) {
1031          minSum = sum;
1032          minColR = k;
1033       }
1034       if (maxSum < sum) {
1035          maxSum = sum;
1036          maxColR = k;
1037       }
1038    }
1039 #else
1040    GLint minVal;
1041    GLint maxVal;
1042    GLint maxVarL = fxt1_variance(NULL, input, n_comp, N_TEXELS / 2);
1043    GLint maxVarR = fxt1_variance(NULL, &input[N_TEXELS / 2], n_comp, N_TEXELS / 2);
1044
1045    /* Scan the channel with max variance for lo & hi
1046     * and use those as the two representative colors.
1047     */
1048    minVal = 2000; /* big enough */
1049    maxVal = -1; /* small enough */
1050    for (k = 0; k < N_TEXELS / 2; k++) {
1051       GLint t = input[k][maxVarL];
1052       if (minVal > t) {
1053          minVal = t;
1054          minColL = k;
1055       }
1056       if (maxVal < t) {
1057          maxVal = t;
1058          maxColL = k;
1059       }
1060    }
1061    minVal = 2000; /* big enough */
1062    maxVal = -1; /* small enough */
1063    for (; k < N_TEXELS; k++) {
1064       GLint t = input[k][maxVarR];
1065       if (minVal > t) {
1066          minVal = t;
1067          minColR = k;
1068       }
1069       if (maxVal < t) {
1070          maxVal = t;
1071          maxColR = k;
1072       }
1073    }
1074 #endif
1075
1076    /* left microtile */
1077    cc[0] = 0;
1078    for (i = 0; i < n_comp; i++) {
1079       vec[0][i] = input[minColL][i];
1080       vec[1][i] = input[maxColL][i];
1081    }
1082    if (minColL != maxColL) {
1083       /* compute interpolation vector */
1084       MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
1085
1086       /* add in texels */
1087       lolo = 0;
1088       for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
1089          GLint texel;
1090          /* interpolate color */
1091          CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
1092          /* add in texel */
1093          lolo <<= 2;
1094          lolo |= texel;
1095       }
1096
1097       /* funky encoding for LSB of green */
1098       if ((GLint)((lolo >> 1) & 1) != (((vec[1][GCOMP] ^ vec[0][GCOMP]) >> 2) & 1)) {
1099          for (i = 0; i < n_comp; i++) {
1100             vec[1][i] = input[minColL][i];
1101             vec[0][i] = input[maxColL][i];
1102          }
1103          lolo = ~lolo;
1104       }
1105
1106       cc[0] = lolo;
1107    }
1108
1109    /* right microtile */
1110    cc[1] = 0;
1111    for (i = 0; i < n_comp; i++) {
1112       vec[2][i] = input[minColR][i];
1113       vec[3][i] = input[maxColR][i];
1114    }
1115    if (minColR != maxColR) {
1116       /* compute interpolation vector */
1117       MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);
1118
1119       /* add in texels */
1120       lohi = 0;
1121       for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
1122          GLint texel;
1123          /* interpolate color */
1124          CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
1125          /* add in texel */
1126          lohi <<= 2;
1127          lohi |= texel;
1128       }
1129
1130       /* funky encoding for LSB of green */
1131       if ((GLint)((lohi >> 1) & 1) != (((vec[3][GCOMP] ^ vec[2][GCOMP]) >> 2) & 1)) {
1132          for (i = 0; i < n_comp; i++) {
1133             vec[3][i] = input[minColR][i];
1134             vec[2][i] = input[maxColR][i];
1135          }
1136          lohi = ~lohi;
1137       }
1138
1139       cc[1] = lohi;
1140    }
1141
1142    FX64_MOV32(hi, 8 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
1143    for (j = 2 * 2 - 1; j >= 0; j--) {
1144       for (i = 0; i < n_comp; i++) {
1145          /* add in colors */
1146          FX64_SHL(hi, 5);
1147          FX64_OR32(hi, vec[j][i] >> 3);
1148       }
1149    }
1150    ((Fx64 *)cc)[1] = hi;
1151 }
1152
1153
1154 static void
1155 fxt1_quantize (GLuint *cc, const GLubyte *lines[], GLint comps)
1156 {
1157    GLint trualpha;
1158    GLubyte reord[N_TEXELS][MAX_COMP];
1159
1160    GLubyte input[N_TEXELS][MAX_COMP];
1161    GLint i, k, l;
1162
1163    if (comps == 3) {
1164       /* make the whole block opaque */
1165       memset(input, -1, sizeof(input));
1166    }
1167
1168    /* 8 texels each line */
1169    for (l = 0; l < 4; l++) {
1170       for (k = 0; k < 4; k++) {
1171          for (i = 0; i < comps; i++) {
1172             input[k + l * 4][i] = *lines[l]++;
1173          }
1174       }
1175       for (; k < 8; k++) {
1176          for (i = 0; i < comps; i++) {
1177             input[k + l * 4 + 12][i] = *lines[l]++;
1178          }
1179       }
1180    }
1181
1182    /* block layout:
1183     * 00, 01, 02, 03, 08, 09, 0a, 0b
1184     * 10, 11, 12, 13, 18, 19, 1a, 1b
1185     * 04, 05, 06, 07, 0c, 0d, 0e, 0f
1186     * 14, 15, 16, 17, 1c, 1d, 1e, 1f
1187     */
1188
1189    /* [dBorca]
1190     * stupidity flows forth from this
1191     */
1192    l = N_TEXELS;
1193    trualpha = 0;
1194    if (comps == 4) {
1195       /* skip all transparent black texels */
1196       l = 0;
1197       for (k = 0; k < N_TEXELS; k++) {
1198          /* test all components against 0 */
1199          if (!ISTBLACK(input[k])) {
1200             /* texel is not transparent black */
1201             COPY_4UBV(reord[l], input[k]);
1202             if (reord[l][ACOMP] < (255 - ALPHA_TS)) {
1203                /* non-opaque texel */
1204                trualpha = !0;
1205             }
1206             l++;
1207          }
1208       }
1209    }
1210
1211 #if 0
1212    if (trualpha) {
1213       fxt1_quantize_ALPHA0(cc, input, reord, l);
1214    } else if (l == 0) {
1215       cc[0] = cc[1] = cc[2] = -1;
1216       cc[3] = 0;
1217    } else if (l < N_TEXELS) {
1218       fxt1_quantize_HI(cc, input, reord, l);
1219    } else {
1220       fxt1_quantize_CHROMA(cc, input);
1221    }
1222    (void)fxt1_quantize_ALPHA1;
1223    (void)fxt1_quantize_MIXED1;
1224    (void)fxt1_quantize_MIXED0;
1225 #else
1226    if (trualpha) {
1227       fxt1_quantize_ALPHA1(cc, input);
1228    } else if (l == 0) {
1229       cc[0] = cc[1] = cc[2] = ~0u;
1230       cc[3] = 0;
1231    } else if (l < N_TEXELS) {
1232       fxt1_quantize_MIXED1(cc, input);
1233    } else {
1234       fxt1_quantize_MIXED0(cc, input);
1235    }
1236    (void)fxt1_quantize_ALPHA0;
1237    (void)fxt1_quantize_HI;
1238    (void)fxt1_quantize_CHROMA;
1239 #endif
1240 }
1241
1242
1243
1244 /**
1245  * Upscale an image by replication, not (typical) stretching.
1246  * We use this when the image width or height is less than a
1247  * certain size (4, 8) and we need to upscale an image.
1248  */
1249 static void
1250 upscale_teximage2d(GLsizei inWidth, GLsizei inHeight,
1251                    GLsizei outWidth, GLsizei outHeight,
1252                    GLint comps, const GLubyte *src, GLint srcRowStride,
1253                    GLubyte *dest )
1254 {
1255    GLint i, j, k;
1256
1257    ASSERT(outWidth >= inWidth);
1258    ASSERT(outHeight >= inHeight);
1259 #if 0
1260    ASSERT(inWidth == 1 || inWidth == 2 || inHeight == 1 || inHeight == 2);
1261    ASSERT((outWidth & 3) == 0);
1262    ASSERT((outHeight & 3) == 0);
1263 #endif
1264
1265    for (i = 0; i < outHeight; i++) {
1266       const GLint ii = i % inHeight;
1267       for (j = 0; j < outWidth; j++) {
1268          const GLint jj = j % inWidth;
1269          for (k = 0; k < comps; k++) {
1270             dest[(i * outWidth + j) * comps + k]
1271                = src[ii * srcRowStride + jj * comps + k];
1272          }
1273       }
1274    }
1275 }
1276
1277
1278 static void
1279 fxt1_encode (GLuint width, GLuint height, GLint comps,
1280              const void *source, GLint srcRowStride,
1281              void *dest, GLint destRowStride)
1282 {
1283    GLuint x, y;
1284    const GLubyte *data;
1285    GLuint *encoded = (GLuint *)dest;
1286    void *newSource = NULL;
1287
1288    assert(comps == 3 || comps == 4);
1289
1290    /* Replicate image if width is not M8 or height is not M4 */
1291    if ((width & 7) | (height & 3)) {
1292       GLint newWidth = (width + 7) & ~7;
1293       GLint newHeight = (height + 3) & ~3;
1294       newSource = malloc(comps * newWidth * newHeight * sizeof(GLubyte));
1295       if (!newSource) {
1296          GET_CURRENT_CONTEXT(ctx);
1297          _mesa_error(ctx, GL_OUT_OF_MEMORY, "texture compression");
1298          goto cleanUp;
1299       }
1300       upscale_teximage2d(width, height, newWidth, newHeight,
1301                          comps, (const GLubyte *) source,
1302                          srcRowStride, (GLubyte *) newSource);
1303       source = newSource;
1304       width = newWidth;
1305       height = newHeight;
1306       srcRowStride = comps * newWidth;
1307    }
1308
1309    data = (const GLubyte *) source;
1310    destRowStride = (destRowStride - width * 2) / 4;
1311    for (y = 0; y < height; y += 4) {
1312       GLuint offs = 0 + (y + 0) * srcRowStride;
1313       for (x = 0; x < width; x += 8) {
1314          const GLubyte *lines[4];
1315          lines[0] = &data[offs];
1316          lines[1] = lines[0] + srcRowStride;
1317          lines[2] = lines[1] + srcRowStride;
1318          lines[3] = lines[2] + srcRowStride;
1319          offs += 8 * comps;
1320          fxt1_quantize(encoded, lines, comps);
1321          /* 128 bits per 8x4 block */
1322          encoded += 4;
1323       }
1324       encoded += destRowStride;
1325    }
1326
1327  cleanUp:
1328    free(newSource);
1329 }
1330
1331
1332 /***************************************************************************\
1333  * FXT1 decoder
1334  *
1335  * The decoder is based on GL_3DFX_texture_compression_FXT1
1336  * specification and serves as a concept for the encoder.
1337 \***************************************************************************/
1338
1339
1340 /* lookup table for scaling 5 bit colors up to 8 bits */
1341 static const GLubyte _rgb_scale_5[] = {
1342    0,   8,   16,  25,  33,  41,  49,  58,
1343    66,  74,  82,  90,  99,  107, 115, 123,
1344    132, 140, 148, 156, 165, 173, 181, 189,
1345    197, 206, 214, 222, 230, 239, 247, 255
1346 };
1347
1348 /* lookup table for scaling 6 bit colors up to 8 bits */
1349 static const GLubyte _rgb_scale_6[] = {
1350    0,   4,   8,   12,  16,  20,  24,  28,
1351    32,  36,  40,  45,  49,  53,  57,  61,
1352    65,  69,  73,  77,  81,  85,  89,  93,
1353    97,  101, 105, 109, 113, 117, 121, 125,
1354    130, 134, 138, 142, 146, 150, 154, 158,
1355    162, 166, 170, 174, 178, 182, 186, 190,
1356    194, 198, 202, 206, 210, 215, 219, 223,
1357    227, 231, 235, 239, 243, 247, 251, 255
1358 };
1359
1360
/* CC_SEL: view cc as an array of 32-bit words and return the word that
 * contains bit number `which`, shifted right so that bit lands in bit 0.
 * Bits above the field are NOT masked off and a field that crosses a
 * 32-bit word boundary is not completed from the next word.
 */
#define CC_SEL(cc, which) (((const GLuint *)(cc))[(which) / 32] >> ((which) & 31))
/* UP5: expand the low 5 bits of c to an 8-bit color value. */
#define UP5(c) (_rgb_scale_5[(c) & 31])
/* UP6: expand a 6-bit value (low 5 bits of c, then b as LSB) to 8 bits. */
#define UP6(c, b) (_rgb_scale_6[(((c) & 31) << 1) | ((b) & 1)])
/* LERP: rounded integer blend, c0 at t == 0 and c1 at t == n.  The whole
 * expansion is parenthesized so it can be used inside larger expressions.
 */
#define LERP(n, t, c0, c1) ((((n) - (t)) * (c0) + (t) * (c1) + (n) / 2) / (n))
1365
1366
1367 static void
1368 fxt1_decode_1HI (const GLubyte *code, GLint t, GLubyte *rgba)
1369 {
1370    const GLuint *cc;
1371
1372    t *= 3;
1373    cc = (const GLuint *)(code + t / 8);
1374    t = (cc[0] >> (t & 7)) & 7;
1375
1376    if (t == 7) {
1377       rgba[RCOMP] = rgba[GCOMP] = rgba[BCOMP] = rgba[ACOMP] = 0;
1378    } else {
1379       GLubyte r, g, b;
1380       cc = (const GLuint *)(code + 12);
1381       if (t == 0) {
1382          b = UP5(CC_SEL(cc, 0));
1383          g = UP5(CC_SEL(cc, 5));
1384          r = UP5(CC_SEL(cc, 10));
1385       } else if (t == 6) {
1386          b = UP5(CC_SEL(cc, 15));
1387          g = UP5(CC_SEL(cc, 20));
1388          r = UP5(CC_SEL(cc, 25));
1389       } else {
1390          b = LERP(6, t, UP5(CC_SEL(cc, 0)), UP5(CC_SEL(cc, 15)));
1391          g = LERP(6, t, UP5(CC_SEL(cc, 5)), UP5(CC_SEL(cc, 20)));
1392          r = LERP(6, t, UP5(CC_SEL(cc, 10)), UP5(CC_SEL(cc, 25)));
1393       }
1394       rgba[RCOMP] = r;
1395       rgba[GCOMP] = g;
1396       rgba[BCOMP] = b;
1397       rgba[ACOMP] = 255;
1398    }
1399 }
1400
1401
1402 static void
1403 fxt1_decode_1CHROMA (const GLubyte *code, GLint t, GLubyte *rgba)
1404 {
1405    const GLuint *cc;
1406    GLuint kk;
1407
1408    cc = (const GLuint *)code;
1409    if (t & 16) {
1410       cc++;
1411       t &= 15;
1412    }
1413    t = (cc[0] >> (t * 2)) & 3;
1414
1415    t *= 15;
1416    cc = (const GLuint *)(code + 8 + t / 8);
1417    kk = cc[0] >> (t & 7);
1418    rgba[BCOMP] = UP5(kk);
1419    rgba[GCOMP] = UP5(kk >> 5);
1420    rgba[RCOMP] = UP5(kk >> 10);
1421    rgba[ACOMP] = 255;
1422 }
1423
1424
1425 static void
1426 fxt1_decode_1MIXED (const GLubyte *code, GLint t, GLubyte *rgba)
1427 {
1428    const GLuint *cc;
1429    GLuint col[2][3];
1430    GLint glsb, selb;
1431
1432    cc = (const GLuint *)code;
1433    if (t & 16) {
1434       t &= 15;
1435       t = (cc[1] >> (t * 2)) & 3;
1436       /* col 2 */
1437       col[0][BCOMP] = (*(const GLuint *)(code + 11)) >> 6;
1438       col[0][GCOMP] = CC_SEL(cc, 99);
1439       col[0][RCOMP] = CC_SEL(cc, 104);
1440       /* col 3 */
1441       col[1][BCOMP] = CC_SEL(cc, 109);
1442       col[1][GCOMP] = CC_SEL(cc, 114);
1443       col[1][RCOMP] = CC_SEL(cc, 119);
1444       glsb = CC_SEL(cc, 126);
1445       selb = CC_SEL(cc, 33);
1446    } else {
1447       t = (cc[0] >> (t * 2)) & 3;
1448       /* col 0 */
1449       col[0][BCOMP] = CC_SEL(cc, 64);
1450       col[0][GCOMP] = CC_SEL(cc, 69);
1451       col[0][RCOMP] = CC_SEL(cc, 74);
1452       /* col 1 */
1453       col[1][BCOMP] = CC_SEL(cc, 79);
1454       col[1][GCOMP] = CC_SEL(cc, 84);
1455       col[1][RCOMP] = CC_SEL(cc, 89);
1456       glsb = CC_SEL(cc, 125);
1457       selb = CC_SEL(cc, 1);
1458    }
1459
1460    if (CC_SEL(cc, 124) & 1) {
1461       /* alpha[0] == 1 */
1462
1463       if (t == 3) {
1464          /* zero */
1465          rgba[RCOMP] = rgba[BCOMP] = rgba[GCOMP] = rgba[ACOMP] = 0;
1466       } else {
1467          GLubyte r, g, b;
1468          if (t == 0) {
1469             b = UP5(col[0][BCOMP]);
1470             g = UP5(col[0][GCOMP]);
1471             r = UP5(col[0][RCOMP]);
1472          } else if (t == 2) {
1473             b = UP5(col[1][BCOMP]);
1474             g = UP6(col[1][GCOMP], glsb);
1475             r = UP5(col[1][RCOMP]);
1476          } else {
1477             b = (UP5(col[0][BCOMP]) + UP5(col[1][BCOMP])) / 2;
1478             g = (UP5(col[0][GCOMP]) + UP6(col[1][GCOMP], glsb)) / 2;
1479             r = (UP5(col[0][RCOMP]) + UP5(col[1][RCOMP])) / 2;
1480          }
1481          rgba[RCOMP] = r;
1482          rgba[GCOMP] = g;
1483          rgba[BCOMP] = b;
1484          rgba[ACOMP] = 255;
1485       }
1486    } else {
1487       /* alpha[0] == 0 */
1488       GLubyte r, g, b;
1489       if (t == 0) {
1490          b = UP5(col[0][BCOMP]);
1491          g = UP6(col[0][GCOMP], glsb ^ selb);
1492          r = UP5(col[0][RCOMP]);
1493       } else if (t == 3) {
1494          b = UP5(col[1][BCOMP]);
1495          g = UP6(col[1][GCOMP], glsb);
1496          r = UP5(col[1][RCOMP]);
1497       } else {
1498          b = LERP(3, t, UP5(col[0][BCOMP]), UP5(col[1][BCOMP]));
1499          g = LERP(3, t, UP6(col[0][GCOMP], glsb ^ selb),
1500                         UP6(col[1][GCOMP], glsb));
1501          r = LERP(3, t, UP5(col[0][RCOMP]), UP5(col[1][RCOMP]));
1502       }
1503       rgba[RCOMP] = r;
1504       rgba[GCOMP] = g;
1505       rgba[BCOMP] = b;
1506       rgba[ACOMP] = 255;
1507    }
1508 }
1509
1510
1511 static void
1512 fxt1_decode_1ALPHA (const GLubyte *code, GLint t, GLubyte *rgba)
1513 {
1514    const GLuint *cc;
1515    GLubyte r, g, b, a;
1516
1517    cc = (const GLuint *)code;
1518    if (CC_SEL(cc, 124) & 1) {
1519       /* lerp == 1 */
1520       GLuint col0[4];
1521
1522       if (t & 16) {
1523          t &= 15;
1524          t = (cc[1] >> (t * 2)) & 3;
1525          /* col 2 */
1526          col0[BCOMP] = (*(const GLuint *)(code + 11)) >> 6;
1527          col0[GCOMP] = CC_SEL(cc, 99);
1528          col0[RCOMP] = CC_SEL(cc, 104);
1529          col0[ACOMP] = CC_SEL(cc, 119);
1530       } else {
1531          t = (cc[0] >> (t * 2)) & 3;
1532          /* col 0 */
1533          col0[BCOMP] = CC_SEL(cc, 64);
1534          col0[GCOMP] = CC_SEL(cc, 69);
1535          col0[RCOMP] = CC_SEL(cc, 74);
1536          col0[ACOMP] = CC_SEL(cc, 109);
1537       }
1538
1539       if (t == 0) {
1540          b = UP5(col0[BCOMP]);
1541          g = UP5(col0[GCOMP]);
1542          r = UP5(col0[RCOMP]);
1543          a = UP5(col0[ACOMP]);
1544       } else if (t == 3) {
1545          b = UP5(CC_SEL(cc, 79));
1546          g = UP5(CC_SEL(cc, 84));
1547          r = UP5(CC_SEL(cc, 89));
1548          a = UP5(CC_SEL(cc, 114));
1549       } else {
1550          b = LERP(3, t, UP5(col0[BCOMP]), UP5(CC_SEL(cc, 79)));
1551          g = LERP(3, t, UP5(col0[GCOMP]), UP5(CC_SEL(cc, 84)));
1552          r = LERP(3, t, UP5(col0[RCOMP]), UP5(CC_SEL(cc, 89)));
1553          a = LERP(3, t, UP5(col0[ACOMP]), UP5(CC_SEL(cc, 114)));
1554       }
1555    } else {
1556       /* lerp == 0 */
1557
1558       if (t & 16) {
1559          cc++;
1560          t &= 15;
1561       }
1562       t = (cc[0] >> (t * 2)) & 3;
1563
1564       if (t == 3) {
1565          /* zero */
1566          r = g = b = a = 0;
1567       } else {
1568          GLuint kk;
1569          cc = (const GLuint *)code;
1570          a = UP5(cc[3] >> (t * 5 + 13));
1571          t *= 15;
1572          cc = (const GLuint *)(code + 8 + t / 8);
1573          kk = cc[0] >> (t & 7);
1574          b = UP5(kk);
1575          g = UP5(kk >> 5);
1576          r = UP5(kk >> 10);
1577       }
1578    }
1579    rgba[RCOMP] = r;
1580    rgba[GCOMP] = g;
1581    rgba[BCOMP] = b;
1582    rgba[ACOMP] = a;
1583 }
1584
1585
1586 static void
1587 fxt1_decode_1 (const void *texture, GLint stride, /* in pixels */
1588                GLint i, GLint j, GLubyte *rgba)
1589 {
1590    static void (*decode_1[]) (const GLubyte *, GLint, GLubyte *) = {
1591       fxt1_decode_1HI,     /* cc-high   = "00?" */
1592       fxt1_decode_1HI,     /* cc-high   = "00?" */
1593       fxt1_decode_1CHROMA, /* cc-chroma = "010" */
1594       fxt1_decode_1ALPHA,  /* alpha     = "011" */
1595       fxt1_decode_1MIXED,  /* mixed     = "1??" */
1596       fxt1_decode_1MIXED,  /* mixed     = "1??" */
1597       fxt1_decode_1MIXED,  /* mixed     = "1??" */
1598       fxt1_decode_1MIXED   /* mixed     = "1??" */
1599    };
1600
1601    const GLubyte *code = (const GLubyte *)texture +
1602                          ((j / 4) * (stride / 8) + (i / 8)) * 16;
1603    GLint mode = CC_SEL(code, 125);
1604    GLint t = i & 7;
1605
1606    if (t & 4) {
1607       t += 12;
1608    }
1609    t += (j & 3) * 4;
1610
1611    decode_1[mode](code, t, rgba);
1612 }
1613
1614
1615
1616
1617 static void
1618 fetch_rgb_fxt1(const GLubyte *map,
1619                GLint rowStride, GLint i, GLint j, GLfloat *texel)
1620 {
1621    GLubyte rgba[4];
1622    fxt1_decode_1(map, rowStride, i, j, rgba);
1623    texel[RCOMP] = UBYTE_TO_FLOAT(rgba[RCOMP]);
1624    texel[GCOMP] = UBYTE_TO_FLOAT(rgba[GCOMP]);
1625    texel[BCOMP] = UBYTE_TO_FLOAT(rgba[BCOMP]);
1626    texel[ACOMP] = 1.0F;
1627 }
1628
1629
1630 static void
1631 fetch_rgba_fxt1(const GLubyte *map,
1632                 GLint rowStride, GLint i, GLint j, GLfloat *texel)
1633 {
1634    GLubyte rgba[4];
1635    fxt1_decode_1(map, rowStride, i, j, rgba);
1636    texel[RCOMP] = UBYTE_TO_FLOAT(rgba[RCOMP]);
1637    texel[GCOMP] = UBYTE_TO_FLOAT(rgba[GCOMP]);
1638    texel[BCOMP] = UBYTE_TO_FLOAT(rgba[BCOMP]);
1639    texel[ACOMP] = UBYTE_TO_FLOAT(rgba[ACOMP]);
1640 }
1641
1642
1643 compressed_fetch_func
1644 _mesa_get_fxt_fetch_func(gl_format format)
1645 {
1646    switch (format) {
1647    case MESA_FORMAT_RGB_FXT1:
1648       return fetch_rgb_fxt1;
1649    case MESA_FORMAT_RGBA_FXT1:
1650       return fetch_rgba_fxt1;
1651    default:
1652       return NULL;
1653    }
1654 }