src/mesa/main/texcompress_fxt1.c

   1 /*
   2  * Mesa 3-D graphics library
   3  * Version:  7.1
   4  *
   5  * Copyright (C) 1999-2008  Brian Paul   All Rights Reserved.
   6  *
   7  * Permission is hereby granted, free of charge, to any person obtaining a
   8  * copy of this software and associated documentation files (the "Software"),
   9  * to deal in the Software without restriction, including without limitation
  10  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  11  * and/or sell copies of the Software, and to permit persons to whom the
  12  * Software is furnished to do so, subject to the following conditions:
  13  *
  14  * The above copyright notice and this permission notice shall be included
  15  * in all copies or substantial portions of the Software.
  16  *
  17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  18  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  20  * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
  21  * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  22  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  23  */
  24
  25
  26 /**
  27  * \file texcompress_fxt1.c
  28  * GL_3DFX_texture_compression_FXT1 support.
  29  */
  30
  31
  32 #include "glheader.h"
  33 #include "imports.h"
  34 #include "colormac.h"
  35 #include "image.h"
  36 #include "macros.h"
  37 #include "mfeatures.h"
  38 #include "mipmap.h"
  39 #include "texcompress.h"
  40 #include "texcompress_fxt1.h"
  41 #include "texstore.h"
  42
  43
  44 static void
  45 fxt1_encode (GLuint width, GLuint height, GLint comps,
  46              const void *source, GLint srcRowStride,
  47              void *dest, GLint destRowStride);
  48
  49 static void
  50 fxt1_decode_1 (const void *texture, GLint stride,
  51                GLint i, GLint j, GLubyte *rgba);
  52
  53
  54 /**
  55  * Store user's image in rgb_fxt1 format.
  56  */
  57 GLboolean
  58 _mesa_texstore_rgb_fxt1(TEXSTORE_PARAMS)
  59 {
  60    const GLubyte *pixels;
  61    GLint srcRowStride;
  62    GLubyte *dst;
  63    const GLubyte *tempImage = NULL;
  64
  65    ASSERT(dstFormat == MESA_FORMAT_RGB_FXT1);
  66
  67    if (srcFormat != GL_RGB ||
  68        srcType != GL_UNSIGNED_BYTE ||
  69        ctx->_ImageTransferState ||
  70        srcPacking->RowLength != srcWidth ||
  71        srcPacking->SwapBytes) {
  72       /* convert image to RGB/GLubyte */
  73       tempImage = _mesa_make_temp_ubyte_image(ctx, dims,
  74                                              baseInternalFormat,
  75                                              _mesa_get_format_base_format(dstFormat),
  76                                              srcWidth, srcHeight, srcDepth,
  77                                              srcFormat, srcType, srcAddr,
  78                                              srcPacking);
  79       if (!tempImage)
  80          return GL_FALSE; /* out of memory */
  81       pixels = tempImage;
  82       srcRowStride = 3 * srcWidth;
  83       srcFormat = GL_RGB;
  84    }
  85    else {
  86       pixels = _mesa_image_address2d(srcPacking, srcAddr, srcWidth, srcHeight,
  87                                      srcFormat, srcType, 0, 0);
  88
  89       srcRowStride = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat,
  90                                             srcType) / sizeof(GLubyte);
  91    }
  92
  93    dst = dstSlices[0];
  94
  95    fxt1_encode(srcWidth, srcHeight, 3, pixels, srcRowStride,
  96                dst, dstRowStride);
  97
  98    free((void*) tempImage);
  99
 100    return GL_TRUE;
 101 }
 102
 103
 104 /**
 105  * Store user's image in rgba_fxt1 format.
 106  */
 107 GLboolean
 108 _mesa_texstore_rgba_fxt1(TEXSTORE_PARAMS)
 109 {
 110    const GLubyte *pixels;
 111    GLint srcRowStride;
 112    GLubyte *dst;
 113    const GLubyte *tempImage = NULL;
 114
 115    ASSERT(dstFormat == MESA_FORMAT_RGBA_FXT1);
 116
 117    if (srcFormat != GL_RGBA ||
 118        srcType != GL_UNSIGNED_BYTE ||
 119        ctx->_ImageTransferState ||
 120        srcPacking->SwapBytes) {
 121       /* convert image to RGBA/GLubyte */
 122       tempImage = _mesa_make_temp_ubyte_image(ctx, dims,
 123                                              baseInternalFormat,
 124                                              _mesa_get_format_base_format(dstFormat),
 125                                              srcWidth, srcHeight, srcDepth,
 126                                              srcFormat, srcType, srcAddr,
 127                                              srcPacking);
 128       if (!tempImage)
 129          return GL_FALSE; /* out of memory */
 130       pixels = tempImage;
 131       srcRowStride = 4 * srcWidth;
 132       srcFormat = GL_RGBA;
 133    }
 134    else {
 135       pixels = _mesa_image_address2d(srcPacking, srcAddr, srcWidth, srcHeight,
 136                                      srcFormat, srcType, 0, 0);
 137
 138       srcRowStride = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat,
 139                                             srcType) / sizeof(GLubyte);
 140    }
 141
 142    dst = dstSlices[0];
 143
 144    fxt1_encode(srcWidth, srcHeight, 4, pixels, srcRowStride,
 145                dst, dstRowStride);
 146
 147    free((void*) tempImage);
 148
 149    return GL_TRUE;
 150 }
 151
 152
 153 /***************************************************************************\
 154  * FXT1 encoder
 155  *
 156  * The encoder was built by reversing the decoder,
 157  * and is vaguely based on Texus2 by 3dfx. Note that this code
 158  * is merely a proof of concept, since it is highly UNoptimized;
 159  * moreover, it is sub-optimal due to initial conditions passed
 160  * to Lloyd's algorithm (the interpolation modes are even worse).
 161 \***************************************************************************/
 162
 163
 164 #define MAX_COMP 4 /* ever needed maximum number of components in texel */
 165 #define MAX_VECT 4 /* ever needed maximum number of base vectors to find */
 166 #define N_TEXELS 32 /* number of texels in a block (always 32) */
 167 #define LL_N_REP 50 /* number of iterations in lloyd's vq */
 168 #define LL_RMS_D 10 /* fault tolerance (maximum delta) */
 169 #define LL_RMS_E 255 /* fault tolerance (maximum error) */
 170 #define ALPHA_TS 2 /* alpha threshold: (255 - ALPHA_TS) deemed opaque */
 171 #define ISTBLACK(v) (*((GLuint *)(v)) == 0)
 172
 173
 174 /*
 175  * Define a 64-bit unsigned integer type and macros
 176  */
 177 #if 1
 178
 179 #define FX64_NATIVE 1
 180
 181 typedef uint64_t Fx64;
 182
 183 #define FX64_MOV32(a, b) a = b
 184 #define FX64_OR32(a, b)  a |= b
 185 #define FX64_SHL(a, c)   a <<= c
 186
 187 #else
 188
 189 #define FX64_NATIVE 0
 190
 191 typedef struct {
 192    GLuint lo, hi;
 193 } Fx64;
 194
 195 #define FX64_MOV32(a, b) a.lo = b
 196 #define FX64_OR32(a, b)  a.lo |= b
 197
 198 #define FX64_SHL(a, c)                                 \
 199    do {                                                \
 200        if ((c) >= 32) {                                \
 201           a.hi = a.lo << ((c) - 32);                   \
 202           a.lo = 0;                                    \
 203        } else {                                        \
 204           a.hi = (a.hi << (c)) | (a.lo >> (32 - (c))); \
 205           a.lo <<= (c);                                \
 206        }                                               \
 207    } while (0)
 208
 209 #endif
 210
 211
 212 #define F(i) (GLfloat)1 /* can be used to obtain an oblong metric: 0.30 / 0.59 / 0.11 */
 213 #define SAFECDOT 1 /* for paranoids */
 214
 215 #define MAKEIVEC(NV, NC, IV, B, V0, V1)  \
 216    do {                                  \
 217       /* compute interpolation vector */ \
 218       GLfloat d2 = 0.0F;                 \
 219       GLfloat rd2;                       \
 220                                          \
 221       for (i = 0; i < NC; i++) {         \
 222          IV[i] = (V1[i] - V0[i]) * F(i); \
 223          d2 += IV[i] * IV[i];            \
 224       }                                  \
 225       rd2 = (GLfloat)NV / d2;            \
 226       B = 0;                             \
 227       for (i = 0; i < NC; i++) {         \
 228          IV[i] *= F(i);                  \
 229          B -= IV[i] * V0[i];             \
 230          IV[i] *= rd2;                   \
 231       }                                  \
 232       B = B * rd2 + 0.5f;                \
 233    } while (0)
 234
 235 #define CALCCDOT(TEXEL, NV, NC, IV, B, V)\
 236    do {                                  \
 237       GLfloat dot = 0.0F;                \
 238       for (i = 0; i < NC; i++) {         \
 239          dot += V[i] * IV[i];            \
 240       }                                  \
 241       TEXEL = (GLint)(dot + B);          \
 242       if (SAFECDOT) {                    \
 243          if (TEXEL < 0) {                \
 244             TEXEL = 0;                   \
 245          } else if (TEXEL > NV) {        \
 246             TEXEL = NV;                  \
 247          }                               \
 248       }                                  \
 249    } while (0)
 250
 251
 252 static GLint
 253 fxt1_bestcol (GLfloat vec[][MAX_COMP], GLint nv,
 254               GLubyte input[MAX_COMP], GLint nc)
 255 {
 256    GLint i, j, best = -1;
 257    GLfloat err = 1e9; /* big enough */
 258
 259    for (j = 0; j < nv; j++) {
 260       GLfloat e = 0.0F;
 261       for (i = 0; i < nc; i++) {
 262          e += (vec[j][i] - input[i]) * (vec[j][i] - input[i]);
 263       }
 264       if (e < err) {
 265          err = e;
 266          best = j;
 267       }
 268    }
 269
 270    return best;
 271 }
 272
 273
 274 static GLint
 275 fxt1_worst (GLfloat vec[MAX_COMP],
 276             GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
 277 {
 278    GLint i, k, worst = -1;
 279    GLfloat err = -1.0F; /* small enough */
 280
 281    for (k = 0; k < n; k++) {
 282       GLfloat e = 0.0F;
 283       for (i = 0; i < nc; i++) {
 284          e += (vec[i] - input[k][i]) * (vec[i] - input[k][i]);
 285       }
 286       if (e > err) {
 287          err = e;
 288          worst = k;
 289       }
 290    }
 291
 292    return worst;
 293 }
 294
 295
 296 static GLint
 297 fxt1_variance (GLdouble variance[MAX_COMP],
 298                GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
 299 {
 300    GLint i, k, best = 0;
 301    GLint sx, sx2;
 302    GLdouble var, maxvar = -1; /* small enough */
 303    GLdouble teenth = 1.0 / n;
 304
 305    for (i = 0; i < nc; i++) {
 306       sx = sx2 = 0;
 307       for (k = 0; k < n; k++) {
 308          GLint t = input[k][i];
 309          sx += t;
 310          sx2 += t * t;
 311       }
 312       var = sx2 * teenth - sx * sx * teenth * teenth;
 313       if (maxvar < var) {
 314          maxvar = var;
 315          best = i;
 316       }
 317       if (variance) {
 318          variance[i] = var;
 319       }
 320    }
 321
 322    return best;
 323 }
 324
 325
 326 static GLint
 327 fxt1_choose (GLfloat vec[][MAX_COMP], GLint nv,
 328              GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
 329 {
 330 #if 0
 331    /* Choose colors from a grid.
 332     */
 333    GLint i, j;
 334
 335    for (j = 0; j < nv; j++) {
 336       GLint m = j * (n - 1) / (nv - 1);
 337       for (i = 0; i < nc; i++) {
 338          vec[j][i] = input[m][i];
 339       }
 340    }
 341 #else
 342    /* Our solution here is to find the darkest and brightest colors in
 343     * the 8x4 tile and use those as the two representative colors.
 344     * There are probably better algorithms to use (histogram-based).
 345     */
 346    GLint i, j, k;
 347    GLint minSum = 2000; /* big enough */
 348    GLint maxSum = -1; /* small enough */
 349    GLint minCol = 0; /* phoudoin: silent compiler! */
 350    GLint maxCol = 0; /* phoudoin: silent compiler! */
 351
 352    struct {
 353       GLint flag;
 354       GLint key;
 355       GLint freq;
 356       GLint idx;
 357    } hist[N_TEXELS];
 358    GLint lenh = 0;
 359
 360    memset(hist, 0, sizeof(hist));
 361
 362    for (k = 0; k < n; k++) {
 363       GLint l;
 364       GLint key = 0;
 365       GLint sum = 0;
 366       for (i = 0; i < nc; i++) {
 367          key <<= 8;
 368          key |= input[k][i];
 369          sum += input[k][i];
 370       }
 371       for (l = 0; l < n; l++) {
 372          if (!hist[l].flag) {
 373             /* alloc new slot */
 374             hist[l].flag = !0;
 375             hist[l].key = key;
 376             hist[l].freq = 1;
 377             hist[l].idx = k;
 378             lenh = l + 1;
 379             break;
 380          } else if (hist[l].key == key) {
 381             hist[l].freq++;
 382             break;
 383          }
 384       }
 385       if (minSum > sum) {
 386          minSum = sum;
 387          minCol = k;
 388       }
 389       if (maxSum < sum) {
 390          maxSum = sum;
 391          maxCol = k;
 392       }
 393    }
 394
 395    if (lenh <= nv) {
 396       for (j = 0; j < lenh; j++) {
 397          for (i = 0; i < nc; i++) {
 398             vec[j][i] = (GLfloat)input[hist[j].idx][i];
 399          }
 400       }
 401       for (; j < nv; j++) {
 402          for (i = 0; i < nc; i++) {
 403             vec[j][i] = vec[0][i];
 404          }
 405       }
 406       return 0;
 407    }
 408
 409    for (j = 0; j < nv; j++) {
 410       for (i = 0; i < nc; i++) {
 411          vec[j][i] = ((nv - 1 - j) * input[minCol][i] + j * input[maxCol][i] + (nv - 1) / 2) / (GLfloat)(nv - 1);
 412       }
 413    }
 414 #endif
 415
 416    return !0;
 417 }
 418
 419
 420 static GLint
 421 fxt1_lloyd (GLfloat vec[][MAX_COMP], GLint nv,
 422             GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
 423 {
 424    /* Use the generalized lloyd's algorithm for VQ:
 425     *     find 4 color vectors.
 426     *
 427     *     for each sample color
 428     *         sort to nearest vector.
 429     *
 430     *     replace each vector with the centroid of its matching colors.
 431     *
 432     *     repeat until RMS doesn't improve.
 433     *
 434     *     if a color vector has no samples, or becomes the same as another
 435     *     vector, replace it with the color which is farthest from a sample.
 436     *
 437     * vec[][MAX_COMP]           initial vectors and resulting colors
 438     * nv                        number of resulting colors required
 439     * input[N_TEXELS][MAX_COMP] input texels
 440     * nc                        number of components in input / vec
 441     * n                         number of input samples
 442     */
 443
 444    GLint sum[MAX_VECT][MAX_COMP]; /* used to accumulate closest texels */
 445    GLint cnt[MAX_VECT]; /* how many times a certain vector was chosen */
 446    GLfloat error, lasterror = 1e9;
 447
 448    GLint i, j, k, rep;
 449
 450    /* the quantizer */
 451    for (rep = 0; rep < LL_N_REP; rep++) {
 452       /* reset sums & counters */
 453       for (j = 0; j < nv; j++) {
 454          for (i = 0; i < nc; i++) {
 455             sum[j][i] = 0;
 456          }
 457          cnt[j] = 0;
 458       }
 459       error = 0;
 460
 461       /* scan whole block */
 462       for (k = 0; k < n; k++) {
 463 #if 1
 464          GLint best = -1;
 465          GLfloat err = 1e9; /* big enough */
 466          /* determine best vector */
 467          for (j = 0; j < nv; j++) {
 468             GLfloat e = (vec[j][0] - input[k][0]) * (vec[j][0] - input[k][0]) +
 469                       (vec[j][1] - input[k][1]) * (vec[j][1] - input[k][1]) +
 470                       (vec[j][2] - input[k][2]) * (vec[j][2] - input[k][2]);
 471             if (nc == 4) {
 472                e += (vec[j][3] - input[k][3]) * (vec[j][3] - input[k][3]);
 473             }
 474             if (e < err) {
 475                err = e;
 476                best = j;
 477             }
 478          }
 479 #else
 480          GLint best = fxt1_bestcol(vec, nv, input[k], nc, &err);
 481 #endif
 482          assert(best >= 0);
 483          /* add in closest color */
 484          for (i = 0; i < nc; i++) {
 485             sum[best][i] += input[k][i];
 486          }
 487          /* mark this vector as used */
 488          cnt[best]++;
 489          /* accumulate error */
 490          error += err;
 491       }
 492
 493       /* check RMS */
 494       if ((error < LL_RMS_E) ||
 495           ((error < lasterror) && ((lasterror - error) < LL_RMS_D))) {
 496          return !0; /* good match */
 497       }
 498       lasterror = error;
 499
 500       /* move each vector to the barycenter of its closest colors */
 501       for (j = 0; j < nv; j++) {
 502          if (cnt[j]) {
 503             GLfloat div = 1.0F / cnt[j];
 504             for (i = 0; i < nc; i++) {
 505                vec[j][i] = div * sum[j][i];
 506             }
 507          } else {
 508             /* this vec has no samples or is identical with a previous vec */
 509             GLint worst = fxt1_worst(vec[j], input, nc, n);
 510             for (i = 0; i < nc; i++) {
 511                vec[j][i] = input[worst][i];
 512             }
 513          }
 514       }
 515    }
 516
 517    return 0; /* could not converge fast enough */
 518 }
 519
 520
 521 static void
 522 fxt1_quantize_CHROMA (GLuint *cc,
 523                       GLubyte input[N_TEXELS][MAX_COMP])
 524 {
 525    const GLint n_vect = 4; /* 4 base vectors to find */
 526    const GLint n_comp = 3; /* 3 components: R, G, B */
 527    GLfloat vec[MAX_VECT][MAX_COMP];
 528    GLint i, j, k;
 529    Fx64 hi; /* high quadword */
 530    GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
 531
 532    if (fxt1_choose(vec, n_vect, input, n_comp, N_TEXELS) != 0) {
 533       fxt1_lloyd(vec, n_vect, input, n_comp, N_TEXELS);
 534    }
 535
 536    FX64_MOV32(hi, 4); /* cc-chroma = "010" + unused bit */
 537    for (j = n_vect - 1; j >= 0; j--) {
 538       for (i = 0; i < n_comp; i++) {
 539          /* add in colors */
 540          FX64_SHL(hi, 5);
 541          FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
 542       }
 543    }
 544    ((Fx64 *)cc)[1] = hi;
 545
 546    lohi = lolo = 0;
 547    /* right microtile */
 548    for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) {
 549       lohi <<= 2;
 550       lohi |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
 551    }
 552    /* left microtile */
 553    for (; k >= 0; k--) {
 554       lolo <<= 2;
 555       lolo |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
 556    }
 557    cc[1] = lohi;
 558    cc[0] = lolo;
 559 }
 560
 561
 562 static void
 563 fxt1_quantize_ALPHA0 (GLuint *cc,
 564                       GLubyte input[N_TEXELS][MAX_COMP],
 565                       GLubyte reord[N_TEXELS][MAX_COMP], GLint n)
 566 {
 567    const GLint n_vect = 3; /* 3 base vectors to find */
 568    const GLint n_comp = 4; /* 4 components: R, G, B, A */
 569    GLfloat vec[MAX_VECT][MAX_COMP];
 570    GLint i, j, k;
 571    Fx64 hi; /* high quadword */
 572    GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
 573
 574    /* the last vector indicates zero */
 575    for (i = 0; i < n_comp; i++) {
 576       vec[n_vect][i] = 0;
 577    }
 578
 579    /* the first n texels in reord are guaranteed to be non-zero */
 580    if (fxt1_choose(vec, n_vect, reord, n_comp, n) != 0) {
 581       fxt1_lloyd(vec, n_vect, reord, n_comp, n);
 582    }
 583
 584    FX64_MOV32(hi, 6); /* alpha = "011" + lerp = 0 */
 585    for (j = n_vect - 1; j >= 0; j--) {
 586       /* add in alphas */
 587       FX64_SHL(hi, 5);
 588       FX64_OR32(hi, (GLuint)(vec[j][ACOMP] / 8.0F));
 589    }
 590    for (j = n_vect - 1; j >= 0; j--) {
 591       for (i = 0; i < n_comp - 1; i++) {
 592          /* add in colors */
 593          FX64_SHL(hi, 5);
 594          FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
 595       }
 596    }
 597    ((Fx64 *)cc)[1] = hi;
 598
 599    lohi = lolo = 0;
 600    /* right microtile */
 601    for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) {
 602       lohi <<= 2;
 603       lohi |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
 604    }
 605    /* left microtile */
 606    for (; k >= 0; k--) {
 607       lolo <<= 2;
 608       lolo |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
 609    }
 610    cc[1] = lohi;
 611    cc[0] = lolo;
 612 }
 613
 614
 615 static void
 616 fxt1_quantize_ALPHA1 (GLuint *cc,
 617                       GLubyte input[N_TEXELS][MAX_COMP])
 618 {
 619    const GLint n_vect = 3; /* highest vector number in each microtile */
 620    const GLint n_comp = 4; /* 4 components: R, G, B, A */
 621    GLfloat vec[1 + 1 + 1][MAX_COMP]; /* 1.5 extrema for each sub-block */
 622    GLfloat b, iv[MAX_COMP]; /* interpolation vector */
 623    GLint i, j, k;
 624    Fx64 hi; /* high quadword */
 625    GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
 626
 627    GLint minSum;
 628    GLint maxSum;
 629    GLint minColL = 0, maxColL = 0;
 630    GLint minColR = 0, maxColR = 0;
 631    GLint sumL = 0, sumR = 0;
 632    GLint nn_comp;
 633    /* Our solution here is to find the darkest and brightest colors in
 634     * the 4x4 tile and use those as the two representative colors.
 635     * There are probably better algorithms to use (histogram-based).
 636     */
 637    nn_comp = n_comp;
 638    while ((minColL == maxColL) && nn_comp) {
 639        minSum = 2000; /* big enough */
 640        maxSum = -1; /* small enough */
 641        for (k = 0; k < N_TEXELS / 2; k++) {
 642            GLint sum = 0;
 643            for (i = 0; i < nn_comp; i++) {
 644                sum += input[k][i];
 645            }
 646            if (minSum > sum) {
 647                minSum = sum;
 648                minColL = k;
 649            }
 650            if (maxSum < sum) {
 651                maxSum = sum;
 652                maxColL = k;
 653            }
 654            sumL += sum;
 655        }
 656
 657        nn_comp--;
 658    }
 659
 660    nn_comp = n_comp;
 661    while ((minColR == maxColR) && nn_comp) {
 662        minSum = 2000; /* big enough */
 663        maxSum = -1; /* small enough */
 664        for (k = N_TEXELS / 2; k < N_TEXELS; k++) {
 665            GLint sum = 0;
 666            for (i = 0; i < nn_comp; i++) {
 667                sum += input[k][i];
 668            }
 669            if (minSum > sum) {
 670                minSum = sum;
 671                minColR = k;
 672            }
 673            if (maxSum < sum) {
 674                maxSum = sum;
 675                maxColR = k;
 676            }
 677            sumR += sum;
 678        }
 679
 680        nn_comp--;
 681    }
 682
 683    /* choose the common vector (yuck!) */
 684    {
 685       GLint j1, j2;
 686       GLint v1 = 0, v2 = 0;
 687       GLfloat err = 1e9; /* big enough */
 688       GLfloat tv[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
 689       for (i = 0; i < n_comp; i++) {
 690          tv[0][i] = input[minColL][i];
 691          tv[1][i] = input[maxColL][i];
 692          tv[2][i] = input[minColR][i];
 693          tv[3][i] = input[maxColR][i];
 694       }
 695       for (j1 = 0; j1 < 2; j1++) {
 696          for (j2 = 2; j2 < 4; j2++) {
 697             GLfloat e = 0.0F;
 698             for (i = 0; i < n_comp; i++) {
 699                e += (tv[j1][i] - tv[j2][i]) * (tv[j1][i] - tv[j2][i]);
 700             }
 701             if (e < err) {
 702                err = e;
 703                v1 = j1;
 704                v2 = j2;
 705             }
 706          }
 707       }
 708       for (i = 0; i < n_comp; i++) {
 709          vec[0][i] = tv[1 - v1][i];
 710          vec[1][i] = (tv[v1][i] * sumL + tv[v2][i] * sumR) / (sumL + sumR);
 711          vec[2][i] = tv[5 - v2][i];
 712       }
 713    }
 714
 715    /* left microtile */
 716    cc[0] = 0;
 717    if (minColL != maxColL) {
 718       /* compute interpolation vector */
 719       MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
 720
 721       /* add in texels */
 722       lolo = 0;
 723       for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
 724          GLint texel;
 725          /* interpolate color */
 726          CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
 727          /* add in texel */
 728          lolo <<= 2;
 729          lolo |= texel;
 730       }
 731
 732       cc[0] = lolo;
 733    }
 734
 735    /* right microtile */
 736    cc[1] = 0;
 737    if (minColR != maxColR) {
 738       /* compute interpolation vector */
 739       MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[1]);
 740
 741       /* add in texels */
 742       lohi = 0;
 743       for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
 744          GLint texel;
 745          /* interpolate color */
 746          CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
 747          /* add in texel */
 748          lohi <<= 2;
 749          lohi |= texel;
 750       }
 751
 752       cc[1] = lohi;
 753    }
 754
 755    FX64_MOV32(hi, 7); /* alpha = "011" + lerp = 1 */
 756    for (j = n_vect - 1; j >= 0; j--) {
 757       /* add in alphas */
 758       FX64_SHL(hi, 5);
 759       FX64_OR32(hi, (GLuint)(vec[j][ACOMP] / 8.0F));
 760    }
 761    for (j = n_vect - 1; j >= 0; j--) {
 762       for (i = 0; i < n_comp - 1; i++) {
 763          /* add in colors */
 764          FX64_SHL(hi, 5);
 765          FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
 766       }
 767    }
 768    ((Fx64 *)cc)[1] = hi;
 769 }
 770
 771
 772 static void
 773 fxt1_quantize_HI (GLuint *cc,
 774                   GLubyte input[N_TEXELS][MAX_COMP],
 775                   GLubyte reord[N_TEXELS][MAX_COMP], GLint n)
 776 {
 777    const GLint n_vect = 6; /* highest vector number */
 778    const GLint n_comp = 3; /* 3 components: R, G, B */
 779    GLfloat b = 0.0F;       /* phoudoin: silent compiler! */
 780    GLfloat iv[MAX_COMP];   /* interpolation vector */
 781    GLint i, k;
 782    GLuint hihi; /* high quadword: hi dword */
 783
 784    GLint minSum = 2000; /* big enough */
 785    GLint maxSum = -1; /* small enough */
 786    GLint minCol = 0; /* phoudoin: silent compiler! */
 787    GLint maxCol = 0; /* phoudoin: silent compiler! */
 788
 789    /* Our solution here is to find the darkest and brightest colors in
 790     * the 8x4 tile and use those as the two representative colors.
 791     * There are probably better algorithms to use (histogram-based).
 792     */
 793    for (k = 0; k < n; k++) {
 794       GLint sum = 0;
 795       for (i = 0; i < n_comp; i++) {
 796          sum += reord[k][i];
 797       }
 798       if (minSum > sum) {
 799          minSum = sum;
 800          minCol = k;
 801       }
 802       if (maxSum < sum) {
 803          maxSum = sum;
 804          maxCol = k;
 805       }
 806    }
 807
 808    hihi = 0; /* cc-hi = "00" */
 809    for (i = 0; i < n_comp; i++) {
 810       /* add in colors */
 811       hihi <<= 5;
 812       hihi |= reord[maxCol][i] >> 3;
 813    }
 814    for (i = 0; i < n_comp; i++) {
 815       /* add in colors */
 816       hihi <<= 5;
 817       hihi |= reord[minCol][i] >> 3;
 818    }
 819    cc[3] = hihi;
 820    cc[0] = cc[1] = cc[2] = 0;
 821
 822    /* compute interpolation vector */
 823    if (minCol != maxCol) {
 824       MAKEIVEC(n_vect, n_comp, iv, b, reord[minCol], reord[maxCol]);
 825    }
 826
 827    /* add in texels */
 828    for (k = N_TEXELS - 1; k >= 0; k--) {
 829       GLint t = k * 3;
 830       GLuint *kk = (GLuint *)((char *)cc + t / 8);
 831       GLint texel = n_vect + 1; /* transparent black */
 832
 833       if (!ISTBLACK(input[k])) {
 834          if (minCol != maxCol) {
 835             /* interpolate color */
 836             CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
 837             /* add in texel */
 838             kk[0] |= texel << (t & 7);
 839          }
 840       } else {
 841          /* add in texel */
 842          kk[0] |= texel << (t & 7);
 843       }
 844    }
 845 }
 846
 847
 848 static void
 849 fxt1_quantize_MIXED1 (GLuint *cc,
 850                       GLubyte input[N_TEXELS][MAX_COMP])
 851 {
 852    const GLint n_vect = 2; /* highest vector number in each microtile */
 853    const GLint n_comp = 3; /* 3 components: R, G, B */
 854    GLubyte vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
 855    GLfloat b, iv[MAX_COMP]; /* interpolation vector */
 856    GLint i, j, k;
 857    Fx64 hi; /* high quadword */
 858    GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
 859
 860    GLint minSum;
 861    GLint maxSum;
 862    GLint minColL = 0, maxColL = -1;
 863    GLint minColR = 0, maxColR = -1;
 864
 865    /* Our solution here is to find the darkest and brightest colors in
 866     * the 4x4 tile and use those as the two representative colors.
 867     * There are probably better algorithms to use (histogram-based).
 868     */
 869    minSum = 2000; /* big enough */
 870    maxSum = -1; /* small enough */
 871    for (k = 0; k < N_TEXELS / 2; k++) {
 872       if (!ISTBLACK(input[k])) {
 873          GLint sum = 0;
 874          for (i = 0; i < n_comp; i++) {
 875             sum += input[k][i];
 876          }
 877          if (minSum > sum) {
 878             minSum = sum;
 879             minColL = k;
 880          }
 881          if (maxSum < sum) {
 882             maxSum = sum;
 883             maxColL = k;
 884          }
 885       }
 886    }
 887    minSum = 2000; /* big enough */
 888    maxSum = -1; /* small enough */
 889    for (; k < N_TEXELS; k++) {
 890       if (!ISTBLACK(input[k])) {
 891          GLint sum = 0;
 892          for (i = 0; i < n_comp; i++) {
 893             sum += input[k][i];
 894          }
 895          if (minSum > sum) {
 896             minSum = sum;
 897             minColR = k;
 898          }
 899          if (maxSum < sum) {
 900             maxSum = sum;
 901             maxColR = k;
 902          }
 903       }
 904    }
 905
 906    /* left microtile */
 907    if (maxColL == -1) {
 908       /* all transparent black */
 909       cc[0] = ~0u;
 910       for (i = 0; i < n_comp; i++) {
 911          vec[0][i] = 0;
 912          vec[1][i] = 0;
 913       }
 914    } else {
 915       cc[0] = 0;
 916       for (i = 0; i < n_comp; i++) {
 917          vec[0][i] = input[minColL][i];
 918          vec[1][i] = input[maxColL][i];
 919       }
 920       if (minColL != maxColL) {
 921          /* compute interpolation vector */
 922          MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
 923
 924          /* add in texels */
 925          lolo = 0;
 926          for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
 927             GLint texel = n_vect + 1; /* transparent black */
 928             if (!ISTBLACK(input[k])) {
 929                /* interpolate color */
 930                CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
 931             }
 932             /* add in texel */
 933             lolo <<= 2;
 934             lolo |= texel;
 935          }
 936          cc[0] = lolo;
 937       }
 938    }
 939
 940    /* right microtile */
 941    if (maxColR == -1) {
 942       /* all transparent black */
 943       cc[1] = ~0u;
 944       for (i = 0; i < n_comp; i++) {
 945          vec[2][i] = 0;
 946          vec[3][i] = 0;
 947       }
 948    } else {
 949       cc[1] = 0;
 950       for (i = 0; i < n_comp; i++) {
 951          vec[2][i] = input[minColR][i];
 952          vec[3][i] = input[maxColR][i];
 953       }
 954       if (minColR != maxColR) {
 955          /* compute interpolation vector */
 956          MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);
 957
 958          /* add in texels */
 959          lohi = 0;
 960          for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
 961             GLint texel = n_vect + 1; /* transparent black */
 962             if (!ISTBLACK(input[k])) {
 963                /* interpolate color */
 964                CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
 965             }
 966             /* add in texel */
 967             lohi <<= 2;
 968             lohi |= texel;
 969          }
 970          cc[1] = lohi;
 971       }
 972    }
 973
 974    FX64_MOV32(hi, 9 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
 975    for (j = 2 * 2 - 1; j >= 0; j--) {
 976       for (i = 0; i < n_comp; i++) {
 977          /* add in colors */
 978          FX64_SHL(hi, 5);
 979          FX64_OR32(hi, vec[j][i] >> 3);
 980       }
 981    }
 982    ((Fx64 *)cc)[1] = hi;
 983 }
 984
 985
/**
 * Quantize one block of N_TEXELS opaque RGB texels in "mixed" mode with
 * the alpha flag cleared.
 *
 * The block is treated as two halves ("microtiles"): texels
 * 0 .. N_TEXELS/2-1 (left) and N_TEXELS/2 .. N_TEXELS-1 (right).  For each
 * half two endpoint texels are picked and every texel gets a 2-bit
 * selector (0 .. n_vect) between them.
 *
 * \param cc     output code word: cc[0] receives the left selectors, cc[1]
 *               the right selectors, and the upper 64 bits (written
 *               through an Fx64 view of cc) receive the four endpoint
 *               colors plus the mode/glsb/alpha bits.
 * \param input  the texels of the block; only components 0 .. n_comp-1
 *               are read here.
 *
 * NOTE(review): MAKEIVEC, CALCCDOT, FX64_* and fxt1_variance are defined
 * outside this view.  The macros may reference locals such as `i' by name;
 * confirm before renaming anything in this function.
 */
static void
fxt1_quantize_MIXED0 (GLuint *cc,
                      GLubyte input[N_TEXELS][MAX_COMP])
{
   const GLint n_vect = 3; /* highest vector number in each microtile */
   const GLint n_comp = 3; /* 3 components: R, G, B */
   GLubyte vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
   GLfloat b, iv[MAX_COMP]; /* interpolation vector */
   GLint i, j, k;
   Fx64 hi; /* high quadword */
   GLuint lohi, lolo; /* low quadword: hi dword, lo dword */

   /* indices (into input[]) of the texels chosen as endpoints */
   GLint minColL = 0, maxColL = 0;
   GLint minColR = 0, maxColR = 0;
#if 0
   /* Disabled alternative: choose endpoints by summed brightness. */
   GLint minSum;
   GLint maxSum;

   /* Our solution here is to find the darkest and brightest colors in
    * the 4x4 tile and use those as the two representative colors.
    * There are probably better algorithms to use (histogram-based).
    */
   minSum = 2000; /* big enough */
   maxSum = -1; /* small enough */
   for (k = 0; k < N_TEXELS / 2; k++) {
      GLint sum = 0;
      for (i = 0; i < n_comp; i++) {
         sum += input[k][i];
      }
      if (minSum > sum) {
         minSum = sum;
         minColL = k;
      }
      if (maxSum < sum) {
         maxSum = sum;
         maxColL = k;
      }
   }
   minSum = 2000; /* big enough */
   maxSum = -1; /* small enough */
   for (; k < N_TEXELS; k++) {
      GLint sum = 0;
      for (i = 0; i < n_comp; i++) {
         sum += input[k][i];
      }
      if (minSum > sum) {
         minSum = sum;
         minColR = k;
      }
      if (maxSum < sum) {
         maxSum = sum;
         maxColR = k;
      }
   }
#else
   GLint minVal;
   GLint maxVal;
   /* The return value is used below as a component index into each texel;
    * presumably it is the channel with the largest variance (see the
    * comment that follows) -- fxt1_variance is not visible here.
    */
   GLint maxVarL = fxt1_variance(NULL, input, n_comp, N_TEXELS / 2);
   GLint maxVarR = fxt1_variance(NULL, &input[N_TEXELS / 2], n_comp, N_TEXELS / 2);

   /* Scan the channel with max variance for lo & hi
    * and use those as the two representative colors.
    */
   minVal = 2000; /* big enough */
   maxVal = -1; /* small enough */
   for (k = 0; k < N_TEXELS / 2; k++) {
      GLint t = input[k][maxVarL];
      if (minVal > t) {
         minVal = t;
         minColL = k;
      }
      if (maxVal < t) {
         maxVal = t;
         maxColL = k;
      }
   }
   minVal = 2000; /* big enough */
   maxVal = -1; /* small enough */
   /* k continues from N_TEXELS / 2: the right half of the block */
   for (; k < N_TEXELS; k++) {
      GLint t = input[k][maxVarR];
      if (minVal > t) {
         minVal = t;
         minColR = k;
      }
      if (maxVal < t) {
         maxVal = t;
         maxColR = k;
      }
   }
#endif

   /* left microtile */
   cc[0] = 0; /* stays 0 (every selector 0) when both endpoints are the same texel */
   for (i = 0; i < n_comp; i++) {
      vec[0][i] = input[minColL][i];
      vec[1][i] = input[maxColL][i];
   }
   if (minColL != maxColL) {
      /* compute interpolation vector */
      MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);

      /* add in texels */
      /* walk from the last texel down so texel 0 lands in the lowest 2 bits */
      lolo = 0;
      for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
         GLint texel;
         /* interpolate color */
         CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
         /* add in texel */
         lolo <<= 2;
         lolo |= texel;
      }

      /* funky encoding for LSB of green */
      /* Bit 1 of the selector word must equal bit 2 of (green1 ^ green0);
       * the decoder rebuilds color 0's sixth green bit as glsb ^ selb.
       * If it does not match, swap the endpoints and complement every
       * selector (s becomes 3 - s), which describes the same texels.
       */
      if ((GLint)((lolo >> 1) & 1) != (((vec[1][GCOMP] ^ vec[0][GCOMP]) >> 2) & 1)) {
         for (i = 0; i < n_comp; i++) {
            vec[1][i] = input[minColL][i];
            vec[0][i] = input[maxColL][i];
         }
         lolo = ~lolo;
      }

      cc[0] = lolo;
   }

   /* right microtile */
   cc[1] = 0; /* stays 0 when both endpoints are the same texel */
   for (i = 0; i < n_comp; i++) {
      vec[2][i] = input[minColR][i];
      vec[3][i] = input[maxColR][i];
   }
   if (minColR != maxColR) {
      /* compute interpolation vector */
      MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);

      /* add in texels */
      lohi = 0;
      for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
         GLint texel;
         /* interpolate color */
         CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
         /* add in texel */
         lohi <<= 2;
         lohi |= texel;
      }

      /* funky encoding for LSB of green */
      /* same endpoint swap / selector complement as for the left half */
      if ((GLint)((lohi >> 1) & 1) != (((vec[3][GCOMP] ^ vec[2][GCOMP]) >> 2) & 1)) {
         for (i = 0; i < n_comp; i++) {
            vec[3][i] = input[minColR][i];
            vec[2][i] = input[maxColR][i];
         }
         lohi = ~lohi;
      }

      cc[1] = lohi;
   }

   /* Top nibble of the high quadword: 8 sets the "mixed" mode bit, then
    * bit 2 of the right and left high-endpoint greens (glsb), and a zero
    * alpha flag in the lowest bit.
    */
   FX64_MOV32(hi, 8 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* mode = "1" (mixed), alpha = 0 */
   /* Shift in the four endpoints as 5-bit fields, vec[3] first, so that
    * vec[0]'s last component ends up in the lowest bits.
    */
   for (j = 2 * 2 - 1; j >= 0; j--) {
      for (i = 0; i < n_comp; i++) {
         /* add in colors */
         FX64_SHL(hi, 5);
         FX64_OR32(hi, vec[j][i] >> 3);
      }
   }
   /* store as the upper 64 bits of the 128-bit code word */
   ((Fx64 *)cc)[1] = hi;
}
1153
1154
1155 static void
1156 fxt1_quantize (GLuint *cc, const GLubyte *lines[], GLint comps)
1157 {
1158    GLint trualpha;
1159    GLubyte reord[N_TEXELS][MAX_COMP];
1160
1161    GLubyte input[N_TEXELS][MAX_COMP];
1162    GLint i, k, l;
1163
1164    if (comps == 3) {
1165       /* make the whole block opaque */
1166       memset(input, -1, sizeof(input));
1167    }
1168
1169    /* 8 texels each line */
1170    for (l = 0; l < 4; l++) {
1171       for (k = 0; k < 4; k++) {
1172          for (i = 0; i < comps; i++) {
1173             input[k + l * 4][i] = *lines[l]++;
1174          }
1175       }
1176       for (; k < 8; k++) {
1177          for (i = 0; i < comps; i++) {
1178             input[k + l * 4 + 12][i] = *lines[l]++;
1179          }
1180       }
1181    }
1182
1183    /* block layout:
1184     * 00, 01, 02, 03, 08, 09, 0a, 0b
1185     * 10, 11, 12, 13, 18, 19, 1a, 1b
1186     * 04, 05, 06, 07, 0c, 0d, 0e, 0f
1187     * 14, 15, 16, 17, 1c, 1d, 1e, 1f
1188     */
1189
1190    /* [dBorca]
1191     * stupidity flows forth from this
1192     */
1193    l = N_TEXELS;
1194    trualpha = 0;
1195    if (comps == 4) {
1196       /* skip all transparent black texels */
1197       l = 0;
1198       for (k = 0; k < N_TEXELS; k++) {
1199          /* test all components against 0 */
1200          if (!ISTBLACK(input[k])) {
1201             /* texel is not transparent black */
1202             COPY_4UBV(reord[l], input[k]);
1203             if (reord[l][ACOMP] < (255 - ALPHA_TS)) {
1204                /* non-opaque texel */
1205                trualpha = !0;
1206             }
1207             l++;
1208          }
1209       }
1210    }
1211
1212 #if 0
1213    if (trualpha) {
1214       fxt1_quantize_ALPHA0(cc, input, reord, l);
1215    } else if (l == 0) {
1216       cc[0] = cc[1] = cc[2] = -1;
1217       cc[3] = 0;
1218    } else if (l < N_TEXELS) {
1219       fxt1_quantize_HI(cc, input, reord, l);
1220    } else {
1221       fxt1_quantize_CHROMA(cc, input);
1222    }
1223    (void)fxt1_quantize_ALPHA1;
1224    (void)fxt1_quantize_MIXED1;
1225    (void)fxt1_quantize_MIXED0;
1226 #else
1227    if (trualpha) {
1228       fxt1_quantize_ALPHA1(cc, input);
1229    } else if (l == 0) {
1230       cc[0] = cc[1] = cc[2] = ~0u;
1231       cc[3] = 0;
1232    } else if (l < N_TEXELS) {
1233       fxt1_quantize_MIXED1(cc, input);
1234    } else {
1235       fxt1_quantize_MIXED0(cc, input);
1236    }
1237    (void)fxt1_quantize_ALPHA0;
1238    (void)fxt1_quantize_HI;
1239    (void)fxt1_quantize_CHROMA;
1240 #endif
1241 }
1242
1243
1244
1245 /**
1246  * Upscale an image by replication, not (typical) stretching.
1247  * We use this when the image width or height is less than a
1248  * certain size (4, 8) and we need to upscale an image.
1249  */
1250 static void
1251 upscale_teximage2d(GLsizei inWidth, GLsizei inHeight,
1252                    GLsizei outWidth, GLsizei outHeight,
1253                    GLint comps, const GLubyte *src, GLint srcRowStride,
1254                    GLubyte *dest )
1255 {
1256    GLint i, j, k;
1257
1258    ASSERT(outWidth >= inWidth);
1259    ASSERT(outHeight >= inHeight);
1260 #if 0
1261    ASSERT(inWidth == 1 || inWidth == 2 || inHeight == 1 || inHeight == 2);
1262    ASSERT((outWidth & 3) == 0);
1263    ASSERT((outHeight & 3) == 0);
1264 #endif
1265
1266    for (i = 0; i < outHeight; i++) {
1267       const GLint ii = i % inHeight;
1268       for (j = 0; j < outWidth; j++) {
1269          const GLint jj = j % inWidth;
1270          for (k = 0; k < comps; k++) {
1271             dest[(i * outWidth + j) * comps + k]
1272                = src[ii * srcRowStride + jj * comps + k];
1273          }
1274       }
1275    }
1276 }
1277
1278
1279 static void
1280 fxt1_encode (GLuint width, GLuint height, GLint comps,
1281              const void *source, GLint srcRowStride,
1282              void *dest, GLint destRowStride)
1283 {
1284    GLuint x, y;
1285    const GLubyte *data;
1286    GLuint *encoded = (GLuint *)dest;
1287    void *newSource = NULL;
1288
1289    assert(comps == 3 || comps == 4);
1290
1291    /* Replicate image if width is not M8 or height is not M4 */
1292    if ((width & 7) | (height & 3)) {
1293       GLint newWidth = (width + 7) & ~7;
1294       GLint newHeight = (height + 3) & ~3;
1295       newSource = malloc(comps * newWidth * newHeight * sizeof(GLubyte));
1296       if (!newSource) {
1297          GET_CURRENT_CONTEXT(ctx);
1298          _mesa_error(ctx, GL_OUT_OF_MEMORY, "texture compression");
1299          goto cleanUp;
1300       }
1301       upscale_teximage2d(width, height, newWidth, newHeight,
1302                          comps, (const GLubyte *) source,
1303                          srcRowStride, (GLubyte *) newSource);
1304       source = newSource;
1305       width = newWidth;
1306       height = newHeight;
1307       srcRowStride = comps * newWidth;
1308    }
1309
1310    data = (const GLubyte *) source;
1311    destRowStride = (destRowStride - width * 2) / 4;
1312    for (y = 0; y < height; y += 4) {
1313       GLuint offs = 0 + (y + 0) * srcRowStride;
1314       for (x = 0; x < width; x += 8) {
1315          const GLubyte *lines[4];
1316          lines[0] = &data[offs];
1317          lines[1] = lines[0] + srcRowStride;
1318          lines[2] = lines[1] + srcRowStride;
1319          lines[3] = lines[2] + srcRowStride;
1320          offs += 8 * comps;
1321          fxt1_quantize(encoded, lines, comps);
1322          /* 128 bits per 8x4 block */
1323          encoded += 4;
1324       }
1325       encoded += destRowStride;
1326    }
1327
1328  cleanUp:
1329    free(newSource);
1330 }
1331
1332
1333 /***************************************************************************\
1334  * FXT1 decoder
1335  *
1336  * The decoder is based on GL_3DFX_texture_compression_FXT1
1337  * specification and serves as a concept for the encoder.
1338 \***************************************************************************/
1339
1340
1341 /* lookup table for scaling 5 bit colors up to 8 bits */
1342 static const GLubyte _rgb_scale_5[] = {
1343    0,   8,   16,  25,  33,  41,  49,  58,
1344    66,  74,  82,  90,  99,  107, 115, 123,
1345    132, 140, 148, 156, 165, 173, 181, 189,
1346    197, 206, 214, 222, 230, 239, 247, 255
1347 };
1348
1349 /* lookup table for scaling 6 bit colors up to 8 bits */
1350 static const GLubyte _rgb_scale_6[] = {
1351    0,   4,   8,   12,  16,  20,  24,  28,
1352    32,  36,  40,  45,  49,  53,  57,  61,
1353    65,  69,  73,  77,  81,  85,  89,  93,
1354    97,  101, 105, 109, 113, 117, 121, 125,
1355    130, 134, 138, 142, 146, 150, 154, 158,
1356    162, 166, 170, 174, 178, 182, 186, 190,
1357    194, 198, 202, 206, 210, 215, 219, 223,
1358    227, 231, 235, 239, 243, 247, 251, 255
1359 };
1360
1361
/* CC_SEL: view `cc' as an array of 32-bit words and return the word that
 * holds bit number `which', shifted right so that bit sits at position 0.
 * Higher bits are NOT masked off; callers mask (e.g. via UP5) as needed.
 * The cast keeps the pointer const since the macro only reads.
 */
#define CC_SEL(cc, which) (((const GLuint *)(cc))[(which) / 32] >> ((which) & 31))
/* UP5: scale the low 5 bits of `c' to an 8-bit value. */
#define UP5(c) (_rgb_scale_5[(c) & 31])
/* UP6: scale a 6-bit value, built from the low 5 bits of `c' and the low
 * bit of `b' as least significant bit, to an 8-bit value.
 */
#define UP6(c, b) (_rgb_scale_6[(((c) & 31) << 1) | ((b) & 1)])
/* LERP: rounded integer interpolation, step `t' of `n' from c0 to c1.
 * The whole expansion is parenthesized so the macro binds as one operand
 * inside larger expressions.
 */
#define LERP(n, t, c0, c1) ((((n) - (t)) * (c0) + (t) * (c1) + (n) / 2) / (n))
1366
1367
1368 static void
1369 fxt1_decode_1HI (const GLubyte *code, GLint t, GLubyte *rgba)
1370 {
1371    const GLuint *cc;
1372
1373    t *= 3;
1374    cc = (const GLuint *)(code + t / 8);
1375    t = (cc[0] >> (t & 7)) & 7;
1376
1377    if (t == 7) {
1378       rgba[RCOMP] = rgba[GCOMP] = rgba[BCOMP] = rgba[ACOMP] = 0;
1379    } else {
1380       GLubyte r, g, b;
1381       cc = (const GLuint *)(code + 12);
1382       if (t == 0) {
1383          b = UP5(CC_SEL(cc, 0));
1384          g = UP5(CC_SEL(cc, 5));
1385          r = UP5(CC_SEL(cc, 10));
1386       } else if (t == 6) {
1387          b = UP5(CC_SEL(cc, 15));
1388          g = UP5(CC_SEL(cc, 20));
1389          r = UP5(CC_SEL(cc, 25));
1390       } else {
1391          b = LERP(6, t, UP5(CC_SEL(cc, 0)), UP5(CC_SEL(cc, 15)));
1392          g = LERP(6, t, UP5(CC_SEL(cc, 5)), UP5(CC_SEL(cc, 20)));
1393          r = LERP(6, t, UP5(CC_SEL(cc, 10)), UP5(CC_SEL(cc, 25)));
1394       }
1395       rgba[RCOMP] = r;
1396       rgba[GCOMP] = g;
1397       rgba[BCOMP] = b;
1398       rgba[ACOMP] = 255;
1399    }
1400 }
1401
1402
1403 static void
1404 fxt1_decode_1CHROMA (const GLubyte *code, GLint t, GLubyte *rgba)
1405 {
1406    const GLuint *cc;
1407    GLuint kk;
1408
1409    cc = (const GLuint *)code;
1410    if (t & 16) {
1411       cc++;
1412       t &= 15;
1413    }
1414    t = (cc[0] >> (t * 2)) & 3;
1415
1416    t *= 15;
1417    cc = (const GLuint *)(code + 8 + t / 8);
1418    kk = cc[0] >> (t & 7);
1419    rgba[BCOMP] = UP5(kk);
1420    rgba[GCOMP] = UP5(kk >> 5);
1421    rgba[RCOMP] = UP5(kk >> 10);
1422    rgba[ACOMP] = 255;
1423 }
1424
1425
1426 static void
1427 fxt1_decode_1MIXED (const GLubyte *code, GLint t, GLubyte *rgba)
1428 {
1429    const GLuint *cc;
1430    GLuint col[2][3];
1431    GLint glsb, selb;
1432
1433    cc = (const GLuint *)code;
1434    if (t & 16) {
1435       t &= 15;
1436       t = (cc[1] >> (t * 2)) & 3;
1437       /* col 2 */
1438       col[0][BCOMP] = (*(const GLuint *)(code + 11)) >> 6;
1439       col[0][GCOMP] = CC_SEL(cc, 99);
1440       col[0][RCOMP] = CC_SEL(cc, 104);
1441       /* col 3 */
1442       col[1][BCOMP] = CC_SEL(cc, 109);
1443       col[1][GCOMP] = CC_SEL(cc, 114);
1444       col[1][RCOMP] = CC_SEL(cc, 119);
1445       glsb = CC_SEL(cc, 126);
1446       selb = CC_SEL(cc, 33);
1447    } else {
1448       t = (cc[0] >> (t * 2)) & 3;
1449       /* col 0 */
1450       col[0][BCOMP] = CC_SEL(cc, 64);
1451       col[0][GCOMP] = CC_SEL(cc, 69);
1452       col[0][RCOMP] = CC_SEL(cc, 74);
1453       /* col 1 */
1454       col[1][BCOMP] = CC_SEL(cc, 79);
1455       col[1][GCOMP] = CC_SEL(cc, 84);
1456       col[1][RCOMP] = CC_SEL(cc, 89);
1457       glsb = CC_SEL(cc, 125);
1458       selb = CC_SEL(cc, 1);
1459    }
1460
1461    if (CC_SEL(cc, 124) & 1) {
1462       /* alpha[0] == 1 */
1463
1464       if (t == 3) {
1465          /* zero */
1466          rgba[RCOMP] = rgba[BCOMP] = rgba[GCOMP] = rgba[ACOMP] = 0;
1467       } else {
1468          GLubyte r, g, b;
1469          if (t == 0) {
1470             b = UP5(col[0][BCOMP]);
1471             g = UP5(col[0][GCOMP]);
1472             r = UP5(col[0][RCOMP]);
1473          } else if (t == 2) {
1474             b = UP5(col[1][BCOMP]);
1475             g = UP6(col[1][GCOMP], glsb);
1476             r = UP5(col[1][RCOMP]);
1477          } else {
1478             b = (UP5(col[0][BCOMP]) + UP5(col[1][BCOMP])) / 2;
1479             g = (UP5(col[0][GCOMP]) + UP6(col[1][GCOMP], glsb)) / 2;
1480             r = (UP5(col[0][RCOMP]) + UP5(col[1][RCOMP])) / 2;
1481          }
1482          rgba[RCOMP] = r;
1483          rgba[GCOMP] = g;
1484          rgba[BCOMP] = b;
1485          rgba[ACOMP] = 255;
1486       }
1487    } else {
1488       /* alpha[0] == 0 */
1489       GLubyte r, g, b;
1490       if (t == 0) {
1491          b = UP5(col[0][BCOMP]);
1492          g = UP6(col[0][GCOMP], glsb ^ selb);
1493          r = UP5(col[0][RCOMP]);
1494       } else if (t == 3) {
1495          b = UP5(col[1][BCOMP]);
1496          g = UP6(col[1][GCOMP], glsb);
1497          r = UP5(col[1][RCOMP]);
1498       } else {
1499          b = LERP(3, t, UP5(col[0][BCOMP]), UP5(col[1][BCOMP]));
1500          g = LERP(3, t, UP6(col[0][GCOMP], glsb ^ selb),
1501                         UP6(col[1][GCOMP], glsb));
1502          r = LERP(3, t, UP5(col[0][RCOMP]), UP5(col[1][RCOMP]));
1503       }
1504       rgba[RCOMP] = r;
1505       rgba[GCOMP] = g;
1506       rgba[BCOMP] = b;
1507       rgba[ACOMP] = 255;
1508    }
1509 }
1510
1511
1512 static void
1513 fxt1_decode_1ALPHA (const GLubyte *code, GLint t, GLubyte *rgba)
1514 {
1515    const GLuint *cc;
1516    GLubyte r, g, b, a;
1517
1518    cc = (const GLuint *)code;
1519    if (CC_SEL(cc, 124) & 1) {
1520       /* lerp == 1 */
1521       GLuint col0[4];
1522
1523       if (t & 16) {
1524          t &= 15;
1525          t = (cc[1] >> (t * 2)) & 3;
1526          /* col 2 */
1527          col0[BCOMP] = (*(const GLuint *)(code + 11)) >> 6;
1528          col0[GCOMP] = CC_SEL(cc, 99);
1529          col0[RCOMP] = CC_SEL(cc, 104);
1530          col0[ACOMP] = CC_SEL(cc, 119);
1531       } else {
1532          t = (cc[0] >> (t * 2)) & 3;
1533          /* col 0 */
1534          col0[BCOMP] = CC_SEL(cc, 64);
1535          col0[GCOMP] = CC_SEL(cc, 69);
1536          col0[RCOMP] = CC_SEL(cc, 74);
1537          col0[ACOMP] = CC_SEL(cc, 109);
1538       }
1539
1540       if (t == 0) {
1541          b = UP5(col0[BCOMP]);
1542          g = UP5(col0[GCOMP]);
1543          r = UP5(col0[RCOMP]);
1544          a = UP5(col0[ACOMP]);
1545       } else if (t == 3) {
1546          b = UP5(CC_SEL(cc, 79));
1547          g = UP5(CC_SEL(cc, 84));
1548          r = UP5(CC_SEL(cc, 89));
1549          a = UP5(CC_SEL(cc, 114));
1550       } else {
1551          b = LERP(3, t, UP5(col0[BCOMP]), UP5(CC_SEL(cc, 79)));
1552          g = LERP(3, t, UP5(col0[GCOMP]), UP5(CC_SEL(cc, 84)));
1553          r = LERP(3, t, UP5(col0[RCOMP]), UP5(CC_SEL(cc, 89)));
1554          a = LERP(3, t, UP5(col0[ACOMP]), UP5(CC_SEL(cc, 114)));
1555       }
1556    } else {
1557       /* lerp == 0 */
1558
1559       if (t & 16) {
1560          cc++;
1561          t &= 15;
1562       }
1563       t = (cc[0] >> (t * 2)) & 3;
1564
1565       if (t == 3) {
1566          /* zero */
1567          r = g = b = a = 0;
1568       } else {
1569          GLuint kk;
1570          cc = (const GLuint *)code;
1571          a = UP5(cc[3] >> (t * 5 + 13));
1572          t *= 15;
1573          cc = (const GLuint *)(code + 8 + t / 8);
1574          kk = cc[0] >> (t & 7);
1575          b = UP5(kk);
1576          g = UP5(kk >> 5);
1577          r = UP5(kk >> 10);
1578       }
1579    }
1580    rgba[RCOMP] = r;
1581    rgba[GCOMP] = g;
1582    rgba[BCOMP] = b;
1583    rgba[ACOMP] = a;
1584 }
1585
1586
1587 static void
1588 fxt1_decode_1 (const void *texture, GLint stride, /* in pixels */
1589                GLint i, GLint j, GLubyte *rgba)
1590 {
1591    static void (*decode_1[]) (const GLubyte *, GLint, GLubyte *) = {
1592       fxt1_decode_1HI,     /* cc-high   = "00?" */
1593       fxt1_decode_1HI,     /* cc-high   = "00?" */
1594       fxt1_decode_1CHROMA, /* cc-chroma = "010" */
1595       fxt1_decode_1ALPHA,  /* alpha     = "011" */
1596       fxt1_decode_1MIXED,  /* mixed     = "1??" */
1597       fxt1_decode_1MIXED,  /* mixed     = "1??" */
1598       fxt1_decode_1MIXED,  /* mixed     = "1??" */
1599       fxt1_decode_1MIXED   /* mixed     = "1??" */
1600    };
1601
1602    const GLubyte *code = (const GLubyte *)texture +
1603                          ((j / 4) * (stride / 8) + (i / 8)) * 16;
1604    GLint mode = CC_SEL(code, 125);
1605    GLint t = i & 7;
1606
1607    if (t & 4) {
1608       t += 12;
1609    }
1610    t += (j & 3) * 4;
1611
1612    decode_1[mode](code, t, rgba);
1613 }
1614
1615
1616
1617
1618 static void
1619 fetch_rgb_fxt1(const GLubyte *map, const GLuint imageOffsets[],
1620                GLint rowStride, GLint i, GLint j, GLint k, GLfloat *texel)
1621 {
1622    GLubyte rgba[4];
1623    fxt1_decode_1(map, rowStride, i, j, rgba);
1624    texel[RCOMP] = UBYTE_TO_FLOAT(rgba[RCOMP]);
1625    texel[GCOMP] = UBYTE_TO_FLOAT(rgba[GCOMP]);
1626    texel[BCOMP] = UBYTE_TO_FLOAT(rgba[BCOMP]);
1627    texel[ACOMP] = 1.0F;
1628 }
1629
1630
1631 static void
1632 fetch_rgba_fxt1(const GLubyte *map, const GLuint imageOffsets[],
1633                 GLint rowStride, GLint i, GLint j, GLint k, GLfloat *texel)
1634 {
1635    GLubyte rgba[4];
1636    fxt1_decode_1(map, rowStride, i, j, rgba);
1637    texel[RCOMP] = UBYTE_TO_FLOAT(rgba[RCOMP]);
1638    texel[GCOMP] = UBYTE_TO_FLOAT(rgba[GCOMP]);
1639    texel[BCOMP] = UBYTE_TO_FLOAT(rgba[BCOMP]);
1640    texel[ACOMP] = UBYTE_TO_FLOAT(rgba[ACOMP]);
1641 }
1642
1643
1644 compressed_fetch_func
1645 _mesa_get_fxt_fetch_func(gl_format format)
1646 {
1647    switch (format) {
1648    case MESA_FORMAT_RGB_FXT1:
1649       return fetch_rgb_fxt1;
1650    case MESA_FORMAT_RGBA_FXT1:
1651       return fetch_rgba_fxt1;
1652    default:
1653       return NULL;
1654    }
1655 }