src/mesa/main/texcompress_fxt1.c

   1 /*
   2  * Mesa 3-D graphics library
   3  *
   4  * Copyright (C) 1999-2008  Brian Paul   All Rights Reserved.
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a
   7  * copy of this software and associated documentation files (the "Software"),
   8  * to deal in the Software without restriction, including without limitation
   9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  10  * and/or sell copies of the Software, and to permit persons to whom the
  11  * Software is furnished to do so, subject to the following conditions:
  12  *
  13  * The above copyright notice and this permission notice shall be included
  14  * in all copies or substantial portions of the Software.
  15  *
  16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
  20  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  21  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  22  * OTHER DEALINGS IN THE SOFTWARE.
  23  */
  24
  25
  26 /**
  27  * \file texcompress_fxt1.c
  28  * GL_3DFX_texture_compression_FXT1 support.
  29  */
  30
  31
  32 #include "errors.h"
  33 #include "glheader.h"
  34
  35 #include "image.h"
  36 #include "macros.h"
  37 #include "mipmap.h"
  38 #include "texcompress.h"
  39 #include "texcompress_fxt1.h"
  40 #include "texstore.h"
  41 #include "mtypes.h"
  42
  43
  44 static void
  45 fxt1_encode (GLuint width, GLuint height, GLint comps,
  46              const void *source, GLint srcRowStride,
  47              void *dest, GLint destRowStride);
  48
  49 static void
  50 fxt1_decode_1 (const void *texture, GLint stride,
  51                GLint i, GLint j, GLubyte *rgba);
  52
  53
  54 /**
  55  * Store user's image in rgb_fxt1 format.
  56  */
  57 GLboolean
  58 _mesa_texstore_rgb_fxt1(TEXSTORE_PARAMS)
  59 {
  60    const GLubyte *pixels;
  61    GLint srcRowStride;
  62    GLubyte *dst;
  63    const GLubyte *tempImage = NULL;
  64
  65    assert(dstFormat == MESA_FORMAT_RGB_FXT1);
  66
  67    if (srcFormat != GL_RGB ||
  68        srcType != GL_UNSIGNED_BYTE ||
  69        ctx->_ImageTransferState ||
  70        ALIGN(srcPacking->RowLength, srcPacking->Alignment) != srcWidth ||
  71        srcPacking->SwapBytes) {
  72       /* convert image to RGB/GLubyte */
  73       GLubyte *tempImageSlices[1];
  74       int rgbRowStride = 3 * srcWidth * sizeof(GLubyte);
  75       tempImage = malloc(srcWidth * srcHeight * 3 * sizeof(GLubyte));
  76       if (!tempImage)
  77          return GL_FALSE; /* out of memory */
  78       tempImageSlices[0] = (GLubyte *) tempImage;
  79       _mesa_texstore(ctx, dims,
  80                      baseInternalFormat,
  81                      MESA_FORMAT_RGB_UNORM8,
  82                      rgbRowStride, tempImageSlices,
  83                      srcWidth, srcHeight, srcDepth,
  84                      srcFormat, srcType, srcAddr,
  85                      srcPacking);
  86       pixels = tempImage;
  87       srcRowStride = 3 * srcWidth;
  88       srcFormat = GL_RGB;
  89    }
  90    else {
  91       pixels = _mesa_image_address2d(srcPacking, srcAddr, srcWidth, srcHeight,
  92                                      srcFormat, srcType, 0, 0);
  93
  94       srcRowStride = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat,
  95                                             srcType) / sizeof(GLubyte);
  96    }
  97
  98    dst = dstSlices[0];
  99
 100    fxt1_encode(srcWidth, srcHeight, 3, pixels, srcRowStride,
 101                dst, dstRowStride);
 102
 103    free((void*) tempImage);
 104
 105    return GL_TRUE;
 106 }
 107
 108
 109 /**
 110  * Store user's image in rgba_fxt1 format.
 111  */
 112 GLboolean
 113 _mesa_texstore_rgba_fxt1(TEXSTORE_PARAMS)
 114 {
 115    const GLubyte *pixels;
 116    GLint srcRowStride;
 117    GLubyte *dst;
 118    const GLubyte *tempImage = NULL;
 119
 120    assert(dstFormat == MESA_FORMAT_RGBA_FXT1);
 121
 122    if (srcFormat != GL_RGBA ||
 123        srcType != GL_UNSIGNED_BYTE ||
 124        ctx->_ImageTransferState ||
 125        srcPacking->SwapBytes) {
 126       /* convert image to RGBA/GLubyte */
 127       GLubyte *tempImageSlices[1];
 128       int rgbaRowStride = 4 * srcWidth * sizeof(GLubyte);
 129       tempImage = malloc(srcWidth * srcHeight * 4 * sizeof(GLubyte));
 130       if (!tempImage)
 131          return GL_FALSE; /* out of memory */
 132       tempImageSlices[0] = (GLubyte *) tempImage;
 133       _mesa_texstore(ctx, dims,
 134                      baseInternalFormat,
 135 #if UTIL_ARCH_LITTLE_ENDIAN
 136                      MESA_FORMAT_R8G8B8A8_UNORM,
 137 #else
 138                      MESA_FORMAT_A8B8G8R8_UNORM,
 139 #endif
 140                      rgbaRowStride, tempImageSlices,
 141                      srcWidth, srcHeight, srcDepth,
 142                      srcFormat, srcType, srcAddr,
 143                      srcPacking);
 144       pixels = tempImage;
 145       srcRowStride = 4 * srcWidth;
 146       srcFormat = GL_RGBA;
 147    }
 148    else {
 149       pixels = _mesa_image_address2d(srcPacking, srcAddr, srcWidth, srcHeight,
 150                                      srcFormat, srcType, 0, 0);
 151
 152       srcRowStride = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat,
 153                                             srcType) / sizeof(GLubyte);
 154    }
 155
 156    dst = dstSlices[0];
 157
 158    fxt1_encode(srcWidth, srcHeight, 4, pixels, srcRowStride,
 159                dst, dstRowStride);
 160
 161    free((void*) tempImage);
 162
 163    return GL_TRUE;
 164 }
 165
 166
 167 /***************************************************************************\
 168  * FXT1 encoder
 169  *
 170  * The encoder was built by reversing the decoder,
 171  * and is vaguely based on Texus2 by 3dfx. Note that this code
 172  * is merely a proof of concept, since it is highly UNoptimized;
 173  * moreover, it is sub-optimal due to initial conditions passed
 174  * to Lloyd's algorithm (the interpolation modes are even worse).
 175 \***************************************************************************/
 176
 177
 178 #define MAX_COMP 4 /* ever needed maximum number of components in texel */
 179 #define MAX_VECT 4 /* ever needed maximum number of base vectors to find */
 180 #define N_TEXELS 32 /* number of texels in a block (always 32) */
 181 #define LL_N_REP 50 /* number of iterations in lloyd's vq */
 182 #define LL_RMS_D 10 /* fault tolerance (maximum delta) */
 183 #define LL_RMS_E 255 /* fault tolerance (maximum error) */
 184 #define ALPHA_TS 2 /* alpha threshold: (255 - ALPHA_TS) deemed opaque */
 185 static const GLuint zero = 0;
 186 #define ISTBLACK(v) (memcmp(&(v), &zero, sizeof(zero)) == 0)
 187
 188 /*
 189  * Define a 64-bit unsigned integer type and macros
 190  */
 191 #if 1
 192
 193 #define FX64_NATIVE 1
 194
 195 typedef uint64_t Fx64;
 196
 197 #define FX64_MOV32(a, b) a = b
 198 #define FX64_OR32(a, b)  a |= b
 199 #define FX64_SHL(a, c)   a <<= c
 200
 201 #else
 202
 203 #define FX64_NATIVE 0
 204
 205 typedef struct {
 206    GLuint lo, hi;
 207 } Fx64;
 208
 209 #define FX64_MOV32(a, b) a.lo = b
 210 #define FX64_OR32(a, b)  a.lo |= b
 211
 212 #define FX64_SHL(a, c)                                 \
 213    do {                                                \
 214        if ((c) >= 32) {                                \
 215           a.hi = a.lo << ((c) - 32);                   \
 216           a.lo = 0;                                    \
 217        } else {                                        \
 218           a.hi = (a.hi << (c)) | (a.lo >> (32 - (c))); \
 219           a.lo <<= (c);                                \
 220        }                                               \
 221    } while (0)
 222
 223 #endif
 224
 225
 226 #define F(i) (GLfloat)1 /* can be used to obtain an oblong metric: 0.30 / 0.59 / 0.11 */
 227 #define SAFECDOT 1 /* for paranoids */
 228
 229 #define MAKEIVEC(NV, NC, IV, B, V0, V1)  \
 230    do {                                  \
 231       /* compute interpolation vector */ \
 232       GLfloat d2 = 0.0F;                 \
 233       GLfloat rd2;                       \
 234                                          \
 235       for (i = 0; i < NC; i++) {         \
 236          IV[i] = (V1[i] - V0[i]) * F(i); \
 237          d2 += IV[i] * IV[i];            \
 238       }                                  \
 239       rd2 = (GLfloat)NV / d2;            \
 240       B = 0;                             \
 241       for (i = 0; i < NC; i++) {         \
 242          IV[i] *= F(i);                  \
 243          B -= IV[i] * V0[i];             \
 244          IV[i] *= rd2;                   \
 245       }                                  \
 246       B = B * rd2 + 0.5f;                \
 247    } while (0)
 248
 249 #define CALCCDOT(TEXEL, NV, NC, IV, B, V)\
 250    do {                                  \
 251       GLfloat dot = 0.0F;                \
 252       for (i = 0; i < NC; i++) {         \
 253          dot += V[i] * IV[i];            \
 254       }                                  \
 255       TEXEL = (GLint)(dot + B);          \
 256       if (SAFECDOT) {                    \
 257          if (TEXEL < 0) {                \
 258             TEXEL = 0;                   \
 259          } else if (TEXEL > NV) {        \
 260             TEXEL = NV;                  \
 261          }                               \
 262       }                                  \
 263    } while (0)
 264
 265
 266 static GLint
 267 fxt1_bestcol (GLfloat vec[][MAX_COMP], GLint nv,
 268               GLubyte input[MAX_COMP], GLint nc)
 269 {
 270    GLint i, j, best = -1;
 271    GLfloat err = 1e9; /* big enough */
 272
 273    for (j = 0; j < nv; j++) {
 274       GLfloat e = 0.0F;
 275       for (i = 0; i < nc; i++) {
 276          e += (vec[j][i] - input[i]) * (vec[j][i] - input[i]);
 277       }
 278       if (e < err) {
 279          err = e;
 280          best = j;
 281       }
 282    }
 283
 284    return best;
 285 }
 286
 287
 288 static GLint
 289 fxt1_worst (GLfloat vec[MAX_COMP],
 290             GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
 291 {
 292    GLint i, k, worst = -1;
 293    GLfloat err = -1.0F; /* small enough */
 294
 295    for (k = 0; k < n; k++) {
 296       GLfloat e = 0.0F;
 297       for (i = 0; i < nc; i++) {
 298          e += (vec[i] - input[k][i]) * (vec[i] - input[k][i]);
 299       }
 300       if (e > err) {
 301          err = e;
 302          worst = k;
 303       }
 304    }
 305
 306    return worst;
 307 }
 308
 309
 310 static GLint
 311 fxt1_variance (GLdouble variance[MAX_COMP],
 312                GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
 313 {
 314    GLint i, k, best = 0;
 315    GLint sx, sx2;
 316    GLdouble var, maxvar = -1; /* small enough */
 317    GLdouble teenth = 1.0 / n;
 318
 319    for (i = 0; i < nc; i++) {
 320       sx = sx2 = 0;
 321       for (k = 0; k < n; k++) {
 322          GLint t = input[k][i];
 323          sx += t;
 324          sx2 += t * t;
 325       }
 326       var = sx2 * teenth - sx * sx * teenth * teenth;
 327       if (maxvar < var) {
 328          maxvar = var;
 329          best = i;
 330       }
 331       if (variance) {
 332          variance[i] = var;
 333       }
 334    }
 335
 336    return best;
 337 }
 338
 339
 340 static GLint
 341 fxt1_choose (GLfloat vec[][MAX_COMP], GLint nv,
 342              GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
 343 {
 344 #if 0
 345    /* Choose colors from a grid.
 346     */
 347    GLint i, j;
 348
 349    for (j = 0; j < nv; j++) {
 350       GLint m = j * (n - 1) / (nv - 1);
 351       for (i = 0; i < nc; i++) {
 352          vec[j][i] = input[m][i];
 353       }
 354    }
 355 #else
 356    /* Our solution here is to find the darkest and brightest colors in
 357     * the 8x4 tile and use those as the two representative colors.
 358     * There are probably better algorithms to use (histogram-based).
 359     */
 360    GLint i, j, k;
 361    GLint minSum = 2000; /* big enough */
 362    GLint maxSum = -1; /* small enough */
 363    GLint minCol = 0; /* phoudoin: silent compiler! */
 364    GLint maxCol = 0; /* phoudoin: silent compiler! */
 365
 366    struct {
 367       GLint flag;
 368       GLint key;
 369       GLint freq;
 370       GLint idx;
 371    } hist[N_TEXELS];
 372    GLint lenh = 0;
 373
 374    memset(hist, 0, sizeof(hist));
 375
 376    for (k = 0; k < n; k++) {
 377       GLint l;
 378       GLint key = 0;
 379       GLint sum = 0;
 380       for (i = 0; i < nc; i++) {
 381          key <<= 8;
 382          key |= input[k][i];
 383          sum += input[k][i];
 384       }
 385       for (l = 0; l < n; l++) {
 386          if (!hist[l].flag) {
 387             /* alloc new slot */
 388             hist[l].flag = !0;
 389             hist[l].key = key;
 390             hist[l].freq = 1;
 391             hist[l].idx = k;
 392             lenh = l + 1;
 393             break;
 394          } else if (hist[l].key == key) {
 395             hist[l].freq++;
 396             break;
 397          }
 398       }
 399       if (minSum > sum) {
 400          minSum = sum;
 401          minCol = k;
 402       }
 403       if (maxSum < sum) {
 404          maxSum = sum;
 405          maxCol = k;
 406       }
 407    }
 408
 409    if (lenh <= nv) {
 410       for (j = 0; j < lenh; j++) {
 411          for (i = 0; i < nc; i++) {
 412             vec[j][i] = (GLfloat)input[hist[j].idx][i];
 413          }
 414       }
 415       for (; j < nv; j++) {
 416          for (i = 0; i < nc; i++) {
 417             vec[j][i] = vec[0][i];
 418          }
 419       }
 420       return 0;
 421    }
 422
 423    for (j = 0; j < nv; j++) {
 424       for (i = 0; i < nc; i++) {
 425          vec[j][i] = ((nv - 1 - j) * input[minCol][i] + j * input[maxCol][i] + (nv - 1) / 2) / (GLfloat)(nv - 1);
 426       }
 427    }
 428 #endif
 429
 430    return !0;
 431 }
 432
 433
 434 static GLint
 435 fxt1_lloyd (GLfloat vec[][MAX_COMP], GLint nv,
 436             GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
 437 {
 438    /* Use the generalized lloyd's algorithm for VQ:
 439     *     find 4 color vectors.
 440     *
 441     *     for each sample color
 442     *         sort to nearest vector.
 443     *
 444     *     replace each vector with the centroid of its matching colors.
 445     *
 446     *     repeat until RMS doesn't improve.
 447     *
 448     *     if a color vector has no samples, or becomes the same as another
 449     *     vector, replace it with the color which is farthest from a sample.
 450     *
 451     * vec[][MAX_COMP]           initial vectors and resulting colors
 452     * nv                        number of resulting colors required
 453     * input[N_TEXELS][MAX_COMP] input texels
 454     * nc                        number of components in input / vec
 455     * n                         number of input samples
 456     */
 457
 458    GLint sum[MAX_VECT][MAX_COMP]; /* used to accumulate closest texels */
 459    GLint cnt[MAX_VECT]; /* how many times a certain vector was chosen */
 460    GLfloat error, lasterror = 1e9;
 461
 462    GLint i, j, k, rep;
 463
 464    /* the quantizer */
 465    for (rep = 0; rep < LL_N_REP; rep++) {
 466       /* reset sums & counters */
 467       for (j = 0; j < nv; j++) {
 468          for (i = 0; i < nc; i++) {
 469             sum[j][i] = 0;
 470          }
 471          cnt[j] = 0;
 472       }
 473       error = 0;
 474
 475       /* scan whole block */
 476       for (k = 0; k < n; k++) {
 477 #if 1
 478          GLint best = -1;
 479          GLfloat err = 1e9; /* big enough */
 480          /* determine best vector */
 481          for (j = 0; j < nv; j++) {
 482             GLfloat e = (vec[j][0] - input[k][0]) * (vec[j][0] - input[k][0]) +
 483                       (vec[j][1] - input[k][1]) * (vec[j][1] - input[k][1]) +
 484                       (vec[j][2] - input[k][2]) * (vec[j][2] - input[k][2]);
 485             if (nc == 4) {
 486                e += (vec[j][3] - input[k][3]) * (vec[j][3] - input[k][3]);
 487             }
 488             if (e < err) {
 489                err = e;
 490                best = j;
 491             }
 492          }
 493 #else
 494          GLint best = fxt1_bestcol(vec, nv, input[k], nc, &err);
 495 #endif
 496          assert(best >= 0);
 497          /* add in closest color */
 498          for (i = 0; i < nc; i++) {
 499             sum[best][i] += input[k][i];
 500          }
 501          /* mark this vector as used */
 502          cnt[best]++;
 503          /* accumulate error */
 504          error += err;
 505       }
 506
 507       /* check RMS */
 508       if ((error < LL_RMS_E) ||
 509           ((error < lasterror) && ((lasterror - error) < LL_RMS_D))) {
 510          return !0; /* good match */
 511       }
 512       lasterror = error;
 513
 514       /* move each vector to the barycenter of its closest colors */
 515       for (j = 0; j < nv; j++) {
 516          if (cnt[j]) {
 517             GLfloat div = 1.0F / cnt[j];
 518             for (i = 0; i < nc; i++) {
 519                vec[j][i] = div * sum[j][i];
 520             }
 521          } else {
 522             /* this vec has no samples or is identical with a previous vec */
 523             GLint worst = fxt1_worst(vec[j], input, nc, n);
 524             for (i = 0; i < nc; i++) {
 525                vec[j][i] = input[worst][i];
 526             }
 527          }
 528       }
 529    }
 530
 531    return 0; /* could not converge fast enough */
 532 }
 533
 534
 535 static void
 536 fxt1_quantize_CHROMA (GLuint *cc,
 537                       GLubyte input[N_TEXELS][MAX_COMP])
 538 {
 539    const GLint n_vect = 4; /* 4 base vectors to find */
 540    const GLint n_comp = 3; /* 3 components: R, G, B */
 541    GLfloat vec[MAX_VECT][MAX_COMP];
 542    GLint i, j, k;
 543    Fx64 hi; /* high quadword */
 544    GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
 545
 546    if (fxt1_choose(vec, n_vect, input, n_comp, N_TEXELS) != 0) {
 547       fxt1_lloyd(vec, n_vect, input, n_comp, N_TEXELS);
 548    }
 549
 550    FX64_MOV32(hi, 4); /* cc-chroma = "010" + unused bit */
 551    for (j = n_vect - 1; j >= 0; j--) {
 552       for (i = 0; i < n_comp; i++) {
 553          /* add in colors */
 554          FX64_SHL(hi, 5);
 555          FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
 556       }
 557    }
 558    ((Fx64 *)cc)[1] = hi;
 559
 560    lohi = lolo = 0;
 561    /* right microtile */
 562    for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) {
 563       lohi <<= 2;
 564       lohi |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
 565    }
 566    /* left microtile */
 567    for (; k >= 0; k--) {
 568       lolo <<= 2;
 569       lolo |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
 570    }
 571    cc[1] = lohi;
 572    cc[0] = lolo;
 573 }
 574
 575
 576 static void
 577 fxt1_quantize_ALPHA0 (GLuint *cc,
 578                       GLubyte input[N_TEXELS][MAX_COMP],
 579                       GLubyte reord[N_TEXELS][MAX_COMP], GLint n)
 580 {
 581    const GLint n_vect = 3; /* 3 base vectors to find */
 582    const GLint n_comp = 4; /* 4 components: R, G, B, A */
 583    GLfloat vec[MAX_VECT][MAX_COMP];
 584    GLint i, j, k;
 585    Fx64 hi; /* high quadword */
 586    GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
 587
 588    /* the last vector indicates zero */
 589    for (i = 0; i < n_comp; i++) {
 590       vec[n_vect][i] = 0;
 591    }
 592
 593    /* the first n texels in reord are guaranteed to be non-zero */
 594    if (fxt1_choose(vec, n_vect, reord, n_comp, n) != 0) {
 595       fxt1_lloyd(vec, n_vect, reord, n_comp, n);
 596    }
 597
 598    FX64_MOV32(hi, 6); /* alpha = "011" + lerp = 0 */
 599    for (j = n_vect - 1; j >= 0; j--) {
 600       /* add in alphas */
 601       FX64_SHL(hi, 5);
 602       FX64_OR32(hi, (GLuint)(vec[j][ACOMP] / 8.0F));
 603    }
 604    for (j = n_vect - 1; j >= 0; j--) {
 605       for (i = 0; i < n_comp - 1; i++) {
 606          /* add in colors */
 607          FX64_SHL(hi, 5);
 608          FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
 609       }
 610    }
 611    ((Fx64 *)cc)[1] = hi;
 612
 613    lohi = lolo = 0;
 614    /* right microtile */
 615    for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) {
 616       lohi <<= 2;
 617       lohi |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
 618    }
 619    /* left microtile */
 620    for (; k >= 0; k--) {
 621       lolo <<= 2;
 622       lolo |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
 623    }
 624    cc[1] = lohi;
 625    cc[0] = lolo;
 626 }
 627
 628
 629 static void
 630 fxt1_quantize_ALPHA1 (GLuint *cc,
 631                       GLubyte input[N_TEXELS][MAX_COMP])
 632 {
 633    const GLint n_vect = 3; /* highest vector number in each microtile */
 634    const GLint n_comp = 4; /* 4 components: R, G, B, A */
 635    GLfloat vec[1 + 1 + 1][MAX_COMP]; /* 1.5 extrema for each sub-block */
 636    GLfloat b, iv[MAX_COMP]; /* interpolation vector */
 637    GLint i, j, k;
 638    Fx64 hi; /* high quadword */
 639    GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
 640
 641    GLint minSum;
 642    GLint maxSum;
 643    GLint minColL = 0, maxColL = 0;
 644    GLint minColR = 0, maxColR = 0;
 645    GLint sumL = 0, sumR = 0;
 646    GLint nn_comp;
 647    /* Our solution here is to find the darkest and brightest colors in
 648     * the 4x4 tile and use those as the two representative colors.
 649     * There are probably better algorithms to use (histogram-based).
 650     */
 651    nn_comp = n_comp;
 652    while ((minColL == maxColL) && nn_comp) {
 653        minSum = 2000; /* big enough */
 654        maxSum = -1; /* small enough */
 655        for (k = 0; k < N_TEXELS / 2; k++) {
 656            GLint sum = 0;
 657            for (i = 0; i < nn_comp; i++) {
 658                sum += input[k][i];
 659            }
 660            if (minSum > sum) {
 661                minSum = sum;
 662                minColL = k;
 663            }
 664            if (maxSum < sum) {
 665                maxSum = sum;
 666                maxColL = k;
 667            }
 668            sumL += sum;
 669        }
 670
 671        nn_comp--;
 672    }
 673
 674    nn_comp = n_comp;
 675    while ((minColR == maxColR) && nn_comp) {
 676        minSum = 2000; /* big enough */
 677        maxSum = -1; /* small enough */
 678        for (k = N_TEXELS / 2; k < N_TEXELS; k++) {
 679            GLint sum = 0;
 680            for (i = 0; i < nn_comp; i++) {
 681                sum += input[k][i];
 682            }
 683            if (minSum > sum) {
 684                minSum = sum;
 685                minColR = k;
 686            }
 687            if (maxSum < sum) {
 688                maxSum = sum;
 689                maxColR = k;
 690            }
 691            sumR += sum;
 692        }
 693
 694        nn_comp--;
 695    }
 696
 697    /* choose the common vector (yuck!) */
 698    {
 699       GLint j1, j2;
 700       GLint v1 = 0, v2 = 0;
 701       GLfloat err = 1e9; /* big enough */
 702       GLfloat tv[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
 703       for (i = 0; i < n_comp; i++) {
 704          tv[0][i] = input[minColL][i];
 705          tv[1][i] = input[maxColL][i];
 706          tv[2][i] = input[minColR][i];
 707          tv[3][i] = input[maxColR][i];
 708       }
 709       for (j1 = 0; j1 < 2; j1++) {
 710          for (j2 = 2; j2 < 4; j2++) {
 711             GLfloat e = 0.0F;
 712             for (i = 0; i < n_comp; i++) {
 713                e += (tv[j1][i] - tv[j2][i]) * (tv[j1][i] - tv[j2][i]);
 714             }
 715             if (e < err) {
 716                err = e;
 717                v1 = j1;
 718                v2 = j2;
 719             }
 720          }
 721       }
 722       for (i = 0; i < n_comp; i++) {
 723          vec[0][i] = tv[1 - v1][i];
 724          vec[1][i] = (tv[v1][i] * sumL + tv[v2][i] * sumR) / (sumL + sumR);
 725          vec[2][i] = tv[5 - v2][i];
 726       }
 727    }
 728
 729    /* left microtile */
 730    cc[0] = 0;
 731    if (minColL != maxColL) {
 732       /* compute interpolation vector */
 733       MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
 734
 735       /* add in texels */
 736       lolo = 0;
 737       for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
 738          GLint texel;
 739          /* interpolate color */
 740          CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
 741          /* add in texel */
 742          lolo <<= 2;
 743          lolo |= texel;
 744       }
 745
 746       cc[0] = lolo;
 747    }
 748
 749    /* right microtile */
 750    cc[1] = 0;
 751    if (minColR != maxColR) {
 752       /* compute interpolation vector */
 753       MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[1]);
 754
 755       /* add in texels */
 756       lohi = 0;
 757       for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
 758          GLint texel;
 759          /* interpolate color */
 760          CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
 761          /* add in texel */
 762          lohi <<= 2;
 763          lohi |= texel;
 764       }
 765
 766       cc[1] = lohi;
 767    }
 768
 769    FX64_MOV32(hi, 7); /* alpha = "011" + lerp = 1 */
 770    for (j = n_vect - 1; j >= 0; j--) {
 771       /* add in alphas */
 772       FX64_SHL(hi, 5);
 773       FX64_OR32(hi, (GLuint)(vec[j][ACOMP] / 8.0F));
 774    }
 775    for (j = n_vect - 1; j >= 0; j--) {
 776       for (i = 0; i < n_comp - 1; i++) {
 777          /* add in colors */
 778          FX64_SHL(hi, 5);
 779          FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
 780       }
 781    }
 782    ((Fx64 *)cc)[1] = hi;
 783 }
 784
 785
 786 static void
 787 fxt1_quantize_HI (GLuint *cc,
 788                   GLubyte input[N_TEXELS][MAX_COMP],
 789                   GLubyte reord[N_TEXELS][MAX_COMP], GLint n)
 790 {
 791    const GLint n_vect = 6; /* highest vector number */
 792    const GLint n_comp = 3; /* 3 components: R, G, B */
 793    GLfloat b = 0.0F;       /* phoudoin: silent compiler! */
 794    GLfloat iv[MAX_COMP];   /* interpolation vector */
 795    GLint i, k;
 796    GLuint hihi; /* high quadword: hi dword */
 797
 798    GLint minSum = 2000; /* big enough */
 799    GLint maxSum = -1; /* small enough */
 800    GLint minCol = 0; /* phoudoin: silent compiler! */
 801    GLint maxCol = 0; /* phoudoin: silent compiler! */
 802
 803    /* Our solution here is to find the darkest and brightest colors in
 804     * the 8x4 tile and use those as the two representative colors.
 805     * There are probably better algorithms to use (histogram-based).
 806     */
 807    for (k = 0; k < n; k++) {
 808       GLint sum = 0;
 809       for (i = 0; i < n_comp; i++) {
 810          sum += reord[k][i];
 811       }
 812       if (minSum > sum) {
 813          minSum = sum;
 814          minCol = k;
 815       }
 816       if (maxSum < sum) {
 817          maxSum = sum;
 818          maxCol = k;
 819       }
 820    }
 821
 822    hihi = 0; /* cc-hi = "00" */
 823    for (i = 0; i < n_comp; i++) {
 824       /* add in colors */
 825       hihi <<= 5;
 826       hihi |= reord[maxCol][i] >> 3;
 827    }
 828    for (i = 0; i < n_comp; i++) {
 829       /* add in colors */
 830       hihi <<= 5;
 831       hihi |= reord[minCol][i] >> 3;
 832    }
 833    cc[3] = hihi;
 834    cc[0] = cc[1] = cc[2] = 0;
 835
 836    /* compute interpolation vector */
 837    if (minCol != maxCol) {
 838       MAKEIVEC(n_vect, n_comp, iv, b, reord[minCol], reord[maxCol]);
 839    }
 840
 841    /* add in texels */
 842    for (k = N_TEXELS - 1; k >= 0; k--) {
 843       GLint t = k * 3;
 844       GLuint *kk = (GLuint *)((char *)cc + t / 8);
 845       GLint texel = n_vect + 1; /* transparent black */
 846
 847       if (!ISTBLACK(input[k])) {
 848          if (minCol != maxCol) {
 849             /* interpolate color */
 850             CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
 851             /* add in texel */
 852             kk[0] |= texel << (t & 7);
 853          }
 854       } else {
 855          /* add in texel */
 856          kk[0] |= texel << (t & 7);
 857       }
 858    }
 859 }
 860
 861
 862 static void
 863 fxt1_quantize_MIXED1 (GLuint *cc,
 864                       GLubyte input[N_TEXELS][MAX_COMP])
 865 {
 866    const GLint n_vect = 2; /* highest vector number in each microtile */
 867    const GLint n_comp = 3; /* 3 components: R, G, B */
 868    GLubyte vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
 869    GLfloat b, iv[MAX_COMP]; /* interpolation vector */
 870    GLint i, j, k;
 871    Fx64 hi; /* high quadword */
 872    GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
 873
 874    GLint minSum;
 875    GLint maxSum;
 876    GLint minColL = 0, maxColL = -1;
 877    GLint minColR = 0, maxColR = -1;
 878
 879    /* Our solution here is to find the darkest and brightest colors in
 880     * the 4x4 tile and use those as the two representative colors.
 881     * There are probably better algorithms to use (histogram-based).
 882     */
 883    minSum = 2000; /* big enough */
 884    maxSum = -1; /* small enough */
 885    for (k = 0; k < N_TEXELS / 2; k++) {
 886       if (!ISTBLACK(input[k])) {
 887          GLint sum = 0;
 888          for (i = 0; i < n_comp; i++) {
 889             sum += input[k][i];
 890          }
 891          if (minSum > sum) {
 892             minSum = sum;
 893             minColL = k;
 894          }
 895          if (maxSum < sum) {
 896             maxSum = sum;
 897             maxColL = k;
 898          }
 899       }
 900    }
 901    minSum = 2000; /* big enough */
 902    maxSum = -1; /* small enough */
 903    for (; k < N_TEXELS; k++) {
 904       if (!ISTBLACK(input[k])) {
 905          GLint sum = 0;
 906          for (i = 0; i < n_comp; i++) {
 907             sum += input[k][i];
 908          }
 909          if (minSum > sum) {
 910             minSum = sum;
 911             minColR = k;
 912          }
 913          if (maxSum < sum) {
 914             maxSum = sum;
 915             maxColR = k;
 916          }
 917       }
 918    }
 919
 920    /* left microtile */
 921    if (maxColL == -1) {
 922       /* all transparent black */
 923       cc[0] = ~0u;
 924       for (i = 0; i < n_comp; i++) {
 925          vec[0][i] = 0;
 926          vec[1][i] = 0;
 927       }
 928    } else {
 929       cc[0] = 0;
 930       for (i = 0; i < n_comp; i++) {
 931          vec[0][i] = input[minColL][i];
 932          vec[1][i] = input[maxColL][i];
 933       }
 934       if (minColL != maxColL) {
 935          /* compute interpolation vector */
 936          MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
 937
 938          /* add in texels */
 939          lolo = 0;
 940          for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
 941             GLint texel = n_vect + 1; /* transparent black */
 942             if (!ISTBLACK(input[k])) {
 943                /* interpolate color */
 944                CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
 945             }
 946             /* add in texel */
 947             lolo <<= 2;
 948             lolo |= texel;
 949          }
 950          cc[0] = lolo;
 951       }
 952    }
 953
 954    /* right microtile */
 955    if (maxColR == -1) {
 956       /* all transparent black */
 957       cc[1] = ~0u;
 958       for (i = 0; i < n_comp; i++) {
 959          vec[2][i] = 0;
 960          vec[3][i] = 0;
 961       }
 962    } else {
 963       cc[1] = 0;
 964       for (i = 0; i < n_comp; i++) {
 965          vec[2][i] = input[minColR][i];
 966          vec[3][i] = input[maxColR][i];
 967       }
 968       if (minColR != maxColR) {
 969          /* compute interpolation vector */
 970          MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);
 971
 972          /* add in texels */
 973          lohi = 0;
 974          for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
 975             GLint texel = n_vect + 1; /* transparent black */
 976             if (!ISTBLACK(input[k])) {
 977                /* interpolate color */
 978                CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
 979             }
 980             /* add in texel */
 981             lohi <<= 2;
 982             lohi |= texel;
 983          }
 984          cc[1] = lohi;
 985       }
 986    }
 987
 988    FX64_MOV32(hi, 9 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
 989    for (j = 2 * 2 - 1; j >= 0; j--) {
 990       for (i = 0; i < n_comp; i++) {
 991          /* add in colors */
 992          FX64_SHL(hi, 5);
 993          FX64_OR32(hi, vec[j][i] >> 3);
 994       }
 995    }
 996    ((Fx64 *)cc)[1] = hi;
 997 }
 998
 999
1000 static void
1001 fxt1_quantize_MIXED0 (GLuint *cc,
1002                       GLubyte input[N_TEXELS][MAX_COMP])
1003 {
1004    const GLint n_vect = 3; /* highest vector number in each microtile */
1005    const GLint n_comp = 3; /* 3 components: R, G, B */
1006    GLubyte vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
1007    GLfloat b, iv[MAX_COMP]; /* interpolation vector */
1008    GLint i, j, k;
1009    Fx64 hi; /* high quadword */
1010    GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
1011
1012    GLint minColL = 0, maxColL = 0;
1013    GLint minColR = 0, maxColR = 0;
1014 #if 0
1015    GLint minSum;
1016    GLint maxSum;
1017
1018    /* Our solution here is to find the darkest and brightest colors in
1019     * the 4x4 tile and use those as the two representative colors.
1020     * There are probably better algorithms to use (histogram-based).
1021     */
1022    minSum = 2000; /* big enough */
1023    maxSum = -1; /* small enough */
1024    for (k = 0; k < N_TEXELS / 2; k++) {
1025       GLint sum = 0;
1026       for (i = 0; i < n_comp; i++) {
1027          sum += input[k][i];
1028       }
1029       if (minSum > sum) {
1030          minSum = sum;
1031          minColL = k;
1032       }
1033       if (maxSum < sum) {
1034          maxSum = sum;
1035          maxColL = k;
1036       }
1037    }
1038    minSum = 2000; /* big enough */
1039    maxSum = -1; /* small enough */
1040    for (; k < N_TEXELS; k++) {
1041       GLint sum = 0;
1042       for (i = 0; i < n_comp; i++) {
1043          sum += input[k][i];
1044       }
1045       if (minSum > sum) {
1046          minSum = sum;
1047          minColR = k;
1048       }
1049       if (maxSum < sum) {
1050          maxSum = sum;
1051          maxColR = k;
1052       }
1053    }
1054 #else
1055    GLint minVal;
1056    GLint maxVal;
1057    GLint maxVarL = fxt1_variance(NULL, input, n_comp, N_TEXELS / 2);
1058    GLint maxVarR = fxt1_variance(NULL, &input[N_TEXELS / 2], n_comp, N_TEXELS / 2);
1059
1060    /* Scan the channel with max variance for lo & hi
1061     * and use those as the two representative colors.
1062     */
1063    minVal = 2000; /* big enough */
1064    maxVal = -1; /* small enough */
1065    for (k = 0; k < N_TEXELS / 2; k++) {
1066       GLint t = input[k][maxVarL];
1067       if (minVal > t) {
1068          minVal = t;
1069          minColL = k;
1070       }
1071       if (maxVal < t) {
1072          maxVal = t;
1073          maxColL = k;
1074       }
1075    }
1076    minVal = 2000; /* big enough */
1077    maxVal = -1; /* small enough */
1078    for (; k < N_TEXELS; k++) {
1079       GLint t = input[k][maxVarR];
1080       if (minVal > t) {
1081          minVal = t;
1082          minColR = k;
1083       }
1084       if (maxVal < t) {
1085          maxVal = t;
1086          maxColR = k;
1087       }
1088    }
1089 #endif
1090
1091    /* left microtile */
1092    cc[0] = 0;
1093    for (i = 0; i < n_comp; i++) {
1094       vec[0][i] = input[minColL][i];
1095       vec[1][i] = input[maxColL][i];
1096    }
1097    if (minColL != maxColL) {
1098       /* compute interpolation vector */
1099       MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
1100
1101       /* add in texels */
1102       lolo = 0;
1103       for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
1104          GLint texel;
1105          /* interpolate color */
1106          CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
1107          /* add in texel */
1108          lolo <<= 2;
1109          lolo |= texel;
1110       }
1111
1112       /* funky encoding for LSB of green */
1113       if ((GLint)((lolo >> 1) & 1) != (((vec[1][GCOMP] ^ vec[0][GCOMP]) >> 2) & 1)) {
1114          for (i = 0; i < n_comp; i++) {
1115             vec[1][i] = input[minColL][i];
1116             vec[0][i] = input[maxColL][i];
1117          }
1118          lolo = ~lolo;
1119       }
1120
1121       cc[0] = lolo;
1122    }
1123
1124    /* right microtile */
1125    cc[1] = 0;
1126    for (i = 0; i < n_comp; i++) {
1127       vec[2][i] = input[minColR][i];
1128       vec[3][i] = input[maxColR][i];
1129    }
1130    if (minColR != maxColR) {
1131       /* compute interpolation vector */
1132       MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);
1133
1134       /* add in texels */
1135       lohi = 0;
1136       for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
1137          GLint texel;
1138          /* interpolate color */
1139          CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
1140          /* add in texel */
1141          lohi <<= 2;
1142          lohi |= texel;
1143       }
1144
1145       /* funky encoding for LSB of green */
1146       if ((GLint)((lohi >> 1) & 1) != (((vec[3][GCOMP] ^ vec[2][GCOMP]) >> 2) & 1)) {
1147          for (i = 0; i < n_comp; i++) {
1148             vec[3][i] = input[minColR][i];
1149             vec[2][i] = input[maxColR][i];
1150          }
1151          lohi = ~lohi;
1152       }
1153
1154       cc[1] = lohi;
1155    }
1156
1157    FX64_MOV32(hi, 8 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
1158    for (j = 2 * 2 - 1; j >= 0; j--) {
1159       for (i = 0; i < n_comp; i++) {
1160          /* add in colors */
1161          FX64_SHL(hi, 5);
1162          FX64_OR32(hi, vec[j][i] >> 3);
1163       }
1164    }
1165    ((Fx64 *)cc)[1] = hi;
1166 }
1167
1168
1169 static void
1170 fxt1_quantize (GLuint *cc, const GLubyte *lines[], GLint comps)
1171 {
1172    GLint trualpha;
1173    GLubyte reord[N_TEXELS][MAX_COMP];
1174
1175    GLubyte input[N_TEXELS][MAX_COMP];
1176    GLint i, k, l;
1177
1178    if (comps == 3) {
1179       /* make the whole block opaque */
1180       memset(input, -1, sizeof(input));
1181    }
1182
1183    /* 8 texels each line */
1184    for (l = 0; l < 4; l++) {
1185       for (k = 0; k < 4; k++) {
1186          for (i = 0; i < comps; i++) {
1187             input[k + l * 4][i] = *lines[l]++;
1188          }
1189       }
1190       for (; k < 8; k++) {
1191          for (i = 0; i < comps; i++) {
1192             input[k + l * 4 + 12][i] = *lines[l]++;
1193          }
1194       }
1195    }
1196
1197    /* block layout:
1198     * 00, 01, 02, 03, 08, 09, 0a, 0b
1199     * 10, 11, 12, 13, 18, 19, 1a, 1b
1200     * 04, 05, 06, 07, 0c, 0d, 0e, 0f
1201     * 14, 15, 16, 17, 1c, 1d, 1e, 1f
1202     */
1203
1204    /* [dBorca]
1205     * stupidity flows forth from this
1206     */
1207    l = N_TEXELS;
1208    trualpha = 0;
1209    if (comps == 4) {
1210       /* skip all transparent black texels */
1211       l = 0;
1212       for (k = 0; k < N_TEXELS; k++) {
1213          /* test all components against 0 */
1214          if (!ISTBLACK(input[k])) {
1215             /* texel is not transparent black */
1216             COPY_4UBV(reord[l], input[k]);
1217             if (reord[l][ACOMP] < (255 - ALPHA_TS)) {
1218                /* non-opaque texel */
1219                trualpha = !0;
1220             }
1221             l++;
1222          }
1223       }
1224    }
1225
1226 #if 0
1227    if (trualpha) {
1228       fxt1_quantize_ALPHA0(cc, input, reord, l);
1229    } else if (l == 0) {
1230       cc[0] = cc[1] = cc[2] = -1;
1231       cc[3] = 0;
1232    } else if (l < N_TEXELS) {
1233       fxt1_quantize_HI(cc, input, reord, l);
1234    } else {
1235       fxt1_quantize_CHROMA(cc, input);
1236    }
1237    (void)fxt1_quantize_ALPHA1;
1238    (void)fxt1_quantize_MIXED1;
1239    (void)fxt1_quantize_MIXED0;
1240 #else
1241    if (trualpha) {
1242       fxt1_quantize_ALPHA1(cc, input);
1243    } else if (l == 0) {
1244       cc[0] = cc[1] = cc[2] = ~0u;
1245       cc[3] = 0;
1246    } else if (l < N_TEXELS) {
1247       fxt1_quantize_MIXED1(cc, input);
1248    } else {
1249       fxt1_quantize_MIXED0(cc, input);
1250    }
1251    (void)fxt1_quantize_ALPHA0;
1252    (void)fxt1_quantize_HI;
1253    (void)fxt1_quantize_CHROMA;
1254 #endif
1255 }
1256
1257
1258
1259 /**
1260  * Upscale an image by replication, not (typical) stretching.
1261  * We use this when the image width or height is less than a
1262  * certain size (4, 8) and we need to upscale an image.
1263  */
1264 static void
1265 upscale_teximage2d(GLsizei inWidth, GLsizei inHeight,
1266                    GLsizei outWidth, GLsizei outHeight,
1267                    GLint comps, const GLubyte *src, GLint srcRowStride,
1268                    GLubyte *dest )
1269 {
1270    GLint i, j, k;
1271
1272    assert(outWidth >= inWidth);
1273    assert(outHeight >= inHeight);
1274 #if 0
1275    assert(inWidth == 1 || inWidth == 2 || inHeight == 1 || inHeight == 2);
1276    assert((outWidth & 3) == 0);
1277    assert((outHeight & 3) == 0);
1278 #endif
1279
1280    for (i = 0; i < outHeight; i++) {
1281       const GLint ii = i % inHeight;
1282       for (j = 0; j < outWidth; j++) {
1283          const GLint jj = j % inWidth;
1284          for (k = 0; k < comps; k++) {
1285             dest[(i * outWidth + j) * comps + k]
1286                = src[ii * srcRowStride + jj * comps + k];
1287          }
1288       }
1289    }
1290 }
1291
1292
1293 static void
1294 fxt1_encode (GLuint width, GLuint height, GLint comps,
1295              const void *source, GLint srcRowStride,
1296              void *dest, GLint destRowStride)
1297 {
1298    GLuint x, y;
1299    const GLubyte *data;
1300    GLuint *encoded = (GLuint *)dest;
1301    void *newSource = NULL;
1302
1303    assert(comps == 3 || comps == 4);
1304
1305    /* Replicate image if width is not M8 or height is not M4 */
1306    if ((width & 7) | (height & 3)) {
1307       GLint newWidth = (width + 7) & ~7;
1308       GLint newHeight = (height + 3) & ~3;
1309       newSource = malloc(comps * newWidth * newHeight * sizeof(GLubyte));
1310       if (!newSource) {
1311          GET_CURRENT_CONTEXT(ctx);
1312          _mesa_error(ctx, GL_OUT_OF_MEMORY, "texture compression");
1313          goto cleanUp;
1314       }
1315       upscale_teximage2d(width, height, newWidth, newHeight,
1316                          comps, (const GLubyte *) source,
1317                          srcRowStride, (GLubyte *) newSource);
1318       source = newSource;
1319       width = newWidth;
1320       height = newHeight;
1321       srcRowStride = comps * newWidth;
1322    }
1323
1324    data = (const GLubyte *) source;
1325    destRowStride = (destRowStride - width * 2) / 4;
1326    for (y = 0; y < height; y += 4) {
1327       GLuint offs = 0 + (y + 0) * srcRowStride;
1328       for (x = 0; x < width; x += 8) {
1329          const GLubyte *lines[4];
1330          lines[0] = &data[offs];
1331          lines[1] = lines[0] + srcRowStride;
1332          lines[2] = lines[1] + srcRowStride;
1333          lines[3] = lines[2] + srcRowStride;
1334          offs += 8 * comps;
1335          fxt1_quantize(encoded, lines, comps);
1336          /* 128 bits per 8x4 block */
1337          encoded += 4;
1338       }
1339       encoded += destRowStride;
1340    }
1341
1342  cleanUp:
1343    free(newSource);
1344 }
1345
1346
1347 /***************************************************************************\
1348  * FXT1 decoder
1349  *
1350  * The decoder is based on GL_3DFX_texture_compression_FXT1
1351  * specification and serves as a concept for the encoder.
1352 \***************************************************************************/
1353
1354
1355 /* lookup table for scaling 5 bit colors up to 8 bits */
1356 static const GLubyte _rgb_scale_5[] = {
1357    0,   8,   16,  25,  33,  41,  49,  58,
1358    66,  74,  82,  90,  99,  107, 115, 123,
1359    132, 140, 148, 156, 165, 173, 181, 189,
1360    197, 206, 214, 222, 230, 239, 247, 255
1361 };
1362
1363 /* lookup table for scaling 6 bit colors up to 8 bits */
1364 static const GLubyte _rgb_scale_6[] = {
1365    0,   4,   8,   12,  16,  20,  24,  28,
1366    32,  36,  40,  45,  49,  53,  57,  61,
1367    65,  69,  73,  77,  81,  85,  89,  93,
1368    97,  101, 105, 109, 113, 117, 121, 125,
1369    130, 134, 138, 142, 146, 150, 154, 158,
1370    162, 166, 170, 174, 178, 182, 186, 190,
1371    194, 198, 202, 206, 210, 215, 219, 223,
1372    227, 231, 235, 239, 243, 247, 251, 255
1373 };
1374
1375
1376 #define CC_SEL(cc, which) (((GLuint *)(cc))[(which) / 32] >> ((which) & 31))
1377 #define UP5(c) _rgb_scale_5[(c) & 31]
1378 #define UP6(c, b) _rgb_scale_6[(((c) & 31) << 1) | ((b) & 1)]
1379 #define LERP(n, t, c0, c1) (((n) - (t)) * (c0) + (t) * (c1) + (n) / 2) / (n)
1380
1381
1382 static void
1383 fxt1_decode_1HI (const GLubyte *code, GLint t, GLubyte *rgba)
1384 {
1385    const GLuint *cc;
1386
1387    t *= 3;
1388    cc = (const GLuint *)(code + t / 8);
1389    t = (cc[0] >> (t & 7)) & 7;
1390
1391    if (t == 7) {
1392       rgba[RCOMP] = rgba[GCOMP] = rgba[BCOMP] = rgba[ACOMP] = 0;
1393    } else {
1394       GLubyte r, g, b;
1395       cc = (const GLuint *)(code + 12);
1396       if (t == 0) {
1397          b = UP5(CC_SEL(cc, 0));
1398          g = UP5(CC_SEL(cc, 5));
1399          r = UP5(CC_SEL(cc, 10));
1400       } else if (t == 6) {
1401          b = UP5(CC_SEL(cc, 15));
1402          g = UP5(CC_SEL(cc, 20));
1403          r = UP5(CC_SEL(cc, 25));
1404       } else {
1405          b = LERP(6, t, UP5(CC_SEL(cc, 0)), UP5(CC_SEL(cc, 15)));
1406          g = LERP(6, t, UP5(CC_SEL(cc, 5)), UP5(CC_SEL(cc, 20)));
1407          r = LERP(6, t, UP5(CC_SEL(cc, 10)), UP5(CC_SEL(cc, 25)));
1408       }
1409       rgba[RCOMP] = r;
1410       rgba[GCOMP] = g;
1411       rgba[BCOMP] = b;
1412       rgba[ACOMP] = 255;
1413    }
1414 }
1415
1416
1417 static void
1418 fxt1_decode_1CHROMA (const GLubyte *code, GLint t, GLubyte *rgba)
1419 {
1420    const GLuint *cc;
1421    GLuint kk;
1422
1423    cc = (const GLuint *)code;
1424    if (t & 16) {
1425       cc++;
1426       t &= 15;
1427    }
1428    t = (cc[0] >> (t * 2)) & 3;
1429
1430    t *= 15;
1431    cc = (const GLuint *)(code + 8 + t / 8);
1432    kk = cc[0] >> (t & 7);
1433    rgba[BCOMP] = UP5(kk);
1434    rgba[GCOMP] = UP5(kk >> 5);
1435    rgba[RCOMP] = UP5(kk >> 10);
1436    rgba[ACOMP] = 255;
1437 }
1438
1439
1440 static void
1441 fxt1_decode_1MIXED (const GLubyte *code, GLint t, GLubyte *rgba)
1442 {
1443    const GLuint *cc;
1444    GLuint col[2][3];
1445    GLint glsb, selb;
1446
1447    cc = (const GLuint *)code;
1448    if (t & 16) {
1449       t &= 15;
1450       t = (cc[1] >> (t * 2)) & 3;
1451       /* col 2 */
1452       col[0][BCOMP] = (*(const GLuint *)(code + 11)) >> 6;
1453       col[0][GCOMP] = CC_SEL(cc, 99);
1454       col[0][RCOMP] = CC_SEL(cc, 104);
1455       /* col 3 */
1456       col[1][BCOMP] = CC_SEL(cc, 109);
1457       col[1][GCOMP] = CC_SEL(cc, 114);
1458       col[1][RCOMP] = CC_SEL(cc, 119);
1459       glsb = CC_SEL(cc, 126);
1460       selb = CC_SEL(cc, 33);
1461    } else {
1462       t = (cc[0] >> (t * 2)) & 3;
1463       /* col 0 */
1464       col[0][BCOMP] = CC_SEL(cc, 64);
1465       col[0][GCOMP] = CC_SEL(cc, 69);
1466       col[0][RCOMP] = CC_SEL(cc, 74);
1467       /* col 1 */
1468       col[1][BCOMP] = CC_SEL(cc, 79);
1469       col[1][GCOMP] = CC_SEL(cc, 84);
1470       col[1][RCOMP] = CC_SEL(cc, 89);
1471       glsb = CC_SEL(cc, 125);
1472       selb = CC_SEL(cc, 1);
1473    }
1474
1475    if (CC_SEL(cc, 124) & 1) {
1476       /* alpha[0] == 1 */
1477
1478       if (t == 3) {
1479          /* zero */
1480          rgba[RCOMP] = rgba[BCOMP] = rgba[GCOMP] = rgba[ACOMP] = 0;
1481       } else {
1482          GLubyte r, g, b;
1483          if (t == 0) {
1484             b = UP5(col[0][BCOMP]);
1485             g = UP5(col[0][GCOMP]);
1486             r = UP5(col[0][RCOMP]);
1487          } else if (t == 2) {
1488             b = UP5(col[1][BCOMP]);
1489             g = UP6(col[1][GCOMP], glsb);
1490             r = UP5(col[1][RCOMP]);
1491          } else {
1492             b = (UP5(col[0][BCOMP]) + UP5(col[1][BCOMP])) / 2;
1493             g = (UP5(col[0][GCOMP]) + UP6(col[1][GCOMP], glsb)) / 2;
1494             r = (UP5(col[0][RCOMP]) + UP5(col[1][RCOMP])) / 2;
1495          }
1496          rgba[RCOMP] = r;
1497          rgba[GCOMP] = g;
1498          rgba[BCOMP] = b;
1499          rgba[ACOMP] = 255;
1500       }
1501    } else {
1502       /* alpha[0] == 0 */
1503       GLubyte r, g, b;
1504       if (t == 0) {
1505          b = UP5(col[0][BCOMP]);
1506          g = UP6(col[0][GCOMP], glsb ^ selb);
1507          r = UP5(col[0][RCOMP]);
1508       } else if (t == 3) {
1509          b = UP5(col[1][BCOMP]);
1510          g = UP6(col[1][GCOMP], glsb);
1511          r = UP5(col[1][RCOMP]);
1512       } else {
1513          b = LERP(3, t, UP5(col[0][BCOMP]), UP5(col[1][BCOMP]));
1514          g = LERP(3, t, UP6(col[0][GCOMP], glsb ^ selb),
1515                         UP6(col[1][GCOMP], glsb));
1516          r = LERP(3, t, UP5(col[0][RCOMP]), UP5(col[1][RCOMP]));
1517       }
1518       rgba[RCOMP] = r;
1519       rgba[GCOMP] = g;
1520       rgba[BCOMP] = b;
1521       rgba[ACOMP] = 255;
1522    }
1523 }
1524
1525
1526 static void
1527 fxt1_decode_1ALPHA (const GLubyte *code, GLint t, GLubyte *rgba)
1528 {
1529    const GLuint *cc;
1530    GLubyte r, g, b, a;
1531
1532    cc = (const GLuint *)code;
1533    if (CC_SEL(cc, 124) & 1) {
1534       /* lerp == 1 */
1535       GLuint col0[4];
1536
1537       if (t & 16) {
1538          t &= 15;
1539          t = (cc[1] >> (t * 2)) & 3;
1540          /* col 2 */
1541          col0[BCOMP] = (*(const GLuint *)(code + 11)) >> 6;
1542          col0[GCOMP] = CC_SEL(cc, 99);
1543          col0[RCOMP] = CC_SEL(cc, 104);
1544          col0[ACOMP] = CC_SEL(cc, 119);
1545       } else {
1546          t = (cc[0] >> (t * 2)) & 3;
1547          /* col 0 */
1548          col0[BCOMP] = CC_SEL(cc, 64);
1549          col0[GCOMP] = CC_SEL(cc, 69);
1550          col0[RCOMP] = CC_SEL(cc, 74);
1551          col0[ACOMP] = CC_SEL(cc, 109);
1552       }
1553
1554       if (t == 0) {
1555          b = UP5(col0[BCOMP]);
1556          g = UP5(col0[GCOMP]);
1557          r = UP5(col0[RCOMP]);
1558          a = UP5(col0[ACOMP]);
1559       } else if (t == 3) {
1560          b = UP5(CC_SEL(cc, 79));
1561          g = UP5(CC_SEL(cc, 84));
1562          r = UP5(CC_SEL(cc, 89));
1563          a = UP5(CC_SEL(cc, 114));
1564       } else {
1565          b = LERP(3, t, UP5(col0[BCOMP]), UP5(CC_SEL(cc, 79)));
1566          g = LERP(3, t, UP5(col0[GCOMP]), UP5(CC_SEL(cc, 84)));
1567          r = LERP(3, t, UP5(col0[RCOMP]), UP5(CC_SEL(cc, 89)));
1568          a = LERP(3, t, UP5(col0[ACOMP]), UP5(CC_SEL(cc, 114)));
1569       }
1570    } else {
1571       /* lerp == 0 */
1572
1573       if (t & 16) {
1574          cc++;
1575          t &= 15;
1576       }
1577       t = (cc[0] >> (t * 2)) & 3;
1578
1579       if (t == 3) {
1580          /* zero */
1581          r = g = b = a = 0;
1582       } else {
1583          GLuint kk;
1584          cc = (const GLuint *)code;
1585          a = UP5(cc[3] >> (t * 5 + 13));
1586          t *= 15;
1587          cc = (const GLuint *)(code + 8 + t / 8);
1588          kk = cc[0] >> (t & 7);
1589          b = UP5(kk);
1590          g = UP5(kk >> 5);
1591          r = UP5(kk >> 10);
1592       }
1593    }
1594    rgba[RCOMP] = r;
1595    rgba[GCOMP] = g;
1596    rgba[BCOMP] = b;
1597    rgba[ACOMP] = a;
1598 }
1599
1600
1601 static void
1602 fxt1_decode_1 (const void *texture, GLint stride, /* in pixels */
1603                GLint i, GLint j, GLubyte *rgba)
1604 {
1605    static void (*decode_1[]) (const GLubyte *, GLint, GLubyte *) = {
1606       fxt1_decode_1HI,     /* cc-high   = "00?" */
1607       fxt1_decode_1HI,     /* cc-high   = "00?" */
1608       fxt1_decode_1CHROMA, /* cc-chroma = "010" */
1609       fxt1_decode_1ALPHA,  /* alpha     = "011" */
1610       fxt1_decode_1MIXED,  /* mixed     = "1??" */
1611       fxt1_decode_1MIXED,  /* mixed     = "1??" */
1612       fxt1_decode_1MIXED,  /* mixed     = "1??" */
1613       fxt1_decode_1MIXED   /* mixed     = "1??" */
1614    };
1615
1616    const GLubyte *code = (const GLubyte *)texture +
1617                          ((j / 4) * (stride / 8) + (i / 8)) * 16;
1618    GLint mode = CC_SEL(code, 125);
1619    GLint t = i & 7;
1620
1621    if (t & 4) {
1622       t += 12;
1623    }
1624    t += (j & 3) * 4;
1625
1626    decode_1[mode](code, t, rgba);
1627 }
1628
1629
1630
1631
1632 static void
1633 fetch_rgb_fxt1(const GLubyte *map,
1634                GLint rowStride, GLint i, GLint j, GLfloat *texel)
1635 {
1636    GLubyte rgba[4];
1637    fxt1_decode_1(map, rowStride, i, j, rgba);
1638    texel[RCOMP] = UBYTE_TO_FLOAT(rgba[RCOMP]);
1639    texel[GCOMP] = UBYTE_TO_FLOAT(rgba[GCOMP]);
1640    texel[BCOMP] = UBYTE_TO_FLOAT(rgba[BCOMP]);
1641    texel[ACOMP] = 1.0F;
1642 }
1643
1644
1645 static void
1646 fetch_rgba_fxt1(const GLubyte *map,
1647                 GLint rowStride, GLint i, GLint j, GLfloat *texel)
1648 {
1649    GLubyte rgba[4];
1650    fxt1_decode_1(map, rowStride, i, j, rgba);
1651    texel[RCOMP] = UBYTE_TO_FLOAT(rgba[RCOMP]);
1652    texel[GCOMP] = UBYTE_TO_FLOAT(rgba[GCOMP]);
1653    texel[BCOMP] = UBYTE_TO_FLOAT(rgba[BCOMP]);
1654    texel[ACOMP] = UBYTE_TO_FLOAT(rgba[ACOMP]);
1655 }
1656
1657
1658 compressed_fetch_func
1659 _mesa_get_fxt_fetch_func(mesa_format format)
1660 {
1661    switch (format) {
1662    case MESA_FORMAT_RGB_FXT1:
1663       return fetch_rgb_fxt1;
1664    case MESA_FORMAT_RGBA_FXT1:
1665       return fetch_rgba_fxt1;
1666    default:
1667       return NULL;
1668    }
1669 }