src/mesa/main/texcompress_fxt1.c

   1 /*
   2  * Mesa 3-D graphics library
   3  * Version:  7.1
   4  *
   5  * Copyright (C) 1999-2008  Brian Paul   All Rights Reserved.
   6  *
   7  * Permission is hereby granted, free of charge, to any person obtaining a
   8  * copy of this software and associated documentation files (the "Software"),
   9  * to deal in the Software without restriction, including without limitation
  10  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  11  * and/or sell copies of the Software, and to permit persons to whom the
  12  * Software is furnished to do so, subject to the following conditions:
  13  *
  14  * The above copyright notice and this permission notice shall be included
  15  * in all copies or substantial portions of the Software.
  16  *
  17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  18  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  20  * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
  21  * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  22  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  23  */
  24
  25
  26 /**
  27  * \file texcompress_fxt1.c
  28  * GL_EXT_texture_compression_fxt1 support.
  29  */
  30
  31
  32 #include "glheader.h"
  33 #include "imports.h"
  34 #include "colormac.h"
  35 #include "context.h"
  36 #include "convolve.h"
  37 #include "image.h"
  38 #include "mipmap.h"
  39 #include "texcompress.h"
  40 #include "texcompress_fxt1.h"
  41 #include "texformat.h"
  42 #include "texstore.h"
  43
  44
  45 static void
  46 fxt1_encode (GLuint width, GLuint height, GLint comps,
  47              const void *source, GLint srcRowStride,
  48              void *dest, GLint destRowStride);
  49
  50 void
  51 fxt1_decode_1 (const void *texture, GLint stride,
  52                GLint i, GLint j, GLchan *rgba);
  53
  54
  55 /**
  56  * Called during context initialization.
  57  */
  58 void
  59 _mesa_init_texture_fxt1( GLcontext *ctx )
  60 {
  61    (void) ctx;
  62 }
  63
  64
  65 /**
  66  * Store user's image in rgb_fxt1 format.
  67  */
  68 GLboolean
  69 _mesa_texstore_rgb_fxt1(TEXSTORE_PARAMS)
  70 {
  71    const GLchan *pixels;
  72    GLint srcRowStride;
  73    GLubyte *dst;
  74    const GLint texWidth = dstRowStride * 8 / 16; /* a bit of a hack */
  75    const GLchan *tempImage = NULL;
  76
  77    ASSERT(dstFormat == MESA_FORMAT_RGB_FXT1);
  78    ASSERT(dstXoffset % 8 == 0);
  79    ASSERT(dstYoffset % 4 == 0);
  80    ASSERT(dstZoffset     == 0);
  81    (void) dstZoffset;
  82    (void) dstImageOffsets;
  83
  84    if (srcFormat != GL_RGB ||
  85        srcType != CHAN_TYPE ||
  86        ctx->_ImageTransferState ||
  87        srcPacking->SwapBytes) {
  88       /* convert image to RGB/GLchan */
  89       tempImage = _mesa_make_temp_chan_image(ctx, dims,
  90                                              baseInternalFormat,
  91                                              _mesa_get_format_base_format(dstFormat),
  92                                              srcWidth, srcHeight, srcDepth,
  93                                              srcFormat, srcType, srcAddr,
  94                                              srcPacking);
  95       if (!tempImage)
  96          return GL_FALSE; /* out of memory */
  97       _mesa_adjust_image_for_convolution(ctx, dims, &srcWidth, &srcHeight);
  98       pixels = tempImage;
  99       srcRowStride = 3 * srcWidth;
 100       srcFormat = GL_RGB;
 101    }
 102    else {
 103       pixels = (const GLchan *) srcAddr;
 104       srcRowStride = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat,
 105                                             srcType) / sizeof(GLchan);
 106    }
 107
 108    dst = _mesa_compressed_image_address(dstXoffset, dstYoffset, 0,
 109                                         dstFormat,
 110                                         texWidth, (GLubyte *) dstAddr);
 111
 112    fxt1_encode(srcWidth, srcHeight, 3, pixels, srcRowStride,
 113                dst, dstRowStride);
 114
 115    if (tempImage)
 116       _mesa_free((void*) tempImage);
 117
 118    return GL_TRUE;
 119 }
 120
 121
 122 /**
 123  * Store user's image in rgba_fxt1 format.
 124  */
 125 GLboolean
 126 _mesa_texstore_rgba_fxt1(TEXSTORE_PARAMS)
 127 {
 128    const GLchan *pixels;
 129    GLint srcRowStride;
 130    GLubyte *dst;
 131    GLint texWidth = dstRowStride * 8 / 16; /* a bit of a hack */
 132    const GLchan *tempImage = NULL;
 133
 134    ASSERT(dstFormat == MESA_FORMAT_RGBA_FXT1);
 135    ASSERT(dstXoffset % 8 == 0);
 136    ASSERT(dstYoffset % 4 == 0);
 137    ASSERT(dstZoffset     == 0);
 138    (void) dstZoffset;
 139    (void) dstImageOffsets;
 140
 141    if (srcFormat != GL_RGBA ||
 142        srcType != CHAN_TYPE ||
 143        ctx->_ImageTransferState ||
 144        srcPacking->SwapBytes) {
 145       /* convert image to RGBA/GLchan */
 146       tempImage = _mesa_make_temp_chan_image(ctx, dims,
 147                                              baseInternalFormat,
 148                                              _mesa_get_format_base_format(dstFormat),
 149                                              srcWidth, srcHeight, srcDepth,
 150                                              srcFormat, srcType, srcAddr,
 151                                              srcPacking);
 152       if (!tempImage)
 153          return GL_FALSE; /* out of memory */
 154       _mesa_adjust_image_for_convolution(ctx, dims, &srcWidth, &srcHeight);
 155       pixels = tempImage;
 156       srcRowStride = 4 * srcWidth;
 157       srcFormat = GL_RGBA;
 158    }
 159    else {
 160       pixels = (const GLchan *) srcAddr;
 161       srcRowStride = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat,
 162                                             srcType) / sizeof(GLchan);
 163    }
 164
 165    dst = _mesa_compressed_image_address(dstXoffset, dstYoffset, 0,
 166                                         dstFormat,
 167                                         texWidth, (GLubyte *) dstAddr);
 168
 169    fxt1_encode(srcWidth, srcHeight, 4, pixels, srcRowStride,
 170                dst, dstRowStride);
 171
 172    if (tempImage)
 173       _mesa_free((void*) tempImage);
 174
 175    return GL_TRUE;
 176 }
 177
 178
 179 void
 180 _mesa_fetch_texel_2d_rgba_fxt1( const struct gl_texture_image *texImage,
 181                                 GLint i, GLint j, GLint k, GLchan *texel )
 182 {
 183    (void) k;
 184    fxt1_decode_1(texImage->Data, texImage->RowStride, i, j, texel);
 185 }
 186
 187
 188 void
 189 _mesa_fetch_texel_2d_f_rgba_fxt1( const struct gl_texture_image *texImage,
 190                                   GLint i, GLint j, GLint k, GLfloat *texel )
 191 {
 192    /* just sample as GLchan and convert to float here */
 193    GLchan rgba[4];
 194    (void) k;
 195    fxt1_decode_1(texImage->Data, texImage->RowStride, i, j, rgba);
 196    texel[RCOMP] = CHAN_TO_FLOAT(rgba[RCOMP]);
 197    texel[GCOMP] = CHAN_TO_FLOAT(rgba[GCOMP]);
 198    texel[BCOMP] = CHAN_TO_FLOAT(rgba[BCOMP]);
 199    texel[ACOMP] = CHAN_TO_FLOAT(rgba[ACOMP]);
 200 }
 201
 202
 203 void
 204 _mesa_fetch_texel_2d_rgb_fxt1( const struct gl_texture_image *texImage,
 205                                GLint i, GLint j, GLint k, GLchan *texel )
 206 {
 207    (void) k;
 208    fxt1_decode_1(texImage->Data, texImage->RowStride, i, j, texel);
 209    texel[ACOMP] = 255;
 210 }
 211
 212
 213 void
 214 _mesa_fetch_texel_2d_f_rgb_fxt1( const struct gl_texture_image *texImage,
 215                                  GLint i, GLint j, GLint k, GLfloat *texel )
 216 {
 217    /* just sample as GLchan and convert to float here */
 218    GLchan rgba[4];
 219    (void) k;
 220    fxt1_decode_1(texImage->Data, texImage->RowStride, i, j, rgba);
 221    texel[RCOMP] = CHAN_TO_FLOAT(rgba[RCOMP]);
 222    texel[GCOMP] = CHAN_TO_FLOAT(rgba[GCOMP]);
 223    texel[BCOMP] = CHAN_TO_FLOAT(rgba[BCOMP]);
 224    texel[ACOMP] = 1.0F;
 225 }
 226
 227
 228
 229 /***************************************************************************\
 230  * FXT1 encoder
 231  *
 232  * The encoder was built by reversing the decoder,
 233  * and is vaguely based on Texus2 by 3dfx. Note that this code
 234  * is merely a proof of concept, since it is highly UNoptimized;
 235  * moreover, it is sub-optimal due to initial conditions passed
 236  * to Lloyd's algorithm (the interpolation modes are even worse).
 237 \***************************************************************************/
 238
 239
 240 #define MAX_COMP 4 /* ever needed maximum number of components in texel */
 241 #define MAX_VECT 4 /* ever needed maximum number of base vectors to find */
 242 #define N_TEXELS 32 /* number of texels in a block (always 32) */
 243 #define LL_N_REP 50 /* number of iterations in lloyd's vq */
 244 #define LL_RMS_D 10 /* fault tolerance (maximum delta) */
 245 #define LL_RMS_E 255 /* fault tolerance (maximum error) */
 246 #define ALPHA_TS 2 /* alpha threshold: (255 - ALPHA_TS) deemed opaque */
 247 #define ISTBLACK(v) (*((GLuint *)(v)) == 0)
 248
 249
 250 /*
 251  * Define a 64-bit unsigned integer type and macros
 252  */
 253 #if 1
 254
 255 #define FX64_NATIVE 1
 256
 257 typedef uint64_t Fx64;
 258
 259 #define FX64_MOV32(a, b) a = b
 260 #define FX64_OR32(a, b)  a |= b
 261 #define FX64_SHL(a, c)   a <<= c
 262
 263 #else
 264
 265 #define FX64_NATIVE 0
 266
 267 typedef struct {
 268    GLuint lo, hi;
 269 } Fx64;
 270
 271 #define FX64_MOV32(a, b) a.lo = b
 272 #define FX64_OR32(a, b)  a.lo |= b
 273
 274 #define FX64_SHL(a, c)                                 \
 275    do {                                                \
 276        if ((c) >= 32) {                                \
 277           a.hi = a.lo << ((c) - 32);                   \
 278           a.lo = 0;                                    \
 279        } else {                                        \
 280           a.hi = (a.hi << (c)) | (a.lo >> (32 - (c))); \
 281           a.lo <<= (c);                                \
 282        }                                               \
 283    } while (0)
 284
 285 #endif
 286
 287
 288 #define F(i) (GLfloat)1 /* can be used to obtain an oblong metric: 0.30 / 0.59 / 0.11 */
 289 #define SAFECDOT 1 /* for paranoids */
 290
 291 #define MAKEIVEC(NV, NC, IV, B, V0, V1)  \
 292    do {                                  \
 293       /* compute interpolation vector */ \
 294       GLfloat d2 = 0.0F;                 \
 295       GLfloat rd2;                       \
 296                                          \
 297       for (i = 0; i < NC; i++) {         \
 298          IV[i] = (V1[i] - V0[i]) * F(i); \
 299          d2 += IV[i] * IV[i];            \
 300       }                                  \
 301       rd2 = (GLfloat)NV / d2;            \
 302       B = 0;                             \
 303       for (i = 0; i < NC; i++) {         \
 304          IV[i] *= F(i);                  \
 305          B -= IV[i] * V0[i];             \
 306          IV[i] *= rd2;                   \
 307       }                                  \
 308       B = B * rd2 + 0.5f;                \
 309    } while (0)
 310
 311 #define CALCCDOT(TEXEL, NV, NC, IV, B, V)\
 312    do {                                  \
 313       GLfloat dot = 0.0F;                \
 314       for (i = 0; i < NC; i++) {         \
 315          dot += V[i] * IV[i];            \
 316       }                                  \
 317       TEXEL = (GLint)(dot + B);          \
 318       if (SAFECDOT) {                    \
 319          if (TEXEL < 0) {                \
 320             TEXEL = 0;                   \
 321          } else if (TEXEL > NV) {        \
 322             TEXEL = NV;                  \
 323          }                               \
 324       }                                  \
 325    } while (0)
 326
 327
 328 static GLint
 329 fxt1_bestcol (GLfloat vec[][MAX_COMP], GLint nv,
 330               GLubyte input[MAX_COMP], GLint nc)
 331 {
 332    GLint i, j, best = -1;
 333    GLfloat err = 1e9; /* big enough */
 334
 335    for (j = 0; j < nv; j++) {
 336       GLfloat e = 0.0F;
 337       for (i = 0; i < nc; i++) {
 338          e += (vec[j][i] - input[i]) * (vec[j][i] - input[i]);
 339       }
 340       if (e < err) {
 341          err = e;
 342          best = j;
 343       }
 344    }
 345
 346    return best;
 347 }
 348
 349
 350 static GLint
 351 fxt1_worst (GLfloat vec[MAX_COMP],
 352             GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
 353 {
 354    GLint i, k, worst = -1;
 355    GLfloat err = -1.0F; /* small enough */
 356
 357    for (k = 0; k < n; k++) {
 358       GLfloat e = 0.0F;
 359       for (i = 0; i < nc; i++) {
 360          e += (vec[i] - input[k][i]) * (vec[i] - input[k][i]);
 361       }
 362       if (e > err) {
 363          err = e;
 364          worst = k;
 365       }
 366    }
 367
 368    return worst;
 369 }
 370
 371
 372 static GLint
 373 fxt1_variance (GLdouble variance[MAX_COMP],
 374                GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
 375 {
 376    GLint i, k, best = 0;
 377    GLint sx, sx2;
 378    GLdouble var, maxvar = -1; /* small enough */
 379    GLdouble teenth = 1.0 / n;
 380
 381    for (i = 0; i < nc; i++) {
 382       sx = sx2 = 0;
 383       for (k = 0; k < n; k++) {
 384          GLint t = input[k][i];
 385          sx += t;
 386          sx2 += t * t;
 387       }
 388       var = sx2 * teenth - sx * sx * teenth * teenth;
 389       if (maxvar < var) {
 390          maxvar = var;
 391          best = i;
 392       }
 393       if (variance) {
 394          variance[i] = var;
 395       }
 396    }
 397
 398    return best;
 399 }
 400
 401
 402 static GLint
 403 fxt1_choose (GLfloat vec[][MAX_COMP], GLint nv,
 404              GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
 405 {
 406 #if 0
 407    /* Choose colors from a grid.
 408     */
 409    GLint i, j;
 410
 411    for (j = 0; j < nv; j++) {
 412       GLint m = j * (n - 1) / (nv - 1);
 413       for (i = 0; i < nc; i++) {
 414          vec[j][i] = input[m][i];
 415       }
 416    }
 417 #else
 418    /* Our solution here is to find the darkest and brightest colors in
 419     * the 8x4 tile and use those as the two representative colors.
 420     * There are probably better algorithms to use (histogram-based).
 421     */
 422    GLint i, j, k;
 423    GLint minSum = 2000; /* big enough */
 424    GLint maxSum = -1; /* small enough */
 425    GLint minCol = 0; /* phoudoin: silent compiler! */
 426    GLint maxCol = 0; /* phoudoin: silent compiler! */
 427
 428    struct {
 429       GLint flag;
 430       GLint key;
 431       GLint freq;
 432       GLint idx;
 433    } hist[N_TEXELS];
 434    GLint lenh = 0;
 435
 436    _mesa_memset(hist, 0, sizeof(hist));
 437
 438    for (k = 0; k < n; k++) {
 439       GLint l;
 440       GLint key = 0;
 441       GLint sum = 0;
 442       for (i = 0; i < nc; i++) {
 443          key <<= 8;
 444          key |= input[k][i];
 445          sum += input[k][i];
 446       }
 447       for (l = 0; l < n; l++) {
 448          if (!hist[l].flag) {
 449             /* alloc new slot */
 450             hist[l].flag = !0;
 451             hist[l].key = key;
 452             hist[l].freq = 1;
 453             hist[l].idx = k;
 454             lenh = l + 1;
 455             break;
 456          } else if (hist[l].key == key) {
 457             hist[l].freq++;
 458             break;
 459          }
 460       }
 461       if (minSum > sum) {
 462          minSum = sum;
 463          minCol = k;
 464       }
 465       if (maxSum < sum) {
 466          maxSum = sum;
 467          maxCol = k;
 468       }
 469    }
 470
 471    if (lenh <= nv) {
 472       for (j = 0; j < lenh; j++) {
 473          for (i = 0; i < nc; i++) {
 474             vec[j][i] = (GLfloat)input[hist[j].idx][i];
 475          }
 476       }
 477       for (; j < nv; j++) {
 478          for (i = 0; i < nc; i++) {
 479             vec[j][i] = vec[0][i];
 480          }
 481       }
 482       return 0;
 483    }
 484
 485    for (j = 0; j < nv; j++) {
 486       for (i = 0; i < nc; i++) {
 487          vec[j][i] = ((nv - 1 - j) * input[minCol][i] + j * input[maxCol][i] + (nv - 1) / 2) / (GLfloat)(nv - 1);
 488       }
 489    }
 490 #endif
 491
 492    return !0;
 493 }
 494
 495
 496 static GLint
 497 fxt1_lloyd (GLfloat vec[][MAX_COMP], GLint nv,
 498             GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
 499 {
 500    /* Use the generalized lloyd's algorithm for VQ:
 501     *     find 4 color vectors.
 502     *
 503     *     for each sample color
 504     *         sort to nearest vector.
 505     *
 506     *     replace each vector with the centroid of it's matching colors.
 507     *
 508     *     repeat until RMS doesn't improve.
 509     *
 510     *     if a color vector has no samples, or becomes the same as another
 511     *     vector, replace it with the color which is farthest from a sample.
 512     *
 513     * vec[][MAX_COMP]           initial vectors and resulting colors
 514     * nv                        number of resulting colors required
 515     * input[N_TEXELS][MAX_COMP] input texels
 516     * nc                        number of components in input / vec
 517     * n                         number of input samples
 518     */
 519
 520    GLint sum[MAX_VECT][MAX_COMP]; /* used to accumulate closest texels */
 521    GLint cnt[MAX_VECT]; /* how many times a certain vector was chosen */
 522    GLfloat error, lasterror = 1e9;
 523
 524    GLint i, j, k, rep;
 525
 526    /* the quantizer */
 527    for (rep = 0; rep < LL_N_REP; rep++) {
 528       /* reset sums & counters */
 529       for (j = 0; j < nv; j++) {
 530          for (i = 0; i < nc; i++) {
 531             sum[j][i] = 0;
 532          }
 533          cnt[j] = 0;
 534       }
 535       error = 0;
 536
 537       /* scan whole block */
 538       for (k = 0; k < n; k++) {
 539 #if 1
 540          GLint best = -1;
 541          GLfloat err = 1e9; /* big enough */
 542          /* determine best vector */
 543          for (j = 0; j < nv; j++) {
 544             GLfloat e = (vec[j][0] - input[k][0]) * (vec[j][0] - input[k][0]) +
 545                       (vec[j][1] - input[k][1]) * (vec[j][1] - input[k][1]) +
 546                       (vec[j][2] - input[k][2]) * (vec[j][2] - input[k][2]);
 547             if (nc == 4) {
 548                e += (vec[j][3] - input[k][3]) * (vec[j][3] - input[k][3]);
 549             }
 550             if (e < err) {
 551                err = e;
 552                best = j;
 553             }
 554          }
 555 #else
 556          GLint best = fxt1_bestcol(vec, nv, input[k], nc, &err);
 557 #endif
 558          /* add in closest color */
 559          for (i = 0; i < nc; i++) {
 560             sum[best][i] += input[k][i];
 561          }
 562          /* mark this vector as used */
 563          cnt[best]++;
 564          /* accumulate error */
 565          error += err;
 566       }
 567
 568       /* check RMS */
 569       if ((error < LL_RMS_E) ||
 570           ((error < lasterror) && ((lasterror - error) < LL_RMS_D))) {
 571          return !0; /* good match */
 572       }
 573       lasterror = error;
 574
 575       /* move each vector to the barycenter of its closest colors */
 576       for (j = 0; j < nv; j++) {
 577          if (cnt[j]) {
 578             GLfloat div = 1.0F / cnt[j];
 579             for (i = 0; i < nc; i++) {
 580                vec[j][i] = div * sum[j][i];
 581             }
 582          } else {
 583             /* this vec has no samples or is identical with a previous vec */
 584             GLint worst = fxt1_worst(vec[j], input, nc, n);
 585             for (i = 0; i < nc; i++) {
 586                vec[j][i] = input[worst][i];
 587             }
 588          }
 589       }
 590    }
 591
 592    return 0; /* could not converge fast enough */
 593 }
 594
 595
 596 static void
 597 fxt1_quantize_CHROMA (GLuint *cc,
 598                       GLubyte input[N_TEXELS][MAX_COMP])
 599 {
 600    const GLint n_vect = 4; /* 4 base vectors to find */
 601    const GLint n_comp = 3; /* 3 components: R, G, B */
 602    GLfloat vec[MAX_VECT][MAX_COMP];
 603    GLint i, j, k;
 604    Fx64 hi; /* high quadword */
 605    GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
 606
 607    if (fxt1_choose(vec, n_vect, input, n_comp, N_TEXELS) != 0) {
 608       fxt1_lloyd(vec, n_vect, input, n_comp, N_TEXELS);
 609    }
 610
 611    FX64_MOV32(hi, 4); /* cc-chroma = "010" + unused bit */
 612    for (j = n_vect - 1; j >= 0; j--) {
 613       for (i = 0; i < n_comp; i++) {
 614          /* add in colors */
 615          FX64_SHL(hi, 5);
 616          FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
 617       }
 618    }
 619    ((Fx64 *)cc)[1] = hi;
 620
 621    lohi = lolo = 0;
 622    /* right microtile */
 623    for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) {
 624       lohi <<= 2;
 625       lohi |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
 626    }
 627    /* left microtile */
 628    for (; k >= 0; k--) {
 629       lolo <<= 2;
 630       lolo |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
 631    }
 632    cc[1] = lohi;
 633    cc[0] = lolo;
 634 }
 635
 636
 637 static void
 638 fxt1_quantize_ALPHA0 (GLuint *cc,
 639                       GLubyte input[N_TEXELS][MAX_COMP],
 640                       GLubyte reord[N_TEXELS][MAX_COMP], GLint n)
 641 {
 642    const GLint n_vect = 3; /* 3 base vectors to find */
 643    const GLint n_comp = 4; /* 4 components: R, G, B, A */
 644    GLfloat vec[MAX_VECT][MAX_COMP];
 645    GLint i, j, k;
 646    Fx64 hi; /* high quadword */
 647    GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
 648
 649    /* the last vector indicates zero */
 650    for (i = 0; i < n_comp; i++) {
 651       vec[n_vect][i] = 0;
 652    }
 653
 654    /* the first n texels in reord are guaranteed to be non-zero */
 655    if (fxt1_choose(vec, n_vect, reord, n_comp, n) != 0) {
 656       fxt1_lloyd(vec, n_vect, reord, n_comp, n);
 657    }
 658
 659    FX64_MOV32(hi, 6); /* alpha = "011" + lerp = 0 */
 660    for (j = n_vect - 1; j >= 0; j--) {
 661       /* add in alphas */
 662       FX64_SHL(hi, 5);
 663       FX64_OR32(hi, (GLuint)(vec[j][ACOMP] / 8.0F));
 664    }
 665    for (j = n_vect - 1; j >= 0; j--) {
 666       for (i = 0; i < n_comp - 1; i++) {
 667          /* add in colors */
 668          FX64_SHL(hi, 5);
 669          FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
 670       }
 671    }
 672    ((Fx64 *)cc)[1] = hi;
 673
 674    lohi = lolo = 0;
 675    /* right microtile */
 676    for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) {
 677       lohi <<= 2;
 678       lohi |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
 679    }
 680    /* left microtile */
 681    for (; k >= 0; k--) {
 682       lolo <<= 2;
 683       lolo |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
 684    }
 685    cc[1] = lohi;
 686    cc[0] = lolo;
 687 }
 688
 689
 690 static void
 691 fxt1_quantize_ALPHA1 (GLuint *cc,
 692                       GLubyte input[N_TEXELS][MAX_COMP])
 693 {
 694    const GLint n_vect = 3; /* highest vector number in each microtile */
 695    const GLint n_comp = 4; /* 4 components: R, G, B, A */
 696    GLfloat vec[1 + 1 + 1][MAX_COMP]; /* 1.5 extrema for each sub-block */
 697    GLfloat b, iv[MAX_COMP]; /* interpolation vector */
 698    GLint i, j, k;
 699    Fx64 hi; /* high quadword */
 700    GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
 701
 702    GLint minSum;
 703    GLint maxSum;
 704    GLint minColL = 0, maxColL = 0;
 705    GLint minColR = 0, maxColR = 0;
 706    GLint sumL = 0, sumR = 0;
 707    GLint nn_comp;
 708    /* Our solution here is to find the darkest and brightest colors in
 709     * the 4x4 tile and use those as the two representative colors.
 710     * There are probably better algorithms to use (histogram-based).
 711     */
 712    nn_comp = n_comp;
 713    while ((minColL == maxColL) && nn_comp) {
 714        minSum = 2000; /* big enough */
 715        maxSum = -1; /* small enough */
 716        for (k = 0; k < N_TEXELS / 2; k++) {
 717            GLint sum = 0;
 718            for (i = 0; i < nn_comp; i++) {
 719                sum += input[k][i];
 720            }
 721            if (minSum > sum) {
 722                minSum = sum;
 723                minColL = k;
 724            }
 725            if (maxSum < sum) {
 726                maxSum = sum;
 727                maxColL = k;
 728            }
 729            sumL += sum;
 730        }
 731
 732        nn_comp--;
 733    }
 734
 735    nn_comp = n_comp;
 736    while ((minColR == maxColR) && nn_comp) {
 737        minSum = 2000; /* big enough */
 738        maxSum = -1; /* small enough */
 739        for (k = N_TEXELS / 2; k < N_TEXELS; k++) {
 740            GLint sum = 0;
 741            for (i = 0; i < nn_comp; i++) {
 742                sum += input[k][i];
 743            }
 744            if (minSum > sum) {
 745                minSum = sum;
 746                minColR = k;
 747            }
 748            if (maxSum < sum) {
 749                maxSum = sum;
 750                maxColR = k;
 751            }
 752            sumR += sum;
 753        }
 754
 755        nn_comp--;
 756    }
 757
 758    /* choose the common vector (yuck!) */
 759    {
 760       GLint j1, j2;
 761       GLint v1 = 0, v2 = 0;
 762       GLfloat err = 1e9; /* big enough */
 763       GLfloat tv[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
 764       for (i = 0; i < n_comp; i++) {
 765          tv[0][i] = input[minColL][i];
 766          tv[1][i] = input[maxColL][i];
 767          tv[2][i] = input[minColR][i];
 768          tv[3][i] = input[maxColR][i];
 769       }
 770       for (j1 = 0; j1 < 2; j1++) {
 771          for (j2 = 2; j2 < 4; j2++) {
 772             GLfloat e = 0.0F;
 773             for (i = 0; i < n_comp; i++) {
 774                e += (tv[j1][i] - tv[j2][i]) * (tv[j1][i] - tv[j2][i]);
 775             }
 776             if (e < err) {
 777                err = e;
 778                v1 = j1;
 779                v2 = j2;
 780             }
 781          }
 782       }
 783       for (i = 0; i < n_comp; i++) {
 784          vec[0][i] = tv[1 - v1][i];
 785          vec[1][i] = (tv[v1][i] * sumL + tv[v2][i] * sumR) / (sumL + sumR);
 786          vec[2][i] = tv[5 - v2][i];
 787       }
 788    }
 789
 790    /* left microtile */
 791    cc[0] = 0;
 792    if (minColL != maxColL) {
 793       /* compute interpolation vector */
 794       MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
 795
 796       /* add in texels */
 797       lolo = 0;
 798       for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
 799          GLint texel;
 800          /* interpolate color */
 801          CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
 802          /* add in texel */
 803          lolo <<= 2;
 804          lolo |= texel;
 805       }
 806
 807       cc[0] = lolo;
 808    }
 809
 810    /* right microtile */
 811    cc[1] = 0;
 812    if (minColR != maxColR) {
 813       /* compute interpolation vector */
 814       MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[1]);
 815
 816       /* add in texels */
 817       lohi = 0;
 818       for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
 819          GLint texel;
 820          /* interpolate color */
 821          CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
 822          /* add in texel */
 823          lohi <<= 2;
 824          lohi |= texel;
 825       }
 826
 827       cc[1] = lohi;
 828    }
 829
 830    FX64_MOV32(hi, 7); /* alpha = "011" + lerp = 1 */
 831    for (j = n_vect - 1; j >= 0; j--) {
 832       /* add in alphas */
 833       FX64_SHL(hi, 5);
 834       FX64_OR32(hi, (GLuint)(vec[j][ACOMP] / 8.0F));
 835    }
 836    for (j = n_vect - 1; j >= 0; j--) {
 837       for (i = 0; i < n_comp - 1; i++) {
 838          /* add in colors */
 839          FX64_SHL(hi, 5);
 840          FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
 841       }
 842    }
 843    ((Fx64 *)cc)[1] = hi;
 844 }
 845
 846
 847 static void
 848 fxt1_quantize_HI (GLuint *cc,
 849                   GLubyte input[N_TEXELS][MAX_COMP],
 850                   GLubyte reord[N_TEXELS][MAX_COMP], GLint n)
 851 {
 852    const GLint n_vect = 6; /* highest vector number */
 853    const GLint n_comp = 3; /* 3 components: R, G, B */
 854    GLfloat b = 0.0F;       /* phoudoin: silent compiler! */
 855    GLfloat iv[MAX_COMP];   /* interpolation vector */
 856    GLint i, k;
 857    GLuint hihi; /* high quadword: hi dword */
 858
 859    GLint minSum = 2000; /* big enough */
 860    GLint maxSum = -1; /* small enough */
 861    GLint minCol = 0; /* phoudoin: silent compiler! */
 862    GLint maxCol = 0; /* phoudoin: silent compiler! */
 863
 864    /* Our solution here is to find the darkest and brightest colors in
 865     * the 8x4 tile and use those as the two representative colors.
 866     * There are probably better algorithms to use (histogram-based).
 867     */
 868    for (k = 0; k < n; k++) {
 869       GLint sum = 0;
 870       for (i = 0; i < n_comp; i++) {
 871          sum += reord[k][i];
 872       }
 873       if (minSum > sum) {
 874          minSum = sum;
 875          minCol = k;
 876       }
 877       if (maxSum < sum) {
 878          maxSum = sum;
 879          maxCol = k;
 880       }
 881    }
 882
 883    hihi = 0; /* cc-hi = "00" */
 884    for (i = 0; i < n_comp; i++) {
 885       /* add in colors */
 886       hihi <<= 5;
 887       hihi |= reord[maxCol][i] >> 3;
 888    }
 889    for (i = 0; i < n_comp; i++) {
 890       /* add in colors */
 891       hihi <<= 5;
 892       hihi |= reord[minCol][i] >> 3;
 893    }
 894    cc[3] = hihi;
 895    cc[0] = cc[1] = cc[2] = 0;
 896
 897    /* compute interpolation vector */
 898    if (minCol != maxCol) {
 899       MAKEIVEC(n_vect, n_comp, iv, b, reord[minCol], reord[maxCol]);
 900    }
 901
 902    /* add in texels */
 903    for (k = N_TEXELS - 1; k >= 0; k--) {
 904       GLint t = k * 3;
 905       GLuint *kk = (GLuint *)((char *)cc + t / 8);
 906       GLint texel = n_vect + 1; /* transparent black */
 907
 908       if (!ISTBLACK(input[k])) {
 909          if (minCol != maxCol) {
 910             /* interpolate color */
 911             CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
 912             /* add in texel */
 913             kk[0] |= texel << (t & 7);
 914          }
 915       } else {
 916          /* add in texel */
 917          kk[0] |= texel << (t & 7);
 918       }
 919    }
 920 }
 921
 922
 923 static void
 924 fxt1_quantize_MIXED1 (GLuint *cc,
 925                       GLubyte input[N_TEXELS][MAX_COMP])
 926 {
 927    const GLint n_vect = 2; /* highest vector number in each microtile */
 928    const GLint n_comp = 3; /* 3 components: R, G, B */
 929    GLubyte vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
 930    GLfloat b, iv[MAX_COMP]; /* interpolation vector */
 931    GLint i, j, k;
 932    Fx64 hi; /* high quadword */
 933    GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
 934
 935    GLint minSum;
 936    GLint maxSum;
 937    GLint minColL = 0, maxColL = -1;
 938    GLint minColR = 0, maxColR = -1;
 939
 940    /* Our solution here is to find the darkest and brightest colors in
 941     * the 4x4 tile and use those as the two representative colors.
 942     * There are probably better algorithms to use (histogram-based).
 943     */
 944    minSum = 2000; /* big enough */
 945    maxSum = -1; /* small enough */
 946    for (k = 0; k < N_TEXELS / 2; k++) {
 947       if (!ISTBLACK(input[k])) {
 948          GLint sum = 0;
 949          for (i = 0; i < n_comp; i++) {
 950             sum += input[k][i];
 951          }
 952          if (minSum > sum) {
 953             minSum = sum;
 954             minColL = k;
 955          }
 956          if (maxSum < sum) {
 957             maxSum = sum;
 958             maxColL = k;
 959          }
 960       }
 961    }
 962    minSum = 2000; /* big enough */
 963    maxSum = -1; /* small enough */
 964    for (; k < N_TEXELS; k++) {
 965       if (!ISTBLACK(input[k])) {
 966          GLint sum = 0;
 967          for (i = 0; i < n_comp; i++) {
 968             sum += input[k][i];
 969          }
 970          if (minSum > sum) {
 971             minSum = sum;
 972             minColR = k;
 973          }
 974          if (maxSum < sum) {
 975             maxSum = sum;
 976             maxColR = k;
 977          }
 978       }
 979    }
 980
 981    /* left microtile */
 982    if (maxColL == -1) {
 983       /* all transparent black */
 984       cc[0] = ~0u;
 985       for (i = 0; i < n_comp; i++) {
 986          vec[0][i] = 0;
 987          vec[1][i] = 0;
 988       }
 989    } else {
 990       cc[0] = 0;
 991       for (i = 0; i < n_comp; i++) {
 992          vec[0][i] = input[minColL][i];
 993          vec[1][i] = input[maxColL][i];
 994       }
 995       if (minColL != maxColL) {
 996          /* compute interpolation vector */
 997          MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
 998
 999          /* add in texels */
1000          lolo = 0;
1001          for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
1002             GLint texel = n_vect + 1; /* transparent black */
1003             if (!ISTBLACK(input[k])) {
1004                /* interpolate color */
1005                CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
1006             }
1007             /* add in texel */
1008             lolo <<= 2;
1009             lolo |= texel;
1010          }
1011          cc[0] = lolo;
1012       }
1013    }
1014
1015    /* right microtile */
1016    if (maxColR == -1) {
1017       /* all transparent black */
1018       cc[1] = ~0u;
1019       for (i = 0; i < n_comp; i++) {
1020          vec[2][i] = 0;
1021          vec[3][i] = 0;
1022       }
1023    } else {
1024       cc[1] = 0;
1025       for (i = 0; i < n_comp; i++) {
1026          vec[2][i] = input[minColR][i];
1027          vec[3][i] = input[maxColR][i];
1028       }
1029       if (minColR != maxColR) {
1030          /* compute interpolation vector */
1031          MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);
1032
1033          /* add in texels */
1034          lohi = 0;
1035          for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
1036             GLint texel = n_vect + 1; /* transparent black */
1037             if (!ISTBLACK(input[k])) {
1038                /* interpolate color */
1039                CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
1040             }
1041             /* add in texel */
1042             lohi <<= 2;
1043             lohi |= texel;
1044          }
1045          cc[1] = lohi;
1046       }
1047    }
1048
1049    FX64_MOV32(hi, 9 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
1050    for (j = 2 * 2 - 1; j >= 0; j--) {
1051       for (i = 0; i < n_comp; i++) {
1052          /* add in colors */
1053          FX64_SHL(hi, 5);
1054          FX64_OR32(hi, vec[j][i] >> 3);
1055       }
1056    }
1057    ((Fx64 *)cc)[1] = hi;
1058 }
1059
1060
1061 static void
1062 fxt1_quantize_MIXED0 (GLuint *cc,
1063                       GLubyte input[N_TEXELS][MAX_COMP])
1064 {
1065    const GLint n_vect = 3; /* highest vector number in each microtile */
1066    const GLint n_comp = 3; /* 3 components: R, G, B */
1067    GLubyte vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
1068    GLfloat b, iv[MAX_COMP]; /* interpolation vector */
1069    GLint i, j, k;
1070    Fx64 hi; /* high quadword */
1071    GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
1072
1073    GLint minColL = 0, maxColL = 0;
1074    GLint minColR = 0, maxColR = 0;
1075 #if 0
1076    GLint minSum;
1077    GLint maxSum;
1078
1079    /* Our solution here is to find the darkest and brightest colors in
1080     * the 4x4 tile and use those as the two representative colors.
1081     * There are probably better algorithms to use (histogram-based).
1082     */
1083    minSum = 2000; /* big enough */
1084    maxSum = -1; /* small enough */
1085    for (k = 0; k < N_TEXELS / 2; k++) {
1086       GLint sum = 0;
1087       for (i = 0; i < n_comp; i++) {
1088          sum += input[k][i];
1089       }
1090       if (minSum > sum) {
1091          minSum = sum;
1092          minColL = k;
1093       }
1094       if (maxSum < sum) {
1095          maxSum = sum;
1096          maxColL = k;
1097       }
1098    }
1099    minSum = 2000; /* big enough */
1100    maxSum = -1; /* small enough */
1101    for (; k < N_TEXELS; k++) {
1102       GLint sum = 0;
1103       for (i = 0; i < n_comp; i++) {
1104          sum += input[k][i];
1105       }
1106       if (minSum > sum) {
1107          minSum = sum;
1108          minColR = k;
1109       }
1110       if (maxSum < sum) {
1111          maxSum = sum;
1112          maxColR = k;
1113       }
1114    }
1115 #else
1116    GLint minVal;
1117    GLint maxVal;
1118    GLint maxVarL = fxt1_variance(NULL, input, n_comp, N_TEXELS / 2);
1119    GLint maxVarR = fxt1_variance(NULL, &input[N_TEXELS / 2], n_comp, N_TEXELS / 2);
1120
1121    /* Scan the channel with max variance for lo & hi
1122     * and use those as the two representative colors.
1123     */
1124    minVal = 2000; /* big enough */
1125    maxVal = -1; /* small enough */
1126    for (k = 0; k < N_TEXELS / 2; k++) {
1127       GLint t = input[k][maxVarL];
1128       if (minVal > t) {
1129          minVal = t;
1130          minColL = k;
1131       }
1132       if (maxVal < t) {
1133          maxVal = t;
1134          maxColL = k;
1135       }
1136    }
1137    minVal = 2000; /* big enough */
1138    maxVal = -1; /* small enough */
1139    for (; k < N_TEXELS; k++) {
1140       GLint t = input[k][maxVarR];
1141       if (minVal > t) {
1142          minVal = t;
1143          minColR = k;
1144       }
1145       if (maxVal < t) {
1146          maxVal = t;
1147          maxColR = k;
1148       }
1149    }
1150 #endif
1151
1152    /* left microtile */
1153    cc[0] = 0;
1154    for (i = 0; i < n_comp; i++) {
1155       vec[0][i] = input[minColL][i];
1156       vec[1][i] = input[maxColL][i];
1157    }
1158    if (minColL != maxColL) {
1159       /* compute interpolation vector */
1160       MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
1161
1162       /* add in texels */
1163       lolo = 0;
1164       for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
1165          GLint texel;
1166          /* interpolate color */
1167          CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
1168          /* add in texel */
1169          lolo <<= 2;
1170          lolo |= texel;
1171       }
1172
1173       /* funky encoding for LSB of green */
1174       if ((GLint)((lolo >> 1) & 1) != (((vec[1][GCOMP] ^ vec[0][GCOMP]) >> 2) & 1)) {
1175          for (i = 0; i < n_comp; i++) {
1176             vec[1][i] = input[minColL][i];
1177             vec[0][i] = input[maxColL][i];
1178          }
1179          lolo = ~lolo;
1180       }
1181
1182       cc[0] = lolo;
1183    }
1184
1185    /* right microtile */
1186    cc[1] = 0;
1187    for (i = 0; i < n_comp; i++) {
1188       vec[2][i] = input[minColR][i];
1189       vec[3][i] = input[maxColR][i];
1190    }
1191    if (minColR != maxColR) {
1192       /* compute interpolation vector */
1193       MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);
1194
1195       /* add in texels */
1196       lohi = 0;
1197       for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
1198          GLint texel;
1199          /* interpolate color */
1200          CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
1201          /* add in texel */
1202          lohi <<= 2;
1203          lohi |= texel;
1204       }
1205
1206       /* funky encoding for LSB of green */
1207       if ((GLint)((lohi >> 1) & 1) != (((vec[3][GCOMP] ^ vec[2][GCOMP]) >> 2) & 1)) {
1208          for (i = 0; i < n_comp; i++) {
1209             vec[3][i] = input[minColR][i];
1210             vec[2][i] = input[maxColR][i];
1211          }
1212          lohi = ~lohi;
1213       }
1214
1215       cc[1] = lohi;
1216    }
1217
1218    FX64_MOV32(hi, 8 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
1219    for (j = 2 * 2 - 1; j >= 0; j--) {
1220       for (i = 0; i < n_comp; i++) {
1221          /* add in colors */
1222          FX64_SHL(hi, 5);
1223          FX64_OR32(hi, vec[j][i] >> 3);
1224       }
1225    }
1226    ((Fx64 *)cc)[1] = hi;
1227 }
1228
1229
1230 static void
1231 fxt1_quantize (GLuint *cc, const GLubyte *lines[], GLint comps)
1232 {
1233    GLint trualpha;
1234    GLubyte reord[N_TEXELS][MAX_COMP];
1235
1236    GLubyte input[N_TEXELS][MAX_COMP];
1237    GLint i, k, l;
1238
1239    if (comps == 3) {
1240       /* make the whole block opaque */
1241       _mesa_memset(input, -1, sizeof(input));
1242    }
1243
1244    /* 8 texels each line */
1245    for (l = 0; l < 4; l++) {
1246       for (k = 0; k < 4; k++) {
1247          for (i = 0; i < comps; i++) {
1248             input[k + l * 4][i] = *lines[l]++;
1249          }
1250       }
1251       for (; k < 8; k++) {
1252          for (i = 0; i < comps; i++) {
1253             input[k + l * 4 + 12][i] = *lines[l]++;
1254          }
1255       }
1256    }
1257
1258    /* block layout:
1259     * 00, 01, 02, 03, 08, 09, 0a, 0b
1260     * 10, 11, 12, 13, 18, 19, 1a, 1b
1261     * 04, 05, 06, 07, 0c, 0d, 0e, 0f
1262     * 14, 15, 16, 17, 1c, 1d, 1e, 1f
1263     */
1264
1265    /* [dBorca]
1266     * stupidity flows forth from this
1267     */
1268    l = N_TEXELS;
1269    trualpha = 0;
1270    if (comps == 4) {
1271       /* skip all transparent black texels */
1272       l = 0;
1273       for (k = 0; k < N_TEXELS; k++) {
1274          /* test all components against 0 */
1275          if (!ISTBLACK(input[k])) {
1276             /* texel is not transparent black */
1277             COPY_4UBV(reord[l], input[k]);
1278             if (reord[l][ACOMP] < (255 - ALPHA_TS)) {
1279                /* non-opaque texel */
1280                trualpha = !0;
1281             }
1282             l++;
1283          }
1284       }
1285    }
1286
1287 #if 0
1288    if (trualpha) {
1289       fxt1_quantize_ALPHA0(cc, input, reord, l);
1290    } else if (l == 0) {
1291       cc[0] = cc[1] = cc[2] = -1;
1292       cc[3] = 0;
1293    } else if (l < N_TEXELS) {
1294       fxt1_quantize_HI(cc, input, reord, l);
1295    } else {
1296       fxt1_quantize_CHROMA(cc, input);
1297    }
1298    (void)fxt1_quantize_ALPHA1;
1299    (void)fxt1_quantize_MIXED1;
1300    (void)fxt1_quantize_MIXED0;
1301 #else
1302    if (trualpha) {
1303       fxt1_quantize_ALPHA1(cc, input);
1304    } else if (l == 0) {
1305       cc[0] = cc[1] = cc[2] = ~0u;
1306       cc[3] = 0;
1307    } else if (l < N_TEXELS) {
1308       fxt1_quantize_MIXED1(cc, input);
1309    } else {
1310       fxt1_quantize_MIXED0(cc, input);
1311    }
1312    (void)fxt1_quantize_ALPHA0;
1313    (void)fxt1_quantize_HI;
1314    (void)fxt1_quantize_CHROMA;
1315 #endif
1316 }
1317
1318
1319 static void
1320 fxt1_encode (GLuint width, GLuint height, GLint comps,
1321              const void *source, GLint srcRowStride,
1322              void *dest, GLint destRowStride)
1323 {
1324    GLuint x, y;
1325    const GLubyte *data;
1326    GLuint *encoded = (GLuint *)dest;
1327    void *newSource = NULL;
1328
1329    assert(comps == 3 || comps == 4);
1330
1331    /* Replicate image if width is not M8 or height is not M4 */
1332    if ((width & 7) | (height & 3)) {
1333       GLint newWidth = (width + 7) & ~7;
1334       GLint newHeight = (height + 3) & ~3;
1335       newSource = _mesa_malloc(comps * newWidth * newHeight * sizeof(GLchan));
1336       if (!newSource) {
1337          GET_CURRENT_CONTEXT(ctx);
1338          _mesa_error(ctx, GL_OUT_OF_MEMORY, "texture compression");
1339          goto cleanUp;
1340       }
1341       _mesa_upscale_teximage2d(width, height, newWidth, newHeight,
1342                                comps, (const GLchan *) source,
1343                                srcRowStride, (GLchan *) newSource);
1344       source = newSource;
1345       width = newWidth;
1346       height = newHeight;
1347       srcRowStride = comps * newWidth;
1348    }
1349
1350    /* convert from 16/32-bit channels to GLubyte if needed */
1351    if (CHAN_TYPE != GL_UNSIGNED_BYTE) {
1352       const GLuint n = width * height * comps;
1353       const GLchan *src = (const GLchan *) source;
1354       GLubyte *dest = (GLubyte *) _mesa_malloc(n * sizeof(GLubyte));
1355       GLuint i;
1356       if (!dest) {
1357          GET_CURRENT_CONTEXT(ctx);
1358          _mesa_error(ctx, GL_OUT_OF_MEMORY, "texture compression");
1359          goto cleanUp;
1360       }
1361       for (i = 0; i < n; i++) {
1362          dest[i] = CHAN_TO_UBYTE(src[i]);
1363       }
1364       if (newSource != NULL) {
1365          _mesa_free(newSource);
1366       }
1367       newSource = dest;  /* we'll free this buffer before returning */
1368       source = dest;  /* the new, GLubyte incoming image */
1369    }
1370
1371    data = (const GLubyte *) source;
1372    destRowStride = (destRowStride - width * 2) / 4;
1373    for (y = 0; y < height; y += 4) {
1374       GLuint offs = 0 + (y + 0) * srcRowStride;
1375       for (x = 0; x < width; x += 8) {
1376          const GLubyte *lines[4];
1377          lines[0] = &data[offs];
1378          lines[1] = lines[0] + srcRowStride;
1379          lines[2] = lines[1] + srcRowStride;
1380          lines[3] = lines[2] + srcRowStride;
1381          offs += 8 * comps;
1382          fxt1_quantize(encoded, lines, comps);
1383          /* 128 bits per 8x4 block */
1384          encoded += 4;
1385       }
1386       encoded += destRowStride;
1387    }
1388
1389  cleanUp:
1390    if (newSource != NULL) {
1391       _mesa_free(newSource);
1392    }
1393 }
1394
1395
1396 /***************************************************************************\
1397  * FXT1 decoder
1398  *
1399  * The decoder is based on GL_3DFX_texture_compression_FXT1
1400  * specification and serves as a concept for the encoder.
1401 \***************************************************************************/
1402
1403
1404 /* lookup table for scaling 5 bit colors up to 8 bits */
1405 static const GLubyte _rgb_scale_5[] = {
1406    0,   8,   16,  25,  33,  41,  49,  58,
1407    66,  74,  82,  90,  99,  107, 115, 123,
1408    132, 140, 148, 156, 165, 173, 181, 189,
1409    197, 206, 214, 222, 230, 239, 247, 255
1410 };
1411
1412 /* lookup table for scaling 6 bit colors up to 8 bits */
1413 static const GLubyte _rgb_scale_6[] = {
1414    0,   4,   8,   12,  16,  20,  24,  28,
1415    32,  36,  40,  45,  49,  53,  57,  61,
1416    65,  69,  73,  77,  81,  85,  89,  93,
1417    97,  101, 105, 109, 113, 117, 121, 125,
1418    130, 134, 138, 142, 146, 150, 154, 158,
1419    162, 166, 170, 174, 178, 182, 186, 190,
1420    194, 198, 202, 206, 210, 215, 219, 223,
1421    227, 231, 235, 239, 243, 247, 251, 255
1422 };
1423
1424
/* Read the dword containing bit `which` of the block at `cc` and shift
 * that bit down to position 0.  Higher bits are NOT masked off, and a
 * field that straddles a dword boundary cannot be read this way.
 */
#define CC_SEL(cc, which) (((const GLuint *)(cc))[(which) / 32] >> ((which) & 31))
/* Expand the low 5 bits of c to an 8-bit channel value. */
#define UP5(c) _rgb_scale_5[(c) & 31]
/* Expand the low 5 bits of c plus the extra least significant bit b
 * (6 bits in total) to an 8-bit channel value.
 */
#define UP6(c, b) _rgb_scale_6[(((c) & 31) << 1) | ((b) & 1)]
/* Step t of n between c0 (t == 0) and c1 (t == n), rounded to nearest.
 * The whole expansion is parenthesised so that it can be used safely
 * inside a larger expression.
 */
#define LERP(n, t, c0, c1) ((((n) - (t)) * (c0) + (t) * (c1) + (n) / 2) / (n))
1429
1430
1431 static void
1432 fxt1_decode_1HI (const GLubyte *code, GLint t, GLchan *rgba)
1433 {
1434    const GLuint *cc;
1435
1436    t *= 3;
1437    cc = (const GLuint *)(code + t / 8);
1438    t = (cc[0] >> (t & 7)) & 7;
1439
1440    if (t == 7) {
1441       rgba[RCOMP] = rgba[GCOMP] = rgba[BCOMP] = rgba[ACOMP] = 0;
1442    } else {
1443       GLubyte r, g, b;
1444       cc = (const GLuint *)(code + 12);
1445       if (t == 0) {
1446          b = UP5(CC_SEL(cc, 0));
1447          g = UP5(CC_SEL(cc, 5));
1448          r = UP5(CC_SEL(cc, 10));
1449       } else if (t == 6) {
1450          b = UP5(CC_SEL(cc, 15));
1451          g = UP5(CC_SEL(cc, 20));
1452          r = UP5(CC_SEL(cc, 25));
1453       } else {
1454          b = LERP(6, t, UP5(CC_SEL(cc, 0)), UP5(CC_SEL(cc, 15)));
1455          g = LERP(6, t, UP5(CC_SEL(cc, 5)), UP5(CC_SEL(cc, 20)));
1456          r = LERP(6, t, UP5(CC_SEL(cc, 10)), UP5(CC_SEL(cc, 25)));
1457       }
1458       rgba[RCOMP] = UBYTE_TO_CHAN(r);
1459       rgba[GCOMP] = UBYTE_TO_CHAN(g);
1460       rgba[BCOMP] = UBYTE_TO_CHAN(b);
1461       rgba[ACOMP] = CHAN_MAX;
1462    }
1463 }
1464
1465
1466 static void
1467 fxt1_decode_1CHROMA (const GLubyte *code, GLint t, GLchan *rgba)
1468 {
1469    const GLuint *cc;
1470    GLuint kk;
1471
1472    cc = (const GLuint *)code;
1473    if (t & 16) {
1474       cc++;
1475       t &= 15;
1476    }
1477    t = (cc[0] >> (t * 2)) & 3;
1478
1479    t *= 15;
1480    cc = (const GLuint *)(code + 8 + t / 8);
1481    kk = cc[0] >> (t & 7);
1482    rgba[BCOMP] = UBYTE_TO_CHAN( UP5(kk) );
1483    rgba[GCOMP] = UBYTE_TO_CHAN( UP5(kk >> 5) );
1484    rgba[RCOMP] = UBYTE_TO_CHAN( UP5(kk >> 10) );
1485    rgba[ACOMP] = CHAN_MAX;
1486 }
1487
1488
1489 static void
1490 fxt1_decode_1MIXED (const GLubyte *code, GLint t, GLchan *rgba)
1491 {
1492    const GLuint *cc;
1493    GLuint col[2][3];
1494    GLint glsb, selb;
1495
1496    cc = (const GLuint *)code;
1497    if (t & 16) {
1498       t &= 15;
1499       t = (cc[1] >> (t * 2)) & 3;
1500       /* col 2 */
1501       col[0][BCOMP] = (*(const GLuint *)(code + 11)) >> 6;
1502       col[0][GCOMP] = CC_SEL(cc, 99);
1503       col[0][RCOMP] = CC_SEL(cc, 104);
1504       /* col 3 */
1505       col[1][BCOMP] = CC_SEL(cc, 109);
1506       col[1][GCOMP] = CC_SEL(cc, 114);
1507       col[1][RCOMP] = CC_SEL(cc, 119);
1508       glsb = CC_SEL(cc, 126);
1509       selb = CC_SEL(cc, 33);
1510    } else {
1511       t = (cc[0] >> (t * 2)) & 3;
1512       /* col 0 */
1513       col[0][BCOMP] = CC_SEL(cc, 64);
1514       col[0][GCOMP] = CC_SEL(cc, 69);
1515       col[0][RCOMP] = CC_SEL(cc, 74);
1516       /* col 1 */
1517       col[1][BCOMP] = CC_SEL(cc, 79);
1518       col[1][GCOMP] = CC_SEL(cc, 84);
1519       col[1][RCOMP] = CC_SEL(cc, 89);
1520       glsb = CC_SEL(cc, 125);
1521       selb = CC_SEL(cc, 1);
1522    }
1523
1524    if (CC_SEL(cc, 124) & 1) {
1525       /* alpha[0] == 1 */
1526
1527       if (t == 3) {
1528          /* zero */
1529          rgba[RCOMP] = rgba[BCOMP] = rgba[GCOMP] = rgba[ACOMP] = 0;
1530       } else {
1531          GLubyte r, g, b;
1532          if (t == 0) {
1533             b = UP5(col[0][BCOMP]);
1534             g = UP5(col[0][GCOMP]);
1535             r = UP5(col[0][RCOMP]);
1536          } else if (t == 2) {
1537             b = UP5(col[1][BCOMP]);
1538             g = UP6(col[1][GCOMP], glsb);
1539             r = UP5(col[1][RCOMP]);
1540          } else {
1541             b = (UP5(col[0][BCOMP]) + UP5(col[1][BCOMP])) / 2;
1542             g = (UP5(col[0][GCOMP]) + UP6(col[1][GCOMP], glsb)) / 2;
1543             r = (UP5(col[0][RCOMP]) + UP5(col[1][RCOMP])) / 2;
1544          }
1545          rgba[RCOMP] = UBYTE_TO_CHAN(r);
1546          rgba[GCOMP] = UBYTE_TO_CHAN(g);
1547          rgba[BCOMP] = UBYTE_TO_CHAN(b);
1548          rgba[ACOMP] = CHAN_MAX;
1549       }
1550    } else {
1551       /* alpha[0] == 0 */
1552       GLubyte r, g, b;
1553       if (t == 0) {
1554          b = UP5(col[0][BCOMP]);
1555          g = UP6(col[0][GCOMP], glsb ^ selb);
1556          r = UP5(col[0][RCOMP]);
1557       } else if (t == 3) {
1558          b = UP5(col[1][BCOMP]);
1559          g = UP6(col[1][GCOMP], glsb);
1560          r = UP5(col[1][RCOMP]);
1561       } else {
1562          b = LERP(3, t, UP5(col[0][BCOMP]), UP5(col[1][BCOMP]));
1563          g = LERP(3, t, UP6(col[0][GCOMP], glsb ^ selb),
1564                         UP6(col[1][GCOMP], glsb));
1565          r = LERP(3, t, UP5(col[0][RCOMP]), UP5(col[1][RCOMP]));
1566       }
1567       rgba[RCOMP] = UBYTE_TO_CHAN(r);
1568       rgba[GCOMP] = UBYTE_TO_CHAN(g);
1569       rgba[BCOMP] = UBYTE_TO_CHAN(b);
1570       rgba[ACOMP] = CHAN_MAX;
1571    }
1572 }
1573
1574
1575 static void
1576 fxt1_decode_1ALPHA (const GLubyte *code, GLint t, GLchan *rgba)
1577 {
1578    const GLuint *cc;
1579    GLubyte r, g, b, a;
1580
1581    cc = (const GLuint *)code;
1582    if (CC_SEL(cc, 124) & 1) {
1583       /* lerp == 1 */
1584       GLuint col0[4];
1585
1586       if (t & 16) {
1587          t &= 15;
1588          t = (cc[1] >> (t * 2)) & 3;
1589          /* col 2 */
1590          col0[BCOMP] = (*(const GLuint *)(code + 11)) >> 6;
1591          col0[GCOMP] = CC_SEL(cc, 99);
1592          col0[RCOMP] = CC_SEL(cc, 104);
1593          col0[ACOMP] = CC_SEL(cc, 119);
1594       } else {
1595          t = (cc[0] >> (t * 2)) & 3;
1596          /* col 0 */
1597          col0[BCOMP] = CC_SEL(cc, 64);
1598          col0[GCOMP] = CC_SEL(cc, 69);
1599          col0[RCOMP] = CC_SEL(cc, 74);
1600          col0[ACOMP] = CC_SEL(cc, 109);
1601       }
1602
1603       if (t == 0) {
1604          b = UP5(col0[BCOMP]);
1605          g = UP5(col0[GCOMP]);
1606          r = UP5(col0[RCOMP]);
1607          a = UP5(col0[ACOMP]);
1608       } else if (t == 3) {
1609          b = UP5(CC_SEL(cc, 79));
1610          g = UP5(CC_SEL(cc, 84));
1611          r = UP5(CC_SEL(cc, 89));
1612          a = UP5(CC_SEL(cc, 114));
1613       } else {
1614          b = LERP(3, t, UP5(col0[BCOMP]), UP5(CC_SEL(cc, 79)));
1615          g = LERP(3, t, UP5(col0[GCOMP]), UP5(CC_SEL(cc, 84)));
1616          r = LERP(3, t, UP5(col0[RCOMP]), UP5(CC_SEL(cc, 89)));
1617          a = LERP(3, t, UP5(col0[ACOMP]), UP5(CC_SEL(cc, 114)));
1618       }
1619    } else {
1620       /* lerp == 0 */
1621
1622       if (t & 16) {
1623          cc++;
1624          t &= 15;
1625       }
1626       t = (cc[0] >> (t * 2)) & 3;
1627
1628       if (t == 3) {
1629          /* zero */
1630          r = g = b = a = 0;
1631       } else {
1632          GLuint kk;
1633          cc = (const GLuint *)code;
1634          a = UP5(cc[3] >> (t * 5 + 13));
1635          t *= 15;
1636          cc = (const GLuint *)(code + 8 + t / 8);
1637          kk = cc[0] >> (t & 7);
1638          b = UP5(kk);
1639          g = UP5(kk >> 5);
1640          r = UP5(kk >> 10);
1641       }
1642    }
1643    rgba[RCOMP] = UBYTE_TO_CHAN(r);
1644    rgba[GCOMP] = UBYTE_TO_CHAN(g);
1645    rgba[BCOMP] = UBYTE_TO_CHAN(b);
1646    rgba[ACOMP] = UBYTE_TO_CHAN(a);
1647 }
1648
1649
1650 void
1651 fxt1_decode_1 (const void *texture, GLint stride, /* in pixels */
1652                GLint i, GLint j, GLchan *rgba)
1653 {
1654    static void (*decode_1[]) (const GLubyte *, GLint, GLchan *) = {
1655       fxt1_decode_1HI,     /* cc-high   = "00?" */
1656       fxt1_decode_1HI,     /* cc-high   = "00?" */
1657       fxt1_decode_1CHROMA, /* cc-chroma = "010" */
1658       fxt1_decode_1ALPHA,  /* alpha     = "011" */
1659       fxt1_decode_1MIXED,  /* mixed     = "1??" */
1660       fxt1_decode_1MIXED,  /* mixed     = "1??" */
1661       fxt1_decode_1MIXED,  /* mixed     = "1??" */
1662       fxt1_decode_1MIXED   /* mixed     = "1??" */
1663    };
1664
1665    const GLubyte *code = (const GLubyte *)texture +
1666                          ((j / 4) * (stride / 8) + (i / 8)) * 16;
1667    GLint mode = CC_SEL(code, 125);
1668    GLint t = i & 7;
1669
1670    if (t & 4) {
1671       t += 12;
1672    }
1673    t += (j & 3) * 4;
1674
1675    decode_1[mode](code, t, rgba);
1676 }