src/mesa/main/texcompress_fxt1.c

   1 /*
   2  * Mesa 3-D graphics library
   3  * Version:  6.1
   4  *
   5  * Copyright (C) 1999-2004  Brian Paul   All Rights Reserved.
   6  *
   7  * Permission is hereby granted, free of charge, to any person obtaining a
   8  * copy of this software and associated documentation files (the "Software"),
   9  * to deal in the Software without restriction, including without limitation
  10  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  11  * and/or sell copies of the Software, and to permit persons to whom the
  12  * Software is furnished to do so, subject to the following conditions:
  13  *
  14  * The above copyright notice and this permission notice shall be included
  15  * in all copies or substantial portions of the Software.
  16  *
  17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  18  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  20  * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
  21  * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  22  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  23  */
  24
  25
  26 /**
  27  * \file texcompress_fxt1.c
  28  * GL_EXT_texture_compression_fxt1 support.
  29  */
  30
  31
  32 #include "glheader.h"
  33 #include "imports.h"
  34 #include "colormac.h"
  35 #include "context.h"
  36 #include "convolve.h"
  37 #include "image.h"
  38 #include "texcompress.h"
  39 #include "texformat.h"
  40 #include "texstore.h"
  41
  42
/*
 * FXT1 codec entry points used by the store/fetch functions in this file.
 * Only the prototypes appear here.
 */

/*
 * Encoder.  The texstore functions below call it with the source image
 * (dimensions, GL format, pixel pointer, row stride) and the destination
 * address / row stride of the compressed image.
 */
int
fxt1_encode (GLcontext *ctx,
             unsigned int width, unsigned int height,
             int srcFormat,
             const void *source, int srcRowStride,
             void *dest, int destRowStride);
/*
 * Single-texel decoder.  The fetch functions below call it with the
 * texture image data, its row stride, the texel coordinates (i, j) and
 * the buffer receiving the decoded components.
 */
void
fxt1_decode_1 (const void *texture, int stride,
               int i, int j, unsigned char *rgba);
  52
  53
/**
 * Called during context initialization.
 *
 * Currently does nothing; the context argument is ignored.
 *
 * \param ctx  GL context (unused)
 */
void
_mesa_init_texture_fxt1( GLcontext *ctx )
{
   (void) ctx; /* reference the parameter so it does not count as unused */
}
  62
  63
  64 /**
  65  * Called via TexFormat->StoreImage to store an RGB_FXT1 texture.
  66  */
  67 static GLboolean
  68 texstore_rgb_fxt1(STORE_PARAMS)
  69 {
  70    const GLchan *pixels;
  71    GLint srcRowStride;
  72    GLubyte *dst;
  73    const GLint texWidth = dstRowStride * 8 / 16; /* a bit of a hack */
  74    const GLchan *tempImage = NULL;
  75
  76    ASSERT(dstFormat == &_mesa_texformat_rgb_fxt1);
  77    ASSERT(dstXoffset % 8 == 0);
  78    ASSERT(dstYoffset % 4 == 0);
  79    ASSERT(dstZoffset     == 0);
  80    (void) dstZoffset; (void) dstImageStride;
  81
  82    if (srcFormat != GL_RGB ||
  83        srcType != CHAN_TYPE ||
  84        ctx->_ImageTransferState ||
  85        srcPacking->SwapBytes) {
  86       /* convert image to RGB/GLchan */
  87       tempImage = _mesa_make_temp_chan_image(ctx, dims,
  88                                              baseInternalFormat,
  89                                              dstFormat->BaseFormat,
  90                                              srcWidth, srcHeight, srcDepth,
  91                                              srcFormat, srcType, srcAddr,
  92                                              srcPacking);
  93       if (!tempImage)
  94          return GL_FALSE; /* out of memory */
  95       _mesa_adjust_image_for_convolution(ctx, dims, &srcWidth, &srcHeight);
  96       pixels = tempImage;
  97       srcRowStride = 3 * srcWidth;
  98       srcFormat = GL_RGB;
  99    }
 100    else {
 101       pixels = (const GLchan *) srcAddr;
 102       srcRowStride = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat,
 103                                             srcType) / sizeof(GLchan);
 104    }
 105
 106    dst = _mesa_compressed_image_address(dstXoffset, dstYoffset, 0,
 107                                         GL_COMPRESSED_RGB_FXT1_3DFX,
 108                                         texWidth, (GLubyte *) dstAddr);
 109
 110    fxt1_encode(ctx, srcWidth, srcHeight, srcFormat, pixels, srcRowStride,
 111                dst, dstRowStride);
 112
 113    if (tempImage)
 114       _mesa_free((void*) tempImage);
 115
 116    return GL_TRUE;
 117 }
 118
 119
 120 /**
 121  * Called via TexFormat->StoreImage to store an RGBA_FXT1 texture.
 122  */
 123 static GLboolean
 124 texstore_rgba_fxt1(STORE_PARAMS)
 125 {
 126    const GLchan *pixels;
 127    GLint srcRowStride;
 128    GLubyte *dst;
 129    GLint texWidth = dstRowStride * 8 / 16; /* a bit of a hack */
 130    const GLchan *tempImage = NULL;
 131
 132    ASSERT(dstFormat == &_mesa_texformat_rgba_fxt1);
 133    ASSERT(dstXoffset % 8 == 0);
 134    ASSERT(dstYoffset % 4 == 0);
 135    ASSERT(dstZoffset     == 0);
 136    (void) dstZoffset; (void) dstImageStride;
 137
 138    if (srcFormat != GL_RGBA ||
 139        srcType != CHAN_TYPE ||
 140        ctx->_ImageTransferState ||
 141        srcPacking->SwapBytes) {
 142       /* convert image to RGBA/GLchan */
 143       tempImage = _mesa_make_temp_chan_image(ctx, dims,
 144                                              baseInternalFormat,
 145                                              dstFormat->BaseFormat,
 146                                              srcWidth, srcHeight, srcDepth,
 147                                              srcFormat, srcType, srcAddr,
 148                                              srcPacking);
 149       if (!tempImage)
 150          return GL_FALSE; /* out of memory */
 151       _mesa_adjust_image_for_convolution(ctx, dims, &srcWidth, &srcHeight);
 152       pixels = tempImage;
 153       srcRowStride = 4 * srcWidth;
 154       srcFormat = GL_RGBA;
 155    }
 156    else {
 157       pixels = (const GLchan *) srcAddr;
 158       srcRowStride = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat,
 159                                             srcType) / sizeof(GLchan);
 160    }
 161
 162    dst = _mesa_compressed_image_address(dstXoffset, dstYoffset, 0,
 163                                         GL_COMPRESSED_RGBA_FXT1_3DFX,
 164                                         texWidth, (GLubyte *) dstAddr);
 165
 166    fxt1_encode(ctx, srcWidth, srcHeight, srcFormat, pixels, srcRowStride,
 167                dst, dstRowStride);
 168
 169    if (tempImage)
 170       _mesa_free((void*) tempImage);
 171
 172    return GL_TRUE;
 173 }
 174
 175
/**
 * Fetch one texel from a 2D RGBA_FXT1 image as GLchan components.
 *
 * \param texImage  image whose Data / RowStride are passed to the decoder
 * \param i, j      texel coordinates
 * \param k         ignored (2D only)
 * \param texel     receives the components written by fxt1_decode_1()
 */
static void
fetch_texel_2d_rgba_fxt1( const struct gl_texture_image *texImage,
                          GLint i, GLint j, GLint k, GLchan *texel )
{
   (void) k;
   fxt1_decode_1(texImage->Data, texImage->RowStride, i, j, texel);
}
 183
 184
 185 static void
 186 fetch_texel_2d_f_rgba_fxt1( const struct gl_texture_image *texImage,
 187                             GLint i, GLint j, GLint k, GLfloat *texel )
 188 {
 189    /* just sample as GLchan and convert to float here */
 190    GLchan rgba[4];
 191    (void) k;
 192    fxt1_decode_1(texImage->Data, texImage->RowStride, i, j, rgba);
 193    texel[RCOMP] = CHAN_TO_FLOAT(rgba[RCOMP]);
 194    texel[GCOMP] = CHAN_TO_FLOAT(rgba[GCOMP]);
 195    texel[BCOMP] = CHAN_TO_FLOAT(rgba[BCOMP]);
 196    texel[ACOMP] = CHAN_TO_FLOAT(rgba[ACOMP]);
 197 }
 198
 199
/**
 * Fetch one texel from a 2D RGB_FXT1 image as GLchan components.
 *
 * The decoder output is used as is, except that the alpha component is
 * overwritten with 255 afterwards.  \p k is ignored (2D only).
 */
static void
fetch_texel_2d_rgb_fxt1( const struct gl_texture_image *texImage,
                         GLint i, GLint j, GLint k, GLchan *texel )
{
   (void) k;
   fxt1_decode_1(texImage->Data, texImage->RowStride, i, j, texel);
   /* NOTE(review): 255 is full opacity only for 8-bit GLchan - confirm */
   texel[ACOMP] = 255;
}
 208
 209
 210 static void
 211 fetch_texel_2d_f_rgb_fxt1( const struct gl_texture_image *texImage,
 212                            GLint i, GLint j, GLint k, GLfloat *texel )
 213 {
 214    /* just sample as GLchan and convert to float here */
 215    GLchan rgba[4];
 216    (void) k;
 217    fxt1_decode_1(texImage->Data, texImage->RowStride, i, j, rgba);
 218    texel[RCOMP] = CHAN_TO_FLOAT(rgba[RCOMP]);
 219    texel[GCOMP] = CHAN_TO_FLOAT(rgba[GCOMP]);
 220    texel[BCOMP] = CHAN_TO_FLOAT(rgba[BCOMP]);
 221    texel[ACOMP] = 1.0;
 222 }
 223
 224
 225
/*
 * Texture format descriptor for RGB FXT1.
 *
 * The initializer is positional; the trailing comment on each line names
 * the field being set.  Channel bit counts are approximations, TexelBytes
 * is 0, and only the 2D fetch functions are provided.
 */
const struct gl_texture_format _mesa_texformat_rgb_fxt1 = {
   MESA_FORMAT_RGB_FXT1,                /* MesaFormat */
   GL_RGB,                              /* BaseFormat */
   GL_UNSIGNED_NORMALIZED_ARB,          /* DataType */
   4, /*approx*/                        /* RedBits */
   4, /*approx*/                        /* GreenBits */
   4, /*approx*/                        /* BlueBits */
   0,                                   /* AlphaBits */
   0,                                   /* LuminanceBits */
   0,                                   /* IntensityBits */
   0,                                   /* IndexBits */
   0,                                   /* DepthBits */
   0,                                   /* TexelBytes */
   texstore_rgb_fxt1,                   /* StoreTexImageFunc */
   NULL, /*impossible*/                 /* FetchTexel1D */
   fetch_texel_2d_rgb_fxt1,             /* FetchTexel2D */
   NULL, /*impossible*/                 /* FetchTexel3D */
   NULL, /*impossible*/                 /* FetchTexel1Df */
   fetch_texel_2d_f_rgb_fxt1,           /* FetchTexel2Df */
   NULL, /*impossible*/                 /* FetchTexel3Df */
};
 247
/*
 * Texture format descriptor for RGBA FXT1.
 *
 * The initializer is positional; the trailing comment on each line names
 * the field being set.  Channel bit counts are approximations, TexelBytes
 * is 0, and only the 2D fetch functions are provided.
 */
const struct gl_texture_format _mesa_texformat_rgba_fxt1 = {
   MESA_FORMAT_RGBA_FXT1,               /* MesaFormat */
   GL_RGBA,                             /* BaseFormat */
   GL_UNSIGNED_NORMALIZED_ARB,          /* DataType */
   4, /*approx*/                        /* RedBits */
   4, /*approx*/                        /* GreenBits */
   4, /*approx*/                        /* BlueBits */
   1, /*approx*/                        /* AlphaBits */
   0,                                   /* LuminanceBits */
   0,                                   /* IntensityBits */
   0,                                   /* IndexBits */
   0,                                   /* DepthBits */
   0,                                   /* TexelBytes */
   texstore_rgba_fxt1,                  /* StoreTexImageFunc */
   NULL, /*impossible*/                 /* FetchTexel1D */
   fetch_texel_2d_rgba_fxt1,            /* FetchTexel2D */
   NULL, /*impossible*/                 /* FetchTexel3D */
   NULL, /*impossible*/                 /* FetchTexel1Df */
   fetch_texel_2d_f_rgba_fxt1,          /* FetchTexel2Df */
   NULL, /*impossible*/                 /* FetchTexel3Df */
};
 269
 270
 271 /***************************************************************************\
 272  * FXT1 encoder
 273  *
 274  * The encoder was built by reversing the decoder,
 275  * and is vaguely based on Texus2 by 3dfx. Note that this code
 * is merely a proof of concept, since it is highly UNoptimized;
 * moreover, it is sub-optimal due to initial conditions passed
 278  * to Lloyd's algorithm (the interpolation modes are worse).
 279 \***************************************************************************/
 280
 281
 282 #define MAX_COMP 4 /* ever needed maximum number of components in texel */
 283 #define MAX_VECT 4 /* ever needed maximum number of base vectors to find */
 284 #define N_TEXELS 32 /* number of texels in a block (always 32) */
 285 #define LL_N_REP 50 /* number of iterations in lloyd's vq */
 286 #define LL_RMS_D 10 /* fault tolerance (maximum delta) */
 287 #define LL_RMS_E 255 /* fault tolerance (maximum error) */
 288 #define ALPHA_TS 2 /* alpha threshold: (255 - ALPHA_TS) deemed opaque */
 289 #define ISTBLACK(v) (*((unsigned long *)(v)) == 0)
 290
 291
 292 #ifdef __GNUC__
 293
 294 #define FX64_NATIVE 1
 295
 296 typedef unsigned long long Fx64;
 297
 298 #define FX64_MOV32(a, b) a = b
 299 #define FX64_OR32(a, b)  a |= b
 300 #define FX64_SHL(a, c)   a <<= c
 301
 302 #else  /* !__GNUC__ */
 303
 304 #define FX64_NATIVE 0
 305
 306 typedef struct {
 307         unsigned long lo, hi;
 308 } Fx64;
 309
 310 #define FX64_MOV32(a, b) a.lo = b
 311 #define FX64_OR32(a, b)  a.lo |= b
 312
 313 #define FX64_SHL(a, c)                                 \
 314    do {                                                \
 315        if ((c) >= 32) {                                \
 316           a.hi = a.lo << ((c) - 32);                   \
 317           a.lo = 0;                                    \
 318        } else {                                        \
 319           a.hi = (a.hi << (c)) | (a.lo >> (32 - (c))); \
 320           a.lo <<= (c);                                \
 321        }                                               \
 322    } while (0)
 323
 324 #endif /* !__GNUC__ */
 325
 326
 327 static int
 328 fxt1_bestcol (float vec[][MAX_COMP], int nv,
 329               unsigned char input[MAX_COMP], int nc)
 330 {
 331    int i, j, best = -1;
 332    float err = 1e9; /* big enough */
 333
 334    for (j = 0; j < nv; j++) {
 335       float e = 0;
 336       for (i = 0; i < nc; i++) {
 337          e += (vec[j][i] - input[i]) * (vec[j][i] - input[i]);
 338       }
 339       if (e < err) {
 340          err = e;
 341          best = j;
 342       }
 343    }
 344
 345    return best;
 346 }
 347
 348
 349 static int
 350 fxt1_worst (float vec[MAX_COMP],
 351             unsigned char input[N_TEXELS][MAX_COMP], int nc, int n)
 352 {
 353    int i, k, worst = -1;
 354    float err = -1; /* small enough */
 355
 356    for (k = 0; k < n; k++) {
 357       float e = 0;
 358       for (i = 0; i < nc; i++) {
 359          e += (vec[i] - input[k][i]) * (vec[i] - input[k][i]);
 360       }
 361       if (e > err) {
 362          err = e;
 363          worst = k;
 364       }
 365    }
 366
 367    return worst;
 368 }
 369
 370
 371 static int
 372 fxt1_variance (double variance[MAX_COMP],
 373                unsigned char input[N_TEXELS][MAX_COMP], int nc, int n)
 374 {
 375    int i, k, best = 0;
 376    int sx, sx2;
 377    double var, maxvar = -1; /* small enough */
 378    double teenth = 1.0 / n;
 379
 380    for (i = 0; i < nc; i++) {
 381       sx = sx2 = 0;
 382       for (k = 0; k < n; k++) {
 383          int t = input[k][i];
 384          sx += t;
 385          sx2 += t * t;
 386       }
 387       var = sx2 * teenth - sx * sx * teenth * teenth;
 388       if (maxvar < var) {
 389          maxvar = var;
 390          best = i;
 391       }
 392       if (variance) {
 393          variance[i] = var;
 394       }
 395    }
 396
 397    return best;
 398 }
 399
 400
 401 static int
 402 fxt1_choose (float vec[][MAX_COMP], int nv,
 403             unsigned char input[N_TEXELS][MAX_COMP], int nc, int n)
 404 {
 405 #if 0
 406    /* Choose colors from a grid.
 407     */
 408    int i, j;
 409
 410    for (j = 0; j < nv; j++) {
 411       int m = j * (n - 1) / (nv - 1);
 412       for (i = 0; i < nc; i++) {
 413          vec[j][i] = input[m][i];
 414       }
 415    }
 416 #else
 417    /* Our solution here is to find the darkest and brightest colors in
 418     * the 8x4 tile and use those as the two representative colors.
 419     * There are probably better algorithms to use (histogram-based).
 420     */
 421    int i, j, k;
 422    int minSum = 1000; /* big enough */
 423    int maxSum = -1; /* small enough */
 424    int minCol = 0; /* phoudoin: silent compiler! */
 425    int maxCol = 0; /* phoudoin: silent compiler! */
 426
 427    struct {
 428       int flag;
 429       int key;
 430       int freq;
 431       int idx;
 432    } hist[N_TEXELS];
 433    int lenh = 0;
 434
 435    memset(hist, 0, sizeof(hist));
 436
 437    for (k = 0; k < n; k++) {
 438       int l;
 439       int key = 0;
 440       int sum = 0;
 441       for (i = 0; i < nc; i++) {
 442          key <<= 8;
 443          key |= input[k][i];
 444          sum += input[k][i];
 445       }
 446       for (l = 0; l < n; l++) {
 447          if (!hist[l].flag) {
 448             /* alloc new slot */
 449             hist[l].flag = !0;
 450             hist[l].key = key;
 451             hist[l].freq = 1;
 452             hist[l].idx = k;
 453             lenh = l + 1;
 454             break;
 455          } else if (hist[l].key == key) {
 456             hist[l].freq++;
 457             break;
 458          }
 459       }
 460       if (minSum > sum) {
 461          minSum = sum;
 462          minCol = k;
 463       }
 464       if (maxSum < sum) {
 465          maxSum = sum;
 466          maxCol = k;
 467       }
 468    }
 469
 470    if (lenh <= nv) {
 471       for (j = 0; j < lenh; j++) {
 472          for (i = 0; i < nc; i++) {
 473             vec[j][i] = (float)input[hist[j].idx][i];
 474          }
 475       }
 476       for (; j < nv; j++) {
 477          for (i = 0; i < nc; i++) {
 478             vec[j][i] = vec[0][i];
 479          }
 480       }
 481       return 0;
 482    }
 483
 484    for (j = 0; j < nv; j++) {
 485       for (i = 0; i < nc; i++) {
 486          vec[j][i] = ((nv - 1 - j) * input[minCol][i] + j * input[maxCol][i] + (nv - 1) / 2) / (nv - 1);
 487       }
 488    }
 489 #endif
 490
 491    return !0;
 492 }
 493
 494
 495 static int
 496 fxt1_lloyd (float vec[][MAX_COMP], int nv,
 497             unsigned char input[N_TEXELS][MAX_COMP], int nc, int n)
 498 {
 499    /* Use the generalized lloyd's algorithm for VQ:
 500     *     find 4 color vectors.
 501     *
 502     *     for each sample color
 503     *         sort to nearest vector.
 504     *
 505     *     replace each vector with the centroid of it's matching colors.
 506     *
 507     *     repeat until RMS doesn't improve.
 508     *
 509     *     if a color vector has no samples, or becomes the same as another
 510     *     vector, replace it with the color which is farthest from a sample.
 511     *
 512     * vec[][MAX_COMP]           initial vectors and resulting colors
 513     * nv                        number of resulting colors required
 514     * input[N_TEXELS][MAX_COMP] input texels
 515     * nc                        number of components in input / vec
 516     * n                         number of input samples
 517     */
 518
 519    int sum[MAX_VECT][MAX_COMP]; /* used to accumulate closest texels */
 520    int cnt[MAX_VECT]; /* how many times a certain vector was chosen */
 521    float error, lasterror = 1e9;
 522
 523    int i, j, k, rep;
 524
 525    /* the quantizer */
 526    for (rep = 0; rep < LL_N_REP; rep++) {
 527       /* reset sums & counters */
 528       for (j = 0; j < nv; j++) {
 529          for (i = 0; i < nc; i++) {
 530             sum[j][i] = 0;
 531          }
 532          cnt[j] = 0;
 533       }
 534       error = 0;
 535
 536       /* scan whole block */
 537       for (k = 0; k < n; k++) {
 538 #if 1
 539          int best = -1;
 540          float err = 1e9; /* big enough */
 541          /* determine best vector */
 542          for (j = 0; j < nv; j++) {
 543             float e = (vec[j][0] - input[k][0]) * (vec[j][0] - input[k][0]) +
 544                       (vec[j][1] - input[k][1]) * (vec[j][1] - input[k][1]) +
 545                       (vec[j][2] - input[k][2]) * (vec[j][2] - input[k][2]);
 546             if (nc == 4) {
 547                e += (vec[j][3] - input[k][3]) * (vec[j][3] - input[k][3]);
 548             }
 549             if (e < err) {
 550                err = e;
 551                best = j;
 552             }
 553          }
 554 #else
 555          int best = fxt1_bestcol(vec, n_vect, input[k], n_comp, &err);
 556 #endif
 557          /* add in closest color */
 558          for (i = 0; i < nc; i++) {
 559             sum[best][i] += input[k][i];
 560          }
 561          /* mark this vector as used */
 562          cnt[best]++;
 563          /* accumulate error */
 564          error += err;
 565       }
 566
 567       /* check RMS */
 568       if ((error < LL_RMS_E) ||
 569           ((error < lasterror) && ((lasterror - error) < LL_RMS_D))) {
 570          return !0; /* good match */
 571       }
 572       lasterror = error;
 573
 574       /* move each vector to the barycenter of its closest colors */
 575       for (j = 0; j < nv; j++) {
 576          if (cnt[j]) {
 577             float div = 1.0 / cnt[j];
 578             for (i = 0; i < nc; i++) {
 579                vec[j][i] = div * sum[j][i];
 580             }
 581          } else {
 582             /* this vec has no samples or is identical with a previous vec */
 583             int worst = fxt1_worst(vec[j], input, nc, n);
 584             for (i = 0; i < nc; i++) {
 585                vec[j][i] = input[worst][i];
 586             }
 587          }
 588       }
 589    }
 590
 591    return 0; /* could not converge fast enough */
 592 }
 593
 594
 595 static void
 596 fxt1_quantize_CHROMA (unsigned long *cc,
 597                       unsigned char input[N_TEXELS][MAX_COMP])
 598 {
 599    const int n_vect = 4; /* 4 base vectors to find */
 600    const int n_comp = 3; /* 3 components: R, G, B */
 601    float vec[MAX_VECT][MAX_COMP];
 602    int i, j, k;
 603    Fx64 hi; /* high quadword */
 604    unsigned long lohi, lolo; /* low quadword: hi dword, lo dword */
 605
 606    if (fxt1_choose(vec, n_vect, input, n_comp, N_TEXELS) != 0) {
 607       fxt1_lloyd(vec, n_vect, input, n_comp, N_TEXELS);
 608    }
 609
 610    FX64_MOV32(hi, 4); /* cc-chroma = "010" + unused bit */
 611    for (j = n_vect - 1; j >= 0; j--) {
 612       for (i = 0; i < n_comp; i++) {
 613          /* add in colors */
 614          FX64_SHL(hi, 5);
 615          FX64_OR32(hi, (unsigned int)(vec[j][i] / 8.0));
 616       }
 617    }
 618    ((Fx64 *)cc)[1] = hi;
 619
 620    lohi = lolo = 0;
 621    /* right microtile */
 622    for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) {
 623       lohi <<= 2;
 624       lohi |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
 625    }
 626    /* left microtile */
 627    for (; k >= 0; k--) {
 628       lolo <<= 2;
 629       lolo |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
 630    }
 631    cc[1] = lohi;
 632    cc[0] = lolo;
 633 }
 634
 635
 636 static void
 637 fxt1_quantize_ALPHA0 (unsigned long *cc,
 638                       unsigned char input[N_TEXELS][MAX_COMP],
 639                       unsigned char reord[N_TEXELS][MAX_COMP], int n)
 640 {
 641    const int n_vect = 3; /* 3 base vectors to find */
 642    const int n_comp = 4; /* 4 components: R, G, B, A */
 643    float vec[MAX_VECT][MAX_COMP];
 644    int i, j, k;
 645    Fx64 hi; /* high quadword */
 646    unsigned long lohi, lolo; /* low quadword: hi dword, lo dword */
 647
 648    /* the last vector indicates zero */
 649    for (i = 0; i < n_comp; i++) {
 650       vec[n_vect][i] = 0;
 651    }
 652
 653    /* the first n texels in reord are guaranteed to be non-zero */
 654    if (fxt1_choose(vec, n_vect, reord, n_comp, n) != 0) {
 655       fxt1_lloyd(vec, n_vect, reord, n_comp, n);
 656    }
 657
 658    FX64_MOV32(hi, 6); /* alpha = "011" + lerp = 0 */
 659    for (j = n_vect - 1; j >= 0; j--) {
 660       /* add in alphas */
 661       FX64_SHL(hi, 5);
 662       FX64_OR32(hi, (unsigned int)(vec[j][ACOMP] / 8.0));
 663    }
 664    for (j = n_vect - 1; j >= 0; j--) {
 665       for (i = 0; i < n_comp - 1; i++) {
 666          /* add in colors */
 667          FX64_SHL(hi, 5);
 668          FX64_OR32(hi, (unsigned int)(vec[j][i] / 8.0));
 669       }
 670    }
 671    ((Fx64 *)cc)[1] = hi;
 672
 673    lohi = lolo = 0;
 674    /* right microtile */
 675    for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) {
 676       lohi <<= 2;
 677       lohi |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
 678    }
 679    /* left microtile */
 680    for (; k >= 0; k--) {
 681       lolo <<= 2;
 682       lolo |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
 683    }
 684    cc[1] = lohi;
 685    cc[0] = lolo;
 686 }
 687
 688
 689 static void
 690 fxt1_quantize_ALPHA1 (unsigned long *cc,
 691                       unsigned char input[N_TEXELS][MAX_COMP])
 692 {
 693    const int n_vect = 3; /* highest vector number in each microtile */
 694    const int n_comp = 4; /* 4 components: R, G, B, A */
 695    float vec[1 + 1 + 1][MAX_COMP]; /* 1.5 extrema for each sub-block */
 696    float b, iv[MAX_COMP]; /* interpolation vector */
 697    int i, j, k;
 698    Fx64 hi; /* high quadword */
 699    unsigned long lohi, lolo; /* low quadword: hi dword, lo dword */
 700
 701    int minSum;
 702    int maxSum;
 703    int minColL = 0, maxColL = 0;
 704    int minColR = 0, maxColR = 0;
 705    int sumL = 0, sumR = 0;
 706
 707    /* Our solution here is to find the darkest and brightest colors in
 708     * the 4x4 tile and use those as the two representative colors.
 709     * There are probably better algorithms to use (histogram-based).
 710     */
 711    minSum = 1000; /* big enough */
 712    maxSum = -1; /* small enough */
 713    for (k = 0; k < N_TEXELS / 2; k++) {
 714       int sum = 0;
 715       for (i = 0; i < n_comp; i++) {
 716          sum += input[k][i];
 717       }
 718       if (minSum > sum) {
 719          minSum = sum;
 720          minColL = k;
 721       }
 722       if (maxSum < sum) {
 723          maxSum = sum;
 724          maxColL = k;
 725       }
 726       sumL += sum;
 727    }
 728    minSum = 1000; /* big enough */
 729    maxSum = -1; /* small enough */
 730    for (; k < N_TEXELS; k++) {
 731       int sum = 0;
 732       for (i = 0; i < n_comp; i++) {
 733          sum += input[k][i];
 734       }
 735       if (minSum > sum) {
 736          minSum = sum;
 737          minColR = k;
 738       }
 739       if (maxSum < sum) {
 740          maxSum = sum;
 741          maxColR = k;
 742       }
 743       sumR += sum;
 744    }
 745
 746    /* choose the common vector (yuck!) */
 747 {
 748    int j1, j2;
 749    int v1 = 0, v2 = 0;
 750    float err = 1e9; /* big enough */
 751    float tv[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
 752    for (i = 0; i < n_comp; i++) {
 753       tv[0][i] = input[minColL][i];
 754       tv[1][i] = input[maxColL][i];
 755       tv[2][i] = input[minColR][i];
 756       tv[3][i] = input[maxColR][i];
 757    }
 758    for (j1 = 0; j1 < 2; j1++) {
 759       for (j2 = 2; j2 < 4; j2++) {
 760           float e = 0;
 761           for (i = 0; i < n_comp; i++) {
 762              e += (tv[j1][i] - tv[j2][i]) * (tv[j1][i] - tv[j2][i]);
 763           }
 764           if (e < err) {
 765              err = e;
 766              v1 = j1;
 767              v2 = j2;
 768           }
 769       }
 770    }
 771    for (i = 0; i < n_comp; i++) {
 772       vec[0][i] = tv[1 - v1][i];
 773       vec[1][i] = (tv[v1][i] * sumL + tv[v2][i] * sumR) / (sumL + sumR);
 774       vec[2][i] = tv[5 - v2][i];
 775    }
 776 }
 777
 778    /* left microtile */
 779    cc[0] = 0;
 780    if (minColL != maxColL) {
 781       /* compute interpolation vector */
 782       float d2 = 0;
 783       float rd2;
 784
 785       for (i = 0; i < n_comp; i++) {
 786          iv[i] = vec[1][i] - vec[0][i];
 787          d2 += iv[i] * iv[i];
 788       }
 789       rd2 = (float)n_vect / d2;
 790       b = 0;
 791       for (i = 0; i < n_comp; i++) {
 792          b -= iv[i] * vec[0][i];
 793          iv[i] *= rd2;
 794       }
 795       b = b * rd2 + 0.5f;
 796
 797       /* add in texels */
 798       lolo = 0;
 799       for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
 800          int texel;
 801          /* interpolate color */
 802          float dot = 0;
 803          for (i = 0; i < n_comp; i++) {
 804             dot += input[k][i] * iv[i];
 805          }
 806          texel = (int)(dot + b);
 807          if (texel < 0) {
 808             texel = 0;
 809          } else if (texel > n_vect) {
 810             texel = n_vect;
 811          }
 812          /* add in texel */
 813          lolo <<= 2;
 814          lolo |= texel;
 815       }
 816
 817       cc[0] = lolo;
 818    }
 819
 820    /* right microtile */
 821    cc[1] = 0;
 822    if (minColR != maxColR) {
 823       /* compute interpolation vector */
 824       float d2 = 0;
 825       float rd2;
 826
 827       for (i = 0; i < n_comp; i++) {
 828          iv[i] = vec[1][i] - vec[2][i];
 829          d2 += iv[i] * iv[i];
 830       }
 831       rd2 = (float)n_vect / d2;
 832       b = 0;
 833       for (i = 0; i < n_comp; i++) {
 834          b -= iv[i] * vec[2][i];
 835          iv[i] *= rd2;
 836       }
 837       b = b * rd2 + 0.5f;
 838
 839       /* add in texels */
 840       lohi = 0;
 841       for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
 842          int texel;
 843          /* interpolate color */
 844          float dot = 0;
 845          for (i = 0; i < n_comp; i++) {
 846             dot += input[k][i] * iv[i];
 847          }
 848          texel = (int)(dot + b);
 849          if (texel < 0) {
 850             texel = 0;
 851          } else if (texel > n_vect) {
 852             texel = n_vect;
 853          }
 854          /* add in texel */
 855          lohi <<= 2;
 856          lohi |= texel;
 857       }
 858
 859       cc[1] = lohi;
 860    }
 861
 862    FX64_MOV32(hi, 7); /* alpha = "011" + lerp = 1 */
 863    for (j = n_vect - 1; j >= 0; j--) {
 864       /* add in alphas */
 865       FX64_SHL(hi, 5);
 866       FX64_OR32(hi, (unsigned int)(vec[j][ACOMP] / 8.0));
 867    }
 868    for (j = n_vect - 1; j >= 0; j--) {
 869       for (i = 0; i < n_comp - 1; i++) {
 870          /* add in colors */
 871          FX64_SHL(hi, 5);
 872          FX64_OR32(hi, (unsigned int)(vec[j][i] / 8.0));
 873       }
 874    }
 875    ((Fx64 *)cc)[1] = hi;
 876 }
 877
 878
 879 static void
 880 fxt1_quantize_HI (unsigned long *cc,
 881                   unsigned char input[N_TEXELS][MAX_COMP],
 882                   unsigned char reord[N_TEXELS][MAX_COMP], int n)
 883 {
 884    const int n_vect = 6; /* highest vector number */
 885    const int n_comp = 3; /* 3 components: R, G, B */
 886    float b = 0.0;               /* phoudoin: silent compiler! */
 887    float iv[MAX_COMP]; /* interpolation vector */
 888    int i, k;
 889    unsigned long hihi; /* high quadword: hi dword */
 890
 891    int minSum = 1000; /* big enough */
 892    int maxSum = -1; /* small enough */
 893    int minCol = 0;      /* phoudoin: silent compiler! */
 894    int maxCol = 0;      /* phoudoin: silent compiler! */
 895
 896    /* Our solution here is to find the darkest and brightest colors in
 897     * the 8x4 tile and use those as the two representative colors.
 898     * There are probably better algorithms to use (histogram-based).
 899     */
 900    for (k = 0; k < n; k++) {
 901       int sum = 0;
 902       for (i = 0; i < n_comp; i++) {
 903          sum += reord[k][i];
 904       }
 905       if (minSum > sum) {
 906          minSum = sum;
 907          minCol = k;
 908       }
 909       if (maxSum < sum) {
 910          maxSum = sum;
 911          maxCol = k;
 912       }
 913    }
 914
 915    hihi = 0; /* cc-hi = "00" */
 916    for (i = 0; i < n_comp; i++) {
 917       /* add in colors */
 918       hihi <<= 5;
 919       hihi |= reord[maxCol][i] >> 3;
 920    }
 921    for (i = 0; i < n_comp; i++) {
 922       /* add in colors */
 923       hihi <<= 5;
 924       hihi |= reord[minCol][i] >> 3;
 925    }
 926    cc[3] = hihi;
 927    cc[0] = cc[1] = cc[2] = 0;
 928
 929    /* compute interpolation vector */
 930    if (minCol != maxCol) {
 931       float d2 = 0;
 932       float rd2;
 933
 934       for (i = 0; i < n_comp; i++) {
 935          iv[i] = reord[maxCol][i] - reord[minCol][i];
 936          d2 += iv[i] * iv[i];
 937       }
 938       rd2 = (float)n_vect / d2;
 939       b = 0;
 940       for (i = 0; i < n_comp; i++) {
 941          b -= iv[i] * reord[minCol][i];
 942          iv[i] *= rd2;
 943       }
 944       b = b * rd2 + 0.5f;
 945    }
 946
 947    /* add in texels */
 948    for (k = N_TEXELS - 1; k >= 0; k--) {
 949       int t = k * 3;
 950       unsigned long *kk = (unsigned long *)((unsigned long)cc + t / 8);
 951       int texel = n_vect + 1; /* transparent black */
 952
 953       if (!ISTBLACK(input[k])) {
 954          if (minCol != maxCol) {
 955             /* interpolate color */
 956             float dot = 0;
 957             for (i = 0; i < n_comp; i++) {
 958                dot += input[k][i] * iv[i];
 959             }
 960             texel = (int)(dot + b);
 961             if (texel < 0) {
 962                texel = 0;
 963             } else if (texel > n_vect) {
 964                texel = n_vect;
 965             }
 966             /* add in texel */
 967             kk[0] |= texel << (t & 7);
 968          }
 969       } else {
 970          /* add in texel */
 971          kk[0] |= texel << (t & 7);
 972       }
 973    }
 974 }
 975
 976
 977 static void
 978 fxt1_quantize_MIXED1 (unsigned long *cc,
 979                       unsigned char input[N_TEXELS][MAX_COMP])
 980 {
 981    const int n_vect = 2; /* highest vector number in each microtile */
 982    const int n_comp = 3; /* 3 components: R, G, B */
 983    unsigned char vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
 984    float b, iv[MAX_COMP]; /* interpolation vector */
 985    int i, j, k;
 986    Fx64 hi; /* high quadword */
 987    unsigned long lohi, lolo; /* low quadword: hi dword, lo dword */
 988
 989    int minSum;
 990    int maxSum;
 991    int minColL = 0, maxColL = -1;
 992    int minColR = 0, maxColR = -1;
 993
 994    /* Our solution here is to find the darkest and brightest colors in
 995     * the 4x4 tile and use those as the two representative colors.
 996     * There are probably better algorithms to use (histogram-based).
 997     */
 998    minSum = 1000; /* big enough */
 999    maxSum = -1; /* small enough */
1000    for (k = 0; k < N_TEXELS / 2; k++) {
1001       if (!ISTBLACK(input[k])) {
1002          int sum = 0;
1003          for (i = 0; i < n_comp; i++) {
1004             sum += input[k][i];
1005          }
1006          if (minSum > sum) {
1007             minSum = sum;
1008             minColL = k;
1009          }
1010          if (maxSum < sum) {
1011             maxSum = sum;
1012             maxColL = k;
1013          }
1014       }
1015    }
1016    minSum = 1000; /* big enough */
1017    maxSum = -1; /* small enough */
1018    for (; k < N_TEXELS; k++) {
1019       if (!ISTBLACK(input[k])) {
1020          int sum = 0;
1021          for (i = 0; i < n_comp; i++) {
1022             sum += input[k][i];
1023          }
1024          if (minSum > sum) {
1025             minSum = sum;
1026             minColR = k;
1027          }
1028          if (maxSum < sum) {
1029             maxSum = sum;
1030             maxColR = k;
1031          }
1032       }
1033    }
1034
1035    /* left microtile */
1036    if (maxColL == -1) {
1037       /* all transparent black */
1038       cc[0] = -1;
1039       for (i = 0; i < n_comp; i++) {
1040          vec[0][i] = 0;
1041          vec[1][i] = 0;
1042       }
1043    } else {
1044       cc[0] = 0;
1045       for (i = 0; i < n_comp; i++) {
1046          vec[0][i] = input[minColL][i];
1047          vec[1][i] = input[maxColL][i];
1048       }
1049       if (minColL != maxColL) {
1050          /* compute interpolation vector */
1051          float d2 = 0;
1052          float rd2;
1053
1054          for (i = 0; i < n_comp; i++) {
1055             iv[i] = vec[1][i] - vec[0][i];
1056             d2 += iv[i] * iv[i];
1057          }
1058          rd2 = (float)n_vect / d2;
1059          b = 0;
1060          for (i = 0; i < n_comp; i++) {
1061             b -= iv[i] * vec[0][i];
1062             iv[i] *= rd2;
1063          }
1064          b = b * rd2 + 0.5f;
1065
1066          /* add in texels */
1067          lolo = 0;
1068          for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
1069             int texel = n_vect + 1; /* transparent black */
1070             if (!ISTBLACK(input[k])) {
1071                /* interpolate color */
1072                float dot = 0;
1073                for (i = 0; i < n_comp; i++) {
1074                   dot += input[k][i] * iv[i];
1075                }
1076                texel = (int)(dot + b);
1077                if (texel < 0) {
1078                   texel = 0;
1079                } else if (texel > n_vect) {
1080                   texel = n_vect;
1081                }
1082             }
1083             /* add in texel */
1084             lolo <<= 2;
1085             lolo |= texel;
1086          }
1087          cc[0] = lolo;
1088       }
1089    }
1090
1091    /* right microtile */
1092    if (maxColR == -1) {
1093       /* all transparent black */
1094       cc[1] = -1;
1095       for (i = 0; i < n_comp; i++) {
1096          vec[2][i] = 0;
1097          vec[3][i] = 0;
1098       }
1099    } else {
1100       cc[1] = 0;
1101       for (i = 0; i < n_comp; i++) {
1102          vec[2][i] = input[minColR][i];
1103          vec[3][i] = input[maxColR][i];
1104       }
1105       if (minColR != maxColR) {
1106          /* compute interpolation vector */
1107          float d2 = 0;
1108          float rd2;
1109
1110          for (i = 0; i < n_comp; i++) {
1111             iv[i] = vec[3][i] - vec[2][i];
1112             d2 += iv[i] * iv[i];
1113          }
1114          rd2 = (float)n_vect / d2;
1115          b = 0;
1116          for (i = 0; i < n_comp; i++) {
1117             b -= iv[i] * vec[2][i];
1118             iv[i] *= rd2;
1119          }
1120          b = b * rd2 + 0.5f;
1121
1122          /* add in texels */
1123          lohi = 0;
1124          for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
1125             int texel = n_vect + 1; /* transparent black */
1126             if (!ISTBLACK(input[k])) {
1127                /* interpolate color */
1128                float dot = 0;
1129                for (i = 0; i < n_comp; i++) {
1130                   dot += input[k][i] * iv[i];
1131                }
1132                texel = (int)(dot + b);
1133                if (texel < 0) {
1134                   texel = 0;
1135                } else if (texel > n_vect) {
1136                   texel = n_vect;
1137                }
1138             }
1139             /* add in texel */
1140             lohi <<= 2;
1141             lohi |= texel;
1142          }
1143          cc[1] = lohi;
1144       }
1145    }
1146
1147    FX64_MOV32(hi, 9 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
1148    for (j = 2 * 2 - 1; j >= 0; j--) {
1149       for (i = 0; i < n_comp; i++) {
1150          /* add in colors */
1151          FX64_SHL(hi, 5);
1152          FX64_OR32(hi, vec[j][i] >> 3);
1153       }
1154    }
1155    ((Fx64 *)cc)[1] = hi;
1156 }
1157
1158
1159 static void
1160 fxt1_quantize_MIXED0 (unsigned long *cc,
1161                       unsigned char input[N_TEXELS][MAX_COMP])
1162 {
1163    const int n_vect = 3; /* highest vector number in each microtile */
1164    const int n_comp = 3; /* 3 components: R, G, B */
1165    unsigned char vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
1166    float b, iv[MAX_COMP]; /* interpolation vector */
1167    int i, j, k;
1168    Fx64 hi; /* high quadword */
1169    unsigned long lohi, lolo; /* low quadword: hi dword, lo dword */
1170
1171    int minColL = 0, maxColL = 0;
1172    int minColR = 0, maxColR = 0;
1173 #if 0
1174    int minSum;
1175    int maxSum;
1176
1177    /* Our solution here is to find the darkest and brightest colors in
1178     * the 4x4 tile and use those as the two representative colors.
1179     * There are probably better algorithms to use (histogram-based).
1180     */
1181    minSum = 1000; /* big enough */
1182    maxSum = -1; /* small enough */
1183    for (k = 0; k < N_TEXELS / 2; k++) {
1184       int sum = 0;
1185       for (i = 0; i < n_comp; i++) {
1186          sum += input[k][i];
1187       }
1188       if (minSum > sum) {
1189          minSum = sum;
1190          minColL = k;
1191       }
1192       if (maxSum < sum) {
1193          maxSum = sum;
1194          maxColL = k;
1195       }
1196    }
1197    minSum = 1000; /* big enough */
1198    maxSum = -1; /* small enough */
1199    for (; k < N_TEXELS; k++) {
1200       int sum = 0;
1201       for (i = 0; i < n_comp; i++) {
1202          sum += input[k][i];
1203       }
1204       if (minSum > sum) {
1205          minSum = sum;
1206          minColR = k;
1207       }
1208       if (maxSum < sum) {
1209          maxSum = sum;
1210          maxColR = k;
1211       }
1212    }
1213 #else
1214    int minVal;
1215    int maxVal;
1216    int maxVarL = fxt1_variance(NULL, input, n_comp, N_TEXELS / 2);
1217    int maxVarR = fxt1_variance(NULL, &input[N_TEXELS / 2], n_comp, N_TEXELS / 2);
1218
1219    /* Scan the channel with max variance for lo & hi
1220     * and use those as the two representative colors.
1221     */
1222    minVal = 1000; /* big enough */
1223    maxVal = -1; /* small enough */
1224    for (k = 0; k < N_TEXELS / 2; k++) {
1225       int t = input[k][maxVarL];
1226       if (minVal > t) {
1227          minVal = t;
1228          minColL = k;
1229       }
1230       if (maxVal < t) {
1231          maxVal = t;
1232          maxColL = k;
1233       }
1234    }
1235    minVal = 1000; /* big enough */
1236    maxVal = -1; /* small enough */
1237    for (; k < N_TEXELS; k++) {
1238       int t = input[k][maxVarR];
1239       if (minVal > t) {
1240          minVal = t;
1241          minColR = k;
1242       }
1243       if (maxVal < t) {
1244          maxVal = t;
1245          maxColR = k;
1246       }
1247    }
1248 #endif
1249
1250    /* left microtile */
1251    cc[0] = 0;
1252    for (i = 0; i < n_comp; i++) {
1253       vec[0][i] = input[minColL][i];
1254       vec[1][i] = input[maxColL][i];
1255    }
1256    if (minColL != maxColL) {
1257       /* compute interpolation vector */
1258       float d2 = 0;
1259       float rd2;
1260
1261       for (i = 0; i < n_comp; i++) {
1262          iv[i] = vec[1][i] - vec[0][i];
1263          d2 += iv[i] * iv[i];
1264       }
1265       rd2 = (float)n_vect / d2;
1266       b = 0;
1267       for (i = 0; i < n_comp; i++) {
1268          b -= iv[i] * vec[0][i];
1269          iv[i] *= rd2;
1270       }
1271       b = b * rd2 + 0.5f;
1272
1273       /* add in texels */
1274       lolo = 0;
1275       for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
1276          int texel;
1277          /* interpolate color */
1278          float dot = 0;
1279          for (i = 0; i < n_comp; i++) {
1280             dot += input[k][i] * iv[i];
1281          }
1282          texel = (int)(dot + b);
1283          if (texel < 0) {
1284             texel = 0;
1285          } else if (texel > n_vect) {
1286             texel = n_vect;
1287          }
1288          /* add in texel */
1289          lolo <<= 2;
1290          lolo |= texel;
1291       }
1292
1293       /* funky encoding for LSB of green */
1294       if ((int)((lolo >> 1) & 1) != (((vec[1][GCOMP] ^ vec[0][GCOMP]) >> 2) & 1)) {
1295          for (i = 0; i < n_comp; i++) {
1296             vec[1][i] = input[minColL][i];
1297             vec[0][i] = input[maxColL][i];
1298          }
1299          lolo = ~lolo;
1300       }
1301
1302       cc[0] = lolo;
1303    }
1304
1305    /* right microtile */
1306    cc[1] = 0;
1307    for (i = 0; i < n_comp; i++) {
1308       vec[2][i] = input[minColR][i];
1309       vec[3][i] = input[maxColR][i];
1310    }
1311    if (minColR != maxColR) {
1312       /* compute interpolation vector */
1313       float d2 = 0;
1314       float rd2;
1315
1316       for (i = 0; i < n_comp; i++) {
1317          iv[i] = vec[3][i] - vec[2][i];
1318          d2 += iv[i] * iv[i];
1319       }
1320       rd2 = (float)n_vect / d2;
1321       b = 0;
1322       for (i = 0; i < n_comp; i++) {
1323          b -= iv[i] * vec[2][i];
1324          iv[i] *= rd2;
1325       }
1326       b = b * rd2 + 0.5f;
1327
1328       /* add in texels */
1329       lohi = 0;
1330       for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
1331          int texel;
1332          /* interpolate color */
1333          float dot = 0;
1334          for (i = 0; i < n_comp; i++) {
1335             dot += input[k][i] * iv[i];
1336          }
1337          texel = (int)(dot + b);
1338          if (texel < 0) {
1339             texel = 0;
1340          } else if (texel > n_vect) {
1341             texel = n_vect;
1342          }
1343          /* add in texel */
1344          lohi <<= 2;
1345          lohi |= texel;
1346       }
1347
1348       /* funky encoding for LSB of green */
1349       if ((int)((lohi >> 1) & 1) != (((vec[3][GCOMP] ^ vec[2][GCOMP]) >> 2) & 1)) {
1350          for (i = 0; i < n_comp; i++) {
1351             vec[3][i] = input[minColR][i];
1352             vec[2][i] = input[maxColR][i];
1353          }
1354          lohi = ~lohi;
1355       }
1356
1357       cc[1] = lohi;
1358    }
1359
1360    FX64_MOV32(hi, 8 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
1361    for (j = 2 * 2 - 1; j >= 0; j--) {
1362       for (i = 0; i < n_comp; i++) {
1363          /* add in colors */
1364          FX64_SHL(hi, 5);
1365          FX64_OR32(hi, vec[j][i] >> 3);
1366       }
1367    }
1368    ((Fx64 *)cc)[1] = hi;
1369 }
1370
1371
1372 static void
1373 fxt1_quantize (unsigned long *cc, const unsigned char *lines[], int comps)
1374 {
1375    int trualpha;
1376    unsigned char reord[N_TEXELS][MAX_COMP];
1377
1378    unsigned char input[N_TEXELS][MAX_COMP];
1379    int i, k, l;
1380
1381    memset(input, -1, sizeof(input));
1382
1383    /* 8 texels each line */
1384    for (l = 0; l < 4; l++) {
1385       for (k = 0; k < 4; k++) {
1386          for (i = 0; i < comps; i++) {
1387             input[k + l * 4][i] = *lines[l]++;
1388          }
1389       }
1390       for (; k < 8; k++) {
1391          for (i = 0; i < comps; i++) {
1392             input[k + l * 4 + 12][i] = *lines[l]++;
1393          }
1394       }
1395    }
1396
1397    /* block layout:
1398     * 00, 01, 02, 03, 08, 09, 0a, 0b
1399     * 10, 11, 12, 13, 18, 19, 1a, 1b
1400     * 04, 05, 06, 07, 0c, 0d, 0e, 0f
1401     * 14, 15, 16, 17, 1c, 1d, 1e, 1f
1402     */
1403
1404    /* [dBorca]
1405     * stupidity flows forth from this
1406     */
1407    l = N_TEXELS;
1408    trualpha = 0;
1409    if (comps == 4) {
1410       /* skip all transparent black texels */
1411       l = 0;
1412       for (k = 0; k < N_TEXELS; k++) {
1413          /* test all components against 0 */
1414          if (!ISTBLACK(input[k])) {
1415             /* texel is not transparent black */
1416             COPY_4UBV(reord[l], input[k]);
1417             if (reord[l][ACOMP] < (255 - ALPHA_TS)) {
1418                /* non-opaque texel */
1419                trualpha = !0;
1420             }
1421             l++;
1422          }
1423       }
1424    }
1425
1426 #if 0
1427    if (trualpha) {
1428       fxt1_quantize_ALPHA0(cc, input, reord, l);
1429    } else if (l == 0) {
1430       cc[0] = cc[1] = cc[2] = -1;
1431       cc[3] = 0;
1432    } else if (l < N_TEXELS) {
1433       fxt1_quantize_HI(cc, input, reord, l);
1434    } else {
1435       fxt1_quantize_CHROMA(cc, input);
1436    }
1437    (void)fxt1_quantize_ALPHA1;
1438    (void)fxt1_quantize_MIXED1;
1439    (void)fxt1_quantize_MIXED0;
1440 #else
1441    if (trualpha) {
1442       fxt1_quantize_ALPHA1(cc, input);
1443    } else if (l == 0) {
1444       cc[0] = cc[1] = cc[2] = -1;
1445       cc[3] = 0;
1446    } else if (l < N_TEXELS) {
1447       fxt1_quantize_MIXED1(cc, input);
1448    } else {
1449       fxt1_quantize_MIXED0(cc, input);
1450    }
1451    (void)fxt1_quantize_ALPHA0;
1452    (void)fxt1_quantize_HI;
1453    (void)fxt1_quantize_CHROMA;
1454 #endif
1455 }
1456
1457
1458 int
1459 fxt1_encode (GLcontext *ctx,
1460              unsigned int width, unsigned int height,
1461              int srcFormat,
1462              const void *source, int srcRowStride,
1463              void *dest, int destRowStride)
1464 {
1465    const int comps = (srcFormat == GL_RGB) ? 3 : 4;
1466    unsigned int x, y;
1467    const unsigned char *data;
1468    unsigned long *encoded = dest;
1469    GLubyte *newSource = NULL;
1470
1471    (void) ctx;
1472
1473    /*
1474     * Rescale image if width is less than 8 or height is less than 4.
1475     */
1476    if (width < 8 || height < 4) {
1477       GLint newWidth = (width + 7) & ~7;
1478       GLint newHeight = (height + 3) & ~3;
1479       newSource = MALLOC(comps * newWidth * newHeight * sizeof(GLchan));
1480       _mesa_upscale_teximage2d(width, height, newWidth, newHeight,
1481                                comps, source, srcRowStride, newSource);
1482       source = newSource;
1483       width = newWidth;
1484       height = newHeight;
1485       srcRowStride = comps * newWidth;
1486    }
1487
1488    data = source;
1489    destRowStride = (destRowStride - width * 2) / 4;
1490    for (y = 0; y < height; y += 4) {
1491       unsigned int offs = 0 + (y + 0) * srcRowStride;
1492       for (x = 0; x < width; x += 8) {
1493          const unsigned char *lines[4];
1494          lines[0] = &data[offs];
1495          lines[1] = lines[0] + srcRowStride;
1496          lines[2] = lines[1] + srcRowStride;
1497          lines[3] = lines[2] + srcRowStride;
1498          offs += 8 * comps;
1499          fxt1_quantize(encoded, lines, comps);
1500          /* 128 bits per 8x4 block = 4bpp */
1501          encoded += 4;
1502       }
1503       encoded += destRowStride;
1504    }
1505
1506    if (newSource != NULL) {
1507       FREE(newSource);
1508    }
1509
1510    return 0;
1511 }
1512
1513
1514 /***************************************************************************\
1515  * FXT1 decoder
1516  *
1517  * The decoder is based on GL_3DFX_texture_compression_FXT1
1518  * specification and serves as a concept for the encoder.
1519 \***************************************************************************/
1520
1521
/* Lookup table for scaling 5 bit colors up to 8 bits:
 * entry i is i * 255 / 31 rounded to the nearest integer.
 * Read-only, hence const.
 */
static const unsigned char _rgb_scale_5[32] = {
   0,   8,   16,  25,  33,  41,  49,  58,
   66,  74,  82,  90,  99,  107, 115, 123,
   132, 140, 148, 156, 165, 173, 181, 189,
   197, 206, 214, 222, 230, 239, 247, 255
};
1529
/* Lookup table for scaling 6 bit colors up to 8 bits:
 * entry i is i * 255 / 63 rounded to the nearest integer.
 * Read-only, hence const.
 */
static const unsigned char _rgb_scale_6[64] = {
   0,   4,   8,   12,  16,  20,  24,  28,
   32,  36,  40,  45,  49,  53,  57,  61,
   65,  69,  73,  77,  81,  85,  89,  93,
   97,  101, 105, 109, 113, 117, 121, 125,
   130, 134, 138, 142, 146, 150, 154, 158,
   162, 166, 170, 174, 178, 182, 186, 190,
   194, 198, 202, 206, 210, 215, 219, 223,
   227, 231, 235, 239, 243, 247, 251, 255
};
1541
1542
/* Bits of the block starting at bit number `which' (up to the end of the
 * 32-bit word that contains it).  The block is always viewed as 32-bit
 * words, whatever the element type of `cc' is, so the result does not
 * depend on the size of long.
 */
#define CC_SEL(cc, which) \
   (((const unsigned int *)(cc))[(which) / 32] >> ((which) & 31))
/* expand the low 5 bits of c to 8 bits */
#define UP5(c) _rgb_scale_5[(c) & 31]
/* expand the low 5 bits of c plus the extra LSB b to 8 bits */
#define UP6(c, b) _rgb_scale_6[(((c) & 31) << 1) | ((b) & 1)]
/* rounded blend: t/n of c1 plus (n-t)/n of c0 */
#define LERP(n, t, c0, c1) \
   ((((n) - (t)) * (c0) + (t) * (c1) + (n) / 2) / (n))
/* clear the 4 bytes of an RGBA texel (and nothing beyond them) */
#define ZERO_4UBV(v) memset((v), 0, 4)
1548
1549
1550 static void
1551 fxt1_decode_1HI (unsigned char *code, int t, unsigned char *rgba)
1552 {
1553    const unsigned long *cc;
1554
1555    t *= 3;
1556    cc = (unsigned long *)(code + t / 8);
1557    t = (cc[0] >> (t & 7)) & 7;
1558
1559    if (t == 7) {
1560       ZERO_4UBV(rgba);
1561    } else {
1562       cc = (unsigned long *)(code + 12);
1563       if (t == 0) {
1564          rgba[BCOMP] = UP5(CC_SEL(cc, 0));
1565          rgba[GCOMP] = UP5(CC_SEL(cc, 5));
1566          rgba[RCOMP] = UP5(CC_SEL(cc, 10));
1567       } else if (t == 6) {
1568          rgba[BCOMP] = UP5(CC_SEL(cc, 15));
1569          rgba[GCOMP] = UP5(CC_SEL(cc, 20));
1570          rgba[RCOMP] = UP5(CC_SEL(cc, 25));
1571       } else {
1572          rgba[BCOMP] = LERP(6, t, UP5(CC_SEL(cc, 0)), UP5(CC_SEL(cc, 15)));
1573          rgba[GCOMP] = LERP(6, t, UP5(CC_SEL(cc, 5)), UP5(CC_SEL(cc, 20)));
1574          rgba[RCOMP] = LERP(6, t, UP5(CC_SEL(cc, 10)), UP5(CC_SEL(cc, 25)));
1575       }
1576       rgba[ACOMP] = 255;
1577    }
1578 }
1579
1580
1581 static void
1582 fxt1_decode_1CHROMA (unsigned char *code, int t, unsigned char *rgba)
1583 {
1584    const unsigned long *cc;
1585    unsigned long kk;
1586
1587    cc = (unsigned long *)code;
1588    if (t & 16) {
1589       cc++;
1590       t &= 15;
1591    }
1592    t = (cc[0] >> (t * 2)) & 3;
1593
1594    t *= 15;
1595    cc = (unsigned long *)(code + 8 + t / 8);
1596    kk = cc[0] >> (t & 7);
1597    rgba[BCOMP] = UP5(kk);
1598    rgba[GCOMP] = UP5(kk >> 5);
1599    rgba[RCOMP] = UP5(kk >> 10);
1600    rgba[ACOMP] = 255;
1601 }
1602
1603
1604 static void
1605 fxt1_decode_1MIXED (unsigned char *code, int t, unsigned char *rgba)
1606 {
1607    const unsigned long *cc;
1608    unsigned int col[2][3];
1609    int glsb, selb;
1610
1611    cc = (unsigned long *)code;
1612    if (t & 16) {
1613       t &= 15;
1614       t = (cc[1] >> (t * 2)) & 3;
1615       /* col 2 */
1616       col[0][BCOMP] = (*(unsigned long *)(code + 11)) >> 6;
1617       col[0][GCOMP] = CC_SEL(cc, 99);
1618       col[0][RCOMP] = CC_SEL(cc, 104);
1619       /* col 3 */
1620       col[1][BCOMP] = CC_SEL(cc, 109);
1621       col[1][GCOMP] = CC_SEL(cc, 114);
1622       col[1][RCOMP] = CC_SEL(cc, 119);
1623       glsb = CC_SEL(cc, 126);
1624       selb = CC_SEL(cc, 33);
1625    } else {
1626       t = (cc[0] >> (t * 2)) & 3;
1627       /* col 0 */
1628       col[0][BCOMP] = CC_SEL(cc, 64);
1629       col[0][GCOMP] = CC_SEL(cc, 69);
1630       col[0][RCOMP] = CC_SEL(cc, 74);
1631       /* col 1 */
1632       col[1][BCOMP] = CC_SEL(cc, 79);
1633       col[1][GCOMP] = CC_SEL(cc, 84);
1634       col[1][RCOMP] = CC_SEL(cc, 89);
1635       glsb = CC_SEL(cc, 125);
1636       selb = CC_SEL(cc, 1);
1637    }
1638
1639    if (CC_SEL(cc, 124) & 1) {
1640       /* alpha[0] == 1 */
1641
1642       if (t == 3) {
1643          ZERO_4UBV(rgba);
1644       } else {
1645          if (t == 0) {
1646             rgba[BCOMP] = UP5(col[0][BCOMP]);
1647             rgba[GCOMP] = UP5(col[0][GCOMP]);
1648             rgba[RCOMP] = UP5(col[0][RCOMP]);
1649          } else if (t == 2) {
1650             rgba[BCOMP] = UP5(col[1][BCOMP]);
1651             rgba[GCOMP] = UP6(col[1][GCOMP], glsb);
1652             rgba[RCOMP] = UP5(col[1][RCOMP]);
1653          } else {
1654             rgba[BCOMP] = (UP5(col[0][BCOMP]) + UP5(col[1][BCOMP])) / 2;
1655             rgba[GCOMP] = (UP5(col[0][GCOMP]) + UP6(col[1][GCOMP], glsb)) / 2;
1656             rgba[RCOMP] = (UP5(col[0][RCOMP]) + UP5(col[1][RCOMP])) / 2;
1657          }
1658          rgba[ACOMP] = 255;
1659       }
1660    } else {
1661       /* alpha[0] == 0 */
1662
1663       if (t == 0) {
1664          rgba[BCOMP] = UP5(col[0][BCOMP]);
1665          rgba[GCOMP] = UP6(col[0][GCOMP], glsb ^ selb);
1666          rgba[RCOMP] = UP5(col[0][RCOMP]);
1667       } else if (t == 3) {
1668          rgba[BCOMP] = UP5(col[1][BCOMP]);
1669          rgba[GCOMP] = UP6(col[1][GCOMP], glsb);
1670          rgba[RCOMP] = UP5(col[1][RCOMP]);
1671       } else {
1672          rgba[BCOMP] = LERP(3, t, UP5(col[0][BCOMP]), UP5(col[1][BCOMP]));
1673          rgba[GCOMP] = LERP(3, t, UP6(col[0][GCOMP], glsb ^ selb),
1674                                   UP6(col[1][GCOMP], glsb));
1675          rgba[RCOMP] = LERP(3, t, UP5(col[0][RCOMP]), UP5(col[1][RCOMP]));
1676       }
1677       rgba[ACOMP] = 255;
1678    }
1679 }
1680
1681
1682 static void
1683 fxt1_decode_1ALPHA (unsigned char *code, int t, unsigned char *rgba)
1684 {
1685    const unsigned long *cc;
1686
1687    cc = (unsigned long *)code;
1688    if (CC_SEL(cc, 124) & 1) {
1689       /* lerp == 1 */
1690       unsigned int col0[4];
1691
1692       if (t & 16) {
1693          t &= 15;
1694          t = (cc[1] >> (t * 2)) & 3;
1695          /* col 2 */
1696          col0[BCOMP] = (*(unsigned long *)(code + 11)) >> 6;
1697          col0[GCOMP] = CC_SEL(cc, 99);
1698          col0[RCOMP] = CC_SEL(cc, 104);
1699          col0[ACOMP] = CC_SEL(cc, 119);
1700       } else {
1701          t = (cc[0] >> (t * 2)) & 3;
1702          /* col 0 */
1703          col0[BCOMP] = CC_SEL(cc, 64);
1704          col0[GCOMP] = CC_SEL(cc, 69);
1705          col0[RCOMP] = CC_SEL(cc, 74);
1706          col0[ACOMP] = CC_SEL(cc, 109);
1707       }
1708
1709       if (t == 0) {
1710          rgba[BCOMP] = UP5(col0[BCOMP]);
1711          rgba[GCOMP] = UP5(col0[GCOMP]);
1712          rgba[RCOMP] = UP5(col0[RCOMP]);
1713          rgba[ACOMP] = UP5(col0[ACOMP]);
1714       } else if (t == 3) {
1715          rgba[BCOMP] = UP5(CC_SEL(cc, 79));
1716          rgba[GCOMP] = UP5(CC_SEL(cc, 84));
1717          rgba[RCOMP] = UP5(CC_SEL(cc, 89));
1718          rgba[ACOMP] = UP5(CC_SEL(cc, 114));
1719       } else {
1720          rgba[BCOMP] = LERP(3, t, UP5(col0[BCOMP]), UP5(CC_SEL(cc, 79)));
1721          rgba[GCOMP] = LERP(3, t, UP5(col0[GCOMP]), UP5(CC_SEL(cc, 84)));
1722          rgba[RCOMP] = LERP(3, t, UP5(col0[RCOMP]), UP5(CC_SEL(cc, 89)));
1723          rgba[ACOMP] = LERP(3, t, UP5(col0[ACOMP]), UP5(CC_SEL(cc, 114)));
1724       }
1725    } else {
1726       /* lerp == 0 */
1727
1728       if (t & 16) {
1729          cc++;
1730          t &= 15;
1731       }
1732       t = (cc[0] >> (t * 2)) & 3;
1733
1734       if (t == 3) {
1735          ZERO_4UBV(rgba);
1736       } else {
1737          unsigned long kk;
1738          cc = (unsigned long *)code;
1739          rgba[ACOMP] = UP5(cc[3] >> (t * 5 + 13));
1740          t *= 15;
1741          cc = (unsigned long *)(code + 8 + t / 8);
1742          kk = cc[0] >> (t & 7);
1743          rgba[BCOMP] = UP5(kk);
1744          rgba[GCOMP] = UP5(kk >> 5);
1745          rgba[RCOMP] = UP5(kk >> 10);
1746       }
1747    }
1748 }
1749
1750
1751 void
1752 fxt1_decode_1 (const void *texture, int stride, /* in pixels */
1753                int i, int j, unsigned char *rgba)
1754 {
1755    static void (*decode_1[]) (unsigned char *, int, unsigned char *) = {
1756       fxt1_decode_1HI,     /* cc-high   = "00?" */
1757       fxt1_decode_1HI,     /* cc-high   = "00?" */
1758       fxt1_decode_1CHROMA, /* cc-chroma = "010" */
1759       fxt1_decode_1ALPHA,  /* alpha     = "011" */
1760       fxt1_decode_1MIXED,  /* mixed     = "1??" */
1761       fxt1_decode_1MIXED,  /* mixed     = "1??" */
1762       fxt1_decode_1MIXED,  /* mixed     = "1??" */
1763       fxt1_decode_1MIXED   /* mixed     = "1??" */
1764    };
1765
1766    unsigned char *code = (unsigned char *)texture +
1767                          ((j / 4) * (stride / 8) + (i / 8)) * 16;
1768    int mode = CC_SEL((unsigned long *)code, 125);
1769    int t = i & 7;
1770
1771    if (t & 4) {
1772       t += 12;
1773    }
1774    t += (j & 3) * 4;
1775
1776    decode_1[mode](code, t, rgba);
1777 }