src/mesa/main/texcompress_fxt1.c

   1 /*
   2  * Mesa 3-D graphics library
   3  * Version:  6.1
   4  *
   5  * Copyright (C) 1999-2004  Brian Paul   All Rights Reserved.
   6  *
   7  * Permission is hereby granted, free of charge, to any person obtaining a
   8  * copy of this software and associated documentation files (the "Software"),
   9  * to deal in the Software without restriction, including without limitation
  10  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  11  * and/or sell copies of the Software, and to permit persons to whom the
  12  * Software is furnished to do so, subject to the following conditions:
  13  *
  14  * The above copyright notice and this permission notice shall be included
  15  * in all copies or substantial portions of the Software.
  16  *
  17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  18  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  20  * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
  21  * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  22  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  23  */
  24
  25
  26 /**
  27  * \file texcompress_fxt1.c
  28  * GL_EXT_texture_compression_fxt1 support.
  29  */
  30
  31
  32 #include "glheader.h"
  33 #include "imports.h"
  34 #include "colormac.h"
  35 #include "context.h"
  36 #include "convolve.h"
  37 #include "image.h"
  38 #include "texcompress.h"
  39 #include "texformat.h"
  40 #include "texstore.h"
  41
  42
  43 int
  44 fxt1_encode (GLcontext *ctx,
  45              unsigned int width, unsigned int height,
  46              int srcFormat,
  47              const void *source, int srcRowStride,
  48              void *dest, int destRowStride);
  49 void
  50 fxt1_decode_1 (const void *texture, int width,
  51                int i, int j, unsigned char *rgba);
  52
  53
  54 /**
  55  * Called during context initialization.
  56  */
  57 void
  58 _mesa_init_texture_fxt1( GLcontext *ctx )
  59 {
  60 }
  61
  62
  63 /**
  64  * Called via TexFormat->StoreImage to store an RGB_FXT1 texture.
  65  */
  66 static GLboolean
  67 texstore_rgb_fxt1(STORE_PARAMS)
  68 {
  69    const GLchan *pixels;
  70    GLint srcRowStride;
  71    GLubyte *dst;
  72    const GLint texWidth = dstRowStride * 8 / 16; /* a bit of a hack */
  73    const GLchan *tempImage = NULL;
  74
  75    ASSERT(dstFormat == &_mesa_texformat_rgb_fxt1);
  76    ASSERT(dstXoffset % 8 == 0);
  77    ASSERT(dstYoffset % 4 == 0);
  78    ASSERT(dstZoffset     == 0);
  79
  80    if (srcFormat != GL_RGB ||
  81        srcType != CHAN_TYPE ||
  82        ctx->_ImageTransferState ||
  83        srcPacking->SwapBytes) {
  84       /* convert image to RGB/GLchan */
  85       tempImage = _mesa_make_temp_chan_image(ctx, dims,
  86                                              baseInternalFormat,
  87                                              dstFormat->BaseFormat,
  88                                              srcWidth, srcHeight, srcDepth,
  89                                              srcFormat, srcType, srcAddr,
  90                                              srcPacking);
  91       if (!tempImage)
  92          return GL_FALSE; /* out of memory */
  93       _mesa_adjust_image_for_convolution(ctx, dims, &srcWidth, &srcHeight);
  94       pixels = tempImage;
  95       srcRowStride = 3 * srcWidth;
  96       srcFormat = GL_RGB;
  97    }
  98    else {
  99       pixels = (const GLchan *) srcAddr;
 100       srcRowStride = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat,
 101                                             srcType) / sizeof(GLchan);
 102    }
 103
 104    dst = _mesa_compressed_image_address(dstXoffset, dstYoffset, 0,
 105                                         GL_COMPRESSED_RGB_FXT1_3DFX,
 106                                         texWidth, (GLubyte *) dstAddr);
 107
 108    fxt1_encode(ctx, srcWidth, srcHeight, srcFormat, pixels, srcRowStride,
 109                dst, dstRowStride);
 110
 111    if (tempImage)
 112       _mesa_free((void*) tempImage);
 113
 114    return GL_TRUE;
 115 }
 116
 117
 118 /**
 119  * Called via TexFormat->StoreImage to store an RGBA_FXT1 texture.
 120  */
 121 static GLboolean
 122 texstore_rgba_fxt1(STORE_PARAMS)
 123 {
 124    const GLchan *pixels;
 125    GLint srcRowStride;
 126    GLubyte *dst;
 127    GLint texWidth = dstRowStride * 8 / 16; /* a bit of a hack */
 128    const GLchan *tempImage = NULL;
 129
 130    ASSERT(dstFormat == &_mesa_texformat_rgba_fxt1);
 131    ASSERT(dstXoffset % 8 == 0);
 132    ASSERT(dstYoffset % 4 == 0);
 133    ASSERT(dstZoffset     == 0);
 134
 135    if (srcFormat != GL_RGBA ||
 136        srcType != CHAN_TYPE ||
 137        ctx->_ImageTransferState ||
 138        srcPacking->SwapBytes) {
 139       /* convert image to RGBA/GLchan */
 140       tempImage = _mesa_make_temp_chan_image(ctx, dims,
 141                                              baseInternalFormat,
 142                                              dstFormat->BaseFormat,
 143                                              srcWidth, srcHeight, srcDepth,
 144                                              srcFormat, srcType, srcAddr,
 145                                              srcPacking);
 146       if (!tempImage)
 147          return GL_FALSE; /* out of memory */
 148       _mesa_adjust_image_for_convolution(ctx, dims, &srcWidth, &srcHeight);
 149       pixels = tempImage;
 150       srcRowStride = 4 * srcWidth;
 151       srcFormat = GL_RGBA;
 152    }
 153    else {
 154       pixels = (const GLchan *) srcAddr;
 155       srcRowStride = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat,
 156                                             srcType) / sizeof(GLchan);
 157    }
 158
 159    dst = _mesa_compressed_image_address(dstXoffset, dstYoffset, 0,
 160                                         GL_COMPRESSED_RGBA_FXT1_3DFX,
 161                                         texWidth, (GLubyte *) dstAddr);
 162
 163    fxt1_encode(ctx, srcWidth, srcHeight, srcFormat, pixels, srcRowStride,
 164                dst, dstRowStride);
 165
 166    if (tempImage)
 167       _mesa_free((void*) tempImage);
 168
 169    return GL_TRUE;
 170 }
 171
 172
 173 static void
 174 fetch_texel_2d_rgba_fxt1( const struct gl_texture_image *texImage,
 175                           GLint i, GLint j, GLint k, GLchan *texel )
 176 {
 177    fxt1_decode_1(texImage->Data, texImage->Width, i, j, texel);
 178 }
 179
 180
 181 static void
 182 fetch_texel_2d_f_rgba_fxt1( const struct gl_texture_image *texImage,
 183                             GLint i, GLint j, GLint k, GLfloat *texel )
 184 {
 185    /* just sample as GLchan and convert to float here */
 186    GLchan rgba[4];
 187    fxt1_decode_1(texImage->Data, texImage->Width, i, j, rgba);
 188    texel[RCOMP] = CHAN_TO_FLOAT(rgba[RCOMP]);
 189    texel[GCOMP] = CHAN_TO_FLOAT(rgba[GCOMP]);
 190    texel[BCOMP] = CHAN_TO_FLOAT(rgba[BCOMP]);
 191    texel[ACOMP] = CHAN_TO_FLOAT(rgba[ACOMP]);
 192 }
 193
 194
 195 static void
 196 fetch_texel_2d_rgb_fxt1( const struct gl_texture_image *texImage,
 197                          GLint i, GLint j, GLint k, GLchan *texel )
 198 {
 199    fxt1_decode_1(texImage->Data, texImage->Width, i, j, texel);
 200    texel[ACOMP] = 255;
 201 }
 202
 203
 204 static void
 205 fetch_texel_2d_f_rgb_fxt1( const struct gl_texture_image *texImage,
 206                            GLint i, GLint j, GLint k, GLfloat *texel )
 207 {
 208    /* just sample as GLchan and convert to float here */
 209    GLchan rgba[4];
 210    fxt1_decode_1(texImage->Data, texImage->Width, i, j, rgba);
 211    texel[RCOMP] = CHAN_TO_FLOAT(rgba[RCOMP]);
 212    texel[GCOMP] = CHAN_TO_FLOAT(rgba[GCOMP]);
 213    texel[BCOMP] = CHAN_TO_FLOAT(rgba[BCOMP]);
 214    texel[ACOMP] = 1.0;
 215 }
 216
 217
 218
 219 const struct gl_texture_format _mesa_texformat_rgb_fxt1 = {
 220    MESA_FORMAT_RGB_FXT1,                /* MesaFormat */
 221    GL_RGB,                              /* BaseFormat */
 222    GL_UNSIGNED_NORMALIZED_ARB,          /* DataType */
 223    4, /*approx*/                        /* RedBits */
 224    4, /*approx*/                        /* GreenBits */
 225    4, /*approx*/                        /* BlueBits */
 226    0,                                   /* AlphaBits */
 227    0,                                   /* LuminanceBits */
 228    0,                                   /* IntensityBits */
 229    0,                                   /* IndexBits */
 230    0,                                   /* DepthBits */
 231    0,                                   /* TexelBytes */
 232    texstore_rgb_fxt1,                   /* StoreTexImageFunc */
 233    NULL, /*impossible*/                 /* FetchTexel1D */
 234    fetch_texel_2d_rgb_fxt1,             /* FetchTexel2D */
 235    NULL, /*impossible*/                 /* FetchTexel3D */
 236    NULL, /*impossible*/                 /* FetchTexel1Df */
 237    fetch_texel_2d_f_rgb_fxt1,           /* FetchTexel2Df */
 238    NULL, /*impossible*/                 /* FetchTexel3Df */
 239 };
 240
 241 const struct gl_texture_format _mesa_texformat_rgba_fxt1 = {
 242    MESA_FORMAT_RGBA_FXT1,               /* MesaFormat */
 243    GL_RGBA,                             /* BaseFormat */
 244    GL_UNSIGNED_NORMALIZED_ARB,          /* DataType */
 245    4, /*approx*/                        /* RedBits */
 246    4, /*approx*/                        /* GreenBits */
 247    4, /*approx*/                        /* BlueBits */
 248    1, /*approx*/                        /* AlphaBits */
 249    0,                                   /* LuminanceBits */
 250    0,                                   /* IntensityBits */
 251    0,                                   /* IndexBits */
 252    0,                                   /* DepthBits */
 253    0,                                   /* TexelBytes */
 254    texstore_rgba_fxt1,                  /* StoreTexImageFunc */
 255    NULL, /*impossible*/                 /* FetchTexel1D */
 256    fetch_texel_2d_rgba_fxt1,            /* FetchTexel2D */
 257    NULL, /*impossible*/                 /* FetchTexel3D */
 258    NULL, /*impossible*/                 /* FetchTexel1Df */
 259    fetch_texel_2d_f_rgba_fxt1,          /* FetchTexel2Df */
 260    NULL, /*impossible*/                 /* FetchTexel3Df */
 261 };
 262
 263
 264 /***************************************************************************\
 265  * FXT1 encoder
 266  *
 267  * The encoder was built by reversing the decoder,
 268  * and is vaguely based on Texus2 by 3dfx. Note that this code
  269  * is merely a proof of concept, since it is highly UNoptimized;
  270  * moreover, it is sub-optimal due to initial conditions passed
 271  * to Lloyd's algorithm (the interpolation modes are worse).
 272 \***************************************************************************/
 273
 274
 275 #define MAX_COMP 4 /* ever needed maximum number of components in texel */
 276 #define MAX_VECT 4 /* ever needed maximum number of base vectors to find */
 277 #define N_TEXELS 32 /* number of texels in a block (always 32) */
 278 #define LL_N_REP 50 /* number of iterations in lloyd's vq */
 279 #define LL_RMS_D 10 /* fault tolerance (maximum delta) */
 280 #define LL_RMS_E 255 /* fault tolerance (maximum error) */
 281 #define ALPHA_TS 2 /* alpha threshold: (255 - ALPHA_TS) deemed opaque */
 282 #define ISTBLACK(v) (*((unsigned long *)(v)) == 0)
 283
 284
 285 #ifdef __GNUC__
 286
 287 #define FX64_NATIVE 1
 288
 289 typedef unsigned long long Fx64;
 290
 291 #define FX64_MOV32(a, b) a = b;
 292 #define FX64_OR32(a, b)  a |= b;
 293 #define FX64_SHL(a, c)   a <<= c;
 294
 295 #else  /* !__GNUC__ */
 296
 297 #define FX64_NATIVE 0
 298
 299 typedef struct {
 300         unsigned long lo, hi;
 301 } Fx64;
 302
 303 #define FX64_MOV32(a, b) a.lo = b
 304 #define FX64_OR32(a, b)  a.lo |= b
 305
 306 #define FX64_SHL(a, c)                                 \
 307    do {                                                \
 308        if ((c) >= 32) {                                \
 309           a.hi = a.lo << ((c) - 32);                   \
 310           a.lo = 0;                                    \
 311        } else {                                        \
 312           a.hi = (a.hi << (c)) | (a.lo >> (32 - (c))); \
 313           a.lo <<= (c);                                \
 314        }                                               \
 315    } while (0)
 316
 317 #endif /* !__GNUC__ */
 318
 319
 320 static int
 321 fxt1_bestcol (float vec[][MAX_COMP], int nv,
 322               unsigned char input[MAX_COMP], int nc)
 323 {
 324    int i, j, best = -1;
 325    float err = 1e9; /* big enough */
 326
 327    for (j = 0; j < nv; j++) {
 328       float e = 0;
 329       for (i = 0; i < nc; i++) {
 330          e += (vec[j][i] - input[i]) * (vec[j][i] - input[i]);
 331       }
 332       if (e < err) {
 333          err = e;
 334          best = j;
 335       }
 336    }
 337
 338    return best;
 339 }
 340
 341
 342 static int
 343 fxt1_worst (float vec[MAX_COMP],
 344             unsigned char input[N_TEXELS][MAX_COMP], int nc, int n)
 345 {
 346    int i, k, worst = -1;
 347    float err = -1; /* small enough */
 348
 349    for (k = 0; k < n; k++) {
 350       float e = 0;
 351       for (i = 0; i < nc; i++) {
 352          e += (vec[i] - input[k][i]) * (vec[i] - input[k][i]);
 353       }
 354       if (e > err) {
 355          err = e;
 356          worst = k;
 357       }
 358    }
 359
 360    return worst;
 361 }
 362
 363
 364 static int
 365 fxt1_variance (double variance[MAX_COMP],
 366                unsigned char input[N_TEXELS][MAX_COMP], int nc, int n)
 367 {
 368    int i, k, best = 0;
 369    int sx, sx2;
 370    double var, maxvar = -1; /* small enough */
 371    double teenth = 1.0 / n;
 372
 373    for (i = 0; i < nc; i++) {
 374       sx = sx2 = 0;
 375       for (k = 0; k < n; k++) {
 376          int t = input[k][i];
 377          sx += t;
 378          sx2 += t * t;
 379       }
 380       var = sx2 * teenth - sx * sx * teenth * teenth;
 381       if (maxvar < var) {
 382          maxvar = var;
 383          best = i;
 384       }
 385       if (variance) {
 386          variance[i] = var;
 387       }
 388    }
 389
 390    return best;
 391 }
 392
 393
 394 static int
 395 fxt1_choose (float vec[][MAX_COMP], int nv,
 396             unsigned char input[N_TEXELS][MAX_COMP], int nc, int n)
 397 {
 398 #if 0
 399    /* Choose colors from a grid.
 400     */
 401    int i, j;
 402
 403    for (j = 0; j < nv; j++) {
 404       int m = j * (n - 1) / (nv - 1);
 405       for (i = 0; i < nc; i++) {
 406          vec[j][i] = input[m][i];
 407       }
 408    }
 409 #else
 410    /* Our solution here is to find the darkest and brightest colors in
 411     * the 8x4 tile and use those as the two representative colors.
 412     * There are probably better algorithms to use (histogram-based).
 413     */
 414    int i, j, k;
 415    int minSum = 1000; /* big enough */
 416    int maxSum = -1; /* small enough */
 417    int minCol = 0; /* phoudoin: silent compiler! */
 418    int maxCol = 0; /* phoudoin: silent compiler! */
 419
 420    struct {
 421       int flag;
 422       int key;
 423       int freq;
 424       int idx;
 425    } hist[N_TEXELS];
 426    int lenh = 0;
 427
 428    memset(hist, 0, sizeof(hist));
 429
 430    for (k = 0; k < n; k++) {
 431       int l;
 432       int key = 0;
 433       int sum = 0;
 434       for (i = 0; i < nc; i++) {
 435          key <<= 8;
 436          key |= input[k][i];
 437          sum += input[k][i];
 438       }
 439       for (l = 0; l < n; l++) {
 440          if (!hist[l].flag) {
 441             /* alloc new slot */
 442             hist[l].flag = !0;
 443             hist[l].key = key;
 444             hist[l].freq = 1;
 445             hist[l].idx = k;
 446             lenh = l + 1;
 447             break;
 448          } else if (hist[l].key == key) {
 449             hist[l].freq++;
 450             break;
 451          }
 452       }
 453       if (minSum > sum) {
 454          minSum = sum;
 455          minCol = k;
 456       }
 457       if (maxSum < sum) {
 458          maxSum = sum;
 459          maxCol = k;
 460       }
 461    }
 462
 463    if (lenh <= nv) {
 464       for (j = 0; j < lenh; j++) {
 465          for (i = 0; i < nc; i++) {
 466             vec[j][i] = (float)input[hist[j].idx][i];
 467          }
 468       }
 469       for (; j < nv; j++) {
 470          for (i = 0; i < nc; i++) {
 471             vec[j][i] = vec[0][i];
 472          }
 473       }
 474       return 0;
 475    }
 476
 477    for (j = 0; j < nv; j++) {
 478       for (i = 0; i < nc; i++) {
 479          vec[j][i] = ((nv - 1 - j) * input[minCol][i] + j * input[maxCol][i] + (nv - 1) / 2) / (nv - 1);
 480       }
 481    }
 482 #endif
 483
 484    return !0;
 485 }
 486
 487
 488 static int
 489 fxt1_lloyd (float vec[][MAX_COMP], int nv,
 490             unsigned char input[N_TEXELS][MAX_COMP], int nc, int n)
 491 {
 492    /* Use the generalized lloyd's algorithm for VQ:
 493     *     find 4 color vectors.
 494     *
 495     *     for each sample color
 496     *         sort to nearest vector.
 497     *
 498     *     replace each vector with the centroid of it's matching colors.
 499     *
 500     *     repeat until RMS doesn't improve.
 501     *
 502     *     if a color vector has no samples, or becomes the same as another
 503     *     vector, replace it with the color which is farthest from a sample.
 504     *
 505     * vec[][MAX_COMP]           initial vectors and resulting colors
 506     * nv                        number of resulting colors required
 507     * input[N_TEXELS][MAX_COMP] input texels
 508     * nc                        number of components in input / vec
 509     * n                         number of input samples
 510     */
 511
 512    int sum[MAX_VECT][MAX_COMP]; /* used to accumulate closest texels */
 513    int cnt[MAX_VECT]; /* how many times a certain vector was chosen */
 514    float error, lasterror = 1e9;
 515
 516    int i, j, k, rep;
 517
 518    /* the quantizer */
 519    for (rep = 0; rep < LL_N_REP; rep++) {
 520       /* reset sums & counters */
 521       for (j = 0; j < nv; j++) {
 522          for (i = 0; i < nc; i++) {
 523             sum[j][i] = 0;
 524          }
 525          cnt[j] = 0;
 526       }
 527       error = 0;
 528
 529       /* scan whole block */
 530       for (k = 0; k < n; k++) {
 531 #if 1
 532          int best = -1;
 533          float err = 1e9; /* big enough */
 534          /* determine best vector */
 535          for (j = 0; j < nv; j++) {
 536             float e = (vec[j][0] - input[k][0]) * (vec[j][0] - input[k][0]) +
 537                       (vec[j][1] - input[k][1]) * (vec[j][1] - input[k][1]) +
 538                       (vec[j][2] - input[k][2]) * (vec[j][2] - input[k][2]);
 539             if (nc == 4) {
 540                e += (vec[j][3] - input[k][3]) * (vec[j][3] - input[k][3]);
 541             }
 542             if (e < err) {
 543                err = e;
 544                best = j;
 545             }
 546          }
 547 #else
 548          int best = fxt1_bestcol(vec, n_vect, input[k], n_comp, &err);
 549 #endif
 550          /* add in closest color */
 551          for (i = 0; i < nc; i++) {
 552             sum[best][i] += input[k][i];
 553          }
 554          /* mark this vector as used */
 555          cnt[best]++;
 556          /* accumulate error */
 557          error += err;
 558       }
 559
 560       /* check RMS */
 561       if ((error < LL_RMS_E) ||
 562           ((error < lasterror) && ((lasterror - error) < LL_RMS_D))) {
 563          return !0; /* good match */
 564       }
 565       lasterror = error;
 566
 567       /* move each vector to the barycenter of its closest colors */
 568       for (j = 0; j < nv; j++) {
 569          if (cnt[j]) {
 570             float div = 1.0 / cnt[j];
 571             for (i = 0; i < nc; i++) {
 572                vec[j][i] = div * sum[j][i];
 573             }
 574          } else {
 575             /* this vec has no samples or is identical with a previous vec */
 576             int worst = fxt1_worst(vec[j], input, nc, n);
 577             for (i = 0; i < nc; i++) {
 578                vec[j][i] = input[worst][i];
 579             }
 580          }
 581       }
 582    }
 583
 584    return 0; /* could not converge fast enough */
 585 }
 586
 587
 588 static void
 589 fxt1_quantize_CHROMA (unsigned long *cc,
 590                       unsigned char input[N_TEXELS][MAX_COMP])
 591 {
 592    const int n_vect = 4; /* 4 base vectors to find */
 593    const int n_comp = 3; /* 3 components: R, G, B */
 594    float vec[MAX_VECT][MAX_COMP];
 595    int i, j, k;
 596    Fx64 hi; /* high quadword */
 597    unsigned long lohi, lolo; /* low quadword: hi dword, lo dword */
 598
 599    if (fxt1_choose(vec, n_vect, input, n_comp, N_TEXELS) != 0) {
 600       fxt1_lloyd(vec, n_vect, input, n_comp, N_TEXELS);
 601    }
 602
 603    FX64_MOV32(hi, 4); /* cc-chroma = "010" + unused bit */
 604    for (j = n_vect - 1; j >= 0; j--) {
 605       for (i = 0; i < n_comp; i++) {
 606          /* add in colors */
 607          FX64_SHL(hi, 5);
 608          FX64_OR32(hi, (unsigned int)(vec[j][i] / 8.0));
 609       }
 610    }
 611    ((Fx64 *)cc)[1] = hi;
 612
 613    lohi = lolo = 0;
 614    /* right microtile */
 615    for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) {
 616       lohi <<= 2;
 617       lohi |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
 618    }
 619    /* left microtile */
 620    for (; k >= 0; k--) {
 621       lolo <<= 2;
 622       lolo |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
 623    }
 624    cc[1] = lohi;
 625    cc[0] = lolo;
 626 }
 627
 628
 629 static void
 630 fxt1_quantize_ALPHA0 (unsigned long *cc,
 631                       unsigned char input[N_TEXELS][MAX_COMP],
 632                       unsigned char reord[N_TEXELS][MAX_COMP], int n)
 633 {
 634    const int n_vect = 3; /* 3 base vectors to find */
 635    const int n_comp = 4; /* 4 components: R, G, B, A */
 636    float vec[MAX_VECT][MAX_COMP];
 637    int i, j, k;
 638    Fx64 hi; /* high quadword */
 639    unsigned long lohi, lolo; /* low quadword: hi dword, lo dword */
 640
 641    /* the last vector indicates zero */
 642    for (i = 0; i < n_comp; i++) {
 643       vec[n_vect][i] = 0;
 644    }
 645
 646    /* the first n texels in reord are guaranteed to be non-zero */
 647    if (fxt1_choose(vec, n_vect, reord, n_comp, n) != 0) {
 648       fxt1_lloyd(vec, n_vect, reord, n_comp, n);
 649    }
 650
 651    FX64_MOV32(hi, 6); /* alpha = "011" + lerp = 0 */
 652    for (j = n_vect - 1; j >= 0; j--) {
 653       /* add in alphas */
 654       FX64_SHL(hi, 5);
 655       FX64_OR32(hi, (unsigned int)(vec[j][ACOMP] / 8.0));
 656    }
 657    for (j = n_vect - 1; j >= 0; j--) {
 658       for (i = 0; i < n_comp - 1; i++) {
 659          /* add in colors */
 660          FX64_SHL(hi, 5);
 661          FX64_OR32(hi, (unsigned int)(vec[j][i] / 8.0));
 662       }
 663    }
 664    ((Fx64 *)cc)[1] = hi;
 665
 666    lohi = lolo = 0;
 667    /* right microtile */
 668    for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) {
 669       lohi <<= 2;
 670       lohi |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
 671    }
 672    /* left microtile */
 673    for (; k >= 0; k--) {
 674       lolo <<= 2;
 675       lolo |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
 676    }
 677    cc[1] = lohi;
 678    cc[0] = lolo;
 679 }
 680
 681
 682 static void
 683 fxt1_quantize_ALPHA1 (unsigned long *cc,
 684                       unsigned char input[N_TEXELS][MAX_COMP])
 685 {
 686    const int n_vect = 3; /* highest vector number in each microtile */
 687    const int n_comp = 4; /* 4 components: R, G, B, A */
 688    float vec[1 + 1 + 1][MAX_COMP]; /* 1.5 extrema for each sub-block */
 689    float b, iv[MAX_COMP]; /* interpolation vector */
 690    int i, j, k;
 691    Fx64 hi; /* high quadword */
 692    unsigned long lohi, lolo; /* low quadword: hi dword, lo dword */
 693
 694    int minSum;
 695    int maxSum;
 696    int minColL = 0, maxColL = 0;
 697    int minColR = 0, maxColR = 0;
 698    int sumL = 0, sumR = 0;
 699
 700    /* Our solution here is to find the darkest and brightest colors in
 701     * the 4x4 tile and use those as the two representative colors.
 702     * There are probably better algorithms to use (histogram-based).
 703     */
 704    minSum = 1000; /* big enough */
 705    maxSum = -1; /* small enough */
 706    for (k = 0; k < N_TEXELS / 2; k++) {
 707       int sum = 0;
 708       for (i = 0; i < n_comp; i++) {
 709          sum += input[k][i];
 710       }
 711       if (minSum > sum) {
 712          minSum = sum;
 713          minColL = k;
 714       }
 715       if (maxSum < sum) {
 716          maxSum = sum;
 717          maxColL = k;
 718       }
 719       sumL += sum;
 720    }
 721    minSum = 1000; /* big enough */
 722    maxSum = -1; /* small enough */
 723    for (; k < N_TEXELS; k++) {
 724       int sum = 0;
 725       for (i = 0; i < n_comp; i++) {
 726          sum += input[k][i];
 727       }
 728       if (minSum > sum) {
 729          minSum = sum;
 730          minColR = k;
 731       }
 732       if (maxSum < sum) {
 733          maxSum = sum;
 734          maxColR = k;
 735       }
 736       sumR += sum;
 737    }
 738
 739    /* choose the common vector (yuck!) */
 740 {
 741    int j1, j2;
 742    int v1 = 0, v2 = 0;
 743    float err = 1e9; /* big enough */
 744    float tv[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
 745    for (i = 0; i < n_comp; i++) {
 746       tv[0][i] = input[minColL][i];
 747       tv[1][i] = input[maxColL][i];
 748       tv[2][i] = input[minColR][i];
 749       tv[3][i] = input[maxColR][i];
 750    }
 751    for (j1 = 0; j1 < 2; j1++) {
 752       for (j2 = 2; j2 < 4; j2++) {
 753           float e = 0;
 754           for (i = 0; i < n_comp; i++) {
 755              e += (tv[j1][i] - tv[j2][i]) * (tv[j1][i] - tv[j2][i]);
 756           }
 757           if (e < err) {
 758              err = e;
 759              v1 = j1;
 760              v2 = j2;
 761           }
 762       }
 763    }
 764    for (i = 0; i < n_comp; i++) {
 765       vec[0][i] = tv[1 - v1][i];
 766       vec[1][i] = (tv[v1][i] * sumL + tv[v2][i] * sumR) / (sumL + sumR);
 767       vec[2][i] = tv[5 - v2][i];
 768    }
 769 }
 770
 771    /* left microtile */
 772    cc[0] = 0;
 773    if (minColL != maxColL) {
 774       /* compute interpolation vector */
 775       float d2 = 0;
 776       float rd2;
 777
 778       for (i = 0; i < n_comp; i++) {
 779          iv[i] = vec[1][i] - vec[0][i];
 780          d2 += iv[i] * iv[i];
 781       }
 782       rd2 = (float)n_vect / d2;
 783       b = 0;
 784       for (i = 0; i < n_comp; i++) {
 785          b -= iv[i] * vec[0][i];
 786          iv[i] *= rd2;
 787       }
 788       b = b * rd2 + 0.5f;
 789
 790       /* add in texels */
 791       lolo = 0;
 792       for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
 793          int texel;
 794          /* interpolate color */
 795          float dot = 0;
 796          for (i = 0; i < n_comp; i++) {
 797             dot += input[k][i] * iv[i];
 798          }
 799          texel = (int)(dot + b);
 800          if (texel < 0) {
 801             texel = 0;
 802          } else if (texel > n_vect) {
 803             texel = n_vect;
 804          }
 805          /* add in texel */
 806          lolo <<= 2;
 807          lolo |= texel;
 808       }
 809
 810       cc[0] = lolo;
 811    }
 812
 813    /* right microtile */
 814    cc[1] = 0;
 815    if (minColR != maxColR) {
 816       /* compute interpolation vector */
 817       float d2 = 0;
 818       float rd2;
 819
 820       for (i = 0; i < n_comp; i++) {
 821          iv[i] = vec[1][i] - vec[2][i];
 822          d2 += iv[i] * iv[i];
 823       }
 824       rd2 = (float)n_vect / d2;
 825       b = 0;
 826       for (i = 0; i < n_comp; i++) {
 827          b -= iv[i] * vec[2][i];
 828          iv[i] *= rd2;
 829       }
 830       b = b * rd2 + 0.5f;
 831
 832       /* add in texels */
 833       lohi = 0;
 834       for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
 835          int texel;
 836          /* interpolate color */
 837          float dot = 0;
 838          for (i = 0; i < n_comp; i++) {
 839             dot += input[k][i] * iv[i];
 840          }
 841          texel = (int)(dot + b);
 842          if (texel < 0) {
 843             texel = 0;
 844          } else if (texel > n_vect) {
 845             texel = n_vect;
 846          }
 847          /* add in texel */
 848          lohi <<= 2;
 849          lohi |= texel;
 850       }
 851
 852       cc[1] = lohi;
 853    }
 854
 855    FX64_MOV32(hi, 7); /* alpha = "011" + lerp = 1 */
 856    for (j = n_vect - 1; j >= 0; j--) {
 857       /* add in alphas */
 858       FX64_SHL(hi, 5);
 859       FX64_OR32(hi, (unsigned int)(vec[j][ACOMP] / 8.0));
 860    }
 861    for (j = n_vect - 1; j >= 0; j--) {
 862       for (i = 0; i < n_comp - 1; i++) {
 863          /* add in colors */
 864          FX64_SHL(hi, 5);
 865          FX64_OR32(hi, (unsigned int)(vec[j][i] / 8.0));
 866       }
 867    }
 868    ((Fx64 *)cc)[1] = hi;
 869 }
 870
 871
 872 static void
 873 fxt1_quantize_HI (unsigned long *cc,
 874                   unsigned char input[N_TEXELS][MAX_COMP],
 875                   unsigned char reord[N_TEXELS][MAX_COMP], int n)
 876 {
 877    const int n_vect = 6; /* highest vector number */
 878    const int n_comp = 3; /* 3 components: R, G, B */
 879    float b = 0.0;               /* phoudoin: silent compiler! */
 880    float iv[MAX_COMP]; /* interpolation vector */
 881    int i, k;
 882    unsigned long hihi; /* high quadword: hi dword */
 883
 884    int minSum = 1000; /* big enough */
 885    int maxSum = -1; /* small enough */
 886    int minCol = 0;      /* phoudoin: silent compiler! */
 887    int maxCol = 0;      /* phoudoin: silent compiler! */
 888
 889    /* Our solution here is to find the darkest and brightest colors in
 890     * the 8x4 tile and use those as the two representative colors.
 891     * There are probably better algorithms to use (histogram-based).
 892     */
 893    for (k = 0; k < n; k++) {
 894       int sum = 0;
 895       for (i = 0; i < n_comp; i++) {
 896          sum += reord[k][i];
 897       }
 898       if (minSum > sum) {
 899          minSum = sum;
 900          minCol = k;
 901       }
 902       if (maxSum < sum) {
 903          maxSum = sum;
 904          maxCol = k;
 905       }
 906    }
 907
 908    hihi = 0; /* cc-hi = "00" */
 909    for (i = 0; i < n_comp; i++) {
 910       /* add in colors */
 911       hihi <<= 5;
 912       hihi |= reord[maxCol][i] >> 3;
 913    }
 914    for (i = 0; i < n_comp; i++) {
 915       /* add in colors */
 916       hihi <<= 5;
 917       hihi |= reord[minCol][i] >> 3;
 918    }
 919    cc[3] = hihi;
 920    cc[0] = cc[1] = cc[2] = 0;
 921
 922    /* compute interpolation vector */
 923    if (minCol != maxCol) {
 924       float d2 = 0;
 925       float rd2;
 926
 927       for (i = 0; i < n_comp; i++) {
 928          iv[i] = reord[maxCol][i] - reord[minCol][i];
 929          d2 += iv[i] * iv[i];
 930       }
 931       rd2 = (float)n_vect / d2;
 932       b = 0;
 933       for (i = 0; i < n_comp; i++) {
 934          b -= iv[i] * reord[minCol][i];
 935          iv[i] *= rd2;
 936       }
 937       b = b * rd2 + 0.5f;
 938    }
 939
 940    /* add in texels */
 941    for (k = N_TEXELS - 1; k >= 0; k--) {
 942       int t = k * 3;
 943       unsigned long *kk = (unsigned long *)((unsigned long)cc + t / 8);
 944       int texel = n_vect + 1; /* transparent black */
 945
 946       if (!ISTBLACK(input[k])) {
 947          if (minCol != maxCol) {
 948             /* interpolate color */
 949             float dot = 0;
 950             for (i = 0; i < n_comp; i++) {
 951                dot += input[k][i] * iv[i];
 952             }
 953             texel = (int)(dot + b);
 954             if (texel < 0) {
 955                texel = 0;
 956             } else if (texel > n_vect) {
 957                texel = n_vect;
 958             }
 959             /* add in texel */
 960             kk[0] |= texel << (t & 7);
 961          }
 962       } else {
 963          /* add in texel */
 964          kk[0] |= texel << (t & 7);
 965       }
 966    }
 967 }
 968
 969
 970 static void
 971 fxt1_quantize_MIXED1 (unsigned long *cc,
 972                       unsigned char input[N_TEXELS][MAX_COMP])
 973 {
 974    const int n_vect = 2; /* highest vector number in each microtile */
 975    const int n_comp = 3; /* 3 components: R, G, B */
 976    unsigned char vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
 977    float b, iv[MAX_COMP]; /* interpolation vector */
 978    int i, j, k;
 979    Fx64 hi; /* high quadword */
 980    unsigned long lohi, lolo; /* low quadword: hi dword, lo dword */
 981
 982    int minSum;
 983    int maxSum;
 984    int minColL = 0, maxColL = -1;
 985    int minColR = 0, maxColR = -1;
 986
 987    /* Our solution here is to find the darkest and brightest colors in
 988     * the 4x4 tile and use those as the two representative colors.
 989     * There are probably better algorithms to use (histogram-based).
 990     */
 991    minSum = 1000; /* big enough */
 992    maxSum = -1; /* small enough */
 993    for (k = 0; k < N_TEXELS / 2; k++) {
 994       if (!ISTBLACK(input[k])) {
 995          int sum = 0;
 996          for (i = 0; i < n_comp; i++) {
 997             sum += input[k][i];
 998          }
 999          if (minSum > sum) {
1000             minSum = sum;
1001             minColL = k;
1002          }
1003          if (maxSum < sum) {
1004             maxSum = sum;
1005             maxColL = k;
1006          }
1007       }
1008    }
1009    minSum = 1000; /* big enough */
1010    maxSum = -1; /* small enough */
1011    for (; k < N_TEXELS; k++) {
1012       if (!ISTBLACK(input[k])) {
1013          int sum = 0;
1014          for (i = 0; i < n_comp; i++) {
1015             sum += input[k][i];
1016          }
1017          if (minSum > sum) {
1018             minSum = sum;
1019             minColR = k;
1020          }
1021          if (maxSum < sum) {
1022             maxSum = sum;
1023             maxColR = k;
1024          }
1025       }
1026    }
1027
1028    /* left microtile */
1029    if (maxColL == -1) {
1030       /* all transparent black */
1031       cc[0] = -1;
1032       for (i = 0; i < n_comp; i++) {
1033          vec[0][i] = 0;
1034          vec[1][i] = 0;
1035       }
1036    } else {
1037       cc[0] = 0;
1038       for (i = 0; i < n_comp; i++) {
1039          vec[0][i] = input[minColL][i];
1040          vec[1][i] = input[maxColL][i];
1041       }
1042       if (minColL != maxColL) {
1043          /* compute interpolation vector */
1044          float d2 = 0;
1045          float rd2;
1046
1047          for (i = 0; i < n_comp; i++) {
1048             iv[i] = vec[1][i] - vec[0][i];
1049             d2 += iv[i] * iv[i];
1050          }
1051          rd2 = (float)n_vect / d2;
1052          b = 0;
1053          for (i = 0; i < n_comp; i++) {
1054             b -= iv[i] * vec[0][i];
1055             iv[i] *= rd2;
1056          }
1057          b = b * rd2 + 0.5f;
1058
1059          /* add in texels */
1060          lolo = 0;
1061          for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
1062             int texel = n_vect + 1; /* transparent black */
1063             if (!ISTBLACK(input[k])) {
1064                /* interpolate color */
1065                float dot = 0;
1066                for (i = 0; i < n_comp; i++) {
1067                   dot += input[k][i] * iv[i];
1068                }
1069                texel = (int)(dot + b);
1070                if (texel < 0) {
1071                   texel = 0;
1072                } else if (texel > n_vect) {
1073                   texel = n_vect;
1074                }
1075             }
1076             /* add in texel */
1077             lolo <<= 2;
1078             lolo |= texel;
1079          }
1080          cc[0] = lolo;
1081       }
1082    }
1083
1084    /* right microtile */
1085    if (maxColR == -1) {
1086       /* all transparent black */
1087       cc[1] = -1;
1088       for (i = 0; i < n_comp; i++) {
1089          vec[2][i] = 0;
1090          vec[3][i] = 0;
1091       }
1092    } else {
1093       cc[1] = 0;
1094       for (i = 0; i < n_comp; i++) {
1095          vec[2][i] = input[minColR][i];
1096          vec[3][i] = input[maxColR][i];
1097       }
1098       if (minColR != maxColR) {
1099          /* compute interpolation vector */
1100          float d2 = 0;
1101          float rd2;
1102
1103          for (i = 0; i < n_comp; i++) {
1104             iv[i] = vec[3][i] - vec[2][i];
1105             d2 += iv[i] * iv[i];
1106          }
1107          rd2 = (float)n_vect / d2;
1108          b = 0;
1109          for (i = 0; i < n_comp; i++) {
1110             b -= iv[i] * vec[2][i];
1111             iv[i] *= rd2;
1112          }
1113          b = b * rd2 + 0.5f;
1114
1115          /* add in texels */
1116          lohi = 0;
1117          for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
1118             int texel = n_vect + 1; /* transparent black */
1119             if (!ISTBLACK(input[k])) {
1120                /* interpolate color */
1121                float dot = 0;
1122                for (i = 0; i < n_comp; i++) {
1123                   dot += input[k][i] * iv[i];
1124                }
1125                texel = (int)(dot + b);
1126                if (texel < 0) {
1127                   texel = 0;
1128                } else if (texel > n_vect) {
1129                   texel = n_vect;
1130                }
1131             }
1132             /* add in texel */
1133             lohi <<= 2;
1134             lohi |= texel;
1135          }
1136          cc[1] = lohi;
1137       }
1138    }
1139
1140    FX64_MOV32(hi, 9 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
1141    for (j = 2 * 2 - 1; j >= 0; j--) {
1142       for (i = 0; i < n_comp; i++) {
1143          /* add in colors */
1144          FX64_SHL(hi, 5);
1145          FX64_OR32(hi, vec[j][i] >> 3);
1146       }
1147    }
1148    ((Fx64 *)cc)[1] = hi;
1149 }
1150
1151
1152 static void
1153 fxt1_quantize_MIXED0 (unsigned long *cc,
1154                       unsigned char input[N_TEXELS][MAX_COMP])
1155 {
1156    const int n_vect = 3; /* highest vector number in each microtile */
1157    const int n_comp = 3; /* 3 components: R, G, B */
1158    unsigned char vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
1159    float b, iv[MAX_COMP]; /* interpolation vector */
1160    int i, j, k;
1161    Fx64 hi; /* high quadword */
1162    unsigned long lohi, lolo; /* low quadword: hi dword, lo dword */
1163
1164    int minColL = 0, maxColL = 0;
1165    int minColR = 0, maxColR = 0;
1166 #if 0
1167    int minSum;
1168    int maxSum;
1169
1170    /* Our solution here is to find the darkest and brightest colors in
1171     * the 4x4 tile and use those as the two representative colors.
1172     * There are probably better algorithms to use (histogram-based).
1173     */
1174    minSum = 1000; /* big enough */
1175    maxSum = -1; /* small enough */
1176    for (k = 0; k < N_TEXELS / 2; k++) {
1177       int sum = 0;
1178       for (i = 0; i < n_comp; i++) {
1179          sum += input[k][i];
1180       }
1181       if (minSum > sum) {
1182          minSum = sum;
1183          minColL = k;
1184       }
1185       if (maxSum < sum) {
1186          maxSum = sum;
1187          maxColL = k;
1188       }
1189    }
1190    minSum = 1000; /* big enough */
1191    maxSum = -1; /* small enough */
1192    for (; k < N_TEXELS; k++) {
1193       int sum = 0;
1194       for (i = 0; i < n_comp; i++) {
1195          sum += input[k][i];
1196       }
1197       if (minSum > sum) {
1198          minSum = sum;
1199          minColR = k;
1200       }
1201       if (maxSum < sum) {
1202          maxSum = sum;
1203          maxColR = k;
1204       }
1205    }
1206 #else
1207    int minVal;
1208    int maxVal;
1209    int maxVarL = fxt1_variance(NULL, input, n_comp, N_TEXELS / 2);
1210    int maxVarR = fxt1_variance(NULL, &input[N_TEXELS / 2], n_comp, N_TEXELS / 2);
1211
1212    /* Scan the channel with max variance for lo & hi
1213     * and use those as the two representative colors.
1214     */
1215    minVal = 1000; /* big enough */
1216    maxVal = -1; /* small enough */
1217    for (k = 0; k < N_TEXELS / 2; k++) {
1218       int t = input[k][maxVarL];
1219       if (minVal > t) {
1220          minVal = t;
1221          minColL = k;
1222       }
1223       if (maxVal < t) {
1224          maxVal = t;
1225          maxColL = k;
1226       }
1227    }
1228    minVal = 1000; /* big enough */
1229    maxVal = -1; /* small enough */
1230    for (; k < N_TEXELS; k++) {
1231       int t = input[k][maxVarR];
1232       if (minVal > t) {
1233          minVal = t;
1234          minColR = k;
1235       }
1236       if (maxVal < t) {
1237          maxVal = t;
1238          maxColR = k;
1239       }
1240    }
1241 #endif
1242
1243    /* left microtile */
1244    cc[0] = 0;
1245    for (i = 0; i < n_comp; i++) {
1246       vec[0][i] = input[minColL][i];
1247       vec[1][i] = input[maxColL][i];
1248    }
1249    if (minColL != maxColL) {
1250       /* compute interpolation vector */
1251       float d2 = 0;
1252       float rd2;
1253
1254       for (i = 0; i < n_comp; i++) {
1255          iv[i] = vec[1][i] - vec[0][i];
1256          d2 += iv[i] * iv[i];
1257       }
1258       rd2 = (float)n_vect / d2;
1259       b = 0;
1260       for (i = 0; i < n_comp; i++) {
1261          b -= iv[i] * vec[0][i];
1262          iv[i] *= rd2;
1263       }
1264       b = b * rd2 + 0.5f;
1265
1266       /* add in texels */
1267       lolo = 0;
1268       for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
1269          int texel;
1270          /* interpolate color */
1271          float dot = 0;
1272          for (i = 0; i < n_comp; i++) {
1273             dot += input[k][i] * iv[i];
1274          }
1275          texel = (int)(dot + b);
1276          if (texel < 0) {
1277             texel = 0;
1278          } else if (texel > n_vect) {
1279             texel = n_vect;
1280          }
1281          /* add in texel */
1282          lolo <<= 2;
1283          lolo |= texel;
1284       }
1285
1286       /* funky encoding for LSB of green */
1287       if (((lolo >> 1) & 1) != (((vec[1][GCOMP] ^ vec[0][GCOMP]) >> 2) & 1)) {
1288          for (i = 0; i < n_comp; i++) {
1289             vec[1][i] = input[minColL][i];
1290             vec[0][i] = input[maxColL][i];
1291          }
1292          lolo = ~lolo;
1293       }
1294
1295       cc[0] = lolo;
1296    }
1297
1298    /* right microtile */
1299    cc[1] = 0;
1300    for (i = 0; i < n_comp; i++) {
1301       vec[2][i] = input[minColR][i];
1302       vec[3][i] = input[maxColR][i];
1303    }
1304    if (minColR != maxColR) {
1305       /* compute interpolation vector */
1306       float d2 = 0;
1307       float rd2;
1308
1309       for (i = 0; i < n_comp; i++) {
1310          iv[i] = vec[3][i] - vec[2][i];
1311          d2 += iv[i] * iv[i];
1312       }
1313       rd2 = (float)n_vect / d2;
1314       b = 0;
1315       for (i = 0; i < n_comp; i++) {
1316          b -= iv[i] * vec[2][i];
1317          iv[i] *= rd2;
1318       }
1319       b = b * rd2 + 0.5f;
1320
1321       /* add in texels */
1322       lohi = 0;
1323       for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
1324          int texel;
1325          /* interpolate color */
1326          float dot = 0;
1327          for (i = 0; i < n_comp; i++) {
1328             dot += input[k][i] * iv[i];
1329          }
1330          texel = (int)(dot + b);
1331          if (texel < 0) {
1332             texel = 0;
1333          } else if (texel > n_vect) {
1334             texel = n_vect;
1335          }
1336          /* add in texel */
1337          lohi <<= 2;
1338          lohi |= texel;
1339       }
1340
1341       /* funky encoding for LSB of green */
1342       if (((lohi >> 1) & 1) != (((vec[3][GCOMP] ^ vec[2][GCOMP]) >> 2) & 1)) {
1343          for (i = 0; i < n_comp; i++) {
1344             vec[3][i] = input[minColR][i];
1345             vec[2][i] = input[maxColR][i];
1346          }
1347          lohi = ~lohi;
1348       }
1349
1350       cc[1] = lohi;
1351    }
1352
1353    FX64_MOV32(hi, 8 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
1354    for (j = 2 * 2 - 1; j >= 0; j--) {
1355       for (i = 0; i < n_comp; i++) {
1356          /* add in colors */
1357          FX64_SHL(hi, 5);
1358          FX64_OR32(hi, vec[j][i] >> 3);
1359       }
1360    }
1361    ((Fx64 *)cc)[1] = hi;
1362 }
1363
1364
1365 static void
1366 fxt1_quantize (unsigned long *cc, const unsigned char *lines[], int comps)
1367 {
1368    int trualpha;
1369    unsigned char reord[N_TEXELS][MAX_COMP];
1370
1371    unsigned char input[N_TEXELS][MAX_COMP];
1372    int i, k, l;
1373
1374    memset(input, -1, sizeof(input));
1375
1376    /* 8 texels each line */
1377    for (l = 0; l < 4; l++) {
1378       for (k = 0; k < 4; k++) {
1379          for (i = 0; i < comps; i++) {
1380             input[k + l * 4][i] = *lines[l]++;
1381          }
1382       }
1383       for (; k < 8; k++) {
1384          for (i = 0; i < comps; i++) {
1385             input[k + l * 4 + 12][i] = *lines[l]++;
1386          }
1387       }
1388    }
1389
1390    /* block looks like this:
1391     * 00, 01, 02, 03, 08, 09, 0a, 0b
1392     * 10, 11, 12, 13, 18, 19, 1a, 1b
1393     * 04, 05, 06, 07, 0c, 0d, 0e, 0f
1394     * 14, 15, 16, 17, 1c, 1d, 1e, 1f
1395     */
1396
1397    /* [dBorca]
1398     * stupidity flows forth from this
1399     */
1400    l = N_TEXELS;
1401    trualpha = 0;
1402    if (comps == 4) {
1403       /* skip all transparent black texels */
1404       l = 0;
1405       for (k = 0; k < N_TEXELS; k++) {
1406          /* test all components against 0 */
1407          if (!ISTBLACK(input[k])) {
1408             /* texel is not transparent black */
1409             COPY_4UBV(reord[l], input[k]);
1410             if (reord[l][ACOMP] < (255 - ALPHA_TS)) {
1411                /* non-opaque texel */
1412                trualpha = !0;
1413             }
1414             l++;
1415          }
1416       }
1417    }
1418
1419 #if 0
1420    if (trualpha) {
1421       fxt1_quantize_ALPHA0(cc, input, reord, l);
1422    } else if (l == 0) {
1423       cc[0] = cc[1] = cc[2] = -1;
1424       cc[3] = 0;
1425    } else if (l < N_TEXELS) {
1426       fxt1_quantize_HI(cc, input, reord, l);
1427    } else {
1428       fxt1_quantize_CHROMA(cc, input);
1429    }
1430 #else
1431    if (trualpha) {
1432       fxt1_quantize_ALPHA1(cc, input);
1433    } else if (l == 0) {
1434       cc[0] = cc[1] = cc[2] = -1;
1435       cc[3] = 0;
1436    } else if (l < N_TEXELS) {
1437       fxt1_quantize_MIXED1(cc, input);
1438    } else {
1439       fxt1_quantize_MIXED0(cc, input);
1440    }
1441 #endif
1442 }
1443
1444
1445 int
1446 fxt1_encode (GLcontext *ctx,
1447              unsigned int width, unsigned int height,
1448              int srcFormat,
1449              const void *source, int srcRowStride,
1450              void *dest, int destRowStride)
1451 {
1452    const int comps = (srcFormat == GL_RGB) ? 3 : 4;
1453    unsigned int x, y;
1454    const unsigned char *data;
1455    unsigned long *encoded = dest;
1456    GLubyte *newSource = NULL;
1457
1458    /*
1459     * Rescale image if width is less than 8 or height is less than 4.
1460     */
1461    if (width < 8 || height < 4) {
1462       GLint newWidth = (width + 7) & ~7;
1463       GLint newHeight = (height + 3) & ~3;
1464       newSource = MALLOC(comps * newWidth * newHeight * sizeof(GLchan));
1465       _mesa_upscale_teximage2d(width, height, newWidth, newHeight,
1466                                comps, source, srcRowStride, newSource);
1467       source = newSource;
1468       width = newWidth;
1469       height = newHeight;
1470       srcRowStride = comps * newWidth;
1471    }
1472
1473    data = source;
1474    destRowStride = (destRowStride - width * 2) / 4;
1475    for (y = 0; y < height; y += 4) {
1476       unsigned int offs = 0 + (y + 0) * srcRowStride;
1477       for (x = 0; x < width; x += 8) {
1478          const unsigned char *lines[4];
1479          lines[0] = &data[offs];
1480          lines[1] = lines[0] + srcRowStride;
1481          lines[2] = lines[1] + srcRowStride;
1482          lines[3] = lines[2] + srcRowStride;
1483          offs += 8 * comps;
1484          fxt1_quantize(encoded, lines, comps);
1485          /* 128 bits per 8x4 block = 4bpp */
1486          encoded += 4;
1487       }
1488       encoded += destRowStride;
1489    }
1490
1491    if (newSource != NULL) {
1492       FREE(newSource);
1493    }
1494
1495    return 0;
1496 }
1497
1498
1499 /***************************************************************************\
1500  * FXT1 decoder
1501  *
1502  * The decoder is based on GL_3DFX_texture_compression_FXT1
1503  * specification and serves as a concept for the encoder.
1504 \***************************************************************************/
1505
1506
/* Lookup table for scaling 5 bit colors up to 8 bits.
 * Entry i holds i * 255 / 31 rounded to the nearest integer, so that
 * 0 maps to 0 and 31 maps to 255.  The table is never modified.
 */
static const unsigned char _rgb_scale_5[] = {
   0,   8,   16,  25,  33,  41,  49,  58,
   66,  74,  82,  90,  99,  107, 115, 123,
   132, 140, 148, 156, 165, 173, 181, 189,
   197, 206, 214, 222, 230, 239, 247, 255
};
1514
/* Lookup table for scaling 6 bit colors up to 8 bits.
 * Entry i holds i * 255 / 63 rounded to the nearest integer, so that
 * 0 maps to 0 and 63 maps to 255.  The table is never modified.
 */
static const unsigned char _rgb_scale_6[] = {
   0,   4,   8,   12,  16,  20,  24,  28,
   32,  36,  40,  45,  49,  53,  57,  61,
   65,  69,  73,  77,  81,  85,  89,  93,
   97,  101, 105, 109, 113, 117, 121, 125,
   130, 134, 138, 142, 146, 150, 154, 158,
   162, 166, 170, 174, 178, 182, 186, 190,
   194, 198, 202, 206, 210, 215, 219, 223,
   227, 231, 235, 239, 243, 247, 251, 255
};
1526
1527
/* CC_SEL: view `cc' as an array of 32-bit words and shift the word holding
 * bit `which' so that this bit becomes bit 0.  The result is not masked and
 * does not cross word boundaries; callers mask what they need.
 * NOTE(review): this only matches the block layout when the element type
 * of `cc' is 32 bits wide.
 */
#define CC_SEL(cc, which) ((cc)[(which) / 32] >> ((which) & 31))
/* UP5: expand the low 5 bits of `c' to an 8 bit value */
#define UP5(c) _rgb_scale_5[(c) & 31]
/* UP6: expand the 6 bit value made of the low 5 bits of `c' followed by
 * bit 0 of `b' to an 8 bit value
 */
#define UP6(c, b) _rgb_scale_6[(((c) & 31) << 1) | ((b) & 1)]
/* LERP: rounded integer interpolation, step `t' of `n' from c0 to c1.
 * Fully parenthesized so it can be used inside larger expressions.
 */
#define LERP(n, t, c0, c1) ((((n) - (t)) * (c0) + (t) * (c1) + (n) / 2) / (n))
/* ZERO_4UBV: clear exactly four bytes at `v', whatever their alignment */
#define ZERO_4UBV(v) memset((v), 0, 4)
1533
1534
1535 static void
1536 fxt1_decode_1HI (unsigned long code, int t, unsigned char *rgba)
1537 {
1538    const unsigned long *cc;
1539
1540    t *= 3;
1541    cc = (unsigned long *)(code + t / 8);
1542    t = (cc[0] >> (t & 7)) & 7;
1543
1544    if (t == 7) {
1545       ZERO_4UBV(rgba);
1546    } else {
1547       cc = (unsigned long *)(code + 12);
1548       if (t == 0) {
1549          rgba[BCOMP] = UP5(CC_SEL(cc, 0));
1550          rgba[GCOMP] = UP5(CC_SEL(cc, 5));
1551          rgba[RCOMP] = UP5(CC_SEL(cc, 10));
1552       } else if (t == 6) {
1553          rgba[BCOMP] = UP5(CC_SEL(cc, 15));
1554          rgba[GCOMP] = UP5(CC_SEL(cc, 20));
1555          rgba[RCOMP] = UP5(CC_SEL(cc, 25));
1556       } else {
1557          rgba[BCOMP] = LERP(6, t, UP5(CC_SEL(cc, 0)), UP5(CC_SEL(cc, 15)));
1558          rgba[GCOMP] = LERP(6, t, UP5(CC_SEL(cc, 5)), UP5(CC_SEL(cc, 20)));
1559          rgba[RCOMP] = LERP(6, t, UP5(CC_SEL(cc, 10)), UP5(CC_SEL(cc, 25)));
1560       }
1561       rgba[ACOMP] = 255;
1562    }
1563 }
1564
1565
1566 static void
1567 fxt1_decode_1CHROMA (unsigned long code, int t, unsigned char *rgba)
1568 {
1569    const unsigned long *cc;
1570    unsigned long kk;
1571
1572    cc = (unsigned long *)code;
1573    if (t & 16) {
1574       cc++;
1575       t &= 15;
1576    }
1577    t = (cc[0] >> (t * 2)) & 3;
1578
1579    t *= 15;
1580    cc = (unsigned long *)(code + 8 + t / 8);
1581    kk = cc[0] >> (t & 7);
1582    rgba[BCOMP] = UP5(kk);
1583    rgba[GCOMP] = UP5(kk >> 5);
1584    rgba[RCOMP] = UP5(kk >> 10);
1585    rgba[ACOMP] = 255;
1586 }
1587
1588
1589 static void
1590 fxt1_decode_1MIXED (unsigned long code, int t, unsigned char *rgba)
1591 {
1592    const unsigned long *cc;
1593    unsigned int col[2][3];
1594    int glsb, selb;
1595
1596    cc = (unsigned long *)code;
1597    if (t & 16) {
1598       t &= 15;
1599       t = (cc[1] >> (t * 2)) & 3;
1600       /* col 2 */
1601       col[0][BCOMP] = (*(unsigned long *)(code + 11)) >> 6;
1602       col[0][GCOMP] = CC_SEL(cc, 99);
1603       col[0][RCOMP] = CC_SEL(cc, 104);
1604       /* col 3 */
1605       col[1][BCOMP] = CC_SEL(cc, 109);
1606       col[1][GCOMP] = CC_SEL(cc, 114);
1607       col[1][RCOMP] = CC_SEL(cc, 119);
1608       glsb = CC_SEL(cc, 126);
1609       selb = CC_SEL(cc, 33);
1610    } else {
1611       t = (cc[0] >> (t * 2)) & 3;
1612       /* col 0 */
1613       col[0][BCOMP] = CC_SEL(cc, 64);
1614       col[0][GCOMP] = CC_SEL(cc, 69);
1615       col[0][RCOMP] = CC_SEL(cc, 74);
1616       /* col 1 */
1617       col[1][BCOMP] = CC_SEL(cc, 79);
1618       col[1][GCOMP] = CC_SEL(cc, 84);
1619       col[1][RCOMP] = CC_SEL(cc, 89);
1620       glsb = CC_SEL(cc, 125);
1621       selb = CC_SEL(cc, 1);
1622    }
1623
1624    if (CC_SEL(cc, 124) & 1) {
1625       /* alpha[0] == 1 */
1626
1627       if (t == 3) {
1628          ZERO_4UBV(rgba);
1629       } else {
1630          if (t == 0) {
1631             rgba[BCOMP] = UP5(col[0][BCOMP]);
1632             rgba[GCOMP] = UP5(col[0][GCOMP]);
1633             rgba[RCOMP] = UP5(col[0][RCOMP]);
1634          } else if (t == 2) {
1635             rgba[BCOMP] = UP5(col[1][BCOMP]);
1636             rgba[GCOMP] = UP6(col[1][GCOMP], glsb);
1637             rgba[RCOMP] = UP5(col[1][RCOMP]);
1638          } else {
1639             rgba[BCOMP] = (UP5(col[0][BCOMP]) + UP5(col[1][BCOMP])) / 2;
1640             rgba[GCOMP] = (UP5(col[0][GCOMP]) + UP6(col[1][GCOMP], glsb)) / 2;
1641             rgba[RCOMP] = (UP5(col[0][RCOMP]) + UP5(col[1][RCOMP])) / 2;
1642          }
1643          rgba[ACOMP] = 255;
1644       }
1645    } else {
1646       /* alpha[0] == 0 */
1647
1648       if (t == 0) {
1649          rgba[BCOMP] = UP5(col[0][BCOMP]);
1650          rgba[GCOMP] = UP6(col[0][GCOMP], glsb ^ selb);
1651          rgba[RCOMP] = UP5(col[0][RCOMP]);
1652       } else if (t == 3) {
1653          rgba[BCOMP] = UP5(col[1][BCOMP]);
1654          rgba[GCOMP] = UP6(col[1][GCOMP], glsb);
1655          rgba[RCOMP] = UP5(col[1][RCOMP]);
1656       } else {
1657          rgba[BCOMP] = LERP(3, t, UP5(col[0][BCOMP]), UP5(col[1][BCOMP]));
1658          rgba[GCOMP] = LERP(3, t, UP6(col[0][GCOMP], glsb ^ selb),
1659                                   UP6(col[1][GCOMP], glsb));
1660          rgba[RCOMP] = LERP(3, t, UP5(col[0][RCOMP]), UP5(col[1][RCOMP]));
1661       }
1662       rgba[ACOMP] = 255;
1663    }
1664 }
1665
1666
1667 static void
1668 fxt1_decode_1ALPHA (unsigned long code, int t, unsigned char *rgba)
1669 {
1670    const unsigned long *cc;
1671
1672    cc = (unsigned long *)code;
1673    if (CC_SEL(cc, 124) & 1) {
1674       /* lerp == 1 */
1675       unsigned int col0[4];
1676
1677       if (t & 16) {
1678          t &= 15;
1679          t = (cc[1] >> (t * 2)) & 3;
1680          /* col 2 */
1681          col0[BCOMP] = (*(unsigned long *)(code + 11)) >> 6;
1682          col0[GCOMP] = CC_SEL(cc, 99);
1683          col0[RCOMP] = CC_SEL(cc, 104);
1684          col0[ACOMP] = CC_SEL(cc, 119);
1685       } else {
1686          t = (cc[0] >> (t * 2)) & 3;
1687          /* col 0 */
1688          col0[BCOMP] = CC_SEL(cc, 64);
1689          col0[GCOMP] = CC_SEL(cc, 69);
1690          col0[RCOMP] = CC_SEL(cc, 74);
1691          col0[ACOMP] = CC_SEL(cc, 109);
1692       }
1693
1694       if (t == 0) {
1695          rgba[BCOMP] = UP5(col0[BCOMP]);
1696          rgba[GCOMP] = UP5(col0[GCOMP]);
1697          rgba[RCOMP] = UP5(col0[RCOMP]);
1698          rgba[ACOMP] = UP5(col0[ACOMP]);
1699       } else if (t == 3) {
1700          rgba[BCOMP] = UP5(CC_SEL(cc, 79));
1701          rgba[GCOMP] = UP5(CC_SEL(cc, 84));
1702          rgba[RCOMP] = UP5(CC_SEL(cc, 89));
1703          rgba[ACOMP] = UP5(CC_SEL(cc, 114));
1704       } else {
1705          rgba[BCOMP] = LERP(3, t, UP5(col0[BCOMP]), UP5(CC_SEL(cc, 79)));
1706          rgba[GCOMP] = LERP(3, t, UP5(col0[GCOMP]), UP5(CC_SEL(cc, 84)));
1707          rgba[RCOMP] = LERP(3, t, UP5(col0[RCOMP]), UP5(CC_SEL(cc, 89)));
1708          rgba[ACOMP] = LERP(3, t, UP5(col0[ACOMP]), UP5(CC_SEL(cc, 114)));
1709       }
1710    } else {
1711       /* lerp == 0 */
1712
1713       if (t & 16) {
1714          cc++;
1715          t &= 15;
1716       }
1717       t = (cc[0] >> (t * 2)) & 3;
1718
1719       if (t == 3) {
1720          ZERO_4UBV(rgba);
1721       } else {
1722          unsigned long kk;
1723          cc = (unsigned long *)code;
1724          rgba[ACOMP] = UP5(cc[3] >> (t * 5 + 13));
1725          t *= 15;
1726          cc = (unsigned long *)(code + 8 + t / 8);
1727          kk = cc[0] >> (t & 7);
1728          rgba[BCOMP] = UP5(kk);
1729          rgba[GCOMP] = UP5(kk >> 5);
1730          rgba[RCOMP] = UP5(kk >> 10);
1731       }
1732    }
1733 }
1734
1735
1736 void
1737 fxt1_decode_1 (const void *texture, int width,
1738                int i, int j, unsigned char *rgba)
1739 {
1740    static void (*decode_1[]) (unsigned long, int, unsigned char *) = {
1741       fxt1_decode_1HI,     /* cc-high   = "00?" */
1742       fxt1_decode_1HI,     /* cc-high   = "00?" */
1743       fxt1_decode_1CHROMA, /* cc-chroma = "010" */
1744       fxt1_decode_1ALPHA,  /* alpha     = "011" */
1745       fxt1_decode_1MIXED,  /* mixed     = "1??" */
1746       fxt1_decode_1MIXED,  /* mixed     = "1??" */
1747       fxt1_decode_1MIXED,  /* mixed     = "1??" */
1748       fxt1_decode_1MIXED   /* mixed     = "1??" */
1749    };
1750
1751    unsigned long code = (unsigned long)texture +
1752                         ((j / 4) * (width / 8) + (i / 8)) * 16;
1753    int mode = CC_SEL((unsigned long *)code, 125);
1754    int t = i & 7;
1755
1756    if (t & 4) {
1757       t += 12;
1758    }
1759    t += (j & 3) * 4;
1760
1761    decode_1[mode](code, t, rgba);
1762 }