src/mesa/main/texcompress_s3tc_tmp.h

   1 /*
   2  * libtxc_dxtn
   3  * Version:  1.0
   4  *
   5  * Copyright (C) 2004  Roland Scheidegger   All Rights Reserved.
   6  *
   7  * Permission is hereby granted, free of charge, to any person obtaining a
   8  * copy of this software and associated documentation files (the "Software"),
   9  * to deal in the Software without restriction, including without limitation
  10  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  11  * and/or sell copies of the Software, and to permit persons to whom the
  12  * Software is furnished to do so, subject to the following conditions:
  13  *
  14  * The above copyright notice and this permission notice shall be included
  15  * in all copies or substantial portions of the Software.
  16  *
  17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  18  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  20  * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
  21  * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  22  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  23  */
  24
  25 #ifdef __APPLE__
  26 #include <OpenGL/gl.h>
  27 #else
  28 #include <GL/gl.h>
  29 #endif
  30
/* Channels are 8 bits wide in this file, so a channel is a plain GLubyte
 * and converting a byte to a channel is the identity. */
typedef GLubyte GLchan;
#define UBYTE_TO_CHAN(b)  (b)
#define CHAN_MAX 255
/* Indices of the red, green, blue and alpha channels inside a decoded texel. */
#define RCOMP 0
#define GCOMP 1
#define BCOMP 2
#define ACOMP 3

/* Single-texel fetch functions: decode texel (i, j) of a compressed image
 * into texel[RCOMP], texel[GCOMP], texel[BCOMP], texel[ACOMP]. */
void fetch_2d_texel_rgb_dxt1(GLint srcRowStride, const GLubyte *pixdata,
                             GLint i, GLint j, GLvoid *texel);
void fetch_2d_texel_rgba_dxt1(GLint srcRowStride, const GLubyte *pixdata,
                             GLint i, GLint j, GLvoid *texel);
void fetch_2d_texel_rgba_dxt3(GLint srcRowStride, const GLubyte *pixdata,
                             GLint i, GLint j, GLvoid *texel);
void fetch_2d_texel_rgba_dxt5(GLint srcRowStride, const GLubyte *pixdata,
                             GLint i, GLint j, GLvoid *texel);

/* Compressor entry point; its definition is not part of this section. */
void tx_compress_dxtn(GLint srccomps, GLint width, GLint height,
                      const GLubyte *srcPixData, GLenum destformat,
                      GLubyte *dest, GLint dstRowStride);

/* Red of a packed 5:6:5 color: bits 11-15 become bits 3-7 of the result and
 * the top three of them are repeated in bits 0-2. */
#define EXP5TO8R(packedcol)                                     \
   ((((packedcol) >> 8) & 0xf8) | (((packedcol) >> 13) & 0x7))

/* Green of a packed 5:6:5 color: bits 5-10 become bits 2-7 of the result and
 * the top two of them are repeated in bits 0-1. */
#define EXP6TO8G(packedcol)                                     \
   ((((packedcol) >> 3) & 0xfc) | (((packedcol) >>  9) & 0x3))

/* Blue of a packed 5:6:5 color: bits 0-4 become bits 3-7 of the result and
 * the top three of them are repeated in bits 0-2. */
#define EXP5TO8B(packedcol)                                     \
   ((((packedcol) << 3) & 0xf8) | (((packedcol) >>  2) & 0x7))

/* Copies col into the high nibble as well; for a 4-bit col (0..15) this
 * yields the 8-bit value with both nibbles equal to col. */
#define EXP4TO8(col)                                            \
   ((col) | ((col) << 4))
  63
  64 /* inefficient. To be efficient, it would be necessary to decode 16 pixels at once */
  65
  66 static void dxt135_decode_imageblock ( const GLubyte *img_block_src,
  67                          GLint i, GLint j, GLuint dxt_type, GLvoid *texel ) {
  68    GLchan *rgba = (GLchan *) texel;
  69    const GLushort color0 = img_block_src[0] | (img_block_src[1] << 8);
  70    const GLushort color1 = img_block_src[2] | (img_block_src[3] << 8);
  71    const GLuint bits = img_block_src[4] | (img_block_src[5] << 8) |
  72       (img_block_src[6] << 16) | (img_block_src[7] << 24);
  73    /* What about big/little endian? */
  74    GLubyte bit_pos = 2 * (j * 4 + i) ;
  75    GLubyte code = (GLubyte) ((bits >> bit_pos) & 3);
  76
  77    rgba[ACOMP] = CHAN_MAX;
  78    switch (code) {
  79    case 0:
  80       rgba[RCOMP] = UBYTE_TO_CHAN( EXP5TO8R(color0) );
  81       rgba[GCOMP] = UBYTE_TO_CHAN( EXP6TO8G(color0) );
  82       rgba[BCOMP] = UBYTE_TO_CHAN( EXP5TO8B(color0) );
  83       break;
  84    case 1:
  85       rgba[RCOMP] = UBYTE_TO_CHAN( EXP5TO8R(color1) );
  86       rgba[GCOMP] = UBYTE_TO_CHAN( EXP6TO8G(color1) );
  87       rgba[BCOMP] = UBYTE_TO_CHAN( EXP5TO8B(color1) );
  88       break;
  89    case 2:
  90       if ((dxt_type > 1) || (color0 > color1)) {
  91          rgba[RCOMP] = UBYTE_TO_CHAN( ((EXP5TO8R(color0) * 2 + EXP5TO8R(color1)) / 3) );
  92          rgba[GCOMP] = UBYTE_TO_CHAN( ((EXP6TO8G(color0) * 2 + EXP6TO8G(color1)) / 3) );
  93          rgba[BCOMP] = UBYTE_TO_CHAN( ((EXP5TO8B(color0) * 2 + EXP5TO8B(color1)) / 3) );
  94       }
  95       else {
  96          rgba[RCOMP] = UBYTE_TO_CHAN( ((EXP5TO8R(color0) + EXP5TO8R(color1)) / 2) );
  97          rgba[GCOMP] = UBYTE_TO_CHAN( ((EXP6TO8G(color0) + EXP6TO8G(color1)) / 2) );
  98          rgba[BCOMP] = UBYTE_TO_CHAN( ((EXP5TO8B(color0) + EXP5TO8B(color1)) / 2) );
  99       }
 100       break;
 101    case 3:
 102       if ((dxt_type > 1) || (color0 > color1)) {
 103          rgba[RCOMP] = UBYTE_TO_CHAN( ((EXP5TO8R(color0) + EXP5TO8R(color1) * 2) / 3) );
 104          rgba[GCOMP] = UBYTE_TO_CHAN( ((EXP6TO8G(color0) + EXP6TO8G(color1) * 2) / 3) );
 105          rgba[BCOMP] = UBYTE_TO_CHAN( ((EXP5TO8B(color0) + EXP5TO8B(color1) * 2) / 3) );
 106       }
 107       else {
 108          rgba[RCOMP] = 0;
 109          rgba[GCOMP] = 0;
 110          rgba[BCOMP] = 0;
 111          if (dxt_type == 1) rgba[ACOMP] = UBYTE_TO_CHAN(0);
 112       }
 113       break;
 114    default:
 115    /* CANNOT happen (I hope) */
 116       break;
 117    }
 118 }
 119
 120
 121 void fetch_2d_texel_rgb_dxt1(GLint srcRowStride, const GLubyte *pixdata,
 122                          GLint i, GLint j, GLvoid *texel)
 123 {
 124    /* Extract the (i,j) pixel from pixdata and return it
 125     * in texel[RCOMP], texel[GCOMP], texel[BCOMP], texel[ACOMP].
 126     */
 127
 128    const GLubyte *blksrc = (pixdata + ((srcRowStride + 3) / 4 * (j / 4) + (i / 4)) * 8);
 129    dxt135_decode_imageblock(blksrc, (i&3), (j&3), 0, texel);
 130 }
 131
 132
 133 void fetch_2d_texel_rgba_dxt1(GLint srcRowStride, const GLubyte *pixdata,
 134                          GLint i, GLint j, GLvoid *texel)
 135 {
 136    /* Extract the (i,j) pixel from pixdata and return it
 137     * in texel[RCOMP], texel[GCOMP], texel[BCOMP], texel[ACOMP].
 138     */
 139
 140    const GLubyte *blksrc = (pixdata + ((srcRowStride + 3) / 4 * (j / 4) + (i / 4)) * 8);
 141    dxt135_decode_imageblock(blksrc, (i&3), (j&3), 1, texel);
 142 }
 143
 144 void fetch_2d_texel_rgba_dxt3(GLint srcRowStride, const GLubyte *pixdata,
 145                          GLint i, GLint j, GLvoid *texel) {
 146
 147    /* Extract the (i,j) pixel from pixdata and return it
 148     * in texel[RCOMP], texel[GCOMP], texel[BCOMP], texel[ACOMP].
 149     */
 150
 151    GLchan *rgba = (GLchan *) texel;
 152    const GLubyte *blksrc = (pixdata + ((srcRowStride + 3) / 4 * (j / 4) + (i / 4)) * 16);
 153 #if 0
 154    /* Simple 32bit version. */
 155 /* that's pretty brain-dead for a single pixel, isn't it? */
 156    const GLubyte bit_pos = 4 * ((j&3) * 4 + (i&3));
 157    const GLuint alpha_low = blksrc[0] | (blksrc[1] << 8) | (blksrc[2] << 16) | (blksrc[3] << 24);
 158    const GLuint alpha_high = blksrc[4] | (blksrc[5] << 8) | (blksrc[6] << 16) | (blksrc[7] << 24);
 159
 160    dxt135_decode_imageblock(blksrc + 8, (i&3), (j&3), 2, texel);
 161    if (bit_pos < 32)
 162       rgba[ACOMP] = UBYTE_TO_CHAN( (GLubyte)(EXP4TO8((alpha_low >> bit_pos) & 15)) );
 163    else
 164       rgba[ACOMP] = UBYTE_TO_CHAN( (GLubyte)(EXP4TO8((alpha_high >> (bit_pos - 32)) & 15)) );
 165 #endif
 166 #if 1
 167 /* TODO test this! */
 168    const GLubyte anibble = (blksrc[((j&3) * 4 + (i&3)) / 2] >> (4 * (i&1))) & 0xf;
 169    dxt135_decode_imageblock(blksrc + 8, (i&3), (j&3), 2, texel);
 170    rgba[ACOMP] = UBYTE_TO_CHAN( (GLubyte)(EXP4TO8(anibble)) );
 171 #endif
 172
 173 }
 174
 175 void fetch_2d_texel_rgba_dxt5(GLint srcRowStride, const GLubyte *pixdata,
 176                          GLint i, GLint j, GLvoid *texel) {
 177
 178    /* Extract the (i,j) pixel from pixdata and return it
 179     * in texel[RCOMP], texel[GCOMP], texel[BCOMP], texel[ACOMP].
 180     */
 181
 182    GLchan *rgba = (GLchan *) texel;
 183    const GLubyte *blksrc = (pixdata + ((srcRowStride + 3) / 4 * (j / 4) + (i / 4)) * 16);
 184    const GLubyte alpha0 = blksrc[0];
 185    const GLubyte alpha1 = blksrc[1];
 186 #if 0
 187    const GLubyte bit_pos = 3 * ((j&3) * 4 + (i&3));
 188    /* simple 32bit version */
 189    const GLuint bits_low = blksrc[2] | (blksrc[3] << 8) | (blksrc[4] << 16) | (blksrc[5] << 24);
 190    const GLuint bits_high = blksrc[6] | (blksrc[7] << 8);
 191    GLubyte code;
 192
 193    if (bit_pos < 30)
 194       code = (GLubyte) ((bits_low >> bit_pos) & 7);
 195    else if (bit_pos == 30)
 196       code = (GLubyte) ((bits_low >> 30) & 3) | ((bits_high << 2) & 4);
 197    else
 198       code = (GLubyte) ((bits_high >> (bit_pos - 32)) & 7);
 199 #endif
 200 #if 1
 201 /* TODO test this! */
 202    const GLubyte bit_pos = ((j&3) * 4 + (i&3)) * 3;
 203    const GLubyte acodelow = blksrc[2 + bit_pos / 8];
 204    const GLubyte acodehigh = blksrc[3 + bit_pos / 8];
 205    const GLubyte code = (acodelow >> (bit_pos & 0x7) |
 206       (acodehigh  << (8 - (bit_pos & 0x7)))) & 0x7;
 207 #endif
 208    dxt135_decode_imageblock(blksrc + 8, (i&3), (j&3), 2, texel);
 209 #if 0
 210    if (alpha0 > alpha1) {
 211       switch (code) {
 212       case 0:
 213          rgba[ACOMP] = UBYTE_TO_CHAN( alpha0 );
 214          break;
 215       case 1:
 216          rgba[ACOMP] = UBYTE_TO_CHAN( alpha1 );
 217          break;
 218       case 2:
 219       case 3:
 220       case 4:
 221       case 5:
 222       case 6:
 223       case 7:
 224          rgba[ACOMP] = UBYTE_TO_CHAN( ((alpha0 * (8 - code) + (alpha1 * (code - 1))) / 7) );
 225          break;
 226       }
 227    }
 228    else {
 229       switch (code) {
 230       case 0:
 231          rgba[ACOMP] = UBYTE_TO_CHAN( alpha0 );
 232          break;
 233       case 1:
 234          rgba[ACOMP] = UBYTE_TO_CHAN( alpha1 );
 235          break;
 236       case 2:
 237       case 3:
 238       case 4:
 239       case 5:
 240          rgba[ACOMP] = UBYTE_TO_CHAN( ((alpha0 * (6 - code) + (alpha1 * (code - 1))) / 5) );
 241          break;
 242       case 6:
 243          rgba[ACOMP] = 0;
 244          break;
 245       case 7:
 246          rgba[ACOMP] = CHAN_MAX;
 247          break;
 248       }
 249    }
 250 #endif
 251 /* not sure. Which version is faster? */
 252 #if 1
 253 /* TODO test this */
 254    if (code == 0)
 255       rgba[ACOMP] = UBYTE_TO_CHAN( alpha0 );
 256    else if (code == 1)
 257       rgba[ACOMP] = UBYTE_TO_CHAN( alpha1 );
 258    else if (alpha0 > alpha1)
 259       rgba[ACOMP] = UBYTE_TO_CHAN( ((alpha0 * (8 - code) + (alpha1 * (code - 1))) / 7) );
 260    else if (code < 6)
 261       rgba[ACOMP] = UBYTE_TO_CHAN( ((alpha0 * (6 - code) + (alpha1 * (code - 1))) / 5) );
 262    else if (code == 6)
 263       rgba[ACOMP] = 0;
 264    else
 265       rgba[ACOMP] = CHAN_MAX;
 266 #endif
 267 }
 268
 269
/* Per-channel weights of the squared color error used by the encoder.
   They are the squares of 2/4/1 for red/green/blue, roughly following an
   rgb->luminance conversion; it is not certain that this really reflects
   visual perception. */
#define REDWEIGHT 4
#define GREENWEIGHT 16
#define BLUEWEIGHT 1

/* For RGBA DXT1, source texels with alpha <= ALPHACUT are encoded as
   transparent. */
#define ALPHACUT 127
 277
 278 static void fancybasecolorsearch( GLubyte *blkaddr, GLubyte srccolors[4][4][4], GLubyte *bestcolor[2],
 279                            GLint numxpixels, GLint numypixels, GLint type, GLboolean haveAlpha)
 280 {
 281    /* use same luminance-weighted distance metric to determine encoding as for finding the base colors */
 282
 283    /* TODO could also try to find a better encoding for the 3-color-encoding type, this really should be done
 284       if it's rgba_dxt1 and we have alpha in the block, currently even values which will be mapped to black
 285       due to their alpha value will influence the result */
 286    GLint i, j, colors, z;
 287    GLuint pixerror, pixerrorred, pixerrorgreen, pixerrorblue, pixerrorbest;
 288    GLint colordist, blockerrlin[2][3];
 289    GLubyte nrcolor[2];
 290    GLint pixerrorcolorbest[3];
 291    GLubyte enc = 0;
 292    GLubyte cv[4][4];
 293    GLubyte testcolor[2][3];
 294
 295 /*   fprintf(stderr, "color begin 0 r/g/b %d/%d/%d, 1 r/g/b %d/%d/%d\n",
 296       bestcolor[0][0], bestcolor[0][1], bestcolor[0][2], bestcolor[1][0], bestcolor[1][1], bestcolor[1][2]);*/
 297    if (((bestcolor[0][0] & 0xf8) << 8 | (bestcolor[0][1] & 0xfc) << 3 | bestcolor[0][2] >> 3) <
 298       ((bestcolor[1][0] & 0xf8) << 8 | (bestcolor[1][1] & 0xfc) << 3 | bestcolor[1][2] >> 3)) {
 299       testcolor[0][0] = bestcolor[0][0];
 300       testcolor[0][1] = bestcolor[0][1];
 301       testcolor[0][2] = bestcolor[0][2];
 302       testcolor[1][0] = bestcolor[1][0];
 303       testcolor[1][1] = bestcolor[1][1];
 304       testcolor[1][2] = bestcolor[1][2];
 305    }
 306    else {
 307       testcolor[1][0] = bestcolor[0][0];
 308       testcolor[1][1] = bestcolor[0][1];
 309       testcolor[1][2] = bestcolor[0][2];
 310       testcolor[0][0] = bestcolor[1][0];
 311       testcolor[0][1] = bestcolor[1][1];
 312       testcolor[0][2] = bestcolor[1][2];
 313    }
 314
 315    for (i = 0; i < 3; i ++) {
 316       cv[0][i] = testcolor[0][i];
 317       cv[1][i] = testcolor[1][i];
 318       cv[2][i] = (testcolor[0][i] * 2 + testcolor[1][i]) / 3;
 319       cv[3][i] = (testcolor[0][i] + testcolor[1][i] * 2) / 3;
 320    }
 321
 322    blockerrlin[0][0] = 0;
 323    blockerrlin[0][1] = 0;
 324    blockerrlin[0][2] = 0;
 325    blockerrlin[1][0] = 0;
 326    blockerrlin[1][1] = 0;
 327    blockerrlin[1][2] = 0;
 328
 329    nrcolor[0] = 0;
 330    nrcolor[1] = 0;
 331
 332    for (j = 0; j < numypixels; j++) {
 333       for (i = 0; i < numxpixels; i++) {
 334          pixerrorbest = 0xffffffff;
 335          for (colors = 0; colors < 4; colors++) {
 336             colordist = srccolors[j][i][0] - (cv[colors][0]);
 337             pixerror = colordist * colordist * REDWEIGHT;
 338             pixerrorred = colordist;
 339             colordist = srccolors[j][i][1] - (cv[colors][1]);
 340             pixerror += colordist * colordist * GREENWEIGHT;
 341             pixerrorgreen = colordist;
 342             colordist = srccolors[j][i][2] - (cv[colors][2]);
 343             pixerror += colordist * colordist * BLUEWEIGHT;
 344             pixerrorblue = colordist;
 345             if (pixerror < pixerrorbest) {
 346                enc = colors;
 347                pixerrorbest = pixerror;
 348                pixerrorcolorbest[0] = pixerrorred;
 349                pixerrorcolorbest[1] = pixerrorgreen;
 350                pixerrorcolorbest[2] = pixerrorblue;
 351             }
 352          }
 353          if (enc == 0) {
 354             for (z = 0; z < 3; z++) {
 355                blockerrlin[0][z] += 3 * pixerrorcolorbest[z];
 356             }
 357             nrcolor[0] += 3;
 358          }
 359          else if (enc == 2) {
 360             for (z = 0; z < 3; z++) {
 361                blockerrlin[0][z] += 2 * pixerrorcolorbest[z];
 362             }
 363             nrcolor[0] += 2;
 364             for (z = 0; z < 3; z++) {
 365                blockerrlin[1][z] += 1 * pixerrorcolorbest[z];
 366             }
 367             nrcolor[1] += 1;
 368          }
 369          else if (enc == 3) {
 370             for (z = 0; z < 3; z++) {
 371                blockerrlin[0][z] += 1 * pixerrorcolorbest[z];
 372             }
 373             nrcolor[0] += 1;
 374             for (z = 0; z < 3; z++) {
 375                blockerrlin[1][z] += 2 * pixerrorcolorbest[z];
 376             }
 377             nrcolor[1] += 2;
 378          }
 379          else if (enc == 1) {
 380             for (z = 0; z < 3; z++) {
 381                blockerrlin[1][z] += 3 * pixerrorcolorbest[z];
 382             }
 383             nrcolor[1] += 3;
 384          }
 385       }
 386    }
 387    if (nrcolor[0] == 0) nrcolor[0] = 1;
 388    if (nrcolor[1] == 0) nrcolor[1] = 1;
 389    for (j = 0; j < 2; j++) {
 390       for (i = 0; i < 3; i++) {
 391          GLint newvalue = testcolor[j][i] + blockerrlin[j][i] / nrcolor[j];
 392          if (newvalue <= 0)
 393             testcolor[j][i] = 0;
 394          else if (newvalue >= 255)
 395             testcolor[j][i] = 255;
 396          else testcolor[j][i] = newvalue;
 397       }
 398    }
 399
 400    if ((abs(testcolor[0][0] - testcolor[1][0]) < 8) &&
 401        (abs(testcolor[0][1] - testcolor[1][1]) < 4) &&
 402        (abs(testcolor[0][2] - testcolor[1][2]) < 8)) {
 403        /* both colors are so close they might get encoded as the same 16bit values */
 404       GLubyte coldiffred, coldiffgreen, coldiffblue, coldiffmax, factor, ind0, ind1;
 405
 406       coldiffred = abs(testcolor[0][0] - testcolor[1][0]);
 407       coldiffgreen = 2 * abs(testcolor[0][1] - testcolor[1][1]);
 408       coldiffblue = abs(testcolor[0][2] - testcolor[1][2]);
 409       coldiffmax = coldiffred;
 410       if (coldiffmax < coldiffgreen) coldiffmax = coldiffgreen;
 411       if (coldiffmax < coldiffblue) coldiffmax = coldiffblue;
 412       if (coldiffmax > 0) {
 413          if (coldiffmax > 4) factor = 2;
 414          else if (coldiffmax > 2) factor = 3;
 415          else factor = 4;
 416          /* Won't do much if the color value is near 255... */
 417          /* argh so many ifs */
 418          if (testcolor[1][1] >= testcolor[0][1]) {
 419             ind1 = 1; ind0 = 0;
 420          }
 421          else {
 422             ind1 = 0; ind0 = 1;
 423          }
 424          if ((testcolor[ind1][1] + factor * coldiffgreen) <= 255)
 425             testcolor[ind1][1] += factor * coldiffgreen;
 426          else testcolor[ind1][1] = 255;
 427          if ((testcolor[ind1][0] - testcolor[ind0][1]) > 0) {
 428             if ((testcolor[ind1][0] + factor * coldiffred) <= 255)
 429                testcolor[ind1][0] += factor * coldiffred;
 430             else testcolor[ind1][0] = 255;
 431          }
 432          else {
 433             if ((testcolor[ind0][0] + factor * coldiffred) <= 255)
 434                testcolor[ind0][0] += factor * coldiffred;
 435             else testcolor[ind0][0] = 255;
 436          }
 437          if ((testcolor[ind1][2] - testcolor[ind0][2]) > 0) {
 438             if ((testcolor[ind1][2] + factor * coldiffblue) <= 255)
 439                testcolor[ind1][2] += factor * coldiffblue;
 440             else testcolor[ind1][2] = 255;
 441          }
 442          else {
 443             if ((testcolor[ind0][2] + factor * coldiffblue) <= 255)
 444                testcolor[ind0][2] += factor * coldiffblue;
 445             else testcolor[ind0][2] = 255;
 446          }
 447       }
 448    }
 449
 450    if (((testcolor[0][0] & 0xf8) << 8 | (testcolor[0][1] & 0xfc) << 3 | testcolor[0][2] >> 3) <
 451       ((testcolor[1][0] & 0xf8) << 8 | (testcolor[1][1] & 0xfc) << 3 | testcolor[1][2]) >> 3) {
 452       for (i = 0; i < 3; i++) {
 453          bestcolor[0][i] = testcolor[0][i];
 454          bestcolor[1][i] = testcolor[1][i];
 455       }
 456    }
 457    else {
 458       for (i = 0; i < 3; i++) {
 459          bestcolor[0][i] = testcolor[1][i];
 460          bestcolor[1][i] = testcolor[0][i];
 461       }
 462    }
 463
 464 /*     fprintf(stderr, "color end 0 r/g/b %d/%d/%d, 1 r/g/b %d/%d/%d\n",
 465      bestcolor[0][0], bestcolor[0][1], bestcolor[0][2], bestcolor[1][0], bestcolor[1][1], bestcolor[1][2]);*/
 466 }
 467
 468
 469
 470 static void storedxtencodedblock( GLubyte *blkaddr, GLubyte srccolors[4][4][4], GLubyte *bestcolor[2],
 471                            GLint numxpixels, GLint numypixels, GLuint type, GLboolean haveAlpha)
 472 {
 473    /* use same luminance-weighted distance metric to determine encoding as for finding the base colors */
 474
 475    GLint i, j, colors;
 476    GLuint testerror, testerror2, pixerror, pixerrorbest;
 477    GLint colordist;
 478    GLushort color0, color1, tempcolor;
 479    GLuint bits = 0, bits2 = 0;
 480    GLubyte *colorptr;
 481    GLubyte enc = 0;
 482    GLubyte cv[4][4];
 483
 484    bestcolor[0][0] = bestcolor[0][0] & 0xf8;
 485    bestcolor[0][1] = bestcolor[0][1] & 0xfc;
 486    bestcolor[0][2] = bestcolor[0][2] & 0xf8;
 487    bestcolor[1][0] = bestcolor[1][0] & 0xf8;
 488    bestcolor[1][1] = bestcolor[1][1] & 0xfc;
 489    bestcolor[1][2] = bestcolor[1][2] & 0xf8;
 490
 491    color0 = bestcolor[0][0] << 8 | bestcolor[0][1] << 3 | bestcolor[0][2] >> 3;
 492    color1 = bestcolor[1][0] << 8 | bestcolor[1][1] << 3 | bestcolor[1][2] >> 3;
 493    if (color0 < color1) {
 494       tempcolor = color0; color0 = color1; color1 = tempcolor;
 495       colorptr = bestcolor[0]; bestcolor[0] = bestcolor[1]; bestcolor[1] = colorptr;
 496    }
 497
 498
 499    for (i = 0; i < 3; i++) {
 500       cv[0][i] = bestcolor[0][i];
 501       cv[1][i] = bestcolor[1][i];
 502       cv[2][i] = (bestcolor[0][i] * 2 + bestcolor[1][i]) / 3;
 503       cv[3][i] = (bestcolor[0][i] + bestcolor[1][i] * 2) / 3;
 504    }
 505
 506    testerror = 0;
 507    for (j = 0; j < numypixels; j++) {
 508       for (i = 0; i < numxpixels; i++) {
 509          pixerrorbest = 0xffffffff;
 510          for (colors = 0; colors < 4; colors++) {
 511             colordist = srccolors[j][i][0] - cv[colors][0];
 512             pixerror = colordist * colordist * REDWEIGHT;
 513             colordist = srccolors[j][i][1] - cv[colors][1];
 514             pixerror += colordist * colordist * GREENWEIGHT;
 515             colordist = srccolors[j][i][2] - cv[colors][2];
 516             pixerror += colordist * colordist * BLUEWEIGHT;
 517             if (pixerror < pixerrorbest) {
 518                pixerrorbest = pixerror;
 519                enc = colors;
 520             }
 521          }
 522          testerror += pixerrorbest;
 523          bits |= enc << (2 * (j * 4 + i));
 524       }
 525    }
 526    /* some hw might disagree but actually decoding should always use 4-color encoding
 527       for non-dxt1 formats */
 528    if (type == GL_COMPRESSED_RGB_S3TC_DXT1_EXT || type == GL_COMPRESSED_RGBA_S3TC_DXT1_EXT) {
 529       for (i = 0; i < 3; i++) {
 530          cv[2][i] = (bestcolor[0][i] + bestcolor[1][i]) / 2;
 531          /* this isn't used. Looks like the black color constant can only be used
 532             with RGB_DXT1 if I read the spec correctly (note though that the radeon gpu disagrees,
 533             it will decode 3 to black even with DXT3/5), and due to how the color searching works
 534             it won't get used even then */
 535          cv[3][i] = 0;
 536       }
 537       testerror2 = 0;
 538       for (j = 0; j < numypixels; j++) {
 539          for (i = 0; i < numxpixels; i++) {
 540             pixerrorbest = 0xffffffff;
 541             if ((type == GL_COMPRESSED_RGBA_S3TC_DXT1_EXT) && (srccolors[j][i][3] <= ALPHACUT)) {
 542                enc = 3;
 543                pixerrorbest = 0; /* don't calculate error */
 544             }
 545             else {
 546                /* we're calculating the same what we have done already for colors 0-1 above... */
 547                for (colors = 0; colors < 3; colors++) {
 548                   colordist = srccolors[j][i][0] - cv[colors][0];
 549                   pixerror = colordist * colordist * REDWEIGHT;
 550                   colordist = srccolors[j][i][1] - cv[colors][1];
 551                   pixerror += colordist * colordist * GREENWEIGHT;
 552                   colordist = srccolors[j][i][2] - cv[colors][2];
 553                   pixerror += colordist * colordist * BLUEWEIGHT;
 554                   if (pixerror < pixerrorbest) {
 555                      pixerrorbest = pixerror;
 556                      /* need to exchange colors later */
 557                      if (colors > 1) enc = colors;
 558                      else enc = colors ^ 1;
 559                   }
 560                }
 561             }
 562             testerror2 += pixerrorbest;
 563             bits2 |= enc << (2 * (j * 4 + i));
 564          }
 565       }
 566    } else {
 567       testerror2 = 0xffffffff;
 568    }
 569
 570    /* finally we're finished, write back colors and bits */
 571    if ((testerror > testerror2) || (haveAlpha)) {
 572       *blkaddr++ = color1 & 0xff;
 573       *blkaddr++ = color1 >> 8;
 574       *blkaddr++ = color0 & 0xff;
 575       *blkaddr++ = color0 >> 8;
 576       *blkaddr++ = bits2 & 0xff;
 577       *blkaddr++ = ( bits2 >> 8) & 0xff;
 578       *blkaddr++ = ( bits2 >> 16) & 0xff;
 579       *blkaddr = bits2 >> 24;
 580    }
 581    else {
 582       *blkaddr++ = color0 & 0xff;
 583       *blkaddr++ = color0 >> 8;
 584       *blkaddr++ = color1 & 0xff;
 585       *blkaddr++ = color1 >> 8;
 586       *blkaddr++ = bits & 0xff;
 587       *blkaddr++ = ( bits >> 8) & 0xff;
 588       *blkaddr++ = ( bits >> 16) & 0xff;
 589       *blkaddr = bits >> 24;
 590    }
 591 }
 592
 593 static void encodedxtcolorblockfaster( GLubyte *blkaddr, GLubyte srccolors[4][4][4],
 594                          GLint numxpixels, GLint numypixels, GLuint type )
 595 {
 596 /* simplistic approach. We need two base colors, simply use the "highest" and the "lowest" color
 597    present in the picture as base colors */
 598
 599    /* define lowest and highest color as shortest and longest vector to 0/0/0, though the
 600       vectors are weighted similar to their importance in rgb-luminance conversion
 601       doesn't work too well though...
 602       This seems to be a rather difficult problem */
 603
 604    GLubyte *bestcolor[2];
 605    GLubyte basecolors[2][3];
 606    GLubyte i, j;
 607    GLuint lowcv, highcv, testcv;
 608    GLboolean haveAlpha = GL_FALSE;
 609
 610    lowcv = highcv = srccolors[0][0][0] * srccolors[0][0][0] * REDWEIGHT +
 611                           srccolors[0][0][1] * srccolors[0][0][1] * GREENWEIGHT +
 612                           srccolors[0][0][2] * srccolors[0][0][2] * BLUEWEIGHT;
 613    bestcolor[0] = bestcolor[1] = srccolors[0][0];
 614    for (j = 0; j < numypixels; j++) {
 615       for (i = 0; i < numxpixels; i++) {
 616          /* don't use this as a base color if the pixel will get black/transparent anyway */
 617          if ((type != GL_COMPRESSED_RGBA_S3TC_DXT1_EXT) || (srccolors[j][i][3] > ALPHACUT)) {
 618             testcv = srccolors[j][i][0] * srccolors[j][i][0] * REDWEIGHT +
 619                      srccolors[j][i][1] * srccolors[j][i][1] * GREENWEIGHT +
 620                      srccolors[j][i][2] * srccolors[j][i][2] * BLUEWEIGHT;
 621             if (testcv > highcv) {
 622                highcv = testcv;
 623                bestcolor[1] = srccolors[j][i];
 624             }
 625             else if (testcv < lowcv) {
 626                lowcv = testcv;
 627                bestcolor[0] = srccolors[j][i];
 628             }
 629          }
 630          else haveAlpha = GL_TRUE;
 631       }
 632    }
 633    /* make sure the original color values won't get touched... */
 634    for (j = 0; j < 2; j++) {
 635       for (i = 0; i < 3; i++) {
 636          basecolors[j][i] = bestcolor[j][i];
 637       }
 638    }
 639    bestcolor[0] = basecolors[0];
 640    bestcolor[1] = basecolors[1];
 641
 642    /* try to find better base colors */
 643    fancybasecolorsearch(blkaddr, srccolors, bestcolor, numxpixels, numypixels, type, haveAlpha);
 644    /* find the best encoding for these colors, and store the result */
 645    storedxtencodedblock(blkaddr, srccolors, bestcolor, numxpixels, numypixels, type, haveAlpha);
 646 }
 647
 648 static void writedxt5encodedalphablock( GLubyte *blkaddr, GLubyte alphabase1, GLubyte alphabase2,
 649                          GLubyte alphaenc[16])
 650 {
 651    *blkaddr++ = alphabase1;
 652    *blkaddr++ = alphabase2;
 653    *blkaddr++ = alphaenc[0] | (alphaenc[1] << 3) | ((alphaenc[2] & 3) << 6);
 654    *blkaddr++ = (alphaenc[2] >> 2) | (alphaenc[3] << 1) | (alphaenc[4] << 4) | ((alphaenc[5] & 1) << 7);
 655    *blkaddr++ = (alphaenc[5] >> 1) | (alphaenc[6] << 2) | (alphaenc[7] << 5);
 656    *blkaddr++ = alphaenc[8] | (alphaenc[9] << 3) | ((alphaenc[10] & 3) << 6);
 657    *blkaddr++ = (alphaenc[10] >> 2) | (alphaenc[11] << 1) | (alphaenc[12] << 4) | ((alphaenc[13] & 1) << 7);
 658    *blkaddr++ = (alphaenc[13] >> 1) | (alphaenc[14] << 2) | (alphaenc[15] << 5);
 659 }
 660
 661 static void encodedxt5alpha(GLubyte *blkaddr, GLubyte srccolors[4][4][4],
 662                             GLint numxpixels, GLint numypixels)
 663 {
 664    GLubyte alphabase[2], alphause[2];
 665    GLshort alphatest[2];
 666    GLuint alphablockerror1, alphablockerror2, alphablockerror3;
 667    GLubyte i, j, aindex, acutValues[7];
 668    GLubyte alphaenc1[16], alphaenc2[16], alphaenc3[16];
 669    GLboolean alphaabsmin = GL_FALSE;
 670    GLboolean alphaabsmax = GL_FALSE;
 671    GLshort alphadist;
 672
 673    /* find lowest and highest alpha value in block, alphabase[0] lowest, alphabase[1] highest */
 674    alphabase[0] = 0xff; alphabase[1] = 0x0;
 675    for (j = 0; j < numypixels; j++) {
 676       for (i = 0; i < numxpixels; i++) {
 677          if (srccolors[j][i][3] == 0)
 678             alphaabsmin = GL_TRUE;
 679          else if (srccolors[j][i][3] == 255)
 680             alphaabsmax = GL_TRUE;
 681          else {
 682             if (srccolors[j][i][3] > alphabase[1])
 683                alphabase[1] = srccolors[j][i][3];
 684             if (srccolors[j][i][3] < alphabase[0])
 685                alphabase[0] = srccolors[j][i][3];
 686          }
 687       }
 688    }
 689
 690
 691    if ((alphabase[0] > alphabase[1]) && !(alphaabsmin && alphaabsmax)) { /* one color, either max or min */
 692       /* shortcut here since it is a very common case (and also avoids later problems) */
 693       /* || (alphabase[0] == alphabase[1] && !alphaabsmin && !alphaabsmax) */
 694       /* could also thest for alpha0 == alpha1 (and not min/max), but probably not common, so don't bother */
 695
 696       *blkaddr++ = srccolors[0][0][3];
 697       blkaddr++;
 698       *blkaddr++ = 0;
 699       *blkaddr++ = 0;
 700       *blkaddr++ = 0;
 701       *blkaddr++ = 0;
 702       *blkaddr++ = 0;
 703       *blkaddr++ = 0;
 704 /*      fprintf(stderr, "enc0 used\n");*/
 705       return;
 706    }
 707
 708    /* find best encoding for alpha0 > alpha1 */
 709    /* it's possible this encoding is better even if both alphaabsmin and alphaabsmax are true */
 710    alphablockerror1 = 0x0;
 711    alphablockerror2 = 0xffffffff;
 712    alphablockerror3 = 0xffffffff;
 713    if (alphaabsmin) alphause[0] = 0;
 714    else alphause[0] = alphabase[0];
 715    if (alphaabsmax) alphause[1] = 255;
 716    else alphause[1] = alphabase[1];
 717    /* calculate the 7 cut values, just the middle between 2 of the computed alpha values */
 718    for (aindex = 0; aindex < 7; aindex++) {
 719       /* don't forget here is always rounded down */
 720       acutValues[aindex] = (alphause[0] * (2*aindex + 1) + alphause[1] * (14 - (2*aindex + 1))) / 14;
 721    }
 722
 723    for (j = 0; j < numypixels; j++) {
 724       for (i = 0; i < numxpixels; i++) {
 725          /* maybe it's overkill to have the most complicated calculation just for the error
 726             calculation which we only need to figure out if encoding1 or encoding2 is better... */
 727          if (srccolors[j][i][3] > acutValues[0]) {
 728             alphaenc1[4*j + i] = 0;
 729             alphadist = srccolors[j][i][3] - alphause[1];
 730          }
 731          else if (srccolors[j][i][3] > acutValues[1]) {
 732             alphaenc1[4*j + i] = 2;
 733             alphadist = srccolors[j][i][3] - (alphause[1] * 6 + alphause[0] * 1) / 7;
 734          }
 735          else if (srccolors[j][i][3] > acutValues[2]) {
 736             alphaenc1[4*j + i] = 3;
 737             alphadist = srccolors[j][i][3] - (alphause[1] * 5 + alphause[0] * 2) / 7;
 738          }
 739          else if (srccolors[j][i][3] > acutValues[3]) {
 740             alphaenc1[4*j + i] = 4;
 741             alphadist = srccolors[j][i][3] - (alphause[1] * 4 + alphause[0] * 3) / 7;
 742          }
 743          else if (srccolors[j][i][3] > acutValues[4]) {
 744             alphaenc1[4*j + i] = 5;
 745             alphadist = srccolors[j][i][3] - (alphause[1] * 3 + alphause[0] * 4) / 7;
 746          }
 747          else if (srccolors[j][i][3] > acutValues[5]) {
 748             alphaenc1[4*j + i] = 6;
 749             alphadist = srccolors[j][i][3] - (alphause[1] * 2 + alphause[0] * 5) / 7;
 750          }
 751          else if (srccolors[j][i][3] > acutValues[6]) {
 752             alphaenc1[4*j + i] = 7;
 753             alphadist = srccolors[j][i][3] - (alphause[1] * 1 + alphause[0] * 6) / 7;
 754          }
 755          else {
 756             alphaenc1[4*j + i] = 1;
 757             alphadist = srccolors[j][i][3] - alphause[0];
 758          }
 759          alphablockerror1 += alphadist * alphadist;
 760       }
 761    }
 762 /*      for (i = 0; i < 16; i++) {
 763          fprintf(stderr, "%d ", alphaenc1[i]);
 764       }
 765       fprintf(stderr, "cutVals ");
 766       for (i = 0; i < 8; i++) {
 767          fprintf(stderr, "%d ", acutValues[i]);
 768       }
 769       fprintf(stderr, "srcVals ");
 770       for (j = 0; j < numypixels; j++)
 771          for (i = 0; i < numxpixels; i++) {
 772             fprintf(stderr, "%d ", srccolors[j][i][3]);
 773          }
 774
 775       fprintf(stderr, "\n");
 776    }*/
 777    /* it's not very likely this encoding is better if both alphaabsmin and alphaabsmax
 778       are false but try it anyway */
 779    if (alphablockerror1 >= 32) {
 780
 781       /* don't bother if encoding is already very good, this condition should also imply
 782       we have valid alphabase colors which we absolutely need (alphabase[0] <= alphabase[1]) */
 783       alphablockerror2 = 0;
 784       for (aindex = 0; aindex < 5; aindex++) {
 785          /* don't forget here is always rounded down */
 786          acutValues[aindex] = (alphabase[0] * (10 - (2*aindex + 1)) + alphabase[1] * (2*aindex + 1)) / 10;
 787       }
 788       for (j = 0; j < numypixels; j++) {
 789          for (i = 0; i < numxpixels; i++) {
 790              /* maybe it's overkill to have the most complicated calculation just for the error
 791                calculation which we only need to figure out if encoding1 or encoding2 is better... */
 792             if (srccolors[j][i][3] == 0) {
 793                alphaenc2[4*j + i] = 6;
 794                alphadist = 0;
 795             }
 796             else if (srccolors[j][i][3] == 255) {
 797                alphaenc2[4*j + i] = 7;
 798                alphadist = 0;
 799             }
 800             else if (srccolors[j][i][3] <= acutValues[0]) {
 801                alphaenc2[4*j + i] = 0;
 802                alphadist = srccolors[j][i][3] - alphabase[0];
 803             }
 804             else if (srccolors[j][i][3] <= acutValues[1]) {
 805                alphaenc2[4*j + i] = 2;
 806                alphadist = srccolors[j][i][3] - (alphabase[0] * 4 + alphabase[1] * 1) / 5;
 807             }
 808             else if (srccolors[j][i][3] <= acutValues[2]) {
 809                alphaenc2[4*j + i] = 3;
 810                alphadist = srccolors[j][i][3] - (alphabase[0] * 3 + alphabase[1] * 2) / 5;
 811             }
 812             else if (srccolors[j][i][3] <= acutValues[3]) {
 813                alphaenc2[4*j + i] = 4;
 814                alphadist = srccolors[j][i][3] - (alphabase[0] * 2 + alphabase[1] * 3) / 5;
 815             }
 816             else if (srccolors[j][i][3] <= acutValues[4]) {
 817                alphaenc2[4*j + i] = 5;
 818                alphadist = srccolors[j][i][3] - (alphabase[0] * 1 + alphabase[1] * 4) / 5;
 819             }
 820             else {
 821                alphaenc2[4*j + i] = 1;
 822                alphadist = srccolors[j][i][3] - alphabase[1];
 823             }
 824             alphablockerror2 += alphadist * alphadist;
 825          }
 826       }
 827
 828
 829       /* skip this if the error is already very small
 830          this encoding is MUCH better on average than #2 though, but expensive! */
 831       if ((alphablockerror2 > 96) && (alphablockerror1 > 96)) {
 832          GLshort blockerrlin1 = 0;
 833          GLshort blockerrlin2 = 0;
 834          GLubyte nralphainrangelow = 0;
 835          GLubyte nralphainrangehigh = 0;
 836          alphatest[0] = 0xff;
 837          alphatest[1] = 0x0;
 838          /* if we have large range it's likely there are values close to 0/255, try to map them to 0/255 */
 839          for (j = 0; j < numypixels; j++) {
 840             for (i = 0; i < numxpixels; i++) {
 841                if ((srccolors[j][i][3] > alphatest[1]) && (srccolors[j][i][3] < (255 -(alphabase[1] - alphabase[0]) / 28)))
 842                   alphatest[1] = srccolors[j][i][3];
 843                if ((srccolors[j][i][3] < alphatest[0]) && (srccolors[j][i][3] > (alphabase[1] - alphabase[0]) / 28))
 844                   alphatest[0] = srccolors[j][i][3];
 845             }
 846          }
 847           /* shouldn't happen too often, don't really care about those degenerated cases */
 848           if (alphatest[1] <= alphatest[0]) {
 849              alphatest[0] = 1;
 850              alphatest[1] = 254;
 851 /*             fprintf(stderr, "only 1 or 0 colors for encoding!\n");*/
 852          }
 853          for (aindex = 0; aindex < 5; aindex++) {
 854          /* don't forget here is always rounded down */
 855             acutValues[aindex] = (alphatest[0] * (10 - (2*aindex + 1)) + alphatest[1] * (2*aindex + 1)) / 10;
 856          }
 857
 858          /* find the "average" difference between the alpha values and the next encoded value.
 859             This is then used to calculate new base values.
 860             Should there be some weighting, i.e. those values closer to alphatest[x] have more weight,
 861             since they will see more improvement, and also because the values in the middle are somewhat
 862             likely to get no improvement at all (because the base values might move in different directions)?
 863             OTOH it would mean the values in the middle are even less likely to get an improvement
 864          */
 865          for (j = 0; j < numypixels; j++) {
 866             for (i = 0; i < numxpixels; i++) {
 867                if (srccolors[j][i][3] <= alphatest[0] / 2) {
 868                }
 869                else if (srccolors[j][i][3] > ((255 + alphatest[1]) / 2)) {
 870                }
 871                else if (srccolors[j][i][3] <= acutValues[0]) {
 872                   blockerrlin1 += (srccolors[j][i][3] - alphatest[0]);
 873                   nralphainrangelow += 1;
 874                }
 875                else if (srccolors[j][i][3] <= acutValues[1]) {
 876                   blockerrlin1 += (srccolors[j][i][3] - (alphatest[0] * 4 + alphatest[1] * 1) / 5);
 877                   blockerrlin2 += (srccolors[j][i][3] - (alphatest[0] * 4 + alphatest[1] * 1) / 5);
 878                   nralphainrangelow += 1;
 879                   nralphainrangehigh += 1;
 880                }
 881                else if (srccolors[j][i][3] <= acutValues[2]) {
 882                   blockerrlin1 += (srccolors[j][i][3] - (alphatest[0] * 3 + alphatest[1] * 2) / 5);
 883                   blockerrlin2 += (srccolors[j][i][3] - (alphatest[0] * 3 + alphatest[1] * 2) / 5);
 884                   nralphainrangelow += 1;
 885                   nralphainrangehigh += 1;
 886                }
 887                else if (srccolors[j][i][3] <= acutValues[3]) {
 888                   blockerrlin1 += (srccolors[j][i][3] - (alphatest[0] * 2 + alphatest[1] * 3) / 5);
 889                   blockerrlin2 += (srccolors[j][i][3] - (alphatest[0] * 2 + alphatest[1] * 3) / 5);
 890                   nralphainrangelow += 1;
 891                   nralphainrangehigh += 1;
 892                }
 893                else if (srccolors[j][i][3] <= acutValues[4]) {
 894                   blockerrlin1 += (srccolors[j][i][3] - (alphatest[0] * 1 + alphatest[1] * 4) / 5);
 895                   blockerrlin2 += (srccolors[j][i][3] - (alphatest[0] * 1 + alphatest[1] * 4) / 5);
 896                   nralphainrangelow += 1;
 897                   nralphainrangehigh += 1;
 898                   }
 899                else {
 900                   blockerrlin2 += (srccolors[j][i][3] - alphatest[1]);
 901                   nralphainrangehigh += 1;
 902                }
 903             }
 904          }
 905          /* shouldn't happen often, needed to avoid div by zero */
 906          if (nralphainrangelow == 0) nralphainrangelow = 1;
 907          if (nralphainrangehigh == 0) nralphainrangehigh = 1;
 908          alphatest[0] = alphatest[0] + (blockerrlin1 / nralphainrangelow);
 909 /*         fprintf(stderr, "block err lin low %d, nr %d\n", blockerrlin1, nralphainrangelow);
 910          fprintf(stderr, "block err lin high %d, nr %d\n", blockerrlin2, nralphainrangehigh);*/
 911          /* again shouldn't really happen often... */
 912          if (alphatest[0] < 0) {
 913             alphatest[0] = 0;
 914 /*            fprintf(stderr, "adj alpha base val to 0\n");*/
 915          }
 916          alphatest[1] = alphatest[1] + (blockerrlin2 / nralphainrangehigh);
 917          if (alphatest[1] > 255) {
 918             alphatest[1] = 255;
 919 /*            fprintf(stderr, "adj alpha base val to 255\n");*/
 920          }
 921
 922          alphablockerror3 = 0;
 923          for (aindex = 0; aindex < 5; aindex++) {
 924          /* don't forget here is always rounded down */
 925             acutValues[aindex] = (alphatest[0] * (10 - (2*aindex + 1)) + alphatest[1] * (2*aindex + 1)) / 10;
 926          }
 927          for (j = 0; j < numypixels; j++) {
 928             for (i = 0; i < numxpixels; i++) {
 929                 /* maybe it's overkill to have the most complicated calculation just for the error
 930                   calculation which we only need to figure out if encoding1 or encoding2 is better... */
 931                if (srccolors[j][i][3] <= alphatest[0] / 2) {
 932                   alphaenc3[4*j + i] = 6;
 933                   alphadist = srccolors[j][i][3];
 934                }
 935                else if (srccolors[j][i][3] > ((255 + alphatest[1]) / 2)) {
 936                   alphaenc3[4*j + i] = 7;
 937                   alphadist = 255 - srccolors[j][i][3];
 938                }
 939                else if (srccolors[j][i][3] <= acutValues[0]) {
 940                   alphaenc3[4*j + i] = 0;
 941                   alphadist = srccolors[j][i][3] - alphatest[0];
 942                }
 943                else if (srccolors[j][i][3] <= acutValues[1]) {
 944                  alphaenc3[4*j + i] = 2;
 945                  alphadist = srccolors[j][i][3] - (alphatest[0] * 4 + alphatest[1] * 1) / 5;
 946                }
 947                else if (srccolors[j][i][3] <= acutValues[2]) {
 948                   alphaenc3[4*j + i] = 3;
 949                   alphadist = srccolors[j][i][3] - (alphatest[0] * 3 + alphatest[1] * 2) / 5;
 950                }
 951                else if (srccolors[j][i][3] <= acutValues[3]) {
 952                   alphaenc3[4*j + i] = 4;
 953                   alphadist = srccolors[j][i][3] - (alphatest[0] * 2 + alphatest[1] * 3) / 5;
 954                }
 955                else if (srccolors[j][i][3] <= acutValues[4]) {
 956                   alphaenc3[4*j + i] = 5;
 957                   alphadist = srccolors[j][i][3] - (alphatest[0] * 1 + alphatest[1] * 4) / 5;
 958                }
 959                else {
 960                   alphaenc3[4*j + i] = 1;
 961                   alphadist = srccolors[j][i][3] - alphatest[1];
 962                }
 963                alphablockerror3 += alphadist * alphadist;
 964             }
 965          }
 966       }
 967    }
 968   /* write the alpha values and encoding back. */
 969    if ((alphablockerror1 <= alphablockerror2) && (alphablockerror1 <= alphablockerror3)) {
 970 /*      if (alphablockerror1 > 96) fprintf(stderr, "enc1 used, error %d\n", alphablockerror1);*/
 971       writedxt5encodedalphablock( blkaddr, alphause[1], alphause[0], alphaenc1 );
 972    }
 973    else if (alphablockerror2 <= alphablockerror3) {
 974 /*      if (alphablockerror2 > 96) fprintf(stderr, "enc2 used, error %d\n", alphablockerror2);*/
 975       writedxt5encodedalphablock( blkaddr, alphabase[0], alphabase[1], alphaenc2 );
 976    }
 977    else {
 978 /*      fprintf(stderr, "enc3 used, error %d\n", alphablockerror3);*/
 979       writedxt5encodedalphablock( blkaddr, (GLubyte)alphatest[0], (GLubyte)alphatest[1], alphaenc3 );
 980    }
 981 }
 982
 983 static void extractsrccolors( GLubyte srcpixels[4][4][4], const GLchan *srcaddr,
 984                          GLint srcRowStride, GLint numxpixels, GLint numypixels, GLint comps)
 985 {
 986    GLubyte i, j, c;
 987    const GLchan *curaddr;
 988    for (j = 0; j < numypixels; j++) {
 989       curaddr = srcaddr + j * srcRowStride * comps;
 990       for (i = 0; i < numxpixels; i++) {
 991          for (c = 0; c < comps; c++) {
 992             srcpixels[j][i][c] = *curaddr++ / (CHAN_MAX / 255);
 993          }
 994       }
 995    }
 996 }
 997
 998
 999 void tx_compress_dxtn(GLint srccomps, GLint width, GLint height, const GLubyte *srcPixData,
1000                      GLenum destFormat, GLubyte *dest, GLint dstRowStride)
1001 {
1002       GLubyte *blkaddr = dest;
1003       GLubyte srcpixels[4][4][4];
1004       const GLchan *srcaddr = srcPixData;
1005       GLint numxpixels, numypixels;
1006       GLint i, j;
1007       GLint dstRowDiff;
1008
1009    switch (destFormat) {
1010    case GL_COMPRESSED_RGB_S3TC_DXT1_EXT:
1011    case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT:
1012       /* hmm we used to get called without dstRowStride... */
1013       dstRowDiff = dstRowStride >= (width * 2) ? dstRowStride - (((width + 3) & ~3) * 2) : 0;
1014 /*      fprintf(stderr, "dxt1 tex width %d tex height %d dstRowStride %d\n",
1015               width, height, dstRowStride); */
1016       for (j = 0; j < height; j += 4) {
1017          if (height > j + 3) numypixels = 4;
1018          else numypixels = height - j;
1019          srcaddr = srcPixData + j * width * srccomps;
1020          for (i = 0; i < width; i += 4) {
1021             if (width > i + 3) numxpixels = 4;
1022             else numxpixels = width - i;
1023             extractsrccolors(srcpixels, srcaddr, width, numxpixels, numypixels, srccomps);
1024             encodedxtcolorblockfaster(blkaddr, srcpixels, numxpixels, numypixels, destFormat);
1025             srcaddr += srccomps * numxpixels;
1026             blkaddr += 8;
1027          }
1028          blkaddr += dstRowDiff;
1029       }
1030       break;
1031    case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT:
1032       dstRowDiff = dstRowStride >= (width * 4) ? dstRowStride - (((width + 3) & ~3) * 4) : 0;
1033 /*      fprintf(stderr, "dxt3 tex width %d tex height %d dstRowStride %d\n",
1034               width, height, dstRowStride); */
1035       for (j = 0; j < height; j += 4) {
1036          if (height > j + 3) numypixels = 4;
1037          else numypixels = height - j;
1038          srcaddr = srcPixData + j * width * srccomps;
1039          for (i = 0; i < width; i += 4) {
1040             if (width > i + 3) numxpixels = 4;
1041             else numxpixels = width - i;
1042             extractsrccolors(srcpixels, srcaddr, width, numxpixels, numypixels, srccomps);
1043             *blkaddr++ = (srcpixels[0][0][3] >> 4) | (srcpixels[0][1][3] & 0xf0);
1044             *blkaddr++ = (srcpixels[0][2][3] >> 4) | (srcpixels[0][3][3] & 0xf0);
1045             *blkaddr++ = (srcpixels[1][0][3] >> 4) | (srcpixels[1][1][3] & 0xf0);
1046             *blkaddr++ = (srcpixels[1][2][3] >> 4) | (srcpixels[1][3][3] & 0xf0);
1047             *blkaddr++ = (srcpixels[2][0][3] >> 4) | (srcpixels[2][1][3] & 0xf0);
1048             *blkaddr++ = (srcpixels[2][2][3] >> 4) | (srcpixels[2][3][3] & 0xf0);
1049             *blkaddr++ = (srcpixels[3][0][3] >> 4) | (srcpixels[3][1][3] & 0xf0);
1050             *blkaddr++ = (srcpixels[3][2][3] >> 4) | (srcpixels[3][3][3] & 0xf0);
1051             encodedxtcolorblockfaster(blkaddr, srcpixels, numxpixels, numypixels, destFormat);
1052             srcaddr += srccomps * numxpixels;
1053             blkaddr += 8;
1054          }
1055          blkaddr += dstRowDiff;
1056       }
1057       break;
1058    case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT:
1059       dstRowDiff = dstRowStride >= (width * 4) ? dstRowStride - (((width + 3) & ~3) * 4) : 0;
1060 /*      fprintf(stderr, "dxt5 tex width %d tex height %d dstRowStride %d\n",
1061               width, height, dstRowStride); */
1062       for (j = 0; j < height; j += 4) {
1063          if (height > j + 3) numypixels = 4;
1064          else numypixels = height - j;
1065          srcaddr = srcPixData + j * width * srccomps;
1066          for (i = 0; i < width; i += 4) {
1067             if (width > i + 3) numxpixels = 4;
1068             else numxpixels = width - i;
1069             extractsrccolors(srcpixels, srcaddr, width, numxpixels, numypixels, srccomps);
1070             encodedxt5alpha(blkaddr, srcpixels, numxpixels, numypixels);
1071             encodedxtcolorblockfaster(blkaddr + 8, srcpixels, numxpixels, numypixels, destFormat);
1072             srcaddr += srccomps * numxpixels;
1073             blkaddr += 16;
1074          }
1075          blkaddr += dstRowDiff;
1076       }
1077       break;
1078    default:
1079       fprintf(stderr, "libdxtn: Bad dstFormat %d in tx_compress_dxtn\n", destFormat);
1080       return;
1081    }
1082 }