Merge commit 'mesa_7_6_branch' into mesa_7_7_branch
[mesa.git] / src / mesa / main / texcompress_fxt1.c
1 /*
2 * Mesa 3-D graphics library
3 * Version: 7.1
4 *
5 * Copyright (C) 1999-2008 Brian Paul All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included
15 * in all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
21 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25
26 /**
27 * \file texcompress_fxt1.c
28 * GL_EXT_texture_compression_fxt1 support.
29 */
30
31
32 #include "glheader.h"
33 #include "imports.h"
34 #include "colormac.h"
35 #include "context.h"
36 #include "convolve.h"
37 #include "image.h"
38 #include "mipmap.h"
39 #include "texcompress.h"
40 #include "texcompress_fxt1.h"
41 #include "texstore.h"
42
43
44 #if FEATURE_texture_fxt1
45
46
/*
 * Forward declaration of the FXT1 encoder, so the texstore functions that
 * follow can call it.  They pass the image size, the number of components
 * per texel (3 for RGB, 4 for RGBA), the source pixels with their row
 * stride, and the destination buffer with its row stride.
 */
47 static void
48 fxt1_encode (GLuint width, GLuint height, GLint comps,
49 const void *source, GLint srcRowStride,
50 void *dest, GLint destRowStride);
51
/*
 * Forward declaration of the single-texel decoder used by the fetch
 * functions below.  They call it with the texture data pointer, the row
 * stride, the texel coordinates (i, j) and a 4-element GLchan array that
 * receives the decoded colour.
 * NOTE(review): declared without `static`; the definition is not visible
 * in this part of the file -- confirm the intended linkage.
 */
52 void
53 fxt1_decode_1 (const void *texture, GLint stride,
54 GLint i, GLint j, GLchan *rgba);
55
56
57 /**
58 * Store user's image in rgb_fxt1 format.
59 */
60 GLboolean
61 _mesa_texstore_rgb_fxt1(TEXSTORE_PARAMS)
62 {
63 const GLchan *pixels;
64 GLint srcRowStride;
65 GLubyte *dst;
66 const GLint texWidth = dstRowStride * 8 / 16; /* a bit of a hack */
67 const GLchan *tempImage = NULL;
68
69 ASSERT(dstFormat == MESA_FORMAT_RGB_FXT1);
70 ASSERT(dstXoffset % 8 == 0);
71 ASSERT(dstYoffset % 4 == 0);
72 ASSERT(dstZoffset == 0);
73 (void) dstZoffset;
74 (void) dstImageOffsets;
75
76 if (srcFormat != GL_RGB ||
77 srcType != CHAN_TYPE ||
78 ctx->_ImageTransferState ||
79 srcPacking->SwapBytes) {
80 /* convert image to RGB/GLchan */
81 tempImage = _mesa_make_temp_chan_image(ctx, dims,
82 baseInternalFormat,
83 _mesa_get_format_base_format(dstFormat),
84 srcWidth, srcHeight, srcDepth,
85 srcFormat, srcType, srcAddr,
86 srcPacking);
87 if (!tempImage)
88 return GL_FALSE; /* out of memory */
89 _mesa_adjust_image_for_convolution(ctx, dims, &srcWidth, &srcHeight);
90 pixels = tempImage;
91 srcRowStride = 3 * srcWidth;
92 srcFormat = GL_RGB;
93 }
94 else {
95 pixels = (const GLchan *) srcAddr;
96 srcRowStride = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat,
97 srcType) / sizeof(GLchan);
98 }
99
100 dst = _mesa_compressed_image_address(dstXoffset, dstYoffset, 0,
101 dstFormat,
102 texWidth, (GLubyte *) dstAddr);
103
104 fxt1_encode(srcWidth, srcHeight, 3, pixels, srcRowStride,
105 dst, dstRowStride);
106
107 if (tempImage)
108 _mesa_free((void*) tempImage);
109
110 return GL_TRUE;
111 }
112
113
114 /**
115 * Store user's image in rgba_fxt1 format.
116 */
117 GLboolean
118 _mesa_texstore_rgba_fxt1(TEXSTORE_PARAMS)
119 {
120 const GLchan *pixels;
121 GLint srcRowStride;
122 GLubyte *dst;
123 GLint texWidth = dstRowStride * 8 / 16; /* a bit of a hack */
124 const GLchan *tempImage = NULL;
125
126 ASSERT(dstFormat == MESA_FORMAT_RGBA_FXT1);
127 ASSERT(dstXoffset % 8 == 0);
128 ASSERT(dstYoffset % 4 == 0);
129 ASSERT(dstZoffset == 0);
130 (void) dstZoffset;
131 (void) dstImageOffsets;
132
133 if (srcFormat != GL_RGBA ||
134 srcType != CHAN_TYPE ||
135 ctx->_ImageTransferState ||
136 srcPacking->SwapBytes) {
137 /* convert image to RGBA/GLchan */
138 tempImage = _mesa_make_temp_chan_image(ctx, dims,
139 baseInternalFormat,
140 _mesa_get_format_base_format(dstFormat),
141 srcWidth, srcHeight, srcDepth,
142 srcFormat, srcType, srcAddr,
143 srcPacking);
144 if (!tempImage)
145 return GL_FALSE; /* out of memory */
146 _mesa_adjust_image_for_convolution(ctx, dims, &srcWidth, &srcHeight);
147 pixels = tempImage;
148 srcRowStride = 4 * srcWidth;
149 srcFormat = GL_RGBA;
150 }
151 else {
152 pixels = (const GLchan *) srcAddr;
153 srcRowStride = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat,
154 srcType) / sizeof(GLchan);
155 }
156
157 dst = _mesa_compressed_image_address(dstXoffset, dstYoffset, 0,
158 dstFormat,
159 texWidth, (GLubyte *) dstAddr);
160
161 fxt1_encode(srcWidth, srcHeight, 4, pixels, srcRowStride,
162 dst, dstRowStride);
163
164 if (tempImage)
165 _mesa_free((void*) tempImage);
166
167 return GL_TRUE;
168 }
169
170
171 void
172 _mesa_fetch_texel_2d_f_rgba_fxt1( const struct gl_texture_image *texImage,
173 GLint i, GLint j, GLint k, GLfloat *texel )
174 {
175 /* just sample as GLchan and convert to float here */
176 GLchan rgba[4];
177 (void) k;
178 fxt1_decode_1(texImage->Data, texImage->RowStride, i, j, rgba);
179 texel[RCOMP] = CHAN_TO_FLOAT(rgba[RCOMP]);
180 texel[GCOMP] = CHAN_TO_FLOAT(rgba[GCOMP]);
181 texel[BCOMP] = CHAN_TO_FLOAT(rgba[BCOMP]);
182 texel[ACOMP] = CHAN_TO_FLOAT(rgba[ACOMP]);
183 }
184
185
186 void
187 _mesa_fetch_texel_2d_f_rgb_fxt1( const struct gl_texture_image *texImage,
188 GLint i, GLint j, GLint k, GLfloat *texel )
189 {
190 /* just sample as GLchan and convert to float here */
191 GLchan rgba[4];
192 (void) k;
193 fxt1_decode_1(texImage->Data, texImage->RowStride, i, j, rgba);
194 texel[RCOMP] = CHAN_TO_FLOAT(rgba[RCOMP]);
195 texel[GCOMP] = CHAN_TO_FLOAT(rgba[GCOMP]);
196 texel[BCOMP] = CHAN_TO_FLOAT(rgba[BCOMP]);
197 texel[ACOMP] = 1.0F;
198 }
199
200
201
202 /***************************************************************************\
203 * FXT1 encoder
204 *
205 * The encoder was built by reversing the decoder,
206 * and is vaguely based on Texus2 by 3dfx. Note that this code
207 * is merely a proof of concept, since it is highly UNoptimized;
208 * moreover, it is sub-optimal due to initial conditions passed
209 * to Lloyd's algorithm (the interpolation modes are even worse).
210 \***************************************************************************/
211
212
/*
 * Sizes and tunables shared by the encoder routines below.
 *
 * LL_N_REP, LL_RMS_D and LL_RMS_E are used by fxt1_lloyd(): at most
 * LL_N_REP passes are made, and a pass is accepted when the accumulated
 * error is below LL_RMS_E or improved by less than LL_RMS_D.
 *
 * ISTBLACK(v) reads the bytes at v as one GLuint and compares it with 0.
 * It is applied to texels stored as GLubyte[MAX_COMP], so it is true when
 * every component is 0 (presumably GLuint is 32 bits wide -- confirm).
 */
213 #define MAX_COMP 4 /* ever needed maximum number of components in texel */
214 #define MAX_VECT 4 /* ever needed maximum number of base vectors to find */
215 #define N_TEXELS 32 /* number of texels in a block (always 32) */
216 #define LL_N_REP 50 /* number of iterations in lloyd's vq */
217 #define LL_RMS_D 10 /* fault tolerance (maximum delta) */
218 #define LL_RMS_E 255 /* fault tolerance (maximum error) */
219 #define ALPHA_TS 2 /* alpha threshold: (255 - ALPHA_TS) deemed opaque */
220 #define ISTBLACK(v) (*((GLuint *)(v)) == 0)
221
222
223 /*
224 * Define a 64-bit unsigned integer type and macros
225 */
/*
 * Fx64 is the accumulator in which the quantizers build the upper 64 bits
 * of a block.  The `#if 1` below selects the native uint64_t version; the
 * #else branch is a compiled-out emulation using two GLuint halves.
 *
 *   FX64_MOV32(a, b)  assign the value b to a (to a.lo when emulated)
 *   FX64_OR32(a, b)   OR the value b into a (into a.lo when emulated)
 *   FX64_SHL(a, c)    shift a left by c bits
 *
 * The macros expand textually without parentheses around their arguments.
 * NOTE(review): the emulated FX64_SHL computes a.lo >> (32 - c), which is
 * a shift by the full width when c == 0 (assuming a 32-bit GLuint); the
 * callers visible in this file always pass 5.
 */
226 #if 1
227
228 #define FX64_NATIVE 1
229
230 typedef uint64_t Fx64;
231
232 #define FX64_MOV32(a, b) a = b
233 #define FX64_OR32(a, b) a |= b
234 #define FX64_SHL(a, c) a <<= c
235
236 #else
237
238 #define FX64_NATIVE 0
239
240 typedef struct {
241 GLuint lo, hi;
242 } Fx64;
243
244 #define FX64_MOV32(a, b) a.lo = b
245 #define FX64_OR32(a, b) a.lo |= b
246
247 #define FX64_SHL(a, c) \
248 do { \
249 if ((c) >= 32) { \
250 a.hi = a.lo << ((c) - 32); \
251 a.lo = 0; \
252 } else { \
253 a.hi = (a.hi << (c)) | (a.lo >> (32 - (c))); \
254 a.lo <<= (c); \
255 } \
256 } while (0)
257
258 #endif
259
260
/*
 * F(i) is the per-component weight used by MAKEIVEC; it is currently the
 * constant 1 for every component.  SAFECDOT enables the range clamp in
 * CALCCDOT.
 */
261 #define F(i) (GLfloat)1 /* can be used to obtain an oblong metric: 0.30 / 0.59 / 0.11 */
262 #define SAFECDOT 1 /* for paranoids */
263
/*
 * MAKEIVEC(NV, NC, IV, B, V0, V1)
 *
 * Builds the interpolation vector IV (NC components) and offset B for the
 * segment from colour V0 to colour V1, such that for a colour V
 *     dot(V, IV) + B
 * is the position of V projected onto that segment, scaled so that V0
 * maps to 0 and V1 maps to NV, plus 0.5 for rounding (with F(i) == 1).
 *
 * The macro uses a variable named `i` from the enclosing scope as its loop
 * counter and assigns to IV and B.  It divides by the squared length of
 * (V1 - V0), so V0 and V1 must differ.
 */
264 #define MAKEIVEC(NV, NC, IV, B, V0, V1) \
265 do { \
266 /* compute interpolation vector */ \
267 GLfloat d2 = 0.0F; \
268 GLfloat rd2; \
269 \
270 for (i = 0; i < NC; i++) { \
271 IV[i] = (V1[i] - V0[i]) * F(i); \
272 d2 += IV[i] * IV[i]; \
273 } \
274 rd2 = (GLfloat)NV / d2; \
275 B = 0; \
276 for (i = 0; i < NC; i++) { \
277 IV[i] *= F(i); \
278 B -= IV[i] * V0[i]; \
279 IV[i] *= rd2; \
280 } \
281 B = B * rd2 + 0.5f; \
282 } while (0)
283
/*
 * CALCCDOT(TEXEL, NV, NC, IV, B, V)
 *
 * Computes the interpolation index of colour V: the dot product of V with
 * IV over NC components, plus B, truncated to GLint and stored in TEXEL.
 * When SAFECDOT is non-zero the result is clamped to the range [0, NV].
 * Like MAKEIVEC, it uses the enclosing scope's `i` as loop counter.
 */
284 #define CALCCDOT(TEXEL, NV, NC, IV, B, V)\
285 do { \
286 GLfloat dot = 0.0F; \
287 for (i = 0; i < NC; i++) { \
288 dot += V[i] * IV[i]; \
289 } \
290 TEXEL = (GLint)(dot + B); \
291 if (SAFECDOT) { \
292 if (TEXEL < 0) { \
293 TEXEL = 0; \
294 } else if (TEXEL > NV) { \
295 TEXEL = NV; \
296 } \
297 } \
298 } while (0)
299
300
301 static GLint
302 fxt1_bestcol (GLfloat vec[][MAX_COMP], GLint nv,
303 GLubyte input[MAX_COMP], GLint nc)
304 {
305 GLint i, j, best = -1;
306 GLfloat err = 1e9; /* big enough */
307
308 for (j = 0; j < nv; j++) {
309 GLfloat e = 0.0F;
310 for (i = 0; i < nc; i++) {
311 e += (vec[j][i] - input[i]) * (vec[j][i] - input[i]);
312 }
313 if (e < err) {
314 err = e;
315 best = j;
316 }
317 }
318
319 return best;
320 }
321
322
323 static GLint
324 fxt1_worst (GLfloat vec[MAX_COMP],
325 GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
326 {
327 GLint i, k, worst = -1;
328 GLfloat err = -1.0F; /* small enough */
329
330 for (k = 0; k < n; k++) {
331 GLfloat e = 0.0F;
332 for (i = 0; i < nc; i++) {
333 e += (vec[i] - input[k][i]) * (vec[i] - input[k][i]);
334 }
335 if (e > err) {
336 err = e;
337 worst = k;
338 }
339 }
340
341 return worst;
342 }
343
344
345 static GLint
346 fxt1_variance (GLdouble variance[MAX_COMP],
347 GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
348 {
349 GLint i, k, best = 0;
350 GLint sx, sx2;
351 GLdouble var, maxvar = -1; /* small enough */
352 GLdouble teenth = 1.0 / n;
353
354 for (i = 0; i < nc; i++) {
355 sx = sx2 = 0;
356 for (k = 0; k < n; k++) {
357 GLint t = input[k][i];
358 sx += t;
359 sx2 += t * t;
360 }
361 var = sx2 * teenth - sx * sx * teenth * teenth;
362 if (maxvar < var) {
363 maxvar = var;
364 best = i;
365 }
366 if (variance) {
367 variance[i] = var;
368 }
369 }
370
371 return best;
372 }
373
374
375 static GLint
376 fxt1_choose (GLfloat vec[][MAX_COMP], GLint nv,
377 GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
378 {
379 #if 0
380 /* Choose colors from a grid.
381 */
382 GLint i, j;
383
384 for (j = 0; j < nv; j++) {
385 GLint m = j * (n - 1) / (nv - 1);
386 for (i = 0; i < nc; i++) {
387 vec[j][i] = input[m][i];
388 }
389 }
390 #else
391 /* Our solution here is to find the darkest and brightest colors in
392 * the 8x4 tile and use those as the two representative colors.
393 * There are probably better algorithms to use (histogram-based).
394 */
395 GLint i, j, k;
396 GLint minSum = 2000; /* big enough */
397 GLint maxSum = -1; /* small enough */
398 GLint minCol = 0; /* phoudoin: silent compiler! */
399 GLint maxCol = 0; /* phoudoin: silent compiler! */
400
401 struct {
402 GLint flag;
403 GLint key;
404 GLint freq;
405 GLint idx;
406 } hist[N_TEXELS];
407 GLint lenh = 0;
408
409 _mesa_memset(hist, 0, sizeof(hist));
410
411 for (k = 0; k < n; k++) {
412 GLint l;
413 GLint key = 0;
414 GLint sum = 0;
415 for (i = 0; i < nc; i++) {
416 key <<= 8;
417 key |= input[k][i];
418 sum += input[k][i];
419 }
420 for (l = 0; l < n; l++) {
421 if (!hist[l].flag) {
422 /* alloc new slot */
423 hist[l].flag = !0;
424 hist[l].key = key;
425 hist[l].freq = 1;
426 hist[l].idx = k;
427 lenh = l + 1;
428 break;
429 } else if (hist[l].key == key) {
430 hist[l].freq++;
431 break;
432 }
433 }
434 if (minSum > sum) {
435 minSum = sum;
436 minCol = k;
437 }
438 if (maxSum < sum) {
439 maxSum = sum;
440 maxCol = k;
441 }
442 }
443
444 if (lenh <= nv) {
445 for (j = 0; j < lenh; j++) {
446 for (i = 0; i < nc; i++) {
447 vec[j][i] = (GLfloat)input[hist[j].idx][i];
448 }
449 }
450 for (; j < nv; j++) {
451 for (i = 0; i < nc; i++) {
452 vec[j][i] = vec[0][i];
453 }
454 }
455 return 0;
456 }
457
458 for (j = 0; j < nv; j++) {
459 for (i = 0; i < nc; i++) {
460 vec[j][i] = ((nv - 1 - j) * input[minCol][i] + j * input[maxCol][i] + (nv - 1) / 2) / (GLfloat)(nv - 1);
461 }
462 }
463 #endif
464
465 return !0;
466 }
467
468
469 static GLint
470 fxt1_lloyd (GLfloat vec[][MAX_COMP], GLint nv,
471 GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
472 {
473 /* Use the generalized lloyd's algorithm for VQ:
474 * find 4 color vectors.
475 *
476 * for each sample color
477 * sort to nearest vector.
478 *
479 * replace each vector with the centroid of it's matching colors.
480 *
481 * repeat until RMS doesn't improve.
482 *
483 * if a color vector has no samples, or becomes the same as another
484 * vector, replace it with the color which is farthest from a sample.
485 *
486 * vec[][MAX_COMP] initial vectors and resulting colors
487 * nv number of resulting colors required
488 * input[N_TEXELS][MAX_COMP] input texels
489 * nc number of components in input / vec
490 * n number of input samples
491 */
492
493 GLint sum[MAX_VECT][MAX_COMP]; /* used to accumulate closest texels */
494 GLint cnt[MAX_VECT]; /* how many times a certain vector was chosen */
495 GLfloat error, lasterror = 1e9;
496
497 GLint i, j, k, rep;
498
499 /* the quantizer */
500 for (rep = 0; rep < LL_N_REP; rep++) {
501 /* reset sums & counters */
502 for (j = 0; j < nv; j++) {
503 for (i = 0; i < nc; i++) {
504 sum[j][i] = 0;
505 }
506 cnt[j] = 0;
507 }
508 error = 0;
509
510 /* scan whole block */
511 for (k = 0; k < n; k++) {
512 #if 1
513 GLint best = -1;
514 GLfloat err = 1e9; /* big enough */
515 /* determine best vector */
516 for (j = 0; j < nv; j++) {
517 GLfloat e = (vec[j][0] - input[k][0]) * (vec[j][0] - input[k][0]) +
518 (vec[j][1] - input[k][1]) * (vec[j][1] - input[k][1]) +
519 (vec[j][2] - input[k][2]) * (vec[j][2] - input[k][2]);
520 if (nc == 4) {
521 e += (vec[j][3] - input[k][3]) * (vec[j][3] - input[k][3]);
522 }
523 if (e < err) {
524 err = e;
525 best = j;
526 }
527 }
528 #else
529 GLint best = fxt1_bestcol(vec, nv, input[k], nc, &err);
530 #endif
531 /* add in closest color */
532 for (i = 0; i < nc; i++) {
533 sum[best][i] += input[k][i];
534 }
535 /* mark this vector as used */
536 cnt[best]++;
537 /* accumulate error */
538 error += err;
539 }
540
541 /* check RMS */
542 if ((error < LL_RMS_E) ||
543 ((error < lasterror) && ((lasterror - error) < LL_RMS_D))) {
544 return !0; /* good match */
545 }
546 lasterror = error;
547
548 /* move each vector to the barycenter of its closest colors */
549 for (j = 0; j < nv; j++) {
550 if (cnt[j]) {
551 GLfloat div = 1.0F / cnt[j];
552 for (i = 0; i < nc; i++) {
553 vec[j][i] = div * sum[j][i];
554 }
555 } else {
556 /* this vec has no samples or is identical with a previous vec */
557 GLint worst = fxt1_worst(vec[j], input, nc, n);
558 for (i = 0; i < nc; i++) {
559 vec[j][i] = input[worst][i];
560 }
561 }
562 }
563 }
564
565 return 0; /* could not converge fast enough */
566 }
567
568
569 static void
570 fxt1_quantize_CHROMA (GLuint *cc,
571 GLubyte input[N_TEXELS][MAX_COMP])
572 {
573 const GLint n_vect = 4; /* 4 base vectors to find */
574 const GLint n_comp = 3; /* 3 components: R, G, B */
575 GLfloat vec[MAX_VECT][MAX_COMP];
576 GLint i, j, k;
577 Fx64 hi; /* high quadword */
578 GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
579
580 if (fxt1_choose(vec, n_vect, input, n_comp, N_TEXELS) != 0) {
581 fxt1_lloyd(vec, n_vect, input, n_comp, N_TEXELS);
582 }
583
584 FX64_MOV32(hi, 4); /* cc-chroma = "010" + unused bit */
585 for (j = n_vect - 1; j >= 0; j--) {
586 for (i = 0; i < n_comp; i++) {
587 /* add in colors */
588 FX64_SHL(hi, 5);
589 FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
590 }
591 }
592 ((Fx64 *)cc)[1] = hi;
593
594 lohi = lolo = 0;
595 /* right microtile */
596 for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) {
597 lohi <<= 2;
598 lohi |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
599 }
600 /* left microtile */
601 for (; k >= 0; k--) {
602 lolo <<= 2;
603 lolo |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
604 }
605 cc[1] = lohi;
606 cc[0] = lolo;
607 }
608
609
610 static void
611 fxt1_quantize_ALPHA0 (GLuint *cc,
612 GLubyte input[N_TEXELS][MAX_COMP],
613 GLubyte reord[N_TEXELS][MAX_COMP], GLint n)
614 {
615 const GLint n_vect = 3; /* 3 base vectors to find */
616 const GLint n_comp = 4; /* 4 components: R, G, B, A */
617 GLfloat vec[MAX_VECT][MAX_COMP];
618 GLint i, j, k;
619 Fx64 hi; /* high quadword */
620 GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
621
622 /* the last vector indicates zero */
623 for (i = 0; i < n_comp; i++) {
624 vec[n_vect][i] = 0;
625 }
626
627 /* the first n texels in reord are guaranteed to be non-zero */
628 if (fxt1_choose(vec, n_vect, reord, n_comp, n) != 0) {
629 fxt1_lloyd(vec, n_vect, reord, n_comp, n);
630 }
631
632 FX64_MOV32(hi, 6); /* alpha = "011" + lerp = 0 */
633 for (j = n_vect - 1; j >= 0; j--) {
634 /* add in alphas */
635 FX64_SHL(hi, 5);
636 FX64_OR32(hi, (GLuint)(vec[j][ACOMP] / 8.0F));
637 }
638 for (j = n_vect - 1; j >= 0; j--) {
639 for (i = 0; i < n_comp - 1; i++) {
640 /* add in colors */
641 FX64_SHL(hi, 5);
642 FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
643 }
644 }
645 ((Fx64 *)cc)[1] = hi;
646
647 lohi = lolo = 0;
648 /* right microtile */
649 for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) {
650 lohi <<= 2;
651 lohi |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
652 }
653 /* left microtile */
654 for (; k >= 0; k--) {
655 lolo <<= 2;
656 lolo |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
657 }
658 cc[1] = lohi;
659 cc[0] = lolo;
660 }
661
662
/*
 * Encode a block with three RGBA vectors, the middle one shared by both
 * microtiles, and interpolated indices.
 *
 * For each half of the block (texels 0..15 and 16..31) the texels with
 * the smallest and largest component sum are located.  Of the four
 * extrema, the closest left/right pair is merged into the shared vector
 * vec[1]; vec[0] is the remaining left extremum and vec[2] the remaining
 * right extremum.  Left texels are indexed along vec[0] -> vec[1] into
 * cc[0], right texels along vec[2] -> vec[1] into cc[1].  The upper half
 * of the block (cc viewed as Fx64, element 1) starts from the value 7 and
 * then receives the three alphas and the three colours, 5 bits each.
 *
 * cc:    the 4 GLuints of the block to fill
 * input: the block's 32 texels
 */
663 static void
664 fxt1_quantize_ALPHA1 (GLuint *cc,
665 GLubyte input[N_TEXELS][MAX_COMP])
666 {
667 const GLint n_vect = 3; /* highest vector number in each microtile */
668 const GLint n_comp = 4; /* 4 components: R, G, B, A */
669 GLfloat vec[1 + 1 + 1][MAX_COMP]; /* 1.5 extrema for each sub-block */
670 GLfloat b, iv[MAX_COMP]; /* interpolation vector */
671 GLint i, j, k;
672 Fx64 hi; /* high quadword */
673 GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
674
675 GLint minSum;
676 GLint maxSum;
677 GLint minColL = 0, maxColL = 0;
678 GLint minColR = 0, maxColR = 0;
679 GLint sumL = 0, sumR = 0;
680 GLint nn_comp;
681 /* Our solution here is to find the darkest and brightest colors in
682 * the 4x4 tile and use those as the two representative colors.
683 * There are probably better algorithms to use (histogram-based).
684 */
/* Left half.  The scan starts with all n_comp components; as long as it
 * yields the same texel for minimum and maximum it is repeated with one
 * component fewer (the highest-numbered one is dropped first).  sumL is
 * not reset between repetitions, so it accumulates the sums of every scan
 * that was performed.
 */
685 nn_comp = n_comp;
686 while ((minColL == maxColL) && nn_comp) {
687 minSum = 2000; /* big enough */
688 maxSum = -1; /* small enough */
689 for (k = 0; k < N_TEXELS / 2; k++) {
690 GLint sum = 0;
691 for (i = 0; i < nn_comp; i++) {
692 sum += input[k][i];
693 }
694 if (minSum > sum) {
695 minSum = sum;
696 minColL = k;
697 }
698 if (maxSum < sum) {
699 maxSum = sum;
700 maxColL = k;
701 }
702 sumL += sum;
703 }
704
705 nn_comp--;
706 }
707
/* Right half: same procedure over texels 16..31, accumulating sumR. */
708 nn_comp = n_comp;
709 while ((minColR == maxColR) && nn_comp) {
710 minSum = 2000; /* big enough */
711 maxSum = -1; /* small enough */
712 for (k = N_TEXELS / 2; k < N_TEXELS; k++) {
713 GLint sum = 0;
714 for (i = 0; i < nn_comp; i++) {
715 sum += input[k][i];
716 }
717 if (minSum > sum) {
718 minSum = sum;
719 minColR = k;
720 }
721 if (maxSum < sum) {
722 maxSum = sum;
723 maxColR = k;
724 }
725 sumR += sum;
726 }
727
728 nn_comp--;
729 }
730
731 /* choose the common vector (yuck!) */
732 {
733 GLint j1, j2;
734 GLint v1 = 0, v2 = 0;
735 GLfloat err = 1e9; /* big enough */
736 GLfloat tv[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
/* tv[0], tv[1]: left min / max; tv[2], tv[3]: right min / max */
737 for (i = 0; i < n_comp; i++) {
738 tv[0][i] = input[minColL][i];
739 tv[1][i] = input[maxColL][i];
740 tv[2][i] = input[minColR][i];
741 tv[3][i] = input[maxColR][i];
742 }
/* find the left extremum v1 and right extremum v2 that are closest */
743 for (j1 = 0; j1 < 2; j1++) {
744 for (j2 = 2; j2 < 4; j2++) {
745 GLfloat e = 0.0F;
746 for (i = 0; i < n_comp; i++) {
747 e += (tv[j1][i] - tv[j2][i]) * (tv[j1][i] - tv[j2][i]);
748 }
749 if (e < err) {
750 err = e;
751 v1 = j1;
752 v2 = j2;
753 }
754 }
755 }
/* vec[1] is the average of the closest pair weighted by sumL and sumR;
 * vec[0] / vec[2] are the other extremum of the left / right half
 * (1 - v1 selects the other of tv[0..1], 5 - v2 the other of tv[2..3]).
 */
756 for (i = 0; i < n_comp; i++) {
757 vec[0][i] = tv[1 - v1][i];
758 vec[1][i] = (tv[v1][i] * sumL + tv[v2][i] * sumR) / (sumL + sumR);
759 vec[2][i] = tv[5 - v2][i];
760 }
761 }
762
763 /* left microtile */
764 cc[0] = 0;
765 if (minColL != maxColL) {
766 /* compute interpolation vector */
767 MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
768
769 /* add in texels */
/* texel 15 is shifted in first, so texel 0 ends up in the lowest bits */
770 lolo = 0;
771 for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
772 GLint texel;
773 /* interpolate color */
774 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
775 /* add in texel */
776 lolo <<= 2;
777 lolo |= texel;
778 }
779
780 cc[0] = lolo;
781 }
782
783 /* right microtile */
784 cc[1] = 0;
785 if (minColR != maxColR) {
786 /* compute interpolation vector */
787 MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[1]);
788
789 /* add in texels */
/* texel 31 is shifted in first, so texel 16 ends up in the lowest bits */
790 lohi = 0;
791 for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
792 GLint texel;
793 /* interpolate color */
794 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
795 /* add in texel */
796 lohi <<= 2;
797 lohi |= texel;
798 }
799
800 cc[1] = lohi;
801 }
802
/* upper half: mode value, then alphas, then colours, vec[2] first */
803 FX64_MOV32(hi, 7); /* alpha = "011" + lerp = 1 */
804 for (j = n_vect - 1; j >= 0; j--) {
805 /* add in alphas */
806 FX64_SHL(hi, 5);
807 FX64_OR32(hi, (GLuint)(vec[j][ACOMP] / 8.0F));
808 }
809 for (j = n_vect - 1; j >= 0; j--) {
810 for (i = 0; i < n_comp - 1; i++) {
811 /* add in colors */
812 FX64_SHL(hi, 5);
813 FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
814 }
815 }
816 ((Fx64 *)cc)[1] = hi;
817 }
818
819
820 static void
821 fxt1_quantize_HI (GLuint *cc,
822 GLubyte input[N_TEXELS][MAX_COMP],
823 GLubyte reord[N_TEXELS][MAX_COMP], GLint n)
824 {
825 const GLint n_vect = 6; /* highest vector number */
826 const GLint n_comp = 3; /* 3 components: R, G, B */
827 GLfloat b = 0.0F; /* phoudoin: silent compiler! */
828 GLfloat iv[MAX_COMP]; /* interpolation vector */
829 GLint i, k;
830 GLuint hihi; /* high quadword: hi dword */
831
832 GLint minSum = 2000; /* big enough */
833 GLint maxSum = -1; /* small enough */
834 GLint minCol = 0; /* phoudoin: silent compiler! */
835 GLint maxCol = 0; /* phoudoin: silent compiler! */
836
837 /* Our solution here is to find the darkest and brightest colors in
838 * the 8x4 tile and use those as the two representative colors.
839 * There are probably better algorithms to use (histogram-based).
840 */
841 for (k = 0; k < n; k++) {
842 GLint sum = 0;
843 for (i = 0; i < n_comp; i++) {
844 sum += reord[k][i];
845 }
846 if (minSum > sum) {
847 minSum = sum;
848 minCol = k;
849 }
850 if (maxSum < sum) {
851 maxSum = sum;
852 maxCol = k;
853 }
854 }
855
856 hihi = 0; /* cc-hi = "00" */
857 for (i = 0; i < n_comp; i++) {
858 /* add in colors */
859 hihi <<= 5;
860 hihi |= reord[maxCol][i] >> 3;
861 }
862 for (i = 0; i < n_comp; i++) {
863 /* add in colors */
864 hihi <<= 5;
865 hihi |= reord[minCol][i] >> 3;
866 }
867 cc[3] = hihi;
868 cc[0] = cc[1] = cc[2] = 0;
869
870 /* compute interpolation vector */
871 if (minCol != maxCol) {
872 MAKEIVEC(n_vect, n_comp, iv, b, reord[minCol], reord[maxCol]);
873 }
874
875 /* add in texels */
876 for (k = N_TEXELS - 1; k >= 0; k--) {
877 GLint t = k * 3;
878 GLuint *kk = (GLuint *)((char *)cc + t / 8);
879 GLint texel = n_vect + 1; /* transparent black */
880
881 if (!ISTBLACK(input[k])) {
882 if (minCol != maxCol) {
883 /* interpolate color */
884 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
885 /* add in texel */
886 kk[0] |= texel << (t & 7);
887 }
888 } else {
889 /* add in texel */
890 kk[0] |= texel << (t & 7);
891 }
892 }
893 }
894
895
896 static void
897 fxt1_quantize_MIXED1 (GLuint *cc,
898 GLubyte input[N_TEXELS][MAX_COMP])
899 {
900 const GLint n_vect = 2; /* highest vector number in each microtile */
901 const GLint n_comp = 3; /* 3 components: R, G, B */
902 GLubyte vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
903 GLfloat b, iv[MAX_COMP]; /* interpolation vector */
904 GLint i, j, k;
905 Fx64 hi; /* high quadword */
906 GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
907
908 GLint minSum;
909 GLint maxSum;
910 GLint minColL = 0, maxColL = -1;
911 GLint minColR = 0, maxColR = -1;
912
913 /* Our solution here is to find the darkest and brightest colors in
914 * the 4x4 tile and use those as the two representative colors.
915 * There are probably better algorithms to use (histogram-based).
916 */
917 minSum = 2000; /* big enough */
918 maxSum = -1; /* small enough */
919 for (k = 0; k < N_TEXELS / 2; k++) {
920 if (!ISTBLACK(input[k])) {
921 GLint sum = 0;
922 for (i = 0; i < n_comp; i++) {
923 sum += input[k][i];
924 }
925 if (minSum > sum) {
926 minSum = sum;
927 minColL = k;
928 }
929 if (maxSum < sum) {
930 maxSum = sum;
931 maxColL = k;
932 }
933 }
934 }
935 minSum = 2000; /* big enough */
936 maxSum = -1; /* small enough */
937 for (; k < N_TEXELS; k++) {
938 if (!ISTBLACK(input[k])) {
939 GLint sum = 0;
940 for (i = 0; i < n_comp; i++) {
941 sum += input[k][i];
942 }
943 if (minSum > sum) {
944 minSum = sum;
945 minColR = k;
946 }
947 if (maxSum < sum) {
948 maxSum = sum;
949 maxColR = k;
950 }
951 }
952 }
953
954 /* left microtile */
955 if (maxColL == -1) {
956 /* all transparent black */
957 cc[0] = ~0u;
958 for (i = 0; i < n_comp; i++) {
959 vec[0][i] = 0;
960 vec[1][i] = 0;
961 }
962 } else {
963 cc[0] = 0;
964 for (i = 0; i < n_comp; i++) {
965 vec[0][i] = input[minColL][i];
966 vec[1][i] = input[maxColL][i];
967 }
968 if (minColL != maxColL) {
969 /* compute interpolation vector */
970 MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
971
972 /* add in texels */
973 lolo = 0;
974 for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
975 GLint texel = n_vect + 1; /* transparent black */
976 if (!ISTBLACK(input[k])) {
977 /* interpolate color */
978 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
979 }
980 /* add in texel */
981 lolo <<= 2;
982 lolo |= texel;
983 }
984 cc[0] = lolo;
985 }
986 }
987
988 /* right microtile */
989 if (maxColR == -1) {
990 /* all transparent black */
991 cc[1] = ~0u;
992 for (i = 0; i < n_comp; i++) {
993 vec[2][i] = 0;
994 vec[3][i] = 0;
995 }
996 } else {
997 cc[1] = 0;
998 for (i = 0; i < n_comp; i++) {
999 vec[2][i] = input[minColR][i];
1000 vec[3][i] = input[maxColR][i];
1001 }
1002 if (minColR != maxColR) {
1003 /* compute interpolation vector */
1004 MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);
1005
1006 /* add in texels */
1007 lohi = 0;
1008 for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
1009 GLint texel = n_vect + 1; /* transparent black */
1010 if (!ISTBLACK(input[k])) {
1011 /* interpolate color */
1012 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
1013 }
1014 /* add in texel */
1015 lohi <<= 2;
1016 lohi |= texel;
1017 }
1018 cc[1] = lohi;
1019 }
1020 }
1021
1022 FX64_MOV32(hi, 9 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
1023 for (j = 2 * 2 - 1; j >= 0; j--) {
1024 for (i = 0; i < n_comp; i++) {
1025 /* add in colors */
1026 FX64_SHL(hi, 5);
1027 FX64_OR32(hi, vec[j][i] >> 3);
1028 }
1029 }
1030 ((Fx64 *)cc)[1] = hi;
1031 }
1032
1033
/*
 * Encode a block with two RGB endpoints per microtile and four
 * interpolation levels (indices 0..3), without any transparent index.
 *
 * For each half of the block (texels 0..15 and 16..31) the colour channel
 * with the largest variance is determined, and the texels holding the
 * smallest and the largest value in that channel become the endpoints.
 * The indices of the left half go to cc[0], those of the right half to
 * cc[1].  The upper half of the block (cc viewed as Fx64, element 1)
 * receives the mode bits and the four endpoints, 5 bits per component.
 *
 * cc:    the 4 GLuints of the block to fill
 * input: the block's 32 texels
 */
1034 static void
1035 fxt1_quantize_MIXED0 (GLuint *cc,
1036 GLubyte input[N_TEXELS][MAX_COMP])
1037 {
1038 const GLint n_vect = 3; /* highest vector number in each microtile */
1039 const GLint n_comp = 3; /* 3 components: R, G, B */
1040 GLubyte vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
1041 GLfloat b, iv[MAX_COMP]; /* interpolation vector */
1042 GLint i, j, k;
1043 Fx64 hi; /* high quadword */
1044 GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
1045
1046 GLint minColL = 0, maxColL = 0;
1047 GLint minColR = 0, maxColR = 0;
/* Disabled alternative: pick the endpoints by smallest / largest
 * component sum instead of by the channel with maximum variance.
 */
1048 #if 0
1049 GLint minSum;
1050 GLint maxSum;
1051
1052 /* Our solution here is to find the darkest and brightest colors in
1053 * the 4x4 tile and use those as the two representative colors.
1054 * There are probably better algorithms to use (histogram-based).
1055 */
1056 minSum = 2000; /* big enough */
1057 maxSum = -1; /* small enough */
1058 for (k = 0; k < N_TEXELS / 2; k++) {
1059 GLint sum = 0;
1060 for (i = 0; i < n_comp; i++) {
1061 sum += input[k][i];
1062 }
1063 if (minSum > sum) {
1064 minSum = sum;
1065 minColL = k;
1066 }
1067 if (maxSum < sum) {
1068 maxSum = sum;
1069 maxColL = k;
1070 }
1071 }
1072 minSum = 2000; /* big enough */
1073 maxSum = -1; /* small enough */
1074 for (; k < N_TEXELS; k++) {
1075 GLint sum = 0;
1076 for (i = 0; i < n_comp; i++) {
1077 sum += input[k][i];
1078 }
1079 if (minSum > sum) {
1080 minSum = sum;
1081 minColR = k;
1082 }
1083 if (maxSum < sum) {
1084 maxSum = sum;
1085 maxColR = k;
1086 }
1087 }
1088 #else
1089 GLint minVal;
1090 GLint maxVal;
/* channel with the largest variance in the left / right 16 texels */
1091 GLint maxVarL = fxt1_variance(NULL, input, n_comp, N_TEXELS / 2);
1092 GLint maxVarR = fxt1_variance(NULL, &input[N_TEXELS / 2], n_comp, N_TEXELS / 2);
1093
1094 /* Scan the channel with max variance for lo & hi
1095 * and use those as the two representative colors.
1096 */
1097 minVal = 2000; /* big enough */
1098 maxVal = -1; /* small enough */
1099 for (k = 0; k < N_TEXELS / 2; k++) {
1100 GLint t = input[k][maxVarL];
1101 if (minVal > t) {
1102 minVal = t;
1103 minColL = k;
1104 }
1105 if (maxVal < t) {
1106 maxVal = t;
1107 maxColL = k;
1108 }
1109 }
1110 minVal = 2000; /* big enough */
1111 maxVal = -1; /* small enough */
1112 for (; k < N_TEXELS; k++) {
1113 GLint t = input[k][maxVarR];
1114 if (minVal > t) {
1115 minVal = t;
1116 minColR = k;
1117 }
1118 if (maxVal < t) {
1119 maxVal = t;
1120 maxColR = k;
1121 }
1122 }
1123 #endif
1124
1125 /* left microtile */
1126 cc[0] = 0;
1127 for (i = 0; i < n_comp; i++) {
1128 vec[0][i] = input[minColL][i];
1129 vec[1][i] = input[maxColL][i];
1130 }
/* with identical endpoints the index word simply stays 0 */
1131 if (minColL != maxColL) {
1132 /* compute interpolation vector */
1133 MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
1134
1135 /* add in texels */
/* texel 15 is shifted in first, so texel 0 ends up in bits 0..1 */
1136 lolo = 0;
1137 for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
1138 GLint texel;
1139 /* interpolate color */
1140 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
1141 /* add in texel */
1142 lolo <<= 2;
1143 lolo |= texel;
1144 }
1145
1146 /* funky encoding for LSB of green */
/* Bit 1 of lolo (the upper index bit of texel 0) is compared with bit 2
 * of (green of vec[1] XOR green of vec[0]).  If they differ, the two
 * endpoints are exchanged and every index bit is inverted, which keeps
 * each texel at the same position between the (now swapped) endpoints.
 */
1147 if ((GLint)((lolo >> 1) & 1) != (((vec[1][GCOMP] ^ vec[0][GCOMP]) >> 2) & 1)) {
1148 for (i = 0; i < n_comp; i++) {
1149 vec[1][i] = input[minColL][i];
1150 vec[0][i] = input[maxColL][i];
1151 }
1152 lolo = ~lolo;
1153 }
1154
1155 cc[0] = lolo;
1156 }
1157
1158 /* right microtile */
1159 cc[1] = 0;
1160 for (i = 0; i < n_comp; i++) {
1161 vec[2][i] = input[minColR][i];
1162 vec[3][i] = input[maxColR][i];
1163 }
1164 if (minColR != maxColR) {
1165 /* compute interpolation vector */
1166 MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);
1167
1168 /* add in texels */
/* texel 31 is shifted in first, so texel 16 ends up in bits 0..1 */
1169 lohi = 0;
1170 for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
1171 GLint texel;
1172 /* interpolate color */
1173 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
1174 /* add in texel */
1175 lohi <<= 2;
1176 lohi |= texel;
1177 }
1178
1179 /* funky encoding for LSB of green */
/* same rule as for the left microtile, applied to vec[2] / vec[3] */
1180 if ((GLint)((lohi >> 1) & 1) != (((vec[3][GCOMP] ^ vec[2][GCOMP]) >> 2) & 1)) {
1181 for (i = 0; i < n_comp; i++) {
1182 vec[3][i] = input[minColR][i];
1183 vec[2][i] = input[maxColR][i];
1184 }
1185 lohi = ~lohi;
1186 }
1187
1188 cc[1] = lohi;
1189 }
1190
/* upper half: the value 8 combined with bit 2 of vec[3]'s green (kept at
 * bit 2) and bit 2 of vec[1]'s green (moved to bit 1), followed by the
 * four endpoints, vec[3] first, 5 bits per component
 */
1191 FX64_MOV32(hi, 8 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
1192 for (j = 2 * 2 - 1; j >= 0; j--) {
1193 for (i = 0; i < n_comp; i++) {
1194 /* add in colors */
1195 FX64_SHL(hi, 5);
1196 FX64_OR32(hi, vec[j][i] >> 3);
1197 }
1198 }
1199 ((Fx64 *)cc)[1] = hi;
1200 }
1201
1202
1203 static void
1204 fxt1_quantize (GLuint *cc, const GLubyte *lines[], GLint comps)
1205 {
1206 GLint trualpha;
1207 GLubyte reord[N_TEXELS][MAX_COMP];
1208
1209 GLubyte input[N_TEXELS][MAX_COMP];
1210 GLint i, k, l;
1211
1212 if (comps == 3) {
1213 /* make the whole block opaque */
1214 _mesa_memset(input, -1, sizeof(input));
1215 }
1216
1217 /* 8 texels each line */
1218 for (l = 0; l < 4; l++) {
1219 for (k = 0; k < 4; k++) {
1220 for (i = 0; i < comps; i++) {
1221 input[k + l * 4][i] = *lines[l]++;
1222 }
1223 }
1224 for (; k < 8; k++) {
1225 for (i = 0; i < comps; i++) {
1226 input[k + l * 4 + 12][i] = *lines[l]++;
1227 }
1228 }
1229 }
1230
1231 /* block layout:
1232 * 00, 01, 02, 03, 08, 09, 0a, 0b
1233 * 10, 11, 12, 13, 18, 19, 1a, 1b
1234 * 04, 05, 06, 07, 0c, 0d, 0e, 0f
1235 * 14, 15, 16, 17, 1c, 1d, 1e, 1f
1236 */
1237
1238 /* [dBorca]
1239 * stupidity flows forth from this
1240 */
1241 l = N_TEXELS;
1242 trualpha = 0;
1243 if (comps == 4) {
1244 /* skip all transparent black texels */
1245 l = 0;
1246 for (k = 0; k < N_TEXELS; k++) {
1247 /* test all components against 0 */
1248 if (!ISTBLACK(input[k])) {
1249 /* texel is not transparent black */
1250 COPY_4UBV(reord[l], input[k]);
1251 if (reord[l][ACOMP] < (255 - ALPHA_TS)) {
1252 /* non-opaque texel */
1253 trualpha = !0;
1254 }
1255 l++;
1256 }
1257 }
1258 }
1259
1260 #if 0
1261 if (trualpha) {
1262 fxt1_quantize_ALPHA0(cc, input, reord, l);
1263 } else if (l == 0) {
1264 cc[0] = cc[1] = cc[2] = -1;
1265 cc[3] = 0;
1266 } else if (l < N_TEXELS) {
1267 fxt1_quantize_HI(cc, input, reord, l);
1268 } else {
1269 fxt1_quantize_CHROMA(cc, input);
1270 }
1271 (void)fxt1_quantize_ALPHA1;
1272 (void)fxt1_quantize_MIXED1;
1273 (void)fxt1_quantize_MIXED0;
1274 #else
1275 if (trualpha) {
1276 fxt1_quantize_ALPHA1(cc, input);
1277 } else if (l == 0) {
1278 cc[0] = cc[1] = cc[2] = ~0u;
1279 cc[3] = 0;
1280 } else if (l < N_TEXELS) {
1281 fxt1_quantize_MIXED1(cc, input);
1282 } else {
1283 fxt1_quantize_MIXED0(cc, input);
1284 }
1285 (void)fxt1_quantize_ALPHA0;
1286 (void)fxt1_quantize_HI;
1287 (void)fxt1_quantize_CHROMA;
1288 #endif
1289 }
1290
1291
1292 static void
1293 fxt1_encode (GLuint width, GLuint height, GLint comps,
1294 const void *source, GLint srcRowStride,
1295 void *dest, GLint destRowStride)
1296 {
1297 GLuint x, y;
1298 const GLubyte *data;
1299 GLuint *encoded = (GLuint *)dest;
1300 void *newSource = NULL;
1301
1302 assert(comps == 3 || comps == 4);
1303
1304 /* Replicate image if width is not M8 or height is not M4 */
1305 if ((width & 7) | (height & 3)) {
1306 GLint newWidth = (width + 7) & ~7;
1307 GLint newHeight = (height + 3) & ~3;
1308 newSource = _mesa_malloc(comps * newWidth * newHeight * sizeof(GLchan));
1309 if (!newSource) {
1310 GET_CURRENT_CONTEXT(ctx);
1311 _mesa_error(ctx, GL_OUT_OF_MEMORY, "texture compression");
1312 goto cleanUp;
1313 }
1314 _mesa_upscale_teximage2d(width, height, newWidth, newHeight,
1315 comps, (const GLchan *) source,
1316 srcRowStride, (GLchan *) newSource);
1317 source = newSource;
1318 width = newWidth;
1319 height = newHeight;
1320 srcRowStride = comps * newWidth;
1321 }
1322
1323 /* convert from 16/32-bit channels to GLubyte if needed */
1324 if (CHAN_TYPE != GL_UNSIGNED_BYTE) {
1325 const GLuint n = width * height * comps;
1326 const GLchan *src = (const GLchan *) source;
1327 GLubyte *dest = (GLubyte *) _mesa_malloc(n * sizeof(GLubyte));
1328 GLuint i;
1329 if (!dest) {
1330 GET_CURRENT_CONTEXT(ctx);
1331 _mesa_error(ctx, GL_OUT_OF_MEMORY, "texture compression");
1332 goto cleanUp;
1333 }
1334 for (i = 0; i < n; i++) {
1335 dest[i] = CHAN_TO_UBYTE(src[i]);
1336 }
1337 if (newSource != NULL) {
1338 _mesa_free(newSource);
1339 }
1340 newSource = dest; /* we'll free this buffer before returning */
1341 source = dest; /* the new, GLubyte incoming image */
1342 }
1343
1344 data = (const GLubyte *) source;
1345 destRowStride = (destRowStride - width * 2) / 4;
1346 for (y = 0; y < height; y += 4) {
1347 GLuint offs = 0 + (y + 0) * srcRowStride;
1348 for (x = 0; x < width; x += 8) {
1349 const GLubyte *lines[4];
1350 lines[0] = &data[offs];
1351 lines[1] = lines[0] + srcRowStride;
1352 lines[2] = lines[1] + srcRowStride;
1353 lines[3] = lines[2] + srcRowStride;
1354 offs += 8 * comps;
1355 fxt1_quantize(encoded, lines, comps);
1356 /* 128 bits per 8x4 block */
1357 encoded += 4;
1358 }
1359 encoded += destRowStride;
1360 }
1361
1362 cleanUp:
1363 if (newSource != NULL) {
1364 _mesa_free(newSource);
1365 }
1366 }
1367
1368
1369 /***************************************************************************\
1370 * FXT1 decoder
1371 *
1372 * The decoder is based on GL_3DFX_texture_compression_FXT1
1373 * specification and serves as a concept for the encoder.
1374 \***************************************************************************/
1375
1376
1377 /* lookup table for scaling 5 bit colors up to 8 bits */
1378 static const GLubyte _rgb_scale_5[] = {
1379 0, 8, 16, 25, 33, 41, 49, 58,
1380 66, 74, 82, 90, 99, 107, 115, 123,
1381 132, 140, 148, 156, 165, 173, 181, 189,
1382 197, 206, 214, 222, 230, 239, 247, 255
1383 };
1384
1385 /* lookup table for scaling 6 bit colors up to 8 bits */
1386 static const GLubyte _rgb_scale_6[] = {
1387 0, 4, 8, 12, 16, 20, 24, 28,
1388 32, 36, 40, 45, 49, 53, 57, 61,
1389 65, 69, 73, 77, 81, 85, 89, 93,
1390 97, 101, 105, 109, 113, 117, 121, 125,
1391 130, 134, 138, 142, 146, 150, 154, 158,
1392 162, 166, 170, 174, 178, 182, 186, 190,
1393 194, 198, 202, 206, 210, 215, 219, 223,
1394 227, 231, 235, 239, 243, 247, 251, 255
1395 };
1396
1397
/* CC_SEL: view 'cc' as an array of 32-bit words and return the word that
 * holds bit number 'which', shifted right so that bit becomes bit 0.  The
 * bits above it are NOT masked off; callers mask as needed.  The pointer
 * is only read, so it is accessed through a const-qualified type.
 */
#define CC_SEL(cc, which) (((const GLuint *)(cc))[(which) / 32] >> ((which) & 31))
/* UP5: expand the low 5 bits of 'c' to 8 bits */
#define UP5(c) _rgb_scale_5[(c) & 31]
/* UP6: expand the 6-bit value made of the low 5 bits of 'c' followed by
 * the low bit of 'b' (as least significant bit) to 8 bits
 */
#define UP6(c, b) _rgb_scale_6[(((c) & 31) << 1) | ((b) & 1)]
/* LERP: rounded integer interpolation, t/n of the way from c0 to c1.
 * Fully parenthesized so it can be used inside larger expressions.
 */
#define LERP(n, t, c0, c1) ((((n) - (t)) * (c0) + (t) * (c1) + (n) / 2) / (n))
1402
1403
1404 static void
1405 fxt1_decode_1HI (const GLubyte *code, GLint t, GLchan *rgba)
1406 {
1407 const GLuint *cc;
1408
1409 t *= 3;
1410 cc = (const GLuint *)(code + t / 8);
1411 t = (cc[0] >> (t & 7)) & 7;
1412
1413 if (t == 7) {
1414 rgba[RCOMP] = rgba[GCOMP] = rgba[BCOMP] = rgba[ACOMP] = 0;
1415 } else {
1416 GLubyte r, g, b;
1417 cc = (const GLuint *)(code + 12);
1418 if (t == 0) {
1419 b = UP5(CC_SEL(cc, 0));
1420 g = UP5(CC_SEL(cc, 5));
1421 r = UP5(CC_SEL(cc, 10));
1422 } else if (t == 6) {
1423 b = UP5(CC_SEL(cc, 15));
1424 g = UP5(CC_SEL(cc, 20));
1425 r = UP5(CC_SEL(cc, 25));
1426 } else {
1427 b = LERP(6, t, UP5(CC_SEL(cc, 0)), UP5(CC_SEL(cc, 15)));
1428 g = LERP(6, t, UP5(CC_SEL(cc, 5)), UP5(CC_SEL(cc, 20)));
1429 r = LERP(6, t, UP5(CC_SEL(cc, 10)), UP5(CC_SEL(cc, 25)));
1430 }
1431 rgba[RCOMP] = UBYTE_TO_CHAN(r);
1432 rgba[GCOMP] = UBYTE_TO_CHAN(g);
1433 rgba[BCOMP] = UBYTE_TO_CHAN(b);
1434 rgba[ACOMP] = CHAN_MAX;
1435 }
1436 }
1437
1438
1439 static void
1440 fxt1_decode_1CHROMA (const GLubyte *code, GLint t, GLchan *rgba)
1441 {
1442 const GLuint *cc;
1443 GLuint kk;
1444
1445 cc = (const GLuint *)code;
1446 if (t & 16) {
1447 cc++;
1448 t &= 15;
1449 }
1450 t = (cc[0] >> (t * 2)) & 3;
1451
1452 t *= 15;
1453 cc = (const GLuint *)(code + 8 + t / 8);
1454 kk = cc[0] >> (t & 7);
1455 rgba[BCOMP] = UBYTE_TO_CHAN( UP5(kk) );
1456 rgba[GCOMP] = UBYTE_TO_CHAN( UP5(kk >> 5) );
1457 rgba[RCOMP] = UBYTE_TO_CHAN( UP5(kk >> 10) );
1458 rgba[ACOMP] = CHAN_MAX;
1459 }
1460
1461
1462 static void
1463 fxt1_decode_1MIXED (const GLubyte *code, GLint t, GLchan *rgba)
1464 {
1465 const GLuint *cc;
1466 GLuint col[2][3];
1467 GLint glsb, selb;
1468
1469 cc = (const GLuint *)code;
1470 if (t & 16) {
1471 t &= 15;
1472 t = (cc[1] >> (t * 2)) & 3;
1473 /* col 2 */
1474 col[0][BCOMP] = (*(const GLuint *)(code + 11)) >> 6;
1475 col[0][GCOMP] = CC_SEL(cc, 99);
1476 col[0][RCOMP] = CC_SEL(cc, 104);
1477 /* col 3 */
1478 col[1][BCOMP] = CC_SEL(cc, 109);
1479 col[1][GCOMP] = CC_SEL(cc, 114);
1480 col[1][RCOMP] = CC_SEL(cc, 119);
1481 glsb = CC_SEL(cc, 126);
1482 selb = CC_SEL(cc, 33);
1483 } else {
1484 t = (cc[0] >> (t * 2)) & 3;
1485 /* col 0 */
1486 col[0][BCOMP] = CC_SEL(cc, 64);
1487 col[0][GCOMP] = CC_SEL(cc, 69);
1488 col[0][RCOMP] = CC_SEL(cc, 74);
1489 /* col 1 */
1490 col[1][BCOMP] = CC_SEL(cc, 79);
1491 col[1][GCOMP] = CC_SEL(cc, 84);
1492 col[1][RCOMP] = CC_SEL(cc, 89);
1493 glsb = CC_SEL(cc, 125);
1494 selb = CC_SEL(cc, 1);
1495 }
1496
1497 if (CC_SEL(cc, 124) & 1) {
1498 /* alpha[0] == 1 */
1499
1500 if (t == 3) {
1501 /* zero */
1502 rgba[RCOMP] = rgba[BCOMP] = rgba[GCOMP] = rgba[ACOMP] = 0;
1503 } else {
1504 GLubyte r, g, b;
1505 if (t == 0) {
1506 b = UP5(col[0][BCOMP]);
1507 g = UP5(col[0][GCOMP]);
1508 r = UP5(col[0][RCOMP]);
1509 } else if (t == 2) {
1510 b = UP5(col[1][BCOMP]);
1511 g = UP6(col[1][GCOMP], glsb);
1512 r = UP5(col[1][RCOMP]);
1513 } else {
1514 b = (UP5(col[0][BCOMP]) + UP5(col[1][BCOMP])) / 2;
1515 g = (UP5(col[0][GCOMP]) + UP6(col[1][GCOMP], glsb)) / 2;
1516 r = (UP5(col[0][RCOMP]) + UP5(col[1][RCOMP])) / 2;
1517 }
1518 rgba[RCOMP] = UBYTE_TO_CHAN(r);
1519 rgba[GCOMP] = UBYTE_TO_CHAN(g);
1520 rgba[BCOMP] = UBYTE_TO_CHAN(b);
1521 rgba[ACOMP] = CHAN_MAX;
1522 }
1523 } else {
1524 /* alpha[0] == 0 */
1525 GLubyte r, g, b;
1526 if (t == 0) {
1527 b = UP5(col[0][BCOMP]);
1528 g = UP6(col[0][GCOMP], glsb ^ selb);
1529 r = UP5(col[0][RCOMP]);
1530 } else if (t == 3) {
1531 b = UP5(col[1][BCOMP]);
1532 g = UP6(col[1][GCOMP], glsb);
1533 r = UP5(col[1][RCOMP]);
1534 } else {
1535 b = LERP(3, t, UP5(col[0][BCOMP]), UP5(col[1][BCOMP]));
1536 g = LERP(3, t, UP6(col[0][GCOMP], glsb ^ selb),
1537 UP6(col[1][GCOMP], glsb));
1538 r = LERP(3, t, UP5(col[0][RCOMP]), UP5(col[1][RCOMP]));
1539 }
1540 rgba[RCOMP] = UBYTE_TO_CHAN(r);
1541 rgba[GCOMP] = UBYTE_TO_CHAN(g);
1542 rgba[BCOMP] = UBYTE_TO_CHAN(b);
1543 rgba[ACOMP] = CHAN_MAX;
1544 }
1545 }
1546
1547
1548 static void
1549 fxt1_decode_1ALPHA (const GLubyte *code, GLint t, GLchan *rgba)
1550 {
1551 const GLuint *cc;
1552 GLubyte r, g, b, a;
1553
1554 cc = (const GLuint *)code;
1555 if (CC_SEL(cc, 124) & 1) {
1556 /* lerp == 1 */
1557 GLuint col0[4];
1558
1559 if (t & 16) {
1560 t &= 15;
1561 t = (cc[1] >> (t * 2)) & 3;
1562 /* col 2 */
1563 col0[BCOMP] = (*(const GLuint *)(code + 11)) >> 6;
1564 col0[GCOMP] = CC_SEL(cc, 99);
1565 col0[RCOMP] = CC_SEL(cc, 104);
1566 col0[ACOMP] = CC_SEL(cc, 119);
1567 } else {
1568 t = (cc[0] >> (t * 2)) & 3;
1569 /* col 0 */
1570 col0[BCOMP] = CC_SEL(cc, 64);
1571 col0[GCOMP] = CC_SEL(cc, 69);
1572 col0[RCOMP] = CC_SEL(cc, 74);
1573 col0[ACOMP] = CC_SEL(cc, 109);
1574 }
1575
1576 if (t == 0) {
1577 b = UP5(col0[BCOMP]);
1578 g = UP5(col0[GCOMP]);
1579 r = UP5(col0[RCOMP]);
1580 a = UP5(col0[ACOMP]);
1581 } else if (t == 3) {
1582 b = UP5(CC_SEL(cc, 79));
1583 g = UP5(CC_SEL(cc, 84));
1584 r = UP5(CC_SEL(cc, 89));
1585 a = UP5(CC_SEL(cc, 114));
1586 } else {
1587 b = LERP(3, t, UP5(col0[BCOMP]), UP5(CC_SEL(cc, 79)));
1588 g = LERP(3, t, UP5(col0[GCOMP]), UP5(CC_SEL(cc, 84)));
1589 r = LERP(3, t, UP5(col0[RCOMP]), UP5(CC_SEL(cc, 89)));
1590 a = LERP(3, t, UP5(col0[ACOMP]), UP5(CC_SEL(cc, 114)));
1591 }
1592 } else {
1593 /* lerp == 0 */
1594
1595 if (t & 16) {
1596 cc++;
1597 t &= 15;
1598 }
1599 t = (cc[0] >> (t * 2)) & 3;
1600
1601 if (t == 3) {
1602 /* zero */
1603 r = g = b = a = 0;
1604 } else {
1605 GLuint kk;
1606 cc = (const GLuint *)code;
1607 a = UP5(cc[3] >> (t * 5 + 13));
1608 t *= 15;
1609 cc = (const GLuint *)(code + 8 + t / 8);
1610 kk = cc[0] >> (t & 7);
1611 b = UP5(kk);
1612 g = UP5(kk >> 5);
1613 r = UP5(kk >> 10);
1614 }
1615 }
1616 rgba[RCOMP] = UBYTE_TO_CHAN(r);
1617 rgba[GCOMP] = UBYTE_TO_CHAN(g);
1618 rgba[BCOMP] = UBYTE_TO_CHAN(b);
1619 rgba[ACOMP] = UBYTE_TO_CHAN(a);
1620 }
1621
1622
1623 void
1624 fxt1_decode_1 (const void *texture, GLint stride, /* in pixels */
1625 GLint i, GLint j, GLchan *rgba)
1626 {
1627 static void (*decode_1[]) (const GLubyte *, GLint, GLchan *) = {
1628 fxt1_decode_1HI, /* cc-high = "00?" */
1629 fxt1_decode_1HI, /* cc-high = "00?" */
1630 fxt1_decode_1CHROMA, /* cc-chroma = "010" */
1631 fxt1_decode_1ALPHA, /* alpha = "011" */
1632 fxt1_decode_1MIXED, /* mixed = "1??" */
1633 fxt1_decode_1MIXED, /* mixed = "1??" */
1634 fxt1_decode_1MIXED, /* mixed = "1??" */
1635 fxt1_decode_1MIXED /* mixed = "1??" */
1636 };
1637
1638 const GLubyte *code = (const GLubyte *)texture +
1639 ((j / 4) * (stride / 8) + (i / 8)) * 16;
1640 GLint mode = CC_SEL(code, 125);
1641 GLint t = i & 7;
1642
1643 if (t & 4) {
1644 t += 12;
1645 }
1646 t += (j & 3) * 4;
1647
1648 decode_1[mode](code, t, rgba);
1649 }
1650
1651
1652 #endif /* FEATURE_texture_fxt1 */