Merge remote branch 'origin/master' into gallium_draw_llvm
[mesa.git] / src / mesa / main / texcompress_fxt1.c
1 /*
2 * Mesa 3-D graphics library
3 * Version: 7.1
4 *
5 * Copyright (C) 1999-2008 Brian Paul All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included
15 * in all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
21 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25
26 /**
27 * \file texcompress_fxt1.c
28 * GL_EXT_texture_compression_fxt1 support.
29 */
30
31
32 #include "glheader.h"
33 #include "imports.h"
34 #include "colormac.h"
35 #include "context.h"
36 #include "convolve.h"
37 #include "image.h"
38 #include "mipmap.h"
39 #include "texcompress.h"
40 #include "texcompress_fxt1.h"
41 #include "texstore.h"
42
43
44 #if FEATURE_texture_fxt1
45
46
/* Forward declaration of the encoder entry point; the texstore functions
 * below call it with the source image (width, height, number of components,
 * pixel pointer, source row stride) and the destination pointer and row
 * stride.  Its definition appears later in this file.
 */
static void
fxt1_encode (GLuint width, GLuint height, GLint comps,
             const void *source, GLint srcRowStride,
             void *dest, GLint destRowStride);

/* Forward declaration of the single-texel decoder used by the
 * _mesa_fetch_texel_2d_f_*_fxt1 functions below, which pass the image data,
 * its row stride and the (i, j) texel coordinates and then read four GLchan
 * values back from rgba.
 */
void
fxt1_decode_1 (const void *texture, GLint stride,
               GLint i, GLint j, GLchan *rgba);
55
56
57 /**
58 * Store user's image in rgb_fxt1 format.
59 */
60 GLboolean
61 _mesa_texstore_rgb_fxt1(TEXSTORE_PARAMS)
62 {
63 const GLchan *pixels;
64 GLint srcRowStride;
65 GLubyte *dst;
66 const GLint texWidth = dstRowStride * 8 / 16; /* a bit of a hack */
67 const GLchan *tempImage = NULL;
68
69 ASSERT(dstFormat == MESA_FORMAT_RGB_FXT1);
70 ASSERT(dstXoffset % 8 == 0);
71 ASSERT(dstYoffset % 4 == 0);
72 ASSERT(dstZoffset == 0);
73 (void) dstZoffset;
74 (void) dstImageOffsets;
75
76 if (srcFormat != GL_RGB ||
77 srcType != CHAN_TYPE ||
78 ctx->_ImageTransferState ||
79 srcPacking->SwapBytes) {
80 /* convert image to RGB/GLchan */
81 tempImage = _mesa_make_temp_chan_image(ctx, dims,
82 baseInternalFormat,
83 _mesa_get_format_base_format(dstFormat),
84 srcWidth, srcHeight, srcDepth,
85 srcFormat, srcType, srcAddr,
86 srcPacking);
87 if (!tempImage)
88 return GL_FALSE; /* out of memory */
89 _mesa_adjust_image_for_convolution(ctx, dims, &srcWidth, &srcHeight);
90 pixels = tempImage;
91 srcRowStride = 3 * srcWidth;
92 srcFormat = GL_RGB;
93 }
94 else {
95 pixels = (const GLchan *) srcAddr;
96 srcRowStride = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat,
97 srcType) / sizeof(GLchan);
98 }
99
100 dst = _mesa_compressed_image_address(dstXoffset, dstYoffset, 0,
101 dstFormat,
102 texWidth, (GLubyte *) dstAddr);
103
104 fxt1_encode(srcWidth, srcHeight, 3, pixels, srcRowStride,
105 dst, dstRowStride);
106
107 if (tempImage)
108 free((void*) tempImage);
109
110 return GL_TRUE;
111 }
112
113
114 /**
115 * Store user's image in rgba_fxt1 format.
116 */
117 GLboolean
118 _mesa_texstore_rgba_fxt1(TEXSTORE_PARAMS)
119 {
120 const GLchan *pixels;
121 GLint srcRowStride;
122 GLubyte *dst;
123 GLint texWidth = dstRowStride * 8 / 16; /* a bit of a hack */
124 const GLchan *tempImage = NULL;
125
126 ASSERT(dstFormat == MESA_FORMAT_RGBA_FXT1);
127 ASSERT(dstXoffset % 8 == 0);
128 ASSERT(dstYoffset % 4 == 0);
129 ASSERT(dstZoffset == 0);
130 (void) dstZoffset;
131 (void) dstImageOffsets;
132
133 if (srcFormat != GL_RGBA ||
134 srcType != CHAN_TYPE ||
135 ctx->_ImageTransferState ||
136 srcPacking->SwapBytes) {
137 /* convert image to RGBA/GLchan */
138 tempImage = _mesa_make_temp_chan_image(ctx, dims,
139 baseInternalFormat,
140 _mesa_get_format_base_format(dstFormat),
141 srcWidth, srcHeight, srcDepth,
142 srcFormat, srcType, srcAddr,
143 srcPacking);
144 if (!tempImage)
145 return GL_FALSE; /* out of memory */
146 _mesa_adjust_image_for_convolution(ctx, dims, &srcWidth, &srcHeight);
147 pixels = tempImage;
148 srcRowStride = 4 * srcWidth;
149 srcFormat = GL_RGBA;
150 }
151 else {
152 pixels = (const GLchan *) srcAddr;
153 srcRowStride = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat,
154 srcType) / sizeof(GLchan);
155 }
156
157 dst = _mesa_compressed_image_address(dstXoffset, dstYoffset, 0,
158 dstFormat,
159 texWidth, (GLubyte *) dstAddr);
160
161 fxt1_encode(srcWidth, srcHeight, 4, pixels, srcRowStride,
162 dst, dstRowStride);
163
164 if (tempImage)
165 free((void*) tempImage);
166
167 return GL_TRUE;
168 }
169
170
171 void
172 _mesa_fetch_texel_2d_f_rgba_fxt1( const struct gl_texture_image *texImage,
173 GLint i, GLint j, GLint k, GLfloat *texel )
174 {
175 /* just sample as GLchan and convert to float here */
176 GLchan rgba[4];
177 (void) k;
178 fxt1_decode_1(texImage->Data, texImage->RowStride, i, j, rgba);
179 texel[RCOMP] = CHAN_TO_FLOAT(rgba[RCOMP]);
180 texel[GCOMP] = CHAN_TO_FLOAT(rgba[GCOMP]);
181 texel[BCOMP] = CHAN_TO_FLOAT(rgba[BCOMP]);
182 texel[ACOMP] = CHAN_TO_FLOAT(rgba[ACOMP]);
183 }
184
185
186 void
187 _mesa_fetch_texel_2d_f_rgb_fxt1( const struct gl_texture_image *texImage,
188 GLint i, GLint j, GLint k, GLfloat *texel )
189 {
190 /* just sample as GLchan and convert to float here */
191 GLchan rgba[4];
192 (void) k;
193 fxt1_decode_1(texImage->Data, texImage->RowStride, i, j, rgba);
194 texel[RCOMP] = CHAN_TO_FLOAT(rgba[RCOMP]);
195 texel[GCOMP] = CHAN_TO_FLOAT(rgba[GCOMP]);
196 texel[BCOMP] = CHAN_TO_FLOAT(rgba[BCOMP]);
197 texel[ACOMP] = 1.0F;
198 }
199
200
201
202 /***************************************************************************\
203 * FXT1 encoder
204 *
205 * The encoder was built by reversing the decoder,
206 * and is vaguely based on Texus2 by 3dfx. Note that this code
207 * is merely a proof of concept, since it is highly UNoptimized;
208 * moreover, it is sub-optimal due to initial conditions passed
209 * to Lloyd's algorithm (the interpolation modes are even worse).
210 \***************************************************************************/
211
212
#define MAX_COMP 4 /* ever needed maximum number of components in texel */
#define MAX_VECT 4 /* ever needed maximum number of base vectors to find */
#define N_TEXELS 32 /* number of texels in a block (always 32) */
#define LL_N_REP 50 /* number of iterations in lloyd's vq */
#define LL_RMS_D 10 /* fault tolerance (maximum delta) */
#define LL_RMS_E 255 /* fault tolerance (maximum error) */
#define ALPHA_TS 2 /* alpha threshold: (255 - ALPHA_TS) deemed opaque */

/* True when all four bytes of the texel at v are zero (transparent black).
 * The bytes are tested individually instead of being read through a
 * GLuint pointer: the texels are GLubyte arrays, so a GLuint load would
 * break the aliasing rules and is not guaranteed to be aligned.
 * Note that v is evaluated more than once; pass an expression without
 * side effects.
 */
#define ISTBLACK(v) \
   ((((const GLubyte *)(v))[0] | ((const GLubyte *)(v))[1] | \
     ((const GLubyte *)(v))[2] | ((const GLubyte *)(v))[3]) == 0)
221
222
/*
 * Define a 64-bit unsigned integer type and macros.
 *
 * FX64_MOV32(a, b)  load the 32-bit value b into a (upper half cleared)
 * FX64_OR32(a, b)   OR the 32-bit value b into the low half of a
 * FX64_SHL(a, c)    shift a left by c bits
 *
 * All macro arguments are parenthesized so that arbitrary expressions can
 * be passed safely.
 */
#if 1

#define FX64_NATIVE 1

typedef uint64_t Fx64;

#define FX64_MOV32(a, b) ((a) = (b))
#define FX64_OR32(a, b) ((a) |= (b))
#define FX64_SHL(a, c) ((a) <<= (c))

#else

#define FX64_NATIVE 0

typedef struct {
   GLuint lo, hi;
} Fx64;

/* clear .hi as well, so that a following FX64_SHL never reads an
 * uninitialized upper half (matches the native variant, which zero-extends)
 */
#define FX64_MOV32(a, b) ((a).hi = 0, (a).lo = (b))
#define FX64_OR32(a, b) ((a).lo |= (b))

/* c == 0 is a no-op; without the check the else-branch would shift
 * .lo right by 32 bits, which is undefined for a 32-bit operand
 */
#define FX64_SHL(a, c)                                            \
   do {                                                           \
      if ((c) >= 32) {                                            \
         (a).hi = (a).lo << ((c) - 32);                           \
         (a).lo = 0;                                              \
      } else if ((c) > 0) {                                       \
         (a).hi = ((a).hi << (c)) | ((a).lo >> (32 - (c)));       \
         (a).lo <<= (c);                                          \
      }                                                           \
   } while (0)

#endif
259
260
#define F(i) ((GLfloat)1) /* can be used to obtain an oblong metric: 0.30 / 0.59 / 0.11 */
#define SAFECDOT 1 /* for paranoids */

/* Build the interpolation vector IV and bias B that map a color onto the
 * index range 0..NV along the line from V0 to V1 (NC components each).
 * CALCCDOT() then yields the index for a color as (GLint)(dot(V, IV) + B).
 *
 * The macro uses an integer variable named 'i' from the enclosing scope as
 * its loop counter.
 *
 * If V0 and V1 coincide the squared length d2 is zero; in that case the
 * vector is set to zero (every color then maps to index 0) instead of
 * dividing by zero and producing NaNs.
 */
#define MAKEIVEC(NV, NC, IV, B, V0, V1)                    \
   do {                                                    \
      /* compute interpolation vector */                   \
      GLfloat d2 = 0.0F;                                   \
      GLfloat rd2;                                         \
                                                           \
      for (i = 0; i < (NC); i++) {                         \
         (IV)[i] = ((V1)[i] - (V0)[i]) * F(i);             \
         d2 += (IV)[i] * (IV)[i];                          \
      }                                                    \
      rd2 = (d2 > 0.0F) ? (GLfloat)(NV) / d2 : 0.0F;       \
      (B) = 0;                                             \
      for (i = 0; i < (NC); i++) {                         \
         (IV)[i] *= F(i);                                  \
         (B) -= (IV)[i] * (V0)[i];                         \
         (IV)[i] *= rd2;                                   \
      }                                                    \
      (B) = (B) * rd2 + 0.5f;                              \
   } while (0)

/* Project color V onto the interpolation vector IV / bias B produced by
 * MAKEIVEC() and store the resulting index in TEXEL.  With SAFECDOT set the
 * index is clamped to 0..NV.  Uses the enclosing scope's 'i' as loop counter.
 */
#define CALCCDOT(TEXEL, NV, NC, IV, B, V)                  \
   do {                                                    \
      GLfloat dot = 0.0F;                                  \
      for (i = 0; i < (NC); i++) {                         \
         dot += (V)[i] * (IV)[i];                          \
      }                                                    \
      (TEXEL) = (GLint)(dot + (B));                        \
      if (SAFECDOT) {                                      \
         if ((TEXEL) < 0) {                                \
            (TEXEL) = 0;                                   \
         } else if ((TEXEL) > (NV)) {                      \
            (TEXEL) = (NV);                                \
         }                                                 \
      }                                                    \
   } while (0)
299
300
301 static GLint
302 fxt1_bestcol (GLfloat vec[][MAX_COMP], GLint nv,
303 GLubyte input[MAX_COMP], GLint nc)
304 {
305 GLint i, j, best = -1;
306 GLfloat err = 1e9; /* big enough */
307
308 for (j = 0; j < nv; j++) {
309 GLfloat e = 0.0F;
310 for (i = 0; i < nc; i++) {
311 e += (vec[j][i] - input[i]) * (vec[j][i] - input[i]);
312 }
313 if (e < err) {
314 err = e;
315 best = j;
316 }
317 }
318
319 return best;
320 }
321
322
323 static GLint
324 fxt1_worst (GLfloat vec[MAX_COMP],
325 GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
326 {
327 GLint i, k, worst = -1;
328 GLfloat err = -1.0F; /* small enough */
329
330 for (k = 0; k < n; k++) {
331 GLfloat e = 0.0F;
332 for (i = 0; i < nc; i++) {
333 e += (vec[i] - input[k][i]) * (vec[i] - input[k][i]);
334 }
335 if (e > err) {
336 err = e;
337 worst = k;
338 }
339 }
340
341 return worst;
342 }
343
344
345 static GLint
346 fxt1_variance (GLdouble variance[MAX_COMP],
347 GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
348 {
349 GLint i, k, best = 0;
350 GLint sx, sx2;
351 GLdouble var, maxvar = -1; /* small enough */
352 GLdouble teenth = 1.0 / n;
353
354 for (i = 0; i < nc; i++) {
355 sx = sx2 = 0;
356 for (k = 0; k < n; k++) {
357 GLint t = input[k][i];
358 sx += t;
359 sx2 += t * t;
360 }
361 var = sx2 * teenth - sx * sx * teenth * teenth;
362 if (maxvar < var) {
363 maxvar = var;
364 best = i;
365 }
366 if (variance) {
367 variance[i] = var;
368 }
369 }
370
371 return best;
372 }
373
374
375 static GLint
376 fxt1_choose (GLfloat vec[][MAX_COMP], GLint nv,
377 GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
378 {
379 #if 0
380 /* Choose colors from a grid.
381 */
382 GLint i, j;
383
384 for (j = 0; j < nv; j++) {
385 GLint m = j * (n - 1) / (nv - 1);
386 for (i = 0; i < nc; i++) {
387 vec[j][i] = input[m][i];
388 }
389 }
390 #else
391 /* Our solution here is to find the darkest and brightest colors in
392 * the 8x4 tile and use those as the two representative colors.
393 * There are probably better algorithms to use (histogram-based).
394 */
395 GLint i, j, k;
396 GLint minSum = 2000; /* big enough */
397 GLint maxSum = -1; /* small enough */
398 GLint minCol = 0; /* phoudoin: silent compiler! */
399 GLint maxCol = 0; /* phoudoin: silent compiler! */
400
401 struct {
402 GLint flag;
403 GLint key;
404 GLint freq;
405 GLint idx;
406 } hist[N_TEXELS];
407 GLint lenh = 0;
408
409 memset(hist, 0, sizeof(hist));
410
411 for (k = 0; k < n; k++) {
412 GLint l;
413 GLint key = 0;
414 GLint sum = 0;
415 for (i = 0; i < nc; i++) {
416 key <<= 8;
417 key |= input[k][i];
418 sum += input[k][i];
419 }
420 for (l = 0; l < n; l++) {
421 if (!hist[l].flag) {
422 /* alloc new slot */
423 hist[l].flag = !0;
424 hist[l].key = key;
425 hist[l].freq = 1;
426 hist[l].idx = k;
427 lenh = l + 1;
428 break;
429 } else if (hist[l].key == key) {
430 hist[l].freq++;
431 break;
432 }
433 }
434 if (minSum > sum) {
435 minSum = sum;
436 minCol = k;
437 }
438 if (maxSum < sum) {
439 maxSum = sum;
440 maxCol = k;
441 }
442 }
443
444 if (lenh <= nv) {
445 for (j = 0; j < lenh; j++) {
446 for (i = 0; i < nc; i++) {
447 vec[j][i] = (GLfloat)input[hist[j].idx][i];
448 }
449 }
450 for (; j < nv; j++) {
451 for (i = 0; i < nc; i++) {
452 vec[j][i] = vec[0][i];
453 }
454 }
455 return 0;
456 }
457
458 for (j = 0; j < nv; j++) {
459 for (i = 0; i < nc; i++) {
460 vec[j][i] = ((nv - 1 - j) * input[minCol][i] + j * input[maxCol][i] + (nv - 1) / 2) / (GLfloat)(nv - 1);
461 }
462 }
463 #endif
464
465 return !0;
466 }
467
468
469 static GLint
470 fxt1_lloyd (GLfloat vec[][MAX_COMP], GLint nv,
471 GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
472 {
473 /* Use the generalized lloyd's algorithm for VQ:
474 * find 4 color vectors.
475 *
476 * for each sample color
477 * sort to nearest vector.
478 *
479 * replace each vector with the centroid of its matching colors.
480 *
481 * repeat until RMS doesn't improve.
482 *
483 * if a color vector has no samples, or becomes the same as another
484 * vector, replace it with the color which is farthest from a sample.
485 *
486 * vec[][MAX_COMP] initial vectors and resulting colors
487 * nv number of resulting colors required
488 * input[N_TEXELS][MAX_COMP] input texels
489 * nc number of components in input / vec
490 * n number of input samples
491 */
492
493 GLint sum[MAX_VECT][MAX_COMP]; /* used to accumulate closest texels */
494 GLint cnt[MAX_VECT]; /* how many times a certain vector was chosen */
495 GLfloat error, lasterror = 1e9;
496
497 GLint i, j, k, rep;
498
499 /* the quantizer */
500 for (rep = 0; rep < LL_N_REP; rep++) {
501 /* reset sums & counters */
502 for (j = 0; j < nv; j++) {
503 for (i = 0; i < nc; i++) {
504 sum[j][i] = 0;
505 }
506 cnt[j] = 0;
507 }
508 error = 0;
509
510 /* scan whole block */
511 for (k = 0; k < n; k++) {
512 #if 1
513 GLint best = -1;
514 GLfloat err = 1e9; /* big enough */
515 /* determine best vector */
516 for (j = 0; j < nv; j++) {
517 GLfloat e = (vec[j][0] - input[k][0]) * (vec[j][0] - input[k][0]) +
518 (vec[j][1] - input[k][1]) * (vec[j][1] - input[k][1]) +
519 (vec[j][2] - input[k][2]) * (vec[j][2] - input[k][2]);
520 if (nc == 4) {
521 e += (vec[j][3] - input[k][3]) * (vec[j][3] - input[k][3]);
522 }
523 if (e < err) {
524 err = e;
525 best = j;
526 }
527 }
528 #else
529 GLint best = fxt1_bestcol(vec, nv, input[k], nc, &err);
530 #endif
531 assert(best >= 0);
532 /* add in closest color */
533 for (i = 0; i < nc; i++) {
534 sum[best][i] += input[k][i];
535 }
536 /* mark this vector as used */
537 cnt[best]++;
538 /* accumulate error */
539 error += err;
540 }
541
542 /* check RMS */
543 if ((error < LL_RMS_E) ||
544 ((error < lasterror) && ((lasterror - error) < LL_RMS_D))) {
545 return !0; /* good match */
546 }
547 lasterror = error;
548
549 /* move each vector to the barycenter of its closest colors */
550 for (j = 0; j < nv; j++) {
551 if (cnt[j]) {
552 GLfloat div = 1.0F / cnt[j];
553 for (i = 0; i < nc; i++) {
554 vec[j][i] = div * sum[j][i];
555 }
556 } else {
557 /* this vec has no samples or is identical with a previous vec */
558 GLint worst = fxt1_worst(vec[j], input, nc, n);
559 for (i = 0; i < nc; i++) {
560 vec[j][i] = input[worst][i];
561 }
562 }
563 }
564 }
565
566 return 0; /* could not converge fast enough */
567 }
568
569
570 static void
571 fxt1_quantize_CHROMA (GLuint *cc,
572 GLubyte input[N_TEXELS][MAX_COMP])
573 {
574 const GLint n_vect = 4; /* 4 base vectors to find */
575 const GLint n_comp = 3; /* 3 components: R, G, B */
576 GLfloat vec[MAX_VECT][MAX_COMP];
577 GLint i, j, k;
578 Fx64 hi; /* high quadword */
579 GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
580
581 if (fxt1_choose(vec, n_vect, input, n_comp, N_TEXELS) != 0) {
582 fxt1_lloyd(vec, n_vect, input, n_comp, N_TEXELS);
583 }
584
585 FX64_MOV32(hi, 4); /* cc-chroma = "010" + unused bit */
586 for (j = n_vect - 1; j >= 0; j--) {
587 for (i = 0; i < n_comp; i++) {
588 /* add in colors */
589 FX64_SHL(hi, 5);
590 FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
591 }
592 }
593 ((Fx64 *)cc)[1] = hi;
594
595 lohi = lolo = 0;
596 /* right microtile */
597 for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) {
598 lohi <<= 2;
599 lohi |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
600 }
601 /* left microtile */
602 for (; k >= 0; k--) {
603 lolo <<= 2;
604 lolo |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
605 }
606 cc[1] = lohi;
607 cc[0] = lolo;
608 }
609
610
611 static void
612 fxt1_quantize_ALPHA0 (GLuint *cc,
613 GLubyte input[N_TEXELS][MAX_COMP],
614 GLubyte reord[N_TEXELS][MAX_COMP], GLint n)
615 {
616 const GLint n_vect = 3; /* 3 base vectors to find */
617 const GLint n_comp = 4; /* 4 components: R, G, B, A */
618 GLfloat vec[MAX_VECT][MAX_COMP];
619 GLint i, j, k;
620 Fx64 hi; /* high quadword */
621 GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
622
623 /* the last vector indicates zero */
624 for (i = 0; i < n_comp; i++) {
625 vec[n_vect][i] = 0;
626 }
627
628 /* the first n texels in reord are guaranteed to be non-zero */
629 if (fxt1_choose(vec, n_vect, reord, n_comp, n) != 0) {
630 fxt1_lloyd(vec, n_vect, reord, n_comp, n);
631 }
632
633 FX64_MOV32(hi, 6); /* alpha = "011" + lerp = 0 */
634 for (j = n_vect - 1; j >= 0; j--) {
635 /* add in alphas */
636 FX64_SHL(hi, 5);
637 FX64_OR32(hi, (GLuint)(vec[j][ACOMP] / 8.0F));
638 }
639 for (j = n_vect - 1; j >= 0; j--) {
640 for (i = 0; i < n_comp - 1; i++) {
641 /* add in colors */
642 FX64_SHL(hi, 5);
643 FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
644 }
645 }
646 ((Fx64 *)cc)[1] = hi;
647
648 lohi = lolo = 0;
649 /* right microtile */
650 for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) {
651 lohi <<= 2;
652 lohi |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
653 }
654 /* left microtile */
655 for (; k >= 0; k--) {
656 lolo <<= 2;
657 lolo |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
658 }
659 cc[1] = lohi;
660 cc[0] = lolo;
661 }
662
663
664 static void
665 fxt1_quantize_ALPHA1 (GLuint *cc,
666 GLubyte input[N_TEXELS][MAX_COMP])
667 {
668 const GLint n_vect = 3; /* highest vector number in each microtile */
669 const GLint n_comp = 4; /* 4 components: R, G, B, A */
670 GLfloat vec[1 + 1 + 1][MAX_COMP]; /* 1.5 extrema for each sub-block */
671 GLfloat b, iv[MAX_COMP]; /* interpolation vector */
672 GLint i, j, k;
673 Fx64 hi; /* high quadword */
674 GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
675
676 GLint minSum;
677 GLint maxSum;
678 GLint minColL = 0, maxColL = 0;
679 GLint minColR = 0, maxColR = 0;
680 GLint sumL = 0, sumR = 0;
681 GLint nn_comp;
682 /* Our solution here is to find the darkest and brightest colors in
683 * the 4x4 tile and use those as the two representative colors.
684 * There are probably better algorithms to use (histogram-based).
685 */
686 nn_comp = n_comp;
687 while ((minColL == maxColL) && nn_comp) {
688 minSum = 2000; /* big enough */
689 maxSum = -1; /* small enough */
690 for (k = 0; k < N_TEXELS / 2; k++) {
691 GLint sum = 0;
692 for (i = 0; i < nn_comp; i++) {
693 sum += input[k][i];
694 }
695 if (minSum > sum) {
696 minSum = sum;
697 minColL = k;
698 }
699 if (maxSum < sum) {
700 maxSum = sum;
701 maxColL = k;
702 }
703 sumL += sum;
704 }
705
706 nn_comp--;
707 }
708
709 nn_comp = n_comp;
710 while ((minColR == maxColR) && nn_comp) {
711 minSum = 2000; /* big enough */
712 maxSum = -1; /* small enough */
713 for (k = N_TEXELS / 2; k < N_TEXELS; k++) {
714 GLint sum = 0;
715 for (i = 0; i < nn_comp; i++) {
716 sum += input[k][i];
717 }
718 if (minSum > sum) {
719 minSum = sum;
720 minColR = k;
721 }
722 if (maxSum < sum) {
723 maxSum = sum;
724 maxColR = k;
725 }
726 sumR += sum;
727 }
728
729 nn_comp--;
730 }
731
732 /* choose the common vector (yuck!) */
733 {
734 GLint j1, j2;
735 GLint v1 = 0, v2 = 0;
736 GLfloat err = 1e9; /* big enough */
737 GLfloat tv[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
738 for (i = 0; i < n_comp; i++) {
739 tv[0][i] = input[minColL][i];
740 tv[1][i] = input[maxColL][i];
741 tv[2][i] = input[minColR][i];
742 tv[3][i] = input[maxColR][i];
743 }
744 for (j1 = 0; j1 < 2; j1++) {
745 for (j2 = 2; j2 < 4; j2++) {
746 GLfloat e = 0.0F;
747 for (i = 0; i < n_comp; i++) {
748 e += (tv[j1][i] - tv[j2][i]) * (tv[j1][i] - tv[j2][i]);
749 }
750 if (e < err) {
751 err = e;
752 v1 = j1;
753 v2 = j2;
754 }
755 }
756 }
757 for (i = 0; i < n_comp; i++) {
758 vec[0][i] = tv[1 - v1][i];
759 vec[1][i] = (tv[v1][i] * sumL + tv[v2][i] * sumR) / (sumL + sumR);
760 vec[2][i] = tv[5 - v2][i];
761 }
762 }
763
764 /* left microtile */
765 cc[0] = 0;
766 if (minColL != maxColL) {
767 /* compute interpolation vector */
768 MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
769
770 /* add in texels */
771 lolo = 0;
772 for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
773 GLint texel;
774 /* interpolate color */
775 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
776 /* add in texel */
777 lolo <<= 2;
778 lolo |= texel;
779 }
780
781 cc[0] = lolo;
782 }
783
784 /* right microtile */
785 cc[1] = 0;
786 if (minColR != maxColR) {
787 /* compute interpolation vector */
788 MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[1]);
789
790 /* add in texels */
791 lohi = 0;
792 for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
793 GLint texel;
794 /* interpolate color */
795 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
796 /* add in texel */
797 lohi <<= 2;
798 lohi |= texel;
799 }
800
801 cc[1] = lohi;
802 }
803
804 FX64_MOV32(hi, 7); /* alpha = "011" + lerp = 1 */
805 for (j = n_vect - 1; j >= 0; j--) {
806 /* add in alphas */
807 FX64_SHL(hi, 5);
808 FX64_OR32(hi, (GLuint)(vec[j][ACOMP] / 8.0F));
809 }
810 for (j = n_vect - 1; j >= 0; j--) {
811 for (i = 0; i < n_comp - 1; i++) {
812 /* add in colors */
813 FX64_SHL(hi, 5);
814 FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
815 }
816 }
817 ((Fx64 *)cc)[1] = hi;
818 }
819
820
821 static void
822 fxt1_quantize_HI (GLuint *cc,
823 GLubyte input[N_TEXELS][MAX_COMP],
824 GLubyte reord[N_TEXELS][MAX_COMP], GLint n)
825 {
826 const GLint n_vect = 6; /* highest vector number */
827 const GLint n_comp = 3; /* 3 components: R, G, B */
828 GLfloat b = 0.0F; /* phoudoin: silent compiler! */
829 GLfloat iv[MAX_COMP]; /* interpolation vector */
830 GLint i, k;
831 GLuint hihi; /* high quadword: hi dword */
832
833 GLint minSum = 2000; /* big enough */
834 GLint maxSum = -1; /* small enough */
835 GLint minCol = 0; /* phoudoin: silent compiler! */
836 GLint maxCol = 0; /* phoudoin: silent compiler! */
837
838 /* Our solution here is to find the darkest and brightest colors in
839 * the 8x4 tile and use those as the two representative colors.
840 * There are probably better algorithms to use (histogram-based).
841 */
842 for (k = 0; k < n; k++) {
843 GLint sum = 0;
844 for (i = 0; i < n_comp; i++) {
845 sum += reord[k][i];
846 }
847 if (minSum > sum) {
848 minSum = sum;
849 minCol = k;
850 }
851 if (maxSum < sum) {
852 maxSum = sum;
853 maxCol = k;
854 }
855 }
856
857 hihi = 0; /* cc-hi = "00" */
858 for (i = 0; i < n_comp; i++) {
859 /* add in colors */
860 hihi <<= 5;
861 hihi |= reord[maxCol][i] >> 3;
862 }
863 for (i = 0; i < n_comp; i++) {
864 /* add in colors */
865 hihi <<= 5;
866 hihi |= reord[minCol][i] >> 3;
867 }
868 cc[3] = hihi;
869 cc[0] = cc[1] = cc[2] = 0;
870
871 /* compute interpolation vector */
872 if (minCol != maxCol) {
873 MAKEIVEC(n_vect, n_comp, iv, b, reord[minCol], reord[maxCol]);
874 }
875
876 /* add in texels */
877 for (k = N_TEXELS - 1; k >= 0; k--) {
878 GLint t = k * 3;
879 GLuint *kk = (GLuint *)((char *)cc + t / 8);
880 GLint texel = n_vect + 1; /* transparent black */
881
882 if (!ISTBLACK(input[k])) {
883 if (minCol != maxCol) {
884 /* interpolate color */
885 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
886 /* add in texel */
887 kk[0] |= texel << (t & 7);
888 }
889 } else {
890 /* add in texel */
891 kk[0] |= texel << (t & 7);
892 }
893 }
894 }
895
896
897 static void
898 fxt1_quantize_MIXED1 (GLuint *cc,
899 GLubyte input[N_TEXELS][MAX_COMP])
900 {
901 const GLint n_vect = 2; /* highest vector number in each microtile */
902 const GLint n_comp = 3; /* 3 components: R, G, B */
903 GLubyte vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
904 GLfloat b, iv[MAX_COMP]; /* interpolation vector */
905 GLint i, j, k;
906 Fx64 hi; /* high quadword */
907 GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
908
909 GLint minSum;
910 GLint maxSum;
911 GLint minColL = 0, maxColL = -1;
912 GLint minColR = 0, maxColR = -1;
913
914 /* Our solution here is to find the darkest and brightest colors in
915 * the 4x4 tile and use those as the two representative colors.
916 * There are probably better algorithms to use (histogram-based).
917 */
918 minSum = 2000; /* big enough */
919 maxSum = -1; /* small enough */
920 for (k = 0; k < N_TEXELS / 2; k++) {
921 if (!ISTBLACK(input[k])) {
922 GLint sum = 0;
923 for (i = 0; i < n_comp; i++) {
924 sum += input[k][i];
925 }
926 if (minSum > sum) {
927 minSum = sum;
928 minColL = k;
929 }
930 if (maxSum < sum) {
931 maxSum = sum;
932 maxColL = k;
933 }
934 }
935 }
936 minSum = 2000; /* big enough */
937 maxSum = -1; /* small enough */
938 for (; k < N_TEXELS; k++) {
939 if (!ISTBLACK(input[k])) {
940 GLint sum = 0;
941 for (i = 0; i < n_comp; i++) {
942 sum += input[k][i];
943 }
944 if (minSum > sum) {
945 minSum = sum;
946 minColR = k;
947 }
948 if (maxSum < sum) {
949 maxSum = sum;
950 maxColR = k;
951 }
952 }
953 }
954
955 /* left microtile */
956 if (maxColL == -1) {
957 /* all transparent black */
958 cc[0] = ~0u;
959 for (i = 0; i < n_comp; i++) {
960 vec[0][i] = 0;
961 vec[1][i] = 0;
962 }
963 } else {
964 cc[0] = 0;
965 for (i = 0; i < n_comp; i++) {
966 vec[0][i] = input[minColL][i];
967 vec[1][i] = input[maxColL][i];
968 }
969 if (minColL != maxColL) {
970 /* compute interpolation vector */
971 MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
972
973 /* add in texels */
974 lolo = 0;
975 for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
976 GLint texel = n_vect + 1; /* transparent black */
977 if (!ISTBLACK(input[k])) {
978 /* interpolate color */
979 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
980 }
981 /* add in texel */
982 lolo <<= 2;
983 lolo |= texel;
984 }
985 cc[0] = lolo;
986 }
987 }
988
989 /* right microtile */
990 if (maxColR == -1) {
991 /* all transparent black */
992 cc[1] = ~0u;
993 for (i = 0; i < n_comp; i++) {
994 vec[2][i] = 0;
995 vec[3][i] = 0;
996 }
997 } else {
998 cc[1] = 0;
999 for (i = 0; i < n_comp; i++) {
1000 vec[2][i] = input[minColR][i];
1001 vec[3][i] = input[maxColR][i];
1002 }
1003 if (minColR != maxColR) {
1004 /* compute interpolation vector */
1005 MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);
1006
1007 /* add in texels */
1008 lohi = 0;
1009 for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
1010 GLint texel = n_vect + 1; /* transparent black */
1011 if (!ISTBLACK(input[k])) {
1012 /* interpolate color */
1013 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
1014 }
1015 /* add in texel */
1016 lohi <<= 2;
1017 lohi |= texel;
1018 }
1019 cc[1] = lohi;
1020 }
1021 }
1022
1023 FX64_MOV32(hi, 9 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
1024 for (j = 2 * 2 - 1; j >= 0; j--) {
1025 for (i = 0; i < n_comp; i++) {
1026 /* add in colors */
1027 FX64_SHL(hi, 5);
1028 FX64_OR32(hi, vec[j][i] >> 3);
1029 }
1030 }
1031 ((Fx64 *)cc)[1] = hi;
1032 }
1033
1034
1035 static void
1036 fxt1_quantize_MIXED0 (GLuint *cc,
1037 GLubyte input[N_TEXELS][MAX_COMP])
1038 {
1039 const GLint n_vect = 3; /* highest vector number in each microtile */
1040 const GLint n_comp = 3; /* 3 components: R, G, B */
1041 GLubyte vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
1042 GLfloat b, iv[MAX_COMP]; /* interpolation vector */
1043 GLint i, j, k;
1044 Fx64 hi; /* high quadword */
1045 GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
1046
1047 GLint minColL = 0, maxColL = 0;
1048 GLint minColR = 0, maxColR = 0;
1049 #if 0
1050 GLint minSum;
1051 GLint maxSum;
1052
1053 /* Our solution here is to find the darkest and brightest colors in
1054 * the 4x4 tile and use those as the two representative colors.
1055 * There are probably better algorithms to use (histogram-based).
1056 */
1057 minSum = 2000; /* big enough */
1058 maxSum = -1; /* small enough */
1059 for (k = 0; k < N_TEXELS / 2; k++) {
1060 GLint sum = 0;
1061 for (i = 0; i < n_comp; i++) {
1062 sum += input[k][i];
1063 }
1064 if (minSum > sum) {
1065 minSum = sum;
1066 minColL = k;
1067 }
1068 if (maxSum < sum) {
1069 maxSum = sum;
1070 maxColL = k;
1071 }
1072 }
1073 minSum = 2000; /* big enough */
1074 maxSum = -1; /* small enough */
1075 for (; k < N_TEXELS; k++) {
1076 GLint sum = 0;
1077 for (i = 0; i < n_comp; i++) {
1078 sum += input[k][i];
1079 }
1080 if (minSum > sum) {
1081 minSum = sum;
1082 minColR = k;
1083 }
1084 if (maxSum < sum) {
1085 maxSum = sum;
1086 maxColR = k;
1087 }
1088 }
1089 #else
1090 GLint minVal;
1091 GLint maxVal;
1092 GLint maxVarL = fxt1_variance(NULL, input, n_comp, N_TEXELS / 2);
1093 GLint maxVarR = fxt1_variance(NULL, &input[N_TEXELS / 2], n_comp, N_TEXELS / 2);
1094
1095 /* Scan the channel with max variance for lo & hi
1096 * and use those as the two representative colors.
1097 */
1098 minVal = 2000; /* big enough */
1099 maxVal = -1; /* small enough */
1100 for (k = 0; k < N_TEXELS / 2; k++) {
1101 GLint t = input[k][maxVarL];
1102 if (minVal > t) {
1103 minVal = t;
1104 minColL = k;
1105 }
1106 if (maxVal < t) {
1107 maxVal = t;
1108 maxColL = k;
1109 }
1110 }
1111 minVal = 2000; /* big enough */
1112 maxVal = -1; /* small enough */
1113 for (; k < N_TEXELS; k++) {
1114 GLint t = input[k][maxVarR];
1115 if (minVal > t) {
1116 minVal = t;
1117 minColR = k;
1118 }
1119 if (maxVal < t) {
1120 maxVal = t;
1121 maxColR = k;
1122 }
1123 }
1124 #endif
1125
1126 /* left microtile */
1127 cc[0] = 0;
1128 for (i = 0; i < n_comp; i++) {
1129 vec[0][i] = input[minColL][i];
1130 vec[1][i] = input[maxColL][i];
1131 }
1132 if (minColL != maxColL) {
1133 /* compute interpolation vector */
1134 MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
1135
1136 /* add in texels */
1137 lolo = 0;
1138 for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
1139 GLint texel;
1140 /* interpolate color */
1141 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
1142 /* add in texel */
1143 lolo <<= 2;
1144 lolo |= texel;
1145 }
1146
1147 /* funky encoding for LSB of green */
1148 if ((GLint)((lolo >> 1) & 1) != (((vec[1][GCOMP] ^ vec[0][GCOMP]) >> 2) & 1)) {
1149 for (i = 0; i < n_comp; i++) {
1150 vec[1][i] = input[minColL][i];
1151 vec[0][i] = input[maxColL][i];
1152 }
1153 lolo = ~lolo;
1154 }
1155
1156 cc[0] = lolo;
1157 }
1158
1159 /* right microtile */
1160 cc[1] = 0;
1161 for (i = 0; i < n_comp; i++) {
1162 vec[2][i] = input[minColR][i];
1163 vec[3][i] = input[maxColR][i];
1164 }
1165 if (minColR != maxColR) {
1166 /* compute interpolation vector */
1167 MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);
1168
1169 /* add in texels */
1170 lohi = 0;
1171 for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
1172 GLint texel;
1173 /* interpolate color */
1174 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
1175 /* add in texel */
1176 lohi <<= 2;
1177 lohi |= texel;
1178 }
1179
1180 /* funky encoding for LSB of green */
1181 if ((GLint)((lohi >> 1) & 1) != (((vec[3][GCOMP] ^ vec[2][GCOMP]) >> 2) & 1)) {
1182 for (i = 0; i < n_comp; i++) {
1183 vec[3][i] = input[minColR][i];
1184 vec[2][i] = input[maxColR][i];
1185 }
1186 lohi = ~lohi;
1187 }
1188
1189 cc[1] = lohi;
1190 }
1191
1192 FX64_MOV32(hi, 8 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
1193 for (j = 2 * 2 - 1; j >= 0; j--) {
1194 for (i = 0; i < n_comp; i++) {
1195 /* add in colors */
1196 FX64_SHL(hi, 5);
1197 FX64_OR32(hi, vec[j][i] >> 3);
1198 }
1199 }
1200 ((Fx64 *)cc)[1] = hi;
1201 }
1202
1203
1204 static void
1205 fxt1_quantize (GLuint *cc, const GLubyte *lines[], GLint comps)
1206 {
1207 GLint trualpha;
1208 GLubyte reord[N_TEXELS][MAX_COMP];
1209
1210 GLubyte input[N_TEXELS][MAX_COMP];
1211 GLint i, k, l;
1212
1213 if (comps == 3) {
1214 /* make the whole block opaque */
1215 memset(input, -1, sizeof(input));
1216 }
1217
1218 /* 8 texels each line */
1219 for (l = 0; l < 4; l++) {
1220 for (k = 0; k < 4; k++) {
1221 for (i = 0; i < comps; i++) {
1222 input[k + l * 4][i] = *lines[l]++;
1223 }
1224 }
1225 for (; k < 8; k++) {
1226 for (i = 0; i < comps; i++) {
1227 input[k + l * 4 + 12][i] = *lines[l]++;
1228 }
1229 }
1230 }
1231
1232 /* block layout:
1233 * 00, 01, 02, 03, 08, 09, 0a, 0b
1234 * 10, 11, 12, 13, 18, 19, 1a, 1b
1235 * 04, 05, 06, 07, 0c, 0d, 0e, 0f
1236 * 14, 15, 16, 17, 1c, 1d, 1e, 1f
1237 */
1238
1239 /* [dBorca]
1240 * stupidity flows forth from this
1241 */
1242 l = N_TEXELS;
1243 trualpha = 0;
1244 if (comps == 4) {
1245 /* skip all transparent black texels */
1246 l = 0;
1247 for (k = 0; k < N_TEXELS; k++) {
1248 /* test all components against 0 */
1249 if (!ISTBLACK(input[k])) {
1250 /* texel is not transparent black */
1251 COPY_4UBV(reord[l], input[k]);
1252 if (reord[l][ACOMP] < (255 - ALPHA_TS)) {
1253 /* non-opaque texel */
1254 trualpha = !0;
1255 }
1256 l++;
1257 }
1258 }
1259 }
1260
1261 #if 0
1262 if (trualpha) {
1263 fxt1_quantize_ALPHA0(cc, input, reord, l);
1264 } else if (l == 0) {
1265 cc[0] = cc[1] = cc[2] = -1;
1266 cc[3] = 0;
1267 } else if (l < N_TEXELS) {
1268 fxt1_quantize_HI(cc, input, reord, l);
1269 } else {
1270 fxt1_quantize_CHROMA(cc, input);
1271 }
1272 (void)fxt1_quantize_ALPHA1;
1273 (void)fxt1_quantize_MIXED1;
1274 (void)fxt1_quantize_MIXED0;
1275 #else
1276 if (trualpha) {
1277 fxt1_quantize_ALPHA1(cc, input);
1278 } else if (l == 0) {
1279 cc[0] = cc[1] = cc[2] = ~0u;
1280 cc[3] = 0;
1281 } else if (l < N_TEXELS) {
1282 fxt1_quantize_MIXED1(cc, input);
1283 } else {
1284 fxt1_quantize_MIXED0(cc, input);
1285 }
1286 (void)fxt1_quantize_ALPHA0;
1287 (void)fxt1_quantize_HI;
1288 (void)fxt1_quantize_CHROMA;
1289 #endif
1290 }
1291
1292
1293 static void
1294 fxt1_encode (GLuint width, GLuint height, GLint comps,
1295 const void *source, GLint srcRowStride,
1296 void *dest, GLint destRowStride)
1297 {
1298 GLuint x, y;
1299 const GLubyte *data;
1300 GLuint *encoded = (GLuint *)dest;
1301 void *newSource = NULL;
1302
1303 assert(comps == 3 || comps == 4);
1304
1305 /* Replicate image if width is not M8 or height is not M4 */
1306 if ((width & 7) | (height & 3)) {
1307 GLint newWidth = (width + 7) & ~7;
1308 GLint newHeight = (height + 3) & ~3;
1309 newSource = malloc(comps * newWidth * newHeight * sizeof(GLchan));
1310 if (!newSource) {
1311 GET_CURRENT_CONTEXT(ctx);
1312 _mesa_error(ctx, GL_OUT_OF_MEMORY, "texture compression");
1313 goto cleanUp;
1314 }
1315 _mesa_upscale_teximage2d(width, height, newWidth, newHeight,
1316 comps, (const GLchan *) source,
1317 srcRowStride, (GLchan *) newSource);
1318 source = newSource;
1319 width = newWidth;
1320 height = newHeight;
1321 srcRowStride = comps * newWidth;
1322 }
1323
1324 /* convert from 16/32-bit channels to GLubyte if needed */
1325 if (CHAN_TYPE != GL_UNSIGNED_BYTE) {
1326 const GLuint n = width * height * comps;
1327 const GLchan *src = (const GLchan *) source;
1328 GLubyte *dest = (GLubyte *) malloc(n * sizeof(GLubyte));
1329 GLuint i;
1330 if (!dest) {
1331 GET_CURRENT_CONTEXT(ctx);
1332 _mesa_error(ctx, GL_OUT_OF_MEMORY, "texture compression");
1333 goto cleanUp;
1334 }
1335 for (i = 0; i < n; i++) {
1336 dest[i] = CHAN_TO_UBYTE(src[i]);
1337 }
1338 if (newSource != NULL) {
1339 free(newSource);
1340 }
1341 newSource = dest; /* we'll free this buffer before returning */
1342 source = dest; /* the new, GLubyte incoming image */
1343 }
1344
1345 data = (const GLubyte *) source;
1346 destRowStride = (destRowStride - width * 2) / 4;
1347 for (y = 0; y < height; y += 4) {
1348 GLuint offs = 0 + (y + 0) * srcRowStride;
1349 for (x = 0; x < width; x += 8) {
1350 const GLubyte *lines[4];
1351 lines[0] = &data[offs];
1352 lines[1] = lines[0] + srcRowStride;
1353 lines[2] = lines[1] + srcRowStride;
1354 lines[3] = lines[2] + srcRowStride;
1355 offs += 8 * comps;
1356 fxt1_quantize(encoded, lines, comps);
1357 /* 128 bits per 8x4 block */
1358 encoded += 4;
1359 }
1360 encoded += destRowStride;
1361 }
1362
1363 cleanUp:
1364 if (newSource != NULL) {
1365 free(newSource);
1366 }
1367 }
1368
1369
1370 /***************************************************************************\
1371 * FXT1 decoder
1372 *
1373 * The decoder is based on GL_3DFX_texture_compression_FXT1
1374 * specification and serves as a concept for the encoder.
1375 \***************************************************************************/
1376
1377
1378 /* lookup table for scaling 5 bit colors up to 8 bits */
1379 static const GLubyte _rgb_scale_5[] = {
1380 0, 8, 16, 25, 33, 41, 49, 58,
1381 66, 74, 82, 90, 99, 107, 115, 123,
1382 132, 140, 148, 156, 165, 173, 181, 189,
1383 197, 206, 214, 222, 230, 239, 247, 255
1384 };
1385
1386 /* lookup table for scaling 6 bit colors up to 8 bits */
1387 static const GLubyte _rgb_scale_6[] = {
1388 0, 4, 8, 12, 16, 20, 24, 28,
1389 32, 36, 40, 45, 49, 53, 57, 61,
1390 65, 69, 73, 77, 81, 85, 89, 93,
1391 97, 101, 105, 109, 113, 117, 121, 125,
1392 130, 134, 138, 142, 146, 150, 154, 158,
1393 162, 166, 170, 174, 178, 182, 186, 190,
1394 194, 198, 202, 206, 210, 215, 219, 223,
1395 227, 231, 235, 239, 243, 247, 251, 255
1396 };
1397
1398
/* View cc as an array of 32-bit words and return the word holding bit
 * 'which', shifted so that bit becomes bit 0.  The result is not masked
 * and never includes bits from the following word.
 */
#define CC_SEL(cc, which) (((const GLuint *)(cc))[(which) / 32] >> ((which) & 31))
/* scale the low 5 bits of c up to 8 bits */
#define UP5(c) _rgb_scale_5[(c) & 31]
/* scale a 6-bit value (low 5 bits of c followed by the LSB of b) up to 8 bits */
#define UP6(c, b) _rgb_scale_6[(((c) & 31) << 1) | ((b) & 1)]
/* rounded linear interpolation: t == 0 yields c0, t == n yields c1 */
#define LERP(n, t, c0, c1) ((((n) - (t)) * (c0) + (t) * (c1) + (n) / 2) / (n))
1403
1404
1405 static void
1406 fxt1_decode_1HI (const GLubyte *code, GLint t, GLchan *rgba)
1407 {
1408 const GLuint *cc;
1409
1410 t *= 3;
1411 cc = (const GLuint *)(code + t / 8);
1412 t = (cc[0] >> (t & 7)) & 7;
1413
1414 if (t == 7) {
1415 rgba[RCOMP] = rgba[GCOMP] = rgba[BCOMP] = rgba[ACOMP] = 0;
1416 } else {
1417 GLubyte r, g, b;
1418 cc = (const GLuint *)(code + 12);
1419 if (t == 0) {
1420 b = UP5(CC_SEL(cc, 0));
1421 g = UP5(CC_SEL(cc, 5));
1422 r = UP5(CC_SEL(cc, 10));
1423 } else if (t == 6) {
1424 b = UP5(CC_SEL(cc, 15));
1425 g = UP5(CC_SEL(cc, 20));
1426 r = UP5(CC_SEL(cc, 25));
1427 } else {
1428 b = LERP(6, t, UP5(CC_SEL(cc, 0)), UP5(CC_SEL(cc, 15)));
1429 g = LERP(6, t, UP5(CC_SEL(cc, 5)), UP5(CC_SEL(cc, 20)));
1430 r = LERP(6, t, UP5(CC_SEL(cc, 10)), UP5(CC_SEL(cc, 25)));
1431 }
1432 rgba[RCOMP] = UBYTE_TO_CHAN(r);
1433 rgba[GCOMP] = UBYTE_TO_CHAN(g);
1434 rgba[BCOMP] = UBYTE_TO_CHAN(b);
1435 rgba[ACOMP] = CHAN_MAX;
1436 }
1437 }
1438
1439
1440 static void
1441 fxt1_decode_1CHROMA (const GLubyte *code, GLint t, GLchan *rgba)
1442 {
1443 const GLuint *cc;
1444 GLuint kk;
1445
1446 cc = (const GLuint *)code;
1447 if (t & 16) {
1448 cc++;
1449 t &= 15;
1450 }
1451 t = (cc[0] >> (t * 2)) & 3;
1452
1453 t *= 15;
1454 cc = (const GLuint *)(code + 8 + t / 8);
1455 kk = cc[0] >> (t & 7);
1456 rgba[BCOMP] = UBYTE_TO_CHAN( UP5(kk) );
1457 rgba[GCOMP] = UBYTE_TO_CHAN( UP5(kk >> 5) );
1458 rgba[RCOMP] = UBYTE_TO_CHAN( UP5(kk >> 10) );
1459 rgba[ACOMP] = CHAN_MAX;
1460 }
1461
1462
1463 static void
1464 fxt1_decode_1MIXED (const GLubyte *code, GLint t, GLchan *rgba)
1465 {
1466 const GLuint *cc;
1467 GLuint col[2][3];
1468 GLint glsb, selb;
1469
1470 cc = (const GLuint *)code;
1471 if (t & 16) {
1472 t &= 15;
1473 t = (cc[1] >> (t * 2)) & 3;
1474 /* col 2 */
1475 col[0][BCOMP] = (*(const GLuint *)(code + 11)) >> 6;
1476 col[0][GCOMP] = CC_SEL(cc, 99);
1477 col[0][RCOMP] = CC_SEL(cc, 104);
1478 /* col 3 */
1479 col[1][BCOMP] = CC_SEL(cc, 109);
1480 col[1][GCOMP] = CC_SEL(cc, 114);
1481 col[1][RCOMP] = CC_SEL(cc, 119);
1482 glsb = CC_SEL(cc, 126);
1483 selb = CC_SEL(cc, 33);
1484 } else {
1485 t = (cc[0] >> (t * 2)) & 3;
1486 /* col 0 */
1487 col[0][BCOMP] = CC_SEL(cc, 64);
1488 col[0][GCOMP] = CC_SEL(cc, 69);
1489 col[0][RCOMP] = CC_SEL(cc, 74);
1490 /* col 1 */
1491 col[1][BCOMP] = CC_SEL(cc, 79);
1492 col[1][GCOMP] = CC_SEL(cc, 84);
1493 col[1][RCOMP] = CC_SEL(cc, 89);
1494 glsb = CC_SEL(cc, 125);
1495 selb = CC_SEL(cc, 1);
1496 }
1497
1498 if (CC_SEL(cc, 124) & 1) {
1499 /* alpha[0] == 1 */
1500
1501 if (t == 3) {
1502 /* zero */
1503 rgba[RCOMP] = rgba[BCOMP] = rgba[GCOMP] = rgba[ACOMP] = 0;
1504 } else {
1505 GLubyte r, g, b;
1506 if (t == 0) {
1507 b = UP5(col[0][BCOMP]);
1508 g = UP5(col[0][GCOMP]);
1509 r = UP5(col[0][RCOMP]);
1510 } else if (t == 2) {
1511 b = UP5(col[1][BCOMP]);
1512 g = UP6(col[1][GCOMP], glsb);
1513 r = UP5(col[1][RCOMP]);
1514 } else {
1515 b = (UP5(col[0][BCOMP]) + UP5(col[1][BCOMP])) / 2;
1516 g = (UP5(col[0][GCOMP]) + UP6(col[1][GCOMP], glsb)) / 2;
1517 r = (UP5(col[0][RCOMP]) + UP5(col[1][RCOMP])) / 2;
1518 }
1519 rgba[RCOMP] = UBYTE_TO_CHAN(r);
1520 rgba[GCOMP] = UBYTE_TO_CHAN(g);
1521 rgba[BCOMP] = UBYTE_TO_CHAN(b);
1522 rgba[ACOMP] = CHAN_MAX;
1523 }
1524 } else {
1525 /* alpha[0] == 0 */
1526 GLubyte r, g, b;
1527 if (t == 0) {
1528 b = UP5(col[0][BCOMP]);
1529 g = UP6(col[0][GCOMP], glsb ^ selb);
1530 r = UP5(col[0][RCOMP]);
1531 } else if (t == 3) {
1532 b = UP5(col[1][BCOMP]);
1533 g = UP6(col[1][GCOMP], glsb);
1534 r = UP5(col[1][RCOMP]);
1535 } else {
1536 b = LERP(3, t, UP5(col[0][BCOMP]), UP5(col[1][BCOMP]));
1537 g = LERP(3, t, UP6(col[0][GCOMP], glsb ^ selb),
1538 UP6(col[1][GCOMP], glsb));
1539 r = LERP(3, t, UP5(col[0][RCOMP]), UP5(col[1][RCOMP]));
1540 }
1541 rgba[RCOMP] = UBYTE_TO_CHAN(r);
1542 rgba[GCOMP] = UBYTE_TO_CHAN(g);
1543 rgba[BCOMP] = UBYTE_TO_CHAN(b);
1544 rgba[ACOMP] = CHAN_MAX;
1545 }
1546 }
1547
1548
1549 static void
1550 fxt1_decode_1ALPHA (const GLubyte *code, GLint t, GLchan *rgba)
1551 {
1552 const GLuint *cc;
1553 GLubyte r, g, b, a;
1554
1555 cc = (const GLuint *)code;
1556 if (CC_SEL(cc, 124) & 1) {
1557 /* lerp == 1 */
1558 GLuint col0[4];
1559
1560 if (t & 16) {
1561 t &= 15;
1562 t = (cc[1] >> (t * 2)) & 3;
1563 /* col 2 */
1564 col0[BCOMP] = (*(const GLuint *)(code + 11)) >> 6;
1565 col0[GCOMP] = CC_SEL(cc, 99);
1566 col0[RCOMP] = CC_SEL(cc, 104);
1567 col0[ACOMP] = CC_SEL(cc, 119);
1568 } else {
1569 t = (cc[0] >> (t * 2)) & 3;
1570 /* col 0 */
1571 col0[BCOMP] = CC_SEL(cc, 64);
1572 col0[GCOMP] = CC_SEL(cc, 69);
1573 col0[RCOMP] = CC_SEL(cc, 74);
1574 col0[ACOMP] = CC_SEL(cc, 109);
1575 }
1576
1577 if (t == 0) {
1578 b = UP5(col0[BCOMP]);
1579 g = UP5(col0[GCOMP]);
1580 r = UP5(col0[RCOMP]);
1581 a = UP5(col0[ACOMP]);
1582 } else if (t == 3) {
1583 b = UP5(CC_SEL(cc, 79));
1584 g = UP5(CC_SEL(cc, 84));
1585 r = UP5(CC_SEL(cc, 89));
1586 a = UP5(CC_SEL(cc, 114));
1587 } else {
1588 b = LERP(3, t, UP5(col0[BCOMP]), UP5(CC_SEL(cc, 79)));
1589 g = LERP(3, t, UP5(col0[GCOMP]), UP5(CC_SEL(cc, 84)));
1590 r = LERP(3, t, UP5(col0[RCOMP]), UP5(CC_SEL(cc, 89)));
1591 a = LERP(3, t, UP5(col0[ACOMP]), UP5(CC_SEL(cc, 114)));
1592 }
1593 } else {
1594 /* lerp == 0 */
1595
1596 if (t & 16) {
1597 cc++;
1598 t &= 15;
1599 }
1600 t = (cc[0] >> (t * 2)) & 3;
1601
1602 if (t == 3) {
1603 /* zero */
1604 r = g = b = a = 0;
1605 } else {
1606 GLuint kk;
1607 cc = (const GLuint *)code;
1608 a = UP5(cc[3] >> (t * 5 + 13));
1609 t *= 15;
1610 cc = (const GLuint *)(code + 8 + t / 8);
1611 kk = cc[0] >> (t & 7);
1612 b = UP5(kk);
1613 g = UP5(kk >> 5);
1614 r = UP5(kk >> 10);
1615 }
1616 }
1617 rgba[RCOMP] = UBYTE_TO_CHAN(r);
1618 rgba[GCOMP] = UBYTE_TO_CHAN(g);
1619 rgba[BCOMP] = UBYTE_TO_CHAN(b);
1620 rgba[ACOMP] = UBYTE_TO_CHAN(a);
1621 }
1622
1623
1624 void
1625 fxt1_decode_1 (const void *texture, GLint stride, /* in pixels */
1626 GLint i, GLint j, GLchan *rgba)
1627 {
1628 static void (*decode_1[]) (const GLubyte *, GLint, GLchan *) = {
1629 fxt1_decode_1HI, /* cc-high = "00?" */
1630 fxt1_decode_1HI, /* cc-high = "00?" */
1631 fxt1_decode_1CHROMA, /* cc-chroma = "010" */
1632 fxt1_decode_1ALPHA, /* alpha = "011" */
1633 fxt1_decode_1MIXED, /* mixed = "1??" */
1634 fxt1_decode_1MIXED, /* mixed = "1??" */
1635 fxt1_decode_1MIXED, /* mixed = "1??" */
1636 fxt1_decode_1MIXED /* mixed = "1??" */
1637 };
1638
1639 const GLubyte *code = (const GLubyte *)texture +
1640 ((j / 4) * (stride / 8) + (i / 8)) * 16;
1641 GLint mode = CC_SEL(code, 125);
1642 GLint t = i & 7;
1643
1644 if (t & 4) {
1645 t += 12;
1646 }
1647 t += (j & 3) * 4;
1648
1649 decode_1[mode](code, t, rgba);
1650 }
1651
1652
1653 #endif /* FEATURE_texture_fxt1 */