mesa: remove FEATURE_texture_fxt1 define.
[mesa.git] / src / mesa / main / texcompress_fxt1.c
1 /*
2 * Mesa 3-D graphics library
3 * Version: 7.1
4 *
5 * Copyright (C) 1999-2008 Brian Paul All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included
15 * in all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
21 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25
26 /**
27 * \file texcompress_fxt1.c
28 * GL_3DFX_texture_compression_FXT1 support.
29 */
30
31
32 #include "glheader.h"
33 #include "imports.h"
34 #include "colormac.h"
35 #include "image.h"
36 #include "macros.h"
37 #include "mfeatures.h"
38 #include "mipmap.h"
39 #include "texcompress.h"
40 #include "texcompress_fxt1.h"
41 #include "texstore.h"
42 #include "swrast/s_context.h"
43
44
45 static void
46 fxt1_encode (GLuint width, GLuint height, GLint comps,
47 const void *source, GLint srcRowStride,
48 void *dest, GLint destRowStride);
49
50 static void
51 fxt1_decode_1 (const void *texture, GLint stride,
52 GLint i, GLint j, GLubyte *rgba);
53
54
55 /**
56 * Store user's image in rgb_fxt1 format.
57 */
58 GLboolean
59 _mesa_texstore_rgb_fxt1(TEXSTORE_PARAMS)
60 {
61 const GLubyte *pixels;
62 GLint srcRowStride;
63 GLubyte *dst;
64 const GLubyte *tempImage = NULL;
65
66 ASSERT(dstFormat == MESA_FORMAT_RGB_FXT1);
67
68 if (srcFormat != GL_RGB ||
69 srcType != GL_UNSIGNED_BYTE ||
70 ctx->_ImageTransferState ||
71 srcPacking->RowLength != srcWidth ||
72 srcPacking->SwapBytes) {
73 /* convert image to RGB/GLubyte */
74 tempImage = _mesa_make_temp_ubyte_image(ctx, dims,
75 baseInternalFormat,
76 _mesa_get_format_base_format(dstFormat),
77 srcWidth, srcHeight, srcDepth,
78 srcFormat, srcType, srcAddr,
79 srcPacking);
80 if (!tempImage)
81 return GL_FALSE; /* out of memory */
82 pixels = tempImage;
83 srcRowStride = 3 * srcWidth;
84 srcFormat = GL_RGB;
85 }
86 else {
87 pixels = _mesa_image_address2d(srcPacking, srcAddr, srcWidth, srcHeight,
88 srcFormat, srcType, 0, 0);
89
90 srcRowStride = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat,
91 srcType) / sizeof(GLubyte);
92 }
93
94 dst = dstSlices[0];
95
96 fxt1_encode(srcWidth, srcHeight, 3, pixels, srcRowStride,
97 dst, dstRowStride);
98
99 free((void*) tempImage);
100
101 return GL_TRUE;
102 }
103
104
105 /**
106 * Store user's image in rgba_fxt1 format.
107 */
108 GLboolean
109 _mesa_texstore_rgba_fxt1(TEXSTORE_PARAMS)
110 {
111 const GLubyte *pixels;
112 GLint srcRowStride;
113 GLubyte *dst;
114 const GLubyte *tempImage = NULL;
115
116 ASSERT(dstFormat == MESA_FORMAT_RGBA_FXT1);
117
118 if (srcFormat != GL_RGBA ||
119 srcType != GL_UNSIGNED_BYTE ||
120 ctx->_ImageTransferState ||
121 srcPacking->SwapBytes) {
122 /* convert image to RGBA/GLubyte */
123 tempImage = _mesa_make_temp_ubyte_image(ctx, dims,
124 baseInternalFormat,
125 _mesa_get_format_base_format(dstFormat),
126 srcWidth, srcHeight, srcDepth,
127 srcFormat, srcType, srcAddr,
128 srcPacking);
129 if (!tempImage)
130 return GL_FALSE; /* out of memory */
131 pixels = tempImage;
132 srcRowStride = 4 * srcWidth;
133 srcFormat = GL_RGBA;
134 }
135 else {
136 pixels = _mesa_image_address2d(srcPacking, srcAddr, srcWidth, srcHeight,
137 srcFormat, srcType, 0, 0);
138
139 srcRowStride = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat,
140 srcType) / sizeof(GLubyte);
141 }
142
143 dst = dstSlices[0];
144
145 fxt1_encode(srcWidth, srcHeight, 4, pixels, srcRowStride,
146 dst, dstRowStride);
147
148 free((void*) tempImage);
149
150 return GL_TRUE;
151 }
152
153
154 void
155 _mesa_fetch_texel_2d_f_rgba_fxt1( const struct swrast_texture_image *texImage,
156 GLint i, GLint j, GLint k, GLfloat *texel )
157 {
158 /* just sample as GLubyte and convert to float here */
159 GLubyte rgba[4];
160 (void) k;
161 fxt1_decode_1(texImage->Map, texImage->RowStride, i, j, rgba);
162 texel[RCOMP] = UBYTE_TO_FLOAT(rgba[RCOMP]);
163 texel[GCOMP] = UBYTE_TO_FLOAT(rgba[GCOMP]);
164 texel[BCOMP] = UBYTE_TO_FLOAT(rgba[BCOMP]);
165 texel[ACOMP] = UBYTE_TO_FLOAT(rgba[ACOMP]);
166 }
167
168
169 void
170 _mesa_fetch_texel_2d_f_rgb_fxt1( const struct swrast_texture_image *texImage,
171 GLint i, GLint j, GLint k, GLfloat *texel )
172 {
173 /* just sample as GLubyte and convert to float here */
174 GLubyte rgba[4];
175 (void) k;
176 fxt1_decode_1(texImage->Map, texImage->RowStride, i, j, rgba);
177 texel[RCOMP] = UBYTE_TO_FLOAT(rgba[RCOMP]);
178 texel[GCOMP] = UBYTE_TO_FLOAT(rgba[GCOMP]);
179 texel[BCOMP] = UBYTE_TO_FLOAT(rgba[BCOMP]);
180 texel[ACOMP] = 1.0F;
181 }
182
183
184
185 /***************************************************************************\
186 * FXT1 encoder
187 *
188 * The encoder was built by reversing the decoder,
189 * and is vaguely based on Texus2 by 3dfx. Note that this code
190 * is merely a proof of concept, since it is highly UNoptimized;
191 * moreover, it is sub-optimal due to initial conditions passed
192 * to Lloyd's algorithm (the interpolation modes are even worse).
193 \***************************************************************************/
194
195
#define MAX_COMP 4 /* ever needed maximum number of components in texel */
#define MAX_VECT 4 /* ever needed maximum number of base vectors to find */
#define N_TEXELS 32 /* number of texels in a block (always 32) */
#define LL_N_REP 50 /* number of iterations in lloyd's vq */
#define LL_RMS_D 10 /* fault tolerance (maximum delta) */
#define LL_RMS_E 255 /* fault tolerance (maximum error) */
#define ALPHA_TS 2 /* alpha threshold: (255 - ALPHA_TS) deemed opaque */

/*
 * True when the four bytes starting at v are all zero, i.e. the texel is
 * transparent black.
 *
 * The bytes are tested one at a time through an unsigned char pointer.
 * Texels live in byte arrays, which are not guaranteed to be aligned for a
 * 32-bit load and may not portably be read through an integer lvalue.
 * Note that v is evaluated several times, so it must be free of side
 * effects.
 */
#define ISTBLACK(v)                               \
   (((const unsigned char *)(v))[0] == 0 &&       \
    ((const unsigned char *)(v))[1] == 0 &&       \
    ((const unsigned char *)(v))[2] == 0 &&       \
    ((const unsigned char *)(v))[3] == 0)
204
205
/*
 * 64-bit unsigned integer type (Fx64) plus the three operations the
 * encoder needs to assemble the high quadword of a block:
 *
 *   FX64_MOV32(a, b)  - set a to the 32-bit value b (upper half cleared)
 *   FX64_OR32(a, b)   - OR the 32-bit value b into the low half of a
 *   FX64_SHL(a, c)    - shift a left by c bits
 *
 * The native uint64_t implementation is selected by the "#if 1" below;
 * the two-word struct is kept as a fallback for compilers without a
 * 64-bit integer type.
 */
#if 1

#define FX64_NATIVE 1

typedef uint64_t Fx64;

#define FX64_MOV32(a, b) ((a) = (b))
#define FX64_OR32(a, b)  ((a) |= (b))
#define FX64_SHL(a, c)   ((a) <<= (c))

#else

#define FX64_NATIVE 0

typedef struct {
   GLuint lo, hi;
} Fx64;

/* clear the upper word too, so the result matches the native assignment */
#define FX64_MOV32(a, b)                                       \
   do {                                                        \
      (a).lo = (b);                                            \
      (a).hi = 0;                                              \
   } while (0)

#define FX64_OR32(a, b) ((a).lo |= (b))

/* a shift count of 0 is a no-op; it must not reach "lo >> 32" */
#define FX64_SHL(a, c)                                         \
   do {                                                        \
      if ((c) >= 32) {                                         \
         (a).hi = (a).lo << ((c) - 32);                        \
         (a).lo = 0;                                           \
      } else if ((c) > 0) {                                    \
         (a).hi = ((a).hi << (c)) | ((a).lo >> (32 - (c)));    \
         (a).lo <<= (c);                                       \
      }                                                        \
   } while (0)

#endif
242
243
#define F(i) ((GLfloat)1) /* can be used to obtain an oblong metric: 0.30 / 0.59 / 0.11 */
#define SAFECDOT 1 /* for paranoids */

/*
 * Build the interpolation vector IV and bias B for the color line that
 * runs from V0 to V1, scaled so that CALCCDOT() yields 0 at V0 and NV at
 * V1.  NC is the number of components.
 *
 * Both macros use a loop counter named "i" that must be declared by the
 * enclosing function.
 *
 * If V0 and V1 coincide, the squared length d2 is zero.  In that case the
 * scale factor is forced to 0 instead of dividing by zero, so IV becomes
 * all zero, B becomes 0.5 and CALCCDOT() returns index 0 for every texel.
 */
#define MAKEIVEC(NV, NC, IV, B, V0, V1)                        \
   do {                                                        \
      /* compute interpolation vector */                       \
      GLfloat d2 = 0.0F;                                       \
      GLfloat rd2;                                             \
                                                               \
      for (i = 0; i < (NC); i++) {                             \
         (IV)[i] = ((V1)[i] - (V0)[i]) * F(i);                 \
         d2 += (IV)[i] * (IV)[i];                              \
      }                                                        \
      rd2 = (d2 > 0.0F) ? (GLfloat)(NV) / d2 : 0.0F;           \
      (B) = 0;                                                 \
      for (i = 0; i < (NC); i++) {                             \
         (IV)[i] *= F(i);                                      \
         (B) -= (IV)[i] * (V0)[i];                             \
         (IV)[i] *= rd2;                                       \
      }                                                        \
      (B) = (B) * rd2 + 0.5f;                                  \
   } while (0)

/*
 * Project color V onto the line described by IV / B and store the
 * resulting index in TEXEL.  With SAFECDOT enabled the index is clamped
 * to the range [0, NV].
 */
#define CALCCDOT(TEXEL, NV, NC, IV, B, V)                      \
   do {                                                        \
      GLfloat dot = 0.0F;                                      \
      for (i = 0; i < (NC); i++) {                             \
         dot += (V)[i] * (IV)[i];                              \
      }                                                        \
      (TEXEL) = (GLint)(dot + (B));                            \
      if (SAFECDOT) {                                          \
         if ((TEXEL) < 0) {                                    \
            (TEXEL) = 0;                                       \
         } else if ((TEXEL) > (NV)) {                          \
            (TEXEL) = (NV);                                    \
         }                                                     \
      }                                                        \
   } while (0)
282
283
284 static GLint
285 fxt1_bestcol (GLfloat vec[][MAX_COMP], GLint nv,
286 GLubyte input[MAX_COMP], GLint nc)
287 {
288 GLint i, j, best = -1;
289 GLfloat err = 1e9; /* big enough */
290
291 for (j = 0; j < nv; j++) {
292 GLfloat e = 0.0F;
293 for (i = 0; i < nc; i++) {
294 e += (vec[j][i] - input[i]) * (vec[j][i] - input[i]);
295 }
296 if (e < err) {
297 err = e;
298 best = j;
299 }
300 }
301
302 return best;
303 }
304
305
306 static GLint
307 fxt1_worst (GLfloat vec[MAX_COMP],
308 GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
309 {
310 GLint i, k, worst = -1;
311 GLfloat err = -1.0F; /* small enough */
312
313 for (k = 0; k < n; k++) {
314 GLfloat e = 0.0F;
315 for (i = 0; i < nc; i++) {
316 e += (vec[i] - input[k][i]) * (vec[i] - input[k][i]);
317 }
318 if (e > err) {
319 err = e;
320 worst = k;
321 }
322 }
323
324 return worst;
325 }
326
327
328 static GLint
329 fxt1_variance (GLdouble variance[MAX_COMP],
330 GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
331 {
332 GLint i, k, best = 0;
333 GLint sx, sx2;
334 GLdouble var, maxvar = -1; /* small enough */
335 GLdouble teenth = 1.0 / n;
336
337 for (i = 0; i < nc; i++) {
338 sx = sx2 = 0;
339 for (k = 0; k < n; k++) {
340 GLint t = input[k][i];
341 sx += t;
342 sx2 += t * t;
343 }
344 var = sx2 * teenth - sx * sx * teenth * teenth;
345 if (maxvar < var) {
346 maxvar = var;
347 best = i;
348 }
349 if (variance) {
350 variance[i] = var;
351 }
352 }
353
354 return best;
355 }
356
357
358 static GLint
359 fxt1_choose (GLfloat vec[][MAX_COMP], GLint nv,
360 GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
361 {
362 #if 0
363 /* Choose colors from a grid.
364 */
365 GLint i, j;
366
367 for (j = 0; j < nv; j++) {
368 GLint m = j * (n - 1) / (nv - 1);
369 for (i = 0; i < nc; i++) {
370 vec[j][i] = input[m][i];
371 }
372 }
373 #else
374 /* Our solution here is to find the darkest and brightest colors in
375 * the 8x4 tile and use those as the two representative colors.
376 * There are probably better algorithms to use (histogram-based).
377 */
378 GLint i, j, k;
379 GLint minSum = 2000; /* big enough */
380 GLint maxSum = -1; /* small enough */
381 GLint minCol = 0; /* phoudoin: silent compiler! */
382 GLint maxCol = 0; /* phoudoin: silent compiler! */
383
384 struct {
385 GLint flag;
386 GLint key;
387 GLint freq;
388 GLint idx;
389 } hist[N_TEXELS];
390 GLint lenh = 0;
391
392 memset(hist, 0, sizeof(hist));
393
394 for (k = 0; k < n; k++) {
395 GLint l;
396 GLint key = 0;
397 GLint sum = 0;
398 for (i = 0; i < nc; i++) {
399 key <<= 8;
400 key |= input[k][i];
401 sum += input[k][i];
402 }
403 for (l = 0; l < n; l++) {
404 if (!hist[l].flag) {
405 /* alloc new slot */
406 hist[l].flag = !0;
407 hist[l].key = key;
408 hist[l].freq = 1;
409 hist[l].idx = k;
410 lenh = l + 1;
411 break;
412 } else if (hist[l].key == key) {
413 hist[l].freq++;
414 break;
415 }
416 }
417 if (minSum > sum) {
418 minSum = sum;
419 minCol = k;
420 }
421 if (maxSum < sum) {
422 maxSum = sum;
423 maxCol = k;
424 }
425 }
426
427 if (lenh <= nv) {
428 for (j = 0; j < lenh; j++) {
429 for (i = 0; i < nc; i++) {
430 vec[j][i] = (GLfloat)input[hist[j].idx][i];
431 }
432 }
433 for (; j < nv; j++) {
434 for (i = 0; i < nc; i++) {
435 vec[j][i] = vec[0][i];
436 }
437 }
438 return 0;
439 }
440
441 for (j = 0; j < nv; j++) {
442 for (i = 0; i < nc; i++) {
443 vec[j][i] = ((nv - 1 - j) * input[minCol][i] + j * input[maxCol][i] + (nv - 1) / 2) / (GLfloat)(nv - 1);
444 }
445 }
446 #endif
447
448 return !0;
449 }
450
451
452 static GLint
453 fxt1_lloyd (GLfloat vec[][MAX_COMP], GLint nv,
454 GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
455 {
456 /* Use the generalized lloyd's algorithm for VQ:
457 * find 4 color vectors.
458 *
459 * for each sample color
460 * sort to nearest vector.
461 *
462 * replace each vector with the centroid of its matching colors.
463 *
464 * repeat until RMS doesn't improve.
465 *
466 * if a color vector has no samples, or becomes the same as another
467 * vector, replace it with the color which is farthest from a sample.
468 *
469 * vec[][MAX_COMP] initial vectors and resulting colors
470 * nv number of resulting colors required
471 * input[N_TEXELS][MAX_COMP] input texels
472 * nc number of components in input / vec
473 * n number of input samples
474 */
475
476 GLint sum[MAX_VECT][MAX_COMP]; /* used to accumulate closest texels */
477 GLint cnt[MAX_VECT]; /* how many times a certain vector was chosen */
478 GLfloat error, lasterror = 1e9;
479
480 GLint i, j, k, rep;
481
482 /* the quantizer */
483 for (rep = 0; rep < LL_N_REP; rep++) {
484 /* reset sums & counters */
485 for (j = 0; j < nv; j++) {
486 for (i = 0; i < nc; i++) {
487 sum[j][i] = 0;
488 }
489 cnt[j] = 0;
490 }
491 error = 0;
492
493 /* scan whole block */
494 for (k = 0; k < n; k++) {
495 #if 1
496 GLint best = -1;
497 GLfloat err = 1e9; /* big enough */
498 /* determine best vector */
499 for (j = 0; j < nv; j++) {
500 GLfloat e = (vec[j][0] - input[k][0]) * (vec[j][0] - input[k][0]) +
501 (vec[j][1] - input[k][1]) * (vec[j][1] - input[k][1]) +
502 (vec[j][2] - input[k][2]) * (vec[j][2] - input[k][2]);
503 if (nc == 4) {
504 e += (vec[j][3] - input[k][3]) * (vec[j][3] - input[k][3]);
505 }
506 if (e < err) {
507 err = e;
508 best = j;
509 }
510 }
511 #else
512 GLint best = fxt1_bestcol(vec, nv, input[k], nc, &err);
513 #endif
514 assert(best >= 0);
515 /* add in closest color */
516 for (i = 0; i < nc; i++) {
517 sum[best][i] += input[k][i];
518 }
519 /* mark this vector as used */
520 cnt[best]++;
521 /* accumulate error */
522 error += err;
523 }
524
525 /* check RMS */
526 if ((error < LL_RMS_E) ||
527 ((error < lasterror) && ((lasterror - error) < LL_RMS_D))) {
528 return !0; /* good match */
529 }
530 lasterror = error;
531
532 /* move each vector to the barycenter of its closest colors */
533 for (j = 0; j < nv; j++) {
534 if (cnt[j]) {
535 GLfloat div = 1.0F / cnt[j];
536 for (i = 0; i < nc; i++) {
537 vec[j][i] = div * sum[j][i];
538 }
539 } else {
540 /* this vec has no samples or is identical with a previous vec */
541 GLint worst = fxt1_worst(vec[j], input, nc, n);
542 for (i = 0; i < nc; i++) {
543 vec[j][i] = input[worst][i];
544 }
545 }
546 }
547 }
548
549 return 0; /* could not converge fast enough */
550 }
551
552
553 static void
554 fxt1_quantize_CHROMA (GLuint *cc,
555 GLubyte input[N_TEXELS][MAX_COMP])
556 {
557 const GLint n_vect = 4; /* 4 base vectors to find */
558 const GLint n_comp = 3; /* 3 components: R, G, B */
559 GLfloat vec[MAX_VECT][MAX_COMP];
560 GLint i, j, k;
561 Fx64 hi; /* high quadword */
562 GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
563
564 if (fxt1_choose(vec, n_vect, input, n_comp, N_TEXELS) != 0) {
565 fxt1_lloyd(vec, n_vect, input, n_comp, N_TEXELS);
566 }
567
568 FX64_MOV32(hi, 4); /* cc-chroma = "010" + unused bit */
569 for (j = n_vect - 1; j >= 0; j--) {
570 for (i = 0; i < n_comp; i++) {
571 /* add in colors */
572 FX64_SHL(hi, 5);
573 FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
574 }
575 }
576 ((Fx64 *)cc)[1] = hi;
577
578 lohi = lolo = 0;
579 /* right microtile */
580 for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) {
581 lohi <<= 2;
582 lohi |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
583 }
584 /* left microtile */
585 for (; k >= 0; k--) {
586 lolo <<= 2;
587 lolo |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
588 }
589 cc[1] = lohi;
590 cc[0] = lolo;
591 }
592
593
594 static void
595 fxt1_quantize_ALPHA0 (GLuint *cc,
596 GLubyte input[N_TEXELS][MAX_COMP],
597 GLubyte reord[N_TEXELS][MAX_COMP], GLint n)
598 {
599 const GLint n_vect = 3; /* 3 base vectors to find */
600 const GLint n_comp = 4; /* 4 components: R, G, B, A */
601 GLfloat vec[MAX_VECT][MAX_COMP];
602 GLint i, j, k;
603 Fx64 hi; /* high quadword */
604 GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
605
606 /* the last vector indicates zero */
607 for (i = 0; i < n_comp; i++) {
608 vec[n_vect][i] = 0;
609 }
610
611 /* the first n texels in reord are guaranteed to be non-zero */
612 if (fxt1_choose(vec, n_vect, reord, n_comp, n) != 0) {
613 fxt1_lloyd(vec, n_vect, reord, n_comp, n);
614 }
615
616 FX64_MOV32(hi, 6); /* alpha = "011" + lerp = 0 */
617 for (j = n_vect - 1; j >= 0; j--) {
618 /* add in alphas */
619 FX64_SHL(hi, 5);
620 FX64_OR32(hi, (GLuint)(vec[j][ACOMP] / 8.0F));
621 }
622 for (j = n_vect - 1; j >= 0; j--) {
623 for (i = 0; i < n_comp - 1; i++) {
624 /* add in colors */
625 FX64_SHL(hi, 5);
626 FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
627 }
628 }
629 ((Fx64 *)cc)[1] = hi;
630
631 lohi = lolo = 0;
632 /* right microtile */
633 for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) {
634 lohi <<= 2;
635 lohi |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
636 }
637 /* left microtile */
638 for (; k >= 0; k--) {
639 lolo <<= 2;
640 lolo |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
641 }
642 cc[1] = lohi;
643 cc[0] = lolo;
644 }
645
646
647 static void
648 fxt1_quantize_ALPHA1 (GLuint *cc,
649 GLubyte input[N_TEXELS][MAX_COMP])
650 {
651 const GLint n_vect = 3; /* highest vector number in each microtile */
652 const GLint n_comp = 4; /* 4 components: R, G, B, A */
653 GLfloat vec[1 + 1 + 1][MAX_COMP]; /* 1.5 extrema for each sub-block */
654 GLfloat b, iv[MAX_COMP]; /* interpolation vector */
655 GLint i, j, k;
656 Fx64 hi; /* high quadword */
657 GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
658
659 GLint minSum;
660 GLint maxSum;
661 GLint minColL = 0, maxColL = 0;
662 GLint minColR = 0, maxColR = 0;
663 GLint sumL = 0, sumR = 0;
664 GLint nn_comp;
665 /* Our solution here is to find the darkest and brightest colors in
666 * the 4x4 tile and use those as the two representative colors.
667 * There are probably better algorithms to use (histogram-based).
668 */
669 nn_comp = n_comp;
670 while ((minColL == maxColL) && nn_comp) {
671 minSum = 2000; /* big enough */
672 maxSum = -1; /* small enough */
673 for (k = 0; k < N_TEXELS / 2; k++) {
674 GLint sum = 0;
675 for (i = 0; i < nn_comp; i++) {
676 sum += input[k][i];
677 }
678 if (minSum > sum) {
679 minSum = sum;
680 minColL = k;
681 }
682 if (maxSum < sum) {
683 maxSum = sum;
684 maxColL = k;
685 }
686 sumL += sum;
687 }
688
689 nn_comp--;
690 }
691
692 nn_comp = n_comp;
693 while ((minColR == maxColR) && nn_comp) {
694 minSum = 2000; /* big enough */
695 maxSum = -1; /* small enough */
696 for (k = N_TEXELS / 2; k < N_TEXELS; k++) {
697 GLint sum = 0;
698 for (i = 0; i < nn_comp; i++) {
699 sum += input[k][i];
700 }
701 if (minSum > sum) {
702 minSum = sum;
703 minColR = k;
704 }
705 if (maxSum < sum) {
706 maxSum = sum;
707 maxColR = k;
708 }
709 sumR += sum;
710 }
711
712 nn_comp--;
713 }
714
715 /* choose the common vector (yuck!) */
716 {
717 GLint j1, j2;
718 GLint v1 = 0, v2 = 0;
719 GLfloat err = 1e9; /* big enough */
720 GLfloat tv[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
721 for (i = 0; i < n_comp; i++) {
722 tv[0][i] = input[minColL][i];
723 tv[1][i] = input[maxColL][i];
724 tv[2][i] = input[minColR][i];
725 tv[3][i] = input[maxColR][i];
726 }
727 for (j1 = 0; j1 < 2; j1++) {
728 for (j2 = 2; j2 < 4; j2++) {
729 GLfloat e = 0.0F;
730 for (i = 0; i < n_comp; i++) {
731 e += (tv[j1][i] - tv[j2][i]) * (tv[j1][i] - tv[j2][i]);
732 }
733 if (e < err) {
734 err = e;
735 v1 = j1;
736 v2 = j2;
737 }
738 }
739 }
740 for (i = 0; i < n_comp; i++) {
741 vec[0][i] = tv[1 - v1][i];
742 vec[1][i] = (tv[v1][i] * sumL + tv[v2][i] * sumR) / (sumL + sumR);
743 vec[2][i] = tv[5 - v2][i];
744 }
745 }
746
747 /* left microtile */
748 cc[0] = 0;
749 if (minColL != maxColL) {
750 /* compute interpolation vector */
751 MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
752
753 /* add in texels */
754 lolo = 0;
755 for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
756 GLint texel;
757 /* interpolate color */
758 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
759 /* add in texel */
760 lolo <<= 2;
761 lolo |= texel;
762 }
763
764 cc[0] = lolo;
765 }
766
767 /* right microtile */
768 cc[1] = 0;
769 if (minColR != maxColR) {
770 /* compute interpolation vector */
771 MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[1]);
772
773 /* add in texels */
774 lohi = 0;
775 for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
776 GLint texel;
777 /* interpolate color */
778 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
779 /* add in texel */
780 lohi <<= 2;
781 lohi |= texel;
782 }
783
784 cc[1] = lohi;
785 }
786
787 FX64_MOV32(hi, 7); /* alpha = "011" + lerp = 1 */
788 for (j = n_vect - 1; j >= 0; j--) {
789 /* add in alphas */
790 FX64_SHL(hi, 5);
791 FX64_OR32(hi, (GLuint)(vec[j][ACOMP] / 8.0F));
792 }
793 for (j = n_vect - 1; j >= 0; j--) {
794 for (i = 0; i < n_comp - 1; i++) {
795 /* add in colors */
796 FX64_SHL(hi, 5);
797 FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
798 }
799 }
800 ((Fx64 *)cc)[1] = hi;
801 }
802
803
804 static void
805 fxt1_quantize_HI (GLuint *cc,
806 GLubyte input[N_TEXELS][MAX_COMP],
807 GLubyte reord[N_TEXELS][MAX_COMP], GLint n)
808 {
809 const GLint n_vect = 6; /* highest vector number */
810 const GLint n_comp = 3; /* 3 components: R, G, B */
811 GLfloat b = 0.0F; /* phoudoin: silent compiler! */
812 GLfloat iv[MAX_COMP]; /* interpolation vector */
813 GLint i, k;
814 GLuint hihi; /* high quadword: hi dword */
815
816 GLint minSum = 2000; /* big enough */
817 GLint maxSum = -1; /* small enough */
818 GLint minCol = 0; /* phoudoin: silent compiler! */
819 GLint maxCol = 0; /* phoudoin: silent compiler! */
820
821 /* Our solution here is to find the darkest and brightest colors in
822 * the 8x4 tile and use those as the two representative colors.
823 * There are probably better algorithms to use (histogram-based).
824 */
825 for (k = 0; k < n; k++) {
826 GLint sum = 0;
827 for (i = 0; i < n_comp; i++) {
828 sum += reord[k][i];
829 }
830 if (minSum > sum) {
831 minSum = sum;
832 minCol = k;
833 }
834 if (maxSum < sum) {
835 maxSum = sum;
836 maxCol = k;
837 }
838 }
839
840 hihi = 0; /* cc-hi = "00" */
841 for (i = 0; i < n_comp; i++) {
842 /* add in colors */
843 hihi <<= 5;
844 hihi |= reord[maxCol][i] >> 3;
845 }
846 for (i = 0; i < n_comp; i++) {
847 /* add in colors */
848 hihi <<= 5;
849 hihi |= reord[minCol][i] >> 3;
850 }
851 cc[3] = hihi;
852 cc[0] = cc[1] = cc[2] = 0;
853
854 /* compute interpolation vector */
855 if (minCol != maxCol) {
856 MAKEIVEC(n_vect, n_comp, iv, b, reord[minCol], reord[maxCol]);
857 }
858
859 /* add in texels */
860 for (k = N_TEXELS - 1; k >= 0; k--) {
861 GLint t = k * 3;
862 GLuint *kk = (GLuint *)((char *)cc + t / 8);
863 GLint texel = n_vect + 1; /* transparent black */
864
865 if (!ISTBLACK(input[k])) {
866 if (minCol != maxCol) {
867 /* interpolate color */
868 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
869 /* add in texel */
870 kk[0] |= texel << (t & 7);
871 }
872 } else {
873 /* add in texel */
874 kk[0] |= texel << (t & 7);
875 }
876 }
877 }
878
879
880 static void
881 fxt1_quantize_MIXED1 (GLuint *cc,
882 GLubyte input[N_TEXELS][MAX_COMP])
883 {
884 const GLint n_vect = 2; /* highest vector number in each microtile */
885 const GLint n_comp = 3; /* 3 components: R, G, B */
886 GLubyte vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
887 GLfloat b, iv[MAX_COMP]; /* interpolation vector */
888 GLint i, j, k;
889 Fx64 hi; /* high quadword */
890 GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
891
892 GLint minSum;
893 GLint maxSum;
894 GLint minColL = 0, maxColL = -1;
895 GLint minColR = 0, maxColR = -1;
896
897 /* Our solution here is to find the darkest and brightest colors in
898 * the 4x4 tile and use those as the two representative colors.
899 * There are probably better algorithms to use (histogram-based).
900 */
901 minSum = 2000; /* big enough */
902 maxSum = -1; /* small enough */
903 for (k = 0; k < N_TEXELS / 2; k++) {
904 if (!ISTBLACK(input[k])) {
905 GLint sum = 0;
906 for (i = 0; i < n_comp; i++) {
907 sum += input[k][i];
908 }
909 if (minSum > sum) {
910 minSum = sum;
911 minColL = k;
912 }
913 if (maxSum < sum) {
914 maxSum = sum;
915 maxColL = k;
916 }
917 }
918 }
919 minSum = 2000; /* big enough */
920 maxSum = -1; /* small enough */
921 for (; k < N_TEXELS; k++) {
922 if (!ISTBLACK(input[k])) {
923 GLint sum = 0;
924 for (i = 0; i < n_comp; i++) {
925 sum += input[k][i];
926 }
927 if (minSum > sum) {
928 minSum = sum;
929 minColR = k;
930 }
931 if (maxSum < sum) {
932 maxSum = sum;
933 maxColR = k;
934 }
935 }
936 }
937
938 /* left microtile */
939 if (maxColL == -1) {
940 /* all transparent black */
941 cc[0] = ~0u;
942 for (i = 0; i < n_comp; i++) {
943 vec[0][i] = 0;
944 vec[1][i] = 0;
945 }
946 } else {
947 cc[0] = 0;
948 for (i = 0; i < n_comp; i++) {
949 vec[0][i] = input[minColL][i];
950 vec[1][i] = input[maxColL][i];
951 }
952 if (minColL != maxColL) {
953 /* compute interpolation vector */
954 MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
955
956 /* add in texels */
957 lolo = 0;
958 for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
959 GLint texel = n_vect + 1; /* transparent black */
960 if (!ISTBLACK(input[k])) {
961 /* interpolate color */
962 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
963 }
964 /* add in texel */
965 lolo <<= 2;
966 lolo |= texel;
967 }
968 cc[0] = lolo;
969 }
970 }
971
972 /* right microtile */
973 if (maxColR == -1) {
974 /* all transparent black */
975 cc[1] = ~0u;
976 for (i = 0; i < n_comp; i++) {
977 vec[2][i] = 0;
978 vec[3][i] = 0;
979 }
980 } else {
981 cc[1] = 0;
982 for (i = 0; i < n_comp; i++) {
983 vec[2][i] = input[minColR][i];
984 vec[3][i] = input[maxColR][i];
985 }
986 if (minColR != maxColR) {
987 /* compute interpolation vector */
988 MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);
989
990 /* add in texels */
991 lohi = 0;
992 for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
993 GLint texel = n_vect + 1; /* transparent black */
994 if (!ISTBLACK(input[k])) {
995 /* interpolate color */
996 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
997 }
998 /* add in texel */
999 lohi <<= 2;
1000 lohi |= texel;
1001 }
1002 cc[1] = lohi;
1003 }
1004 }
1005
1006 FX64_MOV32(hi, 9 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
1007 for (j = 2 * 2 - 1; j >= 0; j--) {
1008 for (i = 0; i < n_comp; i++) {
1009 /* add in colors */
1010 FX64_SHL(hi, 5);
1011 FX64_OR32(hi, vec[j][i] >> 3);
1012 }
1013 }
1014 ((Fx64 *)cc)[1] = hi;
1015 }
1016
1017
1018 static void
1019 fxt1_quantize_MIXED0 (GLuint *cc,
1020 GLubyte input[N_TEXELS][MAX_COMP])
1021 {
1022 const GLint n_vect = 3; /* highest vector number in each microtile */
1023 const GLint n_comp = 3; /* 3 components: R, G, B */
1024 GLubyte vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
1025 GLfloat b, iv[MAX_COMP]; /* interpolation vector */
1026 GLint i, j, k;
1027 Fx64 hi; /* high quadword */
1028 GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
1029
1030 GLint minColL = 0, maxColL = 0;
1031 GLint minColR = 0, maxColR = 0;
1032 #if 0
1033 GLint minSum;
1034 GLint maxSum;
1035
1036 /* Our solution here is to find the darkest and brightest colors in
1037 * the 4x4 tile and use those as the two representative colors.
1038 * There are probably better algorithms to use (histogram-based).
1039 */
1040 minSum = 2000; /* big enough */
1041 maxSum = -1; /* small enough */
1042 for (k = 0; k < N_TEXELS / 2; k++) {
1043 GLint sum = 0;
1044 for (i = 0; i < n_comp; i++) {
1045 sum += input[k][i];
1046 }
1047 if (minSum > sum) {
1048 minSum = sum;
1049 minColL = k;
1050 }
1051 if (maxSum < sum) {
1052 maxSum = sum;
1053 maxColL = k;
1054 }
1055 }
1056 minSum = 2000; /* big enough */
1057 maxSum = -1; /* small enough */
1058 for (; k < N_TEXELS; k++) {
1059 GLint sum = 0;
1060 for (i = 0; i < n_comp; i++) {
1061 sum += input[k][i];
1062 }
1063 if (minSum > sum) {
1064 minSum = sum;
1065 minColR = k;
1066 }
1067 if (maxSum < sum) {
1068 maxSum = sum;
1069 maxColR = k;
1070 }
1071 }
1072 #else
1073 GLint minVal;
1074 GLint maxVal;
1075 GLint maxVarL = fxt1_variance(NULL, input, n_comp, N_TEXELS / 2);
1076 GLint maxVarR = fxt1_variance(NULL, &input[N_TEXELS / 2], n_comp, N_TEXELS / 2);
1077
1078 /* Scan the channel with max variance for lo & hi
1079 * and use those as the two representative colors.
1080 */
1081 minVal = 2000; /* big enough */
1082 maxVal = -1; /* small enough */
1083 for (k = 0; k < N_TEXELS / 2; k++) {
1084 GLint t = input[k][maxVarL];
1085 if (minVal > t) {
1086 minVal = t;
1087 minColL = k;
1088 }
1089 if (maxVal < t) {
1090 maxVal = t;
1091 maxColL = k;
1092 }
1093 }
1094 minVal = 2000; /* big enough */
1095 maxVal = -1; /* small enough */
1096 for (; k < N_TEXELS; k++) {
1097 GLint t = input[k][maxVarR];
1098 if (minVal > t) {
1099 minVal = t;
1100 minColR = k;
1101 }
1102 if (maxVal < t) {
1103 maxVal = t;
1104 maxColR = k;
1105 }
1106 }
1107 #endif
1108
1109 /* left microtile */
1110 cc[0] = 0;
1111 for (i = 0; i < n_comp; i++) {
1112 vec[0][i] = input[minColL][i];
1113 vec[1][i] = input[maxColL][i];
1114 }
1115 if (minColL != maxColL) {
1116 /* compute interpolation vector */
1117 MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
1118
1119 /* add in texels */
1120 lolo = 0;
1121 for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
1122 GLint texel;
1123 /* interpolate color */
1124 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
1125 /* add in texel */
1126 lolo <<= 2;
1127 lolo |= texel;
1128 }
1129
1130 /* funky encoding for LSB of green */
1131 if ((GLint)((lolo >> 1) & 1) != (((vec[1][GCOMP] ^ vec[0][GCOMP]) >> 2) & 1)) {
1132 for (i = 0; i < n_comp; i++) {
1133 vec[1][i] = input[minColL][i];
1134 vec[0][i] = input[maxColL][i];
1135 }
1136 lolo = ~lolo;
1137 }
1138
1139 cc[0] = lolo;
1140 }
1141
1142 /* right microtile */
1143 cc[1] = 0;
1144 for (i = 0; i < n_comp; i++) {
1145 vec[2][i] = input[minColR][i];
1146 vec[3][i] = input[maxColR][i];
1147 }
1148 if (minColR != maxColR) {
1149 /* compute interpolation vector */
1150 MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);
1151
1152 /* add in texels */
1153 lohi = 0;
1154 for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
1155 GLint texel;
1156 /* interpolate color */
1157 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
1158 /* add in texel */
1159 lohi <<= 2;
1160 lohi |= texel;
1161 }
1162
1163 /* funky encoding for LSB of green */
1164 if ((GLint)((lohi >> 1) & 1) != (((vec[3][GCOMP] ^ vec[2][GCOMP]) >> 2) & 1)) {
1165 for (i = 0; i < n_comp; i++) {
1166 vec[3][i] = input[minColR][i];
1167 vec[2][i] = input[maxColR][i];
1168 }
1169 lohi = ~lohi;
1170 }
1171
1172 cc[1] = lohi;
1173 }
1174
1175 FX64_MOV32(hi, 8 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
1176 for (j = 2 * 2 - 1; j >= 0; j--) {
1177 for (i = 0; i < n_comp; i++) {
1178 /* add in colors */
1179 FX64_SHL(hi, 5);
1180 FX64_OR32(hi, vec[j][i] >> 3);
1181 }
1182 }
1183 ((Fx64 *)cc)[1] = hi;
1184 }
1185
1186
1187 static void
1188 fxt1_quantize (GLuint *cc, const GLubyte *lines[], GLint comps)
1189 {
1190 GLint trualpha;
1191 GLubyte reord[N_TEXELS][MAX_COMP];
1192
1193 GLubyte input[N_TEXELS][MAX_COMP];
1194 GLint i, k, l;
1195
1196 if (comps == 3) {
1197 /* make the whole block opaque */
1198 memset(input, -1, sizeof(input));
1199 }
1200
1201 /* 8 texels each line */
1202 for (l = 0; l < 4; l++) {
1203 for (k = 0; k < 4; k++) {
1204 for (i = 0; i < comps; i++) {
1205 input[k + l * 4][i] = *lines[l]++;
1206 }
1207 }
1208 for (; k < 8; k++) {
1209 for (i = 0; i < comps; i++) {
1210 input[k + l * 4 + 12][i] = *lines[l]++;
1211 }
1212 }
1213 }
1214
1215 /* block layout:
1216 * 00, 01, 02, 03, 08, 09, 0a, 0b
1217 * 10, 11, 12, 13, 18, 19, 1a, 1b
1218 * 04, 05, 06, 07, 0c, 0d, 0e, 0f
1219 * 14, 15, 16, 17, 1c, 1d, 1e, 1f
1220 */
1221
1222 /* [dBorca]
1223 * stupidity flows forth from this
1224 */
1225 l = N_TEXELS;
1226 trualpha = 0;
1227 if (comps == 4) {
1228 /* skip all transparent black texels */
1229 l = 0;
1230 for (k = 0; k < N_TEXELS; k++) {
1231 /* test all components against 0 */
1232 if (!ISTBLACK(input[k])) {
1233 /* texel is not transparent black */
1234 COPY_4UBV(reord[l], input[k]);
1235 if (reord[l][ACOMP] < (255 - ALPHA_TS)) {
1236 /* non-opaque texel */
1237 trualpha = !0;
1238 }
1239 l++;
1240 }
1241 }
1242 }
1243
1244 #if 0
1245 if (trualpha) {
1246 fxt1_quantize_ALPHA0(cc, input, reord, l);
1247 } else if (l == 0) {
1248 cc[0] = cc[1] = cc[2] = -1;
1249 cc[3] = 0;
1250 } else if (l < N_TEXELS) {
1251 fxt1_quantize_HI(cc, input, reord, l);
1252 } else {
1253 fxt1_quantize_CHROMA(cc, input);
1254 }
1255 (void)fxt1_quantize_ALPHA1;
1256 (void)fxt1_quantize_MIXED1;
1257 (void)fxt1_quantize_MIXED0;
1258 #else
1259 if (trualpha) {
1260 fxt1_quantize_ALPHA1(cc, input);
1261 } else if (l == 0) {
1262 cc[0] = cc[1] = cc[2] = ~0u;
1263 cc[3] = 0;
1264 } else if (l < N_TEXELS) {
1265 fxt1_quantize_MIXED1(cc, input);
1266 } else {
1267 fxt1_quantize_MIXED0(cc, input);
1268 }
1269 (void)fxt1_quantize_ALPHA0;
1270 (void)fxt1_quantize_HI;
1271 (void)fxt1_quantize_CHROMA;
1272 #endif
1273 }
1274
1275
1276
1277 /**
1278 * Upscale an image by replication, not (typical) stretching.
1279 * We use this when the image width or height is less than a
1280 * certain size (4, 8) and we need to upscale an image.
1281 */
1282 static void
1283 upscale_teximage2d(GLsizei inWidth, GLsizei inHeight,
1284 GLsizei outWidth, GLsizei outHeight,
1285 GLint comps, const GLubyte *src, GLint srcRowStride,
1286 GLubyte *dest )
1287 {
1288 GLint i, j, k;
1289
1290 ASSERT(outWidth >= inWidth);
1291 ASSERT(outHeight >= inHeight);
1292 #if 0
1293 ASSERT(inWidth == 1 || inWidth == 2 || inHeight == 1 || inHeight == 2);
1294 ASSERT((outWidth & 3) == 0);
1295 ASSERT((outHeight & 3) == 0);
1296 #endif
1297
1298 for (i = 0; i < outHeight; i++) {
1299 const GLint ii = i % inHeight;
1300 for (j = 0; j < outWidth; j++) {
1301 const GLint jj = j % inWidth;
1302 for (k = 0; k < comps; k++) {
1303 dest[(i * outWidth + j) * comps + k]
1304 = src[ii * srcRowStride + jj * comps + k];
1305 }
1306 }
1307 }
1308 }
1309
1310
1311 static void
1312 fxt1_encode (GLuint width, GLuint height, GLint comps,
1313 const void *source, GLint srcRowStride,
1314 void *dest, GLint destRowStride)
1315 {
1316 GLuint x, y;
1317 const GLubyte *data;
1318 GLuint *encoded = (GLuint *)dest;
1319 void *newSource = NULL;
1320
1321 assert(comps == 3 || comps == 4);
1322
1323 /* Replicate image if width is not M8 or height is not M4 */
1324 if ((width & 7) | (height & 3)) {
1325 GLint newWidth = (width + 7) & ~7;
1326 GLint newHeight = (height + 3) & ~3;
1327 newSource = malloc(comps * newWidth * newHeight * sizeof(GLubyte));
1328 if (!newSource) {
1329 GET_CURRENT_CONTEXT(ctx);
1330 _mesa_error(ctx, GL_OUT_OF_MEMORY, "texture compression");
1331 goto cleanUp;
1332 }
1333 upscale_teximage2d(width, height, newWidth, newHeight,
1334 comps, (const GLubyte *) source,
1335 srcRowStride, (GLubyte *) newSource);
1336 source = newSource;
1337 width = newWidth;
1338 height = newHeight;
1339 srcRowStride = comps * newWidth;
1340 }
1341
1342 data = (const GLubyte *) source;
1343 destRowStride = (destRowStride - width * 2) / 4;
1344 for (y = 0; y < height; y += 4) {
1345 GLuint offs = 0 + (y + 0) * srcRowStride;
1346 for (x = 0; x < width; x += 8) {
1347 const GLubyte *lines[4];
1348 lines[0] = &data[offs];
1349 lines[1] = lines[0] + srcRowStride;
1350 lines[2] = lines[1] + srcRowStride;
1351 lines[3] = lines[2] + srcRowStride;
1352 offs += 8 * comps;
1353 fxt1_quantize(encoded, lines, comps);
1354 /* 128 bits per 8x4 block */
1355 encoded += 4;
1356 }
1357 encoded += destRowStride;
1358 }
1359
1360 cleanUp:
1361 free(newSource);
1362 }
1363
1364
1365 /***************************************************************************\
1366 * FXT1 decoder
1367 *
1368 * The decoder is based on GL_3DFX_texture_compression_FXT1
1369 * specification and serves as a concept for the encoder.
1370 \***************************************************************************/
1371
1372
1373 /* lookup table for scaling 5 bit colors up to 8 bits */
1374 static const GLubyte _rgb_scale_5[] = {
1375 0, 8, 16, 25, 33, 41, 49, 58,
1376 66, 74, 82, 90, 99, 107, 115, 123,
1377 132, 140, 148, 156, 165, 173, 181, 189,
1378 197, 206, 214, 222, 230, 239, 247, 255
1379 };
1380
1381 /* lookup table for scaling 6 bit colors up to 8 bits */
1382 static const GLubyte _rgb_scale_6[] = {
1383 0, 4, 8, 12, 16, 20, 24, 28,
1384 32, 36, 40, 45, 49, 53, 57, 61,
1385 65, 69, 73, 77, 81, 85, 89, 93,
1386 97, 101, 105, 109, 113, 117, 121, 125,
1387 130, 134, 138, 142, 146, 150, 154, 158,
1388 162, 166, 170, 174, 178, 182, 186, 190,
1389 194, 198, 202, 206, 210, 215, 219, 223,
1390 227, 231, 235, 239, 243, 247, 251, 255
1391 };
1392
1393
/*
 * Decoder helpers.
 *
 * CC_SEL(cc, which)  view cc as an array of 32-bit words and return the
 *                    word containing bit `which`, shifted right so that
 *                    bit becomes bit 0.  The result is NOT masked.
 * UP5(c)             expand the low 5 bits of c to 8 bits.
 * UP6(c, b)          expand the 6-bit value made of the low 5 bits of c
 *                    followed by the low bit of b to 8 bits.
 * LERP(n, t, c0, c1) rounded integer blend ((n - t) * c0 + t * c1) / n.
 *
 * Every replacement list is fully parenthesized so the macros can be
 * used as operands of a larger expression; CC_SEL only reads, so it
 * casts to a const-qualified pointer.
 */
#define CC_SEL(cc, which) \
   (((const GLuint *)(cc))[(which) / 32] >> ((which) & 31))
#define UP5(c) (_rgb_scale_5[(c) & 31])
#define UP6(c, b) (_rgb_scale_6[(((c) & 31) << 1) | ((b) & 1)])
#define LERP(n, t, c0, c1) \
   ((((n) - (t)) * (c0) + (t) * (c1) + (n) / 2) / (n))
1398
1399
1400 static void
1401 fxt1_decode_1HI (const GLubyte *code, GLint t, GLubyte *rgba)
1402 {
1403 const GLuint *cc;
1404
1405 t *= 3;
1406 cc = (const GLuint *)(code + t / 8);
1407 t = (cc[0] >> (t & 7)) & 7;
1408
1409 if (t == 7) {
1410 rgba[RCOMP] = rgba[GCOMP] = rgba[BCOMP] = rgba[ACOMP] = 0;
1411 } else {
1412 GLubyte r, g, b;
1413 cc = (const GLuint *)(code + 12);
1414 if (t == 0) {
1415 b = UP5(CC_SEL(cc, 0));
1416 g = UP5(CC_SEL(cc, 5));
1417 r = UP5(CC_SEL(cc, 10));
1418 } else if (t == 6) {
1419 b = UP5(CC_SEL(cc, 15));
1420 g = UP5(CC_SEL(cc, 20));
1421 r = UP5(CC_SEL(cc, 25));
1422 } else {
1423 b = LERP(6, t, UP5(CC_SEL(cc, 0)), UP5(CC_SEL(cc, 15)));
1424 g = LERP(6, t, UP5(CC_SEL(cc, 5)), UP5(CC_SEL(cc, 20)));
1425 r = LERP(6, t, UP5(CC_SEL(cc, 10)), UP5(CC_SEL(cc, 25)));
1426 }
1427 rgba[RCOMP] = r;
1428 rgba[GCOMP] = g;
1429 rgba[BCOMP] = b;
1430 rgba[ACOMP] = 255;
1431 }
1432 }
1433
1434
1435 static void
1436 fxt1_decode_1CHROMA (const GLubyte *code, GLint t, GLubyte *rgba)
1437 {
1438 const GLuint *cc;
1439 GLuint kk;
1440
1441 cc = (const GLuint *)code;
1442 if (t & 16) {
1443 cc++;
1444 t &= 15;
1445 }
1446 t = (cc[0] >> (t * 2)) & 3;
1447
1448 t *= 15;
1449 cc = (const GLuint *)(code + 8 + t / 8);
1450 kk = cc[0] >> (t & 7);
1451 rgba[BCOMP] = UP5(kk);
1452 rgba[GCOMP] = UP5(kk >> 5);
1453 rgba[RCOMP] = UP5(kk >> 10);
1454 rgba[ACOMP] = 255;
1455 }
1456
1457
1458 static void
1459 fxt1_decode_1MIXED (const GLubyte *code, GLint t, GLubyte *rgba)
1460 {
1461 const GLuint *cc;
1462 GLuint col[2][3];
1463 GLint glsb, selb;
1464
1465 cc = (const GLuint *)code;
1466 if (t & 16) {
1467 t &= 15;
1468 t = (cc[1] >> (t * 2)) & 3;
1469 /* col 2 */
1470 col[0][BCOMP] = (*(const GLuint *)(code + 11)) >> 6;
1471 col[0][GCOMP] = CC_SEL(cc, 99);
1472 col[0][RCOMP] = CC_SEL(cc, 104);
1473 /* col 3 */
1474 col[1][BCOMP] = CC_SEL(cc, 109);
1475 col[1][GCOMP] = CC_SEL(cc, 114);
1476 col[1][RCOMP] = CC_SEL(cc, 119);
1477 glsb = CC_SEL(cc, 126);
1478 selb = CC_SEL(cc, 33);
1479 } else {
1480 t = (cc[0] >> (t * 2)) & 3;
1481 /* col 0 */
1482 col[0][BCOMP] = CC_SEL(cc, 64);
1483 col[0][GCOMP] = CC_SEL(cc, 69);
1484 col[0][RCOMP] = CC_SEL(cc, 74);
1485 /* col 1 */
1486 col[1][BCOMP] = CC_SEL(cc, 79);
1487 col[1][GCOMP] = CC_SEL(cc, 84);
1488 col[1][RCOMP] = CC_SEL(cc, 89);
1489 glsb = CC_SEL(cc, 125);
1490 selb = CC_SEL(cc, 1);
1491 }
1492
1493 if (CC_SEL(cc, 124) & 1) {
1494 /* alpha[0] == 1 */
1495
1496 if (t == 3) {
1497 /* zero */
1498 rgba[RCOMP] = rgba[BCOMP] = rgba[GCOMP] = rgba[ACOMP] = 0;
1499 } else {
1500 GLubyte r, g, b;
1501 if (t == 0) {
1502 b = UP5(col[0][BCOMP]);
1503 g = UP5(col[0][GCOMP]);
1504 r = UP5(col[0][RCOMP]);
1505 } else if (t == 2) {
1506 b = UP5(col[1][BCOMP]);
1507 g = UP6(col[1][GCOMP], glsb);
1508 r = UP5(col[1][RCOMP]);
1509 } else {
1510 b = (UP5(col[0][BCOMP]) + UP5(col[1][BCOMP])) / 2;
1511 g = (UP5(col[0][GCOMP]) + UP6(col[1][GCOMP], glsb)) / 2;
1512 r = (UP5(col[0][RCOMP]) + UP5(col[1][RCOMP])) / 2;
1513 }
1514 rgba[RCOMP] = r;
1515 rgba[GCOMP] = g;
1516 rgba[BCOMP] = b;
1517 rgba[ACOMP] = 255;
1518 }
1519 } else {
1520 /* alpha[0] == 0 */
1521 GLubyte r, g, b;
1522 if (t == 0) {
1523 b = UP5(col[0][BCOMP]);
1524 g = UP6(col[0][GCOMP], glsb ^ selb);
1525 r = UP5(col[0][RCOMP]);
1526 } else if (t == 3) {
1527 b = UP5(col[1][BCOMP]);
1528 g = UP6(col[1][GCOMP], glsb);
1529 r = UP5(col[1][RCOMP]);
1530 } else {
1531 b = LERP(3, t, UP5(col[0][BCOMP]), UP5(col[1][BCOMP]));
1532 g = LERP(3, t, UP6(col[0][GCOMP], glsb ^ selb),
1533 UP6(col[1][GCOMP], glsb));
1534 r = LERP(3, t, UP5(col[0][RCOMP]), UP5(col[1][RCOMP]));
1535 }
1536 rgba[RCOMP] = r;
1537 rgba[GCOMP] = g;
1538 rgba[BCOMP] = b;
1539 rgba[ACOMP] = 255;
1540 }
1541 }
1542
1543
1544 static void
1545 fxt1_decode_1ALPHA (const GLubyte *code, GLint t, GLubyte *rgba)
1546 {
1547 const GLuint *cc;
1548 GLubyte r, g, b, a;
1549
1550 cc = (const GLuint *)code;
1551 if (CC_SEL(cc, 124) & 1) {
1552 /* lerp == 1 */
1553 GLuint col0[4];
1554
1555 if (t & 16) {
1556 t &= 15;
1557 t = (cc[1] >> (t * 2)) & 3;
1558 /* col 2 */
1559 col0[BCOMP] = (*(const GLuint *)(code + 11)) >> 6;
1560 col0[GCOMP] = CC_SEL(cc, 99);
1561 col0[RCOMP] = CC_SEL(cc, 104);
1562 col0[ACOMP] = CC_SEL(cc, 119);
1563 } else {
1564 t = (cc[0] >> (t * 2)) & 3;
1565 /* col 0 */
1566 col0[BCOMP] = CC_SEL(cc, 64);
1567 col0[GCOMP] = CC_SEL(cc, 69);
1568 col0[RCOMP] = CC_SEL(cc, 74);
1569 col0[ACOMP] = CC_SEL(cc, 109);
1570 }
1571
1572 if (t == 0) {
1573 b = UP5(col0[BCOMP]);
1574 g = UP5(col0[GCOMP]);
1575 r = UP5(col0[RCOMP]);
1576 a = UP5(col0[ACOMP]);
1577 } else if (t == 3) {
1578 b = UP5(CC_SEL(cc, 79));
1579 g = UP5(CC_SEL(cc, 84));
1580 r = UP5(CC_SEL(cc, 89));
1581 a = UP5(CC_SEL(cc, 114));
1582 } else {
1583 b = LERP(3, t, UP5(col0[BCOMP]), UP5(CC_SEL(cc, 79)));
1584 g = LERP(3, t, UP5(col0[GCOMP]), UP5(CC_SEL(cc, 84)));
1585 r = LERP(3, t, UP5(col0[RCOMP]), UP5(CC_SEL(cc, 89)));
1586 a = LERP(3, t, UP5(col0[ACOMP]), UP5(CC_SEL(cc, 114)));
1587 }
1588 } else {
1589 /* lerp == 0 */
1590
1591 if (t & 16) {
1592 cc++;
1593 t &= 15;
1594 }
1595 t = (cc[0] >> (t * 2)) & 3;
1596
1597 if (t == 3) {
1598 /* zero */
1599 r = g = b = a = 0;
1600 } else {
1601 GLuint kk;
1602 cc = (const GLuint *)code;
1603 a = UP5(cc[3] >> (t * 5 + 13));
1604 t *= 15;
1605 cc = (const GLuint *)(code + 8 + t / 8);
1606 kk = cc[0] >> (t & 7);
1607 b = UP5(kk);
1608 g = UP5(kk >> 5);
1609 r = UP5(kk >> 10);
1610 }
1611 }
1612 rgba[RCOMP] = r;
1613 rgba[GCOMP] = g;
1614 rgba[BCOMP] = b;
1615 rgba[ACOMP] = a;
1616 }
1617
1618
1619 static void
1620 fxt1_decode_1 (const void *texture, GLint stride, /* in pixels */
1621 GLint i, GLint j, GLubyte *rgba)
1622 {
1623 static void (*decode_1[]) (const GLubyte *, GLint, GLubyte *) = {
1624 fxt1_decode_1HI, /* cc-high = "00?" */
1625 fxt1_decode_1HI, /* cc-high = "00?" */
1626 fxt1_decode_1CHROMA, /* cc-chroma = "010" */
1627 fxt1_decode_1ALPHA, /* alpha = "011" */
1628 fxt1_decode_1MIXED, /* mixed = "1??" */
1629 fxt1_decode_1MIXED, /* mixed = "1??" */
1630 fxt1_decode_1MIXED, /* mixed = "1??" */
1631 fxt1_decode_1MIXED /* mixed = "1??" */
1632 };
1633
1634 const GLubyte *code = (const GLubyte *)texture +
1635 ((j / 4) * (stride / 8) + (i / 8)) * 16;
1636 GLint mode = CC_SEL(code, 125);
1637 GLint t = i & 7;
1638
1639 if (t & 4) {
1640 t += 12;
1641 }
1642 t += (j & 3) * 4;
1643
1644 decode_1[mode](code, t, rgba);
1645 }