mesa/fxt1: make fxt1_decode_1 static
[mesa.git] / src / mesa / main / texcompress_fxt1.c
1 /*
2 * Mesa 3-D graphics library
3 * Version: 7.1
4 *
5 * Copyright (C) 1999-2008 Brian Paul All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included
15 * in all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
21 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25
26 /**
27 * \file texcompress_fxt1.c
28 * GL_3DFX_texture_compression_FXT1 support.
29 */
30
31
32 #include "glheader.h"
33 #include "imports.h"
34 #include "colormac.h"
35 #include "image.h"
36 #include "macros.h"
37 #include "mfeatures.h"
38 #include "mipmap.h"
39 #include "texcompress.h"
40 #include "texcompress_fxt1.h"
41 #include "texstore.h"
42 #include "swrast/s_context.h"
43
44
45 #if FEATURE_texture_fxt1
46
47
48 static void
49 fxt1_encode (GLuint width, GLuint height, GLint comps,
50 const void *source, GLint srcRowStride,
51 void *dest, GLint destRowStride);
52
53 static void
54 fxt1_decode_1 (const void *texture, GLint stride,
55 GLint i, GLint j, GLubyte *rgba);
56
57
58 /**
59 * Store user's image in rgb_fxt1 format.
60 */
61 GLboolean
62 _mesa_texstore_rgb_fxt1(TEXSTORE_PARAMS)
63 {
64 const GLubyte *pixels;
65 GLint srcRowStride;
66 GLubyte *dst;
67 const GLubyte *tempImage = NULL;
68
69 ASSERT(dstFormat == MESA_FORMAT_RGB_FXT1);
70
71 if (srcFormat != GL_RGB ||
72 srcType != GL_UNSIGNED_BYTE ||
73 ctx->_ImageTransferState ||
74 srcPacking->RowLength != srcWidth ||
75 srcPacking->SwapBytes) {
76 /* convert image to RGB/GLubyte */
77 tempImage = _mesa_make_temp_ubyte_image(ctx, dims,
78 baseInternalFormat,
79 _mesa_get_format_base_format(dstFormat),
80 srcWidth, srcHeight, srcDepth,
81 srcFormat, srcType, srcAddr,
82 srcPacking);
83 if (!tempImage)
84 return GL_FALSE; /* out of memory */
85 pixels = tempImage;
86 srcRowStride = 3 * srcWidth;
87 srcFormat = GL_RGB;
88 }
89 else {
90 pixels = _mesa_image_address2d(srcPacking, srcAddr, srcWidth, srcHeight,
91 srcFormat, srcType, 0, 0);
92
93 srcRowStride = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat,
94 srcType) / sizeof(GLubyte);
95 }
96
97 dst = dstSlices[0];
98
99 fxt1_encode(srcWidth, srcHeight, 3, pixels, srcRowStride,
100 dst, dstRowStride);
101
102 free((void*) tempImage);
103
104 return GL_TRUE;
105 }
106
107
108 /**
109 * Store user's image in rgba_fxt1 format.
110 */
111 GLboolean
112 _mesa_texstore_rgba_fxt1(TEXSTORE_PARAMS)
113 {
114 const GLubyte *pixels;
115 GLint srcRowStride;
116 GLubyte *dst;
117 const GLubyte *tempImage = NULL;
118
119 ASSERT(dstFormat == MESA_FORMAT_RGBA_FXT1);
120
121 if (srcFormat != GL_RGBA ||
122 srcType != GL_UNSIGNED_BYTE ||
123 ctx->_ImageTransferState ||
124 srcPacking->SwapBytes) {
125 /* convert image to RGBA/GLubyte */
126 tempImage = _mesa_make_temp_ubyte_image(ctx, dims,
127 baseInternalFormat,
128 _mesa_get_format_base_format(dstFormat),
129 srcWidth, srcHeight, srcDepth,
130 srcFormat, srcType, srcAddr,
131 srcPacking);
132 if (!tempImage)
133 return GL_FALSE; /* out of memory */
134 pixels = tempImage;
135 srcRowStride = 4 * srcWidth;
136 srcFormat = GL_RGBA;
137 }
138 else {
139 pixels = _mesa_image_address2d(srcPacking, srcAddr, srcWidth, srcHeight,
140 srcFormat, srcType, 0, 0);
141
142 srcRowStride = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat,
143 srcType) / sizeof(GLubyte);
144 }
145
146 dst = dstSlices[0];
147
148 fxt1_encode(srcWidth, srcHeight, 4, pixels, srcRowStride,
149 dst, dstRowStride);
150
151 free((void*) tempImage);
152
153 return GL_TRUE;
154 }
155
156
157 void
158 _mesa_fetch_texel_2d_f_rgba_fxt1( const struct swrast_texture_image *texImage,
159 GLint i, GLint j, GLint k, GLfloat *texel )
160 {
161 /* just sample as GLubyte and convert to float here */
162 GLubyte rgba[4];
163 (void) k;
164 fxt1_decode_1(texImage->Map, texImage->RowStride, i, j, rgba);
165 texel[RCOMP] = UBYTE_TO_FLOAT(rgba[RCOMP]);
166 texel[GCOMP] = UBYTE_TO_FLOAT(rgba[GCOMP]);
167 texel[BCOMP] = UBYTE_TO_FLOAT(rgba[BCOMP]);
168 texel[ACOMP] = UBYTE_TO_FLOAT(rgba[ACOMP]);
169 }
170
171
172 void
173 _mesa_fetch_texel_2d_f_rgb_fxt1( const struct swrast_texture_image *texImage,
174 GLint i, GLint j, GLint k, GLfloat *texel )
175 {
176 /* just sample as GLubyte and convert to float here */
177 GLubyte rgba[4];
178 (void) k;
179 fxt1_decode_1(texImage->Map, texImage->RowStride, i, j, rgba);
180 texel[RCOMP] = UBYTE_TO_FLOAT(rgba[RCOMP]);
181 texel[GCOMP] = UBYTE_TO_FLOAT(rgba[GCOMP]);
182 texel[BCOMP] = UBYTE_TO_FLOAT(rgba[BCOMP]);
183 texel[ACOMP] = 1.0F;
184 }
185
186
187
188 /***************************************************************************\
189 * FXT1 encoder
190 *
191 * The encoder was built by reversing the decoder,
192 * and is vaguely based on Texus2 by 3dfx. Note that this code
193 * is merely a proof of concept, since it is highly UNoptimized;
194 * moreover, it is sub-optimal due to initial conditions passed
195 * to Lloyd's algorithm (the interpolation modes are even worse).
196 \***************************************************************************/
197
198
#define MAX_COMP 4 /* ever needed maximum number of components in texel */
#define MAX_VECT 4 /* ever needed maximum number of base vectors to find */
#define N_TEXELS 32 /* number of texels in a block (always 32) */
#define LL_N_REP 50 /* number of iterations in lloyd's vq */
#define LL_RMS_D 10 /* fault tolerance (maximum delta) */
#define LL_RMS_E 255 /* fault tolerance (maximum error) */
#define ALPHA_TS 2 /* alpha threshold: (255 - ALPHA_TS) deemed opaque */

/*
 * True when the 4-byte texel at v is "transparent black", i.e. all four
 * bytes are zero.
 *
 * The bytes are compared individually instead of being read through a
 * GLuint pointer: the texel arrays are GLubyte[MAX_COMP], so a GLuint
 * load would violate the strict-aliasing rule and could be misaligned.
 * v is evaluated several times and must therefore be free of side effects.
 */
#define ISTBLACK(v) \
   (((const GLubyte *)(v))[0] == 0 && \
    ((const GLubyte *)(v))[1] == 0 && \
    ((const GLubyte *)(v))[2] == 0 && \
    ((const GLubyte *)(v))[3] == 0)
207
208
/*
 * Define a 64-bit unsigned integer type and macros.
 *
 *   FX64_MOV32(a, b)  load the 32-bit value b into a (upper half cleared)
 *   FX64_OR32(a, b)   OR the 32-bit value b into the low bits of a
 *   FX64_SHL(a, c)    shift a left by c bits (0 <= c < 64)
 *
 * The native uint64_t variant is the one compiled in; the two-word
 * fallback is kept for compilers without a 64-bit integer type.
 */
#if 1

#define FX64_NATIVE 1

typedef uint64_t Fx64;

#define FX64_MOV32(a, b) ((a) = (b))
#define FX64_OR32(a, b) ((a) |= (b))
#define FX64_SHL(a, c) ((a) <<= (c))

#else

#define FX64_NATIVE 0

typedef struct {
   GLuint lo, hi;
} Fx64;

/* clear .hi as well: FX64_SHL reads it, so it must not be indeterminate */
#define FX64_MOV32(a, b) \
   do { \
      (a).lo = (b); \
      (a).hi = 0; \
   } while (0)

#define FX64_OR32(a, b) ((a).lo |= (b))

/* c == 0 is a no-op; without the guard (32 - c) would be a 32-bit shift */
#define FX64_SHL(a, c) \
   do { \
      if ((c) >= 32) { \
         (a).hi = (a).lo << ((c) - 32); \
         (a).lo = 0; \
      } else if ((c) > 0) { \
         (a).hi = ((a).hi << (c)) | ((a).lo >> (32 - (c))); \
         (a).lo <<= (c); \
      } \
   } while (0)

#endif
245
246
#define F(i) (GLfloat)1 /* can be used to obtain an oblong metric: 0.30 / 0.59 / 0.11 */
#define SAFECDOT 1 /* for paranoids */

/*
 * MAKEIVEC: build the interpolation vector IV and bias B that project a
 * colour onto the segment V0 -> V1, scaled so that V0 maps to 0 and V1
 * maps to NV (before the +0.5 rounding bias).
 *
 *   NV      highest index on the segment
 *   NC      number of components to use
 *   IV, B   outputs (GLfloat array / GLfloat lvalue)
 *   V0, V1  segment endpoints (indexable by component)
 *
 * NOTE: the macro uses a loop counter named `i` that must be declared in
 * the calling scope.
 *
 * If V0 and V1 are identical in all NC components the segment has zero
 * length; IV is then all zeros and B is 0.5, so CALCCDOT yields index 0
 * instead of dividing by zero and producing NaN.
 */
#define MAKEIVEC(NV, NC, IV, B, V0, V1) \
do { \
   /* compute interpolation vector */ \
   GLfloat d2 = 0.0F; \
   GLfloat rd2; \
   \
   for (i = 0; i < NC; i++) { \
      IV[i] = (V1[i] - V0[i]) * F(i); \
      d2 += IV[i] * IV[i]; \
   } \
   /* guard against a degenerate (zero-length) segment */ \
   rd2 = (d2 > 0.0F) ? (GLfloat)NV / d2 : 0.0F; \
   B = 0; \
   for (i = 0; i < NC; i++) { \
      IV[i] *= F(i); \
      B -= IV[i] * V0[i]; \
      IV[i] *= rd2; \
   } \
   B = B * rd2 + 0.5f; \
} while (0)

/*
 * CALCCDOT: project colour V onto the vector prepared by MAKEIVEC and
 * store the resulting index in TEXEL.  With SAFECDOT enabled the index
 * is clamped to [0, NV].  Uses the caller's loop counter `i`.
 */
#define CALCCDOT(TEXEL, NV, NC, IV, B, V)\
do { \
   GLfloat dot = 0.0F; \
   for (i = 0; i < NC; i++) { \
      dot += V[i] * IV[i]; \
   } \
   TEXEL = (GLint)(dot + B); \
   if (SAFECDOT) { \
      if (TEXEL < 0) { \
         TEXEL = 0; \
      } else if (TEXEL > NV) { \
         TEXEL = NV; \
      } \
   } \
} while (0)
285
286
287 static GLint
288 fxt1_bestcol (GLfloat vec[][MAX_COMP], GLint nv,
289 GLubyte input[MAX_COMP], GLint nc)
290 {
291 GLint i, j, best = -1;
292 GLfloat err = 1e9; /* big enough */
293
294 for (j = 0; j < nv; j++) {
295 GLfloat e = 0.0F;
296 for (i = 0; i < nc; i++) {
297 e += (vec[j][i] - input[i]) * (vec[j][i] - input[i]);
298 }
299 if (e < err) {
300 err = e;
301 best = j;
302 }
303 }
304
305 return best;
306 }
307
308
309 static GLint
310 fxt1_worst (GLfloat vec[MAX_COMP],
311 GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
312 {
313 GLint i, k, worst = -1;
314 GLfloat err = -1.0F; /* small enough */
315
316 for (k = 0; k < n; k++) {
317 GLfloat e = 0.0F;
318 for (i = 0; i < nc; i++) {
319 e += (vec[i] - input[k][i]) * (vec[i] - input[k][i]);
320 }
321 if (e > err) {
322 err = e;
323 worst = k;
324 }
325 }
326
327 return worst;
328 }
329
330
331 static GLint
332 fxt1_variance (GLdouble variance[MAX_COMP],
333 GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
334 {
335 GLint i, k, best = 0;
336 GLint sx, sx2;
337 GLdouble var, maxvar = -1; /* small enough */
338 GLdouble teenth = 1.0 / n;
339
340 for (i = 0; i < nc; i++) {
341 sx = sx2 = 0;
342 for (k = 0; k < n; k++) {
343 GLint t = input[k][i];
344 sx += t;
345 sx2 += t * t;
346 }
347 var = sx2 * teenth - sx * sx * teenth * teenth;
348 if (maxvar < var) {
349 maxvar = var;
350 best = i;
351 }
352 if (variance) {
353 variance[i] = var;
354 }
355 }
356
357 return best;
358 }
359
360
361 static GLint
362 fxt1_choose (GLfloat vec[][MAX_COMP], GLint nv,
363 GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
364 {
365 #if 0
366 /* Choose colors from a grid.
367 */
368 GLint i, j;
369
370 for (j = 0; j < nv; j++) {
371 GLint m = j * (n - 1) / (nv - 1);
372 for (i = 0; i < nc; i++) {
373 vec[j][i] = input[m][i];
374 }
375 }
376 #else
377 /* Our solution here is to find the darkest and brightest colors in
378 * the 8x4 tile and use those as the two representative colors.
379 * There are probably better algorithms to use (histogram-based).
380 */
381 GLint i, j, k;
382 GLint minSum = 2000; /* big enough */
383 GLint maxSum = -1; /* small enough */
384 GLint minCol = 0; /* phoudoin: silent compiler! */
385 GLint maxCol = 0; /* phoudoin: silent compiler! */
386
387 struct {
388 GLint flag;
389 GLint key;
390 GLint freq;
391 GLint idx;
392 } hist[N_TEXELS];
393 GLint lenh = 0;
394
395 memset(hist, 0, sizeof(hist));
396
397 for (k = 0; k < n; k++) {
398 GLint l;
399 GLint key = 0;
400 GLint sum = 0;
401 for (i = 0; i < nc; i++) {
402 key <<= 8;
403 key |= input[k][i];
404 sum += input[k][i];
405 }
406 for (l = 0; l < n; l++) {
407 if (!hist[l].flag) {
408 /* alloc new slot */
409 hist[l].flag = !0;
410 hist[l].key = key;
411 hist[l].freq = 1;
412 hist[l].idx = k;
413 lenh = l + 1;
414 break;
415 } else if (hist[l].key == key) {
416 hist[l].freq++;
417 break;
418 }
419 }
420 if (minSum > sum) {
421 minSum = sum;
422 minCol = k;
423 }
424 if (maxSum < sum) {
425 maxSum = sum;
426 maxCol = k;
427 }
428 }
429
430 if (lenh <= nv) {
431 for (j = 0; j < lenh; j++) {
432 for (i = 0; i < nc; i++) {
433 vec[j][i] = (GLfloat)input[hist[j].idx][i];
434 }
435 }
436 for (; j < nv; j++) {
437 for (i = 0; i < nc; i++) {
438 vec[j][i] = vec[0][i];
439 }
440 }
441 return 0;
442 }
443
444 for (j = 0; j < nv; j++) {
445 for (i = 0; i < nc; i++) {
446 vec[j][i] = ((nv - 1 - j) * input[minCol][i] + j * input[maxCol][i] + (nv - 1) / 2) / (GLfloat)(nv - 1);
447 }
448 }
449 #endif
450
451 return !0;
452 }
453
454
455 static GLint
456 fxt1_lloyd (GLfloat vec[][MAX_COMP], GLint nv,
457 GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
458 {
459 /* Use the generalized lloyd's algorithm for VQ:
460 * find 4 color vectors.
461 *
462 * for each sample color
463 * sort to nearest vector.
464 *
465 * replace each vector with the centroid of its matching colors.
466 *
467 * repeat until RMS doesn't improve.
468 *
469 * if a color vector has no samples, or becomes the same as another
470 * vector, replace it with the color which is farthest from a sample.
471 *
472 * vec[][MAX_COMP] initial vectors and resulting colors
473 * nv number of resulting colors required
474 * input[N_TEXELS][MAX_COMP] input texels
475 * nc number of components in input / vec
476 * n number of input samples
477 */
478
479 GLint sum[MAX_VECT][MAX_COMP]; /* used to accumulate closest texels */
480 GLint cnt[MAX_VECT]; /* how many times a certain vector was chosen */
481 GLfloat error, lasterror = 1e9;
482
483 GLint i, j, k, rep;
484
485 /* the quantizer */
486 for (rep = 0; rep < LL_N_REP; rep++) {
487 /* reset sums & counters */
488 for (j = 0; j < nv; j++) {
489 for (i = 0; i < nc; i++) {
490 sum[j][i] = 0;
491 }
492 cnt[j] = 0;
493 }
494 error = 0;
495
496 /* scan whole block */
497 for (k = 0; k < n; k++) {
498 #if 1
499 GLint best = -1;
500 GLfloat err = 1e9; /* big enough */
501 /* determine best vector */
502 for (j = 0; j < nv; j++) {
503 GLfloat e = (vec[j][0] - input[k][0]) * (vec[j][0] - input[k][0]) +
504 (vec[j][1] - input[k][1]) * (vec[j][1] - input[k][1]) +
505 (vec[j][2] - input[k][2]) * (vec[j][2] - input[k][2]);
506 if (nc == 4) {
507 e += (vec[j][3] - input[k][3]) * (vec[j][3] - input[k][3]);
508 }
509 if (e < err) {
510 err = e;
511 best = j;
512 }
513 }
514 #else
515 GLint best = fxt1_bestcol(vec, nv, input[k], nc, &err);
516 #endif
517 assert(best >= 0);
518 /* add in closest color */
519 for (i = 0; i < nc; i++) {
520 sum[best][i] += input[k][i];
521 }
522 /* mark this vector as used */
523 cnt[best]++;
524 /* accumulate error */
525 error += err;
526 }
527
528 /* check RMS */
529 if ((error < LL_RMS_E) ||
530 ((error < lasterror) && ((lasterror - error) < LL_RMS_D))) {
531 return !0; /* good match */
532 }
533 lasterror = error;
534
535 /* move each vector to the barycenter of its closest colors */
536 for (j = 0; j < nv; j++) {
537 if (cnt[j]) {
538 GLfloat div = 1.0F / cnt[j];
539 for (i = 0; i < nc; i++) {
540 vec[j][i] = div * sum[j][i];
541 }
542 } else {
543 /* this vec has no samples or is identical with a previous vec */
544 GLint worst = fxt1_worst(vec[j], input, nc, n);
545 for (i = 0; i < nc; i++) {
546 vec[j][i] = input[worst][i];
547 }
548 }
549 }
550 }
551
552 return 0; /* could not converge fast enough */
553 }
554
555
556 static void
557 fxt1_quantize_CHROMA (GLuint *cc,
558 GLubyte input[N_TEXELS][MAX_COMP])
559 {
560 const GLint n_vect = 4; /* 4 base vectors to find */
561 const GLint n_comp = 3; /* 3 components: R, G, B */
562 GLfloat vec[MAX_VECT][MAX_COMP];
563 GLint i, j, k;
564 Fx64 hi; /* high quadword */
565 GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
566
567 if (fxt1_choose(vec, n_vect, input, n_comp, N_TEXELS) != 0) {
568 fxt1_lloyd(vec, n_vect, input, n_comp, N_TEXELS);
569 }
570
571 FX64_MOV32(hi, 4); /* cc-chroma = "010" + unused bit */
572 for (j = n_vect - 1; j >= 0; j--) {
573 for (i = 0; i < n_comp; i++) {
574 /* add in colors */
575 FX64_SHL(hi, 5);
576 FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
577 }
578 }
579 ((Fx64 *)cc)[1] = hi;
580
581 lohi = lolo = 0;
582 /* right microtile */
583 for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) {
584 lohi <<= 2;
585 lohi |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
586 }
587 /* left microtile */
588 for (; k >= 0; k--) {
589 lolo <<= 2;
590 lolo |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
591 }
592 cc[1] = lohi;
593 cc[0] = lolo;
594 }
595
596
597 static void
598 fxt1_quantize_ALPHA0 (GLuint *cc,
599 GLubyte input[N_TEXELS][MAX_COMP],
600 GLubyte reord[N_TEXELS][MAX_COMP], GLint n)
601 {
602 const GLint n_vect = 3; /* 3 base vectors to find */
603 const GLint n_comp = 4; /* 4 components: R, G, B, A */
604 GLfloat vec[MAX_VECT][MAX_COMP];
605 GLint i, j, k;
606 Fx64 hi; /* high quadword */
607 GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
608
609 /* the last vector indicates zero */
610 for (i = 0; i < n_comp; i++) {
611 vec[n_vect][i] = 0;
612 }
613
614 /* the first n texels in reord are guaranteed to be non-zero */
615 if (fxt1_choose(vec, n_vect, reord, n_comp, n) != 0) {
616 fxt1_lloyd(vec, n_vect, reord, n_comp, n);
617 }
618
619 FX64_MOV32(hi, 6); /* alpha = "011" + lerp = 0 */
620 for (j = n_vect - 1; j >= 0; j--) {
621 /* add in alphas */
622 FX64_SHL(hi, 5);
623 FX64_OR32(hi, (GLuint)(vec[j][ACOMP] / 8.0F));
624 }
625 for (j = n_vect - 1; j >= 0; j--) {
626 for (i = 0; i < n_comp - 1; i++) {
627 /* add in colors */
628 FX64_SHL(hi, 5);
629 FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
630 }
631 }
632 ((Fx64 *)cc)[1] = hi;
633
634 lohi = lolo = 0;
635 /* right microtile */
636 for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) {
637 lohi <<= 2;
638 lohi |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
639 }
640 /* left microtile */
641 for (; k >= 0; k--) {
642 lolo <<= 2;
643 lolo |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
644 }
645 cc[1] = lohi;
646 cc[0] = lolo;
647 }
648
649
650 static void
651 fxt1_quantize_ALPHA1 (GLuint *cc,
652 GLubyte input[N_TEXELS][MAX_COMP])
653 {
654 const GLint n_vect = 3; /* highest vector number in each microtile */
655 const GLint n_comp = 4; /* 4 components: R, G, B, A */
656 GLfloat vec[1 + 1 + 1][MAX_COMP]; /* 1.5 extrema for each sub-block */
657 GLfloat b, iv[MAX_COMP]; /* interpolation vector */
658 GLint i, j, k;
659 Fx64 hi; /* high quadword */
660 GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
661
662 GLint minSum;
663 GLint maxSum;
664 GLint minColL = 0, maxColL = 0;
665 GLint minColR = 0, maxColR = 0;
666 GLint sumL = 0, sumR = 0;
667 GLint nn_comp;
668 /* Our solution here is to find the darkest and brightest colors in
669 * the 4x4 tile and use those as the two representative colors.
670 * There are probably better algorithms to use (histogram-based).
671 */
672 nn_comp = n_comp;
673 while ((minColL == maxColL) && nn_comp) {
674 minSum = 2000; /* big enough */
675 maxSum = -1; /* small enough */
676 for (k = 0; k < N_TEXELS / 2; k++) {
677 GLint sum = 0;
678 for (i = 0; i < nn_comp; i++) {
679 sum += input[k][i];
680 }
681 if (minSum > sum) {
682 minSum = sum;
683 minColL = k;
684 }
685 if (maxSum < sum) {
686 maxSum = sum;
687 maxColL = k;
688 }
689 sumL += sum;
690 }
691
692 nn_comp--;
693 }
694
695 nn_comp = n_comp;
696 while ((minColR == maxColR) && nn_comp) {
697 minSum = 2000; /* big enough */
698 maxSum = -1; /* small enough */
699 for (k = N_TEXELS / 2; k < N_TEXELS; k++) {
700 GLint sum = 0;
701 for (i = 0; i < nn_comp; i++) {
702 sum += input[k][i];
703 }
704 if (minSum > sum) {
705 minSum = sum;
706 minColR = k;
707 }
708 if (maxSum < sum) {
709 maxSum = sum;
710 maxColR = k;
711 }
712 sumR += sum;
713 }
714
715 nn_comp--;
716 }
717
718 /* choose the common vector (yuck!) */
719 {
720 GLint j1, j2;
721 GLint v1 = 0, v2 = 0;
722 GLfloat err = 1e9; /* big enough */
723 GLfloat tv[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
724 for (i = 0; i < n_comp; i++) {
725 tv[0][i] = input[minColL][i];
726 tv[1][i] = input[maxColL][i];
727 tv[2][i] = input[minColR][i];
728 tv[3][i] = input[maxColR][i];
729 }
730 for (j1 = 0; j1 < 2; j1++) {
731 for (j2 = 2; j2 < 4; j2++) {
732 GLfloat e = 0.0F;
733 for (i = 0; i < n_comp; i++) {
734 e += (tv[j1][i] - tv[j2][i]) * (tv[j1][i] - tv[j2][i]);
735 }
736 if (e < err) {
737 err = e;
738 v1 = j1;
739 v2 = j2;
740 }
741 }
742 }
743 for (i = 0; i < n_comp; i++) {
744 vec[0][i] = tv[1 - v1][i];
745 vec[1][i] = (tv[v1][i] * sumL + tv[v2][i] * sumR) / (sumL + sumR);
746 vec[2][i] = tv[5 - v2][i];
747 }
748 }
749
750 /* left microtile */
751 cc[0] = 0;
752 if (minColL != maxColL) {
753 /* compute interpolation vector */
754 MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
755
756 /* add in texels */
757 lolo = 0;
758 for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
759 GLint texel;
760 /* interpolate color */
761 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
762 /* add in texel */
763 lolo <<= 2;
764 lolo |= texel;
765 }
766
767 cc[0] = lolo;
768 }
769
770 /* right microtile */
771 cc[1] = 0;
772 if (minColR != maxColR) {
773 /* compute interpolation vector */
774 MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[1]);
775
776 /* add in texels */
777 lohi = 0;
778 for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
779 GLint texel;
780 /* interpolate color */
781 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
782 /* add in texel */
783 lohi <<= 2;
784 lohi |= texel;
785 }
786
787 cc[1] = lohi;
788 }
789
790 FX64_MOV32(hi, 7); /* alpha = "011" + lerp = 1 */
791 for (j = n_vect - 1; j >= 0; j--) {
792 /* add in alphas */
793 FX64_SHL(hi, 5);
794 FX64_OR32(hi, (GLuint)(vec[j][ACOMP] / 8.0F));
795 }
796 for (j = n_vect - 1; j >= 0; j--) {
797 for (i = 0; i < n_comp - 1; i++) {
798 /* add in colors */
799 FX64_SHL(hi, 5);
800 FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
801 }
802 }
803 ((Fx64 *)cc)[1] = hi;
804 }
805
806
807 static void
808 fxt1_quantize_HI (GLuint *cc,
809 GLubyte input[N_TEXELS][MAX_COMP],
810 GLubyte reord[N_TEXELS][MAX_COMP], GLint n)
811 {
812 const GLint n_vect = 6; /* highest vector number */
813 const GLint n_comp = 3; /* 3 components: R, G, B */
814 GLfloat b = 0.0F; /* phoudoin: silent compiler! */
815 GLfloat iv[MAX_COMP]; /* interpolation vector */
816 GLint i, k;
817 GLuint hihi; /* high quadword: hi dword */
818
819 GLint minSum = 2000; /* big enough */
820 GLint maxSum = -1; /* small enough */
821 GLint minCol = 0; /* phoudoin: silent compiler! */
822 GLint maxCol = 0; /* phoudoin: silent compiler! */
823
824 /* Our solution here is to find the darkest and brightest colors in
825 * the 8x4 tile and use those as the two representative colors.
826 * There are probably better algorithms to use (histogram-based).
827 */
828 for (k = 0; k < n; k++) {
829 GLint sum = 0;
830 for (i = 0; i < n_comp; i++) {
831 sum += reord[k][i];
832 }
833 if (minSum > sum) {
834 minSum = sum;
835 minCol = k;
836 }
837 if (maxSum < sum) {
838 maxSum = sum;
839 maxCol = k;
840 }
841 }
842
843 hihi = 0; /* cc-hi = "00" */
844 for (i = 0; i < n_comp; i++) {
845 /* add in colors */
846 hihi <<= 5;
847 hihi |= reord[maxCol][i] >> 3;
848 }
849 for (i = 0; i < n_comp; i++) {
850 /* add in colors */
851 hihi <<= 5;
852 hihi |= reord[minCol][i] >> 3;
853 }
854 cc[3] = hihi;
855 cc[0] = cc[1] = cc[2] = 0;
856
857 /* compute interpolation vector */
858 if (minCol != maxCol) {
859 MAKEIVEC(n_vect, n_comp, iv, b, reord[minCol], reord[maxCol]);
860 }
861
862 /* add in texels */
863 for (k = N_TEXELS - 1; k >= 0; k--) {
864 GLint t = k * 3;
865 GLuint *kk = (GLuint *)((char *)cc + t / 8);
866 GLint texel = n_vect + 1; /* transparent black */
867
868 if (!ISTBLACK(input[k])) {
869 if (minCol != maxCol) {
870 /* interpolate color */
871 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
872 /* add in texel */
873 kk[0] |= texel << (t & 7);
874 }
875 } else {
876 /* add in texel */
877 kk[0] |= texel << (t & 7);
878 }
879 }
880 }
881
882
883 static void
884 fxt1_quantize_MIXED1 (GLuint *cc,
885 GLubyte input[N_TEXELS][MAX_COMP])
886 {
887 const GLint n_vect = 2; /* highest vector number in each microtile */
888 const GLint n_comp = 3; /* 3 components: R, G, B */
889 GLubyte vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
890 GLfloat b, iv[MAX_COMP]; /* interpolation vector */
891 GLint i, j, k;
892 Fx64 hi; /* high quadword */
893 GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
894
895 GLint minSum;
896 GLint maxSum;
897 GLint minColL = 0, maxColL = -1;
898 GLint minColR = 0, maxColR = -1;
899
900 /* Our solution here is to find the darkest and brightest colors in
901 * the 4x4 tile and use those as the two representative colors.
902 * There are probably better algorithms to use (histogram-based).
903 */
904 minSum = 2000; /* big enough */
905 maxSum = -1; /* small enough */
906 for (k = 0; k < N_TEXELS / 2; k++) {
907 if (!ISTBLACK(input[k])) {
908 GLint sum = 0;
909 for (i = 0; i < n_comp; i++) {
910 sum += input[k][i];
911 }
912 if (minSum > sum) {
913 minSum = sum;
914 minColL = k;
915 }
916 if (maxSum < sum) {
917 maxSum = sum;
918 maxColL = k;
919 }
920 }
921 }
922 minSum = 2000; /* big enough */
923 maxSum = -1; /* small enough */
924 for (; k < N_TEXELS; k++) {
925 if (!ISTBLACK(input[k])) {
926 GLint sum = 0;
927 for (i = 0; i < n_comp; i++) {
928 sum += input[k][i];
929 }
930 if (minSum > sum) {
931 minSum = sum;
932 minColR = k;
933 }
934 if (maxSum < sum) {
935 maxSum = sum;
936 maxColR = k;
937 }
938 }
939 }
940
941 /* left microtile */
942 if (maxColL == -1) {
943 /* all transparent black */
944 cc[0] = ~0u;
945 for (i = 0; i < n_comp; i++) {
946 vec[0][i] = 0;
947 vec[1][i] = 0;
948 }
949 } else {
950 cc[0] = 0;
951 for (i = 0; i < n_comp; i++) {
952 vec[0][i] = input[minColL][i];
953 vec[1][i] = input[maxColL][i];
954 }
955 if (minColL != maxColL) {
956 /* compute interpolation vector */
957 MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
958
959 /* add in texels */
960 lolo = 0;
961 for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
962 GLint texel = n_vect + 1; /* transparent black */
963 if (!ISTBLACK(input[k])) {
964 /* interpolate color */
965 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
966 }
967 /* add in texel */
968 lolo <<= 2;
969 lolo |= texel;
970 }
971 cc[0] = lolo;
972 }
973 }
974
975 /* right microtile */
976 if (maxColR == -1) {
977 /* all transparent black */
978 cc[1] = ~0u;
979 for (i = 0; i < n_comp; i++) {
980 vec[2][i] = 0;
981 vec[3][i] = 0;
982 }
983 } else {
984 cc[1] = 0;
985 for (i = 0; i < n_comp; i++) {
986 vec[2][i] = input[minColR][i];
987 vec[3][i] = input[maxColR][i];
988 }
989 if (minColR != maxColR) {
990 /* compute interpolation vector */
991 MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);
992
993 /* add in texels */
994 lohi = 0;
995 for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
996 GLint texel = n_vect + 1; /* transparent black */
997 if (!ISTBLACK(input[k])) {
998 /* interpolate color */
999 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
1000 }
1001 /* add in texel */
1002 lohi <<= 2;
1003 lohi |= texel;
1004 }
1005 cc[1] = lohi;
1006 }
1007 }
1008
1009 FX64_MOV32(hi, 9 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
1010 for (j = 2 * 2 - 1; j >= 0; j--) {
1011 for (i = 0; i < n_comp; i++) {
1012 /* add in colors */
1013 FX64_SHL(hi, 5);
1014 FX64_OR32(hi, vec[j][i] >> 3);
1015 }
1016 }
1017 ((Fx64 *)cc)[1] = hi;
1018 }
1019
1020
1021 static void
1022 fxt1_quantize_MIXED0 (GLuint *cc,
1023 GLubyte input[N_TEXELS][MAX_COMP])
1024 {
1025 const GLint n_vect = 3; /* highest vector number in each microtile */
1026 const GLint n_comp = 3; /* 3 components: R, G, B */
1027 GLubyte vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
1028 GLfloat b, iv[MAX_COMP]; /* interpolation vector */
1029 GLint i, j, k;
1030 Fx64 hi; /* high quadword */
1031 GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
1032
1033 GLint minColL = 0, maxColL = 0;
1034 GLint minColR = 0, maxColR = 0;
1035 #if 0
1036 GLint minSum;
1037 GLint maxSum;
1038
1039 /* Our solution here is to find the darkest and brightest colors in
1040 * the 4x4 tile and use those as the two representative colors.
1041 * There are probably better algorithms to use (histogram-based).
1042 */
1043 minSum = 2000; /* big enough */
1044 maxSum = -1; /* small enough */
1045 for (k = 0; k < N_TEXELS / 2; k++) {
1046 GLint sum = 0;
1047 for (i = 0; i < n_comp; i++) {
1048 sum += input[k][i];
1049 }
1050 if (minSum > sum) {
1051 minSum = sum;
1052 minColL = k;
1053 }
1054 if (maxSum < sum) {
1055 maxSum = sum;
1056 maxColL = k;
1057 }
1058 }
1059 minSum = 2000; /* big enough */
1060 maxSum = -1; /* small enough */
1061 for (; k < N_TEXELS; k++) {
1062 GLint sum = 0;
1063 for (i = 0; i < n_comp; i++) {
1064 sum += input[k][i];
1065 }
1066 if (minSum > sum) {
1067 minSum = sum;
1068 minColR = k;
1069 }
1070 if (maxSum < sum) {
1071 maxSum = sum;
1072 maxColR = k;
1073 }
1074 }
1075 #else
1076 GLint minVal;
1077 GLint maxVal;
1078 GLint maxVarL = fxt1_variance(NULL, input, n_comp, N_TEXELS / 2);
1079 GLint maxVarR = fxt1_variance(NULL, &input[N_TEXELS / 2], n_comp, N_TEXELS / 2);
1080
1081 /* Scan the channel with max variance for lo & hi
1082 * and use those as the two representative colors.
1083 */
1084 minVal = 2000; /* big enough */
1085 maxVal = -1; /* small enough */
1086 for (k = 0; k < N_TEXELS / 2; k++) {
1087 GLint t = input[k][maxVarL];
1088 if (minVal > t) {
1089 minVal = t;
1090 minColL = k;
1091 }
1092 if (maxVal < t) {
1093 maxVal = t;
1094 maxColL = k;
1095 }
1096 }
1097 minVal = 2000; /* big enough */
1098 maxVal = -1; /* small enough */
1099 for (; k < N_TEXELS; k++) {
1100 GLint t = input[k][maxVarR];
1101 if (minVal > t) {
1102 minVal = t;
1103 minColR = k;
1104 }
1105 if (maxVal < t) {
1106 maxVal = t;
1107 maxColR = k;
1108 }
1109 }
1110 #endif
1111
1112 /* left microtile */
1113 cc[0] = 0;
1114 for (i = 0; i < n_comp; i++) {
1115 vec[0][i] = input[minColL][i];
1116 vec[1][i] = input[maxColL][i];
1117 }
1118 if (minColL != maxColL) {
1119 /* compute interpolation vector */
1120 MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
1121
1122 /* add in texels */
1123 lolo = 0;
1124 for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
1125 GLint texel;
1126 /* interpolate color */
1127 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
1128 /* add in texel */
1129 lolo <<= 2;
1130 lolo |= texel;
1131 }
1132
1133 /* funky encoding for LSB of green */
1134 if ((GLint)((lolo >> 1) & 1) != (((vec[1][GCOMP] ^ vec[0][GCOMP]) >> 2) & 1)) {
1135 for (i = 0; i < n_comp; i++) {
1136 vec[1][i] = input[minColL][i];
1137 vec[0][i] = input[maxColL][i];
1138 }
1139 lolo = ~lolo;
1140 }
1141
1142 cc[0] = lolo;
1143 }
1144
1145 /* right microtile */
1146 cc[1] = 0;
1147 for (i = 0; i < n_comp; i++) {
1148 vec[2][i] = input[minColR][i];
1149 vec[3][i] = input[maxColR][i];
1150 }
1151 if (minColR != maxColR) {
1152 /* compute interpolation vector */
1153 MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);
1154
1155 /* add in texels */
1156 lohi = 0;
1157 for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
1158 GLint texel;
1159 /* interpolate color */
1160 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
1161 /* add in texel */
1162 lohi <<= 2;
1163 lohi |= texel;
1164 }
1165
1166 /* funky encoding for LSB of green */
1167 if ((GLint)((lohi >> 1) & 1) != (((vec[3][GCOMP] ^ vec[2][GCOMP]) >> 2) & 1)) {
1168 for (i = 0; i < n_comp; i++) {
1169 vec[3][i] = input[minColR][i];
1170 vec[2][i] = input[maxColR][i];
1171 }
1172 lohi = ~lohi;
1173 }
1174
1175 cc[1] = lohi;
1176 }
1177
1178 FX64_MOV32(hi, 8 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
1179 for (j = 2 * 2 - 1; j >= 0; j--) {
1180 for (i = 0; i < n_comp; i++) {
1181 /* add in colors */
1182 FX64_SHL(hi, 5);
1183 FX64_OR32(hi, vec[j][i] >> 3);
1184 }
1185 }
1186 ((Fx64 *)cc)[1] = hi;
1187 }
1188
1189
1190 static void
1191 fxt1_quantize (GLuint *cc, const GLubyte *lines[], GLint comps)
1192 {
1193 GLint trualpha;
1194 GLubyte reord[N_TEXELS][MAX_COMP];
1195
1196 GLubyte input[N_TEXELS][MAX_COMP];
1197 GLint i, k, l;
1198
1199 if (comps == 3) {
1200 /* make the whole block opaque */
1201 memset(input, -1, sizeof(input));
1202 }
1203
1204 /* 8 texels each line */
1205 for (l = 0; l < 4; l++) {
1206 for (k = 0; k < 4; k++) {
1207 for (i = 0; i < comps; i++) {
1208 input[k + l * 4][i] = *lines[l]++;
1209 }
1210 }
1211 for (; k < 8; k++) {
1212 for (i = 0; i < comps; i++) {
1213 input[k + l * 4 + 12][i] = *lines[l]++;
1214 }
1215 }
1216 }
1217
1218 /* block layout:
1219 * 00, 01, 02, 03, 08, 09, 0a, 0b
1220 * 10, 11, 12, 13, 18, 19, 1a, 1b
1221 * 04, 05, 06, 07, 0c, 0d, 0e, 0f
1222 * 14, 15, 16, 17, 1c, 1d, 1e, 1f
1223 */
1224
1225 /* [dBorca]
1226 * stupidity flows forth from this
1227 */
1228 l = N_TEXELS;
1229 trualpha = 0;
1230 if (comps == 4) {
1231 /* skip all transparent black texels */
1232 l = 0;
1233 for (k = 0; k < N_TEXELS; k++) {
1234 /* test all components against 0 */
1235 if (!ISTBLACK(input[k])) {
1236 /* texel is not transparent black */
1237 COPY_4UBV(reord[l], input[k]);
1238 if (reord[l][ACOMP] < (255 - ALPHA_TS)) {
1239 /* non-opaque texel */
1240 trualpha = !0;
1241 }
1242 l++;
1243 }
1244 }
1245 }
1246
1247 #if 0
1248 if (trualpha) {
1249 fxt1_quantize_ALPHA0(cc, input, reord, l);
1250 } else if (l == 0) {
1251 cc[0] = cc[1] = cc[2] = -1;
1252 cc[3] = 0;
1253 } else if (l < N_TEXELS) {
1254 fxt1_quantize_HI(cc, input, reord, l);
1255 } else {
1256 fxt1_quantize_CHROMA(cc, input);
1257 }
1258 (void)fxt1_quantize_ALPHA1;
1259 (void)fxt1_quantize_MIXED1;
1260 (void)fxt1_quantize_MIXED0;
1261 #else
1262 if (trualpha) {
1263 fxt1_quantize_ALPHA1(cc, input);
1264 } else if (l == 0) {
1265 cc[0] = cc[1] = cc[2] = ~0u;
1266 cc[3] = 0;
1267 } else if (l < N_TEXELS) {
1268 fxt1_quantize_MIXED1(cc, input);
1269 } else {
1270 fxt1_quantize_MIXED0(cc, input);
1271 }
1272 (void)fxt1_quantize_ALPHA0;
1273 (void)fxt1_quantize_HI;
1274 (void)fxt1_quantize_CHROMA;
1275 #endif
1276 }
1277
1278
1279
1280 /**
1281 * Upscale an image by replication, not (typical) stretching.
1282 * We use this when the image width or height is less than a
1283 * certain size (4, 8) and we need to upscale an image.
1284 */
1285 static void
1286 upscale_teximage2d(GLsizei inWidth, GLsizei inHeight,
1287 GLsizei outWidth, GLsizei outHeight,
1288 GLint comps, const GLubyte *src, GLint srcRowStride,
1289 GLubyte *dest )
1290 {
1291 GLint i, j, k;
1292
1293 ASSERT(outWidth >= inWidth);
1294 ASSERT(outHeight >= inHeight);
1295 #if 0
1296 ASSERT(inWidth == 1 || inWidth == 2 || inHeight == 1 || inHeight == 2);
1297 ASSERT((outWidth & 3) == 0);
1298 ASSERT((outHeight & 3) == 0);
1299 #endif
1300
1301 for (i = 0; i < outHeight; i++) {
1302 const GLint ii = i % inHeight;
1303 for (j = 0; j < outWidth; j++) {
1304 const GLint jj = j % inWidth;
1305 for (k = 0; k < comps; k++) {
1306 dest[(i * outWidth + j) * comps + k]
1307 = src[ii * srcRowStride + jj * comps + k];
1308 }
1309 }
1310 }
1311 }
1312
1313
1314 static void
1315 fxt1_encode (GLuint width, GLuint height, GLint comps,
1316 const void *source, GLint srcRowStride,
1317 void *dest, GLint destRowStride)
1318 {
1319 GLuint x, y;
1320 const GLubyte *data;
1321 GLuint *encoded = (GLuint *)dest;
1322 void *newSource = NULL;
1323
1324 assert(comps == 3 || comps == 4);
1325
1326 /* Replicate image if width is not M8 or height is not M4 */
1327 if ((width & 7) | (height & 3)) {
1328 GLint newWidth = (width + 7) & ~7;
1329 GLint newHeight = (height + 3) & ~3;
1330 newSource = malloc(comps * newWidth * newHeight * sizeof(GLubyte));
1331 if (!newSource) {
1332 GET_CURRENT_CONTEXT(ctx);
1333 _mesa_error(ctx, GL_OUT_OF_MEMORY, "texture compression");
1334 goto cleanUp;
1335 }
1336 upscale_teximage2d(width, height, newWidth, newHeight,
1337 comps, (const GLubyte *) source,
1338 srcRowStride, (GLubyte *) newSource);
1339 source = newSource;
1340 width = newWidth;
1341 height = newHeight;
1342 srcRowStride = comps * newWidth;
1343 }
1344
1345 data = (const GLubyte *) source;
1346 destRowStride = (destRowStride - width * 2) / 4;
1347 for (y = 0; y < height; y += 4) {
1348 GLuint offs = 0 + (y + 0) * srcRowStride;
1349 for (x = 0; x < width; x += 8) {
1350 const GLubyte *lines[4];
1351 lines[0] = &data[offs];
1352 lines[1] = lines[0] + srcRowStride;
1353 lines[2] = lines[1] + srcRowStride;
1354 lines[3] = lines[2] + srcRowStride;
1355 offs += 8 * comps;
1356 fxt1_quantize(encoded, lines, comps);
1357 /* 128 bits per 8x4 block */
1358 encoded += 4;
1359 }
1360 encoded += destRowStride;
1361 }
1362
1363 cleanUp:
1364 free(newSource);
1365 }
1366
1367
1368 /***************************************************************************\
1369 * FXT1 decoder
1370 *
1371 * The decoder is based on GL_3DFX_texture_compression_FXT1
1372 * specification and serves as a concept for the encoder.
1373 \***************************************************************************/
1374
1375
1376 /* lookup table for scaling 5 bit colors up to 8 bits */
1377 static const GLubyte _rgb_scale_5[] = {
1378 0, 8, 16, 25, 33, 41, 49, 58,
1379 66, 74, 82, 90, 99, 107, 115, 123,
1380 132, 140, 148, 156, 165, 173, 181, 189,
1381 197, 206, 214, 222, 230, 239, 247, 255
1382 };
1383
1384 /* lookup table for scaling 6 bit colors up to 8 bits */
1385 static const GLubyte _rgb_scale_6[] = {
1386 0, 4, 8, 12, 16, 20, 24, 28,
1387 32, 36, 40, 45, 49, 53, 57, 61,
1388 65, 69, 73, 77, 81, 85, 89, 93,
1389 97, 101, 105, 109, 113, 117, 121, 125,
1390 130, 134, 138, 142, 146, 150, 154, 158,
1391 162, 166, 170, 174, 178, 182, 186, 190,
1392 194, 198, 202, 206, 210, 215, 219, 223,
1393 227, 231, 235, 239, 243, 247, 251, 255
1394 };
1395
1396
/* CC_SEL: view `cc` as an array of 32-bit words and return the word that
 * contains bit `which`, shifted right so that bit lands at position 0.
 * Bits above the wanted field are NOT masked off, and a field that
 * crosses a 32-bit boundary cannot be fetched with this macro.
 * The cast keeps the pointee const because the macro only reads.
 */
#define CC_SEL(cc, which) \
   (((const GLuint *)(cc))[(which) / 32] >> ((which) & 31))

/* UP5: expand the low 5 bits of `c` to 8 bits */
#define UP5(c) (_rgb_scale_5[(c) & 31])

/* UP6: expand the 6-bit value made of the low 5 bits of `c` followed by
 * the low bit of `b` (as least significant bit) to 8 bits
 */
#define UP6(c, b) (_rgb_scale_6[(((c) & 31) << 1) | ((b) & 1)])

/* LERP: rounded integer interpolation, ((n - t) * c0 + t * c1) / n.
 * The whole expansion is parenthesized so the macro can be used safely
 * as an operand of a larger expression.
 */
#define LERP(n, t, c0, c1) \
   ((((n) - (t)) * (c0) + (t) * (c1) + (n) / 2) / (n))
1401
1402
1403 static void
1404 fxt1_decode_1HI (const GLubyte *code, GLint t, GLubyte *rgba)
1405 {
1406 const GLuint *cc;
1407
1408 t *= 3;
1409 cc = (const GLuint *)(code + t / 8);
1410 t = (cc[0] >> (t & 7)) & 7;
1411
1412 if (t == 7) {
1413 rgba[RCOMP] = rgba[GCOMP] = rgba[BCOMP] = rgba[ACOMP] = 0;
1414 } else {
1415 GLubyte r, g, b;
1416 cc = (const GLuint *)(code + 12);
1417 if (t == 0) {
1418 b = UP5(CC_SEL(cc, 0));
1419 g = UP5(CC_SEL(cc, 5));
1420 r = UP5(CC_SEL(cc, 10));
1421 } else if (t == 6) {
1422 b = UP5(CC_SEL(cc, 15));
1423 g = UP5(CC_SEL(cc, 20));
1424 r = UP5(CC_SEL(cc, 25));
1425 } else {
1426 b = LERP(6, t, UP5(CC_SEL(cc, 0)), UP5(CC_SEL(cc, 15)));
1427 g = LERP(6, t, UP5(CC_SEL(cc, 5)), UP5(CC_SEL(cc, 20)));
1428 r = LERP(6, t, UP5(CC_SEL(cc, 10)), UP5(CC_SEL(cc, 25)));
1429 }
1430 rgba[RCOMP] = r;
1431 rgba[GCOMP] = g;
1432 rgba[BCOMP] = b;
1433 rgba[ACOMP] = 255;
1434 }
1435 }
1436
1437
1438 static void
1439 fxt1_decode_1CHROMA (const GLubyte *code, GLint t, GLubyte *rgba)
1440 {
1441 const GLuint *cc;
1442 GLuint kk;
1443
1444 cc = (const GLuint *)code;
1445 if (t & 16) {
1446 cc++;
1447 t &= 15;
1448 }
1449 t = (cc[0] >> (t * 2)) & 3;
1450
1451 t *= 15;
1452 cc = (const GLuint *)(code + 8 + t / 8);
1453 kk = cc[0] >> (t & 7);
1454 rgba[BCOMP] = UP5(kk);
1455 rgba[GCOMP] = UP5(kk >> 5);
1456 rgba[RCOMP] = UP5(kk >> 10);
1457 rgba[ACOMP] = 255;
1458 }
1459
1460
1461 static void
1462 fxt1_decode_1MIXED (const GLubyte *code, GLint t, GLubyte *rgba)
1463 {
1464 const GLuint *cc;
1465 GLuint col[2][3];
1466 GLint glsb, selb;
1467
1468 cc = (const GLuint *)code;
1469 if (t & 16) {
1470 t &= 15;
1471 t = (cc[1] >> (t * 2)) & 3;
1472 /* col 2 */
1473 col[0][BCOMP] = (*(const GLuint *)(code + 11)) >> 6;
1474 col[0][GCOMP] = CC_SEL(cc, 99);
1475 col[0][RCOMP] = CC_SEL(cc, 104);
1476 /* col 3 */
1477 col[1][BCOMP] = CC_SEL(cc, 109);
1478 col[1][GCOMP] = CC_SEL(cc, 114);
1479 col[1][RCOMP] = CC_SEL(cc, 119);
1480 glsb = CC_SEL(cc, 126);
1481 selb = CC_SEL(cc, 33);
1482 } else {
1483 t = (cc[0] >> (t * 2)) & 3;
1484 /* col 0 */
1485 col[0][BCOMP] = CC_SEL(cc, 64);
1486 col[0][GCOMP] = CC_SEL(cc, 69);
1487 col[0][RCOMP] = CC_SEL(cc, 74);
1488 /* col 1 */
1489 col[1][BCOMP] = CC_SEL(cc, 79);
1490 col[1][GCOMP] = CC_SEL(cc, 84);
1491 col[1][RCOMP] = CC_SEL(cc, 89);
1492 glsb = CC_SEL(cc, 125);
1493 selb = CC_SEL(cc, 1);
1494 }
1495
1496 if (CC_SEL(cc, 124) & 1) {
1497 /* alpha[0] == 1 */
1498
1499 if (t == 3) {
1500 /* zero */
1501 rgba[RCOMP] = rgba[BCOMP] = rgba[GCOMP] = rgba[ACOMP] = 0;
1502 } else {
1503 GLubyte r, g, b;
1504 if (t == 0) {
1505 b = UP5(col[0][BCOMP]);
1506 g = UP5(col[0][GCOMP]);
1507 r = UP5(col[0][RCOMP]);
1508 } else if (t == 2) {
1509 b = UP5(col[1][BCOMP]);
1510 g = UP6(col[1][GCOMP], glsb);
1511 r = UP5(col[1][RCOMP]);
1512 } else {
1513 b = (UP5(col[0][BCOMP]) + UP5(col[1][BCOMP])) / 2;
1514 g = (UP5(col[0][GCOMP]) + UP6(col[1][GCOMP], glsb)) / 2;
1515 r = (UP5(col[0][RCOMP]) + UP5(col[1][RCOMP])) / 2;
1516 }
1517 rgba[RCOMP] = r;
1518 rgba[GCOMP] = g;
1519 rgba[BCOMP] = b;
1520 rgba[ACOMP] = 255;
1521 }
1522 } else {
1523 /* alpha[0] == 0 */
1524 GLubyte r, g, b;
1525 if (t == 0) {
1526 b = UP5(col[0][BCOMP]);
1527 g = UP6(col[0][GCOMP], glsb ^ selb);
1528 r = UP5(col[0][RCOMP]);
1529 } else if (t == 3) {
1530 b = UP5(col[1][BCOMP]);
1531 g = UP6(col[1][GCOMP], glsb);
1532 r = UP5(col[1][RCOMP]);
1533 } else {
1534 b = LERP(3, t, UP5(col[0][BCOMP]), UP5(col[1][BCOMP]));
1535 g = LERP(3, t, UP6(col[0][GCOMP], glsb ^ selb),
1536 UP6(col[1][GCOMP], glsb));
1537 r = LERP(3, t, UP5(col[0][RCOMP]), UP5(col[1][RCOMP]));
1538 }
1539 rgba[RCOMP] = r;
1540 rgba[GCOMP] = g;
1541 rgba[BCOMP] = b;
1542 rgba[ACOMP] = 255;
1543 }
1544 }
1545
1546
1547 static void
1548 fxt1_decode_1ALPHA (const GLubyte *code, GLint t, GLubyte *rgba)
1549 {
1550 const GLuint *cc;
1551 GLubyte r, g, b, a;
1552
1553 cc = (const GLuint *)code;
1554 if (CC_SEL(cc, 124) & 1) {
1555 /* lerp == 1 */
1556 GLuint col0[4];
1557
1558 if (t & 16) {
1559 t &= 15;
1560 t = (cc[1] >> (t * 2)) & 3;
1561 /* col 2 */
1562 col0[BCOMP] = (*(const GLuint *)(code + 11)) >> 6;
1563 col0[GCOMP] = CC_SEL(cc, 99);
1564 col0[RCOMP] = CC_SEL(cc, 104);
1565 col0[ACOMP] = CC_SEL(cc, 119);
1566 } else {
1567 t = (cc[0] >> (t * 2)) & 3;
1568 /* col 0 */
1569 col0[BCOMP] = CC_SEL(cc, 64);
1570 col0[GCOMP] = CC_SEL(cc, 69);
1571 col0[RCOMP] = CC_SEL(cc, 74);
1572 col0[ACOMP] = CC_SEL(cc, 109);
1573 }
1574
1575 if (t == 0) {
1576 b = UP5(col0[BCOMP]);
1577 g = UP5(col0[GCOMP]);
1578 r = UP5(col0[RCOMP]);
1579 a = UP5(col0[ACOMP]);
1580 } else if (t == 3) {
1581 b = UP5(CC_SEL(cc, 79));
1582 g = UP5(CC_SEL(cc, 84));
1583 r = UP5(CC_SEL(cc, 89));
1584 a = UP5(CC_SEL(cc, 114));
1585 } else {
1586 b = LERP(3, t, UP5(col0[BCOMP]), UP5(CC_SEL(cc, 79)));
1587 g = LERP(3, t, UP5(col0[GCOMP]), UP5(CC_SEL(cc, 84)));
1588 r = LERP(3, t, UP5(col0[RCOMP]), UP5(CC_SEL(cc, 89)));
1589 a = LERP(3, t, UP5(col0[ACOMP]), UP5(CC_SEL(cc, 114)));
1590 }
1591 } else {
1592 /* lerp == 0 */
1593
1594 if (t & 16) {
1595 cc++;
1596 t &= 15;
1597 }
1598 t = (cc[0] >> (t * 2)) & 3;
1599
1600 if (t == 3) {
1601 /* zero */
1602 r = g = b = a = 0;
1603 } else {
1604 GLuint kk;
1605 cc = (const GLuint *)code;
1606 a = UP5(cc[3] >> (t * 5 + 13));
1607 t *= 15;
1608 cc = (const GLuint *)(code + 8 + t / 8);
1609 kk = cc[0] >> (t & 7);
1610 b = UP5(kk);
1611 g = UP5(kk >> 5);
1612 r = UP5(kk >> 10);
1613 }
1614 }
1615 rgba[RCOMP] = r;
1616 rgba[GCOMP] = g;
1617 rgba[BCOMP] = b;
1618 rgba[ACOMP] = a;
1619 }
1620
1621
1622 static void
1623 fxt1_decode_1 (const void *texture, GLint stride, /* in pixels */
1624 GLint i, GLint j, GLubyte *rgba)
1625 {
1626 static void (*decode_1[]) (const GLubyte *, GLint, GLubyte *) = {
1627 fxt1_decode_1HI, /* cc-high = "00?" */
1628 fxt1_decode_1HI, /* cc-high = "00?" */
1629 fxt1_decode_1CHROMA, /* cc-chroma = "010" */
1630 fxt1_decode_1ALPHA, /* alpha = "011" */
1631 fxt1_decode_1MIXED, /* mixed = "1??" */
1632 fxt1_decode_1MIXED, /* mixed = "1??" */
1633 fxt1_decode_1MIXED, /* mixed = "1??" */
1634 fxt1_decode_1MIXED /* mixed = "1??" */
1635 };
1636
1637 const GLubyte *code = (const GLubyte *)texture +
1638 ((j / 4) * (stride / 8) + (i / 8)) * 16;
1639 GLint mode = CC_SEL(code, 125);
1640 GLint t = i & 7;
1641
1642 if (t & 4) {
1643 t += 12;
1644 }
1645 t += (j & 3) * 4;
1646
1647 decode_1[mode](code, t, rgba);
1648 }
1649
1650
1651 #endif /* FEATURE_texture_fxt1 */