0437cfcc16ec58cd0f1e6ed22bff8b1beb5ff6e0
[mesa.git] / src / mesa / main / texcompress_fxt1.c
1 /*
2 * Mesa 3-D graphics library
3 * Version: 7.1
4 *
5 * Copyright (C) 1999-2008 Brian Paul All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included
15 * in all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
21 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25
26 /**
27 * \file texcompress_fxt1.c
28 * GL_3DFX_texture_compression_FXT1 support.
29 */
30
31
32 #include "glheader.h"
33 #include "imports.h"
34 #include "colormac.h"
35 #include "image.h"
36 #include "macros.h"
37 #include "mfeatures.h"
38 #include "mipmap.h"
39 #include "texcompress.h"
40 #include "texcompress_fxt1.h"
41 #include "texstore.h"
42 #include "swrast/s_context.h"
43
44
45 #if FEATURE_texture_fxt1
46
47
/*
 * Forward declarations of the block codec entry points used by the
 * texstore / texel-fetch functions below.  Their definitions are not
 * part of this section of the file.
 */

/*
 * Encode a width x height image of `comps` components per texel (the
 * callers below pass 3 for RGB and 4 for RGBA) from `source` into `dest`.
 * NOTE(review): both strides are presumably expressed in bytes -- confirm
 * against the definition.
 */
static void
fxt1_encode (GLuint width, GLuint height, GLint comps,
             const void *source, GLint srcRowStride,
             void *dest, GLint destRowStride);

/*
 * Decode one texel of a compressed image into rgba[0..3].
 * NOTE(review): i / j are presumably the texel column / row -- confirm
 * against the definition.
 */
void
fxt1_decode_1 (const void *texture, GLint stride,
               GLint i, GLint j, GLubyte *rgba);
56
57
58 /**
59 * Store user's image in rgb_fxt1 format.
60 */
61 GLboolean
62 _mesa_texstore_rgb_fxt1(TEXSTORE_PARAMS)
63 {
64 const GLubyte *pixels;
65 GLint srcRowStride;
66 GLubyte *dst;
67 const GLint texWidth = dstRowStride * 8 / 16; /* a bit of a hack */
68 const GLubyte *tempImage = NULL;
69
70 ASSERT(dstFormat == MESA_FORMAT_RGB_FXT1);
71 ASSERT(dstXoffset % 8 == 0);
72 ASSERT(dstYoffset % 4 == 0);
73 ASSERT(dstZoffset == 0);
74 (void) dstZoffset;
75 (void) dstImageOffsets;
76
77 if (srcFormat != GL_RGB ||
78 srcType != GL_UNSIGNED_BYTE ||
79 ctx->_ImageTransferState ||
80 srcPacking->SwapBytes) {
81 /* convert image to RGB/GLubyte */
82 tempImage = _mesa_make_temp_ubyte_image(ctx, dims,
83 baseInternalFormat,
84 _mesa_get_format_base_format(dstFormat),
85 srcWidth, srcHeight, srcDepth,
86 srcFormat, srcType, srcAddr,
87 srcPacking);
88 if (!tempImage)
89 return GL_FALSE; /* out of memory */
90 pixels = tempImage;
91 srcRowStride = 3 * srcWidth;
92 srcFormat = GL_RGB;
93 }
94 else {
95 pixels = (const GLubyte *) srcAddr;
96 srcRowStride = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat,
97 srcType) / sizeof(GLubyte);
98 }
99
100 dst = _mesa_compressed_image_address(dstXoffset, dstYoffset, 0,
101 dstFormat,
102 texWidth, (GLubyte *) dstAddr);
103
104 fxt1_encode(srcWidth, srcHeight, 3, pixels, srcRowStride,
105 dst, dstRowStride);
106
107 if (tempImage)
108 free((void*) tempImage);
109
110 return GL_TRUE;
111 }
112
113
114 /**
115 * Store user's image in rgba_fxt1 format.
116 */
117 GLboolean
118 _mesa_texstore_rgba_fxt1(TEXSTORE_PARAMS)
119 {
120 const GLubyte *pixels;
121 GLint srcRowStride;
122 GLubyte *dst;
123 GLint texWidth = dstRowStride * 8 / 16; /* a bit of a hack */
124 const GLubyte *tempImage = NULL;
125
126 ASSERT(dstFormat == MESA_FORMAT_RGBA_FXT1);
127 ASSERT(dstXoffset % 8 == 0);
128 ASSERT(dstYoffset % 4 == 0);
129 ASSERT(dstZoffset == 0);
130 (void) dstZoffset;
131 (void) dstImageOffsets;
132
133 if (srcFormat != GL_RGBA ||
134 srcType != GL_UNSIGNED_BYTE ||
135 ctx->_ImageTransferState ||
136 srcPacking->SwapBytes) {
137 /* convert image to RGBA/GLubyte */
138 tempImage = _mesa_make_temp_ubyte_image(ctx, dims,
139 baseInternalFormat,
140 _mesa_get_format_base_format(dstFormat),
141 srcWidth, srcHeight, srcDepth,
142 srcFormat, srcType, srcAddr,
143 srcPacking);
144 if (!tempImage)
145 return GL_FALSE; /* out of memory */
146 pixels = tempImage;
147 srcRowStride = 4 * srcWidth;
148 srcFormat = GL_RGBA;
149 }
150 else {
151 pixels = (const GLubyte *) srcAddr;
152 srcRowStride = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat,
153 srcType) / sizeof(GLubyte);
154 }
155
156 dst = _mesa_compressed_image_address(dstXoffset, dstYoffset, 0,
157 dstFormat,
158 texWidth, (GLubyte *) dstAddr);
159
160 fxt1_encode(srcWidth, srcHeight, 4, pixels, srcRowStride,
161 dst, dstRowStride);
162
163 if (tempImage)
164 free((void*) tempImage);
165
166 return GL_TRUE;
167 }
168
169
170 void
171 _mesa_fetch_texel_2d_f_rgba_fxt1( const struct swrast_texture_image *texImage,
172 GLint i, GLint j, GLint k, GLfloat *texel )
173 {
174 /* just sample as GLubyte and convert to float here */
175 GLubyte rgba[4];
176 (void) k;
177 fxt1_decode_1(texImage->Base.Data, texImage->Base.RowStride, i, j, rgba);
178 texel[RCOMP] = UBYTE_TO_FLOAT(rgba[RCOMP]);
179 texel[GCOMP] = UBYTE_TO_FLOAT(rgba[GCOMP]);
180 texel[BCOMP] = UBYTE_TO_FLOAT(rgba[BCOMP]);
181 texel[ACOMP] = UBYTE_TO_FLOAT(rgba[ACOMP]);
182 }
183
184
185 void
186 _mesa_fetch_texel_2d_f_rgb_fxt1( const struct swrast_texture_image *texImage,
187 GLint i, GLint j, GLint k, GLfloat *texel )
188 {
189 /* just sample as GLubyte and convert to float here */
190 GLubyte rgba[4];
191 (void) k;
192 fxt1_decode_1(texImage->Base.Data, texImage->Base.RowStride, i, j, rgba);
193 texel[RCOMP] = UBYTE_TO_FLOAT(rgba[RCOMP]);
194 texel[GCOMP] = UBYTE_TO_FLOAT(rgba[GCOMP]);
195 texel[BCOMP] = UBYTE_TO_FLOAT(rgba[BCOMP]);
196 texel[ACOMP] = 1.0F;
197 }
198
199
200
201 /***************************************************************************\
202 * FXT1 encoder
203 *
204 * The encoder was built by reversing the decoder,
205 * and is vaguely based on Texus2 by 3dfx. Note that this code
206 * is merely a proof of concept, since it is highly UNoptimized;
207 * moreover, it is sub-optimal due to initial conditions passed
208 * to Lloyd's algorithm (the interpolation modes are even worse).
209 \***************************************************************************/
210
211
/* Encoder tuning constants and helpers. */
#define MAX_COMP 4 /* largest number of components per texel ever needed */
#define MAX_VECT 4 /* largest number of base vectors ever searched for */
#define N_TEXELS 32 /* number of texels in a block (always 32) */
#define LL_N_REP 50 /* maximum number of iterations in lloyd's vq */
#define LL_RMS_D 10 /* fault tolerance: stop when the error improves by less than this */
#define LL_RMS_E 255 /* fault tolerance: stop when the error drops below this */
#define ALPHA_TS 2 /* alpha threshold: alpha >= (255 - ALPHA_TS) is deemed opaque */
/* True when the texel at v is "transparent black", i.e. the GLuint read
 * through v is zero.  v must point at (at least) sizeof(GLuint) readable
 * bytes; the encoder passes rows of a GLubyte [..][MAX_COMP] array.
 */
#define ISTBLACK(v) (*((GLuint *)(v)) == 0)
220
221
/*
 * Define a 64-bit unsigned integer type (Fx64) and the three operations
 * the encoder needs on it:
 *
 *   FX64_MOV32(a, b)  assign the 32-bit value b to a
 *   FX64_OR32(a, b)   OR the 32-bit value b into the low bits of a
 *   FX64_SHL(a, c)    shift a left by c bits
 *
 * All three expand to plain statements/expressions on `a`, so `a` must
 * be a simple lvalue.
 */
#if 1

/* native variant: rely on uint64_t */
#define FX64_NATIVE 1

typedef uint64_t Fx64;

#define FX64_MOV32(a, b) a = b
#define FX64_OR32(a, b) a |= b
#define FX64_SHL(a, c) a <<= c

#else

/* emulated variant (currently disabled): two 32-bit halves */
#define FX64_NATIVE 0

typedef struct {
   GLuint lo, hi;
} Fx64;

/* NOTE(review): only .lo is written here; .hi keeps whatever it held. */
#define FX64_MOV32(a, b) a.lo = b
#define FX64_OR32(a, b) a.lo |= b

/* NOTE(review): for c == 0 the else-branch shifts a.lo right by 32, which
 * is not a valid shift count for a 32-bit GLuint.  The visible callers
 * only ever shift by 5.
 */
#define FX64_SHL(a, c)                                 \
   do {                                                \
      if ((c) >= 32) {                                 \
         a.hi = a.lo << ((c) - 32);                    \
         a.lo = 0;                                     \
      } else {                                         \
         a.hi = (a.hi << (c)) | (a.lo >> (32 - (c)));  \
         a.lo <<= (c);                                 \
      }                                                \
   } while (0)

#endif
258
259
/* Per-component weight of the color metric; currently 1 for every
 * component (can be used to obtain an oblong metric: 0.30 / 0.59 / 0.11).
 */
#define F(i) (GLfloat)1 /* can be used to obtain an oblong metric: 0.30 / 0.59 / 0.11 */
/* When non-zero, CALCCDOT clamps its result to [0, NV]. */
#define SAFECDOT 1 /* for paranoids */

/*
 * MAKEIVEC: build the interpolation vector IV and bias B for the color
 * segment V0 -> V1 with NV steps, over NC components, such that for a
 * color V
 *
 *    dot(V, IV) + B  ==  NV * dot(V1 - V0, V - V0) / |V1 - V0|^2 + 0.5
 *
 * (with all weights F(i) equal to 1), i.e. V0 maps to 0.5 and V1 maps to
 * NV + 0.5, ready to be truncated to an index by CALCCDOT.
 *
 * The macro uses a variable named `i` from the enclosing scope as its
 * loop counter.  If V0 and V1 are identical d2 is zero and the division
 * is by zero; the callers in this file only invoke the macro after
 * checking that the two extreme texel indices differ.
 */
#define MAKEIVEC(NV, NC, IV, B, V0, V1)  \
   do {                                  \
      /* compute interpolation vector */ \
      GLfloat d2 = 0.0F;                 \
      GLfloat rd2;                       \
                                         \
      for (i = 0; i < NC; i++) {         \
         IV[i] = (V1[i] - V0[i]) * F(i); \
         d2 += IV[i] * IV[i];            \
      }                                  \
      rd2 = (GLfloat)NV / d2;            \
      B = 0;                             \
      for (i = 0; i < NC; i++) {         \
         IV[i] *= F(i);                  \
         B -= IV[i] * V0[i];             \
         IV[i] *= rd2;                   \
      }                                  \
      B = B * rd2 + 0.5f;                \
   } while (0)

/*
 * CALCCDOT: compute the interpolation index of color V as
 * (GLint)(dot(V, IV) + B) and store it in TEXEL; when SAFECDOT is set
 * the result is clamped to the range [0, NV].
 *
 * Like MAKEIVEC, this uses a variable `i` from the enclosing scope.
 */
#define CALCCDOT(TEXEL, NV, NC, IV, B, V)\
   do {                                  \
      GLfloat dot = 0.0F;                \
      for (i = 0; i < NC; i++) {         \
         dot += V[i] * IV[i];            \
      }                                  \
      TEXEL = (GLint)(dot + B);          \
      if (SAFECDOT) {                    \
         if (TEXEL < 0) {                \
            TEXEL = 0;                   \
         } else if (TEXEL > NV) {        \
            TEXEL = NV;                  \
         }                               \
      }                                  \
   } while (0)
298
299
300 static GLint
301 fxt1_bestcol (GLfloat vec[][MAX_COMP], GLint nv,
302 GLubyte input[MAX_COMP], GLint nc)
303 {
304 GLint i, j, best = -1;
305 GLfloat err = 1e9; /* big enough */
306
307 for (j = 0; j < nv; j++) {
308 GLfloat e = 0.0F;
309 for (i = 0; i < nc; i++) {
310 e += (vec[j][i] - input[i]) * (vec[j][i] - input[i]);
311 }
312 if (e < err) {
313 err = e;
314 best = j;
315 }
316 }
317
318 return best;
319 }
320
321
322 static GLint
323 fxt1_worst (GLfloat vec[MAX_COMP],
324 GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
325 {
326 GLint i, k, worst = -1;
327 GLfloat err = -1.0F; /* small enough */
328
329 for (k = 0; k < n; k++) {
330 GLfloat e = 0.0F;
331 for (i = 0; i < nc; i++) {
332 e += (vec[i] - input[k][i]) * (vec[i] - input[k][i]);
333 }
334 if (e > err) {
335 err = e;
336 worst = k;
337 }
338 }
339
340 return worst;
341 }
342
343
344 static GLint
345 fxt1_variance (GLdouble variance[MAX_COMP],
346 GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
347 {
348 GLint i, k, best = 0;
349 GLint sx, sx2;
350 GLdouble var, maxvar = -1; /* small enough */
351 GLdouble teenth = 1.0 / n;
352
353 for (i = 0; i < nc; i++) {
354 sx = sx2 = 0;
355 for (k = 0; k < n; k++) {
356 GLint t = input[k][i];
357 sx += t;
358 sx2 += t * t;
359 }
360 var = sx2 * teenth - sx * sx * teenth * teenth;
361 if (maxvar < var) {
362 maxvar = var;
363 best = i;
364 }
365 if (variance) {
366 variance[i] = var;
367 }
368 }
369
370 return best;
371 }
372
373
374 static GLint
375 fxt1_choose (GLfloat vec[][MAX_COMP], GLint nv,
376 GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
377 {
378 #if 0
379 /* Choose colors from a grid.
380 */
381 GLint i, j;
382
383 for (j = 0; j < nv; j++) {
384 GLint m = j * (n - 1) / (nv - 1);
385 for (i = 0; i < nc; i++) {
386 vec[j][i] = input[m][i];
387 }
388 }
389 #else
390 /* Our solution here is to find the darkest and brightest colors in
391 * the 8x4 tile and use those as the two representative colors.
392 * There are probably better algorithms to use (histogram-based).
393 */
394 GLint i, j, k;
395 GLint minSum = 2000; /* big enough */
396 GLint maxSum = -1; /* small enough */
397 GLint minCol = 0; /* phoudoin: silent compiler! */
398 GLint maxCol = 0; /* phoudoin: silent compiler! */
399
400 struct {
401 GLint flag;
402 GLint key;
403 GLint freq;
404 GLint idx;
405 } hist[N_TEXELS];
406 GLint lenh = 0;
407
408 memset(hist, 0, sizeof(hist));
409
410 for (k = 0; k < n; k++) {
411 GLint l;
412 GLint key = 0;
413 GLint sum = 0;
414 for (i = 0; i < nc; i++) {
415 key <<= 8;
416 key |= input[k][i];
417 sum += input[k][i];
418 }
419 for (l = 0; l < n; l++) {
420 if (!hist[l].flag) {
421 /* alloc new slot */
422 hist[l].flag = !0;
423 hist[l].key = key;
424 hist[l].freq = 1;
425 hist[l].idx = k;
426 lenh = l + 1;
427 break;
428 } else if (hist[l].key == key) {
429 hist[l].freq++;
430 break;
431 }
432 }
433 if (minSum > sum) {
434 minSum = sum;
435 minCol = k;
436 }
437 if (maxSum < sum) {
438 maxSum = sum;
439 maxCol = k;
440 }
441 }
442
443 if (lenh <= nv) {
444 for (j = 0; j < lenh; j++) {
445 for (i = 0; i < nc; i++) {
446 vec[j][i] = (GLfloat)input[hist[j].idx][i];
447 }
448 }
449 for (; j < nv; j++) {
450 for (i = 0; i < nc; i++) {
451 vec[j][i] = vec[0][i];
452 }
453 }
454 return 0;
455 }
456
457 for (j = 0; j < nv; j++) {
458 for (i = 0; i < nc; i++) {
459 vec[j][i] = ((nv - 1 - j) * input[minCol][i] + j * input[maxCol][i] + (nv - 1) / 2) / (GLfloat)(nv - 1);
460 }
461 }
462 #endif
463
464 return !0;
465 }
466
467
468 static GLint
469 fxt1_lloyd (GLfloat vec[][MAX_COMP], GLint nv,
470 GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
471 {
472 /* Use the generalized lloyd's algorithm for VQ:
473 * find 4 color vectors.
474 *
475 * for each sample color
476 * sort to nearest vector.
477 *
478 * replace each vector with the centroid of its matching colors.
479 *
480 * repeat until RMS doesn't improve.
481 *
482 * if a color vector has no samples, or becomes the same as another
483 * vector, replace it with the color which is farthest from a sample.
484 *
485 * vec[][MAX_COMP] initial vectors and resulting colors
486 * nv number of resulting colors required
487 * input[N_TEXELS][MAX_COMP] input texels
488 * nc number of components in input / vec
489 * n number of input samples
490 */
491
492 GLint sum[MAX_VECT][MAX_COMP]; /* used to accumulate closest texels */
493 GLint cnt[MAX_VECT]; /* how many times a certain vector was chosen */
494 GLfloat error, lasterror = 1e9;
495
496 GLint i, j, k, rep;
497
498 /* the quantizer */
499 for (rep = 0; rep < LL_N_REP; rep++) {
500 /* reset sums & counters */
501 for (j = 0; j < nv; j++) {
502 for (i = 0; i < nc; i++) {
503 sum[j][i] = 0;
504 }
505 cnt[j] = 0;
506 }
507 error = 0;
508
509 /* scan whole block */
510 for (k = 0; k < n; k++) {
511 #if 1
512 GLint best = -1;
513 GLfloat err = 1e9; /* big enough */
514 /* determine best vector */
515 for (j = 0; j < nv; j++) {
516 GLfloat e = (vec[j][0] - input[k][0]) * (vec[j][0] - input[k][0]) +
517 (vec[j][1] - input[k][1]) * (vec[j][1] - input[k][1]) +
518 (vec[j][2] - input[k][2]) * (vec[j][2] - input[k][2]);
519 if (nc == 4) {
520 e += (vec[j][3] - input[k][3]) * (vec[j][3] - input[k][3]);
521 }
522 if (e < err) {
523 err = e;
524 best = j;
525 }
526 }
527 #else
528 GLint best = fxt1_bestcol(vec, nv, input[k], nc, &err);
529 #endif
530 assert(best >= 0);
531 /* add in closest color */
532 for (i = 0; i < nc; i++) {
533 sum[best][i] += input[k][i];
534 }
535 /* mark this vector as used */
536 cnt[best]++;
537 /* accumulate error */
538 error += err;
539 }
540
541 /* check RMS */
542 if ((error < LL_RMS_E) ||
543 ((error < lasterror) && ((lasterror - error) < LL_RMS_D))) {
544 return !0; /* good match */
545 }
546 lasterror = error;
547
548 /* move each vector to the barycenter of its closest colors */
549 for (j = 0; j < nv; j++) {
550 if (cnt[j]) {
551 GLfloat div = 1.0F / cnt[j];
552 for (i = 0; i < nc; i++) {
553 vec[j][i] = div * sum[j][i];
554 }
555 } else {
556 /* this vec has no samples or is identical with a previous vec */
557 GLint worst = fxt1_worst(vec[j], input, nc, n);
558 for (i = 0; i < nc; i++) {
559 vec[j][i] = input[worst][i];
560 }
561 }
562 }
563 }
564
565 return 0; /* could not converge fast enough */
566 }
567
568
569 static void
570 fxt1_quantize_CHROMA (GLuint *cc,
571 GLubyte input[N_TEXELS][MAX_COMP])
572 {
573 const GLint n_vect = 4; /* 4 base vectors to find */
574 const GLint n_comp = 3; /* 3 components: R, G, B */
575 GLfloat vec[MAX_VECT][MAX_COMP];
576 GLint i, j, k;
577 Fx64 hi; /* high quadword */
578 GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
579
580 if (fxt1_choose(vec, n_vect, input, n_comp, N_TEXELS) != 0) {
581 fxt1_lloyd(vec, n_vect, input, n_comp, N_TEXELS);
582 }
583
584 FX64_MOV32(hi, 4); /* cc-chroma = "010" + unused bit */
585 for (j = n_vect - 1; j >= 0; j--) {
586 for (i = 0; i < n_comp; i++) {
587 /* add in colors */
588 FX64_SHL(hi, 5);
589 FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
590 }
591 }
592 ((Fx64 *)cc)[1] = hi;
593
594 lohi = lolo = 0;
595 /* right microtile */
596 for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) {
597 lohi <<= 2;
598 lohi |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
599 }
600 /* left microtile */
601 for (; k >= 0; k--) {
602 lolo <<= 2;
603 lolo |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
604 }
605 cc[1] = lohi;
606 cc[0] = lolo;
607 }
608
609
610 static void
611 fxt1_quantize_ALPHA0 (GLuint *cc,
612 GLubyte input[N_TEXELS][MAX_COMP],
613 GLubyte reord[N_TEXELS][MAX_COMP], GLint n)
614 {
615 const GLint n_vect = 3; /* 3 base vectors to find */
616 const GLint n_comp = 4; /* 4 components: R, G, B, A */
617 GLfloat vec[MAX_VECT][MAX_COMP];
618 GLint i, j, k;
619 Fx64 hi; /* high quadword */
620 GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
621
622 /* the last vector indicates zero */
623 for (i = 0; i < n_comp; i++) {
624 vec[n_vect][i] = 0;
625 }
626
627 /* the first n texels in reord are guaranteed to be non-zero */
628 if (fxt1_choose(vec, n_vect, reord, n_comp, n) != 0) {
629 fxt1_lloyd(vec, n_vect, reord, n_comp, n);
630 }
631
632 FX64_MOV32(hi, 6); /* alpha = "011" + lerp = 0 */
633 for (j = n_vect - 1; j >= 0; j--) {
634 /* add in alphas */
635 FX64_SHL(hi, 5);
636 FX64_OR32(hi, (GLuint)(vec[j][ACOMP] / 8.0F));
637 }
638 for (j = n_vect - 1; j >= 0; j--) {
639 for (i = 0; i < n_comp - 1; i++) {
640 /* add in colors */
641 FX64_SHL(hi, 5);
642 FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
643 }
644 }
645 ((Fx64 *)cc)[1] = hi;
646
647 lohi = lolo = 0;
648 /* right microtile */
649 for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) {
650 lohi <<= 2;
651 lohi |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
652 }
653 /* left microtile */
654 for (; k >= 0; k--) {
655 lolo <<= 2;
656 lolo |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
657 }
658 cc[1] = lohi;
659 cc[0] = lolo;
660 }
661
662
/**
 * Encode a block in CC_ALPHA mode with interpolation (lerp = 1).
 *
 * Three RGBA colors are stored: vec[0] belongs to the left microtile
 * (texels 0..15), vec[2] to the right microtile (texels 16..31) and
 * vec[1] is shared by both.  Each texel is stored as a 2-bit position
 * on the segment between its microtile's own color and the shared one.
 *
 * \param cc     out: the encoded block (four GLuints)
 * \param input  the N_TEXELS texels of the block
 */
static void
fxt1_quantize_ALPHA1 (GLuint *cc,
                      GLubyte input[N_TEXELS][MAX_COMP])
{
   const GLint n_vect = 3; /* highest vector number in each microtile */
   const GLint n_comp = 4; /* 4 components: R, G, B, A */
   GLfloat vec[1 + 1 + 1][MAX_COMP]; /* 1.5 extrema for each sub-block */
   GLfloat b, iv[MAX_COMP]; /* interpolation vector */
   GLint i, j, k;
   Fx64 hi; /* high quadword */
   GLuint lohi, lolo; /* low quadword: hi dword, lo dword */

   GLint minSum;
   GLint maxSum;
   GLint minColL = 0, maxColL = 0;
   GLint minColR = 0, maxColR = 0;
   GLint sumL = 0, sumR = 0;
   GLint nn_comp;
   /* Our solution here is to find the darkest and brightest colors in
    * the 4x4 tile and use those as the two representative colors.
    * There are probably better algorithms to use (histogram-based).
    *
    * The search is first done on the sum of all four components; as
    * long as it fails to separate two texels it is repeated with one
    * component less.  Note that sumL / sumR keep accumulating across
    * those repetitions.
    */
   nn_comp = n_comp;
   while ((minColL == maxColL) && nn_comp) {
      minSum = 2000; /* big enough */
      maxSum = -1; /* small enough */
      for (k = 0; k < N_TEXELS / 2; k++) {
         GLint sum = 0;
         for (i = 0; i < nn_comp; i++) {
            sum += input[k][i];
         }
         if (minSum > sum) {
            minSum = sum;
            minColL = k;
         }
         if (maxSum < sum) {
            maxSum = sum;
            maxColL = k;
         }
         sumL += sum;
      }

      nn_comp--;
   }

   /* same search for the right microtile */
   nn_comp = n_comp;
   while ((minColR == maxColR) && nn_comp) {
      minSum = 2000; /* big enough */
      maxSum = -1; /* small enough */
      for (k = N_TEXELS / 2; k < N_TEXELS; k++) {
         GLint sum = 0;
         for (i = 0; i < nn_comp; i++) {
            sum += input[k][i];
         }
         if (minSum > sum) {
            minSum = sum;
            minColR = k;
         }
         if (maxSum < sum) {
            maxSum = sum;
            maxColR = k;
         }
         sumR += sum;
      }

      nn_comp--;
   }

   /* choose the common vector (yuck!) */
   {
      GLint j1, j2;
      GLint v1 = 0, v2 = 0;
      GLfloat err = 1e9; /* big enough */
      GLfloat tv[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
      /* tv[0..1]: left min / max, tv[2..3]: right min / max */
      for (i = 0; i < n_comp; i++) {
         tv[0][i] = input[minColL][i];
         tv[1][i] = input[maxColL][i];
         tv[2][i] = input[minColR][i];
         tv[3][i] = input[maxColR][i];
      }
      /* find the closest pair made of one left and one right extreme */
      for (j1 = 0; j1 < 2; j1++) {
         for (j2 = 2; j2 < 4; j2++) {
            GLfloat e = 0.0F;
            for (i = 0; i < n_comp; i++) {
               e += (tv[j1][i] - tv[j2][i]) * (tv[j1][i] - tv[j2][i]);
            }
            if (e < err) {
               err = e;
               v1 = j1;
               v2 = j2;
            }
         }
      }
      /* The shared color is the average of that pair, weighted by the
       * accumulated sums of the two microtiles; the remaining extreme of
       * each side (1 - v1 on the left, 5 - v2 on the right) becomes that
       * side's own color.
       */
      for (i = 0; i < n_comp; i++) {
         vec[0][i] = tv[1 - v1][i];
         vec[1][i] = (tv[v1][i] * sumL + tv[v2][i] * sumR) / (sumL + sumR);
         vec[2][i] = tv[5 - v2][i];
      }
   }

   /* left microtile: indices stay 0 when no two texels could be told apart */
   cc[0] = 0;
   if (minColL != maxColL) {
      /* compute interpolation vector */
      MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);

      /* add in texels */
      lolo = 0;
      for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
         GLint texel;
         /* interpolate color */
         CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
         /* add in texel */
         lolo <<= 2;
         lolo |= texel;
      }

      cc[0] = lolo;
   }

   /* right microtile: interpolates from vec[2] towards the shared vec[1] */
   cc[1] = 0;
   if (minColR != maxColR) {
      /* compute interpolation vector */
      MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[1]);

      /* add in texels */
      lohi = 0;
      for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
         GLint texel;
         /* interpolate color */
         CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
         /* add in texel */
         lohi <<= 2;
         lohi |= texel;
      }

      cc[1] = lohi;
   }

   /* high quadword: mode bits, then the alphas, then the colors,
    * 5 bits per channel */
   FX64_MOV32(hi, 7); /* alpha = "011" + lerp = 1 */
   for (j = n_vect - 1; j >= 0; j--) {
      /* add in alphas */
      FX64_SHL(hi, 5);
      FX64_OR32(hi, (GLuint)(vec[j][ACOMP] / 8.0F));
   }
   for (j = n_vect - 1; j >= 0; j--) {
      for (i = 0; i < n_comp - 1; i++) {
         /* add in colors */
         FX64_SHL(hi, 5);
         FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
      }
   }
   ((Fx64 *)cc)[1] = hi;
}
818
819
/**
 * Encode a block in CC_HI mode: two RGB colors (the darkest and the
 * brightest texel) and a 3-bit index per texel.  Indices 0..6 select a
 * position between the two colors, index 7 marks a transparent black
 * texel.
 *
 * \param cc     out: the encoded block (four GLuints); cc[3] holds the
 *               two colors, the indices are OR-ed in starting at bit 0
 *               of cc[0]
 * \param input  the N_TEXELS texels of the block
 * \param reord  the texels of the block that are not transparent black,
 *               packed at the front
 * \param n      number of valid texels in reord
 */
static void
fxt1_quantize_HI (GLuint *cc,
                  GLubyte input[N_TEXELS][MAX_COMP],
                  GLubyte reord[N_TEXELS][MAX_COMP], GLint n)
{
   const GLint n_vect = 6; /* highest vector number */
   const GLint n_comp = 3; /* 3 components: R, G, B */
   GLfloat b = 0.0F; /* initialized only to silence compiler warnings */
   GLfloat iv[MAX_COMP]; /* interpolation vector */
   GLint i, k;
   GLuint hihi; /* high quadword: hi dword */

   GLint minSum = 2000; /* big enough */
   GLint maxSum = -1; /* small enough */
   GLint minCol = 0; /* initialized only to silence compiler warnings */
   GLint maxCol = 0; /* initialized only to silence compiler warnings */

   /* Our solution here is to find the darkest and brightest colors in
    * the 8x4 tile and use those as the two representative colors.
    * There are probably better algorithms to use (histogram-based).
    * Only the texels in reord (not transparent black) are considered.
    */
   for (k = 0; k < n; k++) {
      GLint sum = 0;
      for (i = 0; i < n_comp; i++) {
         sum += reord[k][i];
      }
      if (minSum > sum) {
         minSum = sum;
         minCol = k;
      }
      if (maxSum < sum) {
         maxSum = sum;
         maxCol = k;
      }
   }

   /* pack the brightest then the darkest color, 5 bits per channel */
   hihi = 0; /* cc-hi = "00" */
   for (i = 0; i < n_comp; i++) {
      /* add in colors */
      hihi <<= 5;
      hihi |= reord[maxCol][i] >> 3;
   }
   for (i = 0; i < n_comp; i++) {
      /* add in colors */
      hihi <<= 5;
      hihi |= reord[minCol][i] >> 3;
   }
   cc[3] = hihi;
   cc[0] = cc[1] = cc[2] = 0;

   /* compute interpolation vector */
   if (minCol != maxCol) {
      MAKEIVEC(n_vect, n_comp, iv, b, reord[minCol], reord[maxCol]);
   }

   /* add in texels: texel k occupies 3 bits starting at bit k * 3.
    * NOTE(review): kk addresses the block at a byte offset that is not
    * always a multiple of sizeof(GLuint), and placing bits this way
    * presumably assumes little-endian byte order -- confirm for the
    * supported targets.
    */
   for (k = N_TEXELS - 1; k >= 0; k--) {
      GLint t = k * 3;
      GLuint *kk = (GLuint *)((char *)cc + t / 8);
      GLint texel = n_vect + 1; /* transparent black */

      if (!ISTBLACK(input[k])) {
         /* when all colors are alike the index simply stays 0 */
         if (minCol != maxCol) {
            /* interpolate color */
            CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
            /* add in texel */
            kk[0] |= texel << (t & 7);
         }
      } else {
         /* add in texel */
         kk[0] |= texel << (t & 7);
      }
   }
}
894
895
/**
 * Encode a block in CC_MIXED mode, variant used when the block contains
 * transparent black texels (mode value 9 below).
 *
 * Each microtile (left: texels 0..15, right: texels 16..31) gets two RGB
 * colors, the darkest and brightest of its texels that are not
 * transparent black.  Each texel is stored as a 2-bit index: 0..2 is a
 * position between the two colors, 3 marks transparent black.
 *
 * \param cc     out: the encoded block (four GLuints)
 * \param input  the N_TEXELS texels of the block
 */
static void
fxt1_quantize_MIXED1 (GLuint *cc,
                      GLubyte input[N_TEXELS][MAX_COMP])
{
   const GLint n_vect = 2; /* highest vector number in each microtile */
   const GLint n_comp = 3; /* 3 components: R, G, B */
   GLubyte vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
   GLfloat b, iv[MAX_COMP]; /* interpolation vector */
   GLint i, j, k;
   Fx64 hi; /* high quadword */
   GLuint lohi, lolo; /* low quadword: hi dword, lo dword */

   GLint minSum;
   GLint maxSum;
   /* maxCol* == -1 means: no texel other than transparent black found */
   GLint minColL = 0, maxColL = -1;
   GLint minColR = 0, maxColR = -1;

   /* Our solution here is to find the darkest and brightest colors in
    * the 4x4 tile and use those as the two representative colors.
    * There are probably better algorithms to use (histogram-based).
    */
   minSum = 2000; /* big enough */
   maxSum = -1; /* small enough */
   for (k = 0; k < N_TEXELS / 2; k++) {
      if (!ISTBLACK(input[k])) {
         GLint sum = 0;
         for (i = 0; i < n_comp; i++) {
            sum += input[k][i];
         }
         if (minSum > sum) {
            minSum = sum;
            minColL = k;
         }
         if (maxSum < sum) {
            maxSum = sum;
            maxColL = k;
         }
      }
   }
   /* same scan for the right microtile; k continues at N_TEXELS / 2 */
   minSum = 2000; /* big enough */
   maxSum = -1; /* small enough */
   for (; k < N_TEXELS; k++) {
      if (!ISTBLACK(input[k])) {
         GLint sum = 0;
         for (i = 0; i < n_comp; i++) {
            sum += input[k][i];
         }
         if (minSum > sum) {
            minSum = sum;
            minColR = k;
         }
         if (maxSum < sum) {
            maxSum = sum;
            maxColR = k;
         }
      }
   }

   /* left microtile */
   if (maxColL == -1) {
      /* all transparent black: every 2-bit index is 3 */
      cc[0] = ~0u;
      for (i = 0; i < n_comp; i++) {
         vec[0][i] = 0;
         vec[1][i] = 0;
      }
   } else {
      /* NOTE(review): when minColL == maxColL the indices stay 0 for the
       * whole microtile, including any transparent black texels in it --
       * confirm this is intended.
       */
      cc[0] = 0;
      for (i = 0; i < n_comp; i++) {
         vec[0][i] = input[minColL][i];
         vec[1][i] = input[maxColL][i];
      }
      if (minColL != maxColL) {
         /* compute interpolation vector */
         MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);

         /* add in texels */
         lolo = 0;
         for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
            GLint texel = n_vect + 1; /* transparent black */
            if (!ISTBLACK(input[k])) {
               /* interpolate color */
               CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
            }
            /* add in texel */
            lolo <<= 2;
            lolo |= texel;
         }
         cc[0] = lolo;
      }
   }

   /* right microtile */
   if (maxColR == -1) {
      /* all transparent black: every 2-bit index is 3 */
      cc[1] = ~0u;
      for (i = 0; i < n_comp; i++) {
         vec[2][i] = 0;
         vec[3][i] = 0;
      }
   } else {
      /* NOTE(review): same remark as for the left microtile when
       * minColR == maxColR.
       */
      cc[1] = 0;
      for (i = 0; i < n_comp; i++) {
         vec[2][i] = input[minColR][i];
         vec[3][i] = input[maxColR][i];
      }
      if (minColR != maxColR) {
         /* compute interpolation vector */
         MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);

         /* add in texels */
         lohi = 0;
         for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
            GLint texel = n_vect + 1; /* transparent black */
            if (!ISTBLACK(input[k])) {
               /* interpolate color */
               CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
            }
            /* add in texel */
            lohi <<= 2;
            lohi |= texel;
         }
         cc[1] = lohi;
      }
   }

   /* High quadword.  The leading 4 bits are 9 plus bit 2 of vec[3]'s
    * green (kept at bit 2) and bit 2 of vec[1]'s green (moved to bit 1),
    * i.e. the green bit just below the five stored by ">> 3".  The four
    * colors follow, 5 bits per channel.
    */
   FX64_MOV32(hi, 9 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
   for (j = 2 * 2 - 1; j >= 0; j--) {
      for (i = 0; i < n_comp; i++) {
         /* add in colors */
         FX64_SHL(hi, 5);
         FX64_OR32(hi, vec[j][i] >> 3);
      }
   }
   ((Fx64 *)cc)[1] = hi;
}
1032
1033
/**
 * Encode a block in CC_MIXED mode, variant used for blocks without
 * transparent black texels (mode value 8 below).
 *
 * Each microtile (left: texels 0..15, right: texels 16..31) gets two RGB
 * colors, taken from the texels holding the lowest and highest value in
 * the channel with the largest variance.  Each texel is stored as a
 * 2-bit position (0..3) between the two colors.
 *
 * \param cc     out: the encoded block (four GLuints)
 * \param input  the N_TEXELS texels of the block
 */
static void
fxt1_quantize_MIXED0 (GLuint *cc,
                      GLubyte input[N_TEXELS][MAX_COMP])
{
   const GLint n_vect = 3; /* highest vector number in each microtile */
   const GLint n_comp = 3; /* 3 components: R, G, B */
   GLubyte vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
   GLfloat b, iv[MAX_COMP]; /* interpolation vector */
   GLint i, j, k;
   Fx64 hi; /* high quadword */
   GLuint lohi, lolo; /* low quadword: hi dword, lo dword */

   GLint minColL = 0, maxColL = 0;
   GLint minColR = 0, maxColR = 0;
#if 0
   /* disabled alternative: pick the extremes by sum of components */
   GLint minSum;
   GLint maxSum;

   /* Our solution here is to find the darkest and brightest colors in
    * the 4x4 tile and use those as the two representative colors.
    * There are probably better algorithms to use (histogram-based).
    */
   minSum = 2000; /* big enough */
   maxSum = -1; /* small enough */
   for (k = 0; k < N_TEXELS / 2; k++) {
      GLint sum = 0;
      for (i = 0; i < n_comp; i++) {
         sum += input[k][i];
      }
      if (minSum > sum) {
         minSum = sum;
         minColL = k;
      }
      if (maxSum < sum) {
         maxSum = sum;
         maxColL = k;
      }
   }
   minSum = 2000; /* big enough */
   maxSum = -1; /* small enough */
   for (; k < N_TEXELS; k++) {
      GLint sum = 0;
      for (i = 0; i < n_comp; i++) {
         sum += input[k][i];
      }
      if (minSum > sum) {
         minSum = sum;
         minColR = k;
      }
      if (maxSum < sum) {
         maxSum = sum;
         maxColR = k;
      }
   }
#else
   GLint minVal;
   GLint maxVal;
   /* channel with the largest variance in each microtile */
   GLint maxVarL = fxt1_variance(NULL, input, n_comp, N_TEXELS / 2);
   GLint maxVarR = fxt1_variance(NULL, &input[N_TEXELS / 2], n_comp, N_TEXELS / 2);

   /* Scan the channel with max variance for lo & hi
    * and use those as the two representative colors.
    */
   minVal = 2000; /* big enough */
   maxVal = -1; /* small enough */
   for (k = 0; k < N_TEXELS / 2; k++) {
      GLint t = input[k][maxVarL];
      if (minVal > t) {
         minVal = t;
         minColL = k;
      }
      if (maxVal < t) {
         maxVal = t;
         maxColL = k;
      }
   }
   /* same scan for the right microtile; k continues at N_TEXELS / 2 */
   minVal = 2000; /* big enough */
   maxVal = -1; /* small enough */
   for (; k < N_TEXELS; k++) {
      GLint t = input[k][maxVarR];
      if (minVal > t) {
         minVal = t;
         minColR = k;
      }
      if (maxVal < t) {
         maxVal = t;
         maxColR = k;
      }
   }
#endif

   /* left microtile: indices stay 0 when all texels agree in the
    * scanned channel */
   cc[0] = 0;
   for (i = 0; i < n_comp; i++) {
      vec[0][i] = input[minColL][i];
      vec[1][i] = input[maxColL][i];
   }
   if (minColL != maxColL) {
      /* compute interpolation vector */
      MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);

      /* add in texels */
      lolo = 0;
      for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
         GLint texel;
         /* interpolate color */
         CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
         /* add in texel */
         lolo <<= 2;
         lolo |= texel;
      }

      /* funky encoding for LSB of green: bit 1 of the index word has to
       * equal the XOR of bit 2 of the two green values.  If it does not,
       * swap the two colors and invert every index (3 - index), which
       * flips that bit.
       */
      if ((GLint)((lolo >> 1) & 1) != (((vec[1][GCOMP] ^ vec[0][GCOMP]) >> 2) & 1)) {
         for (i = 0; i < n_comp; i++) {
            vec[1][i] = input[minColL][i];
            vec[0][i] = input[maxColL][i];
         }
         lolo = ~lolo;
      }

      cc[0] = lolo;
   }

   /* right microtile */
   cc[1] = 0;
   for (i = 0; i < n_comp; i++) {
      vec[2][i] = input[minColR][i];
      vec[3][i] = input[maxColR][i];
   }
   if (minColR != maxColR) {
      /* compute interpolation vector */
      MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);

      /* add in texels */
      lohi = 0;
      for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
         GLint texel;
         /* interpolate color */
         CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
         /* add in texel */
         lohi <<= 2;
         lohi |= texel;
      }

      /* funky encoding for LSB of green (same scheme as on the left) */
      if ((GLint)((lohi >> 1) & 1) != (((vec[3][GCOMP] ^ vec[2][GCOMP]) >> 2) & 1)) {
         for (i = 0; i < n_comp; i++) {
            vec[3][i] = input[minColR][i];
            vec[2][i] = input[maxColR][i];
         }
         lohi = ~lohi;
      }

      cc[1] = lohi;
   }

   /* High quadword.  The leading 4 bits are 8 plus bit 2 of vec[3]'s
    * green (kept at bit 2) and bit 2 of vec[1]'s green (moved to bit 1).
    * The four colors follow, 5 bits per channel.
    */
   FX64_MOV32(hi, 8 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
   for (j = 2 * 2 - 1; j >= 0; j--) {
      for (i = 0; i < n_comp; i++) {
         /* add in colors */
         FX64_SHL(hi, 5);
         FX64_OR32(hi, vec[j][i] >> 3);
      }
   }
   ((Fx64 *)cc)[1] = hi;
}
1201
1202
1203 static void
1204 fxt1_quantize (GLuint *cc, const GLubyte *lines[], GLint comps)
1205 {
1206 GLint trualpha;
1207 GLubyte reord[N_TEXELS][MAX_COMP];
1208
1209 GLubyte input[N_TEXELS][MAX_COMP];
1210 GLint i, k, l;
1211
1212 if (comps == 3) {
1213 /* make the whole block opaque */
1214 memset(input, -1, sizeof(input));
1215 }
1216
1217 /* 8 texels each line */
1218 for (l = 0; l < 4; l++) {
1219 for (k = 0; k < 4; k++) {
1220 for (i = 0; i < comps; i++) {
1221 input[k + l * 4][i] = *lines[l]++;
1222 }
1223 }
1224 for (; k < 8; k++) {
1225 for (i = 0; i < comps; i++) {
1226 input[k + l * 4 + 12][i] = *lines[l]++;
1227 }
1228 }
1229 }
1230
1231 /* block layout:
1232 * 00, 01, 02, 03, 08, 09, 0a, 0b
1233 * 10, 11, 12, 13, 18, 19, 1a, 1b
1234 * 04, 05, 06, 07, 0c, 0d, 0e, 0f
1235 * 14, 15, 16, 17, 1c, 1d, 1e, 1f
1236 */
1237
1238 /* [dBorca]
1239 * stupidity flows forth from this
1240 */
1241 l = N_TEXELS;
1242 trualpha = 0;
1243 if (comps == 4) {
1244 /* skip all transparent black texels */
1245 l = 0;
1246 for (k = 0; k < N_TEXELS; k++) {
1247 /* test all components against 0 */
1248 if (!ISTBLACK(input[k])) {
1249 /* texel is not transparent black */
1250 COPY_4UBV(reord[l], input[k]);
1251 if (reord[l][ACOMP] < (255 - ALPHA_TS)) {
1252 /* non-opaque texel */
1253 trualpha = !0;
1254 }
1255 l++;
1256 }
1257 }
1258 }
1259
1260 #if 0
1261 if (trualpha) {
1262 fxt1_quantize_ALPHA0(cc, input, reord, l);
1263 } else if (l == 0) {
1264 cc[0] = cc[1] = cc[2] = -1;
1265 cc[3] = 0;
1266 } else if (l < N_TEXELS) {
1267 fxt1_quantize_HI(cc, input, reord, l);
1268 } else {
1269 fxt1_quantize_CHROMA(cc, input);
1270 }
1271 (void)fxt1_quantize_ALPHA1;
1272 (void)fxt1_quantize_MIXED1;
1273 (void)fxt1_quantize_MIXED0;
1274 #else
1275 if (trualpha) {
1276 fxt1_quantize_ALPHA1(cc, input);
1277 } else if (l == 0) {
1278 cc[0] = cc[1] = cc[2] = ~0u;
1279 cc[3] = 0;
1280 } else if (l < N_TEXELS) {
1281 fxt1_quantize_MIXED1(cc, input);
1282 } else {
1283 fxt1_quantize_MIXED0(cc, input);
1284 }
1285 (void)fxt1_quantize_ALPHA0;
1286 (void)fxt1_quantize_HI;
1287 (void)fxt1_quantize_CHROMA;
1288 #endif
1289 }
1290
1291
1292
1293 /**
1294 * Upscale an image by replication, not (typical) stretching.
1295 * We use this when the image width or height is less than a
1296 * certain size (4, 8) and we need to upscale an image.
1297 */
1298 static void
1299 upscale_teximage2d(GLsizei inWidth, GLsizei inHeight,
1300 GLsizei outWidth, GLsizei outHeight,
1301 GLint comps, const GLubyte *src, GLint srcRowStride,
1302 GLubyte *dest )
1303 {
1304 GLint i, j, k;
1305
1306 ASSERT(outWidth >= inWidth);
1307 ASSERT(outHeight >= inHeight);
1308 #if 0
1309 ASSERT(inWidth == 1 || inWidth == 2 || inHeight == 1 || inHeight == 2);
1310 ASSERT((outWidth & 3) == 0);
1311 ASSERT((outHeight & 3) == 0);
1312 #endif
1313
1314 for (i = 0; i < outHeight; i++) {
1315 const GLint ii = i % inHeight;
1316 for (j = 0; j < outWidth; j++) {
1317 const GLint jj = j % inWidth;
1318 for (k = 0; k < comps; k++) {
1319 dest[(i * outWidth + j) * comps + k]
1320 = src[ii * srcRowStride + jj * comps + k];
1321 }
1322 }
1323 }
1324 }
1325
1326
1327 static void
1328 fxt1_encode (GLuint width, GLuint height, GLint comps,
1329 const void *source, GLint srcRowStride,
1330 void *dest, GLint destRowStride)
1331 {
1332 GLuint x, y;
1333 const GLubyte *data;
1334 GLuint *encoded = (GLuint *)dest;
1335 void *newSource = NULL;
1336
1337 assert(comps == 3 || comps == 4);
1338
1339 /* Replicate image if width is not M8 or height is not M4 */
1340 if ((width & 7) | (height & 3)) {
1341 GLint newWidth = (width + 7) & ~7;
1342 GLint newHeight = (height + 3) & ~3;
1343 newSource = malloc(comps * newWidth * newHeight * sizeof(GLubyte));
1344 if (!newSource) {
1345 GET_CURRENT_CONTEXT(ctx);
1346 _mesa_error(ctx, GL_OUT_OF_MEMORY, "texture compression");
1347 goto cleanUp;
1348 }
1349 upscale_teximage2d(width, height, newWidth, newHeight,
1350 comps, (const GLubyte *) source,
1351 srcRowStride, (GLubyte *) newSource);
1352 source = newSource;
1353 width = newWidth;
1354 height = newHeight;
1355 srcRowStride = comps * newWidth;
1356 }
1357
1358 data = (const GLubyte *) source;
1359 destRowStride = (destRowStride - width * 2) / 4;
1360 for (y = 0; y < height; y += 4) {
1361 GLuint offs = 0 + (y + 0) * srcRowStride;
1362 for (x = 0; x < width; x += 8) {
1363 const GLubyte *lines[4];
1364 lines[0] = &data[offs];
1365 lines[1] = lines[0] + srcRowStride;
1366 lines[2] = lines[1] + srcRowStride;
1367 lines[3] = lines[2] + srcRowStride;
1368 offs += 8 * comps;
1369 fxt1_quantize(encoded, lines, comps);
1370 /* 128 bits per 8x4 block */
1371 encoded += 4;
1372 }
1373 encoded += destRowStride;
1374 }
1375
1376 cleanUp:
1377 if (newSource != NULL) {
1378 free(newSource);
1379 }
1380 }
1381
1382
1383 /***************************************************************************\
1384 * FXT1 decoder
1385 *
1386 * The decoder is based on GL_3DFX_texture_compression_FXT1
1387 * specification and serves as a concept for the encoder.
1388 \***************************************************************************/
1389
1390
1391 /* lookup table for scaling 5 bit colors up to 8 bits */
1392 static const GLubyte _rgb_scale_5[] = {
1393 0, 8, 16, 25, 33, 41, 49, 58,
1394 66, 74, 82, 90, 99, 107, 115, 123,
1395 132, 140, 148, 156, 165, 173, 181, 189,
1396 197, 206, 214, 222, 230, 239, 247, 255
1397 };
1398
1399 /* lookup table for scaling 6 bit colors up to 8 bits */
1400 static const GLubyte _rgb_scale_6[] = {
1401 0, 4, 8, 12, 16, 20, 24, 28,
1402 32, 36, 40, 45, 49, 53, 57, 61,
1403 65, 69, 73, 77, 81, 85, 89, 93,
1404 97, 101, 105, 109, 113, 117, 121, 125,
1405 130, 134, 138, 142, 146, 150, 154, 158,
1406 162, 166, 170, 174, 178, 182, 186, 190,
1407 194, 198, 202, 206, 210, 215, 219, 223,
1408 227, 231, 235, 239, 243, 247, 251, 255
1409 };
1410
1411
/* CC_SEL: view cc as an array of 32-bit words and return the word that
 *         contains bit `which`, shifted right so that bit becomes bit 0.
 *         Higher bits are NOT masked off; callers mask as needed.
 * UP5:    expand the low 5 bits of c to 8 bits.
 * UP6:    expand the 6-bit value formed by the low 5 bits of c followed
 *         by the low bit of b to 8 bits.
 * LERP:   integer blend of c0 and c1 at position t of n, rounded to
 *         nearest; t == 0 yields c0 and t == n yields c1.  The whole
 *         expansion is parenthesized so it is safe inside any expression.
 */
#define CC_SEL(cc, which) (((const GLuint *)(cc))[(which) / 32] >> ((which) & 31))
#define UP5(c) _rgb_scale_5[(c) & 31]
#define UP6(c, b) _rgb_scale_6[(((c) & 31) << 1) | ((b) & 1)]
#define LERP(n, t, c0, c1) ((((n) - (t)) * (c0) + (t) * (c1) + (n) / 2) / (n))
1416
1417
1418 static void
1419 fxt1_decode_1HI (const GLubyte *code, GLint t, GLubyte *rgba)
1420 {
1421 const GLuint *cc;
1422
1423 t *= 3;
1424 cc = (const GLuint *)(code + t / 8);
1425 t = (cc[0] >> (t & 7)) & 7;
1426
1427 if (t == 7) {
1428 rgba[RCOMP] = rgba[GCOMP] = rgba[BCOMP] = rgba[ACOMP] = 0;
1429 } else {
1430 GLubyte r, g, b;
1431 cc = (const GLuint *)(code + 12);
1432 if (t == 0) {
1433 b = UP5(CC_SEL(cc, 0));
1434 g = UP5(CC_SEL(cc, 5));
1435 r = UP5(CC_SEL(cc, 10));
1436 } else if (t == 6) {
1437 b = UP5(CC_SEL(cc, 15));
1438 g = UP5(CC_SEL(cc, 20));
1439 r = UP5(CC_SEL(cc, 25));
1440 } else {
1441 b = LERP(6, t, UP5(CC_SEL(cc, 0)), UP5(CC_SEL(cc, 15)));
1442 g = LERP(6, t, UP5(CC_SEL(cc, 5)), UP5(CC_SEL(cc, 20)));
1443 r = LERP(6, t, UP5(CC_SEL(cc, 10)), UP5(CC_SEL(cc, 25)));
1444 }
1445 rgba[RCOMP] = r;
1446 rgba[GCOMP] = g;
1447 rgba[BCOMP] = b;
1448 rgba[ACOMP] = 255;
1449 }
1450 }
1451
1452
1453 static void
1454 fxt1_decode_1CHROMA (const GLubyte *code, GLint t, GLubyte *rgba)
1455 {
1456 const GLuint *cc;
1457 GLuint kk;
1458
1459 cc = (const GLuint *)code;
1460 if (t & 16) {
1461 cc++;
1462 t &= 15;
1463 }
1464 t = (cc[0] >> (t * 2)) & 3;
1465
1466 t *= 15;
1467 cc = (const GLuint *)(code + 8 + t / 8);
1468 kk = cc[0] >> (t & 7);
1469 rgba[BCOMP] = UP5(kk);
1470 rgba[GCOMP] = UP5(kk >> 5);
1471 rgba[RCOMP] = UP5(kk >> 10);
1472 rgba[ACOMP] = 255;
1473 }
1474
1475
1476 static void
1477 fxt1_decode_1MIXED (const GLubyte *code, GLint t, GLubyte *rgba)
1478 {
1479 const GLuint *cc;
1480 GLuint col[2][3];
1481 GLint glsb, selb;
1482
1483 cc = (const GLuint *)code;
1484 if (t & 16) {
1485 t &= 15;
1486 t = (cc[1] >> (t * 2)) & 3;
1487 /* col 2 */
1488 col[0][BCOMP] = (*(const GLuint *)(code + 11)) >> 6;
1489 col[0][GCOMP] = CC_SEL(cc, 99);
1490 col[0][RCOMP] = CC_SEL(cc, 104);
1491 /* col 3 */
1492 col[1][BCOMP] = CC_SEL(cc, 109);
1493 col[1][GCOMP] = CC_SEL(cc, 114);
1494 col[1][RCOMP] = CC_SEL(cc, 119);
1495 glsb = CC_SEL(cc, 126);
1496 selb = CC_SEL(cc, 33);
1497 } else {
1498 t = (cc[0] >> (t * 2)) & 3;
1499 /* col 0 */
1500 col[0][BCOMP] = CC_SEL(cc, 64);
1501 col[0][GCOMP] = CC_SEL(cc, 69);
1502 col[0][RCOMP] = CC_SEL(cc, 74);
1503 /* col 1 */
1504 col[1][BCOMP] = CC_SEL(cc, 79);
1505 col[1][GCOMP] = CC_SEL(cc, 84);
1506 col[1][RCOMP] = CC_SEL(cc, 89);
1507 glsb = CC_SEL(cc, 125);
1508 selb = CC_SEL(cc, 1);
1509 }
1510
1511 if (CC_SEL(cc, 124) & 1) {
1512 /* alpha[0] == 1 */
1513
1514 if (t == 3) {
1515 /* zero */
1516 rgba[RCOMP] = rgba[BCOMP] = rgba[GCOMP] = rgba[ACOMP] = 0;
1517 } else {
1518 GLubyte r, g, b;
1519 if (t == 0) {
1520 b = UP5(col[0][BCOMP]);
1521 g = UP5(col[0][GCOMP]);
1522 r = UP5(col[0][RCOMP]);
1523 } else if (t == 2) {
1524 b = UP5(col[1][BCOMP]);
1525 g = UP6(col[1][GCOMP], glsb);
1526 r = UP5(col[1][RCOMP]);
1527 } else {
1528 b = (UP5(col[0][BCOMP]) + UP5(col[1][BCOMP])) / 2;
1529 g = (UP5(col[0][GCOMP]) + UP6(col[1][GCOMP], glsb)) / 2;
1530 r = (UP5(col[0][RCOMP]) + UP5(col[1][RCOMP])) / 2;
1531 }
1532 rgba[RCOMP] = r;
1533 rgba[GCOMP] = g;
1534 rgba[BCOMP] = b;
1535 rgba[ACOMP] = 255;
1536 }
1537 } else {
1538 /* alpha[0] == 0 */
1539 GLubyte r, g, b;
1540 if (t == 0) {
1541 b = UP5(col[0][BCOMP]);
1542 g = UP6(col[0][GCOMP], glsb ^ selb);
1543 r = UP5(col[0][RCOMP]);
1544 } else if (t == 3) {
1545 b = UP5(col[1][BCOMP]);
1546 g = UP6(col[1][GCOMP], glsb);
1547 r = UP5(col[1][RCOMP]);
1548 } else {
1549 b = LERP(3, t, UP5(col[0][BCOMP]), UP5(col[1][BCOMP]));
1550 g = LERP(3, t, UP6(col[0][GCOMP], glsb ^ selb),
1551 UP6(col[1][GCOMP], glsb));
1552 r = LERP(3, t, UP5(col[0][RCOMP]), UP5(col[1][RCOMP]));
1553 }
1554 rgba[RCOMP] = r;
1555 rgba[GCOMP] = g;
1556 rgba[BCOMP] = b;
1557 rgba[ACOMP] = 255;
1558 }
1559 }
1560
1561
1562 static void
1563 fxt1_decode_1ALPHA (const GLubyte *code, GLint t, GLubyte *rgba)
1564 {
1565 const GLuint *cc;
1566 GLubyte r, g, b, a;
1567
1568 cc = (const GLuint *)code;
1569 if (CC_SEL(cc, 124) & 1) {
1570 /* lerp == 1 */
1571 GLuint col0[4];
1572
1573 if (t & 16) {
1574 t &= 15;
1575 t = (cc[1] >> (t * 2)) & 3;
1576 /* col 2 */
1577 col0[BCOMP] = (*(const GLuint *)(code + 11)) >> 6;
1578 col0[GCOMP] = CC_SEL(cc, 99);
1579 col0[RCOMP] = CC_SEL(cc, 104);
1580 col0[ACOMP] = CC_SEL(cc, 119);
1581 } else {
1582 t = (cc[0] >> (t * 2)) & 3;
1583 /* col 0 */
1584 col0[BCOMP] = CC_SEL(cc, 64);
1585 col0[GCOMP] = CC_SEL(cc, 69);
1586 col0[RCOMP] = CC_SEL(cc, 74);
1587 col0[ACOMP] = CC_SEL(cc, 109);
1588 }
1589
1590 if (t == 0) {
1591 b = UP5(col0[BCOMP]);
1592 g = UP5(col0[GCOMP]);
1593 r = UP5(col0[RCOMP]);
1594 a = UP5(col0[ACOMP]);
1595 } else if (t == 3) {
1596 b = UP5(CC_SEL(cc, 79));
1597 g = UP5(CC_SEL(cc, 84));
1598 r = UP5(CC_SEL(cc, 89));
1599 a = UP5(CC_SEL(cc, 114));
1600 } else {
1601 b = LERP(3, t, UP5(col0[BCOMP]), UP5(CC_SEL(cc, 79)));
1602 g = LERP(3, t, UP5(col0[GCOMP]), UP5(CC_SEL(cc, 84)));
1603 r = LERP(3, t, UP5(col0[RCOMP]), UP5(CC_SEL(cc, 89)));
1604 a = LERP(3, t, UP5(col0[ACOMP]), UP5(CC_SEL(cc, 114)));
1605 }
1606 } else {
1607 /* lerp == 0 */
1608
1609 if (t & 16) {
1610 cc++;
1611 t &= 15;
1612 }
1613 t = (cc[0] >> (t * 2)) & 3;
1614
1615 if (t == 3) {
1616 /* zero */
1617 r = g = b = a = 0;
1618 } else {
1619 GLuint kk;
1620 cc = (const GLuint *)code;
1621 a = UP5(cc[3] >> (t * 5 + 13));
1622 t *= 15;
1623 cc = (const GLuint *)(code + 8 + t / 8);
1624 kk = cc[0] >> (t & 7);
1625 b = UP5(kk);
1626 g = UP5(kk >> 5);
1627 r = UP5(kk >> 10);
1628 }
1629 }
1630 rgba[RCOMP] = r;
1631 rgba[GCOMP] = g;
1632 rgba[BCOMP] = b;
1633 rgba[ACOMP] = a;
1634 }
1635
1636
1637 void
1638 fxt1_decode_1 (const void *texture, GLint stride, /* in pixels */
1639 GLint i, GLint j, GLubyte *rgba)
1640 {
1641 static void (*decode_1[]) (const GLubyte *, GLint, GLubyte *) = {
1642 fxt1_decode_1HI, /* cc-high = "00?" */
1643 fxt1_decode_1HI, /* cc-high = "00?" */
1644 fxt1_decode_1CHROMA, /* cc-chroma = "010" */
1645 fxt1_decode_1ALPHA, /* alpha = "011" */
1646 fxt1_decode_1MIXED, /* mixed = "1??" */
1647 fxt1_decode_1MIXED, /* mixed = "1??" */
1648 fxt1_decode_1MIXED, /* mixed = "1??" */
1649 fxt1_decode_1MIXED /* mixed = "1??" */
1650 };
1651
1652 const GLubyte *code = (const GLubyte *)texture +
1653 ((j / 4) * (stride / 8) + (i / 8)) * 16;
1654 GLint mode = CC_SEL(code, 125);
1655 GLint t = i & 7;
1656
1657 if (t & 4) {
1658 t += 12;
1659 }
1660 t += (j & 3) * 4;
1661
1662 decode_1[mode](code, t, rgba);
1663 }
1664
1665
1666 #endif /* FEATURE_texture_fxt1 */