mesa/colormac: introduce inline helper for 4 unclamped float to ubyte.
[mesa.git] / src / mesa / main / texcompress_fxt1.c
1 /*
2 * Mesa 3-D graphics library
3 * Version: 7.1
4 *
5 * Copyright (C) 1999-2008 Brian Paul All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included
15 * in all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
21 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25
26 /**
27 * \file texcompress_fxt1.c
28 * GL_3DFX_texture_compression_FXT1 support.
29 */
30
31
32 #include "glheader.h"
33 #include "imports.h"
34 #include "colormac.h"
35 #include "image.h"
36 #include "macros.h"
37 #include "mfeatures.h"
38 #include "mipmap.h"
39 #include "texcompress.h"
40 #include "texcompress_fxt1.h"
41 #include "texstore.h"
42
43
44 #if FEATURE_texture_fxt1
45
46
47 static void
48 fxt1_encode (GLuint width, GLuint height, GLint comps,
49 const void *source, GLint srcRowStride,
50 void *dest, GLint destRowStride);
51
52 void
53 fxt1_decode_1 (const void *texture, GLint stride,
54 GLint i, GLint j, GLchan *rgba);
55
56
57 /**
58 * Store user's image in rgb_fxt1 format.
59 */
60 GLboolean
61 _mesa_texstore_rgb_fxt1(TEXSTORE_PARAMS)
62 {
63 const GLchan *pixels;
64 GLint srcRowStride;
65 GLubyte *dst;
66 const GLint texWidth = dstRowStride * 8 / 16; /* a bit of a hack */
67 const GLchan *tempImage = NULL;
68
69 ASSERT(dstFormat == MESA_FORMAT_RGB_FXT1);
70 ASSERT(dstXoffset % 8 == 0);
71 ASSERT(dstYoffset % 4 == 0);
72 ASSERT(dstZoffset == 0);
73 (void) dstZoffset;
74 (void) dstImageOffsets;
75
76 if (srcFormat != GL_RGB ||
77 srcType != CHAN_TYPE ||
78 ctx->_ImageTransferState ||
79 srcPacking->SwapBytes) {
80 /* convert image to RGB/GLchan */
81 tempImage = _mesa_make_temp_chan_image(ctx, dims,
82 baseInternalFormat,
83 _mesa_get_format_base_format(dstFormat),
84 srcWidth, srcHeight, srcDepth,
85 srcFormat, srcType, srcAddr,
86 srcPacking);
87 if (!tempImage)
88 return GL_FALSE; /* out of memory */
89 pixels = tempImage;
90 srcRowStride = 3 * srcWidth;
91 srcFormat = GL_RGB;
92 }
93 else {
94 pixels = (const GLchan *) srcAddr;
95 srcRowStride = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat,
96 srcType) / sizeof(GLchan);
97 }
98
99 dst = _mesa_compressed_image_address(dstXoffset, dstYoffset, 0,
100 dstFormat,
101 texWidth, (GLubyte *) dstAddr);
102
103 fxt1_encode(srcWidth, srcHeight, 3, pixels, srcRowStride,
104 dst, dstRowStride);
105
106 if (tempImage)
107 free((void*) tempImage);
108
109 return GL_TRUE;
110 }
111
112
113 /**
114 * Store user's image in rgba_fxt1 format.
115 */
116 GLboolean
117 _mesa_texstore_rgba_fxt1(TEXSTORE_PARAMS)
118 {
119 const GLchan *pixels;
120 GLint srcRowStride;
121 GLubyte *dst;
122 GLint texWidth = dstRowStride * 8 / 16; /* a bit of a hack */
123 const GLchan *tempImage = NULL;
124
125 ASSERT(dstFormat == MESA_FORMAT_RGBA_FXT1);
126 ASSERT(dstXoffset % 8 == 0);
127 ASSERT(dstYoffset % 4 == 0);
128 ASSERT(dstZoffset == 0);
129 (void) dstZoffset;
130 (void) dstImageOffsets;
131
132 if (srcFormat != GL_RGBA ||
133 srcType != CHAN_TYPE ||
134 ctx->_ImageTransferState ||
135 srcPacking->SwapBytes) {
136 /* convert image to RGBA/GLchan */
137 tempImage = _mesa_make_temp_chan_image(ctx, dims,
138 baseInternalFormat,
139 _mesa_get_format_base_format(dstFormat),
140 srcWidth, srcHeight, srcDepth,
141 srcFormat, srcType, srcAddr,
142 srcPacking);
143 if (!tempImage)
144 return GL_FALSE; /* out of memory */
145 pixels = tempImage;
146 srcRowStride = 4 * srcWidth;
147 srcFormat = GL_RGBA;
148 }
149 else {
150 pixels = (const GLchan *) srcAddr;
151 srcRowStride = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat,
152 srcType) / sizeof(GLchan);
153 }
154
155 dst = _mesa_compressed_image_address(dstXoffset, dstYoffset, 0,
156 dstFormat,
157 texWidth, (GLubyte *) dstAddr);
158
159 fxt1_encode(srcWidth, srcHeight, 4, pixels, srcRowStride,
160 dst, dstRowStride);
161
162 if (tempImage)
163 free((void*) tempImage);
164
165 return GL_TRUE;
166 }
167
168
169 void
170 _mesa_fetch_texel_2d_f_rgba_fxt1( const struct gl_texture_image *texImage,
171 GLint i, GLint j, GLint k, GLfloat *texel )
172 {
173 /* just sample as GLchan and convert to float here */
174 GLchan rgba[4];
175 (void) k;
176 fxt1_decode_1(texImage->Data, texImage->RowStride, i, j, rgba);
177 texel[RCOMP] = CHAN_TO_FLOAT(rgba[RCOMP]);
178 texel[GCOMP] = CHAN_TO_FLOAT(rgba[GCOMP]);
179 texel[BCOMP] = CHAN_TO_FLOAT(rgba[BCOMP]);
180 texel[ACOMP] = CHAN_TO_FLOAT(rgba[ACOMP]);
181 }
182
183
184 void
185 _mesa_fetch_texel_2d_f_rgb_fxt1( const struct gl_texture_image *texImage,
186 GLint i, GLint j, GLint k, GLfloat *texel )
187 {
188 /* just sample as GLchan and convert to float here */
189 GLchan rgba[4];
190 (void) k;
191 fxt1_decode_1(texImage->Data, texImage->RowStride, i, j, rgba);
192 texel[RCOMP] = CHAN_TO_FLOAT(rgba[RCOMP]);
193 texel[GCOMP] = CHAN_TO_FLOAT(rgba[GCOMP]);
194 texel[BCOMP] = CHAN_TO_FLOAT(rgba[BCOMP]);
195 texel[ACOMP] = 1.0F;
196 }
197
198
199
200 /***************************************************************************\
201 * FXT1 encoder
202 *
203 * The encoder was built by reversing the decoder,
204 * and is vaguely based on Texus2 by 3dfx. Note that this code
205 * is merely a proof of concept, since it is highly UNoptimized;
206 * moreover, it is sub-optimal due to initial conditions passed
207 * to Lloyd's algorithm (the interpolation modes are even worse).
208 \***************************************************************************/
209
210
#define MAX_COMP 4 /* ever needed maximum number of components in texel */
#define MAX_VECT 4 /* ever needed maximum number of base vectors to find */
#define N_TEXELS 32 /* number of texels in a block (always 32) */
#define LL_N_REP 50 /* number of iterations in lloyd's vq */
#define LL_RMS_D 10 /* fault tolerance (maximum delta) */
#define LL_RMS_E 255 /* fault tolerance (maximum error) */
#define ALPHA_TS 2 /* alpha threshold: (255 - ALPHA_TS) deemed opaque */

/*
 * True when the four bytes at v are all zero (transparent black).
 *
 * The bytes are read one at a time through an unsigned char pointer
 * instead of through a (GLuint *) cast: the texel arrays are byte arrays,
 * so a 32-bit load would violate the aliasing rules and could be
 * misaligned.  Note that v is evaluated four times, so it must not have
 * side effects.
 */
#define ISTBLACK(v) \
   ((((const unsigned char *)(v))[0] | \
     ((const unsigned char *)(v))[1] | \
     ((const unsigned char *)(v))[2] | \
     ((const unsigned char *)(v))[3]) == 0)
219
220
/*
 * Define a 64-bit unsigned integer type and macros.
 *
 * FX64_MOV32(a, b)  load the 32-bit value b into a (upper half cleared)
 * FX64_OR32(a, b)   OR the 32-bit value b into the low half of a
 * FX64_SHL(a, c)    shift a left by c bits; requires 0 < c < 64
 *
 * All macro arguments are parenthesized so that compound expressions
 * can be passed safely.
 */
#if 1

#define FX64_NATIVE 1

typedef uint64_t Fx64;

#define FX64_MOV32(a, b) ((a) = (b))
#define FX64_OR32(a, b) ((a) |= (b))
#define FX64_SHL(a, c) ((a) <<= (c))

#else

#define FX64_NATIVE 0

typedef struct {
   GLuint lo, hi;
} Fx64;

/* clear .hi as well: FX64_SHL reads it, and the native variant
 * zero-extends on assignment */
#define FX64_MOV32(a, b) \
   do { \
      (a).lo = (b); \
      (a).hi = 0; \
   } while (0)
#define FX64_OR32(a, b) ((a).lo |= (b))

#define FX64_SHL(a, c) \
   do { \
      if ((c) >= 32) { \
         (a).hi = (a).lo << ((c) - 32); \
         (a).lo = 0; \
      } else { \
         (a).hi = ((a).hi << (c)) | ((a).lo >> (32 - (c))); \
         (a).lo <<= (c); \
      } \
   } while (0)

#endif
257
258
/* per-component weight; can be used to obtain an oblong metric:
 * 0.30 / 0.59 / 0.11 */
#define F(i) (GLfloat)1
#define SAFECDOT 1 /* for paranoids */

/*
 * Build the interpolation vector IV and offset B that map a colour onto
 * the segment V0..V1 split into NV steps (NC components per colour).
 * Uses the loop index `i' of the enclosing function.
 * The result is undefined when V0 and V1 are identical (division by zero).
 */
#define MAKEIVEC(NV, NC, IV, B, V0, V1) \
   do { \
      GLfloat ivLen2 = 0.0F; /* squared length of V1 - V0 */ \
      GLfloat ivScale; \
      \
      for (i = 0; i < (NC); i++) { \
         (IV)[i] = ((V1)[i] - (V0)[i]) * F(i); \
         ivLen2 += (IV)[i] * (IV)[i]; \
      } \
      ivScale = (GLfloat)(NV) / ivLen2; \
      (B) = 0; \
      for (i = 0; i < (NC); i++) { \
         (IV)[i] *= F(i); \
         (B) -= (IV)[i] * (V0)[i]; \
         (IV)[i] *= ivScale; \
      } \
      /* + 0.5 so that truncation in CALCCDOT rounds to nearest */ \
      (B) = (B) * ivScale + 0.5f; \
   } while (0)

/*
 * Project colour V onto the interpolation vector produced by MAKEIVEC
 * and store the resulting step number in TEXEL.  With SAFECDOT set the
 * result is clamped to 0..NV.  Uses the loop index `i' of the enclosing
 * function.
 */
#define CALCCDOT(TEXEL, NV, NC, IV, B, V) \
   do { \
      GLfloat cdotAcc = 0.0F; \
      for (i = 0; i < (NC); i++) { \
         cdotAcc += (V)[i] * (IV)[i]; \
      } \
      (TEXEL) = (GLint)(cdotAcc + (B)); \
      if (SAFECDOT) { \
         if ((TEXEL) < 0) { \
            (TEXEL) = 0; \
         } else if ((TEXEL) > (NV)) { \
            (TEXEL) = (NV); \
         } \
      } \
   } while (0)
297
298
299 static GLint
300 fxt1_bestcol (GLfloat vec[][MAX_COMP], GLint nv,
301 GLubyte input[MAX_COMP], GLint nc)
302 {
303 GLint i, j, best = -1;
304 GLfloat err = 1e9; /* big enough */
305
306 for (j = 0; j < nv; j++) {
307 GLfloat e = 0.0F;
308 for (i = 0; i < nc; i++) {
309 e += (vec[j][i] - input[i]) * (vec[j][i] - input[i]);
310 }
311 if (e < err) {
312 err = e;
313 best = j;
314 }
315 }
316
317 return best;
318 }
319
320
321 static GLint
322 fxt1_worst (GLfloat vec[MAX_COMP],
323 GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
324 {
325 GLint i, k, worst = -1;
326 GLfloat err = -1.0F; /* small enough */
327
328 for (k = 0; k < n; k++) {
329 GLfloat e = 0.0F;
330 for (i = 0; i < nc; i++) {
331 e += (vec[i] - input[k][i]) * (vec[i] - input[k][i]);
332 }
333 if (e > err) {
334 err = e;
335 worst = k;
336 }
337 }
338
339 return worst;
340 }
341
342
343 static GLint
344 fxt1_variance (GLdouble variance[MAX_COMP],
345 GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
346 {
347 GLint i, k, best = 0;
348 GLint sx, sx2;
349 GLdouble var, maxvar = -1; /* small enough */
350 GLdouble teenth = 1.0 / n;
351
352 for (i = 0; i < nc; i++) {
353 sx = sx2 = 0;
354 for (k = 0; k < n; k++) {
355 GLint t = input[k][i];
356 sx += t;
357 sx2 += t * t;
358 }
359 var = sx2 * teenth - sx * sx * teenth * teenth;
360 if (maxvar < var) {
361 maxvar = var;
362 best = i;
363 }
364 if (variance) {
365 variance[i] = var;
366 }
367 }
368
369 return best;
370 }
371
372
373 static GLint
374 fxt1_choose (GLfloat vec[][MAX_COMP], GLint nv,
375 GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
376 {
377 #if 0
378 /* Choose colors from a grid.
379 */
380 GLint i, j;
381
382 for (j = 0; j < nv; j++) {
383 GLint m = j * (n - 1) / (nv - 1);
384 for (i = 0; i < nc; i++) {
385 vec[j][i] = input[m][i];
386 }
387 }
388 #else
389 /* Our solution here is to find the darkest and brightest colors in
390 * the 8x4 tile and use those as the two representative colors.
391 * There are probably better algorithms to use (histogram-based).
392 */
393 GLint i, j, k;
394 GLint minSum = 2000; /* big enough */
395 GLint maxSum = -1; /* small enough */
396 GLint minCol = 0; /* phoudoin: silent compiler! */
397 GLint maxCol = 0; /* phoudoin: silent compiler! */
398
399 struct {
400 GLint flag;
401 GLint key;
402 GLint freq;
403 GLint idx;
404 } hist[N_TEXELS];
405 GLint lenh = 0;
406
407 memset(hist, 0, sizeof(hist));
408
409 for (k = 0; k < n; k++) {
410 GLint l;
411 GLint key = 0;
412 GLint sum = 0;
413 for (i = 0; i < nc; i++) {
414 key <<= 8;
415 key |= input[k][i];
416 sum += input[k][i];
417 }
418 for (l = 0; l < n; l++) {
419 if (!hist[l].flag) {
420 /* alloc new slot */
421 hist[l].flag = !0;
422 hist[l].key = key;
423 hist[l].freq = 1;
424 hist[l].idx = k;
425 lenh = l + 1;
426 break;
427 } else if (hist[l].key == key) {
428 hist[l].freq++;
429 break;
430 }
431 }
432 if (minSum > sum) {
433 minSum = sum;
434 minCol = k;
435 }
436 if (maxSum < sum) {
437 maxSum = sum;
438 maxCol = k;
439 }
440 }
441
442 if (lenh <= nv) {
443 for (j = 0; j < lenh; j++) {
444 for (i = 0; i < nc; i++) {
445 vec[j][i] = (GLfloat)input[hist[j].idx][i];
446 }
447 }
448 for (; j < nv; j++) {
449 for (i = 0; i < nc; i++) {
450 vec[j][i] = vec[0][i];
451 }
452 }
453 return 0;
454 }
455
456 for (j = 0; j < nv; j++) {
457 for (i = 0; i < nc; i++) {
458 vec[j][i] = ((nv - 1 - j) * input[minCol][i] + j * input[maxCol][i] + (nv - 1) / 2) / (GLfloat)(nv - 1);
459 }
460 }
461 #endif
462
463 return !0;
464 }
465
466
467 static GLint
468 fxt1_lloyd (GLfloat vec[][MAX_COMP], GLint nv,
469 GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
470 {
471 /* Use the generalized lloyd's algorithm for VQ:
472 * find 4 color vectors.
473 *
474 * for each sample color
475 * sort to nearest vector.
476 *
477 * replace each vector with the centroid of its matching colors.
478 *
479 * repeat until RMS doesn't improve.
480 *
481 * if a color vector has no samples, or becomes the same as another
482 * vector, replace it with the color which is farthest from a sample.
483 *
484 * vec[][MAX_COMP] initial vectors and resulting colors
485 * nv number of resulting colors required
486 * input[N_TEXELS][MAX_COMP] input texels
487 * nc number of components in input / vec
488 * n number of input samples
489 */
490
491 GLint sum[MAX_VECT][MAX_COMP]; /* used to accumulate closest texels */
492 GLint cnt[MAX_VECT]; /* how many times a certain vector was chosen */
493 GLfloat error, lasterror = 1e9;
494
495 GLint i, j, k, rep;
496
497 /* the quantizer */
498 for (rep = 0; rep < LL_N_REP; rep++) {
499 /* reset sums & counters */
500 for (j = 0; j < nv; j++) {
501 for (i = 0; i < nc; i++) {
502 sum[j][i] = 0;
503 }
504 cnt[j] = 0;
505 }
506 error = 0;
507
508 /* scan whole block */
509 for (k = 0; k < n; k++) {
510 #if 1
511 GLint best = -1;
512 GLfloat err = 1e9; /* big enough */
513 /* determine best vector */
514 for (j = 0; j < nv; j++) {
515 GLfloat e = (vec[j][0] - input[k][0]) * (vec[j][0] - input[k][0]) +
516 (vec[j][1] - input[k][1]) * (vec[j][1] - input[k][1]) +
517 (vec[j][2] - input[k][2]) * (vec[j][2] - input[k][2]);
518 if (nc == 4) {
519 e += (vec[j][3] - input[k][3]) * (vec[j][3] - input[k][3]);
520 }
521 if (e < err) {
522 err = e;
523 best = j;
524 }
525 }
526 #else
527 GLint best = fxt1_bestcol(vec, nv, input[k], nc, &err);
528 #endif
529 assert(best >= 0);
530 /* add in closest color */
531 for (i = 0; i < nc; i++) {
532 sum[best][i] += input[k][i];
533 }
534 /* mark this vector as used */
535 cnt[best]++;
536 /* accumulate error */
537 error += err;
538 }
539
540 /* check RMS */
541 if ((error < LL_RMS_E) ||
542 ((error < lasterror) && ((lasterror - error) < LL_RMS_D))) {
543 return !0; /* good match */
544 }
545 lasterror = error;
546
547 /* move each vector to the barycenter of its closest colors */
548 for (j = 0; j < nv; j++) {
549 if (cnt[j]) {
550 GLfloat div = 1.0F / cnt[j];
551 for (i = 0; i < nc; i++) {
552 vec[j][i] = div * sum[j][i];
553 }
554 } else {
555 /* this vec has no samples or is identical with a previous vec */
556 GLint worst = fxt1_worst(vec[j], input, nc, n);
557 for (i = 0; i < nc; i++) {
558 vec[j][i] = input[worst][i];
559 }
560 }
561 }
562 }
563
564 return 0; /* could not converge fast enough */
565 }
566
567
568 static void
569 fxt1_quantize_CHROMA (GLuint *cc,
570 GLubyte input[N_TEXELS][MAX_COMP])
571 {
572 const GLint n_vect = 4; /* 4 base vectors to find */
573 const GLint n_comp = 3; /* 3 components: R, G, B */
574 GLfloat vec[MAX_VECT][MAX_COMP];
575 GLint i, j, k;
576 Fx64 hi; /* high quadword */
577 GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
578
579 if (fxt1_choose(vec, n_vect, input, n_comp, N_TEXELS) != 0) {
580 fxt1_lloyd(vec, n_vect, input, n_comp, N_TEXELS);
581 }
582
583 FX64_MOV32(hi, 4); /* cc-chroma = "010" + unused bit */
584 for (j = n_vect - 1; j >= 0; j--) {
585 for (i = 0; i < n_comp; i++) {
586 /* add in colors */
587 FX64_SHL(hi, 5);
588 FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
589 }
590 }
591 ((Fx64 *)cc)[1] = hi;
592
593 lohi = lolo = 0;
594 /* right microtile */
595 for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) {
596 lohi <<= 2;
597 lohi |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
598 }
599 /* left microtile */
600 for (; k >= 0; k--) {
601 lolo <<= 2;
602 lolo |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
603 }
604 cc[1] = lohi;
605 cc[0] = lolo;
606 }
607
608
609 static void
610 fxt1_quantize_ALPHA0 (GLuint *cc,
611 GLubyte input[N_TEXELS][MAX_COMP],
612 GLubyte reord[N_TEXELS][MAX_COMP], GLint n)
613 {
614 const GLint n_vect = 3; /* 3 base vectors to find */
615 const GLint n_comp = 4; /* 4 components: R, G, B, A */
616 GLfloat vec[MAX_VECT][MAX_COMP];
617 GLint i, j, k;
618 Fx64 hi; /* high quadword */
619 GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
620
621 /* the last vector indicates zero */
622 for (i = 0; i < n_comp; i++) {
623 vec[n_vect][i] = 0;
624 }
625
626 /* the first n texels in reord are guaranteed to be non-zero */
627 if (fxt1_choose(vec, n_vect, reord, n_comp, n) != 0) {
628 fxt1_lloyd(vec, n_vect, reord, n_comp, n);
629 }
630
631 FX64_MOV32(hi, 6); /* alpha = "011" + lerp = 0 */
632 for (j = n_vect - 1; j >= 0; j--) {
633 /* add in alphas */
634 FX64_SHL(hi, 5);
635 FX64_OR32(hi, (GLuint)(vec[j][ACOMP] / 8.0F));
636 }
637 for (j = n_vect - 1; j >= 0; j--) {
638 for (i = 0; i < n_comp - 1; i++) {
639 /* add in colors */
640 FX64_SHL(hi, 5);
641 FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
642 }
643 }
644 ((Fx64 *)cc)[1] = hi;
645
646 lohi = lolo = 0;
647 /* right microtile */
648 for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) {
649 lohi <<= 2;
650 lohi |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
651 }
652 /* left microtile */
653 for (; k >= 0; k--) {
654 lolo <<= 2;
655 lolo |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
656 }
657 cc[1] = lohi;
658 cc[0] = lolo;
659 }
660
661
662 static void
663 fxt1_quantize_ALPHA1 (GLuint *cc,
664 GLubyte input[N_TEXELS][MAX_COMP])
665 {
666 const GLint n_vect = 3; /* highest vector number in each microtile */
667 const GLint n_comp = 4; /* 4 components: R, G, B, A */
668 GLfloat vec[1 + 1 + 1][MAX_COMP]; /* 1.5 extrema for each sub-block */
669 GLfloat b, iv[MAX_COMP]; /* interpolation vector */
670 GLint i, j, k;
671 Fx64 hi; /* high quadword */
672 GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
673
674 GLint minSum;
675 GLint maxSum;
676 GLint minColL = 0, maxColL = 0;
677 GLint minColR = 0, maxColR = 0;
678 GLint sumL = 0, sumR = 0;
679 GLint nn_comp;
680 /* Our solution here is to find the darkest and brightest colors in
681 * the 4x4 tile and use those as the two representative colors.
682 * There are probably better algorithms to use (histogram-based).
683 */
684 nn_comp = n_comp;
685 while ((minColL == maxColL) && nn_comp) {
686 minSum = 2000; /* big enough */
687 maxSum = -1; /* small enough */
688 for (k = 0; k < N_TEXELS / 2; k++) {
689 GLint sum = 0;
690 for (i = 0; i < nn_comp; i++) {
691 sum += input[k][i];
692 }
693 if (minSum > sum) {
694 minSum = sum;
695 minColL = k;
696 }
697 if (maxSum < sum) {
698 maxSum = sum;
699 maxColL = k;
700 }
701 sumL += sum;
702 }
703
704 nn_comp--;
705 }
706
707 nn_comp = n_comp;
708 while ((minColR == maxColR) && nn_comp) {
709 minSum = 2000; /* big enough */
710 maxSum = -1; /* small enough */
711 for (k = N_TEXELS / 2; k < N_TEXELS; k++) {
712 GLint sum = 0;
713 for (i = 0; i < nn_comp; i++) {
714 sum += input[k][i];
715 }
716 if (minSum > sum) {
717 minSum = sum;
718 minColR = k;
719 }
720 if (maxSum < sum) {
721 maxSum = sum;
722 maxColR = k;
723 }
724 sumR += sum;
725 }
726
727 nn_comp--;
728 }
729
730 /* choose the common vector (yuck!) */
731 {
732 GLint j1, j2;
733 GLint v1 = 0, v2 = 0;
734 GLfloat err = 1e9; /* big enough */
735 GLfloat tv[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
736 for (i = 0; i < n_comp; i++) {
737 tv[0][i] = input[minColL][i];
738 tv[1][i] = input[maxColL][i];
739 tv[2][i] = input[minColR][i];
740 tv[3][i] = input[maxColR][i];
741 }
742 for (j1 = 0; j1 < 2; j1++) {
743 for (j2 = 2; j2 < 4; j2++) {
744 GLfloat e = 0.0F;
745 for (i = 0; i < n_comp; i++) {
746 e += (tv[j1][i] - tv[j2][i]) * (tv[j1][i] - tv[j2][i]);
747 }
748 if (e < err) {
749 err = e;
750 v1 = j1;
751 v2 = j2;
752 }
753 }
754 }
755 for (i = 0; i < n_comp; i++) {
756 vec[0][i] = tv[1 - v1][i];
757 vec[1][i] = (tv[v1][i] * sumL + tv[v2][i] * sumR) / (sumL + sumR);
758 vec[2][i] = tv[5 - v2][i];
759 }
760 }
761
762 /* left microtile */
763 cc[0] = 0;
764 if (minColL != maxColL) {
765 /* compute interpolation vector */
766 MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
767
768 /* add in texels */
769 lolo = 0;
770 for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
771 GLint texel;
772 /* interpolate color */
773 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
774 /* add in texel */
775 lolo <<= 2;
776 lolo |= texel;
777 }
778
779 cc[0] = lolo;
780 }
781
782 /* right microtile */
783 cc[1] = 0;
784 if (minColR != maxColR) {
785 /* compute interpolation vector */
786 MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[1]);
787
788 /* add in texels */
789 lohi = 0;
790 for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
791 GLint texel;
792 /* interpolate color */
793 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
794 /* add in texel */
795 lohi <<= 2;
796 lohi |= texel;
797 }
798
799 cc[1] = lohi;
800 }
801
802 FX64_MOV32(hi, 7); /* alpha = "011" + lerp = 1 */
803 for (j = n_vect - 1; j >= 0; j--) {
804 /* add in alphas */
805 FX64_SHL(hi, 5);
806 FX64_OR32(hi, (GLuint)(vec[j][ACOMP] / 8.0F));
807 }
808 for (j = n_vect - 1; j >= 0; j--) {
809 for (i = 0; i < n_comp - 1; i++) {
810 /* add in colors */
811 FX64_SHL(hi, 5);
812 FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
813 }
814 }
815 ((Fx64 *)cc)[1] = hi;
816 }
817
818
819 static void
820 fxt1_quantize_HI (GLuint *cc,
821 GLubyte input[N_TEXELS][MAX_COMP],
822 GLubyte reord[N_TEXELS][MAX_COMP], GLint n)
823 {
824 const GLint n_vect = 6; /* highest vector number */
825 const GLint n_comp = 3; /* 3 components: R, G, B */
826 GLfloat b = 0.0F; /* phoudoin: silent compiler! */
827 GLfloat iv[MAX_COMP]; /* interpolation vector */
828 GLint i, k;
829 GLuint hihi; /* high quadword: hi dword */
830
831 GLint minSum = 2000; /* big enough */
832 GLint maxSum = -1; /* small enough */
833 GLint minCol = 0; /* phoudoin: silent compiler! */
834 GLint maxCol = 0; /* phoudoin: silent compiler! */
835
836 /* Our solution here is to find the darkest and brightest colors in
837 * the 8x4 tile and use those as the two representative colors.
838 * There are probably better algorithms to use (histogram-based).
839 */
840 for (k = 0; k < n; k++) {
841 GLint sum = 0;
842 for (i = 0; i < n_comp; i++) {
843 sum += reord[k][i];
844 }
845 if (minSum > sum) {
846 minSum = sum;
847 minCol = k;
848 }
849 if (maxSum < sum) {
850 maxSum = sum;
851 maxCol = k;
852 }
853 }
854
855 hihi = 0; /* cc-hi = "00" */
856 for (i = 0; i < n_comp; i++) {
857 /* add in colors */
858 hihi <<= 5;
859 hihi |= reord[maxCol][i] >> 3;
860 }
861 for (i = 0; i < n_comp; i++) {
862 /* add in colors */
863 hihi <<= 5;
864 hihi |= reord[minCol][i] >> 3;
865 }
866 cc[3] = hihi;
867 cc[0] = cc[1] = cc[2] = 0;
868
869 /* compute interpolation vector */
870 if (minCol != maxCol) {
871 MAKEIVEC(n_vect, n_comp, iv, b, reord[minCol], reord[maxCol]);
872 }
873
874 /* add in texels */
875 for (k = N_TEXELS - 1; k >= 0; k--) {
876 GLint t = k * 3;
877 GLuint *kk = (GLuint *)((char *)cc + t / 8);
878 GLint texel = n_vect + 1; /* transparent black */
879
880 if (!ISTBLACK(input[k])) {
881 if (minCol != maxCol) {
882 /* interpolate color */
883 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
884 /* add in texel */
885 kk[0] |= texel << (t & 7);
886 }
887 } else {
888 /* add in texel */
889 kk[0] |= texel << (t & 7);
890 }
891 }
892 }
893
894
895 static void
896 fxt1_quantize_MIXED1 (GLuint *cc,
897 GLubyte input[N_TEXELS][MAX_COMP])
898 {
899 const GLint n_vect = 2; /* highest vector number in each microtile */
900 const GLint n_comp = 3; /* 3 components: R, G, B */
901 GLubyte vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
902 GLfloat b, iv[MAX_COMP]; /* interpolation vector */
903 GLint i, j, k;
904 Fx64 hi; /* high quadword */
905 GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
906
907 GLint minSum;
908 GLint maxSum;
909 GLint minColL = 0, maxColL = -1;
910 GLint minColR = 0, maxColR = -1;
911
912 /* Our solution here is to find the darkest and brightest colors in
913 * the 4x4 tile and use those as the two representative colors.
914 * There are probably better algorithms to use (histogram-based).
915 */
916 minSum = 2000; /* big enough */
917 maxSum = -1; /* small enough */
918 for (k = 0; k < N_TEXELS / 2; k++) {
919 if (!ISTBLACK(input[k])) {
920 GLint sum = 0;
921 for (i = 0; i < n_comp; i++) {
922 sum += input[k][i];
923 }
924 if (minSum > sum) {
925 minSum = sum;
926 minColL = k;
927 }
928 if (maxSum < sum) {
929 maxSum = sum;
930 maxColL = k;
931 }
932 }
933 }
934 minSum = 2000; /* big enough */
935 maxSum = -1; /* small enough */
936 for (; k < N_TEXELS; k++) {
937 if (!ISTBLACK(input[k])) {
938 GLint sum = 0;
939 for (i = 0; i < n_comp; i++) {
940 sum += input[k][i];
941 }
942 if (minSum > sum) {
943 minSum = sum;
944 minColR = k;
945 }
946 if (maxSum < sum) {
947 maxSum = sum;
948 maxColR = k;
949 }
950 }
951 }
952
953 /* left microtile */
954 if (maxColL == -1) {
955 /* all transparent black */
956 cc[0] = ~0u;
957 for (i = 0; i < n_comp; i++) {
958 vec[0][i] = 0;
959 vec[1][i] = 0;
960 }
961 } else {
962 cc[0] = 0;
963 for (i = 0; i < n_comp; i++) {
964 vec[0][i] = input[minColL][i];
965 vec[1][i] = input[maxColL][i];
966 }
967 if (minColL != maxColL) {
968 /* compute interpolation vector */
969 MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
970
971 /* add in texels */
972 lolo = 0;
973 for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
974 GLint texel = n_vect + 1; /* transparent black */
975 if (!ISTBLACK(input[k])) {
976 /* interpolate color */
977 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
978 }
979 /* add in texel */
980 lolo <<= 2;
981 lolo |= texel;
982 }
983 cc[0] = lolo;
984 }
985 }
986
987 /* right microtile */
988 if (maxColR == -1) {
989 /* all transparent black */
990 cc[1] = ~0u;
991 for (i = 0; i < n_comp; i++) {
992 vec[2][i] = 0;
993 vec[3][i] = 0;
994 }
995 } else {
996 cc[1] = 0;
997 for (i = 0; i < n_comp; i++) {
998 vec[2][i] = input[minColR][i];
999 vec[3][i] = input[maxColR][i];
1000 }
1001 if (minColR != maxColR) {
1002 /* compute interpolation vector */
1003 MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);
1004
1005 /* add in texels */
1006 lohi = 0;
1007 for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
1008 GLint texel = n_vect + 1; /* transparent black */
1009 if (!ISTBLACK(input[k])) {
1010 /* interpolate color */
1011 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
1012 }
1013 /* add in texel */
1014 lohi <<= 2;
1015 lohi |= texel;
1016 }
1017 cc[1] = lohi;
1018 }
1019 }
1020
1021 FX64_MOV32(hi, 9 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
1022 for (j = 2 * 2 - 1; j >= 0; j--) {
1023 for (i = 0; i < n_comp; i++) {
1024 /* add in colors */
1025 FX64_SHL(hi, 5);
1026 FX64_OR32(hi, vec[j][i] >> 3);
1027 }
1028 }
1029 ((Fx64 *)cc)[1] = hi;
1030 }
1031
1032
1033 static void
1034 fxt1_quantize_MIXED0 (GLuint *cc,
1035 GLubyte input[N_TEXELS][MAX_COMP])
1036 {
1037 const GLint n_vect = 3; /* highest vector number in each microtile */
1038 const GLint n_comp = 3; /* 3 components: R, G, B */
1039 GLubyte vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
1040 GLfloat b, iv[MAX_COMP]; /* interpolation vector */
1041 GLint i, j, k;
1042 Fx64 hi; /* high quadword */
1043 GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
1044
1045 GLint minColL = 0, maxColL = 0;
1046 GLint minColR = 0, maxColR = 0;
1047 #if 0
1048 GLint minSum;
1049 GLint maxSum;
1050
1051 /* Our solution here is to find the darkest and brightest colors in
1052 * the 4x4 tile and use those as the two representative colors.
1053 * There are probably better algorithms to use (histogram-based).
1054 */
1055 minSum = 2000; /* big enough */
1056 maxSum = -1; /* small enough */
1057 for (k = 0; k < N_TEXELS / 2; k++) {
1058 GLint sum = 0;
1059 for (i = 0; i < n_comp; i++) {
1060 sum += input[k][i];
1061 }
1062 if (minSum > sum) {
1063 minSum = sum;
1064 minColL = k;
1065 }
1066 if (maxSum < sum) {
1067 maxSum = sum;
1068 maxColL = k;
1069 }
1070 }
1071 minSum = 2000; /* big enough */
1072 maxSum = -1; /* small enough */
1073 for (; k < N_TEXELS; k++) {
1074 GLint sum = 0;
1075 for (i = 0; i < n_comp; i++) {
1076 sum += input[k][i];
1077 }
1078 if (minSum > sum) {
1079 minSum = sum;
1080 minColR = k;
1081 }
1082 if (maxSum < sum) {
1083 maxSum = sum;
1084 maxColR = k;
1085 }
1086 }
1087 #else
1088 GLint minVal;
1089 GLint maxVal;
1090 GLint maxVarL = fxt1_variance(NULL, input, n_comp, N_TEXELS / 2);
1091 GLint maxVarR = fxt1_variance(NULL, &input[N_TEXELS / 2], n_comp, N_TEXELS / 2);
1092
1093 /* Scan the channel with max variance for lo & hi
1094 * and use those as the two representative colors.
1095 */
1096 minVal = 2000; /* big enough */
1097 maxVal = -1; /* small enough */
1098 for (k = 0; k < N_TEXELS / 2; k++) {
1099 GLint t = input[k][maxVarL];
1100 if (minVal > t) {
1101 minVal = t;
1102 minColL = k;
1103 }
1104 if (maxVal < t) {
1105 maxVal = t;
1106 maxColL = k;
1107 }
1108 }
1109 minVal = 2000; /* big enough */
1110 maxVal = -1; /* small enough */
1111 for (; k < N_TEXELS; k++) {
1112 GLint t = input[k][maxVarR];
1113 if (minVal > t) {
1114 minVal = t;
1115 minColR = k;
1116 }
1117 if (maxVal < t) {
1118 maxVal = t;
1119 maxColR = k;
1120 }
1121 }
1122 #endif
1123
1124 /* left microtile */
1125 cc[0] = 0;
1126 for (i = 0; i < n_comp; i++) {
1127 vec[0][i] = input[minColL][i];
1128 vec[1][i] = input[maxColL][i];
1129 }
1130 if (minColL != maxColL) {
1131 /* compute interpolation vector */
1132 MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
1133
1134 /* add in texels */
1135 lolo = 0;
1136 for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
1137 GLint texel;
1138 /* interpolate color */
1139 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
1140 /* add in texel */
1141 lolo <<= 2;
1142 lolo |= texel;
1143 }
1144
1145 /* funky encoding for LSB of green */
1146 if ((GLint)((lolo >> 1) & 1) != (((vec[1][GCOMP] ^ vec[0][GCOMP]) >> 2) & 1)) {
1147 for (i = 0; i < n_comp; i++) {
1148 vec[1][i] = input[minColL][i];
1149 vec[0][i] = input[maxColL][i];
1150 }
1151 lolo = ~lolo;
1152 }
1153
1154 cc[0] = lolo;
1155 }
1156
1157 /* right microtile */
1158 cc[1] = 0;
1159 for (i = 0; i < n_comp; i++) {
1160 vec[2][i] = input[minColR][i];
1161 vec[3][i] = input[maxColR][i];
1162 }
1163 if (minColR != maxColR) {
1164 /* compute interpolation vector */
1165 MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);
1166
1167 /* add in texels */
1168 lohi = 0;
1169 for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
1170 GLint texel;
1171 /* interpolate color */
1172 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
1173 /* add in texel */
1174 lohi <<= 2;
1175 lohi |= texel;
1176 }
1177
1178 /* funky encoding for LSB of green */
1179 if ((GLint)((lohi >> 1) & 1) != (((vec[3][GCOMP] ^ vec[2][GCOMP]) >> 2) & 1)) {
1180 for (i = 0; i < n_comp; i++) {
1181 vec[3][i] = input[minColR][i];
1182 vec[2][i] = input[maxColR][i];
1183 }
1184 lohi = ~lohi;
1185 }
1186
1187 cc[1] = lohi;
1188 }
1189
1190 FX64_MOV32(hi, 8 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
1191 for (j = 2 * 2 - 1; j >= 0; j--) {
1192 for (i = 0; i < n_comp; i++) {
1193 /* add in colors */
1194 FX64_SHL(hi, 5);
1195 FX64_OR32(hi, vec[j][i] >> 3);
1196 }
1197 }
1198 ((Fx64 *)cc)[1] = hi;
1199 }
1200
1201
1202 static void
1203 fxt1_quantize (GLuint *cc, const GLubyte *lines[], GLint comps)
1204 {
1205 GLint trualpha;
1206 GLubyte reord[N_TEXELS][MAX_COMP];
1207
1208 GLubyte input[N_TEXELS][MAX_COMP];
1209 GLint i, k, l;
1210
1211 if (comps == 3) {
1212 /* make the whole block opaque */
1213 memset(input, -1, sizeof(input));
1214 }
1215
1216 /* 8 texels each line */
1217 for (l = 0; l < 4; l++) {
1218 for (k = 0; k < 4; k++) {
1219 for (i = 0; i < comps; i++) {
1220 input[k + l * 4][i] = *lines[l]++;
1221 }
1222 }
1223 for (; k < 8; k++) {
1224 for (i = 0; i < comps; i++) {
1225 input[k + l * 4 + 12][i] = *lines[l]++;
1226 }
1227 }
1228 }
1229
1230 /* block layout:
1231 * 00, 01, 02, 03, 08, 09, 0a, 0b
1232 * 10, 11, 12, 13, 18, 19, 1a, 1b
1233 * 04, 05, 06, 07, 0c, 0d, 0e, 0f
1234 * 14, 15, 16, 17, 1c, 1d, 1e, 1f
1235 */
1236
1237 /* [dBorca]
1238 * stupidity flows forth from this
1239 */
1240 l = N_TEXELS;
1241 trualpha = 0;
1242 if (comps == 4) {
1243 /* skip all transparent black texels */
1244 l = 0;
1245 for (k = 0; k < N_TEXELS; k++) {
1246 /* test all components against 0 */
1247 if (!ISTBLACK(input[k])) {
1248 /* texel is not transparent black */
1249 COPY_4UBV(reord[l], input[k]);
1250 if (reord[l][ACOMP] < (255 - ALPHA_TS)) {
1251 /* non-opaque texel */
1252 trualpha = !0;
1253 }
1254 l++;
1255 }
1256 }
1257 }
1258
1259 #if 0
1260 if (trualpha) {
1261 fxt1_quantize_ALPHA0(cc, input, reord, l);
1262 } else if (l == 0) {
1263 cc[0] = cc[1] = cc[2] = -1;
1264 cc[3] = 0;
1265 } else if (l < N_TEXELS) {
1266 fxt1_quantize_HI(cc, input, reord, l);
1267 } else {
1268 fxt1_quantize_CHROMA(cc, input);
1269 }
1270 (void)fxt1_quantize_ALPHA1;
1271 (void)fxt1_quantize_MIXED1;
1272 (void)fxt1_quantize_MIXED0;
1273 #else
1274 if (trualpha) {
1275 fxt1_quantize_ALPHA1(cc, input);
1276 } else if (l == 0) {
1277 cc[0] = cc[1] = cc[2] = ~0u;
1278 cc[3] = 0;
1279 } else if (l < N_TEXELS) {
1280 fxt1_quantize_MIXED1(cc, input);
1281 } else {
1282 fxt1_quantize_MIXED0(cc, input);
1283 }
1284 (void)fxt1_quantize_ALPHA0;
1285 (void)fxt1_quantize_HI;
1286 (void)fxt1_quantize_CHROMA;
1287 #endif
1288 }
1289
1290
1291 static void
1292 fxt1_encode (GLuint width, GLuint height, GLint comps,
1293 const void *source, GLint srcRowStride,
1294 void *dest, GLint destRowStride)
1295 {
1296 GLuint x, y;
1297 const GLubyte *data;
1298 GLuint *encoded = (GLuint *)dest;
1299 void *newSource = NULL;
1300
1301 assert(comps == 3 || comps == 4);
1302
1303 /* Replicate image if width is not M8 or height is not M4 */
1304 if ((width & 7) | (height & 3)) {
1305 GLint newWidth = (width + 7) & ~7;
1306 GLint newHeight = (height + 3) & ~3;
1307 newSource = malloc(comps * newWidth * newHeight * sizeof(GLchan));
1308 if (!newSource) {
1309 GET_CURRENT_CONTEXT(ctx);
1310 _mesa_error(ctx, GL_OUT_OF_MEMORY, "texture compression");
1311 goto cleanUp;
1312 }
1313 _mesa_upscale_teximage2d(width, height, newWidth, newHeight,
1314 comps, (const GLchan *) source,
1315 srcRowStride, (GLchan *) newSource);
1316 source = newSource;
1317 width = newWidth;
1318 height = newHeight;
1319 srcRowStride = comps * newWidth;
1320 }
1321
1322 /* convert from 16/32-bit channels to GLubyte if needed */
1323 if (CHAN_TYPE != GL_UNSIGNED_BYTE) {
1324 const GLuint n = width * height * comps;
1325 const GLchan *src = (const GLchan *) source;
1326 GLubyte *dest = (GLubyte *) malloc(n * sizeof(GLubyte));
1327 GLuint i;
1328 if (!dest) {
1329 GET_CURRENT_CONTEXT(ctx);
1330 _mesa_error(ctx, GL_OUT_OF_MEMORY, "texture compression");
1331 goto cleanUp;
1332 }
1333 for (i = 0; i < n; i++) {
1334 dest[i] = CHAN_TO_UBYTE(src[i]);
1335 }
1336 if (newSource != NULL) {
1337 free(newSource);
1338 }
1339 newSource = dest; /* we'll free this buffer before returning */
1340 source = dest; /* the new, GLubyte incoming image */
1341 }
1342
1343 data = (const GLubyte *) source;
1344 destRowStride = (destRowStride - width * 2) / 4;
1345 for (y = 0; y < height; y += 4) {
1346 GLuint offs = 0 + (y + 0) * srcRowStride;
1347 for (x = 0; x < width; x += 8) {
1348 const GLubyte *lines[4];
1349 lines[0] = &data[offs];
1350 lines[1] = lines[0] + srcRowStride;
1351 lines[2] = lines[1] + srcRowStride;
1352 lines[3] = lines[2] + srcRowStride;
1353 offs += 8 * comps;
1354 fxt1_quantize(encoded, lines, comps);
1355 /* 128 bits per 8x4 block */
1356 encoded += 4;
1357 }
1358 encoded += destRowStride;
1359 }
1360
1361 cleanUp:
1362 if (newSource != NULL) {
1363 free(newSource);
1364 }
1365 }
1366
1367
1368 /***************************************************************************\
1369 * FXT1 decoder
1370 *
1371 * The decoder is based on GL_3DFX_texture_compression_FXT1
1372 * specification and serves as a concept for the encoder.
1373 \***************************************************************************/
1374
1375
1376 /* lookup table for scaling 5 bit colors up to 8 bits */
1377 static const GLubyte _rgb_scale_5[] = {
1378 0, 8, 16, 25, 33, 41, 49, 58,
1379 66, 74, 82, 90, 99, 107, 115, 123,
1380 132, 140, 148, 156, 165, 173, 181, 189,
1381 197, 206, 214, 222, 230, 239, 247, 255
1382 };
1383
1384 /* lookup table for scaling 6 bit colors up to 8 bits */
1385 static const GLubyte _rgb_scale_6[] = {
1386 0, 4, 8, 12, 16, 20, 24, 28,
1387 32, 36, 40, 45, 49, 53, 57, 61,
1388 65, 69, 73, 77, 81, 85, 89, 93,
1389 97, 101, 105, 109, 113, 117, 121, 125,
1390 130, 134, 138, 142, 146, 150, 154, 158,
1391 162, 166, 170, 174, 178, 182, 186, 190,
1392 194, 198, 202, 206, 210, 215, 219, 223,
1393 227, 231, 235, 239, 243, 247, 251, 255
1394 };
1395
1396
/* CC_SEL: view `cc` as an array of GLuint words and return the word that
 * contains bit `which`, shifted right so that bit becomes bit 0.  Higher
 * bits of that word are still present; callers mask what they need.
 * Treats each GLuint as 32 bits wide.
 */
#define CC_SEL(cc, which) \
   (((const GLuint *)(cc))[(which) / 32] >> ((which) & 31))

/* UP5: expand the low 5 bits of `c` to an 8-bit value. */
#define UP5(c) (_rgb_scale_5[(c) & 31])

/* UP6: expand the 6-bit value formed by the low 5 bits of `c` followed by
 * the low bit of `b` (as least significant bit) to an 8-bit value.
 */
#define UP6(c, b) (_rgb_scale_6[(((c) & 31) << 1) | ((b) & 1)])

/* LERP: rounded integer interpolation, t/n of the way from c0 to c1.
 * The expansion is fully parenthesized so it can be used inside larger
 * expressions.
 */
#define LERP(n, t, c0, c1) \
   ((((n) - (t)) * (c0) + (t) * (c1) + (n) / 2) / (n))
1401
1402
1403 static void
1404 fxt1_decode_1HI (const GLubyte *code, GLint t, GLchan *rgba)
1405 {
1406 const GLuint *cc;
1407
1408 t *= 3;
1409 cc = (const GLuint *)(code + t / 8);
1410 t = (cc[0] >> (t & 7)) & 7;
1411
1412 if (t == 7) {
1413 rgba[RCOMP] = rgba[GCOMP] = rgba[BCOMP] = rgba[ACOMP] = 0;
1414 } else {
1415 GLubyte r, g, b;
1416 cc = (const GLuint *)(code + 12);
1417 if (t == 0) {
1418 b = UP5(CC_SEL(cc, 0));
1419 g = UP5(CC_SEL(cc, 5));
1420 r = UP5(CC_SEL(cc, 10));
1421 } else if (t == 6) {
1422 b = UP5(CC_SEL(cc, 15));
1423 g = UP5(CC_SEL(cc, 20));
1424 r = UP5(CC_SEL(cc, 25));
1425 } else {
1426 b = LERP(6, t, UP5(CC_SEL(cc, 0)), UP5(CC_SEL(cc, 15)));
1427 g = LERP(6, t, UP5(CC_SEL(cc, 5)), UP5(CC_SEL(cc, 20)));
1428 r = LERP(6, t, UP5(CC_SEL(cc, 10)), UP5(CC_SEL(cc, 25)));
1429 }
1430 rgba[RCOMP] = UBYTE_TO_CHAN(r);
1431 rgba[GCOMP] = UBYTE_TO_CHAN(g);
1432 rgba[BCOMP] = UBYTE_TO_CHAN(b);
1433 rgba[ACOMP] = CHAN_MAX;
1434 }
1435 }
1436
1437
1438 static void
1439 fxt1_decode_1CHROMA (const GLubyte *code, GLint t, GLchan *rgba)
1440 {
1441 const GLuint *cc;
1442 GLuint kk;
1443
1444 cc = (const GLuint *)code;
1445 if (t & 16) {
1446 cc++;
1447 t &= 15;
1448 }
1449 t = (cc[0] >> (t * 2)) & 3;
1450
1451 t *= 15;
1452 cc = (const GLuint *)(code + 8 + t / 8);
1453 kk = cc[0] >> (t & 7);
1454 rgba[BCOMP] = UBYTE_TO_CHAN( UP5(kk) );
1455 rgba[GCOMP] = UBYTE_TO_CHAN( UP5(kk >> 5) );
1456 rgba[RCOMP] = UBYTE_TO_CHAN( UP5(kk >> 10) );
1457 rgba[ACOMP] = CHAN_MAX;
1458 }
1459
1460
1461 static void
1462 fxt1_decode_1MIXED (const GLubyte *code, GLint t, GLchan *rgba)
1463 {
1464 const GLuint *cc;
1465 GLuint col[2][3];
1466 GLint glsb, selb;
1467
1468 cc = (const GLuint *)code;
1469 if (t & 16) {
1470 t &= 15;
1471 t = (cc[1] >> (t * 2)) & 3;
1472 /* col 2 */
1473 col[0][BCOMP] = (*(const GLuint *)(code + 11)) >> 6;
1474 col[0][GCOMP] = CC_SEL(cc, 99);
1475 col[0][RCOMP] = CC_SEL(cc, 104);
1476 /* col 3 */
1477 col[1][BCOMP] = CC_SEL(cc, 109);
1478 col[1][GCOMP] = CC_SEL(cc, 114);
1479 col[1][RCOMP] = CC_SEL(cc, 119);
1480 glsb = CC_SEL(cc, 126);
1481 selb = CC_SEL(cc, 33);
1482 } else {
1483 t = (cc[0] >> (t * 2)) & 3;
1484 /* col 0 */
1485 col[0][BCOMP] = CC_SEL(cc, 64);
1486 col[0][GCOMP] = CC_SEL(cc, 69);
1487 col[0][RCOMP] = CC_SEL(cc, 74);
1488 /* col 1 */
1489 col[1][BCOMP] = CC_SEL(cc, 79);
1490 col[1][GCOMP] = CC_SEL(cc, 84);
1491 col[1][RCOMP] = CC_SEL(cc, 89);
1492 glsb = CC_SEL(cc, 125);
1493 selb = CC_SEL(cc, 1);
1494 }
1495
1496 if (CC_SEL(cc, 124) & 1) {
1497 /* alpha[0] == 1 */
1498
1499 if (t == 3) {
1500 /* zero */
1501 rgba[RCOMP] = rgba[BCOMP] = rgba[GCOMP] = rgba[ACOMP] = 0;
1502 } else {
1503 GLubyte r, g, b;
1504 if (t == 0) {
1505 b = UP5(col[0][BCOMP]);
1506 g = UP5(col[0][GCOMP]);
1507 r = UP5(col[0][RCOMP]);
1508 } else if (t == 2) {
1509 b = UP5(col[1][BCOMP]);
1510 g = UP6(col[1][GCOMP], glsb);
1511 r = UP5(col[1][RCOMP]);
1512 } else {
1513 b = (UP5(col[0][BCOMP]) + UP5(col[1][BCOMP])) / 2;
1514 g = (UP5(col[0][GCOMP]) + UP6(col[1][GCOMP], glsb)) / 2;
1515 r = (UP5(col[0][RCOMP]) + UP5(col[1][RCOMP])) / 2;
1516 }
1517 rgba[RCOMP] = UBYTE_TO_CHAN(r);
1518 rgba[GCOMP] = UBYTE_TO_CHAN(g);
1519 rgba[BCOMP] = UBYTE_TO_CHAN(b);
1520 rgba[ACOMP] = CHAN_MAX;
1521 }
1522 } else {
1523 /* alpha[0] == 0 */
1524 GLubyte r, g, b;
1525 if (t == 0) {
1526 b = UP5(col[0][BCOMP]);
1527 g = UP6(col[0][GCOMP], glsb ^ selb);
1528 r = UP5(col[0][RCOMP]);
1529 } else if (t == 3) {
1530 b = UP5(col[1][BCOMP]);
1531 g = UP6(col[1][GCOMP], glsb);
1532 r = UP5(col[1][RCOMP]);
1533 } else {
1534 b = LERP(3, t, UP5(col[0][BCOMP]), UP5(col[1][BCOMP]));
1535 g = LERP(3, t, UP6(col[0][GCOMP], glsb ^ selb),
1536 UP6(col[1][GCOMP], glsb));
1537 r = LERP(3, t, UP5(col[0][RCOMP]), UP5(col[1][RCOMP]));
1538 }
1539 rgba[RCOMP] = UBYTE_TO_CHAN(r);
1540 rgba[GCOMP] = UBYTE_TO_CHAN(g);
1541 rgba[BCOMP] = UBYTE_TO_CHAN(b);
1542 rgba[ACOMP] = CHAN_MAX;
1543 }
1544 }
1545
1546
1547 static void
1548 fxt1_decode_1ALPHA (const GLubyte *code, GLint t, GLchan *rgba)
1549 {
1550 const GLuint *cc;
1551 GLubyte r, g, b, a;
1552
1553 cc = (const GLuint *)code;
1554 if (CC_SEL(cc, 124) & 1) {
1555 /* lerp == 1 */
1556 GLuint col0[4];
1557
1558 if (t & 16) {
1559 t &= 15;
1560 t = (cc[1] >> (t * 2)) & 3;
1561 /* col 2 */
1562 col0[BCOMP] = (*(const GLuint *)(code + 11)) >> 6;
1563 col0[GCOMP] = CC_SEL(cc, 99);
1564 col0[RCOMP] = CC_SEL(cc, 104);
1565 col0[ACOMP] = CC_SEL(cc, 119);
1566 } else {
1567 t = (cc[0] >> (t * 2)) & 3;
1568 /* col 0 */
1569 col0[BCOMP] = CC_SEL(cc, 64);
1570 col0[GCOMP] = CC_SEL(cc, 69);
1571 col0[RCOMP] = CC_SEL(cc, 74);
1572 col0[ACOMP] = CC_SEL(cc, 109);
1573 }
1574
1575 if (t == 0) {
1576 b = UP5(col0[BCOMP]);
1577 g = UP5(col0[GCOMP]);
1578 r = UP5(col0[RCOMP]);
1579 a = UP5(col0[ACOMP]);
1580 } else if (t == 3) {
1581 b = UP5(CC_SEL(cc, 79));
1582 g = UP5(CC_SEL(cc, 84));
1583 r = UP5(CC_SEL(cc, 89));
1584 a = UP5(CC_SEL(cc, 114));
1585 } else {
1586 b = LERP(3, t, UP5(col0[BCOMP]), UP5(CC_SEL(cc, 79)));
1587 g = LERP(3, t, UP5(col0[GCOMP]), UP5(CC_SEL(cc, 84)));
1588 r = LERP(3, t, UP5(col0[RCOMP]), UP5(CC_SEL(cc, 89)));
1589 a = LERP(3, t, UP5(col0[ACOMP]), UP5(CC_SEL(cc, 114)));
1590 }
1591 } else {
1592 /* lerp == 0 */
1593
1594 if (t & 16) {
1595 cc++;
1596 t &= 15;
1597 }
1598 t = (cc[0] >> (t * 2)) & 3;
1599
1600 if (t == 3) {
1601 /* zero */
1602 r = g = b = a = 0;
1603 } else {
1604 GLuint kk;
1605 cc = (const GLuint *)code;
1606 a = UP5(cc[3] >> (t * 5 + 13));
1607 t *= 15;
1608 cc = (const GLuint *)(code + 8 + t / 8);
1609 kk = cc[0] >> (t & 7);
1610 b = UP5(kk);
1611 g = UP5(kk >> 5);
1612 r = UP5(kk >> 10);
1613 }
1614 }
1615 rgba[RCOMP] = UBYTE_TO_CHAN(r);
1616 rgba[GCOMP] = UBYTE_TO_CHAN(g);
1617 rgba[BCOMP] = UBYTE_TO_CHAN(b);
1618 rgba[ACOMP] = UBYTE_TO_CHAN(a);
1619 }
1620
1621
1622 void
1623 fxt1_decode_1 (const void *texture, GLint stride, /* in pixels */
1624 GLint i, GLint j, GLchan *rgba)
1625 {
1626 static void (*decode_1[]) (const GLubyte *, GLint, GLchan *) = {
1627 fxt1_decode_1HI, /* cc-high = "00?" */
1628 fxt1_decode_1HI, /* cc-high = "00?" */
1629 fxt1_decode_1CHROMA, /* cc-chroma = "010" */
1630 fxt1_decode_1ALPHA, /* alpha = "011" */
1631 fxt1_decode_1MIXED, /* mixed = "1??" */
1632 fxt1_decode_1MIXED, /* mixed = "1??" */
1633 fxt1_decode_1MIXED, /* mixed = "1??" */
1634 fxt1_decode_1MIXED /* mixed = "1??" */
1635 };
1636
1637 const GLubyte *code = (const GLubyte *)texture +
1638 ((j / 4) * (stride / 8) + (i / 8)) * 16;
1639 GLint mode = CC_SEL(code, 125);
1640 GLint t = i & 7;
1641
1642 if (t & 4) {
1643 t += 12;
1644 }
1645 t += (j & 3) * 4;
1646
1647 decode_1[mode](code, t, rgba);
1648 }
1649
1650
1651 #endif /* FEATURE_texture_fxt1 */