mesa: Remove EXT_convolution.
[mesa.git] / src / mesa / main / texcompress_fxt1.c
1 /*
2 * Mesa 3-D graphics library
3 * Version: 7.1
4 *
5 * Copyright (C) 1999-2008 Brian Paul All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included
15 * in all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
21 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25
26 /**
27 * \file texcompress_fxt1.c
28 * GL_EXT_texture_compression_fxt1 support.
29 */
30
31
32 #include "glheader.h"
33 #include "imports.h"
34 #include "colormac.h"
35 #include "image.h"
36 #include "macros.h"
37 #include "mipmap.h"
38 #include "texcompress.h"
39 #include "texcompress_fxt1.h"
40 #include "texstore.h"
41
42
43 #if FEATURE_texture_fxt1
44
45
46 static void
47 fxt1_encode (GLuint width, GLuint height, GLint comps,
48 const void *source, GLint srcRowStride,
49 void *dest, GLint destRowStride);
50
51 void
52 fxt1_decode_1 (const void *texture, GLint stride,
53 GLint i, GLint j, GLchan *rgba);
54
55
56 /**
57 * Store user's image in rgb_fxt1 format.
58 */
59 GLboolean
60 _mesa_texstore_rgb_fxt1(TEXSTORE_PARAMS)
61 {
62 const GLchan *pixels;
63 GLint srcRowStride;
64 GLubyte *dst;
65 const GLint texWidth = dstRowStride * 8 / 16; /* a bit of a hack */
66 const GLchan *tempImage = NULL;
67
68 ASSERT(dstFormat == MESA_FORMAT_RGB_FXT1);
69 ASSERT(dstXoffset % 8 == 0);
70 ASSERT(dstYoffset % 4 == 0);
71 ASSERT(dstZoffset == 0);
72 (void) dstZoffset;
73 (void) dstImageOffsets;
74
75 if (srcFormat != GL_RGB ||
76 srcType != CHAN_TYPE ||
77 ctx->_ImageTransferState ||
78 srcPacking->SwapBytes) {
79 /* convert image to RGB/GLchan */
80 tempImage = _mesa_make_temp_chan_image(ctx, dims,
81 baseInternalFormat,
82 _mesa_get_format_base_format(dstFormat),
83 srcWidth, srcHeight, srcDepth,
84 srcFormat, srcType, srcAddr,
85 srcPacking);
86 if (!tempImage)
87 return GL_FALSE; /* out of memory */
88 pixels = tempImage;
89 srcRowStride = 3 * srcWidth;
90 srcFormat = GL_RGB;
91 }
92 else {
93 pixels = (const GLchan *) srcAddr;
94 srcRowStride = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat,
95 srcType) / sizeof(GLchan);
96 }
97
98 dst = _mesa_compressed_image_address(dstXoffset, dstYoffset, 0,
99 dstFormat,
100 texWidth, (GLubyte *) dstAddr);
101
102 fxt1_encode(srcWidth, srcHeight, 3, pixels, srcRowStride,
103 dst, dstRowStride);
104
105 if (tempImage)
106 free((void*) tempImage);
107
108 return GL_TRUE;
109 }
110
111
112 /**
113 * Store user's image in rgba_fxt1 format.
114 */
115 GLboolean
116 _mesa_texstore_rgba_fxt1(TEXSTORE_PARAMS)
117 {
118 const GLchan *pixels;
119 GLint srcRowStride;
120 GLubyte *dst;
121 GLint texWidth = dstRowStride * 8 / 16; /* a bit of a hack */
122 const GLchan *tempImage = NULL;
123
124 ASSERT(dstFormat == MESA_FORMAT_RGBA_FXT1);
125 ASSERT(dstXoffset % 8 == 0);
126 ASSERT(dstYoffset % 4 == 0);
127 ASSERT(dstZoffset == 0);
128 (void) dstZoffset;
129 (void) dstImageOffsets;
130
131 if (srcFormat != GL_RGBA ||
132 srcType != CHAN_TYPE ||
133 ctx->_ImageTransferState ||
134 srcPacking->SwapBytes) {
135 /* convert image to RGBA/GLchan */
136 tempImage = _mesa_make_temp_chan_image(ctx, dims,
137 baseInternalFormat,
138 _mesa_get_format_base_format(dstFormat),
139 srcWidth, srcHeight, srcDepth,
140 srcFormat, srcType, srcAddr,
141 srcPacking);
142 if (!tempImage)
143 return GL_FALSE; /* out of memory */
144 pixels = tempImage;
145 srcRowStride = 4 * srcWidth;
146 srcFormat = GL_RGBA;
147 }
148 else {
149 pixels = (const GLchan *) srcAddr;
150 srcRowStride = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat,
151 srcType) / sizeof(GLchan);
152 }
153
154 dst = _mesa_compressed_image_address(dstXoffset, dstYoffset, 0,
155 dstFormat,
156 texWidth, (GLubyte *) dstAddr);
157
158 fxt1_encode(srcWidth, srcHeight, 4, pixels, srcRowStride,
159 dst, dstRowStride);
160
161 if (tempImage)
162 free((void*) tempImage);
163
164 return GL_TRUE;
165 }
166
167
168 void
169 _mesa_fetch_texel_2d_f_rgba_fxt1( const struct gl_texture_image *texImage,
170 GLint i, GLint j, GLint k, GLfloat *texel )
171 {
172 /* just sample as GLchan and convert to float here */
173 GLchan rgba[4];
174 (void) k;
175 fxt1_decode_1(texImage->Data, texImage->RowStride, i, j, rgba);
176 texel[RCOMP] = CHAN_TO_FLOAT(rgba[RCOMP]);
177 texel[GCOMP] = CHAN_TO_FLOAT(rgba[GCOMP]);
178 texel[BCOMP] = CHAN_TO_FLOAT(rgba[BCOMP]);
179 texel[ACOMP] = CHAN_TO_FLOAT(rgba[ACOMP]);
180 }
181
182
183 void
184 _mesa_fetch_texel_2d_f_rgb_fxt1( const struct gl_texture_image *texImage,
185 GLint i, GLint j, GLint k, GLfloat *texel )
186 {
187 /* just sample as GLchan and convert to float here */
188 GLchan rgba[4];
189 (void) k;
190 fxt1_decode_1(texImage->Data, texImage->RowStride, i, j, rgba);
191 texel[RCOMP] = CHAN_TO_FLOAT(rgba[RCOMP]);
192 texel[GCOMP] = CHAN_TO_FLOAT(rgba[GCOMP]);
193 texel[BCOMP] = CHAN_TO_FLOAT(rgba[BCOMP]);
194 texel[ACOMP] = 1.0F;
195 }
196
197
198
199 /***************************************************************************\
200 * FXT1 encoder
201 *
202 * The encoder was built by reversing the decoder,
203 * and is vaguely based on Texus2 by 3dfx. Note that this code
204 * is merely a proof of concept, since it is highly UNoptimized;
205 * moreover, it is sub-optimal due to initial conditions passed
206 * to Lloyd's algorithm (the interpolation modes are even worse).
207 \***************************************************************************/
208
209
#define MAX_COMP 4 /* ever needed maximum number of components in texel */
#define MAX_VECT 4 /* ever needed maximum number of base vectors to find */
#define N_TEXELS 32 /* number of texels in a block (always 32) */
#define LL_N_REP 50 /* number of iterations in lloyd's vq */
#define LL_RMS_D 10 /* fault tolerance (maximum delta) */
#define LL_RMS_E 255 /* fault tolerance (maximum error) */
#define ALPHA_TS 2 /* alpha threshold: (255 - ALPHA_TS) deemed opaque */

/*
 * True when the four bytes at v are all zero ("transparent black").
 *
 * The bytes are tested one by one rather than through a GLuint pointer,
 * so the macro neither violates the aliasing rules nor requires v to be
 * 4-byte aligned.  Note that v is evaluated more than once; pass an
 * expression without side effects.
 */
#define ISTBLACK(v) \
   ((((const GLubyte *)(v))[0] | \
     ((const GLubyte *)(v))[1] | \
     ((const GLubyte *)(v))[2] | \
     ((const GLubyte *)(v))[3]) == 0)
218
219
/*
 * Define a 64-bit unsigned integer type and macros
 *
 * FX64_MOV32(a, b)  load the 32-bit value b into a (upper half cleared)
 * FX64_OR32(a, b)   OR the 32-bit value b into the low half of a
 * FX64_SHL(a, c)    shift a left by c bits
 *
 * All macro arguments are parenthesized so that expression arguments are
 * taken as a whole.
 */
#if 1

#define FX64_NATIVE 1

typedef uint64_t Fx64;

#define FX64_MOV32(a, b) ((a) = (b))
#define FX64_OR32(a, b) ((a) |= (b))
#define FX64_SHL(a, c) ((a) <<= (c))

#else

#define FX64_NATIVE 0

typedef struct {
   GLuint lo, hi;
} Fx64;

/* clear .hi as well, so the emulation matches the native assignment */
#define FX64_MOV32(a, b) \
   do { \
      (a).lo = (b); \
      (a).hi = 0; \
   } while (0)

#define FX64_OR32(a, b) ((a).lo |= (b))

/* a zero shift is skipped: ">> (32 - 0)" would be an undefined 32-bit shift */
#define FX64_SHL(a, c) \
   do { \
      if ((c) >= 32) { \
         (a).hi = (a).lo << ((c) - 32); \
         (a).lo = 0; \
      } else if ((c) > 0) { \
         (a).hi = ((a).hi << (c)) | ((a).lo >> (32 - (c))); \
         (a).lo <<= (c); \
      } \
   } while (0)

#endif
256
257
#define F(i) ((GLfloat)1) /* can be used to obtain an oblong metric: 0.30 / 0.59 / 0.11 */
#define SAFECDOT 1 /* for paranoids */

/*
 * Build the interpolation vector IV and bias B for the colour ramp that
 * runs from V0 to V1 in NV steps, such that for a texel V
 *
 *    (GLint)(dot(V, IV) + B)
 *
 * is the index of the nearest ramp entry: V0 maps to 0 and V1 to NV.
 *
 * NC is the number of components.  A "GLint i" must be in scope at the
 * point of use; it is used as the loop counter.  V0 and V1 must differ,
 * otherwise the squared length d2 is zero and the division is by zero.
 * All macro arguments are parenthesized, so expression arguments are
 * taken as a whole.
 */
#define MAKEIVEC(NV, NC, IV, B, V0, V1) \
   do { \
      /* compute interpolation vector */ \
      GLfloat d2 = 0.0F; \
      GLfloat rd2; \
      \
      for (i = 0; i < (NC); i++) { \
         (IV)[i] = ((V1)[i] - (V0)[i]) * F(i); \
         d2 += (IV)[i] * (IV)[i]; \
      } \
      rd2 = (GLfloat)(NV) / d2; \
      (B) = 0; \
      for (i = 0; i < (NC); i++) { \
         (IV)[i] *= F(i); \
         (B) -= (IV)[i] * (V0)[i]; \
         (IV)[i] *= rd2; \
      } \
      (B) = (B) * rd2 + 0.5f; \
   } while (0)

/*
 * Project texel V onto the ramp described by IV / B (see MAKEIVEC) and
 * store the resulting index in TEXEL.  With SAFECDOT enabled the index is
 * clamped to [0, NV].  Uses the enclosing scope's "GLint i" as the loop
 * counter.
 */
#define CALCCDOT(TEXEL, NV, NC, IV, B, V) \
   do { \
      GLfloat dot = 0.0F; \
      for (i = 0; i < (NC); i++) { \
         dot += (V)[i] * (IV)[i]; \
      } \
      (TEXEL) = (GLint)(dot + (B)); \
      if (SAFECDOT) { \
         if ((TEXEL) < 0) { \
            (TEXEL) = 0; \
         } else if ((TEXEL) > (NV)) { \
            (TEXEL) = (NV); \
         } \
      } \
   } while (0)
296
297
298 static GLint
299 fxt1_bestcol (GLfloat vec[][MAX_COMP], GLint nv,
300 GLubyte input[MAX_COMP], GLint nc)
301 {
302 GLint i, j, best = -1;
303 GLfloat err = 1e9; /* big enough */
304
305 for (j = 0; j < nv; j++) {
306 GLfloat e = 0.0F;
307 for (i = 0; i < nc; i++) {
308 e += (vec[j][i] - input[i]) * (vec[j][i] - input[i]);
309 }
310 if (e < err) {
311 err = e;
312 best = j;
313 }
314 }
315
316 return best;
317 }
318
319
320 static GLint
321 fxt1_worst (GLfloat vec[MAX_COMP],
322 GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
323 {
324 GLint i, k, worst = -1;
325 GLfloat err = -1.0F; /* small enough */
326
327 for (k = 0; k < n; k++) {
328 GLfloat e = 0.0F;
329 for (i = 0; i < nc; i++) {
330 e += (vec[i] - input[k][i]) * (vec[i] - input[k][i]);
331 }
332 if (e > err) {
333 err = e;
334 worst = k;
335 }
336 }
337
338 return worst;
339 }
340
341
342 static GLint
343 fxt1_variance (GLdouble variance[MAX_COMP],
344 GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
345 {
346 GLint i, k, best = 0;
347 GLint sx, sx2;
348 GLdouble var, maxvar = -1; /* small enough */
349 GLdouble teenth = 1.0 / n;
350
351 for (i = 0; i < nc; i++) {
352 sx = sx2 = 0;
353 for (k = 0; k < n; k++) {
354 GLint t = input[k][i];
355 sx += t;
356 sx2 += t * t;
357 }
358 var = sx2 * teenth - sx * sx * teenth * teenth;
359 if (maxvar < var) {
360 maxvar = var;
361 best = i;
362 }
363 if (variance) {
364 variance[i] = var;
365 }
366 }
367
368 return best;
369 }
370
371
372 static GLint
373 fxt1_choose (GLfloat vec[][MAX_COMP], GLint nv,
374 GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
375 {
376 #if 0
377 /* Choose colors from a grid.
378 */
379 GLint i, j;
380
381 for (j = 0; j < nv; j++) {
382 GLint m = j * (n - 1) / (nv - 1);
383 for (i = 0; i < nc; i++) {
384 vec[j][i] = input[m][i];
385 }
386 }
387 #else
388 /* Our solution here is to find the darkest and brightest colors in
389 * the 8x4 tile and use those as the two representative colors.
390 * There are probably better algorithms to use (histogram-based).
391 */
392 GLint i, j, k;
393 GLint minSum = 2000; /* big enough */
394 GLint maxSum = -1; /* small enough */
395 GLint minCol = 0; /* phoudoin: silent compiler! */
396 GLint maxCol = 0; /* phoudoin: silent compiler! */
397
398 struct {
399 GLint flag;
400 GLint key;
401 GLint freq;
402 GLint idx;
403 } hist[N_TEXELS];
404 GLint lenh = 0;
405
406 memset(hist, 0, sizeof(hist));
407
408 for (k = 0; k < n; k++) {
409 GLint l;
410 GLint key = 0;
411 GLint sum = 0;
412 for (i = 0; i < nc; i++) {
413 key <<= 8;
414 key |= input[k][i];
415 sum += input[k][i];
416 }
417 for (l = 0; l < n; l++) {
418 if (!hist[l].flag) {
419 /* alloc new slot */
420 hist[l].flag = !0;
421 hist[l].key = key;
422 hist[l].freq = 1;
423 hist[l].idx = k;
424 lenh = l + 1;
425 break;
426 } else if (hist[l].key == key) {
427 hist[l].freq++;
428 break;
429 }
430 }
431 if (minSum > sum) {
432 minSum = sum;
433 minCol = k;
434 }
435 if (maxSum < sum) {
436 maxSum = sum;
437 maxCol = k;
438 }
439 }
440
441 if (lenh <= nv) {
442 for (j = 0; j < lenh; j++) {
443 for (i = 0; i < nc; i++) {
444 vec[j][i] = (GLfloat)input[hist[j].idx][i];
445 }
446 }
447 for (; j < nv; j++) {
448 for (i = 0; i < nc; i++) {
449 vec[j][i] = vec[0][i];
450 }
451 }
452 return 0;
453 }
454
455 for (j = 0; j < nv; j++) {
456 for (i = 0; i < nc; i++) {
457 vec[j][i] = ((nv - 1 - j) * input[minCol][i] + j * input[maxCol][i] + (nv - 1) / 2) / (GLfloat)(nv - 1);
458 }
459 }
460 #endif
461
462 return !0;
463 }
464
465
466 static GLint
467 fxt1_lloyd (GLfloat vec[][MAX_COMP], GLint nv,
468 GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
469 {
470 /* Use the generalized lloyd's algorithm for VQ:
471 * find 4 color vectors.
472 *
473 * for each sample color
474 * sort to nearest vector.
475 *
476 * replace each vector with the centroid of its matching colors.
477 *
478 * repeat until RMS doesn't improve.
479 *
480 * if a color vector has no samples, or becomes the same as another
481 * vector, replace it with the color which is farthest from a sample.
482 *
483 * vec[][MAX_COMP] initial vectors and resulting colors
484 * nv number of resulting colors required
485 * input[N_TEXELS][MAX_COMP] input texels
486 * nc number of components in input / vec
487 * n number of input samples
488 */
489
490 GLint sum[MAX_VECT][MAX_COMP]; /* used to accumulate closest texels */
491 GLint cnt[MAX_VECT]; /* how many times a certain vector was chosen */
492 GLfloat error, lasterror = 1e9;
493
494 GLint i, j, k, rep;
495
496 /* the quantizer */
497 for (rep = 0; rep < LL_N_REP; rep++) {
498 /* reset sums & counters */
499 for (j = 0; j < nv; j++) {
500 for (i = 0; i < nc; i++) {
501 sum[j][i] = 0;
502 }
503 cnt[j] = 0;
504 }
505 error = 0;
506
507 /* scan whole block */
508 for (k = 0; k < n; k++) {
509 #if 1
510 GLint best = -1;
511 GLfloat err = 1e9; /* big enough */
512 /* determine best vector */
513 for (j = 0; j < nv; j++) {
514 GLfloat e = (vec[j][0] - input[k][0]) * (vec[j][0] - input[k][0]) +
515 (vec[j][1] - input[k][1]) * (vec[j][1] - input[k][1]) +
516 (vec[j][2] - input[k][2]) * (vec[j][2] - input[k][2]);
517 if (nc == 4) {
518 e += (vec[j][3] - input[k][3]) * (vec[j][3] - input[k][3]);
519 }
520 if (e < err) {
521 err = e;
522 best = j;
523 }
524 }
525 #else
526 GLint best = fxt1_bestcol(vec, nv, input[k], nc, &err);
527 #endif
528 assert(best >= 0);
529 /* add in closest color */
530 for (i = 0; i < nc; i++) {
531 sum[best][i] += input[k][i];
532 }
533 /* mark this vector as used */
534 cnt[best]++;
535 /* accumulate error */
536 error += err;
537 }
538
539 /* check RMS */
540 if ((error < LL_RMS_E) ||
541 ((error < lasterror) && ((lasterror - error) < LL_RMS_D))) {
542 return !0; /* good match */
543 }
544 lasterror = error;
545
546 /* move each vector to the barycenter of its closest colors */
547 for (j = 0; j < nv; j++) {
548 if (cnt[j]) {
549 GLfloat div = 1.0F / cnt[j];
550 for (i = 0; i < nc; i++) {
551 vec[j][i] = div * sum[j][i];
552 }
553 } else {
554 /* this vec has no samples or is identical with a previous vec */
555 GLint worst = fxt1_worst(vec[j], input, nc, n);
556 for (i = 0; i < nc; i++) {
557 vec[j][i] = input[worst][i];
558 }
559 }
560 }
561 }
562
563 return 0; /* could not converge fast enough */
564 }
565
566
567 static void
568 fxt1_quantize_CHROMA (GLuint *cc,
569 GLubyte input[N_TEXELS][MAX_COMP])
570 {
571 const GLint n_vect = 4; /* 4 base vectors to find */
572 const GLint n_comp = 3; /* 3 components: R, G, B */
573 GLfloat vec[MAX_VECT][MAX_COMP];
574 GLint i, j, k;
575 Fx64 hi; /* high quadword */
576 GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
577
578 if (fxt1_choose(vec, n_vect, input, n_comp, N_TEXELS) != 0) {
579 fxt1_lloyd(vec, n_vect, input, n_comp, N_TEXELS);
580 }
581
582 FX64_MOV32(hi, 4); /* cc-chroma = "010" + unused bit */
583 for (j = n_vect - 1; j >= 0; j--) {
584 for (i = 0; i < n_comp; i++) {
585 /* add in colors */
586 FX64_SHL(hi, 5);
587 FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
588 }
589 }
590 ((Fx64 *)cc)[1] = hi;
591
592 lohi = lolo = 0;
593 /* right microtile */
594 for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) {
595 lohi <<= 2;
596 lohi |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
597 }
598 /* left microtile */
599 for (; k >= 0; k--) {
600 lolo <<= 2;
601 lolo |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
602 }
603 cc[1] = lohi;
604 cc[0] = lolo;
605 }
606
607
608 static void
609 fxt1_quantize_ALPHA0 (GLuint *cc,
610 GLubyte input[N_TEXELS][MAX_COMP],
611 GLubyte reord[N_TEXELS][MAX_COMP], GLint n)
612 {
613 const GLint n_vect = 3; /* 3 base vectors to find */
614 const GLint n_comp = 4; /* 4 components: R, G, B, A */
615 GLfloat vec[MAX_VECT][MAX_COMP];
616 GLint i, j, k;
617 Fx64 hi; /* high quadword */
618 GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
619
620 /* the last vector indicates zero */
621 for (i = 0; i < n_comp; i++) {
622 vec[n_vect][i] = 0;
623 }
624
625 /* the first n texels in reord are guaranteed to be non-zero */
626 if (fxt1_choose(vec, n_vect, reord, n_comp, n) != 0) {
627 fxt1_lloyd(vec, n_vect, reord, n_comp, n);
628 }
629
630 FX64_MOV32(hi, 6); /* alpha = "011" + lerp = 0 */
631 for (j = n_vect - 1; j >= 0; j--) {
632 /* add in alphas */
633 FX64_SHL(hi, 5);
634 FX64_OR32(hi, (GLuint)(vec[j][ACOMP] / 8.0F));
635 }
636 for (j = n_vect - 1; j >= 0; j--) {
637 for (i = 0; i < n_comp - 1; i++) {
638 /* add in colors */
639 FX64_SHL(hi, 5);
640 FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
641 }
642 }
643 ((Fx64 *)cc)[1] = hi;
644
645 lohi = lolo = 0;
646 /* right microtile */
647 for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) {
648 lohi <<= 2;
649 lohi |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
650 }
651 /* left microtile */
652 for (; k >= 0; k--) {
653 lolo <<= 2;
654 lolo |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
655 }
656 cc[1] = lohi;
657 cc[0] = lolo;
658 }
659
660
661 static void
662 fxt1_quantize_ALPHA1 (GLuint *cc,
663 GLubyte input[N_TEXELS][MAX_COMP])
664 {
665 const GLint n_vect = 3; /* highest vector number in each microtile */
666 const GLint n_comp = 4; /* 4 components: R, G, B, A */
667 GLfloat vec[1 + 1 + 1][MAX_COMP]; /* 1.5 extrema for each sub-block */
668 GLfloat b, iv[MAX_COMP]; /* interpolation vector */
669 GLint i, j, k;
670 Fx64 hi; /* high quadword */
671 GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
672
673 GLint minSum;
674 GLint maxSum;
675 GLint minColL = 0, maxColL = 0;
676 GLint minColR = 0, maxColR = 0;
677 GLint sumL = 0, sumR = 0;
678 GLint nn_comp;
679 /* Our solution here is to find the darkest and brightest colors in
680 * the 4x4 tile and use those as the two representative colors.
681 * There are probably better algorithms to use (histogram-based).
682 */
683 nn_comp = n_comp;
684 while ((minColL == maxColL) && nn_comp) {
685 minSum = 2000; /* big enough */
686 maxSum = -1; /* small enough */
687 for (k = 0; k < N_TEXELS / 2; k++) {
688 GLint sum = 0;
689 for (i = 0; i < nn_comp; i++) {
690 sum += input[k][i];
691 }
692 if (minSum > sum) {
693 minSum = sum;
694 minColL = k;
695 }
696 if (maxSum < sum) {
697 maxSum = sum;
698 maxColL = k;
699 }
700 sumL += sum;
701 }
702
703 nn_comp--;
704 }
705
706 nn_comp = n_comp;
707 while ((minColR == maxColR) && nn_comp) {
708 minSum = 2000; /* big enough */
709 maxSum = -1; /* small enough */
710 for (k = N_TEXELS / 2; k < N_TEXELS; k++) {
711 GLint sum = 0;
712 for (i = 0; i < nn_comp; i++) {
713 sum += input[k][i];
714 }
715 if (minSum > sum) {
716 minSum = sum;
717 minColR = k;
718 }
719 if (maxSum < sum) {
720 maxSum = sum;
721 maxColR = k;
722 }
723 sumR += sum;
724 }
725
726 nn_comp--;
727 }
728
729 /* choose the common vector (yuck!) */
730 {
731 GLint j1, j2;
732 GLint v1 = 0, v2 = 0;
733 GLfloat err = 1e9; /* big enough */
734 GLfloat tv[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
735 for (i = 0; i < n_comp; i++) {
736 tv[0][i] = input[minColL][i];
737 tv[1][i] = input[maxColL][i];
738 tv[2][i] = input[minColR][i];
739 tv[3][i] = input[maxColR][i];
740 }
741 for (j1 = 0; j1 < 2; j1++) {
742 for (j2 = 2; j2 < 4; j2++) {
743 GLfloat e = 0.0F;
744 for (i = 0; i < n_comp; i++) {
745 e += (tv[j1][i] - tv[j2][i]) * (tv[j1][i] - tv[j2][i]);
746 }
747 if (e < err) {
748 err = e;
749 v1 = j1;
750 v2 = j2;
751 }
752 }
753 }
754 for (i = 0; i < n_comp; i++) {
755 vec[0][i] = tv[1 - v1][i];
756 vec[1][i] = (tv[v1][i] * sumL + tv[v2][i] * sumR) / (sumL + sumR);
757 vec[2][i] = tv[5 - v2][i];
758 }
759 }
760
761 /* left microtile */
762 cc[0] = 0;
763 if (minColL != maxColL) {
764 /* compute interpolation vector */
765 MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
766
767 /* add in texels */
768 lolo = 0;
769 for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
770 GLint texel;
771 /* interpolate color */
772 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
773 /* add in texel */
774 lolo <<= 2;
775 lolo |= texel;
776 }
777
778 cc[0] = lolo;
779 }
780
781 /* right microtile */
782 cc[1] = 0;
783 if (minColR != maxColR) {
784 /* compute interpolation vector */
785 MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[1]);
786
787 /* add in texels */
788 lohi = 0;
789 for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
790 GLint texel;
791 /* interpolate color */
792 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
793 /* add in texel */
794 lohi <<= 2;
795 lohi |= texel;
796 }
797
798 cc[1] = lohi;
799 }
800
801 FX64_MOV32(hi, 7); /* alpha = "011" + lerp = 1 */
802 for (j = n_vect - 1; j >= 0; j--) {
803 /* add in alphas */
804 FX64_SHL(hi, 5);
805 FX64_OR32(hi, (GLuint)(vec[j][ACOMP] / 8.0F));
806 }
807 for (j = n_vect - 1; j >= 0; j--) {
808 for (i = 0; i < n_comp - 1; i++) {
809 /* add in colors */
810 FX64_SHL(hi, 5);
811 FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
812 }
813 }
814 ((Fx64 *)cc)[1] = hi;
815 }
816
817
818 static void
819 fxt1_quantize_HI (GLuint *cc,
820 GLubyte input[N_TEXELS][MAX_COMP],
821 GLubyte reord[N_TEXELS][MAX_COMP], GLint n)
822 {
823 const GLint n_vect = 6; /* highest vector number */
824 const GLint n_comp = 3; /* 3 components: R, G, B */
825 GLfloat b = 0.0F; /* phoudoin: silent compiler! */
826 GLfloat iv[MAX_COMP]; /* interpolation vector */
827 GLint i, k;
828 GLuint hihi; /* high quadword: hi dword */
829
830 GLint minSum = 2000; /* big enough */
831 GLint maxSum = -1; /* small enough */
832 GLint minCol = 0; /* phoudoin: silent compiler! */
833 GLint maxCol = 0; /* phoudoin: silent compiler! */
834
835 /* Our solution here is to find the darkest and brightest colors in
836 * the 8x4 tile and use those as the two representative colors.
837 * There are probably better algorithms to use (histogram-based).
838 */
839 for (k = 0; k < n; k++) {
840 GLint sum = 0;
841 for (i = 0; i < n_comp; i++) {
842 sum += reord[k][i];
843 }
844 if (minSum > sum) {
845 minSum = sum;
846 minCol = k;
847 }
848 if (maxSum < sum) {
849 maxSum = sum;
850 maxCol = k;
851 }
852 }
853
854 hihi = 0; /* cc-hi = "00" */
855 for (i = 0; i < n_comp; i++) {
856 /* add in colors */
857 hihi <<= 5;
858 hihi |= reord[maxCol][i] >> 3;
859 }
860 for (i = 0; i < n_comp; i++) {
861 /* add in colors */
862 hihi <<= 5;
863 hihi |= reord[minCol][i] >> 3;
864 }
865 cc[3] = hihi;
866 cc[0] = cc[1] = cc[2] = 0;
867
868 /* compute interpolation vector */
869 if (minCol != maxCol) {
870 MAKEIVEC(n_vect, n_comp, iv, b, reord[minCol], reord[maxCol]);
871 }
872
873 /* add in texels */
874 for (k = N_TEXELS - 1; k >= 0; k--) {
875 GLint t = k * 3;
876 GLuint *kk = (GLuint *)((char *)cc + t / 8);
877 GLint texel = n_vect + 1; /* transparent black */
878
879 if (!ISTBLACK(input[k])) {
880 if (minCol != maxCol) {
881 /* interpolate color */
882 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
883 /* add in texel */
884 kk[0] |= texel << (t & 7);
885 }
886 } else {
887 /* add in texel */
888 kk[0] |= texel << (t & 7);
889 }
890 }
891 }
892
893
894 static void
895 fxt1_quantize_MIXED1 (GLuint *cc,
896 GLubyte input[N_TEXELS][MAX_COMP])
897 {
898 const GLint n_vect = 2; /* highest vector number in each microtile */
899 const GLint n_comp = 3; /* 3 components: R, G, B */
900 GLubyte vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
901 GLfloat b, iv[MAX_COMP]; /* interpolation vector */
902 GLint i, j, k;
903 Fx64 hi; /* high quadword */
904 GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
905
906 GLint minSum;
907 GLint maxSum;
908 GLint minColL = 0, maxColL = -1;
909 GLint minColR = 0, maxColR = -1;
910
911 /* Our solution here is to find the darkest and brightest colors in
912 * the 4x4 tile and use those as the two representative colors.
913 * There are probably better algorithms to use (histogram-based).
914 */
915 minSum = 2000; /* big enough */
916 maxSum = -1; /* small enough */
917 for (k = 0; k < N_TEXELS / 2; k++) {
918 if (!ISTBLACK(input[k])) {
919 GLint sum = 0;
920 for (i = 0; i < n_comp; i++) {
921 sum += input[k][i];
922 }
923 if (minSum > sum) {
924 minSum = sum;
925 minColL = k;
926 }
927 if (maxSum < sum) {
928 maxSum = sum;
929 maxColL = k;
930 }
931 }
932 }
933 minSum = 2000; /* big enough */
934 maxSum = -1; /* small enough */
935 for (; k < N_TEXELS; k++) {
936 if (!ISTBLACK(input[k])) {
937 GLint sum = 0;
938 for (i = 0; i < n_comp; i++) {
939 sum += input[k][i];
940 }
941 if (minSum > sum) {
942 minSum = sum;
943 minColR = k;
944 }
945 if (maxSum < sum) {
946 maxSum = sum;
947 maxColR = k;
948 }
949 }
950 }
951
952 /* left microtile */
953 if (maxColL == -1) {
954 /* all transparent black */
955 cc[0] = ~0u;
956 for (i = 0; i < n_comp; i++) {
957 vec[0][i] = 0;
958 vec[1][i] = 0;
959 }
960 } else {
961 cc[0] = 0;
962 for (i = 0; i < n_comp; i++) {
963 vec[0][i] = input[minColL][i];
964 vec[1][i] = input[maxColL][i];
965 }
966 if (minColL != maxColL) {
967 /* compute interpolation vector */
968 MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
969
970 /* add in texels */
971 lolo = 0;
972 for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
973 GLint texel = n_vect + 1; /* transparent black */
974 if (!ISTBLACK(input[k])) {
975 /* interpolate color */
976 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
977 }
978 /* add in texel */
979 lolo <<= 2;
980 lolo |= texel;
981 }
982 cc[0] = lolo;
983 }
984 }
985
986 /* right microtile */
987 if (maxColR == -1) {
988 /* all transparent black */
989 cc[1] = ~0u;
990 for (i = 0; i < n_comp; i++) {
991 vec[2][i] = 0;
992 vec[3][i] = 0;
993 }
994 } else {
995 cc[1] = 0;
996 for (i = 0; i < n_comp; i++) {
997 vec[2][i] = input[minColR][i];
998 vec[3][i] = input[maxColR][i];
999 }
1000 if (minColR != maxColR) {
1001 /* compute interpolation vector */
1002 MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);
1003
1004 /* add in texels */
1005 lohi = 0;
1006 for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
1007 GLint texel = n_vect + 1; /* transparent black */
1008 if (!ISTBLACK(input[k])) {
1009 /* interpolate color */
1010 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
1011 }
1012 /* add in texel */
1013 lohi <<= 2;
1014 lohi |= texel;
1015 }
1016 cc[1] = lohi;
1017 }
1018 }
1019
1020 FX64_MOV32(hi, 9 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
1021 for (j = 2 * 2 - 1; j >= 0; j--) {
1022 for (i = 0; i < n_comp; i++) {
1023 /* add in colors */
1024 FX64_SHL(hi, 5);
1025 FX64_OR32(hi, vec[j][i] >> 3);
1026 }
1027 }
1028 ((Fx64 *)cc)[1] = hi;
1029 }
1030
1031
1032 static void
1033 fxt1_quantize_MIXED0 (GLuint *cc,
1034 GLubyte input[N_TEXELS][MAX_COMP])
1035 {
1036 const GLint n_vect = 3; /* highest vector number in each microtile */
1037 const GLint n_comp = 3; /* 3 components: R, G, B */
1038 GLubyte vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
1039 GLfloat b, iv[MAX_COMP]; /* interpolation vector */
1040 GLint i, j, k;
1041 Fx64 hi; /* high quadword */
1042 GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
1043
1044 GLint minColL = 0, maxColL = 0;
1045 GLint minColR = 0, maxColR = 0;
1046 #if 0
1047 GLint minSum;
1048 GLint maxSum;
1049
1050 /* Our solution here is to find the darkest and brightest colors in
1051 * the 4x4 tile and use those as the two representative colors.
1052 * There are probably better algorithms to use (histogram-based).
1053 */
1054 minSum = 2000; /* big enough */
1055 maxSum = -1; /* small enough */
1056 for (k = 0; k < N_TEXELS / 2; k++) {
1057 GLint sum = 0;
1058 for (i = 0; i < n_comp; i++) {
1059 sum += input[k][i];
1060 }
1061 if (minSum > sum) {
1062 minSum = sum;
1063 minColL = k;
1064 }
1065 if (maxSum < sum) {
1066 maxSum = sum;
1067 maxColL = k;
1068 }
1069 }
1070 minSum = 2000; /* big enough */
1071 maxSum = -1; /* small enough */
1072 for (; k < N_TEXELS; k++) {
1073 GLint sum = 0;
1074 for (i = 0; i < n_comp; i++) {
1075 sum += input[k][i];
1076 }
1077 if (minSum > sum) {
1078 minSum = sum;
1079 minColR = k;
1080 }
1081 if (maxSum < sum) {
1082 maxSum = sum;
1083 maxColR = k;
1084 }
1085 }
1086 #else
1087 GLint minVal;
1088 GLint maxVal;
1089 GLint maxVarL = fxt1_variance(NULL, input, n_comp, N_TEXELS / 2);
1090 GLint maxVarR = fxt1_variance(NULL, &input[N_TEXELS / 2], n_comp, N_TEXELS / 2);
1091
1092 /* Scan the channel with max variance for lo & hi
1093 * and use those as the two representative colors.
1094 */
1095 minVal = 2000; /* big enough */
1096 maxVal = -1; /* small enough */
1097 for (k = 0; k < N_TEXELS / 2; k++) {
1098 GLint t = input[k][maxVarL];
1099 if (minVal > t) {
1100 minVal = t;
1101 minColL = k;
1102 }
1103 if (maxVal < t) {
1104 maxVal = t;
1105 maxColL = k;
1106 }
1107 }
1108 minVal = 2000; /* big enough */
1109 maxVal = -1; /* small enough */
1110 for (; k < N_TEXELS; k++) {
1111 GLint t = input[k][maxVarR];
1112 if (minVal > t) {
1113 minVal = t;
1114 minColR = k;
1115 }
1116 if (maxVal < t) {
1117 maxVal = t;
1118 maxColR = k;
1119 }
1120 }
1121 #endif
1122
1123 /* left microtile */
1124 cc[0] = 0;
1125 for (i = 0; i < n_comp; i++) {
1126 vec[0][i] = input[minColL][i];
1127 vec[1][i] = input[maxColL][i];
1128 }
1129 if (minColL != maxColL) {
1130 /* compute interpolation vector */
1131 MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
1132
1133 /* add in texels */
1134 lolo = 0;
1135 for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
1136 GLint texel;
1137 /* interpolate color */
1138 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
1139 /* add in texel */
1140 lolo <<= 2;
1141 lolo |= texel;
1142 }
1143
1144 /* funky encoding for LSB of green */
1145 if ((GLint)((lolo >> 1) & 1) != (((vec[1][GCOMP] ^ vec[0][GCOMP]) >> 2) & 1)) {
1146 for (i = 0; i < n_comp; i++) {
1147 vec[1][i] = input[minColL][i];
1148 vec[0][i] = input[maxColL][i];
1149 }
1150 lolo = ~lolo;
1151 }
1152
1153 cc[0] = lolo;
1154 }
1155
1156 /* right microtile */
1157 cc[1] = 0;
1158 for (i = 0; i < n_comp; i++) {
1159 vec[2][i] = input[minColR][i];
1160 vec[3][i] = input[maxColR][i];
1161 }
1162 if (minColR != maxColR) {
1163 /* compute interpolation vector */
1164 MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);
1165
1166 /* add in texels */
1167 lohi = 0;
1168 for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
1169 GLint texel;
1170 /* interpolate color */
1171 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
1172 /* add in texel */
1173 lohi <<= 2;
1174 lohi |= texel;
1175 }
1176
1177 /* funky encoding for LSB of green */
1178 if ((GLint)((lohi >> 1) & 1) != (((vec[3][GCOMP] ^ vec[2][GCOMP]) >> 2) & 1)) {
1179 for (i = 0; i < n_comp; i++) {
1180 vec[3][i] = input[minColR][i];
1181 vec[2][i] = input[maxColR][i];
1182 }
1183 lohi = ~lohi;
1184 }
1185
1186 cc[1] = lohi;
1187 }
1188
1189 FX64_MOV32(hi, 8 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
1190 for (j = 2 * 2 - 1; j >= 0; j--) {
1191 for (i = 0; i < n_comp; i++) {
1192 /* add in colors */
1193 FX64_SHL(hi, 5);
1194 FX64_OR32(hi, vec[j][i] >> 3);
1195 }
1196 }
1197 ((Fx64 *)cc)[1] = hi;
1198 }
1199
1200
1201 static void
1202 fxt1_quantize (GLuint *cc, const GLubyte *lines[], GLint comps)
1203 {
1204 GLint trualpha;
1205 GLubyte reord[N_TEXELS][MAX_COMP];
1206
1207 GLubyte input[N_TEXELS][MAX_COMP];
1208 GLint i, k, l;
1209
1210 if (comps == 3) {
1211 /* make the whole block opaque */
1212 memset(input, -1, sizeof(input));
1213 }
1214
1215 /* 8 texels each line */
1216 for (l = 0; l < 4; l++) {
1217 for (k = 0; k < 4; k++) {
1218 for (i = 0; i < comps; i++) {
1219 input[k + l * 4][i] = *lines[l]++;
1220 }
1221 }
1222 for (; k < 8; k++) {
1223 for (i = 0; i < comps; i++) {
1224 input[k + l * 4 + 12][i] = *lines[l]++;
1225 }
1226 }
1227 }
1228
1229 /* block layout:
1230 * 00, 01, 02, 03, 08, 09, 0a, 0b
1231 * 10, 11, 12, 13, 18, 19, 1a, 1b
1232 * 04, 05, 06, 07, 0c, 0d, 0e, 0f
1233 * 14, 15, 16, 17, 1c, 1d, 1e, 1f
1234 */
1235
1236 /* [dBorca]
1237 * stupidity flows forth from this
1238 */
1239 l = N_TEXELS;
1240 trualpha = 0;
1241 if (comps == 4) {
1242 /* skip all transparent black texels */
1243 l = 0;
1244 for (k = 0; k < N_TEXELS; k++) {
1245 /* test all components against 0 */
1246 if (!ISTBLACK(input[k])) {
1247 /* texel is not transparent black */
1248 COPY_4UBV(reord[l], input[k]);
1249 if (reord[l][ACOMP] < (255 - ALPHA_TS)) {
1250 /* non-opaque texel */
1251 trualpha = !0;
1252 }
1253 l++;
1254 }
1255 }
1256 }
1257
1258 #if 0
1259 if (trualpha) {
1260 fxt1_quantize_ALPHA0(cc, input, reord, l);
1261 } else if (l == 0) {
1262 cc[0] = cc[1] = cc[2] = -1;
1263 cc[3] = 0;
1264 } else if (l < N_TEXELS) {
1265 fxt1_quantize_HI(cc, input, reord, l);
1266 } else {
1267 fxt1_quantize_CHROMA(cc, input);
1268 }
1269 (void)fxt1_quantize_ALPHA1;
1270 (void)fxt1_quantize_MIXED1;
1271 (void)fxt1_quantize_MIXED0;
1272 #else
1273 if (trualpha) {
1274 fxt1_quantize_ALPHA1(cc, input);
1275 } else if (l == 0) {
1276 cc[0] = cc[1] = cc[2] = ~0u;
1277 cc[3] = 0;
1278 } else if (l < N_TEXELS) {
1279 fxt1_quantize_MIXED1(cc, input);
1280 } else {
1281 fxt1_quantize_MIXED0(cc, input);
1282 }
1283 (void)fxt1_quantize_ALPHA0;
1284 (void)fxt1_quantize_HI;
1285 (void)fxt1_quantize_CHROMA;
1286 #endif
1287 }
1288
1289
1290 static void
1291 fxt1_encode (GLuint width, GLuint height, GLint comps,
1292 const void *source, GLint srcRowStride,
1293 void *dest, GLint destRowStride)
1294 {
1295 GLuint x, y;
1296 const GLubyte *data;
1297 GLuint *encoded = (GLuint *)dest;
1298 void *newSource = NULL;
1299
1300 assert(comps == 3 || comps == 4);
1301
1302 /* Replicate image if width is not M8 or height is not M4 */
1303 if ((width & 7) | (height & 3)) {
1304 GLint newWidth = (width + 7) & ~7;
1305 GLint newHeight = (height + 3) & ~3;
1306 newSource = malloc(comps * newWidth * newHeight * sizeof(GLchan));
1307 if (!newSource) {
1308 GET_CURRENT_CONTEXT(ctx);
1309 _mesa_error(ctx, GL_OUT_OF_MEMORY, "texture compression");
1310 goto cleanUp;
1311 }
1312 _mesa_upscale_teximage2d(width, height, newWidth, newHeight,
1313 comps, (const GLchan *) source,
1314 srcRowStride, (GLchan *) newSource);
1315 source = newSource;
1316 width = newWidth;
1317 height = newHeight;
1318 srcRowStride = comps * newWidth;
1319 }
1320
1321 /* convert from 16/32-bit channels to GLubyte if needed */
1322 if (CHAN_TYPE != GL_UNSIGNED_BYTE) {
1323 const GLuint n = width * height * comps;
1324 const GLchan *src = (const GLchan *) source;
1325 GLubyte *dest = (GLubyte *) malloc(n * sizeof(GLubyte));
1326 GLuint i;
1327 if (!dest) {
1328 GET_CURRENT_CONTEXT(ctx);
1329 _mesa_error(ctx, GL_OUT_OF_MEMORY, "texture compression");
1330 goto cleanUp;
1331 }
1332 for (i = 0; i < n; i++) {
1333 dest[i] = CHAN_TO_UBYTE(src[i]);
1334 }
1335 if (newSource != NULL) {
1336 free(newSource);
1337 }
1338 newSource = dest; /* we'll free this buffer before returning */
1339 source = dest; /* the new, GLubyte incoming image */
1340 }
1341
1342 data = (const GLubyte *) source;
1343 destRowStride = (destRowStride - width * 2) / 4;
1344 for (y = 0; y < height; y += 4) {
1345 GLuint offs = 0 + (y + 0) * srcRowStride;
1346 for (x = 0; x < width; x += 8) {
1347 const GLubyte *lines[4];
1348 lines[0] = &data[offs];
1349 lines[1] = lines[0] + srcRowStride;
1350 lines[2] = lines[1] + srcRowStride;
1351 lines[3] = lines[2] + srcRowStride;
1352 offs += 8 * comps;
1353 fxt1_quantize(encoded, lines, comps);
1354 /* 128 bits per 8x4 block */
1355 encoded += 4;
1356 }
1357 encoded += destRowStride;
1358 }
1359
1360 cleanUp:
1361 if (newSource != NULL) {
1362 free(newSource);
1363 }
1364 }
1365
1366
1367 /***************************************************************************\
1368 * FXT1 decoder
1369 *
1370 * The decoder is based on GL_3DFX_texture_compression_FXT1
1371 * specification and serves as a concept for the encoder.
1372 \***************************************************************************/
1373
1374
1375 /* lookup table for scaling 5 bit colors up to 8 bits */
1376 static const GLubyte _rgb_scale_5[] = {
1377 0, 8, 16, 25, 33, 41, 49, 58,
1378 66, 74, 82, 90, 99, 107, 115, 123,
1379 132, 140, 148, 156, 165, 173, 181, 189,
1380 197, 206, 214, 222, 230, 239, 247, 255
1381 };
1382
1383 /* lookup table for scaling 6 bit colors up to 8 bits */
1384 static const GLubyte _rgb_scale_6[] = {
1385 0, 4, 8, 12, 16, 20, 24, 28,
1386 32, 36, 40, 45, 49, 53, 57, 61,
1387 65, 69, 73, 77, 81, 85, 89, 93,
1388 97, 101, 105, 109, 113, 117, 121, 125,
1389 130, 134, 138, 142, 146, 150, 154, 158,
1390 162, 166, 170, 174, 178, 182, 186, 190,
1391 194, 198, 202, 206, 210, 215, 219, 223,
1392 227, 231, 235, 239, 243, 247, 251, 255
1393 };
1394
1395
/* Bit-field helpers for decoding a 128-bit FXT1 block.
 *
 * CC_SEL(cc, which)  - view cc as an array of 32-bit words and shift the word
 *                      containing bit `which` so that bit lands at bit 0;
 *                      the caller masks off the width it needs.  Fields that
 *                      straddle a word boundary cannot be fetched this way.
 * UP5(c)             - expand the low 5 bits of c to an 8-bit value.
 * UP6(c, b)          - expand a 6-bit value whose upper 5 bits are the low
 *                      5 bits of c and whose LSB is bit 0 of b.
 * LERP(n, t, c0, c1) - rounded integer interpolation, t/n of the way from
 *                      c0 to c1.
 *
 * Every replacement list is fully parenthesized so the macros can be used
 * inside larger expressions.
 */
#define CC_SEL(cc, which) (((const GLuint *)(cc))[(which) / 32] >> ((which) & 31))
#define UP5(c) (_rgb_scale_5[(c) & 31])
#define UP6(c, b) (_rgb_scale_6[(((c) & 31) << 1) | ((b) & 1)])
#define LERP(n, t, c0, c1) ((((n) - (t)) * (c0) + (t) * (c1) + (n) / 2) / (n))
1400
1401
1402 static void
1403 fxt1_decode_1HI (const GLubyte *code, GLint t, GLchan *rgba)
1404 {
1405 const GLuint *cc;
1406
1407 t *= 3;
1408 cc = (const GLuint *)(code + t / 8);
1409 t = (cc[0] >> (t & 7)) & 7;
1410
1411 if (t == 7) {
1412 rgba[RCOMP] = rgba[GCOMP] = rgba[BCOMP] = rgba[ACOMP] = 0;
1413 } else {
1414 GLubyte r, g, b;
1415 cc = (const GLuint *)(code + 12);
1416 if (t == 0) {
1417 b = UP5(CC_SEL(cc, 0));
1418 g = UP5(CC_SEL(cc, 5));
1419 r = UP5(CC_SEL(cc, 10));
1420 } else if (t == 6) {
1421 b = UP5(CC_SEL(cc, 15));
1422 g = UP5(CC_SEL(cc, 20));
1423 r = UP5(CC_SEL(cc, 25));
1424 } else {
1425 b = LERP(6, t, UP5(CC_SEL(cc, 0)), UP5(CC_SEL(cc, 15)));
1426 g = LERP(6, t, UP5(CC_SEL(cc, 5)), UP5(CC_SEL(cc, 20)));
1427 r = LERP(6, t, UP5(CC_SEL(cc, 10)), UP5(CC_SEL(cc, 25)));
1428 }
1429 rgba[RCOMP] = UBYTE_TO_CHAN(r);
1430 rgba[GCOMP] = UBYTE_TO_CHAN(g);
1431 rgba[BCOMP] = UBYTE_TO_CHAN(b);
1432 rgba[ACOMP] = CHAN_MAX;
1433 }
1434 }
1435
1436
1437 static void
1438 fxt1_decode_1CHROMA (const GLubyte *code, GLint t, GLchan *rgba)
1439 {
1440 const GLuint *cc;
1441 GLuint kk;
1442
1443 cc = (const GLuint *)code;
1444 if (t & 16) {
1445 cc++;
1446 t &= 15;
1447 }
1448 t = (cc[0] >> (t * 2)) & 3;
1449
1450 t *= 15;
1451 cc = (const GLuint *)(code + 8 + t / 8);
1452 kk = cc[0] >> (t & 7);
1453 rgba[BCOMP] = UBYTE_TO_CHAN( UP5(kk) );
1454 rgba[GCOMP] = UBYTE_TO_CHAN( UP5(kk >> 5) );
1455 rgba[RCOMP] = UBYTE_TO_CHAN( UP5(kk >> 10) );
1456 rgba[ACOMP] = CHAN_MAX;
1457 }
1458
1459
1460 static void
1461 fxt1_decode_1MIXED (const GLubyte *code, GLint t, GLchan *rgba)
1462 {
1463 const GLuint *cc;
1464 GLuint col[2][3];
1465 GLint glsb, selb;
1466
1467 cc = (const GLuint *)code;
1468 if (t & 16) {
1469 t &= 15;
1470 t = (cc[1] >> (t * 2)) & 3;
1471 /* col 2 */
1472 col[0][BCOMP] = (*(const GLuint *)(code + 11)) >> 6;
1473 col[0][GCOMP] = CC_SEL(cc, 99);
1474 col[0][RCOMP] = CC_SEL(cc, 104);
1475 /* col 3 */
1476 col[1][BCOMP] = CC_SEL(cc, 109);
1477 col[1][GCOMP] = CC_SEL(cc, 114);
1478 col[1][RCOMP] = CC_SEL(cc, 119);
1479 glsb = CC_SEL(cc, 126);
1480 selb = CC_SEL(cc, 33);
1481 } else {
1482 t = (cc[0] >> (t * 2)) & 3;
1483 /* col 0 */
1484 col[0][BCOMP] = CC_SEL(cc, 64);
1485 col[0][GCOMP] = CC_SEL(cc, 69);
1486 col[0][RCOMP] = CC_SEL(cc, 74);
1487 /* col 1 */
1488 col[1][BCOMP] = CC_SEL(cc, 79);
1489 col[1][GCOMP] = CC_SEL(cc, 84);
1490 col[1][RCOMP] = CC_SEL(cc, 89);
1491 glsb = CC_SEL(cc, 125);
1492 selb = CC_SEL(cc, 1);
1493 }
1494
1495 if (CC_SEL(cc, 124) & 1) {
1496 /* alpha[0] == 1 */
1497
1498 if (t == 3) {
1499 /* zero */
1500 rgba[RCOMP] = rgba[BCOMP] = rgba[GCOMP] = rgba[ACOMP] = 0;
1501 } else {
1502 GLubyte r, g, b;
1503 if (t == 0) {
1504 b = UP5(col[0][BCOMP]);
1505 g = UP5(col[0][GCOMP]);
1506 r = UP5(col[0][RCOMP]);
1507 } else if (t == 2) {
1508 b = UP5(col[1][BCOMP]);
1509 g = UP6(col[1][GCOMP], glsb);
1510 r = UP5(col[1][RCOMP]);
1511 } else {
1512 b = (UP5(col[0][BCOMP]) + UP5(col[1][BCOMP])) / 2;
1513 g = (UP5(col[0][GCOMP]) + UP6(col[1][GCOMP], glsb)) / 2;
1514 r = (UP5(col[0][RCOMP]) + UP5(col[1][RCOMP])) / 2;
1515 }
1516 rgba[RCOMP] = UBYTE_TO_CHAN(r);
1517 rgba[GCOMP] = UBYTE_TO_CHAN(g);
1518 rgba[BCOMP] = UBYTE_TO_CHAN(b);
1519 rgba[ACOMP] = CHAN_MAX;
1520 }
1521 } else {
1522 /* alpha[0] == 0 */
1523 GLubyte r, g, b;
1524 if (t == 0) {
1525 b = UP5(col[0][BCOMP]);
1526 g = UP6(col[0][GCOMP], glsb ^ selb);
1527 r = UP5(col[0][RCOMP]);
1528 } else if (t == 3) {
1529 b = UP5(col[1][BCOMP]);
1530 g = UP6(col[1][GCOMP], glsb);
1531 r = UP5(col[1][RCOMP]);
1532 } else {
1533 b = LERP(3, t, UP5(col[0][BCOMP]), UP5(col[1][BCOMP]));
1534 g = LERP(3, t, UP6(col[0][GCOMP], glsb ^ selb),
1535 UP6(col[1][GCOMP], glsb));
1536 r = LERP(3, t, UP5(col[0][RCOMP]), UP5(col[1][RCOMP]));
1537 }
1538 rgba[RCOMP] = UBYTE_TO_CHAN(r);
1539 rgba[GCOMP] = UBYTE_TO_CHAN(g);
1540 rgba[BCOMP] = UBYTE_TO_CHAN(b);
1541 rgba[ACOMP] = CHAN_MAX;
1542 }
1543 }
1544
1545
1546 static void
1547 fxt1_decode_1ALPHA (const GLubyte *code, GLint t, GLchan *rgba)
1548 {
1549 const GLuint *cc;
1550 GLubyte r, g, b, a;
1551
1552 cc = (const GLuint *)code;
1553 if (CC_SEL(cc, 124) & 1) {
1554 /* lerp == 1 */
1555 GLuint col0[4];
1556
1557 if (t & 16) {
1558 t &= 15;
1559 t = (cc[1] >> (t * 2)) & 3;
1560 /* col 2 */
1561 col0[BCOMP] = (*(const GLuint *)(code + 11)) >> 6;
1562 col0[GCOMP] = CC_SEL(cc, 99);
1563 col0[RCOMP] = CC_SEL(cc, 104);
1564 col0[ACOMP] = CC_SEL(cc, 119);
1565 } else {
1566 t = (cc[0] >> (t * 2)) & 3;
1567 /* col 0 */
1568 col0[BCOMP] = CC_SEL(cc, 64);
1569 col0[GCOMP] = CC_SEL(cc, 69);
1570 col0[RCOMP] = CC_SEL(cc, 74);
1571 col0[ACOMP] = CC_SEL(cc, 109);
1572 }
1573
1574 if (t == 0) {
1575 b = UP5(col0[BCOMP]);
1576 g = UP5(col0[GCOMP]);
1577 r = UP5(col0[RCOMP]);
1578 a = UP5(col0[ACOMP]);
1579 } else if (t == 3) {
1580 b = UP5(CC_SEL(cc, 79));
1581 g = UP5(CC_SEL(cc, 84));
1582 r = UP5(CC_SEL(cc, 89));
1583 a = UP5(CC_SEL(cc, 114));
1584 } else {
1585 b = LERP(3, t, UP5(col0[BCOMP]), UP5(CC_SEL(cc, 79)));
1586 g = LERP(3, t, UP5(col0[GCOMP]), UP5(CC_SEL(cc, 84)));
1587 r = LERP(3, t, UP5(col0[RCOMP]), UP5(CC_SEL(cc, 89)));
1588 a = LERP(3, t, UP5(col0[ACOMP]), UP5(CC_SEL(cc, 114)));
1589 }
1590 } else {
1591 /* lerp == 0 */
1592
1593 if (t & 16) {
1594 cc++;
1595 t &= 15;
1596 }
1597 t = (cc[0] >> (t * 2)) & 3;
1598
1599 if (t == 3) {
1600 /* zero */
1601 r = g = b = a = 0;
1602 } else {
1603 GLuint kk;
1604 cc = (const GLuint *)code;
1605 a = UP5(cc[3] >> (t * 5 + 13));
1606 t *= 15;
1607 cc = (const GLuint *)(code + 8 + t / 8);
1608 kk = cc[0] >> (t & 7);
1609 b = UP5(kk);
1610 g = UP5(kk >> 5);
1611 r = UP5(kk >> 10);
1612 }
1613 }
1614 rgba[RCOMP] = UBYTE_TO_CHAN(r);
1615 rgba[GCOMP] = UBYTE_TO_CHAN(g);
1616 rgba[BCOMP] = UBYTE_TO_CHAN(b);
1617 rgba[ACOMP] = UBYTE_TO_CHAN(a);
1618 }
1619
1620
1621 void
1622 fxt1_decode_1 (const void *texture, GLint stride, /* in pixels */
1623 GLint i, GLint j, GLchan *rgba)
1624 {
1625 static void (*decode_1[]) (const GLubyte *, GLint, GLchan *) = {
1626 fxt1_decode_1HI, /* cc-high = "00?" */
1627 fxt1_decode_1HI, /* cc-high = "00?" */
1628 fxt1_decode_1CHROMA, /* cc-chroma = "010" */
1629 fxt1_decode_1ALPHA, /* alpha = "011" */
1630 fxt1_decode_1MIXED, /* mixed = "1??" */
1631 fxt1_decode_1MIXED, /* mixed = "1??" */
1632 fxt1_decode_1MIXED, /* mixed = "1??" */
1633 fxt1_decode_1MIXED /* mixed = "1??" */
1634 };
1635
1636 const GLubyte *code = (const GLubyte *)texture +
1637 ((j / 4) * (stride / 8) + (i / 8)) * 16;
1638 GLint mode = CC_SEL(code, 125);
1639 GLint t = i & 7;
1640
1641 if (t & 4) {
1642 t += 12;
1643 }
1644 t += (j & 3) * 4;
1645
1646 decode_1[mode](code, t, rgba);
1647 }
1648
1649
1650 #endif /* FEATURE_texture_fxt1 */