mesa: fix image unpacking when storing compressed textures
[mesa.git] / src / mesa / main / texcompress_fxt1.c
1 /*
2 * Mesa 3-D graphics library
3 * Version: 7.1
4 *
5 * Copyright (C) 1999-2008 Brian Paul All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included
15 * in all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
21 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25
26 /**
27 * \file texcompress_fxt1.c
28 * GL_3DFX_texture_compression_FXT1 support.
29 */
30
31
32 #include "glheader.h"
33 #include "imports.h"
34 #include "colormac.h"
35 #include "image.h"
36 #include "macros.h"
37 #include "mfeatures.h"
38 #include "mipmap.h"
39 #include "texcompress.h"
40 #include "texcompress_fxt1.h"
41 #include "texstore.h"
42 #include "swrast/s_context.h"
43
44
45 #if FEATURE_texture_fxt1
46
47
48 static void
49 fxt1_encode (GLuint width, GLuint height, GLint comps,
50 const void *source, GLint srcRowStride,
51 void *dest, GLint destRowStride);
52
53 void
54 fxt1_decode_1 (const void *texture, GLint stride,
55 GLint i, GLint j, GLubyte *rgba);
56
57
58 /**
59 * Store user's image in rgb_fxt1 format.
60 */
61 GLboolean
62 _mesa_texstore_rgb_fxt1(TEXSTORE_PARAMS)
63 {
64 const GLubyte *pixels;
65 GLint srcRowStride;
66 GLubyte *dst;
67 const GLint texWidth = dstRowStride * 8 / 16; /* a bit of a hack */
68 const GLubyte *tempImage = NULL;
69
70 ASSERT(dstFormat == MESA_FORMAT_RGB_FXT1);
71 ASSERT(dstXoffset % 8 == 0);
72 ASSERT(dstYoffset % 4 == 0);
73 ASSERT(dstZoffset == 0);
74 (void) dstZoffset;
75
76 if (srcFormat != GL_RGB ||
77 srcType != GL_UNSIGNED_BYTE ||
78 ctx->_ImageTransferState ||
79 srcPacking->RowLength != srcWidth ||
80 srcPacking->SwapBytes) {
81 /* convert image to RGB/GLubyte */
82 tempImage = _mesa_make_temp_ubyte_image(ctx, dims,
83 baseInternalFormat,
84 _mesa_get_format_base_format(dstFormat),
85 srcWidth, srcHeight, srcDepth,
86 srcFormat, srcType, srcAddr,
87 srcPacking);
88 if (!tempImage)
89 return GL_FALSE; /* out of memory */
90 pixels = tempImage;
91 srcRowStride = 3 * srcWidth;
92 srcFormat = GL_RGB;
93 }
94 else {
95 pixels = _mesa_image_address2d(srcPacking, srcAddr, srcWidth, srcHeight,
96 srcFormat, srcType, 0, 0);
97
98 srcRowStride = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat,
99 srcType) / sizeof(GLubyte);
100 }
101
102 dst = _mesa_compressed_image_address(dstXoffset, dstYoffset, 0,
103 dstFormat,
104 texWidth, dstSlices[0]);
105
106 fxt1_encode(srcWidth, srcHeight, 3, pixels, srcRowStride,
107 dst, dstRowStride);
108
109 if (tempImage)
110 free((void*) tempImage);
111
112 return GL_TRUE;
113 }
114
115
116 /**
117 * Store user's image in rgba_fxt1 format.
118 */
119 GLboolean
120 _mesa_texstore_rgba_fxt1(TEXSTORE_PARAMS)
121 {
122 const GLubyte *pixels;
123 GLint srcRowStride;
124 GLubyte *dst;
125 GLint texWidth = dstRowStride * 8 / 16; /* a bit of a hack */
126 const GLubyte *tempImage = NULL;
127
128 ASSERT(dstFormat == MESA_FORMAT_RGBA_FXT1);
129 ASSERT(dstXoffset % 8 == 0);
130 ASSERT(dstYoffset % 4 == 0);
131 ASSERT(dstZoffset == 0);
132 (void) dstZoffset;
133
134 if (srcFormat != GL_RGBA ||
135 srcType != GL_UNSIGNED_BYTE ||
136 ctx->_ImageTransferState ||
137 srcPacking->SwapBytes) {
138 /* convert image to RGBA/GLubyte */
139 tempImage = _mesa_make_temp_ubyte_image(ctx, dims,
140 baseInternalFormat,
141 _mesa_get_format_base_format(dstFormat),
142 srcWidth, srcHeight, srcDepth,
143 srcFormat, srcType, srcAddr,
144 srcPacking);
145 if (!tempImage)
146 return GL_FALSE; /* out of memory */
147 pixels = tempImage;
148 srcRowStride = 4 * srcWidth;
149 srcFormat = GL_RGBA;
150 }
151 else {
152 pixels = _mesa_image_address2d(srcPacking, srcAddr, srcWidth, srcHeight,
153 srcFormat, srcType, 0, 0);
154
155 srcRowStride = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat,
156 srcType) / sizeof(GLubyte);
157 }
158
159 dst = _mesa_compressed_image_address(dstXoffset, dstYoffset, 0,
160 dstFormat,
161 texWidth, dstSlices[0]);
162
163 fxt1_encode(srcWidth, srcHeight, 4, pixels, srcRowStride,
164 dst, dstRowStride);
165
166 if (tempImage)
167 free((void*) tempImage);
168
169 return GL_TRUE;
170 }
171
172
173 void
174 _mesa_fetch_texel_2d_f_rgba_fxt1( const struct swrast_texture_image *texImage,
175 GLint i, GLint j, GLint k, GLfloat *texel )
176 {
177 /* just sample as GLubyte and convert to float here */
178 GLubyte rgba[4];
179 (void) k;
180 fxt1_decode_1(texImage->Base.Data, texImage->Base.RowStride, i, j, rgba);
181 texel[RCOMP] = UBYTE_TO_FLOAT(rgba[RCOMP]);
182 texel[GCOMP] = UBYTE_TO_FLOAT(rgba[GCOMP]);
183 texel[BCOMP] = UBYTE_TO_FLOAT(rgba[BCOMP]);
184 texel[ACOMP] = UBYTE_TO_FLOAT(rgba[ACOMP]);
185 }
186
187
188 void
189 _mesa_fetch_texel_2d_f_rgb_fxt1( const struct swrast_texture_image *texImage,
190 GLint i, GLint j, GLint k, GLfloat *texel )
191 {
192 /* just sample as GLubyte and convert to float here */
193 GLubyte rgba[4];
194 (void) k;
195 fxt1_decode_1(texImage->Base.Data, texImage->Base.RowStride, i, j, rgba);
196 texel[RCOMP] = UBYTE_TO_FLOAT(rgba[RCOMP]);
197 texel[GCOMP] = UBYTE_TO_FLOAT(rgba[GCOMP]);
198 texel[BCOMP] = UBYTE_TO_FLOAT(rgba[BCOMP]);
199 texel[ACOMP] = 1.0F;
200 }
201
202
203
204 /***************************************************************************\
205 * FXT1 encoder
206 *
207 * The encoder was built by reversing the decoder,
208 * and is vaguely based on Texus2 by 3dfx. Note that this code
209 * is merely a proof of concept, since it is highly UNoptimized;
210 * moreover, it is sub-optimal due to initial conditions passed
211 * to Lloyd's algorithm (the interpolation modes are even worse).
212 \***************************************************************************/
213
214
#define MAX_COMP 4 /* ever needed maximum number of components in texel */
#define MAX_VECT 4 /* ever needed maximum number of base vectors to find */
#define N_TEXELS 32 /* number of texels in a block (always 32) */
#define LL_N_REP 50 /* number of iterations in lloyd's vq */
#define LL_RMS_D 10 /* fault tolerance (maximum delta) */
#define LL_RMS_E 255 /* fault tolerance (maximum error) */
#define ALPHA_TS 2 /* alpha threshold: (255 - ALPHA_TS) deemed opaque */

/*
 * True when all four component bytes of texel v are zero ("transparent
 * black").  The bytes are tested individually instead of loading the texel
 * through a GLuint pointer, which would violate strict aliasing and could be
 * a misaligned access.  v is evaluated more than once, so it must not have
 * side effects.
 */
#define ISTBLACK(v) ((((v)[0]) | ((v)[1]) | ((v)[2]) | ((v)[3])) == 0)
223
224
/*
 * Define a 64-bit unsigned integer type and macros.
 *
 * FX64_MOV32(a, b)  load the 32-bit value b into a
 * FX64_OR32(a, b)   OR the 32-bit value b into the low bits of a
 * FX64_SHL(a, c)    shift a left by c bits
 *
 * All arguments are parenthesized in the expansions so that expressions can
 * be passed safely.
 */
#if 1

#define FX64_NATIVE 1

typedef uint64_t Fx64;

#define FX64_MOV32(a, b) ((a) = (b))
#define FX64_OR32(a, b)  ((a) |= (b))
#define FX64_SHL(a, c)   ((a) <<= (c))

#else

#define FX64_NATIVE 0

typedef struct {
   GLuint lo, hi;
} Fx64;

/* clear the high word too, so that FX64_SHL never reads an
 * uninitialized value */
#define FX64_MOV32(a, b) \
   do { \
      (a).lo = (b); \
      (a).hi = 0; \
   } while (0)

#define FX64_OR32(a, b) ((a).lo |= (b))

#define FX64_SHL(a, c) \
   do { \
      if ((c) >= 32) { \
         (a).hi = (a).lo << ((c) - 32); \
         (a).lo = 0; \
      } else { \
         (a).hi = ((a).hi << (c)) | ((a).lo >> (32 - (c))); \
         (a).lo <<= (c); \
      } \
   } while (0)

#endif
261
262
/* Per-component weight of the color metric; uniform for now.
 * Can be used to obtain an oblong metric: 0.30 / 0.59 / 0.11 */
#define F(i) ((GLfloat)1)
#define SAFECDOT 1 /* for paranoids */

/*
 * MAKEIVEC: compute the interpolation vector IV and bias B for the line
 * from color V0 to color V1, scaled so that CALCCDOT yields an index in
 * the range [0, NV].
 *
 * NOTE: both MAKEIVEC and CALCCDOT use a loop variable named `i` that must
 * be declared (as an integer) in the calling scope; its value is clobbered.
 * d2 is the squared length of V1 - V0; the caller must make sure the two
 * colors differ, otherwise the division below is by zero.
 */
#define MAKEIVEC(NV, NC, IV, B, V0, V1) \
   do { \
      /* compute interpolation vector */ \
      GLfloat d2 = 0.0F; \
      GLfloat rd2; \
      \
      for (i = 0; i < (NC); i++) { \
         (IV)[i] = ((V1)[i] - (V0)[i]) * F(i); \
         d2 += (IV)[i] * (IV)[i]; \
      } \
      rd2 = (GLfloat)(NV) / d2; \
      (B) = 0; \
      for (i = 0; i < (NC); i++) { \
         (IV)[i] *= F(i); \
         (B) -= (IV)[i] * (V0)[i]; \
         (IV)[i] *= rd2; \
      } \
      (B) = (B) * rd2 + 0.5f; \
   } while (0)

/*
 * CALCCDOT: project color V onto the interpolation vector IV (with bias B)
 * and store the resulting index in TEXEL.  When SAFECDOT is non-zero the
 * index is clamped to [0, NV].
 */
#define CALCCDOT(TEXEL, NV, NC, IV, B, V) \
   do { \
      GLfloat dot = 0.0F; \
      for (i = 0; i < (NC); i++) { \
         dot += (V)[i] * (IV)[i]; \
      } \
      (TEXEL) = (GLint)(dot + (B)); \
      if (SAFECDOT) { \
         if ((TEXEL) < 0) { \
            (TEXEL) = 0; \
         } else if ((TEXEL) > (NV)) { \
            (TEXEL) = (NV); \
         } \
      } \
   } while (0)
301
302
303 static GLint
304 fxt1_bestcol (GLfloat vec[][MAX_COMP], GLint nv,
305 GLubyte input[MAX_COMP], GLint nc)
306 {
307 GLint i, j, best = -1;
308 GLfloat err = 1e9; /* big enough */
309
310 for (j = 0; j < nv; j++) {
311 GLfloat e = 0.0F;
312 for (i = 0; i < nc; i++) {
313 e += (vec[j][i] - input[i]) * (vec[j][i] - input[i]);
314 }
315 if (e < err) {
316 err = e;
317 best = j;
318 }
319 }
320
321 return best;
322 }
323
324
325 static GLint
326 fxt1_worst (GLfloat vec[MAX_COMP],
327 GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
328 {
329 GLint i, k, worst = -1;
330 GLfloat err = -1.0F; /* small enough */
331
332 for (k = 0; k < n; k++) {
333 GLfloat e = 0.0F;
334 for (i = 0; i < nc; i++) {
335 e += (vec[i] - input[k][i]) * (vec[i] - input[k][i]);
336 }
337 if (e > err) {
338 err = e;
339 worst = k;
340 }
341 }
342
343 return worst;
344 }
345
346
347 static GLint
348 fxt1_variance (GLdouble variance[MAX_COMP],
349 GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
350 {
351 GLint i, k, best = 0;
352 GLint sx, sx2;
353 GLdouble var, maxvar = -1; /* small enough */
354 GLdouble teenth = 1.0 / n;
355
356 for (i = 0; i < nc; i++) {
357 sx = sx2 = 0;
358 for (k = 0; k < n; k++) {
359 GLint t = input[k][i];
360 sx += t;
361 sx2 += t * t;
362 }
363 var = sx2 * teenth - sx * sx * teenth * teenth;
364 if (maxvar < var) {
365 maxvar = var;
366 best = i;
367 }
368 if (variance) {
369 variance[i] = var;
370 }
371 }
372
373 return best;
374 }
375
376
377 static GLint
378 fxt1_choose (GLfloat vec[][MAX_COMP], GLint nv,
379 GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
380 {
381 #if 0
382 /* Choose colors from a grid.
383 */
384 GLint i, j;
385
386 for (j = 0; j < nv; j++) {
387 GLint m = j * (n - 1) / (nv - 1);
388 for (i = 0; i < nc; i++) {
389 vec[j][i] = input[m][i];
390 }
391 }
392 #else
393 /* Our solution here is to find the darkest and brightest colors in
394 * the 8x4 tile and use those as the two representative colors.
395 * There are probably better algorithms to use (histogram-based).
396 */
397 GLint i, j, k;
398 GLint minSum = 2000; /* big enough */
399 GLint maxSum = -1; /* small enough */
400 GLint minCol = 0; /* phoudoin: silent compiler! */
401 GLint maxCol = 0; /* phoudoin: silent compiler! */
402
403 struct {
404 GLint flag;
405 GLint key;
406 GLint freq;
407 GLint idx;
408 } hist[N_TEXELS];
409 GLint lenh = 0;
410
411 memset(hist, 0, sizeof(hist));
412
413 for (k = 0; k < n; k++) {
414 GLint l;
415 GLint key = 0;
416 GLint sum = 0;
417 for (i = 0; i < nc; i++) {
418 key <<= 8;
419 key |= input[k][i];
420 sum += input[k][i];
421 }
422 for (l = 0; l < n; l++) {
423 if (!hist[l].flag) {
424 /* alloc new slot */
425 hist[l].flag = !0;
426 hist[l].key = key;
427 hist[l].freq = 1;
428 hist[l].idx = k;
429 lenh = l + 1;
430 break;
431 } else if (hist[l].key == key) {
432 hist[l].freq++;
433 break;
434 }
435 }
436 if (minSum > sum) {
437 minSum = sum;
438 minCol = k;
439 }
440 if (maxSum < sum) {
441 maxSum = sum;
442 maxCol = k;
443 }
444 }
445
446 if (lenh <= nv) {
447 for (j = 0; j < lenh; j++) {
448 for (i = 0; i < nc; i++) {
449 vec[j][i] = (GLfloat)input[hist[j].idx][i];
450 }
451 }
452 for (; j < nv; j++) {
453 for (i = 0; i < nc; i++) {
454 vec[j][i] = vec[0][i];
455 }
456 }
457 return 0;
458 }
459
460 for (j = 0; j < nv; j++) {
461 for (i = 0; i < nc; i++) {
462 vec[j][i] = ((nv - 1 - j) * input[minCol][i] + j * input[maxCol][i] + (nv - 1) / 2) / (GLfloat)(nv - 1);
463 }
464 }
465 #endif
466
467 return !0;
468 }
469
470
471 static GLint
472 fxt1_lloyd (GLfloat vec[][MAX_COMP], GLint nv,
473 GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
474 {
475 /* Use the generalized lloyd's algorithm for VQ:
476 * find 4 color vectors.
477 *
478 * for each sample color
479 * sort to nearest vector.
480 *
481 * replace each vector with the centroid of its matching colors.
482 *
483 * repeat until RMS doesn't improve.
484 *
485 * if a color vector has no samples, or becomes the same as another
486 * vector, replace it with the color which is farthest from a sample.
487 *
488 * vec[][MAX_COMP] initial vectors and resulting colors
489 * nv number of resulting colors required
490 * input[N_TEXELS][MAX_COMP] input texels
491 * nc number of components in input / vec
492 * n number of input samples
493 */
494
495 GLint sum[MAX_VECT][MAX_COMP]; /* used to accumulate closest texels */
496 GLint cnt[MAX_VECT]; /* how many times a certain vector was chosen */
497 GLfloat error, lasterror = 1e9;
498
499 GLint i, j, k, rep;
500
501 /* the quantizer */
502 for (rep = 0; rep < LL_N_REP; rep++) {
503 /* reset sums & counters */
504 for (j = 0; j < nv; j++) {
505 for (i = 0; i < nc; i++) {
506 sum[j][i] = 0;
507 }
508 cnt[j] = 0;
509 }
510 error = 0;
511
512 /* scan whole block */
513 for (k = 0; k < n; k++) {
514 #if 1
515 GLint best = -1;
516 GLfloat err = 1e9; /* big enough */
517 /* determine best vector */
518 for (j = 0; j < nv; j++) {
519 GLfloat e = (vec[j][0] - input[k][0]) * (vec[j][0] - input[k][0]) +
520 (vec[j][1] - input[k][1]) * (vec[j][1] - input[k][1]) +
521 (vec[j][2] - input[k][2]) * (vec[j][2] - input[k][2]);
522 if (nc == 4) {
523 e += (vec[j][3] - input[k][3]) * (vec[j][3] - input[k][3]);
524 }
525 if (e < err) {
526 err = e;
527 best = j;
528 }
529 }
530 #else
531 GLint best = fxt1_bestcol(vec, nv, input[k], nc, &err);
532 #endif
533 assert(best >= 0);
534 /* add in closest color */
535 for (i = 0; i < nc; i++) {
536 sum[best][i] += input[k][i];
537 }
538 /* mark this vector as used */
539 cnt[best]++;
540 /* accumulate error */
541 error += err;
542 }
543
544 /* check RMS */
545 if ((error < LL_RMS_E) ||
546 ((error < lasterror) && ((lasterror - error) < LL_RMS_D))) {
547 return !0; /* good match */
548 }
549 lasterror = error;
550
551 /* move each vector to the barycenter of its closest colors */
552 for (j = 0; j < nv; j++) {
553 if (cnt[j]) {
554 GLfloat div = 1.0F / cnt[j];
555 for (i = 0; i < nc; i++) {
556 vec[j][i] = div * sum[j][i];
557 }
558 } else {
559 /* this vec has no samples or is identical with a previous vec */
560 GLint worst = fxt1_worst(vec[j], input, nc, n);
561 for (i = 0; i < nc; i++) {
562 vec[j][i] = input[worst][i];
563 }
564 }
565 }
566 }
567
568 return 0; /* could not converge fast enough */
569 }
570
571
572 static void
573 fxt1_quantize_CHROMA (GLuint *cc,
574 GLubyte input[N_TEXELS][MAX_COMP])
575 {
576 const GLint n_vect = 4; /* 4 base vectors to find */
577 const GLint n_comp = 3; /* 3 components: R, G, B */
578 GLfloat vec[MAX_VECT][MAX_COMP];
579 GLint i, j, k;
580 Fx64 hi; /* high quadword */
581 GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
582
583 if (fxt1_choose(vec, n_vect, input, n_comp, N_TEXELS) != 0) {
584 fxt1_lloyd(vec, n_vect, input, n_comp, N_TEXELS);
585 }
586
587 FX64_MOV32(hi, 4); /* cc-chroma = "010" + unused bit */
588 for (j = n_vect - 1; j >= 0; j--) {
589 for (i = 0; i < n_comp; i++) {
590 /* add in colors */
591 FX64_SHL(hi, 5);
592 FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
593 }
594 }
595 ((Fx64 *)cc)[1] = hi;
596
597 lohi = lolo = 0;
598 /* right microtile */
599 for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) {
600 lohi <<= 2;
601 lohi |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
602 }
603 /* left microtile */
604 for (; k >= 0; k--) {
605 lolo <<= 2;
606 lolo |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
607 }
608 cc[1] = lohi;
609 cc[0] = lolo;
610 }
611
612
613 static void
614 fxt1_quantize_ALPHA0 (GLuint *cc,
615 GLubyte input[N_TEXELS][MAX_COMP],
616 GLubyte reord[N_TEXELS][MAX_COMP], GLint n)
617 {
618 const GLint n_vect = 3; /* 3 base vectors to find */
619 const GLint n_comp = 4; /* 4 components: R, G, B, A */
620 GLfloat vec[MAX_VECT][MAX_COMP];
621 GLint i, j, k;
622 Fx64 hi; /* high quadword */
623 GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
624
625 /* the last vector indicates zero */
626 for (i = 0; i < n_comp; i++) {
627 vec[n_vect][i] = 0;
628 }
629
630 /* the first n texels in reord are guaranteed to be non-zero */
631 if (fxt1_choose(vec, n_vect, reord, n_comp, n) != 0) {
632 fxt1_lloyd(vec, n_vect, reord, n_comp, n);
633 }
634
635 FX64_MOV32(hi, 6); /* alpha = "011" + lerp = 0 */
636 for (j = n_vect - 1; j >= 0; j--) {
637 /* add in alphas */
638 FX64_SHL(hi, 5);
639 FX64_OR32(hi, (GLuint)(vec[j][ACOMP] / 8.0F));
640 }
641 for (j = n_vect - 1; j >= 0; j--) {
642 for (i = 0; i < n_comp - 1; i++) {
643 /* add in colors */
644 FX64_SHL(hi, 5);
645 FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
646 }
647 }
648 ((Fx64 *)cc)[1] = hi;
649
650 lohi = lolo = 0;
651 /* right microtile */
652 for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) {
653 lohi <<= 2;
654 lohi |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
655 }
656 /* left microtile */
657 for (; k >= 0; k--) {
658 lolo <<= 2;
659 lolo |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
660 }
661 cc[1] = lohi;
662 cc[0] = lolo;
663 }
664
665
666 static void
667 fxt1_quantize_ALPHA1 (GLuint *cc,
668 GLubyte input[N_TEXELS][MAX_COMP])
669 {
670 const GLint n_vect = 3; /* highest vector number in each microtile */
671 const GLint n_comp = 4; /* 4 components: R, G, B, A */
672 GLfloat vec[1 + 1 + 1][MAX_COMP]; /* 1.5 extrema for each sub-block */
673 GLfloat b, iv[MAX_COMP]; /* interpolation vector */
674 GLint i, j, k;
675 Fx64 hi; /* high quadword */
676 GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
677
678 GLint minSum;
679 GLint maxSum;
680 GLint minColL = 0, maxColL = 0;
681 GLint minColR = 0, maxColR = 0;
682 GLint sumL = 0, sumR = 0;
683 GLint nn_comp;
684 /* Our solution here is to find the darkest and brightest colors in
685 * the 4x4 tile and use those as the two representative colors.
686 * There are probably better algorithms to use (histogram-based).
687 */
688 nn_comp = n_comp;
689 while ((minColL == maxColL) && nn_comp) {
690 minSum = 2000; /* big enough */
691 maxSum = -1; /* small enough */
692 for (k = 0; k < N_TEXELS / 2; k++) {
693 GLint sum = 0;
694 for (i = 0; i < nn_comp; i++) {
695 sum += input[k][i];
696 }
697 if (minSum > sum) {
698 minSum = sum;
699 minColL = k;
700 }
701 if (maxSum < sum) {
702 maxSum = sum;
703 maxColL = k;
704 }
705 sumL += sum;
706 }
707
708 nn_comp--;
709 }
710
711 nn_comp = n_comp;
712 while ((minColR == maxColR) && nn_comp) {
713 minSum = 2000; /* big enough */
714 maxSum = -1; /* small enough */
715 for (k = N_TEXELS / 2; k < N_TEXELS; k++) {
716 GLint sum = 0;
717 for (i = 0; i < nn_comp; i++) {
718 sum += input[k][i];
719 }
720 if (minSum > sum) {
721 minSum = sum;
722 minColR = k;
723 }
724 if (maxSum < sum) {
725 maxSum = sum;
726 maxColR = k;
727 }
728 sumR += sum;
729 }
730
731 nn_comp--;
732 }
733
734 /* choose the common vector (yuck!) */
735 {
736 GLint j1, j2;
737 GLint v1 = 0, v2 = 0;
738 GLfloat err = 1e9; /* big enough */
739 GLfloat tv[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
740 for (i = 0; i < n_comp; i++) {
741 tv[0][i] = input[minColL][i];
742 tv[1][i] = input[maxColL][i];
743 tv[2][i] = input[minColR][i];
744 tv[3][i] = input[maxColR][i];
745 }
746 for (j1 = 0; j1 < 2; j1++) {
747 for (j2 = 2; j2 < 4; j2++) {
748 GLfloat e = 0.0F;
749 for (i = 0; i < n_comp; i++) {
750 e += (tv[j1][i] - tv[j2][i]) * (tv[j1][i] - tv[j2][i]);
751 }
752 if (e < err) {
753 err = e;
754 v1 = j1;
755 v2 = j2;
756 }
757 }
758 }
759 for (i = 0; i < n_comp; i++) {
760 vec[0][i] = tv[1 - v1][i];
761 vec[1][i] = (tv[v1][i] * sumL + tv[v2][i] * sumR) / (sumL + sumR);
762 vec[2][i] = tv[5 - v2][i];
763 }
764 }
765
766 /* left microtile */
767 cc[0] = 0;
768 if (minColL != maxColL) {
769 /* compute interpolation vector */
770 MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
771
772 /* add in texels */
773 lolo = 0;
774 for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
775 GLint texel;
776 /* interpolate color */
777 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
778 /* add in texel */
779 lolo <<= 2;
780 lolo |= texel;
781 }
782
783 cc[0] = lolo;
784 }
785
786 /* right microtile */
787 cc[1] = 0;
788 if (minColR != maxColR) {
789 /* compute interpolation vector */
790 MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[1]);
791
792 /* add in texels */
793 lohi = 0;
794 for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
795 GLint texel;
796 /* interpolate color */
797 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
798 /* add in texel */
799 lohi <<= 2;
800 lohi |= texel;
801 }
802
803 cc[1] = lohi;
804 }
805
806 FX64_MOV32(hi, 7); /* alpha = "011" + lerp = 1 */
807 for (j = n_vect - 1; j >= 0; j--) {
808 /* add in alphas */
809 FX64_SHL(hi, 5);
810 FX64_OR32(hi, (GLuint)(vec[j][ACOMP] / 8.0F));
811 }
812 for (j = n_vect - 1; j >= 0; j--) {
813 for (i = 0; i < n_comp - 1; i++) {
814 /* add in colors */
815 FX64_SHL(hi, 5);
816 FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
817 }
818 }
819 ((Fx64 *)cc)[1] = hi;
820 }
821
822
823 static void
824 fxt1_quantize_HI (GLuint *cc,
825 GLubyte input[N_TEXELS][MAX_COMP],
826 GLubyte reord[N_TEXELS][MAX_COMP], GLint n)
827 {
828 const GLint n_vect = 6; /* highest vector number */
829 const GLint n_comp = 3; /* 3 components: R, G, B */
830 GLfloat b = 0.0F; /* phoudoin: silent compiler! */
831 GLfloat iv[MAX_COMP]; /* interpolation vector */
832 GLint i, k;
833 GLuint hihi; /* high quadword: hi dword */
834
835 GLint minSum = 2000; /* big enough */
836 GLint maxSum = -1; /* small enough */
837 GLint minCol = 0; /* phoudoin: silent compiler! */
838 GLint maxCol = 0; /* phoudoin: silent compiler! */
839
840 /* Our solution here is to find the darkest and brightest colors in
841 * the 8x4 tile and use those as the two representative colors.
842 * There are probably better algorithms to use (histogram-based).
843 */
844 for (k = 0; k < n; k++) {
845 GLint sum = 0;
846 for (i = 0; i < n_comp; i++) {
847 sum += reord[k][i];
848 }
849 if (minSum > sum) {
850 minSum = sum;
851 minCol = k;
852 }
853 if (maxSum < sum) {
854 maxSum = sum;
855 maxCol = k;
856 }
857 }
858
859 hihi = 0; /* cc-hi = "00" */
860 for (i = 0; i < n_comp; i++) {
861 /* add in colors */
862 hihi <<= 5;
863 hihi |= reord[maxCol][i] >> 3;
864 }
865 for (i = 0; i < n_comp; i++) {
866 /* add in colors */
867 hihi <<= 5;
868 hihi |= reord[minCol][i] >> 3;
869 }
870 cc[3] = hihi;
871 cc[0] = cc[1] = cc[2] = 0;
872
873 /* compute interpolation vector */
874 if (minCol != maxCol) {
875 MAKEIVEC(n_vect, n_comp, iv, b, reord[minCol], reord[maxCol]);
876 }
877
878 /* add in texels */
879 for (k = N_TEXELS - 1; k >= 0; k--) {
880 GLint t = k * 3;
881 GLuint *kk = (GLuint *)((char *)cc + t / 8);
882 GLint texel = n_vect + 1; /* transparent black */
883
884 if (!ISTBLACK(input[k])) {
885 if (minCol != maxCol) {
886 /* interpolate color */
887 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
888 /* add in texel */
889 kk[0] |= texel << (t & 7);
890 }
891 } else {
892 /* add in texel */
893 kk[0] |= texel << (t & 7);
894 }
895 }
896 }
897
898
899 static void
900 fxt1_quantize_MIXED1 (GLuint *cc,
901 GLubyte input[N_TEXELS][MAX_COMP])
902 {
903 const GLint n_vect = 2; /* highest vector number in each microtile */
904 const GLint n_comp = 3; /* 3 components: R, G, B */
905 GLubyte vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
906 GLfloat b, iv[MAX_COMP]; /* interpolation vector */
907 GLint i, j, k;
908 Fx64 hi; /* high quadword */
909 GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
910
911 GLint minSum;
912 GLint maxSum;
913 GLint minColL = 0, maxColL = -1;
914 GLint minColR = 0, maxColR = -1;
915
916 /* Our solution here is to find the darkest and brightest colors in
917 * the 4x4 tile and use those as the two representative colors.
918 * There are probably better algorithms to use (histogram-based).
919 */
920 minSum = 2000; /* big enough */
921 maxSum = -1; /* small enough */
922 for (k = 0; k < N_TEXELS / 2; k++) {
923 if (!ISTBLACK(input[k])) {
924 GLint sum = 0;
925 for (i = 0; i < n_comp; i++) {
926 sum += input[k][i];
927 }
928 if (minSum > sum) {
929 minSum = sum;
930 minColL = k;
931 }
932 if (maxSum < sum) {
933 maxSum = sum;
934 maxColL = k;
935 }
936 }
937 }
938 minSum = 2000; /* big enough */
939 maxSum = -1; /* small enough */
940 for (; k < N_TEXELS; k++) {
941 if (!ISTBLACK(input[k])) {
942 GLint sum = 0;
943 for (i = 0; i < n_comp; i++) {
944 sum += input[k][i];
945 }
946 if (minSum > sum) {
947 minSum = sum;
948 minColR = k;
949 }
950 if (maxSum < sum) {
951 maxSum = sum;
952 maxColR = k;
953 }
954 }
955 }
956
957 /* left microtile */
958 if (maxColL == -1) {
959 /* all transparent black */
960 cc[0] = ~0u;
961 for (i = 0; i < n_comp; i++) {
962 vec[0][i] = 0;
963 vec[1][i] = 0;
964 }
965 } else {
966 cc[0] = 0;
967 for (i = 0; i < n_comp; i++) {
968 vec[0][i] = input[minColL][i];
969 vec[1][i] = input[maxColL][i];
970 }
971 if (minColL != maxColL) {
972 /* compute interpolation vector */
973 MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
974
975 /* add in texels */
976 lolo = 0;
977 for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
978 GLint texel = n_vect + 1; /* transparent black */
979 if (!ISTBLACK(input[k])) {
980 /* interpolate color */
981 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
982 }
983 /* add in texel */
984 lolo <<= 2;
985 lolo |= texel;
986 }
987 cc[0] = lolo;
988 }
989 }
990
991 /* right microtile */
992 if (maxColR == -1) {
993 /* all transparent black */
994 cc[1] = ~0u;
995 for (i = 0; i < n_comp; i++) {
996 vec[2][i] = 0;
997 vec[3][i] = 0;
998 }
999 } else {
1000 cc[1] = 0;
1001 for (i = 0; i < n_comp; i++) {
1002 vec[2][i] = input[minColR][i];
1003 vec[3][i] = input[maxColR][i];
1004 }
1005 if (minColR != maxColR) {
1006 /* compute interpolation vector */
1007 MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);
1008
1009 /* add in texels */
1010 lohi = 0;
1011 for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
1012 GLint texel = n_vect + 1; /* transparent black */
1013 if (!ISTBLACK(input[k])) {
1014 /* interpolate color */
1015 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
1016 }
1017 /* add in texel */
1018 lohi <<= 2;
1019 lohi |= texel;
1020 }
1021 cc[1] = lohi;
1022 }
1023 }
1024
1025 FX64_MOV32(hi, 9 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
1026 for (j = 2 * 2 - 1; j >= 0; j--) {
1027 for (i = 0; i < n_comp; i++) {
1028 /* add in colors */
1029 FX64_SHL(hi, 5);
1030 FX64_OR32(hi, vec[j][i] >> 3);
1031 }
1032 }
1033 ((Fx64 *)cc)[1] = hi;
1034 }
1035
1036
1037 static void
1038 fxt1_quantize_MIXED0 (GLuint *cc,
1039 GLubyte input[N_TEXELS][MAX_COMP])
1040 {
1041 const GLint n_vect = 3; /* highest vector number in each microtile */
1042 const GLint n_comp = 3; /* 3 components: R, G, B */
1043 GLubyte vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
1044 GLfloat b, iv[MAX_COMP]; /* interpolation vector */
1045 GLint i, j, k;
1046 Fx64 hi; /* high quadword */
1047 GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
1048
1049 GLint minColL = 0, maxColL = 0;
1050 GLint minColR = 0, maxColR = 0;
1051 #if 0
1052 GLint minSum;
1053 GLint maxSum;
1054
1055 /* Our solution here is to find the darkest and brightest colors in
1056 * the 4x4 tile and use those as the two representative colors.
1057 * There are probably better algorithms to use (histogram-based).
1058 */
1059 minSum = 2000; /* big enough */
1060 maxSum = -1; /* small enough */
1061 for (k = 0; k < N_TEXELS / 2; k++) {
1062 GLint sum = 0;
1063 for (i = 0; i < n_comp; i++) {
1064 sum += input[k][i];
1065 }
1066 if (minSum > sum) {
1067 minSum = sum;
1068 minColL = k;
1069 }
1070 if (maxSum < sum) {
1071 maxSum = sum;
1072 maxColL = k;
1073 }
1074 }
1075 minSum = 2000; /* big enough */
1076 maxSum = -1; /* small enough */
1077 for (; k < N_TEXELS; k++) {
1078 GLint sum = 0;
1079 for (i = 0; i < n_comp; i++) {
1080 sum += input[k][i];
1081 }
1082 if (minSum > sum) {
1083 minSum = sum;
1084 minColR = k;
1085 }
1086 if (maxSum < sum) {
1087 maxSum = sum;
1088 maxColR = k;
1089 }
1090 }
1091 #else
1092 GLint minVal;
1093 GLint maxVal;
1094 GLint maxVarL = fxt1_variance(NULL, input, n_comp, N_TEXELS / 2);
1095 GLint maxVarR = fxt1_variance(NULL, &input[N_TEXELS / 2], n_comp, N_TEXELS / 2);
1096
1097 /* Scan the channel with max variance for lo & hi
1098 * and use those as the two representative colors.
1099 */
1100 minVal = 2000; /* big enough */
1101 maxVal = -1; /* small enough */
1102 for (k = 0; k < N_TEXELS / 2; k++) {
1103 GLint t = input[k][maxVarL];
1104 if (minVal > t) {
1105 minVal = t;
1106 minColL = k;
1107 }
1108 if (maxVal < t) {
1109 maxVal = t;
1110 maxColL = k;
1111 }
1112 }
1113 minVal = 2000; /* big enough */
1114 maxVal = -1; /* small enough */
1115 for (; k < N_TEXELS; k++) {
1116 GLint t = input[k][maxVarR];
1117 if (minVal > t) {
1118 minVal = t;
1119 minColR = k;
1120 }
1121 if (maxVal < t) {
1122 maxVal = t;
1123 maxColR = k;
1124 }
1125 }
1126 #endif
1127
1128 /* left microtile */
1129 cc[0] = 0;
1130 for (i = 0; i < n_comp; i++) {
1131 vec[0][i] = input[minColL][i];
1132 vec[1][i] = input[maxColL][i];
1133 }
1134 if (minColL != maxColL) {
1135 /* compute interpolation vector */
1136 MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
1137
1138 /* add in texels */
1139 lolo = 0;
1140 for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
1141 GLint texel;
1142 /* interpolate color */
1143 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
1144 /* add in texel */
1145 lolo <<= 2;
1146 lolo |= texel;
1147 }
1148
1149 /* funky encoding for LSB of green */
1150 if ((GLint)((lolo >> 1) & 1) != (((vec[1][GCOMP] ^ vec[0][GCOMP]) >> 2) & 1)) {
1151 for (i = 0; i < n_comp; i++) {
1152 vec[1][i] = input[minColL][i];
1153 vec[0][i] = input[maxColL][i];
1154 }
1155 lolo = ~lolo;
1156 }
1157
1158 cc[0] = lolo;
1159 }
1160
1161 /* right microtile */
1162 cc[1] = 0;
1163 for (i = 0; i < n_comp; i++) {
1164 vec[2][i] = input[minColR][i];
1165 vec[3][i] = input[maxColR][i];
1166 }
1167 if (minColR != maxColR) {
1168 /* compute interpolation vector */
1169 MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);
1170
1171 /* add in texels */
1172 lohi = 0;
1173 for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
1174 GLint texel;
1175 /* interpolate color */
1176 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
1177 /* add in texel */
1178 lohi <<= 2;
1179 lohi |= texel;
1180 }
1181
1182 /* funky encoding for LSB of green */
1183 if ((GLint)((lohi >> 1) & 1) != (((vec[3][GCOMP] ^ vec[2][GCOMP]) >> 2) & 1)) {
1184 for (i = 0; i < n_comp; i++) {
1185 vec[3][i] = input[minColR][i];
1186 vec[2][i] = input[maxColR][i];
1187 }
1188 lohi = ~lohi;
1189 }
1190
1191 cc[1] = lohi;
1192 }
1193
1194 FX64_MOV32(hi, 8 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
1195 for (j = 2 * 2 - 1; j >= 0; j--) {
1196 for (i = 0; i < n_comp; i++) {
1197 /* add in colors */
1198 FX64_SHL(hi, 5);
1199 FX64_OR32(hi, vec[j][i] >> 3);
1200 }
1201 }
1202 ((Fx64 *)cc)[1] = hi;
1203 }
1204
1205
1206 static void
1207 fxt1_quantize (GLuint *cc, const GLubyte *lines[], GLint comps)
1208 {
1209 GLint trualpha;
1210 GLubyte reord[N_TEXELS][MAX_COMP];
1211
1212 GLubyte input[N_TEXELS][MAX_COMP];
1213 GLint i, k, l;
1214
1215 if (comps == 3) {
1216 /* make the whole block opaque */
1217 memset(input, -1, sizeof(input));
1218 }
1219
1220 /* 8 texels each line */
1221 for (l = 0; l < 4; l++) {
1222 for (k = 0; k < 4; k++) {
1223 for (i = 0; i < comps; i++) {
1224 input[k + l * 4][i] = *lines[l]++;
1225 }
1226 }
1227 for (; k < 8; k++) {
1228 for (i = 0; i < comps; i++) {
1229 input[k + l * 4 + 12][i] = *lines[l]++;
1230 }
1231 }
1232 }
1233
1234 /* block layout:
1235 * 00, 01, 02, 03, 08, 09, 0a, 0b
1236 * 10, 11, 12, 13, 18, 19, 1a, 1b
1237 * 04, 05, 06, 07, 0c, 0d, 0e, 0f
1238 * 14, 15, 16, 17, 1c, 1d, 1e, 1f
1239 */
1240
1241 /* [dBorca]
1242 * stupidity flows forth from this
1243 */
1244 l = N_TEXELS;
1245 trualpha = 0;
1246 if (comps == 4) {
1247 /* skip all transparent black texels */
1248 l = 0;
1249 for (k = 0; k < N_TEXELS; k++) {
1250 /* test all components against 0 */
1251 if (!ISTBLACK(input[k])) {
1252 /* texel is not transparent black */
1253 COPY_4UBV(reord[l], input[k]);
1254 if (reord[l][ACOMP] < (255 - ALPHA_TS)) {
1255 /* non-opaque texel */
1256 trualpha = !0;
1257 }
1258 l++;
1259 }
1260 }
1261 }
1262
1263 #if 0
1264 if (trualpha) {
1265 fxt1_quantize_ALPHA0(cc, input, reord, l);
1266 } else if (l == 0) {
1267 cc[0] = cc[1] = cc[2] = -1;
1268 cc[3] = 0;
1269 } else if (l < N_TEXELS) {
1270 fxt1_quantize_HI(cc, input, reord, l);
1271 } else {
1272 fxt1_quantize_CHROMA(cc, input);
1273 }
1274 (void)fxt1_quantize_ALPHA1;
1275 (void)fxt1_quantize_MIXED1;
1276 (void)fxt1_quantize_MIXED0;
1277 #else
1278 if (trualpha) {
1279 fxt1_quantize_ALPHA1(cc, input);
1280 } else if (l == 0) {
1281 cc[0] = cc[1] = cc[2] = ~0u;
1282 cc[3] = 0;
1283 } else if (l < N_TEXELS) {
1284 fxt1_quantize_MIXED1(cc, input);
1285 } else {
1286 fxt1_quantize_MIXED0(cc, input);
1287 }
1288 (void)fxt1_quantize_ALPHA0;
1289 (void)fxt1_quantize_HI;
1290 (void)fxt1_quantize_CHROMA;
1291 #endif
1292 }
1293
1294
1295
1296 /**
1297 * Upscale an image by replication, not (typical) stretching.
1298 * We use this when the image width or height is less than a
1299 * certain size (4, 8) and we need to upscale an image.
1300 */
1301 static void
1302 upscale_teximage2d(GLsizei inWidth, GLsizei inHeight,
1303 GLsizei outWidth, GLsizei outHeight,
1304 GLint comps, const GLubyte *src, GLint srcRowStride,
1305 GLubyte *dest )
1306 {
1307 GLint i, j, k;
1308
1309 ASSERT(outWidth >= inWidth);
1310 ASSERT(outHeight >= inHeight);
1311 #if 0
1312 ASSERT(inWidth == 1 || inWidth == 2 || inHeight == 1 || inHeight == 2);
1313 ASSERT((outWidth & 3) == 0);
1314 ASSERT((outHeight & 3) == 0);
1315 #endif
1316
1317 for (i = 0; i < outHeight; i++) {
1318 const GLint ii = i % inHeight;
1319 for (j = 0; j < outWidth; j++) {
1320 const GLint jj = j % inWidth;
1321 for (k = 0; k < comps; k++) {
1322 dest[(i * outWidth + j) * comps + k]
1323 = src[ii * srcRowStride + jj * comps + k];
1324 }
1325 }
1326 }
1327 }
1328
1329
1330 static void
1331 fxt1_encode (GLuint width, GLuint height, GLint comps,
1332 const void *source, GLint srcRowStride,
1333 void *dest, GLint destRowStride)
1334 {
1335 GLuint x, y;
1336 const GLubyte *data;
1337 GLuint *encoded = (GLuint *)dest;
1338 void *newSource = NULL;
1339
1340 assert(comps == 3 || comps == 4);
1341
1342 /* Replicate image if width is not M8 or height is not M4 */
1343 if ((width & 7) | (height & 3)) {
1344 GLint newWidth = (width + 7) & ~7;
1345 GLint newHeight = (height + 3) & ~3;
1346 newSource = malloc(comps * newWidth * newHeight * sizeof(GLubyte));
1347 if (!newSource) {
1348 GET_CURRENT_CONTEXT(ctx);
1349 _mesa_error(ctx, GL_OUT_OF_MEMORY, "texture compression");
1350 goto cleanUp;
1351 }
1352 upscale_teximage2d(width, height, newWidth, newHeight,
1353 comps, (const GLubyte *) source,
1354 srcRowStride, (GLubyte *) newSource);
1355 source = newSource;
1356 width = newWidth;
1357 height = newHeight;
1358 srcRowStride = comps * newWidth;
1359 }
1360
1361 data = (const GLubyte *) source;
1362 destRowStride = (destRowStride - width * 2) / 4;
1363 for (y = 0; y < height; y += 4) {
1364 GLuint offs = 0 + (y + 0) * srcRowStride;
1365 for (x = 0; x < width; x += 8) {
1366 const GLubyte *lines[4];
1367 lines[0] = &data[offs];
1368 lines[1] = lines[0] + srcRowStride;
1369 lines[2] = lines[1] + srcRowStride;
1370 lines[3] = lines[2] + srcRowStride;
1371 offs += 8 * comps;
1372 fxt1_quantize(encoded, lines, comps);
1373 /* 128 bits per 8x4 block */
1374 encoded += 4;
1375 }
1376 encoded += destRowStride;
1377 }
1378
1379 cleanUp:
1380 if (newSource != NULL) {
1381 free(newSource);
1382 }
1383 }
1384
1385
1386 /***************************************************************************\
1387 * FXT1 decoder
1388 *
1389 * The decoder is based on GL_3DFX_texture_compression_FXT1
1390 * specification and serves as a concept for the encoder.
1391 \***************************************************************************/
1392
1393
1394 /* lookup table for scaling 5 bit colors up to 8 bits */
1395 static const GLubyte _rgb_scale_5[] = {
1396 0, 8, 16, 25, 33, 41, 49, 58,
1397 66, 74, 82, 90, 99, 107, 115, 123,
1398 132, 140, 148, 156, 165, 173, 181, 189,
1399 197, 206, 214, 222, 230, 239, 247, 255
1400 };
1401
1402 /* lookup table for scaling 6 bit colors up to 8 bits */
1403 static const GLubyte _rgb_scale_6[] = {
1404 0, 4, 8, 12, 16, 20, 24, 28,
1405 32, 36, 40, 45, 49, 53, 57, 61,
1406 65, 69, 73, 77, 81, 85, 89, 93,
1407 97, 101, 105, 109, 113, 117, 121, 125,
1408 130, 134, 138, 142, 146, 150, 154, 158,
1409 162, 166, 170, 174, 178, 182, 186, 190,
1410 194, 198, 202, 206, 210, 215, 219, 223,
1411 227, 231, 235, 239, 243, 247, 251, 255
1412 };
1413
1414
/* CC_SEL: view 'cc' as an array of 32-bit words and return the word that
 * holds bit 'which', shifted right so that bit lands in bit 0.  Fields
 * that straddle a word boundary cannot be fetched with this macro.
 */
#define CC_SEL(cc, which) (((const GLuint *)(cc))[(which) / 32] >> ((which) & 31))
/* UP5: expand the low 5 bits of 'c' to an 8-bit channel value */
#define UP5(c) _rgb_scale_5[(c) & 31]
/* UP6: expand a 6-bit value (low 5 bits of 'c' with 'b' as the LSB) */
#define UP6(c, b) _rgb_scale_6[(((c) & 31) << 1) | ((b) & 1)]
/* LERP: rounded integer interpolation, t/n of the way from c0 to c1.
 * The whole expansion is parenthesized so the macro can be used safely
 * as an operand of a larger expression.
 */
#define LERP(n, t, c0, c1) ((((n) - (t)) * (c0) + (t) * (c1) + (n) / 2) / (n))
1419
1420
1421 static void
1422 fxt1_decode_1HI (const GLubyte *code, GLint t, GLubyte *rgba)
1423 {
1424 const GLuint *cc;
1425
1426 t *= 3;
1427 cc = (const GLuint *)(code + t / 8);
1428 t = (cc[0] >> (t & 7)) & 7;
1429
1430 if (t == 7) {
1431 rgba[RCOMP] = rgba[GCOMP] = rgba[BCOMP] = rgba[ACOMP] = 0;
1432 } else {
1433 GLubyte r, g, b;
1434 cc = (const GLuint *)(code + 12);
1435 if (t == 0) {
1436 b = UP5(CC_SEL(cc, 0));
1437 g = UP5(CC_SEL(cc, 5));
1438 r = UP5(CC_SEL(cc, 10));
1439 } else if (t == 6) {
1440 b = UP5(CC_SEL(cc, 15));
1441 g = UP5(CC_SEL(cc, 20));
1442 r = UP5(CC_SEL(cc, 25));
1443 } else {
1444 b = LERP(6, t, UP5(CC_SEL(cc, 0)), UP5(CC_SEL(cc, 15)));
1445 g = LERP(6, t, UP5(CC_SEL(cc, 5)), UP5(CC_SEL(cc, 20)));
1446 r = LERP(6, t, UP5(CC_SEL(cc, 10)), UP5(CC_SEL(cc, 25)));
1447 }
1448 rgba[RCOMP] = r;
1449 rgba[GCOMP] = g;
1450 rgba[BCOMP] = b;
1451 rgba[ACOMP] = 255;
1452 }
1453 }
1454
1455
1456 static void
1457 fxt1_decode_1CHROMA (const GLubyte *code, GLint t, GLubyte *rgba)
1458 {
1459 const GLuint *cc;
1460 GLuint kk;
1461
1462 cc = (const GLuint *)code;
1463 if (t & 16) {
1464 cc++;
1465 t &= 15;
1466 }
1467 t = (cc[0] >> (t * 2)) & 3;
1468
1469 t *= 15;
1470 cc = (const GLuint *)(code + 8 + t / 8);
1471 kk = cc[0] >> (t & 7);
1472 rgba[BCOMP] = UP5(kk);
1473 rgba[GCOMP] = UP5(kk >> 5);
1474 rgba[RCOMP] = UP5(kk >> 10);
1475 rgba[ACOMP] = 255;
1476 }
1477
1478
1479 static void
1480 fxt1_decode_1MIXED (const GLubyte *code, GLint t, GLubyte *rgba)
1481 {
1482 const GLuint *cc;
1483 GLuint col[2][3];
1484 GLint glsb, selb;
1485
1486 cc = (const GLuint *)code;
1487 if (t & 16) {
1488 t &= 15;
1489 t = (cc[1] >> (t * 2)) & 3;
1490 /* col 2 */
1491 col[0][BCOMP] = (*(const GLuint *)(code + 11)) >> 6;
1492 col[0][GCOMP] = CC_SEL(cc, 99);
1493 col[0][RCOMP] = CC_SEL(cc, 104);
1494 /* col 3 */
1495 col[1][BCOMP] = CC_SEL(cc, 109);
1496 col[1][GCOMP] = CC_SEL(cc, 114);
1497 col[1][RCOMP] = CC_SEL(cc, 119);
1498 glsb = CC_SEL(cc, 126);
1499 selb = CC_SEL(cc, 33);
1500 } else {
1501 t = (cc[0] >> (t * 2)) & 3;
1502 /* col 0 */
1503 col[0][BCOMP] = CC_SEL(cc, 64);
1504 col[0][GCOMP] = CC_SEL(cc, 69);
1505 col[0][RCOMP] = CC_SEL(cc, 74);
1506 /* col 1 */
1507 col[1][BCOMP] = CC_SEL(cc, 79);
1508 col[1][GCOMP] = CC_SEL(cc, 84);
1509 col[1][RCOMP] = CC_SEL(cc, 89);
1510 glsb = CC_SEL(cc, 125);
1511 selb = CC_SEL(cc, 1);
1512 }
1513
1514 if (CC_SEL(cc, 124) & 1) {
1515 /* alpha[0] == 1 */
1516
1517 if (t == 3) {
1518 /* zero */
1519 rgba[RCOMP] = rgba[BCOMP] = rgba[GCOMP] = rgba[ACOMP] = 0;
1520 } else {
1521 GLubyte r, g, b;
1522 if (t == 0) {
1523 b = UP5(col[0][BCOMP]);
1524 g = UP5(col[0][GCOMP]);
1525 r = UP5(col[0][RCOMP]);
1526 } else if (t == 2) {
1527 b = UP5(col[1][BCOMP]);
1528 g = UP6(col[1][GCOMP], glsb);
1529 r = UP5(col[1][RCOMP]);
1530 } else {
1531 b = (UP5(col[0][BCOMP]) + UP5(col[1][BCOMP])) / 2;
1532 g = (UP5(col[0][GCOMP]) + UP6(col[1][GCOMP], glsb)) / 2;
1533 r = (UP5(col[0][RCOMP]) + UP5(col[1][RCOMP])) / 2;
1534 }
1535 rgba[RCOMP] = r;
1536 rgba[GCOMP] = g;
1537 rgba[BCOMP] = b;
1538 rgba[ACOMP] = 255;
1539 }
1540 } else {
1541 /* alpha[0] == 0 */
1542 GLubyte r, g, b;
1543 if (t == 0) {
1544 b = UP5(col[0][BCOMP]);
1545 g = UP6(col[0][GCOMP], glsb ^ selb);
1546 r = UP5(col[0][RCOMP]);
1547 } else if (t == 3) {
1548 b = UP5(col[1][BCOMP]);
1549 g = UP6(col[1][GCOMP], glsb);
1550 r = UP5(col[1][RCOMP]);
1551 } else {
1552 b = LERP(3, t, UP5(col[0][BCOMP]), UP5(col[1][BCOMP]));
1553 g = LERP(3, t, UP6(col[0][GCOMP], glsb ^ selb),
1554 UP6(col[1][GCOMP], glsb));
1555 r = LERP(3, t, UP5(col[0][RCOMP]), UP5(col[1][RCOMP]));
1556 }
1557 rgba[RCOMP] = r;
1558 rgba[GCOMP] = g;
1559 rgba[BCOMP] = b;
1560 rgba[ACOMP] = 255;
1561 }
1562 }
1563
1564
1565 static void
1566 fxt1_decode_1ALPHA (const GLubyte *code, GLint t, GLubyte *rgba)
1567 {
1568 const GLuint *cc;
1569 GLubyte r, g, b, a;
1570
1571 cc = (const GLuint *)code;
1572 if (CC_SEL(cc, 124) & 1) {
1573 /* lerp == 1 */
1574 GLuint col0[4];
1575
1576 if (t & 16) {
1577 t &= 15;
1578 t = (cc[1] >> (t * 2)) & 3;
1579 /* col 2 */
1580 col0[BCOMP] = (*(const GLuint *)(code + 11)) >> 6;
1581 col0[GCOMP] = CC_SEL(cc, 99);
1582 col0[RCOMP] = CC_SEL(cc, 104);
1583 col0[ACOMP] = CC_SEL(cc, 119);
1584 } else {
1585 t = (cc[0] >> (t * 2)) & 3;
1586 /* col 0 */
1587 col0[BCOMP] = CC_SEL(cc, 64);
1588 col0[GCOMP] = CC_SEL(cc, 69);
1589 col0[RCOMP] = CC_SEL(cc, 74);
1590 col0[ACOMP] = CC_SEL(cc, 109);
1591 }
1592
1593 if (t == 0) {
1594 b = UP5(col0[BCOMP]);
1595 g = UP5(col0[GCOMP]);
1596 r = UP5(col0[RCOMP]);
1597 a = UP5(col0[ACOMP]);
1598 } else if (t == 3) {
1599 b = UP5(CC_SEL(cc, 79));
1600 g = UP5(CC_SEL(cc, 84));
1601 r = UP5(CC_SEL(cc, 89));
1602 a = UP5(CC_SEL(cc, 114));
1603 } else {
1604 b = LERP(3, t, UP5(col0[BCOMP]), UP5(CC_SEL(cc, 79)));
1605 g = LERP(3, t, UP5(col0[GCOMP]), UP5(CC_SEL(cc, 84)));
1606 r = LERP(3, t, UP5(col0[RCOMP]), UP5(CC_SEL(cc, 89)));
1607 a = LERP(3, t, UP5(col0[ACOMP]), UP5(CC_SEL(cc, 114)));
1608 }
1609 } else {
1610 /* lerp == 0 */
1611
1612 if (t & 16) {
1613 cc++;
1614 t &= 15;
1615 }
1616 t = (cc[0] >> (t * 2)) & 3;
1617
1618 if (t == 3) {
1619 /* zero */
1620 r = g = b = a = 0;
1621 } else {
1622 GLuint kk;
1623 cc = (const GLuint *)code;
1624 a = UP5(cc[3] >> (t * 5 + 13));
1625 t *= 15;
1626 cc = (const GLuint *)(code + 8 + t / 8);
1627 kk = cc[0] >> (t & 7);
1628 b = UP5(kk);
1629 g = UP5(kk >> 5);
1630 r = UP5(kk >> 10);
1631 }
1632 }
1633 rgba[RCOMP] = r;
1634 rgba[GCOMP] = g;
1635 rgba[BCOMP] = b;
1636 rgba[ACOMP] = a;
1637 }
1638
1639
1640 void
1641 fxt1_decode_1 (const void *texture, GLint stride, /* in pixels */
1642 GLint i, GLint j, GLubyte *rgba)
1643 {
1644 static void (*decode_1[]) (const GLubyte *, GLint, GLubyte *) = {
1645 fxt1_decode_1HI, /* cc-high = "00?" */
1646 fxt1_decode_1HI, /* cc-high = "00?" */
1647 fxt1_decode_1CHROMA, /* cc-chroma = "010" */
1648 fxt1_decode_1ALPHA, /* alpha = "011" */
1649 fxt1_decode_1MIXED, /* mixed = "1??" */
1650 fxt1_decode_1MIXED, /* mixed = "1??" */
1651 fxt1_decode_1MIXED, /* mixed = "1??" */
1652 fxt1_decode_1MIXED /* mixed = "1??" */
1653 };
1654
1655 const GLubyte *code = (const GLubyte *)texture +
1656 ((j / 4) * (stride / 8) + (i / 8)) * 16;
1657 GLint mode = CC_SEL(code, 125);
1658 GLint t = i & 7;
1659
1660 if (t & 4) {
1661 t += 12;
1662 }
1663 t += (j & 3) * 4;
1664
1665 decode_1[mode](code, t, rgba);
1666 }
1667
1668
1669 #endif /* FEATURE_texture_fxt1 */