mesa: Port format_pack/unpack off of _mesa_problem().
[mesa.git] / src / mesa / main / texcompress_fxt1.c
1 /*
2 * Mesa 3-D graphics library
3 *
4 * Copyright (C) 1999-2008 Brian Paul All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 * OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25
26 /**
27 * \file texcompress_fxt1.c
28 * GL_3DFX_texture_compression_FXT1 support.
29 */
30
31
32 #include "errors.h"
33 #include "glheader.h"
34 #include "imports.h"
35 #include "image.h"
36 #include "macros.h"
37 #include "mipmap.h"
38 #include "texcompress.h"
39 #include "texcompress_fxt1.h"
40 #include "texstore.h"
41 #include "mtypes.h"
42
43
44 static void
45 fxt1_encode (GLuint width, GLuint height, GLint comps,
46 const void *source, GLint srcRowStride,
47 void *dest, GLint destRowStride);
48
49 static void
50 fxt1_decode_1 (const void *texture, GLint stride,
51 GLint i, GLint j, GLubyte *rgba);
52
53
54 /**
55 * Store user's image in rgb_fxt1 format.
56 */
57 GLboolean
58 _mesa_texstore_rgb_fxt1(TEXSTORE_PARAMS)
59 {
60 const GLubyte *pixels;
61 GLint srcRowStride;
62 GLubyte *dst;
63 const GLubyte *tempImage = NULL;
64
65 assert(dstFormat == MESA_FORMAT_RGB_FXT1);
66
67 if (srcFormat != GL_RGB ||
68 srcType != GL_UNSIGNED_BYTE ||
69 ctx->_ImageTransferState ||
70 ALIGN(srcPacking->RowLength, srcPacking->Alignment) != srcWidth ||
71 srcPacking->SwapBytes) {
72 /* convert image to RGB/GLubyte */
73 GLubyte *tempImageSlices[1];
74 int rgbRowStride = 3 * srcWidth * sizeof(GLubyte);
75 tempImage = malloc(srcWidth * srcHeight * 3 * sizeof(GLubyte));
76 if (!tempImage)
77 return GL_FALSE; /* out of memory */
78 tempImageSlices[0] = (GLubyte *) tempImage;
79 _mesa_texstore(ctx, dims,
80 baseInternalFormat,
81 MESA_FORMAT_RGB_UNORM8,
82 rgbRowStride, tempImageSlices,
83 srcWidth, srcHeight, srcDepth,
84 srcFormat, srcType, srcAddr,
85 srcPacking);
86 pixels = tempImage;
87 srcRowStride = 3 * srcWidth;
88 srcFormat = GL_RGB;
89 }
90 else {
91 pixels = _mesa_image_address2d(srcPacking, srcAddr, srcWidth, srcHeight,
92 srcFormat, srcType, 0, 0);
93
94 srcRowStride = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat,
95 srcType) / sizeof(GLubyte);
96 }
97
98 dst = dstSlices[0];
99
100 fxt1_encode(srcWidth, srcHeight, 3, pixels, srcRowStride,
101 dst, dstRowStride);
102
103 free((void*) tempImage);
104
105 return GL_TRUE;
106 }
107
108
109 /**
110 * Store user's image in rgba_fxt1 format.
111 */
112 GLboolean
113 _mesa_texstore_rgba_fxt1(TEXSTORE_PARAMS)
114 {
115 const GLubyte *pixels;
116 GLint srcRowStride;
117 GLubyte *dst;
118 const GLubyte *tempImage = NULL;
119
120 assert(dstFormat == MESA_FORMAT_RGBA_FXT1);
121
122 if (srcFormat != GL_RGBA ||
123 srcType != GL_UNSIGNED_BYTE ||
124 ctx->_ImageTransferState ||
125 srcPacking->SwapBytes) {
126 /* convert image to RGBA/GLubyte */
127 GLubyte *tempImageSlices[1];
128 int rgbaRowStride = 4 * srcWidth * sizeof(GLubyte);
129 tempImage = malloc(srcWidth * srcHeight * 4 * sizeof(GLubyte));
130 if (!tempImage)
131 return GL_FALSE; /* out of memory */
132 tempImageSlices[0] = (GLubyte *) tempImage;
133 _mesa_texstore(ctx, dims,
134 baseInternalFormat,
135 _mesa_little_endian() ? MESA_FORMAT_R8G8B8A8_UNORM
136 : MESA_FORMAT_A8B8G8R8_UNORM,
137 rgbaRowStride, tempImageSlices,
138 srcWidth, srcHeight, srcDepth,
139 srcFormat, srcType, srcAddr,
140 srcPacking);
141 pixels = tempImage;
142 srcRowStride = 4 * srcWidth;
143 srcFormat = GL_RGBA;
144 }
145 else {
146 pixels = _mesa_image_address2d(srcPacking, srcAddr, srcWidth, srcHeight,
147 srcFormat, srcType, 0, 0);
148
149 srcRowStride = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat,
150 srcType) / sizeof(GLubyte);
151 }
152
153 dst = dstSlices[0];
154
155 fxt1_encode(srcWidth, srcHeight, 4, pixels, srcRowStride,
156 dst, dstRowStride);
157
158 free((void*) tempImage);
159
160 return GL_TRUE;
161 }
162
163
164 /***************************************************************************\
165 * FXT1 encoder
166 *
167 * The encoder was built by reversing the decoder,
168 * and is vaguely based on Texus2 by 3dfx. Note that this code
169 * is merely a proof of concept, since it is highly UNoptimized;
170 * moreover, it is sub-optimal due to initial conditions passed
171 * to Lloyd's algorithm (the interpolation modes are even worse).
172 \***************************************************************************/
173
174
175 #define MAX_COMP 4 /* ever needed maximum number of components in texel */
176 #define MAX_VECT 4 /* ever needed maximum number of base vectors to find */
177 #define N_TEXELS 32 /* number of texels in a block (always 32) */
178 #define LL_N_REP 50 /* number of iterations in lloyd's vq */
179 #define LL_RMS_D 10 /* fault tolerance (maximum delta) */
180 #define LL_RMS_E 255 /* fault tolerance (maximum error) */
181 #define ALPHA_TS 2 /* alpha threshold: (255 - ALPHA_TS) deemed opaque */
182 static const GLuint zero = 0;
183 #define ISTBLACK(v) (memcmp(&(v), &zero, sizeof(zero)) == 0)
184
/*
 * 64-bit unsigned integer type used to assemble the high quadword of a
 * block, plus the three operations the encoder needs on it:
 *
 *   FX64_MOV32(a, b)  load the 32-bit value b into a
 *   FX64_OR32(a, b)   OR the 32-bit value b into the low bits of a
 *   FX64_SHL(a, c)    shift a left by c bits
 *
 * The native uint64_t variant is the one compiled; the two-word struct
 * variant is kept as a fallback.
 */
#if 1

#define FX64_NATIVE 1

typedef uint64_t Fx64;

#define FX64_MOV32(a, b) ((a) = (b))
#define FX64_OR32(a, b)  ((a) |= (b))
#define FX64_SHL(a, c)   ((a) <<= (c))

#else

#define FX64_NATIVE 0

typedef struct {
   GLuint lo, hi;
} Fx64;

#define FX64_MOV32(a, b) ((a).lo = (b))
#define FX64_OR32(a, b)  ((a).lo |= (b))

#define FX64_SHL(a, c)                                            \
   do {                                                           \
      if ((c) >= 32) {                                            \
         (a).hi = (a).lo << ((c) - 32);                           \
         (a).lo = 0;                                              \
      } else {                                                    \
         (a).hi = ((a).hi << (c)) | ((a).lo >> (32 - (c)));       \
         (a).lo <<= (c);                                          \
      }                                                           \
   } while (0)

#endif
221
222
/* Per-component weight; can be used to obtain an oblong metric:
 * 0.30 / 0.59 / 0.11
 */
#define F(i) (GLfloat)1
#define SAFECDOT 1 /* for paranoids: clamp the computed index */

/*
 * MAKEIVEC: compute the interpolation vector IV and bias B for the segment
 * running from V0 to V1, scaled so that CALCCDOT() maps a colour onto an
 * index in the range 0..NV.  NC is the number of components.
 * Uses (and clobbers) the caller's loop variable `i'.
 */
#define MAKEIVEC(NV, NC, IV, B, V0, V1)                  \
   do {                                                  \
      GLfloat lenSq_ = 0.0F;                             \
      GLfloat scale_;                                    \
                                                         \
      for (i = 0; i < (NC); i++) {                       \
         (IV)[i] = ((V1)[i] - (V0)[i]) * F(i);           \
         lenSq_ += (IV)[i] * (IV)[i];                    \
      }                                                  \
      scale_ = (GLfloat)(NV) / lenSq_;                   \
      (B) = 0;                                           \
      for (i = 0; i < (NC); i++) {                       \
         (IV)[i] *= F(i);                                \
         (B) -= (IV)[i] * (V0)[i];                       \
         (IV)[i] *= scale_;                              \
      }                                                  \
      (B) = (B) * scale_ + 0.5f;                         \
   } while (0)

/*
 * CALCCDOT: project colour V onto the interpolation vector IV (with bias B)
 * and store the truncated result in TEXEL.  When SAFECDOT is set the result
 * is clamped to 0..NV.  Uses (and clobbers) the caller's loop variable `i'.
 */
#define CALCCDOT(TEXEL, NV, NC, IV, B, V)                \
   do {                                                  \
      GLfloat proj_ = 0.0F;                              \
      for (i = 0; i < (NC); i++) {                       \
         proj_ += (V)[i] * (IV)[i];                      \
      }                                                  \
      (TEXEL) = (GLint)(proj_ + (B));                    \
      if (SAFECDOT) {                                    \
         if ((TEXEL) < 0) {                              \
            (TEXEL) = 0;                                 \
         } else if ((TEXEL) > (NV)) {                    \
            (TEXEL) = (NV);                              \
         }                                               \
      }                                                  \
   } while (0)
261
262
263 static GLint
264 fxt1_bestcol (GLfloat vec[][MAX_COMP], GLint nv,
265 GLubyte input[MAX_COMP], GLint nc)
266 {
267 GLint i, j, best = -1;
268 GLfloat err = 1e9; /* big enough */
269
270 for (j = 0; j < nv; j++) {
271 GLfloat e = 0.0F;
272 for (i = 0; i < nc; i++) {
273 e += (vec[j][i] - input[i]) * (vec[j][i] - input[i]);
274 }
275 if (e < err) {
276 err = e;
277 best = j;
278 }
279 }
280
281 return best;
282 }
283
284
285 static GLint
286 fxt1_worst (GLfloat vec[MAX_COMP],
287 GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
288 {
289 GLint i, k, worst = -1;
290 GLfloat err = -1.0F; /* small enough */
291
292 for (k = 0; k < n; k++) {
293 GLfloat e = 0.0F;
294 for (i = 0; i < nc; i++) {
295 e += (vec[i] - input[k][i]) * (vec[i] - input[k][i]);
296 }
297 if (e > err) {
298 err = e;
299 worst = k;
300 }
301 }
302
303 return worst;
304 }
305
306
307 static GLint
308 fxt1_variance (GLdouble variance[MAX_COMP],
309 GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
310 {
311 GLint i, k, best = 0;
312 GLint sx, sx2;
313 GLdouble var, maxvar = -1; /* small enough */
314 GLdouble teenth = 1.0 / n;
315
316 for (i = 0; i < nc; i++) {
317 sx = sx2 = 0;
318 for (k = 0; k < n; k++) {
319 GLint t = input[k][i];
320 sx += t;
321 sx2 += t * t;
322 }
323 var = sx2 * teenth - sx * sx * teenth * teenth;
324 if (maxvar < var) {
325 maxvar = var;
326 best = i;
327 }
328 if (variance) {
329 variance[i] = var;
330 }
331 }
332
333 return best;
334 }
335
336
337 static GLint
338 fxt1_choose (GLfloat vec[][MAX_COMP], GLint nv,
339 GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
340 {
341 #if 0
342 /* Choose colors from a grid.
343 */
344 GLint i, j;
345
346 for (j = 0; j < nv; j++) {
347 GLint m = j * (n - 1) / (nv - 1);
348 for (i = 0; i < nc; i++) {
349 vec[j][i] = input[m][i];
350 }
351 }
352 #else
353 /* Our solution here is to find the darkest and brightest colors in
354 * the 8x4 tile and use those as the two representative colors.
355 * There are probably better algorithms to use (histogram-based).
356 */
357 GLint i, j, k;
358 GLint minSum = 2000; /* big enough */
359 GLint maxSum = -1; /* small enough */
360 GLint minCol = 0; /* phoudoin: silent compiler! */
361 GLint maxCol = 0; /* phoudoin: silent compiler! */
362
363 struct {
364 GLint flag;
365 GLint key;
366 GLint freq;
367 GLint idx;
368 } hist[N_TEXELS];
369 GLint lenh = 0;
370
371 memset(hist, 0, sizeof(hist));
372
373 for (k = 0; k < n; k++) {
374 GLint l;
375 GLint key = 0;
376 GLint sum = 0;
377 for (i = 0; i < nc; i++) {
378 key <<= 8;
379 key |= input[k][i];
380 sum += input[k][i];
381 }
382 for (l = 0; l < n; l++) {
383 if (!hist[l].flag) {
384 /* alloc new slot */
385 hist[l].flag = !0;
386 hist[l].key = key;
387 hist[l].freq = 1;
388 hist[l].idx = k;
389 lenh = l + 1;
390 break;
391 } else if (hist[l].key == key) {
392 hist[l].freq++;
393 break;
394 }
395 }
396 if (minSum > sum) {
397 minSum = sum;
398 minCol = k;
399 }
400 if (maxSum < sum) {
401 maxSum = sum;
402 maxCol = k;
403 }
404 }
405
406 if (lenh <= nv) {
407 for (j = 0; j < lenh; j++) {
408 for (i = 0; i < nc; i++) {
409 vec[j][i] = (GLfloat)input[hist[j].idx][i];
410 }
411 }
412 for (; j < nv; j++) {
413 for (i = 0; i < nc; i++) {
414 vec[j][i] = vec[0][i];
415 }
416 }
417 return 0;
418 }
419
420 for (j = 0; j < nv; j++) {
421 for (i = 0; i < nc; i++) {
422 vec[j][i] = ((nv - 1 - j) * input[minCol][i] + j * input[maxCol][i] + (nv - 1) / 2) / (GLfloat)(nv - 1);
423 }
424 }
425 #endif
426
427 return !0;
428 }
429
430
431 static GLint
432 fxt1_lloyd (GLfloat vec[][MAX_COMP], GLint nv,
433 GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
434 {
435 /* Use the generalized lloyd's algorithm for VQ:
436 * find 4 color vectors.
437 *
438 * for each sample color
439 * sort to nearest vector.
440 *
441 * replace each vector with the centroid of its matching colors.
442 *
443 * repeat until RMS doesn't improve.
444 *
445 * if a color vector has no samples, or becomes the same as another
446 * vector, replace it with the color which is farthest from a sample.
447 *
448 * vec[][MAX_COMP] initial vectors and resulting colors
449 * nv number of resulting colors required
450 * input[N_TEXELS][MAX_COMP] input texels
451 * nc number of components in input / vec
452 * n number of input samples
453 */
454
455 GLint sum[MAX_VECT][MAX_COMP]; /* used to accumulate closest texels */
456 GLint cnt[MAX_VECT]; /* how many times a certain vector was chosen */
457 GLfloat error, lasterror = 1e9;
458
459 GLint i, j, k, rep;
460
461 /* the quantizer */
462 for (rep = 0; rep < LL_N_REP; rep++) {
463 /* reset sums & counters */
464 for (j = 0; j < nv; j++) {
465 for (i = 0; i < nc; i++) {
466 sum[j][i] = 0;
467 }
468 cnt[j] = 0;
469 }
470 error = 0;
471
472 /* scan whole block */
473 for (k = 0; k < n; k++) {
474 #if 1
475 GLint best = -1;
476 GLfloat err = 1e9; /* big enough */
477 /* determine best vector */
478 for (j = 0; j < nv; j++) {
479 GLfloat e = (vec[j][0] - input[k][0]) * (vec[j][0] - input[k][0]) +
480 (vec[j][1] - input[k][1]) * (vec[j][1] - input[k][1]) +
481 (vec[j][2] - input[k][2]) * (vec[j][2] - input[k][2]);
482 if (nc == 4) {
483 e += (vec[j][3] - input[k][3]) * (vec[j][3] - input[k][3]);
484 }
485 if (e < err) {
486 err = e;
487 best = j;
488 }
489 }
490 #else
491 GLint best = fxt1_bestcol(vec, nv, input[k], nc, &err);
492 #endif
493 assert(best >= 0);
494 /* add in closest color */
495 for (i = 0; i < nc; i++) {
496 sum[best][i] += input[k][i];
497 }
498 /* mark this vector as used */
499 cnt[best]++;
500 /* accumulate error */
501 error += err;
502 }
503
504 /* check RMS */
505 if ((error < LL_RMS_E) ||
506 ((error < lasterror) && ((lasterror - error) < LL_RMS_D))) {
507 return !0; /* good match */
508 }
509 lasterror = error;
510
511 /* move each vector to the barycenter of its closest colors */
512 for (j = 0; j < nv; j++) {
513 if (cnt[j]) {
514 GLfloat div = 1.0F / cnt[j];
515 for (i = 0; i < nc; i++) {
516 vec[j][i] = div * sum[j][i];
517 }
518 } else {
519 /* this vec has no samples or is identical with a previous vec */
520 GLint worst = fxt1_worst(vec[j], input, nc, n);
521 for (i = 0; i < nc; i++) {
522 vec[j][i] = input[worst][i];
523 }
524 }
525 }
526 }
527
528 return 0; /* could not converge fast enough */
529 }
530
531
532 static void
533 fxt1_quantize_CHROMA (GLuint *cc,
534 GLubyte input[N_TEXELS][MAX_COMP])
535 {
536 const GLint n_vect = 4; /* 4 base vectors to find */
537 const GLint n_comp = 3; /* 3 components: R, G, B */
538 GLfloat vec[MAX_VECT][MAX_COMP];
539 GLint i, j, k;
540 Fx64 hi; /* high quadword */
541 GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
542
543 if (fxt1_choose(vec, n_vect, input, n_comp, N_TEXELS) != 0) {
544 fxt1_lloyd(vec, n_vect, input, n_comp, N_TEXELS);
545 }
546
547 FX64_MOV32(hi, 4); /* cc-chroma = "010" + unused bit */
548 for (j = n_vect - 1; j >= 0; j--) {
549 for (i = 0; i < n_comp; i++) {
550 /* add in colors */
551 FX64_SHL(hi, 5);
552 FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
553 }
554 }
555 ((Fx64 *)cc)[1] = hi;
556
557 lohi = lolo = 0;
558 /* right microtile */
559 for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) {
560 lohi <<= 2;
561 lohi |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
562 }
563 /* left microtile */
564 for (; k >= 0; k--) {
565 lolo <<= 2;
566 lolo |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
567 }
568 cc[1] = lohi;
569 cc[0] = lolo;
570 }
571
572
573 static void
574 fxt1_quantize_ALPHA0 (GLuint *cc,
575 GLubyte input[N_TEXELS][MAX_COMP],
576 GLubyte reord[N_TEXELS][MAX_COMP], GLint n)
577 {
578 const GLint n_vect = 3; /* 3 base vectors to find */
579 const GLint n_comp = 4; /* 4 components: R, G, B, A */
580 GLfloat vec[MAX_VECT][MAX_COMP];
581 GLint i, j, k;
582 Fx64 hi; /* high quadword */
583 GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
584
585 /* the last vector indicates zero */
586 for (i = 0; i < n_comp; i++) {
587 vec[n_vect][i] = 0;
588 }
589
590 /* the first n texels in reord are guaranteed to be non-zero */
591 if (fxt1_choose(vec, n_vect, reord, n_comp, n) != 0) {
592 fxt1_lloyd(vec, n_vect, reord, n_comp, n);
593 }
594
595 FX64_MOV32(hi, 6); /* alpha = "011" + lerp = 0 */
596 for (j = n_vect - 1; j >= 0; j--) {
597 /* add in alphas */
598 FX64_SHL(hi, 5);
599 FX64_OR32(hi, (GLuint)(vec[j][ACOMP] / 8.0F));
600 }
601 for (j = n_vect - 1; j >= 0; j--) {
602 for (i = 0; i < n_comp - 1; i++) {
603 /* add in colors */
604 FX64_SHL(hi, 5);
605 FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
606 }
607 }
608 ((Fx64 *)cc)[1] = hi;
609
610 lohi = lolo = 0;
611 /* right microtile */
612 for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) {
613 lohi <<= 2;
614 lohi |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
615 }
616 /* left microtile */
617 for (; k >= 0; k--) {
618 lolo <<= 2;
619 lolo |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
620 }
621 cc[1] = lohi;
622 cc[0] = lolo;
623 }
624
625
626 static void
627 fxt1_quantize_ALPHA1 (GLuint *cc,
628 GLubyte input[N_TEXELS][MAX_COMP])
629 {
630 const GLint n_vect = 3; /* highest vector number in each microtile */
631 const GLint n_comp = 4; /* 4 components: R, G, B, A */
632 GLfloat vec[1 + 1 + 1][MAX_COMP]; /* 1.5 extrema for each sub-block */
633 GLfloat b, iv[MAX_COMP]; /* interpolation vector */
634 GLint i, j, k;
635 Fx64 hi; /* high quadword */
636 GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
637
638 GLint minSum;
639 GLint maxSum;
640 GLint minColL = 0, maxColL = 0;
641 GLint minColR = 0, maxColR = 0;
642 GLint sumL = 0, sumR = 0;
643 GLint nn_comp;
644 /* Our solution here is to find the darkest and brightest colors in
645 * the 4x4 tile and use those as the two representative colors.
646 * There are probably better algorithms to use (histogram-based).
647 */
648 nn_comp = n_comp;
649 while ((minColL == maxColL) && nn_comp) {
650 minSum = 2000; /* big enough */
651 maxSum = -1; /* small enough */
652 for (k = 0; k < N_TEXELS / 2; k++) {
653 GLint sum = 0;
654 for (i = 0; i < nn_comp; i++) {
655 sum += input[k][i];
656 }
657 if (minSum > sum) {
658 minSum = sum;
659 minColL = k;
660 }
661 if (maxSum < sum) {
662 maxSum = sum;
663 maxColL = k;
664 }
665 sumL += sum;
666 }
667
668 nn_comp--;
669 }
670
671 nn_comp = n_comp;
672 while ((minColR == maxColR) && nn_comp) {
673 minSum = 2000; /* big enough */
674 maxSum = -1; /* small enough */
675 for (k = N_TEXELS / 2; k < N_TEXELS; k++) {
676 GLint sum = 0;
677 for (i = 0; i < nn_comp; i++) {
678 sum += input[k][i];
679 }
680 if (minSum > sum) {
681 minSum = sum;
682 minColR = k;
683 }
684 if (maxSum < sum) {
685 maxSum = sum;
686 maxColR = k;
687 }
688 sumR += sum;
689 }
690
691 nn_comp--;
692 }
693
694 /* choose the common vector (yuck!) */
695 {
696 GLint j1, j2;
697 GLint v1 = 0, v2 = 0;
698 GLfloat err = 1e9; /* big enough */
699 GLfloat tv[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
700 for (i = 0; i < n_comp; i++) {
701 tv[0][i] = input[minColL][i];
702 tv[1][i] = input[maxColL][i];
703 tv[2][i] = input[minColR][i];
704 tv[3][i] = input[maxColR][i];
705 }
706 for (j1 = 0; j1 < 2; j1++) {
707 for (j2 = 2; j2 < 4; j2++) {
708 GLfloat e = 0.0F;
709 for (i = 0; i < n_comp; i++) {
710 e += (tv[j1][i] - tv[j2][i]) * (tv[j1][i] - tv[j2][i]);
711 }
712 if (e < err) {
713 err = e;
714 v1 = j1;
715 v2 = j2;
716 }
717 }
718 }
719 for (i = 0; i < n_comp; i++) {
720 vec[0][i] = tv[1 - v1][i];
721 vec[1][i] = (tv[v1][i] * sumL + tv[v2][i] * sumR) / (sumL + sumR);
722 vec[2][i] = tv[5 - v2][i];
723 }
724 }
725
726 /* left microtile */
727 cc[0] = 0;
728 if (minColL != maxColL) {
729 /* compute interpolation vector */
730 MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
731
732 /* add in texels */
733 lolo = 0;
734 for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
735 GLint texel;
736 /* interpolate color */
737 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
738 /* add in texel */
739 lolo <<= 2;
740 lolo |= texel;
741 }
742
743 cc[0] = lolo;
744 }
745
746 /* right microtile */
747 cc[1] = 0;
748 if (minColR != maxColR) {
749 /* compute interpolation vector */
750 MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[1]);
751
752 /* add in texels */
753 lohi = 0;
754 for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
755 GLint texel;
756 /* interpolate color */
757 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
758 /* add in texel */
759 lohi <<= 2;
760 lohi |= texel;
761 }
762
763 cc[1] = lohi;
764 }
765
766 FX64_MOV32(hi, 7); /* alpha = "011" + lerp = 1 */
767 for (j = n_vect - 1; j >= 0; j--) {
768 /* add in alphas */
769 FX64_SHL(hi, 5);
770 FX64_OR32(hi, (GLuint)(vec[j][ACOMP] / 8.0F));
771 }
772 for (j = n_vect - 1; j >= 0; j--) {
773 for (i = 0; i < n_comp - 1; i++) {
774 /* add in colors */
775 FX64_SHL(hi, 5);
776 FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
777 }
778 }
779 ((Fx64 *)cc)[1] = hi;
780 }
781
782
783 static void
784 fxt1_quantize_HI (GLuint *cc,
785 GLubyte input[N_TEXELS][MAX_COMP],
786 GLubyte reord[N_TEXELS][MAX_COMP], GLint n)
787 {
788 const GLint n_vect = 6; /* highest vector number */
789 const GLint n_comp = 3; /* 3 components: R, G, B */
790 GLfloat b = 0.0F; /* phoudoin: silent compiler! */
791 GLfloat iv[MAX_COMP]; /* interpolation vector */
792 GLint i, k;
793 GLuint hihi; /* high quadword: hi dword */
794
795 GLint minSum = 2000; /* big enough */
796 GLint maxSum = -1; /* small enough */
797 GLint minCol = 0; /* phoudoin: silent compiler! */
798 GLint maxCol = 0; /* phoudoin: silent compiler! */
799
800 /* Our solution here is to find the darkest and brightest colors in
801 * the 8x4 tile and use those as the two representative colors.
802 * There are probably better algorithms to use (histogram-based).
803 */
804 for (k = 0; k < n; k++) {
805 GLint sum = 0;
806 for (i = 0; i < n_comp; i++) {
807 sum += reord[k][i];
808 }
809 if (minSum > sum) {
810 minSum = sum;
811 minCol = k;
812 }
813 if (maxSum < sum) {
814 maxSum = sum;
815 maxCol = k;
816 }
817 }
818
819 hihi = 0; /* cc-hi = "00" */
820 for (i = 0; i < n_comp; i++) {
821 /* add in colors */
822 hihi <<= 5;
823 hihi |= reord[maxCol][i] >> 3;
824 }
825 for (i = 0; i < n_comp; i++) {
826 /* add in colors */
827 hihi <<= 5;
828 hihi |= reord[minCol][i] >> 3;
829 }
830 cc[3] = hihi;
831 cc[0] = cc[1] = cc[2] = 0;
832
833 /* compute interpolation vector */
834 if (minCol != maxCol) {
835 MAKEIVEC(n_vect, n_comp, iv, b, reord[minCol], reord[maxCol]);
836 }
837
838 /* add in texels */
839 for (k = N_TEXELS - 1; k >= 0; k--) {
840 GLint t = k * 3;
841 GLuint *kk = (GLuint *)((char *)cc + t / 8);
842 GLint texel = n_vect + 1; /* transparent black */
843
844 if (!ISTBLACK(input[k])) {
845 if (minCol != maxCol) {
846 /* interpolate color */
847 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
848 /* add in texel */
849 kk[0] |= texel << (t & 7);
850 }
851 } else {
852 /* add in texel */
853 kk[0] |= texel << (t & 7);
854 }
855 }
856 }
857
858
859 static void
860 fxt1_quantize_MIXED1 (GLuint *cc,
861 GLubyte input[N_TEXELS][MAX_COMP])
862 {
863 const GLint n_vect = 2; /* highest vector number in each microtile */
864 const GLint n_comp = 3; /* 3 components: R, G, B */
865 GLubyte vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
866 GLfloat b, iv[MAX_COMP]; /* interpolation vector */
867 GLint i, j, k;
868 Fx64 hi; /* high quadword */
869 GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
870
871 GLint minSum;
872 GLint maxSum;
873 GLint minColL = 0, maxColL = -1;
874 GLint minColR = 0, maxColR = -1;
875
876 /* Our solution here is to find the darkest and brightest colors in
877 * the 4x4 tile and use those as the two representative colors.
878 * There are probably better algorithms to use (histogram-based).
879 */
880 minSum = 2000; /* big enough */
881 maxSum = -1; /* small enough */
882 for (k = 0; k < N_TEXELS / 2; k++) {
883 if (!ISTBLACK(input[k])) {
884 GLint sum = 0;
885 for (i = 0; i < n_comp; i++) {
886 sum += input[k][i];
887 }
888 if (minSum > sum) {
889 minSum = sum;
890 minColL = k;
891 }
892 if (maxSum < sum) {
893 maxSum = sum;
894 maxColL = k;
895 }
896 }
897 }
898 minSum = 2000; /* big enough */
899 maxSum = -1; /* small enough */
900 for (; k < N_TEXELS; k++) {
901 if (!ISTBLACK(input[k])) {
902 GLint sum = 0;
903 for (i = 0; i < n_comp; i++) {
904 sum += input[k][i];
905 }
906 if (minSum > sum) {
907 minSum = sum;
908 minColR = k;
909 }
910 if (maxSum < sum) {
911 maxSum = sum;
912 maxColR = k;
913 }
914 }
915 }
916
917 /* left microtile */
918 if (maxColL == -1) {
919 /* all transparent black */
920 cc[0] = ~0u;
921 for (i = 0; i < n_comp; i++) {
922 vec[0][i] = 0;
923 vec[1][i] = 0;
924 }
925 } else {
926 cc[0] = 0;
927 for (i = 0; i < n_comp; i++) {
928 vec[0][i] = input[minColL][i];
929 vec[1][i] = input[maxColL][i];
930 }
931 if (minColL != maxColL) {
932 /* compute interpolation vector */
933 MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
934
935 /* add in texels */
936 lolo = 0;
937 for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
938 GLint texel = n_vect + 1; /* transparent black */
939 if (!ISTBLACK(input[k])) {
940 /* interpolate color */
941 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
942 }
943 /* add in texel */
944 lolo <<= 2;
945 lolo |= texel;
946 }
947 cc[0] = lolo;
948 }
949 }
950
951 /* right microtile */
952 if (maxColR == -1) {
953 /* all transparent black */
954 cc[1] = ~0u;
955 for (i = 0; i < n_comp; i++) {
956 vec[2][i] = 0;
957 vec[3][i] = 0;
958 }
959 } else {
960 cc[1] = 0;
961 for (i = 0; i < n_comp; i++) {
962 vec[2][i] = input[minColR][i];
963 vec[3][i] = input[maxColR][i];
964 }
965 if (minColR != maxColR) {
966 /* compute interpolation vector */
967 MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);
968
969 /* add in texels */
970 lohi = 0;
971 for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
972 GLint texel = n_vect + 1; /* transparent black */
973 if (!ISTBLACK(input[k])) {
974 /* interpolate color */
975 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
976 }
977 /* add in texel */
978 lohi <<= 2;
979 lohi |= texel;
980 }
981 cc[1] = lohi;
982 }
983 }
984
985 FX64_MOV32(hi, 9 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
986 for (j = 2 * 2 - 1; j >= 0; j--) {
987 for (i = 0; i < n_comp; i++) {
988 /* add in colors */
989 FX64_SHL(hi, 5);
990 FX64_OR32(hi, vec[j][i] >> 3);
991 }
992 }
993 ((Fx64 *)cc)[1] = hi;
994 }
995
996
997 static void
998 fxt1_quantize_MIXED0 (GLuint *cc,
999 GLubyte input[N_TEXELS][MAX_COMP])
1000 {
1001 const GLint n_vect = 3; /* highest vector number in each microtile */
1002 const GLint n_comp = 3; /* 3 components: R, G, B */
1003 GLubyte vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
1004 GLfloat b, iv[MAX_COMP]; /* interpolation vector */
1005 GLint i, j, k;
1006 Fx64 hi; /* high quadword */
1007 GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
1008
1009 GLint minColL = 0, maxColL = 0;
1010 GLint minColR = 0, maxColR = 0;
1011 #if 0
1012 GLint minSum;
1013 GLint maxSum;
1014
1015 /* Our solution here is to find the darkest and brightest colors in
1016 * the 4x4 tile and use those as the two representative colors.
1017 * There are probably better algorithms to use (histogram-based).
1018 */
1019 minSum = 2000; /* big enough */
1020 maxSum = -1; /* small enough */
1021 for (k = 0; k < N_TEXELS / 2; k++) {
1022 GLint sum = 0;
1023 for (i = 0; i < n_comp; i++) {
1024 sum += input[k][i];
1025 }
1026 if (minSum > sum) {
1027 minSum = sum;
1028 minColL = k;
1029 }
1030 if (maxSum < sum) {
1031 maxSum = sum;
1032 maxColL = k;
1033 }
1034 }
1035 minSum = 2000; /* big enough */
1036 maxSum = -1; /* small enough */
1037 for (; k < N_TEXELS; k++) {
1038 GLint sum = 0;
1039 for (i = 0; i < n_comp; i++) {
1040 sum += input[k][i];
1041 }
1042 if (minSum > sum) {
1043 minSum = sum;
1044 minColR = k;
1045 }
1046 if (maxSum < sum) {
1047 maxSum = sum;
1048 maxColR = k;
1049 }
1050 }
1051 #else
1052 GLint minVal;
1053 GLint maxVal;
1054 GLint maxVarL = fxt1_variance(NULL, input, n_comp, N_TEXELS / 2);
1055 GLint maxVarR = fxt1_variance(NULL, &input[N_TEXELS / 2], n_comp, N_TEXELS / 2);
1056
1057 /* Scan the channel with max variance for lo & hi
1058 * and use those as the two representative colors.
1059 */
1060 minVal = 2000; /* big enough */
1061 maxVal = -1; /* small enough */
1062 for (k = 0; k < N_TEXELS / 2; k++) {
1063 GLint t = input[k][maxVarL];
1064 if (minVal > t) {
1065 minVal = t;
1066 minColL = k;
1067 }
1068 if (maxVal < t) {
1069 maxVal = t;
1070 maxColL = k;
1071 }
1072 }
1073 minVal = 2000; /* big enough */
1074 maxVal = -1; /* small enough */
1075 for (; k < N_TEXELS; k++) {
1076 GLint t = input[k][maxVarR];
1077 if (minVal > t) {
1078 minVal = t;
1079 minColR = k;
1080 }
1081 if (maxVal < t) {
1082 maxVal = t;
1083 maxColR = k;
1084 }
1085 }
1086 #endif
1087
1088 /* left microtile */
1089 cc[0] = 0;
1090 for (i = 0; i < n_comp; i++) {
1091 vec[0][i] = input[minColL][i];
1092 vec[1][i] = input[maxColL][i];
1093 }
1094 if (minColL != maxColL) {
1095 /* compute interpolation vector */
1096 MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
1097
1098 /* add in texels */
1099 lolo = 0;
1100 for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
1101 GLint texel;
1102 /* interpolate color */
1103 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
1104 /* add in texel */
1105 lolo <<= 2;
1106 lolo |= texel;
1107 }
1108
1109 /* funky encoding for LSB of green */
1110 if ((GLint)((lolo >> 1) & 1) != (((vec[1][GCOMP] ^ vec[0][GCOMP]) >> 2) & 1)) {
1111 for (i = 0; i < n_comp; i++) {
1112 vec[1][i] = input[minColL][i];
1113 vec[0][i] = input[maxColL][i];
1114 }
1115 lolo = ~lolo;
1116 }
1117
1118 cc[0] = lolo;
1119 }
1120
1121 /* right microtile */
1122 cc[1] = 0;
1123 for (i = 0; i < n_comp; i++) {
1124 vec[2][i] = input[minColR][i];
1125 vec[3][i] = input[maxColR][i];
1126 }
1127 if (minColR != maxColR) {
1128 /* compute interpolation vector */
1129 MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);
1130
1131 /* add in texels */
1132 lohi = 0;
1133 for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
1134 GLint texel;
1135 /* interpolate color */
1136 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
1137 /* add in texel */
1138 lohi <<= 2;
1139 lohi |= texel;
1140 }
1141
1142 /* funky encoding for LSB of green */
1143 if ((GLint)((lohi >> 1) & 1) != (((vec[3][GCOMP] ^ vec[2][GCOMP]) >> 2) & 1)) {
1144 for (i = 0; i < n_comp; i++) {
1145 vec[3][i] = input[minColR][i];
1146 vec[2][i] = input[maxColR][i];
1147 }
1148 lohi = ~lohi;
1149 }
1150
1151 cc[1] = lohi;
1152 }
1153
1154 FX64_MOV32(hi, 8 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
1155 for (j = 2 * 2 - 1; j >= 0; j--) {
1156 for (i = 0; i < n_comp; i++) {
1157 /* add in colors */
1158 FX64_SHL(hi, 5);
1159 FX64_OR32(hi, vec[j][i] >> 3);
1160 }
1161 }
1162 ((Fx64 *)cc)[1] = hi;
1163 }
1164
1165
1166 static void
1167 fxt1_quantize (GLuint *cc, const GLubyte *lines[], GLint comps)
1168 {
1169 GLint trualpha;
1170 GLubyte reord[N_TEXELS][MAX_COMP];
1171
1172 GLubyte input[N_TEXELS][MAX_COMP];
1173 GLint i, k, l;
1174
1175 if (comps == 3) {
1176 /* make the whole block opaque */
1177 memset(input, -1, sizeof(input));
1178 }
1179
1180 /* 8 texels each line */
1181 for (l = 0; l < 4; l++) {
1182 for (k = 0; k < 4; k++) {
1183 for (i = 0; i < comps; i++) {
1184 input[k + l * 4][i] = *lines[l]++;
1185 }
1186 }
1187 for (; k < 8; k++) {
1188 for (i = 0; i < comps; i++) {
1189 input[k + l * 4 + 12][i] = *lines[l]++;
1190 }
1191 }
1192 }
1193
1194 /* block layout:
1195 * 00, 01, 02, 03, 08, 09, 0a, 0b
1196 * 10, 11, 12, 13, 18, 19, 1a, 1b
1197 * 04, 05, 06, 07, 0c, 0d, 0e, 0f
1198 * 14, 15, 16, 17, 1c, 1d, 1e, 1f
1199 */
1200
1201 /* [dBorca]
1202 * stupidity flows forth from this
1203 */
1204 l = N_TEXELS;
1205 trualpha = 0;
1206 if (comps == 4) {
1207 /* skip all transparent black texels */
1208 l = 0;
1209 for (k = 0; k < N_TEXELS; k++) {
1210 /* test all components against 0 */
1211 if (!ISTBLACK(input[k])) {
1212 /* texel is not transparent black */
1213 COPY_4UBV(reord[l], input[k]);
1214 if (reord[l][ACOMP] < (255 - ALPHA_TS)) {
1215 /* non-opaque texel */
1216 trualpha = !0;
1217 }
1218 l++;
1219 }
1220 }
1221 }
1222
1223 #if 0
1224 if (trualpha) {
1225 fxt1_quantize_ALPHA0(cc, input, reord, l);
1226 } else if (l == 0) {
1227 cc[0] = cc[1] = cc[2] = -1;
1228 cc[3] = 0;
1229 } else if (l < N_TEXELS) {
1230 fxt1_quantize_HI(cc, input, reord, l);
1231 } else {
1232 fxt1_quantize_CHROMA(cc, input);
1233 }
1234 (void)fxt1_quantize_ALPHA1;
1235 (void)fxt1_quantize_MIXED1;
1236 (void)fxt1_quantize_MIXED0;
1237 #else
1238 if (trualpha) {
1239 fxt1_quantize_ALPHA1(cc, input);
1240 } else if (l == 0) {
1241 cc[0] = cc[1] = cc[2] = ~0u;
1242 cc[3] = 0;
1243 } else if (l < N_TEXELS) {
1244 fxt1_quantize_MIXED1(cc, input);
1245 } else {
1246 fxt1_quantize_MIXED0(cc, input);
1247 }
1248 (void)fxt1_quantize_ALPHA0;
1249 (void)fxt1_quantize_HI;
1250 (void)fxt1_quantize_CHROMA;
1251 #endif
1252 }
1253
1254
1255
1256 /**
1257 * Upscale an image by replication, not (typical) stretching.
1258 * We use this when the image width or height is less than a
1259 * certain size (4, 8) and we need to upscale an image.
1260 */
1261 static void
1262 upscale_teximage2d(GLsizei inWidth, GLsizei inHeight,
1263 GLsizei outWidth, GLsizei outHeight,
1264 GLint comps, const GLubyte *src, GLint srcRowStride,
1265 GLubyte *dest )
1266 {
1267 GLint i, j, k;
1268
1269 assert(outWidth >= inWidth);
1270 assert(outHeight >= inHeight);
1271 #if 0
1272 assert(inWidth == 1 || inWidth == 2 || inHeight == 1 || inHeight == 2);
1273 assert((outWidth & 3) == 0);
1274 assert((outHeight & 3) == 0);
1275 #endif
1276
1277 for (i = 0; i < outHeight; i++) {
1278 const GLint ii = i % inHeight;
1279 for (j = 0; j < outWidth; j++) {
1280 const GLint jj = j % inWidth;
1281 for (k = 0; k < comps; k++) {
1282 dest[(i * outWidth + j) * comps + k]
1283 = src[ii * srcRowStride + jj * comps + k];
1284 }
1285 }
1286 }
1287 }
1288
1289
1290 static void
1291 fxt1_encode (GLuint width, GLuint height, GLint comps,
1292 const void *source, GLint srcRowStride,
1293 void *dest, GLint destRowStride)
1294 {
1295 GLuint x, y;
1296 const GLubyte *data;
1297 GLuint *encoded = (GLuint *)dest;
1298 void *newSource = NULL;
1299
1300 assert(comps == 3 || comps == 4);
1301
1302 /* Replicate image if width is not M8 or height is not M4 */
1303 if ((width & 7) | (height & 3)) {
1304 GLint newWidth = (width + 7) & ~7;
1305 GLint newHeight = (height + 3) & ~3;
1306 newSource = malloc(comps * newWidth * newHeight * sizeof(GLubyte));
1307 if (!newSource) {
1308 GET_CURRENT_CONTEXT(ctx);
1309 _mesa_error(ctx, GL_OUT_OF_MEMORY, "texture compression");
1310 goto cleanUp;
1311 }
1312 upscale_teximage2d(width, height, newWidth, newHeight,
1313 comps, (const GLubyte *) source,
1314 srcRowStride, (GLubyte *) newSource);
1315 source = newSource;
1316 width = newWidth;
1317 height = newHeight;
1318 srcRowStride = comps * newWidth;
1319 }
1320
1321 data = (const GLubyte *) source;
1322 destRowStride = (destRowStride - width * 2) / 4;
1323 for (y = 0; y < height; y += 4) {
1324 GLuint offs = 0 + (y + 0) * srcRowStride;
1325 for (x = 0; x < width; x += 8) {
1326 const GLubyte *lines[4];
1327 lines[0] = &data[offs];
1328 lines[1] = lines[0] + srcRowStride;
1329 lines[2] = lines[1] + srcRowStride;
1330 lines[3] = lines[2] + srcRowStride;
1331 offs += 8 * comps;
1332 fxt1_quantize(encoded, lines, comps);
1333 /* 128 bits per 8x4 block */
1334 encoded += 4;
1335 }
1336 encoded += destRowStride;
1337 }
1338
1339 cleanUp:
1340 free(newSource);
1341 }
1342
1343
1344 /***************************************************************************\
1345 * FXT1 decoder
1346 *
1347 * The decoder is based on GL_3DFX_texture_compression_FXT1
1348 * specification and serves as a concept for the encoder.
1349 \***************************************************************************/
1350
1351
1352 /* lookup table for scaling 5 bit colors up to 8 bits */
1353 static const GLubyte _rgb_scale_5[] = {
1354 0, 8, 16, 25, 33, 41, 49, 58,
1355 66, 74, 82, 90, 99, 107, 115, 123,
1356 132, 140, 148, 156, 165, 173, 181, 189,
1357 197, 206, 214, 222, 230, 239, 247, 255
1358 };
1359
1360 /* lookup table for scaling 6 bit colors up to 8 bits */
1361 static const GLubyte _rgb_scale_6[] = {
1362 0, 4, 8, 12, 16, 20, 24, 28,
1363 32, 36, 40, 45, 49, 53, 57, 61,
1364 65, 69, 73, 77, 81, 85, 89, 93,
1365 97, 101, 105, 109, 113, 117, 121, 125,
1366 130, 134, 138, 142, 146, 150, 154, 158,
1367 162, 166, 170, 174, 178, 182, 186, 190,
1368 194, 198, 202, 206, 210, 215, 219, 223,
1369 227, 231, 235, 239, 243, 247, 251, 255
1370 };
1371
1372
/* CC_SEL: view the block as an array of 32-bit words and return the word
 * containing bit `which`, shifted right so that bit lands at bit 0.
 * Higher bits of that word are NOT masked off; callers mask as needed.
 * The cast keeps the pointee const so const block pointers can be passed.
 */
#define CC_SEL(cc, which) (((const GLuint *)(cc))[(which) / 32] >> ((which) & 31))
/* UP5: scale the low 5 bits of c up to 8 bits */
#define UP5(c) (_rgb_scale_5[(c) & 31])
/* UP6: form a 6-bit value from the low 5 bits of c plus b as the new
 * least significant bit, then scale it up to 8 bits
 */
#define UP6(c, b) (_rgb_scale_6[(((c) & 31) << 1) | ((b) & 1)])
/* LERP: rounded integer interpolation, t/n of the way from c0 to c1.
 * The whole expansion is parenthesized so the macro can be used safely
 * inside larger expressions.
 */
#define LERP(n, t, c0, c1) ((((n) - (t)) * (c0) + (t) * (c1) + (n) / 2) / (n))
1377
1378
1379 static void
1380 fxt1_decode_1HI (const GLubyte *code, GLint t, GLubyte *rgba)
1381 {
1382 const GLuint *cc;
1383
1384 t *= 3;
1385 cc = (const GLuint *)(code + t / 8);
1386 t = (cc[0] >> (t & 7)) & 7;
1387
1388 if (t == 7) {
1389 rgba[RCOMP] = rgba[GCOMP] = rgba[BCOMP] = rgba[ACOMP] = 0;
1390 } else {
1391 GLubyte r, g, b;
1392 cc = (const GLuint *)(code + 12);
1393 if (t == 0) {
1394 b = UP5(CC_SEL(cc, 0));
1395 g = UP5(CC_SEL(cc, 5));
1396 r = UP5(CC_SEL(cc, 10));
1397 } else if (t == 6) {
1398 b = UP5(CC_SEL(cc, 15));
1399 g = UP5(CC_SEL(cc, 20));
1400 r = UP5(CC_SEL(cc, 25));
1401 } else {
1402 b = LERP(6, t, UP5(CC_SEL(cc, 0)), UP5(CC_SEL(cc, 15)));
1403 g = LERP(6, t, UP5(CC_SEL(cc, 5)), UP5(CC_SEL(cc, 20)));
1404 r = LERP(6, t, UP5(CC_SEL(cc, 10)), UP5(CC_SEL(cc, 25)));
1405 }
1406 rgba[RCOMP] = r;
1407 rgba[GCOMP] = g;
1408 rgba[BCOMP] = b;
1409 rgba[ACOMP] = 255;
1410 }
1411 }
1412
1413
1414 static void
1415 fxt1_decode_1CHROMA (const GLubyte *code, GLint t, GLubyte *rgba)
1416 {
1417 const GLuint *cc;
1418 GLuint kk;
1419
1420 cc = (const GLuint *)code;
1421 if (t & 16) {
1422 cc++;
1423 t &= 15;
1424 }
1425 t = (cc[0] >> (t * 2)) & 3;
1426
1427 t *= 15;
1428 cc = (const GLuint *)(code + 8 + t / 8);
1429 kk = cc[0] >> (t & 7);
1430 rgba[BCOMP] = UP5(kk);
1431 rgba[GCOMP] = UP5(kk >> 5);
1432 rgba[RCOMP] = UP5(kk >> 10);
1433 rgba[ACOMP] = 255;
1434 }
1435
1436
1437 static void
1438 fxt1_decode_1MIXED (const GLubyte *code, GLint t, GLubyte *rgba)
1439 {
1440 const GLuint *cc;
1441 GLuint col[2][3];
1442 GLint glsb, selb;
1443
1444 cc = (const GLuint *)code;
1445 if (t & 16) {
1446 t &= 15;
1447 t = (cc[1] >> (t * 2)) & 3;
1448 /* col 2 */
1449 col[0][BCOMP] = (*(const GLuint *)(code + 11)) >> 6;
1450 col[0][GCOMP] = CC_SEL(cc, 99);
1451 col[0][RCOMP] = CC_SEL(cc, 104);
1452 /* col 3 */
1453 col[1][BCOMP] = CC_SEL(cc, 109);
1454 col[1][GCOMP] = CC_SEL(cc, 114);
1455 col[1][RCOMP] = CC_SEL(cc, 119);
1456 glsb = CC_SEL(cc, 126);
1457 selb = CC_SEL(cc, 33);
1458 } else {
1459 t = (cc[0] >> (t * 2)) & 3;
1460 /* col 0 */
1461 col[0][BCOMP] = CC_SEL(cc, 64);
1462 col[0][GCOMP] = CC_SEL(cc, 69);
1463 col[0][RCOMP] = CC_SEL(cc, 74);
1464 /* col 1 */
1465 col[1][BCOMP] = CC_SEL(cc, 79);
1466 col[1][GCOMP] = CC_SEL(cc, 84);
1467 col[1][RCOMP] = CC_SEL(cc, 89);
1468 glsb = CC_SEL(cc, 125);
1469 selb = CC_SEL(cc, 1);
1470 }
1471
1472 if (CC_SEL(cc, 124) & 1) {
1473 /* alpha[0] == 1 */
1474
1475 if (t == 3) {
1476 /* zero */
1477 rgba[RCOMP] = rgba[BCOMP] = rgba[GCOMP] = rgba[ACOMP] = 0;
1478 } else {
1479 GLubyte r, g, b;
1480 if (t == 0) {
1481 b = UP5(col[0][BCOMP]);
1482 g = UP5(col[0][GCOMP]);
1483 r = UP5(col[0][RCOMP]);
1484 } else if (t == 2) {
1485 b = UP5(col[1][BCOMP]);
1486 g = UP6(col[1][GCOMP], glsb);
1487 r = UP5(col[1][RCOMP]);
1488 } else {
1489 b = (UP5(col[0][BCOMP]) + UP5(col[1][BCOMP])) / 2;
1490 g = (UP5(col[0][GCOMP]) + UP6(col[1][GCOMP], glsb)) / 2;
1491 r = (UP5(col[0][RCOMP]) + UP5(col[1][RCOMP])) / 2;
1492 }
1493 rgba[RCOMP] = r;
1494 rgba[GCOMP] = g;
1495 rgba[BCOMP] = b;
1496 rgba[ACOMP] = 255;
1497 }
1498 } else {
1499 /* alpha[0] == 0 */
1500 GLubyte r, g, b;
1501 if (t == 0) {
1502 b = UP5(col[0][BCOMP]);
1503 g = UP6(col[0][GCOMP], glsb ^ selb);
1504 r = UP5(col[0][RCOMP]);
1505 } else if (t == 3) {
1506 b = UP5(col[1][BCOMP]);
1507 g = UP6(col[1][GCOMP], glsb);
1508 r = UP5(col[1][RCOMP]);
1509 } else {
1510 b = LERP(3, t, UP5(col[0][BCOMP]), UP5(col[1][BCOMP]));
1511 g = LERP(3, t, UP6(col[0][GCOMP], glsb ^ selb),
1512 UP6(col[1][GCOMP], glsb));
1513 r = LERP(3, t, UP5(col[0][RCOMP]), UP5(col[1][RCOMP]));
1514 }
1515 rgba[RCOMP] = r;
1516 rgba[GCOMP] = g;
1517 rgba[BCOMP] = b;
1518 rgba[ACOMP] = 255;
1519 }
1520 }
1521
1522
1523 static void
1524 fxt1_decode_1ALPHA (const GLubyte *code, GLint t, GLubyte *rgba)
1525 {
1526 const GLuint *cc;
1527 GLubyte r, g, b, a;
1528
1529 cc = (const GLuint *)code;
1530 if (CC_SEL(cc, 124) & 1) {
1531 /* lerp == 1 */
1532 GLuint col0[4];
1533
1534 if (t & 16) {
1535 t &= 15;
1536 t = (cc[1] >> (t * 2)) & 3;
1537 /* col 2 */
1538 col0[BCOMP] = (*(const GLuint *)(code + 11)) >> 6;
1539 col0[GCOMP] = CC_SEL(cc, 99);
1540 col0[RCOMP] = CC_SEL(cc, 104);
1541 col0[ACOMP] = CC_SEL(cc, 119);
1542 } else {
1543 t = (cc[0] >> (t * 2)) & 3;
1544 /* col 0 */
1545 col0[BCOMP] = CC_SEL(cc, 64);
1546 col0[GCOMP] = CC_SEL(cc, 69);
1547 col0[RCOMP] = CC_SEL(cc, 74);
1548 col0[ACOMP] = CC_SEL(cc, 109);
1549 }
1550
1551 if (t == 0) {
1552 b = UP5(col0[BCOMP]);
1553 g = UP5(col0[GCOMP]);
1554 r = UP5(col0[RCOMP]);
1555 a = UP5(col0[ACOMP]);
1556 } else if (t == 3) {
1557 b = UP5(CC_SEL(cc, 79));
1558 g = UP5(CC_SEL(cc, 84));
1559 r = UP5(CC_SEL(cc, 89));
1560 a = UP5(CC_SEL(cc, 114));
1561 } else {
1562 b = LERP(3, t, UP5(col0[BCOMP]), UP5(CC_SEL(cc, 79)));
1563 g = LERP(3, t, UP5(col0[GCOMP]), UP5(CC_SEL(cc, 84)));
1564 r = LERP(3, t, UP5(col0[RCOMP]), UP5(CC_SEL(cc, 89)));
1565 a = LERP(3, t, UP5(col0[ACOMP]), UP5(CC_SEL(cc, 114)));
1566 }
1567 } else {
1568 /* lerp == 0 */
1569
1570 if (t & 16) {
1571 cc++;
1572 t &= 15;
1573 }
1574 t = (cc[0] >> (t * 2)) & 3;
1575
1576 if (t == 3) {
1577 /* zero */
1578 r = g = b = a = 0;
1579 } else {
1580 GLuint kk;
1581 cc = (const GLuint *)code;
1582 a = UP5(cc[3] >> (t * 5 + 13));
1583 t *= 15;
1584 cc = (const GLuint *)(code + 8 + t / 8);
1585 kk = cc[0] >> (t & 7);
1586 b = UP5(kk);
1587 g = UP5(kk >> 5);
1588 r = UP5(kk >> 10);
1589 }
1590 }
1591 rgba[RCOMP] = r;
1592 rgba[GCOMP] = g;
1593 rgba[BCOMP] = b;
1594 rgba[ACOMP] = a;
1595 }
1596
1597
1598 static void
1599 fxt1_decode_1 (const void *texture, GLint stride, /* in pixels */
1600 GLint i, GLint j, GLubyte *rgba)
1601 {
1602 static void (*decode_1[]) (const GLubyte *, GLint, GLubyte *) = {
1603 fxt1_decode_1HI, /* cc-high = "00?" */
1604 fxt1_decode_1HI, /* cc-high = "00?" */
1605 fxt1_decode_1CHROMA, /* cc-chroma = "010" */
1606 fxt1_decode_1ALPHA, /* alpha = "011" */
1607 fxt1_decode_1MIXED, /* mixed = "1??" */
1608 fxt1_decode_1MIXED, /* mixed = "1??" */
1609 fxt1_decode_1MIXED, /* mixed = "1??" */
1610 fxt1_decode_1MIXED /* mixed = "1??" */
1611 };
1612
1613 const GLubyte *code = (const GLubyte *)texture +
1614 ((j / 4) * (stride / 8) + (i / 8)) * 16;
1615 GLint mode = CC_SEL(code, 125);
1616 GLint t = i & 7;
1617
1618 if (t & 4) {
1619 t += 12;
1620 }
1621 t += (j & 3) * 4;
1622
1623 decode_1[mode](code, t, rgba);
1624 }
1625
1626
1627
1628
1629 static void
1630 fetch_rgb_fxt1(const GLubyte *map,
1631 GLint rowStride, GLint i, GLint j, GLfloat *texel)
1632 {
1633 GLubyte rgba[4];
1634 fxt1_decode_1(map, rowStride, i, j, rgba);
1635 texel[RCOMP] = UBYTE_TO_FLOAT(rgba[RCOMP]);
1636 texel[GCOMP] = UBYTE_TO_FLOAT(rgba[GCOMP]);
1637 texel[BCOMP] = UBYTE_TO_FLOAT(rgba[BCOMP]);
1638 texel[ACOMP] = 1.0F;
1639 }
1640
1641
1642 static void
1643 fetch_rgba_fxt1(const GLubyte *map,
1644 GLint rowStride, GLint i, GLint j, GLfloat *texel)
1645 {
1646 GLubyte rgba[4];
1647 fxt1_decode_1(map, rowStride, i, j, rgba);
1648 texel[RCOMP] = UBYTE_TO_FLOAT(rgba[RCOMP]);
1649 texel[GCOMP] = UBYTE_TO_FLOAT(rgba[GCOMP]);
1650 texel[BCOMP] = UBYTE_TO_FLOAT(rgba[BCOMP]);
1651 texel[ACOMP] = UBYTE_TO_FLOAT(rgba[ACOMP]);
1652 }
1653
1654
1655 compressed_fetch_func
1656 _mesa_get_fxt_fetch_func(mesa_format format)
1657 {
1658 switch (format) {
1659 case MESA_FORMAT_RGB_FXT1:
1660 return fetch_rgb_fxt1;
1661 case MESA_FORMAT_RGBA_FXT1:
1662 return fetch_rgba_fxt1;
1663 default:
1664 return NULL;
1665 }
1666 }