mesa: function for testing if current frag-shader has atomics
[mesa.git] / src / mesa / main / texcompress_fxt1.c
1 /*
2 * Mesa 3-D graphics library
3 *
4 * Copyright (C) 1999-2008 Brian Paul All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 * OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25
26 /**
27 * \file texcompress_fxt1.c
28 * GL_3DFX_texture_compression_FXT1 support.
29 */
30
31
#include <string.h>

#include "glheader.h"
#include "imports.h"
#include "image.h"
#include "macros.h"
#include "mipmap.h"
#include "texcompress.h"
#include "texcompress_fxt1.h"
#include "texstore.h"
40
41
42 static void
43 fxt1_encode (GLuint width, GLuint height, GLint comps,
44 const void *source, GLint srcRowStride,
45 void *dest, GLint destRowStride);
46
47 static void
48 fxt1_decode_1 (const void *texture, GLint stride,
49 GLint i, GLint j, GLubyte *rgba);
50
51
52 /**
53 * Store user's image in rgb_fxt1 format.
54 */
55 GLboolean
56 _mesa_texstore_rgb_fxt1(TEXSTORE_PARAMS)
57 {
58 const GLubyte *pixels;
59 GLint srcRowStride;
60 GLubyte *dst;
61 const GLubyte *tempImage = NULL;
62
63 assert(dstFormat == MESA_FORMAT_RGB_FXT1);
64
65 if (srcFormat != GL_RGB ||
66 srcType != GL_UNSIGNED_BYTE ||
67 ctx->_ImageTransferState ||
68 srcPacking->RowLength != srcWidth ||
69 srcPacking->SwapBytes) {
70 /* convert image to RGB/GLubyte */
71 GLubyte *tempImageSlices[1];
72 int rgbRowStride = 3 * srcWidth * sizeof(GLubyte);
73 tempImage = malloc(srcWidth * srcHeight * 3 * sizeof(GLubyte));
74 if (!tempImage)
75 return GL_FALSE; /* out of memory */
76 tempImageSlices[0] = (GLubyte *) tempImage;
77 _mesa_texstore(ctx, dims,
78 baseInternalFormat,
79 MESA_FORMAT_RGB_UNORM8,
80 rgbRowStride, tempImageSlices,
81 srcWidth, srcHeight, srcDepth,
82 srcFormat, srcType, srcAddr,
83 srcPacking);
84 pixels = tempImage;
85 srcRowStride = 3 * srcWidth;
86 srcFormat = GL_RGB;
87 }
88 else {
89 pixels = _mesa_image_address2d(srcPacking, srcAddr, srcWidth, srcHeight,
90 srcFormat, srcType, 0, 0);
91
92 srcRowStride = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat,
93 srcType) / sizeof(GLubyte);
94 }
95
96 dst = dstSlices[0];
97
98 fxt1_encode(srcWidth, srcHeight, 3, pixels, srcRowStride,
99 dst, dstRowStride);
100
101 free((void*) tempImage);
102
103 return GL_TRUE;
104 }
105
106
107 /**
108 * Store user's image in rgba_fxt1 format.
109 */
110 GLboolean
111 _mesa_texstore_rgba_fxt1(TEXSTORE_PARAMS)
112 {
113 const GLubyte *pixels;
114 GLint srcRowStride;
115 GLubyte *dst;
116 const GLubyte *tempImage = NULL;
117
118 assert(dstFormat == MESA_FORMAT_RGBA_FXT1);
119
120 if (srcFormat != GL_RGBA ||
121 srcType != GL_UNSIGNED_BYTE ||
122 ctx->_ImageTransferState ||
123 srcPacking->SwapBytes) {
124 /* convert image to RGBA/GLubyte */
125 GLubyte *tempImageSlices[1];
126 int rgbaRowStride = 4 * srcWidth * sizeof(GLubyte);
127 tempImage = malloc(srcWidth * srcHeight * 4 * sizeof(GLubyte));
128 if (!tempImage)
129 return GL_FALSE; /* out of memory */
130 tempImageSlices[0] = (GLubyte *) tempImage;
131 _mesa_texstore(ctx, dims,
132 baseInternalFormat,
133 MESA_FORMAT_R8G8B8A8_UNORM,
134 rgbaRowStride, tempImageSlices,
135 srcWidth, srcHeight, srcDepth,
136 srcFormat, srcType, srcAddr,
137 srcPacking);
138 pixels = tempImage;
139 srcRowStride = 4 * srcWidth;
140 srcFormat = GL_RGBA;
141 }
142 else {
143 pixels = _mesa_image_address2d(srcPacking, srcAddr, srcWidth, srcHeight,
144 srcFormat, srcType, 0, 0);
145
146 srcRowStride = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat,
147 srcType) / sizeof(GLubyte);
148 }
149
150 dst = dstSlices[0];
151
152 fxt1_encode(srcWidth, srcHeight, 4, pixels, srcRowStride,
153 dst, dstRowStride);
154
155 free((void*) tempImage);
156
157 return GL_TRUE;
158 }
159
160
161 /***************************************************************************\
162 * FXT1 encoder
163 *
164 * The encoder was built by reversing the decoder,
165 * and is vaguely based on Texus2 by 3dfx. Note that this code
166 * is merely a proof of concept, since it is highly UNoptimized;
167 * moreover, it is sub-optimal due to initial conditions passed
168 * to Lloyd's algorithm (the interpolation modes are even worse).
169 \***************************************************************************/
170
171
#define MAX_COMP 4 /* ever needed maximum number of components in texel */
#define MAX_VECT 4 /* ever needed maximum number of base vectors to find */
#define N_TEXELS 32 /* number of texels in a block (always 32) */
#define LL_N_REP 50 /* number of iterations in lloyd's vq */
#define LL_RMS_D 10 /* fault tolerance (maximum delta) */
#define LL_RMS_E 255 /* fault tolerance (maximum error) */
#define ALPHA_TS 2 /* alpha threshold: (255 - ALPHA_TS) deemed opaque */

/* True when the 4-component texel at v is transparent black, i.e. all four
 * bytes are zero.  The bytes are compared one by one: the texel buffers are
 * GLubyte arrays, so reading them through a GLuint pointer would violate
 * strict aliasing and could be a misaligned access.  v is evaluated more
 * than once and must not have side effects.
 */
#define ISTBLACK(v) \
   ((v)[0] == 0 && (v)[1] == 0 && (v)[2] == 0 && (v)[3] == 0)
180
181
/*
 * Define a 64-bit unsigned integer type and macros
 *
 * FX64_MOV32(a, b)  load the 32-bit value b into a (upper half cleared)
 * FX64_OR32(a, b)   OR the 32-bit value b into the low half of a
 * FX64_SHL(a, c)    shift a left by c bits
 *
 * The struct fallback keeps the same contract: FX64_MOV32 clears .hi so
 * that later shifts never read an uninitialized word, and FX64_SHL treats
 * a shift count of 0 as a no-op instead of shifting a 32-bit word by 32.
 */
#if 1

#define FX64_NATIVE 1

typedef uint64_t Fx64;

#define FX64_MOV32(a, b) ((a) = (b))
#define FX64_OR32(a, b) ((a) |= (b))
#define FX64_SHL(a, c) ((a) <<= (c))

#else

#define FX64_NATIVE 0

typedef struct {
   GLuint lo, hi;
} Fx64;

#define FX64_MOV32(a, b) ((a).lo = (b), (a).hi = 0)
#define FX64_OR32(a, b) ((a).lo |= (b))

#define FX64_SHL(a, c) \
   do { \
      if ((c) >= 32) { \
         (a).hi = (a).lo << ((c) - 32); \
         (a).lo = 0; \
      } else if ((c) > 0) { \
         (a).hi = ((a).hi << (c)) | ((a).lo >> (32 - (c))); \
         (a).lo <<= (c); \
      } \
   } while (0)

#endif
218
219
#define F(i) ((GLfloat)1) /* can be used to obtain an oblong metric: 0.30 / 0.59 / 0.11 */
#define SAFECDOT 1 /* for paranoids */

/* Build the interpolation vector IV and bias B used by CALCCDOT to map a
 * texel onto one of the NV + 1 steps between endpoint V0 (index 0) and
 * endpoint V1 (index NV).  NC is the number of components.
 *
 * Uses a variable `i' from the enclosing scope as loop counter.
 * If V0 and V1 coincide the squared length d2 is 0; the scale is then
 * forced to 0 (IV = 0, B = 0.5, every texel maps to index 0) instead of
 * dividing by zero and feeding NaN into the float-to-int cast in CALCCDOT.
 */
#define MAKEIVEC(NV, NC, IV, B, V0, V1) \
   do { \
      /* compute interpolation vector */ \
      GLfloat d2 = 0.0F; \
      GLfloat rd2; \
      \
      for (i = 0; i < (NC); i++) { \
         (IV)[i] = ((V1)[i] - (V0)[i]) * F(i); \
         d2 += (IV)[i] * (IV)[i]; \
      } \
      rd2 = (d2 > 0.0F) ? (GLfloat)(NV) / d2 : 0.0F; \
      (B) = 0; \
      for (i = 0; i < (NC); i++) { \
         (IV)[i] *= F(i); \
         (B) -= (IV)[i] * (V0)[i]; \
         (IV)[i] *= rd2; \
      } \
      (B) = (B) * rd2 + 0.5f; \
   } while (0)

/* Project texel V onto the interpolation vector IV (with bias B) and store
 * the resulting index in TEXEL.  With SAFECDOT enabled the index is
 * clamped to [0, NV].  Uses a variable `i' from the enclosing scope.
 */
#define CALCCDOT(TEXEL, NV, NC, IV, B, V)\
   do { \
      GLfloat dot = 0.0F; \
      for (i = 0; i < (NC); i++) { \
         dot += (V)[i] * (IV)[i]; \
      } \
      (TEXEL) = (GLint)(dot + (B)); \
      if (SAFECDOT) { \
         if ((TEXEL) < 0) { \
            (TEXEL) = 0; \
         } else if ((TEXEL) > (NV)) { \
            (TEXEL) = (NV); \
         } \
      } \
   } while (0)
258
259
260 static GLint
261 fxt1_bestcol (GLfloat vec[][MAX_COMP], GLint nv,
262 GLubyte input[MAX_COMP], GLint nc)
263 {
264 GLint i, j, best = -1;
265 GLfloat err = 1e9; /* big enough */
266
267 for (j = 0; j < nv; j++) {
268 GLfloat e = 0.0F;
269 for (i = 0; i < nc; i++) {
270 e += (vec[j][i] - input[i]) * (vec[j][i] - input[i]);
271 }
272 if (e < err) {
273 err = e;
274 best = j;
275 }
276 }
277
278 return best;
279 }
280
281
282 static GLint
283 fxt1_worst (GLfloat vec[MAX_COMP],
284 GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
285 {
286 GLint i, k, worst = -1;
287 GLfloat err = -1.0F; /* small enough */
288
289 for (k = 0; k < n; k++) {
290 GLfloat e = 0.0F;
291 for (i = 0; i < nc; i++) {
292 e += (vec[i] - input[k][i]) * (vec[i] - input[k][i]);
293 }
294 if (e > err) {
295 err = e;
296 worst = k;
297 }
298 }
299
300 return worst;
301 }
302
303
304 static GLint
305 fxt1_variance (GLdouble variance[MAX_COMP],
306 GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
307 {
308 GLint i, k, best = 0;
309 GLint sx, sx2;
310 GLdouble var, maxvar = -1; /* small enough */
311 GLdouble teenth = 1.0 / n;
312
313 for (i = 0; i < nc; i++) {
314 sx = sx2 = 0;
315 for (k = 0; k < n; k++) {
316 GLint t = input[k][i];
317 sx += t;
318 sx2 += t * t;
319 }
320 var = sx2 * teenth - sx * sx * teenth * teenth;
321 if (maxvar < var) {
322 maxvar = var;
323 best = i;
324 }
325 if (variance) {
326 variance[i] = var;
327 }
328 }
329
330 return best;
331 }
332
333
334 static GLint
335 fxt1_choose (GLfloat vec[][MAX_COMP], GLint nv,
336 GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
337 {
338 #if 0
339 /* Choose colors from a grid.
340 */
341 GLint i, j;
342
343 for (j = 0; j < nv; j++) {
344 GLint m = j * (n - 1) / (nv - 1);
345 for (i = 0; i < nc; i++) {
346 vec[j][i] = input[m][i];
347 }
348 }
349 #else
350 /* Our solution here is to find the darkest and brightest colors in
351 * the 8x4 tile and use those as the two representative colors.
352 * There are probably better algorithms to use (histogram-based).
353 */
354 GLint i, j, k;
355 GLint minSum = 2000; /* big enough */
356 GLint maxSum = -1; /* small enough */
357 GLint minCol = 0; /* phoudoin: silent compiler! */
358 GLint maxCol = 0; /* phoudoin: silent compiler! */
359
360 struct {
361 GLint flag;
362 GLint key;
363 GLint freq;
364 GLint idx;
365 } hist[N_TEXELS];
366 GLint lenh = 0;
367
368 memset(hist, 0, sizeof(hist));
369
370 for (k = 0; k < n; k++) {
371 GLint l;
372 GLint key = 0;
373 GLint sum = 0;
374 for (i = 0; i < nc; i++) {
375 key <<= 8;
376 key |= input[k][i];
377 sum += input[k][i];
378 }
379 for (l = 0; l < n; l++) {
380 if (!hist[l].flag) {
381 /* alloc new slot */
382 hist[l].flag = !0;
383 hist[l].key = key;
384 hist[l].freq = 1;
385 hist[l].idx = k;
386 lenh = l + 1;
387 break;
388 } else if (hist[l].key == key) {
389 hist[l].freq++;
390 break;
391 }
392 }
393 if (minSum > sum) {
394 minSum = sum;
395 minCol = k;
396 }
397 if (maxSum < sum) {
398 maxSum = sum;
399 maxCol = k;
400 }
401 }
402
403 if (lenh <= nv) {
404 for (j = 0; j < lenh; j++) {
405 for (i = 0; i < nc; i++) {
406 vec[j][i] = (GLfloat)input[hist[j].idx][i];
407 }
408 }
409 for (; j < nv; j++) {
410 for (i = 0; i < nc; i++) {
411 vec[j][i] = vec[0][i];
412 }
413 }
414 return 0;
415 }
416
417 for (j = 0; j < nv; j++) {
418 for (i = 0; i < nc; i++) {
419 vec[j][i] = ((nv - 1 - j) * input[minCol][i] + j * input[maxCol][i] + (nv - 1) / 2) / (GLfloat)(nv - 1);
420 }
421 }
422 #endif
423
424 return !0;
425 }
426
427
428 static GLint
429 fxt1_lloyd (GLfloat vec[][MAX_COMP], GLint nv,
430 GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
431 {
432 /* Use the generalized lloyd's algorithm for VQ:
433 * find 4 color vectors.
434 *
435 * for each sample color
436 * sort to nearest vector.
437 *
438 * replace each vector with the centroid of its matching colors.
439 *
440 * repeat until RMS doesn't improve.
441 *
442 * if a color vector has no samples, or becomes the same as another
443 * vector, replace it with the color which is farthest from a sample.
444 *
445 * vec[][MAX_COMP] initial vectors and resulting colors
446 * nv number of resulting colors required
447 * input[N_TEXELS][MAX_COMP] input texels
448 * nc number of components in input / vec
449 * n number of input samples
450 */
451
452 GLint sum[MAX_VECT][MAX_COMP]; /* used to accumulate closest texels */
453 GLint cnt[MAX_VECT]; /* how many times a certain vector was chosen */
454 GLfloat error, lasterror = 1e9;
455
456 GLint i, j, k, rep;
457
458 /* the quantizer */
459 for (rep = 0; rep < LL_N_REP; rep++) {
460 /* reset sums & counters */
461 for (j = 0; j < nv; j++) {
462 for (i = 0; i < nc; i++) {
463 sum[j][i] = 0;
464 }
465 cnt[j] = 0;
466 }
467 error = 0;
468
469 /* scan whole block */
470 for (k = 0; k < n; k++) {
471 #if 1
472 GLint best = -1;
473 GLfloat err = 1e9; /* big enough */
474 /* determine best vector */
475 for (j = 0; j < nv; j++) {
476 GLfloat e = (vec[j][0] - input[k][0]) * (vec[j][0] - input[k][0]) +
477 (vec[j][1] - input[k][1]) * (vec[j][1] - input[k][1]) +
478 (vec[j][2] - input[k][2]) * (vec[j][2] - input[k][2]);
479 if (nc == 4) {
480 e += (vec[j][3] - input[k][3]) * (vec[j][3] - input[k][3]);
481 }
482 if (e < err) {
483 err = e;
484 best = j;
485 }
486 }
487 #else
488 GLint best = fxt1_bestcol(vec, nv, input[k], nc, &err);
489 #endif
490 assert(best >= 0);
491 /* add in closest color */
492 for (i = 0; i < nc; i++) {
493 sum[best][i] += input[k][i];
494 }
495 /* mark this vector as used */
496 cnt[best]++;
497 /* accumulate error */
498 error += err;
499 }
500
501 /* check RMS */
502 if ((error < LL_RMS_E) ||
503 ((error < lasterror) && ((lasterror - error) < LL_RMS_D))) {
504 return !0; /* good match */
505 }
506 lasterror = error;
507
508 /* move each vector to the barycenter of its closest colors */
509 for (j = 0; j < nv; j++) {
510 if (cnt[j]) {
511 GLfloat div = 1.0F / cnt[j];
512 for (i = 0; i < nc; i++) {
513 vec[j][i] = div * sum[j][i];
514 }
515 } else {
516 /* this vec has no samples or is identical with a previous vec */
517 GLint worst = fxt1_worst(vec[j], input, nc, n);
518 for (i = 0; i < nc; i++) {
519 vec[j][i] = input[worst][i];
520 }
521 }
522 }
523 }
524
525 return 0; /* could not converge fast enough */
526 }
527
528
529 static void
530 fxt1_quantize_CHROMA (GLuint *cc,
531 GLubyte input[N_TEXELS][MAX_COMP])
532 {
533 const GLint n_vect = 4; /* 4 base vectors to find */
534 const GLint n_comp = 3; /* 3 components: R, G, B */
535 GLfloat vec[MAX_VECT][MAX_COMP];
536 GLint i, j, k;
537 Fx64 hi; /* high quadword */
538 GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
539
540 if (fxt1_choose(vec, n_vect, input, n_comp, N_TEXELS) != 0) {
541 fxt1_lloyd(vec, n_vect, input, n_comp, N_TEXELS);
542 }
543
544 FX64_MOV32(hi, 4); /* cc-chroma = "010" + unused bit */
545 for (j = n_vect - 1; j >= 0; j--) {
546 for (i = 0; i < n_comp; i++) {
547 /* add in colors */
548 FX64_SHL(hi, 5);
549 FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
550 }
551 }
552 ((Fx64 *)cc)[1] = hi;
553
554 lohi = lolo = 0;
555 /* right microtile */
556 for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) {
557 lohi <<= 2;
558 lohi |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
559 }
560 /* left microtile */
561 for (; k >= 0; k--) {
562 lolo <<= 2;
563 lolo |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
564 }
565 cc[1] = lohi;
566 cc[0] = lolo;
567 }
568
569
570 static void
571 fxt1_quantize_ALPHA0 (GLuint *cc,
572 GLubyte input[N_TEXELS][MAX_COMP],
573 GLubyte reord[N_TEXELS][MAX_COMP], GLint n)
574 {
575 const GLint n_vect = 3; /* 3 base vectors to find */
576 const GLint n_comp = 4; /* 4 components: R, G, B, A */
577 GLfloat vec[MAX_VECT][MAX_COMP];
578 GLint i, j, k;
579 Fx64 hi; /* high quadword */
580 GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
581
582 /* the last vector indicates zero */
583 for (i = 0; i < n_comp; i++) {
584 vec[n_vect][i] = 0;
585 }
586
587 /* the first n texels in reord are guaranteed to be non-zero */
588 if (fxt1_choose(vec, n_vect, reord, n_comp, n) != 0) {
589 fxt1_lloyd(vec, n_vect, reord, n_comp, n);
590 }
591
592 FX64_MOV32(hi, 6); /* alpha = "011" + lerp = 0 */
593 for (j = n_vect - 1; j >= 0; j--) {
594 /* add in alphas */
595 FX64_SHL(hi, 5);
596 FX64_OR32(hi, (GLuint)(vec[j][ACOMP] / 8.0F));
597 }
598 for (j = n_vect - 1; j >= 0; j--) {
599 for (i = 0; i < n_comp - 1; i++) {
600 /* add in colors */
601 FX64_SHL(hi, 5);
602 FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
603 }
604 }
605 ((Fx64 *)cc)[1] = hi;
606
607 lohi = lolo = 0;
608 /* right microtile */
609 for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) {
610 lohi <<= 2;
611 lohi |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
612 }
613 /* left microtile */
614 for (; k >= 0; k--) {
615 lolo <<= 2;
616 lolo |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
617 }
618 cc[1] = lohi;
619 cc[0] = lolo;
620 }
621
622
623 static void
624 fxt1_quantize_ALPHA1 (GLuint *cc,
625 GLubyte input[N_TEXELS][MAX_COMP])
626 {
627 const GLint n_vect = 3; /* highest vector number in each microtile */
628 const GLint n_comp = 4; /* 4 components: R, G, B, A */
629 GLfloat vec[1 + 1 + 1][MAX_COMP]; /* 1.5 extrema for each sub-block */
630 GLfloat b, iv[MAX_COMP]; /* interpolation vector */
631 GLint i, j, k;
632 Fx64 hi; /* high quadword */
633 GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
634
635 GLint minSum;
636 GLint maxSum;
637 GLint minColL = 0, maxColL = 0;
638 GLint minColR = 0, maxColR = 0;
639 GLint sumL = 0, sumR = 0;
640 GLint nn_comp;
641 /* Our solution here is to find the darkest and brightest colors in
642 * the 4x4 tile and use those as the two representative colors.
643 * There are probably better algorithms to use (histogram-based).
644 */
645 nn_comp = n_comp;
646 while ((minColL == maxColL) && nn_comp) {
647 minSum = 2000; /* big enough */
648 maxSum = -1; /* small enough */
649 for (k = 0; k < N_TEXELS / 2; k++) {
650 GLint sum = 0;
651 for (i = 0; i < nn_comp; i++) {
652 sum += input[k][i];
653 }
654 if (minSum > sum) {
655 minSum = sum;
656 minColL = k;
657 }
658 if (maxSum < sum) {
659 maxSum = sum;
660 maxColL = k;
661 }
662 sumL += sum;
663 }
664
665 nn_comp--;
666 }
667
668 nn_comp = n_comp;
669 while ((minColR == maxColR) && nn_comp) {
670 minSum = 2000; /* big enough */
671 maxSum = -1; /* small enough */
672 for (k = N_TEXELS / 2; k < N_TEXELS; k++) {
673 GLint sum = 0;
674 for (i = 0; i < nn_comp; i++) {
675 sum += input[k][i];
676 }
677 if (minSum > sum) {
678 minSum = sum;
679 minColR = k;
680 }
681 if (maxSum < sum) {
682 maxSum = sum;
683 maxColR = k;
684 }
685 sumR += sum;
686 }
687
688 nn_comp--;
689 }
690
691 /* choose the common vector (yuck!) */
692 {
693 GLint j1, j2;
694 GLint v1 = 0, v2 = 0;
695 GLfloat err = 1e9; /* big enough */
696 GLfloat tv[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
697 for (i = 0; i < n_comp; i++) {
698 tv[0][i] = input[minColL][i];
699 tv[1][i] = input[maxColL][i];
700 tv[2][i] = input[minColR][i];
701 tv[3][i] = input[maxColR][i];
702 }
703 for (j1 = 0; j1 < 2; j1++) {
704 for (j2 = 2; j2 < 4; j2++) {
705 GLfloat e = 0.0F;
706 for (i = 0; i < n_comp; i++) {
707 e += (tv[j1][i] - tv[j2][i]) * (tv[j1][i] - tv[j2][i]);
708 }
709 if (e < err) {
710 err = e;
711 v1 = j1;
712 v2 = j2;
713 }
714 }
715 }
716 for (i = 0; i < n_comp; i++) {
717 vec[0][i] = tv[1 - v1][i];
718 vec[1][i] = (tv[v1][i] * sumL + tv[v2][i] * sumR) / (sumL + sumR);
719 vec[2][i] = tv[5 - v2][i];
720 }
721 }
722
723 /* left microtile */
724 cc[0] = 0;
725 if (minColL != maxColL) {
726 /* compute interpolation vector */
727 MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
728
729 /* add in texels */
730 lolo = 0;
731 for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
732 GLint texel;
733 /* interpolate color */
734 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
735 /* add in texel */
736 lolo <<= 2;
737 lolo |= texel;
738 }
739
740 cc[0] = lolo;
741 }
742
743 /* right microtile */
744 cc[1] = 0;
745 if (minColR != maxColR) {
746 /* compute interpolation vector */
747 MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[1]);
748
749 /* add in texels */
750 lohi = 0;
751 for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
752 GLint texel;
753 /* interpolate color */
754 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
755 /* add in texel */
756 lohi <<= 2;
757 lohi |= texel;
758 }
759
760 cc[1] = lohi;
761 }
762
763 FX64_MOV32(hi, 7); /* alpha = "011" + lerp = 1 */
764 for (j = n_vect - 1; j >= 0; j--) {
765 /* add in alphas */
766 FX64_SHL(hi, 5);
767 FX64_OR32(hi, (GLuint)(vec[j][ACOMP] / 8.0F));
768 }
769 for (j = n_vect - 1; j >= 0; j--) {
770 for (i = 0; i < n_comp - 1; i++) {
771 /* add in colors */
772 FX64_SHL(hi, 5);
773 FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
774 }
775 }
776 ((Fx64 *)cc)[1] = hi;
777 }
778
779
780 static void
781 fxt1_quantize_HI (GLuint *cc,
782 GLubyte input[N_TEXELS][MAX_COMP],
783 GLubyte reord[N_TEXELS][MAX_COMP], GLint n)
784 {
785 const GLint n_vect = 6; /* highest vector number */
786 const GLint n_comp = 3; /* 3 components: R, G, B */
787 GLfloat b = 0.0F; /* phoudoin: silent compiler! */
788 GLfloat iv[MAX_COMP]; /* interpolation vector */
789 GLint i, k;
790 GLuint hihi; /* high quadword: hi dword */
791
792 GLint minSum = 2000; /* big enough */
793 GLint maxSum = -1; /* small enough */
794 GLint minCol = 0; /* phoudoin: silent compiler! */
795 GLint maxCol = 0; /* phoudoin: silent compiler! */
796
797 /* Our solution here is to find the darkest and brightest colors in
798 * the 8x4 tile and use those as the two representative colors.
799 * There are probably better algorithms to use (histogram-based).
800 */
801 for (k = 0; k < n; k++) {
802 GLint sum = 0;
803 for (i = 0; i < n_comp; i++) {
804 sum += reord[k][i];
805 }
806 if (minSum > sum) {
807 minSum = sum;
808 minCol = k;
809 }
810 if (maxSum < sum) {
811 maxSum = sum;
812 maxCol = k;
813 }
814 }
815
816 hihi = 0; /* cc-hi = "00" */
817 for (i = 0; i < n_comp; i++) {
818 /* add in colors */
819 hihi <<= 5;
820 hihi |= reord[maxCol][i] >> 3;
821 }
822 for (i = 0; i < n_comp; i++) {
823 /* add in colors */
824 hihi <<= 5;
825 hihi |= reord[minCol][i] >> 3;
826 }
827 cc[3] = hihi;
828 cc[0] = cc[1] = cc[2] = 0;
829
830 /* compute interpolation vector */
831 if (minCol != maxCol) {
832 MAKEIVEC(n_vect, n_comp, iv, b, reord[minCol], reord[maxCol]);
833 }
834
835 /* add in texels */
836 for (k = N_TEXELS - 1; k >= 0; k--) {
837 GLint t = k * 3;
838 GLuint *kk = (GLuint *)((char *)cc + t / 8);
839 GLint texel = n_vect + 1; /* transparent black */
840
841 if (!ISTBLACK(input[k])) {
842 if (minCol != maxCol) {
843 /* interpolate color */
844 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
845 /* add in texel */
846 kk[0] |= texel << (t & 7);
847 }
848 } else {
849 /* add in texel */
850 kk[0] |= texel << (t & 7);
851 }
852 }
853 }
854
855
856 static void
857 fxt1_quantize_MIXED1 (GLuint *cc,
858 GLubyte input[N_TEXELS][MAX_COMP])
859 {
860 const GLint n_vect = 2; /* highest vector number in each microtile */
861 const GLint n_comp = 3; /* 3 components: R, G, B */
862 GLubyte vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
863 GLfloat b, iv[MAX_COMP]; /* interpolation vector */
864 GLint i, j, k;
865 Fx64 hi; /* high quadword */
866 GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
867
868 GLint minSum;
869 GLint maxSum;
870 GLint minColL = 0, maxColL = -1;
871 GLint minColR = 0, maxColR = -1;
872
873 /* Our solution here is to find the darkest and brightest colors in
874 * the 4x4 tile and use those as the two representative colors.
875 * There are probably better algorithms to use (histogram-based).
876 */
877 minSum = 2000; /* big enough */
878 maxSum = -1; /* small enough */
879 for (k = 0; k < N_TEXELS / 2; k++) {
880 if (!ISTBLACK(input[k])) {
881 GLint sum = 0;
882 for (i = 0; i < n_comp; i++) {
883 sum += input[k][i];
884 }
885 if (minSum > sum) {
886 minSum = sum;
887 minColL = k;
888 }
889 if (maxSum < sum) {
890 maxSum = sum;
891 maxColL = k;
892 }
893 }
894 }
895 minSum = 2000; /* big enough */
896 maxSum = -1; /* small enough */
897 for (; k < N_TEXELS; k++) {
898 if (!ISTBLACK(input[k])) {
899 GLint sum = 0;
900 for (i = 0; i < n_comp; i++) {
901 sum += input[k][i];
902 }
903 if (minSum > sum) {
904 minSum = sum;
905 minColR = k;
906 }
907 if (maxSum < sum) {
908 maxSum = sum;
909 maxColR = k;
910 }
911 }
912 }
913
914 /* left microtile */
915 if (maxColL == -1) {
916 /* all transparent black */
917 cc[0] = ~0u;
918 for (i = 0; i < n_comp; i++) {
919 vec[0][i] = 0;
920 vec[1][i] = 0;
921 }
922 } else {
923 cc[0] = 0;
924 for (i = 0; i < n_comp; i++) {
925 vec[0][i] = input[minColL][i];
926 vec[1][i] = input[maxColL][i];
927 }
928 if (minColL != maxColL) {
929 /* compute interpolation vector */
930 MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
931
932 /* add in texels */
933 lolo = 0;
934 for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
935 GLint texel = n_vect + 1; /* transparent black */
936 if (!ISTBLACK(input[k])) {
937 /* interpolate color */
938 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
939 }
940 /* add in texel */
941 lolo <<= 2;
942 lolo |= texel;
943 }
944 cc[0] = lolo;
945 }
946 }
947
948 /* right microtile */
949 if (maxColR == -1) {
950 /* all transparent black */
951 cc[1] = ~0u;
952 for (i = 0; i < n_comp; i++) {
953 vec[2][i] = 0;
954 vec[3][i] = 0;
955 }
956 } else {
957 cc[1] = 0;
958 for (i = 0; i < n_comp; i++) {
959 vec[2][i] = input[minColR][i];
960 vec[3][i] = input[maxColR][i];
961 }
962 if (minColR != maxColR) {
963 /* compute interpolation vector */
964 MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);
965
966 /* add in texels */
967 lohi = 0;
968 for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
969 GLint texel = n_vect + 1; /* transparent black */
970 if (!ISTBLACK(input[k])) {
971 /* interpolate color */
972 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
973 }
974 /* add in texel */
975 lohi <<= 2;
976 lohi |= texel;
977 }
978 cc[1] = lohi;
979 }
980 }
981
982 FX64_MOV32(hi, 9 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
983 for (j = 2 * 2 - 1; j >= 0; j--) {
984 for (i = 0; i < n_comp; i++) {
985 /* add in colors */
986 FX64_SHL(hi, 5);
987 FX64_OR32(hi, vec[j][i] >> 3);
988 }
989 }
990 ((Fx64 *)cc)[1] = hi;
991 }
992
993
994 static void
995 fxt1_quantize_MIXED0 (GLuint *cc,
996 GLubyte input[N_TEXELS][MAX_COMP])
997 {
998 const GLint n_vect = 3; /* highest vector number in each microtile */
999 const GLint n_comp = 3; /* 3 components: R, G, B */
1000 GLubyte vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
1001 GLfloat b, iv[MAX_COMP]; /* interpolation vector */
1002 GLint i, j, k;
1003 Fx64 hi; /* high quadword */
1004 GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
1005
1006 GLint minColL = 0, maxColL = 0;
1007 GLint minColR = 0, maxColR = 0;
1008 #if 0
1009 GLint minSum;
1010 GLint maxSum;
1011
1012 /* Our solution here is to find the darkest and brightest colors in
1013 * the 4x4 tile and use those as the two representative colors.
1014 * There are probably better algorithms to use (histogram-based).
1015 */
1016 minSum = 2000; /* big enough */
1017 maxSum = -1; /* small enough */
1018 for (k = 0; k < N_TEXELS / 2; k++) {
1019 GLint sum = 0;
1020 for (i = 0; i < n_comp; i++) {
1021 sum += input[k][i];
1022 }
1023 if (minSum > sum) {
1024 minSum = sum;
1025 minColL = k;
1026 }
1027 if (maxSum < sum) {
1028 maxSum = sum;
1029 maxColL = k;
1030 }
1031 }
1032 minSum = 2000; /* big enough */
1033 maxSum = -1; /* small enough */
1034 for (; k < N_TEXELS; k++) {
1035 GLint sum = 0;
1036 for (i = 0; i < n_comp; i++) {
1037 sum += input[k][i];
1038 }
1039 if (minSum > sum) {
1040 minSum = sum;
1041 minColR = k;
1042 }
1043 if (maxSum < sum) {
1044 maxSum = sum;
1045 maxColR = k;
1046 }
1047 }
1048 #else
1049 GLint minVal;
1050 GLint maxVal;
1051 GLint maxVarL = fxt1_variance(NULL, input, n_comp, N_TEXELS / 2);
1052 GLint maxVarR = fxt1_variance(NULL, &input[N_TEXELS / 2], n_comp, N_TEXELS / 2);
1053
1054 /* Scan the channel with max variance for lo & hi
1055 * and use those as the two representative colors.
1056 */
1057 minVal = 2000; /* big enough */
1058 maxVal = -1; /* small enough */
1059 for (k = 0; k < N_TEXELS / 2; k++) {
1060 GLint t = input[k][maxVarL];
1061 if (minVal > t) {
1062 minVal = t;
1063 minColL = k;
1064 }
1065 if (maxVal < t) {
1066 maxVal = t;
1067 maxColL = k;
1068 }
1069 }
1070 minVal = 2000; /* big enough */
1071 maxVal = -1; /* small enough */
1072 for (; k < N_TEXELS; k++) {
1073 GLint t = input[k][maxVarR];
1074 if (minVal > t) {
1075 minVal = t;
1076 minColR = k;
1077 }
1078 if (maxVal < t) {
1079 maxVal = t;
1080 maxColR = k;
1081 }
1082 }
1083 #endif
1084
1085 /* left microtile */
1086 cc[0] = 0;
1087 for (i = 0; i < n_comp; i++) {
1088 vec[0][i] = input[minColL][i];
1089 vec[1][i] = input[maxColL][i];
1090 }
1091 if (minColL != maxColL) {
1092 /* compute interpolation vector */
1093 MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
1094
1095 /* add in texels */
1096 lolo = 0;
1097 for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
1098 GLint texel;
1099 /* interpolate color */
1100 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
1101 /* add in texel */
1102 lolo <<= 2;
1103 lolo |= texel;
1104 }
1105
1106 /* funky encoding for LSB of green */
1107 if ((GLint)((lolo >> 1) & 1) != (((vec[1][GCOMP] ^ vec[0][GCOMP]) >> 2) & 1)) {
1108 for (i = 0; i < n_comp; i++) {
1109 vec[1][i] = input[minColL][i];
1110 vec[0][i] = input[maxColL][i];
1111 }
1112 lolo = ~lolo;
1113 }
1114
1115 cc[0] = lolo;
1116 }
1117
1118 /* right microtile */
1119 cc[1] = 0;
1120 for (i = 0; i < n_comp; i++) {
1121 vec[2][i] = input[minColR][i];
1122 vec[3][i] = input[maxColR][i];
1123 }
1124 if (minColR != maxColR) {
1125 /* compute interpolation vector */
1126 MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);
1127
1128 /* add in texels */
1129 lohi = 0;
1130 for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
1131 GLint texel;
1132 /* interpolate color */
1133 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
1134 /* add in texel */
1135 lohi <<= 2;
1136 lohi |= texel;
1137 }
1138
1139 /* funky encoding for LSB of green */
1140 if ((GLint)((lohi >> 1) & 1) != (((vec[3][GCOMP] ^ vec[2][GCOMP]) >> 2) & 1)) {
1141 for (i = 0; i < n_comp; i++) {
1142 vec[3][i] = input[minColR][i];
1143 vec[2][i] = input[maxColR][i];
1144 }
1145 lohi = ~lohi;
1146 }
1147
1148 cc[1] = lohi;
1149 }
1150
1151 FX64_MOV32(hi, 8 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
1152 for (j = 2 * 2 - 1; j >= 0; j--) {
1153 for (i = 0; i < n_comp; i++) {
1154 /* add in colors */
1155 FX64_SHL(hi, 5);
1156 FX64_OR32(hi, vec[j][i] >> 3);
1157 }
1158 }
1159 ((Fx64 *)cc)[1] = hi;
1160 }
1161
1162
1163 static void
1164 fxt1_quantize (GLuint *cc, const GLubyte *lines[], GLint comps)
1165 {
1166 GLint trualpha;
1167 GLubyte reord[N_TEXELS][MAX_COMP];
1168
1169 GLubyte input[N_TEXELS][MAX_COMP];
1170 GLint i, k, l;
1171
1172 if (comps == 3) {
1173 /* make the whole block opaque */
1174 memset(input, -1, sizeof(input));
1175 }
1176
1177 /* 8 texels each line */
1178 for (l = 0; l < 4; l++) {
1179 for (k = 0; k < 4; k++) {
1180 for (i = 0; i < comps; i++) {
1181 input[k + l * 4][i] = *lines[l]++;
1182 }
1183 }
1184 for (; k < 8; k++) {
1185 for (i = 0; i < comps; i++) {
1186 input[k + l * 4 + 12][i] = *lines[l]++;
1187 }
1188 }
1189 }
1190
1191 /* block layout:
1192 * 00, 01, 02, 03, 08, 09, 0a, 0b
1193 * 10, 11, 12, 13, 18, 19, 1a, 1b
1194 * 04, 05, 06, 07, 0c, 0d, 0e, 0f
1195 * 14, 15, 16, 17, 1c, 1d, 1e, 1f
1196 */
1197
1198 /* [dBorca]
1199 * stupidity flows forth from this
1200 */
1201 l = N_TEXELS;
1202 trualpha = 0;
1203 if (comps == 4) {
1204 /* skip all transparent black texels */
1205 l = 0;
1206 for (k = 0; k < N_TEXELS; k++) {
1207 /* test all components against 0 */
1208 if (!ISTBLACK(input[k])) {
1209 /* texel is not transparent black */
1210 COPY_4UBV(reord[l], input[k]);
1211 if (reord[l][ACOMP] < (255 - ALPHA_TS)) {
1212 /* non-opaque texel */
1213 trualpha = !0;
1214 }
1215 l++;
1216 }
1217 }
1218 }
1219
1220 #if 0
1221 if (trualpha) {
1222 fxt1_quantize_ALPHA0(cc, input, reord, l);
1223 } else if (l == 0) {
1224 cc[0] = cc[1] = cc[2] = -1;
1225 cc[3] = 0;
1226 } else if (l < N_TEXELS) {
1227 fxt1_quantize_HI(cc, input, reord, l);
1228 } else {
1229 fxt1_quantize_CHROMA(cc, input);
1230 }
1231 (void)fxt1_quantize_ALPHA1;
1232 (void)fxt1_quantize_MIXED1;
1233 (void)fxt1_quantize_MIXED0;
1234 #else
1235 if (trualpha) {
1236 fxt1_quantize_ALPHA1(cc, input);
1237 } else if (l == 0) {
1238 cc[0] = cc[1] = cc[2] = ~0u;
1239 cc[3] = 0;
1240 } else if (l < N_TEXELS) {
1241 fxt1_quantize_MIXED1(cc, input);
1242 } else {
1243 fxt1_quantize_MIXED0(cc, input);
1244 }
1245 (void)fxt1_quantize_ALPHA0;
1246 (void)fxt1_quantize_HI;
1247 (void)fxt1_quantize_CHROMA;
1248 #endif
1249 }
1250
1251
1252
1253 /**
1254 * Upscale an image by replication, not (typical) stretching.
1255 * We use this when the image width or height is less than a
1256 * certain size (4, 8) and we need to upscale an image.
1257 */
1258 static void
1259 upscale_teximage2d(GLsizei inWidth, GLsizei inHeight,
1260 GLsizei outWidth, GLsizei outHeight,
1261 GLint comps, const GLubyte *src, GLint srcRowStride,
1262 GLubyte *dest )
1263 {
1264 GLint i, j, k;
1265
1266 assert(outWidth >= inWidth);
1267 assert(outHeight >= inHeight);
1268 #if 0
1269 assert(inWidth == 1 || inWidth == 2 || inHeight == 1 || inHeight == 2);
1270 assert((outWidth & 3) == 0);
1271 assert((outHeight & 3) == 0);
1272 #endif
1273
1274 for (i = 0; i < outHeight; i++) {
1275 const GLint ii = i % inHeight;
1276 for (j = 0; j < outWidth; j++) {
1277 const GLint jj = j % inWidth;
1278 for (k = 0; k < comps; k++) {
1279 dest[(i * outWidth + j) * comps + k]
1280 = src[ii * srcRowStride + jj * comps + k];
1281 }
1282 }
1283 }
1284 }
1285
1286
1287 static void
1288 fxt1_encode (GLuint width, GLuint height, GLint comps,
1289 const void *source, GLint srcRowStride,
1290 void *dest, GLint destRowStride)
1291 {
1292 GLuint x, y;
1293 const GLubyte *data;
1294 GLuint *encoded = (GLuint *)dest;
1295 void *newSource = NULL;
1296
1297 assert(comps == 3 || comps == 4);
1298
1299 /* Replicate image if width is not M8 or height is not M4 */
1300 if ((width & 7) | (height & 3)) {
1301 GLint newWidth = (width + 7) & ~7;
1302 GLint newHeight = (height + 3) & ~3;
1303 newSource = malloc(comps * newWidth * newHeight * sizeof(GLubyte));
1304 if (!newSource) {
1305 GET_CURRENT_CONTEXT(ctx);
1306 _mesa_error(ctx, GL_OUT_OF_MEMORY, "texture compression");
1307 goto cleanUp;
1308 }
1309 upscale_teximage2d(width, height, newWidth, newHeight,
1310 comps, (const GLubyte *) source,
1311 srcRowStride, (GLubyte *) newSource);
1312 source = newSource;
1313 width = newWidth;
1314 height = newHeight;
1315 srcRowStride = comps * newWidth;
1316 }
1317
1318 data = (const GLubyte *) source;
1319 destRowStride = (destRowStride - width * 2) / 4;
1320 for (y = 0; y < height; y += 4) {
1321 GLuint offs = 0 + (y + 0) * srcRowStride;
1322 for (x = 0; x < width; x += 8) {
1323 const GLubyte *lines[4];
1324 lines[0] = &data[offs];
1325 lines[1] = lines[0] + srcRowStride;
1326 lines[2] = lines[1] + srcRowStride;
1327 lines[3] = lines[2] + srcRowStride;
1328 offs += 8 * comps;
1329 fxt1_quantize(encoded, lines, comps);
1330 /* 128 bits per 8x4 block */
1331 encoded += 4;
1332 }
1333 encoded += destRowStride;
1334 }
1335
1336 cleanUp:
1337 free(newSource);
1338 }
1339
1340
1341 /***************************************************************************\
1342 * FXT1 decoder
1343 *
1344 * The decoder is based on GL_3DFX_texture_compression_FXT1
1345 * specification and serves as a concept for the encoder.
1346 \***************************************************************************/
1347
1348
1349 /* lookup table for scaling 5 bit colors up to 8 bits */
1350 static const GLubyte _rgb_scale_5[] = {
1351 0, 8, 16, 25, 33, 41, 49, 58,
1352 66, 74, 82, 90, 99, 107, 115, 123,
1353 132, 140, 148, 156, 165, 173, 181, 189,
1354 197, 206, 214, 222, 230, 239, 247, 255
1355 };
1356
1357 /* lookup table for scaling 6 bit colors up to 8 bits */
1358 static const GLubyte _rgb_scale_6[] = {
1359 0, 4, 8, 12, 16, 20, 24, 28,
1360 32, 36, 40, 45, 49, 53, 57, 61,
1361 65, 69, 73, 77, 81, 85, 89, 93,
1362 97, 101, 105, 109, 113, 117, 121, 125,
1363 130, 134, 138, 142, 146, 150, 154, 158,
1364 162, 166, 170, 174, 178, 182, 186, 190,
1365 194, 198, 202, 206, 210, 215, 219, 223,
1366 227, 231, 235, 239, 243, 247, 251, 255
1367 };
1368
1369
/* Read the bits of a block starting at bit position `which`, treating the
 * block as an array of 32-bit words.  The result is NOT masked (callers
 * mask it, e.g. through UP5/UP6) and a field must not straddle a word
 * boundary.  The block is only read, so the cast keeps it const.
 */
#define CC_SEL(cc, which) (((const GLuint *)(cc))[(which) / 32] >> ((which) & 31))

/* Expand the low 5 bits of c to an 8-bit color. */
#define UP5(c) _rgb_scale_5[(c) & 31]

/* Expand the low 5 bits of c plus one extra least significant bit b
 * (6 bits in total) to an 8-bit color.
 */
#define UP6(c, b) _rgb_scale_6[(((c) & 31) << 1) | ((b) & 1)]

/* Rounded integer interpolation between c0 (t == 0) and c1 (t == n).
 * The whole expansion is parenthesized so the macro can be used safely
 * as an operand of a larger expression.
 */
#define LERP(n, t, c0, c1) ((((n) - (t)) * (c0) + (t) * (c1) + (n) / 2) / (n))
1374
1375
1376 static void
1377 fxt1_decode_1HI (const GLubyte *code, GLint t, GLubyte *rgba)
1378 {
1379 const GLuint *cc;
1380
1381 t *= 3;
1382 cc = (const GLuint *)(code + t / 8);
1383 t = (cc[0] >> (t & 7)) & 7;
1384
1385 if (t == 7) {
1386 rgba[RCOMP] = rgba[GCOMP] = rgba[BCOMP] = rgba[ACOMP] = 0;
1387 } else {
1388 GLubyte r, g, b;
1389 cc = (const GLuint *)(code + 12);
1390 if (t == 0) {
1391 b = UP5(CC_SEL(cc, 0));
1392 g = UP5(CC_SEL(cc, 5));
1393 r = UP5(CC_SEL(cc, 10));
1394 } else if (t == 6) {
1395 b = UP5(CC_SEL(cc, 15));
1396 g = UP5(CC_SEL(cc, 20));
1397 r = UP5(CC_SEL(cc, 25));
1398 } else {
1399 b = LERP(6, t, UP5(CC_SEL(cc, 0)), UP5(CC_SEL(cc, 15)));
1400 g = LERP(6, t, UP5(CC_SEL(cc, 5)), UP5(CC_SEL(cc, 20)));
1401 r = LERP(6, t, UP5(CC_SEL(cc, 10)), UP5(CC_SEL(cc, 25)));
1402 }
1403 rgba[RCOMP] = r;
1404 rgba[GCOMP] = g;
1405 rgba[BCOMP] = b;
1406 rgba[ACOMP] = 255;
1407 }
1408 }
1409
1410
1411 static void
1412 fxt1_decode_1CHROMA (const GLubyte *code, GLint t, GLubyte *rgba)
1413 {
1414 const GLuint *cc;
1415 GLuint kk;
1416
1417 cc = (const GLuint *)code;
1418 if (t & 16) {
1419 cc++;
1420 t &= 15;
1421 }
1422 t = (cc[0] >> (t * 2)) & 3;
1423
1424 t *= 15;
1425 cc = (const GLuint *)(code + 8 + t / 8);
1426 kk = cc[0] >> (t & 7);
1427 rgba[BCOMP] = UP5(kk);
1428 rgba[GCOMP] = UP5(kk >> 5);
1429 rgba[RCOMP] = UP5(kk >> 10);
1430 rgba[ACOMP] = 255;
1431 }
1432
1433
1434 static void
1435 fxt1_decode_1MIXED (const GLubyte *code, GLint t, GLubyte *rgba)
1436 {
1437 const GLuint *cc;
1438 GLuint col[2][3];
1439 GLint glsb, selb;
1440
1441 cc = (const GLuint *)code;
1442 if (t & 16) {
1443 t &= 15;
1444 t = (cc[1] >> (t * 2)) & 3;
1445 /* col 2 */
1446 col[0][BCOMP] = (*(const GLuint *)(code + 11)) >> 6;
1447 col[0][GCOMP] = CC_SEL(cc, 99);
1448 col[0][RCOMP] = CC_SEL(cc, 104);
1449 /* col 3 */
1450 col[1][BCOMP] = CC_SEL(cc, 109);
1451 col[1][GCOMP] = CC_SEL(cc, 114);
1452 col[1][RCOMP] = CC_SEL(cc, 119);
1453 glsb = CC_SEL(cc, 126);
1454 selb = CC_SEL(cc, 33);
1455 } else {
1456 t = (cc[0] >> (t * 2)) & 3;
1457 /* col 0 */
1458 col[0][BCOMP] = CC_SEL(cc, 64);
1459 col[0][GCOMP] = CC_SEL(cc, 69);
1460 col[0][RCOMP] = CC_SEL(cc, 74);
1461 /* col 1 */
1462 col[1][BCOMP] = CC_SEL(cc, 79);
1463 col[1][GCOMP] = CC_SEL(cc, 84);
1464 col[1][RCOMP] = CC_SEL(cc, 89);
1465 glsb = CC_SEL(cc, 125);
1466 selb = CC_SEL(cc, 1);
1467 }
1468
1469 if (CC_SEL(cc, 124) & 1) {
1470 /* alpha[0] == 1 */
1471
1472 if (t == 3) {
1473 /* zero */
1474 rgba[RCOMP] = rgba[BCOMP] = rgba[GCOMP] = rgba[ACOMP] = 0;
1475 } else {
1476 GLubyte r, g, b;
1477 if (t == 0) {
1478 b = UP5(col[0][BCOMP]);
1479 g = UP5(col[0][GCOMP]);
1480 r = UP5(col[0][RCOMP]);
1481 } else if (t == 2) {
1482 b = UP5(col[1][BCOMP]);
1483 g = UP6(col[1][GCOMP], glsb);
1484 r = UP5(col[1][RCOMP]);
1485 } else {
1486 b = (UP5(col[0][BCOMP]) + UP5(col[1][BCOMP])) / 2;
1487 g = (UP5(col[0][GCOMP]) + UP6(col[1][GCOMP], glsb)) / 2;
1488 r = (UP5(col[0][RCOMP]) + UP5(col[1][RCOMP])) / 2;
1489 }
1490 rgba[RCOMP] = r;
1491 rgba[GCOMP] = g;
1492 rgba[BCOMP] = b;
1493 rgba[ACOMP] = 255;
1494 }
1495 } else {
1496 /* alpha[0] == 0 */
1497 GLubyte r, g, b;
1498 if (t == 0) {
1499 b = UP5(col[0][BCOMP]);
1500 g = UP6(col[0][GCOMP], glsb ^ selb);
1501 r = UP5(col[0][RCOMP]);
1502 } else if (t == 3) {
1503 b = UP5(col[1][BCOMP]);
1504 g = UP6(col[1][GCOMP], glsb);
1505 r = UP5(col[1][RCOMP]);
1506 } else {
1507 b = LERP(3, t, UP5(col[0][BCOMP]), UP5(col[1][BCOMP]));
1508 g = LERP(3, t, UP6(col[0][GCOMP], glsb ^ selb),
1509 UP6(col[1][GCOMP], glsb));
1510 r = LERP(3, t, UP5(col[0][RCOMP]), UP5(col[1][RCOMP]));
1511 }
1512 rgba[RCOMP] = r;
1513 rgba[GCOMP] = g;
1514 rgba[BCOMP] = b;
1515 rgba[ACOMP] = 255;
1516 }
1517 }
1518
1519
1520 static void
1521 fxt1_decode_1ALPHA (const GLubyte *code, GLint t, GLubyte *rgba)
1522 {
1523 const GLuint *cc;
1524 GLubyte r, g, b, a;
1525
1526 cc = (const GLuint *)code;
1527 if (CC_SEL(cc, 124) & 1) {
1528 /* lerp == 1 */
1529 GLuint col0[4];
1530
1531 if (t & 16) {
1532 t &= 15;
1533 t = (cc[1] >> (t * 2)) & 3;
1534 /* col 2 */
1535 col0[BCOMP] = (*(const GLuint *)(code + 11)) >> 6;
1536 col0[GCOMP] = CC_SEL(cc, 99);
1537 col0[RCOMP] = CC_SEL(cc, 104);
1538 col0[ACOMP] = CC_SEL(cc, 119);
1539 } else {
1540 t = (cc[0] >> (t * 2)) & 3;
1541 /* col 0 */
1542 col0[BCOMP] = CC_SEL(cc, 64);
1543 col0[GCOMP] = CC_SEL(cc, 69);
1544 col0[RCOMP] = CC_SEL(cc, 74);
1545 col0[ACOMP] = CC_SEL(cc, 109);
1546 }
1547
1548 if (t == 0) {
1549 b = UP5(col0[BCOMP]);
1550 g = UP5(col0[GCOMP]);
1551 r = UP5(col0[RCOMP]);
1552 a = UP5(col0[ACOMP]);
1553 } else if (t == 3) {
1554 b = UP5(CC_SEL(cc, 79));
1555 g = UP5(CC_SEL(cc, 84));
1556 r = UP5(CC_SEL(cc, 89));
1557 a = UP5(CC_SEL(cc, 114));
1558 } else {
1559 b = LERP(3, t, UP5(col0[BCOMP]), UP5(CC_SEL(cc, 79)));
1560 g = LERP(3, t, UP5(col0[GCOMP]), UP5(CC_SEL(cc, 84)));
1561 r = LERP(3, t, UP5(col0[RCOMP]), UP5(CC_SEL(cc, 89)));
1562 a = LERP(3, t, UP5(col0[ACOMP]), UP5(CC_SEL(cc, 114)));
1563 }
1564 } else {
1565 /* lerp == 0 */
1566
1567 if (t & 16) {
1568 cc++;
1569 t &= 15;
1570 }
1571 t = (cc[0] >> (t * 2)) & 3;
1572
1573 if (t == 3) {
1574 /* zero */
1575 r = g = b = a = 0;
1576 } else {
1577 GLuint kk;
1578 cc = (const GLuint *)code;
1579 a = UP5(cc[3] >> (t * 5 + 13));
1580 t *= 15;
1581 cc = (const GLuint *)(code + 8 + t / 8);
1582 kk = cc[0] >> (t & 7);
1583 b = UP5(kk);
1584 g = UP5(kk >> 5);
1585 r = UP5(kk >> 10);
1586 }
1587 }
1588 rgba[RCOMP] = r;
1589 rgba[GCOMP] = g;
1590 rgba[BCOMP] = b;
1591 rgba[ACOMP] = a;
1592 }
1593
1594
1595 static void
1596 fxt1_decode_1 (const void *texture, GLint stride, /* in pixels */
1597 GLint i, GLint j, GLubyte *rgba)
1598 {
1599 static void (*decode_1[]) (const GLubyte *, GLint, GLubyte *) = {
1600 fxt1_decode_1HI, /* cc-high = "00?" */
1601 fxt1_decode_1HI, /* cc-high = "00?" */
1602 fxt1_decode_1CHROMA, /* cc-chroma = "010" */
1603 fxt1_decode_1ALPHA, /* alpha = "011" */
1604 fxt1_decode_1MIXED, /* mixed = "1??" */
1605 fxt1_decode_1MIXED, /* mixed = "1??" */
1606 fxt1_decode_1MIXED, /* mixed = "1??" */
1607 fxt1_decode_1MIXED /* mixed = "1??" */
1608 };
1609
1610 const GLubyte *code = (const GLubyte *)texture +
1611 ((j / 4) * (stride / 8) + (i / 8)) * 16;
1612 GLint mode = CC_SEL(code, 125);
1613 GLint t = i & 7;
1614
1615 if (t & 4) {
1616 t += 12;
1617 }
1618 t += (j & 3) * 4;
1619
1620 decode_1[mode](code, t, rgba);
1621 }
1622
1623
1624
1625
1626 static void
1627 fetch_rgb_fxt1(const GLubyte *map,
1628 GLint rowStride, GLint i, GLint j, GLfloat *texel)
1629 {
1630 GLubyte rgba[4];
1631 fxt1_decode_1(map, rowStride, i, j, rgba);
1632 texel[RCOMP] = UBYTE_TO_FLOAT(rgba[RCOMP]);
1633 texel[GCOMP] = UBYTE_TO_FLOAT(rgba[GCOMP]);
1634 texel[BCOMP] = UBYTE_TO_FLOAT(rgba[BCOMP]);
1635 texel[ACOMP] = 1.0F;
1636 }
1637
1638
1639 static void
1640 fetch_rgba_fxt1(const GLubyte *map,
1641 GLint rowStride, GLint i, GLint j, GLfloat *texel)
1642 {
1643 GLubyte rgba[4];
1644 fxt1_decode_1(map, rowStride, i, j, rgba);
1645 texel[RCOMP] = UBYTE_TO_FLOAT(rgba[RCOMP]);
1646 texel[GCOMP] = UBYTE_TO_FLOAT(rgba[GCOMP]);
1647 texel[BCOMP] = UBYTE_TO_FLOAT(rgba[BCOMP]);
1648 texel[ACOMP] = UBYTE_TO_FLOAT(rgba[ACOMP]);
1649 }
1650
1651
1652 compressed_fetch_func
1653 _mesa_get_fxt_fetch_func(mesa_format format)
1654 {
1655 switch (format) {
1656 case MESA_FORMAT_RGB_FXT1:
1657 return fetch_rgb_fxt1;
1658 case MESA_FORMAT_RGBA_FXT1:
1659 return fetch_rgba_fxt1;
1660 default:
1661 return NULL;
1662 }
1663 }