mesa: Add "OR COPYRIGHT HOLDERS" to license text disclaiming liability.
[mesa.git] / src / mesa / main / texcompress_fxt1.c
1 /*
2 * Mesa 3-D graphics library
3 * Version: 7.1
4 *
5 * Copyright (C) 1999-2008 Brian Paul All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included
15 * in all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
21 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25
26 /**
27 * \file texcompress_fxt1.c
28 * GL_3DFX_texture_compression_FXT1 support.
29 */
30
31
32 #include "glheader.h"
33 #include "imports.h"
34 #include "colormac.h"
35 #include "image.h"
36 #include "macros.h"
37 #include "mipmap.h"
38 #include "texcompress.h"
39 #include "texcompress_fxt1.h"
40 #include "texstore.h"
41
42
43 static void
44 fxt1_encode (GLuint width, GLuint height, GLint comps,
45 const void *source, GLint srcRowStride,
46 void *dest, GLint destRowStride);
47
48 static void
49 fxt1_decode_1 (const void *texture, GLint stride,
50 GLint i, GLint j, GLubyte *rgba);
51
52
53 /**
54 * Store user's image in rgb_fxt1 format.
55 */
56 GLboolean
57 _mesa_texstore_rgb_fxt1(TEXSTORE_PARAMS)
58 {
59 const GLubyte *pixels;
60 GLint srcRowStride;
61 GLubyte *dst;
62 const GLubyte *tempImage = NULL;
63
64 ASSERT(dstFormat == MESA_FORMAT_RGB_FXT1);
65
66 if (srcFormat != GL_RGB ||
67 srcType != GL_UNSIGNED_BYTE ||
68 ctx->_ImageTransferState ||
69 srcPacking->RowLength != srcWidth ||
70 srcPacking->SwapBytes) {
71 /* convert image to RGB/GLubyte */
72 tempImage = _mesa_make_temp_ubyte_image(ctx, dims,
73 baseInternalFormat,
74 _mesa_get_format_base_format(dstFormat),
75 srcWidth, srcHeight, srcDepth,
76 srcFormat, srcType, srcAddr,
77 srcPacking);
78 if (!tempImage)
79 return GL_FALSE; /* out of memory */
80 pixels = tempImage;
81 srcRowStride = 3 * srcWidth;
82 srcFormat = GL_RGB;
83 }
84 else {
85 pixels = _mesa_image_address2d(srcPacking, srcAddr, srcWidth, srcHeight,
86 srcFormat, srcType, 0, 0);
87
88 srcRowStride = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat,
89 srcType) / sizeof(GLubyte);
90 }
91
92 dst = dstSlices[0];
93
94 fxt1_encode(srcWidth, srcHeight, 3, pixels, srcRowStride,
95 dst, dstRowStride);
96
97 free((void*) tempImage);
98
99 return GL_TRUE;
100 }
101
102
103 /**
104 * Store user's image in rgba_fxt1 format.
105 */
106 GLboolean
107 _mesa_texstore_rgba_fxt1(TEXSTORE_PARAMS)
108 {
109 const GLubyte *pixels;
110 GLint srcRowStride;
111 GLubyte *dst;
112 const GLubyte *tempImage = NULL;
113
114 ASSERT(dstFormat == MESA_FORMAT_RGBA_FXT1);
115
116 if (srcFormat != GL_RGBA ||
117 srcType != GL_UNSIGNED_BYTE ||
118 ctx->_ImageTransferState ||
119 srcPacking->SwapBytes) {
120 /* convert image to RGBA/GLubyte */
121 tempImage = _mesa_make_temp_ubyte_image(ctx, dims,
122 baseInternalFormat,
123 _mesa_get_format_base_format(dstFormat),
124 srcWidth, srcHeight, srcDepth,
125 srcFormat, srcType, srcAddr,
126 srcPacking);
127 if (!tempImage)
128 return GL_FALSE; /* out of memory */
129 pixels = tempImage;
130 srcRowStride = 4 * srcWidth;
131 srcFormat = GL_RGBA;
132 }
133 else {
134 pixels = _mesa_image_address2d(srcPacking, srcAddr, srcWidth, srcHeight,
135 srcFormat, srcType, 0, 0);
136
137 srcRowStride = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat,
138 srcType) / sizeof(GLubyte);
139 }
140
141 dst = dstSlices[0];
142
143 fxt1_encode(srcWidth, srcHeight, 4, pixels, srcRowStride,
144 dst, dstRowStride);
145
146 free((void*) tempImage);
147
148 return GL_TRUE;
149 }
150
151
152 /***************************************************************************\
153 * FXT1 encoder
154 *
155 * The encoder was built by reversing the decoder,
156 * and is vaguely based on Texus2 by 3dfx. Note that this code
157 * is merely a proof of concept, since it is highly UNoptimized;
158 * moreover, it is sub-optimal due to initial conditions passed
159 * to Lloyd's algorithm (the interpolation modes are even worse).
160 \***************************************************************************/
161
162
#define MAX_COMP 4 /* ever needed maximum number of components in texel */
#define MAX_VECT 4 /* ever needed maximum number of base vectors to find */
#define N_TEXELS 32 /* number of texels in a block (always 32) */
#define LL_N_REP 50 /* number of iterations in lloyd's vq */
#define LL_RMS_D 10 /* fault tolerance (maximum delta) */
#define LL_RMS_E 255 /* fault tolerance (maximum error) */
#define ALPHA_TS 2 /* alpha threshold: (255 - ALPHA_TS) deemed opaque */

/* True when the four bytes starting at v are all zero ("transparent
 * black" texel).  The bytes are inspected one at a time rather than
 * through a GLuint load, so the texel needs no 4-byte alignment and no
 * object is accessed through an incompatible type.
 * Note: v is evaluated more than once; pass a side-effect-free expression.
 */
#define ISTBLACK(v) \
   ((((const GLubyte *)(v))[0] | ((const GLubyte *)(v))[1] | \
     ((const GLubyte *)(v))[2] | ((const GLubyte *)(v))[3]) == 0)
171
172
/*
 * Define a 64-bit unsigned integer type and macros
 *
 * FX64_MOV32(a, b)  load the 32-bit value b into a (upper half cleared)
 * FX64_OR32(a, b)   OR the 32-bit value b into the low half of a
 * FX64_SHL(a, c)    shift a left by c bits (0 <= c < 64)
 */
#if 1

#define FX64_NATIVE 1

typedef uint64_t Fx64;

#define FX64_MOV32(a, b) ((a) = (b))
#define FX64_OR32(a, b) ((a) |= (b))
#define FX64_SHL(a, c) ((a) <<= (c))

#else

#define FX64_NATIVE 0

typedef struct {
   GLuint lo, hi;
} Fx64;

/* clear the high dword too: FX64_SHL reads it on the next step */
#define FX64_MOV32(a, b) \
   do { \
      (a).lo = (b); \
      (a).hi = 0; \
   } while (0)

#define FX64_OR32(a, b) ((a).lo |= (b))

/* a zero shift is skipped: (a).lo >> 32 would be undefined */
#define FX64_SHL(a, c) \
   do { \
      if ((c) >= 32) { \
         (a).hi = (a).lo << ((c) - 32); \
         (a).lo = 0; \
      } else if ((c) > 0) { \
         (a).hi = ((a).hi << (c)) | ((a).lo >> (32 - (c))); \
         (a).lo <<= (c); \
      } \
   } while (0)

#endif
209
210
#define F(i) (GLfloat)1 /* can be used to obtain an oblong metric: 0.30 / 0.59 / 0.11 */
#define SAFECDOT 1 /* for paranoids */

/* Build the interpolation vector IV and bias B that project a color onto
 * the segment V0..V1, scaled so that V0 maps to index 0 and V1 to index NV.
 * Uses the caller's loop variable `i`.
 * When V0 and V1 are identical in all NC components the segment has zero
 * length; IV is then zeroed and B becomes 0.5, so CALCCDOT yields index 0
 * instead of dividing by zero.
 */
#define MAKEIVEC(NV, NC, IV, B, V0, V1) \
   do { \
      /* compute interpolation vector */ \
      GLfloat d2 = 0.0F; \
      GLfloat rd2; \
      \
      for (i = 0; i < NC; i++) { \
         IV[i] = (V1[i] - V0[i]) * F(i); \
         d2 += IV[i] * IV[i]; \
      } \
      /* zero-length segment: avoid NV / 0 */ \
      rd2 = (d2 > 0.0F) ? ((GLfloat)NV / d2) : 0.0F; \
      B = 0; \
      for (i = 0; i < NC; i++) { \
         IV[i] *= F(i); \
         B -= IV[i] * V0[i]; \
         IV[i] *= rd2; \
      } \
      B = B * rd2 + 0.5f; \
   } while (0)

/* Project color V onto the vector prepared by MAKEIVEC and store the
 * resulting index in TEXEL; with SAFECDOT set the index is clamped to
 * the range 0..NV.  Uses the caller's loop variable `i`.
 */
#define CALCCDOT(TEXEL, NV, NC, IV, B, V)\
   do { \
      GLfloat dot = 0.0F; \
      for (i = 0; i < NC; i++) { \
         dot += V[i] * IV[i]; \
      } \
      TEXEL = (GLint)(dot + B); \
      if (SAFECDOT) { \
         if (TEXEL < 0) { \
            TEXEL = 0; \
         } else if (TEXEL > NV) { \
            TEXEL = NV; \
         } \
      } \
   } while (0)
249
250
251 static GLint
252 fxt1_bestcol (GLfloat vec[][MAX_COMP], GLint nv,
253 GLubyte input[MAX_COMP], GLint nc)
254 {
255 GLint i, j, best = -1;
256 GLfloat err = 1e9; /* big enough */
257
258 for (j = 0; j < nv; j++) {
259 GLfloat e = 0.0F;
260 for (i = 0; i < nc; i++) {
261 e += (vec[j][i] - input[i]) * (vec[j][i] - input[i]);
262 }
263 if (e < err) {
264 err = e;
265 best = j;
266 }
267 }
268
269 return best;
270 }
271
272
273 static GLint
274 fxt1_worst (GLfloat vec[MAX_COMP],
275 GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
276 {
277 GLint i, k, worst = -1;
278 GLfloat err = -1.0F; /* small enough */
279
280 for (k = 0; k < n; k++) {
281 GLfloat e = 0.0F;
282 for (i = 0; i < nc; i++) {
283 e += (vec[i] - input[k][i]) * (vec[i] - input[k][i]);
284 }
285 if (e > err) {
286 err = e;
287 worst = k;
288 }
289 }
290
291 return worst;
292 }
293
294
295 static GLint
296 fxt1_variance (GLdouble variance[MAX_COMP],
297 GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
298 {
299 GLint i, k, best = 0;
300 GLint sx, sx2;
301 GLdouble var, maxvar = -1; /* small enough */
302 GLdouble teenth = 1.0 / n;
303
304 for (i = 0; i < nc; i++) {
305 sx = sx2 = 0;
306 for (k = 0; k < n; k++) {
307 GLint t = input[k][i];
308 sx += t;
309 sx2 += t * t;
310 }
311 var = sx2 * teenth - sx * sx * teenth * teenth;
312 if (maxvar < var) {
313 maxvar = var;
314 best = i;
315 }
316 if (variance) {
317 variance[i] = var;
318 }
319 }
320
321 return best;
322 }
323
324
325 static GLint
326 fxt1_choose (GLfloat vec[][MAX_COMP], GLint nv,
327 GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
328 {
329 #if 0
330 /* Choose colors from a grid.
331 */
332 GLint i, j;
333
334 for (j = 0; j < nv; j++) {
335 GLint m = j * (n - 1) / (nv - 1);
336 for (i = 0; i < nc; i++) {
337 vec[j][i] = input[m][i];
338 }
339 }
340 #else
341 /* Our solution here is to find the darkest and brightest colors in
342 * the 8x4 tile and use those as the two representative colors.
343 * There are probably better algorithms to use (histogram-based).
344 */
345 GLint i, j, k;
346 GLint minSum = 2000; /* big enough */
347 GLint maxSum = -1; /* small enough */
348 GLint minCol = 0; /* phoudoin: silent compiler! */
349 GLint maxCol = 0; /* phoudoin: silent compiler! */
350
351 struct {
352 GLint flag;
353 GLint key;
354 GLint freq;
355 GLint idx;
356 } hist[N_TEXELS];
357 GLint lenh = 0;
358
359 memset(hist, 0, sizeof(hist));
360
361 for (k = 0; k < n; k++) {
362 GLint l;
363 GLint key = 0;
364 GLint sum = 0;
365 for (i = 0; i < nc; i++) {
366 key <<= 8;
367 key |= input[k][i];
368 sum += input[k][i];
369 }
370 for (l = 0; l < n; l++) {
371 if (!hist[l].flag) {
372 /* alloc new slot */
373 hist[l].flag = !0;
374 hist[l].key = key;
375 hist[l].freq = 1;
376 hist[l].idx = k;
377 lenh = l + 1;
378 break;
379 } else if (hist[l].key == key) {
380 hist[l].freq++;
381 break;
382 }
383 }
384 if (minSum > sum) {
385 minSum = sum;
386 minCol = k;
387 }
388 if (maxSum < sum) {
389 maxSum = sum;
390 maxCol = k;
391 }
392 }
393
394 if (lenh <= nv) {
395 for (j = 0; j < lenh; j++) {
396 for (i = 0; i < nc; i++) {
397 vec[j][i] = (GLfloat)input[hist[j].idx][i];
398 }
399 }
400 for (; j < nv; j++) {
401 for (i = 0; i < nc; i++) {
402 vec[j][i] = vec[0][i];
403 }
404 }
405 return 0;
406 }
407
408 for (j = 0; j < nv; j++) {
409 for (i = 0; i < nc; i++) {
410 vec[j][i] = ((nv - 1 - j) * input[minCol][i] + j * input[maxCol][i] + (nv - 1) / 2) / (GLfloat)(nv - 1);
411 }
412 }
413 #endif
414
415 return !0;
416 }
417
418
419 static GLint
420 fxt1_lloyd (GLfloat vec[][MAX_COMP], GLint nv,
421 GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
422 {
423 /* Use the generalized lloyd's algorithm for VQ:
424 * find 4 color vectors.
425 *
426 * for each sample color
427 * sort to nearest vector.
428 *
429 * replace each vector with the centroid of its matching colors.
430 *
431 * repeat until RMS doesn't improve.
432 *
433 * if a color vector has no samples, or becomes the same as another
434 * vector, replace it with the color which is farthest from a sample.
435 *
436 * vec[][MAX_COMP] initial vectors and resulting colors
437 * nv number of resulting colors required
438 * input[N_TEXELS][MAX_COMP] input texels
439 * nc number of components in input / vec
440 * n number of input samples
441 */
442
443 GLint sum[MAX_VECT][MAX_COMP]; /* used to accumulate closest texels */
444 GLint cnt[MAX_VECT]; /* how many times a certain vector was chosen */
445 GLfloat error, lasterror = 1e9;
446
447 GLint i, j, k, rep;
448
449 /* the quantizer */
450 for (rep = 0; rep < LL_N_REP; rep++) {
451 /* reset sums & counters */
452 for (j = 0; j < nv; j++) {
453 for (i = 0; i < nc; i++) {
454 sum[j][i] = 0;
455 }
456 cnt[j] = 0;
457 }
458 error = 0;
459
460 /* scan whole block */
461 for (k = 0; k < n; k++) {
462 #if 1
463 GLint best = -1;
464 GLfloat err = 1e9; /* big enough */
465 /* determine best vector */
466 for (j = 0; j < nv; j++) {
467 GLfloat e = (vec[j][0] - input[k][0]) * (vec[j][0] - input[k][0]) +
468 (vec[j][1] - input[k][1]) * (vec[j][1] - input[k][1]) +
469 (vec[j][2] - input[k][2]) * (vec[j][2] - input[k][2]);
470 if (nc == 4) {
471 e += (vec[j][3] - input[k][3]) * (vec[j][3] - input[k][3]);
472 }
473 if (e < err) {
474 err = e;
475 best = j;
476 }
477 }
478 #else
479 GLint best = fxt1_bestcol(vec, nv, input[k], nc, &err);
480 #endif
481 assert(best >= 0);
482 /* add in closest color */
483 for (i = 0; i < nc; i++) {
484 sum[best][i] += input[k][i];
485 }
486 /* mark this vector as used */
487 cnt[best]++;
488 /* accumulate error */
489 error += err;
490 }
491
492 /* check RMS */
493 if ((error < LL_RMS_E) ||
494 ((error < lasterror) && ((lasterror - error) < LL_RMS_D))) {
495 return !0; /* good match */
496 }
497 lasterror = error;
498
499 /* move each vector to the barycenter of its closest colors */
500 for (j = 0; j < nv; j++) {
501 if (cnt[j]) {
502 GLfloat div = 1.0F / cnt[j];
503 for (i = 0; i < nc; i++) {
504 vec[j][i] = div * sum[j][i];
505 }
506 } else {
507 /* this vec has no samples or is identical with a previous vec */
508 GLint worst = fxt1_worst(vec[j], input, nc, n);
509 for (i = 0; i < nc; i++) {
510 vec[j][i] = input[worst][i];
511 }
512 }
513 }
514 }
515
516 return 0; /* could not converge fast enough */
517 }
518
519
520 static void
521 fxt1_quantize_CHROMA (GLuint *cc,
522 GLubyte input[N_TEXELS][MAX_COMP])
523 {
524 const GLint n_vect = 4; /* 4 base vectors to find */
525 const GLint n_comp = 3; /* 3 components: R, G, B */
526 GLfloat vec[MAX_VECT][MAX_COMP];
527 GLint i, j, k;
528 Fx64 hi; /* high quadword */
529 GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
530
531 if (fxt1_choose(vec, n_vect, input, n_comp, N_TEXELS) != 0) {
532 fxt1_lloyd(vec, n_vect, input, n_comp, N_TEXELS);
533 }
534
535 FX64_MOV32(hi, 4); /* cc-chroma = "010" + unused bit */
536 for (j = n_vect - 1; j >= 0; j--) {
537 for (i = 0; i < n_comp; i++) {
538 /* add in colors */
539 FX64_SHL(hi, 5);
540 FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
541 }
542 }
543 ((Fx64 *)cc)[1] = hi;
544
545 lohi = lolo = 0;
546 /* right microtile */
547 for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) {
548 lohi <<= 2;
549 lohi |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
550 }
551 /* left microtile */
552 for (; k >= 0; k--) {
553 lolo <<= 2;
554 lolo |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
555 }
556 cc[1] = lohi;
557 cc[0] = lolo;
558 }
559
560
561 static void
562 fxt1_quantize_ALPHA0 (GLuint *cc,
563 GLubyte input[N_TEXELS][MAX_COMP],
564 GLubyte reord[N_TEXELS][MAX_COMP], GLint n)
565 {
566 const GLint n_vect = 3; /* 3 base vectors to find */
567 const GLint n_comp = 4; /* 4 components: R, G, B, A */
568 GLfloat vec[MAX_VECT][MAX_COMP];
569 GLint i, j, k;
570 Fx64 hi; /* high quadword */
571 GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
572
573 /* the last vector indicates zero */
574 for (i = 0; i < n_comp; i++) {
575 vec[n_vect][i] = 0;
576 }
577
578 /* the first n texels in reord are guaranteed to be non-zero */
579 if (fxt1_choose(vec, n_vect, reord, n_comp, n) != 0) {
580 fxt1_lloyd(vec, n_vect, reord, n_comp, n);
581 }
582
583 FX64_MOV32(hi, 6); /* alpha = "011" + lerp = 0 */
584 for (j = n_vect - 1; j >= 0; j--) {
585 /* add in alphas */
586 FX64_SHL(hi, 5);
587 FX64_OR32(hi, (GLuint)(vec[j][ACOMP] / 8.0F));
588 }
589 for (j = n_vect - 1; j >= 0; j--) {
590 for (i = 0; i < n_comp - 1; i++) {
591 /* add in colors */
592 FX64_SHL(hi, 5);
593 FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
594 }
595 }
596 ((Fx64 *)cc)[1] = hi;
597
598 lohi = lolo = 0;
599 /* right microtile */
600 for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) {
601 lohi <<= 2;
602 lohi |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
603 }
604 /* left microtile */
605 for (; k >= 0; k--) {
606 lolo <<= 2;
607 lolo |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
608 }
609 cc[1] = lohi;
610 cc[0] = lolo;
611 }
612
613
614 static void
615 fxt1_quantize_ALPHA1 (GLuint *cc,
616 GLubyte input[N_TEXELS][MAX_COMP])
617 {
618 const GLint n_vect = 3; /* highest vector number in each microtile */
619 const GLint n_comp = 4; /* 4 components: R, G, B, A */
620 GLfloat vec[1 + 1 + 1][MAX_COMP]; /* 1.5 extrema for each sub-block */
621 GLfloat b, iv[MAX_COMP]; /* interpolation vector */
622 GLint i, j, k;
623 Fx64 hi; /* high quadword */
624 GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
625
626 GLint minSum;
627 GLint maxSum;
628 GLint minColL = 0, maxColL = 0;
629 GLint minColR = 0, maxColR = 0;
630 GLint sumL = 0, sumR = 0;
631 GLint nn_comp;
632 /* Our solution here is to find the darkest and brightest colors in
633 * the 4x4 tile and use those as the two representative colors.
634 * There are probably better algorithms to use (histogram-based).
635 */
636 nn_comp = n_comp;
637 while ((minColL == maxColL) && nn_comp) {
638 minSum = 2000; /* big enough */
639 maxSum = -1; /* small enough */
640 for (k = 0; k < N_TEXELS / 2; k++) {
641 GLint sum = 0;
642 for (i = 0; i < nn_comp; i++) {
643 sum += input[k][i];
644 }
645 if (minSum > sum) {
646 minSum = sum;
647 minColL = k;
648 }
649 if (maxSum < sum) {
650 maxSum = sum;
651 maxColL = k;
652 }
653 sumL += sum;
654 }
655
656 nn_comp--;
657 }
658
659 nn_comp = n_comp;
660 while ((minColR == maxColR) && nn_comp) {
661 minSum = 2000; /* big enough */
662 maxSum = -1; /* small enough */
663 for (k = N_TEXELS / 2; k < N_TEXELS; k++) {
664 GLint sum = 0;
665 for (i = 0; i < nn_comp; i++) {
666 sum += input[k][i];
667 }
668 if (minSum > sum) {
669 minSum = sum;
670 minColR = k;
671 }
672 if (maxSum < sum) {
673 maxSum = sum;
674 maxColR = k;
675 }
676 sumR += sum;
677 }
678
679 nn_comp--;
680 }
681
682 /* choose the common vector (yuck!) */
683 {
684 GLint j1, j2;
685 GLint v1 = 0, v2 = 0;
686 GLfloat err = 1e9; /* big enough */
687 GLfloat tv[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
688 for (i = 0; i < n_comp; i++) {
689 tv[0][i] = input[minColL][i];
690 tv[1][i] = input[maxColL][i];
691 tv[2][i] = input[minColR][i];
692 tv[3][i] = input[maxColR][i];
693 }
694 for (j1 = 0; j1 < 2; j1++) {
695 for (j2 = 2; j2 < 4; j2++) {
696 GLfloat e = 0.0F;
697 for (i = 0; i < n_comp; i++) {
698 e += (tv[j1][i] - tv[j2][i]) * (tv[j1][i] - tv[j2][i]);
699 }
700 if (e < err) {
701 err = e;
702 v1 = j1;
703 v2 = j2;
704 }
705 }
706 }
707 for (i = 0; i < n_comp; i++) {
708 vec[0][i] = tv[1 - v1][i];
709 vec[1][i] = (tv[v1][i] * sumL + tv[v2][i] * sumR) / (sumL + sumR);
710 vec[2][i] = tv[5 - v2][i];
711 }
712 }
713
714 /* left microtile */
715 cc[0] = 0;
716 if (minColL != maxColL) {
717 /* compute interpolation vector */
718 MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
719
720 /* add in texels */
721 lolo = 0;
722 for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
723 GLint texel;
724 /* interpolate color */
725 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
726 /* add in texel */
727 lolo <<= 2;
728 lolo |= texel;
729 }
730
731 cc[0] = lolo;
732 }
733
734 /* right microtile */
735 cc[1] = 0;
736 if (minColR != maxColR) {
737 /* compute interpolation vector */
738 MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[1]);
739
740 /* add in texels */
741 lohi = 0;
742 for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
743 GLint texel;
744 /* interpolate color */
745 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
746 /* add in texel */
747 lohi <<= 2;
748 lohi |= texel;
749 }
750
751 cc[1] = lohi;
752 }
753
754 FX64_MOV32(hi, 7); /* alpha = "011" + lerp = 1 */
755 for (j = n_vect - 1; j >= 0; j--) {
756 /* add in alphas */
757 FX64_SHL(hi, 5);
758 FX64_OR32(hi, (GLuint)(vec[j][ACOMP] / 8.0F));
759 }
760 for (j = n_vect - 1; j >= 0; j--) {
761 for (i = 0; i < n_comp - 1; i++) {
762 /* add in colors */
763 FX64_SHL(hi, 5);
764 FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
765 }
766 }
767 ((Fx64 *)cc)[1] = hi;
768 }
769
770
771 static void
772 fxt1_quantize_HI (GLuint *cc,
773 GLubyte input[N_TEXELS][MAX_COMP],
774 GLubyte reord[N_TEXELS][MAX_COMP], GLint n)
775 {
776 const GLint n_vect = 6; /* highest vector number */
777 const GLint n_comp = 3; /* 3 components: R, G, B */
778 GLfloat b = 0.0F; /* phoudoin: silent compiler! */
779 GLfloat iv[MAX_COMP]; /* interpolation vector */
780 GLint i, k;
781 GLuint hihi; /* high quadword: hi dword */
782
783 GLint minSum = 2000; /* big enough */
784 GLint maxSum = -1; /* small enough */
785 GLint minCol = 0; /* phoudoin: silent compiler! */
786 GLint maxCol = 0; /* phoudoin: silent compiler! */
787
788 /* Our solution here is to find the darkest and brightest colors in
789 * the 8x4 tile and use those as the two representative colors.
790 * There are probably better algorithms to use (histogram-based).
791 */
792 for (k = 0; k < n; k++) {
793 GLint sum = 0;
794 for (i = 0; i < n_comp; i++) {
795 sum += reord[k][i];
796 }
797 if (minSum > sum) {
798 minSum = sum;
799 minCol = k;
800 }
801 if (maxSum < sum) {
802 maxSum = sum;
803 maxCol = k;
804 }
805 }
806
807 hihi = 0; /* cc-hi = "00" */
808 for (i = 0; i < n_comp; i++) {
809 /* add in colors */
810 hihi <<= 5;
811 hihi |= reord[maxCol][i] >> 3;
812 }
813 for (i = 0; i < n_comp; i++) {
814 /* add in colors */
815 hihi <<= 5;
816 hihi |= reord[minCol][i] >> 3;
817 }
818 cc[3] = hihi;
819 cc[0] = cc[1] = cc[2] = 0;
820
821 /* compute interpolation vector */
822 if (minCol != maxCol) {
823 MAKEIVEC(n_vect, n_comp, iv, b, reord[minCol], reord[maxCol]);
824 }
825
826 /* add in texels */
827 for (k = N_TEXELS - 1; k >= 0; k--) {
828 GLint t = k * 3;
829 GLuint *kk = (GLuint *)((char *)cc + t / 8);
830 GLint texel = n_vect + 1; /* transparent black */
831
832 if (!ISTBLACK(input[k])) {
833 if (minCol != maxCol) {
834 /* interpolate color */
835 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
836 /* add in texel */
837 kk[0] |= texel << (t & 7);
838 }
839 } else {
840 /* add in texel */
841 kk[0] |= texel << (t & 7);
842 }
843 }
844 }
845
846
847 static void
848 fxt1_quantize_MIXED1 (GLuint *cc,
849 GLubyte input[N_TEXELS][MAX_COMP])
850 {
851 const GLint n_vect = 2; /* highest vector number in each microtile */
852 const GLint n_comp = 3; /* 3 components: R, G, B */
853 GLubyte vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
854 GLfloat b, iv[MAX_COMP]; /* interpolation vector */
855 GLint i, j, k;
856 Fx64 hi; /* high quadword */
857 GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
858
859 GLint minSum;
860 GLint maxSum;
861 GLint minColL = 0, maxColL = -1;
862 GLint minColR = 0, maxColR = -1;
863
864 /* Our solution here is to find the darkest and brightest colors in
865 * the 4x4 tile and use those as the two representative colors.
866 * There are probably better algorithms to use (histogram-based).
867 */
868 minSum = 2000; /* big enough */
869 maxSum = -1; /* small enough */
870 for (k = 0; k < N_TEXELS / 2; k++) {
871 if (!ISTBLACK(input[k])) {
872 GLint sum = 0;
873 for (i = 0; i < n_comp; i++) {
874 sum += input[k][i];
875 }
876 if (minSum > sum) {
877 minSum = sum;
878 minColL = k;
879 }
880 if (maxSum < sum) {
881 maxSum = sum;
882 maxColL = k;
883 }
884 }
885 }
886 minSum = 2000; /* big enough */
887 maxSum = -1; /* small enough */
888 for (; k < N_TEXELS; k++) {
889 if (!ISTBLACK(input[k])) {
890 GLint sum = 0;
891 for (i = 0; i < n_comp; i++) {
892 sum += input[k][i];
893 }
894 if (minSum > sum) {
895 minSum = sum;
896 minColR = k;
897 }
898 if (maxSum < sum) {
899 maxSum = sum;
900 maxColR = k;
901 }
902 }
903 }
904
905 /* left microtile */
906 if (maxColL == -1) {
907 /* all transparent black */
908 cc[0] = ~0u;
909 for (i = 0; i < n_comp; i++) {
910 vec[0][i] = 0;
911 vec[1][i] = 0;
912 }
913 } else {
914 cc[0] = 0;
915 for (i = 0; i < n_comp; i++) {
916 vec[0][i] = input[minColL][i];
917 vec[1][i] = input[maxColL][i];
918 }
919 if (minColL != maxColL) {
920 /* compute interpolation vector */
921 MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
922
923 /* add in texels */
924 lolo = 0;
925 for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
926 GLint texel = n_vect + 1; /* transparent black */
927 if (!ISTBLACK(input[k])) {
928 /* interpolate color */
929 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
930 }
931 /* add in texel */
932 lolo <<= 2;
933 lolo |= texel;
934 }
935 cc[0] = lolo;
936 }
937 }
938
939 /* right microtile */
940 if (maxColR == -1) {
941 /* all transparent black */
942 cc[1] = ~0u;
943 for (i = 0; i < n_comp; i++) {
944 vec[2][i] = 0;
945 vec[3][i] = 0;
946 }
947 } else {
948 cc[1] = 0;
949 for (i = 0; i < n_comp; i++) {
950 vec[2][i] = input[minColR][i];
951 vec[3][i] = input[maxColR][i];
952 }
953 if (minColR != maxColR) {
954 /* compute interpolation vector */
955 MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);
956
957 /* add in texels */
958 lohi = 0;
959 for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
960 GLint texel = n_vect + 1; /* transparent black */
961 if (!ISTBLACK(input[k])) {
962 /* interpolate color */
963 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
964 }
965 /* add in texel */
966 lohi <<= 2;
967 lohi |= texel;
968 }
969 cc[1] = lohi;
970 }
971 }
972
973 FX64_MOV32(hi, 9 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
974 for (j = 2 * 2 - 1; j >= 0; j--) {
975 for (i = 0; i < n_comp; i++) {
976 /* add in colors */
977 FX64_SHL(hi, 5);
978 FX64_OR32(hi, vec[j][i] >> 3);
979 }
980 }
981 ((Fx64 *)cc)[1] = hi;
982 }
983
984
985 static void
986 fxt1_quantize_MIXED0 (GLuint *cc,
987 GLubyte input[N_TEXELS][MAX_COMP])
988 {
989 const GLint n_vect = 3; /* highest vector number in each microtile */
990 const GLint n_comp = 3; /* 3 components: R, G, B */
991 GLubyte vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
992 GLfloat b, iv[MAX_COMP]; /* interpolation vector */
993 GLint i, j, k;
994 Fx64 hi; /* high quadword */
995 GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
996
997 GLint minColL = 0, maxColL = 0;
998 GLint minColR = 0, maxColR = 0;
999 #if 0
1000 GLint minSum;
1001 GLint maxSum;
1002
1003 /* Our solution here is to find the darkest and brightest colors in
1004 * the 4x4 tile and use those as the two representative colors.
1005 * There are probably better algorithms to use (histogram-based).
1006 */
1007 minSum = 2000; /* big enough */
1008 maxSum = -1; /* small enough */
1009 for (k = 0; k < N_TEXELS / 2; k++) {
1010 GLint sum = 0;
1011 for (i = 0; i < n_comp; i++) {
1012 sum += input[k][i];
1013 }
1014 if (minSum > sum) {
1015 minSum = sum;
1016 minColL = k;
1017 }
1018 if (maxSum < sum) {
1019 maxSum = sum;
1020 maxColL = k;
1021 }
1022 }
1023 minSum = 2000; /* big enough */
1024 maxSum = -1; /* small enough */
1025 for (; k < N_TEXELS; k++) {
1026 GLint sum = 0;
1027 for (i = 0; i < n_comp; i++) {
1028 sum += input[k][i];
1029 }
1030 if (minSum > sum) {
1031 minSum = sum;
1032 minColR = k;
1033 }
1034 if (maxSum < sum) {
1035 maxSum = sum;
1036 maxColR = k;
1037 }
1038 }
1039 #else
1040 GLint minVal;
1041 GLint maxVal;
1042 GLint maxVarL = fxt1_variance(NULL, input, n_comp, N_TEXELS / 2);
1043 GLint maxVarR = fxt1_variance(NULL, &input[N_TEXELS / 2], n_comp, N_TEXELS / 2);
1044
1045 /* Scan the channel with max variance for lo & hi
1046 * and use those as the two representative colors.
1047 */
1048 minVal = 2000; /* big enough */
1049 maxVal = -1; /* small enough */
1050 for (k = 0; k < N_TEXELS / 2; k++) {
1051 GLint t = input[k][maxVarL];
1052 if (minVal > t) {
1053 minVal = t;
1054 minColL = k;
1055 }
1056 if (maxVal < t) {
1057 maxVal = t;
1058 maxColL = k;
1059 }
1060 }
1061 minVal = 2000; /* big enough */
1062 maxVal = -1; /* small enough */
1063 for (; k < N_TEXELS; k++) {
1064 GLint t = input[k][maxVarR];
1065 if (minVal > t) {
1066 minVal = t;
1067 minColR = k;
1068 }
1069 if (maxVal < t) {
1070 maxVal = t;
1071 maxColR = k;
1072 }
1073 }
1074 #endif
1075
1076 /* left microtile */
1077 cc[0] = 0;
1078 for (i = 0; i < n_comp; i++) {
1079 vec[0][i] = input[minColL][i];
1080 vec[1][i] = input[maxColL][i];
1081 }
1082 if (minColL != maxColL) {
1083 /* compute interpolation vector */
1084 MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
1085
1086 /* add in texels */
1087 lolo = 0;
1088 for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
1089 GLint texel;
1090 /* interpolate color */
1091 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
1092 /* add in texel */
1093 lolo <<= 2;
1094 lolo |= texel;
1095 }
1096
1097 /* funky encoding for LSB of green */
1098 if ((GLint)((lolo >> 1) & 1) != (((vec[1][GCOMP] ^ vec[0][GCOMP]) >> 2) & 1)) {
1099 for (i = 0; i < n_comp; i++) {
1100 vec[1][i] = input[minColL][i];
1101 vec[0][i] = input[maxColL][i];
1102 }
1103 lolo = ~lolo;
1104 }
1105
1106 cc[0] = lolo;
1107 }
1108
1109 /* right microtile */
1110 cc[1] = 0;
1111 for (i = 0; i < n_comp; i++) {
1112 vec[2][i] = input[minColR][i];
1113 vec[3][i] = input[maxColR][i];
1114 }
1115 if (minColR != maxColR) {
1116 /* compute interpolation vector */
1117 MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);
1118
1119 /* add in texels */
1120 lohi = 0;
1121 for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
1122 GLint texel;
1123 /* interpolate color */
1124 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
1125 /* add in texel */
1126 lohi <<= 2;
1127 lohi |= texel;
1128 }
1129
1130 /* funky encoding for LSB of green */
1131 if ((GLint)((lohi >> 1) & 1) != (((vec[3][GCOMP] ^ vec[2][GCOMP]) >> 2) & 1)) {
1132 for (i = 0; i < n_comp; i++) {
1133 vec[3][i] = input[minColR][i];
1134 vec[2][i] = input[maxColR][i];
1135 }
1136 lohi = ~lohi;
1137 }
1138
1139 cc[1] = lohi;
1140 }
1141
1142 FX64_MOV32(hi, 8 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
1143 for (j = 2 * 2 - 1; j >= 0; j--) {
1144 for (i = 0; i < n_comp; i++) {
1145 /* add in colors */
1146 FX64_SHL(hi, 5);
1147 FX64_OR32(hi, vec[j][i] >> 3);
1148 }
1149 }
1150 ((Fx64 *)cc)[1] = hi;
1151 }
1152
1153
1154 static void
1155 fxt1_quantize (GLuint *cc, const GLubyte *lines[], GLint comps)
1156 {
1157 GLint trualpha;
1158 GLubyte reord[N_TEXELS][MAX_COMP];
1159
1160 GLubyte input[N_TEXELS][MAX_COMP];
1161 GLint i, k, l;
1162
1163 if (comps == 3) {
1164 /* make the whole block opaque */
1165 memset(input, -1, sizeof(input));
1166 }
1167
1168 /* 8 texels each line */
1169 for (l = 0; l < 4; l++) {
1170 for (k = 0; k < 4; k++) {
1171 for (i = 0; i < comps; i++) {
1172 input[k + l * 4][i] = *lines[l]++;
1173 }
1174 }
1175 for (; k < 8; k++) {
1176 for (i = 0; i < comps; i++) {
1177 input[k + l * 4 + 12][i] = *lines[l]++;
1178 }
1179 }
1180 }
1181
1182 /* block layout:
1183 * 00, 01, 02, 03, 08, 09, 0a, 0b
1184 * 10, 11, 12, 13, 18, 19, 1a, 1b
1185 * 04, 05, 06, 07, 0c, 0d, 0e, 0f
1186 * 14, 15, 16, 17, 1c, 1d, 1e, 1f
1187 */
1188
1189 /* [dBorca]
1190 * stupidity flows forth from this
1191 */
1192 l = N_TEXELS;
1193 trualpha = 0;
1194 if (comps == 4) {
1195 /* skip all transparent black texels */
1196 l = 0;
1197 for (k = 0; k < N_TEXELS; k++) {
1198 /* test all components against 0 */
1199 if (!ISTBLACK(input[k])) {
1200 /* texel is not transparent black */
1201 COPY_4UBV(reord[l], input[k]);
1202 if (reord[l][ACOMP] < (255 - ALPHA_TS)) {
1203 /* non-opaque texel */
1204 trualpha = !0;
1205 }
1206 l++;
1207 }
1208 }
1209 }
1210
1211 #if 0
1212 if (trualpha) {
1213 fxt1_quantize_ALPHA0(cc, input, reord, l);
1214 } else if (l == 0) {
1215 cc[0] = cc[1] = cc[2] = -1;
1216 cc[3] = 0;
1217 } else if (l < N_TEXELS) {
1218 fxt1_quantize_HI(cc, input, reord, l);
1219 } else {
1220 fxt1_quantize_CHROMA(cc, input);
1221 }
1222 (void)fxt1_quantize_ALPHA1;
1223 (void)fxt1_quantize_MIXED1;
1224 (void)fxt1_quantize_MIXED0;
1225 #else
1226 if (trualpha) {
1227 fxt1_quantize_ALPHA1(cc, input);
1228 } else if (l == 0) {
1229 cc[0] = cc[1] = cc[2] = ~0u;
1230 cc[3] = 0;
1231 } else if (l < N_TEXELS) {
1232 fxt1_quantize_MIXED1(cc, input);
1233 } else {
1234 fxt1_quantize_MIXED0(cc, input);
1235 }
1236 (void)fxt1_quantize_ALPHA0;
1237 (void)fxt1_quantize_HI;
1238 (void)fxt1_quantize_CHROMA;
1239 #endif
1240 }
1241
1242
1243
1244 /**
1245 * Upscale an image by replication, not (typical) stretching.
1246 * We use this when the image width or height is less than a
1247 * certain size (4, 8) and we need to upscale an image.
1248 */
1249 static void
1250 upscale_teximage2d(GLsizei inWidth, GLsizei inHeight,
1251 GLsizei outWidth, GLsizei outHeight,
1252 GLint comps, const GLubyte *src, GLint srcRowStride,
1253 GLubyte *dest )
1254 {
1255 GLint i, j, k;
1256
1257 ASSERT(outWidth >= inWidth);
1258 ASSERT(outHeight >= inHeight);
1259 #if 0
1260 ASSERT(inWidth == 1 || inWidth == 2 || inHeight == 1 || inHeight == 2);
1261 ASSERT((outWidth & 3) == 0);
1262 ASSERT((outHeight & 3) == 0);
1263 #endif
1264
1265 for (i = 0; i < outHeight; i++) {
1266 const GLint ii = i % inHeight;
1267 for (j = 0; j < outWidth; j++) {
1268 const GLint jj = j % inWidth;
1269 for (k = 0; k < comps; k++) {
1270 dest[(i * outWidth + j) * comps + k]
1271 = src[ii * srcRowStride + jj * comps + k];
1272 }
1273 }
1274 }
1275 }
1276
1277
1278 static void
1279 fxt1_encode (GLuint width, GLuint height, GLint comps,
1280 const void *source, GLint srcRowStride,
1281 void *dest, GLint destRowStride)
1282 {
1283 GLuint x, y;
1284 const GLubyte *data;
1285 GLuint *encoded = (GLuint *)dest;
1286 void *newSource = NULL;
1287
1288 assert(comps == 3 || comps == 4);
1289
1290 /* Replicate image if width is not M8 or height is not M4 */
1291 if ((width & 7) | (height & 3)) {
1292 GLint newWidth = (width + 7) & ~7;
1293 GLint newHeight = (height + 3) & ~3;
1294 newSource = malloc(comps * newWidth * newHeight * sizeof(GLubyte));
1295 if (!newSource) {
1296 GET_CURRENT_CONTEXT(ctx);
1297 _mesa_error(ctx, GL_OUT_OF_MEMORY, "texture compression");
1298 goto cleanUp;
1299 }
1300 upscale_teximage2d(width, height, newWidth, newHeight,
1301 comps, (const GLubyte *) source,
1302 srcRowStride, (GLubyte *) newSource);
1303 source = newSource;
1304 width = newWidth;
1305 height = newHeight;
1306 srcRowStride = comps * newWidth;
1307 }
1308
1309 data = (const GLubyte *) source;
1310 destRowStride = (destRowStride - width * 2) / 4;
1311 for (y = 0; y < height; y += 4) {
1312 GLuint offs = 0 + (y + 0) * srcRowStride;
1313 for (x = 0; x < width; x += 8) {
1314 const GLubyte *lines[4];
1315 lines[0] = &data[offs];
1316 lines[1] = lines[0] + srcRowStride;
1317 lines[2] = lines[1] + srcRowStride;
1318 lines[3] = lines[2] + srcRowStride;
1319 offs += 8 * comps;
1320 fxt1_quantize(encoded, lines, comps);
1321 /* 128 bits per 8x4 block */
1322 encoded += 4;
1323 }
1324 encoded += destRowStride;
1325 }
1326
1327 cleanUp:
1328 free(newSource);
1329 }
1330
1331
1332 /***************************************************************************\
1333 * FXT1 decoder
1334 *
1335 * The decoder is based on GL_3DFX_texture_compression_FXT1
1336 * specification and serves as a concept for the encoder.
1337 \***************************************************************************/
1338
1339
1340 /* lookup table for scaling 5 bit colors up to 8 bits */
1341 static const GLubyte _rgb_scale_5[] = {
1342 0, 8, 16, 25, 33, 41, 49, 58,
1343 66, 74, 82, 90, 99, 107, 115, 123,
1344 132, 140, 148, 156, 165, 173, 181, 189,
1345 197, 206, 214, 222, 230, 239, 247, 255
1346 };
1347
1348 /* lookup table for scaling 6 bit colors up to 8 bits */
1349 static const GLubyte _rgb_scale_6[] = {
1350 0, 4, 8, 12, 16, 20, 24, 28,
1351 32, 36, 40, 45, 49, 53, 57, 61,
1352 65, 69, 73, 77, 81, 85, 89, 93,
1353 97, 101, 105, 109, 113, 117, 121, 125,
1354 130, 134, 138, 142, 146, 150, 154, 158,
1355 162, 166, 170, 174, 178, 182, 186, 190,
1356 194, 198, 202, 206, 210, 215, 219, 223,
1357 227, 231, 235, 239, 243, 247, 251, 255
1358 };
1359
1360
/*
 * Helpers used by the per-texel decoders below.
 *
 * CC_SEL(cc, which)  - treats cc as an array of 32-bit words, picks the
 *                      word that contains bit number `which` and shifts it
 *                      right so that bit ends up at position 0.  The result
 *                      is NOT masked; bits above the field are still there
 *                      and callers (or UP5/UP6) mask as needed.  The cast
 *                      keeps the pointee const, since the block is only read.
 * UP5(c)             - expands the low 5 bits of c to 8 bits.
 * UP6(c, b)          - forms a 6-bit value from the low 5 bits of c (high
 *                      part) and the low bit of b (lsb), then expands it to
 *                      8 bits.
 * LERP(n, t, c0, c1) - rounded integer interpolation: step t of n between
 *                      c0 (t == 0) and c1 (t == n).  The expansion is fully
 *                      parenthesized so it can be used inside a larger
 *                      expression.
 */
#define CC_SEL(cc, which) (((const GLuint *)(cc))[(which) / 32] >> ((which) & 31))
#define UP5(c) _rgb_scale_5[(c) & 31]
#define UP6(c, b) _rgb_scale_6[(((c) & 31) << 1) | ((b) & 1)]
#define LERP(n, t, c0, c1) ((((n) - (t)) * (c0) + (t) * (c1) + (n) / 2) / (n))
1365
1366
1367 static void
1368 fxt1_decode_1HI (const GLubyte *code, GLint t, GLubyte *rgba)
1369 {
1370 const GLuint *cc;
1371
1372 t *= 3;
1373 cc = (const GLuint *)(code + t / 8);
1374 t = (cc[0] >> (t & 7)) & 7;
1375
1376 if (t == 7) {
1377 rgba[RCOMP] = rgba[GCOMP] = rgba[BCOMP] = rgba[ACOMP] = 0;
1378 } else {
1379 GLubyte r, g, b;
1380 cc = (const GLuint *)(code + 12);
1381 if (t == 0) {
1382 b = UP5(CC_SEL(cc, 0));
1383 g = UP5(CC_SEL(cc, 5));
1384 r = UP5(CC_SEL(cc, 10));
1385 } else if (t == 6) {
1386 b = UP5(CC_SEL(cc, 15));
1387 g = UP5(CC_SEL(cc, 20));
1388 r = UP5(CC_SEL(cc, 25));
1389 } else {
1390 b = LERP(6, t, UP5(CC_SEL(cc, 0)), UP5(CC_SEL(cc, 15)));
1391 g = LERP(6, t, UP5(CC_SEL(cc, 5)), UP5(CC_SEL(cc, 20)));
1392 r = LERP(6, t, UP5(CC_SEL(cc, 10)), UP5(CC_SEL(cc, 25)));
1393 }
1394 rgba[RCOMP] = r;
1395 rgba[GCOMP] = g;
1396 rgba[BCOMP] = b;
1397 rgba[ACOMP] = 255;
1398 }
1399 }
1400
1401
1402 static void
1403 fxt1_decode_1CHROMA (const GLubyte *code, GLint t, GLubyte *rgba)
1404 {
1405 const GLuint *cc;
1406 GLuint kk;
1407
1408 cc = (const GLuint *)code;
1409 if (t & 16) {
1410 cc++;
1411 t &= 15;
1412 }
1413 t = (cc[0] >> (t * 2)) & 3;
1414
1415 t *= 15;
1416 cc = (const GLuint *)(code + 8 + t / 8);
1417 kk = cc[0] >> (t & 7);
1418 rgba[BCOMP] = UP5(kk);
1419 rgba[GCOMP] = UP5(kk >> 5);
1420 rgba[RCOMP] = UP5(kk >> 10);
1421 rgba[ACOMP] = 255;
1422 }
1423
1424
1425 static void
1426 fxt1_decode_1MIXED (const GLubyte *code, GLint t, GLubyte *rgba)
1427 {
1428 const GLuint *cc;
1429 GLuint col[2][3];
1430 GLint glsb, selb;
1431
1432 cc = (const GLuint *)code;
1433 if (t & 16) {
1434 t &= 15;
1435 t = (cc[1] >> (t * 2)) & 3;
1436 /* col 2 */
1437 col[0][BCOMP] = (*(const GLuint *)(code + 11)) >> 6;
1438 col[0][GCOMP] = CC_SEL(cc, 99);
1439 col[0][RCOMP] = CC_SEL(cc, 104);
1440 /* col 3 */
1441 col[1][BCOMP] = CC_SEL(cc, 109);
1442 col[1][GCOMP] = CC_SEL(cc, 114);
1443 col[1][RCOMP] = CC_SEL(cc, 119);
1444 glsb = CC_SEL(cc, 126);
1445 selb = CC_SEL(cc, 33);
1446 } else {
1447 t = (cc[0] >> (t * 2)) & 3;
1448 /* col 0 */
1449 col[0][BCOMP] = CC_SEL(cc, 64);
1450 col[0][GCOMP] = CC_SEL(cc, 69);
1451 col[0][RCOMP] = CC_SEL(cc, 74);
1452 /* col 1 */
1453 col[1][BCOMP] = CC_SEL(cc, 79);
1454 col[1][GCOMP] = CC_SEL(cc, 84);
1455 col[1][RCOMP] = CC_SEL(cc, 89);
1456 glsb = CC_SEL(cc, 125);
1457 selb = CC_SEL(cc, 1);
1458 }
1459
1460 if (CC_SEL(cc, 124) & 1) {
1461 /* alpha[0] == 1 */
1462
1463 if (t == 3) {
1464 /* zero */
1465 rgba[RCOMP] = rgba[BCOMP] = rgba[GCOMP] = rgba[ACOMP] = 0;
1466 } else {
1467 GLubyte r, g, b;
1468 if (t == 0) {
1469 b = UP5(col[0][BCOMP]);
1470 g = UP5(col[0][GCOMP]);
1471 r = UP5(col[0][RCOMP]);
1472 } else if (t == 2) {
1473 b = UP5(col[1][BCOMP]);
1474 g = UP6(col[1][GCOMP], glsb);
1475 r = UP5(col[1][RCOMP]);
1476 } else {
1477 b = (UP5(col[0][BCOMP]) + UP5(col[1][BCOMP])) / 2;
1478 g = (UP5(col[0][GCOMP]) + UP6(col[1][GCOMP], glsb)) / 2;
1479 r = (UP5(col[0][RCOMP]) + UP5(col[1][RCOMP])) / 2;
1480 }
1481 rgba[RCOMP] = r;
1482 rgba[GCOMP] = g;
1483 rgba[BCOMP] = b;
1484 rgba[ACOMP] = 255;
1485 }
1486 } else {
1487 /* alpha[0] == 0 */
1488 GLubyte r, g, b;
1489 if (t == 0) {
1490 b = UP5(col[0][BCOMP]);
1491 g = UP6(col[0][GCOMP], glsb ^ selb);
1492 r = UP5(col[0][RCOMP]);
1493 } else if (t == 3) {
1494 b = UP5(col[1][BCOMP]);
1495 g = UP6(col[1][GCOMP], glsb);
1496 r = UP5(col[1][RCOMP]);
1497 } else {
1498 b = LERP(3, t, UP5(col[0][BCOMP]), UP5(col[1][BCOMP]));
1499 g = LERP(3, t, UP6(col[0][GCOMP], glsb ^ selb),
1500 UP6(col[1][GCOMP], glsb));
1501 r = LERP(3, t, UP5(col[0][RCOMP]), UP5(col[1][RCOMP]));
1502 }
1503 rgba[RCOMP] = r;
1504 rgba[GCOMP] = g;
1505 rgba[BCOMP] = b;
1506 rgba[ACOMP] = 255;
1507 }
1508 }
1509
1510
1511 static void
1512 fxt1_decode_1ALPHA (const GLubyte *code, GLint t, GLubyte *rgba)
1513 {
1514 const GLuint *cc;
1515 GLubyte r, g, b, a;
1516
1517 cc = (const GLuint *)code;
1518 if (CC_SEL(cc, 124) & 1) {
1519 /* lerp == 1 */
1520 GLuint col0[4];
1521
1522 if (t & 16) {
1523 t &= 15;
1524 t = (cc[1] >> (t * 2)) & 3;
1525 /* col 2 */
1526 col0[BCOMP] = (*(const GLuint *)(code + 11)) >> 6;
1527 col0[GCOMP] = CC_SEL(cc, 99);
1528 col0[RCOMP] = CC_SEL(cc, 104);
1529 col0[ACOMP] = CC_SEL(cc, 119);
1530 } else {
1531 t = (cc[0] >> (t * 2)) & 3;
1532 /* col 0 */
1533 col0[BCOMP] = CC_SEL(cc, 64);
1534 col0[GCOMP] = CC_SEL(cc, 69);
1535 col0[RCOMP] = CC_SEL(cc, 74);
1536 col0[ACOMP] = CC_SEL(cc, 109);
1537 }
1538
1539 if (t == 0) {
1540 b = UP5(col0[BCOMP]);
1541 g = UP5(col0[GCOMP]);
1542 r = UP5(col0[RCOMP]);
1543 a = UP5(col0[ACOMP]);
1544 } else if (t == 3) {
1545 b = UP5(CC_SEL(cc, 79));
1546 g = UP5(CC_SEL(cc, 84));
1547 r = UP5(CC_SEL(cc, 89));
1548 a = UP5(CC_SEL(cc, 114));
1549 } else {
1550 b = LERP(3, t, UP5(col0[BCOMP]), UP5(CC_SEL(cc, 79)));
1551 g = LERP(3, t, UP5(col0[GCOMP]), UP5(CC_SEL(cc, 84)));
1552 r = LERP(3, t, UP5(col0[RCOMP]), UP5(CC_SEL(cc, 89)));
1553 a = LERP(3, t, UP5(col0[ACOMP]), UP5(CC_SEL(cc, 114)));
1554 }
1555 } else {
1556 /* lerp == 0 */
1557
1558 if (t & 16) {
1559 cc++;
1560 t &= 15;
1561 }
1562 t = (cc[0] >> (t * 2)) & 3;
1563
1564 if (t == 3) {
1565 /* zero */
1566 r = g = b = a = 0;
1567 } else {
1568 GLuint kk;
1569 cc = (const GLuint *)code;
1570 a = UP5(cc[3] >> (t * 5 + 13));
1571 t *= 15;
1572 cc = (const GLuint *)(code + 8 + t / 8);
1573 kk = cc[0] >> (t & 7);
1574 b = UP5(kk);
1575 g = UP5(kk >> 5);
1576 r = UP5(kk >> 10);
1577 }
1578 }
1579 rgba[RCOMP] = r;
1580 rgba[GCOMP] = g;
1581 rgba[BCOMP] = b;
1582 rgba[ACOMP] = a;
1583 }
1584
1585
1586 static void
1587 fxt1_decode_1 (const void *texture, GLint stride, /* in pixels */
1588 GLint i, GLint j, GLubyte *rgba)
1589 {
1590 static void (*decode_1[]) (const GLubyte *, GLint, GLubyte *) = {
1591 fxt1_decode_1HI, /* cc-high = "00?" */
1592 fxt1_decode_1HI, /* cc-high = "00?" */
1593 fxt1_decode_1CHROMA, /* cc-chroma = "010" */
1594 fxt1_decode_1ALPHA, /* alpha = "011" */
1595 fxt1_decode_1MIXED, /* mixed = "1??" */
1596 fxt1_decode_1MIXED, /* mixed = "1??" */
1597 fxt1_decode_1MIXED, /* mixed = "1??" */
1598 fxt1_decode_1MIXED /* mixed = "1??" */
1599 };
1600
1601 const GLubyte *code = (const GLubyte *)texture +
1602 ((j / 4) * (stride / 8) + (i / 8)) * 16;
1603 GLint mode = CC_SEL(code, 125);
1604 GLint t = i & 7;
1605
1606 if (t & 4) {
1607 t += 12;
1608 }
1609 t += (j & 3) * 4;
1610
1611 decode_1[mode](code, t, rgba);
1612 }
1613
1614
1615
1616
1617 static void
1618 fetch_rgb_fxt1(const GLubyte *map, const GLuint imageOffsets[],
1619 GLint rowStride, GLint i, GLint j, GLint k, GLfloat *texel)
1620 {
1621 GLubyte rgba[4];
1622 fxt1_decode_1(map, rowStride, i, j, rgba);
1623 texel[RCOMP] = UBYTE_TO_FLOAT(rgba[RCOMP]);
1624 texel[GCOMP] = UBYTE_TO_FLOAT(rgba[GCOMP]);
1625 texel[BCOMP] = UBYTE_TO_FLOAT(rgba[BCOMP]);
1626 texel[ACOMP] = 1.0F;
1627 }
1628
1629
1630 static void
1631 fetch_rgba_fxt1(const GLubyte *map, const GLuint imageOffsets[],
1632 GLint rowStride, GLint i, GLint j, GLint k, GLfloat *texel)
1633 {
1634 GLubyte rgba[4];
1635 fxt1_decode_1(map, rowStride, i, j, rgba);
1636 texel[RCOMP] = UBYTE_TO_FLOAT(rgba[RCOMP]);
1637 texel[GCOMP] = UBYTE_TO_FLOAT(rgba[GCOMP]);
1638 texel[BCOMP] = UBYTE_TO_FLOAT(rgba[BCOMP]);
1639 texel[ACOMP] = UBYTE_TO_FLOAT(rgba[ACOMP]);
1640 }
1641
1642
1643 compressed_fetch_func
1644 _mesa_get_fxt_fetch_func(gl_format format)
1645 {
1646 switch (format) {
1647 case MESA_FORMAT_RGB_FXT1:
1648 return fetch_rgb_fxt1;
1649 case MESA_FORMAT_RGBA_FXT1:
1650 return fetch_rgba_fxt1;
1651 default:
1652 return NULL;
1653 }
1654 }