i965/vs: Store texturing results into a vec4 temporary.
[mesa.git] / src / mesa / main / texcompress_fxt1.c
1 /*
2 * Mesa 3-D graphics library
3 * Version: 7.1
4 *
5 * Copyright (C) 1999-2008 Brian Paul All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included
15 * in all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
21 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25
26 /**
27 * \file texcompress_fxt1.c
28 * GL_3DFX_texture_compression_FXT1 support.
29 */
30
31
32 #include "glheader.h"
33 #include "imports.h"
34 #include "colormac.h"
35 #include "image.h"
36 #include "macros.h"
37 #include "mfeatures.h"
38 #include "mipmap.h"
39 #include "texcompress.h"
40 #include "texcompress_fxt1.h"
41 #include "texstore.h"
42
43
44 static void
45 fxt1_encode (GLuint width, GLuint height, GLint comps,
46 const void *source, GLint srcRowStride,
47 void *dest, GLint destRowStride);
48
49 static void
50 fxt1_decode_1 (const void *texture, GLint stride,
51 GLint i, GLint j, GLubyte *rgba);
52
53
54 /**
55 * Store user's image in rgb_fxt1 format.
56 */
57 GLboolean
58 _mesa_texstore_rgb_fxt1(TEXSTORE_PARAMS)
59 {
60 const GLubyte *pixels;
61 GLint srcRowStride;
62 GLubyte *dst;
63 const GLubyte *tempImage = NULL;
64
65 ASSERT(dstFormat == MESA_FORMAT_RGB_FXT1);
66
67 if (srcFormat != GL_RGB ||
68 srcType != GL_UNSIGNED_BYTE ||
69 ctx->_ImageTransferState ||
70 srcPacking->RowLength != srcWidth ||
71 srcPacking->SwapBytes) {
72 /* convert image to RGB/GLubyte */
73 tempImage = _mesa_make_temp_ubyte_image(ctx, dims,
74 baseInternalFormat,
75 _mesa_get_format_base_format(dstFormat),
76 srcWidth, srcHeight, srcDepth,
77 srcFormat, srcType, srcAddr,
78 srcPacking);
79 if (!tempImage)
80 return GL_FALSE; /* out of memory */
81 pixels = tempImage;
82 srcRowStride = 3 * srcWidth;
83 srcFormat = GL_RGB;
84 }
85 else {
86 pixels = _mesa_image_address2d(srcPacking, srcAddr, srcWidth, srcHeight,
87 srcFormat, srcType, 0, 0);
88
89 srcRowStride = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat,
90 srcType) / sizeof(GLubyte);
91 }
92
93 dst = dstSlices[0];
94
95 fxt1_encode(srcWidth, srcHeight, 3, pixels, srcRowStride,
96 dst, dstRowStride);
97
98 free((void*) tempImage);
99
100 return GL_TRUE;
101 }
102
103
104 /**
105 * Store user's image in rgba_fxt1 format.
106 */
107 GLboolean
108 _mesa_texstore_rgba_fxt1(TEXSTORE_PARAMS)
109 {
110 const GLubyte *pixels;
111 GLint srcRowStride;
112 GLubyte *dst;
113 const GLubyte *tempImage = NULL;
114
115 ASSERT(dstFormat == MESA_FORMAT_RGBA_FXT1);
116
117 if (srcFormat != GL_RGBA ||
118 srcType != GL_UNSIGNED_BYTE ||
119 ctx->_ImageTransferState ||
120 srcPacking->SwapBytes) {
121 /* convert image to RGBA/GLubyte */
122 tempImage = _mesa_make_temp_ubyte_image(ctx, dims,
123 baseInternalFormat,
124 _mesa_get_format_base_format(dstFormat),
125 srcWidth, srcHeight, srcDepth,
126 srcFormat, srcType, srcAddr,
127 srcPacking);
128 if (!tempImage)
129 return GL_FALSE; /* out of memory */
130 pixels = tempImage;
131 srcRowStride = 4 * srcWidth;
132 srcFormat = GL_RGBA;
133 }
134 else {
135 pixels = _mesa_image_address2d(srcPacking, srcAddr, srcWidth, srcHeight,
136 srcFormat, srcType, 0, 0);
137
138 srcRowStride = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat,
139 srcType) / sizeof(GLubyte);
140 }
141
142 dst = dstSlices[0];
143
144 fxt1_encode(srcWidth, srcHeight, 4, pixels, srcRowStride,
145 dst, dstRowStride);
146
147 free((void*) tempImage);
148
149 return GL_TRUE;
150 }
151
152
153 /***************************************************************************\
154 * FXT1 encoder
155 *
156 * The encoder was built by reversing the decoder,
157 * and is vaguely based on Texus2 by 3dfx. Note that this code
158 * is merely a proof of concept, since it is highly UNoptimized;
159 * moreover, it is sub-optimal due to initial conditions passed
160 * to Lloyd's algorithm (the interpolation modes are even worse).
161 \***************************************************************************/
162
163
/* Encoder limits and tuning constants. */
#define MAX_COMP 4 /* largest number of components a texel ever has */
#define MAX_VECT 4 /* largest number of base vectors ever searched for */
#define N_TEXELS 32 /* number of texels in a block (always 32) */
#define LL_N_REP 50 /* number of iterations in lloyd's vq */
#define LL_RMS_D 10 /* fault tolerance (maximum delta) */
#define LL_RMS_E 255 /* fault tolerance (maximum error) */
#define ALPHA_TS 2 /* alpha threshold: (255 - ALPHA_TS) deemed opaque */

/*
 * True when the 4-byte texel `v` is all zeros ("transparent black").
 * The bytes are tested individually instead of reading the texel through
 * a GLuint pointer: a GLubyte[4] is not guaranteed to be suitably aligned
 * for a GLuint, and accessing it through that type breaks the aliasing
 * rules.  `v` is evaluated more than once, so it must have no side effects.
 */
#define ISTBLACK(v) (((v)[0] | (v)[1] | (v)[2] | (v)[3]) == 0)
172
173
/*
 * Define a 64-bit unsigned integer type and the three operations the
 * encoder needs on it:
 *   FX64_MOV32(a, b)  load the 32-bit value b into a
 *   FX64_OR32(a, b)   OR the 32-bit value b into the low bits of a
 *   FX64_SHL(a, c)    shift a left by c bits
 *
 * The native uint64_t variant is the one compiled in.  The struct-based
 * fallback is kept for compilers without a 64-bit type; there,
 * FX64_MOV32 also clears the high word (it is read by the next
 * FX64_SHL) and FX64_SHL treats a zero shift count as a no-op so it
 * never shifts a 32-bit word by 32.
 */
#if 1

#define FX64_NATIVE 1

typedef uint64_t Fx64;

#define FX64_MOV32(a, b) ((a) = (b))
#define FX64_OR32(a, b) ((a) |= (b))
#define FX64_SHL(a, c) ((a) <<= (c))

#else

#define FX64_NATIVE 0

typedef struct {
   GLuint lo, hi;
} Fx64;

#define FX64_MOV32(a, b) \
   do { \
      (a).lo = (b); \
      (a).hi = 0; \
   } while (0)

#define FX64_OR32(a, b) ((a).lo |= (b))

#define FX64_SHL(a, c) \
   do { \
      if ((c) >= 32) { \
         (a).hi = (a).lo << ((c) - 32); \
         (a).lo = 0; \
      } else if ((c) > 0) { \
         (a).hi = ((a).hi << (c)) | ((a).lo >> (32 - (c))); \
         (a).lo <<= (c); \
      } \
   } while (0)

#endif
210
211
#define F(i) (GLfloat)1 /* can be used to obtain an oblong metric: 0.30 / 0.59 / 0.11 */
#define SAFECDOT 1 /* for paranoids */

/*
 * Build the interpolation vector IV and offset B that project a colour
 * onto the segment V0..V1, scaled so that V0 maps to 0 and V1 to NV.
 * 0.5 is folded into B so that CALCCDOT's truncation rounds to nearest.
 * Uses the caller's loop variable `i`.  The squared length of V1 - V0
 * is used as a divisor, so the two end points must differ.
 */
#define MAKEIVEC(NV, NC, IV, B, V0, V1) \
   do { \
      GLfloat mkiv_len2 = 0.0F; /* squared length of V1 - V0 */ \
      GLfloat mkiv_scale; \
      \
      for (i = 0; i < NC; i++) { \
         IV[i] = (V1[i] - V0[i]) * F(i); \
         mkiv_len2 += IV[i] * IV[i]; \
      } \
      mkiv_scale = (GLfloat)NV / mkiv_len2; \
      B = 0; \
      for (i = 0; i < NC; i++) { \
         IV[i] *= F(i); \
         B -= IV[i] * V0[i]; \
         IV[i] *= mkiv_scale; \
      } \
      B = B * mkiv_scale + 0.5f; \
   } while (0)

/*
 * Project colour V with the vector/offset produced by MAKEIVEC and store
 * the resulting index in TEXEL.  With SAFECDOT enabled the index is
 * clamped to [0, NV].  Uses the caller's loop variable `i`.
 */
#define CALCCDOT(TEXEL, NV, NC, IV, B, V) \
   do { \
      GLfloat ccd_proj = 0.0F; \
      for (i = 0; i < NC; i++) { \
         ccd_proj += V[i] * IV[i]; \
      } \
      TEXEL = (GLint)(ccd_proj + B); \
      if (SAFECDOT && TEXEL < 0) { \
         TEXEL = 0; \
      } else if (SAFECDOT && TEXEL > NV) { \
         TEXEL = NV; \
      } \
   } while (0)
250
251
252 static GLint
253 fxt1_bestcol (GLfloat vec[][MAX_COMP], GLint nv,
254 GLubyte input[MAX_COMP], GLint nc)
255 {
256 GLint i, j, best = -1;
257 GLfloat err = 1e9; /* big enough */
258
259 for (j = 0; j < nv; j++) {
260 GLfloat e = 0.0F;
261 for (i = 0; i < nc; i++) {
262 e += (vec[j][i] - input[i]) * (vec[j][i] - input[i]);
263 }
264 if (e < err) {
265 err = e;
266 best = j;
267 }
268 }
269
270 return best;
271 }
272
273
274 static GLint
275 fxt1_worst (GLfloat vec[MAX_COMP],
276 GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
277 {
278 GLint i, k, worst = -1;
279 GLfloat err = -1.0F; /* small enough */
280
281 for (k = 0; k < n; k++) {
282 GLfloat e = 0.0F;
283 for (i = 0; i < nc; i++) {
284 e += (vec[i] - input[k][i]) * (vec[i] - input[k][i]);
285 }
286 if (e > err) {
287 err = e;
288 worst = k;
289 }
290 }
291
292 return worst;
293 }
294
295
296 static GLint
297 fxt1_variance (GLdouble variance[MAX_COMP],
298 GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
299 {
300 GLint i, k, best = 0;
301 GLint sx, sx2;
302 GLdouble var, maxvar = -1; /* small enough */
303 GLdouble teenth = 1.0 / n;
304
305 for (i = 0; i < nc; i++) {
306 sx = sx2 = 0;
307 for (k = 0; k < n; k++) {
308 GLint t = input[k][i];
309 sx += t;
310 sx2 += t * t;
311 }
312 var = sx2 * teenth - sx * sx * teenth * teenth;
313 if (maxvar < var) {
314 maxvar = var;
315 best = i;
316 }
317 if (variance) {
318 variance[i] = var;
319 }
320 }
321
322 return best;
323 }
324
325
326 static GLint
327 fxt1_choose (GLfloat vec[][MAX_COMP], GLint nv,
328 GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
329 {
330 #if 0
331 /* Choose colors from a grid.
332 */
333 GLint i, j;
334
335 for (j = 0; j < nv; j++) {
336 GLint m = j * (n - 1) / (nv - 1);
337 for (i = 0; i < nc; i++) {
338 vec[j][i] = input[m][i];
339 }
340 }
341 #else
342 /* Our solution here is to find the darkest and brightest colors in
343 * the 8x4 tile and use those as the two representative colors.
344 * There are probably better algorithms to use (histogram-based).
345 */
346 GLint i, j, k;
347 GLint minSum = 2000; /* big enough */
348 GLint maxSum = -1; /* small enough */
349 GLint minCol = 0; /* phoudoin: silent compiler! */
350 GLint maxCol = 0; /* phoudoin: silent compiler! */
351
352 struct {
353 GLint flag;
354 GLint key;
355 GLint freq;
356 GLint idx;
357 } hist[N_TEXELS];
358 GLint lenh = 0;
359
360 memset(hist, 0, sizeof(hist));
361
362 for (k = 0; k < n; k++) {
363 GLint l;
364 GLint key = 0;
365 GLint sum = 0;
366 for (i = 0; i < nc; i++) {
367 key <<= 8;
368 key |= input[k][i];
369 sum += input[k][i];
370 }
371 for (l = 0; l < n; l++) {
372 if (!hist[l].flag) {
373 /* alloc new slot */
374 hist[l].flag = !0;
375 hist[l].key = key;
376 hist[l].freq = 1;
377 hist[l].idx = k;
378 lenh = l + 1;
379 break;
380 } else if (hist[l].key == key) {
381 hist[l].freq++;
382 break;
383 }
384 }
385 if (minSum > sum) {
386 minSum = sum;
387 minCol = k;
388 }
389 if (maxSum < sum) {
390 maxSum = sum;
391 maxCol = k;
392 }
393 }
394
395 if (lenh <= nv) {
396 for (j = 0; j < lenh; j++) {
397 for (i = 0; i < nc; i++) {
398 vec[j][i] = (GLfloat)input[hist[j].idx][i];
399 }
400 }
401 for (; j < nv; j++) {
402 for (i = 0; i < nc; i++) {
403 vec[j][i] = vec[0][i];
404 }
405 }
406 return 0;
407 }
408
409 for (j = 0; j < nv; j++) {
410 for (i = 0; i < nc; i++) {
411 vec[j][i] = ((nv - 1 - j) * input[minCol][i] + j * input[maxCol][i] + (nv - 1) / 2) / (GLfloat)(nv - 1);
412 }
413 }
414 #endif
415
416 return !0;
417 }
418
419
420 static GLint
421 fxt1_lloyd (GLfloat vec[][MAX_COMP], GLint nv,
422 GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
423 {
424 /* Use the generalized lloyd's algorithm for VQ:
425 * find 4 color vectors.
426 *
427 * for each sample color
428 * sort to nearest vector.
429 *
430 * replace each vector with the centroid of its matching colors.
431 *
432 * repeat until RMS doesn't improve.
433 *
434 * if a color vector has no samples, or becomes the same as another
435 * vector, replace it with the color which is farthest from a sample.
436 *
437 * vec[][MAX_COMP] initial vectors and resulting colors
438 * nv number of resulting colors required
439 * input[N_TEXELS][MAX_COMP] input texels
440 * nc number of components in input / vec
441 * n number of input samples
442 */
443
444 GLint sum[MAX_VECT][MAX_COMP]; /* used to accumulate closest texels */
445 GLint cnt[MAX_VECT]; /* how many times a certain vector was chosen */
446 GLfloat error, lasterror = 1e9;
447
448 GLint i, j, k, rep;
449
450 /* the quantizer */
451 for (rep = 0; rep < LL_N_REP; rep++) {
452 /* reset sums & counters */
453 for (j = 0; j < nv; j++) {
454 for (i = 0; i < nc; i++) {
455 sum[j][i] = 0;
456 }
457 cnt[j] = 0;
458 }
459 error = 0;
460
461 /* scan whole block */
462 for (k = 0; k < n; k++) {
463 #if 1
464 GLint best = -1;
465 GLfloat err = 1e9; /* big enough */
466 /* determine best vector */
467 for (j = 0; j < nv; j++) {
468 GLfloat e = (vec[j][0] - input[k][0]) * (vec[j][0] - input[k][0]) +
469 (vec[j][1] - input[k][1]) * (vec[j][1] - input[k][1]) +
470 (vec[j][2] - input[k][2]) * (vec[j][2] - input[k][2]);
471 if (nc == 4) {
472 e += (vec[j][3] - input[k][3]) * (vec[j][3] - input[k][3]);
473 }
474 if (e < err) {
475 err = e;
476 best = j;
477 }
478 }
479 #else
480 GLint best = fxt1_bestcol(vec, nv, input[k], nc, &err);
481 #endif
482 assert(best >= 0);
483 /* add in closest color */
484 for (i = 0; i < nc; i++) {
485 sum[best][i] += input[k][i];
486 }
487 /* mark this vector as used */
488 cnt[best]++;
489 /* accumulate error */
490 error += err;
491 }
492
493 /* check RMS */
494 if ((error < LL_RMS_E) ||
495 ((error < lasterror) && ((lasterror - error) < LL_RMS_D))) {
496 return !0; /* good match */
497 }
498 lasterror = error;
499
500 /* move each vector to the barycenter of its closest colors */
501 for (j = 0; j < nv; j++) {
502 if (cnt[j]) {
503 GLfloat div = 1.0F / cnt[j];
504 for (i = 0; i < nc; i++) {
505 vec[j][i] = div * sum[j][i];
506 }
507 } else {
508 /* this vec has no samples or is identical with a previous vec */
509 GLint worst = fxt1_worst(vec[j], input, nc, n);
510 for (i = 0; i < nc; i++) {
511 vec[j][i] = input[worst][i];
512 }
513 }
514 }
515 }
516
517 return 0; /* could not converge fast enough */
518 }
519
520
521 static void
522 fxt1_quantize_CHROMA (GLuint *cc,
523 GLubyte input[N_TEXELS][MAX_COMP])
524 {
525 const GLint n_vect = 4; /* 4 base vectors to find */
526 const GLint n_comp = 3; /* 3 components: R, G, B */
527 GLfloat vec[MAX_VECT][MAX_COMP];
528 GLint i, j, k;
529 Fx64 hi; /* high quadword */
530 GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
531
532 if (fxt1_choose(vec, n_vect, input, n_comp, N_TEXELS) != 0) {
533 fxt1_lloyd(vec, n_vect, input, n_comp, N_TEXELS);
534 }
535
536 FX64_MOV32(hi, 4); /* cc-chroma = "010" + unused bit */
537 for (j = n_vect - 1; j >= 0; j--) {
538 for (i = 0; i < n_comp; i++) {
539 /* add in colors */
540 FX64_SHL(hi, 5);
541 FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
542 }
543 }
544 ((Fx64 *)cc)[1] = hi;
545
546 lohi = lolo = 0;
547 /* right microtile */
548 for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) {
549 lohi <<= 2;
550 lohi |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
551 }
552 /* left microtile */
553 for (; k >= 0; k--) {
554 lolo <<= 2;
555 lolo |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
556 }
557 cc[1] = lohi;
558 cc[0] = lolo;
559 }
560
561
562 static void
563 fxt1_quantize_ALPHA0 (GLuint *cc,
564 GLubyte input[N_TEXELS][MAX_COMP],
565 GLubyte reord[N_TEXELS][MAX_COMP], GLint n)
566 {
567 const GLint n_vect = 3; /* 3 base vectors to find */
568 const GLint n_comp = 4; /* 4 components: R, G, B, A */
569 GLfloat vec[MAX_VECT][MAX_COMP];
570 GLint i, j, k;
571 Fx64 hi; /* high quadword */
572 GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
573
574 /* the last vector indicates zero */
575 for (i = 0; i < n_comp; i++) {
576 vec[n_vect][i] = 0;
577 }
578
579 /* the first n texels in reord are guaranteed to be non-zero */
580 if (fxt1_choose(vec, n_vect, reord, n_comp, n) != 0) {
581 fxt1_lloyd(vec, n_vect, reord, n_comp, n);
582 }
583
584 FX64_MOV32(hi, 6); /* alpha = "011" + lerp = 0 */
585 for (j = n_vect - 1; j >= 0; j--) {
586 /* add in alphas */
587 FX64_SHL(hi, 5);
588 FX64_OR32(hi, (GLuint)(vec[j][ACOMP] / 8.0F));
589 }
590 for (j = n_vect - 1; j >= 0; j--) {
591 for (i = 0; i < n_comp - 1; i++) {
592 /* add in colors */
593 FX64_SHL(hi, 5);
594 FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
595 }
596 }
597 ((Fx64 *)cc)[1] = hi;
598
599 lohi = lolo = 0;
600 /* right microtile */
601 for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) {
602 lohi <<= 2;
603 lohi |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
604 }
605 /* left microtile */
606 for (; k >= 0; k--) {
607 lolo <<= 2;
608 lolo |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
609 }
610 cc[1] = lohi;
611 cc[0] = lolo;
612 }
613
614
615 static void
616 fxt1_quantize_ALPHA1 (GLuint *cc,
617 GLubyte input[N_TEXELS][MAX_COMP])
618 {
619 const GLint n_vect = 3; /* highest vector number in each microtile */
620 const GLint n_comp = 4; /* 4 components: R, G, B, A */
621 GLfloat vec[1 + 1 + 1][MAX_COMP]; /* 1.5 extrema for each sub-block */
622 GLfloat b, iv[MAX_COMP]; /* interpolation vector */
623 GLint i, j, k;
624 Fx64 hi; /* high quadword */
625 GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
626
627 GLint minSum;
628 GLint maxSum;
629 GLint minColL = 0, maxColL = 0;
630 GLint minColR = 0, maxColR = 0;
631 GLint sumL = 0, sumR = 0;
632 GLint nn_comp;
633 /* Our solution here is to find the darkest and brightest colors in
634 * the 4x4 tile and use those as the two representative colors.
635 * There are probably better algorithms to use (histogram-based).
636 */
637 nn_comp = n_comp;
638 while ((minColL == maxColL) && nn_comp) {
639 minSum = 2000; /* big enough */
640 maxSum = -1; /* small enough */
641 for (k = 0; k < N_TEXELS / 2; k++) {
642 GLint sum = 0;
643 for (i = 0; i < nn_comp; i++) {
644 sum += input[k][i];
645 }
646 if (minSum > sum) {
647 minSum = sum;
648 minColL = k;
649 }
650 if (maxSum < sum) {
651 maxSum = sum;
652 maxColL = k;
653 }
654 sumL += sum;
655 }
656
657 nn_comp--;
658 }
659
660 nn_comp = n_comp;
661 while ((minColR == maxColR) && nn_comp) {
662 minSum = 2000; /* big enough */
663 maxSum = -1; /* small enough */
664 for (k = N_TEXELS / 2; k < N_TEXELS; k++) {
665 GLint sum = 0;
666 for (i = 0; i < nn_comp; i++) {
667 sum += input[k][i];
668 }
669 if (minSum > sum) {
670 minSum = sum;
671 minColR = k;
672 }
673 if (maxSum < sum) {
674 maxSum = sum;
675 maxColR = k;
676 }
677 sumR += sum;
678 }
679
680 nn_comp--;
681 }
682
683 /* choose the common vector (yuck!) */
684 {
685 GLint j1, j2;
686 GLint v1 = 0, v2 = 0;
687 GLfloat err = 1e9; /* big enough */
688 GLfloat tv[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
689 for (i = 0; i < n_comp; i++) {
690 tv[0][i] = input[minColL][i];
691 tv[1][i] = input[maxColL][i];
692 tv[2][i] = input[minColR][i];
693 tv[3][i] = input[maxColR][i];
694 }
695 for (j1 = 0; j1 < 2; j1++) {
696 for (j2 = 2; j2 < 4; j2++) {
697 GLfloat e = 0.0F;
698 for (i = 0; i < n_comp; i++) {
699 e += (tv[j1][i] - tv[j2][i]) * (tv[j1][i] - tv[j2][i]);
700 }
701 if (e < err) {
702 err = e;
703 v1 = j1;
704 v2 = j2;
705 }
706 }
707 }
708 for (i = 0; i < n_comp; i++) {
709 vec[0][i] = tv[1 - v1][i];
710 vec[1][i] = (tv[v1][i] * sumL + tv[v2][i] * sumR) / (sumL + sumR);
711 vec[2][i] = tv[5 - v2][i];
712 }
713 }
714
715 /* left microtile */
716 cc[0] = 0;
717 if (minColL != maxColL) {
718 /* compute interpolation vector */
719 MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
720
721 /* add in texels */
722 lolo = 0;
723 for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
724 GLint texel;
725 /* interpolate color */
726 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
727 /* add in texel */
728 lolo <<= 2;
729 lolo |= texel;
730 }
731
732 cc[0] = lolo;
733 }
734
735 /* right microtile */
736 cc[1] = 0;
737 if (minColR != maxColR) {
738 /* compute interpolation vector */
739 MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[1]);
740
741 /* add in texels */
742 lohi = 0;
743 for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
744 GLint texel;
745 /* interpolate color */
746 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
747 /* add in texel */
748 lohi <<= 2;
749 lohi |= texel;
750 }
751
752 cc[1] = lohi;
753 }
754
755 FX64_MOV32(hi, 7); /* alpha = "011" + lerp = 1 */
756 for (j = n_vect - 1; j >= 0; j--) {
757 /* add in alphas */
758 FX64_SHL(hi, 5);
759 FX64_OR32(hi, (GLuint)(vec[j][ACOMP] / 8.0F));
760 }
761 for (j = n_vect - 1; j >= 0; j--) {
762 for (i = 0; i < n_comp - 1; i++) {
763 /* add in colors */
764 FX64_SHL(hi, 5);
765 FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
766 }
767 }
768 ((Fx64 *)cc)[1] = hi;
769 }
770
771
772 static void
773 fxt1_quantize_HI (GLuint *cc,
774 GLubyte input[N_TEXELS][MAX_COMP],
775 GLubyte reord[N_TEXELS][MAX_COMP], GLint n)
776 {
777 const GLint n_vect = 6; /* highest vector number */
778 const GLint n_comp = 3; /* 3 components: R, G, B */
779 GLfloat b = 0.0F; /* phoudoin: silent compiler! */
780 GLfloat iv[MAX_COMP]; /* interpolation vector */
781 GLint i, k;
782 GLuint hihi; /* high quadword: hi dword */
783
784 GLint minSum = 2000; /* big enough */
785 GLint maxSum = -1; /* small enough */
786 GLint minCol = 0; /* phoudoin: silent compiler! */
787 GLint maxCol = 0; /* phoudoin: silent compiler! */
788
789 /* Our solution here is to find the darkest and brightest colors in
790 * the 8x4 tile and use those as the two representative colors.
791 * There are probably better algorithms to use (histogram-based).
792 */
793 for (k = 0; k < n; k++) {
794 GLint sum = 0;
795 for (i = 0; i < n_comp; i++) {
796 sum += reord[k][i];
797 }
798 if (minSum > sum) {
799 minSum = sum;
800 minCol = k;
801 }
802 if (maxSum < sum) {
803 maxSum = sum;
804 maxCol = k;
805 }
806 }
807
808 hihi = 0; /* cc-hi = "00" */
809 for (i = 0; i < n_comp; i++) {
810 /* add in colors */
811 hihi <<= 5;
812 hihi |= reord[maxCol][i] >> 3;
813 }
814 for (i = 0; i < n_comp; i++) {
815 /* add in colors */
816 hihi <<= 5;
817 hihi |= reord[minCol][i] >> 3;
818 }
819 cc[3] = hihi;
820 cc[0] = cc[1] = cc[2] = 0;
821
822 /* compute interpolation vector */
823 if (minCol != maxCol) {
824 MAKEIVEC(n_vect, n_comp, iv, b, reord[minCol], reord[maxCol]);
825 }
826
827 /* add in texels */
828 for (k = N_TEXELS - 1; k >= 0; k--) {
829 GLint t = k * 3;
830 GLuint *kk = (GLuint *)((char *)cc + t / 8);
831 GLint texel = n_vect + 1; /* transparent black */
832
833 if (!ISTBLACK(input[k])) {
834 if (minCol != maxCol) {
835 /* interpolate color */
836 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
837 /* add in texel */
838 kk[0] |= texel << (t & 7);
839 }
840 } else {
841 /* add in texel */
842 kk[0] |= texel << (t & 7);
843 }
844 }
845 }
846
847
848 static void
849 fxt1_quantize_MIXED1 (GLuint *cc,
850 GLubyte input[N_TEXELS][MAX_COMP])
851 {
852 const GLint n_vect = 2; /* highest vector number in each microtile */
853 const GLint n_comp = 3; /* 3 components: R, G, B */
854 GLubyte vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
855 GLfloat b, iv[MAX_COMP]; /* interpolation vector */
856 GLint i, j, k;
857 Fx64 hi; /* high quadword */
858 GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
859
860 GLint minSum;
861 GLint maxSum;
862 GLint minColL = 0, maxColL = -1;
863 GLint minColR = 0, maxColR = -1;
864
865 /* Our solution here is to find the darkest and brightest colors in
866 * the 4x4 tile and use those as the two representative colors.
867 * There are probably better algorithms to use (histogram-based).
868 */
869 minSum = 2000; /* big enough */
870 maxSum = -1; /* small enough */
871 for (k = 0; k < N_TEXELS / 2; k++) {
872 if (!ISTBLACK(input[k])) {
873 GLint sum = 0;
874 for (i = 0; i < n_comp; i++) {
875 sum += input[k][i];
876 }
877 if (minSum > sum) {
878 minSum = sum;
879 minColL = k;
880 }
881 if (maxSum < sum) {
882 maxSum = sum;
883 maxColL = k;
884 }
885 }
886 }
887 minSum = 2000; /* big enough */
888 maxSum = -1; /* small enough */
889 for (; k < N_TEXELS; k++) {
890 if (!ISTBLACK(input[k])) {
891 GLint sum = 0;
892 for (i = 0; i < n_comp; i++) {
893 sum += input[k][i];
894 }
895 if (minSum > sum) {
896 minSum = sum;
897 minColR = k;
898 }
899 if (maxSum < sum) {
900 maxSum = sum;
901 maxColR = k;
902 }
903 }
904 }
905
906 /* left microtile */
907 if (maxColL == -1) {
908 /* all transparent black */
909 cc[0] = ~0u;
910 for (i = 0; i < n_comp; i++) {
911 vec[0][i] = 0;
912 vec[1][i] = 0;
913 }
914 } else {
915 cc[0] = 0;
916 for (i = 0; i < n_comp; i++) {
917 vec[0][i] = input[minColL][i];
918 vec[1][i] = input[maxColL][i];
919 }
920 if (minColL != maxColL) {
921 /* compute interpolation vector */
922 MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
923
924 /* add in texels */
925 lolo = 0;
926 for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
927 GLint texel = n_vect + 1; /* transparent black */
928 if (!ISTBLACK(input[k])) {
929 /* interpolate color */
930 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
931 }
932 /* add in texel */
933 lolo <<= 2;
934 lolo |= texel;
935 }
936 cc[0] = lolo;
937 }
938 }
939
940 /* right microtile */
941 if (maxColR == -1) {
942 /* all transparent black */
943 cc[1] = ~0u;
944 for (i = 0; i < n_comp; i++) {
945 vec[2][i] = 0;
946 vec[3][i] = 0;
947 }
948 } else {
949 cc[1] = 0;
950 for (i = 0; i < n_comp; i++) {
951 vec[2][i] = input[minColR][i];
952 vec[3][i] = input[maxColR][i];
953 }
954 if (minColR != maxColR) {
955 /* compute interpolation vector */
956 MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);
957
958 /* add in texels */
959 lohi = 0;
960 for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
961 GLint texel = n_vect + 1; /* transparent black */
962 if (!ISTBLACK(input[k])) {
963 /* interpolate color */
964 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
965 }
966 /* add in texel */
967 lohi <<= 2;
968 lohi |= texel;
969 }
970 cc[1] = lohi;
971 }
972 }
973
974 FX64_MOV32(hi, 9 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
975 for (j = 2 * 2 - 1; j >= 0; j--) {
976 for (i = 0; i < n_comp; i++) {
977 /* add in colors */
978 FX64_SHL(hi, 5);
979 FX64_OR32(hi, vec[j][i] >> 3);
980 }
981 }
982 ((Fx64 *)cc)[1] = hi;
983 }
984
985
986 static void
987 fxt1_quantize_MIXED0 (GLuint *cc,
988 GLubyte input[N_TEXELS][MAX_COMP])
989 {
990 const GLint n_vect = 3; /* highest vector number in each microtile */
991 const GLint n_comp = 3; /* 3 components: R, G, B */
992 GLubyte vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
993 GLfloat b, iv[MAX_COMP]; /* interpolation vector */
994 GLint i, j, k;
995 Fx64 hi; /* high quadword */
996 GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
997
998 GLint minColL = 0, maxColL = 0;
999 GLint minColR = 0, maxColR = 0;
1000 #if 0
1001 GLint minSum;
1002 GLint maxSum;
1003
1004 /* Our solution here is to find the darkest and brightest colors in
1005 * the 4x4 tile and use those as the two representative colors.
1006 * There are probably better algorithms to use (histogram-based).
1007 */
1008 minSum = 2000; /* big enough */
1009 maxSum = -1; /* small enough */
1010 for (k = 0; k < N_TEXELS / 2; k++) {
1011 GLint sum = 0;
1012 for (i = 0; i < n_comp; i++) {
1013 sum += input[k][i];
1014 }
1015 if (minSum > sum) {
1016 minSum = sum;
1017 minColL = k;
1018 }
1019 if (maxSum < sum) {
1020 maxSum = sum;
1021 maxColL = k;
1022 }
1023 }
1024 minSum = 2000; /* big enough */
1025 maxSum = -1; /* small enough */
1026 for (; k < N_TEXELS; k++) {
1027 GLint sum = 0;
1028 for (i = 0; i < n_comp; i++) {
1029 sum += input[k][i];
1030 }
1031 if (minSum > sum) {
1032 minSum = sum;
1033 minColR = k;
1034 }
1035 if (maxSum < sum) {
1036 maxSum = sum;
1037 maxColR = k;
1038 }
1039 }
1040 #else
1041 GLint minVal;
1042 GLint maxVal;
1043 GLint maxVarL = fxt1_variance(NULL, input, n_comp, N_TEXELS / 2);
1044 GLint maxVarR = fxt1_variance(NULL, &input[N_TEXELS / 2], n_comp, N_TEXELS / 2);
1045
1046 /* Scan the channel with max variance for lo & hi
1047 * and use those as the two representative colors.
1048 */
1049 minVal = 2000; /* big enough */
1050 maxVal = -1; /* small enough */
1051 for (k = 0; k < N_TEXELS / 2; k++) {
1052 GLint t = input[k][maxVarL];
1053 if (minVal > t) {
1054 minVal = t;
1055 minColL = k;
1056 }
1057 if (maxVal < t) {
1058 maxVal = t;
1059 maxColL = k;
1060 }
1061 }
1062 minVal = 2000; /* big enough */
1063 maxVal = -1; /* small enough */
1064 for (; k < N_TEXELS; k++) {
1065 GLint t = input[k][maxVarR];
1066 if (minVal > t) {
1067 minVal = t;
1068 minColR = k;
1069 }
1070 if (maxVal < t) {
1071 maxVal = t;
1072 maxColR = k;
1073 }
1074 }
1075 #endif
1076
1077 /* left microtile */
1078 cc[0] = 0;
1079 for (i = 0; i < n_comp; i++) {
1080 vec[0][i] = input[minColL][i];
1081 vec[1][i] = input[maxColL][i];
1082 }
1083 if (minColL != maxColL) {
1084 /* compute interpolation vector */
1085 MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
1086
1087 /* add in texels */
1088 lolo = 0;
1089 for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
1090 GLint texel;
1091 /* interpolate color */
1092 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
1093 /* add in texel */
1094 lolo <<= 2;
1095 lolo |= texel;
1096 }
1097
1098 /* funky encoding for LSB of green */
1099 if ((GLint)((lolo >> 1) & 1) != (((vec[1][GCOMP] ^ vec[0][GCOMP]) >> 2) & 1)) {
1100 for (i = 0; i < n_comp; i++) {
1101 vec[1][i] = input[minColL][i];
1102 vec[0][i] = input[maxColL][i];
1103 }
1104 lolo = ~lolo;
1105 }
1106
1107 cc[0] = lolo;
1108 }
1109
1110 /* right microtile */
1111 cc[1] = 0;
1112 for (i = 0; i < n_comp; i++) {
1113 vec[2][i] = input[minColR][i];
1114 vec[3][i] = input[maxColR][i];
1115 }
1116 if (minColR != maxColR) {
1117 /* compute interpolation vector */
1118 MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);
1119
1120 /* add in texels */
1121 lohi = 0;
1122 for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
1123 GLint texel;
1124 /* interpolate color */
1125 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
1126 /* add in texel */
1127 lohi <<= 2;
1128 lohi |= texel;
1129 }
1130
1131 /* funky encoding for LSB of green */
1132 if ((GLint)((lohi >> 1) & 1) != (((vec[3][GCOMP] ^ vec[2][GCOMP]) >> 2) & 1)) {
1133 for (i = 0; i < n_comp; i++) {
1134 vec[3][i] = input[minColR][i];
1135 vec[2][i] = input[maxColR][i];
1136 }
1137 lohi = ~lohi;
1138 }
1139
1140 cc[1] = lohi;
1141 }
1142
1143 FX64_MOV32(hi, 8 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
1144 for (j = 2 * 2 - 1; j >= 0; j--) {
1145 for (i = 0; i < n_comp; i++) {
1146 /* add in colors */
1147 FX64_SHL(hi, 5);
1148 FX64_OR32(hi, vec[j][i] >> 3);
1149 }
1150 }
1151 ((Fx64 *)cc)[1] = hi;
1152 }
1153
1154
1155 static void
1156 fxt1_quantize (GLuint *cc, const GLubyte *lines[], GLint comps)
1157 {
1158 GLint trualpha;
1159 GLubyte reord[N_TEXELS][MAX_COMP];
1160
1161 GLubyte input[N_TEXELS][MAX_COMP];
1162 GLint i, k, l;
1163
1164 if (comps == 3) {
1165 /* make the whole block opaque */
1166 memset(input, -1, sizeof(input));
1167 }
1168
1169 /* 8 texels each line */
1170 for (l = 0; l < 4; l++) {
1171 for (k = 0; k < 4; k++) {
1172 for (i = 0; i < comps; i++) {
1173 input[k + l * 4][i] = *lines[l]++;
1174 }
1175 }
1176 for (; k < 8; k++) {
1177 for (i = 0; i < comps; i++) {
1178 input[k + l * 4 + 12][i] = *lines[l]++;
1179 }
1180 }
1181 }
1182
1183 /* block layout:
1184 * 00, 01, 02, 03, 08, 09, 0a, 0b
1185 * 10, 11, 12, 13, 18, 19, 1a, 1b
1186 * 04, 05, 06, 07, 0c, 0d, 0e, 0f
1187 * 14, 15, 16, 17, 1c, 1d, 1e, 1f
1188 */
1189
1190 /* [dBorca]
1191 * stupidity flows forth from this
1192 */
1193 l = N_TEXELS;
1194 trualpha = 0;
1195 if (comps == 4) {
1196 /* skip all transparent black texels */
1197 l = 0;
1198 for (k = 0; k < N_TEXELS; k++) {
1199 /* test all components against 0 */
1200 if (!ISTBLACK(input[k])) {
1201 /* texel is not transparent black */
1202 COPY_4UBV(reord[l], input[k]);
1203 if (reord[l][ACOMP] < (255 - ALPHA_TS)) {
1204 /* non-opaque texel */
1205 trualpha = !0;
1206 }
1207 l++;
1208 }
1209 }
1210 }
1211
1212 #if 0
1213 if (trualpha) {
1214 fxt1_quantize_ALPHA0(cc, input, reord, l);
1215 } else if (l == 0) {
1216 cc[0] = cc[1] = cc[2] = -1;
1217 cc[3] = 0;
1218 } else if (l < N_TEXELS) {
1219 fxt1_quantize_HI(cc, input, reord, l);
1220 } else {
1221 fxt1_quantize_CHROMA(cc, input);
1222 }
1223 (void)fxt1_quantize_ALPHA1;
1224 (void)fxt1_quantize_MIXED1;
1225 (void)fxt1_quantize_MIXED0;
1226 #else
1227 if (trualpha) {
1228 fxt1_quantize_ALPHA1(cc, input);
1229 } else if (l == 0) {
1230 cc[0] = cc[1] = cc[2] = ~0u;
1231 cc[3] = 0;
1232 } else if (l < N_TEXELS) {
1233 fxt1_quantize_MIXED1(cc, input);
1234 } else {
1235 fxt1_quantize_MIXED0(cc, input);
1236 }
1237 (void)fxt1_quantize_ALPHA0;
1238 (void)fxt1_quantize_HI;
1239 (void)fxt1_quantize_CHROMA;
1240 #endif
1241 }
1242
1243
1244
1245 /**
1246 * Upscale an image by replication, not (typical) stretching.
1247 * We use this when the image width or height is less than a
1248 * certain size (4, 8) and we need to upscale an image.
1249 */
1250 static void
1251 upscale_teximage2d(GLsizei inWidth, GLsizei inHeight,
1252 GLsizei outWidth, GLsizei outHeight,
1253 GLint comps, const GLubyte *src, GLint srcRowStride,
1254 GLubyte *dest )
1255 {
1256 GLint i, j, k;
1257
1258 ASSERT(outWidth >= inWidth);
1259 ASSERT(outHeight >= inHeight);
1260 #if 0
1261 ASSERT(inWidth == 1 || inWidth == 2 || inHeight == 1 || inHeight == 2);
1262 ASSERT((outWidth & 3) == 0);
1263 ASSERT((outHeight & 3) == 0);
1264 #endif
1265
1266 for (i = 0; i < outHeight; i++) {
1267 const GLint ii = i % inHeight;
1268 for (j = 0; j < outWidth; j++) {
1269 const GLint jj = j % inWidth;
1270 for (k = 0; k < comps; k++) {
1271 dest[(i * outWidth + j) * comps + k]
1272 = src[ii * srcRowStride + jj * comps + k];
1273 }
1274 }
1275 }
1276 }
1277
1278
1279 static void
1280 fxt1_encode (GLuint width, GLuint height, GLint comps,
1281 const void *source, GLint srcRowStride,
1282 void *dest, GLint destRowStride)
1283 {
1284 GLuint x, y;
1285 const GLubyte *data;
1286 GLuint *encoded = (GLuint *)dest;
1287 void *newSource = NULL;
1288
1289 assert(comps == 3 || comps == 4);
1290
1291 /* Replicate image if width is not M8 or height is not M4 */
1292 if ((width & 7) | (height & 3)) {
1293 GLint newWidth = (width + 7) & ~7;
1294 GLint newHeight = (height + 3) & ~3;
1295 newSource = malloc(comps * newWidth * newHeight * sizeof(GLubyte));
1296 if (!newSource) {
1297 GET_CURRENT_CONTEXT(ctx);
1298 _mesa_error(ctx, GL_OUT_OF_MEMORY, "texture compression");
1299 goto cleanUp;
1300 }
1301 upscale_teximage2d(width, height, newWidth, newHeight,
1302 comps, (const GLubyte *) source,
1303 srcRowStride, (GLubyte *) newSource);
1304 source = newSource;
1305 width = newWidth;
1306 height = newHeight;
1307 srcRowStride = comps * newWidth;
1308 }
1309
1310 data = (const GLubyte *) source;
1311 destRowStride = (destRowStride - width * 2) / 4;
1312 for (y = 0; y < height; y += 4) {
1313 GLuint offs = 0 + (y + 0) * srcRowStride;
1314 for (x = 0; x < width; x += 8) {
1315 const GLubyte *lines[4];
1316 lines[0] = &data[offs];
1317 lines[1] = lines[0] + srcRowStride;
1318 lines[2] = lines[1] + srcRowStride;
1319 lines[3] = lines[2] + srcRowStride;
1320 offs += 8 * comps;
1321 fxt1_quantize(encoded, lines, comps);
1322 /* 128 bits per 8x4 block */
1323 encoded += 4;
1324 }
1325 encoded += destRowStride;
1326 }
1327
1328 cleanUp:
1329 free(newSource);
1330 }
1331
1332
1333 /***************************************************************************\
1334 * FXT1 decoder
1335 *
1336 * The decoder is based on GL_3DFX_texture_compression_FXT1
1337 * specification and serves as a concept for the encoder.
1338 \***************************************************************************/
1339
1340
1341 /* lookup table for scaling 5 bit colors up to 8 bits */
1342 static const GLubyte _rgb_scale_5[] = {
1343 0, 8, 16, 25, 33, 41, 49, 58,
1344 66, 74, 82, 90, 99, 107, 115, 123,
1345 132, 140, 148, 156, 165, 173, 181, 189,
1346 197, 206, 214, 222, 230, 239, 247, 255
1347 };
1348
1349 /* lookup table for scaling 6 bit colors up to 8 bits */
1350 static const GLubyte _rgb_scale_6[] = {
1351 0, 4, 8, 12, 16, 20, 24, 28,
1352 32, 36, 40, 45, 49, 53, 57, 61,
1353 65, 69, 73, 77, 81, 85, 89, 93,
1354 97, 101, 105, 109, 113, 117, 121, 125,
1355 130, 134, 138, 142, 146, 150, 154, 158,
1356 162, 166, 170, 174, 178, 182, 186, 190,
1357 194, 198, 202, 206, 210, 215, 219, 223,
1358 227, 231, 235, 239, 243, 247, 251, 255
1359 };
1360
1361
/*
 * Bit-field helpers for the decoder.  Every expansion is fully
 * parenthesised so the macros can be used inside larger expressions.
 *
 * CC_SEL(cc, which)  - view cc as an array of GLuint words and return the
 *                      word holding bit number `which`, shifted right so
 *                      that bit ends up in bit 0; higher bits of that word
 *                      remain, so callers mask the result.  Read-only.
 * UP5(c)             - low 5 bits of c scaled to 8 bits.
 * UP6(c, b)          - 6-bit value built from the low 5 bits of c followed
 *                      by the low bit of b, scaled to 8 bits.
 * LERP(n, t, c0, c1) - rounded integer interpolation: step t of n between
 *                      c0 (t == 0) and c1 (t == n).
 */
#define CC_SEL(cc, which) \
   (((const GLuint *)(cc))[(which) / 32] >> ((which) & 31))
#define UP5(c) (_rgb_scale_5[(c) & 31])
#define UP6(c, b) (_rgb_scale_6[(((c) & 31) << 1) | ((b) & 1)])
#define LERP(n, t, c0, c1) \
   ((((n) - (t)) * (c0) + (t) * (c1) + (n) / 2) / (n))
1366
1367
1368 static void
1369 fxt1_decode_1HI (const GLubyte *code, GLint t, GLubyte *rgba)
1370 {
1371 const GLuint *cc;
1372
1373 t *= 3;
1374 cc = (const GLuint *)(code + t / 8);
1375 t = (cc[0] >> (t & 7)) & 7;
1376
1377 if (t == 7) {
1378 rgba[RCOMP] = rgba[GCOMP] = rgba[BCOMP] = rgba[ACOMP] = 0;
1379 } else {
1380 GLubyte r, g, b;
1381 cc = (const GLuint *)(code + 12);
1382 if (t == 0) {
1383 b = UP5(CC_SEL(cc, 0));
1384 g = UP5(CC_SEL(cc, 5));
1385 r = UP5(CC_SEL(cc, 10));
1386 } else if (t == 6) {
1387 b = UP5(CC_SEL(cc, 15));
1388 g = UP5(CC_SEL(cc, 20));
1389 r = UP5(CC_SEL(cc, 25));
1390 } else {
1391 b = LERP(6, t, UP5(CC_SEL(cc, 0)), UP5(CC_SEL(cc, 15)));
1392 g = LERP(6, t, UP5(CC_SEL(cc, 5)), UP5(CC_SEL(cc, 20)));
1393 r = LERP(6, t, UP5(CC_SEL(cc, 10)), UP5(CC_SEL(cc, 25)));
1394 }
1395 rgba[RCOMP] = r;
1396 rgba[GCOMP] = g;
1397 rgba[BCOMP] = b;
1398 rgba[ACOMP] = 255;
1399 }
1400 }
1401
1402
1403 static void
1404 fxt1_decode_1CHROMA (const GLubyte *code, GLint t, GLubyte *rgba)
1405 {
1406 const GLuint *cc;
1407 GLuint kk;
1408
1409 cc = (const GLuint *)code;
1410 if (t & 16) {
1411 cc++;
1412 t &= 15;
1413 }
1414 t = (cc[0] >> (t * 2)) & 3;
1415
1416 t *= 15;
1417 cc = (const GLuint *)(code + 8 + t / 8);
1418 kk = cc[0] >> (t & 7);
1419 rgba[BCOMP] = UP5(kk);
1420 rgba[GCOMP] = UP5(kk >> 5);
1421 rgba[RCOMP] = UP5(kk >> 10);
1422 rgba[ACOMP] = 255;
1423 }
1424
1425
1426 static void
1427 fxt1_decode_1MIXED (const GLubyte *code, GLint t, GLubyte *rgba)
1428 {
1429 const GLuint *cc;
1430 GLuint col[2][3];
1431 GLint glsb, selb;
1432
1433 cc = (const GLuint *)code;
1434 if (t & 16) {
1435 t &= 15;
1436 t = (cc[1] >> (t * 2)) & 3;
1437 /* col 2 */
1438 col[0][BCOMP] = (*(const GLuint *)(code + 11)) >> 6;
1439 col[0][GCOMP] = CC_SEL(cc, 99);
1440 col[0][RCOMP] = CC_SEL(cc, 104);
1441 /* col 3 */
1442 col[1][BCOMP] = CC_SEL(cc, 109);
1443 col[1][GCOMP] = CC_SEL(cc, 114);
1444 col[1][RCOMP] = CC_SEL(cc, 119);
1445 glsb = CC_SEL(cc, 126);
1446 selb = CC_SEL(cc, 33);
1447 } else {
1448 t = (cc[0] >> (t * 2)) & 3;
1449 /* col 0 */
1450 col[0][BCOMP] = CC_SEL(cc, 64);
1451 col[0][GCOMP] = CC_SEL(cc, 69);
1452 col[0][RCOMP] = CC_SEL(cc, 74);
1453 /* col 1 */
1454 col[1][BCOMP] = CC_SEL(cc, 79);
1455 col[1][GCOMP] = CC_SEL(cc, 84);
1456 col[1][RCOMP] = CC_SEL(cc, 89);
1457 glsb = CC_SEL(cc, 125);
1458 selb = CC_SEL(cc, 1);
1459 }
1460
1461 if (CC_SEL(cc, 124) & 1) {
1462 /* alpha[0] == 1 */
1463
1464 if (t == 3) {
1465 /* zero */
1466 rgba[RCOMP] = rgba[BCOMP] = rgba[GCOMP] = rgba[ACOMP] = 0;
1467 } else {
1468 GLubyte r, g, b;
1469 if (t == 0) {
1470 b = UP5(col[0][BCOMP]);
1471 g = UP5(col[0][GCOMP]);
1472 r = UP5(col[0][RCOMP]);
1473 } else if (t == 2) {
1474 b = UP5(col[1][BCOMP]);
1475 g = UP6(col[1][GCOMP], glsb);
1476 r = UP5(col[1][RCOMP]);
1477 } else {
1478 b = (UP5(col[0][BCOMP]) + UP5(col[1][BCOMP])) / 2;
1479 g = (UP5(col[0][GCOMP]) + UP6(col[1][GCOMP], glsb)) / 2;
1480 r = (UP5(col[0][RCOMP]) + UP5(col[1][RCOMP])) / 2;
1481 }
1482 rgba[RCOMP] = r;
1483 rgba[GCOMP] = g;
1484 rgba[BCOMP] = b;
1485 rgba[ACOMP] = 255;
1486 }
1487 } else {
1488 /* alpha[0] == 0 */
1489 GLubyte r, g, b;
1490 if (t == 0) {
1491 b = UP5(col[0][BCOMP]);
1492 g = UP6(col[0][GCOMP], glsb ^ selb);
1493 r = UP5(col[0][RCOMP]);
1494 } else if (t == 3) {
1495 b = UP5(col[1][BCOMP]);
1496 g = UP6(col[1][GCOMP], glsb);
1497 r = UP5(col[1][RCOMP]);
1498 } else {
1499 b = LERP(3, t, UP5(col[0][BCOMP]), UP5(col[1][BCOMP]));
1500 g = LERP(3, t, UP6(col[0][GCOMP], glsb ^ selb),
1501 UP6(col[1][GCOMP], glsb));
1502 r = LERP(3, t, UP5(col[0][RCOMP]), UP5(col[1][RCOMP]));
1503 }
1504 rgba[RCOMP] = r;
1505 rgba[GCOMP] = g;
1506 rgba[BCOMP] = b;
1507 rgba[ACOMP] = 255;
1508 }
1509 }
1510
1511
1512 static void
1513 fxt1_decode_1ALPHA (const GLubyte *code, GLint t, GLubyte *rgba)
1514 {
1515 const GLuint *cc;
1516 GLubyte r, g, b, a;
1517
1518 cc = (const GLuint *)code;
1519 if (CC_SEL(cc, 124) & 1) {
1520 /* lerp == 1 */
1521 GLuint col0[4];
1522
1523 if (t & 16) {
1524 t &= 15;
1525 t = (cc[1] >> (t * 2)) & 3;
1526 /* col 2 */
1527 col0[BCOMP] = (*(const GLuint *)(code + 11)) >> 6;
1528 col0[GCOMP] = CC_SEL(cc, 99);
1529 col0[RCOMP] = CC_SEL(cc, 104);
1530 col0[ACOMP] = CC_SEL(cc, 119);
1531 } else {
1532 t = (cc[0] >> (t * 2)) & 3;
1533 /* col 0 */
1534 col0[BCOMP] = CC_SEL(cc, 64);
1535 col0[GCOMP] = CC_SEL(cc, 69);
1536 col0[RCOMP] = CC_SEL(cc, 74);
1537 col0[ACOMP] = CC_SEL(cc, 109);
1538 }
1539
1540 if (t == 0) {
1541 b = UP5(col0[BCOMP]);
1542 g = UP5(col0[GCOMP]);
1543 r = UP5(col0[RCOMP]);
1544 a = UP5(col0[ACOMP]);
1545 } else if (t == 3) {
1546 b = UP5(CC_SEL(cc, 79));
1547 g = UP5(CC_SEL(cc, 84));
1548 r = UP5(CC_SEL(cc, 89));
1549 a = UP5(CC_SEL(cc, 114));
1550 } else {
1551 b = LERP(3, t, UP5(col0[BCOMP]), UP5(CC_SEL(cc, 79)));
1552 g = LERP(3, t, UP5(col0[GCOMP]), UP5(CC_SEL(cc, 84)));
1553 r = LERP(3, t, UP5(col0[RCOMP]), UP5(CC_SEL(cc, 89)));
1554 a = LERP(3, t, UP5(col0[ACOMP]), UP5(CC_SEL(cc, 114)));
1555 }
1556 } else {
1557 /* lerp == 0 */
1558
1559 if (t & 16) {
1560 cc++;
1561 t &= 15;
1562 }
1563 t = (cc[0] >> (t * 2)) & 3;
1564
1565 if (t == 3) {
1566 /* zero */
1567 r = g = b = a = 0;
1568 } else {
1569 GLuint kk;
1570 cc = (const GLuint *)code;
1571 a = UP5(cc[3] >> (t * 5 + 13));
1572 t *= 15;
1573 cc = (const GLuint *)(code + 8 + t / 8);
1574 kk = cc[0] >> (t & 7);
1575 b = UP5(kk);
1576 g = UP5(kk >> 5);
1577 r = UP5(kk >> 10);
1578 }
1579 }
1580 rgba[RCOMP] = r;
1581 rgba[GCOMP] = g;
1582 rgba[BCOMP] = b;
1583 rgba[ACOMP] = a;
1584 }
1585
1586
1587 static void
1588 fxt1_decode_1 (const void *texture, GLint stride, /* in pixels */
1589 GLint i, GLint j, GLubyte *rgba)
1590 {
1591 static void (*decode_1[]) (const GLubyte *, GLint, GLubyte *) = {
1592 fxt1_decode_1HI, /* cc-high = "00?" */
1593 fxt1_decode_1HI, /* cc-high = "00?" */
1594 fxt1_decode_1CHROMA, /* cc-chroma = "010" */
1595 fxt1_decode_1ALPHA, /* alpha = "011" */
1596 fxt1_decode_1MIXED, /* mixed = "1??" */
1597 fxt1_decode_1MIXED, /* mixed = "1??" */
1598 fxt1_decode_1MIXED, /* mixed = "1??" */
1599 fxt1_decode_1MIXED /* mixed = "1??" */
1600 };
1601
1602 const GLubyte *code = (const GLubyte *)texture +
1603 ((j / 4) * (stride / 8) + (i / 8)) * 16;
1604 GLint mode = CC_SEL(code, 125);
1605 GLint t = i & 7;
1606
1607 if (t & 4) {
1608 t += 12;
1609 }
1610 t += (j & 3) * 4;
1611
1612 decode_1[mode](code, t, rgba);
1613 }
1614
1615
1616
1617
1618 static void
1619 fetch_rgb_fxt1(const GLubyte *map, const GLuint imageOffsets[],
1620 GLint rowStride, GLint i, GLint j, GLint k, GLfloat *texel)
1621 {
1622 GLubyte rgba[4];
1623 fxt1_decode_1(map, rowStride, i, j, rgba);
1624 texel[RCOMP] = UBYTE_TO_FLOAT(rgba[RCOMP]);
1625 texel[GCOMP] = UBYTE_TO_FLOAT(rgba[GCOMP]);
1626 texel[BCOMP] = UBYTE_TO_FLOAT(rgba[BCOMP]);
1627 texel[ACOMP] = 1.0F;
1628 }
1629
1630
1631 static void
1632 fetch_rgba_fxt1(const GLubyte *map, const GLuint imageOffsets[],
1633 GLint rowStride, GLint i, GLint j, GLint k, GLfloat *texel)
1634 {
1635 GLubyte rgba[4];
1636 fxt1_decode_1(map, rowStride, i, j, rgba);
1637 texel[RCOMP] = UBYTE_TO_FLOAT(rgba[RCOMP]);
1638 texel[GCOMP] = UBYTE_TO_FLOAT(rgba[GCOMP]);
1639 texel[BCOMP] = UBYTE_TO_FLOAT(rgba[BCOMP]);
1640 texel[ACOMP] = UBYTE_TO_FLOAT(rgba[ACOMP]);
1641 }
1642
1643
1644 compressed_fetch_func
1645 _mesa_get_fxt_fetch_func(gl_format format)
1646 {
1647 switch (format) {
1648 case MESA_FORMAT_RGB_FXT1:
1649 return fetch_rgb_fxt1;
1650 case MESA_FORMAT_RGBA_FXT1:
1651 return fetch_rgba_fxt1;
1652 default:
1653 return NULL;
1654 }
1655 }