glthread: track instance divisor changes
[mesa.git] / src / mesa / main / texcompress_fxt1.c
1 /*
2 * Mesa 3-D graphics library
3 *
4 * Copyright (C) 1999-2008 Brian Paul All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 * OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25
26 /**
27 * \file texcompress_fxt1.c
28 * GL_3DFX_texture_compression_FXT1 support.
29 */
30
31
32 #include "errors.h"
33 #include "glheader.h"
34
35 #include "image.h"
36 #include "macros.h"
37 #include "mipmap.h"
38 #include "texcompress.h"
39 #include "texcompress_fxt1.h"
40 #include "texstore.h"
41 #include "mtypes.h"
42
43
44 static void
45 fxt1_encode (GLuint width, GLuint height, GLint comps,
46 const void *source, GLint srcRowStride,
47 void *dest, GLint destRowStride);
48
49 static void
50 fxt1_decode_1 (const void *texture, GLint stride,
51 GLint i, GLint j, GLubyte *rgba);
52
53
54 /**
55 * Store user's image in rgb_fxt1 format.
56 */
57 GLboolean
58 _mesa_texstore_rgb_fxt1(TEXSTORE_PARAMS)
59 {
60 const GLubyte *pixels;
61 GLint srcRowStride;
62 GLubyte *dst;
63 const GLubyte *tempImage = NULL;
64
65 assert(dstFormat == MESA_FORMAT_RGB_FXT1);
66
67 if (srcFormat != GL_RGB ||
68 srcType != GL_UNSIGNED_BYTE ||
69 ctx->_ImageTransferState ||
70 ALIGN(srcPacking->RowLength, srcPacking->Alignment) != srcWidth ||
71 srcPacking->SwapBytes) {
72 /* convert image to RGB/GLubyte */
73 GLubyte *tempImageSlices[1];
74 int rgbRowStride = 3 * srcWidth * sizeof(GLubyte);
75 tempImage = malloc(srcWidth * srcHeight * 3 * sizeof(GLubyte));
76 if (!tempImage)
77 return GL_FALSE; /* out of memory */
78 tempImageSlices[0] = (GLubyte *) tempImage;
79 _mesa_texstore(ctx, dims,
80 baseInternalFormat,
81 MESA_FORMAT_RGB_UNORM8,
82 rgbRowStride, tempImageSlices,
83 srcWidth, srcHeight, srcDepth,
84 srcFormat, srcType, srcAddr,
85 srcPacking);
86 pixels = tempImage;
87 srcRowStride = 3 * srcWidth;
88 srcFormat = GL_RGB;
89 }
90 else {
91 pixels = _mesa_image_address2d(srcPacking, srcAddr, srcWidth, srcHeight,
92 srcFormat, srcType, 0, 0);
93
94 srcRowStride = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat,
95 srcType) / sizeof(GLubyte);
96 }
97
98 dst = dstSlices[0];
99
100 fxt1_encode(srcWidth, srcHeight, 3, pixels, srcRowStride,
101 dst, dstRowStride);
102
103 free((void*) tempImage);
104
105 return GL_TRUE;
106 }
107
108
109 /**
110 * Store user's image in rgba_fxt1 format.
111 */
112 GLboolean
113 _mesa_texstore_rgba_fxt1(TEXSTORE_PARAMS)
114 {
115 const GLubyte *pixels;
116 GLint srcRowStride;
117 GLubyte *dst;
118 const GLubyte *tempImage = NULL;
119
120 assert(dstFormat == MESA_FORMAT_RGBA_FXT1);
121
122 if (srcFormat != GL_RGBA ||
123 srcType != GL_UNSIGNED_BYTE ||
124 ctx->_ImageTransferState ||
125 srcPacking->SwapBytes) {
126 /* convert image to RGBA/GLubyte */
127 GLubyte *tempImageSlices[1];
128 int rgbaRowStride = 4 * srcWidth * sizeof(GLubyte);
129 tempImage = malloc(srcWidth * srcHeight * 4 * sizeof(GLubyte));
130 if (!tempImage)
131 return GL_FALSE; /* out of memory */
132 tempImageSlices[0] = (GLubyte *) tempImage;
133 _mesa_texstore(ctx, dims,
134 baseInternalFormat,
135 #if UTIL_ARCH_LITTLE_ENDIAN
136 MESA_FORMAT_R8G8B8A8_UNORM,
137 #else
138 MESA_FORMAT_A8B8G8R8_UNORM,
139 #endif
140 rgbaRowStride, tempImageSlices,
141 srcWidth, srcHeight, srcDepth,
142 srcFormat, srcType, srcAddr,
143 srcPacking);
144 pixels = tempImage;
145 srcRowStride = 4 * srcWidth;
146 srcFormat = GL_RGBA;
147 }
148 else {
149 pixels = _mesa_image_address2d(srcPacking, srcAddr, srcWidth, srcHeight,
150 srcFormat, srcType, 0, 0);
151
152 srcRowStride = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat,
153 srcType) / sizeof(GLubyte);
154 }
155
156 dst = dstSlices[0];
157
158 fxt1_encode(srcWidth, srcHeight, 4, pixels, srcRowStride,
159 dst, dstRowStride);
160
161 free((void*) tempImage);
162
163 return GL_TRUE;
164 }
165
166
167 /***************************************************************************\
168 * FXT1 encoder
169 *
170 * The encoder was built by reversing the decoder,
171 * and is vaguely based on Texus2 by 3dfx. Note that this code
172 * is merely a proof of concept, since it is highly UNoptimized;
173 * moreover, it is sub-optimal due to initial conditions passed
174 * to Lloyd's algorithm (the interpolation modes are even worse).
175 \***************************************************************************/
176
177
178 #define MAX_COMP 4 /* ever needed maximum number of components in texel */
179 #define MAX_VECT 4 /* ever needed maximum number of base vectors to find */
180 #define N_TEXELS 32 /* number of texels in a block (always 32) */
181 #define LL_N_REP 50 /* number of iterations in lloyd's vq */
182 #define LL_RMS_D 10 /* fault tolerance (maximum delta) */
183 #define LL_RMS_E 255 /* fault tolerance (maximum error) */
184 #define ALPHA_TS 2 /* alpha threshold: (255 - ALPHA_TS) deemed opaque */
185 static const GLuint zero = 0;
186 #define ISTBLACK(v) (memcmp(&(v), &zero, sizeof(zero)) == 0)
187
/*
 * 64-bit unsigned accumulator plus the three operations the encoder needs:
 *   FX64_MOV32(a, b)  load a 32-bit value
 *   FX64_OR32(a, b)   OR a 32-bit value into the low bits
 *   FX64_SHL(a, c)    shift left by c bits
 *
 * The native uint64_t variant is the one compiled; the two-word struct
 * variant is kept as a disabled fallback.
 */
#if 1

#define FX64_NATIVE 1

typedef uint64_t Fx64;

#define FX64_MOV32(a, b) ((a) = (b))
#define FX64_OR32(a, b)  ((a) |= (b))
#define FX64_SHL(a, c)   ((a) <<= (c))

#else

#define FX64_NATIVE 0

typedef struct {
   GLuint lo, hi;
} Fx64;

#define FX64_MOV32(a, b) ((a).lo = (b))
#define FX64_OR32(a, b)  ((a).lo |= (b))

/* Shifts of 32 bits or more move the low word straight into the high word */
#define FX64_SHL(a, c)                                          \
   do {                                                         \
      if ((c) >= 32) {                                          \
         (a).hi = (a).lo << ((c) - 32);                         \
         (a).lo = 0;                                            \
      } else {                                                  \
         (a).hi = ((a).hi << (c)) | ((a).lo >> (32 - (c)));     \
         (a).lo <<= (c);                                        \
      }                                                         \
   } while (0)

#endif
224
225
/* Per-component weight; can be used to obtain an oblong metric:
 * 0.30 / 0.59 / 0.11
 */
#define F(i) (GLfloat)1
#define SAFECDOT 1 /* for paranoids: clamp CALCCDOT results to [0, NV] */

/*
 * Compute the interpolation vector IV and bias B for the line from V0 to V1
 * so that CALCCDOT maps V0 to index 0 and V1 to index NV.
 *
 * Relies on an integer variable named `i` in the enclosing scope.
 */
#define MAKEIVEC(NV, NC, IV, B, V0, V1)                         \
   do {                                                         \
      GLfloat ivLen2_ = 0.0F; /* squared length of V1 - V0 */   \
      GLfloat ivScale_;       /* NV / squared length */         \
                                                                \
      for (i = 0; i < (NC); i++) {                              \
         (IV)[i] = ((V1)[i] - (V0)[i]) * F(i);                  \
         ivLen2_ += (IV)[i] * (IV)[i];                          \
      }                                                         \
      ivScale_ = (GLfloat)(NV) / ivLen2_;                       \
      (B) = 0;                                                  \
      for (i = 0; i < (NC); i++) {                              \
         (IV)[i] *= F(i);                                       \
         (B) -= (IV)[i] * (V0)[i];                              \
         (IV)[i] *= ivScale_;                                   \
      }                                                         \
      (B) = (B) * ivScale_ + 0.5f;                              \
   } while (0)

/*
 * Project color V onto the interpolation vector and store the truncated
 * result in TEXEL; with SAFECDOT set the result is clamped to [0, NV].
 *
 * Relies on an integer variable named `i` in the enclosing scope.
 */
#define CALCCDOT(TEXEL, NV, NC, IV, B, V)                       \
   do {                                                         \
      GLfloat cdotAcc_ = 0.0F;                                  \
      for (i = 0; i < (NC); i++) {                              \
         cdotAcc_ += (V)[i] * (IV)[i];                          \
      }                                                         \
      (TEXEL) = (GLint)(cdotAcc_ + (B));                        \
      if (SAFECDOT) {                                           \
         if ((TEXEL) < 0) {                                     \
            (TEXEL) = 0;                                        \
         } else if ((TEXEL) > (NV)) {                           \
            (TEXEL) = (NV);                                     \
         }                                                      \
      }                                                         \
   } while (0)
264
265
266 static GLint
267 fxt1_bestcol (GLfloat vec[][MAX_COMP], GLint nv,
268 GLubyte input[MAX_COMP], GLint nc)
269 {
270 GLint i, j, best = -1;
271 GLfloat err = 1e9; /* big enough */
272
273 for (j = 0; j < nv; j++) {
274 GLfloat e = 0.0F;
275 for (i = 0; i < nc; i++) {
276 e += (vec[j][i] - input[i]) * (vec[j][i] - input[i]);
277 }
278 if (e < err) {
279 err = e;
280 best = j;
281 }
282 }
283
284 return best;
285 }
286
287
288 static GLint
289 fxt1_worst (GLfloat vec[MAX_COMP],
290 GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
291 {
292 GLint i, k, worst = -1;
293 GLfloat err = -1.0F; /* small enough */
294
295 for (k = 0; k < n; k++) {
296 GLfloat e = 0.0F;
297 for (i = 0; i < nc; i++) {
298 e += (vec[i] - input[k][i]) * (vec[i] - input[k][i]);
299 }
300 if (e > err) {
301 err = e;
302 worst = k;
303 }
304 }
305
306 return worst;
307 }
308
309
310 static GLint
311 fxt1_variance (GLdouble variance[MAX_COMP],
312 GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
313 {
314 GLint i, k, best = 0;
315 GLint sx, sx2;
316 GLdouble var, maxvar = -1; /* small enough */
317 GLdouble teenth = 1.0 / n;
318
319 for (i = 0; i < nc; i++) {
320 sx = sx2 = 0;
321 for (k = 0; k < n; k++) {
322 GLint t = input[k][i];
323 sx += t;
324 sx2 += t * t;
325 }
326 var = sx2 * teenth - sx * sx * teenth * teenth;
327 if (maxvar < var) {
328 maxvar = var;
329 best = i;
330 }
331 if (variance) {
332 variance[i] = var;
333 }
334 }
335
336 return best;
337 }
338
339
340 static GLint
341 fxt1_choose (GLfloat vec[][MAX_COMP], GLint nv,
342 GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
343 {
344 #if 0
345 /* Choose colors from a grid.
346 */
347 GLint i, j;
348
349 for (j = 0; j < nv; j++) {
350 GLint m = j * (n - 1) / (nv - 1);
351 for (i = 0; i < nc; i++) {
352 vec[j][i] = input[m][i];
353 }
354 }
355 #else
356 /* Our solution here is to find the darkest and brightest colors in
357 * the 8x4 tile and use those as the two representative colors.
358 * There are probably better algorithms to use (histogram-based).
359 */
360 GLint i, j, k;
361 GLint minSum = 2000; /* big enough */
362 GLint maxSum = -1; /* small enough */
363 GLint minCol = 0; /* phoudoin: silent compiler! */
364 GLint maxCol = 0; /* phoudoin: silent compiler! */
365
366 struct {
367 GLint flag;
368 GLint key;
369 GLint freq;
370 GLint idx;
371 } hist[N_TEXELS];
372 GLint lenh = 0;
373
374 memset(hist, 0, sizeof(hist));
375
376 for (k = 0; k < n; k++) {
377 GLint l;
378 GLint key = 0;
379 GLint sum = 0;
380 for (i = 0; i < nc; i++) {
381 key <<= 8;
382 key |= input[k][i];
383 sum += input[k][i];
384 }
385 for (l = 0; l < n; l++) {
386 if (!hist[l].flag) {
387 /* alloc new slot */
388 hist[l].flag = !0;
389 hist[l].key = key;
390 hist[l].freq = 1;
391 hist[l].idx = k;
392 lenh = l + 1;
393 break;
394 } else if (hist[l].key == key) {
395 hist[l].freq++;
396 break;
397 }
398 }
399 if (minSum > sum) {
400 minSum = sum;
401 minCol = k;
402 }
403 if (maxSum < sum) {
404 maxSum = sum;
405 maxCol = k;
406 }
407 }
408
409 if (lenh <= nv) {
410 for (j = 0; j < lenh; j++) {
411 for (i = 0; i < nc; i++) {
412 vec[j][i] = (GLfloat)input[hist[j].idx][i];
413 }
414 }
415 for (; j < nv; j++) {
416 for (i = 0; i < nc; i++) {
417 vec[j][i] = vec[0][i];
418 }
419 }
420 return 0;
421 }
422
423 for (j = 0; j < nv; j++) {
424 for (i = 0; i < nc; i++) {
425 vec[j][i] = ((nv - 1 - j) * input[minCol][i] + j * input[maxCol][i] + (nv - 1) / 2) / (GLfloat)(nv - 1);
426 }
427 }
428 #endif
429
430 return !0;
431 }
432
433
434 static GLint
435 fxt1_lloyd (GLfloat vec[][MAX_COMP], GLint nv,
436 GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
437 {
438 /* Use the generalized lloyd's algorithm for VQ:
439 * find 4 color vectors.
440 *
441 * for each sample color
442 * sort to nearest vector.
443 *
444 * replace each vector with the centroid of its matching colors.
445 *
446 * repeat until RMS doesn't improve.
447 *
448 * if a color vector has no samples, or becomes the same as another
449 * vector, replace it with the color which is farthest from a sample.
450 *
451 * vec[][MAX_COMP] initial vectors and resulting colors
452 * nv number of resulting colors required
453 * input[N_TEXELS][MAX_COMP] input texels
454 * nc number of components in input / vec
455 * n number of input samples
456 */
457
458 GLint sum[MAX_VECT][MAX_COMP]; /* used to accumulate closest texels */
459 GLint cnt[MAX_VECT]; /* how many times a certain vector was chosen */
460 GLfloat error, lasterror = 1e9;
461
462 GLint i, j, k, rep;
463
464 /* the quantizer */
465 for (rep = 0; rep < LL_N_REP; rep++) {
466 /* reset sums & counters */
467 for (j = 0; j < nv; j++) {
468 for (i = 0; i < nc; i++) {
469 sum[j][i] = 0;
470 }
471 cnt[j] = 0;
472 }
473 error = 0;
474
475 /* scan whole block */
476 for (k = 0; k < n; k++) {
477 #if 1
478 GLint best = -1;
479 GLfloat err = 1e9; /* big enough */
480 /* determine best vector */
481 for (j = 0; j < nv; j++) {
482 GLfloat e = (vec[j][0] - input[k][0]) * (vec[j][0] - input[k][0]) +
483 (vec[j][1] - input[k][1]) * (vec[j][1] - input[k][1]) +
484 (vec[j][2] - input[k][2]) * (vec[j][2] - input[k][2]);
485 if (nc == 4) {
486 e += (vec[j][3] - input[k][3]) * (vec[j][3] - input[k][3]);
487 }
488 if (e < err) {
489 err = e;
490 best = j;
491 }
492 }
493 #else
494 GLint best = fxt1_bestcol(vec, nv, input[k], nc, &err);
495 #endif
496 assert(best >= 0);
497 /* add in closest color */
498 for (i = 0; i < nc; i++) {
499 sum[best][i] += input[k][i];
500 }
501 /* mark this vector as used */
502 cnt[best]++;
503 /* accumulate error */
504 error += err;
505 }
506
507 /* check RMS */
508 if ((error < LL_RMS_E) ||
509 ((error < lasterror) && ((lasterror - error) < LL_RMS_D))) {
510 return !0; /* good match */
511 }
512 lasterror = error;
513
514 /* move each vector to the barycenter of its closest colors */
515 for (j = 0; j < nv; j++) {
516 if (cnt[j]) {
517 GLfloat div = 1.0F / cnt[j];
518 for (i = 0; i < nc; i++) {
519 vec[j][i] = div * sum[j][i];
520 }
521 } else {
522 /* this vec has no samples or is identical with a previous vec */
523 GLint worst = fxt1_worst(vec[j], input, nc, n);
524 for (i = 0; i < nc; i++) {
525 vec[j][i] = input[worst][i];
526 }
527 }
528 }
529 }
530
531 return 0; /* could not converge fast enough */
532 }
533
534
535 static void
536 fxt1_quantize_CHROMA (GLuint *cc,
537 GLubyte input[N_TEXELS][MAX_COMP])
538 {
539 const GLint n_vect = 4; /* 4 base vectors to find */
540 const GLint n_comp = 3; /* 3 components: R, G, B */
541 GLfloat vec[MAX_VECT][MAX_COMP];
542 GLint i, j, k;
543 Fx64 hi; /* high quadword */
544 GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
545
546 if (fxt1_choose(vec, n_vect, input, n_comp, N_TEXELS) != 0) {
547 fxt1_lloyd(vec, n_vect, input, n_comp, N_TEXELS);
548 }
549
550 FX64_MOV32(hi, 4); /* cc-chroma = "010" + unused bit */
551 for (j = n_vect - 1; j >= 0; j--) {
552 for (i = 0; i < n_comp; i++) {
553 /* add in colors */
554 FX64_SHL(hi, 5);
555 FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
556 }
557 }
558 ((Fx64 *)cc)[1] = hi;
559
560 lohi = lolo = 0;
561 /* right microtile */
562 for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) {
563 lohi <<= 2;
564 lohi |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
565 }
566 /* left microtile */
567 for (; k >= 0; k--) {
568 lolo <<= 2;
569 lolo |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
570 }
571 cc[1] = lohi;
572 cc[0] = lolo;
573 }
574
575
576 static void
577 fxt1_quantize_ALPHA0 (GLuint *cc,
578 GLubyte input[N_TEXELS][MAX_COMP],
579 GLubyte reord[N_TEXELS][MAX_COMP], GLint n)
580 {
581 const GLint n_vect = 3; /* 3 base vectors to find */
582 const GLint n_comp = 4; /* 4 components: R, G, B, A */
583 GLfloat vec[MAX_VECT][MAX_COMP];
584 GLint i, j, k;
585 Fx64 hi; /* high quadword */
586 GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
587
588 /* the last vector indicates zero */
589 for (i = 0; i < n_comp; i++) {
590 vec[n_vect][i] = 0;
591 }
592
593 /* the first n texels in reord are guaranteed to be non-zero */
594 if (fxt1_choose(vec, n_vect, reord, n_comp, n) != 0) {
595 fxt1_lloyd(vec, n_vect, reord, n_comp, n);
596 }
597
598 FX64_MOV32(hi, 6); /* alpha = "011" + lerp = 0 */
599 for (j = n_vect - 1; j >= 0; j--) {
600 /* add in alphas */
601 FX64_SHL(hi, 5);
602 FX64_OR32(hi, (GLuint)(vec[j][ACOMP] / 8.0F));
603 }
604 for (j = n_vect - 1; j >= 0; j--) {
605 for (i = 0; i < n_comp - 1; i++) {
606 /* add in colors */
607 FX64_SHL(hi, 5);
608 FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
609 }
610 }
611 ((Fx64 *)cc)[1] = hi;
612
613 lohi = lolo = 0;
614 /* right microtile */
615 for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) {
616 lohi <<= 2;
617 lohi |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
618 }
619 /* left microtile */
620 for (; k >= 0; k--) {
621 lolo <<= 2;
622 lolo |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
623 }
624 cc[1] = lohi;
625 cc[0] = lolo;
626 }
627
628
629 static void
630 fxt1_quantize_ALPHA1 (GLuint *cc,
631 GLubyte input[N_TEXELS][MAX_COMP])
632 {
633 const GLint n_vect = 3; /* highest vector number in each microtile */
634 const GLint n_comp = 4; /* 4 components: R, G, B, A */
635 GLfloat vec[1 + 1 + 1][MAX_COMP]; /* 1.5 extrema for each sub-block */
636 GLfloat b, iv[MAX_COMP]; /* interpolation vector */
637 GLint i, j, k;
638 Fx64 hi; /* high quadword */
639 GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
640
641 GLint minSum;
642 GLint maxSum;
643 GLint minColL = 0, maxColL = 0;
644 GLint minColR = 0, maxColR = 0;
645 GLint sumL = 0, sumR = 0;
646 GLint nn_comp;
647 /* Our solution here is to find the darkest and brightest colors in
648 * the 4x4 tile and use those as the two representative colors.
649 * There are probably better algorithms to use (histogram-based).
650 */
651 nn_comp = n_comp;
652 while ((minColL == maxColL) && nn_comp) {
653 minSum = 2000; /* big enough */
654 maxSum = -1; /* small enough */
655 for (k = 0; k < N_TEXELS / 2; k++) {
656 GLint sum = 0;
657 for (i = 0; i < nn_comp; i++) {
658 sum += input[k][i];
659 }
660 if (minSum > sum) {
661 minSum = sum;
662 minColL = k;
663 }
664 if (maxSum < sum) {
665 maxSum = sum;
666 maxColL = k;
667 }
668 sumL += sum;
669 }
670
671 nn_comp--;
672 }
673
674 nn_comp = n_comp;
675 while ((minColR == maxColR) && nn_comp) {
676 minSum = 2000; /* big enough */
677 maxSum = -1; /* small enough */
678 for (k = N_TEXELS / 2; k < N_TEXELS; k++) {
679 GLint sum = 0;
680 for (i = 0; i < nn_comp; i++) {
681 sum += input[k][i];
682 }
683 if (minSum > sum) {
684 minSum = sum;
685 minColR = k;
686 }
687 if (maxSum < sum) {
688 maxSum = sum;
689 maxColR = k;
690 }
691 sumR += sum;
692 }
693
694 nn_comp--;
695 }
696
697 /* choose the common vector (yuck!) */
698 {
699 GLint j1, j2;
700 GLint v1 = 0, v2 = 0;
701 GLfloat err = 1e9; /* big enough */
702 GLfloat tv[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
703 for (i = 0; i < n_comp; i++) {
704 tv[0][i] = input[minColL][i];
705 tv[1][i] = input[maxColL][i];
706 tv[2][i] = input[minColR][i];
707 tv[3][i] = input[maxColR][i];
708 }
709 for (j1 = 0; j1 < 2; j1++) {
710 for (j2 = 2; j2 < 4; j2++) {
711 GLfloat e = 0.0F;
712 for (i = 0; i < n_comp; i++) {
713 e += (tv[j1][i] - tv[j2][i]) * (tv[j1][i] - tv[j2][i]);
714 }
715 if (e < err) {
716 err = e;
717 v1 = j1;
718 v2 = j2;
719 }
720 }
721 }
722 for (i = 0; i < n_comp; i++) {
723 vec[0][i] = tv[1 - v1][i];
724 vec[1][i] = (tv[v1][i] * sumL + tv[v2][i] * sumR) / (sumL + sumR);
725 vec[2][i] = tv[5 - v2][i];
726 }
727 }
728
729 /* left microtile */
730 cc[0] = 0;
731 if (minColL != maxColL) {
732 /* compute interpolation vector */
733 MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
734
735 /* add in texels */
736 lolo = 0;
737 for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
738 GLint texel;
739 /* interpolate color */
740 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
741 /* add in texel */
742 lolo <<= 2;
743 lolo |= texel;
744 }
745
746 cc[0] = lolo;
747 }
748
749 /* right microtile */
750 cc[1] = 0;
751 if (minColR != maxColR) {
752 /* compute interpolation vector */
753 MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[1]);
754
755 /* add in texels */
756 lohi = 0;
757 for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
758 GLint texel;
759 /* interpolate color */
760 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
761 /* add in texel */
762 lohi <<= 2;
763 lohi |= texel;
764 }
765
766 cc[1] = lohi;
767 }
768
769 FX64_MOV32(hi, 7); /* alpha = "011" + lerp = 1 */
770 for (j = n_vect - 1; j >= 0; j--) {
771 /* add in alphas */
772 FX64_SHL(hi, 5);
773 FX64_OR32(hi, (GLuint)(vec[j][ACOMP] / 8.0F));
774 }
775 for (j = n_vect - 1; j >= 0; j--) {
776 for (i = 0; i < n_comp - 1; i++) {
777 /* add in colors */
778 FX64_SHL(hi, 5);
779 FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
780 }
781 }
782 ((Fx64 *)cc)[1] = hi;
783 }
784
785
786 static void
787 fxt1_quantize_HI (GLuint *cc,
788 GLubyte input[N_TEXELS][MAX_COMP],
789 GLubyte reord[N_TEXELS][MAX_COMP], GLint n)
790 {
791 const GLint n_vect = 6; /* highest vector number */
792 const GLint n_comp = 3; /* 3 components: R, G, B */
793 GLfloat b = 0.0F; /* phoudoin: silent compiler! */
794 GLfloat iv[MAX_COMP]; /* interpolation vector */
795 GLint i, k;
796 GLuint hihi; /* high quadword: hi dword */
797
798 GLint minSum = 2000; /* big enough */
799 GLint maxSum = -1; /* small enough */
800 GLint minCol = 0; /* phoudoin: silent compiler! */
801 GLint maxCol = 0; /* phoudoin: silent compiler! */
802
803 /* Our solution here is to find the darkest and brightest colors in
804 * the 8x4 tile and use those as the two representative colors.
805 * There are probably better algorithms to use (histogram-based).
806 */
807 for (k = 0; k < n; k++) {
808 GLint sum = 0;
809 for (i = 0; i < n_comp; i++) {
810 sum += reord[k][i];
811 }
812 if (minSum > sum) {
813 minSum = sum;
814 minCol = k;
815 }
816 if (maxSum < sum) {
817 maxSum = sum;
818 maxCol = k;
819 }
820 }
821
822 hihi = 0; /* cc-hi = "00" */
823 for (i = 0; i < n_comp; i++) {
824 /* add in colors */
825 hihi <<= 5;
826 hihi |= reord[maxCol][i] >> 3;
827 }
828 for (i = 0; i < n_comp; i++) {
829 /* add in colors */
830 hihi <<= 5;
831 hihi |= reord[minCol][i] >> 3;
832 }
833 cc[3] = hihi;
834 cc[0] = cc[1] = cc[2] = 0;
835
836 /* compute interpolation vector */
837 if (minCol != maxCol) {
838 MAKEIVEC(n_vect, n_comp, iv, b, reord[minCol], reord[maxCol]);
839 }
840
841 /* add in texels */
842 for (k = N_TEXELS - 1; k >= 0; k--) {
843 GLint t = k * 3;
844 GLuint *kk = (GLuint *)((char *)cc + t / 8);
845 GLint texel = n_vect + 1; /* transparent black */
846
847 if (!ISTBLACK(input[k])) {
848 if (minCol != maxCol) {
849 /* interpolate color */
850 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
851 /* add in texel */
852 kk[0] |= texel << (t & 7);
853 }
854 } else {
855 /* add in texel */
856 kk[0] |= texel << (t & 7);
857 }
858 }
859 }
860
861
862 static void
863 fxt1_quantize_MIXED1 (GLuint *cc,
864 GLubyte input[N_TEXELS][MAX_COMP])
865 {
866 const GLint n_vect = 2; /* highest vector number in each microtile */
867 const GLint n_comp = 3; /* 3 components: R, G, B */
868 GLubyte vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
869 GLfloat b, iv[MAX_COMP]; /* interpolation vector */
870 GLint i, j, k;
871 Fx64 hi; /* high quadword */
872 GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
873
874 GLint minSum;
875 GLint maxSum;
876 GLint minColL = 0, maxColL = -1;
877 GLint minColR = 0, maxColR = -1;
878
879 /* Our solution here is to find the darkest and brightest colors in
880 * the 4x4 tile and use those as the two representative colors.
881 * There are probably better algorithms to use (histogram-based).
882 */
883 minSum = 2000; /* big enough */
884 maxSum = -1; /* small enough */
885 for (k = 0; k < N_TEXELS / 2; k++) {
886 if (!ISTBLACK(input[k])) {
887 GLint sum = 0;
888 for (i = 0; i < n_comp; i++) {
889 sum += input[k][i];
890 }
891 if (minSum > sum) {
892 minSum = sum;
893 minColL = k;
894 }
895 if (maxSum < sum) {
896 maxSum = sum;
897 maxColL = k;
898 }
899 }
900 }
901 minSum = 2000; /* big enough */
902 maxSum = -1; /* small enough */
903 for (; k < N_TEXELS; k++) {
904 if (!ISTBLACK(input[k])) {
905 GLint sum = 0;
906 for (i = 0; i < n_comp; i++) {
907 sum += input[k][i];
908 }
909 if (minSum > sum) {
910 minSum = sum;
911 minColR = k;
912 }
913 if (maxSum < sum) {
914 maxSum = sum;
915 maxColR = k;
916 }
917 }
918 }
919
920 /* left microtile */
921 if (maxColL == -1) {
922 /* all transparent black */
923 cc[0] = ~0u;
924 for (i = 0; i < n_comp; i++) {
925 vec[0][i] = 0;
926 vec[1][i] = 0;
927 }
928 } else {
929 cc[0] = 0;
930 for (i = 0; i < n_comp; i++) {
931 vec[0][i] = input[minColL][i];
932 vec[1][i] = input[maxColL][i];
933 }
934 if (minColL != maxColL) {
935 /* compute interpolation vector */
936 MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
937
938 /* add in texels */
939 lolo = 0;
940 for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
941 GLint texel = n_vect + 1; /* transparent black */
942 if (!ISTBLACK(input[k])) {
943 /* interpolate color */
944 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
945 }
946 /* add in texel */
947 lolo <<= 2;
948 lolo |= texel;
949 }
950 cc[0] = lolo;
951 }
952 }
953
954 /* right microtile */
955 if (maxColR == -1) {
956 /* all transparent black */
957 cc[1] = ~0u;
958 for (i = 0; i < n_comp; i++) {
959 vec[2][i] = 0;
960 vec[3][i] = 0;
961 }
962 } else {
963 cc[1] = 0;
964 for (i = 0; i < n_comp; i++) {
965 vec[2][i] = input[minColR][i];
966 vec[3][i] = input[maxColR][i];
967 }
968 if (minColR != maxColR) {
969 /* compute interpolation vector */
970 MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);
971
972 /* add in texels */
973 lohi = 0;
974 for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
975 GLint texel = n_vect + 1; /* transparent black */
976 if (!ISTBLACK(input[k])) {
977 /* interpolate color */
978 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
979 }
980 /* add in texel */
981 lohi <<= 2;
982 lohi |= texel;
983 }
984 cc[1] = lohi;
985 }
986 }
987
988 FX64_MOV32(hi, 9 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
989 for (j = 2 * 2 - 1; j >= 0; j--) {
990 for (i = 0; i < n_comp; i++) {
991 /* add in colors */
992 FX64_SHL(hi, 5);
993 FX64_OR32(hi, vec[j][i] >> 3);
994 }
995 }
996 ((Fx64 *)cc)[1] = hi;
997 }
998
999
1000 static void
1001 fxt1_quantize_MIXED0 (GLuint *cc,
1002 GLubyte input[N_TEXELS][MAX_COMP])
1003 {
1004 const GLint n_vect = 3; /* highest vector number in each microtile */
1005 const GLint n_comp = 3; /* 3 components: R, G, B */
1006 GLubyte vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
1007 GLfloat b, iv[MAX_COMP]; /* interpolation vector */
1008 GLint i, j, k;
1009 Fx64 hi; /* high quadword */
1010 GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
1011
1012 GLint minColL = 0, maxColL = 0;
1013 GLint minColR = 0, maxColR = 0;
1014 #if 0
1015 GLint minSum;
1016 GLint maxSum;
1017
1018 /* Our solution here is to find the darkest and brightest colors in
1019 * the 4x4 tile and use those as the two representative colors.
1020 * There are probably better algorithms to use (histogram-based).
1021 */
1022 minSum = 2000; /* big enough */
1023 maxSum = -1; /* small enough */
1024 for (k = 0; k < N_TEXELS / 2; k++) {
1025 GLint sum = 0;
1026 for (i = 0; i < n_comp; i++) {
1027 sum += input[k][i];
1028 }
1029 if (minSum > sum) {
1030 minSum = sum;
1031 minColL = k;
1032 }
1033 if (maxSum < sum) {
1034 maxSum = sum;
1035 maxColL = k;
1036 }
1037 }
1038 minSum = 2000; /* big enough */
1039 maxSum = -1; /* small enough */
1040 for (; k < N_TEXELS; k++) {
1041 GLint sum = 0;
1042 for (i = 0; i < n_comp; i++) {
1043 sum += input[k][i];
1044 }
1045 if (minSum > sum) {
1046 minSum = sum;
1047 minColR = k;
1048 }
1049 if (maxSum < sum) {
1050 maxSum = sum;
1051 maxColR = k;
1052 }
1053 }
1054 #else
1055 GLint minVal;
1056 GLint maxVal;
1057 GLint maxVarL = fxt1_variance(NULL, input, n_comp, N_TEXELS / 2);
1058 GLint maxVarR = fxt1_variance(NULL, &input[N_TEXELS / 2], n_comp, N_TEXELS / 2);
1059
1060 /* Scan the channel with max variance for lo & hi
1061 * and use those as the two representative colors.
1062 */
1063 minVal = 2000; /* big enough */
1064 maxVal = -1; /* small enough */
1065 for (k = 0; k < N_TEXELS / 2; k++) {
1066 GLint t = input[k][maxVarL];
1067 if (minVal > t) {
1068 minVal = t;
1069 minColL = k;
1070 }
1071 if (maxVal < t) {
1072 maxVal = t;
1073 maxColL = k;
1074 }
1075 }
1076 minVal = 2000; /* big enough */
1077 maxVal = -1; /* small enough */
1078 for (; k < N_TEXELS; k++) {
1079 GLint t = input[k][maxVarR];
1080 if (minVal > t) {
1081 minVal = t;
1082 minColR = k;
1083 }
1084 if (maxVal < t) {
1085 maxVal = t;
1086 maxColR = k;
1087 }
1088 }
1089 #endif
1090
1091 /* left microtile */
1092 cc[0] = 0;
1093 for (i = 0; i < n_comp; i++) {
1094 vec[0][i] = input[minColL][i];
1095 vec[1][i] = input[maxColL][i];
1096 }
1097 if (minColL != maxColL) {
1098 /* compute interpolation vector */
1099 MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
1100
1101 /* add in texels */
1102 lolo = 0;
1103 for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
1104 GLint texel;
1105 /* interpolate color */
1106 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
1107 /* add in texel */
1108 lolo <<= 2;
1109 lolo |= texel;
1110 }
1111
1112 /* funky encoding for LSB of green */
1113 if ((GLint)((lolo >> 1) & 1) != (((vec[1][GCOMP] ^ vec[0][GCOMP]) >> 2) & 1)) {
1114 for (i = 0; i < n_comp; i++) {
1115 vec[1][i] = input[minColL][i];
1116 vec[0][i] = input[maxColL][i];
1117 }
1118 lolo = ~lolo;
1119 }
1120
1121 cc[0] = lolo;
1122 }
1123
1124 /* right microtile */
1125 cc[1] = 0;
1126 for (i = 0; i < n_comp; i++) {
1127 vec[2][i] = input[minColR][i];
1128 vec[3][i] = input[maxColR][i];
1129 }
1130 if (minColR != maxColR) {
1131 /* compute interpolation vector */
1132 MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);
1133
1134 /* add in texels */
1135 lohi = 0;
1136 for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
1137 GLint texel;
1138 /* interpolate color */
1139 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
1140 /* add in texel */
1141 lohi <<= 2;
1142 lohi |= texel;
1143 }
1144
1145 /* funky encoding for LSB of green */
1146 if ((GLint)((lohi >> 1) & 1) != (((vec[3][GCOMP] ^ vec[2][GCOMP]) >> 2) & 1)) {
1147 for (i = 0; i < n_comp; i++) {
1148 vec[3][i] = input[minColR][i];
1149 vec[2][i] = input[maxColR][i];
1150 }
1151 lohi = ~lohi;
1152 }
1153
1154 cc[1] = lohi;
1155 }
1156
1157 FX64_MOV32(hi, 8 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
1158 for (j = 2 * 2 - 1; j >= 0; j--) {
1159 for (i = 0; i < n_comp; i++) {
1160 /* add in colors */
1161 FX64_SHL(hi, 5);
1162 FX64_OR32(hi, vec[j][i] >> 3);
1163 }
1164 }
1165 ((Fx64 *)cc)[1] = hi;
1166 }
1167
1168
1169 static void
1170 fxt1_quantize (GLuint *cc, const GLubyte *lines[], GLint comps)
1171 {
1172 GLint trualpha;
1173 GLubyte reord[N_TEXELS][MAX_COMP];
1174
1175 GLubyte input[N_TEXELS][MAX_COMP];
1176 GLint i, k, l;
1177
1178 if (comps == 3) {
1179 /* make the whole block opaque */
1180 memset(input, -1, sizeof(input));
1181 }
1182
1183 /* 8 texels each line */
1184 for (l = 0; l < 4; l++) {
1185 for (k = 0; k < 4; k++) {
1186 for (i = 0; i < comps; i++) {
1187 input[k + l * 4][i] = *lines[l]++;
1188 }
1189 }
1190 for (; k < 8; k++) {
1191 for (i = 0; i < comps; i++) {
1192 input[k + l * 4 + 12][i] = *lines[l]++;
1193 }
1194 }
1195 }
1196
1197 /* block layout:
1198 * 00, 01, 02, 03, 08, 09, 0a, 0b
1199 * 10, 11, 12, 13, 18, 19, 1a, 1b
1200 * 04, 05, 06, 07, 0c, 0d, 0e, 0f
1201 * 14, 15, 16, 17, 1c, 1d, 1e, 1f
1202 */
1203
1204 /* [dBorca]
1205 * stupidity flows forth from this
1206 */
1207 l = N_TEXELS;
1208 trualpha = 0;
1209 if (comps == 4) {
1210 /* skip all transparent black texels */
1211 l = 0;
1212 for (k = 0; k < N_TEXELS; k++) {
1213 /* test all components against 0 */
1214 if (!ISTBLACK(input[k])) {
1215 /* texel is not transparent black */
1216 COPY_4UBV(reord[l], input[k]);
1217 if (reord[l][ACOMP] < (255 - ALPHA_TS)) {
1218 /* non-opaque texel */
1219 trualpha = !0;
1220 }
1221 l++;
1222 }
1223 }
1224 }
1225
1226 #if 0
1227 if (trualpha) {
1228 fxt1_quantize_ALPHA0(cc, input, reord, l);
1229 } else if (l == 0) {
1230 cc[0] = cc[1] = cc[2] = -1;
1231 cc[3] = 0;
1232 } else if (l < N_TEXELS) {
1233 fxt1_quantize_HI(cc, input, reord, l);
1234 } else {
1235 fxt1_quantize_CHROMA(cc, input);
1236 }
1237 (void)fxt1_quantize_ALPHA1;
1238 (void)fxt1_quantize_MIXED1;
1239 (void)fxt1_quantize_MIXED0;
1240 #else
1241 if (trualpha) {
1242 fxt1_quantize_ALPHA1(cc, input);
1243 } else if (l == 0) {
1244 cc[0] = cc[1] = cc[2] = ~0u;
1245 cc[3] = 0;
1246 } else if (l < N_TEXELS) {
1247 fxt1_quantize_MIXED1(cc, input);
1248 } else {
1249 fxt1_quantize_MIXED0(cc, input);
1250 }
1251 (void)fxt1_quantize_ALPHA0;
1252 (void)fxt1_quantize_HI;
1253 (void)fxt1_quantize_CHROMA;
1254 #endif
1255 }
1256
1257
1258
1259 /**
1260 * Upscale an image by replication, not (typical) stretching.
1261 * We use this when the image width or height is less than a
1262 * certain size (4, 8) and we need to upscale an image.
1263 */
1264 static void
1265 upscale_teximage2d(GLsizei inWidth, GLsizei inHeight,
1266 GLsizei outWidth, GLsizei outHeight,
1267 GLint comps, const GLubyte *src, GLint srcRowStride,
1268 GLubyte *dest )
1269 {
1270 GLint i, j, k;
1271
1272 assert(outWidth >= inWidth);
1273 assert(outHeight >= inHeight);
1274 #if 0
1275 assert(inWidth == 1 || inWidth == 2 || inHeight == 1 || inHeight == 2);
1276 assert((outWidth & 3) == 0);
1277 assert((outHeight & 3) == 0);
1278 #endif
1279
1280 for (i = 0; i < outHeight; i++) {
1281 const GLint ii = i % inHeight;
1282 for (j = 0; j < outWidth; j++) {
1283 const GLint jj = j % inWidth;
1284 for (k = 0; k < comps; k++) {
1285 dest[(i * outWidth + j) * comps + k]
1286 = src[ii * srcRowStride + jj * comps + k];
1287 }
1288 }
1289 }
1290 }
1291
1292
1293 static void
1294 fxt1_encode (GLuint width, GLuint height, GLint comps,
1295 const void *source, GLint srcRowStride,
1296 void *dest, GLint destRowStride)
1297 {
1298 GLuint x, y;
1299 const GLubyte *data;
1300 GLuint *encoded = (GLuint *)dest;
1301 void *newSource = NULL;
1302
1303 assert(comps == 3 || comps == 4);
1304
1305 /* Replicate image if width is not M8 or height is not M4 */
1306 if ((width & 7) | (height & 3)) {
1307 GLint newWidth = (width + 7) & ~7;
1308 GLint newHeight = (height + 3) & ~3;
1309 newSource = malloc(comps * newWidth * newHeight * sizeof(GLubyte));
1310 if (!newSource) {
1311 GET_CURRENT_CONTEXT(ctx);
1312 _mesa_error(ctx, GL_OUT_OF_MEMORY, "texture compression");
1313 goto cleanUp;
1314 }
1315 upscale_teximage2d(width, height, newWidth, newHeight,
1316 comps, (const GLubyte *) source,
1317 srcRowStride, (GLubyte *) newSource);
1318 source = newSource;
1319 width = newWidth;
1320 height = newHeight;
1321 srcRowStride = comps * newWidth;
1322 }
1323
1324 data = (const GLubyte *) source;
1325 destRowStride = (destRowStride - width * 2) / 4;
1326 for (y = 0; y < height; y += 4) {
1327 GLuint offs = 0 + (y + 0) * srcRowStride;
1328 for (x = 0; x < width; x += 8) {
1329 const GLubyte *lines[4];
1330 lines[0] = &data[offs];
1331 lines[1] = lines[0] + srcRowStride;
1332 lines[2] = lines[1] + srcRowStride;
1333 lines[3] = lines[2] + srcRowStride;
1334 offs += 8 * comps;
1335 fxt1_quantize(encoded, lines, comps);
1336 /* 128 bits per 8x4 block */
1337 encoded += 4;
1338 }
1339 encoded += destRowStride;
1340 }
1341
1342 cleanUp:
1343 free(newSource);
1344 }
1345
1346
1347 /***************************************************************************\
1348 * FXT1 decoder
1349 *
1350 * The decoder is based on GL_3DFX_texture_compression_FXT1
1351 * specification and serves as a concept for the encoder.
1352 \***************************************************************************/
1353
1354
1355 /* lookup table for scaling 5 bit colors up to 8 bits */
1356 static const GLubyte _rgb_scale_5[] = {
1357 0, 8, 16, 25, 33, 41, 49, 58,
1358 66, 74, 82, 90, 99, 107, 115, 123,
1359 132, 140, 148, 156, 165, 173, 181, 189,
1360 197, 206, 214, 222, 230, 239, 247, 255
1361 };
1362
1363 /* lookup table for scaling 6 bit colors up to 8 bits */
1364 static const GLubyte _rgb_scale_6[] = {
1365 0, 4, 8, 12, 16, 20, 24, 28,
1366 32, 36, 40, 45, 49, 53, 57, 61,
1367 65, 69, 73, 77, 81, 85, 89, 93,
1368 97, 101, 105, 109, 113, 117, 121, 125,
1369 130, 134, 138, 142, 146, 150, 154, 158,
1370 162, 166, 170, 174, 178, 182, 186, 190,
1371 194, 198, 202, 206, 210, 215, 219, 223,
1372 227, 231, 235, 239, 243, 247, 251, 255
1373 };
1374
1375
/*
 * Decoder helpers.
 *
 * CC_SEL(cc, which)   view cc as an array of GLuint words and return the
 *                     word containing bit number `which`, shifted right so
 *                     that bit lands in bit 0.  Higher bits are NOT masked
 *                     off, and nothing is fetched from the following word.
 * UP5(c)              expand the low 5 bits of c to 8 bits.
 * UP6(c, b)           expand the 6-bit value made of the low 5 bits of c
 *                     followed by the low bit of b to 8 bits.
 * LERP(n, t, c0, c1)  rounded integer interpolation: the value t/n of the
 *                     way from c0 to c1.  n and t are evaluated more than
 *                     once, so pass side-effect-free expressions.
 *
 * Every expansion is fully parenthesized so it can be used as an operand
 * of any surrounding expression.
 */
#define CC_SEL(cc, which) (((const GLuint *)(cc))[(which) / 32] >> ((which) & 31))
#define UP5(c) (_rgb_scale_5[(c) & 31])
#define UP6(c, b) (_rgb_scale_6[(((c) & 31) << 1) | ((b) & 1)])
#define LERP(n, t, c0, c1) ((((n) - (t)) * (c0) + (t) * (c1) + (n) / 2) / (n))
1380
1381
1382 static void
1383 fxt1_decode_1HI (const GLubyte *code, GLint t, GLubyte *rgba)
1384 {
1385 const GLuint *cc;
1386
1387 t *= 3;
1388 cc = (const GLuint *)(code + t / 8);
1389 t = (cc[0] >> (t & 7)) & 7;
1390
1391 if (t == 7) {
1392 rgba[RCOMP] = rgba[GCOMP] = rgba[BCOMP] = rgba[ACOMP] = 0;
1393 } else {
1394 GLubyte r, g, b;
1395 cc = (const GLuint *)(code + 12);
1396 if (t == 0) {
1397 b = UP5(CC_SEL(cc, 0));
1398 g = UP5(CC_SEL(cc, 5));
1399 r = UP5(CC_SEL(cc, 10));
1400 } else if (t == 6) {
1401 b = UP5(CC_SEL(cc, 15));
1402 g = UP5(CC_SEL(cc, 20));
1403 r = UP5(CC_SEL(cc, 25));
1404 } else {
1405 b = LERP(6, t, UP5(CC_SEL(cc, 0)), UP5(CC_SEL(cc, 15)));
1406 g = LERP(6, t, UP5(CC_SEL(cc, 5)), UP5(CC_SEL(cc, 20)));
1407 r = LERP(6, t, UP5(CC_SEL(cc, 10)), UP5(CC_SEL(cc, 25)));
1408 }
1409 rgba[RCOMP] = r;
1410 rgba[GCOMP] = g;
1411 rgba[BCOMP] = b;
1412 rgba[ACOMP] = 255;
1413 }
1414 }
1415
1416
1417 static void
1418 fxt1_decode_1CHROMA (const GLubyte *code, GLint t, GLubyte *rgba)
1419 {
1420 const GLuint *cc;
1421 GLuint kk;
1422
1423 cc = (const GLuint *)code;
1424 if (t & 16) {
1425 cc++;
1426 t &= 15;
1427 }
1428 t = (cc[0] >> (t * 2)) & 3;
1429
1430 t *= 15;
1431 cc = (const GLuint *)(code + 8 + t / 8);
1432 kk = cc[0] >> (t & 7);
1433 rgba[BCOMP] = UP5(kk);
1434 rgba[GCOMP] = UP5(kk >> 5);
1435 rgba[RCOMP] = UP5(kk >> 10);
1436 rgba[ACOMP] = 255;
1437 }
1438
1439
1440 static void
1441 fxt1_decode_1MIXED (const GLubyte *code, GLint t, GLubyte *rgba)
1442 {
1443 const GLuint *cc;
1444 GLuint col[2][3];
1445 GLint glsb, selb;
1446
1447 cc = (const GLuint *)code;
1448 if (t & 16) {
1449 t &= 15;
1450 t = (cc[1] >> (t * 2)) & 3;
1451 /* col 2 */
1452 col[0][BCOMP] = (*(const GLuint *)(code + 11)) >> 6;
1453 col[0][GCOMP] = CC_SEL(cc, 99);
1454 col[0][RCOMP] = CC_SEL(cc, 104);
1455 /* col 3 */
1456 col[1][BCOMP] = CC_SEL(cc, 109);
1457 col[1][GCOMP] = CC_SEL(cc, 114);
1458 col[1][RCOMP] = CC_SEL(cc, 119);
1459 glsb = CC_SEL(cc, 126);
1460 selb = CC_SEL(cc, 33);
1461 } else {
1462 t = (cc[0] >> (t * 2)) & 3;
1463 /* col 0 */
1464 col[0][BCOMP] = CC_SEL(cc, 64);
1465 col[0][GCOMP] = CC_SEL(cc, 69);
1466 col[0][RCOMP] = CC_SEL(cc, 74);
1467 /* col 1 */
1468 col[1][BCOMP] = CC_SEL(cc, 79);
1469 col[1][GCOMP] = CC_SEL(cc, 84);
1470 col[1][RCOMP] = CC_SEL(cc, 89);
1471 glsb = CC_SEL(cc, 125);
1472 selb = CC_SEL(cc, 1);
1473 }
1474
1475 if (CC_SEL(cc, 124) & 1) {
1476 /* alpha[0] == 1 */
1477
1478 if (t == 3) {
1479 /* zero */
1480 rgba[RCOMP] = rgba[BCOMP] = rgba[GCOMP] = rgba[ACOMP] = 0;
1481 } else {
1482 GLubyte r, g, b;
1483 if (t == 0) {
1484 b = UP5(col[0][BCOMP]);
1485 g = UP5(col[0][GCOMP]);
1486 r = UP5(col[0][RCOMP]);
1487 } else if (t == 2) {
1488 b = UP5(col[1][BCOMP]);
1489 g = UP6(col[1][GCOMP], glsb);
1490 r = UP5(col[1][RCOMP]);
1491 } else {
1492 b = (UP5(col[0][BCOMP]) + UP5(col[1][BCOMP])) / 2;
1493 g = (UP5(col[0][GCOMP]) + UP6(col[1][GCOMP], glsb)) / 2;
1494 r = (UP5(col[0][RCOMP]) + UP5(col[1][RCOMP])) / 2;
1495 }
1496 rgba[RCOMP] = r;
1497 rgba[GCOMP] = g;
1498 rgba[BCOMP] = b;
1499 rgba[ACOMP] = 255;
1500 }
1501 } else {
1502 /* alpha[0] == 0 */
1503 GLubyte r, g, b;
1504 if (t == 0) {
1505 b = UP5(col[0][BCOMP]);
1506 g = UP6(col[0][GCOMP], glsb ^ selb);
1507 r = UP5(col[0][RCOMP]);
1508 } else if (t == 3) {
1509 b = UP5(col[1][BCOMP]);
1510 g = UP6(col[1][GCOMP], glsb);
1511 r = UP5(col[1][RCOMP]);
1512 } else {
1513 b = LERP(3, t, UP5(col[0][BCOMP]), UP5(col[1][BCOMP]));
1514 g = LERP(3, t, UP6(col[0][GCOMP], glsb ^ selb),
1515 UP6(col[1][GCOMP], glsb));
1516 r = LERP(3, t, UP5(col[0][RCOMP]), UP5(col[1][RCOMP]));
1517 }
1518 rgba[RCOMP] = r;
1519 rgba[GCOMP] = g;
1520 rgba[BCOMP] = b;
1521 rgba[ACOMP] = 255;
1522 }
1523 }
1524
1525
1526 static void
1527 fxt1_decode_1ALPHA (const GLubyte *code, GLint t, GLubyte *rgba)
1528 {
1529 const GLuint *cc;
1530 GLubyte r, g, b, a;
1531
1532 cc = (const GLuint *)code;
1533 if (CC_SEL(cc, 124) & 1) {
1534 /* lerp == 1 */
1535 GLuint col0[4];
1536
1537 if (t & 16) {
1538 t &= 15;
1539 t = (cc[1] >> (t * 2)) & 3;
1540 /* col 2 */
1541 col0[BCOMP] = (*(const GLuint *)(code + 11)) >> 6;
1542 col0[GCOMP] = CC_SEL(cc, 99);
1543 col0[RCOMP] = CC_SEL(cc, 104);
1544 col0[ACOMP] = CC_SEL(cc, 119);
1545 } else {
1546 t = (cc[0] >> (t * 2)) & 3;
1547 /* col 0 */
1548 col0[BCOMP] = CC_SEL(cc, 64);
1549 col0[GCOMP] = CC_SEL(cc, 69);
1550 col0[RCOMP] = CC_SEL(cc, 74);
1551 col0[ACOMP] = CC_SEL(cc, 109);
1552 }
1553
1554 if (t == 0) {
1555 b = UP5(col0[BCOMP]);
1556 g = UP5(col0[GCOMP]);
1557 r = UP5(col0[RCOMP]);
1558 a = UP5(col0[ACOMP]);
1559 } else if (t == 3) {
1560 b = UP5(CC_SEL(cc, 79));
1561 g = UP5(CC_SEL(cc, 84));
1562 r = UP5(CC_SEL(cc, 89));
1563 a = UP5(CC_SEL(cc, 114));
1564 } else {
1565 b = LERP(3, t, UP5(col0[BCOMP]), UP5(CC_SEL(cc, 79)));
1566 g = LERP(3, t, UP5(col0[GCOMP]), UP5(CC_SEL(cc, 84)));
1567 r = LERP(3, t, UP5(col0[RCOMP]), UP5(CC_SEL(cc, 89)));
1568 a = LERP(3, t, UP5(col0[ACOMP]), UP5(CC_SEL(cc, 114)));
1569 }
1570 } else {
1571 /* lerp == 0 */
1572
1573 if (t & 16) {
1574 cc++;
1575 t &= 15;
1576 }
1577 t = (cc[0] >> (t * 2)) & 3;
1578
1579 if (t == 3) {
1580 /* zero */
1581 r = g = b = a = 0;
1582 } else {
1583 GLuint kk;
1584 cc = (const GLuint *)code;
1585 a = UP5(cc[3] >> (t * 5 + 13));
1586 t *= 15;
1587 cc = (const GLuint *)(code + 8 + t / 8);
1588 kk = cc[0] >> (t & 7);
1589 b = UP5(kk);
1590 g = UP5(kk >> 5);
1591 r = UP5(kk >> 10);
1592 }
1593 }
1594 rgba[RCOMP] = r;
1595 rgba[GCOMP] = g;
1596 rgba[BCOMP] = b;
1597 rgba[ACOMP] = a;
1598 }
1599
1600
1601 static void
1602 fxt1_decode_1 (const void *texture, GLint stride, /* in pixels */
1603 GLint i, GLint j, GLubyte *rgba)
1604 {
1605 static void (*decode_1[]) (const GLubyte *, GLint, GLubyte *) = {
1606 fxt1_decode_1HI, /* cc-high = "00?" */
1607 fxt1_decode_1HI, /* cc-high = "00?" */
1608 fxt1_decode_1CHROMA, /* cc-chroma = "010" */
1609 fxt1_decode_1ALPHA, /* alpha = "011" */
1610 fxt1_decode_1MIXED, /* mixed = "1??" */
1611 fxt1_decode_1MIXED, /* mixed = "1??" */
1612 fxt1_decode_1MIXED, /* mixed = "1??" */
1613 fxt1_decode_1MIXED /* mixed = "1??" */
1614 };
1615
1616 const GLubyte *code = (const GLubyte *)texture +
1617 ((j / 4) * (stride / 8) + (i / 8)) * 16;
1618 GLint mode = CC_SEL(code, 125);
1619 GLint t = i & 7;
1620
1621 if (t & 4) {
1622 t += 12;
1623 }
1624 t += (j & 3) * 4;
1625
1626 decode_1[mode](code, t, rgba);
1627 }
1628
1629
1630
1631
1632 static void
1633 fetch_rgb_fxt1(const GLubyte *map,
1634 GLint rowStride, GLint i, GLint j, GLfloat *texel)
1635 {
1636 GLubyte rgba[4];
1637 fxt1_decode_1(map, rowStride, i, j, rgba);
1638 texel[RCOMP] = UBYTE_TO_FLOAT(rgba[RCOMP]);
1639 texel[GCOMP] = UBYTE_TO_FLOAT(rgba[GCOMP]);
1640 texel[BCOMP] = UBYTE_TO_FLOAT(rgba[BCOMP]);
1641 texel[ACOMP] = 1.0F;
1642 }
1643
1644
1645 static void
1646 fetch_rgba_fxt1(const GLubyte *map,
1647 GLint rowStride, GLint i, GLint j, GLfloat *texel)
1648 {
1649 GLubyte rgba[4];
1650 fxt1_decode_1(map, rowStride, i, j, rgba);
1651 texel[RCOMP] = UBYTE_TO_FLOAT(rgba[RCOMP]);
1652 texel[GCOMP] = UBYTE_TO_FLOAT(rgba[GCOMP]);
1653 texel[BCOMP] = UBYTE_TO_FLOAT(rgba[BCOMP]);
1654 texel[ACOMP] = UBYTE_TO_FLOAT(rgba[ACOMP]);
1655 }
1656
1657
1658 compressed_fetch_func
1659 _mesa_get_fxt_fetch_func(mesa_format format)
1660 {
1661 switch (format) {
1662 case MESA_FORMAT_RGB_FXT1:
1663 return fetch_rgb_fxt1;
1664 case MESA_FORMAT_RGBA_FXT1:
1665 return fetch_rgba_fxt1;
1666 default:
1667 return NULL;
1668 }
1669 }