54e24fd2976605050ca3ce98d8dcde8961f7e732
[mesa.git] / src / mesa / main / texcompress_fxt1.c
1 /*
2 * Mesa 3-D graphics library
3 * Version: 7.1
4 *
5 * Copyright (C) 1999-2008 Brian Paul All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included
15 * in all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
21 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25
26 /**
27 * \file texcompress_fxt1.c
28 * GL_EXT_texture_compression_fxt1 support.
29 */
30
31
32 #include "glheader.h"
33 #include "imports.h"
34 #include "colormac.h"
35 #include "context.h"
36 #include "convolve.h"
37 #include "image.h"
38 #include "mipmap.h"
39 #include "texcompress.h"
40 #include "texcompress_fxt1.h"
41 #include "texformat.h"
42 #include "texstore.h"
43
44
/*
 * Forward declaration of the block encoder called by the texstore
 * functions below.  File-local (static).
 */
static void
fxt1_encode (GLuint width, GLuint height, GLint comps,
             const void *source, GLint srcRowStride,
             void *dest, GLint destRowStride);

/*
 * Forward declaration of the single-texel decoder called by the
 * texel-fetch functions below.  Declared without `static`, so it has
 * external linkage.
 */
void
fxt1_decode_1 (const void *texture, GLint stride,
               GLint i, GLint j, GLchan *rgba);
53
54
/**
 * Called during context initialization.
 *
 * Currently does nothing; the context argument is referenced only so
 * that it does not count as unused.
 */
void
_mesa_init_texture_fxt1( GLcontext *ctx )
{
   (void) ctx;
}
63
64
65 /**
66 * Store user's image in rgb_fxt1 format.
67 */
68 GLboolean
69 _mesa_texstore_rgb_fxt1(TEXSTORE_PARAMS)
70 {
71 const GLchan *pixels;
72 GLint srcRowStride;
73 GLubyte *dst;
74 const GLint texWidth = dstRowStride * 8 / 16; /* a bit of a hack */
75 const GLchan *tempImage = NULL;
76
77 ASSERT(dstFormat == MESA_FORMAT_RGB_FXT1);
78 ASSERT(dstXoffset % 8 == 0);
79 ASSERT(dstYoffset % 4 == 0);
80 ASSERT(dstZoffset == 0);
81 (void) dstZoffset;
82 (void) dstImageOffsets;
83
84 if (srcFormat != GL_RGB ||
85 srcType != CHAN_TYPE ||
86 ctx->_ImageTransferState ||
87 srcPacking->SwapBytes) {
88 /* convert image to RGB/GLchan */
89 tempImage = _mesa_make_temp_chan_image(ctx, dims,
90 baseInternalFormat,
91 _mesa_get_format_base_format(dstFormat),
92 srcWidth, srcHeight, srcDepth,
93 srcFormat, srcType, srcAddr,
94 srcPacking);
95 if (!tempImage)
96 return GL_FALSE; /* out of memory */
97 _mesa_adjust_image_for_convolution(ctx, dims, &srcWidth, &srcHeight);
98 pixels = tempImage;
99 srcRowStride = 3 * srcWidth;
100 srcFormat = GL_RGB;
101 }
102 else {
103 pixels = (const GLchan *) srcAddr;
104 srcRowStride = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat,
105 srcType) / sizeof(GLchan);
106 }
107
108 dst = _mesa_compressed_image_address(dstXoffset, dstYoffset, 0,
109 dstFormat,
110 texWidth, (GLubyte *) dstAddr);
111
112 fxt1_encode(srcWidth, srcHeight, 3, pixels, srcRowStride,
113 dst, dstRowStride);
114
115 if (tempImage)
116 _mesa_free((void*) tempImage);
117
118 return GL_TRUE;
119 }
120
121
122 /**
123 * Store user's image in rgba_fxt1 format.
124 */
125 GLboolean
126 _mesa_texstore_rgba_fxt1(TEXSTORE_PARAMS)
127 {
128 const GLchan *pixels;
129 GLint srcRowStride;
130 GLubyte *dst;
131 GLint texWidth = dstRowStride * 8 / 16; /* a bit of a hack */
132 const GLchan *tempImage = NULL;
133
134 ASSERT(dstFormat == MESA_FORMAT_RGBA_FXT1);
135 ASSERT(dstXoffset % 8 == 0);
136 ASSERT(dstYoffset % 4 == 0);
137 ASSERT(dstZoffset == 0);
138 (void) dstZoffset;
139 (void) dstImageOffsets;
140
141 if (srcFormat != GL_RGBA ||
142 srcType != CHAN_TYPE ||
143 ctx->_ImageTransferState ||
144 srcPacking->SwapBytes) {
145 /* convert image to RGBA/GLchan */
146 tempImage = _mesa_make_temp_chan_image(ctx, dims,
147 baseInternalFormat,
148 _mesa_get_format_base_format(dstFormat),
149 srcWidth, srcHeight, srcDepth,
150 srcFormat, srcType, srcAddr,
151 srcPacking);
152 if (!tempImage)
153 return GL_FALSE; /* out of memory */
154 _mesa_adjust_image_for_convolution(ctx, dims, &srcWidth, &srcHeight);
155 pixels = tempImage;
156 srcRowStride = 4 * srcWidth;
157 srcFormat = GL_RGBA;
158 }
159 else {
160 pixels = (const GLchan *) srcAddr;
161 srcRowStride = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat,
162 srcType) / sizeof(GLchan);
163 }
164
165 dst = _mesa_compressed_image_address(dstXoffset, dstYoffset, 0,
166 dstFormat,
167 texWidth, (GLubyte *) dstAddr);
168
169 fxt1_encode(srcWidth, srcHeight, 4, pixels, srcRowStride,
170 dst, dstRowStride);
171
172 if (tempImage)
173 _mesa_free((void*) tempImage);
174
175 return GL_TRUE;
176 }
177
178
179 void
180 _mesa_fetch_texel_2d_rgba_fxt1( const struct gl_texture_image *texImage,
181 GLint i, GLint j, GLint k, GLchan *texel )
182 {
183 (void) k;
184 fxt1_decode_1(texImage->Data, texImage->RowStride, i, j, texel);
185 }
186
187
188 void
189 _mesa_fetch_texel_2d_f_rgba_fxt1( const struct gl_texture_image *texImage,
190 GLint i, GLint j, GLint k, GLfloat *texel )
191 {
192 /* just sample as GLchan and convert to float here */
193 GLchan rgba[4];
194 (void) k;
195 fxt1_decode_1(texImage->Data, texImage->RowStride, i, j, rgba);
196 texel[RCOMP] = CHAN_TO_FLOAT(rgba[RCOMP]);
197 texel[GCOMP] = CHAN_TO_FLOAT(rgba[GCOMP]);
198 texel[BCOMP] = CHAN_TO_FLOAT(rgba[BCOMP]);
199 texel[ACOMP] = CHAN_TO_FLOAT(rgba[ACOMP]);
200 }
201
202
203 void
204 _mesa_fetch_texel_2d_rgb_fxt1( const struct gl_texture_image *texImage,
205 GLint i, GLint j, GLint k, GLchan *texel )
206 {
207 (void) k;
208 fxt1_decode_1(texImage->Data, texImage->RowStride, i, j, texel);
209 texel[ACOMP] = 255;
210 }
211
212
213 void
214 _mesa_fetch_texel_2d_f_rgb_fxt1( const struct gl_texture_image *texImage,
215 GLint i, GLint j, GLint k, GLfloat *texel )
216 {
217 /* just sample as GLchan and convert to float here */
218 GLchan rgba[4];
219 (void) k;
220 fxt1_decode_1(texImage->Data, texImage->RowStride, i, j, rgba);
221 texel[RCOMP] = CHAN_TO_FLOAT(rgba[RCOMP]);
222 texel[GCOMP] = CHAN_TO_FLOAT(rgba[GCOMP]);
223 texel[BCOMP] = CHAN_TO_FLOAT(rgba[BCOMP]);
224 texel[ACOMP] = 1.0F;
225 }
226
227
228
/*
 * Texture format descriptor for RGB FXT1.
 * The colour bit counts are marked "approx" and TexelBytes is 0.
 */
const struct gl_texture_format _mesa_texformat_rgb_fxt1 = {
   MESA_FORMAT_RGB_FXT1, /* MesaFormat */
   GL_RGB, /* BaseFormat */
   GL_UNSIGNED_NORMALIZED_ARB, /* DataType */
   4, /*approx*/ /* RedBits */
   4, /*approx*/ /* GreenBits */
   4, /*approx*/ /* BlueBits */
   0, /* AlphaBits */
   0, /* LuminanceBits */
   0, /* IntensityBits */
   0, /* IndexBits */
   0, /* DepthBits */
   0, /* StencilBits */
   0 /* TexelBytes */
};
244
/*
 * Texture format descriptor for RGBA FXT1.
 * The colour and alpha bit counts are marked "approx" and TexelBytes is 0.
 */
const struct gl_texture_format _mesa_texformat_rgba_fxt1 = {
   MESA_FORMAT_RGBA_FXT1, /* MesaFormat */
   GL_RGBA, /* BaseFormat */
   GL_UNSIGNED_NORMALIZED_ARB, /* DataType */
   4, /*approx*/ /* RedBits */
   4, /*approx*/ /* GreenBits */
   4, /*approx*/ /* BlueBits */
   1, /*approx*/ /* AlphaBits */
   0, /* LuminanceBits */
   0, /* IntensityBits */
   0, /* IndexBits */
   0, /* DepthBits */
   0, /* StencilBits */
   0 /* TexelBytes */
};
260
261
262 /***************************************************************************\
263 * FXT1 encoder
264 *
265 * The encoder was built by reversing the decoder,
266 * and is vaguely based on Texus2 by 3dfx. Note that this code
267 * is merely a proof of concept, since it is highly UNoptimized;
268 * moreover, it is sub-optimal due to initial conditions passed
269 * to Lloyd's algorithm (the interpolation modes are even worse).
270 \***************************************************************************/
271
272
#define MAX_COMP 4 /* ever needed maximum number of components in texel */
#define MAX_VECT 4 /* ever needed maximum number of base vectors to find */
#define N_TEXELS 32 /* number of texels in a block (always 32) */
#define LL_N_REP 50 /* number of iterations in lloyd's vq */
#define LL_RMS_D 10 /* fault tolerance (maximum delta) */
#define LL_RMS_E 255 /* fault tolerance (maximum error) */
#define ALPHA_TS 2 /* alpha threshold: (255 - ALPHA_TS) deemed opaque */

/*
 * True when the 4-component texel at v is transparent black, i.e. all
 * four bytes are zero.  The bytes are tested individually instead of
 * reading the texel through a GLuint pointer, so the test does not
 * depend on the texel's alignment and does not access a byte array
 * through an incompatible type.  Note: v is evaluated more than once.
 */
#define ISTBLACK(v) (((v)[0] | (v)[1] | (v)[2] | (v)[3]) == 0)
281
282
/*
 * Define a 64-bit unsigned integer type and macros
 *
 *   FX64_MOV32(a, b)  load the 32-bit value b into a (upper half zero)
 *   FX64_OR32(a, b)   OR the 32-bit value b into the low bits of a
 *   FX64_SHL(a, c)    shift a left by c bits
 *
 * The native branch is the one compiled.  The fallback branch emulates
 * the same operations with two 32-bit halves.
 */
#if 1

#define FX64_NATIVE 1

typedef uint64_t Fx64;

#define FX64_MOV32(a, b) ((a) = (b))
#define FX64_OR32(a, b) ((a) |= (b))
#define FX64_SHL(a, c) ((a) <<= (c))

#else

#define FX64_NATIVE 0

typedef struct {
   GLuint lo, hi;
} Fx64;

/* clear the upper half too, so later shifts never read garbage */
#define FX64_MOV32(a, b) \
   do { \
      (a).lo = (b); \
      (a).hi = 0; \
   } while (0)

#define FX64_OR32(a, b) ((a).lo |= (b))

/* c == 0 is skipped: it would require a 32-bit shift by 32 */
#define FX64_SHL(a, c) \
   do { \
      if ((c) >= 32) { \
         (a).hi = (a).lo << ((c) - 32); \
         (a).lo = 0; \
      } else if ((c) > 0) { \
         (a).hi = ((a).hi << (c)) | ((a).lo >> (32 - (c))); \
         (a).lo <<= (c); \
      } \
   } while (0)

#endif
319
320
/* Per-component weight of the distance metric; currently always 1. */
#define F(i) (GLfloat)1 /* can be used to obtain an oblong metric: 0.30 / 0.59 / 0.11 */
/* When non-zero, CALCCDOT clamps its result to [0, NV]. */
#define SAFECDOT 1 /* for paranoids */

/*
 * Build an interpolation vector IV and bias B from the end points V0 and
 * V1 (NC components each) such that, for a colour V,
 *    dot(V, IV) + B
 * is NV times the position of V projected on the V0 -> V1 segment, plus
 * 0.5 for rounding: V0 maps to 0.5 and V1 to NV + 0.5.
 *
 * Uses a variable `i` from the enclosing scope as loop counter.
 * The division by d2 is unguarded: V0 and V1 must differ.
 */
#define MAKEIVEC(NV, NC, IV, B, V0, V1) \
   do { \
      /* compute interpolation vector */ \
      GLfloat d2 = 0.0F; \
      GLfloat rd2; \
      \
      for (i = 0; i < NC; i++) { \
         IV[i] = (V1[i] - V0[i]) * F(i); \
         d2 += IV[i] * IV[i]; \
      } \
      rd2 = (GLfloat)NV / d2; \
      B = 0; \
      for (i = 0; i < NC; i++) { \
         IV[i] *= F(i); \
         B -= IV[i] * V0[i]; \
         IV[i] *= rd2; \
      } \
      B = B * rd2 + 0.5f; \
   } while (0)

/*
 * Compute the interpolation index of colour V: TEXEL = (GLint)(dot(V, IV) + B),
 * clamped to [0, NV] when SAFECDOT is set.  IV and B come from MAKEIVEC.
 * Uses a variable `i` from the enclosing scope as loop counter.
 */
#define CALCCDOT(TEXEL, NV, NC, IV, B, V)\
   do { \
      GLfloat dot = 0.0F; \
      for (i = 0; i < NC; i++) { \
         dot += V[i] * IV[i]; \
      } \
      TEXEL = (GLint)(dot + B); \
      if (SAFECDOT) { \
         if (TEXEL < 0) { \
            TEXEL = 0; \
         } else if (TEXEL > NV) { \
            TEXEL = NV; \
         } \
      } \
   } while (0)
359
360
361 static GLint
362 fxt1_bestcol (GLfloat vec[][MAX_COMP], GLint nv,
363 GLubyte input[MAX_COMP], GLint nc)
364 {
365 GLint i, j, best = -1;
366 GLfloat err = 1e9; /* big enough */
367
368 for (j = 0; j < nv; j++) {
369 GLfloat e = 0.0F;
370 for (i = 0; i < nc; i++) {
371 e += (vec[j][i] - input[i]) * (vec[j][i] - input[i]);
372 }
373 if (e < err) {
374 err = e;
375 best = j;
376 }
377 }
378
379 return best;
380 }
381
382
383 static GLint
384 fxt1_worst (GLfloat vec[MAX_COMP],
385 GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
386 {
387 GLint i, k, worst = -1;
388 GLfloat err = -1.0F; /* small enough */
389
390 for (k = 0; k < n; k++) {
391 GLfloat e = 0.0F;
392 for (i = 0; i < nc; i++) {
393 e += (vec[i] - input[k][i]) * (vec[i] - input[k][i]);
394 }
395 if (e > err) {
396 err = e;
397 worst = k;
398 }
399 }
400
401 return worst;
402 }
403
404
405 static GLint
406 fxt1_variance (GLdouble variance[MAX_COMP],
407 GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
408 {
409 GLint i, k, best = 0;
410 GLint sx, sx2;
411 GLdouble var, maxvar = -1; /* small enough */
412 GLdouble teenth = 1.0 / n;
413
414 for (i = 0; i < nc; i++) {
415 sx = sx2 = 0;
416 for (k = 0; k < n; k++) {
417 GLint t = input[k][i];
418 sx += t;
419 sx2 += t * t;
420 }
421 var = sx2 * teenth - sx * sx * teenth * teenth;
422 if (maxvar < var) {
423 maxvar = var;
424 best = i;
425 }
426 if (variance) {
427 variance[i] = var;
428 }
429 }
430
431 return best;
432 }
433
434
435 static GLint
436 fxt1_choose (GLfloat vec[][MAX_COMP], GLint nv,
437 GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
438 {
439 #if 0
440 /* Choose colors from a grid.
441 */
442 GLint i, j;
443
444 for (j = 0; j < nv; j++) {
445 GLint m = j * (n - 1) / (nv - 1);
446 for (i = 0; i < nc; i++) {
447 vec[j][i] = input[m][i];
448 }
449 }
450 #else
451 /* Our solution here is to find the darkest and brightest colors in
452 * the 8x4 tile and use those as the two representative colors.
453 * There are probably better algorithms to use (histogram-based).
454 */
455 GLint i, j, k;
456 GLint minSum = 2000; /* big enough */
457 GLint maxSum = -1; /* small enough */
458 GLint minCol = 0; /* phoudoin: silent compiler! */
459 GLint maxCol = 0; /* phoudoin: silent compiler! */
460
461 struct {
462 GLint flag;
463 GLint key;
464 GLint freq;
465 GLint idx;
466 } hist[N_TEXELS];
467 GLint lenh = 0;
468
469 _mesa_memset(hist, 0, sizeof(hist));
470
471 for (k = 0; k < n; k++) {
472 GLint l;
473 GLint key = 0;
474 GLint sum = 0;
475 for (i = 0; i < nc; i++) {
476 key <<= 8;
477 key |= input[k][i];
478 sum += input[k][i];
479 }
480 for (l = 0; l < n; l++) {
481 if (!hist[l].flag) {
482 /* alloc new slot */
483 hist[l].flag = !0;
484 hist[l].key = key;
485 hist[l].freq = 1;
486 hist[l].idx = k;
487 lenh = l + 1;
488 break;
489 } else if (hist[l].key == key) {
490 hist[l].freq++;
491 break;
492 }
493 }
494 if (minSum > sum) {
495 minSum = sum;
496 minCol = k;
497 }
498 if (maxSum < sum) {
499 maxSum = sum;
500 maxCol = k;
501 }
502 }
503
504 if (lenh <= nv) {
505 for (j = 0; j < lenh; j++) {
506 for (i = 0; i < nc; i++) {
507 vec[j][i] = (GLfloat)input[hist[j].idx][i];
508 }
509 }
510 for (; j < nv; j++) {
511 for (i = 0; i < nc; i++) {
512 vec[j][i] = vec[0][i];
513 }
514 }
515 return 0;
516 }
517
518 for (j = 0; j < nv; j++) {
519 for (i = 0; i < nc; i++) {
520 vec[j][i] = ((nv - 1 - j) * input[minCol][i] + j * input[maxCol][i] + (nv - 1) / 2) / (GLfloat)(nv - 1);
521 }
522 }
523 #endif
524
525 return !0;
526 }
527
528
529 static GLint
530 fxt1_lloyd (GLfloat vec[][MAX_COMP], GLint nv,
531 GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
532 {
533 /* Use the generalized lloyd's algorithm for VQ:
534 * find 4 color vectors.
535 *
536 * for each sample color
537 * sort to nearest vector.
538 *
539 * replace each vector with the centroid of it's matching colors.
540 *
541 * repeat until RMS doesn't improve.
542 *
543 * if a color vector has no samples, or becomes the same as another
544 * vector, replace it with the color which is farthest from a sample.
545 *
546 * vec[][MAX_COMP] initial vectors and resulting colors
547 * nv number of resulting colors required
548 * input[N_TEXELS][MAX_COMP] input texels
549 * nc number of components in input / vec
550 * n number of input samples
551 */
552
553 GLint sum[MAX_VECT][MAX_COMP]; /* used to accumulate closest texels */
554 GLint cnt[MAX_VECT]; /* how many times a certain vector was chosen */
555 GLfloat error, lasterror = 1e9;
556
557 GLint i, j, k, rep;
558
559 /* the quantizer */
560 for (rep = 0; rep < LL_N_REP; rep++) {
561 /* reset sums & counters */
562 for (j = 0; j < nv; j++) {
563 for (i = 0; i < nc; i++) {
564 sum[j][i] = 0;
565 }
566 cnt[j] = 0;
567 }
568 error = 0;
569
570 /* scan whole block */
571 for (k = 0; k < n; k++) {
572 #if 1
573 GLint best = -1;
574 GLfloat err = 1e9; /* big enough */
575 /* determine best vector */
576 for (j = 0; j < nv; j++) {
577 GLfloat e = (vec[j][0] - input[k][0]) * (vec[j][0] - input[k][0]) +
578 (vec[j][1] - input[k][1]) * (vec[j][1] - input[k][1]) +
579 (vec[j][2] - input[k][2]) * (vec[j][2] - input[k][2]);
580 if (nc == 4) {
581 e += (vec[j][3] - input[k][3]) * (vec[j][3] - input[k][3]);
582 }
583 if (e < err) {
584 err = e;
585 best = j;
586 }
587 }
588 #else
589 GLint best = fxt1_bestcol(vec, nv, input[k], nc, &err);
590 #endif
591 /* add in closest color */
592 for (i = 0; i < nc; i++) {
593 sum[best][i] += input[k][i];
594 }
595 /* mark this vector as used */
596 cnt[best]++;
597 /* accumulate error */
598 error += err;
599 }
600
601 /* check RMS */
602 if ((error < LL_RMS_E) ||
603 ((error < lasterror) && ((lasterror - error) < LL_RMS_D))) {
604 return !0; /* good match */
605 }
606 lasterror = error;
607
608 /* move each vector to the barycenter of its closest colors */
609 for (j = 0; j < nv; j++) {
610 if (cnt[j]) {
611 GLfloat div = 1.0F / cnt[j];
612 for (i = 0; i < nc; i++) {
613 vec[j][i] = div * sum[j][i];
614 }
615 } else {
616 /* this vec has no samples or is identical with a previous vec */
617 GLint worst = fxt1_worst(vec[j], input, nc, n);
618 for (i = 0; i < nc; i++) {
619 vec[j][i] = input[worst][i];
620 }
621 }
622 }
623 }
624
625 return 0; /* could not converge fast enough */
626 }
627
628
629 static void
630 fxt1_quantize_CHROMA (GLuint *cc,
631 GLubyte input[N_TEXELS][MAX_COMP])
632 {
633 const GLint n_vect = 4; /* 4 base vectors to find */
634 const GLint n_comp = 3; /* 3 components: R, G, B */
635 GLfloat vec[MAX_VECT][MAX_COMP];
636 GLint i, j, k;
637 Fx64 hi; /* high quadword */
638 GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
639
640 if (fxt1_choose(vec, n_vect, input, n_comp, N_TEXELS) != 0) {
641 fxt1_lloyd(vec, n_vect, input, n_comp, N_TEXELS);
642 }
643
644 FX64_MOV32(hi, 4); /* cc-chroma = "010" + unused bit */
645 for (j = n_vect - 1; j >= 0; j--) {
646 for (i = 0; i < n_comp; i++) {
647 /* add in colors */
648 FX64_SHL(hi, 5);
649 FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
650 }
651 }
652 ((Fx64 *)cc)[1] = hi;
653
654 lohi = lolo = 0;
655 /* right microtile */
656 for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) {
657 lohi <<= 2;
658 lohi |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
659 }
660 /* left microtile */
661 for (; k >= 0; k--) {
662 lolo <<= 2;
663 lolo |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
664 }
665 cc[1] = lohi;
666 cc[0] = lolo;
667 }
668
669
670 static void
671 fxt1_quantize_ALPHA0 (GLuint *cc,
672 GLubyte input[N_TEXELS][MAX_COMP],
673 GLubyte reord[N_TEXELS][MAX_COMP], GLint n)
674 {
675 const GLint n_vect = 3; /* 3 base vectors to find */
676 const GLint n_comp = 4; /* 4 components: R, G, B, A */
677 GLfloat vec[MAX_VECT][MAX_COMP];
678 GLint i, j, k;
679 Fx64 hi; /* high quadword */
680 GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
681
682 /* the last vector indicates zero */
683 for (i = 0; i < n_comp; i++) {
684 vec[n_vect][i] = 0;
685 }
686
687 /* the first n texels in reord are guaranteed to be non-zero */
688 if (fxt1_choose(vec, n_vect, reord, n_comp, n) != 0) {
689 fxt1_lloyd(vec, n_vect, reord, n_comp, n);
690 }
691
692 FX64_MOV32(hi, 6); /* alpha = "011" + lerp = 0 */
693 for (j = n_vect - 1; j >= 0; j--) {
694 /* add in alphas */
695 FX64_SHL(hi, 5);
696 FX64_OR32(hi, (GLuint)(vec[j][ACOMP] / 8.0F));
697 }
698 for (j = n_vect - 1; j >= 0; j--) {
699 for (i = 0; i < n_comp - 1; i++) {
700 /* add in colors */
701 FX64_SHL(hi, 5);
702 FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
703 }
704 }
705 ((Fx64 *)cc)[1] = hi;
706
707 lohi = lolo = 0;
708 /* right microtile */
709 for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) {
710 lohi <<= 2;
711 lohi |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
712 }
713 /* left microtile */
714 for (; k >= 0; k--) {
715 lolo <<= 2;
716 lolo |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
717 }
718 cc[1] = lohi;
719 cc[0] = lolo;
720 }
721
722
/**
 * Encode a 32-texel RGBA block with three base colours, interpolating
 * within each 16-texel microtile (header value 7).
 *
 * For each microtile the darkest and brightest texel (by component sum)
 * are located.  The closest pair of extrema across the two microtiles is
 * merged into one shared middle vector vec[1]; the remaining extremum of
 * the left microtile becomes vec[0] and that of the right becomes vec[2].
 * Left texels are indexed along vec[0] -> vec[1], right texels along
 * vec[2] -> vec[1].
 *
 * \param cc     receives the 128-bit encoded block (four GLuints)
 * \param input  the block's texels (0..15 left, 16..31 right microtile)
 */
static void
fxt1_quantize_ALPHA1 (GLuint *cc,
                      GLubyte input[N_TEXELS][MAX_COMP])
{
   const GLint n_vect = 3; /* highest vector number in each microtile */
   const GLint n_comp = 4; /* 4 components: R, G, B, A */
   GLfloat vec[1 + 1 + 1][MAX_COMP]; /* 1.5 extrema for each sub-block */
   GLfloat b, iv[MAX_COMP]; /* interpolation vector */
   GLint i, j, k;
   Fx64 hi; /* high quadword */
   GLuint lohi, lolo; /* low quadword: hi dword, lo dword */

   GLint minSum;
   GLint maxSum;
   GLint minColL = 0, maxColL = 0;
   GLint minColR = 0, maxColR = 0;
   GLint sumL = 0, sumR = 0;
   GLint nn_comp;
   /* Our solution here is to find the darkest and brightest colors in
    * the 4x4 tile and use those as the two representative colors.
    * There are probably better algorithms to use (histogram-based).
    */
   /* Left microtile.  If the sums over all components do not separate any
    * two texels, retry with one component fewer, down to a single one.
    * sumL keeps accumulating over every retry.
    */
   nn_comp = n_comp;
   while ((minColL == maxColL) && nn_comp) {
      minSum = 2000; /* big enough */
      maxSum = -1; /* small enough */
      for (k = 0; k < N_TEXELS / 2; k++) {
         GLint sum = 0;
         for (i = 0; i < nn_comp; i++) {
            sum += input[k][i];
         }
         if (minSum > sum) {
            minSum = sum;
            minColL = k;
         }
         if (maxSum < sum) {
            maxSum = sum;
            maxColL = k;
         }
         sumL += sum;
      }

      nn_comp--;
   }

   /* Right microtile, same procedure. */
   nn_comp = n_comp;
   while ((minColR == maxColR) && nn_comp) {
      minSum = 2000; /* big enough */
      maxSum = -1; /* small enough */
      for (k = N_TEXELS / 2; k < N_TEXELS; k++) {
         GLint sum = 0;
         for (i = 0; i < nn_comp; i++) {
            sum += input[k][i];
         }
         if (minSum > sum) {
            minSum = sum;
            minColR = k;
         }
         if (maxSum < sum) {
            maxSum = sum;
            maxColR = k;
         }
         sumR += sum;
      }

      nn_comp--;
   }

   /* choose the common vector (yuck!) */
   {
      GLint j1, j2;
      GLint v1 = 0, v2 = 0;
      GLfloat err = 1e9; /* big enough */
      GLfloat tv[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
      for (i = 0; i < n_comp; i++) {
         tv[0][i] = input[minColL][i];
         tv[1][i] = input[maxColL][i];
         tv[2][i] = input[minColR][i];
         tv[3][i] = input[maxColR][i];
      }
      /* find the left extremum (v1) and right extremum (v2) closest to
       * each other
       */
      for (j1 = 0; j1 < 2; j1++) {
         for (j2 = 2; j2 < 4; j2++) {
            GLfloat e = 0.0F;
            for (i = 0; i < n_comp; i++) {
               e += (tv[j1][i] - tv[j2][i]) * (tv[j1][i] - tv[j2][i]);
            }
            if (e < err) {
               err = e;
               v1 = j1;
               v2 = j2;
            }
         }
      }
      /* vec[1] is the sumL/sumR weighted mean of the closest pair;
       * 1 - v1 and 5 - v2 select the other extremum of each side
       */
      for (i = 0; i < n_comp; i++) {
         vec[0][i] = tv[1 - v1][i];
         vec[1][i] = (tv[v1][i] * sumL + tv[v2][i] * sumR) / (sumL + sumR);
         vec[2][i] = tv[5 - v2][i];
      }
   }

   /* left microtile */
   cc[0] = 0;
   if (minColL != maxColL) {
      /* compute interpolation vector */
      MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);

      /* add in texels */
      lolo = 0;
      for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
         GLint texel;
         /* interpolate color */
         CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
         /* add in texel */
         lolo <<= 2;
         lolo |= texel;
      }

      cc[0] = lolo;
   }

   /* right microtile */
   cc[1] = 0;
   if (minColR != maxColR) {
      /* compute interpolation vector */
      MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[1]);

      /* add in texels */
      lohi = 0;
      for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
         GLint texel;
         /* interpolate color */
         CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
         /* add in texel */
         lohi <<= 2;
         lohi |= texel;
      }

      cc[1] = lohi;
   }

   /* high quadword: header, then three alphas, then three colours,
    * 5 bits per component
    */
   FX64_MOV32(hi, 7); /* alpha = "011" + lerp = 1 */
   for (j = n_vect - 1; j >= 0; j--) {
      /* add in alphas */
      FX64_SHL(hi, 5);
      FX64_OR32(hi, (GLuint)(vec[j][ACOMP] / 8.0F));
   }
   for (j = n_vect - 1; j >= 0; j--) {
      for (i = 0; i < n_comp - 1; i++) {
         /* add in colors */
         FX64_SHL(hi, 5);
         FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
      }
   }
   ((Fx64 *)cc)[1] = hi;
}
878
879
/**
 * Encode a 32-texel block with two RGB end points and a 3-bit index per
 * texel.
 *
 * The end points are the darkest and brightest texel of \p reord (by
 * component sum), stored at 5 bits per component in cc[3].  Every texel
 * of \p input gets a 3-bit index at bit offset 3 * k: index 7 for
 * transparent black texels, otherwise its position between the end
 * points in the range 0..6.
 *
 * \param cc     receives the 128-bit encoded block (four GLuints)
 * \param input  all texels of the block
 * \param reord  the texels used to pick the end points
 * \param n      number of texels in \p reord
 */
static void
fxt1_quantize_HI (GLuint *cc,
                  GLubyte input[N_TEXELS][MAX_COMP],
                  GLubyte reord[N_TEXELS][MAX_COMP], GLint n)
{
   const GLint n_vect = 6; /* highest vector number */
   const GLint n_comp = 3; /* 3 components: R, G, B */
   GLfloat b = 0.0F; /* phoudoin: silent compiler! */
   GLfloat iv[MAX_COMP]; /* interpolation vector */
   GLint i, k;
   GLuint hihi; /* high quadword: hi dword */

   GLint minSum = 2000; /* big enough */
   GLint maxSum = -1; /* small enough */
   GLint minCol = 0; /* phoudoin: silent compiler! */
   GLint maxCol = 0; /* phoudoin: silent compiler! */

   /* Our solution here is to find the darkest and brightest colors in
    * the 8x4 tile and use those as the two representative colors.
    * There are probably better algorithms to use (histogram-based).
    */
   for (k = 0; k < n; k++) {
      GLint sum = 0;
      for (i = 0; i < n_comp; i++) {
         sum += reord[k][i];
      }
      if (minSum > sum) {
         minSum = sum;
         minCol = k;
      }
      if (maxSum < sum) {
         maxSum = sum;
         maxCol = k;
      }
   }

   hihi = 0; /* cc-hi = "00" */
   for (i = 0; i < n_comp; i++) {
      /* add in colors */
      hihi <<= 5;
      hihi |= reord[maxCol][i] >> 3;
   }
   for (i = 0; i < n_comp; i++) {
      /* add in colors */
      hihi <<= 5;
      hihi |= reord[minCol][i] >> 3;
   }
   cc[3] = hihi;
   cc[0] = cc[1] = cc[2] = 0;

   /* compute interpolation vector */
   if (minCol != maxCol) {
      MAKEIVEC(n_vect, n_comp, iv, b, reord[minCol], reord[maxCol]);
   }

   /* add in texels */
   for (k = N_TEXELS - 1; k >= 0; k--) {
      GLint t = k * 3;
      /* Address the byte holding bit t and OR the index in through a
       * GLuint at that byte.
       * NOTE(review): this is an unaligned GLuint access and the result
       * appears to depend on byte order -- confirm supported targets.
       */
      GLuint *kk = (GLuint *)((char *)cc + t / 8);
      GLint texel = n_vect + 1; /* transparent black */

      if (!ISTBLACK(input[k])) {
         /* with a single end point every texel keeps index 0 */
         if (minCol != maxCol) {
            /* interpolate color */
            CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
            /* add in texel */
            kk[0] |= texel << (t & 7);
         }
      } else {
         /* add in texel */
         kk[0] |= texel << (t & 7);
      }
   }
}
954
955
956 static void
957 fxt1_quantize_MIXED1 (GLuint *cc,
958 GLubyte input[N_TEXELS][MAX_COMP])
959 {
960 const GLint n_vect = 2; /* highest vector number in each microtile */
961 const GLint n_comp = 3; /* 3 components: R, G, B */
962 GLubyte vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
963 GLfloat b, iv[MAX_COMP]; /* interpolation vector */
964 GLint i, j, k;
965 Fx64 hi; /* high quadword */
966 GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
967
968 GLint minSum;
969 GLint maxSum;
970 GLint minColL = 0, maxColL = -1;
971 GLint minColR = 0, maxColR = -1;
972
973 /* Our solution here is to find the darkest and brightest colors in
974 * the 4x4 tile and use those as the two representative colors.
975 * There are probably better algorithms to use (histogram-based).
976 */
977 minSum = 2000; /* big enough */
978 maxSum = -1; /* small enough */
979 for (k = 0; k < N_TEXELS / 2; k++) {
980 if (!ISTBLACK(input[k])) {
981 GLint sum = 0;
982 for (i = 0; i < n_comp; i++) {
983 sum += input[k][i];
984 }
985 if (minSum > sum) {
986 minSum = sum;
987 minColL = k;
988 }
989 if (maxSum < sum) {
990 maxSum = sum;
991 maxColL = k;
992 }
993 }
994 }
995 minSum = 2000; /* big enough */
996 maxSum = -1; /* small enough */
997 for (; k < N_TEXELS; k++) {
998 if (!ISTBLACK(input[k])) {
999 GLint sum = 0;
1000 for (i = 0; i < n_comp; i++) {
1001 sum += input[k][i];
1002 }
1003 if (minSum > sum) {
1004 minSum = sum;
1005 minColR = k;
1006 }
1007 if (maxSum < sum) {
1008 maxSum = sum;
1009 maxColR = k;
1010 }
1011 }
1012 }
1013
1014 /* left microtile */
1015 if (maxColL == -1) {
1016 /* all transparent black */
1017 cc[0] = ~0u;
1018 for (i = 0; i < n_comp; i++) {
1019 vec[0][i] = 0;
1020 vec[1][i] = 0;
1021 }
1022 } else {
1023 cc[0] = 0;
1024 for (i = 0; i < n_comp; i++) {
1025 vec[0][i] = input[minColL][i];
1026 vec[1][i] = input[maxColL][i];
1027 }
1028 if (minColL != maxColL) {
1029 /* compute interpolation vector */
1030 MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
1031
1032 /* add in texels */
1033 lolo = 0;
1034 for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
1035 GLint texel = n_vect + 1; /* transparent black */
1036 if (!ISTBLACK(input[k])) {
1037 /* interpolate color */
1038 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
1039 }
1040 /* add in texel */
1041 lolo <<= 2;
1042 lolo |= texel;
1043 }
1044 cc[0] = lolo;
1045 }
1046 }
1047
1048 /* right microtile */
1049 if (maxColR == -1) {
1050 /* all transparent black */
1051 cc[1] = ~0u;
1052 for (i = 0; i < n_comp; i++) {
1053 vec[2][i] = 0;
1054 vec[3][i] = 0;
1055 }
1056 } else {
1057 cc[1] = 0;
1058 for (i = 0; i < n_comp; i++) {
1059 vec[2][i] = input[minColR][i];
1060 vec[3][i] = input[maxColR][i];
1061 }
1062 if (minColR != maxColR) {
1063 /* compute interpolation vector */
1064 MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);
1065
1066 /* add in texels */
1067 lohi = 0;
1068 for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
1069 GLint texel = n_vect + 1; /* transparent black */
1070 if (!ISTBLACK(input[k])) {
1071 /* interpolate color */
1072 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
1073 }
1074 /* add in texel */
1075 lohi <<= 2;
1076 lohi |= texel;
1077 }
1078 cc[1] = lohi;
1079 }
1080 }
1081
1082 FX64_MOV32(hi, 9 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
1083 for (j = 2 * 2 - 1; j >= 0; j--) {
1084 for (i = 0; i < n_comp; i++) {
1085 /* add in colors */
1086 FX64_SHL(hi, 5);
1087 FX64_OR32(hi, vec[j][i] >> 3);
1088 }
1089 }
1090 ((Fx64 *)cc)[1] = hi;
1091 }
1092
1093
/**
 * Encode a 32-texel opaque RGB block as two independently interpolated
 * 16-texel microtiles with four index levels each (header value 8).
 *
 * For each microtile the channel with the largest variance is scanned
 * and the texels holding its minimum and maximum become the two end
 * points.  Texels are indexed 0..3 along the line between them.  After
 * indexing, the end points may be swapped and the indices inverted so
 * that bit 1 of the index word agrees with a bit derived from the green
 * components of the end points.
 *
 * \param cc     receives the 128-bit encoded block (four GLuints)
 * \param input  the block's texels (0..15 left, 16..31 right microtile)
 */
static void
fxt1_quantize_MIXED0 (GLuint *cc,
                      GLubyte input[N_TEXELS][MAX_COMP])
{
   const GLint n_vect = 3; /* highest vector number in each microtile */
   const GLint n_comp = 3; /* 3 components: R, G, B */
   GLubyte vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
   GLfloat b, iv[MAX_COMP]; /* interpolation vector */
   GLint i, j, k;
   Fx64 hi; /* high quadword */
   GLuint lohi, lolo; /* low quadword: hi dword, lo dword */

   GLint minColL = 0, maxColL = 0;
   GLint minColR = 0, maxColR = 0;
#if 0
   /* disabled alternative: pick the end points by component sum */
   GLint minSum;
   GLint maxSum;

   /* Our solution here is to find the darkest and brightest colors in
    * the 4x4 tile and use those as the two representative colors.
    * There are probably better algorithms to use (histogram-based).
    */
   minSum = 2000; /* big enough */
   maxSum = -1; /* small enough */
   for (k = 0; k < N_TEXELS / 2; k++) {
      GLint sum = 0;
      for (i = 0; i < n_comp; i++) {
         sum += input[k][i];
      }
      if (minSum > sum) {
         minSum = sum;
         minColL = k;
      }
      if (maxSum < sum) {
         maxSum = sum;
         maxColL = k;
      }
   }
   minSum = 2000; /* big enough */
   maxSum = -1; /* small enough */
   for (; k < N_TEXELS; k++) {
      GLint sum = 0;
      for (i = 0; i < n_comp; i++) {
         sum += input[k][i];
      }
      if (minSum > sum) {
         minSum = sum;
         minColR = k;
      }
      if (maxSum < sum) {
         maxSum = sum;
         maxColR = k;
      }
   }
#else
   GLint minVal;
   GLint maxVal;
   /* channel of largest variance in each microtile */
   GLint maxVarL = fxt1_variance(NULL, input, n_comp, N_TEXELS / 2);
   GLint maxVarR = fxt1_variance(NULL, &input[N_TEXELS / 2], n_comp, N_TEXELS / 2);

   /* Scan the channel with max variance for lo & hi
    * and use those as the two representative colors.
    */
   minVal = 2000; /* big enough */
   maxVal = -1; /* small enough */
   for (k = 0; k < N_TEXELS / 2; k++) {
      GLint t = input[k][maxVarL];
      if (minVal > t) {
         minVal = t;
         minColL = k;
      }
      if (maxVal < t) {
         maxVal = t;
         maxColL = k;
      }
   }
   minVal = 2000; /* big enough */
   maxVal = -1; /* small enough */
   for (; k < N_TEXELS; k++) {
      GLint t = input[k][maxVarR];
      if (minVal > t) {
         minVal = t;
         minColR = k;
      }
      if (maxVal < t) {
         maxVal = t;
         maxColR = k;
      }
   }
#endif

   /* left microtile */
   cc[0] = 0;
   for (i = 0; i < n_comp; i++) {
      vec[0][i] = input[minColL][i];
      vec[1][i] = input[maxColL][i];
   }
   if (minColL != maxColL) {
      /* compute interpolation vector */
      MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);

      /* add in texels */
      lolo = 0;
      for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
         GLint texel;
         /* interpolate color */
         CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
         /* add in texel */
         lolo <<= 2;
         lolo |= texel;
      }

      /* funky encoding for LSB of green */
      /* on mismatch, swap the end points and invert every index */
      if ((GLint)((lolo >> 1) & 1) != (((vec[1][GCOMP] ^ vec[0][GCOMP]) >> 2) & 1)) {
         for (i = 0; i < n_comp; i++) {
            vec[1][i] = input[minColL][i];
            vec[0][i] = input[maxColL][i];
         }
         lolo = ~lolo;
      }

      cc[0] = lolo;
   }

   /* right microtile */
   cc[1] = 0;
   for (i = 0; i < n_comp; i++) {
      vec[2][i] = input[minColR][i];
      vec[3][i] = input[maxColR][i];
   }
   if (minColR != maxColR) {
      /* compute interpolation vector */
      MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);

      /* add in texels */
      lohi = 0;
      for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
         GLint texel;
         /* interpolate color */
         CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
         /* add in texel */
         lohi <<= 2;
         lohi |= texel;
      }

      /* funky encoding for LSB of green */
      /* on mismatch, swap the end points and invert every index */
      if ((GLint)((lohi >> 1) & 1) != (((vec[3][GCOMP] ^ vec[2][GCOMP]) >> 2) & 1)) {
         for (i = 0; i < n_comp; i++) {
            vec[3][i] = input[minColR][i];
            vec[2][i] = input[maxColR][i];
         }
         lohi = ~lohi;
      }

      cc[1] = lohi;
   }

   /* high quadword: header bits (including one green bit taken from
    * vec[3] and one from vec[1]), then the four end points at 5 bits
    * per component
    */
   FX64_MOV32(hi, 8 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
   for (j = 2 * 2 - 1; j >= 0; j--) {
      for (i = 0; i < n_comp; i++) {
         /* add in colors */
         FX64_SHL(hi, 5);
         FX64_OR32(hi, vec[j][i] >> 3);
      }
   }
   ((Fx64 *)cc)[1] = hi;
}
1261
1262
1263 static void
1264 fxt1_quantize (GLuint *cc, const GLubyte *lines[], GLint comps)
1265 {
1266 GLint trualpha;
1267 GLubyte reord[N_TEXELS][MAX_COMP];
1268
1269 GLubyte input[N_TEXELS][MAX_COMP];
1270 GLint i, k, l;
1271
1272 if (comps == 3) {
1273 /* make the whole block opaque */
1274 _mesa_memset(input, -1, sizeof(input));
1275 }
1276
1277 /* 8 texels each line */
1278 for (l = 0; l < 4; l++) {
1279 for (k = 0; k < 4; k++) {
1280 for (i = 0; i < comps; i++) {
1281 input[k + l * 4][i] = *lines[l]++;
1282 }
1283 }
1284 for (; k < 8; k++) {
1285 for (i = 0; i < comps; i++) {
1286 input[k + l * 4 + 12][i] = *lines[l]++;
1287 }
1288 }
1289 }
1290
1291 /* block layout:
1292 * 00, 01, 02, 03, 08, 09, 0a, 0b
1293 * 10, 11, 12, 13, 18, 19, 1a, 1b
1294 * 04, 05, 06, 07, 0c, 0d, 0e, 0f
1295 * 14, 15, 16, 17, 1c, 1d, 1e, 1f
1296 */
1297
1298 /* [dBorca]
1299 * stupidity flows forth from this
1300 */
1301 l = N_TEXELS;
1302 trualpha = 0;
1303 if (comps == 4) {
1304 /* skip all transparent black texels */
1305 l = 0;
1306 for (k = 0; k < N_TEXELS; k++) {
1307 /* test all components against 0 */
1308 if (!ISTBLACK(input[k])) {
1309 /* texel is not transparent black */
1310 COPY_4UBV(reord[l], input[k]);
1311 if (reord[l][ACOMP] < (255 - ALPHA_TS)) {
1312 /* non-opaque texel */
1313 trualpha = !0;
1314 }
1315 l++;
1316 }
1317 }
1318 }
1319
1320 #if 0
1321 if (trualpha) {
1322 fxt1_quantize_ALPHA0(cc, input, reord, l);
1323 } else if (l == 0) {
1324 cc[0] = cc[1] = cc[2] = -1;
1325 cc[3] = 0;
1326 } else if (l < N_TEXELS) {
1327 fxt1_quantize_HI(cc, input, reord, l);
1328 } else {
1329 fxt1_quantize_CHROMA(cc, input);
1330 }
1331 (void)fxt1_quantize_ALPHA1;
1332 (void)fxt1_quantize_MIXED1;
1333 (void)fxt1_quantize_MIXED0;
1334 #else
1335 if (trualpha) {
1336 fxt1_quantize_ALPHA1(cc, input);
1337 } else if (l == 0) {
1338 cc[0] = cc[1] = cc[2] = ~0u;
1339 cc[3] = 0;
1340 } else if (l < N_TEXELS) {
1341 fxt1_quantize_MIXED1(cc, input);
1342 } else {
1343 fxt1_quantize_MIXED0(cc, input);
1344 }
1345 (void)fxt1_quantize_ALPHA0;
1346 (void)fxt1_quantize_HI;
1347 (void)fxt1_quantize_CHROMA;
1348 #endif
1349 }
1350
1351
1352 static void
1353 fxt1_encode (GLuint width, GLuint height, GLint comps,
1354 const void *source, GLint srcRowStride,
1355 void *dest, GLint destRowStride)
1356 {
1357 GLuint x, y;
1358 const GLubyte *data;
1359 GLuint *encoded = (GLuint *)dest;
1360 void *newSource = NULL;
1361
1362 assert(comps == 3 || comps == 4);
1363
1364 /* Replicate image if width is not M8 or height is not M4 */
1365 if ((width & 7) | (height & 3)) {
1366 GLint newWidth = (width + 7) & ~7;
1367 GLint newHeight = (height + 3) & ~3;
1368 newSource = _mesa_malloc(comps * newWidth * newHeight * sizeof(GLchan));
1369 if (!newSource) {
1370 GET_CURRENT_CONTEXT(ctx);
1371 _mesa_error(ctx, GL_OUT_OF_MEMORY, "texture compression");
1372 goto cleanUp;
1373 }
1374 _mesa_upscale_teximage2d(width, height, newWidth, newHeight,
1375 comps, (const GLchan *) source,
1376 srcRowStride, (GLchan *) newSource);
1377 source = newSource;
1378 width = newWidth;
1379 height = newHeight;
1380 srcRowStride = comps * newWidth;
1381 }
1382
1383 /* convert from 16/32-bit channels to GLubyte if needed */
1384 if (CHAN_TYPE != GL_UNSIGNED_BYTE) {
1385 const GLuint n = width * height * comps;
1386 const GLchan *src = (const GLchan *) source;
1387 GLubyte *dest = (GLubyte *) _mesa_malloc(n * sizeof(GLubyte));
1388 GLuint i;
1389 if (!dest) {
1390 GET_CURRENT_CONTEXT(ctx);
1391 _mesa_error(ctx, GL_OUT_OF_MEMORY, "texture compression");
1392 goto cleanUp;
1393 }
1394 for (i = 0; i < n; i++) {
1395 dest[i] = CHAN_TO_UBYTE(src[i]);
1396 }
1397 if (newSource != NULL) {
1398 _mesa_free(newSource);
1399 }
1400 newSource = dest; /* we'll free this buffer before returning */
1401 source = dest; /* the new, GLubyte incoming image */
1402 }
1403
1404 data = (const GLubyte *) source;
1405 destRowStride = (destRowStride - width * 2) / 4;
1406 for (y = 0; y < height; y += 4) {
1407 GLuint offs = 0 + (y + 0) * srcRowStride;
1408 for (x = 0; x < width; x += 8) {
1409 const GLubyte *lines[4];
1410 lines[0] = &data[offs];
1411 lines[1] = lines[0] + srcRowStride;
1412 lines[2] = lines[1] + srcRowStride;
1413 lines[3] = lines[2] + srcRowStride;
1414 offs += 8 * comps;
1415 fxt1_quantize(encoded, lines, comps);
1416 /* 128 bits per 8x4 block */
1417 encoded += 4;
1418 }
1419 encoded += destRowStride;
1420 }
1421
1422 cleanUp:
1423 if (newSource != NULL) {
1424 _mesa_free(newSource);
1425 }
1426 }
1427
1428
1429 /***************************************************************************\
1430 * FXT1 decoder
1431 *
1432 * The decoder is based on GL_3DFX_texture_compression_FXT1
1433 * specification and serves as a concept for the encoder.
1434 \***************************************************************************/
1435
1436
1437 /* lookup table for scaling 5 bit colors up to 8 bits */
1438 static const GLubyte _rgb_scale_5[] = {
1439 0, 8, 16, 25, 33, 41, 49, 58,
1440 66, 74, 82, 90, 99, 107, 115, 123,
1441 132, 140, 148, 156, 165, 173, 181, 189,
1442 197, 206, 214, 222, 230, 239, 247, 255
1443 };
1444
1445 /* lookup table for scaling 6 bit colors up to 8 bits */
1446 static const GLubyte _rgb_scale_6[] = {
1447 0, 4, 8, 12, 16, 20, 24, 28,
1448 32, 36, 40, 45, 49, 53, 57, 61,
1449 65, 69, 73, 77, 81, 85, 89, 93,
1450 97, 101, 105, 109, 113, 117, 121, 125,
1451 130, 134, 138, 142, 146, 150, 154, 158,
1452 162, 166, 170, 174, 178, 182, 186, 190,
1453 194, 198, 202, 206, 210, 215, 219, 223,
1454 227, 231, 235, 239, 243, 247, 251, 255
1455 };
1456
1457
/* CC_SEL: view `cc` as an array of 32-bit words and return the word holding
 * bit `which`, shifted right so that bit becomes bit 0.  The value is not
 * masked and never includes bits from the following word; users mask it
 * themselves (UP5/UP6 or "& 1").  Read-only, so the cast keeps const.
 */
#define CC_SEL(cc, which) (((const GLuint *)(cc))[(which) / 32] >> ((which) & 31))
/* UP5: scale the low 5 bits of `c` to 8 bits */
#define UP5(c) _rgb_scale_5[(c) & 31]
/* UP6: scale the 6-bit value made of the low 5 bits of `c` and LSB `b` */
#define UP6(c, b) _rgb_scale_6[(((c) & 31) << 1) | ((b) & 1)]
/* LERP: rounded integer blend, weight (n - t) / n on c0 and t / n on c1.
 * Fully parenthesized so it can be used inside larger expressions.
 */
#define LERP(n, t, c0, c1) ((((n) - (t)) * (c0) + (t) * (c1) + (n) / 2) / (n))
1462
1463
1464 static void
1465 fxt1_decode_1HI (const GLubyte *code, GLint t, GLchan *rgba)
1466 {
1467 const GLuint *cc;
1468
1469 t *= 3;
1470 cc = (const GLuint *)(code + t / 8);
1471 t = (cc[0] >> (t & 7)) & 7;
1472
1473 if (t == 7) {
1474 rgba[RCOMP] = rgba[GCOMP] = rgba[BCOMP] = rgba[ACOMP] = 0;
1475 } else {
1476 GLubyte r, g, b;
1477 cc = (const GLuint *)(code + 12);
1478 if (t == 0) {
1479 b = UP5(CC_SEL(cc, 0));
1480 g = UP5(CC_SEL(cc, 5));
1481 r = UP5(CC_SEL(cc, 10));
1482 } else if (t == 6) {
1483 b = UP5(CC_SEL(cc, 15));
1484 g = UP5(CC_SEL(cc, 20));
1485 r = UP5(CC_SEL(cc, 25));
1486 } else {
1487 b = LERP(6, t, UP5(CC_SEL(cc, 0)), UP5(CC_SEL(cc, 15)));
1488 g = LERP(6, t, UP5(CC_SEL(cc, 5)), UP5(CC_SEL(cc, 20)));
1489 r = LERP(6, t, UP5(CC_SEL(cc, 10)), UP5(CC_SEL(cc, 25)));
1490 }
1491 rgba[RCOMP] = UBYTE_TO_CHAN(r);
1492 rgba[GCOMP] = UBYTE_TO_CHAN(g);
1493 rgba[BCOMP] = UBYTE_TO_CHAN(b);
1494 rgba[ACOMP] = CHAN_MAX;
1495 }
1496 }
1497
1498
1499 static void
1500 fxt1_decode_1CHROMA (const GLubyte *code, GLint t, GLchan *rgba)
1501 {
1502 const GLuint *cc;
1503 GLuint kk;
1504
1505 cc = (const GLuint *)code;
1506 if (t & 16) {
1507 cc++;
1508 t &= 15;
1509 }
1510 t = (cc[0] >> (t * 2)) & 3;
1511
1512 t *= 15;
1513 cc = (const GLuint *)(code + 8 + t / 8);
1514 kk = cc[0] >> (t & 7);
1515 rgba[BCOMP] = UBYTE_TO_CHAN( UP5(kk) );
1516 rgba[GCOMP] = UBYTE_TO_CHAN( UP5(kk >> 5) );
1517 rgba[RCOMP] = UBYTE_TO_CHAN( UP5(kk >> 10) );
1518 rgba[ACOMP] = CHAN_MAX;
1519 }
1520
1521
1522 static void
1523 fxt1_decode_1MIXED (const GLubyte *code, GLint t, GLchan *rgba)
1524 {
1525 const GLuint *cc;
1526 GLuint col[2][3];
1527 GLint glsb, selb;
1528
1529 cc = (const GLuint *)code;
1530 if (t & 16) {
1531 t &= 15;
1532 t = (cc[1] >> (t * 2)) & 3;
1533 /* col 2 */
1534 col[0][BCOMP] = (*(const GLuint *)(code + 11)) >> 6;
1535 col[0][GCOMP] = CC_SEL(cc, 99);
1536 col[0][RCOMP] = CC_SEL(cc, 104);
1537 /* col 3 */
1538 col[1][BCOMP] = CC_SEL(cc, 109);
1539 col[1][GCOMP] = CC_SEL(cc, 114);
1540 col[1][RCOMP] = CC_SEL(cc, 119);
1541 glsb = CC_SEL(cc, 126);
1542 selb = CC_SEL(cc, 33);
1543 } else {
1544 t = (cc[0] >> (t * 2)) & 3;
1545 /* col 0 */
1546 col[0][BCOMP] = CC_SEL(cc, 64);
1547 col[0][GCOMP] = CC_SEL(cc, 69);
1548 col[0][RCOMP] = CC_SEL(cc, 74);
1549 /* col 1 */
1550 col[1][BCOMP] = CC_SEL(cc, 79);
1551 col[1][GCOMP] = CC_SEL(cc, 84);
1552 col[1][RCOMP] = CC_SEL(cc, 89);
1553 glsb = CC_SEL(cc, 125);
1554 selb = CC_SEL(cc, 1);
1555 }
1556
1557 if (CC_SEL(cc, 124) & 1) {
1558 /* alpha[0] == 1 */
1559
1560 if (t == 3) {
1561 /* zero */
1562 rgba[RCOMP] = rgba[BCOMP] = rgba[GCOMP] = rgba[ACOMP] = 0;
1563 } else {
1564 GLubyte r, g, b;
1565 if (t == 0) {
1566 b = UP5(col[0][BCOMP]);
1567 g = UP5(col[0][GCOMP]);
1568 r = UP5(col[0][RCOMP]);
1569 } else if (t == 2) {
1570 b = UP5(col[1][BCOMP]);
1571 g = UP6(col[1][GCOMP], glsb);
1572 r = UP5(col[1][RCOMP]);
1573 } else {
1574 b = (UP5(col[0][BCOMP]) + UP5(col[1][BCOMP])) / 2;
1575 g = (UP5(col[0][GCOMP]) + UP6(col[1][GCOMP], glsb)) / 2;
1576 r = (UP5(col[0][RCOMP]) + UP5(col[1][RCOMP])) / 2;
1577 }
1578 rgba[RCOMP] = UBYTE_TO_CHAN(r);
1579 rgba[GCOMP] = UBYTE_TO_CHAN(g);
1580 rgba[BCOMP] = UBYTE_TO_CHAN(b);
1581 rgba[ACOMP] = CHAN_MAX;
1582 }
1583 } else {
1584 /* alpha[0] == 0 */
1585 GLubyte r, g, b;
1586 if (t == 0) {
1587 b = UP5(col[0][BCOMP]);
1588 g = UP6(col[0][GCOMP], glsb ^ selb);
1589 r = UP5(col[0][RCOMP]);
1590 } else if (t == 3) {
1591 b = UP5(col[1][BCOMP]);
1592 g = UP6(col[1][GCOMP], glsb);
1593 r = UP5(col[1][RCOMP]);
1594 } else {
1595 b = LERP(3, t, UP5(col[0][BCOMP]), UP5(col[1][BCOMP]));
1596 g = LERP(3, t, UP6(col[0][GCOMP], glsb ^ selb),
1597 UP6(col[1][GCOMP], glsb));
1598 r = LERP(3, t, UP5(col[0][RCOMP]), UP5(col[1][RCOMP]));
1599 }
1600 rgba[RCOMP] = UBYTE_TO_CHAN(r);
1601 rgba[GCOMP] = UBYTE_TO_CHAN(g);
1602 rgba[BCOMP] = UBYTE_TO_CHAN(b);
1603 rgba[ACOMP] = CHAN_MAX;
1604 }
1605 }
1606
1607
1608 static void
1609 fxt1_decode_1ALPHA (const GLubyte *code, GLint t, GLchan *rgba)
1610 {
1611 const GLuint *cc;
1612 GLubyte r, g, b, a;
1613
1614 cc = (const GLuint *)code;
1615 if (CC_SEL(cc, 124) & 1) {
1616 /* lerp == 1 */
1617 GLuint col0[4];
1618
1619 if (t & 16) {
1620 t &= 15;
1621 t = (cc[1] >> (t * 2)) & 3;
1622 /* col 2 */
1623 col0[BCOMP] = (*(const GLuint *)(code + 11)) >> 6;
1624 col0[GCOMP] = CC_SEL(cc, 99);
1625 col0[RCOMP] = CC_SEL(cc, 104);
1626 col0[ACOMP] = CC_SEL(cc, 119);
1627 } else {
1628 t = (cc[0] >> (t * 2)) & 3;
1629 /* col 0 */
1630 col0[BCOMP] = CC_SEL(cc, 64);
1631 col0[GCOMP] = CC_SEL(cc, 69);
1632 col0[RCOMP] = CC_SEL(cc, 74);
1633 col0[ACOMP] = CC_SEL(cc, 109);
1634 }
1635
1636 if (t == 0) {
1637 b = UP5(col0[BCOMP]);
1638 g = UP5(col0[GCOMP]);
1639 r = UP5(col0[RCOMP]);
1640 a = UP5(col0[ACOMP]);
1641 } else if (t == 3) {
1642 b = UP5(CC_SEL(cc, 79));
1643 g = UP5(CC_SEL(cc, 84));
1644 r = UP5(CC_SEL(cc, 89));
1645 a = UP5(CC_SEL(cc, 114));
1646 } else {
1647 b = LERP(3, t, UP5(col0[BCOMP]), UP5(CC_SEL(cc, 79)));
1648 g = LERP(3, t, UP5(col0[GCOMP]), UP5(CC_SEL(cc, 84)));
1649 r = LERP(3, t, UP5(col0[RCOMP]), UP5(CC_SEL(cc, 89)));
1650 a = LERP(3, t, UP5(col0[ACOMP]), UP5(CC_SEL(cc, 114)));
1651 }
1652 } else {
1653 /* lerp == 0 */
1654
1655 if (t & 16) {
1656 cc++;
1657 t &= 15;
1658 }
1659 t = (cc[0] >> (t * 2)) & 3;
1660
1661 if (t == 3) {
1662 /* zero */
1663 r = g = b = a = 0;
1664 } else {
1665 GLuint kk;
1666 cc = (const GLuint *)code;
1667 a = UP5(cc[3] >> (t * 5 + 13));
1668 t *= 15;
1669 cc = (const GLuint *)(code + 8 + t / 8);
1670 kk = cc[0] >> (t & 7);
1671 b = UP5(kk);
1672 g = UP5(kk >> 5);
1673 r = UP5(kk >> 10);
1674 }
1675 }
1676 rgba[RCOMP] = UBYTE_TO_CHAN(r);
1677 rgba[GCOMP] = UBYTE_TO_CHAN(g);
1678 rgba[BCOMP] = UBYTE_TO_CHAN(b);
1679 rgba[ACOMP] = UBYTE_TO_CHAN(a);
1680 }
1681
1682
1683 void
1684 fxt1_decode_1 (const void *texture, GLint stride, /* in pixels */
1685 GLint i, GLint j, GLchan *rgba)
1686 {
1687 static void (*decode_1[]) (const GLubyte *, GLint, GLchan *) = {
1688 fxt1_decode_1HI, /* cc-high = "00?" */
1689 fxt1_decode_1HI, /* cc-high = "00?" */
1690 fxt1_decode_1CHROMA, /* cc-chroma = "010" */
1691 fxt1_decode_1ALPHA, /* alpha = "011" */
1692 fxt1_decode_1MIXED, /* mixed = "1??" */
1693 fxt1_decode_1MIXED, /* mixed = "1??" */
1694 fxt1_decode_1MIXED, /* mixed = "1??" */
1695 fxt1_decode_1MIXED /* mixed = "1??" */
1696 };
1697
1698 const GLubyte *code = (const GLubyte *)texture +
1699 ((j / 4) * (stride / 8) + (i / 8)) * 16;
1700 GLint mode = CC_SEL(code, 125);
1701 GLint t = i & 7;
1702
1703 if (t & 4) {
1704 t += 12;
1705 }
1706 t += (j & 3) * 4;
1707
1708 decode_1[mode](code, t, rgba);
1709 }