603f434f209e483774ee3bddcdcfbc505aa90a63
[mesa.git] / src / mesa / main / texcompress_fxt1.c
1 /*
2 * Mesa 3-D graphics library
3 * Version: 6.1
4 *
5 * Copyright (C) 1999-2004 Brian Paul All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included
15 * in all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
21 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25
26 /**
27 * \file texcompress_fxt1.c
28 * GL_EXT_texture_compression_fxt1 support.
29 */
30
31
32 #include "glheader.h"
33 #include "imports.h"
34 #include "colormac.h"
35 #include "context.h"
36 #include "convolve.h"
37 #include "image.h"
38 #include "texcompress.h"
39 #include "texformat.h"
40 #include "texstore.h"
41
42
43 int
44 fxt1_encode (GLcontext *ctx,
45 unsigned int width, unsigned int height,
46 int srcFormat,
47 const void *source, int srcRowStride,
48 void *dest, int destRowStride);
49 void
50 fxt1_decode_1 (const void *texture, int width,
51 int i, int j, unsigned char *rgba);
52
53
54 /**
55 * Called during context initialization.
56 */
57 void
58 _mesa_init_texture_fxt1( GLcontext *ctx )
59 {
60 }
61
62
63 /**
64 * Called via TexFormat->StoreImage to store an RGB_FXT1 texture.
65 */
66 static GLboolean
67 texstore_rgb_fxt1(STORE_PARAMS)
68 {
69 const GLchan *pixels;
70 GLint srcRowStride;
71 GLubyte *dst;
72 const GLint texWidth = dstRowStride * 8 / 16; /* a bit of a hack */
73 const GLchan *tempImage = NULL;
74
75 ASSERT(dstFormat == &_mesa_texformat_rgb_fxt1);
76 ASSERT(dstXoffset % 8 == 0);
77 ASSERT(dstYoffset % 4 == 0);
78 ASSERT(dstZoffset == 0);
79
80 if (srcFormat != GL_RGB ||
81 srcType != CHAN_TYPE ||
82 ctx->_ImageTransferState ||
83 srcPacking->SwapBytes) {
84 /* convert image to RGB/GLchan */
85 tempImage = _mesa_make_temp_chan_image(ctx, dims,
86 baseInternalFormat,
87 dstFormat->BaseFormat,
88 srcWidth, srcHeight, srcDepth,
89 srcFormat, srcType, srcAddr,
90 srcPacking);
91 if (!tempImage)
92 return GL_FALSE; /* out of memory */
93 _mesa_adjust_image_for_convolution(ctx, dims, &srcWidth, &srcHeight);
94 pixels = tempImage;
95 srcRowStride = 3 * srcWidth;
96 srcFormat = GL_RGB;
97 }
98 else {
99 pixels = (const GLchan *) srcAddr;
100 srcRowStride = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat,
101 srcType) / sizeof(GLchan);
102 }
103
104 dst = _mesa_compressed_image_address(dstXoffset, dstYoffset, 0,
105 GL_COMPRESSED_RGB_FXT1_3DFX,
106 texWidth, (GLubyte *) dstAddr);
107
108 fxt1_encode(ctx, srcWidth, srcHeight, srcFormat, pixels, srcRowStride,
109 dst, dstRowStride);
110
111 if (tempImage)
112 _mesa_free((void*) tempImage);
113
114 return GL_TRUE;
115 }
116
117
118 /**
119 * Called via TexFormat->StoreImage to store an RGBA_FXT1 texture.
120 */
121 static GLboolean
122 texstore_rgba_fxt1(STORE_PARAMS)
123 {
124 const GLchan *pixels;
125 GLint srcRowStride;
126 GLubyte *dst;
127 GLint texWidth = dstRowStride * 8 / 16; /* a bit of a hack */
128 const GLchan *tempImage = NULL;
129
130 ASSERT(dstFormat == &_mesa_texformat_rgba_fxt1);
131 ASSERT(dstXoffset % 8 == 0);
132 ASSERT(dstYoffset % 4 == 0);
133 ASSERT(dstZoffset == 0);
134
135 if (srcFormat != GL_RGBA ||
136 srcType != CHAN_TYPE ||
137 ctx->_ImageTransferState ||
138 srcPacking->SwapBytes) {
139 /* convert image to RGBA/GLchan */
140 tempImage = _mesa_make_temp_chan_image(ctx, dims,
141 baseInternalFormat,
142 dstFormat->BaseFormat,
143 srcWidth, srcHeight, srcDepth,
144 srcFormat, srcType, srcAddr,
145 srcPacking);
146 if (!tempImage)
147 return GL_FALSE; /* out of memory */
148 _mesa_adjust_image_for_convolution(ctx, dims, &srcWidth, &srcHeight);
149 pixels = tempImage;
150 srcRowStride = 4 * srcWidth;
151 srcFormat = GL_RGBA;
152 }
153 else {
154 pixels = (const GLchan *) srcAddr;
155 srcRowStride = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat,
156 srcType) / sizeof(GLchan);
157 }
158
159 dst = _mesa_compressed_image_address(dstXoffset, dstYoffset, 0,
160 GL_COMPRESSED_RGBA_FXT1_3DFX,
161 texWidth, (GLubyte *) dstAddr);
162
163 fxt1_encode(ctx, srcWidth, srcHeight, srcFormat, pixels, srcRowStride,
164 dst, dstRowStride);
165
166 if (tempImage)
167 _mesa_free((void*) tempImage);
168
169 return GL_TRUE;
170 }
171
172
173 static void
174 fetch_texel_2d_rgba_fxt1( const struct gl_texture_image *texImage,
175 GLint i, GLint j, GLint k, GLchan *texel )
176 {
177 fxt1_decode_1(texImage->Data, texImage->Width, i, j, texel);
178 }
179
180
181 static void
182 fetch_texel_2d_f_rgba_fxt1( const struct gl_texture_image *texImage,
183 GLint i, GLint j, GLint k, GLfloat *texel )
184 {
185 /* just sample as GLchan and convert to float here */
186 GLchan rgba[4];
187 fxt1_decode_1(texImage->Data, texImage->Width, i, j, rgba);
188 texel[RCOMP] = CHAN_TO_FLOAT(rgba[RCOMP]);
189 texel[GCOMP] = CHAN_TO_FLOAT(rgba[GCOMP]);
190 texel[BCOMP] = CHAN_TO_FLOAT(rgba[BCOMP]);
191 texel[ACOMP] = CHAN_TO_FLOAT(rgba[ACOMP]);
192 }
193
194
195 static void
196 fetch_texel_2d_rgb_fxt1( const struct gl_texture_image *texImage,
197 GLint i, GLint j, GLint k, GLchan *texel )
198 {
199 fxt1_decode_1(texImage->Data, texImage->Width, i, j, texel);
200 texel[ACOMP] = 255;
201 }
202
203
204 static void
205 fetch_texel_2d_f_rgb_fxt1( const struct gl_texture_image *texImage,
206 GLint i, GLint j, GLint k, GLfloat *texel )
207 {
208 /* just sample as GLchan and convert to float here */
209 GLchan rgba[4];
210 fxt1_decode_1(texImage->Data, texImage->Width, i, j, rgba);
211 texel[RCOMP] = CHAN_TO_FLOAT(rgba[RCOMP]);
212 texel[GCOMP] = CHAN_TO_FLOAT(rgba[GCOMP]);
213 texel[BCOMP] = CHAN_TO_FLOAT(rgba[BCOMP]);
214 texel[ACOMP] = 1.0;
215 }
216
217
218
219 const struct gl_texture_format _mesa_texformat_rgb_fxt1 = {
220 MESA_FORMAT_RGB_FXT1, /* MesaFormat */
221 GL_RGB, /* BaseFormat */
222 GL_UNSIGNED_NORMALIZED_ARB, /* DataType */
223 4, /*approx*/ /* RedBits */
224 4, /*approx*/ /* GreenBits */
225 4, /*approx*/ /* BlueBits */
226 0, /* AlphaBits */
227 0, /* LuminanceBits */
228 0, /* IntensityBits */
229 0, /* IndexBits */
230 0, /* DepthBits */
231 0, /* TexelBytes */
232 texstore_rgb_fxt1, /* StoreTexImageFunc */
233 NULL, /*impossible*/ /* FetchTexel1D */
234 fetch_texel_2d_rgb_fxt1, /* FetchTexel2D */
235 NULL, /*impossible*/ /* FetchTexel3D */
236 NULL, /*impossible*/ /* FetchTexel1Df */
237 fetch_texel_2d_f_rgb_fxt1, /* FetchTexel2Df */
238 NULL, /*impossible*/ /* FetchTexel3Df */
239 };
240
241 const struct gl_texture_format _mesa_texformat_rgba_fxt1 = {
242 MESA_FORMAT_RGBA_FXT1, /* MesaFormat */
243 GL_RGBA, /* BaseFormat */
244 GL_UNSIGNED_NORMALIZED_ARB, /* DataType */
245 4, /*approx*/ /* RedBits */
246 4, /*approx*/ /* GreenBits */
247 4, /*approx*/ /* BlueBits */
248 1, /*approx*/ /* AlphaBits */
249 0, /* LuminanceBits */
250 0, /* IntensityBits */
251 0, /* IndexBits */
252 0, /* DepthBits */
253 0, /* TexelBytes */
254 texstore_rgba_fxt1, /* StoreTexImageFunc */
255 NULL, /*impossible*/ /* FetchTexel1D */
256 fetch_texel_2d_rgba_fxt1, /* FetchTexel2D */
257 NULL, /*impossible*/ /* FetchTexel3D */
258 NULL, /*impossible*/ /* FetchTexel1Df */
259 fetch_texel_2d_f_rgba_fxt1, /* FetchTexel2Df */
260 NULL, /*impossible*/ /* FetchTexel3Df */
261 };
262
263
264 /***************************************************************************\
265 * FXT1 encoder
266 *
267 * The encoder was built by reversing the decoder,
268 * and is vaguely based on Texus2 by 3dfx. Note that this code
269 * is merely a proof of concept, since it is highly UNoptimized;
270 * moreover, it is sub-optimal due to initial conditions passed
271 * to Lloyd's algorithm (the interpolation modes are worse).
272 \***************************************************************************/
273
274
/* Encoder tuning constants. */
#define MAX_COMP 4 /* ever needed maximum number of components in texel */
#define MAX_VECT 4 /* ever needed maximum number of base vectors to find */
#define N_TEXELS 32 /* number of texels in a block (always 32) */
#define LL_N_REP 50 /* number of iterations in lloyd's vq */
#define LL_RMS_D 10 /* fault tolerance (maximum delta) */
#define LL_RMS_E 255 /* fault tolerance (maximum error) */
#define ALPHA_TS 2 /* alpha threshold: (255 - ALPHA_TS) deemed opaque */

/*
 * True when the MAX_COMP-byte texel that v points at is "transparent
 * black", i.e. all four of its bytes are zero.
 *
 * The texel is read as ONE 32-bit word.  It must not be read through
 * 'unsigned long': that type is 8 bytes on LP64 platforms, so the test
 * would also look at the following texel (and past the end of the array
 * for the last one).
 */
#define ISTBLACK(v) (*((const unsigned int *)(v)) == 0)
283
284
/*
 * Fx64: a 64-bit accumulator used to assemble the high quadword of a
 * block by repeatedly shifting left and OR-ing in small fields.
 *
 *   FX64_MOV32(a, b)  a = b          (b is a 32-bit value)
 *   FX64_OR32(a, b)   a |= b         (b is a 32-bit value)
 *   FX64_SHL(a, c)    a <<= c        (0 <= c < 64)
 *
 * Every macro expands to a single statement/expression without a
 * trailing semicolon, so it can be used safely in unbraced if/else.
 */
#ifdef __GNUC__

#define FX64_NATIVE 1

typedef unsigned long long Fx64;

#define FX64_MOV32(a, b) ((a) = (b))
#define FX64_OR32(a, b) ((a) |= (b))
#define FX64_SHL(a, c) ((a) <<= (c))

#else /* !__GNUC__ */

#define FX64_NATIVE 0

/* two 32-bit halves; 'unsigned long' would be 64 bits wide on LP64 */
typedef struct {
   unsigned int lo, hi;
} Fx64;

/* also clears the high half so later shifts never read garbage */
#define FX64_MOV32(a, b) \
   do { \
      (a).lo = (b); \
      (a).hi = 0; \
   } while (0)

#define FX64_OR32(a, b) ((a).lo |= (b))

/* a zero shift count is a no-op (and would otherwise shift lo by 32) */
#define FX64_SHL(a, c) \
   do { \
      if ((c) >= 32) { \
         (a).hi = (a).lo << ((c) - 32); \
         (a).lo = 0; \
      } else if ((c) > 0) { \
         (a).hi = ((a).hi << (c)) | ((a).lo >> (32 - (c))); \
         (a).lo <<= (c); \
      } \
   } while (0)

#endif /* !__GNUC__ */
318
319
320 static int
321 fxt1_bestcol (float vec[][MAX_COMP], int nv,
322 unsigned char input[MAX_COMP], int nc)
323 {
324 int i, j, best = -1;
325 float err = 1e9; /* big enough */
326
327 for (j = 0; j < nv; j++) {
328 float e = 0;
329 for (i = 0; i < nc; i++) {
330 e += (vec[j][i] - input[i]) * (vec[j][i] - input[i]);
331 }
332 if (e < err) {
333 err = e;
334 best = j;
335 }
336 }
337
338 return best;
339 }
340
341
342 static int
343 fxt1_worst (float vec[MAX_COMP],
344 unsigned char input[N_TEXELS][MAX_COMP], int nc, int n)
345 {
346 int i, k, worst = -1;
347 float err = -1; /* small enough */
348
349 for (k = 0; k < n; k++) {
350 float e = 0;
351 for (i = 0; i < nc; i++) {
352 e += (vec[i] - input[k][i]) * (vec[i] - input[k][i]);
353 }
354 if (e > err) {
355 err = e;
356 worst = k;
357 }
358 }
359
360 return worst;
361 }
362
363
364 static int
365 fxt1_variance (double variance[MAX_COMP],
366 unsigned char input[N_TEXELS][MAX_COMP], int nc, int n)
367 {
368 int i, k, best = 0;
369 int sx, sx2;
370 double var, maxvar = -1; /* small enough */
371 double teenth = 1.0 / n;
372
373 for (i = 0; i < nc; i++) {
374 sx = sx2 = 0;
375 for (k = 0; k < n; k++) {
376 int t = input[k][i];
377 sx += t;
378 sx2 += t * t;
379 }
380 var = sx2 * teenth - sx * sx * teenth * teenth;
381 if (maxvar < var) {
382 maxvar = var;
383 best = i;
384 }
385 if (variance) {
386 variance[i] = var;
387 }
388 }
389
390 return best;
391 }
392
393
394 static int
395 fxt1_choose (float vec[][MAX_COMP], int nv,
396 unsigned char input[N_TEXELS][MAX_COMP], int nc, int n)
397 {
398 #if 0
399 /* Choose colors from a grid.
400 */
401 int i, j;
402
403 for (j = 0; j < nv; j++) {
404 int m = j * (n - 1) / (nv - 1);
405 for (i = 0; i < nc; i++) {
406 vec[j][i] = input[m][i];
407 }
408 }
409 #else
410 /* Our solution here is to find the darkest and brightest colors in
411 * the 8x4 tile and use those as the two representative colors.
412 * There are probably better algorithms to use (histogram-based).
413 */
414 int i, j, k;
415 int minSum = 1000; /* big enough */
416 int maxSum = -1; /* small enough */
417 int minCol = 0; /* phoudoin: silent compiler! */
418 int maxCol = 0; /* phoudoin: silent compiler! */
419
420 struct {
421 int flag;
422 int key;
423 int freq;
424 int idx;
425 } hist[N_TEXELS];
426 int lenh = 0;
427
428 memset(hist, 0, sizeof(hist));
429
430 for (k = 0; k < n; k++) {
431 int l;
432 int key = 0;
433 int sum = 0;
434 for (i = 0; i < nc; i++) {
435 key <<= 8;
436 key |= input[k][i];
437 sum += input[k][i];
438 }
439 for (l = 0; l < n; l++) {
440 if (!hist[l].flag) {
441 /* alloc new slot */
442 hist[l].flag = !0;
443 hist[l].key = key;
444 hist[l].freq = 1;
445 hist[l].idx = k;
446 lenh = l + 1;
447 break;
448 } else if (hist[l].key == key) {
449 hist[l].freq++;
450 break;
451 }
452 }
453 if (minSum > sum) {
454 minSum = sum;
455 minCol = k;
456 }
457 if (maxSum < sum) {
458 maxSum = sum;
459 maxCol = k;
460 }
461 }
462
463 if (lenh <= nv) {
464 for (j = 0; j < lenh; j++) {
465 for (i = 0; i < nc; i++) {
466 vec[j][i] = (float)input[hist[j].idx][i];
467 }
468 }
469 for (; j < nv; j++) {
470 for (i = 0; i < nc; i++) {
471 vec[j][i] = vec[0][i];
472 }
473 }
474 return 0;
475 }
476
477 for (j = 0; j < nv; j++) {
478 for (i = 0; i < nc; i++) {
479 vec[j][i] = ((nv - 1 - j) * input[minCol][i] + j * input[maxCol][i] + (nv - 1) / 2) / (nv - 1);
480 }
481 }
482 #endif
483
484 return !0;
485 }
486
487
488 static int
489 fxt1_lloyd (float vec[][MAX_COMP], int nv,
490 unsigned char input[N_TEXELS][MAX_COMP], int nc, int n)
491 {
492 /* Use the generalized lloyd's algorithm for VQ:
493 * find 4 color vectors.
494 *
495 * for each sample color
496 * sort to nearest vector.
497 *
498 * replace each vector with the centroid of it's matching colors.
499 *
500 * repeat until RMS doesn't improve.
501 *
502 * if a color vector has no samples, or becomes the same as another
503 * vector, replace it with the color which is farthest from a sample.
504 *
505 * vec[][MAX_COMP] initial vectors and resulting colors
506 * nv number of resulting colors required
507 * input[N_TEXELS][MAX_COMP] input texels
508 * nc number of components in input / vec
509 * n number of input samples
510 */
511
512 int sum[MAX_VECT][MAX_COMP]; /* used to accumulate closest texels */
513 int cnt[MAX_VECT]; /* how many times a certain vector was chosen */
514 float error, lasterror = 1e9;
515
516 int i, j, k, rep;
517
518 /* the quantizer */
519 for (rep = 0; rep < LL_N_REP; rep++) {
520 /* reset sums & counters */
521 for (j = 0; j < nv; j++) {
522 for (i = 0; i < nc; i++) {
523 sum[j][i] = 0;
524 }
525 cnt[j] = 0;
526 }
527 error = 0;
528
529 /* scan whole block */
530 for (k = 0; k < n; k++) {
531 #if 1
532 int best = -1;
533 float err = 1e9; /* big enough */
534 /* determine best vector */
535 for (j = 0; j < nv; j++) {
536 float e = (vec[j][0] - input[k][0]) * (vec[j][0] - input[k][0]) +
537 (vec[j][1] - input[k][1]) * (vec[j][1] - input[k][1]) +
538 (vec[j][2] - input[k][2]) * (vec[j][2] - input[k][2]);
539 if (nc == 4) {
540 e += (vec[j][3] - input[k][3]) * (vec[j][3] - input[k][3]);
541 }
542 if (e < err) {
543 err = e;
544 best = j;
545 }
546 }
547 #else
548 int best = fxt1_bestcol(vec, n_vect, input[k], n_comp, &err);
549 #endif
550 /* add in closest color */
551 for (i = 0; i < nc; i++) {
552 sum[best][i] += input[k][i];
553 }
554 /* mark this vector as used */
555 cnt[best]++;
556 /* accumulate error */
557 error += err;
558 }
559
560 /* check RMS */
561 if ((error < LL_RMS_E) ||
562 ((error < lasterror) && ((lasterror - error) < LL_RMS_D))) {
563 return !0; /* good match */
564 }
565 lasterror = error;
566
567 /* move each vector to the barycenter of its closest colors */
568 for (j = 0; j < nv; j++) {
569 if (cnt[j]) {
570 float div = 1.0 / cnt[j];
571 for (i = 0; i < nc; i++) {
572 vec[j][i] = div * sum[j][i];
573 }
574 } else {
575 /* this vec has no samples or is identical with a previous vec */
576 int worst = fxt1_worst(vec[j], input, nc, n);
577 for (i = 0; i < nc; i++) {
578 vec[j][i] = input[worst][i];
579 }
580 }
581 }
582 }
583
584 return 0; /* could not converge fast enough */
585 }
586
587
588 static void
589 fxt1_quantize_CHROMA (unsigned long *cc,
590 unsigned char input[N_TEXELS][MAX_COMP])
591 {
592 const int n_vect = 4; /* 4 base vectors to find */
593 const int n_comp = 3; /* 3 components: R, G, B */
594 float vec[MAX_VECT][MAX_COMP];
595 int i, j, k;
596 Fx64 hi; /* high quadword */
597 unsigned long lohi, lolo; /* low quadword: hi dword, lo dword */
598
599 if (fxt1_choose(vec, n_vect, input, n_comp, N_TEXELS) != 0) {
600 fxt1_lloyd(vec, n_vect, input, n_comp, N_TEXELS);
601 }
602
603 FX64_MOV32(hi, 4); /* cc-chroma = "010" + unused bit */
604 for (j = n_vect - 1; j >= 0; j--) {
605 for (i = 0; i < n_comp; i++) {
606 /* add in colors */
607 FX64_SHL(hi, 5);
608 FX64_OR32(hi, (unsigned int)(vec[j][i] / 8.0));
609 }
610 }
611 ((Fx64 *)cc)[1] = hi;
612
613 lohi = lolo = 0;
614 /* right microtile */
615 for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) {
616 lohi <<= 2;
617 lohi |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
618 }
619 /* left microtile */
620 for (; k >= 0; k--) {
621 lolo <<= 2;
622 lolo |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
623 }
624 cc[1] = lohi;
625 cc[0] = lolo;
626 }
627
628
629 static void
630 fxt1_quantize_ALPHA0 (unsigned long *cc,
631 unsigned char input[N_TEXELS][MAX_COMP],
632 unsigned char reord[N_TEXELS][MAX_COMP], int n)
633 {
634 const int n_vect = 3; /* 3 base vectors to find */
635 const int n_comp = 4; /* 4 components: R, G, B, A */
636 float vec[MAX_VECT][MAX_COMP];
637 int i, j, k;
638 Fx64 hi; /* high quadword */
639 unsigned long lohi, lolo; /* low quadword: hi dword, lo dword */
640
641 /* the last vector indicates zero */
642 for (i = 0; i < n_comp; i++) {
643 vec[n_vect][i] = 0;
644 }
645
646 /* the first n texels in reord are guaranteed to be non-zero */
647 if (fxt1_choose(vec, n_vect, reord, n_comp, n) != 0) {
648 fxt1_lloyd(vec, n_vect, reord, n_comp, n);
649 }
650
651 FX64_MOV32(hi, 6); /* alpha = "011" + lerp = 0 */
652 for (j = n_vect - 1; j >= 0; j--) {
653 /* add in alphas */
654 FX64_SHL(hi, 5);
655 FX64_OR32(hi, (unsigned int)(vec[j][ACOMP] / 8.0));
656 }
657 for (j = n_vect - 1; j >= 0; j--) {
658 for (i = 0; i < n_comp - 1; i++) {
659 /* add in colors */
660 FX64_SHL(hi, 5);
661 FX64_OR32(hi, (unsigned int)(vec[j][i] / 8.0));
662 }
663 }
664 ((Fx64 *)cc)[1] = hi;
665
666 lohi = lolo = 0;
667 /* right microtile */
668 for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) {
669 lohi <<= 2;
670 lohi |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
671 }
672 /* left microtile */
673 for (; k >= 0; k--) {
674 lolo <<= 2;
675 lolo |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
676 }
677 cc[1] = lohi;
678 cc[0] = lolo;
679 }
680
681
682 static void
683 fxt1_quantize_ALPHA1 (unsigned long *cc,
684 unsigned char input[N_TEXELS][MAX_COMP])
685 {
686 const int n_vect = 3; /* highest vector number in each microtile */
687 const int n_comp = 4; /* 4 components: R, G, B, A */
688 float vec[1 + 1 + 1][MAX_COMP]; /* 1.5 extrema for each sub-block */
689 float b, iv[MAX_COMP]; /* interpolation vector */
690 int i, j, k;
691 Fx64 hi; /* high quadword */
692 unsigned long lohi, lolo; /* low quadword: hi dword, lo dword */
693
694 int minSum;
695 int maxSum;
696 int minColL = 0, maxColL = 0;
697 int minColR = 0, maxColR = 0;
698 int sumL = 0, sumR = 0;
699
700 /* Our solution here is to find the darkest and brightest colors in
701 * the 4x4 tile and use those as the two representative colors.
702 * There are probably better algorithms to use (histogram-based).
703 */
704 minSum = 1000; /* big enough */
705 maxSum = -1; /* small enough */
706 for (k = 0; k < N_TEXELS / 2; k++) {
707 int sum = 0;
708 for (i = 0; i < n_comp; i++) {
709 sum += input[k][i];
710 }
711 if (minSum > sum) {
712 minSum = sum;
713 minColL = k;
714 }
715 if (maxSum < sum) {
716 maxSum = sum;
717 maxColL = k;
718 }
719 sumL += sum;
720 }
721 minSum = 1000; /* big enough */
722 maxSum = -1; /* small enough */
723 for (; k < N_TEXELS; k++) {
724 int sum = 0;
725 for (i = 0; i < n_comp; i++) {
726 sum += input[k][i];
727 }
728 if (minSum > sum) {
729 minSum = sum;
730 minColR = k;
731 }
732 if (maxSum < sum) {
733 maxSum = sum;
734 maxColR = k;
735 }
736 sumR += sum;
737 }
738
739 /* choose the common vector (yuck!) */
740 {
741 int j1, j2;
742 int v1 = 0, v2 = 0;
743 float err = 1e9; /* big enough */
744 float tv[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
745 for (i = 0; i < n_comp; i++) {
746 tv[0][i] = input[minColL][i];
747 tv[1][i] = input[maxColL][i];
748 tv[2][i] = input[minColR][i];
749 tv[3][i] = input[maxColR][i];
750 }
751 for (j1 = 0; j1 < 2; j1++) {
752 for (j2 = 2; j2 < 4; j2++) {
753 float e = 0;
754 for (i = 0; i < n_comp; i++) {
755 e += (tv[j1][i] - tv[j2][i]) * (tv[j1][i] - tv[j2][i]);
756 }
757 if (e < err) {
758 err = e;
759 v1 = j1;
760 v2 = j2;
761 }
762 }
763 }
764 for (i = 0; i < n_comp; i++) {
765 vec[0][i] = tv[1 - v1][i];
766 vec[1][i] = (tv[v1][i] * sumL + tv[v2][i] * sumR) / (sumL + sumR);
767 vec[2][i] = tv[5 - v2][i];
768 }
769 }
770
771 /* left microtile */
772 cc[0] = 0;
773 if (minColL != maxColL) {
774 /* compute interpolation vector */
775 float d2 = 0;
776 float rd2;
777
778 for (i = 0; i < n_comp; i++) {
779 iv[i] = vec[1][i] - vec[0][i];
780 d2 += iv[i] * iv[i];
781 }
782 rd2 = (float)n_vect / d2;
783 b = 0;
784 for (i = 0; i < n_comp; i++) {
785 b -= iv[i] * vec[0][i];
786 iv[i] *= rd2;
787 }
788 b = b * rd2 + 0.5f;
789
790 /* add in texels */
791 lolo = 0;
792 for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
793 int texel;
794 /* interpolate color */
795 float dot = 0;
796 for (i = 0; i < n_comp; i++) {
797 dot += input[k][i] * iv[i];
798 }
799 texel = (int)(dot + b);
800 if (texel < 0) {
801 texel = 0;
802 } else if (texel > n_vect) {
803 texel = n_vect;
804 }
805 /* add in texel */
806 lolo <<= 2;
807 lolo |= texel;
808 }
809
810 cc[0] = lolo;
811 }
812
813 /* right microtile */
814 cc[1] = 0;
815 if (minColR != maxColR) {
816 /* compute interpolation vector */
817 float d2 = 0;
818 float rd2;
819
820 for (i = 0; i < n_comp; i++) {
821 iv[i] = vec[1][i] - vec[2][i];
822 d2 += iv[i] * iv[i];
823 }
824 rd2 = (float)n_vect / d2;
825 b = 0;
826 for (i = 0; i < n_comp; i++) {
827 b -= iv[i] * vec[2][i];
828 iv[i] *= rd2;
829 }
830 b = b * rd2 + 0.5f;
831
832 /* add in texels */
833 lohi = 0;
834 for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
835 int texel;
836 /* interpolate color */
837 float dot = 0;
838 for (i = 0; i < n_comp; i++) {
839 dot += input[k][i] * iv[i];
840 }
841 texel = (int)(dot + b);
842 if (texel < 0) {
843 texel = 0;
844 } else if (texel > n_vect) {
845 texel = n_vect;
846 }
847 /* add in texel */
848 lohi <<= 2;
849 lohi |= texel;
850 }
851
852 cc[1] = lohi;
853 }
854
855 FX64_MOV32(hi, 7); /* alpha = "011" + lerp = 1 */
856 for (j = n_vect - 1; j >= 0; j--) {
857 /* add in alphas */
858 FX64_SHL(hi, 5);
859 FX64_OR32(hi, (unsigned int)(vec[j][ACOMP] / 8.0));
860 }
861 for (j = n_vect - 1; j >= 0; j--) {
862 for (i = 0; i < n_comp - 1; i++) {
863 /* add in colors */
864 FX64_SHL(hi, 5);
865 FX64_OR32(hi, (unsigned int)(vec[j][i] / 8.0));
866 }
867 }
868 ((Fx64 *)cc)[1] = hi;
869 }
870
871
872 static void
873 fxt1_quantize_HI (unsigned long *cc,
874 unsigned char input[N_TEXELS][MAX_COMP],
875 unsigned char reord[N_TEXELS][MAX_COMP], int n)
876 {
877 const int n_vect = 6; /* highest vector number */
878 const int n_comp = 3; /* 3 components: R, G, B */
879 float b = 0.0; /* phoudoin: silent compiler! */
880 float iv[MAX_COMP]; /* interpolation vector */
881 int i, k;
882 unsigned long hihi; /* high quadword: hi dword */
883
884 int minSum = 1000; /* big enough */
885 int maxSum = -1; /* small enough */
886 int minCol = 0; /* phoudoin: silent compiler! */
887 int maxCol = 0; /* phoudoin: silent compiler! */
888
889 /* Our solution here is to find the darkest and brightest colors in
890 * the 8x4 tile and use those as the two representative colors.
891 * There are probably better algorithms to use (histogram-based).
892 */
893 for (k = 0; k < n; k++) {
894 int sum = 0;
895 for (i = 0; i < n_comp; i++) {
896 sum += reord[k][i];
897 }
898 if (minSum > sum) {
899 minSum = sum;
900 minCol = k;
901 }
902 if (maxSum < sum) {
903 maxSum = sum;
904 maxCol = k;
905 }
906 }
907
908 hihi = 0; /* cc-hi = "00" */
909 for (i = 0; i < n_comp; i++) {
910 /* add in colors */
911 hihi <<= 5;
912 hihi |= reord[maxCol][i] >> 3;
913 }
914 for (i = 0; i < n_comp; i++) {
915 /* add in colors */
916 hihi <<= 5;
917 hihi |= reord[minCol][i] >> 3;
918 }
919 cc[3] = hihi;
920 cc[0] = cc[1] = cc[2] = 0;
921
922 /* compute interpolation vector */
923 if (minCol != maxCol) {
924 float d2 = 0;
925 float rd2;
926
927 for (i = 0; i < n_comp; i++) {
928 iv[i] = reord[maxCol][i] - reord[minCol][i];
929 d2 += iv[i] * iv[i];
930 }
931 rd2 = (float)n_vect / d2;
932 b = 0;
933 for (i = 0; i < n_comp; i++) {
934 b -= iv[i] * reord[minCol][i];
935 iv[i] *= rd2;
936 }
937 b = b * rd2 + 0.5f;
938 }
939
940 /* add in texels */
941 for (k = N_TEXELS - 1; k >= 0; k--) {
942 int t = k * 3;
943 unsigned long *kk = (unsigned long *)((unsigned long)cc + t / 8);
944 int texel = n_vect + 1; /* transparent black */
945
946 if (!ISTBLACK(input[k])) {
947 if (minCol != maxCol) {
948 /* interpolate color */
949 float dot = 0;
950 for (i = 0; i < n_comp; i++) {
951 dot += input[k][i] * iv[i];
952 }
953 texel = (int)(dot + b);
954 if (texel < 0) {
955 texel = 0;
956 } else if (texel > n_vect) {
957 texel = n_vect;
958 }
959 /* add in texel */
960 kk[0] |= texel << (t & 7);
961 }
962 } else {
963 /* add in texel */
964 kk[0] |= texel << (t & 7);
965 }
966 }
967 }
968
969
970 static void
971 fxt1_quantize_MIXED1 (unsigned long *cc,
972 unsigned char input[N_TEXELS][MAX_COMP])
973 {
974 const int n_vect = 2; /* highest vector number in each microtile */
975 const int n_comp = 3; /* 3 components: R, G, B */
976 unsigned char vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
977 float b, iv[MAX_COMP]; /* interpolation vector */
978 int i, j, k;
979 Fx64 hi; /* high quadword */
980 unsigned long lohi, lolo; /* low quadword: hi dword, lo dword */
981
982 int minSum;
983 int maxSum;
984 int minColL = 0, maxColL = -1;
985 int minColR = 0, maxColR = -1;
986
987 /* Our solution here is to find the darkest and brightest colors in
988 * the 4x4 tile and use those as the two representative colors.
989 * There are probably better algorithms to use (histogram-based).
990 */
991 minSum = 1000; /* big enough */
992 maxSum = -1; /* small enough */
993 for (k = 0; k < N_TEXELS / 2; k++) {
994 if (!ISTBLACK(input[k])) {
995 int sum = 0;
996 for (i = 0; i < n_comp; i++) {
997 sum += input[k][i];
998 }
999 if (minSum > sum) {
1000 minSum = sum;
1001 minColL = k;
1002 }
1003 if (maxSum < sum) {
1004 maxSum = sum;
1005 maxColL = k;
1006 }
1007 }
1008 }
1009 minSum = 1000; /* big enough */
1010 maxSum = -1; /* small enough */
1011 for (; k < N_TEXELS; k++) {
1012 if (!ISTBLACK(input[k])) {
1013 int sum = 0;
1014 for (i = 0; i < n_comp; i++) {
1015 sum += input[k][i];
1016 }
1017 if (minSum > sum) {
1018 minSum = sum;
1019 minColR = k;
1020 }
1021 if (maxSum < sum) {
1022 maxSum = sum;
1023 maxColR = k;
1024 }
1025 }
1026 }
1027
1028 /* left microtile */
1029 if (maxColL == -1) {
1030 /* all transparent black */
1031 cc[0] = -1;
1032 for (i = 0; i < n_comp; i++) {
1033 vec[0][i] = 0;
1034 vec[1][i] = 0;
1035 }
1036 } else {
1037 cc[0] = 0;
1038 for (i = 0; i < n_comp; i++) {
1039 vec[0][i] = input[minColL][i];
1040 vec[1][i] = input[maxColL][i];
1041 }
1042 if (minColL != maxColL) {
1043 /* compute interpolation vector */
1044 float d2 = 0;
1045 float rd2;
1046
1047 for (i = 0; i < n_comp; i++) {
1048 iv[i] = vec[1][i] - vec[0][i];
1049 d2 += iv[i] * iv[i];
1050 }
1051 rd2 = (float)n_vect / d2;
1052 b = 0;
1053 for (i = 0; i < n_comp; i++) {
1054 b -= iv[i] * vec[0][i];
1055 iv[i] *= rd2;
1056 }
1057 b = b * rd2 + 0.5f;
1058
1059 /* add in texels */
1060 lolo = 0;
1061 for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
1062 int texel = n_vect + 1; /* transparent black */
1063 if (!ISTBLACK(input[k])) {
1064 /* interpolate color */
1065 float dot = 0;
1066 for (i = 0; i < n_comp; i++) {
1067 dot += input[k][i] * iv[i];
1068 }
1069 texel = (int)(dot + b);
1070 if (texel < 0) {
1071 texel = 0;
1072 } else if (texel > n_vect) {
1073 texel = n_vect;
1074 }
1075 }
1076 /* add in texel */
1077 lolo <<= 2;
1078 lolo |= texel;
1079 }
1080 cc[0] = lolo;
1081 }
1082 }
1083
1084 /* right microtile */
1085 if (maxColR == -1) {
1086 /* all transparent black */
1087 cc[1] = -1;
1088 for (i = 0; i < n_comp; i++) {
1089 vec[2][i] = 0;
1090 vec[3][i] = 0;
1091 }
1092 } else {
1093 cc[1] = 0;
1094 for (i = 0; i < n_comp; i++) {
1095 vec[2][i] = input[minColR][i];
1096 vec[3][i] = input[maxColR][i];
1097 }
1098 if (minColR != maxColR) {
1099 /* compute interpolation vector */
1100 float d2 = 0;
1101 float rd2;
1102
1103 for (i = 0; i < n_comp; i++) {
1104 iv[i] = vec[3][i] - vec[2][i];
1105 d2 += iv[i] * iv[i];
1106 }
1107 rd2 = (float)n_vect / d2;
1108 b = 0;
1109 for (i = 0; i < n_comp; i++) {
1110 b -= iv[i] * vec[2][i];
1111 iv[i] *= rd2;
1112 }
1113 b = b * rd2 + 0.5f;
1114
1115 /* add in texels */
1116 lohi = 0;
1117 for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
1118 int texel = n_vect + 1; /* transparent black */
1119 if (!ISTBLACK(input[k])) {
1120 /* interpolate color */
1121 float dot = 0;
1122 for (i = 0; i < n_comp; i++) {
1123 dot += input[k][i] * iv[i];
1124 }
1125 texel = (int)(dot + b);
1126 if (texel < 0) {
1127 texel = 0;
1128 } else if (texel > n_vect) {
1129 texel = n_vect;
1130 }
1131 }
1132 /* add in texel */
1133 lohi <<= 2;
1134 lohi |= texel;
1135 }
1136 cc[1] = lohi;
1137 }
1138 }
1139
1140 FX64_MOV32(hi, 9 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
1141 for (j = 2 * 2 - 1; j >= 0; j--) {
1142 for (i = 0; i < n_comp; i++) {
1143 /* add in colors */
1144 FX64_SHL(hi, 5);
1145 FX64_OR32(hi, vec[j][i] >> 3);
1146 }
1147 }
1148 ((Fx64 *)cc)[1] = hi;
1149 }
1150
1151
1152 static void
1153 fxt1_quantize_MIXED0 (unsigned long *cc,
1154 unsigned char input[N_TEXELS][MAX_COMP])
1155 {
1156 const int n_vect = 3; /* highest vector number in each microtile */
1157 const int n_comp = 3; /* 3 components: R, G, B */
1158 unsigned char vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
1159 float b, iv[MAX_COMP]; /* interpolation vector */
1160 int i, j, k;
1161 Fx64 hi; /* high quadword */
1162 unsigned long lohi, lolo; /* low quadword: hi dword, lo dword */
1163
1164 int minColL = 0, maxColL = 0;
1165 int minColR = 0, maxColR = 0;
1166 #if 0
1167 int minSum;
1168 int maxSum;
1169
1170 /* Our solution here is to find the darkest and brightest colors in
1171 * the 4x4 tile and use those as the two representative colors.
1172 * There are probably better algorithms to use (histogram-based).
1173 */
1174 minSum = 1000; /* big enough */
1175 maxSum = -1; /* small enough */
1176 for (k = 0; k < N_TEXELS / 2; k++) {
1177 int sum = 0;
1178 for (i = 0; i < n_comp; i++) {
1179 sum += input[k][i];
1180 }
1181 if (minSum > sum) {
1182 minSum = sum;
1183 minColL = k;
1184 }
1185 if (maxSum < sum) {
1186 maxSum = sum;
1187 maxColL = k;
1188 }
1189 }
1190 minSum = 1000; /* big enough */
1191 maxSum = -1; /* small enough */
1192 for (; k < N_TEXELS; k++) {
1193 int sum = 0;
1194 for (i = 0; i < n_comp; i++) {
1195 sum += input[k][i];
1196 }
1197 if (minSum > sum) {
1198 minSum = sum;
1199 minColR = k;
1200 }
1201 if (maxSum < sum) {
1202 maxSum = sum;
1203 maxColR = k;
1204 }
1205 }
1206 #else
1207 int minVal;
1208 int maxVal;
1209 int maxVarL = fxt1_variance(NULL, input, n_comp, N_TEXELS / 2);
1210 int maxVarR = fxt1_variance(NULL, &input[N_TEXELS / 2], n_comp, N_TEXELS / 2);
1211
1212 /* Scan the channel with max variance for lo & hi
1213 * and use those as the two representative colors.
1214 */
1215 minVal = 1000; /* big enough */
1216 maxVal = -1; /* small enough */
1217 for (k = 0; k < N_TEXELS / 2; k++) {
1218 int t = input[k][maxVarL];
1219 if (minVal > t) {
1220 minVal = t;
1221 minColL = k;
1222 }
1223 if (maxVal < t) {
1224 maxVal = t;
1225 maxColL = k;
1226 }
1227 }
1228 minVal = 1000; /* big enough */
1229 maxVal = -1; /* small enough */
1230 for (; k < N_TEXELS; k++) {
1231 int t = input[k][maxVarR];
1232 if (minVal > t) {
1233 minVal = t;
1234 minColR = k;
1235 }
1236 if (maxVal < t) {
1237 maxVal = t;
1238 maxColR = k;
1239 }
1240 }
1241 #endif
1242
1243 /* left microtile */
1244 cc[0] = 0;
1245 for (i = 0; i < n_comp; i++) {
1246 vec[0][i] = input[minColL][i];
1247 vec[1][i] = input[maxColL][i];
1248 }
1249 if (minColL != maxColL) {
1250 /* compute interpolation vector */
1251 float d2 = 0;
1252 float rd2;
1253
1254 for (i = 0; i < n_comp; i++) {
1255 iv[i] = vec[1][i] - vec[0][i];
1256 d2 += iv[i] * iv[i];
1257 }
1258 rd2 = (float)n_vect / d2;
1259 b = 0;
1260 for (i = 0; i < n_comp; i++) {
1261 b -= iv[i] * vec[0][i];
1262 iv[i] *= rd2;
1263 }
1264 b = b * rd2 + 0.5f;
1265
1266 /* add in texels */
1267 lolo = 0;
1268 for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
1269 int texel;
1270 /* interpolate color */
1271 float dot = 0;
1272 for (i = 0; i < n_comp; i++) {
1273 dot += input[k][i] * iv[i];
1274 }
1275 texel = (int)(dot + b);
1276 if (texel < 0) {
1277 texel = 0;
1278 } else if (texel > n_vect) {
1279 texel = n_vect;
1280 }
1281 /* add in texel */
1282 lolo <<= 2;
1283 lolo |= texel;
1284 }
1285
1286 /* funky encoding for LSB of green */
1287 if (((lolo >> 1) & 1) != (((vec[1][GCOMP] ^ vec[0][GCOMP]) >> 2) & 1)) {
1288 for (i = 0; i < n_comp; i++) {
1289 vec[1][i] = input[minColL][i];
1290 vec[0][i] = input[maxColL][i];
1291 }
1292 lolo = ~lolo;
1293 }
1294
1295 cc[0] = lolo;
1296 }
1297
1298 /* right microtile */
1299 cc[1] = 0;
1300 for (i = 0; i < n_comp; i++) {
1301 vec[2][i] = input[minColR][i];
1302 vec[3][i] = input[maxColR][i];
1303 }
1304 if (minColR != maxColR) {
1305 /* compute interpolation vector */
1306 float d2 = 0;
1307 float rd2;
1308
1309 for (i = 0; i < n_comp; i++) {
1310 iv[i] = vec[3][i] - vec[2][i];
1311 d2 += iv[i] * iv[i];
1312 }
1313 rd2 = (float)n_vect / d2;
1314 b = 0;
1315 for (i = 0; i < n_comp; i++) {
1316 b -= iv[i] * vec[2][i];
1317 iv[i] *= rd2;
1318 }
1319 b = b * rd2 + 0.5f;
1320
1321 /* add in texels */
1322 lohi = 0;
1323 for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
1324 int texel;
1325 /* interpolate color */
1326 float dot = 0;
1327 for (i = 0; i < n_comp; i++) {
1328 dot += input[k][i] * iv[i];
1329 }
1330 texel = (int)(dot + b);
1331 if (texel < 0) {
1332 texel = 0;
1333 } else if (texel > n_vect) {
1334 texel = n_vect;
1335 }
1336 /* add in texel */
1337 lohi <<= 2;
1338 lohi |= texel;
1339 }
1340
1341 /* funky encoding for LSB of green */
1342 if (((lohi >> 1) & 1) != (((vec[3][GCOMP] ^ vec[2][GCOMP]) >> 2) & 1)) {
1343 for (i = 0; i < n_comp; i++) {
1344 vec[3][i] = input[minColR][i];
1345 vec[2][i] = input[maxColR][i];
1346 }
1347 lohi = ~lohi;
1348 }
1349
1350 cc[1] = lohi;
1351 }
1352
1353 FX64_MOV32(hi, 8 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
1354 for (j = 2 * 2 - 1; j >= 0; j--) {
1355 for (i = 0; i < n_comp; i++) {
1356 /* add in colors */
1357 FX64_SHL(hi, 5);
1358 FX64_OR32(hi, vec[j][i] >> 3);
1359 }
1360 }
1361 ((Fx64 *)cc)[1] = hi;
1362 }
1363
1364
1365 static void
1366 fxt1_quantize (unsigned long *cc, const unsigned char *lines[], int comps)
1367 {
1368 int trualpha;
1369 unsigned char reord[N_TEXELS][MAX_COMP];
1370
1371 unsigned char input[N_TEXELS][MAX_COMP];
1372 int i, k, l;
1373
1374 memset(input, -1, sizeof(input));
1375
1376 /* 8 texels each line */
1377 for (l = 0; l < 4; l++) {
1378 for (k = 0; k < 4; k++) {
1379 for (i = 0; i < comps; i++) {
1380 input[k + l * 4][i] = *lines[l]++;
1381 }
1382 }
1383 for (; k < 8; k++) {
1384 for (i = 0; i < comps; i++) {
1385 input[k + l * 4 + 12][i] = *lines[l]++;
1386 }
1387 }
1388 }
1389
1390 /* block looks like this:
1391 * 00, 01, 02, 03, 08, 09, 0a, 0b
1392 * 10, 11, 12, 13, 18, 19, 1a, 1b
1393 * 04, 05, 06, 07, 0c, 0d, 0e, 0f
1394 * 14, 15, 16, 17, 1c, 1d, 1e, 1f
1395 */
1396
1397 /* [dBorca]
1398 * stupidity flows forth from this
1399 */
1400 l = N_TEXELS;
1401 trualpha = 0;
1402 if (comps == 4) {
1403 /* skip all transparent black texels */
1404 l = 0;
1405 for (k = 0; k < N_TEXELS; k++) {
1406 /* test all components against 0 */
1407 if (!ISTBLACK(input[k])) {
1408 /* texel is not transparent black */
1409 COPY_4UBV(reord[l], input[k]);
1410 if (reord[l][ACOMP] < (255 - ALPHA_TS)) {
1411 /* non-opaque texel */
1412 trualpha = !0;
1413 }
1414 l++;
1415 }
1416 }
1417 }
1418
1419 #if 0
1420 if (trualpha) {
1421 fxt1_quantize_ALPHA0(cc, input, reord, l);
1422 } else if (l == 0) {
1423 cc[0] = cc[1] = cc[2] = -1;
1424 cc[3] = 0;
1425 } else if (l < N_TEXELS) {
1426 fxt1_quantize_HI(cc, input, reord, l);
1427 } else {
1428 fxt1_quantize_CHROMA(cc, input);
1429 }
1430 #else
1431 if (trualpha) {
1432 fxt1_quantize_ALPHA1(cc, input);
1433 } else if (l == 0) {
1434 cc[0] = cc[1] = cc[2] = -1;
1435 cc[3] = 0;
1436 } else if (l < N_TEXELS) {
1437 fxt1_quantize_MIXED1(cc, input);
1438 } else {
1439 fxt1_quantize_MIXED0(cc, input);
1440 }
1441 #endif
1442 }
1443
1444
1445 int
1446 fxt1_encode (GLcontext *ctx,
1447 unsigned int width, unsigned int height,
1448 int srcFormat,
1449 const void *source, int srcRowStride,
1450 void *dest, int destRowStride)
1451 {
1452 const int comps = (srcFormat == GL_RGB) ? 3 : 4;
1453 unsigned int x, y;
1454 const unsigned char *data;
1455 unsigned long *encoded = dest;
1456 GLubyte *newSource = NULL;
1457
1458 /*
1459 * Rescale image if width is less than 8 or height is less than 4.
1460 */
1461 if (width < 8 || height < 4) {
1462 GLint newWidth = (width + 7) & ~7;
1463 GLint newHeight = (height + 3) & ~3;
1464 newSource = MALLOC(comps * newWidth * newHeight * sizeof(GLchan));
1465 _mesa_upscale_teximage2d(width, height, newWidth, newHeight,
1466 comps, source, srcRowStride, newSource);
1467 source = newSource;
1468 width = newWidth;
1469 height = newHeight;
1470 srcRowStride = comps * newWidth;
1471 }
1472
1473 data = source;
1474 destRowStride = (destRowStride - width * 2) / 4;
1475 for (y = 0; y < height; y += 4) {
1476 unsigned int offs = 0 + (y + 0) * srcRowStride;
1477 for (x = 0; x < width; x += 8) {
1478 const unsigned char *lines[4];
1479 lines[0] = &data[offs];
1480 lines[1] = lines[0] + srcRowStride;
1481 lines[2] = lines[1] + srcRowStride;
1482 lines[3] = lines[2] + srcRowStride;
1483 offs += 8 * comps;
1484 fxt1_quantize(encoded, lines, comps);
1485 /* 128 bits per 8x4 block = 4bpp */
1486 encoded += 4;
1487 }
1488 encoded += destRowStride;
1489 }
1490
1491 if (newSource != NULL) {
1492 FREE(newSource);
1493 }
1494
1495 return 0;
1496 }
1497
1498
1499 /***************************************************************************\
1500 * FXT1 decoder
1501 *
1502 * The decoder is based on GL_3DFX_texture_compression_FXT1
1503 * specification and serves as a concept for the encoder.
1504 \***************************************************************************/
1505
1506
/* lookup table for scaling 5 bit colors up to 8 bits */
static const unsigned char _rgb_scale_5[] = {
    0,   8,   16,  25,  33,  41,  49,  58,
    66,  74,  82,  90,  99,  107, 115, 123,
    132, 140, 148, 156, 165, 173, 181, 189,
    197, 206, 214, 222, 230, 239, 247, 255
};

/* lookup table for scaling 6 bit colors up to 8 bits */
static const unsigned char _rgb_scale_6[] = {
    0,   4,   8,   12,  16,  20,  24,  28,
    32,  36,  40,  45,  49,  53,  57,  61,
    65,  69,  73,  77,  81,  85,  89,  93,
    97,  101, 105, 109, 113, 117, 121, 125,
    130, 134, 138, 142, 146, 150, 154, 158,
    162, 166, 170, 174, 178, 182, 186, 190,
    194, 198, 202, 206, 210, 215, 219, 223,
    227, 231, 235, 239, 243, 247, 251, 255
};


/* CC_SEL: the block seen as 32-bit words, shifted so that bit number
 * `which' lands in bit 0; the caller masks off the bits it needs.  The
 * words are always read as unsigned int (assumed to be 32 bits wide) so
 * that the indexing does not depend on the pointer type passed in.
 */
#define CC_SEL(cc, which) \
    (((const unsigned int *)(cc))[(which) / 32] >> ((which) & 31))
/* expand a 5 bit color (low 5 bits of c) to 8 bits */
#define UP5(c) (_rgb_scale_5[(c) & 31])
/* expand a 6 bit color (5 bits of c, extra LSB b) to 8 bits */
#define UP6(c, b) (_rgb_scale_6[(((c) & 31) << 1) | ((b) & 1)])
/* rounded blend: t/n of the way from c0 to c1 */
#define LERP(n, t, c0, c1) \
    ((((n) - (t)) * (c0) + (t) * (c1) + (n) / 2) / (n))
/* clear exactly the four bytes of one RGBA texel */
#define ZERO_4UBV(v) \
    do { \
        unsigned char *zero4_ = (unsigned char *)(v); \
        zero4_[0] = zero4_[1] = zero4_[2] = zero4_[3] = 0; \
    } while (0)
1533
1534
1535 static void
1536 fxt1_decode_1HI (unsigned long code, int t, unsigned char *rgba)
1537 {
1538 const unsigned long *cc;
1539
1540 t *= 3;
1541 cc = (unsigned long *)(code + t / 8);
1542 t = (cc[0] >> (t & 7)) & 7;
1543
1544 if (t == 7) {
1545 ZERO_4UBV(rgba);
1546 } else {
1547 cc = (unsigned long *)(code + 12);
1548 if (t == 0) {
1549 rgba[BCOMP] = UP5(CC_SEL(cc, 0));
1550 rgba[GCOMP] = UP5(CC_SEL(cc, 5));
1551 rgba[RCOMP] = UP5(CC_SEL(cc, 10));
1552 } else if (t == 6) {
1553 rgba[BCOMP] = UP5(CC_SEL(cc, 15));
1554 rgba[GCOMP] = UP5(CC_SEL(cc, 20));
1555 rgba[RCOMP] = UP5(CC_SEL(cc, 25));
1556 } else {
1557 rgba[BCOMP] = LERP(6, t, UP5(CC_SEL(cc, 0)), UP5(CC_SEL(cc, 15)));
1558 rgba[GCOMP] = LERP(6, t, UP5(CC_SEL(cc, 5)), UP5(CC_SEL(cc, 20)));
1559 rgba[RCOMP] = LERP(6, t, UP5(CC_SEL(cc, 10)), UP5(CC_SEL(cc, 25)));
1560 }
1561 rgba[ACOMP] = 255;
1562 }
1563 }
1564
1565
1566 static void
1567 fxt1_decode_1CHROMA (unsigned long code, int t, unsigned char *rgba)
1568 {
1569 const unsigned long *cc;
1570 unsigned long kk;
1571
1572 cc = (unsigned long *)code;
1573 if (t & 16) {
1574 cc++;
1575 t &= 15;
1576 }
1577 t = (cc[0] >> (t * 2)) & 3;
1578
1579 t *= 15;
1580 cc = (unsigned long *)(code + 8 + t / 8);
1581 kk = cc[0] >> (t & 7);
1582 rgba[BCOMP] = UP5(kk);
1583 rgba[GCOMP] = UP5(kk >> 5);
1584 rgba[RCOMP] = UP5(kk >> 10);
1585 rgba[ACOMP] = 255;
1586 }
1587
1588
1589 static void
1590 fxt1_decode_1MIXED (unsigned long code, int t, unsigned char *rgba)
1591 {
1592 const unsigned long *cc;
1593 unsigned int col[2][3];
1594 int glsb, selb;
1595
1596 cc = (unsigned long *)code;
1597 if (t & 16) {
1598 t &= 15;
1599 t = (cc[1] >> (t * 2)) & 3;
1600 /* col 2 */
1601 col[0][BCOMP] = (*(unsigned long *)(code + 11)) >> 6;
1602 col[0][GCOMP] = CC_SEL(cc, 99);
1603 col[0][RCOMP] = CC_SEL(cc, 104);
1604 /* col 3 */
1605 col[1][BCOMP] = CC_SEL(cc, 109);
1606 col[1][GCOMP] = CC_SEL(cc, 114);
1607 col[1][RCOMP] = CC_SEL(cc, 119);
1608 glsb = CC_SEL(cc, 126);
1609 selb = CC_SEL(cc, 33);
1610 } else {
1611 t = (cc[0] >> (t * 2)) & 3;
1612 /* col 0 */
1613 col[0][BCOMP] = CC_SEL(cc, 64);
1614 col[0][GCOMP] = CC_SEL(cc, 69);
1615 col[0][RCOMP] = CC_SEL(cc, 74);
1616 /* col 1 */
1617 col[1][BCOMP] = CC_SEL(cc, 79);
1618 col[1][GCOMP] = CC_SEL(cc, 84);
1619 col[1][RCOMP] = CC_SEL(cc, 89);
1620 glsb = CC_SEL(cc, 125);
1621 selb = CC_SEL(cc, 1);
1622 }
1623
1624 if (CC_SEL(cc, 124) & 1) {
1625 /* alpha[0] == 1 */
1626
1627 if (t == 3) {
1628 ZERO_4UBV(rgba);
1629 } else {
1630 if (t == 0) {
1631 rgba[BCOMP] = UP5(col[0][BCOMP]);
1632 rgba[GCOMP] = UP5(col[0][GCOMP]);
1633 rgba[RCOMP] = UP5(col[0][RCOMP]);
1634 } else if (t == 2) {
1635 rgba[BCOMP] = UP5(col[1][BCOMP]);
1636 rgba[GCOMP] = UP6(col[1][GCOMP], glsb);
1637 rgba[RCOMP] = UP5(col[1][RCOMP]);
1638 } else {
1639 rgba[BCOMP] = (UP5(col[0][BCOMP]) + UP5(col[1][BCOMP])) / 2;
1640 rgba[GCOMP] = (UP5(col[0][GCOMP]) + UP6(col[1][GCOMP], glsb)) / 2;
1641 rgba[RCOMP] = (UP5(col[0][RCOMP]) + UP5(col[1][RCOMP])) / 2;
1642 }
1643 rgba[ACOMP] = 255;
1644 }
1645 } else {
1646 /* alpha[0] == 0 */
1647
1648 if (t == 0) {
1649 rgba[BCOMP] = UP5(col[0][BCOMP]);
1650 rgba[GCOMP] = UP6(col[0][GCOMP], glsb ^ selb);
1651 rgba[RCOMP] = UP5(col[0][RCOMP]);
1652 } else if (t == 3) {
1653 rgba[BCOMP] = UP5(col[1][BCOMP]);
1654 rgba[GCOMP] = UP6(col[1][GCOMP], glsb);
1655 rgba[RCOMP] = UP5(col[1][RCOMP]);
1656 } else {
1657 rgba[BCOMP] = LERP(3, t, UP5(col[0][BCOMP]), UP5(col[1][BCOMP]));
1658 rgba[GCOMP] = LERP(3, t, UP6(col[0][GCOMP], glsb ^ selb),
1659 UP6(col[1][GCOMP], glsb));
1660 rgba[RCOMP] = LERP(3, t, UP5(col[0][RCOMP]), UP5(col[1][RCOMP]));
1661 }
1662 rgba[ACOMP] = 255;
1663 }
1664 }
1665
1666
1667 static void
1668 fxt1_decode_1ALPHA (unsigned long code, int t, unsigned char *rgba)
1669 {
1670 const unsigned long *cc;
1671
1672 cc = (unsigned long *)code;
1673 if (CC_SEL(cc, 124) & 1) {
1674 /* lerp == 1 */
1675 unsigned int col0[4];
1676
1677 if (t & 16) {
1678 t &= 15;
1679 t = (cc[1] >> (t * 2)) & 3;
1680 /* col 2 */
1681 col0[BCOMP] = (*(unsigned long *)(code + 11)) >> 6;
1682 col0[GCOMP] = CC_SEL(cc, 99);
1683 col0[RCOMP] = CC_SEL(cc, 104);
1684 col0[ACOMP] = CC_SEL(cc, 119);
1685 } else {
1686 t = (cc[0] >> (t * 2)) & 3;
1687 /* col 0 */
1688 col0[BCOMP] = CC_SEL(cc, 64);
1689 col0[GCOMP] = CC_SEL(cc, 69);
1690 col0[RCOMP] = CC_SEL(cc, 74);
1691 col0[ACOMP] = CC_SEL(cc, 109);
1692 }
1693
1694 if (t == 0) {
1695 rgba[BCOMP] = UP5(col0[BCOMP]);
1696 rgba[GCOMP] = UP5(col0[GCOMP]);
1697 rgba[RCOMP] = UP5(col0[RCOMP]);
1698 rgba[ACOMP] = UP5(col0[ACOMP]);
1699 } else if (t == 3) {
1700 rgba[BCOMP] = UP5(CC_SEL(cc, 79));
1701 rgba[GCOMP] = UP5(CC_SEL(cc, 84));
1702 rgba[RCOMP] = UP5(CC_SEL(cc, 89));
1703 rgba[ACOMP] = UP5(CC_SEL(cc, 114));
1704 } else {
1705 rgba[BCOMP] = LERP(3, t, UP5(col0[BCOMP]), UP5(CC_SEL(cc, 79)));
1706 rgba[GCOMP] = LERP(3, t, UP5(col0[GCOMP]), UP5(CC_SEL(cc, 84)));
1707 rgba[RCOMP] = LERP(3, t, UP5(col0[RCOMP]), UP5(CC_SEL(cc, 89)));
1708 rgba[ACOMP] = LERP(3, t, UP5(col0[ACOMP]), UP5(CC_SEL(cc, 114)));
1709 }
1710 } else {
1711 /* lerp == 0 */
1712
1713 if (t & 16) {
1714 cc++;
1715 t &= 15;
1716 }
1717 t = (cc[0] >> (t * 2)) & 3;
1718
1719 if (t == 3) {
1720 ZERO_4UBV(rgba);
1721 } else {
1722 unsigned long kk;
1723 cc = (unsigned long *)code;
1724 rgba[ACOMP] = UP5(cc[3] >> (t * 5 + 13));
1725 t *= 15;
1726 cc = (unsigned long *)(code + 8 + t / 8);
1727 kk = cc[0] >> (t & 7);
1728 rgba[BCOMP] = UP5(kk);
1729 rgba[GCOMP] = UP5(kk >> 5);
1730 rgba[RCOMP] = UP5(kk >> 10);
1731 }
1732 }
1733 }
1734
1735
1736 void
1737 fxt1_decode_1 (const void *texture, int width,
1738 int i, int j, unsigned char *rgba)
1739 {
1740 static void (*decode_1[]) (unsigned long, int, unsigned char *) = {
1741 fxt1_decode_1HI, /* cc-high = "00?" */
1742 fxt1_decode_1HI, /* cc-high = "00?" */
1743 fxt1_decode_1CHROMA, /* cc-chroma = "010" */
1744 fxt1_decode_1ALPHA, /* alpha = "011" */
1745 fxt1_decode_1MIXED, /* mixed = "1??" */
1746 fxt1_decode_1MIXED, /* mixed = "1??" */
1747 fxt1_decode_1MIXED, /* mixed = "1??" */
1748 fxt1_decode_1MIXED /* mixed = "1??" */
1749 };
1750
1751 unsigned long code = (unsigned long)texture +
1752 ((j / 4) * (width / 8) + (i / 8)) * 16;
1753 int mode = CC_SEL((unsigned long *)code, 125);
1754 int t = i & 7;
1755
1756 if (t & 4) {
1757 t += 12;
1758 }
1759 t += (j & 3) * 4;
1760
1761 decode_1[mode](code, t, rgba);
1762 }