cleaned up the mess a bit
[mesa.git] / src / mesa / main / texcompress_fxt1.c
1 /*
2 * Mesa 3-D graphics library
3 * Version: 6.1
4 *
5 * Copyright (C) 1999-2004 Brian Paul All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included
15 * in all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
21 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25
26 /**
27 * \file texcompress_fxt1.c
28 * GL_EXT_texture_compression_fxt1 support.
29 */
30
31
32 #include "glheader.h"
33 #include "imports.h"
34 #include "colormac.h"
35 #include "context.h"
36 #include "convolve.h"
37 #include "image.h"
38 #include "texcompress.h"
39 #include "texformat.h"
40 #include "texstore.h"
41
42
/*
 * FXT1 codec entry points, declared ahead of their use by the
 * texstore and fetch functions in this file.
 */

/*
 * Compress an image into FXT1 blocks.
 *
 *   width, height   size of the source image, in texels
 *   comps           components per source texel; the callers in this file
 *                   pass 3 (RGB) or 4 (RGBA)
 *   source          source texels
 *   srcRowStride    source row stride; the callers in this file pass it in
 *                   GLchan units
 *   dest            where the compressed data is written
 *   destRowStride   destination row stride, passed through unchanged from
 *                   the texstore function's dstRowStride
 *
 * NOTE(review): the meaning of the int return value is not visible from
 * here, and the callers in this file ignore it.
 */
int
fxt1_encode (unsigned int width, unsigned int height, int comps,
             const void *source, int srcRowStride,
             void *dest, int destRowStride);

/*
 * Decode the single texel at coordinates (i, j) of a compressed image.
 *
 *   texture   compressed image data (callers pass texImage->Data)
 *   stride    callers pass texImage->RowStride
 *   rgba      receives the decoded texel; callers read up to four
 *             components from it
 */
void
fxt1_decode_1 (const void *texture, int stride,
               int i, int j, unsigned char *rgba);
50
51
/**
 * Called during context initialization.
 *
 * Currently a no-op: the body does nothing except reference \p ctx so
 * that the parameter is not reported as unused.
 *
 * \param ctx  the GL context being initialized (not used).
 */
void
_mesa_init_texture_fxt1( GLcontext *ctx )
{
   (void) ctx;
}
60
61
62 /**
63 * Called via TexFormat->StoreImage to store an RGB_FXT1 texture.
64 */
65 static GLboolean
66 texstore_rgb_fxt1(STORE_PARAMS)
67 {
68 const GLchan *pixels;
69 GLint srcRowStride;
70 GLubyte *dst;
71 const GLint texWidth = dstRowStride * 8 / 16; /* a bit of a hack */
72 const GLchan *tempImage = NULL;
73
74 ASSERT(dstFormat == &_mesa_texformat_rgb_fxt1);
75 ASSERT(dstXoffset % 8 == 0);
76 ASSERT(dstYoffset % 4 == 0);
77 ASSERT(dstZoffset == 0);
78 (void) dstZoffset; (void) dstImageStride;
79
80 if (srcFormat != GL_RGB ||
81 srcType != CHAN_TYPE ||
82 ctx->_ImageTransferState ||
83 srcPacking->SwapBytes) {
84 /* convert image to RGB/GLchan */
85 tempImage = _mesa_make_temp_chan_image(ctx, dims,
86 baseInternalFormat,
87 dstFormat->BaseFormat,
88 srcWidth, srcHeight, srcDepth,
89 srcFormat, srcType, srcAddr,
90 srcPacking);
91 if (!tempImage)
92 return GL_FALSE; /* out of memory */
93 _mesa_adjust_image_for_convolution(ctx, dims, &srcWidth, &srcHeight);
94 pixels = tempImage;
95 srcRowStride = 3 * srcWidth;
96 srcFormat = GL_RGB;
97 }
98 else {
99 pixels = (const GLchan *) srcAddr;
100 srcRowStride = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat,
101 srcType) / sizeof(GLchan);
102 }
103
104 dst = _mesa_compressed_image_address(dstXoffset, dstYoffset, 0,
105 GL_COMPRESSED_RGB_FXT1_3DFX,
106 texWidth, (GLubyte *) dstAddr);
107
108 fxt1_encode(srcWidth, srcHeight, 3, pixels, srcRowStride,
109 dst, dstRowStride);
110
111 if (tempImage)
112 _mesa_free((void*) tempImage);
113
114 return GL_TRUE;
115 }
116
117
118 /**
119 * Called via TexFormat->StoreImage to store an RGBA_FXT1 texture.
120 */
121 static GLboolean
122 texstore_rgba_fxt1(STORE_PARAMS)
123 {
124 const GLchan *pixels;
125 GLint srcRowStride;
126 GLubyte *dst;
127 GLint texWidth = dstRowStride * 8 / 16; /* a bit of a hack */
128 const GLchan *tempImage = NULL;
129
130 ASSERT(dstFormat == &_mesa_texformat_rgba_fxt1);
131 ASSERT(dstXoffset % 8 == 0);
132 ASSERT(dstYoffset % 4 == 0);
133 ASSERT(dstZoffset == 0);
134 (void) dstZoffset; (void) dstImageStride;
135
136 if (srcFormat != GL_RGBA ||
137 srcType != CHAN_TYPE ||
138 ctx->_ImageTransferState ||
139 srcPacking->SwapBytes) {
140 /* convert image to RGBA/GLchan */
141 tempImage = _mesa_make_temp_chan_image(ctx, dims,
142 baseInternalFormat,
143 dstFormat->BaseFormat,
144 srcWidth, srcHeight, srcDepth,
145 srcFormat, srcType, srcAddr,
146 srcPacking);
147 if (!tempImage)
148 return GL_FALSE; /* out of memory */
149 _mesa_adjust_image_for_convolution(ctx, dims, &srcWidth, &srcHeight);
150 pixels = tempImage;
151 srcRowStride = 4 * srcWidth;
152 srcFormat = GL_RGBA;
153 }
154 else {
155 pixels = (const GLchan *) srcAddr;
156 srcRowStride = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat,
157 srcType) / sizeof(GLchan);
158 }
159
160 dst = _mesa_compressed_image_address(dstXoffset, dstYoffset, 0,
161 GL_COMPRESSED_RGBA_FXT1_3DFX,
162 texWidth, (GLubyte *) dstAddr);
163
164 fxt1_encode(srcWidth, srcHeight, 4, pixels, srcRowStride,
165 dst, dstRowStride);
166
167 if (tempImage)
168 _mesa_free((void*) tempImage);
169
170 return GL_TRUE;
171 }
172
173
174 static void
175 fetch_texel_2d_rgba_fxt1( const struct gl_texture_image *texImage,
176 GLint i, GLint j, GLint k, GLchan *texel )
177 {
178 (void) k;
179 fxt1_decode_1(texImage->Data, texImage->RowStride, i, j, texel);
180 }
181
182
183 static void
184 fetch_texel_2d_f_rgba_fxt1( const struct gl_texture_image *texImage,
185 GLint i, GLint j, GLint k, GLfloat *texel )
186 {
187 /* just sample as GLchan and convert to float here */
188 GLchan rgba[4];
189 (void) k;
190 fxt1_decode_1(texImage->Data, texImage->RowStride, i, j, rgba);
191 texel[RCOMP] = CHAN_TO_FLOAT(rgba[RCOMP]);
192 texel[GCOMP] = CHAN_TO_FLOAT(rgba[GCOMP]);
193 texel[BCOMP] = CHAN_TO_FLOAT(rgba[BCOMP]);
194 texel[ACOMP] = CHAN_TO_FLOAT(rgba[ACOMP]);
195 }
196
197
198 static void
199 fetch_texel_2d_rgb_fxt1( const struct gl_texture_image *texImage,
200 GLint i, GLint j, GLint k, GLchan *texel )
201 {
202 (void) k;
203 fxt1_decode_1(texImage->Data, texImage->RowStride, i, j, texel);
204 texel[ACOMP] = 255;
205 }
206
207
208 static void
209 fetch_texel_2d_f_rgb_fxt1( const struct gl_texture_image *texImage,
210 GLint i, GLint j, GLint k, GLfloat *texel )
211 {
212 /* just sample as GLchan and convert to float here */
213 GLchan rgba[4];
214 (void) k;
215 fxt1_decode_1(texImage->Data, texImage->RowStride, i, j, rgba);
216 texel[RCOMP] = CHAN_TO_FLOAT(rgba[RCOMP]);
217 texel[GCOMP] = CHAN_TO_FLOAT(rgba[GCOMP]);
218 texel[BCOMP] = CHAN_TO_FLOAT(rgba[BCOMP]);
219 texel[ACOMP] = 1.0;
220 }
221
222
223
/**
 * Texture format descriptor for MESA_FORMAT_RGB_FXT1.
 *
 * The per-channel bit counts are approximations (as marked) and TexelBytes
 * is 0.  Only the 2D fetch functions are provided; the 1D and 3D slots are
 * left NULL.
 */
const struct gl_texture_format _mesa_texformat_rgb_fxt1 = {
   MESA_FORMAT_RGB_FXT1,                /* MesaFormat */
   GL_RGB,                              /* BaseFormat */
   GL_UNSIGNED_NORMALIZED_ARB,          /* DataType */
   4, /*approx*/                        /* RedBits */
   4, /*approx*/                        /* GreenBits */
   4, /*approx*/                        /* BlueBits */
   0,                                   /* AlphaBits */
   0,                                   /* LuminanceBits */
   0,                                   /* IntensityBits */
   0,                                   /* IndexBits */
   0,                                   /* DepthBits */
   0,                                   /* TexelBytes */
   texstore_rgb_fxt1,                   /* StoreTexImageFunc */
   NULL, /*impossible*/                 /* FetchTexel1D */
   fetch_texel_2d_rgb_fxt1,             /* FetchTexel2D */
   NULL, /*impossible*/                 /* FetchTexel3D */
   NULL, /*impossible*/                 /* FetchTexel1Df */
   fetch_texel_2d_f_rgb_fxt1,           /* FetchTexel2Df */
   NULL, /*impossible*/                 /* FetchTexel3Df */
};
245
/**
 * Texture format descriptor for MESA_FORMAT_RGBA_FXT1.
 *
 * The per-channel bit counts are approximations (as marked) and TexelBytes
 * is 0.  Only the 2D fetch functions are provided; the 1D and 3D slots are
 * left NULL.
 */
const struct gl_texture_format _mesa_texformat_rgba_fxt1 = {
   MESA_FORMAT_RGBA_FXT1,               /* MesaFormat */
   GL_RGBA,                             /* BaseFormat */
   GL_UNSIGNED_NORMALIZED_ARB,          /* DataType */
   4, /*approx*/                        /* RedBits */
   4, /*approx*/                        /* GreenBits */
   4, /*approx*/                        /* BlueBits */
   1, /*approx*/                        /* AlphaBits */
   0,                                   /* LuminanceBits */
   0,                                   /* IntensityBits */
   0,                                   /* IndexBits */
   0,                                   /* DepthBits */
   0,                                   /* TexelBytes */
   texstore_rgba_fxt1,                  /* StoreTexImageFunc */
   NULL, /*impossible*/                 /* FetchTexel1D */
   fetch_texel_2d_rgba_fxt1,            /* FetchTexel2D */
   NULL, /*impossible*/                 /* FetchTexel3D */
   NULL, /*impossible*/                 /* FetchTexel1Df */
   fetch_texel_2d_f_rgba_fxt1,          /* FetchTexel2Df */
   NULL, /*impossible*/                 /* FetchTexel3Df */
};
267
268
269 /***************************************************************************\
270 * FXT1 encoder
271 *
272 * The encoder was built by reversing the decoder,
273 * and is vaguely based on Texus2 by 3dfx. Note that this code
274 * is merely a proof of concept, since it is highly UNoptimized;
275 * moreover, it is sub-optimal due to initial conditions passed
276 * to Lloyd's algorithm (the interpolation modes are even worse).
277 \***************************************************************************/
278
279
#define MAX_COMP 4 /* ever needed maximum number of components in texel */
#define MAX_VECT 4 /* ever needed maximum number of base vectors to find */
#define N_TEXELS 32 /* number of texels in a block (always 32) */
#define LL_N_REP 50 /* number of iterations in lloyd's vq */
#define LL_RMS_D 10 /* fault tolerance (maximum delta) */
#define LL_RMS_E 255 /* fault tolerance (maximum error) */
#define ALPHA_TS 2 /* alpha threshold: (255 - ALPHA_TS) deemed opaque */

/* True when the 4-component texel at v is transparent black, i.e. all
 * four of its bytes are zero.
 *
 * The bytes are tested individually instead of through an
 * `unsigned long *' cast: where unsigned long is 64 bits wide the cast
 * also looked at the following texel (and read past the end of the last
 * one), and it was a misaligned, type-punned access everywhere.
 * Note that v is evaluated more than once.
 */
#define ISTBLACK(v) \
   ((((const unsigned char *)(v))[0] | \
     ((const unsigned char *)(v))[1] | \
     ((const unsigned char *)(v))[2] | \
     ((const unsigned char *)(v))[3]) == 0)
288
289
/*
 * Fx64: a 64-bit accumulator used to assemble the high quadword of a block.
 *
 *   FX64_MOV32(a, b)  a = b          (b is a 32-bit value, upper half cleared)
 *   FX64_OR32(a, b)   a |= b         (b only touches the low 32 bits)
 *   FX64_SHL(a, c)    a <<= c        (0 <= c < 64)
 *
 * With GNU C the native `unsigned long long' is used; otherwise the value
 * is emulated with two 32-bit halves.
 */
#ifdef __GNUC__

#define FX64_NATIVE 1

typedef unsigned long long Fx64;

#define FX64_MOV32(a, b) ((a) = (b))
#define FX64_OR32(a, b)  ((a) |= (b))
#define FX64_SHL(a, c)   ((a) <<= (c))

#else /* !__GNUC__ */

#define FX64_NATIVE 0

/* The halves must be exactly 32 bits each so that the struct overlays one
 * quadword of the block; `unsigned long' is 64 bits on LP64 systems, so
 * `unsigned int' is used (assumed to be 32 bits wide).
 */
typedef struct {
   unsigned int lo, hi;
} Fx64;

/* also clears the upper half, which FX64_SHL reads later */
#define FX64_MOV32(a, b) ((a).hi = 0, (a).lo = (b))
#define FX64_OR32(a, b)  ((a).lo |= (b))

#define FX64_SHL(a, c) \
   do { \
      if ((c) >= 32) { \
         (a).hi = (a).lo << ((c) - 32); \
         (a).lo = 0; \
      } else if ((c) > 0) { \
         /* c == 0 is skipped: it would shift lo right by 32 */ \
         (a).hi = ((a).hi << (c)) | ((a).lo >> (32 - (c))); \
         (a).lo <<= (c); \
      } \
   } while (0)

#endif /* !__GNUC__ */
323
324
#define F(i) 1 /* can be used to obtain an oblong metric: 0.30 / 0.59 / 0.11 */
#define SAFECDOT 1 /* for paranoids */

/*
 * Build the interpolation vector for the segment V0 -> V1.
 *
 *   NV      highest index on the segment (V0 maps to 0, V1 maps to NV)
 *   NC      number of components
 *   IV, B   outputs: for a colour V, (int)(dot(V, IV) + B) is its index
 *   V0, V1  segment endpoints
 *
 * Uses the loop variable `i' of the enclosing scope.
 * When the endpoints coincide (zero-length segment) the scale factor is
 * forced to 0 instead of dividing by zero, so every colour maps to index 0.
 */
#define MAKEIVEC(NV, NC, IV, B, V0, V1) \
   do { \
      /* compute interpolation vector */ \
      float d2 = 0; \
      float rd2; \
      \
      for (i = 0; i < (NC); i++) { \
         (IV)[i] = ((V1)[i] - (V0)[i]) * F(i); \
         d2 += (IV)[i] * (IV)[i]; \
      } \
      rd2 = (d2 > 0) ? (float)(NV) / d2 : 0.0f; \
      (B) = 0; \
      for (i = 0; i < (NC); i++) { \
         (IV)[i] *= F(i); \
         (B) -= (IV)[i] * (V0)[i]; \
         (IV)[i] *= rd2; \
      } \
      (B) = (B) * rd2 + 0.5f; \
   } while (0)

/*
 * Map colour V onto the segment described by IV / B (see MAKEIVEC) and
 * store the resulting index in TEXEL.  With SAFECDOT enabled the index is
 * clamped to [0, NV].  Uses the loop variable `i' of the enclosing scope.
 */
#define CALCCDOT(TEXEL, NV, NC, IV, B, V)\
   do { \
      float dot = 0; \
      for (i = 0; i < (NC); i++) { \
         dot += (V)[i] * (IV)[i]; \
      } \
      (TEXEL) = (int)(dot + (B)); \
      if (SAFECDOT) { \
         if ((TEXEL) < 0) { \
            (TEXEL) = 0; \
         } else if ((TEXEL) > (NV)) { \
            (TEXEL) = (NV); \
         } \
      } \
   } while (0)
363
364
365 static int
366 fxt1_bestcol (float vec[][MAX_COMP], int nv,
367 unsigned char input[MAX_COMP], int nc)
368 {
369 int i, j, best = -1;
370 float err = 1e9; /* big enough */
371
372 for (j = 0; j < nv; j++) {
373 float e = 0;
374 for (i = 0; i < nc; i++) {
375 e += (vec[j][i] - input[i]) * (vec[j][i] - input[i]);
376 }
377 if (e < err) {
378 err = e;
379 best = j;
380 }
381 }
382
383 return best;
384 }
385
386
387 static int
388 fxt1_worst (float vec[MAX_COMP],
389 unsigned char input[N_TEXELS][MAX_COMP], int nc, int n)
390 {
391 int i, k, worst = -1;
392 float err = -1; /* small enough */
393
394 for (k = 0; k < n; k++) {
395 float e = 0;
396 for (i = 0; i < nc; i++) {
397 e += (vec[i] - input[k][i]) * (vec[i] - input[k][i]);
398 }
399 if (e > err) {
400 err = e;
401 worst = k;
402 }
403 }
404
405 return worst;
406 }
407
408
409 static int
410 fxt1_variance (double variance[MAX_COMP],
411 unsigned char input[N_TEXELS][MAX_COMP], int nc, int n)
412 {
413 int i, k, best = 0;
414 int sx, sx2;
415 double var, maxvar = -1; /* small enough */
416 double teenth = 1.0 / n;
417
418 for (i = 0; i < nc; i++) {
419 sx = sx2 = 0;
420 for (k = 0; k < n; k++) {
421 int t = input[k][i];
422 sx += t;
423 sx2 += t * t;
424 }
425 var = sx2 * teenth - sx * sx * teenth * teenth;
426 if (maxvar < var) {
427 maxvar = var;
428 best = i;
429 }
430 if (variance) {
431 variance[i] = var;
432 }
433 }
434
435 return best;
436 }
437
438
439 static int
440 fxt1_choose (float vec[][MAX_COMP], int nv,
441 unsigned char input[N_TEXELS][MAX_COMP], int nc, int n)
442 {
443 #if 0
444 /* Choose colors from a grid.
445 */
446 int i, j;
447
448 for (j = 0; j < nv; j++) {
449 int m = j * (n - 1) / (nv - 1);
450 for (i = 0; i < nc; i++) {
451 vec[j][i] = input[m][i];
452 }
453 }
454 #else
455 /* Our solution here is to find the darkest and brightest colors in
456 * the 8x4 tile and use those as the two representative colors.
457 * There are probably better algorithms to use (histogram-based).
458 */
459 int i, j, k;
460 int minSum = 2000; /* big enough */
461 int maxSum = -1; /* small enough */
462 int minCol = 0; /* phoudoin: silent compiler! */
463 int maxCol = 0; /* phoudoin: silent compiler! */
464
465 struct {
466 int flag;
467 int key;
468 int freq;
469 int idx;
470 } hist[N_TEXELS];
471 int lenh = 0;
472
473 memset(hist, 0, sizeof(hist));
474
475 for (k = 0; k < n; k++) {
476 int l;
477 int key = 0;
478 int sum = 0;
479 for (i = 0; i < nc; i++) {
480 key <<= 8;
481 key |= input[k][i];
482 sum += input[k][i];
483 }
484 for (l = 0; l < n; l++) {
485 if (!hist[l].flag) {
486 /* alloc new slot */
487 hist[l].flag = !0;
488 hist[l].key = key;
489 hist[l].freq = 1;
490 hist[l].idx = k;
491 lenh = l + 1;
492 break;
493 } else if (hist[l].key == key) {
494 hist[l].freq++;
495 break;
496 }
497 }
498 if (minSum > sum) {
499 minSum = sum;
500 minCol = k;
501 }
502 if (maxSum < sum) {
503 maxSum = sum;
504 maxCol = k;
505 }
506 }
507
508 if (lenh <= nv) {
509 for (j = 0; j < lenh; j++) {
510 for (i = 0; i < nc; i++) {
511 vec[j][i] = (float)input[hist[j].idx][i];
512 }
513 }
514 for (; j < nv; j++) {
515 for (i = 0; i < nc; i++) {
516 vec[j][i] = vec[0][i];
517 }
518 }
519 return 0;
520 }
521
522 for (j = 0; j < nv; j++) {
523 for (i = 0; i < nc; i++) {
524 vec[j][i] = ((nv - 1 - j) * input[minCol][i] + j * input[maxCol][i] + (nv - 1) / 2) / (nv - 1);
525 }
526 }
527 #endif
528
529 return !0;
530 }
531
532
533 static int
534 fxt1_lloyd (float vec[][MAX_COMP], int nv,
535 unsigned char input[N_TEXELS][MAX_COMP], int nc, int n)
536 {
537 /* Use the generalized lloyd's algorithm for VQ:
538 * find 4 color vectors.
539 *
540 * for each sample color
541 * sort to nearest vector.
542 *
543 * replace each vector with the centroid of it's matching colors.
544 *
545 * repeat until RMS doesn't improve.
546 *
547 * if a color vector has no samples, or becomes the same as another
548 * vector, replace it with the color which is farthest from a sample.
549 *
550 * vec[][MAX_COMP] initial vectors and resulting colors
551 * nv number of resulting colors required
552 * input[N_TEXELS][MAX_COMP] input texels
553 * nc number of components in input / vec
554 * n number of input samples
555 */
556
557 int sum[MAX_VECT][MAX_COMP]; /* used to accumulate closest texels */
558 int cnt[MAX_VECT]; /* how many times a certain vector was chosen */
559 float error, lasterror = 1e9;
560
561 int i, j, k, rep;
562
563 /* the quantizer */
564 for (rep = 0; rep < LL_N_REP; rep++) {
565 /* reset sums & counters */
566 for (j = 0; j < nv; j++) {
567 for (i = 0; i < nc; i++) {
568 sum[j][i] = 0;
569 }
570 cnt[j] = 0;
571 }
572 error = 0;
573
574 /* scan whole block */
575 for (k = 0; k < n; k++) {
576 #if 1
577 int best = -1;
578 float err = 1e9; /* big enough */
579 /* determine best vector */
580 for (j = 0; j < nv; j++) {
581 float e = (vec[j][0] - input[k][0]) * (vec[j][0] - input[k][0]) +
582 (vec[j][1] - input[k][1]) * (vec[j][1] - input[k][1]) +
583 (vec[j][2] - input[k][2]) * (vec[j][2] - input[k][2]);
584 if (nc == 4) {
585 e += (vec[j][3] - input[k][3]) * (vec[j][3] - input[k][3]);
586 }
587 if (e < err) {
588 err = e;
589 best = j;
590 }
591 }
592 #else
593 int best = fxt1_bestcol(vec, nv, input[k], nc, &err);
594 #endif
595 /* add in closest color */
596 for (i = 0; i < nc; i++) {
597 sum[best][i] += input[k][i];
598 }
599 /* mark this vector as used */
600 cnt[best]++;
601 /* accumulate error */
602 error += err;
603 }
604
605 /* check RMS */
606 if ((error < LL_RMS_E) ||
607 ((error < lasterror) && ((lasterror - error) < LL_RMS_D))) {
608 return !0; /* good match */
609 }
610 lasterror = error;
611
612 /* move each vector to the barycenter of its closest colors */
613 for (j = 0; j < nv; j++) {
614 if (cnt[j]) {
615 float div = 1.0F / cnt[j];
616 for (i = 0; i < nc; i++) {
617 vec[j][i] = div * sum[j][i];
618 }
619 } else {
620 /* this vec has no samples or is identical with a previous vec */
621 int worst = fxt1_worst(vec[j], input, nc, n);
622 for (i = 0; i < nc; i++) {
623 vec[j][i] = input[worst][i];
624 }
625 }
626 }
627 }
628
629 return 0; /* could not converge fast enough */
630 }
631
632
633 static void
634 fxt1_quantize_CHROMA (unsigned long *cc,
635 unsigned char input[N_TEXELS][MAX_COMP])
636 {
637 const int n_vect = 4; /* 4 base vectors to find */
638 const int n_comp = 3; /* 3 components: R, G, B */
639 float vec[MAX_VECT][MAX_COMP];
640 int i, j, k;
641 Fx64 hi; /* high quadword */
642 unsigned long lohi, lolo; /* low quadword: hi dword, lo dword */
643
644 if (fxt1_choose(vec, n_vect, input, n_comp, N_TEXELS) != 0) {
645 fxt1_lloyd(vec, n_vect, input, n_comp, N_TEXELS);
646 }
647
648 FX64_MOV32(hi, 4); /* cc-chroma = "010" + unused bit */
649 for (j = n_vect - 1; j >= 0; j--) {
650 for (i = 0; i < n_comp; i++) {
651 /* add in colors */
652 FX64_SHL(hi, 5);
653 FX64_OR32(hi, (unsigned int)(vec[j][i] / 8.0));
654 }
655 }
656 ((Fx64 *)cc)[1] = hi;
657
658 lohi = lolo = 0;
659 /* right microtile */
660 for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) {
661 lohi <<= 2;
662 lohi |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
663 }
664 /* left microtile */
665 for (; k >= 0; k--) {
666 lolo <<= 2;
667 lolo |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
668 }
669 cc[1] = lohi;
670 cc[0] = lolo;
671 }
672
673
674 static void
675 fxt1_quantize_ALPHA0 (unsigned long *cc,
676 unsigned char input[N_TEXELS][MAX_COMP],
677 unsigned char reord[N_TEXELS][MAX_COMP], int n)
678 {
679 const int n_vect = 3; /* 3 base vectors to find */
680 const int n_comp = 4; /* 4 components: R, G, B, A */
681 float vec[MAX_VECT][MAX_COMP];
682 int i, j, k;
683 Fx64 hi; /* high quadword */
684 unsigned long lohi, lolo; /* low quadword: hi dword, lo dword */
685
686 /* the last vector indicates zero */
687 for (i = 0; i < n_comp; i++) {
688 vec[n_vect][i] = 0;
689 }
690
691 /* the first n texels in reord are guaranteed to be non-zero */
692 if (fxt1_choose(vec, n_vect, reord, n_comp, n) != 0) {
693 fxt1_lloyd(vec, n_vect, reord, n_comp, n);
694 }
695
696 FX64_MOV32(hi, 6); /* alpha = "011" + lerp = 0 */
697 for (j = n_vect - 1; j >= 0; j--) {
698 /* add in alphas */
699 FX64_SHL(hi, 5);
700 FX64_OR32(hi, (unsigned int)(vec[j][ACOMP] / 8.0));
701 }
702 for (j = n_vect - 1; j >= 0; j--) {
703 for (i = 0; i < n_comp - 1; i++) {
704 /* add in colors */
705 FX64_SHL(hi, 5);
706 FX64_OR32(hi, (unsigned int)(vec[j][i] / 8.0));
707 }
708 }
709 ((Fx64 *)cc)[1] = hi;
710
711 lohi = lolo = 0;
712 /* right microtile */
713 for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) {
714 lohi <<= 2;
715 lohi |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
716 }
717 /* left microtile */
718 for (; k >= 0; k--) {
719 lolo <<= 2;
720 lolo |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
721 }
722 cc[1] = lohi;
723 cc[0] = lolo;
724 }
725
726
727 static void
728 fxt1_quantize_ALPHA1 (unsigned long *cc,
729 unsigned char input[N_TEXELS][MAX_COMP])
730 {
731 const int n_vect = 3; /* highest vector number in each microtile */
732 const int n_comp = 4; /* 4 components: R, G, B, A */
733 float vec[1 + 1 + 1][MAX_COMP]; /* 1.5 extrema for each sub-block */
734 float b, iv[MAX_COMP]; /* interpolation vector */
735 int i, j, k;
736 Fx64 hi; /* high quadword */
737 unsigned long lohi, lolo; /* low quadword: hi dword, lo dword */
738
739 int minSum;
740 int maxSum;
741 int minColL = 0, maxColL = 0;
742 int minColR = 0, maxColR = 0;
743 int sumL = 0, sumR = 0;
744
745 /* Our solution here is to find the darkest and brightest colors in
746 * the 4x4 tile and use those as the two representative colors.
747 * There are probably better algorithms to use (histogram-based).
748 */
749 minSum = 2000; /* big enough */
750 maxSum = -1; /* small enough */
751 for (k = 0; k < N_TEXELS / 2; k++) {
752 int sum = 0;
753 for (i = 0; i < n_comp; i++) {
754 sum += input[k][i];
755 }
756 if (minSum > sum) {
757 minSum = sum;
758 minColL = k;
759 }
760 if (maxSum < sum) {
761 maxSum = sum;
762 maxColL = k;
763 }
764 sumL += sum;
765 }
766 minSum = 2000; /* big enough */
767 maxSum = -1; /* small enough */
768 for (; k < N_TEXELS; k++) {
769 int sum = 0;
770 for (i = 0; i < n_comp; i++) {
771 sum += input[k][i];
772 }
773 if (minSum > sum) {
774 minSum = sum;
775 minColR = k;
776 }
777 if (maxSum < sum) {
778 maxSum = sum;
779 maxColR = k;
780 }
781 sumR += sum;
782 }
783
784 /* choose the common vector (yuck!) */
785 {
786 int j1, j2;
787 int v1 = 0, v2 = 0;
788 float err = 1e9; /* big enough */
789 float tv[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
790 for (i = 0; i < n_comp; i++) {
791 tv[0][i] = input[minColL][i];
792 tv[1][i] = input[maxColL][i];
793 tv[2][i] = input[minColR][i];
794 tv[3][i] = input[maxColR][i];
795 }
796 for (j1 = 0; j1 < 2; j1++) {
797 for (j2 = 2; j2 < 4; j2++) {
798 float e = 0;
799 for (i = 0; i < n_comp; i++) {
800 e += (tv[j1][i] - tv[j2][i]) * (tv[j1][i] - tv[j2][i]);
801 }
802 if (e < err) {
803 err = e;
804 v1 = j1;
805 v2 = j2;
806 }
807 }
808 }
809 for (i = 0; i < n_comp; i++) {
810 vec[0][i] = tv[1 - v1][i];
811 vec[1][i] = (tv[v1][i] * sumL + tv[v2][i] * sumR) / (sumL + sumR);
812 vec[2][i] = tv[5 - v2][i];
813 }
814 }
815
816 /* left microtile */
817 cc[0] = 0;
818 if (minColL != maxColL) {
819 /* compute interpolation vector */
820 MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
821
822 /* add in texels */
823 lolo = 0;
824 for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
825 int texel;
826 /* interpolate color */
827 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
828 /* add in texel */
829 lolo <<= 2;
830 lolo |= texel;
831 }
832
833 cc[0] = lolo;
834 }
835
836 /* right microtile */
837 cc[1] = 0;
838 if (minColR != maxColR) {
839 /* compute interpolation vector */
840 MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[1]);
841
842 /* add in texels */
843 lohi = 0;
844 for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
845 int texel;
846 /* interpolate color */
847 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
848 /* add in texel */
849 lohi <<= 2;
850 lohi |= texel;
851 }
852
853 cc[1] = lohi;
854 }
855
856 FX64_MOV32(hi, 7); /* alpha = "011" + lerp = 1 */
857 for (j = n_vect - 1; j >= 0; j--) {
858 /* add in alphas */
859 FX64_SHL(hi, 5);
860 FX64_OR32(hi, (unsigned int)(vec[j][ACOMP] / 8.0));
861 }
862 for (j = n_vect - 1; j >= 0; j--) {
863 for (i = 0; i < n_comp - 1; i++) {
864 /* add in colors */
865 FX64_SHL(hi, 5);
866 FX64_OR32(hi, (unsigned int)(vec[j][i] / 8.0));
867 }
868 }
869 ((Fx64 *)cc)[1] = hi;
870 }
871
872
873 static void
874 fxt1_quantize_HI (unsigned long *cc,
875 unsigned char input[N_TEXELS][MAX_COMP],
876 unsigned char reord[N_TEXELS][MAX_COMP], int n)
877 {
878 const int n_vect = 6; /* highest vector number */
879 const int n_comp = 3; /* 3 components: R, G, B */
880 float b = 0.0; /* phoudoin: silent compiler! */
881 float iv[MAX_COMP]; /* interpolation vector */
882 int i, k;
883 unsigned long hihi; /* high quadword: hi dword */
884
885 int minSum = 2000; /* big enough */
886 int maxSum = -1; /* small enough */
887 int minCol = 0; /* phoudoin: silent compiler! */
888 int maxCol = 0; /* phoudoin: silent compiler! */
889
890 /* Our solution here is to find the darkest and brightest colors in
891 * the 8x4 tile and use those as the two representative colors.
892 * There are probably better algorithms to use (histogram-based).
893 */
894 for (k = 0; k < n; k++) {
895 int sum = 0;
896 for (i = 0; i < n_comp; i++) {
897 sum += reord[k][i];
898 }
899 if (minSum > sum) {
900 minSum = sum;
901 minCol = k;
902 }
903 if (maxSum < sum) {
904 maxSum = sum;
905 maxCol = k;
906 }
907 }
908
909 hihi = 0; /* cc-hi = "00" */
910 for (i = 0; i < n_comp; i++) {
911 /* add in colors */
912 hihi <<= 5;
913 hihi |= reord[maxCol][i] >> 3;
914 }
915 for (i = 0; i < n_comp; i++) {
916 /* add in colors */
917 hihi <<= 5;
918 hihi |= reord[minCol][i] >> 3;
919 }
920 cc[3] = hihi;
921 cc[0] = cc[1] = cc[2] = 0;
922
923 /* compute interpolation vector */
924 if (minCol != maxCol) {
925 MAKEIVEC(n_vect, n_comp, iv, b, reord[minCol], reord[maxCol]);
926 }
927
928 /* add in texels */
929 for (k = N_TEXELS - 1; k >= 0; k--) {
930 int t = k * 3;
931 unsigned long *kk = (unsigned long *)((unsigned long)cc + t / 8);
932 int texel = n_vect + 1; /* transparent black */
933
934 if (!ISTBLACK(input[k])) {
935 if (minCol != maxCol) {
936 /* interpolate color */
937 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
938 /* add in texel */
939 kk[0] |= texel << (t & 7);
940 }
941 } else {
942 /* add in texel */
943 kk[0] |= texel << (t & 7);
944 }
945 }
946 }
947
948
949 static void
950 fxt1_quantize_MIXED1 (unsigned long *cc,
951 unsigned char input[N_TEXELS][MAX_COMP])
952 {
953 const int n_vect = 2; /* highest vector number in each microtile */
954 const int n_comp = 3; /* 3 components: R, G, B */
955 unsigned char vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
956 float b, iv[MAX_COMP]; /* interpolation vector */
957 int i, j, k;
958 Fx64 hi; /* high quadword */
959 unsigned long lohi, lolo; /* low quadword: hi dword, lo dword */
960
961 int minSum;
962 int maxSum;
963 int minColL = 0, maxColL = -1;
964 int minColR = 0, maxColR = -1;
965
966 /* Our solution here is to find the darkest and brightest colors in
967 * the 4x4 tile and use those as the two representative colors.
968 * There are probably better algorithms to use (histogram-based).
969 */
970 minSum = 2000; /* big enough */
971 maxSum = -1; /* small enough */
972 for (k = 0; k < N_TEXELS / 2; k++) {
973 if (!ISTBLACK(input[k])) {
974 int sum = 0;
975 for (i = 0; i < n_comp; i++) {
976 sum += input[k][i];
977 }
978 if (minSum > sum) {
979 minSum = sum;
980 minColL = k;
981 }
982 if (maxSum < sum) {
983 maxSum = sum;
984 maxColL = k;
985 }
986 }
987 }
988 minSum = 2000; /* big enough */
989 maxSum = -1; /* small enough */
990 for (; k < N_TEXELS; k++) {
991 if (!ISTBLACK(input[k])) {
992 int sum = 0;
993 for (i = 0; i < n_comp; i++) {
994 sum += input[k][i];
995 }
996 if (minSum > sum) {
997 minSum = sum;
998 minColR = k;
999 }
1000 if (maxSum < sum) {
1001 maxSum = sum;
1002 maxColR = k;
1003 }
1004 }
1005 }
1006
1007 /* left microtile */
1008 if (maxColL == -1) {
1009 /* all transparent black */
1010 cc[0] = -1;
1011 for (i = 0; i < n_comp; i++) {
1012 vec[0][i] = 0;
1013 vec[1][i] = 0;
1014 }
1015 } else {
1016 cc[0] = 0;
1017 for (i = 0; i < n_comp; i++) {
1018 vec[0][i] = input[minColL][i];
1019 vec[1][i] = input[maxColL][i];
1020 }
1021 if (minColL != maxColL) {
1022 /* compute interpolation vector */
1023 MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
1024
1025 /* add in texels */
1026 lolo = 0;
1027 for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
1028 int texel = n_vect + 1; /* transparent black */
1029 if (!ISTBLACK(input[k])) {
1030 /* interpolate color */
1031 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
1032 }
1033 /* add in texel */
1034 lolo <<= 2;
1035 lolo |= texel;
1036 }
1037 cc[0] = lolo;
1038 }
1039 }
1040
1041 /* right microtile */
1042 if (maxColR == -1) {
1043 /* all transparent black */
1044 cc[1] = -1;
1045 for (i = 0; i < n_comp; i++) {
1046 vec[2][i] = 0;
1047 vec[3][i] = 0;
1048 }
1049 } else {
1050 cc[1] = 0;
1051 for (i = 0; i < n_comp; i++) {
1052 vec[2][i] = input[minColR][i];
1053 vec[3][i] = input[maxColR][i];
1054 }
1055 if (minColR != maxColR) {
1056 /* compute interpolation vector */
1057 MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);
1058
1059 /* add in texels */
1060 lohi = 0;
1061 for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
1062 int texel = n_vect + 1; /* transparent black */
1063 if (!ISTBLACK(input[k])) {
1064 /* interpolate color */
1065 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
1066 }
1067 /* add in texel */
1068 lohi <<= 2;
1069 lohi |= texel;
1070 }
1071 cc[1] = lohi;
1072 }
1073 }
1074
1075 FX64_MOV32(hi, 9 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
1076 for (j = 2 * 2 - 1; j >= 0; j--) {
1077 for (i = 0; i < n_comp; i++) {
1078 /* add in colors */
1079 FX64_SHL(hi, 5);
1080 FX64_OR32(hi, vec[j][i] >> 3);
1081 }
1082 }
1083 ((Fx64 *)cc)[1] = hi;
1084 }
1085
1086
1087 static void
1088 fxt1_quantize_MIXED0 (unsigned long *cc,
1089 unsigned char input[N_TEXELS][MAX_COMP])
1090 {
1091 const int n_vect = 3; /* highest vector number in each microtile */
1092 const int n_comp = 3; /* 3 components: R, G, B */
1093 unsigned char vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
1094 float b, iv[MAX_COMP]; /* interpolation vector */
1095 int i, j, k;
1096 Fx64 hi; /* high quadword */
1097 unsigned long lohi, lolo; /* low quadword: hi dword, lo dword */
1098
1099 int minColL = 0, maxColL = 0;
1100 int minColR = 0, maxColR = 0;
1101 #if 0
1102 int minSum;
1103 int maxSum;
1104
1105 /* Our solution here is to find the darkest and brightest colors in
1106 * the 4x4 tile and use those as the two representative colors.
1107 * There are probably better algorithms to use (histogram-based).
1108 */
1109 minSum = 2000; /* big enough */
1110 maxSum = -1; /* small enough */
1111 for (k = 0; k < N_TEXELS / 2; k++) {
1112 int sum = 0;
1113 for (i = 0; i < n_comp; i++) {
1114 sum += input[k][i];
1115 }
1116 if (minSum > sum) {
1117 minSum = sum;
1118 minColL = k;
1119 }
1120 if (maxSum < sum) {
1121 maxSum = sum;
1122 maxColL = k;
1123 }
1124 }
1125 minSum = 2000; /* big enough */
1126 maxSum = -1; /* small enough */
1127 for (; k < N_TEXELS; k++) {
1128 int sum = 0;
1129 for (i = 0; i < n_comp; i++) {
1130 sum += input[k][i];
1131 }
1132 if (minSum > sum) {
1133 minSum = sum;
1134 minColR = k;
1135 }
1136 if (maxSum < sum) {
1137 maxSum = sum;
1138 maxColR = k;
1139 }
1140 }
1141 #else
1142 int minVal;
1143 int maxVal;
1144 int maxVarL = fxt1_variance(NULL, input, n_comp, N_TEXELS / 2);
1145 int maxVarR = fxt1_variance(NULL, &input[N_TEXELS / 2], n_comp, N_TEXELS / 2);
1146
1147 /* Scan the channel with max variance for lo & hi
1148 * and use those as the two representative colors.
1149 */
1150 minVal = 2000; /* big enough */
1151 maxVal = -1; /* small enough */
1152 for (k = 0; k < N_TEXELS / 2; k++) {
1153 int t = input[k][maxVarL];
1154 if (minVal > t) {
1155 minVal = t;
1156 minColL = k;
1157 }
1158 if (maxVal < t) {
1159 maxVal = t;
1160 maxColL = k;
1161 }
1162 }
1163 minVal = 2000; /* big enough */
1164 maxVal = -1; /* small enough */
1165 for (; k < N_TEXELS; k++) {
1166 int t = input[k][maxVarR];
1167 if (minVal > t) {
1168 minVal = t;
1169 minColR = k;
1170 }
1171 if (maxVal < t) {
1172 maxVal = t;
1173 maxColR = k;
1174 }
1175 }
1176 #endif
1177
1178 /* left microtile */
1179 cc[0] = 0;
1180 for (i = 0; i < n_comp; i++) {
1181 vec[0][i] = input[minColL][i];
1182 vec[1][i] = input[maxColL][i];
1183 }
1184 if (minColL != maxColL) {
1185 /* compute interpolation vector */
1186 MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
1187
1188 /* add in texels */
1189 lolo = 0;
1190 for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
1191 int texel;
1192 /* interpolate color */
1193 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
1194 /* add in texel */
1195 lolo <<= 2;
1196 lolo |= texel;
1197 }
1198
1199 /* funky encoding for LSB of green */
1200 if ((int)((lolo >> 1) & 1) != (((vec[1][GCOMP] ^ vec[0][GCOMP]) >> 2) & 1)) {
1201 for (i = 0; i < n_comp; i++) {
1202 vec[1][i] = input[minColL][i];
1203 vec[0][i] = input[maxColL][i];
1204 }
1205 lolo = ~lolo;
1206 }
1207
1208 cc[0] = lolo;
1209 }
1210
1211 /* right microtile */
1212 cc[1] = 0;
1213 for (i = 0; i < n_comp; i++) {
1214 vec[2][i] = input[minColR][i];
1215 vec[3][i] = input[maxColR][i];
1216 }
1217 if (minColR != maxColR) {
1218 /* compute interpolation vector */
1219 MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);
1220
1221 /* add in texels */
1222 lohi = 0;
1223 for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
1224 int texel;
1225 /* interpolate color */
1226 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
1227 /* add in texel */
1228 lohi <<= 2;
1229 lohi |= texel;
1230 }
1231
1232 /* funky encoding for LSB of green */
1233 if ((int)((lohi >> 1) & 1) != (((vec[3][GCOMP] ^ vec[2][GCOMP]) >> 2) & 1)) {
1234 for (i = 0; i < n_comp; i++) {
1235 vec[3][i] = input[minColR][i];
1236 vec[2][i] = input[maxColR][i];
1237 }
1238 lohi = ~lohi;
1239 }
1240
1241 cc[1] = lohi;
1242 }
1243
1244 FX64_MOV32(hi, 8 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
1245 for (j = 2 * 2 - 1; j >= 0; j--) {
1246 for (i = 0; i < n_comp; i++) {
1247 /* add in colors */
1248 FX64_SHL(hi, 5);
1249 FX64_OR32(hi, vec[j][i] >> 3);
1250 }
1251 }
1252 ((Fx64 *)cc)[1] = hi;
1253 }
1254
1255
1256 static void
1257 fxt1_quantize (unsigned long *cc, const unsigned char *lines[], int comps)
1258 {
1259 int trualpha;
1260 unsigned char reord[N_TEXELS][MAX_COMP];
1261
1262 unsigned char input[N_TEXELS][MAX_COMP];
1263 int i, k, l;
1264
1265 if (comps == 3) {
1266 /* make the whole block opaque */
1267 memset(input, -1, sizeof(input));
1268 }
1269
1270 /* 8 texels each line */
1271 for (l = 0; l < 4; l++) {
1272 for (k = 0; k < 4; k++) {
1273 for (i = 0; i < comps; i++) {
1274 input[k + l * 4][i] = *lines[l]++;
1275 }
1276 }
1277 for (; k < 8; k++) {
1278 for (i = 0; i < comps; i++) {
1279 input[k + l * 4 + 12][i] = *lines[l]++;
1280 }
1281 }
1282 }
1283
1284 /* block layout:
1285 * 00, 01, 02, 03, 08, 09, 0a, 0b
1286 * 10, 11, 12, 13, 18, 19, 1a, 1b
1287 * 04, 05, 06, 07, 0c, 0d, 0e, 0f
1288 * 14, 15, 16, 17, 1c, 1d, 1e, 1f
1289 */
1290
1291 /* [dBorca]
1292 * stupidity flows forth from this
1293 */
1294 l = N_TEXELS;
1295 trualpha = 0;
1296 if (comps == 4) {
1297 /* skip all transparent black texels */
1298 l = 0;
1299 for (k = 0; k < N_TEXELS; k++) {
1300 /* test all components against 0 */
1301 if (!ISTBLACK(input[k])) {
1302 /* texel is not transparent black */
1303 COPY_4UBV(reord[l], input[k]);
1304 if (reord[l][ACOMP] < (255 - ALPHA_TS)) {
1305 /* non-opaque texel */
1306 trualpha = !0;
1307 }
1308 l++;
1309 }
1310 }
1311 }
1312
1313 #if 0
1314 if (trualpha) {
1315 fxt1_quantize_ALPHA0(cc, input, reord, l);
1316 } else if (l == 0) {
1317 cc[0] = cc[1] = cc[2] = -1;
1318 cc[3] = 0;
1319 } else if (l < N_TEXELS) {
1320 fxt1_quantize_HI(cc, input, reord, l);
1321 } else {
1322 fxt1_quantize_CHROMA(cc, input);
1323 }
1324 (void)fxt1_quantize_ALPHA1;
1325 (void)fxt1_quantize_MIXED1;
1326 (void)fxt1_quantize_MIXED0;
1327 #else
1328 if (trualpha) {
1329 fxt1_quantize_ALPHA1(cc, input);
1330 } else if (l == 0) {
1331 cc[0] = cc[1] = cc[2] = -1;
1332 cc[3] = 0;
1333 } else if (l < N_TEXELS) {
1334 fxt1_quantize_MIXED1(cc, input);
1335 } else {
1336 fxt1_quantize_MIXED0(cc, input);
1337 }
1338 (void)fxt1_quantize_ALPHA0;
1339 (void)fxt1_quantize_HI;
1340 (void)fxt1_quantize_CHROMA;
1341 #endif
1342 }
1343
1344
/**
 * Compress an image into FXT1 blocks, one block per 8x4 texel tile.
 *
 * When width is not a multiple of 8 or height is not a multiple of 4, the
 * image is first expanded to the next such size into a temporary buffer
 * with _mesa_upscale_teximage2d() and that copy is encoded instead.
 *
 * \param width          image width in texels
 * \param height         image height in texels
 * \param comps          number of unsigned char components per source texel
 * \param source         source image
 * \param srcRowStride   distance in bytes between consecutive source rows
 * \param dest           receives the encoded blocks
 * \param destRowStride  distance in bytes between consecutive rows of blocks
 * \return 0 on success; -1 if the temporary image could not be allocated,
 *         in which case nothing has been written to dest
 */
int
fxt1_encode (unsigned int width, unsigned int height, int comps,
             const void *source, int srcRowStride,
             void *dest, int destRowStride)
{
    unsigned int x, y;
    const unsigned char *data;
    unsigned long *encoded = dest;
    unsigned char *newSource = NULL;

    /* Replicate image if width is not M8 or height is not M4 */
    if ((width & 7) | (height & 3)) {
        int newWidth = (width + 7) & ~7;
        int newHeight = (height + 3) & ~3;
        /* One byte per component: the padded image is walked below as
         * unsigned char with a row stride of comps * newWidth bytes.
         * NOTE(review): assumes _mesa_upscale_teximage2d() also emits one
         * byte per component -- confirm against its definition.
         */
        newSource = malloc(comps * newWidth * newHeight * sizeof(unsigned char));
        if (newSource == NULL) {
            /* out of memory: bail out instead of writing through NULL */
            return -1;
        }
        _mesa_upscale_teximage2d(width, height, newWidth, newHeight,
                                 comps, source, srcRowStride, newSource);
        source = newSource;
        width = newWidth;
        height = newHeight;
        srcRowStride = comps * newWidth;
    }

    data = source;
    /* Turn the destination stride into the padding, counted in 'encoded'
     * elements, left after a row of blocks (width * 2 bytes).
     * NOTE(review): this and the "+= 4" below treat unsigned long as a
     * 32-bit word -- confirm for targets where long is wider.
     */
    destRowStride = (destRowStride - width * 2) / 4;
    for (y = 0; y < height; y += 4) {
        unsigned int offs = y * srcRowStride;
        for (x = 0; x < width; x += 8) {
            const unsigned char *lines[4];
            lines[0] = &data[offs];
            lines[1] = lines[0] + srcRowStride;
            lines[2] = lines[1] + srcRowStride;
            lines[3] = lines[2] + srcRowStride;
            offs += 8 * comps;
            fxt1_quantize(encoded, lines, comps);
            /* 128 bits per 8x4 block */
            encoded += 4;
        }
        encoded += destRowStride;
    }

    /* free(NULL) is a no-op, so no guard is needed */
    free(newSource);

    return 0;
}
1392
1393
1394 /***************************************************************************\
1395 * FXT1 decoder
1396 *
1397 * The decoder is based on GL_3DFX_texture_compression_FXT1
1398 * specification and serves as a concept for the encoder.
1399 \***************************************************************************/
1400
1401
/* lookup table for scaling 5 bit colors up to 8 bits (read-only) */
static const unsigned char _rgb_scale_5[] = {
    0,   8,   16,  25,  33,  41,  49,  58,
    66,  74,  82,  90,  99,  107, 115, 123,
    132, 140, 148, 156, 165, 173, 181, 189,
    197, 206, 214, 222, 230, 239, 247, 255
};
1409
/* lookup table for scaling 6 bit colors up to 8 bits (read-only) */
static const unsigned char _rgb_scale_6[] = {
    0,   4,   8,   12,  16,  20,  24,  28,
    32,  36,  40,  45,  49,  53,  57,  61,
    65,  69,  73,  77,  81,  85,  89,  93,
    97,  101, 105, 109, 113, 117, 121, 125,
    130, 134, 138, 142, 146, 150, 154, 158,
    162, 166, 170, 174, 178, 182, 186, 190,
    194, 198, 202, 206, 210, 215, 219, 223,
    227, 231, 235, 239, 243, 247, 251, 255
};
1421
1422
/*
 * Decoder helpers.
 *
 * CC_SEL(cc, which)   view cc as an array of 32-bit words and return the
 *                     word containing bit 'which', shifted right so that
 *                     bit ends up in bit 0; the caller masks the field
 *                     width.  The word type is unsigned int because the
 *                     "/ 32" and "& 31" arithmetic requires 32-bit words,
 *                     which unsigned long does not provide on LP64.
 * UP5(c)              expand the low 5 bits of c to 8 bits.
 * UP6(c, b)           expand the 6-bit value made of the low 5 bits of c
 *                     followed by bit 0 of b to 8 bits.
 * LERP(n, t, c0, c1)  rounded integer interpolation, t/n of the way from
 *                     c0 to c1; fully parenthesized so it can be used
 *                     inside a larger expression.
 * ZERO_4UBV(v)        clear exactly four bytes starting at v.
 */
#define CC_SEL(cc, which) (((const unsigned int *)(cc))[(which) / 32] >> ((which) & 31))
#define UP5(c) _rgb_scale_5[(c) & 31]
#define UP6(c, b) _rgb_scale_6[(((c) & 31) << 1) | ((b) & 1)]
#define LERP(n, t, c0, c1) ((((n) - (t)) * (c0) + (t) * (c1) + (n) / 2) / (n))
#define ZERO_4UBV(v) ((void) memset((v), 0, 4))
1428
1429
1430 static void
1431 fxt1_decode_1HI (unsigned char *code, int t, unsigned char *rgba)
1432 {
1433 const unsigned long *cc;
1434
1435 t *= 3;
1436 cc = (unsigned long *)(code + t / 8);
1437 t = (cc[0] >> (t & 7)) & 7;
1438
1439 if (t == 7) {
1440 ZERO_4UBV(rgba);
1441 } else {
1442 cc = (unsigned long *)(code + 12);
1443 if (t == 0) {
1444 rgba[BCOMP] = UP5(CC_SEL(cc, 0));
1445 rgba[GCOMP] = UP5(CC_SEL(cc, 5));
1446 rgba[RCOMP] = UP5(CC_SEL(cc, 10));
1447 } else if (t == 6) {
1448 rgba[BCOMP] = UP5(CC_SEL(cc, 15));
1449 rgba[GCOMP] = UP5(CC_SEL(cc, 20));
1450 rgba[RCOMP] = UP5(CC_SEL(cc, 25));
1451 } else {
1452 rgba[BCOMP] = LERP(6, t, UP5(CC_SEL(cc, 0)), UP5(CC_SEL(cc, 15)));
1453 rgba[GCOMP] = LERP(6, t, UP5(CC_SEL(cc, 5)), UP5(CC_SEL(cc, 20)));
1454 rgba[RCOMP] = LERP(6, t, UP5(CC_SEL(cc, 10)), UP5(CC_SEL(cc, 25)));
1455 }
1456 rgba[ACOMP] = 255;
1457 }
1458 }
1459
1460
1461 static void
1462 fxt1_decode_1CHROMA (unsigned char *code, int t, unsigned char *rgba)
1463 {
1464 const unsigned long *cc;
1465 unsigned long kk;
1466
1467 cc = (unsigned long *)code;
1468 if (t & 16) {
1469 cc++;
1470 t &= 15;
1471 }
1472 t = (cc[0] >> (t * 2)) & 3;
1473
1474 t *= 15;
1475 cc = (unsigned long *)(code + 8 + t / 8);
1476 kk = cc[0] >> (t & 7);
1477 rgba[BCOMP] = UP5(kk);
1478 rgba[GCOMP] = UP5(kk >> 5);
1479 rgba[RCOMP] = UP5(kk >> 10);
1480 rgba[ACOMP] = 255;
1481 }
1482
1483
1484 static void
1485 fxt1_decode_1MIXED (unsigned char *code, int t, unsigned char *rgba)
1486 {
1487 const unsigned long *cc;
1488 unsigned int col[2][3];
1489 int glsb, selb;
1490
1491 cc = (unsigned long *)code;
1492 if (t & 16) {
1493 t &= 15;
1494 t = (cc[1] >> (t * 2)) & 3;
1495 /* col 2 */
1496 col[0][BCOMP] = (*(unsigned long *)(code + 11)) >> 6;
1497 col[0][GCOMP] = CC_SEL(cc, 99);
1498 col[0][RCOMP] = CC_SEL(cc, 104);
1499 /* col 3 */
1500 col[1][BCOMP] = CC_SEL(cc, 109);
1501 col[1][GCOMP] = CC_SEL(cc, 114);
1502 col[1][RCOMP] = CC_SEL(cc, 119);
1503 glsb = CC_SEL(cc, 126);
1504 selb = CC_SEL(cc, 33);
1505 } else {
1506 t = (cc[0] >> (t * 2)) & 3;
1507 /* col 0 */
1508 col[0][BCOMP] = CC_SEL(cc, 64);
1509 col[0][GCOMP] = CC_SEL(cc, 69);
1510 col[0][RCOMP] = CC_SEL(cc, 74);
1511 /* col 1 */
1512 col[1][BCOMP] = CC_SEL(cc, 79);
1513 col[1][GCOMP] = CC_SEL(cc, 84);
1514 col[1][RCOMP] = CC_SEL(cc, 89);
1515 glsb = CC_SEL(cc, 125);
1516 selb = CC_SEL(cc, 1);
1517 }
1518
1519 if (CC_SEL(cc, 124) & 1) {
1520 /* alpha[0] == 1 */
1521
1522 if (t == 3) {
1523 ZERO_4UBV(rgba);
1524 } else {
1525 if (t == 0) {
1526 rgba[BCOMP] = UP5(col[0][BCOMP]);
1527 rgba[GCOMP] = UP5(col[0][GCOMP]);
1528 rgba[RCOMP] = UP5(col[0][RCOMP]);
1529 } else if (t == 2) {
1530 rgba[BCOMP] = UP5(col[1][BCOMP]);
1531 rgba[GCOMP] = UP6(col[1][GCOMP], glsb);
1532 rgba[RCOMP] = UP5(col[1][RCOMP]);
1533 } else {
1534 rgba[BCOMP] = (UP5(col[0][BCOMP]) + UP5(col[1][BCOMP])) / 2;
1535 rgba[GCOMP] = (UP5(col[0][GCOMP]) + UP6(col[1][GCOMP], glsb)) / 2;
1536 rgba[RCOMP] = (UP5(col[0][RCOMP]) + UP5(col[1][RCOMP])) / 2;
1537 }
1538 rgba[ACOMP] = 255;
1539 }
1540 } else {
1541 /* alpha[0] == 0 */
1542
1543 if (t == 0) {
1544 rgba[BCOMP] = UP5(col[0][BCOMP]);
1545 rgba[GCOMP] = UP6(col[0][GCOMP], glsb ^ selb);
1546 rgba[RCOMP] = UP5(col[0][RCOMP]);
1547 } else if (t == 3) {
1548 rgba[BCOMP] = UP5(col[1][BCOMP]);
1549 rgba[GCOMP] = UP6(col[1][GCOMP], glsb);
1550 rgba[RCOMP] = UP5(col[1][RCOMP]);
1551 } else {
1552 rgba[BCOMP] = LERP(3, t, UP5(col[0][BCOMP]), UP5(col[1][BCOMP]));
1553 rgba[GCOMP] = LERP(3, t, UP6(col[0][GCOMP], glsb ^ selb),
1554 UP6(col[1][GCOMP], glsb));
1555 rgba[RCOMP] = LERP(3, t, UP5(col[0][RCOMP]), UP5(col[1][RCOMP]));
1556 }
1557 rgba[ACOMP] = 255;
1558 }
1559 }
1560
1561
1562 static void
1563 fxt1_decode_1ALPHA (unsigned char *code, int t, unsigned char *rgba)
1564 {
1565 const unsigned long *cc;
1566
1567 cc = (unsigned long *)code;
1568 if (CC_SEL(cc, 124) & 1) {
1569 /* lerp == 1 */
1570 unsigned int col0[4];
1571
1572 if (t & 16) {
1573 t &= 15;
1574 t = (cc[1] >> (t * 2)) & 3;
1575 /* col 2 */
1576 col0[BCOMP] = (*(unsigned long *)(code + 11)) >> 6;
1577 col0[GCOMP] = CC_SEL(cc, 99);
1578 col0[RCOMP] = CC_SEL(cc, 104);
1579 col0[ACOMP] = CC_SEL(cc, 119);
1580 } else {
1581 t = (cc[0] >> (t * 2)) & 3;
1582 /* col 0 */
1583 col0[BCOMP] = CC_SEL(cc, 64);
1584 col0[GCOMP] = CC_SEL(cc, 69);
1585 col0[RCOMP] = CC_SEL(cc, 74);
1586 col0[ACOMP] = CC_SEL(cc, 109);
1587 }
1588
1589 if (t == 0) {
1590 rgba[BCOMP] = UP5(col0[BCOMP]);
1591 rgba[GCOMP] = UP5(col0[GCOMP]);
1592 rgba[RCOMP] = UP5(col0[RCOMP]);
1593 rgba[ACOMP] = UP5(col0[ACOMP]);
1594 } else if (t == 3) {
1595 rgba[BCOMP] = UP5(CC_SEL(cc, 79));
1596 rgba[GCOMP] = UP5(CC_SEL(cc, 84));
1597 rgba[RCOMP] = UP5(CC_SEL(cc, 89));
1598 rgba[ACOMP] = UP5(CC_SEL(cc, 114));
1599 } else {
1600 rgba[BCOMP] = LERP(3, t, UP5(col0[BCOMP]), UP5(CC_SEL(cc, 79)));
1601 rgba[GCOMP] = LERP(3, t, UP5(col0[GCOMP]), UP5(CC_SEL(cc, 84)));
1602 rgba[RCOMP] = LERP(3, t, UP5(col0[RCOMP]), UP5(CC_SEL(cc, 89)));
1603 rgba[ACOMP] = LERP(3, t, UP5(col0[ACOMP]), UP5(CC_SEL(cc, 114)));
1604 }
1605 } else {
1606 /* lerp == 0 */
1607
1608 if (t & 16) {
1609 cc++;
1610 t &= 15;
1611 }
1612 t = (cc[0] >> (t * 2)) & 3;
1613
1614 if (t == 3) {
1615 ZERO_4UBV(rgba);
1616 } else {
1617 unsigned long kk;
1618 cc = (unsigned long *)code;
1619 rgba[ACOMP] = UP5(cc[3] >> (t * 5 + 13));
1620 t *= 15;
1621 cc = (unsigned long *)(code + 8 + t / 8);
1622 kk = cc[0] >> (t & 7);
1623 rgba[BCOMP] = UP5(kk);
1624 rgba[GCOMP] = UP5(kk >> 5);
1625 rgba[RCOMP] = UP5(kk >> 10);
1626 }
1627 }
1628 }
1629
1630
1631 void
1632 fxt1_decode_1 (const void *texture, int stride, /* in pixels */
1633 int i, int j, unsigned char *rgba)
1634 {
1635 static void (*decode_1[]) (unsigned char *, int, unsigned char *) = {
1636 fxt1_decode_1HI, /* cc-high = "00?" */
1637 fxt1_decode_1HI, /* cc-high = "00?" */
1638 fxt1_decode_1CHROMA, /* cc-chroma = "010" */
1639 fxt1_decode_1ALPHA, /* alpha = "011" */
1640 fxt1_decode_1MIXED, /* mixed = "1??" */
1641 fxt1_decode_1MIXED, /* mixed = "1??" */
1642 fxt1_decode_1MIXED, /* mixed = "1??" */
1643 fxt1_decode_1MIXED /* mixed = "1??" */
1644 };
1645
1646 unsigned char *code = (unsigned char *)texture +
1647 ((j / 4) * (stride / 8) + (i / 8)) * 16;
1648 int mode = CC_SEL((unsigned long *)code, 125);
1649 int t = i & 7;
1650
1651 if (t & 4) {
1652 t += 12;
1653 }
1654 t += (j & 3) * 4;
1655
1656 decode_1[mode](code, t, rgba);
1657 }