decoder "width" parameter represents "stride-in-pixels"
[mesa.git] / src / mesa / main / texcompress_fxt1.c
1 /*
2 * Mesa 3-D graphics library
3 * Version: 6.1
4 *
5 * Copyright (C) 1999-2004 Brian Paul All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included
15 * in all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
21 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25
26 /**
27 * \file texcompress_fxt1.c
28 * GL_EXT_texture_compression_fxt1 support.
29 */
30
31
32 #include "glheader.h"
33 #include "imports.h"
34 #include "colormac.h"
35 #include "context.h"
36 #include "convolve.h"
37 #include "image.h"
38 #include "texcompress.h"
39 #include "texformat.h"
40 #include "texstore.h"
41
42
43 int
44 fxt1_encode (GLcontext *ctx,
45 unsigned int width, unsigned int height,
46 int srcFormat,
47 const void *source, int srcRowStride,
48 void *dest, int destRowStride);
49 void
50 fxt1_decode_1 (const void *texture, int stride,
51 int i, int j, unsigned char *rgba);
52
53
54 /**
55 * Called during context initialization.
56 */
57 void
58 _mesa_init_texture_fxt1( GLcontext *ctx )
59 {
60 (void) ctx;
61 }
62
63
64 /**
65 * Called via TexFormat->StoreImage to store an RGB_FXT1 texture.
66 */
67 static GLboolean
68 texstore_rgb_fxt1(STORE_PARAMS)
69 {
70 const GLchan *pixels;
71 GLint srcRowStride;
72 GLubyte *dst;
73 const GLint texWidth = dstRowStride * 8 / 16; /* a bit of a hack */
74 const GLchan *tempImage = NULL;
75
76 ASSERT(dstFormat == &_mesa_texformat_rgb_fxt1);
77 ASSERT(dstXoffset % 8 == 0);
78 ASSERT(dstYoffset % 4 == 0);
79 ASSERT(dstZoffset == 0);
80 (void) dstZoffset; (void) dstImageStride;
81
82 if (srcFormat != GL_RGB ||
83 srcType != CHAN_TYPE ||
84 ctx->_ImageTransferState ||
85 srcPacking->SwapBytes) {
86 /* convert image to RGB/GLchan */
87 tempImage = _mesa_make_temp_chan_image(ctx, dims,
88 baseInternalFormat,
89 dstFormat->BaseFormat,
90 srcWidth, srcHeight, srcDepth,
91 srcFormat, srcType, srcAddr,
92 srcPacking);
93 if (!tempImage)
94 return GL_FALSE; /* out of memory */
95 _mesa_adjust_image_for_convolution(ctx, dims, &srcWidth, &srcHeight);
96 pixels = tempImage;
97 srcRowStride = 3 * srcWidth;
98 srcFormat = GL_RGB;
99 }
100 else {
101 pixels = (const GLchan *) srcAddr;
102 srcRowStride = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat,
103 srcType) / sizeof(GLchan);
104 }
105
106 dst = _mesa_compressed_image_address(dstXoffset, dstYoffset, 0,
107 GL_COMPRESSED_RGB_FXT1_3DFX,
108 texWidth, (GLubyte *) dstAddr);
109
110 fxt1_encode(ctx, srcWidth, srcHeight, srcFormat, pixels, srcRowStride,
111 dst, dstRowStride);
112
113 if (tempImage)
114 _mesa_free((void*) tempImage);
115
116 return GL_TRUE;
117 }
118
119
120 /**
121 * Called via TexFormat->StoreImage to store an RGBA_FXT1 texture.
122 */
123 static GLboolean
124 texstore_rgba_fxt1(STORE_PARAMS)
125 {
126 const GLchan *pixels;
127 GLint srcRowStride;
128 GLubyte *dst;
129 GLint texWidth = dstRowStride * 8 / 16; /* a bit of a hack */
130 const GLchan *tempImage = NULL;
131
132 ASSERT(dstFormat == &_mesa_texformat_rgba_fxt1);
133 ASSERT(dstXoffset % 8 == 0);
134 ASSERT(dstYoffset % 4 == 0);
135 ASSERT(dstZoffset == 0);
136 (void) dstZoffset; (void) dstImageStride;
137
138 if (srcFormat != GL_RGBA ||
139 srcType != CHAN_TYPE ||
140 ctx->_ImageTransferState ||
141 srcPacking->SwapBytes) {
142 /* convert image to RGBA/GLchan */
143 tempImage = _mesa_make_temp_chan_image(ctx, dims,
144 baseInternalFormat,
145 dstFormat->BaseFormat,
146 srcWidth, srcHeight, srcDepth,
147 srcFormat, srcType, srcAddr,
148 srcPacking);
149 if (!tempImage)
150 return GL_FALSE; /* out of memory */
151 _mesa_adjust_image_for_convolution(ctx, dims, &srcWidth, &srcHeight);
152 pixels = tempImage;
153 srcRowStride = 4 * srcWidth;
154 srcFormat = GL_RGBA;
155 }
156 else {
157 pixels = (const GLchan *) srcAddr;
158 srcRowStride = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat,
159 srcType) / sizeof(GLchan);
160 }
161
162 dst = _mesa_compressed_image_address(dstXoffset, dstYoffset, 0,
163 GL_COMPRESSED_RGBA_FXT1_3DFX,
164 texWidth, (GLubyte *) dstAddr);
165
166 fxt1_encode(ctx, srcWidth, srcHeight, srcFormat, pixels, srcRowStride,
167 dst, dstRowStride);
168
169 if (tempImage)
170 _mesa_free((void*) tempImage);
171
172 return GL_TRUE;
173 }
174
175
176 static void
177 fetch_texel_2d_rgba_fxt1( const struct gl_texture_image *texImage,
178 GLint i, GLint j, GLint k, GLchan *texel )
179 {
180 (void) k;
181 fxt1_decode_1(texImage->Data, texImage->RowStride, i, j, texel);
182 }
183
184
185 static void
186 fetch_texel_2d_f_rgba_fxt1( const struct gl_texture_image *texImage,
187 GLint i, GLint j, GLint k, GLfloat *texel )
188 {
189 /* just sample as GLchan and convert to float here */
190 GLchan rgba[4];
191 (void) k;
192 fxt1_decode_1(texImage->Data, texImage->RowStride, i, j, rgba);
193 texel[RCOMP] = CHAN_TO_FLOAT(rgba[RCOMP]);
194 texel[GCOMP] = CHAN_TO_FLOAT(rgba[GCOMP]);
195 texel[BCOMP] = CHAN_TO_FLOAT(rgba[BCOMP]);
196 texel[ACOMP] = CHAN_TO_FLOAT(rgba[ACOMP]);
197 }
198
199
200 static void
201 fetch_texel_2d_rgb_fxt1( const struct gl_texture_image *texImage,
202 GLint i, GLint j, GLint k, GLchan *texel )
203 {
204 (void) k;
205 fxt1_decode_1(texImage->Data, texImage->RowStride, i, j, texel);
206 texel[ACOMP] = 255;
207 }
208
209
210 static void
211 fetch_texel_2d_f_rgb_fxt1( const struct gl_texture_image *texImage,
212 GLint i, GLint j, GLint k, GLfloat *texel )
213 {
214 /* just sample as GLchan and convert to float here */
215 GLchan rgba[4];
216 (void) k;
217 fxt1_decode_1(texImage->Data, texImage->RowStride, i, j, rgba);
218 texel[RCOMP] = CHAN_TO_FLOAT(rgba[RCOMP]);
219 texel[GCOMP] = CHAN_TO_FLOAT(rgba[GCOMP]);
220 texel[BCOMP] = CHAN_TO_FLOAT(rgba[BCOMP]);
221 texel[ACOMP] = 1.0;
222 }
223
224
225
226 const struct gl_texture_format _mesa_texformat_rgb_fxt1 = {
227 MESA_FORMAT_RGB_FXT1, /* MesaFormat */
228 GL_RGB, /* BaseFormat */
229 GL_UNSIGNED_NORMALIZED_ARB, /* DataType */
230 4, /*approx*/ /* RedBits */
231 4, /*approx*/ /* GreenBits */
232 4, /*approx*/ /* BlueBits */
233 0, /* AlphaBits */
234 0, /* LuminanceBits */
235 0, /* IntensityBits */
236 0, /* IndexBits */
237 0, /* DepthBits */
238 0, /* TexelBytes */
239 texstore_rgb_fxt1, /* StoreTexImageFunc */
240 NULL, /*impossible*/ /* FetchTexel1D */
241 fetch_texel_2d_rgb_fxt1, /* FetchTexel2D */
242 NULL, /*impossible*/ /* FetchTexel3D */
243 NULL, /*impossible*/ /* FetchTexel1Df */
244 fetch_texel_2d_f_rgb_fxt1, /* FetchTexel2Df */
245 NULL, /*impossible*/ /* FetchTexel3Df */
246 };
247
248 const struct gl_texture_format _mesa_texformat_rgba_fxt1 = {
249 MESA_FORMAT_RGBA_FXT1, /* MesaFormat */
250 GL_RGBA, /* BaseFormat */
251 GL_UNSIGNED_NORMALIZED_ARB, /* DataType */
252 4, /*approx*/ /* RedBits */
253 4, /*approx*/ /* GreenBits */
254 4, /*approx*/ /* BlueBits */
255 1, /*approx*/ /* AlphaBits */
256 0, /* LuminanceBits */
257 0, /* IntensityBits */
258 0, /* IndexBits */
259 0, /* DepthBits */
260 0, /* TexelBytes */
261 texstore_rgba_fxt1, /* StoreTexImageFunc */
262 NULL, /*impossible*/ /* FetchTexel1D */
263 fetch_texel_2d_rgba_fxt1, /* FetchTexel2D */
264 NULL, /*impossible*/ /* FetchTexel3D */
265 NULL, /*impossible*/ /* FetchTexel1Df */
266 fetch_texel_2d_f_rgba_fxt1, /* FetchTexel2Df */
267 NULL, /*impossible*/ /* FetchTexel3Df */
268 };
269
270
271 /***************************************************************************\
272 * FXT1 encoder
273 *
274 * The encoder was built by reversing the decoder,
275 * and is vaguely based on Texus2 by 3dfx. Note that this code
276 * is merely a proof of concept, since it is highly UNoptimized;
277 * moreover, it is sub-optimal due to initial conditions passed
278 * to Lloyd's algorithm (the interpolation modes are worse).
279 \***************************************************************************/
280
281
/* Encoder tuning constants. */
#define MAX_COMP 4 /* ever needed maximum number of components in texel */
#define MAX_VECT 4 /* ever needed maximum number of base vectors to find */
#define N_TEXELS 32 /* number of texels in a block (always 32) */
#define LL_N_REP 50 /* number of iterations in lloyd's vq */
#define LL_RMS_D 10 /* fault tolerance (maximum delta) */
#define LL_RMS_E 255 /* fault tolerance (maximum error) */
#define ALPHA_TS 2 /* alpha threshold: (255 - ALPHA_TS) deemed opaque */

/*
 * True when the 4-byte texel at v is "transparent black", i.e. all four
 * component bytes are zero.
 *
 * The bytes are tested individually instead of loading an unsigned long
 * through a cast: where unsigned long is 8 bytes, such a load would also
 * look at the neighbouring texel and run past the end of the array on the
 * last one, and the cast is an aliasing/alignment hazard.
 * The argument is evaluated more than once; do not pass expressions with
 * side effects.
 */
#define ISTBLACK(v) \
   ((((const unsigned char *)(v))[0] | \
     ((const unsigned char *)(v))[1] | \
     ((const unsigned char *)(v))[2] | \
     ((const unsigned char *)(v))[3]) == 0)
290
291
/*
 * Fx64: a 64-bit accumulator used to assemble the high quadword of an
 * FXT1 block.
 *
 *   FX64_MOV32(a, b)  load the 32-bit value b into a (upper half cleared)
 *   FX64_OR32(a, b)   OR the 32-bit value b into the low half of a
 *   FX64_SHL(a, c)    shift a left by c bits (0 <= c < 64)
 *
 * With GNU C the native unsigned long long is used.  Otherwise the value
 * is emulated with a lo/hi pair; the shift arithmetic of that fallback
 * treats each half as 32 bits wide.
 */
#ifdef __GNUC__

#define FX64_NATIVE 1

typedef unsigned long long Fx64;

#define FX64_MOV32(a, b) ((a) = (b))
#define FX64_OR32(a, b) ((a) |= (b))
#define FX64_SHL(a, c) ((a) <<= (c))

#else /* !__GNUC__ */

#define FX64_NATIVE 0

typedef struct {
   unsigned long lo, hi;
} Fx64;

/* clear .hi as well, so that a later FX64_SHL never reads garbage */
#define FX64_MOV32(a, b) ((a).lo = (b), (a).hi = 0)
#define FX64_OR32(a, b) ((a).lo |= (b))

/* a zero count is a no-op; it must not reach the "lo >> 32" below */
#define FX64_SHL(a, c) \
   do { \
      if ((c) >= 32) { \
         (a).hi = (a).lo << ((c) - 32); \
         (a).lo = 0; \
      } else if ((c) > 0) { \
         (a).hi = ((a).hi << (c)) | ((a).lo >> (32 - (c))); \
         (a).lo <<= (c); \
      } \
   } while (0)

#endif /* !__GNUC__ */
325
326
327 static int
328 fxt1_bestcol (float vec[][MAX_COMP], int nv,
329 unsigned char input[MAX_COMP], int nc)
330 {
331 int i, j, best = -1;
332 float err = 1e9; /* big enough */
333
334 for (j = 0; j < nv; j++) {
335 float e = 0;
336 for (i = 0; i < nc; i++) {
337 e += (vec[j][i] - input[i]) * (vec[j][i] - input[i]);
338 }
339 if (e < err) {
340 err = e;
341 best = j;
342 }
343 }
344
345 return best;
346 }
347
348
349 static int
350 fxt1_worst (float vec[MAX_COMP],
351 unsigned char input[N_TEXELS][MAX_COMP], int nc, int n)
352 {
353 int i, k, worst = -1;
354 float err = -1; /* small enough */
355
356 for (k = 0; k < n; k++) {
357 float e = 0;
358 for (i = 0; i < nc; i++) {
359 e += (vec[i] - input[k][i]) * (vec[i] - input[k][i]);
360 }
361 if (e > err) {
362 err = e;
363 worst = k;
364 }
365 }
366
367 return worst;
368 }
369
370
371 static int
372 fxt1_variance (double variance[MAX_COMP],
373 unsigned char input[N_TEXELS][MAX_COMP], int nc, int n)
374 {
375 int i, k, best = 0;
376 int sx, sx2;
377 double var, maxvar = -1; /* small enough */
378 double teenth = 1.0 / n;
379
380 for (i = 0; i < nc; i++) {
381 sx = sx2 = 0;
382 for (k = 0; k < n; k++) {
383 int t = input[k][i];
384 sx += t;
385 sx2 += t * t;
386 }
387 var = sx2 * teenth - sx * sx * teenth * teenth;
388 if (maxvar < var) {
389 maxvar = var;
390 best = i;
391 }
392 if (variance) {
393 variance[i] = var;
394 }
395 }
396
397 return best;
398 }
399
400
401 static int
402 fxt1_choose (float vec[][MAX_COMP], int nv,
403 unsigned char input[N_TEXELS][MAX_COMP], int nc, int n)
404 {
405 #if 0
406 /* Choose colors from a grid.
407 */
408 int i, j;
409
410 for (j = 0; j < nv; j++) {
411 int m = j * (n - 1) / (nv - 1);
412 for (i = 0; i < nc; i++) {
413 vec[j][i] = input[m][i];
414 }
415 }
416 #else
417 /* Our solution here is to find the darkest and brightest colors in
418 * the 8x4 tile and use those as the two representative colors.
419 * There are probably better algorithms to use (histogram-based).
420 */
421 int i, j, k;
422 int minSum = 1000; /* big enough */
423 int maxSum = -1; /* small enough */
424 int minCol = 0; /* phoudoin: silent compiler! */
425 int maxCol = 0; /* phoudoin: silent compiler! */
426
427 struct {
428 int flag;
429 int key;
430 int freq;
431 int idx;
432 } hist[N_TEXELS];
433 int lenh = 0;
434
435 memset(hist, 0, sizeof(hist));
436
437 for (k = 0; k < n; k++) {
438 int l;
439 int key = 0;
440 int sum = 0;
441 for (i = 0; i < nc; i++) {
442 key <<= 8;
443 key |= input[k][i];
444 sum += input[k][i];
445 }
446 for (l = 0; l < n; l++) {
447 if (!hist[l].flag) {
448 /* alloc new slot */
449 hist[l].flag = !0;
450 hist[l].key = key;
451 hist[l].freq = 1;
452 hist[l].idx = k;
453 lenh = l + 1;
454 break;
455 } else if (hist[l].key == key) {
456 hist[l].freq++;
457 break;
458 }
459 }
460 if (minSum > sum) {
461 minSum = sum;
462 minCol = k;
463 }
464 if (maxSum < sum) {
465 maxSum = sum;
466 maxCol = k;
467 }
468 }
469
470 if (lenh <= nv) {
471 for (j = 0; j < lenh; j++) {
472 for (i = 0; i < nc; i++) {
473 vec[j][i] = (float)input[hist[j].idx][i];
474 }
475 }
476 for (; j < nv; j++) {
477 for (i = 0; i < nc; i++) {
478 vec[j][i] = vec[0][i];
479 }
480 }
481 return 0;
482 }
483
484 for (j = 0; j < nv; j++) {
485 for (i = 0; i < nc; i++) {
486 vec[j][i] = ((nv - 1 - j) * input[minCol][i] + j * input[maxCol][i] + (nv - 1) / 2) / (nv - 1);
487 }
488 }
489 #endif
490
491 return !0;
492 }
493
494
495 static int
496 fxt1_lloyd (float vec[][MAX_COMP], int nv,
497 unsigned char input[N_TEXELS][MAX_COMP], int nc, int n)
498 {
499 /* Use the generalized lloyd's algorithm for VQ:
500 * find 4 color vectors.
501 *
502 * for each sample color
503 * sort to nearest vector.
504 *
505 * replace each vector with the centroid of it's matching colors.
506 *
507 * repeat until RMS doesn't improve.
508 *
509 * if a color vector has no samples, or becomes the same as another
510 * vector, replace it with the color which is farthest from a sample.
511 *
512 * vec[][MAX_COMP] initial vectors and resulting colors
513 * nv number of resulting colors required
514 * input[N_TEXELS][MAX_COMP] input texels
515 * nc number of components in input / vec
516 * n number of input samples
517 */
518
519 int sum[MAX_VECT][MAX_COMP]; /* used to accumulate closest texels */
520 int cnt[MAX_VECT]; /* how many times a certain vector was chosen */
521 float error, lasterror = 1e9;
522
523 int i, j, k, rep;
524
525 /* the quantizer */
526 for (rep = 0; rep < LL_N_REP; rep++) {
527 /* reset sums & counters */
528 for (j = 0; j < nv; j++) {
529 for (i = 0; i < nc; i++) {
530 sum[j][i] = 0;
531 }
532 cnt[j] = 0;
533 }
534 error = 0;
535
536 /* scan whole block */
537 for (k = 0; k < n; k++) {
538 #if 1
539 int best = -1;
540 float err = 1e9; /* big enough */
541 /* determine best vector */
542 for (j = 0; j < nv; j++) {
543 float e = (vec[j][0] - input[k][0]) * (vec[j][0] - input[k][0]) +
544 (vec[j][1] - input[k][1]) * (vec[j][1] - input[k][1]) +
545 (vec[j][2] - input[k][2]) * (vec[j][2] - input[k][2]);
546 if (nc == 4) {
547 e += (vec[j][3] - input[k][3]) * (vec[j][3] - input[k][3]);
548 }
549 if (e < err) {
550 err = e;
551 best = j;
552 }
553 }
554 #else
555 int best = fxt1_bestcol(vec, n_vect, input[k], n_comp, &err);
556 #endif
557 /* add in closest color */
558 for (i = 0; i < nc; i++) {
559 sum[best][i] += input[k][i];
560 }
561 /* mark this vector as used */
562 cnt[best]++;
563 /* accumulate error */
564 error += err;
565 }
566
567 /* check RMS */
568 if ((error < LL_RMS_E) ||
569 ((error < lasterror) && ((lasterror - error) < LL_RMS_D))) {
570 return !0; /* good match */
571 }
572 lasterror = error;
573
574 /* move each vector to the barycenter of its closest colors */
575 for (j = 0; j < nv; j++) {
576 if (cnt[j]) {
577 float div = 1.0 / cnt[j];
578 for (i = 0; i < nc; i++) {
579 vec[j][i] = div * sum[j][i];
580 }
581 } else {
582 /* this vec has no samples or is identical with a previous vec */
583 int worst = fxt1_worst(vec[j], input, nc, n);
584 for (i = 0; i < nc; i++) {
585 vec[j][i] = input[worst][i];
586 }
587 }
588 }
589 }
590
591 return 0; /* could not converge fast enough */
592 }
593
594
595 static void
596 fxt1_quantize_CHROMA (unsigned long *cc,
597 unsigned char input[N_TEXELS][MAX_COMP])
598 {
599 const int n_vect = 4; /* 4 base vectors to find */
600 const int n_comp = 3; /* 3 components: R, G, B */
601 float vec[MAX_VECT][MAX_COMP];
602 int i, j, k;
603 Fx64 hi; /* high quadword */
604 unsigned long lohi, lolo; /* low quadword: hi dword, lo dword */
605
606 if (fxt1_choose(vec, n_vect, input, n_comp, N_TEXELS) != 0) {
607 fxt1_lloyd(vec, n_vect, input, n_comp, N_TEXELS);
608 }
609
610 FX64_MOV32(hi, 4); /* cc-chroma = "010" + unused bit */
611 for (j = n_vect - 1; j >= 0; j--) {
612 for (i = 0; i < n_comp; i++) {
613 /* add in colors */
614 FX64_SHL(hi, 5);
615 FX64_OR32(hi, (unsigned int)(vec[j][i] / 8.0));
616 }
617 }
618 ((Fx64 *)cc)[1] = hi;
619
620 lohi = lolo = 0;
621 /* right microtile */
622 for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) {
623 lohi <<= 2;
624 lohi |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
625 }
626 /* left microtile */
627 for (; k >= 0; k--) {
628 lolo <<= 2;
629 lolo |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
630 }
631 cc[1] = lohi;
632 cc[0] = lolo;
633 }
634
635
636 static void
637 fxt1_quantize_ALPHA0 (unsigned long *cc,
638 unsigned char input[N_TEXELS][MAX_COMP],
639 unsigned char reord[N_TEXELS][MAX_COMP], int n)
640 {
641 const int n_vect = 3; /* 3 base vectors to find */
642 const int n_comp = 4; /* 4 components: R, G, B, A */
643 float vec[MAX_VECT][MAX_COMP];
644 int i, j, k;
645 Fx64 hi; /* high quadword */
646 unsigned long lohi, lolo; /* low quadword: hi dword, lo dword */
647
648 /* the last vector indicates zero */
649 for (i = 0; i < n_comp; i++) {
650 vec[n_vect][i] = 0;
651 }
652
653 /* the first n texels in reord are guaranteed to be non-zero */
654 if (fxt1_choose(vec, n_vect, reord, n_comp, n) != 0) {
655 fxt1_lloyd(vec, n_vect, reord, n_comp, n);
656 }
657
658 FX64_MOV32(hi, 6); /* alpha = "011" + lerp = 0 */
659 for (j = n_vect - 1; j >= 0; j--) {
660 /* add in alphas */
661 FX64_SHL(hi, 5);
662 FX64_OR32(hi, (unsigned int)(vec[j][ACOMP] / 8.0));
663 }
664 for (j = n_vect - 1; j >= 0; j--) {
665 for (i = 0; i < n_comp - 1; i++) {
666 /* add in colors */
667 FX64_SHL(hi, 5);
668 FX64_OR32(hi, (unsigned int)(vec[j][i] / 8.0));
669 }
670 }
671 ((Fx64 *)cc)[1] = hi;
672
673 lohi = lolo = 0;
674 /* right microtile */
675 for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) {
676 lohi <<= 2;
677 lohi |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
678 }
679 /* left microtile */
680 for (; k >= 0; k--) {
681 lolo <<= 2;
682 lolo |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
683 }
684 cc[1] = lohi;
685 cc[0] = lolo;
686 }
687
688
689 static void
690 fxt1_quantize_ALPHA1 (unsigned long *cc,
691 unsigned char input[N_TEXELS][MAX_COMP])
692 {
693 const int n_vect = 3; /* highest vector number in each microtile */
694 const int n_comp = 4; /* 4 components: R, G, B, A */
695 float vec[1 + 1 + 1][MAX_COMP]; /* 1.5 extrema for each sub-block */
696 float b, iv[MAX_COMP]; /* interpolation vector */
697 int i, j, k;
698 Fx64 hi; /* high quadword */
699 unsigned long lohi, lolo; /* low quadword: hi dword, lo dword */
700
701 int minSum;
702 int maxSum;
703 int minColL = 0, maxColL = 0;
704 int minColR = 0, maxColR = 0;
705 int sumL = 0, sumR = 0;
706
707 /* Our solution here is to find the darkest and brightest colors in
708 * the 4x4 tile and use those as the two representative colors.
709 * There are probably better algorithms to use (histogram-based).
710 */
711 minSum = 1000; /* big enough */
712 maxSum = -1; /* small enough */
713 for (k = 0; k < N_TEXELS / 2; k++) {
714 int sum = 0;
715 for (i = 0; i < n_comp; i++) {
716 sum += input[k][i];
717 }
718 if (minSum > sum) {
719 minSum = sum;
720 minColL = k;
721 }
722 if (maxSum < sum) {
723 maxSum = sum;
724 maxColL = k;
725 }
726 sumL += sum;
727 }
728 minSum = 1000; /* big enough */
729 maxSum = -1; /* small enough */
730 for (; k < N_TEXELS; k++) {
731 int sum = 0;
732 for (i = 0; i < n_comp; i++) {
733 sum += input[k][i];
734 }
735 if (minSum > sum) {
736 minSum = sum;
737 minColR = k;
738 }
739 if (maxSum < sum) {
740 maxSum = sum;
741 maxColR = k;
742 }
743 sumR += sum;
744 }
745
746 /* choose the common vector (yuck!) */
747 {
748 int j1, j2;
749 int v1 = 0, v2 = 0;
750 float err = 1e9; /* big enough */
751 float tv[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
752 for (i = 0; i < n_comp; i++) {
753 tv[0][i] = input[minColL][i];
754 tv[1][i] = input[maxColL][i];
755 tv[2][i] = input[minColR][i];
756 tv[3][i] = input[maxColR][i];
757 }
758 for (j1 = 0; j1 < 2; j1++) {
759 for (j2 = 2; j2 < 4; j2++) {
760 float e = 0;
761 for (i = 0; i < n_comp; i++) {
762 e += (tv[j1][i] - tv[j2][i]) * (tv[j1][i] - tv[j2][i]);
763 }
764 if (e < err) {
765 err = e;
766 v1 = j1;
767 v2 = j2;
768 }
769 }
770 }
771 for (i = 0; i < n_comp; i++) {
772 vec[0][i] = tv[1 - v1][i];
773 vec[1][i] = (tv[v1][i] * sumL + tv[v2][i] * sumR) / (sumL + sumR);
774 vec[2][i] = tv[5 - v2][i];
775 }
776 }
777
778 /* left microtile */
779 cc[0] = 0;
780 if (minColL != maxColL) {
781 /* compute interpolation vector */
782 float d2 = 0;
783 float rd2;
784
785 for (i = 0; i < n_comp; i++) {
786 iv[i] = vec[1][i] - vec[0][i];
787 d2 += iv[i] * iv[i];
788 }
789 rd2 = (float)n_vect / d2;
790 b = 0;
791 for (i = 0; i < n_comp; i++) {
792 b -= iv[i] * vec[0][i];
793 iv[i] *= rd2;
794 }
795 b = b * rd2 + 0.5f;
796
797 /* add in texels */
798 lolo = 0;
799 for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
800 int texel;
801 /* interpolate color */
802 float dot = 0;
803 for (i = 0; i < n_comp; i++) {
804 dot += input[k][i] * iv[i];
805 }
806 texel = (int)(dot + b);
807 if (texel < 0) {
808 texel = 0;
809 } else if (texel > n_vect) {
810 texel = n_vect;
811 }
812 /* add in texel */
813 lolo <<= 2;
814 lolo |= texel;
815 }
816
817 cc[0] = lolo;
818 }
819
820 /* right microtile */
821 cc[1] = 0;
822 if (minColR != maxColR) {
823 /* compute interpolation vector */
824 float d2 = 0;
825 float rd2;
826
827 for (i = 0; i < n_comp; i++) {
828 iv[i] = vec[1][i] - vec[2][i];
829 d2 += iv[i] * iv[i];
830 }
831 rd2 = (float)n_vect / d2;
832 b = 0;
833 for (i = 0; i < n_comp; i++) {
834 b -= iv[i] * vec[2][i];
835 iv[i] *= rd2;
836 }
837 b = b * rd2 + 0.5f;
838
839 /* add in texels */
840 lohi = 0;
841 for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
842 int texel;
843 /* interpolate color */
844 float dot = 0;
845 for (i = 0; i < n_comp; i++) {
846 dot += input[k][i] * iv[i];
847 }
848 texel = (int)(dot + b);
849 if (texel < 0) {
850 texel = 0;
851 } else if (texel > n_vect) {
852 texel = n_vect;
853 }
854 /* add in texel */
855 lohi <<= 2;
856 lohi |= texel;
857 }
858
859 cc[1] = lohi;
860 }
861
862 FX64_MOV32(hi, 7); /* alpha = "011" + lerp = 1 */
863 for (j = n_vect - 1; j >= 0; j--) {
864 /* add in alphas */
865 FX64_SHL(hi, 5);
866 FX64_OR32(hi, (unsigned int)(vec[j][ACOMP] / 8.0));
867 }
868 for (j = n_vect - 1; j >= 0; j--) {
869 for (i = 0; i < n_comp - 1; i++) {
870 /* add in colors */
871 FX64_SHL(hi, 5);
872 FX64_OR32(hi, (unsigned int)(vec[j][i] / 8.0));
873 }
874 }
875 ((Fx64 *)cc)[1] = hi;
876 }
877
878
879 static void
880 fxt1_quantize_HI (unsigned long *cc,
881 unsigned char input[N_TEXELS][MAX_COMP],
882 unsigned char reord[N_TEXELS][MAX_COMP], int n)
883 {
884 const int n_vect = 6; /* highest vector number */
885 const int n_comp = 3; /* 3 components: R, G, B */
886 float b = 0.0; /* phoudoin: silent compiler! */
887 float iv[MAX_COMP]; /* interpolation vector */
888 int i, k;
889 unsigned long hihi; /* high quadword: hi dword */
890
891 int minSum = 1000; /* big enough */
892 int maxSum = -1; /* small enough */
893 int minCol = 0; /* phoudoin: silent compiler! */
894 int maxCol = 0; /* phoudoin: silent compiler! */
895
896 /* Our solution here is to find the darkest and brightest colors in
897 * the 8x4 tile and use those as the two representative colors.
898 * There are probably better algorithms to use (histogram-based).
899 */
900 for (k = 0; k < n; k++) {
901 int sum = 0;
902 for (i = 0; i < n_comp; i++) {
903 sum += reord[k][i];
904 }
905 if (minSum > sum) {
906 minSum = sum;
907 minCol = k;
908 }
909 if (maxSum < sum) {
910 maxSum = sum;
911 maxCol = k;
912 }
913 }
914
915 hihi = 0; /* cc-hi = "00" */
916 for (i = 0; i < n_comp; i++) {
917 /* add in colors */
918 hihi <<= 5;
919 hihi |= reord[maxCol][i] >> 3;
920 }
921 for (i = 0; i < n_comp; i++) {
922 /* add in colors */
923 hihi <<= 5;
924 hihi |= reord[minCol][i] >> 3;
925 }
926 cc[3] = hihi;
927 cc[0] = cc[1] = cc[2] = 0;
928
929 /* compute interpolation vector */
930 if (minCol != maxCol) {
931 float d2 = 0;
932 float rd2;
933
934 for (i = 0; i < n_comp; i++) {
935 iv[i] = reord[maxCol][i] - reord[minCol][i];
936 d2 += iv[i] * iv[i];
937 }
938 rd2 = (float)n_vect / d2;
939 b = 0;
940 for (i = 0; i < n_comp; i++) {
941 b -= iv[i] * reord[minCol][i];
942 iv[i] *= rd2;
943 }
944 b = b * rd2 + 0.5f;
945 }
946
947 /* add in texels */
948 for (k = N_TEXELS - 1; k >= 0; k--) {
949 int t = k * 3;
950 unsigned long *kk = (unsigned long *)((unsigned long)cc + t / 8);
951 int texel = n_vect + 1; /* transparent black */
952
953 if (!ISTBLACK(input[k])) {
954 if (minCol != maxCol) {
955 /* interpolate color */
956 float dot = 0;
957 for (i = 0; i < n_comp; i++) {
958 dot += input[k][i] * iv[i];
959 }
960 texel = (int)(dot + b);
961 if (texel < 0) {
962 texel = 0;
963 } else if (texel > n_vect) {
964 texel = n_vect;
965 }
966 /* add in texel */
967 kk[0] |= texel << (t & 7);
968 }
969 } else {
970 /* add in texel */
971 kk[0] |= texel << (t & 7);
972 }
973 }
974 }
975
976
977 static void
978 fxt1_quantize_MIXED1 (unsigned long *cc,
979 unsigned char input[N_TEXELS][MAX_COMP])
980 {
981 const int n_vect = 2; /* highest vector number in each microtile */
982 const int n_comp = 3; /* 3 components: R, G, B */
983 unsigned char vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
984 float b, iv[MAX_COMP]; /* interpolation vector */
985 int i, j, k;
986 Fx64 hi; /* high quadword */
987 unsigned long lohi, lolo; /* low quadword: hi dword, lo dword */
988
989 int minSum;
990 int maxSum;
991 int minColL = 0, maxColL = -1;
992 int minColR = 0, maxColR = -1;
993
994 /* Our solution here is to find the darkest and brightest colors in
995 * the 4x4 tile and use those as the two representative colors.
996 * There are probably better algorithms to use (histogram-based).
997 */
998 minSum = 1000; /* big enough */
999 maxSum = -1; /* small enough */
1000 for (k = 0; k < N_TEXELS / 2; k++) {
1001 if (!ISTBLACK(input[k])) {
1002 int sum = 0;
1003 for (i = 0; i < n_comp; i++) {
1004 sum += input[k][i];
1005 }
1006 if (minSum > sum) {
1007 minSum = sum;
1008 minColL = k;
1009 }
1010 if (maxSum < sum) {
1011 maxSum = sum;
1012 maxColL = k;
1013 }
1014 }
1015 }
1016 minSum = 1000; /* big enough */
1017 maxSum = -1; /* small enough */
1018 for (; k < N_TEXELS; k++) {
1019 if (!ISTBLACK(input[k])) {
1020 int sum = 0;
1021 for (i = 0; i < n_comp; i++) {
1022 sum += input[k][i];
1023 }
1024 if (minSum > sum) {
1025 minSum = sum;
1026 minColR = k;
1027 }
1028 if (maxSum < sum) {
1029 maxSum = sum;
1030 maxColR = k;
1031 }
1032 }
1033 }
1034
1035 /* left microtile */
1036 if (maxColL == -1) {
1037 /* all transparent black */
1038 cc[0] = -1;
1039 for (i = 0; i < n_comp; i++) {
1040 vec[0][i] = 0;
1041 vec[1][i] = 0;
1042 }
1043 } else {
1044 cc[0] = 0;
1045 for (i = 0; i < n_comp; i++) {
1046 vec[0][i] = input[minColL][i];
1047 vec[1][i] = input[maxColL][i];
1048 }
1049 if (minColL != maxColL) {
1050 /* compute interpolation vector */
1051 float d2 = 0;
1052 float rd2;
1053
1054 for (i = 0; i < n_comp; i++) {
1055 iv[i] = vec[1][i] - vec[0][i];
1056 d2 += iv[i] * iv[i];
1057 }
1058 rd2 = (float)n_vect / d2;
1059 b = 0;
1060 for (i = 0; i < n_comp; i++) {
1061 b -= iv[i] * vec[0][i];
1062 iv[i] *= rd2;
1063 }
1064 b = b * rd2 + 0.5f;
1065
1066 /* add in texels */
1067 lolo = 0;
1068 for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
1069 int texel = n_vect + 1; /* transparent black */
1070 if (!ISTBLACK(input[k])) {
1071 /* interpolate color */
1072 float dot = 0;
1073 for (i = 0; i < n_comp; i++) {
1074 dot += input[k][i] * iv[i];
1075 }
1076 texel = (int)(dot + b);
1077 if (texel < 0) {
1078 texel = 0;
1079 } else if (texel > n_vect) {
1080 texel = n_vect;
1081 }
1082 }
1083 /* add in texel */
1084 lolo <<= 2;
1085 lolo |= texel;
1086 }
1087 cc[0] = lolo;
1088 }
1089 }
1090
1091 /* right microtile */
1092 if (maxColR == -1) {
1093 /* all transparent black */
1094 cc[1] = -1;
1095 for (i = 0; i < n_comp; i++) {
1096 vec[2][i] = 0;
1097 vec[3][i] = 0;
1098 }
1099 } else {
1100 cc[1] = 0;
1101 for (i = 0; i < n_comp; i++) {
1102 vec[2][i] = input[minColR][i];
1103 vec[3][i] = input[maxColR][i];
1104 }
1105 if (minColR != maxColR) {
1106 /* compute interpolation vector */
1107 float d2 = 0;
1108 float rd2;
1109
1110 for (i = 0; i < n_comp; i++) {
1111 iv[i] = vec[3][i] - vec[2][i];
1112 d2 += iv[i] * iv[i];
1113 }
1114 rd2 = (float)n_vect / d2;
1115 b = 0;
1116 for (i = 0; i < n_comp; i++) {
1117 b -= iv[i] * vec[2][i];
1118 iv[i] *= rd2;
1119 }
1120 b = b * rd2 + 0.5f;
1121
1122 /* add in texels */
1123 lohi = 0;
1124 for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
1125 int texel = n_vect + 1; /* transparent black */
1126 if (!ISTBLACK(input[k])) {
1127 /* interpolate color */
1128 float dot = 0;
1129 for (i = 0; i < n_comp; i++) {
1130 dot += input[k][i] * iv[i];
1131 }
1132 texel = (int)(dot + b);
1133 if (texel < 0) {
1134 texel = 0;
1135 } else if (texel > n_vect) {
1136 texel = n_vect;
1137 }
1138 }
1139 /* add in texel */
1140 lohi <<= 2;
1141 lohi |= texel;
1142 }
1143 cc[1] = lohi;
1144 }
1145 }
1146
1147 FX64_MOV32(hi, 9 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
1148 for (j = 2 * 2 - 1; j >= 0; j--) {
1149 for (i = 0; i < n_comp; i++) {
1150 /* add in colors */
1151 FX64_SHL(hi, 5);
1152 FX64_OR32(hi, vec[j][i] >> 3);
1153 }
1154 }
1155 ((Fx64 *)cc)[1] = hi;
1156 }
1157
1158
1159 static void
1160 fxt1_quantize_MIXED0 (unsigned long *cc,
1161 unsigned char input[N_TEXELS][MAX_COMP])
1162 {
1163 const int n_vect = 3; /* highest vector number in each microtile */
1164 const int n_comp = 3; /* 3 components: R, G, B */
1165 unsigned char vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
1166 float b, iv[MAX_COMP]; /* interpolation vector */
1167 int i, j, k;
1168 Fx64 hi; /* high quadword */
1169 unsigned long lohi, lolo; /* low quadword: hi dword, lo dword */
1170
1171 int minColL = 0, maxColL = 0;
1172 int minColR = 0, maxColR = 0;
1173 #if 0
1174 int minSum;
1175 int maxSum;
1176
1177 /* Our solution here is to find the darkest and brightest colors in
1178 * the 4x4 tile and use those as the two representative colors.
1179 * There are probably better algorithms to use (histogram-based).
1180 */
1181 minSum = 1000; /* big enough */
1182 maxSum = -1; /* small enough */
1183 for (k = 0; k < N_TEXELS / 2; k++) {
1184 int sum = 0;
1185 for (i = 0; i < n_comp; i++) {
1186 sum += input[k][i];
1187 }
1188 if (minSum > sum) {
1189 minSum = sum;
1190 minColL = k;
1191 }
1192 if (maxSum < sum) {
1193 maxSum = sum;
1194 maxColL = k;
1195 }
1196 }
1197 minSum = 1000; /* big enough */
1198 maxSum = -1; /* small enough */
1199 for (; k < N_TEXELS; k++) {
1200 int sum = 0;
1201 for (i = 0; i < n_comp; i++) {
1202 sum += input[k][i];
1203 }
1204 if (minSum > sum) {
1205 minSum = sum;
1206 minColR = k;
1207 }
1208 if (maxSum < sum) {
1209 maxSum = sum;
1210 maxColR = k;
1211 }
1212 }
1213 #else
1214 int minVal;
1215 int maxVal;
1216 int maxVarL = fxt1_variance(NULL, input, n_comp, N_TEXELS / 2);
1217 int maxVarR = fxt1_variance(NULL, &input[N_TEXELS / 2], n_comp, N_TEXELS / 2);
1218
1219 /* Scan the channel with max variance for lo & hi
1220 * and use those as the two representative colors.
1221 */
1222 minVal = 1000; /* big enough */
1223 maxVal = -1; /* small enough */
1224 for (k = 0; k < N_TEXELS / 2; k++) {
1225 int t = input[k][maxVarL];
1226 if (minVal > t) {
1227 minVal = t;
1228 minColL = k;
1229 }
1230 if (maxVal < t) {
1231 maxVal = t;
1232 maxColL = k;
1233 }
1234 }
1235 minVal = 1000; /* big enough */
1236 maxVal = -1; /* small enough */
1237 for (; k < N_TEXELS; k++) {
1238 int t = input[k][maxVarR];
1239 if (minVal > t) {
1240 minVal = t;
1241 minColR = k;
1242 }
1243 if (maxVal < t) {
1244 maxVal = t;
1245 maxColR = k;
1246 }
1247 }
1248 #endif
1249
1250 /* left microtile */
1251 cc[0] = 0;
1252 for (i = 0; i < n_comp; i++) {
1253 vec[0][i] = input[minColL][i];
1254 vec[1][i] = input[maxColL][i];
1255 }
1256 if (minColL != maxColL) {
1257 /* compute interpolation vector */
1258 float d2 = 0;
1259 float rd2;
1260
1261 for (i = 0; i < n_comp; i++) {
1262 iv[i] = vec[1][i] - vec[0][i];
1263 d2 += iv[i] * iv[i];
1264 }
1265 rd2 = (float)n_vect / d2;
1266 b = 0;
1267 for (i = 0; i < n_comp; i++) {
1268 b -= iv[i] * vec[0][i];
1269 iv[i] *= rd2;
1270 }
1271 b = b * rd2 + 0.5f;
1272
1273 /* add in texels */
1274 lolo = 0;
1275 for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
1276 int texel;
1277 /* interpolate color */
1278 float dot = 0;
1279 for (i = 0; i < n_comp; i++) {
1280 dot += input[k][i] * iv[i];
1281 }
1282 texel = (int)(dot + b);
1283 if (texel < 0) {
1284 texel = 0;
1285 } else if (texel > n_vect) {
1286 texel = n_vect;
1287 }
1288 /* add in texel */
1289 lolo <<= 2;
1290 lolo |= texel;
1291 }
1292
1293 /* funky encoding for LSB of green */
1294 if ((int)((lolo >> 1) & 1) != (((vec[1][GCOMP] ^ vec[0][GCOMP]) >> 2) & 1)) {
1295 for (i = 0; i < n_comp; i++) {
1296 vec[1][i] = input[minColL][i];
1297 vec[0][i] = input[maxColL][i];
1298 }
1299 lolo = ~lolo;
1300 }
1301
1302 cc[0] = lolo;
1303 }
1304
1305 /* right microtile */
1306 cc[1] = 0;
1307 for (i = 0; i < n_comp; i++) {
1308 vec[2][i] = input[minColR][i];
1309 vec[3][i] = input[maxColR][i];
1310 }
1311 if (minColR != maxColR) {
1312 /* compute interpolation vector */
1313 float d2 = 0;
1314 float rd2;
1315
1316 for (i = 0; i < n_comp; i++) {
1317 iv[i] = vec[3][i] - vec[2][i];
1318 d2 += iv[i] * iv[i];
1319 }
1320 rd2 = (float)n_vect / d2;
1321 b = 0;
1322 for (i = 0; i < n_comp; i++) {
1323 b -= iv[i] * vec[2][i];
1324 iv[i] *= rd2;
1325 }
1326 b = b * rd2 + 0.5f;
1327
1328 /* add in texels */
1329 lohi = 0;
1330 for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
1331 int texel;
1332 /* interpolate color */
1333 float dot = 0;
1334 for (i = 0; i < n_comp; i++) {
1335 dot += input[k][i] * iv[i];
1336 }
1337 texel = (int)(dot + b);
1338 if (texel < 0) {
1339 texel = 0;
1340 } else if (texel > n_vect) {
1341 texel = n_vect;
1342 }
1343 /* add in texel */
1344 lohi <<= 2;
1345 lohi |= texel;
1346 }
1347
1348 /* funky encoding for LSB of green */
1349 if ((int)((lohi >> 1) & 1) != (((vec[3][GCOMP] ^ vec[2][GCOMP]) >> 2) & 1)) {
1350 for (i = 0; i < n_comp; i++) {
1351 vec[3][i] = input[minColR][i];
1352 vec[2][i] = input[maxColR][i];
1353 }
1354 lohi = ~lohi;
1355 }
1356
1357 cc[1] = lohi;
1358 }
1359
1360 FX64_MOV32(hi, 8 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
1361 for (j = 2 * 2 - 1; j >= 0; j--) {
1362 for (i = 0; i < n_comp; i++) {
1363 /* add in colors */
1364 FX64_SHL(hi, 5);
1365 FX64_OR32(hi, vec[j][i] >> 3);
1366 }
1367 }
1368 ((Fx64 *)cc)[1] = hi;
1369 }
1370
1371
1372 static void
1373 fxt1_quantize (unsigned long *cc, const unsigned char *lines[], int comps)
1374 {
1375 int trualpha;
1376 unsigned char reord[N_TEXELS][MAX_COMP];
1377
1378 unsigned char input[N_TEXELS][MAX_COMP];
1379 int i, k, l;
1380
1381 memset(input, -1, sizeof(input));
1382
1383 /* 8 texels each line */
1384 for (l = 0; l < 4; l++) {
1385 for (k = 0; k < 4; k++) {
1386 for (i = 0; i < comps; i++) {
1387 input[k + l * 4][i] = *lines[l]++;
1388 }
1389 }
1390 for (; k < 8; k++) {
1391 for (i = 0; i < comps; i++) {
1392 input[k + l * 4 + 12][i] = *lines[l]++;
1393 }
1394 }
1395 }
1396
1397 /* block layout:
1398 * 00, 01, 02, 03, 08, 09, 0a, 0b
1399 * 10, 11, 12, 13, 18, 19, 1a, 1b
1400 * 04, 05, 06, 07, 0c, 0d, 0e, 0f
1401 * 14, 15, 16, 17, 1c, 1d, 1e, 1f
1402 */
1403
1404 /* [dBorca]
1405 * stupidity flows forth from this
1406 */
1407 l = N_TEXELS;
1408 trualpha = 0;
1409 if (comps == 4) {
1410 /* skip all transparent black texels */
1411 l = 0;
1412 for (k = 0; k < N_TEXELS; k++) {
1413 /* test all components against 0 */
1414 if (!ISTBLACK(input[k])) {
1415 /* texel is not transparent black */
1416 COPY_4UBV(reord[l], input[k]);
1417 if (reord[l][ACOMP] < (255 - ALPHA_TS)) {
1418 /* non-opaque texel */
1419 trualpha = !0;
1420 }
1421 l++;
1422 }
1423 }
1424 }
1425
1426 #if 0
1427 if (trualpha) {
1428 fxt1_quantize_ALPHA0(cc, input, reord, l);
1429 } else if (l == 0) {
1430 cc[0] = cc[1] = cc[2] = -1;
1431 cc[3] = 0;
1432 } else if (l < N_TEXELS) {
1433 fxt1_quantize_HI(cc, input, reord, l);
1434 } else {
1435 fxt1_quantize_CHROMA(cc, input);
1436 }
1437 (void)fxt1_quantize_ALPHA1;
1438 (void)fxt1_quantize_MIXED1;
1439 (void)fxt1_quantize_MIXED0;
1440 #else
1441 if (trualpha) {
1442 fxt1_quantize_ALPHA1(cc, input);
1443 } else if (l == 0) {
1444 cc[0] = cc[1] = cc[2] = -1;
1445 cc[3] = 0;
1446 } else if (l < N_TEXELS) {
1447 fxt1_quantize_MIXED1(cc, input);
1448 } else {
1449 fxt1_quantize_MIXED0(cc, input);
1450 }
1451 (void)fxt1_quantize_ALPHA0;
1452 (void)fxt1_quantize_HI;
1453 (void)fxt1_quantize_CHROMA;
1454 #endif
1455 }
1456
1457
1458 int
1459 fxt1_encode (GLcontext *ctx,
1460 unsigned int width, unsigned int height,
1461 int srcFormat,
1462 const void *source, int srcRowStride,
1463 void *dest, int destRowStride)
1464 {
1465 const int comps = (srcFormat == GL_RGB) ? 3 : 4;
1466 unsigned int x, y;
1467 const unsigned char *data;
1468 unsigned long *encoded = dest;
1469 GLubyte *newSource = NULL;
1470
1471 (void) ctx;
1472
1473 /*
1474 * Rescale image if width is less than 8 or height is less than 4.
1475 */
1476 if (width < 8 || height < 4) {
1477 GLint newWidth = (width + 7) & ~7;
1478 GLint newHeight = (height + 3) & ~3;
1479 newSource = MALLOC(comps * newWidth * newHeight * sizeof(GLchan));
1480 _mesa_upscale_teximage2d(width, height, newWidth, newHeight,
1481 comps, source, srcRowStride, newSource);
1482 source = newSource;
1483 width = newWidth;
1484 height = newHeight;
1485 srcRowStride = comps * newWidth;
1486 }
1487
1488 data = source;
1489 destRowStride = (destRowStride - width * 2) / 4;
1490 for (y = 0; y < height; y += 4) {
1491 unsigned int offs = 0 + (y + 0) * srcRowStride;
1492 for (x = 0; x < width; x += 8) {
1493 const unsigned char *lines[4];
1494 lines[0] = &data[offs];
1495 lines[1] = lines[0] + srcRowStride;
1496 lines[2] = lines[1] + srcRowStride;
1497 lines[3] = lines[2] + srcRowStride;
1498 offs += 8 * comps;
1499 fxt1_quantize(encoded, lines, comps);
1500 /* 128 bits per 8x4 block = 4bpp */
1501 encoded += 4;
1502 }
1503 encoded += destRowStride;
1504 }
1505
1506 if (newSource != NULL) {
1507 FREE(newSource);
1508 }
1509
1510 return 0;
1511 }
1512
1513
1514 /***************************************************************************\
1515 * FXT1 decoder
1516 *
1517 * The decoder is based on GL_3DFX_texture_compression_FXT1
1518 * specification and serves as a concept for the encoder.
1519 \***************************************************************************/
1520
1521
/* lookup table for scaling 5 bit colors up to 8 bits (read-only) */
static const unsigned char _rgb_scale_5[] = {
   0,   8,   16,  25,  33,  41,  49,  58,
   66,  74,  82,  90,  99,  107, 115, 123,
   132, 140, 148, 156, 165, 173, 181, 189,
   197, 206, 214, 222, 230, 239, 247, 255
};
1529
1530 /* lookup table for scaling 6 bit colors up to 8 bits */
1531 static unsigned char _rgb_scale_6[] = {
1532 0, 4, 8, 12, 16, 20, 24, 28,
1533 32, 36, 40, 45, 49, 53, 57, 61,
1534 65, 69, 73, 77, 81, 85, 89, 93,
1535 97, 101, 105, 109, 113, 117, 121, 125,
1536 130, 134, 138, 142, 146, 150, 154, 158,
1537 162, 166, 170, 174, 178, 182, 186, 190,
1538 194, 198, 202, 206, 210, 215, 219, 223,
1539 227, 231, 235, 239, 243, 247, 251, 255
1540 };
1541
1542
/* Bits of the block starting at bit `which`.  Assumes the elements of `cc`
 * are 32 bits wide; the result is not masked, callers keep the bits they
 * need.
 */
#define CC_SEL(cc, which) ((cc)[(which) / 32] >> ((which) & 31))
/* scale the low 5 bits of `c` up to 8 bits */
#define UP5(c) _rgb_scale_5[(c) & 31]
/* scale a 6 bit value (low 5 bits of `c`, then the low bit of `b`) up to 8 bits */
#define UP6(c, b) _rgb_scale_6[(((c) & 31) << 1) | ((b) & 1)]
/* rounded blend of c0 and c1 at step t of n; fully parenthesized so that it
 * can be used inside a larger expression
 */
#define LERP(n, t, c0, c1) ((((n) - (t)) * (c0) + (t) * (c1) + (n) / 2) / (n))
/* clear four bytes one by one: no alignment requirement on `v` and no
 * dependency on the width of "unsigned long"
 */
#define ZERO_4UBV(v) \
   do { \
      unsigned char *zero4_ = (unsigned char *)(v); \
      zero4_[0] = 0; \
      zero4_[1] = 0; \
      zero4_[2] = 0; \
      zero4_[3] = 0; \
   } while (0)
1548
1549
1550 static void
1551 fxt1_decode_1HI (unsigned char *code, int t, unsigned char *rgba)
1552 {
1553 const unsigned long *cc;
1554
1555 t *= 3;
1556 cc = (unsigned long *)(code + t / 8);
1557 t = (cc[0] >> (t & 7)) & 7;
1558
1559 if (t == 7) {
1560 ZERO_4UBV(rgba);
1561 } else {
1562 cc = (unsigned long *)(code + 12);
1563 if (t == 0) {
1564 rgba[BCOMP] = UP5(CC_SEL(cc, 0));
1565 rgba[GCOMP] = UP5(CC_SEL(cc, 5));
1566 rgba[RCOMP] = UP5(CC_SEL(cc, 10));
1567 } else if (t == 6) {
1568 rgba[BCOMP] = UP5(CC_SEL(cc, 15));
1569 rgba[GCOMP] = UP5(CC_SEL(cc, 20));
1570 rgba[RCOMP] = UP5(CC_SEL(cc, 25));
1571 } else {
1572 rgba[BCOMP] = LERP(6, t, UP5(CC_SEL(cc, 0)), UP5(CC_SEL(cc, 15)));
1573 rgba[GCOMP] = LERP(6, t, UP5(CC_SEL(cc, 5)), UP5(CC_SEL(cc, 20)));
1574 rgba[RCOMP] = LERP(6, t, UP5(CC_SEL(cc, 10)), UP5(CC_SEL(cc, 25)));
1575 }
1576 rgba[ACOMP] = 255;
1577 }
1578 }
1579
1580
1581 static void
1582 fxt1_decode_1CHROMA (unsigned char *code, int t, unsigned char *rgba)
1583 {
1584 const unsigned long *cc;
1585 unsigned long kk;
1586
1587 cc = (unsigned long *)code;
1588 if (t & 16) {
1589 cc++;
1590 t &= 15;
1591 }
1592 t = (cc[0] >> (t * 2)) & 3;
1593
1594 t *= 15;
1595 cc = (unsigned long *)(code + 8 + t / 8);
1596 kk = cc[0] >> (t & 7);
1597 rgba[BCOMP] = UP5(kk);
1598 rgba[GCOMP] = UP5(kk >> 5);
1599 rgba[RCOMP] = UP5(kk >> 10);
1600 rgba[ACOMP] = 255;
1601 }
1602
1603
1604 static void
1605 fxt1_decode_1MIXED (unsigned char *code, int t, unsigned char *rgba)
1606 {
1607 const unsigned long *cc;
1608 unsigned int col[2][3];
1609 int glsb, selb;
1610
1611 cc = (unsigned long *)code;
1612 if (t & 16) {
1613 t &= 15;
1614 t = (cc[1] >> (t * 2)) & 3;
1615 /* col 2 */
1616 col[0][BCOMP] = (*(unsigned long *)(code + 11)) >> 6;
1617 col[0][GCOMP] = CC_SEL(cc, 99);
1618 col[0][RCOMP] = CC_SEL(cc, 104);
1619 /* col 3 */
1620 col[1][BCOMP] = CC_SEL(cc, 109);
1621 col[1][GCOMP] = CC_SEL(cc, 114);
1622 col[1][RCOMP] = CC_SEL(cc, 119);
1623 glsb = CC_SEL(cc, 126);
1624 selb = CC_SEL(cc, 33);
1625 } else {
1626 t = (cc[0] >> (t * 2)) & 3;
1627 /* col 0 */
1628 col[0][BCOMP] = CC_SEL(cc, 64);
1629 col[0][GCOMP] = CC_SEL(cc, 69);
1630 col[0][RCOMP] = CC_SEL(cc, 74);
1631 /* col 1 */
1632 col[1][BCOMP] = CC_SEL(cc, 79);
1633 col[1][GCOMP] = CC_SEL(cc, 84);
1634 col[1][RCOMP] = CC_SEL(cc, 89);
1635 glsb = CC_SEL(cc, 125);
1636 selb = CC_SEL(cc, 1);
1637 }
1638
1639 if (CC_SEL(cc, 124) & 1) {
1640 /* alpha[0] == 1 */
1641
1642 if (t == 3) {
1643 ZERO_4UBV(rgba);
1644 } else {
1645 if (t == 0) {
1646 rgba[BCOMP] = UP5(col[0][BCOMP]);
1647 rgba[GCOMP] = UP5(col[0][GCOMP]);
1648 rgba[RCOMP] = UP5(col[0][RCOMP]);
1649 } else if (t == 2) {
1650 rgba[BCOMP] = UP5(col[1][BCOMP]);
1651 rgba[GCOMP] = UP6(col[1][GCOMP], glsb);
1652 rgba[RCOMP] = UP5(col[1][RCOMP]);
1653 } else {
1654 rgba[BCOMP] = (UP5(col[0][BCOMP]) + UP5(col[1][BCOMP])) / 2;
1655 rgba[GCOMP] = (UP5(col[0][GCOMP]) + UP6(col[1][GCOMP], glsb)) / 2;
1656 rgba[RCOMP] = (UP5(col[0][RCOMP]) + UP5(col[1][RCOMP])) / 2;
1657 }
1658 rgba[ACOMP] = 255;
1659 }
1660 } else {
1661 /* alpha[0] == 0 */
1662
1663 if (t == 0) {
1664 rgba[BCOMP] = UP5(col[0][BCOMP]);
1665 rgba[GCOMP] = UP6(col[0][GCOMP], glsb ^ selb);
1666 rgba[RCOMP] = UP5(col[0][RCOMP]);
1667 } else if (t == 3) {
1668 rgba[BCOMP] = UP5(col[1][BCOMP]);
1669 rgba[GCOMP] = UP6(col[1][GCOMP], glsb);
1670 rgba[RCOMP] = UP5(col[1][RCOMP]);
1671 } else {
1672 rgba[BCOMP] = LERP(3, t, UP5(col[0][BCOMP]), UP5(col[1][BCOMP]));
1673 rgba[GCOMP] = LERP(3, t, UP6(col[0][GCOMP], glsb ^ selb),
1674 UP6(col[1][GCOMP], glsb));
1675 rgba[RCOMP] = LERP(3, t, UP5(col[0][RCOMP]), UP5(col[1][RCOMP]));
1676 }
1677 rgba[ACOMP] = 255;
1678 }
1679 }
1680
1681
1682 static void
1683 fxt1_decode_1ALPHA (unsigned char *code, int t, unsigned char *rgba)
1684 {
1685 const unsigned long *cc;
1686
1687 cc = (unsigned long *)code;
1688 if (CC_SEL(cc, 124) & 1) {
1689 /* lerp == 1 */
1690 unsigned int col0[4];
1691
1692 if (t & 16) {
1693 t &= 15;
1694 t = (cc[1] >> (t * 2)) & 3;
1695 /* col 2 */
1696 col0[BCOMP] = (*(unsigned long *)(code + 11)) >> 6;
1697 col0[GCOMP] = CC_SEL(cc, 99);
1698 col0[RCOMP] = CC_SEL(cc, 104);
1699 col0[ACOMP] = CC_SEL(cc, 119);
1700 } else {
1701 t = (cc[0] >> (t * 2)) & 3;
1702 /* col 0 */
1703 col0[BCOMP] = CC_SEL(cc, 64);
1704 col0[GCOMP] = CC_SEL(cc, 69);
1705 col0[RCOMP] = CC_SEL(cc, 74);
1706 col0[ACOMP] = CC_SEL(cc, 109);
1707 }
1708
1709 if (t == 0) {
1710 rgba[BCOMP] = UP5(col0[BCOMP]);
1711 rgba[GCOMP] = UP5(col0[GCOMP]);
1712 rgba[RCOMP] = UP5(col0[RCOMP]);
1713 rgba[ACOMP] = UP5(col0[ACOMP]);
1714 } else if (t == 3) {
1715 rgba[BCOMP] = UP5(CC_SEL(cc, 79));
1716 rgba[GCOMP] = UP5(CC_SEL(cc, 84));
1717 rgba[RCOMP] = UP5(CC_SEL(cc, 89));
1718 rgba[ACOMP] = UP5(CC_SEL(cc, 114));
1719 } else {
1720 rgba[BCOMP] = LERP(3, t, UP5(col0[BCOMP]), UP5(CC_SEL(cc, 79)));
1721 rgba[GCOMP] = LERP(3, t, UP5(col0[GCOMP]), UP5(CC_SEL(cc, 84)));
1722 rgba[RCOMP] = LERP(3, t, UP5(col0[RCOMP]), UP5(CC_SEL(cc, 89)));
1723 rgba[ACOMP] = LERP(3, t, UP5(col0[ACOMP]), UP5(CC_SEL(cc, 114)));
1724 }
1725 } else {
1726 /* lerp == 0 */
1727
1728 if (t & 16) {
1729 cc++;
1730 t &= 15;
1731 }
1732 t = (cc[0] >> (t * 2)) & 3;
1733
1734 if (t == 3) {
1735 ZERO_4UBV(rgba);
1736 } else {
1737 unsigned long kk;
1738 cc = (unsigned long *)code;
1739 rgba[ACOMP] = UP5(cc[3] >> (t * 5 + 13));
1740 t *= 15;
1741 cc = (unsigned long *)(code + 8 + t / 8);
1742 kk = cc[0] >> (t & 7);
1743 rgba[BCOMP] = UP5(kk);
1744 rgba[GCOMP] = UP5(kk >> 5);
1745 rgba[RCOMP] = UP5(kk >> 10);
1746 }
1747 }
1748 }
1749
1750
1751 void
1752 fxt1_decode_1 (const void *texture, int stride, /* in pixels */
1753 int i, int j, unsigned char *rgba)
1754 {
1755 static void (*decode_1[]) (unsigned char *, int, unsigned char *) = {
1756 fxt1_decode_1HI, /* cc-high = "00?" */
1757 fxt1_decode_1HI, /* cc-high = "00?" */
1758 fxt1_decode_1CHROMA, /* cc-chroma = "010" */
1759 fxt1_decode_1ALPHA, /* alpha = "011" */
1760 fxt1_decode_1MIXED, /* mixed = "1??" */
1761 fxt1_decode_1MIXED, /* mixed = "1??" */
1762 fxt1_decode_1MIXED, /* mixed = "1??" */
1763 fxt1_decode_1MIXED /* mixed = "1??" */
1764 };
1765
1766 unsigned char *code = (unsigned char *)texture +
1767 ((j / 4) * (stride / 8) + (i / 8)) * 16;
1768 int mode = CC_SEL((unsigned long *)code, 125);
1769 int t = i & 7;
1770
1771 if (t & 4) {
1772 t += 12;
1773 }
1774 t += (j & 3) * 4;
1775
1776 decode_1[mode](code, t, rgba);
1777 }