Merge branch '965-glsl'
[mesa.git] / src / mesa / main / texcompress_fxt1.c
1 /*
2 * Mesa 3-D graphics library
3 * Version: 6.5
4 *
5 * Copyright (C) 1999-2005 Brian Paul All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included
15 * in all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
21 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25
26 /**
27 * \file texcompress_fxt1.c
28 * GL_EXT_texture_compression_fxt1 support.
29 */
30
31
32 #include "glheader.h"
33 #include "imports.h"
34 #include "colormac.h"
35 #include "context.h"
36 #include "convolve.h"
37 #include "image.h"
38 #include "mipmap.h"
39 #include "texcompress.h"
40 #include "texformat.h"
41 #include "texstore.h"
42
43
44 static void
45 fxt1_encode (GLuint width, GLuint height, GLint comps,
46 const void *source, GLint srcRowStride,
47 void *dest, GLint destRowStride);
48
49 void
50 fxt1_decode_1 (const void *texture, GLint stride,
51 GLint i, GLint j, GLchan *rgba);
52
53
54 /**
55 * Called during context initialization.
56 */
57 void
58 _mesa_init_texture_fxt1( GLcontext *ctx )
59 {
60 (void) ctx;
61 }
62
63
64 /**
65 * Called via TexFormat->StoreImage to store an RGB_FXT1 texture.
66 */
67 static GLboolean
68 texstore_rgb_fxt1(TEXSTORE_PARAMS)
69 {
70 const GLchan *pixels;
71 GLint srcRowStride;
72 GLubyte *dst;
73 const GLint texWidth = dstRowStride * 8 / 16; /* a bit of a hack */
74 const GLchan *tempImage = NULL;
75
76 ASSERT(dstFormat == &_mesa_texformat_rgb_fxt1);
77 ASSERT(dstXoffset % 8 == 0);
78 ASSERT(dstYoffset % 4 == 0);
79 ASSERT(dstZoffset == 0);
80 (void) dstZoffset;
81 (void) dstImageOffsets;
82
83 if (srcFormat != GL_RGB ||
84 srcType != CHAN_TYPE ||
85 ctx->_ImageTransferState ||
86 srcPacking->SwapBytes) {
87 /* convert image to RGB/GLchan */
88 tempImage = _mesa_make_temp_chan_image(ctx, dims,
89 baseInternalFormat,
90 dstFormat->BaseFormat,
91 srcWidth, srcHeight, srcDepth,
92 srcFormat, srcType, srcAddr,
93 srcPacking);
94 if (!tempImage)
95 return GL_FALSE; /* out of memory */
96 _mesa_adjust_image_for_convolution(ctx, dims, &srcWidth, &srcHeight);
97 pixels = tempImage;
98 srcRowStride = 3 * srcWidth;
99 srcFormat = GL_RGB;
100 }
101 else {
102 pixels = (const GLchan *) srcAddr;
103 srcRowStride = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat,
104 srcType) / sizeof(GLchan);
105 }
106
107 dst = _mesa_compressed_image_address(dstXoffset, dstYoffset, 0,
108 dstFormat->MesaFormat,
109 texWidth, (GLubyte *) dstAddr);
110
111 fxt1_encode(srcWidth, srcHeight, 3, pixels, srcRowStride,
112 dst, dstRowStride);
113
114 if (tempImage)
115 _mesa_free((void*) tempImage);
116
117 return GL_TRUE;
118 }
119
120
121 /**
122 * Called via TexFormat->StoreImage to store an RGBA_FXT1 texture.
123 */
124 static GLboolean
125 texstore_rgba_fxt1(TEXSTORE_PARAMS)
126 {
127 const GLchan *pixels;
128 GLint srcRowStride;
129 GLubyte *dst;
130 GLint texWidth = dstRowStride * 8 / 16; /* a bit of a hack */
131 const GLchan *tempImage = NULL;
132
133 ASSERT(dstFormat == &_mesa_texformat_rgba_fxt1);
134 ASSERT(dstXoffset % 8 == 0);
135 ASSERT(dstYoffset % 4 == 0);
136 ASSERT(dstZoffset == 0);
137 (void) dstZoffset;
138 (void) dstImageOffsets;
139
140 if (srcFormat != GL_RGBA ||
141 srcType != CHAN_TYPE ||
142 ctx->_ImageTransferState ||
143 srcPacking->SwapBytes) {
144 /* convert image to RGBA/GLchan */
145 tempImage = _mesa_make_temp_chan_image(ctx, dims,
146 baseInternalFormat,
147 dstFormat->BaseFormat,
148 srcWidth, srcHeight, srcDepth,
149 srcFormat, srcType, srcAddr,
150 srcPacking);
151 if (!tempImage)
152 return GL_FALSE; /* out of memory */
153 _mesa_adjust_image_for_convolution(ctx, dims, &srcWidth, &srcHeight);
154 pixels = tempImage;
155 srcRowStride = 4 * srcWidth;
156 srcFormat = GL_RGBA;
157 }
158 else {
159 pixels = (const GLchan *) srcAddr;
160 srcRowStride = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat,
161 srcType) / sizeof(GLchan);
162 }
163
164 dst = _mesa_compressed_image_address(dstXoffset, dstYoffset, 0,
165 dstFormat->MesaFormat,
166 texWidth, (GLubyte *) dstAddr);
167
168 fxt1_encode(srcWidth, srcHeight, 4, pixels, srcRowStride,
169 dst, dstRowStride);
170
171 if (tempImage)
172 _mesa_free((void*) tempImage);
173
174 return GL_TRUE;
175 }
176
177
178 static void
179 fetch_texel_2d_rgba_fxt1( const struct gl_texture_image *texImage,
180 GLint i, GLint j, GLint k, GLchan *texel )
181 {
182 (void) k;
183 fxt1_decode_1(texImage->Data, texImage->RowStride, i, j, texel);
184 }
185
186
187 static void
188 fetch_texel_2d_f_rgba_fxt1( const struct gl_texture_image *texImage,
189 GLint i, GLint j, GLint k, GLfloat *texel )
190 {
191 /* just sample as GLchan and convert to float here */
192 GLchan rgba[4];
193 (void) k;
194 fxt1_decode_1(texImage->Data, texImage->RowStride, i, j, rgba);
195 texel[RCOMP] = CHAN_TO_FLOAT(rgba[RCOMP]);
196 texel[GCOMP] = CHAN_TO_FLOAT(rgba[GCOMP]);
197 texel[BCOMP] = CHAN_TO_FLOAT(rgba[BCOMP]);
198 texel[ACOMP] = CHAN_TO_FLOAT(rgba[ACOMP]);
199 }
200
201
202 static void
203 fetch_texel_2d_rgb_fxt1( const struct gl_texture_image *texImage,
204 GLint i, GLint j, GLint k, GLchan *texel )
205 {
206 (void) k;
207 fxt1_decode_1(texImage->Data, texImage->RowStride, i, j, texel);
208 texel[ACOMP] = 255;
209 }
210
211
212 static void
213 fetch_texel_2d_f_rgb_fxt1( const struct gl_texture_image *texImage,
214 GLint i, GLint j, GLint k, GLfloat *texel )
215 {
216 /* just sample as GLchan and convert to float here */
217 GLchan rgba[4];
218 (void) k;
219 fxt1_decode_1(texImage->Data, texImage->RowStride, i, j, rgba);
220 texel[RCOMP] = CHAN_TO_FLOAT(rgba[RCOMP]);
221 texel[GCOMP] = CHAN_TO_FLOAT(rgba[GCOMP]);
222 texel[BCOMP] = CHAN_TO_FLOAT(rgba[BCOMP]);
223 texel[ACOMP] = 1.0F;
224 }
225
226
227
228 const struct gl_texture_format _mesa_texformat_rgb_fxt1 = {
229 MESA_FORMAT_RGB_FXT1, /* MesaFormat */
230 GL_RGB, /* BaseFormat */
231 GL_UNSIGNED_NORMALIZED_ARB, /* DataType */
232 4, /*approx*/ /* RedBits */
233 4, /*approx*/ /* GreenBits */
234 4, /*approx*/ /* BlueBits */
235 0, /* AlphaBits */
236 0, /* LuminanceBits */
237 0, /* IntensityBits */
238 0, /* IndexBits */
239 0, /* DepthBits */
240 0, /* StencilBits */
241 0, /* TexelBytes */
242 texstore_rgb_fxt1, /* StoreTexImageFunc */
243 NULL, /*impossible*/ /* FetchTexel1D */
244 fetch_texel_2d_rgb_fxt1, /* FetchTexel2D */
245 NULL, /*impossible*/ /* FetchTexel3D */
246 NULL, /*impossible*/ /* FetchTexel1Df */
247 fetch_texel_2d_f_rgb_fxt1, /* FetchTexel2Df */
248 NULL, /*impossible*/ /* FetchTexel3Df */
249 NULL /* StoreTexel */
250 };
251
252 const struct gl_texture_format _mesa_texformat_rgba_fxt1 = {
253 MESA_FORMAT_RGBA_FXT1, /* MesaFormat */
254 GL_RGBA, /* BaseFormat */
255 GL_UNSIGNED_NORMALIZED_ARB, /* DataType */
256 4, /*approx*/ /* RedBits */
257 4, /*approx*/ /* GreenBits */
258 4, /*approx*/ /* BlueBits */
259 1, /*approx*/ /* AlphaBits */
260 0, /* LuminanceBits */
261 0, /* IntensityBits */
262 0, /* IndexBits */
263 0, /* DepthBits */
264 0, /* StencilBits */
265 0, /* TexelBytes */
266 texstore_rgba_fxt1, /* StoreTexImageFunc */
267 NULL, /*impossible*/ /* FetchTexel1D */
268 fetch_texel_2d_rgba_fxt1, /* FetchTexel2D */
269 NULL, /*impossible*/ /* FetchTexel3D */
270 NULL, /*impossible*/ /* FetchTexel1Df */
271 fetch_texel_2d_f_rgba_fxt1, /* FetchTexel2Df */
272 NULL, /*impossible*/ /* FetchTexel3Df */
273 NULL /* StoreTexel */
274 };
275
276
277 /***************************************************************************\
278 * FXT1 encoder
279 *
280 * The encoder was built by reversing the decoder,
281 * and is vaguely based on Texus2 by 3dfx. Note that this code
282 * is merely a proof of concept, since it is highly UNoptimized;
283 * moreover, it is sub-optimal due to initial conditions passed
284 * to Lloyd's algorithm (the interpolation modes are even worse).
285 \***************************************************************************/
286
287
/* Limits shared by all quantizers */
#define MAX_COMP 4     /* largest number of components a texel ever has */
#define MAX_VECT 4     /* largest number of base vectors ever searched for */
#define N_TEXELS 32    /* texels per block (always 32) */

/* Tuning knobs for Lloyd's vector quantizer */
#define LL_N_REP 50    /* iteration cap */
#define LL_RMS_D 10    /* fault tolerance (maximum delta) */
#define LL_RMS_E 255   /* fault tolerance (maximum error) */

#define ALPHA_TS 2     /* alpha threshold: (255 - ALPHA_TS) deemed opaque */

/* True when the four bytes at v, read as a single GLuint, are all zero */
#define ISTBLACK(v) (*((GLuint *)(v)) == 0)
296
297
/*
 * Define a 64-bit unsigned integer type and macros
 *
 * FX64_MOV32(a, b)  load the 32-bit value b into a (upper half cleared)
 * FX64_OR32(a, b)   OR the 32-bit value b into the low half of a
 * FX64_SHL(a, c)    shift a left by c bits
 *
 * With GCC a native integer is used; otherwise a lo/hi pair of GLuints
 * emulates it.
 */
#if defined(__GNUC__) && !defined(__cplusplus)

#define FX64_NATIVE 1

/* "unsigned long" is only 32 bits wide on MinGW (and on every other
 * Windows target), so "unsigned long long" is used unconditionally.
 */
typedef unsigned long long Fx64;

#define FX64_MOV32(a, b) ((a) = (b))
#define FX64_OR32(a, b) ((a) |= (b))
#define FX64_SHL(a, c) ((a) <<= (c))

#else /* !__GNUC__ */

#define FX64_NATIVE 0

typedef struct {
   GLuint lo, hi;
} Fx64;

/* clear .hi as well: FX64_SHL reads it on the very next operation */
#define FX64_MOV32(a, b) ((a).hi = 0, (a).lo = (b))
#define FX64_OR32(a, b) ((a).lo |= (b))

/* c must be in the range 1..63 (every user in this file shifts by 5);
 * c == 0 would shift a.lo right by 32 bits, which is undefined.
 */
#define FX64_SHL(a, c)                                       \
   do {                                                      \
      if ((c) >= 32) {                                       \
         (a).hi = (a).lo << ((c) - 32);                      \
         (a).lo = 0;                                         \
      } else {                                               \
         (a).hi = ((a).hi << (c)) | ((a).lo >> (32 - (c)));  \
         (a).lo <<= (c);                                     \
      }                                                      \
   } while (0)

#endif /* !__GNUC__ */
339
340
#define F(i) (GLfloat)1 /* can be used to obtain an oblong metric: 0.30 / 0.59 / 0.11 */
#define SAFECDOT 1 /* for paranoids */

/*
 * MAKEIVEC: build the interpolation vector IV and bias B that project a
 * colour onto the line from V0 to V1, scaled so that V0 maps to 0 and V1
 * maps to NV (B already contains the +0.5 used for rounding).
 *
 *   NV      highest interpolation index
 *   NC      number of components to use
 *   IV, B   outputs (array of GLfloat, GLfloat lvalue)
 *   V0, V1  the two end points (indexable, at least NC entries each)
 *
 * Both macros use a variable named `i' from the enclosing scope as their
 * loop counter, and clobber it.
 *
 * If V0 and V1 coincide, the scale factor is forced to 0 instead of
 * dividing by zero, so every colour then maps to index 0.
 */
#define MAKEIVEC(NV, NC, IV, B, V0, V1)                          \
   do {                                                          \
      /* compute interpolation vector */                         \
      GLfloat d2 = 0.0F;                                         \
      GLfloat rd2;                                               \
                                                                 \
      for (i = 0; i < (NC); i++) {                               \
         (IV)[i] = ((V1)[i] - (V0)[i]) * F(i);                   \
         d2 += (IV)[i] * (IV)[i];                                \
      }                                                          \
      /* degenerate (zero-length) vector: avoid inf/NaN */       \
      rd2 = (d2 > 0.0F) ? (GLfloat)(NV) / d2 : 0.0F;             \
      (B) = 0;                                                   \
      for (i = 0; i < (NC); i++) {                               \
         (IV)[i] *= F(i);                                        \
         (B) -= (IV)[i] * (V0)[i];                               \
         (IV)[i] *= rd2;                                         \
      }                                                          \
      (B) = (B) * rd2 + 0.5f;                                    \
   } while (0)

/*
 * CALCCDOT: project colour V with the IV/B pair produced by MAKEIVEC and
 * store the resulting index in TEXEL.  With SAFECDOT enabled the index is
 * clamped to the range 0..NV.
 */
#define CALCCDOT(TEXEL, NV, NC, IV, B, V)                        \
   do {                                                          \
      GLfloat dot = 0.0F;                                        \
      for (i = 0; i < (NC); i++) {                               \
         dot += (V)[i] * (IV)[i];                                \
      }                                                          \
      (TEXEL) = (GLint)(dot + (B));                              \
      if (SAFECDOT) {                                            \
         if ((TEXEL) < 0) {                                      \
            (TEXEL) = 0;                                         \
         } else if ((TEXEL) > (NV)) {                            \
            (TEXEL) = (NV);                                      \
         }                                                       \
      }                                                          \
   } while (0)
379
380
381 static GLint
382 fxt1_bestcol (GLfloat vec[][MAX_COMP], GLint nv,
383 GLubyte input[MAX_COMP], GLint nc)
384 {
385 GLint i, j, best = -1;
386 GLfloat err = 1e9; /* big enough */
387
388 for (j = 0; j < nv; j++) {
389 GLfloat e = 0.0F;
390 for (i = 0; i < nc; i++) {
391 e += (vec[j][i] - input[i]) * (vec[j][i] - input[i]);
392 }
393 if (e < err) {
394 err = e;
395 best = j;
396 }
397 }
398
399 return best;
400 }
401
402
403 static GLint
404 fxt1_worst (GLfloat vec[MAX_COMP],
405 GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
406 {
407 GLint i, k, worst = -1;
408 GLfloat err = -1.0F; /* small enough */
409
410 for (k = 0; k < n; k++) {
411 GLfloat e = 0.0F;
412 for (i = 0; i < nc; i++) {
413 e += (vec[i] - input[k][i]) * (vec[i] - input[k][i]);
414 }
415 if (e > err) {
416 err = e;
417 worst = k;
418 }
419 }
420
421 return worst;
422 }
423
424
425 static GLint
426 fxt1_variance (GLdouble variance[MAX_COMP],
427 GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
428 {
429 GLint i, k, best = 0;
430 GLint sx, sx2;
431 GLdouble var, maxvar = -1; /* small enough */
432 GLdouble teenth = 1.0 / n;
433
434 for (i = 0; i < nc; i++) {
435 sx = sx2 = 0;
436 for (k = 0; k < n; k++) {
437 GLint t = input[k][i];
438 sx += t;
439 sx2 += t * t;
440 }
441 var = sx2 * teenth - sx * sx * teenth * teenth;
442 if (maxvar < var) {
443 maxvar = var;
444 best = i;
445 }
446 if (variance) {
447 variance[i] = var;
448 }
449 }
450
451 return best;
452 }
453
454
455 static GLint
456 fxt1_choose (GLfloat vec[][MAX_COMP], GLint nv,
457 GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
458 {
459 #if 0
460 /* Choose colors from a grid.
461 */
462 GLint i, j;
463
464 for (j = 0; j < nv; j++) {
465 GLint m = j * (n - 1) / (nv - 1);
466 for (i = 0; i < nc; i++) {
467 vec[j][i] = input[m][i];
468 }
469 }
470 #else
471 /* Our solution here is to find the darkest and brightest colors in
472 * the 8x4 tile and use those as the two representative colors.
473 * There are probably better algorithms to use (histogram-based).
474 */
475 GLint i, j, k;
476 GLint minSum = 2000; /* big enough */
477 GLint maxSum = -1; /* small enough */
478 GLint minCol = 0; /* phoudoin: silent compiler! */
479 GLint maxCol = 0; /* phoudoin: silent compiler! */
480
481 struct {
482 GLint flag;
483 GLint key;
484 GLint freq;
485 GLint idx;
486 } hist[N_TEXELS];
487 GLint lenh = 0;
488
489 _mesa_memset(hist, 0, sizeof(hist));
490
491 for (k = 0; k < n; k++) {
492 GLint l;
493 GLint key = 0;
494 GLint sum = 0;
495 for (i = 0; i < nc; i++) {
496 key <<= 8;
497 key |= input[k][i];
498 sum += input[k][i];
499 }
500 for (l = 0; l < n; l++) {
501 if (!hist[l].flag) {
502 /* alloc new slot */
503 hist[l].flag = !0;
504 hist[l].key = key;
505 hist[l].freq = 1;
506 hist[l].idx = k;
507 lenh = l + 1;
508 break;
509 } else if (hist[l].key == key) {
510 hist[l].freq++;
511 break;
512 }
513 }
514 if (minSum > sum) {
515 minSum = sum;
516 minCol = k;
517 }
518 if (maxSum < sum) {
519 maxSum = sum;
520 maxCol = k;
521 }
522 }
523
524 if (lenh <= nv) {
525 for (j = 0; j < lenh; j++) {
526 for (i = 0; i < nc; i++) {
527 vec[j][i] = (GLfloat)input[hist[j].idx][i];
528 }
529 }
530 for (; j < nv; j++) {
531 for (i = 0; i < nc; i++) {
532 vec[j][i] = vec[0][i];
533 }
534 }
535 return 0;
536 }
537
538 for (j = 0; j < nv; j++) {
539 for (i = 0; i < nc; i++) {
540 vec[j][i] = ((nv - 1 - j) * input[minCol][i] + j * input[maxCol][i] + (nv - 1) / 2) / (GLfloat)(nv - 1);
541 }
542 }
543 #endif
544
545 return !0;
546 }
547
548
549 static GLint
550 fxt1_lloyd (GLfloat vec[][MAX_COMP], GLint nv,
551 GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
552 {
553 /* Use the generalized lloyd's algorithm for VQ:
554 * find 4 color vectors.
555 *
556 * for each sample color
557 * sort to nearest vector.
558 *
559 * replace each vector with the centroid of it's matching colors.
560 *
561 * repeat until RMS doesn't improve.
562 *
563 * if a color vector has no samples, or becomes the same as another
564 * vector, replace it with the color which is farthest from a sample.
565 *
566 * vec[][MAX_COMP] initial vectors and resulting colors
567 * nv number of resulting colors required
568 * input[N_TEXELS][MAX_COMP] input texels
569 * nc number of components in input / vec
570 * n number of input samples
571 */
572
573 GLint sum[MAX_VECT][MAX_COMP]; /* used to accumulate closest texels */
574 GLint cnt[MAX_VECT]; /* how many times a certain vector was chosen */
575 GLfloat error, lasterror = 1e9;
576
577 GLint i, j, k, rep;
578
579 /* the quantizer */
580 for (rep = 0; rep < LL_N_REP; rep++) {
581 /* reset sums & counters */
582 for (j = 0; j < nv; j++) {
583 for (i = 0; i < nc; i++) {
584 sum[j][i] = 0;
585 }
586 cnt[j] = 0;
587 }
588 error = 0;
589
590 /* scan whole block */
591 for (k = 0; k < n; k++) {
592 #if 1
593 GLint best = -1;
594 GLfloat err = 1e9; /* big enough */
595 /* determine best vector */
596 for (j = 0; j < nv; j++) {
597 GLfloat e = (vec[j][0] - input[k][0]) * (vec[j][0] - input[k][0]) +
598 (vec[j][1] - input[k][1]) * (vec[j][1] - input[k][1]) +
599 (vec[j][2] - input[k][2]) * (vec[j][2] - input[k][2]);
600 if (nc == 4) {
601 e += (vec[j][3] - input[k][3]) * (vec[j][3] - input[k][3]);
602 }
603 if (e < err) {
604 err = e;
605 best = j;
606 }
607 }
608 #else
609 GLint best = fxt1_bestcol(vec, nv, input[k], nc, &err);
610 #endif
611 /* add in closest color */
612 for (i = 0; i < nc; i++) {
613 sum[best][i] += input[k][i];
614 }
615 /* mark this vector as used */
616 cnt[best]++;
617 /* accumulate error */
618 error += err;
619 }
620
621 /* check RMS */
622 if ((error < LL_RMS_E) ||
623 ((error < lasterror) && ((lasterror - error) < LL_RMS_D))) {
624 return !0; /* good match */
625 }
626 lasterror = error;
627
628 /* move each vector to the barycenter of its closest colors */
629 for (j = 0; j < nv; j++) {
630 if (cnt[j]) {
631 GLfloat div = 1.0F / cnt[j];
632 for (i = 0; i < nc; i++) {
633 vec[j][i] = div * sum[j][i];
634 }
635 } else {
636 /* this vec has no samples or is identical with a previous vec */
637 GLint worst = fxt1_worst(vec[j], input, nc, n);
638 for (i = 0; i < nc; i++) {
639 vec[j][i] = input[worst][i];
640 }
641 }
642 }
643 }
644
645 return 0; /* could not converge fast enough */
646 }
647
648
649 static void
650 fxt1_quantize_CHROMA (GLuint *cc,
651 GLubyte input[N_TEXELS][MAX_COMP])
652 {
653 const GLint n_vect = 4; /* 4 base vectors to find */
654 const GLint n_comp = 3; /* 3 components: R, G, B */
655 GLfloat vec[MAX_VECT][MAX_COMP];
656 GLint i, j, k;
657 Fx64 hi; /* high quadword */
658 GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
659
660 if (fxt1_choose(vec, n_vect, input, n_comp, N_TEXELS) != 0) {
661 fxt1_lloyd(vec, n_vect, input, n_comp, N_TEXELS);
662 }
663
664 FX64_MOV32(hi, 4); /* cc-chroma = "010" + unused bit */
665 for (j = n_vect - 1; j >= 0; j--) {
666 for (i = 0; i < n_comp; i++) {
667 /* add in colors */
668 FX64_SHL(hi, 5);
669 FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
670 }
671 }
672 ((Fx64 *)cc)[1] = hi;
673
674 lohi = lolo = 0;
675 /* right microtile */
676 for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) {
677 lohi <<= 2;
678 lohi |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
679 }
680 /* left microtile */
681 for (; k >= 0; k--) {
682 lolo <<= 2;
683 lolo |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
684 }
685 cc[1] = lohi;
686 cc[0] = lolo;
687 }
688
689
690 static void
691 fxt1_quantize_ALPHA0 (GLuint *cc,
692 GLubyte input[N_TEXELS][MAX_COMP],
693 GLubyte reord[N_TEXELS][MAX_COMP], GLint n)
694 {
695 const GLint n_vect = 3; /* 3 base vectors to find */
696 const GLint n_comp = 4; /* 4 components: R, G, B, A */
697 GLfloat vec[MAX_VECT][MAX_COMP];
698 GLint i, j, k;
699 Fx64 hi; /* high quadword */
700 GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
701
702 /* the last vector indicates zero */
703 for (i = 0; i < n_comp; i++) {
704 vec[n_vect][i] = 0;
705 }
706
707 /* the first n texels in reord are guaranteed to be non-zero */
708 if (fxt1_choose(vec, n_vect, reord, n_comp, n) != 0) {
709 fxt1_lloyd(vec, n_vect, reord, n_comp, n);
710 }
711
712 FX64_MOV32(hi, 6); /* alpha = "011" + lerp = 0 */
713 for (j = n_vect - 1; j >= 0; j--) {
714 /* add in alphas */
715 FX64_SHL(hi, 5);
716 FX64_OR32(hi, (GLuint)(vec[j][ACOMP] / 8.0F));
717 }
718 for (j = n_vect - 1; j >= 0; j--) {
719 for (i = 0; i < n_comp - 1; i++) {
720 /* add in colors */
721 FX64_SHL(hi, 5);
722 FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
723 }
724 }
725 ((Fx64 *)cc)[1] = hi;
726
727 lohi = lolo = 0;
728 /* right microtile */
729 for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) {
730 lohi <<= 2;
731 lohi |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
732 }
733 /* left microtile */
734 for (; k >= 0; k--) {
735 lolo <<= 2;
736 lolo |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
737 }
738 cc[1] = lohi;
739 cc[0] = lolo;
740 }
741
742
743 static void
744 fxt1_quantize_ALPHA1 (GLuint *cc,
745 GLubyte input[N_TEXELS][MAX_COMP])
746 {
747 const GLint n_vect = 3; /* highest vector number in each microtile */
748 const GLint n_comp = 4; /* 4 components: R, G, B, A */
749 GLfloat vec[1 + 1 + 1][MAX_COMP]; /* 1.5 extrema for each sub-block */
750 GLfloat b, iv[MAX_COMP]; /* interpolation vector */
751 GLint i, j, k;
752 Fx64 hi; /* high quadword */
753 GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
754
755 GLint minSum;
756 GLint maxSum;
757 GLint minColL = 0, maxColL = 0;
758 GLint minColR = 0, maxColR = 0;
759 GLint sumL = 0, sumR = 0;
760 GLint nn_comp;
761 /* Our solution here is to find the darkest and brightest colors in
762 * the 4x4 tile and use those as the two representative colors.
763 * There are probably better algorithms to use (histogram-based).
764 */
765 nn_comp = n_comp;
766 while ((minColL == maxColL) && nn_comp) {
767 minSum = 2000; /* big enough */
768 maxSum = -1; /* small enough */
769 for (k = 0; k < N_TEXELS / 2; k++) {
770 GLint sum = 0;
771 for (i = 0; i < nn_comp; i++) {
772 sum += input[k][i];
773 }
774 if (minSum > sum) {
775 minSum = sum;
776 minColL = k;
777 }
778 if (maxSum < sum) {
779 maxSum = sum;
780 maxColL = k;
781 }
782 sumL += sum;
783 }
784
785 nn_comp--;
786 }
787
788 nn_comp = n_comp;
789 while ((minColR == maxColR) && nn_comp) {
790 minSum = 2000; /* big enough */
791 maxSum = -1; /* small enough */
792 for (k = N_TEXELS / 2; k < N_TEXELS; k++) {
793 GLint sum = 0;
794 for (i = 0; i < nn_comp; i++) {
795 sum += input[k][i];
796 }
797 if (minSum > sum) {
798 minSum = sum;
799 minColR = k;
800 }
801 if (maxSum < sum) {
802 maxSum = sum;
803 maxColR = k;
804 }
805 sumR += sum;
806 }
807
808 nn_comp--;
809 }
810
811 /* choose the common vector (yuck!) */
812 {
813 GLint j1, j2;
814 GLint v1 = 0, v2 = 0;
815 GLfloat err = 1e9; /* big enough */
816 GLfloat tv[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
817 for (i = 0; i < n_comp; i++) {
818 tv[0][i] = input[minColL][i];
819 tv[1][i] = input[maxColL][i];
820 tv[2][i] = input[minColR][i];
821 tv[3][i] = input[maxColR][i];
822 }
823 for (j1 = 0; j1 < 2; j1++) {
824 for (j2 = 2; j2 < 4; j2++) {
825 GLfloat e = 0.0F;
826 for (i = 0; i < n_comp; i++) {
827 e += (tv[j1][i] - tv[j2][i]) * (tv[j1][i] - tv[j2][i]);
828 }
829 if (e < err) {
830 err = e;
831 v1 = j1;
832 v2 = j2;
833 }
834 }
835 }
836 for (i = 0; i < n_comp; i++) {
837 vec[0][i] = tv[1 - v1][i];
838 vec[1][i] = (tv[v1][i] * sumL + tv[v2][i] * sumR) / (sumL + sumR);
839 vec[2][i] = tv[5 - v2][i];
840 }
841 }
842
843 /* left microtile */
844 cc[0] = 0;
845 if (minColL != maxColL) {
846 /* compute interpolation vector */
847 MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
848
849 /* add in texels */
850 lolo = 0;
851 for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
852 GLint texel;
853 /* interpolate color */
854 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
855 /* add in texel */
856 lolo <<= 2;
857 lolo |= texel;
858 }
859
860 cc[0] = lolo;
861 }
862
863 /* right microtile */
864 cc[1] = 0;
865 if (minColR != maxColR) {
866 /* compute interpolation vector */
867 MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[1]);
868
869 /* add in texels */
870 lohi = 0;
871 for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
872 GLint texel;
873 /* interpolate color */
874 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
875 /* add in texel */
876 lohi <<= 2;
877 lohi |= texel;
878 }
879
880 cc[1] = lohi;
881 }
882
883 FX64_MOV32(hi, 7); /* alpha = "011" + lerp = 1 */
884 for (j = n_vect - 1; j >= 0; j--) {
885 /* add in alphas */
886 FX64_SHL(hi, 5);
887 FX64_OR32(hi, (GLuint)(vec[j][ACOMP] / 8.0F));
888 }
889 for (j = n_vect - 1; j >= 0; j--) {
890 for (i = 0; i < n_comp - 1; i++) {
891 /* add in colors */
892 FX64_SHL(hi, 5);
893 FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
894 }
895 }
896 ((Fx64 *)cc)[1] = hi;
897 }
898
899
900 static void
901 fxt1_quantize_HI (GLuint *cc,
902 GLubyte input[N_TEXELS][MAX_COMP],
903 GLubyte reord[N_TEXELS][MAX_COMP], GLint n)
904 {
905 const GLint n_vect = 6; /* highest vector number */
906 const GLint n_comp = 3; /* 3 components: R, G, B */
907 GLfloat b = 0.0F; /* phoudoin: silent compiler! */
908 GLfloat iv[MAX_COMP]; /* interpolation vector */
909 GLint i, k;
910 GLuint hihi; /* high quadword: hi dword */
911
912 GLint minSum = 2000; /* big enough */
913 GLint maxSum = -1; /* small enough */
914 GLint minCol = 0; /* phoudoin: silent compiler! */
915 GLint maxCol = 0; /* phoudoin: silent compiler! */
916
917 /* Our solution here is to find the darkest and brightest colors in
918 * the 8x4 tile and use those as the two representative colors.
919 * There are probably better algorithms to use (histogram-based).
920 */
921 for (k = 0; k < n; k++) {
922 GLint sum = 0;
923 for (i = 0; i < n_comp; i++) {
924 sum += reord[k][i];
925 }
926 if (minSum > sum) {
927 minSum = sum;
928 minCol = k;
929 }
930 if (maxSum < sum) {
931 maxSum = sum;
932 maxCol = k;
933 }
934 }
935
936 hihi = 0; /* cc-hi = "00" */
937 for (i = 0; i < n_comp; i++) {
938 /* add in colors */
939 hihi <<= 5;
940 hihi |= reord[maxCol][i] >> 3;
941 }
942 for (i = 0; i < n_comp; i++) {
943 /* add in colors */
944 hihi <<= 5;
945 hihi |= reord[minCol][i] >> 3;
946 }
947 cc[3] = hihi;
948 cc[0] = cc[1] = cc[2] = 0;
949
950 /* compute interpolation vector */
951 if (minCol != maxCol) {
952 MAKEIVEC(n_vect, n_comp, iv, b, reord[minCol], reord[maxCol]);
953 }
954
955 /* add in texels */
956 for (k = N_TEXELS - 1; k >= 0; k--) {
957 GLint t = k * 3;
958 GLuint *kk = (GLuint *)((char *)cc + t / 8);
959 GLint texel = n_vect + 1; /* transparent black */
960
961 if (!ISTBLACK(input[k])) {
962 if (minCol != maxCol) {
963 /* interpolate color */
964 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
965 /* add in texel */
966 kk[0] |= texel << (t & 7);
967 }
968 } else {
969 /* add in texel */
970 kk[0] |= texel << (t & 7);
971 }
972 }
973 }
974
975
976 static void
977 fxt1_quantize_MIXED1 (GLuint *cc,
978 GLubyte input[N_TEXELS][MAX_COMP])
979 {
980 const GLint n_vect = 2; /* highest vector number in each microtile */
981 const GLint n_comp = 3; /* 3 components: R, G, B */
982 GLubyte vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
983 GLfloat b, iv[MAX_COMP]; /* interpolation vector */
984 GLint i, j, k;
985 Fx64 hi; /* high quadword */
986 GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
987
988 GLint minSum;
989 GLint maxSum;
990 GLint minColL = 0, maxColL = -1;
991 GLint minColR = 0, maxColR = -1;
992
993 /* Our solution here is to find the darkest and brightest colors in
994 * the 4x4 tile and use those as the two representative colors.
995 * There are probably better algorithms to use (histogram-based).
996 */
997 minSum = 2000; /* big enough */
998 maxSum = -1; /* small enough */
999 for (k = 0; k < N_TEXELS / 2; k++) {
1000 if (!ISTBLACK(input[k])) {
1001 GLint sum = 0;
1002 for (i = 0; i < n_comp; i++) {
1003 sum += input[k][i];
1004 }
1005 if (minSum > sum) {
1006 minSum = sum;
1007 minColL = k;
1008 }
1009 if (maxSum < sum) {
1010 maxSum = sum;
1011 maxColL = k;
1012 }
1013 }
1014 }
1015 minSum = 2000; /* big enough */
1016 maxSum = -1; /* small enough */
1017 for (; k < N_TEXELS; k++) {
1018 if (!ISTBLACK(input[k])) {
1019 GLint sum = 0;
1020 for (i = 0; i < n_comp; i++) {
1021 sum += input[k][i];
1022 }
1023 if (minSum > sum) {
1024 minSum = sum;
1025 minColR = k;
1026 }
1027 if (maxSum < sum) {
1028 maxSum = sum;
1029 maxColR = k;
1030 }
1031 }
1032 }
1033
1034 /* left microtile */
1035 if (maxColL == -1) {
1036 /* all transparent black */
1037 cc[0] = ~0u;
1038 for (i = 0; i < n_comp; i++) {
1039 vec[0][i] = 0;
1040 vec[1][i] = 0;
1041 }
1042 } else {
1043 cc[0] = 0;
1044 for (i = 0; i < n_comp; i++) {
1045 vec[0][i] = input[minColL][i];
1046 vec[1][i] = input[maxColL][i];
1047 }
1048 if (minColL != maxColL) {
1049 /* compute interpolation vector */
1050 MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
1051
1052 /* add in texels */
1053 lolo = 0;
1054 for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
1055 GLint texel = n_vect + 1; /* transparent black */
1056 if (!ISTBLACK(input[k])) {
1057 /* interpolate color */
1058 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
1059 }
1060 /* add in texel */
1061 lolo <<= 2;
1062 lolo |= texel;
1063 }
1064 cc[0] = lolo;
1065 }
1066 }
1067
1068 /* right microtile */
1069 if (maxColR == -1) {
1070 /* all transparent black */
1071 cc[1] = ~0u;
1072 for (i = 0; i < n_comp; i++) {
1073 vec[2][i] = 0;
1074 vec[3][i] = 0;
1075 }
1076 } else {
1077 cc[1] = 0;
1078 for (i = 0; i < n_comp; i++) {
1079 vec[2][i] = input[minColR][i];
1080 vec[3][i] = input[maxColR][i];
1081 }
1082 if (minColR != maxColR) {
1083 /* compute interpolation vector */
1084 MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);
1085
1086 /* add in texels */
1087 lohi = 0;
1088 for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
1089 GLint texel = n_vect + 1; /* transparent black */
1090 if (!ISTBLACK(input[k])) {
1091 /* interpolate color */
1092 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
1093 }
1094 /* add in texel */
1095 lohi <<= 2;
1096 lohi |= texel;
1097 }
1098 cc[1] = lohi;
1099 }
1100 }
1101
1102 FX64_MOV32(hi, 9 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
1103 for (j = 2 * 2 - 1; j >= 0; j--) {
1104 for (i = 0; i < n_comp; i++) {
1105 /* add in colors */
1106 FX64_SHL(hi, 5);
1107 FX64_OR32(hi, vec[j][i] >> 3);
1108 }
1109 }
1110 ((Fx64 *)cc)[1] = hi;
1111 }
1112
1113
1114 static void
1115 fxt1_quantize_MIXED0 (GLuint *cc,
1116 GLubyte input[N_TEXELS][MAX_COMP])
1117 {
1118 const GLint n_vect = 3; /* highest vector number in each microtile */
1119 const GLint n_comp = 3; /* 3 components: R, G, B */
1120 GLubyte vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
1121 GLfloat b, iv[MAX_COMP]; /* interpolation vector */
1122 GLint i, j, k;
1123 Fx64 hi; /* high quadword */
1124 GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
1125
1126 GLint minColL = 0, maxColL = 0;
1127 GLint minColR = 0, maxColR = 0;
1128 #if 0
1129 GLint minSum;
1130 GLint maxSum;
1131
1132 /* Our solution here is to find the darkest and brightest colors in
1133 * the 4x4 tile and use those as the two representative colors.
1134 * There are probably better algorithms to use (histogram-based).
1135 */
1136 minSum = 2000; /* big enough */
1137 maxSum = -1; /* small enough */
1138 for (k = 0; k < N_TEXELS / 2; k++) {
1139 GLint sum = 0;
1140 for (i = 0; i < n_comp; i++) {
1141 sum += input[k][i];
1142 }
1143 if (minSum > sum) {
1144 minSum = sum;
1145 minColL = k;
1146 }
1147 if (maxSum < sum) {
1148 maxSum = sum;
1149 maxColL = k;
1150 }
1151 }
1152 minSum = 2000; /* big enough */
1153 maxSum = -1; /* small enough */
1154 for (; k < N_TEXELS; k++) {
1155 GLint sum = 0;
1156 for (i = 0; i < n_comp; i++) {
1157 sum += input[k][i];
1158 }
1159 if (minSum > sum) {
1160 minSum = sum;
1161 minColR = k;
1162 }
1163 if (maxSum < sum) {
1164 maxSum = sum;
1165 maxColR = k;
1166 }
1167 }
1168 #else
1169 GLint minVal;
1170 GLint maxVal;
1171 GLint maxVarL = fxt1_variance(NULL, input, n_comp, N_TEXELS / 2);
1172 GLint maxVarR = fxt1_variance(NULL, &input[N_TEXELS / 2], n_comp, N_TEXELS / 2);
1173
1174 /* Scan the channel with max variance for lo & hi
1175 * and use those as the two representative colors.
1176 */
1177 minVal = 2000; /* big enough */
1178 maxVal = -1; /* small enough */
1179 for (k = 0; k < N_TEXELS / 2; k++) {
1180 GLint t = input[k][maxVarL];
1181 if (minVal > t) {
1182 minVal = t;
1183 minColL = k;
1184 }
1185 if (maxVal < t) {
1186 maxVal = t;
1187 maxColL = k;
1188 }
1189 }
1190 minVal = 2000; /* big enough */
1191 maxVal = -1; /* small enough */
1192 for (; k < N_TEXELS; k++) {
1193 GLint t = input[k][maxVarR];
1194 if (minVal > t) {
1195 minVal = t;
1196 minColR = k;
1197 }
1198 if (maxVal < t) {
1199 maxVal = t;
1200 maxColR = k;
1201 }
1202 }
1203 #endif
1204
1205 /* left microtile */
1206 cc[0] = 0;
1207 for (i = 0; i < n_comp; i++) {
1208 vec[0][i] = input[minColL][i];
1209 vec[1][i] = input[maxColL][i];
1210 }
1211 if (minColL != maxColL) {
1212 /* compute interpolation vector */
1213 MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
1214
1215 /* add in texels */
1216 lolo = 0;
1217 for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
1218 GLint texel;
1219 /* interpolate color */
1220 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
1221 /* add in texel */
1222 lolo <<= 2;
1223 lolo |= texel;
1224 }
1225
1226 /* funky encoding for LSB of green */
1227 if ((GLint)((lolo >> 1) & 1) != (((vec[1][GCOMP] ^ vec[0][GCOMP]) >> 2) & 1)) {
1228 for (i = 0; i < n_comp; i++) {
1229 vec[1][i] = input[minColL][i];
1230 vec[0][i] = input[maxColL][i];
1231 }
1232 lolo = ~lolo;
1233 }
1234
1235 cc[0] = lolo;
1236 }
1237
1238 /* right microtile */
1239 cc[1] = 0;
1240 for (i = 0; i < n_comp; i++) {
1241 vec[2][i] = input[minColR][i];
1242 vec[3][i] = input[maxColR][i];
1243 }
1244 if (minColR != maxColR) {
1245 /* compute interpolation vector */
1246 MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);
1247
1248 /* add in texels */
1249 lohi = 0;
1250 for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
1251 GLint texel;
1252 /* interpolate color */
1253 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
1254 /* add in texel */
1255 lohi <<= 2;
1256 lohi |= texel;
1257 }
1258
1259 /* funky encoding for LSB of green */
1260 if ((GLint)((lohi >> 1) & 1) != (((vec[3][GCOMP] ^ vec[2][GCOMP]) >> 2) & 1)) {
1261 for (i = 0; i < n_comp; i++) {
1262 vec[3][i] = input[minColR][i];
1263 vec[2][i] = input[maxColR][i];
1264 }
1265 lohi = ~lohi;
1266 }
1267
1268 cc[1] = lohi;
1269 }
1270
1271 FX64_MOV32(hi, 8 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
1272 for (j = 2 * 2 - 1; j >= 0; j--) {
1273 for (i = 0; i < n_comp; i++) {
1274 /* add in colors */
1275 FX64_SHL(hi, 5);
1276 FX64_OR32(hi, vec[j][i] >> 3);
1277 }
1278 }
1279 ((Fx64 *)cc)[1] = hi;
1280 }
1281
1282
1283 static void
1284 fxt1_quantize (GLuint *cc, const GLubyte *lines[], GLint comps)
1285 {
1286 GLint trualpha;
1287 GLubyte reord[N_TEXELS][MAX_COMP];
1288
1289 GLubyte input[N_TEXELS][MAX_COMP];
1290 GLint i, k, l;
1291
1292 if (comps == 3) {
1293 /* make the whole block opaque */
1294 _mesa_memset(input, -1, sizeof(input));
1295 }
1296
1297 /* 8 texels each line */
1298 for (l = 0; l < 4; l++) {
1299 for (k = 0; k < 4; k++) {
1300 for (i = 0; i < comps; i++) {
1301 input[k + l * 4][i] = *lines[l]++;
1302 }
1303 }
1304 for (; k < 8; k++) {
1305 for (i = 0; i < comps; i++) {
1306 input[k + l * 4 + 12][i] = *lines[l]++;
1307 }
1308 }
1309 }
1310
1311 /* block layout:
1312 * 00, 01, 02, 03, 08, 09, 0a, 0b
1313 * 10, 11, 12, 13, 18, 19, 1a, 1b
1314 * 04, 05, 06, 07, 0c, 0d, 0e, 0f
1315 * 14, 15, 16, 17, 1c, 1d, 1e, 1f
1316 */
1317
1318 /* [dBorca]
1319 * stupidity flows forth from this
1320 */
1321 l = N_TEXELS;
1322 trualpha = 0;
1323 if (comps == 4) {
1324 /* skip all transparent black texels */
1325 l = 0;
1326 for (k = 0; k < N_TEXELS; k++) {
1327 /* test all components against 0 */
1328 if (!ISTBLACK(input[k])) {
1329 /* texel is not transparent black */
1330 COPY_4UBV(reord[l], input[k]);
1331 if (reord[l][ACOMP] < (255 - ALPHA_TS)) {
1332 /* non-opaque texel */
1333 trualpha = !0;
1334 }
1335 l++;
1336 }
1337 }
1338 }
1339
1340 #if 0
1341 if (trualpha) {
1342 fxt1_quantize_ALPHA0(cc, input, reord, l);
1343 } else if (l == 0) {
1344 cc[0] = cc[1] = cc[2] = -1;
1345 cc[3] = 0;
1346 } else if (l < N_TEXELS) {
1347 fxt1_quantize_HI(cc, input, reord, l);
1348 } else {
1349 fxt1_quantize_CHROMA(cc, input);
1350 }
1351 (void)fxt1_quantize_ALPHA1;
1352 (void)fxt1_quantize_MIXED1;
1353 (void)fxt1_quantize_MIXED0;
1354 #else
1355 if (trualpha) {
1356 fxt1_quantize_ALPHA1(cc, input);
1357 } else if (l == 0) {
1358 cc[0] = cc[1] = cc[2] = ~0u;
1359 cc[3] = 0;
1360 } else if (l < N_TEXELS) {
1361 fxt1_quantize_MIXED1(cc, input);
1362 } else {
1363 fxt1_quantize_MIXED0(cc, input);
1364 }
1365 (void)fxt1_quantize_ALPHA0;
1366 (void)fxt1_quantize_HI;
1367 (void)fxt1_quantize_CHROMA;
1368 #endif
1369 }
1370
1371
1372 static void
1373 fxt1_encode (GLuint width, GLuint height, GLint comps,
1374 const void *source, GLint srcRowStride,
1375 void *dest, GLint destRowStride)
1376 {
1377 GLuint x, y;
1378 const GLubyte *data;
1379 GLuint *encoded = (GLuint *)dest;
1380 void *newSource = NULL;
1381
1382 assert(comps == 3 || comps == 4);
1383
1384 /* Replicate image if width is not M8 or height is not M4 */
1385 if ((width & 7) | (height & 3)) {
1386 GLint newWidth = (width + 7) & ~7;
1387 GLint newHeight = (height + 3) & ~3;
1388 newSource = _mesa_malloc(comps * newWidth * newHeight * sizeof(GLchan));
1389 if (!newSource) {
1390 GET_CURRENT_CONTEXT(ctx);
1391 _mesa_error(ctx, GL_OUT_OF_MEMORY, "texture compression");
1392 goto cleanUp;
1393 }
1394 _mesa_upscale_teximage2d(width, height, newWidth, newHeight,
1395 comps, (const GLchan *) source,
1396 srcRowStride, (GLchan *) newSource);
1397 source = newSource;
1398 width = newWidth;
1399 height = newHeight;
1400 srcRowStride = comps * newWidth;
1401 }
1402
1403 /* convert from 16/32-bit channels to GLubyte if needed */
1404 if (CHAN_TYPE != GL_UNSIGNED_BYTE) {
1405 const GLuint n = width * height * comps;
1406 const GLchan *src = (const GLchan *) source;
1407 GLubyte *dest = (GLubyte *) _mesa_malloc(n * sizeof(GLubyte));
1408 GLuint i;
1409 if (!dest) {
1410 GET_CURRENT_CONTEXT(ctx);
1411 _mesa_error(ctx, GL_OUT_OF_MEMORY, "texture compression");
1412 goto cleanUp;
1413 }
1414 for (i = 0; i < n; i++) {
1415 dest[i] = CHAN_TO_UBYTE(src[i]);
1416 }
1417 if (newSource != NULL) {
1418 _mesa_free(newSource);
1419 }
1420 newSource = dest; /* we'll free this buffer before returning */
1421 source = dest; /* the new, GLubyte incoming image */
1422 }
1423
1424 data = (const GLubyte *) source;
1425 destRowStride = (destRowStride - width * 2) / 4;
1426 for (y = 0; y < height; y += 4) {
1427 GLuint offs = 0 + (y + 0) * srcRowStride;
1428 for (x = 0; x < width; x += 8) {
1429 const GLubyte *lines[4];
1430 lines[0] = &data[offs];
1431 lines[1] = lines[0] + srcRowStride;
1432 lines[2] = lines[1] + srcRowStride;
1433 lines[3] = lines[2] + srcRowStride;
1434 offs += 8 * comps;
1435 fxt1_quantize(encoded, lines, comps);
1436 /* 128 bits per 8x4 block */
1437 encoded += 4;
1438 }
1439 encoded += destRowStride;
1440 }
1441
1442 cleanUp:
1443 if (newSource != NULL) {
1444 _mesa_free(newSource);
1445 }
1446 }
1447
1448
1449 /***************************************************************************\
1450 * FXT1 decoder
1451 *
1452 * The decoder is based on GL_3DFX_texture_compression_FXT1
1453 * specification and serves as a concept for the encoder.
1454 \***************************************************************************/
1455
1456
1457 /* lookup table for scaling 5 bit colors up to 8 bits */
1458 static const GLubyte _rgb_scale_5[] = {
1459 0, 8, 16, 25, 33, 41, 49, 58,
1460 66, 74, 82, 90, 99, 107, 115, 123,
1461 132, 140, 148, 156, 165, 173, 181, 189,
1462 197, 206, 214, 222, 230, 239, 247, 255
1463 };
1464
1465 /* lookup table for scaling 6 bit colors up to 8 bits */
1466 static const GLubyte _rgb_scale_6[] = {
1467 0, 4, 8, 12, 16, 20, 24, 28,
1468 32, 36, 40, 45, 49, 53, 57, 61,
1469 65, 69, 73, 77, 81, 85, 89, 93,
1470 97, 101, 105, 109, 113, 117, 121, 125,
1471 130, 134, 138, 142, 146, 150, 154, 158,
1472 162, 166, 170, 174, 178, 182, 186, 190,
1473 194, 198, 202, 206, 210, 215, 219, 223,
1474 227, 231, 235, 239, 243, 247, 251, 255
1475 };
1476
1477
/*
 * Decoder helpers.
 *
 * CC_SEL(cc, which)  - the 32-bit word of the block that contains bit
 *                      `which`, shifted right so that bit becomes bit 0.
 *                      The result is NOT masked and a field must not
 *                      straddle a 32-bit word boundary.
 * UP5(c)             - expand the low 5 bits of c to 8 bits.
 * UP6(c, b)          - expand the 6-bit value made of the low 5 bits of c
 *                      followed by the low bit of b to 8 bits.
 * LERP(n, t, c0, c1) - rounded integer blend of c0 and c1: t == 0 yields
 *                      c0 and t == n yields c1.
 *
 * Every expansion is fully parenthesized so the macros can be used inside
 * larger expressions.
 */
#define CC_SEL(cc, which) (((const GLuint *)(cc))[(which) / 32] >> ((which) & 31))
#define UP5(c) (_rgb_scale_5[(c) & 31])
#define UP6(c, b) (_rgb_scale_6[(((c) & 31) << 1) | ((b) & 1)])
#define LERP(n, t, c0, c1) ((((n) - (t)) * (c0) + (t) * (c1) + (n) / 2) / (n))
1482
1483
1484 static void
1485 fxt1_decode_1HI (const GLubyte *code, GLint t, GLchan *rgba)
1486 {
1487 const GLuint *cc;
1488
1489 t *= 3;
1490 cc = (const GLuint *)(code + t / 8);
1491 t = (cc[0] >> (t & 7)) & 7;
1492
1493 if (t == 7) {
1494 rgba[RCOMP] = rgba[GCOMP] = rgba[BCOMP] = rgba[ACOMP] = 0;
1495 } else {
1496 GLubyte r, g, b;
1497 cc = (const GLuint *)(code + 12);
1498 if (t == 0) {
1499 b = UP5(CC_SEL(cc, 0));
1500 g = UP5(CC_SEL(cc, 5));
1501 r = UP5(CC_SEL(cc, 10));
1502 } else if (t == 6) {
1503 b = UP5(CC_SEL(cc, 15));
1504 g = UP5(CC_SEL(cc, 20));
1505 r = UP5(CC_SEL(cc, 25));
1506 } else {
1507 b = LERP(6, t, UP5(CC_SEL(cc, 0)), UP5(CC_SEL(cc, 15)));
1508 g = LERP(6, t, UP5(CC_SEL(cc, 5)), UP5(CC_SEL(cc, 20)));
1509 r = LERP(6, t, UP5(CC_SEL(cc, 10)), UP5(CC_SEL(cc, 25)));
1510 }
1511 rgba[RCOMP] = UBYTE_TO_CHAN(r);
1512 rgba[GCOMP] = UBYTE_TO_CHAN(g);
1513 rgba[BCOMP] = UBYTE_TO_CHAN(b);
1514 rgba[ACOMP] = CHAN_MAX;
1515 }
1516 }
1517
1518
1519 static void
1520 fxt1_decode_1CHROMA (const GLubyte *code, GLint t, GLchan *rgba)
1521 {
1522 const GLuint *cc;
1523 GLuint kk;
1524
1525 cc = (const GLuint *)code;
1526 if (t & 16) {
1527 cc++;
1528 t &= 15;
1529 }
1530 t = (cc[0] >> (t * 2)) & 3;
1531
1532 t *= 15;
1533 cc = (const GLuint *)(code + 8 + t / 8);
1534 kk = cc[0] >> (t & 7);
1535 rgba[BCOMP] = UBYTE_TO_CHAN( UP5(kk) );
1536 rgba[GCOMP] = UBYTE_TO_CHAN( UP5(kk >> 5) );
1537 rgba[RCOMP] = UBYTE_TO_CHAN( UP5(kk >> 10) );
1538 rgba[ACOMP] = CHAN_MAX;
1539 }
1540
1541
1542 static void
1543 fxt1_decode_1MIXED (const GLubyte *code, GLint t, GLchan *rgba)
1544 {
1545 const GLuint *cc;
1546 GLuint col[2][3];
1547 GLint glsb, selb;
1548
1549 cc = (const GLuint *)code;
1550 if (t & 16) {
1551 t &= 15;
1552 t = (cc[1] >> (t * 2)) & 3;
1553 /* col 2 */
1554 col[0][BCOMP] = (*(const GLuint *)(code + 11)) >> 6;
1555 col[0][GCOMP] = CC_SEL(cc, 99);
1556 col[0][RCOMP] = CC_SEL(cc, 104);
1557 /* col 3 */
1558 col[1][BCOMP] = CC_SEL(cc, 109);
1559 col[1][GCOMP] = CC_SEL(cc, 114);
1560 col[1][RCOMP] = CC_SEL(cc, 119);
1561 glsb = CC_SEL(cc, 126);
1562 selb = CC_SEL(cc, 33);
1563 } else {
1564 t = (cc[0] >> (t * 2)) & 3;
1565 /* col 0 */
1566 col[0][BCOMP] = CC_SEL(cc, 64);
1567 col[0][GCOMP] = CC_SEL(cc, 69);
1568 col[0][RCOMP] = CC_SEL(cc, 74);
1569 /* col 1 */
1570 col[1][BCOMP] = CC_SEL(cc, 79);
1571 col[1][GCOMP] = CC_SEL(cc, 84);
1572 col[1][RCOMP] = CC_SEL(cc, 89);
1573 glsb = CC_SEL(cc, 125);
1574 selb = CC_SEL(cc, 1);
1575 }
1576
1577 if (CC_SEL(cc, 124) & 1) {
1578 /* alpha[0] == 1 */
1579
1580 if (t == 3) {
1581 /* zero */
1582 rgba[RCOMP] = rgba[BCOMP] = rgba[GCOMP] = rgba[ACOMP] = 0;
1583 } else {
1584 GLubyte r, g, b;
1585 if (t == 0) {
1586 b = UP5(col[0][BCOMP]);
1587 g = UP5(col[0][GCOMP]);
1588 r = UP5(col[0][RCOMP]);
1589 } else if (t == 2) {
1590 b = UP5(col[1][BCOMP]);
1591 g = UP6(col[1][GCOMP], glsb);
1592 r = UP5(col[1][RCOMP]);
1593 } else {
1594 b = (UP5(col[0][BCOMP]) + UP5(col[1][BCOMP])) / 2;
1595 g = (UP5(col[0][GCOMP]) + UP6(col[1][GCOMP], glsb)) / 2;
1596 r = (UP5(col[0][RCOMP]) + UP5(col[1][RCOMP])) / 2;
1597 }
1598 rgba[RCOMP] = UBYTE_TO_CHAN(r);
1599 rgba[GCOMP] = UBYTE_TO_CHAN(g);
1600 rgba[BCOMP] = UBYTE_TO_CHAN(b);
1601 rgba[ACOMP] = CHAN_MAX;
1602 }
1603 } else {
1604 /* alpha[0] == 0 */
1605 GLubyte r, g, b;
1606 if (t == 0) {
1607 b = UP5(col[0][BCOMP]);
1608 g = UP6(col[0][GCOMP], glsb ^ selb);
1609 r = UP5(col[0][RCOMP]);
1610 } else if (t == 3) {
1611 b = UP5(col[1][BCOMP]);
1612 g = UP6(col[1][GCOMP], glsb);
1613 r = UP5(col[1][RCOMP]);
1614 } else {
1615 b = LERP(3, t, UP5(col[0][BCOMP]), UP5(col[1][BCOMP]));
1616 g = LERP(3, t, UP6(col[0][GCOMP], glsb ^ selb),
1617 UP6(col[1][GCOMP], glsb));
1618 r = LERP(3, t, UP5(col[0][RCOMP]), UP5(col[1][RCOMP]));
1619 }
1620 rgba[RCOMP] = UBYTE_TO_CHAN(r);
1621 rgba[GCOMP] = UBYTE_TO_CHAN(g);
1622 rgba[BCOMP] = UBYTE_TO_CHAN(b);
1623 rgba[ACOMP] = CHAN_MAX;
1624 }
1625 }
1626
1627
1628 static void
1629 fxt1_decode_1ALPHA (const GLubyte *code, GLint t, GLchan *rgba)
1630 {
1631 const GLuint *cc;
1632 GLubyte r, g, b, a;
1633
1634 cc = (const GLuint *)code;
1635 if (CC_SEL(cc, 124) & 1) {
1636 /* lerp == 1 */
1637 GLuint col0[4];
1638
1639 if (t & 16) {
1640 t &= 15;
1641 t = (cc[1] >> (t * 2)) & 3;
1642 /* col 2 */
1643 col0[BCOMP] = (*(const GLuint *)(code + 11)) >> 6;
1644 col0[GCOMP] = CC_SEL(cc, 99);
1645 col0[RCOMP] = CC_SEL(cc, 104);
1646 col0[ACOMP] = CC_SEL(cc, 119);
1647 } else {
1648 t = (cc[0] >> (t * 2)) & 3;
1649 /* col 0 */
1650 col0[BCOMP] = CC_SEL(cc, 64);
1651 col0[GCOMP] = CC_SEL(cc, 69);
1652 col0[RCOMP] = CC_SEL(cc, 74);
1653 col0[ACOMP] = CC_SEL(cc, 109);
1654 }
1655
1656 if (t == 0) {
1657 b = UP5(col0[BCOMP]);
1658 g = UP5(col0[GCOMP]);
1659 r = UP5(col0[RCOMP]);
1660 a = UP5(col0[ACOMP]);
1661 } else if (t == 3) {
1662 b = UP5(CC_SEL(cc, 79));
1663 g = UP5(CC_SEL(cc, 84));
1664 r = UP5(CC_SEL(cc, 89));
1665 a = UP5(CC_SEL(cc, 114));
1666 } else {
1667 b = LERP(3, t, UP5(col0[BCOMP]), UP5(CC_SEL(cc, 79)));
1668 g = LERP(3, t, UP5(col0[GCOMP]), UP5(CC_SEL(cc, 84)));
1669 r = LERP(3, t, UP5(col0[RCOMP]), UP5(CC_SEL(cc, 89)));
1670 a = LERP(3, t, UP5(col0[ACOMP]), UP5(CC_SEL(cc, 114)));
1671 }
1672 } else {
1673 /* lerp == 0 */
1674
1675 if (t & 16) {
1676 cc++;
1677 t &= 15;
1678 }
1679 t = (cc[0] >> (t * 2)) & 3;
1680
1681 if (t == 3) {
1682 /* zero */
1683 r = g = b = a = 0;
1684 } else {
1685 GLuint kk;
1686 cc = (const GLuint *)code;
1687 a = UP5(cc[3] >> (t * 5 + 13));
1688 t *= 15;
1689 cc = (const GLuint *)(code + 8 + t / 8);
1690 kk = cc[0] >> (t & 7);
1691 b = UP5(kk);
1692 g = UP5(kk >> 5);
1693 r = UP5(kk >> 10);
1694 }
1695 }
1696 rgba[RCOMP] = UBYTE_TO_CHAN(r);
1697 rgba[GCOMP] = UBYTE_TO_CHAN(g);
1698 rgba[BCOMP] = UBYTE_TO_CHAN(b);
1699 rgba[ACOMP] = UBYTE_TO_CHAN(a);
1700 }
1701
1702
1703 void
1704 fxt1_decode_1 (const void *texture, GLint stride, /* in pixels */
1705 GLint i, GLint j, GLchan *rgba)
1706 {
1707 static void (*decode_1[]) (const GLubyte *, GLint, GLchan *) = {
1708 fxt1_decode_1HI, /* cc-high = "00?" */
1709 fxt1_decode_1HI, /* cc-high = "00?" */
1710 fxt1_decode_1CHROMA, /* cc-chroma = "010" */
1711 fxt1_decode_1ALPHA, /* alpha = "011" */
1712 fxt1_decode_1MIXED, /* mixed = "1??" */
1713 fxt1_decode_1MIXED, /* mixed = "1??" */
1714 fxt1_decode_1MIXED, /* mixed = "1??" */
1715 fxt1_decode_1MIXED /* mixed = "1??" */
1716 };
1717
1718 const GLubyte *code = (const GLubyte *)texture +
1719 ((j / 4) * (stride / 8) + (i / 8)) * 16;
1720 GLint mode = CC_SEL(code, 125);
1721 GLint t = i & 7;
1722
1723 if (t & 4) {
1724 t += 12;
1725 }
1726 t += (j & 3) * 4;
1727
1728 decode_1[mode](code, t, rgba);
1729 }