remove redundant r,g,b,a vars (bug 4331)
[mesa.git] / src / mesa / main / texcompress_fxt1.c
1 /*
2 * Mesa 3-D graphics library
3 * Version: 6.5
4 *
5 * Copyright (C) 1999-2005 Brian Paul All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included
15 * in all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
21 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25
26 /**
27 * \file texcompress_fxt1.c
28 * GL_EXT_texture_compression_fxt1 support.
29 */
30
31
32 #include "glheader.h"
33 #include "imports.h"
34 #include "colormac.h"
35 #include "context.h"
36 #include "convolve.h"
37 #include "image.h"
38 #include "texcompress.h"
39 #include "texformat.h"
40 #include "texstore.h"
41
42
/*
 * Forward declaration of the FXT1 block encoder.  The texstore_*_fxt1()
 * functions below call it with comps = 3 (RGB) or comps = 4 (RGBA),
 * a GLchan source image plus its row stride, and the destination
 * address plus its row stride.
 */
static void
fxt1_encode (GLuint width, GLuint height, GLint comps,
             const void *source, GLint srcRowStride,
             void *dest, GLint destRowStride);

/*
 * Forward declaration of the single-texel decoder.  The fetch_texel_*
 * functions below call it with the image data, the image row stride,
 * the texel coordinates (i, j) and a GLchan array that receives the
 * decoded color.
 */
static void
fxt1_decode_1 (const void *texture, GLint stride,
               GLint i, GLint j, GLchan *rgba);
51
52
53 /**
54 * Called during context initialization.
55 */
56 void
57 _mesa_init_texture_fxt1( GLcontext *ctx )
58 {
59 (void) ctx;
60 }
61
62
63 /**
64 * Called via TexFormat->StoreImage to store an RGB_FXT1 texture.
65 */
66 static GLboolean
67 texstore_rgb_fxt1(STORE_PARAMS)
68 {
69 const GLchan *pixels;
70 GLint srcRowStride;
71 GLubyte *dst;
72 const GLint texWidth = dstRowStride * 8 / 16; /* a bit of a hack */
73 const GLchan *tempImage = NULL;
74
75 ASSERT(dstFormat == &_mesa_texformat_rgb_fxt1);
76 ASSERT(dstXoffset % 8 == 0);
77 ASSERT(dstYoffset % 4 == 0);
78 ASSERT(dstZoffset == 0);
79 (void) dstZoffset; (void) dstImageStride;
80
81 if (srcFormat != GL_RGB ||
82 srcType != CHAN_TYPE ||
83 ctx->_ImageTransferState ||
84 srcPacking->SwapBytes) {
85 /* convert image to RGB/GLchan */
86 tempImage = _mesa_make_temp_chan_image(ctx, dims,
87 baseInternalFormat,
88 dstFormat->BaseFormat,
89 srcWidth, srcHeight, srcDepth,
90 srcFormat, srcType, srcAddr,
91 srcPacking);
92 if (!tempImage)
93 return GL_FALSE; /* out of memory */
94 _mesa_adjust_image_for_convolution(ctx, dims, &srcWidth, &srcHeight);
95 pixels = tempImage;
96 srcRowStride = 3 * srcWidth;
97 srcFormat = GL_RGB;
98 }
99 else {
100 pixels = (const GLchan *) srcAddr;
101 srcRowStride = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat,
102 srcType) / sizeof(GLchan);
103 }
104
105 dst = _mesa_compressed_image_address(dstXoffset, dstYoffset, 0,
106 GL_COMPRESSED_RGB_FXT1_3DFX,
107 texWidth, (GLubyte *) dstAddr);
108
109 fxt1_encode(srcWidth, srcHeight, 3, pixels, srcRowStride,
110 dst, dstRowStride);
111
112 if (tempImage)
113 _mesa_free((void*) tempImage);
114
115 return GL_TRUE;
116 }
117
118
119 /**
120 * Called via TexFormat->StoreImage to store an RGBA_FXT1 texture.
121 */
122 static GLboolean
123 texstore_rgba_fxt1(STORE_PARAMS)
124 {
125 const GLchan *pixels;
126 GLint srcRowStride;
127 GLubyte *dst;
128 GLint texWidth = dstRowStride * 8 / 16; /* a bit of a hack */
129 const GLchan *tempImage = NULL;
130
131 ASSERT(dstFormat == &_mesa_texformat_rgba_fxt1);
132 ASSERT(dstXoffset % 8 == 0);
133 ASSERT(dstYoffset % 4 == 0);
134 ASSERT(dstZoffset == 0);
135 (void) dstZoffset; (void) dstImageStride;
136
137 if (srcFormat != GL_RGBA ||
138 srcType != CHAN_TYPE ||
139 ctx->_ImageTransferState ||
140 srcPacking->SwapBytes) {
141 /* convert image to RGBA/GLchan */
142 tempImage = _mesa_make_temp_chan_image(ctx, dims,
143 baseInternalFormat,
144 dstFormat->BaseFormat,
145 srcWidth, srcHeight, srcDepth,
146 srcFormat, srcType, srcAddr,
147 srcPacking);
148 if (!tempImage)
149 return GL_FALSE; /* out of memory */
150 _mesa_adjust_image_for_convolution(ctx, dims, &srcWidth, &srcHeight);
151 pixels = tempImage;
152 srcRowStride = 4 * srcWidth;
153 srcFormat = GL_RGBA;
154 }
155 else {
156 pixels = (const GLchan *) srcAddr;
157 srcRowStride = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat,
158 srcType) / sizeof(GLchan);
159 }
160
161 dst = _mesa_compressed_image_address(dstXoffset, dstYoffset, 0,
162 GL_COMPRESSED_RGBA_FXT1_3DFX,
163 texWidth, (GLubyte *) dstAddr);
164
165 fxt1_encode(srcWidth, srcHeight, 4, pixels, srcRowStride,
166 dst, dstRowStride);
167
168 if (tempImage)
169 _mesa_free((void*) tempImage);
170
171 return GL_TRUE;
172 }
173
174
175 static void
176 fetch_texel_2d_rgba_fxt1( const struct gl_texture_image *texImage,
177 GLint i, GLint j, GLint k, GLchan *texel )
178 {
179 (void) k;
180 fxt1_decode_1(texImage->Data, texImage->RowStride, i, j, texel);
181 }
182
183
184 static void
185 fetch_texel_2d_f_rgba_fxt1( const struct gl_texture_image *texImage,
186 GLint i, GLint j, GLint k, GLfloat *texel )
187 {
188 /* just sample as GLchan and convert to float here */
189 GLchan rgba[4];
190 (void) k;
191 fxt1_decode_1(texImage->Data, texImage->RowStride, i, j, rgba);
192 texel[RCOMP] = CHAN_TO_FLOAT(rgba[RCOMP]);
193 texel[GCOMP] = CHAN_TO_FLOAT(rgba[GCOMP]);
194 texel[BCOMP] = CHAN_TO_FLOAT(rgba[BCOMP]);
195 texel[ACOMP] = CHAN_TO_FLOAT(rgba[ACOMP]);
196 }
197
198
199 static void
200 fetch_texel_2d_rgb_fxt1( const struct gl_texture_image *texImage,
201 GLint i, GLint j, GLint k, GLchan *texel )
202 {
203 (void) k;
204 fxt1_decode_1(texImage->Data, texImage->RowStride, i, j, texel);
205 texel[ACOMP] = 255;
206 }
207
208
209 static void
210 fetch_texel_2d_f_rgb_fxt1( const struct gl_texture_image *texImage,
211 GLint i, GLint j, GLint k, GLfloat *texel )
212 {
213 /* just sample as GLchan and convert to float here */
214 GLchan rgba[4];
215 (void) k;
216 fxt1_decode_1(texImage->Data, texImage->RowStride, i, j, rgba);
217 texel[RCOMP] = CHAN_TO_FLOAT(rgba[RCOMP]);
218 texel[GCOMP] = CHAN_TO_FLOAT(rgba[GCOMP]);
219 texel[BCOMP] = CHAN_TO_FLOAT(rgba[BCOMP]);
220 texel[ACOMP] = 1.0F;
221 }
222
223
224
/**
 * Texture format descriptor for RGB FXT1 compressed textures.
 * Stores go through texstore_rgb_fxt1(); only the 2D texel fetch
 * functions are provided, the 1D and 3D slots are NULL.
 * The per-channel bit counts are approximations and TexelBytes is 0.
 */
const struct gl_texture_format _mesa_texformat_rgb_fxt1 = {
   MESA_FORMAT_RGB_FXT1, /* MesaFormat */
   GL_RGB, /* BaseFormat */
   GL_UNSIGNED_NORMALIZED_ARB, /* DataType */
   4, /*approx*/ /* RedBits */
   4, /*approx*/ /* GreenBits */
   4, /*approx*/ /* BlueBits */
   0, /* AlphaBits */
   0, /* LuminanceBits */
   0, /* IntensityBits */
   0, /* IndexBits */
   0, /* DepthBits */
   0, /* TexelBytes */
   texstore_rgb_fxt1, /* StoreTexImageFunc */
   NULL, /*impossible*/ /* FetchTexel1D */
   fetch_texel_2d_rgb_fxt1, /* FetchTexel2D */
   NULL, /*impossible*/ /* FetchTexel3D */
   NULL, /*impossible*/ /* FetchTexel1Df */
   fetch_texel_2d_f_rgb_fxt1, /* FetchTexel2Df */
   NULL, /*impossible*/ /* FetchTexel3Df */
};
246
/**
 * Texture format descriptor for RGBA FXT1 compressed textures.
 * Stores go through texstore_rgba_fxt1(); only the 2D texel fetch
 * functions are provided, the 1D and 3D slots are NULL.
 * The per-channel bit counts are approximations and TexelBytes is 0.
 */
const struct gl_texture_format _mesa_texformat_rgba_fxt1 = {
   MESA_FORMAT_RGBA_FXT1, /* MesaFormat */
   GL_RGBA, /* BaseFormat */
   GL_UNSIGNED_NORMALIZED_ARB, /* DataType */
   4, /*approx*/ /* RedBits */
   4, /*approx*/ /* GreenBits */
   4, /*approx*/ /* BlueBits */
   1, /*approx*/ /* AlphaBits */
   0, /* LuminanceBits */
   0, /* IntensityBits */
   0, /* IndexBits */
   0, /* DepthBits */
   0, /* TexelBytes */
   texstore_rgba_fxt1, /* StoreTexImageFunc */
   NULL, /*impossible*/ /* FetchTexel1D */
   fetch_texel_2d_rgba_fxt1, /* FetchTexel2D */
   NULL, /*impossible*/ /* FetchTexel3D */
   NULL, /*impossible*/ /* FetchTexel1Df */
   fetch_texel_2d_f_rgba_fxt1, /* FetchTexel2Df */
   NULL, /*impossible*/ /* FetchTexel3Df */
};
268
269
270 /***************************************************************************\
271 * FXT1 encoder
272 *
273 * The encoder was built by reversing the decoder,
274 * and is vaguely based on Texus2 by 3dfx. Note that this code
275 * is merely a proof of concept, since it is highly UNoptimized;
276 * moreover, it is sub-optimal due to initial conditions passed
277 * to Lloyd's algorithm (the interpolation modes are even worse).
278 \***************************************************************************/
279
280
/*
 * Sizes and tuning constants shared by the encoder routines below.
 * MAX_COMP and MAX_VECT dimension the color-vector arrays, N_TEXELS
 * dimensions the per-block texel arrays, and the LL_* values control
 * the iteration count and early-exit thresholds of fxt1_lloyd().
 */
#define MAX_COMP 4 /* ever needed maximum number of components in texel */
#define MAX_VECT 4 /* ever needed maximum number of base vectors to find */
#define N_TEXELS 32 /* number of texels in a block (always 32) */
#define LL_N_REP 50 /* number of iterations in lloyd's vq */
#define LL_RMS_D 10 /* fault tolerance (maximum delta) */
#define LL_RMS_E 255 /* fault tolerance (maximum error) */
#define ALPHA_TS 2 /* alpha threshold: (255 - ALPHA_TS) deemed opaque */
/* True when the GLuint read at address v is zero, i.e. the texel at v is
 * "transparent black".  NOTE(review): assumes v points at a 4-byte texel
 * that may be read as one GLuint -- confirm GLuint is 32 bits wide here.
 */
#define ISTBLACK(v) (*((GLuint *)(v)) == 0)
289
290
/*
 * Define a 64-bit unsigned integer type and macros
 *
 *   FX64_MOV32(a, b)  load the 32-bit value b into a; the upper half
 *                     of a is cleared
 *   FX64_OR32(a, b)   OR the 32-bit value b into the low half of a
 *   FX64_SHL(a, c)    shift a left by c bits, with 0 <= c < 64
 *
 * With GCC (and not C++) a native unsigned long long is used; otherwise
 * the value is emulated with a pair of GLuints.  All macro arguments
 * are parenthesized so that arbitrary expressions may be passed.
 */
#if defined(__GNUC__) && !defined(__cplusplus)

#define FX64_NATIVE 1

typedef unsigned long long Fx64;

#define FX64_MOV32(a, b) (a) = (b)
#define FX64_OR32(a, b)  (a) |= (b)
#define FX64_SHL(a, c)   (a) <<= (c)

#else /* !__GNUC__ */

#define FX64_NATIVE 0

typedef struct {
   GLuint lo, hi;
} Fx64;

/* Clear .hi as well: the native variant zero-extends, and FX64_SHL
 * reads .hi, so it must never be left uninitialized.
 */
#define FX64_MOV32(a, b) ((a).lo = (b), (a).hi = 0)
#define FX64_OR32(a, b)  (a).lo |= (b)

/* A zero shift count is a no-op; without the (c) > 0 test the emulation
 * would evaluate (a).lo >> 32, which is undefined for a 32-bit GLuint.
 */
#define FX64_SHL(a, c)                                          \
   do {                                                         \
      if ((c) >= 32) {                                          \
         (a).hi = (a).lo << ((c) - 32);                         \
         (a).lo = 0;                                            \
      } else if ((c) > 0) {                                     \
         (a).hi = ((a).hi << (c)) | ((a).lo >> (32 - (c)));     \
         (a).lo <<= (c);                                        \
      }                                                         \
   } while (0)

#endif /* !__GNUC__ */
327
328
/* Per-component weight used by MAKEIVEC; currently 1 for every component. */
#define F(i) (GLfloat)1 /* can be used to obtain an oblong metric: 0.30 / 0.59 / 0.11 */
/* When non-zero, CALCCDOT clamps its result to the range [0, NV]. */
#define SAFECDOT 1 /* for paranoids */

/*
 * MAKEIVEC(NV, NC, IV, B, V0, V1)
 *
 * Build the interpolation vector IV and offset B for the segment from
 * V0 to V1, using NC components.  After the macro has run,
 *    dot(V, IV) + B  ==  NV * dot(V - V0, V1 - V0) / |V1 - V0|^2 + 0.5
 * (with F(i) == 1), so CALCCDOT's truncation yields the nearest of the
 * NV + 1 evenly spaced steps between V0 and V1.
 *
 * Uses the variable 'i' of the enclosing scope as its loop counter and
 * declares locals 'd2' and 'rd2'.  V0 and V1 must differ, otherwise d2
 * is zero and the division yields a non-finite value.
 */
#define MAKEIVEC(NV, NC, IV, B, V0, V1) \
   do { \
      /* compute interpolation vector */ \
      GLfloat d2 = 0.0F; \
      GLfloat rd2; \
      \
      for (i = 0; i < NC; i++) { \
         IV[i] = (V1[i] - V0[i]) * F(i); \
         d2 += IV[i] * IV[i]; \
      } \
      rd2 = (GLfloat)NV / d2; \
      B = 0; \
      for (i = 0; i < NC; i++) { \
         IV[i] *= F(i); \
         B -= IV[i] * V0[i]; \
         IV[i] *= rd2; \
      } \
      B = B * rd2 + 0.5f; \
   } while (0)

/*
 * CALCCDOT(TEXEL, NV, NC, IV, B, V)
 *
 * Store in TEXEL the integer part of dot(V, IV) + B over NC components,
 * where IV and B come from MAKEIVEC.  With SAFECDOT enabled the result
 * is clamped to [0, NV].
 *
 * Uses the variable 'i' of the enclosing scope as its loop counter and
 * declares a local 'dot'.
 */
#define CALCCDOT(TEXEL, NV, NC, IV, B, V)\
   do { \
      GLfloat dot = 0.0F; \
      for (i = 0; i < NC; i++) { \
         dot += V[i] * IV[i]; \
      } \
      TEXEL = (GLint)(dot + B); \
      if (SAFECDOT) { \
         if (TEXEL < 0) { \
            TEXEL = 0; \
         } else if (TEXEL > NV) { \
            TEXEL = NV; \
         } \
      } \
   } while (0)
367
368
369 static GLint
370 fxt1_bestcol (GLfloat vec[][MAX_COMP], GLint nv,
371 GLubyte input[MAX_COMP], GLint nc)
372 {
373 GLint i, j, best = -1;
374 GLfloat err = 1e9; /* big enough */
375
376 for (j = 0; j < nv; j++) {
377 GLfloat e = 0.0F;
378 for (i = 0; i < nc; i++) {
379 e += (vec[j][i] - input[i]) * (vec[j][i] - input[i]);
380 }
381 if (e < err) {
382 err = e;
383 best = j;
384 }
385 }
386
387 return best;
388 }
389
390
391 static GLint
392 fxt1_worst (GLfloat vec[MAX_COMP],
393 GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
394 {
395 GLint i, k, worst = -1;
396 GLfloat err = -1.0F; /* small enough */
397
398 for (k = 0; k < n; k++) {
399 GLfloat e = 0.0F;
400 for (i = 0; i < nc; i++) {
401 e += (vec[i] - input[k][i]) * (vec[i] - input[k][i]);
402 }
403 if (e > err) {
404 err = e;
405 worst = k;
406 }
407 }
408
409 return worst;
410 }
411
412
413 static GLint
414 fxt1_variance (GLdouble variance[MAX_COMP],
415 GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
416 {
417 GLint i, k, best = 0;
418 GLint sx, sx2;
419 GLdouble var, maxvar = -1; /* small enough */
420 GLdouble teenth = 1.0 / n;
421
422 for (i = 0; i < nc; i++) {
423 sx = sx2 = 0;
424 for (k = 0; k < n; k++) {
425 GLint t = input[k][i];
426 sx += t;
427 sx2 += t * t;
428 }
429 var = sx2 * teenth - sx * sx * teenth * teenth;
430 if (maxvar < var) {
431 maxvar = var;
432 best = i;
433 }
434 if (variance) {
435 variance[i] = var;
436 }
437 }
438
439 return best;
440 }
441
442
443 static GLint
444 fxt1_choose (GLfloat vec[][MAX_COMP], GLint nv,
445 GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
446 {
447 #if 0
448 /* Choose colors from a grid.
449 */
450 GLint i, j;
451
452 for (j = 0; j < nv; j++) {
453 GLint m = j * (n - 1) / (nv - 1);
454 for (i = 0; i < nc; i++) {
455 vec[j][i] = input[m][i];
456 }
457 }
458 #else
459 /* Our solution here is to find the darkest and brightest colors in
460 * the 8x4 tile and use those as the two representative colors.
461 * There are probably better algorithms to use (histogram-based).
462 */
463 GLint i, j, k;
464 GLint minSum = 2000; /* big enough */
465 GLint maxSum = -1; /* small enough */
466 GLint minCol = 0; /* phoudoin: silent compiler! */
467 GLint maxCol = 0; /* phoudoin: silent compiler! */
468
469 struct {
470 GLint flag;
471 GLint key;
472 GLint freq;
473 GLint idx;
474 } hist[N_TEXELS];
475 GLint lenh = 0;
476
477 memset(hist, 0, sizeof(hist));
478
479 for (k = 0; k < n; k++) {
480 GLint l;
481 GLint key = 0;
482 GLint sum = 0;
483 for (i = 0; i < nc; i++) {
484 key <<= 8;
485 key |= input[k][i];
486 sum += input[k][i];
487 }
488 for (l = 0; l < n; l++) {
489 if (!hist[l].flag) {
490 /* alloc new slot */
491 hist[l].flag = !0;
492 hist[l].key = key;
493 hist[l].freq = 1;
494 hist[l].idx = k;
495 lenh = l + 1;
496 break;
497 } else if (hist[l].key == key) {
498 hist[l].freq++;
499 break;
500 }
501 }
502 if (minSum > sum) {
503 minSum = sum;
504 minCol = k;
505 }
506 if (maxSum < sum) {
507 maxSum = sum;
508 maxCol = k;
509 }
510 }
511
512 if (lenh <= nv) {
513 for (j = 0; j < lenh; j++) {
514 for (i = 0; i < nc; i++) {
515 vec[j][i] = (GLfloat)input[hist[j].idx][i];
516 }
517 }
518 for (; j < nv; j++) {
519 for (i = 0; i < nc; i++) {
520 vec[j][i] = vec[0][i];
521 }
522 }
523 return 0;
524 }
525
526 for (j = 0; j < nv; j++) {
527 for (i = 0; i < nc; i++) {
528 vec[j][i] = ((nv - 1 - j) * input[minCol][i] + j * input[maxCol][i] + (nv - 1) / 2) / (GLfloat)(nv - 1);
529 }
530 }
531 #endif
532
533 return !0;
534 }
535
536
537 static GLint
538 fxt1_lloyd (GLfloat vec[][MAX_COMP], GLint nv,
539 GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
540 {
541 /* Use the generalized lloyd's algorithm for VQ:
542 * find 4 color vectors.
543 *
544 * for each sample color
545 * sort to nearest vector.
546 *
547 * replace each vector with the centroid of it's matching colors.
548 *
549 * repeat until RMS doesn't improve.
550 *
551 * if a color vector has no samples, or becomes the same as another
552 * vector, replace it with the color which is farthest from a sample.
553 *
554 * vec[][MAX_COMP] initial vectors and resulting colors
555 * nv number of resulting colors required
556 * input[N_TEXELS][MAX_COMP] input texels
557 * nc number of components in input / vec
558 * n number of input samples
559 */
560
561 GLint sum[MAX_VECT][MAX_COMP]; /* used to accumulate closest texels */
562 GLint cnt[MAX_VECT]; /* how many times a certain vector was chosen */
563 GLfloat error, lasterror = 1e9;
564
565 GLint i, j, k, rep;
566
567 /* the quantizer */
568 for (rep = 0; rep < LL_N_REP; rep++) {
569 /* reset sums & counters */
570 for (j = 0; j < nv; j++) {
571 for (i = 0; i < nc; i++) {
572 sum[j][i] = 0;
573 }
574 cnt[j] = 0;
575 }
576 error = 0;
577
578 /* scan whole block */
579 for (k = 0; k < n; k++) {
580 #if 1
581 GLint best = -1;
582 GLfloat err = 1e9; /* big enough */
583 /* determine best vector */
584 for (j = 0; j < nv; j++) {
585 GLfloat e = (vec[j][0] - input[k][0]) * (vec[j][0] - input[k][0]) +
586 (vec[j][1] - input[k][1]) * (vec[j][1] - input[k][1]) +
587 (vec[j][2] - input[k][2]) * (vec[j][2] - input[k][2]);
588 if (nc == 4) {
589 e += (vec[j][3] - input[k][3]) * (vec[j][3] - input[k][3]);
590 }
591 if (e < err) {
592 err = e;
593 best = j;
594 }
595 }
596 #else
597 GLint best = fxt1_bestcol(vec, nv, input[k], nc, &err);
598 #endif
599 /* add in closest color */
600 for (i = 0; i < nc; i++) {
601 sum[best][i] += input[k][i];
602 }
603 /* mark this vector as used */
604 cnt[best]++;
605 /* accumulate error */
606 error += err;
607 }
608
609 /* check RMS */
610 if ((error < LL_RMS_E) ||
611 ((error < lasterror) && ((lasterror - error) < LL_RMS_D))) {
612 return !0; /* good match */
613 }
614 lasterror = error;
615
616 /* move each vector to the barycenter of its closest colors */
617 for (j = 0; j < nv; j++) {
618 if (cnt[j]) {
619 GLfloat div = 1.0F / cnt[j];
620 for (i = 0; i < nc; i++) {
621 vec[j][i] = div * sum[j][i];
622 }
623 } else {
624 /* this vec has no samples or is identical with a previous vec */
625 GLint worst = fxt1_worst(vec[j], input, nc, n);
626 for (i = 0; i < nc; i++) {
627 vec[j][i] = input[worst][i];
628 }
629 }
630 }
631 }
632
633 return 0; /* could not converge fast enough */
634 }
635
636
637 static void
638 fxt1_quantize_CHROMA (GLuint *cc,
639 GLubyte input[N_TEXELS][MAX_COMP])
640 {
641 const GLint n_vect = 4; /* 4 base vectors to find */
642 const GLint n_comp = 3; /* 3 components: R, G, B */
643 GLfloat vec[MAX_VECT][MAX_COMP];
644 GLint i, j, k;
645 Fx64 hi; /* high quadword */
646 GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
647
648 if (fxt1_choose(vec, n_vect, input, n_comp, N_TEXELS) != 0) {
649 fxt1_lloyd(vec, n_vect, input, n_comp, N_TEXELS);
650 }
651
652 FX64_MOV32(hi, 4); /* cc-chroma = "010" + unused bit */
653 for (j = n_vect - 1; j >= 0; j--) {
654 for (i = 0; i < n_comp; i++) {
655 /* add in colors */
656 FX64_SHL(hi, 5);
657 FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
658 }
659 }
660 ((Fx64 *)cc)[1] = hi;
661
662 lohi = lolo = 0;
663 /* right microtile */
664 for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) {
665 lohi <<= 2;
666 lohi |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
667 }
668 /* left microtile */
669 for (; k >= 0; k--) {
670 lolo <<= 2;
671 lolo |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
672 }
673 cc[1] = lohi;
674 cc[0] = lolo;
675 }
676
677
678 static void
679 fxt1_quantize_ALPHA0 (GLuint *cc,
680 GLubyte input[N_TEXELS][MAX_COMP],
681 GLubyte reord[N_TEXELS][MAX_COMP], GLint n)
682 {
683 const GLint n_vect = 3; /* 3 base vectors to find */
684 const GLint n_comp = 4; /* 4 components: R, G, B, A */
685 GLfloat vec[MAX_VECT][MAX_COMP];
686 GLint i, j, k;
687 Fx64 hi; /* high quadword */
688 GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
689
690 /* the last vector indicates zero */
691 for (i = 0; i < n_comp; i++) {
692 vec[n_vect][i] = 0;
693 }
694
695 /* the first n texels in reord are guaranteed to be non-zero */
696 if (fxt1_choose(vec, n_vect, reord, n_comp, n) != 0) {
697 fxt1_lloyd(vec, n_vect, reord, n_comp, n);
698 }
699
700 FX64_MOV32(hi, 6); /* alpha = "011" + lerp = 0 */
701 for (j = n_vect - 1; j >= 0; j--) {
702 /* add in alphas */
703 FX64_SHL(hi, 5);
704 FX64_OR32(hi, (GLuint)(vec[j][ACOMP] / 8.0F));
705 }
706 for (j = n_vect - 1; j >= 0; j--) {
707 for (i = 0; i < n_comp - 1; i++) {
708 /* add in colors */
709 FX64_SHL(hi, 5);
710 FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
711 }
712 }
713 ((Fx64 *)cc)[1] = hi;
714
715 lohi = lolo = 0;
716 /* right microtile */
717 for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) {
718 lohi <<= 2;
719 lohi |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
720 }
721 /* left microtile */
722 for (; k >= 0; k--) {
723 lolo <<= 2;
724 lolo |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
725 }
726 cc[1] = lohi;
727 cc[0] = lolo;
728 }
729
730
731 static void
732 fxt1_quantize_ALPHA1 (GLuint *cc,
733 GLubyte input[N_TEXELS][MAX_COMP])
734 {
735 const GLint n_vect = 3; /* highest vector number in each microtile */
736 const GLint n_comp = 4; /* 4 components: R, G, B, A */
737 GLfloat vec[1 + 1 + 1][MAX_COMP]; /* 1.5 extrema for each sub-block */
738 GLfloat b, iv[MAX_COMP]; /* interpolation vector */
739 GLint i, j, k;
740 Fx64 hi; /* high quadword */
741 GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
742
743 GLint minSum;
744 GLint maxSum;
745 GLint minColL = 0, maxColL = 0;
746 GLint minColR = 0, maxColR = 0;
747 GLint sumL = 0, sumR = 0;
748
749 /* Our solution here is to find the darkest and brightest colors in
750 * the 4x4 tile and use those as the two representative colors.
751 * There are probably better algorithms to use (histogram-based).
752 */
753 minSum = 2000; /* big enough */
754 maxSum = -1; /* small enough */
755 for (k = 0; k < N_TEXELS / 2; k++) {
756 GLint sum = 0;
757 for (i = 0; i < n_comp; i++) {
758 sum += input[k][i];
759 }
760 if (minSum > sum) {
761 minSum = sum;
762 minColL = k;
763 }
764 if (maxSum < sum) {
765 maxSum = sum;
766 maxColL = k;
767 }
768 sumL += sum;
769 }
770 minSum = 2000; /* big enough */
771 maxSum = -1; /* small enough */
772 for (; k < N_TEXELS; k++) {
773 GLint sum = 0;
774 for (i = 0; i < n_comp; i++) {
775 sum += input[k][i];
776 }
777 if (minSum > sum) {
778 minSum = sum;
779 minColR = k;
780 }
781 if (maxSum < sum) {
782 maxSum = sum;
783 maxColR = k;
784 }
785 sumR += sum;
786 }
787
788 /* choose the common vector (yuck!) */
789 {
790 GLint j1, j2;
791 GLint v1 = 0, v2 = 0;
792 GLfloat err = 1e9; /* big enough */
793 GLfloat tv[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
794 for (i = 0; i < n_comp; i++) {
795 tv[0][i] = input[minColL][i];
796 tv[1][i] = input[maxColL][i];
797 tv[2][i] = input[minColR][i];
798 tv[3][i] = input[maxColR][i];
799 }
800 for (j1 = 0; j1 < 2; j1++) {
801 for (j2 = 2; j2 < 4; j2++) {
802 GLfloat e = 0.0F;
803 for (i = 0; i < n_comp; i++) {
804 e += (tv[j1][i] - tv[j2][i]) * (tv[j1][i] - tv[j2][i]);
805 }
806 if (e < err) {
807 err = e;
808 v1 = j1;
809 v2 = j2;
810 }
811 }
812 }
813 for (i = 0; i < n_comp; i++) {
814 vec[0][i] = tv[1 - v1][i];
815 vec[1][i] = (tv[v1][i] * sumL + tv[v2][i] * sumR) / (sumL + sumR);
816 vec[2][i] = tv[5 - v2][i];
817 }
818 }
819
820 /* left microtile */
821 cc[0] = 0;
822 if (minColL != maxColL) {
823 /* compute interpolation vector */
824 MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
825
826 /* add in texels */
827 lolo = 0;
828 for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
829 GLint texel;
830 /* interpolate color */
831 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
832 /* add in texel */
833 lolo <<= 2;
834 lolo |= texel;
835 }
836
837 cc[0] = lolo;
838 }
839
840 /* right microtile */
841 cc[1] = 0;
842 if (minColR != maxColR) {
843 /* compute interpolation vector */
844 MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[1]);
845
846 /* add in texels */
847 lohi = 0;
848 for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
849 GLint texel;
850 /* interpolate color */
851 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
852 /* add in texel */
853 lohi <<= 2;
854 lohi |= texel;
855 }
856
857 cc[1] = lohi;
858 }
859
860 FX64_MOV32(hi, 7); /* alpha = "011" + lerp = 1 */
861 for (j = n_vect - 1; j >= 0; j--) {
862 /* add in alphas */
863 FX64_SHL(hi, 5);
864 FX64_OR32(hi, (GLuint)(vec[j][ACOMP] / 8.0F));
865 }
866 for (j = n_vect - 1; j >= 0; j--) {
867 for (i = 0; i < n_comp - 1; i++) {
868 /* add in colors */
869 FX64_SHL(hi, 5);
870 FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
871 }
872 }
873 ((Fx64 *)cc)[1] = hi;
874 }
875
876
877 static void
878 fxt1_quantize_HI (GLuint *cc,
879 GLubyte input[N_TEXELS][MAX_COMP],
880 GLubyte reord[N_TEXELS][MAX_COMP], GLint n)
881 {
882 const GLint n_vect = 6; /* highest vector number */
883 const GLint n_comp = 3; /* 3 components: R, G, B */
884 GLfloat b = 0.0F; /* phoudoin: silent compiler! */
885 GLfloat iv[MAX_COMP]; /* interpolation vector */
886 GLint i, k;
887 GLuint hihi; /* high quadword: hi dword */
888
889 GLint minSum = 2000; /* big enough */
890 GLint maxSum = -1; /* small enough */
891 GLint minCol = 0; /* phoudoin: silent compiler! */
892 GLint maxCol = 0; /* phoudoin: silent compiler! */
893
894 /* Our solution here is to find the darkest and brightest colors in
895 * the 8x4 tile and use those as the two representative colors.
896 * There are probably better algorithms to use (histogram-based).
897 */
898 for (k = 0; k < n; k++) {
899 GLint sum = 0;
900 for (i = 0; i < n_comp; i++) {
901 sum += reord[k][i];
902 }
903 if (minSum > sum) {
904 minSum = sum;
905 minCol = k;
906 }
907 if (maxSum < sum) {
908 maxSum = sum;
909 maxCol = k;
910 }
911 }
912
913 hihi = 0; /* cc-hi = "00" */
914 for (i = 0; i < n_comp; i++) {
915 /* add in colors */
916 hihi <<= 5;
917 hihi |= reord[maxCol][i] >> 3;
918 }
919 for (i = 0; i < n_comp; i++) {
920 /* add in colors */
921 hihi <<= 5;
922 hihi |= reord[minCol][i] >> 3;
923 }
924 cc[3] = hihi;
925 cc[0] = cc[1] = cc[2] = 0;
926
927 /* compute interpolation vector */
928 if (minCol != maxCol) {
929 MAKEIVEC(n_vect, n_comp, iv, b, reord[minCol], reord[maxCol]);
930 }
931
932 /* add in texels */
933 for (k = N_TEXELS - 1; k >= 0; k--) {
934 GLint t = k * 3;
935 GLuint *kk = (GLuint *)((char *)cc + t / 8);
936 GLint texel = n_vect + 1; /* transparent black */
937
938 if (!ISTBLACK(input[k])) {
939 if (minCol != maxCol) {
940 /* interpolate color */
941 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
942 /* add in texel */
943 kk[0] |= texel << (t & 7);
944 }
945 } else {
946 /* add in texel */
947 kk[0] |= texel << (t & 7);
948 }
949 }
950 }
951
952
953 static void
954 fxt1_quantize_MIXED1 (GLuint *cc,
955 GLubyte input[N_TEXELS][MAX_COMP])
956 {
957 const GLint n_vect = 2; /* highest vector number in each microtile */
958 const GLint n_comp = 3; /* 3 components: R, G, B */
959 GLubyte vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
960 GLfloat b, iv[MAX_COMP]; /* interpolation vector */
961 GLint i, j, k;
962 Fx64 hi; /* high quadword */
963 GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
964
965 GLint minSum;
966 GLint maxSum;
967 GLint minColL = 0, maxColL = -1;
968 GLint minColR = 0, maxColR = -1;
969
970 /* Our solution here is to find the darkest and brightest colors in
971 * the 4x4 tile and use those as the two representative colors.
972 * There are probably better algorithms to use (histogram-based).
973 */
974 minSum = 2000; /* big enough */
975 maxSum = -1; /* small enough */
976 for (k = 0; k < N_TEXELS / 2; k++) {
977 if (!ISTBLACK(input[k])) {
978 GLint sum = 0;
979 for (i = 0; i < n_comp; i++) {
980 sum += input[k][i];
981 }
982 if (minSum > sum) {
983 minSum = sum;
984 minColL = k;
985 }
986 if (maxSum < sum) {
987 maxSum = sum;
988 maxColL = k;
989 }
990 }
991 }
992 minSum = 2000; /* big enough */
993 maxSum = -1; /* small enough */
994 for (; k < N_TEXELS; k++) {
995 if (!ISTBLACK(input[k])) {
996 GLint sum = 0;
997 for (i = 0; i < n_comp; i++) {
998 sum += input[k][i];
999 }
1000 if (minSum > sum) {
1001 minSum = sum;
1002 minColR = k;
1003 }
1004 if (maxSum < sum) {
1005 maxSum = sum;
1006 maxColR = k;
1007 }
1008 }
1009 }
1010
1011 /* left microtile */
1012 if (maxColL == -1) {
1013 /* all transparent black */
1014 cc[0] = ~0u;
1015 for (i = 0; i < n_comp; i++) {
1016 vec[0][i] = 0;
1017 vec[1][i] = 0;
1018 }
1019 } else {
1020 cc[0] = 0;
1021 for (i = 0; i < n_comp; i++) {
1022 vec[0][i] = input[minColL][i];
1023 vec[1][i] = input[maxColL][i];
1024 }
1025 if (minColL != maxColL) {
1026 /* compute interpolation vector */
1027 MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
1028
1029 /* add in texels */
1030 lolo = 0;
1031 for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
1032 GLint texel = n_vect + 1; /* transparent black */
1033 if (!ISTBLACK(input[k])) {
1034 /* interpolate color */
1035 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
1036 }
1037 /* add in texel */
1038 lolo <<= 2;
1039 lolo |= texel;
1040 }
1041 cc[0] = lolo;
1042 }
1043 }
1044
1045 /* right microtile */
1046 if (maxColR == -1) {
1047 /* all transparent black */
1048 cc[1] = ~0u;
1049 for (i = 0; i < n_comp; i++) {
1050 vec[2][i] = 0;
1051 vec[3][i] = 0;
1052 }
1053 } else {
1054 cc[1] = 0;
1055 for (i = 0; i < n_comp; i++) {
1056 vec[2][i] = input[minColR][i];
1057 vec[3][i] = input[maxColR][i];
1058 }
1059 if (minColR != maxColR) {
1060 /* compute interpolation vector */
1061 MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);
1062
1063 /* add in texels */
1064 lohi = 0;
1065 for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
1066 GLint texel = n_vect + 1; /* transparent black */
1067 if (!ISTBLACK(input[k])) {
1068 /* interpolate color */
1069 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
1070 }
1071 /* add in texel */
1072 lohi <<= 2;
1073 lohi |= texel;
1074 }
1075 cc[1] = lohi;
1076 }
1077 }
1078
1079 FX64_MOV32(hi, 9 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
1080 for (j = 2 * 2 - 1; j >= 0; j--) {
1081 for (i = 0; i < n_comp; i++) {
1082 /* add in colors */
1083 FX64_SHL(hi, 5);
1084 FX64_OR32(hi, vec[j][i] >> 3);
1085 }
1086 }
1087 ((Fx64 *)cc)[1] = hi;
1088 }
1089
1090
1091 static void
1092 fxt1_quantize_MIXED0 (GLuint *cc,
1093 GLubyte input[N_TEXELS][MAX_COMP])
1094 {
1095 const GLint n_vect = 3; /* highest vector number in each microtile */
1096 const GLint n_comp = 3; /* 3 components: R, G, B */
1097 GLubyte vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
1098 GLfloat b, iv[MAX_COMP]; /* interpolation vector */
1099 GLint i, j, k;
1100 Fx64 hi; /* high quadword */
1101 GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
1102
1103 GLint minColL = 0, maxColL = 0;
1104 GLint minColR = 0, maxColR = 0;
1105 #if 0
1106 GLint minSum;
1107 GLint maxSum;
1108
1109 /* Our solution here is to find the darkest and brightest colors in
1110 * the 4x4 tile and use those as the two representative colors.
1111 * There are probably better algorithms to use (histogram-based).
1112 */
1113 minSum = 2000; /* big enough */
1114 maxSum = -1; /* small enough */
1115 for (k = 0; k < N_TEXELS / 2; k++) {
1116 GLint sum = 0;
1117 for (i = 0; i < n_comp; i++) {
1118 sum += input[k][i];
1119 }
1120 if (minSum > sum) {
1121 minSum = sum;
1122 minColL = k;
1123 }
1124 if (maxSum < sum) {
1125 maxSum = sum;
1126 maxColL = k;
1127 }
1128 }
1129 minSum = 2000; /* big enough */
1130 maxSum = -1; /* small enough */
1131 for (; k < N_TEXELS; k++) {
1132 GLint sum = 0;
1133 for (i = 0; i < n_comp; i++) {
1134 sum += input[k][i];
1135 }
1136 if (minSum > sum) {
1137 minSum = sum;
1138 minColR = k;
1139 }
1140 if (maxSum < sum) {
1141 maxSum = sum;
1142 maxColR = k;
1143 }
1144 }
1145 #else
1146 GLint minVal;
1147 GLint maxVal;
1148 GLint maxVarL = fxt1_variance(NULL, input, n_comp, N_TEXELS / 2);
1149 GLint maxVarR = fxt1_variance(NULL, &input[N_TEXELS / 2], n_comp, N_TEXELS / 2);
1150
1151 /* Scan the channel with max variance for lo & hi
1152 * and use those as the two representative colors.
1153 */
1154 minVal = 2000; /* big enough */
1155 maxVal = -1; /* small enough */
1156 for (k = 0; k < N_TEXELS / 2; k++) {
1157 GLint t = input[k][maxVarL];
1158 if (minVal > t) {
1159 minVal = t;
1160 minColL = k;
1161 }
1162 if (maxVal < t) {
1163 maxVal = t;
1164 maxColL = k;
1165 }
1166 }
1167 minVal = 2000; /* big enough */
1168 maxVal = -1; /* small enough */
1169 for (; k < N_TEXELS; k++) {
1170 GLint t = input[k][maxVarR];
1171 if (minVal > t) {
1172 minVal = t;
1173 minColR = k;
1174 }
1175 if (maxVal < t) {
1176 maxVal = t;
1177 maxColR = k;
1178 }
1179 }
1180 #endif
1181
1182 /* left microtile */
1183 cc[0] = 0;
1184 for (i = 0; i < n_comp; i++) {
1185 vec[0][i] = input[minColL][i];
1186 vec[1][i] = input[maxColL][i];
1187 }
1188 if (minColL != maxColL) {
1189 /* compute interpolation vector */
1190 MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
1191
1192 /* add in texels */
1193 lolo = 0;
1194 for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
1195 GLint texel;
1196 /* interpolate color */
1197 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
1198 /* add in texel */
1199 lolo <<= 2;
1200 lolo |= texel;
1201 }
1202
1203 /* funky encoding for LSB of green */
1204 if ((GLint)((lolo >> 1) & 1) != (((vec[1][GCOMP] ^ vec[0][GCOMP]) >> 2) & 1)) {
1205 for (i = 0; i < n_comp; i++) {
1206 vec[1][i] = input[minColL][i];
1207 vec[0][i] = input[maxColL][i];
1208 }
1209 lolo = ~lolo;
1210 }
1211
1212 cc[0] = lolo;
1213 }
1214
1215 /* right microtile */
1216 cc[1] = 0;
1217 for (i = 0; i < n_comp; i++) {
1218 vec[2][i] = input[minColR][i];
1219 vec[3][i] = input[maxColR][i];
1220 }
1221 if (minColR != maxColR) {
1222 /* compute interpolation vector */
1223 MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);
1224
1225 /* add in texels */
1226 lohi = 0;
1227 for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
1228 GLint texel;
1229 /* interpolate color */
1230 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
1231 /* add in texel */
1232 lohi <<= 2;
1233 lohi |= texel;
1234 }
1235
1236 /* funky encoding for LSB of green */
1237 if ((GLint)((lohi >> 1) & 1) != (((vec[3][GCOMP] ^ vec[2][GCOMP]) >> 2) & 1)) {
1238 for (i = 0; i < n_comp; i++) {
1239 vec[3][i] = input[minColR][i];
1240 vec[2][i] = input[maxColR][i];
1241 }
1242 lohi = ~lohi;
1243 }
1244
1245 cc[1] = lohi;
1246 }
1247
1248 FX64_MOV32(hi, 8 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
1249 for (j = 2 * 2 - 1; j >= 0; j--) {
1250 for (i = 0; i < n_comp; i++) {
1251 /* add in colors */
1252 FX64_SHL(hi, 5);
1253 FX64_OR32(hi, vec[j][i] >> 3);
1254 }
1255 }
1256 ((Fx64 *)cc)[1] = hi;
1257 }
1258
1259
1260 static void
1261 fxt1_quantize (GLuint *cc, const GLubyte *lines[], GLint comps)
1262 {
1263 GLint trualpha;
1264 GLubyte reord[N_TEXELS][MAX_COMP];
1265
1266 GLubyte input[N_TEXELS][MAX_COMP];
1267 GLint i, k, l;
1268
1269 if (comps == 3) {
1270 /* make the whole block opaque */
1271 memset(input, -1, sizeof(input));
1272 }
1273
1274 /* 8 texels each line */
1275 for (l = 0; l < 4; l++) {
1276 for (k = 0; k < 4; k++) {
1277 for (i = 0; i < comps; i++) {
1278 input[k + l * 4][i] = *lines[l]++;
1279 }
1280 }
1281 for (; k < 8; k++) {
1282 for (i = 0; i < comps; i++) {
1283 input[k + l * 4 + 12][i] = *lines[l]++;
1284 }
1285 }
1286 }
1287
1288 /* block layout:
1289 * 00, 01, 02, 03, 08, 09, 0a, 0b
1290 * 10, 11, 12, 13, 18, 19, 1a, 1b
1291 * 04, 05, 06, 07, 0c, 0d, 0e, 0f
1292 * 14, 15, 16, 17, 1c, 1d, 1e, 1f
1293 */
1294
1295 /* [dBorca]
1296 * stupidity flows forth from this
1297 */
1298 l = N_TEXELS;
1299 trualpha = 0;
1300 if (comps == 4) {
1301 /* skip all transparent black texels */
1302 l = 0;
1303 for (k = 0; k < N_TEXELS; k++) {
1304 /* test all components against 0 */
1305 if (!ISTBLACK(input[k])) {
1306 /* texel is not transparent black */
1307 COPY_4UBV(reord[l], input[k]);
1308 if (reord[l][ACOMP] < (255 - ALPHA_TS)) {
1309 /* non-opaque texel */
1310 trualpha = !0;
1311 }
1312 l++;
1313 }
1314 }
1315 }
1316
1317 #if 0
1318 if (trualpha) {
1319 fxt1_quantize_ALPHA0(cc, input, reord, l);
1320 } else if (l == 0) {
1321 cc[0] = cc[1] = cc[2] = -1;
1322 cc[3] = 0;
1323 } else if (l < N_TEXELS) {
1324 fxt1_quantize_HI(cc, input, reord, l);
1325 } else {
1326 fxt1_quantize_CHROMA(cc, input);
1327 }
1328 (void)fxt1_quantize_ALPHA1;
1329 (void)fxt1_quantize_MIXED1;
1330 (void)fxt1_quantize_MIXED0;
1331 #else
1332 if (trualpha) {
1333 fxt1_quantize_ALPHA1(cc, input);
1334 } else if (l == 0) {
1335 cc[0] = cc[1] = cc[2] = ~0u;
1336 cc[3] = 0;
1337 } else if (l < N_TEXELS) {
1338 fxt1_quantize_MIXED1(cc, input);
1339 } else {
1340 fxt1_quantize_MIXED0(cc, input);
1341 }
1342 (void)fxt1_quantize_ALPHA0;
1343 (void)fxt1_quantize_HI;
1344 (void)fxt1_quantize_CHROMA;
1345 #endif
1346 }
1347
1348
1349 static void
1350 fxt1_encode (GLuint width, GLuint height, GLint comps,
1351 const void *source, GLint srcRowStride,
1352 void *dest, GLint destRowStride)
1353 {
1354 GLuint x, y;
1355 const GLubyte *data;
1356 GLuint *encoded = (GLuint *)dest;
1357 void *newSource = NULL;
1358
1359 assert(comps == 3 || comps == 4);
1360
1361 /* Replicate image if width is not M8 or height is not M4 */
1362 if ((width & 7) | (height & 3)) {
1363 GLint newWidth = (width + 7) & ~7;
1364 GLint newHeight = (height + 3) & ~3;
1365 newSource = _mesa_malloc(comps * newWidth * newHeight * sizeof(GLchan));
1366 if (!newSource) {
1367 GET_CURRENT_CONTEXT(ctx);
1368 _mesa_error(ctx, GL_OUT_OF_MEMORY, "texture compression");
1369 goto cleanUp;
1370 }
1371 _mesa_upscale_teximage2d(width, height, newWidth, newHeight,
1372 comps, (const GLchan *) source,
1373 srcRowStride, (GLchan *) newSource);
1374 source = newSource;
1375 width = newWidth;
1376 height = newHeight;
1377 srcRowStride = comps * newWidth;
1378 }
1379
1380 /* convert from 16/32-bit channels to GLubyte if needed */
1381 if (CHAN_TYPE != GL_UNSIGNED_BYTE) {
1382 const GLuint n = width * height * comps;
1383 const GLchan *src = (const GLchan *) source;
1384 GLubyte *dest = (GLubyte *) _mesa_malloc(n * sizeof(GLubyte));
1385 GLuint i;
1386 if (!dest) {
1387 GET_CURRENT_CONTEXT(ctx);
1388 _mesa_error(ctx, GL_OUT_OF_MEMORY, "texture compression");
1389 goto cleanUp;
1390 }
1391 for (i = 0; i < n; i++) {
1392 dest[i] = CHAN_TO_UBYTE(src[i]);
1393 }
1394 if (newSource != NULL) {
1395 _mesa_free(newSource);
1396 }
1397 newSource = dest; /* we'll free this buffer before returning */
1398 source = dest; /* the new, GLubyte incoming image */
1399 }
1400
1401 data = (const GLubyte *) source;
1402 destRowStride = (destRowStride - width * 2) / 4;
1403 for (y = 0; y < height; y += 4) {
1404 GLuint offs = 0 + (y + 0) * srcRowStride;
1405 for (x = 0; x < width; x += 8) {
1406 const GLubyte *lines[4];
1407 lines[0] = &data[offs];
1408 lines[1] = lines[0] + srcRowStride;
1409 lines[2] = lines[1] + srcRowStride;
1410 lines[3] = lines[2] + srcRowStride;
1411 offs += 8 * comps;
1412 fxt1_quantize(encoded, lines, comps);
1413 /* 128 bits per 8x4 block */
1414 encoded += 4;
1415 }
1416 encoded += destRowStride;
1417 }
1418
1419 cleanUp:
1420 if (newSource != NULL) {
1421 _mesa_free(newSource);
1422 }
1423 }
1424
1425
1426 /***************************************************************************\
1427 * FXT1 decoder
1428 *
1429 * The decoder is based on GL_3DFX_texture_compression_FXT1
1430 * specification and serves as a concept for the encoder.
1431 \***************************************************************************/
1432
1433
1434 /* lookup table for scaling 5 bit colors up to 8 bits */
1435 static const GLubyte _rgb_scale_5[] = {
1436 0, 8, 16, 25, 33, 41, 49, 58,
1437 66, 74, 82, 90, 99, 107, 115, 123,
1438 132, 140, 148, 156, 165, 173, 181, 189,
1439 197, 206, 214, 222, 230, 239, 247, 255
1440 };
1441
1442 /* lookup table for scaling 6 bit colors up to 8 bits */
1443 static const GLubyte _rgb_scale_6[] = {
1444 0, 4, 8, 12, 16, 20, 24, 28,
1445 32, 36, 40, 45, 49, 53, 57, 61,
1446 65, 69, 73, 77, 81, 85, 89, 93,
1447 97, 101, 105, 109, 113, 117, 121, 125,
1448 130, 134, 138, 142, 146, 150, 154, 158,
1449 162, 166, 170, 174, 178, 182, 186, 190,
1450 194, 198, 202, 206, 210, 215, 219, 223,
1451 227, 231, 235, 239, 243, 247, 251, 255
1452 };
1453
1454
/*
 * Helpers shared by the decoders.
 *
 * CC_SEL(cc, which)  view cc as an array of GLuint and return the word that
 *                    holds bit number `which`, shifted right so that bit
 *                    ends up in position 0.  Higher bits are NOT masked
 *                    off; callers mask the result themselves.  The cast is
 *                    to a const pointer so that const blocks can be passed
 *                    without dropping the qualifier.
 * UP5(c)             expand the low 5 bits of c to 8 bits.
 * UP6(c, b)          expand the 6-bit value made of the low 5 bits of c
 *                    followed by the low bit of b to 8 bits.
 * LERP(n, t, c0, c1) rounded integer interpolation between c0 (t == 0) and
 *                    c1 (t == n) in steps of 1/n.  n and t are evaluated
 *                    more than once.
 *
 * Every replacement list is fully parenthesized so the macros can be used
 * as operands of a larger expression.
 */
#define CC_SEL(cc, which) (((const GLuint *)(cc))[(which) / 32] >> ((which) & 31))
#define UP5(c) (_rgb_scale_5[(c) & 31])
#define UP6(c, b) (_rgb_scale_6[(((c) & 31) << 1) | ((b) & 1)])
#define LERP(n, t, c0, c1) ((((n) - (t)) * (c0) + (t) * (c1) + (n) / 2) / (n))
1459
1460
1461 static void
1462 fxt1_decode_1HI (const GLubyte *code, GLint t, GLchan *rgba)
1463 {
1464 const GLuint *cc;
1465
1466 t *= 3;
1467 cc = (const GLuint *)(code + t / 8);
1468 t = (cc[0] >> (t & 7)) & 7;
1469
1470 if (t == 7) {
1471 rgba[RCOMP] = rgba[GCOMP] = rgba[BCOMP] = rgba[ACOMP] = 0;
1472 } else {
1473 GLubyte r, g, b;
1474 cc = (const GLuint *)(code + 12);
1475 if (t == 0) {
1476 b = UP5(CC_SEL(cc, 0));
1477 g = UP5(CC_SEL(cc, 5));
1478 r = UP5(CC_SEL(cc, 10));
1479 } else if (t == 6) {
1480 b = UP5(CC_SEL(cc, 15));
1481 g = UP5(CC_SEL(cc, 20));
1482 r = UP5(CC_SEL(cc, 25));
1483 } else {
1484 b = LERP(6, t, UP5(CC_SEL(cc, 0)), UP5(CC_SEL(cc, 15)));
1485 g = LERP(6, t, UP5(CC_SEL(cc, 5)), UP5(CC_SEL(cc, 20)));
1486 r = LERP(6, t, UP5(CC_SEL(cc, 10)), UP5(CC_SEL(cc, 25)));
1487 }
1488 rgba[RCOMP] = UBYTE_TO_CHAN(r);
1489 rgba[GCOMP] = UBYTE_TO_CHAN(g);
1490 rgba[BCOMP] = UBYTE_TO_CHAN(b);
1491 rgba[ACOMP] = CHAN_MAX;
1492 }
1493 }
1494
1495
1496 static void
1497 fxt1_decode_1CHROMA (const GLubyte *code, GLint t, GLchan *rgba)
1498 {
1499 const GLuint *cc;
1500 GLuint kk;
1501
1502 cc = (const GLuint *)code;
1503 if (t & 16) {
1504 cc++;
1505 t &= 15;
1506 }
1507 t = (cc[0] >> (t * 2)) & 3;
1508
1509 t *= 15;
1510 cc = (const GLuint *)(code + 8 + t / 8);
1511 kk = cc[0] >> (t & 7);
1512 rgba[BCOMP] = UBYTE_TO_CHAN( UP5(kk) );
1513 rgba[GCOMP] = UBYTE_TO_CHAN( UP5(kk >> 5) );
1514 rgba[RCOMP] = UBYTE_TO_CHAN( UP5(kk >> 10) );
1515 rgba[ACOMP] = CHAN_MAX;
1516 }
1517
1518
1519 static void
1520 fxt1_decode_1MIXED (const GLubyte *code, GLint t, GLchan *rgba)
1521 {
1522 const GLuint *cc;
1523 GLuint col[2][3];
1524 GLint glsb, selb;
1525
1526 cc = (const GLuint *)code;
1527 if (t & 16) {
1528 t &= 15;
1529 t = (cc[1] >> (t * 2)) & 3;
1530 /* col 2 */
1531 col[0][BCOMP] = (*(const GLuint *)(code + 11)) >> 6;
1532 col[0][GCOMP] = CC_SEL(cc, 99);
1533 col[0][RCOMP] = CC_SEL(cc, 104);
1534 /* col 3 */
1535 col[1][BCOMP] = CC_SEL(cc, 109);
1536 col[1][GCOMP] = CC_SEL(cc, 114);
1537 col[1][RCOMP] = CC_SEL(cc, 119);
1538 glsb = CC_SEL(cc, 126);
1539 selb = CC_SEL(cc, 33);
1540 } else {
1541 t = (cc[0] >> (t * 2)) & 3;
1542 /* col 0 */
1543 col[0][BCOMP] = CC_SEL(cc, 64);
1544 col[0][GCOMP] = CC_SEL(cc, 69);
1545 col[0][RCOMP] = CC_SEL(cc, 74);
1546 /* col 1 */
1547 col[1][BCOMP] = CC_SEL(cc, 79);
1548 col[1][GCOMP] = CC_SEL(cc, 84);
1549 col[1][RCOMP] = CC_SEL(cc, 89);
1550 glsb = CC_SEL(cc, 125);
1551 selb = CC_SEL(cc, 1);
1552 }
1553
1554 if (CC_SEL(cc, 124) & 1) {
1555 /* alpha[0] == 1 */
1556
1557 if (t == 3) {
1558 /* zero */
1559 rgba[RCOMP] = rgba[BCOMP] = rgba[GCOMP] = rgba[ACOMP] = 0;
1560 } else {
1561 GLubyte r, g, b;
1562 if (t == 0) {
1563 b = UP5(col[0][BCOMP]);
1564 g = UP5(col[0][GCOMP]);
1565 r = UP5(col[0][RCOMP]);
1566 } else if (t == 2) {
1567 b = UP5(col[1][BCOMP]);
1568 g = UP6(col[1][GCOMP], glsb);
1569 r = UP5(col[1][RCOMP]);
1570 } else {
1571 b = (UP5(col[0][BCOMP]) + UP5(col[1][BCOMP])) / 2;
1572 g = (UP5(col[0][GCOMP]) + UP6(col[1][GCOMP], glsb)) / 2;
1573 r = (UP5(col[0][RCOMP]) + UP5(col[1][RCOMP])) / 2;
1574 }
1575 rgba[RCOMP] = UBYTE_TO_CHAN(r);
1576 rgba[GCOMP] = UBYTE_TO_CHAN(g);
1577 rgba[BCOMP] = UBYTE_TO_CHAN(b);
1578 rgba[ACOMP] = CHAN_MAX;
1579 }
1580 } else {
1581 /* alpha[0] == 0 */
1582 GLubyte r, g, b;
1583 if (t == 0) {
1584 b = UP5(col[0][BCOMP]);
1585 g = UP6(col[0][GCOMP], glsb ^ selb);
1586 r = UP5(col[0][RCOMP]);
1587 } else if (t == 3) {
1588 b = UP5(col[1][BCOMP]);
1589 g = UP6(col[1][GCOMP], glsb);
1590 r = UP5(col[1][RCOMP]);
1591 } else {
1592 b = LERP(3, t, UP5(col[0][BCOMP]), UP5(col[1][BCOMP]));
1593 g = LERP(3, t, UP6(col[0][GCOMP], glsb ^ selb),
1594 UP6(col[1][GCOMP], glsb));
1595 r = LERP(3, t, UP5(col[0][RCOMP]), UP5(col[1][RCOMP]));
1596 }
1597 rgba[RCOMP] = UBYTE_TO_CHAN(r);
1598 rgba[GCOMP] = UBYTE_TO_CHAN(g);
1599 rgba[BCOMP] = UBYTE_TO_CHAN(b);
1600 rgba[ACOMP] = CHAN_MAX;
1601 }
1602 }
1603
1604
1605 static void
1606 fxt1_decode_1ALPHA (const GLubyte *code, GLint t, GLchan *rgba)
1607 {
1608 const GLuint *cc;
1609 GLubyte r, g, b, a;
1610
1611 cc = (const GLuint *)code;
1612 if (CC_SEL(cc, 124) & 1) {
1613 /* lerp == 1 */
1614 GLuint col0[4];
1615
1616 if (t & 16) {
1617 t &= 15;
1618 t = (cc[1] >> (t * 2)) & 3;
1619 /* col 2 */
1620 col0[BCOMP] = (*(const GLuint *)(code + 11)) >> 6;
1621 col0[GCOMP] = CC_SEL(cc, 99);
1622 col0[RCOMP] = CC_SEL(cc, 104);
1623 col0[ACOMP] = CC_SEL(cc, 119);
1624 } else {
1625 t = (cc[0] >> (t * 2)) & 3;
1626 /* col 0 */
1627 col0[BCOMP] = CC_SEL(cc, 64);
1628 col0[GCOMP] = CC_SEL(cc, 69);
1629 col0[RCOMP] = CC_SEL(cc, 74);
1630 col0[ACOMP] = CC_SEL(cc, 109);
1631 }
1632
1633 if (t == 0) {
1634 b = UP5(col0[BCOMP]);
1635 g = UP5(col0[GCOMP]);
1636 r = UP5(col0[RCOMP]);
1637 a = UP5(col0[ACOMP]);
1638 } else if (t == 3) {
1639 b = UP5(CC_SEL(cc, 79));
1640 g = UP5(CC_SEL(cc, 84));
1641 r = UP5(CC_SEL(cc, 89));
1642 a = UP5(CC_SEL(cc, 114));
1643 } else {
1644 b = LERP(3, t, UP5(col0[BCOMP]), UP5(CC_SEL(cc, 79)));
1645 g = LERP(3, t, UP5(col0[GCOMP]), UP5(CC_SEL(cc, 84)));
1646 r = LERP(3, t, UP5(col0[RCOMP]), UP5(CC_SEL(cc, 89)));
1647 a = LERP(3, t, UP5(col0[ACOMP]), UP5(CC_SEL(cc, 114)));
1648 }
1649 } else {
1650 /* lerp == 0 */
1651
1652 if (t & 16) {
1653 cc++;
1654 t &= 15;
1655 }
1656 t = (cc[0] >> (t * 2)) & 3;
1657
1658 if (t == 3) {
1659 /* zero */
1660 r = g = b = 0;
1661 } else {
1662 GLuint kk;
1663 cc = (const GLuint *)code;
1664 a = UP5(cc[3] >> (t * 5 + 13));
1665 t *= 15;
1666 cc = (const GLuint *)(code + 8 + t / 8);
1667 kk = cc[0] >> (t & 7);
1668 b = UP5(kk);
1669 g = UP5(kk >> 5);
1670 r = UP5(kk >> 10);
1671 }
1672 }
1673 rgba[RCOMP] = UBYTE_TO_CHAN(r);
1674 rgba[GCOMP] = UBYTE_TO_CHAN(g);
1675 rgba[BCOMP] = UBYTE_TO_CHAN(b);
1676 rgba[ACOMP] = UBYTE_TO_CHAN(a);
1677 }
1678
1679
1680 static void
1681 fxt1_decode_1 (const void *texture, GLint stride, /* in pixels */
1682 GLint i, GLint j, GLchan *rgba)
1683 {
1684 static void (*decode_1[]) (const GLubyte *, GLint, GLchan *) = {
1685 fxt1_decode_1HI, /* cc-high = "00?" */
1686 fxt1_decode_1HI, /* cc-high = "00?" */
1687 fxt1_decode_1CHROMA, /* cc-chroma = "010" */
1688 fxt1_decode_1ALPHA, /* alpha = "011" */
1689 fxt1_decode_1MIXED, /* mixed = "1??" */
1690 fxt1_decode_1MIXED, /* mixed = "1??" */
1691 fxt1_decode_1MIXED, /* mixed = "1??" */
1692 fxt1_decode_1MIXED /* mixed = "1??" */
1693 };
1694
1695 const GLubyte *code = (const GLubyte *)texture +
1696 ((j / 4) * (stride / 8) + (i / 8)) * 16;
1697 GLint mode = CC_SEL(code, 125);
1698 GLint t = i & 7;
1699
1700 if (t & 4) {
1701 t += 12;
1702 }
1703 t += (j & 3) * 4;
1704
1705 decode_1[mode](code, t, rgba);
1706 }