Merge branch 'gallium-0.1' into gallium-0.2
[mesa.git] / src / mesa / main / texcompress_fxt1.c
1 /*
2 * Mesa 3-D graphics library
3 * Version: 7.1
4 *
5 * Copyright (C) 1999-2008 Brian Paul All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included
15 * in all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
21 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25
26 /**
27 * \file texcompress_fxt1.c
28 * GL_EXT_texture_compression_fxt1 support.
29 */
30
31
32 #include "glheader.h"
33 #include "imports.h"
34 #include "colormac.h"
35 #include "context.h"
36 #include "convolve.h"
37 #include "image.h"
38 #include "mipmap.h"
39 #include "texcompress.h"
40 #include "texformat.h"
41 #include "texstore.h"
42
43
44 static void
45 fxt1_encode (GLuint width, GLuint height, GLint comps,
46 const void *source, GLint srcRowStride,
47 void *dest, GLint destRowStride);
48
49 void
50 fxt1_decode_1 (const void *texture, GLint stride,
51 GLint i, GLint j, GLchan *rgba);
52
53
54 /**
55 * Called during context initialization.
56 */
57 void
58 _mesa_init_texture_fxt1( GLcontext *ctx )
59 {
60 (void) ctx;
61 }
62
63
64 /**
65 * Called via TexFormat->StoreImage to store an RGB_FXT1 texture.
66 */
67 static GLboolean
68 texstore_rgb_fxt1(TEXSTORE_PARAMS)
69 {
70 const GLchan *pixels;
71 GLint srcRowStride;
72 GLubyte *dst;
73 const GLint texWidth = dstRowStride * 8 / 16; /* a bit of a hack */
74 const GLchan *tempImage = NULL;
75
76 ASSERT(dstFormat == &_mesa_texformat_rgb_fxt1);
77 ASSERT(dstXoffset % 8 == 0);
78 ASSERT(dstYoffset % 4 == 0);
79 ASSERT(dstZoffset == 0);
80 (void) dstZoffset;
81 (void) dstImageOffsets;
82
83 if (srcFormat != GL_RGB ||
84 srcType != CHAN_TYPE ||
85 ctx->_ImageTransferState ||
86 srcPacking->SwapBytes) {
87 /* convert image to RGB/GLchan */
88 tempImage = _mesa_make_temp_chan_image(ctx, dims,
89 baseInternalFormat,
90 dstFormat->BaseFormat,
91 srcWidth, srcHeight, srcDepth,
92 srcFormat, srcType, srcAddr,
93 srcPacking);
94 if (!tempImage)
95 return GL_FALSE; /* out of memory */
96 _mesa_adjust_image_for_convolution(ctx, dims, &srcWidth, &srcHeight);
97 pixels = tempImage;
98 srcRowStride = 3 * srcWidth;
99 srcFormat = GL_RGB;
100 }
101 else {
102 pixels = (const GLchan *) srcAddr;
103 srcRowStride = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat,
104 srcType) / sizeof(GLchan);
105 }
106
107 dst = _mesa_compressed_image_address(dstXoffset, dstYoffset, 0,
108 dstFormat->MesaFormat,
109 texWidth, (GLubyte *) dstAddr);
110
111 fxt1_encode(srcWidth, srcHeight, 3, pixels, srcRowStride,
112 dst, dstRowStride);
113
114 if (tempImage)
115 _mesa_free((void*) tempImage);
116
117 return GL_TRUE;
118 }
119
120
121 /**
122 * Called via TexFormat->StoreImage to store an RGBA_FXT1 texture.
123 */
124 static GLboolean
125 texstore_rgba_fxt1(TEXSTORE_PARAMS)
126 {
127 const GLchan *pixels;
128 GLint srcRowStride;
129 GLubyte *dst;
130 GLint texWidth = dstRowStride * 8 / 16; /* a bit of a hack */
131 const GLchan *tempImage = NULL;
132
133 ASSERT(dstFormat == &_mesa_texformat_rgba_fxt1);
134 ASSERT(dstXoffset % 8 == 0);
135 ASSERT(dstYoffset % 4 == 0);
136 ASSERT(dstZoffset == 0);
137 (void) dstZoffset;
138 (void) dstImageOffsets;
139
140 if (srcFormat != GL_RGBA ||
141 srcType != CHAN_TYPE ||
142 ctx->_ImageTransferState ||
143 srcPacking->SwapBytes) {
144 /* convert image to RGBA/GLchan */
145 tempImage = _mesa_make_temp_chan_image(ctx, dims,
146 baseInternalFormat,
147 dstFormat->BaseFormat,
148 srcWidth, srcHeight, srcDepth,
149 srcFormat, srcType, srcAddr,
150 srcPacking);
151 if (!tempImage)
152 return GL_FALSE; /* out of memory */
153 _mesa_adjust_image_for_convolution(ctx, dims, &srcWidth, &srcHeight);
154 pixels = tempImage;
155 srcRowStride = 4 * srcWidth;
156 srcFormat = GL_RGBA;
157 }
158 else {
159 pixels = (const GLchan *) srcAddr;
160 srcRowStride = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat,
161 srcType) / sizeof(GLchan);
162 }
163
164 dst = _mesa_compressed_image_address(dstXoffset, dstYoffset, 0,
165 dstFormat->MesaFormat,
166 texWidth, (GLubyte *) dstAddr);
167
168 fxt1_encode(srcWidth, srcHeight, 4, pixels, srcRowStride,
169 dst, dstRowStride);
170
171 if (tempImage)
172 _mesa_free((void*) tempImage);
173
174 return GL_TRUE;
175 }
176
177
178 static void
179 fetch_texel_2d_rgba_fxt1( const struct gl_texture_image *texImage,
180 GLint i, GLint j, GLint k, GLchan *texel )
181 {
182 (void) k;
183 fxt1_decode_1(texImage->Data, texImage->RowStride, i, j, texel);
184 }
185
186
187 static void
188 fetch_texel_2d_f_rgba_fxt1( const struct gl_texture_image *texImage,
189 GLint i, GLint j, GLint k, GLfloat *texel )
190 {
191 /* just sample as GLchan and convert to float here */
192 GLchan rgba[4];
193 (void) k;
194 fxt1_decode_1(texImage->Data, texImage->RowStride, i, j, rgba);
195 texel[RCOMP] = CHAN_TO_FLOAT(rgba[RCOMP]);
196 texel[GCOMP] = CHAN_TO_FLOAT(rgba[GCOMP]);
197 texel[BCOMP] = CHAN_TO_FLOAT(rgba[BCOMP]);
198 texel[ACOMP] = CHAN_TO_FLOAT(rgba[ACOMP]);
199 }
200
201
202 static void
203 fetch_texel_2d_rgb_fxt1( const struct gl_texture_image *texImage,
204 GLint i, GLint j, GLint k, GLchan *texel )
205 {
206 (void) k;
207 fxt1_decode_1(texImage->Data, texImage->RowStride, i, j, texel);
208 texel[ACOMP] = 255;
209 }
210
211
212 static void
213 fetch_texel_2d_f_rgb_fxt1( const struct gl_texture_image *texImage,
214 GLint i, GLint j, GLint k, GLfloat *texel )
215 {
216 /* just sample as GLchan and convert to float here */
217 GLchan rgba[4];
218 (void) k;
219 fxt1_decode_1(texImage->Data, texImage->RowStride, i, j, rgba);
220 texel[RCOMP] = CHAN_TO_FLOAT(rgba[RCOMP]);
221 texel[GCOMP] = CHAN_TO_FLOAT(rgba[GCOMP]);
222 texel[BCOMP] = CHAN_TO_FLOAT(rgba[BCOMP]);
223 texel[ACOMP] = 1.0F;
224 }
225
226
227
/**
 * Texture format descriptor for RGB_FXT1.
 *
 * The initializer is positional; the trailing comment on each line names
 * the slot being filled.  Channel bit counts are approximations, and only
 * the 2D fetch functions are provided (1D/3D entries are NULL).
 */
const struct gl_texture_format _mesa_texformat_rgb_fxt1 = {
   MESA_FORMAT_RGB_FXT1,                /* MesaFormat */
   GL_RGB,                              /* BaseFormat */
   GL_UNSIGNED_NORMALIZED_ARB,          /* DataType */
   4, /*approx*/                        /* RedBits */
   4, /*approx*/                        /* GreenBits */
   4, /*approx*/                        /* BlueBits */
   0,                                   /* AlphaBits */
   0,                                   /* LuminanceBits */
   0,                                   /* IntensityBits */
   0,                                   /* IndexBits */
   0,                                   /* DepthBits */
   0,                                   /* StencilBits */
   0,                                   /* TexelBytes */
   texstore_rgb_fxt1,                   /* StoreTexImageFunc */
   NULL, /*impossible*/                 /* FetchTexel1D */
   fetch_texel_2d_rgb_fxt1,             /* FetchTexel2D */
   NULL, /*impossible*/                 /* FetchTexel3D */
   NULL, /*impossible*/                 /* FetchTexel1Df */
   fetch_texel_2d_f_rgb_fxt1,           /* FetchTexel2Df */
   NULL, /*impossible*/                 /* FetchTexel3Df */
   NULL                                 /* StoreTexel */
};
251
/**
 * Texture format descriptor for RGBA_FXT1.
 *
 * The initializer is positional; the trailing comment on each line names
 * the slot being filled.  Channel bit counts are approximations, and only
 * the 2D fetch functions are provided (1D/3D entries are NULL).
 */
const struct gl_texture_format _mesa_texformat_rgba_fxt1 = {
   MESA_FORMAT_RGBA_FXT1,               /* MesaFormat */
   GL_RGBA,                             /* BaseFormat */
   GL_UNSIGNED_NORMALIZED_ARB,          /* DataType */
   4, /*approx*/                        /* RedBits */
   4, /*approx*/                        /* GreenBits */
   4, /*approx*/                        /* BlueBits */
   1, /*approx*/                        /* AlphaBits */
   0,                                   /* LuminanceBits */
   0,                                   /* IntensityBits */
   0,                                   /* IndexBits */
   0,                                   /* DepthBits */
   0,                                   /* StencilBits */
   0,                                   /* TexelBytes */
   texstore_rgba_fxt1,                  /* StoreTexImageFunc */
   NULL, /*impossible*/                 /* FetchTexel1D */
   fetch_texel_2d_rgba_fxt1,            /* FetchTexel2D */
   NULL, /*impossible*/                 /* FetchTexel3D */
   NULL, /*impossible*/                 /* FetchTexel1Df */
   fetch_texel_2d_f_rgba_fxt1,          /* FetchTexel2Df */
   NULL, /*impossible*/                 /* FetchTexel3Df */
   NULL                                 /* StoreTexel */
};
275
276
277 /***************************************************************************\
278 * FXT1 encoder
279 *
280 * The encoder was built by reversing the decoder,
281 * and is vaguely based on Texus2 by 3dfx. Note that this code
282 * is merely a proof of concept, since it is highly UNoptimized;
283 * moreover, it is sub-optimal due to initial conditions passed
284 * to Lloyd's algorithm (the interpolation modes are even worse).
285 \***************************************************************************/
286
287
#define MAX_COMP 4 /* ever needed maximum number of components in texel */
#define MAX_VECT 4 /* ever needed maximum number of base vectors to find */
#define N_TEXELS 32 /* number of texels in a block (always 32) */
#define LL_N_REP 50 /* number of iterations in lloyd's vq */
#define LL_RMS_D 10 /* fault tolerance (maximum delta) */
#define LL_RMS_E 255 /* fault tolerance (maximum error) */
#define ALPHA_TS 2 /* alpha threshold: (255 - ALPHA_TS) deemed opaque */

/*
 * True when the 4-component byte texel `v` is transparent black, i.e. all
 * four bytes are zero.
 *
 * The bytes are tested individually instead of being reinterpreted as one
 * GLuint: the texel arrays are GLubyte arrays, so a GLuint load through a
 * cast pointer violates the aliasing rules and may be misaligned.
 * Note that `v` is evaluated more than once; pass a side-effect-free
 * expression.
 */
#define ISTBLACK(v) \
   ((v)[0] == 0 && (v)[1] == 0 && (v)[2] == 0 && (v)[3] == 0)
296
297
/*
 * Define a 64-bit unsigned integer type and macros
 *
 *   FX64_MOV32(a, b)  load the 32-bit value b into a (upper half cleared)
 *   FX64_OR32(a, b)   OR the 32-bit value b into the low half of a
 *   FX64_SHL(a, c)    shift a left by c bits
 *
 * FX64_NATIVE tells which of the two representations is in use.
 */
#if 1

#define FX64_NATIVE 1

typedef uint64_t Fx64;

#define FX64_MOV32(a, b) ((a) = (b))
#define FX64_OR32(a, b)  ((a) |= (b))
#define FX64_SHL(a, c)   ((a) <<= (c))

#else

#define FX64_NATIVE 0

typedef struct {
   GLuint lo, hi;
} Fx64;

/* clear the upper half as well, so later shifts never read garbage */
#define FX64_MOV32(a, b) \
   do { \
      (a).lo = (b); \
      (a).hi = 0; \
   } while (0)

#define FX64_OR32(a, b) ((a).lo |= (b))

/* a zero shift count is a no-op; without the guard it would shift a
 * 32-bit value by 32, which is undefined
 */
#define FX64_SHL(a, c) \
   do { \
      if ((c) >= 32) { \
         (a).hi = (a).lo << ((c) - 32); \
         (a).lo = 0; \
      } else if ((c) > 0) { \
         (a).hi = ((a).hi << (c)) | ((a).lo >> (32 - (c))); \
         (a).lo <<= (c); \
      } \
   } while (0)

#endif
334
335
#define F(i) ((GLfloat)1) /* can be used to obtain an oblong metric: 0.30 / 0.59 / 0.11 */
#define SAFECDOT 1 /* for paranoids */

/*
 * Compute the interpolation vector IV and bias B that map a colour onto
 * the line from V0 to V1, scaled so that V0 maps to 0 and V1 to NV.
 * NC is the number of components.  Relies on a loop index `i` declared by
 * the caller.
 *
 * If V0 and V1 coincide (zero length), the scale is forced to 0 instead of
 * dividing by zero, so IV becomes all zeros and B becomes 0.5; every
 * colour then maps to index 0 in CALCCDOT.
 */
#define MAKEIVEC(NV, NC, IV, B, V0, V1) \
   do { \
      /* compute interpolation vector */ \
      GLfloat d2 = 0.0F; \
      GLfloat rd2; \
      \
      for (i = 0; i < (NC); i++) { \
         (IV)[i] = ((V1)[i] - (V0)[i]) * F(i); \
         d2 += (IV)[i] * (IV)[i]; \
      } \
      rd2 = (d2 > 0.0F) ? (GLfloat)(NV) / d2 : 0.0F; \
      (B) = 0; \
      for (i = 0; i < (NC); i++) { \
         (IV)[i] *= F(i); \
         (B) -= (IV)[i] * (V0)[i]; \
         (IV)[i] *= rd2; \
      } \
      (B) = (B) * rd2 + 0.5f; \
   } while (0)

/*
 * Project colour V onto the interpolation vector IV (with bias B) and
 * store the resulting index in TEXEL.  When SAFECDOT is non-zero the index
 * is clamped to the range [0, NV].  Relies on a loop index `i` declared by
 * the caller.
 */
#define CALCCDOT(TEXEL, NV, NC, IV, B, V) \
   do { \
      GLfloat dot = 0.0F; \
      for (i = 0; i < (NC); i++) { \
         dot += (V)[i] * (IV)[i]; \
      } \
      (TEXEL) = (GLint)(dot + (B)); \
      if (SAFECDOT) { \
         if ((TEXEL) < 0) { \
            (TEXEL) = 0; \
         } else if ((TEXEL) > (NV)) { \
            (TEXEL) = (NV); \
         } \
      } \
   } while (0)
374
375
376 static GLint
377 fxt1_bestcol (GLfloat vec[][MAX_COMP], GLint nv,
378 GLubyte input[MAX_COMP], GLint nc)
379 {
380 GLint i, j, best = -1;
381 GLfloat err = 1e9; /* big enough */
382
383 for (j = 0; j < nv; j++) {
384 GLfloat e = 0.0F;
385 for (i = 0; i < nc; i++) {
386 e += (vec[j][i] - input[i]) * (vec[j][i] - input[i]);
387 }
388 if (e < err) {
389 err = e;
390 best = j;
391 }
392 }
393
394 return best;
395 }
396
397
398 static GLint
399 fxt1_worst (GLfloat vec[MAX_COMP],
400 GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
401 {
402 GLint i, k, worst = -1;
403 GLfloat err = -1.0F; /* small enough */
404
405 for (k = 0; k < n; k++) {
406 GLfloat e = 0.0F;
407 for (i = 0; i < nc; i++) {
408 e += (vec[i] - input[k][i]) * (vec[i] - input[k][i]);
409 }
410 if (e > err) {
411 err = e;
412 worst = k;
413 }
414 }
415
416 return worst;
417 }
418
419
420 static GLint
421 fxt1_variance (GLdouble variance[MAX_COMP],
422 GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
423 {
424 GLint i, k, best = 0;
425 GLint sx, sx2;
426 GLdouble var, maxvar = -1; /* small enough */
427 GLdouble teenth = 1.0 / n;
428
429 for (i = 0; i < nc; i++) {
430 sx = sx2 = 0;
431 for (k = 0; k < n; k++) {
432 GLint t = input[k][i];
433 sx += t;
434 sx2 += t * t;
435 }
436 var = sx2 * teenth - sx * sx * teenth * teenth;
437 if (maxvar < var) {
438 maxvar = var;
439 best = i;
440 }
441 if (variance) {
442 variance[i] = var;
443 }
444 }
445
446 return best;
447 }
448
449
450 static GLint
451 fxt1_choose (GLfloat vec[][MAX_COMP], GLint nv,
452 GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
453 {
454 #if 0
455 /* Choose colors from a grid.
456 */
457 GLint i, j;
458
459 for (j = 0; j < nv; j++) {
460 GLint m = j * (n - 1) / (nv - 1);
461 for (i = 0; i < nc; i++) {
462 vec[j][i] = input[m][i];
463 }
464 }
465 #else
466 /* Our solution here is to find the darkest and brightest colors in
467 * the 8x4 tile and use those as the two representative colors.
468 * There are probably better algorithms to use (histogram-based).
469 */
470 GLint i, j, k;
471 GLint minSum = 2000; /* big enough */
472 GLint maxSum = -1; /* small enough */
473 GLint minCol = 0; /* phoudoin: silent compiler! */
474 GLint maxCol = 0; /* phoudoin: silent compiler! */
475
476 struct {
477 GLint flag;
478 GLint key;
479 GLint freq;
480 GLint idx;
481 } hist[N_TEXELS];
482 GLint lenh = 0;
483
484 _mesa_memset(hist, 0, sizeof(hist));
485
486 for (k = 0; k < n; k++) {
487 GLint l;
488 GLint key = 0;
489 GLint sum = 0;
490 for (i = 0; i < nc; i++) {
491 key <<= 8;
492 key |= input[k][i];
493 sum += input[k][i];
494 }
495 for (l = 0; l < n; l++) {
496 if (!hist[l].flag) {
497 /* alloc new slot */
498 hist[l].flag = !0;
499 hist[l].key = key;
500 hist[l].freq = 1;
501 hist[l].idx = k;
502 lenh = l + 1;
503 break;
504 } else if (hist[l].key == key) {
505 hist[l].freq++;
506 break;
507 }
508 }
509 if (minSum > sum) {
510 minSum = sum;
511 minCol = k;
512 }
513 if (maxSum < sum) {
514 maxSum = sum;
515 maxCol = k;
516 }
517 }
518
519 if (lenh <= nv) {
520 for (j = 0; j < lenh; j++) {
521 for (i = 0; i < nc; i++) {
522 vec[j][i] = (GLfloat)input[hist[j].idx][i];
523 }
524 }
525 for (; j < nv; j++) {
526 for (i = 0; i < nc; i++) {
527 vec[j][i] = vec[0][i];
528 }
529 }
530 return 0;
531 }
532
533 for (j = 0; j < nv; j++) {
534 for (i = 0; i < nc; i++) {
535 vec[j][i] = ((nv - 1 - j) * input[minCol][i] + j * input[maxCol][i] + (nv - 1) / 2) / (GLfloat)(nv - 1);
536 }
537 }
538 #endif
539
540 return !0;
541 }
542
543
544 static GLint
545 fxt1_lloyd (GLfloat vec[][MAX_COMP], GLint nv,
546 GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
547 {
548 /* Use the generalized lloyd's algorithm for VQ:
549 * find 4 color vectors.
550 *
551 * for each sample color
552 * sort to nearest vector.
553 *
554 * replace each vector with the centroid of it's matching colors.
555 *
556 * repeat until RMS doesn't improve.
557 *
558 * if a color vector has no samples, or becomes the same as another
559 * vector, replace it with the color which is farthest from a sample.
560 *
561 * vec[][MAX_COMP] initial vectors and resulting colors
562 * nv number of resulting colors required
563 * input[N_TEXELS][MAX_COMP] input texels
564 * nc number of components in input / vec
565 * n number of input samples
566 */
567
568 GLint sum[MAX_VECT][MAX_COMP]; /* used to accumulate closest texels */
569 GLint cnt[MAX_VECT]; /* how many times a certain vector was chosen */
570 GLfloat error, lasterror = 1e9;
571
572 GLint i, j, k, rep;
573
574 /* the quantizer */
575 for (rep = 0; rep < LL_N_REP; rep++) {
576 /* reset sums & counters */
577 for (j = 0; j < nv; j++) {
578 for (i = 0; i < nc; i++) {
579 sum[j][i] = 0;
580 }
581 cnt[j] = 0;
582 }
583 error = 0;
584
585 /* scan whole block */
586 for (k = 0; k < n; k++) {
587 #if 1
588 GLint best = -1;
589 GLfloat err = 1e9; /* big enough */
590 /* determine best vector */
591 for (j = 0; j < nv; j++) {
592 GLfloat e = (vec[j][0] - input[k][0]) * (vec[j][0] - input[k][0]) +
593 (vec[j][1] - input[k][1]) * (vec[j][1] - input[k][1]) +
594 (vec[j][2] - input[k][2]) * (vec[j][2] - input[k][2]);
595 if (nc == 4) {
596 e += (vec[j][3] - input[k][3]) * (vec[j][3] - input[k][3]);
597 }
598 if (e < err) {
599 err = e;
600 best = j;
601 }
602 }
603 #else
604 GLint best = fxt1_bestcol(vec, nv, input[k], nc, &err);
605 #endif
606 /* add in closest color */
607 for (i = 0; i < nc; i++) {
608 sum[best][i] += input[k][i];
609 }
610 /* mark this vector as used */
611 cnt[best]++;
612 /* accumulate error */
613 error += err;
614 }
615
616 /* check RMS */
617 if ((error < LL_RMS_E) ||
618 ((error < lasterror) && ((lasterror - error) < LL_RMS_D))) {
619 return !0; /* good match */
620 }
621 lasterror = error;
622
623 /* move each vector to the barycenter of its closest colors */
624 for (j = 0; j < nv; j++) {
625 if (cnt[j]) {
626 GLfloat div = 1.0F / cnt[j];
627 for (i = 0; i < nc; i++) {
628 vec[j][i] = div * sum[j][i];
629 }
630 } else {
631 /* this vec has no samples or is identical with a previous vec */
632 GLint worst = fxt1_worst(vec[j], input, nc, n);
633 for (i = 0; i < nc; i++) {
634 vec[j][i] = input[worst][i];
635 }
636 }
637 }
638 }
639
640 return 0; /* could not converge fast enough */
641 }
642
643
644 static void
645 fxt1_quantize_CHROMA (GLuint *cc,
646 GLubyte input[N_TEXELS][MAX_COMP])
647 {
648 const GLint n_vect = 4; /* 4 base vectors to find */
649 const GLint n_comp = 3; /* 3 components: R, G, B */
650 GLfloat vec[MAX_VECT][MAX_COMP];
651 GLint i, j, k;
652 Fx64 hi; /* high quadword */
653 GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
654
655 if (fxt1_choose(vec, n_vect, input, n_comp, N_TEXELS) != 0) {
656 fxt1_lloyd(vec, n_vect, input, n_comp, N_TEXELS);
657 }
658
659 FX64_MOV32(hi, 4); /* cc-chroma = "010" + unused bit */
660 for (j = n_vect - 1; j >= 0; j--) {
661 for (i = 0; i < n_comp; i++) {
662 /* add in colors */
663 FX64_SHL(hi, 5);
664 FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
665 }
666 }
667 ((Fx64 *)cc)[1] = hi;
668
669 lohi = lolo = 0;
670 /* right microtile */
671 for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) {
672 lohi <<= 2;
673 lohi |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
674 }
675 /* left microtile */
676 for (; k >= 0; k--) {
677 lolo <<= 2;
678 lolo |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
679 }
680 cc[1] = lohi;
681 cc[0] = lolo;
682 }
683
684
685 static void
686 fxt1_quantize_ALPHA0 (GLuint *cc,
687 GLubyte input[N_TEXELS][MAX_COMP],
688 GLubyte reord[N_TEXELS][MAX_COMP], GLint n)
689 {
690 const GLint n_vect = 3; /* 3 base vectors to find */
691 const GLint n_comp = 4; /* 4 components: R, G, B, A */
692 GLfloat vec[MAX_VECT][MAX_COMP];
693 GLint i, j, k;
694 Fx64 hi; /* high quadword */
695 GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
696
697 /* the last vector indicates zero */
698 for (i = 0; i < n_comp; i++) {
699 vec[n_vect][i] = 0;
700 }
701
702 /* the first n texels in reord are guaranteed to be non-zero */
703 if (fxt1_choose(vec, n_vect, reord, n_comp, n) != 0) {
704 fxt1_lloyd(vec, n_vect, reord, n_comp, n);
705 }
706
707 FX64_MOV32(hi, 6); /* alpha = "011" + lerp = 0 */
708 for (j = n_vect - 1; j >= 0; j--) {
709 /* add in alphas */
710 FX64_SHL(hi, 5);
711 FX64_OR32(hi, (GLuint)(vec[j][ACOMP] / 8.0F));
712 }
713 for (j = n_vect - 1; j >= 0; j--) {
714 for (i = 0; i < n_comp - 1; i++) {
715 /* add in colors */
716 FX64_SHL(hi, 5);
717 FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
718 }
719 }
720 ((Fx64 *)cc)[1] = hi;
721
722 lohi = lolo = 0;
723 /* right microtile */
724 for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) {
725 lohi <<= 2;
726 lohi |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
727 }
728 /* left microtile */
729 for (; k >= 0; k--) {
730 lolo <<= 2;
731 lolo |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
732 }
733 cc[1] = lohi;
734 cc[0] = lolo;
735 }
736
737
738 static void
739 fxt1_quantize_ALPHA1 (GLuint *cc,
740 GLubyte input[N_TEXELS][MAX_COMP])
741 {
742 const GLint n_vect = 3; /* highest vector number in each microtile */
743 const GLint n_comp = 4; /* 4 components: R, G, B, A */
744 GLfloat vec[1 + 1 + 1][MAX_COMP]; /* 1.5 extrema for each sub-block */
745 GLfloat b, iv[MAX_COMP]; /* interpolation vector */
746 GLint i, j, k;
747 Fx64 hi; /* high quadword */
748 GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
749
750 GLint minSum;
751 GLint maxSum;
752 GLint minColL = 0, maxColL = 0;
753 GLint minColR = 0, maxColR = 0;
754 GLint sumL = 0, sumR = 0;
755 GLint nn_comp;
756 /* Our solution here is to find the darkest and brightest colors in
757 * the 4x4 tile and use those as the two representative colors.
758 * There are probably better algorithms to use (histogram-based).
759 */
760 nn_comp = n_comp;
761 while ((minColL == maxColL) && nn_comp) {
762 minSum = 2000; /* big enough */
763 maxSum = -1; /* small enough */
764 for (k = 0; k < N_TEXELS / 2; k++) {
765 GLint sum = 0;
766 for (i = 0; i < nn_comp; i++) {
767 sum += input[k][i];
768 }
769 if (minSum > sum) {
770 minSum = sum;
771 minColL = k;
772 }
773 if (maxSum < sum) {
774 maxSum = sum;
775 maxColL = k;
776 }
777 sumL += sum;
778 }
779
780 nn_comp--;
781 }
782
783 nn_comp = n_comp;
784 while ((minColR == maxColR) && nn_comp) {
785 minSum = 2000; /* big enough */
786 maxSum = -1; /* small enough */
787 for (k = N_TEXELS / 2; k < N_TEXELS; k++) {
788 GLint sum = 0;
789 for (i = 0; i < nn_comp; i++) {
790 sum += input[k][i];
791 }
792 if (minSum > sum) {
793 minSum = sum;
794 minColR = k;
795 }
796 if (maxSum < sum) {
797 maxSum = sum;
798 maxColR = k;
799 }
800 sumR += sum;
801 }
802
803 nn_comp--;
804 }
805
806 /* choose the common vector (yuck!) */
807 {
808 GLint j1, j2;
809 GLint v1 = 0, v2 = 0;
810 GLfloat err = 1e9; /* big enough */
811 GLfloat tv[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
812 for (i = 0; i < n_comp; i++) {
813 tv[0][i] = input[minColL][i];
814 tv[1][i] = input[maxColL][i];
815 tv[2][i] = input[minColR][i];
816 tv[3][i] = input[maxColR][i];
817 }
818 for (j1 = 0; j1 < 2; j1++) {
819 for (j2 = 2; j2 < 4; j2++) {
820 GLfloat e = 0.0F;
821 for (i = 0; i < n_comp; i++) {
822 e += (tv[j1][i] - tv[j2][i]) * (tv[j1][i] - tv[j2][i]);
823 }
824 if (e < err) {
825 err = e;
826 v1 = j1;
827 v2 = j2;
828 }
829 }
830 }
831 for (i = 0; i < n_comp; i++) {
832 vec[0][i] = tv[1 - v1][i];
833 vec[1][i] = (tv[v1][i] * sumL + tv[v2][i] * sumR) / (sumL + sumR);
834 vec[2][i] = tv[5 - v2][i];
835 }
836 }
837
838 /* left microtile */
839 cc[0] = 0;
840 if (minColL != maxColL) {
841 /* compute interpolation vector */
842 MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
843
844 /* add in texels */
845 lolo = 0;
846 for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
847 GLint texel;
848 /* interpolate color */
849 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
850 /* add in texel */
851 lolo <<= 2;
852 lolo |= texel;
853 }
854
855 cc[0] = lolo;
856 }
857
858 /* right microtile */
859 cc[1] = 0;
860 if (minColR != maxColR) {
861 /* compute interpolation vector */
862 MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[1]);
863
864 /* add in texels */
865 lohi = 0;
866 for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
867 GLint texel;
868 /* interpolate color */
869 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
870 /* add in texel */
871 lohi <<= 2;
872 lohi |= texel;
873 }
874
875 cc[1] = lohi;
876 }
877
878 FX64_MOV32(hi, 7); /* alpha = "011" + lerp = 1 */
879 for (j = n_vect - 1; j >= 0; j--) {
880 /* add in alphas */
881 FX64_SHL(hi, 5);
882 FX64_OR32(hi, (GLuint)(vec[j][ACOMP] / 8.0F));
883 }
884 for (j = n_vect - 1; j >= 0; j--) {
885 for (i = 0; i < n_comp - 1; i++) {
886 /* add in colors */
887 FX64_SHL(hi, 5);
888 FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
889 }
890 }
891 ((Fx64 *)cc)[1] = hi;
892 }
893
894
895 static void
896 fxt1_quantize_HI (GLuint *cc,
897 GLubyte input[N_TEXELS][MAX_COMP],
898 GLubyte reord[N_TEXELS][MAX_COMP], GLint n)
899 {
900 const GLint n_vect = 6; /* highest vector number */
901 const GLint n_comp = 3; /* 3 components: R, G, B */
902 GLfloat b = 0.0F; /* phoudoin: silent compiler! */
903 GLfloat iv[MAX_COMP]; /* interpolation vector */
904 GLint i, k;
905 GLuint hihi; /* high quadword: hi dword */
906
907 GLint minSum = 2000; /* big enough */
908 GLint maxSum = -1; /* small enough */
909 GLint minCol = 0; /* phoudoin: silent compiler! */
910 GLint maxCol = 0; /* phoudoin: silent compiler! */
911
912 /* Our solution here is to find the darkest and brightest colors in
913 * the 8x4 tile and use those as the two representative colors.
914 * There are probably better algorithms to use (histogram-based).
915 */
916 for (k = 0; k < n; k++) {
917 GLint sum = 0;
918 for (i = 0; i < n_comp; i++) {
919 sum += reord[k][i];
920 }
921 if (minSum > sum) {
922 minSum = sum;
923 minCol = k;
924 }
925 if (maxSum < sum) {
926 maxSum = sum;
927 maxCol = k;
928 }
929 }
930
931 hihi = 0; /* cc-hi = "00" */
932 for (i = 0; i < n_comp; i++) {
933 /* add in colors */
934 hihi <<= 5;
935 hihi |= reord[maxCol][i] >> 3;
936 }
937 for (i = 0; i < n_comp; i++) {
938 /* add in colors */
939 hihi <<= 5;
940 hihi |= reord[minCol][i] >> 3;
941 }
942 cc[3] = hihi;
943 cc[0] = cc[1] = cc[2] = 0;
944
945 /* compute interpolation vector */
946 if (minCol != maxCol) {
947 MAKEIVEC(n_vect, n_comp, iv, b, reord[minCol], reord[maxCol]);
948 }
949
950 /* add in texels */
951 for (k = N_TEXELS - 1; k >= 0; k--) {
952 GLint t = k * 3;
953 GLuint *kk = (GLuint *)((char *)cc + t / 8);
954 GLint texel = n_vect + 1; /* transparent black */
955
956 if (!ISTBLACK(input[k])) {
957 if (minCol != maxCol) {
958 /* interpolate color */
959 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
960 /* add in texel */
961 kk[0] |= texel << (t & 7);
962 }
963 } else {
964 /* add in texel */
965 kk[0] |= texel << (t & 7);
966 }
967 }
968 }
969
970
971 static void
972 fxt1_quantize_MIXED1 (GLuint *cc,
973 GLubyte input[N_TEXELS][MAX_COMP])
974 {
975 const GLint n_vect = 2; /* highest vector number in each microtile */
976 const GLint n_comp = 3; /* 3 components: R, G, B */
977 GLubyte vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
978 GLfloat b, iv[MAX_COMP]; /* interpolation vector */
979 GLint i, j, k;
980 Fx64 hi; /* high quadword */
981 GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
982
983 GLint minSum;
984 GLint maxSum;
985 GLint minColL = 0, maxColL = -1;
986 GLint minColR = 0, maxColR = -1;
987
988 /* Our solution here is to find the darkest and brightest colors in
989 * the 4x4 tile and use those as the two representative colors.
990 * There are probably better algorithms to use (histogram-based).
991 */
992 minSum = 2000; /* big enough */
993 maxSum = -1; /* small enough */
994 for (k = 0; k < N_TEXELS / 2; k++) {
995 if (!ISTBLACK(input[k])) {
996 GLint sum = 0;
997 for (i = 0; i < n_comp; i++) {
998 sum += input[k][i];
999 }
1000 if (minSum > sum) {
1001 minSum = sum;
1002 minColL = k;
1003 }
1004 if (maxSum < sum) {
1005 maxSum = sum;
1006 maxColL = k;
1007 }
1008 }
1009 }
1010 minSum = 2000; /* big enough */
1011 maxSum = -1; /* small enough */
1012 for (; k < N_TEXELS; k++) {
1013 if (!ISTBLACK(input[k])) {
1014 GLint sum = 0;
1015 for (i = 0; i < n_comp; i++) {
1016 sum += input[k][i];
1017 }
1018 if (minSum > sum) {
1019 minSum = sum;
1020 minColR = k;
1021 }
1022 if (maxSum < sum) {
1023 maxSum = sum;
1024 maxColR = k;
1025 }
1026 }
1027 }
1028
1029 /* left microtile */
1030 if (maxColL == -1) {
1031 /* all transparent black */
1032 cc[0] = ~0u;
1033 for (i = 0; i < n_comp; i++) {
1034 vec[0][i] = 0;
1035 vec[1][i] = 0;
1036 }
1037 } else {
1038 cc[0] = 0;
1039 for (i = 0; i < n_comp; i++) {
1040 vec[0][i] = input[minColL][i];
1041 vec[1][i] = input[maxColL][i];
1042 }
1043 if (minColL != maxColL) {
1044 /* compute interpolation vector */
1045 MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
1046
1047 /* add in texels */
1048 lolo = 0;
1049 for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
1050 GLint texel = n_vect + 1; /* transparent black */
1051 if (!ISTBLACK(input[k])) {
1052 /* interpolate color */
1053 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
1054 }
1055 /* add in texel */
1056 lolo <<= 2;
1057 lolo |= texel;
1058 }
1059 cc[0] = lolo;
1060 }
1061 }
1062
1063 /* right microtile */
1064 if (maxColR == -1) {
1065 /* all transparent black */
1066 cc[1] = ~0u;
1067 for (i = 0; i < n_comp; i++) {
1068 vec[2][i] = 0;
1069 vec[3][i] = 0;
1070 }
1071 } else {
1072 cc[1] = 0;
1073 for (i = 0; i < n_comp; i++) {
1074 vec[2][i] = input[minColR][i];
1075 vec[3][i] = input[maxColR][i];
1076 }
1077 if (minColR != maxColR) {
1078 /* compute interpolation vector */
1079 MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);
1080
1081 /* add in texels */
1082 lohi = 0;
1083 for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
1084 GLint texel = n_vect + 1; /* transparent black */
1085 if (!ISTBLACK(input[k])) {
1086 /* interpolate color */
1087 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
1088 }
1089 /* add in texel */
1090 lohi <<= 2;
1091 lohi |= texel;
1092 }
1093 cc[1] = lohi;
1094 }
1095 }
1096
1097 FX64_MOV32(hi, 9 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
1098 for (j = 2 * 2 - 1; j >= 0; j--) {
1099 for (i = 0; i < n_comp; i++) {
1100 /* add in colors */
1101 FX64_SHL(hi, 5);
1102 FX64_OR32(hi, vec[j][i] >> 3);
1103 }
1104 }
1105 ((Fx64 *)cc)[1] = hi;
1106 }
1107
1108
1109 static void
1110 fxt1_quantize_MIXED0 (GLuint *cc,
1111 GLubyte input[N_TEXELS][MAX_COMP])
1112 {
1113 const GLint n_vect = 3; /* highest vector number in each microtile */
1114 const GLint n_comp = 3; /* 3 components: R, G, B */
1115 GLubyte vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
1116 GLfloat b, iv[MAX_COMP]; /* interpolation vector */
1117 GLint i, j, k;
1118 Fx64 hi; /* high quadword */
1119 GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
1120
1121 GLint minColL = 0, maxColL = 0;
1122 GLint minColR = 0, maxColR = 0;
1123 #if 0
1124 GLint minSum;
1125 GLint maxSum;
1126
1127 /* Our solution here is to find the darkest and brightest colors in
1128 * the 4x4 tile and use those as the two representative colors.
1129 * There are probably better algorithms to use (histogram-based).
1130 */
1131 minSum = 2000; /* big enough */
1132 maxSum = -1; /* small enough */
1133 for (k = 0; k < N_TEXELS / 2; k++) {
1134 GLint sum = 0;
1135 for (i = 0; i < n_comp; i++) {
1136 sum += input[k][i];
1137 }
1138 if (minSum > sum) {
1139 minSum = sum;
1140 minColL = k;
1141 }
1142 if (maxSum < sum) {
1143 maxSum = sum;
1144 maxColL = k;
1145 }
1146 }
1147 minSum = 2000; /* big enough */
1148 maxSum = -1; /* small enough */
1149 for (; k < N_TEXELS; k++) {
1150 GLint sum = 0;
1151 for (i = 0; i < n_comp; i++) {
1152 sum += input[k][i];
1153 }
1154 if (minSum > sum) {
1155 minSum = sum;
1156 minColR = k;
1157 }
1158 if (maxSum < sum) {
1159 maxSum = sum;
1160 maxColR = k;
1161 }
1162 }
1163 #else
1164 GLint minVal;
1165 GLint maxVal;
1166 GLint maxVarL = fxt1_variance(NULL, input, n_comp, N_TEXELS / 2);
1167 GLint maxVarR = fxt1_variance(NULL, &input[N_TEXELS / 2], n_comp, N_TEXELS / 2);
1168
1169 /* Scan the channel with max variance for lo & hi
1170 * and use those as the two representative colors.
1171 */
1172 minVal = 2000; /* big enough */
1173 maxVal = -1; /* small enough */
1174 for (k = 0; k < N_TEXELS / 2; k++) {
1175 GLint t = input[k][maxVarL];
1176 if (minVal > t) {
1177 minVal = t;
1178 minColL = k;
1179 }
1180 if (maxVal < t) {
1181 maxVal = t;
1182 maxColL = k;
1183 }
1184 }
1185 minVal = 2000; /* big enough */
1186 maxVal = -1; /* small enough */
1187 for (; k < N_TEXELS; k++) {
1188 GLint t = input[k][maxVarR];
1189 if (minVal > t) {
1190 minVal = t;
1191 minColR = k;
1192 }
1193 if (maxVal < t) {
1194 maxVal = t;
1195 maxColR = k;
1196 }
1197 }
1198 #endif
1199
1200 /* left microtile */
1201 cc[0] = 0;
1202 for (i = 0; i < n_comp; i++) {
1203 vec[0][i] = input[minColL][i];
1204 vec[1][i] = input[maxColL][i];
1205 }
1206 if (minColL != maxColL) {
1207 /* compute interpolation vector */
1208 MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
1209
1210 /* add in texels */
1211 lolo = 0;
1212 for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
1213 GLint texel;
1214 /* interpolate color */
1215 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
1216 /* add in texel */
1217 lolo <<= 2;
1218 lolo |= texel;
1219 }
1220
1221 /* funky encoding for LSB of green */
1222 if ((GLint)((lolo >> 1) & 1) != (((vec[1][GCOMP] ^ vec[0][GCOMP]) >> 2) & 1)) {
1223 for (i = 0; i < n_comp; i++) {
1224 vec[1][i] = input[minColL][i];
1225 vec[0][i] = input[maxColL][i];
1226 }
1227 lolo = ~lolo;
1228 }
1229
1230 cc[0] = lolo;
1231 }
1232
1233 /* right microtile */
1234 cc[1] = 0;
1235 for (i = 0; i < n_comp; i++) {
1236 vec[2][i] = input[minColR][i];
1237 vec[3][i] = input[maxColR][i];
1238 }
1239 if (minColR != maxColR) {
1240 /* compute interpolation vector */
1241 MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);
1242
1243 /* add in texels */
1244 lohi = 0;
1245 for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
1246 GLint texel;
1247 /* interpolate color */
1248 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
1249 /* add in texel */
1250 lohi <<= 2;
1251 lohi |= texel;
1252 }
1253
1254 /* funky encoding for LSB of green */
1255 if ((GLint)((lohi >> 1) & 1) != (((vec[3][GCOMP] ^ vec[2][GCOMP]) >> 2) & 1)) {
1256 for (i = 0; i < n_comp; i++) {
1257 vec[3][i] = input[minColR][i];
1258 vec[2][i] = input[maxColR][i];
1259 }
1260 lohi = ~lohi;
1261 }
1262
1263 cc[1] = lohi;
1264 }
1265
1266 FX64_MOV32(hi, 8 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
1267 for (j = 2 * 2 - 1; j >= 0; j--) {
1268 for (i = 0; i < n_comp; i++) {
1269 /* add in colors */
1270 FX64_SHL(hi, 5);
1271 FX64_OR32(hi, vec[j][i] >> 3);
1272 }
1273 }
1274 ((Fx64 *)cc)[1] = hi;
1275 }
1276
1277
1278 static void
1279 fxt1_quantize (GLuint *cc, const GLubyte *lines[], GLint comps)
1280 {
1281 GLint trualpha;
1282 GLubyte reord[N_TEXELS][MAX_COMP];
1283
1284 GLubyte input[N_TEXELS][MAX_COMP];
1285 GLint i, k, l;
1286
1287 if (comps == 3) {
1288 /* make the whole block opaque */
1289 _mesa_memset(input, -1, sizeof(input));
1290 }
1291
1292 /* 8 texels each line */
1293 for (l = 0; l < 4; l++) {
1294 for (k = 0; k < 4; k++) {
1295 for (i = 0; i < comps; i++) {
1296 input[k + l * 4][i] = *lines[l]++;
1297 }
1298 }
1299 for (; k < 8; k++) {
1300 for (i = 0; i < comps; i++) {
1301 input[k + l * 4 + 12][i] = *lines[l]++;
1302 }
1303 }
1304 }
1305
1306 /* block layout:
1307 * 00, 01, 02, 03, 08, 09, 0a, 0b
1308 * 10, 11, 12, 13, 18, 19, 1a, 1b
1309 * 04, 05, 06, 07, 0c, 0d, 0e, 0f
1310 * 14, 15, 16, 17, 1c, 1d, 1e, 1f
1311 */
1312
1313 /* [dBorca]
1314 * stupidity flows forth from this
1315 */
1316 l = N_TEXELS;
1317 trualpha = 0;
1318 if (comps == 4) {
1319 /* skip all transparent black texels */
1320 l = 0;
1321 for (k = 0; k < N_TEXELS; k++) {
1322 /* test all components against 0 */
1323 if (!ISTBLACK(input[k])) {
1324 /* texel is not transparent black */
1325 COPY_4UBV(reord[l], input[k]);
1326 if (reord[l][ACOMP] < (255 - ALPHA_TS)) {
1327 /* non-opaque texel */
1328 trualpha = !0;
1329 }
1330 l++;
1331 }
1332 }
1333 }
1334
1335 #if 0
1336 if (trualpha) {
1337 fxt1_quantize_ALPHA0(cc, input, reord, l);
1338 } else if (l == 0) {
1339 cc[0] = cc[1] = cc[2] = -1;
1340 cc[3] = 0;
1341 } else if (l < N_TEXELS) {
1342 fxt1_quantize_HI(cc, input, reord, l);
1343 } else {
1344 fxt1_quantize_CHROMA(cc, input);
1345 }
1346 (void)fxt1_quantize_ALPHA1;
1347 (void)fxt1_quantize_MIXED1;
1348 (void)fxt1_quantize_MIXED0;
1349 #else
1350 if (trualpha) {
1351 fxt1_quantize_ALPHA1(cc, input);
1352 } else if (l == 0) {
1353 cc[0] = cc[1] = cc[2] = ~0u;
1354 cc[3] = 0;
1355 } else if (l < N_TEXELS) {
1356 fxt1_quantize_MIXED1(cc, input);
1357 } else {
1358 fxt1_quantize_MIXED0(cc, input);
1359 }
1360 (void)fxt1_quantize_ALPHA0;
1361 (void)fxt1_quantize_HI;
1362 (void)fxt1_quantize_CHROMA;
1363 #endif
1364 }
1365
1366
1367 static void
1368 fxt1_encode (GLuint width, GLuint height, GLint comps,
1369 const void *source, GLint srcRowStride,
1370 void *dest, GLint destRowStride)
1371 {
1372 GLuint x, y;
1373 const GLubyte *data;
1374 GLuint *encoded = (GLuint *)dest;
1375 void *newSource = NULL;
1376
1377 assert(comps == 3 || comps == 4);
1378
1379 /* Replicate image if width is not M8 or height is not M4 */
1380 if ((width & 7) | (height & 3)) {
1381 GLint newWidth = (width + 7) & ~7;
1382 GLint newHeight = (height + 3) & ~3;
1383 newSource = _mesa_malloc(comps * newWidth * newHeight * sizeof(GLchan));
1384 if (!newSource) {
1385 GET_CURRENT_CONTEXT(ctx);
1386 _mesa_error(ctx, GL_OUT_OF_MEMORY, "texture compression");
1387 goto cleanUp;
1388 }
1389 _mesa_upscale_teximage2d(width, height, newWidth, newHeight,
1390 comps, (const GLchan *) source,
1391 srcRowStride, (GLchan *) newSource);
1392 source = newSource;
1393 width = newWidth;
1394 height = newHeight;
1395 srcRowStride = comps * newWidth;
1396 }
1397
1398 /* convert from 16/32-bit channels to GLubyte if needed */
1399 if (CHAN_TYPE != GL_UNSIGNED_BYTE) {
1400 const GLuint n = width * height * comps;
1401 const GLchan *src = (const GLchan *) source;
1402 GLubyte *dest = (GLubyte *) _mesa_malloc(n * sizeof(GLubyte));
1403 GLuint i;
1404 if (!dest) {
1405 GET_CURRENT_CONTEXT(ctx);
1406 _mesa_error(ctx, GL_OUT_OF_MEMORY, "texture compression");
1407 goto cleanUp;
1408 }
1409 for (i = 0; i < n; i++) {
1410 dest[i] = CHAN_TO_UBYTE(src[i]);
1411 }
1412 if (newSource != NULL) {
1413 _mesa_free(newSource);
1414 }
1415 newSource = dest; /* we'll free this buffer before returning */
1416 source = dest; /* the new, GLubyte incoming image */
1417 }
1418
1419 data = (const GLubyte *) source;
1420 destRowStride = (destRowStride - width * 2) / 4;
1421 for (y = 0; y < height; y += 4) {
1422 GLuint offs = 0 + (y + 0) * srcRowStride;
1423 for (x = 0; x < width; x += 8) {
1424 const GLubyte *lines[4];
1425 lines[0] = &data[offs];
1426 lines[1] = lines[0] + srcRowStride;
1427 lines[2] = lines[1] + srcRowStride;
1428 lines[3] = lines[2] + srcRowStride;
1429 offs += 8 * comps;
1430 fxt1_quantize(encoded, lines, comps);
1431 /* 128 bits per 8x4 block */
1432 encoded += 4;
1433 }
1434 encoded += destRowStride;
1435 }
1436
1437 cleanUp:
1438 if (newSource != NULL) {
1439 _mesa_free(newSource);
1440 }
1441 }
1442
1443
1444 /***************************************************************************\
1445 * FXT1 decoder
1446 *
1447 * The decoder is based on GL_3DFX_texture_compression_FXT1
1448 * specification and serves as a concept for the encoder.
1449 \***************************************************************************/
1450
1451
1452 /* lookup table for scaling 5 bit colors up to 8 bits */
1453 static const GLubyte _rgb_scale_5[] = {
1454 0, 8, 16, 25, 33, 41, 49, 58,
1455 66, 74, 82, 90, 99, 107, 115, 123,
1456 132, 140, 148, 156, 165, 173, 181, 189,
1457 197, 206, 214, 222, 230, 239, 247, 255
1458 };
1459
1460 /* lookup table for scaling 6 bit colors up to 8 bits */
1461 static const GLubyte _rgb_scale_6[] = {
1462 0, 4, 8, 12, 16, 20, 24, 28,
1463 32, 36, 40, 45, 49, 53, 57, 61,
1464 65, 69, 73, 77, 81, 85, 89, 93,
1465 97, 101, 105, 109, 113, 117, 121, 125,
1466 130, 134, 138, 142, 146, 150, 154, 158,
1467 162, 166, 170, 174, 178, 182, 186, 190,
1468 194, 198, 202, 206, 210, 215, 219, 223,
1469 227, 231, 235, 239, 243, 247, 251, 255
1470 };
1471
1472
/* CC_SEL: view cc as an array of GLuints and return the word holding bit
 *         `which`, shifted right so that bit lands in bit 0.  The result is
 *         NOT masked; callers mask it (UP5/UP6 do so themselves).
 * UP5:    expand the low 5 bits of c to 8 bits.
 * UP6:    expand the 6-bit value formed from the low 5 bits of c followed
 *         by the low bit of b to 8 bits.
 * LERP:   rounded integer interpolation, t/n of the way from c0 to c1.
 *         The whole expansion is parenthesized so the macro is safe inside
 *         larger expressions (e.g. x / LERP(...)).
 */
#define CC_SEL(cc, which) (((GLuint *)(cc))[(which) / 32] >> ((which) & 31))
#define UP5(c) (_rgb_scale_5[(c) & 31])
#define UP6(c, b) (_rgb_scale_6[(((c) & 31) << 1) | ((b) & 1)])
#define LERP(n, t, c0, c1) ((((n) - (t)) * (c0) + (t) * (c1) + (n) / 2) / (n))
1477
1478
1479 static void
1480 fxt1_decode_1HI (const GLubyte *code, GLint t, GLchan *rgba)
1481 {
1482 const GLuint *cc;
1483
1484 t *= 3;
1485 cc = (const GLuint *)(code + t / 8);
1486 t = (cc[0] >> (t & 7)) & 7;
1487
1488 if (t == 7) {
1489 rgba[RCOMP] = rgba[GCOMP] = rgba[BCOMP] = rgba[ACOMP] = 0;
1490 } else {
1491 GLubyte r, g, b;
1492 cc = (const GLuint *)(code + 12);
1493 if (t == 0) {
1494 b = UP5(CC_SEL(cc, 0));
1495 g = UP5(CC_SEL(cc, 5));
1496 r = UP5(CC_SEL(cc, 10));
1497 } else if (t == 6) {
1498 b = UP5(CC_SEL(cc, 15));
1499 g = UP5(CC_SEL(cc, 20));
1500 r = UP5(CC_SEL(cc, 25));
1501 } else {
1502 b = LERP(6, t, UP5(CC_SEL(cc, 0)), UP5(CC_SEL(cc, 15)));
1503 g = LERP(6, t, UP5(CC_SEL(cc, 5)), UP5(CC_SEL(cc, 20)));
1504 r = LERP(6, t, UP5(CC_SEL(cc, 10)), UP5(CC_SEL(cc, 25)));
1505 }
1506 rgba[RCOMP] = UBYTE_TO_CHAN(r);
1507 rgba[GCOMP] = UBYTE_TO_CHAN(g);
1508 rgba[BCOMP] = UBYTE_TO_CHAN(b);
1509 rgba[ACOMP] = CHAN_MAX;
1510 }
1511 }
1512
1513
1514 static void
1515 fxt1_decode_1CHROMA (const GLubyte *code, GLint t, GLchan *rgba)
1516 {
1517 const GLuint *cc;
1518 GLuint kk;
1519
1520 cc = (const GLuint *)code;
1521 if (t & 16) {
1522 cc++;
1523 t &= 15;
1524 }
1525 t = (cc[0] >> (t * 2)) & 3;
1526
1527 t *= 15;
1528 cc = (const GLuint *)(code + 8 + t / 8);
1529 kk = cc[0] >> (t & 7);
1530 rgba[BCOMP] = UBYTE_TO_CHAN( UP5(kk) );
1531 rgba[GCOMP] = UBYTE_TO_CHAN( UP5(kk >> 5) );
1532 rgba[RCOMP] = UBYTE_TO_CHAN( UP5(kk >> 10) );
1533 rgba[ACOMP] = CHAN_MAX;
1534 }
1535
1536
1537 static void
1538 fxt1_decode_1MIXED (const GLubyte *code, GLint t, GLchan *rgba)
1539 {
1540 const GLuint *cc;
1541 GLuint col[2][3];
1542 GLint glsb, selb;
1543
1544 cc = (const GLuint *)code;
1545 if (t & 16) {
1546 t &= 15;
1547 t = (cc[1] >> (t * 2)) & 3;
1548 /* col 2 */
1549 col[0][BCOMP] = (*(const GLuint *)(code + 11)) >> 6;
1550 col[0][GCOMP] = CC_SEL(cc, 99);
1551 col[0][RCOMP] = CC_SEL(cc, 104);
1552 /* col 3 */
1553 col[1][BCOMP] = CC_SEL(cc, 109);
1554 col[1][GCOMP] = CC_SEL(cc, 114);
1555 col[1][RCOMP] = CC_SEL(cc, 119);
1556 glsb = CC_SEL(cc, 126);
1557 selb = CC_SEL(cc, 33);
1558 } else {
1559 t = (cc[0] >> (t * 2)) & 3;
1560 /* col 0 */
1561 col[0][BCOMP] = CC_SEL(cc, 64);
1562 col[0][GCOMP] = CC_SEL(cc, 69);
1563 col[0][RCOMP] = CC_SEL(cc, 74);
1564 /* col 1 */
1565 col[1][BCOMP] = CC_SEL(cc, 79);
1566 col[1][GCOMP] = CC_SEL(cc, 84);
1567 col[1][RCOMP] = CC_SEL(cc, 89);
1568 glsb = CC_SEL(cc, 125);
1569 selb = CC_SEL(cc, 1);
1570 }
1571
1572 if (CC_SEL(cc, 124) & 1) {
1573 /* alpha[0] == 1 */
1574
1575 if (t == 3) {
1576 /* zero */
1577 rgba[RCOMP] = rgba[BCOMP] = rgba[GCOMP] = rgba[ACOMP] = 0;
1578 } else {
1579 GLubyte r, g, b;
1580 if (t == 0) {
1581 b = UP5(col[0][BCOMP]);
1582 g = UP5(col[0][GCOMP]);
1583 r = UP5(col[0][RCOMP]);
1584 } else if (t == 2) {
1585 b = UP5(col[1][BCOMP]);
1586 g = UP6(col[1][GCOMP], glsb);
1587 r = UP5(col[1][RCOMP]);
1588 } else {
1589 b = (UP5(col[0][BCOMP]) + UP5(col[1][BCOMP])) / 2;
1590 g = (UP5(col[0][GCOMP]) + UP6(col[1][GCOMP], glsb)) / 2;
1591 r = (UP5(col[0][RCOMP]) + UP5(col[1][RCOMP])) / 2;
1592 }
1593 rgba[RCOMP] = UBYTE_TO_CHAN(r);
1594 rgba[GCOMP] = UBYTE_TO_CHAN(g);
1595 rgba[BCOMP] = UBYTE_TO_CHAN(b);
1596 rgba[ACOMP] = CHAN_MAX;
1597 }
1598 } else {
1599 /* alpha[0] == 0 */
1600 GLubyte r, g, b;
1601 if (t == 0) {
1602 b = UP5(col[0][BCOMP]);
1603 g = UP6(col[0][GCOMP], glsb ^ selb);
1604 r = UP5(col[0][RCOMP]);
1605 } else if (t == 3) {
1606 b = UP5(col[1][BCOMP]);
1607 g = UP6(col[1][GCOMP], glsb);
1608 r = UP5(col[1][RCOMP]);
1609 } else {
1610 b = LERP(3, t, UP5(col[0][BCOMP]), UP5(col[1][BCOMP]));
1611 g = LERP(3, t, UP6(col[0][GCOMP], glsb ^ selb),
1612 UP6(col[1][GCOMP], glsb));
1613 r = LERP(3, t, UP5(col[0][RCOMP]), UP5(col[1][RCOMP]));
1614 }
1615 rgba[RCOMP] = UBYTE_TO_CHAN(r);
1616 rgba[GCOMP] = UBYTE_TO_CHAN(g);
1617 rgba[BCOMP] = UBYTE_TO_CHAN(b);
1618 rgba[ACOMP] = CHAN_MAX;
1619 }
1620 }
1621
1622
1623 static void
1624 fxt1_decode_1ALPHA (const GLubyte *code, GLint t, GLchan *rgba)
1625 {
1626 const GLuint *cc;
1627 GLubyte r, g, b, a;
1628
1629 cc = (const GLuint *)code;
1630 if (CC_SEL(cc, 124) & 1) {
1631 /* lerp == 1 */
1632 GLuint col0[4];
1633
1634 if (t & 16) {
1635 t &= 15;
1636 t = (cc[1] >> (t * 2)) & 3;
1637 /* col 2 */
1638 col0[BCOMP] = (*(const GLuint *)(code + 11)) >> 6;
1639 col0[GCOMP] = CC_SEL(cc, 99);
1640 col0[RCOMP] = CC_SEL(cc, 104);
1641 col0[ACOMP] = CC_SEL(cc, 119);
1642 } else {
1643 t = (cc[0] >> (t * 2)) & 3;
1644 /* col 0 */
1645 col0[BCOMP] = CC_SEL(cc, 64);
1646 col0[GCOMP] = CC_SEL(cc, 69);
1647 col0[RCOMP] = CC_SEL(cc, 74);
1648 col0[ACOMP] = CC_SEL(cc, 109);
1649 }
1650
1651 if (t == 0) {
1652 b = UP5(col0[BCOMP]);
1653 g = UP5(col0[GCOMP]);
1654 r = UP5(col0[RCOMP]);
1655 a = UP5(col0[ACOMP]);
1656 } else if (t == 3) {
1657 b = UP5(CC_SEL(cc, 79));
1658 g = UP5(CC_SEL(cc, 84));
1659 r = UP5(CC_SEL(cc, 89));
1660 a = UP5(CC_SEL(cc, 114));
1661 } else {
1662 b = LERP(3, t, UP5(col0[BCOMP]), UP5(CC_SEL(cc, 79)));
1663 g = LERP(3, t, UP5(col0[GCOMP]), UP5(CC_SEL(cc, 84)));
1664 r = LERP(3, t, UP5(col0[RCOMP]), UP5(CC_SEL(cc, 89)));
1665 a = LERP(3, t, UP5(col0[ACOMP]), UP5(CC_SEL(cc, 114)));
1666 }
1667 } else {
1668 /* lerp == 0 */
1669
1670 if (t & 16) {
1671 cc++;
1672 t &= 15;
1673 }
1674 t = (cc[0] >> (t * 2)) & 3;
1675
1676 if (t == 3) {
1677 /* zero */
1678 r = g = b = a = 0;
1679 } else {
1680 GLuint kk;
1681 cc = (const GLuint *)code;
1682 a = UP5(cc[3] >> (t * 5 + 13));
1683 t *= 15;
1684 cc = (const GLuint *)(code + 8 + t / 8);
1685 kk = cc[0] >> (t & 7);
1686 b = UP5(kk);
1687 g = UP5(kk >> 5);
1688 r = UP5(kk >> 10);
1689 }
1690 }
1691 rgba[RCOMP] = UBYTE_TO_CHAN(r);
1692 rgba[GCOMP] = UBYTE_TO_CHAN(g);
1693 rgba[BCOMP] = UBYTE_TO_CHAN(b);
1694 rgba[ACOMP] = UBYTE_TO_CHAN(a);
1695 }
1696
1697
1698 void
1699 fxt1_decode_1 (const void *texture, GLint stride, /* in pixels */
1700 GLint i, GLint j, GLchan *rgba)
1701 {
1702 static void (*decode_1[]) (const GLubyte *, GLint, GLchan *) = {
1703 fxt1_decode_1HI, /* cc-high = "00?" */
1704 fxt1_decode_1HI, /* cc-high = "00?" */
1705 fxt1_decode_1CHROMA, /* cc-chroma = "010" */
1706 fxt1_decode_1ALPHA, /* alpha = "011" */
1707 fxt1_decode_1MIXED, /* mixed = "1??" */
1708 fxt1_decode_1MIXED, /* mixed = "1??" */
1709 fxt1_decode_1MIXED, /* mixed = "1??" */
1710 fxt1_decode_1MIXED /* mixed = "1??" */
1711 };
1712
1713 const GLubyte *code = (const GLubyte *)texture +
1714 ((j / 4) * (stride / 8) + (i / 8)) * 16;
1715 GLint mode = CC_SEL(code, 125);
1716 GLint t = i & 7;
1717
1718 if (t & 4) {
1719 t += 12;
1720 }
1721 t += (j & 3) * 4;
1722
1723 decode_1[mode](code, t, rgba);
1724 }