mesa: remove gl_texture_format
[mesa.git] / src / mesa / main / texcompress_fxt1.c
1 /*
2 * Mesa 3-D graphics library
3 * Version: 7.1
4 *
5 * Copyright (C) 1999-2008 Brian Paul All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included
15 * in all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
21 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25
26 /**
27 * \file texcompress_fxt1.c
28 * GL_EXT_texture_compression_fxt1 support.
29 */
30
31
32 #include "glheader.h"
33 #include "imports.h"
34 #include "colormac.h"
35 #include "context.h"
36 #include "convolve.h"
37 #include "image.h"
38 #include "mipmap.h"
39 #include "texcompress.h"
40 #include "texcompress_fxt1.h"
41 #include "texformat.h"
42 #include "texstore.h"
43
44
45 static void
46 fxt1_encode (GLuint width, GLuint height, GLint comps,
47 const void *source, GLint srcRowStride,
48 void *dest, GLint destRowStride);
49
50 void
51 fxt1_decode_1 (const void *texture, GLint stride,
52 GLint i, GLint j, GLchan *rgba);
53
54
55 /**
56 * Called during context initialization.
57 */
58 void
59 _mesa_init_texture_fxt1( GLcontext *ctx )
60 {
61 (void) ctx;
62 }
63
64
65 /**
66 * Store user's image in rgb_fxt1 format.
67 */
68 GLboolean
69 _mesa_texstore_rgb_fxt1(TEXSTORE_PARAMS)
70 {
71 const GLchan *pixels;
72 GLint srcRowStride;
73 GLubyte *dst;
74 const GLint texWidth = dstRowStride * 8 / 16; /* a bit of a hack */
75 const GLchan *tempImage = NULL;
76
77 ASSERT(dstFormat == MESA_FORMAT_RGB_FXT1);
78 ASSERT(dstXoffset % 8 == 0);
79 ASSERT(dstYoffset % 4 == 0);
80 ASSERT(dstZoffset == 0);
81 (void) dstZoffset;
82 (void) dstImageOffsets;
83
84 if (srcFormat != GL_RGB ||
85 srcType != CHAN_TYPE ||
86 ctx->_ImageTransferState ||
87 srcPacking->SwapBytes) {
88 /* convert image to RGB/GLchan */
89 tempImage = _mesa_make_temp_chan_image(ctx, dims,
90 baseInternalFormat,
91 _mesa_get_format_base_format(dstFormat),
92 srcWidth, srcHeight, srcDepth,
93 srcFormat, srcType, srcAddr,
94 srcPacking);
95 if (!tempImage)
96 return GL_FALSE; /* out of memory */
97 _mesa_adjust_image_for_convolution(ctx, dims, &srcWidth, &srcHeight);
98 pixels = tempImage;
99 srcRowStride = 3 * srcWidth;
100 srcFormat = GL_RGB;
101 }
102 else {
103 pixels = (const GLchan *) srcAddr;
104 srcRowStride = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat,
105 srcType) / sizeof(GLchan);
106 }
107
108 dst = _mesa_compressed_image_address(dstXoffset, dstYoffset, 0,
109 dstFormat,
110 texWidth, (GLubyte *) dstAddr);
111
112 fxt1_encode(srcWidth, srcHeight, 3, pixels, srcRowStride,
113 dst, dstRowStride);
114
115 if (tempImage)
116 _mesa_free((void*) tempImage);
117
118 return GL_TRUE;
119 }
120
121
122 /**
123 * Store user's image in rgba_fxt1 format.
124 */
125 GLboolean
126 _mesa_texstore_rgba_fxt1(TEXSTORE_PARAMS)
127 {
128 const GLchan *pixels;
129 GLint srcRowStride;
130 GLubyte *dst;
131 GLint texWidth = dstRowStride * 8 / 16; /* a bit of a hack */
132 const GLchan *tempImage = NULL;
133
134 ASSERT(dstFormat == MESA_FORMAT_RGBA_FXT1);
135 ASSERT(dstXoffset % 8 == 0);
136 ASSERT(dstYoffset % 4 == 0);
137 ASSERT(dstZoffset == 0);
138 (void) dstZoffset;
139 (void) dstImageOffsets;
140
141 if (srcFormat != GL_RGBA ||
142 srcType != CHAN_TYPE ||
143 ctx->_ImageTransferState ||
144 srcPacking->SwapBytes) {
145 /* convert image to RGBA/GLchan */
146 tempImage = _mesa_make_temp_chan_image(ctx, dims,
147 baseInternalFormat,
148 _mesa_get_format_base_format(dstFormat),
149 srcWidth, srcHeight, srcDepth,
150 srcFormat, srcType, srcAddr,
151 srcPacking);
152 if (!tempImage)
153 return GL_FALSE; /* out of memory */
154 _mesa_adjust_image_for_convolution(ctx, dims, &srcWidth, &srcHeight);
155 pixels = tempImage;
156 srcRowStride = 4 * srcWidth;
157 srcFormat = GL_RGBA;
158 }
159 else {
160 pixels = (const GLchan *) srcAddr;
161 srcRowStride = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat,
162 srcType) / sizeof(GLchan);
163 }
164
165 dst = _mesa_compressed_image_address(dstXoffset, dstYoffset, 0,
166 dstFormat,
167 texWidth, (GLubyte *) dstAddr);
168
169 fxt1_encode(srcWidth, srcHeight, 4, pixels, srcRowStride,
170 dst, dstRowStride);
171
172 if (tempImage)
173 _mesa_free((void*) tempImage);
174
175 return GL_TRUE;
176 }
177
178
179 void
180 _mesa_fetch_texel_2d_rgba_fxt1( const struct gl_texture_image *texImage,
181 GLint i, GLint j, GLint k, GLchan *texel )
182 {
183 (void) k;
184 fxt1_decode_1(texImage->Data, texImage->RowStride, i, j, texel);
185 }
186
187
188 void
189 _mesa_fetch_texel_2d_f_rgba_fxt1( const struct gl_texture_image *texImage,
190 GLint i, GLint j, GLint k, GLfloat *texel )
191 {
192 /* just sample as GLchan and convert to float here */
193 GLchan rgba[4];
194 (void) k;
195 fxt1_decode_1(texImage->Data, texImage->RowStride, i, j, rgba);
196 texel[RCOMP] = CHAN_TO_FLOAT(rgba[RCOMP]);
197 texel[GCOMP] = CHAN_TO_FLOAT(rgba[GCOMP]);
198 texel[BCOMP] = CHAN_TO_FLOAT(rgba[BCOMP]);
199 texel[ACOMP] = CHAN_TO_FLOAT(rgba[ACOMP]);
200 }
201
202
203 void
204 _mesa_fetch_texel_2d_rgb_fxt1( const struct gl_texture_image *texImage,
205 GLint i, GLint j, GLint k, GLchan *texel )
206 {
207 (void) k;
208 fxt1_decode_1(texImage->Data, texImage->RowStride, i, j, texel);
209 texel[ACOMP] = 255;
210 }
211
212
213 void
214 _mesa_fetch_texel_2d_f_rgb_fxt1( const struct gl_texture_image *texImage,
215 GLint i, GLint j, GLint k, GLfloat *texel )
216 {
217 /* just sample as GLchan and convert to float here */
218 GLchan rgba[4];
219 (void) k;
220 fxt1_decode_1(texImage->Data, texImage->RowStride, i, j, rgba);
221 texel[RCOMP] = CHAN_TO_FLOAT(rgba[RCOMP]);
222 texel[GCOMP] = CHAN_TO_FLOAT(rgba[GCOMP]);
223 texel[BCOMP] = CHAN_TO_FLOAT(rgba[BCOMP]);
224 texel[ACOMP] = 1.0F;
225 }
226
227
228
229 /***************************************************************************\
230 * FXT1 encoder
231 *
232 * The encoder was built by reversing the decoder,
233 * and is vaguely based on Texus2 by 3dfx. Note that this code
234 * is merely a proof of concept, since it is highly UNoptimized;
235 * moreover, it is sub-optimal due to initial conditions passed
236 * to Lloyd's algorithm (the interpolation modes are even worse).
237 \***************************************************************************/
238
239
#define MAX_COMP 4 /* ever needed maximum number of components in texel */
#define MAX_VECT 4 /* ever needed maximum number of base vectors to find */
#define N_TEXELS 32 /* number of texels in a block (always 32) */
#define LL_N_REP 50 /* number of iterations in lloyd's vq */
#define LL_RMS_D 10 /* fault tolerance (maximum delta) */
#define LL_RMS_E 255 /* fault tolerance (maximum error) */
#define ALPHA_TS 2 /* alpha threshold: (255 - ALPHA_TS) deemed opaque */

/*
 * True when all four components of texel v are zero ("transparent black").
 * The bytes are tested individually instead of reading the texel through a
 * GLuint pointer, which would violate the aliasing rules and assume 4-byte
 * alignment of a GLubyte array.
 * NOTE: v is evaluated four times, so it must have no side effects.
 */
#define ISTBLACK(v) (((v)[0] | (v)[1] | (v)[2] | (v)[3]) == 0)
248
249
/*
 * Define a 64-bit unsigned integer type and macros.
 *
 *   FX64_MOV32(a, b)  set a to the 32-bit value b
 *   FX64_OR32(a, b)   OR the 32-bit value b into the low bits of a
 *   FX64_SHL(a, c)    shift a left by c bits (c < 64)
 *
 * The fallback implementation (two 32-bit words) is kept for compilers
 * without a native 64-bit type; there FX64_MOV32 also clears the high word
 * so that FX64_SHL never reads an uninitialized value, and a shift count
 * of zero is a no-op instead of an out-of-range 32-bit shift.
 */
#if 1

#define FX64_NATIVE 1

typedef uint64_t Fx64;

#define FX64_MOV32(a, b) ((a) = (b))
#define FX64_OR32(a, b) ((a) |= (b))
#define FX64_SHL(a, c) ((a) <<= (c))

#else

#define FX64_NATIVE 0

typedef struct {
   GLuint lo, hi;
} Fx64;

#define FX64_MOV32(a, b) ((a).lo = (b), (a).hi = 0)
#define FX64_OR32(a, b) ((a).lo |= (b))

#define FX64_SHL(a, c)                                             \
   do {                                                            \
      if ((c) >= 32) {                                             \
         (a).hi = (a).lo << ((c) - 32);                            \
         (a).lo = 0;                                               \
      } else if ((c) > 0) {                                        \
         (a).hi = ((a).hi << (c)) | ((a).lo >> (32 - (c)));        \
         (a).lo <<= (c);                                           \
      }                                                            \
   } while (0)

#endif
286
287
#define F(i) (GLfloat)1 /* can be used to obtain an oblong metric: 0.30 / 0.59 / 0.11 */
#define SAFECDOT 1 /* for paranoids */

/*
 * MAKEIVEC: build the interpolation vector IV and bias B that project a
 * colour onto the segment V0..V1, scaled so that V0 maps to selector 0 and
 * V1 maps to selector NV.  NC is the number of components.
 *
 * If V0 and V1 coincide the squared length is zero; the scale is then
 * forced to 0 (instead of dividing by zero and producing NaN), so every
 * colour maps to selector 0.
 *
 * Both macros use a GLint variable named `i` from the enclosing scope as
 * their loop counter.
 */
#define MAKEIVEC(NV, NC, IV, B, V0, V1) \
do { \
   /* compute interpolation vector */ \
   GLfloat d2 = 0.0F; \
   GLfloat rd2; \
 \
   for (i = 0; i < NC; i++) { \
      IV[i] = (V1[i] - V0[i]) * F(i); \
      d2 += IV[i] * IV[i]; \
   } \
   /* guard against coincident endpoints */ \
   rd2 = (d2 > 0.0F) ? (GLfloat)(NV) / d2 : 0.0F; \
   B = 0; \
   for (i = 0; i < NC; i++) { \
      IV[i] *= F(i); \
      B -= IV[i] * V0[i]; \
      IV[i] *= rd2; \
   } \
   B = B * rd2 + 0.5f; \
} while (0)

/*
 * CALCCDOT: compute the selector TEXEL (0..NV) for colour V using the
 * vector/bias produced by MAKEIVEC.  With SAFECDOT enabled the result is
 * clamped to [0, NV].
 */
#define CALCCDOT(TEXEL, NV, NC, IV, B, V)\
do { \
   GLfloat dot = 0.0F; \
   for (i = 0; i < NC; i++) { \
      dot += V[i] * IV[i]; \
   } \
   TEXEL = (GLint)(dot + B); \
   if (SAFECDOT) { \
      if (TEXEL < 0) { \
         TEXEL = 0; \
      } else if (TEXEL > NV) { \
         TEXEL = NV; \
      } \
   } \
} while (0)
326
327
328 static GLint
329 fxt1_bestcol (GLfloat vec[][MAX_COMP], GLint nv,
330 GLubyte input[MAX_COMP], GLint nc)
331 {
332 GLint i, j, best = -1;
333 GLfloat err = 1e9; /* big enough */
334
335 for (j = 0; j < nv; j++) {
336 GLfloat e = 0.0F;
337 for (i = 0; i < nc; i++) {
338 e += (vec[j][i] - input[i]) * (vec[j][i] - input[i]);
339 }
340 if (e < err) {
341 err = e;
342 best = j;
343 }
344 }
345
346 return best;
347 }
348
349
350 static GLint
351 fxt1_worst (GLfloat vec[MAX_COMP],
352 GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
353 {
354 GLint i, k, worst = -1;
355 GLfloat err = -1.0F; /* small enough */
356
357 for (k = 0; k < n; k++) {
358 GLfloat e = 0.0F;
359 for (i = 0; i < nc; i++) {
360 e += (vec[i] - input[k][i]) * (vec[i] - input[k][i]);
361 }
362 if (e > err) {
363 err = e;
364 worst = k;
365 }
366 }
367
368 return worst;
369 }
370
371
372 static GLint
373 fxt1_variance (GLdouble variance[MAX_COMP],
374 GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
375 {
376 GLint i, k, best = 0;
377 GLint sx, sx2;
378 GLdouble var, maxvar = -1; /* small enough */
379 GLdouble teenth = 1.0 / n;
380
381 for (i = 0; i < nc; i++) {
382 sx = sx2 = 0;
383 for (k = 0; k < n; k++) {
384 GLint t = input[k][i];
385 sx += t;
386 sx2 += t * t;
387 }
388 var = sx2 * teenth - sx * sx * teenth * teenth;
389 if (maxvar < var) {
390 maxvar = var;
391 best = i;
392 }
393 if (variance) {
394 variance[i] = var;
395 }
396 }
397
398 return best;
399 }
400
401
402 static GLint
403 fxt1_choose (GLfloat vec[][MAX_COMP], GLint nv,
404 GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
405 {
406 #if 0
407 /* Choose colors from a grid.
408 */
409 GLint i, j;
410
411 for (j = 0; j < nv; j++) {
412 GLint m = j * (n - 1) / (nv - 1);
413 for (i = 0; i < nc; i++) {
414 vec[j][i] = input[m][i];
415 }
416 }
417 #else
418 /* Our solution here is to find the darkest and brightest colors in
419 * the 8x4 tile and use those as the two representative colors.
420 * There are probably better algorithms to use (histogram-based).
421 */
422 GLint i, j, k;
423 GLint minSum = 2000; /* big enough */
424 GLint maxSum = -1; /* small enough */
425 GLint minCol = 0; /* phoudoin: silent compiler! */
426 GLint maxCol = 0; /* phoudoin: silent compiler! */
427
428 struct {
429 GLint flag;
430 GLint key;
431 GLint freq;
432 GLint idx;
433 } hist[N_TEXELS];
434 GLint lenh = 0;
435
436 _mesa_memset(hist, 0, sizeof(hist));
437
438 for (k = 0; k < n; k++) {
439 GLint l;
440 GLint key = 0;
441 GLint sum = 0;
442 for (i = 0; i < nc; i++) {
443 key <<= 8;
444 key |= input[k][i];
445 sum += input[k][i];
446 }
447 for (l = 0; l < n; l++) {
448 if (!hist[l].flag) {
449 /* alloc new slot */
450 hist[l].flag = !0;
451 hist[l].key = key;
452 hist[l].freq = 1;
453 hist[l].idx = k;
454 lenh = l + 1;
455 break;
456 } else if (hist[l].key == key) {
457 hist[l].freq++;
458 break;
459 }
460 }
461 if (minSum > sum) {
462 minSum = sum;
463 minCol = k;
464 }
465 if (maxSum < sum) {
466 maxSum = sum;
467 maxCol = k;
468 }
469 }
470
471 if (lenh <= nv) {
472 for (j = 0; j < lenh; j++) {
473 for (i = 0; i < nc; i++) {
474 vec[j][i] = (GLfloat)input[hist[j].idx][i];
475 }
476 }
477 for (; j < nv; j++) {
478 for (i = 0; i < nc; i++) {
479 vec[j][i] = vec[0][i];
480 }
481 }
482 return 0;
483 }
484
485 for (j = 0; j < nv; j++) {
486 for (i = 0; i < nc; i++) {
487 vec[j][i] = ((nv - 1 - j) * input[minCol][i] + j * input[maxCol][i] + (nv - 1) / 2) / (GLfloat)(nv - 1);
488 }
489 }
490 #endif
491
492 return !0;
493 }
494
495
496 static GLint
497 fxt1_lloyd (GLfloat vec[][MAX_COMP], GLint nv,
498 GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
499 {
500 /* Use the generalized lloyd's algorithm for VQ:
501 * find 4 color vectors.
502 *
503 * for each sample color
504 * sort to nearest vector.
505 *
506 * replace each vector with the centroid of it's matching colors.
507 *
508 * repeat until RMS doesn't improve.
509 *
510 * if a color vector has no samples, or becomes the same as another
511 * vector, replace it with the color which is farthest from a sample.
512 *
513 * vec[][MAX_COMP] initial vectors and resulting colors
514 * nv number of resulting colors required
515 * input[N_TEXELS][MAX_COMP] input texels
516 * nc number of components in input / vec
517 * n number of input samples
518 */
519
520 GLint sum[MAX_VECT][MAX_COMP]; /* used to accumulate closest texels */
521 GLint cnt[MAX_VECT]; /* how many times a certain vector was chosen */
522 GLfloat error, lasterror = 1e9;
523
524 GLint i, j, k, rep;
525
526 /* the quantizer */
527 for (rep = 0; rep < LL_N_REP; rep++) {
528 /* reset sums & counters */
529 for (j = 0; j < nv; j++) {
530 for (i = 0; i < nc; i++) {
531 sum[j][i] = 0;
532 }
533 cnt[j] = 0;
534 }
535 error = 0;
536
537 /* scan whole block */
538 for (k = 0; k < n; k++) {
539 #if 1
540 GLint best = -1;
541 GLfloat err = 1e9; /* big enough */
542 /* determine best vector */
543 for (j = 0; j < nv; j++) {
544 GLfloat e = (vec[j][0] - input[k][0]) * (vec[j][0] - input[k][0]) +
545 (vec[j][1] - input[k][1]) * (vec[j][1] - input[k][1]) +
546 (vec[j][2] - input[k][2]) * (vec[j][2] - input[k][2]);
547 if (nc == 4) {
548 e += (vec[j][3] - input[k][3]) * (vec[j][3] - input[k][3]);
549 }
550 if (e < err) {
551 err = e;
552 best = j;
553 }
554 }
555 #else
556 GLint best = fxt1_bestcol(vec, nv, input[k], nc, &err);
557 #endif
558 /* add in closest color */
559 for (i = 0; i < nc; i++) {
560 sum[best][i] += input[k][i];
561 }
562 /* mark this vector as used */
563 cnt[best]++;
564 /* accumulate error */
565 error += err;
566 }
567
568 /* check RMS */
569 if ((error < LL_RMS_E) ||
570 ((error < lasterror) && ((lasterror - error) < LL_RMS_D))) {
571 return !0; /* good match */
572 }
573 lasterror = error;
574
575 /* move each vector to the barycenter of its closest colors */
576 for (j = 0; j < nv; j++) {
577 if (cnt[j]) {
578 GLfloat div = 1.0F / cnt[j];
579 for (i = 0; i < nc; i++) {
580 vec[j][i] = div * sum[j][i];
581 }
582 } else {
583 /* this vec has no samples or is identical with a previous vec */
584 GLint worst = fxt1_worst(vec[j], input, nc, n);
585 for (i = 0; i < nc; i++) {
586 vec[j][i] = input[worst][i];
587 }
588 }
589 }
590 }
591
592 return 0; /* could not converge fast enough */
593 }
594
595
596 static void
597 fxt1_quantize_CHROMA (GLuint *cc,
598 GLubyte input[N_TEXELS][MAX_COMP])
599 {
600 const GLint n_vect = 4; /* 4 base vectors to find */
601 const GLint n_comp = 3; /* 3 components: R, G, B */
602 GLfloat vec[MAX_VECT][MAX_COMP];
603 GLint i, j, k;
604 Fx64 hi; /* high quadword */
605 GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
606
607 if (fxt1_choose(vec, n_vect, input, n_comp, N_TEXELS) != 0) {
608 fxt1_lloyd(vec, n_vect, input, n_comp, N_TEXELS);
609 }
610
611 FX64_MOV32(hi, 4); /* cc-chroma = "010" + unused bit */
612 for (j = n_vect - 1; j >= 0; j--) {
613 for (i = 0; i < n_comp; i++) {
614 /* add in colors */
615 FX64_SHL(hi, 5);
616 FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
617 }
618 }
619 ((Fx64 *)cc)[1] = hi;
620
621 lohi = lolo = 0;
622 /* right microtile */
623 for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) {
624 lohi <<= 2;
625 lohi |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
626 }
627 /* left microtile */
628 for (; k >= 0; k--) {
629 lolo <<= 2;
630 lolo |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
631 }
632 cc[1] = lohi;
633 cc[0] = lolo;
634 }
635
636
637 static void
638 fxt1_quantize_ALPHA0 (GLuint *cc,
639 GLubyte input[N_TEXELS][MAX_COMP],
640 GLubyte reord[N_TEXELS][MAX_COMP], GLint n)
641 {
642 const GLint n_vect = 3; /* 3 base vectors to find */
643 const GLint n_comp = 4; /* 4 components: R, G, B, A */
644 GLfloat vec[MAX_VECT][MAX_COMP];
645 GLint i, j, k;
646 Fx64 hi; /* high quadword */
647 GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
648
649 /* the last vector indicates zero */
650 for (i = 0; i < n_comp; i++) {
651 vec[n_vect][i] = 0;
652 }
653
654 /* the first n texels in reord are guaranteed to be non-zero */
655 if (fxt1_choose(vec, n_vect, reord, n_comp, n) != 0) {
656 fxt1_lloyd(vec, n_vect, reord, n_comp, n);
657 }
658
659 FX64_MOV32(hi, 6); /* alpha = "011" + lerp = 0 */
660 for (j = n_vect - 1; j >= 0; j--) {
661 /* add in alphas */
662 FX64_SHL(hi, 5);
663 FX64_OR32(hi, (GLuint)(vec[j][ACOMP] / 8.0F));
664 }
665 for (j = n_vect - 1; j >= 0; j--) {
666 for (i = 0; i < n_comp - 1; i++) {
667 /* add in colors */
668 FX64_SHL(hi, 5);
669 FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
670 }
671 }
672 ((Fx64 *)cc)[1] = hi;
673
674 lohi = lolo = 0;
675 /* right microtile */
676 for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) {
677 lohi <<= 2;
678 lohi |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
679 }
680 /* left microtile */
681 for (; k >= 0; k--) {
682 lolo <<= 2;
683 lolo |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
684 }
685 cc[1] = lohi;
686 cc[0] = lolo;
687 }
688
689
690 static void
691 fxt1_quantize_ALPHA1 (GLuint *cc,
692 GLubyte input[N_TEXELS][MAX_COMP])
693 {
694 const GLint n_vect = 3; /* highest vector number in each microtile */
695 const GLint n_comp = 4; /* 4 components: R, G, B, A */
696 GLfloat vec[1 + 1 + 1][MAX_COMP]; /* 1.5 extrema for each sub-block */
697 GLfloat b, iv[MAX_COMP]; /* interpolation vector */
698 GLint i, j, k;
699 Fx64 hi; /* high quadword */
700 GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
701
702 GLint minSum;
703 GLint maxSum;
704 GLint minColL = 0, maxColL = 0;
705 GLint minColR = 0, maxColR = 0;
706 GLint sumL = 0, sumR = 0;
707 GLint nn_comp;
708 /* Our solution here is to find the darkest and brightest colors in
709 * the 4x4 tile and use those as the two representative colors.
710 * There are probably better algorithms to use (histogram-based).
711 */
712 nn_comp = n_comp;
713 while ((minColL == maxColL) && nn_comp) {
714 minSum = 2000; /* big enough */
715 maxSum = -1; /* small enough */
716 for (k = 0; k < N_TEXELS / 2; k++) {
717 GLint sum = 0;
718 for (i = 0; i < nn_comp; i++) {
719 sum += input[k][i];
720 }
721 if (minSum > sum) {
722 minSum = sum;
723 minColL = k;
724 }
725 if (maxSum < sum) {
726 maxSum = sum;
727 maxColL = k;
728 }
729 sumL += sum;
730 }
731
732 nn_comp--;
733 }
734
735 nn_comp = n_comp;
736 while ((minColR == maxColR) && nn_comp) {
737 minSum = 2000; /* big enough */
738 maxSum = -1; /* small enough */
739 for (k = N_TEXELS / 2; k < N_TEXELS; k++) {
740 GLint sum = 0;
741 for (i = 0; i < nn_comp; i++) {
742 sum += input[k][i];
743 }
744 if (minSum > sum) {
745 minSum = sum;
746 minColR = k;
747 }
748 if (maxSum < sum) {
749 maxSum = sum;
750 maxColR = k;
751 }
752 sumR += sum;
753 }
754
755 nn_comp--;
756 }
757
758 /* choose the common vector (yuck!) */
759 {
760 GLint j1, j2;
761 GLint v1 = 0, v2 = 0;
762 GLfloat err = 1e9; /* big enough */
763 GLfloat tv[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
764 for (i = 0; i < n_comp; i++) {
765 tv[0][i] = input[minColL][i];
766 tv[1][i] = input[maxColL][i];
767 tv[2][i] = input[minColR][i];
768 tv[3][i] = input[maxColR][i];
769 }
770 for (j1 = 0; j1 < 2; j1++) {
771 for (j2 = 2; j2 < 4; j2++) {
772 GLfloat e = 0.0F;
773 for (i = 0; i < n_comp; i++) {
774 e += (tv[j1][i] - tv[j2][i]) * (tv[j1][i] - tv[j2][i]);
775 }
776 if (e < err) {
777 err = e;
778 v1 = j1;
779 v2 = j2;
780 }
781 }
782 }
783 for (i = 0; i < n_comp; i++) {
784 vec[0][i] = tv[1 - v1][i];
785 vec[1][i] = (tv[v1][i] * sumL + tv[v2][i] * sumR) / (sumL + sumR);
786 vec[2][i] = tv[5 - v2][i];
787 }
788 }
789
790 /* left microtile */
791 cc[0] = 0;
792 if (minColL != maxColL) {
793 /* compute interpolation vector */
794 MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
795
796 /* add in texels */
797 lolo = 0;
798 for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
799 GLint texel;
800 /* interpolate color */
801 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
802 /* add in texel */
803 lolo <<= 2;
804 lolo |= texel;
805 }
806
807 cc[0] = lolo;
808 }
809
810 /* right microtile */
811 cc[1] = 0;
812 if (minColR != maxColR) {
813 /* compute interpolation vector */
814 MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[1]);
815
816 /* add in texels */
817 lohi = 0;
818 for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
819 GLint texel;
820 /* interpolate color */
821 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
822 /* add in texel */
823 lohi <<= 2;
824 lohi |= texel;
825 }
826
827 cc[1] = lohi;
828 }
829
830 FX64_MOV32(hi, 7); /* alpha = "011" + lerp = 1 */
831 for (j = n_vect - 1; j >= 0; j--) {
832 /* add in alphas */
833 FX64_SHL(hi, 5);
834 FX64_OR32(hi, (GLuint)(vec[j][ACOMP] / 8.0F));
835 }
836 for (j = n_vect - 1; j >= 0; j--) {
837 for (i = 0; i < n_comp - 1; i++) {
838 /* add in colors */
839 FX64_SHL(hi, 5);
840 FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
841 }
842 }
843 ((Fx64 *)cc)[1] = hi;
844 }
845
846
847 static void
848 fxt1_quantize_HI (GLuint *cc,
849 GLubyte input[N_TEXELS][MAX_COMP],
850 GLubyte reord[N_TEXELS][MAX_COMP], GLint n)
851 {
852 const GLint n_vect = 6; /* highest vector number */
853 const GLint n_comp = 3; /* 3 components: R, G, B */
854 GLfloat b = 0.0F; /* phoudoin: silent compiler! */
855 GLfloat iv[MAX_COMP]; /* interpolation vector */
856 GLint i, k;
857 GLuint hihi; /* high quadword: hi dword */
858
859 GLint minSum = 2000; /* big enough */
860 GLint maxSum = -1; /* small enough */
861 GLint minCol = 0; /* phoudoin: silent compiler! */
862 GLint maxCol = 0; /* phoudoin: silent compiler! */
863
864 /* Our solution here is to find the darkest and brightest colors in
865 * the 8x4 tile and use those as the two representative colors.
866 * There are probably better algorithms to use (histogram-based).
867 */
868 for (k = 0; k < n; k++) {
869 GLint sum = 0;
870 for (i = 0; i < n_comp; i++) {
871 sum += reord[k][i];
872 }
873 if (minSum > sum) {
874 minSum = sum;
875 minCol = k;
876 }
877 if (maxSum < sum) {
878 maxSum = sum;
879 maxCol = k;
880 }
881 }
882
883 hihi = 0; /* cc-hi = "00" */
884 for (i = 0; i < n_comp; i++) {
885 /* add in colors */
886 hihi <<= 5;
887 hihi |= reord[maxCol][i] >> 3;
888 }
889 for (i = 0; i < n_comp; i++) {
890 /* add in colors */
891 hihi <<= 5;
892 hihi |= reord[minCol][i] >> 3;
893 }
894 cc[3] = hihi;
895 cc[0] = cc[1] = cc[2] = 0;
896
897 /* compute interpolation vector */
898 if (minCol != maxCol) {
899 MAKEIVEC(n_vect, n_comp, iv, b, reord[minCol], reord[maxCol]);
900 }
901
902 /* add in texels */
903 for (k = N_TEXELS - 1; k >= 0; k--) {
904 GLint t = k * 3;
905 GLuint *kk = (GLuint *)((char *)cc + t / 8);
906 GLint texel = n_vect + 1; /* transparent black */
907
908 if (!ISTBLACK(input[k])) {
909 if (minCol != maxCol) {
910 /* interpolate color */
911 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
912 /* add in texel */
913 kk[0] |= texel << (t & 7);
914 }
915 } else {
916 /* add in texel */
917 kk[0] |= texel << (t & 7);
918 }
919 }
920 }
921
922
923 static void
924 fxt1_quantize_MIXED1 (GLuint *cc,
925 GLubyte input[N_TEXELS][MAX_COMP])
926 {
927 const GLint n_vect = 2; /* highest vector number in each microtile */
928 const GLint n_comp = 3; /* 3 components: R, G, B */
929 GLubyte vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
930 GLfloat b, iv[MAX_COMP]; /* interpolation vector */
931 GLint i, j, k;
932 Fx64 hi; /* high quadword */
933 GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
934
935 GLint minSum;
936 GLint maxSum;
937 GLint minColL = 0, maxColL = -1;
938 GLint minColR = 0, maxColR = -1;
939
940 /* Our solution here is to find the darkest and brightest colors in
941 * the 4x4 tile and use those as the two representative colors.
942 * There are probably better algorithms to use (histogram-based).
943 */
944 minSum = 2000; /* big enough */
945 maxSum = -1; /* small enough */
946 for (k = 0; k < N_TEXELS / 2; k++) {
947 if (!ISTBLACK(input[k])) {
948 GLint sum = 0;
949 for (i = 0; i < n_comp; i++) {
950 sum += input[k][i];
951 }
952 if (minSum > sum) {
953 minSum = sum;
954 minColL = k;
955 }
956 if (maxSum < sum) {
957 maxSum = sum;
958 maxColL = k;
959 }
960 }
961 }
962 minSum = 2000; /* big enough */
963 maxSum = -1; /* small enough */
964 for (; k < N_TEXELS; k++) {
965 if (!ISTBLACK(input[k])) {
966 GLint sum = 0;
967 for (i = 0; i < n_comp; i++) {
968 sum += input[k][i];
969 }
970 if (minSum > sum) {
971 minSum = sum;
972 minColR = k;
973 }
974 if (maxSum < sum) {
975 maxSum = sum;
976 maxColR = k;
977 }
978 }
979 }
980
981 /* left microtile */
982 if (maxColL == -1) {
983 /* all transparent black */
984 cc[0] = ~0u;
985 for (i = 0; i < n_comp; i++) {
986 vec[0][i] = 0;
987 vec[1][i] = 0;
988 }
989 } else {
990 cc[0] = 0;
991 for (i = 0; i < n_comp; i++) {
992 vec[0][i] = input[minColL][i];
993 vec[1][i] = input[maxColL][i];
994 }
995 if (minColL != maxColL) {
996 /* compute interpolation vector */
997 MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
998
999 /* add in texels */
1000 lolo = 0;
1001 for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
1002 GLint texel = n_vect + 1; /* transparent black */
1003 if (!ISTBLACK(input[k])) {
1004 /* interpolate color */
1005 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
1006 }
1007 /* add in texel */
1008 lolo <<= 2;
1009 lolo |= texel;
1010 }
1011 cc[0] = lolo;
1012 }
1013 }
1014
1015 /* right microtile */
1016 if (maxColR == -1) {
1017 /* all transparent black */
1018 cc[1] = ~0u;
1019 for (i = 0; i < n_comp; i++) {
1020 vec[2][i] = 0;
1021 vec[3][i] = 0;
1022 }
1023 } else {
1024 cc[1] = 0;
1025 for (i = 0; i < n_comp; i++) {
1026 vec[2][i] = input[minColR][i];
1027 vec[3][i] = input[maxColR][i];
1028 }
1029 if (minColR != maxColR) {
1030 /* compute interpolation vector */
1031 MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);
1032
1033 /* add in texels */
1034 lohi = 0;
1035 for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
1036 GLint texel = n_vect + 1; /* transparent black */
1037 if (!ISTBLACK(input[k])) {
1038 /* interpolate color */
1039 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
1040 }
1041 /* add in texel */
1042 lohi <<= 2;
1043 lohi |= texel;
1044 }
1045 cc[1] = lohi;
1046 }
1047 }
1048
1049 FX64_MOV32(hi, 9 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
1050 for (j = 2 * 2 - 1; j >= 0; j--) {
1051 for (i = 0; i < n_comp; i++) {
1052 /* add in colors */
1053 FX64_SHL(hi, 5);
1054 FX64_OR32(hi, vec[j][i] >> 3);
1055 }
1056 }
1057 ((Fx64 *)cc)[1] = hi;
1058 }
1059
1060
1061 static void
1062 fxt1_quantize_MIXED0 (GLuint *cc,
1063 GLubyte input[N_TEXELS][MAX_COMP])
1064 {
1065 const GLint n_vect = 3; /* highest vector number in each microtile */
1066 const GLint n_comp = 3; /* 3 components: R, G, B */
1067 GLubyte vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
1068 GLfloat b, iv[MAX_COMP]; /* interpolation vector */
1069 GLint i, j, k;
1070 Fx64 hi; /* high quadword */
1071 GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
1072
1073 GLint minColL = 0, maxColL = 0;
1074 GLint minColR = 0, maxColR = 0;
1075 #if 0
1076 GLint minSum;
1077 GLint maxSum;
1078
1079 /* Our solution here is to find the darkest and brightest colors in
1080 * the 4x4 tile and use those as the two representative colors.
1081 * There are probably better algorithms to use (histogram-based).
1082 */
1083 minSum = 2000; /* big enough */
1084 maxSum = -1; /* small enough */
1085 for (k = 0; k < N_TEXELS / 2; k++) {
1086 GLint sum = 0;
1087 for (i = 0; i < n_comp; i++) {
1088 sum += input[k][i];
1089 }
1090 if (minSum > sum) {
1091 minSum = sum;
1092 minColL = k;
1093 }
1094 if (maxSum < sum) {
1095 maxSum = sum;
1096 maxColL = k;
1097 }
1098 }
1099 minSum = 2000; /* big enough */
1100 maxSum = -1; /* small enough */
1101 for (; k < N_TEXELS; k++) {
1102 GLint sum = 0;
1103 for (i = 0; i < n_comp; i++) {
1104 sum += input[k][i];
1105 }
1106 if (minSum > sum) {
1107 minSum = sum;
1108 minColR = k;
1109 }
1110 if (maxSum < sum) {
1111 maxSum = sum;
1112 maxColR = k;
1113 }
1114 }
1115 #else
1116 GLint minVal;
1117 GLint maxVal;
1118 GLint maxVarL = fxt1_variance(NULL, input, n_comp, N_TEXELS / 2);
1119 GLint maxVarR = fxt1_variance(NULL, &input[N_TEXELS / 2], n_comp, N_TEXELS / 2);
1120
1121 /* Scan the channel with max variance for lo & hi
1122 * and use those as the two representative colors.
1123 */
1124 minVal = 2000; /* big enough */
1125 maxVal = -1; /* small enough */
1126 for (k = 0; k < N_TEXELS / 2; k++) {
1127 GLint t = input[k][maxVarL];
1128 if (minVal > t) {
1129 minVal = t;
1130 minColL = k;
1131 }
1132 if (maxVal < t) {
1133 maxVal = t;
1134 maxColL = k;
1135 }
1136 }
1137 minVal = 2000; /* big enough */
1138 maxVal = -1; /* small enough */
1139 for (; k < N_TEXELS; k++) {
1140 GLint t = input[k][maxVarR];
1141 if (minVal > t) {
1142 minVal = t;
1143 minColR = k;
1144 }
1145 if (maxVal < t) {
1146 maxVal = t;
1147 maxColR = k;
1148 }
1149 }
1150 #endif
1151
1152 /* left microtile */
1153 cc[0] = 0;
1154 for (i = 0; i < n_comp; i++) {
1155 vec[0][i] = input[minColL][i];
1156 vec[1][i] = input[maxColL][i];
1157 }
1158 if (minColL != maxColL) {
1159 /* compute interpolation vector */
1160 MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
1161
1162 /* add in texels */
1163 lolo = 0;
1164 for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
1165 GLint texel;
1166 /* interpolate color */
1167 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
1168 /* add in texel */
1169 lolo <<= 2;
1170 lolo |= texel;
1171 }
1172
1173 /* funky encoding for LSB of green */
1174 if ((GLint)((lolo >> 1) & 1) != (((vec[1][GCOMP] ^ vec[0][GCOMP]) >> 2) & 1)) {
1175 for (i = 0; i < n_comp; i++) {
1176 vec[1][i] = input[minColL][i];
1177 vec[0][i] = input[maxColL][i];
1178 }
1179 lolo = ~lolo;
1180 }
1181
1182 cc[0] = lolo;
1183 }
1184
1185 /* right microtile */
1186 cc[1] = 0;
1187 for (i = 0; i < n_comp; i++) {
1188 vec[2][i] = input[minColR][i];
1189 vec[3][i] = input[maxColR][i];
1190 }
1191 if (minColR != maxColR) {
1192 /* compute interpolation vector */
1193 MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);
1194
1195 /* add in texels */
1196 lohi = 0;
1197 for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
1198 GLint texel;
1199 /* interpolate color */
1200 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
1201 /* add in texel */
1202 lohi <<= 2;
1203 lohi |= texel;
1204 }
1205
1206 /* funky encoding for LSB of green */
1207 if ((GLint)((lohi >> 1) & 1) != (((vec[3][GCOMP] ^ vec[2][GCOMP]) >> 2) & 1)) {
1208 for (i = 0; i < n_comp; i++) {
1209 vec[3][i] = input[minColR][i];
1210 vec[2][i] = input[maxColR][i];
1211 }
1212 lohi = ~lohi;
1213 }
1214
1215 cc[1] = lohi;
1216 }
1217
1218 FX64_MOV32(hi, 8 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
1219 for (j = 2 * 2 - 1; j >= 0; j--) {
1220 for (i = 0; i < n_comp; i++) {
1221 /* add in colors */
1222 FX64_SHL(hi, 5);
1223 FX64_OR32(hi, vec[j][i] >> 3);
1224 }
1225 }
1226 ((Fx64 *)cc)[1] = hi;
1227 }
1228
1229
1230 static void
1231 fxt1_quantize (GLuint *cc, const GLubyte *lines[], GLint comps)
1232 {
1233 GLint trualpha;
1234 GLubyte reord[N_TEXELS][MAX_COMP];
1235
1236 GLubyte input[N_TEXELS][MAX_COMP];
1237 GLint i, k, l;
1238
1239 if (comps == 3) {
1240 /* make the whole block opaque */
1241 _mesa_memset(input, -1, sizeof(input));
1242 }
1243
1244 /* 8 texels each line */
1245 for (l = 0; l < 4; l++) {
1246 for (k = 0; k < 4; k++) {
1247 for (i = 0; i < comps; i++) {
1248 input[k + l * 4][i] = *lines[l]++;
1249 }
1250 }
1251 for (; k < 8; k++) {
1252 for (i = 0; i < comps; i++) {
1253 input[k + l * 4 + 12][i] = *lines[l]++;
1254 }
1255 }
1256 }
1257
1258 /* block layout:
1259 * 00, 01, 02, 03, 08, 09, 0a, 0b
1260 * 10, 11, 12, 13, 18, 19, 1a, 1b
1261 * 04, 05, 06, 07, 0c, 0d, 0e, 0f
1262 * 14, 15, 16, 17, 1c, 1d, 1e, 1f
1263 */
1264
1265 /* [dBorca]
1266 * stupidity flows forth from this
1267 */
1268 l = N_TEXELS;
1269 trualpha = 0;
1270 if (comps == 4) {
1271 /* skip all transparent black texels */
1272 l = 0;
1273 for (k = 0; k < N_TEXELS; k++) {
1274 /* test all components against 0 */
1275 if (!ISTBLACK(input[k])) {
1276 /* texel is not transparent black */
1277 COPY_4UBV(reord[l], input[k]);
1278 if (reord[l][ACOMP] < (255 - ALPHA_TS)) {
1279 /* non-opaque texel */
1280 trualpha = !0;
1281 }
1282 l++;
1283 }
1284 }
1285 }
1286
1287 #if 0
1288 if (trualpha) {
1289 fxt1_quantize_ALPHA0(cc, input, reord, l);
1290 } else if (l == 0) {
1291 cc[0] = cc[1] = cc[2] = -1;
1292 cc[3] = 0;
1293 } else if (l < N_TEXELS) {
1294 fxt1_quantize_HI(cc, input, reord, l);
1295 } else {
1296 fxt1_quantize_CHROMA(cc, input);
1297 }
1298 (void)fxt1_quantize_ALPHA1;
1299 (void)fxt1_quantize_MIXED1;
1300 (void)fxt1_quantize_MIXED0;
1301 #else
1302 if (trualpha) {
1303 fxt1_quantize_ALPHA1(cc, input);
1304 } else if (l == 0) {
1305 cc[0] = cc[1] = cc[2] = ~0u;
1306 cc[3] = 0;
1307 } else if (l < N_TEXELS) {
1308 fxt1_quantize_MIXED1(cc, input);
1309 } else {
1310 fxt1_quantize_MIXED0(cc, input);
1311 }
1312 (void)fxt1_quantize_ALPHA0;
1313 (void)fxt1_quantize_HI;
1314 (void)fxt1_quantize_CHROMA;
1315 #endif
1316 }
1317
1318
1319 static void
1320 fxt1_encode (GLuint width, GLuint height, GLint comps,
1321 const void *source, GLint srcRowStride,
1322 void *dest, GLint destRowStride)
1323 {
1324 GLuint x, y;
1325 const GLubyte *data;
1326 GLuint *encoded = (GLuint *)dest;
1327 void *newSource = NULL;
1328
1329 assert(comps == 3 || comps == 4);
1330
1331 /* Replicate image if width is not M8 or height is not M4 */
1332 if ((width & 7) | (height & 3)) {
1333 GLint newWidth = (width + 7) & ~7;
1334 GLint newHeight = (height + 3) & ~3;
1335 newSource = _mesa_malloc(comps * newWidth * newHeight * sizeof(GLchan));
1336 if (!newSource) {
1337 GET_CURRENT_CONTEXT(ctx);
1338 _mesa_error(ctx, GL_OUT_OF_MEMORY, "texture compression");
1339 goto cleanUp;
1340 }
1341 _mesa_upscale_teximage2d(width, height, newWidth, newHeight,
1342 comps, (const GLchan *) source,
1343 srcRowStride, (GLchan *) newSource);
1344 source = newSource;
1345 width = newWidth;
1346 height = newHeight;
1347 srcRowStride = comps * newWidth;
1348 }
1349
1350 /* convert from 16/32-bit channels to GLubyte if needed */
1351 if (CHAN_TYPE != GL_UNSIGNED_BYTE) {
1352 const GLuint n = width * height * comps;
1353 const GLchan *src = (const GLchan *) source;
1354 GLubyte *dest = (GLubyte *) _mesa_malloc(n * sizeof(GLubyte));
1355 GLuint i;
1356 if (!dest) {
1357 GET_CURRENT_CONTEXT(ctx);
1358 _mesa_error(ctx, GL_OUT_OF_MEMORY, "texture compression");
1359 goto cleanUp;
1360 }
1361 for (i = 0; i < n; i++) {
1362 dest[i] = CHAN_TO_UBYTE(src[i]);
1363 }
1364 if (newSource != NULL) {
1365 _mesa_free(newSource);
1366 }
1367 newSource = dest; /* we'll free this buffer before returning */
1368 source = dest; /* the new, GLubyte incoming image */
1369 }
1370
1371 data = (const GLubyte *) source;
1372 destRowStride = (destRowStride - width * 2) / 4;
1373 for (y = 0; y < height; y += 4) {
1374 GLuint offs = 0 + (y + 0) * srcRowStride;
1375 for (x = 0; x < width; x += 8) {
1376 const GLubyte *lines[4];
1377 lines[0] = &data[offs];
1378 lines[1] = lines[0] + srcRowStride;
1379 lines[2] = lines[1] + srcRowStride;
1380 lines[3] = lines[2] + srcRowStride;
1381 offs += 8 * comps;
1382 fxt1_quantize(encoded, lines, comps);
1383 /* 128 bits per 8x4 block */
1384 encoded += 4;
1385 }
1386 encoded += destRowStride;
1387 }
1388
1389 cleanUp:
1390 if (newSource != NULL) {
1391 _mesa_free(newSource);
1392 }
1393 }
1394
1395
1396 /***************************************************************************\
1397 * FXT1 decoder
1398 *
1399 * The decoder is based on GL_3DFX_texture_compression_FXT1
1400 * specification and serves as a concept for the encoder.
1401 \***************************************************************************/
1402
1403
1404 /* lookup table for scaling 5 bit colors up to 8 bits */
1405 static const GLubyte _rgb_scale_5[] = {
1406 0, 8, 16, 25, 33, 41, 49, 58,
1407 66, 74, 82, 90, 99, 107, 115, 123,
1408 132, 140, 148, 156, 165, 173, 181, 189,
1409 197, 206, 214, 222, 230, 239, 247, 255
1410 };
1411
1412 /* lookup table for scaling 6 bit colors up to 8 bits */
1413 static const GLubyte _rgb_scale_6[] = {
1414 0, 4, 8, 12, 16, 20, 24, 28,
1415 32, 36, 40, 45, 49, 53, 57, 61,
1416 65, 69, 73, 77, 81, 85, 89, 93,
1417 97, 101, 105, 109, 113, 117, 121, 125,
1418 130, 134, 138, 142, 146, 150, 154, 158,
1419 162, 166, 170, 174, 178, 182, 186, 190,
1420 194, 198, 202, 206, 210, 215, 219, 223,
1421 227, 231, 235, 239, 243, 247, 251, 255
1422 };
1423
1424
/* CC_SEL: view cc as an array of 32-bit words, pick the word holding bit
 * number `which` and shift it right so that bit becomes bit 0.  Higher
 * bits of the word are still present; callers mask what they need.
 */
#define CC_SEL(cc, which) (((const GLuint *)(cc))[(which) / 32] >> ((which) & 31))
/* UP5: expand the low 5 bits of c to 8 bits. */
#define UP5(c) (_rgb_scale_5[(c) & 31])
/* UP6: expand a 6-bit value built from the low 5 bits of c (upper bits)
 * and the low bit of b (least significant bit) to 8 bits.
 */
#define UP6(c, b) (_rgb_scale_6[(((c) & 31) << 1) | ((b) & 1)])
/* LERP: blend c0 and c1 with weight t out of n, rounding to nearest.
 * The whole expansion is parenthesized so it can be used safely as an
 * operand of a larger expression.
 */
#define LERP(n, t, c0, c1) ((((n) - (t)) * (c0) + (t) * (c1) + (n) / 2) / (n))
1429
1430
1431 static void
1432 fxt1_decode_1HI (const GLubyte *code, GLint t, GLchan *rgba)
1433 {
1434 const GLuint *cc;
1435
1436 t *= 3;
1437 cc = (const GLuint *)(code + t / 8);
1438 t = (cc[0] >> (t & 7)) & 7;
1439
1440 if (t == 7) {
1441 rgba[RCOMP] = rgba[GCOMP] = rgba[BCOMP] = rgba[ACOMP] = 0;
1442 } else {
1443 GLubyte r, g, b;
1444 cc = (const GLuint *)(code + 12);
1445 if (t == 0) {
1446 b = UP5(CC_SEL(cc, 0));
1447 g = UP5(CC_SEL(cc, 5));
1448 r = UP5(CC_SEL(cc, 10));
1449 } else if (t == 6) {
1450 b = UP5(CC_SEL(cc, 15));
1451 g = UP5(CC_SEL(cc, 20));
1452 r = UP5(CC_SEL(cc, 25));
1453 } else {
1454 b = LERP(6, t, UP5(CC_SEL(cc, 0)), UP5(CC_SEL(cc, 15)));
1455 g = LERP(6, t, UP5(CC_SEL(cc, 5)), UP5(CC_SEL(cc, 20)));
1456 r = LERP(6, t, UP5(CC_SEL(cc, 10)), UP5(CC_SEL(cc, 25)));
1457 }
1458 rgba[RCOMP] = UBYTE_TO_CHAN(r);
1459 rgba[GCOMP] = UBYTE_TO_CHAN(g);
1460 rgba[BCOMP] = UBYTE_TO_CHAN(b);
1461 rgba[ACOMP] = CHAN_MAX;
1462 }
1463 }
1464
1465
1466 static void
1467 fxt1_decode_1CHROMA (const GLubyte *code, GLint t, GLchan *rgba)
1468 {
1469 const GLuint *cc;
1470 GLuint kk;
1471
1472 cc = (const GLuint *)code;
1473 if (t & 16) {
1474 cc++;
1475 t &= 15;
1476 }
1477 t = (cc[0] >> (t * 2)) & 3;
1478
1479 t *= 15;
1480 cc = (const GLuint *)(code + 8 + t / 8);
1481 kk = cc[0] >> (t & 7);
1482 rgba[BCOMP] = UBYTE_TO_CHAN( UP5(kk) );
1483 rgba[GCOMP] = UBYTE_TO_CHAN( UP5(kk >> 5) );
1484 rgba[RCOMP] = UBYTE_TO_CHAN( UP5(kk >> 10) );
1485 rgba[ACOMP] = CHAN_MAX;
1486 }
1487
1488
1489 static void
1490 fxt1_decode_1MIXED (const GLubyte *code, GLint t, GLchan *rgba)
1491 {
1492 const GLuint *cc;
1493 GLuint col[2][3];
1494 GLint glsb, selb;
1495
1496 cc = (const GLuint *)code;
1497 if (t & 16) {
1498 t &= 15;
1499 t = (cc[1] >> (t * 2)) & 3;
1500 /* col 2 */
1501 col[0][BCOMP] = (*(const GLuint *)(code + 11)) >> 6;
1502 col[0][GCOMP] = CC_SEL(cc, 99);
1503 col[0][RCOMP] = CC_SEL(cc, 104);
1504 /* col 3 */
1505 col[1][BCOMP] = CC_SEL(cc, 109);
1506 col[1][GCOMP] = CC_SEL(cc, 114);
1507 col[1][RCOMP] = CC_SEL(cc, 119);
1508 glsb = CC_SEL(cc, 126);
1509 selb = CC_SEL(cc, 33);
1510 } else {
1511 t = (cc[0] >> (t * 2)) & 3;
1512 /* col 0 */
1513 col[0][BCOMP] = CC_SEL(cc, 64);
1514 col[0][GCOMP] = CC_SEL(cc, 69);
1515 col[0][RCOMP] = CC_SEL(cc, 74);
1516 /* col 1 */
1517 col[1][BCOMP] = CC_SEL(cc, 79);
1518 col[1][GCOMP] = CC_SEL(cc, 84);
1519 col[1][RCOMP] = CC_SEL(cc, 89);
1520 glsb = CC_SEL(cc, 125);
1521 selb = CC_SEL(cc, 1);
1522 }
1523
1524 if (CC_SEL(cc, 124) & 1) {
1525 /* alpha[0] == 1 */
1526
1527 if (t == 3) {
1528 /* zero */
1529 rgba[RCOMP] = rgba[BCOMP] = rgba[GCOMP] = rgba[ACOMP] = 0;
1530 } else {
1531 GLubyte r, g, b;
1532 if (t == 0) {
1533 b = UP5(col[0][BCOMP]);
1534 g = UP5(col[0][GCOMP]);
1535 r = UP5(col[0][RCOMP]);
1536 } else if (t == 2) {
1537 b = UP5(col[1][BCOMP]);
1538 g = UP6(col[1][GCOMP], glsb);
1539 r = UP5(col[1][RCOMP]);
1540 } else {
1541 b = (UP5(col[0][BCOMP]) + UP5(col[1][BCOMP])) / 2;
1542 g = (UP5(col[0][GCOMP]) + UP6(col[1][GCOMP], glsb)) / 2;
1543 r = (UP5(col[0][RCOMP]) + UP5(col[1][RCOMP])) / 2;
1544 }
1545 rgba[RCOMP] = UBYTE_TO_CHAN(r);
1546 rgba[GCOMP] = UBYTE_TO_CHAN(g);
1547 rgba[BCOMP] = UBYTE_TO_CHAN(b);
1548 rgba[ACOMP] = CHAN_MAX;
1549 }
1550 } else {
1551 /* alpha[0] == 0 */
1552 GLubyte r, g, b;
1553 if (t == 0) {
1554 b = UP5(col[0][BCOMP]);
1555 g = UP6(col[0][GCOMP], glsb ^ selb);
1556 r = UP5(col[0][RCOMP]);
1557 } else if (t == 3) {
1558 b = UP5(col[1][BCOMP]);
1559 g = UP6(col[1][GCOMP], glsb);
1560 r = UP5(col[1][RCOMP]);
1561 } else {
1562 b = LERP(3, t, UP5(col[0][BCOMP]), UP5(col[1][BCOMP]));
1563 g = LERP(3, t, UP6(col[0][GCOMP], glsb ^ selb),
1564 UP6(col[1][GCOMP], glsb));
1565 r = LERP(3, t, UP5(col[0][RCOMP]), UP5(col[1][RCOMP]));
1566 }
1567 rgba[RCOMP] = UBYTE_TO_CHAN(r);
1568 rgba[GCOMP] = UBYTE_TO_CHAN(g);
1569 rgba[BCOMP] = UBYTE_TO_CHAN(b);
1570 rgba[ACOMP] = CHAN_MAX;
1571 }
1572 }
1573
1574
1575 static void
1576 fxt1_decode_1ALPHA (const GLubyte *code, GLint t, GLchan *rgba)
1577 {
1578 const GLuint *cc;
1579 GLubyte r, g, b, a;
1580
1581 cc = (const GLuint *)code;
1582 if (CC_SEL(cc, 124) & 1) {
1583 /* lerp == 1 */
1584 GLuint col0[4];
1585
1586 if (t & 16) {
1587 t &= 15;
1588 t = (cc[1] >> (t * 2)) & 3;
1589 /* col 2 */
1590 col0[BCOMP] = (*(const GLuint *)(code + 11)) >> 6;
1591 col0[GCOMP] = CC_SEL(cc, 99);
1592 col0[RCOMP] = CC_SEL(cc, 104);
1593 col0[ACOMP] = CC_SEL(cc, 119);
1594 } else {
1595 t = (cc[0] >> (t * 2)) & 3;
1596 /* col 0 */
1597 col0[BCOMP] = CC_SEL(cc, 64);
1598 col0[GCOMP] = CC_SEL(cc, 69);
1599 col0[RCOMP] = CC_SEL(cc, 74);
1600 col0[ACOMP] = CC_SEL(cc, 109);
1601 }
1602
1603 if (t == 0) {
1604 b = UP5(col0[BCOMP]);
1605 g = UP5(col0[GCOMP]);
1606 r = UP5(col0[RCOMP]);
1607 a = UP5(col0[ACOMP]);
1608 } else if (t == 3) {
1609 b = UP5(CC_SEL(cc, 79));
1610 g = UP5(CC_SEL(cc, 84));
1611 r = UP5(CC_SEL(cc, 89));
1612 a = UP5(CC_SEL(cc, 114));
1613 } else {
1614 b = LERP(3, t, UP5(col0[BCOMP]), UP5(CC_SEL(cc, 79)));
1615 g = LERP(3, t, UP5(col0[GCOMP]), UP5(CC_SEL(cc, 84)));
1616 r = LERP(3, t, UP5(col0[RCOMP]), UP5(CC_SEL(cc, 89)));
1617 a = LERP(3, t, UP5(col0[ACOMP]), UP5(CC_SEL(cc, 114)));
1618 }
1619 } else {
1620 /* lerp == 0 */
1621
1622 if (t & 16) {
1623 cc++;
1624 t &= 15;
1625 }
1626 t = (cc[0] >> (t * 2)) & 3;
1627
1628 if (t == 3) {
1629 /* zero */
1630 r = g = b = a = 0;
1631 } else {
1632 GLuint kk;
1633 cc = (const GLuint *)code;
1634 a = UP5(cc[3] >> (t * 5 + 13));
1635 t *= 15;
1636 cc = (const GLuint *)(code + 8 + t / 8);
1637 kk = cc[0] >> (t & 7);
1638 b = UP5(kk);
1639 g = UP5(kk >> 5);
1640 r = UP5(kk >> 10);
1641 }
1642 }
1643 rgba[RCOMP] = UBYTE_TO_CHAN(r);
1644 rgba[GCOMP] = UBYTE_TO_CHAN(g);
1645 rgba[BCOMP] = UBYTE_TO_CHAN(b);
1646 rgba[ACOMP] = UBYTE_TO_CHAN(a);
1647 }
1648
1649
1650 void
1651 fxt1_decode_1 (const void *texture, GLint stride, /* in pixels */
1652 GLint i, GLint j, GLchan *rgba)
1653 {
1654 static void (*decode_1[]) (const GLubyte *, GLint, GLchan *) = {
1655 fxt1_decode_1HI, /* cc-high = "00?" */
1656 fxt1_decode_1HI, /* cc-high = "00?" */
1657 fxt1_decode_1CHROMA, /* cc-chroma = "010" */
1658 fxt1_decode_1ALPHA, /* alpha = "011" */
1659 fxt1_decode_1MIXED, /* mixed = "1??" */
1660 fxt1_decode_1MIXED, /* mixed = "1??" */
1661 fxt1_decode_1MIXED, /* mixed = "1??" */
1662 fxt1_decode_1MIXED /* mixed = "1??" */
1663 };
1664
1665 const GLubyte *code = (const GLubyte *)texture +
1666 ((j / 4) * (stride / 8) + (i / 8)) * 16;
1667 GLint mode = CC_SEL(code, 125);
1668 GLint t = i & 7;
1669
1670 if (t & 4) {
1671 t += 12;
1672 }
1673 t += (j & 3) * 4;
1674
1675 decode_1[mode](code, t, rgba);
1676 }