e3ac37999c157766171ab57c306eec10523c3f24
[mesa.git] / src / mesa / main / texcompress_fxt1.c
1 /*
2 * Mesa 3-D graphics library
3 * Version: 7.1
4 *
5 * Copyright (C) 1999-2008 Brian Paul All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included
15 * in all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
21 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25
/**
 * \file texcompress_fxt1.c
 * GL_3DFX_texture_compression_FXT1 support.
 */
30
31
32 #include "glheader.h"
33 #include "imports.h"
34 #include "colormac.h"
35 #include "context.h"
36 #include "convolve.h"
37 #include "image.h"
38 #include "mipmap.h"
39 #include "texcompress.h"
40 #include "texcompress_fxt1.h"
41 #include "texformat.h"
42 #include "texstore.h"
43
44
/*
 * Block encoder, declared ahead of its definition so that the texstore
 * functions below can call it.  Those callers pass the source image size,
 * the number of GLchan components per source texel (3 or 4), the source
 * pixels with a row stride counted in GLchans, and the destination
 * address together with the destination row stride.
 */
static void
fxt1_encode (GLuint width, GLuint height, GLint comps,
             const void *source, GLint srcRowStride,
             void *dest, GLint destRowStride);

/*
 * Single-texel decoder used by the FetchTexel functions below, which pass
 * the texture image data, its row stride, the texel column (i) and row (j),
 * and the GLchan array receiving the result.  It is declared without
 * `static', i.e. with external linkage.
 */
void
fxt1_decode_1 (const void *texture, GLint stride,
               GLint i, GLint j, GLchan *rgba);
53
54
55 /**
56 * Called during context initialization.
57 */
58 void
59 _mesa_init_texture_fxt1( GLcontext *ctx )
60 {
61 (void) ctx;
62 }
63
64
65 /**
66 * Called via TexFormat->StoreImage to store an RGB_FXT1 texture.
67 */
68 GLboolean
69 _mesa_texstore_rgb_fxt1(TEXSTORE_PARAMS)
70 {
71 const GLchan *pixels;
72 GLint srcRowStride;
73 GLubyte *dst;
74 const GLint texWidth = dstRowStride * 8 / 16; /* a bit of a hack */
75 const GLchan *tempImage = NULL;
76
77 ASSERT(dstFormat == &_mesa_texformat_rgb_fxt1);
78 ASSERT(dstXoffset % 8 == 0);
79 ASSERT(dstYoffset % 4 == 0);
80 ASSERT(dstZoffset == 0);
81 (void) dstZoffset;
82 (void) dstImageOffsets;
83
84 if (srcFormat != GL_RGB ||
85 srcType != CHAN_TYPE ||
86 ctx->_ImageTransferState ||
87 srcPacking->SwapBytes) {
88 /* convert image to RGB/GLchan */
89 tempImage = _mesa_make_temp_chan_image(ctx, dims,
90 baseInternalFormat,
91 dstFormat->BaseFormat,
92 srcWidth, srcHeight, srcDepth,
93 srcFormat, srcType, srcAddr,
94 srcPacking);
95 if (!tempImage)
96 return GL_FALSE; /* out of memory */
97 _mesa_adjust_image_for_convolution(ctx, dims, &srcWidth, &srcHeight);
98 pixels = tempImage;
99 srcRowStride = 3 * srcWidth;
100 srcFormat = GL_RGB;
101 }
102 else {
103 pixels = (const GLchan *) srcAddr;
104 srcRowStride = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat,
105 srcType) / sizeof(GLchan);
106 }
107
108 dst = _mesa_compressed_image_address(dstXoffset, dstYoffset, 0,
109 dstFormat->MesaFormat,
110 texWidth, (GLubyte *) dstAddr);
111
112 fxt1_encode(srcWidth, srcHeight, 3, pixels, srcRowStride,
113 dst, dstRowStride);
114
115 if (tempImage)
116 _mesa_free((void*) tempImage);
117
118 return GL_TRUE;
119 }
120
121
122 /**
123 * Called via TexFormat->StoreImage to store an RGBA_FXT1 texture.
124 */
125 GLboolean
126 _mesa_texstore_rgba_fxt1(TEXSTORE_PARAMS)
127 {
128 const GLchan *pixels;
129 GLint srcRowStride;
130 GLubyte *dst;
131 GLint texWidth = dstRowStride * 8 / 16; /* a bit of a hack */
132 const GLchan *tempImage = NULL;
133
134 ASSERT(dstFormat == &_mesa_texformat_rgba_fxt1);
135 ASSERT(dstXoffset % 8 == 0);
136 ASSERT(dstYoffset % 4 == 0);
137 ASSERT(dstZoffset == 0);
138 (void) dstZoffset;
139 (void) dstImageOffsets;
140
141 if (srcFormat != GL_RGBA ||
142 srcType != CHAN_TYPE ||
143 ctx->_ImageTransferState ||
144 srcPacking->SwapBytes) {
145 /* convert image to RGBA/GLchan */
146 tempImage = _mesa_make_temp_chan_image(ctx, dims,
147 baseInternalFormat,
148 dstFormat->BaseFormat,
149 srcWidth, srcHeight, srcDepth,
150 srcFormat, srcType, srcAddr,
151 srcPacking);
152 if (!tempImage)
153 return GL_FALSE; /* out of memory */
154 _mesa_adjust_image_for_convolution(ctx, dims, &srcWidth, &srcHeight);
155 pixels = tempImage;
156 srcRowStride = 4 * srcWidth;
157 srcFormat = GL_RGBA;
158 }
159 else {
160 pixels = (const GLchan *) srcAddr;
161 srcRowStride = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat,
162 srcType) / sizeof(GLchan);
163 }
164
165 dst = _mesa_compressed_image_address(dstXoffset, dstYoffset, 0,
166 dstFormat->MesaFormat,
167 texWidth, (GLubyte *) dstAddr);
168
169 fxt1_encode(srcWidth, srcHeight, 4, pixels, srcRowStride,
170 dst, dstRowStride);
171
172 if (tempImage)
173 _mesa_free((void*) tempImage);
174
175 return GL_TRUE;
176 }
177
178
179 void
180 _mesa_fetch_texel_2d_rgba_fxt1( const struct gl_texture_image *texImage,
181 GLint i, GLint j, GLint k, GLchan *texel )
182 {
183 (void) k;
184 fxt1_decode_1(texImage->Data, texImage->RowStride, i, j, texel);
185 }
186
187
188 void
189 _mesa_fetch_texel_2d_f_rgba_fxt1( const struct gl_texture_image *texImage,
190 GLint i, GLint j, GLint k, GLfloat *texel )
191 {
192 /* just sample as GLchan and convert to float here */
193 GLchan rgba[4];
194 (void) k;
195 fxt1_decode_1(texImage->Data, texImage->RowStride, i, j, rgba);
196 texel[RCOMP] = CHAN_TO_FLOAT(rgba[RCOMP]);
197 texel[GCOMP] = CHAN_TO_FLOAT(rgba[GCOMP]);
198 texel[BCOMP] = CHAN_TO_FLOAT(rgba[BCOMP]);
199 texel[ACOMP] = CHAN_TO_FLOAT(rgba[ACOMP]);
200 }
201
202
203 void
204 _mesa_fetch_texel_2d_rgb_fxt1( const struct gl_texture_image *texImage,
205 GLint i, GLint j, GLint k, GLchan *texel )
206 {
207 (void) k;
208 fxt1_decode_1(texImage->Data, texImage->RowStride, i, j, texel);
209 texel[ACOMP] = 255;
210 }
211
212
213 void
214 _mesa_fetch_texel_2d_f_rgb_fxt1( const struct gl_texture_image *texImage,
215 GLint i, GLint j, GLint k, GLfloat *texel )
216 {
217 /* just sample as GLchan and convert to float here */
218 GLchan rgba[4];
219 (void) k;
220 fxt1_decode_1(texImage->Data, texImage->RowStride, i, j, rgba);
221 texel[RCOMP] = CHAN_TO_FLOAT(rgba[RCOMP]);
222 texel[GCOMP] = CHAN_TO_FLOAT(rgba[GCOMP]);
223 texel[BCOMP] = CHAN_TO_FLOAT(rgba[BCOMP]);
224 texel[ACOMP] = 1.0F;
225 }
226
227
228
/*
 * Texture format descriptor for RGB FXT1.
 *
 * The per-channel bit counts are approximations, and TexelBytes is 0.
 * Only the 2D fetch functions are provided; the 1D and 3D slots are NULL,
 * and so is StoreTexel.
 */
const struct gl_texture_format _mesa_texformat_rgb_fxt1 = {
   MESA_FORMAT_RGB_FXT1, /* MesaFormat */
   GL_RGB, /* BaseFormat */
   GL_UNSIGNED_NORMALIZED_ARB, /* DataType */
   4, /*approx*/ /* RedBits */
   4, /*approx*/ /* GreenBits */
   4, /*approx*/ /* BlueBits */
   0, /* AlphaBits */
   0, /* LuminanceBits */
   0, /* IntensityBits */
   0, /* IndexBits */
   0, /* DepthBits */
   0, /* StencilBits */
   0, /* TexelBytes */
   _mesa_texstore_rgb_fxt1, /* StoreTexImageFunc */
   NULL, /*impossible*/ /* FetchTexel1D */
   _mesa_fetch_texel_2d_rgb_fxt1, /* FetchTexel2D */
   NULL, /*impossible*/ /* FetchTexel3D */
   NULL, /*impossible*/ /* FetchTexel1Df */
   _mesa_fetch_texel_2d_f_rgb_fxt1, /* FetchTexel2Df */
   NULL, /*impossible*/ /* FetchTexel3Df */
   NULL /* StoreTexel */
};
252
/*
 * Texture format descriptor for RGBA FXT1.
 *
 * The per-channel bit counts (including the single alpha bit) are
 * approximations, and TexelBytes is 0.  Only the 2D fetch functions are
 * provided; the 1D and 3D slots are NULL, and so is StoreTexel.
 */
const struct gl_texture_format _mesa_texformat_rgba_fxt1 = {
   MESA_FORMAT_RGBA_FXT1, /* MesaFormat */
   GL_RGBA, /* BaseFormat */
   GL_UNSIGNED_NORMALIZED_ARB, /* DataType */
   4, /*approx*/ /* RedBits */
   4, /*approx*/ /* GreenBits */
   4, /*approx*/ /* BlueBits */
   1, /*approx*/ /* AlphaBits */
   0, /* LuminanceBits */
   0, /* IntensityBits */
   0, /* IndexBits */
   0, /* DepthBits */
   0, /* StencilBits */
   0, /* TexelBytes */
   _mesa_texstore_rgba_fxt1, /* StoreTexImageFunc */
   NULL, /*impossible*/ /* FetchTexel1D */
   _mesa_fetch_texel_2d_rgba_fxt1, /* FetchTexel2D */
   NULL, /*impossible*/ /* FetchTexel3D */
   NULL, /*impossible*/ /* FetchTexel1Df */
   _mesa_fetch_texel_2d_f_rgba_fxt1, /* FetchTexel2Df */
   NULL, /*impossible*/ /* FetchTexel3Df */
   NULL /* StoreTexel */
};
276
277
278 /***************************************************************************\
279 * FXT1 encoder
280 *
281 * The encoder was built by reversing the decoder,
282 * and is vaguely based on Texus2 by 3dfx. Note that this code
283 * is merely a proof of concept, since it is highly UNoptimized;
284 * moreover, it is sub-optimal due to initial conditions passed
285 * to Lloyd's algorithm (the interpolation modes are even worse).
286 \***************************************************************************/
287
288
#define MAX_COMP 4 /* ever needed maximum number of components in texel */
#define MAX_VECT 4 /* ever needed maximum number of base vectors to find */
#define N_TEXELS 32 /* number of texels in a block (always 32) */
#define LL_N_REP 50 /* number of iterations in lloyd's vq */
#define LL_RMS_D 10 /* fault tolerance (maximum delta) */
#define LL_RMS_E 255 /* fault tolerance (maximum error) */
#define ALPHA_TS 2 /* alpha threshold: (255 - ALPHA_TS) deemed opaque */

/*
 * True when the four bytes of the texel at `v' are all zero
 * (transparent black).
 *
 * The bytes are tested one by one rather than through a GLuint lvalue:
 * the texels live in GLubyte arrays, so a GLuint read would break the
 * aliasing rules and could be misaligned.  `v' is evaluated several
 * times and must therefore be free of side effects.
 */
#define ISTBLACK(v)                                                     \
   ((((const GLubyte *)(v))[0] | ((const GLubyte *)(v))[1] |            \
     ((const GLubyte *)(v))[2] | ((const GLubyte *)(v))[3]) == 0)
297
298
/*
 * Define a 64-bit unsigned integer type and macros.
 *
 * Fx64 is the accumulator used to assemble the upper 64 bits of a block:
 *   FX64_MOV32(a, b)  set a to the 32-bit value b
 *   FX64_OR32(a, b)   OR the 32-bit value b into the low bits of a
 *   FX64_SHL(a, c)    shift a left by c bits
 *
 * The first branch uses a native uint64_t.  The second is a two-word
 * emulation; its shift requires 0 < c < 64 (the code in this file always
 * shifts by 5).  In the emulation FX64_MOV32 also clears the high word so
 * that FX64_SHL never reads an uninitialized value.
 */
#if 1

#define FX64_NATIVE 1

typedef uint64_t Fx64;

#define FX64_MOV32(a, b) ((a) = (b))
#define FX64_OR32(a, b)  ((a) |= (b))
#define FX64_SHL(a, c)   ((a) <<= (c))

#else

#define FX64_NATIVE 0

typedef struct {
   GLuint lo, hi;
} Fx64;

#define FX64_MOV32(a, b) ((a).lo = (b), (a).hi = 0)
#define FX64_OR32(a, b)  ((a).lo |= (b))

#define FX64_SHL(a, c)                                           \
   do {                                                          \
      if ((c) >= 32) {                                           \
         (a).hi = (a).lo << ((c) - 32);                          \
         (a).lo = 0;                                             \
      } else {                                                   \
         (a).hi = ((a).hi << (c)) | ((a).lo >> (32 - (c)));      \
         (a).lo <<= (c);                                         \
      }                                                          \
   } while (0)

#endif
335
336
/* per-component weight; can be used to obtain an oblong metric:
 * 0.30 / 0.59 / 0.11
 */
#define F(i) ((GLfloat)1)
#define SAFECDOT 1 /* for paranoids */

/*
 * Compute the interpolation vector IV and offset B for the segment from
 * V0 to V1, scaled so that CALCCDOT() maps V0 to index 0 and V1 to index
 * NV (with rounding to nearest).
 *
 *   NV  highest index to produce
 *   NC  number of components to use
 *   IV  array receiving the NC interpolation coefficients
 *   B   variable receiving the offset
 *   V0  color mapped to index 0
 *   V1  color mapped to index NV
 *
 * The macro uses a variable `i' from the enclosing scope as loop counter.
 * V0 and V1 must differ in at least one of the NC components, otherwise
 * the squared length is zero and the scale factor is not finite.
 */
#define MAKEIVEC(NV, NC, IV, B, V0, V1)                          \
   do {                                                          \
      /* compute interpolation vector */                         \
      GLfloat ivLen2_ = 0.0F;                                    \
      GLfloat ivScale_;                                          \
                                                                 \
      for (i = 0; i < (NC); i++) {                               \
         (IV)[i] = ((V1)[i] - (V0)[i]) * F(i);                   \
         ivLen2_ += (IV)[i] * (IV)[i];                           \
      }                                                          \
      ivScale_ = (GLfloat)(NV) / ivLen2_;                        \
      (B) = 0;                                                   \
      for (i = 0; i < (NC); i++) {                               \
         (IV)[i] *= F(i);                                        \
         (B) -= (IV)[i] * (V0)[i];                               \
         (IV)[i] *= ivScale_;                                    \
      }                                                          \
      (B) = (B) * ivScale_ + 0.5f;                               \
   } while (0)

/*
 * Project color V onto the interpolation vector produced by MAKEIVEC()
 * and store the resulting index in TEXEL.  With SAFECDOT enabled the
 * index is clamped to the range [0, NV].
 *
 * The macro uses a variable `i' from the enclosing scope as loop counter.
 */
#define CALCCDOT(TEXEL, NV, NC, IV, B, V)                        \
   do {                                                          \
      GLfloat cdotAcc_ = 0.0F;                                   \
      for (i = 0; i < (NC); i++) {                               \
         cdotAcc_ += (V)[i] * (IV)[i];                           \
      }                                                          \
      (TEXEL) = (GLint)(cdotAcc_ + (B));                         \
      if (SAFECDOT) {                                            \
         if ((TEXEL) < 0) {                                      \
            (TEXEL) = 0;                                         \
         } else if ((TEXEL) > (NV)) {                            \
            (TEXEL) = (NV);                                      \
         }                                                       \
      }                                                          \
   } while (0)
375
376
377 static GLint
378 fxt1_bestcol (GLfloat vec[][MAX_COMP], GLint nv,
379 GLubyte input[MAX_COMP], GLint nc)
380 {
381 GLint i, j, best = -1;
382 GLfloat err = 1e9; /* big enough */
383
384 for (j = 0; j < nv; j++) {
385 GLfloat e = 0.0F;
386 for (i = 0; i < nc; i++) {
387 e += (vec[j][i] - input[i]) * (vec[j][i] - input[i]);
388 }
389 if (e < err) {
390 err = e;
391 best = j;
392 }
393 }
394
395 return best;
396 }
397
398
399 static GLint
400 fxt1_worst (GLfloat vec[MAX_COMP],
401 GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
402 {
403 GLint i, k, worst = -1;
404 GLfloat err = -1.0F; /* small enough */
405
406 for (k = 0; k < n; k++) {
407 GLfloat e = 0.0F;
408 for (i = 0; i < nc; i++) {
409 e += (vec[i] - input[k][i]) * (vec[i] - input[k][i]);
410 }
411 if (e > err) {
412 err = e;
413 worst = k;
414 }
415 }
416
417 return worst;
418 }
419
420
421 static GLint
422 fxt1_variance (GLdouble variance[MAX_COMP],
423 GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
424 {
425 GLint i, k, best = 0;
426 GLint sx, sx2;
427 GLdouble var, maxvar = -1; /* small enough */
428 GLdouble teenth = 1.0 / n;
429
430 for (i = 0; i < nc; i++) {
431 sx = sx2 = 0;
432 for (k = 0; k < n; k++) {
433 GLint t = input[k][i];
434 sx += t;
435 sx2 += t * t;
436 }
437 var = sx2 * teenth - sx * sx * teenth * teenth;
438 if (maxvar < var) {
439 maxvar = var;
440 best = i;
441 }
442 if (variance) {
443 variance[i] = var;
444 }
445 }
446
447 return best;
448 }
449
450
451 static GLint
452 fxt1_choose (GLfloat vec[][MAX_COMP], GLint nv,
453 GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
454 {
455 #if 0
456 /* Choose colors from a grid.
457 */
458 GLint i, j;
459
460 for (j = 0; j < nv; j++) {
461 GLint m = j * (n - 1) / (nv - 1);
462 for (i = 0; i < nc; i++) {
463 vec[j][i] = input[m][i];
464 }
465 }
466 #else
467 /* Our solution here is to find the darkest and brightest colors in
468 * the 8x4 tile and use those as the two representative colors.
469 * There are probably better algorithms to use (histogram-based).
470 */
471 GLint i, j, k;
472 GLint minSum = 2000; /* big enough */
473 GLint maxSum = -1; /* small enough */
474 GLint minCol = 0; /* phoudoin: silent compiler! */
475 GLint maxCol = 0; /* phoudoin: silent compiler! */
476
477 struct {
478 GLint flag;
479 GLint key;
480 GLint freq;
481 GLint idx;
482 } hist[N_TEXELS];
483 GLint lenh = 0;
484
485 _mesa_memset(hist, 0, sizeof(hist));
486
487 for (k = 0; k < n; k++) {
488 GLint l;
489 GLint key = 0;
490 GLint sum = 0;
491 for (i = 0; i < nc; i++) {
492 key <<= 8;
493 key |= input[k][i];
494 sum += input[k][i];
495 }
496 for (l = 0; l < n; l++) {
497 if (!hist[l].flag) {
498 /* alloc new slot */
499 hist[l].flag = !0;
500 hist[l].key = key;
501 hist[l].freq = 1;
502 hist[l].idx = k;
503 lenh = l + 1;
504 break;
505 } else if (hist[l].key == key) {
506 hist[l].freq++;
507 break;
508 }
509 }
510 if (minSum > sum) {
511 minSum = sum;
512 minCol = k;
513 }
514 if (maxSum < sum) {
515 maxSum = sum;
516 maxCol = k;
517 }
518 }
519
520 if (lenh <= nv) {
521 for (j = 0; j < lenh; j++) {
522 for (i = 0; i < nc; i++) {
523 vec[j][i] = (GLfloat)input[hist[j].idx][i];
524 }
525 }
526 for (; j < nv; j++) {
527 for (i = 0; i < nc; i++) {
528 vec[j][i] = vec[0][i];
529 }
530 }
531 return 0;
532 }
533
534 for (j = 0; j < nv; j++) {
535 for (i = 0; i < nc; i++) {
536 vec[j][i] = ((nv - 1 - j) * input[minCol][i] + j * input[maxCol][i] + (nv - 1) / 2) / (GLfloat)(nv - 1);
537 }
538 }
539 #endif
540
541 return !0;
542 }
543
544
545 static GLint
546 fxt1_lloyd (GLfloat vec[][MAX_COMP], GLint nv,
547 GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
548 {
549 /* Use the generalized lloyd's algorithm for VQ:
550 * find 4 color vectors.
551 *
552 * for each sample color
553 * sort to nearest vector.
554 *
555 * replace each vector with the centroid of it's matching colors.
556 *
557 * repeat until RMS doesn't improve.
558 *
559 * if a color vector has no samples, or becomes the same as another
560 * vector, replace it with the color which is farthest from a sample.
561 *
562 * vec[][MAX_COMP] initial vectors and resulting colors
563 * nv number of resulting colors required
564 * input[N_TEXELS][MAX_COMP] input texels
565 * nc number of components in input / vec
566 * n number of input samples
567 */
568
569 GLint sum[MAX_VECT][MAX_COMP]; /* used to accumulate closest texels */
570 GLint cnt[MAX_VECT]; /* how many times a certain vector was chosen */
571 GLfloat error, lasterror = 1e9;
572
573 GLint i, j, k, rep;
574
575 /* the quantizer */
576 for (rep = 0; rep < LL_N_REP; rep++) {
577 /* reset sums & counters */
578 for (j = 0; j < nv; j++) {
579 for (i = 0; i < nc; i++) {
580 sum[j][i] = 0;
581 }
582 cnt[j] = 0;
583 }
584 error = 0;
585
586 /* scan whole block */
587 for (k = 0; k < n; k++) {
588 #if 1
589 GLint best = -1;
590 GLfloat err = 1e9; /* big enough */
591 /* determine best vector */
592 for (j = 0; j < nv; j++) {
593 GLfloat e = (vec[j][0] - input[k][0]) * (vec[j][0] - input[k][0]) +
594 (vec[j][1] - input[k][1]) * (vec[j][1] - input[k][1]) +
595 (vec[j][2] - input[k][2]) * (vec[j][2] - input[k][2]);
596 if (nc == 4) {
597 e += (vec[j][3] - input[k][3]) * (vec[j][3] - input[k][3]);
598 }
599 if (e < err) {
600 err = e;
601 best = j;
602 }
603 }
604 #else
605 GLint best = fxt1_bestcol(vec, nv, input[k], nc, &err);
606 #endif
607 /* add in closest color */
608 for (i = 0; i < nc; i++) {
609 sum[best][i] += input[k][i];
610 }
611 /* mark this vector as used */
612 cnt[best]++;
613 /* accumulate error */
614 error += err;
615 }
616
617 /* check RMS */
618 if ((error < LL_RMS_E) ||
619 ((error < lasterror) && ((lasterror - error) < LL_RMS_D))) {
620 return !0; /* good match */
621 }
622 lasterror = error;
623
624 /* move each vector to the barycenter of its closest colors */
625 for (j = 0; j < nv; j++) {
626 if (cnt[j]) {
627 GLfloat div = 1.0F / cnt[j];
628 for (i = 0; i < nc; i++) {
629 vec[j][i] = div * sum[j][i];
630 }
631 } else {
632 /* this vec has no samples or is identical with a previous vec */
633 GLint worst = fxt1_worst(vec[j], input, nc, n);
634 for (i = 0; i < nc; i++) {
635 vec[j][i] = input[worst][i];
636 }
637 }
638 }
639 }
640
641 return 0; /* could not converge fast enough */
642 }
643
644
645 static void
646 fxt1_quantize_CHROMA (GLuint *cc,
647 GLubyte input[N_TEXELS][MAX_COMP])
648 {
649 const GLint n_vect = 4; /* 4 base vectors to find */
650 const GLint n_comp = 3; /* 3 components: R, G, B */
651 GLfloat vec[MAX_VECT][MAX_COMP];
652 GLint i, j, k;
653 Fx64 hi; /* high quadword */
654 GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
655
656 if (fxt1_choose(vec, n_vect, input, n_comp, N_TEXELS) != 0) {
657 fxt1_lloyd(vec, n_vect, input, n_comp, N_TEXELS);
658 }
659
660 FX64_MOV32(hi, 4); /* cc-chroma = "010" + unused bit */
661 for (j = n_vect - 1; j >= 0; j--) {
662 for (i = 0; i < n_comp; i++) {
663 /* add in colors */
664 FX64_SHL(hi, 5);
665 FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
666 }
667 }
668 ((Fx64 *)cc)[1] = hi;
669
670 lohi = lolo = 0;
671 /* right microtile */
672 for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) {
673 lohi <<= 2;
674 lohi |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
675 }
676 /* left microtile */
677 for (; k >= 0; k--) {
678 lolo <<= 2;
679 lolo |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
680 }
681 cc[1] = lohi;
682 cc[0] = lolo;
683 }
684
685
686 static void
687 fxt1_quantize_ALPHA0 (GLuint *cc,
688 GLubyte input[N_TEXELS][MAX_COMP],
689 GLubyte reord[N_TEXELS][MAX_COMP], GLint n)
690 {
691 const GLint n_vect = 3; /* 3 base vectors to find */
692 const GLint n_comp = 4; /* 4 components: R, G, B, A */
693 GLfloat vec[MAX_VECT][MAX_COMP];
694 GLint i, j, k;
695 Fx64 hi; /* high quadword */
696 GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
697
698 /* the last vector indicates zero */
699 for (i = 0; i < n_comp; i++) {
700 vec[n_vect][i] = 0;
701 }
702
703 /* the first n texels in reord are guaranteed to be non-zero */
704 if (fxt1_choose(vec, n_vect, reord, n_comp, n) != 0) {
705 fxt1_lloyd(vec, n_vect, reord, n_comp, n);
706 }
707
708 FX64_MOV32(hi, 6); /* alpha = "011" + lerp = 0 */
709 for (j = n_vect - 1; j >= 0; j--) {
710 /* add in alphas */
711 FX64_SHL(hi, 5);
712 FX64_OR32(hi, (GLuint)(vec[j][ACOMP] / 8.0F));
713 }
714 for (j = n_vect - 1; j >= 0; j--) {
715 for (i = 0; i < n_comp - 1; i++) {
716 /* add in colors */
717 FX64_SHL(hi, 5);
718 FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
719 }
720 }
721 ((Fx64 *)cc)[1] = hi;
722
723 lohi = lolo = 0;
724 /* right microtile */
725 for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) {
726 lohi <<= 2;
727 lohi |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
728 }
729 /* left microtile */
730 for (; k >= 0; k--) {
731 lolo <<= 2;
732 lolo |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
733 }
734 cc[1] = lohi;
735 cc[0] = lolo;
736 }
737
738
739 static void
740 fxt1_quantize_ALPHA1 (GLuint *cc,
741 GLubyte input[N_TEXELS][MAX_COMP])
742 {
743 const GLint n_vect = 3; /* highest vector number in each microtile */
744 const GLint n_comp = 4; /* 4 components: R, G, B, A */
745 GLfloat vec[1 + 1 + 1][MAX_COMP]; /* 1.5 extrema for each sub-block */
746 GLfloat b, iv[MAX_COMP]; /* interpolation vector */
747 GLint i, j, k;
748 Fx64 hi; /* high quadword */
749 GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
750
751 GLint minSum;
752 GLint maxSum;
753 GLint minColL = 0, maxColL = 0;
754 GLint minColR = 0, maxColR = 0;
755 GLint sumL = 0, sumR = 0;
756 GLint nn_comp;
757 /* Our solution here is to find the darkest and brightest colors in
758 * the 4x4 tile and use those as the two representative colors.
759 * There are probably better algorithms to use (histogram-based).
760 */
761 nn_comp = n_comp;
762 while ((minColL == maxColL) && nn_comp) {
763 minSum = 2000; /* big enough */
764 maxSum = -1; /* small enough */
765 for (k = 0; k < N_TEXELS / 2; k++) {
766 GLint sum = 0;
767 for (i = 0; i < nn_comp; i++) {
768 sum += input[k][i];
769 }
770 if (minSum > sum) {
771 minSum = sum;
772 minColL = k;
773 }
774 if (maxSum < sum) {
775 maxSum = sum;
776 maxColL = k;
777 }
778 sumL += sum;
779 }
780
781 nn_comp--;
782 }
783
784 nn_comp = n_comp;
785 while ((minColR == maxColR) && nn_comp) {
786 minSum = 2000; /* big enough */
787 maxSum = -1; /* small enough */
788 for (k = N_TEXELS / 2; k < N_TEXELS; k++) {
789 GLint sum = 0;
790 for (i = 0; i < nn_comp; i++) {
791 sum += input[k][i];
792 }
793 if (minSum > sum) {
794 minSum = sum;
795 minColR = k;
796 }
797 if (maxSum < sum) {
798 maxSum = sum;
799 maxColR = k;
800 }
801 sumR += sum;
802 }
803
804 nn_comp--;
805 }
806
807 /* choose the common vector (yuck!) */
808 {
809 GLint j1, j2;
810 GLint v1 = 0, v2 = 0;
811 GLfloat err = 1e9; /* big enough */
812 GLfloat tv[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
813 for (i = 0; i < n_comp; i++) {
814 tv[0][i] = input[minColL][i];
815 tv[1][i] = input[maxColL][i];
816 tv[2][i] = input[minColR][i];
817 tv[3][i] = input[maxColR][i];
818 }
819 for (j1 = 0; j1 < 2; j1++) {
820 for (j2 = 2; j2 < 4; j2++) {
821 GLfloat e = 0.0F;
822 for (i = 0; i < n_comp; i++) {
823 e += (tv[j1][i] - tv[j2][i]) * (tv[j1][i] - tv[j2][i]);
824 }
825 if (e < err) {
826 err = e;
827 v1 = j1;
828 v2 = j2;
829 }
830 }
831 }
832 for (i = 0; i < n_comp; i++) {
833 vec[0][i] = tv[1 - v1][i];
834 vec[1][i] = (tv[v1][i] * sumL + tv[v2][i] * sumR) / (sumL + sumR);
835 vec[2][i] = tv[5 - v2][i];
836 }
837 }
838
839 /* left microtile */
840 cc[0] = 0;
841 if (minColL != maxColL) {
842 /* compute interpolation vector */
843 MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
844
845 /* add in texels */
846 lolo = 0;
847 for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
848 GLint texel;
849 /* interpolate color */
850 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
851 /* add in texel */
852 lolo <<= 2;
853 lolo |= texel;
854 }
855
856 cc[0] = lolo;
857 }
858
859 /* right microtile */
860 cc[1] = 0;
861 if (minColR != maxColR) {
862 /* compute interpolation vector */
863 MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[1]);
864
865 /* add in texels */
866 lohi = 0;
867 for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
868 GLint texel;
869 /* interpolate color */
870 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
871 /* add in texel */
872 lohi <<= 2;
873 lohi |= texel;
874 }
875
876 cc[1] = lohi;
877 }
878
879 FX64_MOV32(hi, 7); /* alpha = "011" + lerp = 1 */
880 for (j = n_vect - 1; j >= 0; j--) {
881 /* add in alphas */
882 FX64_SHL(hi, 5);
883 FX64_OR32(hi, (GLuint)(vec[j][ACOMP] / 8.0F));
884 }
885 for (j = n_vect - 1; j >= 0; j--) {
886 for (i = 0; i < n_comp - 1; i++) {
887 /* add in colors */
888 FX64_SHL(hi, 5);
889 FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
890 }
891 }
892 ((Fx64 *)cc)[1] = hi;
893 }
894
895
/*
 * Encode a block in HI mode: two RGB colors for the whole 8x4 block and
 * one 3-bit index per texel.  Indices 0..6 interpolate between the two
 * colors; index 7 is used for texels whose four bytes are all zero
 * (transparent black).
 *
 * cc      receives the block as four GLuints: cc[3] holds the two colors
 *         at 5 bits per component, the 3-bit indices are ORed into the
 *         bytes below it
 * input   the 32 texels of the block, in the order they are indexed
 * reord   texels scanned for the darkest / brightest color
 * n       number of texels in reord to scan
 *
 * NOTE(review): the indices are stored through a GLuint pointer placed at
 * an arbitrary byte offset inside cc, i.e. a possibly unaligned access
 * whose bit layout depends on the host byte order -- presumably the
 * decoder reads the indices the same way; confirm before changing.
 */
static void
fxt1_quantize_HI (GLuint *cc,
                  GLubyte input[N_TEXELS][MAX_COMP],
                  GLubyte reord[N_TEXELS][MAX_COMP], GLint n)
{
   const GLint n_vect = 6; /* highest vector number */
   const GLint n_comp = 3; /* 3 components: R, G, B */
   GLfloat b = 0.0F; /* phoudoin: silent compiler! */
   GLfloat iv[MAX_COMP]; /* interpolation vector */
   GLint i, k;
   GLuint hihi; /* high quadword: hi dword */

   GLint minSum = 2000; /* big enough */
   GLint maxSum = -1; /* small enough */
   GLint minCol = 0; /* phoudoin: silent compiler! */
   GLint maxCol = 0; /* phoudoin: silent compiler! */

   /* Our solution here is to find the darkest and brightest colors in
    * the 8x4 tile and use those as the two representative colors.
    * There are probably better algorithms to use (histogram-based).
    */
   for (k = 0; k < n; k++) {
      GLint sum = 0;
      for (i = 0; i < n_comp; i++) {
         sum += reord[k][i];
      }
      if (minSum > sum) {
         minSum = sum;
         minCol = k;
      }
      if (maxSum < sum) {
         maxSum = sum;
         maxCol = k;
      }
   }

   /* brightest color first, then the darkest, 5 bits per component */
   hihi = 0; /* cc-hi = "00" */
   for (i = 0; i < n_comp; i++) {
      /* add in colors */
      hihi <<= 5;
      hihi |= reord[maxCol][i] >> 3;
   }
   for (i = 0; i < n_comp; i++) {
      /* add in colors */
      hihi <<= 5;
      hihi |= reord[minCol][i] >> 3;
   }
   cc[3] = hihi;
   /* the index bits are ORed in below, so start from zero */
   cc[0] = cc[1] = cc[2] = 0;

   /* compute interpolation vector */
   if (minCol != maxCol) {
      MAKEIVEC(n_vect, n_comp, iv, b, reord[minCol], reord[maxCol]);
   }

   /* add in texels */
   for (k = N_TEXELS - 1; k >= 0; k--) {
      GLint t = k * 3; /* bit position of this texel's index */
      GLuint *kk = (GLuint *)((char *)cc + t / 8);
      GLint texel = n_vect + 1; /* transparent black */

      if (!ISTBLACK(input[k])) {
         /* when both colors coincide the index stays 0 */
         if (minCol != maxCol) {
            /* interpolate color */
            CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
            /* add in texel */
            kk[0] |= texel << (t & 7);
         }
      } else {
         /* add in texel */
         kk[0] |= texel << (t & 7);
      }
   }
}
970
971
/*
 * Encode a block in MIXED mode with transparency: each 4x4 microtile has
 * two RGB colors of its own; indices 0..2 interpolate between them and
 * index 3 is used for texels whose four bytes are all zero (transparent
 * black).
 *
 * cc      receives the block as four GLuints: cc[0] holds the 2-bit
 *         indices of texels 0..15, cc[1] those of texels 16..31, and the
 *         upper 64 bits hold the mode bits and the four colors at 5 bits
 *         per component
 * input   the 32 texels of the block
 */
static void
fxt1_quantize_MIXED1 (GLuint *cc,
                      GLubyte input[N_TEXELS][MAX_COMP])
{
   const GLint n_vect = 2; /* highest vector number in each microtile */
   const GLint n_comp = 3; /* 3 components: R, G, B */
   GLubyte vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
   GLfloat b, iv[MAX_COMP]; /* interpolation vector */
   GLint i, j, k;
   Fx64 hi; /* high quadword */
   GLuint lohi, lolo; /* low quadword: hi dword, lo dword */

   GLint minSum;
   GLint maxSum;
   /* maxCol* stays -1 when a microtile has nothing but transparent black */
   GLint minColL = 0, maxColL = -1;
   GLint minColR = 0, maxColR = -1;

   /* Our solution here is to find the darkest and brightest colors in
    * the 4x4 tile and use those as the two representative colors.
    * There are probably better algorithms to use (histogram-based).
    * Transparent black texels are left out of the search.
    */
   minSum = 2000; /* big enough */
   maxSum = -1; /* small enough */
   for (k = 0; k < N_TEXELS / 2; k++) {
      if (!ISTBLACK(input[k])) {
         GLint sum = 0;
         for (i = 0; i < n_comp; i++) {
            sum += input[k][i];
         }
         if (minSum > sum) {
            minSum = sum;
            minColL = k;
         }
         if (maxSum < sum) {
            maxSum = sum;
            maxColL = k;
         }
      }
   }
   minSum = 2000; /* big enough */
   maxSum = -1; /* small enough */
   for (; k < N_TEXELS; k++) {
      if (!ISTBLACK(input[k])) {
         GLint sum = 0;
         for (i = 0; i < n_comp; i++) {
            sum += input[k][i];
         }
         if (minSum > sum) {
            minSum = sum;
            minColR = k;
         }
         if (maxSum < sum) {
            maxSum = sum;
            maxColR = k;
         }
      }
   }

   /* left microtile */
   if (maxColL == -1) {
      /* all transparent black */
      cc[0] = ~0u;
      for (i = 0; i < n_comp; i++) {
         vec[0][i] = 0;
         vec[1][i] = 0;
      }
   } else {
      /* stays 0 (every texel gets index 0) if both extrema coincide */
      cc[0] = 0;
      for (i = 0; i < n_comp; i++) {
         vec[0][i] = input[minColL][i];
         vec[1][i] = input[maxColL][i];
      }
      if (minColL != maxColL) {
         /* compute interpolation vector */
         MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);

         /* add in texels */
         lolo = 0;
         for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
            GLint texel = n_vect + 1; /* transparent black */
            if (!ISTBLACK(input[k])) {
               /* interpolate color */
               CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
            }
            /* add in texel */
            lolo <<= 2;
            lolo |= texel;
         }
         cc[0] = lolo;
      }
   }

   /* right microtile */
   if (maxColR == -1) {
      /* all transparent black */
      cc[1] = ~0u;
      for (i = 0; i < n_comp; i++) {
         vec[2][i] = 0;
         vec[3][i] = 0;
      }
   } else {
      /* stays 0 (every texel gets index 0) if both extrema coincide */
      cc[1] = 0;
      for (i = 0; i < n_comp; i++) {
         vec[2][i] = input[minColR][i];
         vec[3][i] = input[maxColR][i];
      }
      if (minColR != maxColR) {
         /* compute interpolation vector */
         MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);

         /* add in texels */
         lohi = 0;
         for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
            GLint texel = n_vect + 1; /* transparent black */
            if (!ISTBLACK(input[k])) {
               /* interpolate color */
               CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
            }
            /* add in texel */
            lohi <<= 2;
            lohi |= texel;
         }
         cc[1] = lohi;
      }
   }

   /* header: constant 9, plus bit 2 of vec[3]'s green and bit 2 of
    * vec[1]'s green (the latter moved down to bit 1)
    */
   FX64_MOV32(hi, 9 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
   for (j = 2 * 2 - 1; j >= 0; j--) {
      for (i = 0; i < n_comp; i++) {
         /* add in colors */
         FX64_SHL(hi, 5);
         FX64_OR32(hi, vec[j][i] >> 3);
      }
   }
   ((Fx64 *)cc)[1] = hi;
}
1108
1109
/*
 * Encode a block in MIXED mode without transparency: each 4x4 microtile
 * has two RGB colors of its own and every texel gets a 2-bit index 0..3
 * interpolating between them.
 *
 * The two colors of a microtile are the texels with the lowest and the
 * highest value in the channel showing the largest variance within that
 * microtile.
 *
 * cc      receives the block as four GLuints: cc[0] holds the 2-bit
 *         indices of texels 0..15, cc[1] those of texels 16..31, and the
 *         upper 64 bits hold the mode bits and the four colors at 5 bits
 *         per component
 * input   the 32 texels of the block
 */
static void
fxt1_quantize_MIXED0 (GLuint *cc,
                      GLubyte input[N_TEXELS][MAX_COMP])
{
   const GLint n_vect = 3; /* highest vector number in each microtile */
   const GLint n_comp = 3; /* 3 components: R, G, B */
   GLubyte vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
   GLfloat b, iv[MAX_COMP]; /* interpolation vector */
   GLint i, j, k;
   Fx64 hi; /* high quadword */
   GLuint lohi, lolo; /* low quadword: hi dword, lo dword */

   GLint minColL = 0, maxColL = 0;
   GLint minColR = 0, maxColR = 0;
#if 0
   GLint minSum;
   GLint maxSum;

   /* Our solution here is to find the darkest and brightest colors in
    * the 4x4 tile and use those as the two representative colors.
    * There are probably better algorithms to use (histogram-based).
    */
   minSum = 2000; /* big enough */
   maxSum = -1; /* small enough */
   for (k = 0; k < N_TEXELS / 2; k++) {
      GLint sum = 0;
      for (i = 0; i < n_comp; i++) {
         sum += input[k][i];
      }
      if (minSum > sum) {
         minSum = sum;
         minColL = k;
      }
      if (maxSum < sum) {
         maxSum = sum;
         maxColL = k;
      }
   }
   minSum = 2000; /* big enough */
   maxSum = -1; /* small enough */
   for (; k < N_TEXELS; k++) {
      GLint sum = 0;
      for (i = 0; i < n_comp; i++) {
         sum += input[k][i];
      }
      if (minSum > sum) {
         minSum = sum;
         minColR = k;
      }
      if (maxSum < sum) {
         maxSum = sum;
         maxColR = k;
      }
   }
#else
   GLint minVal;
   GLint maxVal;
   /* channel with the largest variance in each microtile */
   GLint maxVarL = fxt1_variance(NULL, input, n_comp, N_TEXELS / 2);
   GLint maxVarR = fxt1_variance(NULL, &input[N_TEXELS / 2], n_comp, N_TEXELS / 2);

   /* Scan the channel with max variance for lo & hi
    * and use those as the two representative colors.
    */
   minVal = 2000; /* big enough */
   maxVal = -1; /* small enough */
   for (k = 0; k < N_TEXELS / 2; k++) {
      GLint t = input[k][maxVarL];
      if (minVal > t) {
         minVal = t;
         minColL = k;
      }
      if (maxVal < t) {
         maxVal = t;
         maxColL = k;
      }
   }
   minVal = 2000; /* big enough */
   maxVal = -1; /* small enough */
   for (; k < N_TEXELS; k++) {
      GLint t = input[k][maxVarR];
      if (minVal > t) {
         minVal = t;
         minColR = k;
      }
      if (maxVal < t) {
         maxVal = t;
         maxColR = k;
      }
   }
#endif

   /* left microtile */
   cc[0] = 0; /* stays 0 (every texel gets index 0) if both extrema coincide */
   for (i = 0; i < n_comp; i++) {
      vec[0][i] = input[minColL][i];
      vec[1][i] = input[maxColL][i];
   }
   if (minColL != maxColL) {
      /* compute interpolation vector */
      MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);

      /* add in texels */
      lolo = 0;
      for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
         GLint texel;
         /* interpolate color */
         CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
         /* add in texel */
         lolo <<= 2;
         lolo |= texel;
      }

      /* funky encoding for LSB of green:
       * bit 1 of the index word has to match bit 2 of (G1 xor G0); if it
       * does not, the two colors are swapped and all indices complemented
       */
      if ((GLint)((lolo >> 1) & 1) != (((vec[1][GCOMP] ^ vec[0][GCOMP]) >> 2) & 1)) {
         for (i = 0; i < n_comp; i++) {
            vec[1][i] = input[minColL][i];
            vec[0][i] = input[maxColL][i];
         }
         lolo = ~lolo;
      }

      cc[0] = lolo;
   }

   /* right microtile */
   cc[1] = 0; /* stays 0 (every texel gets index 0) if both extrema coincide */
   for (i = 0; i < n_comp; i++) {
      vec[2][i] = input[minColR][i];
      vec[3][i] = input[maxColR][i];
   }
   if (minColR != maxColR) {
      /* compute interpolation vector */
      MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);

      /* add in texels */
      lohi = 0;
      for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
         GLint texel;
         /* interpolate color */
         CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
         /* add in texel */
         lohi <<= 2;
         lohi |= texel;
      }

      /* funky encoding for LSB of green:
       * bit 1 of the index word has to match bit 2 of (G3 xor G2); if it
       * does not, the two colors are swapped and all indices complemented
       */
      if ((GLint)((lohi >> 1) & 1) != (((vec[3][GCOMP] ^ vec[2][GCOMP]) >> 2) & 1)) {
         for (i = 0; i < n_comp; i++) {
            vec[3][i] = input[minColR][i];
            vec[2][i] = input[maxColR][i];
         }
         lohi = ~lohi;
      }

      cc[1] = lohi;
   }

   /* header: constant 8, plus bit 2 of vec[3]'s green and bit 2 of
    * vec[1]'s green (the latter moved down to bit 1)
    */
   FX64_MOV32(hi, 8 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
   for (j = 2 * 2 - 1; j >= 0; j--) {
      for (i = 0; i < n_comp; i++) {
         /* add in colors */
         FX64_SHL(hi, 5);
         FX64_OR32(hi, vec[j][i] >> 3);
      }
   }
   ((Fx64 *)cc)[1] = hi;
}
1277
1278
1279 static void
1280 fxt1_quantize (GLuint *cc, const GLubyte *lines[], GLint comps)
1281 {
1282 GLint trualpha;
1283 GLubyte reord[N_TEXELS][MAX_COMP];
1284
1285 GLubyte input[N_TEXELS][MAX_COMP];
1286 GLint i, k, l;
1287
1288 if (comps == 3) {
1289 /* make the whole block opaque */
1290 _mesa_memset(input, -1, sizeof(input));
1291 }
1292
1293 /* 8 texels each line */
1294 for (l = 0; l < 4; l++) {
1295 for (k = 0; k < 4; k++) {
1296 for (i = 0; i < comps; i++) {
1297 input[k + l * 4][i] = *lines[l]++;
1298 }
1299 }
1300 for (; k < 8; k++) {
1301 for (i = 0; i < comps; i++) {
1302 input[k + l * 4 + 12][i] = *lines[l]++;
1303 }
1304 }
1305 }
1306
1307 /* block layout:
1308 * 00, 01, 02, 03, 08, 09, 0a, 0b
1309 * 10, 11, 12, 13, 18, 19, 1a, 1b
1310 * 04, 05, 06, 07, 0c, 0d, 0e, 0f
1311 * 14, 15, 16, 17, 1c, 1d, 1e, 1f
1312 */
1313
1314 /* [dBorca]
1315 * stupidity flows forth from this
1316 */
1317 l = N_TEXELS;
1318 trualpha = 0;
1319 if (comps == 4) {
1320 /* skip all transparent black texels */
1321 l = 0;
1322 for (k = 0; k < N_TEXELS; k++) {
1323 /* test all components against 0 */
1324 if (!ISTBLACK(input[k])) {
1325 /* texel is not transparent black */
1326 COPY_4UBV(reord[l], input[k]);
1327 if (reord[l][ACOMP] < (255 - ALPHA_TS)) {
1328 /* non-opaque texel */
1329 trualpha = !0;
1330 }
1331 l++;
1332 }
1333 }
1334 }
1335
1336 #if 0
1337 if (trualpha) {
1338 fxt1_quantize_ALPHA0(cc, input, reord, l);
1339 } else if (l == 0) {
1340 cc[0] = cc[1] = cc[2] = -1;
1341 cc[3] = 0;
1342 } else if (l < N_TEXELS) {
1343 fxt1_quantize_HI(cc, input, reord, l);
1344 } else {
1345 fxt1_quantize_CHROMA(cc, input);
1346 }
1347 (void)fxt1_quantize_ALPHA1;
1348 (void)fxt1_quantize_MIXED1;
1349 (void)fxt1_quantize_MIXED0;
1350 #else
1351 if (trualpha) {
1352 fxt1_quantize_ALPHA1(cc, input);
1353 } else if (l == 0) {
1354 cc[0] = cc[1] = cc[2] = ~0u;
1355 cc[3] = 0;
1356 } else if (l < N_TEXELS) {
1357 fxt1_quantize_MIXED1(cc, input);
1358 } else {
1359 fxt1_quantize_MIXED0(cc, input);
1360 }
1361 (void)fxt1_quantize_ALPHA0;
1362 (void)fxt1_quantize_HI;
1363 (void)fxt1_quantize_CHROMA;
1364 #endif
1365 }
1366
1367
1368 static void
1369 fxt1_encode (GLuint width, GLuint height, GLint comps,
1370 const void *source, GLint srcRowStride,
1371 void *dest, GLint destRowStride)
1372 {
1373 GLuint x, y;
1374 const GLubyte *data;
1375 GLuint *encoded = (GLuint *)dest;
1376 void *newSource = NULL;
1377
1378 assert(comps == 3 || comps == 4);
1379
1380 /* Replicate image if width is not M8 or height is not M4 */
1381 if ((width & 7) | (height & 3)) {
1382 GLint newWidth = (width + 7) & ~7;
1383 GLint newHeight = (height + 3) & ~3;
1384 newSource = _mesa_malloc(comps * newWidth * newHeight * sizeof(GLchan));
1385 if (!newSource) {
1386 GET_CURRENT_CONTEXT(ctx);
1387 _mesa_error(ctx, GL_OUT_OF_MEMORY, "texture compression");
1388 goto cleanUp;
1389 }
1390 _mesa_upscale_teximage2d(width, height, newWidth, newHeight,
1391 comps, (const GLchan *) source,
1392 srcRowStride, (GLchan *) newSource);
1393 source = newSource;
1394 width = newWidth;
1395 height = newHeight;
1396 srcRowStride = comps * newWidth;
1397 }
1398
1399 /* convert from 16/32-bit channels to GLubyte if needed */
1400 if (CHAN_TYPE != GL_UNSIGNED_BYTE) {
1401 const GLuint n = width * height * comps;
1402 const GLchan *src = (const GLchan *) source;
1403 GLubyte *dest = (GLubyte *) _mesa_malloc(n * sizeof(GLubyte));
1404 GLuint i;
1405 if (!dest) {
1406 GET_CURRENT_CONTEXT(ctx);
1407 _mesa_error(ctx, GL_OUT_OF_MEMORY, "texture compression");
1408 goto cleanUp;
1409 }
1410 for (i = 0; i < n; i++) {
1411 dest[i] = CHAN_TO_UBYTE(src[i]);
1412 }
1413 if (newSource != NULL) {
1414 _mesa_free(newSource);
1415 }
1416 newSource = dest; /* we'll free this buffer before returning */
1417 source = dest; /* the new, GLubyte incoming image */
1418 }
1419
1420 data = (const GLubyte *) source;
1421 destRowStride = (destRowStride - width * 2) / 4;
1422 for (y = 0; y < height; y += 4) {
1423 GLuint offs = 0 + (y + 0) * srcRowStride;
1424 for (x = 0; x < width; x += 8) {
1425 const GLubyte *lines[4];
1426 lines[0] = &data[offs];
1427 lines[1] = lines[0] + srcRowStride;
1428 lines[2] = lines[1] + srcRowStride;
1429 lines[3] = lines[2] + srcRowStride;
1430 offs += 8 * comps;
1431 fxt1_quantize(encoded, lines, comps);
1432 /* 128 bits per 8x4 block */
1433 encoded += 4;
1434 }
1435 encoded += destRowStride;
1436 }
1437
1438 cleanUp:
1439 if (newSource != NULL) {
1440 _mesa_free(newSource);
1441 }
1442 }
1443
1444
1445 /***************************************************************************\
1446 * FXT1 decoder
1447 *
1448 * The decoder is based on GL_3DFX_texture_compression_FXT1
1449 * specification and serves as a concept for the encoder.
1450 \***************************************************************************/
1451
1452
1453 /* lookup table for scaling 5 bit colors up to 8 bits */
1454 static const GLubyte _rgb_scale_5[] = {
1455 0, 8, 16, 25, 33, 41, 49, 58,
1456 66, 74, 82, 90, 99, 107, 115, 123,
1457 132, 140, 148, 156, 165, 173, 181, 189,
1458 197, 206, 214, 222, 230, 239, 247, 255
1459 };
1460
1461 /* lookup table for scaling 6 bit colors up to 8 bits */
1462 static const GLubyte _rgb_scale_6[] = {
1463 0, 4, 8, 12, 16, 20, 24, 28,
1464 32, 36, 40, 45, 49, 53, 57, 61,
1465 65, 69, 73, 77, 81, 85, 89, 93,
1466 97, 101, 105, 109, 113, 117, 121, 125,
1467 130, 134, 138, 142, 146, 150, 154, 158,
1468 162, 166, 170, 174, 178, 182, 186, 190,
1469 194, 198, 202, 206, 210, 215, 219, 223,
1470 227, 231, 235, 239, 243, 247, 251, 255
1471 };
1472
1473
/* CC_SEL: view the block as 32-bit words and return the word holding bit
 * `which`, shifted right so that bit sits at position 0.  The result is
 * not masked, and a field that straddles a word boundary cannot be read
 * this way.
 */
#define CC_SEL(cc, which) (((const GLuint *)(cc))[(which) / 32] >> ((which) & 31))
/* UP5: scale the low 5 bits of c to 8 bits */
#define UP5(c) (_rgb_scale_5[(c) & 31])
/* UP6: scale a 6-bit value made of the low 5 bits of c plus LSB b to 8 bits */
#define UP6(c, b) (_rgb_scale_6[(((c) & 31) << 1) | ((b) & 1)])
/* LERP: rounded integer blend, t/n of the way from c0 to c1.  The whole
 * expansion is parenthesized so it can be used inside larger expressions.
 */
#define LERP(n, t, c0, c1) ((((n) - (t)) * (c0) + (t) * (c1) + (n) / 2) / (n))
1478
1479
1480 static void
1481 fxt1_decode_1HI (const GLubyte *code, GLint t, GLchan *rgba)
1482 {
1483 const GLuint *cc;
1484
1485 t *= 3;
1486 cc = (const GLuint *)(code + t / 8);
1487 t = (cc[0] >> (t & 7)) & 7;
1488
1489 if (t == 7) {
1490 rgba[RCOMP] = rgba[GCOMP] = rgba[BCOMP] = rgba[ACOMP] = 0;
1491 } else {
1492 GLubyte r, g, b;
1493 cc = (const GLuint *)(code + 12);
1494 if (t == 0) {
1495 b = UP5(CC_SEL(cc, 0));
1496 g = UP5(CC_SEL(cc, 5));
1497 r = UP5(CC_SEL(cc, 10));
1498 } else if (t == 6) {
1499 b = UP5(CC_SEL(cc, 15));
1500 g = UP5(CC_SEL(cc, 20));
1501 r = UP5(CC_SEL(cc, 25));
1502 } else {
1503 b = LERP(6, t, UP5(CC_SEL(cc, 0)), UP5(CC_SEL(cc, 15)));
1504 g = LERP(6, t, UP5(CC_SEL(cc, 5)), UP5(CC_SEL(cc, 20)));
1505 r = LERP(6, t, UP5(CC_SEL(cc, 10)), UP5(CC_SEL(cc, 25)));
1506 }
1507 rgba[RCOMP] = UBYTE_TO_CHAN(r);
1508 rgba[GCOMP] = UBYTE_TO_CHAN(g);
1509 rgba[BCOMP] = UBYTE_TO_CHAN(b);
1510 rgba[ACOMP] = CHAN_MAX;
1511 }
1512 }
1513
1514
1515 static void
1516 fxt1_decode_1CHROMA (const GLubyte *code, GLint t, GLchan *rgba)
1517 {
1518 const GLuint *cc;
1519 GLuint kk;
1520
1521 cc = (const GLuint *)code;
1522 if (t & 16) {
1523 cc++;
1524 t &= 15;
1525 }
1526 t = (cc[0] >> (t * 2)) & 3;
1527
1528 t *= 15;
1529 cc = (const GLuint *)(code + 8 + t / 8);
1530 kk = cc[0] >> (t & 7);
1531 rgba[BCOMP] = UBYTE_TO_CHAN( UP5(kk) );
1532 rgba[GCOMP] = UBYTE_TO_CHAN( UP5(kk >> 5) );
1533 rgba[RCOMP] = UBYTE_TO_CHAN( UP5(kk >> 10) );
1534 rgba[ACOMP] = CHAN_MAX;
1535 }
1536
1537
1538 static void
1539 fxt1_decode_1MIXED (const GLubyte *code, GLint t, GLchan *rgba)
1540 {
1541 const GLuint *cc;
1542 GLuint col[2][3];
1543 GLint glsb, selb;
1544
1545 cc = (const GLuint *)code;
1546 if (t & 16) {
1547 t &= 15;
1548 t = (cc[1] >> (t * 2)) & 3;
1549 /* col 2 */
1550 col[0][BCOMP] = (*(const GLuint *)(code + 11)) >> 6;
1551 col[0][GCOMP] = CC_SEL(cc, 99);
1552 col[0][RCOMP] = CC_SEL(cc, 104);
1553 /* col 3 */
1554 col[1][BCOMP] = CC_SEL(cc, 109);
1555 col[1][GCOMP] = CC_SEL(cc, 114);
1556 col[1][RCOMP] = CC_SEL(cc, 119);
1557 glsb = CC_SEL(cc, 126);
1558 selb = CC_SEL(cc, 33);
1559 } else {
1560 t = (cc[0] >> (t * 2)) & 3;
1561 /* col 0 */
1562 col[0][BCOMP] = CC_SEL(cc, 64);
1563 col[0][GCOMP] = CC_SEL(cc, 69);
1564 col[0][RCOMP] = CC_SEL(cc, 74);
1565 /* col 1 */
1566 col[1][BCOMP] = CC_SEL(cc, 79);
1567 col[1][GCOMP] = CC_SEL(cc, 84);
1568 col[1][RCOMP] = CC_SEL(cc, 89);
1569 glsb = CC_SEL(cc, 125);
1570 selb = CC_SEL(cc, 1);
1571 }
1572
1573 if (CC_SEL(cc, 124) & 1) {
1574 /* alpha[0] == 1 */
1575
1576 if (t == 3) {
1577 /* zero */
1578 rgba[RCOMP] = rgba[BCOMP] = rgba[GCOMP] = rgba[ACOMP] = 0;
1579 } else {
1580 GLubyte r, g, b;
1581 if (t == 0) {
1582 b = UP5(col[0][BCOMP]);
1583 g = UP5(col[0][GCOMP]);
1584 r = UP5(col[0][RCOMP]);
1585 } else if (t == 2) {
1586 b = UP5(col[1][BCOMP]);
1587 g = UP6(col[1][GCOMP], glsb);
1588 r = UP5(col[1][RCOMP]);
1589 } else {
1590 b = (UP5(col[0][BCOMP]) + UP5(col[1][BCOMP])) / 2;
1591 g = (UP5(col[0][GCOMP]) + UP6(col[1][GCOMP], glsb)) / 2;
1592 r = (UP5(col[0][RCOMP]) + UP5(col[1][RCOMP])) / 2;
1593 }
1594 rgba[RCOMP] = UBYTE_TO_CHAN(r);
1595 rgba[GCOMP] = UBYTE_TO_CHAN(g);
1596 rgba[BCOMP] = UBYTE_TO_CHAN(b);
1597 rgba[ACOMP] = CHAN_MAX;
1598 }
1599 } else {
1600 /* alpha[0] == 0 */
1601 GLubyte r, g, b;
1602 if (t == 0) {
1603 b = UP5(col[0][BCOMP]);
1604 g = UP6(col[0][GCOMP], glsb ^ selb);
1605 r = UP5(col[0][RCOMP]);
1606 } else if (t == 3) {
1607 b = UP5(col[1][BCOMP]);
1608 g = UP6(col[1][GCOMP], glsb);
1609 r = UP5(col[1][RCOMP]);
1610 } else {
1611 b = LERP(3, t, UP5(col[0][BCOMP]), UP5(col[1][BCOMP]));
1612 g = LERP(3, t, UP6(col[0][GCOMP], glsb ^ selb),
1613 UP6(col[1][GCOMP], glsb));
1614 r = LERP(3, t, UP5(col[0][RCOMP]), UP5(col[1][RCOMP]));
1615 }
1616 rgba[RCOMP] = UBYTE_TO_CHAN(r);
1617 rgba[GCOMP] = UBYTE_TO_CHAN(g);
1618 rgba[BCOMP] = UBYTE_TO_CHAN(b);
1619 rgba[ACOMP] = CHAN_MAX;
1620 }
1621 }
1622
1623
1624 static void
1625 fxt1_decode_1ALPHA (const GLubyte *code, GLint t, GLchan *rgba)
1626 {
1627 const GLuint *cc;
1628 GLubyte r, g, b, a;
1629
1630 cc = (const GLuint *)code;
1631 if (CC_SEL(cc, 124) & 1) {
1632 /* lerp == 1 */
1633 GLuint col0[4];
1634
1635 if (t & 16) {
1636 t &= 15;
1637 t = (cc[1] >> (t * 2)) & 3;
1638 /* col 2 */
1639 col0[BCOMP] = (*(const GLuint *)(code + 11)) >> 6;
1640 col0[GCOMP] = CC_SEL(cc, 99);
1641 col0[RCOMP] = CC_SEL(cc, 104);
1642 col0[ACOMP] = CC_SEL(cc, 119);
1643 } else {
1644 t = (cc[0] >> (t * 2)) & 3;
1645 /* col 0 */
1646 col0[BCOMP] = CC_SEL(cc, 64);
1647 col0[GCOMP] = CC_SEL(cc, 69);
1648 col0[RCOMP] = CC_SEL(cc, 74);
1649 col0[ACOMP] = CC_SEL(cc, 109);
1650 }
1651
1652 if (t == 0) {
1653 b = UP5(col0[BCOMP]);
1654 g = UP5(col0[GCOMP]);
1655 r = UP5(col0[RCOMP]);
1656 a = UP5(col0[ACOMP]);
1657 } else if (t == 3) {
1658 b = UP5(CC_SEL(cc, 79));
1659 g = UP5(CC_SEL(cc, 84));
1660 r = UP5(CC_SEL(cc, 89));
1661 a = UP5(CC_SEL(cc, 114));
1662 } else {
1663 b = LERP(3, t, UP5(col0[BCOMP]), UP5(CC_SEL(cc, 79)));
1664 g = LERP(3, t, UP5(col0[GCOMP]), UP5(CC_SEL(cc, 84)));
1665 r = LERP(3, t, UP5(col0[RCOMP]), UP5(CC_SEL(cc, 89)));
1666 a = LERP(3, t, UP5(col0[ACOMP]), UP5(CC_SEL(cc, 114)));
1667 }
1668 } else {
1669 /* lerp == 0 */
1670
1671 if (t & 16) {
1672 cc++;
1673 t &= 15;
1674 }
1675 t = (cc[0] >> (t * 2)) & 3;
1676
1677 if (t == 3) {
1678 /* zero */
1679 r = g = b = a = 0;
1680 } else {
1681 GLuint kk;
1682 cc = (const GLuint *)code;
1683 a = UP5(cc[3] >> (t * 5 + 13));
1684 t *= 15;
1685 cc = (const GLuint *)(code + 8 + t / 8);
1686 kk = cc[0] >> (t & 7);
1687 b = UP5(kk);
1688 g = UP5(kk >> 5);
1689 r = UP5(kk >> 10);
1690 }
1691 }
1692 rgba[RCOMP] = UBYTE_TO_CHAN(r);
1693 rgba[GCOMP] = UBYTE_TO_CHAN(g);
1694 rgba[BCOMP] = UBYTE_TO_CHAN(b);
1695 rgba[ACOMP] = UBYTE_TO_CHAN(a);
1696 }
1697
1698
1699 void
1700 fxt1_decode_1 (const void *texture, GLint stride, /* in pixels */
1701 GLint i, GLint j, GLchan *rgba)
1702 {
1703 static void (*decode_1[]) (const GLubyte *, GLint, GLchan *) = {
1704 fxt1_decode_1HI, /* cc-high = "00?" */
1705 fxt1_decode_1HI, /* cc-high = "00?" */
1706 fxt1_decode_1CHROMA, /* cc-chroma = "010" */
1707 fxt1_decode_1ALPHA, /* alpha = "011" */
1708 fxt1_decode_1MIXED, /* mixed = "1??" */
1709 fxt1_decode_1MIXED, /* mixed = "1??" */
1710 fxt1_decode_1MIXED, /* mixed = "1??" */
1711 fxt1_decode_1MIXED /* mixed = "1??" */
1712 };
1713
1714 const GLubyte *code = (const GLubyte *)texture +
1715 ((j / 4) * (stride / 8) + (i / 8)) * 16;
1716 GLint mode = CC_SEL(code, 125);
1717 GLint t = i & 7;
1718
1719 if (t & 4) {
1720 t += 12;
1721 }
1722 t += (j & 3) * 4;
1723
1724 decode_1[mode](code, t, rgba);
1725 }