updated comments, mark GetBufferSize() and ResizeBuffers() as obsolete
[mesa.git] / src / mesa / main / texcompress_fxt1.c
1 /*
2 * Mesa 3-D graphics library
3 * Version: 6.5
4 *
5 * Copyright (C) 1999-2005 Brian Paul All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included
15 * in all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
21 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25
26 /**
27 * \file texcompress_fxt1.c
28 * GL_EXT_texture_compression_fxt1 support.
29 */
30
31
32 #include "glheader.h"
33 #include "imports.h"
34 #include "colormac.h"
35 #include "context.h"
36 #include "convolve.h"
37 #include "image.h"
38 #include "mipmap.h"
39 #include "texcompress.h"
40 #include "texformat.h"
41 #include "texstore.h"
42
43
/*
 * Forward declaration of the FXT1 encoder entry point.  The texstore
 * functions below call it with the source image size, the number of
 * components per source texel (3 or 4), the source pixels with their row
 * stride, and the destination buffer with its row stride.
 */
static void
fxt1_encode (GLuint width, GLuint height, GLint comps,
             const void *source, GLint srcRowStride,
             void *dest, GLint destRowStride);

/*
 * Forward declaration of the single-texel decoder.  The fetch functions
 * below call it with the texture image data and row stride, the texel
 * coordinates (i, j), and a GLchan array that receives the decoded color.
 * Note that, unlike the encoder, it is not declared static.
 */
void
fxt1_decode_1 (const void *texture, GLint stride,
               GLint i, GLint j, GLchan *rgba);
52
53
54 /**
55 * Called during context initialization.
56 */
57 void
58 _mesa_init_texture_fxt1( GLcontext *ctx )
59 {
60 (void) ctx;
61 }
62
63
64 /**
65 * Called via TexFormat->StoreImage to store an RGB_FXT1 texture.
66 */
67 static GLboolean
68 texstore_rgb_fxt1(TEXSTORE_PARAMS)
69 {
70 const GLchan *pixels;
71 GLint srcRowStride;
72 GLubyte *dst;
73 const GLint texWidth = dstRowStride * 8 / 16; /* a bit of a hack */
74 const GLchan *tempImage = NULL;
75
76 ASSERT(dstFormat == &_mesa_texformat_rgb_fxt1);
77 ASSERT(dstXoffset % 8 == 0);
78 ASSERT(dstYoffset % 4 == 0);
79 ASSERT(dstZoffset == 0);
80 (void) dstZoffset;
81 (void) dstImageOffsets;
82
83 if (srcFormat != GL_RGB ||
84 srcType != CHAN_TYPE ||
85 ctx->_ImageTransferState ||
86 srcPacking->SwapBytes) {
87 /* convert image to RGB/GLchan */
88 tempImage = _mesa_make_temp_chan_image(ctx, dims,
89 baseInternalFormat,
90 dstFormat->BaseFormat,
91 srcWidth, srcHeight, srcDepth,
92 srcFormat, srcType, srcAddr,
93 srcPacking);
94 if (!tempImage)
95 return GL_FALSE; /* out of memory */
96 _mesa_adjust_image_for_convolution(ctx, dims, &srcWidth, &srcHeight);
97 pixels = tempImage;
98 srcRowStride = 3 * srcWidth;
99 srcFormat = GL_RGB;
100 }
101 else {
102 pixels = (const GLchan *) srcAddr;
103 srcRowStride = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat,
104 srcType) / sizeof(GLchan);
105 }
106
107 dst = _mesa_compressed_image_address(dstXoffset, dstYoffset, 0,
108 GL_COMPRESSED_RGB_FXT1_3DFX,
109 texWidth, (GLubyte *) dstAddr);
110
111 fxt1_encode(srcWidth, srcHeight, 3, pixels, srcRowStride,
112 dst, dstRowStride);
113
114 if (tempImage)
115 _mesa_free((void*) tempImage);
116
117 return GL_TRUE;
118 }
119
120
121 /**
122 * Called via TexFormat->StoreImage to store an RGBA_FXT1 texture.
123 */
124 static GLboolean
125 texstore_rgba_fxt1(TEXSTORE_PARAMS)
126 {
127 const GLchan *pixels;
128 GLint srcRowStride;
129 GLubyte *dst;
130 GLint texWidth = dstRowStride * 8 / 16; /* a bit of a hack */
131 const GLchan *tempImage = NULL;
132
133 ASSERT(dstFormat == &_mesa_texformat_rgba_fxt1);
134 ASSERT(dstXoffset % 8 == 0);
135 ASSERT(dstYoffset % 4 == 0);
136 ASSERT(dstZoffset == 0);
137 (void) dstZoffset;
138 (void) dstImageOffsets;
139
140 if (srcFormat != GL_RGBA ||
141 srcType != CHAN_TYPE ||
142 ctx->_ImageTransferState ||
143 srcPacking->SwapBytes) {
144 /* convert image to RGBA/GLchan */
145 tempImage = _mesa_make_temp_chan_image(ctx, dims,
146 baseInternalFormat,
147 dstFormat->BaseFormat,
148 srcWidth, srcHeight, srcDepth,
149 srcFormat, srcType, srcAddr,
150 srcPacking);
151 if (!tempImage)
152 return GL_FALSE; /* out of memory */
153 _mesa_adjust_image_for_convolution(ctx, dims, &srcWidth, &srcHeight);
154 pixels = tempImage;
155 srcRowStride = 4 * srcWidth;
156 srcFormat = GL_RGBA;
157 }
158 else {
159 pixels = (const GLchan *) srcAddr;
160 srcRowStride = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat,
161 srcType) / sizeof(GLchan);
162 }
163
164 dst = _mesa_compressed_image_address(dstXoffset, dstYoffset, 0,
165 GL_COMPRESSED_RGBA_FXT1_3DFX,
166 texWidth, (GLubyte *) dstAddr);
167
168 fxt1_encode(srcWidth, srcHeight, 4, pixels, srcRowStride,
169 dst, dstRowStride);
170
171 if (tempImage)
172 _mesa_free((void*) tempImage);
173
174 return GL_TRUE;
175 }
176
177
178 static void
179 fetch_texel_2d_rgba_fxt1( const struct gl_texture_image *texImage,
180 GLint i, GLint j, GLint k, GLchan *texel )
181 {
182 (void) k;
183 fxt1_decode_1(texImage->Data, texImage->RowStride, i, j, texel);
184 }
185
186
187 static void
188 fetch_texel_2d_f_rgba_fxt1( const struct gl_texture_image *texImage,
189 GLint i, GLint j, GLint k, GLfloat *texel )
190 {
191 /* just sample as GLchan and convert to float here */
192 GLchan rgba[4];
193 (void) k;
194 fxt1_decode_1(texImage->Data, texImage->RowStride, i, j, rgba);
195 texel[RCOMP] = CHAN_TO_FLOAT(rgba[RCOMP]);
196 texel[GCOMP] = CHAN_TO_FLOAT(rgba[GCOMP]);
197 texel[BCOMP] = CHAN_TO_FLOAT(rgba[BCOMP]);
198 texel[ACOMP] = CHAN_TO_FLOAT(rgba[ACOMP]);
199 }
200
201
202 static void
203 fetch_texel_2d_rgb_fxt1( const struct gl_texture_image *texImage,
204 GLint i, GLint j, GLint k, GLchan *texel )
205 {
206 (void) k;
207 fxt1_decode_1(texImage->Data, texImage->RowStride, i, j, texel);
208 texel[ACOMP] = 255;
209 }
210
211
212 static void
213 fetch_texel_2d_f_rgb_fxt1( const struct gl_texture_image *texImage,
214 GLint i, GLint j, GLint k, GLfloat *texel )
215 {
216 /* just sample as GLchan and convert to float here */
217 GLchan rgba[4];
218 (void) k;
219 fxt1_decode_1(texImage->Data, texImage->RowStride, i, j, rgba);
220 texel[RCOMP] = CHAN_TO_FLOAT(rgba[RCOMP]);
221 texel[GCOMP] = CHAN_TO_FLOAT(rgba[GCOMP]);
222 texel[BCOMP] = CHAN_TO_FLOAT(rgba[BCOMP]);
223 texel[ACOMP] = 1.0F;
224 }
225
226
227
/*
 * Texture format descriptor for RGB FXT1 compressed textures.
 *
 * The per-channel bit counts are approximations and TexelBytes is 0.
 * Only the 2D texel fetch entry points are provided; the 1D and 3D slots
 * are NULL (marked "impossible") and there is no StoreTexel function.
 * The initializer is positional, so the order of the entries must match
 * the member order of struct gl_texture_format.
 */
const struct gl_texture_format _mesa_texformat_rgb_fxt1 = {
   MESA_FORMAT_RGB_FXT1,		/* MesaFormat */
   GL_RGB,				/* BaseFormat */
   GL_UNSIGNED_NORMALIZED_ARB,		/* DataType */
   4, /*approx*/			/* RedBits */
   4, /*approx*/			/* GreenBits */
   4, /*approx*/			/* BlueBits */
   0,					/* AlphaBits */
   0,					/* LuminanceBits */
   0,					/* IntensityBits */
   0,					/* IndexBits */
   0,					/* DepthBits */
   0,					/* StencilBits */
   0,					/* TexelBytes */
   texstore_rgb_fxt1,			/* StoreTexImageFunc */
   NULL, /*impossible*/ 		/* FetchTexel1D */
   fetch_texel_2d_rgb_fxt1, 		/* FetchTexel2D */
   NULL, /*impossible*/ 		/* FetchTexel3D */
   NULL, /*impossible*/ 		/* FetchTexel1Df */
   fetch_texel_2d_f_rgb_fxt1, 		/* FetchTexel2Df */
   NULL, /*impossible*/ 		/* FetchTexel3Df */
   NULL					/* StoreTexel */
};
251
/*
 * Texture format descriptor for RGBA FXT1 compressed textures.
 *
 * The per-channel bit counts (including the single alpha bit) are
 * approximations and TexelBytes is 0.  Only the 2D texel fetch entry
 * points are provided; the 1D and 3D slots are NULL (marked "impossible")
 * and there is no StoreTexel function.  The initializer is positional, so
 * the order of the entries must match the member order of
 * struct gl_texture_format.
 */
const struct gl_texture_format _mesa_texformat_rgba_fxt1 = {
   MESA_FORMAT_RGBA_FXT1,		/* MesaFormat */
   GL_RGBA,				/* BaseFormat */
   GL_UNSIGNED_NORMALIZED_ARB,		/* DataType */
   4, /*approx*/			/* RedBits */
   4, /*approx*/			/* GreenBits */
   4, /*approx*/			/* BlueBits */
   1, /*approx*/			/* AlphaBits */
   0,					/* LuminanceBits */
   0,					/* IntensityBits */
   0,					/* IndexBits */
   0,					/* DepthBits */
   0,					/* StencilBits */
   0,					/* TexelBytes */
   texstore_rgba_fxt1,			/* StoreTexImageFunc */
   NULL, /*impossible*/ 		/* FetchTexel1D */
   fetch_texel_2d_rgba_fxt1, 		/* FetchTexel2D */
   NULL, /*impossible*/ 		/* FetchTexel3D */
   NULL, /*impossible*/ 		/* FetchTexel1Df */
   fetch_texel_2d_f_rgba_fxt1, 		/* FetchTexel2Df */
   NULL, /*impossible*/ 		/* FetchTexel3Df */
   NULL					/* StoreTexel */
};
275
276
/***************************************************************************\
 * FXT1 encoder
 *
 * The encoder was built by reversing the decoder,
 * and is vaguely based on Texus2 by 3dfx. Note that this code
 * is merely a proof of concept, since it is highly UNoptimized;
 * moreover, it is sub-optimal due to initial conditions passed
 * to Lloyd's algorithm (the interpolation modes are even worse).
\***************************************************************************/


#define MAX_COMP 4 /* maximum number of components ever needed in a texel */
#define MAX_VECT 4 /* maximum number of base vectors ever needed */
#define N_TEXELS 32 /* number of texels in a block (always 32) */
#define LL_N_REP 50 /* number of iterations in lloyd's vq */
#define LL_RMS_D 10 /* fault tolerance (maximum delta) */
#define LL_RMS_E 255 /* fault tolerance (maximum error) */
#define ALPHA_TS 2 /* alpha threshold: (255 - ALPHA_TS) deemed opaque */

/*
 * True when the four bytes at v (one RGBA texel) are all zero, i.e. the
 * texel is transparent black.  The bytes are tested one by one rather than
 * through a GLuint pointer, so the macro does not depend on the texel being
 * suitably aligned for a 32-bit load and does not access a GLubyte array
 * through an incompatible pointer type.  Note that v is evaluated more
 * than once; pass an expression without side effects.
 */
#define ISTBLACK(v) \
   ((((const GLubyte *)(v))[0] | \
     ((const GLubyte *)(v))[1] | \
     ((const GLubyte *)(v))[2] | \
     ((const GLubyte *)(v))[3]) == 0)
296
297
/*
 * Define a 64-bit unsigned integer type and macros
 *
 *   FX64_MOV32(a, b)  set a to the 32-bit value b (upper half cleared)
 *   FX64_OR32(a, b)   OR the 32-bit value b into the low half of a
 *   FX64_SHL(a, c)    shift a left by c bits
 *
 * With GCC (C only) a native unsigned long long is used; otherwise the
 * value is emulated with a lo/hi pair of GLuint.
 */
#if defined(__GNUC__) && !defined(__cplusplus)

#define FX64_NATIVE 1

typedef unsigned long long Fx64;

#define FX64_MOV32(a, b) ((a) = (b))
#define FX64_OR32(a, b) ((a) |= (b))
#define FX64_SHL(a, c) ((a) <<= (c))

#else /* !__GNUC__ */

#define FX64_NATIVE 0

typedef struct {
   GLuint lo, hi;
} Fx64;

/* clear the high half too, so later shifts never read an indeterminate hi */
#define FX64_MOV32(a, b) \
   do { \
      (a).lo = (b); \
      (a).hi = 0; \
   } while (0)

#define FX64_OR32(a, b) ((a).lo |= (b))

/* a shift count of 0 is a no-op; it must not reach "lo >> 32" below */
#define FX64_SHL(a, c) \
   do { \
      if ((c) >= 32) { \
         (a).hi = (a).lo << ((c) - 32); \
         (a).lo = 0; \
      } else if ((c) > 0) { \
         (a).hi = ((a).hi << (c)) | ((a).lo >> (32 - (c))); \
         (a).lo <<= (c); \
      } \
   } while (0)

#endif /* !__GNUC__ */
334
335
#define F(i) (GLfloat)1 /* can be used to obtain an oblong metric: 0.30 / 0.59 / 0.11 */
#define SAFECDOT 1 /* for paranoids */

/*
 * Build the interpolation vector IV and bias B that project a color onto
 * the segment V0 -> V1, scaled so that V0 maps to index 0 and V1 to index
 * NV (B already contains the +0.5 used for rounding).
 *
 * NC is the number of components.  The macro uses a variable named "i"
 * from the enclosing scope as its loop counter.  If V0 and V1 coincide
 * (zero-length segment) the scale factor is forced to 0 instead of
 * dividing by zero, so IV becomes all zeros and every color maps to
 * index 0.
 */
#define MAKEIVEC(NV, NC, IV, B, V0, V1) \
do { \
   /* compute interpolation vector */ \
   GLfloat d2 = 0.0F; \
   GLfloat rd2; \
   \
   for (i = 0; i < (NC); i++) { \
      (IV)[i] = ((V1)[i] - (V0)[i]) * F(i); \
      d2 += (IV)[i] * (IV)[i]; \
   } \
   /* guard the degenerate V0 == V1 case against a division by zero */ \
   rd2 = (d2 > 0.0F) ? (GLfloat)(NV) / d2 : 0.0F; \
   (B) = 0; \
   for (i = 0; i < (NC); i++) { \
      (IV)[i] *= F(i); \
      (B) -= (IV)[i] * (V0)[i]; \
      (IV)[i] *= rd2; \
   } \
   (B) = (B) * rd2 + 0.5f; \
} while (0)

/*
 * Compute the interpolation index of color V along the vector IV / bias B
 * produced by MAKEIVEC and store it in TEXEL.  When SAFECDOT is non-zero
 * the result is clamped to [0, NV].  Uses the enclosing scope's "i" as the
 * loop counter.
 */
#define CALCCDOT(TEXEL, NV, NC, IV, B, V)\
do { \
   GLfloat dot = 0.0F; \
   for (i = 0; i < (NC); i++) { \
      dot += (V)[i] * (IV)[i]; \
   } \
   (TEXEL) = (GLint)(dot + (B)); \
   if (SAFECDOT) { \
      if ((TEXEL) < 0) { \
         (TEXEL) = 0; \
      } else if ((TEXEL) > (NV)) { \
         (TEXEL) = (NV); \
      } \
   } \
} while (0)
374
375
376 static GLint
377 fxt1_bestcol (GLfloat vec[][MAX_COMP], GLint nv,
378 GLubyte input[MAX_COMP], GLint nc)
379 {
380 GLint i, j, best = -1;
381 GLfloat err = 1e9; /* big enough */
382
383 for (j = 0; j < nv; j++) {
384 GLfloat e = 0.0F;
385 for (i = 0; i < nc; i++) {
386 e += (vec[j][i] - input[i]) * (vec[j][i] - input[i]);
387 }
388 if (e < err) {
389 err = e;
390 best = j;
391 }
392 }
393
394 return best;
395 }
396
397
398 static GLint
399 fxt1_worst (GLfloat vec[MAX_COMP],
400 GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
401 {
402 GLint i, k, worst = -1;
403 GLfloat err = -1.0F; /* small enough */
404
405 for (k = 0; k < n; k++) {
406 GLfloat e = 0.0F;
407 for (i = 0; i < nc; i++) {
408 e += (vec[i] - input[k][i]) * (vec[i] - input[k][i]);
409 }
410 if (e > err) {
411 err = e;
412 worst = k;
413 }
414 }
415
416 return worst;
417 }
418
419
420 static GLint
421 fxt1_variance (GLdouble variance[MAX_COMP],
422 GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
423 {
424 GLint i, k, best = 0;
425 GLint sx, sx2;
426 GLdouble var, maxvar = -1; /* small enough */
427 GLdouble teenth = 1.0 / n;
428
429 for (i = 0; i < nc; i++) {
430 sx = sx2 = 0;
431 for (k = 0; k < n; k++) {
432 GLint t = input[k][i];
433 sx += t;
434 sx2 += t * t;
435 }
436 var = sx2 * teenth - sx * sx * teenth * teenth;
437 if (maxvar < var) {
438 maxvar = var;
439 best = i;
440 }
441 if (variance) {
442 variance[i] = var;
443 }
444 }
445
446 return best;
447 }
448
449
450 static GLint
451 fxt1_choose (GLfloat vec[][MAX_COMP], GLint nv,
452 GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
453 {
454 #if 0
455 /* Choose colors from a grid.
456 */
457 GLint i, j;
458
459 for (j = 0; j < nv; j++) {
460 GLint m = j * (n - 1) / (nv - 1);
461 for (i = 0; i < nc; i++) {
462 vec[j][i] = input[m][i];
463 }
464 }
465 #else
466 /* Our solution here is to find the darkest and brightest colors in
467 * the 8x4 tile and use those as the two representative colors.
468 * There are probably better algorithms to use (histogram-based).
469 */
470 GLint i, j, k;
471 GLint minSum = 2000; /* big enough */
472 GLint maxSum = -1; /* small enough */
473 GLint minCol = 0; /* phoudoin: silent compiler! */
474 GLint maxCol = 0; /* phoudoin: silent compiler! */
475
476 struct {
477 GLint flag;
478 GLint key;
479 GLint freq;
480 GLint idx;
481 } hist[N_TEXELS];
482 GLint lenh = 0;
483
484 _mesa_memset(hist, 0, sizeof(hist));
485
486 for (k = 0; k < n; k++) {
487 GLint l;
488 GLint key = 0;
489 GLint sum = 0;
490 for (i = 0; i < nc; i++) {
491 key <<= 8;
492 key |= input[k][i];
493 sum += input[k][i];
494 }
495 for (l = 0; l < n; l++) {
496 if (!hist[l].flag) {
497 /* alloc new slot */
498 hist[l].flag = !0;
499 hist[l].key = key;
500 hist[l].freq = 1;
501 hist[l].idx = k;
502 lenh = l + 1;
503 break;
504 } else if (hist[l].key == key) {
505 hist[l].freq++;
506 break;
507 }
508 }
509 if (minSum > sum) {
510 minSum = sum;
511 minCol = k;
512 }
513 if (maxSum < sum) {
514 maxSum = sum;
515 maxCol = k;
516 }
517 }
518
519 if (lenh <= nv) {
520 for (j = 0; j < lenh; j++) {
521 for (i = 0; i < nc; i++) {
522 vec[j][i] = (GLfloat)input[hist[j].idx][i];
523 }
524 }
525 for (; j < nv; j++) {
526 for (i = 0; i < nc; i++) {
527 vec[j][i] = vec[0][i];
528 }
529 }
530 return 0;
531 }
532
533 for (j = 0; j < nv; j++) {
534 for (i = 0; i < nc; i++) {
535 vec[j][i] = ((nv - 1 - j) * input[minCol][i] + j * input[maxCol][i] + (nv - 1) / 2) / (GLfloat)(nv - 1);
536 }
537 }
538 #endif
539
540 return !0;
541 }
542
543
544 static GLint
545 fxt1_lloyd (GLfloat vec[][MAX_COMP], GLint nv,
546 GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
547 {
548 /* Use the generalized lloyd's algorithm for VQ:
549 * find 4 color vectors.
550 *
551 * for each sample color
552 * sort to nearest vector.
553 *
554 * replace each vector with the centroid of it's matching colors.
555 *
556 * repeat until RMS doesn't improve.
557 *
558 * if a color vector has no samples, or becomes the same as another
559 * vector, replace it with the color which is farthest from a sample.
560 *
561 * vec[][MAX_COMP] initial vectors and resulting colors
562 * nv number of resulting colors required
563 * input[N_TEXELS][MAX_COMP] input texels
564 * nc number of components in input / vec
565 * n number of input samples
566 */
567
568 GLint sum[MAX_VECT][MAX_COMP]; /* used to accumulate closest texels */
569 GLint cnt[MAX_VECT]; /* how many times a certain vector was chosen */
570 GLfloat error, lasterror = 1e9;
571
572 GLint i, j, k, rep;
573
574 /* the quantizer */
575 for (rep = 0; rep < LL_N_REP; rep++) {
576 /* reset sums & counters */
577 for (j = 0; j < nv; j++) {
578 for (i = 0; i < nc; i++) {
579 sum[j][i] = 0;
580 }
581 cnt[j] = 0;
582 }
583 error = 0;
584
585 /* scan whole block */
586 for (k = 0; k < n; k++) {
587 #if 1
588 GLint best = -1;
589 GLfloat err = 1e9; /* big enough */
590 /* determine best vector */
591 for (j = 0; j < nv; j++) {
592 GLfloat e = (vec[j][0] - input[k][0]) * (vec[j][0] - input[k][0]) +
593 (vec[j][1] - input[k][1]) * (vec[j][1] - input[k][1]) +
594 (vec[j][2] - input[k][2]) * (vec[j][2] - input[k][2]);
595 if (nc == 4) {
596 e += (vec[j][3] - input[k][3]) * (vec[j][3] - input[k][3]);
597 }
598 if (e < err) {
599 err = e;
600 best = j;
601 }
602 }
603 #else
604 GLint best = fxt1_bestcol(vec, nv, input[k], nc, &err);
605 #endif
606 /* add in closest color */
607 for (i = 0; i < nc; i++) {
608 sum[best][i] += input[k][i];
609 }
610 /* mark this vector as used */
611 cnt[best]++;
612 /* accumulate error */
613 error += err;
614 }
615
616 /* check RMS */
617 if ((error < LL_RMS_E) ||
618 ((error < lasterror) && ((lasterror - error) < LL_RMS_D))) {
619 return !0; /* good match */
620 }
621 lasterror = error;
622
623 /* move each vector to the barycenter of its closest colors */
624 for (j = 0; j < nv; j++) {
625 if (cnt[j]) {
626 GLfloat div = 1.0F / cnt[j];
627 for (i = 0; i < nc; i++) {
628 vec[j][i] = div * sum[j][i];
629 }
630 } else {
631 /* this vec has no samples or is identical with a previous vec */
632 GLint worst = fxt1_worst(vec[j], input, nc, n);
633 for (i = 0; i < nc; i++) {
634 vec[j][i] = input[worst][i];
635 }
636 }
637 }
638 }
639
640 return 0; /* could not converge fast enough */
641 }
642
643
644 static void
645 fxt1_quantize_CHROMA (GLuint *cc,
646 GLubyte input[N_TEXELS][MAX_COMP])
647 {
648 const GLint n_vect = 4; /* 4 base vectors to find */
649 const GLint n_comp = 3; /* 3 components: R, G, B */
650 GLfloat vec[MAX_VECT][MAX_COMP];
651 GLint i, j, k;
652 Fx64 hi; /* high quadword */
653 GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
654
655 if (fxt1_choose(vec, n_vect, input, n_comp, N_TEXELS) != 0) {
656 fxt1_lloyd(vec, n_vect, input, n_comp, N_TEXELS);
657 }
658
659 FX64_MOV32(hi, 4); /* cc-chroma = "010" + unused bit */
660 for (j = n_vect - 1; j >= 0; j--) {
661 for (i = 0; i < n_comp; i++) {
662 /* add in colors */
663 FX64_SHL(hi, 5);
664 FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
665 }
666 }
667 ((Fx64 *)cc)[1] = hi;
668
669 lohi = lolo = 0;
670 /* right microtile */
671 for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) {
672 lohi <<= 2;
673 lohi |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
674 }
675 /* left microtile */
676 for (; k >= 0; k--) {
677 lolo <<= 2;
678 lolo |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
679 }
680 cc[1] = lohi;
681 cc[0] = lolo;
682 }
683
684
685 static void
686 fxt1_quantize_ALPHA0 (GLuint *cc,
687 GLubyte input[N_TEXELS][MAX_COMP],
688 GLubyte reord[N_TEXELS][MAX_COMP], GLint n)
689 {
690 const GLint n_vect = 3; /* 3 base vectors to find */
691 const GLint n_comp = 4; /* 4 components: R, G, B, A */
692 GLfloat vec[MAX_VECT][MAX_COMP];
693 GLint i, j, k;
694 Fx64 hi; /* high quadword */
695 GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
696
697 /* the last vector indicates zero */
698 for (i = 0; i < n_comp; i++) {
699 vec[n_vect][i] = 0;
700 }
701
702 /* the first n texels in reord are guaranteed to be non-zero */
703 if (fxt1_choose(vec, n_vect, reord, n_comp, n) != 0) {
704 fxt1_lloyd(vec, n_vect, reord, n_comp, n);
705 }
706
707 FX64_MOV32(hi, 6); /* alpha = "011" + lerp = 0 */
708 for (j = n_vect - 1; j >= 0; j--) {
709 /* add in alphas */
710 FX64_SHL(hi, 5);
711 FX64_OR32(hi, (GLuint)(vec[j][ACOMP] / 8.0F));
712 }
713 for (j = n_vect - 1; j >= 0; j--) {
714 for (i = 0; i < n_comp - 1; i++) {
715 /* add in colors */
716 FX64_SHL(hi, 5);
717 FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
718 }
719 }
720 ((Fx64 *)cc)[1] = hi;
721
722 lohi = lolo = 0;
723 /* right microtile */
724 for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) {
725 lohi <<= 2;
726 lohi |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
727 }
728 /* left microtile */
729 for (; k >= 0; k--) {
730 lolo <<= 2;
731 lolo |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
732 }
733 cc[1] = lohi;
734 cc[0] = lolo;
735 }
736
737
/*
 * Encode one block in CC_ALPHA mode with interpolation (lerp = 1).
 *
 * Each half of the block (16 texels) is represented by two RGBA end
 * points, one of which (vec[1]) is shared between the halves:
 *   vec[0] - end point private to the left half
 *   vec[1] - common end point
 *   vec[2] - end point private to the right half
 * cc[0] / cc[1] receive the 2-bit interpolation index of every texel of
 * the left / right half; the upper 64 bits of cc receive the mode bits,
 * the three alphas and the three colors at 5 bits per component.
 */
static void
fxt1_quantize_ALPHA1 (GLuint *cc,
                      GLubyte input[N_TEXELS][MAX_COMP])
{
   const GLint n_vect = 3; /* highest vector number in each microtile */
   const GLint n_comp = 4; /* 4 components: R, G, B, A */
   GLfloat vec[1 + 1 + 1][MAX_COMP]; /* 1.5 extrema for each sub-block */
   GLfloat b, iv[MAX_COMP]; /* interpolation vector */
   GLint i, j, k;
   Fx64 hi; /* high quadword */
   GLuint lohi, lolo; /* low quadword: hi dword, lo dword */

   GLint minSum;
   GLint maxSum;
   GLint minColL = 0, maxColL = 0;
   GLint minColR = 0, maxColR = 0;
   GLint sumL = 0, sumR = 0; /* total of all component sums per half */

   /* Our solution here is to find the darkest and brightest colors in
    * the 4x4 tile and use those as the two representative colors.
    * There are probably better algorithms to use (histogram-based).
    */
   /* left half: texels 0 .. N_TEXELS/2 - 1 */
   minSum = 2000; /* big enough */
   maxSum = -1; /* small enough */
   for (k = 0; k < N_TEXELS / 2; k++) {
      GLint sum = 0;
      for (i = 0; i < n_comp; i++) {
         sum += input[k][i];
      }
      if (minSum > sum) {
         minSum = sum;
         minColL = k;
      }
      if (maxSum < sum) {
         maxSum = sum;
         maxColL = k;
      }
      sumL += sum;
   }
   /* right half: k continues from N_TEXELS/2 */
   minSum = 2000; /* big enough */
   maxSum = -1; /* small enough */
   for (; k < N_TEXELS; k++) {
      GLint sum = 0;
      for (i = 0; i < n_comp; i++) {
         sum += input[k][i];
      }
      if (minSum > sum) {
         minSum = sum;
         minColR = k;
      }
      if (maxSum < sum) {
         maxSum = sum;
         maxColR = k;
      }
      sumR += sum;
   }

   /* choose the common vector (yuck!) */
   {
      GLint j1, j2;
      GLint v1 = 0, v2 = 0;
      GLfloat err = 1e9; /* big enough */
      GLfloat tv[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
      for (i = 0; i < n_comp; i++) {
         tv[0][i] = input[minColL][i];
         tv[1][i] = input[maxColL][i];
         tv[2][i] = input[minColR][i];
         tv[3][i] = input[maxColR][i];
      }
      /* find the left extremum (v1) and right extremum (v2) that are
       * closest to each other
       */
      for (j1 = 0; j1 < 2; j1++) {
         for (j2 = 2; j2 < 4; j2++) {
            GLfloat e = 0.0F;
            for (i = 0; i < n_comp; i++) {
               e += (tv[j1][i] - tv[j2][i]) * (tv[j1][i] - tv[j2][i]);
            }
            if (e < err) {
               err = e;
               v1 = j1;
               v2 = j2;
            }
         }
      }
      /* the common vector is the average of that pair, weighted by the
       * component totals of each half; 1 - v1 and 5 - v2 select the
       * other extremum of the left and right half respectively
       */
      for (i = 0; i < n_comp; i++) {
         vec[0][i] = tv[1 - v1][i];
         vec[1][i] = (tv[v1][i] * sumL + tv[v2][i] * sumR) / (sumL + sumR);
         vec[2][i] = tv[5 - v2][i];
      }
   }

   /* left microtile */
   cc[0] = 0;
   if (minColL != maxColL) {
      /* compute interpolation vector */
      MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);

      /* add in texels */
      lolo = 0;
      for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
         GLint texel;
         /* interpolate color */
         CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
         /* add in texel */
         lolo <<= 2;
         lolo |= texel;
      }

      cc[0] = lolo;
   }

   /* right microtile */
   cc[1] = 0;
   if (minColR != maxColR) {
      /* compute interpolation vector */
      MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[1]);

      /* add in texels */
      lohi = 0;
      for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
         GLint texel;
         /* interpolate color */
         CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
         /* add in texel */
         lohi <<= 2;
         lohi |= texel;
      }

      cc[1] = lohi;
   }

   /* pack mode bits, alphas and colors into the high quadword; all three
    * vectors are visited (j = 2, 1, 0)
    */
   FX64_MOV32(hi, 7); /* alpha = "011" + lerp = 1 */
   for (j = n_vect - 1; j >= 0; j--) {
      /* add in alphas */
      FX64_SHL(hi, 5);
      FX64_OR32(hi, (GLuint)(vec[j][ACOMP] / 8.0F));
   }
   for (j = n_vect - 1; j >= 0; j--) {
      for (i = 0; i < n_comp - 1; i++) {
         /* add in colors */
         FX64_SHL(hi, 5);
         FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
      }
   }
   ((Fx64 *)cc)[1] = hi;
}
882
883
884 static void
885 fxt1_quantize_HI (GLuint *cc,
886 GLubyte input[N_TEXELS][MAX_COMP],
887 GLubyte reord[N_TEXELS][MAX_COMP], GLint n)
888 {
889 const GLint n_vect = 6; /* highest vector number */
890 const GLint n_comp = 3; /* 3 components: R, G, B */
891 GLfloat b = 0.0F; /* phoudoin: silent compiler! */
892 GLfloat iv[MAX_COMP]; /* interpolation vector */
893 GLint i, k;
894 GLuint hihi; /* high quadword: hi dword */
895
896 GLint minSum = 2000; /* big enough */
897 GLint maxSum = -1; /* small enough */
898 GLint minCol = 0; /* phoudoin: silent compiler! */
899 GLint maxCol = 0; /* phoudoin: silent compiler! */
900
901 /* Our solution here is to find the darkest and brightest colors in
902 * the 8x4 tile and use those as the two representative colors.
903 * There are probably better algorithms to use (histogram-based).
904 */
905 for (k = 0; k < n; k++) {
906 GLint sum = 0;
907 for (i = 0; i < n_comp; i++) {
908 sum += reord[k][i];
909 }
910 if (minSum > sum) {
911 minSum = sum;
912 minCol = k;
913 }
914 if (maxSum < sum) {
915 maxSum = sum;
916 maxCol = k;
917 }
918 }
919
920 hihi = 0; /* cc-hi = "00" */
921 for (i = 0; i < n_comp; i++) {
922 /* add in colors */
923 hihi <<= 5;
924 hihi |= reord[maxCol][i] >> 3;
925 }
926 for (i = 0; i < n_comp; i++) {
927 /* add in colors */
928 hihi <<= 5;
929 hihi |= reord[minCol][i] >> 3;
930 }
931 cc[3] = hihi;
932 cc[0] = cc[1] = cc[2] = 0;
933
934 /* compute interpolation vector */
935 if (minCol != maxCol) {
936 MAKEIVEC(n_vect, n_comp, iv, b, reord[minCol], reord[maxCol]);
937 }
938
939 /* add in texels */
940 for (k = N_TEXELS - 1; k >= 0; k--) {
941 GLint t = k * 3;
942 GLuint *kk = (GLuint *)((char *)cc + t / 8);
943 GLint texel = n_vect + 1; /* transparent black */
944
945 if (!ISTBLACK(input[k])) {
946 if (minCol != maxCol) {
947 /* interpolate color */
948 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
949 /* add in texel */
950 kk[0] |= texel << (t & 7);
951 }
952 } else {
953 /* add in texel */
954 kk[0] |= texel << (t & 7);
955 }
956 }
957 }
958
959
/*
 * Encode one block in CC_MIXED mode with index 3 reserved for transparent
 * black (the mode word is built from the constant 9).
 *
 * Each half of the block (16 texels) gets its own pair of RGB end points:
 * vec[0], vec[1] for the left half and vec[2], vec[3] for the right half,
 * taken from the darkest and brightest texels of that half that are not
 * transparent black.  cc[0] / cc[1] receive the 2-bit index of every texel
 * of the left / right half; the upper 64 bits of cc receive the mode bits
 * and the four colors at 5 bits per component.
 */
static void
fxt1_quantize_MIXED1 (GLuint *cc,
                      GLubyte input[N_TEXELS][MAX_COMP])
{
   const GLint n_vect = 2; /* highest vector number in each microtile */
   const GLint n_comp = 3; /* 3 components: R, G, B */
   GLubyte vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
   GLfloat b, iv[MAX_COMP]; /* interpolation vector */
   GLint i, j, k;
   Fx64 hi; /* high quadword */
   GLuint lohi, lolo; /* low quadword: hi dword, lo dword */

   GLint minSum;
   GLint maxSum;
   /* maxCol? stays at -1 when a half holds only transparent-black texels */
   GLint minColL = 0, maxColL = -1;
   GLint minColR = 0, maxColR = -1;

   /* Our solution here is to find the darkest and brightest colors in
    * the 4x4 tile and use those as the two representative colors.
    * There are probably better algorithms to use (histogram-based).
    */
   /* left half; transparent-black texels are skipped */
   minSum = 2000; /* big enough */
   maxSum = -1; /* small enough */
   for (k = 0; k < N_TEXELS / 2; k++) {
      if (!ISTBLACK(input[k])) {
         GLint sum = 0;
         for (i = 0; i < n_comp; i++) {
            sum += input[k][i];
         }
         if (minSum > sum) {
            minSum = sum;
            minColL = k;
         }
         if (maxSum < sum) {
            maxSum = sum;
            maxColL = k;
         }
      }
   }
   /* right half: k continues from N_TEXELS/2 */
   minSum = 2000; /* big enough */
   maxSum = -1; /* small enough */
   for (; k < N_TEXELS; k++) {
      if (!ISTBLACK(input[k])) {
         GLint sum = 0;
         for (i = 0; i < n_comp; i++) {
            sum += input[k][i];
         }
         if (minSum > sum) {
            minSum = sum;
            minColR = k;
         }
         if (maxSum < sum) {
            maxSum = sum;
            maxColR = k;
         }
      }
   }

   /* left microtile */
   if (maxColL == -1) {
      /* all transparent black */
      cc[0] = ~0u;
      for (i = 0; i < n_comp; i++) {
         vec[0][i] = 0;
         vec[1][i] = 0;
      }
   } else {
      cc[0] = 0;
      for (i = 0; i < n_comp; i++) {
         vec[0][i] = input[minColL][i];
         vec[1][i] = input[maxColL][i];
      }
      /* when both extrema are the same texel the indices stay all zero */
      if (minColL != maxColL) {
         /* compute interpolation vector */
         MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);

         /* add in texels */
         lolo = 0;
         for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
            GLint texel = n_vect + 1; /* transparent black */
            if (!ISTBLACK(input[k])) {
               /* interpolate color */
               CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
            }
            /* add in texel */
            lolo <<= 2;
            lolo |= texel;
         }
         cc[0] = lolo;
      }
   }

   /* right microtile */
   if (maxColR == -1) {
      /* all transparent black */
      cc[1] = ~0u;
      for (i = 0; i < n_comp; i++) {
         vec[2][i] = 0;
         vec[3][i] = 0;
      }
   } else {
      cc[1] = 0;
      for (i = 0; i < n_comp; i++) {
         vec[2][i] = input[minColR][i];
         vec[3][i] = input[maxColR][i];
      }
      /* when both extrema are the same texel the indices stay all zero */
      if (minColR != maxColR) {
         /* compute interpolation vector */
         MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);

         /* add in texels */
         lohi = 0;
         for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
            GLint texel = n_vect + 1; /* transparent black */
            if (!ISTBLACK(input[k])) {
               /* interpolate color */
               CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
            }
            /* add in texel */
            lohi <<= 2;
            lohi |= texel;
         }
         cc[1] = lohi;
      }
   }

   /* mode word: constant 9, plus bit 2 of vec[3]'s green in bit 2 and
    * bit 2 of vec[1]'s green in bit 1
    */
   FX64_MOV32(hi, 9 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
   for (j = 2 * 2 - 1; j >= 0; j--) {
      for (i = 0; i < n_comp; i++) {
         /* add in colors */
         FX64_SHL(hi, 5);
         FX64_OR32(hi, vec[j][i] >> 3);
      }
   }
   ((Fx64 *)cc)[1] = hi;
}
1096
1097
/*
 * Encode one block in CC_MIXED mode with four interpolated colors per
 * half (the mode word is built from the constant 8).
 *
 * Each half of the block (16 texels) gets its own pair of RGB end points:
 * vec[0], vec[1] for the left half and vec[2], vec[3] for the right half.
 * The end points are the texels holding the minimum and maximum value of
 * the channel with the largest variance in that half.  cc[0] / cc[1]
 * receive the 2-bit interpolation index of every texel of the left / right
 * half; the upper 64 bits of cc receive the mode bits and the four colors
 * at 5 bits per component.
 */
static void
fxt1_quantize_MIXED0 (GLuint *cc,
                      GLubyte input[N_TEXELS][MAX_COMP])
{
   const GLint n_vect = 3; /* highest vector number in each microtile */
   const GLint n_comp = 3; /* 3 components: R, G, B */
   GLubyte vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
   GLfloat b, iv[MAX_COMP]; /* interpolation vector */
   GLint i, j, k;
   Fx64 hi; /* high quadword */
   GLuint lohi, lolo; /* low quadword: hi dword, lo dword */

   GLint minColL = 0, maxColL = 0;
   GLint minColR = 0, maxColR = 0;
#if 0
   /* disabled alternative: pick the extrema by component sum */
   GLint minSum;
   GLint maxSum;

   /* Our solution here is to find the darkest and brightest colors in
    * the 4x4 tile and use those as the two representative colors.
    * There are probably better algorithms to use (histogram-based).
    */
   minSum = 2000; /* big enough */
   maxSum = -1; /* small enough */
   for (k = 0; k < N_TEXELS / 2; k++) {
      GLint sum = 0;
      for (i = 0; i < n_comp; i++) {
         sum += input[k][i];
      }
      if (minSum > sum) {
         minSum = sum;
         minColL = k;
      }
      if (maxSum < sum) {
         maxSum = sum;
         maxColL = k;
      }
   }
   minSum = 2000; /* big enough */
   maxSum = -1; /* small enough */
   for (; k < N_TEXELS; k++) {
      GLint sum = 0;
      for (i = 0; i < n_comp; i++) {
         sum += input[k][i];
      }
      if (minSum > sum) {
         minSum = sum;
         minColR = k;
      }
      if (maxSum < sum) {
         maxSum = sum;
         maxColR = k;
      }
   }
#else
   GLint minVal;
   GLint maxVal;
   /* channel with the largest variance in the left / right half */
   GLint maxVarL = fxt1_variance(NULL, input, n_comp, N_TEXELS / 2);
   GLint maxVarR = fxt1_variance(NULL, &input[N_TEXELS / 2], n_comp, N_TEXELS / 2);

   /* Scan the channel with max variance for lo & hi
    * and use those as the two representative colors.
    */
   minVal = 2000; /* big enough */
   maxVal = -1; /* small enough */
   for (k = 0; k < N_TEXELS / 2; k++) {
      GLint t = input[k][maxVarL];
      if (minVal > t) {
         minVal = t;
         minColL = k;
      }
      if (maxVal < t) {
         maxVal = t;
         maxColL = k;
      }
   }
   /* right half: k continues from N_TEXELS/2 */
   minVal = 2000; /* big enough */
   maxVal = -1; /* small enough */
   for (; k < N_TEXELS; k++) {
      GLint t = input[k][maxVarR];
      if (minVal > t) {
         minVal = t;
         minColR = k;
      }
      if (maxVal < t) {
         maxVal = t;
         maxColR = k;
      }
   }
#endif

   /* left microtile */
   cc[0] = 0;
   for (i = 0; i < n_comp; i++) {
      vec[0][i] = input[minColL][i];
      vec[1][i] = input[maxColL][i];
   }
   /* when both extrema are the same texel the indices stay all zero */
   if (minColL != maxColL) {
      /* compute interpolation vector */
      MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);

      /* add in texels */
      lolo = 0;
      for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
         GLint texel;
         /* interpolate color */
         CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
         /* add in texel */
         lolo <<= 2;
         lolo |= texel;
      }

      /* funky encoding for LSB of green: bit 1 of the index word must
       * equal bit 2 of (green of vec[1] XOR green of vec[0]); if it does
       * not, swap the two end points and invert all indices
       */
      if ((GLint)((lolo >> 1) & 1) != (((vec[1][GCOMP] ^ vec[0][GCOMP]) >> 2) & 1)) {
         for (i = 0; i < n_comp; i++) {
            vec[1][i] = input[minColL][i];
            vec[0][i] = input[maxColL][i];
         }
         lolo = ~lolo;
      }

      cc[0] = lolo;
   }

   /* right microtile */
   cc[1] = 0;
   for (i = 0; i < n_comp; i++) {
      vec[2][i] = input[minColR][i];
      vec[3][i] = input[maxColR][i];
   }
   /* when both extrema are the same texel the indices stay all zero */
   if (minColR != maxColR) {
      /* compute interpolation vector */
      MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);

      /* add in texels */
      lohi = 0;
      for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
         GLint texel;
         /* interpolate color */
         CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
         /* add in texel */
         lohi <<= 2;
         lohi |= texel;
      }

      /* funky encoding for LSB of green: same rule as for the left
       * microtile, applied to vec[3] / vec[2]
       */
      if ((GLint)((lohi >> 1) & 1) != (((vec[3][GCOMP] ^ vec[2][GCOMP]) >> 2) & 1)) {
         for (i = 0; i < n_comp; i++) {
            vec[3][i] = input[minColR][i];
            vec[2][i] = input[maxColR][i];
         }
         lohi = ~lohi;
      }

      cc[1] = lohi;
   }

   /* mode word: constant 8, plus bit 2 of vec[3]'s green in bit 2 and
    * bit 2 of vec[1]'s green in bit 1
    */
   FX64_MOV32(hi, 8 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
   for (j = 2 * 2 - 1; j >= 0; j--) {
      for (i = 0; i < n_comp; i++) {
         /* add in colors */
         FX64_SHL(hi, 5);
         FX64_OR32(hi, vec[j][i] >> 3);
      }
   }
   ((Fx64 *)cc)[1] = hi;
}
1265
1266
1267 static void
1268 fxt1_quantize (GLuint *cc, const GLubyte *lines[], GLint comps)
1269 {
1270 GLint trualpha;
1271 GLubyte reord[N_TEXELS][MAX_COMP];
1272
1273 GLubyte input[N_TEXELS][MAX_COMP];
1274 GLint i, k, l;
1275
1276 if (comps == 3) {
1277 /* make the whole block opaque */
1278 _mesa_memset(input, -1, sizeof(input));
1279 }
1280
1281 /* 8 texels each line */
1282 for (l = 0; l < 4; l++) {
1283 for (k = 0; k < 4; k++) {
1284 for (i = 0; i < comps; i++) {
1285 input[k + l * 4][i] = *lines[l]++;
1286 }
1287 }
1288 for (; k < 8; k++) {
1289 for (i = 0; i < comps; i++) {
1290 input[k + l * 4 + 12][i] = *lines[l]++;
1291 }
1292 }
1293 }
1294
1295 /* block layout:
1296 * 00, 01, 02, 03, 08, 09, 0a, 0b
1297 * 10, 11, 12, 13, 18, 19, 1a, 1b
1298 * 04, 05, 06, 07, 0c, 0d, 0e, 0f
1299 * 14, 15, 16, 17, 1c, 1d, 1e, 1f
1300 */
1301
1302 /* [dBorca]
1303 * stupidity flows forth from this
1304 */
1305 l = N_TEXELS;
1306 trualpha = 0;
1307 if (comps == 4) {
1308 /* skip all transparent black texels */
1309 l = 0;
1310 for (k = 0; k < N_TEXELS; k++) {
1311 /* test all components against 0 */
1312 if (!ISTBLACK(input[k])) {
1313 /* texel is not transparent black */
1314 COPY_4UBV(reord[l], input[k]);
1315 if (reord[l][ACOMP] < (255 - ALPHA_TS)) {
1316 /* non-opaque texel */
1317 trualpha = !0;
1318 }
1319 l++;
1320 }
1321 }
1322 }
1323
1324 #if 0
1325 if (trualpha) {
1326 fxt1_quantize_ALPHA0(cc, input, reord, l);
1327 } else if (l == 0) {
1328 cc[0] = cc[1] = cc[2] = -1;
1329 cc[3] = 0;
1330 } else if (l < N_TEXELS) {
1331 fxt1_quantize_HI(cc, input, reord, l);
1332 } else {
1333 fxt1_quantize_CHROMA(cc, input);
1334 }
1335 (void)fxt1_quantize_ALPHA1;
1336 (void)fxt1_quantize_MIXED1;
1337 (void)fxt1_quantize_MIXED0;
1338 #else
1339 if (trualpha) {
1340 fxt1_quantize_ALPHA1(cc, input);
1341 } else if (l == 0) {
1342 cc[0] = cc[1] = cc[2] = ~0u;
1343 cc[3] = 0;
1344 } else if (l < N_TEXELS) {
1345 fxt1_quantize_MIXED1(cc, input);
1346 } else {
1347 fxt1_quantize_MIXED0(cc, input);
1348 }
1349 (void)fxt1_quantize_ALPHA0;
1350 (void)fxt1_quantize_HI;
1351 (void)fxt1_quantize_CHROMA;
1352 #endif
1353 }
1354
1355
1356 static void
1357 fxt1_encode (GLuint width, GLuint height, GLint comps,
1358 const void *source, GLint srcRowStride,
1359 void *dest, GLint destRowStride)
1360 {
1361 GLuint x, y;
1362 const GLubyte *data;
1363 GLuint *encoded = (GLuint *)dest;
1364 void *newSource = NULL;
1365
1366 assert(comps == 3 || comps == 4);
1367
1368 /* Replicate image if width is not M8 or height is not M4 */
1369 if ((width & 7) | (height & 3)) {
1370 GLint newWidth = (width + 7) & ~7;
1371 GLint newHeight = (height + 3) & ~3;
1372 newSource = _mesa_malloc(comps * newWidth * newHeight * sizeof(GLchan));
1373 if (!newSource) {
1374 GET_CURRENT_CONTEXT(ctx);
1375 _mesa_error(ctx, GL_OUT_OF_MEMORY, "texture compression");
1376 goto cleanUp;
1377 }
1378 _mesa_upscale_teximage2d(width, height, newWidth, newHeight,
1379 comps, (const GLchan *) source,
1380 srcRowStride, (GLchan *) newSource);
1381 source = newSource;
1382 width = newWidth;
1383 height = newHeight;
1384 srcRowStride = comps * newWidth;
1385 }
1386
1387 /* convert from 16/32-bit channels to GLubyte if needed */
1388 if (CHAN_TYPE != GL_UNSIGNED_BYTE) {
1389 const GLuint n = width * height * comps;
1390 const GLchan *src = (const GLchan *) source;
1391 GLubyte *dest = (GLubyte *) _mesa_malloc(n * sizeof(GLubyte));
1392 GLuint i;
1393 if (!dest) {
1394 GET_CURRENT_CONTEXT(ctx);
1395 _mesa_error(ctx, GL_OUT_OF_MEMORY, "texture compression");
1396 goto cleanUp;
1397 }
1398 for (i = 0; i < n; i++) {
1399 dest[i] = CHAN_TO_UBYTE(src[i]);
1400 }
1401 if (newSource != NULL) {
1402 _mesa_free(newSource);
1403 }
1404 newSource = dest; /* we'll free this buffer before returning */
1405 source = dest; /* the new, GLubyte incoming image */
1406 }
1407
1408 data = (const GLubyte *) source;
1409 destRowStride = (destRowStride - width * 2) / 4;
1410 for (y = 0; y < height; y += 4) {
1411 GLuint offs = 0 + (y + 0) * srcRowStride;
1412 for (x = 0; x < width; x += 8) {
1413 const GLubyte *lines[4];
1414 lines[0] = &data[offs];
1415 lines[1] = lines[0] + srcRowStride;
1416 lines[2] = lines[1] + srcRowStride;
1417 lines[3] = lines[2] + srcRowStride;
1418 offs += 8 * comps;
1419 fxt1_quantize(encoded, lines, comps);
1420 /* 128 bits per 8x4 block */
1421 encoded += 4;
1422 }
1423 encoded += destRowStride;
1424 }
1425
1426 cleanUp:
1427 if (newSource != NULL) {
1428 _mesa_free(newSource);
1429 }
1430 }
1431
1432
1433 /***************************************************************************\
1434 * FXT1 decoder
1435 *
1436 * The decoder is based on GL_3DFX_texture_compression_FXT1
1437 * specification and serves as a concept for the encoder.
1438 \***************************************************************************/
1439
1440
1441 /* lookup table for scaling 5 bit colors up to 8 bits */
1442 static const GLubyte _rgb_scale_5[] = {
1443 0, 8, 16, 25, 33, 41, 49, 58,
1444 66, 74, 82, 90, 99, 107, 115, 123,
1445 132, 140, 148, 156, 165, 173, 181, 189,
1446 197, 206, 214, 222, 230, 239, 247, 255
1447 };
1448
1449 /* lookup table for scaling 6 bit colors up to 8 bits */
1450 static const GLubyte _rgb_scale_6[] = {
1451 0, 4, 8, 12, 16, 20, 24, 28,
1452 32, 36, 40, 45, 49, 53, 57, 61,
1453 65, 69, 73, 77, 81, 85, 89, 93,
1454 97, 101, 105, 109, 113, 117, 121, 125,
1455 130, 134, 138, 142, 146, 150, 154, 158,
1456 162, 166, 170, 174, 178, 182, 186, 190,
1457 194, 198, 202, 206, 210, 215, 219, 223,
1458 227, 231, 235, 239, 243, 247, 251, 255
1459 };
1460
1461
/* CC_SEL: view the block as an array of 32-bit words and shift bit number
 * `which` down to bit 0.  Higher bits are left in place, so callers mask
 * the result (UP5/UP6 do so themselves).
 */
#define CC_SEL(cc, which) (((const GLuint *)(cc))[(which) / 32] >> ((which) & 31))

/* UP5: scale the low 5 bits of c up to 8 bits */
#define UP5(c) _rgb_scale_5[(c) & 31]

/* UP6: scale the 6-bit value made of the low 5 bits of c followed by the
 * low bit of b up to 8 bits
 */
#define UP6(c, b) _rgb_scale_6[(((c) & 31) << 1) | ((b) & 1)]

/* LERP: c0 + (c1 - c0) * t / n, rounded to nearest.  The whole expansion
 * is parenthesized so it can be used inside a larger expression.
 */
#define LERP(n, t, c0, c1) ((((n) - (t)) * (c0) + (t) * (c1) + (n) / 2) / (n))
1466
1467
1468 static void
1469 fxt1_decode_1HI (const GLubyte *code, GLint t, GLchan *rgba)
1470 {
1471 const GLuint *cc;
1472
1473 t *= 3;
1474 cc = (const GLuint *)(code + t / 8);
1475 t = (cc[0] >> (t & 7)) & 7;
1476
1477 if (t == 7) {
1478 rgba[RCOMP] = rgba[GCOMP] = rgba[BCOMP] = rgba[ACOMP] = 0;
1479 } else {
1480 GLubyte r, g, b;
1481 cc = (const GLuint *)(code + 12);
1482 if (t == 0) {
1483 b = UP5(CC_SEL(cc, 0));
1484 g = UP5(CC_SEL(cc, 5));
1485 r = UP5(CC_SEL(cc, 10));
1486 } else if (t == 6) {
1487 b = UP5(CC_SEL(cc, 15));
1488 g = UP5(CC_SEL(cc, 20));
1489 r = UP5(CC_SEL(cc, 25));
1490 } else {
1491 b = LERP(6, t, UP5(CC_SEL(cc, 0)), UP5(CC_SEL(cc, 15)));
1492 g = LERP(6, t, UP5(CC_SEL(cc, 5)), UP5(CC_SEL(cc, 20)));
1493 r = LERP(6, t, UP5(CC_SEL(cc, 10)), UP5(CC_SEL(cc, 25)));
1494 }
1495 rgba[RCOMP] = UBYTE_TO_CHAN(r);
1496 rgba[GCOMP] = UBYTE_TO_CHAN(g);
1497 rgba[BCOMP] = UBYTE_TO_CHAN(b);
1498 rgba[ACOMP] = CHAN_MAX;
1499 }
1500 }
1501
1502
1503 static void
1504 fxt1_decode_1CHROMA (const GLubyte *code, GLint t, GLchan *rgba)
1505 {
1506 const GLuint *cc;
1507 GLuint kk;
1508
1509 cc = (const GLuint *)code;
1510 if (t & 16) {
1511 cc++;
1512 t &= 15;
1513 }
1514 t = (cc[0] >> (t * 2)) & 3;
1515
1516 t *= 15;
1517 cc = (const GLuint *)(code + 8 + t / 8);
1518 kk = cc[0] >> (t & 7);
1519 rgba[BCOMP] = UBYTE_TO_CHAN( UP5(kk) );
1520 rgba[GCOMP] = UBYTE_TO_CHAN( UP5(kk >> 5) );
1521 rgba[RCOMP] = UBYTE_TO_CHAN( UP5(kk >> 10) );
1522 rgba[ACOMP] = CHAN_MAX;
1523 }
1524
1525
1526 static void
1527 fxt1_decode_1MIXED (const GLubyte *code, GLint t, GLchan *rgba)
1528 {
1529 const GLuint *cc;
1530 GLuint col[2][3];
1531 GLint glsb, selb;
1532
1533 cc = (const GLuint *)code;
1534 if (t & 16) {
1535 t &= 15;
1536 t = (cc[1] >> (t * 2)) & 3;
1537 /* col 2 */
1538 col[0][BCOMP] = (*(const GLuint *)(code + 11)) >> 6;
1539 col[0][GCOMP] = CC_SEL(cc, 99);
1540 col[0][RCOMP] = CC_SEL(cc, 104);
1541 /* col 3 */
1542 col[1][BCOMP] = CC_SEL(cc, 109);
1543 col[1][GCOMP] = CC_SEL(cc, 114);
1544 col[1][RCOMP] = CC_SEL(cc, 119);
1545 glsb = CC_SEL(cc, 126);
1546 selb = CC_SEL(cc, 33);
1547 } else {
1548 t = (cc[0] >> (t * 2)) & 3;
1549 /* col 0 */
1550 col[0][BCOMP] = CC_SEL(cc, 64);
1551 col[0][GCOMP] = CC_SEL(cc, 69);
1552 col[0][RCOMP] = CC_SEL(cc, 74);
1553 /* col 1 */
1554 col[1][BCOMP] = CC_SEL(cc, 79);
1555 col[1][GCOMP] = CC_SEL(cc, 84);
1556 col[1][RCOMP] = CC_SEL(cc, 89);
1557 glsb = CC_SEL(cc, 125);
1558 selb = CC_SEL(cc, 1);
1559 }
1560
1561 if (CC_SEL(cc, 124) & 1) {
1562 /* alpha[0] == 1 */
1563
1564 if (t == 3) {
1565 /* zero */
1566 rgba[RCOMP] = rgba[BCOMP] = rgba[GCOMP] = rgba[ACOMP] = 0;
1567 } else {
1568 GLubyte r, g, b;
1569 if (t == 0) {
1570 b = UP5(col[0][BCOMP]);
1571 g = UP5(col[0][GCOMP]);
1572 r = UP5(col[0][RCOMP]);
1573 } else if (t == 2) {
1574 b = UP5(col[1][BCOMP]);
1575 g = UP6(col[1][GCOMP], glsb);
1576 r = UP5(col[1][RCOMP]);
1577 } else {
1578 b = (UP5(col[0][BCOMP]) + UP5(col[1][BCOMP])) / 2;
1579 g = (UP5(col[0][GCOMP]) + UP6(col[1][GCOMP], glsb)) / 2;
1580 r = (UP5(col[0][RCOMP]) + UP5(col[1][RCOMP])) / 2;
1581 }
1582 rgba[RCOMP] = UBYTE_TO_CHAN(r);
1583 rgba[GCOMP] = UBYTE_TO_CHAN(g);
1584 rgba[BCOMP] = UBYTE_TO_CHAN(b);
1585 rgba[ACOMP] = CHAN_MAX;
1586 }
1587 } else {
1588 /* alpha[0] == 0 */
1589 GLubyte r, g, b;
1590 if (t == 0) {
1591 b = UP5(col[0][BCOMP]);
1592 g = UP6(col[0][GCOMP], glsb ^ selb);
1593 r = UP5(col[0][RCOMP]);
1594 } else if (t == 3) {
1595 b = UP5(col[1][BCOMP]);
1596 g = UP6(col[1][GCOMP], glsb);
1597 r = UP5(col[1][RCOMP]);
1598 } else {
1599 b = LERP(3, t, UP5(col[0][BCOMP]), UP5(col[1][BCOMP]));
1600 g = LERP(3, t, UP6(col[0][GCOMP], glsb ^ selb),
1601 UP6(col[1][GCOMP], glsb));
1602 r = LERP(3, t, UP5(col[0][RCOMP]), UP5(col[1][RCOMP]));
1603 }
1604 rgba[RCOMP] = UBYTE_TO_CHAN(r);
1605 rgba[GCOMP] = UBYTE_TO_CHAN(g);
1606 rgba[BCOMP] = UBYTE_TO_CHAN(b);
1607 rgba[ACOMP] = CHAN_MAX;
1608 }
1609 }
1610
1611
1612 static void
1613 fxt1_decode_1ALPHA (const GLubyte *code, GLint t, GLchan *rgba)
1614 {
1615 const GLuint *cc;
1616 GLubyte r, g, b, a;
1617
1618 cc = (const GLuint *)code;
1619 if (CC_SEL(cc, 124) & 1) {
1620 /* lerp == 1 */
1621 GLuint col0[4];
1622
1623 if (t & 16) {
1624 t &= 15;
1625 t = (cc[1] >> (t * 2)) & 3;
1626 /* col 2 */
1627 col0[BCOMP] = (*(const GLuint *)(code + 11)) >> 6;
1628 col0[GCOMP] = CC_SEL(cc, 99);
1629 col0[RCOMP] = CC_SEL(cc, 104);
1630 col0[ACOMP] = CC_SEL(cc, 119);
1631 } else {
1632 t = (cc[0] >> (t * 2)) & 3;
1633 /* col 0 */
1634 col0[BCOMP] = CC_SEL(cc, 64);
1635 col0[GCOMP] = CC_SEL(cc, 69);
1636 col0[RCOMP] = CC_SEL(cc, 74);
1637 col0[ACOMP] = CC_SEL(cc, 109);
1638 }
1639
1640 if (t == 0) {
1641 b = UP5(col0[BCOMP]);
1642 g = UP5(col0[GCOMP]);
1643 r = UP5(col0[RCOMP]);
1644 a = UP5(col0[ACOMP]);
1645 } else if (t == 3) {
1646 b = UP5(CC_SEL(cc, 79));
1647 g = UP5(CC_SEL(cc, 84));
1648 r = UP5(CC_SEL(cc, 89));
1649 a = UP5(CC_SEL(cc, 114));
1650 } else {
1651 b = LERP(3, t, UP5(col0[BCOMP]), UP5(CC_SEL(cc, 79)));
1652 g = LERP(3, t, UP5(col0[GCOMP]), UP5(CC_SEL(cc, 84)));
1653 r = LERP(3, t, UP5(col0[RCOMP]), UP5(CC_SEL(cc, 89)));
1654 a = LERP(3, t, UP5(col0[ACOMP]), UP5(CC_SEL(cc, 114)));
1655 }
1656 } else {
1657 /* lerp == 0 */
1658
1659 if (t & 16) {
1660 cc++;
1661 t &= 15;
1662 }
1663 t = (cc[0] >> (t * 2)) & 3;
1664
1665 if (t == 3) {
1666 /* zero */
1667 r = g = b = a = 0;
1668 } else {
1669 GLuint kk;
1670 cc = (const GLuint *)code;
1671 a = UP5(cc[3] >> (t * 5 + 13));
1672 t *= 15;
1673 cc = (const GLuint *)(code + 8 + t / 8);
1674 kk = cc[0] >> (t & 7);
1675 b = UP5(kk);
1676 g = UP5(kk >> 5);
1677 r = UP5(kk >> 10);
1678 }
1679 }
1680 rgba[RCOMP] = UBYTE_TO_CHAN(r);
1681 rgba[GCOMP] = UBYTE_TO_CHAN(g);
1682 rgba[BCOMP] = UBYTE_TO_CHAN(b);
1683 rgba[ACOMP] = UBYTE_TO_CHAN(a);
1684 }
1685
1686
1687 void
1688 fxt1_decode_1 (const void *texture, GLint stride, /* in pixels */
1689 GLint i, GLint j, GLchan *rgba)
1690 {
1691 static void (*decode_1[]) (const GLubyte *, GLint, GLchan *) = {
1692 fxt1_decode_1HI, /* cc-high = "00?" */
1693 fxt1_decode_1HI, /* cc-high = "00?" */
1694 fxt1_decode_1CHROMA, /* cc-chroma = "010" */
1695 fxt1_decode_1ALPHA, /* alpha = "011" */
1696 fxt1_decode_1MIXED, /* mixed = "1??" */
1697 fxt1_decode_1MIXED, /* mixed = "1??" */
1698 fxt1_decode_1MIXED, /* mixed = "1??" */
1699 fxt1_decode_1MIXED /* mixed = "1??" */
1700 };
1701
1702 const GLubyte *code = (const GLubyte *)texture +
1703 ((j / 4) * (stride / 8) + (i / 8)) * 16;
1704 GLint mode = CC_SEL(code, 125);
1705 GLint t = i & 7;
1706
1707 if (t & 4) {
1708 t += 12;
1709 }
1710 t += (j & 3) * 4;
1711
1712 decode_1[mode](code, t, rgba);
1713 }