/*
 * Mesa 3-D graphics library
 *
 * Copyright (C) 1999-2008 Brian Paul All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

/**
 * \file texcompress_fxt1.c
 * GL_3DFX_texture_compression_FXT1 support.
 */
38 #include "texcompress.h"
39 #include "texcompress_fxt1.h"
44 fxt1_encode (GLuint width
, GLuint height
, GLint comps
,
45 const void *source
, GLint srcRowStride
,
46 void *dest
, GLint destRowStride
);
49 fxt1_decode_1 (const void *texture
, GLint stride
,
50 GLint i
, GLint j
, GLubyte
*rgba
);
54 * Store user's image in rgb_fxt1 format.
57 _mesa_texstore_rgb_fxt1(TEXSTORE_PARAMS
)
59 const GLubyte
*pixels
;
62 const GLubyte
*tempImage
= NULL
;
64 ASSERT(dstFormat
== MESA_FORMAT_RGB_FXT1
);
66 if (srcFormat
!= GL_RGB
||
67 srcType
!= GL_UNSIGNED_BYTE
||
68 ctx
->_ImageTransferState
||
69 srcPacking
->RowLength
!= srcWidth
||
70 srcPacking
->SwapBytes
) {
71 /* convert image to RGB/GLubyte */
72 tempImage
= _mesa_make_temp_ubyte_image(ctx
, dims
,
74 _mesa_get_format_base_format(dstFormat
),
75 srcWidth
, srcHeight
, srcDepth
,
76 srcFormat
, srcType
, srcAddr
,
79 return GL_FALSE
; /* out of memory */
81 srcRowStride
= 3 * srcWidth
;
85 pixels
= _mesa_image_address2d(srcPacking
, srcAddr
, srcWidth
, srcHeight
,
86 srcFormat
, srcType
, 0, 0);
88 srcRowStride
= _mesa_image_row_stride(srcPacking
, srcWidth
, srcFormat
,
89 srcType
) / sizeof(GLubyte
);
94 fxt1_encode(srcWidth
, srcHeight
, 3, pixels
, srcRowStride
,
97 free((void*) tempImage
);
104 * Store user's image in rgba_fxt1 format.
107 _mesa_texstore_rgba_fxt1(TEXSTORE_PARAMS
)
109 const GLubyte
*pixels
;
112 const GLubyte
*tempImage
= NULL
;
114 ASSERT(dstFormat
== MESA_FORMAT_RGBA_FXT1
);
116 if (srcFormat
!= GL_RGBA
||
117 srcType
!= GL_UNSIGNED_BYTE
||
118 ctx
->_ImageTransferState
||
119 srcPacking
->SwapBytes
) {
120 /* convert image to RGBA/GLubyte */
121 tempImage
= _mesa_make_temp_ubyte_image(ctx
, dims
,
123 _mesa_get_format_base_format(dstFormat
),
124 srcWidth
, srcHeight
, srcDepth
,
125 srcFormat
, srcType
, srcAddr
,
128 return GL_FALSE
; /* out of memory */
130 srcRowStride
= 4 * srcWidth
;
134 pixels
= _mesa_image_address2d(srcPacking
, srcAddr
, srcWidth
, srcHeight
,
135 srcFormat
, srcType
, 0, 0);
137 srcRowStride
= _mesa_image_row_stride(srcPacking
, srcWidth
, srcFormat
,
138 srcType
) / sizeof(GLubyte
);
143 fxt1_encode(srcWidth
, srcHeight
, 4, pixels
, srcRowStride
,
146 free((void*) tempImage
);
/***************************************************************************\
 * The encoder was built by reversing the decoder,
 * and is vaguely based on Texus2 by 3dfx. Note that this code
 * is merely a proof of concept, since it is highly UNoptimized;
 * moreover, it is sub-optimal due to initial conditions passed
 * to Lloyd's algorithm (the interpolation modes are even worse).
\***************************************************************************/
163 #define MAX_COMP 4 /* ever needed maximum number of components in texel */
164 #define MAX_VECT 4 /* ever needed maximum number of base vectors to find */
165 #define N_TEXELS 32 /* number of texels in a block (always 32) */
166 #define LL_N_REP 50 /* number of iterations in lloyd's vq */
167 #define LL_RMS_D 10 /* fault tolerance (maximum delta) */
168 #define LL_RMS_E 255 /* fault tolerance (maximum error) */
169 #define ALPHA_TS 2 /* alpha threshold: (255 - ALPHA_TS) deemed opaque */
170 #define ISTBLACK(v) (*((GLuint *)(v)) == 0)
174 * Define a 64-bit unsigned integer type and macros
178 #define FX64_NATIVE 1
180 typedef uint64_t Fx64
;
182 #define FX64_MOV32(a, b) a = b
183 #define FX64_OR32(a, b) a |= b
184 #define FX64_SHL(a, c) a <<= c
188 #define FX64_NATIVE 0
194 #define FX64_MOV32(a, b) a.lo = b
195 #define FX64_OR32(a, b) a.lo |= b
197 #define FX64_SHL(a, c) \
200 a.hi = a.lo << ((c) - 32); \
203 a.hi = (a.hi << (c)) | (a.lo >> (32 - (c))); \
211 #define F(i) (GLfloat)1 /* can be used to obtain an oblong metric: 0.30 / 0.59 / 0.11 */
212 #define SAFECDOT 1 /* for paranoids */
214 #define MAKEIVEC(NV, NC, IV, B, V0, V1) \
216 /* compute interpolation vector */ \
220 for (i = 0; i < NC; i++) { \
221 IV[i] = (V1[i] - V0[i]) * F(i); \
222 d2 += IV[i] * IV[i]; \
224 rd2 = (GLfloat)NV / d2; \
226 for (i = 0; i < NC; i++) { \
228 B -= IV[i] * V0[i]; \
231 B = B * rd2 + 0.5f; \
234 #define CALCCDOT(TEXEL, NV, NC, IV, B, V)\
236 GLfloat dot = 0.0F; \
237 for (i = 0; i < NC; i++) { \
238 dot += V[i] * IV[i]; \
240 TEXEL = (GLint)(dot + B); \
244 } else if (TEXEL > NV) { \
252 fxt1_bestcol (GLfloat vec
[][MAX_COMP
], GLint nv
,
253 GLubyte input
[MAX_COMP
], GLint nc
)
255 GLint i
, j
, best
= -1;
256 GLfloat err
= 1e9
; /* big enough */
258 for (j
= 0; j
< nv
; j
++) {
260 for (i
= 0; i
< nc
; i
++) {
261 e
+= (vec
[j
][i
] - input
[i
]) * (vec
[j
][i
] - input
[i
]);
274 fxt1_worst (GLfloat vec
[MAX_COMP
],
275 GLubyte input
[N_TEXELS
][MAX_COMP
], GLint nc
, GLint n
)
277 GLint i
, k
, worst
= -1;
278 GLfloat err
= -1.0F
; /* small enough */
280 for (k
= 0; k
< n
; k
++) {
282 for (i
= 0; i
< nc
; i
++) {
283 e
+= (vec
[i
] - input
[k
][i
]) * (vec
[i
] - input
[k
][i
]);
296 fxt1_variance (GLdouble variance
[MAX_COMP
],
297 GLubyte input
[N_TEXELS
][MAX_COMP
], GLint nc
, GLint n
)
299 GLint i
, k
, best
= 0;
301 GLdouble var
, maxvar
= -1; /* small enough */
302 GLdouble teenth
= 1.0 / n
;
304 for (i
= 0; i
< nc
; i
++) {
306 for (k
= 0; k
< n
; k
++) {
307 GLint t
= input
[k
][i
];
311 var
= sx2
* teenth
- sx
* sx
* teenth
* teenth
;
326 fxt1_choose (GLfloat vec
[][MAX_COMP
], GLint nv
,
327 GLubyte input
[N_TEXELS
][MAX_COMP
], GLint nc
, GLint n
)
330 /* Choose colors from a grid.
334 for (j
= 0; j
< nv
; j
++) {
335 GLint m
= j
* (n
- 1) / (nv
- 1);
336 for (i
= 0; i
< nc
; i
++) {
337 vec
[j
][i
] = input
[m
][i
];
341 /* Our solution here is to find the darkest and brightest colors in
342 * the 8x4 tile and use those as the two representative colors.
343 * There are probably better algorithms to use (histogram-based).
346 GLint minSum
= 2000; /* big enough */
347 GLint maxSum
= -1; /* small enough */
348 GLint minCol
= 0; /* phoudoin: silent compiler! */
349 GLint maxCol
= 0; /* phoudoin: silent compiler! */
359 memset(hist
, 0, sizeof(hist
));
361 for (k
= 0; k
< n
; k
++) {
365 for (i
= 0; i
< nc
; i
++) {
370 for (l
= 0; l
< n
; l
++) {
379 } else if (hist
[l
].key
== key
) {
395 for (j
= 0; j
< lenh
; j
++) {
396 for (i
= 0; i
< nc
; i
++) {
397 vec
[j
][i
] = (GLfloat
)input
[hist
[j
].idx
][i
];
400 for (; j
< nv
; j
++) {
401 for (i
= 0; i
< nc
; i
++) {
402 vec
[j
][i
] = vec
[0][i
];
408 for (j
= 0; j
< nv
; j
++) {
409 for (i
= 0; i
< nc
; i
++) {
410 vec
[j
][i
] = ((nv
- 1 - j
) * input
[minCol
][i
] + j
* input
[maxCol
][i
] + (nv
- 1) / 2) / (GLfloat
)(nv
- 1);
420 fxt1_lloyd (GLfloat vec
[][MAX_COMP
], GLint nv
,
421 GLubyte input
[N_TEXELS
][MAX_COMP
], GLint nc
, GLint n
)
423 /* Use the generalized lloyd's algorithm for VQ:
424 * find 4 color vectors.
426 * for each sample color
427 * sort to nearest vector.
429 * replace each vector with the centroid of its matching colors.
431 * repeat until RMS doesn't improve.
433 * if a color vector has no samples, or becomes the same as another
434 * vector, replace it with the color which is farthest from a sample.
436 * vec[][MAX_COMP] initial vectors and resulting colors
437 * nv number of resulting colors required
438 * input[N_TEXELS][MAX_COMP] input texels
439 * nc number of components in input / vec
440 * n number of input samples
443 GLint sum
[MAX_VECT
][MAX_COMP
]; /* used to accumulate closest texels */
444 GLint cnt
[MAX_VECT
]; /* how many times a certain vector was chosen */
445 GLfloat error
, lasterror
= 1e9
;
450 for (rep
= 0; rep
< LL_N_REP
; rep
++) {
451 /* reset sums & counters */
452 for (j
= 0; j
< nv
; j
++) {
453 for (i
= 0; i
< nc
; i
++) {
460 /* scan whole block */
461 for (k
= 0; k
< n
; k
++) {
464 GLfloat err
= 1e9
; /* big enough */
465 /* determine best vector */
466 for (j
= 0; j
< nv
; j
++) {
467 GLfloat e
= (vec
[j
][0] - input
[k
][0]) * (vec
[j
][0] - input
[k
][0]) +
468 (vec
[j
][1] - input
[k
][1]) * (vec
[j
][1] - input
[k
][1]) +
469 (vec
[j
][2] - input
[k
][2]) * (vec
[j
][2] - input
[k
][2]);
471 e
+= (vec
[j
][3] - input
[k
][3]) * (vec
[j
][3] - input
[k
][3]);
479 GLint best
= fxt1_bestcol(vec
, nv
, input
[k
], nc
, &err
);
482 /* add in closest color */
483 for (i
= 0; i
< nc
; i
++) {
484 sum
[best
][i
] += input
[k
][i
];
486 /* mark this vector as used */
488 /* accumulate error */
493 if ((error
< LL_RMS_E
) ||
494 ((error
< lasterror
) && ((lasterror
- error
) < LL_RMS_D
))) {
495 return !0; /* good match */
499 /* move each vector to the barycenter of its closest colors */
500 for (j
= 0; j
< nv
; j
++) {
502 GLfloat div
= 1.0F
/ cnt
[j
];
503 for (i
= 0; i
< nc
; i
++) {
504 vec
[j
][i
] = div
* sum
[j
][i
];
507 /* this vec has no samples or is identical with a previous vec */
508 GLint worst
= fxt1_worst(vec
[j
], input
, nc
, n
);
509 for (i
= 0; i
< nc
; i
++) {
510 vec
[j
][i
] = input
[worst
][i
];
516 return 0; /* could not converge fast enough */
521 fxt1_quantize_CHROMA (GLuint
*cc
,
522 GLubyte input
[N_TEXELS
][MAX_COMP
])
524 const GLint n_vect
= 4; /* 4 base vectors to find */
525 const GLint n_comp
= 3; /* 3 components: R, G, B */
526 GLfloat vec
[MAX_VECT
][MAX_COMP
];
528 Fx64 hi
; /* high quadword */
529 GLuint lohi
, lolo
; /* low quadword: hi dword, lo dword */
531 if (fxt1_choose(vec
, n_vect
, input
, n_comp
, N_TEXELS
) != 0) {
532 fxt1_lloyd(vec
, n_vect
, input
, n_comp
, N_TEXELS
);
535 FX64_MOV32(hi
, 4); /* cc-chroma = "010" + unused bit */
536 for (j
= n_vect
- 1; j
>= 0; j
--) {
537 for (i
= 0; i
< n_comp
; i
++) {
540 FX64_OR32(hi
, (GLuint
)(vec
[j
][i
] / 8.0F
));
543 ((Fx64
*)cc
)[1] = hi
;
546 /* right microtile */
547 for (k
= N_TEXELS
- 1; k
>= N_TEXELS
/2; k
--) {
549 lohi
|= fxt1_bestcol(vec
, n_vect
, input
[k
], n_comp
);
552 for (; k
>= 0; k
--) {
554 lolo
|= fxt1_bestcol(vec
, n_vect
, input
[k
], n_comp
);
562 fxt1_quantize_ALPHA0 (GLuint
*cc
,
563 GLubyte input
[N_TEXELS
][MAX_COMP
],
564 GLubyte reord
[N_TEXELS
][MAX_COMP
], GLint n
)
566 const GLint n_vect
= 3; /* 3 base vectors to find */
567 const GLint n_comp
= 4; /* 4 components: R, G, B, A */
568 GLfloat vec
[MAX_VECT
][MAX_COMP
];
570 Fx64 hi
; /* high quadword */
571 GLuint lohi
, lolo
; /* low quadword: hi dword, lo dword */
573 /* the last vector indicates zero */
574 for (i
= 0; i
< n_comp
; i
++) {
578 /* the first n texels in reord are guaranteed to be non-zero */
579 if (fxt1_choose(vec
, n_vect
, reord
, n_comp
, n
) != 0) {
580 fxt1_lloyd(vec
, n_vect
, reord
, n_comp
, n
);
583 FX64_MOV32(hi
, 6); /* alpha = "011" + lerp = 0 */
584 for (j
= n_vect
- 1; j
>= 0; j
--) {
587 FX64_OR32(hi
, (GLuint
)(vec
[j
][ACOMP
] / 8.0F
));
589 for (j
= n_vect
- 1; j
>= 0; j
--) {
590 for (i
= 0; i
< n_comp
- 1; i
++) {
593 FX64_OR32(hi
, (GLuint
)(vec
[j
][i
] / 8.0F
));
596 ((Fx64
*)cc
)[1] = hi
;
599 /* right microtile */
600 for (k
= N_TEXELS
- 1; k
>= N_TEXELS
/2; k
--) {
602 lohi
|= fxt1_bestcol(vec
, n_vect
+ 1, input
[k
], n_comp
);
605 for (; k
>= 0; k
--) {
607 lolo
|= fxt1_bestcol(vec
, n_vect
+ 1, input
[k
], n_comp
);
615 fxt1_quantize_ALPHA1 (GLuint
*cc
,
616 GLubyte input
[N_TEXELS
][MAX_COMP
])
618 const GLint n_vect
= 3; /* highest vector number in each microtile */
619 const GLint n_comp
= 4; /* 4 components: R, G, B, A */
620 GLfloat vec
[1 + 1 + 1][MAX_COMP
]; /* 1.5 extrema for each sub-block */
621 GLfloat b
, iv
[MAX_COMP
]; /* interpolation vector */
623 Fx64 hi
; /* high quadword */
624 GLuint lohi
, lolo
; /* low quadword: hi dword, lo dword */
628 GLint minColL
= 0, maxColL
= 0;
629 GLint minColR
= 0, maxColR
= 0;
630 GLint sumL
= 0, sumR
= 0;
632 /* Our solution here is to find the darkest and brightest colors in
633 * the 4x4 tile and use those as the two representative colors.
634 * There are probably better algorithms to use (histogram-based).
637 while ((minColL
== maxColL
) && nn_comp
) {
638 minSum
= 2000; /* big enough */
639 maxSum
= -1; /* small enough */
640 for (k
= 0; k
< N_TEXELS
/ 2; k
++) {
642 for (i
= 0; i
< nn_comp
; i
++) {
660 while ((minColR
== maxColR
) && nn_comp
) {
661 minSum
= 2000; /* big enough */
662 maxSum
= -1; /* small enough */
663 for (k
= N_TEXELS
/ 2; k
< N_TEXELS
; k
++) {
665 for (i
= 0; i
< nn_comp
; i
++) {
682 /* choose the common vector (yuck!) */
685 GLint v1
= 0, v2
= 0;
686 GLfloat err
= 1e9
; /* big enough */
687 GLfloat tv
[2 * 2][MAX_COMP
]; /* 2 extrema for each sub-block */
688 for (i
= 0; i
< n_comp
; i
++) {
689 tv
[0][i
] = input
[minColL
][i
];
690 tv
[1][i
] = input
[maxColL
][i
];
691 tv
[2][i
] = input
[minColR
][i
];
692 tv
[3][i
] = input
[maxColR
][i
];
694 for (j1
= 0; j1
< 2; j1
++) {
695 for (j2
= 2; j2
< 4; j2
++) {
697 for (i
= 0; i
< n_comp
; i
++) {
698 e
+= (tv
[j1
][i
] - tv
[j2
][i
]) * (tv
[j1
][i
] - tv
[j2
][i
]);
707 for (i
= 0; i
< n_comp
; i
++) {
708 vec
[0][i
] = tv
[1 - v1
][i
];
709 vec
[1][i
] = (tv
[v1
][i
] * sumL
+ tv
[v2
][i
] * sumR
) / (sumL
+ sumR
);
710 vec
[2][i
] = tv
[5 - v2
][i
];
716 if (minColL
!= maxColL
) {
717 /* compute interpolation vector */
718 MAKEIVEC(n_vect
, n_comp
, iv
, b
, vec
[0], vec
[1]);
722 for (k
= N_TEXELS
/ 2 - 1; k
>= 0; k
--) {
724 /* interpolate color */
725 CALCCDOT(texel
, n_vect
, n_comp
, iv
, b
, input
[k
]);
734 /* right microtile */
736 if (minColR
!= maxColR
) {
737 /* compute interpolation vector */
738 MAKEIVEC(n_vect
, n_comp
, iv
, b
, vec
[2], vec
[1]);
742 for (k
= N_TEXELS
- 1; k
>= N_TEXELS
/ 2; k
--) {
744 /* interpolate color */
745 CALCCDOT(texel
, n_vect
, n_comp
, iv
, b
, input
[k
]);
754 FX64_MOV32(hi
, 7); /* alpha = "011" + lerp = 1 */
755 for (j
= n_vect
- 1; j
>= 0; j
--) {
758 FX64_OR32(hi
, (GLuint
)(vec
[j
][ACOMP
] / 8.0F
));
760 for (j
= n_vect
- 1; j
>= 0; j
--) {
761 for (i
= 0; i
< n_comp
- 1; i
++) {
764 FX64_OR32(hi
, (GLuint
)(vec
[j
][i
] / 8.0F
));
767 ((Fx64
*)cc
)[1] = hi
;
772 fxt1_quantize_HI (GLuint
*cc
,
773 GLubyte input
[N_TEXELS
][MAX_COMP
],
774 GLubyte reord
[N_TEXELS
][MAX_COMP
], GLint n
)
776 const GLint n_vect
= 6; /* highest vector number */
777 const GLint n_comp
= 3; /* 3 components: R, G, B */
778 GLfloat b
= 0.0F
; /* phoudoin: silent compiler! */
779 GLfloat iv
[MAX_COMP
]; /* interpolation vector */
781 GLuint hihi
; /* high quadword: hi dword */
783 GLint minSum
= 2000; /* big enough */
784 GLint maxSum
= -1; /* small enough */
785 GLint minCol
= 0; /* phoudoin: silent compiler! */
786 GLint maxCol
= 0; /* phoudoin: silent compiler! */
788 /* Our solution here is to find the darkest and brightest colors in
789 * the 8x4 tile and use those as the two representative colors.
790 * There are probably better algorithms to use (histogram-based).
792 for (k
= 0; k
< n
; k
++) {
794 for (i
= 0; i
< n_comp
; i
++) {
807 hihi
= 0; /* cc-hi = "00" */
808 for (i
= 0; i
< n_comp
; i
++) {
811 hihi
|= reord
[maxCol
][i
] >> 3;
813 for (i
= 0; i
< n_comp
; i
++) {
816 hihi
|= reord
[minCol
][i
] >> 3;
819 cc
[0] = cc
[1] = cc
[2] = 0;
821 /* compute interpolation vector */
822 if (minCol
!= maxCol
) {
823 MAKEIVEC(n_vect
, n_comp
, iv
, b
, reord
[minCol
], reord
[maxCol
]);
827 for (k
= N_TEXELS
- 1; k
>= 0; k
--) {
829 GLuint
*kk
= (GLuint
*)((char *)cc
+ t
/ 8);
830 GLint texel
= n_vect
+ 1; /* transparent black */
832 if (!ISTBLACK(input
[k
])) {
833 if (minCol
!= maxCol
) {
834 /* interpolate color */
835 CALCCDOT(texel
, n_vect
, n_comp
, iv
, b
, input
[k
]);
837 kk
[0] |= texel
<< (t
& 7);
841 kk
[0] |= texel
<< (t
& 7);
848 fxt1_quantize_MIXED1 (GLuint
*cc
,
849 GLubyte input
[N_TEXELS
][MAX_COMP
])
851 const GLint n_vect
= 2; /* highest vector number in each microtile */
852 const GLint n_comp
= 3; /* 3 components: R, G, B */
853 GLubyte vec
[2 * 2][MAX_COMP
]; /* 2 extrema for each sub-block */
854 GLfloat b
, iv
[MAX_COMP
]; /* interpolation vector */
856 Fx64 hi
; /* high quadword */
857 GLuint lohi
, lolo
; /* low quadword: hi dword, lo dword */
861 GLint minColL
= 0, maxColL
= -1;
862 GLint minColR
= 0, maxColR
= -1;
864 /* Our solution here is to find the darkest and brightest colors in
865 * the 4x4 tile and use those as the two representative colors.
866 * There are probably better algorithms to use (histogram-based).
868 minSum
= 2000; /* big enough */
869 maxSum
= -1; /* small enough */
870 for (k
= 0; k
< N_TEXELS
/ 2; k
++) {
871 if (!ISTBLACK(input
[k
])) {
873 for (i
= 0; i
< n_comp
; i
++) {
886 minSum
= 2000; /* big enough */
887 maxSum
= -1; /* small enough */
888 for (; k
< N_TEXELS
; k
++) {
889 if (!ISTBLACK(input
[k
])) {
891 for (i
= 0; i
< n_comp
; i
++) {
907 /* all transparent black */
909 for (i
= 0; i
< n_comp
; i
++) {
915 for (i
= 0; i
< n_comp
; i
++) {
916 vec
[0][i
] = input
[minColL
][i
];
917 vec
[1][i
] = input
[maxColL
][i
];
919 if (minColL
!= maxColL
) {
920 /* compute interpolation vector */
921 MAKEIVEC(n_vect
, n_comp
, iv
, b
, vec
[0], vec
[1]);
925 for (k
= N_TEXELS
/ 2 - 1; k
>= 0; k
--) {
926 GLint texel
= n_vect
+ 1; /* transparent black */
927 if (!ISTBLACK(input
[k
])) {
928 /* interpolate color */
929 CALCCDOT(texel
, n_vect
, n_comp
, iv
, b
, input
[k
]);
939 /* right microtile */
941 /* all transparent black */
943 for (i
= 0; i
< n_comp
; i
++) {
949 for (i
= 0; i
< n_comp
; i
++) {
950 vec
[2][i
] = input
[minColR
][i
];
951 vec
[3][i
] = input
[maxColR
][i
];
953 if (minColR
!= maxColR
) {
954 /* compute interpolation vector */
955 MAKEIVEC(n_vect
, n_comp
, iv
, b
, vec
[2], vec
[3]);
959 for (k
= N_TEXELS
- 1; k
>= N_TEXELS
/ 2; k
--) {
960 GLint texel
= n_vect
+ 1; /* transparent black */
961 if (!ISTBLACK(input
[k
])) {
962 /* interpolate color */
963 CALCCDOT(texel
, n_vect
, n_comp
, iv
, b
, input
[k
]);
973 FX64_MOV32(hi
, 9 | (vec
[3][GCOMP
] & 4) | ((vec
[1][GCOMP
] >> 1) & 2)); /* chroma = "1" */
974 for (j
= 2 * 2 - 1; j
>= 0; j
--) {
975 for (i
= 0; i
< n_comp
; i
++) {
978 FX64_OR32(hi
, vec
[j
][i
] >> 3);
981 ((Fx64
*)cc
)[1] = hi
;
986 fxt1_quantize_MIXED0 (GLuint
*cc
,
987 GLubyte input
[N_TEXELS
][MAX_COMP
])
989 const GLint n_vect
= 3; /* highest vector number in each microtile */
990 const GLint n_comp
= 3; /* 3 components: R, G, B */
991 GLubyte vec
[2 * 2][MAX_COMP
]; /* 2 extrema for each sub-block */
992 GLfloat b
, iv
[MAX_COMP
]; /* interpolation vector */
994 Fx64 hi
; /* high quadword */
995 GLuint lohi
, lolo
; /* low quadword: hi dword, lo dword */
997 GLint minColL
= 0, maxColL
= 0;
998 GLint minColR
= 0, maxColR
= 0;
1003 /* Our solution here is to find the darkest and brightest colors in
1004 * the 4x4 tile and use those as the two representative colors.
1005 * There are probably better algorithms to use (histogram-based).
1007 minSum
= 2000; /* big enough */
1008 maxSum
= -1; /* small enough */
1009 for (k
= 0; k
< N_TEXELS
/ 2; k
++) {
1011 for (i
= 0; i
< n_comp
; i
++) {
1023 minSum
= 2000; /* big enough */
1024 maxSum
= -1; /* small enough */
1025 for (; k
< N_TEXELS
; k
++) {
1027 for (i
= 0; i
< n_comp
; i
++) {
1042 GLint maxVarL
= fxt1_variance(NULL
, input
, n_comp
, N_TEXELS
/ 2);
1043 GLint maxVarR
= fxt1_variance(NULL
, &input
[N_TEXELS
/ 2], n_comp
, N_TEXELS
/ 2);
1045 /* Scan the channel with max variance for lo & hi
1046 * and use those as the two representative colors.
1048 minVal
= 2000; /* big enough */
1049 maxVal
= -1; /* small enough */
1050 for (k
= 0; k
< N_TEXELS
/ 2; k
++) {
1051 GLint t
= input
[k
][maxVarL
];
1061 minVal
= 2000; /* big enough */
1062 maxVal
= -1; /* small enough */
1063 for (; k
< N_TEXELS
; k
++) {
1064 GLint t
= input
[k
][maxVarR
];
1076 /* left microtile */
1078 for (i
= 0; i
< n_comp
; i
++) {
1079 vec
[0][i
] = input
[minColL
][i
];
1080 vec
[1][i
] = input
[maxColL
][i
];
1082 if (minColL
!= maxColL
) {
1083 /* compute interpolation vector */
1084 MAKEIVEC(n_vect
, n_comp
, iv
, b
, vec
[0], vec
[1]);
1088 for (k
= N_TEXELS
/ 2 - 1; k
>= 0; k
--) {
1090 /* interpolate color */
1091 CALCCDOT(texel
, n_vect
, n_comp
, iv
, b
, input
[k
]);
1097 /* funky encoding for LSB of green */
1098 if ((GLint
)((lolo
>> 1) & 1) != (((vec
[1][GCOMP
] ^ vec
[0][GCOMP
]) >> 2) & 1)) {
1099 for (i
= 0; i
< n_comp
; i
++) {
1100 vec
[1][i
] = input
[minColL
][i
];
1101 vec
[0][i
] = input
[maxColL
][i
];
1109 /* right microtile */
1111 for (i
= 0; i
< n_comp
; i
++) {
1112 vec
[2][i
] = input
[minColR
][i
];
1113 vec
[3][i
] = input
[maxColR
][i
];
1115 if (minColR
!= maxColR
) {
1116 /* compute interpolation vector */
1117 MAKEIVEC(n_vect
, n_comp
, iv
, b
, vec
[2], vec
[3]);
1121 for (k
= N_TEXELS
- 1; k
>= N_TEXELS
/ 2; k
--) {
1123 /* interpolate color */
1124 CALCCDOT(texel
, n_vect
, n_comp
, iv
, b
, input
[k
]);
1130 /* funky encoding for LSB of green */
1131 if ((GLint
)((lohi
>> 1) & 1) != (((vec
[3][GCOMP
] ^ vec
[2][GCOMP
]) >> 2) & 1)) {
1132 for (i
= 0; i
< n_comp
; i
++) {
1133 vec
[3][i
] = input
[minColR
][i
];
1134 vec
[2][i
] = input
[maxColR
][i
];
1142 FX64_MOV32(hi
, 8 | (vec
[3][GCOMP
] & 4) | ((vec
[1][GCOMP
] >> 1) & 2)); /* chroma = "1" */
1143 for (j
= 2 * 2 - 1; j
>= 0; j
--) {
1144 for (i
= 0; i
< n_comp
; i
++) {
1147 FX64_OR32(hi
, vec
[j
][i
] >> 3);
1150 ((Fx64
*)cc
)[1] = hi
;
1155 fxt1_quantize (GLuint
*cc
, const GLubyte
*lines
[], GLint comps
)
1158 GLubyte reord
[N_TEXELS
][MAX_COMP
];
1160 GLubyte input
[N_TEXELS
][MAX_COMP
];
1164 /* make the whole block opaque */
1165 memset(input
, -1, sizeof(input
));
1168 /* 8 texels each line */
1169 for (l
= 0; l
< 4; l
++) {
1170 for (k
= 0; k
< 4; k
++) {
1171 for (i
= 0; i
< comps
; i
++) {
1172 input
[k
+ l
* 4][i
] = *lines
[l
]++;
1175 for (; k
< 8; k
++) {
1176 for (i
= 0; i
< comps
; i
++) {
1177 input
[k
+ l
* 4 + 12][i
] = *lines
[l
]++;
1183 * 00, 01, 02, 03, 08, 09, 0a, 0b
1184 * 10, 11, 12, 13, 18, 19, 1a, 1b
1185 * 04, 05, 06, 07, 0c, 0d, 0e, 0f
1186 * 14, 15, 16, 17, 1c, 1d, 1e, 1f
1190 * stupidity flows forth from this
1195 /* skip all transparent black texels */
1197 for (k
= 0; k
< N_TEXELS
; k
++) {
1198 /* test all components against 0 */
1199 if (!ISTBLACK(input
[k
])) {
1200 /* texel is not transparent black */
1201 COPY_4UBV(reord
[l
], input
[k
]);
1202 if (reord
[l
][ACOMP
] < (255 - ALPHA_TS
)) {
1203 /* non-opaque texel */
1213 fxt1_quantize_ALPHA0(cc
, input
, reord
, l
);
1214 } else if (l
== 0) {
1215 cc
[0] = cc
[1] = cc
[2] = -1;
1217 } else if (l
< N_TEXELS
) {
1218 fxt1_quantize_HI(cc
, input
, reord
, l
);
1220 fxt1_quantize_CHROMA(cc
, input
);
1222 (void)fxt1_quantize_ALPHA1
;
1223 (void)fxt1_quantize_MIXED1
;
1224 (void)fxt1_quantize_MIXED0
;
1227 fxt1_quantize_ALPHA1(cc
, input
);
1228 } else if (l
== 0) {
1229 cc
[0] = cc
[1] = cc
[2] = ~0u;
1231 } else if (l
< N_TEXELS
) {
1232 fxt1_quantize_MIXED1(cc
, input
);
1234 fxt1_quantize_MIXED0(cc
, input
);
1236 (void)fxt1_quantize_ALPHA0
;
1237 (void)fxt1_quantize_HI
;
1238 (void)fxt1_quantize_CHROMA
;
1245 * Upscale an image by replication, not (typical) stretching.
1246 * We use this when the image width or height is less than a
1247 * certain size (4, 8) and we need to upscale an image.
1250 upscale_teximage2d(GLsizei inWidth
, GLsizei inHeight
,
1251 GLsizei outWidth
, GLsizei outHeight
,
1252 GLint comps
, const GLubyte
*src
, GLint srcRowStride
,
1257 ASSERT(outWidth
>= inWidth
);
1258 ASSERT(outHeight
>= inHeight
);
1260 ASSERT(inWidth
== 1 || inWidth
== 2 || inHeight
== 1 || inHeight
== 2);
1261 ASSERT((outWidth
& 3) == 0);
1262 ASSERT((outHeight
& 3) == 0);
1265 for (i
= 0; i
< outHeight
; i
++) {
1266 const GLint ii
= i
% inHeight
;
1267 for (j
= 0; j
< outWidth
; j
++) {
1268 const GLint jj
= j
% inWidth
;
1269 for (k
= 0; k
< comps
; k
++) {
1270 dest
[(i
* outWidth
+ j
) * comps
+ k
]
1271 = src
[ii
* srcRowStride
+ jj
* comps
+ k
];
1279 fxt1_encode (GLuint width
, GLuint height
, GLint comps
,
1280 const void *source
, GLint srcRowStride
,
1281 void *dest
, GLint destRowStride
)
1284 const GLubyte
*data
;
1285 GLuint
*encoded
= (GLuint
*)dest
;
1286 void *newSource
= NULL
;
1288 assert(comps
== 3 || comps
== 4);
1290 /* Replicate image if width is not M8 or height is not M4 */
1291 if ((width
& 7) | (height
& 3)) {
1292 GLint newWidth
= (width
+ 7) & ~7;
1293 GLint newHeight
= (height
+ 3) & ~3;
1294 newSource
= malloc(comps
* newWidth
* newHeight
* sizeof(GLubyte
));
1296 GET_CURRENT_CONTEXT(ctx
);
1297 _mesa_error(ctx
, GL_OUT_OF_MEMORY
, "texture compression");
1300 upscale_teximage2d(width
, height
, newWidth
, newHeight
,
1301 comps
, (const GLubyte
*) source
,
1302 srcRowStride
, (GLubyte
*) newSource
);
1306 srcRowStride
= comps
* newWidth
;
1309 data
= (const GLubyte
*) source
;
1310 destRowStride
= (destRowStride
- width
* 2) / 4;
1311 for (y
= 0; y
< height
; y
+= 4) {
1312 GLuint offs
= 0 + (y
+ 0) * srcRowStride
;
1313 for (x
= 0; x
< width
; x
+= 8) {
1314 const GLubyte
*lines
[4];
1315 lines
[0] = &data
[offs
];
1316 lines
[1] = lines
[0] + srcRowStride
;
1317 lines
[2] = lines
[1] + srcRowStride
;
1318 lines
[3] = lines
[2] + srcRowStride
;
1320 fxt1_quantize(encoded
, lines
, comps
);
1321 /* 128 bits per 8x4 block */
1324 encoded
+= destRowStride
;
/***************************************************************************\
 * The decoder is based on GL_3DFX_texture_compression_FXT1
 * specification and serves as a concept for the encoder.
\***************************************************************************/
1340 /* lookup table for scaling 5 bit colors up to 8 bits */
1341 static const GLubyte _rgb_scale_5
[] = {
1342 0, 8, 16, 25, 33, 41, 49, 58,
1343 66, 74, 82, 90, 99, 107, 115, 123,
1344 132, 140, 148, 156, 165, 173, 181, 189,
1345 197, 206, 214, 222, 230, 239, 247, 255
1348 /* lookup table for scaling 6 bit colors up to 8 bits */
1349 static const GLubyte _rgb_scale_6
[] = {
1350 0, 4, 8, 12, 16, 20, 24, 28,
1351 32, 36, 40, 45, 49, 53, 57, 61,
1352 65, 69, 73, 77, 81, 85, 89, 93,
1353 97, 101, 105, 109, 113, 117, 121, 125,
1354 130, 134, 138, 142, 146, 150, 154, 158,
1355 162, 166, 170, 174, 178, 182, 186, 190,
1356 194, 198, 202, 206, 210, 215, 219, 223,
1357 227, 231, 235, 239, 243, 247, 251, 255
1361 #define CC_SEL(cc, which) (((GLuint *)(cc))[(which) / 32] >> ((which) & 31))
1362 #define UP5(c) _rgb_scale_5[(c) & 31]
1363 #define UP6(c, b) _rgb_scale_6[(((c) & 31) << 1) | ((b) & 1)]
1364 #define LERP(n, t, c0, c1) (((n) - (t)) * (c0) + (t) * (c1) + (n) / 2) / (n)
1368 fxt1_decode_1HI (const GLubyte
*code
, GLint t
, GLubyte
*rgba
)
1373 cc
= (const GLuint
*)(code
+ t
/ 8);
1374 t
= (cc
[0] >> (t
& 7)) & 7;
1377 rgba
[RCOMP
] = rgba
[GCOMP
] = rgba
[BCOMP
] = rgba
[ACOMP
] = 0;
1380 cc
= (const GLuint
*)(code
+ 12);
1382 b
= UP5(CC_SEL(cc
, 0));
1383 g
= UP5(CC_SEL(cc
, 5));
1384 r
= UP5(CC_SEL(cc
, 10));
1385 } else if (t
== 6) {
1386 b
= UP5(CC_SEL(cc
, 15));
1387 g
= UP5(CC_SEL(cc
, 20));
1388 r
= UP5(CC_SEL(cc
, 25));
1390 b
= LERP(6, t
, UP5(CC_SEL(cc
, 0)), UP5(CC_SEL(cc
, 15)));
1391 g
= LERP(6, t
, UP5(CC_SEL(cc
, 5)), UP5(CC_SEL(cc
, 20)));
1392 r
= LERP(6, t
, UP5(CC_SEL(cc
, 10)), UP5(CC_SEL(cc
, 25)));
1403 fxt1_decode_1CHROMA (const GLubyte
*code
, GLint t
, GLubyte
*rgba
)
1408 cc
= (const GLuint
*)code
;
1413 t
= (cc
[0] >> (t
* 2)) & 3;
1416 cc
= (const GLuint
*)(code
+ 8 + t
/ 8);
1417 kk
= cc
[0] >> (t
& 7);
1418 rgba
[BCOMP
] = UP5(kk
);
1419 rgba
[GCOMP
] = UP5(kk
>> 5);
1420 rgba
[RCOMP
] = UP5(kk
>> 10);
1426 fxt1_decode_1MIXED (const GLubyte
*code
, GLint t
, GLubyte
*rgba
)
1432 cc
= (const GLuint
*)code
;
1435 t
= (cc
[1] >> (t
* 2)) & 3;
1437 col
[0][BCOMP
] = (*(const GLuint
*)(code
+ 11)) >> 6;
1438 col
[0][GCOMP
] = CC_SEL(cc
, 99);
1439 col
[0][RCOMP
] = CC_SEL(cc
, 104);
1441 col
[1][BCOMP
] = CC_SEL(cc
, 109);
1442 col
[1][GCOMP
] = CC_SEL(cc
, 114);
1443 col
[1][RCOMP
] = CC_SEL(cc
, 119);
1444 glsb
= CC_SEL(cc
, 126);
1445 selb
= CC_SEL(cc
, 33);
1447 t
= (cc
[0] >> (t
* 2)) & 3;
1449 col
[0][BCOMP
] = CC_SEL(cc
, 64);
1450 col
[0][GCOMP
] = CC_SEL(cc
, 69);
1451 col
[0][RCOMP
] = CC_SEL(cc
, 74);
1453 col
[1][BCOMP
] = CC_SEL(cc
, 79);
1454 col
[1][GCOMP
] = CC_SEL(cc
, 84);
1455 col
[1][RCOMP
] = CC_SEL(cc
, 89);
1456 glsb
= CC_SEL(cc
, 125);
1457 selb
= CC_SEL(cc
, 1);
1460 if (CC_SEL(cc
, 124) & 1) {
1465 rgba
[RCOMP
] = rgba
[BCOMP
] = rgba
[GCOMP
] = rgba
[ACOMP
] = 0;
1469 b
= UP5(col
[0][BCOMP
]);
1470 g
= UP5(col
[0][GCOMP
]);
1471 r
= UP5(col
[0][RCOMP
]);
1472 } else if (t
== 2) {
1473 b
= UP5(col
[1][BCOMP
]);
1474 g
= UP6(col
[1][GCOMP
], glsb
);
1475 r
= UP5(col
[1][RCOMP
]);
1477 b
= (UP5(col
[0][BCOMP
]) + UP5(col
[1][BCOMP
])) / 2;
1478 g
= (UP5(col
[0][GCOMP
]) + UP6(col
[1][GCOMP
], glsb
)) / 2;
1479 r
= (UP5(col
[0][RCOMP
]) + UP5(col
[1][RCOMP
])) / 2;
1490 b
= UP5(col
[0][BCOMP
]);
1491 g
= UP6(col
[0][GCOMP
], glsb
^ selb
);
1492 r
= UP5(col
[0][RCOMP
]);
1493 } else if (t
== 3) {
1494 b
= UP5(col
[1][BCOMP
]);
1495 g
= UP6(col
[1][GCOMP
], glsb
);
1496 r
= UP5(col
[1][RCOMP
]);
1498 b
= LERP(3, t
, UP5(col
[0][BCOMP
]), UP5(col
[1][BCOMP
]));
1499 g
= LERP(3, t
, UP6(col
[0][GCOMP
], glsb
^ selb
),
1500 UP6(col
[1][GCOMP
], glsb
));
1501 r
= LERP(3, t
, UP5(col
[0][RCOMP
]), UP5(col
[1][RCOMP
]));
1512 fxt1_decode_1ALPHA (const GLubyte
*code
, GLint t
, GLubyte
*rgba
)
1517 cc
= (const GLuint
*)code
;
1518 if (CC_SEL(cc
, 124) & 1) {
1524 t
= (cc
[1] >> (t
* 2)) & 3;
1526 col0
[BCOMP
] = (*(const GLuint
*)(code
+ 11)) >> 6;
1527 col0
[GCOMP
] = CC_SEL(cc
, 99);
1528 col0
[RCOMP
] = CC_SEL(cc
, 104);
1529 col0
[ACOMP
] = CC_SEL(cc
, 119);
1531 t
= (cc
[0] >> (t
* 2)) & 3;
1533 col0
[BCOMP
] = CC_SEL(cc
, 64);
1534 col0
[GCOMP
] = CC_SEL(cc
, 69);
1535 col0
[RCOMP
] = CC_SEL(cc
, 74);
1536 col0
[ACOMP
] = CC_SEL(cc
, 109);
1540 b
= UP5(col0
[BCOMP
]);
1541 g
= UP5(col0
[GCOMP
]);
1542 r
= UP5(col0
[RCOMP
]);
1543 a
= UP5(col0
[ACOMP
]);
1544 } else if (t
== 3) {
1545 b
= UP5(CC_SEL(cc
, 79));
1546 g
= UP5(CC_SEL(cc
, 84));
1547 r
= UP5(CC_SEL(cc
, 89));
1548 a
= UP5(CC_SEL(cc
, 114));
1550 b
= LERP(3, t
, UP5(col0
[BCOMP
]), UP5(CC_SEL(cc
, 79)));
1551 g
= LERP(3, t
, UP5(col0
[GCOMP
]), UP5(CC_SEL(cc
, 84)));
1552 r
= LERP(3, t
, UP5(col0
[RCOMP
]), UP5(CC_SEL(cc
, 89)));
1553 a
= LERP(3, t
, UP5(col0
[ACOMP
]), UP5(CC_SEL(cc
, 114)));
1562 t
= (cc
[0] >> (t
* 2)) & 3;
1569 cc
= (const GLuint
*)code
;
1570 a
= UP5(cc
[3] >> (t
* 5 + 13));
1572 cc
= (const GLuint
*)(code
+ 8 + t
/ 8);
1573 kk
= cc
[0] >> (t
& 7);
1587 fxt1_decode_1 (const void *texture
, GLint stride
, /* in pixels */
1588 GLint i
, GLint j
, GLubyte
*rgba
)
1590 static void (*decode_1
[]) (const GLubyte
*, GLint
, GLubyte
*) = {
1591 fxt1_decode_1HI
, /* cc-high = "00?" */
1592 fxt1_decode_1HI
, /* cc-high = "00?" */
1593 fxt1_decode_1CHROMA
, /* cc-chroma = "010" */
1594 fxt1_decode_1ALPHA
, /* alpha = "011" */
1595 fxt1_decode_1MIXED
, /* mixed = "1??" */
1596 fxt1_decode_1MIXED
, /* mixed = "1??" */
1597 fxt1_decode_1MIXED
, /* mixed = "1??" */
1598 fxt1_decode_1MIXED
/* mixed = "1??" */
1601 const GLubyte
*code
= (const GLubyte
*)texture
+
1602 ((j
/ 4) * (stride
/ 8) + (i
/ 8)) * 16;
1603 GLint mode
= CC_SEL(code
, 125);
1611 decode_1
[mode
](code
, t
, rgba
);
1618 fetch_rgb_fxt1(const GLubyte
*map
,
1619 GLint rowStride
, GLint i
, GLint j
, GLfloat
*texel
)
1622 fxt1_decode_1(map
, rowStride
, i
, j
, rgba
);
1623 texel
[RCOMP
] = UBYTE_TO_FLOAT(rgba
[RCOMP
]);
1624 texel
[GCOMP
] = UBYTE_TO_FLOAT(rgba
[GCOMP
]);
1625 texel
[BCOMP
] = UBYTE_TO_FLOAT(rgba
[BCOMP
]);
1626 texel
[ACOMP
] = 1.0F
;
1631 fetch_rgba_fxt1(const GLubyte
*map
,
1632 GLint rowStride
, GLint i
, GLint j
, GLfloat
*texel
)
1635 fxt1_decode_1(map
, rowStride
, i
, j
, rgba
);
1636 texel
[RCOMP
] = UBYTE_TO_FLOAT(rgba
[RCOMP
]);
1637 texel
[GCOMP
] = UBYTE_TO_FLOAT(rgba
[GCOMP
]);
1638 texel
[BCOMP
] = UBYTE_TO_FLOAT(rgba
[BCOMP
]);
1639 texel
[ACOMP
] = UBYTE_TO_FLOAT(rgba
[ACOMP
]);
1643 compressed_fetch_func
1644 _mesa_get_fxt_fetch_func(gl_format format
)
1647 case MESA_FORMAT_RGB_FXT1
:
1648 return fetch_rgb_fxt1
;
1649 case MESA_FORMAT_RGBA_FXT1
:
1650 return fetch_rgba_fxt1
;