2 * Mesa 3-D graphics library
5 * Copyright (C) 1999-2008 Brian Paul All Rights Reserved.
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
14 * The above copyright notice and this permission notice shall be included
15 * in all copies or substantial portions of the Software.
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
21 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27 * \file texcompress_fxt1.c
28 * GL_3DFX_texture_compression_FXT1 support.
37 #include "mfeatures.h"
39 #include "texcompress.h"
40 #include "texcompress_fxt1.h"
42 #include "swrast/s_context.h"
45 #if FEATURE_texture_fxt1
49 fxt1_encode (GLuint width
, GLuint height
, GLint comps
,
50 const void *source
, GLint srcRowStride
,
51 void *dest
, GLint destRowStride
);
54 fxt1_decode_1 (const void *texture
, GLint stride
,
55 GLint i
, GLint j
, GLubyte
*rgba
);
59 * Store user's image in rgb_fxt1 format.
62 _mesa_texstore_rgb_fxt1(TEXSTORE_PARAMS
)
64 const GLubyte
*pixels
;
67 const GLubyte
*tempImage
= NULL
;
69 ASSERT(dstFormat
== MESA_FORMAT_RGB_FXT1
);
71 if (srcFormat
!= GL_RGB
||
72 srcType
!= GL_UNSIGNED_BYTE
||
73 ctx
->_ImageTransferState
||
74 srcPacking
->RowLength
!= srcWidth
||
75 srcPacking
->SwapBytes
) {
76 /* convert image to RGB/GLubyte */
77 tempImage
= _mesa_make_temp_ubyte_image(ctx
, dims
,
79 _mesa_get_format_base_format(dstFormat
),
80 srcWidth
, srcHeight
, srcDepth
,
81 srcFormat
, srcType
, srcAddr
,
84 return GL_FALSE
; /* out of memory */
86 srcRowStride
= 3 * srcWidth
;
90 pixels
= _mesa_image_address2d(srcPacking
, srcAddr
, srcWidth
, srcHeight
,
91 srcFormat
, srcType
, 0, 0);
93 srcRowStride
= _mesa_image_row_stride(srcPacking
, srcWidth
, srcFormat
,
94 srcType
) / sizeof(GLubyte
);
99 fxt1_encode(srcWidth
, srcHeight
, 3, pixels
, srcRowStride
,
102 free((void*) tempImage
);
109 * Store user's image in rgba_fxt1 format.
112 _mesa_texstore_rgba_fxt1(TEXSTORE_PARAMS
)
114 const GLubyte
*pixels
;
117 const GLubyte
*tempImage
= NULL
;
119 ASSERT(dstFormat
== MESA_FORMAT_RGBA_FXT1
);
121 if (srcFormat
!= GL_RGBA
||
122 srcType
!= GL_UNSIGNED_BYTE
||
123 ctx
->_ImageTransferState
||
124 srcPacking
->SwapBytes
) {
125 /* convert image to RGBA/GLubyte */
126 tempImage
= _mesa_make_temp_ubyte_image(ctx
, dims
,
128 _mesa_get_format_base_format(dstFormat
),
129 srcWidth
, srcHeight
, srcDepth
,
130 srcFormat
, srcType
, srcAddr
,
133 return GL_FALSE
; /* out of memory */
135 srcRowStride
= 4 * srcWidth
;
139 pixels
= _mesa_image_address2d(srcPacking
, srcAddr
, srcWidth
, srcHeight
,
140 srcFormat
, srcType
, 0, 0);
142 srcRowStride
= _mesa_image_row_stride(srcPacking
, srcWidth
, srcFormat
,
143 srcType
) / sizeof(GLubyte
);
148 fxt1_encode(srcWidth
, srcHeight
, 4, pixels
, srcRowStride
,
151 free((void*) tempImage
);
158 _mesa_fetch_texel_2d_f_rgba_fxt1( const struct swrast_texture_image
*texImage
,
159 GLint i
, GLint j
, GLint k
, GLfloat
*texel
)
161 /* just sample as GLubyte and convert to float here */
164 fxt1_decode_1(texImage
->Map
, texImage
->RowStride
, i
, j
, rgba
);
165 texel
[RCOMP
] = UBYTE_TO_FLOAT(rgba
[RCOMP
]);
166 texel
[GCOMP
] = UBYTE_TO_FLOAT(rgba
[GCOMP
]);
167 texel
[BCOMP
] = UBYTE_TO_FLOAT(rgba
[BCOMP
]);
168 texel
[ACOMP
] = UBYTE_TO_FLOAT(rgba
[ACOMP
]);
173 _mesa_fetch_texel_2d_f_rgb_fxt1( const struct swrast_texture_image
*texImage
,
174 GLint i
, GLint j
, GLint k
, GLfloat
*texel
)
176 /* just sample as GLubyte and convert to float here */
179 fxt1_decode_1(texImage
->Map
, texImage
->RowStride
, i
, j
, rgba
);
180 texel
[RCOMP
] = UBYTE_TO_FLOAT(rgba
[RCOMP
]);
181 texel
[GCOMP
] = UBYTE_TO_FLOAT(rgba
[GCOMP
]);
182 texel
[BCOMP
] = UBYTE_TO_FLOAT(rgba
[BCOMP
]);
188 /***************************************************************************\
191 * The encoder was built by reversing the decoder,
192 * and is vaguely based on Texus2 by 3dfx. Note that this code
193 * is merely a proof of concept, since it is highly UNoptimized;
194 * moreover, it is sub-optimal due to initial conditions passed
195 * to Lloyd's algorithm (the interpolation modes are even worse).
196 \***************************************************************************/
/*
 * Encoder sizing and tuning constants.
 */
#define MAX_COMP 4 /* ever needed maximum number of components in texel */
#define MAX_VECT 4 /* ever needed maximum number of base vectors to find */
#define N_TEXELS 32 /* number of texels in a block (always 32) */
#define LL_N_REP 50 /* number of iterations in lloyd's vq */
#define LL_RMS_D 10 /* fault tolerance (maximum delta) */
#define LL_RMS_E 255 /* fault tolerance (maximum error) */
#define ALPHA_TS 2 /* alpha threshold: (255 - ALPHA_TS) deemed opaque */

/*
 * True when all four bytes of the texel `v` are zero (transparent black).
 *
 * `v` must be an array of, or pointer to, at least four bytes. The bytes are
 * tested individually instead of through a GLuint cast, so the test performs
 * no type-punned or potentially misaligned 32-bit load.
 *
 * Note: `v` is evaluated up to four times; pass a side-effect-free
 * expression (every use in this file passes `input[k]`).
 */
#define ISTBLACK(v) \
   ((v)[0] == 0 && (v)[1] == 0 && (v)[2] == 0 && (v)[3] == 0)
210 * Define a 64-bit unsigned integer type and macros
/*
 * Native 64-bit path: Fx64 is a plain uint64_t and the helper macros map
 * directly onto the built-in assignment operators.
 */
#define FX64_NATIVE 1

typedef uint64_t Fx64;

/*
 * FX64_MOV32(a, b): a = b
 * FX64_OR32(a, b):  a |= b
 * FX64_SHL(a, c):   a <<= c
 *
 * Each expansion is one fully parenthesized assignment expression, so the
 * macro arguments and any operators surrounding the macro call cannot
 * re-associate with the assignment.
 */
#define FX64_MOV32(a, b) ((a) = (b))
#define FX64_OR32(a, b) ((a) |= (b))
#define FX64_SHL(a, c) ((a) <<= (c))
224 #define FX64_NATIVE 0
/*
 * Non-native path: `a` is a structure with a 32-bit `lo` member.
 * FX64_MOV32 stores b into a.lo and FX64_OR32 ORs b into a.lo; both
 * expansions are fully parenthesized assignment expressions.
 */
#define FX64_MOV32(a, b) ((a).lo = (b))
#define FX64_OR32(a, b) ((a).lo |= (b))
233 #define FX64_SHL(a, c) \
236 a.hi = a.lo << ((c) - 32); \
239 a.hi = (a.hi << (c)) | (a.lo >> (32 - (c))); \
/*
 * Weight applied to component `i` when MAKEIVEC builds an interpolation
 * vector. It is the constant 1 for every component; the argument is ignored.
 * The cast expression is parenthesized so the macro is a single operand
 * wherever it is used.
 */
#define F(i) ((GLfloat)1) /* can be used to obtain an oblong metric: 0.30 / 0.59 / 0.11 */
/* NOTE(review): the code this switch guards is not visible in this view --
 * presumably the range clamp inside CALCCDOT; confirm. */
#define SAFECDOT 1 /* for paranoids */
250 #define MAKEIVEC(NV, NC, IV, B, V0, V1) \
252 /* compute interpolation vector */ \
256 for (i = 0; i < NC; i++) { \
257 IV[i] = (V1[i] - V0[i]) * F(i); \
258 d2 += IV[i] * IV[i]; \
260 rd2 = (GLfloat)NV / d2; \
262 for (i = 0; i < NC; i++) { \
264 B -= IV[i] * V0[i]; \
267 B = B * rd2 + 0.5f; \
270 #define CALCCDOT(TEXEL, NV, NC, IV, B, V)\
272 GLfloat dot = 0.0F; \
273 for (i = 0; i < NC; i++) { \
274 dot += V[i] * IV[i]; \
276 TEXEL = (GLint)(dot + B); \
280 } else if (TEXEL > NV) { \
288 fxt1_bestcol (GLfloat vec
[][MAX_COMP
], GLint nv
,
289 GLubyte input
[MAX_COMP
], GLint nc
)
291 GLint i
, j
, best
= -1;
292 GLfloat err
= 1e9
; /* big enough */
294 for (j
= 0; j
< nv
; j
++) {
296 for (i
= 0; i
< nc
; i
++) {
297 e
+= (vec
[j
][i
] - input
[i
]) * (vec
[j
][i
] - input
[i
]);
310 fxt1_worst (GLfloat vec
[MAX_COMP
],
311 GLubyte input
[N_TEXELS
][MAX_COMP
], GLint nc
, GLint n
)
313 GLint i
, k
, worst
= -1;
314 GLfloat err
= -1.0F
; /* small enough */
316 for (k
= 0; k
< n
; k
++) {
318 for (i
= 0; i
< nc
; i
++) {
319 e
+= (vec
[i
] - input
[k
][i
]) * (vec
[i
] - input
[k
][i
]);
332 fxt1_variance (GLdouble variance
[MAX_COMP
],
333 GLubyte input
[N_TEXELS
][MAX_COMP
], GLint nc
, GLint n
)
335 GLint i
, k
, best
= 0;
337 GLdouble var
, maxvar
= -1; /* small enough */
338 GLdouble teenth
= 1.0 / n
;
340 for (i
= 0; i
< nc
; i
++) {
342 for (k
= 0; k
< n
; k
++) {
343 GLint t
= input
[k
][i
];
347 var
= sx2
* teenth
- sx
* sx
* teenth
* teenth
;
362 fxt1_choose (GLfloat vec
[][MAX_COMP
], GLint nv
,
363 GLubyte input
[N_TEXELS
][MAX_COMP
], GLint nc
, GLint n
)
366 /* Choose colors from a grid.
370 for (j
= 0; j
< nv
; j
++) {
371 GLint m
= j
* (n
- 1) / (nv
- 1);
372 for (i
= 0; i
< nc
; i
++) {
373 vec
[j
][i
] = input
[m
][i
];
377 /* Our solution here is to find the darkest and brightest colors in
378 * the 8x4 tile and use those as the two representative colors.
379 * There are probably better algorithms to use (histogram-based).
382 GLint minSum
= 2000; /* big enough */
383 GLint maxSum
= -1; /* small enough */
384 GLint minCol
= 0; /* phoudoin: silent compiler! */
385 GLint maxCol
= 0; /* phoudoin: silent compiler! */
395 memset(hist
, 0, sizeof(hist
));
397 for (k
= 0; k
< n
; k
++) {
401 for (i
= 0; i
< nc
; i
++) {
406 for (l
= 0; l
< n
; l
++) {
415 } else if (hist
[l
].key
== key
) {
431 for (j
= 0; j
< lenh
; j
++) {
432 for (i
= 0; i
< nc
; i
++) {
433 vec
[j
][i
] = (GLfloat
)input
[hist
[j
].idx
][i
];
436 for (; j
< nv
; j
++) {
437 for (i
= 0; i
< nc
; i
++) {
438 vec
[j
][i
] = vec
[0][i
];
444 for (j
= 0; j
< nv
; j
++) {
445 for (i
= 0; i
< nc
; i
++) {
446 vec
[j
][i
] = ((nv
- 1 - j
) * input
[minCol
][i
] + j
* input
[maxCol
][i
] + (nv
- 1) / 2) / (GLfloat
)(nv
- 1);
456 fxt1_lloyd (GLfloat vec
[][MAX_COMP
], GLint nv
,
457 GLubyte input
[N_TEXELS
][MAX_COMP
], GLint nc
, GLint n
)
459 /* Use the generalized lloyd's algorithm for VQ:
460 * find 4 color vectors.
462 * for each sample color
463 * sort to nearest vector.
465 * replace each vector with the centroid of its matching colors.
467 * repeat until RMS doesn't improve.
469 * if a color vector has no samples, or becomes the same as another
470 * vector, replace it with the color which is farthest from a sample.
472 * vec[][MAX_COMP] initial vectors and resulting colors
473 * nv number of resulting colors required
474 * input[N_TEXELS][MAX_COMP] input texels
475 * nc number of components in input / vec
476 * n number of input samples
479 GLint sum
[MAX_VECT
][MAX_COMP
]; /* used to accumulate closest texels */
480 GLint cnt
[MAX_VECT
]; /* how many times a certain vector was chosen */
481 GLfloat error
, lasterror
= 1e9
;
486 for (rep
= 0; rep
< LL_N_REP
; rep
++) {
487 /* reset sums & counters */
488 for (j
= 0; j
< nv
; j
++) {
489 for (i
= 0; i
< nc
; i
++) {
496 /* scan whole block */
497 for (k
= 0; k
< n
; k
++) {
500 GLfloat err
= 1e9
; /* big enough */
501 /* determine best vector */
502 for (j
= 0; j
< nv
; j
++) {
503 GLfloat e
= (vec
[j
][0] - input
[k
][0]) * (vec
[j
][0] - input
[k
][0]) +
504 (vec
[j
][1] - input
[k
][1]) * (vec
[j
][1] - input
[k
][1]) +
505 (vec
[j
][2] - input
[k
][2]) * (vec
[j
][2] - input
[k
][2]);
507 e
+= (vec
[j
][3] - input
[k
][3]) * (vec
[j
][3] - input
[k
][3]);
515 GLint best
= fxt1_bestcol(vec
, nv
, input
[k
], nc
, &err
);
518 /* add in closest color */
519 for (i
= 0; i
< nc
; i
++) {
520 sum
[best
][i
] += input
[k
][i
];
522 /* mark this vector as used */
524 /* accumulate error */
529 if ((error
< LL_RMS_E
) ||
530 ((error
< lasterror
) && ((lasterror
- error
) < LL_RMS_D
))) {
531 return !0; /* good match */
535 /* move each vector to the barycenter of its closest colors */
536 for (j
= 0; j
< nv
; j
++) {
538 GLfloat div
= 1.0F
/ cnt
[j
];
539 for (i
= 0; i
< nc
; i
++) {
540 vec
[j
][i
] = div
* sum
[j
][i
];
543 /* this vec has no samples or is identical with a previous vec */
544 GLint worst
= fxt1_worst(vec
[j
], input
, nc
, n
);
545 for (i
= 0; i
< nc
; i
++) {
546 vec
[j
][i
] = input
[worst
][i
];
552 return 0; /* could not converge fast enough */
557 fxt1_quantize_CHROMA (GLuint
*cc
,
558 GLubyte input
[N_TEXELS
][MAX_COMP
])
560 const GLint n_vect
= 4; /* 4 base vectors to find */
561 const GLint n_comp
= 3; /* 3 components: R, G, B */
562 GLfloat vec
[MAX_VECT
][MAX_COMP
];
564 Fx64 hi
; /* high quadword */
565 GLuint lohi
, lolo
; /* low quadword: hi dword, lo dword */
567 if (fxt1_choose(vec
, n_vect
, input
, n_comp
, N_TEXELS
) != 0) {
568 fxt1_lloyd(vec
, n_vect
, input
, n_comp
, N_TEXELS
);
571 FX64_MOV32(hi
, 4); /* cc-chroma = "010" + unused bit */
572 for (j
= n_vect
- 1; j
>= 0; j
--) {
573 for (i
= 0; i
< n_comp
; i
++) {
576 FX64_OR32(hi
, (GLuint
)(vec
[j
][i
] / 8.0F
));
579 ((Fx64
*)cc
)[1] = hi
;
582 /* right microtile */
583 for (k
= N_TEXELS
- 1; k
>= N_TEXELS
/2; k
--) {
585 lohi
|= fxt1_bestcol(vec
, n_vect
, input
[k
], n_comp
);
588 for (; k
>= 0; k
--) {
590 lolo
|= fxt1_bestcol(vec
, n_vect
, input
[k
], n_comp
);
598 fxt1_quantize_ALPHA0 (GLuint
*cc
,
599 GLubyte input
[N_TEXELS
][MAX_COMP
],
600 GLubyte reord
[N_TEXELS
][MAX_COMP
], GLint n
)
602 const GLint n_vect
= 3; /* 3 base vectors to find */
603 const GLint n_comp
= 4; /* 4 components: R, G, B, A */
604 GLfloat vec
[MAX_VECT
][MAX_COMP
];
606 Fx64 hi
; /* high quadword */
607 GLuint lohi
, lolo
; /* low quadword: hi dword, lo dword */
609 /* the last vector indicates zero */
610 for (i
= 0; i
< n_comp
; i
++) {
614 /* the first n texels in reord are guaranteed to be non-zero */
615 if (fxt1_choose(vec
, n_vect
, reord
, n_comp
, n
) != 0) {
616 fxt1_lloyd(vec
, n_vect
, reord
, n_comp
, n
);
619 FX64_MOV32(hi
, 6); /* alpha = "011" + lerp = 0 */
620 for (j
= n_vect
- 1; j
>= 0; j
--) {
623 FX64_OR32(hi
, (GLuint
)(vec
[j
][ACOMP
] / 8.0F
));
625 for (j
= n_vect
- 1; j
>= 0; j
--) {
626 for (i
= 0; i
< n_comp
- 1; i
++) {
629 FX64_OR32(hi
, (GLuint
)(vec
[j
][i
] / 8.0F
));
632 ((Fx64
*)cc
)[1] = hi
;
635 /* right microtile */
636 for (k
= N_TEXELS
- 1; k
>= N_TEXELS
/2; k
--) {
638 lohi
|= fxt1_bestcol(vec
, n_vect
+ 1, input
[k
], n_comp
);
641 for (; k
>= 0; k
--) {
643 lolo
|= fxt1_bestcol(vec
, n_vect
+ 1, input
[k
], n_comp
);
651 fxt1_quantize_ALPHA1 (GLuint
*cc
,
652 GLubyte input
[N_TEXELS
][MAX_COMP
])
654 const GLint n_vect
= 3; /* highest vector number in each microtile */
655 const GLint n_comp
= 4; /* 4 components: R, G, B, A */
656 GLfloat vec
[1 + 1 + 1][MAX_COMP
]; /* 1.5 extrema for each sub-block */
657 GLfloat b
, iv
[MAX_COMP
]; /* interpolation vector */
659 Fx64 hi
; /* high quadword */
660 GLuint lohi
, lolo
; /* low quadword: hi dword, lo dword */
664 GLint minColL
= 0, maxColL
= 0;
665 GLint minColR
= 0, maxColR
= 0;
666 GLint sumL
= 0, sumR
= 0;
668 /* Our solution here is to find the darkest and brightest colors in
669 * the 4x4 tile and use those as the two representative colors.
670 * There are probably better algorithms to use (histogram-based).
673 while ((minColL
== maxColL
) && nn_comp
) {
674 minSum
= 2000; /* big enough */
675 maxSum
= -1; /* small enough */
676 for (k
= 0; k
< N_TEXELS
/ 2; k
++) {
678 for (i
= 0; i
< nn_comp
; i
++) {
696 while ((minColR
== maxColR
) && nn_comp
) {
697 minSum
= 2000; /* big enough */
698 maxSum
= -1; /* small enough */
699 for (k
= N_TEXELS
/ 2; k
< N_TEXELS
; k
++) {
701 for (i
= 0; i
< nn_comp
; i
++) {
718 /* choose the common vector (yuck!) */
721 GLint v1
= 0, v2
= 0;
722 GLfloat err
= 1e9
; /* big enough */
723 GLfloat tv
[2 * 2][MAX_COMP
]; /* 2 extrema for each sub-block */
724 for (i
= 0; i
< n_comp
; i
++) {
725 tv
[0][i
] = input
[minColL
][i
];
726 tv
[1][i
] = input
[maxColL
][i
];
727 tv
[2][i
] = input
[minColR
][i
];
728 tv
[3][i
] = input
[maxColR
][i
];
730 for (j1
= 0; j1
< 2; j1
++) {
731 for (j2
= 2; j2
< 4; j2
++) {
733 for (i
= 0; i
< n_comp
; i
++) {
734 e
+= (tv
[j1
][i
] - tv
[j2
][i
]) * (tv
[j1
][i
] - tv
[j2
][i
]);
743 for (i
= 0; i
< n_comp
; i
++) {
744 vec
[0][i
] = tv
[1 - v1
][i
];
745 vec
[1][i
] = (tv
[v1
][i
] * sumL
+ tv
[v2
][i
] * sumR
) / (sumL
+ sumR
);
746 vec
[2][i
] = tv
[5 - v2
][i
];
752 if (minColL
!= maxColL
) {
753 /* compute interpolation vector */
754 MAKEIVEC(n_vect
, n_comp
, iv
, b
, vec
[0], vec
[1]);
758 for (k
= N_TEXELS
/ 2 - 1; k
>= 0; k
--) {
760 /* interpolate color */
761 CALCCDOT(texel
, n_vect
, n_comp
, iv
, b
, input
[k
]);
770 /* right microtile */
772 if (minColR
!= maxColR
) {
773 /* compute interpolation vector */
774 MAKEIVEC(n_vect
, n_comp
, iv
, b
, vec
[2], vec
[1]);
778 for (k
= N_TEXELS
- 1; k
>= N_TEXELS
/ 2; k
--) {
780 /* interpolate color */
781 CALCCDOT(texel
, n_vect
, n_comp
, iv
, b
, input
[k
]);
790 FX64_MOV32(hi
, 7); /* alpha = "011" + lerp = 1 */
791 for (j
= n_vect
- 1; j
>= 0; j
--) {
794 FX64_OR32(hi
, (GLuint
)(vec
[j
][ACOMP
] / 8.0F
));
796 for (j
= n_vect
- 1; j
>= 0; j
--) {
797 for (i
= 0; i
< n_comp
- 1; i
++) {
800 FX64_OR32(hi
, (GLuint
)(vec
[j
][i
] / 8.0F
));
803 ((Fx64
*)cc
)[1] = hi
;
808 fxt1_quantize_HI (GLuint
*cc
,
809 GLubyte input
[N_TEXELS
][MAX_COMP
],
810 GLubyte reord
[N_TEXELS
][MAX_COMP
], GLint n
)
812 const GLint n_vect
= 6; /* highest vector number */
813 const GLint n_comp
= 3; /* 3 components: R, G, B */
814 GLfloat b
= 0.0F
; /* phoudoin: silent compiler! */
815 GLfloat iv
[MAX_COMP
]; /* interpolation vector */
817 GLuint hihi
; /* high quadword: hi dword */
819 GLint minSum
= 2000; /* big enough */
820 GLint maxSum
= -1; /* small enough */
821 GLint minCol
= 0; /* phoudoin: silent compiler! */
822 GLint maxCol
= 0; /* phoudoin: silent compiler! */
824 /* Our solution here is to find the darkest and brightest colors in
825 * the 8x4 tile and use those as the two representative colors.
826 * There are probably better algorithms to use (histogram-based).
828 for (k
= 0; k
< n
; k
++) {
830 for (i
= 0; i
< n_comp
; i
++) {
843 hihi
= 0; /* cc-hi = "00" */
844 for (i
= 0; i
< n_comp
; i
++) {
847 hihi
|= reord
[maxCol
][i
] >> 3;
849 for (i
= 0; i
< n_comp
; i
++) {
852 hihi
|= reord
[minCol
][i
] >> 3;
855 cc
[0] = cc
[1] = cc
[2] = 0;
857 /* compute interpolation vector */
858 if (minCol
!= maxCol
) {
859 MAKEIVEC(n_vect
, n_comp
, iv
, b
, reord
[minCol
], reord
[maxCol
]);
863 for (k
= N_TEXELS
- 1; k
>= 0; k
--) {
865 GLuint
*kk
= (GLuint
*)((char *)cc
+ t
/ 8);
866 GLint texel
= n_vect
+ 1; /* transparent black */
868 if (!ISTBLACK(input
[k
])) {
869 if (minCol
!= maxCol
) {
870 /* interpolate color */
871 CALCCDOT(texel
, n_vect
, n_comp
, iv
, b
, input
[k
]);
873 kk
[0] |= texel
<< (t
& 7);
877 kk
[0] |= texel
<< (t
& 7);
884 fxt1_quantize_MIXED1 (GLuint
*cc
,
885 GLubyte input
[N_TEXELS
][MAX_COMP
])
887 const GLint n_vect
= 2; /* highest vector number in each microtile */
888 const GLint n_comp
= 3; /* 3 components: R, G, B */
889 GLubyte vec
[2 * 2][MAX_COMP
]; /* 2 extrema for each sub-block */
890 GLfloat b
, iv
[MAX_COMP
]; /* interpolation vector */
892 Fx64 hi
; /* high quadword */
893 GLuint lohi
, lolo
; /* low quadword: hi dword, lo dword */
897 GLint minColL
= 0, maxColL
= -1;
898 GLint minColR
= 0, maxColR
= -1;
900 /* Our solution here is to find the darkest and brightest colors in
901 * the 4x4 tile and use those as the two representative colors.
902 * There are probably better algorithms to use (histogram-based).
904 minSum
= 2000; /* big enough */
905 maxSum
= -1; /* small enough */
906 for (k
= 0; k
< N_TEXELS
/ 2; k
++) {
907 if (!ISTBLACK(input
[k
])) {
909 for (i
= 0; i
< n_comp
; i
++) {
922 minSum
= 2000; /* big enough */
923 maxSum
= -1; /* small enough */
924 for (; k
< N_TEXELS
; k
++) {
925 if (!ISTBLACK(input
[k
])) {
927 for (i
= 0; i
< n_comp
; i
++) {
943 /* all transparent black */
945 for (i
= 0; i
< n_comp
; i
++) {
951 for (i
= 0; i
< n_comp
; i
++) {
952 vec
[0][i
] = input
[minColL
][i
];
953 vec
[1][i
] = input
[maxColL
][i
];
955 if (minColL
!= maxColL
) {
956 /* compute interpolation vector */
957 MAKEIVEC(n_vect
, n_comp
, iv
, b
, vec
[0], vec
[1]);
961 for (k
= N_TEXELS
/ 2 - 1; k
>= 0; k
--) {
962 GLint texel
= n_vect
+ 1; /* transparent black */
963 if (!ISTBLACK(input
[k
])) {
964 /* interpolate color */
965 CALCCDOT(texel
, n_vect
, n_comp
, iv
, b
, input
[k
]);
975 /* right microtile */
977 /* all transparent black */
979 for (i
= 0; i
< n_comp
; i
++) {
985 for (i
= 0; i
< n_comp
; i
++) {
986 vec
[2][i
] = input
[minColR
][i
];
987 vec
[3][i
] = input
[maxColR
][i
];
989 if (minColR
!= maxColR
) {
990 /* compute interpolation vector */
991 MAKEIVEC(n_vect
, n_comp
, iv
, b
, vec
[2], vec
[3]);
995 for (k
= N_TEXELS
- 1; k
>= N_TEXELS
/ 2; k
--) {
996 GLint texel
= n_vect
+ 1; /* transparent black */
997 if (!ISTBLACK(input
[k
])) {
998 /* interpolate color */
999 CALCCDOT(texel
, n_vect
, n_comp
, iv
, b
, input
[k
]);
1009 FX64_MOV32(hi
, 9 | (vec
[3][GCOMP
] & 4) | ((vec
[1][GCOMP
] >> 1) & 2)); /* chroma = "1" */
1010 for (j
= 2 * 2 - 1; j
>= 0; j
--) {
1011 for (i
= 0; i
< n_comp
; i
++) {
1014 FX64_OR32(hi
, vec
[j
][i
] >> 3);
1017 ((Fx64
*)cc
)[1] = hi
;
1022 fxt1_quantize_MIXED0 (GLuint
*cc
,
1023 GLubyte input
[N_TEXELS
][MAX_COMP
])
1025 const GLint n_vect
= 3; /* highest vector number in each microtile */
1026 const GLint n_comp
= 3; /* 3 components: R, G, B */
1027 GLubyte vec
[2 * 2][MAX_COMP
]; /* 2 extrema for each sub-block */
1028 GLfloat b
, iv
[MAX_COMP
]; /* interpolation vector */
1030 Fx64 hi
; /* high quadword */
1031 GLuint lohi
, lolo
; /* low quadword: hi dword, lo dword */
1033 GLint minColL
= 0, maxColL
= 0;
1034 GLint minColR
= 0, maxColR
= 0;
1039 /* Our solution here is to find the darkest and brightest colors in
1040 * the 4x4 tile and use those as the two representative colors.
1041 * There are probably better algorithms to use (histogram-based).
1043 minSum
= 2000; /* big enough */
1044 maxSum
= -1; /* small enough */
1045 for (k
= 0; k
< N_TEXELS
/ 2; k
++) {
1047 for (i
= 0; i
< n_comp
; i
++) {
1059 minSum
= 2000; /* big enough */
1060 maxSum
= -1; /* small enough */
1061 for (; k
< N_TEXELS
; k
++) {
1063 for (i
= 0; i
< n_comp
; i
++) {
1078 GLint maxVarL
= fxt1_variance(NULL
, input
, n_comp
, N_TEXELS
/ 2);
1079 GLint maxVarR
= fxt1_variance(NULL
, &input
[N_TEXELS
/ 2], n_comp
, N_TEXELS
/ 2);
1081 /* Scan the channel with max variance for lo & hi
1082 * and use those as the two representative colors.
1084 minVal
= 2000; /* big enough */
1085 maxVal
= -1; /* small enough */
1086 for (k
= 0; k
< N_TEXELS
/ 2; k
++) {
1087 GLint t
= input
[k
][maxVarL
];
1097 minVal
= 2000; /* big enough */
1098 maxVal
= -1; /* small enough */
1099 for (; k
< N_TEXELS
; k
++) {
1100 GLint t
= input
[k
][maxVarR
];
1112 /* left microtile */
1114 for (i
= 0; i
< n_comp
; i
++) {
1115 vec
[0][i
] = input
[minColL
][i
];
1116 vec
[1][i
] = input
[maxColL
][i
];
1118 if (minColL
!= maxColL
) {
1119 /* compute interpolation vector */
1120 MAKEIVEC(n_vect
, n_comp
, iv
, b
, vec
[0], vec
[1]);
1124 for (k
= N_TEXELS
/ 2 - 1; k
>= 0; k
--) {
1126 /* interpolate color */
1127 CALCCDOT(texel
, n_vect
, n_comp
, iv
, b
, input
[k
]);
1133 /* funky encoding for LSB of green */
1134 if ((GLint
)((lolo
>> 1) & 1) != (((vec
[1][GCOMP
] ^ vec
[0][GCOMP
]) >> 2) & 1)) {
1135 for (i
= 0; i
< n_comp
; i
++) {
1136 vec
[1][i
] = input
[minColL
][i
];
1137 vec
[0][i
] = input
[maxColL
][i
];
1145 /* right microtile */
1147 for (i
= 0; i
< n_comp
; i
++) {
1148 vec
[2][i
] = input
[minColR
][i
];
1149 vec
[3][i
] = input
[maxColR
][i
];
1151 if (minColR
!= maxColR
) {
1152 /* compute interpolation vector */
1153 MAKEIVEC(n_vect
, n_comp
, iv
, b
, vec
[2], vec
[3]);
1157 for (k
= N_TEXELS
- 1; k
>= N_TEXELS
/ 2; k
--) {
1159 /* interpolate color */
1160 CALCCDOT(texel
, n_vect
, n_comp
, iv
, b
, input
[k
]);
1166 /* funky encoding for LSB of green */
1167 if ((GLint
)((lohi
>> 1) & 1) != (((vec
[3][GCOMP
] ^ vec
[2][GCOMP
]) >> 2) & 1)) {
1168 for (i
= 0; i
< n_comp
; i
++) {
1169 vec
[3][i
] = input
[minColR
][i
];
1170 vec
[2][i
] = input
[maxColR
][i
];
1178 FX64_MOV32(hi
, 8 | (vec
[3][GCOMP
] & 4) | ((vec
[1][GCOMP
] >> 1) & 2)); /* chroma = "1" */
1179 for (j
= 2 * 2 - 1; j
>= 0; j
--) {
1180 for (i
= 0; i
< n_comp
; i
++) {
1183 FX64_OR32(hi
, vec
[j
][i
] >> 3);
1186 ((Fx64
*)cc
)[1] = hi
;
1191 fxt1_quantize (GLuint
*cc
, const GLubyte
*lines
[], GLint comps
)
1194 GLubyte reord
[N_TEXELS
][MAX_COMP
];
1196 GLubyte input
[N_TEXELS
][MAX_COMP
];
1200 /* make the whole block opaque */
1201 memset(input
, -1, sizeof(input
));
1204 /* 8 texels each line */
1205 for (l
= 0; l
< 4; l
++) {
1206 for (k
= 0; k
< 4; k
++) {
1207 for (i
= 0; i
< comps
; i
++) {
1208 input
[k
+ l
* 4][i
] = *lines
[l
]++;
1211 for (; k
< 8; k
++) {
1212 for (i
= 0; i
< comps
; i
++) {
1213 input
[k
+ l
* 4 + 12][i
] = *lines
[l
]++;
1219 * 00, 01, 02, 03, 08, 09, 0a, 0b
1220 * 10, 11, 12, 13, 18, 19, 1a, 1b
1221 * 04, 05, 06, 07, 0c, 0d, 0e, 0f
1222 * 14, 15, 16, 17, 1c, 1d, 1e, 1f
1226 * stupidity flows forth from this
1231 /* skip all transparent black texels */
1233 for (k
= 0; k
< N_TEXELS
; k
++) {
1234 /* test all components against 0 */
1235 if (!ISTBLACK(input
[k
])) {
1236 /* texel is not transparent black */
1237 COPY_4UBV(reord
[l
], input
[k
]);
1238 if (reord
[l
][ACOMP
] < (255 - ALPHA_TS
)) {
1239 /* non-opaque texel */
1249 fxt1_quantize_ALPHA0(cc
, input
, reord
, l
);
1250 } else if (l
== 0) {
1251 cc
[0] = cc
[1] = cc
[2] = -1;
1253 } else if (l
< N_TEXELS
) {
1254 fxt1_quantize_HI(cc
, input
, reord
, l
);
1256 fxt1_quantize_CHROMA(cc
, input
);
1258 (void)fxt1_quantize_ALPHA1
;
1259 (void)fxt1_quantize_MIXED1
;
1260 (void)fxt1_quantize_MIXED0
;
1263 fxt1_quantize_ALPHA1(cc
, input
);
1264 } else if (l
== 0) {
1265 cc
[0] = cc
[1] = cc
[2] = ~0u;
1267 } else if (l
< N_TEXELS
) {
1268 fxt1_quantize_MIXED1(cc
, input
);
1270 fxt1_quantize_MIXED0(cc
, input
);
1272 (void)fxt1_quantize_ALPHA0
;
1273 (void)fxt1_quantize_HI
;
1274 (void)fxt1_quantize_CHROMA
;
1281 * Upscale an image by replication, not (typical) stretching.
1282 * We use this when the image width or height is less than a
1283 * certain size (4, 8) and we need to upscale an image.
1286 upscale_teximage2d(GLsizei inWidth
, GLsizei inHeight
,
1287 GLsizei outWidth
, GLsizei outHeight
,
1288 GLint comps
, const GLubyte
*src
, GLint srcRowStride
,
1293 ASSERT(outWidth
>= inWidth
);
1294 ASSERT(outHeight
>= inHeight
);
1296 ASSERT(inWidth
== 1 || inWidth
== 2 || inHeight
== 1 || inHeight
== 2);
1297 ASSERT((outWidth
& 3) == 0);
1298 ASSERT((outHeight
& 3) == 0);
1301 for (i
= 0; i
< outHeight
; i
++) {
1302 const GLint ii
= i
% inHeight
;
1303 for (j
= 0; j
< outWidth
; j
++) {
1304 const GLint jj
= j
% inWidth
;
1305 for (k
= 0; k
< comps
; k
++) {
1306 dest
[(i
* outWidth
+ j
) * comps
+ k
]
1307 = src
[ii
* srcRowStride
+ jj
* comps
+ k
];
1315 fxt1_encode (GLuint width
, GLuint height
, GLint comps
,
1316 const void *source
, GLint srcRowStride
,
1317 void *dest
, GLint destRowStride
)
1320 const GLubyte
*data
;
1321 GLuint
*encoded
= (GLuint
*)dest
;
1322 void *newSource
= NULL
;
1324 assert(comps
== 3 || comps
== 4);
1326 /* Replicate image if width is not M8 or height is not M4 */
1327 if ((width
& 7) | (height
& 3)) {
1328 GLint newWidth
= (width
+ 7) & ~7;
1329 GLint newHeight
= (height
+ 3) & ~3;
1330 newSource
= malloc(comps
* newWidth
* newHeight
* sizeof(GLubyte
));
1332 GET_CURRENT_CONTEXT(ctx
);
1333 _mesa_error(ctx
, GL_OUT_OF_MEMORY
, "texture compression");
1336 upscale_teximage2d(width
, height
, newWidth
, newHeight
,
1337 comps
, (const GLubyte
*) source
,
1338 srcRowStride
, (GLubyte
*) newSource
);
1342 srcRowStride
= comps
* newWidth
;
1345 data
= (const GLubyte
*) source
;
1346 destRowStride
= (destRowStride
- width
* 2) / 4;
1347 for (y
= 0; y
< height
; y
+= 4) {
1348 GLuint offs
= 0 + (y
+ 0) * srcRowStride
;
1349 for (x
= 0; x
< width
; x
+= 8) {
1350 const GLubyte
*lines
[4];
1351 lines
[0] = &data
[offs
];
1352 lines
[1] = lines
[0] + srcRowStride
;
1353 lines
[2] = lines
[1] + srcRowStride
;
1354 lines
[3] = lines
[2] + srcRowStride
;
1356 fxt1_quantize(encoded
, lines
, comps
);
1357 /* 128 bits per 8x4 block */
1360 encoded
+= destRowStride
;
1368 /***************************************************************************\
1371 * The decoder is based on GL_3DFX_texture_compression_FXT1
1372 * specification and serves as a concept for the encoder.
1373 \***************************************************************************/
1376 /* lookup table for scaling 5 bit colors up to 8 bits */
1377 static const GLubyte _rgb_scale_5
[] = {
1378 0, 8, 16, 25, 33, 41, 49, 58,
1379 66, 74, 82, 90, 99, 107, 115, 123,
1380 132, 140, 148, 156, 165, 173, 181, 189,
1381 197, 206, 214, 222, 230, 239, 247, 255
1384 /* lookup table for scaling 6 bit colors up to 8 bits */
1385 static const GLubyte _rgb_scale_6
[] = {
1386 0, 4, 8, 12, 16, 20, 24, 28,
1387 32, 36, 40, 45, 49, 53, 57, 61,
1388 65, 69, 73, 77, 81, 85, 89, 93,
1389 97, 101, 105, 109, 113, 117, 121, 125,
1390 130, 134, 138, 142, 146, 150, 154, 158,
1391 162, 166, 170, 174, 178, 182, 186, 190,
1392 194, 198, 202, 206, 210, 215, 219, 223,
1393 227, 231, 235, 239, 243, 247, 251, 255
/*
 * CC_SEL(cc, which): view `cc` as an array of 32-bit words, pick the word
 * containing bit number `which` and shift it right so that bit lands in
 * bit 0. Higher bits of the same word remain above it, so callers mask the
 * result. The cast keeps the pointer const-qualified; the block is only read.
 */
#define CC_SEL(cc, which) (((const GLuint *)(cc))[(which) / 32] >> ((which) & 31))
/* UP5(c): look up the low 5 bits of `c` in _rgb_scale_5. */
#define UP5(c) (_rgb_scale_5[(c) & 31])
/* UP6(c, b): form a 6-bit index from the low 5 bits of `c` (upper bits) and
 * bit 0 of `b` (lowest bit), then look it up in _rgb_scale_6. */
#define UP6(c, b) (_rgb_scale_6[(((c) & 31) << 1) | ((b) & 1)])
/*
 * LERP(n, t, c0, c1): integer interpolation between c0 (at t == 0) and
 * c1 (at t == n), with n / 2 added before the division by n.
 * The whole expansion is parenthesized so the trailing division cannot
 * re-associate with operators around the macro call.
 */
#define LERP(n, t, c0, c1) ((((n) - (t)) * (c0) + (t) * (c1) + (n) / 2) / (n))
1404 fxt1_decode_1HI (const GLubyte
*code
, GLint t
, GLubyte
*rgba
)
1409 cc
= (const GLuint
*)(code
+ t
/ 8);
1410 t
= (cc
[0] >> (t
& 7)) & 7;
1413 rgba
[RCOMP
] = rgba
[GCOMP
] = rgba
[BCOMP
] = rgba
[ACOMP
] = 0;
1416 cc
= (const GLuint
*)(code
+ 12);
1418 b
= UP5(CC_SEL(cc
, 0));
1419 g
= UP5(CC_SEL(cc
, 5));
1420 r
= UP5(CC_SEL(cc
, 10));
1421 } else if (t
== 6) {
1422 b
= UP5(CC_SEL(cc
, 15));
1423 g
= UP5(CC_SEL(cc
, 20));
1424 r
= UP5(CC_SEL(cc
, 25));
1426 b
= LERP(6, t
, UP5(CC_SEL(cc
, 0)), UP5(CC_SEL(cc
, 15)));
1427 g
= LERP(6, t
, UP5(CC_SEL(cc
, 5)), UP5(CC_SEL(cc
, 20)));
1428 r
= LERP(6, t
, UP5(CC_SEL(cc
, 10)), UP5(CC_SEL(cc
, 25)));
1439 fxt1_decode_1CHROMA (const GLubyte
*code
, GLint t
, GLubyte
*rgba
)
1444 cc
= (const GLuint
*)code
;
1449 t
= (cc
[0] >> (t
* 2)) & 3;
1452 cc
= (const GLuint
*)(code
+ 8 + t
/ 8);
1453 kk
= cc
[0] >> (t
& 7);
1454 rgba
[BCOMP
] = UP5(kk
);
1455 rgba
[GCOMP
] = UP5(kk
>> 5);
1456 rgba
[RCOMP
] = UP5(kk
>> 10);
1462 fxt1_decode_1MIXED (const GLubyte
*code
, GLint t
, GLubyte
*rgba
)
1468 cc
= (const GLuint
*)code
;
1471 t
= (cc
[1] >> (t
* 2)) & 3;
1473 col
[0][BCOMP
] = (*(const GLuint
*)(code
+ 11)) >> 6;
1474 col
[0][GCOMP
] = CC_SEL(cc
, 99);
1475 col
[0][RCOMP
] = CC_SEL(cc
, 104);
1477 col
[1][BCOMP
] = CC_SEL(cc
, 109);
1478 col
[1][GCOMP
] = CC_SEL(cc
, 114);
1479 col
[1][RCOMP
] = CC_SEL(cc
, 119);
1480 glsb
= CC_SEL(cc
, 126);
1481 selb
= CC_SEL(cc
, 33);
1483 t
= (cc
[0] >> (t
* 2)) & 3;
1485 col
[0][BCOMP
] = CC_SEL(cc
, 64);
1486 col
[0][GCOMP
] = CC_SEL(cc
, 69);
1487 col
[0][RCOMP
] = CC_SEL(cc
, 74);
1489 col
[1][BCOMP
] = CC_SEL(cc
, 79);
1490 col
[1][GCOMP
] = CC_SEL(cc
, 84);
1491 col
[1][RCOMP
] = CC_SEL(cc
, 89);
1492 glsb
= CC_SEL(cc
, 125);
1493 selb
= CC_SEL(cc
, 1);
1496 if (CC_SEL(cc
, 124) & 1) {
1501 rgba
[RCOMP
] = rgba
[BCOMP
] = rgba
[GCOMP
] = rgba
[ACOMP
] = 0;
1505 b
= UP5(col
[0][BCOMP
]);
1506 g
= UP5(col
[0][GCOMP
]);
1507 r
= UP5(col
[0][RCOMP
]);
1508 } else if (t
== 2) {
1509 b
= UP5(col
[1][BCOMP
]);
1510 g
= UP6(col
[1][GCOMP
], glsb
);
1511 r
= UP5(col
[1][RCOMP
]);
1513 b
= (UP5(col
[0][BCOMP
]) + UP5(col
[1][BCOMP
])) / 2;
1514 g
= (UP5(col
[0][GCOMP
]) + UP6(col
[1][GCOMP
], glsb
)) / 2;
1515 r
= (UP5(col
[0][RCOMP
]) + UP5(col
[1][RCOMP
])) / 2;
1526 b
= UP5(col
[0][BCOMP
]);
1527 g
= UP6(col
[0][GCOMP
], glsb
^ selb
);
1528 r
= UP5(col
[0][RCOMP
]);
1529 } else if (t
== 3) {
1530 b
= UP5(col
[1][BCOMP
]);
1531 g
= UP6(col
[1][GCOMP
], glsb
);
1532 r
= UP5(col
[1][RCOMP
]);
1534 b
= LERP(3, t
, UP5(col
[0][BCOMP
]), UP5(col
[1][BCOMP
]));
1535 g
= LERP(3, t
, UP6(col
[0][GCOMP
], glsb
^ selb
),
1536 UP6(col
[1][GCOMP
], glsb
));
1537 r
= LERP(3, t
, UP5(col
[0][RCOMP
]), UP5(col
[1][RCOMP
]));
1548 fxt1_decode_1ALPHA (const GLubyte
*code
, GLint t
, GLubyte
*rgba
)
1553 cc
= (const GLuint
*)code
;
1554 if (CC_SEL(cc
, 124) & 1) {
1560 t
= (cc
[1] >> (t
* 2)) & 3;
1562 col0
[BCOMP
] = (*(const GLuint
*)(code
+ 11)) >> 6;
1563 col0
[GCOMP
] = CC_SEL(cc
, 99);
1564 col0
[RCOMP
] = CC_SEL(cc
, 104);
1565 col0
[ACOMP
] = CC_SEL(cc
, 119);
1567 t
= (cc
[0] >> (t
* 2)) & 3;
1569 col0
[BCOMP
] = CC_SEL(cc
, 64);
1570 col0
[GCOMP
] = CC_SEL(cc
, 69);
1571 col0
[RCOMP
] = CC_SEL(cc
, 74);
1572 col0
[ACOMP
] = CC_SEL(cc
, 109);
1576 b
= UP5(col0
[BCOMP
]);
1577 g
= UP5(col0
[GCOMP
]);
1578 r
= UP5(col0
[RCOMP
]);
1579 a
= UP5(col0
[ACOMP
]);
1580 } else if (t
== 3) {
1581 b
= UP5(CC_SEL(cc
, 79));
1582 g
= UP5(CC_SEL(cc
, 84));
1583 r
= UP5(CC_SEL(cc
, 89));
1584 a
= UP5(CC_SEL(cc
, 114));
1586 b
= LERP(3, t
, UP5(col0
[BCOMP
]), UP5(CC_SEL(cc
, 79)));
1587 g
= LERP(3, t
, UP5(col0
[GCOMP
]), UP5(CC_SEL(cc
, 84)));
1588 r
= LERP(3, t
, UP5(col0
[RCOMP
]), UP5(CC_SEL(cc
, 89)));
1589 a
= LERP(3, t
, UP5(col0
[ACOMP
]), UP5(CC_SEL(cc
, 114)));
1598 t
= (cc
[0] >> (t
* 2)) & 3;
1605 cc
= (const GLuint
*)code
;
1606 a
= UP5(cc
[3] >> (t
* 5 + 13));
1608 cc
= (const GLuint
*)(code
+ 8 + t
/ 8);
1609 kk
= cc
[0] >> (t
& 7);
1623 fxt1_decode_1 (const void *texture
, GLint stride
, /* in pixels */
1624 GLint i
, GLint j
, GLubyte
*rgba
)
1626 static void (*decode_1
[]) (const GLubyte
*, GLint
, GLubyte
*) = {
1627 fxt1_decode_1HI
, /* cc-high = "00?" */
1628 fxt1_decode_1HI
, /* cc-high = "00?" */
1629 fxt1_decode_1CHROMA
, /* cc-chroma = "010" */
1630 fxt1_decode_1ALPHA
, /* alpha = "011" */
1631 fxt1_decode_1MIXED
, /* mixed = "1??" */
1632 fxt1_decode_1MIXED
, /* mixed = "1??" */
1633 fxt1_decode_1MIXED
, /* mixed = "1??" */
1634 fxt1_decode_1MIXED
/* mixed = "1??" */
1637 const GLubyte
*code
= (const GLubyte
*)texture
+
1638 ((j
/ 4) * (stride
/ 8) + (i
/ 8)) * 16;
1639 GLint mode
= CC_SEL(code
, 125);
1647 decode_1
[mode
](code
, t
, rgba
);
1651 #endif /* FEATURE_texture_fxt1 */