0437cfcc16ec58cd0f1e6ed22bff8b1beb5ff6e0
2 * Mesa 3-D graphics library
5 * Copyright (C) 1999-2008 Brian Paul All Rights Reserved.
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
14 * The above copyright notice and this permission notice shall be included
15 * in all copies or substantial portions of the Software.
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
21 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27 * \file texcompress_fxt1.c
28 * GL_3DFX_texture_compression_FXT1 support.
37 #include "mfeatures.h"
39 #include "texcompress.h"
40 #include "texcompress_fxt1.h"
42 #include "swrast/s_context.h"
45 #if FEATURE_texture_fxt1
49 fxt1_encode (GLuint width
, GLuint height
, GLint comps
,
50 const void *source
, GLint srcRowStride
,
51 void *dest
, GLint destRowStride
);
54 fxt1_decode_1 (const void *texture
, GLint stride
,
55 GLint i
, GLint j
, GLubyte
*rgba
);
59 * Store user's image in rgb_fxt1 format.
62 _mesa_texstore_rgb_fxt1(TEXSTORE_PARAMS
)
64 const GLubyte
*pixels
;
67 const GLint texWidth
= dstRowStride
* 8 / 16; /* a bit of a hack */
68 const GLubyte
*tempImage
= NULL
;
70 ASSERT(dstFormat
== MESA_FORMAT_RGB_FXT1
);
71 ASSERT(dstXoffset
% 8 == 0);
72 ASSERT(dstYoffset
% 4 == 0);
73 ASSERT(dstZoffset
== 0);
75 (void) dstImageOffsets
;
77 if (srcFormat
!= GL_RGB
||
78 srcType
!= GL_UNSIGNED_BYTE
||
79 ctx
->_ImageTransferState
||
80 srcPacking
->SwapBytes
) {
81 /* convert image to RGB/GLubyte */
82 tempImage
= _mesa_make_temp_ubyte_image(ctx
, dims
,
84 _mesa_get_format_base_format(dstFormat
),
85 srcWidth
, srcHeight
, srcDepth
,
86 srcFormat
, srcType
, srcAddr
,
89 return GL_FALSE
; /* out of memory */
91 srcRowStride
= 3 * srcWidth
;
95 pixels
= (const GLubyte
*) srcAddr
;
96 srcRowStride
= _mesa_image_row_stride(srcPacking
, srcWidth
, srcFormat
,
97 srcType
) / sizeof(GLubyte
);
100 dst
= _mesa_compressed_image_address(dstXoffset
, dstYoffset
, 0,
102 texWidth
, (GLubyte
*) dstAddr
);
104 fxt1_encode(srcWidth
, srcHeight
, 3, pixels
, srcRowStride
,
108 free((void*) tempImage
);
115 * Store user's image in rgba_fxt1 format.
118 _mesa_texstore_rgba_fxt1(TEXSTORE_PARAMS
)
120 const GLubyte
*pixels
;
123 GLint texWidth
= dstRowStride
* 8 / 16; /* a bit of a hack */
124 const GLubyte
*tempImage
= NULL
;
126 ASSERT(dstFormat
== MESA_FORMAT_RGBA_FXT1
);
127 ASSERT(dstXoffset
% 8 == 0);
128 ASSERT(dstYoffset
% 4 == 0);
129 ASSERT(dstZoffset
== 0);
131 (void) dstImageOffsets
;
133 if (srcFormat
!= GL_RGBA
||
134 srcType
!= GL_UNSIGNED_BYTE
||
135 ctx
->_ImageTransferState
||
136 srcPacking
->SwapBytes
) {
137 /* convert image to RGBA/GLubyte */
138 tempImage
= _mesa_make_temp_ubyte_image(ctx
, dims
,
140 _mesa_get_format_base_format(dstFormat
),
141 srcWidth
, srcHeight
, srcDepth
,
142 srcFormat
, srcType
, srcAddr
,
145 return GL_FALSE
; /* out of memory */
147 srcRowStride
= 4 * srcWidth
;
151 pixels
= (const GLubyte
*) srcAddr
;
152 srcRowStride
= _mesa_image_row_stride(srcPacking
, srcWidth
, srcFormat
,
153 srcType
) / sizeof(GLubyte
);
156 dst
= _mesa_compressed_image_address(dstXoffset
, dstYoffset
, 0,
158 texWidth
, (GLubyte
*) dstAddr
);
160 fxt1_encode(srcWidth
, srcHeight
, 4, pixels
, srcRowStride
,
164 free((void*) tempImage
);
171 _mesa_fetch_texel_2d_f_rgba_fxt1( const struct swrast_texture_image
*texImage
,
172 GLint i
, GLint j
, GLint k
, GLfloat
*texel
)
174 /* just sample as GLubyte and convert to float here */
177 fxt1_decode_1(texImage
->Base
.Data
, texImage
->Base
.RowStride
, i
, j
, rgba
);
178 texel
[RCOMP
] = UBYTE_TO_FLOAT(rgba
[RCOMP
]);
179 texel
[GCOMP
] = UBYTE_TO_FLOAT(rgba
[GCOMP
]);
180 texel
[BCOMP
] = UBYTE_TO_FLOAT(rgba
[BCOMP
]);
181 texel
[ACOMP
] = UBYTE_TO_FLOAT(rgba
[ACOMP
]);
186 _mesa_fetch_texel_2d_f_rgb_fxt1( const struct swrast_texture_image
*texImage
,
187 GLint i
, GLint j
, GLint k
, GLfloat
*texel
)
189 /* just sample as GLubyte and convert to float here */
192 fxt1_decode_1(texImage
->Base
.Data
, texImage
->Base
.RowStride
, i
, j
, rgba
);
193 texel
[RCOMP
] = UBYTE_TO_FLOAT(rgba
[RCOMP
]);
194 texel
[GCOMP
] = UBYTE_TO_FLOAT(rgba
[GCOMP
]);
195 texel
[BCOMP
] = UBYTE_TO_FLOAT(rgba
[BCOMP
]);
201 /***************************************************************************\
204 * The encoder was built by reversing the decoder,
205 * and is vaguely based on Texus2 by 3dfx. Note that this code
206 * is merely a proof of concept, since it is highly UNoptimized;
207 * moreover, it is sub-optimal due to initial conditions passed
208 * to Lloyd's algorithm (the interpolation modes are even worse).
209 \***************************************************************************/
/* Tunables and fixed sizes used by the FXT1 encoder. */
#define MAX_COMP 4 /* ever needed maximum number of components in texel */
#define MAX_VECT 4 /* ever needed maximum number of base vectors to find */
#define N_TEXELS 32 /* number of texels in a block (always 32) */
#define LL_N_REP 50 /* number of iterations in lloyd's vq */
#define LL_RMS_D 10 /* fault tolerance (maximum delta) */
#define LL_RMS_E 255 /* fault tolerance (maximum error) */
#define ALPHA_TS 2 /* alpha threshold: (255 - ALPHA_TS) deemed opaque */

/*
 * ISTBLACK(v): non-zero when the four bytes starting at v are all zero
 * (a "transparent black" texel), zero otherwise.
 *
 * The bytes are inspected one at a time instead of loading them through a
 * (GLuint *) cast: the texels handed to this macro are GLubyte[MAX_COMP]
 * rows, and reading those through a wider integer type is a strict-aliasing
 * and alignment hazard.  The result is the same as the word compare.
 *
 * NOTE: v is evaluated more than once; pass an expression without side
 * effects (every use in this file passes input[k]).
 */
#define ISTBLACK(v) \
   ((((const unsigned char *)(v))[0] | \
     ((const unsigned char *)(v))[1] | \
     ((const unsigned char *)(v))[2] | \
     ((const unsigned char *)(v))[3]) == 0)
223 * Define a 64-bit unsigned integer type and macros
227 #define FX64_NATIVE 1
229 typedef uint64_t Fx64
;
/*
 * 64-bit helpers, native variant (Fx64 is a plain uint64_t here):
 *   FX64_MOV32(a, b)  store b into a
 *   FX64_OR32(a, b)   OR b into a
 *   FX64_SHL(a, c)    shift a left by c bits
 * Arguments and the whole replacement list are parenthesized so the macros
 * expand predictably even when used inside a larger expression.
 */
#define FX64_MOV32(a, b) ((a) = (b))
#define FX64_OR32(a, b) ((a) |= (b))
#define FX64_SHL(a, c) ((a) <<= (c))
237 #define FX64_NATIVE 0
243 #define FX64_MOV32(a, b) a.lo = b
244 #define FX64_OR32(a, b) a.lo |= b
246 #define FX64_SHL(a, c) \
249 a.hi = a.lo << ((c) - 32); \
252 a.hi = (a.hi << (c)) | (a.lo >> (32 - (c))); \
/*
 * F(i): weight applied to component i when the interpolation vector is
 * built.  It is the constant 1 for every component; the argument is
 * ignored.  The expansion is wrapped in parentheses so the cast cannot
 * bind to neighbouring tokens at the point of use.
 */
#define F(i) ((GLfloat)1) /* can be used to obtain an oblong metric: 0.30 / 0.59 / 0.11 */
#define SAFECDOT 1 /* for paranoids */
263 #define MAKEIVEC(NV, NC, IV, B, V0, V1) \
265 /* compute interpolation vector */ \
269 for (i = 0; i < NC; i++) { \
270 IV[i] = (V1[i] - V0[i]) * F(i); \
271 d2 += IV[i] * IV[i]; \
273 rd2 = (GLfloat)NV / d2; \
275 for (i = 0; i < NC; i++) { \
277 B -= IV[i] * V0[i]; \
280 B = B * rd2 + 0.5f; \
283 #define CALCCDOT(TEXEL, NV, NC, IV, B, V)\
285 GLfloat dot = 0.0F; \
286 for (i = 0; i < NC; i++) { \
287 dot += V[i] * IV[i]; \
289 TEXEL = (GLint)(dot + B); \
293 } else if (TEXEL > NV) { \
301 fxt1_bestcol (GLfloat vec
[][MAX_COMP
], GLint nv
,
302 GLubyte input
[MAX_COMP
], GLint nc
)
304 GLint i
, j
, best
= -1;
305 GLfloat err
= 1e9
; /* big enough */
307 for (j
= 0; j
< nv
; j
++) {
309 for (i
= 0; i
< nc
; i
++) {
310 e
+= (vec
[j
][i
] - input
[i
]) * (vec
[j
][i
] - input
[i
]);
323 fxt1_worst (GLfloat vec
[MAX_COMP
],
324 GLubyte input
[N_TEXELS
][MAX_COMP
], GLint nc
, GLint n
)
326 GLint i
, k
, worst
= -1;
327 GLfloat err
= -1.0F
; /* small enough */
329 for (k
= 0; k
< n
; k
++) {
331 for (i
= 0; i
< nc
; i
++) {
332 e
+= (vec
[i
] - input
[k
][i
]) * (vec
[i
] - input
[k
][i
]);
345 fxt1_variance (GLdouble variance
[MAX_COMP
],
346 GLubyte input
[N_TEXELS
][MAX_COMP
], GLint nc
, GLint n
)
348 GLint i
, k
, best
= 0;
350 GLdouble var
, maxvar
= -1; /* small enough */
351 GLdouble teenth
= 1.0 / n
;
353 for (i
= 0; i
< nc
; i
++) {
355 for (k
= 0; k
< n
; k
++) {
356 GLint t
= input
[k
][i
];
360 var
= sx2
* teenth
- sx
* sx
* teenth
* teenth
;
375 fxt1_choose (GLfloat vec
[][MAX_COMP
], GLint nv
,
376 GLubyte input
[N_TEXELS
][MAX_COMP
], GLint nc
, GLint n
)
379 /* Choose colors from a grid.
383 for (j
= 0; j
< nv
; j
++) {
384 GLint m
= j
* (n
- 1) / (nv
- 1);
385 for (i
= 0; i
< nc
; i
++) {
386 vec
[j
][i
] = input
[m
][i
];
390 /* Our solution here is to find the darkest and brightest colors in
391 * the 8x4 tile and use those as the two representative colors.
392 * There are probably better algorithms to use (histogram-based).
395 GLint minSum
= 2000; /* big enough */
396 GLint maxSum
= -1; /* small enough */
397 GLint minCol
= 0; /* phoudoin: silent compiler! */
398 GLint maxCol
= 0; /* phoudoin: silent compiler! */
408 memset(hist
, 0, sizeof(hist
));
410 for (k
= 0; k
< n
; k
++) {
414 for (i
= 0; i
< nc
; i
++) {
419 for (l
= 0; l
< n
; l
++) {
428 } else if (hist
[l
].key
== key
) {
444 for (j
= 0; j
< lenh
; j
++) {
445 for (i
= 0; i
< nc
; i
++) {
446 vec
[j
][i
] = (GLfloat
)input
[hist
[j
].idx
][i
];
449 for (; j
< nv
; j
++) {
450 for (i
= 0; i
< nc
; i
++) {
451 vec
[j
][i
] = vec
[0][i
];
457 for (j
= 0; j
< nv
; j
++) {
458 for (i
= 0; i
< nc
; i
++) {
459 vec
[j
][i
] = ((nv
- 1 - j
) * input
[minCol
][i
] + j
* input
[maxCol
][i
] + (nv
- 1) / 2) / (GLfloat
)(nv
- 1);
469 fxt1_lloyd (GLfloat vec
[][MAX_COMP
], GLint nv
,
470 GLubyte input
[N_TEXELS
][MAX_COMP
], GLint nc
, GLint n
)
472 /* Use the generalized lloyd's algorithm for VQ:
473 * find 4 color vectors.
475 * for each sample color
476 * sort to nearest vector.
478 * replace each vector with the centroid of its matching colors.
480 * repeat until RMS doesn't improve.
482 * if a color vector has no samples, or becomes the same as another
483 * vector, replace it with the color which is farthest from a sample.
485 * vec[][MAX_COMP] initial vectors and resulting colors
486 * nv number of resulting colors required
487 * input[N_TEXELS][MAX_COMP] input texels
488 * nc number of components in input / vec
489 * n number of input samples
492 GLint sum
[MAX_VECT
][MAX_COMP
]; /* used to accumulate closest texels */
493 GLint cnt
[MAX_VECT
]; /* how many times a certain vector was chosen */
494 GLfloat error
, lasterror
= 1e9
;
499 for (rep
= 0; rep
< LL_N_REP
; rep
++) {
500 /* reset sums & counters */
501 for (j
= 0; j
< nv
; j
++) {
502 for (i
= 0; i
< nc
; i
++) {
509 /* scan whole block */
510 for (k
= 0; k
< n
; k
++) {
513 GLfloat err
= 1e9
; /* big enough */
514 /* determine best vector */
515 for (j
= 0; j
< nv
; j
++) {
516 GLfloat e
= (vec
[j
][0] - input
[k
][0]) * (vec
[j
][0] - input
[k
][0]) +
517 (vec
[j
][1] - input
[k
][1]) * (vec
[j
][1] - input
[k
][1]) +
518 (vec
[j
][2] - input
[k
][2]) * (vec
[j
][2] - input
[k
][2]);
520 e
+= (vec
[j
][3] - input
[k
][3]) * (vec
[j
][3] - input
[k
][3]);
528 GLint best
= fxt1_bestcol(vec
, nv
, input
[k
], nc
, &err
);
531 /* add in closest color */
532 for (i
= 0; i
< nc
; i
++) {
533 sum
[best
][i
] += input
[k
][i
];
535 /* mark this vector as used */
537 /* accumulate error */
542 if ((error
< LL_RMS_E
) ||
543 ((error
< lasterror
) && ((lasterror
- error
) < LL_RMS_D
))) {
544 return !0; /* good match */
548 /* move each vector to the barycenter of its closest colors */
549 for (j
= 0; j
< nv
; j
++) {
551 GLfloat div
= 1.0F
/ cnt
[j
];
552 for (i
= 0; i
< nc
; i
++) {
553 vec
[j
][i
] = div
* sum
[j
][i
];
556 /* this vec has no samples or is identical with a previous vec */
557 GLint worst
= fxt1_worst(vec
[j
], input
, nc
, n
);
558 for (i
= 0; i
< nc
; i
++) {
559 vec
[j
][i
] = input
[worst
][i
];
565 return 0; /* could not converge fast enough */
570 fxt1_quantize_CHROMA (GLuint
*cc
,
571 GLubyte input
[N_TEXELS
][MAX_COMP
])
573 const GLint n_vect
= 4; /* 4 base vectors to find */
574 const GLint n_comp
= 3; /* 3 components: R, G, B */
575 GLfloat vec
[MAX_VECT
][MAX_COMP
];
577 Fx64 hi
; /* high quadword */
578 GLuint lohi
, lolo
; /* low quadword: hi dword, lo dword */
580 if (fxt1_choose(vec
, n_vect
, input
, n_comp
, N_TEXELS
) != 0) {
581 fxt1_lloyd(vec
, n_vect
, input
, n_comp
, N_TEXELS
);
584 FX64_MOV32(hi
, 4); /* cc-chroma = "010" + unused bit */
585 for (j
= n_vect
- 1; j
>= 0; j
--) {
586 for (i
= 0; i
< n_comp
; i
++) {
589 FX64_OR32(hi
, (GLuint
)(vec
[j
][i
] / 8.0F
));
592 ((Fx64
*)cc
)[1] = hi
;
595 /* right microtile */
596 for (k
= N_TEXELS
- 1; k
>= N_TEXELS
/2; k
--) {
598 lohi
|= fxt1_bestcol(vec
, n_vect
, input
[k
], n_comp
);
601 for (; k
>= 0; k
--) {
603 lolo
|= fxt1_bestcol(vec
, n_vect
, input
[k
], n_comp
);
611 fxt1_quantize_ALPHA0 (GLuint
*cc
,
612 GLubyte input
[N_TEXELS
][MAX_COMP
],
613 GLubyte reord
[N_TEXELS
][MAX_COMP
], GLint n
)
615 const GLint n_vect
= 3; /* 3 base vectors to find */
616 const GLint n_comp
= 4; /* 4 components: R, G, B, A */
617 GLfloat vec
[MAX_VECT
][MAX_COMP
];
619 Fx64 hi
; /* high quadword */
620 GLuint lohi
, lolo
; /* low quadword: hi dword, lo dword */
622 /* the last vector indicates zero */
623 for (i
= 0; i
< n_comp
; i
++) {
627 /* the first n texels in reord are guaranteed to be non-zero */
628 if (fxt1_choose(vec
, n_vect
, reord
, n_comp
, n
) != 0) {
629 fxt1_lloyd(vec
, n_vect
, reord
, n_comp
, n
);
632 FX64_MOV32(hi
, 6); /* alpha = "011" + lerp = 0 */
633 for (j
= n_vect
- 1; j
>= 0; j
--) {
636 FX64_OR32(hi
, (GLuint
)(vec
[j
][ACOMP
] / 8.0F
));
638 for (j
= n_vect
- 1; j
>= 0; j
--) {
639 for (i
= 0; i
< n_comp
- 1; i
++) {
642 FX64_OR32(hi
, (GLuint
)(vec
[j
][i
] / 8.0F
));
645 ((Fx64
*)cc
)[1] = hi
;
648 /* right microtile */
649 for (k
= N_TEXELS
- 1; k
>= N_TEXELS
/2; k
--) {
651 lohi
|= fxt1_bestcol(vec
, n_vect
+ 1, input
[k
], n_comp
);
654 for (; k
>= 0; k
--) {
656 lolo
|= fxt1_bestcol(vec
, n_vect
+ 1, input
[k
], n_comp
);
664 fxt1_quantize_ALPHA1 (GLuint
*cc
,
665 GLubyte input
[N_TEXELS
][MAX_COMP
])
667 const GLint n_vect
= 3; /* highest vector number in each microtile */
668 const GLint n_comp
= 4; /* 4 components: R, G, B, A */
669 GLfloat vec
[1 + 1 + 1][MAX_COMP
]; /* 1.5 extrema for each sub-block */
670 GLfloat b
, iv
[MAX_COMP
]; /* interpolation vector */
672 Fx64 hi
; /* high quadword */
673 GLuint lohi
, lolo
; /* low quadword: hi dword, lo dword */
677 GLint minColL
= 0, maxColL
= 0;
678 GLint minColR
= 0, maxColR
= 0;
679 GLint sumL
= 0, sumR
= 0;
681 /* Our solution here is to find the darkest and brightest colors in
682 * the 4x4 tile and use those as the two representative colors.
683 * There are probably better algorithms to use (histogram-based).
686 while ((minColL
== maxColL
) && nn_comp
) {
687 minSum
= 2000; /* big enough */
688 maxSum
= -1; /* small enough */
689 for (k
= 0; k
< N_TEXELS
/ 2; k
++) {
691 for (i
= 0; i
< nn_comp
; i
++) {
709 while ((minColR
== maxColR
) && nn_comp
) {
710 minSum
= 2000; /* big enough */
711 maxSum
= -1; /* small enough */
712 for (k
= N_TEXELS
/ 2; k
< N_TEXELS
; k
++) {
714 for (i
= 0; i
< nn_comp
; i
++) {
731 /* choose the common vector (yuck!) */
734 GLint v1
= 0, v2
= 0;
735 GLfloat err
= 1e9
; /* big enough */
736 GLfloat tv
[2 * 2][MAX_COMP
]; /* 2 extrema for each sub-block */
737 for (i
= 0; i
< n_comp
; i
++) {
738 tv
[0][i
] = input
[minColL
][i
];
739 tv
[1][i
] = input
[maxColL
][i
];
740 tv
[2][i
] = input
[minColR
][i
];
741 tv
[3][i
] = input
[maxColR
][i
];
743 for (j1
= 0; j1
< 2; j1
++) {
744 for (j2
= 2; j2
< 4; j2
++) {
746 for (i
= 0; i
< n_comp
; i
++) {
747 e
+= (tv
[j1
][i
] - tv
[j2
][i
]) * (tv
[j1
][i
] - tv
[j2
][i
]);
756 for (i
= 0; i
< n_comp
; i
++) {
757 vec
[0][i
] = tv
[1 - v1
][i
];
758 vec
[1][i
] = (tv
[v1
][i
] * sumL
+ tv
[v2
][i
] * sumR
) / (sumL
+ sumR
);
759 vec
[2][i
] = tv
[5 - v2
][i
];
765 if (minColL
!= maxColL
) {
766 /* compute interpolation vector */
767 MAKEIVEC(n_vect
, n_comp
, iv
, b
, vec
[0], vec
[1]);
771 for (k
= N_TEXELS
/ 2 - 1; k
>= 0; k
--) {
773 /* interpolate color */
774 CALCCDOT(texel
, n_vect
, n_comp
, iv
, b
, input
[k
]);
783 /* right microtile */
785 if (minColR
!= maxColR
) {
786 /* compute interpolation vector */
787 MAKEIVEC(n_vect
, n_comp
, iv
, b
, vec
[2], vec
[1]);
791 for (k
= N_TEXELS
- 1; k
>= N_TEXELS
/ 2; k
--) {
793 /* interpolate color */
794 CALCCDOT(texel
, n_vect
, n_comp
, iv
, b
, input
[k
]);
803 FX64_MOV32(hi
, 7); /* alpha = "011" + lerp = 1 */
804 for (j
= n_vect
- 1; j
>= 0; j
--) {
807 FX64_OR32(hi
, (GLuint
)(vec
[j
][ACOMP
] / 8.0F
));
809 for (j
= n_vect
- 1; j
>= 0; j
--) {
810 for (i
= 0; i
< n_comp
- 1; i
++) {
813 FX64_OR32(hi
, (GLuint
)(vec
[j
][i
] / 8.0F
));
816 ((Fx64
*)cc
)[1] = hi
;
821 fxt1_quantize_HI (GLuint
*cc
,
822 GLubyte input
[N_TEXELS
][MAX_COMP
],
823 GLubyte reord
[N_TEXELS
][MAX_COMP
], GLint n
)
825 const GLint n_vect
= 6; /* highest vector number */
826 const GLint n_comp
= 3; /* 3 components: R, G, B */
827 GLfloat b
= 0.0F
; /* phoudoin: silent compiler! */
828 GLfloat iv
[MAX_COMP
]; /* interpolation vector */
830 GLuint hihi
; /* high quadword: hi dword */
832 GLint minSum
= 2000; /* big enough */
833 GLint maxSum
= -1; /* small enough */
834 GLint minCol
= 0; /* phoudoin: silent compiler! */
835 GLint maxCol
= 0; /* phoudoin: silent compiler! */
837 /* Our solution here is to find the darkest and brightest colors in
838 * the 8x4 tile and use those as the two representative colors.
839 * There are probably better algorithms to use (histogram-based).
841 for (k
= 0; k
< n
; k
++) {
843 for (i
= 0; i
< n_comp
; i
++) {
856 hihi
= 0; /* cc-hi = "00" */
857 for (i
= 0; i
< n_comp
; i
++) {
860 hihi
|= reord
[maxCol
][i
] >> 3;
862 for (i
= 0; i
< n_comp
; i
++) {
865 hihi
|= reord
[minCol
][i
] >> 3;
868 cc
[0] = cc
[1] = cc
[2] = 0;
870 /* compute interpolation vector */
871 if (minCol
!= maxCol
) {
872 MAKEIVEC(n_vect
, n_comp
, iv
, b
, reord
[minCol
], reord
[maxCol
]);
876 for (k
= N_TEXELS
- 1; k
>= 0; k
--) {
878 GLuint
*kk
= (GLuint
*)((char *)cc
+ t
/ 8);
879 GLint texel
= n_vect
+ 1; /* transparent black */
881 if (!ISTBLACK(input
[k
])) {
882 if (minCol
!= maxCol
) {
883 /* interpolate color */
884 CALCCDOT(texel
, n_vect
, n_comp
, iv
, b
, input
[k
]);
886 kk
[0] |= texel
<< (t
& 7);
890 kk
[0] |= texel
<< (t
& 7);
897 fxt1_quantize_MIXED1 (GLuint
*cc
,
898 GLubyte input
[N_TEXELS
][MAX_COMP
])
900 const GLint n_vect
= 2; /* highest vector number in each microtile */
901 const GLint n_comp
= 3; /* 3 components: R, G, B */
902 GLubyte vec
[2 * 2][MAX_COMP
]; /* 2 extrema for each sub-block */
903 GLfloat b
, iv
[MAX_COMP
]; /* interpolation vector */
905 Fx64 hi
; /* high quadword */
906 GLuint lohi
, lolo
; /* low quadword: hi dword, lo dword */
910 GLint minColL
= 0, maxColL
= -1;
911 GLint minColR
= 0, maxColR
= -1;
913 /* Our solution here is to find the darkest and brightest colors in
914 * the 4x4 tile and use those as the two representative colors.
915 * There are probably better algorithms to use (histogram-based).
917 minSum
= 2000; /* big enough */
918 maxSum
= -1; /* small enough */
919 for (k
= 0; k
< N_TEXELS
/ 2; k
++) {
920 if (!ISTBLACK(input
[k
])) {
922 for (i
= 0; i
< n_comp
; i
++) {
935 minSum
= 2000; /* big enough */
936 maxSum
= -1; /* small enough */
937 for (; k
< N_TEXELS
; k
++) {
938 if (!ISTBLACK(input
[k
])) {
940 for (i
= 0; i
< n_comp
; i
++) {
956 /* all transparent black */
958 for (i
= 0; i
< n_comp
; i
++) {
964 for (i
= 0; i
< n_comp
; i
++) {
965 vec
[0][i
] = input
[minColL
][i
];
966 vec
[1][i
] = input
[maxColL
][i
];
968 if (minColL
!= maxColL
) {
969 /* compute interpolation vector */
970 MAKEIVEC(n_vect
, n_comp
, iv
, b
, vec
[0], vec
[1]);
974 for (k
= N_TEXELS
/ 2 - 1; k
>= 0; k
--) {
975 GLint texel
= n_vect
+ 1; /* transparent black */
976 if (!ISTBLACK(input
[k
])) {
977 /* interpolate color */
978 CALCCDOT(texel
, n_vect
, n_comp
, iv
, b
, input
[k
]);
988 /* right microtile */
990 /* all transparent black */
992 for (i
= 0; i
< n_comp
; i
++) {
998 for (i
= 0; i
< n_comp
; i
++) {
999 vec
[2][i
] = input
[minColR
][i
];
1000 vec
[3][i
] = input
[maxColR
][i
];
1002 if (minColR
!= maxColR
) {
1003 /* compute interpolation vector */
1004 MAKEIVEC(n_vect
, n_comp
, iv
, b
, vec
[2], vec
[3]);
1008 for (k
= N_TEXELS
- 1; k
>= N_TEXELS
/ 2; k
--) {
1009 GLint texel
= n_vect
+ 1; /* transparent black */
1010 if (!ISTBLACK(input
[k
])) {
1011 /* interpolate color */
1012 CALCCDOT(texel
, n_vect
, n_comp
, iv
, b
, input
[k
]);
1022 FX64_MOV32(hi
, 9 | (vec
[3][GCOMP
] & 4) | ((vec
[1][GCOMP
] >> 1) & 2)); /* chroma = "1" */
1023 for (j
= 2 * 2 - 1; j
>= 0; j
--) {
1024 for (i
= 0; i
< n_comp
; i
++) {
1027 FX64_OR32(hi
, vec
[j
][i
] >> 3);
1030 ((Fx64
*)cc
)[1] = hi
;
1035 fxt1_quantize_MIXED0 (GLuint
*cc
,
1036 GLubyte input
[N_TEXELS
][MAX_COMP
])
1038 const GLint n_vect
= 3; /* highest vector number in each microtile */
1039 const GLint n_comp
= 3; /* 3 components: R, G, B */
1040 GLubyte vec
[2 * 2][MAX_COMP
]; /* 2 extrema for each sub-block */
1041 GLfloat b
, iv
[MAX_COMP
]; /* interpolation vector */
1043 Fx64 hi
; /* high quadword */
1044 GLuint lohi
, lolo
; /* low quadword: hi dword, lo dword */
1046 GLint minColL
= 0, maxColL
= 0;
1047 GLint minColR
= 0, maxColR
= 0;
1052 /* Our solution here is to find the darkest and brightest colors in
1053 * the 4x4 tile and use those as the two representative colors.
1054 * There are probably better algorithms to use (histogram-based).
1056 minSum
= 2000; /* big enough */
1057 maxSum
= -1; /* small enough */
1058 for (k
= 0; k
< N_TEXELS
/ 2; k
++) {
1060 for (i
= 0; i
< n_comp
; i
++) {
1072 minSum
= 2000; /* big enough */
1073 maxSum
= -1; /* small enough */
1074 for (; k
< N_TEXELS
; k
++) {
1076 for (i
= 0; i
< n_comp
; i
++) {
1091 GLint maxVarL
= fxt1_variance(NULL
, input
, n_comp
, N_TEXELS
/ 2);
1092 GLint maxVarR
= fxt1_variance(NULL
, &input
[N_TEXELS
/ 2], n_comp
, N_TEXELS
/ 2);
1094 /* Scan the channel with max variance for lo & hi
1095 * and use those as the two representative colors.
1097 minVal
= 2000; /* big enough */
1098 maxVal
= -1; /* small enough */
1099 for (k
= 0; k
< N_TEXELS
/ 2; k
++) {
1100 GLint t
= input
[k
][maxVarL
];
1110 minVal
= 2000; /* big enough */
1111 maxVal
= -1; /* small enough */
1112 for (; k
< N_TEXELS
; k
++) {
1113 GLint t
= input
[k
][maxVarR
];
1125 /* left microtile */
1127 for (i
= 0; i
< n_comp
; i
++) {
1128 vec
[0][i
] = input
[minColL
][i
];
1129 vec
[1][i
] = input
[maxColL
][i
];
1131 if (minColL
!= maxColL
) {
1132 /* compute interpolation vector */
1133 MAKEIVEC(n_vect
, n_comp
, iv
, b
, vec
[0], vec
[1]);
1137 for (k
= N_TEXELS
/ 2 - 1; k
>= 0; k
--) {
1139 /* interpolate color */
1140 CALCCDOT(texel
, n_vect
, n_comp
, iv
, b
, input
[k
]);
1146 /* funky encoding for LSB of green */
1147 if ((GLint
)((lolo
>> 1) & 1) != (((vec
[1][GCOMP
] ^ vec
[0][GCOMP
]) >> 2) & 1)) {
1148 for (i
= 0; i
< n_comp
; i
++) {
1149 vec
[1][i
] = input
[minColL
][i
];
1150 vec
[0][i
] = input
[maxColL
][i
];
1158 /* right microtile */
1160 for (i
= 0; i
< n_comp
; i
++) {
1161 vec
[2][i
] = input
[minColR
][i
];
1162 vec
[3][i
] = input
[maxColR
][i
];
1164 if (minColR
!= maxColR
) {
1165 /* compute interpolation vector */
1166 MAKEIVEC(n_vect
, n_comp
, iv
, b
, vec
[2], vec
[3]);
1170 for (k
= N_TEXELS
- 1; k
>= N_TEXELS
/ 2; k
--) {
1172 /* interpolate color */
1173 CALCCDOT(texel
, n_vect
, n_comp
, iv
, b
, input
[k
]);
1179 /* funky encoding for LSB of green */
1180 if ((GLint
)((lohi
>> 1) & 1) != (((vec
[3][GCOMP
] ^ vec
[2][GCOMP
]) >> 2) & 1)) {
1181 for (i
= 0; i
< n_comp
; i
++) {
1182 vec
[3][i
] = input
[minColR
][i
];
1183 vec
[2][i
] = input
[maxColR
][i
];
1191 FX64_MOV32(hi
, 8 | (vec
[3][GCOMP
] & 4) | ((vec
[1][GCOMP
] >> 1) & 2)); /* chroma = "1" */
1192 for (j
= 2 * 2 - 1; j
>= 0; j
--) {
1193 for (i
= 0; i
< n_comp
; i
++) {
1196 FX64_OR32(hi
, vec
[j
][i
] >> 3);
1199 ((Fx64
*)cc
)[1] = hi
;
1204 fxt1_quantize (GLuint
*cc
, const GLubyte
*lines
[], GLint comps
)
1207 GLubyte reord
[N_TEXELS
][MAX_COMP
];
1209 GLubyte input
[N_TEXELS
][MAX_COMP
];
1213 /* make the whole block opaque */
1214 memset(input
, -1, sizeof(input
));
1217 /* 8 texels each line */
1218 for (l
= 0; l
< 4; l
++) {
1219 for (k
= 0; k
< 4; k
++) {
1220 for (i
= 0; i
< comps
; i
++) {
1221 input
[k
+ l
* 4][i
] = *lines
[l
]++;
1224 for (; k
< 8; k
++) {
1225 for (i
= 0; i
< comps
; i
++) {
1226 input
[k
+ l
* 4 + 12][i
] = *lines
[l
]++;
1232 * 00, 01, 02, 03, 08, 09, 0a, 0b
1233 * 10, 11, 12, 13, 18, 19, 1a, 1b
1234 * 04, 05, 06, 07, 0c, 0d, 0e, 0f
1235 * 14, 15, 16, 17, 1c, 1d, 1e, 1f
1239 * stupidity flows forth from this
1244 /* skip all transparent black texels */
1246 for (k
= 0; k
< N_TEXELS
; k
++) {
1247 /* test all components against 0 */
1248 if (!ISTBLACK(input
[k
])) {
1249 /* texel is not transparent black */
1250 COPY_4UBV(reord
[l
], input
[k
]);
1251 if (reord
[l
][ACOMP
] < (255 - ALPHA_TS
)) {
1252 /* non-opaque texel */
1262 fxt1_quantize_ALPHA0(cc
, input
, reord
, l
);
1263 } else if (l
== 0) {
1264 cc
[0] = cc
[1] = cc
[2] = -1;
1266 } else if (l
< N_TEXELS
) {
1267 fxt1_quantize_HI(cc
, input
, reord
, l
);
1269 fxt1_quantize_CHROMA(cc
, input
);
1271 (void)fxt1_quantize_ALPHA1
;
1272 (void)fxt1_quantize_MIXED1
;
1273 (void)fxt1_quantize_MIXED0
;
1276 fxt1_quantize_ALPHA1(cc
, input
);
1277 } else if (l
== 0) {
1278 cc
[0] = cc
[1] = cc
[2] = ~0u;
1280 } else if (l
< N_TEXELS
) {
1281 fxt1_quantize_MIXED1(cc
, input
);
1283 fxt1_quantize_MIXED0(cc
, input
);
1285 (void)fxt1_quantize_ALPHA0
;
1286 (void)fxt1_quantize_HI
;
1287 (void)fxt1_quantize_CHROMA
;
1294 * Upscale an image by replication, not (typical) stretching.
1295 * We use this when the image width or height is less than a
1296 * certain size (4, 8) and we need to upscale an image.
1299 upscale_teximage2d(GLsizei inWidth
, GLsizei inHeight
,
1300 GLsizei outWidth
, GLsizei outHeight
,
1301 GLint comps
, const GLubyte
*src
, GLint srcRowStride
,
1306 ASSERT(outWidth
>= inWidth
);
1307 ASSERT(outHeight
>= inHeight
);
1309 ASSERT(inWidth
== 1 || inWidth
== 2 || inHeight
== 1 || inHeight
== 2);
1310 ASSERT((outWidth
& 3) == 0);
1311 ASSERT((outHeight
& 3) == 0);
1314 for (i
= 0; i
< outHeight
; i
++) {
1315 const GLint ii
= i
% inHeight
;
1316 for (j
= 0; j
< outWidth
; j
++) {
1317 const GLint jj
= j
% inWidth
;
1318 for (k
= 0; k
< comps
; k
++) {
1319 dest
[(i
* outWidth
+ j
) * comps
+ k
]
1320 = src
[ii
* srcRowStride
+ jj
* comps
+ k
];
1328 fxt1_encode (GLuint width
, GLuint height
, GLint comps
,
1329 const void *source
, GLint srcRowStride
,
1330 void *dest
, GLint destRowStride
)
1333 const GLubyte
*data
;
1334 GLuint
*encoded
= (GLuint
*)dest
;
1335 void *newSource
= NULL
;
1337 assert(comps
== 3 || comps
== 4);
1339 /* Replicate image if width is not M8 or height is not M4 */
1340 if ((width
& 7) | (height
& 3)) {
1341 GLint newWidth
= (width
+ 7) & ~7;
1342 GLint newHeight
= (height
+ 3) & ~3;
1343 newSource
= malloc(comps
* newWidth
* newHeight
* sizeof(GLubyte
));
1345 GET_CURRENT_CONTEXT(ctx
);
1346 _mesa_error(ctx
, GL_OUT_OF_MEMORY
, "texture compression");
1349 upscale_teximage2d(width
, height
, newWidth
, newHeight
,
1350 comps
, (const GLubyte
*) source
,
1351 srcRowStride
, (GLubyte
*) newSource
);
1355 srcRowStride
= comps
* newWidth
;
1358 data
= (const GLubyte
*) source
;
1359 destRowStride
= (destRowStride
- width
* 2) / 4;
1360 for (y
= 0; y
< height
; y
+= 4) {
1361 GLuint offs
= 0 + (y
+ 0) * srcRowStride
;
1362 for (x
= 0; x
< width
; x
+= 8) {
1363 const GLubyte
*lines
[4];
1364 lines
[0] = &data
[offs
];
1365 lines
[1] = lines
[0] + srcRowStride
;
1366 lines
[2] = lines
[1] + srcRowStride
;
1367 lines
[3] = lines
[2] + srcRowStride
;
1369 fxt1_quantize(encoded
, lines
, comps
);
1370 /* 128 bits per 8x4 block */
1373 encoded
+= destRowStride
;
1377 if (newSource
!= NULL
) {
1383 /***************************************************************************\
1386 * The decoder is based on GL_3DFX_texture_compression_FXT1
1387 * specification and serves as a concept for the encoder.
1388 \***************************************************************************/
1391 /* lookup table for scaling 5 bit colors up to 8 bits */
1392 static const GLubyte _rgb_scale_5
[] = {
1393 0, 8, 16, 25, 33, 41, 49, 58,
1394 66, 74, 82, 90, 99, 107, 115, 123,
1395 132, 140, 148, 156, 165, 173, 181, 189,
1396 197, 206, 214, 222, 230, 239, 247, 255
1399 /* lookup table for scaling 6 bit colors up to 8 bits */
1400 static const GLubyte _rgb_scale_6
[] = {
1401 0, 4, 8, 12, 16, 20, 24, 28,
1402 32, 36, 40, 45, 49, 53, 57, 61,
1403 65, 69, 73, 77, 81, 85, 89, 93,
1404 97, 101, 105, 109, 113, 117, 121, 125,
1405 130, 134, 138, 142, 146, 150, 154, 158,
1406 162, 166, 170, 174, 178, 182, 186, 190,
1407 194, 198, 202, 206, 210, 215, 219, 223,
1408 227, 231, 235, 239, 243, 247, 251, 255
/*
 * Decoder helpers.
 *
 * CC_SEL(cc, which): view cc as an array of 32-bit words, pick word
 *    which / 32 and shift it right by which % 32.  The result is NOT
 *    masked; callers mask it themselves (UP5/UP6 do, or "& 1").
 *    The data is only read, so the cast keeps it const.
 * UP5(c): expand the low 5 bits of c to 8 bits via _rgb_scale_5.
 * UP6(c, b): expand the 6-bit value formed by the low 5 bits of c followed
 *    by the low bit of b to 8 bits via _rgb_scale_6.
 * LERP(n, t, c0, c1): blend ((n - t) * c0 + t * c1) / n with n / 2 added
 *    before the division, so integer operands round to nearest.  The whole
 *    expansion is parenthesized so the macro can be used as an operand.
 */
#define CC_SEL(cc, which) (((const GLuint *)(cc))[(which) / 32] >> ((which) & 31))
#define UP5(c) _rgb_scale_5[(c) & 31]
#define UP6(c, b) _rgb_scale_6[(((c) & 31) << 1) | ((b) & 1)]
#define LERP(n, t, c0, c1) ((((n) - (t)) * (c0) + (t) * (c1) + (n) / 2) / (n))
1419 fxt1_decode_1HI (const GLubyte
*code
, GLint t
, GLubyte
*rgba
)
1424 cc
= (const GLuint
*)(code
+ t
/ 8);
1425 t
= (cc
[0] >> (t
& 7)) & 7;
1428 rgba
[RCOMP
] = rgba
[GCOMP
] = rgba
[BCOMP
] = rgba
[ACOMP
] = 0;
1431 cc
= (const GLuint
*)(code
+ 12);
1433 b
= UP5(CC_SEL(cc
, 0));
1434 g
= UP5(CC_SEL(cc
, 5));
1435 r
= UP5(CC_SEL(cc
, 10));
1436 } else if (t
== 6) {
1437 b
= UP5(CC_SEL(cc
, 15));
1438 g
= UP5(CC_SEL(cc
, 20));
1439 r
= UP5(CC_SEL(cc
, 25));
1441 b
= LERP(6, t
, UP5(CC_SEL(cc
, 0)), UP5(CC_SEL(cc
, 15)));
1442 g
= LERP(6, t
, UP5(CC_SEL(cc
, 5)), UP5(CC_SEL(cc
, 20)));
1443 r
= LERP(6, t
, UP5(CC_SEL(cc
, 10)), UP5(CC_SEL(cc
, 25)));
1454 fxt1_decode_1CHROMA (const GLubyte
*code
, GLint t
, GLubyte
*rgba
)
1459 cc
= (const GLuint
*)code
;
1464 t
= (cc
[0] >> (t
* 2)) & 3;
1467 cc
= (const GLuint
*)(code
+ 8 + t
/ 8);
1468 kk
= cc
[0] >> (t
& 7);
1469 rgba
[BCOMP
] = UP5(kk
);
1470 rgba
[GCOMP
] = UP5(kk
>> 5);
1471 rgba
[RCOMP
] = UP5(kk
>> 10);
1477 fxt1_decode_1MIXED (const GLubyte
*code
, GLint t
, GLubyte
*rgba
)
1483 cc
= (const GLuint
*)code
;
1486 t
= (cc
[1] >> (t
* 2)) & 3;
1488 col
[0][BCOMP
] = (*(const GLuint
*)(code
+ 11)) >> 6;
1489 col
[0][GCOMP
] = CC_SEL(cc
, 99);
1490 col
[0][RCOMP
] = CC_SEL(cc
, 104);
1492 col
[1][BCOMP
] = CC_SEL(cc
, 109);
1493 col
[1][GCOMP
] = CC_SEL(cc
, 114);
1494 col
[1][RCOMP
] = CC_SEL(cc
, 119);
1495 glsb
= CC_SEL(cc
, 126);
1496 selb
= CC_SEL(cc
, 33);
1498 t
= (cc
[0] >> (t
* 2)) & 3;
1500 col
[0][BCOMP
] = CC_SEL(cc
, 64);
1501 col
[0][GCOMP
] = CC_SEL(cc
, 69);
1502 col
[0][RCOMP
] = CC_SEL(cc
, 74);
1504 col
[1][BCOMP
] = CC_SEL(cc
, 79);
1505 col
[1][GCOMP
] = CC_SEL(cc
, 84);
1506 col
[1][RCOMP
] = CC_SEL(cc
, 89);
1507 glsb
= CC_SEL(cc
, 125);
1508 selb
= CC_SEL(cc
, 1);
1511 if (CC_SEL(cc
, 124) & 1) {
1516 rgba
[RCOMP
] = rgba
[BCOMP
] = rgba
[GCOMP
] = rgba
[ACOMP
] = 0;
1520 b
= UP5(col
[0][BCOMP
]);
1521 g
= UP5(col
[0][GCOMP
]);
1522 r
= UP5(col
[0][RCOMP
]);
1523 } else if (t
== 2) {
1524 b
= UP5(col
[1][BCOMP
]);
1525 g
= UP6(col
[1][GCOMP
], glsb
);
1526 r
= UP5(col
[1][RCOMP
]);
1528 b
= (UP5(col
[0][BCOMP
]) + UP5(col
[1][BCOMP
])) / 2;
1529 g
= (UP5(col
[0][GCOMP
]) + UP6(col
[1][GCOMP
], glsb
)) / 2;
1530 r
= (UP5(col
[0][RCOMP
]) + UP5(col
[1][RCOMP
])) / 2;
1541 b
= UP5(col
[0][BCOMP
]);
1542 g
= UP6(col
[0][GCOMP
], glsb
^ selb
);
1543 r
= UP5(col
[0][RCOMP
]);
1544 } else if (t
== 3) {
1545 b
= UP5(col
[1][BCOMP
]);
1546 g
= UP6(col
[1][GCOMP
], glsb
);
1547 r
= UP5(col
[1][RCOMP
]);
1549 b
= LERP(3, t
, UP5(col
[0][BCOMP
]), UP5(col
[1][BCOMP
]));
1550 g
= LERP(3, t
, UP6(col
[0][GCOMP
], glsb
^ selb
),
1551 UP6(col
[1][GCOMP
], glsb
));
1552 r
= LERP(3, t
, UP5(col
[0][RCOMP
]), UP5(col
[1][RCOMP
]));
1563 fxt1_decode_1ALPHA (const GLubyte
*code
, GLint t
, GLubyte
*rgba
)
1568 cc
= (const GLuint
*)code
;
1569 if (CC_SEL(cc
, 124) & 1) {
1575 t
= (cc
[1] >> (t
* 2)) & 3;
1577 col0
[BCOMP
] = (*(const GLuint
*)(code
+ 11)) >> 6;
1578 col0
[GCOMP
] = CC_SEL(cc
, 99);
1579 col0
[RCOMP
] = CC_SEL(cc
, 104);
1580 col0
[ACOMP
] = CC_SEL(cc
, 119);
1582 t
= (cc
[0] >> (t
* 2)) & 3;
1584 col0
[BCOMP
] = CC_SEL(cc
, 64);
1585 col0
[GCOMP
] = CC_SEL(cc
, 69);
1586 col0
[RCOMP
] = CC_SEL(cc
, 74);
1587 col0
[ACOMP
] = CC_SEL(cc
, 109);
1591 b
= UP5(col0
[BCOMP
]);
1592 g
= UP5(col0
[GCOMP
]);
1593 r
= UP5(col0
[RCOMP
]);
1594 a
= UP5(col0
[ACOMP
]);
1595 } else if (t
== 3) {
1596 b
= UP5(CC_SEL(cc
, 79));
1597 g
= UP5(CC_SEL(cc
, 84));
1598 r
= UP5(CC_SEL(cc
, 89));
1599 a
= UP5(CC_SEL(cc
, 114));
1601 b
= LERP(3, t
, UP5(col0
[BCOMP
]), UP5(CC_SEL(cc
, 79)));
1602 g
= LERP(3, t
, UP5(col0
[GCOMP
]), UP5(CC_SEL(cc
, 84)));
1603 r
= LERP(3, t
, UP5(col0
[RCOMP
]), UP5(CC_SEL(cc
, 89)));
1604 a
= LERP(3, t
, UP5(col0
[ACOMP
]), UP5(CC_SEL(cc
, 114)));
1613 t
= (cc
[0] >> (t
* 2)) & 3;
1620 cc
= (const GLuint
*)code
;
1621 a
= UP5(cc
[3] >> (t
* 5 + 13));
1623 cc
= (const GLuint
*)(code
+ 8 + t
/ 8);
1624 kk
= cc
[0] >> (t
& 7);
1638 fxt1_decode_1 (const void *texture
, GLint stride
, /* in pixels */
1639 GLint i
, GLint j
, GLubyte
*rgba
)
1641 static void (*decode_1
[]) (const GLubyte
*, GLint
, GLubyte
*) = {
1642 fxt1_decode_1HI
, /* cc-high = "00?" */
1643 fxt1_decode_1HI
, /* cc-high = "00?" */
1644 fxt1_decode_1CHROMA
, /* cc-chroma = "010" */
1645 fxt1_decode_1ALPHA
, /* alpha = "011" */
1646 fxt1_decode_1MIXED
, /* mixed = "1??" */
1647 fxt1_decode_1MIXED
, /* mixed = "1??" */
1648 fxt1_decode_1MIXED
, /* mixed = "1??" */
1649 fxt1_decode_1MIXED
/* mixed = "1??" */
1652 const GLubyte
*code
= (const GLubyte
*)texture
+
1653 ((j
/ 4) * (stride
/ 8) + (i
/ 8)) * 16;
1654 GLint mode
= CC_SEL(code
, 125);
1662 decode_1
[mode
](code
, t
, rgba
);
1666 #endif /* FEATURE_texture_fxt1 */