2 * Mesa 3-D graphics library
5 * Copyright (C) 1999-2008 Brian Paul All Rights Reserved.
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
14 * The above copyright notice and this permission notice shall be included
15 * in all copies or substantial portions of the Software.
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
21 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27 * \file texcompress_fxt1.c
28 * GL_3DFX_texture_compression_FXT1 support.
37 #include "mfeatures.h"
39 #include "texcompress.h"
40 #include "texcompress_fxt1.h"
42 #include "swrast/s_context.h"
45 #if FEATURE_texture_fxt1
49 fxt1_encode (GLuint width
, GLuint height
, GLint comps
,
50 const void *source
, GLint srcRowStride
,
51 void *dest
, GLint destRowStride
);
54 fxt1_decode_1 (const void *texture
, GLint stride
,
55 GLint i
, GLint j
, GLubyte
*rgba
);
59 * Store user's image in rgb_fxt1 format.
62 _mesa_texstore_rgb_fxt1(TEXSTORE_PARAMS
)
64 const GLubyte
*pixels
;
67 const GLubyte
*tempImage
= NULL
;
69 ASSERT(dstFormat
== MESA_FORMAT_RGB_FXT1
);
71 if (srcFormat
!= GL_RGB
||
72 srcType
!= GL_UNSIGNED_BYTE
||
73 ctx
->_ImageTransferState
||
74 srcPacking
->RowLength
!= srcWidth
||
75 srcPacking
->SwapBytes
) {
76 /* convert image to RGB/GLubyte */
77 tempImage
= _mesa_make_temp_ubyte_image(ctx
, dims
,
79 _mesa_get_format_base_format(dstFormat
),
80 srcWidth
, srcHeight
, srcDepth
,
81 srcFormat
, srcType
, srcAddr
,
84 return GL_FALSE
; /* out of memory */
86 srcRowStride
= 3 * srcWidth
;
90 pixels
= _mesa_image_address2d(srcPacking
, srcAddr
, srcWidth
, srcHeight
,
91 srcFormat
, srcType
, 0, 0);
93 srcRowStride
= _mesa_image_row_stride(srcPacking
, srcWidth
, srcFormat
,
94 srcType
) / sizeof(GLubyte
);
99 fxt1_encode(srcWidth
, srcHeight
, 3, pixels
, srcRowStride
,
103 free((void*) tempImage
);
110 * Store user's image in rgba_fxt1 format.
113 _mesa_texstore_rgba_fxt1(TEXSTORE_PARAMS
)
115 const GLubyte
*pixels
;
118 const GLubyte
*tempImage
= NULL
;
120 ASSERT(dstFormat
== MESA_FORMAT_RGBA_FXT1
);
122 if (srcFormat
!= GL_RGBA
||
123 srcType
!= GL_UNSIGNED_BYTE
||
124 ctx
->_ImageTransferState
||
125 srcPacking
->SwapBytes
) {
126 /* convert image to RGBA/GLubyte */
127 tempImage
= _mesa_make_temp_ubyte_image(ctx
, dims
,
129 _mesa_get_format_base_format(dstFormat
),
130 srcWidth
, srcHeight
, srcDepth
,
131 srcFormat
, srcType
, srcAddr
,
134 return GL_FALSE
; /* out of memory */
136 srcRowStride
= 4 * srcWidth
;
140 pixels
= _mesa_image_address2d(srcPacking
, srcAddr
, srcWidth
, srcHeight
,
141 srcFormat
, srcType
, 0, 0);
143 srcRowStride
= _mesa_image_row_stride(srcPacking
, srcWidth
, srcFormat
,
144 srcType
) / sizeof(GLubyte
);
149 fxt1_encode(srcWidth
, srcHeight
, 4, pixels
, srcRowStride
,
153 free((void*) tempImage
);
160 _mesa_fetch_texel_2d_f_rgba_fxt1( const struct swrast_texture_image
*texImage
,
161 GLint i
, GLint j
, GLint k
, GLfloat
*texel
)
163 /* just sample as GLubyte and convert to float here */
166 fxt1_decode_1(texImage
->Data
, texImage
->RowStride
, i
, j
, rgba
);
167 texel
[RCOMP
] = UBYTE_TO_FLOAT(rgba
[RCOMP
]);
168 texel
[GCOMP
] = UBYTE_TO_FLOAT(rgba
[GCOMP
]);
169 texel
[BCOMP
] = UBYTE_TO_FLOAT(rgba
[BCOMP
]);
170 texel
[ACOMP
] = UBYTE_TO_FLOAT(rgba
[ACOMP
]);
175 _mesa_fetch_texel_2d_f_rgb_fxt1( const struct swrast_texture_image
*texImage
,
176 GLint i
, GLint j
, GLint k
, GLfloat
*texel
)
178 /* just sample as GLubyte and convert to float here */
181 fxt1_decode_1(texImage
->Data
, texImage
->RowStride
, i
, j
, rgba
);
182 texel
[RCOMP
] = UBYTE_TO_FLOAT(rgba
[RCOMP
]);
183 texel
[GCOMP
] = UBYTE_TO_FLOAT(rgba
[GCOMP
]);
184 texel
[BCOMP
] = UBYTE_TO_FLOAT(rgba
[BCOMP
]);
190 /***************************************************************************\
193 * The encoder was built by reversing the decoder,
194 * and is vaguely based on Texus2 by 3dfx. Note that this code
195 * is merely a proof of concept, since it is highly UNoptimized;
196 * moreover, it is sub-optimal due to initial conditions passed
197 * to Lloyd's algorithm (the interpolation modes are even worse).
198 \***************************************************************************/
201 #define MAX_COMP 4 /* ever needed maximum number of components in texel */
202 #define MAX_VECT 4 /* ever needed maximum number of base vectors to find */
203 #define N_TEXELS 32 /* number of texels in a block (always 32) */
204 #define LL_N_REP 50 /* number of iterations in lloyd's vq */
205 #define LL_RMS_D 10 /* fault tolerance (maximum delta) */
206 #define LL_RMS_E 255 /* fault tolerance (maximum error) */
207 #define ALPHA_TS 2 /* alpha threshold: (255 - ALPHA_TS) deemed opaque */
208 #define ISTBLACK(v) (*((GLuint *)(v)) == 0)
212 * Define a 64-bit unsigned integer type and macros
216 #define FX64_NATIVE 1
218 typedef uint64_t Fx64
;
220 #define FX64_MOV32(a, b) a = b
221 #define FX64_OR32(a, b) a |= b
222 #define FX64_SHL(a, c) a <<= c
226 #define FX64_NATIVE 0
232 #define FX64_MOV32(a, b) a.lo = b
233 #define FX64_OR32(a, b) a.lo |= b
235 #define FX64_SHL(a, c) \
238 a.hi = a.lo << ((c) - 32); \
241 a.hi = (a.hi << (c)) | (a.lo >> (32 - (c))); \
249 #define F(i) (GLfloat)1 /* can be used to obtain an oblong metric: 0.30 / 0.59 / 0.11 */
250 #define SAFECDOT 1 /* for paranoids */
252 #define MAKEIVEC(NV, NC, IV, B, V0, V1) \
254 /* compute interpolation vector */ \
258 for (i = 0; i < NC; i++) { \
259 IV[i] = (V1[i] - V0[i]) * F(i); \
260 d2 += IV[i] * IV[i]; \
262 rd2 = (GLfloat)NV / d2; \
264 for (i = 0; i < NC; i++) { \
266 B -= IV[i] * V0[i]; \
269 B = B * rd2 + 0.5f; \
272 #define CALCCDOT(TEXEL, NV, NC, IV, B, V)\
274 GLfloat dot = 0.0F; \
275 for (i = 0; i < NC; i++) { \
276 dot += V[i] * IV[i]; \
278 TEXEL = (GLint)(dot + B); \
282 } else if (TEXEL > NV) { \
290 fxt1_bestcol (GLfloat vec
[][MAX_COMP
], GLint nv
,
291 GLubyte input
[MAX_COMP
], GLint nc
)
293 GLint i
, j
, best
= -1;
294 GLfloat err
= 1e9
; /* big enough */
296 for (j
= 0; j
< nv
; j
++) {
298 for (i
= 0; i
< nc
; i
++) {
299 e
+= (vec
[j
][i
] - input
[i
]) * (vec
[j
][i
] - input
[i
]);
312 fxt1_worst (GLfloat vec
[MAX_COMP
],
313 GLubyte input
[N_TEXELS
][MAX_COMP
], GLint nc
, GLint n
)
315 GLint i
, k
, worst
= -1;
316 GLfloat err
= -1.0F
; /* small enough */
318 for (k
= 0; k
< n
; k
++) {
320 for (i
= 0; i
< nc
; i
++) {
321 e
+= (vec
[i
] - input
[k
][i
]) * (vec
[i
] - input
[k
][i
]);
334 fxt1_variance (GLdouble variance
[MAX_COMP
],
335 GLubyte input
[N_TEXELS
][MAX_COMP
], GLint nc
, GLint n
)
337 GLint i
, k
, best
= 0;
339 GLdouble var
, maxvar
= -1; /* small enough */
340 GLdouble teenth
= 1.0 / n
;
342 for (i
= 0; i
< nc
; i
++) {
344 for (k
= 0; k
< n
; k
++) {
345 GLint t
= input
[k
][i
];
349 var
= sx2
* teenth
- sx
* sx
* teenth
* teenth
;
364 fxt1_choose (GLfloat vec
[][MAX_COMP
], GLint nv
,
365 GLubyte input
[N_TEXELS
][MAX_COMP
], GLint nc
, GLint n
)
368 /* Choose colors from a grid.
372 for (j
= 0; j
< nv
; j
++) {
373 GLint m
= j
* (n
- 1) / (nv
- 1);
374 for (i
= 0; i
< nc
; i
++) {
375 vec
[j
][i
] = input
[m
][i
];
379 /* Our solution here is to find the darkest and brightest colors in
380 * the 8x4 tile and use those as the two representative colors.
381 * There are probably better algorithms to use (histogram-based).
384 GLint minSum
= 2000; /* big enough */
385 GLint maxSum
= -1; /* small enough */
386 GLint minCol
= 0; /* phoudoin: silent compiler! */
387 GLint maxCol
= 0; /* phoudoin: silent compiler! */
397 memset(hist
, 0, sizeof(hist
));
399 for (k
= 0; k
< n
; k
++) {
403 for (i
= 0; i
< nc
; i
++) {
408 for (l
= 0; l
< n
; l
++) {
417 } else if (hist
[l
].key
== key
) {
433 for (j
= 0; j
< lenh
; j
++) {
434 for (i
= 0; i
< nc
; i
++) {
435 vec
[j
][i
] = (GLfloat
)input
[hist
[j
].idx
][i
];
438 for (; j
< nv
; j
++) {
439 for (i
= 0; i
< nc
; i
++) {
440 vec
[j
][i
] = vec
[0][i
];
446 for (j
= 0; j
< nv
; j
++) {
447 for (i
= 0; i
< nc
; i
++) {
448 vec
[j
][i
] = ((nv
- 1 - j
) * input
[minCol
][i
] + j
* input
[maxCol
][i
] + (nv
- 1) / 2) / (GLfloat
)(nv
- 1);
458 fxt1_lloyd (GLfloat vec
[][MAX_COMP
], GLint nv
,
459 GLubyte input
[N_TEXELS
][MAX_COMP
], GLint nc
, GLint n
)
461 /* Use the generalized lloyd's algorithm for VQ:
462 * find 4 color vectors.
464 * for each sample color
465 * sort to nearest vector.
467 * replace each vector with the centroid of its matching colors.
469 * repeat until RMS doesn't improve.
471 * if a color vector has no samples, or becomes the same as another
472 * vector, replace it with the color which is farthest from a sample.
474 * vec[][MAX_COMP] initial vectors and resulting colors
475 * nv number of resulting colors required
476 * input[N_TEXELS][MAX_COMP] input texels
477 * nc number of components in input / vec
478 * n number of input samples
481 GLint sum
[MAX_VECT
][MAX_COMP
]; /* used to accumulate closest texels */
482 GLint cnt
[MAX_VECT
]; /* how many times a certain vector was chosen */
483 GLfloat error
, lasterror
= 1e9
;
488 for (rep
= 0; rep
< LL_N_REP
; rep
++) {
489 /* reset sums & counters */
490 for (j
= 0; j
< nv
; j
++) {
491 for (i
= 0; i
< nc
; i
++) {
498 /* scan whole block */
499 for (k
= 0; k
< n
; k
++) {
502 GLfloat err
= 1e9
; /* big enough */
503 /* determine best vector */
504 for (j
= 0; j
< nv
; j
++) {
505 GLfloat e
= (vec
[j
][0] - input
[k
][0]) * (vec
[j
][0] - input
[k
][0]) +
506 (vec
[j
][1] - input
[k
][1]) * (vec
[j
][1] - input
[k
][1]) +
507 (vec
[j
][2] - input
[k
][2]) * (vec
[j
][2] - input
[k
][2]);
509 e
+= (vec
[j
][3] - input
[k
][3]) * (vec
[j
][3] - input
[k
][3]);
517 GLint best
= fxt1_bestcol(vec
, nv
, input
[k
], nc
, &err
);
520 /* add in closest color */
521 for (i
= 0; i
< nc
; i
++) {
522 sum
[best
][i
] += input
[k
][i
];
524 /* mark this vector as used */
526 /* accumulate error */
531 if ((error
< LL_RMS_E
) ||
532 ((error
< lasterror
) && ((lasterror
- error
) < LL_RMS_D
))) {
533 return !0; /* good match */
537 /* move each vector to the barycenter of its closest colors */
538 for (j
= 0; j
< nv
; j
++) {
540 GLfloat div
= 1.0F
/ cnt
[j
];
541 for (i
= 0; i
< nc
; i
++) {
542 vec
[j
][i
] = div
* sum
[j
][i
];
545 /* this vec has no samples or is identical with a previous vec */
546 GLint worst
= fxt1_worst(vec
[j
], input
, nc
, n
);
547 for (i
= 0; i
< nc
; i
++) {
548 vec
[j
][i
] = input
[worst
][i
];
554 return 0; /* could not converge fast enough */
559 fxt1_quantize_CHROMA (GLuint
*cc
,
560 GLubyte input
[N_TEXELS
][MAX_COMP
])
562 const GLint n_vect
= 4; /* 4 base vectors to find */
563 const GLint n_comp
= 3; /* 3 components: R, G, B */
564 GLfloat vec
[MAX_VECT
][MAX_COMP
];
566 Fx64 hi
; /* high quadword */
567 GLuint lohi
, lolo
; /* low quadword: hi dword, lo dword */
569 if (fxt1_choose(vec
, n_vect
, input
, n_comp
, N_TEXELS
) != 0) {
570 fxt1_lloyd(vec
, n_vect
, input
, n_comp
, N_TEXELS
);
573 FX64_MOV32(hi
, 4); /* cc-chroma = "010" + unused bit */
574 for (j
= n_vect
- 1; j
>= 0; j
--) {
575 for (i
= 0; i
< n_comp
; i
++) {
578 FX64_OR32(hi
, (GLuint
)(vec
[j
][i
] / 8.0F
));
581 ((Fx64
*)cc
)[1] = hi
;
584 /* right microtile */
585 for (k
= N_TEXELS
- 1; k
>= N_TEXELS
/2; k
--) {
587 lohi
|= fxt1_bestcol(vec
, n_vect
, input
[k
], n_comp
);
590 for (; k
>= 0; k
--) {
592 lolo
|= fxt1_bestcol(vec
, n_vect
, input
[k
], n_comp
);
600 fxt1_quantize_ALPHA0 (GLuint
*cc
,
601 GLubyte input
[N_TEXELS
][MAX_COMP
],
602 GLubyte reord
[N_TEXELS
][MAX_COMP
], GLint n
)
604 const GLint n_vect
= 3; /* 3 base vectors to find */
605 const GLint n_comp
= 4; /* 4 components: R, G, B, A */
606 GLfloat vec
[MAX_VECT
][MAX_COMP
];
608 Fx64 hi
; /* high quadword */
609 GLuint lohi
, lolo
; /* low quadword: hi dword, lo dword */
611 /* the last vector indicates zero */
612 for (i
= 0; i
< n_comp
; i
++) {
616 /* the first n texels in reord are guaranteed to be non-zero */
617 if (fxt1_choose(vec
, n_vect
, reord
, n_comp
, n
) != 0) {
618 fxt1_lloyd(vec
, n_vect
, reord
, n_comp
, n
);
621 FX64_MOV32(hi
, 6); /* alpha = "011" + lerp = 0 */
622 for (j
= n_vect
- 1; j
>= 0; j
--) {
625 FX64_OR32(hi
, (GLuint
)(vec
[j
][ACOMP
] / 8.0F
));
627 for (j
= n_vect
- 1; j
>= 0; j
--) {
628 for (i
= 0; i
< n_comp
- 1; i
++) {
631 FX64_OR32(hi
, (GLuint
)(vec
[j
][i
] / 8.0F
));
634 ((Fx64
*)cc
)[1] = hi
;
637 /* right microtile */
638 for (k
= N_TEXELS
- 1; k
>= N_TEXELS
/2; k
--) {
640 lohi
|= fxt1_bestcol(vec
, n_vect
+ 1, input
[k
], n_comp
);
643 for (; k
>= 0; k
--) {
645 lolo
|= fxt1_bestcol(vec
, n_vect
+ 1, input
[k
], n_comp
);
653 fxt1_quantize_ALPHA1 (GLuint
*cc
,
654 GLubyte input
[N_TEXELS
][MAX_COMP
])
656 const GLint n_vect
= 3; /* highest vector number in each microtile */
657 const GLint n_comp
= 4; /* 4 components: R, G, B, A */
658 GLfloat vec
[1 + 1 + 1][MAX_COMP
]; /* 1.5 extrema for each sub-block */
659 GLfloat b
, iv
[MAX_COMP
]; /* interpolation vector */
661 Fx64 hi
; /* high quadword */
662 GLuint lohi
, lolo
; /* low quadword: hi dword, lo dword */
666 GLint minColL
= 0, maxColL
= 0;
667 GLint minColR
= 0, maxColR
= 0;
668 GLint sumL
= 0, sumR
= 0;
670 /* Our solution here is to find the darkest and brightest colors in
671 * the 4x4 tile and use those as the two representative colors.
672 * There are probably better algorithms to use (histogram-based).
675 while ((minColL
== maxColL
) && nn_comp
) {
676 minSum
= 2000; /* big enough */
677 maxSum
= -1; /* small enough */
678 for (k
= 0; k
< N_TEXELS
/ 2; k
++) {
680 for (i
= 0; i
< nn_comp
; i
++) {
698 while ((minColR
== maxColR
) && nn_comp
) {
699 minSum
= 2000; /* big enough */
700 maxSum
= -1; /* small enough */
701 for (k
= N_TEXELS
/ 2; k
< N_TEXELS
; k
++) {
703 for (i
= 0; i
< nn_comp
; i
++) {
720 /* choose the common vector (yuck!) */
723 GLint v1
= 0, v2
= 0;
724 GLfloat err
= 1e9
; /* big enough */
725 GLfloat tv
[2 * 2][MAX_COMP
]; /* 2 extrema for each sub-block */
726 for (i
= 0; i
< n_comp
; i
++) {
727 tv
[0][i
] = input
[minColL
][i
];
728 tv
[1][i
] = input
[maxColL
][i
];
729 tv
[2][i
] = input
[minColR
][i
];
730 tv
[3][i
] = input
[maxColR
][i
];
732 for (j1
= 0; j1
< 2; j1
++) {
733 for (j2
= 2; j2
< 4; j2
++) {
735 for (i
= 0; i
< n_comp
; i
++) {
736 e
+= (tv
[j1
][i
] - tv
[j2
][i
]) * (tv
[j1
][i
] - tv
[j2
][i
]);
745 for (i
= 0; i
< n_comp
; i
++) {
746 vec
[0][i
] = tv
[1 - v1
][i
];
747 vec
[1][i
] = (tv
[v1
][i
] * sumL
+ tv
[v2
][i
] * sumR
) / (sumL
+ sumR
);
748 vec
[2][i
] = tv
[5 - v2
][i
];
754 if (minColL
!= maxColL
) {
755 /* compute interpolation vector */
756 MAKEIVEC(n_vect
, n_comp
, iv
, b
, vec
[0], vec
[1]);
760 for (k
= N_TEXELS
/ 2 - 1; k
>= 0; k
--) {
762 /* interpolate color */
763 CALCCDOT(texel
, n_vect
, n_comp
, iv
, b
, input
[k
]);
772 /* right microtile */
774 if (minColR
!= maxColR
) {
775 /* compute interpolation vector */
776 MAKEIVEC(n_vect
, n_comp
, iv
, b
, vec
[2], vec
[1]);
780 for (k
= N_TEXELS
- 1; k
>= N_TEXELS
/ 2; k
--) {
782 /* interpolate color */
783 CALCCDOT(texel
, n_vect
, n_comp
, iv
, b
, input
[k
]);
792 FX64_MOV32(hi
, 7); /* alpha = "011" + lerp = 1 */
793 for (j
= n_vect
- 1; j
>= 0; j
--) {
796 FX64_OR32(hi
, (GLuint
)(vec
[j
][ACOMP
] / 8.0F
));
798 for (j
= n_vect
- 1; j
>= 0; j
--) {
799 for (i
= 0; i
< n_comp
- 1; i
++) {
802 FX64_OR32(hi
, (GLuint
)(vec
[j
][i
] / 8.0F
));
805 ((Fx64
*)cc
)[1] = hi
;
810 fxt1_quantize_HI (GLuint
*cc
,
811 GLubyte input
[N_TEXELS
][MAX_COMP
],
812 GLubyte reord
[N_TEXELS
][MAX_COMP
], GLint n
)
814 const GLint n_vect
= 6; /* highest vector number */
815 const GLint n_comp
= 3; /* 3 components: R, G, B */
816 GLfloat b
= 0.0F
; /* phoudoin: silent compiler! */
817 GLfloat iv
[MAX_COMP
]; /* interpolation vector */
819 GLuint hihi
; /* high quadword: hi dword */
821 GLint minSum
= 2000; /* big enough */
822 GLint maxSum
= -1; /* small enough */
823 GLint minCol
= 0; /* phoudoin: silent compiler! */
824 GLint maxCol
= 0; /* phoudoin: silent compiler! */
826 /* Our solution here is to find the darkest and brightest colors in
827 * the 8x4 tile and use those as the two representative colors.
828 * There are probably better algorithms to use (histogram-based).
830 for (k
= 0; k
< n
; k
++) {
832 for (i
= 0; i
< n_comp
; i
++) {
845 hihi
= 0; /* cc-hi = "00" */
846 for (i
= 0; i
< n_comp
; i
++) {
849 hihi
|= reord
[maxCol
][i
] >> 3;
851 for (i
= 0; i
< n_comp
; i
++) {
854 hihi
|= reord
[minCol
][i
] >> 3;
857 cc
[0] = cc
[1] = cc
[2] = 0;
859 /* compute interpolation vector */
860 if (minCol
!= maxCol
) {
861 MAKEIVEC(n_vect
, n_comp
, iv
, b
, reord
[minCol
], reord
[maxCol
]);
865 for (k
= N_TEXELS
- 1; k
>= 0; k
--) {
867 GLuint
*kk
= (GLuint
*)((char *)cc
+ t
/ 8);
868 GLint texel
= n_vect
+ 1; /* transparent black */
870 if (!ISTBLACK(input
[k
])) {
871 if (minCol
!= maxCol
) {
872 /* interpolate color */
873 CALCCDOT(texel
, n_vect
, n_comp
, iv
, b
, input
[k
]);
875 kk
[0] |= texel
<< (t
& 7);
879 kk
[0] |= texel
<< (t
& 7);
886 fxt1_quantize_MIXED1 (GLuint
*cc
,
887 GLubyte input
[N_TEXELS
][MAX_COMP
])
889 const GLint n_vect
= 2; /* highest vector number in each microtile */
890 const GLint n_comp
= 3; /* 3 components: R, G, B */
891 GLubyte vec
[2 * 2][MAX_COMP
]; /* 2 extrema for each sub-block */
892 GLfloat b
, iv
[MAX_COMP
]; /* interpolation vector */
894 Fx64 hi
; /* high quadword */
895 GLuint lohi
, lolo
; /* low quadword: hi dword, lo dword */
899 GLint minColL
= 0, maxColL
= -1;
900 GLint minColR
= 0, maxColR
= -1;
902 /* Our solution here is to find the darkest and brightest colors in
903 * the 4x4 tile and use those as the two representative colors.
904 * There are probably better algorithms to use (histogram-based).
906 minSum
= 2000; /* big enough */
907 maxSum
= -1; /* small enough */
908 for (k
= 0; k
< N_TEXELS
/ 2; k
++) {
909 if (!ISTBLACK(input
[k
])) {
911 for (i
= 0; i
< n_comp
; i
++) {
924 minSum
= 2000; /* big enough */
925 maxSum
= -1; /* small enough */
926 for (; k
< N_TEXELS
; k
++) {
927 if (!ISTBLACK(input
[k
])) {
929 for (i
= 0; i
< n_comp
; i
++) {
945 /* all transparent black */
947 for (i
= 0; i
< n_comp
; i
++) {
953 for (i
= 0; i
< n_comp
; i
++) {
954 vec
[0][i
] = input
[minColL
][i
];
955 vec
[1][i
] = input
[maxColL
][i
];
957 if (minColL
!= maxColL
) {
958 /* compute interpolation vector */
959 MAKEIVEC(n_vect
, n_comp
, iv
, b
, vec
[0], vec
[1]);
963 for (k
= N_TEXELS
/ 2 - 1; k
>= 0; k
--) {
964 GLint texel
= n_vect
+ 1; /* transparent black */
965 if (!ISTBLACK(input
[k
])) {
966 /* interpolate color */
967 CALCCDOT(texel
, n_vect
, n_comp
, iv
, b
, input
[k
]);
977 /* right microtile */
979 /* all transparent black */
981 for (i
= 0; i
< n_comp
; i
++) {
987 for (i
= 0; i
< n_comp
; i
++) {
988 vec
[2][i
] = input
[minColR
][i
];
989 vec
[3][i
] = input
[maxColR
][i
];
991 if (minColR
!= maxColR
) {
992 /* compute interpolation vector */
993 MAKEIVEC(n_vect
, n_comp
, iv
, b
, vec
[2], vec
[3]);
997 for (k
= N_TEXELS
- 1; k
>= N_TEXELS
/ 2; k
--) {
998 GLint texel
= n_vect
+ 1; /* transparent black */
999 if (!ISTBLACK(input
[k
])) {
1000 /* interpolate color */
1001 CALCCDOT(texel
, n_vect
, n_comp
, iv
, b
, input
[k
]);
1011 FX64_MOV32(hi
, 9 | (vec
[3][GCOMP
] & 4) | ((vec
[1][GCOMP
] >> 1) & 2)); /* chroma = "1" */
1012 for (j
= 2 * 2 - 1; j
>= 0; j
--) {
1013 for (i
= 0; i
< n_comp
; i
++) {
1016 FX64_OR32(hi
, vec
[j
][i
] >> 3);
1019 ((Fx64
*)cc
)[1] = hi
;
1024 fxt1_quantize_MIXED0 (GLuint
*cc
,
1025 GLubyte input
[N_TEXELS
][MAX_COMP
])
1027 const GLint n_vect
= 3; /* highest vector number in each microtile */
1028 const GLint n_comp
= 3; /* 3 components: R, G, B */
1029 GLubyte vec
[2 * 2][MAX_COMP
]; /* 2 extrema for each sub-block */
1030 GLfloat b
, iv
[MAX_COMP
]; /* interpolation vector */
1032 Fx64 hi
; /* high quadword */
1033 GLuint lohi
, lolo
; /* low quadword: hi dword, lo dword */
1035 GLint minColL
= 0, maxColL
= 0;
1036 GLint minColR
= 0, maxColR
= 0;
1041 /* Our solution here is to find the darkest and brightest colors in
1042 * the 4x4 tile and use those as the two representative colors.
1043 * There are probably better algorithms to use (histogram-based).
1045 minSum
= 2000; /* big enough */
1046 maxSum
= -1; /* small enough */
1047 for (k
= 0; k
< N_TEXELS
/ 2; k
++) {
1049 for (i
= 0; i
< n_comp
; i
++) {
1061 minSum
= 2000; /* big enough */
1062 maxSum
= -1; /* small enough */
1063 for (; k
< N_TEXELS
; k
++) {
1065 for (i
= 0; i
< n_comp
; i
++) {
1080 GLint maxVarL
= fxt1_variance(NULL
, input
, n_comp
, N_TEXELS
/ 2);
1081 GLint maxVarR
= fxt1_variance(NULL
, &input
[N_TEXELS
/ 2], n_comp
, N_TEXELS
/ 2);
1083 /* Scan the channel with max variance for lo & hi
1084 * and use those as the two representative colors.
1086 minVal
= 2000; /* big enough */
1087 maxVal
= -1; /* small enough */
1088 for (k
= 0; k
< N_TEXELS
/ 2; k
++) {
1089 GLint t
= input
[k
][maxVarL
];
1099 minVal
= 2000; /* big enough */
1100 maxVal
= -1; /* small enough */
1101 for (; k
< N_TEXELS
; k
++) {
1102 GLint t
= input
[k
][maxVarR
];
1114 /* left microtile */
1116 for (i
= 0; i
< n_comp
; i
++) {
1117 vec
[0][i
] = input
[minColL
][i
];
1118 vec
[1][i
] = input
[maxColL
][i
];
1120 if (minColL
!= maxColL
) {
1121 /* compute interpolation vector */
1122 MAKEIVEC(n_vect
, n_comp
, iv
, b
, vec
[0], vec
[1]);
1126 for (k
= N_TEXELS
/ 2 - 1; k
>= 0; k
--) {
1128 /* interpolate color */
1129 CALCCDOT(texel
, n_vect
, n_comp
, iv
, b
, input
[k
]);
1135 /* funky encoding for LSB of green */
1136 if ((GLint
)((lolo
>> 1) & 1) != (((vec
[1][GCOMP
] ^ vec
[0][GCOMP
]) >> 2) & 1)) {
1137 for (i
= 0; i
< n_comp
; i
++) {
1138 vec
[1][i
] = input
[minColL
][i
];
1139 vec
[0][i
] = input
[maxColL
][i
];
1147 /* right microtile */
1149 for (i
= 0; i
< n_comp
; i
++) {
1150 vec
[2][i
] = input
[minColR
][i
];
1151 vec
[3][i
] = input
[maxColR
][i
];
1153 if (minColR
!= maxColR
) {
1154 /* compute interpolation vector */
1155 MAKEIVEC(n_vect
, n_comp
, iv
, b
, vec
[2], vec
[3]);
1159 for (k
= N_TEXELS
- 1; k
>= N_TEXELS
/ 2; k
--) {
1161 /* interpolate color */
1162 CALCCDOT(texel
, n_vect
, n_comp
, iv
, b
, input
[k
]);
1168 /* funky encoding for LSB of green */
1169 if ((GLint
)((lohi
>> 1) & 1) != (((vec
[3][GCOMP
] ^ vec
[2][GCOMP
]) >> 2) & 1)) {
1170 for (i
= 0; i
< n_comp
; i
++) {
1171 vec
[3][i
] = input
[minColR
][i
];
1172 vec
[2][i
] = input
[maxColR
][i
];
1180 FX64_MOV32(hi
, 8 | (vec
[3][GCOMP
] & 4) | ((vec
[1][GCOMP
] >> 1) & 2)); /* chroma = "1" */
1181 for (j
= 2 * 2 - 1; j
>= 0; j
--) {
1182 for (i
= 0; i
< n_comp
; i
++) {
1185 FX64_OR32(hi
, vec
[j
][i
] >> 3);
1188 ((Fx64
*)cc
)[1] = hi
;
1193 fxt1_quantize (GLuint
*cc
, const GLubyte
*lines
[], GLint comps
)
1196 GLubyte reord
[N_TEXELS
][MAX_COMP
];
1198 GLubyte input
[N_TEXELS
][MAX_COMP
];
1202 /* make the whole block opaque */
1203 memset(input
, -1, sizeof(input
));
1206 /* 8 texels each line */
1207 for (l
= 0; l
< 4; l
++) {
1208 for (k
= 0; k
< 4; k
++) {
1209 for (i
= 0; i
< comps
; i
++) {
1210 input
[k
+ l
* 4][i
] = *lines
[l
]++;
1213 for (; k
< 8; k
++) {
1214 for (i
= 0; i
< comps
; i
++) {
1215 input
[k
+ l
* 4 + 12][i
] = *lines
[l
]++;
1221 * 00, 01, 02, 03, 08, 09, 0a, 0b
1222 * 10, 11, 12, 13, 18, 19, 1a, 1b
1223 * 04, 05, 06, 07, 0c, 0d, 0e, 0f
1224 * 14, 15, 16, 17, 1c, 1d, 1e, 1f
1228 * stupidity flows forth from this
1233 /* skip all transparent black texels */
1235 for (k
= 0; k
< N_TEXELS
; k
++) {
1236 /* test all components against 0 */
1237 if (!ISTBLACK(input
[k
])) {
1238 /* texel is not transparent black */
1239 COPY_4UBV(reord
[l
], input
[k
]);
1240 if (reord
[l
][ACOMP
] < (255 - ALPHA_TS
)) {
1241 /* non-opaque texel */
1251 fxt1_quantize_ALPHA0(cc
, input
, reord
, l
);
1252 } else if (l
== 0) {
1253 cc
[0] = cc
[1] = cc
[2] = -1;
1255 } else if (l
< N_TEXELS
) {
1256 fxt1_quantize_HI(cc
, input
, reord
, l
);
1258 fxt1_quantize_CHROMA(cc
, input
);
1260 (void)fxt1_quantize_ALPHA1
;
1261 (void)fxt1_quantize_MIXED1
;
1262 (void)fxt1_quantize_MIXED0
;
1265 fxt1_quantize_ALPHA1(cc
, input
);
1266 } else if (l
== 0) {
1267 cc
[0] = cc
[1] = cc
[2] = ~0u;
1269 } else if (l
< N_TEXELS
) {
1270 fxt1_quantize_MIXED1(cc
, input
);
1272 fxt1_quantize_MIXED0(cc
, input
);
1274 (void)fxt1_quantize_ALPHA0
;
1275 (void)fxt1_quantize_HI
;
1276 (void)fxt1_quantize_CHROMA
;
1283 * Upscale an image by replication, not (typical) stretching.
1284 * We use this when the image width or height is less than a
1285 * certain size (4, 8) and we need to upscale an image.
1288 upscale_teximage2d(GLsizei inWidth
, GLsizei inHeight
,
1289 GLsizei outWidth
, GLsizei outHeight
,
1290 GLint comps
, const GLubyte
*src
, GLint srcRowStride
,
1295 ASSERT(outWidth
>= inWidth
);
1296 ASSERT(outHeight
>= inHeight
);
1298 ASSERT(inWidth
== 1 || inWidth
== 2 || inHeight
== 1 || inHeight
== 2);
1299 ASSERT((outWidth
& 3) == 0);
1300 ASSERT((outHeight
& 3) == 0);
1303 for (i
= 0; i
< outHeight
; i
++) {
1304 const GLint ii
= i
% inHeight
;
1305 for (j
= 0; j
< outWidth
; j
++) {
1306 const GLint jj
= j
% inWidth
;
1307 for (k
= 0; k
< comps
; k
++) {
1308 dest
[(i
* outWidth
+ j
) * comps
+ k
]
1309 = src
[ii
* srcRowStride
+ jj
* comps
+ k
];
1317 fxt1_encode (GLuint width
, GLuint height
, GLint comps
,
1318 const void *source
, GLint srcRowStride
,
1319 void *dest
, GLint destRowStride
)
1322 const GLubyte
*data
;
1323 GLuint
*encoded
= (GLuint
*)dest
;
1324 void *newSource
= NULL
;
1326 assert(comps
== 3 || comps
== 4);
1328 /* Replicate image if width is not M8 or height is not M4 */
1329 if ((width
& 7) | (height
& 3)) {
1330 GLint newWidth
= (width
+ 7) & ~7;
1331 GLint newHeight
= (height
+ 3) & ~3;
1332 newSource
= malloc(comps
* newWidth
* newHeight
* sizeof(GLubyte
));
1334 GET_CURRENT_CONTEXT(ctx
);
1335 _mesa_error(ctx
, GL_OUT_OF_MEMORY
, "texture compression");
1338 upscale_teximage2d(width
, height
, newWidth
, newHeight
,
1339 comps
, (const GLubyte
*) source
,
1340 srcRowStride
, (GLubyte
*) newSource
);
1344 srcRowStride
= comps
* newWidth
;
1347 data
= (const GLubyte
*) source
;
1348 destRowStride
= (destRowStride
- width
* 2) / 4;
1349 for (y
= 0; y
< height
; y
+= 4) {
1350 GLuint offs
= 0 + (y
+ 0) * srcRowStride
;
1351 for (x
= 0; x
< width
; x
+= 8) {
1352 const GLubyte
*lines
[4];
1353 lines
[0] = &data
[offs
];
1354 lines
[1] = lines
[0] + srcRowStride
;
1355 lines
[2] = lines
[1] + srcRowStride
;
1356 lines
[3] = lines
[2] + srcRowStride
;
1358 fxt1_quantize(encoded
, lines
, comps
);
1359 /* 128 bits per 8x4 block */
1362 encoded
+= destRowStride
;
1366 if (newSource
!= NULL
) {
1372 /***************************************************************************\
1375 * The decoder is based on GL_3DFX_texture_compression_FXT1
1376 * specification and serves as a concept for the encoder.
1377 \***************************************************************************/
1380 /* lookup table for scaling 5 bit colors up to 8 bits */
1381 static const GLubyte _rgb_scale_5
[] = {
1382 0, 8, 16, 25, 33, 41, 49, 58,
1383 66, 74, 82, 90, 99, 107, 115, 123,
1384 132, 140, 148, 156, 165, 173, 181, 189,
1385 197, 206, 214, 222, 230, 239, 247, 255
1388 /* lookup table for scaling 6 bit colors up to 8 bits */
1389 static const GLubyte _rgb_scale_6
[] = {
1390 0, 4, 8, 12, 16, 20, 24, 28,
1391 32, 36, 40, 45, 49, 53, 57, 61,
1392 65, 69, 73, 77, 81, 85, 89, 93,
1393 97, 101, 105, 109, 113, 117, 121, 125,
1394 130, 134, 138, 142, 146, 150, 154, 158,
1395 162, 166, 170, 174, 178, 182, 186, 190,
1396 194, 198, 202, 206, 210, 215, 219, 223,
1397 227, 231, 235, 239, 243, 247, 251, 255
1401 #define CC_SEL(cc, which) (((GLuint *)(cc))[(which) / 32] >> ((which) & 31))
1402 #define UP5(c) _rgb_scale_5[(c) & 31]
1403 #define UP6(c, b) _rgb_scale_6[(((c) & 31) << 1) | ((b) & 1)]
1404 #define LERP(n, t, c0, c1) (((n) - (t)) * (c0) + (t) * (c1) + (n) / 2) / (n)
1408 fxt1_decode_1HI (const GLubyte
*code
, GLint t
, GLubyte
*rgba
)
1413 cc
= (const GLuint
*)(code
+ t
/ 8);
1414 t
= (cc
[0] >> (t
& 7)) & 7;
1417 rgba
[RCOMP
] = rgba
[GCOMP
] = rgba
[BCOMP
] = rgba
[ACOMP
] = 0;
1420 cc
= (const GLuint
*)(code
+ 12);
1422 b
= UP5(CC_SEL(cc
, 0));
1423 g
= UP5(CC_SEL(cc
, 5));
1424 r
= UP5(CC_SEL(cc
, 10));
1425 } else if (t
== 6) {
1426 b
= UP5(CC_SEL(cc
, 15));
1427 g
= UP5(CC_SEL(cc
, 20));
1428 r
= UP5(CC_SEL(cc
, 25));
1430 b
= LERP(6, t
, UP5(CC_SEL(cc
, 0)), UP5(CC_SEL(cc
, 15)));
1431 g
= LERP(6, t
, UP5(CC_SEL(cc
, 5)), UP5(CC_SEL(cc
, 20)));
1432 r
= LERP(6, t
, UP5(CC_SEL(cc
, 10)), UP5(CC_SEL(cc
, 25)));
1443 fxt1_decode_1CHROMA (const GLubyte
*code
, GLint t
, GLubyte
*rgba
)
1448 cc
= (const GLuint
*)code
;
1453 t
= (cc
[0] >> (t
* 2)) & 3;
1456 cc
= (const GLuint
*)(code
+ 8 + t
/ 8);
1457 kk
= cc
[0] >> (t
& 7);
1458 rgba
[BCOMP
] = UP5(kk
);
1459 rgba
[GCOMP
] = UP5(kk
>> 5);
1460 rgba
[RCOMP
] = UP5(kk
>> 10);
1466 fxt1_decode_1MIXED (const GLubyte
*code
, GLint t
, GLubyte
*rgba
)
1472 cc
= (const GLuint
*)code
;
1475 t
= (cc
[1] >> (t
* 2)) & 3;
1477 col
[0][BCOMP
] = (*(const GLuint
*)(code
+ 11)) >> 6;
1478 col
[0][GCOMP
] = CC_SEL(cc
, 99);
1479 col
[0][RCOMP
] = CC_SEL(cc
, 104);
1481 col
[1][BCOMP
] = CC_SEL(cc
, 109);
1482 col
[1][GCOMP
] = CC_SEL(cc
, 114);
1483 col
[1][RCOMP
] = CC_SEL(cc
, 119);
1484 glsb
= CC_SEL(cc
, 126);
1485 selb
= CC_SEL(cc
, 33);
1487 t
= (cc
[0] >> (t
* 2)) & 3;
1489 col
[0][BCOMP
] = CC_SEL(cc
, 64);
1490 col
[0][GCOMP
] = CC_SEL(cc
, 69);
1491 col
[0][RCOMP
] = CC_SEL(cc
, 74);
1493 col
[1][BCOMP
] = CC_SEL(cc
, 79);
1494 col
[1][GCOMP
] = CC_SEL(cc
, 84);
1495 col
[1][RCOMP
] = CC_SEL(cc
, 89);
1496 glsb
= CC_SEL(cc
, 125);
1497 selb
= CC_SEL(cc
, 1);
1500 if (CC_SEL(cc
, 124) & 1) {
1505 rgba
[RCOMP
] = rgba
[BCOMP
] = rgba
[GCOMP
] = rgba
[ACOMP
] = 0;
1509 b
= UP5(col
[0][BCOMP
]);
1510 g
= UP5(col
[0][GCOMP
]);
1511 r
= UP5(col
[0][RCOMP
]);
1512 } else if (t
== 2) {
1513 b
= UP5(col
[1][BCOMP
]);
1514 g
= UP6(col
[1][GCOMP
], glsb
);
1515 r
= UP5(col
[1][RCOMP
]);
1517 b
= (UP5(col
[0][BCOMP
]) + UP5(col
[1][BCOMP
])) / 2;
1518 g
= (UP5(col
[0][GCOMP
]) + UP6(col
[1][GCOMP
], glsb
)) / 2;
1519 r
= (UP5(col
[0][RCOMP
]) + UP5(col
[1][RCOMP
])) / 2;
1530 b
= UP5(col
[0][BCOMP
]);
1531 g
= UP6(col
[0][GCOMP
], glsb
^ selb
);
1532 r
= UP5(col
[0][RCOMP
]);
1533 } else if (t
== 3) {
1534 b
= UP5(col
[1][BCOMP
]);
1535 g
= UP6(col
[1][GCOMP
], glsb
);
1536 r
= UP5(col
[1][RCOMP
]);
1538 b
= LERP(3, t
, UP5(col
[0][BCOMP
]), UP5(col
[1][BCOMP
]));
1539 g
= LERP(3, t
, UP6(col
[0][GCOMP
], glsb
^ selb
),
1540 UP6(col
[1][GCOMP
], glsb
));
1541 r
= LERP(3, t
, UP5(col
[0][RCOMP
]), UP5(col
[1][RCOMP
]));
1552 fxt1_decode_1ALPHA (const GLubyte
*code
, GLint t
, GLubyte
*rgba
)
1557 cc
= (const GLuint
*)code
;
1558 if (CC_SEL(cc
, 124) & 1) {
1564 t
= (cc
[1] >> (t
* 2)) & 3;
1566 col0
[BCOMP
] = (*(const GLuint
*)(code
+ 11)) >> 6;
1567 col0
[GCOMP
] = CC_SEL(cc
, 99);
1568 col0
[RCOMP
] = CC_SEL(cc
, 104);
1569 col0
[ACOMP
] = CC_SEL(cc
, 119);
1571 t
= (cc
[0] >> (t
* 2)) & 3;
1573 col0
[BCOMP
] = CC_SEL(cc
, 64);
1574 col0
[GCOMP
] = CC_SEL(cc
, 69);
1575 col0
[RCOMP
] = CC_SEL(cc
, 74);
1576 col0
[ACOMP
] = CC_SEL(cc
, 109);
1580 b
= UP5(col0
[BCOMP
]);
1581 g
= UP5(col0
[GCOMP
]);
1582 r
= UP5(col0
[RCOMP
]);
1583 a
= UP5(col0
[ACOMP
]);
1584 } else if (t
== 3) {
1585 b
= UP5(CC_SEL(cc
, 79));
1586 g
= UP5(CC_SEL(cc
, 84));
1587 r
= UP5(CC_SEL(cc
, 89));
1588 a
= UP5(CC_SEL(cc
, 114));
1590 b
= LERP(3, t
, UP5(col0
[BCOMP
]), UP5(CC_SEL(cc
, 79)));
1591 g
= LERP(3, t
, UP5(col0
[GCOMP
]), UP5(CC_SEL(cc
, 84)));
1592 r
= LERP(3, t
, UP5(col0
[RCOMP
]), UP5(CC_SEL(cc
, 89)));
1593 a
= LERP(3, t
, UP5(col0
[ACOMP
]), UP5(CC_SEL(cc
, 114)));
1602 t
= (cc
[0] >> (t
* 2)) & 3;
1609 cc
= (const GLuint
*)code
;
1610 a
= UP5(cc
[3] >> (t
* 5 + 13));
1612 cc
= (const GLuint
*)(code
+ 8 + t
/ 8);
1613 kk
= cc
[0] >> (t
& 7);
1627 fxt1_decode_1 (const void *texture
, GLint stride
, /* in pixels */
1628 GLint i
, GLint j
, GLubyte
*rgba
)
1630 static void (*decode_1
[]) (const GLubyte
*, GLint
, GLubyte
*) = {
1631 fxt1_decode_1HI
, /* cc-high = "00?" */
1632 fxt1_decode_1HI
, /* cc-high = "00?" */
1633 fxt1_decode_1CHROMA
, /* cc-chroma = "010" */
1634 fxt1_decode_1ALPHA
, /* alpha = "011" */
1635 fxt1_decode_1MIXED
, /* mixed = "1??" */
1636 fxt1_decode_1MIXED
, /* mixed = "1??" */
1637 fxt1_decode_1MIXED
, /* mixed = "1??" */
1638 fxt1_decode_1MIXED
/* mixed = "1??" */
1641 const GLubyte
*code
= (const GLubyte
*)texture
+
1642 ((j
/ 4) * (stride
/ 8) + (i
/ 8)) * 16;
1643 GLint mode
= CC_SEL(code
, 125);
1651 decode_1
[mode
](code
, t
, rgba
);
1655 #endif /* FEATURE_texture_fxt1 */