2 * Mesa 3-D graphics library
5 * Copyright (C) 1999-2008 Brian Paul All Rights Reserved.
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
14 * The above copyright notice and this permission notice shall be included
15 * in all copies or substantial portions of the Software.
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
21 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
22 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
23 * OTHER DEALINGS IN THE SOFTWARE.
28 * \file texcompress_fxt1.c
29 * GL_3DFX_texture_compression_FXT1 support.
39 #include "texcompress.h"
40 #include "texcompress_fxt1.h"
45 fxt1_encode (GLuint width
, GLuint height
, GLint comps
,
46 const void *source
, GLint srcRowStride
,
47 void *dest
, GLint destRowStride
);
50 fxt1_decode_1 (const void *texture
, GLint stride
,
51 GLint i
, GLint j
, GLubyte
*rgba
);
55 * Store user's image in rgb_fxt1 format.
58 _mesa_texstore_rgb_fxt1(TEXSTORE_PARAMS
)
60 const GLubyte
*pixels
;
63 const GLubyte
*tempImage
= NULL
;
65 ASSERT(dstFormat
== MESA_FORMAT_RGB_FXT1
);
67 if (srcFormat
!= GL_RGB
||
68 srcType
!= GL_UNSIGNED_BYTE
||
69 ctx
->_ImageTransferState
||
70 srcPacking
->RowLength
!= srcWidth
||
71 srcPacking
->SwapBytes
) {
72 /* convert image to RGB/GLubyte */
73 tempImage
= _mesa_make_temp_ubyte_image(ctx
, dims
,
75 _mesa_get_format_base_format(dstFormat
),
76 srcWidth
, srcHeight
, srcDepth
,
77 srcFormat
, srcType
, srcAddr
,
80 return GL_FALSE
; /* out of memory */
82 srcRowStride
= 3 * srcWidth
;
86 pixels
= _mesa_image_address2d(srcPacking
, srcAddr
, srcWidth
, srcHeight
,
87 srcFormat
, srcType
, 0, 0);
89 srcRowStride
= _mesa_image_row_stride(srcPacking
, srcWidth
, srcFormat
,
90 srcType
) / sizeof(GLubyte
);
95 fxt1_encode(srcWidth
, srcHeight
, 3, pixels
, srcRowStride
,
98 free((void*) tempImage
);
105 * Store user's image in rgba_fxt1 format.
108 _mesa_texstore_rgba_fxt1(TEXSTORE_PARAMS
)
110 const GLubyte
*pixels
;
113 const GLubyte
*tempImage
= NULL
;
115 ASSERT(dstFormat
== MESA_FORMAT_RGBA_FXT1
);
117 if (srcFormat
!= GL_RGBA
||
118 srcType
!= GL_UNSIGNED_BYTE
||
119 ctx
->_ImageTransferState
||
120 srcPacking
->SwapBytes
) {
121 /* convert image to RGBA/GLubyte */
122 tempImage
= _mesa_make_temp_ubyte_image(ctx
, dims
,
124 _mesa_get_format_base_format(dstFormat
),
125 srcWidth
, srcHeight
, srcDepth
,
126 srcFormat
, srcType
, srcAddr
,
129 return GL_FALSE
; /* out of memory */
131 srcRowStride
= 4 * srcWidth
;
135 pixels
= _mesa_image_address2d(srcPacking
, srcAddr
, srcWidth
, srcHeight
,
136 srcFormat
, srcType
, 0, 0);
138 srcRowStride
= _mesa_image_row_stride(srcPacking
, srcWidth
, srcFormat
,
139 srcType
) / sizeof(GLubyte
);
144 fxt1_encode(srcWidth
, srcHeight
, 4, pixels
, srcRowStride
,
147 free((void*) tempImage
);
153 /***************************************************************************\
156 * The encoder was built by reversing the decoder,
157 * and is vaguely based on Texus2 by 3dfx. Note that this code
158 * is merely a proof of concept, since it is highly UNoptimized;
159 * moreover, it is sub-optimal due to initial conditions passed
160 * to Lloyd's algorithm (the interpolation modes are even worse).
161 \***************************************************************************/
/*
 * Encoder tuning constants.
 */
#define MAX_COMP 4 /* ever needed maximum number of components in texel */
#define MAX_VECT 4 /* ever needed maximum number of base vectors to find */
#define N_TEXELS 32 /* number of texels in a block (always 32) */
#define LL_N_REP 50 /* number of iterations in lloyd's vq */
#define LL_RMS_D 10 /* fault tolerance (maximum delta) */
#define LL_RMS_E 255 /* fault tolerance (maximum error) */
#define ALPHA_TS 2 /* alpha threshold: (255 - ALPHA_TS) deemed opaque */

/*
 * True when the 4-component texel at `v` is "transparent black", i.e. all
 * four bytes are zero.
 *
 * The bytes are tested individually instead of being read through a
 * GLuint pointer: texels are stored as GLubyte[MAX_COMP], so a 32-bit load
 * would violate the effective-type (strict aliasing) rule and could be
 * misaligned.  The result is identical for a 4-byte texel.
 *
 * Note: `v` is evaluated more than once; pass an expression without side
 * effects (every use in this file passes input[k]).
 */
#define ISTBLACK(v) \
   ((((const GLubyte *)(v))[0] | ((const GLubyte *)(v))[1] | \
     ((const GLubyte *)(v))[2] | ((const GLubyte *)(v))[3]) == 0)
175 * Define a 64-bit unsigned integer type and macros
179 #define FX64_NATIVE 1
181 typedef uint64_t Fx64
;
183 #define FX64_MOV32(a, b) a = b
184 #define FX64_OR32(a, b) a |= b
185 #define FX64_SHL(a, c) a <<= c
189 #define FX64_NATIVE 0
195 #define FX64_MOV32(a, b) a.lo = b
196 #define FX64_OR32(a, b) a.lo |= b
198 #define FX64_SHL(a, c) \
201 a.hi = a.lo << ((c) - 32); \
204 a.hi = (a.hi << (c)) | (a.lo >> (32 - (c))); \
/*
 * Per-component weight used when building interpolation vectors.  It is 1
 * for every component; it can be used to obtain an oblong metric
 * (0.30 / 0.59 / 0.11).  The expansion is fully parenthesized so that it
 * behaves as a single operand in any context (e.g. `sizeof F(i)`).
 */
#define F(i) ((GLfloat)1)
#define SAFECDOT 1 /* for paranoids */
215 #define MAKEIVEC(NV, NC, IV, B, V0, V1) \
217 /* compute interpolation vector */ \
221 for (i = 0; i < NC; i++) { \
222 IV[i] = (V1[i] - V0[i]) * F(i); \
223 d2 += IV[i] * IV[i]; \
225 rd2 = (GLfloat)NV / d2; \
227 for (i = 0; i < NC; i++) { \
229 B -= IV[i] * V0[i]; \
232 B = B * rd2 + 0.5f; \
235 #define CALCCDOT(TEXEL, NV, NC, IV, B, V)\
237 GLfloat dot = 0.0F; \
238 for (i = 0; i < NC; i++) { \
239 dot += V[i] * IV[i]; \
241 TEXEL = (GLint)(dot + B); \
245 } else if (TEXEL > NV) { \
253 fxt1_bestcol (GLfloat vec
[][MAX_COMP
], GLint nv
,
254 GLubyte input
[MAX_COMP
], GLint nc
)
256 GLint i
, j
, best
= -1;
257 GLfloat err
= 1e9
; /* big enough */
259 for (j
= 0; j
< nv
; j
++) {
261 for (i
= 0; i
< nc
; i
++) {
262 e
+= (vec
[j
][i
] - input
[i
]) * (vec
[j
][i
] - input
[i
]);
275 fxt1_worst (GLfloat vec
[MAX_COMP
],
276 GLubyte input
[N_TEXELS
][MAX_COMP
], GLint nc
, GLint n
)
278 GLint i
, k
, worst
= -1;
279 GLfloat err
= -1.0F
; /* small enough */
281 for (k
= 0; k
< n
; k
++) {
283 for (i
= 0; i
< nc
; i
++) {
284 e
+= (vec
[i
] - input
[k
][i
]) * (vec
[i
] - input
[k
][i
]);
297 fxt1_variance (GLdouble variance
[MAX_COMP
],
298 GLubyte input
[N_TEXELS
][MAX_COMP
], GLint nc
, GLint n
)
300 GLint i
, k
, best
= 0;
302 GLdouble var
, maxvar
= -1; /* small enough */
303 GLdouble teenth
= 1.0 / n
;
305 for (i
= 0; i
< nc
; i
++) {
307 for (k
= 0; k
< n
; k
++) {
308 GLint t
= input
[k
][i
];
312 var
= sx2
* teenth
- sx
* sx
* teenth
* teenth
;
327 fxt1_choose (GLfloat vec
[][MAX_COMP
], GLint nv
,
328 GLubyte input
[N_TEXELS
][MAX_COMP
], GLint nc
, GLint n
)
331 /* Choose colors from a grid.
335 for (j
= 0; j
< nv
; j
++) {
336 GLint m
= j
* (n
- 1) / (nv
- 1);
337 for (i
= 0; i
< nc
; i
++) {
338 vec
[j
][i
] = input
[m
][i
];
342 /* Our solution here is to find the darkest and brightest colors in
343 * the 8x4 tile and use those as the two representative colors.
344 * There are probably better algorithms to use (histogram-based).
347 GLint minSum
= 2000; /* big enough */
348 GLint maxSum
= -1; /* small enough */
349 GLint minCol
= 0; /* phoudoin: silent compiler! */
350 GLint maxCol
= 0; /* phoudoin: silent compiler! */
360 memset(hist
, 0, sizeof(hist
));
362 for (k
= 0; k
< n
; k
++) {
366 for (i
= 0; i
< nc
; i
++) {
371 for (l
= 0; l
< n
; l
++) {
380 } else if (hist
[l
].key
== key
) {
396 for (j
= 0; j
< lenh
; j
++) {
397 for (i
= 0; i
< nc
; i
++) {
398 vec
[j
][i
] = (GLfloat
)input
[hist
[j
].idx
][i
];
401 for (; j
< nv
; j
++) {
402 for (i
= 0; i
< nc
; i
++) {
403 vec
[j
][i
] = vec
[0][i
];
409 for (j
= 0; j
< nv
; j
++) {
410 for (i
= 0; i
< nc
; i
++) {
411 vec
[j
][i
] = ((nv
- 1 - j
) * input
[minCol
][i
] + j
* input
[maxCol
][i
] + (nv
- 1) / 2) / (GLfloat
)(nv
- 1);
421 fxt1_lloyd (GLfloat vec
[][MAX_COMP
], GLint nv
,
422 GLubyte input
[N_TEXELS
][MAX_COMP
], GLint nc
, GLint n
)
424 /* Use the generalized lloyd's algorithm for VQ:
425 * find 4 color vectors.
427 * for each sample color
428 * sort to nearest vector.
430 * replace each vector with the centroid of its matching colors.
432 * repeat until RMS doesn't improve.
434 * if a color vector has no samples, or becomes the same as another
435 * vector, replace it with the color which is farthest from a sample.
437 * vec[][MAX_COMP] initial vectors and resulting colors
438 * nv number of resulting colors required
439 * input[N_TEXELS][MAX_COMP] input texels
440 * nc number of components in input / vec
441 * n number of input samples
444 GLint sum
[MAX_VECT
][MAX_COMP
]; /* used to accumulate closest texels */
445 GLint cnt
[MAX_VECT
]; /* how many times a certain vector was chosen */
446 GLfloat error
, lasterror
= 1e9
;
451 for (rep
= 0; rep
< LL_N_REP
; rep
++) {
452 /* reset sums & counters */
453 for (j
= 0; j
< nv
; j
++) {
454 for (i
= 0; i
< nc
; i
++) {
461 /* scan whole block */
462 for (k
= 0; k
< n
; k
++) {
465 GLfloat err
= 1e9
; /* big enough */
466 /* determine best vector */
467 for (j
= 0; j
< nv
; j
++) {
468 GLfloat e
= (vec
[j
][0] - input
[k
][0]) * (vec
[j
][0] - input
[k
][0]) +
469 (vec
[j
][1] - input
[k
][1]) * (vec
[j
][1] - input
[k
][1]) +
470 (vec
[j
][2] - input
[k
][2]) * (vec
[j
][2] - input
[k
][2]);
472 e
+= (vec
[j
][3] - input
[k
][3]) * (vec
[j
][3] - input
[k
][3]);
480 GLint best
= fxt1_bestcol(vec
, nv
, input
[k
], nc
, &err
);
483 /* add in closest color */
484 for (i
= 0; i
< nc
; i
++) {
485 sum
[best
][i
] += input
[k
][i
];
487 /* mark this vector as used */
489 /* accumulate error */
494 if ((error
< LL_RMS_E
) ||
495 ((error
< lasterror
) && ((lasterror
- error
) < LL_RMS_D
))) {
496 return !0; /* good match */
500 /* move each vector to the barycenter of its closest colors */
501 for (j
= 0; j
< nv
; j
++) {
503 GLfloat div
= 1.0F
/ cnt
[j
];
504 for (i
= 0; i
< nc
; i
++) {
505 vec
[j
][i
] = div
* sum
[j
][i
];
508 /* this vec has no samples or is identical with a previous vec */
509 GLint worst
= fxt1_worst(vec
[j
], input
, nc
, n
);
510 for (i
= 0; i
< nc
; i
++) {
511 vec
[j
][i
] = input
[worst
][i
];
517 return 0; /* could not converge fast enough */
522 fxt1_quantize_CHROMA (GLuint
*cc
,
523 GLubyte input
[N_TEXELS
][MAX_COMP
])
525 const GLint n_vect
= 4; /* 4 base vectors to find */
526 const GLint n_comp
= 3; /* 3 components: R, G, B */
527 GLfloat vec
[MAX_VECT
][MAX_COMP
];
529 Fx64 hi
; /* high quadword */
530 GLuint lohi
, lolo
; /* low quadword: hi dword, lo dword */
532 if (fxt1_choose(vec
, n_vect
, input
, n_comp
, N_TEXELS
) != 0) {
533 fxt1_lloyd(vec
, n_vect
, input
, n_comp
, N_TEXELS
);
536 FX64_MOV32(hi
, 4); /* cc-chroma = "010" + unused bit */
537 for (j
= n_vect
- 1; j
>= 0; j
--) {
538 for (i
= 0; i
< n_comp
; i
++) {
541 FX64_OR32(hi
, (GLuint
)(vec
[j
][i
] / 8.0F
));
544 ((Fx64
*)cc
)[1] = hi
;
547 /* right microtile */
548 for (k
= N_TEXELS
- 1; k
>= N_TEXELS
/2; k
--) {
550 lohi
|= fxt1_bestcol(vec
, n_vect
, input
[k
], n_comp
);
553 for (; k
>= 0; k
--) {
555 lolo
|= fxt1_bestcol(vec
, n_vect
, input
[k
], n_comp
);
563 fxt1_quantize_ALPHA0 (GLuint
*cc
,
564 GLubyte input
[N_TEXELS
][MAX_COMP
],
565 GLubyte reord
[N_TEXELS
][MAX_COMP
], GLint n
)
567 const GLint n_vect
= 3; /* 3 base vectors to find */
568 const GLint n_comp
= 4; /* 4 components: R, G, B, A */
569 GLfloat vec
[MAX_VECT
][MAX_COMP
];
571 Fx64 hi
; /* high quadword */
572 GLuint lohi
, lolo
; /* low quadword: hi dword, lo dword */
574 /* the last vector indicates zero */
575 for (i
= 0; i
< n_comp
; i
++) {
579 /* the first n texels in reord are guaranteed to be non-zero */
580 if (fxt1_choose(vec
, n_vect
, reord
, n_comp
, n
) != 0) {
581 fxt1_lloyd(vec
, n_vect
, reord
, n_comp
, n
);
584 FX64_MOV32(hi
, 6); /* alpha = "011" + lerp = 0 */
585 for (j
= n_vect
- 1; j
>= 0; j
--) {
588 FX64_OR32(hi
, (GLuint
)(vec
[j
][ACOMP
] / 8.0F
));
590 for (j
= n_vect
- 1; j
>= 0; j
--) {
591 for (i
= 0; i
< n_comp
- 1; i
++) {
594 FX64_OR32(hi
, (GLuint
)(vec
[j
][i
] / 8.0F
));
597 ((Fx64
*)cc
)[1] = hi
;
600 /* right microtile */
601 for (k
= N_TEXELS
- 1; k
>= N_TEXELS
/2; k
--) {
603 lohi
|= fxt1_bestcol(vec
, n_vect
+ 1, input
[k
], n_comp
);
606 for (; k
>= 0; k
--) {
608 lolo
|= fxt1_bestcol(vec
, n_vect
+ 1, input
[k
], n_comp
);
616 fxt1_quantize_ALPHA1 (GLuint
*cc
,
617 GLubyte input
[N_TEXELS
][MAX_COMP
])
619 const GLint n_vect
= 3; /* highest vector number in each microtile */
620 const GLint n_comp
= 4; /* 4 components: R, G, B, A */
621 GLfloat vec
[1 + 1 + 1][MAX_COMP
]; /* 1.5 extrema for each sub-block */
622 GLfloat b
, iv
[MAX_COMP
]; /* interpolation vector */
624 Fx64 hi
; /* high quadword */
625 GLuint lohi
, lolo
; /* low quadword: hi dword, lo dword */
629 GLint minColL
= 0, maxColL
= 0;
630 GLint minColR
= 0, maxColR
= 0;
631 GLint sumL
= 0, sumR
= 0;
633 /* Our solution here is to find the darkest and brightest colors in
634 * the 4x4 tile and use those as the two representative colors.
635 * There are probably better algorithms to use (histogram-based).
638 while ((minColL
== maxColL
) && nn_comp
) {
639 minSum
= 2000; /* big enough */
640 maxSum
= -1; /* small enough */
641 for (k
= 0; k
< N_TEXELS
/ 2; k
++) {
643 for (i
= 0; i
< nn_comp
; i
++) {
661 while ((minColR
== maxColR
) && nn_comp
) {
662 minSum
= 2000; /* big enough */
663 maxSum
= -1; /* small enough */
664 for (k
= N_TEXELS
/ 2; k
< N_TEXELS
; k
++) {
666 for (i
= 0; i
< nn_comp
; i
++) {
683 /* choose the common vector (yuck!) */
686 GLint v1
= 0, v2
= 0;
687 GLfloat err
= 1e9
; /* big enough */
688 GLfloat tv
[2 * 2][MAX_COMP
]; /* 2 extrema for each sub-block */
689 for (i
= 0; i
< n_comp
; i
++) {
690 tv
[0][i
] = input
[minColL
][i
];
691 tv
[1][i
] = input
[maxColL
][i
];
692 tv
[2][i
] = input
[minColR
][i
];
693 tv
[3][i
] = input
[maxColR
][i
];
695 for (j1
= 0; j1
< 2; j1
++) {
696 for (j2
= 2; j2
< 4; j2
++) {
698 for (i
= 0; i
< n_comp
; i
++) {
699 e
+= (tv
[j1
][i
] - tv
[j2
][i
]) * (tv
[j1
][i
] - tv
[j2
][i
]);
708 for (i
= 0; i
< n_comp
; i
++) {
709 vec
[0][i
] = tv
[1 - v1
][i
];
710 vec
[1][i
] = (tv
[v1
][i
] * sumL
+ tv
[v2
][i
] * sumR
) / (sumL
+ sumR
);
711 vec
[2][i
] = tv
[5 - v2
][i
];
717 if (minColL
!= maxColL
) {
718 /* compute interpolation vector */
719 MAKEIVEC(n_vect
, n_comp
, iv
, b
, vec
[0], vec
[1]);
723 for (k
= N_TEXELS
/ 2 - 1; k
>= 0; k
--) {
725 /* interpolate color */
726 CALCCDOT(texel
, n_vect
, n_comp
, iv
, b
, input
[k
]);
735 /* right microtile */
737 if (minColR
!= maxColR
) {
738 /* compute interpolation vector */
739 MAKEIVEC(n_vect
, n_comp
, iv
, b
, vec
[2], vec
[1]);
743 for (k
= N_TEXELS
- 1; k
>= N_TEXELS
/ 2; k
--) {
745 /* interpolate color */
746 CALCCDOT(texel
, n_vect
, n_comp
, iv
, b
, input
[k
]);
755 FX64_MOV32(hi
, 7); /* alpha = "011" + lerp = 1 */
756 for (j
= n_vect
- 1; j
>= 0; j
--) {
759 FX64_OR32(hi
, (GLuint
)(vec
[j
][ACOMP
] / 8.0F
));
761 for (j
= n_vect
- 1; j
>= 0; j
--) {
762 for (i
= 0; i
< n_comp
- 1; i
++) {
765 FX64_OR32(hi
, (GLuint
)(vec
[j
][i
] / 8.0F
));
768 ((Fx64
*)cc
)[1] = hi
;
773 fxt1_quantize_HI (GLuint
*cc
,
774 GLubyte input
[N_TEXELS
][MAX_COMP
],
775 GLubyte reord
[N_TEXELS
][MAX_COMP
], GLint n
)
777 const GLint n_vect
= 6; /* highest vector number */
778 const GLint n_comp
= 3; /* 3 components: R, G, B */
779 GLfloat b
= 0.0F
; /* phoudoin: silent compiler! */
780 GLfloat iv
[MAX_COMP
]; /* interpolation vector */
782 GLuint hihi
; /* high quadword: hi dword */
784 GLint minSum
= 2000; /* big enough */
785 GLint maxSum
= -1; /* small enough */
786 GLint minCol
= 0; /* phoudoin: silent compiler! */
787 GLint maxCol
= 0; /* phoudoin: silent compiler! */
789 /* Our solution here is to find the darkest and brightest colors in
790 * the 8x4 tile and use those as the two representative colors.
791 * There are probably better algorithms to use (histogram-based).
793 for (k
= 0; k
< n
; k
++) {
795 for (i
= 0; i
< n_comp
; i
++) {
808 hihi
= 0; /* cc-hi = "00" */
809 for (i
= 0; i
< n_comp
; i
++) {
812 hihi
|= reord
[maxCol
][i
] >> 3;
814 for (i
= 0; i
< n_comp
; i
++) {
817 hihi
|= reord
[minCol
][i
] >> 3;
820 cc
[0] = cc
[1] = cc
[2] = 0;
822 /* compute interpolation vector */
823 if (minCol
!= maxCol
) {
824 MAKEIVEC(n_vect
, n_comp
, iv
, b
, reord
[minCol
], reord
[maxCol
]);
828 for (k
= N_TEXELS
- 1; k
>= 0; k
--) {
830 GLuint
*kk
= (GLuint
*)((char *)cc
+ t
/ 8);
831 GLint texel
= n_vect
+ 1; /* transparent black */
833 if (!ISTBLACK(input
[k
])) {
834 if (minCol
!= maxCol
) {
835 /* interpolate color */
836 CALCCDOT(texel
, n_vect
, n_comp
, iv
, b
, input
[k
]);
838 kk
[0] |= texel
<< (t
& 7);
842 kk
[0] |= texel
<< (t
& 7);
849 fxt1_quantize_MIXED1 (GLuint
*cc
,
850 GLubyte input
[N_TEXELS
][MAX_COMP
])
852 const GLint n_vect
= 2; /* highest vector number in each microtile */
853 const GLint n_comp
= 3; /* 3 components: R, G, B */
854 GLubyte vec
[2 * 2][MAX_COMP
]; /* 2 extrema for each sub-block */
855 GLfloat b
, iv
[MAX_COMP
]; /* interpolation vector */
857 Fx64 hi
; /* high quadword */
858 GLuint lohi
, lolo
; /* low quadword: hi dword, lo dword */
862 GLint minColL
= 0, maxColL
= -1;
863 GLint minColR
= 0, maxColR
= -1;
865 /* Our solution here is to find the darkest and brightest colors in
866 * the 4x4 tile and use those as the two representative colors.
867 * There are probably better algorithms to use (histogram-based).
869 minSum
= 2000; /* big enough */
870 maxSum
= -1; /* small enough */
871 for (k
= 0; k
< N_TEXELS
/ 2; k
++) {
872 if (!ISTBLACK(input
[k
])) {
874 for (i
= 0; i
< n_comp
; i
++) {
887 minSum
= 2000; /* big enough */
888 maxSum
= -1; /* small enough */
889 for (; k
< N_TEXELS
; k
++) {
890 if (!ISTBLACK(input
[k
])) {
892 for (i
= 0; i
< n_comp
; i
++) {
908 /* all transparent black */
910 for (i
= 0; i
< n_comp
; i
++) {
916 for (i
= 0; i
< n_comp
; i
++) {
917 vec
[0][i
] = input
[minColL
][i
];
918 vec
[1][i
] = input
[maxColL
][i
];
920 if (minColL
!= maxColL
) {
921 /* compute interpolation vector */
922 MAKEIVEC(n_vect
, n_comp
, iv
, b
, vec
[0], vec
[1]);
926 for (k
= N_TEXELS
/ 2 - 1; k
>= 0; k
--) {
927 GLint texel
= n_vect
+ 1; /* transparent black */
928 if (!ISTBLACK(input
[k
])) {
929 /* interpolate color */
930 CALCCDOT(texel
, n_vect
, n_comp
, iv
, b
, input
[k
]);
940 /* right microtile */
942 /* all transparent black */
944 for (i
= 0; i
< n_comp
; i
++) {
950 for (i
= 0; i
< n_comp
; i
++) {
951 vec
[2][i
] = input
[minColR
][i
];
952 vec
[3][i
] = input
[maxColR
][i
];
954 if (minColR
!= maxColR
) {
955 /* compute interpolation vector */
956 MAKEIVEC(n_vect
, n_comp
, iv
, b
, vec
[2], vec
[3]);
960 for (k
= N_TEXELS
- 1; k
>= N_TEXELS
/ 2; k
--) {
961 GLint texel
= n_vect
+ 1; /* transparent black */
962 if (!ISTBLACK(input
[k
])) {
963 /* interpolate color */
964 CALCCDOT(texel
, n_vect
, n_comp
, iv
, b
, input
[k
]);
974 FX64_MOV32(hi
, 9 | (vec
[3][GCOMP
] & 4) | ((vec
[1][GCOMP
] >> 1) & 2)); /* chroma = "1" */
975 for (j
= 2 * 2 - 1; j
>= 0; j
--) {
976 for (i
= 0; i
< n_comp
; i
++) {
979 FX64_OR32(hi
, vec
[j
][i
] >> 3);
982 ((Fx64
*)cc
)[1] = hi
;
987 fxt1_quantize_MIXED0 (GLuint
*cc
,
988 GLubyte input
[N_TEXELS
][MAX_COMP
])
990 const GLint n_vect
= 3; /* highest vector number in each microtile */
991 const GLint n_comp
= 3; /* 3 components: R, G, B */
992 GLubyte vec
[2 * 2][MAX_COMP
]; /* 2 extrema for each sub-block */
993 GLfloat b
, iv
[MAX_COMP
]; /* interpolation vector */
995 Fx64 hi
; /* high quadword */
996 GLuint lohi
, lolo
; /* low quadword: hi dword, lo dword */
998 GLint minColL
= 0, maxColL
= 0;
999 GLint minColR
= 0, maxColR
= 0;
1004 /* Our solution here is to find the darkest and brightest colors in
1005 * the 4x4 tile and use those as the two representative colors.
1006 * There are probably better algorithms to use (histogram-based).
1008 minSum
= 2000; /* big enough */
1009 maxSum
= -1; /* small enough */
1010 for (k
= 0; k
< N_TEXELS
/ 2; k
++) {
1012 for (i
= 0; i
< n_comp
; i
++) {
1024 minSum
= 2000; /* big enough */
1025 maxSum
= -1; /* small enough */
1026 for (; k
< N_TEXELS
; k
++) {
1028 for (i
= 0; i
< n_comp
; i
++) {
1043 GLint maxVarL
= fxt1_variance(NULL
, input
, n_comp
, N_TEXELS
/ 2);
1044 GLint maxVarR
= fxt1_variance(NULL
, &input
[N_TEXELS
/ 2], n_comp
, N_TEXELS
/ 2);
1046 /* Scan the channel with max variance for lo & hi
1047 * and use those as the two representative colors.
1049 minVal
= 2000; /* big enough */
1050 maxVal
= -1; /* small enough */
1051 for (k
= 0; k
< N_TEXELS
/ 2; k
++) {
1052 GLint t
= input
[k
][maxVarL
];
1062 minVal
= 2000; /* big enough */
1063 maxVal
= -1; /* small enough */
1064 for (; k
< N_TEXELS
; k
++) {
1065 GLint t
= input
[k
][maxVarR
];
1077 /* left microtile */
1079 for (i
= 0; i
< n_comp
; i
++) {
1080 vec
[0][i
] = input
[minColL
][i
];
1081 vec
[1][i
] = input
[maxColL
][i
];
1083 if (minColL
!= maxColL
) {
1084 /* compute interpolation vector */
1085 MAKEIVEC(n_vect
, n_comp
, iv
, b
, vec
[0], vec
[1]);
1089 for (k
= N_TEXELS
/ 2 - 1; k
>= 0; k
--) {
1091 /* interpolate color */
1092 CALCCDOT(texel
, n_vect
, n_comp
, iv
, b
, input
[k
]);
1098 /* funky encoding for LSB of green */
1099 if ((GLint
)((lolo
>> 1) & 1) != (((vec
[1][GCOMP
] ^ vec
[0][GCOMP
]) >> 2) & 1)) {
1100 for (i
= 0; i
< n_comp
; i
++) {
1101 vec
[1][i
] = input
[minColL
][i
];
1102 vec
[0][i
] = input
[maxColL
][i
];
1110 /* right microtile */
1112 for (i
= 0; i
< n_comp
; i
++) {
1113 vec
[2][i
] = input
[minColR
][i
];
1114 vec
[3][i
] = input
[maxColR
][i
];
1116 if (minColR
!= maxColR
) {
1117 /* compute interpolation vector */
1118 MAKEIVEC(n_vect
, n_comp
, iv
, b
, vec
[2], vec
[3]);
1122 for (k
= N_TEXELS
- 1; k
>= N_TEXELS
/ 2; k
--) {
1124 /* interpolate color */
1125 CALCCDOT(texel
, n_vect
, n_comp
, iv
, b
, input
[k
]);
1131 /* funky encoding for LSB of green */
1132 if ((GLint
)((lohi
>> 1) & 1) != (((vec
[3][GCOMP
] ^ vec
[2][GCOMP
]) >> 2) & 1)) {
1133 for (i
= 0; i
< n_comp
; i
++) {
1134 vec
[3][i
] = input
[minColR
][i
];
1135 vec
[2][i
] = input
[maxColR
][i
];
1143 FX64_MOV32(hi
, 8 | (vec
[3][GCOMP
] & 4) | ((vec
[1][GCOMP
] >> 1) & 2)); /* chroma = "1" */
1144 for (j
= 2 * 2 - 1; j
>= 0; j
--) {
1145 for (i
= 0; i
< n_comp
; i
++) {
1148 FX64_OR32(hi
, vec
[j
][i
] >> 3);
1151 ((Fx64
*)cc
)[1] = hi
;
1156 fxt1_quantize (GLuint
*cc
, const GLubyte
*lines
[], GLint comps
)
1159 GLubyte reord
[N_TEXELS
][MAX_COMP
];
1161 GLubyte input
[N_TEXELS
][MAX_COMP
];
1165 /* make the whole block opaque */
1166 memset(input
, -1, sizeof(input
));
1169 /* 8 texels each line */
1170 for (l
= 0; l
< 4; l
++) {
1171 for (k
= 0; k
< 4; k
++) {
1172 for (i
= 0; i
< comps
; i
++) {
1173 input
[k
+ l
* 4][i
] = *lines
[l
]++;
1176 for (; k
< 8; k
++) {
1177 for (i
= 0; i
< comps
; i
++) {
1178 input
[k
+ l
* 4 + 12][i
] = *lines
[l
]++;
1184 * 00, 01, 02, 03, 08, 09, 0a, 0b
1185 * 10, 11, 12, 13, 18, 19, 1a, 1b
1186 * 04, 05, 06, 07, 0c, 0d, 0e, 0f
1187 * 14, 15, 16, 17, 1c, 1d, 1e, 1f
1191 * stupidity flows forth from this
1196 /* skip all transparent black texels */
1198 for (k
= 0; k
< N_TEXELS
; k
++) {
1199 /* test all components against 0 */
1200 if (!ISTBLACK(input
[k
])) {
1201 /* texel is not transparent black */
1202 COPY_4UBV(reord
[l
], input
[k
]);
1203 if (reord
[l
][ACOMP
] < (255 - ALPHA_TS
)) {
1204 /* non-opaque texel */
1214 fxt1_quantize_ALPHA0(cc
, input
, reord
, l
);
1215 } else if (l
== 0) {
1216 cc
[0] = cc
[1] = cc
[2] = -1;
1218 } else if (l
< N_TEXELS
) {
1219 fxt1_quantize_HI(cc
, input
, reord
, l
);
1221 fxt1_quantize_CHROMA(cc
, input
);
1223 (void)fxt1_quantize_ALPHA1
;
1224 (void)fxt1_quantize_MIXED1
;
1225 (void)fxt1_quantize_MIXED0
;
1228 fxt1_quantize_ALPHA1(cc
, input
);
1229 } else if (l
== 0) {
1230 cc
[0] = cc
[1] = cc
[2] = ~0u;
1232 } else if (l
< N_TEXELS
) {
1233 fxt1_quantize_MIXED1(cc
, input
);
1235 fxt1_quantize_MIXED0(cc
, input
);
1237 (void)fxt1_quantize_ALPHA0
;
1238 (void)fxt1_quantize_HI
;
1239 (void)fxt1_quantize_CHROMA
;
1246 * Upscale an image by replication, not (typical) stretching.
1247 * We use this when the image width or height is less than a
1248 * certain size (4, 8) and we need to upscale an image.
1251 upscale_teximage2d(GLsizei inWidth
, GLsizei inHeight
,
1252 GLsizei outWidth
, GLsizei outHeight
,
1253 GLint comps
, const GLubyte
*src
, GLint srcRowStride
,
1258 ASSERT(outWidth
>= inWidth
);
1259 ASSERT(outHeight
>= inHeight
);
1261 ASSERT(inWidth
== 1 || inWidth
== 2 || inHeight
== 1 || inHeight
== 2);
1262 ASSERT((outWidth
& 3) == 0);
1263 ASSERT((outHeight
& 3) == 0);
1266 for (i
= 0; i
< outHeight
; i
++) {
1267 const GLint ii
= i
% inHeight
;
1268 for (j
= 0; j
< outWidth
; j
++) {
1269 const GLint jj
= j
% inWidth
;
1270 for (k
= 0; k
< comps
; k
++) {
1271 dest
[(i
* outWidth
+ j
) * comps
+ k
]
1272 = src
[ii
* srcRowStride
+ jj
* comps
+ k
];
1280 fxt1_encode (GLuint width
, GLuint height
, GLint comps
,
1281 const void *source
, GLint srcRowStride
,
1282 void *dest
, GLint destRowStride
)
1285 const GLubyte
*data
;
1286 GLuint
*encoded
= (GLuint
*)dest
;
1287 void *newSource
= NULL
;
1289 assert(comps
== 3 || comps
== 4);
1291 /* Replicate image if width is not M8 or height is not M4 */
1292 if ((width
& 7) | (height
& 3)) {
1293 GLint newWidth
= (width
+ 7) & ~7;
1294 GLint newHeight
= (height
+ 3) & ~3;
1295 newSource
= malloc(comps
* newWidth
* newHeight
* sizeof(GLubyte
));
1297 GET_CURRENT_CONTEXT(ctx
);
1298 _mesa_error(ctx
, GL_OUT_OF_MEMORY
, "texture compression");
1301 upscale_teximage2d(width
, height
, newWidth
, newHeight
,
1302 comps
, (const GLubyte
*) source
,
1303 srcRowStride
, (GLubyte
*) newSource
);
1307 srcRowStride
= comps
* newWidth
;
1310 data
= (const GLubyte
*) source
;
1311 destRowStride
= (destRowStride
- width
* 2) / 4;
1312 for (y
= 0; y
< height
; y
+= 4) {
1313 GLuint offs
= 0 + (y
+ 0) * srcRowStride
;
1314 for (x
= 0; x
< width
; x
+= 8) {
1315 const GLubyte
*lines
[4];
1316 lines
[0] = &data
[offs
];
1317 lines
[1] = lines
[0] + srcRowStride
;
1318 lines
[2] = lines
[1] + srcRowStride
;
1319 lines
[3] = lines
[2] + srcRowStride
;
1321 fxt1_quantize(encoded
, lines
, comps
);
1322 /* 128 bits per 8x4 block */
1325 encoded
+= destRowStride
;
1333 /***************************************************************************\
1336 * The decoder is based on GL_3DFX_texture_compression_FXT1
1337 * specification and serves as a concept for the encoder.
1338 \***************************************************************************/
1341 /* lookup table for scaling 5 bit colors up to 8 bits */
1342 static const GLubyte _rgb_scale_5
[] = {
1343 0, 8, 16, 25, 33, 41, 49, 58,
1344 66, 74, 82, 90, 99, 107, 115, 123,
1345 132, 140, 148, 156, 165, 173, 181, 189,
1346 197, 206, 214, 222, 230, 239, 247, 255
1349 /* lookup table for scaling 6 bit colors up to 8 bits */
1350 static const GLubyte _rgb_scale_6
[] = {
1351 0, 4, 8, 12, 16, 20, 24, 28,
1352 32, 36, 40, 45, 49, 53, 57, 61,
1353 65, 69, 73, 77, 81, 85, 89, 93,
1354 97, 101, 105, 109, 113, 117, 121, 125,
1355 130, 134, 138, 142, 146, 150, 154, 158,
1356 162, 166, 170, 174, 178, 182, 186, 190,
1357 194, 198, 202, 206, 210, 215, 219, 223,
1358 227, 231, 235, 239, 243, 247, 251, 255
/*
 * Decoder helper macros.
 *
 * CC_SEL  shifts the 32-bit word containing bit `which` of the code block
 *         so that bit `which` becomes bit 0.  Callers mask the result
 *         themselves.  A field that crosses a 32-bit word boundary cannot
 *         be fetched with this macro.  The block is only read, so the
 *         cast keeps it const.
 * UP5     expands a 5-bit color value to 8 bits.
 * UP6     expands a 5-bit color value plus one extra LSB `b` to 8 bits.
 * LERP    rounded integer interpolation: t = 0 yields c0, t = n yields c1.
 *         The whole expansion is parenthesized so the macro can be used
 *         safely as an operand of a larger expression.
 */
#define CC_SEL(cc, which) (((const GLuint *)(cc))[(which) / 32] >> ((which) & 31))
#define UP5(c) _rgb_scale_5[(c) & 31]
#define UP6(c, b) _rgb_scale_6[(((c) & 31) << 1) | ((b) & 1)]
#define LERP(n, t, c0, c1) ((((n) - (t)) * (c0) + (t) * (c1) + (n) / 2) / (n))
1369 fxt1_decode_1HI (const GLubyte
*code
, GLint t
, GLubyte
*rgba
)
1374 cc
= (const GLuint
*)(code
+ t
/ 8);
1375 t
= (cc
[0] >> (t
& 7)) & 7;
1378 rgba
[RCOMP
] = rgba
[GCOMP
] = rgba
[BCOMP
] = rgba
[ACOMP
] = 0;
1381 cc
= (const GLuint
*)(code
+ 12);
1383 b
= UP5(CC_SEL(cc
, 0));
1384 g
= UP5(CC_SEL(cc
, 5));
1385 r
= UP5(CC_SEL(cc
, 10));
1386 } else if (t
== 6) {
1387 b
= UP5(CC_SEL(cc
, 15));
1388 g
= UP5(CC_SEL(cc
, 20));
1389 r
= UP5(CC_SEL(cc
, 25));
1391 b
= LERP(6, t
, UP5(CC_SEL(cc
, 0)), UP5(CC_SEL(cc
, 15)));
1392 g
= LERP(6, t
, UP5(CC_SEL(cc
, 5)), UP5(CC_SEL(cc
, 20)));
1393 r
= LERP(6, t
, UP5(CC_SEL(cc
, 10)), UP5(CC_SEL(cc
, 25)));
1404 fxt1_decode_1CHROMA (const GLubyte
*code
, GLint t
, GLubyte
*rgba
)
1409 cc
= (const GLuint
*)code
;
1414 t
= (cc
[0] >> (t
* 2)) & 3;
1417 cc
= (const GLuint
*)(code
+ 8 + t
/ 8);
1418 kk
= cc
[0] >> (t
& 7);
1419 rgba
[BCOMP
] = UP5(kk
);
1420 rgba
[GCOMP
] = UP5(kk
>> 5);
1421 rgba
[RCOMP
] = UP5(kk
>> 10);
1427 fxt1_decode_1MIXED (const GLubyte
*code
, GLint t
, GLubyte
*rgba
)
1433 cc
= (const GLuint
*)code
;
1436 t
= (cc
[1] >> (t
* 2)) & 3;
1438 col
[0][BCOMP
] = (*(const GLuint
*)(code
+ 11)) >> 6;
1439 col
[0][GCOMP
] = CC_SEL(cc
, 99);
1440 col
[0][RCOMP
] = CC_SEL(cc
, 104);
1442 col
[1][BCOMP
] = CC_SEL(cc
, 109);
1443 col
[1][GCOMP
] = CC_SEL(cc
, 114);
1444 col
[1][RCOMP
] = CC_SEL(cc
, 119);
1445 glsb
= CC_SEL(cc
, 126);
1446 selb
= CC_SEL(cc
, 33);
1448 t
= (cc
[0] >> (t
* 2)) & 3;
1450 col
[0][BCOMP
] = CC_SEL(cc
, 64);
1451 col
[0][GCOMP
] = CC_SEL(cc
, 69);
1452 col
[0][RCOMP
] = CC_SEL(cc
, 74);
1454 col
[1][BCOMP
] = CC_SEL(cc
, 79);
1455 col
[1][GCOMP
] = CC_SEL(cc
, 84);
1456 col
[1][RCOMP
] = CC_SEL(cc
, 89);
1457 glsb
= CC_SEL(cc
, 125);
1458 selb
= CC_SEL(cc
, 1);
1461 if (CC_SEL(cc
, 124) & 1) {
1466 rgba
[RCOMP
] = rgba
[BCOMP
] = rgba
[GCOMP
] = rgba
[ACOMP
] = 0;
1470 b
= UP5(col
[0][BCOMP
]);
1471 g
= UP5(col
[0][GCOMP
]);
1472 r
= UP5(col
[0][RCOMP
]);
1473 } else if (t
== 2) {
1474 b
= UP5(col
[1][BCOMP
]);
1475 g
= UP6(col
[1][GCOMP
], glsb
);
1476 r
= UP5(col
[1][RCOMP
]);
1478 b
= (UP5(col
[0][BCOMP
]) + UP5(col
[1][BCOMP
])) / 2;
1479 g
= (UP5(col
[0][GCOMP
]) + UP6(col
[1][GCOMP
], glsb
)) / 2;
1480 r
= (UP5(col
[0][RCOMP
]) + UP5(col
[1][RCOMP
])) / 2;
1491 b
= UP5(col
[0][BCOMP
]);
1492 g
= UP6(col
[0][GCOMP
], glsb
^ selb
);
1493 r
= UP5(col
[0][RCOMP
]);
1494 } else if (t
== 3) {
1495 b
= UP5(col
[1][BCOMP
]);
1496 g
= UP6(col
[1][GCOMP
], glsb
);
1497 r
= UP5(col
[1][RCOMP
]);
1499 b
= LERP(3, t
, UP5(col
[0][BCOMP
]), UP5(col
[1][BCOMP
]));
1500 g
= LERP(3, t
, UP6(col
[0][GCOMP
], glsb
^ selb
),
1501 UP6(col
[1][GCOMP
], glsb
));
1502 r
= LERP(3, t
, UP5(col
[0][RCOMP
]), UP5(col
[1][RCOMP
]));
1513 fxt1_decode_1ALPHA (const GLubyte
*code
, GLint t
, GLubyte
*rgba
)
1518 cc
= (const GLuint
*)code
;
1519 if (CC_SEL(cc
, 124) & 1) {
1525 t
= (cc
[1] >> (t
* 2)) & 3;
1527 col0
[BCOMP
] = (*(const GLuint
*)(code
+ 11)) >> 6;
1528 col0
[GCOMP
] = CC_SEL(cc
, 99);
1529 col0
[RCOMP
] = CC_SEL(cc
, 104);
1530 col0
[ACOMP
] = CC_SEL(cc
, 119);
1532 t
= (cc
[0] >> (t
* 2)) & 3;
1534 col0
[BCOMP
] = CC_SEL(cc
, 64);
1535 col0
[GCOMP
] = CC_SEL(cc
, 69);
1536 col0
[RCOMP
] = CC_SEL(cc
, 74);
1537 col0
[ACOMP
] = CC_SEL(cc
, 109);
1541 b
= UP5(col0
[BCOMP
]);
1542 g
= UP5(col0
[GCOMP
]);
1543 r
= UP5(col0
[RCOMP
]);
1544 a
= UP5(col0
[ACOMP
]);
1545 } else if (t
== 3) {
1546 b
= UP5(CC_SEL(cc
, 79));
1547 g
= UP5(CC_SEL(cc
, 84));
1548 r
= UP5(CC_SEL(cc
, 89));
1549 a
= UP5(CC_SEL(cc
, 114));
1551 b
= LERP(3, t
, UP5(col0
[BCOMP
]), UP5(CC_SEL(cc
, 79)));
1552 g
= LERP(3, t
, UP5(col0
[GCOMP
]), UP5(CC_SEL(cc
, 84)));
1553 r
= LERP(3, t
, UP5(col0
[RCOMP
]), UP5(CC_SEL(cc
, 89)));
1554 a
= LERP(3, t
, UP5(col0
[ACOMP
]), UP5(CC_SEL(cc
, 114)));
1563 t
= (cc
[0] >> (t
* 2)) & 3;
1570 cc
= (const GLuint
*)code
;
1571 a
= UP5(cc
[3] >> (t
* 5 + 13));
1573 cc
= (const GLuint
*)(code
+ 8 + t
/ 8);
1574 kk
= cc
[0] >> (t
& 7);
1588 fxt1_decode_1 (const void *texture
, GLint stride
, /* in pixels */
1589 GLint i
, GLint j
, GLubyte
*rgba
)
1591 static void (*decode_1
[]) (const GLubyte
*, GLint
, GLubyte
*) = {
1592 fxt1_decode_1HI
, /* cc-high = "00?" */
1593 fxt1_decode_1HI
, /* cc-high = "00?" */
1594 fxt1_decode_1CHROMA
, /* cc-chroma = "010" */
1595 fxt1_decode_1ALPHA
, /* alpha = "011" */
1596 fxt1_decode_1MIXED
, /* mixed = "1??" */
1597 fxt1_decode_1MIXED
, /* mixed = "1??" */
1598 fxt1_decode_1MIXED
, /* mixed = "1??" */
1599 fxt1_decode_1MIXED
/* mixed = "1??" */
1602 const GLubyte
*code
= (const GLubyte
*)texture
+
1603 ((j
/ 4) * (stride
/ 8) + (i
/ 8)) * 16;
1604 GLint mode
= CC_SEL(code
, 125);
1612 decode_1
[mode
](code
, t
, rgba
);
1619 fetch_rgb_fxt1(const GLubyte
*map
,
1620 GLint rowStride
, GLint i
, GLint j
, GLfloat
*texel
)
1623 fxt1_decode_1(map
, rowStride
, i
, j
, rgba
);
1624 texel
[RCOMP
] = UBYTE_TO_FLOAT(rgba
[RCOMP
]);
1625 texel
[GCOMP
] = UBYTE_TO_FLOAT(rgba
[GCOMP
]);
1626 texel
[BCOMP
] = UBYTE_TO_FLOAT(rgba
[BCOMP
]);
1627 texel
[ACOMP
] = 1.0F
;
1632 fetch_rgba_fxt1(const GLubyte
*map
,
1633 GLint rowStride
, GLint i
, GLint j
, GLfloat
*texel
)
1636 fxt1_decode_1(map
, rowStride
, i
, j
, rgba
);
1637 texel
[RCOMP
] = UBYTE_TO_FLOAT(rgba
[RCOMP
]);
1638 texel
[GCOMP
] = UBYTE_TO_FLOAT(rgba
[GCOMP
]);
1639 texel
[BCOMP
] = UBYTE_TO_FLOAT(rgba
[BCOMP
]);
1640 texel
[ACOMP
] = UBYTE_TO_FLOAT(rgba
[ACOMP
]);
1644 compressed_fetch_func
1645 _mesa_get_fxt_fetch_func(gl_format format
)
1648 case MESA_FORMAT_RGB_FXT1
:
1649 return fetch_rgb_fxt1
;
1650 case MESA_FORMAT_RGBA_FXT1
:
1651 return fetch_rgba_fxt1
;