/*
 * Mesa 3-D graphics library
 *
 * Copyright (C) 1999-2008  Brian Paul   All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */
/**
 * \file texcompress_fxt1.c
 * GL_3DFX_texture_compression_FXT1 support.
 */
#include <string.h>

#include "texcompress.h"
#include "texcompress_fxt1.h"
43 fxt1_encode (GLuint width
, GLuint height
, GLint comps
,
44 const void *source
, GLint srcRowStride
,
45 void *dest
, GLint destRowStride
);
48 fxt1_decode_1 (const void *texture
, GLint stride
,
49 GLint i
, GLint j
, GLubyte
*rgba
);
53 * Store user's image in rgb_fxt1 format.
56 _mesa_texstore_rgb_fxt1(TEXSTORE_PARAMS
)
58 const GLubyte
*pixels
;
61 const GLubyte
*tempImage
= NULL
;
63 assert(dstFormat
== MESA_FORMAT_RGB_FXT1
);
65 if (srcFormat
!= GL_RGB
||
66 srcType
!= GL_UNSIGNED_BYTE
||
67 ctx
->_ImageTransferState
||
68 srcPacking
->RowLength
!= srcWidth
||
69 srcPacking
->SwapBytes
) {
70 /* convert image to RGB/GLubyte */
71 GLubyte
*tempImageSlices
[1];
72 int rgbRowStride
= 3 * srcWidth
* sizeof(GLubyte
);
73 tempImage
= malloc(srcWidth
* srcHeight
* 3 * sizeof(GLubyte
));
75 return GL_FALSE
; /* out of memory */
76 tempImageSlices
[0] = (GLubyte
*) tempImage
;
77 _mesa_texstore(ctx
, dims
,
79 MESA_FORMAT_RGB_UNORM8
,
80 rgbRowStride
, tempImageSlices
,
81 srcWidth
, srcHeight
, srcDepth
,
82 srcFormat
, srcType
, srcAddr
,
85 srcRowStride
= 3 * srcWidth
;
89 pixels
= _mesa_image_address2d(srcPacking
, srcAddr
, srcWidth
, srcHeight
,
90 srcFormat
, srcType
, 0, 0);
92 srcRowStride
= _mesa_image_row_stride(srcPacking
, srcWidth
, srcFormat
,
93 srcType
) / sizeof(GLubyte
);
98 fxt1_encode(srcWidth
, srcHeight
, 3, pixels
, srcRowStride
,
101 free((void*) tempImage
);
108 * Store user's image in rgba_fxt1 format.
111 _mesa_texstore_rgba_fxt1(TEXSTORE_PARAMS
)
113 const GLubyte
*pixels
;
116 const GLubyte
*tempImage
= NULL
;
118 assert(dstFormat
== MESA_FORMAT_RGBA_FXT1
);
120 if (srcFormat
!= GL_RGBA
||
121 srcType
!= GL_UNSIGNED_BYTE
||
122 ctx
->_ImageTransferState
||
123 srcPacking
->SwapBytes
) {
124 /* convert image to RGBA/GLubyte */
125 GLubyte
*tempImageSlices
[1];
126 int rgbaRowStride
= 4 * srcWidth
* sizeof(GLubyte
);
127 tempImage
= malloc(srcWidth
* srcHeight
* 4 * sizeof(GLubyte
));
129 return GL_FALSE
; /* out of memory */
130 tempImageSlices
[0] = (GLubyte
*) tempImage
;
131 _mesa_texstore(ctx
, dims
,
133 MESA_FORMAT_R8G8B8A8_UNORM
,
134 rgbaRowStride
, tempImageSlices
,
135 srcWidth
, srcHeight
, srcDepth
,
136 srcFormat
, srcType
, srcAddr
,
139 srcRowStride
= 4 * srcWidth
;
143 pixels
= _mesa_image_address2d(srcPacking
, srcAddr
, srcWidth
, srcHeight
,
144 srcFormat
, srcType
, 0, 0);
146 srcRowStride
= _mesa_image_row_stride(srcPacking
, srcWidth
, srcFormat
,
147 srcType
) / sizeof(GLubyte
);
152 fxt1_encode(srcWidth
, srcHeight
, 4, pixels
, srcRowStride
,
155 free((void*) tempImage
);
/***************************************************************************\
 * The encoder was built by reversing the decoder,
 * and is vaguely based on Texus2 by 3dfx. Note that this code
 * is merely a proof of concept, since it is highly UNoptimized;
 * moreover, it is sub-optimal due to initial conditions passed
 * to Lloyd's algorithm (the interpolation modes are even worse).
\***************************************************************************/
/* Tunables and limits shared by the encoder routines. */
#define MAX_COMP 4 /* ever needed maximum number of components in texel */
#define MAX_VECT 4 /* ever needed maximum number of base vectors to find */
#define N_TEXELS 32 /* number of texels in a block (always 32) */
#define LL_N_REP 50 /* number of iterations in lloyd's vq */
#define LL_RMS_D 10 /* fault tolerance (maximum delta) */
#define LL_RMS_E 255 /* fault tolerance (maximum error) */
#define ALPHA_TS 2 /* alpha threshold: (255 - ALPHA_TS) deemed opaque */
/*
 * True when all four components of texel v are zero ("transparent black").
 * Tested byte by byte: v points at a 4-byte texel that is not guaranteed to
 * be suitably aligned for (or legally accessible as) a 32-bit integer.
 * Note that v is evaluated more than once.
 */
#define ISTBLACK(v) ((((v)[0]) | ((v)[1]) | ((v)[2]) | ((v)[3])) == 0)
183 * Define a 64-bit unsigned integer type and macros
187 #define FX64_NATIVE 1
189 typedef uint64_t Fx64
;
191 #define FX64_MOV32(a, b) a = b
192 #define FX64_OR32(a, b) a |= b
193 #define FX64_SHL(a, c) a <<= c
197 #define FX64_NATIVE 0
203 #define FX64_MOV32(a, b) a.lo = b
204 #define FX64_OR32(a, b) a.lo |= b
206 #define FX64_SHL(a, c) \
209 a.hi = a.lo << ((c) - 32); \
212 a.hi = (a.hi << (c)) | (a.lo >> (32 - (c))); \
220 #define F(i) (GLfloat)1 /* can be used to obtain an oblong metric: 0.30 / 0.59 / 0.11 */
221 #define SAFECDOT 1 /* for paranoids */
223 #define MAKEIVEC(NV, NC, IV, B, V0, V1) \
225 /* compute interpolation vector */ \
229 for (i = 0; i < NC; i++) { \
230 IV[i] = (V1[i] - V0[i]) * F(i); \
231 d2 += IV[i] * IV[i]; \
233 rd2 = (GLfloat)NV / d2; \
235 for (i = 0; i < NC; i++) { \
237 B -= IV[i] * V0[i]; \
240 B = B * rd2 + 0.5f; \
243 #define CALCCDOT(TEXEL, NV, NC, IV, B, V)\
245 GLfloat dot = 0.0F; \
246 for (i = 0; i < NC; i++) { \
247 dot += V[i] * IV[i]; \
249 TEXEL = (GLint)(dot + B); \
253 } else if (TEXEL > NV) { \
261 fxt1_bestcol (GLfloat vec
[][MAX_COMP
], GLint nv
,
262 GLubyte input
[MAX_COMP
], GLint nc
)
264 GLint i
, j
, best
= -1;
265 GLfloat err
= 1e9
; /* big enough */
267 for (j
= 0; j
< nv
; j
++) {
269 for (i
= 0; i
< nc
; i
++) {
270 e
+= (vec
[j
][i
] - input
[i
]) * (vec
[j
][i
] - input
[i
]);
283 fxt1_worst (GLfloat vec
[MAX_COMP
],
284 GLubyte input
[N_TEXELS
][MAX_COMP
], GLint nc
, GLint n
)
286 GLint i
, k
, worst
= -1;
287 GLfloat err
= -1.0F
; /* small enough */
289 for (k
= 0; k
< n
; k
++) {
291 for (i
= 0; i
< nc
; i
++) {
292 e
+= (vec
[i
] - input
[k
][i
]) * (vec
[i
] - input
[k
][i
]);
305 fxt1_variance (GLdouble variance
[MAX_COMP
],
306 GLubyte input
[N_TEXELS
][MAX_COMP
], GLint nc
, GLint n
)
308 GLint i
, k
, best
= 0;
310 GLdouble var
, maxvar
= -1; /* small enough */
311 GLdouble teenth
= 1.0 / n
;
313 for (i
= 0; i
< nc
; i
++) {
315 for (k
= 0; k
< n
; k
++) {
316 GLint t
= input
[k
][i
];
320 var
= sx2
* teenth
- sx
* sx
* teenth
* teenth
;
335 fxt1_choose (GLfloat vec
[][MAX_COMP
], GLint nv
,
336 GLubyte input
[N_TEXELS
][MAX_COMP
], GLint nc
, GLint n
)
339 /* Choose colors from a grid.
343 for (j
= 0; j
< nv
; j
++) {
344 GLint m
= j
* (n
- 1) / (nv
- 1);
345 for (i
= 0; i
< nc
; i
++) {
346 vec
[j
][i
] = input
[m
][i
];
350 /* Our solution here is to find the darkest and brightest colors in
351 * the 8x4 tile and use those as the two representative colors.
352 * There are probably better algorithms to use (histogram-based).
355 GLint minSum
= 2000; /* big enough */
356 GLint maxSum
= -1; /* small enough */
357 GLint minCol
= 0; /* phoudoin: silent compiler! */
358 GLint maxCol
= 0; /* phoudoin: silent compiler! */
368 memset(hist
, 0, sizeof(hist
));
370 for (k
= 0; k
< n
; k
++) {
374 for (i
= 0; i
< nc
; i
++) {
379 for (l
= 0; l
< n
; l
++) {
388 } else if (hist
[l
].key
== key
) {
404 for (j
= 0; j
< lenh
; j
++) {
405 for (i
= 0; i
< nc
; i
++) {
406 vec
[j
][i
] = (GLfloat
)input
[hist
[j
].idx
][i
];
409 for (; j
< nv
; j
++) {
410 for (i
= 0; i
< nc
; i
++) {
411 vec
[j
][i
] = vec
[0][i
];
417 for (j
= 0; j
< nv
; j
++) {
418 for (i
= 0; i
< nc
; i
++) {
419 vec
[j
][i
] = ((nv
- 1 - j
) * input
[minCol
][i
] + j
* input
[maxCol
][i
] + (nv
- 1) / 2) / (GLfloat
)(nv
- 1);
429 fxt1_lloyd (GLfloat vec
[][MAX_COMP
], GLint nv
,
430 GLubyte input
[N_TEXELS
][MAX_COMP
], GLint nc
, GLint n
)
432 /* Use the generalized lloyd's algorithm for VQ:
433 * find 4 color vectors.
435 * for each sample color
436 * sort to nearest vector.
438 * replace each vector with the centroid of its matching colors.
440 * repeat until RMS doesn't improve.
442 * if a color vector has no samples, or becomes the same as another
443 * vector, replace it with the color which is farthest from a sample.
445 * vec[][MAX_COMP] initial vectors and resulting colors
446 * nv number of resulting colors required
447 * input[N_TEXELS][MAX_COMP] input texels
448 * nc number of components in input / vec
449 * n number of input samples
452 GLint sum
[MAX_VECT
][MAX_COMP
]; /* used to accumulate closest texels */
453 GLint cnt
[MAX_VECT
]; /* how many times a certain vector was chosen */
454 GLfloat error
, lasterror
= 1e9
;
459 for (rep
= 0; rep
< LL_N_REP
; rep
++) {
460 /* reset sums & counters */
461 for (j
= 0; j
< nv
; j
++) {
462 for (i
= 0; i
< nc
; i
++) {
469 /* scan whole block */
470 for (k
= 0; k
< n
; k
++) {
473 GLfloat err
= 1e9
; /* big enough */
474 /* determine best vector */
475 for (j
= 0; j
< nv
; j
++) {
476 GLfloat e
= (vec
[j
][0] - input
[k
][0]) * (vec
[j
][0] - input
[k
][0]) +
477 (vec
[j
][1] - input
[k
][1]) * (vec
[j
][1] - input
[k
][1]) +
478 (vec
[j
][2] - input
[k
][2]) * (vec
[j
][2] - input
[k
][2]);
480 e
+= (vec
[j
][3] - input
[k
][3]) * (vec
[j
][3] - input
[k
][3]);
488 GLint best
= fxt1_bestcol(vec
, nv
, input
[k
], nc
, &err
);
491 /* add in closest color */
492 for (i
= 0; i
< nc
; i
++) {
493 sum
[best
][i
] += input
[k
][i
];
495 /* mark this vector as used */
497 /* accumulate error */
502 if ((error
< LL_RMS_E
) ||
503 ((error
< lasterror
) && ((lasterror
- error
) < LL_RMS_D
))) {
504 return !0; /* good match */
508 /* move each vector to the barycenter of its closest colors */
509 for (j
= 0; j
< nv
; j
++) {
511 GLfloat div
= 1.0F
/ cnt
[j
];
512 for (i
= 0; i
< nc
; i
++) {
513 vec
[j
][i
] = div
* sum
[j
][i
];
516 /* this vec has no samples or is identical with a previous vec */
517 GLint worst
= fxt1_worst(vec
[j
], input
, nc
, n
);
518 for (i
= 0; i
< nc
; i
++) {
519 vec
[j
][i
] = input
[worst
][i
];
525 return 0; /* could not converge fast enough */
530 fxt1_quantize_CHROMA (GLuint
*cc
,
531 GLubyte input
[N_TEXELS
][MAX_COMP
])
533 const GLint n_vect
= 4; /* 4 base vectors to find */
534 const GLint n_comp
= 3; /* 3 components: R, G, B */
535 GLfloat vec
[MAX_VECT
][MAX_COMP
];
537 Fx64 hi
; /* high quadword */
538 GLuint lohi
, lolo
; /* low quadword: hi dword, lo dword */
540 if (fxt1_choose(vec
, n_vect
, input
, n_comp
, N_TEXELS
) != 0) {
541 fxt1_lloyd(vec
, n_vect
, input
, n_comp
, N_TEXELS
);
544 FX64_MOV32(hi
, 4); /* cc-chroma = "010" + unused bit */
545 for (j
= n_vect
- 1; j
>= 0; j
--) {
546 for (i
= 0; i
< n_comp
; i
++) {
549 FX64_OR32(hi
, (GLuint
)(vec
[j
][i
] / 8.0F
));
552 ((Fx64
*)cc
)[1] = hi
;
555 /* right microtile */
556 for (k
= N_TEXELS
- 1; k
>= N_TEXELS
/2; k
--) {
558 lohi
|= fxt1_bestcol(vec
, n_vect
, input
[k
], n_comp
);
561 for (; k
>= 0; k
--) {
563 lolo
|= fxt1_bestcol(vec
, n_vect
, input
[k
], n_comp
);
571 fxt1_quantize_ALPHA0 (GLuint
*cc
,
572 GLubyte input
[N_TEXELS
][MAX_COMP
],
573 GLubyte reord
[N_TEXELS
][MAX_COMP
], GLint n
)
575 const GLint n_vect
= 3; /* 3 base vectors to find */
576 const GLint n_comp
= 4; /* 4 components: R, G, B, A */
577 GLfloat vec
[MAX_VECT
][MAX_COMP
];
579 Fx64 hi
; /* high quadword */
580 GLuint lohi
, lolo
; /* low quadword: hi dword, lo dword */
582 /* the last vector indicates zero */
583 for (i
= 0; i
< n_comp
; i
++) {
587 /* the first n texels in reord are guaranteed to be non-zero */
588 if (fxt1_choose(vec
, n_vect
, reord
, n_comp
, n
) != 0) {
589 fxt1_lloyd(vec
, n_vect
, reord
, n_comp
, n
);
592 FX64_MOV32(hi
, 6); /* alpha = "011" + lerp = 0 */
593 for (j
= n_vect
- 1; j
>= 0; j
--) {
596 FX64_OR32(hi
, (GLuint
)(vec
[j
][ACOMP
] / 8.0F
));
598 for (j
= n_vect
- 1; j
>= 0; j
--) {
599 for (i
= 0; i
< n_comp
- 1; i
++) {
602 FX64_OR32(hi
, (GLuint
)(vec
[j
][i
] / 8.0F
));
605 ((Fx64
*)cc
)[1] = hi
;
608 /* right microtile */
609 for (k
= N_TEXELS
- 1; k
>= N_TEXELS
/2; k
--) {
611 lohi
|= fxt1_bestcol(vec
, n_vect
+ 1, input
[k
], n_comp
);
614 for (; k
>= 0; k
--) {
616 lolo
|= fxt1_bestcol(vec
, n_vect
+ 1, input
[k
], n_comp
);
624 fxt1_quantize_ALPHA1 (GLuint
*cc
,
625 GLubyte input
[N_TEXELS
][MAX_COMP
])
627 const GLint n_vect
= 3; /* highest vector number in each microtile */
628 const GLint n_comp
= 4; /* 4 components: R, G, B, A */
629 GLfloat vec
[1 + 1 + 1][MAX_COMP
]; /* 1.5 extrema for each sub-block */
630 GLfloat b
, iv
[MAX_COMP
]; /* interpolation vector */
632 Fx64 hi
; /* high quadword */
633 GLuint lohi
, lolo
; /* low quadword: hi dword, lo dword */
637 GLint minColL
= 0, maxColL
= 0;
638 GLint minColR
= 0, maxColR
= 0;
639 GLint sumL
= 0, sumR
= 0;
641 /* Our solution here is to find the darkest and brightest colors in
642 * the 4x4 tile and use those as the two representative colors.
643 * There are probably better algorithms to use (histogram-based).
646 while ((minColL
== maxColL
) && nn_comp
) {
647 minSum
= 2000; /* big enough */
648 maxSum
= -1; /* small enough */
649 for (k
= 0; k
< N_TEXELS
/ 2; k
++) {
651 for (i
= 0; i
< nn_comp
; i
++) {
669 while ((minColR
== maxColR
) && nn_comp
) {
670 minSum
= 2000; /* big enough */
671 maxSum
= -1; /* small enough */
672 for (k
= N_TEXELS
/ 2; k
< N_TEXELS
; k
++) {
674 for (i
= 0; i
< nn_comp
; i
++) {
691 /* choose the common vector (yuck!) */
694 GLint v1
= 0, v2
= 0;
695 GLfloat err
= 1e9
; /* big enough */
696 GLfloat tv
[2 * 2][MAX_COMP
]; /* 2 extrema for each sub-block */
697 for (i
= 0; i
< n_comp
; i
++) {
698 tv
[0][i
] = input
[minColL
][i
];
699 tv
[1][i
] = input
[maxColL
][i
];
700 tv
[2][i
] = input
[minColR
][i
];
701 tv
[3][i
] = input
[maxColR
][i
];
703 for (j1
= 0; j1
< 2; j1
++) {
704 for (j2
= 2; j2
< 4; j2
++) {
706 for (i
= 0; i
< n_comp
; i
++) {
707 e
+= (tv
[j1
][i
] - tv
[j2
][i
]) * (tv
[j1
][i
] - tv
[j2
][i
]);
716 for (i
= 0; i
< n_comp
; i
++) {
717 vec
[0][i
] = tv
[1 - v1
][i
];
718 vec
[1][i
] = (tv
[v1
][i
] * sumL
+ tv
[v2
][i
] * sumR
) / (sumL
+ sumR
);
719 vec
[2][i
] = tv
[5 - v2
][i
];
725 if (minColL
!= maxColL
) {
726 /* compute interpolation vector */
727 MAKEIVEC(n_vect
, n_comp
, iv
, b
, vec
[0], vec
[1]);
731 for (k
= N_TEXELS
/ 2 - 1; k
>= 0; k
--) {
733 /* interpolate color */
734 CALCCDOT(texel
, n_vect
, n_comp
, iv
, b
, input
[k
]);
743 /* right microtile */
745 if (minColR
!= maxColR
) {
746 /* compute interpolation vector */
747 MAKEIVEC(n_vect
, n_comp
, iv
, b
, vec
[2], vec
[1]);
751 for (k
= N_TEXELS
- 1; k
>= N_TEXELS
/ 2; k
--) {
753 /* interpolate color */
754 CALCCDOT(texel
, n_vect
, n_comp
, iv
, b
, input
[k
]);
763 FX64_MOV32(hi
, 7); /* alpha = "011" + lerp = 1 */
764 for (j
= n_vect
- 1; j
>= 0; j
--) {
767 FX64_OR32(hi
, (GLuint
)(vec
[j
][ACOMP
] / 8.0F
));
769 for (j
= n_vect
- 1; j
>= 0; j
--) {
770 for (i
= 0; i
< n_comp
- 1; i
++) {
773 FX64_OR32(hi
, (GLuint
)(vec
[j
][i
] / 8.0F
));
776 ((Fx64
*)cc
)[1] = hi
;
781 fxt1_quantize_HI (GLuint
*cc
,
782 GLubyte input
[N_TEXELS
][MAX_COMP
],
783 GLubyte reord
[N_TEXELS
][MAX_COMP
], GLint n
)
785 const GLint n_vect
= 6; /* highest vector number */
786 const GLint n_comp
= 3; /* 3 components: R, G, B */
787 GLfloat b
= 0.0F
; /* phoudoin: silent compiler! */
788 GLfloat iv
[MAX_COMP
]; /* interpolation vector */
790 GLuint hihi
; /* high quadword: hi dword */
792 GLint minSum
= 2000; /* big enough */
793 GLint maxSum
= -1; /* small enough */
794 GLint minCol
= 0; /* phoudoin: silent compiler! */
795 GLint maxCol
= 0; /* phoudoin: silent compiler! */
797 /* Our solution here is to find the darkest and brightest colors in
798 * the 8x4 tile and use those as the two representative colors.
799 * There are probably better algorithms to use (histogram-based).
801 for (k
= 0; k
< n
; k
++) {
803 for (i
= 0; i
< n_comp
; i
++) {
816 hihi
= 0; /* cc-hi = "00" */
817 for (i
= 0; i
< n_comp
; i
++) {
820 hihi
|= reord
[maxCol
][i
] >> 3;
822 for (i
= 0; i
< n_comp
; i
++) {
825 hihi
|= reord
[minCol
][i
] >> 3;
828 cc
[0] = cc
[1] = cc
[2] = 0;
830 /* compute interpolation vector */
831 if (minCol
!= maxCol
) {
832 MAKEIVEC(n_vect
, n_comp
, iv
, b
, reord
[minCol
], reord
[maxCol
]);
836 for (k
= N_TEXELS
- 1; k
>= 0; k
--) {
838 GLuint
*kk
= (GLuint
*)((char *)cc
+ t
/ 8);
839 GLint texel
= n_vect
+ 1; /* transparent black */
841 if (!ISTBLACK(input
[k
])) {
842 if (minCol
!= maxCol
) {
843 /* interpolate color */
844 CALCCDOT(texel
, n_vect
, n_comp
, iv
, b
, input
[k
]);
846 kk
[0] |= texel
<< (t
& 7);
850 kk
[0] |= texel
<< (t
& 7);
857 fxt1_quantize_MIXED1 (GLuint
*cc
,
858 GLubyte input
[N_TEXELS
][MAX_COMP
])
860 const GLint n_vect
= 2; /* highest vector number in each microtile */
861 const GLint n_comp
= 3; /* 3 components: R, G, B */
862 GLubyte vec
[2 * 2][MAX_COMP
]; /* 2 extrema for each sub-block */
863 GLfloat b
, iv
[MAX_COMP
]; /* interpolation vector */
865 Fx64 hi
; /* high quadword */
866 GLuint lohi
, lolo
; /* low quadword: hi dword, lo dword */
870 GLint minColL
= 0, maxColL
= -1;
871 GLint minColR
= 0, maxColR
= -1;
873 /* Our solution here is to find the darkest and brightest colors in
874 * the 4x4 tile and use those as the two representative colors.
875 * There are probably better algorithms to use (histogram-based).
877 minSum
= 2000; /* big enough */
878 maxSum
= -1; /* small enough */
879 for (k
= 0; k
< N_TEXELS
/ 2; k
++) {
880 if (!ISTBLACK(input
[k
])) {
882 for (i
= 0; i
< n_comp
; i
++) {
895 minSum
= 2000; /* big enough */
896 maxSum
= -1; /* small enough */
897 for (; k
< N_TEXELS
; k
++) {
898 if (!ISTBLACK(input
[k
])) {
900 for (i
= 0; i
< n_comp
; i
++) {
916 /* all transparent black */
918 for (i
= 0; i
< n_comp
; i
++) {
924 for (i
= 0; i
< n_comp
; i
++) {
925 vec
[0][i
] = input
[minColL
][i
];
926 vec
[1][i
] = input
[maxColL
][i
];
928 if (minColL
!= maxColL
) {
929 /* compute interpolation vector */
930 MAKEIVEC(n_vect
, n_comp
, iv
, b
, vec
[0], vec
[1]);
934 for (k
= N_TEXELS
/ 2 - 1; k
>= 0; k
--) {
935 GLint texel
= n_vect
+ 1; /* transparent black */
936 if (!ISTBLACK(input
[k
])) {
937 /* interpolate color */
938 CALCCDOT(texel
, n_vect
, n_comp
, iv
, b
, input
[k
]);
948 /* right microtile */
950 /* all transparent black */
952 for (i
= 0; i
< n_comp
; i
++) {
958 for (i
= 0; i
< n_comp
; i
++) {
959 vec
[2][i
] = input
[minColR
][i
];
960 vec
[3][i
] = input
[maxColR
][i
];
962 if (minColR
!= maxColR
) {
963 /* compute interpolation vector */
964 MAKEIVEC(n_vect
, n_comp
, iv
, b
, vec
[2], vec
[3]);
968 for (k
= N_TEXELS
- 1; k
>= N_TEXELS
/ 2; k
--) {
969 GLint texel
= n_vect
+ 1; /* transparent black */
970 if (!ISTBLACK(input
[k
])) {
971 /* interpolate color */
972 CALCCDOT(texel
, n_vect
, n_comp
, iv
, b
, input
[k
]);
982 FX64_MOV32(hi
, 9 | (vec
[3][GCOMP
] & 4) | ((vec
[1][GCOMP
] >> 1) & 2)); /* chroma = "1" */
983 for (j
= 2 * 2 - 1; j
>= 0; j
--) {
984 for (i
= 0; i
< n_comp
; i
++) {
987 FX64_OR32(hi
, vec
[j
][i
] >> 3);
990 ((Fx64
*)cc
)[1] = hi
;
995 fxt1_quantize_MIXED0 (GLuint
*cc
,
996 GLubyte input
[N_TEXELS
][MAX_COMP
])
998 const GLint n_vect
= 3; /* highest vector number in each microtile */
999 const GLint n_comp
= 3; /* 3 components: R, G, B */
1000 GLubyte vec
[2 * 2][MAX_COMP
]; /* 2 extrema for each sub-block */
1001 GLfloat b
, iv
[MAX_COMP
]; /* interpolation vector */
1003 Fx64 hi
; /* high quadword */
1004 GLuint lohi
, lolo
; /* low quadword: hi dword, lo dword */
1006 GLint minColL
= 0, maxColL
= 0;
1007 GLint minColR
= 0, maxColR
= 0;
1012 /* Our solution here is to find the darkest and brightest colors in
1013 * the 4x4 tile and use those as the two representative colors.
1014 * There are probably better algorithms to use (histogram-based).
1016 minSum
= 2000; /* big enough */
1017 maxSum
= -1; /* small enough */
1018 for (k
= 0; k
< N_TEXELS
/ 2; k
++) {
1020 for (i
= 0; i
< n_comp
; i
++) {
1032 minSum
= 2000; /* big enough */
1033 maxSum
= -1; /* small enough */
1034 for (; k
< N_TEXELS
; k
++) {
1036 for (i
= 0; i
< n_comp
; i
++) {
1051 GLint maxVarL
= fxt1_variance(NULL
, input
, n_comp
, N_TEXELS
/ 2);
1052 GLint maxVarR
= fxt1_variance(NULL
, &input
[N_TEXELS
/ 2], n_comp
, N_TEXELS
/ 2);
1054 /* Scan the channel with max variance for lo & hi
1055 * and use those as the two representative colors.
1057 minVal
= 2000; /* big enough */
1058 maxVal
= -1; /* small enough */
1059 for (k
= 0; k
< N_TEXELS
/ 2; k
++) {
1060 GLint t
= input
[k
][maxVarL
];
1070 minVal
= 2000; /* big enough */
1071 maxVal
= -1; /* small enough */
1072 for (; k
< N_TEXELS
; k
++) {
1073 GLint t
= input
[k
][maxVarR
];
1085 /* left microtile */
1087 for (i
= 0; i
< n_comp
; i
++) {
1088 vec
[0][i
] = input
[minColL
][i
];
1089 vec
[1][i
] = input
[maxColL
][i
];
1091 if (minColL
!= maxColL
) {
1092 /* compute interpolation vector */
1093 MAKEIVEC(n_vect
, n_comp
, iv
, b
, vec
[0], vec
[1]);
1097 for (k
= N_TEXELS
/ 2 - 1; k
>= 0; k
--) {
1099 /* interpolate color */
1100 CALCCDOT(texel
, n_vect
, n_comp
, iv
, b
, input
[k
]);
1106 /* funky encoding for LSB of green */
1107 if ((GLint
)((lolo
>> 1) & 1) != (((vec
[1][GCOMP
] ^ vec
[0][GCOMP
]) >> 2) & 1)) {
1108 for (i
= 0; i
< n_comp
; i
++) {
1109 vec
[1][i
] = input
[minColL
][i
];
1110 vec
[0][i
] = input
[maxColL
][i
];
1118 /* right microtile */
1120 for (i
= 0; i
< n_comp
; i
++) {
1121 vec
[2][i
] = input
[minColR
][i
];
1122 vec
[3][i
] = input
[maxColR
][i
];
1124 if (minColR
!= maxColR
) {
1125 /* compute interpolation vector */
1126 MAKEIVEC(n_vect
, n_comp
, iv
, b
, vec
[2], vec
[3]);
1130 for (k
= N_TEXELS
- 1; k
>= N_TEXELS
/ 2; k
--) {
1132 /* interpolate color */
1133 CALCCDOT(texel
, n_vect
, n_comp
, iv
, b
, input
[k
]);
1139 /* funky encoding for LSB of green */
1140 if ((GLint
)((lohi
>> 1) & 1) != (((vec
[3][GCOMP
] ^ vec
[2][GCOMP
]) >> 2) & 1)) {
1141 for (i
= 0; i
< n_comp
; i
++) {
1142 vec
[3][i
] = input
[minColR
][i
];
1143 vec
[2][i
] = input
[maxColR
][i
];
1151 FX64_MOV32(hi
, 8 | (vec
[3][GCOMP
] & 4) | ((vec
[1][GCOMP
] >> 1) & 2)); /* chroma = "1" */
1152 for (j
= 2 * 2 - 1; j
>= 0; j
--) {
1153 for (i
= 0; i
< n_comp
; i
++) {
1156 FX64_OR32(hi
, vec
[j
][i
] >> 3);
1159 ((Fx64
*)cc
)[1] = hi
;
1164 fxt1_quantize (GLuint
*cc
, const GLubyte
*lines
[], GLint comps
)
1167 GLubyte reord
[N_TEXELS
][MAX_COMP
];
1169 GLubyte input
[N_TEXELS
][MAX_COMP
];
1173 /* make the whole block opaque */
1174 memset(input
, -1, sizeof(input
));
1177 /* 8 texels each line */
1178 for (l
= 0; l
< 4; l
++) {
1179 for (k
= 0; k
< 4; k
++) {
1180 for (i
= 0; i
< comps
; i
++) {
1181 input
[k
+ l
* 4][i
] = *lines
[l
]++;
1184 for (; k
< 8; k
++) {
1185 for (i
= 0; i
< comps
; i
++) {
1186 input
[k
+ l
* 4 + 12][i
] = *lines
[l
]++;
1192 * 00, 01, 02, 03, 08, 09, 0a, 0b
1193 * 10, 11, 12, 13, 18, 19, 1a, 1b
1194 * 04, 05, 06, 07, 0c, 0d, 0e, 0f
1195 * 14, 15, 16, 17, 1c, 1d, 1e, 1f
1199 * stupidity flows forth from this
1204 /* skip all transparent black texels */
1206 for (k
= 0; k
< N_TEXELS
; k
++) {
1207 /* test all components against 0 */
1208 if (!ISTBLACK(input
[k
])) {
1209 /* texel is not transparent black */
1210 COPY_4UBV(reord
[l
], input
[k
]);
1211 if (reord
[l
][ACOMP
] < (255 - ALPHA_TS
)) {
1212 /* non-opaque texel */
1222 fxt1_quantize_ALPHA0(cc
, input
, reord
, l
);
1223 } else if (l
== 0) {
1224 cc
[0] = cc
[1] = cc
[2] = -1;
1226 } else if (l
< N_TEXELS
) {
1227 fxt1_quantize_HI(cc
, input
, reord
, l
);
1229 fxt1_quantize_CHROMA(cc
, input
);
1231 (void)fxt1_quantize_ALPHA1
;
1232 (void)fxt1_quantize_MIXED1
;
1233 (void)fxt1_quantize_MIXED0
;
1236 fxt1_quantize_ALPHA1(cc
, input
);
1237 } else if (l
== 0) {
1238 cc
[0] = cc
[1] = cc
[2] = ~0u;
1240 } else if (l
< N_TEXELS
) {
1241 fxt1_quantize_MIXED1(cc
, input
);
1243 fxt1_quantize_MIXED0(cc
, input
);
1245 (void)fxt1_quantize_ALPHA0
;
1246 (void)fxt1_quantize_HI
;
1247 (void)fxt1_quantize_CHROMA
;
1254 * Upscale an image by replication, not (typical) stretching.
1255 * We use this when the image width or height is less than a
1256 * certain size (4, 8) and we need to upscale an image.
1259 upscale_teximage2d(GLsizei inWidth
, GLsizei inHeight
,
1260 GLsizei outWidth
, GLsizei outHeight
,
1261 GLint comps
, const GLubyte
*src
, GLint srcRowStride
,
1266 assert(outWidth
>= inWidth
);
1267 assert(outHeight
>= inHeight
);
1269 assert(inWidth
== 1 || inWidth
== 2 || inHeight
== 1 || inHeight
== 2);
1270 assert((outWidth
& 3) == 0);
1271 assert((outHeight
& 3) == 0);
1274 for (i
= 0; i
< outHeight
; i
++) {
1275 const GLint ii
= i
% inHeight
;
1276 for (j
= 0; j
< outWidth
; j
++) {
1277 const GLint jj
= j
% inWidth
;
1278 for (k
= 0; k
< comps
; k
++) {
1279 dest
[(i
* outWidth
+ j
) * comps
+ k
]
1280 = src
[ii
* srcRowStride
+ jj
* comps
+ k
];
1288 fxt1_encode (GLuint width
, GLuint height
, GLint comps
,
1289 const void *source
, GLint srcRowStride
,
1290 void *dest
, GLint destRowStride
)
1293 const GLubyte
*data
;
1294 GLuint
*encoded
= (GLuint
*)dest
;
1295 void *newSource
= NULL
;
1297 assert(comps
== 3 || comps
== 4);
1299 /* Replicate image if width is not M8 or height is not M4 */
1300 if ((width
& 7) | (height
& 3)) {
1301 GLint newWidth
= (width
+ 7) & ~7;
1302 GLint newHeight
= (height
+ 3) & ~3;
1303 newSource
= malloc(comps
* newWidth
* newHeight
* sizeof(GLubyte
));
1305 GET_CURRENT_CONTEXT(ctx
);
1306 _mesa_error(ctx
, GL_OUT_OF_MEMORY
, "texture compression");
1309 upscale_teximage2d(width
, height
, newWidth
, newHeight
,
1310 comps
, (const GLubyte
*) source
,
1311 srcRowStride
, (GLubyte
*) newSource
);
1315 srcRowStride
= comps
* newWidth
;
1318 data
= (const GLubyte
*) source
;
1319 destRowStride
= (destRowStride
- width
* 2) / 4;
1320 for (y
= 0; y
< height
; y
+= 4) {
1321 GLuint offs
= 0 + (y
+ 0) * srcRowStride
;
1322 for (x
= 0; x
< width
; x
+= 8) {
1323 const GLubyte
*lines
[4];
1324 lines
[0] = &data
[offs
];
1325 lines
[1] = lines
[0] + srcRowStride
;
1326 lines
[2] = lines
[1] + srcRowStride
;
1327 lines
[3] = lines
[2] + srcRowStride
;
1329 fxt1_quantize(encoded
, lines
, comps
);
1330 /* 128 bits per 8x4 block */
1333 encoded
+= destRowStride
;
/***************************************************************************\
 * The decoder is based on GL_3DFX_texture_compression_FXT1
 * specification and serves as a concept for the encoder.
\***************************************************************************/
1349 /* lookup table for scaling 5 bit colors up to 8 bits */
1350 static const GLubyte _rgb_scale_5
[] = {
1351 0, 8, 16, 25, 33, 41, 49, 58,
1352 66, 74, 82, 90, 99, 107, 115, 123,
1353 132, 140, 148, 156, 165, 173, 181, 189,
1354 197, 206, 214, 222, 230, 239, 247, 255
1357 /* lookup table for scaling 6 bit colors up to 8 bits */
1358 static const GLubyte _rgb_scale_6
[] = {
1359 0, 4, 8, 12, 16, 20, 24, 28,
1360 32, 36, 40, 45, 49, 53, 57, 61,
1361 65, 69, 73, 77, 81, 85, 89, 93,
1362 97, 101, 105, 109, 113, 117, 121, 125,
1363 130, 134, 138, 142, 146, 150, 154, 158,
1364 162, 166, 170, 174, 178, 182, 186, 190,
1365 194, 198, 202, 206, 210, 215, 219, 223,
1366 227, 231, 235, 239, 243, 247, 251, 255
/* Shift the 128-bit block so that bit number 'which' becomes bit 0; the
 * result still carries the higher bits of that 32-bit word, so callers
 * mask it (directly or through UP5/UP6).
 */
#define CC_SEL(cc, which) (((const GLuint *)(cc))[(which) / 32] >> ((which) & 31))
/* expand the low 5 bits of c to an 8-bit color */
#define UP5(c) _rgb_scale_5[(c) & 31]
/* expand the low 5 bits of c plus one extra least significant bit b to an
 * 8-bit color
 */
#define UP6(c, b) _rgb_scale_6[(((c) & 31) << 1) | ((b) & 1)]
/* rounded interpolation: step t of n between c0 (t == 0) and c1 (t == n).
 * Fully parenthesized so it can be used inside a larger expression.
 */
#define LERP(n, t, c0, c1) ((((n) - (t)) * (c0) + (t) * (c1) + (n) / 2) / (n))
1377 fxt1_decode_1HI (const GLubyte
*code
, GLint t
, GLubyte
*rgba
)
1382 cc
= (const GLuint
*)(code
+ t
/ 8);
1383 t
= (cc
[0] >> (t
& 7)) & 7;
1386 rgba
[RCOMP
] = rgba
[GCOMP
] = rgba
[BCOMP
] = rgba
[ACOMP
] = 0;
1389 cc
= (const GLuint
*)(code
+ 12);
1391 b
= UP5(CC_SEL(cc
, 0));
1392 g
= UP5(CC_SEL(cc
, 5));
1393 r
= UP5(CC_SEL(cc
, 10));
1394 } else if (t
== 6) {
1395 b
= UP5(CC_SEL(cc
, 15));
1396 g
= UP5(CC_SEL(cc
, 20));
1397 r
= UP5(CC_SEL(cc
, 25));
1399 b
= LERP(6, t
, UP5(CC_SEL(cc
, 0)), UP5(CC_SEL(cc
, 15)));
1400 g
= LERP(6, t
, UP5(CC_SEL(cc
, 5)), UP5(CC_SEL(cc
, 20)));
1401 r
= LERP(6, t
, UP5(CC_SEL(cc
, 10)), UP5(CC_SEL(cc
, 25)));
1412 fxt1_decode_1CHROMA (const GLubyte
*code
, GLint t
, GLubyte
*rgba
)
1417 cc
= (const GLuint
*)code
;
1422 t
= (cc
[0] >> (t
* 2)) & 3;
1425 cc
= (const GLuint
*)(code
+ 8 + t
/ 8);
1426 kk
= cc
[0] >> (t
& 7);
1427 rgba
[BCOMP
] = UP5(kk
);
1428 rgba
[GCOMP
] = UP5(kk
>> 5);
1429 rgba
[RCOMP
] = UP5(kk
>> 10);
1435 fxt1_decode_1MIXED (const GLubyte
*code
, GLint t
, GLubyte
*rgba
)
1441 cc
= (const GLuint
*)code
;
1444 t
= (cc
[1] >> (t
* 2)) & 3;
1446 col
[0][BCOMP
] = (*(const GLuint
*)(code
+ 11)) >> 6;
1447 col
[0][GCOMP
] = CC_SEL(cc
, 99);
1448 col
[0][RCOMP
] = CC_SEL(cc
, 104);
1450 col
[1][BCOMP
] = CC_SEL(cc
, 109);
1451 col
[1][GCOMP
] = CC_SEL(cc
, 114);
1452 col
[1][RCOMP
] = CC_SEL(cc
, 119);
1453 glsb
= CC_SEL(cc
, 126);
1454 selb
= CC_SEL(cc
, 33);
1456 t
= (cc
[0] >> (t
* 2)) & 3;
1458 col
[0][BCOMP
] = CC_SEL(cc
, 64);
1459 col
[0][GCOMP
] = CC_SEL(cc
, 69);
1460 col
[0][RCOMP
] = CC_SEL(cc
, 74);
1462 col
[1][BCOMP
] = CC_SEL(cc
, 79);
1463 col
[1][GCOMP
] = CC_SEL(cc
, 84);
1464 col
[1][RCOMP
] = CC_SEL(cc
, 89);
1465 glsb
= CC_SEL(cc
, 125);
1466 selb
= CC_SEL(cc
, 1);
1469 if (CC_SEL(cc
, 124) & 1) {
1474 rgba
[RCOMP
] = rgba
[BCOMP
] = rgba
[GCOMP
] = rgba
[ACOMP
] = 0;
1478 b
= UP5(col
[0][BCOMP
]);
1479 g
= UP5(col
[0][GCOMP
]);
1480 r
= UP5(col
[0][RCOMP
]);
1481 } else if (t
== 2) {
1482 b
= UP5(col
[1][BCOMP
]);
1483 g
= UP6(col
[1][GCOMP
], glsb
);
1484 r
= UP5(col
[1][RCOMP
]);
1486 b
= (UP5(col
[0][BCOMP
]) + UP5(col
[1][BCOMP
])) / 2;
1487 g
= (UP5(col
[0][GCOMP
]) + UP6(col
[1][GCOMP
], glsb
)) / 2;
1488 r
= (UP5(col
[0][RCOMP
]) + UP5(col
[1][RCOMP
])) / 2;
1499 b
= UP5(col
[0][BCOMP
]);
1500 g
= UP6(col
[0][GCOMP
], glsb
^ selb
);
1501 r
= UP5(col
[0][RCOMP
]);
1502 } else if (t
== 3) {
1503 b
= UP5(col
[1][BCOMP
]);
1504 g
= UP6(col
[1][GCOMP
], glsb
);
1505 r
= UP5(col
[1][RCOMP
]);
1507 b
= LERP(3, t
, UP5(col
[0][BCOMP
]), UP5(col
[1][BCOMP
]));
1508 g
= LERP(3, t
, UP6(col
[0][GCOMP
], glsb
^ selb
),
1509 UP6(col
[1][GCOMP
], glsb
));
1510 r
= LERP(3, t
, UP5(col
[0][RCOMP
]), UP5(col
[1][RCOMP
]));
1521 fxt1_decode_1ALPHA (const GLubyte
*code
, GLint t
, GLubyte
*rgba
)
1526 cc
= (const GLuint
*)code
;
1527 if (CC_SEL(cc
, 124) & 1) {
1533 t
= (cc
[1] >> (t
* 2)) & 3;
1535 col0
[BCOMP
] = (*(const GLuint
*)(code
+ 11)) >> 6;
1536 col0
[GCOMP
] = CC_SEL(cc
, 99);
1537 col0
[RCOMP
] = CC_SEL(cc
, 104);
1538 col0
[ACOMP
] = CC_SEL(cc
, 119);
1540 t
= (cc
[0] >> (t
* 2)) & 3;
1542 col0
[BCOMP
] = CC_SEL(cc
, 64);
1543 col0
[GCOMP
] = CC_SEL(cc
, 69);
1544 col0
[RCOMP
] = CC_SEL(cc
, 74);
1545 col0
[ACOMP
] = CC_SEL(cc
, 109);
1549 b
= UP5(col0
[BCOMP
]);
1550 g
= UP5(col0
[GCOMP
]);
1551 r
= UP5(col0
[RCOMP
]);
1552 a
= UP5(col0
[ACOMP
]);
1553 } else if (t
== 3) {
1554 b
= UP5(CC_SEL(cc
, 79));
1555 g
= UP5(CC_SEL(cc
, 84));
1556 r
= UP5(CC_SEL(cc
, 89));
1557 a
= UP5(CC_SEL(cc
, 114));
1559 b
= LERP(3, t
, UP5(col0
[BCOMP
]), UP5(CC_SEL(cc
, 79)));
1560 g
= LERP(3, t
, UP5(col0
[GCOMP
]), UP5(CC_SEL(cc
, 84)));
1561 r
= LERP(3, t
, UP5(col0
[RCOMP
]), UP5(CC_SEL(cc
, 89)));
1562 a
= LERP(3, t
, UP5(col0
[ACOMP
]), UP5(CC_SEL(cc
, 114)));
1571 t
= (cc
[0] >> (t
* 2)) & 3;
1578 cc
= (const GLuint
*)code
;
1579 a
= UP5(cc
[3] >> (t
* 5 + 13));
1581 cc
= (const GLuint
*)(code
+ 8 + t
/ 8);
1582 kk
= cc
[0] >> (t
& 7);
1596 fxt1_decode_1 (const void *texture
, GLint stride
, /* in pixels */
1597 GLint i
, GLint j
, GLubyte
*rgba
)
1599 static void (*decode_1
[]) (const GLubyte
*, GLint
, GLubyte
*) = {
1600 fxt1_decode_1HI
, /* cc-high = "00?" */
1601 fxt1_decode_1HI
, /* cc-high = "00?" */
1602 fxt1_decode_1CHROMA
, /* cc-chroma = "010" */
1603 fxt1_decode_1ALPHA
, /* alpha = "011" */
1604 fxt1_decode_1MIXED
, /* mixed = "1??" */
1605 fxt1_decode_1MIXED
, /* mixed = "1??" */
1606 fxt1_decode_1MIXED
, /* mixed = "1??" */
1607 fxt1_decode_1MIXED
/* mixed = "1??" */
1610 const GLubyte
*code
= (const GLubyte
*)texture
+
1611 ((j
/ 4) * (stride
/ 8) + (i
/ 8)) * 16;
1612 GLint mode
= CC_SEL(code
, 125);
1620 decode_1
[mode
](code
, t
, rgba
);
1627 fetch_rgb_fxt1(const GLubyte
*map
,
1628 GLint rowStride
, GLint i
, GLint j
, GLfloat
*texel
)
1631 fxt1_decode_1(map
, rowStride
, i
, j
, rgba
);
1632 texel
[RCOMP
] = UBYTE_TO_FLOAT(rgba
[RCOMP
]);
1633 texel
[GCOMP
] = UBYTE_TO_FLOAT(rgba
[GCOMP
]);
1634 texel
[BCOMP
] = UBYTE_TO_FLOAT(rgba
[BCOMP
]);
1635 texel
[ACOMP
] = 1.0F
;
1640 fetch_rgba_fxt1(const GLubyte
*map
,
1641 GLint rowStride
, GLint i
, GLint j
, GLfloat
*texel
)
1644 fxt1_decode_1(map
, rowStride
, i
, j
, rgba
);
1645 texel
[RCOMP
] = UBYTE_TO_FLOAT(rgba
[RCOMP
]);
1646 texel
[GCOMP
] = UBYTE_TO_FLOAT(rgba
[GCOMP
]);
1647 texel
[BCOMP
] = UBYTE_TO_FLOAT(rgba
[BCOMP
]);
1648 texel
[ACOMP
] = UBYTE_TO_FLOAT(rgba
[ACOMP
]);
1652 compressed_fetch_func
1653 _mesa_get_fxt_fetch_func(mesa_format format
)
1656 case MESA_FORMAT_RGB_FXT1
:
1657 return fetch_rgb_fxt1
;
1658 case MESA_FORMAT_RGBA_FXT1
:
1659 return fetch_rgba_fxt1
;