2 * Mesa 3-D graphics library
4 * Copyright (C) 1999-2008 Brian Paul All Rights Reserved.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 * OTHER DEALINGS IN THE SOFTWARE.
27 * \file texcompress_fxt1.c
28 * GL_3DFX_texture_compression_FXT1 support.
38 #include "texcompress.h"
39 #include "texcompress_fxt1.h"
/*
 * Forward declarations of the two core routines of this file.
 *
 * fxt1_encode   - takes a width x height image with comps components per
 *                 texel at source (rows srcRowStride apart) and writes the
 *                 compressed result to dest (rows destRowStride apart).
 * fxt1_decode_1 - decodes the single texel at column i, row j of a
 *                 compressed texture into rgba; stride is given in pixels.
 */
44 fxt1_encode (GLuint width
, GLuint height
, GLint comps
,
45 const void *source
, GLint srcRowStride
,
46 void *dest
, GLint destRowStride
);
49 fxt1_decode_1 (const void *texture
, GLint stride
,
50 GLint i
, GLint j
, GLubyte
*rgba
);
54 * Store user's image in rgb_fxt1 format.
/*
 * Texstore entry point for MESA_FORMAT_RGB_FXT1 (asserted below).
 *
 * If the incoming image is not already tightly packed GL_RGB /
 * GL_UNSIGNED_BYTE data (wrong format or type, pixel transfer ops active,
 * RowLength differing from srcWidth, or byte swapping requested), it is
 * first converted into a temporary 3-bytes-per-texel buffer through
 * _mesa_texstore with MESA_FORMAT_RGB_UNORM8.  Otherwise the source
 * address and row stride are derived from the unpack state.  The pixels
 * are then handed to fxt1_encode with three components.
 *
 * Returns GL_FALSE when the temporary buffer cannot be allocated.
 */
57 _mesa_texstore_rgb_fxt1(TEXSTORE_PARAMS
)
59 const GLubyte
*pixels
;
62 const GLubyte
*tempImage
= NULL
;
64 ASSERT(dstFormat
== MESA_FORMAT_RGB_FXT1
);
66 if (srcFormat
!= GL_RGB
||
67 srcType
!= GL_UNSIGNED_BYTE
||
68 ctx
->_ImageTransferState
||
69 srcPacking
->RowLength
!= srcWidth
||
70 srcPacking
->SwapBytes
) {
71 /* convert image to RGB/GLubyte */
72 GLubyte
*tempImageSlices
[1];
73 int rgbRowStride
= 3 * srcWidth
* sizeof(GLubyte
);
74 tempImage
= malloc(srcWidth
* srcHeight
* 3 * sizeof(GLubyte
));
76 return GL_FALSE
; /* out of memory */
77 tempImageSlices
[0] = (GLubyte
*) tempImage
;
78 _mesa_texstore(ctx
, dims
,
80 MESA_FORMAT_RGB_UNORM8
,
81 rgbRowStride
, tempImageSlices
,
82 srcWidth
, srcHeight
, srcDepth
,
83 srcFormat
, srcType
, srcAddr
,
86 srcRowStride
= 3 * srcWidth
;
/* Direct path: locate the first texel and the row stride (in bytes) using the unpack state. */
90 pixels
= _mesa_image_address2d(srcPacking
, srcAddr
, srcWidth
, srcHeight
,
91 srcFormat
, srcType
, 0, 0);
93 srcRowStride
= _mesa_image_row_stride(srcPacking
, srcWidth
, srcFormat
,
94 srcType
) / sizeof(GLubyte
);
/* Compress with three components per texel. */
99 fxt1_encode(srcWidth
, srcHeight
, 3, pixels
, srcRowStride
,
/* tempImage is still NULL when no conversion was needed. */
102 free((void*) tempImage
);
109 * Store user's image in rgba_fxt1 format.
/*
 * Texstore entry point for MESA_FORMAT_RGBA_FXT1 (asserted below).
 *
 * If the incoming image is not GL_RGBA / GL_UNSIGNED_BYTE, or pixel
 * transfer ops or byte swapping are active, it is first converted into a
 * temporary 4-bytes-per-texel buffer through _mesa_texstore with
 * MESA_FORMAT_R8G8B8A8_UNORM.  Otherwise the source address and row stride
 * are derived from the unpack state.  The pixels are then handed to
 * fxt1_encode with four components.
 *
 * Unlike the RGB variant, the visible condition has no RowLength test;
 * the direct path takes its stride from _mesa_image_row_stride.
 *
 * Returns GL_FALSE when the temporary buffer cannot be allocated.
 */
112 _mesa_texstore_rgba_fxt1(TEXSTORE_PARAMS
)
114 const GLubyte
*pixels
;
117 const GLubyte
*tempImage
= NULL
;
119 ASSERT(dstFormat
== MESA_FORMAT_RGBA_FXT1
);
121 if (srcFormat
!= GL_RGBA
||
122 srcType
!= GL_UNSIGNED_BYTE
||
123 ctx
->_ImageTransferState
||
124 srcPacking
->SwapBytes
) {
125 /* convert image to RGBA/GLubyte */
126 GLubyte
*tempImageSlices
[1];
127 int rgbaRowStride
= 4 * srcWidth
* sizeof(GLubyte
);
128 tempImage
= malloc(srcWidth
* srcHeight
* 4 * sizeof(GLubyte
));
130 return GL_FALSE
; /* out of memory */
131 tempImageSlices
[0] = (GLubyte
*) tempImage
;
132 _mesa_texstore(ctx
, dims
,
134 MESA_FORMAT_R8G8B8A8_UNORM
,
135 rgbaRowStride
, tempImageSlices
,
136 srcWidth
, srcHeight
, srcDepth
,
137 srcFormat
, srcType
, srcAddr
,
140 srcRowStride
= 4 * srcWidth
;
/* Direct path: locate the first texel and the row stride (in bytes) using the unpack state. */
144 pixels
= _mesa_image_address2d(srcPacking
, srcAddr
, srcWidth
, srcHeight
,
145 srcFormat
, srcType
, 0, 0);
147 srcRowStride
= _mesa_image_row_stride(srcPacking
, srcWidth
, srcFormat
,
148 srcType
) / sizeof(GLubyte
);
/* Compress with four components per texel. */
153 fxt1_encode(srcWidth
, srcHeight
, 4, pixels
, srcRowStride
,
/* tempImage is still NULL when no conversion was needed. */
156 free((void*) tempImage
);
162 /***************************************************************************\
165 * The encoder was built by reversing the decoder,
166 * and is vaguely based on Texus2 by 3dfx. Note that this code
167 * is merely a proof of concept, since it is highly UNoptimized;
168 * moreover, it is sub-optimal due to initial conditions passed
169 * to Lloyd's algorithm (the interpolation modes are even worse).
170 \***************************************************************************/
/*
 * Encoder configuration and helper macros.
 *
 * MAX_COMP, MAX_VECT, N_TEXELS  - array bounds used throughout the encoder.
 * LL_N_REP, LL_RMS_D, LL_RMS_E  - iteration limit and error thresholds
 *                                 used by fxt1_lloyd.
 * ALPHA_TS                      - alpha below (255 - ALPHA_TS) counts as
 *                                 non-opaque in fxt1_quantize.
 * ISTBLACK(v)                   - true when the four bytes at v, read as
 *                                 one GLuint, are all zero.
 *                                 NOTE(review): the GLubyte to GLuint
 *                                 pointer cast assumes suitable alignment
 *                                 and tolerant aliasing rules - confirm.
 * Fx64, FX64_MOV32/OR32/SHL     - 64-bit accumulator: either a native
 *                                 uint64_t or a lo/hi pair, chosen by a
 *                                 condition not visible here.
 * F(i)                          - per-component weight, currently 1.
 * MAKEIVEC                      - builds interpolation vector IV and bias
 *                                 B from endpoints V0 and V1 for NV steps.
 * CALCCDOT                      - projects texel V onto IV, adds B,
 *                                 truncates to an integer index in TEXEL.
 */
173 #define MAX_COMP 4 /* ever needed maximum number of components in texel */
174 #define MAX_VECT 4 /* ever needed maximum number of base vectors to find */
175 #define N_TEXELS 32 /* number of texels in a block (always 32) */
176 #define LL_N_REP 50 /* number of iterations in lloyd's vq */
177 #define LL_RMS_D 10 /* fault tolerance (maximum delta) */
178 #define LL_RMS_E 255 /* fault tolerance (maximum error) */
179 #define ALPHA_TS 2 /* alpha threshold: (255 - ALPHA_TS) deemed opaque */
180 #define ISTBLACK(v) (*((GLuint *)(v)) == 0)
184 * Define a 64-bit unsigned integer type and macros
188 #define FX64_NATIVE 1
190 typedef uint64_t Fx64
;
192 #define FX64_MOV32(a, b) a = b
193 #define FX64_OR32(a, b) a |= b
194 #define FX64_SHL(a, c) a <<= c
198 #define FX64_NATIVE 0
204 #define FX64_MOV32(a, b) a.lo = b
205 #define FX64_OR32(a, b) a.lo |= b
207 #define FX64_SHL(a, c) \
210 a.hi = a.lo << ((c) - 32); \
213 a.hi = (a.hi << (c)) | (a.lo >> (32 - (c))); \
221 #define F(i) (GLfloat)1 /* can be used to obtain an oblong metric: 0.30 / 0.59 / 0.11 */
222 #define SAFECDOT 1 /* for paranoids */
224 #define MAKEIVEC(NV, NC, IV, B, V0, V1) \
226 /* compute interpolation vector */ \
230 for (i = 0; i < NC; i++) { \
231 IV[i] = (V1[i] - V0[i]) * F(i); \
232 d2 += IV[i] * IV[i]; \
234 rd2 = (GLfloat)NV / d2; \
236 for (i = 0; i < NC; i++) { \
238 B -= IV[i] * V0[i]; \
241 B = B * rd2 + 0.5f; \
244 #define CALCCDOT(TEXEL, NV, NC, IV, B, V)\
246 GLfloat dot = 0.0F; \
247 for (i = 0; i < NC; i++) { \
248 dot += V[i] * IV[i]; \
250 TEXEL = (GLint)(dot + B); \
254 } else if (TEXEL > NV) { \
/*
 * For one input texel, walk the nv candidate vectors in vec and
 * accumulate the squared distance to input over the first nc components.
 *
 * best starts at -1 and err at 1e9.  The compare and return statements
 * are not visible here; callers OR the result into 2-bit index words, so
 * it presumably returns the index of the closest vector - confirm.
 */
262 fxt1_bestcol (GLfloat vec
[][MAX_COMP
], GLint nv
,
263 GLubyte input
[MAX_COMP
], GLint nc
)
265 GLint i
, j
, best
= -1;
266 GLfloat err
= 1e9
; /* big enough */
268 for (j
= 0; j
< nv
; j
++) {
270 for (i
= 0; i
< nc
; i
++) {
271 e
+= (vec
[j
][i
] - input
[i
]) * (vec
[j
][i
] - input
[i
]);
/*
 * For one vector vec, walk the first n texels of input and accumulate the
 * squared distance to each over nc components.
 *
 * worst starts at -1 and err at -1.0F.  The compare and return statements
 * are not visible here; fxt1_lloyd uses the result as an index into input,
 * so it presumably returns the index of the farthest texel - confirm.
 */
284 fxt1_worst (GLfloat vec
[MAX_COMP
],
285 GLubyte input
[N_TEXELS
][MAX_COMP
], GLint nc
, GLint n
)
287 GLint i
, k
, worst
= -1;
288 GLfloat err
= -1.0F
; /* small enough */
290 for (k
= 0; k
< n
; k
++) {
292 for (i
= 0; i
< nc
; i
++) {
293 e
+= (vec
[i
] - input
[k
][i
]) * (vec
[i
] - input
[k
][i
]);
/*
 * Per-component variance over the first n texels of input.
 *
 * For each of the nc components the variance is computed from running
 * sums sx and sx2 as sx2/n - (sx/n)^2, with teenth holding 1/n.
 *
 * The accumulation, store into variance and return are not visible here.
 * fxt1_quantize_MIXED0 passes NULL for variance and uses the result as a
 * component index, so it presumably returns the component with the
 * largest variance and tolerates a NULL output array - confirm.
 */
306 fxt1_variance (GLdouble variance
[MAX_COMP
],
307 GLubyte input
[N_TEXELS
][MAX_COMP
], GLint nc
, GLint n
)
309 GLint i
, k
, best
= 0;
311 GLdouble var
, maxvar
= -1; /* small enough */
312 GLdouble teenth
= 1.0 / n
;
314 for (i
= 0; i
< nc
; i
++) {
316 for (k
= 0; k
< n
; k
++) {
317 GLint t
= input
[k
][i
];
321 var
= sx2
* teenth
- sx
* sx
* teenth
* teenth
;
/*
 * Pick nv starting vectors in vec from the first n texels of input,
 * using nc components.
 *
 * Three strategies are visible below:
 *   - grid sampling: vector j copies texel j * (n - 1) / (nv - 1);
 *   - darkest/brightest: vectors interpolate between input[minCol] and
 *     input[maxCol];
 *   - histogram: hist is cleared, texels are matched against it by key,
 *     vectors are copied from the texels at hist[j].idx, and any vectors
 *     beyond lenh are filled with copies of vec[0].
 * How one of them is selected is not visible here.
 *
 * The return statements are not visible; callers run fxt1_lloyd only when
 * the result is non-zero.
 */
336 fxt1_choose (GLfloat vec
[][MAX_COMP
], GLint nv
,
337 GLubyte input
[N_TEXELS
][MAX_COMP
], GLint nc
, GLint n
)
340 /* Choose colors from a grid.
344 for (j
= 0; j
< nv
; j
++) {
345 GLint m
= j
* (n
- 1) / (nv
- 1);
346 for (i
= 0; i
< nc
; i
++) {
347 vec
[j
][i
] = input
[m
][i
];
351 /* Our solution here is to find the darkest and brightest colors in
352 * the 8x4 tile and use those as the two representative colors.
353 * There are probably better algorithms to use (histogram-based).
356 GLint minSum
= 2000; /* big enough */
357 GLint maxSum
= -1; /* small enough */
358 GLint minCol
= 0; /* phoudoin: silent compiler! */
359 GLint maxCol
= 0; /* phoudoin: silent compiler! */
369 memset(hist
, 0, sizeof(hist
));
371 for (k
= 0; k
< n
; k
++) {
375 for (i
= 0; i
< nc
; i
++) {
380 for (l
= 0; l
< n
; l
++) {
389 } else if (hist
[l
].key
== key
) {
405 for (j
= 0; j
< lenh
; j
++) {
406 for (i
= 0; i
< nc
; i
++) {
407 vec
[j
][i
] = (GLfloat
)input
[hist
[j
].idx
][i
];
410 for (; j
< nv
; j
++) {
411 for (i
= 0; i
< nc
; i
++) {
412 vec
[j
][i
] = vec
[0][i
];
418 for (j
= 0; j
< nv
; j
++) {
419 for (i
= 0; i
< nc
; i
++) {
420 vec
[j
][i
] = ((nv
- 1 - j
) * input
[minCol
][i
] + j
* input
[maxCol
][i
] + (nv
- 1) / 2) / (GLfloat
)(nv
- 1);
/*
 * Refine the nv vectors in vec against the first n texels of input by
 * iterating at most LL_N_REP times:
 *   1. assign every texel to its nearest vector, summing the texel into
 *      sum[best] (the cnt and error updates are not visible here);
 *   2. return non-zero when error is below LL_RMS_E, or when it improved
 *      on lasterror by less than LL_RMS_D;
 *   3. otherwise move each vector to sum / cnt, or re-seed it from the
 *      texel returned by fxt1_worst.
 * Returns 0 when the iteration limit is reached.
 *
 * The first three components are always compared; the guard on the
 * fourth-component term is not visible here.
 */
430 fxt1_lloyd (GLfloat vec
[][MAX_COMP
], GLint nv
,
431 GLubyte input
[N_TEXELS
][MAX_COMP
], GLint nc
, GLint n
)
433 /* Use the generalized lloyd's algorithm for VQ:
434 * find 4 color vectors.
436 * for each sample color
437 * sort to nearest vector.
439 * replace each vector with the centroid of its matching colors.
441 * repeat until RMS doesn't improve.
443 * if a color vector has no samples, or becomes the same as another
444 * vector, replace it with the color which is farthest from a sample.
446 * vec[][MAX_COMP] initial vectors and resulting colors
447 * nv number of resulting colors required
448 * input[N_TEXELS][MAX_COMP] input texels
449 * nc number of components in input / vec
450 * n number of input samples
453 GLint sum
[MAX_VECT
][MAX_COMP
]; /* used to accumulate closest texels */
454 GLint cnt
[MAX_VECT
]; /* how many times a certain vector was chosen */
455 GLfloat error
, lasterror
= 1e9
;
460 for (rep
= 0; rep
< LL_N_REP
; rep
++) {
461 /* reset sums & counters */
462 for (j
= 0; j
< nv
; j
++) {
463 for (i
= 0; i
< nc
; i
++) {
470 /* scan whole block */
471 for (k
= 0; k
< n
; k
++) {
474 GLfloat err
= 1e9
; /* big enough */
475 /* determine best vector */
476 for (j
= 0; j
< nv
; j
++) {
477 GLfloat e
= (vec
[j
][0] - input
[k
][0]) * (vec
[j
][0] - input
[k
][0]) +
478 (vec
[j
][1] - input
[k
][1]) * (vec
[j
][1] - input
[k
][1]) +
479 (vec
[j
][2] - input
[k
][2]) * (vec
[j
][2] - input
[k
][2]);
481 e
+= (vec
[j
][3] - input
[k
][3]) * (vec
[j
][3] - input
[k
][3]);
/* NOTE(review): this call passes five arguments while fxt1_bestcol is defined with four; presumably it sits in a disabled preprocessor branch - confirm. */
489 GLint best
= fxt1_bestcol(vec
, nv
, input
[k
], nc
, &err
);
492 /* add in closest color */
493 for (i
= 0; i
< nc
; i
++) {
494 sum
[best
][i
] += input
[k
][i
];
496 /* mark this vector as used */
498 /* accumulate error */
/* Stop once the error is small enough or has stopped improving noticeably. */
503 if ((error
< LL_RMS_E
) ||
504 ((error
< lasterror
) && ((lasterror
- error
) < LL_RMS_D
))) {
505 return !0; /* good match */
509 /* move each vector to the barycenter of its closest colors */
510 for (j
= 0; j
< nv
; j
++) {
/* NOTE(review): the division below needs cnt[j] to be non-zero; the guarding condition is not visible here - confirm. */
512 GLfloat div
= 1.0F
/ cnt
[j
];
513 for (i
= 0; i
< nc
; i
++) {
514 vec
[j
][i
] = div
* sum
[j
][i
];
517 /* this vec has no samples or is identical with a previous vec */
518 GLint worst
= fxt1_worst(vec
[j
], input
, nc
, n
);
519 for (i
= 0; i
< nc
; i
++) {
520 vec
[j
][i
] = input
[worst
][i
];
526 return 0; /* could not converge fast enough */
/*
 * Encode one 32-texel block in CC_CHROMA mode: four RGB base vectors and
 * one vector index per texel.
 *
 * The base vectors come from fxt1_choose, refined by fxt1_lloyd when
 * fxt1_choose returns non-zero.  The high quadword starts from the mode
 * value 4 and receives every vector component divided by 8 (vectors
 * walked from last to first); it is stored at ((Fx64 *)cc)[1].  Texel
 * indices from fxt1_bestcol are ORed into lohi for texels N_TEXELS-1 down
 * to N_TEXELS/2 and into lolo for the remaining texels.  The shifts
 * between successive ORs and the final stores of lohi/lolo are not
 * visible here.
 */
531 fxt1_quantize_CHROMA (GLuint
*cc
,
532 GLubyte input
[N_TEXELS
][MAX_COMP
])
534 const GLint n_vect
= 4; /* 4 base vectors to find */
535 const GLint n_comp
= 3; /* 3 components: R, G, B */
536 GLfloat vec
[MAX_VECT
][MAX_COMP
];
538 Fx64 hi
; /* high quadword */
539 GLuint lohi
, lolo
; /* low quadword: hi dword, lo dword */
541 if (fxt1_choose(vec
, n_vect
, input
, n_comp
, N_TEXELS
) != 0) {
542 fxt1_lloyd(vec
, n_vect
, input
, n_comp
, N_TEXELS
);
545 FX64_MOV32(hi
, 4); /* cc-chroma = "010" + unused bit */
546 for (j
= n_vect
- 1; j
>= 0; j
--) {
547 for (i
= 0; i
< n_comp
; i
++) {
550 FX64_OR32(hi
, (GLuint
)(vec
[j
][i
] / 8.0F
));
553 ((Fx64
*)cc
)[1] = hi
;
556 /* right microtile */
557 for (k
= N_TEXELS
- 1; k
>= N_TEXELS
/2; k
--) {
559 lohi
|= fxt1_bestcol(vec
, n_vect
, input
[k
], n_comp
);
562 for (; k
>= 0; k
--) {
564 lolo
|= fxt1_bestcol(vec
, n_vect
, input
[k
], n_comp
);
/*
 * Encode one block in CC_ALPHA mode without interpolation (lerp = 0):
 * three RGBA base vectors plus a fourth vector that indicates zero.
 *
 * input holds all 32 texels; reord holds the n texels that are not
 * transparent black, and only those feed fxt1_choose / fxt1_lloyd.  The
 * high quadword starts from the mode value 6, then receives the alpha of
 * each vector followed by the colour components, all divided by 8; it is
 * stored at ((Fx64 *)cc)[1].  Texel indices are chosen by fxt1_bestcol
 * over n_vect + 1 vectors so that the zero vector can be selected.
 */
572 fxt1_quantize_ALPHA0 (GLuint
*cc
,
573 GLubyte input
[N_TEXELS
][MAX_COMP
],
574 GLubyte reord
[N_TEXELS
][MAX_COMP
], GLint n
)
576 const GLint n_vect
= 3; /* 3 base vectors to find */
577 const GLint n_comp
= 4; /* 4 components: R, G, B, A */
578 GLfloat vec
[MAX_VECT
][MAX_COMP
];
580 Fx64 hi
; /* high quadword */
581 GLuint lohi
, lolo
; /* low quadword: hi dword, lo dword */
583 /* the last vector indicates zero */
584 for (i
= 0; i
< n_comp
; i
++) {
588 /* the first n texels in reord are guaranteed to be non-zero */
589 if (fxt1_choose(vec
, n_vect
, reord
, n_comp
, n
) != 0) {
590 fxt1_lloyd(vec
, n_vect
, reord
, n_comp
, n
);
593 FX64_MOV32(hi
, 6); /* alpha = "011" + lerp = 0 */
594 for (j
= n_vect
- 1; j
>= 0; j
--) {
597 FX64_OR32(hi
, (GLuint
)(vec
[j
][ACOMP
] / 8.0F
));
599 for (j
= n_vect
- 1; j
>= 0; j
--) {
600 for (i
= 0; i
< n_comp
- 1; i
++) {
603 FX64_OR32(hi
, (GLuint
)(vec
[j
][i
] / 8.0F
));
606 ((Fx64
*)cc
)[1] = hi
;
609 /* right microtile */
610 for (k
= N_TEXELS
- 1; k
>= N_TEXELS
/2; k
--) {
612 lohi
|= fxt1_bestcol(vec
, n_vect
+ 1, input
[k
], n_comp
);
615 for (; k
>= 0; k
--) {
617 lolo
|= fxt1_bestcol(vec
, n_vect
+ 1, input
[k
], n_comp
);
/*
 * Encode one block in CC_ALPHA mode with interpolation (lerp = 1).
 *
 * Each 16-texel half (left: texels 0..15, right: 16..31) is searched for
 * a darkest and a brightest texel; the search repeats while both indices
 * coincide and nn_comp is non-zero (how nn_comp changes is not visible
 * here).  The four extrema are copied to tv, and the left/right pair
 * with the smallest squared distance is presumably recorded in v1/v2
 * (the selection itself is not visible).  The encoder then keeps three
 * vectors:
 *   vec[0] - the other left extreme,
 *   vec[1] - the shared vector, a sumL/sumR weighted mix of tv[v1], tv[v2],
 *   vec[2] - the other right extreme.
 * Texels in each half are indexed by projecting them on the line from the
 * half-specific vector to vec[1] (MAKEIVEC / CALCCDOT).  The high
 * quadword starts from the mode value 7 and receives alpha then colour
 * components divided by 8; it is stored at ((Fx64 *)cc)[1].
 *
 * NOTE(review): vec[1] divides by (sumL + sumR); whether that sum can be
 * zero depends on lines not visible here - confirm.
 */
625 fxt1_quantize_ALPHA1 (GLuint
*cc
,
626 GLubyte input
[N_TEXELS
][MAX_COMP
])
628 const GLint n_vect
= 3; /* highest vector number in each microtile */
629 const GLint n_comp
= 4; /* 4 components: R, G, B, A */
630 GLfloat vec
[1 + 1 + 1][MAX_COMP
]; /* 1.5 extrema for each sub-block */
631 GLfloat b
, iv
[MAX_COMP
]; /* interpolation vector */
633 Fx64 hi
; /* high quadword */
634 GLuint lohi
, lolo
; /* low quadword: hi dword, lo dword */
638 GLint minColL
= 0, maxColL
= 0;
639 GLint minColR
= 0, maxColR
= 0;
640 GLint sumL
= 0, sumR
= 0;
642 /* Our solution here is to find the darkest and brightest colors in
643 * the 4x4 tile and use those as the two representative colors.
644 * There are probably better algorithms to use (histogram-based).
647 while ((minColL
== maxColL
) && nn_comp
) {
648 minSum
= 2000; /* big enough */
649 maxSum
= -1; /* small enough */
650 for (k
= 0; k
< N_TEXELS
/ 2; k
++) {
652 for (i
= 0; i
< nn_comp
; i
++) {
670 while ((minColR
== maxColR
) && nn_comp
) {
671 minSum
= 2000; /* big enough */
672 maxSum
= -1; /* small enough */
673 for (k
= N_TEXELS
/ 2; k
< N_TEXELS
; k
++) {
675 for (i
= 0; i
< nn_comp
; i
++) {
692 /* choose the common vector (yuck!) */
695 GLint v1
= 0, v2
= 0;
696 GLfloat err
= 1e9
; /* big enough */
697 GLfloat tv
[2 * 2][MAX_COMP
]; /* 2 extrema for each sub-block */
698 for (i
= 0; i
< n_comp
; i
++) {
699 tv
[0][i
] = input
[minColL
][i
];
700 tv
[1][i
] = input
[maxColL
][i
];
701 tv
[2][i
] = input
[minColR
][i
];
702 tv
[3][i
] = input
[maxColR
][i
];
704 for (j1
= 0; j1
< 2; j1
++) {
705 for (j2
= 2; j2
< 4; j2
++) {
707 for (i
= 0; i
< n_comp
; i
++) {
708 e
+= (tv
[j1
][i
] - tv
[j2
][i
]) * (tv
[j1
][i
] - tv
[j2
][i
]);
717 for (i
= 0; i
< n_comp
; i
++) {
718 vec
[0][i
] = tv
[1 - v1
][i
];
719 vec
[1][i
] = (tv
[v1
][i
] * sumL
+ tv
[v2
][i
] * sumR
) / (sumL
+ sumR
);
720 vec
[2][i
] = tv
[5 - v2
][i
];
726 if (minColL
!= maxColL
) {
727 /* compute interpolation vector */
728 MAKEIVEC(n_vect
, n_comp
, iv
, b
, vec
[0], vec
[1]);
732 for (k
= N_TEXELS
/ 2 - 1; k
>= 0; k
--) {
734 /* interpolate color */
735 CALCCDOT(texel
, n_vect
, n_comp
, iv
, b
, input
[k
]);
744 /* right microtile */
746 if (minColR
!= maxColR
) {
747 /* compute interpolation vector */
748 MAKEIVEC(n_vect
, n_comp
, iv
, b
, vec
[2], vec
[1]);
752 for (k
= N_TEXELS
- 1; k
>= N_TEXELS
/ 2; k
--) {
754 /* interpolate color */
755 CALCCDOT(texel
, n_vect
, n_comp
, iv
, b
, input
[k
]);
764 FX64_MOV32(hi
, 7); /* alpha = "011" + lerp = 1 */
765 for (j
= n_vect
- 1; j
>= 0; j
--) {
768 FX64_OR32(hi
, (GLuint
)(vec
[j
][ACOMP
] / 8.0F
));
770 for (j
= n_vect
- 1; j
>= 0; j
--) {
771 for (i
= 0; i
< n_comp
- 1; i
++) {
774 FX64_OR32(hi
, (GLuint
)(vec
[j
][i
] / 8.0F
));
777 ((Fx64
*)cc
)[1] = hi
;
/*
 * Encode one block in CC_HI mode: two RGB endpoints and one small index
 * per texel, with index n_vect + 1 (7) meaning transparent black.
 *
 * The endpoints are chosen among the n texels of reord (presumably the
 * darkest and brightest by component sum, per the comment below; the
 * comparisons are not visible here).  hihi receives the components of
 * reord[maxCol] and then reord[minCol], each shifted right by 3.
 * cc[0..2] are cleared; each texel index is then ORed in at bit offset t
 * through a word pointer at byte offset t / 8 inside cc.  How t is
 * derived and where hihi is stored are not visible here.
 *
 * Texels that are not transparent black get their index from CALCCDOT
 * when the endpoints differ; the branch for equal endpoints is only
 * partly visible.
 */
782 fxt1_quantize_HI (GLuint
*cc
,
783 GLubyte input
[N_TEXELS
][MAX_COMP
],
784 GLubyte reord
[N_TEXELS
][MAX_COMP
], GLint n
)
786 const GLint n_vect
= 6; /* highest vector number */
787 const GLint n_comp
= 3; /* 3 components: R, G, B */
788 GLfloat b
= 0.0F
; /* phoudoin: silent compiler! */
789 GLfloat iv
[MAX_COMP
]; /* interpolation vector */
791 GLuint hihi
; /* high quadword: hi dword */
793 GLint minSum
= 2000; /* big enough */
794 GLint maxSum
= -1; /* small enough */
795 GLint minCol
= 0; /* phoudoin: silent compiler! */
796 GLint maxCol
= 0; /* phoudoin: silent compiler! */
798 /* Our solution here is to find the darkest and brightest colors in
799 * the 8x4 tile and use those as the two representative colors.
800 * There are probably better algorithms to use (histogram-based).
802 for (k
= 0; k
< n
; k
++) {
804 for (i
= 0; i
< n_comp
; i
++) {
817 hihi
= 0; /* cc-hi = "00" */
818 for (i
= 0; i
< n_comp
; i
++) {
821 hihi
|= reord
[maxCol
][i
] >> 3;
823 for (i
= 0; i
< n_comp
; i
++) {
826 hihi
|= reord
[minCol
][i
] >> 3;
829 cc
[0] = cc
[1] = cc
[2] = 0;
831 /* compute interpolation vector */
832 if (minCol
!= maxCol
) {
833 MAKEIVEC(n_vect
, n_comp
, iv
, b
, reord
[minCol
], reord
[maxCol
]);
837 for (k
= N_TEXELS
- 1; k
>= 0; k
--) {
839 GLuint
*kk
= (GLuint
*)((char *)cc
+ t
/ 8);
840 GLint texel
= n_vect
+ 1; /* transparent black */
842 if (!ISTBLACK(input
[k
])) {
843 if (minCol
!= maxCol
) {
844 /* interpolate color */
845 CALCCDOT(texel
, n_vect
, n_comp
, iv
, b
, input
[k
]);
847 kk
[0] |= texel
<< (t
& 7);
851 kk
[0] |= texel
<< (t
& 7);
/*
 * Encode one block in CC_MIXED mode where index n_vect + 1 (3) marks a
 * transparent black texel.
 *
 * Each 16-texel half gets its own pair of RGB endpoints, chosen among the
 * texels that are not transparent black.  maxColL / maxColR start at -1;
 * judging by the comments below, a half that is entirely transparent
 * black is handled separately (its condition is not visible here).
 * Other texels are indexed via MAKEIVEC / CALCCDOT between the endpoints
 * of their half.
 *
 * The high quadword starts from 9 combined with bit 2 of vec[3][GCOMP]
 * and bit 2 of vec[1][GCOMP] moved to bit 1, then receives all four
 * endpoint colours shifted right by 3; it is stored at ((Fx64 *)cc)[1].
 */
858 fxt1_quantize_MIXED1 (GLuint
*cc
,
859 GLubyte input
[N_TEXELS
][MAX_COMP
])
861 const GLint n_vect
= 2; /* highest vector number in each microtile */
862 const GLint n_comp
= 3; /* 3 components: R, G, B */
863 GLubyte vec
[2 * 2][MAX_COMP
]; /* 2 extrema for each sub-block */
864 GLfloat b
, iv
[MAX_COMP
]; /* interpolation vector */
866 Fx64 hi
; /* high quadword */
867 GLuint lohi
, lolo
; /* low quadword: hi dword, lo dword */
871 GLint minColL
= 0, maxColL
= -1;
872 GLint minColR
= 0, maxColR
= -1;
874 /* Our solution here is to find the darkest and brightest colors in
875 * the 4x4 tile and use those as the two representative colors.
876 * There are probably better algorithms to use (histogram-based).
878 minSum
= 2000; /* big enough */
879 maxSum
= -1; /* small enough */
880 for (k
= 0; k
< N_TEXELS
/ 2; k
++) {
881 if (!ISTBLACK(input
[k
])) {
883 for (i
= 0; i
< n_comp
; i
++) {
896 minSum
= 2000; /* big enough */
897 maxSum
= -1; /* small enough */
898 for (; k
< N_TEXELS
; k
++) {
899 if (!ISTBLACK(input
[k
])) {
901 for (i
= 0; i
< n_comp
; i
++) {
917 /* all transparent black */
919 for (i
= 0; i
< n_comp
; i
++) {
925 for (i
= 0; i
< n_comp
; i
++) {
926 vec
[0][i
] = input
[minColL
][i
];
927 vec
[1][i
] = input
[maxColL
][i
];
929 if (minColL
!= maxColL
) {
930 /* compute interpolation vector */
931 MAKEIVEC(n_vect
, n_comp
, iv
, b
, vec
[0], vec
[1]);
935 for (k
= N_TEXELS
/ 2 - 1; k
>= 0; k
--) {
936 GLint texel
= n_vect
+ 1; /* transparent black */
937 if (!ISTBLACK(input
[k
])) {
938 /* interpolate color */
939 CALCCDOT(texel
, n_vect
, n_comp
, iv
, b
, input
[k
]);
949 /* right microtile */
951 /* all transparent black */
953 for (i
= 0; i
< n_comp
; i
++) {
959 for (i
= 0; i
< n_comp
; i
++) {
960 vec
[2][i
] = input
[minColR
][i
];
961 vec
[3][i
] = input
[maxColR
][i
];
963 if (minColR
!= maxColR
) {
964 /* compute interpolation vector */
965 MAKEIVEC(n_vect
, n_comp
, iv
, b
, vec
[2], vec
[3]);
969 for (k
= N_TEXELS
- 1; k
>= N_TEXELS
/ 2; k
--) {
970 GLint texel
= n_vect
+ 1; /* transparent black */
971 if (!ISTBLACK(input
[k
])) {
972 /* interpolate color */
973 CALCCDOT(texel
, n_vect
, n_comp
, iv
, b
, input
[k
]);
983 FX64_MOV32(hi
, 9 | (vec
[3][GCOMP
] & 4) | ((vec
[1][GCOMP
] >> 1) & 2)); /* chroma = "1" */
984 for (j
= 2 * 2 - 1; j
>= 0; j
--) {
985 for (i
= 0; i
< n_comp
; i
++) {
988 FX64_OR32(hi
, vec
[j
][i
] >> 3);
991 ((Fx64
*)cc
)[1] = hi
;
/*
 * Encode one block in CC_MIXED mode without a transparent index: every
 * texel index selects one of n_vect + 1 (4) steps between the two RGB
 * endpoints of its 16-texel half.
 *
 * Two endpoint searches are visible below: one over minSum / maxSum
 * (darkest and brightest per the comment), and one that scans the
 * component returned by fxt1_variance for its lowest and highest value.
 * How one of them is selected is not visible here.
 *
 * After indexing each half, bit 1 of its index word is compared with bit
 * 2 of the XOR of the two green endpoints; on mismatch the endpoints are
 * swapped (the matching index fix-up is not visible here).  The high
 * quadword starts from 8 combined with bit 2 of vec[3][GCOMP] and bit 2
 * of vec[1][GCOMP] moved to bit 1, then receives all four endpoints
 * shifted right by 3; it is stored at ((Fx64 *)cc)[1].
 */
996 fxt1_quantize_MIXED0 (GLuint
*cc
,
997 GLubyte input
[N_TEXELS
][MAX_COMP
])
999 const GLint n_vect
= 3; /* highest vector number in each microtile */
1000 const GLint n_comp
= 3; /* 3 components: R, G, B */
1001 GLubyte vec
[2 * 2][MAX_COMP
]; /* 2 extrema for each sub-block */
1002 GLfloat b
, iv
[MAX_COMP
]; /* interpolation vector */
1004 Fx64 hi
; /* high quadword */
1005 GLuint lohi
, lolo
; /* low quadword: hi dword, lo dword */
1007 GLint minColL
= 0, maxColL
= 0;
1008 GLint minColR
= 0, maxColR
= 0;
1013 /* Our solution here is to find the darkest and brightest colors in
1014 * the 4x4 tile and use those as the two representative colors.
1015 * There are probably better algorithms to use (histogram-based).
1017 minSum
= 2000; /* big enough */
1018 maxSum
= -1; /* small enough */
1019 for (k
= 0; k
< N_TEXELS
/ 2; k
++) {
1021 for (i
= 0; i
< n_comp
; i
++) {
1033 minSum
= 2000; /* big enough */
1034 maxSum
= -1; /* small enough */
1035 for (; k
< N_TEXELS
; k
++) {
1037 for (i
= 0; i
< n_comp
; i
++) {
1052 GLint maxVarL
= fxt1_variance(NULL
, input
, n_comp
, N_TEXELS
/ 2);
1053 GLint maxVarR
= fxt1_variance(NULL
, &input
[N_TEXELS
/ 2], n_comp
, N_TEXELS
/ 2);
1055 /* Scan the channel with max variance for lo & hi
1056 * and use those as the two representative colors.
1058 minVal
= 2000; /* big enough */
1059 maxVal
= -1; /* small enough */
1060 for (k
= 0; k
< N_TEXELS
/ 2; k
++) {
1061 GLint t
= input
[k
][maxVarL
];
1071 minVal
= 2000; /* big enough */
1072 maxVal
= -1; /* small enough */
1073 for (; k
< N_TEXELS
; k
++) {
1074 GLint t
= input
[k
][maxVarR
];
1086 /* left microtile */
1088 for (i
= 0; i
< n_comp
; i
++) {
1089 vec
[0][i
] = input
[minColL
][i
];
1090 vec
[1][i
] = input
[maxColL
][i
];
1092 if (minColL
!= maxColL
) {
1093 /* compute interpolation vector */
1094 MAKEIVEC(n_vect
, n_comp
, iv
, b
, vec
[0], vec
[1]);
1098 for (k
= N_TEXELS
/ 2 - 1; k
>= 0; k
--) {
1100 /* interpolate color */
1101 CALCCDOT(texel
, n_vect
, n_comp
, iv
, b
, input
[k
]);
1107 /* funky encoding for LSB of green */
1108 if ((GLint
)((lolo
>> 1) & 1) != (((vec
[1][GCOMP
] ^ vec
[0][GCOMP
]) >> 2) & 1)) {
1109 for (i
= 0; i
< n_comp
; i
++) {
1110 vec
[1][i
] = input
[minColL
][i
];
1111 vec
[0][i
] = input
[maxColL
][i
];
1119 /* right microtile */
1121 for (i
= 0; i
< n_comp
; i
++) {
1122 vec
[2][i
] = input
[minColR
][i
];
1123 vec
[3][i
] = input
[maxColR
][i
];
1125 if (minColR
!= maxColR
) {
1126 /* compute interpolation vector */
1127 MAKEIVEC(n_vect
, n_comp
, iv
, b
, vec
[2], vec
[3]);
1131 for (k
= N_TEXELS
- 1; k
>= N_TEXELS
/ 2; k
--) {
1133 /* interpolate color */
1134 CALCCDOT(texel
, n_vect
, n_comp
, iv
, b
, input
[k
]);
1140 /* funky encoding for LSB of green */
1141 if ((GLint
)((lohi
>> 1) & 1) != (((vec
[3][GCOMP
] ^ vec
[2][GCOMP
]) >> 2) & 1)) {
1142 for (i
= 0; i
< n_comp
; i
++) {
1143 vec
[3][i
] = input
[minColR
][i
];
1144 vec
[2][i
] = input
[maxColR
][i
];
1152 FX64_MOV32(hi
, 8 | (vec
[3][GCOMP
] & 4) | ((vec
[1][GCOMP
] >> 1) & 2)); /* chroma = "1" */
1153 for (j
= 2 * 2 - 1; j
>= 0; j
--) {
1154 for (i
= 0; i
< n_comp
; i
++) {
1157 FX64_OR32(hi
, vec
[j
][i
] >> 3);
1160 ((Fx64
*)cc
)[1] = hi
;
/*
 * Compress one 8x4 block into the four words at cc.
 *
 * lines holds four row pointers, each advanced as its eight texels of
 * comps components are read.  Texels are copied into input in microtile
 * order: columns 0..3 of row l go to input[l * 4 + k], columns 4..7 to
 * input[l * 4 + k + 12], giving the layout in the table below.  input is
 * first filled with 0xff bytes, so a component not supplied by the source
 * (alpha when comps is 3) reads as fully opaque.
 *
 * Texels that are not transparent black are copied to reord and counted
 * in l; texels with alpha below (255 - ALPHA_TS) are detected as
 * non-opaque (what is recorded for them is not visible here).  Two mode
 * selection chains are visible, and how one is chosen is not:
 *   - ALPHA0, else all-ones words when l is 0, else HI when l is below
 *     N_TEXELS, else CHROMA;
 *   - ALPHA1, else all-ones words when l is 0, else MIXED1 when l is
 *     below N_TEXELS, else MIXED0.
 * The (void) statements name the quantizers of the other chain,
 * presumably to silence unused-function warnings.
 */
1165 fxt1_quantize (GLuint
*cc
, const GLubyte
*lines
[], GLint comps
)
1168 GLubyte reord
[N_TEXELS
][MAX_COMP
];
1170 GLubyte input
[N_TEXELS
][MAX_COMP
];
1174 /* make the whole block opaque */
1175 memset(input
, -1, sizeof(input
));
1178 /* 8 texels each line */
1179 for (l
= 0; l
< 4; l
++) {
1180 for (k
= 0; k
< 4; k
++) {
1181 for (i
= 0; i
< comps
; i
++) {
1182 input
[k
+ l
* 4][i
] = *lines
[l
]++;
1185 for (; k
< 8; k
++) {
1186 for (i
= 0; i
< comps
; i
++) {
1187 input
[k
+ l
* 4 + 12][i
] = *lines
[l
]++;
1193 * 00, 01, 02, 03, 08, 09, 0a, 0b
1194 * 10, 11, 12, 13, 18, 19, 1a, 1b
1195 * 04, 05, 06, 07, 0c, 0d, 0e, 0f
1196 * 14, 15, 16, 17, 1c, 1d, 1e, 1f
1200 * stupidity flows forth from this
1205 /* skip all transparent black texels */
1207 for (k
= 0; k
< N_TEXELS
; k
++) {
1208 /* test all components against 0 */
1209 if (!ISTBLACK(input
[k
])) {
1210 /* texel is not transparent black */
1211 COPY_4UBV(reord
[l
], input
[k
]);
1212 if (reord
[l
][ACOMP
] < (255 - ALPHA_TS
)) {
1213 /* non-opaque texel */
1223 fxt1_quantize_ALPHA0(cc
, input
, reord
, l
);
1224 } else if (l
== 0) {
1225 cc
[0] = cc
[1] = cc
[2] = -1;
1227 } else if (l
< N_TEXELS
) {
1228 fxt1_quantize_HI(cc
, input
, reord
, l
);
1230 fxt1_quantize_CHROMA(cc
, input
);
1232 (void)fxt1_quantize_ALPHA1
;
1233 (void)fxt1_quantize_MIXED1
;
1234 (void)fxt1_quantize_MIXED0
;
1237 fxt1_quantize_ALPHA1(cc
, input
);
1238 } else if (l
== 0) {
1239 cc
[0] = cc
[1] = cc
[2] = ~0u;
1241 } else if (l
< N_TEXELS
) {
1242 fxt1_quantize_MIXED1(cc
, input
);
1244 fxt1_quantize_MIXED0(cc
, input
);
1246 (void)fxt1_quantize_ALPHA0
;
1247 (void)fxt1_quantize_HI
;
1248 (void)fxt1_quantize_CHROMA
;
1255 * Upscale an image by replication, not (typical) stretching.
1256 * We use this when the image width or height is less than a
1257 * certain size (4, 8) and we need to upscale an image.
/*
 * Fill an outWidth x outHeight image by tiling the inWidth x inHeight
 * source: output texel (i, j) copies source texel (i % inHeight,
 * j % inWidth) for each of the comps components.
 *
 * Source rows are srcRowStride bytes apart; the output is written tightly
 * packed (row pitch outWidth * comps) into dest, whose declaration is not
 * visible here.  The assertions require the output to be at least as
 * large as the input, one input dimension to be 1 or 2, and both output
 * dimensions to be multiples of 4.
 */
1260 upscale_teximage2d(GLsizei inWidth
, GLsizei inHeight
,
1261 GLsizei outWidth
, GLsizei outHeight
,
1262 GLint comps
, const GLubyte
*src
, GLint srcRowStride
,
1267 ASSERT(outWidth
>= inWidth
);
1268 ASSERT(outHeight
>= inHeight
);
1270 ASSERT(inWidth
== 1 || inWidth
== 2 || inHeight
== 1 || inHeight
== 2);
1271 ASSERT((outWidth
& 3) == 0);
1272 ASSERT((outHeight
& 3) == 0);
1275 for (i
= 0; i
< outHeight
; i
++) {
1276 const GLint ii
= i
% inHeight
;
1277 for (j
= 0; j
< outWidth
; j
++) {
1278 const GLint jj
= j
% inWidth
;
1279 for (k
= 0; k
< comps
; k
++) {
1280 dest
[(i
* outWidth
+ j
) * comps
+ k
]
1281 = src
[ii
* srcRowStride
+ jj
* comps
+ k
];
/*
 * Compress a width x height image of comps (3 or 4) byte components per
 * texel from source into FXT1 blocks at dest.
 *
 * When width is not a multiple of 8 or height not a multiple of 4, the
 * image is first replicated by upscale_teximage2d into a newly allocated
 * buffer with both dimensions rounded up; allocation failure is reported
 * through _mesa_error as GL_OUT_OF_MEMORY.  The image is then walked in
 * 8x4 tiles and each tile is passed to fxt1_quantize with four row
 * pointers srcRowStride bytes apart.
 *
 * NOTE(review): comps * newWidth * newHeight is multiplied in int before
 * widening for malloc; presumably texture size limits keep it in range -
 * confirm.
 */
1289 fxt1_encode (GLuint width
, GLuint height
, GLint comps
,
1290 const void *source
, GLint srcRowStride
,
1291 void *dest
, GLint destRowStride
)
1294 const GLubyte
*data
;
1295 GLuint
*encoded
= (GLuint
*)dest
;
1296 void *newSource
= NULL
;
1298 assert(comps
== 3 || comps
== 4);
1300 /* Replicate image if width is not M8 or height is not M4 */
1301 if ((width
& 7) | (height
& 3)) {
1302 GLint newWidth
= (width
+ 7) & ~7;
1303 GLint newHeight
= (height
+ 3) & ~3;
1304 newSource
= malloc(comps
* newWidth
* newHeight
* sizeof(GLubyte
));
1306 GET_CURRENT_CONTEXT(ctx
);
1307 _mesa_error(ctx
, GL_OUT_OF_MEMORY
, "texture compression");
1310 upscale_teximage2d(width
, height
, newWidth
, newHeight
,
1311 comps
, (const GLubyte
*) source
,
1312 srcRowStride
, (GLubyte
*) newSource
);
1316 srcRowStride
= comps
* newWidth
;
1319 data
= (const GLubyte
*) source
;
/* One row of blocks takes width * 2 bytes (16 bytes per 8 texels); the remaining padding is converted from bytes to GLuint units. */
1320 destRowStride
= (destRowStride
- width
* 2) / 4;
1321 for (y
= 0; y
< height
; y
+= 4) {
1322 GLuint offs
= 0 + (y
+ 0) * srcRowStride
;
1323 for (x
= 0; x
< width
; x
+= 8) {
1324 const GLubyte
*lines
[4];
1325 lines
[0] = &data
[offs
];
1326 lines
[1] = lines
[0] + srcRowStride
;
1327 lines
[2] = lines
[1] + srcRowStride
;
1328 lines
[3] = lines
[2] + srcRowStride
;
1330 fxt1_quantize(encoded
, lines
, comps
);
1331 /* 128 bits per 8x4 block */
1334 encoded
+= destRowStride
;
1342 /***************************************************************************\
1345 * The decoder is based on GL_3DFX_texture_compression_FXT1
1346 * specification and serves as a concept for the encoder.
1347 \***************************************************************************/
1350 /* lookup table for scaling 5 bit colors up to 8 bits */
/* 32-entry table expanding a 5-bit colour value (0..31) to 8 bits; 0 maps to 0 and 31 to 255.  Indexed through the UP5 macro. */
1351 static const GLubyte _rgb_scale_5
[] = {
1352 0, 8, 16, 25, 33, 41, 49, 58,
1353 66, 74, 82, 90, 99, 107, 115, 123,
1354 132, 140, 148, 156, 165, 173, 181, 189,
1355 197, 206, 214, 222, 230, 239, 247, 255
1358 /* lookup table for scaling 6 bit colors up to 8 bits */
/* 64-entry table expanding a 6-bit colour value (0..63) to 8 bits; 0 maps to 0 and 63 to 255.  Indexed through the UP6 macro. */
1359 static const GLubyte _rgb_scale_6
[] = {
1360 0, 4, 8, 12, 16, 20, 24, 28,
1361 32, 36, 40, 45, 49, 53, 57, 61,
1362 65, 69, 73, 77, 81, 85, 89, 93,
1363 97, 101, 105, 109, 113, 117, 121, 125,
1364 130, 134, 138, 142, 146, 150, 154, 158,
1365 162, 166, 170, 174, 178, 182, 186, 190,
1366 194, 198, 202, 206, 210, 215, 219, 223,
1367 227, 231, 235, 239, 243, 247, 251, 255
/*
 * Decoder helper macros.
 *
 * CC_SEL(cc, which) - treats cc as an array of 32-bit words and returns
 *                     word which / 32 shifted right by which & 31.  The
 *                     result is not masked, so bits above the wanted
 *                     field remain; UP5 and UP6 mask their argument.
 * UP5(c)            - expands the low 5 bits of c to 8 bits.
 * UP6(c, b)         - forms a 6-bit value from the low 5 bits of c and
 *                     the low bit of b, then expands it to 8 bits.
 * LERP(n, t, c0, c1)- rounded integer interpolation, step t of n from c0
 *                     to c1.  The expansion is not wrapped in outer
 *                     parentheses; the visible uses are plain assignments.
 */
1371 #define CC_SEL(cc, which) (((GLuint *)(cc))[(which) / 32] >> ((which) & 31))
1372 #define UP5(c) _rgb_scale_5[(c) & 31]
1373 #define UP6(c, b) _rgb_scale_6[(((c) & 31) << 1) | ((b) & 1)]
1374 #define LERP(n, t, c0, c1) (((n) - (t)) * (c0) + (t) * (c1) + (n) / 2) / (n)
/*
 * Decode texel t of a CC_HI block at code into rgba.
 *
 * A 3-bit index is extracted from the word at byte offset t / 8, shifted
 * by t & 7 (t is presumably scaled to a bit offset beforehand; that line
 * is not visible here).  One index value yields all-zero rgba.
 * Otherwise the two 15-bit colours are read from the word at byte offset
 * 12: colour 0 at bits 0/5/10 and colour 1 at bits 15/20/25, in
 * blue/green/red order.  Index 6 selects colour 1; the remaining
 * branches select colour 0 or LERP(6, t, ...) between the two.  The
 * conditions for those branches and the final store are not visible.
 */
1378 fxt1_decode_1HI (const GLubyte
*code
, GLint t
, GLubyte
*rgba
)
1383 cc
= (const GLuint
*)(code
+ t
/ 8);
1384 t
= (cc
[0] >> (t
& 7)) & 7;
1387 rgba
[RCOMP
] = rgba
[GCOMP
] = rgba
[BCOMP
] = rgba
[ACOMP
] = 0;
1390 cc
= (const GLuint
*)(code
+ 12);
1392 b
= UP5(CC_SEL(cc
, 0));
1393 g
= UP5(CC_SEL(cc
, 5));
1394 r
= UP5(CC_SEL(cc
, 10));
1395 } else if (t
== 6) {
1396 b
= UP5(CC_SEL(cc
, 15));
1397 g
= UP5(CC_SEL(cc
, 20));
1398 r
= UP5(CC_SEL(cc
, 25));
1400 b
= LERP(6, t
, UP5(CC_SEL(cc
, 0)), UP5(CC_SEL(cc
, 15)));
1401 g
= LERP(6, t
, UP5(CC_SEL(cc
, 5)), UP5(CC_SEL(cc
, 20)));
1402 r
= LERP(6, t
, UP5(CC_SEL(cc
, 10)), UP5(CC_SEL(cc
, 25)));
/*
 * Decode texel t of a CC_CHROMA block at code into rgba.
 *
 * A 2-bit colour index is extracted from the index word, then a 15-bit
 * colour is read from the word at byte offset 8 + t / 8, shifted right by
 * t & 7.  Blue, green and red are the successive 5-bit fields of that
 * value, each expanded with UP5.  The scaling of t between the two steps
 * and the alpha store are not visible here.
 *
 * NOTE(review): the word pointer at code + 8 + t / 8 is generally not
 * 4-byte aligned, and if t / 8 can exceed 4 the 32-bit load reaches past
 * the 16-byte block - confirm against the range of t.
 */
1413 fxt1_decode_1CHROMA (const GLubyte
*code
, GLint t
, GLubyte
*rgba
)
1418 cc
= (const GLuint
*)code
;
1423 t
= (cc
[0] >> (t
* 2)) & 3;
1426 cc
= (const GLuint
*)(code
+ 8 + t
/ 8);
1427 kk
= cc
[0] >> (t
& 7);
1428 rgba
[BCOMP
] = UP5(kk
);
1429 rgba
[GCOMP
] = UP5(kk
>> 5);
1430 rgba
[RCOMP
] = UP5(kk
>> 10);
/*
 * Decode texel t of a CC_MIXED block at code into rgba.
 *
 * Depending on the half the texel belongs to (the test is not visible
 * here), the 2-bit index comes from cc[1] or cc[0], and the endpoints
 * col[0] / col[1] are read from bits 94..123 or bits 64..93 of the
 * block, with the green low-bit flag glsb at bit 126 or 125 and the
 * selector bit selb at bit 33 or 1.  The field starting at bit 94
 * straddles a word boundary, so it is fetched with a word load at byte
 * offset 11 shifted by 6.
 *
 * Bit 124 selects between two decodings:
 *   - set: one index gives all-zero rgba, two others give col[0] and
 *     col[1] (green of col[1] via UP6 with glsb), and one gives the
 *     average of both colours;
 *   - clear: index 3 gives col[1], another gives col[0], and the rest
 *     are LERP(3, t, ...) steps; col[0] green uses UP6 with glsb ^ selb
 *     and col[1] green uses UP6 with glsb.
 * Several branch conditions and the final stores are not visible here.
 *
 * NOTE(review): the word load at code + 11 is not 4-byte aligned -
 * confirm this is acceptable on all supported targets.
 */
1436 fxt1_decode_1MIXED (const GLubyte
*code
, GLint t
, GLubyte
*rgba
)
1442 cc
= (const GLuint
*)code
;
1445 t
= (cc
[1] >> (t
* 2)) & 3;
1447 col
[0][BCOMP
] = (*(const GLuint
*)(code
+ 11)) >> 6;
1448 col
[0][GCOMP
] = CC_SEL(cc
, 99);
1449 col
[0][RCOMP
] = CC_SEL(cc
, 104);
1451 col
[1][BCOMP
] = CC_SEL(cc
, 109);
1452 col
[1][GCOMP
] = CC_SEL(cc
, 114);
1453 col
[1][RCOMP
] = CC_SEL(cc
, 119);
1454 glsb
= CC_SEL(cc
, 126);
1455 selb
= CC_SEL(cc
, 33);
1457 t
= (cc
[0] >> (t
* 2)) & 3;
1459 col
[0][BCOMP
] = CC_SEL(cc
, 64);
1460 col
[0][GCOMP
] = CC_SEL(cc
, 69);
1461 col
[0][RCOMP
] = CC_SEL(cc
, 74);
1463 col
[1][BCOMP
] = CC_SEL(cc
, 79);
1464 col
[1][GCOMP
] = CC_SEL(cc
, 84);
1465 col
[1][RCOMP
] = CC_SEL(cc
, 89);
1466 glsb
= CC_SEL(cc
, 125);
1467 selb
= CC_SEL(cc
, 1);
1470 if (CC_SEL(cc
, 124) & 1) {
1475 rgba
[RCOMP
] = rgba
[BCOMP
] = rgba
[GCOMP
] = rgba
[ACOMP
] = 0;
1479 b
= UP5(col
[0][BCOMP
]);
1480 g
= UP5(col
[0][GCOMP
]);
1481 r
= UP5(col
[0][RCOMP
]);
1482 } else if (t
== 2) {
1483 b
= UP5(col
[1][BCOMP
]);
1484 g
= UP6(col
[1][GCOMP
], glsb
);
1485 r
= UP5(col
[1][RCOMP
]);
1487 b
= (UP5(col
[0][BCOMP
]) + UP5(col
[1][BCOMP
])) / 2;
1488 g
= (UP5(col
[0][GCOMP
]) + UP6(col
[1][GCOMP
], glsb
)) / 2;
1489 r
= (UP5(col
[0][RCOMP
]) + UP5(col
[1][RCOMP
])) / 2;
1500 b
= UP5(col
[0][BCOMP
]);
1501 g
= UP6(col
[0][GCOMP
], glsb
^ selb
);
1502 r
= UP5(col
[0][RCOMP
]);
1503 } else if (t
== 3) {
1504 b
= UP5(col
[1][BCOMP
]);
1505 g
= UP6(col
[1][GCOMP
], glsb
);
1506 r
= UP5(col
[1][RCOMP
]);
1508 b
= LERP(3, t
, UP5(col
[0][BCOMP
]), UP5(col
[1][BCOMP
]));
1509 g
= LERP(3, t
, UP6(col
[0][GCOMP
], glsb
^ selb
),
1510 UP6(col
[1][GCOMP
], glsb
));
1511 r
= LERP(3, t
, UP5(col
[0][RCOMP
]), UP5(col
[1][RCOMP
]));
/*
 * Decode texel t of a CC_ALPHA block at code into rgba.
 *
 * Bit 124 selects the interpolated form.  There the 2-bit index comes
 * from cc[1] or cc[0] depending on the half (the test is not visible
 * here), and col0 is read from bits 94/99/104 plus alpha at bit 119, or
 * from bits 64/69/74 plus alpha at bit 109.  The other endpoint is
 * always the colour at bits 79/84/89 with alpha at bit 114.  Index 3
 * selects that shared endpoint, another index selects col0, and the rest
 * are LERP(3, t, ...) steps.
 *
 * In the other form a 2-bit index is extracted from cc[0]; alpha is read
 * from cc[3] shifted by t * 5 + 13 and the colour from the word at byte
 * offset 8 + t / 8 shifted by t & 7.  The scaling of t between these
 * steps, several branch conditions and the final stores are not visible.
 *
 * NOTE(review): the word loads at code + 11 and code + 8 + t / 8 are not
 * 4-byte aligned - confirm this is acceptable on all supported targets.
 */
1522 fxt1_decode_1ALPHA (const GLubyte
*code
, GLint t
, GLubyte
*rgba
)
1527 cc
= (const GLuint
*)code
;
1528 if (CC_SEL(cc
, 124) & 1) {
1534 t
= (cc
[1] >> (t
* 2)) & 3;
1536 col0
[BCOMP
] = (*(const GLuint
*)(code
+ 11)) >> 6;
1537 col0
[GCOMP
] = CC_SEL(cc
, 99);
1538 col0
[RCOMP
] = CC_SEL(cc
, 104);
1539 col0
[ACOMP
] = CC_SEL(cc
, 119);
1541 t
= (cc
[0] >> (t
* 2)) & 3;
1543 col0
[BCOMP
] = CC_SEL(cc
, 64);
1544 col0
[GCOMP
] = CC_SEL(cc
, 69);
1545 col0
[RCOMP
] = CC_SEL(cc
, 74);
1546 col0
[ACOMP
] = CC_SEL(cc
, 109);
1550 b
= UP5(col0
[BCOMP
]);
1551 g
= UP5(col0
[GCOMP
]);
1552 r
= UP5(col0
[RCOMP
]);
1553 a
= UP5(col0
[ACOMP
]);
1554 } else if (t
== 3) {
1555 b
= UP5(CC_SEL(cc
, 79));
1556 g
= UP5(CC_SEL(cc
, 84));
1557 r
= UP5(CC_SEL(cc
, 89));
1558 a
= UP5(CC_SEL(cc
, 114));
1560 b
= LERP(3, t
, UP5(col0
[BCOMP
]), UP5(CC_SEL(cc
, 79)));
1561 g
= LERP(3, t
, UP5(col0
[GCOMP
]), UP5(CC_SEL(cc
, 84)));
1562 r
= LERP(3, t
, UP5(col0
[RCOMP
]), UP5(CC_SEL(cc
, 89)));
1563 a
= LERP(3, t
, UP5(col0
[ACOMP
]), UP5(CC_SEL(cc
, 114)));
1572 t
= (cc
[0] >> (t
* 2)) & 3;
1579 cc
= (const GLuint
*)code
;
1580 a
= UP5(cc
[3] >> (t
* 5 + 13));
1582 cc
= (const GLuint
*)(code
+ 8 + t
/ 8);
1583 kk
= cc
[0] >> (t
& 7);
/*
 * Decode the texel at column i, row j of the compressed image at texture
 * into rgba.  stride is the image width in pixels.
 *
 * The 16-byte block containing the texel is located at block row j / 4
 * and block column i / 8, with stride / 8 blocks per row.  The top three
 * bits of the block (bit 125 upward of its last 32-bit word) give a mode
 * 0..7 that indexes the table of mode decoders below.  The computation
 * of the texel index t inside the block is not visible here.
 */
1597 fxt1_decode_1 (const void *texture
, GLint stride
, /* in pixels */
1598 GLint i
, GLint j
, GLubyte
*rgba
)
1600 static void (*decode_1
[]) (const GLubyte
*, GLint
, GLubyte
*) = {
1601 fxt1_decode_1HI
, /* cc-high = "00?" */
1602 fxt1_decode_1HI
, /* cc-high = "00?" */
1603 fxt1_decode_1CHROMA
, /* cc-chroma = "010" */
1604 fxt1_decode_1ALPHA
, /* alpha = "011" */
1605 fxt1_decode_1MIXED
, /* mixed = "1??" */
1606 fxt1_decode_1MIXED
, /* mixed = "1??" */
1607 fxt1_decode_1MIXED
, /* mixed = "1??" */
1608 fxt1_decode_1MIXED
/* mixed = "1??" */
1611 const GLubyte
*code
= (const GLubyte
*)texture
+
1612 ((j
/ 4) * (stride
/ 8) + (i
/ 8)) * 16;
/* Shifting the last 32-bit word right by 29 leaves only the three mode bits. */
1613 GLint mode
= CC_SEL(code
, 125);
1621 decode_1
[mode
](code
, t
, rgba
);
/*
 * Fetch the texel at column i, row j of an RGB FXT1 image as floats.
 * The texel is decoded with fxt1_decode_1; red, green and blue are
 * converted with UBYTE_TO_FLOAT and alpha is forced to 1.0 regardless
 * of the decoded alpha.
 */
1628 fetch_rgb_fxt1(const GLubyte
*map
,
1629 GLint rowStride
, GLint i
, GLint j
, GLfloat
*texel
)
1632 fxt1_decode_1(map
, rowStride
, i
, j
, rgba
);
1633 texel
[RCOMP
] = UBYTE_TO_FLOAT(rgba
[RCOMP
]);
1634 texel
[GCOMP
] = UBYTE_TO_FLOAT(rgba
[GCOMP
]);
1635 texel
[BCOMP
] = UBYTE_TO_FLOAT(rgba
[BCOMP
]);
1636 texel
[ACOMP
] = 1.0F
;
/*
 * Fetch the texel at column i, row j of an RGBA FXT1 image as floats.
 * The texel is decoded with fxt1_decode_1 and all four components,
 * including alpha, are converted with UBYTE_TO_FLOAT.
 */
1641 fetch_rgba_fxt1(const GLubyte
*map
,
1642 GLint rowStride
, GLint i
, GLint j
, GLfloat
*texel
)
1645 fxt1_decode_1(map
, rowStride
, i
, j
, rgba
);
1646 texel
[RCOMP
] = UBYTE_TO_FLOAT(rgba
[RCOMP
]);
1647 texel
[GCOMP
] = UBYTE_TO_FLOAT(rgba
[GCOMP
]);
1648 texel
[BCOMP
] = UBYTE_TO_FLOAT(rgba
[BCOMP
]);
1649 texel
[ACOMP
] = UBYTE_TO_FLOAT(rgba
[ACOMP
]);
/*
 * Return the texel fetch routine for an FXT1 format: fetch_rgb_fxt1 for
 * MESA_FORMAT_RGB_FXT1 and fetch_rgba_fxt1 for MESA_FORMAT_RGBA_FXT1.
 * The result for any other format is not visible here.
 */
1653 compressed_fetch_func
1654 _mesa_get_fxt_fetch_func(mesa_format format
)
1657 case MESA_FORMAT_RGB_FXT1
:
1658 return fetch_rgb_fxt1
;
1659 case MESA_FORMAT_RGBA_FXT1
:
1660 return fetch_rgba_fxt1
;