mesa: Combine libtxc_dxtn sources into texcompress_s3tc_tmp.h
[mesa.git] / src / mesa / main / texcompress_s3tc_tmp.h
1 /*
2 * libtxc_dxtn
3 * Version: 1.0
4 *
5 * Copyright (C) 2004 Roland Scheidegger All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included
15 * in all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
21 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25 #ifdef __APPLE__
26 #include <OpenGL/gl.h>
27 #else
28 #include <GL/gl.h>
29 #endif
30
31 typedef GLubyte GLchan;
32 #define UBYTE_TO_CHAN(b) (b)
33 #define CHAN_MAX 255
34 #define RCOMP 0
35 #define GCOMP 1
36 #define BCOMP 2
37 #define ACOMP 3
38
39 void fetch_2d_texel_rgb_dxt1(GLint srcRowStride, const GLubyte *pixdata,
40 GLint i, GLint j, GLvoid *texel);
41 void fetch_2d_texel_rgba_dxt1(GLint srcRowStride, const GLubyte *pixdata,
42 GLint i, GLint j, GLvoid *texel);
43 void fetch_2d_texel_rgba_dxt3(GLint srcRowStride, const GLubyte *pixdata,
44 GLint i, GLint j, GLvoid *texel);
45 void fetch_2d_texel_rgba_dxt5(GLint srcRowStride, const GLubyte *pixdata,
46 GLint i, GLint j, GLvoid *texel);
47
48 void tx_compress_dxtn(GLint srccomps, GLint width, GLint height,
49 const GLubyte *srcPixData, GLenum destformat,
50 GLubyte *dest, GLint dstRowStride);
51
/*
 * Expand one field of a packed 16-bit RGB565 colour to 8 bits.
 * The field is moved to the top of the byte and its most significant bits
 * are repeated in the vacated low bits, so a full-scale field becomes 255.
 */

/* red: bits 15..11 */
#define EXP5TO8R(rgb565) \
   ((((rgb565) >> 8) & 0xf8) | (((rgb565) >> 13) & 0x7))

/* green: bits 10..5 */
#define EXP6TO8G(rgb565) \
   ((((rgb565) >> 3) & 0xfc) | (((rgb565) >> 9) & 0x3))

/* blue: bits 4..0 */
#define EXP5TO8B(rgb565) \
   ((((rgb565) << 3) & 0xf8) | (((rgb565) >> 2) & 0x7))

/* Expand a 4-bit value to 8 bits by duplicating the nibble. */
#define EXP4TO8(nibble) \
   ((nibble) | ((nibble) << 4))
63
64 /* inefficient. To be efficient, it would be necessary to decode 16 pixels at once */
65
66 static void dxt135_decode_imageblock ( const GLubyte *img_block_src,
67 GLint i, GLint j, GLuint dxt_type, GLvoid *texel ) {
68 GLchan *rgba = (GLchan *) texel;
69 const GLushort color0 = img_block_src[0] | (img_block_src[1] << 8);
70 const GLushort color1 = img_block_src[2] | (img_block_src[3] << 8);
71 const GLuint bits = img_block_src[4] | (img_block_src[5] << 8) |
72 (img_block_src[6] << 16) | (img_block_src[7] << 24);
73 /* What about big/little endian? */
74 GLubyte bit_pos = 2 * (j * 4 + i) ;
75 GLubyte code = (GLubyte) ((bits >> bit_pos) & 3);
76
77 rgba[ACOMP] = CHAN_MAX;
78 switch (code) {
79 case 0:
80 rgba[RCOMP] = UBYTE_TO_CHAN( EXP5TO8R(color0) );
81 rgba[GCOMP] = UBYTE_TO_CHAN( EXP6TO8G(color0) );
82 rgba[BCOMP] = UBYTE_TO_CHAN( EXP5TO8B(color0) );
83 break;
84 case 1:
85 rgba[RCOMP] = UBYTE_TO_CHAN( EXP5TO8R(color1) );
86 rgba[GCOMP] = UBYTE_TO_CHAN( EXP6TO8G(color1) );
87 rgba[BCOMP] = UBYTE_TO_CHAN( EXP5TO8B(color1) );
88 break;
89 case 2:
90 if ((dxt_type > 1) || (color0 > color1)) {
91 rgba[RCOMP] = UBYTE_TO_CHAN( ((EXP5TO8R(color0) * 2 + EXP5TO8R(color1)) / 3) );
92 rgba[GCOMP] = UBYTE_TO_CHAN( ((EXP6TO8G(color0) * 2 + EXP6TO8G(color1)) / 3) );
93 rgba[BCOMP] = UBYTE_TO_CHAN( ((EXP5TO8B(color0) * 2 + EXP5TO8B(color1)) / 3) );
94 }
95 else {
96 rgba[RCOMP] = UBYTE_TO_CHAN( ((EXP5TO8R(color0) + EXP5TO8R(color1)) / 2) );
97 rgba[GCOMP] = UBYTE_TO_CHAN( ((EXP6TO8G(color0) + EXP6TO8G(color1)) / 2) );
98 rgba[BCOMP] = UBYTE_TO_CHAN( ((EXP5TO8B(color0) + EXP5TO8B(color1)) / 2) );
99 }
100 break;
101 case 3:
102 if ((dxt_type > 1) || (color0 > color1)) {
103 rgba[RCOMP] = UBYTE_TO_CHAN( ((EXP5TO8R(color0) + EXP5TO8R(color1) * 2) / 3) );
104 rgba[GCOMP] = UBYTE_TO_CHAN( ((EXP6TO8G(color0) + EXP6TO8G(color1) * 2) / 3) );
105 rgba[BCOMP] = UBYTE_TO_CHAN( ((EXP5TO8B(color0) + EXP5TO8B(color1) * 2) / 3) );
106 }
107 else {
108 rgba[RCOMP] = 0;
109 rgba[GCOMP] = 0;
110 rgba[BCOMP] = 0;
111 if (dxt_type == 1) rgba[ACOMP] = UBYTE_TO_CHAN(0);
112 }
113 break;
114 default:
115 /* CANNOT happen (I hope) */
116 break;
117 }
118 }
119
120
121 void fetch_2d_texel_rgb_dxt1(GLint srcRowStride, const GLubyte *pixdata,
122 GLint i, GLint j, GLvoid *texel)
123 {
124 /* Extract the (i,j) pixel from pixdata and return it
125 * in texel[RCOMP], texel[GCOMP], texel[BCOMP], texel[ACOMP].
126 */
127
128 const GLubyte *blksrc = (pixdata + ((srcRowStride + 3) / 4 * (j / 4) + (i / 4)) * 8);
129 dxt135_decode_imageblock(blksrc, (i&3), (j&3), 0, texel);
130 }
131
132
133 void fetch_2d_texel_rgba_dxt1(GLint srcRowStride, const GLubyte *pixdata,
134 GLint i, GLint j, GLvoid *texel)
135 {
136 /* Extract the (i,j) pixel from pixdata and return it
137 * in texel[RCOMP], texel[GCOMP], texel[BCOMP], texel[ACOMP].
138 */
139
140 const GLubyte *blksrc = (pixdata + ((srcRowStride + 3) / 4 * (j / 4) + (i / 4)) * 8);
141 dxt135_decode_imageblock(blksrc, (i&3), (j&3), 1, texel);
142 }
143
144 void fetch_2d_texel_rgba_dxt3(GLint srcRowStride, const GLubyte *pixdata,
145 GLint i, GLint j, GLvoid *texel) {
146
147 /* Extract the (i,j) pixel from pixdata and return it
148 * in texel[RCOMP], texel[GCOMP], texel[BCOMP], texel[ACOMP].
149 */
150
151 GLchan *rgba = (GLchan *) texel;
152 const GLubyte *blksrc = (pixdata + ((srcRowStride + 3) / 4 * (j / 4) + (i / 4)) * 16);
153 #if 0
154 /* Simple 32bit version. */
155 /* that's pretty brain-dead for a single pixel, isn't it? */
156 const GLubyte bit_pos = 4 * ((j&3) * 4 + (i&3));
157 const GLuint alpha_low = blksrc[0] | (blksrc[1] << 8) | (blksrc[2] << 16) | (blksrc[3] << 24);
158 const GLuint alpha_high = blksrc[4] | (blksrc[5] << 8) | (blksrc[6] << 16) | (blksrc[7] << 24);
159
160 dxt135_decode_imageblock(blksrc + 8, (i&3), (j&3), 2, texel);
161 if (bit_pos < 32)
162 rgba[ACOMP] = UBYTE_TO_CHAN( (GLubyte)(EXP4TO8((alpha_low >> bit_pos) & 15)) );
163 else
164 rgba[ACOMP] = UBYTE_TO_CHAN( (GLubyte)(EXP4TO8((alpha_high >> (bit_pos - 32)) & 15)) );
165 #endif
166 #if 1
167 /* TODO test this! */
168 const GLubyte anibble = (blksrc[((j&3) * 4 + (i&3)) / 2] >> (4 * (i&1))) & 0xf;
169 dxt135_decode_imageblock(blksrc + 8, (i&3), (j&3), 2, texel);
170 rgba[ACOMP] = UBYTE_TO_CHAN( (GLubyte)(EXP4TO8(anibble)) );
171 #endif
172
173 }
174
175 void fetch_2d_texel_rgba_dxt5(GLint srcRowStride, const GLubyte *pixdata,
176 GLint i, GLint j, GLvoid *texel) {
177
178 /* Extract the (i,j) pixel from pixdata and return it
179 * in texel[RCOMP], texel[GCOMP], texel[BCOMP], texel[ACOMP].
180 */
181
182 GLchan *rgba = (GLchan *) texel;
183 const GLubyte *blksrc = (pixdata + ((srcRowStride + 3) / 4 * (j / 4) + (i / 4)) * 16);
184 const GLubyte alpha0 = blksrc[0];
185 const GLubyte alpha1 = blksrc[1];
186 #if 0
187 const GLubyte bit_pos = 3 * ((j&3) * 4 + (i&3));
188 /* simple 32bit version */
189 const GLuint bits_low = blksrc[2] | (blksrc[3] << 8) | (blksrc[4] << 16) | (blksrc[5] << 24);
190 const GLuint bits_high = blksrc[6] | (blksrc[7] << 8);
191 GLubyte code;
192
193 if (bit_pos < 30)
194 code = (GLubyte) ((bits_low >> bit_pos) & 7);
195 else if (bit_pos == 30)
196 code = (GLubyte) ((bits_low >> 30) & 3) | ((bits_high << 2) & 4);
197 else
198 code = (GLubyte) ((bits_high >> (bit_pos - 32)) & 7);
199 #endif
200 #if 1
201 /* TODO test this! */
202 const GLubyte bit_pos = ((j&3) * 4 + (i&3)) * 3;
203 const GLubyte acodelow = blksrc[2 + bit_pos / 8];
204 const GLubyte acodehigh = blksrc[3 + bit_pos / 8];
205 const GLubyte code = (acodelow >> (bit_pos & 0x7) |
206 (acodehigh << (8 - (bit_pos & 0x7)))) & 0x7;
207 #endif
208 dxt135_decode_imageblock(blksrc + 8, (i&3), (j&3), 2, texel);
209 #if 0
210 if (alpha0 > alpha1) {
211 switch (code) {
212 case 0:
213 rgba[ACOMP] = UBYTE_TO_CHAN( alpha0 );
214 break;
215 case 1:
216 rgba[ACOMP] = UBYTE_TO_CHAN( alpha1 );
217 break;
218 case 2:
219 case 3:
220 case 4:
221 case 5:
222 case 6:
223 case 7:
224 rgba[ACOMP] = UBYTE_TO_CHAN( ((alpha0 * (8 - code) + (alpha1 * (code - 1))) / 7) );
225 break;
226 }
227 }
228 else {
229 switch (code) {
230 case 0:
231 rgba[ACOMP] = UBYTE_TO_CHAN( alpha0 );
232 break;
233 case 1:
234 rgba[ACOMP] = UBYTE_TO_CHAN( alpha1 );
235 break;
236 case 2:
237 case 3:
238 case 4:
239 case 5:
240 rgba[ACOMP] = UBYTE_TO_CHAN( ((alpha0 * (6 - code) + (alpha1 * (code - 1))) / 5) );
241 break;
242 case 6:
243 rgba[ACOMP] = 0;
244 break;
245 case 7:
246 rgba[ACOMP] = CHAN_MAX;
247 break;
248 }
249 }
250 #endif
251 /* not sure. Which version is faster? */
252 #if 1
253 /* TODO test this */
254 if (code == 0)
255 rgba[ACOMP] = UBYTE_TO_CHAN( alpha0 );
256 else if (code == 1)
257 rgba[ACOMP] = UBYTE_TO_CHAN( alpha1 );
258 else if (alpha0 > alpha1)
259 rgba[ACOMP] = UBYTE_TO_CHAN( ((alpha0 * (8 - code) + (alpha1 * (code - 1))) / 7) );
260 else if (code < 6)
261 rgba[ACOMP] = UBYTE_TO_CHAN( ((alpha0 * (6 - code) + (alpha1 * (code - 1))) / 5) );
262 else if (code == 6)
263 rgba[ACOMP] = 0;
264 else
265 rgba[ACOMP] = CHAN_MAX;
266 #endif
267 }
268
269
/* Per-channel weights applied to the squared colour distance in the error
   function.  They are the squares of 2/4/1 (red/green/blue), loosely
   following the rgb->luminance conversion; it is not certain that this
   really reflects visual perception. */
#define REDWEIGHT 4
#define GREENWEIGHT 16
#define BLUEWEIGHT 1

/* RGBA DXT1: texels whose alpha is <= ALPHACUT are encoded as transparent */
#define ALPHACUT 127
277
278 static void fancybasecolorsearch( GLubyte *blkaddr, GLubyte srccolors[4][4][4], GLubyte *bestcolor[2],
279 GLint numxpixels, GLint numypixels, GLint type, GLboolean haveAlpha)
280 {
281 /* use same luminance-weighted distance metric to determine encoding as for finding the base colors */
282
283 /* TODO could also try to find a better encoding for the 3-color-encoding type, this really should be done
284 if it's rgba_dxt1 and we have alpha in the block, currently even values which will be mapped to black
285 due to their alpha value will influence the result */
286 GLint i, j, colors, z;
287 GLuint pixerror, pixerrorred, pixerrorgreen, pixerrorblue, pixerrorbest;
288 GLint colordist, blockerrlin[2][3];
289 GLubyte nrcolor[2];
290 GLint pixerrorcolorbest[3];
291 GLubyte enc = 0;
292 GLubyte cv[4][4];
293 GLubyte testcolor[2][3];
294
295 /* fprintf(stderr, "color begin 0 r/g/b %d/%d/%d, 1 r/g/b %d/%d/%d\n",
296 bestcolor[0][0], bestcolor[0][1], bestcolor[0][2], bestcolor[1][0], bestcolor[1][1], bestcolor[1][2]);*/
297 if (((bestcolor[0][0] & 0xf8) << 8 | (bestcolor[0][1] & 0xfc) << 3 | bestcolor[0][2] >> 3) <
298 ((bestcolor[1][0] & 0xf8) << 8 | (bestcolor[1][1] & 0xfc) << 3 | bestcolor[1][2] >> 3)) {
299 testcolor[0][0] = bestcolor[0][0];
300 testcolor[0][1] = bestcolor[0][1];
301 testcolor[0][2] = bestcolor[0][2];
302 testcolor[1][0] = bestcolor[1][0];
303 testcolor[1][1] = bestcolor[1][1];
304 testcolor[1][2] = bestcolor[1][2];
305 }
306 else {
307 testcolor[1][0] = bestcolor[0][0];
308 testcolor[1][1] = bestcolor[0][1];
309 testcolor[1][2] = bestcolor[0][2];
310 testcolor[0][0] = bestcolor[1][0];
311 testcolor[0][1] = bestcolor[1][1];
312 testcolor[0][2] = bestcolor[1][2];
313 }
314
315 for (i = 0; i < 3; i ++) {
316 cv[0][i] = testcolor[0][i];
317 cv[1][i] = testcolor[1][i];
318 cv[2][i] = (testcolor[0][i] * 2 + testcolor[1][i]) / 3;
319 cv[3][i] = (testcolor[0][i] + testcolor[1][i] * 2) / 3;
320 }
321
322 blockerrlin[0][0] = 0;
323 blockerrlin[0][1] = 0;
324 blockerrlin[0][2] = 0;
325 blockerrlin[1][0] = 0;
326 blockerrlin[1][1] = 0;
327 blockerrlin[1][2] = 0;
328
329 nrcolor[0] = 0;
330 nrcolor[1] = 0;
331
332 for (j = 0; j < numypixels; j++) {
333 for (i = 0; i < numxpixels; i++) {
334 pixerrorbest = 0xffffffff;
335 for (colors = 0; colors < 4; colors++) {
336 colordist = srccolors[j][i][0] - (cv[colors][0]);
337 pixerror = colordist * colordist * REDWEIGHT;
338 pixerrorred = colordist;
339 colordist = srccolors[j][i][1] - (cv[colors][1]);
340 pixerror += colordist * colordist * GREENWEIGHT;
341 pixerrorgreen = colordist;
342 colordist = srccolors[j][i][2] - (cv[colors][2]);
343 pixerror += colordist * colordist * BLUEWEIGHT;
344 pixerrorblue = colordist;
345 if (pixerror < pixerrorbest) {
346 enc = colors;
347 pixerrorbest = pixerror;
348 pixerrorcolorbest[0] = pixerrorred;
349 pixerrorcolorbest[1] = pixerrorgreen;
350 pixerrorcolorbest[2] = pixerrorblue;
351 }
352 }
353 if (enc == 0) {
354 for (z = 0; z < 3; z++) {
355 blockerrlin[0][z] += 3 * pixerrorcolorbest[z];
356 }
357 nrcolor[0] += 3;
358 }
359 else if (enc == 2) {
360 for (z = 0; z < 3; z++) {
361 blockerrlin[0][z] += 2 * pixerrorcolorbest[z];
362 }
363 nrcolor[0] += 2;
364 for (z = 0; z < 3; z++) {
365 blockerrlin[1][z] += 1 * pixerrorcolorbest[z];
366 }
367 nrcolor[1] += 1;
368 }
369 else if (enc == 3) {
370 for (z = 0; z < 3; z++) {
371 blockerrlin[0][z] += 1 * pixerrorcolorbest[z];
372 }
373 nrcolor[0] += 1;
374 for (z = 0; z < 3; z++) {
375 blockerrlin[1][z] += 2 * pixerrorcolorbest[z];
376 }
377 nrcolor[1] += 2;
378 }
379 else if (enc == 1) {
380 for (z = 0; z < 3; z++) {
381 blockerrlin[1][z] += 3 * pixerrorcolorbest[z];
382 }
383 nrcolor[1] += 3;
384 }
385 }
386 }
387 if (nrcolor[0] == 0) nrcolor[0] = 1;
388 if (nrcolor[1] == 0) nrcolor[1] = 1;
389 for (j = 0; j < 2; j++) {
390 for (i = 0; i < 3; i++) {
391 GLint newvalue = testcolor[j][i] + blockerrlin[j][i] / nrcolor[j];
392 if (newvalue <= 0)
393 testcolor[j][i] = 0;
394 else if (newvalue >= 255)
395 testcolor[j][i] = 255;
396 else testcolor[j][i] = newvalue;
397 }
398 }
399
400 if ((abs(testcolor[0][0] - testcolor[1][0]) < 8) &&
401 (abs(testcolor[0][1] - testcolor[1][1]) < 4) &&
402 (abs(testcolor[0][2] - testcolor[1][2]) < 8)) {
403 /* both colors are so close they might get encoded as the same 16bit values */
404 GLubyte coldiffred, coldiffgreen, coldiffblue, coldiffmax, factor, ind0, ind1;
405
406 coldiffred = abs(testcolor[0][0] - testcolor[1][0]);
407 coldiffgreen = 2 * abs(testcolor[0][1] - testcolor[1][1]);
408 coldiffblue = abs(testcolor[0][2] - testcolor[1][2]);
409 coldiffmax = coldiffred;
410 if (coldiffmax < coldiffgreen) coldiffmax = coldiffgreen;
411 if (coldiffmax < coldiffblue) coldiffmax = coldiffblue;
412 if (coldiffmax > 0) {
413 if (coldiffmax > 4) factor = 2;
414 else if (coldiffmax > 2) factor = 3;
415 else factor = 4;
416 /* Won't do much if the color value is near 255... */
417 /* argh so many ifs */
418 if (testcolor[1][1] >= testcolor[0][1]) {
419 ind1 = 1; ind0 = 0;
420 }
421 else {
422 ind1 = 0; ind0 = 1;
423 }
424 if ((testcolor[ind1][1] + factor * coldiffgreen) <= 255)
425 testcolor[ind1][1] += factor * coldiffgreen;
426 else testcolor[ind1][1] = 255;
427 if ((testcolor[ind1][0] - testcolor[ind0][1]) > 0) {
428 if ((testcolor[ind1][0] + factor * coldiffred) <= 255)
429 testcolor[ind1][0] += factor * coldiffred;
430 else testcolor[ind1][0] = 255;
431 }
432 else {
433 if ((testcolor[ind0][0] + factor * coldiffred) <= 255)
434 testcolor[ind0][0] += factor * coldiffred;
435 else testcolor[ind0][0] = 255;
436 }
437 if ((testcolor[ind1][2] - testcolor[ind0][2]) > 0) {
438 if ((testcolor[ind1][2] + factor * coldiffblue) <= 255)
439 testcolor[ind1][2] += factor * coldiffblue;
440 else testcolor[ind1][2] = 255;
441 }
442 else {
443 if ((testcolor[ind0][2] + factor * coldiffblue) <= 255)
444 testcolor[ind0][2] += factor * coldiffblue;
445 else testcolor[ind0][2] = 255;
446 }
447 }
448 }
449
450 if (((testcolor[0][0] & 0xf8) << 8 | (testcolor[0][1] & 0xfc) << 3 | testcolor[0][2] >> 3) <
451 ((testcolor[1][0] & 0xf8) << 8 | (testcolor[1][1] & 0xfc) << 3 | testcolor[1][2]) >> 3) {
452 for (i = 0; i < 3; i++) {
453 bestcolor[0][i] = testcolor[0][i];
454 bestcolor[1][i] = testcolor[1][i];
455 }
456 }
457 else {
458 for (i = 0; i < 3; i++) {
459 bestcolor[0][i] = testcolor[1][i];
460 bestcolor[1][i] = testcolor[0][i];
461 }
462 }
463
464 /* fprintf(stderr, "color end 0 r/g/b %d/%d/%d, 1 r/g/b %d/%d/%d\n",
465 bestcolor[0][0], bestcolor[0][1], bestcolor[0][2], bestcolor[1][0], bestcolor[1][1], bestcolor[1][2]);*/
466 }
467
468
469
470 static void storedxtencodedblock( GLubyte *blkaddr, GLubyte srccolors[4][4][4], GLubyte *bestcolor[2],
471 GLint numxpixels, GLint numypixels, GLuint type, GLboolean haveAlpha)
472 {
473 /* use same luminance-weighted distance metric to determine encoding as for finding the base colors */
474
475 GLint i, j, colors;
476 GLuint testerror, testerror2, pixerror, pixerrorbest;
477 GLint colordist;
478 GLushort color0, color1, tempcolor;
479 GLuint bits = 0, bits2 = 0;
480 GLubyte *colorptr;
481 GLubyte enc = 0;
482 GLubyte cv[4][4];
483
484 bestcolor[0][0] = bestcolor[0][0] & 0xf8;
485 bestcolor[0][1] = bestcolor[0][1] & 0xfc;
486 bestcolor[0][2] = bestcolor[0][2] & 0xf8;
487 bestcolor[1][0] = bestcolor[1][0] & 0xf8;
488 bestcolor[1][1] = bestcolor[1][1] & 0xfc;
489 bestcolor[1][2] = bestcolor[1][2] & 0xf8;
490
491 color0 = bestcolor[0][0] << 8 | bestcolor[0][1] << 3 | bestcolor[0][2] >> 3;
492 color1 = bestcolor[1][0] << 8 | bestcolor[1][1] << 3 | bestcolor[1][2] >> 3;
493 if (color0 < color1) {
494 tempcolor = color0; color0 = color1; color1 = tempcolor;
495 colorptr = bestcolor[0]; bestcolor[0] = bestcolor[1]; bestcolor[1] = colorptr;
496 }
497
498
499 for (i = 0; i < 3; i++) {
500 cv[0][i] = bestcolor[0][i];
501 cv[1][i] = bestcolor[1][i];
502 cv[2][i] = (bestcolor[0][i] * 2 + bestcolor[1][i]) / 3;
503 cv[3][i] = (bestcolor[0][i] + bestcolor[1][i] * 2) / 3;
504 }
505
506 testerror = 0;
507 for (j = 0; j < numypixels; j++) {
508 for (i = 0; i < numxpixels; i++) {
509 pixerrorbest = 0xffffffff;
510 for (colors = 0; colors < 4; colors++) {
511 colordist = srccolors[j][i][0] - cv[colors][0];
512 pixerror = colordist * colordist * REDWEIGHT;
513 colordist = srccolors[j][i][1] - cv[colors][1];
514 pixerror += colordist * colordist * GREENWEIGHT;
515 colordist = srccolors[j][i][2] - cv[colors][2];
516 pixerror += colordist * colordist * BLUEWEIGHT;
517 if (pixerror < pixerrorbest) {
518 pixerrorbest = pixerror;
519 enc = colors;
520 }
521 }
522 testerror += pixerrorbest;
523 bits |= enc << (2 * (j * 4 + i));
524 }
525 }
526 /* some hw might disagree but actually decoding should always use 4-color encoding
527 for non-dxt1 formats */
528 if (type == GL_COMPRESSED_RGB_S3TC_DXT1_EXT || type == GL_COMPRESSED_RGBA_S3TC_DXT1_EXT) {
529 for (i = 0; i < 3; i++) {
530 cv[2][i] = (bestcolor[0][i] + bestcolor[1][i]) / 2;
531 /* this isn't used. Looks like the black color constant can only be used
532 with RGB_DXT1 if I read the spec correctly (note though that the radeon gpu disagrees,
533 it will decode 3 to black even with DXT3/5), and due to how the color searching works
534 it won't get used even then */
535 cv[3][i] = 0;
536 }
537 testerror2 = 0;
538 for (j = 0; j < numypixels; j++) {
539 for (i = 0; i < numxpixels; i++) {
540 pixerrorbest = 0xffffffff;
541 if ((type == GL_COMPRESSED_RGBA_S3TC_DXT1_EXT) && (srccolors[j][i][3] <= ALPHACUT)) {
542 enc = 3;
543 pixerrorbest = 0; /* don't calculate error */
544 }
545 else {
546 /* we're calculating the same what we have done already for colors 0-1 above... */
547 for (colors = 0; colors < 3; colors++) {
548 colordist = srccolors[j][i][0] - cv[colors][0];
549 pixerror = colordist * colordist * REDWEIGHT;
550 colordist = srccolors[j][i][1] - cv[colors][1];
551 pixerror += colordist * colordist * GREENWEIGHT;
552 colordist = srccolors[j][i][2] - cv[colors][2];
553 pixerror += colordist * colordist * BLUEWEIGHT;
554 if (pixerror < pixerrorbest) {
555 pixerrorbest = pixerror;
556 /* need to exchange colors later */
557 if (colors > 1) enc = colors;
558 else enc = colors ^ 1;
559 }
560 }
561 }
562 testerror2 += pixerrorbest;
563 bits2 |= enc << (2 * (j * 4 + i));
564 }
565 }
566 } else {
567 testerror2 = 0xffffffff;
568 }
569
570 /* finally we're finished, write back colors and bits */
571 if ((testerror > testerror2) || (haveAlpha)) {
572 *blkaddr++ = color1 & 0xff;
573 *blkaddr++ = color1 >> 8;
574 *blkaddr++ = color0 & 0xff;
575 *blkaddr++ = color0 >> 8;
576 *blkaddr++ = bits2 & 0xff;
577 *blkaddr++ = ( bits2 >> 8) & 0xff;
578 *blkaddr++ = ( bits2 >> 16) & 0xff;
579 *blkaddr = bits2 >> 24;
580 }
581 else {
582 *blkaddr++ = color0 & 0xff;
583 *blkaddr++ = color0 >> 8;
584 *blkaddr++ = color1 & 0xff;
585 *blkaddr++ = color1 >> 8;
586 *blkaddr++ = bits & 0xff;
587 *blkaddr++ = ( bits >> 8) & 0xff;
588 *blkaddr++ = ( bits >> 16) & 0xff;
589 *blkaddr = bits >> 24;
590 }
591 }
592
593 static void encodedxtcolorblockfaster( GLubyte *blkaddr, GLubyte srccolors[4][4][4],
594 GLint numxpixels, GLint numypixels, GLuint type )
595 {
596 /* simplistic approach. We need two base colors, simply use the "highest" and the "lowest" color
597 present in the picture as base colors */
598
599 /* define lowest and highest color as shortest and longest vector to 0/0/0, though the
600 vectors are weighted similar to their importance in rgb-luminance conversion
601 doesn't work too well though...
602 This seems to be a rather difficult problem */
603
604 GLubyte *bestcolor[2];
605 GLubyte basecolors[2][3];
606 GLubyte i, j;
607 GLuint lowcv, highcv, testcv;
608 GLboolean haveAlpha = GL_FALSE;
609
610 lowcv = highcv = srccolors[0][0][0] * srccolors[0][0][0] * REDWEIGHT +
611 srccolors[0][0][1] * srccolors[0][0][1] * GREENWEIGHT +
612 srccolors[0][0][2] * srccolors[0][0][2] * BLUEWEIGHT;
613 bestcolor[0] = bestcolor[1] = srccolors[0][0];
614 for (j = 0; j < numypixels; j++) {
615 for (i = 0; i < numxpixels; i++) {
616 /* don't use this as a base color if the pixel will get black/transparent anyway */
617 if ((type != GL_COMPRESSED_RGBA_S3TC_DXT1_EXT) || (srccolors[j][i][3] > ALPHACUT)) {
618 testcv = srccolors[j][i][0] * srccolors[j][i][0] * REDWEIGHT +
619 srccolors[j][i][1] * srccolors[j][i][1] * GREENWEIGHT +
620 srccolors[j][i][2] * srccolors[j][i][2] * BLUEWEIGHT;
621 if (testcv > highcv) {
622 highcv = testcv;
623 bestcolor[1] = srccolors[j][i];
624 }
625 else if (testcv < lowcv) {
626 lowcv = testcv;
627 bestcolor[0] = srccolors[j][i];
628 }
629 }
630 else haveAlpha = GL_TRUE;
631 }
632 }
633 /* make sure the original color values won't get touched... */
634 for (j = 0; j < 2; j++) {
635 for (i = 0; i < 3; i++) {
636 basecolors[j][i] = bestcolor[j][i];
637 }
638 }
639 bestcolor[0] = basecolors[0];
640 bestcolor[1] = basecolors[1];
641
642 /* try to find better base colors */
643 fancybasecolorsearch(blkaddr, srccolors, bestcolor, numxpixels, numypixels, type, haveAlpha);
644 /* find the best encoding for these colors, and store the result */
645 storedxtencodedblock(blkaddr, srccolors, bestcolor, numxpixels, numypixels, type, haveAlpha);
646 }
647
648 static void writedxt5encodedalphablock( GLubyte *blkaddr, GLubyte alphabase1, GLubyte alphabase2,
649 GLubyte alphaenc[16])
650 {
651 *blkaddr++ = alphabase1;
652 *blkaddr++ = alphabase2;
653 *blkaddr++ = alphaenc[0] | (alphaenc[1] << 3) | ((alphaenc[2] & 3) << 6);
654 *blkaddr++ = (alphaenc[2] >> 2) | (alphaenc[3] << 1) | (alphaenc[4] << 4) | ((alphaenc[5] & 1) << 7);
655 *blkaddr++ = (alphaenc[5] >> 1) | (alphaenc[6] << 2) | (alphaenc[7] << 5);
656 *blkaddr++ = alphaenc[8] | (alphaenc[9] << 3) | ((alphaenc[10] & 3) << 6);
657 *blkaddr++ = (alphaenc[10] >> 2) | (alphaenc[11] << 1) | (alphaenc[12] << 4) | ((alphaenc[13] & 1) << 7);
658 *blkaddr++ = (alphaenc[13] >> 1) | (alphaenc[14] << 2) | (alphaenc[15] << 5);
659 }
660
661 static void encodedxt5alpha(GLubyte *blkaddr, GLubyte srccolors[4][4][4],
662 GLint numxpixels, GLint numypixels)
663 {
664 GLubyte alphabase[2], alphause[2];
665 GLshort alphatest[2];
666 GLuint alphablockerror1, alphablockerror2, alphablockerror3;
667 GLubyte i, j, aindex, acutValues[7];
668 GLubyte alphaenc1[16], alphaenc2[16], alphaenc3[16];
669 GLboolean alphaabsmin = GL_FALSE;
670 GLboolean alphaabsmax = GL_FALSE;
671 GLshort alphadist;
672
673 /* find lowest and highest alpha value in block, alphabase[0] lowest, alphabase[1] highest */
674 alphabase[0] = 0xff; alphabase[1] = 0x0;
675 for (j = 0; j < numypixels; j++) {
676 for (i = 0; i < numxpixels; i++) {
677 if (srccolors[j][i][3] == 0)
678 alphaabsmin = GL_TRUE;
679 else if (srccolors[j][i][3] == 255)
680 alphaabsmax = GL_TRUE;
681 else {
682 if (srccolors[j][i][3] > alphabase[1])
683 alphabase[1] = srccolors[j][i][3];
684 if (srccolors[j][i][3] < alphabase[0])
685 alphabase[0] = srccolors[j][i][3];
686 }
687 }
688 }
689
690
691 if ((alphabase[0] > alphabase[1]) && !(alphaabsmin && alphaabsmax)) { /* one color, either max or min */
692 /* shortcut here since it is a very common case (and also avoids later problems) */
693 /* || (alphabase[0] == alphabase[1] && !alphaabsmin && !alphaabsmax) */
694 /* could also thest for alpha0 == alpha1 (and not min/max), but probably not common, so don't bother */
695
696 *blkaddr++ = srccolors[0][0][3];
697 blkaddr++;
698 *blkaddr++ = 0;
699 *blkaddr++ = 0;
700 *blkaddr++ = 0;
701 *blkaddr++ = 0;
702 *blkaddr++ = 0;
703 *blkaddr++ = 0;
704 /* fprintf(stderr, "enc0 used\n");*/
705 return;
706 }
707
708 /* find best encoding for alpha0 > alpha1 */
709 /* it's possible this encoding is better even if both alphaabsmin and alphaabsmax are true */
710 alphablockerror1 = 0x0;
711 alphablockerror2 = 0xffffffff;
712 alphablockerror3 = 0xffffffff;
713 if (alphaabsmin) alphause[0] = 0;
714 else alphause[0] = alphabase[0];
715 if (alphaabsmax) alphause[1] = 255;
716 else alphause[1] = alphabase[1];
717 /* calculate the 7 cut values, just the middle between 2 of the computed alpha values */
718 for (aindex = 0; aindex < 7; aindex++) {
719 /* don't forget here is always rounded down */
720 acutValues[aindex] = (alphause[0] * (2*aindex + 1) + alphause[1] * (14 - (2*aindex + 1))) / 14;
721 }
722
723 for (j = 0; j < numypixels; j++) {
724 for (i = 0; i < numxpixels; i++) {
725 /* maybe it's overkill to have the most complicated calculation just for the error
726 calculation which we only need to figure out if encoding1 or encoding2 is better... */
727 if (srccolors[j][i][3] > acutValues[0]) {
728 alphaenc1[4*j + i] = 0;
729 alphadist = srccolors[j][i][3] - alphause[1];
730 }
731 else if (srccolors[j][i][3] > acutValues[1]) {
732 alphaenc1[4*j + i] = 2;
733 alphadist = srccolors[j][i][3] - (alphause[1] * 6 + alphause[0] * 1) / 7;
734 }
735 else if (srccolors[j][i][3] > acutValues[2]) {
736 alphaenc1[4*j + i] = 3;
737 alphadist = srccolors[j][i][3] - (alphause[1] * 5 + alphause[0] * 2) / 7;
738 }
739 else if (srccolors[j][i][3] > acutValues[3]) {
740 alphaenc1[4*j + i] = 4;
741 alphadist = srccolors[j][i][3] - (alphause[1] * 4 + alphause[0] * 3) / 7;
742 }
743 else if (srccolors[j][i][3] > acutValues[4]) {
744 alphaenc1[4*j + i] = 5;
745 alphadist = srccolors[j][i][3] - (alphause[1] * 3 + alphause[0] * 4) / 7;
746 }
747 else if (srccolors[j][i][3] > acutValues[5]) {
748 alphaenc1[4*j + i] = 6;
749 alphadist = srccolors[j][i][3] - (alphause[1] * 2 + alphause[0] * 5) / 7;
750 }
751 else if (srccolors[j][i][3] > acutValues[6]) {
752 alphaenc1[4*j + i] = 7;
753 alphadist = srccolors[j][i][3] - (alphause[1] * 1 + alphause[0] * 6) / 7;
754 }
755 else {
756 alphaenc1[4*j + i] = 1;
757 alphadist = srccolors[j][i][3] - alphause[0];
758 }
759 alphablockerror1 += alphadist * alphadist;
760 }
761 }
762 /* for (i = 0; i < 16; i++) {
763 fprintf(stderr, "%d ", alphaenc1[i]);
764 }
765 fprintf(stderr, "cutVals ");
766 for (i = 0; i < 8; i++) {
767 fprintf(stderr, "%d ", acutValues[i]);
768 }
769 fprintf(stderr, "srcVals ");
770 for (j = 0; j < numypixels; j++)
771 for (i = 0; i < numxpixels; i++) {
772 fprintf(stderr, "%d ", srccolors[j][i][3]);
773 }
774
775 fprintf(stderr, "\n");
776 }*/
777 /* it's not very likely this encoding is better if both alphaabsmin and alphaabsmax
778 are false but try it anyway */
779 if (alphablockerror1 >= 32) {
780
781 /* don't bother if encoding is already very good, this condition should also imply
782 we have valid alphabase colors which we absolutely need (alphabase[0] <= alphabase[1]) */
783 alphablockerror2 = 0;
784 for (aindex = 0; aindex < 5; aindex++) {
785 /* don't forget here is always rounded down */
786 acutValues[aindex] = (alphabase[0] * (10 - (2*aindex + 1)) + alphabase[1] * (2*aindex + 1)) / 10;
787 }
788 for (j = 0; j < numypixels; j++) {
789 for (i = 0; i < numxpixels; i++) {
790 /* maybe it's overkill to have the most complicated calculation just for the error
791 calculation which we only need to figure out if encoding1 or encoding2 is better... */
792 if (srccolors[j][i][3] == 0) {
793 alphaenc2[4*j + i] = 6;
794 alphadist = 0;
795 }
796 else if (srccolors[j][i][3] == 255) {
797 alphaenc2[4*j + i] = 7;
798 alphadist = 0;
799 }
800 else if (srccolors[j][i][3] <= acutValues[0]) {
801 alphaenc2[4*j + i] = 0;
802 alphadist = srccolors[j][i][3] - alphabase[0];
803 }
804 else if (srccolors[j][i][3] <= acutValues[1]) {
805 alphaenc2[4*j + i] = 2;
806 alphadist = srccolors[j][i][3] - (alphabase[0] * 4 + alphabase[1] * 1) / 5;
807 }
808 else if (srccolors[j][i][3] <= acutValues[2]) {
809 alphaenc2[4*j + i] = 3;
810 alphadist = srccolors[j][i][3] - (alphabase[0] * 3 + alphabase[1] * 2) / 5;
811 }
812 else if (srccolors[j][i][3] <= acutValues[3]) {
813 alphaenc2[4*j + i] = 4;
814 alphadist = srccolors[j][i][3] - (alphabase[0] * 2 + alphabase[1] * 3) / 5;
815 }
816 else if (srccolors[j][i][3] <= acutValues[4]) {
817 alphaenc2[4*j + i] = 5;
818 alphadist = srccolors[j][i][3] - (alphabase[0] * 1 + alphabase[1] * 4) / 5;
819 }
820 else {
821 alphaenc2[4*j + i] = 1;
822 alphadist = srccolors[j][i][3] - alphabase[1];
823 }
824 alphablockerror2 += alphadist * alphadist;
825 }
826 }
827
828
829 /* skip this if the error is already very small
830 this encoding is MUCH better on average than #2 though, but expensive! */
831 if ((alphablockerror2 > 96) && (alphablockerror1 > 96)) {
832 GLshort blockerrlin1 = 0;
833 GLshort blockerrlin2 = 0;
834 GLubyte nralphainrangelow = 0;
835 GLubyte nralphainrangehigh = 0;
836 alphatest[0] = 0xff;
837 alphatest[1] = 0x0;
838 /* if we have large range it's likely there are values close to 0/255, try to map them to 0/255 */
839 for (j = 0; j < numypixels; j++) {
840 for (i = 0; i < numxpixels; i++) {
841 if ((srccolors[j][i][3] > alphatest[1]) && (srccolors[j][i][3] < (255 -(alphabase[1] - alphabase[0]) / 28)))
842 alphatest[1] = srccolors[j][i][3];
843 if ((srccolors[j][i][3] < alphatest[0]) && (srccolors[j][i][3] > (alphabase[1] - alphabase[0]) / 28))
844 alphatest[0] = srccolors[j][i][3];
845 }
846 }
847 /* shouldn't happen too often, don't really care about those degenerated cases */
848 if (alphatest[1] <= alphatest[0]) {
849 alphatest[0] = 1;
850 alphatest[1] = 254;
851 /* fprintf(stderr, "only 1 or 0 colors for encoding!\n");*/
852 }
853 for (aindex = 0; aindex < 5; aindex++) {
854 /* don't forget here is always rounded down */
855 acutValues[aindex] = (alphatest[0] * (10 - (2*aindex + 1)) + alphatest[1] * (2*aindex + 1)) / 10;
856 }
857
858 /* find the "average" difference between the alpha values and the next encoded value.
859 This is then used to calculate new base values.
860 Should there be some weighting, i.e. those values closer to alphatest[x] have more weight,
861 since they will see more improvement, and also because the values in the middle are somewhat
862 likely to get no improvement at all (because the base values might move in different directions)?
863 OTOH it would mean the values in the middle are even less likely to get an improvement
864 */
865 for (j = 0; j < numypixels; j++) {
866 for (i = 0; i < numxpixels; i++) {
867 if (srccolors[j][i][3] <= alphatest[0] / 2) {
868 }
869 else if (srccolors[j][i][3] > ((255 + alphatest[1]) / 2)) {
870 }
871 else if (srccolors[j][i][3] <= acutValues[0]) {
872 blockerrlin1 += (srccolors[j][i][3] - alphatest[0]);
873 nralphainrangelow += 1;
874 }
875 else if (srccolors[j][i][3] <= acutValues[1]) {
876 blockerrlin1 += (srccolors[j][i][3] - (alphatest[0] * 4 + alphatest[1] * 1) / 5);
877 blockerrlin2 += (srccolors[j][i][3] - (alphatest[0] * 4 + alphatest[1] * 1) / 5);
878 nralphainrangelow += 1;
879 nralphainrangehigh += 1;
880 }
881 else if (srccolors[j][i][3] <= acutValues[2]) {
882 blockerrlin1 += (srccolors[j][i][3] - (alphatest[0] * 3 + alphatest[1] * 2) / 5);
883 blockerrlin2 += (srccolors[j][i][3] - (alphatest[0] * 3 + alphatest[1] * 2) / 5);
884 nralphainrangelow += 1;
885 nralphainrangehigh += 1;
886 }
887 else if (srccolors[j][i][3] <= acutValues[3]) {
888 blockerrlin1 += (srccolors[j][i][3] - (alphatest[0] * 2 + alphatest[1] * 3) / 5);
889 blockerrlin2 += (srccolors[j][i][3] - (alphatest[0] * 2 + alphatest[1] * 3) / 5);
890 nralphainrangelow += 1;
891 nralphainrangehigh += 1;
892 }
893 else if (srccolors[j][i][3] <= acutValues[4]) {
894 blockerrlin1 += (srccolors[j][i][3] - (alphatest[0] * 1 + alphatest[1] * 4) / 5);
895 blockerrlin2 += (srccolors[j][i][3] - (alphatest[0] * 1 + alphatest[1] * 4) / 5);
896 nralphainrangelow += 1;
897 nralphainrangehigh += 1;
898 }
899 else {
900 blockerrlin2 += (srccolors[j][i][3] - alphatest[1]);
901 nralphainrangehigh += 1;
902 }
903 }
904 }
905 /* shouldn't happen often, needed to avoid div by zero */
906 if (nralphainrangelow == 0) nralphainrangelow = 1;
907 if (nralphainrangehigh == 0) nralphainrangehigh = 1;
908 alphatest[0] = alphatest[0] + (blockerrlin1 / nralphainrangelow);
909 /* fprintf(stderr, "block err lin low %d, nr %d\n", blockerrlin1, nralphainrangelow);
910 fprintf(stderr, "block err lin high %d, nr %d\n", blockerrlin2, nralphainrangehigh);*/
911 /* again shouldn't really happen often... */
912 if (alphatest[0] < 0) {
913 alphatest[0] = 0;
914 /* fprintf(stderr, "adj alpha base val to 0\n");*/
915 }
916 alphatest[1] = alphatest[1] + (blockerrlin2 / nralphainrangehigh);
917 if (alphatest[1] > 255) {
918 alphatest[1] = 255;
919 /* fprintf(stderr, "adj alpha base val to 255\n");*/
920 }
921
922 alphablockerror3 = 0;
923 for (aindex = 0; aindex < 5; aindex++) {
924 /* don't forget here is always rounded down */
925 acutValues[aindex] = (alphatest[0] * (10 - (2*aindex + 1)) + alphatest[1] * (2*aindex + 1)) / 10;
926 }
927 for (j = 0; j < numypixels; j++) {
928 for (i = 0; i < numxpixels; i++) {
929 /* maybe it's overkill to have the most complicated calculation just for the error
930 calculation which we only need to figure out if encoding1 or encoding2 is better... */
931 if (srccolors[j][i][3] <= alphatest[0] / 2) {
932 alphaenc3[4*j + i] = 6;
933 alphadist = srccolors[j][i][3];
934 }
935 else if (srccolors[j][i][3] > ((255 + alphatest[1]) / 2)) {
936 alphaenc3[4*j + i] = 7;
937 alphadist = 255 - srccolors[j][i][3];
938 }
939 else if (srccolors[j][i][3] <= acutValues[0]) {
940 alphaenc3[4*j + i] = 0;
941 alphadist = srccolors[j][i][3] - alphatest[0];
942 }
943 else if (srccolors[j][i][3] <= acutValues[1]) {
944 alphaenc3[4*j + i] = 2;
945 alphadist = srccolors[j][i][3] - (alphatest[0] * 4 + alphatest[1] * 1) / 5;
946 }
947 else if (srccolors[j][i][3] <= acutValues[2]) {
948 alphaenc3[4*j + i] = 3;
949 alphadist = srccolors[j][i][3] - (alphatest[0] * 3 + alphatest[1] * 2) / 5;
950 }
951 else if (srccolors[j][i][3] <= acutValues[3]) {
952 alphaenc3[4*j + i] = 4;
953 alphadist = srccolors[j][i][3] - (alphatest[0] * 2 + alphatest[1] * 3) / 5;
954 }
955 else if (srccolors[j][i][3] <= acutValues[4]) {
956 alphaenc3[4*j + i] = 5;
957 alphadist = srccolors[j][i][3] - (alphatest[0] * 1 + alphatest[1] * 4) / 5;
958 }
959 else {
960 alphaenc3[4*j + i] = 1;
961 alphadist = srccolors[j][i][3] - alphatest[1];
962 }
963 alphablockerror3 += alphadist * alphadist;
964 }
965 }
966 }
967 }
968 /* write the alpha values and encoding back. */
969 if ((alphablockerror1 <= alphablockerror2) && (alphablockerror1 <= alphablockerror3)) {
970 /* if (alphablockerror1 > 96) fprintf(stderr, "enc1 used, error %d\n", alphablockerror1);*/
971 writedxt5encodedalphablock( blkaddr, alphause[1], alphause[0], alphaenc1 );
972 }
973 else if (alphablockerror2 <= alphablockerror3) {
974 /* if (alphablockerror2 > 96) fprintf(stderr, "enc2 used, error %d\n", alphablockerror2);*/
975 writedxt5encodedalphablock( blkaddr, alphabase[0], alphabase[1], alphaenc2 );
976 }
977 else {
978 /* fprintf(stderr, "enc3 used, error %d\n", alphablockerror3);*/
979 writedxt5encodedalphablock( blkaddr, (GLubyte)alphatest[0], (GLubyte)alphatest[1], alphaenc3 );
980 }
981 }
982
983 static void extractsrccolors( GLubyte srcpixels[4][4][4], const GLchan *srcaddr,
984 GLint srcRowStride, GLint numxpixels, GLint numypixels, GLint comps)
985 {
986 GLubyte i, j, c;
987 const GLchan *curaddr;
988 for (j = 0; j < numypixels; j++) {
989 curaddr = srcaddr + j * srcRowStride * comps;
990 for (i = 0; i < numxpixels; i++) {
991 for (c = 0; c < comps; c++) {
992 srcpixels[j][i][c] = *curaddr++ / (CHAN_MAX / 255);
993 }
994 }
995 }
996 }
997
998
999 void tx_compress_dxtn(GLint srccomps, GLint width, GLint height, const GLubyte *srcPixData,
1000 GLenum destFormat, GLubyte *dest, GLint dstRowStride)
1001 {
1002 GLubyte *blkaddr = dest;
1003 GLubyte srcpixels[4][4][4];
1004 const GLchan *srcaddr = srcPixData;
1005 GLint numxpixels, numypixels;
1006 GLint i, j;
1007 GLint dstRowDiff;
1008
1009 switch (destFormat) {
1010 case GL_COMPRESSED_RGB_S3TC_DXT1_EXT:
1011 case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT:
1012 /* hmm we used to get called without dstRowStride... */
1013 dstRowDiff = dstRowStride >= (width * 2) ? dstRowStride - (((width + 3) & ~3) * 2) : 0;
1014 /* fprintf(stderr, "dxt1 tex width %d tex height %d dstRowStride %d\n",
1015 width, height, dstRowStride); */
1016 for (j = 0; j < height; j += 4) {
1017 if (height > j + 3) numypixels = 4;
1018 else numypixels = height - j;
1019 srcaddr = srcPixData + j * width * srccomps;
1020 for (i = 0; i < width; i += 4) {
1021 if (width > i + 3) numxpixels = 4;
1022 else numxpixels = width - i;
1023 extractsrccolors(srcpixels, srcaddr, width, numxpixels, numypixels, srccomps);
1024 encodedxtcolorblockfaster(blkaddr, srcpixels, numxpixels, numypixels, destFormat);
1025 srcaddr += srccomps * numxpixels;
1026 blkaddr += 8;
1027 }
1028 blkaddr += dstRowDiff;
1029 }
1030 break;
1031 case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT:
1032 dstRowDiff = dstRowStride >= (width * 4) ? dstRowStride - (((width + 3) & ~3) * 4) : 0;
1033 /* fprintf(stderr, "dxt3 tex width %d tex height %d dstRowStride %d\n",
1034 width, height, dstRowStride); */
1035 for (j = 0; j < height; j += 4) {
1036 if (height > j + 3) numypixels = 4;
1037 else numypixels = height - j;
1038 srcaddr = srcPixData + j * width * srccomps;
1039 for (i = 0; i < width; i += 4) {
1040 if (width > i + 3) numxpixels = 4;
1041 else numxpixels = width - i;
1042 extractsrccolors(srcpixels, srcaddr, width, numxpixels, numypixels, srccomps);
1043 *blkaddr++ = (srcpixels[0][0][3] >> 4) | (srcpixels[0][1][3] & 0xf0);
1044 *blkaddr++ = (srcpixels[0][2][3] >> 4) | (srcpixels[0][3][3] & 0xf0);
1045 *blkaddr++ = (srcpixels[1][0][3] >> 4) | (srcpixels[1][1][3] & 0xf0);
1046 *blkaddr++ = (srcpixels[1][2][3] >> 4) | (srcpixels[1][3][3] & 0xf0);
1047 *blkaddr++ = (srcpixels[2][0][3] >> 4) | (srcpixels[2][1][3] & 0xf0);
1048 *blkaddr++ = (srcpixels[2][2][3] >> 4) | (srcpixels[2][3][3] & 0xf0);
1049 *blkaddr++ = (srcpixels[3][0][3] >> 4) | (srcpixels[3][1][3] & 0xf0);
1050 *blkaddr++ = (srcpixels[3][2][3] >> 4) | (srcpixels[3][3][3] & 0xf0);
1051 encodedxtcolorblockfaster(blkaddr, srcpixels, numxpixels, numypixels, destFormat);
1052 srcaddr += srccomps * numxpixels;
1053 blkaddr += 8;
1054 }
1055 blkaddr += dstRowDiff;
1056 }
1057 break;
1058 case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT:
1059 dstRowDiff = dstRowStride >= (width * 4) ? dstRowStride - (((width + 3) & ~3) * 4) : 0;
1060 /* fprintf(stderr, "dxt5 tex width %d tex height %d dstRowStride %d\n",
1061 width, height, dstRowStride); */
1062 for (j = 0; j < height; j += 4) {
1063 if (height > j + 3) numypixels = 4;
1064 else numypixels = height - j;
1065 srcaddr = srcPixData + j * width * srccomps;
1066 for (i = 0; i < width; i += 4) {
1067 if (width > i + 3) numxpixels = 4;
1068 else numxpixels = width - i;
1069 extractsrccolors(srcpixels, srcaddr, width, numxpixels, numypixels, srccomps);
1070 encodedxt5alpha(blkaddr, srcpixels, numxpixels, numypixels);
1071 encodedxtcolorblockfaster(blkaddr + 8, srcpixels, numxpixels, numypixels, destFormat);
1072 srcaddr += srccomps * numxpixels;
1073 blkaddr += 16;
1074 }
1075 blkaddr += dstRowDiff;
1076 }
1077 break;
1078 default:
1079 fprintf(stderr, "libdxtn: Bad dstFormat %d in tx_compress_dxtn\n", destFormat);
1080 return;
1081 }
1082 }