mesa: Delay s_texcombine.c memory allocation until it's used.
[mesa.git] / src / mesa / swrast / s_texcombine.c
1 /*
2 * Mesa 3-D graphics library
3 * Version: 7.5
4 *
5 * Copyright (C) 1999-2008 Brian Paul All Rights Reserved.
6 * Copyright (C) 2009 VMware, Inc. All Rights Reserved.
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and associated documentation files (the "Software"),
10 * to deal in the Software without restriction, including without limitation
11 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 * and/or sell copies of the Software, and to permit persons to whom the
13 * Software is furnished to do so, subject to the following conditions:
14 *
15 * The above copyright notice and this permission notice shall be included
16 * in all copies or substantial portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
21 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
22 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
23 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24 */
25
26
27 #include "main/glheader.h"
28 #include "main/context.h"
29 #include "main/colormac.h"
30 #include "main/imports.h"
31 #include "main/pixeltransfer.h"
32 #include "program/prog_instruction.h"
33
34 #include "s_context.h"
35 #include "s_texcombine.h"
36
37
38 /**
39 * Pointer to an array of float[4]; p[i][c] is component c of element i.
40 * This type makes the code below more concise and avoids a lot of casting.
41 */
42 typedef float (*float4_array)[4];
43
44
45 /**
46 * Return array of texels for given unit.
47 */
48 static inline float4_array
49 get_texel_array(SWcontext *swrast, GLuint unit)
50 {
51 #ifdef _OPENMP
52 return (float4_array) (swrast->TexelBuffer + unit * MAX_WIDTH * 4 * omp_get_num_threads() + (MAX_WIDTH * 4 * omp_get_thread_num()));
53 #else
54 return (float4_array) (swrast->TexelBuffer + unit * MAX_WIDTH * 4);
55 #endif
56 }
57
58
59
60 /**
61 * Do texture application for:
62 * GL_EXT_texture_env_combine
63 * GL_ARB_texture_env_combine
64 * GL_EXT_texture_env_dot3
65 * GL_ARB_texture_env_dot3
66 * GL_ATI_texture_env_combine3
67 * GL_NV_texture_env_combine4
68 * conventional GL texture env modes
69 *
70 * \param ctx rendering context
71 * \param unit the texture combiner unit
72 * \param n number of fragments to process (span width)
73 * \param primary_rgba incoming fragment color array
74 * \param texelBuffer pointer to texel colors for all texture units
75 *
76 * \param rgba incoming/result fragment colors
77 */
78 static void
79 texture_combine( struct gl_context *ctx, GLuint unit, GLuint n,
80 const float4_array primary_rgba,
81 const GLfloat *texelBuffer,
82 GLchan (*rgbaChan)[4] )
83 {
84 SWcontext *swrast = SWRAST_CONTEXT(ctx);
85 const struct gl_texture_unit *textureUnit = &(ctx->Texture.Unit[unit]);
86 const struct gl_tex_env_combine_state *combine = textureUnit->_CurrentCombine;
87 float4_array argRGB[MAX_COMBINER_TERMS];
88 float4_array argA[MAX_COMBINER_TERMS];
89 const GLfloat scaleRGB = (GLfloat) (1 << combine->ScaleShiftRGB);
90 const GLfloat scaleA = (GLfloat) (1 << combine->ScaleShiftA);
91 const GLuint numArgsRGB = combine->_NumArgsRGB;
92 const GLuint numArgsA = combine->_NumArgsA;
93 float4_array ccolor[4], rgba;
94 GLuint i, term;
95
96 if (!swrast->TexelBuffer) {
97 #ifdef _OPENMP
98 const GLint maxThreads = omp_get_max_threads();
99 #else
100 const GLint maxThreads = 1;
101 #endif
102
103 /* TexelBuffer is also global and normally shared by all SWspan
104 * instances; when running with multiple threads, create one per
105 * thread.
106 */
107 swrast->TexelBuffer =
108 (GLfloat *) MALLOC(ctx->Const.MaxTextureImageUnits * maxThreads *
109 MAX_WIDTH * 4 * sizeof(GLfloat));
110 if (!swrast->TexelBuffer) {
111 _mesa_error(ctx, GL_OUT_OF_MEMORY, "texture_combine");
112 return;
113 }
114 }
115
116 /* alloc temp pixel buffers */
117 rgba = (float4_array) malloc(4 * n * sizeof(GLfloat));
118 if (!rgba) {
119 _mesa_error(ctx, GL_OUT_OF_MEMORY, "texture_combine");
120 return;
121 }
122
123 for (i = 0; i < numArgsRGB || i < numArgsA; i++) {
124 ccolor[i] = (float4_array) malloc(4 * n * sizeof(GLfloat));
125 if (!ccolor[i]) {
126 while (i) {
127 free(ccolor[i]);
128 i--;
129 }
130 _mesa_error(ctx, GL_OUT_OF_MEMORY, "texture_combine");
131 return;
132 }
133 }
134
135 for (i = 0; i < n; i++) {
136 rgba[i][RCOMP] = CHAN_TO_FLOAT(rgbaChan[i][RCOMP]);
137 rgba[i][GCOMP] = CHAN_TO_FLOAT(rgbaChan[i][GCOMP]);
138 rgba[i][BCOMP] = CHAN_TO_FLOAT(rgbaChan[i][BCOMP]);
139 rgba[i][ACOMP] = CHAN_TO_FLOAT(rgbaChan[i][ACOMP]);
140 }
141
142 /*
143 printf("modeRGB 0x%x modeA 0x%x srcRGB1 0x%x srcA1 0x%x srcRGB2 0x%x srcA2 0x%x\n",
144 combine->ModeRGB,
145 combine->ModeA,
146 combine->SourceRGB[0],
147 combine->SourceA[0],
148 combine->SourceRGB[1],
149 combine->SourceA[1]);
150 */
151
152 /*
153 * Do operand setup for up to 4 operands. Loop over the terms.
154 */
155 for (term = 0; term < numArgsRGB; term++) {
156 const GLenum srcRGB = combine->SourceRGB[term];
157 const GLenum operandRGB = combine->OperandRGB[term];
158
159 switch (srcRGB) {
160 case GL_TEXTURE:
161 argRGB[term] = get_texel_array(swrast, unit);
162 break;
163 case GL_PRIMARY_COLOR:
164 argRGB[term] = primary_rgba;
165 break;
166 case GL_PREVIOUS:
167 argRGB[term] = rgba;
168 break;
169 case GL_CONSTANT:
170 {
171 float4_array c = ccolor[term];
172 GLfloat red = textureUnit->EnvColor[0];
173 GLfloat green = textureUnit->EnvColor[1];
174 GLfloat blue = textureUnit->EnvColor[2];
175 GLfloat alpha = textureUnit->EnvColor[3];
176 for (i = 0; i < n; i++) {
177 ASSIGN_4V(c[i], red, green, blue, alpha);
178 }
179 argRGB[term] = ccolor[term];
180 }
181 break;
182 /* GL_ATI_texture_env_combine3 allows GL_ZERO & GL_ONE as sources.
183 */
184 case GL_ZERO:
185 {
186 float4_array c = ccolor[term];
187 for (i = 0; i < n; i++) {
188 ASSIGN_4V(c[i], 0.0F, 0.0F, 0.0F, 0.0F);
189 }
190 argRGB[term] = ccolor[term];
191 }
192 break;
193 case GL_ONE:
194 {
195 float4_array c = ccolor[term];
196 for (i = 0; i < n; i++) {
197 ASSIGN_4V(c[i], 1.0F, 1.0F, 1.0F, 1.0F);
198 }
199 argRGB[term] = ccolor[term];
200 }
201 break;
202 default:
203 /* ARB_texture_env_crossbar source */
204 {
205 const GLuint srcUnit = srcRGB - GL_TEXTURE0;
206 ASSERT(srcUnit < ctx->Const.MaxTextureUnits);
207 if (!ctx->Texture.Unit[srcUnit]._ReallyEnabled)
208 goto end;
209 argRGB[term] = get_texel_array(swrast, srcUnit);
210 }
211 }
212
213 if (operandRGB != GL_SRC_COLOR) {
214 float4_array src = argRGB[term];
215 float4_array dst = ccolor[term];
216
217 /* point to new arg[term] storage */
218 argRGB[term] = ccolor[term];
219
220 switch (operandRGB) {
221 case GL_ONE_MINUS_SRC_COLOR:
222 for (i = 0; i < n; i++) {
223 dst[i][RCOMP] = 1.0F - src[i][RCOMP];
224 dst[i][GCOMP] = 1.0F - src[i][GCOMP];
225 dst[i][BCOMP] = 1.0F - src[i][BCOMP];
226 }
227 break;
228 case GL_SRC_ALPHA:
229 for (i = 0; i < n; i++) {
230 dst[i][RCOMP] =
231 dst[i][GCOMP] =
232 dst[i][BCOMP] = src[i][ACOMP];
233 }
234 break;
235 case GL_ONE_MINUS_SRC_ALPHA:
236 for (i = 0; i < n; i++) {
237 dst[i][RCOMP] =
238 dst[i][GCOMP] =
239 dst[i][BCOMP] = 1.0F - src[i][ACOMP];
240 }
241 break;
242 default:
243 _mesa_problem(ctx, "Bad operandRGB");
244 }
245 }
246 }
247
248 /*
249 * Set up the argA[term] pointers
250 */
251 for (term = 0; term < numArgsA; term++) {
252 const GLenum srcA = combine->SourceA[term];
253 const GLenum operandA = combine->OperandA[term];
254
255 switch (srcA) {
256 case GL_TEXTURE:
257 argA[term] = get_texel_array(swrast, unit);
258 break;
259 case GL_PRIMARY_COLOR:
260 argA[term] = primary_rgba;
261 break;
262 case GL_PREVIOUS:
263 argA[term] = rgba;
264 break;
265 case GL_CONSTANT:
266 {
267 float4_array c = ccolor[term];
268 GLfloat alpha = textureUnit->EnvColor[3];
269 for (i = 0; i < n; i++)
270 c[i][ACOMP] = alpha;
271 argA[term] = ccolor[term];
272 }
273 break;
274 /* GL_ATI_texture_env_combine3 allows GL_ZERO & GL_ONE as sources.
275 */
276 case GL_ZERO:
277 {
278 float4_array c = ccolor[term];
279 for (i = 0; i < n; i++)
280 c[i][ACOMP] = 0.0F;
281 argA[term] = ccolor[term];
282 }
283 break;
284 case GL_ONE:
285 {
286 float4_array c = ccolor[term];
287 for (i = 0; i < n; i++)
288 c[i][ACOMP] = 1.0F;
289 argA[term] = ccolor[term];
290 }
291 break;
292 default:
293 /* ARB_texture_env_crossbar source */
294 {
295 const GLuint srcUnit = srcA - GL_TEXTURE0;
296 ASSERT(srcUnit < ctx->Const.MaxTextureUnits);
297 if (!ctx->Texture.Unit[srcUnit]._ReallyEnabled)
298 goto end;
299 argA[term] = get_texel_array(swrast, srcUnit);
300 }
301 }
302
303 if (operandA == GL_ONE_MINUS_SRC_ALPHA) {
304 float4_array src = argA[term];
305 float4_array dst = ccolor[term];
306 argA[term] = ccolor[term];
307 for (i = 0; i < n; i++) {
308 dst[i][ACOMP] = 1.0F - src[i][ACOMP];
309 }
310 }
311 }
312
313 /* RGB channel combine */
314 {
315 float4_array arg0 = argRGB[0];
316 float4_array arg1 = argRGB[1];
317 float4_array arg2 = argRGB[2];
318 float4_array arg3 = argRGB[3];
319
320 switch (combine->ModeRGB) {
321 case GL_REPLACE:
322 for (i = 0; i < n; i++) {
323 rgba[i][RCOMP] = arg0[i][RCOMP] * scaleRGB;
324 rgba[i][GCOMP] = arg0[i][GCOMP] * scaleRGB;
325 rgba[i][BCOMP] = arg0[i][BCOMP] * scaleRGB;
326 }
327 break;
328 case GL_MODULATE:
329 for (i = 0; i < n; i++) {
330 rgba[i][RCOMP] = arg0[i][RCOMP] * arg1[i][RCOMP] * scaleRGB;
331 rgba[i][GCOMP] = arg0[i][GCOMP] * arg1[i][GCOMP] * scaleRGB;
332 rgba[i][BCOMP] = arg0[i][BCOMP] * arg1[i][BCOMP] * scaleRGB;
333 }
334 break;
335 case GL_ADD:
336 if (textureUnit->EnvMode == GL_COMBINE4_NV) {
337 /* (a * b) + (c * d) */
338 for (i = 0; i < n; i++) {
339 rgba[i][RCOMP] = (arg0[i][RCOMP] * arg1[i][RCOMP] +
340 arg2[i][RCOMP] * arg3[i][RCOMP]) * scaleRGB;
341 rgba[i][GCOMP] = (arg0[i][GCOMP] * arg1[i][GCOMP] +
342 arg2[i][GCOMP] * arg3[i][GCOMP]) * scaleRGB;
343 rgba[i][BCOMP] = (arg0[i][BCOMP] * arg1[i][BCOMP] +
344 arg2[i][BCOMP] * arg3[i][BCOMP]) * scaleRGB;
345 }
346 }
347 else {
348 /* 2-term addition */
349 for (i = 0; i < n; i++) {
350 rgba[i][RCOMP] = (arg0[i][RCOMP] + arg1[i][RCOMP]) * scaleRGB;
351 rgba[i][GCOMP] = (arg0[i][GCOMP] + arg1[i][GCOMP]) * scaleRGB;
352 rgba[i][BCOMP] = (arg0[i][BCOMP] + arg1[i][BCOMP]) * scaleRGB;
353 }
354 }
355 break;
356 case GL_ADD_SIGNED:
357 if (textureUnit->EnvMode == GL_COMBINE4_NV) {
358 /* (a * b) + (c * d) - 0.5 */
359 for (i = 0; i < n; i++) {
360 rgba[i][RCOMP] = (arg0[i][RCOMP] * arg1[i][RCOMP] +
361 arg2[i][RCOMP] * arg3[i][RCOMP] - 0.5F) * scaleRGB;
362 rgba[i][GCOMP] = (arg0[i][GCOMP] * arg1[i][GCOMP] +
363 arg2[i][GCOMP] * arg3[i][GCOMP] - 0.5F) * scaleRGB;
364 rgba[i][BCOMP] = (arg0[i][BCOMP] * arg1[i][BCOMP] +
365 arg2[i][BCOMP] * arg3[i][BCOMP] - 0.5F) * scaleRGB;
366 }
367 }
368 else {
369 for (i = 0; i < n; i++) {
370 rgba[i][RCOMP] = (arg0[i][RCOMP] + arg1[i][RCOMP] - 0.5F) * scaleRGB;
371 rgba[i][GCOMP] = (arg0[i][GCOMP] + arg1[i][GCOMP] - 0.5F) * scaleRGB;
372 rgba[i][BCOMP] = (arg0[i][BCOMP] + arg1[i][BCOMP] - 0.5F) * scaleRGB;
373 }
374 }
375 break;
376 case GL_INTERPOLATE:
377 for (i = 0; i < n; i++) {
378 rgba[i][RCOMP] = (arg0[i][RCOMP] * arg2[i][RCOMP] +
379 arg1[i][RCOMP] * (1.0F - arg2[i][RCOMP])) * scaleRGB;
380 rgba[i][GCOMP] = (arg0[i][GCOMP] * arg2[i][GCOMP] +
381 arg1[i][GCOMP] * (1.0F - arg2[i][GCOMP])) * scaleRGB;
382 rgba[i][BCOMP] = (arg0[i][BCOMP] * arg2[i][BCOMP] +
383 arg1[i][BCOMP] * (1.0F - arg2[i][BCOMP])) * scaleRGB;
384 }
385 break;
386 case GL_SUBTRACT:
387 for (i = 0; i < n; i++) {
388 rgba[i][RCOMP] = (arg0[i][RCOMP] - arg1[i][RCOMP]) * scaleRGB;
389 rgba[i][GCOMP] = (arg0[i][GCOMP] - arg1[i][GCOMP]) * scaleRGB;
390 rgba[i][BCOMP] = (arg0[i][BCOMP] - arg1[i][BCOMP]) * scaleRGB;
391 }
392 break;
393 case GL_DOT3_RGB_EXT:
394 case GL_DOT3_RGBA_EXT:
395 /* Do not scale the result by 1 2 or 4 */
396 for (i = 0; i < n; i++) {
397 GLfloat dot = ((arg0[i][RCOMP] - 0.5F) * (arg1[i][RCOMP] - 0.5F) +
398 (arg0[i][GCOMP] - 0.5F) * (arg1[i][GCOMP] - 0.5F) +
399 (arg0[i][BCOMP] - 0.5F) * (arg1[i][BCOMP] - 0.5F))
400 * 4.0F;
401 dot = CLAMP(dot, 0.0F, 1.0F);
402 rgba[i][RCOMP] = rgba[i][GCOMP] = rgba[i][BCOMP] = dot;
403 }
404 break;
405 case GL_DOT3_RGB:
406 case GL_DOT3_RGBA:
407 /* DO scale the result by 1 2 or 4 */
408 for (i = 0; i < n; i++) {
409 GLfloat dot = ((arg0[i][RCOMP] - 0.5F) * (arg1[i][RCOMP] - 0.5F) +
410 (arg0[i][GCOMP] - 0.5F) * (arg1[i][GCOMP] - 0.5F) +
411 (arg0[i][BCOMP] - 0.5F) * (arg1[i][BCOMP] - 0.5F))
412 * 4.0F * scaleRGB;
413 dot = CLAMP(dot, 0.0F, 1.0F);
414 rgba[i][RCOMP] = rgba[i][GCOMP] = rgba[i][BCOMP] = dot;
415 }
416 break;
417 case GL_MODULATE_ADD_ATI:
418 for (i = 0; i < n; i++) {
419 rgba[i][RCOMP] = ((arg0[i][RCOMP] * arg2[i][RCOMP]) +
420 arg1[i][RCOMP]) * scaleRGB;
421 rgba[i][GCOMP] = ((arg0[i][GCOMP] * arg2[i][GCOMP]) +
422 arg1[i][GCOMP]) * scaleRGB;
423 rgba[i][BCOMP] = ((arg0[i][BCOMP] * arg2[i][BCOMP]) +
424 arg1[i][BCOMP]) * scaleRGB;
425 }
426 break;
427 case GL_MODULATE_SIGNED_ADD_ATI:
428 for (i = 0; i < n; i++) {
429 rgba[i][RCOMP] = ((arg0[i][RCOMP] * arg2[i][RCOMP]) +
430 arg1[i][RCOMP] - 0.5F) * scaleRGB;
431 rgba[i][GCOMP] = ((arg0[i][GCOMP] * arg2[i][GCOMP]) +
432 arg1[i][GCOMP] - 0.5F) * scaleRGB;
433 rgba[i][BCOMP] = ((arg0[i][BCOMP] * arg2[i][BCOMP]) +
434 arg1[i][BCOMP] - 0.5F) * scaleRGB;
435 }
436 break;
437 case GL_MODULATE_SUBTRACT_ATI:
438 for (i = 0; i < n; i++) {
439 rgba[i][RCOMP] = ((arg0[i][RCOMP] * arg2[i][RCOMP]) -
440 arg1[i][RCOMP]) * scaleRGB;
441 rgba[i][GCOMP] = ((arg0[i][GCOMP] * arg2[i][GCOMP]) -
442 arg1[i][GCOMP]) * scaleRGB;
443 rgba[i][BCOMP] = ((arg0[i][BCOMP] * arg2[i][BCOMP]) -
444 arg1[i][BCOMP]) * scaleRGB;
445 }
446 break;
447 case GL_BUMP_ENVMAP_ATI:
448 /* this produces a fixed rgba color, and the coord calc is done elsewhere */
449 for (i = 0; i < n; i++) {
450 /* rgba result is 0,0,0,1 */
451 rgba[i][RCOMP] = 0.0;
452 rgba[i][GCOMP] = 0.0;
453 rgba[i][BCOMP] = 0.0;
454 rgba[i][ACOMP] = 1.0;
455 }
456 goto end; /* no alpha processing */
457 default:
458 _mesa_problem(ctx, "invalid combine mode");
459 }
460 }
461
462 /* Alpha channel combine */
463 {
464 float4_array arg0 = argA[0];
465 float4_array arg1 = argA[1];
466 float4_array arg2 = argA[2];
467 float4_array arg3 = argA[3];
468
469 switch (combine->ModeA) {
470 case GL_REPLACE:
471 for (i = 0; i < n; i++) {
472 rgba[i][ACOMP] = arg0[i][ACOMP] * scaleA;
473 }
474 break;
475 case GL_MODULATE:
476 for (i = 0; i < n; i++) {
477 rgba[i][ACOMP] = arg0[i][ACOMP] * arg1[i][ACOMP] * scaleA;
478 }
479 break;
480 case GL_ADD:
481 if (textureUnit->EnvMode == GL_COMBINE4_NV) {
482 /* (a * b) + (c * d) */
483 for (i = 0; i < n; i++) {
484 rgba[i][ACOMP] = (arg0[i][ACOMP] * arg1[i][ACOMP] +
485 arg2[i][ACOMP] * arg3[i][ACOMP]) * scaleA;
486 }
487 }
488 else {
489 /* two-term add */
490 for (i = 0; i < n; i++) {
491 rgba[i][ACOMP] = (arg0[i][ACOMP] + arg1[i][ACOMP]) * scaleA;
492 }
493 }
494 break;
495 case GL_ADD_SIGNED:
496 if (textureUnit->EnvMode == GL_COMBINE4_NV) {
497 /* (a * b) + (c * d) - 0.5 */
498 for (i = 0; i < n; i++) {
499 rgba[i][ACOMP] = (arg0[i][ACOMP] * arg1[i][ACOMP] +
500 arg2[i][ACOMP] * arg3[i][ACOMP] -
501 0.5F) * scaleA;
502 }
503 }
504 else {
505 /* a + b - 0.5 */
506 for (i = 0; i < n; i++) {
507 rgba[i][ACOMP] = (arg0[i][ACOMP] + arg1[i][ACOMP] - 0.5F) * scaleA;
508 }
509 }
510 break;
511 case GL_INTERPOLATE:
512 for (i = 0; i < n; i++) {
513 rgba[i][ACOMP] = (arg0[i][ACOMP] * arg2[i][ACOMP] +
514 arg1[i][ACOMP] * (1.0F - arg2[i][ACOMP]))
515 * scaleA;
516 }
517 break;
518 case GL_SUBTRACT:
519 for (i = 0; i < n; i++) {
520 rgba[i][ACOMP] = (arg0[i][ACOMP] - arg1[i][ACOMP]) * scaleA;
521 }
522 break;
523 case GL_MODULATE_ADD_ATI:
524 for (i = 0; i < n; i++) {
525 rgba[i][ACOMP] = ((arg0[i][ACOMP] * arg2[i][ACOMP])
526 + arg1[i][ACOMP]) * scaleA;
527 }
528 break;
529 case GL_MODULATE_SIGNED_ADD_ATI:
530 for (i = 0; i < n; i++) {
531 rgba[i][ACOMP] = ((arg0[i][ACOMP] * arg2[i][ACOMP]) +
532 arg1[i][ACOMP] - 0.5F) * scaleA;
533 }
534 break;
535 case GL_MODULATE_SUBTRACT_ATI:
536 for (i = 0; i < n; i++) {
537 rgba[i][ACOMP] = ((arg0[i][ACOMP] * arg2[i][ACOMP])
538 - arg1[i][ACOMP]) * scaleA;
539 }
540 break;
541 default:
542 _mesa_problem(ctx, "invalid combine mode");
543 }
544 }
545
546 /* Fix the alpha component for GL_DOT3_RGBA_EXT/ARB combining.
547 * This is kind of a kludge. It would have been better if the spec
548 * were written such that the GL_COMBINE_ALPHA value could be set to
549 * GL_DOT3.
550 */
551 if (combine->ModeRGB == GL_DOT3_RGBA_EXT ||
552 combine->ModeRGB == GL_DOT3_RGBA) {
553 for (i = 0; i < n; i++) {
554 rgba[i][ACOMP] = rgba[i][RCOMP];
555 }
556 }
557
558 for (i = 0; i < n; i++) {
559 UNCLAMPED_FLOAT_TO_CHAN(rgbaChan[i][RCOMP], rgba[i][RCOMP]);
560 UNCLAMPED_FLOAT_TO_CHAN(rgbaChan[i][GCOMP], rgba[i][GCOMP]);
561 UNCLAMPED_FLOAT_TO_CHAN(rgbaChan[i][BCOMP], rgba[i][BCOMP]);
562 UNCLAMPED_FLOAT_TO_CHAN(rgbaChan[i][ACOMP], rgba[i][ACOMP]);
563 }
564
565 end:
566 for (i = 0; i < numArgsRGB || i < numArgsA; i++) {
567 free(ccolor[i]);
568 }
569 free(rgba);
570 }
571
572
573 /**
574 * Apply X/Y/Z/W/0/1 swizzle to an array of colors/texels.
575 * See GL_EXT_texture_swizzle.
576 */
577 static void
578 swizzle_texels(GLuint swizzle, GLuint count, float4_array texels)
579 {
580 const GLuint swzR = GET_SWZ(swizzle, 0);
581 const GLuint swzG = GET_SWZ(swizzle, 1);
582 const GLuint swzB = GET_SWZ(swizzle, 2);
583 const GLuint swzA = GET_SWZ(swizzle, 3);
584 GLfloat vector[6];
585 GLuint i;
586
587 vector[SWIZZLE_ZERO] = 0;
588 vector[SWIZZLE_ONE] = 1.0F;
589
590 for (i = 0; i < count; i++) {
591 vector[SWIZZLE_X] = texels[i][0];
592 vector[SWIZZLE_Y] = texels[i][1];
593 vector[SWIZZLE_Z] = texels[i][2];
594 vector[SWIZZLE_W] = texels[i][3];
595 texels[i][RCOMP] = vector[swzR];
596 texels[i][GCOMP] = vector[swzG];
597 texels[i][BCOMP] = vector[swzB];
598 texels[i][ACOMP] = vector[swzA];
599 }
600 }
601
602
603 /**
604 * Apply texture mapping to a span of fragments.
605 */
606 void
607 _swrast_texture_span( struct gl_context *ctx, SWspan *span )
608 {
609 SWcontext *swrast = SWRAST_CONTEXT(ctx);
610 float4_array primary_rgba;
611 GLuint unit;
612
613 primary_rgba = (float4_array) malloc(span->end * 4 * sizeof(GLfloat));
614
615 if (!primary_rgba) {
616 _mesa_error(ctx, GL_OUT_OF_MEMORY, "texture_span");
617 return;
618 }
619
620 ASSERT(span->end <= MAX_WIDTH);
621
622 /*
623 * Save copy of the incoming fragment colors (the GL_PRIMARY_COLOR)
624 */
625 if (swrast->_TextureCombinePrimary) {
626 GLuint i;
627 for (i = 0; i < span->end; i++) {
628 primary_rgba[i][RCOMP] = CHAN_TO_FLOAT(span->array->rgba[i][RCOMP]);
629 primary_rgba[i][GCOMP] = CHAN_TO_FLOAT(span->array->rgba[i][GCOMP]);
630 primary_rgba[i][BCOMP] = CHAN_TO_FLOAT(span->array->rgba[i][BCOMP]);
631 primary_rgba[i][ACOMP] = CHAN_TO_FLOAT(span->array->rgba[i][ACOMP]);
632 }
633 }
634
635 /* First must sample all bump maps */
636 for (unit = 0; unit < ctx->Const.MaxTextureUnits; unit++) {
637 const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
638
639 if (texUnit->_ReallyEnabled &&
640 texUnit->_CurrentCombine->ModeRGB == GL_BUMP_ENVMAP_ATI) {
641 const GLfloat (*texcoords)[4] = (const GLfloat (*)[4])
642 span->array->attribs[FRAG_ATTRIB_TEX0 + unit];
643 float4_array targetcoords =
644 span->array->attribs[FRAG_ATTRIB_TEX0 +
645 ctx->Texture.Unit[unit].BumpTarget - GL_TEXTURE0];
646
647 const struct gl_texture_object *curObj = texUnit->_Current;
648 GLfloat *lambda = span->array->lambda[unit];
649 float4_array texels = get_texel_array(swrast, unit);
650 GLuint i;
651 GLfloat rotMatrix00 = ctx->Texture.Unit[unit].RotMatrix[0];
652 GLfloat rotMatrix01 = ctx->Texture.Unit[unit].RotMatrix[1];
653 GLfloat rotMatrix10 = ctx->Texture.Unit[unit].RotMatrix[2];
654 GLfloat rotMatrix11 = ctx->Texture.Unit[unit].RotMatrix[3];
655
656 /* adjust texture lod (lambda) */
657 if (span->arrayMask & SPAN_LAMBDA) {
658 if (texUnit->LodBias + curObj->Sampler.LodBias != 0.0F) {
659 /* apply LOD bias, but don't clamp yet */
660 const GLfloat bias = CLAMP(texUnit->LodBias + curObj->Sampler.LodBias,
661 -ctx->Const.MaxTextureLodBias,
662 ctx->Const.MaxTextureLodBias);
663 GLuint i;
664 for (i = 0; i < span->end; i++) {
665 lambda[i] += bias;
666 }
667 }
668
669 if (curObj->Sampler.MinLod != -1000.0 ||
670 curObj->Sampler.MaxLod != 1000.0) {
671 /* apply LOD clamping to lambda */
672 const GLfloat min = curObj->Sampler.MinLod;
673 const GLfloat max = curObj->Sampler.MaxLod;
674 GLuint i;
675 for (i = 0; i < span->end; i++) {
676 GLfloat l = lambda[i];
677 lambda[i] = CLAMP(l, min, max);
678 }
679 }
680 }
681
682 /* Sample the texture (span->end = number of fragments) */
683 swrast->TextureSample[unit]( ctx, texUnit->_Current, span->end,
684 texcoords, lambda, texels );
685
686 /* manipulate the span values of the bump target
687 not sure this can work correctly even ignoring
688 the problem that channel is unsigned */
689 for (i = 0; i < span->end; i++) {
690 targetcoords[i][0] += (texels[i][0] * rotMatrix00 + texels[i][1] *
691 rotMatrix01) / targetcoords[i][3];
692 targetcoords[i][1] += (texels[i][0] * rotMatrix10 + texels[i][1] *
693 rotMatrix11) / targetcoords[i][3];
694 }
695 }
696 }
697
698 /*
699 * Must do all texture sampling before combining in order to
700 * accomodate GL_ARB_texture_env_crossbar.
701 */
702 for (unit = 0; unit < ctx->Const.MaxTextureUnits; unit++) {
703 const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
704 if (texUnit->_ReallyEnabled &&
705 texUnit->_CurrentCombine->ModeRGB != GL_BUMP_ENVMAP_ATI) {
706 const GLfloat (*texcoords)[4] = (const GLfloat (*)[4])
707 span->array->attribs[FRAG_ATTRIB_TEX0 + unit];
708 const struct gl_texture_object *curObj = texUnit->_Current;
709 GLfloat *lambda = span->array->lambda[unit];
710 float4_array texels = get_texel_array(swrast, unit);
711
712 /* adjust texture lod (lambda) */
713 if (span->arrayMask & SPAN_LAMBDA) {
714 if (texUnit->LodBias + curObj->Sampler.LodBias != 0.0F) {
715 /* apply LOD bias, but don't clamp yet */
716 const GLfloat bias = CLAMP(texUnit->LodBias + curObj->Sampler.LodBias,
717 -ctx->Const.MaxTextureLodBias,
718 ctx->Const.MaxTextureLodBias);
719 GLuint i;
720 for (i = 0; i < span->end; i++) {
721 lambda[i] += bias;
722 }
723 }
724
725 if (curObj->Sampler.MinLod != -1000.0 ||
726 curObj->Sampler.MaxLod != 1000.0) {
727 /* apply LOD clamping to lambda */
728 const GLfloat min = curObj->Sampler.MinLod;
729 const GLfloat max = curObj->Sampler.MaxLod;
730 GLuint i;
731 for (i = 0; i < span->end; i++) {
732 GLfloat l = lambda[i];
733 lambda[i] = CLAMP(l, min, max);
734 }
735 }
736 }
737 else if (curObj->Sampler.MaxAnisotropy > 1.0 &&
738 curObj->Sampler.MinFilter == GL_LINEAR_MIPMAP_LINEAR) {
739 /* sample_lambda_2d_aniso is beeing used as texture_sample_func,
740 * it requires the current SWspan *span as an additional parameter.
741 * In order to keep the same function signature, the unused lambda
742 * parameter will be modified to actually contain the SWspan pointer.
743 * This is a Hack. To make it right, the texture_sample_func
744 * signature and all implementing functions need to be modified.
745 */
746 /* "hide" SWspan struct; cast to (GLfloat *) to suppress warning */
747 lambda = (GLfloat *)span;
748 }
749
750 /* Sample the texture (span->end = number of fragments) */
751 swrast->TextureSample[unit]( ctx, texUnit->_Current, span->end,
752 texcoords, lambda, texels );
753
754 /* GL_EXT_texture_swizzle */
755 if (curObj->_Swizzle != SWIZZLE_NOOP) {
756 swizzle_texels(curObj->_Swizzle, span->end, texels);
757 }
758 }
759 }
760
761 /*
762 * OK, now apply the texture (aka texture combine/blend).
763 * We modify the span->color.rgba values.
764 */
765 for (unit = 0; unit < ctx->Const.MaxTextureUnits; unit++) {
766 if (ctx->Texture.Unit[unit]._ReallyEnabled) {
767 texture_combine( ctx, unit, span->end,
768 primary_rgba,
769 swrast->TexelBuffer,
770 span->array->rgba );
771 }
772 }
773
774 free(primary_rgba);
775 }