src/mesa/swrast/s_texcombine.c

   1 /*
   2  * Mesa 3-D graphics library
   3  * Version:  7.5
   4  *
   5  * Copyright (C) 1999-2008  Brian Paul   All Rights Reserved.
   6  * Copyright (C) 2009  VMware, Inc.   All Rights Reserved.
   7  *
   8  * Permission is hereby granted, free of charge, to any person obtaining a
   9  * copy of this software and associated documentation files (the "Software"),
  10  * to deal in the Software without restriction, including without limitation
  11  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  12  * and/or sell copies of the Software, and to permit persons to whom the
  13  * Software is furnished to do so, subject to the following conditions:
  14  *
  15  * The above copyright notice and this permission notice shall be included
  16  * in all copies or substantial portions of the Software.
  17  *
  18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  20  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  21  * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
  22  * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  23  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  24  */
  25
  26
  27 #include "main/glheader.h"
  28 #include "main/context.h"
  29 #include "main/colormac.h"
  30 #include "main/imports.h"
  31 #include "main/pixeltransfer.h"
  32 #include "program/prog_instruction.h"
  33
  34 #include "s_context.h"
  35 #include "s_texcombine.h"
  36
  37
  38 /**
  39  * Pointer to array of float[4]
  40  * This type makes the code below more concise and avoids a lot of casting.
  41  */
  42 typedef float (*float4_array)[4];
  43
  44
  45 /**
  46  * Return array of texels for given unit.
  47  */
  48 static inline float4_array
  49 get_texel_array(SWcontext *swrast, GLuint unit)
  50 {
  51 #ifdef _OPENMP
  52    return (float4_array) (swrast->TexelBuffer + unit * MAX_WIDTH * 4 * omp_get_num_threads() + (MAX_WIDTH * 4 * omp_get_thread_num()));
  53 #else
  54    return (float4_array) (swrast->TexelBuffer + unit * MAX_WIDTH * 4);
  55 #endif
  56 }
  57
  58
  59
  60 /**
  61  * Do texture application for:
  62  *  GL_EXT_texture_env_combine
  63  *  GL_ARB_texture_env_combine
  64  *  GL_EXT_texture_env_dot3
  65  *  GL_ARB_texture_env_dot3
  66  *  GL_ATI_texture_env_combine3
  67  *  GL_NV_texture_env_combine4
  68  *  conventional GL texture env modes
  69  *
  70  * \param ctx          rendering context
  71  * \param unit         the texture combiner unit
  72  * \param n            number of fragments to process (span width)
  73  * \param primary_rgba incoming fragment color array
  74  * \param texelBuffer  pointer to texel colors for all texture units
  75  *
  76  * \param rgba         incoming/result fragment colors
  77  */
  78 static void
  79 texture_combine( struct gl_context *ctx, GLuint unit, GLuint n,
  80                  const float4_array primary_rgba,
  81                  const GLfloat *texelBuffer,
  82                  GLchan (*rgbaChan)[4] )
  83 {
  84    SWcontext *swrast = SWRAST_CONTEXT(ctx);
  85    const struct gl_texture_unit *textureUnit = &(ctx->Texture.Unit[unit]);
  86    const struct gl_tex_env_combine_state *combine = textureUnit->_CurrentCombine;
  87    float4_array argRGB[MAX_COMBINER_TERMS];
  88    float4_array argA[MAX_COMBINER_TERMS];
  89    const GLfloat scaleRGB = (GLfloat) (1 << combine->ScaleShiftRGB);
  90    const GLfloat scaleA = (GLfloat) (1 << combine->ScaleShiftA);
  91    const GLuint numArgsRGB = combine->_NumArgsRGB;
  92    const GLuint numArgsA = combine->_NumArgsA;
  93    float4_array ccolor[4], rgba;
  94    GLuint i, term;
  95
  96    /* alloc temp pixel buffers */
  97    rgba = (float4_array) malloc(4 * n * sizeof(GLfloat));
  98    if (!rgba) {
  99       _mesa_error(ctx, GL_OUT_OF_MEMORY, "texture_combine");
 100       return;
 101    }
 102
 103    for (i = 0; i < numArgsRGB || i < numArgsA; i++) {
 104       ccolor[i] = (float4_array) malloc(4 * n * sizeof(GLfloat));
 105       if (!ccolor[i]) {
 106          while (i) {
 107             free(ccolor[i]);
 108             i--;
 109          }
 110          _mesa_error(ctx, GL_OUT_OF_MEMORY, "texture_combine");
 111          return;
 112       }
 113    }
 114
 115    for (i = 0; i < n; i++) {
 116       rgba[i][RCOMP] = CHAN_TO_FLOAT(rgbaChan[i][RCOMP]);
 117       rgba[i][GCOMP] = CHAN_TO_FLOAT(rgbaChan[i][GCOMP]);
 118       rgba[i][BCOMP] = CHAN_TO_FLOAT(rgbaChan[i][BCOMP]);
 119       rgba[i][ACOMP] = CHAN_TO_FLOAT(rgbaChan[i][ACOMP]);
 120    }
 121
 122    /*
 123    printf("modeRGB 0x%x  modeA 0x%x  srcRGB1 0x%x  srcA1 0x%x  srcRGB2 0x%x  srcA2 0x%x\n",
 124           combine->ModeRGB,
 125           combine->ModeA,
 126           combine->SourceRGB[0],
 127           combine->SourceA[0],
 128           combine->SourceRGB[1],
 129           combine->SourceA[1]);
 130    */
 131
 132    /*
 133     * Do operand setup for up to 4 operands.  Loop over the terms.
 134     */
 135    for (term = 0; term < numArgsRGB; term++) {
 136       const GLenum srcRGB = combine->SourceRGB[term];
 137       const GLenum operandRGB = combine->OperandRGB[term];
 138
 139       switch (srcRGB) {
 140          case GL_TEXTURE:
 141             argRGB[term] = get_texel_array(swrast, unit);
 142             break;
 143          case GL_PRIMARY_COLOR:
 144             argRGB[term] = primary_rgba;
 145             break;
 146          case GL_PREVIOUS:
 147             argRGB[term] = rgba;
 148             break;
 149          case GL_CONSTANT:
 150             {
 151                float4_array c = ccolor[term];
 152                GLfloat red   = textureUnit->EnvColor[0];
 153                GLfloat green = textureUnit->EnvColor[1];
 154                GLfloat blue  = textureUnit->EnvColor[2];
 155                GLfloat alpha = textureUnit->EnvColor[3];
 156                for (i = 0; i < n; i++) {
 157                   ASSIGN_4V(c[i], red, green, blue, alpha);
 158                }
 159                argRGB[term] = ccolor[term];
 160             }
 161             break;
 162          /* GL_ATI_texture_env_combine3 allows GL_ZERO & GL_ONE as sources.
 163           */
 164          case GL_ZERO:
 165             {
 166                float4_array c = ccolor[term];
 167                for (i = 0; i < n; i++) {
 168                   ASSIGN_4V(c[i], 0.0F, 0.0F, 0.0F, 0.0F);
 169                }
 170                argRGB[term] = ccolor[term];
 171             }
 172             break;
 173          case GL_ONE:
 174             {
 175                float4_array c = ccolor[term];
 176                for (i = 0; i < n; i++) {
 177                   ASSIGN_4V(c[i], 1.0F, 1.0F, 1.0F, 1.0F);
 178                }
 179                argRGB[term] = ccolor[term];
 180             }
 181             break;
 182          default:
 183             /* ARB_texture_env_crossbar source */
 184             {
 185                const GLuint srcUnit = srcRGB - GL_TEXTURE0;
 186                ASSERT(srcUnit < ctx->Const.MaxTextureUnits);
 187                if (!ctx->Texture.Unit[srcUnit]._ReallyEnabled)
 188                   goto end;
 189                argRGB[term] = get_texel_array(swrast, srcUnit);
 190             }
 191       }
 192
 193       if (operandRGB != GL_SRC_COLOR) {
 194          float4_array src = argRGB[term];
 195          float4_array dst = ccolor[term];
 196
 197          /* point to new arg[term] storage */
 198          argRGB[term] = ccolor[term];
 199
 200          switch (operandRGB) {
 201          case GL_ONE_MINUS_SRC_COLOR:
 202             for (i = 0; i < n; i++) {
 203                dst[i][RCOMP] = 1.0F - src[i][RCOMP];
 204                dst[i][GCOMP] = 1.0F - src[i][GCOMP];
 205                dst[i][BCOMP] = 1.0F - src[i][BCOMP];
 206             }
 207             break;
 208          case GL_SRC_ALPHA:
 209             for (i = 0; i < n; i++) {
 210                dst[i][RCOMP] =
 211                dst[i][GCOMP] =
 212                dst[i][BCOMP] = src[i][ACOMP];
 213             }
 214             break;
 215          case GL_ONE_MINUS_SRC_ALPHA:
 216             for (i = 0; i < n; i++) {
 217                dst[i][RCOMP] =
 218                dst[i][GCOMP] =
 219                dst[i][BCOMP] = 1.0F - src[i][ACOMP];
 220             }
 221             break;
 222          default:
 223             _mesa_problem(ctx, "Bad operandRGB");
 224          }
 225       }
 226    }
 227
 228    /*
 229     * Set up the argA[term] pointers
 230     */
 231    for (term = 0; term < numArgsA; term++) {
 232       const GLenum srcA = combine->SourceA[term];
 233       const GLenum operandA = combine->OperandA[term];
 234
 235       switch (srcA) {
 236          case GL_TEXTURE:
 237             argA[term] = get_texel_array(swrast, unit);
 238             break;
 239          case GL_PRIMARY_COLOR:
 240             argA[term] = primary_rgba;
 241             break;
 242          case GL_PREVIOUS:
 243             argA[term] = rgba;
 244             break;
 245          case GL_CONSTANT:
 246             {
 247                float4_array c = ccolor[term];
 248                GLfloat alpha = textureUnit->EnvColor[3];
 249                for (i = 0; i < n; i++)
 250                   c[i][ACOMP] = alpha;
 251                argA[term] = ccolor[term];
 252             }
 253             break;
 254          /* GL_ATI_texture_env_combine3 allows GL_ZERO & GL_ONE as sources.
 255           */
 256          case GL_ZERO:
 257             {
 258                float4_array c = ccolor[term];
 259                for (i = 0; i < n; i++)
 260                   c[i][ACOMP] = 0.0F;
 261                argA[term] = ccolor[term];
 262             }
 263             break;
 264          case GL_ONE:
 265             {
 266                float4_array c = ccolor[term];
 267                for (i = 0; i < n; i++)
 268                   c[i][ACOMP] = 1.0F;
 269                argA[term] = ccolor[term];
 270             }
 271             break;
 272          default:
 273             /* ARB_texture_env_crossbar source */
 274             {
 275                const GLuint srcUnit = srcA - GL_TEXTURE0;
 276                ASSERT(srcUnit < ctx->Const.MaxTextureUnits);
 277                if (!ctx->Texture.Unit[srcUnit]._ReallyEnabled)
 278                   goto end;
 279                argA[term] = get_texel_array(swrast, srcUnit);
 280             }
 281       }
 282
 283       if (operandA == GL_ONE_MINUS_SRC_ALPHA) {
 284          float4_array src = argA[term];
 285          float4_array dst = ccolor[term];
 286          argA[term] = ccolor[term];
 287          for (i = 0; i < n; i++) {
 288             dst[i][ACOMP] = 1.0F - src[i][ACOMP];
 289          }
 290       }
 291    }
 292
 293    /* RGB channel combine */
 294    {
 295       float4_array arg0 = argRGB[0];
 296       float4_array arg1 = argRGB[1];
 297       float4_array arg2 = argRGB[2];
 298       float4_array arg3 = argRGB[3];
 299
 300       switch (combine->ModeRGB) {
 301       case GL_REPLACE:
 302          for (i = 0; i < n; i++) {
 303             rgba[i][RCOMP] = arg0[i][RCOMP] * scaleRGB;
 304             rgba[i][GCOMP] = arg0[i][GCOMP] * scaleRGB;
 305             rgba[i][BCOMP] = arg0[i][BCOMP] * scaleRGB;
 306          }
 307          break;
 308       case GL_MODULATE:
 309          for (i = 0; i < n; i++) {
 310             rgba[i][RCOMP] = arg0[i][RCOMP] * arg1[i][RCOMP] * scaleRGB;
 311             rgba[i][GCOMP] = arg0[i][GCOMP] * arg1[i][GCOMP] * scaleRGB;
 312             rgba[i][BCOMP] = arg0[i][BCOMP] * arg1[i][BCOMP] * scaleRGB;
 313          }
 314          break;
 315       case GL_ADD:
 316          if (textureUnit->EnvMode == GL_COMBINE4_NV) {
 317             /* (a * b) + (c * d) */
 318             for (i = 0; i < n; i++) {
 319                rgba[i][RCOMP] = (arg0[i][RCOMP] * arg1[i][RCOMP] +
 320                                  arg2[i][RCOMP] * arg3[i][RCOMP]) * scaleRGB;
 321                rgba[i][GCOMP] = (arg0[i][GCOMP] * arg1[i][GCOMP] +
 322                                  arg2[i][GCOMP] * arg3[i][GCOMP]) * scaleRGB;
 323                rgba[i][BCOMP] = (arg0[i][BCOMP] * arg1[i][BCOMP] +
 324                                  arg2[i][BCOMP] * arg3[i][BCOMP]) * scaleRGB;
 325             }
 326          }
 327          else {
 328             /* 2-term addition */
 329             for (i = 0; i < n; i++) {
 330                rgba[i][RCOMP] = (arg0[i][RCOMP] + arg1[i][RCOMP]) * scaleRGB;
 331                rgba[i][GCOMP] = (arg0[i][GCOMP] + arg1[i][GCOMP]) * scaleRGB;
 332                rgba[i][BCOMP] = (arg0[i][BCOMP] + arg1[i][BCOMP]) * scaleRGB;
 333             }
 334          }
 335          break;
 336       case GL_ADD_SIGNED:
 337          if (textureUnit->EnvMode == GL_COMBINE4_NV) {
 338             /* (a * b) + (c * d) - 0.5 */
 339             for (i = 0; i < n; i++) {
 340                rgba[i][RCOMP] = (arg0[i][RCOMP] * arg1[i][RCOMP] +
 341                                  arg2[i][RCOMP] * arg3[i][RCOMP] - 0.5F) * scaleRGB;
 342                rgba[i][GCOMP] = (arg0[i][GCOMP] * arg1[i][GCOMP] +
 343                                  arg2[i][GCOMP] * arg3[i][GCOMP] - 0.5F) * scaleRGB;
 344                rgba[i][BCOMP] = (arg0[i][BCOMP] * arg1[i][BCOMP] +
 345                                  arg2[i][BCOMP] * arg3[i][BCOMP] - 0.5F) * scaleRGB;
 346             }
 347          }
 348          else {
 349             for (i = 0; i < n; i++) {
 350                rgba[i][RCOMP] = (arg0[i][RCOMP] + arg1[i][RCOMP] - 0.5F) * scaleRGB;
 351                rgba[i][GCOMP] = (arg0[i][GCOMP] + arg1[i][GCOMP] - 0.5F) * scaleRGB;
 352                rgba[i][BCOMP] = (arg0[i][BCOMP] + arg1[i][BCOMP] - 0.5F) * scaleRGB;
 353             }
 354          }
 355          break;
 356       case GL_INTERPOLATE:
 357          for (i = 0; i < n; i++) {
 358             rgba[i][RCOMP] = (arg0[i][RCOMP] * arg2[i][RCOMP] +
 359                           arg1[i][RCOMP] * (1.0F - arg2[i][RCOMP])) * scaleRGB;
 360             rgba[i][GCOMP] = (arg0[i][GCOMP] * arg2[i][GCOMP] +
 361                           arg1[i][GCOMP] * (1.0F - arg2[i][GCOMP])) * scaleRGB;
 362             rgba[i][BCOMP] = (arg0[i][BCOMP] * arg2[i][BCOMP] +
 363                           arg1[i][BCOMP] * (1.0F - arg2[i][BCOMP])) * scaleRGB;
 364          }
 365          break;
 366       case GL_SUBTRACT:
 367          for (i = 0; i < n; i++) {
 368             rgba[i][RCOMP] = (arg0[i][RCOMP] - arg1[i][RCOMP]) * scaleRGB;
 369             rgba[i][GCOMP] = (arg0[i][GCOMP] - arg1[i][GCOMP]) * scaleRGB;
 370             rgba[i][BCOMP] = (arg0[i][BCOMP] - arg1[i][BCOMP]) * scaleRGB;
 371          }
 372          break;
 373       case GL_DOT3_RGB_EXT:
 374       case GL_DOT3_RGBA_EXT:
 375          /* Do not scale the result by 1 2 or 4 */
 376          for (i = 0; i < n; i++) {
 377             GLfloat dot = ((arg0[i][RCOMP] - 0.5F) * (arg1[i][RCOMP] - 0.5F) +
 378                            (arg0[i][GCOMP] - 0.5F) * (arg1[i][GCOMP] - 0.5F) +
 379                            (arg0[i][BCOMP] - 0.5F) * (arg1[i][BCOMP] - 0.5F))
 380                * 4.0F;
 381             dot = CLAMP(dot, 0.0F, 1.0F);
 382             rgba[i][RCOMP] = rgba[i][GCOMP] = rgba[i][BCOMP] = dot;
 383          }
 384          break;
 385       case GL_DOT3_RGB:
 386       case GL_DOT3_RGBA:
 387          /* DO scale the result by 1 2 or 4 */
 388          for (i = 0; i < n; i++) {
 389             GLfloat dot = ((arg0[i][RCOMP] - 0.5F) * (arg1[i][RCOMP] - 0.5F) +
 390                            (arg0[i][GCOMP] - 0.5F) * (arg1[i][GCOMP] - 0.5F) +
 391                            (arg0[i][BCOMP] - 0.5F) * (arg1[i][BCOMP] - 0.5F))
 392                * 4.0F * scaleRGB;
 393             dot = CLAMP(dot, 0.0F, 1.0F);
 394             rgba[i][RCOMP] = rgba[i][GCOMP] = rgba[i][BCOMP] = dot;
 395          }
 396          break;
 397       case GL_MODULATE_ADD_ATI:
 398          for (i = 0; i < n; i++) {
 399             rgba[i][RCOMP] = ((arg0[i][RCOMP] * arg2[i][RCOMP]) +
 400                               arg1[i][RCOMP]) * scaleRGB;
 401             rgba[i][GCOMP] = ((arg0[i][GCOMP] * arg2[i][GCOMP]) +
 402                               arg1[i][GCOMP]) * scaleRGB;
 403             rgba[i][BCOMP] = ((arg0[i][BCOMP] * arg2[i][BCOMP]) +
 404                               arg1[i][BCOMP]) * scaleRGB;
 405          }
 406          break;
 407       case GL_MODULATE_SIGNED_ADD_ATI:
 408          for (i = 0; i < n; i++) {
 409             rgba[i][RCOMP] = ((arg0[i][RCOMP] * arg2[i][RCOMP]) +
 410                               arg1[i][RCOMP] - 0.5F) * scaleRGB;
 411             rgba[i][GCOMP] = ((arg0[i][GCOMP] * arg2[i][GCOMP]) +
 412                               arg1[i][GCOMP] - 0.5F) * scaleRGB;
 413             rgba[i][BCOMP] = ((arg0[i][BCOMP] * arg2[i][BCOMP]) +
 414                               arg1[i][BCOMP] - 0.5F) * scaleRGB;
 415          }
 416          break;
 417       case GL_MODULATE_SUBTRACT_ATI:
 418          for (i = 0; i < n; i++) {
 419             rgba[i][RCOMP] = ((arg0[i][RCOMP] * arg2[i][RCOMP]) -
 420                               arg1[i][RCOMP]) * scaleRGB;
 421             rgba[i][GCOMP] = ((arg0[i][GCOMP] * arg2[i][GCOMP]) -
 422                               arg1[i][GCOMP]) * scaleRGB;
 423             rgba[i][BCOMP] = ((arg0[i][BCOMP] * arg2[i][BCOMP]) -
 424                               arg1[i][BCOMP]) * scaleRGB;
 425          }
 426          break;
 427       case GL_BUMP_ENVMAP_ATI:
 428          /* this produces a fixed rgba color, and the coord calc is done elsewhere */
 429          for (i = 0; i < n; i++) {
 430             /* rgba result is 0,0,0,1 */
 431             rgba[i][RCOMP] = 0.0;
 432             rgba[i][GCOMP] = 0.0;
 433             rgba[i][BCOMP] = 0.0;
 434             rgba[i][ACOMP] = 1.0;
 435          }
 436          goto end; /* no alpha processing */
 437       default:
 438          _mesa_problem(ctx, "invalid combine mode");
 439       }
 440    }
 441
 442    /* Alpha channel combine */
 443    {
 444       float4_array arg0 = argA[0];
 445       float4_array arg1 = argA[1];
 446       float4_array arg2 = argA[2];
 447       float4_array arg3 = argA[3];
 448
 449       switch (combine->ModeA) {
 450       case GL_REPLACE:
 451          for (i = 0; i < n; i++) {
 452             rgba[i][ACOMP] = arg0[i][ACOMP] * scaleA;
 453          }
 454          break;
 455       case GL_MODULATE:
 456          for (i = 0; i < n; i++) {
 457             rgba[i][ACOMP] = arg0[i][ACOMP] * arg1[i][ACOMP] * scaleA;
 458          }
 459          break;
 460       case GL_ADD:
 461          if (textureUnit->EnvMode == GL_COMBINE4_NV) {
 462             /* (a * b) + (c * d) */
 463             for (i = 0; i < n; i++) {
 464                rgba[i][ACOMP] = (arg0[i][ACOMP] * arg1[i][ACOMP] +
 465                                  arg2[i][ACOMP] * arg3[i][ACOMP]) * scaleA;
 466             }
 467          }
 468          else {
 469             /* two-term add */
 470             for (i = 0; i < n; i++) {
 471                rgba[i][ACOMP] = (arg0[i][ACOMP] + arg1[i][ACOMP]) * scaleA;
 472             }
 473          }
 474          break;
 475       case GL_ADD_SIGNED:
 476          if (textureUnit->EnvMode == GL_COMBINE4_NV) {
 477             /* (a * b) + (c * d) - 0.5 */
 478             for (i = 0; i < n; i++) {
 479                rgba[i][ACOMP] = (arg0[i][ACOMP] * arg1[i][ACOMP] +
 480                                  arg2[i][ACOMP] * arg3[i][ACOMP] -
 481                                  0.5F) * scaleA;
 482             }
 483          }
 484          else {
 485             /* a + b - 0.5 */
 486             for (i = 0; i < n; i++) {
 487                rgba[i][ACOMP] = (arg0[i][ACOMP] + arg1[i][ACOMP] - 0.5F) * scaleA;
 488             }
 489          }
 490          break;
 491       case GL_INTERPOLATE:
 492          for (i = 0; i < n; i++) {
 493             rgba[i][ACOMP] = (arg0[i][ACOMP] * arg2[i][ACOMP] +
 494                               arg1[i][ACOMP] * (1.0F - arg2[i][ACOMP]))
 495                * scaleA;
 496          }
 497          break;
 498       case GL_SUBTRACT:
 499          for (i = 0; i < n; i++) {
 500             rgba[i][ACOMP] = (arg0[i][ACOMP] - arg1[i][ACOMP]) * scaleA;
 501          }
 502          break;
 503       case GL_MODULATE_ADD_ATI:
 504          for (i = 0; i < n; i++) {
 505             rgba[i][ACOMP] = ((arg0[i][ACOMP] * arg2[i][ACOMP])
 506                               + arg1[i][ACOMP]) * scaleA;
 507          }
 508          break;
 509       case GL_MODULATE_SIGNED_ADD_ATI:
 510          for (i = 0; i < n; i++) {
 511             rgba[i][ACOMP] = ((arg0[i][ACOMP] * arg2[i][ACOMP]) +
 512                               arg1[i][ACOMP] - 0.5F) * scaleA;
 513          }
 514          break;
 515       case GL_MODULATE_SUBTRACT_ATI:
 516          for (i = 0; i < n; i++) {
 517             rgba[i][ACOMP] = ((arg0[i][ACOMP] * arg2[i][ACOMP])
 518                               - arg1[i][ACOMP]) * scaleA;
 519          }
 520          break;
 521       default:
 522          _mesa_problem(ctx, "invalid combine mode");
 523       }
 524    }
 525
 526    /* Fix the alpha component for GL_DOT3_RGBA_EXT/ARB combining.
 527     * This is kind of a kludge.  It would have been better if the spec
 528     * were written such that the GL_COMBINE_ALPHA value could be set to
 529     * GL_DOT3.
 530     */
 531    if (combine->ModeRGB == GL_DOT3_RGBA_EXT ||
 532        combine->ModeRGB == GL_DOT3_RGBA) {
 533       for (i = 0; i < n; i++) {
 534          rgba[i][ACOMP] = rgba[i][RCOMP];
 535       }
 536    }
 537
 538    for (i = 0; i < n; i++) {
 539       UNCLAMPED_FLOAT_TO_CHAN(rgbaChan[i][RCOMP], rgba[i][RCOMP]);
 540       UNCLAMPED_FLOAT_TO_CHAN(rgbaChan[i][GCOMP], rgba[i][GCOMP]);
 541       UNCLAMPED_FLOAT_TO_CHAN(rgbaChan[i][BCOMP], rgba[i][BCOMP]);
 542       UNCLAMPED_FLOAT_TO_CHAN(rgbaChan[i][ACOMP], rgba[i][ACOMP]);
 543    }
 544
 545 end:
 546    for (i = 0; i < numArgsRGB || i < numArgsA; i++) {
 547       free(ccolor[i]);
 548    }
 549    free(rgba);
 550 }
 551
 552
 553 /**
 554  * Apply X/Y/Z/W/0/1 swizzle to an array of colors/texels.
 555  * See GL_EXT_texture_swizzle.
 556  */
 557 static void
 558 swizzle_texels(GLuint swizzle, GLuint count, float4_array texels)
 559 {
 560    const GLuint swzR = GET_SWZ(swizzle, 0);
 561    const GLuint swzG = GET_SWZ(swizzle, 1);
 562    const GLuint swzB = GET_SWZ(swizzle, 2);
 563    const GLuint swzA = GET_SWZ(swizzle, 3);
 564    GLfloat vector[6];
 565    GLuint i;
 566
 567    vector[SWIZZLE_ZERO] = 0;
 568    vector[SWIZZLE_ONE] = 1.0F;
 569
 570    for (i = 0; i < count; i++) {
 571       vector[SWIZZLE_X] = texels[i][0];
 572       vector[SWIZZLE_Y] = texels[i][1];
 573       vector[SWIZZLE_Z] = texels[i][2];
 574       vector[SWIZZLE_W] = texels[i][3];
 575       texels[i][RCOMP] = vector[swzR];
 576       texels[i][GCOMP] = vector[swzG];
 577       texels[i][BCOMP] = vector[swzB];
 578       texels[i][ACOMP] = vector[swzA];
 579    }
 580 }
 581
 582
 583 /**
 584  * Apply texture mapping to a span of fragments.
 585  */
 586 void
 587 _swrast_texture_span( struct gl_context *ctx, SWspan *span )
 588 {
 589    SWcontext *swrast = SWRAST_CONTEXT(ctx);
 590    float4_array primary_rgba;
 591    GLuint unit;
 592
 593    if (!swrast->TexelBuffer) {
 594 #ifdef _OPENMP
 595       const GLint maxThreads = omp_get_max_threads();
 596 #else
 597       const GLint maxThreads = 1;
 598 #endif
 599
 600       /* TexelBuffer is also global and normally shared by all SWspan
 601        * instances; when running with multiple threads, create one per
 602        * thread.
 603        */
 604       swrast->TexelBuffer =
 605          (GLfloat *) MALLOC(ctx->Const.MaxTextureImageUnits * maxThreads *
 606                             MAX_WIDTH * 4 * sizeof(GLfloat));
 607       if (!swrast->TexelBuffer) {
 608          _mesa_error(ctx, GL_OUT_OF_MEMORY, "texture_combine");
 609          return;
 610       }
 611    }
 612
 613    primary_rgba = (float4_array) malloc(span->end * 4 * sizeof(GLfloat));
 614
 615    if (!primary_rgba) {
 616       _mesa_error(ctx, GL_OUT_OF_MEMORY, "texture_span");
 617       return;
 618    }
 619
 620    ASSERT(span->end <= MAX_WIDTH);
 621
 622    /*
 623     * Save copy of the incoming fragment colors (the GL_PRIMARY_COLOR)
 624     */
 625    if (swrast->_TextureCombinePrimary) {
 626       GLuint i;
 627       for (i = 0; i < span->end; i++) {
 628          primary_rgba[i][RCOMP] = CHAN_TO_FLOAT(span->array->rgba[i][RCOMP]);
 629          primary_rgba[i][GCOMP] = CHAN_TO_FLOAT(span->array->rgba[i][GCOMP]);
 630          primary_rgba[i][BCOMP] = CHAN_TO_FLOAT(span->array->rgba[i][BCOMP]);
 631          primary_rgba[i][ACOMP] = CHAN_TO_FLOAT(span->array->rgba[i][ACOMP]);
 632       }
 633    }
 634
 635    /* First must sample all bump maps */
 636    for (unit = 0; unit < ctx->Const.MaxTextureUnits; unit++) {
 637       const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
 638
 639       if (texUnit->_ReallyEnabled &&
 640          texUnit->_CurrentCombine->ModeRGB == GL_BUMP_ENVMAP_ATI) {
 641          const GLfloat (*texcoords)[4] = (const GLfloat (*)[4])
 642             span->array->attribs[FRAG_ATTRIB_TEX0 + unit];
 643          float4_array targetcoords =
 644             span->array->attribs[FRAG_ATTRIB_TEX0 +
 645                ctx->Texture.Unit[unit].BumpTarget - GL_TEXTURE0];
 646
 647          const struct gl_texture_object *curObj = texUnit->_Current;
 648          GLfloat *lambda = span->array->lambda[unit];
 649          float4_array texels = get_texel_array(swrast, unit);
 650          GLuint i;
 651          GLfloat rotMatrix00 = ctx->Texture.Unit[unit].RotMatrix[0];
 652          GLfloat rotMatrix01 = ctx->Texture.Unit[unit].RotMatrix[1];
 653          GLfloat rotMatrix10 = ctx->Texture.Unit[unit].RotMatrix[2];
 654          GLfloat rotMatrix11 = ctx->Texture.Unit[unit].RotMatrix[3];
 655
 656          /* adjust texture lod (lambda) */
 657          if (span->arrayMask & SPAN_LAMBDA) {
 658             if (texUnit->LodBias + curObj->Sampler.LodBias != 0.0F) {
 659                /* apply LOD bias, but don't clamp yet */
 660                const GLfloat bias = CLAMP(texUnit->LodBias + curObj->Sampler.LodBias,
 661                                           -ctx->Const.MaxTextureLodBias,
 662                                           ctx->Const.MaxTextureLodBias);
 663                GLuint i;
 664                for (i = 0; i < span->end; i++) {
 665                   lambda[i] += bias;
 666                }
 667             }
 668
 669             if (curObj->Sampler.MinLod != -1000.0 ||
 670                 curObj->Sampler.MaxLod != 1000.0) {
 671                /* apply LOD clamping to lambda */
 672                const GLfloat min = curObj->Sampler.MinLod;
 673                const GLfloat max = curObj->Sampler.MaxLod;
 674                GLuint i;
 675                for (i = 0; i < span->end; i++) {
 676                   GLfloat l = lambda[i];
 677                   lambda[i] = CLAMP(l, min, max);
 678                }
 679             }
 680          }
 681
 682          /* Sample the texture (span->end = number of fragments) */
 683          swrast->TextureSample[unit]( ctx, texUnit->_Current, span->end,
 684                                       texcoords, lambda, texels );
 685
 686          /* manipulate the span values of the bump target
 687             not sure this can work correctly even ignoring
 688             the problem that channel is unsigned */
 689          for (i = 0; i < span->end; i++) {
 690             targetcoords[i][0] += (texels[i][0] * rotMatrix00 + texels[i][1] *
 691                                   rotMatrix01) / targetcoords[i][3];
 692             targetcoords[i][1] += (texels[i][0] * rotMatrix10 + texels[i][1] *
 693                                   rotMatrix11) / targetcoords[i][3];
 694          }
 695       }
 696    }
 697
 698    /*
 699     * Must do all texture sampling before combining in order to
 700     * accomodate GL_ARB_texture_env_crossbar.
 701     */
 702    for (unit = 0; unit < ctx->Const.MaxTextureUnits; unit++) {
 703       const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
 704       if (texUnit->_ReallyEnabled &&
 705           texUnit->_CurrentCombine->ModeRGB != GL_BUMP_ENVMAP_ATI) {
 706          const GLfloat (*texcoords)[4] = (const GLfloat (*)[4])
 707             span->array->attribs[FRAG_ATTRIB_TEX0 + unit];
 708          const struct gl_texture_object *curObj = texUnit->_Current;
 709          GLfloat *lambda = span->array->lambda[unit];
 710          float4_array texels = get_texel_array(swrast, unit);
 711
 712          /* adjust texture lod (lambda) */
 713          if (span->arrayMask & SPAN_LAMBDA) {
 714             if (texUnit->LodBias + curObj->Sampler.LodBias != 0.0F) {
 715                /* apply LOD bias, but don't clamp yet */
 716                const GLfloat bias = CLAMP(texUnit->LodBias + curObj->Sampler.LodBias,
 717                                           -ctx->Const.MaxTextureLodBias,
 718                                           ctx->Const.MaxTextureLodBias);
 719                GLuint i;
 720                for (i = 0; i < span->end; i++) {
 721                   lambda[i] += bias;
 722                }
 723             }
 724
 725             if (curObj->Sampler.MinLod != -1000.0 ||
 726                 curObj->Sampler.MaxLod != 1000.0) {
 727                /* apply LOD clamping to lambda */
 728                const GLfloat min = curObj->Sampler.MinLod;
 729                const GLfloat max = curObj->Sampler.MaxLod;
 730                GLuint i;
 731                for (i = 0; i < span->end; i++) {
 732                   GLfloat l = lambda[i];
 733                   lambda[i] = CLAMP(l, min, max);
 734                }
 735             }
 736          }
 737          else if (curObj->Sampler.MaxAnisotropy > 1.0 &&
 738                   curObj->Sampler.MinFilter == GL_LINEAR_MIPMAP_LINEAR) {
 739             /* sample_lambda_2d_aniso is beeing used as texture_sample_func,
 740              * it requires the current SWspan *span as an additional parameter.
 741              * In order to keep the same function signature, the unused lambda
 742              * parameter will be modified to actually contain the SWspan pointer.
 743              * This is a Hack. To make it right, the texture_sample_func
 744              * signature and all implementing functions need to be modified.
 745              */
 746             /* "hide" SWspan struct; cast to (GLfloat *) to suppress warning */
 747             lambda = (GLfloat *)span;
 748          }
 749
 750          /* Sample the texture (span->end = number of fragments) */
 751          swrast->TextureSample[unit]( ctx, texUnit->_Current, span->end,
 752                                       texcoords, lambda, texels );
 753
 754          /* GL_EXT_texture_swizzle */
 755          if (curObj->_Swizzle != SWIZZLE_NOOP) {
 756             swizzle_texels(curObj->_Swizzle, span->end, texels);
 757          }
 758       }
 759    }
 760
 761    /*
 762     * OK, now apply the texture (aka texture combine/blend).
 763     * We modify the span->color.rgba values.
 764     */
 765    for (unit = 0; unit < ctx->Const.MaxTextureUnits; unit++) {
 766       if (ctx->Texture.Unit[unit]._ReallyEnabled) {
 767          texture_combine( ctx, unit, span->end,
 768                           primary_rgba,
 769                           swrast->TexelBuffer,
 770                           span->array->rgba );
 771       }
 772    }
 773
 774    free(primary_rgba);
 775 }