src/mesa/swrast/s_texcombine.c

   1 /*
   2  * Mesa 3-D graphics library
   3  * Version:  7.5
   4  *
   5  * Copyright (C) 1999-2008  Brian Paul   All Rights Reserved.
   6  * Copyright (C) 2009  VMware, Inc.   All Rights Reserved.
   7  *
   8  * Permission is hereby granted, free of charge, to any person obtaining a
   9  * copy of this software and associated documentation files (the "Software"),
  10  * to deal in the Software without restriction, including without limitation
  11  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  12  * and/or sell copies of the Software, and to permit persons to whom the
  13  * Software is furnished to do so, subject to the following conditions:
  14  *
  15  * The above copyright notice and this permission notice shall be included
  16  * in all copies or substantial portions of the Software.
  17  *
  18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  20  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  21  * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
  22  * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  23  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  24  */
  25
  26
  27 #include "main/glheader.h"
  28 #include "main/context.h"
  29 #include "main/colormac.h"
  30 #include "main/imports.h"
  31 #include "main/pixeltransfer.h"
  32 #include "program/prog_instruction.h"
  33
  34 #include "s_context.h"
  35 #include "s_texcombine.h"
  36
  37
  38 /**
  39  * Pointer to array of float[4]
  40  * This type makes the code below more concise and avoids a lot of casting.
  41  */
  42 typedef float (*float4_array)[4];
  43
  44
  45 /**
  46  * Return array of texels for given unit.
  47  */
  48 static inline float4_array
  49 get_texel_array(SWcontext *swrast, GLuint unit)
  50 {
  51 #ifdef _OPENMP
  52    return (float4_array) (swrast->TexelBuffer + unit * MAX_WIDTH * 4 * omp_get_num_threads() + (MAX_WIDTH * 4 * omp_get_thread_num()));
  53 #else
  54    return (float4_array) (swrast->TexelBuffer + unit * MAX_WIDTH * 4);
  55 #endif
  56 }
  57
  58
  59
  60 /**
  61  * Do texture application for:
  62  *  GL_EXT_texture_env_combine
  63  *  GL_ARB_texture_env_combine
  64  *  GL_EXT_texture_env_dot3
  65  *  GL_ARB_texture_env_dot3
  66  *  GL_ATI_texture_env_combine3
  67  *  GL_NV_texture_env_combine4
  68  *  conventional GL texture env modes
  69  *
  70  * \param ctx          rendering context
  71  * \param unit         the texture combiner unit
  72  * \param n            number of fragments to process (span width)
  73  * \param primary_rgba incoming fragment color array
  74  * \param texelBuffer  pointer to texel colors for all texture units
  75  *
  76  * \param rgba         incoming/result fragment colors
  77  */
  78 static void
  79 texture_combine( struct gl_context *ctx, GLuint unit, GLuint n,
  80                  const float4_array primary_rgba,
  81                  const GLfloat *texelBuffer,
  82                  GLchan (*rgbaChan)[4] )
  83 {
  84    SWcontext *swrast = SWRAST_CONTEXT(ctx);
  85    const struct gl_texture_unit *textureUnit = &(ctx->Texture.Unit[unit]);
  86    const struct gl_tex_env_combine_state *combine = textureUnit->_CurrentCombine;
  87    float4_array argRGB[MAX_COMBINER_TERMS];
  88    float4_array argA[MAX_COMBINER_TERMS];
  89    const GLfloat scaleRGB = (GLfloat) (1 << combine->ScaleShiftRGB);
  90    const GLfloat scaleA = (GLfloat) (1 << combine->ScaleShiftA);
  91    const GLuint numArgsRGB = combine->_NumArgsRGB;
  92    const GLuint numArgsA = combine->_NumArgsA;
  93    float4_array ccolor[4], rgba;
  94    GLuint i, term;
  95
  96    /* alloc temp pixel buffers */
  97    rgba = (float4_array) malloc(4 * n * sizeof(GLfloat));
  98    if (!rgba) {
  99       _mesa_error(ctx, GL_OUT_OF_MEMORY, "texture_combine");
 100       return;
 101    }
 102
 103    for (i = 0; i < numArgsRGB || i < numArgsA; i++) {
 104       ccolor[i] = (float4_array) malloc(4 * n * sizeof(GLfloat));
 105       if (!ccolor[i]) {
 106          while (i) {
 107             free(ccolor[i]);
 108             i--;
 109          }
 110          _mesa_error(ctx, GL_OUT_OF_MEMORY, "texture_combine");
 111          free(rgba);
 112          return;
 113       }
 114    }
 115
 116    for (i = 0; i < n; i++) {
 117       rgba[i][RCOMP] = CHAN_TO_FLOAT(rgbaChan[i][RCOMP]);
 118       rgba[i][GCOMP] = CHAN_TO_FLOAT(rgbaChan[i][GCOMP]);
 119       rgba[i][BCOMP] = CHAN_TO_FLOAT(rgbaChan[i][BCOMP]);
 120       rgba[i][ACOMP] = CHAN_TO_FLOAT(rgbaChan[i][ACOMP]);
 121    }
 122
 123    /*
 124    printf("modeRGB 0x%x  modeA 0x%x  srcRGB1 0x%x  srcA1 0x%x  srcRGB2 0x%x  srcA2 0x%x\n",
 125           combine->ModeRGB,
 126           combine->ModeA,
 127           combine->SourceRGB[0],
 128           combine->SourceA[0],
 129           combine->SourceRGB[1],
 130           combine->SourceA[1]);
 131    */
 132
 133    /*
 134     * Do operand setup for up to 4 operands.  Loop over the terms.
 135     */
 136    for (term = 0; term < numArgsRGB; term++) {
 137       const GLenum srcRGB = combine->SourceRGB[term];
 138       const GLenum operandRGB = combine->OperandRGB[term];
 139
 140       switch (srcRGB) {
 141          case GL_TEXTURE:
 142             argRGB[term] = get_texel_array(swrast, unit);
 143             break;
 144          case GL_PRIMARY_COLOR:
 145             argRGB[term] = primary_rgba;
 146             break;
 147          case GL_PREVIOUS:
 148             argRGB[term] = rgba;
 149             break;
 150          case GL_CONSTANT:
 151             {
 152                float4_array c = ccolor[term];
 153                GLfloat red   = textureUnit->EnvColor[0];
 154                GLfloat green = textureUnit->EnvColor[1];
 155                GLfloat blue  = textureUnit->EnvColor[2];
 156                GLfloat alpha = textureUnit->EnvColor[3];
 157                for (i = 0; i < n; i++) {
 158                   ASSIGN_4V(c[i], red, green, blue, alpha);
 159                }
 160                argRGB[term] = ccolor[term];
 161             }
 162             break;
 163          /* GL_ATI_texture_env_combine3 allows GL_ZERO & GL_ONE as sources.
 164           */
 165          case GL_ZERO:
 166             {
 167                float4_array c = ccolor[term];
 168                for (i = 0; i < n; i++) {
 169                   ASSIGN_4V(c[i], 0.0F, 0.0F, 0.0F, 0.0F);
 170                }
 171                argRGB[term] = ccolor[term];
 172             }
 173             break;
 174          case GL_ONE:
 175             {
 176                float4_array c = ccolor[term];
 177                for (i = 0; i < n; i++) {
 178                   ASSIGN_4V(c[i], 1.0F, 1.0F, 1.0F, 1.0F);
 179                }
 180                argRGB[term] = ccolor[term];
 181             }
 182             break;
 183          default:
 184             /* ARB_texture_env_crossbar source */
 185             {
 186                const GLuint srcUnit = srcRGB - GL_TEXTURE0;
 187                ASSERT(srcUnit < ctx->Const.MaxTextureUnits);
 188                if (!ctx->Texture.Unit[srcUnit]._ReallyEnabled)
 189                   goto end;
 190                argRGB[term] = get_texel_array(swrast, srcUnit);
 191             }
 192       }
 193
 194       if (operandRGB != GL_SRC_COLOR) {
 195          float4_array src = argRGB[term];
 196          float4_array dst = ccolor[term];
 197
 198          /* point to new arg[term] storage */
 199          argRGB[term] = ccolor[term];
 200
 201          switch (operandRGB) {
 202          case GL_ONE_MINUS_SRC_COLOR:
 203             for (i = 0; i < n; i++) {
 204                dst[i][RCOMP] = 1.0F - src[i][RCOMP];
 205                dst[i][GCOMP] = 1.0F - src[i][GCOMP];
 206                dst[i][BCOMP] = 1.0F - src[i][BCOMP];
 207             }
 208             break;
 209          case GL_SRC_ALPHA:
 210             for (i = 0; i < n; i++) {
 211                dst[i][RCOMP] =
 212                dst[i][GCOMP] =
 213                dst[i][BCOMP] = src[i][ACOMP];
 214             }
 215             break;
 216          case GL_ONE_MINUS_SRC_ALPHA:
 217             for (i = 0; i < n; i++) {
 218                dst[i][RCOMP] =
 219                dst[i][GCOMP] =
 220                dst[i][BCOMP] = 1.0F - src[i][ACOMP];
 221             }
 222             break;
 223          default:
 224             _mesa_problem(ctx, "Bad operandRGB");
 225          }
 226       }
 227    }
 228
 229    /*
 230     * Set up the argA[term] pointers
 231     */
 232    for (term = 0; term < numArgsA; term++) {
 233       const GLenum srcA = combine->SourceA[term];
 234       const GLenum operandA = combine->OperandA[term];
 235
 236       switch (srcA) {
 237          case GL_TEXTURE:
 238             argA[term] = get_texel_array(swrast, unit);
 239             break;
 240          case GL_PRIMARY_COLOR:
 241             argA[term] = primary_rgba;
 242             break;
 243          case GL_PREVIOUS:
 244             argA[term] = rgba;
 245             break;
 246          case GL_CONSTANT:
 247             {
 248                float4_array c = ccolor[term];
 249                GLfloat alpha = textureUnit->EnvColor[3];
 250                for (i = 0; i < n; i++)
 251                   c[i][ACOMP] = alpha;
 252                argA[term] = ccolor[term];
 253             }
 254             break;
 255          /* GL_ATI_texture_env_combine3 allows GL_ZERO & GL_ONE as sources.
 256           */
 257          case GL_ZERO:
 258             {
 259                float4_array c = ccolor[term];
 260                for (i = 0; i < n; i++)
 261                   c[i][ACOMP] = 0.0F;
 262                argA[term] = ccolor[term];
 263             }
 264             break;
 265          case GL_ONE:
 266             {
 267                float4_array c = ccolor[term];
 268                for (i = 0; i < n; i++)
 269                   c[i][ACOMP] = 1.0F;
 270                argA[term] = ccolor[term];
 271             }
 272             break;
 273          default:
 274             /* ARB_texture_env_crossbar source */
 275             {
 276                const GLuint srcUnit = srcA - GL_TEXTURE0;
 277                ASSERT(srcUnit < ctx->Const.MaxTextureUnits);
 278                if (!ctx->Texture.Unit[srcUnit]._ReallyEnabled)
 279                   goto end;
 280                argA[term] = get_texel_array(swrast, srcUnit);
 281             }
 282       }
 283
 284       if (operandA == GL_ONE_MINUS_SRC_ALPHA) {
 285          float4_array src = argA[term];
 286          float4_array dst = ccolor[term];
 287          argA[term] = ccolor[term];
 288          for (i = 0; i < n; i++) {
 289             dst[i][ACOMP] = 1.0F - src[i][ACOMP];
 290          }
 291       }
 292    }
 293
 294    /* RGB channel combine */
 295    {
 296       float4_array arg0 = argRGB[0];
 297       float4_array arg1 = argRGB[1];
 298       float4_array arg2 = argRGB[2];
 299       float4_array arg3 = argRGB[3];
 300
 301       switch (combine->ModeRGB) {
 302       case GL_REPLACE:
 303          for (i = 0; i < n; i++) {
 304             rgba[i][RCOMP] = arg0[i][RCOMP] * scaleRGB;
 305             rgba[i][GCOMP] = arg0[i][GCOMP] * scaleRGB;
 306             rgba[i][BCOMP] = arg0[i][BCOMP] * scaleRGB;
 307          }
 308          break;
 309       case GL_MODULATE:
 310          for (i = 0; i < n; i++) {
 311             rgba[i][RCOMP] = arg0[i][RCOMP] * arg1[i][RCOMP] * scaleRGB;
 312             rgba[i][GCOMP] = arg0[i][GCOMP] * arg1[i][GCOMP] * scaleRGB;
 313             rgba[i][BCOMP] = arg0[i][BCOMP] * arg1[i][BCOMP] * scaleRGB;
 314          }
 315          break;
 316       case GL_ADD:
 317          if (textureUnit->EnvMode == GL_COMBINE4_NV) {
 318             /* (a * b) + (c * d) */
 319             for (i = 0; i < n; i++) {
 320                rgba[i][RCOMP] = (arg0[i][RCOMP] * arg1[i][RCOMP] +
 321                                  arg2[i][RCOMP] * arg3[i][RCOMP]) * scaleRGB;
 322                rgba[i][GCOMP] = (arg0[i][GCOMP] * arg1[i][GCOMP] +
 323                                  arg2[i][GCOMP] * arg3[i][GCOMP]) * scaleRGB;
 324                rgba[i][BCOMP] = (arg0[i][BCOMP] * arg1[i][BCOMP] +
 325                                  arg2[i][BCOMP] * arg3[i][BCOMP]) * scaleRGB;
 326             }
 327          }
 328          else {
 329             /* 2-term addition */
 330             for (i = 0; i < n; i++) {
 331                rgba[i][RCOMP] = (arg0[i][RCOMP] + arg1[i][RCOMP]) * scaleRGB;
 332                rgba[i][GCOMP] = (arg0[i][GCOMP] + arg1[i][GCOMP]) * scaleRGB;
 333                rgba[i][BCOMP] = (arg0[i][BCOMP] + arg1[i][BCOMP]) * scaleRGB;
 334             }
 335          }
 336          break;
 337       case GL_ADD_SIGNED:
 338          if (textureUnit->EnvMode == GL_COMBINE4_NV) {
 339             /* (a * b) + (c * d) - 0.5 */
 340             for (i = 0; i < n; i++) {
 341                rgba[i][RCOMP] = (arg0[i][RCOMP] * arg1[i][RCOMP] +
 342                                  arg2[i][RCOMP] * arg3[i][RCOMP] - 0.5F) * scaleRGB;
 343                rgba[i][GCOMP] = (arg0[i][GCOMP] * arg1[i][GCOMP] +
 344                                  arg2[i][GCOMP] * arg3[i][GCOMP] - 0.5F) * scaleRGB;
 345                rgba[i][BCOMP] = (arg0[i][BCOMP] * arg1[i][BCOMP] +
 346                                  arg2[i][BCOMP] * arg3[i][BCOMP] - 0.5F) * scaleRGB;
 347             }
 348          }
 349          else {
 350             for (i = 0; i < n; i++) {
 351                rgba[i][RCOMP] = (arg0[i][RCOMP] + arg1[i][RCOMP] - 0.5F) * scaleRGB;
 352                rgba[i][GCOMP] = (arg0[i][GCOMP] + arg1[i][GCOMP] - 0.5F) * scaleRGB;
 353                rgba[i][BCOMP] = (arg0[i][BCOMP] + arg1[i][BCOMP] - 0.5F) * scaleRGB;
 354             }
 355          }
 356          break;
 357       case GL_INTERPOLATE:
 358          for (i = 0; i < n; i++) {
 359             rgba[i][RCOMP] = (arg0[i][RCOMP] * arg2[i][RCOMP] +
 360                           arg1[i][RCOMP] * (1.0F - arg2[i][RCOMP])) * scaleRGB;
 361             rgba[i][GCOMP] = (arg0[i][GCOMP] * arg2[i][GCOMP] +
 362                           arg1[i][GCOMP] * (1.0F - arg2[i][GCOMP])) * scaleRGB;
 363             rgba[i][BCOMP] = (arg0[i][BCOMP] * arg2[i][BCOMP] +
 364                           arg1[i][BCOMP] * (1.0F - arg2[i][BCOMP])) * scaleRGB;
 365          }
 366          break;
 367       case GL_SUBTRACT:
 368          for (i = 0; i < n; i++) {
 369             rgba[i][RCOMP] = (arg0[i][RCOMP] - arg1[i][RCOMP]) * scaleRGB;
 370             rgba[i][GCOMP] = (arg0[i][GCOMP] - arg1[i][GCOMP]) * scaleRGB;
 371             rgba[i][BCOMP] = (arg0[i][BCOMP] - arg1[i][BCOMP]) * scaleRGB;
 372          }
 373          break;
 374       case GL_DOT3_RGB_EXT:
 375       case GL_DOT3_RGBA_EXT:
 376          /* Do not scale the result by 1 2 or 4 */
 377          for (i = 0; i < n; i++) {
 378             GLfloat dot = ((arg0[i][RCOMP] - 0.5F) * (arg1[i][RCOMP] - 0.5F) +
 379                            (arg0[i][GCOMP] - 0.5F) * (arg1[i][GCOMP] - 0.5F) +
 380                            (arg0[i][BCOMP] - 0.5F) * (arg1[i][BCOMP] - 0.5F))
 381                * 4.0F;
 382             dot = CLAMP(dot, 0.0F, 1.0F);
 383             rgba[i][RCOMP] = rgba[i][GCOMP] = rgba[i][BCOMP] = dot;
 384          }
 385          break;
 386       case GL_DOT3_RGB:
 387       case GL_DOT3_RGBA:
 388          /* DO scale the result by 1 2 or 4 */
 389          for (i = 0; i < n; i++) {
 390             GLfloat dot = ((arg0[i][RCOMP] - 0.5F) * (arg1[i][RCOMP] - 0.5F) +
 391                            (arg0[i][GCOMP] - 0.5F) * (arg1[i][GCOMP] - 0.5F) +
 392                            (arg0[i][BCOMP] - 0.5F) * (arg1[i][BCOMP] - 0.5F))
 393                * 4.0F * scaleRGB;
 394             dot = CLAMP(dot, 0.0F, 1.0F);
 395             rgba[i][RCOMP] = rgba[i][GCOMP] = rgba[i][BCOMP] = dot;
 396          }
 397          break;
 398       case GL_MODULATE_ADD_ATI:
 399          for (i = 0; i < n; i++) {
 400             rgba[i][RCOMP] = ((arg0[i][RCOMP] * arg2[i][RCOMP]) +
 401                               arg1[i][RCOMP]) * scaleRGB;
 402             rgba[i][GCOMP] = ((arg0[i][GCOMP] * arg2[i][GCOMP]) +
 403                               arg1[i][GCOMP]) * scaleRGB;
 404             rgba[i][BCOMP] = ((arg0[i][BCOMP] * arg2[i][BCOMP]) +
 405                               arg1[i][BCOMP]) * scaleRGB;
 406          }
 407          break;
 408       case GL_MODULATE_SIGNED_ADD_ATI:
 409          for (i = 0; i < n; i++) {
 410             rgba[i][RCOMP] = ((arg0[i][RCOMP] * arg2[i][RCOMP]) +
 411                               arg1[i][RCOMP] - 0.5F) * scaleRGB;
 412             rgba[i][GCOMP] = ((arg0[i][GCOMP] * arg2[i][GCOMP]) +
 413                               arg1[i][GCOMP] - 0.5F) * scaleRGB;
 414             rgba[i][BCOMP] = ((arg0[i][BCOMP] * arg2[i][BCOMP]) +
 415                               arg1[i][BCOMP] - 0.5F) * scaleRGB;
 416          }
 417          break;
 418       case GL_MODULATE_SUBTRACT_ATI:
 419          for (i = 0; i < n; i++) {
 420             rgba[i][RCOMP] = ((arg0[i][RCOMP] * arg2[i][RCOMP]) -
 421                               arg1[i][RCOMP]) * scaleRGB;
 422             rgba[i][GCOMP] = ((arg0[i][GCOMP] * arg2[i][GCOMP]) -
 423                               arg1[i][GCOMP]) * scaleRGB;
 424             rgba[i][BCOMP] = ((arg0[i][BCOMP] * arg2[i][BCOMP]) -
 425                               arg1[i][BCOMP]) * scaleRGB;
 426          }
 427          break;
 428       case GL_BUMP_ENVMAP_ATI:
 429          /* this produces a fixed rgba color, and the coord calc is done elsewhere */
 430          for (i = 0; i < n; i++) {
 431             /* rgba result is 0,0,0,1 */
 432             rgba[i][RCOMP] = 0.0;
 433             rgba[i][GCOMP] = 0.0;
 434             rgba[i][BCOMP] = 0.0;
 435             rgba[i][ACOMP] = 1.0;
 436          }
 437          goto end; /* no alpha processing */
 438       default:
 439          _mesa_problem(ctx, "invalid combine mode");
 440       }
 441    }
 442
 443    /* Alpha channel combine */
 444    {
 445       float4_array arg0 = argA[0];
 446       float4_array arg1 = argA[1];
 447       float4_array arg2 = argA[2];
 448       float4_array arg3 = argA[3];
 449
 450       switch (combine->ModeA) {
 451       case GL_REPLACE:
 452          for (i = 0; i < n; i++) {
 453             rgba[i][ACOMP] = arg0[i][ACOMP] * scaleA;
 454          }
 455          break;
 456       case GL_MODULATE:
 457          for (i = 0; i < n; i++) {
 458             rgba[i][ACOMP] = arg0[i][ACOMP] * arg1[i][ACOMP] * scaleA;
 459          }
 460          break;
 461       case GL_ADD:
 462          if (textureUnit->EnvMode == GL_COMBINE4_NV) {
 463             /* (a * b) + (c * d) */
 464             for (i = 0; i < n; i++) {
 465                rgba[i][ACOMP] = (arg0[i][ACOMP] * arg1[i][ACOMP] +
 466                                  arg2[i][ACOMP] * arg3[i][ACOMP]) * scaleA;
 467             }
 468          }
 469          else {
 470             /* two-term add */
 471             for (i = 0; i < n; i++) {
 472                rgba[i][ACOMP] = (arg0[i][ACOMP] + arg1[i][ACOMP]) * scaleA;
 473             }
 474          }
 475          break;
 476       case GL_ADD_SIGNED:
 477          if (textureUnit->EnvMode == GL_COMBINE4_NV) {
 478             /* (a * b) + (c * d) - 0.5 */
 479             for (i = 0; i < n; i++) {
 480                rgba[i][ACOMP] = (arg0[i][ACOMP] * arg1[i][ACOMP] +
 481                                  arg2[i][ACOMP] * arg3[i][ACOMP] -
 482                                  0.5F) * scaleA;
 483             }
 484          }
 485          else {
 486             /* a + b - 0.5 */
 487             for (i = 0; i < n; i++) {
 488                rgba[i][ACOMP] = (arg0[i][ACOMP] + arg1[i][ACOMP] - 0.5F) * scaleA;
 489             }
 490          }
 491          break;
 492       case GL_INTERPOLATE:
 493          for (i = 0; i < n; i++) {
 494             rgba[i][ACOMP] = (arg0[i][ACOMP] * arg2[i][ACOMP] +
 495                               arg1[i][ACOMP] * (1.0F - arg2[i][ACOMP]))
 496                * scaleA;
 497          }
 498          break;
 499       case GL_SUBTRACT:
 500          for (i = 0; i < n; i++) {
 501             rgba[i][ACOMP] = (arg0[i][ACOMP] - arg1[i][ACOMP]) * scaleA;
 502          }
 503          break;
 504       case GL_MODULATE_ADD_ATI:
 505          for (i = 0; i < n; i++) {
 506             rgba[i][ACOMP] = ((arg0[i][ACOMP] * arg2[i][ACOMP])
 507                               + arg1[i][ACOMP]) * scaleA;
 508          }
 509          break;
 510       case GL_MODULATE_SIGNED_ADD_ATI:
 511          for (i = 0; i < n; i++) {
 512             rgba[i][ACOMP] = ((arg0[i][ACOMP] * arg2[i][ACOMP]) +
 513                               arg1[i][ACOMP] - 0.5F) * scaleA;
 514          }
 515          break;
 516       case GL_MODULATE_SUBTRACT_ATI:
 517          for (i = 0; i < n; i++) {
 518             rgba[i][ACOMP] = ((arg0[i][ACOMP] * arg2[i][ACOMP])
 519                               - arg1[i][ACOMP]) * scaleA;
 520          }
 521          break;
 522       default:
 523          _mesa_problem(ctx, "invalid combine mode");
 524       }
 525    }
 526
 527    /* Fix the alpha component for GL_DOT3_RGBA_EXT/ARB combining.
 528     * This is kind of a kludge.  It would have been better if the spec
 529     * were written such that the GL_COMBINE_ALPHA value could be set to
 530     * GL_DOT3.
 531     */
 532    if (combine->ModeRGB == GL_DOT3_RGBA_EXT ||
 533        combine->ModeRGB == GL_DOT3_RGBA) {
 534       for (i = 0; i < n; i++) {
 535          rgba[i][ACOMP] = rgba[i][RCOMP];
 536       }
 537    }
 538
 539    for (i = 0; i < n; i++) {
 540       UNCLAMPED_FLOAT_TO_CHAN(rgbaChan[i][RCOMP], rgba[i][RCOMP]);
 541       UNCLAMPED_FLOAT_TO_CHAN(rgbaChan[i][GCOMP], rgba[i][GCOMP]);
 542       UNCLAMPED_FLOAT_TO_CHAN(rgbaChan[i][BCOMP], rgba[i][BCOMP]);
 543       UNCLAMPED_FLOAT_TO_CHAN(rgbaChan[i][ACOMP], rgba[i][ACOMP]);
 544    }
 545
 546 end:
 547    for (i = 0; i < numArgsRGB || i < numArgsA; i++) {
 548       free(ccolor[i]);
 549    }
 550    free(rgba);
 551 }
 552
 553
 554 /**
 555  * Apply X/Y/Z/W/0/1 swizzle to an array of colors/texels.
 556  * See GL_EXT_texture_swizzle.
 557  */
 558 static void
 559 swizzle_texels(GLuint swizzle, GLuint count, float4_array texels)
 560 {
 561    const GLuint swzR = GET_SWZ(swizzle, 0);
 562    const GLuint swzG = GET_SWZ(swizzle, 1);
 563    const GLuint swzB = GET_SWZ(swizzle, 2);
 564    const GLuint swzA = GET_SWZ(swizzle, 3);
 565    GLfloat vector[6];
 566    GLuint i;
 567
 568    vector[SWIZZLE_ZERO] = 0;
 569    vector[SWIZZLE_ONE] = 1.0F;
 570
 571    for (i = 0; i < count; i++) {
 572       vector[SWIZZLE_X] = texels[i][0];
 573       vector[SWIZZLE_Y] = texels[i][1];
 574       vector[SWIZZLE_Z] = texels[i][2];
 575       vector[SWIZZLE_W] = texels[i][3];
 576       texels[i][RCOMP] = vector[swzR];
 577       texels[i][GCOMP] = vector[swzG];
 578       texels[i][BCOMP] = vector[swzB];
 579       texels[i][ACOMP] = vector[swzA];
 580    }
 581 }
 582
 583
 584 /**
 585  * Apply texture mapping to a span of fragments.
 586  */
 587 void
 588 _swrast_texture_span( struct gl_context *ctx, SWspan *span )
 589 {
 590    SWcontext *swrast = SWRAST_CONTEXT(ctx);
 591    float4_array primary_rgba;
 592    GLuint unit;
 593
 594    if (!swrast->TexelBuffer) {
 595 #ifdef _OPENMP
 596       const GLint maxThreads = omp_get_max_threads();
 597 #else
 598       const GLint maxThreads = 1;
 599 #endif
 600
 601       /* TexelBuffer is also global and normally shared by all SWspan
 602        * instances; when running with multiple threads, create one per
 603        * thread.
 604        */
 605       swrast->TexelBuffer =
 606          (GLfloat *) MALLOC(ctx->Const.MaxTextureImageUnits * maxThreads *
 607                             MAX_WIDTH * 4 * sizeof(GLfloat));
 608       if (!swrast->TexelBuffer) {
 609          _mesa_error(ctx, GL_OUT_OF_MEMORY, "texture_combine");
 610          return;
 611       }
 612    }
 613
 614    primary_rgba = (float4_array) malloc(span->end * 4 * sizeof(GLfloat));
 615
 616    if (!primary_rgba) {
 617       _mesa_error(ctx, GL_OUT_OF_MEMORY, "texture_span");
 618       return;
 619    }
 620
 621    ASSERT(span->end <= MAX_WIDTH);
 622
 623    /*
 624     * Save copy of the incoming fragment colors (the GL_PRIMARY_COLOR)
 625     */
 626    if (swrast->_TextureCombinePrimary) {
 627       GLuint i;
 628       for (i = 0; i < span->end; i++) {
 629          primary_rgba[i][RCOMP] = CHAN_TO_FLOAT(span->array->rgba[i][RCOMP]);
 630          primary_rgba[i][GCOMP] = CHAN_TO_FLOAT(span->array->rgba[i][GCOMP]);
 631          primary_rgba[i][BCOMP] = CHAN_TO_FLOAT(span->array->rgba[i][BCOMP]);
 632          primary_rgba[i][ACOMP] = CHAN_TO_FLOAT(span->array->rgba[i][ACOMP]);
 633       }
 634    }
 635
 636    /* First must sample all bump maps */
 637    for (unit = 0; unit < ctx->Const.MaxTextureUnits; unit++) {
 638       const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
 639
 640       if (texUnit->_ReallyEnabled &&
 641          texUnit->_CurrentCombine->ModeRGB == GL_BUMP_ENVMAP_ATI) {
 642          const GLfloat (*texcoords)[4] = (const GLfloat (*)[4])
 643             span->array->attribs[FRAG_ATTRIB_TEX0 + unit];
 644          float4_array targetcoords =
 645             span->array->attribs[FRAG_ATTRIB_TEX0 +
 646                ctx->Texture.Unit[unit].BumpTarget - GL_TEXTURE0];
 647
 648          const struct gl_texture_object *curObj = texUnit->_Current;
 649          GLfloat *lambda = span->array->lambda[unit];
 650          float4_array texels = get_texel_array(swrast, unit);
 651          GLuint i;
 652          GLfloat rotMatrix00 = ctx->Texture.Unit[unit].RotMatrix[0];
 653          GLfloat rotMatrix01 = ctx->Texture.Unit[unit].RotMatrix[1];
 654          GLfloat rotMatrix10 = ctx->Texture.Unit[unit].RotMatrix[2];
 655          GLfloat rotMatrix11 = ctx->Texture.Unit[unit].RotMatrix[3];
 656
 657          /* adjust texture lod (lambda) */
 658          if (span->arrayMask & SPAN_LAMBDA) {
 659             if (texUnit->LodBias + curObj->Sampler.LodBias != 0.0F) {
 660                /* apply LOD bias, but don't clamp yet */
 661                const GLfloat bias = CLAMP(texUnit->LodBias + curObj->Sampler.LodBias,
 662                                           -ctx->Const.MaxTextureLodBias,
 663                                           ctx->Const.MaxTextureLodBias);
 664                GLuint i;
 665                for (i = 0; i < span->end; i++) {
 666                   lambda[i] += bias;
 667                }
 668             }
 669
 670             if (curObj->Sampler.MinLod != -1000.0 ||
 671                 curObj->Sampler.MaxLod != 1000.0) {
 672                /* apply LOD clamping to lambda */
 673                const GLfloat min = curObj->Sampler.MinLod;
 674                const GLfloat max = curObj->Sampler.MaxLod;
 675                GLuint i;
 676                for (i = 0; i < span->end; i++) {
 677                   GLfloat l = lambda[i];
 678                   lambda[i] = CLAMP(l, min, max);
 679                }
 680             }
 681          }
 682
 683          /* Sample the texture (span->end = number of fragments) */
 684          swrast->TextureSample[unit]( ctx, texUnit->_Current, span->end,
 685                                       texcoords, lambda, texels );
 686
 687          /* manipulate the span values of the bump target
 688             not sure this can work correctly even ignoring
 689             the problem that channel is unsigned */
 690          for (i = 0; i < span->end; i++) {
 691             targetcoords[i][0] += (texels[i][0] * rotMatrix00 + texels[i][1] *
 692                                   rotMatrix01) / targetcoords[i][3];
 693             targetcoords[i][1] += (texels[i][0] * rotMatrix10 + texels[i][1] *
 694                                   rotMatrix11) / targetcoords[i][3];
 695          }
 696       }
 697    }
 698
 699    /*
 700     * Must do all texture sampling before combining in order to
 701     * accomodate GL_ARB_texture_env_crossbar.
 702     */
 703    for (unit = 0; unit < ctx->Const.MaxTextureUnits; unit++) {
 704       const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
 705       if (texUnit->_ReallyEnabled &&
 706           texUnit->_CurrentCombine->ModeRGB != GL_BUMP_ENVMAP_ATI) {
 707          const GLfloat (*texcoords)[4] = (const GLfloat (*)[4])
 708             span->array->attribs[FRAG_ATTRIB_TEX0 + unit];
 709          const struct gl_texture_object *curObj = texUnit->_Current;
 710          GLfloat *lambda = span->array->lambda[unit];
 711          float4_array texels = get_texel_array(swrast, unit);
 712
 713          /* adjust texture lod (lambda) */
 714          if (span->arrayMask & SPAN_LAMBDA) {
 715             if (texUnit->LodBias + curObj->Sampler.LodBias != 0.0F) {
 716                /* apply LOD bias, but don't clamp yet */
 717                const GLfloat bias = CLAMP(texUnit->LodBias + curObj->Sampler.LodBias,
 718                                           -ctx->Const.MaxTextureLodBias,
 719                                           ctx->Const.MaxTextureLodBias);
 720                GLuint i;
 721                for (i = 0; i < span->end; i++) {
 722                   lambda[i] += bias;
 723                }
 724             }
 725
 726             if (curObj->Sampler.MinLod != -1000.0 ||
 727                 curObj->Sampler.MaxLod != 1000.0) {
 728                /* apply LOD clamping to lambda */
 729                const GLfloat min = curObj->Sampler.MinLod;
 730                const GLfloat max = curObj->Sampler.MaxLod;
 731                GLuint i;
 732                for (i = 0; i < span->end; i++) {
 733                   GLfloat l = lambda[i];
 734                   lambda[i] = CLAMP(l, min, max);
 735                }
 736             }
 737          }
 738          else if (curObj->Sampler.MaxAnisotropy > 1.0 &&
 739                   curObj->Sampler.MinFilter == GL_LINEAR_MIPMAP_LINEAR) {
 740             /* sample_lambda_2d_aniso is beeing used as texture_sample_func,
 741              * it requires the current SWspan *span as an additional parameter.
 742              * In order to keep the same function signature, the unused lambda
 743              * parameter will be modified to actually contain the SWspan pointer.
 744              * This is a Hack. To make it right, the texture_sample_func
 745              * signature and all implementing functions need to be modified.
 746              */
 747             /* "hide" SWspan struct; cast to (GLfloat *) to suppress warning */
 748             lambda = (GLfloat *)span;
 749          }
 750
 751          /* Sample the texture (span->end = number of fragments) */
 752          swrast->TextureSample[unit]( ctx, texUnit->_Current, span->end,
 753                                       texcoords, lambda, texels );
 754
 755          /* GL_EXT_texture_swizzle */
 756          if (curObj->_Swizzle != SWIZZLE_NOOP) {
 757             swizzle_texels(curObj->_Swizzle, span->end, texels);
 758          }
 759       }
 760    }
 761
 762    /*
 763     * OK, now apply the texture (aka texture combine/blend).
 764     * We modify the span->color.rgba values.
 765     */
 766    for (unit = 0; unit < ctx->Const.MaxTextureUnits; unit++) {
 767       if (ctx->Texture.Unit[unit]._ReallyEnabled) {
 768          texture_combine( ctx, unit, span->end,
 769                           primary_rgba,
 770                           swrast->TexelBuffer,
 771                           span->array->rgba );
 772       }
 773    }
 774
 775    free(primary_rgba);
 776 }