src/mesa/swrast/s_texcombine.c

   1 /*
   2  * Mesa 3-D graphics library
   3  * Version:  7.5
   4  *
   5  * Copyright (C) 1999-2008  Brian Paul   All Rights Reserved.
   6  * Copyright (C) 2009  VMware, Inc.   All Rights Reserved.
   7  *
   8  * Permission is hereby granted, free of charge, to any person obtaining a
   9  * copy of this software and associated documentation files (the "Software"),
  10  * to deal in the Software without restriction, including without limitation
  11  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  12  * and/or sell copies of the Software, and to permit persons to whom the
  13  * Software is furnished to do so, subject to the following conditions:
  14  *
  15  * The above copyright notice and this permission notice shall be included
  16  * in all copies or substantial portions of the Software.
  17  *
  18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  20  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  21  * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
  22  * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  23  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  24  */
  25
  26
  27 #include "main/glheader.h"
  28 #include "main/context.h"
  29 #include "main/colormac.h"
  30 #include "main/imports.h"
  31 #include "main/pixeltransfer.h"
  32 #include "main/samplerobj.h"
  33 #include "program/prog_instruction.h"
  34
  35 #include "s_context.h"
  36 #include "s_texcombine.h"
  37
  38
  39 /**
  40  * Pointer to array of float[4]
  41  * This type makes the code below more concise and avoids a lot of casting.
  42  */
  43 typedef float (*float4_array)[4];
  44
  45
  46 /**
  47  * Return array of texels for given unit.
  48  */
  49 static inline float4_array
  50 get_texel_array(SWcontext *swrast, GLuint unit)
  51 {
  52 #ifdef _OPENMP
  53    return (float4_array) (swrast->TexelBuffer + unit * SWRAST_MAX_WIDTH * 4 * omp_get_num_threads() + (SWRAST_MAX_WIDTH * 4 * omp_get_thread_num()));
  54 #else
  55    return (float4_array) (swrast->TexelBuffer + unit * SWRAST_MAX_WIDTH * 4);
  56 #endif
  57 }
  58
  59
  60
  61 /**
  62  * Do texture application for:
  63  *  GL_EXT_texture_env_combine
  64  *  GL_ARB_texture_env_combine
  65  *  GL_EXT_texture_env_dot3
  66  *  GL_ARB_texture_env_dot3
  67  *  GL_ATI_texture_env_combine3
  68  *  GL_NV_texture_env_combine4
  69  *  conventional GL texture env modes
  70  *
  71  * \param ctx          rendering context
  72  * \param unit         the texture combiner unit
  73  * \param primary_rgba incoming fragment color array
  74  * \param texelBuffer  pointer to texel colors for all texture units
  75  *
  76  * \param span         two fields are used in this function:
  77  *                       span->end: number of fragments to process
  78  *                       span->array->rgba: incoming/result fragment colors
  79  */
  80 static void
  81 texture_combine( struct gl_context *ctx, GLuint unit,
  82                  const float4_array primary_rgba,
  83                  const GLfloat *texelBuffer,
  84                  SWspan *span )
  85 {
  86    SWcontext *swrast = SWRAST_CONTEXT(ctx);
  87    const struct gl_texture_unit *textureUnit = &(ctx->Texture.Unit[unit]);
  88    const struct gl_tex_env_combine_state *combine = textureUnit->_CurrentCombine;
  89    float4_array argRGB[MAX_COMBINER_TERMS];
  90    float4_array argA[MAX_COMBINER_TERMS];
  91    const GLfloat scaleRGB = (GLfloat) (1 << combine->ScaleShiftRGB);
  92    const GLfloat scaleA = (GLfloat) (1 << combine->ScaleShiftA);
  93    const GLuint numArgsRGB = combine->_NumArgsRGB;
  94    const GLuint numArgsA = combine->_NumArgsA;
  95    float4_array ccolor[4], rgba;
  96    GLuint i, term;
  97    GLuint n = span->end;
  98    GLchan (*rgbaChan)[4] = span->array->rgba;
  99
 100    /* alloc temp pixel buffers */
 101    rgba = malloc(4 * n * sizeof(GLfloat));
 102    if (!rgba) {
 103       _mesa_error(ctx, GL_OUT_OF_MEMORY, "texture_combine");
 104       return;
 105    }
 106
 107    for (i = 0; i < numArgsRGB || i < numArgsA; i++) {
 108       ccolor[i] = malloc(4 * n * sizeof(GLfloat));
 109       if (!ccolor[i]) {
 110          while (i) {
 111             free(ccolor[i]);
 112             i--;
 113          }
 114          _mesa_error(ctx, GL_OUT_OF_MEMORY, "texture_combine");
 115          free(rgba);
 116          return;
 117       }
 118    }
 119
 120    for (i = 0; i < n; i++) {
 121       rgba[i][RCOMP] = CHAN_TO_FLOAT(rgbaChan[i][RCOMP]);
 122       rgba[i][GCOMP] = CHAN_TO_FLOAT(rgbaChan[i][GCOMP]);
 123       rgba[i][BCOMP] = CHAN_TO_FLOAT(rgbaChan[i][BCOMP]);
 124       rgba[i][ACOMP] = CHAN_TO_FLOAT(rgbaChan[i][ACOMP]);
 125    }
 126
 127    /*
 128    printf("modeRGB 0x%x  modeA 0x%x  srcRGB1 0x%x  srcA1 0x%x  srcRGB2 0x%x  srcA2 0x%x\n",
 129           combine->ModeRGB,
 130           combine->ModeA,
 131           combine->SourceRGB[0],
 132           combine->SourceA[0],
 133           combine->SourceRGB[1],
 134           combine->SourceA[1]);
 135    */
 136
 137    /*
 138     * Do operand setup for up to 4 operands.  Loop over the terms.
 139     */
 140    for (term = 0; term < numArgsRGB; term++) {
 141       const GLenum srcRGB = combine->SourceRGB[term];
 142       const GLenum operandRGB = combine->OperandRGB[term];
 143
 144       switch (srcRGB) {
 145          case GL_TEXTURE:
 146             argRGB[term] = get_texel_array(swrast, unit);
 147             break;
 148          case GL_PRIMARY_COLOR:
 149             argRGB[term] = primary_rgba;
 150             break;
 151          case GL_PREVIOUS:
 152             argRGB[term] = rgba;
 153             break;
 154          case GL_CONSTANT:
 155             {
 156                float4_array c = ccolor[term];
 157                GLfloat red   = textureUnit->EnvColor[0];
 158                GLfloat green = textureUnit->EnvColor[1];
 159                GLfloat blue  = textureUnit->EnvColor[2];
 160                GLfloat alpha = textureUnit->EnvColor[3];
 161                for (i = 0; i < n; i++) {
 162                   ASSIGN_4V(c[i], red, green, blue, alpha);
 163                }
 164                argRGB[term] = ccolor[term];
 165             }
 166             break;
 167          /* GL_ATI_texture_env_combine3 allows GL_ZERO & GL_ONE as sources.
 168           */
 169          case GL_ZERO:
 170             {
 171                float4_array c = ccolor[term];
 172                for (i = 0; i < n; i++) {
 173                   ASSIGN_4V(c[i], 0.0F, 0.0F, 0.0F, 0.0F);
 174                }
 175                argRGB[term] = ccolor[term];
 176             }
 177             break;
 178          case GL_ONE:
 179             {
 180                float4_array c = ccolor[term];
 181                for (i = 0; i < n; i++) {
 182                   ASSIGN_4V(c[i], 1.0F, 1.0F, 1.0F, 1.0F);
 183                }
 184                argRGB[term] = ccolor[term];
 185             }
 186             break;
 187          default:
 188             /* ARB_texture_env_crossbar source */
 189             {
 190                const GLuint srcUnit = srcRGB - GL_TEXTURE0;
 191                ASSERT(srcUnit < ctx->Const.MaxTextureUnits);
 192                if (!ctx->Texture.Unit[srcUnit]._ReallyEnabled)
 193                   goto end;
 194                argRGB[term] = get_texel_array(swrast, srcUnit);
 195             }
 196       }
 197
 198       if (operandRGB != GL_SRC_COLOR) {
 199          float4_array src = argRGB[term];
 200          float4_array dst = ccolor[term];
 201
 202          /* point to new arg[term] storage */
 203          argRGB[term] = ccolor[term];
 204
 205          switch (operandRGB) {
 206          case GL_ONE_MINUS_SRC_COLOR:
 207             for (i = 0; i < n; i++) {
 208                dst[i][RCOMP] = 1.0F - src[i][RCOMP];
 209                dst[i][GCOMP] = 1.0F - src[i][GCOMP];
 210                dst[i][BCOMP] = 1.0F - src[i][BCOMP];
 211             }
 212             break;
 213          case GL_SRC_ALPHA:
 214             for (i = 0; i < n; i++) {
 215                dst[i][RCOMP] =
 216                dst[i][GCOMP] =
 217                dst[i][BCOMP] = src[i][ACOMP];
 218             }
 219             break;
 220          case GL_ONE_MINUS_SRC_ALPHA:
 221             for (i = 0; i < n; i++) {
 222                dst[i][RCOMP] =
 223                dst[i][GCOMP] =
 224                dst[i][BCOMP] = 1.0F - src[i][ACOMP];
 225             }
 226             break;
 227          default:
 228             _mesa_problem(ctx, "Bad operandRGB");
 229          }
 230       }
 231    }
 232
 233    /*
 234     * Set up the argA[term] pointers
 235     */
 236    for (term = 0; term < numArgsA; term++) {
 237       const GLenum srcA = combine->SourceA[term];
 238       const GLenum operandA = combine->OperandA[term];
 239
 240       switch (srcA) {
 241          case GL_TEXTURE:
 242             argA[term] = get_texel_array(swrast, unit);
 243             break;
 244          case GL_PRIMARY_COLOR:
 245             argA[term] = primary_rgba;
 246             break;
 247          case GL_PREVIOUS:
 248             argA[term] = rgba;
 249             break;
 250          case GL_CONSTANT:
 251             {
 252                float4_array c = ccolor[term];
 253                GLfloat alpha = textureUnit->EnvColor[3];
 254                for (i = 0; i < n; i++)
 255                   c[i][ACOMP] = alpha;
 256                argA[term] = ccolor[term];
 257             }
 258             break;
 259          /* GL_ATI_texture_env_combine3 allows GL_ZERO & GL_ONE as sources.
 260           */
 261          case GL_ZERO:
 262             {
 263                float4_array c = ccolor[term];
 264                for (i = 0; i < n; i++)
 265                   c[i][ACOMP] = 0.0F;
 266                argA[term] = ccolor[term];
 267             }
 268             break;
 269          case GL_ONE:
 270             {
 271                float4_array c = ccolor[term];
 272                for (i = 0; i < n; i++)
 273                   c[i][ACOMP] = 1.0F;
 274                argA[term] = ccolor[term];
 275             }
 276             break;
 277          default:
 278             /* ARB_texture_env_crossbar source */
 279             {
 280                const GLuint srcUnit = srcA - GL_TEXTURE0;
 281                ASSERT(srcUnit < ctx->Const.MaxTextureUnits);
 282                if (!ctx->Texture.Unit[srcUnit]._ReallyEnabled)
 283                   goto end;
 284                argA[term] = get_texel_array(swrast, srcUnit);
 285             }
 286       }
 287
 288       if (operandA == GL_ONE_MINUS_SRC_ALPHA) {
 289          float4_array src = argA[term];
 290          float4_array dst = ccolor[term];
 291          argA[term] = ccolor[term];
 292          for (i = 0; i < n; i++) {
 293             dst[i][ACOMP] = 1.0F - src[i][ACOMP];
 294          }
 295       }
 296    }
 297
 298    /* RGB channel combine */
 299    {
 300       float4_array arg0 = argRGB[0];
 301       float4_array arg1 = argRGB[1];
 302       float4_array arg2 = argRGB[2];
 303       float4_array arg3 = argRGB[3];
 304
 305       switch (combine->ModeRGB) {
 306       case GL_REPLACE:
 307          for (i = 0; i < n; i++) {
 308             rgba[i][RCOMP] = arg0[i][RCOMP] * scaleRGB;
 309             rgba[i][GCOMP] = arg0[i][GCOMP] * scaleRGB;
 310             rgba[i][BCOMP] = arg0[i][BCOMP] * scaleRGB;
 311          }
 312          break;
 313       case GL_MODULATE:
 314          for (i = 0; i < n; i++) {
 315             rgba[i][RCOMP] = arg0[i][RCOMP] * arg1[i][RCOMP] * scaleRGB;
 316             rgba[i][GCOMP] = arg0[i][GCOMP] * arg1[i][GCOMP] * scaleRGB;
 317             rgba[i][BCOMP] = arg0[i][BCOMP] * arg1[i][BCOMP] * scaleRGB;
 318          }
 319          break;
 320       case GL_ADD:
 321          if (textureUnit->EnvMode == GL_COMBINE4_NV) {
 322             /* (a * b) + (c * d) */
 323             for (i = 0; i < n; i++) {
 324                rgba[i][RCOMP] = (arg0[i][RCOMP] * arg1[i][RCOMP] +
 325                                  arg2[i][RCOMP] * arg3[i][RCOMP]) * scaleRGB;
 326                rgba[i][GCOMP] = (arg0[i][GCOMP] * arg1[i][GCOMP] +
 327                                  arg2[i][GCOMP] * arg3[i][GCOMP]) * scaleRGB;
 328                rgba[i][BCOMP] = (arg0[i][BCOMP] * arg1[i][BCOMP] +
 329                                  arg2[i][BCOMP] * arg3[i][BCOMP]) * scaleRGB;
 330             }
 331          }
 332          else {
 333             /* 2-term addition */
 334             for (i = 0; i < n; i++) {
 335                rgba[i][RCOMP] = (arg0[i][RCOMP] + arg1[i][RCOMP]) * scaleRGB;
 336                rgba[i][GCOMP] = (arg0[i][GCOMP] + arg1[i][GCOMP]) * scaleRGB;
 337                rgba[i][BCOMP] = (arg0[i][BCOMP] + arg1[i][BCOMP]) * scaleRGB;
 338             }
 339          }
 340          break;
 341       case GL_ADD_SIGNED:
 342          if (textureUnit->EnvMode == GL_COMBINE4_NV) {
 343             /* (a * b) + (c * d) - 0.5 */
 344             for (i = 0; i < n; i++) {
 345                rgba[i][RCOMP] = (arg0[i][RCOMP] * arg1[i][RCOMP] +
 346                                  arg2[i][RCOMP] * arg3[i][RCOMP] - 0.5F) * scaleRGB;
 347                rgba[i][GCOMP] = (arg0[i][GCOMP] * arg1[i][GCOMP] +
 348                                  arg2[i][GCOMP] * arg3[i][GCOMP] - 0.5F) * scaleRGB;
 349                rgba[i][BCOMP] = (arg0[i][BCOMP] * arg1[i][BCOMP] +
 350                                  arg2[i][BCOMP] * arg3[i][BCOMP] - 0.5F) * scaleRGB;
 351             }
 352          }
 353          else {
 354             for (i = 0; i < n; i++) {
 355                rgba[i][RCOMP] = (arg0[i][RCOMP] + arg1[i][RCOMP] - 0.5F) * scaleRGB;
 356                rgba[i][GCOMP] = (arg0[i][GCOMP] + arg1[i][GCOMP] - 0.5F) * scaleRGB;
 357                rgba[i][BCOMP] = (arg0[i][BCOMP] + arg1[i][BCOMP] - 0.5F) * scaleRGB;
 358             }
 359          }
 360          break;
 361       case GL_INTERPOLATE:
 362          for (i = 0; i < n; i++) {
 363             rgba[i][RCOMP] = (arg0[i][RCOMP] * arg2[i][RCOMP] +
 364                           arg1[i][RCOMP] * (1.0F - arg2[i][RCOMP])) * scaleRGB;
 365             rgba[i][GCOMP] = (arg0[i][GCOMP] * arg2[i][GCOMP] +
 366                           arg1[i][GCOMP] * (1.0F - arg2[i][GCOMP])) * scaleRGB;
 367             rgba[i][BCOMP] = (arg0[i][BCOMP] * arg2[i][BCOMP] +
 368                           arg1[i][BCOMP] * (1.0F - arg2[i][BCOMP])) * scaleRGB;
 369          }
 370          break;
 371       case GL_SUBTRACT:
 372          for (i = 0; i < n; i++) {
 373             rgba[i][RCOMP] = (arg0[i][RCOMP] - arg1[i][RCOMP]) * scaleRGB;
 374             rgba[i][GCOMP] = (arg0[i][GCOMP] - arg1[i][GCOMP]) * scaleRGB;
 375             rgba[i][BCOMP] = (arg0[i][BCOMP] - arg1[i][BCOMP]) * scaleRGB;
 376          }
 377          break;
 378       case GL_DOT3_RGB_EXT:
 379       case GL_DOT3_RGBA_EXT:
 380          /* Do not scale the result by 1 2 or 4 */
 381          for (i = 0; i < n; i++) {
 382             GLfloat dot = ((arg0[i][RCOMP] - 0.5F) * (arg1[i][RCOMP] - 0.5F) +
 383                            (arg0[i][GCOMP] - 0.5F) * (arg1[i][GCOMP] - 0.5F) +
 384                            (arg0[i][BCOMP] - 0.5F) * (arg1[i][BCOMP] - 0.5F))
 385                * 4.0F;
 386             dot = CLAMP(dot, 0.0F, 1.0F);
 387             rgba[i][RCOMP] = rgba[i][GCOMP] = rgba[i][BCOMP] = dot;
 388          }
 389          break;
 390       case GL_DOT3_RGB:
 391       case GL_DOT3_RGBA:
 392          /* DO scale the result by 1 2 or 4 */
 393          for (i = 0; i < n; i++) {
 394             GLfloat dot = ((arg0[i][RCOMP] - 0.5F) * (arg1[i][RCOMP] - 0.5F) +
 395                            (arg0[i][GCOMP] - 0.5F) * (arg1[i][GCOMP] - 0.5F) +
 396                            (arg0[i][BCOMP] - 0.5F) * (arg1[i][BCOMP] - 0.5F))
 397                * 4.0F * scaleRGB;
 398             dot = CLAMP(dot, 0.0F, 1.0F);
 399             rgba[i][RCOMP] = rgba[i][GCOMP] = rgba[i][BCOMP] = dot;
 400          }
 401          break;
 402       case GL_MODULATE_ADD_ATI:
 403          for (i = 0; i < n; i++) {
 404             rgba[i][RCOMP] = ((arg0[i][RCOMP] * arg2[i][RCOMP]) +
 405                               arg1[i][RCOMP]) * scaleRGB;
 406             rgba[i][GCOMP] = ((arg0[i][GCOMP] * arg2[i][GCOMP]) +
 407                               arg1[i][GCOMP]) * scaleRGB;
 408             rgba[i][BCOMP] = ((arg0[i][BCOMP] * arg2[i][BCOMP]) +
 409                               arg1[i][BCOMP]) * scaleRGB;
 410          }
 411          break;
 412       case GL_MODULATE_SIGNED_ADD_ATI:
 413          for (i = 0; i < n; i++) {
 414             rgba[i][RCOMP] = ((arg0[i][RCOMP] * arg2[i][RCOMP]) +
 415                               arg1[i][RCOMP] - 0.5F) * scaleRGB;
 416             rgba[i][GCOMP] = ((arg0[i][GCOMP] * arg2[i][GCOMP]) +
 417                               arg1[i][GCOMP] - 0.5F) * scaleRGB;
 418             rgba[i][BCOMP] = ((arg0[i][BCOMP] * arg2[i][BCOMP]) +
 419                               arg1[i][BCOMP] - 0.5F) * scaleRGB;
 420          }
 421          break;
 422       case GL_MODULATE_SUBTRACT_ATI:
 423          for (i = 0; i < n; i++) {
 424             rgba[i][RCOMP] = ((arg0[i][RCOMP] * arg2[i][RCOMP]) -
 425                               arg1[i][RCOMP]) * scaleRGB;
 426             rgba[i][GCOMP] = ((arg0[i][GCOMP] * arg2[i][GCOMP]) -
 427                               arg1[i][GCOMP]) * scaleRGB;
 428             rgba[i][BCOMP] = ((arg0[i][BCOMP] * arg2[i][BCOMP]) -
 429                               arg1[i][BCOMP]) * scaleRGB;
 430          }
 431          break;
 432       case GL_BUMP_ENVMAP_ATI:
 433          /* this produces a fixed rgba color, and the coord calc is done elsewhere */
 434          for (i = 0; i < n; i++) {
 435             /* rgba result is 0,0,0,1 */
 436             rgba[i][RCOMP] = 0.0;
 437             rgba[i][GCOMP] = 0.0;
 438             rgba[i][BCOMP] = 0.0;
 439             rgba[i][ACOMP] = 1.0;
 440          }
 441          goto end; /* no alpha processing */
 442       default:
 443          _mesa_problem(ctx, "invalid combine mode");
 444       }
 445    }
 446
 447    /* Alpha channel combine */
 448    {
 449       float4_array arg0 = argA[0];
 450       float4_array arg1 = argA[1];
 451       float4_array arg2 = argA[2];
 452       float4_array arg3 = argA[3];
 453
 454       switch (combine->ModeA) {
 455       case GL_REPLACE:
 456          for (i = 0; i < n; i++) {
 457             rgba[i][ACOMP] = arg0[i][ACOMP] * scaleA;
 458          }
 459          break;
 460       case GL_MODULATE:
 461          for (i = 0; i < n; i++) {
 462             rgba[i][ACOMP] = arg0[i][ACOMP] * arg1[i][ACOMP] * scaleA;
 463          }
 464          break;
 465       case GL_ADD:
 466          if (textureUnit->EnvMode == GL_COMBINE4_NV) {
 467             /* (a * b) + (c * d) */
 468             for (i = 0; i < n; i++) {
 469                rgba[i][ACOMP] = (arg0[i][ACOMP] * arg1[i][ACOMP] +
 470                                  arg2[i][ACOMP] * arg3[i][ACOMP]) * scaleA;
 471             }
 472          }
 473          else {
 474             /* two-term add */
 475             for (i = 0; i < n; i++) {
 476                rgba[i][ACOMP] = (arg0[i][ACOMP] + arg1[i][ACOMP]) * scaleA;
 477             }
 478          }
 479          break;
 480       case GL_ADD_SIGNED:
 481          if (textureUnit->EnvMode == GL_COMBINE4_NV) {
 482             /* (a * b) + (c * d) - 0.5 */
 483             for (i = 0; i < n; i++) {
 484                rgba[i][ACOMP] = (arg0[i][ACOMP] * arg1[i][ACOMP] +
 485                                  arg2[i][ACOMP] * arg3[i][ACOMP] -
 486                                  0.5F) * scaleA;
 487             }
 488          }
 489          else {
 490             /* a + b - 0.5 */
 491             for (i = 0; i < n; i++) {
 492                rgba[i][ACOMP] = (arg0[i][ACOMP] + arg1[i][ACOMP] - 0.5F) * scaleA;
 493             }
 494          }
 495          break;
 496       case GL_INTERPOLATE:
 497          for (i = 0; i < n; i++) {
 498             rgba[i][ACOMP] = (arg0[i][ACOMP] * arg2[i][ACOMP] +
 499                               arg1[i][ACOMP] * (1.0F - arg2[i][ACOMP]))
 500                * scaleA;
 501          }
 502          break;
 503       case GL_SUBTRACT:
 504          for (i = 0; i < n; i++) {
 505             rgba[i][ACOMP] = (arg0[i][ACOMP] - arg1[i][ACOMP]) * scaleA;
 506          }
 507          break;
 508       case GL_MODULATE_ADD_ATI:
 509          for (i = 0; i < n; i++) {
 510             rgba[i][ACOMP] = ((arg0[i][ACOMP] * arg2[i][ACOMP])
 511                               + arg1[i][ACOMP]) * scaleA;
 512          }
 513          break;
 514       case GL_MODULATE_SIGNED_ADD_ATI:
 515          for (i = 0; i < n; i++) {
 516             rgba[i][ACOMP] = ((arg0[i][ACOMP] * arg2[i][ACOMP]) +
 517                               arg1[i][ACOMP] - 0.5F) * scaleA;
 518          }
 519          break;
 520       case GL_MODULATE_SUBTRACT_ATI:
 521          for (i = 0; i < n; i++) {
 522             rgba[i][ACOMP] = ((arg0[i][ACOMP] * arg2[i][ACOMP])
 523                               - arg1[i][ACOMP]) * scaleA;
 524          }
 525          break;
 526       default:
 527          _mesa_problem(ctx, "invalid combine mode");
 528       }
 529    }
 530
 531    /* Fix the alpha component for GL_DOT3_RGBA_EXT/ARB combining.
 532     * This is kind of a kludge.  It would have been better if the spec
 533     * were written such that the GL_COMBINE_ALPHA value could be set to
 534     * GL_DOT3.
 535     */
 536    if (combine->ModeRGB == GL_DOT3_RGBA_EXT ||
 537        combine->ModeRGB == GL_DOT3_RGBA) {
 538       for (i = 0; i < n; i++) {
 539          rgba[i][ACOMP] = rgba[i][RCOMP];
 540       }
 541    }
 542
 543    for (i = 0; i < n; i++) {
 544       UNCLAMPED_FLOAT_TO_CHAN(rgbaChan[i][RCOMP], rgba[i][RCOMP]);
 545       UNCLAMPED_FLOAT_TO_CHAN(rgbaChan[i][GCOMP], rgba[i][GCOMP]);
 546       UNCLAMPED_FLOAT_TO_CHAN(rgbaChan[i][BCOMP], rgba[i][BCOMP]);
 547       UNCLAMPED_FLOAT_TO_CHAN(rgbaChan[i][ACOMP], rgba[i][ACOMP]);
 548    }
 549    /* The span->array->rgba values are of CHAN type so set
 550     * span->array->ChanType field accordingly.
 551     */
 552    span->array->ChanType = CHAN_TYPE;
 553
 554 end:
 555    for (i = 0; i < numArgsRGB || i < numArgsA; i++) {
 556       free(ccolor[i]);
 557    }
 558    free(rgba);
 559 }
 560
 561
 562 /**
 563  * Apply X/Y/Z/W/0/1 swizzle to an array of colors/texels.
 564  * See GL_EXT_texture_swizzle.
 565  */
 566 static void
 567 swizzle_texels(GLuint swizzle, GLuint count, float4_array texels)
 568 {
 569    const GLuint swzR = GET_SWZ(swizzle, 0);
 570    const GLuint swzG = GET_SWZ(swizzle, 1);
 571    const GLuint swzB = GET_SWZ(swizzle, 2);
 572    const GLuint swzA = GET_SWZ(swizzle, 3);
 573    GLfloat vector[6];
 574    GLuint i;
 575
 576    vector[SWIZZLE_ZERO] = 0;
 577    vector[SWIZZLE_ONE] = 1.0F;
 578
 579    for (i = 0; i < count; i++) {
 580       vector[SWIZZLE_X] = texels[i][0];
 581       vector[SWIZZLE_Y] = texels[i][1];
 582       vector[SWIZZLE_Z] = texels[i][2];
 583       vector[SWIZZLE_W] = texels[i][3];
 584       texels[i][RCOMP] = vector[swzR];
 585       texels[i][GCOMP] = vector[swzG];
 586       texels[i][BCOMP] = vector[swzB];
 587       texels[i][ACOMP] = vector[swzA];
 588    }
 589 }
 590
 591
 592 /**
 593  * Apply texture mapping to a span of fragments.
 594  */
 595 void
 596 _swrast_texture_span( struct gl_context *ctx, SWspan *span )
 597 {
 598    SWcontext *swrast = SWRAST_CONTEXT(ctx);
 599    float4_array primary_rgba;
 600    GLuint unit;
 601
 602    if (!swrast->TexelBuffer) {
 603 #ifdef _OPENMP
 604       const GLint maxThreads = omp_get_max_threads();
 605 #else
 606       const GLint maxThreads = 1;
 607 #endif
 608
 609       /* TexelBuffer is also global and normally shared by all SWspan
 610        * instances; when running with multiple threads, create one per
 611        * thread.
 612        */
 613       swrast->TexelBuffer =
 614          malloc(ctx->Const.MaxTextureImageUnits * maxThreads *
 615                             SWRAST_MAX_WIDTH * 4 * sizeof(GLfloat));
 616       if (!swrast->TexelBuffer) {
 617          _mesa_error(ctx, GL_OUT_OF_MEMORY, "texture_combine");
 618          return;
 619       }
 620    }
 621
 622    primary_rgba = malloc(span->end * 4 * sizeof(GLfloat));
 623
 624    if (!primary_rgba) {
 625       _mesa_error(ctx, GL_OUT_OF_MEMORY, "texture_span");
 626       return;
 627    }
 628
 629    ASSERT(span->end <= SWRAST_MAX_WIDTH);
 630
 631    /*
 632     * Save copy of the incoming fragment colors (the GL_PRIMARY_COLOR)
 633     */
 634    if (swrast->_TextureCombinePrimary) {
 635       GLuint i;
 636       for (i = 0; i < span->end; i++) {
 637          primary_rgba[i][RCOMP] = CHAN_TO_FLOAT(span->array->rgba[i][RCOMP]);
 638          primary_rgba[i][GCOMP] = CHAN_TO_FLOAT(span->array->rgba[i][GCOMP]);
 639          primary_rgba[i][BCOMP] = CHAN_TO_FLOAT(span->array->rgba[i][BCOMP]);
 640          primary_rgba[i][ACOMP] = CHAN_TO_FLOAT(span->array->rgba[i][ACOMP]);
 641       }
 642    }
 643
 644    /* First must sample all bump maps */
 645    for (unit = 0; unit < ctx->Const.MaxTextureUnits; unit++) {
 646       const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
 647
 648       if (texUnit->_ReallyEnabled &&
 649          texUnit->_CurrentCombine->ModeRGB == GL_BUMP_ENVMAP_ATI) {
 650          const GLfloat (*texcoords)[4] = (const GLfloat (*)[4])
 651             span->array->attribs[VARYING_SLOT_TEX0 + unit];
 652          float4_array targetcoords =
 653             span->array->attribs[VARYING_SLOT_TEX0 +
 654                ctx->Texture.Unit[unit].BumpTarget - GL_TEXTURE0];
 655
 656          const struct gl_sampler_object *samp = _mesa_get_samplerobj(ctx, unit);
 657          GLfloat *lambda = span->array->lambda[unit];
 658          float4_array texels = get_texel_array(swrast, unit);
 659          GLuint i;
 660          GLfloat rotMatrix00 = ctx->Texture.Unit[unit].RotMatrix[0];
 661          GLfloat rotMatrix01 = ctx->Texture.Unit[unit].RotMatrix[1];
 662          GLfloat rotMatrix10 = ctx->Texture.Unit[unit].RotMatrix[2];
 663          GLfloat rotMatrix11 = ctx->Texture.Unit[unit].RotMatrix[3];
 664
 665          /* adjust texture lod (lambda) */
 666          if (span->arrayMask & SPAN_LAMBDA) {
 667             if (texUnit->LodBias + samp->LodBias != 0.0F) {
 668                /* apply LOD bias, but don't clamp yet */
 669                const GLfloat bias = CLAMP(texUnit->LodBias + samp->LodBias,
 670                                           -ctx->Const.MaxTextureLodBias,
 671                                           ctx->Const.MaxTextureLodBias);
 672                GLuint i;
 673                for (i = 0; i < span->end; i++) {
 674                   lambda[i] += bias;
 675                }
 676             }
 677
 678             if (samp->MinLod != -1000.0 ||
 679                 samp->MaxLod != 1000.0) {
 680                /* apply LOD clamping to lambda */
 681                const GLfloat min = samp->MinLod;
 682                const GLfloat max = samp->MaxLod;
 683                GLuint i;
 684                for (i = 0; i < span->end; i++) {
 685                   GLfloat l = lambda[i];
 686                   lambda[i] = CLAMP(l, min, max);
 687                }
 688             }
 689          }
 690
 691          /* Sample the texture (span->end = number of fragments) */
 692          swrast->TextureSample[unit]( ctx, samp,
 693                                       ctx->Texture.Unit[unit]._Current,
 694                                       span->end, texcoords, lambda, texels );
 695
 696          /* manipulate the span values of the bump target
 697             not sure this can work correctly even ignoring
 698             the problem that channel is unsigned */
 699          for (i = 0; i < span->end; i++) {
 700             targetcoords[i][0] += (texels[i][0] * rotMatrix00 + texels[i][1] *
 701                                   rotMatrix01) / targetcoords[i][3];
 702             targetcoords[i][1] += (texels[i][0] * rotMatrix10 + texels[i][1] *
 703                                   rotMatrix11) / targetcoords[i][3];
 704          }
 705       }
 706    }
 707
 708    /*
 709     * Must do all texture sampling before combining in order to
 710     * accomodate GL_ARB_texture_env_crossbar.
 711     */
 712    for (unit = 0; unit < ctx->Const.MaxTextureUnits; unit++) {
 713       const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
 714       if (texUnit->_ReallyEnabled &&
 715           texUnit->_CurrentCombine->ModeRGB != GL_BUMP_ENVMAP_ATI) {
 716          const GLfloat (*texcoords)[4] = (const GLfloat (*)[4])
 717             span->array->attribs[VARYING_SLOT_TEX0 + unit];
 718          const struct gl_texture_object *curObj = texUnit->_Current;
 719          const struct gl_sampler_object *samp = _mesa_get_samplerobj(ctx, unit);
 720          GLfloat *lambda = span->array->lambda[unit];
 721          float4_array texels = get_texel_array(swrast, unit);
 722
 723          /* adjust texture lod (lambda) */
 724          if (span->arrayMask & SPAN_LAMBDA) {
 725             if (texUnit->LodBias + samp->LodBias != 0.0F) {
 726                /* apply LOD bias, but don't clamp yet */
 727                const GLfloat bias = CLAMP(texUnit->LodBias + samp->LodBias,
 728                                           -ctx->Const.MaxTextureLodBias,
 729                                           ctx->Const.MaxTextureLodBias);
 730                GLuint i;
 731                for (i = 0; i < span->end; i++) {
 732                   lambda[i] += bias;
 733                }
 734             }
 735
 736             if (samp->MinLod != -1000.0 ||
 737                 samp->MaxLod != 1000.0) {
 738                /* apply LOD clamping to lambda */
 739                const GLfloat min = samp->MinLod;
 740                const GLfloat max = samp->MaxLod;
 741                GLuint i;
 742                for (i = 0; i < span->end; i++) {
 743                   GLfloat l = lambda[i];
 744                   lambda[i] = CLAMP(l, min, max);
 745                }
 746             }
 747          }
 748          else if (samp->MaxAnisotropy > 1.0 &&
 749                   samp->MinFilter == GL_LINEAR_MIPMAP_LINEAR) {
 750             /* sample_lambda_2d_aniso is beeing used as texture_sample_func,
 751              * it requires the current SWspan *span as an additional parameter.
 752              * In order to keep the same function signature, the unused lambda
 753              * parameter will be modified to actually contain the SWspan pointer.
 754              * This is a Hack. To make it right, the texture_sample_func
 755              * signature and all implementing functions need to be modified.
 756              */
 757             /* "hide" SWspan struct; cast to (GLfloat *) to suppress warning */
 758             lambda = (GLfloat *)span;
 759          }
 760
 761          /* Sample the texture (span->end = number of fragments) */
 762          swrast->TextureSample[unit]( ctx, samp,
 763                                       ctx->Texture.Unit[unit]._Current,
 764                                       span->end, texcoords, lambda, texels );
 765
 766          /* GL_EXT_texture_swizzle */
 767          if (curObj->_Swizzle != SWIZZLE_NOOP) {
 768             swizzle_texels(curObj->_Swizzle, span->end, texels);
 769          }
 770       }
 771    }
 772
 773    /*
 774     * OK, now apply the texture (aka texture combine/blend).
 775     * We modify the span->color.rgba values.
 776     */
 777    for (unit = 0; unit < ctx->Const.MaxTextureUnits; unit++) {
 778       if (ctx->Texture.Unit[unit]._ReallyEnabled)
 779          texture_combine(ctx, unit, primary_rgba, swrast->TexelBuffer, span);
 780    }
 781
 782    free(primary_rgba);
 783 }