src/mesa/swrast/s_nvfragprog.c

   1 /*
   2  * Mesa 3-D graphics library
   3  * Version:  6.5.2
   4  *
   5  * Copyright (C) 1999-2006  Brian Paul   All Rights Reserved.
   6  *
   7  * Permission is hereby granted, free of charge, to any person obtaining a
   8  * copy of this software and associated documentation files (the "Software"),
   9  * to deal in the Software without restriction, including without limitation
  10  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  11  * and/or sell copies of the Software, and to permit persons to whom the
  12  * Software is furnished to do so, subject to the following conditions:
  13  *
  14  * The above copyright notice and this permission notice shall be included
  15  * in all copies or substantial portions of the Software.
  16  *
  17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  18  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  20  * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
  21  * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  22  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  23  */
  24
  25 /*
  26  * Regarding GL_NV_fragment_program:
  27  *
  28  * Portions of this software may use or implement intellectual
  29  * property owned and licensed by NVIDIA Corporation. NVIDIA disclaims
  30  * any and all warranties with respect to such intellectual property,
  31  * including any use thereof or modifications thereto.
  32  */
  33
  34 #include "glheader.h"
  35 #include "colormac.h"
  36 #include "context.h"
  37 #include "program_instruction.h"
  38 #include "program.h"
  39
  40 #include "s_nvfragprog.h"
  41 #include "s_span.h"
  42
  43
  44 /* See comments below for info about this */
  45 #define LAMBDA_ZERO 1
  46
  47 /* if 1, print some debugging info */
  48 #define DEBUG_FRAG 0
  49
  50
/**
 * Virtual machine state used during execution of a fragment program.
 * Every register is a vector of four floats.
 */
struct fp_machine
{
   /** Temporary registers */
   GLfloat Temporaries[MAX_NV_FRAGMENT_PROGRAM_TEMPS][4];
   /** Input registers, indexed by FRAG_ATTRIB_* */
   GLfloat Inputs[MAX_NV_FRAGMENT_PROGRAM_INPUTS][4];
   /** Output registers */
   GLfloat Outputs[MAX_NV_FRAGMENT_PROGRAM_OUTPUTS][4];
   GLuint CondCodes[4];  /**< COND_* value for x/y/z/w */
};
  61
  62
  63 #if FEATURE_MESA_program_debug
  64 static struct fp_machine *CurrentMachine = NULL;
  65
  66 /**
  67  * For GL_MESA_program_debug.
  68  * Return current value (4*GLfloat) of a fragment program register.
  69  * Called via ctx->Driver.GetFragmentProgramRegister().
  70  */
  71 void
  72 _swrast_get_program_register(GLcontext *ctx, enum register_file file,
  73                              GLuint index, GLfloat val[4])
  74 {
  75    if (CurrentMachine) {
  76       switch (file) {
  77       case PROGRAM_INPUT:
  78          COPY_4V(val, CurrentMachine->Inputs[index]);
  79          break;
  80       case PROGRAM_OUTPUT:
  81          COPY_4V(val, CurrentMachine->Outputs[index]);
  82          break;
  83       case PROGRAM_TEMPORARY:
  84          COPY_4V(val, CurrentMachine->Temporaries[index]);
  85          break;
  86       default:
  87          _mesa_problem(NULL,
  88                        "bad register file in _swrast_get_program_register");
  89       }
  90    }
  91 }
  92 #endif /* FEATURE_MESA_program_debug */
  93
  94
  95 /**
  96  * Fetch a texel.
  97  */
  98 static void
  99 fetch_texel( GLcontext *ctx, const GLfloat texcoord[4], GLfloat lambda,
 100              GLuint unit, GLfloat color[4] )
 101 {
 102    GLchan rgba[4];
 103    SWcontext *swrast = SWRAST_CONTEXT(ctx);
 104
 105    /* XXX use a float-valued TextureSample routine here!!! */
 106    swrast->TextureSample[unit](ctx, ctx->Texture.Unit[unit]._Current,
 107                                1, (const GLfloat (*)[4]) texcoord,
 108                                &lambda, &rgba);
 109    color[0] = CHAN_TO_FLOAT(rgba[0]);
 110    color[1] = CHAN_TO_FLOAT(rgba[1]);
 111    color[2] = CHAN_TO_FLOAT(rgba[2]);
 112    color[3] = CHAN_TO_FLOAT(rgba[3]);
 113 }
 114
 115
 116 /**
 117  * Fetch a texel with the given partial derivatives to compute a level
 118  * of detail in the mipmap.
 119  */
 120 static void
 121 fetch_texel_deriv( GLcontext *ctx, const GLfloat texcoord[4],
 122                    const GLfloat texdx[4], const GLfloat texdy[4],
 123                    GLuint unit, GLfloat color[4] )
 124 {
 125    SWcontext *swrast = SWRAST_CONTEXT(ctx);
 126    const struct gl_texture_object *texObj = ctx->Texture.Unit[unit]._Current;
 127    const struct gl_texture_image *texImg = texObj->Image[0][texObj->BaseLevel];
 128    const GLfloat texW = (GLfloat) texImg->WidthScale;
 129    const GLfloat texH = (GLfloat) texImg->HeightScale;
 130    GLchan rgba[4];
 131
 132    GLfloat lambda = _swrast_compute_lambda(texdx[0], texdy[0], /* ds/dx, ds/dy */
 133                                          texdx[1], texdy[1], /* dt/dx, dt/dy */
 134                                          texdx[3], texdy[2], /* dq/dx, dq/dy */
 135                                          texW, texH,
 136                                          texcoord[0], texcoord[1], texcoord[3],
 137                                          1.0F / texcoord[3]);
 138
 139    swrast->TextureSample[unit](ctx, ctx->Texture.Unit[unit]._Current,
 140                                1, (const GLfloat (*)[4]) texcoord,
 141                                &lambda, &rgba);
 142    color[0] = CHAN_TO_FLOAT(rgba[0]);
 143    color[1] = CHAN_TO_FLOAT(rgba[1]);
 144    color[2] = CHAN_TO_FLOAT(rgba[2]);
 145    color[3] = CHAN_TO_FLOAT(rgba[3]);
 146 }
 147
 148
 149 /**
 150  * Return a pointer to the 4-element float vector specified by the given
 151  * source register.
 152  */
 153 static INLINE const GLfloat *
 154 get_register_pointer( GLcontext *ctx,
 155                       const struct prog_src_register *source,
 156                       const struct fp_machine *machine,
 157                       const struct gl_fragment_program *program )
 158 {
 159    switch (source->File) {
 160    case PROGRAM_TEMPORARY:
 161       ASSERT(source->Index < MAX_NV_FRAGMENT_PROGRAM_TEMPS);
 162       return machine->Temporaries[source->Index];
 163    case PROGRAM_INPUT:
 164       ASSERT(source->Index < MAX_NV_FRAGMENT_PROGRAM_INPUTS);
 165       return machine->Inputs[source->Index];
 166    case PROGRAM_OUTPUT:
 167       /* This is only for PRINT */
 168       ASSERT(source->Index < MAX_NV_FRAGMENT_PROGRAM_OUTPUTS);
 169       return machine->Outputs[source->Index];
 170    case PROGRAM_LOCAL_PARAM:
 171       ASSERT(source->Index < MAX_PROGRAM_LOCAL_PARAMS);
 172       return program->Base.LocalParams[source->Index];
 173    case PROGRAM_ENV_PARAM:
 174       ASSERT(source->Index < MAX_NV_FRAGMENT_PROGRAM_PARAMS);
 175       return ctx->FragmentProgram.Parameters[source->Index];
 176    case PROGRAM_STATE_VAR:
 177       /* Fallthrough */
 178    case PROGRAM_CONSTANT:
 179       /* Fallthrough */
 180    case PROGRAM_NAMED_PARAM:
 181       ASSERT(source->Index < (GLint) program->Base.Parameters->NumParameters);
 182       return program->Base.Parameters->ParameterValues[source->Index];
 183    default:
 184       _mesa_problem(ctx, "Invalid input register file %d in fp "
 185                     "get_register_pointer", source->File);
 186       return NULL;
 187    }
 188 }
 189
 190
 191 /**
 192  * Fetch a 4-element float vector from the given source register.
 193  * Apply swizzling and negating as needed.
 194  */
 195 static void
 196 fetch_vector4( GLcontext *ctx,
 197                const struct prog_src_register *source,
 198                const struct fp_machine *machine,
 199                const struct gl_fragment_program *program,
 200                GLfloat result[4] )
 201 {
 202    const GLfloat *src = get_register_pointer(ctx, source, machine, program);
 203    ASSERT(src);
 204
 205    if (source->Swizzle == MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y,
 206                                         SWIZZLE_Z, SWIZZLE_W)) {
 207       /* no swizzling */
 208       COPY_4V(result, src);
 209    }
 210    else {
 211       result[0] = src[GET_SWZ(source->Swizzle, 0)];
 212       result[1] = src[GET_SWZ(source->Swizzle, 1)];
 213       result[2] = src[GET_SWZ(source->Swizzle, 2)];
 214       result[3] = src[GET_SWZ(source->Swizzle, 3)];
 215    }
 216
 217    if (source->NegateBase) {
 218       result[0] = -result[0];
 219       result[1] = -result[1];
 220       result[2] = -result[2];
 221       result[3] = -result[3];
 222    }
 223    if (source->Abs) {
 224       result[0] = FABSF(result[0]);
 225       result[1] = FABSF(result[1]);
 226       result[2] = FABSF(result[2]);
 227       result[3] = FABSF(result[3]);
 228    }
 229    if (source->NegateAbs) {
 230       result[0] = -result[0];
 231       result[1] = -result[1];
 232       result[2] = -result[2];
 233       result[3] = -result[3];
 234    }
 235 }
 236
 237
 238 /**
 239  * Fetch the derivative with respect to X for the given register.
 240  * \return GL_TRUE if it was easily computed or GL_FALSE if we
 241  * need to execute another instance of the program (ugh)!
 242  */
 243 static GLboolean
 244 fetch_vector4_deriv( GLcontext *ctx,
 245                      const struct prog_src_register *source,
 246                      const SWspan *span,
 247                      char xOrY, GLint column, GLfloat result[4] )
 248 {
 249    GLfloat src[4];
 250
 251    ASSERT(xOrY == 'X' || xOrY == 'Y');
 252
 253    switch (source->Index) {
 254    case FRAG_ATTRIB_WPOS:
 255       if (xOrY == 'X') {
 256          src[0] = 1.0;
 257          src[1] = 0.0;
 258          src[2] = span->dzdx / ctx->DrawBuffer->_DepthMaxF;
 259          src[3] = span->dwdx;
 260       }
 261       else {
 262          src[0] = 0.0;
 263          src[1] = 1.0;
 264          src[2] = span->dzdy / ctx->DrawBuffer->_DepthMaxF;
 265          src[3] = span->dwdy;
 266       }
 267       break;
 268    case FRAG_ATTRIB_COL0:
 269       if (xOrY == 'X') {
 270          src[0] = span->drdx * (1.0F / CHAN_MAXF);
 271          src[1] = span->dgdx * (1.0F / CHAN_MAXF);
 272          src[2] = span->dbdx * (1.0F / CHAN_MAXF);
 273          src[3] = span->dadx * (1.0F / CHAN_MAXF);
 274       }
 275       else {
 276          src[0] = span->drdy * (1.0F / CHAN_MAXF);
 277          src[1] = span->dgdy * (1.0F / CHAN_MAXF);
 278          src[2] = span->dbdy * (1.0F / CHAN_MAXF);
 279          src[3] = span->dady * (1.0F / CHAN_MAXF);
 280       }
 281       break;
 282    case FRAG_ATTRIB_COL1:
 283       if (xOrY == 'X') {
 284          src[0] = span->dsrdx * (1.0F / CHAN_MAXF);
 285          src[1] = span->dsgdx * (1.0F / CHAN_MAXF);
 286          src[2] = span->dsbdx * (1.0F / CHAN_MAXF);
 287          src[3] = 0.0; /* XXX need this */
 288       }
 289       else {
 290          src[0] = span->dsrdy * (1.0F / CHAN_MAXF);
 291          src[1] = span->dsgdy * (1.0F / CHAN_MAXF);
 292          src[2] = span->dsbdy * (1.0F / CHAN_MAXF);
 293          src[3] = 0.0; /* XXX need this */
 294       }
 295       break;
 296    case FRAG_ATTRIB_FOGC:
 297       if (xOrY == 'X') {
 298          src[0] = span->dfogdx;
 299          src[1] = 0.0;
 300          src[2] = 0.0;
 301          src[3] = 0.0;
 302       }
 303       else {
 304          src[0] = span->dfogdy;
 305          src[1] = 0.0;
 306          src[2] = 0.0;
 307          src[3] = 0.0;
 308       }
 309       break;
 310    case FRAG_ATTRIB_TEX0:
 311    case FRAG_ATTRIB_TEX1:
 312    case FRAG_ATTRIB_TEX2:
 313    case FRAG_ATTRIB_TEX3:
 314    case FRAG_ATTRIB_TEX4:
 315    case FRAG_ATTRIB_TEX5:
 316    case FRAG_ATTRIB_TEX6:
 317    case FRAG_ATTRIB_TEX7:
 318       if (xOrY == 'X') {
 319          const GLuint u = source->Index - FRAG_ATTRIB_TEX0;
 320          /* this is a little tricky - I think I've got it right */
 321          const GLfloat invQ = 1.0f / (span->tex[u][3]
 322                                       + span->texStepX[u][3] * column);
 323          src[0] = span->texStepX[u][0] * invQ;
 324          src[1] = span->texStepX[u][1] * invQ;
 325          src[2] = span->texStepX[u][2] * invQ;
 326          src[3] = span->texStepX[u][3] * invQ;
 327       }
 328       else {
 329          const GLuint u = source->Index - FRAG_ATTRIB_TEX0;
 330          /* Tricky, as above, but in Y direction */
 331          const GLfloat invQ = 1.0f / (span->tex[u][3] + span->texStepY[u][3]);
 332          src[0] = span->texStepY[u][0] * invQ;
 333          src[1] = span->texStepY[u][1] * invQ;
 334          src[2] = span->texStepY[u][2] * invQ;
 335          src[3] = span->texStepY[u][3] * invQ;
 336       }
 337       break;
 338    default:
 339       return GL_FALSE;
 340    }
 341
 342    result[0] = src[GET_SWZ(source->Swizzle, 0)];
 343    result[1] = src[GET_SWZ(source->Swizzle, 1)];
 344    result[2] = src[GET_SWZ(source->Swizzle, 2)];
 345    result[3] = src[GET_SWZ(source->Swizzle, 3)];
 346
 347    if (source->NegateBase) {
 348       result[0] = -result[0];
 349       result[1] = -result[1];
 350       result[2] = -result[2];
 351       result[3] = -result[3];
 352    }
 353    if (source->Abs) {
 354       result[0] = FABSF(result[0]);
 355       result[1] = FABSF(result[1]);
 356       result[2] = FABSF(result[2]);
 357       result[3] = FABSF(result[3]);
 358    }
 359    if (source->NegateAbs) {
 360       result[0] = -result[0];
 361       result[1] = -result[1];
 362       result[2] = -result[2];
 363       result[3] = -result[3];
 364    }
 365    return GL_TRUE;
 366 }
 367
 368
 369 /**
 370  * As above, but only return result[0] element.
 371  */
 372 static void
 373 fetch_vector1( GLcontext *ctx,
 374                const struct prog_src_register *source,
 375                const struct fp_machine *machine,
 376                const struct gl_fragment_program *program,
 377                GLfloat result[4] )
 378 {
 379    const GLfloat *src = get_register_pointer(ctx, source, machine, program);
 380    ASSERT(src);
 381
 382    result[0] = src[GET_SWZ(source->Swizzle, 0)];
 383
 384    if (source->NegateBase) {
 385       result[0] = -result[0];
 386    }
 387    if (source->Abs) {
 388       result[0] = FABSF(result[0]);
 389    }
 390    if (source->NegateAbs) {
 391       result[0] = -result[0];
 392    }
 393 }
 394
 395
 396 /**
 397  * Test value against zero and return GT, LT, EQ or UN if NaN.
 398  */
 399 static INLINE GLuint
 400 generate_cc( float value )
 401 {
 402    if (value != value)
 403       return COND_UN;  /* NaN */
 404    if (value > 0.0F)
 405       return COND_GT;
 406    if (value < 0.0F)
 407       return COND_LT;
 408    return COND_EQ;
 409 }
 410
 411
 412 /**
 413  * Test if the ccMaskRule is satisfied by the given condition code.
 414  * Used to mask destination writes according to the current condition code.
 415  */
 416 static INLINE GLboolean
 417 test_cc(GLuint condCode, GLuint ccMaskRule)
 418 {
 419    switch (ccMaskRule) {
 420    case COND_EQ: return (condCode == COND_EQ);
 421    case COND_NE: return (condCode != COND_EQ);
 422    case COND_LT: return (condCode == COND_LT);
 423    case COND_GE: return (condCode == COND_GT || condCode == COND_EQ);
 424    case COND_LE: return (condCode == COND_LT || condCode == COND_EQ);
 425    case COND_GT: return (condCode == COND_GT);
 426    case COND_TR: return GL_TRUE;
 427    case COND_FL: return GL_FALSE;
 428    default:      return GL_TRUE;
 429    }
 430 }
 431
 432
 433 /**
 434  * Store 4 floats into a register.  Observe the instructions saturate and
 435  * set-condition-code flags.
 436  */
 437 static void
 438 store_vector4( const struct prog_instruction *inst,
 439                struct fp_machine *machine,
 440                const GLfloat value[4] )
 441 {
 442    const struct prog_dst_register *dest = &(inst->DstReg);
 443    const GLboolean clamp = inst->SaturateMode == SATURATE_ZERO_ONE;
 444    GLfloat *dstReg;
 445    GLfloat dummyReg[4];
 446    GLfloat clampedValue[4];
 447    GLuint writeMask = dest->WriteMask;
 448
 449    switch (dest->File) {
 450       case PROGRAM_OUTPUT:
 451          dstReg = machine->Outputs[dest->Index];
 452          break;
 453       case PROGRAM_TEMPORARY:
 454          dstReg = machine->Temporaries[dest->Index];
 455          break;
 456       case PROGRAM_WRITE_ONLY:
 457          dstReg = dummyReg;
 458          return;
 459       default:
 460          _mesa_problem(NULL, "bad register file in store_vector4(fp)");
 461          return;
 462    }
 463
 464 #if DEBUG_FRAG
 465    if (value[0] > 1.0e10 ||
 466        IS_INF_OR_NAN(value[0]) ||
 467        IS_INF_OR_NAN(value[1]) ||
 468        IS_INF_OR_NAN(value[2]) ||
 469        IS_INF_OR_NAN(value[3])  )
 470       printf("store %g %g %g %g\n", value[0], value[1], value[2], value[3]);
 471 #endif
 472
 473    if (clamp) {
 474       clampedValue[0] = CLAMP(value[0], 0.0F, 1.0F);
 475       clampedValue[1] = CLAMP(value[1], 0.0F, 1.0F);
 476       clampedValue[2] = CLAMP(value[2], 0.0F, 1.0F);
 477       clampedValue[3] = CLAMP(value[3], 0.0F, 1.0F);
 478       value = clampedValue;
 479    }
 480
 481    if (dest->CondMask != COND_TR) {
 482       /* condition codes may turn off some writes */
 483       if (writeMask & WRITEMASK_X) {
 484          if (!test_cc(machine->CondCodes[GET_SWZ(dest->CondSwizzle, 0)],
 485                       dest->CondMask))
 486             writeMask &= ~WRITEMASK_X;
 487       }
 488       if (writeMask & WRITEMASK_Y) {
 489          if (!test_cc(machine->CondCodes[GET_SWZ(dest->CondSwizzle, 1)],
 490                       dest->CondMask))
 491             writeMask &= ~WRITEMASK_Y;
 492       }
 493       if (writeMask & WRITEMASK_Z) {
 494          if (!test_cc(machine->CondCodes[GET_SWZ(dest->CondSwizzle, 2)],
 495                       dest->CondMask))
 496             writeMask &= ~WRITEMASK_Z;
 497       }
 498       if (writeMask & WRITEMASK_W) {
 499          if (!test_cc(machine->CondCodes[GET_SWZ(dest->CondSwizzle, 3)],
 500                       dest->CondMask))
 501             writeMask &= ~WRITEMASK_W;
 502       }
 503    }
 504
 505    if (writeMask & WRITEMASK_X)
 506       dstReg[0] = value[0];
 507    if (writeMask & WRITEMASK_Y)
 508       dstReg[1] = value[1];
 509    if (writeMask & WRITEMASK_Z)
 510       dstReg[2] = value[2];
 511    if (writeMask & WRITEMASK_W)
 512       dstReg[3] = value[3];
 513
 514    if (inst->CondUpdate) {
 515       if (writeMask & WRITEMASK_X)
 516          machine->CondCodes[0] = generate_cc(value[0]);
 517       if (writeMask & WRITEMASK_Y)
 518          machine->CondCodes[1] = generate_cc(value[1]);
 519       if (writeMask & WRITEMASK_Z)
 520          machine->CondCodes[2] = generate_cc(value[2]);
 521       if (writeMask & WRITEMASK_W)
 522          machine->CondCodes[3] = generate_cc(value[3]);
 523    }
 524 }
 525
 526
 527 /**
 528  * Initialize a new machine state instance from an existing one, adding
 529  * the partial derivatives onto the input registers.
 530  * Used to implement DDX and DDY instructions in non-trivial cases.
 531  */
 532 static void
 533 init_machine_deriv( GLcontext *ctx,
 534                     const struct fp_machine *machine,
 535                     const struct gl_fragment_program *program,
 536                     const SWspan *span, char xOrY,
 537                     struct fp_machine *dMachine )
 538 {
 539    GLuint u;
 540
 541    ASSERT(xOrY == 'X' || xOrY == 'Y');
 542
 543    /* copy existing machine */
 544    _mesa_memcpy(dMachine, machine, sizeof(struct fp_machine));
 545
 546    if (program->Base.Target == GL_FRAGMENT_PROGRAM_NV) {
 547       /* Clear temporary registers (undefined for ARB_f_p) */
 548       _mesa_bzero( (void*) machine->Temporaries,
 549                    MAX_NV_FRAGMENT_PROGRAM_TEMPS * 4 * sizeof(GLfloat));
 550    }
 551
 552    /* Add derivatives */
 553    if (program->Base.InputsRead & (1 << FRAG_ATTRIB_WPOS)) {
 554       GLfloat *wpos = (GLfloat*) machine->Inputs[FRAG_ATTRIB_WPOS];
 555       if (xOrY == 'X') {
 556          wpos[0] += 1.0F;
 557          wpos[1] += 0.0F;
 558          wpos[2] += span->dzdx;
 559          wpos[3] += span->dwdx;
 560       }
 561       else {
 562          wpos[0] += 0.0F;
 563          wpos[1] += 1.0F;
 564          wpos[2] += span->dzdy;
 565          wpos[3] += span->dwdy;
 566       }
 567    }
 568    if (program->Base.InputsRead & (1 << FRAG_ATTRIB_COL0)) {
 569       GLfloat *col0 = (GLfloat*) machine->Inputs[FRAG_ATTRIB_COL0];
 570       if (xOrY == 'X') {
 571          col0[0] += span->drdx * (1.0F / CHAN_MAXF);
 572          col0[1] += span->dgdx * (1.0F / CHAN_MAXF);
 573          col0[2] += span->dbdx * (1.0F / CHAN_MAXF);
 574          col0[3] += span->dadx * (1.0F / CHAN_MAXF);
 575       }
 576       else {
 577          col0[0] += span->drdy * (1.0F / CHAN_MAXF);
 578          col0[1] += span->dgdy * (1.0F / CHAN_MAXF);
 579          col0[2] += span->dbdy * (1.0F / CHAN_MAXF);
 580          col0[3] += span->dady * (1.0F / CHAN_MAXF);
 581       }
 582    }
 583    if (program->Base.InputsRead & (1 << FRAG_ATTRIB_COL1)) {
 584       GLfloat *col1 = (GLfloat*) machine->Inputs[FRAG_ATTRIB_COL1];
 585       if (xOrY == 'X') {
 586          col1[0] += span->dsrdx * (1.0F / CHAN_MAXF);
 587          col1[1] += span->dsgdx * (1.0F / CHAN_MAXF);
 588          col1[2] += span->dsbdx * (1.0F / CHAN_MAXF);
 589          col1[3] += 0.0; /*XXX fix */
 590       }
 591       else {
 592          col1[0] += span->dsrdy * (1.0F / CHAN_MAXF);
 593          col1[1] += span->dsgdy * (1.0F / CHAN_MAXF);
 594          col1[2] += span->dsbdy * (1.0F / CHAN_MAXF);
 595          col1[3] += 0.0; /*XXX fix */
 596       }
 597    }
 598    if (program->Base.InputsRead & (1 << FRAG_ATTRIB_FOGC)) {
 599       GLfloat *fogc = (GLfloat*) machine->Inputs[FRAG_ATTRIB_FOGC];
 600       if (xOrY == 'X') {
 601          fogc[0] += span->dfogdx;
 602       }
 603       else {
 604          fogc[0] += span->dfogdy;
 605       }
 606    }
 607    for (u = 0; u < ctx->Const.MaxTextureCoordUnits; u++) {
 608       if (program->Base.InputsRead & (1 << (FRAG_ATTRIB_TEX0 + u))) {
 609          GLfloat *tex = (GLfloat*) machine->Inputs[FRAG_ATTRIB_TEX0 + u];
 610          /* XXX perspective-correct interpolation */
 611          if (xOrY == 'X') {
 612             tex[0] += span->texStepX[u][0];
 613             tex[1] += span->texStepX[u][1];
 614             tex[2] += span->texStepX[u][2];
 615             tex[3] += span->texStepX[u][3];
 616          }
 617          else {
 618             tex[0] += span->texStepY[u][0];
 619             tex[1] += span->texStepY[u][1];
 620             tex[2] += span->texStepY[u][2];
 621             tex[3] += span->texStepY[u][3];
 622          }
 623       }
 624    }
 625
 626    /* init condition codes */
 627    dMachine->CondCodes[0] = COND_EQ;
 628    dMachine->CondCodes[1] = COND_EQ;
 629    dMachine->CondCodes[2] = COND_EQ;
 630    dMachine->CondCodes[3] = COND_EQ;
 631 }
 632
 633
 634 /**
  635  * Execute the given fragment program.
 636  * NOTE: we do everything in single-precision floating point; we don't
 637  * currently observe the single/half/fixed-precision qualifiers.
 638  * \param ctx - rendering context
 639  * \param program - the fragment program to execute
 640  * \param machine - machine state (register file)
 641  * \param maxInst - max number of instructions to execute
 642  * \return GL_TRUE if program completed or GL_FALSE if program executed KIL.
 643  */
 644 static GLboolean
 645 execute_program( GLcontext *ctx,
 646                  const struct gl_fragment_program *program, GLuint maxInst,
 647                  struct fp_machine *machine, const SWspan *span,
 648                  GLuint column )
 649 {
 650    GLuint pc;
 651
 652 #if DEBUG_FRAG
 653    printf("execute fragment program --------------------\n");
 654 #endif
 655
 656    for (pc = 0; pc < maxInst; pc++) {
 657       const struct prog_instruction *inst = program->Base.Instructions + pc;
 658
 659       if (ctx->FragmentProgram.CallbackEnabled &&
 660           ctx->FragmentProgram.Callback) {
 661          ctx->FragmentProgram.CurrentPosition = inst->StringPos;
 662          ctx->FragmentProgram.Callback(program->Base.Target,
 663                                        ctx->FragmentProgram.CallbackData);
 664       }
 665
 666 #if DEBUG_FRAG
 667       _mesa_print_instruction(inst);
 668 #endif
 669       switch (inst->Opcode) {
 670          case OPCODE_ABS:
 671             {
 672                GLfloat a[4], result[4];
 673                fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
 674                result[0] = FABSF(a[0]);
 675                result[1] = FABSF(a[1]);
 676                result[2] = FABSF(a[2]);
 677                result[3] = FABSF(a[3]);
 678                store_vector4( inst, machine, result );
 679             }
 680             break;
 681          case OPCODE_ADD:
 682             {
 683                GLfloat a[4], b[4], result[4];
 684                fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
 685                fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
 686                result[0] = a[0] + b[0];
 687                result[1] = a[1] + b[1];
 688                result[2] = a[2] + b[2];
 689                result[3] = a[3] + b[3];
 690                store_vector4( inst, machine, result );
 691 #if DEBUG_FRAG
 692                printf("ADD (%g %g %g %g) = (%g %g %g %g) + (%g %g %g %g)\n",
 693                       result[0], result[1], result[2], result[3],
 694                       a[0], a[1], a[2], a[3],
 695                       b[0], b[1], b[2], b[3]);
 696 #endif
 697             }
 698             break;
 699          case OPCODE_CMP:
 700             {
 701                GLfloat a[4], b[4], c[4], result[4];
 702                fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
 703                fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
 704                fetch_vector4( ctx, &inst->SrcReg[2], machine, program, c );
 705                result[0] = a[0] < 0.0F ? b[0] : c[0];
 706                result[1] = a[1] < 0.0F ? b[1] : c[1];
 707                result[2] = a[2] < 0.0F ? b[2] : c[2];
 708                result[3] = a[3] < 0.0F ? b[3] : c[3];
 709                store_vector4( inst, machine, result );
 710             }
 711             break;
 712          case OPCODE_COS:
 713             {
 714                GLfloat a[4], result[4];
 715                fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
 716                result[0] = result[1] = result[2] = result[3]
 717                   = (GLfloat) _mesa_cos(a[0]);
 718                store_vector4( inst, machine, result );
 719             }
 720             break;
 721          case OPCODE_DDX: /* Partial derivative with respect to X */
 722             {
 723                GLfloat a[4], aNext[4], result[4];
 724                struct fp_machine dMachine;
 725                if (!fetch_vector4_deriv(ctx, &inst->SrcReg[0], span, 'X',
 726                                         column, result)) {
 727                   /* This is tricky.  Make a copy of the current machine state,
 728                    * increment the input registers by the dx or dy partial
 729                    * derivatives, then re-execute the program up to the
  730                    * preceding instruction, then fetch the source register.
 731                    * Finally, find the difference in the register values for
 732                    * the original and derivative runs.
 733                    */
 734                   fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a);
 735                   init_machine_deriv(ctx, machine, program, span,
 736                                      'X', &dMachine);
 737                   execute_program(ctx, program, pc, &dMachine, span, column);
 738                   fetch_vector4( ctx, &inst->SrcReg[0], &dMachine, program, aNext );
 739                   result[0] = aNext[0] - a[0];
 740                   result[1] = aNext[1] - a[1];
 741                   result[2] = aNext[2] - a[2];
 742                   result[3] = aNext[3] - a[3];
 743                }
 744                store_vector4( inst, machine, result );
 745             }
 746             break;
 747          case OPCODE_DDY: /* Partial derivative with respect to Y */
 748             {
 749                GLfloat a[4], aNext[4], result[4];
 750                struct fp_machine dMachine;
 751                if (!fetch_vector4_deriv(ctx, &inst->SrcReg[0], span, 'Y',
 752                                         column, result)) {
 753                   init_machine_deriv(ctx, machine, program, span,
 754                                      'Y', &dMachine);
 755                   fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a);
 756                   execute_program(ctx, program, pc, &dMachine, span, column);
 757                   fetch_vector4( ctx, &inst->SrcReg[0], &dMachine, program, aNext );
 758                   result[0] = aNext[0] - a[0];
 759                   result[1] = aNext[1] - a[1];
 760                   result[2] = aNext[2] - a[2];
 761                   result[3] = aNext[3] - a[3];
 762                }
 763                store_vector4( inst, machine, result );
 764             }
 765             break;
 766          case OPCODE_DP3:
 767             {
 768                GLfloat a[4], b[4], result[4];
 769                fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
 770                fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
 771                result[0] = result[1] = result[2] = result[3] = DOT3(a, b);
 772                store_vector4( inst, machine, result );
 773 #if DEBUG_FRAG
 774                printf("DP3 %g = (%g %g %g) . (%g %g %g)\n",
 775                       result[0], a[0], a[1], a[2], b[0], b[1], b[2]);
 776 #endif
 777             }
 778             break;
 779          case OPCODE_DP4:
 780             {
 781                GLfloat a[4], b[4], result[4];
 782                fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
 783                fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
 784                result[0] = result[1] = result[2] = result[3] = DOT4(a,b);
 785                store_vector4( inst, machine, result );
 786 #if DEBUG_FRAG
 787                printf("DP4 %g = (%g, %g %g %g) . (%g, %g %g %g)\n",
 788                       result[0], a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
 789 #endif
 790             }
 791             break;
 792          case OPCODE_DPH:
 793             {
 794                GLfloat a[4], b[4], result[4];
 795                fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
 796                fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
 797                result[0] = result[1] = result[2] = result[3] =
 798                   a[0] * b[0] + a[1] * b[1] + a[2] * b[2] + b[3];
 799                store_vector4( inst, machine, result );
 800             }
 801             break;
 802          case OPCODE_DST: /* Distance vector */
 803             {
 804                GLfloat a[4], b[4], result[4];
 805                fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
 806                fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
 807                result[0] = 1.0F;
 808                result[1] = a[1] * b[1];
 809                result[2] = a[2];
 810                result[3] = b[3];
 811                store_vector4( inst, machine, result );
 812             }
 813             break;
 814          case OPCODE_EX2: /* Exponential base 2 */
 815             {
 816                GLfloat a[4], result[4];
 817                fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
 818                result[0] = result[1] = result[2] = result[3] =
 819                   (GLfloat) _mesa_pow(2.0, a[0]);
 820                store_vector4( inst, machine, result );
 821             }
 822             break;
 823          case OPCODE_FLR:
 824             {
 825                GLfloat a[4], result[4];
 826                fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
 827                result[0] = FLOORF(a[0]);
 828                result[1] = FLOORF(a[1]);
 829                result[2] = FLOORF(a[2]);
 830                result[3] = FLOORF(a[3]);
 831                store_vector4( inst, machine, result );
 832             }
 833             break;
 834          case OPCODE_FRC:
 835             {
 836                GLfloat a[4], result[4];
 837                fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
 838                result[0] = a[0] - FLOORF(a[0]);
 839                result[1] = a[1] - FLOORF(a[1]);
 840                result[2] = a[2] - FLOORF(a[2]);
 841                result[3] = a[3] - FLOORF(a[3]);
 842                store_vector4( inst, machine, result );
 843             }
 844             break;
 845          case OPCODE_KIL_NV: /* NV_f_p only */
 846             {
 847                const GLuint swizzle = inst->DstReg.CondSwizzle;
 848                const GLuint condMask = inst->DstReg.CondMask;
 849                if (test_cc(machine->CondCodes[GET_SWZ(swizzle, 0)], condMask) ||
 850                    test_cc(machine->CondCodes[GET_SWZ(swizzle, 1)], condMask) ||
 851                    test_cc(machine->CondCodes[GET_SWZ(swizzle, 2)], condMask) ||
 852                    test_cc(machine->CondCodes[GET_SWZ(swizzle, 3)], condMask)) {
 853                   return GL_FALSE;
 854                }
 855             }
 856             break;
 857          case OPCODE_KIL: /* ARB_f_p only */
 858             {
 859                GLfloat a[4];
 860                fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
 861                if (a[0] < 0.0F || a[1] < 0.0F || a[2] < 0.0F || a[3] < 0.0F) {
 862                   return GL_FALSE;
 863                }
 864             }
 865             break;
 866          case OPCODE_LG2:  /* log base 2 */
 867             {
 868                GLfloat a[4], result[4];
 869                fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
 870                result[0] = result[1] = result[2] = result[3] = LOG2(a[0]);
 871                store_vector4( inst, machine, result );
 872             }
 873             break;
 874          case OPCODE_LIT:
 875             {
 876                const GLfloat epsilon = 1.0F / 256.0F; /* from NV VP spec */
 877                GLfloat a[4], result[4];
 878                fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
 879                a[0] = MAX2(a[0], 0.0F);
 880                a[1] = MAX2(a[1], 0.0F);
 881                /* XXX ARB version clamps a[3], NV version doesn't */
 882                a[3] = CLAMP(a[3], -(128.0F - epsilon), (128.0F - epsilon));
 883                result[0] = 1.0F;
 884                result[1] = a[0];
 885                /* XXX we could probably just use pow() here */
 886                if (a[0] > 0.0F) {
 887                   if (a[1] == 0.0 && a[3] == 0.0)
 888                      result[2] = 1.0;
 889                   else
 890                      result[2] = EXPF(a[3] * LOGF(a[1]));
 891                }
 892                else {
 893                   result[2] = 0.0;
 894                }
 895                result[3] = 1.0F;
 896                store_vector4( inst, machine, result );
 897 #if DEBUG_FRAG
 898                printf("LIT (%g %g %g %g) : (%g %g %g %g)\n",
 899                       result[0], result[1], result[2], result[3],
 900                       a[0], a[1], a[2], a[3]);
 901 #endif
 902             }
 903             break;
 904          case OPCODE_LRP:
 905             {
 906                GLfloat a[4], b[4], c[4], result[4];
 907                fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
 908                fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
 909                fetch_vector4( ctx, &inst->SrcReg[2], machine, program, c );
 910                result[0] = a[0] * b[0] + (1.0F - a[0]) * c[0];
 911                result[1] = a[1] * b[1] + (1.0F - a[1]) * c[1];
 912                result[2] = a[2] * b[2] + (1.0F - a[2]) * c[2];
 913                result[3] = a[3] * b[3] + (1.0F - a[3]) * c[3];
 914                store_vector4( inst, machine, result );
 915 #if DEBUG_FRAG
 916                printf("LRP (%g %g %g %g) = (%g %g %g %g), "
 917                       "(%g %g %g %g), (%g %g %g %g)\n",
 918                       result[0], result[1], result[2], result[3],
 919                       a[0], a[1], a[2], a[3],
 920                       b[0], b[1], b[2], b[3],
 921                       c[0], c[1], c[2], c[3]);
 922 #endif
 923             }
 924             break;
 925          case OPCODE_MAD:
 926             {
 927                GLfloat a[4], b[4], c[4], result[4];
 928                fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
 929                fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
 930                fetch_vector4( ctx, &inst->SrcReg[2], machine, program, c );
 931                result[0] = a[0] * b[0] + c[0];
 932                result[1] = a[1] * b[1] + c[1];
 933                result[2] = a[2] * b[2] + c[2];
 934                result[3] = a[3] * b[3] + c[3];
 935                store_vector4( inst, machine, result );
 936 #if DEBUG_FRAG
 937                printf("MAD (%g %g %g %g) = (%g %g %g %g) * "
 938                       "(%g %g %g %g) + (%g %g %g %g)\n",
 939                       result[0], result[1], result[2], result[3],
 940                       a[0], a[1], a[2], a[3],
 941                       b[0], b[1], b[2], b[3],
 942                       c[0], c[1], c[2], c[3]);
 943 #endif
 944             }
 945             break;
 946          case OPCODE_MAX:
 947             {
 948                GLfloat a[4], b[4], result[4];
 949                fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
 950                fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
 951                result[0] = MAX2(a[0], b[0]);
 952                result[1] = MAX2(a[1], b[1]);
 953                result[2] = MAX2(a[2], b[2]);
 954                result[3] = MAX2(a[3], b[3]);
 955                store_vector4( inst, machine, result );
 956 #if DEBUG_FRAG
 957                printf("MAX (%g %g %g %g) = (%g %g %g %g), (%g %g %g %g)\n",
 958                       result[0], result[1], result[2], result[3],
 959                       a[0], a[1], a[2], a[3],
 960                       b[0], b[1], b[2], b[3]);
 961 #endif
 962             }
 963             break;
 964          case OPCODE_MIN:
 965             {
 966                GLfloat a[4], b[4], result[4];
 967                fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
 968                fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
 969                result[0] = MIN2(a[0], b[0]);
 970                result[1] = MIN2(a[1], b[1]);
 971                result[2] = MIN2(a[2], b[2]);
 972                result[3] = MIN2(a[3], b[3]);
 973                store_vector4( inst, machine, result );
 974             }
 975             break;
 976          case OPCODE_MOV:
 977             {
 978                GLfloat result[4];
 979                fetch_vector4( ctx, &inst->SrcReg[0], machine, program, result );
 980                store_vector4( inst, machine, result );
 981 #if DEBUG_FRAG
 982                printf("MOV (%g %g %g %g)\n",
 983                       result[0], result[1], result[2], result[3]);
 984 #endif
 985             }
 986             break;
 987          case OPCODE_MUL:
 988             {
 989                GLfloat a[4], b[4], result[4];
 990                fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
 991                fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
 992                result[0] = a[0] * b[0];
 993                result[1] = a[1] * b[1];
 994                result[2] = a[2] * b[2];
 995                result[3] = a[3] * b[3];
 996                store_vector4( inst, machine, result );
 997 #if DEBUG_FRAG
 998                printf("MUL (%g %g %g %g) = (%g %g %g %g) * (%g %g %g %g)\n",
 999                       result[0], result[1], result[2], result[3],
1000                       a[0], a[1], a[2], a[3],
1001                       b[0], b[1], b[2], b[3]);
1002 #endif
1003             }
1004             break;
1005          case OPCODE_PK2H: /* pack two 16-bit floats in one 32-bit float */
1006             {
1007                GLfloat a[4], result[4];
1008                GLhalfNV hx, hy;
1009                GLuint *rawResult = (GLuint *) result;
1010                GLuint twoHalves;
1011                fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1012                hx = _mesa_float_to_half(a[0]);
1013                hy = _mesa_float_to_half(a[1]);
1014                twoHalves = hx | (hy << 16);
1015                rawResult[0] = rawResult[1] = rawResult[2] = rawResult[3]
1016                   = twoHalves;
1017                store_vector4( inst, machine, result );
1018             }
1019             break;
1020          case OPCODE_PK2US: /* pack two GLushorts into one 32-bit float */
1021             {
1022                GLfloat a[4], result[4];
1023                GLuint usx, usy, *rawResult = (GLuint *) result;
1024                fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1025                a[0] = CLAMP(a[0], 0.0F, 1.0F);
1026                a[1] = CLAMP(a[1], 0.0F, 1.0F);
1027                usx = IROUND(a[0] * 65535.0F);
1028                usy = IROUND(a[1] * 65535.0F);
1029                rawResult[0] = rawResult[1] = rawResult[2] = rawResult[3]
1030                   = usx | (usy << 16);
1031                store_vector4( inst, machine, result );
1032             }
1033             break;
1034          case OPCODE_PK4B: /* pack four GLbytes into one 32-bit float */
1035             {
1036                GLfloat a[4], result[4];
1037                GLuint ubx, uby, ubz, ubw, *rawResult = (GLuint *) result;
1038                fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1039                a[0] = CLAMP(a[0], -128.0F / 127.0F, 1.0F);
1040                a[1] = CLAMP(a[1], -128.0F / 127.0F, 1.0F);
1041                a[2] = CLAMP(a[2], -128.0F / 127.0F, 1.0F);
1042                a[3] = CLAMP(a[3], -128.0F / 127.0F, 1.0F);
1043                ubx = IROUND(127.0F * a[0] + 128.0F);
1044                uby = IROUND(127.0F * a[1] + 128.0F);
1045                ubz = IROUND(127.0F * a[2] + 128.0F);
1046                ubw = IROUND(127.0F * a[3] + 128.0F);
1047                rawResult[0] = rawResult[1] = rawResult[2] = rawResult[3]
1048                   = ubx | (uby << 8) | (ubz << 16) | (ubw << 24);
1049                store_vector4( inst, machine, result );
1050             }
1051             break;
1052          case OPCODE_PK4UB: /* pack four GLubytes into one 32-bit float */
1053             {
1054                GLfloat a[4], result[4];
1055                GLuint ubx, uby, ubz, ubw, *rawResult = (GLuint *) result;
1056                fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1057                a[0] = CLAMP(a[0], 0.0F, 1.0F);
1058                a[1] = CLAMP(a[1], 0.0F, 1.0F);
1059                a[2] = CLAMP(a[2], 0.0F, 1.0F);
1060                a[3] = CLAMP(a[3], 0.0F, 1.0F);
1061                ubx = IROUND(255.0F * a[0]);
1062                uby = IROUND(255.0F * a[1]);
1063                ubz = IROUND(255.0F * a[2]);
1064                ubw = IROUND(255.0F * a[3]);
1065                rawResult[0] = rawResult[1] = rawResult[2] = rawResult[3]
1066                   = ubx | (uby << 8) | (ubz << 16) | (ubw << 24);
1067                store_vector4( inst, machine, result );
1068             }
1069             break;
1070          case OPCODE_POW:
1071             {
1072                GLfloat a[4], b[4], result[4];
1073                fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
1074                fetch_vector1( ctx, &inst->SrcReg[1], machine, program, b );
1075                result[0] = result[1] = result[2] = result[3]
1076                   = (GLfloat)_mesa_pow(a[0], b[0]);
1077                store_vector4( inst, machine, result );
1078             }
1079             break;
1080          case OPCODE_RCP:
1081             {
1082                GLfloat a[4], result[4];
1083                fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
1084 #if DEBUG_FRAG
1085                if (a[0] == 0)
1086                   printf("RCP(0)\n");
1087                else if (IS_INF_OR_NAN(a[0]))
1088                   printf("RCP(inf)\n");
1089 #endif
1090                result[0] = result[1] = result[2] = result[3] = 1.0F / a[0];
1091                store_vector4( inst, machine, result );
1092             }
1093             break;
1094          case OPCODE_RFL: /* reflection vector */
1095             {
1096                GLfloat axis[4], dir[4], result[4], tmpX, tmpW;
1097                fetch_vector4( ctx, &inst->SrcReg[0], machine, program, axis );
1098                fetch_vector4( ctx, &inst->SrcReg[1], machine, program, dir );
1099                tmpW = DOT3(axis, axis);
1100                tmpX = (2.0F * DOT3(axis, dir)) / tmpW;
1101                result[0] = tmpX * axis[0] - dir[0];
1102                result[1] = tmpX * axis[1] - dir[1];
1103                result[2] = tmpX * axis[2] - dir[2];
1104                /* result[3] is never written! XXX enforce in parser! */
1105                store_vector4( inst, machine, result );
1106             }
1107             break;
1108          case OPCODE_RSQ: /* 1 / sqrt() */
1109             {
1110                GLfloat a[4], result[4];
1111                fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
1112                a[0] = FABSF(a[0]);
1113                result[0] = result[1] = result[2] = result[3] = INV_SQRTF(a[0]);
1114                store_vector4( inst, machine, result );
1115 #if DEBUG_FRAG
1116                printf("RSQ %g = 1/sqrt(|%g|)\n", result[0], a[0]);
1117 #endif
1118             }
1119             break;
1120          case OPCODE_SCS: /* sine and cos */
1121             {
1122                GLfloat a[4], result[4];
1123                fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
1124                result[0] = (GLfloat)_mesa_cos(a[0]);
1125                result[1] = (GLfloat)_mesa_sin(a[0]);
1126                result[2] = 0.0;  /* undefined! */
1127                result[3] = 0.0;  /* undefined! */
1128                store_vector4( inst, machine, result );
1129             }
1130             break;
1131          case OPCODE_SEQ: /* set on equal */
1132             {
1133                GLfloat a[4], b[4], result[4];
1134                fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1135                fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
1136                result[0] = (a[0] == b[0]) ? 1.0F : 0.0F;
1137                result[1] = (a[1] == b[1]) ? 1.0F : 0.0F;
1138                result[2] = (a[2] == b[2]) ? 1.0F : 0.0F;
1139                result[3] = (a[3] == b[3]) ? 1.0F : 0.0F;
1140                store_vector4( inst, machine, result );
1141             }
1142             break;
1143          case OPCODE_SFL: /* set false, operands ignored */
1144             {
1145                static const GLfloat result[4] = { 0.0F, 0.0F, 0.0F, 0.0F };
1146                store_vector4( inst, machine, result );
1147             }
1148             break;
1149          case OPCODE_SGE: /* set on greater or equal */
1150             {
1151                GLfloat a[4], b[4], result[4];
1152                fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1153                fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
1154                result[0] = (a[0] >= b[0]) ? 1.0F : 0.0F;
1155                result[1] = (a[1] >= b[1]) ? 1.0F : 0.0F;
1156                result[2] = (a[2] >= b[2]) ? 1.0F : 0.0F;
1157                result[3] = (a[3] >= b[3]) ? 1.0F : 0.0F;
1158                store_vector4( inst, machine, result );
1159             }
1160             break;
1161          case OPCODE_SGT: /* set on greater */
1162             {
1163                GLfloat a[4], b[4], result[4];
1164                fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1165                fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
1166                result[0] = (a[0] > b[0]) ? 1.0F : 0.0F;
1167                result[1] = (a[1] > b[1]) ? 1.0F : 0.0F;
1168                result[2] = (a[2] > b[2]) ? 1.0F : 0.0F;
1169                result[3] = (a[3] > b[3]) ? 1.0F : 0.0F;
1170                store_vector4( inst, machine, result );
1171             }
1172             break;
1173          case OPCODE_SIN:
1174             {
1175                GLfloat a[4], result[4];
1176                fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
1177                result[0] = result[1] = result[2] = result[3]
1178                   = (GLfloat) _mesa_sin(a[0]);
1179                store_vector4( inst, machine, result );
1180             }
1181             break;
1182          case OPCODE_SLE: /* set on less or equal */
1183             {
1184                GLfloat a[4], b[4], result[4];
1185                fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1186                fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
1187                result[0] = (a[0] <= b[0]) ? 1.0F : 0.0F;
1188                result[1] = (a[1] <= b[1]) ? 1.0F : 0.0F;
1189                result[2] = (a[2] <= b[2]) ? 1.0F : 0.0F;
1190                result[3] = (a[3] <= b[3]) ? 1.0F : 0.0F;
1191                store_vector4( inst, machine, result );
1192             }
1193             break;
1194          case OPCODE_SLT: /* set on less */
1195             {
1196                GLfloat a[4], b[4], result[4];
1197                fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1198                fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
1199                result[0] = (a[0] < b[0]) ? 1.0F : 0.0F;
1200                result[1] = (a[1] < b[1]) ? 1.0F : 0.0F;
1201                result[2] = (a[2] < b[2]) ? 1.0F : 0.0F;
1202                result[3] = (a[3] < b[3]) ? 1.0F : 0.0F;
1203                store_vector4( inst, machine, result );
1204             }
1205             break;
1206          case OPCODE_SNE: /* set on not equal */
1207             {
1208                GLfloat a[4], b[4], result[4];
1209                fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1210                fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
1211                result[0] = (a[0] != b[0]) ? 1.0F : 0.0F;
1212                result[1] = (a[1] != b[1]) ? 1.0F : 0.0F;
1213                result[2] = (a[2] != b[2]) ? 1.0F : 0.0F;
1214                result[3] = (a[3] != b[3]) ? 1.0F : 0.0F;
1215                store_vector4( inst, machine, result );
1216             }
1217             break;
1218          case OPCODE_STR: /* set true, operands ignored */
1219             {
1220                static const GLfloat result[4] = { 1.0F, 1.0F, 1.0F, 1.0F };
1221                store_vector4( inst, machine, result );
1222             }
1223             break;
1224          case OPCODE_SUB:
1225             {
1226                GLfloat a[4], b[4], result[4];
1227                fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1228                fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
1229                result[0] = a[0] - b[0];
1230                result[1] = a[1] - b[1];
1231                result[2] = a[2] - b[2];
1232                result[3] = a[3] - b[3];
1233                store_vector4( inst, machine, result );
1234 #if DEBUG_FRAG
1235                printf("SUB (%g %g %g %g) = (%g %g %g %g) - (%g %g %g %g)\n",
1236                       result[0], result[1], result[2], result[3],
1237                       a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
1238 #endif
1239             }
1240             break;
1241          case OPCODE_SWZ: /* extended swizzle */
1242             {
1243                const struct prog_src_register *source = &inst->SrcReg[0];
1244                const GLfloat *src = get_register_pointer(ctx, source,
1245                                                          machine, program);
1246                GLfloat result[4];
1247                GLuint i;
1248                for (i = 0; i < 4; i++) {
1249                   const GLuint swz = GET_SWZ(source->Swizzle, i);
1250                   if (swz == SWIZZLE_ZERO)
1251                      result[i] = 0.0;
1252                   else if (swz == SWIZZLE_ONE)
1253                      result[i] = 1.0;
1254                   else {
1255                      ASSERT(swz >= 0);
1256                      ASSERT(swz <= 3);
1257                      result[i] = src[swz];
1258                   }
1259                   if (source->NegateBase & (1 << i))
1260                      result[i] = -result[i];
1261                }
1262                store_vector4( inst, machine, result );
1263             }
1264             break;
1265          case OPCODE_TEX: /* Both ARB and NV frag prog */
1266             /* Texel lookup */
1267             {
1268                /* Note: only use the precomputed lambda value when we're
1269                 * sampling texture unit [K] with texcoord[K].
1270                 * Otherwise, the lambda value may have no relation to the
1271                 * instruction's texcoord or texture image.  Using the wrong
1272                 * lambda is usually bad news.
1273                 * The rest of the time, just use zero (until we get a more
1274                 * sophisticated way of computing lambda).
1275                 */
1276                GLfloat coord[4], color[4], lambda;
1277                if (inst->SrcReg[0].File == PROGRAM_INPUT &&
1278                    inst->SrcReg[0].Index == FRAG_ATTRIB_TEX0+inst->TexSrcUnit)
1279                   lambda = span->array->lambda[inst->TexSrcUnit][column];
1280                else
1281                   lambda = 0.0;
1282                fetch_vector4(ctx, &inst->SrcReg[0], machine, program, coord);
1283                fetch_texel( ctx, coord, lambda, inst->TexSrcUnit, color );
1284 #if DEBUG_FRAG
1285                printf("TEX (%g, %g, %g, %g) = texture[%d][%g, %g, %g, %g], "
1286                       "lod %f\n",
1287                       color[0], color[1], color[2], color[3], inst->TexSrcUnit,
1288                       coord[0], coord[1], coord[2], coord[3], lambda);
1289 #endif
1290                store_vector4( inst, machine, color );
1291             }
1292             break;
1293          case OPCODE_TXB: /* GL_ARB_fragment_program only */
1294             /* Texel lookup with LOD bias */
1295             {
1296                GLfloat coord[4], color[4], lambda, bias;
1297                if (inst->SrcReg[0].File == PROGRAM_INPUT &&
1298                    inst->SrcReg[0].Index == FRAG_ATTRIB_TEX0+inst->TexSrcUnit)
1299                   lambda = span->array->lambda[inst->TexSrcUnit][column];
1300                else
1301                   lambda = 0.0;
1302                fetch_vector4(ctx, &inst->SrcReg[0], machine, program, coord);
1303                /* coord[3] is the bias to add to lambda */
1304                bias = ctx->Texture.Unit[inst->TexSrcUnit].LodBias
1305                     + ctx->Texture.Unit[inst->TexSrcUnit]._Current->LodBias
1306                     + coord[3];
1307                fetch_texel(ctx, coord, lambda + bias, inst->TexSrcUnit, color);
1308                store_vector4( inst, machine, color );
1309             }
1310             break;
1311          case OPCODE_TXD: /* GL_NV_fragment_program only */
1312             /* Texture lookup w/ partial derivatives for LOD */
1313             {
1314                GLfloat texcoord[4], dtdx[4], dtdy[4], color[4];
1315                fetch_vector4( ctx, &inst->SrcReg[0], machine, program, texcoord );
1316                fetch_vector4( ctx, &inst->SrcReg[1], machine, program, dtdx );
1317                fetch_vector4( ctx, &inst->SrcReg[2], machine, program, dtdy );
1318                fetch_texel_deriv( ctx, texcoord, dtdx, dtdy, inst->TexSrcUnit,
1319                                   color );
1320                store_vector4( inst, machine, color );
1321             }
1322             break;
1323          case OPCODE_TXP: /* GL_ARB_fragment_program only */
1324             /* Texture lookup w/ projective divide */
1325             {
1326                GLfloat texcoord[4], color[4], lambda;
1327                if (inst->SrcReg[0].File == PROGRAM_INPUT &&
1328                    inst->SrcReg[0].Index == FRAG_ATTRIB_TEX0+inst->TexSrcUnit)
1329                   lambda = span->array->lambda[inst->TexSrcUnit][column];
1330                else
1331                   lambda = 0.0;
1332                fetch_vector4(ctx, &inst->SrcReg[0], machine, program,texcoord);
1333                /* Not so sure about this test - if texcoord[3] is
1334                 * zero, we'd probably be fine except for an ASSERT in
1335                 * IROUND_POS() which gets triggered by the inf values created.
1336                 */
1337                if (texcoord[3] != 0.0) {
1338                   texcoord[0] /= texcoord[3];
1339                   texcoord[1] /= texcoord[3];
1340                   texcoord[2] /= texcoord[3];
1341                }
1342                fetch_texel( ctx, texcoord, lambda, inst->TexSrcUnit, color );
1343                store_vector4( inst, machine, color );
1344             }
1345             break;
1346          case OPCODE_TXP_NV: /* GL_NV_fragment_program only */
1347             /* Texture lookup w/ projective divide */
1348             {
1349                GLfloat texcoord[4], color[4], lambda;
1350                if (inst->SrcReg[0].File == PROGRAM_INPUT &&
1351                    inst->SrcReg[0].Index == FRAG_ATTRIB_TEX0+inst->TexSrcUnit)
1352                   lambda = span->array->lambda[inst->TexSrcUnit][column];
1353                else
1354                   lambda = 0.0;
1355                fetch_vector4(ctx, &inst->SrcReg[0], machine, program,texcoord);
1356                if (inst->TexSrcTarget != TEXTURE_CUBE_INDEX &&
1357                    texcoord[3] != 0.0) {
1358                   texcoord[0] /= texcoord[3];
1359                   texcoord[1] /= texcoord[3];
1360                   texcoord[2] /= texcoord[3];
1361                }
1362                fetch_texel( ctx, texcoord, lambda, inst->TexSrcUnit, color );
1363                store_vector4( inst, machine, color );
1364             }
1365             break;
1366          case OPCODE_UP2H: /* unpack two 16-bit floats */
1367             {
1368                GLfloat a[4], result[4];
1369                const GLuint *rawBits = (const GLuint *) a;
1370                GLhalfNV hx, hy;
1371                fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
1372                hx = rawBits[0] & 0xffff;
1373                hy = rawBits[0] >> 16;
1374                result[0] = result[2] = _mesa_half_to_float(hx);
1375                result[1] = result[3] = _mesa_half_to_float(hy);
1376                store_vector4( inst, machine, result );
1377             }
1378             break;
1379          case OPCODE_UP2US: /* unpack two GLushorts */
1380             {
1381                GLfloat a[4], result[4];
1382                const GLuint *rawBits = (const GLuint *) a;
1383                GLushort usx, usy;
1384                fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
1385                usx = rawBits[0] & 0xffff;
1386                usy = rawBits[0] >> 16;
1387                result[0] = result[2] = usx * (1.0f / 65535.0f);
1388                result[1] = result[3] = usy * (1.0f / 65535.0f);
1389                store_vector4( inst, machine, result );
1390             }
1391             break;
1392          case OPCODE_UP4B: /* unpack four GLbytes */
1393             {
1394                GLfloat a[4], result[4];
1395                const GLuint *rawBits = (const GLuint *) a;
1396                fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
1397                result[0] = (((rawBits[0] >>  0) & 0xff) - 128) / 127.0F;
1398                result[1] = (((rawBits[0] >>  8) & 0xff) - 128) / 127.0F;
1399                result[2] = (((rawBits[0] >> 16) & 0xff) - 128) / 127.0F;
1400                result[3] = (((rawBits[0] >> 24) & 0xff) - 128) / 127.0F;
1401                store_vector4( inst, machine, result );
1402             }
1403             break;
1404          case OPCODE_UP4UB: /* unpack four GLubytes */
1405             {
1406                GLfloat a[4], result[4];
1407                const GLuint *rawBits = (const GLuint *) a;
1408                fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
1409                result[0] = ((rawBits[0] >>  0) & 0xff) / 255.0F;
1410                result[1] = ((rawBits[0] >>  8) & 0xff) / 255.0F;
1411                result[2] = ((rawBits[0] >> 16) & 0xff) / 255.0F;
1412                result[3] = ((rawBits[0] >> 24) & 0xff) / 255.0F;
1413                store_vector4( inst, machine, result );
1414             }
1415             break;
1416          case OPCODE_XPD: /* cross product */
1417             {
1418                GLfloat a[4], b[4], result[4];
1419                fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1420                fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
1421                result[0] = a[1] * b[2] - a[2] * b[1];
1422                result[1] = a[2] * b[0] - a[0] * b[2];
1423                result[2] = a[0] * b[1] - a[1] * b[0];
1424                result[3] = 1.0;
1425                store_vector4( inst, machine, result );
1426             }
1427             break;
1428          case OPCODE_X2D: /* 2-D matrix transform */
1429             {
1430                GLfloat a[4], b[4], c[4], result[4];
1431                fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1432                fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
1433                fetch_vector4( ctx, &inst->SrcReg[2], machine, program, c );
1434                result[0] = a[0] + b[0] * c[0] + b[1] * c[1];
1435                result[1] = a[1] + b[0] * c[2] + b[1] * c[3];
1436                result[2] = a[2] + b[0] * c[0] + b[1] * c[1];
1437                result[3] = a[3] + b[0] * c[2] + b[1] * c[3];
1438                store_vector4( inst, machine, result );
1439             }
1440             break;
1441          case OPCODE_PRINT:
1442             {
1443                if (inst->SrcReg[0].File != -1) {
1444                   GLfloat a[4];
1445                   fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a);
1446                   _mesa_printf("%s%g, %g, %g, %g\n", (const char *) inst->Data,
1447                                a[0], a[1], a[2], a[3]);
1448                }
1449                else {
1450                   _mesa_printf("%s\n", (const char *) inst->Data);
1451                }
1452             }
1453             break;
1454          case OPCODE_END:
1455             return GL_TRUE;
1456          default:
1457             _mesa_problem(ctx, "Bad opcode %d in _mesa_exec_fragment_program",
1458                           inst->Opcode);
1459             return GL_TRUE; /* return value doesn't matter */
1460       }
1461    }
1462    return GL_TRUE;
1463 }
1464
1465
1466 /**
1467  * Initialize the virtual fragment program machine state prior to running
1468  * fragment program on a fragment.  This involves initializing the input
1469  * registers, condition codes, etc.
1470  * \param machine  the virtual machine state to init
1471  * \param program  the fragment program we're about to run
1472  * \param span  the span of pixels we'll operate on
1473  * \param col  which element (column) of the span we'll operate on
1474  */
1475 static void
1476 init_machine( GLcontext *ctx, struct fp_machine *machine,
1477               const struct gl_fragment_program *program,
1478               const SWspan *span, GLuint col )
1479 {
1480    GLuint inputsRead = program->Base.InputsRead;
1481    GLuint u;
1482
1483    if (ctx->FragmentProgram.CallbackEnabled)
1484       inputsRead = ~0;
1485
1486    if (program->Base.Target == GL_FRAGMENT_PROGRAM_NV) {
1487       /* Clear temporary registers (undefined for ARB_f_p) */
1488       _mesa_bzero(machine->Temporaries,
1489                   MAX_NV_FRAGMENT_PROGRAM_TEMPS * 4 * sizeof(GLfloat));
1490    }
1491
1492    /* Load input registers */
1493    if (inputsRead & (1 << FRAG_ATTRIB_WPOS)) {
1494       GLfloat *wpos = machine->Inputs[FRAG_ATTRIB_WPOS];
1495       ASSERT(span->arrayMask & SPAN_Z);
1496       if (span->arrayMask & SPAN_XY) {
1497          wpos[0] = (GLfloat) span->array->x[col];
1498          wpos[1] = (GLfloat) span->array->y[col];
1499       }
1500       else {
1501          wpos[0] = (GLfloat) span->x + col;
1502          wpos[1] = (GLfloat) span->y;
1503       }
1504       wpos[2] = (GLfloat) span->array->z[col] / ctx->DrawBuffer->_DepthMaxF;
1505       wpos[3] = span->w + col * span->dwdx;
1506    }
1507    if (inputsRead & (1 << FRAG_ATTRIB_COL0)) {
1508       ASSERT(span->arrayMask & SPAN_RGBA);
1509       COPY_4V(machine->Inputs[FRAG_ATTRIB_COL0],
1510               span->array->color.sz4.rgba[col]);
1511    }
1512    if (inputsRead & (1 << FRAG_ATTRIB_COL1)) {
1513       ASSERT(span->arrayMask & SPAN_SPEC);
1514       COPY_4V(machine->Inputs[FRAG_ATTRIB_COL1],
1515               span->array->color.sz4.spec[col]);
1516    }
1517    if (inputsRead & (1 << FRAG_ATTRIB_FOGC)) {
1518       GLfloat *fogc = machine->Inputs[FRAG_ATTRIB_FOGC];
1519       ASSERT(span->arrayMask & SPAN_FOG);
1520       fogc[0] = span->array->fog[col];
1521       fogc[1] = 0.0F;
1522       fogc[2] = 0.0F;
1523       fogc[3] = 0.0F;
1524    }
1525    for (u = 0; u < ctx->Const.MaxTextureCoordUnits; u++) {
1526       if (inputsRead & (1 << (FRAG_ATTRIB_TEX0 + u))) {
1527          GLfloat *tex = machine->Inputs[FRAG_ATTRIB_TEX0 + u];
1528          /*ASSERT(ctx->Texture._EnabledCoordUnits & (1 << u));*/
1529          COPY_4V(tex, span->array->texcoords[u][col]);
1530          /*ASSERT(tex[0] != 0 || tex[1] != 0 || tex[2] != 0);*/
1531       }
1532    }
1533
1534    /* init condition codes */
1535    machine->CondCodes[0] = COND_EQ;
1536    machine->CondCodes[1] = COND_EQ;
1537    machine->CondCodes[2] = COND_EQ;
1538    machine->CondCodes[3] = COND_EQ;
1539 }
1540
1541
1542 /**
1543  * Run fragment program on the pixels in span from 'start' to 'end' - 1.
1544  */
1545 static void
1546 run_program(GLcontext *ctx, SWspan *span, GLuint start, GLuint end)
1547 {
1548    const struct gl_fragment_program *program = ctx->FragmentProgram._Current;
1549    struct fp_machine machine;
1550    GLuint i;
1551
1552    CurrentMachine = &machine;
1553
1554    for (i = start; i < end; i++) {
1555       if (span->array->mask[i]) {
1556          init_machine(ctx, &machine, program, span, i);
1557
1558          if (execute_program(ctx, program, ~0, &machine, span, i)) {
1559             /* Store result color */
1560             COPY_4V(span->array->color.sz4.rgba[i],
1561                     machine.Outputs[FRAG_RESULT_COLR]);
1562
1563             /* Store result depth/z */
1564             if (program->Base.OutputsWritten & (1 << FRAG_RESULT_DEPR)) {
1565                const GLfloat depth = machine.Outputs[FRAG_RESULT_DEPR][2];
1566                if (depth <= 0.0)
1567                   span->array->z[i] = 0;
1568                else if (depth >= 1.0)
1569                   span->array->z[i] = ctx->DrawBuffer->_DepthMax;
1570                else
1571                   span->array->z[i] = IROUND(depth * ctx->DrawBuffer->_DepthMaxF);
1572             }
1573          }
1574          else {
1575             /* killed fragment */
1576             span->array->mask[i] = GL_FALSE;
1577             span->writeAll = GL_FALSE;
1578          }
1579       }
1580    }
1581
1582    CurrentMachine = NULL;
1583 }
1584
1585
1586 /**
1587  * Execute the current fragment program for all the fragments
1588  * in the given span.
1589  */
1590 void
1591 _swrast_exec_fragment_program( GLcontext *ctx, SWspan *span )
1592 {
1593    const struct gl_fragment_program *program = ctx->FragmentProgram._Current;
1594
1595    /* incoming colors should be floats */
1596    ASSERT(span->array->ChanType == GL_FLOAT);
1597
1598    ctx->_CurrentProgram = GL_FRAGMENT_PROGRAM_ARB; /* or NV, doesn't matter */
1599
1600    run_program(ctx, span, 0, span->end);
1601
1602    if (program->Base.OutputsWritten & (1 << FRAG_RESULT_DEPR)) {
1603       span->interpMask &= ~SPAN_Z;
1604       span->arrayMask |= SPAN_Z;
1605    }
1606
1607    ctx->_CurrentProgram = 0;
1608 }
1609