src/mesa/swrast/s_nvfragprog.c

   1 /*
   2  * Mesa 3-D graphics library
   3  * Version:  5.1
   4  *
   5  * Copyright (C) 1999-2003  Brian Paul   All Rights Reserved.
   6  *
   7  * Permission is hereby granted, free of charge, to any person obtaining a
   8  * copy of this software and associated documentation files (the "Software"),
   9  * to deal in the Software without restriction, including without limitation
  10  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  11  * and/or sell copies of the Software, and to permit persons to whom the
  12  * Software is furnished to do so, subject to the following conditions:
  13  *
  14  * The above copyright notice and this permission notice shall be included
  15  * in all copies or substantial portions of the Software.
  16  *
  17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  18  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  20  * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
  21  * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  22  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  23  */
  24
  25
  26 #include "glheader.h"
  27 #include "colormac.h"
  28 #include "context.h"
  29 #include "nvfragprog.h"
  30 #include "macros.h"
  31
  32 #include "s_nvfragprog.h"
  33 #include "s_span.h"
  34 #include "s_texture.h"
  35
  36
  37 /* if 1, print some debugging info */
  38 #define DEBUG_FRAG 0
  39
  40
  41 /**
  42  * Fetch a texel.
  43  */
  44 static void
  45 fetch_texel( GLcontext *ctx, const GLfloat texcoord[4], GLfloat lambda,
  46              GLuint unit, GLfloat color[4] )
  47 {
  48    GLchan rgba[4];
  49    SWcontext *swrast = SWRAST_CONTEXT(ctx);
  50
  51    swrast->TextureSample[unit](ctx, unit, ctx->Texture.Unit[unit]._Current,
  52                                1, (const GLfloat (*)[4]) texcoord,
  53                                &lambda, &rgba);
  54    color[0] = CHAN_TO_FLOAT(rgba[0]);
  55    color[1] = CHAN_TO_FLOAT(rgba[1]);
  56    color[2] = CHAN_TO_FLOAT(rgba[2]);
  57    color[3] = CHAN_TO_FLOAT(rgba[3]);
  58 }
  59
  60
  61 /**
  62  * Fetch a texel with the given partial derivatives to compute a level
  63  * of detail in the mipmap.
  64  */
  65 static void
  66 fetch_texel_deriv( GLcontext *ctx, const GLfloat texcoord[4],
  67                    const GLfloat texdx[4], const GLfloat texdy[4],
  68                    GLuint unit, GLfloat color[4] )
  69 {
  70    SWcontext *swrast = SWRAST_CONTEXT(ctx);
  71    const struct gl_texture_object *texObj = ctx->Texture.Unit[unit]._Current;
  72    const struct gl_texture_image *texImg = texObj->Image[texObj->BaseLevel];
  73    const GLfloat texW = (GLfloat) texImg->WidthScale;
  74    const GLfloat texH = (GLfloat) texImg->HeightScale;
  75    GLchan rgba[4];
  76
  77    GLfloat lambda = _swrast_compute_lambda(texdx[0], texdy[0], /* ds/dx, ds/dy */
  78                                          texdx[1], texdy[1], /* dt/dx, dt/dy */
  79                                          texdx[3], texdy[2], /* dq/dx, dq/dy */
  80                                          texW, texH,
  81                                          texcoord[0], texcoord[1], texcoord[3],
  82                                          1.0F / texcoord[3]);
  83
  84    swrast->TextureSample[unit](ctx, unit, ctx->Texture.Unit[unit]._Current,
  85                                1, (const GLfloat (*)[4]) texcoord,
  86                                &lambda, &rgba);
  87    color[0] = CHAN_TO_FLOAT(rgba[0]);
  88    color[1] = CHAN_TO_FLOAT(rgba[1]);
  89    color[2] = CHAN_TO_FLOAT(rgba[2]);
  90    color[3] = CHAN_TO_FLOAT(rgba[3]);
  91 }
  92
  93
  94
  95 /**
  96  * Fetch a 4-element float vector from the given source register.
  97  * Apply swizzling and negating as needed.
  98  */
  99 static void
 100 fetch_vector4( const struct fp_src_register *source,
 101                const struct fp_machine *machine,
 102                const struct fragment_program *program,
 103                GLfloat result[4] )
 104 {
 105    const GLfloat *src;
 106
 107    if (source->IsParameter) {
 108       src = program->Parameters[source->Register].Values;
 109    }
 110    else {
 111       src = machine->Registers[source->Register];
 112    }
 113
 114    result[0] = src[source->Swizzle[0]];
 115    result[1] = src[source->Swizzle[1]];
 116    result[2] = src[source->Swizzle[2]];
 117    result[3] = src[source->Swizzle[3]];
 118
 119    if (source->NegateBase) {
 120       result[0] = -result[0];
 121       result[1] = -result[1];
 122       result[2] = -result[2];
 123       result[3] = -result[3];
 124    }
 125    if (source->Abs) {
 126       result[0] = FABSF(result[0]);
 127       result[1] = FABSF(result[1]);
 128       result[2] = FABSF(result[2]);
 129       result[3] = FABSF(result[3]);
 130    }
 131    if (source->NegateAbs) {
 132       result[0] = -result[0];
 133       result[1] = -result[1];
 134       result[2] = -result[2];
 135       result[3] = -result[3];
 136    }
 137 }
 138
 139
 140 /**
 141  * Fetch the derivative with respect to X for the given register.
 142  * \return GL_TRUE if it was easily computed or GL_FALSE if we
 143  * need to execute another instance of the program (ugh)!
 144  */
 145 static GLboolean
 146 fetch_vector4_deriv( const struct fp_src_register *source,
 147                      const struct sw_span *span,
 148                      char xOrY, GLfloat result[4] )
 149 {
 150    GLfloat src[4];
 151
 152    ASSERT(xOrY == 'X' || xOrY == 'Y');
 153
 154    switch (source->Register) {
 155    case FRAG_ATTRIB_WPOS:
 156       if (xOrY == 'X') {
 157          src[0] = 1.0;
 158          src[1] = 0.0;
 159          src[2] = span->dzdx;
 160          src[3] = span->dwdx;
 161       }
 162       else {
 163          src[0] = 0.0;
 164          src[1] = 1.0;
 165          src[2] = span->dzdy;
 166          src[3] = span->dwdy;
 167       }
 168       break;
 169    case FRAG_ATTRIB_COL0:
 170       if (xOrY == 'X') {
 171          src[0] = span->drdx * (1.0F / CHAN_MAXF);
 172          src[1] = span->dgdx * (1.0F / CHAN_MAXF);
 173          src[2] = span->dbdx * (1.0F / CHAN_MAXF);
 174          src[3] = span->dadx * (1.0F / CHAN_MAXF);
 175       }
 176       else {
 177          src[0] = span->drdy * (1.0F / CHAN_MAXF);
 178          src[1] = span->dgdy * (1.0F / CHAN_MAXF);
 179          src[2] = span->dbdy * (1.0F / CHAN_MAXF);
 180          src[3] = span->dady * (1.0F / CHAN_MAXF);
 181       }
 182       break;
 183    case FRAG_ATTRIB_COL1:
 184       if (xOrY == 'X') {
 185          src[0] = span->dsrdx * (1.0F / CHAN_MAXF);
 186          src[1] = span->dsgdx * (1.0F / CHAN_MAXF);
 187          src[2] = span->dsbdx * (1.0F / CHAN_MAXF);
 188          src[3] = 0.0; /* XXX need this */
 189       }
 190       else {
 191          src[0] = span->dsrdy * (1.0F / CHAN_MAXF);
 192          src[1] = span->dsgdy * (1.0F / CHAN_MAXF);
 193          src[2] = span->dsbdy * (1.0F / CHAN_MAXF);
 194          src[3] = 0.0; /* XXX need this */
 195       }
 196       break;
 197    case FRAG_ATTRIB_FOGC:
 198       if (xOrY == 'X') {
 199          src[0] = span->dfogdx;
 200          src[1] = 0.0;
 201          src[2] = 0.0;
 202          src[3] = 0.0;
 203       }
 204       else {
 205          src[0] = span->dfogdy;
 206          src[1] = 0.0;
 207          src[2] = 0.0;
 208          src[3] = 0.0;
 209       }
 210       break;
 211    case FRAG_ATTRIB_TEX0:
 212    case FRAG_ATTRIB_TEX1:
 213    case FRAG_ATTRIB_TEX2:
 214    case FRAG_ATTRIB_TEX3:
 215    case FRAG_ATTRIB_TEX4:
 216    case FRAG_ATTRIB_TEX5:
 217    case FRAG_ATTRIB_TEX6:
 218    case FRAG_ATTRIB_TEX7:
 219       if (xOrY == 'X') {
 220          const GLuint u = source->Register - FRAG_ATTRIB_TEX0;
 221          src[0] = span->texStepX[u][0] * (1.0F / CHAN_MAXF);
 222          src[1] = span->texStepX[u][1] * (1.0F / CHAN_MAXF);
 223          src[2] = span->texStepX[u][2] * (1.0F / CHAN_MAXF);
 224          src[3] = span->texStepX[u][3] * (1.0F / CHAN_MAXF);
 225       }
 226       else {
 227          const GLuint u = source->Register - FRAG_ATTRIB_TEX0;
 228          src[0] = span->texStepY[u][0] * (1.0F / CHAN_MAXF);
 229          src[1] = span->texStepY[u][1] * (1.0F / CHAN_MAXF);
 230          src[2] = span->texStepY[u][2] * (1.0F / CHAN_MAXF);
 231          src[3] = span->texStepY[u][3] * (1.0F / CHAN_MAXF);
 232       }
 233       break;
 234    default:
 235       return GL_FALSE;
 236    }
 237
 238    result[0] = src[source->Swizzle[0]];
 239    result[1] = src[source->Swizzle[1]];
 240    result[2] = src[source->Swizzle[2]];
 241    result[3] = src[source->Swizzle[3]];
 242
 243    if (source->NegateBase) {
 244       result[0] = -result[0];
 245       result[1] = -result[1];
 246       result[2] = -result[2];
 247       result[3] = -result[3];
 248    }
 249    if (source->Abs) {
 250       result[0] = FABSF(result[0]);
 251       result[1] = FABSF(result[1]);
 252       result[2] = FABSF(result[2]);
 253       result[3] = FABSF(result[3]);
 254    }
 255    if (source->NegateAbs) {
 256       result[0] = -result[0];
 257       result[1] = -result[1];
 258       result[2] = -result[2];
 259       result[3] = -result[3];
 260    }
 261    return GL_TRUE;
 262 }
 263
 264
 265 /**
 266  * As above, but only return result[0] element.
 267  */
 268 static void
 269 fetch_vector1( const struct fp_src_register *source,
 270                const struct fp_machine *machine,
 271                const struct fragment_program *program,
 272                GLfloat result[4] )
 273 {
 274    const GLfloat *src;
 275
 276    if (source->IsParameter) {
 277       src = program->Parameters[source->Register].Values;
 278    }
 279    else {
 280       src = machine->Registers[source->Register];
 281    }
 282
 283    result[0] = src[source->Swizzle[0]];
 284
 285    if (source->NegateBase) {
 286       result[0] = -result[0];
 287    }
 288    if (source->Abs) {
 289       result[0] = FABSF(result[0]);
 290    }
 291    if (source->NegateAbs) {
 292       result[0] = -result[0];
 293    }
 294 }
 295
 296
 297 /*
 298  * Test value against zero and return GT, LT, EQ or UN if NaN.
 299  */
 300 static INLINE GLuint
 301 generate_cc( float value )
 302 {
 303    if (value != value)
 304       return COND_UN;  /* NaN */
 305    if (value > 0.0F)
 306       return COND_GT;
 307    if (value < 0.0F)
 308       return COND_LT;
 309    return COND_EQ;
 310 }
 311
 312 /*
 313  * Test if the ccMaskRule is satisfied by the given condition code.
 314  * Used to mask destination writes according to the current condition codee.
 315  */
 316 static INLINE GLboolean
 317 test_cc(GLuint condCode, GLuint ccMaskRule)
 318 {
 319    switch (ccMaskRule) {
 320    case COND_EQ: return (condCode == COND_EQ);
 321    case COND_NE: return (condCode != COND_EQ);
 322    case COND_LT: return (condCode == COND_LT);
 323    case COND_GE: return (condCode == COND_GT || condCode == COND_EQ);
 324    case COND_LE: return (condCode == COND_LT || condCode == COND_EQ);
 325    case COND_GT: return (condCode == COND_GT);
 326    case COND_TR: return GL_TRUE;
 327    case COND_FL: return GL_FALSE;
 328    default:      return GL_TRUE;
 329    }
 330 }
 331
 332
 333 /**
 334  * Store 4 floats into a register.  Observe the instructions saturate and
 335  * set-condition-code flags.
 336  */
 337 static void
 338 store_vector4( const struct fp_instruction *inst,
 339                struct fp_machine *machine,
 340                const GLfloat value[4] )
 341 {
 342    const struct fp_dst_register *dest = &(inst->DstReg);
 343    const GLboolean clamp = inst->Saturate;
 344    const GLboolean updateCC = inst->UpdateCondRegister;
 345    GLfloat *dstReg = machine->Registers[dest->Register];
 346    GLfloat clampedValue[4];
 347    const GLboolean *writeMask = dest->WriteMask;
 348    GLboolean condWriteMask[4];
 349
 350 #if DEBUG_FRAG
 351    if (value[0] > 1.0e10 ||
 352        IS_INF_OR_NAN(value[0]) ||
 353        IS_INF_OR_NAN(value[1]) ||
 354        IS_INF_OR_NAN(value[2]) ||
 355        IS_INF_OR_NAN(value[3])  )
 356       printf("store %g %g %g %g\n", value[0], value[1], value[2], value[3]);
 357 #endif
 358
 359    if (clamp) {
 360       clampedValue[0] = CLAMP(value[0], 0.0F, 1.0F);
 361       clampedValue[1] = CLAMP(value[1], 0.0F, 1.0F);
 362       clampedValue[2] = CLAMP(value[2], 0.0F, 1.0F);
 363       clampedValue[3] = CLAMP(value[3], 0.0F, 1.0F);
 364       value = clampedValue;
 365    }
 366
 367    if (dest->CondMask != COND_TR) {
 368       condWriteMask[0] = writeMask[0]
 369          && test_cc(machine->CondCodes[dest->CondSwizzle[0]], dest->CondMask);
 370       condWriteMask[1] = writeMask[1]
 371          && test_cc(machine->CondCodes[dest->CondSwizzle[1]], dest->CondMask);
 372       condWriteMask[2] = writeMask[2]
 373          && test_cc(machine->CondCodes[dest->CondSwizzle[2]], dest->CondMask);
 374       condWriteMask[3] = writeMask[3]
 375          && test_cc(machine->CondCodes[dest->CondSwizzle[3]], dest->CondMask);
 376       writeMask = condWriteMask;
 377    }
 378
 379    if (writeMask[0]) {
 380       dstReg[0] = value[0];
 381       if (updateCC)
 382          machine->CondCodes[0] = generate_cc(value[0]);
 383    }
 384    if (writeMask[1]) {
 385       dstReg[1] = value[1];
 386       if (updateCC)
 387          machine->CondCodes[1] = generate_cc(value[1]);
 388    }
 389    if (writeMask[2]) {
 390       dstReg[2] = value[2];
 391       if (updateCC)
 392          machine->CondCodes[2] = generate_cc(value[2]);
 393    }
 394    if (writeMask[3]) {
 395       dstReg[3] = value[3];
 396       if (updateCC)
 397          machine->CondCodes[3] = generate_cc(value[3]);
 398    }
 399 }
 400
 401
 402 /**
 403  * Initialize a new machine state instance from an existing one, adding
 404  * the partial derivatives onto the input registers.
 405  * Used to implement DDX and DDY instructions in non-trivial cases.
 406  */
 407 static void
 408 init_machine_deriv( GLcontext *ctx,
 409                     const struct fp_machine *machine,
 410                     const struct fragment_program *program,
 411                     const struct sw_span *span, char xOrY,
 412                     struct fp_machine *dMachine )
 413 {
 414    GLuint u;
 415
 416    ASSERT(xOrY == 'X' || xOrY == 'Y');
 417
 418    /* copy existing machine */
 419    _mesa_memcpy(dMachine, machine, sizeof(struct fp_machine));
 420
 421    /* Clear temporary registers */
 422    _mesa_bzero((GLfloat*) (machine->Registers + FP_TEMP_REG_START) ,
 423                MAX_NV_FRAGMENT_PROGRAM_TEMPS * 4 * sizeof(GLfloat));
 424
 425    /* Add derivatives */
 426    if (program->InputsRead & (1 << FRAG_ATTRIB_WPOS)) {
 427       GLfloat *wpos = (GLfloat*) machine->Registers[FP_INPUT_REG_START+FRAG_ATTRIB_WPOS];
 428       if (xOrY == 'X') {
 429          wpos[0] += 1.0F;
 430          wpos[1] += 0.0F;
 431          wpos[2] += span->dzdx;
 432          wpos[3] += span->dwdx;
 433       }
 434       else {
 435          wpos[0] += 0.0F;
 436          wpos[1] += 1.0F;
 437          wpos[2] += span->dzdy;
 438          wpos[3] += span->dwdy;
 439       }
 440    }
 441    if (program->InputsRead & (1 << FRAG_ATTRIB_COL0)) {
 442       GLfloat *col0 = (GLfloat*) machine->Registers[FP_INPUT_REG_START+FRAG_ATTRIB_COL0];
 443       if (xOrY == 'X') {
 444          col0[0] += span->drdx * (1.0F / CHAN_MAXF);
 445          col0[1] += span->dgdx * (1.0F / CHAN_MAXF);
 446          col0[2] += span->dbdx * (1.0F / CHAN_MAXF);
 447          col0[3] += span->dadx * (1.0F / CHAN_MAXF);
 448       }
 449       else {
 450          col0[0] += span->drdy * (1.0F / CHAN_MAXF);
 451          col0[1] += span->dgdy * (1.0F / CHAN_MAXF);
 452          col0[2] += span->dbdy * (1.0F / CHAN_MAXF);
 453          col0[3] += span->dady * (1.0F / CHAN_MAXF);
 454       }
 455    }
 456    if (program->InputsRead & (1 << FRAG_ATTRIB_COL1)) {
 457       GLfloat *col1 = (GLfloat*) machine->Registers[FP_INPUT_REG_START+FRAG_ATTRIB_COL1];
 458       if (xOrY == 'X') {
 459          col1[0] += span->dsrdx * (1.0F / CHAN_MAXF);
 460          col1[1] += span->dsgdx * (1.0F / CHAN_MAXF);
 461          col1[2] += span->dsbdx * (1.0F / CHAN_MAXF);
 462          col1[3] += 0.0; /*XXX fix */
 463       }
 464       else {
 465          col1[0] += span->dsrdy * (1.0F / CHAN_MAXF);
 466          col1[1] += span->dsgdy * (1.0F / CHAN_MAXF);
 467          col1[2] += span->dsbdy * (1.0F / CHAN_MAXF);
 468          col1[3] += 0.0; /*XXX fix */
 469       }
 470    }
 471    if (program->InputsRead & (1 << FRAG_ATTRIB_FOGC)) {
 472       GLfloat *fogc = (GLfloat*) machine->Registers[FP_INPUT_REG_START+FRAG_ATTRIB_FOGC];
 473       if (xOrY == 'X') {
 474          fogc[0] += span->dfogdx;
 475       }
 476       else {
 477          fogc[0] += span->dfogdy;
 478       }
 479    }
 480    for (u = 0; u < ctx->Const.MaxTextureCoordUnits; u++) {
 481       if (program->InputsRead & (1 << (FRAG_ATTRIB_TEX0 + u))) {
 482          GLfloat *tex = (GLfloat*) machine->Registers[FP_INPUT_REG_START+FRAG_ATTRIB_TEX0+u];
 483          if (xOrY == 'X') {
 484             tex[0] += span->texStepX[u][0];
 485             tex[1] += span->texStepX[u][1];
 486             tex[2] += span->texStepX[u][2];
 487             tex[3] += span->texStepX[u][3];
 488          }
 489          else {
 490             tex[0] += span->texStepY[u][0];
 491             tex[1] += span->texStepY[u][1];
 492             tex[2] += span->texStepY[u][2];
 493             tex[3] += span->texStepY[u][3];
 494          }
 495       }
 496    }
 497 }
 498
 499
 500 /**
  501  * Execute the given fragment program.
 502  * NOTE: we do everything in single-precision floating point; we don't
 503  * currently observe the single/half/fixed-precision qualifiers.
 504  * \param ctx - rendering context
 505  * \param program - the fragment program to execute
 506  * \param machine - machine state (register file)
 507  * \param maxInst - max number of instructions to execute
 508  * \return GL_TRUE if program completed or GL_FALSE if program executed KIL.
 509  */
 510 static GLboolean
 511 execute_program( GLcontext *ctx,
 512                  const struct fragment_program *program, GLuint maxInst,
 513                  struct fp_machine *machine, const struct sw_span *span,
 514                  GLuint column )
 515 {
 516    GLuint pc;
 517
 518 #if DEBUG_FRAG
 519    printf("execute fragment program --------------------\n");
 520 #endif
 521
 522    for (pc = 0; pc < maxInst; pc++) {
 523       const struct fp_instruction *inst = program->Instructions + pc;
 524       switch (inst->Opcode) {
 525          case FP_OPCODE_ADD:
 526             {
 527                GLfloat a[4], b[4], result[4];
 528                fetch_vector4( &inst->SrcReg[0], machine, program, a );
 529                fetch_vector4( &inst->SrcReg[1], machine, program, b );
 530                result[0] = a[0] + b[0];
 531                result[1] = a[1] + b[1];
 532                result[2] = a[2] + b[2];
 533                result[3] = a[3] + b[3];
 534                store_vector4( inst, machine, result );
 535             }
 536             break;
 537          case FP_OPCODE_COS:
 538             {
 539                GLfloat a[4], result[4];
 540                fetch_vector1( &inst->SrcReg[0], machine, program, a );
 541                result[0] = result[1] = result[2] = result[3] = _mesa_cos(a[0]);
 542                store_vector4( inst, machine, result );
 543             }
 544             break;
 545          case FP_OPCODE_DDX: /* Partial derivative with respect to X */
 546             {
 547                GLfloat a[4], aNext[4], result[4];
 548                struct fp_machine dMachine;
 549                if (!fetch_vector4_deriv(&inst->SrcReg[0], span, 'X', result)) {
 550                   /* This is tricky.  Make a copy of the current machine state,
 551                    * increment the input registers by the dx or dy partial
 552                    * derivatives, then re-execute the program up to the
 553                    * preceeding instruction, then fetch the source register.
 554                    * Finally, find the difference in the register values for
 555                    * the original and derivative runs.
 556                    */
 557                   fetch_vector4( &inst->SrcReg[0], machine, program, a);
 558                   init_machine_deriv(ctx, machine, program, span,
 559                                      'X', &dMachine);
 560                   execute_program(ctx, program, pc, &dMachine, span, column);
 561                   fetch_vector4( &inst->SrcReg[0], &dMachine, program, aNext );
 562                   result[0] = aNext[0] - a[0];
 563                   result[1] = aNext[1] - a[1];
 564                   result[2] = aNext[2] - a[2];
 565                   result[3] = aNext[3] - a[3];
 566                }
 567                store_vector4( inst, machine, result );
 568             }
 569             break;
 570          case FP_OPCODE_DDY: /* Partial derivative with respect to Y */
 571             {
 572                GLfloat a[4], aNext[4], result[4];
 573                struct fp_machine dMachine;
 574                if (!fetch_vector4_deriv(&inst->SrcReg[0], span, 'Y', result)) {
 575                   init_machine_deriv(ctx, machine, program, span,
 576                                      'Y', &dMachine);
 577                   fetch_vector4( &inst->SrcReg[0], machine, program, a);
 578                   execute_program(ctx, program, pc, &dMachine, span, column);
 579                   fetch_vector4( &inst->SrcReg[0], &dMachine, program, aNext );
 580                   result[0] = aNext[0] - a[0];
 581                   result[1] = aNext[1] - a[1];
 582                   result[2] = aNext[2] - a[2];
 583                   result[3] = aNext[3] - a[3];
 584                }
 585                store_vector4( inst, machine, result );
 586             }
 587             break;
 588          case FP_OPCODE_DP3:
 589             {
 590                GLfloat a[4], b[4], result[4];
 591                fetch_vector4( &inst->SrcReg[0], machine, program, a );
 592                fetch_vector4( &inst->SrcReg[1], machine, program, b );
 593                result[0] = result[1] = result[2] = result[3] =
 594                   a[0] * b[0] + a[1] * b[1] + a[2] * b[2];
 595                store_vector4( inst, machine, result );
 596 #if DEBUG_FRAG
 597                printf("DP3 %g = (%g %g %g) . (%g %g %g)\n",
 598                       result[0], a[0], a[1], a[2], b[0], b[1], b[2]);
 599 #endif
 600             }
 601             break;
 602          case FP_OPCODE_DP4:
 603             {
 604                GLfloat a[4], b[4], result[4];
 605                fetch_vector4( &inst->SrcReg[0], machine, program, a );
 606                fetch_vector4( &inst->SrcReg[1], machine, program, b );
 607                result[0] = result[1] = result[2] = result[3] =
 608                   a[0] * b[0] + a[1] * b[1] + a[2] * b[2] + a[3] * b[3];
 609                store_vector4( inst, machine, result );
 610             }
 611             break;
 612          case FP_OPCODE_DST: /* Distance vector */
 613             {
 614                GLfloat a[4], b[4], result[4];
 615                fetch_vector4( &inst->SrcReg[0], machine, program, a );
 616                fetch_vector4( &inst->SrcReg[1], machine, program, b );
 617                result[0] = 1.0F;
 618                result[1] = a[1] * b[1];
 619                result[2] = a[2];
 620                result[3] = b[3];
 621                store_vector4( inst, machine, result );
 622             }
 623             break;
 624          case FP_OPCODE_EX2: /* Exponential base 2 */
 625             {
 626                GLfloat a[4], result[4];
 627                fetch_vector1( &inst->SrcReg[0], machine, program, a );
 628                result[0] = result[1] = result[2] = result[3] =
 629                   (GLfloat) _mesa_pow(2.0, a[0]);
 630                store_vector4( inst, machine, result );
 631             }
 632             break;
 633          case FP_OPCODE_FLR:
 634             {
 635                GLfloat a[4], result[4];
 636                fetch_vector4( &inst->SrcReg[0], machine, program, a );
 637                result[0] = FLOORF(a[0]);
 638                result[1] = FLOORF(a[1]);
 639                result[2] = FLOORF(a[2]);
 640                result[3] = FLOORF(a[3]);
 641                store_vector4( inst, machine, result );
 642             }
 643             break;
 644          case FP_OPCODE_FRC:
 645             {
 646                GLfloat a[4], result[4];
 647                fetch_vector4( &inst->SrcReg[0], machine, program, a );
 648                result[0] = a[0] - FLOORF(a[0]);
 649                result[1] = a[1] - FLOORF(a[1]);
 650                result[2] = a[2] - FLOORF(a[2]);
 651                result[3] = a[3] - FLOORF(a[3]);
 652                store_vector4( inst, machine, result );
 653             }
 654             break;
 655          case FP_OPCODE_KIL:
 656             {
 657                const GLuint *swizzle = inst->DstReg.CondSwizzle;
 658                const GLuint condMask = inst->DstReg.CondMask;
 659                if (test_cc(machine->CondCodes[swizzle[0]], condMask) ||
 660                    test_cc(machine->CondCodes[swizzle[1]], condMask) ||
 661                    test_cc(machine->CondCodes[swizzle[2]], condMask) ||
 662                    test_cc(machine->CondCodes[swizzle[3]], condMask))
 663                   return GL_FALSE;
 664             }
 665             break;
 666          case FP_OPCODE_LG2:  /* log base 2 */
 667             {
 668                GLfloat a[4], result[4];
 669                fetch_vector1( &inst->SrcReg[0], machine, program, a );
 670                result[0] = result[1] = result[2] = result[3]
 671                   = LOG2(a[0]);
 672                store_vector4( inst, machine, result );
 673             }
 674             break;
 675          case FP_OPCODE_LIT:
 676             {
 677                GLfloat a[4], result[4];
 678                fetch_vector4( &inst->SrcReg[0], machine, program, a );
 679                if (a[0] < 0.0F)
 680                   a[0] = 0.0F;
 681                if (a[1] < 0.0F)
 682                   a[1] = 0.0F;
 683                result[0] = 1.0F;
 684                result[1] = a[0];
 685                result[2] = (a[0] > 0.0) ? _mesa_pow(2.0, a[3]) : 0.0F;
 686                result[3] = 1.0F;
 687                store_vector4( inst, machine, result );
 688             }
 689             break;
 690          case FP_OPCODE_LRP:
 691             {
 692                GLfloat a[4], b[4], c[4], result[4];
 693                fetch_vector4( &inst->SrcReg[0], machine, program, a );
 694                fetch_vector4( &inst->SrcReg[1], machine, program, b );
 695                fetch_vector4( &inst->SrcReg[2], machine, program, c );
 696                result[0] = a[0] * b[0] + (1.0F - a[0]) * c[0];
 697                result[1] = a[1] * b[1] + (1.0F - a[1]) * c[1];
 698                result[2] = a[2] * b[2] + (1.0F - a[2]) * c[2];
 699                result[3] = a[3] * b[3] + (1.0F - a[3]) * c[3];
 700                store_vector4( inst, machine, result );
 701             }
 702             break;
 703          case FP_OPCODE_MAD:
 704             {
 705                GLfloat a[4], b[4], c[4], result[4];
 706                fetch_vector4( &inst->SrcReg[0], machine, program, a );
 707                fetch_vector4( &inst->SrcReg[1], machine, program, b );
 708                fetch_vector4( &inst->SrcReg[2], machine, program, c );
 709                result[0] = a[0] * b[0] + c[0];
 710                result[1] = a[1] * b[1] + c[1];
 711                result[2] = a[2] * b[2] + c[2];
 712                result[3] = a[3] * b[3] + c[3];
 713                store_vector4( inst, machine, result );
 714             }
 715             break;
 716          case FP_OPCODE_MAX:
 717             {
 718                GLfloat a[4], b[4], result[4];
 719                fetch_vector4( &inst->SrcReg[0], machine, program, a );
 720                fetch_vector4( &inst->SrcReg[1], machine, program, b );
 721                result[0] = MAX2(a[0], b[0]);
 722                result[1] = MAX2(a[1], b[1]);
 723                result[2] = MAX2(a[2], b[2]);
 724                result[3] = MAX2(a[3], b[3]);
 725                store_vector4( inst, machine, result );
 726             }
 727             break;
 728          case FP_OPCODE_MIN:
 729             {
 730                GLfloat a[4], b[4], result[4];
 731                fetch_vector4( &inst->SrcReg[0], machine, program, a );
 732                fetch_vector4( &inst->SrcReg[1], machine, program, b );
 733                result[0] = MIN2(a[0], b[0]);
 734                result[1] = MIN2(a[1], b[1]);
 735                result[2] = MIN2(a[2], b[2]);
 736                result[3] = MIN2(a[3], b[3]);
 737                store_vector4( inst, machine, result );
 738             }
 739             break;
 740          case FP_OPCODE_MOV:
 741             {
 742                GLfloat result[4];
 743                fetch_vector4( &inst->SrcReg[0], machine, program, result );
 744                store_vector4( inst, machine, result );
 745             }
 746             break;
 747          case FP_OPCODE_MUL:
 748             {
 749                GLfloat a[4], b[4], result[4];
 750                fetch_vector4( &inst->SrcReg[0], machine, program, a );
 751                fetch_vector4( &inst->SrcReg[1], machine, program, b );
 752                result[0] = a[0] * b[0];
 753                result[1] = a[1] * b[1];
 754                result[2] = a[2] * b[2];
 755                result[3] = a[3] * b[3];
 756                store_vector4( inst, machine, result );
 757 #if DEBUG_FRAG
 758                printf("MUL (%g %g %g %g) = (%g %g %g %g) * (%g %g %g %g)\n",
 759                       result[0], result[1], result[2], result[3],
 760                       a[0], a[1], a[2], a[3],
 761                       b[0], b[1], b[2], b[3]);
 762 #endif
 763             }
 764             break;
 765          case FP_OPCODE_PK2H: /* pack two 16-bit floats */
 766             /* XXX this is probably wrong */
 767             {
 768                GLfloat a[4], result[4];
 769                const GLuint *rawBits = (const GLuint *) a;
 770                GLuint *rawResult = (GLuint *) result;
 771                fetch_vector4( &inst->SrcReg[0], machine, program, a );
 772                rawResult[0] = rawResult[1] = rawResult[2] = rawResult[3]
 773                   = rawBits[0] | (rawBits[1] << 16);
 774                store_vector4( inst, machine, result );
 775             }
 776             break;
 777          case FP_OPCODE_PK2US: /* pack two GLushorts */
 778             {
 779                GLfloat a[4], result[4];
 780                GLuint usx, usy, *rawResult = (GLuint *) result;
 781                fetch_vector4( &inst->SrcReg[0], machine, program, a );
 782                a[0] = CLAMP(a[0], 0.0F, 1.0F);
 783                a[1] = CLAMP(a[0], 0.0F, 1.0F);
 784                usx = IROUND(a[0] * 65535.0F);
 785                usy = IROUND(a[1] * 65535.0F);
 786                rawResult[0] = rawResult[1] = rawResult[2] = rawResult[3]
 787                   = usx | (usy << 16);
 788                store_vector4( inst, machine, result );
 789             }
 790             break;
 791          case FP_OPCODE_PK4B: /* pack four GLbytes */
 792             {
 793                GLfloat a[4], result[4];
 794                GLuint ubx, uby, ubz, ubw, *rawResult = (GLuint *) result;
 795                fetch_vector4( &inst->SrcReg[0], machine, program, a );
 796                a[0] = CLAMP(a[0], -128.0F / 127.0F, 1.0F);
 797                a[1] = CLAMP(a[1], -128.0F / 127.0F, 1.0F);
 798                a[2] = CLAMP(a[2], -128.0F / 127.0F, 1.0F);
 799                a[3] = CLAMP(a[3], -128.0F / 127.0F, 1.0F);
 800                ubx = IROUND(127.0F * a[0] + 128.0F);
 801                uby = IROUND(127.0F * a[1] + 128.0F);
 802                ubz = IROUND(127.0F * a[2] + 128.0F);
 803                ubw = IROUND(127.0F * a[3] + 128.0F);
 804                rawResult[0] = rawResult[1] = rawResult[2] = rawResult[3]
 805                   = ubx | (uby << 8) | (ubz << 16) | (ubw << 24);
 806                store_vector4( inst, machine, result );
 807             }
 808             break;
 809          case FP_OPCODE_PK4UB: /* pack four GLubytes */
 810             {
 811                GLfloat a[4], result[4];
 812                GLuint ubx, uby, ubz, ubw, *rawResult = (GLuint *) result;
 813                fetch_vector4( &inst->SrcReg[0], machine, program, a );
 814                a[0] = CLAMP(a[0], 0.0F, 1.0F);
 815                a[1] = CLAMP(a[1], 0.0F, 1.0F);
 816                a[2] = CLAMP(a[2], 0.0F, 1.0F);
 817                a[3] = CLAMP(a[3], 0.0F, 1.0F);
 818                ubx = IROUND(255.0F * a[0]);
 819                uby = IROUND(255.0F * a[1]);
 820                ubz = IROUND(255.0F * a[2]);
 821                ubw = IROUND(255.0F * a[3]);
 822                rawResult[0] = rawResult[1] = rawResult[2] = rawResult[3]
 823                   = ubx | (uby << 8) | (ubz << 16) | (ubw << 24);
 824                store_vector4( inst, machine, result );
 825             }
 826             break;
 827          case FP_OPCODE_POW:
 828             {
 829                GLfloat a[4], b[4], result[4];
 830                fetch_vector1( &inst->SrcReg[0], machine, program, a );
 831                fetch_vector1( &inst->SrcReg[1], machine, program, b );
 832                result[0] = result[1] = result[2] = result[3]
 833                   = _mesa_pow(a[0], b[0]);
 834                store_vector4( inst, machine, result );
 835             }
 836             break;
 837          case FP_OPCODE_RCP:
 838             {
 839                GLfloat a[4], result[4];
 840                fetch_vector1( &inst->SrcReg[0], machine, program, a );
 841 #if DEBUG_FRAG
 842                if (a[0] == 0)
 843                   printf("RCP(0)\n");
 844                else if (IS_INF_OR_NAN(a[0]))
 845                   printf("RCP(inf)\n");
 846 #endif
 847                result[0] = result[1] = result[2] = result[3]
 848                   = 1.0F / a[0];
 849                store_vector4( inst, machine, result );
 850             }
 851             break;
 852          case FP_OPCODE_RFL:
 853             {
 854                GLfloat axis[4], dir[4], result[4], tmp[4];
 855                fetch_vector4( &inst->SrcReg[0], machine, program, axis );
 856                fetch_vector4( &inst->SrcReg[1], machine, program, dir );
 857                tmp[3] = axis[0] * axis[0]
 858                       + axis[1] * axis[1]
 859                       + axis[2] * axis[2];
 860                tmp[0] = (2.0F * (axis[0] * dir[0] +
 861                                  axis[1] * dir[1] +
 862                                  axis[2] * dir[2])) / tmp[3];
 863                result[0] = tmp[0] * axis[0] - dir[0];
 864                result[1] = tmp[0] * axis[1] - dir[1];
 865                result[2] = tmp[0] * axis[2] - dir[2];
 866                /* result[3] is never written! XXX enforce in parser! */
 867                store_vector4( inst, machine, result );
 868             }
 869             break;
 870          case FP_OPCODE_RSQ: /* 1 / sqrt() */
 871             {
 872                GLfloat a[4], result[4];
 873                fetch_vector1( &inst->SrcReg[0], machine, program, a );
 874                result[0] = result[1] = result[2] = result[3] = INV_SQRTF(a[0]);
 875                store_vector4( inst, machine, result );
 876 #if DEBUG_FRAG
 877                printf("RSQ %g = 1/sqrt(%g)\n", result[0], a[0]);
 878 #endif
 879             }
 880             break;
 881          case FP_OPCODE_SEQ: /* set on equal */
 882             {
 883                GLfloat a[4], b[4], result[4];
 884                fetch_vector4( &inst->SrcReg[0], machine, program, a );
 885                fetch_vector4( &inst->SrcReg[1], machine, program, b );
 886                result[0] = (a[0] == b[0]) ? 1.0F : 0.0F;
 887                result[1] = (a[1] == b[1]) ? 1.0F : 0.0F;
 888                result[2] = (a[2] == b[2]) ? 1.0F : 0.0F;
 889                result[3] = (a[3] == b[3]) ? 1.0F : 0.0F;
 890                store_vector4( inst, machine, result );
 891             }
 892             break;
 893          case FP_OPCODE_SFL: /* set false, operands ignored */
 894             {
 895                static const GLfloat result[4] = { 0.0F, 0.0F, 0.0F, 0.0F };
 896                store_vector4( inst, machine, result );
 897             }
 898             break;
 899          case FP_OPCODE_SGE: /* set on greater or equal */
 900             {
 901                GLfloat a[4], b[4], result[4];
 902                fetch_vector4( &inst->SrcReg[0], machine, program, a );
 903                fetch_vector4( &inst->SrcReg[1], machine, program, b );
 904                result[0] = (a[0] >= b[0]) ? 1.0F : 0.0F;
 905                result[1] = (a[1] >= b[1]) ? 1.0F : 0.0F;
 906                result[2] = (a[2] >= b[2]) ? 1.0F : 0.0F;
 907                result[3] = (a[3] >= b[3]) ? 1.0F : 0.0F;
 908                store_vector4( inst, machine, result );
 909             }
 910             break;
 911          case FP_OPCODE_SGT: /* set on greater */
 912             {
 913                GLfloat a[4], b[4], result[4];
 914                fetch_vector4( &inst->SrcReg[0], machine, program, a );
 915                fetch_vector4( &inst->SrcReg[1], machine, program, b );
 916                result[0] = (a[0] > b[0]) ? 1.0F : 0.0F;
 917                result[1] = (a[1] > b[1]) ? 1.0F : 0.0F;
 918                result[2] = (a[2] > b[2]) ? 1.0F : 0.0F;
 919                result[3] = (a[3] > b[3]) ? 1.0F : 0.0F;
 920                store_vector4( inst, machine, result );
 921             }
 922             break;
 923          case FP_OPCODE_SIN:
 924             {
 925                GLfloat a[4], result[4];
 926                fetch_vector1( &inst->SrcReg[0], machine, program, a );
 927                result[0] = result[1] = result[2] = result[3] = _mesa_sin(a[0]);
 928                store_vector4( inst, machine, result );
 929             }
 930             break;
 931          case FP_OPCODE_SLE: /* set on less or equal */
 932             {
 933                GLfloat a[4], b[4], result[4];
 934                fetch_vector4( &inst->SrcReg[0], machine, program, a );
 935                fetch_vector4( &inst->SrcReg[1], machine, program, b );
 936                result[0] = (a[0] <= b[0]) ? 1.0F : 0.0F;
 937                result[1] = (a[1] <= b[1]) ? 1.0F : 0.0F;
 938                result[2] = (a[2] <= b[2]) ? 1.0F : 0.0F;
 939                result[3] = (a[3] <= b[3]) ? 1.0F : 0.0F;
 940                store_vector4( inst, machine, result );
 941             }
 942             break;
 943          case FP_OPCODE_SLT: /* set on less */
 944             {
 945                GLfloat a[4], b[4], result[4];
 946                fetch_vector4( &inst->SrcReg[0], machine, program, a );
 947                fetch_vector4( &inst->SrcReg[1], machine, program, b );
 948                result[0] = (a[0] < b[0]) ? 1.0F : 0.0F;
 949                result[1] = (a[1] < b[1]) ? 1.0F : 0.0F;
 950                result[2] = (a[2] < b[2]) ? 1.0F : 0.0F;
 951                result[3] = (a[3] < b[3]) ? 1.0F : 0.0F;
 952                store_vector4( inst, machine, result );
 953             }
 954             break;
 955          case FP_OPCODE_SNE: /* set on not equal */
 956             {
 957                GLfloat a[4], b[4], result[4];
 958                fetch_vector4( &inst->SrcReg[0], machine, program, a );
 959                fetch_vector4( &inst->SrcReg[1], machine, program, b );
 960                result[0] = (a[0] != b[0]) ? 1.0F : 0.0F;
 961                result[1] = (a[1] != b[1]) ? 1.0F : 0.0F;
 962                result[2] = (a[2] != b[2]) ? 1.0F : 0.0F;
 963                result[3] = (a[3] != b[3]) ? 1.0F : 0.0F;
 964                store_vector4( inst, machine, result );
 965             }
 966             break;
 967          case FP_OPCODE_STR: /* set true, operands ignored */
 968             {
 969                static const GLfloat result[4] = { 1.0F, 1.0F, 1.0F, 1.0F };
 970                store_vector4( inst, machine, result );
 971             }
 972             break;
 973          case FP_OPCODE_SUB:
 974             {
 975                GLfloat a[4], b[4], result[4];
 976                fetch_vector4( &inst->SrcReg[0], machine, program, a );
 977                fetch_vector4( &inst->SrcReg[1], machine, program, b );
 978                result[0] = a[0] - b[0];
 979                result[1] = a[1] - b[1];
 980                result[2] = a[2] - b[2];
 981                result[3] = a[3] - b[3];
 982                store_vector4( inst, machine, result );
 983             }
 984             break;
 985          case FP_OPCODE_TEX:
 986             /* Texel lookup */
 987             {
 988                GLfloat texcoord[4], color[4];
 989                fetch_vector4( &inst->SrcReg[0], machine, program, texcoord );
 990                /* XXX: Undo perspective divide from interpolate_texcoords() */
 991                fetch_texel( ctx, texcoord,
 992                             span->array->lambda[inst->TexSrcUnit][column],
 993                             inst->TexSrcUnit, color );
 994                store_vector4( inst, machine, color );
 995             }
 996             break;
 997          case FP_OPCODE_TXD:
 998             /* Texture lookup w/ partial derivatives for LOD */
 999             {
1000                GLfloat texcoord[4], dtdx[4], dtdy[4], color[4];
1001                fetch_vector4( &inst->SrcReg[0], machine, program, texcoord );
1002                fetch_vector4( &inst->SrcReg[1], machine, program, dtdx );
1003                fetch_vector4( &inst->SrcReg[2], machine, program, dtdy );
1004                fetch_texel_deriv( ctx, texcoord, dtdx, dtdy, inst->TexSrcUnit,
1005                                   color );
1006                store_vector4( inst, machine, color );
1007             }
1008             break;
1009          case FP_OPCODE_TXP:
1010             /* Texture lookup w/ perspective divide */
1011             {
1012                GLfloat texcoord[4], color[4];
1013                fetch_vector4( &inst->SrcReg[0], machine, program, texcoord );
1014                /* Already did perspective divide in interpolate_texcoords() */
1015                fetch_texel( ctx, texcoord,
1016                             span->array->lambda[inst->TexSrcUnit][column],
1017                             inst->TexSrcUnit, color );
1018                store_vector4( inst, machine, color );
1019             }
1020             break;
1021          case FP_OPCODE_UP2H: /* unpack two 16-bit floats */
1022             /* XXX this is probably wrong */
1023             {
1024                GLfloat a[4], result[4];
1025                const GLuint *rawBits = (const GLuint *) a;
1026                GLuint *rawResult = (GLuint *) result;
1027                fetch_vector1( &inst->SrcReg[0], machine, program, a );
1028                rawResult[0] = rawBits[0] & 0xffff;
1029                rawResult[1] = (rawBits[0] >> 16) & 0xffff;
1030                rawResult[2] = rawBits[0] & 0xffff;
1031                rawResult[3] = (rawBits[0] >> 16) & 0xffff;
1032                store_vector4( inst, machine, result );
1033             }
1034             break;
1035          case FP_OPCODE_UP2US: /* unpack two GLushorts */
1036             {
1037                GLfloat a[4], result[4];
1038                const GLuint *rawBits = (const GLuint *) a;
1039                fetch_vector1( &inst->SrcReg[0], machine, program, a );
1040                result[0] = (GLfloat) ((rawBits[0] >>  0) & 0xffff) / 65535.0F;
1041                result[1] = (GLfloat) ((rawBits[0] >> 16) & 0xffff) / 65535.0F;
1042                result[2] = result[0];
1043                result[3] = result[1];
1044                store_vector4( inst, machine, result );
1045             }
1046             break;
1047          case FP_OPCODE_UP4B: /* unpack four GLbytes */
1048             {
1049                GLfloat a[4], result[4];
1050                const GLuint *rawBits = (const GLuint *) a;
1051                fetch_vector1( &inst->SrcReg[0], machine, program, a );
1052                result[0] = (((rawBits[0] >>  0) & 0xff) - 128) / 127.0F;
1053                result[0] = (((rawBits[0] >>  8) & 0xff) - 128) / 127.0F;
1054                result[0] = (((rawBits[0] >> 16) & 0xff) - 128) / 127.0F;
1055                result[0] = (((rawBits[0] >> 24) & 0xff) - 128) / 127.0F;
1056                store_vector4( inst, machine, result );
1057             }
1058             break;
1059          case FP_OPCODE_UP4UB: /* unpack four GLubytes */
1060             {
1061                GLfloat a[4], result[4];
1062                const GLuint *rawBits = (const GLuint *) a;
1063                fetch_vector1( &inst->SrcReg[0], machine, program, a );
1064                result[0] = ((rawBits[0] >>  0) & 0xff) / 255.0F;
1065                result[0] = ((rawBits[0] >>  8) & 0xff) / 255.0F;
1066                result[0] = ((rawBits[0] >> 16) & 0xff) / 255.0F;
1067                result[0] = ((rawBits[0] >> 24) & 0xff) / 255.0F;
1068                store_vector4( inst, machine, result );
1069             }
1070             break;
1071          case FP_OPCODE_X2D: /* 2-D matrix transform */
1072             {
1073                GLfloat a[4], b[4], c[4], result[4];
1074                fetch_vector4( &inst->SrcReg[0], machine, program, a );
1075                fetch_vector4( &inst->SrcReg[1], machine, program, b );
1076                fetch_vector4( &inst->SrcReg[2], machine, program, c );
1077                result[0] = a[0] + b[0] * c[0] + b[1] * c[1];
1078                result[1] = a[1] + b[0] * c[2] + b[1] * c[3];
1079                result[2] = a[2] + b[0] * c[0] + b[1] * c[1];
1080                result[3] = a[3] + b[0] * c[2] + b[1] * c[3];
1081                store_vector4( inst, machine, result );
1082             }
1083             break;
1084          case FP_OPCODE_END:
1085             return GL_TRUE;
1086          default:
1087             _mesa_problem(ctx, "Bad opcode %d in _mesa_exec_fragment_program",
1088                           inst->Opcode);
1089             return GL_TRUE; /* return value doesn't matter */
1090       }
1091    }
1092    return GL_TRUE;
1093 }
1094
1095
1096 static void
1097 init_machine( GLcontext *ctx, struct fp_machine *machine,
1098               const struct fragment_program *program,
1099               const struct sw_span *span, GLuint col )
1100 {
1101    GLuint j, u;
1102
1103    /* Clear temporary registers */
1104    _mesa_bzero(machine->Registers + FP_TEMP_REG_START,
1105                MAX_NV_FRAGMENT_PROGRAM_TEMPS * 4 * sizeof(GLfloat));
1106
1107    /* Load program local parameters */
1108    for (j = 0; j < MAX_NV_FRAGMENT_PROGRAM_PARAMS; j++) {
1109       COPY_4V(machine->Registers[FP_PROG_REG_START + j],
1110               program->Base.LocalParams[j]);
1111    }
1112
1113    /* Load input registers */
1114    if (program->InputsRead & (1 << FRAG_ATTRIB_WPOS)) {
1115       GLfloat *wpos = machine->Registers[FP_INPUT_REG_START+FRAG_ATTRIB_WPOS];
1116       wpos[0] = span->x + col;
1117       wpos[1] = span->y;
1118       wpos[2] = (GLfloat) span->array->z[col] / ctx->DepthMaxF;
1119       wpos[3] = span->w + col * span->dwdx;
1120    }
1121    if (program->InputsRead & (1 << FRAG_ATTRIB_COL0)) {
1122       GLfloat *col0 = machine->Registers[FP_INPUT_REG_START+FRAG_ATTRIB_COL0];
1123       col0[0] = CHAN_TO_FLOAT(span->array->rgba[col][RCOMP]);
1124       col0[1] = CHAN_TO_FLOAT(span->array->rgba[col][GCOMP]);
1125       col0[2] = CHAN_TO_FLOAT(span->array->rgba[col][BCOMP]);
1126       col0[3] = CHAN_TO_FLOAT(span->array->rgba[col][ACOMP]);
1127    }
1128    if (program->InputsRead & (1 << FRAG_ATTRIB_COL1)) {
1129       GLfloat *col1 = machine->Registers[FP_INPUT_REG_START+FRAG_ATTRIB_COL1];
1130       col1[0] = CHAN_TO_FLOAT(span->array->spec[col][RCOMP]);
1131       col1[1] = CHAN_TO_FLOAT(span->array->spec[col][GCOMP]);
1132       col1[2] = CHAN_TO_FLOAT(span->array->spec[col][BCOMP]);
1133       col1[3] = CHAN_TO_FLOAT(span->array->spec[col][ACOMP]);
1134    }
1135    if (program->InputsRead & (1 << FRAG_ATTRIB_FOGC)) {
1136       GLfloat *fogc = machine->Registers[FP_INPUT_REG_START+FRAG_ATTRIB_FOGC];
1137       fogc[0] = span->array->fog[col];
1138       fogc[1] = 0.0F;
1139       fogc[2] = 0.0F;
1140       fogc[3] = 0.0F;
1141    }
1142    for (u = 0; u < ctx->Const.MaxTextureCoordUnits; u++) {
1143       if (program->InputsRead & (1 << (FRAG_ATTRIB_TEX0 + u))) {
1144          GLfloat *tex = machine->Registers[FP_INPUT_REG_START+FRAG_ATTRIB_TEX0+u];
1145          ASSERT(ctx->Texture._EnabledCoordUnits & (1 << u));
1146          COPY_4V(tex, span->array->texcoords[u][col]);
1147          ASSERT(tex[0] != 0 || tex[1] != 0 || tex[2] != 0);
1148       }
1149    }
1150 }
1151
1152
1153 void
1154 _swrast_exec_nv_fragment_program( GLcontext *ctx, struct sw_span *span )
1155 {
1156    const struct fragment_program *program = ctx->FragmentProgram.Current;
1157    GLuint i;
1158
1159    for (i = 0; i < span->end; i++) {
1160       if (span->array->mask[i]) {
1161          init_machine(ctx, &ctx->FragmentProgram.Machine,
1162                       ctx->FragmentProgram.Current, span, i);
1163
1164          if (!execute_program(ctx, program, ~0,
1165                               &ctx->FragmentProgram.Machine, span, i))
1166             span->array->mask[i] = GL_FALSE;  /* killed fragment */
1167
1168          /* Store output registers */
1169          {
1170             const GLfloat *colOut
1171                = ctx->FragmentProgram.Machine.Registers[FP_OUTPUT_REG_START];
1172             UNCLAMPED_FLOAT_TO_CHAN(span->array->rgba[i][RCOMP], colOut[0]);
1173             UNCLAMPED_FLOAT_TO_CHAN(span->array->rgba[i][GCOMP], colOut[1]);
1174             UNCLAMPED_FLOAT_TO_CHAN(span->array->rgba[i][BCOMP], colOut[2]);
1175             UNCLAMPED_FLOAT_TO_CHAN(span->array->rgba[i][ACOMP], colOut[3]);
1176          }
1177          /* depth value */
1178          if (ctx->FragmentProgram.Current->OutputsWritten & 2)
1179             span->array->z[i] = IROUND(ctx->FragmentProgram.Machine.Registers[FP_OUTPUT_REG_START + 2][0] * ctx->DepthMaxF);
1180       }
1181    }
1182 }
1183