src/mesa/swrast/s_nvfragprog.c

   1 /*
   2  * Mesa 3-D graphics library
   3  * Version:  5.1
   4  *
   5  * Copyright (C) 1999-2003  Brian Paul   All Rights Reserved.
   6  *
   7  * Permission is hereby granted, free of charge, to any person obtaining a
   8  * copy of this software and associated documentation files (the "Software"),
   9  * to deal in the Software without restriction, including without limitation
  10  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  11  * and/or sell copies of the Software, and to permit persons to whom the
  12  * Software is furnished to do so, subject to the following conditions:
  13  *
  14  * The above copyright notice and this permission notice shall be included
  15  * in all copies or substantial portions of the Software.
  16  *
  17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  18  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  20  * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
  21  * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  22  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  23  */
  24
  25
  26 #include "glheader.h"
  27 #include "colormac.h"
  28 #include "context.h"
  29 #include "nvfragprog.h"
  30 #include "macros.h"
  31 #include "program.h"
  32
  33 #include "s_nvfragprog.h"
  34 #include "s_span.h"
  35 #include "s_texture.h"
  36
  37
  38 /* if 1, print some debugging info */
  39 #define DEBUG_FRAG 0
  40
  41
  42 /**
  43  * Fetch a texel.
  44  */
  45 static void
  46 fetch_texel( GLcontext *ctx, const GLfloat texcoord[4], GLfloat lambda,
  47              GLuint unit, GLfloat color[4] )
  48 {
  49    GLchan rgba[4];
  50    SWcontext *swrast = SWRAST_CONTEXT(ctx);
  51
  52    swrast->TextureSample[unit](ctx, unit, ctx->Texture.Unit[unit]._Current,
  53                                1, (const GLfloat (*)[4]) texcoord,
  54                                &lambda, &rgba);
  55    color[0] = CHAN_TO_FLOAT(rgba[0]);
  56    color[1] = CHAN_TO_FLOAT(rgba[1]);
  57    color[2] = CHAN_TO_FLOAT(rgba[2]);
  58    color[3] = CHAN_TO_FLOAT(rgba[3]);
  59 }
  60
  61
  62 /**
  63  * Fetch a texel with the given partial derivatives to compute a level
  64  * of detail in the mipmap.
  65  */
  66 static void
  67 fetch_texel_deriv( GLcontext *ctx, const GLfloat texcoord[4],
  68                    const GLfloat texdx[4], const GLfloat texdy[4],
  69                    GLuint unit, GLfloat color[4] )
  70 {
  71    SWcontext *swrast = SWRAST_CONTEXT(ctx);
  72    const struct gl_texture_object *texObj = ctx->Texture.Unit[unit]._Current;
  73    const struct gl_texture_image *texImg = texObj->Image[texObj->BaseLevel];
  74    const GLfloat texW = (GLfloat) texImg->WidthScale;
  75    const GLfloat texH = (GLfloat) texImg->HeightScale;
  76    GLchan rgba[4];
  77
  78    GLfloat lambda = _swrast_compute_lambda(texdx[0], texdy[0], /* ds/dx, ds/dy */
  79                                          texdx[1], texdy[1], /* dt/dx, dt/dy */
  80                                          texdx[3], texdy[2], /* dq/dx, dq/dy */
  81                                          texW, texH,
  82                                          texcoord[0], texcoord[1], texcoord[3],
  83                                          1.0F / texcoord[3]);
  84
  85    swrast->TextureSample[unit](ctx, unit, ctx->Texture.Unit[unit]._Current,
  86                                1, (const GLfloat (*)[4]) texcoord,
  87                                &lambda, &rgba);
  88    color[0] = CHAN_TO_FLOAT(rgba[0]);
  89    color[1] = CHAN_TO_FLOAT(rgba[1]);
  90    color[2] = CHAN_TO_FLOAT(rgba[2]);
  91    color[3] = CHAN_TO_FLOAT(rgba[3]);
  92 }
  93
  94
  95
  96 /**
  97  * Fetch a 4-element float vector from the given source register.
  98  * Apply swizzling and negating as needed.
  99  */
 100 static void
 101 fetch_vector4( GLcontext *ctx,
 102                const struct fp_src_register *source,
 103                struct fp_machine *machine,
 104                const struct fragment_program *program,
 105                GLfloat result[4] )
 106 {
 107    const GLfloat *src;
 108
 109    switch (source->File) {
 110       case PROGRAM_TEMPORARY:
 111          ASSERT(source->Index < MAX_NV_FRAGMENT_PROGRAM_TEMPS);
 112          src = machine->Temporaries[source->Index];
 113          break;
 114       case PROGRAM_INPUT:
 115          ASSERT(source->Index < MAX_NV_FRAGMENT_PROGRAM_INPUTS);
 116          src = machine->Inputs[source->Index];
 117          break;
 118       case PROGRAM_LOCAL_PARAM:
 119          ASSERT(source->Index < MAX_PROGRAM_LOCAL_PARAMS);
 120          src = program->Base.LocalParams[source->Index];
 121          break;
 122       case PROGRAM_ENV_PARAM:
 123          ASSERT(source->Index < MAX_NV_FRAGMENT_PROGRAM_PARAMS);
 124          src = ctx->FragmentProgram.Parameters[source->Index];
 125          break;
 126       case PROGRAM_NAMED_PARAM:
 127          ASSERT(source->Index < program->Parameters->NumParameters);
 128          src = program->Parameters->Parameters[source->Index].Values;
 129          break;
 130       case PROGRAM_STATE_VAR:
 131          abort();
 132       default:
 133          _mesa_problem(ctx, "Invalid input register file in fetch_vector4");
 134          return;
 135    }
 136
 137    result[0] = src[source->Swizzle[0]];
 138    result[1] = src[source->Swizzle[1]];
 139    result[2] = src[source->Swizzle[2]];
 140    result[3] = src[source->Swizzle[3]];
 141
 142    if (source->NegateBase) {
 143       result[0] = -result[0];
 144       result[1] = -result[1];
 145       result[2] = -result[2];
 146       result[3] = -result[3];
 147    }
 148    if (source->Abs) {
 149       result[0] = FABSF(result[0]);
 150       result[1] = FABSF(result[1]);
 151       result[2] = FABSF(result[2]);
 152       result[3] = FABSF(result[3]);
 153    }
 154    if (source->NegateAbs) {
 155       result[0] = -result[0];
 156       result[1] = -result[1];
 157       result[2] = -result[2];
 158       result[3] = -result[3];
 159    }
 160 }
 161
 162
 163 /**
 164  * Fetch the derivative with respect to X for the given register.
 165  * \return GL_TRUE if it was easily computed or GL_FALSE if we
 166  * need to execute another instance of the program (ugh)!
 167  */
 168 static GLboolean
 169 fetch_vector4_deriv( const struct fp_src_register *source,
 170                      const struct sw_span *span,
 171                      char xOrY, GLfloat result[4] )
 172 {
 173    GLfloat src[4];
 174
 175    ASSERT(xOrY == 'X' || xOrY == 'Y');
 176
 177    assert(source->File == PROGRAM_INPUT);
 178
 179    switch (source->Index) {
 180    case FRAG_ATTRIB_WPOS:
 181       if (xOrY == 'X') {
 182          src[0] = 1.0;
 183          src[1] = 0.0;
 184          src[2] = span->dzdx;
 185          src[3] = span->dwdx;
 186       }
 187       else {
 188          src[0] = 0.0;
 189          src[1] = 1.0;
 190          src[2] = span->dzdy;
 191          src[3] = span->dwdy;
 192       }
 193       break;
 194    case FRAG_ATTRIB_COL0:
 195       if (xOrY == 'X') {
 196          src[0] = span->drdx * (1.0F / CHAN_MAXF);
 197          src[1] = span->dgdx * (1.0F / CHAN_MAXF);
 198          src[2] = span->dbdx * (1.0F / CHAN_MAXF);
 199          src[3] = span->dadx * (1.0F / CHAN_MAXF);
 200       }
 201       else {
 202          src[0] = span->drdy * (1.0F / CHAN_MAXF);
 203          src[1] = span->dgdy * (1.0F / CHAN_MAXF);
 204          src[2] = span->dbdy * (1.0F / CHAN_MAXF);
 205          src[3] = span->dady * (1.0F / CHAN_MAXF);
 206       }
 207       break;
 208    case FRAG_ATTRIB_COL1:
 209       if (xOrY == 'X') {
 210          src[0] = span->dsrdx * (1.0F / CHAN_MAXF);
 211          src[1] = span->dsgdx * (1.0F / CHAN_MAXF);
 212          src[2] = span->dsbdx * (1.0F / CHAN_MAXF);
 213          src[3] = 0.0; /* XXX need this */
 214       }
 215       else {
 216          src[0] = span->dsrdy * (1.0F / CHAN_MAXF);
 217          src[1] = span->dsgdy * (1.0F / CHAN_MAXF);
 218          src[2] = span->dsbdy * (1.0F / CHAN_MAXF);
 219          src[3] = 0.0; /* XXX need this */
 220       }
 221       break;
 222    case FRAG_ATTRIB_FOGC:
 223       if (xOrY == 'X') {
 224          src[0] = span->dfogdx;
 225          src[1] = 0.0;
 226          src[2] = 0.0;
 227          src[3] = 0.0;
 228       }
 229       else {
 230          src[0] = span->dfogdy;
 231          src[1] = 0.0;
 232          src[2] = 0.0;
 233          src[3] = 0.0;
 234       }
 235       break;
 236    case FRAG_ATTRIB_TEX0:
 237    case FRAG_ATTRIB_TEX1:
 238    case FRAG_ATTRIB_TEX2:
 239    case FRAG_ATTRIB_TEX3:
 240    case FRAG_ATTRIB_TEX4:
 241    case FRAG_ATTRIB_TEX5:
 242    case FRAG_ATTRIB_TEX6:
 243    case FRAG_ATTRIB_TEX7:
 244       if (xOrY == 'X') {
 245          const GLuint u = source->Index - FRAG_ATTRIB_TEX0;
 246          src[0] = span->texStepX[u][0] * (1.0F / CHAN_MAXF);
 247          src[1] = span->texStepX[u][1] * (1.0F / CHAN_MAXF);
 248          src[2] = span->texStepX[u][2] * (1.0F / CHAN_MAXF);
 249          src[3] = span->texStepX[u][3] * (1.0F / CHAN_MAXF);
 250       }
 251       else {
 252          const GLuint u = source->Index - FRAG_ATTRIB_TEX0;
 253          src[0] = span->texStepY[u][0] * (1.0F / CHAN_MAXF);
 254          src[1] = span->texStepY[u][1] * (1.0F / CHAN_MAXF);
 255          src[2] = span->texStepY[u][2] * (1.0F / CHAN_MAXF);
 256          src[3] = span->texStepY[u][3] * (1.0F / CHAN_MAXF);
 257       }
 258       break;
 259    default:
 260       return GL_FALSE;
 261    }
 262
 263    result[0] = src[source->Swizzle[0]];
 264    result[1] = src[source->Swizzle[1]];
 265    result[2] = src[source->Swizzle[2]];
 266    result[3] = src[source->Swizzle[3]];
 267
 268    if (source->NegateBase) {
 269       result[0] = -result[0];
 270       result[1] = -result[1];
 271       result[2] = -result[2];
 272       result[3] = -result[3];
 273    }
 274    if (source->Abs) {
 275       result[0] = FABSF(result[0]);
 276       result[1] = FABSF(result[1]);
 277       result[2] = FABSF(result[2]);
 278       result[3] = FABSF(result[3]);
 279    }
 280    if (source->NegateAbs) {
 281       result[0] = -result[0];
 282       result[1] = -result[1];
 283       result[2] = -result[2];
 284       result[3] = -result[3];
 285    }
 286    return GL_TRUE;
 287 }
 288
 289
 290 /**
 291  * As above, but only return result[0] element.
 292  */
 293 static void
 294 fetch_vector1( GLcontext *ctx,
 295                const struct fp_src_register *source,
 296                const struct fp_machine *machine,
 297                const struct fragment_program *program,
 298                GLfloat result[4] )
 299 {
 300    const GLfloat *src;
 301
 302    switch (source->File) {
 303       case PROGRAM_TEMPORARY:
 304          ASSERT(source->Index < MAX_NV_FRAGMENT_PROGRAM_TEMPS);
 305          src = machine->Temporaries[source->Index];
 306          break;
 307       case PROGRAM_INPUT:
 308          ASSERT(source->Index < MAX_NV_FRAGMENT_PROGRAM_INPUTS);
 309          src = machine->Inputs[source->Index];
 310          break;
 311       case PROGRAM_LOCAL_PARAM:
 312          ASSERT(source->Index < MAX_PROGRAM_LOCAL_PARAMS);
 313          src = program->Base.LocalParams[source->Index];
 314          break;
 315       case PROGRAM_ENV_PARAM:
 316          ASSERT(source->Index < MAX_NV_FRAGMENT_PROGRAM_PARAMS);
 317          src = ctx->FragmentProgram.Parameters[source->Index];
 318          break;
 319       case PROGRAM_NAMED_PARAM:
 320          ASSERT(source->Index < program->Parameters->NumParameters);
 321          src = program->Parameters->Parameters[source->Index].Values;
 322          break;
 323       case PROGRAM_STATE_VAR:
 324          abort();
 325       default:
 326          _mesa_problem(ctx, "Invalid input register file in fetch_vector1");
 327          return;
 328    }
 329
 330    result[0] = src[source->Swizzle[0]];
 331
 332    if (source->NegateBase) {
 333       result[0] = -result[0];
 334    }
 335    if (source->Abs) {
 336       result[0] = FABSF(result[0]);
 337    }
 338    if (source->NegateAbs) {
 339       result[0] = -result[0];
 340    }
 341 }
 342
 343
 344 /*
 345  * Test value against zero and return GT, LT, EQ or UN if NaN.
 346  */
 347 static INLINE GLuint
 348 generate_cc( float value )
 349 {
 350    if (value != value)
 351       return COND_UN;  /* NaN */
 352    if (value > 0.0F)
 353       return COND_GT;
 354    if (value < 0.0F)
 355       return COND_LT;
 356    return COND_EQ;
 357 }
 358
 359 /*
 360  * Test if the ccMaskRule is satisfied by the given condition code.
 361  * Used to mask destination writes according to the current condition codee.
 362  */
 363 static INLINE GLboolean
 364 test_cc(GLuint condCode, GLuint ccMaskRule)
 365 {
 366    switch (ccMaskRule) {
 367    case COND_EQ: return (condCode == COND_EQ);
 368    case COND_NE: return (condCode != COND_EQ);
 369    case COND_LT: return (condCode == COND_LT);
 370    case COND_GE: return (condCode == COND_GT || condCode == COND_EQ);
 371    case COND_LE: return (condCode == COND_LT || condCode == COND_EQ);
 372    case COND_GT: return (condCode == COND_GT);
 373    case COND_TR: return GL_TRUE;
 374    case COND_FL: return GL_FALSE;
 375    default:      return GL_TRUE;
 376    }
 377 }
 378
 379
 380 /**
 381  * Store 4 floats into a register.  Observe the instructions saturate and
 382  * set-condition-code flags.
 383  */
 384 static void
 385 store_vector4( const struct fp_instruction *inst,
 386                struct fp_machine *machine,
 387                const GLfloat value[4] )
 388 {
 389    const struct fp_dst_register *dest = &(inst->DstReg);
 390    const GLboolean clamp = inst->Saturate;
 391    const GLboolean updateCC = inst->UpdateCondRegister;
 392    GLfloat *dstReg;
 393    GLfloat clampedValue[4];
 394    const GLboolean *writeMask = dest->WriteMask;
 395    GLboolean condWriteMask[4];
 396
 397    switch (dest->File) {
 398       case PROGRAM_OUTPUT:
 399          dstReg = machine->Outputs[dest->Index];
 400          break;
 401       case PROGRAM_TEMPORARY:
 402          dstReg = machine->Temporaries[dest->Index];
 403          break;
 404       default:
 405          _mesa_problem(NULL, "bad register file in store_vector4(fp)");
 406          return;
 407    }
 408
 409 #if DEBUG_FRAG
 410    if (value[0] > 1.0e10 ||
 411        IS_INF_OR_NAN(value[0]) ||
 412        IS_INF_OR_NAN(value[1]) ||
 413        IS_INF_OR_NAN(value[2]) ||
 414        IS_INF_OR_NAN(value[3])  )
 415       printf("store %g %g %g %g\n", value[0], value[1], value[2], value[3]);
 416 #endif
 417
 418    if (clamp) {
 419       clampedValue[0] = CLAMP(value[0], 0.0F, 1.0F);
 420       clampedValue[1] = CLAMP(value[1], 0.0F, 1.0F);
 421       clampedValue[2] = CLAMP(value[2], 0.0F, 1.0F);
 422       clampedValue[3] = CLAMP(value[3], 0.0F, 1.0F);
 423       value = clampedValue;
 424    }
 425
 426    if (dest->CondMask != COND_TR) {
 427       condWriteMask[0] = writeMask[0]
 428          && test_cc(machine->CondCodes[dest->CondSwizzle[0]], dest->CondMask);
 429       condWriteMask[1] = writeMask[1]
 430          && test_cc(machine->CondCodes[dest->CondSwizzle[1]], dest->CondMask);
 431       condWriteMask[2] = writeMask[2]
 432          && test_cc(machine->CondCodes[dest->CondSwizzle[2]], dest->CondMask);
 433       condWriteMask[3] = writeMask[3]
 434          && test_cc(machine->CondCodes[dest->CondSwizzle[3]], dest->CondMask);
 435       writeMask = condWriteMask;
 436    }
 437
 438    if (writeMask[0]) {
 439       dstReg[0] = value[0];
 440       if (updateCC)
 441          machine->CondCodes[0] = generate_cc(value[0]);
 442    }
 443    if (writeMask[1]) {
 444       dstReg[1] = value[1];
 445       if (updateCC)
 446          machine->CondCodes[1] = generate_cc(value[1]);
 447    }
 448    if (writeMask[2]) {
 449       dstReg[2] = value[2];
 450       if (updateCC)
 451          machine->CondCodes[2] = generate_cc(value[2]);
 452    }
 453    if (writeMask[3]) {
 454       dstReg[3] = value[3];
 455       if (updateCC)
 456          machine->CondCodes[3] = generate_cc(value[3]);
 457    }
 458 }
 459
 460
 461 /**
 462  * Initialize a new machine state instance from an existing one, adding
 463  * the partial derivatives onto the input registers.
 464  * Used to implement DDX and DDY instructions in non-trivial cases.
 465  */
 466 static void
 467 init_machine_deriv( GLcontext *ctx,
 468                     const struct fp_machine *machine,
 469                     const struct fragment_program *program,
 470                     const struct sw_span *span, char xOrY,
 471                     struct fp_machine *dMachine )
 472 {
 473    GLuint u;
 474
 475    ASSERT(xOrY == 'X' || xOrY == 'Y');
 476
 477    /* copy existing machine */
 478    _mesa_memcpy(dMachine, machine, sizeof(struct fp_machine));
 479
 480    /* Clear temporary registers */
 481    _mesa_bzero( (void*) machine->Temporaries,
 482                MAX_NV_FRAGMENT_PROGRAM_TEMPS * 4 * sizeof(GLfloat));
 483
 484    /* Add derivatives */
 485    if (program->InputsRead & (1 << FRAG_ATTRIB_WPOS)) {
 486       GLfloat *wpos = (GLfloat*) machine->Inputs[FRAG_ATTRIB_WPOS];
 487       if (xOrY == 'X') {
 488          wpos[0] += 1.0F;
 489          wpos[1] += 0.0F;
 490          wpos[2] += span->dzdx;
 491          wpos[3] += span->dwdx;
 492       }
 493       else {
 494          wpos[0] += 0.0F;
 495          wpos[1] += 1.0F;
 496          wpos[2] += span->dzdy;
 497          wpos[3] += span->dwdy;
 498       }
 499    }
 500    if (program->InputsRead & (1 << FRAG_ATTRIB_COL0)) {
 501       GLfloat *col0 = (GLfloat*) machine->Inputs[FRAG_ATTRIB_COL0];
 502       if (xOrY == 'X') {
 503          col0[0] += span->drdx * (1.0F / CHAN_MAXF);
 504          col0[1] += span->dgdx * (1.0F / CHAN_MAXF);
 505          col0[2] += span->dbdx * (1.0F / CHAN_MAXF);
 506          col0[3] += span->dadx * (1.0F / CHAN_MAXF);
 507       }
 508       else {
 509          col0[0] += span->drdy * (1.0F / CHAN_MAXF);
 510          col0[1] += span->dgdy * (1.0F / CHAN_MAXF);
 511          col0[2] += span->dbdy * (1.0F / CHAN_MAXF);
 512          col0[3] += span->dady * (1.0F / CHAN_MAXF);
 513       }
 514    }
 515    if (program->InputsRead & (1 << FRAG_ATTRIB_COL1)) {
 516       GLfloat *col1 = (GLfloat*) machine->Inputs[FRAG_ATTRIB_COL1];
 517       if (xOrY == 'X') {
 518          col1[0] += span->dsrdx * (1.0F / CHAN_MAXF);
 519          col1[1] += span->dsgdx * (1.0F / CHAN_MAXF);
 520          col1[2] += span->dsbdx * (1.0F / CHAN_MAXF);
 521          col1[3] += 0.0; /*XXX fix */
 522       }
 523       else {
 524          col1[0] += span->dsrdy * (1.0F / CHAN_MAXF);
 525          col1[1] += span->dsgdy * (1.0F / CHAN_MAXF);
 526          col1[2] += span->dsbdy * (1.0F / CHAN_MAXF);
 527          col1[3] += 0.0; /*XXX fix */
 528       }
 529    }
 530    if (program->InputsRead & (1 << FRAG_ATTRIB_FOGC)) {
 531       GLfloat *fogc = (GLfloat*) machine->Inputs[FRAG_ATTRIB_FOGC];
 532       if (xOrY == 'X') {
 533          fogc[0] += span->dfogdx;
 534       }
 535       else {
 536          fogc[0] += span->dfogdy;
 537       }
 538    }
 539    for (u = 0; u < ctx->Const.MaxTextureCoordUnits; u++) {
 540       if (program->InputsRead & (1 << (FRAG_ATTRIB_TEX0 + u))) {
 541          GLfloat *tex = (GLfloat*) machine->Inputs[FRAG_ATTRIB_TEX0 + u];
 542          if (xOrY == 'X') {
 543             tex[0] += span->texStepX[u][0];
 544             tex[1] += span->texStepX[u][1];
 545             tex[2] += span->texStepX[u][2];
 546             tex[3] += span->texStepX[u][3];
 547          }
 548          else {
 549             tex[0] += span->texStepY[u][0];
 550             tex[1] += span->texStepY[u][1];
 551             tex[2] += span->texStepY[u][2];
 552             tex[3] += span->texStepY[u][3];
 553          }
 554       }
 555    }
 556
 557    /* init condition codes */
 558    dMachine->CondCodes[0] = COND_EQ;
 559    dMachine->CondCodes[1] = COND_EQ;
 560    dMachine->CondCodes[2] = COND_EQ;
 561    dMachine->CondCodes[3] = COND_EQ;
 562 }
 563
 564
 565 /**
 566  * Execute the given fragment program.
 567  * NOTE: we do everything in single-precision floating point; we don't
 568  * currently observe the single/half/fixed-precision qualifiers.
 569  * \param ctx - rendering context
 570  * \param program - the fragment program to execute
 571  * \param machine - machine state (register file)
 572  * \param maxInst - max number of instructions to execute
 573  * \return GL_TRUE if program completed or GL_FALSE if program executed KIL.
 574  */
 575 static GLboolean
 576 execute_program( GLcontext *ctx,
 577                  const struct fragment_program *program, GLuint maxInst,
 578                  struct fp_machine *machine, const struct sw_span *span,
 579                  GLuint column )
 580 {
 581    GLuint pc;
 582
 583 #if DEBUG_FRAG
 584    printf("execute fragment program --------------------\n");
 585 #endif
 586
 587    for (pc = 0; pc < maxInst; pc++) {
 588       const struct fp_instruction *inst = program->Instructions + pc;
 589
 590       if (ctx->FragmentProgram.CallbackEnabled &&
 591           ctx->FragmentProgram.Callback) {
 592          ctx->FragmentProgram.CurrentPosition = inst->StringPos;
 593          ctx->FragmentProgram.Callback(program->Base.Target,
 594                                        ctx->FragmentProgram.CallbackData);
 595       }
 596
 597       switch (inst->Opcode) {
 598          case FP_OPCODE_ABS:
 599             {
 600                GLfloat a[4], result[4];
 601                fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
 602                result[0] = FABSF(a[0]);
 603                result[1] = FABSF(a[1]);
 604                result[2] = FABSF(a[2]);
 605                result[3] = FABSF(a[3]);
 606                store_vector4( inst, machine, result );
 607             }
 608             break;
 609          case FP_OPCODE_ADD:
 610             {
 611                GLfloat a[4], b[4], result[4];
 612                fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
 613                fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
 614                result[0] = a[0] + b[0];
 615                result[1] = a[1] + b[1];
 616                result[2] = a[2] + b[2];
 617                result[3] = a[3] + b[3];
 618                store_vector4( inst, machine, result );
 619             }
 620             break;
 621          case FP_OPCODE_CMP:
 622             {
 623                GLfloat a[4], b[4], c[4], result[4];
 624                fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
 625                fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
 626                fetch_vector4( ctx, &inst->SrcReg[2], machine, program, c );
 627                result[0] = a[0] < 0.0F ? b[0] : c[0];
 628                result[1] = a[1] < 0.0F ? b[1] : c[1];
 629                result[2] = a[2] < 0.0F ? b[2] : c[2];
 630                result[3] = a[3] < 0.0F ? b[3] : c[3];
 631                store_vector4( inst, machine, result );
 632             }
 633             break;
 634          case FP_OPCODE_COS:
 635             {
 636                GLfloat a[4], result[4];
 637                fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
 638                result[0] = result[1] = result[2] = result[3] = (GLfloat)_mesa_cos(a[0]);
 639                store_vector4( inst, machine, result );
 640             }
 641             break;
 642          case FP_OPCODE_DDX: /* Partial derivative with respect to X */
 643             {
 644                GLfloat a[4], aNext[4], result[4];
 645                struct fp_machine dMachine;
 646                if (!fetch_vector4_deriv(&inst->SrcReg[0], span, 'X', result)) {
 647                   /* This is tricky.  Make a copy of the current machine state,
 648                    * increment the input registers by the dx or dy partial
 649                    * derivatives, then re-execute the program up to the
 650                    * preceeding instruction, then fetch the source register.
 651                    * Finally, find the difference in the register values for
 652                    * the original and derivative runs.
 653                    */
 654                   fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a);
 655                   init_machine_deriv(ctx, machine, program, span,
 656                                      'X', &dMachine);
 657                   execute_program(ctx, program, pc, &dMachine, span, column);
 658                   fetch_vector4( ctx, &inst->SrcReg[0], &dMachine, program, aNext );
 659                   result[0] = aNext[0] - a[0];
 660                   result[1] = aNext[1] - a[1];
 661                   result[2] = aNext[2] - a[2];
 662                   result[3] = aNext[3] - a[3];
 663                }
 664                store_vector4( inst, machine, result );
 665             }
 666             break;
 667          case FP_OPCODE_DDY: /* Partial derivative with respect to Y */
 668             {
 669                GLfloat a[4], aNext[4], result[4];
 670                struct fp_machine dMachine;
 671                if (!fetch_vector4_deriv(&inst->SrcReg[0], span, 'Y', result)) {
 672                   init_machine_deriv(ctx, machine, program, span,
 673                                      'Y', &dMachine);
 674                   fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a);
 675                   execute_program(ctx, program, pc, &dMachine, span, column);
 676                   fetch_vector4( ctx, &inst->SrcReg[0], &dMachine, program, aNext );
 677                   result[0] = aNext[0] - a[0];
 678                   result[1] = aNext[1] - a[1];
 679                   result[2] = aNext[2] - a[2];
 680                   result[3] = aNext[3] - a[3];
 681                }
 682                store_vector4( inst, machine, result );
 683             }
 684             break;
 685          case FP_OPCODE_DP3:
 686             {
 687                GLfloat a[4], b[4], result[4];
 688                fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
 689                fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
 690                result[0] = result[1] = result[2] = result[3] =
 691                   a[0] * b[0] + a[1] * b[1] + a[2] * b[2];
 692                store_vector4( inst, machine, result );
 693 #if DEBUG_FRAG
 694                printf("DP3 %g = (%g %g %g) . (%g %g %g)\n",
 695                       result[0], a[0], a[1], a[2], b[0], b[1], b[2]);
 696 #endif
 697             }
 698             break;
 699          case FP_OPCODE_DP4:
 700             {
 701                GLfloat a[4], b[4], result[4];
 702                fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
 703                fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
 704                result[0] = result[1] = result[2] = result[3] =
 705                   a[0] * b[0] + a[1] * b[1] + a[2] * b[2] + a[3] * b[3];
 706                store_vector4( inst, machine, result );
 707             }
 708             break;
 709          case FP_OPCODE_DPH:
 710             {
 711                GLfloat a[4], b[4], result[4];
 712                fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
 713                fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
 714                result[0] = result[1] = result[2] = result[3] =
 715                   a[0] * b[0] + a[1] * b[1] + a[2] * b[2] + b[3];
 716                store_vector4( inst, machine, result );
 717             }
 718             break;
 719          case FP_OPCODE_DST: /* Distance vector */
 720             {
 721                GLfloat a[4], b[4], result[4];
 722                fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
 723                fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
 724                result[0] = 1.0F;
 725                result[1] = a[1] * b[1];
 726                result[2] = a[2];
 727                result[3] = b[3];
 728                store_vector4( inst, machine, result );
 729             }
 730             break;
 731          case FP_OPCODE_EX2: /* Exponential base 2 */
 732             {
 733                GLfloat a[4], result[4];
 734                fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
 735                result[0] = result[1] = result[2] = result[3] =
 736                   (GLfloat) _mesa_pow(2.0, a[0]);
 737                store_vector4( inst, machine, result );
 738             }
 739             break;
 740          case FP_OPCODE_FLR:
 741             {
 742                GLfloat a[4], result[4];
 743                fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
 744                result[0] = FLOORF(a[0]);
 745                result[1] = FLOORF(a[1]);
 746                result[2] = FLOORF(a[2]);
 747                result[3] = FLOORF(a[3]);
 748                store_vector4( inst, machine, result );
 749             }
 750             break;
 751          case FP_OPCODE_FRC:
 752             {
 753                GLfloat a[4], result[4];
 754                fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
 755                result[0] = a[0] - FLOORF(a[0]);
 756                result[1] = a[1] - FLOORF(a[1]);
 757                result[2] = a[2] - FLOORF(a[2]);
 758                result[3] = a[3] - FLOORF(a[3]);
 759                store_vector4( inst, machine, result );
 760             }
 761             break;
 762          case FP_OPCODE_KIL:
 763             {
 764                const GLuint *swizzle = inst->DstReg.CondSwizzle;
 765                const GLuint condMask = inst->DstReg.CondMask;
 766                if (test_cc(machine->CondCodes[swizzle[0]], condMask) ||
 767                    test_cc(machine->CondCodes[swizzle[1]], condMask) ||
 768                    test_cc(machine->CondCodes[swizzle[2]], condMask) ||
 769                    test_cc(machine->CondCodes[swizzle[3]], condMask)) {
 770                   return GL_FALSE;
 771                }
 772             }
 773             break;
 774          case FP_OPCODE_LG2:  /* log base 2 */
 775             {
 776                GLfloat a[4], result[4];
 777                fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
 778                result[0] = result[1] = result[2] = result[3]
 779                   = LOG2(a[0]);
 780                store_vector4( inst, machine, result );
 781             }
 782             break;
 783          case FP_OPCODE_LIT:
 784             {
 785                GLfloat a[4], result[4];
 786                fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
 787                if (a[0] < 0.0F)
 788                   a[0] = 0.0F;
 789                if (a[1] < 0.0F)
 790                   a[1] = 0.0F;
 791                result[0] = 1.0F;
 792                result[1] = a[0];
 793                result[2] = (a[0] > 0.0F) ? (GLfloat)_mesa_pow(2.0, a[3]) : 0.0F;
 794                result[3] = 1.0F;
 795                store_vector4( inst, machine, result );
 796             }
 797             break;
 798          case FP_OPCODE_LRP:
 799             {
 800                GLfloat a[4], b[4], c[4], result[4];
 801                fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
 802                fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
 803                fetch_vector4( ctx, &inst->SrcReg[2], machine, program, c );
 804                result[0] = a[0] * b[0] + (1.0F - a[0]) * c[0];
 805                result[1] = a[1] * b[1] + (1.0F - a[1]) * c[1];
 806                result[2] = a[2] * b[2] + (1.0F - a[2]) * c[2];
 807                result[3] = a[3] * b[3] + (1.0F - a[3]) * c[3];
 808                store_vector4( inst, machine, result );
 809             }
 810             break;
 811          case FP_OPCODE_MAD:
 812             {
 813                GLfloat a[4], b[4], c[4], result[4];
 814                fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
 815                fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
 816                fetch_vector4( ctx, &inst->SrcReg[2], machine, program, c );
 817                result[0] = a[0] * b[0] + c[0];
 818                result[1] = a[1] * b[1] + c[1];
 819                result[2] = a[2] * b[2] + c[2];
 820                result[3] = a[3] * b[3] + c[3];
 821                store_vector4( inst, machine, result );
 822             }
 823             break;
 824          case FP_OPCODE_MAX:
 825             {
 826                GLfloat a[4], b[4], result[4];
 827                fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
 828                fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
 829                result[0] = MAX2(a[0], b[0]);
 830                result[1] = MAX2(a[1], b[1]);
 831                result[2] = MAX2(a[2], b[2]);
 832                result[3] = MAX2(a[3], b[3]);
 833                store_vector4( inst, machine, result );
 834             }
 835             break;
 836          case FP_OPCODE_MIN:
 837             {
 838                GLfloat a[4], b[4], result[4];
 839                fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
 840                fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
 841                result[0] = MIN2(a[0], b[0]);
 842                result[1] = MIN2(a[1], b[1]);
 843                result[2] = MIN2(a[2], b[2]);
 844                result[3] = MIN2(a[3], b[3]);
 845                store_vector4( inst, machine, result );
 846             }
 847             break;
 848          case FP_OPCODE_MOV:
 849             {
 850                GLfloat result[4];
 851                fetch_vector4( ctx, &inst->SrcReg[0], machine, program, result );
 852                store_vector4( inst, machine, result );
 853             }
 854             break;
 855          case FP_OPCODE_MUL:
 856             {
 857                GLfloat a[4], b[4], result[4];
 858                fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
 859                fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
 860                result[0] = a[0] * b[0];
 861                result[1] = a[1] * b[1];
 862                result[2] = a[2] * b[2];
 863                result[3] = a[3] * b[3];
 864                store_vector4( inst, machine, result );
 865 #if DEBUG_FRAG
 866                printf("MUL (%g %g %g %g) = (%g %g %g %g) * (%g %g %g %g)\n",
 867                       result[0], result[1], result[2], result[3],
 868                       a[0], a[1], a[2], a[3],
 869                       b[0], b[1], b[2], b[3]);
 870 #endif
 871             }
 872             break;
 873          case FP_OPCODE_PK2H: /* pack two 16-bit floats */
 874             /* XXX this is probably wrong */
 875             {
 876                GLfloat a[4], result[4];
 877                const GLuint *rawBits = (const GLuint *) a;
 878                GLuint *rawResult = (GLuint *) result;
 879                fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
 880                rawResult[0] = rawResult[1] = rawResult[2] = rawResult[3]
 881                   = rawBits[0] | (rawBits[1] << 16);
 882                store_vector4( inst, machine, result );
 883             }
 884             break;
 885          case FP_OPCODE_PK2US: /* pack two GLushorts */
 886             {
 887                GLfloat a[4], result[4];
 888                GLuint usx, usy, *rawResult = (GLuint *) result;
 889                fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
 890                a[0] = CLAMP(a[0], 0.0F, 1.0F);
 891                a[1] = CLAMP(a[0], 0.0F, 1.0F);
 892                usx = IROUND(a[0] * 65535.0F);
 893                usy = IROUND(a[1] * 65535.0F);
 894                rawResult[0] = rawResult[1] = rawResult[2] = rawResult[3]
 895                   = usx | (usy << 16);
 896                store_vector4( inst, machine, result );
 897             }
 898             break;
 899          case FP_OPCODE_PK4B: /* pack four GLbytes */
 900             {
 901                GLfloat a[4], result[4];
 902                GLuint ubx, uby, ubz, ubw, *rawResult = (GLuint *) result;
 903                fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
 904                a[0] = CLAMP(a[0], -128.0F / 127.0F, 1.0F);
 905                a[1] = CLAMP(a[1], -128.0F / 127.0F, 1.0F);
 906                a[2] = CLAMP(a[2], -128.0F / 127.0F, 1.0F);
 907                a[3] = CLAMP(a[3], -128.0F / 127.0F, 1.0F);
 908                ubx = IROUND(127.0F * a[0] + 128.0F);
 909                uby = IROUND(127.0F * a[1] + 128.0F);
 910                ubz = IROUND(127.0F * a[2] + 128.0F);
 911                ubw = IROUND(127.0F * a[3] + 128.0F);
 912                rawResult[0] = rawResult[1] = rawResult[2] = rawResult[3]
 913                   = ubx | (uby << 8) | (ubz << 16) | (ubw << 24);
 914                store_vector4( inst, machine, result );
 915             }
 916             break;
 917          case FP_OPCODE_PK4UB: /* pack four GLubytes */
 918             {
 919                GLfloat a[4], result[4];
 920                GLuint ubx, uby, ubz, ubw, *rawResult = (GLuint *) result;
 921                fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
 922                a[0] = CLAMP(a[0], 0.0F, 1.0F);
 923                a[1] = CLAMP(a[1], 0.0F, 1.0F);
 924                a[2] = CLAMP(a[2], 0.0F, 1.0F);
 925                a[3] = CLAMP(a[3], 0.0F, 1.0F);
 926                ubx = IROUND(255.0F * a[0]);
 927                uby = IROUND(255.0F * a[1]);
 928                ubz = IROUND(255.0F * a[2]);
 929                ubw = IROUND(255.0F * a[3]);
 930                rawResult[0] = rawResult[1] = rawResult[2] = rawResult[3]
 931                   = ubx | (uby << 8) | (ubz << 16) | (ubw << 24);
 932                store_vector4( inst, machine, result );
 933             }
 934             break;
 935          case FP_OPCODE_POW:
 936             {
 937                GLfloat a[4], b[4], result[4];
 938                fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
 939                fetch_vector1( ctx, &inst->SrcReg[1], machine, program, b );
 940                result[0] = result[1] = result[2] = result[3]
 941                   = (GLfloat)_mesa_pow(a[0], b[0]);
 942                store_vector4( inst, machine, result );
 943             }
 944             break;
 945          case FP_OPCODE_RCP:
 946             {
 947                GLfloat a[4], result[4];
 948                fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
 949 #if DEBUG_FRAG
 950                if (a[0] == 0)
 951                   printf("RCP(0)\n");
 952                else if (IS_INF_OR_NAN(a[0]))
 953                   printf("RCP(inf)\n");
 954 #endif
 955                result[0] = result[1] = result[2] = result[3]
 956                   = 1.0F / a[0];
 957                store_vector4( inst, machine, result );
 958             }
 959             break;
 960          case FP_OPCODE_RFL:
 961             {
 962                GLfloat axis[4], dir[4], result[4], tmp[4];
 963                fetch_vector4( ctx, &inst->SrcReg[0], machine, program, axis );
 964                fetch_vector4( ctx, &inst->SrcReg[1], machine, program, dir );
 965                tmp[3] = axis[0] * axis[0]
 966                       + axis[1] * axis[1]
 967                       + axis[2] * axis[2];
 968                tmp[0] = (2.0F * (axis[0] * dir[0] +
 969                                  axis[1] * dir[1] +
 970                                  axis[2] * dir[2])) / tmp[3];
 971                result[0] = tmp[0] * axis[0] - dir[0];
 972                result[1] = tmp[0] * axis[1] - dir[1];
 973                result[2] = tmp[0] * axis[2] - dir[2];
 974                /* result[3] is never written! XXX enforce in parser! */
 975                store_vector4( inst, machine, result );
 976             }
 977             break;
 978          case FP_OPCODE_RSQ: /* 1 / sqrt() */
 979             {
 980                GLfloat a[4], result[4];
 981                fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
 982                result[0] = result[1] = result[2] = result[3] = INV_SQRTF(a[0]);
 983                store_vector4( inst, machine, result );
 984 #if DEBUG_FRAG
 985                printf("RSQ %g = 1/sqrt(%g)\n", result[0], a[0]);
 986 #endif
 987             }
 988             break;
 989          case FP_OPCODE_SCS: /* sine and cos */
 990             {
 991                GLfloat a[4], result[4];
 992                fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
 993                result[0] = cos(a[0]);
 994                result[1] = sin(a[0]);
 995                result[2] = 0.0;  /* undefined! */
 996                result[3] = 0.0;  /* undefined! */
 997                store_vector4( inst, machine, result );
 998             }
 999             break;
1000          case FP_OPCODE_SEQ: /* set on equal */
1001             {
1002                GLfloat a[4], b[4], result[4];
1003                fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1004                fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
1005                result[0] = (a[0] == b[0]) ? 1.0F : 0.0F;
1006                result[1] = (a[1] == b[1]) ? 1.0F : 0.0F;
1007                result[2] = (a[2] == b[2]) ? 1.0F : 0.0F;
1008                result[3] = (a[3] == b[3]) ? 1.0F : 0.0F;
1009                store_vector4( inst, machine, result );
1010             }
1011             break;
1012          case FP_OPCODE_SFL: /* set false, operands ignored */
1013             {
1014                static const GLfloat result[4] = { 0.0F, 0.0F, 0.0F, 0.0F };
1015                store_vector4( inst, machine, result );
1016             }
1017             break;
1018          case FP_OPCODE_SGE: /* set on greater or equal */
1019             {
1020                GLfloat a[4], b[4], result[4];
1021                fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1022                fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
1023                result[0] = (a[0] >= b[0]) ? 1.0F : 0.0F;
1024                result[1] = (a[1] >= b[1]) ? 1.0F : 0.0F;
1025                result[2] = (a[2] >= b[2]) ? 1.0F : 0.0F;
1026                result[3] = (a[3] >= b[3]) ? 1.0F : 0.0F;
1027                store_vector4( inst, machine, result );
1028             }
1029             break;
1030          case FP_OPCODE_SGT: /* set on greater */
1031             {
1032                GLfloat a[4], b[4], result[4];
1033                fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1034                fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
1035                result[0] = (a[0] > b[0]) ? 1.0F : 0.0F;
1036                result[1] = (a[1] > b[1]) ? 1.0F : 0.0F;
1037                result[2] = (a[2] > b[2]) ? 1.0F : 0.0F;
1038                result[3] = (a[3] > b[3]) ? 1.0F : 0.0F;
1039                store_vector4( inst, machine, result );
1040             }
1041             break;
1042          case FP_OPCODE_SIN:
1043             {
1044                GLfloat a[4], result[4];
1045                fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
1046                result[0] = result[1] = result[2] =
1047                        result[3] = (GLfloat)_mesa_sin(a[0]);
1048                store_vector4( inst, machine, result );
1049             }
1050             break;
1051          case FP_OPCODE_SLE: /* set on less or equal */
1052             {
1053                GLfloat a[4], b[4], result[4];
1054                fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1055                fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
1056                result[0] = (a[0] <= b[0]) ? 1.0F : 0.0F;
1057                result[1] = (a[1] <= b[1]) ? 1.0F : 0.0F;
1058                result[2] = (a[2] <= b[2]) ? 1.0F : 0.0F;
1059                result[3] = (a[3] <= b[3]) ? 1.0F : 0.0F;
1060                store_vector4( inst, machine, result );
1061             }
1062             break;
1063          case FP_OPCODE_SLT: /* set on less */
1064             {
1065                GLfloat a[4], b[4], result[4];
1066                fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1067                fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
1068                result[0] = (a[0] < b[0]) ? 1.0F : 0.0F;
1069                result[1] = (a[1] < b[1]) ? 1.0F : 0.0F;
1070                result[2] = (a[2] < b[2]) ? 1.0F : 0.0F;
1071                result[3] = (a[3] < b[3]) ? 1.0F : 0.0F;
1072                store_vector4( inst, machine, result );
1073             }
1074             break;
1075          case FP_OPCODE_SNE: /* set on not equal */
1076             {
1077                GLfloat a[4], b[4], result[4];
1078                fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1079                fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
1080                result[0] = (a[0] != b[0]) ? 1.0F : 0.0F;
1081                result[1] = (a[1] != b[1]) ? 1.0F : 0.0F;
1082                result[2] = (a[2] != b[2]) ? 1.0F : 0.0F;
1083                result[3] = (a[3] != b[3]) ? 1.0F : 0.0F;
1084                store_vector4( inst, machine, result );
1085             }
1086             break;
1087          case FP_OPCODE_STR: /* set true, operands ignored */
1088             {
1089                static const GLfloat result[4] = { 1.0F, 1.0F, 1.0F, 1.0F };
1090                store_vector4( inst, machine, result );
1091             }
1092             break;
1093          case FP_OPCODE_SUB:
1094             {
1095                GLfloat a[4], b[4], result[4];
1096                fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1097                fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
1098                result[0] = a[0] - b[0];
1099                result[1] = a[1] - b[1];
1100                result[2] = a[2] - b[2];
1101                result[3] = a[3] - b[3];
1102                store_vector4( inst, machine, result );
1103             }
1104             break;
1105          case FP_OPCODE_SWZ:
1106             {
1107                /* XXX to do: extended swizzle */
1108             }
1109             break;
1110          case FP_OPCODE_TEX:
1111             /* Texel lookup */
1112             {
1113                GLfloat texcoord[4], color[4];
1114                fetch_vector4( ctx, &inst->SrcReg[0], machine, program, texcoord );
1115                /* XXX: Undo perspective divide from interpolate_texcoords() */
1116                fetch_texel( ctx, texcoord,
1117                             span->array->lambda[inst->TexSrcUnit][column],
1118                             inst->TexSrcUnit, color );
1119                store_vector4( inst, machine, color );
1120             }
1121             break;
1122          case FP_OPCODE_TXB:
1123             /* Texel lookup with LOD bias */
1124             {
1125                GLfloat texcoord[4], color[4];
1126                fetch_vector4( ctx, &inst->SrcReg[0], machine, program, texcoord );
1127                /* XXX: apply bias from texcoord[3]!!! */
1128                fetch_texel( ctx, texcoord,
1129                             span->array->lambda[inst->TexSrcUnit][column],
1130                             inst->TexSrcUnit, color );
1131                store_vector4( inst, machine, color );
1132             }
1133             break;
1134          case FP_OPCODE_TXD:
1135             /* Texture lookup w/ partial derivatives for LOD */
1136             {
1137                GLfloat texcoord[4], dtdx[4], dtdy[4], color[4];
1138                fetch_vector4( ctx, &inst->SrcReg[0], machine, program, texcoord );
1139                fetch_vector4( ctx, &inst->SrcReg[1], machine, program, dtdx );
1140                fetch_vector4( ctx, &inst->SrcReg[2], machine, program, dtdy );
1141                fetch_texel_deriv( ctx, texcoord, dtdx, dtdy, inst->TexSrcUnit,
1142                                   color );
1143                store_vector4( inst, machine, color );
1144             }
1145             break;
1146          case FP_OPCODE_TXP:
1147             /* Texture lookup w/ perspective divide */
1148             {
1149                GLfloat texcoord[4], color[4];
1150                fetch_vector4( ctx, &inst->SrcReg[0], machine, program, texcoord );
1151                /* Already did perspective divide in interpolate_texcoords() */
1152                fetch_texel( ctx, texcoord,
1153                             span->array->lambda[inst->TexSrcUnit][column],
1154                             inst->TexSrcUnit, color );
1155                store_vector4( inst, machine, color );
1156             }
1157             break;
1158          case FP_OPCODE_UP2H: /* unpack two 16-bit floats */
1159             /* XXX this is probably wrong */
1160             {
1161                GLfloat a[4], result[4];
1162                const GLuint *rawBits = (const GLuint *) a;
1163                GLuint *rawResult = (GLuint *) result;
1164                fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
1165                rawResult[0] = rawBits[0] & 0xffff;
1166                rawResult[1] = (rawBits[0] >> 16) & 0xffff;
1167                rawResult[2] = rawBits[0] & 0xffff;
1168                rawResult[3] = (rawBits[0] >> 16) & 0xffff;
1169                store_vector4( inst, machine, result );
1170             }
1171             break;
1172          case FP_OPCODE_UP2US: /* unpack two GLushorts */
1173             {
1174                GLfloat a[4], result[4];
1175                const GLuint *rawBits = (const GLuint *) a;
1176                fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
1177                result[0] = (GLfloat) ((rawBits[0] >>  0) & 0xffff) / 65535.0F;
1178                result[1] = (GLfloat) ((rawBits[0] >> 16) & 0xffff) / 65535.0F;
1179                result[2] = result[0];
1180                result[3] = result[1];
1181                store_vector4( inst, machine, result );
1182             }
1183             break;
1184          case FP_OPCODE_UP4B: /* unpack four GLbytes */
1185             {
1186                GLfloat a[4], result[4];
1187                const GLuint *rawBits = (const GLuint *) a;
1188                fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
1189                result[0] = (((rawBits[0] >>  0) & 0xff) - 128) / 127.0F;
1190                result[0] = (((rawBits[0] >>  8) & 0xff) - 128) / 127.0F;
1191                result[0] = (((rawBits[0] >> 16) & 0xff) - 128) / 127.0F;
1192                result[0] = (((rawBits[0] >> 24) & 0xff) - 128) / 127.0F;
1193                store_vector4( inst, machine, result );
1194             }
1195             break;
1196          case FP_OPCODE_UP4UB: /* unpack four GLubytes */
1197             {
1198                GLfloat a[4], result[4];
1199                const GLuint *rawBits = (const GLuint *) a;
1200                fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
1201                result[0] = ((rawBits[0] >>  0) & 0xff) / 255.0F;
1202                result[0] = ((rawBits[0] >>  8) & 0xff) / 255.0F;
1203                result[0] = ((rawBits[0] >> 16) & 0xff) / 255.0F;
1204                result[0] = ((rawBits[0] >> 24) & 0xff) / 255.0F;
1205                store_vector4( inst, machine, result );
1206             }
1207             break;
1208          case FP_OPCODE_X2D: /* 2-D matrix transform */
1209             {
1210                GLfloat a[4], b[4], c[4], result[4];
1211                fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1212                fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
1213                fetch_vector4( ctx, &inst->SrcReg[2], machine, program, c );
1214                result[0] = a[0] + b[0] * c[0] + b[1] * c[1];
1215                result[1] = a[1] + b[0] * c[2] + b[1] * c[3];
1216                result[2] = a[2] + b[0] * c[0] + b[1] * c[1];
1217                result[3] = a[3] + b[0] * c[2] + b[1] * c[3];
1218                store_vector4( inst, machine, result );
1219             }
1220             break;
1221          case FP_OPCODE_END:
1222             return GL_TRUE;
1223          default:
1224             _mesa_problem(ctx, "Bad opcode %d in _mesa_exec_fragment_program",
1225                           inst->Opcode);
1226             return GL_TRUE; /* return value doesn't matter */
1227       }
1228    }
1229    return GL_TRUE;
1230 }
1231
1232
1233 static void
1234 init_machine( GLcontext *ctx, struct fp_machine *machine,
1235               const struct fragment_program *program,
1236               const struct sw_span *span, GLuint col )
1237 {
1238    GLuint inputsRead = program->InputsRead;
1239    GLuint u;
1240
1241    if (ctx->FragmentProgram.CallbackEnabled)
1242       inputsRead = ~0;
1243
1244    /* Clear temporary registers */
1245    _mesa_bzero(machine->Temporaries,
1246                MAX_NV_FRAGMENT_PROGRAM_TEMPS * 4 * sizeof(GLfloat));
1247
1248    /* Load input registers */
1249    if (inputsRead & (1 << FRAG_ATTRIB_WPOS)) {
1250       GLfloat *wpos = machine->Inputs[FRAG_ATTRIB_WPOS];
1251       wpos[0] = (GLfloat) span->x + col;
1252       wpos[1] = (GLfloat) span->y;
1253       wpos[2] = (GLfloat) span->array->z[col] / ctx->DepthMaxF;
1254       wpos[3] = span->w + col * span->dwdx;
1255    }
1256    if (inputsRead & (1 << FRAG_ATTRIB_COL0)) {
1257       GLfloat *col0 = machine->Inputs[FRAG_ATTRIB_COL0];
1258       col0[0] = CHAN_TO_FLOAT(span->array->rgba[col][RCOMP]);
1259       col0[1] = CHAN_TO_FLOAT(span->array->rgba[col][GCOMP]);
1260       col0[2] = CHAN_TO_FLOAT(span->array->rgba[col][BCOMP]);
1261       col0[3] = CHAN_TO_FLOAT(span->array->rgba[col][ACOMP]);
1262    }
1263    if (inputsRead & (1 << FRAG_ATTRIB_COL1)) {
1264       GLfloat *col1 = machine->Inputs[FRAG_ATTRIB_COL1];
1265       col1[0] = CHAN_TO_FLOAT(span->array->spec[col][RCOMP]);
1266       col1[1] = CHAN_TO_FLOAT(span->array->spec[col][GCOMP]);
1267       col1[2] = CHAN_TO_FLOAT(span->array->spec[col][BCOMP]);
1268       col1[3] = CHAN_TO_FLOAT(span->array->spec[col][ACOMP]);
1269    }
1270    if (inputsRead & (1 << FRAG_ATTRIB_FOGC)) {
1271       GLfloat *fogc = machine->Inputs[FRAG_ATTRIB_FOGC];
1272       fogc[0] = span->array->fog[col];
1273       fogc[1] = 0.0F;
1274       fogc[2] = 0.0F;
1275       fogc[3] = 0.0F;
1276    }
1277    for (u = 0; u < ctx->Const.MaxTextureCoordUnits; u++) {
1278       if (inputsRead & (1 << (FRAG_ATTRIB_TEX0 + u))) {
1279          GLfloat *tex = machine->Inputs[FRAG_ATTRIB_TEX0 + u];
1280          /*ASSERT(ctx->Texture._EnabledCoordUnits & (1 << u));*/
1281          COPY_4V(tex, span->array->texcoords[u][col]);
1282          /*ASSERT(tex[0] != 0 || tex[1] != 0 || tex[2] != 0);*/
1283       }
1284    }
1285
1286    /* init condition codes */
1287    machine->CondCodes[0] = COND_EQ;
1288    machine->CondCodes[1] = COND_EQ;
1289    machine->CondCodes[2] = COND_EQ;
1290    machine->CondCodes[3] = COND_EQ;
1291 }
1292
1293
1294 void
1295 _swrast_exec_nv_fragment_program( GLcontext *ctx, struct sw_span *span )
1296 {
1297    const struct fragment_program *program = ctx->FragmentProgram.Current;
1298    GLuint i;
1299
1300    ctx->_CurrentProgram = GL_FRAGMENT_PROGRAM_ARB; /* or NV, doesn't matter */
1301
1302    for (i = 0; i < span->end; i++) {
1303       if (span->array->mask[i]) {
1304          init_machine(ctx, &ctx->FragmentProgram.Machine,
1305                       ctx->FragmentProgram.Current, span, i);
1306
1307          if (!execute_program(ctx, program, ~0,
1308                               &ctx->FragmentProgram.Machine, span, i)) {
1309             span->array->mask[i] = GL_FALSE;  /* killed fragment */
1310          }
1311
1312          /* Store output registers */
1313          {
1314             const GLfloat *colOut
1315                = ctx->FragmentProgram.Machine.Outputs[FRAG_OUTPUT_COLR];
1316             UNCLAMPED_FLOAT_TO_CHAN(span->array->rgba[i][RCOMP], colOut[0]);
1317             UNCLAMPED_FLOAT_TO_CHAN(span->array->rgba[i][GCOMP], colOut[1]);
1318             UNCLAMPED_FLOAT_TO_CHAN(span->array->rgba[i][BCOMP], colOut[2]);
1319             UNCLAMPED_FLOAT_TO_CHAN(span->array->rgba[i][ACOMP], colOut[3]);
1320          }
1321          /* depth value */
1322          if (program->OutputsWritten & (1 << FRAG_OUTPUT_DEPR))
1323             span->array->z[i] = IROUND(ctx->FragmentProgram.Machine.Outputs[FRAG_OUTPUT_DEPR][0] * ctx->DepthMaxF);
1324       }
1325    }
1326
1327    ctx->_CurrentProgram = 0;
1328 }
1329