src/mesa/swrast/s_nvfragprog.c

   1 /*
   2  * Mesa 3-D graphics library
   3  * Version:  5.1
   4  *
   5  * Copyright (C) 1999-2003  Brian Paul   All Rights Reserved.
   6  *
   7  * Permission is hereby granted, free of charge, to any person obtaining a
   8  * copy of this software and associated documentation files (the "Software"),
   9  * to deal in the Software without restriction, including without limitation
  10  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  11  * and/or sell copies of the Software, and to permit persons to whom the
  12  * Software is furnished to do so, subject to the following conditions:
  13  *
  14  * The above copyright notice and this permission notice shall be included
  15  * in all copies or substantial portions of the Software.
  16  *
  17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  18  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  20  * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
  21  * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  22  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  23  */
  24
  25
  26 #include "glheader.h"
  27 #include "colormac.h"
  28 #include "context.h"
  29 #include "nvfragprog.h"
  30 #include "macros.h"
  31
  32 #include "s_nvfragprog.h"
  33 #include "s_span.h"
  34 #include "s_texture.h"
  35
  36
  37 /* if 1, print some debugging info */
  38 #define DEBUG_FRAG 0
  39
  40
  41 /**
  42  * Fetch a texel.
  43  */
  44 static void
  45 fetch_texel( GLcontext *ctx, const GLfloat texcoord[4], GLfloat lambda,
  46              GLuint unit, GLfloat color[4] )
  47 {
  48    GLchan rgba[4];
  49    SWcontext *swrast = SWRAST_CONTEXT(ctx);
  50
  51    swrast->TextureSample[unit](ctx, unit, ctx->Texture.Unit[unit]._Current,
  52                                1, (const GLfloat (*)[4]) texcoord,
  53                                &lambda, &rgba);
  54    color[0] = CHAN_TO_FLOAT(rgba[0]);
  55    color[1] = CHAN_TO_FLOAT(rgba[1]);
  56    color[2] = CHAN_TO_FLOAT(rgba[2]);
  57    color[3] = CHAN_TO_FLOAT(rgba[3]);
  58 }
  59
  60
  61 /**
  62  * Fetch a texel with the given partial derivatives to compute a level
  63  * of detail in the mipmap.
  64  */
  65 static void
  66 fetch_texel_deriv( GLcontext *ctx, const GLfloat texcoord[4],
  67                    const GLfloat texdx[4], const GLfloat texdy[4],
  68                    GLuint unit, GLfloat color[4] )
  69 {
  70    SWcontext *swrast = SWRAST_CONTEXT(ctx);
  71    const struct gl_texture_object *texObj = ctx->Texture.Unit[unit]._Current;
  72    const struct gl_texture_image *texImg = texObj->Image[texObj->BaseLevel];
  73    const GLfloat texW = (GLfloat) texImg->WidthScale;
  74    const GLfloat texH = (GLfloat) texImg->HeightScale;
  75    GLchan rgba[4];
  76
  77    GLfloat lambda = _swrast_compute_lambda(texdx[0], texdy[0], /* ds/dx, ds/dy */
  78                                          texdx[1], texdy[1], /* dt/dx, dt/dy */
  79                                          texdx[3], texdy[2], /* dq/dx, dq/dy */
  80                                          texW, texH,
  81                                          texcoord[0], texcoord[1], texcoord[3],
  82                                          1.0F / texcoord[3]);
  83
  84    swrast->TextureSample[unit](ctx, unit, ctx->Texture.Unit[unit]._Current,
  85                                1, (const GLfloat (*)[4]) texcoord,
  86                                &lambda, &rgba);
  87    color[0] = CHAN_TO_FLOAT(rgba[0]);
  88    color[1] = CHAN_TO_FLOAT(rgba[1]);
  89    color[2] = CHAN_TO_FLOAT(rgba[2]);
  90    color[3] = CHAN_TO_FLOAT(rgba[3]);
  91 }
  92
  93
  94
  95 /**
  96  * Fetch a 4-element float vector from the given source register.
  97  * Apply swizzling and negating as needed.
  98  */
  99 static void
 100 fetch_vector4( GLcontext *ctx,
 101                const struct fp_src_register *source,
 102                struct fp_machine *machine,
 103                const struct fragment_program *program,
 104                GLfloat result[4] )
 105 {
 106    const GLfloat *src;
 107
 108    switch (source->File) {
 109       case PROGRAM_TEMPORARY:
 110          ASSERT(source->Index < MAX_NV_FRAGMENT_PROGRAM_TEMPS);
 111          src = machine->Temporaries[source->Index];
 112          break;
 113       case PROGRAM_INPUT:
 114          ASSERT(source->Index < MAX_NV_FRAGMENT_PROGRAM_INPUTS);
 115          src = machine->Inputs[source->Index];
 116          break;
 117       case PROGRAM_LOCAL_PARAM:
 118          ASSERT(source->Index < MAX_PROGRAM_LOCAL_PARAMS);
 119          src = program->Base.LocalParams[source->Index];
 120          break;
 121       case PROGRAM_ENV_PARAM:
 122          ASSERT(source->Index < MAX_NV_FRAGMENT_PROGRAM_PARAMS);
 123          src = ctx->FragmentProgram.Parameters[source->Index];
 124          break;
 125       case PROGRAM_NAMED_PARAM:
 126          ASSERT(source->Index < program->NumParameters);
 127          src = program->Parameters[source->Index].Values;
 128          break;
 129       case PROGRAM_STATE_VAR:
 130          abort();
 131       default:
 132          _mesa_problem(ctx, "Invalid input register file in fetch_vector4");
 133          return;
 134    }
 135
 136    result[0] = src[source->Swizzle[0]];
 137    result[1] = src[source->Swizzle[1]];
 138    result[2] = src[source->Swizzle[2]];
 139    result[3] = src[source->Swizzle[3]];
 140
 141    if (source->NegateBase) {
 142       result[0] = -result[0];
 143       result[1] = -result[1];
 144       result[2] = -result[2];
 145       result[3] = -result[3];
 146    }
 147    if (source->Abs) {
 148       result[0] = FABSF(result[0]);
 149       result[1] = FABSF(result[1]);
 150       result[2] = FABSF(result[2]);
 151       result[3] = FABSF(result[3]);
 152    }
 153    if (source->NegateAbs) {
 154       result[0] = -result[0];
 155       result[1] = -result[1];
 156       result[2] = -result[2];
 157       result[3] = -result[3];
 158    }
 159 }
 160
 161
 162 /**
 163  * Fetch the derivative with respect to X for the given register.
 164  * \return GL_TRUE if it was easily computed or GL_FALSE if we
 165  * need to execute another instance of the program (ugh)!
 166  */
 167 static GLboolean
 168 fetch_vector4_deriv( const struct fp_src_register *source,
 169                      const struct sw_span *span,
 170                      char xOrY, GLfloat result[4] )
 171 {
 172    GLfloat src[4];
 173
 174    ASSERT(xOrY == 'X' || xOrY == 'Y');
 175
 176    assert(source->File == PROGRAM_INPUT);
 177
 178    switch (source->Index) {
 179    case FRAG_ATTRIB_WPOS:
 180       if (xOrY == 'X') {
 181          src[0] = 1.0;
 182          src[1] = 0.0;
 183          src[2] = span->dzdx;
 184          src[3] = span->dwdx;
 185       }
 186       else {
 187          src[0] = 0.0;
 188          src[1] = 1.0;
 189          src[2] = span->dzdy;
 190          src[3] = span->dwdy;
 191       }
 192       break;
 193    case FRAG_ATTRIB_COL0:
 194       if (xOrY == 'X') {
 195          src[0] = span->drdx * (1.0F / CHAN_MAXF);
 196          src[1] = span->dgdx * (1.0F / CHAN_MAXF);
 197          src[2] = span->dbdx * (1.0F / CHAN_MAXF);
 198          src[3] = span->dadx * (1.0F / CHAN_MAXF);
 199       }
 200       else {
 201          src[0] = span->drdy * (1.0F / CHAN_MAXF);
 202          src[1] = span->dgdy * (1.0F / CHAN_MAXF);
 203          src[2] = span->dbdy * (1.0F / CHAN_MAXF);
 204          src[3] = span->dady * (1.0F / CHAN_MAXF);
 205       }
 206       break;
 207    case FRAG_ATTRIB_COL1:
 208       if (xOrY == 'X') {
 209          src[0] = span->dsrdx * (1.0F / CHAN_MAXF);
 210          src[1] = span->dsgdx * (1.0F / CHAN_MAXF);
 211          src[2] = span->dsbdx * (1.0F / CHAN_MAXF);
 212          src[3] = 0.0; /* XXX need this */
 213       }
 214       else {
 215          src[0] = span->dsrdy * (1.0F / CHAN_MAXF);
 216          src[1] = span->dsgdy * (1.0F / CHAN_MAXF);
 217          src[2] = span->dsbdy * (1.0F / CHAN_MAXF);
 218          src[3] = 0.0; /* XXX need this */
 219       }
 220       break;
 221    case FRAG_ATTRIB_FOGC:
 222       if (xOrY == 'X') {
 223          src[0] = span->dfogdx;
 224          src[1] = 0.0;
 225          src[2] = 0.0;
 226          src[3] = 0.0;
 227       }
 228       else {
 229          src[0] = span->dfogdy;
 230          src[1] = 0.0;
 231          src[2] = 0.0;
 232          src[3] = 0.0;
 233       }
 234       break;
 235    case FRAG_ATTRIB_TEX0:
 236    case FRAG_ATTRIB_TEX1:
 237    case FRAG_ATTRIB_TEX2:
 238    case FRAG_ATTRIB_TEX3:
 239    case FRAG_ATTRIB_TEX4:
 240    case FRAG_ATTRIB_TEX5:
 241    case FRAG_ATTRIB_TEX6:
 242    case FRAG_ATTRIB_TEX7:
 243       if (xOrY == 'X') {
 244          const GLuint u = source->Index - FRAG_ATTRIB_TEX0;
 245          src[0] = span->texStepX[u][0] * (1.0F / CHAN_MAXF);
 246          src[1] = span->texStepX[u][1] * (1.0F / CHAN_MAXF);
 247          src[2] = span->texStepX[u][2] * (1.0F / CHAN_MAXF);
 248          src[3] = span->texStepX[u][3] * (1.0F / CHAN_MAXF);
 249       }
 250       else {
 251          const GLuint u = source->Index - FRAG_ATTRIB_TEX0;
 252          src[0] = span->texStepY[u][0] * (1.0F / CHAN_MAXF);
 253          src[1] = span->texStepY[u][1] * (1.0F / CHAN_MAXF);
 254          src[2] = span->texStepY[u][2] * (1.0F / CHAN_MAXF);
 255          src[3] = span->texStepY[u][3] * (1.0F / CHAN_MAXF);
 256       }
 257       break;
 258    default:
 259       return GL_FALSE;
 260    }
 261
 262    result[0] = src[source->Swizzle[0]];
 263    result[1] = src[source->Swizzle[1]];
 264    result[2] = src[source->Swizzle[2]];
 265    result[3] = src[source->Swizzle[3]];
 266
 267    if (source->NegateBase) {
 268       result[0] = -result[0];
 269       result[1] = -result[1];
 270       result[2] = -result[2];
 271       result[3] = -result[3];
 272    }
 273    if (source->Abs) {
 274       result[0] = FABSF(result[0]);
 275       result[1] = FABSF(result[1]);
 276       result[2] = FABSF(result[2]);
 277       result[3] = FABSF(result[3]);
 278    }
 279    if (source->NegateAbs) {
 280       result[0] = -result[0];
 281       result[1] = -result[1];
 282       result[2] = -result[2];
 283       result[3] = -result[3];
 284    }
 285    return GL_TRUE;
 286 }
 287
 288
 289 /**
 290  * As above, but only return result[0] element.
 291  */
 292 static void
 293 fetch_vector1( GLcontext *ctx,
 294                const struct fp_src_register *source,
 295                const struct fp_machine *machine,
 296                const struct fragment_program *program,
 297                GLfloat result[4] )
 298 {
 299    const GLfloat *src;
 300
 301    switch (source->File) {
 302       case PROGRAM_TEMPORARY:
 303          ASSERT(source->Index < MAX_NV_FRAGMENT_PROGRAM_TEMPS);
 304          src = machine->Temporaries[source->Index];
 305          break;
 306       case PROGRAM_INPUT:
 307          ASSERT(source->Index < MAX_NV_FRAGMENT_PROGRAM_INPUTS);
 308          src = machine->Inputs[source->Index];
 309          break;
 310       case PROGRAM_LOCAL_PARAM:
 311          ASSERT(source->Index < MAX_PROGRAM_LOCAL_PARAMS);
 312          src = program->Base.LocalParams[source->Index];
 313          break;
 314       case PROGRAM_ENV_PARAM:
 315          ASSERT(source->Index < MAX_NV_FRAGMENT_PROGRAM_PARAMS);
 316          src = ctx->FragmentProgram.Parameters[source->Index];
 317          break;
 318       case PROGRAM_NAMED_PARAM:
 319          ASSERT(source->Index < program->NumParameters);
 320          src = program->Parameters[source->Index].Values;
 321          break;
 322       case PROGRAM_STATE_VAR:
 323          abort();
 324       default:
 325          _mesa_problem(ctx, "Invalid input register file in fetch_vector1");
 326          return;
 327    }
 328
 329    result[0] = src[source->Swizzle[0]];
 330
 331    if (source->NegateBase) {
 332       result[0] = -result[0];
 333    }
 334    if (source->Abs) {
 335       result[0] = FABSF(result[0]);
 336    }
 337    if (source->NegateAbs) {
 338       result[0] = -result[0];
 339    }
 340 }
 341
 342
 343 /*
 344  * Test value against zero and return GT, LT, EQ or UN if NaN.
 345  */
 346 static INLINE GLuint
 347 generate_cc( float value )
 348 {
 349    if (value != value)
 350       return COND_UN;  /* NaN */
 351    if (value > 0.0F)
 352       return COND_GT;
 353    if (value < 0.0F)
 354       return COND_LT;
 355    return COND_EQ;
 356 }
 357
 358 /*
 359  * Test if the ccMaskRule is satisfied by the given condition code.
 360  * Used to mask destination writes according to the current condition codee.
 361  */
 362 static INLINE GLboolean
 363 test_cc(GLuint condCode, GLuint ccMaskRule)
 364 {
 365    switch (ccMaskRule) {
 366    case COND_EQ: return (condCode == COND_EQ);
 367    case COND_NE: return (condCode != COND_EQ);
 368    case COND_LT: return (condCode == COND_LT);
 369    case COND_GE: return (condCode == COND_GT || condCode == COND_EQ);
 370    case COND_LE: return (condCode == COND_LT || condCode == COND_EQ);
 371    case COND_GT: return (condCode == COND_GT);
 372    case COND_TR: return GL_TRUE;
 373    case COND_FL: return GL_FALSE;
 374    default:      return GL_TRUE;
 375    }
 376 }
 377
 378
 379 /**
 380  * Store 4 floats into a register.  Observe the instructions saturate and
 381  * set-condition-code flags.
 382  */
 383 static void
 384 store_vector4( const struct fp_instruction *inst,
 385                struct fp_machine *machine,
 386                const GLfloat value[4] )
 387 {
 388    const struct fp_dst_register *dest = &(inst->DstReg);
 389    const GLboolean clamp = inst->Saturate;
 390    const GLboolean updateCC = inst->UpdateCondRegister;
 391    GLfloat *dstReg;
 392    GLfloat clampedValue[4];
 393    const GLboolean *writeMask = dest->WriteMask;
 394    GLboolean condWriteMask[4];
 395
 396    switch (dest->File) {
 397       case PROGRAM_OUTPUT:
 398          dstReg = machine->Outputs[dest->Index];
 399          break;
 400       case PROGRAM_TEMPORARY:
 401          dstReg = machine->Temporaries[dest->Index];
 402          break;
 403       default:
 404          _mesa_problem(NULL, "bad register file in store_vector4(fp)");
 405          return;
 406    }
 407
 408 #if DEBUG_FRAG
 409    if (value[0] > 1.0e10 ||
 410        IS_INF_OR_NAN(value[0]) ||
 411        IS_INF_OR_NAN(value[1]) ||
 412        IS_INF_OR_NAN(value[2]) ||
 413        IS_INF_OR_NAN(value[3])  )
 414       printf("store %g %g %g %g\n", value[0], value[1], value[2], value[3]);
 415 #endif
 416
 417    if (clamp) {
 418       clampedValue[0] = CLAMP(value[0], 0.0F, 1.0F);
 419       clampedValue[1] = CLAMP(value[1], 0.0F, 1.0F);
 420       clampedValue[2] = CLAMP(value[2], 0.0F, 1.0F);
 421       clampedValue[3] = CLAMP(value[3], 0.0F, 1.0F);
 422       value = clampedValue;
 423    }
 424
 425    if (dest->CondMask != COND_TR) {
 426       condWriteMask[0] = writeMask[0]
 427          && test_cc(machine->CondCodes[dest->CondSwizzle[0]], dest->CondMask);
 428       condWriteMask[1] = writeMask[1]
 429          && test_cc(machine->CondCodes[dest->CondSwizzle[1]], dest->CondMask);
 430       condWriteMask[2] = writeMask[2]
 431          && test_cc(machine->CondCodes[dest->CondSwizzle[2]], dest->CondMask);
 432       condWriteMask[3] = writeMask[3]
 433          && test_cc(machine->CondCodes[dest->CondSwizzle[3]], dest->CondMask);
 434       writeMask = condWriteMask;
 435    }
 436
 437    if (writeMask[0]) {
 438       dstReg[0] = value[0];
 439       if (updateCC)
 440          machine->CondCodes[0] = generate_cc(value[0]);
 441    }
 442    if (writeMask[1]) {
 443       dstReg[1] = value[1];
 444       if (updateCC)
 445          machine->CondCodes[1] = generate_cc(value[1]);
 446    }
 447    if (writeMask[2]) {
 448       dstReg[2] = value[2];
 449       if (updateCC)
 450          machine->CondCodes[2] = generate_cc(value[2]);
 451    }
 452    if (writeMask[3]) {
 453       dstReg[3] = value[3];
 454       if (updateCC)
 455          machine->CondCodes[3] = generate_cc(value[3]);
 456    }
 457 }
 458
 459
 460 /**
 461  * Initialize a new machine state instance from an existing one, adding
 462  * the partial derivatives onto the input registers.
 463  * Used to implement DDX and DDY instructions in non-trivial cases.
 464  */
 465 static void
 466 init_machine_deriv( GLcontext *ctx,
 467                     const struct fp_machine *machine,
 468                     const struct fragment_program *program,
 469                     const struct sw_span *span, char xOrY,
 470                     struct fp_machine *dMachine )
 471 {
 472    GLuint u;
 473
 474    ASSERT(xOrY == 'X' || xOrY == 'Y');
 475
 476    /* copy existing machine */
 477    _mesa_memcpy(dMachine, machine, sizeof(struct fp_machine));
 478
 479    /* Clear temporary registers */
 480    _mesa_bzero( (void*) machine->Temporaries,
 481                MAX_NV_FRAGMENT_PROGRAM_TEMPS * 4 * sizeof(GLfloat));
 482
 483    /* Add derivatives */
 484    if (program->InputsRead & (1 << FRAG_ATTRIB_WPOS)) {
 485       GLfloat *wpos = (GLfloat*) machine->Inputs[FRAG_ATTRIB_WPOS];
 486       if (xOrY == 'X') {
 487          wpos[0] += 1.0F;
 488          wpos[1] += 0.0F;
 489          wpos[2] += span->dzdx;
 490          wpos[3] += span->dwdx;
 491       }
 492       else {
 493          wpos[0] += 0.0F;
 494          wpos[1] += 1.0F;
 495          wpos[2] += span->dzdy;
 496          wpos[3] += span->dwdy;
 497       }
 498    }
 499    if (program->InputsRead & (1 << FRAG_ATTRIB_COL0)) {
 500       GLfloat *col0 = (GLfloat*) machine->Inputs[FRAG_ATTRIB_COL0];
 501       if (xOrY == 'X') {
 502          col0[0] += span->drdx * (1.0F / CHAN_MAXF);
 503          col0[1] += span->dgdx * (1.0F / CHAN_MAXF);
 504          col0[2] += span->dbdx * (1.0F / CHAN_MAXF);
 505          col0[3] += span->dadx * (1.0F / CHAN_MAXF);
 506       }
 507       else {
 508          col0[0] += span->drdy * (1.0F / CHAN_MAXF);
 509          col0[1] += span->dgdy * (1.0F / CHAN_MAXF);
 510          col0[2] += span->dbdy * (1.0F / CHAN_MAXF);
 511          col0[3] += span->dady * (1.0F / CHAN_MAXF);
 512       }
 513    }
 514    if (program->InputsRead & (1 << FRAG_ATTRIB_COL1)) {
 515       GLfloat *col1 = (GLfloat*) machine->Inputs[FRAG_ATTRIB_COL1];
 516       if (xOrY == 'X') {
 517          col1[0] += span->dsrdx * (1.0F / CHAN_MAXF);
 518          col1[1] += span->dsgdx * (1.0F / CHAN_MAXF);
 519          col1[2] += span->dsbdx * (1.0F / CHAN_MAXF);
 520          col1[3] += 0.0; /*XXX fix */
 521       }
 522       else {
 523          col1[0] += span->dsrdy * (1.0F / CHAN_MAXF);
 524          col1[1] += span->dsgdy * (1.0F / CHAN_MAXF);
 525          col1[2] += span->dsbdy * (1.0F / CHAN_MAXF);
 526          col1[3] += 0.0; /*XXX fix */
 527       }
 528    }
 529    if (program->InputsRead & (1 << FRAG_ATTRIB_FOGC)) {
 530       GLfloat *fogc = (GLfloat*) machine->Inputs[FRAG_ATTRIB_FOGC];
 531       if (xOrY == 'X') {
 532          fogc[0] += span->dfogdx;
 533       }
 534       else {
 535          fogc[0] += span->dfogdy;
 536       }
 537    }
 538    for (u = 0; u < ctx->Const.MaxTextureCoordUnits; u++) {
 539       if (program->InputsRead & (1 << (FRAG_ATTRIB_TEX0 + u))) {
 540          GLfloat *tex = (GLfloat*) machine->Inputs[FRAG_ATTRIB_TEX0 + u];
 541          if (xOrY == 'X') {
 542             tex[0] += span->texStepX[u][0];
 543             tex[1] += span->texStepX[u][1];
 544             tex[2] += span->texStepX[u][2];
 545             tex[3] += span->texStepX[u][3];
 546          }
 547          else {
 548             tex[0] += span->texStepY[u][0];
 549             tex[1] += span->texStepY[u][1];
 550             tex[2] += span->texStepY[u][2];
 551             tex[3] += span->texStepY[u][3];
 552          }
 553       }
 554    }
 555
 556    /* init condition codes */
 557    dMachine->CondCodes[0] = COND_EQ;
 558    dMachine->CondCodes[1] = COND_EQ;
 559    dMachine->CondCodes[2] = COND_EQ;
 560    dMachine->CondCodes[3] = COND_EQ;
 561 }
 562
 563
 564 /**
  565  * Execute the given fragment program.
 566  * NOTE: we do everything in single-precision floating point; we don't
 567  * currently observe the single/half/fixed-precision qualifiers.
 568  * \param ctx - rendering context
 569  * \param program - the fragment program to execute
 570  * \param machine - machine state (register file)
 571  * \param maxInst - max number of instructions to execute
 572  * \return GL_TRUE if program completed or GL_FALSE if program executed KIL.
 573  */
 574 static GLboolean
 575 execute_program( GLcontext *ctx,
 576                  const struct fragment_program *program, GLuint maxInst,
 577                  struct fp_machine *machine, const struct sw_span *span,
 578                  GLuint column )
 579 {
 580    GLuint pc;
 581
 582 #if DEBUG_FRAG
 583    printf("execute fragment program --------------------\n");
 584 #endif
 585
 586    for (pc = 0; pc < maxInst; pc++) {
 587       const struct fp_instruction *inst = program->Instructions + pc;
 588
 589       if (ctx->FragmentProgram.CallbackEnabled &&
 590           ctx->FragmentProgram.Callback) {
 591          ctx->FragmentProgram.CurrentPosition = inst->StringPos;
 592          ctx->FragmentProgram.Callback(program->Base.Target,
 593                                        ctx->FragmentProgram.CallbackData);
 594       }
 595
 596       switch (inst->Opcode) {
 597          case FP_OPCODE_ADD:
 598             {
 599                GLfloat a[4], b[4], result[4];
 600                fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
 601                fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
 602                result[0] = a[0] + b[0];
 603                result[1] = a[1] + b[1];
 604                result[2] = a[2] + b[2];
 605                result[3] = a[3] + b[3];
 606                store_vector4( inst, machine, result );
 607             }
 608             break;
 609          case FP_OPCODE_COS:
 610             {
 611                GLfloat a[4], result[4];
 612                fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
 613                result[0] = result[1] = result[2] = result[3] = (GLfloat)_mesa_cos(a[0]);
 614                store_vector4( inst, machine, result );
 615             }
 616             break;
 617          case FP_OPCODE_DDX: /* Partial derivative with respect to X */
 618             {
 619                GLfloat a[4], aNext[4], result[4];
 620                struct fp_machine dMachine;
 621                if (!fetch_vector4_deriv(&inst->SrcReg[0], span, 'X', result)) {
 622                   /* This is tricky.  Make a copy of the current machine state,
 623                    * increment the input registers by the dx or dy partial
 624                    * derivatives, then re-execute the program up to the
  625                    * preceding instruction, then fetch the source register.
 626                    * Finally, find the difference in the register values for
 627                    * the original and derivative runs.
 628                    */
 629                   fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a);
 630                   init_machine_deriv(ctx, machine, program, span,
 631                                      'X', &dMachine);
 632                   execute_program(ctx, program, pc, &dMachine, span, column);
 633                   fetch_vector4( ctx, &inst->SrcReg[0], &dMachine, program, aNext );
 634                   result[0] = aNext[0] - a[0];
 635                   result[1] = aNext[1] - a[1];
 636                   result[2] = aNext[2] - a[2];
 637                   result[3] = aNext[3] - a[3];
 638                }
 639                store_vector4( inst, machine, result );
 640             }
 641             break;
 642          case FP_OPCODE_DDY: /* Partial derivative with respect to Y */
 643             {
 644                GLfloat a[4], aNext[4], result[4];
 645                struct fp_machine dMachine;
 646                if (!fetch_vector4_deriv(&inst->SrcReg[0], span, 'Y', result)) {
 647                   init_machine_deriv(ctx, machine, program, span,
 648                                      'Y', &dMachine);
 649                   fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a);
 650                   execute_program(ctx, program, pc, &dMachine, span, column);
 651                   fetch_vector4( ctx, &inst->SrcReg[0], &dMachine, program, aNext );
 652                   result[0] = aNext[0] - a[0];
 653                   result[1] = aNext[1] - a[1];
 654                   result[2] = aNext[2] - a[2];
 655                   result[3] = aNext[3] - a[3];
 656                }
 657                store_vector4( inst, machine, result );
 658             }
 659             break;
 660          case FP_OPCODE_DP3:
 661             {
 662                GLfloat a[4], b[4], result[4];
 663                fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
 664                fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
 665                result[0] = result[1] = result[2] = result[3] =
 666                   a[0] * b[0] + a[1] * b[1] + a[2] * b[2];
 667                store_vector4( inst, machine, result );
 668 #if DEBUG_FRAG
 669                printf("DP3 %g = (%g %g %g) . (%g %g %g)\n",
 670                       result[0], a[0], a[1], a[2], b[0], b[1], b[2]);
 671 #endif
 672             }
 673             break;
 674          case FP_OPCODE_DP4:
 675             {
 676                GLfloat a[4], b[4], result[4];
 677                fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
 678                fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
 679                result[0] = result[1] = result[2] = result[3] =
 680                   a[0] * b[0] + a[1] * b[1] + a[2] * b[2] + a[3] * b[3];
 681                store_vector4( inst, machine, result );
 682             }
 683             break;
 684          case FP_OPCODE_DST: /* Distance vector */
 685             {
 686                GLfloat a[4], b[4], result[4];
 687                fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
 688                fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
 689                result[0] = 1.0F;
 690                result[1] = a[1] * b[1];
 691                result[2] = a[2];
 692                result[3] = b[3];
 693                store_vector4( inst, machine, result );
 694             }
 695             break;
 696          case FP_OPCODE_EX2: /* Exponential base 2 */
 697             {
 698                GLfloat a[4], result[4];
 699                fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
 700                result[0] = result[1] = result[2] = result[3] =
 701                   (GLfloat) _mesa_pow(2.0, a[0]);
 702                store_vector4( inst, machine, result );
 703             }
 704             break;
 705          case FP_OPCODE_FLR:
 706             {
 707                GLfloat a[4], result[4];
 708                fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
 709                result[0] = FLOORF(a[0]);
 710                result[1] = FLOORF(a[1]);
 711                result[2] = FLOORF(a[2]);
 712                result[3] = FLOORF(a[3]);
 713                store_vector4( inst, machine, result );
 714             }
 715             break;
 716          case FP_OPCODE_FRC:
 717             {
 718                GLfloat a[4], result[4];
 719                fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
 720                result[0] = a[0] - FLOORF(a[0]);
 721                result[1] = a[1] - FLOORF(a[1]);
 722                result[2] = a[2] - FLOORF(a[2]);
 723                result[3] = a[3] - FLOORF(a[3]);
 724                store_vector4( inst, machine, result );
 725             }
 726             break;
 727          case FP_OPCODE_KIL:
 728             {
 729                const GLuint *swizzle = inst->DstReg.CondSwizzle;
 730                const GLuint condMask = inst->DstReg.CondMask;
 731                if (test_cc(machine->CondCodes[swizzle[0]], condMask) ||
 732                    test_cc(machine->CondCodes[swizzle[1]], condMask) ||
 733                    test_cc(machine->CondCodes[swizzle[2]], condMask) ||
 734                    test_cc(machine->CondCodes[swizzle[3]], condMask)) {
 735                   return GL_FALSE;
 736                }
 737             }
 738             break;
 739          case FP_OPCODE_LG2:  /* log base 2 */
 740             {
 741                GLfloat a[4], result[4];
 742                fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
 743                result[0] = result[1] = result[2] = result[3]
 744                   = LOG2(a[0]);
 745                store_vector4( inst, machine, result );
 746             }
 747             break;
 748          case FP_OPCODE_LIT:
 749             {
 750                GLfloat a[4], result[4];
 751                fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
 752                if (a[0] < 0.0F)
 753                   a[0] = 0.0F;
 754                if (a[1] < 0.0F)
 755                   a[1] = 0.0F;
 756                result[0] = 1.0F;
 757                result[1] = a[0];
 758                result[2] = (a[0] > 0.0F) ? (GLfloat)_mesa_pow(2.0, a[3]) : 0.0F;
 759                result[3] = 1.0F;
 760                store_vector4( inst, machine, result );
 761             }
 762             break;
 763          case FP_OPCODE_LRP:
 764             {
 765                GLfloat a[4], b[4], c[4], result[4];
 766                fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
 767                fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
 768                fetch_vector4( ctx, &inst->SrcReg[2], machine, program, c );
 769                result[0] = a[0] * b[0] + (1.0F - a[0]) * c[0];
 770                result[1] = a[1] * b[1] + (1.0F - a[1]) * c[1];
 771                result[2] = a[2] * b[2] + (1.0F - a[2]) * c[2];
 772                result[3] = a[3] * b[3] + (1.0F - a[3]) * c[3];
 773                store_vector4( inst, machine, result );
 774             }
 775             break;
 776          case FP_OPCODE_MAD:
 777             {
 778                GLfloat a[4], b[4], c[4], result[4];
 779                fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
 780                fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
 781                fetch_vector4( ctx, &inst->SrcReg[2], machine, program, c );
 782                result[0] = a[0] * b[0] + c[0];
 783                result[1] = a[1] * b[1] + c[1];
 784                result[2] = a[2] * b[2] + c[2];
 785                result[3] = a[3] * b[3] + c[3];
 786                store_vector4( inst, machine, result );
 787             }
 788             break;
 789          case FP_OPCODE_MAX:
 790             {
 791                GLfloat a[4], b[4], result[4];
 792                fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
 793                fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
 794                result[0] = MAX2(a[0], b[0]);
 795                result[1] = MAX2(a[1], b[1]);
 796                result[2] = MAX2(a[2], b[2]);
 797                result[3] = MAX2(a[3], b[3]);
 798                store_vector4( inst, machine, result );
 799             }
 800             break;
 801          case FP_OPCODE_MIN:
 802             {
 803                GLfloat a[4], b[4], result[4];
 804                fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
 805                fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
 806                result[0] = MIN2(a[0], b[0]);
 807                result[1] = MIN2(a[1], b[1]);
 808                result[2] = MIN2(a[2], b[2]);
 809                result[3] = MIN2(a[3], b[3]);
 810                store_vector4( inst, machine, result );
 811             }
 812             break;
 813          case FP_OPCODE_MOV:
 814             {
 815                GLfloat result[4];
 816                fetch_vector4( ctx, &inst->SrcReg[0], machine, program, result );
 817                store_vector4( inst, machine, result );
 818             }
 819             break;
 820          case FP_OPCODE_MUL:
 821             {
 822                GLfloat a[4], b[4], result[4];
 823                fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
 824                fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
 825                result[0] = a[0] * b[0];
 826                result[1] = a[1] * b[1];
 827                result[2] = a[2] * b[2];
 828                result[3] = a[3] * b[3];
 829                store_vector4( inst, machine, result );
 830 #if DEBUG_FRAG
 831                printf("MUL (%g %g %g %g) = (%g %g %g %g) * (%g %g %g %g)\n",
 832                       result[0], result[1], result[2], result[3],
 833                       a[0], a[1], a[2], a[3],
 834                       b[0], b[1], b[2], b[3]);
 835 #endif
 836             }
 837             break;
 838          case FP_OPCODE_PK2H: /* pack two 16-bit floats */
 839             /* XXX this is probably wrong */
 840             {
 841                GLfloat a[4], result[4];
 842                const GLuint *rawBits = (const GLuint *) a;
 843                GLuint *rawResult = (GLuint *) result;
 844                fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
 845                rawResult[0] = rawResult[1] = rawResult[2] = rawResult[3]
 846                   = rawBits[0] | (rawBits[1] << 16);
 847                store_vector4( inst, machine, result );
 848             }
 849             break;
 850          case FP_OPCODE_PK2US: /* pack two GLushorts */
 851             {
 852                GLfloat a[4], result[4];
 853                GLuint usx, usy, *rawResult = (GLuint *) result;
 854                fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
 855                a[0] = CLAMP(a[0], 0.0F, 1.0F);
 856                a[1] = CLAMP(a[0], 0.0F, 1.0F);
 857                usx = IROUND(a[0] * 65535.0F);
 858                usy = IROUND(a[1] * 65535.0F);
 859                rawResult[0] = rawResult[1] = rawResult[2] = rawResult[3]
 860                   = usx | (usy << 16);
 861                store_vector4( inst, machine, result );
 862             }
 863             break;
 864          case FP_OPCODE_PK4B: /* pack four GLbytes */
 865             {
 866                GLfloat a[4], result[4];
 867                GLuint ubx, uby, ubz, ubw, *rawResult = (GLuint *) result;
 868                fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
 869                a[0] = CLAMP(a[0], -128.0F / 127.0F, 1.0F);
 870                a[1] = CLAMP(a[1], -128.0F / 127.0F, 1.0F);
 871                a[2] = CLAMP(a[2], -128.0F / 127.0F, 1.0F);
 872                a[3] = CLAMP(a[3], -128.0F / 127.0F, 1.0F);
 873                ubx = IROUND(127.0F * a[0] + 128.0F);
 874                uby = IROUND(127.0F * a[1] + 128.0F);
 875                ubz = IROUND(127.0F * a[2] + 128.0F);
 876                ubw = IROUND(127.0F * a[3] + 128.0F);
 877                rawResult[0] = rawResult[1] = rawResult[2] = rawResult[3]
 878                   = ubx | (uby << 8) | (ubz << 16) | (ubw << 24);
 879                store_vector4( inst, machine, result );
 880             }
 881             break;
 882          case FP_OPCODE_PK4UB: /* pack four GLubytes */
 883             {
 884                GLfloat a[4], result[4];
 885                GLuint ubx, uby, ubz, ubw, *rawResult = (GLuint *) result;
 886                fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
 887                a[0] = CLAMP(a[0], 0.0F, 1.0F);
 888                a[1] = CLAMP(a[1], 0.0F, 1.0F);
 889                a[2] = CLAMP(a[2], 0.0F, 1.0F);
 890                a[3] = CLAMP(a[3], 0.0F, 1.0F);
 891                ubx = IROUND(255.0F * a[0]);
 892                uby = IROUND(255.0F * a[1]);
 893                ubz = IROUND(255.0F * a[2]);
 894                ubw = IROUND(255.0F * a[3]);
 895                rawResult[0] = rawResult[1] = rawResult[2] = rawResult[3]
 896                   = ubx | (uby << 8) | (ubz << 16) | (ubw << 24);
 897                store_vector4( inst, machine, result );
 898             }
 899             break;
 900          case FP_OPCODE_POW:
 901             {
 902                GLfloat a[4], b[4], result[4];
 903                fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
 904                fetch_vector1( ctx, &inst->SrcReg[1], machine, program, b );
 905                result[0] = result[1] = result[2] = result[3]
 906                   = (GLfloat)_mesa_pow(a[0], b[0]);
 907                store_vector4( inst, machine, result );
 908             }
 909             break;
 910          case FP_OPCODE_RCP:
 911             {
 912                GLfloat a[4], result[4];
 913                fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
 914 #if DEBUG_FRAG
 915                if (a[0] == 0)
 916                   printf("RCP(0)\n");
 917                else if (IS_INF_OR_NAN(a[0]))
 918                   printf("RCP(inf)\n");
 919 #endif
 920                result[0] = result[1] = result[2] = result[3]
 921                   = 1.0F / a[0];
 922                store_vector4( inst, machine, result );
 923             }
 924             break;
 925          case FP_OPCODE_RFL:
 926             {
 927                GLfloat axis[4], dir[4], result[4], tmp[4];
 928                fetch_vector4( ctx, &inst->SrcReg[0], machine, program, axis );
 929                fetch_vector4( ctx, &inst->SrcReg[1], machine, program, dir );
 930                tmp[3] = axis[0] * axis[0]
 931                       + axis[1] * axis[1]
 932                       + axis[2] * axis[2];
 933                tmp[0] = (2.0F * (axis[0] * dir[0] +
 934                                  axis[1] * dir[1] +
 935                                  axis[2] * dir[2])) / tmp[3];
 936                result[0] = tmp[0] * axis[0] - dir[0];
 937                result[1] = tmp[0] * axis[1] - dir[1];
 938                result[2] = tmp[0] * axis[2] - dir[2];
 939                /* result[3] is never written! XXX enforce in parser! */
 940                store_vector4( inst, machine, result );
 941             }
 942             break;
 943          case FP_OPCODE_RSQ: /* 1 / sqrt() */
 944             {
 945                GLfloat a[4], result[4];
 946                fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
 947                result[0] = result[1] = result[2] = result[3] = INV_SQRTF(a[0]);
 948                store_vector4( inst, machine, result );
 949 #if DEBUG_FRAG
 950                printf("RSQ %g = 1/sqrt(%g)\n", result[0], a[0]);
 951 #endif
 952             }
 953             break;
 954          case FP_OPCODE_SEQ: /* set on equal */
 955             {
 956                GLfloat a[4], b[4], result[4];
 957                fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
 958                fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
 959                result[0] = (a[0] == b[0]) ? 1.0F : 0.0F;
 960                result[1] = (a[1] == b[1]) ? 1.0F : 0.0F;
 961                result[2] = (a[2] == b[2]) ? 1.0F : 0.0F;
 962                result[3] = (a[3] == b[3]) ? 1.0F : 0.0F;
 963                store_vector4( inst, machine, result );
 964             }
 965             break;
 966          case FP_OPCODE_SFL: /* set false, operands ignored */
 967             {
 968                static const GLfloat result[4] = { 0.0F, 0.0F, 0.0F, 0.0F };
 969                store_vector4( inst, machine, result );
 970             }
 971             break;
 972          case FP_OPCODE_SGE: /* set on greater or equal */
 973             {
 974                GLfloat a[4], b[4], result[4];
 975                fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
 976                fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
 977                result[0] = (a[0] >= b[0]) ? 1.0F : 0.0F;
 978                result[1] = (a[1] >= b[1]) ? 1.0F : 0.0F;
 979                result[2] = (a[2] >= b[2]) ? 1.0F : 0.0F;
 980                result[3] = (a[3] >= b[3]) ? 1.0F : 0.0F;
 981                store_vector4( inst, machine, result );
 982             }
 983             break;
 984          case FP_OPCODE_SGT: /* set on greater */
 985             {
 986                GLfloat a[4], b[4], result[4];
 987                fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
 988                fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
 989                result[0] = (a[0] > b[0]) ? 1.0F : 0.0F;
 990                result[1] = (a[1] > b[1]) ? 1.0F : 0.0F;
 991                result[2] = (a[2] > b[2]) ? 1.0F : 0.0F;
 992                result[3] = (a[3] > b[3]) ? 1.0F : 0.0F;
 993                store_vector4( inst, machine, result );
 994             }
 995             break;
 996          case FP_OPCODE_SIN:
 997             {
 998                GLfloat a[4], result[4];
 999                fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
1000                result[0] = result[1] = result[2] =
1001                        result[3] = (GLfloat)_mesa_sin(a[0]);
1002                store_vector4( inst, machine, result );
1003             }
1004             break;
1005          case FP_OPCODE_SLE: /* set on less or equal */
1006             {
1007                GLfloat a[4], b[4], result[4];
1008                fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1009                fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
1010                result[0] = (a[0] <= b[0]) ? 1.0F : 0.0F;
1011                result[1] = (a[1] <= b[1]) ? 1.0F : 0.0F;
1012                result[2] = (a[2] <= b[2]) ? 1.0F : 0.0F;
1013                result[3] = (a[3] <= b[3]) ? 1.0F : 0.0F;
1014                store_vector4( inst, machine, result );
1015             }
1016             break;
1017          case FP_OPCODE_SLT: /* set on less */
1018             {
1019                GLfloat a[4], b[4], result[4];
1020                fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1021                fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
1022                result[0] = (a[0] < b[0]) ? 1.0F : 0.0F;
1023                result[1] = (a[1] < b[1]) ? 1.0F : 0.0F;
1024                result[2] = (a[2] < b[2]) ? 1.0F : 0.0F;
1025                result[3] = (a[3] < b[3]) ? 1.0F : 0.0F;
1026                store_vector4( inst, machine, result );
1027             }
1028             break;
1029          case FP_OPCODE_SNE: /* set on not equal */
1030             {
1031                GLfloat a[4], b[4], result[4];
1032                fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1033                fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
1034                result[0] = (a[0] != b[0]) ? 1.0F : 0.0F;
1035                result[1] = (a[1] != b[1]) ? 1.0F : 0.0F;
1036                result[2] = (a[2] != b[2]) ? 1.0F : 0.0F;
1037                result[3] = (a[3] != b[3]) ? 1.0F : 0.0F;
1038                store_vector4( inst, machine, result );
1039             }
1040             break;
1041          case FP_OPCODE_STR: /* set true, operands ignored */
1042             {
1043                static const GLfloat result[4] = { 1.0F, 1.0F, 1.0F, 1.0F };
1044                store_vector4( inst, machine, result );
1045             }
1046             break;
1047          case FP_OPCODE_SUB:
1048             {
1049                GLfloat a[4], b[4], result[4];
1050                fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1051                fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
1052                result[0] = a[0] - b[0];
1053                result[1] = a[1] - b[1];
1054                result[2] = a[2] - b[2];
1055                result[3] = a[3] - b[3];
1056                store_vector4( inst, machine, result );
1057             }
1058             break;
1059          case FP_OPCODE_TEX:
1060             /* Texel lookup */
1061             {
1062                GLfloat texcoord[4], color[4];
1063                fetch_vector4( ctx, &inst->SrcReg[0], machine, program, texcoord );
1064                /* XXX: Undo perspective divide from interpolate_texcoords() */
1065                fetch_texel( ctx, texcoord,
1066                             span->array->lambda[inst->TexSrcUnit][column],
1067                             inst->TexSrcUnit, color );
1068                store_vector4( inst, machine, color );
1069             }
1070             break;
1071          case FP_OPCODE_TXD:
1072             /* Texture lookup w/ partial derivatives for LOD */
1073             {
1074                GLfloat texcoord[4], dtdx[4], dtdy[4], color[4];
1075                fetch_vector4( ctx, &inst->SrcReg[0], machine, program, texcoord );
1076                fetch_vector4( ctx, &inst->SrcReg[1], machine, program, dtdx );
1077                fetch_vector4( ctx, &inst->SrcReg[2], machine, program, dtdy );
1078                fetch_texel_deriv( ctx, texcoord, dtdx, dtdy, inst->TexSrcUnit,
1079                                   color );
1080                store_vector4( inst, machine, color );
1081             }
1082             break;
1083          case FP_OPCODE_TXP:
1084             /* Texture lookup w/ perspective divide */
1085             {
1086                GLfloat texcoord[4], color[4];
1087                fetch_vector4( ctx, &inst->SrcReg[0], machine, program, texcoord );
1088                /* Already did perspective divide in interpolate_texcoords() */
1089                fetch_texel( ctx, texcoord,
1090                             span->array->lambda[inst->TexSrcUnit][column],
1091                             inst->TexSrcUnit, color );
1092                store_vector4( inst, machine, color );
1093             }
1094             break;
1095          case FP_OPCODE_UP2H: /* unpack two 16-bit floats */
1096             /* XXX this is probably wrong */
1097             {
1098                GLfloat a[4], result[4];
1099                const GLuint *rawBits = (const GLuint *) a;
1100                GLuint *rawResult = (GLuint *) result;
1101                fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
1102                rawResult[0] = rawBits[0] & 0xffff;
1103                rawResult[1] = (rawBits[0] >> 16) & 0xffff;
1104                rawResult[2] = rawBits[0] & 0xffff;
1105                rawResult[3] = (rawBits[0] >> 16) & 0xffff;
1106                store_vector4( inst, machine, result );
1107             }
1108             break;
1109          case FP_OPCODE_UP2US: /* unpack two GLushorts */
1110             {
1111                GLfloat a[4], result[4];
1112                const GLuint *rawBits = (const GLuint *) a;
1113                fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
1114                result[0] = (GLfloat) ((rawBits[0] >>  0) & 0xffff) / 65535.0F;
1115                result[1] = (GLfloat) ((rawBits[0] >> 16) & 0xffff) / 65535.0F;
1116                result[2] = result[0];
1117                result[3] = result[1];
1118                store_vector4( inst, machine, result );
1119             }
1120             break;
1121          case FP_OPCODE_UP4B: /* unpack four GLbytes */
1122             {
1123                GLfloat a[4], result[4];
1124                const GLuint *rawBits = (const GLuint *) a;
1125                fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
1126                result[0] = (((rawBits[0] >>  0) & 0xff) - 128) / 127.0F;
1127                result[0] = (((rawBits[0] >>  8) & 0xff) - 128) / 127.0F;
1128                result[0] = (((rawBits[0] >> 16) & 0xff) - 128) / 127.0F;
1129                result[0] = (((rawBits[0] >> 24) & 0xff) - 128) / 127.0F;
1130                store_vector4( inst, machine, result );
1131             }
1132             break;
1133          case FP_OPCODE_UP4UB: /* unpack four GLubytes */
1134             {
1135                GLfloat a[4], result[4];
1136                const GLuint *rawBits = (const GLuint *) a;
1137                fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
1138                result[0] = ((rawBits[0] >>  0) & 0xff) / 255.0F;
1139                result[0] = ((rawBits[0] >>  8) & 0xff) / 255.0F;
1140                result[0] = ((rawBits[0] >> 16) & 0xff) / 255.0F;
1141                result[0] = ((rawBits[0] >> 24) & 0xff) / 255.0F;
1142                store_vector4( inst, machine, result );
1143             }
1144             break;
1145          case FP_OPCODE_X2D: /* 2-D matrix transform */
1146             {
1147                GLfloat a[4], b[4], c[4], result[4];
1148                fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1149                fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
1150                fetch_vector4( ctx, &inst->SrcReg[2], machine, program, c );
1151                result[0] = a[0] + b[0] * c[0] + b[1] * c[1];
1152                result[1] = a[1] + b[0] * c[2] + b[1] * c[3];
1153                result[2] = a[2] + b[0] * c[0] + b[1] * c[1];
1154                result[3] = a[3] + b[0] * c[2] + b[1] * c[3];
1155                store_vector4( inst, machine, result );
1156             }
1157             break;
1158          case FP_OPCODE_END:
1159             return GL_TRUE;
1160          default:
1161             _mesa_problem(ctx, "Bad opcode %d in _mesa_exec_fragment_program",
1162                           inst->Opcode);
1163             return GL_TRUE; /* return value doesn't matter */
1164       }
1165    }
1166    return GL_TRUE;
1167 }
1168
1169
1170 static void
1171 init_machine( GLcontext *ctx, struct fp_machine *machine,
1172               const struct fragment_program *program,
1173               const struct sw_span *span, GLuint col )
1174 {
1175    GLuint inputsRead = program->InputsRead;
1176    GLuint u;
1177
1178    if (ctx->FragmentProgram.CallbackEnabled)
1179       inputsRead = ~0;
1180
1181    /* Clear temporary registers */
1182    _mesa_bzero(machine->Temporaries,
1183                MAX_NV_FRAGMENT_PROGRAM_TEMPS * 4 * sizeof(GLfloat));
1184
1185    /* Load input registers */
1186    if (inputsRead & (1 << FRAG_ATTRIB_WPOS)) {
1187       GLfloat *wpos = machine->Inputs[FRAG_ATTRIB_WPOS];
1188       wpos[0] = (GLfloat) span->x + col;
1189       wpos[1] = (GLfloat) span->y;
1190       wpos[2] = (GLfloat) span->array->z[col] / ctx->DepthMaxF;
1191       wpos[3] = span->w + col * span->dwdx;
1192    }
1193    if (inputsRead & (1 << FRAG_ATTRIB_COL0)) {
1194       GLfloat *col0 = machine->Inputs[FRAG_ATTRIB_COL0];
1195       col0[0] = CHAN_TO_FLOAT(span->array->rgba[col][RCOMP]);
1196       col0[1] = CHAN_TO_FLOAT(span->array->rgba[col][GCOMP]);
1197       col0[2] = CHAN_TO_FLOAT(span->array->rgba[col][BCOMP]);
1198       col0[3] = CHAN_TO_FLOAT(span->array->rgba[col][ACOMP]);
1199    }
1200    if (inputsRead & (1 << FRAG_ATTRIB_COL1)) {
1201       GLfloat *col1 = machine->Inputs[FRAG_ATTRIB_COL1];
1202       col1[0] = CHAN_TO_FLOAT(span->array->spec[col][RCOMP]);
1203       col1[1] = CHAN_TO_FLOAT(span->array->spec[col][GCOMP]);
1204       col1[2] = CHAN_TO_FLOAT(span->array->spec[col][BCOMP]);
1205       col1[3] = CHAN_TO_FLOAT(span->array->spec[col][ACOMP]);
1206    }
1207    if (inputsRead & (1 << FRAG_ATTRIB_FOGC)) {
1208       GLfloat *fogc = machine->Inputs[FRAG_ATTRIB_FOGC];
1209       fogc[0] = span->array->fog[col];
1210       fogc[1] = 0.0F;
1211       fogc[2] = 0.0F;
1212       fogc[3] = 0.0F;
1213    }
1214    for (u = 0; u < ctx->Const.MaxTextureCoordUnits; u++) {
1215       if (inputsRead & (1 << (FRAG_ATTRIB_TEX0 + u))) {
1216          GLfloat *tex = machine->Inputs[FRAG_ATTRIB_TEX0 + u];
1217          /*ASSERT(ctx->Texture._EnabledCoordUnits & (1 << u));*/
1218          COPY_4V(tex, span->array->texcoords[u][col]);
1219          /*ASSERT(tex[0] != 0 || tex[1] != 0 || tex[2] != 0);*/
1220       }
1221    }
1222
1223    /* init condition codes */
1224    machine->CondCodes[0] = COND_EQ;
1225    machine->CondCodes[1] = COND_EQ;
1226    machine->CondCodes[2] = COND_EQ;
1227    machine->CondCodes[3] = COND_EQ;
1228 }
1229
1230
1231 void
1232 _swrast_exec_nv_fragment_program( GLcontext *ctx, struct sw_span *span )
1233 {
1234    const struct fragment_program *program = ctx->FragmentProgram.Current;
1235    GLuint i;
1236
1237    ctx->_CurrentProgram = GL_FRAGMENT_PROGRAM_ARB; /* or NV, doesn't matter */
1238
1239    for (i = 0; i < span->end; i++) {
1240       if (span->array->mask[i]) {
1241          init_machine(ctx, &ctx->FragmentProgram.Machine,
1242                       ctx->FragmentProgram.Current, span, i);
1243
1244          if (!execute_program(ctx, program, ~0,
1245                               &ctx->FragmentProgram.Machine, span, i)) {
1246             span->array->mask[i] = GL_FALSE;  /* killed fragment */
1247          }
1248
1249          /* Store output registers */
1250          {
1251             const GLfloat *colOut
1252                = ctx->FragmentProgram.Machine.Outputs[FRAG_OUTPUT_COLR];
1253             UNCLAMPED_FLOAT_TO_CHAN(span->array->rgba[i][RCOMP], colOut[0]);
1254             UNCLAMPED_FLOAT_TO_CHAN(span->array->rgba[i][GCOMP], colOut[1]);
1255             UNCLAMPED_FLOAT_TO_CHAN(span->array->rgba[i][BCOMP], colOut[2]);
1256             UNCLAMPED_FLOAT_TO_CHAN(span->array->rgba[i][ACOMP], colOut[3]);
1257          }
1258          /* depth value */
1259          if (program->OutputsWritten & (1 << FRAG_OUTPUT_DEPR))
1260             span->array->z[i] = IROUND(ctx->FragmentProgram.Machine.Outputs[FRAG_OUTPUT_DEPR][0] * ctx->DepthMaxF);
1261       }
1262    }
1263
1264    ctx->_CurrentProgram = 0;
1265 }
1266