src/mesa/swrast/s_nvfragprog.c

   1 /* $Id: s_nvfragprog.c,v 1.15 2003/04/11 01:20:15 brianp Exp $ */
   2
   3 /*
   4  * Mesa 3-D graphics library
   5  * Version:  5.1
   6  *
   7  * Copyright (C) 1999-2003  Brian Paul   All Rights Reserved.
   8  *
   9  * Permission is hereby granted, free of charge, to any person obtaining a
  10  * copy of this software and associated documentation files (the "Software"),
  11  * to deal in the Software without restriction, including without limitation
  12  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  13  * and/or sell copies of the Software, and to permit persons to whom the
  14  * Software is furnished to do so, subject to the following conditions:
  15  *
  16  * The above copyright notice and this permission notice shall be included
  17  * in all copies or substantial portions of the Software.
  18  *
  19  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  20  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  21  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  22  * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
  23  * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  24  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25  */
  26
  27
  28 #include "glheader.h"
  29 #include "colormac.h"
  30 #include "context.h"
  31 #include "nvfragprog.h"
  32 #include "macros.h"
  33
  34 #include "s_nvfragprog.h"
  35 #include "s_span.h"
  36 #include "s_texture.h"
  37
  38
   39 /* if 1, compile in the printf() debugging output guarded by "#if DEBUG_FRAG" in this file */
   40 #define DEBUG_FRAG 0
  41
  42
  43 /**
  44  * Fetch a texel.
  45  */
  46 static void
  47 fetch_texel( GLcontext *ctx, const GLfloat texcoord[4], GLfloat lambda,
  48              GLuint unit, GLfloat color[4] )
  49 {
  50    GLchan rgba[4];
  51    SWcontext *swrast = SWRAST_CONTEXT(ctx);
  52
  53    swrast->TextureSample[unit](ctx, unit, ctx->Texture.Unit[unit]._Current,
  54                                1, (const GLfloat (*)[4]) texcoord,
  55                                &lambda, &rgba);
  56    color[0] = CHAN_TO_FLOAT(rgba[0]);
  57    color[1] = CHAN_TO_FLOAT(rgba[1]);
  58    color[2] = CHAN_TO_FLOAT(rgba[2]);
  59    color[3] = CHAN_TO_FLOAT(rgba[3]);
  60 }
  61
  62
  63 /**
  64  * Fetch a texel with the given partial derivatives to compute a level
  65  * of detail in the mipmap.
  66  */
  67 static void
  68 fetch_texel_deriv( GLcontext *ctx, const GLfloat texcoord[4],
  69                    const GLfloat texdx[4], const GLfloat texdy[4],
  70                    GLuint unit, GLfloat color[4] )
  71 {
  72    SWcontext *swrast = SWRAST_CONTEXT(ctx);
  73    const struct gl_texture_object *texObj = ctx->Texture.Unit[unit]._Current;
  74    const struct gl_texture_image *texImg = texObj->Image[texObj->BaseLevel];
  75    const GLfloat texW = (GLfloat) texImg->WidthScale;
  76    const GLfloat texH = (GLfloat) texImg->HeightScale;
  77    GLchan rgba[4];
  78
  79    GLfloat lambda = _swrast_compute_lambda(texdx[0], texdy[0], /* ds/dx, ds/dy */
  80                                          texdx[1], texdy[1], /* dt/dx, dt/dy */
  81                                          texdx[3], texdy[2], /* dq/dx, dq/dy */
  82                                          texW, texH,
  83                                          texcoord[0], texcoord[1], texcoord[3],
  84                                          1.0F / texcoord[3]);
  85
  86    swrast->TextureSample[unit](ctx, unit, ctx->Texture.Unit[unit]._Current,
  87                                1, (const GLfloat (*)[4]) texcoord,
  88                                &lambda, &rgba);
  89    color[0] = CHAN_TO_FLOAT(rgba[0]);
  90    color[1] = CHAN_TO_FLOAT(rgba[1]);
  91    color[2] = CHAN_TO_FLOAT(rgba[2]);
  92    color[3] = CHAN_TO_FLOAT(rgba[3]);
  93 }
  94
  95
  96
  97 /**
  98  * Fetch a 4-element float vector from the given source register.
  99  * Apply swizzling and negating as needed.
 100  */
 101 static void
 102 fetch_vector4( const struct fp_src_register *source,
 103                const struct fp_machine *machine,
 104                const struct fragment_program *program,
 105                GLfloat result[4] )
 106 {
 107    const GLfloat *src;
 108
 109    if (source->IsParameter) {
 110       src = program->Parameters[source->Register].Values;
 111    }
 112    else {
 113       src = machine->Registers[source->Register];
 114    }
 115
 116    result[0] = src[source->Swizzle[0]];
 117    result[1] = src[source->Swizzle[1]];
 118    result[2] = src[source->Swizzle[2]];
 119    result[3] = src[source->Swizzle[3]];
 120
 121    if (source->NegateBase) {
 122       result[0] = -result[0];
 123       result[1] = -result[1];
 124       result[2] = -result[2];
 125       result[3] = -result[3];
 126    }
 127    if (source->Abs) {
 128       result[0] = FABSF(result[0]);
 129       result[1] = FABSF(result[1]);
 130       result[2] = FABSF(result[2]);
 131       result[3] = FABSF(result[3]);
 132    }
 133    if (source->NegateAbs) {
 134       result[0] = -result[0];
 135       result[1] = -result[1];
 136       result[2] = -result[2];
 137       result[3] = -result[3];
 138    }
 139 }
 140
 141
 142 /**
 143  * Fetch the derivative with respect to X for the given register.
 144  * \return GL_TRUE if it was easily computed or GL_FALSE if we
 145  * need to execute another instance of the program (ugh)!
 146  */
 147 static GLboolean
 148 fetch_vector4_deriv( const struct fp_src_register *source,
 149                      const struct sw_span *span,
 150                      char xOrY, GLfloat result[4] )
 151 {
 152    GLfloat src[4];
 153
 154    ASSERT(xOrY == 'X' || xOrY == 'Y');
 155
 156    switch (source->Register) {
 157    case FRAG_ATTRIB_WPOS:
 158       if (xOrY == 'X') {
 159          src[0] = 1.0;
 160          src[1] = 0.0;
 161          src[2] = span->dzdx;
 162          src[3] = span->dwdx;
 163       }
 164       else {
 165          src[0] = 0.0;
 166          src[1] = 1.0;
 167          src[2] = span->dzdy;
 168          src[3] = span->dwdy;
 169       }
 170       break;
 171    case FRAG_ATTRIB_COL0:
 172       if (xOrY == 'X') {
 173          src[0] = span->drdx * (1.0F / CHAN_MAXF);
 174          src[1] = span->dgdx * (1.0F / CHAN_MAXF);
 175          src[2] = span->dbdx * (1.0F / CHAN_MAXF);
 176          src[3] = span->dadx * (1.0F / CHAN_MAXF);
 177       }
 178       else {
 179          src[0] = span->drdy * (1.0F / CHAN_MAXF);
 180          src[1] = span->dgdy * (1.0F / CHAN_MAXF);
 181          src[2] = span->dbdy * (1.0F / CHAN_MAXF);
 182          src[3] = span->dady * (1.0F / CHAN_MAXF);
 183       }
 184       break;
 185    case FRAG_ATTRIB_COL1:
 186       if (xOrY == 'X') {
 187          src[0] = span->dsrdx * (1.0F / CHAN_MAXF);
 188          src[1] = span->dsgdx * (1.0F / CHAN_MAXF);
 189          src[2] = span->dsbdx * (1.0F / CHAN_MAXF);
 190          src[3] = 0.0; /* XXX need this */
 191       }
 192       else {
 193          src[0] = span->dsrdy * (1.0F / CHAN_MAXF);
 194          src[1] = span->dsgdy * (1.0F / CHAN_MAXF);
 195          src[2] = span->dsbdy * (1.0F / CHAN_MAXF);
 196          src[3] = 0.0; /* XXX need this */
 197       }
 198       break;
 199    case FRAG_ATTRIB_FOGC:
 200       if (xOrY == 'X') {
 201          src[0] = span->dfogdx;
 202          src[1] = 0.0;
 203          src[2] = 0.0;
 204          src[3] = 0.0;
 205       }
 206       else {
 207          src[0] = span->dfogdy;
 208          src[1] = 0.0;
 209          src[2] = 0.0;
 210          src[3] = 0.0;
 211       }
 212       break;
 213    case FRAG_ATTRIB_TEX0:
 214    case FRAG_ATTRIB_TEX1:
 215    case FRAG_ATTRIB_TEX2:
 216    case FRAG_ATTRIB_TEX3:
 217    case FRAG_ATTRIB_TEX4:
 218    case FRAG_ATTRIB_TEX5:
 219    case FRAG_ATTRIB_TEX6:
 220    case FRAG_ATTRIB_TEX7:
 221       if (xOrY == 'X') {
 222          const GLuint u = source->Register - FRAG_ATTRIB_TEX0;
 223          src[0] = span->texStepX[u][0] * (1.0F / CHAN_MAXF);
 224          src[1] = span->texStepX[u][1] * (1.0F / CHAN_MAXF);
 225          src[2] = span->texStepX[u][2] * (1.0F / CHAN_MAXF);
 226          src[3] = span->texStepX[u][3] * (1.0F / CHAN_MAXF);
 227       }
 228       else {
 229          const GLuint u = source->Register - FRAG_ATTRIB_TEX0;
 230          src[0] = span->texStepY[u][0] * (1.0F / CHAN_MAXF);
 231          src[1] = span->texStepY[u][1] * (1.0F / CHAN_MAXF);
 232          src[2] = span->texStepY[u][2] * (1.0F / CHAN_MAXF);
 233          src[3] = span->texStepY[u][3] * (1.0F / CHAN_MAXF);
 234       }
 235       break;
 236    default:
 237       return GL_FALSE;
 238    }
 239
 240    result[0] = src[source->Swizzle[0]];
 241    result[1] = src[source->Swizzle[1]];
 242    result[2] = src[source->Swizzle[2]];
 243    result[3] = src[source->Swizzle[3]];
 244
 245    if (source->NegateBase) {
 246       result[0] = -result[0];
 247       result[1] = -result[1];
 248       result[2] = -result[2];
 249       result[3] = -result[3];
 250    }
 251    if (source->Abs) {
 252       result[0] = FABSF(result[0]);
 253       result[1] = FABSF(result[1]);
 254       result[2] = FABSF(result[2]);
 255       result[3] = FABSF(result[3]);
 256    }
 257    if (source->NegateAbs) {
 258       result[0] = -result[0];
 259       result[1] = -result[1];
 260       result[2] = -result[2];
 261       result[3] = -result[3];
 262    }
 263    return GL_TRUE;
 264 }
 265
 266
 267 /**
 268  * As above, but only return result[0] element.
 269  */
 270 static void
 271 fetch_vector1( const struct fp_src_register *source,
 272                const struct fp_machine *machine,
 273                const struct fragment_program *program,
 274                GLfloat result[4] )
 275 {
 276    const GLfloat *src;
 277
 278    if (source->IsParameter) {
 279       src = program->Parameters[source->Register].Values;
 280    }
 281    else {
 282       src = machine->Registers[source->Register];
 283    }
 284
 285    result[0] = src[source->Swizzle[0]];
 286
 287    if (source->NegateBase) {
 288       result[0] = -result[0];
 289    }
 290    if (source->Abs) {
 291       result[0] = FABSF(result[0]);
 292    }
 293    if (source->NegateAbs) {
 294       result[0] = -result[0];
 295    }
 296 }
 297
 298
 299 /*
 300  * Test value against zero and return GT, LT, EQ or UN if NaN.
 301  */
 302 static INLINE GLuint
 303 generate_cc( float value )
 304 {
 305    if (value != value)
 306       return COND_UN;  /* NaN */
 307    if (value > 0.0F)
 308       return COND_GT;
 309    if (value < 0.0F)
 310       return COND_LT;
 311    return COND_EQ;
 312 }
 313
 314 /*
 315  * Test if the ccMaskRule is satisfied by the given condition code.
 316  * Used to mask destination writes according to the current condition codee.
 317  */
 318 static INLINE GLboolean
 319 test_cc(GLuint condCode, GLuint ccMaskRule)
 320 {
 321    switch (ccMaskRule) {
 322    case COND_EQ: return (condCode == COND_EQ);
 323    case COND_NE: return (condCode != COND_EQ);
 324    case COND_LT: return (condCode == COND_LT);
 325    case COND_GE: return (condCode == COND_GT || condCode == COND_EQ);
 326    case COND_LE: return (condCode == COND_LT || condCode == COND_EQ);
 327    case COND_GT: return (condCode == COND_GT);
 328    case COND_TR: return GL_TRUE;
 329    case COND_FL: return GL_FALSE;
 330    default:      return GL_TRUE;
 331    }
 332 }
 333
 334
 335 /**
 336  * Store 4 floats into a register.  Observe the instructions saturate and
 337  * set-condition-code flags.
 338  */
 339 static void
 340 store_vector4( const struct fp_instruction *inst,
 341                struct fp_machine *machine,
 342                const GLfloat value[4] )
 343 {
 344    const struct fp_dst_register *dest = &(inst->DstReg);
 345    const GLboolean clamp = inst->Saturate;
 346    const GLboolean updateCC = inst->UpdateCondRegister;
 347    GLfloat *dstReg = machine->Registers[dest->Register];
 348    GLfloat clampedValue[4];
 349    const GLboolean *writeMask = dest->WriteMask;
 350    GLboolean condWriteMask[4];
 351
 352 #if DEBUG_FRAG
 353    if (value[0] > 1.0e10 ||
 354        IS_INF_OR_NAN(value[0]) ||
 355        IS_INF_OR_NAN(value[1]) ||
 356        IS_INF_OR_NAN(value[2]) ||
 357        IS_INF_OR_NAN(value[3])  )
 358       printf("store %g %g %g %g\n", value[0], value[1], value[2], value[3]);
 359 #endif
 360
 361    if (clamp) {
 362       clampedValue[0] = CLAMP(value[0], 0.0F, 1.0F);
 363       clampedValue[1] = CLAMP(value[1], 0.0F, 1.0F);
 364       clampedValue[2] = CLAMP(value[2], 0.0F, 1.0F);
 365       clampedValue[3] = CLAMP(value[3], 0.0F, 1.0F);
 366       value = clampedValue;
 367    }
 368
 369    if (dest->CondMask != COND_TR) {
 370       condWriteMask[0] = writeMask[0]
 371          && test_cc(machine->CondCodes[dest->CondSwizzle[0]], dest->CondMask);
 372       condWriteMask[1] = writeMask[1]
 373          && test_cc(machine->CondCodes[dest->CondSwizzle[1]], dest->CondMask);
 374       condWriteMask[2] = writeMask[2]
 375          && test_cc(machine->CondCodes[dest->CondSwizzle[2]], dest->CondMask);
 376       condWriteMask[3] = writeMask[3]
 377          && test_cc(machine->CondCodes[dest->CondSwizzle[3]], dest->CondMask);
 378       writeMask = condWriteMask;
 379    }
 380
 381    if (writeMask[0]) {
 382       dstReg[0] = value[0];
 383       if (updateCC)
 384          machine->CondCodes[0] = generate_cc(value[0]);
 385    }
 386    if (writeMask[1]) {
 387       dstReg[1] = value[1];
 388       if (updateCC)
 389          machine->CondCodes[1] = generate_cc(value[1]);
 390    }
 391    if (writeMask[2]) {
 392       dstReg[2] = value[2];
 393       if (updateCC)
 394          machine->CondCodes[2] = generate_cc(value[2]);
 395    }
 396    if (writeMask[3]) {
 397       dstReg[3] = value[3];
 398       if (updateCC)
 399          machine->CondCodes[3] = generate_cc(value[3]);
 400    }
 401 }
 402
 403
 404 /**
 405  * Initialize a new machine state instance from an existing one, adding
 406  * the partial derivatives onto the input registers.
 407  * Used to implement DDX and DDY instructions in non-trivial cases.
 408  */
 409 static void
 410 init_machine_deriv( GLcontext *ctx,
 411                     const struct fp_machine *machine,
 412                     const struct fragment_program *program,
 413                     const struct sw_span *span, char xOrY,
 414                     struct fp_machine *dMachine )
 415 {
 416    GLuint u;
 417
 418    ASSERT(xOrY == 'X' || xOrY == 'Y');
 419
 420    /* copy existing machine */
 421    _mesa_memcpy(dMachine, machine, sizeof(struct fp_machine));
 422
 423    /* Clear temporary registers */
 424    _mesa_bzero((GLfloat*) (machine->Registers + FP_TEMP_REG_START) ,
 425                MAX_NV_FRAGMENT_PROGRAM_TEMPS * 4 * sizeof(GLfloat));
 426
 427    /* Add derivatives */
 428    if (program->InputsRead & (1 << FRAG_ATTRIB_WPOS)) {
 429       GLfloat *wpos = (GLfloat*) machine->Registers[FP_INPUT_REG_START+FRAG_ATTRIB_WPOS];
 430       if (xOrY == 'X') {
 431          wpos[0] += 1.0F;
 432          wpos[1] += 0.0F;
 433          wpos[2] += span->dzdx;
 434          wpos[3] += span->dwdx;
 435       }
 436       else {
 437          wpos[0] += 0.0F;
 438          wpos[1] += 1.0F;
 439          wpos[2] += span->dzdy;
 440          wpos[3] += span->dwdy;
 441       }
 442    }
 443    if (program->InputsRead & (1 << FRAG_ATTRIB_COL0)) {
 444       GLfloat *col0 = (GLfloat*) machine->Registers[FP_INPUT_REG_START+FRAG_ATTRIB_COL0];
 445       if (xOrY == 'X') {
 446          col0[0] += span->drdx * (1.0F / CHAN_MAXF);
 447          col0[1] += span->dgdx * (1.0F / CHAN_MAXF);
 448          col0[2] += span->dbdx * (1.0F / CHAN_MAXF);
 449          col0[3] += span->dadx * (1.0F / CHAN_MAXF);
 450       }
 451       else {
 452          col0[0] += span->drdy * (1.0F / CHAN_MAXF);
 453          col0[1] += span->dgdy * (1.0F / CHAN_MAXF);
 454          col0[2] += span->dbdy * (1.0F / CHAN_MAXF);
 455          col0[3] += span->dady * (1.0F / CHAN_MAXF);
 456       }
 457    }
 458    if (program->InputsRead & (1 << FRAG_ATTRIB_COL1)) {
 459       GLfloat *col1 = (GLfloat*) machine->Registers[FP_INPUT_REG_START+FRAG_ATTRIB_COL1];
 460       if (xOrY == 'X') {
 461          col1[0] += span->dsrdx * (1.0F / CHAN_MAXF);
 462          col1[1] += span->dsgdx * (1.0F / CHAN_MAXF);
 463          col1[2] += span->dsbdx * (1.0F / CHAN_MAXF);
 464          col1[3] += 0.0; /*XXX fix */
 465       }
 466       else {
 467          col1[0] += span->dsrdy * (1.0F / CHAN_MAXF);
 468          col1[1] += span->dsgdy * (1.0F / CHAN_MAXF);
 469          col1[2] += span->dsbdy * (1.0F / CHAN_MAXF);
 470          col1[3] += 0.0; /*XXX fix */
 471       }
 472    }
 473    if (program->InputsRead & (1 << FRAG_ATTRIB_FOGC)) {
 474       GLfloat *fogc = (GLfloat*) machine->Registers[FP_INPUT_REG_START+FRAG_ATTRIB_FOGC];
 475       if (xOrY == 'X') {
 476          fogc[0] += span->dfogdx;
 477       }
 478       else {
 479          fogc[0] += span->dfogdy;
 480       }
 481    }
 482    for (u = 0; u < ctx->Const.MaxTextureCoordUnits; u++) {
 483       if (program->InputsRead & (1 << (FRAG_ATTRIB_TEX0 + u))) {
 484          GLfloat *tex = (GLfloat*) machine->Registers[FP_INPUT_REG_START+FRAG_ATTRIB_TEX0+u];
 485          if (xOrY == 'X') {
 486             tex[0] += span->texStepX[u][0];
 487             tex[1] += span->texStepX[u][1];
 488             tex[2] += span->texStepX[u][2];
 489             tex[3] += span->texStepX[u][3];
 490          }
 491          else {
 492             tex[0] += span->texStepY[u][0];
 493             tex[1] += span->texStepY[u][1];
 494             tex[2] += span->texStepY[u][2];
 495             tex[3] += span->texStepY[u][3];
 496          }
 497       }
 498    }
 499 }
 500
 501
 502 /**
  503  * Execute the given fragment program.
 504  * NOTE: we do everything in single-precision floating point; we don't
 505  * currently observe the single/half/fixed-precision qualifiers.
 506  * \param ctx - rendering context
 507  * \param program - the fragment program to execute
 508  * \param machine - machine state (register file)
 509  * \param maxInst - max number of instructions to execute
 510  * \return GL_TRUE if program completed or GL_FALSE if program executed KIL.
 511  */
 512 static GLboolean
 513 execute_program( GLcontext *ctx,
 514                  const struct fragment_program *program, GLuint maxInst,
 515                  struct fp_machine *machine, const struct sw_span *span,
 516                  GLuint column )
 517 {
 518    GLuint pc;
 519
 520 #if DEBUG_FRAG
 521    printf("execute fragment program --------------------\n");
 522 #endif
 523
 524    for (pc = 0; pc < maxInst; pc++) {
 525       const struct fp_instruction *inst = program->Instructions + pc;
 526       switch (inst->Opcode) {
 527          case FP_OPCODE_ADD:
 528             {
 529                GLfloat a[4], b[4], result[4];
 530                fetch_vector4( &inst->SrcReg[0], machine, program, a );
 531                fetch_vector4( &inst->SrcReg[1], machine, program, b );
 532                result[0] = a[0] + b[0];
 533                result[1] = a[1] + b[1];
 534                result[2] = a[2] + b[2];
 535                result[3] = a[3] + b[3];
 536                store_vector4( inst, machine, result );
 537             }
 538             break;
 539          case FP_OPCODE_COS:
 540             {
 541                GLfloat a[4], result[4];
 542                fetch_vector1( &inst->SrcReg[0], machine, program, a );
 543                result[0] = result[1] = result[2] = result[3] = _mesa_cos(a[0]);
 544                store_vector4( inst, machine, result );
 545             }
 546             break;
 547          case FP_OPCODE_DDX: /* Partial derivative with respect to X */
 548             {
 549                GLfloat a[4], aNext[4], result[4];
 550                struct fp_machine dMachine;
 551                if (!fetch_vector4_deriv(&inst->SrcReg[0], span, 'X', result)) {
 552                   /* This is tricky.  Make a copy of the current machine state,
 553                    * increment the input registers by the dx or dy partial
 554                    * derivatives, then re-execute the program up to the
 555                    * preceeding instruction, then fetch the source register.
 556                    * Finally, find the difference in the register values for
 557                    * the original and derivative runs.
 558                    */
 559                   init_machine_deriv(ctx, machine, program, span,
 560                                      'X', &dMachine);
 561                   execute_program(ctx, program, pc, &dMachine, span, column);
 562                   fetch_vector4( &inst->SrcReg[0], &dMachine, program, aNext );
 563                   result[0] = aNext[0] - a[0];
 564                   result[1] = aNext[1] - a[1];
 565                   result[2] = aNext[2] - a[2];
 566                   result[3] = aNext[3] - a[3];
 567                }
 568                store_vector4( inst, machine, result );
 569             }
 570             break;
 571          case FP_OPCODE_DDY: /* Partial derivative with respect to Y */
 572             {
 573                GLfloat a[4], aNext[4], result[4];
 574                struct fp_machine dMachine;
 575                if (!fetch_vector4_deriv(&inst->SrcReg[0], span, 'Y', result)) {
 576                   init_machine_deriv(ctx, machine, program, span,
 577                                      'Y', &dMachine);
 578                   execute_program(ctx, program, pc, &dMachine, span, column);
 579                   fetch_vector4( &inst->SrcReg[0], &dMachine, program, aNext );
 580                   result[0] = aNext[0] - a[0];
 581                   result[1] = aNext[1] - a[1];
 582                   result[2] = aNext[2] - a[2];
 583                   result[3] = aNext[3] - a[3];
 584                }
 585                store_vector4( inst, machine, result );
 586             }
 587             break;
 588          case FP_OPCODE_DP3:
 589             {
 590                GLfloat a[4], b[4], result[4];
 591                fetch_vector4( &inst->SrcReg[0], machine, program, a );
 592                fetch_vector4( &inst->SrcReg[1], machine, program, b );
 593                result[0] = result[1] = result[2] = result[3] =
 594                   a[0] * b[0] + a[1] * b[1] + a[2] * b[2];
 595                store_vector4( inst, machine, result );
 596 #if DEBUG_FRAG
 597                printf("DP3 %g = (%g %g %g) . (%g %g %g)\n",
 598                       result[0], a[0], a[1], a[2], b[0], b[1], b[2]);
 599 #endif
 600             }
 601             break;
 602          case FP_OPCODE_DP4:
 603             {
 604                GLfloat a[4], b[4], result[4];
 605                fetch_vector4( &inst->SrcReg[0], machine, program, a );
 606                fetch_vector4( &inst->SrcReg[1], machine, program, b );
 607                result[0] = result[1] = result[2] = result[3] =
 608                   a[0] * b[0] + a[1] * b[1] + a[2] * b[2] + a[3] * b[3];
 609                store_vector4( inst, machine, result );
 610             }
 611             break;
 612          case FP_OPCODE_DST: /* Distance vector */
 613             {
 614                GLfloat a[4], b[4], result[4];
 615                fetch_vector4( &inst->SrcReg[0], machine, program, a );
 616                fetch_vector4( &inst->SrcReg[1], machine, program, b );
 617                result[0] = 1.0F;
 618                result[1] = a[1] * b[1];
 619                result[2] = a[2];
 620                result[3] = b[3];
 621                store_vector4( inst, machine, result );
 622             }
 623             break;
 624          case FP_OPCODE_EX2: /* Exponential base 2 */
 625             {
 626                GLfloat a[4], result[4];
 627                fetch_vector1( &inst->SrcReg[0], machine, program, a );
 628                result[0] = result[1] = result[2] = result[3] =
 629                   (GLfloat) _mesa_pow(2.0, a[0]);
 630                store_vector4( inst, machine, result );
 631             }
 632             break;
 633          case FP_OPCODE_FLR:
 634             {
 635                GLfloat a[4], result[4];
 636                fetch_vector4( &inst->SrcReg[0], machine, program, a );
 637                result[0] = FLOORF(a[0]);
 638                result[1] = FLOORF(a[1]);
 639                result[2] = FLOORF(a[2]);
 640                result[3] = FLOORF(a[3]);
 641                store_vector4( inst, machine, result );
 642             }
 643             break;
 644          case FP_OPCODE_FRC:
 645             {
 646                GLfloat a[4], result[4];
 647                fetch_vector4( &inst->SrcReg[0], machine, program, a );
 648                result[0] = a[0] - FLOORF(a[0]);
 649                result[1] = a[1] - FLOORF(a[1]);
 650                result[2] = a[2] - FLOORF(a[2]);
 651                result[3] = a[3] - FLOORF(a[3]);
 652                store_vector4( inst, machine, result );
 653             }
 654             break;
 655          case FP_OPCODE_KIL:
 656             {
 657                const GLuint *swizzle = inst->DstReg.CondSwizzle;
 658                const GLuint condMask = inst->DstReg.CondMask;
 659                if (test_cc(machine->CondCodes[swizzle[0]], condMask) ||
 660                    test_cc(machine->CondCodes[swizzle[1]], condMask) ||
 661                    test_cc(machine->CondCodes[swizzle[2]], condMask) ||
 662                    test_cc(machine->CondCodes[swizzle[3]], condMask))
 663                   return GL_FALSE;
 664             }
 665             break;
 666          case FP_OPCODE_LG2:  /* log base 2 */
 667             {
 668                GLfloat a[4], result[4];
 669                fetch_vector1( &inst->SrcReg[0], machine, program, a );
 670                result[0] = result[1] = result[2] = result[3]
 671                   = LOG2(a[0]);
 672                store_vector4( inst, machine, result );
 673             }
 674             break;
 675          case FP_OPCODE_LIT:
 676             {
 677                GLfloat a[4], result[4];
 678                fetch_vector4( &inst->SrcReg[0], machine, program, a );
 679                if (a[0] < 0.0F)
 680                   a[0] = 0.0F;
 681                if (a[1] < 0.0F)
 682                   a[1] = 0.0F;
 683                result[0] = 1.0F;
 684                result[1] = a[0];
 685                result[2] = (a[0] > 0.0) ? _mesa_pow(2.0, a[3]) : 0.0F;
 686                result[3] = 1.0F;
 687                store_vector4( inst, machine, result );
 688             }
 689             break;
 690          case FP_OPCODE_LRP:
 691             {
 692                GLfloat a[4], b[4], c[4], result[4];
 693                fetch_vector4( &inst->SrcReg[0], machine, program, a );
 694                fetch_vector4( &inst->SrcReg[1], machine, program, b );
 695                fetch_vector4( &inst->SrcReg[2], machine, program, c );
 696                result[0] = a[0] * b[0] + (1.0F - a[0]) * c[0];
 697                result[1] = a[1] * b[1] + (1.0F - a[1]) * c[1];
 698                result[2] = a[2] * b[2] + (1.0F - a[2]) * c[2];
 699                result[3] = a[3] * b[3] + (1.0F - a[3]) * c[3];
 700                store_vector4( inst, machine, result );
 701             }
 702             break;
 703          case FP_OPCODE_MAD:
 704             {
 705                GLfloat a[4], b[4], c[4], result[4];
 706                fetch_vector4( &inst->SrcReg[0], machine, program, a );
 707                fetch_vector4( &inst->SrcReg[1], machine, program, b );
 708                fetch_vector4( &inst->SrcReg[2], machine, program, c );
 709                result[0] = a[0] * b[0] + c[0];
 710                result[1] = a[1] * b[1] + c[1];
 711                result[2] = a[2] * b[2] + c[2];
 712                result[3] = a[3] * b[3] + c[3];
 713                store_vector4( inst, machine, result );
 714             }
 715             break;
 716          case FP_OPCODE_MAX:
 717             {
 718                GLfloat a[4], b[4], result[4];
 719                fetch_vector4( &inst->SrcReg[0], machine, program, a );
 720                fetch_vector4( &inst->SrcReg[1], machine, program, b );
 721                result[0] = MAX2(a[0], b[0]);
 722                result[1] = MAX2(a[1], b[1]);
 723                result[2] = MAX2(a[2], b[2]);
 724                result[3] = MAX2(a[3], b[3]);
 725                store_vector4( inst, machine, result );
 726             }
 727             break;
 728          case FP_OPCODE_MIN:
 729             {
 730                GLfloat a[4], b[4], result[4];
 731                fetch_vector4( &inst->SrcReg[0], machine, program, a );
 732                fetch_vector4( &inst->SrcReg[1], machine, program, b );
 733                result[0] = MIN2(a[0], b[0]);
 734                result[1] = MIN2(a[1], b[1]);
 735                result[2] = MIN2(a[2], b[2]);
 736                result[3] = MIN2(a[3], b[3]);
 737                store_vector4( inst, machine, result );
 738             }
 739             break;
 740          case FP_OPCODE_MOV:
 741             {
 742                GLfloat result[4];
 743                fetch_vector4( &inst->SrcReg[0], machine, program, result );
 744                store_vector4( inst, machine, result );
 745             }
 746             break;
 747          case FP_OPCODE_MUL:
 748             {
 749                GLfloat a[4], b[4], result[4];
 750                fetch_vector4( &inst->SrcReg[0], machine, program, a );
 751                fetch_vector4( &inst->SrcReg[1], machine, program, b );
 752                result[0] = a[0] * b[0];
 753                result[1] = a[1] * b[1];
 754                result[2] = a[2] * b[2];
 755                result[3] = a[3] * b[3];
 756                store_vector4( inst, machine, result );
 757 #if DEBUG_FRAG
 758                printf("MUL (%g %g %g %g) = (%g %g %g %g) * (%g %g %g %g)\n",
 759                       result[0], result[1], result[2], result[3],
 760                       a[0], a[1], a[2], a[3],
 761                       b[0], b[1], b[2], b[3]);
 762 #endif
 763             }
 764             break;
 765          case FP_OPCODE_PK2H: /* pack two 16-bit floats */
 766             /* XXX this is probably wrong */
 767             {
 768                GLfloat a[4], result[4];
 769                const GLuint *rawBits = (const GLuint *) a;
 770                GLuint *rawResult = (GLuint *) result;
 771                fetch_vector4( &inst->SrcReg[0], machine, program, a );
 772                rawResult[0] = rawResult[1] = rawResult[2] = rawResult[3]
 773                   = rawBits[0] | (rawBits[1] << 16);
 774                store_vector4( inst, machine, result );
 775             }
 776             break;
 777          case FP_OPCODE_PK2US: /* pack two GLushorts */
 778             {
 779                GLfloat a[4], result[4];
 780                GLuint usx, usy, *rawResult = (GLuint *) result;
 781                fetch_vector4( &inst->SrcReg[0], machine, program, a );
 782                a[0] = CLAMP(a[0], 0.0F, 1.0F);
 783                a[1] = CLAMP(a[0], 0.0F, 1.0F);
 784                usx = IROUND(a[0] * 65535.0F);
 785                usy = IROUND(a[1] * 65535.0F);
 786                rawResult[0] = rawResult[1] = rawResult[2] = rawResult[3]
 787                   = usx | (usy << 16);
 788                store_vector4( inst, machine, result );
 789             }
 790             break;
 791          case FP_OPCODE_PK4B: /* pack four GLbytes */
 792             {
 793                GLfloat a[4], result[4];
 794                GLuint ubx, uby, ubz, ubw, *rawResult = (GLuint *) result;
 795                fetch_vector4( &inst->SrcReg[0], machine, program, a );
 796                a[0] = CLAMP(a[0], -128.0F / 127.0F, 1.0F);
 797                a[1] = CLAMP(a[1], -128.0F / 127.0F, 1.0F);
 798                a[2] = CLAMP(a[2], -128.0F / 127.0F, 1.0F);
 799                a[3] = CLAMP(a[3], -128.0F / 127.0F, 1.0F);
 800                ubx = IROUND(127.0F * a[0] + 128.0F);
 801                uby = IROUND(127.0F * a[1] + 128.0F);
 802                ubz = IROUND(127.0F * a[2] + 128.0F);
 803                ubw = IROUND(127.0F * a[3] + 128.0F);
 804                rawResult[0] = rawResult[1] = rawResult[2] = rawResult[3]
 805                   = ubx | (uby << 8) | (ubz << 16) | (ubw << 24);
 806                store_vector4( inst, machine, result );
 807             }
 808             break;
 809          case FP_OPCODE_PK4UB: /* pack four GLubytes */
 810             {
 811                GLfloat a[4], result[4];
 812                GLuint ubx, uby, ubz, ubw, *rawResult = (GLuint *) result;
 813                fetch_vector4( &inst->SrcReg[0], machine, program, a );
 814                a[0] = CLAMP(a[0], 0.0F, 1.0F);
 815                a[1] = CLAMP(a[1], 0.0F, 1.0F);
 816                a[2] = CLAMP(a[2], 0.0F, 1.0F);
 817                a[3] = CLAMP(a[3], 0.0F, 1.0F);
 818                ubx = IROUND(255.0F * a[0]);
 819                uby = IROUND(255.0F * a[1]);
 820                ubz = IROUND(255.0F * a[2]);
 821                ubw = IROUND(255.0F * a[3]);
 822                rawResult[0] = rawResult[1] = rawResult[2] = rawResult[3]
 823                   = ubx | (uby << 8) | (ubz << 16) | (ubw << 24);
 824                store_vector4( inst, machine, result );
 825             }
 826             break;
 827          case FP_OPCODE_POW:
 828             {
 829                GLfloat a[4], b[4], result[4];
 830                fetch_vector1( &inst->SrcReg[0], machine, program, a );
 831                fetch_vector1( &inst->SrcReg[1], machine, program, b );
 832                result[0] = result[1] = result[2] = result[3]
 833                   = _mesa_pow(a[0], b[0]);
 834                store_vector4( inst, machine, result );
 835             }
 836             break;
 837          case FP_OPCODE_RCP:
 838             {
 839                GLfloat a[4], result[4];
 840                fetch_vector1( &inst->SrcReg[0], machine, program, a );
 841 #if DEBUG_FRAG
 842                if (a[0] == 0)
 843                   printf("RCP(0)\n");
 844                else if (IS_INF_OR_NAN(a[0]))
 845                   printf("RCP(inf)\n");
 846 #endif
 847                result[0] = result[1] = result[2] = result[3]
 848                   = 1.0F / a[0];
 849                store_vector4( inst, machine, result );
 850             }
 851             break;
 852          case FP_OPCODE_RFL:
 853             {
 854                GLfloat axis[4], dir[4], result[4], tmp[4];
 855                fetch_vector4( &inst->SrcReg[0], machine, program, axis );
 856                fetch_vector4( &inst->SrcReg[1], machine, program, dir );
 857                tmp[3] = axis[0] * axis[0]
 858                       + axis[1] * axis[1]
 859                       + axis[2] * axis[2];
 860                tmp[0] = (2.0F * (axis[0] * dir[0] +
 861                                  axis[1] * dir[1] +
 862                                  axis[2] * dir[2])) / tmp[3];
 863                result[0] = tmp[0] * axis[0] - dir[0];
 864                result[1] = tmp[0] * axis[1] - dir[1];
 865                result[2] = tmp[0] * axis[2] - dir[2];
 866                /* result[3] is never written! XXX enforce in parser! */
 867                store_vector4( inst, machine, result );
 868             }
 869             break;
 870          case FP_OPCODE_RSQ: /* 1 / sqrt() */
 871             {
 872                GLfloat a[4], result[4];
 873                fetch_vector1( &inst->SrcReg[0], machine, program, a );
 874                result[0] = result[1] = result[2] = result[3] = INV_SQRTF(a[0]);
 875                store_vector4( inst, machine, result );
 876 #if DEBUG_FRAG
 877                printf("RSQ %g = 1/sqrt(%g)\n", result[0], a[0]);
 878 #endif
 879             }
 880             break;
 881          case FP_OPCODE_SEQ: /* set on equal */
 882             {
 883                GLfloat a[4], b[4], result[4];
 884                fetch_vector4( &inst->SrcReg[0], machine, program, a );
 885                fetch_vector4( &inst->SrcReg[1], machine, program, b );
 886                result[0] = (a[0] == b[0]) ? 1.0F : 0.0F;
 887                result[1] = (a[1] == b[1]) ? 1.0F : 0.0F;
 888                result[2] = (a[2] == b[2]) ? 1.0F : 0.0F;
 889                result[3] = (a[3] == b[3]) ? 1.0F : 0.0F;
 890                store_vector4( inst, machine, result );
 891             }
 892             break;
 893          case FP_OPCODE_SFL: /* set false, operands ignored */
 894             {
 895                static const GLfloat result[4] = { 0.0F, 0.0F, 0.0F, 0.0F };
 896                store_vector4( inst, machine, result );
 897             }
 898             break;
 899          case FP_OPCODE_SGE: /* set on greater or equal */
 900             {
 901                GLfloat a[4], b[4], result[4];
 902                fetch_vector4( &inst->SrcReg[0], machine, program, a );
 903                fetch_vector4( &inst->SrcReg[1], machine, program, b );
 904                result[0] = (a[0] >= b[0]) ? 1.0F : 0.0F;
 905                result[1] = (a[1] >= b[1]) ? 1.0F : 0.0F;
 906                result[2] = (a[2] >= b[2]) ? 1.0F : 0.0F;
 907                result[3] = (a[3] >= b[3]) ? 1.0F : 0.0F;
 908                store_vector4( inst, machine, result );
 909             }
 910             break;
 911          case FP_OPCODE_SGT: /* set on greater */
 912             {
 913                GLfloat a[4], b[4], result[4];
 914                fetch_vector4( &inst->SrcReg[0], machine, program, a );
 915                fetch_vector4( &inst->SrcReg[1], machine, program, b );
 916                result[0] = (a[0] > b[0]) ? 1.0F : 0.0F;
 917                result[1] = (a[1] > b[1]) ? 1.0F : 0.0F;
 918                result[2] = (a[2] > b[2]) ? 1.0F : 0.0F;
 919                result[3] = (a[3] > b[3]) ? 1.0F : 0.0F;
 920                store_vector4( inst, machine, result );
 921             }
 922             break;
 923          case FP_OPCODE_SIN:
 924             {
 925                GLfloat a[4], result[4];
 926                fetch_vector1( &inst->SrcReg[0], machine, program, a );
 927                result[0] = result[1] = result[2] = result[3] = _mesa_sin(a[0]);
 928                store_vector4( inst, machine, result );
 929             }
 930             break;
 931          case FP_OPCODE_SLE: /* set on less or equal */
 932             {
 933                GLfloat a[4], b[4], result[4];
 934                fetch_vector4( &inst->SrcReg[0], machine, program, a );
 935                fetch_vector4( &inst->SrcReg[1], machine, program, b );
 936                result[0] = (a[0] <= b[0]) ? 1.0F : 0.0F;
 937                result[1] = (a[1] <= b[1]) ? 1.0F : 0.0F;
 938                result[2] = (a[2] <= b[2]) ? 1.0F : 0.0F;
 939                result[3] = (a[3] <= b[3]) ? 1.0F : 0.0F;
 940                store_vector4( inst, machine, result );
 941             }
 942             break;
 943          case FP_OPCODE_SLT: /* set on less */
 944             {
 945                GLfloat a[4], b[4], result[4];
 946                fetch_vector4( &inst->SrcReg[0], machine, program, a );
 947                fetch_vector4( &inst->SrcReg[1], machine, program, b );
 948                result[0] = (a[0] < b[0]) ? 1.0F : 0.0F;
 949                result[1] = (a[1] < b[1]) ? 1.0F : 0.0F;
 950                result[2] = (a[2] < b[2]) ? 1.0F : 0.0F;
 951                result[3] = (a[3] < b[3]) ? 1.0F : 0.0F;
 952                store_vector4( inst, machine, result );
 953             }
 954             break;
 955          case FP_OPCODE_SNE: /* set on not equal */
 956             {
 957                GLfloat a[4], b[4], result[4];
 958                fetch_vector4( &inst->SrcReg[0], machine, program, a );
 959                fetch_vector4( &inst->SrcReg[1], machine, program, b );
 960                result[0] = (a[0] != b[0]) ? 1.0F : 0.0F;
 961                result[1] = (a[1] != b[1]) ? 1.0F : 0.0F;
 962                result[2] = (a[2] != b[2]) ? 1.0F : 0.0F;
 963                result[3] = (a[3] != b[3]) ? 1.0F : 0.0F;
 964                store_vector4( inst, machine, result );
 965             }
 966             break;
 967          case FP_OPCODE_STR: /* set true, operands ignored */
 968             {
 969                static const GLfloat result[4] = { 1.0F, 1.0F, 1.0F, 1.0F };
 970                store_vector4( inst, machine, result );
 971             }
 972             break;
 973          case FP_OPCODE_SUB:
 974             {
 975                GLfloat a[4], b[4], result[4];
 976                fetch_vector4( &inst->SrcReg[0], machine, program, a );
 977                fetch_vector4( &inst->SrcReg[1], machine, program, b );
 978                result[0] = a[0] - b[0];
 979                result[1] = a[1] - b[1];
 980                result[2] = a[2] - b[2];
 981                result[3] = a[3] - b[3];
 982                store_vector4( inst, machine, result );
 983             }
 984             break;
 985          case FP_OPCODE_TEX:
 986             /* Texel lookup */
 987             {
 988                GLfloat texcoord[4], color[4];
 989                fetch_vector4( &inst->SrcReg[0], machine, program, texcoord );
 990                /* XXX: Undo perspective divide from interpolate_texcoords() */
 991                fetch_texel( ctx, texcoord,
 992                             span->array->lambda[inst->TexSrcUnit][column],
 993                             inst->TexSrcUnit, color );
 994                store_vector4( inst, machine, color );
 995             }
 996             break;
 997          case FP_OPCODE_TXD:
 998             /* Texture lookup w/ partial derivatives for LOD */
 999             {
1000                GLfloat texcoord[4], dtdx[4], dtdy[4], color[4];
1001                fetch_vector4( &inst->SrcReg[0], machine, program, texcoord );
1002                fetch_vector4( &inst->SrcReg[1], machine, program, dtdx );
1003                fetch_vector4( &inst->SrcReg[2], machine, program, dtdy );
1004                fetch_texel_deriv( ctx, texcoord, dtdx, dtdy, inst->TexSrcUnit,
1005                                   color );
1006                store_vector4( inst, machine, color );
1007             }
1008             break;
1009          case FP_OPCODE_TXP:
1010             /* Texture lookup w/ perspective divide */
1011             {
1012                GLfloat texcoord[4], color[4];
1013                fetch_vector4( &inst->SrcReg[0], machine, program, texcoord );
1014                /* Already did perspective divide in interpolate_texcoords() */
1015                fetch_texel( ctx, texcoord,
1016                             span->array->lambda[inst->TexSrcUnit][column],
1017                             inst->TexSrcUnit, color );
1018                store_vector4( inst, machine, color );
1019             }
1020             break;
1021          case FP_OPCODE_UP2H: /* unpack two 16-bit floats */
1022             /* XXX this is probably wrong */
1023             {
1024                GLfloat a[4], result[4];
1025                const GLuint *rawBits = (const GLuint *) a;
1026                GLuint *rawResult = (GLuint *) result;
1027                fetch_vector1( &inst->SrcReg[0], machine, program, a );
1028                rawResult[0] = rawBits[0] & 0xffff;
1029                rawResult[1] = (rawBits[0] >> 16) & 0xffff;
1030                rawResult[2] = rawBits[0] & 0xffff;
1031                rawResult[3] = (rawBits[0] >> 16) & 0xffff;
1032                store_vector4( inst, machine, result );
1033             }
1034             break;
1035          case FP_OPCODE_UP2US: /* unpack two GLushorts */
1036             {
1037                GLfloat a[4], result[4];
1038                const GLuint *rawBits = (const GLuint *) a;
1039                fetch_vector1( &inst->SrcReg[0], machine, program, a );
1040                result[0] = (GLfloat) ((rawBits[0] >>  0) & 0xffff) / 65535.0F;
1041                result[1] = (GLfloat) ((rawBits[0] >> 16) & 0xffff) / 65535.0F;
1042                result[2] = result[0];
1043                result[3] = result[1];
1044                store_vector4( inst, machine, result );
1045             }
1046             break;
1047          case FP_OPCODE_UP4B: /* unpack four GLbytes */
1048             {
1049                GLfloat a[4], result[4];
1050                const GLuint *rawBits = (const GLuint *) a;
1051                fetch_vector1( &inst->SrcReg[0], machine, program, a );
1052                result[0] = (((rawBits[0] >>  0) & 0xff) - 128) / 127.0F;
1053                result[0] = (((rawBits[0] >>  8) & 0xff) - 128) / 127.0F;
1054                result[0] = (((rawBits[0] >> 16) & 0xff) - 128) / 127.0F;
1055                result[0] = (((rawBits[0] >> 24) & 0xff) - 128) / 127.0F;
1056                store_vector4( inst, machine, result );
1057             }
1058             break;
1059          case FP_OPCODE_UP4UB: /* unpack four GLubytes */
1060             {
1061                GLfloat a[4], result[4];
1062                const GLuint *rawBits = (const GLuint *) a;
1063                fetch_vector1( &inst->SrcReg[0], machine, program, a );
1064                result[0] = ((rawBits[0] >>  0) & 0xff) / 255.0F;
1065                result[0] = ((rawBits[0] >>  8) & 0xff) / 255.0F;
1066                result[0] = ((rawBits[0] >> 16) & 0xff) / 255.0F;
1067                result[0] = ((rawBits[0] >> 24) & 0xff) / 255.0F;
1068                store_vector4( inst, machine, result );
1069             }
1070             break;
1071          case FP_OPCODE_X2D: /* 2-D matrix transform */
1072             {
1073                GLfloat a[4], b[4], c[4], result[4];
1074                fetch_vector4( &inst->SrcReg[0], machine, program, a );
1075                fetch_vector4( &inst->SrcReg[1], machine, program, b );
1076                fetch_vector4( &inst->SrcReg[2], machine, program, c );
1077                result[0] = a[0] + b[0] * c[0] + b[1] * c[1];
1078                result[1] = a[1] + b[0] * c[2] + b[1] * c[3];
1079                result[2] = a[2] + b[0] * c[0] + b[1] * c[1];
1080                result[3] = a[3] + b[0] * c[2] + b[1] * c[3];
1081                store_vector4( inst, machine, result );
1082             }
1083             break;
1084          case FP_OPCODE_END:
1085             return GL_TRUE;
1086          default:
1087             _mesa_problem(ctx, "Bad opcode %d in _mesa_exec_fragment_program",
1088                           inst->Opcode);
1089             return GL_TRUE; /* return value doesn't matter */
1090       }
1091    }
1092    return GL_TRUE;
1093 }
1094
1095
1096 static void
1097 init_machine( GLcontext *ctx, struct fp_machine *machine,
1098               const struct fragment_program *program,
1099               const struct sw_span *span, GLuint col )
1100 {
1101    GLuint j, u;
1102
1103    /* Clear temporary registers */
1104    _mesa_bzero(machine->Registers + FP_TEMP_REG_START,
1105                MAX_NV_FRAGMENT_PROGRAM_TEMPS * 4 * sizeof(GLfloat));
1106
1107    /* Load program local parameters */
1108    for (j = 0; j < MAX_NV_FRAGMENT_PROGRAM_PARAMS; j++) {
1109       COPY_4V(machine->Registers[FP_PROG_REG_START + j],
1110               program->Base.LocalParams[j]);
1111    }
1112
1113    /* Load input registers */
1114    if (program->InputsRead & (1 << FRAG_ATTRIB_WPOS)) {
1115       GLfloat *wpos = machine->Registers[FP_INPUT_REG_START+FRAG_ATTRIB_WPOS];
1116       wpos[0] = span->x + col;
1117       wpos[1] = span->y;
1118       wpos[2] = (GLfloat) span->array->z[col] / ctx->DepthMaxF;
1119       wpos[3] = span->w + col * span->dwdx;
1120    }
1121    if (program->InputsRead & (1 << FRAG_ATTRIB_COL0)) {
1122       GLfloat *col0 = machine->Registers[FP_INPUT_REG_START+FRAG_ATTRIB_COL0];
1123       col0[0] = CHAN_TO_FLOAT(span->array->rgba[col][RCOMP]);
1124       col0[1] = CHAN_TO_FLOAT(span->array->rgba[col][GCOMP]);
1125       col0[2] = CHAN_TO_FLOAT(span->array->rgba[col][BCOMP]);
1126       col0[3] = CHAN_TO_FLOAT(span->array->rgba[col][ACOMP]);
1127    }
1128    if (program->InputsRead & (1 << FRAG_ATTRIB_COL1)) {
1129       GLfloat *col1 = machine->Registers[FP_INPUT_REG_START+FRAG_ATTRIB_COL1];
1130       col1[0] = CHAN_TO_FLOAT(span->array->spec[col][RCOMP]);
1131       col1[1] = CHAN_TO_FLOAT(span->array->spec[col][GCOMP]);
1132       col1[2] = CHAN_TO_FLOAT(span->array->spec[col][BCOMP]);
1133       col1[3] = CHAN_TO_FLOAT(span->array->spec[col][ACOMP]);
1134    }
1135    if (program->InputsRead & (1 << FRAG_ATTRIB_FOGC)) {
1136       GLfloat *fogc = machine->Registers[FP_INPUT_REG_START+FRAG_ATTRIB_FOGC];
1137       fogc[0] = span->array->fog[col];
1138       fogc[1] = 0.0F;
1139       fogc[2] = 0.0F;
1140       fogc[3] = 0.0F;
1141    }
1142    for (u = 0; u < ctx->Const.MaxTextureCoordUnits; u++) {
1143       if (program->InputsRead & (1 << (FRAG_ATTRIB_TEX0 + u))) {
1144          GLfloat *tex = machine->Registers[FP_INPUT_REG_START+FRAG_ATTRIB_TEX0+u];
1145          ASSERT(ctx->Texture._EnabledCoordUnits & (1 << u));
1146          COPY_4V(tex, span->array->texcoords[u][col]);
1147          ASSERT(tex[0] != 0 || tex[1] != 0 || tex[2] != 0);
1148       }
1149    }
1150 }
1151
1152
1153 void
1154 _swrast_exec_nv_fragment_program( GLcontext *ctx, struct sw_span *span )
1155 {
1156    const struct fragment_program *program = ctx->FragmentProgram.Current;
1157    GLuint i;
1158
1159    for (i = 0; i < span->end; i++) {
1160       if (span->array->mask[i]) {
1161          init_machine(ctx, &ctx->FragmentProgram.Machine,
1162                       ctx->FragmentProgram.Current, span, i);
1163
1164          if (!execute_program(ctx, program, ~0,
1165                               &ctx->FragmentProgram.Machine, span, i))
1166             span->array->mask[i] = GL_FALSE;  /* killed fragment */
1167
1168          /* Store output registers */
1169          {
1170             const GLfloat *colOut
1171                = ctx->FragmentProgram.Machine.Registers[FP_OUTPUT_REG_START];
1172             UNCLAMPED_FLOAT_TO_CHAN(span->array->rgba[i][RCOMP], colOut[0]);
1173             UNCLAMPED_FLOAT_TO_CHAN(span->array->rgba[i][GCOMP], colOut[1]);
1174             UNCLAMPED_FLOAT_TO_CHAN(span->array->rgba[i][BCOMP], colOut[2]);
1175             UNCLAMPED_FLOAT_TO_CHAN(span->array->rgba[i][ACOMP], colOut[3]);
1176          }
1177          /* depth value */
1178          if (ctx->FragmentProgram.Current->OutputsWritten & 2)
1179             span->array->z[i] = IROUND(ctx->FragmentProgram.Machine.Registers[FP_OUTPUT_REG_START + 2][0] * ctx->DepthMaxF);
1180       }
1181    }
1182 }
1183