src/mesa/swrast/s_nvfragprog.c

   1 /* $Id: s_nvfragprog.c,v 1.11 2003/03/19 07:15:35 joukj Exp $ */
   2
   3 /*
   4  * Mesa 3-D graphics library
   5  * Version:  5.1
   6  *
   7  * Copyright (C) 1999-2003  Brian Paul   All Rights Reserved.
   8  *
   9  * Permission is hereby granted, free of charge, to any person obtaining a
  10  * copy of this software and associated documentation files (the "Software"),
  11  * to deal in the Software without restriction, including without limitation
  12  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  13  * and/or sell copies of the Software, and to permit persons to whom the
  14  * Software is furnished to do so, subject to the following conditions:
  15  *
  16  * The above copyright notice and this permission notice shall be included
  17  * in all copies or substantial portions of the Software.
  18  *
  19  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  20  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  21  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  22  * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
  23  * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  24  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25  */
  26
  27
  28
  29 #include "glheader.h"
  30 #include "colormac.h"
  31 #include "context.h"
  32 #include "nvfragprog.h"
  33 #include "macros.h"
  34
  35 #include "s_nvfragprog.h"
  36 #include "s_span.h"
  37 #include "s_texture.h"
  38
  39
  40 /**
  41  * Fetch a texel.
  42  */
  43 static void
  44 fetch_texel( GLcontext *ctx, const GLfloat texcoord[4], GLuint unit,
  45              GLfloat color[4] )
  46 {
  47    const GLfloat *lambda = NULL;
  48    GLchan rgba[4];
  49    SWcontext *swrast = SWRAST_CONTEXT(ctx);
  50
  51    swrast->TextureSample[unit](ctx, unit, ctx->Texture.Unit[unit]._Current,
  52                                1, (const GLfloat (*)[4]) texcoord,
  53                                lambda, &rgba);
  54    color[0] = CHAN_TO_FLOAT(rgba[0]);
  55    color[1] = CHAN_TO_FLOAT(rgba[1]);
  56    color[2] = CHAN_TO_FLOAT(rgba[2]);
  57    color[3] = CHAN_TO_FLOAT(rgba[3]);
  58 }
  59
  60
  61 /**
  62  * Fetch a texel with the given partial derivatives to compute a level
  63  * of detail in the mipmap.
  64  */
  65 static void
  66 fetch_texel_deriv( GLcontext *ctx, const GLfloat texcoord[4],
  67                    const GLfloat texdx[4], const GLfloat texdy[4],
  68                    GLuint unit, GLfloat color[4] )
  69 {
  70    SWcontext *swrast = SWRAST_CONTEXT(ctx);
  71    const struct gl_texture_object *texObj = ctx->Texture.Unit[unit]._Current;
  72    const struct gl_texture_image *texImg = texObj->Image[texObj->BaseLevel];
  73    const GLfloat texW = (GLfloat) texImg->WidthScale;
  74    const GLfloat texH = (GLfloat) texImg->HeightScale;
  75    GLchan rgba[4];
  76
  77    GLfloat lambda = _mesa_compute_lambda(texdx[0], texdy[0], /* ds/dx, ds/dy */
  78                                          texdx[1], texdy[1], /* dt/dx, dt/dy */
  79                                          texdx[3], texdy[2], /* dq/dx, dq/dy */
  80                                          texW, texH,
  81                                          texcoord[0], texcoord[1], texcoord[3],
  82                                          1.0F / texcoord[3]);
  83
  84    swrast->TextureSample[unit](ctx, unit, ctx->Texture.Unit[unit]._Current,
  85                                1, (const GLfloat (*)[4]) texcoord,
  86                                &lambda, &rgba);
  87    color[0] = CHAN_TO_FLOAT(rgba[0]);
  88    color[1] = CHAN_TO_FLOAT(rgba[1]);
  89    color[2] = CHAN_TO_FLOAT(rgba[2]);
  90    color[3] = CHAN_TO_FLOAT(rgba[3]);
  91 }
  92
  93
  94
  95 /**
  96  * Fetch a 4-element float vector from the given source register.
  97  * Apply swizzling and negating as needed.
  98  */
  99 static void
 100 fetch_vector4( const struct fp_src_register *source,
 101                const struct fp_machine *machine,
 102                GLfloat result[4] )
 103 {
 104    const GLfloat *src = machine->Registers[source->Register];
 105
 106    result[0] = src[source->Swizzle[0]];
 107    result[1] = src[source->Swizzle[1]];
 108    result[2] = src[source->Swizzle[2]];
 109    result[3] = src[source->Swizzle[3]];
 110
 111    if (source->NegateBase) {
 112       result[0] = -result[0];
 113       result[1] = -result[1];
 114       result[2] = -result[2];
 115       result[3] = -result[3];
 116    }
 117    if (source->Abs) {
 118       result[0] = FABSF(result[0]);
 119       result[1] = FABSF(result[1]);
 120       result[2] = FABSF(result[2]);
 121       result[3] = FABSF(result[3]);
 122    }
 123    if (source->NegateAbs) {
 124       result[0] = -result[0];
 125       result[1] = -result[1];
 126       result[2] = -result[2];
 127       result[3] = -result[3];
 128    }
 129 }
 130
 131
 132 /**
 133  * Fetch the derivative with respect to X for the given register.
 134  * \return GL_TRUE if it was easily computed or GL_FALSE if we
 135  * need to execute another instance of the program (ugh)!
 136  */
 137 static GLboolean
 138 fetch_vector4_deriv( const struct fp_src_register *source,
 139                      const struct sw_span *span,
 140                      char xOrY, GLfloat result[4] )
 141 {
 142    GLfloat src[4];
 143
 144    ASSERT(xOrY == 'X' || xOrY == 'Y');
 145
 146    switch (source->Register) {
 147    case FRAG_ATTRIB_WPOS:
 148       if (xOrY == 'X') {
 149          src[0] = 1.0;
 150          src[1] = 0.0;
 151          src[2] = span->dzdx;
 152          src[3] = span->dwdx;
 153       }
 154       else {
 155          src[0] = 0.0;
 156          src[1] = 1.0;
 157          src[2] = span->dzdy;
 158          src[3] = span->dwdy;
 159       }
 160       break;
 161    case FRAG_ATTRIB_COL0:
 162       if (xOrY == 'X') {
 163          src[0] = span->drdx * (1.0F / CHAN_MAXF);
 164          src[1] = span->dgdx * (1.0F / CHAN_MAXF);
 165          src[2] = span->dbdx * (1.0F / CHAN_MAXF);
 166          src[3] = span->dadx * (1.0F / CHAN_MAXF);
 167       }
 168       else {
 169          src[0] = span->drdy * (1.0F / CHAN_MAXF);
 170          src[1] = span->dgdy * (1.0F / CHAN_MAXF);
 171          src[2] = span->dbdy * (1.0F / CHAN_MAXF);
 172          src[3] = span->dady * (1.0F / CHAN_MAXF);
 173       }
 174       break;
 175    case FRAG_ATTRIB_COL1:
 176       if (xOrY == 'X') {
 177          src[0] = span->dsrdx * (1.0F / CHAN_MAXF);
 178          src[1] = span->dsgdx * (1.0F / CHAN_MAXF);
 179          src[2] = span->dsbdx * (1.0F / CHAN_MAXF);
 180          src[3] = 0.0; /* XXX need this */
 181       }
 182       else {
 183          src[0] = span->dsrdy * (1.0F / CHAN_MAXF);
 184          src[1] = span->dsgdy * (1.0F / CHAN_MAXF);
 185          src[2] = span->dsbdy * (1.0F / CHAN_MAXF);
 186          src[3] = 0.0; /* XXX need this */
 187       }
 188       break;
 189    case FRAG_ATTRIB_FOGC:
 190       if (xOrY == 'X') {
 191          src[0] = span->dfogdx;
 192          src[1] = 0.0;
 193          src[2] = 0.0;
 194          src[3] = 0.0;
 195       }
 196       else {
 197          src[0] = span->dfogdy;
 198          src[1] = 0.0;
 199          src[2] = 0.0;
 200          src[3] = 0.0;
 201       }
 202       break;
 203    case FRAG_ATTRIB_TEX0:
 204    case FRAG_ATTRIB_TEX1:
 205    case FRAG_ATTRIB_TEX2:
 206    case FRAG_ATTRIB_TEX3:
 207    case FRAG_ATTRIB_TEX4:
 208    case FRAG_ATTRIB_TEX5:
 209    case FRAG_ATTRIB_TEX6:
 210    case FRAG_ATTRIB_TEX7:
 211       if (xOrY == 'X') {
 212          const GLuint u = source->Register - FRAG_ATTRIB_TEX0;
 213          src[0] = span->texStepX[u][0] * (1.0F / CHAN_MAXF);
 214          src[1] = span->texStepX[u][1] * (1.0F / CHAN_MAXF);
 215          src[2] = span->texStepX[u][2] * (1.0F / CHAN_MAXF);
 216          src[3] = span->texStepX[u][3] * (1.0F / CHAN_MAXF);
 217       }
 218       else {
 219          const GLuint u = source->Register - FRAG_ATTRIB_TEX0;
 220          src[0] = span->texStepY[u][0] * (1.0F / CHAN_MAXF);
 221          src[1] = span->texStepY[u][1] * (1.0F / CHAN_MAXF);
 222          src[2] = span->texStepY[u][2] * (1.0F / CHAN_MAXF);
 223          src[3] = span->texStepY[u][3] * (1.0F / CHAN_MAXF);
 224       }
 225       break;
 226    default:
 227       return GL_FALSE;
 228    }
 229
 230    result[0] = src[source->Swizzle[0]];
 231    result[1] = src[source->Swizzle[1]];
 232    result[2] = src[source->Swizzle[2]];
 233    result[3] = src[source->Swizzle[3]];
 234
 235    if (source->NegateBase) {
 236       result[0] = -result[0];
 237       result[1] = -result[1];
 238       result[2] = -result[2];
 239       result[3] = -result[3];
 240    }
 241    if (source->Abs) {
 242       result[0] = FABSF(result[0]);
 243       result[1] = FABSF(result[1]);
 244       result[2] = FABSF(result[2]);
 245       result[3] = FABSF(result[3]);
 246    }
 247    if (source->NegateAbs) {
 248       result[0] = -result[0];
 249       result[1] = -result[1];
 250       result[2] = -result[2];
 251       result[3] = -result[3];
 252    }
 253    return GL_TRUE;
 254 }
 255
 256
 257 /**
 258  * As above, but only return result[0] element.
 259  */
 260 static void
 261 fetch_vector1( const struct fp_src_register *source,
 262                const struct fp_machine *machine,
 263                GLfloat result[4] )
 264 {
 265    const GLfloat *src = machine->Registers[source->Register];
 266
 267    result[0] = src[source->Swizzle[0]];
 268
 269    if (source->NegateBase) {
 270       result[0] = -result[0];
 271    }
 272    if (source->Abs) {
 273       result[0] = FABSF(result[0]);
 274    }
 275    if (source->NegateAbs) {
 276       result[0] = -result[0];
 277    }
 278 }
 279
 280
 281 /*
 282  * Test value against zero and return GT, LT, EQ or UN if NaN.
 283  */
 284 static INLINE GLuint
 285 generate_cc( float value )
 286 {
 287    if (value != value)
 288       return COND_UN;  /* NaN */
 289    if (value > 0.0F)
 290       return COND_GT;
 291    if (value < 0.0F)
 292       return COND_LT;
 293    return COND_EQ;
 294 }
 295
 296 /*
 297  * Test if the ccMaskRule is satisfied by the given condition code.
 298  * Used to mask destination writes according to the current condition codee.
 299  */
 300 static INLINE GLboolean
 301 test_cc(GLuint condCode, GLuint ccMaskRule)
 302 {
 303    switch (ccMaskRule) {
 304    case COND_EQ: return (condCode == COND_EQ);
 305    case COND_NE: return (condCode != COND_EQ);
 306    case COND_LT: return (condCode == COND_LT);
 307    case COND_GE: return (condCode == COND_GT || condCode == COND_EQ);
 308    case COND_LE: return (condCode == COND_LT || condCode == COND_EQ);
 309    case COND_GT: return (condCode == COND_GT);
 310    case COND_TR: return GL_TRUE;
 311    case COND_FL: return GL_FALSE;
 312    default:      return GL_TRUE;
 313    }
 314 }
 315
 316
 317 /**
 318  * Store 4 floats into a register.  Observe the instructions saturate and
 319  * set-condition-code flags.
 320  */
 321 static void
 322 store_vector4( const struct fp_instruction *inst,
 323                struct fp_machine *machine,
 324                const GLfloat value[4] )
 325 {
 326    const struct fp_dst_register *dest = &(inst->DstReg);
 327    const GLboolean clamp = inst->Saturate;
 328    const GLboolean updateCC = inst->UpdateCondRegister;
 329    GLfloat *dstReg = machine->Registers[dest->Register];
 330    GLfloat clampedValue[4];
 331    const GLboolean *writeMask = dest->WriteMask;
 332    GLboolean condWriteMask[4];
 333
 334    if (clamp) {
 335       clampedValue[0] = CLAMP(value[0], 0.0F, 1.0F);
 336       clampedValue[1] = CLAMP(value[1], 0.0F, 1.0F);
 337       clampedValue[2] = CLAMP(value[2], 0.0F, 1.0F);
 338       clampedValue[3] = CLAMP(value[3], 0.0F, 1.0F);
 339       value = clampedValue;
 340    }
 341
 342    if (dest->CondMask != COND_TR) {
 343       condWriteMask[0] = writeMask[0]
 344          && test_cc(machine->CondCodes[dest->CondSwizzle[0]], dest->CondMask);
 345       condWriteMask[1] = writeMask[1]
 346          && test_cc(machine->CondCodes[dest->CondSwizzle[1]], dest->CondMask);
 347       condWriteMask[2] = writeMask[2]
 348          && test_cc(machine->CondCodes[dest->CondSwizzle[2]], dest->CondMask);
 349       condWriteMask[3] = writeMask[3]
 350          && test_cc(machine->CondCodes[dest->CondSwizzle[3]], dest->CondMask);
 351       writeMask = condWriteMask;
 352    }
 353
 354    if (writeMask[0]) {
 355       dstReg[0] = value[0];
 356       if (updateCC)
 357          machine->CondCodes[0] = generate_cc(value[0]);
 358    }
 359    if (writeMask[1]) {
 360       dstReg[1] = value[1];
 361       if (updateCC)
 362          machine->CondCodes[1] = generate_cc(value[1]);
 363    }
 364    if (writeMask[2]) {
 365       dstReg[2] = value[2];
 366       if (updateCC)
 367          machine->CondCodes[2] = generate_cc(value[2]);
 368    }
 369    if (writeMask[3]) {
 370       dstReg[3] = value[3];
 371       if (updateCC)
 372          machine->CondCodes[3] = generate_cc(value[3]);
 373    }
 374 }
 375
 376
 377 /**
 378  * Initialize a new machine state instance from an existing one, adding
 379  * the partial derivatives onto the input registers.
 380  * Used to implement DDX and DDY instructions in non-trivial cases.
 381  */
 382 static void
 383 init_machine_deriv( GLcontext *ctx,
 384                     const struct fp_machine *machine,
 385                     const struct fragment_program *program,
 386                     const struct sw_span *span, char xOrY,
 387                     struct fp_machine *dMachine )
 388 {
 389    GLuint u;
 390
 391    ASSERT(xOrY == 'X' || xOrY == 'Y');
 392
 393    /* copy existing machine */
 394    _mesa_memcpy(dMachine, machine, sizeof(struct fp_machine));
 395
 396    /* Clear temporary registers */
 397    _mesa_bzero((GLfloat*) (machine->Registers + FP_TEMP_REG_START) ,
 398                MAX_NV_FRAGMENT_PROGRAM_TEMPS * 4 * sizeof(GLfloat));
 399
 400    /* Add derivatives */
 401    if (program->InputsRead & (1 << FRAG_ATTRIB_WPOS)) {
 402       GLfloat *wpos = (GLfloat*) machine->Registers[FP_INPUT_REG_START+FRAG_ATTRIB_WPOS];
 403       if (xOrY == 'X') {
 404          wpos[0] += 1.0F;
 405          wpos[1] += 0.0F;
 406          wpos[2] += span->dzdx;
 407          wpos[3] += span->dwdx;
 408       }
 409       else {
 410          wpos[0] += 0.0F;
 411          wpos[1] += 1.0F;
 412          wpos[2] += span->dzdy;
 413          wpos[3] += span->dwdy;
 414       }
 415    }
 416    if (program->InputsRead & (1 << FRAG_ATTRIB_COL0)) {
 417       GLfloat *col0 = (GLfloat*) machine->Registers[FP_INPUT_REG_START+FRAG_ATTRIB_COL0];
 418       if (xOrY == 'X') {
 419          col0[0] += span->drdx * (1.0F / CHAN_MAXF);
 420          col0[1] += span->dgdx * (1.0F / CHAN_MAXF);
 421          col0[2] += span->dbdx * (1.0F / CHAN_MAXF);
 422          col0[3] += span->dadx * (1.0F / CHAN_MAXF);
 423       }
 424       else {
 425          col0[0] += span->drdy * (1.0F / CHAN_MAXF);
 426          col0[1] += span->dgdy * (1.0F / CHAN_MAXF);
 427          col0[2] += span->dbdy * (1.0F / CHAN_MAXF);
 428          col0[3] += span->dady * (1.0F / CHAN_MAXF);
 429       }
 430    }
 431    if (program->InputsRead & (1 << FRAG_ATTRIB_COL1)) {
 432       GLfloat *col1 = (GLfloat*) machine->Registers[FP_INPUT_REG_START+FRAG_ATTRIB_COL1];
 433       if (xOrY == 'X') {
 434          col1[0] += span->dsrdx * (1.0F / CHAN_MAXF);
 435          col1[1] += span->dsgdx * (1.0F / CHAN_MAXF);
 436          col1[2] += span->dsbdx * (1.0F / CHAN_MAXF);
 437          col1[3] += 0.0; /*XXX fix */
 438       }
 439       else {
 440          col1[0] += span->dsrdy * (1.0F / CHAN_MAXF);
 441          col1[1] += span->dsgdy * (1.0F / CHAN_MAXF);
 442          col1[2] += span->dsbdy * (1.0F / CHAN_MAXF);
 443          col1[3] += 0.0; /*XXX fix */
 444       }
 445    }
 446    if (program->InputsRead & (1 << FRAG_ATTRIB_FOGC)) {
 447       GLfloat *fogc = (GLfloat*) machine->Registers[FP_INPUT_REG_START+FRAG_ATTRIB_FOGC];
 448       if (xOrY == 'X') {
 449          fogc[0] += span->dfogdx;
 450       }
 451       else {
 452          fogc[0] += span->dfogdy;
 453       }
 454    }
 455    for (u = 0; u < ctx->Const.MaxTextureCoordUnits; u++) {
 456       if (program->InputsRead & (1 << (FRAG_ATTRIB_TEX0 + u))) {
 457          GLfloat *tex = (GLfloat*) machine->Registers[FP_INPUT_REG_START+FRAG_ATTRIB_TEX0+u];
 458          if (xOrY == 'X') {
 459             tex[0] += span->texStepX[u][0];
 460             tex[1] += span->texStepX[u][1];
 461             tex[2] += span->texStepX[u][2];
 462             tex[3] += span->texStepX[u][3];
 463          }
 464          else {
 465             tex[0] += span->texStepY[u][0];
 466             tex[1] += span->texStepY[u][1];
 467             tex[2] += span->texStepY[u][2];
 468             tex[3] += span->texStepY[u][3];
 469          }
 470       }
 471    }
 472 }
 473
 474
/**
 * Execute the given fragment program.
 * NOTE: we do everything in single-precision floating point; we don't
 * currently observe the single/half/fixed-precision qualifiers.
 * \param ctx - rendering context
 * \param program - the fragment program to execute
 * \param maxInst - max number of instructions to execute
 * \param machine - machine state (register file)
 * \param span - span whose per-pixel steps are used by the DDX/DDY
 *               derivative instructions
 * \return GL_TRUE if program completed or GL_FALSE if program executed KIL.
 */
 485 static GLboolean
 486 execute_program( GLcontext *ctx,
 487                  const struct fragment_program *program, GLuint maxInst,
 488                  struct fp_machine *machine, const struct sw_span *span )
 489 {
 490    GLuint pc = 0;
 491
 492    for (pc = 0; pc < maxInst; pc++) {
 493       const struct fp_instruction *inst = program->Instructions + pc;
 494       switch (inst->Opcode) {
 495          case FP_OPCODE_ADD:
 496             {
 497                GLfloat a[4], b[4], result[4];
 498                fetch_vector4( &inst->SrcReg[0], machine, a );
 499                fetch_vector4( &inst->SrcReg[1], machine, b );
 500                result[0] = a[0] + b[0];
 501                result[1] = a[1] + b[1];
 502                result[2] = a[2] + b[2];
 503                result[3] = a[3] + b[3];
 504                store_vector4( inst, machine, result );
 505             }
 506             break;
 507          case FP_OPCODE_COS:
 508             {
 509                GLfloat a[4], result[4];
 510                fetch_vector1( &inst->SrcReg[0], machine, a );
 511                result[0] = result[1] = result[2] = result[3] = _mesa_cos(a[0]);
 512                store_vector4( inst, machine, result );
 513             }
 514             break;
 515          case FP_OPCODE_DDX: /* Partial derivative with respect to X */
 516             {
 517                GLfloat a[4], aNext[4], result[4];
 518                struct fp_machine dMachine;
 519                if (!fetch_vector4_deriv(&inst->SrcReg[0], span, 'X', result)) {
 520                   /* This is tricky.  Make a copy of the current machine state,
 521                    * increment the input registers by the dx or dy partial
 522                    * derivatives, then re-execute the program up to the
 523                    * preceeding instruction, then fetch the source register.
 524                    * Finally, find the difference in the register values for
 525                    * the original and derivative runs.
 526                    */
 527                   init_machine_deriv(ctx, machine, program, span,
 528                                      'X', &dMachine);
 529                   execute_program(ctx, program, pc, &dMachine, span);
 530                   fetch_vector4( &inst->SrcReg[0], &dMachine, aNext );
 531                   result[0] = aNext[0] - a[0];
 532                   result[1] = aNext[1] - a[1];
 533                   result[2] = aNext[2] - a[2];
 534                   result[3] = aNext[3] - a[3];
 535                }
 536                store_vector4( inst, machine, result );
 537             }
 538             break;
 539          case FP_OPCODE_DDY: /* Partial derivative with respect to Y */
 540             {
 541                GLfloat a[4], aNext[4], result[4];
 542                struct fp_machine dMachine;
 543                if (!fetch_vector4_deriv(&inst->SrcReg[0], span, 'Y', result)) {
 544                   init_machine_deriv(ctx, machine, program, span,
 545                                      'Y', &dMachine);
 546                   execute_program(ctx, program, pc, &dMachine, span);
 547                   fetch_vector4( &inst->SrcReg[0], &dMachine, aNext );
 548                   result[0] = aNext[0] - a[0];
 549                   result[1] = aNext[1] - a[1];
 550                   result[2] = aNext[2] - a[2];
 551                   result[3] = aNext[3] - a[3];
 552                }
 553                store_vector4( inst, machine, result );
 554             }
 555             break;
 556          case FP_OPCODE_DP3:
 557             {
 558                GLfloat a[4], b[4], result[4];
 559                fetch_vector4( &inst->SrcReg[0], machine, a );
 560                fetch_vector4( &inst->SrcReg[1], machine, b );
 561                result[0] = result[1] = result[2] = result[3] =
 562                   a[0] + b[0] + a[1] * b[1] + a[2] * b[2];
 563                store_vector4( inst, machine, result );
 564             }
 565             break;
 566          case FP_OPCODE_DP4:
 567             {
 568                GLfloat a[4], b[4], result[4];
 569                fetch_vector4( &inst->SrcReg[0], machine, a );
 570                fetch_vector4( &inst->SrcReg[1], machine, b );
 571                result[0] = result[1] = result[2] = result[3] =
 572                   a[0] + b[0] + a[1] * b[1] + a[2] * b[2] + a[3] * b[3];
 573                store_vector4( inst, machine, result );
 574             }
 575             break;
 576          case FP_OPCODE_DST: /* Distance vector */
 577             {
 578                GLfloat a[4], b[4], result[4];
 579                fetch_vector4( &inst->SrcReg[0], machine, a );
 580                fetch_vector4( &inst->SrcReg[1], machine, b );
 581                result[0] = 1.0F;
 582                result[1] = a[1] * b[1];
 583                result[2] = a[2];
 584                result[3] = b[3];
 585                store_vector4( inst, machine, result );
 586             }
 587             break;
 588          case FP_OPCODE_EX2: /* Exponential base 2 */
 589             {
 590                GLfloat a[4], result[4];
 591                fetch_vector1( &inst->SrcReg[0], machine, a );
 592                result[0] = result[1] = result[2] = result[3] =
 593                   (GLfloat) _mesa_pow(2.0, a[0]);
 594                store_vector4( inst, machine, result );
 595             }
 596             break;
 597          case FP_OPCODE_FLR:
 598             {
 599                GLfloat a[4], result[4];
 600                fetch_vector4( &inst->SrcReg[0], machine, a );
 601                result[0] = FLOORF(a[0]);
 602                result[1] = FLOORF(a[1]);
 603                result[2] = FLOORF(a[2]);
 604                result[3] = FLOORF(a[3]);
 605                store_vector4( inst, machine, result );
 606             }
 607             break;
 608          case FP_OPCODE_FRC:
 609             {
 610                GLfloat a[4], result[4];
 611                fetch_vector4( &inst->SrcReg[0], machine, a );
 612                result[0] = a[0] - FLOORF(a[0]);
 613                result[1] = a[1] - FLOORF(a[1]);
 614                result[2] = a[2] - FLOORF(a[2]);
 615                result[3] = a[3] - FLOORF(a[3]);
 616                store_vector4( inst, machine, result );
 617             }
 618             break;
 619          case FP_OPCODE_KIL:
 620             {
 621                const GLuint *swizzle = inst->DstReg.CondSwizzle;
 622                const GLuint condMask = inst->DstReg.CondMask;
 623                if (test_cc(machine->CondCodes[swizzle[0]], condMask) ||
 624                    test_cc(machine->CondCodes[swizzle[1]], condMask) ||
 625                    test_cc(machine->CondCodes[swizzle[2]], condMask) ||
 626                    test_cc(machine->CondCodes[swizzle[3]], condMask))
 627                   return GL_FALSE;
 628             }
 629             break;
 630          case FP_OPCODE_LG2:  /* log base 2 */
 631             {
 632                GLfloat a[4], result[4];
 633                fetch_vector1( &inst->SrcReg[0], machine, a );
 634                result[0] = result[1] = result[2] = result[3]
 635                   = LOG2(a[0]);
 636                store_vector4( inst, machine, result );
 637             }
 638             break;
 639          case FP_OPCODE_LIT:
 640             {
 641                GLfloat a[4], result[4];
 642                fetch_vector4( &inst->SrcReg[0], machine, a );
 643                if (a[0] < 0.0F)
 644                   a[0] = 0.0F;
 645                if (a[1] < 0.0F)
 646                   a[1] = 0.0F;
 647                result[0] = 1.0F;
 648                result[1] = a[0];
 649                result[2] = (a[0] > 0.0) ? _mesa_pow(2.0, a[3]) : 0.0F;
 650                result[3] = 1.0F;
 651                store_vector4( inst, machine, result );
 652             }
 653             break;
 654          case FP_OPCODE_LRP:
 655             {
 656                GLfloat a[4], b[4], c[4], result[4];
 657                fetch_vector4( &inst->SrcReg[0], machine, a );
 658                fetch_vector4( &inst->SrcReg[1], machine, b );
 659                fetch_vector4( &inst->SrcReg[2], machine, c );
 660                result[0] = a[0] * b[0] + (1.0F - a[0]) * c[0];
 661                result[1] = a[1] * b[1] + (1.0F - a[1]) * c[1];
 662                result[2] = a[2] * b[2] + (1.0F - a[2]) * c[2];
 663                result[3] = a[3] * b[3] + (1.0F - a[3]) * c[3];
 664                store_vector4( inst, machine, result );
 665             }
 666             break;
 667          case FP_OPCODE_MAD:
 668             {
 669                GLfloat a[4], b[4], c[4], result[4];
 670                fetch_vector4( &inst->SrcReg[0], machine, a );
 671                fetch_vector4( &inst->SrcReg[1], machine, b );
 672                fetch_vector4( &inst->SrcReg[2], machine, c );
 673                result[0] = a[0] * b[0] + c[0];
 674                result[1] = a[1] * b[1] + c[1];
 675                result[2] = a[2] * b[2] + c[2];
 676                result[3] = a[3] * b[3] + c[3];
 677                store_vector4( inst, machine, result );
 678             }
 679             break;
 680          case FP_OPCODE_MAX:
 681             {
 682                GLfloat a[4], b[4], result[4];
 683                fetch_vector4( &inst->SrcReg[0], machine, a );
 684                fetch_vector4( &inst->SrcReg[1], machine, b );
 685                result[0] = MAX2(a[0], b[0]);
 686                result[1] = MAX2(a[1], b[1]);
 687                result[2] = MAX2(a[2], b[2]);
 688                result[3] = MAX2(a[3], b[3]);
 689                store_vector4( inst, machine, result );
 690             }
 691             break;
 692          case FP_OPCODE_MIN:
 693             {
 694                GLfloat a[4], b[4], result[4];
 695                fetch_vector4( &inst->SrcReg[0], machine, a );
 696                fetch_vector4( &inst->SrcReg[1], machine, b );
 697                result[0] = MIN2(a[0], b[0]);
 698                result[1] = MIN2(a[1], b[1]);
 699                result[2] = MIN2(a[2], b[2]);
 700                result[3] = MIN2(a[3], b[3]);
 701                store_vector4( inst, machine, result );
 702             }
 703             break;
 704          case FP_OPCODE_MOV:
 705             {
 706                GLfloat result[4];
 707                fetch_vector4( &inst->SrcReg[0], machine, result );
 708                store_vector4( inst, machine, result );
 709             }
 710             break;
 711          case FP_OPCODE_MUL:
 712             {
 713                GLfloat a[4], b[4], result[4];
 714                fetch_vector4( &inst->SrcReg[0], machine, a );
 715                fetch_vector4( &inst->SrcReg[1], machine, b );
 716                result[0] = a[0] * b[0];
 717                result[1] = a[1] * b[1];
 718                result[2] = a[2] * b[2];
 719                result[3] = a[3] * b[3];
 720                store_vector4( inst, machine, result );
 721             }
 722             break;
 723          case FP_OPCODE_PK2H: /* pack two 16-bit floats */
 724             /* XXX this is probably wrong */
 725             {
 726                GLfloat a[4], result[4];
 727                const GLuint *rawBits = (const GLuint *) a;
 728                GLuint *rawResult = (GLuint *) result;
 729                fetch_vector4( &inst->SrcReg[0], machine, a );
 730                rawResult[0] = rawResult[1] = rawResult[2] = rawResult[3]
 731                   = rawBits[0] | (rawBits[1] << 16);
 732                store_vector4( inst, machine, result );
 733             }
 734             break;
 735          case FP_OPCODE_PK2US: /* pack two GLushorts */
 736             {
 737                GLfloat a[4], result[4];
 738                GLuint usx, usy, *rawResult = (GLuint *) result;
 739                fetch_vector4( &inst->SrcReg[0], machine, a );
 740                a[0] = CLAMP(a[0], 0.0F, 1.0F);
 741                a[1] = CLAMP(a[0], 0.0F, 1.0F);
 742                usx = IROUND(a[0] * 65535.0F);
 743                usy = IROUND(a[1] * 65535.0F);
 744                rawResult[0] = rawResult[1] = rawResult[2] = rawResult[3]
 745                   = usx | (usy << 16);
 746                store_vector4( inst, machine, result );
 747             }
 748             break;
 749          case FP_OPCODE_PK4B: /* pack four GLbytes */
 750             {
 751                GLfloat a[4], result[4];
 752                GLuint ubx, uby, ubz, ubw, *rawResult = (GLuint *) result;
 753                fetch_vector4( &inst->SrcReg[0], machine, a );
 754                a[0] = CLAMP(a[0], -128.0F / 127.0F, 1.0F);
 755                a[1] = CLAMP(a[1], -128.0F / 127.0F, 1.0F);
 756                a[2] = CLAMP(a[2], -128.0F / 127.0F, 1.0F);
 757                a[3] = CLAMP(a[3], -128.0F / 127.0F, 1.0F);
 758                ubx = IROUND(127.0F * a[0] + 128.0F);
 759                uby = IROUND(127.0F * a[1] + 128.0F);
 760                ubz = IROUND(127.0F * a[2] + 128.0F);
 761                ubw = IROUND(127.0F * a[3] + 128.0F);
 762                rawResult[0] = rawResult[1] = rawResult[2] = rawResult[3]
 763                   = ubx | (uby << 8) | (ubz << 16) | (ubw << 24);
 764                store_vector4( inst, machine, result );
 765             }
 766             break;
 767          case FP_OPCODE_PK4UB: /* pack four GLubytes */
 768             {
 769                GLfloat a[4], result[4];
 770                GLuint ubx, uby, ubz, ubw, *rawResult = (GLuint *) result;
 771                fetch_vector4( &inst->SrcReg[0], machine, a );
 772                a[0] = CLAMP(a[0], 0.0F, 1.0F);
 773                a[1] = CLAMP(a[1], 0.0F, 1.0F);
 774                a[2] = CLAMP(a[2], 0.0F, 1.0F);
 775                a[3] = CLAMP(a[3], 0.0F, 1.0F);
 776                ubx = IROUND(255.0F * a[0]);
 777                uby = IROUND(255.0F * a[1]);
 778                ubz = IROUND(255.0F * a[2]);
 779                ubw = IROUND(255.0F * a[3]);
 780                rawResult[0] = rawResult[1] = rawResult[2] = rawResult[3]
 781                   = ubx | (uby << 8) | (ubz << 16) | (ubw << 24);
 782                store_vector4( inst, machine, result );
 783             }
 784             break;
 785          case FP_OPCODE_POW:
 786             {
 787                GLfloat a[4], b[4], result[4];
 788                fetch_vector1( &inst->SrcReg[0], machine, a );
 789                fetch_vector1( &inst->SrcReg[1], machine, b );
 790                result[0] = result[1] = result[2] = result[3]
 791                   = _mesa_pow(a[0], b[0]);
 792                store_vector4( inst, machine, result );
 793             }
 794             break;
 795          case FP_OPCODE_RCP:
 796             {
 797                GLfloat a[4], result[4];
 798                fetch_vector1( &inst->SrcReg[0], machine, a );
 799                result[0] = result[1] = result[2] = result[3]
 800                   = 1.0F / a[0];
 801                store_vector4( inst, machine, result );
 802             }
 803             break;
 804          case FP_OPCODE_RFL:
 805             {
 806                GLfloat axis[4], dir[4], result[4], tmp[4];
 807                fetch_vector4( &inst->SrcReg[0], machine, axis );
 808                fetch_vector4( &inst->SrcReg[1], machine, dir );
 809                tmp[3] = axis[0] * axis[0]
 810                       + axis[1] * axis[1]
 811                       + axis[2] * axis[2];
 812                tmp[0] = (2.0F * (axis[0] * dir[0] +
 813                                  axis[1] * dir[1] +
 814                                  axis[2] * dir[2])) / tmp[3];
 815                result[0] = tmp[0] * axis[0] - dir[0];
 816                result[1] = tmp[0] * axis[1] - dir[1];
 817                result[2] = tmp[0] * axis[2] - dir[2];
 818                /* result[3] is never written! XXX enforce in parser! */
 819                store_vector4( inst, machine, result );
 820             }
 821             break;
 822          case FP_OPCODE_RSQ: /* 1 / sqrt() */
 823             {
 824                GLfloat a[4], result[4];
 825                fetch_vector1( &inst->SrcReg[0], machine, a );
 826                result[0] = result[1] = result[2] = result[3] = INV_SQRTF(a[0]);
 827                store_vector4( inst, machine, result );
 828             }
 829             break;
 830          case FP_OPCODE_SEQ: /* set on equal */
 831             {
 832                GLfloat a[4], b[4], result[4];
 833                fetch_vector4( &inst->SrcReg[0], machine, a );
 834                fetch_vector4( &inst->SrcReg[1], machine, b );
 835                result[0] = (a[0] == b[0]) ? 1.0F : 0.0F;
 836                result[1] = (a[1] == b[1]) ? 1.0F : 0.0F;
 837                result[2] = (a[2] == b[2]) ? 1.0F : 0.0F;
 838                result[3] = (a[3] == b[3]) ? 1.0F : 0.0F;
 839                store_vector4( inst, machine, result );
 840             }
 841             break;
 842          case FP_OPCODE_SFL: /* set false, operands ignored */
 843             {
 844                static const GLfloat result[4] = { 0.0F, 0.0F, 0.0F, 0.0F };
 845                store_vector4( inst, machine, result );
 846             }
 847             break;
 848          case FP_OPCODE_SGE: /* set on greater or equal */
 849             {
 850                GLfloat a[4], b[4], result[4];
 851                fetch_vector4( &inst->SrcReg[0], machine, a );
 852                fetch_vector4( &inst->SrcReg[1], machine, b );
 853                result[0] = (a[0] >= b[0]) ? 1.0F : 0.0F;
 854                result[1] = (a[1] >= b[1]) ? 1.0F : 0.0F;
 855                result[2] = (a[2] >= b[2]) ? 1.0F : 0.0F;
 856                result[3] = (a[3] >= b[3]) ? 1.0F : 0.0F;
 857                store_vector4( inst, machine, result );
 858             }
 859             break;
 860          case FP_OPCODE_SGT: /* set on greater */
 861             {
 862                GLfloat a[4], b[4], result[4];
 863                fetch_vector4( &inst->SrcReg[0], machine, a );
 864                fetch_vector4( &inst->SrcReg[1], machine, b );
 865                result[0] = (a[0] > b[0]) ? 1.0F : 0.0F;
 866                result[1] = (a[1] > b[1]) ? 1.0F : 0.0F;
 867                result[2] = (a[2] > b[2]) ? 1.0F : 0.0F;
 868                result[3] = (a[3] > b[3]) ? 1.0F : 0.0F;
 869                store_vector4( inst, machine, result );
 870             }
 871             break;
 872          case FP_OPCODE_SIN:
 873             {
 874                GLfloat a[4], result[4];
 875                fetch_vector1( &inst->SrcReg[0], machine, a );
 876                result[0] = result[1] = result[2] = result[3] = _mesa_sin(a[0]);
 877                store_vector4( inst, machine, result );
 878             }
 879             break;
 880          case FP_OPCODE_SLE: /* set on less or equal */
 881             {
 882                GLfloat a[4], b[4], result[4];
 883                fetch_vector4( &inst->SrcReg[0], machine, a );
 884                fetch_vector4( &inst->SrcReg[1], machine, b );
 885                result[0] = (a[0] <= b[0]) ? 1.0F : 0.0F;
 886                result[1] = (a[1] <= b[1]) ? 1.0F : 0.0F;
 887                result[2] = (a[2] <= b[2]) ? 1.0F : 0.0F;
 888                result[3] = (a[3] <= b[3]) ? 1.0F : 0.0F;
 889                store_vector4( inst, machine, result );
 890             }
 891             break;
 892          case FP_OPCODE_SLT: /* set on less */
 893             {
 894                GLfloat a[4], b[4], result[4];
 895                fetch_vector4( &inst->SrcReg[0], machine, a );
 896                fetch_vector4( &inst->SrcReg[1], machine, b );
 897                result[0] = (a[0] < b[0]) ? 1.0F : 0.0F;
 898                result[1] = (a[1] < b[1]) ? 1.0F : 0.0F;
 899                result[2] = (a[2] < b[2]) ? 1.0F : 0.0F;
 900                result[3] = (a[3] < b[3]) ? 1.0F : 0.0F;
 901                store_vector4( inst, machine, result );
 902             }
 903             break;
 904          case FP_OPCODE_SNE: /* set on not equal */
 905             {
 906                GLfloat a[4], b[4], result[4];
 907                fetch_vector4( &inst->SrcReg[0], machine, a );
 908                fetch_vector4( &inst->SrcReg[1], machine, b );
 909                result[0] = (a[0] != b[0]) ? 1.0F : 0.0F;
 910                result[1] = (a[1] != b[1]) ? 1.0F : 0.0F;
 911                result[2] = (a[2] != b[2]) ? 1.0F : 0.0F;
 912                result[3] = (a[3] != b[3]) ? 1.0F : 0.0F;
 913                store_vector4( inst, machine, result );
 914             }
 915             break;
 916          case FP_OPCODE_STR: /* set true, operands ignored */
 917             {
 918                static const GLfloat result[4] = { 1.0F, 1.0F, 1.0F, 1.0F };
 919                store_vector4( inst, machine, result );
 920             }
 921             break;
 922          case FP_OPCODE_SUB:
 923             {
 924                GLfloat a[4], b[4], result[4];
 925                fetch_vector4( &inst->SrcReg[0], machine, a );
 926                fetch_vector4( &inst->SrcReg[1], machine, b );
 927                result[0] = a[0] - b[0];
 928                result[1] = a[1] - b[1];
 929                result[2] = a[2] - b[2];
 930                result[3] = a[3] - b[3];
 931                store_vector4( inst, machine, result );
 932             }
 933             break;
 934          case FP_OPCODE_TEX:
 935             /* Texel lookup */
 936             {
 937                GLfloat texcoord[4], color[4];
 938                fetch_vector4( &inst->SrcReg[0], machine, texcoord );
 939                /* XXX: Undo perspective divide from interpolate_texcoords() */
 940                fetch_texel( ctx, texcoord, inst->TexSrcUnit, color );
 941                store_vector4( inst, machine, color );
 942             }
 943             break;
 944          case FP_OPCODE_TXD:
 945             /* Texture lookup w/ partial derivatives for LOD */
 946             {
 947                GLfloat texcoord[4], dtdx[4], dtdy[4], color[4];
 948                fetch_vector4( &inst->SrcReg[0], machine, texcoord );
 949                fetch_vector4( &inst->SrcReg[1], machine, dtdx );
 950                fetch_vector4( &inst->SrcReg[2], machine, dtdy );
 951                fetch_texel_deriv( ctx, texcoord, dtdx, dtdy, inst->TexSrcUnit,
 952                                   color );
 953                store_vector4( inst, machine, color );
 954             }
 955             break;
 956          case FP_OPCODE_TXP:
 957             /* Texture lookup w/ perspective divide */
 958             {
 959                GLfloat texcoord[4], color[4];
 960                fetch_vector4( &inst->SrcReg[0], machine, texcoord );
 961                /* Already did perspective divide in interpolate_texcoords() */
 962                fetch_texel( ctx, texcoord, inst->TexSrcUnit, color );
 963                store_vector4( inst, machine, color );
 964             }
 965             break;
 966          case FP_OPCODE_UP2H: /* unpack two 16-bit floats */
 967             /* XXX this is probably wrong */
 968             {
 969                GLfloat a[4], result[4];
 970                const GLuint *rawBits = (const GLuint *) a;
 971                GLuint *rawResult = (GLuint *) result;
 972                fetch_vector1( &inst->SrcReg[0], machine, a );
 973                rawResult[0] = rawBits[0] & 0xffff;
 974                rawResult[1] = (rawBits[0] >> 16) & 0xffff;
 975                rawResult[2] = rawBits[0] & 0xffff;
 976                rawResult[3] = (rawBits[0] >> 16) & 0xffff;
 977                store_vector4( inst, machine, result );
 978             }
 979             break;
 980          case FP_OPCODE_UP2US: /* unpack two GLushorts */
 981             {
 982                GLfloat a[4], result[4];
 983                const GLuint *rawBits = (const GLuint *) a;
 984                fetch_vector1( &inst->SrcReg[0], machine, a );
 985                result[0] = (GLfloat) ((rawBits[0] >>  0) & 0xffff) / 65535.0F;
 986                result[1] = (GLfloat) ((rawBits[0] >> 16) & 0xffff) / 65535.0F;
 987                result[2] = result[0];
 988                result[3] = result[1];
 989                store_vector4( inst, machine, result );
 990             }
 991             break;
 992          case FP_OPCODE_UP4B: /* unpack four GLbytes */
 993             {
 994                GLfloat a[4], result[4];
 995                const GLuint *rawBits = (const GLuint *) a;
 996                fetch_vector1( &inst->SrcReg[0], machine, a );
 997                result[0] = (((rawBits[0] >>  0) & 0xff) - 128) / 127.0F;
 998                result[0] = (((rawBits[0] >>  8) & 0xff) - 128) / 127.0F;
 999                result[0] = (((rawBits[0] >> 16) & 0xff) - 128) / 127.0F;
1000                result[0] = (((rawBits[0] >> 24) & 0xff) - 128) / 127.0F;
1001                store_vector4( inst, machine, result );
1002             }
1003             break;
1004          case FP_OPCODE_UP4UB: /* unpack four GLubytes */
1005             {
1006                GLfloat a[4], result[4];
1007                const GLuint *rawBits = (const GLuint *) a;
1008                fetch_vector1( &inst->SrcReg[0], machine, a );
1009                result[0] = ((rawBits[0] >>  0) & 0xff) / 255.0F;
1010                result[0] = ((rawBits[0] >>  8) & 0xff) / 255.0F;
1011                result[0] = ((rawBits[0] >> 16) & 0xff) / 255.0F;
1012                result[0] = ((rawBits[0] >> 24) & 0xff) / 255.0F;
1013                store_vector4( inst, machine, result );
1014             }
1015             break;
1016          case FP_OPCODE_X2D: /* 2-D matrix transform */
1017             {
1018                GLfloat a[4], b[4], c[4], result[4];
1019                fetch_vector4( &inst->SrcReg[0], machine, a );
1020                fetch_vector4( &inst->SrcReg[1], machine, b );
1021                fetch_vector4( &inst->SrcReg[2], machine, c );
1022                result[0] = a[0] + b[0] * c[0] + b[1] * c[1];
1023                result[1] = a[1] + b[0] * c[2] + b[1] * c[3];
1024                result[2] = a[2] + b[0] * c[0] + b[1] * c[1];
1025                result[3] = a[3] + b[0] * c[2] + b[1] * c[3];
1026                store_vector4( inst, machine, result );
1027             }
1028             break;
1029          case FP_OPCODE_END:
1030             return GL_TRUE;
1031          default:
1032             _mesa_problem(ctx, "Bad opcode %d in _mesa_exec_fragment_program",
1033                           inst->Opcode);
1034             return GL_TRUE; /* return value doesn't matter */
1035       }
1036    }
1037    return GL_TRUE;
1038 }
1039
1040
1041 static void
1042 init_machine( GLcontext *ctx, struct fp_machine *machine,
1043               const struct fragment_program *program,
1044               const struct sw_span *span, GLuint col )
1045 {
1046    GLuint j, u;
1047
1048    /* Clear temporary registers */
1049    _mesa_bzero(machine->Registers + FP_TEMP_REG_START,
1050                MAX_NV_FRAGMENT_PROGRAM_TEMPS * 4 * sizeof(GLfloat));
1051
1052    /* Load program local parameters */
1053    for (j = 0; j < MAX_NV_FRAGMENT_PROGRAM_PARAMS; j++) {
1054       COPY_4V(machine->Registers[FP_PROG_REG_START + j],
1055               program->LocalParams[j]);
1056    }
1057
1058    /* Load input registers */
1059    if (program->InputsRead & (1 << FRAG_ATTRIB_WPOS)) {
1060       GLfloat *wpos = machine->Registers[FP_INPUT_REG_START+FRAG_ATTRIB_WPOS];
1061       wpos[0] = span->x + col;
1062       wpos[1] = span->y;
1063       wpos[2] = (GLfloat) span->array->z[col] / ctx->DepthMaxF;
1064       wpos[3] = span->w + col * span->dwdx;
1065    }
1066    if (program->InputsRead & (1 << FRAG_ATTRIB_COL0)) {
1067       GLfloat *col0 = machine->Registers[FP_INPUT_REG_START+FRAG_ATTRIB_COL0];
1068       col0[0] = CHAN_TO_FLOAT(span->array->rgba[col][RCOMP]);
1069       col0[1] = CHAN_TO_FLOAT(span->array->rgba[col][GCOMP]);
1070       col0[2] = CHAN_TO_FLOAT(span->array->rgba[col][BCOMP]);
1071       col0[3] = CHAN_TO_FLOAT(span->array->rgba[col][ACOMP]);
1072    }
1073    if (program->InputsRead & (1 << FRAG_ATTRIB_COL1)) {
1074       GLfloat *col1 = machine->Registers[FP_INPUT_REG_START+FRAG_ATTRIB_COL1];
1075       col1[0] = CHAN_TO_FLOAT(span->array->spec[col][RCOMP]);
1076       col1[1] = CHAN_TO_FLOAT(span->array->spec[col][GCOMP]);
1077       col1[2] = CHAN_TO_FLOAT(span->array->spec[col][BCOMP]);
1078       col1[3] = CHAN_TO_FLOAT(span->array->spec[col][ACOMP]);
1079    }
1080    if (program->InputsRead & (1 << FRAG_ATTRIB_FOGC)) {
1081       GLfloat *fogc = machine->Registers[FP_INPUT_REG_START+FRAG_ATTRIB_FOGC];
1082       fogc[0] = span->array->fog[col];
1083       fogc[1] = 0.0F;
1084       fogc[2] = 0.0F;
1085       fogc[3] = 0.0F;
1086    }
1087    for (u = 0; u < ctx->Const.MaxTextureCoordUnits; u++) {
1088       if (program->InputsRead & (1 << (FRAG_ATTRIB_TEX0 + u))) {
1089          GLfloat *tex = machine->Registers[FP_INPUT_REG_START+FRAG_ATTRIB_TEX0+u];
1090          if (ctx->Texture.Unit[u]._ReallyEnabled) {
1091             COPY_4V(tex, span->array->texcoords[u][col]);
1092          }
1093          else {
1094             COPY_4V(tex, ctx->Current.Attrib[VERT_ATTRIB_TEX0 + u]);
1095          }
1096       }
1097    }
1098 }
1099
1100
1101 void
1102 _swrast_exec_nv_fragment_program( GLcontext *ctx, struct sw_span *span )
1103 {
1104    const struct fragment_program *program = ctx->FragmentProgram.Current;
1105    GLuint i;
1106
1107    for (i = 0; i < span->end; i++) {
1108       if (span->array->mask[i]) {
1109          init_machine(ctx, &ctx->FragmentProgram.Machine,
1110                       ctx->FragmentProgram.Current, span, i);
1111
1112          if (!execute_program(ctx, program, ~0,
1113                               &ctx->FragmentProgram.Machine, span))
1114             span->array->mask[i] = GL_FALSE;  /* killed fragment */
1115
1116          /* Store output registers */
1117          {
1118             const GLfloat *colOut
1119                = ctx->FragmentProgram.Machine.Registers[FP_OUTPUT_REG_START];
1120             UNCLAMPED_FLOAT_TO_CHAN(span->array->rgba[i][RCOMP], colOut[0]);
1121             UNCLAMPED_FLOAT_TO_CHAN(span->array->rgba[i][GCOMP], colOut[1]);
1122             UNCLAMPED_FLOAT_TO_CHAN(span->array->rgba[i][BCOMP], colOut[2]);
1123             UNCLAMPED_FLOAT_TO_CHAN(span->array->rgba[i][ACOMP], colOut[3]);
1124          }
1125          /* depth value */
1126          if (ctx->FragmentProgram.Current->OutputsWritten & 2)
1127             span->array->z[i] = IROUND(ctx->FragmentProgram.Machine.Registers[FP_OUTPUT_REG_START + 2][0] * ctx->DepthMaxF);
1128       }
1129    }
1130 }
1131