src/mesa/swrast/s_nvfragprog.c

   1 /*
   2  * Mesa 3-D graphics library
   3  * Version:  6.5.2
   4  *
   5  * Copyright (C) 1999-2006  Brian Paul   All Rights Reserved.
   6  *
   7  * Permission is hereby granted, free of charge, to any person obtaining a
   8  * copy of this software and associated documentation files (the "Software"),
   9  * to deal in the Software without restriction, including without limitation
  10  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  11  * and/or sell copies of the Software, and to permit persons to whom the
  12  * Software is furnished to do so, subject to the following conditions:
  13  *
  14  * The above copyright notice and this permission notice shall be included
  15  * in all copies or substantial portions of the Software.
  16  *
  17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  18  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  20  * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
  21  * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  22  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  23  */
  24
  25 /*
  26  * Regarding GL_NV_fragment_program:
  27  *
  28  * Portions of this software may use or implement intellectual
  29  * property owned and licensed by NVIDIA Corporation. NVIDIA disclaims
  30  * any and all warranties with respect to such intellectual property,
  31  * including any use thereof or modifications thereto.
  32  */
  33
  34 #include "glheader.h"
  35 #include "colormac.h"
  36 #include "context.h"
  37 #include "program_instruction.h"
  38 #include "program.h"
  39
  40 #include "s_nvfragprog.h"
  41 #include "s_span.h"
  42
  43
/* See comments below for info about this.
 * NOTE(review): the code that consults LAMBDA_ZERO is not in this part of
 * the file; presumably it selects whether texture lookups use a lambda
 * (level-of-detail) of zero -- confirm at the point of use.
 */
#define LAMBDA_ZERO 1

/* Debug predicate: when non-zero, execute_program() prints each instruction
 * and the operands/results of selected opcodes to stdout.
 */
#define DEBUG_FRAG 0
  49
  50
/**
 * Virtual machine state used during execution of a fragment program.
 * Each register is a 4-component float vector (x, y, z, w).
 */
struct fp_machine
{
   /** Temporary registers (PROGRAM_TEMPORARY file) */
   GLfloat Temporaries[MAX_NV_FRAGMENT_PROGRAM_TEMPS][4];
   /** Fragment input attributes (PROGRAM_INPUT file), indexed by FRAG_ATTRIB_* */
   GLfloat Inputs[MAX_NV_FRAGMENT_PROGRAM_INPUTS][4];
   /** Program result registers (PROGRAM_OUTPUT file) */
   GLfloat Outputs[MAX_NV_FRAGMENT_PROGRAM_OUTPUTS][4];
   GLuint CondCodes[4];  /**< COND_* value for x/y/z/w */

   GLuint CallStack[MAX_PROGRAM_CALL_DEPTH]; /**< For CAL/RET instructions */
   GLuint StackDepth; /**< Index/ptr to top of CallStack[] */
};
  64
  65
  66 #if FEATURE_MESA_program_debug
  67 static struct fp_machine *CurrentMachine = NULL;
  68
  69 /**
  70  * For GL_MESA_program_debug.
  71  * Return current value (4*GLfloat) of a fragment program register.
  72  * Called via ctx->Driver.GetFragmentProgramRegister().
  73  */
  74 void
  75 _swrast_get_program_register(GLcontext *ctx, enum register_file file,
  76                              GLuint index, GLfloat val[4])
  77 {
  78    if (CurrentMachine) {
  79       switch (file) {
  80       case PROGRAM_INPUT:
  81          COPY_4V(val, CurrentMachine->Inputs[index]);
  82          break;
  83       case PROGRAM_OUTPUT:
  84          COPY_4V(val, CurrentMachine->Outputs[index]);
  85          break;
  86       case PROGRAM_TEMPORARY:
  87          COPY_4V(val, CurrentMachine->Temporaries[index]);
  88          break;
  89       default:
  90          _mesa_problem(NULL,
  91                        "bad register file in _swrast_get_program_register");
  92       }
  93    }
  94 }
  95 #endif /* FEATURE_MESA_program_debug */
  96
  97
  98 /**
  99  * Fetch a texel.
 100  */
 101 static void
 102 fetch_texel( GLcontext *ctx, const GLfloat texcoord[4], GLfloat lambda,
 103              GLuint unit, GLfloat color[4] )
 104 {
 105    GLchan rgba[4];
 106    SWcontext *swrast = SWRAST_CONTEXT(ctx);
 107
 108    /* XXX use a float-valued TextureSample routine here!!! */
 109    swrast->TextureSample[unit](ctx, ctx->Texture.Unit[unit]._Current,
 110                                1, (const GLfloat (*)[4]) texcoord,
 111                                &lambda, &rgba);
 112    color[0] = CHAN_TO_FLOAT(rgba[0]);
 113    color[1] = CHAN_TO_FLOAT(rgba[1]);
 114    color[2] = CHAN_TO_FLOAT(rgba[2]);
 115    color[3] = CHAN_TO_FLOAT(rgba[3]);
 116 }
 117
 118
 119 /**
 120  * Fetch a texel with the given partial derivatives to compute a level
 121  * of detail in the mipmap.
 122  */
 123 static void
 124 fetch_texel_deriv( GLcontext *ctx, const GLfloat texcoord[4],
 125                    const GLfloat texdx[4], const GLfloat texdy[4],
 126                    GLuint unit, GLfloat color[4] )
 127 {
 128    SWcontext *swrast = SWRAST_CONTEXT(ctx);
 129    const struct gl_texture_object *texObj = ctx->Texture.Unit[unit]._Current;
 130    const struct gl_texture_image *texImg = texObj->Image[0][texObj->BaseLevel];
 131    const GLfloat texW = (GLfloat) texImg->WidthScale;
 132    const GLfloat texH = (GLfloat) texImg->HeightScale;
 133    GLchan rgba[4];
 134
 135    GLfloat lambda = _swrast_compute_lambda(texdx[0], texdy[0], /* ds/dx, ds/dy */
 136                                          texdx[1], texdy[1], /* dt/dx, dt/dy */
 137                                          texdx[3], texdy[2], /* dq/dx, dq/dy */
 138                                          texW, texH,
 139                                          texcoord[0], texcoord[1], texcoord[3],
 140                                          1.0F / texcoord[3]);
 141
 142    swrast->TextureSample[unit](ctx, ctx->Texture.Unit[unit]._Current,
 143                                1, (const GLfloat (*)[4]) texcoord,
 144                                &lambda, &rgba);
 145    color[0] = CHAN_TO_FLOAT(rgba[0]);
 146    color[1] = CHAN_TO_FLOAT(rgba[1]);
 147    color[2] = CHAN_TO_FLOAT(rgba[2]);
 148    color[3] = CHAN_TO_FLOAT(rgba[3]);
 149 }
 150
 151
 152 /**
 153  * Return a pointer to the 4-element float vector specified by the given
 154  * source register.
 155  */
 156 static INLINE const GLfloat *
 157 get_register_pointer( GLcontext *ctx,
 158                       const struct prog_src_register *source,
 159                       const struct fp_machine *machine,
 160                       const struct gl_fragment_program *program )
 161 {
 162    switch (source->File) {
 163    case PROGRAM_TEMPORARY:
 164       ASSERT(source->Index < MAX_NV_FRAGMENT_PROGRAM_TEMPS);
 165       return machine->Temporaries[source->Index];
 166    case PROGRAM_INPUT:
 167       ASSERT(source->Index < MAX_NV_FRAGMENT_PROGRAM_INPUTS);
 168       return machine->Inputs[source->Index];
 169    case PROGRAM_OUTPUT:
 170       /* This is only for PRINT */
 171       ASSERT(source->Index < MAX_NV_FRAGMENT_PROGRAM_OUTPUTS);
 172       return machine->Outputs[source->Index];
 173    case PROGRAM_LOCAL_PARAM:
 174       ASSERT(source->Index < MAX_PROGRAM_LOCAL_PARAMS);
 175       return program->Base.LocalParams[source->Index];
 176    case PROGRAM_ENV_PARAM:
 177       ASSERT(source->Index < MAX_NV_FRAGMENT_PROGRAM_PARAMS);
 178       return ctx->FragmentProgram.Parameters[source->Index];
 179    case PROGRAM_STATE_VAR:
 180       /* Fallthrough */
 181    case PROGRAM_CONSTANT:
 182       /* Fallthrough */
 183    case PROGRAM_NAMED_PARAM:
 184       ASSERT(source->Index < (GLint) program->Base.Parameters->NumParameters);
 185       return program->Base.Parameters->ParameterValues[source->Index];
 186    default:
 187       _mesa_problem(ctx, "Invalid input register file %d in fp "
 188                     "get_register_pointer", source->File);
 189       return NULL;
 190    }
 191 }
 192
 193
 194 /**
 195  * Fetch a 4-element float vector from the given source register.
 196  * Apply swizzling and negating as needed.
 197  */
 198 static void
 199 fetch_vector4( GLcontext *ctx,
 200                const struct prog_src_register *source,
 201                const struct fp_machine *machine,
 202                const struct gl_fragment_program *program,
 203                GLfloat result[4] )
 204 {
 205    const GLfloat *src = get_register_pointer(ctx, source, machine, program);
 206    ASSERT(src);
 207
 208    if (source->Swizzle == MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y,
 209                                         SWIZZLE_Z, SWIZZLE_W)) {
 210       /* no swizzling */
 211       COPY_4V(result, src);
 212    }
 213    else {
 214       result[0] = src[GET_SWZ(source->Swizzle, 0)];
 215       result[1] = src[GET_SWZ(source->Swizzle, 1)];
 216       result[2] = src[GET_SWZ(source->Swizzle, 2)];
 217       result[3] = src[GET_SWZ(source->Swizzle, 3)];
 218    }
 219
 220    if (source->NegateBase) {
 221       result[0] = -result[0];
 222       result[1] = -result[1];
 223       result[2] = -result[2];
 224       result[3] = -result[3];
 225    }
 226    if (source->Abs) {
 227       result[0] = FABSF(result[0]);
 228       result[1] = FABSF(result[1]);
 229       result[2] = FABSF(result[2]);
 230       result[3] = FABSF(result[3]);
 231    }
 232    if (source->NegateAbs) {
 233       result[0] = -result[0];
 234       result[1] = -result[1];
 235       result[2] = -result[2];
 236       result[3] = -result[3];
 237    }
 238 }
 239
 240
 241 /**
 242  * Fetch the derivative with respect to X for the given register.
 243  * \return GL_TRUE if it was easily computed or GL_FALSE if we
 244  * need to execute another instance of the program (ugh)!
 245  */
 246 static GLboolean
 247 fetch_vector4_deriv( GLcontext *ctx,
 248                      const struct prog_src_register *source,
 249                      const SWspan *span,
 250                      char xOrY, GLint column, GLfloat result[4] )
 251 {
 252    GLfloat src[4];
 253
 254    ASSERT(xOrY == 'X' || xOrY == 'Y');
 255
 256    switch (source->Index) {
 257    case FRAG_ATTRIB_WPOS:
 258       if (xOrY == 'X') {
 259          src[0] = 1.0;
 260          src[1] = 0.0;
 261          src[2] = span->dzdx / ctx->DrawBuffer->_DepthMaxF;
 262          src[3] = span->dwdx;
 263       }
 264       else {
 265          src[0] = 0.0;
 266          src[1] = 1.0;
 267          src[2] = span->dzdy / ctx->DrawBuffer->_DepthMaxF;
 268          src[3] = span->dwdy;
 269       }
 270       break;
 271    case FRAG_ATTRIB_COL0:
 272       if (xOrY == 'X') {
 273          src[0] = span->drdx * (1.0F / CHAN_MAXF);
 274          src[1] = span->dgdx * (1.0F / CHAN_MAXF);
 275          src[2] = span->dbdx * (1.0F / CHAN_MAXF);
 276          src[3] = span->dadx * (1.0F / CHAN_MAXF);
 277       }
 278       else {
 279          src[0] = span->drdy * (1.0F / CHAN_MAXF);
 280          src[1] = span->dgdy * (1.0F / CHAN_MAXF);
 281          src[2] = span->dbdy * (1.0F / CHAN_MAXF);
 282          src[3] = span->dady * (1.0F / CHAN_MAXF);
 283       }
 284       break;
 285    case FRAG_ATTRIB_COL1:
 286       if (xOrY == 'X') {
 287          src[0] = span->dsrdx * (1.0F / CHAN_MAXF);
 288          src[1] = span->dsgdx * (1.0F / CHAN_MAXF);
 289          src[2] = span->dsbdx * (1.0F / CHAN_MAXF);
 290          src[3] = 0.0; /* XXX need this */
 291       }
 292       else {
 293          src[0] = span->dsrdy * (1.0F / CHAN_MAXF);
 294          src[1] = span->dsgdy * (1.0F / CHAN_MAXF);
 295          src[2] = span->dsbdy * (1.0F / CHAN_MAXF);
 296          src[3] = 0.0; /* XXX need this */
 297       }
 298       break;
 299    case FRAG_ATTRIB_FOGC:
 300       if (xOrY == 'X') {
 301          src[0] = span->dfogdx;
 302          src[1] = 0.0;
 303          src[2] = 0.0;
 304          src[3] = 0.0;
 305       }
 306       else {
 307          src[0] = span->dfogdy;
 308          src[1] = 0.0;
 309          src[2] = 0.0;
 310          src[3] = 0.0;
 311       }
 312       break;
 313    case FRAG_ATTRIB_TEX0:
 314    case FRAG_ATTRIB_TEX1:
 315    case FRAG_ATTRIB_TEX2:
 316    case FRAG_ATTRIB_TEX3:
 317    case FRAG_ATTRIB_TEX4:
 318    case FRAG_ATTRIB_TEX5:
 319    case FRAG_ATTRIB_TEX6:
 320    case FRAG_ATTRIB_TEX7:
 321       if (xOrY == 'X') {
 322          const GLuint u = source->Index - FRAG_ATTRIB_TEX0;
 323          /* this is a little tricky - I think I've got it right */
 324          const GLfloat invQ = 1.0f / (span->tex[u][3]
 325                                       + span->texStepX[u][3] * column);
 326          src[0] = span->texStepX[u][0] * invQ;
 327          src[1] = span->texStepX[u][1] * invQ;
 328          src[2] = span->texStepX[u][2] * invQ;
 329          src[3] = span->texStepX[u][3] * invQ;
 330       }
 331       else {
 332          const GLuint u = source->Index - FRAG_ATTRIB_TEX0;
 333          /* Tricky, as above, but in Y direction */
 334          const GLfloat invQ = 1.0f / (span->tex[u][3] + span->texStepY[u][3]);
 335          src[0] = span->texStepY[u][0] * invQ;
 336          src[1] = span->texStepY[u][1] * invQ;
 337          src[2] = span->texStepY[u][2] * invQ;
 338          src[3] = span->texStepY[u][3] * invQ;
 339       }
 340       break;
 341    default:
 342       return GL_FALSE;
 343    }
 344
 345    result[0] = src[GET_SWZ(source->Swizzle, 0)];
 346    result[1] = src[GET_SWZ(source->Swizzle, 1)];
 347    result[2] = src[GET_SWZ(source->Swizzle, 2)];
 348    result[3] = src[GET_SWZ(source->Swizzle, 3)];
 349
 350    if (source->NegateBase) {
 351       result[0] = -result[0];
 352       result[1] = -result[1];
 353       result[2] = -result[2];
 354       result[3] = -result[3];
 355    }
 356    if (source->Abs) {
 357       result[0] = FABSF(result[0]);
 358       result[1] = FABSF(result[1]);
 359       result[2] = FABSF(result[2]);
 360       result[3] = FABSF(result[3]);
 361    }
 362    if (source->NegateAbs) {
 363       result[0] = -result[0];
 364       result[1] = -result[1];
 365       result[2] = -result[2];
 366       result[3] = -result[3];
 367    }
 368    return GL_TRUE;
 369 }
 370
 371
 372 /**
 373  * As above, but only return result[0] element.
 374  */
 375 static void
 376 fetch_vector1( GLcontext *ctx,
 377                const struct prog_src_register *source,
 378                const struct fp_machine *machine,
 379                const struct gl_fragment_program *program,
 380                GLfloat result[4] )
 381 {
 382    const GLfloat *src = get_register_pointer(ctx, source, machine, program);
 383    ASSERT(src);
 384
 385    result[0] = src[GET_SWZ(source->Swizzle, 0)];
 386
 387    if (source->NegateBase) {
 388       result[0] = -result[0];
 389    }
 390    if (source->Abs) {
 391       result[0] = FABSF(result[0]);
 392    }
 393    if (source->NegateAbs) {
 394       result[0] = -result[0];
 395    }
 396 }
 397
 398
 399 /**
 400  * Test value against zero and return GT, LT, EQ or UN if NaN.
 401  */
 402 static INLINE GLuint
 403 generate_cc( float value )
 404 {
 405    if (value != value)
 406       return COND_UN;  /* NaN */
 407    if (value > 0.0F)
 408       return COND_GT;
 409    if (value < 0.0F)
 410       return COND_LT;
 411    return COND_EQ;
 412 }
 413
 414
 415 /**
 416  * Test if the ccMaskRule is satisfied by the given condition code.
 417  * Used to mask destination writes according to the current condition code.
 418  */
 419 static INLINE GLboolean
 420 test_cc(GLuint condCode, GLuint ccMaskRule)
 421 {
 422    switch (ccMaskRule) {
 423    case COND_EQ: return (condCode == COND_EQ);
 424    case COND_NE: return (condCode != COND_EQ);
 425    case COND_LT: return (condCode == COND_LT);
 426    case COND_GE: return (condCode == COND_GT || condCode == COND_EQ);
 427    case COND_LE: return (condCode == COND_LT || condCode == COND_EQ);
 428    case COND_GT: return (condCode == COND_GT);
 429    case COND_TR: return GL_TRUE;
 430    case COND_FL: return GL_FALSE;
 431    default:      return GL_TRUE;
 432    }
 433 }
 434
 435
 436 /**
 437  * Store 4 floats into a register.  Observe the instructions saturate and
 438  * set-condition-code flags.
 439  */
 440 static void
 441 store_vector4( const struct prog_instruction *inst,
 442                struct fp_machine *machine,
 443                const GLfloat value[4] )
 444 {
 445    const struct prog_dst_register *dest = &(inst->DstReg);
 446    const GLboolean clamp = inst->SaturateMode == SATURATE_ZERO_ONE;
 447    GLfloat *dstReg;
 448    GLfloat dummyReg[4];
 449    GLfloat clampedValue[4];
 450    GLuint writeMask = dest->WriteMask;
 451
 452    switch (dest->File) {
 453       case PROGRAM_OUTPUT:
 454          dstReg = machine->Outputs[dest->Index];
 455          break;
 456       case PROGRAM_TEMPORARY:
 457          dstReg = machine->Temporaries[dest->Index];
 458          break;
 459       case PROGRAM_WRITE_ONLY:
 460          dstReg = dummyReg;
 461          return;
 462       default:
 463          _mesa_problem(NULL, "bad register file in store_vector4(fp)");
 464          return;
 465    }
 466
 467 #if 0
 468    if (value[0] > 1.0e10 ||
 469        IS_INF_OR_NAN(value[0]) ||
 470        IS_INF_OR_NAN(value[1]) ||
 471        IS_INF_OR_NAN(value[2]) ||
 472        IS_INF_OR_NAN(value[3])  )
 473       printf("store %g %g %g %g\n", value[0], value[1], value[2], value[3]);
 474 #endif
 475
 476    if (clamp) {
 477       clampedValue[0] = CLAMP(value[0], 0.0F, 1.0F);
 478       clampedValue[1] = CLAMP(value[1], 0.0F, 1.0F);
 479       clampedValue[2] = CLAMP(value[2], 0.0F, 1.0F);
 480       clampedValue[3] = CLAMP(value[3], 0.0F, 1.0F);
 481       value = clampedValue;
 482    }
 483
 484    if (dest->CondMask != COND_TR) {
 485       /* condition codes may turn off some writes */
 486       if (writeMask & WRITEMASK_X) {
 487          if (!test_cc(machine->CondCodes[GET_SWZ(dest->CondSwizzle, 0)],
 488                       dest->CondMask))
 489             writeMask &= ~WRITEMASK_X;
 490       }
 491       if (writeMask & WRITEMASK_Y) {
 492          if (!test_cc(machine->CondCodes[GET_SWZ(dest->CondSwizzle, 1)],
 493                       dest->CondMask))
 494             writeMask &= ~WRITEMASK_Y;
 495       }
 496       if (writeMask & WRITEMASK_Z) {
 497          if (!test_cc(machine->CondCodes[GET_SWZ(dest->CondSwizzle, 2)],
 498                       dest->CondMask))
 499             writeMask &= ~WRITEMASK_Z;
 500       }
 501       if (writeMask & WRITEMASK_W) {
 502          if (!test_cc(machine->CondCodes[GET_SWZ(dest->CondSwizzle, 3)],
 503                       dest->CondMask))
 504             writeMask &= ~WRITEMASK_W;
 505       }
 506    }
 507
 508    if (writeMask & WRITEMASK_X)
 509       dstReg[0] = value[0];
 510    if (writeMask & WRITEMASK_Y)
 511       dstReg[1] = value[1];
 512    if (writeMask & WRITEMASK_Z)
 513       dstReg[2] = value[2];
 514    if (writeMask & WRITEMASK_W)
 515       dstReg[3] = value[3];
 516
 517    if (inst->CondUpdate) {
 518       if (writeMask & WRITEMASK_X)
 519          machine->CondCodes[0] = generate_cc(value[0]);
 520       if (writeMask & WRITEMASK_Y)
 521          machine->CondCodes[1] = generate_cc(value[1]);
 522       if (writeMask & WRITEMASK_Z)
 523          machine->CondCodes[2] = generate_cc(value[2]);
 524       if (writeMask & WRITEMASK_W)
 525          machine->CondCodes[3] = generate_cc(value[3]);
 526    }
 527 }
 528
 529
 530 /**
 531  * Initialize a new machine state instance from an existing one, adding
 532  * the partial derivatives onto the input registers.
 533  * Used to implement DDX and DDY instructions in non-trivial cases.
 534  */
 535 static void
 536 init_machine_deriv( GLcontext *ctx,
 537                     const struct fp_machine *machine,
 538                     const struct gl_fragment_program *program,
 539                     const SWspan *span, char xOrY,
 540                     struct fp_machine *dMachine )
 541 {
 542    GLuint u;
 543
 544    ASSERT(xOrY == 'X' || xOrY == 'Y');
 545
 546    /* copy existing machine */
 547    _mesa_memcpy(dMachine, machine, sizeof(struct fp_machine));
 548
 549    if (program->Base.Target == GL_FRAGMENT_PROGRAM_NV) {
 550       /* Clear temporary registers (undefined for ARB_f_p) */
 551       _mesa_bzero( (void*) machine->Temporaries,
 552                    MAX_NV_FRAGMENT_PROGRAM_TEMPS * 4 * sizeof(GLfloat));
 553    }
 554
 555    /* Add derivatives */
 556    if (program->Base.InputsRead & (1 << FRAG_ATTRIB_WPOS)) {
 557       GLfloat *wpos = (GLfloat*) machine->Inputs[FRAG_ATTRIB_WPOS];
 558       if (xOrY == 'X') {
 559          wpos[0] += 1.0F;
 560          wpos[1] += 0.0F;
 561          wpos[2] += span->dzdx;
 562          wpos[3] += span->dwdx;
 563       }
 564       else {
 565          wpos[0] += 0.0F;
 566          wpos[1] += 1.0F;
 567          wpos[2] += span->dzdy;
 568          wpos[3] += span->dwdy;
 569       }
 570    }
 571    if (program->Base.InputsRead & (1 << FRAG_ATTRIB_COL0)) {
 572       GLfloat *col0 = (GLfloat*) machine->Inputs[FRAG_ATTRIB_COL0];
 573       if (xOrY == 'X') {
 574          col0[0] += span->drdx * (1.0F / CHAN_MAXF);
 575          col0[1] += span->dgdx * (1.0F / CHAN_MAXF);
 576          col0[2] += span->dbdx * (1.0F / CHAN_MAXF);
 577          col0[3] += span->dadx * (1.0F / CHAN_MAXF);
 578       }
 579       else {
 580          col0[0] += span->drdy * (1.0F / CHAN_MAXF);
 581          col0[1] += span->dgdy * (1.0F / CHAN_MAXF);
 582          col0[2] += span->dbdy * (1.0F / CHAN_MAXF);
 583          col0[3] += span->dady * (1.0F / CHAN_MAXF);
 584       }
 585    }
 586    if (program->Base.InputsRead & (1 << FRAG_ATTRIB_COL1)) {
 587       GLfloat *col1 = (GLfloat*) machine->Inputs[FRAG_ATTRIB_COL1];
 588       if (xOrY == 'X') {
 589          col1[0] += span->dsrdx * (1.0F / CHAN_MAXF);
 590          col1[1] += span->dsgdx * (1.0F / CHAN_MAXF);
 591          col1[2] += span->dsbdx * (1.0F / CHAN_MAXF);
 592          col1[3] += 0.0; /*XXX fix */
 593       }
 594       else {
 595          col1[0] += span->dsrdy * (1.0F / CHAN_MAXF);
 596          col1[1] += span->dsgdy * (1.0F / CHAN_MAXF);
 597          col1[2] += span->dsbdy * (1.0F / CHAN_MAXF);
 598          col1[3] += 0.0; /*XXX fix */
 599       }
 600    }
 601    if (program->Base.InputsRead & (1 << FRAG_ATTRIB_FOGC)) {
 602       GLfloat *fogc = (GLfloat*) machine->Inputs[FRAG_ATTRIB_FOGC];
 603       if (xOrY == 'X') {
 604          fogc[0] += span->dfogdx;
 605       }
 606       else {
 607          fogc[0] += span->dfogdy;
 608       }
 609    }
 610    for (u = 0; u < ctx->Const.MaxTextureCoordUnits; u++) {
 611       if (program->Base.InputsRead & (1 << (FRAG_ATTRIB_TEX0 + u))) {
 612          GLfloat *tex = (GLfloat*) machine->Inputs[FRAG_ATTRIB_TEX0 + u];
 613          /* XXX perspective-correct interpolation */
 614          if (xOrY == 'X') {
 615             tex[0] += span->texStepX[u][0];
 616             tex[1] += span->texStepX[u][1];
 617             tex[2] += span->texStepX[u][2];
 618             tex[3] += span->texStepX[u][3];
 619          }
 620          else {
 621             tex[0] += span->texStepY[u][0];
 622             tex[1] += span->texStepY[u][1];
 623             tex[2] += span->texStepY[u][2];
 624             tex[3] += span->texStepY[u][3];
 625          }
 626       }
 627    }
 628
 629    /* init condition codes */
 630    dMachine->CondCodes[0] = COND_EQ;
 631    dMachine->CondCodes[1] = COND_EQ;
 632    dMachine->CondCodes[2] = COND_EQ;
 633    dMachine->CondCodes[3] = COND_EQ;
 634 }
 635
 636
 637 /**
  638  * Execute the given fragment program.
 639  * NOTE: we do everything in single-precision floating point; we don't
 640  * currently observe the single/half/fixed-precision qualifiers.
 641  * \param ctx - rendering context
 642  * \param program - the fragment program to execute
 643  * \param machine - machine state (register file)
 644  * \param maxInst - max number of instructions to execute
 645  * \return GL_TRUE if program completed or GL_FALSE if program executed KIL.
 646  */
 647 static GLboolean
 648 execute_program( GLcontext *ctx,
 649                  const struct gl_fragment_program *program, GLuint maxInst,
 650                  struct fp_machine *machine, const SWspan *span,
 651                  GLuint column )
 652 {
 653    GLuint pc;
 654
 655    if (DEBUG_FRAG) {
 656       printf("execute fragment program --------------------\n");
 657    }
 658
 659    for (pc = 0; pc < maxInst; pc++) {
 660       const struct prog_instruction *inst = program->Base.Instructions + pc;
 661
 662       if (ctx->FragmentProgram.CallbackEnabled &&
 663           ctx->FragmentProgram.Callback) {
 664          ctx->FragmentProgram.CurrentPosition = inst->StringPos;
 665          ctx->FragmentProgram.Callback(program->Base.Target,
 666                                        ctx->FragmentProgram.CallbackData);
 667       }
 668
 669       if (DEBUG_FRAG) {
 670          _mesa_print_instruction(inst);
 671       }
 672
 673       switch (inst->Opcode) {
 674          case OPCODE_ABS:
 675             {
 676                GLfloat a[4], result[4];
 677                fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
 678                result[0] = FABSF(a[0]);
 679                result[1] = FABSF(a[1]);
 680                result[2] = FABSF(a[2]);
 681                result[3] = FABSF(a[3]);
 682                store_vector4( inst, machine, result );
 683             }
 684             break;
 685          case OPCODE_ADD:
 686             {
 687                GLfloat a[4], b[4], result[4];
 688                fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
 689                fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
 690                result[0] = a[0] + b[0];
 691                result[1] = a[1] + b[1];
 692                result[2] = a[2] + b[2];
 693                result[3] = a[3] + b[3];
 694                store_vector4( inst, machine, result );
 695                if (DEBUG_FRAG) {
 696                   printf("ADD (%g %g %g %g) = (%g %g %g %g) + (%g %g %g %g)\n",
 697                          result[0], result[1], result[2], result[3],
 698                          a[0], a[1], a[2], a[3],
 699                          b[0], b[1], b[2], b[3]);
 700                }
 701             }
 702             break;
 703          case OPCODE_BRA: /* conditional branch */
 704             {
 705                /* NOTE: The return is conditional! */
 706                const GLuint swizzle = inst->DstReg.CondSwizzle;
 707                const GLuint condMask = inst->DstReg.CondMask;
 708                if (test_cc(machine->CondCodes[GET_SWZ(swizzle, 0)], condMask) ||
 709                    test_cc(machine->CondCodes[GET_SWZ(swizzle, 1)], condMask) ||
 710                    test_cc(machine->CondCodes[GET_SWZ(swizzle, 2)], condMask) ||
 711                    test_cc(machine->CondCodes[GET_SWZ(swizzle, 3)], condMask)) {
 712                   /* take branch */
 713                   pc = inst->BranchTarget;
 714                }
 715             }
 716             break;
 717          case OPCODE_CAL: /* Call subroutine */
 718             {
 719                /* NOTE: The call is conditional! */
 720                const GLuint swizzle = inst->DstReg.CondSwizzle;
 721                const GLuint condMask = inst->DstReg.CondMask;
 722                if (test_cc(machine->CondCodes[GET_SWZ(swizzle, 0)], condMask) ||
 723                    test_cc(machine->CondCodes[GET_SWZ(swizzle, 1)], condMask) ||
 724                    test_cc(machine->CondCodes[GET_SWZ(swizzle, 2)], condMask) ||
 725                    test_cc(machine->CondCodes[GET_SWZ(swizzle, 3)], condMask)) {
 726                   if (machine->StackDepth >= MAX_PROGRAM_CALL_DEPTH) {
 727                      return GL_TRUE; /* Per GL_NV_vertex_program2 spec */
 728                   }
 729                   machine->CallStack[machine->StackDepth++] = pc + 1;
 730                   pc = inst->BranchTarget;
 731                }
 732             }
 733             break;
 734          case OPCODE_CMP:
 735             {
 736                GLfloat a[4], b[4], c[4], result[4];
 737                fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
 738                fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
 739                fetch_vector4( ctx, &inst->SrcReg[2], machine, program, c );
 740                result[0] = a[0] < 0.0F ? b[0] : c[0];
 741                result[1] = a[1] < 0.0F ? b[1] : c[1];
 742                result[2] = a[2] < 0.0F ? b[2] : c[2];
 743                result[3] = a[3] < 0.0F ? b[3] : c[3];
 744                store_vector4( inst, machine, result );
 745             }
 746             break;
 747          case OPCODE_COS:
 748             {
 749                GLfloat a[4], result[4];
 750                fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
 751                result[0] = result[1] = result[2] = result[3]
 752                   = (GLfloat) _mesa_cos(a[0]);
 753                store_vector4( inst, machine, result );
 754             }
 755             break;
 756          case OPCODE_DDX: /* Partial derivative with respect to X */
 757             {
 758                GLfloat a[4], aNext[4], result[4];
 759                struct fp_machine dMachine;
 760                if (!fetch_vector4_deriv(ctx, &inst->SrcReg[0], span, 'X',
 761                                         column, result)) {
 762                   /* This is tricky.  Make a copy of the current machine state,
 763                    * increment the input registers by the dx or dy partial
 764                    * derivatives, then re-execute the program up to the
 765                    * preceeding instruction, then fetch the source register.
 766                    * Finally, find the difference in the register values for
 767                    * the original and derivative runs.
 768                    */
 769                   fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a);
 770                   init_machine_deriv(ctx, machine, program, span,
 771                                      'X', &dMachine);
 772                   execute_program(ctx, program, pc, &dMachine, span, column);
 773                   fetch_vector4( ctx, &inst->SrcReg[0], &dMachine, program, aNext );
 774                   result[0] = aNext[0] - a[0];
 775                   result[1] = aNext[1] - a[1];
 776                   result[2] = aNext[2] - a[2];
 777                   result[3] = aNext[3] - a[3];
 778                }
 779                store_vector4( inst, machine, result );
 780             }
 781             break;
 782          case OPCODE_DDY: /* Partial derivative with respect to Y */
 783             {
 784                GLfloat a[4], aNext[4], result[4];
 785                struct fp_machine dMachine;
 786                if (!fetch_vector4_deriv(ctx, &inst->SrcReg[0], span, 'Y',
 787                                         column, result)) {
 788                   init_machine_deriv(ctx, machine, program, span,
 789                                      'Y', &dMachine);
 790                   fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a);
 791                   execute_program(ctx, program, pc, &dMachine, span, column);
 792                   fetch_vector4( ctx, &inst->SrcReg[0], &dMachine, program, aNext );
 793                   result[0] = aNext[0] - a[0];
 794                   result[1] = aNext[1] - a[1];
 795                   result[2] = aNext[2] - a[2];
 796                   result[3] = aNext[3] - a[3];
 797                }
 798                store_vector4( inst, machine, result );
 799             }
 800             break;
 801          case OPCODE_DP3:
 802             {
 803                GLfloat a[4], b[4], result[4];
 804                fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
 805                fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
 806                result[0] = result[1] = result[2] = result[3] = DOT3(a, b);
 807                store_vector4( inst, machine, result );
 808                if (DEBUG_FRAG) {
 809                   printf("DP3 %g = (%g %g %g) . (%g %g %g)\n",
 810                          result[0], a[0], a[1], a[2], b[0], b[1], b[2]);
 811                }
 812             }
 813             break;
 814          case OPCODE_DP4:
 815             {
 816                GLfloat a[4], b[4], result[4];
 817                fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
 818                fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
 819                result[0] = result[1] = result[2] = result[3] = DOT4(a,b);
 820                store_vector4( inst, machine, result );
 821                if (DEBUG_FRAG) {
 822                   printf("DP4 %g = (%g, %g %g %g) . (%g, %g %g %g)\n",
 823                          result[0], a[0], a[1], a[2], a[3],
 824                          b[0], b[1], b[2], b[3]);
 825                }
 826             }
 827             break;
 828          case OPCODE_DPH:
 829             {
 830                GLfloat a[4], b[4], result[4];
 831                fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
 832                fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
 833                result[0] = result[1] = result[2] = result[3] =
 834                   a[0] * b[0] + a[1] * b[1] + a[2] * b[2] + b[3];
 835                store_vector4( inst, machine, result );
 836             }
 837             break;
 838          case OPCODE_DST: /* Distance vector */
 839             {
 840                GLfloat a[4], b[4], result[4];
 841                fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
 842                fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
 843                result[0] = 1.0F;
 844                result[1] = a[1] * b[1];
 845                result[2] = a[2];
 846                result[3] = b[3];
 847                store_vector4( inst, machine, result );
 848             }
 849             break;
 850          case OPCODE_EX2: /* Exponential base 2 */
 851             {
 852                GLfloat a[4], result[4];
 853                fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
 854                result[0] = result[1] = result[2] = result[3] =
 855                   (GLfloat) _mesa_pow(2.0, a[0]);
 856                store_vector4( inst, machine, result );
 857             }
 858             break;
 859          case OPCODE_FLR:
 860             {
 861                GLfloat a[4], result[4];
 862                fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
 863                result[0] = FLOORF(a[0]);
 864                result[1] = FLOORF(a[1]);
 865                result[2] = FLOORF(a[2]);
 866                result[3] = FLOORF(a[3]);
 867                store_vector4( inst, machine, result );
 868             }
 869             break;
 870          case OPCODE_FRC:
 871             {
 872                GLfloat a[4], result[4];
 873                fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
 874                result[0] = a[0] - FLOORF(a[0]);
 875                result[1] = a[1] - FLOORF(a[1]);
 876                result[2] = a[2] - FLOORF(a[2]);
 877                result[3] = a[3] - FLOORF(a[3]);
 878                store_vector4( inst, machine, result );
 879             }
 880             break;
 881          case OPCODE_KIL_NV: /* NV_f_p only */
 882             {
 883                const GLuint swizzle = inst->DstReg.CondSwizzle;
 884                const GLuint condMask = inst->DstReg.CondMask;
 885                if (test_cc(machine->CondCodes[GET_SWZ(swizzle, 0)], condMask) ||
 886                    test_cc(machine->CondCodes[GET_SWZ(swizzle, 1)], condMask) ||
 887                    test_cc(machine->CondCodes[GET_SWZ(swizzle, 2)], condMask) ||
 888                    test_cc(machine->CondCodes[GET_SWZ(swizzle, 3)], condMask)) {
 889                   return GL_FALSE;
 890                }
 891             }
 892             break;
 893          case OPCODE_KIL: /* ARB_f_p only */
 894             {
 895                GLfloat a[4];
 896                fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
 897                if (a[0] < 0.0F || a[1] < 0.0F || a[2] < 0.0F || a[3] < 0.0F) {
 898                   return GL_FALSE;
 899                }
 900             }
 901             break;
 902          case OPCODE_LG2:  /* log base 2 */
 903             {
 904                GLfloat a[4], result[4];
 905                fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
 906                result[0] = result[1] = result[2] = result[3] = LOG2(a[0]);
 907                store_vector4( inst, machine, result );
 908             }
 909             break;
 910          case OPCODE_LIT:
 911             {
 912                const GLfloat epsilon = 1.0F / 256.0F; /* from NV VP spec */
 913                GLfloat a[4], result[4];
 914                fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
 915                a[0] = MAX2(a[0], 0.0F);
 916                a[1] = MAX2(a[1], 0.0F);
 917                /* XXX ARB version clamps a[3], NV version doesn't */
 918                a[3] = CLAMP(a[3], -(128.0F - epsilon), (128.0F - epsilon));
 919                result[0] = 1.0F;
 920                result[1] = a[0];
 921                /* XXX we could probably just use pow() here */
 922                if (a[0] > 0.0F) {
 923                   if (a[1] == 0.0 && a[3] == 0.0)
 924                      result[2] = 1.0;
 925                   else
 926                      result[2] = EXPF(a[3] * LOGF(a[1]));
 927                }
 928                else {
 929                   result[2] = 0.0;
 930                }
 931                result[3] = 1.0F;
 932                store_vector4( inst, machine, result );
 933                if (DEBUG_FRAG) {
 934                   printf("LIT (%g %g %g %g) : (%g %g %g %g)\n",
 935                          result[0], result[1], result[2], result[3],
 936                          a[0], a[1], a[2], a[3]);
 937                }
 938             }
 939             break;
 940          case OPCODE_LRP:
 941             {
 942                GLfloat a[4], b[4], c[4], result[4];
 943                fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
 944                fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
 945                fetch_vector4( ctx, &inst->SrcReg[2], machine, program, c );
 946                result[0] = a[0] * b[0] + (1.0F - a[0]) * c[0];
 947                result[1] = a[1] * b[1] + (1.0F - a[1]) * c[1];
 948                result[2] = a[2] * b[2] + (1.0F - a[2]) * c[2];
 949                result[3] = a[3] * b[3] + (1.0F - a[3]) * c[3];
 950                store_vector4( inst, machine, result );
 951                if (DEBUG_FRAG) {
 952                   printf("LRP (%g %g %g %g) = (%g %g %g %g), "
 953                          "(%g %g %g %g), (%g %g %g %g)\n",
 954                          result[0], result[1], result[2], result[3],
 955                          a[0], a[1], a[2], a[3],
 956                          b[0], b[1], b[2], b[3],
 957                          c[0], c[1], c[2], c[3]);
 958                }
 959             }
 960             break;
 961          case OPCODE_MAD:
 962             {
 963                GLfloat a[4], b[4], c[4], result[4];
 964                fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
 965                fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
 966                fetch_vector4( ctx, &inst->SrcReg[2], machine, program, c );
 967                result[0] = a[0] * b[0] + c[0];
 968                result[1] = a[1] * b[1] + c[1];
 969                result[2] = a[2] * b[2] + c[2];
 970                result[3] = a[3] * b[3] + c[3];
 971                store_vector4( inst, machine, result );
 972                if (DEBUG_FRAG) {
 973                   printf("MAD (%g %g %g %g) = (%g %g %g %g) * "
 974                          "(%g %g %g %g) + (%g %g %g %g)\n",
 975                          result[0], result[1], result[2], result[3],
 976                          a[0], a[1], a[2], a[3],
 977                          b[0], b[1], b[2], b[3],
 978                          c[0], c[1], c[2], c[3]);
 979                }
 980             }
 981             break;
 982          case OPCODE_MAX:
 983             {
 984                GLfloat a[4], b[4], result[4];
 985                fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
 986                fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
 987                result[0] = MAX2(a[0], b[0]);
 988                result[1] = MAX2(a[1], b[1]);
 989                result[2] = MAX2(a[2], b[2]);
 990                result[3] = MAX2(a[3], b[3]);
 991                store_vector4( inst, machine, result );
 992                if (DEBUG_FRAG) {
 993                   printf("MAX (%g %g %g %g) = (%g %g %g %g), (%g %g %g %g)\n",
 994                          result[0], result[1], result[2], result[3],
 995                          a[0], a[1], a[2], a[3],
 996                          b[0], b[1], b[2], b[3]);
 997                }
 998             }
 999             break;
1000          case OPCODE_MIN:
1001             {
1002                GLfloat a[4], b[4], result[4];
1003                fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1004                fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
1005                result[0] = MIN2(a[0], b[0]);
1006                result[1] = MIN2(a[1], b[1]);
1007                result[2] = MIN2(a[2], b[2]);
1008                result[3] = MIN2(a[3], b[3]);
1009                store_vector4( inst, machine, result );
1010             }
1011             break;
1012          case OPCODE_MOV:
1013             {
1014                GLfloat result[4];
1015                fetch_vector4( ctx, &inst->SrcReg[0], machine, program, result );
1016                store_vector4( inst, machine, result );
1017                if (DEBUG_FRAG) {
1018                   printf("MOV (%g %g %g %g)\n",
1019                          result[0], result[1], result[2], result[3]);
1020                }
1021             }
1022             break;
1023          case OPCODE_MUL:
1024             {
1025                GLfloat a[4], b[4], result[4];
1026                fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1027                fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
1028                result[0] = a[0] * b[0];
1029                result[1] = a[1] * b[1];
1030                result[2] = a[2] * b[2];
1031                result[3] = a[3] * b[3];
1032                store_vector4( inst, machine, result );
1033                if (DEBUG_FRAG) {
1034                   printf("MUL (%g %g %g %g) = (%g %g %g %g) * (%g %g %g %g)\n",
1035                          result[0], result[1], result[2], result[3],
1036                          a[0], a[1], a[2], a[3],
1037                          b[0], b[1], b[2], b[3]);
1038                }
1039             }
1040             break;
1041          case OPCODE_PK2H: /* pack two 16-bit floats in one 32-bit float */
1042             {
1043                GLfloat a[4], result[4];
1044                GLhalfNV hx, hy;
1045                GLuint *rawResult = (GLuint *) result;
1046                GLuint twoHalves;
1047                fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1048                hx = _mesa_float_to_half(a[0]);
1049                hy = _mesa_float_to_half(a[1]);
1050                twoHalves = hx | (hy << 16);
1051                rawResult[0] = rawResult[1] = rawResult[2] = rawResult[3]
1052                   = twoHalves;
1053                store_vector4( inst, machine, result );
1054             }
1055             break;
1056          case OPCODE_PK2US: /* pack two GLushorts into one 32-bit float */
1057             {
1058                GLfloat a[4], result[4];
1059                GLuint usx, usy, *rawResult = (GLuint *) result;
1060                fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1061                a[0] = CLAMP(a[0], 0.0F, 1.0F);
1062                a[1] = CLAMP(a[1], 0.0F, 1.0F);
1063                usx = IROUND(a[0] * 65535.0F);
1064                usy = IROUND(a[1] * 65535.0F);
1065                rawResult[0] = rawResult[1] = rawResult[2] = rawResult[3]
1066                   = usx | (usy << 16);
1067                store_vector4( inst, machine, result );
1068             }
1069             break;
1070          case OPCODE_PK4B: /* pack four GLbytes into one 32-bit float */
1071             {
1072                GLfloat a[4], result[4];
1073                GLuint ubx, uby, ubz, ubw, *rawResult = (GLuint *) result;
1074                fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1075                a[0] = CLAMP(a[0], -128.0F / 127.0F, 1.0F);
1076                a[1] = CLAMP(a[1], -128.0F / 127.0F, 1.0F);
1077                a[2] = CLAMP(a[2], -128.0F / 127.0F, 1.0F);
1078                a[3] = CLAMP(a[3], -128.0F / 127.0F, 1.0F);
1079                ubx = IROUND(127.0F * a[0] + 128.0F);
1080                uby = IROUND(127.0F * a[1] + 128.0F);
1081                ubz = IROUND(127.0F * a[2] + 128.0F);
1082                ubw = IROUND(127.0F * a[3] + 128.0F);
1083                rawResult[0] = rawResult[1] = rawResult[2] = rawResult[3]
1084                   = ubx | (uby << 8) | (ubz << 16) | (ubw << 24);
1085                store_vector4( inst, machine, result );
1086             }
1087             break;
1088          case OPCODE_PK4UB: /* pack four GLubytes into one 32-bit float */
1089             {
1090                GLfloat a[4], result[4];
1091                GLuint ubx, uby, ubz, ubw, *rawResult = (GLuint *) result;
1092                fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1093                a[0] = CLAMP(a[0], 0.0F, 1.0F);
1094                a[1] = CLAMP(a[1], 0.0F, 1.0F);
1095                a[2] = CLAMP(a[2], 0.0F, 1.0F);
1096                a[3] = CLAMP(a[3], 0.0F, 1.0F);
1097                ubx = IROUND(255.0F * a[0]);
1098                uby = IROUND(255.0F * a[1]);
1099                ubz = IROUND(255.0F * a[2]);
1100                ubw = IROUND(255.0F * a[3]);
1101                rawResult[0] = rawResult[1] = rawResult[2] = rawResult[3]
1102                   = ubx | (uby << 8) | (ubz << 16) | (ubw << 24);
1103                store_vector4( inst, machine, result );
1104             }
1105             break;
1106          case OPCODE_POW:
1107             {
1108                GLfloat a[4], b[4], result[4];
1109                fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
1110                fetch_vector1( ctx, &inst->SrcReg[1], machine, program, b );
1111                result[0] = result[1] = result[2] = result[3]
1112                   = (GLfloat)_mesa_pow(a[0], b[0]);
1113                store_vector4( inst, machine, result );
1114             }
1115             break;
1116          case OPCODE_RCP:
1117             {
1118                GLfloat a[4], result[4];
1119                fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
1120                if (DEBUG_FRAG) {
1121                   if (a[0] == 0)
1122                      printf("RCP(0)\n");
1123                   else if (IS_INF_OR_NAN(a[0]))
1124                      printf("RCP(inf)\n");
1125                }
1126                result[0] = result[1] = result[2] = result[3] = 1.0F / a[0];
1127                store_vector4( inst, machine, result );
1128             }
1129             break;
1130          case OPCODE_RET: /* return from subroutine */
1131             {
1132                /* NOTE: The return is conditional! */
1133                const GLuint swizzle = inst->DstReg.CondSwizzle;
1134                const GLuint condMask = inst->DstReg.CondMask;
1135                if (test_cc(machine->CondCodes[GET_SWZ(swizzle, 0)], condMask) ||
1136                    test_cc(machine->CondCodes[GET_SWZ(swizzle, 1)], condMask) ||
1137                    test_cc(machine->CondCodes[GET_SWZ(swizzle, 2)], condMask) ||
1138                    test_cc(machine->CondCodes[GET_SWZ(swizzle, 3)], condMask)) {
1139                   if (machine->StackDepth == 0) {
1140                      return GL_TRUE; /* Per GL_NV_vertex_program2 spec */
1141                   }
1142                   pc = machine->CallStack[--machine->StackDepth];
1143                }
1144             }
1145             break;
1146          case OPCODE_RFL: /* reflection vector */
1147             {
1148                GLfloat axis[4], dir[4], result[4], tmpX, tmpW;
1149                fetch_vector4( ctx, &inst->SrcReg[0], machine, program, axis );
1150                fetch_vector4( ctx, &inst->SrcReg[1], machine, program, dir );
1151                tmpW = DOT3(axis, axis);
1152                tmpX = (2.0F * DOT3(axis, dir)) / tmpW;
1153                result[0] = tmpX * axis[0] - dir[0];
1154                result[1] = tmpX * axis[1] - dir[1];
1155                result[2] = tmpX * axis[2] - dir[2];
1156                /* result[3] is never written! XXX enforce in parser! */
1157                store_vector4( inst, machine, result );
1158             }
1159             break;
1160          case OPCODE_RSQ: /* 1 / sqrt() */
1161             {
1162                GLfloat a[4], result[4];
1163                fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
1164                a[0] = FABSF(a[0]);
1165                result[0] = result[1] = result[2] = result[3] = INV_SQRTF(a[0]);
1166                store_vector4( inst, machine, result );
1167                if (DEBUG_FRAG) {
1168                   printf("RSQ %g = 1/sqrt(|%g|)\n", result[0], a[0]);
1169                }
1170             }
1171             break;
1172          case OPCODE_SCS: /* sine and cos */
1173             {
1174                GLfloat a[4], result[4];
1175                fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
1176                result[0] = (GLfloat)_mesa_cos(a[0]);
1177                result[1] = (GLfloat)_mesa_sin(a[0]);
1178                result[2] = 0.0;  /* undefined! */
1179                result[3] = 0.0;  /* undefined! */
1180                store_vector4( inst, machine, result );
1181             }
1182             break;
1183          case OPCODE_SEQ: /* set on equal */
1184             {
1185                GLfloat a[4], b[4], result[4];
1186                fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1187                fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
1188                result[0] = (a[0] == b[0]) ? 1.0F : 0.0F;
1189                result[1] = (a[1] == b[1]) ? 1.0F : 0.0F;
1190                result[2] = (a[2] == b[2]) ? 1.0F : 0.0F;
1191                result[3] = (a[3] == b[3]) ? 1.0F : 0.0F;
1192                store_vector4( inst, machine, result );
1193             }
1194             break;
1195          case OPCODE_SFL: /* set false, operands ignored */
1196             {
1197                static const GLfloat result[4] = { 0.0F, 0.0F, 0.0F, 0.0F };
1198                store_vector4( inst, machine, result );
1199             }
1200             break;
1201          case OPCODE_SGE: /* set on greater or equal */
1202             {
1203                GLfloat a[4], b[4], result[4];
1204                fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1205                fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
1206                result[0] = (a[0] >= b[0]) ? 1.0F : 0.0F;
1207                result[1] = (a[1] >= b[1]) ? 1.0F : 0.0F;
1208                result[2] = (a[2] >= b[2]) ? 1.0F : 0.0F;
1209                result[3] = (a[3] >= b[3]) ? 1.0F : 0.0F;
1210                store_vector4( inst, machine, result );
1211             }
1212             break;
1213          case OPCODE_SGT: /* set on greater */
1214             {
1215                GLfloat a[4], b[4], result[4];
1216                fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1217                fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
1218                result[0] = (a[0] > b[0]) ? 1.0F : 0.0F;
1219                result[1] = (a[1] > b[1]) ? 1.0F : 0.0F;
1220                result[2] = (a[2] > b[2]) ? 1.0F : 0.0F;
1221                result[3] = (a[3] > b[3]) ? 1.0F : 0.0F;
1222                store_vector4( inst, machine, result );
1223             }
1224             break;
1225          case OPCODE_SIN:
1226             {
1227                GLfloat a[4], result[4];
1228                fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
1229                result[0] = result[1] = result[2] = result[3]
1230                   = (GLfloat) _mesa_sin(a[0]);
1231                store_vector4( inst, machine, result );
1232             }
1233             break;
1234          case OPCODE_SLE: /* set on less or equal */
1235             {
1236                GLfloat a[4], b[4], result[4];
1237                fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1238                fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
1239                result[0] = (a[0] <= b[0]) ? 1.0F : 0.0F;
1240                result[1] = (a[1] <= b[1]) ? 1.0F : 0.0F;
1241                result[2] = (a[2] <= b[2]) ? 1.0F : 0.0F;
1242                result[3] = (a[3] <= b[3]) ? 1.0F : 0.0F;
1243                store_vector4( inst, machine, result );
1244             }
1245             break;
1246          case OPCODE_SLT: /* set on less */
1247             {
1248                GLfloat a[4], b[4], result[4];
1249                fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1250                fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
1251                result[0] = (a[0] < b[0]) ? 1.0F : 0.0F;
1252                result[1] = (a[1] < b[1]) ? 1.0F : 0.0F;
1253                result[2] = (a[2] < b[2]) ? 1.0F : 0.0F;
1254                result[3] = (a[3] < b[3]) ? 1.0F : 0.0F;
1255                store_vector4( inst, machine, result );
1256             }
1257             break;
1258          case OPCODE_SNE: /* set on not equal */
1259             {
1260                GLfloat a[4], b[4], result[4];
1261                fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1262                fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
1263                result[0] = (a[0] != b[0]) ? 1.0F : 0.0F;
1264                result[1] = (a[1] != b[1]) ? 1.0F : 0.0F;
1265                result[2] = (a[2] != b[2]) ? 1.0F : 0.0F;
1266                result[3] = (a[3] != b[3]) ? 1.0F : 0.0F;
1267                store_vector4( inst, machine, result );
1268             }
1269             break;
1270          case OPCODE_STR: /* set true, operands ignored */
1271             {
1272                static const GLfloat result[4] = { 1.0F, 1.0F, 1.0F, 1.0F };
1273                store_vector4( inst, machine, result );
1274             }
1275             break;
1276          case OPCODE_SUB:
1277             {
1278                GLfloat a[4], b[4], result[4];
1279                fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1280                fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
1281                result[0] = a[0] - b[0];
1282                result[1] = a[1] - b[1];
1283                result[2] = a[2] - b[2];
1284                result[3] = a[3] - b[3];
1285                store_vector4( inst, machine, result );
1286                if (DEBUG_FRAG) {
1287                   printf("SUB (%g %g %g %g) = (%g %g %g %g) - (%g %g %g %g)\n",
1288                          result[0], result[1], result[2], result[3],
1289                          a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
1290                }
1291             }
1292             break;
1293          case OPCODE_SWZ: /* extended swizzle */
1294             {
1295                const struct prog_src_register *source = &inst->SrcReg[0];
1296                const GLfloat *src = get_register_pointer(ctx, source,
1297                                                          machine, program);
1298                GLfloat result[4];
1299                GLuint i;
1300                for (i = 0; i < 4; i++) {
1301                   const GLuint swz = GET_SWZ(source->Swizzle, i);
1302                   if (swz == SWIZZLE_ZERO)
1303                      result[i] = 0.0;
1304                   else if (swz == SWIZZLE_ONE)
1305                      result[i] = 1.0;
1306                   else {
1307                      ASSERT(swz >= 0);
1308                      ASSERT(swz <= 3);
1309                      result[i] = src[swz];
1310                   }
1311                   if (source->NegateBase & (1 << i))
1312                      result[i] = -result[i];
1313                }
1314                store_vector4( inst, machine, result );
1315             }
1316             break;
1317          case OPCODE_TEX: /* Both ARB and NV frag prog */
1318             /* Texel lookup */
1319             {
1320                /* Note: only use the precomputed lambda value when we're
1321                 * sampling texture unit [K] with texcoord[K].
1322                 * Otherwise, the lambda value may have no relation to the
1323                 * instruction's texcoord or texture image.  Using the wrong
1324                 * lambda is usually bad news.
1325                 * The rest of the time, just use zero (until we get a more
1326                 * sophisticated way of computing lambda).
1327                 */
1328                GLfloat coord[4], color[4], lambda;
1329                if (inst->SrcReg[0].File == PROGRAM_INPUT &&
1330                    inst->SrcReg[0].Index == FRAG_ATTRIB_TEX0+inst->TexSrcUnit)
1331                   lambda = span->array->lambda[inst->TexSrcUnit][column];
1332                else
1333                   lambda = 0.0;
1334                fetch_vector4(ctx, &inst->SrcReg[0], machine, program, coord);
1335                fetch_texel( ctx, coord, lambda, inst->TexSrcUnit, color );
1336                if (DEBUG_FRAG) {
1337                   printf("TEX (%g, %g, %g, %g) = texture[%d][%g, %g, %g, %g], "
1338                          "lod %f\n",
1339                          color[0], color[1], color[2], color[3],
1340                          inst->TexSrcUnit,
1341                          coord[0], coord[1], coord[2], coord[3], lambda);
1342                }
1343                store_vector4( inst, machine, color );
1344             }
1345             break;
1346          case OPCODE_TXB: /* GL_ARB_fragment_program only */
1347             /* Texel lookup with LOD bias */
1348             {
1349                GLfloat coord[4], color[4], lambda, bias;
1350                if (inst->SrcReg[0].File == PROGRAM_INPUT &&
1351                    inst->SrcReg[0].Index == FRAG_ATTRIB_TEX0+inst->TexSrcUnit)
1352                   lambda = span->array->lambda[inst->TexSrcUnit][column];
1353                else
1354                   lambda = 0.0;
1355                fetch_vector4(ctx, &inst->SrcReg[0], machine, program, coord);
1356                /* coord[3] is the bias to add to lambda */
1357                bias = ctx->Texture.Unit[inst->TexSrcUnit].LodBias
1358                     + ctx->Texture.Unit[inst->TexSrcUnit]._Current->LodBias
1359                     + coord[3];
1360                fetch_texel(ctx, coord, lambda + bias, inst->TexSrcUnit, color);
1361                store_vector4( inst, machine, color );
1362             }
1363             break;
1364          case OPCODE_TXD: /* GL_NV_fragment_program only */
1365             /* Texture lookup w/ partial derivatives for LOD */
1366             {
1367                GLfloat texcoord[4], dtdx[4], dtdy[4], color[4];
1368                fetch_vector4( ctx, &inst->SrcReg[0], machine, program, texcoord );
1369                fetch_vector4( ctx, &inst->SrcReg[1], machine, program, dtdx );
1370                fetch_vector4( ctx, &inst->SrcReg[2], machine, program, dtdy );
1371                fetch_texel_deriv( ctx, texcoord, dtdx, dtdy, inst->TexSrcUnit,
1372                                   color );
1373                store_vector4( inst, machine, color );
1374             }
1375             break;
1376          case OPCODE_TXP: /* GL_ARB_fragment_program only */
1377             /* Texture lookup w/ projective divide */
1378             {
1379                GLfloat texcoord[4], color[4], lambda;
1380                if (inst->SrcReg[0].File == PROGRAM_INPUT &&
1381                    inst->SrcReg[0].Index == FRAG_ATTRIB_TEX0+inst->TexSrcUnit)
1382                   lambda = span->array->lambda[inst->TexSrcUnit][column];
1383                else
1384                   lambda = 0.0;
1385                fetch_vector4(ctx, &inst->SrcReg[0], machine, program,texcoord);
1386                /* Not so sure about this test - if texcoord[3] is
1387                 * zero, we'd probably be fine except for an ASSERT in
1388                 * IROUND_POS() which gets triggered by the inf values created.
1389                 */
1390                if (texcoord[3] != 0.0) {
1391                   texcoord[0] /= texcoord[3];
1392                   texcoord[1] /= texcoord[3];
1393                   texcoord[2] /= texcoord[3];
1394                }
1395                fetch_texel( ctx, texcoord, lambda, inst->TexSrcUnit, color );
1396                store_vector4( inst, machine, color );
1397             }
1398             break;
1399          case OPCODE_TXP_NV: /* GL_NV_fragment_program only */
1400             /* Texture lookup w/ projective divide */
1401             {
1402                GLfloat texcoord[4], color[4], lambda;
1403                if (inst->SrcReg[0].File == PROGRAM_INPUT &&
1404                    inst->SrcReg[0].Index == FRAG_ATTRIB_TEX0+inst->TexSrcUnit)
1405                   lambda = span->array->lambda[inst->TexSrcUnit][column];
1406                else
1407                   lambda = 0.0;
1408                fetch_vector4(ctx, &inst->SrcReg[0], machine, program,texcoord);
1409                if (inst->TexSrcTarget != TEXTURE_CUBE_INDEX &&
1410                    texcoord[3] != 0.0) {
1411                   texcoord[0] /= texcoord[3];
1412                   texcoord[1] /= texcoord[3];
1413                   texcoord[2] /= texcoord[3];
1414                }
1415                fetch_texel( ctx, texcoord, lambda, inst->TexSrcUnit, color );
1416                store_vector4( inst, machine, color );
1417             }
1418             break;
1419          case OPCODE_UP2H: /* unpack two 16-bit floats */
1420             {
1421                GLfloat a[4], result[4];
1422                const GLuint *rawBits = (const GLuint *) a;
1423                GLhalfNV hx, hy;
1424                fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
1425                hx = rawBits[0] & 0xffff;
1426                hy = rawBits[0] >> 16;
1427                result[0] = result[2] = _mesa_half_to_float(hx);
1428                result[1] = result[3] = _mesa_half_to_float(hy);
1429                store_vector4( inst, machine, result );
1430             }
1431             break;
1432          case OPCODE_UP2US: /* unpack two GLushorts */
1433             {
1434                GLfloat a[4], result[4];
1435                const GLuint *rawBits = (const GLuint *) a;
1436                GLushort usx, usy;
1437                fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
1438                usx = rawBits[0] & 0xffff;
1439                usy = rawBits[0] >> 16;
1440                result[0] = result[2] = usx * (1.0f / 65535.0f);
1441                result[1] = result[3] = usy * (1.0f / 65535.0f);
1442                store_vector4( inst, machine, result );
1443             }
1444             break;
1445          case OPCODE_UP4B: /* unpack four GLbytes */
1446             {
1447                GLfloat a[4], result[4];
1448                const GLuint *rawBits = (const GLuint *) a;
1449                fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
1450                result[0] = (((rawBits[0] >>  0) & 0xff) - 128) / 127.0F;
1451                result[1] = (((rawBits[0] >>  8) & 0xff) - 128) / 127.0F;
1452                result[2] = (((rawBits[0] >> 16) & 0xff) - 128) / 127.0F;
1453                result[3] = (((rawBits[0] >> 24) & 0xff) - 128) / 127.0F;
1454                store_vector4( inst, machine, result );
1455             }
1456             break;
1457          case OPCODE_UP4UB: /* unpack four GLubytes */
1458             {
1459                GLfloat a[4], result[4];
1460                const GLuint *rawBits = (const GLuint *) a;
1461                fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
1462                result[0] = ((rawBits[0] >>  0) & 0xff) / 255.0F;
1463                result[1] = ((rawBits[0] >>  8) & 0xff) / 255.0F;
1464                result[2] = ((rawBits[0] >> 16) & 0xff) / 255.0F;
1465                result[3] = ((rawBits[0] >> 24) & 0xff) / 255.0F;
1466                store_vector4( inst, machine, result );
1467             }
1468             break;
1469          case OPCODE_XPD: /* cross product */
1470             {
1471                GLfloat a[4], b[4], result[4];
1472                fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1473                fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
1474                result[0] = a[1] * b[2] - a[2] * b[1];
1475                result[1] = a[2] * b[0] - a[0] * b[2];
1476                result[2] = a[0] * b[1] - a[1] * b[0];
1477                result[3] = 1.0;
1478                store_vector4( inst, machine, result );
1479             }
1480             break;
1481          case OPCODE_X2D: /* 2-D matrix transform */
1482             {
1483                GLfloat a[4], b[4], c[4], result[4];
1484                fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1485                fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
1486                fetch_vector4( ctx, &inst->SrcReg[2], machine, program, c );
1487                result[0] = a[0] + b[0] * c[0] + b[1] * c[1];
1488                result[1] = a[1] + b[0] * c[2] + b[1] * c[3];
1489                result[2] = a[2] + b[0] * c[0] + b[1] * c[1];
1490                result[3] = a[3] + b[0] * c[2] + b[1] * c[3];
1491                store_vector4( inst, machine, result );
1492             }
1493             break;
1494          case OPCODE_PRINT:
1495             {
1496                if (inst->SrcReg[0].File != -1) {
1497                   GLfloat a[4];
1498                   fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a);
1499                   _mesa_printf("%s%g, %g, %g, %g\n", (const char *) inst->Data,
1500                                a[0], a[1], a[2], a[3]);
1501                }
1502                else {
1503                   _mesa_printf("%s\n", (const char *) inst->Data);
1504                }
1505             }
1506             break;
1507          case OPCODE_END:
1508             return GL_TRUE;
1509          default:
1510             _mesa_problem(ctx, "Bad opcode %d in _mesa_exec_fragment_program",
1511                           inst->Opcode);
1512             return GL_TRUE; /* return value doesn't matter */
1513       }
1514    }
1515    return GL_TRUE;
1516 }
1517
1518
1519 /**
1520  * Initialize the virtual fragment program machine state prior to running
1521  * fragment program on a fragment.  This involves initializing the input
1522  * registers, condition codes, etc.
1523  * \param machine  the virtual machine state to init
1524  * \param program  the fragment program we're about to run
1525  * \param span  the span of pixels we'll operate on
1526  * \param col  which element (column) of the span we'll operate on
1527  */
1528 static void
1529 init_machine( GLcontext *ctx, struct fp_machine *machine,
1530               const struct gl_fragment_program *program,
1531               const SWspan *span, GLuint col )
1532 {
1533    GLuint inputsRead = program->Base.InputsRead;
1534    GLuint u;
1535
1536    if (ctx->FragmentProgram.CallbackEnabled)
1537       inputsRead = ~0;
1538
1539    if (program->Base.Target == GL_FRAGMENT_PROGRAM_NV) {
1540       /* Clear temporary registers (undefined for ARB_f_p) */
1541       _mesa_bzero(machine->Temporaries,
1542                   MAX_NV_FRAGMENT_PROGRAM_TEMPS * 4 * sizeof(GLfloat));
1543    }
1544
1545    /* Load input registers */
1546    if (inputsRead & (1 << FRAG_ATTRIB_WPOS)) {
1547       GLfloat *wpos = machine->Inputs[FRAG_ATTRIB_WPOS];
1548       ASSERT(span->arrayMask & SPAN_Z);
1549       if (span->arrayMask & SPAN_XY) {
1550          wpos[0] = (GLfloat) span->array->x[col];
1551          wpos[1] = (GLfloat) span->array->y[col];
1552       }
1553       else {
1554          wpos[0] = (GLfloat) span->x + col;
1555          wpos[1] = (GLfloat) span->y;
1556       }
1557       wpos[2] = (GLfloat) span->array->z[col] / ctx->DrawBuffer->_DepthMaxF;
1558       wpos[3] = span->w + col * span->dwdx;
1559    }
1560    if (inputsRead & (1 << FRAG_ATTRIB_COL0)) {
1561       ASSERT(span->arrayMask & SPAN_RGBA);
1562       COPY_4V(machine->Inputs[FRAG_ATTRIB_COL0],
1563               span->array->color.sz4.rgba[col]);
1564    }
1565    if (inputsRead & (1 << FRAG_ATTRIB_COL1)) {
1566       ASSERT(span->arrayMask & SPAN_SPEC);
1567       COPY_4V(machine->Inputs[FRAG_ATTRIB_COL1],
1568               span->array->color.sz4.spec[col]);
1569    }
1570    if (inputsRead & (1 << FRAG_ATTRIB_FOGC)) {
1571       GLfloat *fogc = machine->Inputs[FRAG_ATTRIB_FOGC];
1572       ASSERT(span->arrayMask & SPAN_FOG);
1573       fogc[0] = span->array->fog[col];
1574       fogc[1] = 0.0F;
1575       fogc[2] = 0.0F;
1576       fogc[3] = 0.0F;
1577    }
1578    for (u = 0; u < ctx->Const.MaxTextureCoordUnits; u++) {
1579       if (inputsRead & (1 << (FRAG_ATTRIB_TEX0 + u))) {
1580          GLfloat *tex = machine->Inputs[FRAG_ATTRIB_TEX0 + u];
1581          /*ASSERT(ctx->Texture._EnabledCoordUnits & (1 << u));*/
1582          COPY_4V(tex, span->array->texcoords[u][col]);
1583          /*ASSERT(tex[0] != 0 || tex[1] != 0 || tex[2] != 0);*/
1584       }
1585    }
1586
1587    /* init condition codes */
1588    machine->CondCodes[0] = COND_EQ;
1589    machine->CondCodes[1] = COND_EQ;
1590    machine->CondCodes[2] = COND_EQ;
1591    machine->CondCodes[3] = COND_EQ;
1592
1593    /* init call stack */
1594    machine->StackDepth = 0;
1595 }
1596
1597
1598 /**
1599  * Run fragment program on the pixels in span from 'start' to 'end' - 1.
1600  */
1601 static void
1602 run_program(GLcontext *ctx, SWspan *span, GLuint start, GLuint end)
1603 {
1604    const struct gl_fragment_program *program = ctx->FragmentProgram._Current;
1605    struct fp_machine machine;
1606    GLuint i;
1607
1608    CurrentMachine = &machine;
1609
1610    for (i = start; i < end; i++) {
1611       if (span->array->mask[i]) {
1612          init_machine(ctx, &machine, program, span, i);
1613
1614          if (execute_program(ctx, program, ~0, &machine, span, i)) {
1615             /* Store result color */
1616             COPY_4V(span->array->color.sz4.rgba[i],
1617                     machine.Outputs[FRAG_RESULT_COLR]);
1618
1619             /* Store result depth/z */
1620             if (program->Base.OutputsWritten & (1 << FRAG_RESULT_DEPR)) {
1621                const GLfloat depth = machine.Outputs[FRAG_RESULT_DEPR][2];
1622                if (depth <= 0.0)
1623                   span->array->z[i] = 0;
1624                else if (depth >= 1.0)
1625                   span->array->z[i] = ctx->DrawBuffer->_DepthMax;
1626                else
1627                   span->array->z[i] = IROUND(depth * ctx->DrawBuffer->_DepthMaxF);
1628             }
1629          }
1630          else {
1631             /* killed fragment */
1632             span->array->mask[i] = GL_FALSE;
1633             span->writeAll = GL_FALSE;
1634          }
1635       }
1636    }
1637
1638    CurrentMachine = NULL;
1639 }
1640
1641
1642 /**
1643  * Execute the current fragment program for all the fragments
1644  * in the given span.
1645  */
1646 void
1647 _swrast_exec_fragment_program( GLcontext *ctx, SWspan *span )
1648 {
1649    const struct gl_fragment_program *program = ctx->FragmentProgram._Current;
1650
1651    /* incoming colors should be floats */
1652    ASSERT(span->array->ChanType == GL_FLOAT);
1653
1654    ctx->_CurrentProgram = GL_FRAGMENT_PROGRAM_ARB; /* or NV, doesn't matter */
1655
1656    run_program(ctx, span, 0, span->end);
1657
1658    if (program->Base.OutputsWritten & (1 << FRAG_RESULT_DEPR)) {
1659       span->interpMask &= ~SPAN_Z;
1660       span->arrayMask |= SPAN_Z;
1661    }
1662
1663    ctx->_CurrentProgram = 0;
1664 }
1665