src/mesa/shader/prog_execute.c

   1 /*
   2  * Mesa 3-D graphics library
   3  * Version:  7.3
   4  *
   5  * Copyright (C) 1999-2008  Brian Paul   All Rights Reserved.
   6  *
   7  * Permission is hereby granted, free of charge, to any person obtaining a
   8  * copy of this software and associated documentation files (the "Software"),
   9  * to deal in the Software without restriction, including without limitation
  10  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  11  * and/or sell copies of the Software, and to permit persons to whom the
  12  * Software is furnished to do so, subject to the following conditions:
  13  *
  14  * The above copyright notice and this permission notice shall be included
  15  * in all copies or substantial portions of the Software.
  16  *
  17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  18  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  20  * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
  21  * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  22  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  23  */
  24
  25 /**
  26  * \file prog_execute.c
  27  * Software interpreter for vertex/fragment programs.
  28  * \author Brian Paul
  29  */
  30
  31 /*
  32  * NOTE: we do everything in single-precision floating point; we don't
  33  * currently observe the single/half/fixed-precision qualifiers.
  34  *
  35  */
  36
  37
  38 #include "main/glheader.h"
  39 #include "main/colormac.h"
  40 #include "main/context.h"
  41 #include "program.h"
  42 #include "prog_execute.h"
  43 #include "prog_instruction.h"
  44 #include "prog_parameter.h"
  45 #include "prog_print.h"
  46 #include "prog_noise.h"
  47
  48
  49 /* debug predicate */
  50 #define DEBUG_PROG 0
  51
  52
  53 /**
  54  * Set x to positive or negative infinity.
  55  */
  56 #if defined(USE_IEEE) || defined(_WIN32)
  57 #define SET_POS_INFINITY(x)                  \
  58    do {                                      \
  59          fi_type fi;                         \
  60          fi.i = 0x7F800000;                  \
  61          x = fi.f;                           \
  62    } while (0)
  63 #define SET_NEG_INFINITY(x)                  \
  64    do {                                      \
  65          fi_type fi;                         \
  66          fi.i = 0xFF800000;                  \
  67          x = fi.f;                           \
  68    } while (0)
  69 #elif defined(VMS)
  70 #define SET_POS_INFINITY(x)  x = __MAXFLOAT
  71 #define SET_NEG_INFINITY(x)  x = -__MAXFLOAT
  72 #else
  73 #define SET_POS_INFINITY(x)  x = (GLfloat) HUGE_VAL
  74 #define SET_NEG_INFINITY(x)  x = (GLfloat) -HUGE_VAL
  75 #endif
  76
  77 #define SET_FLOAT_BITS(x, bits) ((fi_type *) (void *) &(x))->i = bits
  78
  79
  80 static const GLfloat ZeroVec[4] = { 0.0F, 0.0F, 0.0F, 0.0F };
  81
  82
  83
  84 /**
  85  * Return a pointer to the 4-element float vector specified by the given
  86  * source register.
  87  */
  88 static INLINE const GLfloat *
  89 get_src_register_pointer(const struct prog_src_register *source,
  90                          const struct gl_program_machine *machine)
  91 {
  92    const struct gl_program *prog = machine->CurProgram;
  93    GLint reg = source->Index;
  94
  95    if (source->RelAddr) {
  96       /* add address register value to src index/offset */
  97       reg += machine->AddressReg[0][0];
  98       if (reg < 0) {
  99          return ZeroVec;
 100       }
 101    }
 102
 103    switch (source->File) {
 104    case PROGRAM_TEMPORARY:
 105       if (reg >= MAX_PROGRAM_TEMPS)
 106          return ZeroVec;
 107       return machine->Temporaries[reg];
 108
 109    case PROGRAM_INPUT:
 110       if (prog->Target == GL_VERTEX_PROGRAM_ARB) {
 111          if (reg >= VERT_ATTRIB_MAX)
 112             return ZeroVec;
 113          return machine->VertAttribs[reg];
 114       }
 115       else {
 116          if (reg >= FRAG_ATTRIB_MAX)
 117             return ZeroVec;
 118          return machine->Attribs[reg][machine->CurElement];
 119       }
 120
 121    case PROGRAM_OUTPUT:
 122       if (reg >= MAX_PROGRAM_OUTPUTS)
 123          return ZeroVec;
 124       return machine->Outputs[reg];
 125
 126    case PROGRAM_LOCAL_PARAM:
 127       if (reg >= MAX_PROGRAM_LOCAL_PARAMS)
 128          return ZeroVec;
 129       return machine->CurProgram->LocalParams[reg];
 130
 131    case PROGRAM_ENV_PARAM:
 132       if (reg >= MAX_PROGRAM_ENV_PARAMS)
 133          return ZeroVec;
 134       return machine->EnvParams[reg];
 135
 136    case PROGRAM_STATE_VAR:
 137       /* Fallthrough */
 138    case PROGRAM_CONSTANT:
 139       /* Fallthrough */
 140    case PROGRAM_UNIFORM:
 141       /* Fallthrough */
 142    case PROGRAM_NAMED_PARAM:
 143       if (reg >= (GLint) prog->Parameters->NumParameters)
 144          return ZeroVec;
 145       return prog->Parameters->ParameterValues[reg];
 146
 147    default:
 148       _mesa_problem(NULL,
 149          "Invalid src register file %d in get_src_register_pointer()",
 150          source->File);
 151       return NULL;
 152    }
 153 }
 154
 155
 156 /**
 157  * Return a pointer to the 4-element float vector specified by the given
 158  * destination register.
 159  */
 160 static INLINE GLfloat *
 161 get_dst_register_pointer(const struct prog_dst_register *dest,
 162                          struct gl_program_machine *machine)
 163 {
 164    static GLfloat dummyReg[4];
 165    GLint reg = dest->Index;
 166
 167    if (dest->RelAddr) {
 168       /* add address register value to src index/offset */
 169       reg += machine->AddressReg[0][0];
 170       if (reg < 0) {
 171          return dummyReg;
 172       }
 173    }
 174
 175    switch (dest->File) {
 176    case PROGRAM_TEMPORARY:
 177       if (reg >= MAX_PROGRAM_TEMPS)
 178          return dummyReg;
 179       return machine->Temporaries[reg];
 180
 181    case PROGRAM_OUTPUT:
 182       if (reg >= MAX_PROGRAM_OUTPUTS)
 183          return dummyReg;
 184       return machine->Outputs[reg];
 185
 186    case PROGRAM_WRITE_ONLY:
 187       return dummyReg;
 188
 189    default:
 190       _mesa_problem(NULL,
 191          "Invalid dest register file %d in get_dst_register_pointer()",
 192          dest->File);
 193       return NULL;
 194    }
 195 }
 196
 197
 198
 199 /**
 200  * Fetch a 4-element float vector from the given source register.
 201  * Apply swizzling and negating as needed.
 202  */
 203 static void
 204 fetch_vector4(const struct prog_src_register *source,
 205               const struct gl_program_machine *machine, GLfloat result[4])
 206 {
 207    const GLfloat *src = get_src_register_pointer(source, machine);
 208    ASSERT(src);
 209
 210    if (source->Swizzle == SWIZZLE_NOOP) {
 211       /* no swizzling */
 212       COPY_4V(result, src);
 213    }
 214    else {
 215       ASSERT(GET_SWZ(source->Swizzle, 0) <= 3);
 216       ASSERT(GET_SWZ(source->Swizzle, 1) <= 3);
 217       ASSERT(GET_SWZ(source->Swizzle, 2) <= 3);
 218       ASSERT(GET_SWZ(source->Swizzle, 3) <= 3);
 219       result[0] = src[GET_SWZ(source->Swizzle, 0)];
 220       result[1] = src[GET_SWZ(source->Swizzle, 1)];
 221       result[2] = src[GET_SWZ(source->Swizzle, 2)];
 222       result[3] = src[GET_SWZ(source->Swizzle, 3)];
 223    }
 224
 225    if (source->Abs) {
 226       result[0] = FABSF(result[0]);
 227       result[1] = FABSF(result[1]);
 228       result[2] = FABSF(result[2]);
 229       result[3] = FABSF(result[3]);
 230    }
 231    if (source->Negate) {
 232       ASSERT(source->Negate == NEGATE_XYZW);
 233       result[0] = -result[0];
 234       result[1] = -result[1];
 235       result[2] = -result[2];
 236       result[3] = -result[3];
 237    }
 238
 239 #ifdef NAN_CHECK
 240    assert(!IS_INF_OR_NAN(result[0]));
 241    assert(!IS_INF_OR_NAN(result[0]));
 242    assert(!IS_INF_OR_NAN(result[0]));
 243    assert(!IS_INF_OR_NAN(result[0]));
 244 #endif
 245 }
 246
 247
 248 /**
 249  * Fetch a 4-element uint vector from the given source register.
 250  * Apply swizzling but not negation/abs.
 251  */
 252 static void
 253 fetch_vector4ui(const struct prog_src_register *source,
 254                 const struct gl_program_machine *machine, GLuint result[4])
 255 {
 256    const GLuint *src = (GLuint *) get_src_register_pointer(source, machine);
 257    ASSERT(src);
 258
 259    if (source->Swizzle == SWIZZLE_NOOP) {
 260       /* no swizzling */
 261       COPY_4V(result, src);
 262    }
 263    else {
 264       ASSERT(GET_SWZ(source->Swizzle, 0) <= 3);
 265       ASSERT(GET_SWZ(source->Swizzle, 1) <= 3);
 266       ASSERT(GET_SWZ(source->Swizzle, 2) <= 3);
 267       ASSERT(GET_SWZ(source->Swizzle, 3) <= 3);
 268       result[0] = src[GET_SWZ(source->Swizzle, 0)];
 269       result[1] = src[GET_SWZ(source->Swizzle, 1)];
 270       result[2] = src[GET_SWZ(source->Swizzle, 2)];
 271       result[3] = src[GET_SWZ(source->Swizzle, 3)];
 272    }
 273
 274    /* Note: no Negate or Abs here */
 275 }
 276
 277
 278
 279 /**
 280  * Fetch the derivative with respect to X or Y for the given register.
 281  * XXX this currently only works for fragment program input attribs.
 282  */
 283 static void
 284 fetch_vector4_deriv(GLcontext * ctx,
 285                     const struct prog_src_register *source,
 286                     const struct gl_program_machine *machine,
 287                     char xOrY, GLfloat result[4])
 288 {
 289    if (source->File == PROGRAM_INPUT &&
 290        source->Index < (GLint) machine->NumDeriv) {
 291       const GLint col = machine->CurElement;
 292       const GLfloat w = machine->Attribs[FRAG_ATTRIB_WPOS][col][3];
 293       const GLfloat invQ = 1.0f / w;
 294       GLfloat deriv[4];
 295
 296       if (xOrY == 'X') {
 297          deriv[0] = machine->DerivX[source->Index][0] * invQ;
 298          deriv[1] = machine->DerivX[source->Index][1] * invQ;
 299          deriv[2] = machine->DerivX[source->Index][2] * invQ;
 300          deriv[3] = machine->DerivX[source->Index][3] * invQ;
 301       }
 302       else {
 303          deriv[0] = machine->DerivY[source->Index][0] * invQ;
 304          deriv[1] = machine->DerivY[source->Index][1] * invQ;
 305          deriv[2] = machine->DerivY[source->Index][2] * invQ;
 306          deriv[3] = machine->DerivY[source->Index][3] * invQ;
 307       }
 308
 309       result[0] = deriv[GET_SWZ(source->Swizzle, 0)];
 310       result[1] = deriv[GET_SWZ(source->Swizzle, 1)];
 311       result[2] = deriv[GET_SWZ(source->Swizzle, 2)];
 312       result[3] = deriv[GET_SWZ(source->Swizzle, 3)];
 313
 314       if (source->Abs) {
 315          result[0] = FABSF(result[0]);
 316          result[1] = FABSF(result[1]);
 317          result[2] = FABSF(result[2]);
 318          result[3] = FABSF(result[3]);
 319       }
 320       if (source->Negate) {
 321          ASSERT(source->Negate == NEGATE_XYZW);
 322          result[0] = -result[0];
 323          result[1] = -result[1];
 324          result[2] = -result[2];
 325          result[3] = -result[3];
 326       }
 327    }
 328    else {
 329       ASSIGN_4V(result, 0.0, 0.0, 0.0, 0.0);
 330    }
 331 }
 332
 333
 334 /**
 335  * As above, but only return result[0] element.
 336  */
 337 static void
 338 fetch_vector1(const struct prog_src_register *source,
 339               const struct gl_program_machine *machine, GLfloat result[4])
 340 {
 341    const GLfloat *src = get_src_register_pointer(source, machine);
 342    ASSERT(src);
 343
 344    result[0] = src[GET_SWZ(source->Swizzle, 0)];
 345
 346    if (source->Abs) {
 347       result[0] = FABSF(result[0]);
 348    }
 349    if (source->Negate) {
 350       result[0] = -result[0];
 351    }
 352 }
 353
 354
 355 /**
 356  * Fetch texel from texture.  Use partial derivatives when possible.
 357  */
 358 static INLINE void
 359 fetch_texel(GLcontext *ctx,
 360             const struct gl_program_machine *machine,
 361             const struct prog_instruction *inst,
 362             const GLfloat texcoord[4], GLfloat lodBias,
 363             GLfloat color[4])
 364 {
 365    const GLuint unit = machine->Samplers[inst->TexSrcUnit];
 366
 367    /* Note: we only have the right derivatives for fragment input attribs.
 368     */
 369    if (machine->NumDeriv > 0 &&
 370        inst->SrcReg[0].File == PROGRAM_INPUT &&
 371        inst->SrcReg[0].Index == FRAG_ATTRIB_TEX0 + inst->TexSrcUnit) {
 372       /* simple texture fetch for which we should have derivatives */
 373       GLuint attr = inst->SrcReg[0].Index;
 374       machine->FetchTexelDeriv(ctx, texcoord,
 375                                machine->DerivX[attr],
 376                                machine->DerivY[attr],
 377                                lodBias, unit, color);
 378    }
 379    else {
 380       machine->FetchTexelLod(ctx, texcoord, lodBias, unit, color);
 381    }
 382 }
 383
 384
 385 /**
 386  * Test value against zero and return GT, LT, EQ or UN if NaN.
 387  */
 388 static INLINE GLuint
 389 generate_cc(float value)
 390 {
 391    if (value != value)
 392       return COND_UN;           /* NaN */
 393    if (value > 0.0F)
 394       return COND_GT;
 395    if (value < 0.0F)
 396       return COND_LT;
 397    return COND_EQ;
 398 }
 399
 400
 401 /**
 402  * Test if the ccMaskRule is satisfied by the given condition code.
 403  * Used to mask destination writes according to the current condition code.
 404  */
 405 static INLINE GLboolean
 406 test_cc(GLuint condCode, GLuint ccMaskRule)
 407 {
 408    switch (ccMaskRule) {
 409    case COND_EQ: return (condCode == COND_EQ);
 410    case COND_NE: return (condCode != COND_EQ);
 411    case COND_LT: return (condCode == COND_LT);
 412    case COND_GE: return (condCode == COND_GT || condCode == COND_EQ);
 413    case COND_LE: return (condCode == COND_LT || condCode == COND_EQ);
 414    case COND_GT: return (condCode == COND_GT);
 415    case COND_TR: return GL_TRUE;
 416    case COND_FL: return GL_FALSE;
 417    default:      return GL_TRUE;
 418    }
 419 }
 420
 421
 422 /**
 423  * Evaluate the 4 condition codes against a predicate and return GL_TRUE
 424  * or GL_FALSE to indicate result.
 425  */
 426 static INLINE GLboolean
 427 eval_condition(const struct gl_program_machine *machine,
 428                const struct prog_instruction *inst)
 429 {
 430    const GLuint swizzle = inst->DstReg.CondSwizzle;
 431    const GLuint condMask = inst->DstReg.CondMask;
 432    if (test_cc(machine->CondCodes[GET_SWZ(swizzle, 0)], condMask) ||
 433        test_cc(machine->CondCodes[GET_SWZ(swizzle, 1)], condMask) ||
 434        test_cc(machine->CondCodes[GET_SWZ(swizzle, 2)], condMask) ||
 435        test_cc(machine->CondCodes[GET_SWZ(swizzle, 3)], condMask)) {
 436       return GL_TRUE;
 437    }
 438    else {
 439       return GL_FALSE;
 440    }
 441 }
 442
 443
 444
 445 /**
 446  * Store 4 floats into a register.  Observe the instructions saturate and
 447  * set-condition-code flags.
 448  */
 449 static void
 450 store_vector4(const struct prog_instruction *inst,
 451               struct gl_program_machine *machine, const GLfloat value[4])
 452 {
 453    const struct prog_dst_register *dstReg = &(inst->DstReg);
 454    const GLboolean clamp = inst->SaturateMode == SATURATE_ZERO_ONE;
 455    GLuint writeMask = dstReg->WriteMask;
 456    GLfloat clampedValue[4];
 457    GLfloat *dst = get_dst_register_pointer(dstReg, machine);
 458
 459 #if 0
 460    if (value[0] > 1.0e10 ||
 461        IS_INF_OR_NAN(value[0]) ||
 462        IS_INF_OR_NAN(value[1]) ||
 463        IS_INF_OR_NAN(value[2]) || IS_INF_OR_NAN(value[3]))
 464       printf("store %g %g %g %g\n", value[0], value[1], value[2], value[3]);
 465 #endif
 466
 467    if (clamp) {
 468       clampedValue[0] = CLAMP(value[0], 0.0F, 1.0F);
 469       clampedValue[1] = CLAMP(value[1], 0.0F, 1.0F);
 470       clampedValue[2] = CLAMP(value[2], 0.0F, 1.0F);
 471       clampedValue[3] = CLAMP(value[3], 0.0F, 1.0F);
 472       value = clampedValue;
 473    }
 474
 475    if (dstReg->CondMask != COND_TR) {
 476       /* condition codes may turn off some writes */
 477       if (writeMask & WRITEMASK_X) {
 478          if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 0)],
 479                       dstReg->CondMask))
 480             writeMask &= ~WRITEMASK_X;
 481       }
 482       if (writeMask & WRITEMASK_Y) {
 483          if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 1)],
 484                       dstReg->CondMask))
 485             writeMask &= ~WRITEMASK_Y;
 486       }
 487       if (writeMask & WRITEMASK_Z) {
 488          if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 2)],
 489                       dstReg->CondMask))
 490             writeMask &= ~WRITEMASK_Z;
 491       }
 492       if (writeMask & WRITEMASK_W) {
 493          if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 3)],
 494                       dstReg->CondMask))
 495             writeMask &= ~WRITEMASK_W;
 496       }
 497    }
 498
 499 #ifdef NAN_CHECK
 500    assert(!IS_INF_OR_NAN(value[0]));
 501    assert(!IS_INF_OR_NAN(value[0]));
 502    assert(!IS_INF_OR_NAN(value[0]));
 503    assert(!IS_INF_OR_NAN(value[0]));
 504 #endif
 505
 506    if (writeMask & WRITEMASK_X)
 507       dst[0] = value[0];
 508    if (writeMask & WRITEMASK_Y)
 509       dst[1] = value[1];
 510    if (writeMask & WRITEMASK_Z)
 511       dst[2] = value[2];
 512    if (writeMask & WRITEMASK_W)
 513       dst[3] = value[3];
 514
 515    if (inst->CondUpdate) {
 516       if (writeMask & WRITEMASK_X)
 517          machine->CondCodes[0] = generate_cc(value[0]);
 518       if (writeMask & WRITEMASK_Y)
 519          machine->CondCodes[1] = generate_cc(value[1]);
 520       if (writeMask & WRITEMASK_Z)
 521          machine->CondCodes[2] = generate_cc(value[2]);
 522       if (writeMask & WRITEMASK_W)
 523          machine->CondCodes[3] = generate_cc(value[3]);
 524 #if DEBUG_PROG
 525       printf("CondCodes=(%s,%s,%s,%s) for:\n",
 526              _mesa_condcode_string(machine->CondCodes[0]),
 527              _mesa_condcode_string(machine->CondCodes[1]),
 528              _mesa_condcode_string(machine->CondCodes[2]),
 529              _mesa_condcode_string(machine->CondCodes[3]));
 530 #endif
 531    }
 532 }
 533
 534
 535 /**
 536  * Store 4 uints into a register.  Observe the set-condition-code flags.
 537  */
 538 static void
 539 store_vector4ui(const struct prog_instruction *inst,
 540                 struct gl_program_machine *machine, const GLuint value[4])
 541 {
 542    const struct prog_dst_register *dstReg = &(inst->DstReg);
 543    GLuint writeMask = dstReg->WriteMask;
 544    GLuint *dst = (GLuint *) get_dst_register_pointer(dstReg, machine);
 545
 546    if (dstReg->CondMask != COND_TR) {
 547       /* condition codes may turn off some writes */
 548       if (writeMask & WRITEMASK_X) {
 549          if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 0)],
 550                       dstReg->CondMask))
 551             writeMask &= ~WRITEMASK_X;
 552       }
 553       if (writeMask & WRITEMASK_Y) {
 554          if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 1)],
 555                       dstReg->CondMask))
 556             writeMask &= ~WRITEMASK_Y;
 557       }
 558       if (writeMask & WRITEMASK_Z) {
 559          if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 2)],
 560                       dstReg->CondMask))
 561             writeMask &= ~WRITEMASK_Z;
 562       }
 563       if (writeMask & WRITEMASK_W) {
 564          if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 3)],
 565                       dstReg->CondMask))
 566             writeMask &= ~WRITEMASK_W;
 567       }
 568    }
 569
 570    if (writeMask & WRITEMASK_X)
 571       dst[0] = value[0];
 572    if (writeMask & WRITEMASK_Y)
 573       dst[1] = value[1];
 574    if (writeMask & WRITEMASK_Z)
 575       dst[2] = value[2];
 576    if (writeMask & WRITEMASK_W)
 577       dst[3] = value[3];
 578
 579    if (inst->CondUpdate) {
 580       if (writeMask & WRITEMASK_X)
 581          machine->CondCodes[0] = generate_cc(value[0]);
 582       if (writeMask & WRITEMASK_Y)
 583          machine->CondCodes[1] = generate_cc(value[1]);
 584       if (writeMask & WRITEMASK_Z)
 585          machine->CondCodes[2] = generate_cc(value[2]);
 586       if (writeMask & WRITEMASK_W)
 587          machine->CondCodes[3] = generate_cc(value[3]);
 588 #if DEBUG_PROG
 589       printf("CondCodes=(%s,%s,%s,%s) for:\n",
 590              _mesa_condcode_string(machine->CondCodes[0]),
 591              _mesa_condcode_string(machine->CondCodes[1]),
 592              _mesa_condcode_string(machine->CondCodes[2]),
 593              _mesa_condcode_string(machine->CondCodes[3]));
 594 #endif
 595    }
 596 }
 597
 598
 599
 600 /**
 601  * Execute the given vertex/fragment program.
 602  *
 603  * \param ctx  rendering context
 604  * \param program  the program to execute
 605  * \param machine  machine state (must be initialized)
 606  * \return GL_TRUE if program completed or GL_FALSE if program executed KIL.
 607  */
 608 GLboolean
 609 _mesa_execute_program(GLcontext * ctx,
 610                       const struct gl_program *program,
 611                       struct gl_program_machine *machine)
 612 {
 613    const GLuint numInst = program->NumInstructions;
 614    const GLuint maxExec = 10000;
 615    GLuint pc, numExec = 0;
 616
 617    machine->CurProgram = program;
 618
 619    if (DEBUG_PROG) {
 620       printf("execute program %u --------------------\n", program->Id);
 621    }
 622
 623    if (program->Target == GL_VERTEX_PROGRAM_ARB) {
 624       machine->EnvParams = ctx->VertexProgram.Parameters;
 625    }
 626    else {
 627       machine->EnvParams = ctx->FragmentProgram.Parameters;
 628    }
 629
 630    for (pc = 0; pc < numInst; pc++) {
 631       const struct prog_instruction *inst = program->Instructions + pc;
 632
 633       if (DEBUG_PROG) {
 634          _mesa_print_instruction(inst);
 635       }
 636
 637       switch (inst->Opcode) {
 638       case OPCODE_ABS:
 639          {
 640             GLfloat a[4], result[4];
 641             fetch_vector4(&inst->SrcReg[0], machine, a);
 642             result[0] = FABSF(a[0]);
 643             result[1] = FABSF(a[1]);
 644             result[2] = FABSF(a[2]);
 645             result[3] = FABSF(a[3]);
 646             store_vector4(inst, machine, result);
 647          }
 648          break;
 649       case OPCODE_ADD:
 650          {
 651             GLfloat a[4], b[4], result[4];
 652             fetch_vector4(&inst->SrcReg[0], machine, a);
 653             fetch_vector4(&inst->SrcReg[1], machine, b);
 654             result[0] = a[0] + b[0];
 655             result[1] = a[1] + b[1];
 656             result[2] = a[2] + b[2];
 657             result[3] = a[3] + b[3];
 658             store_vector4(inst, machine, result);
 659             if (DEBUG_PROG) {
 660                printf("ADD (%g %g %g %g) = (%g %g %g %g) + (%g %g %g %g)\n",
 661                       result[0], result[1], result[2], result[3],
 662                       a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
 663             }
 664          }
 665          break;
 666       case OPCODE_AND:     /* bitwise AND */
 667          {
 668             GLuint a[4], b[4], result[4];
 669             fetch_vector4ui(&inst->SrcReg[0], machine, a);
 670             fetch_vector4ui(&inst->SrcReg[1], machine, b);
 671             result[0] = a[0] & b[0];
 672             result[1] = a[1] & b[1];
 673             result[2] = a[2] & b[2];
 674             result[3] = a[3] & b[3];
 675             store_vector4ui(inst, machine, result);
 676          }
 677          break;
 678       case OPCODE_ARL:
 679          {
 680             GLfloat t[4];
 681             fetch_vector4(&inst->SrcReg[0], machine, t);
 682             machine->AddressReg[0][0] = IFLOOR(t[0]);
 683          }
 684          break;
 685       case OPCODE_BGNLOOP:
 686          /* no-op */
 687          break;
 688       case OPCODE_ENDLOOP:
 689          /* subtract 1 here since pc is incremented by for(pc) loop */
 690          pc = inst->BranchTarget - 1;   /* go to matching BNGLOOP */
 691          break;
 692       case OPCODE_BGNSUB:      /* begin subroutine */
 693          break;
 694       case OPCODE_ENDSUB:      /* end subroutine */
 695          break;
 696       case OPCODE_BRA:         /* branch (conditional) */
 697          /* fall-through */
 698       case OPCODE_BRK:         /* break out of loop (conditional) */
 699          /* fall-through */
 700       case OPCODE_CONT:        /* continue loop (conditional) */
 701          if (eval_condition(machine, inst)) {
 702             /* take branch */
 703             /* Subtract 1 here since we'll do pc++ at end of for-loop */
 704             pc = inst->BranchTarget - 1;
 705          }
 706          break;
 707       case OPCODE_CAL:         /* Call subroutine (conditional) */
 708          if (eval_condition(machine, inst)) {
 709             /* call the subroutine */
 710             if (machine->StackDepth >= MAX_PROGRAM_CALL_DEPTH) {
 711                return GL_TRUE;  /* Per GL_NV_vertex_program2 spec */
 712             }
 713             machine->CallStack[machine->StackDepth++] = pc + 1; /* next inst */
 714             /* Subtract 1 here since we'll do pc++ at end of for-loop */
 715             pc = inst->BranchTarget - 1;
 716          }
 717          break;
 718       case OPCODE_CMP:
 719          {
 720             GLfloat a[4], b[4], c[4], result[4];
 721             fetch_vector4(&inst->SrcReg[0], machine, a);
 722             fetch_vector4(&inst->SrcReg[1], machine, b);
 723             fetch_vector4(&inst->SrcReg[2], machine, c);
 724             result[0] = a[0] < 0.0F ? b[0] : c[0];
 725             result[1] = a[1] < 0.0F ? b[1] : c[1];
 726             result[2] = a[2] < 0.0F ? b[2] : c[2];
 727             result[3] = a[3] < 0.0F ? b[3] : c[3];
 728             store_vector4(inst, machine, result);
 729          }
 730          break;
 731       case OPCODE_COS:
 732          {
 733             GLfloat a[4], result[4];
 734             fetch_vector1(&inst->SrcReg[0], machine, a);
 735             result[0] = result[1] = result[2] = result[3]
 736                = (GLfloat) _mesa_cos(a[0]);
 737             store_vector4(inst, machine, result);
 738          }
 739          break;
 740       case OPCODE_DDX:         /* Partial derivative with respect to X */
 741          {
 742             GLfloat result[4];
 743             fetch_vector4_deriv(ctx, &inst->SrcReg[0], machine,
 744                                 'X', result);
 745             store_vector4(inst, machine, result);
 746          }
 747          break;
 748       case OPCODE_DDY:         /* Partial derivative with respect to Y */
 749          {
 750             GLfloat result[4];
 751             fetch_vector4_deriv(ctx, &inst->SrcReg[0], machine,
 752                                 'Y', result);
 753             store_vector4(inst, machine, result);
 754          }
 755          break;
 756       case OPCODE_DP2:
 757          {
 758             GLfloat a[4], b[4], result[4];
 759             fetch_vector4(&inst->SrcReg[0], machine, a);
 760             fetch_vector4(&inst->SrcReg[1], machine, b);
 761             result[0] = result[1] = result[2] = result[3] = DOT2(a, b);
 762             store_vector4(inst, machine, result);
 763             if (DEBUG_PROG) {
 764                printf("DP2 %g = (%g %g) . (%g %g)\n",
 765                       result[0], a[0], a[1], b[0], b[1]);
 766             }
 767          }
 768          break;
 769       case OPCODE_DP2A:
 770          {
 771             GLfloat a[4], b[4], c, result[4];
 772             fetch_vector4(&inst->SrcReg[0], machine, a);
 773             fetch_vector4(&inst->SrcReg[1], machine, b);
 774             fetch_vector1(&inst->SrcReg[1], machine, &c);
 775             result[0] = result[1] = result[2] = result[3] = DOT2(a, b) + c;
 776             store_vector4(inst, machine, result);
 777             if (DEBUG_PROG) {
 778                printf("DP2A %g = (%g %g) . (%g %g) + %g\n",
 779                       result[0], a[0], a[1], b[0], b[1], c);
 780             }
 781          }
 782          break;
 783       case OPCODE_DP3:
 784          {
 785             GLfloat a[4], b[4], result[4];
 786             fetch_vector4(&inst->SrcReg[0], machine, a);
 787             fetch_vector4(&inst->SrcReg[1], machine, b);
 788             result[0] = result[1] = result[2] = result[3] = DOT3(a, b);
 789             store_vector4(inst, machine, result);
 790             if (DEBUG_PROG) {
 791                printf("DP3 %g = (%g %g %g) . (%g %g %g)\n",
 792                       result[0], a[0], a[1], a[2], b[0], b[1], b[2]);
 793             }
 794          }
 795          break;
 796       case OPCODE_DP4:
 797          {
 798             GLfloat a[4], b[4], result[4];
 799             fetch_vector4(&inst->SrcReg[0], machine, a);
 800             fetch_vector4(&inst->SrcReg[1], machine, b);
 801             result[0] = result[1] = result[2] = result[3] = DOT4(a, b);
 802             store_vector4(inst, machine, result);
 803             if (DEBUG_PROG) {
 804                printf("DP4 %g = (%g, %g %g %g) . (%g, %g %g %g)\n",
 805                       result[0], a[0], a[1], a[2], a[3],
 806                       b[0], b[1], b[2], b[3]);
 807             }
 808          }
 809          break;
 810       case OPCODE_DPH:
 811          {
 812             GLfloat a[4], b[4], result[4];
 813             fetch_vector4(&inst->SrcReg[0], machine, a);
 814             fetch_vector4(&inst->SrcReg[1], machine, b);
 815             result[0] = result[1] = result[2] = result[3] = DOT3(a, b) + b[3];
 816             store_vector4(inst, machine, result);
 817          }
 818          break;
 819       case OPCODE_DST:         /* Distance vector */
 820          {
 821             GLfloat a[4], b[4], result[4];
 822             fetch_vector4(&inst->SrcReg[0], machine, a);
 823             fetch_vector4(&inst->SrcReg[1], machine, b);
 824             result[0] = 1.0F;
 825             result[1] = a[1] * b[1];
 826             result[2] = a[2];
 827             result[3] = b[3];
 828             store_vector4(inst, machine, result);
 829          }
 830          break;
 831       case OPCODE_EXP:
 832          {
 833             GLfloat t[4], q[4], floor_t0;
 834             fetch_vector1(&inst->SrcReg[0], machine, t);
 835             floor_t0 = FLOORF(t[0]);
 836             if (floor_t0 > FLT_MAX_EXP) {
 837                SET_POS_INFINITY(q[0]);
 838                SET_POS_INFINITY(q[2]);
 839             }
 840             else if (floor_t0 < FLT_MIN_EXP) {
 841                q[0] = 0.0F;
 842                q[2] = 0.0F;
 843             }
 844             else {
 845                q[0] = LDEXPF(1.0, (int) floor_t0);
 846                /* Note: GL_NV_vertex_program expects
 847                 * result.z = result.x * APPX(result.y)
 848                 * We do what the ARB extension says.
 849                 */
 850                q[2] = (GLfloat) _mesa_pow(2.0, t[0]);
 851             }
 852             q[1] = t[0] - floor_t0;
 853             q[3] = 1.0F;
 854             store_vector4( inst, machine, q );
 855          }
 856          break;
 857       case OPCODE_EX2:         /* Exponential base 2 */
 858          {
 859             GLfloat a[4], result[4], val;
 860             fetch_vector1(&inst->SrcReg[0], machine, a);
 861             val = (GLfloat) _mesa_pow(2.0, a[0]);
 862             /*
 863             if (IS_INF_OR_NAN(val))
 864                val = 1.0e10;
 865             */
 866             result[0] = result[1] = result[2] = result[3] = val;
 867             store_vector4(inst, machine, result);
 868          }
 869          break;
 870       case OPCODE_FLR:
 871          {
 872             GLfloat a[4], result[4];
 873             fetch_vector4(&inst->SrcReg[0], machine, a);
 874             result[0] = FLOORF(a[0]);
 875             result[1] = FLOORF(a[1]);
 876             result[2] = FLOORF(a[2]);
 877             result[3] = FLOORF(a[3]);
 878             store_vector4(inst, machine, result);
 879          }
 880          break;
 881       case OPCODE_FRC:
 882          {
 883             GLfloat a[4], result[4];
 884             fetch_vector4(&inst->SrcReg[0], machine, a);
 885             result[0] = a[0] - FLOORF(a[0]);
 886             result[1] = a[1] - FLOORF(a[1]);
 887             result[2] = a[2] - FLOORF(a[2]);
 888             result[3] = a[3] - FLOORF(a[3]);
 889             store_vector4(inst, machine, result);
 890          }
 891          break;
 892       case OPCODE_IF:
 893          {
 894             GLboolean cond;
 895             /* eval condition */
 896             if (inst->SrcReg[0].File != PROGRAM_UNDEFINED) {
 897                GLfloat a[4];
 898                fetch_vector1(&inst->SrcReg[0], machine, a);
 899                cond = (a[0] != 0.0);
 900             }
 901             else {
 902                cond = eval_condition(machine, inst);
 903             }
 904             if (DEBUG_PROG) {
 905                printf("IF: %d\n", cond);
 906             }
 907             /* do if/else */
 908             if (cond) {
 909                /* do if-clause (just continue execution) */
 910             }
 911             else {
 912                /* go to the instruction after ELSE or ENDIF */
 913                assert(inst->BranchTarget >= 0);
 914                pc = inst->BranchTarget - 1;
 915             }
 916          }
 917          break;
 918       case OPCODE_ELSE:
 919          /* goto ENDIF */
 920          assert(inst->BranchTarget >= 0);
 921          pc = inst->BranchTarget - 1;
 922          break;
 923       case OPCODE_ENDIF:
 924          /* nothing */
 925          break;
 926       case OPCODE_KIL_NV:      /* NV_f_p only (conditional) */
 927          if (eval_condition(machine, inst)) {
 928             return GL_FALSE;
 929          }
 930          break;
 931       case OPCODE_KIL:         /* ARB_f_p only */
 932          {
 933             GLfloat a[4];
 934             fetch_vector4(&inst->SrcReg[0], machine, a);
 935             if (DEBUG_PROG) {
 936                printf("KIL if (%g %g %g %g) <= 0.0\n",
 937                       a[0], a[1], a[2], a[3]);
 938             }
 939
 940             if (a[0] < 0.0F || a[1] < 0.0F || a[2] < 0.0F || a[3] < 0.0F) {
 941                return GL_FALSE;
 942             }
 943          }
 944          break;
 945       case OPCODE_LG2:         /* log base 2 */
 946          {
 947             GLfloat a[4], result[4], val;
 948             fetch_vector1(&inst->SrcReg[0], machine, a);
 949             /* The fast LOG2 macro doesn't meet the precision requirements.
 950              */
 951             if (a[0] == 0.0F) {
 952                val = -FLT_MAX;
 953             }
 954             else {
 955                val = log(a[0]) * 1.442695F;
 956             }
 957             result[0] = result[1] = result[2] = result[3] = val;
 958             store_vector4(inst, machine, result);
 959          }
 960          break;
 961       case OPCODE_LIT:
 962          {
 963             const GLfloat epsilon = 1.0F / 256.0F;      /* from NV VP spec */
 964             GLfloat a[4], result[4];
 965             fetch_vector4(&inst->SrcReg[0], machine, a);
 966             a[0] = MAX2(a[0], 0.0F);
 967             a[1] = MAX2(a[1], 0.0F);
 968             /* XXX ARB version clamps a[3], NV version doesn't */
 969             a[3] = CLAMP(a[3], -(128.0F - epsilon), (128.0F - epsilon));
 970             result[0] = 1.0F;
 971             result[1] = a[0];
 972             /* XXX we could probably just use pow() here */
 973             if (a[0] > 0.0F) {
 974                if (a[1] == 0.0 && a[3] == 0.0)
 975                   result[2] = 1.0;
 976                else
 977                   result[2] = (GLfloat) _mesa_pow(a[1], a[3]);
 978             }
 979             else {
 980                result[2] = 0.0;
 981             }
 982             result[3] = 1.0F;
 983             store_vector4(inst, machine, result);
 984             if (DEBUG_PROG) {
 985                printf("LIT (%g %g %g %g) : (%g %g %g %g)\n",
 986                       result[0], result[1], result[2], result[3],
 987                       a[0], a[1], a[2], a[3]);
 988             }
 989          }
 990          break;
 991       case OPCODE_LOG:
 992          {
 993             GLfloat t[4], q[4], abs_t0;
 994             fetch_vector1(&inst->SrcReg[0], machine, t);
 995             abs_t0 = FABSF(t[0]);
 996             if (abs_t0 != 0.0F) {
 997                /* Since we really can't handle infinite values on VMS
 998                 * like other OSes we'll use __MAXFLOAT to represent
 999                 * infinity.  This may need some tweaking.
1000                 */
1001 #ifdef VMS
1002                if (abs_t0 == __MAXFLOAT)
1003 #else
1004                if (IS_INF_OR_NAN(abs_t0))
1005 #endif
1006                {
1007                   SET_POS_INFINITY(q[0]);
1008                   q[1] = 1.0F;
1009                   SET_POS_INFINITY(q[2]);
1010                }
1011                else {
1012                   int exponent;
1013                   GLfloat mantissa = FREXPF(t[0], &exponent);
1014                   q[0] = (GLfloat) (exponent - 1);
1015                   q[1] = (GLfloat) (2.0 * mantissa); /* map [.5, 1) -> [1, 2) */
1016
1017                   /* The fast LOG2 macro doesn't meet the precision
1018                    * requirements.
1019                    */
1020                   q[2] = (log(t[0]) * 1.442695F);
1021                }
1022             }
1023             else {
1024                SET_NEG_INFINITY(q[0]);
1025                q[1] = 1.0F;
1026                SET_NEG_INFINITY(q[2]);
1027             }
1028             q[3] = 1.0;
1029             store_vector4(inst, machine, q);
1030          }
1031          break;
1032       case OPCODE_LRP:
1033          {
1034             GLfloat a[4], b[4], c[4], result[4];
1035             fetch_vector4(&inst->SrcReg[0], machine, a);
1036             fetch_vector4(&inst->SrcReg[1], machine, b);
1037             fetch_vector4(&inst->SrcReg[2], machine, c);
1038             result[0] = a[0] * b[0] + (1.0F - a[0]) * c[0];
1039             result[1] = a[1] * b[1] + (1.0F - a[1]) * c[1];
1040             result[2] = a[2] * b[2] + (1.0F - a[2]) * c[2];
1041             result[3] = a[3] * b[3] + (1.0F - a[3]) * c[3];
1042             store_vector4(inst, machine, result);
1043             if (DEBUG_PROG) {
1044                printf("LRP (%g %g %g %g) = (%g %g %g %g), "
1045                       "(%g %g %g %g), (%g %g %g %g)\n",
1046                       result[0], result[1], result[2], result[3],
1047                       a[0], a[1], a[2], a[3],
1048                       b[0], b[1], b[2], b[3], c[0], c[1], c[2], c[3]);
1049             }
1050          }
1051          break;
1052       case OPCODE_MAD:
1053          {
1054             GLfloat a[4], b[4], c[4], result[4];
1055             fetch_vector4(&inst->SrcReg[0], machine, a);
1056             fetch_vector4(&inst->SrcReg[1], machine, b);
1057             fetch_vector4(&inst->SrcReg[2], machine, c);
1058             result[0] = a[0] * b[0] + c[0];
1059             result[1] = a[1] * b[1] + c[1];
1060             result[2] = a[2] * b[2] + c[2];
1061             result[3] = a[3] * b[3] + c[3];
1062             store_vector4(inst, machine, result);
1063             if (DEBUG_PROG) {
1064                printf("MAD (%g %g %g %g) = (%g %g %g %g) * "
1065                       "(%g %g %g %g) + (%g %g %g %g)\n",
1066                       result[0], result[1], result[2], result[3],
1067                       a[0], a[1], a[2], a[3],
1068                       b[0], b[1], b[2], b[3], c[0], c[1], c[2], c[3]);
1069             }
1070          }
1071          break;
1072       case OPCODE_MAX:
1073          {
1074             GLfloat a[4], b[4], result[4];
1075             fetch_vector4(&inst->SrcReg[0], machine, a);
1076             fetch_vector4(&inst->SrcReg[1], machine, b);
1077             result[0] = MAX2(a[0], b[0]);
1078             result[1] = MAX2(a[1], b[1]);
1079             result[2] = MAX2(a[2], b[2]);
1080             result[3] = MAX2(a[3], b[3]);
1081             store_vector4(inst, machine, result);
1082             if (DEBUG_PROG) {
1083                printf("MAX (%g %g %g %g) = (%g %g %g %g), (%g %g %g %g)\n",
1084                       result[0], result[1], result[2], result[3],
1085                       a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
1086             }
1087          }
1088          break;
1089       case OPCODE_MIN:
1090          {
1091             GLfloat a[4], b[4], result[4];
1092             fetch_vector4(&inst->SrcReg[0], machine, a);
1093             fetch_vector4(&inst->SrcReg[1], machine, b);
1094             result[0] = MIN2(a[0], b[0]);
1095             result[1] = MIN2(a[1], b[1]);
1096             result[2] = MIN2(a[2], b[2]);
1097             result[3] = MIN2(a[3], b[3]);
1098             store_vector4(inst, machine, result);
1099          }
1100          break;
1101       case OPCODE_MOV:
1102          {
1103             GLfloat result[4];
1104             fetch_vector4(&inst->SrcReg[0], machine, result);
1105             store_vector4(inst, machine, result);
1106             if (DEBUG_PROG) {
1107                printf("MOV (%g %g %g %g)\n",
1108                       result[0], result[1], result[2], result[3]);
1109             }
1110          }
1111          break;
1112       case OPCODE_MUL:
1113          {
1114             GLfloat a[4], b[4], result[4];
1115             fetch_vector4(&inst->SrcReg[0], machine, a);
1116             fetch_vector4(&inst->SrcReg[1], machine, b);
1117             result[0] = a[0] * b[0];
1118             result[1] = a[1] * b[1];
1119             result[2] = a[2] * b[2];
1120             result[3] = a[3] * b[3];
1121             store_vector4(inst, machine, result);
1122             if (DEBUG_PROG) {
1123                printf("MUL (%g %g %g %g) = (%g %g %g %g) * (%g %g %g %g)\n",
1124                       result[0], result[1], result[2], result[3],
1125                       a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
1126             }
1127          }
1128          break;
1129       case OPCODE_NOISE1:
1130          {
1131             GLfloat a[4], result[4];
1132             fetch_vector1(&inst->SrcReg[0], machine, a);
1133             result[0] =
1134                result[1] =
1135                result[2] =
1136                result[3] = _mesa_noise1(a[0]);
1137             store_vector4(inst, machine, result);
1138          }
1139          break;
1140       case OPCODE_NOISE2:
1141          {
1142             GLfloat a[4], result[4];
1143             fetch_vector4(&inst->SrcReg[0], machine, a);
1144             result[0] =
1145                result[1] =
1146                result[2] = result[3] = _mesa_noise2(a[0], a[1]);
1147             store_vector4(inst, machine, result);
1148          }
1149          break;
1150       case OPCODE_NOISE3:
1151          {
1152             GLfloat a[4], result[4];
1153             fetch_vector4(&inst->SrcReg[0], machine, a);
1154             result[0] =
1155                result[1] =
1156                result[2] =
1157                result[3] = _mesa_noise3(a[0], a[1], a[2]);
1158             store_vector4(inst, machine, result);
1159          }
1160          break;
1161       case OPCODE_NOISE4:
1162          {
1163             GLfloat a[4], result[4];
1164             fetch_vector4(&inst->SrcReg[0], machine, a);
1165             result[0] =
1166                result[1] =
1167                result[2] =
1168                result[3] = _mesa_noise4(a[0], a[1], a[2], a[3]);
1169             store_vector4(inst, machine, result);
1170          }
1171          break;
1172       case OPCODE_NOP:
1173          break;
1174       case OPCODE_NOT:         /* bitwise NOT */
1175          {
1176             GLuint a[4], result[4];
1177             fetch_vector4ui(&inst->SrcReg[0], machine, a);
1178             result[0] = ~a[0];
1179             result[1] = ~a[1];
1180             result[2] = ~a[2];
1181             result[3] = ~a[3];
1182             store_vector4ui(inst, machine, result);
1183          }
1184          break;
1185       case OPCODE_NRM3:        /* 3-component normalization */
1186          {
1187             GLfloat a[4], result[4];
1188             GLfloat tmp;
1189             fetch_vector4(&inst->SrcReg[0], machine, a);
1190             tmp = a[0] * a[0] + a[1] * a[1] + a[2] * a[2];
1191             if (tmp != 0.0F)
1192                tmp = INV_SQRTF(tmp);
1193             result[0] = tmp * a[0];
1194             result[1] = tmp * a[1];
1195             result[2] = tmp * a[2];
1196             result[3] = 0.0;  /* undefined, but prevent valgrind warnings */
1197             store_vector4(inst, machine, result);
1198          }
1199          break;
1200       case OPCODE_NRM4:        /* 4-component normalization */
1201          {
1202             GLfloat a[4], result[4];
1203             GLfloat tmp;
1204             fetch_vector4(&inst->SrcReg[0], machine, a);
1205             tmp = a[0] * a[0] + a[1] * a[1] + a[2] * a[2] + a[3] * a[3];
1206             if (tmp != 0.0F)
1207                tmp = INV_SQRTF(tmp);
1208             result[0] = tmp * a[0];
1209             result[1] = tmp * a[1];
1210             result[2] = tmp * a[2];
1211             result[3] = tmp * a[3];
1212             store_vector4(inst, machine, result);
1213          }
1214          break;
1215       case OPCODE_OR:          /* bitwise OR */
1216          {
1217             GLuint a[4], b[4], result[4];
1218             fetch_vector4ui(&inst->SrcReg[0], machine, a);
1219             fetch_vector4ui(&inst->SrcReg[1], machine, b);
1220             result[0] = a[0] | b[0];
1221             result[1] = a[1] | b[1];
1222             result[2] = a[2] | b[2];
1223             result[3] = a[3] | b[3];
1224             store_vector4ui(inst, machine, result);
1225          }
1226          break;
1227       case OPCODE_PK2H:        /* pack two 16-bit floats in one 32-bit float */
1228          {
1229             GLfloat a[4];
1230             GLuint result[4];
1231             GLhalfNV hx, hy;
1232             fetch_vector4(&inst->SrcReg[0], machine, a);
1233             hx = _mesa_float_to_half(a[0]);
1234             hy = _mesa_float_to_half(a[1]);
1235             result[0] =
1236             result[1] =
1237             result[2] =
1238             result[3] = hx | (hy << 16);
1239             store_vector4ui(inst, machine, result);
1240          }
1241          break;
1242       case OPCODE_PK2US:       /* pack two GLushorts into one 32-bit float */
1243          {
1244             GLfloat a[4];
1245             GLuint result[4], usx, usy;
1246             fetch_vector4(&inst->SrcReg[0], machine, a);
1247             a[0] = CLAMP(a[0], 0.0F, 1.0F);
1248             a[1] = CLAMP(a[1], 0.0F, 1.0F);
1249             usx = IROUND(a[0] * 65535.0F);
1250             usy = IROUND(a[1] * 65535.0F);
1251             result[0] =
1252             result[1] =
1253             result[2] =
1254             result[3] = usx | (usy << 16);
1255             store_vector4ui(inst, machine, result);
1256          }
1257          break;
1258       case OPCODE_PK4B:        /* pack four GLbytes into one 32-bit float */
1259          {
1260             GLfloat a[4];
1261             GLuint result[4], ubx, uby, ubz, ubw;
1262             fetch_vector4(&inst->SrcReg[0], machine, a);
1263             a[0] = CLAMP(a[0], -128.0F / 127.0F, 1.0F);
1264             a[1] = CLAMP(a[1], -128.0F / 127.0F, 1.0F);
1265             a[2] = CLAMP(a[2], -128.0F / 127.0F, 1.0F);
1266             a[3] = CLAMP(a[3], -128.0F / 127.0F, 1.0F);
1267             ubx = IROUND(127.0F * a[0] + 128.0F);
1268             uby = IROUND(127.0F * a[1] + 128.0F);
1269             ubz = IROUND(127.0F * a[2] + 128.0F);
1270             ubw = IROUND(127.0F * a[3] + 128.0F);
1271             result[0] =
1272             result[1] =
1273             result[2] =
1274             result[3] = ubx | (uby << 8) | (ubz << 16) | (ubw << 24);
1275             store_vector4ui(inst, machine, result);
1276          }
1277          break;
1278       case OPCODE_PK4UB:       /* pack four GLubytes into one 32-bit float */
1279          {
1280             GLfloat a[4];
1281             GLuint result[4], ubx, uby, ubz, ubw;
1282             fetch_vector4(&inst->SrcReg[0], machine, a);
1283             a[0] = CLAMP(a[0], 0.0F, 1.0F);
1284             a[1] = CLAMP(a[1], 0.0F, 1.0F);
1285             a[2] = CLAMP(a[2], 0.0F, 1.0F);
1286             a[3] = CLAMP(a[3], 0.0F, 1.0F);
1287             ubx = IROUND(255.0F * a[0]);
1288             uby = IROUND(255.0F * a[1]);
1289             ubz = IROUND(255.0F * a[2]);
1290             ubw = IROUND(255.0F * a[3]);
1291             result[0] =
1292             result[1] =
1293             result[2] =
1294             result[3] = ubx | (uby << 8) | (ubz << 16) | (ubw << 24);
1295             store_vector4ui(inst, machine, result);
1296          }
1297          break;
1298       case OPCODE_POW:
1299          {
1300             GLfloat a[4], b[4], result[4];
1301             fetch_vector1(&inst->SrcReg[0], machine, a);
1302             fetch_vector1(&inst->SrcReg[1], machine, b);
1303             result[0] = result[1] = result[2] = result[3]
1304                = (GLfloat) _mesa_pow(a[0], b[0]);
1305             store_vector4(inst, machine, result);
1306          }
1307          break;
1308       case OPCODE_RCP:
1309          {
1310             GLfloat a[4], result[4];
1311             fetch_vector1(&inst->SrcReg[0], machine, a);
1312             if (DEBUG_PROG) {
1313                if (a[0] == 0)
1314                   printf("RCP(0)\n");
1315                else if (IS_INF_OR_NAN(a[0]))
1316                   printf("RCP(inf)\n");
1317             }
1318             result[0] = result[1] = result[2] = result[3] = 1.0F / a[0];
1319             store_vector4(inst, machine, result);
1320          }
1321          break;
1322       case OPCODE_RET:         /* return from subroutine (conditional) */
1323          if (eval_condition(machine, inst)) {
1324             if (machine->StackDepth == 0) {
1325                return GL_TRUE;  /* Per GL_NV_vertex_program2 spec */
1326             }
1327             /* subtract one because of pc++ in the for loop */
1328             pc = machine->CallStack[--machine->StackDepth] - 1;
1329          }
1330          break;
1331       case OPCODE_RFL:         /* reflection vector */
1332          {
1333             GLfloat axis[4], dir[4], result[4], tmpX, tmpW;
1334             fetch_vector4(&inst->SrcReg[0], machine, axis);
1335             fetch_vector4(&inst->SrcReg[1], machine, dir);
1336             tmpW = DOT3(axis, axis);
1337             tmpX = (2.0F * DOT3(axis, dir)) / tmpW;
1338             result[0] = tmpX * axis[0] - dir[0];
1339             result[1] = tmpX * axis[1] - dir[1];
1340             result[2] = tmpX * axis[2] - dir[2];
1341             /* result[3] is never written! XXX enforce in parser! */
1342             store_vector4(inst, machine, result);
1343          }
1344          break;
1345       case OPCODE_RSQ:         /* 1 / sqrt() */
1346          {
1347             GLfloat a[4], result[4];
1348             fetch_vector1(&inst->SrcReg[0], machine, a);
1349             a[0] = FABSF(a[0]);
1350             result[0] = result[1] = result[2] = result[3] = INV_SQRTF(a[0]);
1351             store_vector4(inst, machine, result);
1352             if (DEBUG_PROG) {
1353                printf("RSQ %g = 1/sqrt(|%g|)\n", result[0], a[0]);
1354             }
1355          }
1356          break;
1357       case OPCODE_SCS:         /* sine and cos */
1358          {
1359             GLfloat a[4], result[4];
1360             fetch_vector1(&inst->SrcReg[0], machine, a);
1361             result[0] = (GLfloat) _mesa_cos(a[0]);
1362             result[1] = (GLfloat) _mesa_sin(a[0]);
1363             result[2] = 0.0;    /* undefined! */
1364             result[3] = 0.0;    /* undefined! */
1365             store_vector4(inst, machine, result);
1366          }
1367          break;
1368       case OPCODE_SEQ:         /* set on equal */
1369          {
1370             GLfloat a[4], b[4], result[4];
1371             fetch_vector4(&inst->SrcReg[0], machine, a);
1372             fetch_vector4(&inst->SrcReg[1], machine, b);
1373             result[0] = (a[0] == b[0]) ? 1.0F : 0.0F;
1374             result[1] = (a[1] == b[1]) ? 1.0F : 0.0F;
1375             result[2] = (a[2] == b[2]) ? 1.0F : 0.0F;
1376             result[3] = (a[3] == b[3]) ? 1.0F : 0.0F;
1377             store_vector4(inst, machine, result);
1378             if (DEBUG_PROG) {
1379                printf("SEQ (%g %g %g %g) = (%g %g %g %g) == (%g %g %g %g)\n",
1380                       result[0], result[1], result[2], result[3],
1381                       a[0], a[1], a[2], a[3],
1382                       b[0], b[1], b[2], b[3]);
1383             }
1384          }
1385          break;
1386       case OPCODE_SFL:         /* set false, operands ignored */
1387          {
1388             static const GLfloat result[4] = { 0.0F, 0.0F, 0.0F, 0.0F };
1389             store_vector4(inst, machine, result);
1390          }
1391          break;
1392       case OPCODE_SGE:         /* set on greater or equal */
1393          {
1394             GLfloat a[4], b[4], result[4];
1395             fetch_vector4(&inst->SrcReg[0], machine, a);
1396             fetch_vector4(&inst->SrcReg[1], machine, b);
1397             result[0] = (a[0] >= b[0]) ? 1.0F : 0.0F;
1398             result[1] = (a[1] >= b[1]) ? 1.0F : 0.0F;
1399             result[2] = (a[2] >= b[2]) ? 1.0F : 0.0F;
1400             result[3] = (a[3] >= b[3]) ? 1.0F : 0.0F;
1401             store_vector4(inst, machine, result);
1402             if (DEBUG_PROG) {
1403                printf("SGE (%g %g %g %g) = (%g %g %g %g) >= (%g %g %g %g)\n",
1404                       result[0], result[1], result[2], result[3],
1405                       a[0], a[1], a[2], a[3],
1406                       b[0], b[1], b[2], b[3]);
1407             }
1408          }
1409          break;
1410       case OPCODE_SGT:         /* set on greater */
1411          {
1412             GLfloat a[4], b[4], result[4];
1413             fetch_vector4(&inst->SrcReg[0], machine, a);
1414             fetch_vector4(&inst->SrcReg[1], machine, b);
1415             result[0] = (a[0] > b[0]) ? 1.0F : 0.0F;
1416             result[1] = (a[1] > b[1]) ? 1.0F : 0.0F;
1417             result[2] = (a[2] > b[2]) ? 1.0F : 0.0F;
1418             result[3] = (a[3] > b[3]) ? 1.0F : 0.0F;
1419             store_vector4(inst, machine, result);
1420             if (DEBUG_PROG) {
1421                printf("SGT (%g %g %g %g) = (%g %g %g %g) > (%g %g %g %g)\n",
1422                       result[0], result[1], result[2], result[3],
1423                       a[0], a[1], a[2], a[3],
1424                       b[0], b[1], b[2], b[3]);
1425             }
1426          }
1427          break;
1428       case OPCODE_SIN:
1429          {
1430             GLfloat a[4], result[4];
1431             fetch_vector1(&inst->SrcReg[0], machine, a);
1432             result[0] = result[1] = result[2] = result[3]
1433                = (GLfloat) _mesa_sin(a[0]);
1434             store_vector4(inst, machine, result);
1435          }
1436          break;
1437       case OPCODE_SLE:         /* set on less or equal */
1438          {
1439             GLfloat a[4], b[4], result[4];
1440             fetch_vector4(&inst->SrcReg[0], machine, a);
1441             fetch_vector4(&inst->SrcReg[1], machine, b);
1442             result[0] = (a[0] <= b[0]) ? 1.0F : 0.0F;
1443             result[1] = (a[1] <= b[1]) ? 1.0F : 0.0F;
1444             result[2] = (a[2] <= b[2]) ? 1.0F : 0.0F;
1445             result[3] = (a[3] <= b[3]) ? 1.0F : 0.0F;
1446             store_vector4(inst, machine, result);
1447             if (DEBUG_PROG) {
1448                printf("SLE (%g %g %g %g) = (%g %g %g %g) <= (%g %g %g %g)\n",
1449                       result[0], result[1], result[2], result[3],
1450                       a[0], a[1], a[2], a[3],
1451                       b[0], b[1], b[2], b[3]);
1452             }
1453          }
1454          break;
1455       case OPCODE_SLT:         /* set on less */
1456          {
1457             GLfloat a[4], b[4], result[4];
1458             fetch_vector4(&inst->SrcReg[0], machine, a);
1459             fetch_vector4(&inst->SrcReg[1], machine, b);
1460             result[0] = (a[0] < b[0]) ? 1.0F : 0.0F;
1461             result[1] = (a[1] < b[1]) ? 1.0F : 0.0F;
1462             result[2] = (a[2] < b[2]) ? 1.0F : 0.0F;
1463             result[3] = (a[3] < b[3]) ? 1.0F : 0.0F;
1464             store_vector4(inst, machine, result);
1465             if (DEBUG_PROG) {
1466                printf("SLT (%g %g %g %g) = (%g %g %g %g) < (%g %g %g %g)\n",
1467                       result[0], result[1], result[2], result[3],
1468                       a[0], a[1], a[2], a[3],
1469                       b[0], b[1], b[2], b[3]);
1470             }
1471          }
1472          break;
1473       case OPCODE_SNE:         /* set on not equal */
1474          {
1475             GLfloat a[4], b[4], result[4];
1476             fetch_vector4(&inst->SrcReg[0], machine, a);
1477             fetch_vector4(&inst->SrcReg[1], machine, b);
1478             result[0] = (a[0] != b[0]) ? 1.0F : 0.0F;
1479             result[1] = (a[1] != b[1]) ? 1.0F : 0.0F;
1480             result[2] = (a[2] != b[2]) ? 1.0F : 0.0F;
1481             result[3] = (a[3] != b[3]) ? 1.0F : 0.0F;
1482             store_vector4(inst, machine, result);
1483             if (DEBUG_PROG) {
1484                printf("SNE (%g %g %g %g) = (%g %g %g %g) != (%g %g %g %g)\n",
1485                       result[0], result[1], result[2], result[3],
1486                       a[0], a[1], a[2], a[3],
1487                       b[0], b[1], b[2], b[3]);
1488             }
1489          }
1490          break;
1491       case OPCODE_SSG:         /* set sign (-1, 0 or +1) */
1492          {
1493             GLfloat a[4], result[4];
1494             fetch_vector4(&inst->SrcReg[0], machine, a);
1495             result[0] = (GLfloat) ((a[0] > 0.0F) - (a[0] < 0.0F));
1496             result[1] = (GLfloat) ((a[1] > 0.0F) - (a[1] < 0.0F));
1497             result[2] = (GLfloat) ((a[2] > 0.0F) - (a[2] < 0.0F));
1498             result[3] = (GLfloat) ((a[3] > 0.0F) - (a[3] < 0.0F));
1499             store_vector4(inst, machine, result);
1500          }
1501          break;
1502       case OPCODE_STR:         /* set true, operands ignored */
1503          {
1504             static const GLfloat result[4] = { 1.0F, 1.0F, 1.0F, 1.0F };
1505             store_vector4(inst, machine, result);
1506          }
1507          break;
1508       case OPCODE_SUB:
1509          {
1510             GLfloat a[4], b[4], result[4];
1511             fetch_vector4(&inst->SrcReg[0], machine, a);
1512             fetch_vector4(&inst->SrcReg[1], machine, b);
1513             result[0] = a[0] - b[0];
1514             result[1] = a[1] - b[1];
1515             result[2] = a[2] - b[2];
1516             result[3] = a[3] - b[3];
1517             store_vector4(inst, machine, result);
1518             if (DEBUG_PROG) {
1519                printf("SUB (%g %g %g %g) = (%g %g %g %g) - (%g %g %g %g)\n",
1520                       result[0], result[1], result[2], result[3],
1521                       a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
1522             }
1523          }
1524          break;
1525       case OPCODE_SWZ:         /* extended swizzle */
1526          {
1527             const struct prog_src_register *source = &inst->SrcReg[0];
1528             const GLfloat *src = get_src_register_pointer(source, machine);
1529             GLfloat result[4];
1530             GLuint i;
1531             for (i = 0; i < 4; i++) {
1532                const GLuint swz = GET_SWZ(source->Swizzle, i);
1533                if (swz == SWIZZLE_ZERO)
1534                   result[i] = 0.0;
1535                else if (swz == SWIZZLE_ONE)
1536                   result[i] = 1.0;
1537                else {
1538                   ASSERT(swz >= 0);
1539                   ASSERT(swz <= 3);
1540                   result[i] = src[swz];
1541                }
1542                if (source->Negate & (1 << i))
1543                   result[i] = -result[i];
1544             }
1545             store_vector4(inst, machine, result);
1546          }
1547          break;
1548       case OPCODE_TEX:         /* Both ARB and NV frag prog */
1549          /* Simple texel lookup */
1550          {
1551             GLfloat texcoord[4], color[4];
1552             fetch_vector4(&inst->SrcReg[0], machine, texcoord);
1553
1554             fetch_texel(ctx, machine, inst, texcoord, 0.0, color);
1555
1556             if (DEBUG_PROG) {
1557                printf("TEX (%g, %g, %g, %g) = texture[%d][%g, %g, %g, %g]\n",
1558                       color[0], color[1], color[2], color[3],
1559                       inst->TexSrcUnit,
1560                       texcoord[0], texcoord[1], texcoord[2], texcoord[3]);
1561             }
1562             store_vector4(inst, machine, color);
1563          }
1564          break;
1565       case OPCODE_TXB:         /* GL_ARB_fragment_program only */
1566          /* Texel lookup with LOD bias */
1567          {
1568             GLfloat texcoord[4], color[4], lodBias;
1569
1570             fetch_vector4(&inst->SrcReg[0], machine, texcoord);
1571
1572             /* texcoord[3] is the bias to add to lambda */
1573             lodBias = texcoord[3];
1574
1575             fetch_texel(ctx, machine, inst, texcoord, lodBias, color);
1576
1577             store_vector4(inst, machine, color);
1578          }
1579          break;
1580       case OPCODE_TXD:         /* GL_NV_fragment_program only */
1581          /* Texture lookup w/ partial derivatives for LOD */
1582          {
1583             GLfloat texcoord[4], dtdx[4], dtdy[4], color[4];
1584             fetch_vector4(&inst->SrcReg[0], machine, texcoord);
1585             fetch_vector4(&inst->SrcReg[1], machine, dtdx);
1586             fetch_vector4(&inst->SrcReg[2], machine, dtdy);
1587             machine->FetchTexelDeriv(ctx, texcoord, dtdx, dtdy,
1588                                      0.0, /* lodBias */
1589                                      inst->TexSrcUnit, color);
1590             store_vector4(inst, machine, color);
1591          }
1592          break;
1593       case OPCODE_TXP:         /* GL_ARB_fragment_program only */
1594          /* Texture lookup w/ projective divide */
1595          {
1596             GLfloat texcoord[4], color[4];
1597
1598             fetch_vector4(&inst->SrcReg[0], machine, texcoord);
1599             /* Not so sure about this test - if texcoord[3] is
1600              * zero, we'd probably be fine except for an ASSERT in
1601              * IROUND_POS() which gets triggered by the inf values created.
1602              */
1603             if (texcoord[3] != 0.0) {
1604                texcoord[0] /= texcoord[3];
1605                texcoord[1] /= texcoord[3];
1606                texcoord[2] /= texcoord[3];
1607             }
1608
1609             fetch_texel(ctx, machine, inst, texcoord, 0.0, color);
1610
1611             store_vector4(inst, machine, color);
1612          }
1613          break;
1614       case OPCODE_TXP_NV:      /* GL_NV_fragment_program only */
1615          /* Texture lookup w/ projective divide, as above, but do not
1616           * do the divide by w if sampling from a cube map.
1617           */
1618          {
1619             GLfloat texcoord[4], color[4];
1620
1621             fetch_vector4(&inst->SrcReg[0], machine, texcoord);
1622             if (inst->TexSrcTarget != TEXTURE_CUBE_INDEX &&
1623                 texcoord[3] != 0.0) {
1624                texcoord[0] /= texcoord[3];
1625                texcoord[1] /= texcoord[3];
1626                texcoord[2] /= texcoord[3];
1627             }
1628
1629             fetch_texel(ctx, machine, inst, texcoord, 0.0, color);
1630
1631             store_vector4(inst, machine, color);
1632          }
1633          break;
1634       case OPCODE_TRUNC:       /* truncate toward zero */
1635          {
1636             GLfloat a[4], result[4];
1637             fetch_vector4(&inst->SrcReg[0], machine, a);
1638             result[0] = (GLfloat) (GLint) a[0];
1639             result[1] = (GLfloat) (GLint) a[1];
1640             result[2] = (GLfloat) (GLint) a[2];
1641             result[3] = (GLfloat) (GLint) a[3];
1642             store_vector4(inst, machine, result);
1643          }
1644          break;
1645       case OPCODE_UP2H:        /* unpack two 16-bit floats */
1646          {
1647             GLfloat a[4], result[4];
1648             fi_type fi;
1649             GLhalfNV hx, hy;
1650             fetch_vector1(&inst->SrcReg[0], machine, a);
1651             fi.f = a[0];
1652             hx = fi.i & 0xffff;
1653             hy = fi.i >> 16;
1654             result[0] = result[2] = _mesa_half_to_float(hx);
1655             result[1] = result[3] = _mesa_half_to_float(hy);
1656             store_vector4(inst, machine, result);
1657          }
1658          break;
1659       case OPCODE_UP2US:       /* unpack two GLushorts */
1660          {
1661             GLfloat a[4], result[4];
1662             fi_type fi;
1663             GLushort usx, usy;
1664             fetch_vector1(&inst->SrcReg[0], machine, a);
1665             fi.f = a[0];
1666             usx = fi.i & 0xffff;
1667             usy = fi.i >> 16;
1668             result[0] = result[2] = usx * (1.0f / 65535.0f);
1669             result[1] = result[3] = usy * (1.0f / 65535.0f);
1670             store_vector4(inst, machine, result);
1671          }
1672          break;
1673       case OPCODE_UP4B:        /* unpack four GLbytes */
1674          {
1675             GLfloat a[4], result[4];
1676             fi_type fi;
1677             fetch_vector1(&inst->SrcReg[0], machine, a);
1678             fi.f = a[0];
1679             result[0] = (((fi.i >> 0) & 0xff) - 128) / 127.0F;
1680             result[1] = (((fi.i >> 8) & 0xff) - 128) / 127.0F;
1681             result[2] = (((fi.i >> 16) & 0xff) - 128) / 127.0F;
1682             result[3] = (((fi.i >> 24) & 0xff) - 128) / 127.0F;
1683             store_vector4(inst, machine, result);
1684          }
1685          break;
1686       case OPCODE_UP4UB:       /* unpack four GLubytes */
1687          {
1688             GLfloat a[4], result[4];
1689             fi_type fi;
1690             fetch_vector1(&inst->SrcReg[0], machine, a);
1691             fi.f = a[0];
1692             result[0] = ((fi.i >> 0) & 0xff) / 255.0F;
1693             result[1] = ((fi.i >> 8) & 0xff) / 255.0F;
1694             result[2] = ((fi.i >> 16) & 0xff) / 255.0F;
1695             result[3] = ((fi.i >> 24) & 0xff) / 255.0F;
1696             store_vector4(inst, machine, result);
1697          }
1698          break;
1699       case OPCODE_XOR:         /* bitwise XOR */
1700          {
1701             GLuint a[4], b[4], result[4];
1702             fetch_vector4ui(&inst->SrcReg[0], machine, a);
1703             fetch_vector4ui(&inst->SrcReg[1], machine, b);
1704             result[0] = a[0] ^ b[0];
1705             result[1] = a[1] ^ b[1];
1706             result[2] = a[2] ^ b[2];
1707             result[3] = a[3] ^ b[3];
1708             store_vector4ui(inst, machine, result);
1709          }
1710          break;
1711       case OPCODE_XPD:         /* cross product */
1712          {
1713             GLfloat a[4], b[4], result[4];
1714             fetch_vector4(&inst->SrcReg[0], machine, a);
1715             fetch_vector4(&inst->SrcReg[1], machine, b);
1716             result[0] = a[1] * b[2] - a[2] * b[1];
1717             result[1] = a[2] * b[0] - a[0] * b[2];
1718             result[2] = a[0] * b[1] - a[1] * b[0];
1719             result[3] = 1.0;
1720             store_vector4(inst, machine, result);
1721             if (DEBUG_PROG) {
1722                printf("XPD (%g %g %g %g) = (%g %g %g) X (%g %g %g)\n",
1723                       result[0], result[1], result[2], result[3],
1724                       a[0], a[1], a[2], b[0], b[1], b[2]);
1725             }
1726          }
1727          break;
1728       case OPCODE_X2D:         /* 2-D matrix transform */
1729          {
1730             GLfloat a[4], b[4], c[4], result[4];
1731             fetch_vector4(&inst->SrcReg[0], machine, a);
1732             fetch_vector4(&inst->SrcReg[1], machine, b);
1733             fetch_vector4(&inst->SrcReg[2], machine, c);
1734             result[0] = a[0] + b[0] * c[0] + b[1] * c[1];
1735             result[1] = a[1] + b[0] * c[2] + b[1] * c[3];
1736             result[2] = a[2] + b[0] * c[0] + b[1] * c[1];
1737             result[3] = a[3] + b[0] * c[2] + b[1] * c[3];
1738             store_vector4(inst, machine, result);
1739          }
1740          break;
1741       case OPCODE_PRINT:
1742          {
1743             if (inst->SrcReg[0].File != -1) {
1744                GLfloat a[4];
1745                fetch_vector4(&inst->SrcReg[0], machine, a);
1746                _mesa_printf("%s%g, %g, %g, %g\n", (const char *) inst->Data,
1747                             a[0], a[1], a[2], a[3]);
1748             }
1749             else {
1750                _mesa_printf("%s\n", (const char *) inst->Data);
1751             }
1752          }
1753          break;
1754       case OPCODE_END:
1755          return GL_TRUE;
1756       default:
1757          _mesa_problem(ctx, "Bad opcode %d in _mesa_execute_program",
1758                        inst->Opcode);
1759          return GL_TRUE;        /* return value doesn't matter */
1760       }
1761
1762       numExec++;
1763       if (numExec > maxExec) {
1764          _mesa_problem(ctx, "Infinite loop detected in fragment program");
1765          return GL_TRUE;
1766       }
1767
1768    } /* for pc */
1769
1770    return GL_TRUE;
1771 }