src/mesa/shader/prog_execute.c

   1 /*
   2  * Mesa 3-D graphics library
   3  * Version:  7.3
   4  *
   5  * Copyright (C) 1999-2008  Brian Paul   All Rights Reserved.
   6  *
   7  * Permission is hereby granted, free of charge, to any person obtaining a
   8  * copy of this software and associated documentation files (the "Software"),
   9  * to deal in the Software without restriction, including without limitation
  10  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  11  * and/or sell copies of the Software, and to permit persons to whom the
  12  * Software is furnished to do so, subject to the following conditions:
  13  *
  14  * The above copyright notice and this permission notice shall be included
  15  * in all copies or substantial portions of the Software.
  16  *
  17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  18  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  20  * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
  21  * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  22  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  23  */
  24
  25 /**
  26  * \file prog_execute.c
  27  * Software interpreter for vertex/fragment programs.
  28  * \author Brian Paul
  29  */
  30
  31 /*
  32  * NOTE: we do everything in single-precision floating point; we don't
  33  * currently observe the single/half/fixed-precision qualifiers.
  34  *
  35  */
  36
  37
  38 #include "main/glheader.h"
  39 #include "main/colormac.h"
  40 #include "main/context.h"
  41 #include "program.h"
  42 #include "prog_execute.h"
  43 #include "prog_instruction.h"
  44 #include "prog_parameter.h"
  45 #include "prog_print.h"
  46 #include "prog_noise.h"
  47
  48
/* Debug switch: when set to a non-zero value, the interpreter prints trace
 * output (executed instructions, operand values, condition codes) with
 * printf while a program runs.  Leave at 0 for normal builds.
 */
#define DEBUG_PROG 0
  51
  52
  53 /**
  54  * Set x to positive or negative infinity.
  55  */
  56 #if defined(USE_IEEE) || defined(_WIN32)
  57 #define SET_POS_INFINITY(x)                  \
  58    do {                                      \
  59          fi_type fi;                         \
  60          fi.i = 0x7F800000;                  \
  61          x = fi.f;                           \
  62    } while (0)
  63 #define SET_NEG_INFINITY(x)                  \
  64    do {                                      \
  65          fi_type fi;                         \
  66          fi.i = 0xFF800000;                  \
  67          x = fi.f;                           \
  68    } while (0)
  69 #elif defined(VMS)
  70 #define SET_POS_INFINITY(x)  x = __MAXFLOAT
  71 #define SET_NEG_INFINITY(x)  x = -__MAXFLOAT
  72 #else
  73 #define SET_POS_INFINITY(x)  x = (GLfloat) HUGE_VAL
  74 #define SET_NEG_INFINITY(x)  x = (GLfloat) -HUGE_VAL
  75 #endif
  76
  77 #define SET_FLOAT_BITS(x, bits) ((fi_type *) (void *) &(x))->i = bits
  78
  79
/* All-zero vector handed back by get_src_register_pointer() when a source
 * register index falls outside the valid range.
 */
static const GLfloat ZeroVec[4] = { 0.0F, 0.0F, 0.0F, 0.0F };
  81
  82
  83
  84 /**
  85  * Return a pointer to the 4-element float vector specified by the given
  86  * source register.
  87  */
  88 static INLINE const GLfloat *
  89 get_src_register_pointer(const struct prog_src_register *source,
  90                          const struct gl_program_machine *machine)
  91 {
  92    const struct gl_program *prog = machine->CurProgram;
  93    GLint reg = source->Index;
  94
  95    if (source->RelAddr) {
  96       /* add address register value to src index/offset */
  97       reg += machine->AddressReg[0][0];
  98       if (reg < 0) {
  99          return ZeroVec;
 100       }
 101    }
 102
 103    switch (source->File) {
 104    case PROGRAM_TEMPORARY:
 105       if (reg >= MAX_PROGRAM_TEMPS)
 106          return ZeroVec;
 107       return machine->Temporaries[reg];
 108
 109    case PROGRAM_INPUT:
 110       if (prog->Target == GL_VERTEX_PROGRAM_ARB) {
 111          if (reg >= VERT_ATTRIB_MAX)
 112             return ZeroVec;
 113          return machine->VertAttribs[reg];
 114       }
 115       else {
 116          if (reg >= FRAG_ATTRIB_MAX)
 117             return ZeroVec;
 118          return machine->Attribs[reg][machine->CurElement];
 119       }
 120
 121    case PROGRAM_OUTPUT:
 122       if (reg >= MAX_PROGRAM_OUTPUTS)
 123          return ZeroVec;
 124       return machine->Outputs[reg];
 125
 126    case PROGRAM_LOCAL_PARAM:
 127       if (reg >= MAX_PROGRAM_LOCAL_PARAMS)
 128          return ZeroVec;
 129       return machine->CurProgram->LocalParams[reg];
 130
 131    case PROGRAM_ENV_PARAM:
 132       if (reg >= MAX_PROGRAM_ENV_PARAMS)
 133          return ZeroVec;
 134       return machine->EnvParams[reg];
 135
 136    case PROGRAM_STATE_VAR:
 137       /* Fallthrough */
 138    case PROGRAM_CONSTANT:
 139       /* Fallthrough */
 140    case PROGRAM_UNIFORM:
 141       /* Fallthrough */
 142    case PROGRAM_NAMED_PARAM:
 143       if (reg >= (GLint) prog->Parameters->NumParameters)
 144          return ZeroVec;
 145       return prog->Parameters->ParameterValues[reg];
 146
 147    default:
 148       _mesa_problem(NULL,
 149          "Invalid src register file %d in get_src_register_pointer()",
 150          source->File);
 151       return NULL;
 152    }
 153 }
 154
 155
 156 /**
 157  * Return a pointer to the 4-element float vector specified by the given
 158  * destination register.
 159  */
 160 static INLINE GLfloat *
 161 get_dst_register_pointer(const struct prog_dst_register *dest,
 162                          struct gl_program_machine *machine)
 163 {
 164    static GLfloat dummyReg[4];
 165    GLint reg = dest->Index;
 166
 167    if (dest->RelAddr) {
 168       /* add address register value to src index/offset */
 169       reg += machine->AddressReg[0][0];
 170       if (reg < 0) {
 171          return dummyReg;
 172       }
 173    }
 174
 175    switch (dest->File) {
 176    case PROGRAM_TEMPORARY:
 177       if (reg >= MAX_PROGRAM_TEMPS)
 178          return dummyReg;
 179       return machine->Temporaries[reg];
 180
 181    case PROGRAM_OUTPUT:
 182       if (reg >= MAX_PROGRAM_OUTPUTS)
 183          return dummyReg;
 184       return machine->Outputs[reg];
 185
 186    case PROGRAM_WRITE_ONLY:
 187       return dummyReg;
 188
 189    default:
 190       _mesa_problem(NULL,
 191          "Invalid dest register file %d in get_dst_register_pointer()",
 192          dest->File);
 193       return NULL;
 194    }
 195 }
 196
 197
 198
 199 /**
 200  * Fetch a 4-element float vector from the given source register.
 201  * Apply swizzling and negating as needed.
 202  */
 203 static void
 204 fetch_vector4(const struct prog_src_register *source,
 205               const struct gl_program_machine *machine, GLfloat result[4])
 206 {
 207    const GLfloat *src = get_src_register_pointer(source, machine);
 208    ASSERT(src);
 209
 210    if (source->Swizzle == SWIZZLE_NOOP) {
 211       /* no swizzling */
 212       COPY_4V(result, src);
 213    }
 214    else {
 215       ASSERT(GET_SWZ(source->Swizzle, 0) <= 3);
 216       ASSERT(GET_SWZ(source->Swizzle, 1) <= 3);
 217       ASSERT(GET_SWZ(source->Swizzle, 2) <= 3);
 218       ASSERT(GET_SWZ(source->Swizzle, 3) <= 3);
 219       result[0] = src[GET_SWZ(source->Swizzle, 0)];
 220       result[1] = src[GET_SWZ(source->Swizzle, 1)];
 221       result[2] = src[GET_SWZ(source->Swizzle, 2)];
 222       result[3] = src[GET_SWZ(source->Swizzle, 3)];
 223    }
 224
 225    if (source->Abs) {
 226       result[0] = FABSF(result[0]);
 227       result[1] = FABSF(result[1]);
 228       result[2] = FABSF(result[2]);
 229       result[3] = FABSF(result[3]);
 230    }
 231    if (source->Negate) {
 232       ASSERT(source->Negate == NEGATE_XYZW);
 233       result[0] = -result[0];
 234       result[1] = -result[1];
 235       result[2] = -result[2];
 236       result[3] = -result[3];
 237    }
 238
 239 #ifdef NAN_CHECK
 240    assert(!IS_INF_OR_NAN(result[0]));
 241    assert(!IS_INF_OR_NAN(result[0]));
 242    assert(!IS_INF_OR_NAN(result[0]));
 243    assert(!IS_INF_OR_NAN(result[0]));
 244 #endif
 245 }
 246
 247
 248 /**
 249  * Fetch a 4-element uint vector from the given source register.
 250  * Apply swizzling but not negation/abs.
 251  */
 252 static void
 253 fetch_vector4ui(const struct prog_src_register *source,
 254                 const struct gl_program_machine *machine, GLuint result[4])
 255 {
 256    const GLuint *src = (GLuint *) get_src_register_pointer(source, machine);
 257    ASSERT(src);
 258
 259    if (source->Swizzle == SWIZZLE_NOOP) {
 260       /* no swizzling */
 261       COPY_4V(result, src);
 262    }
 263    else {
 264       ASSERT(GET_SWZ(source->Swizzle, 0) <= 3);
 265       ASSERT(GET_SWZ(source->Swizzle, 1) <= 3);
 266       ASSERT(GET_SWZ(source->Swizzle, 2) <= 3);
 267       ASSERT(GET_SWZ(source->Swizzle, 3) <= 3);
 268       result[0] = src[GET_SWZ(source->Swizzle, 0)];
 269       result[1] = src[GET_SWZ(source->Swizzle, 1)];
 270       result[2] = src[GET_SWZ(source->Swizzle, 2)];
 271       result[3] = src[GET_SWZ(source->Swizzle, 3)];
 272    }
 273
 274    /* Note: no Negate or Abs here */
 275 }
 276
 277
 278
 279 /**
 280  * Fetch the derivative with respect to X or Y for the given register.
 281  * XXX this currently only works for fragment program input attribs.
 282  */
 283 static void
 284 fetch_vector4_deriv(GLcontext * ctx,
 285                     const struct prog_src_register *source,
 286                     const struct gl_program_machine *machine,
 287                     char xOrY, GLfloat result[4])
 288 {
 289    if (source->File == PROGRAM_INPUT &&
 290        source->Index < (GLint) machine->NumDeriv) {
 291       const GLint col = machine->CurElement;
 292       const GLfloat w = machine->Attribs[FRAG_ATTRIB_WPOS][col][3];
 293       const GLfloat invQ = 1.0f / w;
 294       GLfloat deriv[4];
 295
 296       if (xOrY == 'X') {
 297          deriv[0] = machine->DerivX[source->Index][0] * invQ;
 298          deriv[1] = machine->DerivX[source->Index][1] * invQ;
 299          deriv[2] = machine->DerivX[source->Index][2] * invQ;
 300          deriv[3] = machine->DerivX[source->Index][3] * invQ;
 301       }
 302       else {
 303          deriv[0] = machine->DerivY[source->Index][0] * invQ;
 304          deriv[1] = machine->DerivY[source->Index][1] * invQ;
 305          deriv[2] = machine->DerivY[source->Index][2] * invQ;
 306          deriv[3] = machine->DerivY[source->Index][3] * invQ;
 307       }
 308
 309       result[0] = deriv[GET_SWZ(source->Swizzle, 0)];
 310       result[1] = deriv[GET_SWZ(source->Swizzle, 1)];
 311       result[2] = deriv[GET_SWZ(source->Swizzle, 2)];
 312       result[3] = deriv[GET_SWZ(source->Swizzle, 3)];
 313
 314       if (source->Abs) {
 315          result[0] = FABSF(result[0]);
 316          result[1] = FABSF(result[1]);
 317          result[2] = FABSF(result[2]);
 318          result[3] = FABSF(result[3]);
 319       }
 320       if (source->Negate) {
 321          ASSERT(source->Negate == NEGATE_XYZW);
 322          result[0] = -result[0];
 323          result[1] = -result[1];
 324          result[2] = -result[2];
 325          result[3] = -result[3];
 326       }
 327    }
 328    else {
 329       ASSIGN_4V(result, 0.0, 0.0, 0.0, 0.0);
 330    }
 331 }
 332
 333
 334 /**
 335  * As above, but only return result[0] element.
 336  */
 337 static void
 338 fetch_vector1(const struct prog_src_register *source,
 339               const struct gl_program_machine *machine, GLfloat result[4])
 340 {
 341    const GLfloat *src = get_src_register_pointer(source, machine);
 342    ASSERT(src);
 343
 344    result[0] = src[GET_SWZ(source->Swizzle, 0)];
 345
 346    if (source->Abs) {
 347       result[0] = FABSF(result[0]);
 348    }
 349    if (source->Negate) {
 350       result[0] = -result[0];
 351    }
 352 }
 353
 354
 355 /**
 356  * Fetch texel from texture.  Use partial derivatives when possible.
 357  */
 358 static INLINE void
 359 fetch_texel(GLcontext *ctx,
 360             const struct gl_program_machine *machine,
 361             const struct prog_instruction *inst,
 362             const GLfloat texcoord[4], GLfloat lodBias,
 363             GLfloat color[4])
 364 {
 365    const GLuint unit = machine->Samplers[inst->TexSrcUnit];
 366
 367    /* Note: we only have the right derivatives for fragment input attribs.
 368     */
 369    if (machine->NumDeriv > 0 &&
 370        inst->SrcReg[0].File == PROGRAM_INPUT &&
 371        inst->SrcReg[0].Index == FRAG_ATTRIB_TEX0 + inst->TexSrcUnit) {
 372       /* simple texture fetch for which we should have derivatives */
 373       GLuint attr = inst->SrcReg[0].Index;
 374       machine->FetchTexelDeriv(ctx, texcoord,
 375                                machine->DerivX[attr],
 376                                machine->DerivY[attr],
 377                                lodBias, unit, color);
 378    }
 379    else {
 380       machine->FetchTexelLod(ctx, texcoord, lodBias, unit, color);
 381    }
 382 }
 383
 384
 385 /**
 386  * Test value against zero and return GT, LT, EQ or UN if NaN.
 387  */
 388 static INLINE GLuint
 389 generate_cc(float value)
 390 {
 391    if (value != value)
 392       return COND_UN;           /* NaN */
 393    if (value > 0.0F)
 394       return COND_GT;
 395    if (value < 0.0F)
 396       return COND_LT;
 397    return COND_EQ;
 398 }
 399
 400
 401 /**
 402  * Test if the ccMaskRule is satisfied by the given condition code.
 403  * Used to mask destination writes according to the current condition code.
 404  */
 405 static INLINE GLboolean
 406 test_cc(GLuint condCode, GLuint ccMaskRule)
 407 {
 408    switch (ccMaskRule) {
 409    case COND_EQ: return (condCode == COND_EQ);
 410    case COND_NE: return (condCode != COND_EQ);
 411    case COND_LT: return (condCode == COND_LT);
 412    case COND_GE: return (condCode == COND_GT || condCode == COND_EQ);
 413    case COND_LE: return (condCode == COND_LT || condCode == COND_EQ);
 414    case COND_GT: return (condCode == COND_GT);
 415    case COND_TR: return GL_TRUE;
 416    case COND_FL: return GL_FALSE;
 417    default:      return GL_TRUE;
 418    }
 419 }
 420
 421
 422 /**
 423  * Evaluate the 4 condition codes against a predicate and return GL_TRUE
 424  * or GL_FALSE to indicate result.
 425  */
 426 static INLINE GLboolean
 427 eval_condition(const struct gl_program_machine *machine,
 428                const struct prog_instruction *inst)
 429 {
 430    const GLuint swizzle = inst->DstReg.CondSwizzle;
 431    const GLuint condMask = inst->DstReg.CondMask;
 432    if (test_cc(machine->CondCodes[GET_SWZ(swizzle, 0)], condMask) ||
 433        test_cc(machine->CondCodes[GET_SWZ(swizzle, 1)], condMask) ||
 434        test_cc(machine->CondCodes[GET_SWZ(swizzle, 2)], condMask) ||
 435        test_cc(machine->CondCodes[GET_SWZ(swizzle, 3)], condMask)) {
 436       return GL_TRUE;
 437    }
 438    else {
 439       return GL_FALSE;
 440    }
 441 }
 442
 443
 444
 445 /**
 446  * Store 4 floats into a register.  Observe the instructions saturate and
 447  * set-condition-code flags.
 448  */
 449 static void
 450 store_vector4(const struct prog_instruction *inst,
 451               struct gl_program_machine *machine, const GLfloat value[4])
 452 {
 453    const struct prog_dst_register *dstReg = &(inst->DstReg);
 454    const GLboolean clamp = inst->SaturateMode == SATURATE_ZERO_ONE;
 455    GLuint writeMask = dstReg->WriteMask;
 456    GLfloat clampedValue[4];
 457    GLfloat *dst = get_dst_register_pointer(dstReg, machine);
 458
 459 #if 0
 460    if (value[0] > 1.0e10 ||
 461        IS_INF_OR_NAN(value[0]) ||
 462        IS_INF_OR_NAN(value[1]) ||
 463        IS_INF_OR_NAN(value[2]) || IS_INF_OR_NAN(value[3]))
 464       printf("store %g %g %g %g\n", value[0], value[1], value[2], value[3]);
 465 #endif
 466
 467    if (clamp) {
 468       clampedValue[0] = CLAMP(value[0], 0.0F, 1.0F);
 469       clampedValue[1] = CLAMP(value[1], 0.0F, 1.0F);
 470       clampedValue[2] = CLAMP(value[2], 0.0F, 1.0F);
 471       clampedValue[3] = CLAMP(value[3], 0.0F, 1.0F);
 472       value = clampedValue;
 473    }
 474
 475    if (dstReg->CondMask != COND_TR) {
 476       /* condition codes may turn off some writes */
 477       if (writeMask & WRITEMASK_X) {
 478          if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 0)],
 479                       dstReg->CondMask))
 480             writeMask &= ~WRITEMASK_X;
 481       }
 482       if (writeMask & WRITEMASK_Y) {
 483          if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 1)],
 484                       dstReg->CondMask))
 485             writeMask &= ~WRITEMASK_Y;
 486       }
 487       if (writeMask & WRITEMASK_Z) {
 488          if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 2)],
 489                       dstReg->CondMask))
 490             writeMask &= ~WRITEMASK_Z;
 491       }
 492       if (writeMask & WRITEMASK_W) {
 493          if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 3)],
 494                       dstReg->CondMask))
 495             writeMask &= ~WRITEMASK_W;
 496       }
 497    }
 498
 499 #ifdef NAN_CHECK
 500    assert(!IS_INF_OR_NAN(value[0]));
 501    assert(!IS_INF_OR_NAN(value[0]));
 502    assert(!IS_INF_OR_NAN(value[0]));
 503    assert(!IS_INF_OR_NAN(value[0]));
 504 #endif
 505
 506    if (writeMask & WRITEMASK_X)
 507       dst[0] = value[0];
 508    if (writeMask & WRITEMASK_Y)
 509       dst[1] = value[1];
 510    if (writeMask & WRITEMASK_Z)
 511       dst[2] = value[2];
 512    if (writeMask & WRITEMASK_W)
 513       dst[3] = value[3];
 514
 515    if (inst->CondUpdate) {
 516       if (writeMask & WRITEMASK_X)
 517          machine->CondCodes[0] = generate_cc(value[0]);
 518       if (writeMask & WRITEMASK_Y)
 519          machine->CondCodes[1] = generate_cc(value[1]);
 520       if (writeMask & WRITEMASK_Z)
 521          machine->CondCodes[2] = generate_cc(value[2]);
 522       if (writeMask & WRITEMASK_W)
 523          machine->CondCodes[3] = generate_cc(value[3]);
 524 #if DEBUG_PROG
 525       printf("CondCodes=(%s,%s,%s,%s) for:\n",
 526              _mesa_condcode_string(machine->CondCodes[0]),
 527              _mesa_condcode_string(machine->CondCodes[1]),
 528              _mesa_condcode_string(machine->CondCodes[2]),
 529              _mesa_condcode_string(machine->CondCodes[3]));
 530 #endif
 531    }
 532 }
 533
 534
 535 /**
 536  * Store 4 uints into a register.  Observe the set-condition-code flags.
 537  */
 538 static void
 539 store_vector4ui(const struct prog_instruction *inst,
 540                 struct gl_program_machine *machine, const GLuint value[4])
 541 {
 542    const struct prog_dst_register *dstReg = &(inst->DstReg);
 543    GLuint writeMask = dstReg->WriteMask;
 544    GLuint *dst = (GLuint *) get_dst_register_pointer(dstReg, machine);
 545
 546    if (dstReg->CondMask != COND_TR) {
 547       /* condition codes may turn off some writes */
 548       if (writeMask & WRITEMASK_X) {
 549          if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 0)],
 550                       dstReg->CondMask))
 551             writeMask &= ~WRITEMASK_X;
 552       }
 553       if (writeMask & WRITEMASK_Y) {
 554          if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 1)],
 555                       dstReg->CondMask))
 556             writeMask &= ~WRITEMASK_Y;
 557       }
 558       if (writeMask & WRITEMASK_Z) {
 559          if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 2)],
 560                       dstReg->CondMask))
 561             writeMask &= ~WRITEMASK_Z;
 562       }
 563       if (writeMask & WRITEMASK_W) {
 564          if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 3)],
 565                       dstReg->CondMask))
 566             writeMask &= ~WRITEMASK_W;
 567       }
 568    }
 569
 570    if (writeMask & WRITEMASK_X)
 571       dst[0] = value[0];
 572    if (writeMask & WRITEMASK_Y)
 573       dst[1] = value[1];
 574    if (writeMask & WRITEMASK_Z)
 575       dst[2] = value[2];
 576    if (writeMask & WRITEMASK_W)
 577       dst[3] = value[3];
 578
 579    if (inst->CondUpdate) {
 580       if (writeMask & WRITEMASK_X)
 581          machine->CondCodes[0] = generate_cc(value[0]);
 582       if (writeMask & WRITEMASK_Y)
 583          machine->CondCodes[1] = generate_cc(value[1]);
 584       if (writeMask & WRITEMASK_Z)
 585          machine->CondCodes[2] = generate_cc(value[2]);
 586       if (writeMask & WRITEMASK_W)
 587          machine->CondCodes[3] = generate_cc(value[3]);
 588 #if DEBUG_PROG
 589       printf("CondCodes=(%s,%s,%s,%s) for:\n",
 590              _mesa_condcode_string(machine->CondCodes[0]),
 591              _mesa_condcode_string(machine->CondCodes[1]),
 592              _mesa_condcode_string(machine->CondCodes[2]),
 593              _mesa_condcode_string(machine->CondCodes[3]));
 594 #endif
 595    }
 596 }
 597
 598
 599
 600 /**
 601  * Execute the given vertex/fragment program.
 602  *
 603  * \param ctx  rendering context
 604  * \param program  the program to execute
 605  * \param machine  machine state (must be initialized)
 606  * \return GL_TRUE if program completed or GL_FALSE if program executed KIL.
 607  */
 608 GLboolean
 609 _mesa_execute_program(GLcontext * ctx,
 610                       const struct gl_program *program,
 611                       struct gl_program_machine *machine)
 612 {
 613    const GLuint numInst = program->NumInstructions;
 614    const GLuint maxExec = 10000;
 615    GLuint pc, numExec = 0;
 616
 617    machine->CurProgram = program;
 618
 619    if (DEBUG_PROG) {
 620       printf("execute program %u --------------------\n", program->Id);
 621    }
 622
 623    if (program->Target == GL_VERTEX_PROGRAM_ARB) {
 624       machine->EnvParams = ctx->VertexProgram.Parameters;
 625    }
 626    else {
 627       machine->EnvParams = ctx->FragmentProgram.Parameters;
 628    }
 629
 630    for (pc = 0; pc < numInst; pc++) {
 631       const struct prog_instruction *inst = program->Instructions + pc;
 632
 633       if (DEBUG_PROG) {
 634          _mesa_print_instruction(inst);
 635       }
 636
 637       switch (inst->Opcode) {
 638       case OPCODE_ABS:
 639          {
 640             GLfloat a[4], result[4];
 641             fetch_vector4(&inst->SrcReg[0], machine, a);
 642             result[0] = FABSF(a[0]);
 643             result[1] = FABSF(a[1]);
 644             result[2] = FABSF(a[2]);
 645             result[3] = FABSF(a[3]);
 646             store_vector4(inst, machine, result);
 647          }
 648          break;
 649       case OPCODE_ADD:
 650          {
 651             GLfloat a[4], b[4], result[4];
 652             fetch_vector4(&inst->SrcReg[0], machine, a);
 653             fetch_vector4(&inst->SrcReg[1], machine, b);
 654             result[0] = a[0] + b[0];
 655             result[1] = a[1] + b[1];
 656             result[2] = a[2] + b[2];
 657             result[3] = a[3] + b[3];
 658             store_vector4(inst, machine, result);
 659             if (DEBUG_PROG) {
 660                printf("ADD (%g %g %g %g) = (%g %g %g %g) + (%g %g %g %g)\n",
 661                       result[0], result[1], result[2], result[3],
 662                       a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
 663             }
 664          }
 665          break;
 666       case OPCODE_AND:     /* bitwise AND */
 667          {
 668             GLuint a[4], b[4], result[4];
 669             fetch_vector4ui(&inst->SrcReg[0], machine, a);
 670             fetch_vector4ui(&inst->SrcReg[1], machine, b);
 671             result[0] = a[0] & b[0];
 672             result[1] = a[1] & b[1];
 673             result[2] = a[2] & b[2];
 674             result[3] = a[3] & b[3];
 675             store_vector4ui(inst, machine, result);
 676          }
 677          break;
 678       case OPCODE_ARL:
 679          {
 680             GLfloat t[4];
 681             fetch_vector4(&inst->SrcReg[0], machine, t);
 682             machine->AddressReg[0][0] = IFLOOR(t[0]);
 683          }
 684          break;
 685       case OPCODE_BGNLOOP:
 686          /* no-op */
 687          ASSERT(program->Instructions[inst->BranchTarget].Opcode
 688                 == OPCODE_ENDLOOP);
 689          break;
 690       case OPCODE_ENDLOOP:
 691          /* subtract 1 here since pc is incremented by for(pc) loop */
 692          ASSERT(program->Instructions[inst->BranchTarget].Opcode
 693                 == OPCODE_BGNLOOP);
 694          pc = inst->BranchTarget - 1;   /* go to matching BNGLOOP */
 695          break;
 696       case OPCODE_BGNSUB:      /* begin subroutine */
 697          break;
 698       case OPCODE_ENDSUB:      /* end subroutine */
 699          break;
 700       case OPCODE_BRA:         /* branch (conditional) */
 701          if (eval_condition(machine, inst)) {
 702             /* take branch */
 703             /* Subtract 1 here since we'll do pc++ below */
 704             pc = inst->BranchTarget - 1;
 705          }
 706          break;
 707       case OPCODE_BRK:         /* break out of loop (conditional) */
 708          ASSERT(program->Instructions[inst->BranchTarget].Opcode
 709                 == OPCODE_ENDLOOP);
 710          if (eval_condition(machine, inst)) {
 711             /* break out of loop */
 712             /* pc++ at end of for-loop will put us after the ENDLOOP inst */
 713             pc = inst->BranchTarget;
 714          }
 715          break;
 716       case OPCODE_CONT:        /* continue loop (conditional) */
 717          ASSERT(program->Instructions[inst->BranchTarget].Opcode
 718                 == OPCODE_ENDLOOP);
 719          if (eval_condition(machine, inst)) {
 720             /* continue at ENDLOOP */
 721             /* Subtract 1 here since we'll do pc++ at end of for-loop */
 722             pc = inst->BranchTarget - 1;
 723          }
 724          break;
 725       case OPCODE_CAL:         /* Call subroutine (conditional) */
 726          if (eval_condition(machine, inst)) {
 727             /* call the subroutine */
 728             if (machine->StackDepth >= MAX_PROGRAM_CALL_DEPTH) {
 729                return GL_TRUE;  /* Per GL_NV_vertex_program2 spec */
 730             }
 731             machine->CallStack[machine->StackDepth++] = pc + 1; /* next inst */
 732             /* Subtract 1 here since we'll do pc++ at end of for-loop */
 733             pc = inst->BranchTarget - 1;
 734          }
 735          break;
 736       case OPCODE_CMP:
 737          {
 738             GLfloat a[4], b[4], c[4], result[4];
 739             fetch_vector4(&inst->SrcReg[0], machine, a);
 740             fetch_vector4(&inst->SrcReg[1], machine, b);
 741             fetch_vector4(&inst->SrcReg[2], machine, c);
 742             result[0] = a[0] < 0.0F ? b[0] : c[0];
 743             result[1] = a[1] < 0.0F ? b[1] : c[1];
 744             result[2] = a[2] < 0.0F ? b[2] : c[2];
 745             result[3] = a[3] < 0.0F ? b[3] : c[3];
 746             store_vector4(inst, machine, result);
 747          }
 748          break;
 749       case OPCODE_COS:
 750          {
 751             GLfloat a[4], result[4];
 752             fetch_vector1(&inst->SrcReg[0], machine, a);
 753             result[0] = result[1] = result[2] = result[3]
 754                = (GLfloat) _mesa_cos(a[0]);
 755             store_vector4(inst, machine, result);
 756          }
 757          break;
 758       case OPCODE_DDX:         /* Partial derivative with respect to X */
 759          {
 760             GLfloat result[4];
 761             fetch_vector4_deriv(ctx, &inst->SrcReg[0], machine,
 762                                 'X', result);
 763             store_vector4(inst, machine, result);
 764          }
 765          break;
 766       case OPCODE_DDY:         /* Partial derivative with respect to Y */
 767          {
 768             GLfloat result[4];
 769             fetch_vector4_deriv(ctx, &inst->SrcReg[0], machine,
 770                                 'Y', result);
 771             store_vector4(inst, machine, result);
 772          }
 773          break;
 774       case OPCODE_DP2:
 775          {
 776             GLfloat a[4], b[4], result[4];
 777             fetch_vector4(&inst->SrcReg[0], machine, a);
 778             fetch_vector4(&inst->SrcReg[1], machine, b);
 779             result[0] = result[1] = result[2] = result[3] = DOT2(a, b);
 780             store_vector4(inst, machine, result);
 781             if (DEBUG_PROG) {
 782                printf("DP2 %g = (%g %g) . (%g %g)\n",
 783                       result[0], a[0], a[1], b[0], b[1]);
 784             }
 785          }
 786          break;
 787       case OPCODE_DP2A:
 788          {
 789             GLfloat a[4], b[4], c, result[4];
 790             fetch_vector4(&inst->SrcReg[0], machine, a);
 791             fetch_vector4(&inst->SrcReg[1], machine, b);
 792             fetch_vector1(&inst->SrcReg[1], machine, &c);
 793             result[0] = result[1] = result[2] = result[3] = DOT2(a, b) + c;
 794             store_vector4(inst, machine, result);
 795             if (DEBUG_PROG) {
 796                printf("DP2A %g = (%g %g) . (%g %g) + %g\n",
 797                       result[0], a[0], a[1], b[0], b[1], c);
 798             }
 799          }
 800          break;
 801       case OPCODE_DP3:
 802          {
 803             GLfloat a[4], b[4], result[4];
 804             fetch_vector4(&inst->SrcReg[0], machine, a);
 805             fetch_vector4(&inst->SrcReg[1], machine, b);
 806             result[0] = result[1] = result[2] = result[3] = DOT3(a, b);
 807             store_vector4(inst, machine, result);
 808             if (DEBUG_PROG) {
 809                printf("DP3 %g = (%g %g %g) . (%g %g %g)\n",
 810                       result[0], a[0], a[1], a[2], b[0], b[1], b[2]);
 811             }
 812          }
 813          break;
 814       case OPCODE_DP4:
 815          {
 816             GLfloat a[4], b[4], result[4];
 817             fetch_vector4(&inst->SrcReg[0], machine, a);
 818             fetch_vector4(&inst->SrcReg[1], machine, b);
 819             result[0] = result[1] = result[2] = result[3] = DOT4(a, b);
 820             store_vector4(inst, machine, result);
 821             if (DEBUG_PROG) {
 822                printf("DP4 %g = (%g, %g %g %g) . (%g, %g %g %g)\n",
 823                       result[0], a[0], a[1], a[2], a[3],
 824                       b[0], b[1], b[2], b[3]);
 825             }
 826          }
 827          break;
 828       case OPCODE_DPH:
 829          {
 830             GLfloat a[4], b[4], result[4];
 831             fetch_vector4(&inst->SrcReg[0], machine, a);
 832             fetch_vector4(&inst->SrcReg[1], machine, b);
 833             result[0] = result[1] = result[2] = result[3] = DOT3(a, b) + b[3];
 834             store_vector4(inst, machine, result);
 835          }
 836          break;
 837       case OPCODE_DST:         /* Distance vector */
 838          {
 839             GLfloat a[4], b[4], result[4];
 840             fetch_vector4(&inst->SrcReg[0], machine, a);
 841             fetch_vector4(&inst->SrcReg[1], machine, b);
 842             result[0] = 1.0F;
 843             result[1] = a[1] * b[1];
 844             result[2] = a[2];
 845             result[3] = b[3];
 846             store_vector4(inst, machine, result);
 847          }
 848          break;
 849       case OPCODE_EXP:
 850          {
 851             GLfloat t[4], q[4], floor_t0;
 852             fetch_vector1(&inst->SrcReg[0], machine, t);
 853             floor_t0 = FLOORF(t[0]);
 854             if (floor_t0 > FLT_MAX_EXP) {
 855                SET_POS_INFINITY(q[0]);
 856                SET_POS_INFINITY(q[2]);
 857             }
 858             else if (floor_t0 < FLT_MIN_EXP) {
 859                q[0] = 0.0F;
 860                q[2] = 0.0F;
 861             }
 862             else {
 863                q[0] = LDEXPF(1.0, (int) floor_t0);
 864                /* Note: GL_NV_vertex_program expects
 865                 * result.z = result.x * APPX(result.y)
 866                 * We do what the ARB extension says.
 867                 */
 868                q[2] = (GLfloat) _mesa_pow(2.0, t[0]);
 869             }
 870             q[1] = t[0] - floor_t0;
 871             q[3] = 1.0F;
 872             store_vector4( inst, machine, q );
 873          }
 874          break;
 875       case OPCODE_EX2:         /* Exponential base 2 */
 876          {
 877             GLfloat a[4], result[4], val;
 878             fetch_vector1(&inst->SrcReg[0], machine, a);
 879             val = (GLfloat) _mesa_pow(2.0, a[0]);
 880             /*
 881             if (IS_INF_OR_NAN(val))
 882                val = 1.0e10;
 883             */
 884             result[0] = result[1] = result[2] = result[3] = val;
 885             store_vector4(inst, machine, result);
 886          }
 887          break;
 888       case OPCODE_FLR:
 889          {
 890             GLfloat a[4], result[4];
 891             fetch_vector4(&inst->SrcReg[0], machine, a);
 892             result[0] = FLOORF(a[0]);
 893             result[1] = FLOORF(a[1]);
 894             result[2] = FLOORF(a[2]);
 895             result[3] = FLOORF(a[3]);
 896             store_vector4(inst, machine, result);
 897          }
 898          break;
 899       case OPCODE_FRC:
 900          {
 901             GLfloat a[4], result[4];
 902             fetch_vector4(&inst->SrcReg[0], machine, a);
 903             result[0] = a[0] - FLOORF(a[0]);
 904             result[1] = a[1] - FLOORF(a[1]);
 905             result[2] = a[2] - FLOORF(a[2]);
 906             result[3] = a[3] - FLOORF(a[3]);
 907             store_vector4(inst, machine, result);
 908          }
 909          break;
 910       case OPCODE_IF:
 911          {
 912             GLboolean cond;
 913             ASSERT(program->Instructions[inst->BranchTarget].Opcode
 914                    == OPCODE_ELSE ||
 915                    program->Instructions[inst->BranchTarget].Opcode
 916                    == OPCODE_ENDIF);
 917             /* eval condition */
 918             if (inst->SrcReg[0].File != PROGRAM_UNDEFINED) {
 919                GLfloat a[4];
 920                fetch_vector1(&inst->SrcReg[0], machine, a);
 921                cond = (a[0] != 0.0);
 922             }
 923             else {
 924                cond = eval_condition(machine, inst);
 925             }
 926             if (DEBUG_PROG) {
 927                printf("IF: %d\n", cond);
 928             }
 929             /* do if/else */
 930             if (cond) {
 931                /* do if-clause (just continue execution) */
 932             }
 933             else {
 934                /* go to the instruction after ELSE or ENDIF */
 935                assert(inst->BranchTarget >= 0);
 936                pc = inst->BranchTarget;
 937             }
 938          }
 939          break;
 940       case OPCODE_ELSE:
 941          /* goto ENDIF */
 942          ASSERT(program->Instructions[inst->BranchTarget].Opcode
 943                 == OPCODE_ENDIF);
 944          assert(inst->BranchTarget >= 0);
 945          pc = inst->BranchTarget;
 946          break;
 947       case OPCODE_ENDIF:
 948          /* nothing */
 949          break;
 950       case OPCODE_KIL_NV:      /* NV_f_p only (conditional) */
 951          if (eval_condition(machine, inst)) {
 952             return GL_FALSE;
 953          }
 954          break;
 955       case OPCODE_KIL:         /* ARB_f_p only */
 956          {
 957             GLfloat a[4];
 958             fetch_vector4(&inst->SrcReg[0], machine, a);
 959             if (DEBUG_PROG) {
 960                printf("KIL if (%g %g %g %g) <= 0.0\n",
 961                       a[0], a[1], a[2], a[3]);
 962             }
 963
 964             if (a[0] < 0.0F || a[1] < 0.0F || a[2] < 0.0F || a[3] < 0.0F) {
 965                return GL_FALSE;
 966             }
 967          }
 968          break;
 969       case OPCODE_LG2:         /* log base 2 */
 970          {
 971             GLfloat a[4], result[4], val;
 972             fetch_vector1(&inst->SrcReg[0], machine, a);
 973             /* The fast LOG2 macro doesn't meet the precision requirements.
 974              */
 975             if (a[0] == 0.0F) {
 976                val = -FLT_MAX;
 977             }
 978             else {
 979                val = log(a[0]) * 1.442695F;
 980             }
 981             result[0] = result[1] = result[2] = result[3] = val;
 982             store_vector4(inst, machine, result);
 983          }
 984          break;
 985       case OPCODE_LIT:
 986          {
 987             const GLfloat epsilon = 1.0F / 256.0F;      /* from NV VP spec */
 988             GLfloat a[4], result[4];
 989             fetch_vector4(&inst->SrcReg[0], machine, a);
 990             a[0] = MAX2(a[0], 0.0F);
 991             a[1] = MAX2(a[1], 0.0F);
 992             /* XXX ARB version clamps a[3], NV version doesn't */
 993             a[3] = CLAMP(a[3], -(128.0F - epsilon), (128.0F - epsilon));
 994             result[0] = 1.0F;
 995             result[1] = a[0];
 996             /* XXX we could probably just use pow() here */
 997             if (a[0] > 0.0F) {
 998                if (a[1] == 0.0 && a[3] == 0.0)
 999                   result[2] = 1.0;
1000                else
1001                   result[2] = (GLfloat) _mesa_pow(a[1], a[3]);
1002             }
1003             else {
1004                result[2] = 0.0;
1005             }
1006             result[3] = 1.0F;
1007             store_vector4(inst, machine, result);
1008             if (DEBUG_PROG) {
1009                printf("LIT (%g %g %g %g) : (%g %g %g %g)\n",
1010                       result[0], result[1], result[2], result[3],
1011                       a[0], a[1], a[2], a[3]);
1012             }
1013          }
1014          break;
1015       case OPCODE_LOG:
1016          {
1017             GLfloat t[4], q[4], abs_t0;
1018             fetch_vector1(&inst->SrcReg[0], machine, t);
1019             abs_t0 = FABSF(t[0]);
1020             if (abs_t0 != 0.0F) {
1021                /* Since we really can't handle infinite values on VMS
1022                 * like other OSes we'll use __MAXFLOAT to represent
1023                 * infinity.  This may need some tweaking.
1024                 */
1025 #ifdef VMS
1026                if (abs_t0 == __MAXFLOAT)
1027 #else
1028                if (IS_INF_OR_NAN(abs_t0))
1029 #endif
1030                {
1031                   SET_POS_INFINITY(q[0]);
1032                   q[1] = 1.0F;
1033                   SET_POS_INFINITY(q[2]);
1034                }
1035                else {
1036                   int exponent;
1037                   GLfloat mantissa = FREXPF(t[0], &exponent);
1038                   q[0] = (GLfloat) (exponent - 1);
1039                   q[1] = (GLfloat) (2.0 * mantissa); /* map [.5, 1) -> [1, 2) */
1040
1041                   /* The fast LOG2 macro doesn't meet the precision
1042                    * requirements.
1043                    */
1044                   q[2] = (log(t[0]) * 1.442695F);
1045                }
1046             }
1047             else {
1048                SET_NEG_INFINITY(q[0]);
1049                q[1] = 1.0F;
1050                SET_NEG_INFINITY(q[2]);
1051             }
1052             q[3] = 1.0;
1053             store_vector4(inst, machine, q);
1054          }
1055          break;
1056       case OPCODE_LRP:
1057          {
1058             GLfloat a[4], b[4], c[4], result[4];
1059             fetch_vector4(&inst->SrcReg[0], machine, a);
1060             fetch_vector4(&inst->SrcReg[1], machine, b);
1061             fetch_vector4(&inst->SrcReg[2], machine, c);
1062             result[0] = a[0] * b[0] + (1.0F - a[0]) * c[0];
1063             result[1] = a[1] * b[1] + (1.0F - a[1]) * c[1];
1064             result[2] = a[2] * b[2] + (1.0F - a[2]) * c[2];
1065             result[3] = a[3] * b[3] + (1.0F - a[3]) * c[3];
1066             store_vector4(inst, machine, result);
1067             if (DEBUG_PROG) {
1068                printf("LRP (%g %g %g %g) = (%g %g %g %g), "
1069                       "(%g %g %g %g), (%g %g %g %g)\n",
1070                       result[0], result[1], result[2], result[3],
1071                       a[0], a[1], a[2], a[3],
1072                       b[0], b[1], b[2], b[3], c[0], c[1], c[2], c[3]);
1073             }
1074          }
1075          break;
1076       case OPCODE_MAD:
1077          {
1078             GLfloat a[4], b[4], c[4], result[4];
1079             fetch_vector4(&inst->SrcReg[0], machine, a);
1080             fetch_vector4(&inst->SrcReg[1], machine, b);
1081             fetch_vector4(&inst->SrcReg[2], machine, c);
1082             result[0] = a[0] * b[0] + c[0];
1083             result[1] = a[1] * b[1] + c[1];
1084             result[2] = a[2] * b[2] + c[2];
1085             result[3] = a[3] * b[3] + c[3];
1086             store_vector4(inst, machine, result);
1087             if (DEBUG_PROG) {
1088                printf("MAD (%g %g %g %g) = (%g %g %g %g) * "
1089                       "(%g %g %g %g) + (%g %g %g %g)\n",
1090                       result[0], result[1], result[2], result[3],
1091                       a[0], a[1], a[2], a[3],
1092                       b[0], b[1], b[2], b[3], c[0], c[1], c[2], c[3]);
1093             }
1094          }
1095          break;
1096       case OPCODE_MAX:
1097          {
1098             GLfloat a[4], b[4], result[4];
1099             fetch_vector4(&inst->SrcReg[0], machine, a);
1100             fetch_vector4(&inst->SrcReg[1], machine, b);
1101             result[0] = MAX2(a[0], b[0]);
1102             result[1] = MAX2(a[1], b[1]);
1103             result[2] = MAX2(a[2], b[2]);
1104             result[3] = MAX2(a[3], b[3]);
1105             store_vector4(inst, machine, result);
1106             if (DEBUG_PROG) {
1107                printf("MAX (%g %g %g %g) = (%g %g %g %g), (%g %g %g %g)\n",
1108                       result[0], result[1], result[2], result[3],
1109                       a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
1110             }
1111          }
1112          break;
1113       case OPCODE_MIN:
1114          {
1115             GLfloat a[4], b[4], result[4];
1116             fetch_vector4(&inst->SrcReg[0], machine, a);
1117             fetch_vector4(&inst->SrcReg[1], machine, b);
1118             result[0] = MIN2(a[0], b[0]);
1119             result[1] = MIN2(a[1], b[1]);
1120             result[2] = MIN2(a[2], b[2]);
1121             result[3] = MIN2(a[3], b[3]);
1122             store_vector4(inst, machine, result);
1123          }
1124          break;
1125       case OPCODE_MOV:
1126          {
1127             GLfloat result[4];
1128             fetch_vector4(&inst->SrcReg[0], machine, result);
1129             store_vector4(inst, machine, result);
1130             if (DEBUG_PROG) {
1131                printf("MOV (%g %g %g %g)\n",
1132                       result[0], result[1], result[2], result[3]);
1133             }
1134          }
1135          break;
1136       case OPCODE_MUL:
1137          {
1138             GLfloat a[4], b[4], result[4];
1139             fetch_vector4(&inst->SrcReg[0], machine, a);
1140             fetch_vector4(&inst->SrcReg[1], machine, b);
1141             result[0] = a[0] * b[0];
1142             result[1] = a[1] * b[1];
1143             result[2] = a[2] * b[2];
1144             result[3] = a[3] * b[3];
1145             store_vector4(inst, machine, result);
1146             if (DEBUG_PROG) {
1147                printf("MUL (%g %g %g %g) = (%g %g %g %g) * (%g %g %g %g)\n",
1148                       result[0], result[1], result[2], result[3],
1149                       a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
1150             }
1151          }
1152          break;
1153       case OPCODE_NOISE1:
1154          {
1155             GLfloat a[4], result[4];
1156             fetch_vector1(&inst->SrcReg[0], machine, a);
1157             result[0] =
1158                result[1] =
1159                result[2] =
1160                result[3] = _mesa_noise1(a[0]);
1161             store_vector4(inst, machine, result);
1162          }
1163          break;
1164       case OPCODE_NOISE2:
1165          {
1166             GLfloat a[4], result[4];
1167             fetch_vector4(&inst->SrcReg[0], machine, a);
1168             result[0] =
1169                result[1] =
1170                result[2] = result[3] = _mesa_noise2(a[0], a[1]);
1171             store_vector4(inst, machine, result);
1172          }
1173          break;
1174       case OPCODE_NOISE3:
1175          {
1176             GLfloat a[4], result[4];
1177             fetch_vector4(&inst->SrcReg[0], machine, a);
1178             result[0] =
1179                result[1] =
1180                result[2] =
1181                result[3] = _mesa_noise3(a[0], a[1], a[2]);
1182             store_vector4(inst, machine, result);
1183          }
1184          break;
1185       case OPCODE_NOISE4:
1186          {
1187             GLfloat a[4], result[4];
1188             fetch_vector4(&inst->SrcReg[0], machine, a);
1189             result[0] =
1190                result[1] =
1191                result[2] =
1192                result[3] = _mesa_noise4(a[0], a[1], a[2], a[3]);
1193             store_vector4(inst, machine, result);
1194          }
1195          break;
1196       case OPCODE_NOP:
1197          break;
1198       case OPCODE_NOT:         /* bitwise NOT */
1199          {
1200             GLuint a[4], result[4];
1201             fetch_vector4ui(&inst->SrcReg[0], machine, a);
1202             result[0] = ~a[0];
1203             result[1] = ~a[1];
1204             result[2] = ~a[2];
1205             result[3] = ~a[3];
1206             store_vector4ui(inst, machine, result);
1207          }
1208          break;
1209       case OPCODE_NRM3:        /* 3-component normalization */
1210          {
1211             GLfloat a[4], result[4];
1212             GLfloat tmp;
1213             fetch_vector4(&inst->SrcReg[0], machine, a);
1214             tmp = a[0] * a[0] + a[1] * a[1] + a[2] * a[2];
1215             if (tmp != 0.0F)
1216                tmp = INV_SQRTF(tmp);
1217             result[0] = tmp * a[0];
1218             result[1] = tmp * a[1];
1219             result[2] = tmp * a[2];
1220             result[3] = 0.0;  /* undefined, but prevent valgrind warnings */
1221             store_vector4(inst, machine, result);
1222          }
1223          break;
1224       case OPCODE_NRM4:        /* 4-component normalization */
1225          {
1226             GLfloat a[4], result[4];
1227             GLfloat tmp;
1228             fetch_vector4(&inst->SrcReg[0], machine, a);
1229             tmp = a[0] * a[0] + a[1] * a[1] + a[2] * a[2] + a[3] * a[3];
1230             if (tmp != 0.0F)
1231                tmp = INV_SQRTF(tmp);
1232             result[0] = tmp * a[0];
1233             result[1] = tmp * a[1];
1234             result[2] = tmp * a[2];
1235             result[3] = tmp * a[3];
1236             store_vector4(inst, machine, result);
1237          }
1238          break;
1239       case OPCODE_OR:          /* bitwise OR */
1240          {
1241             GLuint a[4], b[4], result[4];
1242             fetch_vector4ui(&inst->SrcReg[0], machine, a);
1243             fetch_vector4ui(&inst->SrcReg[1], machine, b);
1244             result[0] = a[0] | b[0];
1245             result[1] = a[1] | b[1];
1246             result[2] = a[2] | b[2];
1247             result[3] = a[3] | b[3];
1248             store_vector4ui(inst, machine, result);
1249          }
1250          break;
1251       case OPCODE_PK2H:        /* pack two 16-bit floats in one 32-bit float */
1252          {
1253             GLfloat a[4];
1254             GLuint result[4];
1255             GLhalfNV hx, hy;
1256             fetch_vector4(&inst->SrcReg[0], machine, a);
1257             hx = _mesa_float_to_half(a[0]);
1258             hy = _mesa_float_to_half(a[1]);
1259             result[0] =
1260             result[1] =
1261             result[2] =
1262             result[3] = hx | (hy << 16);
1263             store_vector4ui(inst, machine, result);
1264          }
1265          break;
1266       case OPCODE_PK2US:       /* pack two GLushorts into one 32-bit float */
1267          {
1268             GLfloat a[4];
1269             GLuint result[4], usx, usy;
1270             fetch_vector4(&inst->SrcReg[0], machine, a);
1271             a[0] = CLAMP(a[0], 0.0F, 1.0F);
1272             a[1] = CLAMP(a[1], 0.0F, 1.0F);
1273             usx = IROUND(a[0] * 65535.0F);
1274             usy = IROUND(a[1] * 65535.0F);
1275             result[0] =
1276             result[1] =
1277             result[2] =
1278             result[3] = usx | (usy << 16);
1279             store_vector4ui(inst, machine, result);
1280          }
1281          break;
1282       case OPCODE_PK4B:        /* pack four GLbytes into one 32-bit float */
1283          {
1284             GLfloat a[4];
1285             GLuint result[4], ubx, uby, ubz, ubw;
1286             fetch_vector4(&inst->SrcReg[0], machine, a);
1287             a[0] = CLAMP(a[0], -128.0F / 127.0F, 1.0F);
1288             a[1] = CLAMP(a[1], -128.0F / 127.0F, 1.0F);
1289             a[2] = CLAMP(a[2], -128.0F / 127.0F, 1.0F);
1290             a[3] = CLAMP(a[3], -128.0F / 127.0F, 1.0F);
1291             ubx = IROUND(127.0F * a[0] + 128.0F);
1292             uby = IROUND(127.0F * a[1] + 128.0F);
1293             ubz = IROUND(127.0F * a[2] + 128.0F);
1294             ubw = IROUND(127.0F * a[3] + 128.0F);
1295             result[0] =
1296             result[1] =
1297             result[2] =
1298             result[3] = ubx | (uby << 8) | (ubz << 16) | (ubw << 24);
1299             store_vector4ui(inst, machine, result);
1300          }
1301          break;
1302       case OPCODE_PK4UB:       /* pack four GLubytes into one 32-bit float */
1303          {
1304             GLfloat a[4];
1305             GLuint result[4], ubx, uby, ubz, ubw;
1306             fetch_vector4(&inst->SrcReg[0], machine, a);
1307             a[0] = CLAMP(a[0], 0.0F, 1.0F);
1308             a[1] = CLAMP(a[1], 0.0F, 1.0F);
1309             a[2] = CLAMP(a[2], 0.0F, 1.0F);
1310             a[3] = CLAMP(a[3], 0.0F, 1.0F);
1311             ubx = IROUND(255.0F * a[0]);
1312             uby = IROUND(255.0F * a[1]);
1313             ubz = IROUND(255.0F * a[2]);
1314             ubw = IROUND(255.0F * a[3]);
1315             result[0] =
1316             result[1] =
1317             result[2] =
1318             result[3] = ubx | (uby << 8) | (ubz << 16) | (ubw << 24);
1319             store_vector4ui(inst, machine, result);
1320          }
1321          break;
1322       case OPCODE_POW:
1323          {
1324             GLfloat a[4], b[4], result[4];
1325             fetch_vector1(&inst->SrcReg[0], machine, a);
1326             fetch_vector1(&inst->SrcReg[1], machine, b);
1327             result[0] = result[1] = result[2] = result[3]
1328                = (GLfloat) _mesa_pow(a[0], b[0]);
1329             store_vector4(inst, machine, result);
1330          }
1331          break;
1332       case OPCODE_RCP:
1333          {
1334             GLfloat a[4], result[4];
1335             fetch_vector1(&inst->SrcReg[0], machine, a);
1336             if (DEBUG_PROG) {
1337                if (a[0] == 0)
1338                   printf("RCP(0)\n");
1339                else if (IS_INF_OR_NAN(a[0]))
1340                   printf("RCP(inf)\n");
1341             }
1342             result[0] = result[1] = result[2] = result[3] = 1.0F / a[0];
1343             store_vector4(inst, machine, result);
1344          }
1345          break;
1346       case OPCODE_RET:         /* return from subroutine (conditional) */
1347          if (eval_condition(machine, inst)) {
1348             if (machine->StackDepth == 0) {
1349                return GL_TRUE;  /* Per GL_NV_vertex_program2 spec */
1350             }
1351             /* subtract one because of pc++ in the for loop */
1352             pc = machine->CallStack[--machine->StackDepth] - 1;
1353          }
1354          break;
1355       case OPCODE_RFL:         /* reflection vector */
1356          {
1357             GLfloat axis[4], dir[4], result[4], tmpX, tmpW;
1358             fetch_vector4(&inst->SrcReg[0], machine, axis);
1359             fetch_vector4(&inst->SrcReg[1], machine, dir);
1360             tmpW = DOT3(axis, axis);
1361             tmpX = (2.0F * DOT3(axis, dir)) / tmpW;
1362             result[0] = tmpX * axis[0] - dir[0];
1363             result[1] = tmpX * axis[1] - dir[1];
1364             result[2] = tmpX * axis[2] - dir[2];
1365             /* result[3] is never written! XXX enforce in parser! */
1366             store_vector4(inst, machine, result);
1367          }
1368          break;
1369       case OPCODE_RSQ:         /* 1 / sqrt() */
1370          {
1371             GLfloat a[4], result[4];
1372             fetch_vector1(&inst->SrcReg[0], machine, a);
1373             a[0] = FABSF(a[0]);
1374             result[0] = result[1] = result[2] = result[3] = INV_SQRTF(a[0]);
1375             store_vector4(inst, machine, result);
1376             if (DEBUG_PROG) {
1377                printf("RSQ %g = 1/sqrt(|%g|)\n", result[0], a[0]);
1378             }
1379          }
1380          break;
1381       case OPCODE_SCS:         /* sine and cos */
1382          {
1383             GLfloat a[4], result[4];
1384             fetch_vector1(&inst->SrcReg[0], machine, a);
1385             result[0] = (GLfloat) _mesa_cos(a[0]);
1386             result[1] = (GLfloat) _mesa_sin(a[0]);
1387             result[2] = 0.0;    /* undefined! */
1388             result[3] = 0.0;    /* undefined! */
1389             store_vector4(inst, machine, result);
1390          }
1391          break;
1392       case OPCODE_SEQ:         /* set on equal */
1393          {
1394             GLfloat a[4], b[4], result[4];
1395             fetch_vector4(&inst->SrcReg[0], machine, a);
1396             fetch_vector4(&inst->SrcReg[1], machine, b);
1397             result[0] = (a[0] == b[0]) ? 1.0F : 0.0F;
1398             result[1] = (a[1] == b[1]) ? 1.0F : 0.0F;
1399             result[2] = (a[2] == b[2]) ? 1.0F : 0.0F;
1400             result[3] = (a[3] == b[3]) ? 1.0F : 0.0F;
1401             store_vector4(inst, machine, result);
1402             if (DEBUG_PROG) {
1403                printf("SEQ (%g %g %g %g) = (%g %g %g %g) == (%g %g %g %g)\n",
1404                       result[0], result[1], result[2], result[3],
1405                       a[0], a[1], a[2], a[3],
1406                       b[0], b[1], b[2], b[3]);
1407             }
1408          }
1409          break;
1410       case OPCODE_SFL:         /* set false, operands ignored */
1411          {
1412             static const GLfloat result[4] = { 0.0F, 0.0F, 0.0F, 0.0F };
1413             store_vector4(inst, machine, result);
1414          }
1415          break;
1416       case OPCODE_SGE:         /* set on greater or equal */
1417          {
1418             GLfloat a[4], b[4], result[4];
1419             fetch_vector4(&inst->SrcReg[0], machine, a);
1420             fetch_vector4(&inst->SrcReg[1], machine, b);
1421             result[0] = (a[0] >= b[0]) ? 1.0F : 0.0F;
1422             result[1] = (a[1] >= b[1]) ? 1.0F : 0.0F;
1423             result[2] = (a[2] >= b[2]) ? 1.0F : 0.0F;
1424             result[3] = (a[3] >= b[3]) ? 1.0F : 0.0F;
1425             store_vector4(inst, machine, result);
1426             if (DEBUG_PROG) {
1427                printf("SGE (%g %g %g %g) = (%g %g %g %g) >= (%g %g %g %g)\n",
1428                       result[0], result[1], result[2], result[3],
1429                       a[0], a[1], a[2], a[3],
1430                       b[0], b[1], b[2], b[3]);
1431             }
1432          }
1433          break;
1434       case OPCODE_SGT:         /* set on greater */
1435          {
1436             GLfloat a[4], b[4], result[4];
1437             fetch_vector4(&inst->SrcReg[0], machine, a);
1438             fetch_vector4(&inst->SrcReg[1], machine, b);
1439             result[0] = (a[0] > b[0]) ? 1.0F : 0.0F;
1440             result[1] = (a[1] > b[1]) ? 1.0F : 0.0F;
1441             result[2] = (a[2] > b[2]) ? 1.0F : 0.0F;
1442             result[3] = (a[3] > b[3]) ? 1.0F : 0.0F;
1443             store_vector4(inst, machine, result);
1444             if (DEBUG_PROG) {
1445                printf("SGT (%g %g %g %g) = (%g %g %g %g) > (%g %g %g %g)\n",
1446                       result[0], result[1], result[2], result[3],
1447                       a[0], a[1], a[2], a[3],
1448                       b[0], b[1], b[2], b[3]);
1449             }
1450          }
1451          break;
1452       case OPCODE_SIN:
1453          {
1454             GLfloat a[4], result[4];
1455             fetch_vector1(&inst->SrcReg[0], machine, a);
1456             result[0] = result[1] = result[2] = result[3]
1457                = (GLfloat) _mesa_sin(a[0]);
1458             store_vector4(inst, machine, result);
1459          }
1460          break;
1461       case OPCODE_SLE:         /* set on less or equal */
1462          {
1463             GLfloat a[4], b[4], result[4];
1464             fetch_vector4(&inst->SrcReg[0], machine, a);
1465             fetch_vector4(&inst->SrcReg[1], machine, b);
1466             result[0] = (a[0] <= b[0]) ? 1.0F : 0.0F;
1467             result[1] = (a[1] <= b[1]) ? 1.0F : 0.0F;
1468             result[2] = (a[2] <= b[2]) ? 1.0F : 0.0F;
1469             result[3] = (a[3] <= b[3]) ? 1.0F : 0.0F;
1470             store_vector4(inst, machine, result);
1471             if (DEBUG_PROG) {
1472                printf("SLE (%g %g %g %g) = (%g %g %g %g) <= (%g %g %g %g)\n",
1473                       result[0], result[1], result[2], result[3],
1474                       a[0], a[1], a[2], a[3],
1475                       b[0], b[1], b[2], b[3]);
1476             }
1477          }
1478          break;
1479       case OPCODE_SLT:         /* set on less */
1480          {
1481             GLfloat a[4], b[4], result[4];
1482             fetch_vector4(&inst->SrcReg[0], machine, a);
1483             fetch_vector4(&inst->SrcReg[1], machine, b);
1484             result[0] = (a[0] < b[0]) ? 1.0F : 0.0F;
1485             result[1] = (a[1] < b[1]) ? 1.0F : 0.0F;
1486             result[2] = (a[2] < b[2]) ? 1.0F : 0.0F;
1487             result[3] = (a[3] < b[3]) ? 1.0F : 0.0F;
1488             store_vector4(inst, machine, result);
1489             if (DEBUG_PROG) {
1490                printf("SLT (%g %g %g %g) = (%g %g %g %g) < (%g %g %g %g)\n",
1491                       result[0], result[1], result[2], result[3],
1492                       a[0], a[1], a[2], a[3],
1493                       b[0], b[1], b[2], b[3]);
1494             }
1495          }
1496          break;
1497       case OPCODE_SNE:         /* set on not equal */
1498          {
1499             GLfloat a[4], b[4], result[4];
1500             fetch_vector4(&inst->SrcReg[0], machine, a);
1501             fetch_vector4(&inst->SrcReg[1], machine, b);
1502             result[0] = (a[0] != b[0]) ? 1.0F : 0.0F;
1503             result[1] = (a[1] != b[1]) ? 1.0F : 0.0F;
1504             result[2] = (a[2] != b[2]) ? 1.0F : 0.0F;
1505             result[3] = (a[3] != b[3]) ? 1.0F : 0.0F;
1506             store_vector4(inst, machine, result);
1507             if (DEBUG_PROG) {
1508                printf("SNE (%g %g %g %g) = (%g %g %g %g) != (%g %g %g %g)\n",
1509                       result[0], result[1], result[2], result[3],
1510                       a[0], a[1], a[2], a[3],
1511                       b[0], b[1], b[2], b[3]);
1512             }
1513          }
1514          break;
1515       case OPCODE_SSG:         /* set sign (-1, 0 or +1) */
1516          {
1517             GLfloat a[4], result[4];
1518             fetch_vector4(&inst->SrcReg[0], machine, a);
1519             result[0] = (GLfloat) ((a[0] > 0.0F) - (a[0] < 0.0F));
1520             result[1] = (GLfloat) ((a[1] > 0.0F) - (a[1] < 0.0F));
1521             result[2] = (GLfloat) ((a[2] > 0.0F) - (a[2] < 0.0F));
1522             result[3] = (GLfloat) ((a[3] > 0.0F) - (a[3] < 0.0F));
1523             store_vector4(inst, machine, result);
1524          }
1525          break;
1526       case OPCODE_STR:         /* set true, operands ignored */
1527          {
1528             static const GLfloat result[4] = { 1.0F, 1.0F, 1.0F, 1.0F };
1529             store_vector4(inst, machine, result);
1530          }
1531          break;
1532       case OPCODE_SUB:
1533          {
1534             GLfloat a[4], b[4], result[4];
1535             fetch_vector4(&inst->SrcReg[0], machine, a);
1536             fetch_vector4(&inst->SrcReg[1], machine, b);
1537             result[0] = a[0] - b[0];
1538             result[1] = a[1] - b[1];
1539             result[2] = a[2] - b[2];
1540             result[3] = a[3] - b[3];
1541             store_vector4(inst, machine, result);
1542             if (DEBUG_PROG) {
1543                printf("SUB (%g %g %g %g) = (%g %g %g %g) - (%g %g %g %g)\n",
1544                       result[0], result[1], result[2], result[3],
1545                       a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
1546             }
1547          }
1548          break;
1549       case OPCODE_SWZ:         /* extended swizzle */
1550          {
1551             const struct prog_src_register *source = &inst->SrcReg[0];
1552             const GLfloat *src = get_src_register_pointer(source, machine);
1553             GLfloat result[4];
1554             GLuint i;
1555             for (i = 0; i < 4; i++) {
1556                const GLuint swz = GET_SWZ(source->Swizzle, i);
1557                if (swz == SWIZZLE_ZERO)
1558                   result[i] = 0.0;
1559                else if (swz == SWIZZLE_ONE)
1560                   result[i] = 1.0;
1561                else {
1562                   ASSERT(swz >= 0);
1563                   ASSERT(swz <= 3);
1564                   result[i] = src[swz];
1565                }
1566                if (source->Negate & (1 << i))
1567                   result[i] = -result[i];
1568             }
1569             store_vector4(inst, machine, result);
1570          }
1571          break;
1572       case OPCODE_TEX:         /* Both ARB and NV frag prog */
1573          /* Simple texel lookup */
1574          {
1575             GLfloat texcoord[4], color[4];
1576             fetch_vector4(&inst->SrcReg[0], machine, texcoord);
1577
1578             fetch_texel(ctx, machine, inst, texcoord, 0.0, color);
1579
1580             if (DEBUG_PROG) {
1581                printf("TEX (%g, %g, %g, %g) = texture[%d][%g, %g, %g, %g]\n",
1582                       color[0], color[1], color[2], color[3],
1583                       inst->TexSrcUnit,
1584                       texcoord[0], texcoord[1], texcoord[2], texcoord[3]);
1585             }
1586             store_vector4(inst, machine, color);
1587          }
1588          break;
1589       case OPCODE_TXB:         /* GL_ARB_fragment_program only */
1590          /* Texel lookup with LOD bias */
1591          {
1592             GLfloat texcoord[4], color[4], lodBias;
1593
1594             fetch_vector4(&inst->SrcReg[0], machine, texcoord);
1595
1596             /* texcoord[3] is the bias to add to lambda */
1597             lodBias = texcoord[3];
1598
1599             fetch_texel(ctx, machine, inst, texcoord, lodBias, color);
1600
1601             store_vector4(inst, machine, color);
1602          }
1603          break;
1604       case OPCODE_TXD:         /* GL_NV_fragment_program only */
1605          /* Texture lookup w/ partial derivatives for LOD */
1606          {
1607             GLfloat texcoord[4], dtdx[4], dtdy[4], color[4];
1608             fetch_vector4(&inst->SrcReg[0], machine, texcoord);
1609             fetch_vector4(&inst->SrcReg[1], machine, dtdx);
1610             fetch_vector4(&inst->SrcReg[2], machine, dtdy);
1611             machine->FetchTexelDeriv(ctx, texcoord, dtdx, dtdy,
1612                                      0.0, /* lodBias */
1613                                      inst->TexSrcUnit, color);
1614             store_vector4(inst, machine, color);
1615          }
1616          break;
1617       case OPCODE_TXP:         /* GL_ARB_fragment_program only */
1618          /* Texture lookup w/ projective divide */
1619          {
1620             GLfloat texcoord[4], color[4];
1621
1622             fetch_vector4(&inst->SrcReg[0], machine, texcoord);
1623             /* Not so sure about this test - if texcoord[3] is
1624              * zero, we'd probably be fine except for an ASSERT in
1625              * IROUND_POS() which gets triggered by the inf values created.
1626              */
1627             if (texcoord[3] != 0.0) {
1628                texcoord[0] /= texcoord[3];
1629                texcoord[1] /= texcoord[3];
1630                texcoord[2] /= texcoord[3];
1631             }
1632
1633             fetch_texel(ctx, machine, inst, texcoord, 0.0, color);
1634
1635             store_vector4(inst, machine, color);
1636          }
1637          break;
1638       case OPCODE_TXP_NV:      /* GL_NV_fragment_program only */
1639          /* Texture lookup w/ projective divide, as above, but do not
1640           * do the divide by w if sampling from a cube map.
1641           */
1642          {
1643             GLfloat texcoord[4], color[4];
1644
1645             fetch_vector4(&inst->SrcReg[0], machine, texcoord);
1646             if (inst->TexSrcTarget != TEXTURE_CUBE_INDEX &&
1647                 texcoord[3] != 0.0) {
1648                texcoord[0] /= texcoord[3];
1649                texcoord[1] /= texcoord[3];
1650                texcoord[2] /= texcoord[3];
1651             }
1652
1653             fetch_texel(ctx, machine, inst, texcoord, 0.0, color);
1654
1655             store_vector4(inst, machine, color);
1656          }
1657          break;
1658       case OPCODE_TRUNC:       /* truncate toward zero */
1659          {
1660             GLfloat a[4], result[4];
1661             fetch_vector4(&inst->SrcReg[0], machine, a);
1662             result[0] = (GLfloat) (GLint) a[0];
1663             result[1] = (GLfloat) (GLint) a[1];
1664             result[2] = (GLfloat) (GLint) a[2];
1665             result[3] = (GLfloat) (GLint) a[3];
1666             store_vector4(inst, machine, result);
1667          }
1668          break;
1669       case OPCODE_UP2H:        /* unpack two 16-bit floats */
1670          {
1671             GLfloat a[4], result[4];
1672             fi_type fi;
1673             GLhalfNV hx, hy;
1674             fetch_vector1(&inst->SrcReg[0], machine, a);
1675             fi.f = a[0];
1676             hx = fi.i & 0xffff;
1677             hy = fi.i >> 16;
1678             result[0] = result[2] = _mesa_half_to_float(hx);
1679             result[1] = result[3] = _mesa_half_to_float(hy);
1680             store_vector4(inst, machine, result);
1681          }
1682          break;
1683       case OPCODE_UP2US:       /* unpack two GLushorts */
1684          {
1685             GLfloat a[4], result[4];
1686             fi_type fi;
1687             GLushort usx, usy;
1688             fetch_vector1(&inst->SrcReg[0], machine, a);
1689             fi.f = a[0];
1690             usx = fi.i & 0xffff;
1691             usy = fi.i >> 16;
1692             result[0] = result[2] = usx * (1.0f / 65535.0f);
1693             result[1] = result[3] = usy * (1.0f / 65535.0f);
1694             store_vector4(inst, machine, result);
1695          }
1696          break;
1697       case OPCODE_UP4B:        /* unpack four GLbytes */
1698          {
1699             GLfloat a[4], result[4];
1700             fi_type fi;
1701             fetch_vector1(&inst->SrcReg[0], machine, a);
1702             fi.f = a[0];
1703             result[0] = (((fi.i >> 0) & 0xff) - 128) / 127.0F;
1704             result[1] = (((fi.i >> 8) & 0xff) - 128) / 127.0F;
1705             result[2] = (((fi.i >> 16) & 0xff) - 128) / 127.0F;
1706             result[3] = (((fi.i >> 24) & 0xff) - 128) / 127.0F;
1707             store_vector4(inst, machine, result);
1708          }
1709          break;
1710       case OPCODE_UP4UB:       /* unpack four GLubytes */
1711          {
1712             GLfloat a[4], result[4];
1713             fi_type fi;
1714             fetch_vector1(&inst->SrcReg[0], machine, a);
1715             fi.f = a[0];
1716             result[0] = ((fi.i >> 0) & 0xff) / 255.0F;
1717             result[1] = ((fi.i >> 8) & 0xff) / 255.0F;
1718             result[2] = ((fi.i >> 16) & 0xff) / 255.0F;
1719             result[3] = ((fi.i >> 24) & 0xff) / 255.0F;
1720             store_vector4(inst, machine, result);
1721          }
1722          break;
1723       case OPCODE_XOR:         /* bitwise XOR */
1724          {
1725             GLuint a[4], b[4], result[4];
1726             fetch_vector4ui(&inst->SrcReg[0], machine, a);
1727             fetch_vector4ui(&inst->SrcReg[1], machine, b);
1728             result[0] = a[0] ^ b[0];
1729             result[1] = a[1] ^ b[1];
1730             result[2] = a[2] ^ b[2];
1731             result[3] = a[3] ^ b[3];
1732             store_vector4ui(inst, machine, result);
1733          }
1734          break;
1735       case OPCODE_XPD:         /* cross product */
1736          {
1737             GLfloat a[4], b[4], result[4];
1738             fetch_vector4(&inst->SrcReg[0], machine, a);
1739             fetch_vector4(&inst->SrcReg[1], machine, b);
1740             result[0] = a[1] * b[2] - a[2] * b[1];
1741             result[1] = a[2] * b[0] - a[0] * b[2];
1742             result[2] = a[0] * b[1] - a[1] * b[0];
1743             result[3] = 1.0;
1744             store_vector4(inst, machine, result);
1745             if (DEBUG_PROG) {
1746                printf("XPD (%g %g %g %g) = (%g %g %g) X (%g %g %g)\n",
1747                       result[0], result[1], result[2], result[3],
1748                       a[0], a[1], a[2], b[0], b[1], b[2]);
1749             }
1750          }
1751          break;
1752       case OPCODE_X2D:         /* 2-D matrix transform */
1753          {
1754             GLfloat a[4], b[4], c[4], result[4];
1755             fetch_vector4(&inst->SrcReg[0], machine, a);
1756             fetch_vector4(&inst->SrcReg[1], machine, b);
1757             fetch_vector4(&inst->SrcReg[2], machine, c);
1758             result[0] = a[0] + b[0] * c[0] + b[1] * c[1];
1759             result[1] = a[1] + b[0] * c[2] + b[1] * c[3];
1760             result[2] = a[2] + b[0] * c[0] + b[1] * c[1];
1761             result[3] = a[3] + b[0] * c[2] + b[1] * c[3];
1762             store_vector4(inst, machine, result);
1763          }
1764          break;
1765       case OPCODE_PRINT:
1766          {
1767             if (inst->SrcReg[0].File != -1) {
1768                GLfloat a[4];
1769                fetch_vector4(&inst->SrcReg[0], machine, a);
1770                _mesa_printf("%s%g, %g, %g, %g\n", (const char *) inst->Data,
1771                             a[0], a[1], a[2], a[3]);
1772             }
1773             else {
1774                _mesa_printf("%s\n", (const char *) inst->Data);
1775             }
1776          }
1777          break;
1778       case OPCODE_END:
1779          return GL_TRUE;
1780       default:
1781          _mesa_problem(ctx, "Bad opcode %d in _mesa_execute_program",
1782                        inst->Opcode);
1783          return GL_TRUE;        /* return value doesn't matter */
1784       }
1785
1786       numExec++;
1787       if (numExec > maxExec) {
1788          _mesa_problem(ctx, "Infinite loop detected in fragment program");
1789          return GL_TRUE;
1790       }
1791
1792    } /* for pc */
1793
1794    return GL_TRUE;
1795 }