src/mesa/shader/prog_execute.c

   1 /*
   2  * Mesa 3-D graphics library
   3  * Version:  7.3
   4  *
   5  * Copyright (C) 1999-2008  Brian Paul   All Rights Reserved.
   6  *
   7  * Permission is hereby granted, free of charge, to any person obtaining a
   8  * copy of this software and associated documentation files (the "Software"),
   9  * to deal in the Software without restriction, including without limitation
  10  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  11  * and/or sell copies of the Software, and to permit persons to whom the
  12  * Software is furnished to do so, subject to the following conditions:
  13  *
  14  * The above copyright notice and this permission notice shall be included
  15  * in all copies or substantial portions of the Software.
  16  *
  17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  18  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  20  * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
  21  * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  22  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  23  */
  24
  25 /**
  26  * \file prog_execute.c
  27  * Software interpreter for vertex/fragment programs.
  28  * \author Brian Paul
  29  */
  30
  31 /*
  32  * NOTE: we do everything in single-precision floating point; we don't
  33  * currently observe the single/half/fixed-precision qualifiers.
  34  *
  35  */
  36
  37
  38 #include "main/glheader.h"
  39 #include "main/colormac.h"
  40 #include "main/context.h"
  41 #include "prog_execute.h"
  42 #include "prog_instruction.h"
  43 #include "prog_parameter.h"
  44 #include "prog_print.h"
  45 #include "prog_noise.h"
  46
  47
  48 /* debug predicate */
  49 #define DEBUG_PROG 0
  50
  51
  52 /**
  53  * Set x to positive or negative infinity.
  54  */
  55 #if defined(USE_IEEE) || defined(_WIN32)
  56 #define SET_POS_INFINITY(x)                  \
  57    do {                                      \
  58          fi_type fi;                         \
  59          fi.i = 0x7F800000;                  \
  60          x = fi.f;                           \
  61    } while (0)
  62 #define SET_NEG_INFINITY(x)                  \
  63    do {                                      \
  64          fi_type fi;                         \
  65          fi.i = 0xFF800000;                  \
  66          x = fi.f;                           \
  67    } while (0)
  68 #elif defined(VMS)
  69 #define SET_POS_INFINITY(x)  x = __MAXFLOAT
  70 #define SET_NEG_INFINITY(x)  x = -__MAXFLOAT
  71 #else
  72 #define SET_POS_INFINITY(x)  x = (GLfloat) HUGE_VAL
  73 #define SET_NEG_INFINITY(x)  x = (GLfloat) -HUGE_VAL
  74 #endif
  75
  76 #define SET_FLOAT_BITS(x, bits) ((fi_type *) (void *) &(x))->i = bits
  77
  78
  79 static const GLfloat ZeroVec[4] = { 0.0F, 0.0F, 0.0F, 0.0F };
  80
  81
  82
  83 /**
  84  * Return a pointer to the 4-element float vector specified by the given
  85  * source register.
  86  */
  87 static INLINE const GLfloat *
  88 get_src_register_pointer(const struct prog_src_register *source,
  89                          const struct gl_program_machine *machine)
  90 {
  91    const struct gl_program *prog = machine->CurProgram;
  92    GLint reg = source->Index;
  93
  94    if (source->RelAddr) {
  95       /* add address register value to src index/offset */
  96       reg += machine->AddressReg[0][0];
  97       if (reg < 0) {
  98          return ZeroVec;
  99       }
 100    }
 101
 102    switch (source->File) {
 103    case PROGRAM_TEMPORARY:
 104       if (reg >= MAX_PROGRAM_TEMPS)
 105          return ZeroVec;
 106       return machine->Temporaries[reg];
 107
 108    case PROGRAM_INPUT:
 109       if (prog->Target == GL_VERTEX_PROGRAM_ARB) {
 110          if (reg >= VERT_ATTRIB_MAX)
 111             return ZeroVec;
 112          return machine->VertAttribs[reg];
 113       }
 114       else {
 115          if (reg >= FRAG_ATTRIB_MAX)
 116             return ZeroVec;
 117          return machine->Attribs[reg][machine->CurElement];
 118       }
 119
 120    case PROGRAM_OUTPUT:
 121       if (reg >= MAX_PROGRAM_OUTPUTS)
 122          return ZeroVec;
 123       return machine->Outputs[reg];
 124
 125    case PROGRAM_LOCAL_PARAM:
 126       if (reg >= MAX_PROGRAM_LOCAL_PARAMS)
 127          return ZeroVec;
 128       return machine->CurProgram->LocalParams[reg];
 129
 130    case PROGRAM_ENV_PARAM:
 131       if (reg >= MAX_PROGRAM_ENV_PARAMS)
 132          return ZeroVec;
 133       return machine->EnvParams[reg];
 134
 135    case PROGRAM_STATE_VAR:
 136       /* Fallthrough */
 137    case PROGRAM_CONSTANT:
 138       /* Fallthrough */
 139    case PROGRAM_UNIFORM:
 140       /* Fallthrough */
 141    case PROGRAM_NAMED_PARAM:
 142       if (reg >= (GLint) prog->Parameters->NumParameters)
 143          return ZeroVec;
 144       return prog->Parameters->ParameterValues[reg];
 145
 146    default:
 147       _mesa_problem(NULL,
 148          "Invalid src register file %d in get_src_register_pointer()",
 149          source->File);
 150       return NULL;
 151    }
 152 }
 153
 154
 155 /**
 156  * Return a pointer to the 4-element float vector specified by the given
 157  * destination register.
 158  */
 159 static INLINE GLfloat *
 160 get_dst_register_pointer(const struct prog_dst_register *dest,
 161                          struct gl_program_machine *machine)
 162 {
 163    static GLfloat dummyReg[4];
 164    GLint reg = dest->Index;
 165
 166    if (dest->RelAddr) {
 167       /* add address register value to src index/offset */
 168       reg += machine->AddressReg[0][0];
 169       if (reg < 0) {
 170          return dummyReg;
 171       }
 172    }
 173
 174    switch (dest->File) {
 175    case PROGRAM_TEMPORARY:
 176       if (reg >= MAX_PROGRAM_TEMPS)
 177          return dummyReg;
 178       return machine->Temporaries[reg];
 179
 180    case PROGRAM_OUTPUT:
 181       if (reg >= MAX_PROGRAM_OUTPUTS)
 182          return dummyReg;
 183       return machine->Outputs[reg];
 184
 185    case PROGRAM_WRITE_ONLY:
 186       return dummyReg;
 187
 188    default:
 189       _mesa_problem(NULL,
 190          "Invalid dest register file %d in get_dst_register_pointer()",
 191          dest->File);
 192       return NULL;
 193    }
 194 }
 195
 196
 197
 198 /**
 199  * Fetch a 4-element float vector from the given source register.
 200  * Apply swizzling and negating as needed.
 201  */
 202 static void
 203 fetch_vector4(const struct prog_src_register *source,
 204               const struct gl_program_machine *machine, GLfloat result[4])
 205 {
 206    const GLfloat *src = get_src_register_pointer(source, machine);
 207    ASSERT(src);
 208
 209    if (source->Swizzle == SWIZZLE_NOOP) {
 210       /* no swizzling */
 211       COPY_4V(result, src);
 212    }
 213    else {
 214       ASSERT(GET_SWZ(source->Swizzle, 0) <= 3);
 215       ASSERT(GET_SWZ(source->Swizzle, 1) <= 3);
 216       ASSERT(GET_SWZ(source->Swizzle, 2) <= 3);
 217       ASSERT(GET_SWZ(source->Swizzle, 3) <= 3);
 218       result[0] = src[GET_SWZ(source->Swizzle, 0)];
 219       result[1] = src[GET_SWZ(source->Swizzle, 1)];
 220       result[2] = src[GET_SWZ(source->Swizzle, 2)];
 221       result[3] = src[GET_SWZ(source->Swizzle, 3)];
 222    }
 223
 224    if (source->Abs) {
 225       result[0] = FABSF(result[0]);
 226       result[1] = FABSF(result[1]);
 227       result[2] = FABSF(result[2]);
 228       result[3] = FABSF(result[3]);
 229    }
 230    if (source->Negate) {
 231       ASSERT(source->Negate == NEGATE_XYZW);
 232       result[0] = -result[0];
 233       result[1] = -result[1];
 234       result[2] = -result[2];
 235       result[3] = -result[3];
 236    }
 237
 238 #ifdef NAN_CHECK
 239    assert(!IS_INF_OR_NAN(result[0]));
 240    assert(!IS_INF_OR_NAN(result[0]));
 241    assert(!IS_INF_OR_NAN(result[0]));
 242    assert(!IS_INF_OR_NAN(result[0]));
 243 #endif
 244 }
 245
 246
 247 /**
 248  * Fetch a 4-element uint vector from the given source register.
 249  * Apply swizzling but not negation/abs.
 250  */
 251 static void
 252 fetch_vector4ui(const struct prog_src_register *source,
 253                 const struct gl_program_machine *machine, GLuint result[4])
 254 {
 255    const GLuint *src = (GLuint *) get_src_register_pointer(source, machine);
 256    ASSERT(src);
 257
 258    if (source->Swizzle == SWIZZLE_NOOP) {
 259       /* no swizzling */
 260       COPY_4V(result, src);
 261    }
 262    else {
 263       ASSERT(GET_SWZ(source->Swizzle, 0) <= 3);
 264       ASSERT(GET_SWZ(source->Swizzle, 1) <= 3);
 265       ASSERT(GET_SWZ(source->Swizzle, 2) <= 3);
 266       ASSERT(GET_SWZ(source->Swizzle, 3) <= 3);
 267       result[0] = src[GET_SWZ(source->Swizzle, 0)];
 268       result[1] = src[GET_SWZ(source->Swizzle, 1)];
 269       result[2] = src[GET_SWZ(source->Swizzle, 2)];
 270       result[3] = src[GET_SWZ(source->Swizzle, 3)];
 271    }
 272
 273    /* Note: no Negate or Abs here */
 274 }
 275
 276
 277
 278 /**
 279  * Fetch the derivative with respect to X or Y for the given register.
 280  * XXX this currently only works for fragment program input attribs.
 281  */
 282 static void
 283 fetch_vector4_deriv(GLcontext * ctx,
 284                     const struct prog_src_register *source,
 285                     const struct gl_program_machine *machine,
 286                     char xOrY, GLfloat result[4])
 287 {
 288    if (source->File == PROGRAM_INPUT &&
 289        source->Index < (GLint) machine->NumDeriv) {
 290       const GLint col = machine->CurElement;
 291       const GLfloat w = machine->Attribs[FRAG_ATTRIB_WPOS][col][3];
 292       const GLfloat invQ = 1.0f / w;
 293       GLfloat deriv[4];
 294
 295       if (xOrY == 'X') {
 296          deriv[0] = machine->DerivX[source->Index][0] * invQ;
 297          deriv[1] = machine->DerivX[source->Index][1] * invQ;
 298          deriv[2] = machine->DerivX[source->Index][2] * invQ;
 299          deriv[3] = machine->DerivX[source->Index][3] * invQ;
 300       }
 301       else {
 302          deriv[0] = machine->DerivY[source->Index][0] * invQ;
 303          deriv[1] = machine->DerivY[source->Index][1] * invQ;
 304          deriv[2] = machine->DerivY[source->Index][2] * invQ;
 305          deriv[3] = machine->DerivY[source->Index][3] * invQ;
 306       }
 307
 308       result[0] = deriv[GET_SWZ(source->Swizzle, 0)];
 309       result[1] = deriv[GET_SWZ(source->Swizzle, 1)];
 310       result[2] = deriv[GET_SWZ(source->Swizzle, 2)];
 311       result[3] = deriv[GET_SWZ(source->Swizzle, 3)];
 312
 313       if (source->Abs) {
 314          result[0] = FABSF(result[0]);
 315          result[1] = FABSF(result[1]);
 316          result[2] = FABSF(result[2]);
 317          result[3] = FABSF(result[3]);
 318       }
 319       if (source->Negate) {
 320          ASSERT(source->Negate == NEGATE_XYZW);
 321          result[0] = -result[0];
 322          result[1] = -result[1];
 323          result[2] = -result[2];
 324          result[3] = -result[3];
 325       }
 326    }
 327    else {
 328       ASSIGN_4V(result, 0.0, 0.0, 0.0, 0.0);
 329    }
 330 }
 331
 332
 333 /**
 334  * As above, but only return result[0] element.
 335  */
 336 static void
 337 fetch_vector1(const struct prog_src_register *source,
 338               const struct gl_program_machine *machine, GLfloat result[4])
 339 {
 340    const GLfloat *src = get_src_register_pointer(source, machine);
 341    ASSERT(src);
 342
 343    result[0] = src[GET_SWZ(source->Swizzle, 0)];
 344
 345    if (source->Abs) {
 346       result[0] = FABSF(result[0]);
 347    }
 348    if (source->Negate) {
 349       result[0] = -result[0];
 350    }
 351 }
 352
 353
 354 static GLuint
 355 fetch_vector1ui(const struct prog_src_register *source,
 356                 const struct gl_program_machine *machine)
 357 {
 358    const GLuint *src = (GLuint *) get_src_register_pointer(source, machine);
 359    return src[GET_SWZ(source->Swizzle, 0)];
 360 }
 361
 362
 363 /**
 364  * Fetch texel from texture.  Use partial derivatives when possible.
 365  */
 366 static INLINE void
 367 fetch_texel(GLcontext *ctx,
 368             const struct gl_program_machine *machine,
 369             const struct prog_instruction *inst,
 370             const GLfloat texcoord[4], GLfloat lodBias,
 371             GLfloat color[4])
 372 {
 373    const GLuint unit = machine->Samplers[inst->TexSrcUnit];
 374
 375    /* Note: we only have the right derivatives for fragment input attribs.
 376     */
 377    if (machine->NumDeriv > 0 &&
 378        inst->SrcReg[0].File == PROGRAM_INPUT &&
 379        inst->SrcReg[0].Index == FRAG_ATTRIB_TEX0 + inst->TexSrcUnit) {
 380       /* simple texture fetch for which we should have derivatives */
 381       GLuint attr = inst->SrcReg[0].Index;
 382       machine->FetchTexelDeriv(ctx, texcoord,
 383                                machine->DerivX[attr],
 384                                machine->DerivY[attr],
 385                                lodBias, unit, color);
 386    }
 387    else {
 388       machine->FetchTexelLod(ctx, texcoord, lodBias, unit, color);
 389    }
 390 }
 391
 392
 393 /**
 394  * Test value against zero and return GT, LT, EQ or UN if NaN.
 395  */
 396 static INLINE GLuint
 397 generate_cc(float value)
 398 {
 399    if (value != value)
 400       return COND_UN;           /* NaN */
 401    if (value > 0.0F)
 402       return COND_GT;
 403    if (value < 0.0F)
 404       return COND_LT;
 405    return COND_EQ;
 406 }
 407
 408
 409 /**
 410  * Test if the ccMaskRule is satisfied by the given condition code.
 411  * Used to mask destination writes according to the current condition code.
 412  */
 413 static INLINE GLboolean
 414 test_cc(GLuint condCode, GLuint ccMaskRule)
 415 {
 416    switch (ccMaskRule) {
 417    case COND_EQ: return (condCode == COND_EQ);
 418    case COND_NE: return (condCode != COND_EQ);
 419    case COND_LT: return (condCode == COND_LT);
 420    case COND_GE: return (condCode == COND_GT || condCode == COND_EQ);
 421    case COND_LE: return (condCode == COND_LT || condCode == COND_EQ);
 422    case COND_GT: return (condCode == COND_GT);
 423    case COND_TR: return GL_TRUE;
 424    case COND_FL: return GL_FALSE;
 425    default:      return GL_TRUE;
 426    }
 427 }
 428
 429
 430 /**
 431  * Evaluate the 4 condition codes against a predicate and return GL_TRUE
 432  * or GL_FALSE to indicate result.
 433  */
 434 static INLINE GLboolean
 435 eval_condition(const struct gl_program_machine *machine,
 436                const struct prog_instruction *inst)
 437 {
 438    const GLuint swizzle = inst->DstReg.CondSwizzle;
 439    const GLuint condMask = inst->DstReg.CondMask;
 440    if (test_cc(machine->CondCodes[GET_SWZ(swizzle, 0)], condMask) ||
 441        test_cc(machine->CondCodes[GET_SWZ(swizzle, 1)], condMask) ||
 442        test_cc(machine->CondCodes[GET_SWZ(swizzle, 2)], condMask) ||
 443        test_cc(machine->CondCodes[GET_SWZ(swizzle, 3)], condMask)) {
 444       return GL_TRUE;
 445    }
 446    else {
 447       return GL_FALSE;
 448    }
 449 }
 450
 451
 452
 453 /**
 454  * Store 4 floats into a register.  Observe the instructions saturate and
 455  * set-condition-code flags.
 456  */
 457 static void
 458 store_vector4(const struct prog_instruction *inst,
 459               struct gl_program_machine *machine, const GLfloat value[4])
 460 {
 461    const struct prog_dst_register *dstReg = &(inst->DstReg);
 462    const GLboolean clamp = inst->SaturateMode == SATURATE_ZERO_ONE;
 463    GLuint writeMask = dstReg->WriteMask;
 464    GLfloat clampedValue[4];
 465    GLfloat *dst = get_dst_register_pointer(dstReg, machine);
 466
 467 #if 0
 468    if (value[0] > 1.0e10 ||
 469        IS_INF_OR_NAN(value[0]) ||
 470        IS_INF_OR_NAN(value[1]) ||
 471        IS_INF_OR_NAN(value[2]) || IS_INF_OR_NAN(value[3]))
 472       printf("store %g %g %g %g\n", value[0], value[1], value[2], value[3]);
 473 #endif
 474
 475    if (clamp) {
 476       clampedValue[0] = CLAMP(value[0], 0.0F, 1.0F);
 477       clampedValue[1] = CLAMP(value[1], 0.0F, 1.0F);
 478       clampedValue[2] = CLAMP(value[2], 0.0F, 1.0F);
 479       clampedValue[3] = CLAMP(value[3], 0.0F, 1.0F);
 480       value = clampedValue;
 481    }
 482
 483    if (dstReg->CondMask != COND_TR) {
 484       /* condition codes may turn off some writes */
 485       if (writeMask & WRITEMASK_X) {
 486          if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 0)],
 487                       dstReg->CondMask))
 488             writeMask &= ~WRITEMASK_X;
 489       }
 490       if (writeMask & WRITEMASK_Y) {
 491          if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 1)],
 492                       dstReg->CondMask))
 493             writeMask &= ~WRITEMASK_Y;
 494       }
 495       if (writeMask & WRITEMASK_Z) {
 496          if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 2)],
 497                       dstReg->CondMask))
 498             writeMask &= ~WRITEMASK_Z;
 499       }
 500       if (writeMask & WRITEMASK_W) {
 501          if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 3)],
 502                       dstReg->CondMask))
 503             writeMask &= ~WRITEMASK_W;
 504       }
 505    }
 506
 507 #ifdef NAN_CHECK
 508    assert(!IS_INF_OR_NAN(value[0]));
 509    assert(!IS_INF_OR_NAN(value[0]));
 510    assert(!IS_INF_OR_NAN(value[0]));
 511    assert(!IS_INF_OR_NAN(value[0]));
 512 #endif
 513
 514    if (writeMask & WRITEMASK_X)
 515       dst[0] = value[0];
 516    if (writeMask & WRITEMASK_Y)
 517       dst[1] = value[1];
 518    if (writeMask & WRITEMASK_Z)
 519       dst[2] = value[2];
 520    if (writeMask & WRITEMASK_W)
 521       dst[3] = value[3];
 522
 523    if (inst->CondUpdate) {
 524       if (writeMask & WRITEMASK_X)
 525          machine->CondCodes[0] = generate_cc(value[0]);
 526       if (writeMask & WRITEMASK_Y)
 527          machine->CondCodes[1] = generate_cc(value[1]);
 528       if (writeMask & WRITEMASK_Z)
 529          machine->CondCodes[2] = generate_cc(value[2]);
 530       if (writeMask & WRITEMASK_W)
 531          machine->CondCodes[3] = generate_cc(value[3]);
 532 #if DEBUG_PROG
 533       printf("CondCodes=(%s,%s,%s,%s) for:\n",
 534              _mesa_condcode_string(machine->CondCodes[0]),
 535              _mesa_condcode_string(machine->CondCodes[1]),
 536              _mesa_condcode_string(machine->CondCodes[2]),
 537              _mesa_condcode_string(machine->CondCodes[3]));
 538 #endif
 539    }
 540 }
 541
 542
 543 /**
 544  * Store 4 uints into a register.  Observe the set-condition-code flags.
 545  */
 546 static void
 547 store_vector4ui(const struct prog_instruction *inst,
 548                 struct gl_program_machine *machine, const GLuint value[4])
 549 {
 550    const struct prog_dst_register *dstReg = &(inst->DstReg);
 551    GLuint writeMask = dstReg->WriteMask;
 552    GLuint *dst = (GLuint *) get_dst_register_pointer(dstReg, machine);
 553
 554    if (dstReg->CondMask != COND_TR) {
 555       /* condition codes may turn off some writes */
 556       if (writeMask & WRITEMASK_X) {
 557          if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 0)],
 558                       dstReg->CondMask))
 559             writeMask &= ~WRITEMASK_X;
 560       }
 561       if (writeMask & WRITEMASK_Y) {
 562          if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 1)],
 563                       dstReg->CondMask))
 564             writeMask &= ~WRITEMASK_Y;
 565       }
 566       if (writeMask & WRITEMASK_Z) {
 567          if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 2)],
 568                       dstReg->CondMask))
 569             writeMask &= ~WRITEMASK_Z;
 570       }
 571       if (writeMask & WRITEMASK_W) {
 572          if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 3)],
 573                       dstReg->CondMask))
 574             writeMask &= ~WRITEMASK_W;
 575       }
 576    }
 577
 578    if (writeMask & WRITEMASK_X)
 579       dst[0] = value[0];
 580    if (writeMask & WRITEMASK_Y)
 581       dst[1] = value[1];
 582    if (writeMask & WRITEMASK_Z)
 583       dst[2] = value[2];
 584    if (writeMask & WRITEMASK_W)
 585       dst[3] = value[3];
 586
 587    if (inst->CondUpdate) {
 588       if (writeMask & WRITEMASK_X)
 589          machine->CondCodes[0] = generate_cc((float)value[0]);
 590       if (writeMask & WRITEMASK_Y)
 591          machine->CondCodes[1] = generate_cc((float)value[1]);
 592       if (writeMask & WRITEMASK_Z)
 593          machine->CondCodes[2] = generate_cc((float)value[2]);
 594       if (writeMask & WRITEMASK_W)
 595          machine->CondCodes[3] = generate_cc((float)value[3]);
 596 #if DEBUG_PROG
 597       printf("CondCodes=(%s,%s,%s,%s) for:\n",
 598              _mesa_condcode_string(machine->CondCodes[0]),
 599              _mesa_condcode_string(machine->CondCodes[1]),
 600              _mesa_condcode_string(machine->CondCodes[2]),
 601              _mesa_condcode_string(machine->CondCodes[3]));
 602 #endif
 603    }
 604 }
 605
 606
 607
 608 /**
 609  * Execute the given vertex/fragment program.
 610  *
 611  * \param ctx  rendering context
 612  * \param program  the program to execute
 613  * \param machine  machine state (must be initialized)
 614  * \return GL_TRUE if program completed or GL_FALSE if program executed KIL.
 615  */
 616 GLboolean
 617 _mesa_execute_program(GLcontext * ctx,
 618                       const struct gl_program *program,
 619                       struct gl_program_machine *machine)
 620 {
 621    const GLuint numInst = program->NumInstructions;
 622    const GLuint maxExec = 10000;
 623    GLuint pc, numExec = 0;
 624
 625    machine->CurProgram = program;
 626
 627    if (DEBUG_PROG) {
 628       printf("execute program %u --------------------\n", program->Id);
 629    }
 630
 631    if (program->Target == GL_VERTEX_PROGRAM_ARB) {
 632       machine->EnvParams = ctx->VertexProgram.Parameters;
 633    }
 634    else {
 635       machine->EnvParams = ctx->FragmentProgram.Parameters;
 636    }
 637
 638    for (pc = 0; pc < numInst; pc++) {
 639       const struct prog_instruction *inst = program->Instructions + pc;
 640
 641       if (DEBUG_PROG) {
 642          _mesa_print_instruction(inst);
 643       }
 644
 645       switch (inst->Opcode) {
 646       case OPCODE_ABS:
 647          {
 648             GLfloat a[4], result[4];
 649             fetch_vector4(&inst->SrcReg[0], machine, a);
 650             result[0] = FABSF(a[0]);
 651             result[1] = FABSF(a[1]);
 652             result[2] = FABSF(a[2]);
 653             result[3] = FABSF(a[3]);
 654             store_vector4(inst, machine, result);
 655          }
 656          break;
 657       case OPCODE_ADD:
 658          {
 659             GLfloat a[4], b[4], result[4];
 660             fetch_vector4(&inst->SrcReg[0], machine, a);
 661             fetch_vector4(&inst->SrcReg[1], machine, b);
 662             result[0] = a[0] + b[0];
 663             result[1] = a[1] + b[1];
 664             result[2] = a[2] + b[2];
 665             result[3] = a[3] + b[3];
 666             store_vector4(inst, machine, result);
 667             if (DEBUG_PROG) {
 668                printf("ADD (%g %g %g %g) = (%g %g %g %g) + (%g %g %g %g)\n",
 669                       result[0], result[1], result[2], result[3],
 670                       a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
 671             }
 672          }
 673          break;
 674       case OPCODE_AND:     /* bitwise AND */
 675          {
 676             GLuint a[4], b[4], result[4];
 677             fetch_vector4ui(&inst->SrcReg[0], machine, a);
 678             fetch_vector4ui(&inst->SrcReg[1], machine, b);
 679             result[0] = a[0] & b[0];
 680             result[1] = a[1] & b[1];
 681             result[2] = a[2] & b[2];
 682             result[3] = a[3] & b[3];
 683             store_vector4ui(inst, machine, result);
 684          }
 685          break;
 686       case OPCODE_ARL:
 687          {
 688             GLfloat t[4];
 689             fetch_vector4(&inst->SrcReg[0], machine, t);
 690             machine->AddressReg[0][0] = IFLOOR(t[0]);
 691             if (DEBUG_PROG) {
 692                printf("ARL %d\n", machine->AddressReg[0][0]);
 693             }
 694          }
 695          break;
 696       case OPCODE_BGNLOOP:
 697          /* no-op */
 698          ASSERT(program->Instructions[inst->BranchTarget].Opcode
 699                 == OPCODE_ENDLOOP);
 700          break;
 701       case OPCODE_ENDLOOP:
 702          /* subtract 1 here since pc is incremented by for(pc) loop */
 703          ASSERT(program->Instructions[inst->BranchTarget].Opcode
 704                 == OPCODE_BGNLOOP);
 705          pc = inst->BranchTarget - 1;   /* go to matching BNGLOOP */
 706          break;
 707       case OPCODE_BGNSUB:      /* begin subroutine */
 708          break;
 709       case OPCODE_ENDSUB:      /* end subroutine */
 710          break;
 711       case OPCODE_BRA:         /* branch (conditional) */
 712          if (eval_condition(machine, inst)) {
 713             /* take branch */
 714             /* Subtract 1 here since we'll do pc++ below */
 715             pc = inst->BranchTarget - 1;
 716          }
 717          break;
 718       case OPCODE_BRK:         /* break out of loop (conditional) */
 719          ASSERT(program->Instructions[inst->BranchTarget].Opcode
 720                 == OPCODE_ENDLOOP);
 721          if (eval_condition(machine, inst)) {
 722             /* break out of loop */
 723             /* pc++ at end of for-loop will put us after the ENDLOOP inst */
 724             pc = inst->BranchTarget;
 725          }
 726          break;
 727       case OPCODE_CONT:        /* continue loop (conditional) */
 728          ASSERT(program->Instructions[inst->BranchTarget].Opcode
 729                 == OPCODE_ENDLOOP);
 730          if (eval_condition(machine, inst)) {
 731             /* continue at ENDLOOP */
 732             /* Subtract 1 here since we'll do pc++ at end of for-loop */
 733             pc = inst->BranchTarget - 1;
 734          }
 735          break;
 736       case OPCODE_CAL:         /* Call subroutine (conditional) */
 737          if (eval_condition(machine, inst)) {
 738             /* call the subroutine */
 739             if (machine->StackDepth >= MAX_PROGRAM_CALL_DEPTH) {
 740                return GL_TRUE;  /* Per GL_NV_vertex_program2 spec */
 741             }
 742             machine->CallStack[machine->StackDepth++] = pc + 1; /* next inst */
 743             /* Subtract 1 here since we'll do pc++ at end of for-loop */
 744             pc = inst->BranchTarget - 1;
 745          }
 746          break;
 747       case OPCODE_CMP:
 748          {
 749             GLfloat a[4], b[4], c[4], result[4];
 750             fetch_vector4(&inst->SrcReg[0], machine, a);
 751             fetch_vector4(&inst->SrcReg[1], machine, b);
 752             fetch_vector4(&inst->SrcReg[2], machine, c);
 753             result[0] = a[0] < 0.0F ? b[0] : c[0];
 754             result[1] = a[1] < 0.0F ? b[1] : c[1];
 755             result[2] = a[2] < 0.0F ? b[2] : c[2];
 756             result[3] = a[3] < 0.0F ? b[3] : c[3];
 757             store_vector4(inst, machine, result);
 758          }
 759          break;
 760       case OPCODE_COS:
 761          {
 762             GLfloat a[4], result[4];
 763             fetch_vector1(&inst->SrcReg[0], machine, a);
 764             result[0] = result[1] = result[2] = result[3]
 765                = (GLfloat) _mesa_cos(a[0]);
 766             store_vector4(inst, machine, result);
 767          }
 768          break;
 769       case OPCODE_DDX:         /* Partial derivative with respect to X */
 770          {
 771             GLfloat result[4];
 772             fetch_vector4_deriv(ctx, &inst->SrcReg[0], machine,
 773                                 'X', result);
 774             store_vector4(inst, machine, result);
 775          }
 776          break;
 777       case OPCODE_DDY:         /* Partial derivative with respect to Y */
 778          {
 779             GLfloat result[4];
 780             fetch_vector4_deriv(ctx, &inst->SrcReg[0], machine,
 781                                 'Y', result);
 782             store_vector4(inst, machine, result);
 783          }
 784          break;
 785       case OPCODE_DP2:
 786          {
 787             GLfloat a[4], b[4], result[4];
 788             fetch_vector4(&inst->SrcReg[0], machine, a);
 789             fetch_vector4(&inst->SrcReg[1], machine, b);
 790             result[0] = result[1] = result[2] = result[3] = DOT2(a, b);
 791             store_vector4(inst, machine, result);
 792             if (DEBUG_PROG) {
 793                printf("DP2 %g = (%g %g) . (%g %g)\n",
 794                       result[0], a[0], a[1], b[0], b[1]);
 795             }
 796          }
 797          break;
 798       case OPCODE_DP2A:
 799          {
 800             GLfloat a[4], b[4], c, result[4];
 801             fetch_vector4(&inst->SrcReg[0], machine, a);
 802             fetch_vector4(&inst->SrcReg[1], machine, b);
 803             fetch_vector1(&inst->SrcReg[1], machine, &c);
 804             result[0] = result[1] = result[2] = result[3] = DOT2(a, b) + c;
 805             store_vector4(inst, machine, result);
 806             if (DEBUG_PROG) {
 807                printf("DP2A %g = (%g %g) . (%g %g) + %g\n",
 808                       result[0], a[0], a[1], b[0], b[1], c);
 809             }
 810          }
 811          break;
 812       case OPCODE_DP3:
 813          {
 814             GLfloat a[4], b[4], result[4];
 815             fetch_vector4(&inst->SrcReg[0], machine, a);
 816             fetch_vector4(&inst->SrcReg[1], machine, b);
 817             result[0] = result[1] = result[2] = result[3] = DOT3(a, b);
 818             store_vector4(inst, machine, result);
 819             if (DEBUG_PROG) {
 820                printf("DP3 %g = (%g %g %g) . (%g %g %g)\n",
 821                       result[0], a[0], a[1], a[2], b[0], b[1], b[2]);
 822             }
 823          }
 824          break;
 825       case OPCODE_DP4:
 826          {
 827             GLfloat a[4], b[4], result[4];
 828             fetch_vector4(&inst->SrcReg[0], machine, a);
 829             fetch_vector4(&inst->SrcReg[1], machine, b);
 830             result[0] = result[1] = result[2] = result[3] = DOT4(a, b);
 831             store_vector4(inst, machine, result);
 832             if (DEBUG_PROG) {
 833                printf("DP4 %g = (%g, %g %g %g) . (%g, %g %g %g)\n",
 834                       result[0], a[0], a[1], a[2], a[3],
 835                       b[0], b[1], b[2], b[3]);
 836             }
 837          }
 838          break;
 839       case OPCODE_DPH:
 840          {
 841             GLfloat a[4], b[4], result[4];
 842             fetch_vector4(&inst->SrcReg[0], machine, a);
 843             fetch_vector4(&inst->SrcReg[1], machine, b);
 844             result[0] = result[1] = result[2] = result[3] = DOT3(a, b) + b[3];
 845             store_vector4(inst, machine, result);
 846          }
 847          break;
 848       case OPCODE_DST:         /* Distance vector */
 849          {
 850             GLfloat a[4], b[4], result[4];
 851             fetch_vector4(&inst->SrcReg[0], machine, a);
 852             fetch_vector4(&inst->SrcReg[1], machine, b);
 853             result[0] = 1.0F;
 854             result[1] = a[1] * b[1];
 855             result[2] = a[2];
 856             result[3] = b[3];
 857             store_vector4(inst, machine, result);
 858          }
 859          break;
 860       case OPCODE_EXP:
 861          {
 862             GLfloat t[4], q[4], floor_t0;
 863             fetch_vector1(&inst->SrcReg[0], machine, t);
 864             floor_t0 = FLOORF(t[0]);
 865             if (floor_t0 > FLT_MAX_EXP) {
 866                SET_POS_INFINITY(q[0]);
 867                SET_POS_INFINITY(q[2]);
 868             }
 869             else if (floor_t0 < FLT_MIN_EXP) {
 870                q[0] = 0.0F;
 871                q[2] = 0.0F;
 872             }
 873             else {
 874                q[0] = LDEXPF(1.0, (int) floor_t0);
 875                /* Note: GL_NV_vertex_program expects
 876                 * result.z = result.x * APPX(result.y)
 877                 * We do what the ARB extension says.
 878                 */
 879                q[2] = (GLfloat) _mesa_pow(2.0, t[0]);
 880             }
 881             q[1] = t[0] - floor_t0;
 882             q[3] = 1.0F;
 883             store_vector4( inst, machine, q );
 884          }
 885          break;
 886       case OPCODE_EX2:         /* Exponential base 2 */
 887          {
 888             GLfloat a[4], result[4], val;
 889             fetch_vector1(&inst->SrcReg[0], machine, a);
 890             val = (GLfloat) _mesa_pow(2.0, a[0]);
 891             /*
 892             if (IS_INF_OR_NAN(val))
 893                val = 1.0e10;
 894             */
 895             result[0] = result[1] = result[2] = result[3] = val;
 896             store_vector4(inst, machine, result);
 897          }
 898          break;
 899       case OPCODE_FLR:
 900          {
 901             GLfloat a[4], result[4];
 902             fetch_vector4(&inst->SrcReg[0], machine, a);
 903             result[0] = FLOORF(a[0]);
 904             result[1] = FLOORF(a[1]);
 905             result[2] = FLOORF(a[2]);
 906             result[3] = FLOORF(a[3]);
 907             store_vector4(inst, machine, result);
 908          }
 909          break;
 910       case OPCODE_FRC:
 911          {
 912             GLfloat a[4], result[4];
 913             fetch_vector4(&inst->SrcReg[0], machine, a);
 914             result[0] = a[0] - FLOORF(a[0]);
 915             result[1] = a[1] - FLOORF(a[1]);
 916             result[2] = a[2] - FLOORF(a[2]);
 917             result[3] = a[3] - FLOORF(a[3]);
 918             store_vector4(inst, machine, result);
 919          }
 920          break;
 921       case OPCODE_IF:          /* branch: run the if-clause or skip to ELSE/ENDIF */
 922          {
 923             GLboolean cond;
 924             assert(inst->BranchTarget >= 0);  /* check before using it as an index */
 925             ASSERT(program->Instructions[inst->BranchTarget].Opcode
 926                    == OPCODE_ELSE ||
 927                    program->Instructions[inst->BranchTarget].Opcode == OPCODE_ENDIF);
 928             /* eval condition: test the source operand if given, else eval_condition() */
 929             if (inst->SrcReg[0].File != PROGRAM_UNDEFINED) {
 930                GLfloat a[4];
 931                fetch_vector1(&inst->SrcReg[0], machine, a);
 932                cond = (a[0] != 0.0);
 933             }
 934             else {
 935                cond = eval_condition(machine, inst);
 936             }
 937             if (DEBUG_PROG) {
 938                printf("IF: %d\n", cond);
 939             }
 940             /* do if/else */
 941             if (cond) {
 942                /* do if-clause (just continue execution) */
 943             }
 944             else {
 945                /* go to the instruction after ELSE or ENDIF */
 946                /* (BranchTarget was range-checked at the top of this case) */
 947                pc = inst->BranchTarget;
 948             }
 949          }
 950          break;
 951       case OPCODE_ELSE:
 952          /* goto ENDIF; range-check the target before indexing with it */
 953          assert(inst->BranchTarget >= 0);
 954          ASSERT(program->Instructions[inst->BranchTarget].Opcode
 955                 == OPCODE_ENDIF);
 956          pc = inst->BranchTarget;
 957          break;
 958       case OPCODE_ENDIF:
 959          /* nothing */
 960          break;
 961       case OPCODE_KIL_NV:      /* NV_f_p only (conditional) */
 962          if (eval_condition(machine, inst)) {
 963             return GL_FALSE;
 964          }
 965          break;
 966       case OPCODE_KIL:         /* ARB_f_p only */
 967          {
 968             GLfloat a[4];
 969             fetch_vector4(&inst->SrcReg[0], machine, a);
 970             if (DEBUG_PROG) {
 971                printf("KIL if (%g %g %g %g) < 0.0\n",
 972                       a[0], a[1], a[2], a[3]);
 973             }
 974             /* stop and return GL_FALSE if any component is strictly negative */
 975             if (a[0] < 0.0F || a[1] < 0.0F || a[2] < 0.0F || a[3] < 0.0F) {
 976                return GL_FALSE;
 977             }
 978          }
 979          break;
 980       case OPCODE_LG2:         /* log base 2 */
 981          {
 982             GLfloat a[4], result[4], val;
 983             fetch_vector1(&inst->SrcReg[0], machine, a);
 984             /* The fast LOG2 macro doesn't meet the precision requirements.
 985              */
 986             if (a[0] == 0.0F) {
 987                val = -FLT_MAX;
 988             }
 989             else {
 990                val = (float)(log(a[0]) * 1.442695F);
 991             }
 992             result[0] = result[1] = result[2] = result[3] = val;
 993             store_vector4(inst, machine, result);
 994          }
 995          break;
 996       case OPCODE_LIT:
 997          {
 998             const GLfloat epsilon = 1.0F / 256.0F;      /* from NV VP spec */
 999             GLfloat a[4], result[4];
1000             fetch_vector4(&inst->SrcReg[0], machine, a);
1001             a[0] = MAX2(a[0], 0.0F);
1002             a[1] = MAX2(a[1], 0.0F);
1003             /* XXX ARB version clamps a[3], NV version doesn't */
1004             a[3] = CLAMP(a[3], -(128.0F - epsilon), (128.0F - epsilon));
1005             result[0] = 1.0F;
1006             result[1] = a[0];
1007             /* XXX we could probably just use pow() here */
1008             if (a[0] > 0.0F) {
1009                if (a[1] == 0.0 && a[3] == 0.0)
1010                   result[2] = 1.0F;
1011                else
1012                   result[2] = (GLfloat) _mesa_pow(a[1], a[3]);
1013             }
1014             else {
1015                result[2] = 0.0F;
1016             }
1017             result[3] = 1.0F;
1018             store_vector4(inst, machine, result);
1019             if (DEBUG_PROG) {
1020                printf("LIT (%g %g %g %g) : (%g %g %g %g)\n",
1021                       result[0], result[1], result[2], result[3],
1022                       a[0], a[1], a[2], a[3]);
1023             }
1024          }
1025          break;
1026       case OPCODE_LOG:         /* q = (exponent, mantissa, log2, 1) of |t.x| */
1027          {
1028             GLfloat t[4], q[4], abs_t0;
1029             fetch_vector1(&inst->SrcReg[0], machine, t);
1030             abs_t0 = FABSF(t[0]);    /* every result below is derived from |t.x| */
1031             if (abs_t0 != 0.0F) {
1032                /* Since we really can't handle infinite values on VMS
1033                 * like other OSes we'll use __MAXFLOAT to represent
1034                 * infinity.  This may need some tweaking.
1035                 */
1036 #ifdef VMS
1037                if (abs_t0 == __MAXFLOAT)
1038 #else
1039                if (IS_INF_OR_NAN(abs_t0))
1040 #endif
1041                {
1042                   SET_POS_INFINITY(q[0]);
1043                   q[1] = 1.0F;
1044                   SET_POS_INFINITY(q[2]);
1045                }
1046                else {
1047                   int exponent;
1048                   GLfloat mantissa = FREXPF(abs_t0, &exponent);
1049                   q[0] = (GLfloat) (exponent - 1);
1050                   q[1] = (GLfloat) (2.0 * mantissa); /* map [.5, 1) -> [1, 2) */
1051                   /* Use abs_t0, not t[0]: a negative input would otherwise give a
1052                    * negative mantissa above and NaN from log() below.  The fast
1053                    * LOG2 macro doesn't meet the precision requirements.
1054                    */
1055                   q[2] = (float)(log(abs_t0) * 1.442695F);
1056                }
1057             }
1058             else {
1059                SET_NEG_INFINITY(q[0]);
1060                q[1] = 1.0F;
1061                SET_NEG_INFINITY(q[2]);
1062             }
1063             q[3] = 1.0;
1064             store_vector4(inst, machine, q);
1065          }
1066          break;
1067       case OPCODE_LRP:
1068          {
1069             GLfloat a[4], b[4], c[4], result[4];
1070             fetch_vector4(&inst->SrcReg[0], machine, a);
1071             fetch_vector4(&inst->SrcReg[1], machine, b);
1072             fetch_vector4(&inst->SrcReg[2], machine, c);
1073             result[0] = a[0] * b[0] + (1.0F - a[0]) * c[0];
1074             result[1] = a[1] * b[1] + (1.0F - a[1]) * c[1];
1075             result[2] = a[2] * b[2] + (1.0F - a[2]) * c[2];
1076             result[3] = a[3] * b[3] + (1.0F - a[3]) * c[3];
1077             store_vector4(inst, machine, result);
1078             if (DEBUG_PROG) {
1079                printf("LRP (%g %g %g %g) = (%g %g %g %g), "
1080                       "(%g %g %g %g), (%g %g %g %g)\n",
1081                       result[0], result[1], result[2], result[3],
1082                       a[0], a[1], a[2], a[3],
1083                       b[0], b[1], b[2], b[3], c[0], c[1], c[2], c[3]);
1084             }
1085          }
1086          break;
1087       case OPCODE_MAD:
1088          {
1089             GLfloat a[4], b[4], c[4], result[4];
1090             fetch_vector4(&inst->SrcReg[0], machine, a);
1091             fetch_vector4(&inst->SrcReg[1], machine, b);
1092             fetch_vector4(&inst->SrcReg[2], machine, c);
1093             result[0] = a[0] * b[0] + c[0];
1094             result[1] = a[1] * b[1] + c[1];
1095             result[2] = a[2] * b[2] + c[2];
1096             result[3] = a[3] * b[3] + c[3];
1097             store_vector4(inst, machine, result);
1098             if (DEBUG_PROG) {
1099                printf("MAD (%g %g %g %g) = (%g %g %g %g) * "
1100                       "(%g %g %g %g) + (%g %g %g %g)\n",
1101                       result[0], result[1], result[2], result[3],
1102                       a[0], a[1], a[2], a[3],
1103                       b[0], b[1], b[2], b[3], c[0], c[1], c[2], c[3]);
1104             }
1105          }
1106          break;
1107       case OPCODE_MAX:
1108          {
1109             GLfloat a[4], b[4], result[4];
1110             fetch_vector4(&inst->SrcReg[0], machine, a);
1111             fetch_vector4(&inst->SrcReg[1], machine, b);
1112             result[0] = MAX2(a[0], b[0]);
1113             result[1] = MAX2(a[1], b[1]);
1114             result[2] = MAX2(a[2], b[2]);
1115             result[3] = MAX2(a[3], b[3]);
1116             store_vector4(inst, machine, result);
1117             if (DEBUG_PROG) {
1118                printf("MAX (%g %g %g %g) = (%g %g %g %g), (%g %g %g %g)\n",
1119                       result[0], result[1], result[2], result[3],
1120                       a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
1121             }
1122          }
1123          break;
1124       case OPCODE_MIN:
1125          {
1126             GLfloat a[4], b[4], result[4];
1127             fetch_vector4(&inst->SrcReg[0], machine, a);
1128             fetch_vector4(&inst->SrcReg[1], machine, b);
1129             result[0] = MIN2(a[0], b[0]);
1130             result[1] = MIN2(a[1], b[1]);
1131             result[2] = MIN2(a[2], b[2]);
1132             result[3] = MIN2(a[3], b[3]);
1133             store_vector4(inst, machine, result);
1134          }
1135          break;
1136       case OPCODE_MOV:
1137          {
1138             GLfloat result[4];
1139             fetch_vector4(&inst->SrcReg[0], machine, result);
1140             store_vector4(inst, machine, result);
1141             if (DEBUG_PROG) {
1142                printf("MOV (%g %g %g %g)\n",
1143                       result[0], result[1], result[2], result[3]);
1144             }
1145          }
1146          break;
1147       case OPCODE_MUL:
1148          {
1149             GLfloat a[4], b[4], result[4];
1150             fetch_vector4(&inst->SrcReg[0], machine, a);
1151             fetch_vector4(&inst->SrcReg[1], machine, b);
1152             result[0] = a[0] * b[0];
1153             result[1] = a[1] * b[1];
1154             result[2] = a[2] * b[2];
1155             result[3] = a[3] * b[3];
1156             store_vector4(inst, machine, result);
1157             if (DEBUG_PROG) {
1158                printf("MUL (%g %g %g %g) = (%g %g %g %g) * (%g %g %g %g)\n",
1159                       result[0], result[1], result[2], result[3],
1160                       a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
1161             }
1162          }
1163          break;
1164       case OPCODE_NOISE1:
1165          {
1166             GLfloat a[4], result[4];
1167             fetch_vector1(&inst->SrcReg[0], machine, a);
1168             result[0] =
1169                result[1] =
1170                result[2] =
1171                result[3] = _mesa_noise1(a[0]);
1172             store_vector4(inst, machine, result);
1173          }
1174          break;
1175       case OPCODE_NOISE2:
1176          {
1177             GLfloat a[4], result[4];
1178             fetch_vector4(&inst->SrcReg[0], machine, a);
1179             result[0] =
1180                result[1] =
1181                result[2] = result[3] = _mesa_noise2(a[0], a[1]);
1182             store_vector4(inst, machine, result);
1183          }
1184          break;
1185       case OPCODE_NOISE3:
1186          {
1187             GLfloat a[4], result[4];
1188             fetch_vector4(&inst->SrcReg[0], machine, a);
1189             result[0] =
1190                result[1] =
1191                result[2] =
1192                result[3] = _mesa_noise3(a[0], a[1], a[2]);
1193             store_vector4(inst, machine, result);
1194          }
1195          break;
1196       case OPCODE_NOISE4:
1197          {
1198             GLfloat a[4], result[4];
1199             fetch_vector4(&inst->SrcReg[0], machine, a);
1200             result[0] =
1201                result[1] =
1202                result[2] =
1203                result[3] = _mesa_noise4(a[0], a[1], a[2], a[3]);
1204             store_vector4(inst, machine, result);
1205          }
1206          break;
1207       case OPCODE_NOP:
1208          break;
1209       case OPCODE_NOT:         /* bitwise NOT */
1210          {
1211             GLuint a[4], result[4];
1212             fetch_vector4ui(&inst->SrcReg[0], machine, a);
1213             result[0] = ~a[0];
1214             result[1] = ~a[1];
1215             result[2] = ~a[2];
1216             result[3] = ~a[3];
1217             store_vector4ui(inst, machine, result);
1218          }
1219          break;
1220       case OPCODE_NRM3:        /* 3-component normalization */
1221          {
1222             GLfloat a[4], result[4];
1223             GLfloat tmp;
1224             fetch_vector4(&inst->SrcReg[0], machine, a);
1225             tmp = a[0] * a[0] + a[1] * a[1] + a[2] * a[2];
1226             if (tmp != 0.0F)
1227                tmp = INV_SQRTF(tmp);
1228             result[0] = tmp * a[0];
1229             result[1] = tmp * a[1];
1230             result[2] = tmp * a[2];
1231             result[3] = 0.0;  /* undefined, but prevent valgrind warnings */
1232             store_vector4(inst, machine, result);
1233          }
1234          break;
1235       case OPCODE_NRM4:        /* 4-component normalization */
1236          {
1237             GLfloat a[4], result[4];
1238             GLfloat tmp;
1239             fetch_vector4(&inst->SrcReg[0], machine, a);
1240             tmp = a[0] * a[0] + a[1] * a[1] + a[2] * a[2] + a[3] * a[3];
1241             if (tmp != 0.0F)
1242                tmp = INV_SQRTF(tmp);
1243             result[0] = tmp * a[0];
1244             result[1] = tmp * a[1];
1245             result[2] = tmp * a[2];
1246             result[3] = tmp * a[3];
1247             store_vector4(inst, machine, result);
1248          }
1249          break;
1250       case OPCODE_OR:          /* bitwise OR */
1251          {
1252             GLuint a[4], b[4], result[4];
1253             fetch_vector4ui(&inst->SrcReg[0], machine, a);
1254             fetch_vector4ui(&inst->SrcReg[1], machine, b);
1255             result[0] = a[0] | b[0];
1256             result[1] = a[1] | b[1];
1257             result[2] = a[2] | b[2];
1258             result[3] = a[3] | b[3];
1259             store_vector4ui(inst, machine, result);
1260          }
1261          break;
1262       case OPCODE_PK2H:        /* pack two 16-bit floats in one 32-bit float */
1263          {
1264             GLfloat a[4];
1265             GLuint result[4];
1266             GLhalfNV hx, hy;
1267             fetch_vector4(&inst->SrcReg[0], machine, a);
1268             hx = _mesa_float_to_half(a[0]);   /* goes into the low 16 bits */
1269             hy = _mesa_float_to_half(a[1]);   /* goes into the high 16 bits */
1270             result[0] =    /* the same packed word is stored in all four channels */
1271             result[1] =    /* Widen to GLuint before shifting: GLhalfNV presumably */
1272             result[2] =    /* promotes to signed int, where << 16 can overflow (UB). */
1273             result[3] = (GLuint) hx | ((GLuint) hy << 16);
1274             store_vector4ui(inst, machine, result);
1275          }
1276          break;
1277       case OPCODE_PK2US:       /* pack two GLushorts into one 32-bit float */
1278          {
1279             GLfloat a[4];
1280             GLuint result[4], usx, usy;
1281             fetch_vector4(&inst->SrcReg[0], machine, a);
1282             a[0] = CLAMP(a[0], 0.0F, 1.0F);
1283             a[1] = CLAMP(a[1], 0.0F, 1.0F);
1284             usx = IROUND(a[0] * 65535.0F);
1285             usy = IROUND(a[1] * 65535.0F);
1286             result[0] =
1287             result[1] =
1288             result[2] =
1289             result[3] = usx | (usy << 16);
1290             store_vector4ui(inst, machine, result);
1291          }
1292          break;
1293       case OPCODE_PK4B:        /* pack four GLbytes into one 32-bit float */
1294          {
1295             GLfloat a[4];
1296             GLuint result[4], ubx, uby, ubz, ubw;
1297             fetch_vector4(&inst->SrcReg[0], machine, a);
1298             a[0] = CLAMP(a[0], -128.0F / 127.0F, 1.0F);
1299             a[1] = CLAMP(a[1], -128.0F / 127.0F, 1.0F);
1300             a[2] = CLAMP(a[2], -128.0F / 127.0F, 1.0F);
1301             a[3] = CLAMP(a[3], -128.0F / 127.0F, 1.0F);
1302             ubx = IROUND(127.0F * a[0] + 128.0F);
1303             uby = IROUND(127.0F * a[1] + 128.0F);
1304             ubz = IROUND(127.0F * a[2] + 128.0F);
1305             ubw = IROUND(127.0F * a[3] + 128.0F);
1306             result[0] =
1307             result[1] =
1308             result[2] =
1309             result[3] = ubx | (uby << 8) | (ubz << 16) | (ubw << 24);
1310             store_vector4ui(inst, machine, result);
1311          }
1312          break;
1313       case OPCODE_PK4UB:       /* pack four GLubytes into one 32-bit float */
1314          {
1315             GLfloat a[4];
1316             GLuint result[4], ubx, uby, ubz, ubw;
1317             fetch_vector4(&inst->SrcReg[0], machine, a);
1318             a[0] = CLAMP(a[0], 0.0F, 1.0F);
1319             a[1] = CLAMP(a[1], 0.0F, 1.0F);
1320             a[2] = CLAMP(a[2], 0.0F, 1.0F);
1321             a[3] = CLAMP(a[3], 0.0F, 1.0F);
1322             ubx = IROUND(255.0F * a[0]);
1323             uby = IROUND(255.0F * a[1]);
1324             ubz = IROUND(255.0F * a[2]);
1325             ubw = IROUND(255.0F * a[3]);
1326             result[0] =
1327             result[1] =
1328             result[2] =
1329             result[3] = ubx | (uby << 8) | (ubz << 16) | (ubw << 24);
1330             store_vector4ui(inst, machine, result);
1331          }
1332          break;
1333       case OPCODE_POW:
1334          {
1335             GLfloat a[4], b[4], result[4];
1336             fetch_vector1(&inst->SrcReg[0], machine, a);
1337             fetch_vector1(&inst->SrcReg[1], machine, b);
1338             result[0] = result[1] = result[2] = result[3]
1339                = (GLfloat) _mesa_pow(a[0], b[0]);
1340             store_vector4(inst, machine, result);
1341          }
1342          break;
1343       case OPCODE_RCP:
1344          {
1345             GLfloat a[4], result[4];
1346             fetch_vector1(&inst->SrcReg[0], machine, a);
1347             if (DEBUG_PROG) {
1348                if (a[0] == 0)
1349                   printf("RCP(0)\n");
1350                else if (IS_INF_OR_NAN(a[0]))
1351                   printf("RCP(inf)\n");
1352             }
1353             result[0] = result[1] = result[2] = result[3] = 1.0F / a[0];
1354             store_vector4(inst, machine, result);
1355          }
1356          break;
1357       case OPCODE_RET:         /* return from subroutine (conditional) */
1358          if (eval_condition(machine, inst)) {
1359             if (machine->StackDepth == 0) {
1360                return GL_TRUE;  /* Per GL_NV_vertex_program2 spec */
1361             }
1362             /* subtract one because of pc++ in the for loop */
1363             pc = machine->CallStack[--machine->StackDepth] - 1;
1364          }
1365          break;
1366       case OPCODE_RFL:         /* reflection vector: 2*(axis.dir)/(axis.axis)*axis - dir */
1367          {
1368             GLfloat axis[4], dir[4], result[4], tmpX, tmpW;
1369             fetch_vector4(&inst->SrcReg[0], machine, axis);
1370             fetch_vector4(&inst->SrcReg[1], machine, dir);
1371             tmpW = DOT3(axis, axis);   /* NOTE: zero for an all-zero axis; not guarded */
1372             tmpX = (2.0F * DOT3(axis, dir)) / tmpW;
1373             result[0] = tmpX * axis[0] - dir[0];
1374             result[1] = tmpX * axis[1] - dir[1];
1375             result[2] = tmpX * axis[2] - dir[2];
1376             result[3] = 0.0F;  /* undefined for RFL (XXX enforce in parser!), but don't store garbage */
1377             store_vector4(inst, machine, result);
1378          }
1379          break;
1380       case OPCODE_RSQ:         /* 1 / sqrt() */
1381          {
1382             GLfloat a[4], result[4];
1383             fetch_vector1(&inst->SrcReg[0], machine, a);
1384             a[0] = FABSF(a[0]);
1385             result[0] = result[1] = result[2] = result[3] = INV_SQRTF(a[0]);
1386             store_vector4(inst, machine, result);
1387             if (DEBUG_PROG) {
1388                printf("RSQ %g = 1/sqrt(|%g|)\n", result[0], a[0]);
1389             }
1390          }
1391          break;
1392       case OPCODE_SCS:         /* sine and cos */
1393          {
1394             GLfloat a[4], result[4];
1395             fetch_vector1(&inst->SrcReg[0], machine, a);
1396             result[0] = (GLfloat) _mesa_cos(a[0]);
1397             result[1] = (GLfloat) _mesa_sin(a[0]);
1398             result[2] = 0.0;    /* undefined! */
1399             result[3] = 0.0;    /* undefined! */
1400             store_vector4(inst, machine, result);
1401          }
1402          break;
1403       case OPCODE_SEQ:         /* set on equal */
1404          {
1405             GLfloat a[4], b[4], result[4];
1406             fetch_vector4(&inst->SrcReg[0], machine, a);
1407             fetch_vector4(&inst->SrcReg[1], machine, b);
1408             result[0] = (a[0] == b[0]) ? 1.0F : 0.0F;
1409             result[1] = (a[1] == b[1]) ? 1.0F : 0.0F;
1410             result[2] = (a[2] == b[2]) ? 1.0F : 0.0F;
1411             result[3] = (a[3] == b[3]) ? 1.0F : 0.0F;
1412             store_vector4(inst, machine, result);
1413             if (DEBUG_PROG) {
1414                printf("SEQ (%g %g %g %g) = (%g %g %g %g) == (%g %g %g %g)\n",
1415                       result[0], result[1], result[2], result[3],
1416                       a[0], a[1], a[2], a[3],
1417                       b[0], b[1], b[2], b[3]);
1418             }
1419          }
1420          break;
1421       case OPCODE_SFL:         /* set false, operands ignored */
1422          {
1423             static const GLfloat result[4] = { 0.0F, 0.0F, 0.0F, 0.0F };
1424             store_vector4(inst, machine, result);
1425          }
1426          break;
1427       case OPCODE_SGE:         /* set on greater or equal */
1428          {
1429             GLfloat a[4], b[4], result[4];
1430             fetch_vector4(&inst->SrcReg[0], machine, a);
1431             fetch_vector4(&inst->SrcReg[1], machine, b);
1432             result[0] = (a[0] >= b[0]) ? 1.0F : 0.0F;
1433             result[1] = (a[1] >= b[1]) ? 1.0F : 0.0F;
1434             result[2] = (a[2] >= b[2]) ? 1.0F : 0.0F;
1435             result[3] = (a[3] >= b[3]) ? 1.0F : 0.0F;
1436             store_vector4(inst, machine, result);
1437             if (DEBUG_PROG) {
1438                printf("SGE (%g %g %g %g) = (%g %g %g %g) >= (%g %g %g %g)\n",
1439                       result[0], result[1], result[2], result[3],
1440                       a[0], a[1], a[2], a[3],
1441                       b[0], b[1], b[2], b[3]);
1442             }
1443          }
1444          break;
1445       case OPCODE_SGT:         /* set on greater */
1446          {
1447             GLfloat a[4], b[4], result[4];
1448             fetch_vector4(&inst->SrcReg[0], machine, a);
1449             fetch_vector4(&inst->SrcReg[1], machine, b);
1450             result[0] = (a[0] > b[0]) ? 1.0F : 0.0F;
1451             result[1] = (a[1] > b[1]) ? 1.0F : 0.0F;
1452             result[2] = (a[2] > b[2]) ? 1.0F : 0.0F;
1453             result[3] = (a[3] > b[3]) ? 1.0F : 0.0F;
1454             store_vector4(inst, machine, result);
1455             if (DEBUG_PROG) {
1456                printf("SGT (%g %g %g %g) = (%g %g %g %g) > (%g %g %g %g)\n",
1457                       result[0], result[1], result[2], result[3],
1458                       a[0], a[1], a[2], a[3],
1459                       b[0], b[1], b[2], b[3]);
1460             }
1461          }
1462          break;
1463       case OPCODE_SIN:
1464          {
1465             GLfloat a[4], result[4];
1466             fetch_vector1(&inst->SrcReg[0], machine, a);
1467             result[0] = result[1] = result[2] = result[3]
1468                = (GLfloat) _mesa_sin(a[0]);
1469             store_vector4(inst, machine, result);
1470          }
1471          break;
1472       case OPCODE_SLE:         /* set on less or equal */
1473          {
1474             GLfloat a[4], b[4], result[4];
1475             fetch_vector4(&inst->SrcReg[0], machine, a);
1476             fetch_vector4(&inst->SrcReg[1], machine, b);
1477             result[0] = (a[0] <= b[0]) ? 1.0F : 0.0F;
1478             result[1] = (a[1] <= b[1]) ? 1.0F : 0.0F;
1479             result[2] = (a[2] <= b[2]) ? 1.0F : 0.0F;
1480             result[3] = (a[3] <= b[3]) ? 1.0F : 0.0F;
1481             store_vector4(inst, machine, result);
1482             if (DEBUG_PROG) {
1483                printf("SLE (%g %g %g %g) = (%g %g %g %g) <= (%g %g %g %g)\n",
1484                       result[0], result[1], result[2], result[3],
1485                       a[0], a[1], a[2], a[3],
1486                       b[0], b[1], b[2], b[3]);
1487             }
1488          }
1489          break;
1490       case OPCODE_SLT:         /* set on less */
1491          {
1492             GLfloat a[4], b[4], result[4];
1493             fetch_vector4(&inst->SrcReg[0], machine, a);
1494             fetch_vector4(&inst->SrcReg[1], machine, b);
1495             result[0] = (a[0] < b[0]) ? 1.0F : 0.0F;
1496             result[1] = (a[1] < b[1]) ? 1.0F : 0.0F;
1497             result[2] = (a[2] < b[2]) ? 1.0F : 0.0F;
1498             result[3] = (a[3] < b[3]) ? 1.0F : 0.0F;
1499             store_vector4(inst, machine, result);
1500             if (DEBUG_PROG) {
1501                printf("SLT (%g %g %g %g) = (%g %g %g %g) < (%g %g %g %g)\n",
1502                       result[0], result[1], result[2], result[3],
1503                       a[0], a[1], a[2], a[3],
1504                       b[0], b[1], b[2], b[3]);
1505             }
1506          }
1507          break;
1508       case OPCODE_SNE:         /* set on not equal */
1509          {
1510             GLfloat a[4], b[4], result[4];
1511             fetch_vector4(&inst->SrcReg[0], machine, a);
1512             fetch_vector4(&inst->SrcReg[1], machine, b);
1513             result[0] = (a[0] != b[0]) ? 1.0F : 0.0F;
1514             result[1] = (a[1] != b[1]) ? 1.0F : 0.0F;
1515             result[2] = (a[2] != b[2]) ? 1.0F : 0.0F;
1516             result[3] = (a[3] != b[3]) ? 1.0F : 0.0F;
1517             store_vector4(inst, machine, result);
1518             if (DEBUG_PROG) {
1519                printf("SNE (%g %g %g %g) = (%g %g %g %g) != (%g %g %g %g)\n",
1520                       result[0], result[1], result[2], result[3],
1521                       a[0], a[1], a[2], a[3],
1522                       b[0], b[1], b[2], b[3]);
1523             }
1524          }
1525          break;
1526       case OPCODE_SSG:         /* set sign (-1, 0 or +1) */
1527          {
1528             GLfloat a[4], result[4];
1529             fetch_vector4(&inst->SrcReg[0], machine, a);
1530             result[0] = (GLfloat) ((a[0] > 0.0F) - (a[0] < 0.0F));
1531             result[1] = (GLfloat) ((a[1] > 0.0F) - (a[1] < 0.0F));
1532             result[2] = (GLfloat) ((a[2] > 0.0F) - (a[2] < 0.0F));
1533             result[3] = (GLfloat) ((a[3] > 0.0F) - (a[3] < 0.0F));
1534             store_vector4(inst, machine, result);
1535          }
1536          break;
1537       case OPCODE_STR:         /* set true, operands ignored */
1538          {
1539             static const GLfloat result[4] = { 1.0F, 1.0F, 1.0F, 1.0F };
1540             store_vector4(inst, machine, result);
1541          }
1542          break;
1543       case OPCODE_SUB:
1544          {
1545             GLfloat a[4], b[4], result[4];
1546             fetch_vector4(&inst->SrcReg[0], machine, a);
1547             fetch_vector4(&inst->SrcReg[1], machine, b);
1548             result[0] = a[0] - b[0];
1549             result[1] = a[1] - b[1];
1550             result[2] = a[2] - b[2];
1551             result[3] = a[3] - b[3];
1552             store_vector4(inst, machine, result);
1553             if (DEBUG_PROG) {
1554                printf("SUB (%g %g %g %g) = (%g %g %g %g) - (%g %g %g %g)\n",
1555                       result[0], result[1], result[2], result[3],
1556                       a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
1557             }
1558          }
1559          break;
1560       case OPCODE_SWZ:         /* extended swizzle */
1561          {
1562             const struct prog_src_register *source = &inst->SrcReg[0];
1563             const GLfloat *src = get_src_register_pointer(source, machine);
1564             GLfloat result[4];
1565             GLuint i;
1566             for (i = 0; i < 4; i++) {
1567                const GLuint swz = GET_SWZ(source->Swizzle, i);
1568                if (swz == SWIZZLE_ZERO)
1569                   result[i] = 0.0;
1570                else if (swz == SWIZZLE_ONE)
1571                   result[i] = 1.0;
1572                else {
1573                   ASSERT(swz >= 0);
1574                   ASSERT(swz <= 3);
1575                   result[i] = src[swz];
1576                }
1577                if (source->Negate & (1 << i))
1578                   result[i] = -result[i];
1579             }
1580             store_vector4(inst, machine, result);
1581          }
1582          break;
1583       case OPCODE_TEX:         /* Both ARB and NV frag prog */
1584          /* Simple texel lookup */
1585          {
1586             GLfloat texcoord[4], color[4];
1587             fetch_vector4(&inst->SrcReg[0], machine, texcoord);
1588
1589             fetch_texel(ctx, machine, inst, texcoord, 0.0, color);
1590
1591             if (DEBUG_PROG) {
1592                printf("TEX (%g, %g, %g, %g) = texture[%d][%g, %g, %g, %g]\n",
1593                       color[0], color[1], color[2], color[3],
1594                       inst->TexSrcUnit,
1595                       texcoord[0], texcoord[1], texcoord[2], texcoord[3]);
1596             }
1597             store_vector4(inst, machine, color);
1598          }
1599          break;
1600       case OPCODE_TXB:         /* GL_ARB_fragment_program only */
1601          /* Texel lookup with LOD bias */
1602          {
1603             GLfloat texcoord[4], color[4], lodBias;
1604
1605             fetch_vector4(&inst->SrcReg[0], machine, texcoord);
1606
1607             /* texcoord[3] is the bias to add to lambda */
1608             lodBias = texcoord[3];
1609
1610             fetch_texel(ctx, machine, inst, texcoord, lodBias, color);
1611
1612             store_vector4(inst, machine, color);
1613          }
1614          break;
1615       case OPCODE_TXD:         /* GL_NV_fragment_program only */
1616          /* Texture lookup w/ partial derivatives for LOD */
1617          {
1618             GLfloat texcoord[4], dtdx[4], dtdy[4], color[4];
1619             fetch_vector4(&inst->SrcReg[0], machine, texcoord);
1620             fetch_vector4(&inst->SrcReg[1], machine, dtdx);
1621             fetch_vector4(&inst->SrcReg[2], machine, dtdy);
1622             machine->FetchTexelDeriv(ctx, texcoord, dtdx, dtdy,
1623                                      0.0, /* lodBias */
1624                                      inst->TexSrcUnit, color);
1625             store_vector4(inst, machine, color);
1626          }
1627          break;
1628       case OPCODE_TXP:         /* GL_ARB_fragment_program only */
1629          /* Texture lookup w/ projective divide */
1630          {
1631             GLfloat texcoord[4], color[4];
1632
1633             fetch_vector4(&inst->SrcReg[0], machine, texcoord);
1634             /* Not so sure about this test - if texcoord[3] is
1635              * zero, we'd probably be fine except for an ASSERT in
1636              * IROUND_POS() which gets triggered by the inf values created.
1637              */
1638             if (texcoord[3] != 0.0) {
1639                texcoord[0] /= texcoord[3];
1640                texcoord[1] /= texcoord[3];
1641                texcoord[2] /= texcoord[3];
1642             }
1643
1644             fetch_texel(ctx, machine, inst, texcoord, 0.0, color);
1645
1646             store_vector4(inst, machine, color);
1647          }
1648          break;
1649       case OPCODE_TXP_NV:      /* GL_NV_fragment_program only */
1650          /* Texture lookup w/ projective divide, as above, but do not
1651           * do the divide by w if sampling from a cube map.
1652           */
1653          {
1654             GLfloat texcoord[4], color[4];
1655
1656             fetch_vector4(&inst->SrcReg[0], machine, texcoord);
1657             if (inst->TexSrcTarget != TEXTURE_CUBE_INDEX &&
1658                 texcoord[3] != 0.0) {
1659                texcoord[0] /= texcoord[3];
1660                texcoord[1] /= texcoord[3];
1661                texcoord[2] /= texcoord[3];
1662             }
1663
1664             fetch_texel(ctx, machine, inst, texcoord, 0.0, color);
1665
1666             store_vector4(inst, machine, color);
1667          }
1668          break;
1669       case OPCODE_TRUNC:       /* truncate toward zero */
1670          {
1671             GLfloat a[4], result[4];
1672             fetch_vector4(&inst->SrcReg[0], machine, a);
1673             result[0] = (GLfloat) (GLint) a[0];
1674             result[1] = (GLfloat) (GLint) a[1];
1675             result[2] = (GLfloat) (GLint) a[2];
1676             result[3] = (GLfloat) (GLint) a[3];
1677             store_vector4(inst, machine, result);
1678          }
1679          break;
1680       case OPCODE_UP2H:        /* unpack two 16-bit floats */
1681          {
1682             const GLuint raw = fetch_vector1ui(&inst->SrcReg[0], machine);
1683             GLfloat result[4];
1684             GLushort hx, hy;
1685             hx = raw & 0xffff;
1686             hy = raw >> 16;
1687             result[0] = result[2] = _mesa_half_to_float(hx);
1688             result[1] = result[3] = _mesa_half_to_float(hy);
1689             store_vector4(inst, machine, result);
1690          }
1691          break;
1692       case OPCODE_UP2US:       /* unpack two GLushorts */
1693          {
1694             const GLuint raw = fetch_vector1ui(&inst->SrcReg[0], machine);
1695             GLfloat result[4];
1696             GLushort usx, usy;
1697             usx = raw & 0xffff;
1698             usy = raw >> 16;
1699             result[0] = result[2] = usx * (1.0f / 65535.0f);
1700             result[1] = result[3] = usy * (1.0f / 65535.0f);
1701             store_vector4(inst, machine, result);
1702          }
1703          break;
1704       case OPCODE_UP4B:        /* unpack four GLbytes */
1705          {
1706             const GLuint raw = fetch_vector1ui(&inst->SrcReg[0], machine);
1707             GLfloat result[4];
1708             result[0] = (((raw >> 0) & 0xff) - 128) / 127.0F;
1709             result[1] = (((raw >> 8) & 0xff) - 128) / 127.0F;
1710             result[2] = (((raw >> 16) & 0xff) - 128) / 127.0F;
1711             result[3] = (((raw >> 24) & 0xff) - 128) / 127.0F;
1712             store_vector4(inst, machine, result);
1713          }
1714          break;
1715       case OPCODE_UP4UB:       /* unpack four GLubytes */
1716          {
1717             const GLuint raw = fetch_vector1ui(&inst->SrcReg[0], machine);
1718             GLfloat result[4];
1719             result[0] = ((raw >> 0) & 0xff) / 255.0F;
1720             result[1] = ((raw >> 8) & 0xff) / 255.0F;
1721             result[2] = ((raw >> 16) & 0xff) / 255.0F;
1722             result[3] = ((raw >> 24) & 0xff) / 255.0F;
1723             store_vector4(inst, machine, result);
1724          }
1725          break;
1726       case OPCODE_XOR:         /* bitwise XOR */
1727          {
1728             GLuint a[4], b[4], result[4];
1729             fetch_vector4ui(&inst->SrcReg[0], machine, a);
1730             fetch_vector4ui(&inst->SrcReg[1], machine, b);
1731             result[0] = a[0] ^ b[0];
1732             result[1] = a[1] ^ b[1];
1733             result[2] = a[2] ^ b[2];
1734             result[3] = a[3] ^ b[3];
1735             store_vector4ui(inst, machine, result);
1736          }
1737          break;
1738       case OPCODE_XPD:         /* cross product */
1739          {
1740             GLfloat a[4], b[4], result[4];
1741             fetch_vector4(&inst->SrcReg[0], machine, a);
1742             fetch_vector4(&inst->SrcReg[1], machine, b);
1743             result[0] = a[1] * b[2] - a[2] * b[1];
1744             result[1] = a[2] * b[0] - a[0] * b[2];
1745             result[2] = a[0] * b[1] - a[1] * b[0];
1746             result[3] = 1.0;
1747             store_vector4(inst, machine, result);
1748             if (DEBUG_PROG) {
1749                printf("XPD (%g %g %g %g) = (%g %g %g) X (%g %g %g)\n",
1750                       result[0], result[1], result[2], result[3],
1751                       a[0], a[1], a[2], b[0], b[1], b[2]);
1752             }
1753          }
1754          break;
1755       case OPCODE_X2D:         /* 2-D matrix transform */
1756          {
1757             GLfloat a[4], b[4], c[4], result[4];
1758             fetch_vector4(&inst->SrcReg[0], machine, a);
1759             fetch_vector4(&inst->SrcReg[1], machine, b);
1760             fetch_vector4(&inst->SrcReg[2], machine, c);
1761             result[0] = a[0] + b[0] * c[0] + b[1] * c[1];
1762             result[1] = a[1] + b[0] * c[2] + b[1] * c[3];
1763             result[2] = a[2] + b[0] * c[0] + b[1] * c[1];
1764             result[3] = a[3] + b[0] * c[2] + b[1] * c[3];
1765             store_vector4(inst, machine, result);
1766          }
1767          break;
1768       case OPCODE_PRINT:
1769          {
1770             if (inst->SrcReg[0].File != -1) {
1771                GLfloat a[4];
1772                fetch_vector4(&inst->SrcReg[0], machine, a);
1773                printf("%s%g, %g, %g, %g\n", (const char *) inst->Data,
1774                             a[0], a[1], a[2], a[3]);
1775             }
1776             else {
1777                printf("%s\n", (const char *) inst->Data);
1778             }
1779          }
1780          break;
1781       case OPCODE_END:
1782          return GL_TRUE;
1783       default:
1784          _mesa_problem(ctx, "Bad opcode %d in _mesa_execute_program",
1785                        inst->Opcode);
1786          return GL_TRUE;        /* return value doesn't matter */
1787       }
1788
1789       numExec++;
1790       if (numExec > maxExec) {
1791          _mesa_problem(ctx, "Infinite loop detected in fragment program");
1792          return GL_TRUE;
1793       }
1794
1795    } /* for pc */
1796
1797    return GL_TRUE;
1798 }