src/mesa/shader/prog_execute.c

   1 /*
   2  * Mesa 3-D graphics library
   3  * Version:  7.3
   4  *
   5  * Copyright (C) 1999-2008  Brian Paul   All Rights Reserved.
   6  *
   7  * Permission is hereby granted, free of charge, to any person obtaining a
   8  * copy of this software and associated documentation files (the "Software"),
   9  * to deal in the Software without restriction, including without limitation
  10  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  11  * and/or sell copies of the Software, and to permit persons to whom the
  12  * Software is furnished to do so, subject to the following conditions:
  13  *
  14  * The above copyright notice and this permission notice shall be included
  15  * in all copies or substantial portions of the Software.
  16  *
  17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  18  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  20  * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
  21  * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  22  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  23  */
  24
  25 /**
  26  * \file prog_execute.c
  27  * Software interpreter for vertex/fragment programs.
  28  * \author Brian Paul
  29  */
  30
  31 /*
  32  * NOTE: we do everything in single-precision floating point; we don't
  33  * currently observe the single/half/fixed-precision qualifiers.
  34  *
  35  */
  36
  37
  38 #include "main/glheader.h"
  39 #include "main/colormac.h"
  40 #include "main/context.h"
  41 #include "prog_execute.h"
  42 #include "prog_instruction.h"
  43 #include "prog_parameter.h"
  44 #include "prog_print.h"
  45 #include "prog_noise.h"
  46
  47
  48 /* debug predicate */
  49 #define DEBUG_PROG 0
  50
  51
  52 /**
  53  * Set x to positive or negative infinity.
  54  */
  55 #if defined(USE_IEEE) || defined(_WIN32)
  56 #define SET_POS_INFINITY(x)                  \
  57    do {                                      \
  58          fi_type fi;                         \
  59          fi.i = 0x7F800000;                  \
  60          x = fi.f;                           \
  61    } while (0)
  62 #define SET_NEG_INFINITY(x)                  \
  63    do {                                      \
  64          fi_type fi;                         \
  65          fi.i = 0xFF800000;                  \
  66          x = fi.f;                           \
  67    } while (0)
  68 #elif defined(VMS)
  69 #define SET_POS_INFINITY(x)  x = __MAXFLOAT
  70 #define SET_NEG_INFINITY(x)  x = -__MAXFLOAT
  71 #else
  72 #define SET_POS_INFINITY(x)  x = (GLfloat) HUGE_VAL
  73 #define SET_NEG_INFINITY(x)  x = (GLfloat) -HUGE_VAL
  74 #endif
  75
  76 #define SET_FLOAT_BITS(x, bits) ((fi_type *) (void *) &(x))->i = bits
  77
  78
  79 static const GLfloat ZeroVec[4] = { 0.0F, 0.0F, 0.0F, 0.0F };
  80
  81
  82
  83 /**
  84  * Return a pointer to the 4-element float vector specified by the given
  85  * source register.
  86  */
  87 static INLINE const GLfloat *
  88 get_src_register_pointer(const struct prog_src_register *source,
  89                          const struct gl_program_machine *machine)
  90 {
  91    const struct gl_program *prog = machine->CurProgram;
  92    GLint reg = source->Index;
  93
  94    if (source->RelAddr) {
  95       /* add address register value to src index/offset */
  96       reg += machine->AddressReg[0][0];
  97       if (reg < 0) {
  98          return ZeroVec;
  99       }
 100    }
 101
 102    switch (source->File) {
 103    case PROGRAM_TEMPORARY:
 104       if (reg >= MAX_PROGRAM_TEMPS)
 105          return ZeroVec;
 106       return machine->Temporaries[reg];
 107
 108    case PROGRAM_INPUT:
 109       if (prog->Target == GL_VERTEX_PROGRAM_ARB) {
 110          if (reg >= VERT_ATTRIB_MAX)
 111             return ZeroVec;
 112          return machine->VertAttribs[reg];
 113       }
 114       else {
 115          if (reg >= FRAG_ATTRIB_MAX)
 116             return ZeroVec;
 117          return machine->Attribs[reg][machine->CurElement];
 118       }
 119
 120    case PROGRAM_OUTPUT:
 121       if (reg >= MAX_PROGRAM_OUTPUTS)
 122          return ZeroVec;
 123       return machine->Outputs[reg];
 124
 125    case PROGRAM_LOCAL_PARAM:
 126       if (reg >= MAX_PROGRAM_LOCAL_PARAMS)
 127          return ZeroVec;
 128       return machine->CurProgram->LocalParams[reg];
 129
 130    case PROGRAM_ENV_PARAM:
 131       if (reg >= MAX_PROGRAM_ENV_PARAMS)
 132          return ZeroVec;
 133       return machine->EnvParams[reg];
 134
 135    case PROGRAM_STATE_VAR:
 136       /* Fallthrough */
 137    case PROGRAM_CONSTANT:
 138       /* Fallthrough */
 139    case PROGRAM_UNIFORM:
 140       /* Fallthrough */
 141    case PROGRAM_NAMED_PARAM:
 142       if (reg >= (GLint) prog->Parameters->NumParameters)
 143          return ZeroVec;
 144       return prog->Parameters->ParameterValues[reg];
 145
 146    default:
 147       _mesa_problem(NULL,
 148          "Invalid src register file %d in get_src_register_pointer()",
 149          source->File);
 150       return NULL;
 151    }
 152 }
 153
 154
 155 /**
 156  * Return a pointer to the 4-element float vector specified by the given
 157  * destination register.
 158  */
 159 static INLINE GLfloat *
 160 get_dst_register_pointer(const struct prog_dst_register *dest,
 161                          struct gl_program_machine *machine)
 162 {
 163    static GLfloat dummyReg[4];
 164    GLint reg = dest->Index;
 165
 166    if (dest->RelAddr) {
 167       /* add address register value to src index/offset */
 168       reg += machine->AddressReg[0][0];
 169       if (reg < 0) {
 170          return dummyReg;
 171       }
 172    }
 173
 174    switch (dest->File) {
 175    case PROGRAM_TEMPORARY:
 176       if (reg >= MAX_PROGRAM_TEMPS)
 177          return dummyReg;
 178       return machine->Temporaries[reg];
 179
 180    case PROGRAM_OUTPUT:
 181       if (reg >= MAX_PROGRAM_OUTPUTS)
 182          return dummyReg;
 183       return machine->Outputs[reg];
 184
 185    case PROGRAM_WRITE_ONLY:
 186       return dummyReg;
 187
 188    default:
 189       _mesa_problem(NULL,
 190          "Invalid dest register file %d in get_dst_register_pointer()",
 191          dest->File);
 192       return NULL;
 193    }
 194 }
 195
 196
 197
 198 /**
 199  * Fetch a 4-element float vector from the given source register.
 200  * Apply swizzling and negating as needed.
 201  */
 202 static void
 203 fetch_vector4(const struct prog_src_register *source,
 204               const struct gl_program_machine *machine, GLfloat result[4])
 205 {
 206    const GLfloat *src = get_src_register_pointer(source, machine);
 207    ASSERT(src);
 208
 209    if (source->Swizzle == SWIZZLE_NOOP) {
 210       /* no swizzling */
 211       COPY_4V(result, src);
 212    }
 213    else {
 214       ASSERT(GET_SWZ(source->Swizzle, 0) <= 3);
 215       ASSERT(GET_SWZ(source->Swizzle, 1) <= 3);
 216       ASSERT(GET_SWZ(source->Swizzle, 2) <= 3);
 217       ASSERT(GET_SWZ(source->Swizzle, 3) <= 3);
 218       result[0] = src[GET_SWZ(source->Swizzle, 0)];
 219       result[1] = src[GET_SWZ(source->Swizzle, 1)];
 220       result[2] = src[GET_SWZ(source->Swizzle, 2)];
 221       result[3] = src[GET_SWZ(source->Swizzle, 3)];
 222    }
 223
 224    if (source->Abs) {
 225       result[0] = FABSF(result[0]);
 226       result[1] = FABSF(result[1]);
 227       result[2] = FABSF(result[2]);
 228       result[3] = FABSF(result[3]);
 229    }
 230    if (source->Negate) {
 231       ASSERT(source->Negate == NEGATE_XYZW);
 232       result[0] = -result[0];
 233       result[1] = -result[1];
 234       result[2] = -result[2];
 235       result[3] = -result[3];
 236    }
 237
 238 #ifdef NAN_CHECK
 239    assert(!IS_INF_OR_NAN(result[0]));
 240    assert(!IS_INF_OR_NAN(result[0]));
 241    assert(!IS_INF_OR_NAN(result[0]));
 242    assert(!IS_INF_OR_NAN(result[0]));
 243 #endif
 244 }
 245
 246
 247 /**
 248  * Fetch a 4-element uint vector from the given source register.
 249  * Apply swizzling but not negation/abs.
 250  */
 251 static void
 252 fetch_vector4ui(const struct prog_src_register *source,
 253                 const struct gl_program_machine *machine, GLuint result[4])
 254 {
 255    const GLuint *src = (GLuint *) get_src_register_pointer(source, machine);
 256    ASSERT(src);
 257
 258    if (source->Swizzle == SWIZZLE_NOOP) {
 259       /* no swizzling */
 260       COPY_4V(result, src);
 261    }
 262    else {
 263       ASSERT(GET_SWZ(source->Swizzle, 0) <= 3);
 264       ASSERT(GET_SWZ(source->Swizzle, 1) <= 3);
 265       ASSERT(GET_SWZ(source->Swizzle, 2) <= 3);
 266       ASSERT(GET_SWZ(source->Swizzle, 3) <= 3);
 267       result[0] = src[GET_SWZ(source->Swizzle, 0)];
 268       result[1] = src[GET_SWZ(source->Swizzle, 1)];
 269       result[2] = src[GET_SWZ(source->Swizzle, 2)];
 270       result[3] = src[GET_SWZ(source->Swizzle, 3)];
 271    }
 272
 273    /* Note: no Negate or Abs here */
 274 }
 275
 276
 277
 278 /**
 279  * Fetch the derivative with respect to X or Y for the given register.
 280  * XXX this currently only works for fragment program input attribs.
 281  */
 282 static void
 283 fetch_vector4_deriv(GLcontext * ctx,
 284                     const struct prog_src_register *source,
 285                     const struct gl_program_machine *machine,
 286                     char xOrY, GLfloat result[4])
 287 {
 288    if (source->File == PROGRAM_INPUT &&
 289        source->Index < (GLint) machine->NumDeriv) {
 290       const GLint col = machine->CurElement;
 291       const GLfloat w = machine->Attribs[FRAG_ATTRIB_WPOS][col][3];
 292       const GLfloat invQ = 1.0f / w;
 293       GLfloat deriv[4];
 294
 295       if (xOrY == 'X') {
 296          deriv[0] = machine->DerivX[source->Index][0] * invQ;
 297          deriv[1] = machine->DerivX[source->Index][1] * invQ;
 298          deriv[2] = machine->DerivX[source->Index][2] * invQ;
 299          deriv[3] = machine->DerivX[source->Index][3] * invQ;
 300       }
 301       else {
 302          deriv[0] = machine->DerivY[source->Index][0] * invQ;
 303          deriv[1] = machine->DerivY[source->Index][1] * invQ;
 304          deriv[2] = machine->DerivY[source->Index][2] * invQ;
 305          deriv[3] = machine->DerivY[source->Index][3] * invQ;
 306       }
 307
 308       result[0] = deriv[GET_SWZ(source->Swizzle, 0)];
 309       result[1] = deriv[GET_SWZ(source->Swizzle, 1)];
 310       result[2] = deriv[GET_SWZ(source->Swizzle, 2)];
 311       result[3] = deriv[GET_SWZ(source->Swizzle, 3)];
 312
 313       if (source->Abs) {
 314          result[0] = FABSF(result[0]);
 315          result[1] = FABSF(result[1]);
 316          result[2] = FABSF(result[2]);
 317          result[3] = FABSF(result[3]);
 318       }
 319       if (source->Negate) {
 320          ASSERT(source->Negate == NEGATE_XYZW);
 321          result[0] = -result[0];
 322          result[1] = -result[1];
 323          result[2] = -result[2];
 324          result[3] = -result[3];
 325       }
 326    }
 327    else {
 328       ASSIGN_4V(result, 0.0, 0.0, 0.0, 0.0);
 329    }
 330 }
 331
 332
 333 /**
 334  * As above, but only return result[0] element.
 335  */
 336 static void
 337 fetch_vector1(const struct prog_src_register *source,
 338               const struct gl_program_machine *machine, GLfloat result[4])
 339 {
 340    const GLfloat *src = get_src_register_pointer(source, machine);
 341    ASSERT(src);
 342
 343    result[0] = src[GET_SWZ(source->Swizzle, 0)];
 344
 345    if (source->Abs) {
 346       result[0] = FABSF(result[0]);
 347    }
 348    if (source->Negate) {
 349       result[0] = -result[0];
 350    }
 351 }
 352
 353
 354 static GLuint
 355 fetch_vector1ui(const struct prog_src_register *source,
 356                 const struct gl_program_machine *machine)
 357 {
 358    const GLuint *src = (GLuint *) get_src_register_pointer(source, machine);
 359    GLuint result;
 360
 361    ASSERT(src);
 362
 363    result = src[GET_SWZ(source->Swizzle, 0)];
 364
 365    if (source->Abs) {
 366       result = FABSF(result);
 367    }
 368    if (source->Negate) {
 369       result = -result;
 370    }
 371
 372    return result;
 373 }
 374
 375
 376 /**
 377  * Fetch texel from texture.  Use partial derivatives when possible.
 378  */
 379 static INLINE void
 380 fetch_texel(GLcontext *ctx,
 381             const struct gl_program_machine *machine,
 382             const struct prog_instruction *inst,
 383             const GLfloat texcoord[4], GLfloat lodBias,
 384             GLfloat color[4])
 385 {
 386    const GLuint unit = machine->Samplers[inst->TexSrcUnit];
 387
 388    /* Note: we only have the right derivatives for fragment input attribs.
 389     */
 390    if (machine->NumDeriv > 0 &&
 391        inst->SrcReg[0].File == PROGRAM_INPUT &&
 392        inst->SrcReg[0].Index == FRAG_ATTRIB_TEX0 + inst->TexSrcUnit) {
 393       /* simple texture fetch for which we should have derivatives */
 394       GLuint attr = inst->SrcReg[0].Index;
 395       machine->FetchTexelDeriv(ctx, texcoord,
 396                                machine->DerivX[attr],
 397                                machine->DerivY[attr],
 398                                lodBias, unit, color);
 399    }
 400    else {
 401       machine->FetchTexelLod(ctx, texcoord, lodBias, unit, color);
 402    }
 403 }
 404
 405
 406 /**
 407  * Test value against zero and return GT, LT, EQ or UN if NaN.
 408  */
 409 static INLINE GLuint
 410 generate_cc(float value)
 411 {
 412    if (value != value)
 413       return COND_UN;           /* NaN */
 414    if (value > 0.0F)
 415       return COND_GT;
 416    if (value < 0.0F)
 417       return COND_LT;
 418    return COND_EQ;
 419 }
 420
 421
 422 /**
 423  * Test if the ccMaskRule is satisfied by the given condition code.
 424  * Used to mask destination writes according to the current condition code.
 425  */
 426 static INLINE GLboolean
 427 test_cc(GLuint condCode, GLuint ccMaskRule)
 428 {
 429    switch (ccMaskRule) {
 430    case COND_EQ: return (condCode == COND_EQ);
 431    case COND_NE: return (condCode != COND_EQ);
 432    case COND_LT: return (condCode == COND_LT);
 433    case COND_GE: return (condCode == COND_GT || condCode == COND_EQ);
 434    case COND_LE: return (condCode == COND_LT || condCode == COND_EQ);
 435    case COND_GT: return (condCode == COND_GT);
 436    case COND_TR: return GL_TRUE;
 437    case COND_FL: return GL_FALSE;
 438    default:      return GL_TRUE;
 439    }
 440 }
 441
 442
 443 /**
 444  * Evaluate the 4 condition codes against a predicate and return GL_TRUE
 445  * or GL_FALSE to indicate result.
 446  */
 447 static INLINE GLboolean
 448 eval_condition(const struct gl_program_machine *machine,
 449                const struct prog_instruction *inst)
 450 {
 451    const GLuint swizzle = inst->DstReg.CondSwizzle;
 452    const GLuint condMask = inst->DstReg.CondMask;
 453    if (test_cc(machine->CondCodes[GET_SWZ(swizzle, 0)], condMask) ||
 454        test_cc(machine->CondCodes[GET_SWZ(swizzle, 1)], condMask) ||
 455        test_cc(machine->CondCodes[GET_SWZ(swizzle, 2)], condMask) ||
 456        test_cc(machine->CondCodes[GET_SWZ(swizzle, 3)], condMask)) {
 457       return GL_TRUE;
 458    }
 459    else {
 460       return GL_FALSE;
 461    }
 462 }
 463
 464
 465
 466 /**
 467  * Store 4 floats into a register.  Observe the instructions saturate and
 468  * set-condition-code flags.
 469  */
 470 static void
 471 store_vector4(const struct prog_instruction *inst,
 472               struct gl_program_machine *machine, const GLfloat value[4])
 473 {
 474    const struct prog_dst_register *dstReg = &(inst->DstReg);
 475    const GLboolean clamp = inst->SaturateMode == SATURATE_ZERO_ONE;
 476    GLuint writeMask = dstReg->WriteMask;
 477    GLfloat clampedValue[4];
 478    GLfloat *dst = get_dst_register_pointer(dstReg, machine);
 479
 480 #if 0
 481    if (value[0] > 1.0e10 ||
 482        IS_INF_OR_NAN(value[0]) ||
 483        IS_INF_OR_NAN(value[1]) ||
 484        IS_INF_OR_NAN(value[2]) || IS_INF_OR_NAN(value[3]))
 485       printf("store %g %g %g %g\n", value[0], value[1], value[2], value[3]);
 486 #endif
 487
 488    if (clamp) {
 489       clampedValue[0] = CLAMP(value[0], 0.0F, 1.0F);
 490       clampedValue[1] = CLAMP(value[1], 0.0F, 1.0F);
 491       clampedValue[2] = CLAMP(value[2], 0.0F, 1.0F);
 492       clampedValue[3] = CLAMP(value[3], 0.0F, 1.0F);
 493       value = clampedValue;
 494    }
 495
 496    if (dstReg->CondMask != COND_TR) {
 497       /* condition codes may turn off some writes */
 498       if (writeMask & WRITEMASK_X) {
 499          if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 0)],
 500                       dstReg->CondMask))
 501             writeMask &= ~WRITEMASK_X;
 502       }
 503       if (writeMask & WRITEMASK_Y) {
 504          if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 1)],
 505                       dstReg->CondMask))
 506             writeMask &= ~WRITEMASK_Y;
 507       }
 508       if (writeMask & WRITEMASK_Z) {
 509          if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 2)],
 510                       dstReg->CondMask))
 511             writeMask &= ~WRITEMASK_Z;
 512       }
 513       if (writeMask & WRITEMASK_W) {
 514          if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 3)],
 515                       dstReg->CondMask))
 516             writeMask &= ~WRITEMASK_W;
 517       }
 518    }
 519
 520 #ifdef NAN_CHECK
 521    assert(!IS_INF_OR_NAN(value[0]));
 522    assert(!IS_INF_OR_NAN(value[0]));
 523    assert(!IS_INF_OR_NAN(value[0]));
 524    assert(!IS_INF_OR_NAN(value[0]));
 525 #endif
 526
 527    if (writeMask & WRITEMASK_X)
 528       dst[0] = value[0];
 529    if (writeMask & WRITEMASK_Y)
 530       dst[1] = value[1];
 531    if (writeMask & WRITEMASK_Z)
 532       dst[2] = value[2];
 533    if (writeMask & WRITEMASK_W)
 534       dst[3] = value[3];
 535
 536    if (inst->CondUpdate) {
 537       if (writeMask & WRITEMASK_X)
 538          machine->CondCodes[0] = generate_cc(value[0]);
 539       if (writeMask & WRITEMASK_Y)
 540          machine->CondCodes[1] = generate_cc(value[1]);
 541       if (writeMask & WRITEMASK_Z)
 542          machine->CondCodes[2] = generate_cc(value[2]);
 543       if (writeMask & WRITEMASK_W)
 544          machine->CondCodes[3] = generate_cc(value[3]);
 545 #if DEBUG_PROG
 546       printf("CondCodes=(%s,%s,%s,%s) for:\n",
 547              _mesa_condcode_string(machine->CondCodes[0]),
 548              _mesa_condcode_string(machine->CondCodes[1]),
 549              _mesa_condcode_string(machine->CondCodes[2]),
 550              _mesa_condcode_string(machine->CondCodes[3]));
 551 #endif
 552    }
 553 }
 554
 555
 556 /**
 557  * Store 4 uints into a register.  Observe the set-condition-code flags.
 558  */
 559 static void
 560 store_vector4ui(const struct prog_instruction *inst,
 561                 struct gl_program_machine *machine, const GLuint value[4])
 562 {
 563    const struct prog_dst_register *dstReg = &(inst->DstReg);
 564    GLuint writeMask = dstReg->WriteMask;
 565    GLuint *dst = (GLuint *) get_dst_register_pointer(dstReg, machine);
 566
 567    if (dstReg->CondMask != COND_TR) {
 568       /* condition codes may turn off some writes */
 569       if (writeMask & WRITEMASK_X) {
 570          if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 0)],
 571                       dstReg->CondMask))
 572             writeMask &= ~WRITEMASK_X;
 573       }
 574       if (writeMask & WRITEMASK_Y) {
 575          if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 1)],
 576                       dstReg->CondMask))
 577             writeMask &= ~WRITEMASK_Y;
 578       }
 579       if (writeMask & WRITEMASK_Z) {
 580          if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 2)],
 581                       dstReg->CondMask))
 582             writeMask &= ~WRITEMASK_Z;
 583       }
 584       if (writeMask & WRITEMASK_W) {
 585          if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 3)],
 586                       dstReg->CondMask))
 587             writeMask &= ~WRITEMASK_W;
 588       }
 589    }
 590
 591    if (writeMask & WRITEMASK_X)
 592       dst[0] = value[0];
 593    if (writeMask & WRITEMASK_Y)
 594       dst[1] = value[1];
 595    if (writeMask & WRITEMASK_Z)
 596       dst[2] = value[2];
 597    if (writeMask & WRITEMASK_W)
 598       dst[3] = value[3];
 599
 600    if (inst->CondUpdate) {
 601       if (writeMask & WRITEMASK_X)
 602          machine->CondCodes[0] = generate_cc(value[0]);
 603       if (writeMask & WRITEMASK_Y)
 604          machine->CondCodes[1] = generate_cc(value[1]);
 605       if (writeMask & WRITEMASK_Z)
 606          machine->CondCodes[2] = generate_cc(value[2]);
 607       if (writeMask & WRITEMASK_W)
 608          machine->CondCodes[3] = generate_cc(value[3]);
 609 #if DEBUG_PROG
 610       printf("CondCodes=(%s,%s,%s,%s) for:\n",
 611              _mesa_condcode_string(machine->CondCodes[0]),
 612              _mesa_condcode_string(machine->CondCodes[1]),
 613              _mesa_condcode_string(machine->CondCodes[2]),
 614              _mesa_condcode_string(machine->CondCodes[3]));
 615 #endif
 616    }
 617 }
 618
 619
 620
 621 /**
 622  * Execute the given vertex/fragment program.
 623  *
 624  * \param ctx  rendering context
 625  * \param program  the program to execute
 626  * \param machine  machine state (must be initialized)
 627  * \return GL_TRUE if program completed or GL_FALSE if program executed KIL.
 628  */
 629 GLboolean
 630 _mesa_execute_program(GLcontext * ctx,
 631                       const struct gl_program *program,
 632                       struct gl_program_machine *machine)
 633 {
 634    const GLuint numInst = program->NumInstructions;
 635    const GLuint maxExec = 10000;
 636    GLuint pc, numExec = 0;
 637
 638    machine->CurProgram = program;
 639
 640    if (DEBUG_PROG) {
 641       printf("execute program %u --------------------\n", program->Id);
 642    }
 643
 644    if (program->Target == GL_VERTEX_PROGRAM_ARB) {
 645       machine->EnvParams = ctx->VertexProgram.Parameters;
 646    }
 647    else {
 648       machine->EnvParams = ctx->FragmentProgram.Parameters;
 649    }
 650
 651    for (pc = 0; pc < numInst; pc++) {
 652       const struct prog_instruction *inst = program->Instructions + pc;
 653
 654       if (DEBUG_PROG) {
 655          _mesa_print_instruction(inst);
 656       }
 657
 658       switch (inst->Opcode) {
 659       case OPCODE_ABS:
 660          {
 661             GLfloat a[4], result[4];
 662             fetch_vector4(&inst->SrcReg[0], machine, a);
 663             result[0] = FABSF(a[0]);
 664             result[1] = FABSF(a[1]);
 665             result[2] = FABSF(a[2]);
 666             result[3] = FABSF(a[3]);
 667             store_vector4(inst, machine, result);
 668          }
 669          break;
 670       case OPCODE_ADD:
 671          {
 672             GLfloat a[4], b[4], result[4];
 673             fetch_vector4(&inst->SrcReg[0], machine, a);
 674             fetch_vector4(&inst->SrcReg[1], machine, b);
 675             result[0] = a[0] + b[0];
 676             result[1] = a[1] + b[1];
 677             result[2] = a[2] + b[2];
 678             result[3] = a[3] + b[3];
 679             store_vector4(inst, machine, result);
 680             if (DEBUG_PROG) {
 681                printf("ADD (%g %g %g %g) = (%g %g %g %g) + (%g %g %g %g)\n",
 682                       result[0], result[1], result[2], result[3],
 683                       a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
 684             }
 685          }
 686          break;
 687       case OPCODE_AND:     /* bitwise AND */
 688          {
 689             GLuint a[4], b[4], result[4];
 690             fetch_vector4ui(&inst->SrcReg[0], machine, a);
 691             fetch_vector4ui(&inst->SrcReg[1], machine, b);
 692             result[0] = a[0] & b[0];
 693             result[1] = a[1] & b[1];
 694             result[2] = a[2] & b[2];
 695             result[3] = a[3] & b[3];
 696             store_vector4ui(inst, machine, result);
 697          }
 698          break;
 699       case OPCODE_ARL:
 700          {
 701             GLfloat t[4];
 702             fetch_vector4(&inst->SrcReg[0], machine, t);
 703             machine->AddressReg[0][0] = IFLOOR(t[0]);
 704          }
 705          break;
 706       case OPCODE_BGNLOOP:
 707          /* no-op */
 708          ASSERT(program->Instructions[inst->BranchTarget].Opcode
 709                 == OPCODE_ENDLOOP);
 710          break;
 711       case OPCODE_ENDLOOP:
 712          /* subtract 1 here since pc is incremented by for(pc) loop */
 713          ASSERT(program->Instructions[inst->BranchTarget].Opcode
 714                 == OPCODE_BGNLOOP);
 715          pc = inst->BranchTarget - 1;   /* go to matching BNGLOOP */
 716          break;
 717       case OPCODE_BGNSUB:      /* begin subroutine */
 718          break;
 719       case OPCODE_ENDSUB:      /* end subroutine */
 720          break;
 721       case OPCODE_BRA:         /* branch (conditional) */
 722          if (eval_condition(machine, inst)) {
 723             /* take branch */
 724             /* Subtract 1 here since we'll do pc++ below */
 725             pc = inst->BranchTarget - 1;
 726          }
 727          break;
 728       case OPCODE_BRK:         /* break out of loop (conditional) */
 729          ASSERT(program->Instructions[inst->BranchTarget].Opcode
 730                 == OPCODE_ENDLOOP);
 731          if (eval_condition(machine, inst)) {
 732             /* break out of loop */
 733             /* pc++ at end of for-loop will put us after the ENDLOOP inst */
 734             pc = inst->BranchTarget;
 735          }
 736          break;
 737       case OPCODE_CONT:        /* continue loop (conditional) */
 738          ASSERT(program->Instructions[inst->BranchTarget].Opcode
 739                 == OPCODE_ENDLOOP);
 740          if (eval_condition(machine, inst)) {
 741             /* continue at ENDLOOP */
 742             /* Subtract 1 here since we'll do pc++ at end of for-loop */
 743             pc = inst->BranchTarget - 1;
 744          }
 745          break;
 746       case OPCODE_CAL:         /* Call subroutine (conditional) */
 747          if (eval_condition(machine, inst)) {
 748             /* call the subroutine */
 749             if (machine->StackDepth >= MAX_PROGRAM_CALL_DEPTH) {
 750                return GL_TRUE;  /* Per GL_NV_vertex_program2 spec */
 751             }
 752             machine->CallStack[machine->StackDepth++] = pc + 1; /* next inst */
 753             /* Subtract 1 here since we'll do pc++ at end of for-loop */
 754             pc = inst->BranchTarget - 1;
 755          }
 756          break;
 757       case OPCODE_CMP:
 758          {
 759             GLfloat a[4], b[4], c[4], result[4];
 760             fetch_vector4(&inst->SrcReg[0], machine, a);
 761             fetch_vector4(&inst->SrcReg[1], machine, b);
 762             fetch_vector4(&inst->SrcReg[2], machine, c);
 763             result[0] = a[0] < 0.0F ? b[0] : c[0];
 764             result[1] = a[1] < 0.0F ? b[1] : c[1];
 765             result[2] = a[2] < 0.0F ? b[2] : c[2];
 766             result[3] = a[3] < 0.0F ? b[3] : c[3];
 767             store_vector4(inst, machine, result);
 768          }
 769          break;
 770       case OPCODE_COS:
 771          {
 772             GLfloat a[4], result[4];
 773             fetch_vector1(&inst->SrcReg[0], machine, a);
 774             result[0] = result[1] = result[2] = result[3]
 775                = (GLfloat) _mesa_cos(a[0]);
 776             store_vector4(inst, machine, result);
 777          }
 778          break;
 779       case OPCODE_DDX:         /* Partial derivative with respect to X */
 780          {
 781             GLfloat result[4];
 782             fetch_vector4_deriv(ctx, &inst->SrcReg[0], machine,
 783                                 'X', result);
 784             store_vector4(inst, machine, result);
 785          }
 786          break;
 787       case OPCODE_DDY:         /* Partial derivative with respect to Y */
 788          {
 789             GLfloat result[4];
 790             fetch_vector4_deriv(ctx, &inst->SrcReg[0], machine,
 791                                 'Y', result);
 792             store_vector4(inst, machine, result);
 793          }
 794          break;
 795       case OPCODE_DP2:
 796          {
 797             GLfloat a[4], b[4], result[4];
 798             fetch_vector4(&inst->SrcReg[0], machine, a);
 799             fetch_vector4(&inst->SrcReg[1], machine, b);
 800             result[0] = result[1] = result[2] = result[3] = DOT2(a, b);
 801             store_vector4(inst, machine, result);
 802             if (DEBUG_PROG) {
 803                printf("DP2 %g = (%g %g) . (%g %g)\n",
 804                       result[0], a[0], a[1], b[0], b[1]);
 805             }
 806          }
 807          break;
 808       case OPCODE_DP2A:
 809          {
 810             GLfloat a[4], b[4], c, result[4];
 811             fetch_vector4(&inst->SrcReg[0], machine, a);
 812             fetch_vector4(&inst->SrcReg[1], machine, b);
 813             fetch_vector1(&inst->SrcReg[1], machine, &c);
 814             result[0] = result[1] = result[2] = result[3] = DOT2(a, b) + c;
 815             store_vector4(inst, machine, result);
 816             if (DEBUG_PROG) {
 817                printf("DP2A %g = (%g %g) . (%g %g) + %g\n",
 818                       result[0], a[0], a[1], b[0], b[1], c);
 819             }
 820          }
 821          break;
 822       case OPCODE_DP3:
 823          {
 824             GLfloat a[4], b[4], result[4];
 825             fetch_vector4(&inst->SrcReg[0], machine, a);
 826             fetch_vector4(&inst->SrcReg[1], machine, b);
 827             result[0] = result[1] = result[2] = result[3] = DOT3(a, b);
 828             store_vector4(inst, machine, result);
 829             if (DEBUG_PROG) {
 830                printf("DP3 %g = (%g %g %g) . (%g %g %g)\n",
 831                       result[0], a[0], a[1], a[2], b[0], b[1], b[2]);
 832             }
 833          }
 834          break;
 835       case OPCODE_DP4:
 836          {
 837             GLfloat a[4], b[4], result[4];
 838             fetch_vector4(&inst->SrcReg[0], machine, a);
 839             fetch_vector4(&inst->SrcReg[1], machine, b);
 840             result[0] = result[1] = result[2] = result[3] = DOT4(a, b);
 841             store_vector4(inst, machine, result);
 842             if (DEBUG_PROG) {
 843                printf("DP4 %g = (%g, %g %g %g) . (%g, %g %g %g)\n",
 844                       result[0], a[0], a[1], a[2], a[3],
 845                       b[0], b[1], b[2], b[3]);
 846             }
 847          }
 848          break;
 849       case OPCODE_DPH:
 850          {
 851             GLfloat a[4], b[4], result[4];
 852             fetch_vector4(&inst->SrcReg[0], machine, a);
 853             fetch_vector4(&inst->SrcReg[1], machine, b);
 854             result[0] = result[1] = result[2] = result[3] = DOT3(a, b) + b[3];
 855             store_vector4(inst, machine, result);
 856          }
 857          break;
 858       case OPCODE_DST:         /* Distance vector */
 859          {
 860             GLfloat a[4], b[4], result[4];
 861             fetch_vector4(&inst->SrcReg[0], machine, a);
 862             fetch_vector4(&inst->SrcReg[1], machine, b);
 863             result[0] = 1.0F;
 864             result[1] = a[1] * b[1];
 865             result[2] = a[2];
 866             result[3] = b[3];
 867             store_vector4(inst, machine, result);
 868          }
 869          break;
 870       case OPCODE_EXP:
 871          {
 872             GLfloat t[4], q[4], floor_t0;
 873             fetch_vector1(&inst->SrcReg[0], machine, t);
 874             floor_t0 = FLOORF(t[0]);
 875             if (floor_t0 > FLT_MAX_EXP) {
 876                SET_POS_INFINITY(q[0]);
 877                SET_POS_INFINITY(q[2]);
 878             }
 879             else if (floor_t0 < FLT_MIN_EXP) {
 880                q[0] = 0.0F;
 881                q[2] = 0.0F;
 882             }
 883             else {
 884                q[0] = LDEXPF(1.0, (int) floor_t0);
 885                /* Note: GL_NV_vertex_program expects
 886                 * result.z = result.x * APPX(result.y)
 887                 * We do what the ARB extension says.
 888                 */
 889                q[2] = (GLfloat) _mesa_pow(2.0, t[0]);
 890             }
 891             q[1] = t[0] - floor_t0;
 892             q[3] = 1.0F;
 893             store_vector4( inst, machine, q );
 894          }
 895          break;
 896       case OPCODE_EX2:         /* Exponential base 2 */
 897          {
 898             GLfloat a[4], result[4], val;
 899             fetch_vector1(&inst->SrcReg[0], machine, a);
 900             val = (GLfloat) _mesa_pow(2.0, a[0]);
 901             /*
 902             if (IS_INF_OR_NAN(val))
 903                val = 1.0e10;
 904             */
 905             result[0] = result[1] = result[2] = result[3] = val;
 906             store_vector4(inst, machine, result);
 907          }
 908          break;
 909       case OPCODE_FLR:
 910          {
 911             GLfloat a[4], result[4];
 912             fetch_vector4(&inst->SrcReg[0], machine, a);
 913             result[0] = FLOORF(a[0]);
 914             result[1] = FLOORF(a[1]);
 915             result[2] = FLOORF(a[2]);
 916             result[3] = FLOORF(a[3]);
 917             store_vector4(inst, machine, result);
 918          }
 919          break;
 920       case OPCODE_FRC:
 921          {
 922             GLfloat a[4], result[4];
 923             fetch_vector4(&inst->SrcReg[0], machine, a);
 924             result[0] = a[0] - FLOORF(a[0]);
 925             result[1] = a[1] - FLOORF(a[1]);
 926             result[2] = a[2] - FLOORF(a[2]);
 927             result[3] = a[3] - FLOORF(a[3]);
 928             store_vector4(inst, machine, result);
 929          }
 930          break;
 931       case OPCODE_IF:
 932          {
 933             GLboolean cond;
 934             ASSERT(program->Instructions[inst->BranchTarget].Opcode
 935                    == OPCODE_ELSE ||
 936                    program->Instructions[inst->BranchTarget].Opcode
 937                    == OPCODE_ENDIF);
 938             /* eval condition */
 939             if (inst->SrcReg[0].File != PROGRAM_UNDEFINED) {
 940                GLfloat a[4];
 941                fetch_vector1(&inst->SrcReg[0], machine, a);
 942                cond = (a[0] != 0.0);
 943             }
 944             else {
 945                cond = eval_condition(machine, inst);
 946             }
 947             if (DEBUG_PROG) {
 948                printf("IF: %d\n", cond);
 949             }
 950             /* do if/else */
 951             if (cond) {
 952                /* do if-clause (just continue execution) */
 953             }
 954             else {
 955                /* go to the instruction after ELSE or ENDIF */
 956                assert(inst->BranchTarget >= 0);
 957                pc = inst->BranchTarget;
 958             }
 959          }
 960          break;
 961       case OPCODE_ELSE:
 962          /* goto ENDIF */
 963          ASSERT(program->Instructions[inst->BranchTarget].Opcode
 964                 == OPCODE_ENDIF);
 965          assert(inst->BranchTarget >= 0);
 966          pc = inst->BranchTarget;
 967          break;
 968       case OPCODE_ENDIF:
 969          /* nothing */
 970          break;
 971       case OPCODE_KIL_NV:      /* NV_f_p only (conditional) */
 972          if (eval_condition(machine, inst)) {
 973             return GL_FALSE;
 974          }
 975          break;
 976       case OPCODE_KIL:         /* ARB_f_p only: abort on negative operand */
 977          {
 978             GLfloat a[4];
 979             fetch_vector4(&inst->SrcReg[0], machine, a);
 980             if (DEBUG_PROG) {
 981                printf("KIL if (%g %g %g %g) < 0.0\n",
 982                       a[0], a[1], a[2], a[3]);
 983             }
 984             /* strictly-negative test: a component equal to zero does not kill */
 985             if (a[0] < 0.0F || a[1] < 0.0F || a[2] < 0.0F || a[3] < 0.0F) {
 986                return GL_FALSE;
 987             }
 988          }
 989          break;
 990       case OPCODE_LG2:         /* log base 2 */
 991          {
 992             GLfloat a[4], result[4], val;
 993             fetch_vector1(&inst->SrcReg[0], machine, a);
 994             /* The fast LOG2 macro doesn't meet the precision requirements.
 995              */
 996             if (a[0] == 0.0F) {
 997                val = -FLT_MAX;
 998             }
 999             else {
1000                val = log(a[0]) * 1.442695F;
1001             }
1002             result[0] = result[1] = result[2] = result[3] = val;
1003             store_vector4(inst, machine, result);
1004          }
1005          break;
1006       case OPCODE_LIT:
1007          {
1008             const GLfloat epsilon = 1.0F / 256.0F;      /* from NV VP spec */
1009             GLfloat a[4], result[4];
1010             fetch_vector4(&inst->SrcReg[0], machine, a);
1011             a[0] = MAX2(a[0], 0.0F);
1012             a[1] = MAX2(a[1], 0.0F);
1013             /* XXX ARB version clamps a[3], NV version doesn't */
1014             a[3] = CLAMP(a[3], -(128.0F - epsilon), (128.0F - epsilon));
1015             result[0] = 1.0F;
1016             result[1] = a[0];
1017             /* XXX we could probably just use pow() here */
1018             if (a[0] > 0.0F) {
1019                if (a[1] == 0.0 && a[3] == 0.0)
1020                   result[2] = 1.0;
1021                else
1022                   result[2] = (GLfloat) _mesa_pow(a[1], a[3]);
1023             }
1024             else {
1025                result[2] = 0.0;
1026             }
1027             result[3] = 1.0F;
1028             store_vector4(inst, machine, result);
1029             if (DEBUG_PROG) {
1030                printf("LIT (%g %g %g %g) : (%g %g %g %g)\n",
1031                       result[0], result[1], result[2], result[3],
1032                       a[0], a[1], a[2], a[3]);
1033             }
1034          }
1035          break;
1036       case OPCODE_LOG:         /* exponent / mantissa / log2 of |src| */
1037          {
1038             GLfloat t[4], q[4], abs_t0;
1039             fetch_vector1(&inst->SrcReg[0], machine, t);
1040             abs_t0 = FABSF(t[0]);  /* all results below are computed from |t[0]| */
1041             if (abs_t0 != 0.0F) {
1042                /* Since we really can't handle infinite values on VMS
1043                 * like other OSes we'll use __MAXFLOAT to represent
1044                 * infinity.  This may need some tweaking.
1045                 */
1046 #ifdef VMS
1047                if (abs_t0 == __MAXFLOAT)
1048 #else
1049                if (IS_INF_OR_NAN(abs_t0))
1050 #endif
1051                {
1052                   SET_POS_INFINITY(q[0]);
1053                   q[1] = 1.0F;
1054                   SET_POS_INFINITY(q[2]);
1055                }
1056                else {
1057                   int exponent;
1058                   GLfloat mantissa = FREXPF(abs_t0, &exponent); /* not t[0]: sign must be dropped */
1059                   q[0] = (GLfloat) (exponent - 1);
1060                   q[1] = (GLfloat) (2.0 * mantissa); /* map [.5, 1) -> [1, 2) */
1061
1062                   /* The fast LOG2 macro doesn't meet the precision
1063                    * requirements.  log() of a negative value would be NaN.
1064                    */
1065                   q[2] = (GLfloat) (log(abs_t0) * 1.442695F);
1066                }
1067             }
1068             else {
1069                SET_NEG_INFINITY(q[0]);
1070                q[1] = 1.0F;
1071                SET_NEG_INFINITY(q[2]);
1072             }
1073             q[3] = 1.0;
1074             store_vector4(inst, machine, q);
1075          }
1076          break;
1077       case OPCODE_LRP:
1078          {
1079             GLfloat a[4], b[4], c[4], result[4];
1080             fetch_vector4(&inst->SrcReg[0], machine, a);
1081             fetch_vector4(&inst->SrcReg[1], machine, b);
1082             fetch_vector4(&inst->SrcReg[2], machine, c);
1083             result[0] = a[0] * b[0] + (1.0F - a[0]) * c[0];
1084             result[1] = a[1] * b[1] + (1.0F - a[1]) * c[1];
1085             result[2] = a[2] * b[2] + (1.0F - a[2]) * c[2];
1086             result[3] = a[3] * b[3] + (1.0F - a[3]) * c[3];
1087             store_vector4(inst, machine, result);
1088             if (DEBUG_PROG) {
1089                printf("LRP (%g %g %g %g) = (%g %g %g %g), "
1090                       "(%g %g %g %g), (%g %g %g %g)\n",
1091                       result[0], result[1], result[2], result[3],
1092                       a[0], a[1], a[2], a[3],
1093                       b[0], b[1], b[2], b[3], c[0], c[1], c[2], c[3]);
1094             }
1095          }
1096          break;
1097       case OPCODE_MAD:
1098          {
1099             GLfloat a[4], b[4], c[4], result[4];
1100             fetch_vector4(&inst->SrcReg[0], machine, a);
1101             fetch_vector4(&inst->SrcReg[1], machine, b);
1102             fetch_vector4(&inst->SrcReg[2], machine, c);
1103             result[0] = a[0] * b[0] + c[0];
1104             result[1] = a[1] * b[1] + c[1];
1105             result[2] = a[2] * b[2] + c[2];
1106             result[3] = a[3] * b[3] + c[3];
1107             store_vector4(inst, machine, result);
1108             if (DEBUG_PROG) {
1109                printf("MAD (%g %g %g %g) = (%g %g %g %g) * "
1110                       "(%g %g %g %g) + (%g %g %g %g)\n",
1111                       result[0], result[1], result[2], result[3],
1112                       a[0], a[1], a[2], a[3],
1113                       b[0], b[1], b[2], b[3], c[0], c[1], c[2], c[3]);
1114             }
1115          }
1116          break;
1117       case OPCODE_MAX:
1118          {
1119             GLfloat a[4], b[4], result[4];
1120             fetch_vector4(&inst->SrcReg[0], machine, a);
1121             fetch_vector4(&inst->SrcReg[1], machine, b);
1122             result[0] = MAX2(a[0], b[0]);
1123             result[1] = MAX2(a[1], b[1]);
1124             result[2] = MAX2(a[2], b[2]);
1125             result[3] = MAX2(a[3], b[3]);
1126             store_vector4(inst, machine, result);
1127             if (DEBUG_PROG) {
1128                printf("MAX (%g %g %g %g) = (%g %g %g %g), (%g %g %g %g)\n",
1129                       result[0], result[1], result[2], result[3],
1130                       a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
1131             }
1132          }
1133          break;
1134       case OPCODE_MIN:
1135          {
1136             GLfloat a[4], b[4], result[4];
1137             fetch_vector4(&inst->SrcReg[0], machine, a);
1138             fetch_vector4(&inst->SrcReg[1], machine, b);
1139             result[0] = MIN2(a[0], b[0]);
1140             result[1] = MIN2(a[1], b[1]);
1141             result[2] = MIN2(a[2], b[2]);
1142             result[3] = MIN2(a[3], b[3]);
1143             store_vector4(inst, machine, result);
1144          }
1145          break;
1146       case OPCODE_MOV:
1147          {
1148             GLfloat result[4];
1149             fetch_vector4(&inst->SrcReg[0], machine, result);
1150             store_vector4(inst, machine, result);
1151             if (DEBUG_PROG) {
1152                printf("MOV (%g %g %g %g)\n",
1153                       result[0], result[1], result[2], result[3]);
1154             }
1155          }
1156          break;
1157       case OPCODE_MUL:
1158          {
1159             GLfloat a[4], b[4], result[4];
1160             fetch_vector4(&inst->SrcReg[0], machine, a);
1161             fetch_vector4(&inst->SrcReg[1], machine, b);
1162             result[0] = a[0] * b[0];
1163             result[1] = a[1] * b[1];
1164             result[2] = a[2] * b[2];
1165             result[3] = a[3] * b[3];
1166             store_vector4(inst, machine, result);
1167             if (DEBUG_PROG) {
1168                printf("MUL (%g %g %g %g) = (%g %g %g %g) * (%g %g %g %g)\n",
1169                       result[0], result[1], result[2], result[3],
1170                       a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
1171             }
1172          }
1173          break;
1174       case OPCODE_NOISE1:
1175          {
1176             GLfloat a[4], result[4];
1177             fetch_vector1(&inst->SrcReg[0], machine, a);
1178             result[0] =
1179                result[1] =
1180                result[2] =
1181                result[3] = _mesa_noise1(a[0]);
1182             store_vector4(inst, machine, result);
1183          }
1184          break;
1185       case OPCODE_NOISE2:
1186          {
1187             GLfloat a[4], result[4];
1188             fetch_vector4(&inst->SrcReg[0], machine, a);
1189             result[0] =
1190                result[1] =
1191                result[2] = result[3] = _mesa_noise2(a[0], a[1]);
1192             store_vector4(inst, machine, result);
1193          }
1194          break;
1195       case OPCODE_NOISE3:
1196          {
1197             GLfloat a[4], result[4];
1198             fetch_vector4(&inst->SrcReg[0], machine, a);
1199             result[0] =
1200                result[1] =
1201                result[2] =
1202                result[3] = _mesa_noise3(a[0], a[1], a[2]);
1203             store_vector4(inst, machine, result);
1204          }
1205          break;
1206       case OPCODE_NOISE4:
1207          {
1208             GLfloat a[4], result[4];
1209             fetch_vector4(&inst->SrcReg[0], machine, a);
1210             result[0] =
1211                result[1] =
1212                result[2] =
1213                result[3] = _mesa_noise4(a[0], a[1], a[2], a[3]);
1214             store_vector4(inst, machine, result);
1215          }
1216          break;
1217       case OPCODE_NOP:
1218          break;
1219       case OPCODE_NOT:         /* bitwise NOT */
1220          {
1221             GLuint a[4], result[4];
1222             fetch_vector4ui(&inst->SrcReg[0], machine, a);
1223             result[0] = ~a[0];
1224             result[1] = ~a[1];
1225             result[2] = ~a[2];
1226             result[3] = ~a[3];
1227             store_vector4ui(inst, machine, result);
1228          }
1229          break;
1230       case OPCODE_NRM3:        /* 3-component normalization */
1231          {
1232             GLfloat a[4], result[4];
1233             GLfloat tmp;
1234             fetch_vector4(&inst->SrcReg[0], machine, a);
1235             tmp = a[0] * a[0] + a[1] * a[1] + a[2] * a[2];
1236             if (tmp != 0.0F)
1237                tmp = INV_SQRTF(tmp);
1238             result[0] = tmp * a[0];
1239             result[1] = tmp * a[1];
1240             result[2] = tmp * a[2];
1241             result[3] = 0.0;  /* undefined, but prevent valgrind warnings */
1242             store_vector4(inst, machine, result);
1243          }
1244          break;
1245       case OPCODE_NRM4:        /* 4-component normalization */
1246          {
1247             GLfloat a[4], result[4];
1248             GLfloat tmp;
1249             fetch_vector4(&inst->SrcReg[0], machine, a);
1250             tmp = a[0] * a[0] + a[1] * a[1] + a[2] * a[2] + a[3] * a[3];
1251             if (tmp != 0.0F)
1252                tmp = INV_SQRTF(tmp);
1253             result[0] = tmp * a[0];
1254             result[1] = tmp * a[1];
1255             result[2] = tmp * a[2];
1256             result[3] = tmp * a[3];
1257             store_vector4(inst, machine, result);
1258          }
1259          break;
1260       case OPCODE_OR:          /* bitwise OR */
1261          {
1262             GLuint a[4], b[4], result[4];
1263             fetch_vector4ui(&inst->SrcReg[0], machine, a);
1264             fetch_vector4ui(&inst->SrcReg[1], machine, b);
1265             result[0] = a[0] | b[0];
1266             result[1] = a[1] | b[1];
1267             result[2] = a[2] | b[2];
1268             result[3] = a[3] | b[3];
1269             store_vector4ui(inst, machine, result);
1270          }
1271          break;
1272       case OPCODE_PK2H:        /* pack two 16-bit floats in one 32-bit float */
1273          {
1274             GLfloat a[4];
1275             GLuint result[4];
1276             GLhalfNV hx, hy;
1277             fetch_vector4(&inst->SrcReg[0], machine, a);
1278             hx = _mesa_float_to_half(a[0]);   /* goes in the low 16 bits */
1279             hy = _mesa_float_to_half(a[1]);   /* goes in the high 16 bits */
1280             result[0] =
1281             result[1] =
1282             result[2] =
1283             result[3] = (GLuint) hx | ((GLuint) hy << 16); /* unsigned shift: no int overflow */
1284             store_vector4ui(inst, machine, result);
1285          }
1286          break;
1287       case OPCODE_PK2US:       /* pack two GLushorts into one 32-bit float */
1288          {
1289             GLfloat a[4];
1290             GLuint result[4], usx, usy;
1291             fetch_vector4(&inst->SrcReg[0], machine, a);
1292             a[0] = CLAMP(a[0], 0.0F, 1.0F);
1293             a[1] = CLAMP(a[1], 0.0F, 1.0F);
1294             usx = IROUND(a[0] * 65535.0F);
1295             usy = IROUND(a[1] * 65535.0F);
1296             result[0] =
1297             result[1] =
1298             result[2] =
1299             result[3] = usx | (usy << 16);
1300             store_vector4ui(inst, machine, result);
1301          }
1302          break;
1303       case OPCODE_PK4B:        /* pack four GLbytes into one 32-bit float */
1304          {
1305             GLfloat a[4];
1306             GLuint result[4], ubx, uby, ubz, ubw;
1307             fetch_vector4(&inst->SrcReg[0], machine, a);
1308             a[0] = CLAMP(a[0], -128.0F / 127.0F, 1.0F);
1309             a[1] = CLAMP(a[1], -128.0F / 127.0F, 1.0F);
1310             a[2] = CLAMP(a[2], -128.0F / 127.0F, 1.0F);
1311             a[3] = CLAMP(a[3], -128.0F / 127.0F, 1.0F);
1312             ubx = IROUND(127.0F * a[0] + 128.0F);
1313             uby = IROUND(127.0F * a[1] + 128.0F);
1314             ubz = IROUND(127.0F * a[2] + 128.0F);
1315             ubw = IROUND(127.0F * a[3] + 128.0F);
1316             result[0] =
1317             result[1] =
1318             result[2] =
1319             result[3] = ubx | (uby << 8) | (ubz << 16) | (ubw << 24);
1320             store_vector4ui(inst, machine, result);
1321          }
1322          break;
1323       case OPCODE_PK4UB:       /* pack four GLubytes into one 32-bit float */
1324          {
1325             GLfloat a[4];
1326             GLuint result[4], ubx, uby, ubz, ubw;
1327             fetch_vector4(&inst->SrcReg[0], machine, a);
1328             a[0] = CLAMP(a[0], 0.0F, 1.0F);
1329             a[1] = CLAMP(a[1], 0.0F, 1.0F);
1330             a[2] = CLAMP(a[2], 0.0F, 1.0F);
1331             a[3] = CLAMP(a[3], 0.0F, 1.0F);
1332             ubx = IROUND(255.0F * a[0]);
1333             uby = IROUND(255.0F * a[1]);
1334             ubz = IROUND(255.0F * a[2]);
1335             ubw = IROUND(255.0F * a[3]);
1336             result[0] =
1337             result[1] =
1338             result[2] =
1339             result[3] = ubx | (uby << 8) | (ubz << 16) | (ubw << 24);
1340             store_vector4ui(inst, machine, result);
1341          }
1342          break;
1343       case OPCODE_POW:
1344          {
1345             GLfloat a[4], b[4], result[4];
1346             fetch_vector1(&inst->SrcReg[0], machine, a);
1347             fetch_vector1(&inst->SrcReg[1], machine, b);
1348             result[0] = result[1] = result[2] = result[3]
1349                = (GLfloat) _mesa_pow(a[0], b[0]);
1350             store_vector4(inst, machine, result);
1351          }
1352          break;
1353       case OPCODE_RCP:
1354          {
1355             GLfloat a[4], result[4];
1356             fetch_vector1(&inst->SrcReg[0], machine, a);
1357             if (DEBUG_PROG) {
1358                if (a[0] == 0)
1359                   printf("RCP(0)\n");
1360                else if (IS_INF_OR_NAN(a[0]))
1361                   printf("RCP(inf)\n");
1362             }
1363             result[0] = result[1] = result[2] = result[3] = 1.0F / a[0];
1364             store_vector4(inst, machine, result);
1365          }
1366          break;
1367       case OPCODE_RET:         /* return from subroutine (conditional) */
1368          if (eval_condition(machine, inst)) {
1369             if (machine->StackDepth == 0) {
1370                return GL_TRUE;  /* Per GL_NV_vertex_program2 spec */
1371             }
1372             /* subtract one because of pc++ in the for loop */
1373             pc = machine->CallStack[--machine->StackDepth] - 1;
1374          }
1375          break;
1376       case OPCODE_RFL:         /* reflection vector: 2*(axis.dir)/(axis.axis)*axis - dir */
1377          {
1378             GLfloat axis[4], dir[4], result[4], tmpX, tmpW;
1379             fetch_vector4(&inst->SrcReg[0], machine, axis);
1380             fetch_vector4(&inst->SrcReg[1], machine, dir);
1381             tmpW = DOT3(axis, axis);
1382             tmpX = (2.0F * DOT3(axis, dir)) / tmpW;
1383             result[0] = tmpX * axis[0] - dir[0];
1384             result[1] = tmpX * axis[1] - dir[1];
1385             result[2] = tmpX * axis[2] - dir[2];
1386             result[3] = 0.0F;  /* w is not computed (XXX enforce in parser); don't pass garbage */
1387             store_vector4(inst, machine, result);
1388          }
1389          break;
1390       case OPCODE_RSQ:         /* 1 / sqrt() */
1391          {
1392             GLfloat a[4], result[4];
1393             fetch_vector1(&inst->SrcReg[0], machine, a);
1394             a[0] = FABSF(a[0]);
1395             result[0] = result[1] = result[2] = result[3] = INV_SQRTF(a[0]);
1396             store_vector4(inst, machine, result);
1397             if (DEBUG_PROG) {
1398                printf("RSQ %g = 1/sqrt(|%g|)\n", result[0], a[0]);
1399             }
1400          }
1401          break;
1402       case OPCODE_SCS:         /* sine and cos */
1403          {
1404             GLfloat a[4], result[4];
1405             fetch_vector1(&inst->SrcReg[0], machine, a);
1406             result[0] = (GLfloat) _mesa_cos(a[0]);
1407             result[1] = (GLfloat) _mesa_sin(a[0]);
1408             result[2] = 0.0;    /* undefined! */
1409             result[3] = 0.0;    /* undefined! */
1410             store_vector4(inst, machine, result);
1411          }
1412          break;
1413       case OPCODE_SEQ:         /* set on equal */
1414          {
1415             GLfloat a[4], b[4], result[4];
1416             fetch_vector4(&inst->SrcReg[0], machine, a);
1417             fetch_vector4(&inst->SrcReg[1], machine, b);
1418             result[0] = (a[0] == b[0]) ? 1.0F : 0.0F;
1419             result[1] = (a[1] == b[1]) ? 1.0F : 0.0F;
1420             result[2] = (a[2] == b[2]) ? 1.0F : 0.0F;
1421             result[3] = (a[3] == b[3]) ? 1.0F : 0.0F;
1422             store_vector4(inst, machine, result);
1423             if (DEBUG_PROG) {
1424                printf("SEQ (%g %g %g %g) = (%g %g %g %g) == (%g %g %g %g)\n",
1425                       result[0], result[1], result[2], result[3],
1426                       a[0], a[1], a[2], a[3],
1427                       b[0], b[1], b[2], b[3]);
1428             }
1429          }
1430          break;
1431       case OPCODE_SFL:         /* set false, operands ignored */
1432          {
1433             static const GLfloat result[4] = { 0.0F, 0.0F, 0.0F, 0.0F };
1434             store_vector4(inst, machine, result);
1435          }
1436          break;
1437       case OPCODE_SGE:         /* set on greater or equal */
1438          {
1439             GLfloat a[4], b[4], result[4];
1440             fetch_vector4(&inst->SrcReg[0], machine, a);
1441             fetch_vector4(&inst->SrcReg[1], machine, b);
1442             result[0] = (a[0] >= b[0]) ? 1.0F : 0.0F;
1443             result[1] = (a[1] >= b[1]) ? 1.0F : 0.0F;
1444             result[2] = (a[2] >= b[2]) ? 1.0F : 0.0F;
1445             result[3] = (a[3] >= b[3]) ? 1.0F : 0.0F;
1446             store_vector4(inst, machine, result);
1447             if (DEBUG_PROG) {
1448                printf("SGE (%g %g %g %g) = (%g %g %g %g) >= (%g %g %g %g)\n",
1449                       result[0], result[1], result[2], result[3],
1450                       a[0], a[1], a[2], a[3],
1451                       b[0], b[1], b[2], b[3]);
1452             }
1453          }
1454          break;
1455       case OPCODE_SGT:         /* set on greater */
1456          {
1457             GLfloat a[4], b[4], result[4];
1458             fetch_vector4(&inst->SrcReg[0], machine, a);
1459             fetch_vector4(&inst->SrcReg[1], machine, b);
1460             result[0] = (a[0] > b[0]) ? 1.0F : 0.0F;
1461             result[1] = (a[1] > b[1]) ? 1.0F : 0.0F;
1462             result[2] = (a[2] > b[2]) ? 1.0F : 0.0F;
1463             result[3] = (a[3] > b[3]) ? 1.0F : 0.0F;
1464             store_vector4(inst, machine, result);
1465             if (DEBUG_PROG) {
1466                printf("SGT (%g %g %g %g) = (%g %g %g %g) > (%g %g %g %g)\n",
1467                       result[0], result[1], result[2], result[3],
1468                       a[0], a[1], a[2], a[3],
1469                       b[0], b[1], b[2], b[3]);
1470             }
1471          }
1472          break;
1473       case OPCODE_SIN:
1474          {
1475             GLfloat a[4], result[4];
1476             fetch_vector1(&inst->SrcReg[0], machine, a);
1477             result[0] = result[1] = result[2] = result[3]
1478                = (GLfloat) _mesa_sin(a[0]);
1479             store_vector4(inst, machine, result);
1480          }
1481          break;
1482       case OPCODE_SLE:         /* set on less or equal */
1483          {
1484             GLfloat a[4], b[4], result[4];
1485             fetch_vector4(&inst->SrcReg[0], machine, a);
1486             fetch_vector4(&inst->SrcReg[1], machine, b);
1487             result[0] = (a[0] <= b[0]) ? 1.0F : 0.0F;
1488             result[1] = (a[1] <= b[1]) ? 1.0F : 0.0F;
1489             result[2] = (a[2] <= b[2]) ? 1.0F : 0.0F;
1490             result[3] = (a[3] <= b[3]) ? 1.0F : 0.0F;
1491             store_vector4(inst, machine, result);
1492             if (DEBUG_PROG) {
1493                printf("SLE (%g %g %g %g) = (%g %g %g %g) <= (%g %g %g %g)\n",
1494                       result[0], result[1], result[2], result[3],
1495                       a[0], a[1], a[2], a[3],
1496                       b[0], b[1], b[2], b[3]);
1497             }
1498          }
1499          break;
1500       case OPCODE_SLT:         /* set on less */
1501          {
1502             GLfloat a[4], b[4], result[4];
1503             fetch_vector4(&inst->SrcReg[0], machine, a);
1504             fetch_vector4(&inst->SrcReg[1], machine, b);
1505             result[0] = (a[0] < b[0]) ? 1.0F : 0.0F;
1506             result[1] = (a[1] < b[1]) ? 1.0F : 0.0F;
1507             result[2] = (a[2] < b[2]) ? 1.0F : 0.0F;
1508             result[3] = (a[3] < b[3]) ? 1.0F : 0.0F;
1509             store_vector4(inst, machine, result);
1510             if (DEBUG_PROG) {
1511                printf("SLT (%g %g %g %g) = (%g %g %g %g) < (%g %g %g %g)\n",
1512                       result[0], result[1], result[2], result[3],
1513                       a[0], a[1], a[2], a[3],
1514                       b[0], b[1], b[2], b[3]);
1515             }
1516          }
1517          break;
1518       case OPCODE_SNE:         /* set on not equal */
1519          {
1520             GLfloat a[4], b[4], result[4];
1521             fetch_vector4(&inst->SrcReg[0], machine, a);
1522             fetch_vector4(&inst->SrcReg[1], machine, b);
1523             result[0] = (a[0] != b[0]) ? 1.0F : 0.0F;
1524             result[1] = (a[1] != b[1]) ? 1.0F : 0.0F;
1525             result[2] = (a[2] != b[2]) ? 1.0F : 0.0F;
1526             result[3] = (a[3] != b[3]) ? 1.0F : 0.0F;
1527             store_vector4(inst, machine, result);
1528             if (DEBUG_PROG) {
1529                printf("SNE (%g %g %g %g) = (%g %g %g %g) != (%g %g %g %g)\n",
1530                       result[0], result[1], result[2], result[3],
1531                       a[0], a[1], a[2], a[3],
1532                       b[0], b[1], b[2], b[3]);
1533             }
1534          }
1535          break;
1536       case OPCODE_SSG:         /* set sign (-1, 0 or +1) */
1537          {
1538             GLfloat a[4], result[4];
1539             fetch_vector4(&inst->SrcReg[0], machine, a);
1540             result[0] = (GLfloat) ((a[0] > 0.0F) - (a[0] < 0.0F));
1541             result[1] = (GLfloat) ((a[1] > 0.0F) - (a[1] < 0.0F));
1542             result[2] = (GLfloat) ((a[2] > 0.0F) - (a[2] < 0.0F));
1543             result[3] = (GLfloat) ((a[3] > 0.0F) - (a[3] < 0.0F));
1544             store_vector4(inst, machine, result);
1545          }
1546          break;
1547       case OPCODE_STR:         /* set true, operands ignored */
1548          {
1549             static const GLfloat result[4] = { 1.0F, 1.0F, 1.0F, 1.0F };
1550             store_vector4(inst, machine, result);
1551          }
1552          break;
1553       case OPCODE_SUB:
1554          {
1555             GLfloat a[4], b[4], result[4];
1556             fetch_vector4(&inst->SrcReg[0], machine, a);
1557             fetch_vector4(&inst->SrcReg[1], machine, b);
1558             result[0] = a[0] - b[0];
1559             result[1] = a[1] - b[1];
1560             result[2] = a[2] - b[2];
1561             result[3] = a[3] - b[3];
1562             store_vector4(inst, machine, result);
1563             if (DEBUG_PROG) {
1564                printf("SUB (%g %g %g %g) = (%g %g %g %g) - (%g %g %g %g)\n",
1565                       result[0], result[1], result[2], result[3],
1566                       a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
1567             }
1568          }
1569          break;
1570       case OPCODE_SWZ:         /* extended swizzle */
1571          {
1572             const struct prog_src_register *source = &inst->SrcReg[0];
1573             const GLfloat *src = get_src_register_pointer(source, machine);
1574             GLfloat result[4];
1575             GLuint i;
1576             for (i = 0; i < 4; i++) {
1577                const GLuint swz = GET_SWZ(source->Swizzle, i);
1578                if (swz == SWIZZLE_ZERO)
1579                   result[i] = 0.0;
1580                else if (swz == SWIZZLE_ONE)
1581                   result[i] = 1.0;
1582                else {
1583                   /* swz is unsigned, so only the upper bound can be violated */
1584                   ASSERT(swz <= 3);
1585                   result[i] = src[swz];
1586                }
1587                if (source->Negate & (1 << i))   /* per-component negate bit */
1588                   result[i] = -result[i];
1589             }
1590             store_vector4(inst, machine, result);
1591          }
1592          break;
1593       case OPCODE_TEX:         /* Both ARB and NV frag prog */
1594          /* Simple texel lookup */
1595          {
1596             GLfloat texcoord[4], color[4];
1597             fetch_vector4(&inst->SrcReg[0], machine, texcoord);
1598
1599             fetch_texel(ctx, machine, inst, texcoord, 0.0, color);
1600
1601             if (DEBUG_PROG) {
1602                printf("TEX (%g, %g, %g, %g) = texture[%d][%g, %g, %g, %g]\n",
1603                       color[0], color[1], color[2], color[3],
1604                       inst->TexSrcUnit,
1605                       texcoord[0], texcoord[1], texcoord[2], texcoord[3]);
1606             }
1607             store_vector4(inst, machine, color);
1608          }
1609          break;
1610       case OPCODE_TXB:         /* GL_ARB_fragment_program only */
1611          /* Texel lookup with LOD bias */
1612          {
1613             GLfloat texcoord[4], color[4], lodBias;
1614
1615             fetch_vector4(&inst->SrcReg[0], machine, texcoord);
1616
1617             /* texcoord[3] is the bias to add to lambda */
1618             lodBias = texcoord[3];
1619
1620             fetch_texel(ctx, machine, inst, texcoord, lodBias, color);
1621
1622             store_vector4(inst, machine, color);
1623          }
1624          break;
1625       case OPCODE_TXD:         /* GL_NV_fragment_program only */
1626          /* Texture lookup w/ partial derivatives for LOD */
1627          {
1628             GLfloat texcoord[4], dtdx[4], dtdy[4], color[4];
1629             fetch_vector4(&inst->SrcReg[0], machine, texcoord);
1630             fetch_vector4(&inst->SrcReg[1], machine, dtdx);
1631             fetch_vector4(&inst->SrcReg[2], machine, dtdy);
1632             machine->FetchTexelDeriv(ctx, texcoord, dtdx, dtdy,
1633                                      0.0, /* lodBias */
1634                                      inst->TexSrcUnit, color);
1635             store_vector4(inst, machine, color);
1636          }
1637          break;
1638       case OPCODE_TXP:         /* GL_ARB_fragment_program only */
1639          /* Texture lookup w/ projective divide */
1640          {
1641             GLfloat texcoord[4], color[4];
1642
1643             fetch_vector4(&inst->SrcReg[0], machine, texcoord);
1644             /* Not so sure about this test - if texcoord[3] is
1645              * zero, we'd probably be fine except for an ASSERT in
1646              * IROUND_POS() which gets triggered by the inf values created.
1647              */
1648             if (texcoord[3] != 0.0) {
1649                texcoord[0] /= texcoord[3];
1650                texcoord[1] /= texcoord[3];
1651                texcoord[2] /= texcoord[3];
1652             }
1653
1654             fetch_texel(ctx, machine, inst, texcoord, 0.0, color);
1655
1656             store_vector4(inst, machine, color);
1657          }
1658          break;
1659       case OPCODE_TXP_NV:      /* GL_NV_fragment_program only */
1660          /* Texture lookup w/ projective divide, as above, but do not
1661           * do the divide by w if sampling from a cube map.
1662           */
1663          {
1664             GLfloat texcoord[4], color[4];
1665
1666             fetch_vector4(&inst->SrcReg[0], machine, texcoord);
1667             if (inst->TexSrcTarget != TEXTURE_CUBE_INDEX &&
1668                 texcoord[3] != 0.0) {
1669                texcoord[0] /= texcoord[3];
1670                texcoord[1] /= texcoord[3];
1671                texcoord[2] /= texcoord[3];
1672             }
1673
1674             fetch_texel(ctx, machine, inst, texcoord, 0.0, color);
1675
1676             store_vector4(inst, machine, color);
1677          }
1678          break;
1679       case OPCODE_TRUNC:       /* truncate toward zero */
1680          {
1681             GLfloat a[4], result[4];
1682             fetch_vector4(&inst->SrcReg[0], machine, a);
1683             result[0] = (GLfloat) (GLint) a[0];
1684             result[1] = (GLfloat) (GLint) a[1];
1685             result[2] = (GLfloat) (GLint) a[2];
1686             result[3] = (GLfloat) (GLint) a[3];
1687             store_vector4(inst, machine, result);
1688          }
1689          break;
1690       case OPCODE_UP2H:        /* unpack two 16-bit floats */
1691          {
1692             const GLuint raw = fetch_vector1ui(&inst->SrcReg[0], machine);
1693             GLfloat result[4];
1694             GLushort hx, hy;
1695             hx = raw & 0xffff;
1696             hy = raw >> 16;
1697             result[0] = result[2] = _mesa_half_to_float(hx);
1698             result[1] = result[3] = _mesa_half_to_float(hy);
1699             store_vector4(inst, machine, result);
1700          }
1701          break;
1702       case OPCODE_UP2US:       /* unpack two GLushorts */
1703          {
1704             const GLuint raw = fetch_vector1ui(&inst->SrcReg[0], machine);
1705             GLfloat result[4];
1706             GLushort usx, usy;
1707             usx = raw & 0xffff;
1708             usy = raw >> 16;
1709             result[0] = result[2] = usx * (1.0f / 65535.0f);
1710             result[1] = result[3] = usy * (1.0f / 65535.0f);
1711             store_vector4(inst, machine, result);
1712          }
1713          break;
1714       case OPCODE_UP4B:        /* unpack four GLbytes */
1715          {
1716             const GLuint raw = fetch_vector1ui(&inst->SrcReg[0], machine);
1717             GLfloat result[4];
1718             result[0] = (((raw >> 0) & 0xff) - 128) / 127.0F;
1719             result[1] = (((raw >> 8) & 0xff) - 128) / 127.0F;
1720             result[2] = (((raw >> 16) & 0xff) - 128) / 127.0F;
1721             result[3] = (((raw >> 24) & 0xff) - 128) / 127.0F;
1722             store_vector4(inst, machine, result);
1723          }
1724          break;
1725       case OPCODE_UP4UB:       /* unpack four GLubytes */
1726          {
1727             const GLuint raw = fetch_vector1ui(&inst->SrcReg[0], machine);
1728             GLfloat result[4];
1729             result[0] = ((raw >> 0) & 0xff) / 255.0F;
1730             result[1] = ((raw >> 8) & 0xff) / 255.0F;
1731             result[2] = ((raw >> 16) & 0xff) / 255.0F;
1732             result[3] = ((raw >> 24) & 0xff) / 255.0F;
1733             store_vector4(inst, machine, result);
1734          }
1735          break;
1736       case OPCODE_XOR:         /* bitwise XOR */
1737          {
1738             GLuint a[4], b[4], result[4];
1739             fetch_vector4ui(&inst->SrcReg[0], machine, a);
1740             fetch_vector4ui(&inst->SrcReg[1], machine, b);
1741             result[0] = a[0] ^ b[0];
1742             result[1] = a[1] ^ b[1];
1743             result[2] = a[2] ^ b[2];
1744             result[3] = a[3] ^ b[3];
1745             store_vector4ui(inst, machine, result);
1746          }
1747          break;
1748       case OPCODE_XPD:         /* cross product */
1749          {
1750             GLfloat a[4], b[4], result[4];
1751             fetch_vector4(&inst->SrcReg[0], machine, a);
1752             fetch_vector4(&inst->SrcReg[1], machine, b);
1753             result[0] = a[1] * b[2] - a[2] * b[1];
1754             result[1] = a[2] * b[0] - a[0] * b[2];
1755             result[2] = a[0] * b[1] - a[1] * b[0];
1756             result[3] = 1.0;
1757             store_vector4(inst, machine, result);
1758             if (DEBUG_PROG) {
1759                printf("XPD (%g %g %g %g) = (%g %g %g) X (%g %g %g)\n",
1760                       result[0], result[1], result[2], result[3],
1761                       a[0], a[1], a[2], b[0], b[1], b[2]);
1762             }
1763          }
1764          break;
1765       case OPCODE_X2D:         /* 2-D matrix transform */
1766          {
1767             GLfloat a[4], b[4], c[4], result[4];
1768             fetch_vector4(&inst->SrcReg[0], machine, a);
1769             fetch_vector4(&inst->SrcReg[1], machine, b);
1770             fetch_vector4(&inst->SrcReg[2], machine, c);
1771             result[0] = a[0] + b[0] * c[0] + b[1] * c[1];
1772             result[1] = a[1] + b[0] * c[2] + b[1] * c[3];
1773             result[2] = a[2] + b[0] * c[0] + b[1] * c[1];
1774             result[3] = a[3] + b[0] * c[2] + b[1] * c[3];
1775             store_vector4(inst, machine, result);
1776          }
1777          break;
1778       case OPCODE_PRINT:
1779          {
1780             if (inst->SrcReg[0].File != -1) {
1781                GLfloat a[4];
1782                fetch_vector4(&inst->SrcReg[0], machine, a);
1783                _mesa_printf("%s%g, %g, %g, %g\n", (const char *) inst->Data,
1784                             a[0], a[1], a[2], a[3]);
1785             }
1786             else {
1787                _mesa_printf("%s\n", (const char *) inst->Data);
1788             }
1789          }
1790          break;
1791       case OPCODE_END:
1792          return GL_TRUE;
1793       default:
1794          _mesa_problem(ctx, "Bad opcode %d in _mesa_execute_program",
1795                        inst->Opcode);
1796          return GL_TRUE;        /* return value doesn't matter */
1797       }
1798
1799       numExec++;
1800       if (numExec > maxExec) {
1801          _mesa_problem(ctx, "Infinite loop detected in fragment program");
1802          return GL_TRUE;
1803       }
1804
1805    } /* for pc */
1806
1807    return GL_TRUE;
1808 }