src/mesa/shader/prog_execute.c

   1 /*
   2  * Mesa 3-D graphics library
   3  * Version:  7.0.3
   4  *
   5  * Copyright (C) 1999-2007  Brian Paul   All Rights Reserved.
   6  *
   7  * Permission is hereby granted, free of charge, to any person obtaining a
   8  * copy of this software and associated documentation files (the "Software"),
   9  * to deal in the Software without restriction, including without limitation
  10  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  11  * and/or sell copies of the Software, and to permit persons to whom the
  12  * Software is furnished to do so, subject to the following conditions:
  13  *
  14  * The above copyright notice and this permission notice shall be included
  15  * in all copies or substantial portions of the Software.
  16  *
  17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  18  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  20  * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
  21  * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  22  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  23  */
  24
  25 /**
  26  * \file prog_execute.c
  27  * Software interpreter for vertex/fragment programs.
  28  * \author Brian Paul
  29  */
  30
  31 /*
  32  * NOTE: we do everything in single-precision floating point; we don't
  33  * currently observe the single/half/fixed-precision qualifiers.
  34  *
  35  */
  36
  37
  38 #include "main/glheader.h"
  39 #include "main/colormac.h"
  40 #include "main/context.h"
  41 #include "program.h"
  42 #include "prog_execute.h"
  43 #include "prog_instruction.h"
  44 #include "prog_parameter.h"
  45 #include "prog_print.h"
  46 #include "shader/slang/slang_library_noise.h"
  47
  48
  49 /* debug predicate */
  50 #define DEBUG_PROG 0
  51
  52
  53 /**
  54  * Set x to positive or negative infinity.
  55  */
  56 #if defined(USE_IEEE) || defined(_WIN32)
  57 #define SET_POS_INFINITY(x)  ( *((GLuint *) (void *)&x) = 0x7F800000 )
  58 #define SET_NEG_INFINITY(x)  ( *((GLuint *) (void *)&x) = 0xFF800000 )
  59 #elif defined(VMS)
  60 #define SET_POS_INFINITY(x)  x = __MAXFLOAT
  61 #define SET_NEG_INFINITY(x)  x = -__MAXFLOAT
  62 #else
  63 #define SET_POS_INFINITY(x)  x = (GLfloat) HUGE_VAL
  64 #define SET_NEG_INFINITY(x)  x = (GLfloat) -HUGE_VAL
  65 #endif
  66
  67 #define SET_FLOAT_BITS(x, bits) ((fi_type *) (void *) &(x))->i = bits
  68
  69
/**
 * All-zero vector.  get_register_pointer() returns this when a
 * relative-addressed register index is out of range.
 */
static const GLfloat ZeroVec[4] = { 0.0F, 0.0F, 0.0F, 0.0F };
  71
  72
  73
  74 /**
  75  * Return a pointer to the 4-element float vector specified by the given
  76  * source register.
  77  */
  78 static INLINE const GLfloat *
  79 get_register_pointer(const struct prog_src_register *source,
  80                      const struct gl_program_machine *machine)
  81 {
  82    if (source->RelAddr) {
  83       const GLint reg = source->Index + machine->AddressReg[0][0];
  84       if (source->File == PROGRAM_ENV_PARAM)
  85          if (reg < 0 || reg >= MAX_PROGRAM_ENV_PARAMS)
  86             return ZeroVec;
  87          else
  88             return machine->EnvParams[reg];
  89       else {
  90          const struct gl_program_parameter_list *params;
  91          ASSERT(source->File == PROGRAM_LOCAL_PARAM ||
  92                 source->File == PROGRAM_CONSTANT ||
  93                 source->File == PROGRAM_STATE_VAR ||
  94                 source->File == PROGRAM_UNIFORM);
  95          params = machine->CurProgram->Parameters;
  96          if (reg < 0 || reg >= (GLint)params->NumParameters)
  97             return ZeroVec;
  98          else
  99             return params->ParameterValues[reg];
 100       }
 101    }
 102
 103    switch (source->File) {
 104    case PROGRAM_TEMPORARY:
 105       ASSERT(source->Index < MAX_PROGRAM_TEMPS);
 106       return machine->Temporaries[source->Index];
 107
 108    case PROGRAM_INPUT:
 109       if (machine->CurProgram->Target == GL_VERTEX_PROGRAM_ARB) {
 110          ASSERT(source->Index < VERT_ATTRIB_MAX);
 111          return machine->VertAttribs[source->Index];
 112       }
 113       else {
 114          ASSERT(source->Index < FRAG_ATTRIB_MAX);
 115          return machine->Attribs[source->Index][machine->CurElement];
 116       }
 117
 118    case PROGRAM_OUTPUT:
 119       ASSERT(source->Index < MAX_PROGRAM_OUTPUTS);
 120       return machine->Outputs[source->Index];
 121
 122    case PROGRAM_LOCAL_PARAM:
 123       ASSERT(source->Index < MAX_PROGRAM_LOCAL_PARAMS);
 124       return machine->CurProgram->LocalParams[source->Index];
 125
 126    case PROGRAM_ENV_PARAM:
 127       ASSERT(source->Index < MAX_PROGRAM_ENV_PARAMS);
 128       return machine->EnvParams[source->Index];
 129
 130    case PROGRAM_STATE_VAR:
 131       /* Fallthrough */
 132    case PROGRAM_CONSTANT:
 133       /* Fallthrough */
 134    case PROGRAM_UNIFORM:
 135       /* Fallthrough */
 136    case PROGRAM_NAMED_PARAM:
 137       ASSERT(source->Index <
 138              (GLint) machine->CurProgram->Parameters->NumParameters);
 139       return machine->CurProgram->Parameters->ParameterValues[source->Index];
 140
 141    default:
 142       _mesa_problem(NULL,
 143                     "Invalid input register file %d in get_register_pointer()",
 144                     source->File);
 145       return NULL;
 146    }
 147 }
 148
 149
 150 #if FEATURE_MESA_program_debug
 151 static struct gl_program_machine *CurrentMachine = NULL;
 152
 153 /**
 154  * For GL_MESA_program_debug.
 155  * Return current value (4*GLfloat) of a program register.
 156  * Called via ctx->Driver.GetProgramRegister().
 157  */
 158 void
 159 _mesa_get_program_register(GLcontext *ctx, enum register_file file,
 160                            GLuint index, GLfloat val[4])
 161 {
 162    if (CurrentMachine) {
 163       struct prog_src_register src;
 164       const GLfloat *reg;
 165       src.File = file;
 166       src.Index = index;
 167       reg = get_register_pointer(&src, CurrentMachine);
 168       COPY_4V(val, reg);
 169    }
 170 }
 171 #endif /* FEATURE_MESA_program_debug */
 172
 173
 174 /**
 175  * Fetch a 4-element float vector from the given source register.
 176  * Apply swizzling and negating as needed.
 177  */
 178 static void
 179 fetch_vector4(const struct prog_src_register *source,
 180               const struct gl_program_machine *machine, GLfloat result[4])
 181 {
 182    const GLfloat *src = get_register_pointer(source, machine);
 183    ASSERT(src);
 184
 185    if (source->Swizzle == SWIZZLE_NOOP) {
 186       /* no swizzling */
 187       COPY_4V(result, src);
 188    }
 189    else {
 190       ASSERT(GET_SWZ(source->Swizzle, 0) <= 3);
 191       ASSERT(GET_SWZ(source->Swizzle, 1) <= 3);
 192       ASSERT(GET_SWZ(source->Swizzle, 2) <= 3);
 193       ASSERT(GET_SWZ(source->Swizzle, 3) <= 3);
 194       result[0] = src[GET_SWZ(source->Swizzle, 0)];
 195       result[1] = src[GET_SWZ(source->Swizzle, 1)];
 196       result[2] = src[GET_SWZ(source->Swizzle, 2)];
 197       result[3] = src[GET_SWZ(source->Swizzle, 3)];
 198    }
 199
 200    if (source->NegateBase) {
 201       result[0] = -result[0];
 202       result[1] = -result[1];
 203       result[2] = -result[2];
 204       result[3] = -result[3];
 205    }
 206    if (source->Abs) {
 207       result[0] = FABSF(result[0]);
 208       result[1] = FABSF(result[1]);
 209       result[2] = FABSF(result[2]);
 210       result[3] = FABSF(result[3]);
 211    }
 212    if (source->NegateAbs) {
 213       result[0] = -result[0];
 214       result[1] = -result[1];
 215       result[2] = -result[2];
 216       result[3] = -result[3];
 217    }
 218 }
 219
 220
 221 /**
 222  * Fetch a 4-element uint vector from the given source register.
 223  * Apply swizzling but not negation/abs.
 224  */
 225 static void
 226 fetch_vector4ui(const struct prog_src_register *source,
 227                 const struct gl_program_machine *machine, GLuint result[4])
 228 {
 229    const GLuint *src = (GLuint *) get_register_pointer(source, machine);
 230    ASSERT(src);
 231
 232    if (source->Swizzle == SWIZZLE_NOOP) {
 233       /* no swizzling */
 234       COPY_4V(result, src);
 235    }
 236    else {
 237       ASSERT(GET_SWZ(source->Swizzle, 0) <= 3);
 238       ASSERT(GET_SWZ(source->Swizzle, 1) <= 3);
 239       ASSERT(GET_SWZ(source->Swizzle, 2) <= 3);
 240       ASSERT(GET_SWZ(source->Swizzle, 3) <= 3);
 241       result[0] = src[GET_SWZ(source->Swizzle, 0)];
 242       result[1] = src[GET_SWZ(source->Swizzle, 1)];
 243       result[2] = src[GET_SWZ(source->Swizzle, 2)];
 244       result[3] = src[GET_SWZ(source->Swizzle, 3)];
 245    }
 246
 247    /* Note: no NegateBase, Abs, NegateAbs here */
 248 }
 249
 250
 251
 252 /**
 253  * Fetch the derivative with respect to X or Y for the given register.
 254  * XXX this currently only works for fragment program input attribs.
 255  */
 256 static void
 257 fetch_vector4_deriv(GLcontext * ctx,
 258                     const struct prog_src_register *source,
 259                     const struct gl_program_machine *machine,
 260                     char xOrY, GLfloat result[4])
 261 {
 262    if (source->File == PROGRAM_INPUT &&
 263        source->Index < (GLint) machine->NumDeriv) {
 264       const GLint col = machine->CurElement;
 265       const GLfloat w = machine->Attribs[FRAG_ATTRIB_WPOS][col][3];
 266       const GLfloat invQ = 1.0f / w;
 267       GLfloat deriv[4];
 268
 269       if (xOrY == 'X') {
 270          deriv[0] = machine->DerivX[source->Index][0] * invQ;
 271          deriv[1] = machine->DerivX[source->Index][1] * invQ;
 272          deriv[2] = machine->DerivX[source->Index][2] * invQ;
 273          deriv[3] = machine->DerivX[source->Index][3] * invQ;
 274       }
 275       else {
 276          deriv[0] = machine->DerivY[source->Index][0] * invQ;
 277          deriv[1] = machine->DerivY[source->Index][1] * invQ;
 278          deriv[2] = machine->DerivY[source->Index][2] * invQ;
 279          deriv[3] = machine->DerivY[source->Index][3] * invQ;
 280       }
 281
 282       result[0] = deriv[GET_SWZ(source->Swizzle, 0)];
 283       result[1] = deriv[GET_SWZ(source->Swizzle, 1)];
 284       result[2] = deriv[GET_SWZ(source->Swizzle, 2)];
 285       result[3] = deriv[GET_SWZ(source->Swizzle, 3)];
 286
 287       if (source->NegateBase) {
 288          result[0] = -result[0];
 289          result[1] = -result[1];
 290          result[2] = -result[2];
 291          result[3] = -result[3];
 292       }
 293       if (source->Abs) {
 294          result[0] = FABSF(result[0]);
 295          result[1] = FABSF(result[1]);
 296          result[2] = FABSF(result[2]);
 297          result[3] = FABSF(result[3]);
 298       }
 299       if (source->NegateAbs) {
 300          result[0] = -result[0];
 301          result[1] = -result[1];
 302          result[2] = -result[2];
 303          result[3] = -result[3];
 304       }
 305    }
 306    else {
 307       ASSIGN_4V(result, 0.0, 0.0, 0.0, 0.0);
 308    }
 309 }
 310
 311
 312 /**
 313  * As above, but only return result[0] element.
 314  */
 315 static void
 316 fetch_vector1(const struct prog_src_register *source,
 317               const struct gl_program_machine *machine, GLfloat result[4])
 318 {
 319    const GLfloat *src = get_register_pointer(source, machine);
 320    ASSERT(src);
 321
 322    result[0] = src[GET_SWZ(source->Swizzle, 0)];
 323
 324    if (source->NegateBase) {
 325       result[0] = -result[0];
 326    }
 327    if (source->Abs) {
 328       result[0] = FABSF(result[0]);
 329    }
 330    if (source->NegateAbs) {
 331       result[0] = -result[0];
 332    }
 333 }
 334
 335
 336 /**
 337  * Fetch texel from texture.  Use partial derivatives when possible.
 338  */
 339 static INLINE void
 340 fetch_texel(GLcontext *ctx,
 341             const struct gl_program_machine *machine,
 342             const struct prog_instruction *inst,
 343             const GLfloat texcoord[4], GLfloat lodBias,
 344             GLfloat color[4])
 345 {
 346    const GLuint unit = machine->Samplers[inst->TexSrcUnit];
 347
 348    /* Note: we only have the right derivatives for fragment input attribs.
 349     */
 350    if (machine->NumDeriv > 0 &&
 351        inst->SrcReg[0].File == PROGRAM_INPUT &&
 352        inst->SrcReg[0].Index == FRAG_ATTRIB_TEX0 + inst->TexSrcUnit) {
 353       /* simple texture fetch for which we should have derivatives */
 354       GLuint attr = inst->SrcReg[0].Index;
 355       machine->FetchTexelDeriv(ctx, texcoord,
 356                                machine->DerivX[attr],
 357                                machine->DerivY[attr],
 358                                lodBias, unit, color);
 359    }
 360    else {
 361       machine->FetchTexelLod(ctx, texcoord, lodBias, unit, color);
 362    }
 363 }
 364
 365
 366 /**
 367  * Test value against zero and return GT, LT, EQ or UN if NaN.
 368  */
 369 static INLINE GLuint
 370 generate_cc(float value)
 371 {
 372    if (value != value)
 373       return COND_UN;           /* NaN */
 374    if (value > 0.0F)
 375       return COND_GT;
 376    if (value < 0.0F)
 377       return COND_LT;
 378    return COND_EQ;
 379 }
 380
 381
 382 /**
 383  * Test if the ccMaskRule is satisfied by the given condition code.
 384  * Used to mask destination writes according to the current condition code.
 385  */
 386 static INLINE GLboolean
 387 test_cc(GLuint condCode, GLuint ccMaskRule)
 388 {
 389    switch (ccMaskRule) {
 390    case COND_EQ: return (condCode == COND_EQ);
 391    case COND_NE: return (condCode != COND_EQ);
 392    case COND_LT: return (condCode == COND_LT);
 393    case COND_GE: return (condCode == COND_GT || condCode == COND_EQ);
 394    case COND_LE: return (condCode == COND_LT || condCode == COND_EQ);
 395    case COND_GT: return (condCode == COND_GT);
 396    case COND_TR: return GL_TRUE;
 397    case COND_FL: return GL_FALSE;
 398    default:      return GL_TRUE;
 399    }
 400 }
 401
 402
 403 /**
 404  * Evaluate the 4 condition codes against a predicate and return GL_TRUE
 405  * or GL_FALSE to indicate result.
 406  */
 407 static INLINE GLboolean
 408 eval_condition(const struct gl_program_machine *machine,
 409                const struct prog_instruction *inst)
 410 {
 411    const GLuint swizzle = inst->DstReg.CondSwizzle;
 412    const GLuint condMask = inst->DstReg.CondMask;
 413    if (test_cc(machine->CondCodes[GET_SWZ(swizzle, 0)], condMask) ||
 414        test_cc(machine->CondCodes[GET_SWZ(swizzle, 1)], condMask) ||
 415        test_cc(machine->CondCodes[GET_SWZ(swizzle, 2)], condMask) ||
 416        test_cc(machine->CondCodes[GET_SWZ(swizzle, 3)], condMask)) {
 417       return GL_TRUE;
 418    }
 419    else {
 420       return GL_FALSE;
 421    }
 422 }
 423
 424
 425
 426 /**
 427  * Store 4 floats into a register.  Observe the instructions saturate and
 428  * set-condition-code flags.
 429  */
 430 static void
 431 store_vector4(const struct prog_instruction *inst,
 432               struct gl_program_machine *machine, const GLfloat value[4])
 433 {
 434    const struct prog_dst_register *dest = &(inst->DstReg);
 435    const GLboolean clamp = inst->SaturateMode == SATURATE_ZERO_ONE;
 436    GLfloat *dstReg;
 437    GLfloat dummyReg[4];
 438    GLfloat clampedValue[4];
 439    GLuint writeMask = dest->WriteMask;
 440
 441    switch (dest->File) {
 442    case PROGRAM_OUTPUT:
 443       ASSERT(dest->Index < MAX_PROGRAM_OUTPUTS);
 444       dstReg = machine->Outputs[dest->Index];
 445       break;
 446    case PROGRAM_TEMPORARY:
 447       ASSERT(dest->Index < MAX_PROGRAM_TEMPS);
 448       dstReg = machine->Temporaries[dest->Index];
 449       break;
 450    case PROGRAM_WRITE_ONLY:
 451       dstReg = dummyReg;
 452       return;
 453    default:
 454       _mesa_problem(NULL, "bad register file in store_vector4(fp)");
 455       return;
 456    }
 457
 458 #if 0
 459    if (value[0] > 1.0e10 ||
 460        IS_INF_OR_NAN(value[0]) ||
 461        IS_INF_OR_NAN(value[1]) ||
 462        IS_INF_OR_NAN(value[2]) || IS_INF_OR_NAN(value[3]))
 463       printf("store %g %g %g %g\n", value[0], value[1], value[2], value[3]);
 464 #endif
 465
 466    if (clamp) {
 467       clampedValue[0] = CLAMP(value[0], 0.0F, 1.0F);
 468       clampedValue[1] = CLAMP(value[1], 0.0F, 1.0F);
 469       clampedValue[2] = CLAMP(value[2], 0.0F, 1.0F);
 470       clampedValue[3] = CLAMP(value[3], 0.0F, 1.0F);
 471       value = clampedValue;
 472    }
 473
 474    if (dest->CondMask != COND_TR) {
 475       /* condition codes may turn off some writes */
 476       if (writeMask & WRITEMASK_X) {
 477          if (!test_cc(machine->CondCodes[GET_SWZ(dest->CondSwizzle, 0)],
 478                       dest->CondMask))
 479             writeMask &= ~WRITEMASK_X;
 480       }
 481       if (writeMask & WRITEMASK_Y) {
 482          if (!test_cc(machine->CondCodes[GET_SWZ(dest->CondSwizzle, 1)],
 483                       dest->CondMask))
 484             writeMask &= ~WRITEMASK_Y;
 485       }
 486       if (writeMask & WRITEMASK_Z) {
 487          if (!test_cc(machine->CondCodes[GET_SWZ(dest->CondSwizzle, 2)],
 488                       dest->CondMask))
 489             writeMask &= ~WRITEMASK_Z;
 490       }
 491       if (writeMask & WRITEMASK_W) {
 492          if (!test_cc(machine->CondCodes[GET_SWZ(dest->CondSwizzle, 3)],
 493                       dest->CondMask))
 494             writeMask &= ~WRITEMASK_W;
 495       }
 496    }
 497
 498    if (writeMask & WRITEMASK_X)
 499       dstReg[0] = value[0];
 500    if (writeMask & WRITEMASK_Y)
 501       dstReg[1] = value[1];
 502    if (writeMask & WRITEMASK_Z)
 503       dstReg[2] = value[2];
 504    if (writeMask & WRITEMASK_W)
 505       dstReg[3] = value[3];
 506
 507    if (inst->CondUpdate) {
 508       if (writeMask & WRITEMASK_X)
 509          machine->CondCodes[0] = generate_cc(value[0]);
 510       if (writeMask & WRITEMASK_Y)
 511          machine->CondCodes[1] = generate_cc(value[1]);
 512       if (writeMask & WRITEMASK_Z)
 513          machine->CondCodes[2] = generate_cc(value[2]);
 514       if (writeMask & WRITEMASK_W)
 515          machine->CondCodes[3] = generate_cc(value[3]);
 516 #if DEBUG_PROG
 517       printf("CondCodes=(%s,%s,%s,%s) for:\n",
 518              _mesa_condcode_string(machine->CondCodes[0]),
 519              _mesa_condcode_string(machine->CondCodes[1]),
 520              _mesa_condcode_string(machine->CondCodes[2]),
 521              _mesa_condcode_string(machine->CondCodes[3]));
 522 #endif
 523    }
 524 }
 525
 526
 527 /**
 528  * Store 4 uints into a register.  Observe the set-condition-code flags.
 529  */
 530 static void
 531 store_vector4ui(const struct prog_instruction *inst,
 532                 struct gl_program_machine *machine, const GLuint value[4])
 533 {
 534    const struct prog_dst_register *dest = &(inst->DstReg);
 535    GLuint *dstReg;
 536    GLuint dummyReg[4];
 537    GLuint writeMask = dest->WriteMask;
 538
 539    switch (dest->File) {
 540    case PROGRAM_OUTPUT:
 541       ASSERT(dest->Index < MAX_PROGRAM_OUTPUTS);
 542       dstReg = (GLuint *) machine->Outputs[dest->Index];
 543       break;
 544    case PROGRAM_TEMPORARY:
 545       ASSERT(dest->Index < MAX_PROGRAM_TEMPS);
 546       dstReg = (GLuint *) machine->Temporaries[dest->Index];
 547       break;
 548    case PROGRAM_WRITE_ONLY:
 549       dstReg = dummyReg;
 550       return;
 551    default:
 552       _mesa_problem(NULL, "bad register file in store_vector4(fp)");
 553       return;
 554    }
 555
 556    if (dest->CondMask != COND_TR) {
 557       /* condition codes may turn off some writes */
 558       if (writeMask & WRITEMASK_X) {
 559          if (!test_cc(machine->CondCodes[GET_SWZ(dest->CondSwizzle, 0)],
 560                       dest->CondMask))
 561             writeMask &= ~WRITEMASK_X;
 562       }
 563       if (writeMask & WRITEMASK_Y) {
 564          if (!test_cc(machine->CondCodes[GET_SWZ(dest->CondSwizzle, 1)],
 565                       dest->CondMask))
 566             writeMask &= ~WRITEMASK_Y;
 567       }
 568       if (writeMask & WRITEMASK_Z) {
 569          if (!test_cc(machine->CondCodes[GET_SWZ(dest->CondSwizzle, 2)],
 570                       dest->CondMask))
 571             writeMask &= ~WRITEMASK_Z;
 572       }
 573       if (writeMask & WRITEMASK_W) {
 574          if (!test_cc(machine->CondCodes[GET_SWZ(dest->CondSwizzle, 3)],
 575                       dest->CondMask))
 576             writeMask &= ~WRITEMASK_W;
 577       }
 578    }
 579
 580    if (writeMask & WRITEMASK_X)
 581       dstReg[0] = value[0];
 582    if (writeMask & WRITEMASK_Y)
 583       dstReg[1] = value[1];
 584    if (writeMask & WRITEMASK_Z)
 585       dstReg[2] = value[2];
 586    if (writeMask & WRITEMASK_W)
 587       dstReg[3] = value[3];
 588
 589    if (inst->CondUpdate) {
 590       if (writeMask & WRITEMASK_X)
 591          machine->CondCodes[0] = generate_cc(value[0]);
 592       if (writeMask & WRITEMASK_Y)
 593          machine->CondCodes[1] = generate_cc(value[1]);
 594       if (writeMask & WRITEMASK_Z)
 595          machine->CondCodes[2] = generate_cc(value[2]);
 596       if (writeMask & WRITEMASK_W)
 597          machine->CondCodes[3] = generate_cc(value[3]);
 598 #if DEBUG_PROG
 599       printf("CondCodes=(%s,%s,%s,%s) for:\n",
 600              _mesa_condcode_string(machine->CondCodes[0]),
 601              _mesa_condcode_string(machine->CondCodes[1]),
 602              _mesa_condcode_string(machine->CondCodes[2]),
 603              _mesa_condcode_string(machine->CondCodes[3]));
 604 #endif
 605    }
 606 }
 607
 608
 609
 610 /**
 611  * Execute the given vertex/fragment program.
 612  *
 613  * \param ctx  rendering context
 614  * \param program  the program to execute
 615  * \param machine  machine state (must be initialized)
 616  * \return GL_TRUE if program completed or GL_FALSE if program executed KIL.
 617  */
 618 GLboolean
 619 _mesa_execute_program(GLcontext * ctx,
 620                       const struct gl_program *program,
 621                       struct gl_program_machine *machine)
 622 {
 623    const GLuint numInst = program->NumInstructions;
 624    const GLuint maxExec = 10000;
 625    GLuint pc, numExec = 0;
 626
 627    machine->CurProgram = program;
 628
 629    if (DEBUG_PROG) {
 630       printf("execute program %u --------------------\n", program->Id);
 631    }
 632
 633 #if FEATURE_MESA_program_debug
 634    CurrentMachine = machine;
 635 #endif
 636
 637    if (program->Target == GL_VERTEX_PROGRAM_ARB) {
 638       machine->EnvParams = ctx->VertexProgram.Parameters;
 639    }
 640    else {
 641       machine->EnvParams = ctx->FragmentProgram.Parameters;
 642    }
 643
 644    for (pc = 0; pc < numInst; pc++) {
 645       const struct prog_instruction *inst = program->Instructions + pc;
 646
 647 #if FEATURE_MESA_program_debug
 648       if (ctx->FragmentProgram.CallbackEnabled &&
 649           ctx->FragmentProgram.Callback) {
 650          ctx->FragmentProgram.CurrentPosition = inst->StringPos;
 651          ctx->FragmentProgram.Callback(program->Target,
 652                                        ctx->FragmentProgram.CallbackData);
 653       }
 654 #endif
 655
 656       if (DEBUG_PROG) {
 657          _mesa_print_instruction(inst);
 658       }
 659
 660       switch (inst->Opcode) {
 661       case OPCODE_ABS:
 662          {
 663             GLfloat a[4], result[4];
 664             fetch_vector4(&inst->SrcReg[0], machine, a);
 665             result[0] = FABSF(a[0]);
 666             result[1] = FABSF(a[1]);
 667             result[2] = FABSF(a[2]);
 668             result[3] = FABSF(a[3]);
 669             store_vector4(inst, machine, result);
 670          }
 671          break;
 672       case OPCODE_ADD:
 673          {
 674             GLfloat a[4], b[4], result[4];
 675             fetch_vector4(&inst->SrcReg[0], machine, a);
 676             fetch_vector4(&inst->SrcReg[1], machine, b);
 677             result[0] = a[0] + b[0];
 678             result[1] = a[1] + b[1];
 679             result[2] = a[2] + b[2];
 680             result[3] = a[3] + b[3];
 681             store_vector4(inst, machine, result);
 682             if (DEBUG_PROG) {
 683                printf("ADD (%g %g %g %g) = (%g %g %g %g) + (%g %g %g %g)\n",
 684                       result[0], result[1], result[2], result[3],
 685                       a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
 686             }
 687          }
 688          break;
 689       case OPCODE_AND:     /* bitwise AND */
 690          {
 691             GLuint a[4], b[4], result[4];
 692             fetch_vector4ui(&inst->SrcReg[0], machine, a);
 693             fetch_vector4ui(&inst->SrcReg[1], machine, b);
 694             result[0] = a[0] & b[0];
 695             result[1] = a[1] & b[1];
 696             result[2] = a[2] & b[2];
 697             result[3] = a[3] & b[3];
 698             store_vector4ui(inst, machine, result);
 699          }
 700          break;
 701       case OPCODE_ARL:
 702          {
 703             GLfloat t[4];
 704             fetch_vector4(&inst->SrcReg[0], machine, t);
 705             machine->AddressReg[0][0] = (GLint) FLOORF(t[0]);
 706          }
 707          break;
 708       case OPCODE_BGNLOOP:
 709          /* no-op */
 710          break;
 711       case OPCODE_ENDLOOP:
 712          /* subtract 1 here since pc is incremented by for(pc) loop */
 713          pc = inst->BranchTarget - 1;   /* go to matching BNGLOOP */
 714          break;
 715       case OPCODE_BGNSUB:      /* begin subroutine */
 716          break;
 717       case OPCODE_ENDSUB:      /* end subroutine */
 718          break;
 719       case OPCODE_BRA:         /* branch (conditional) */
 720          /* fall-through */
 721       case OPCODE_BRK:         /* break out of loop (conditional) */
 722          /* fall-through */
 723       case OPCODE_CONT:        /* continue loop (conditional) */
 724          if (eval_condition(machine, inst)) {
 725             /* take branch */
 726             /* Subtract 1 here since we'll do pc++ at end of for-loop */
 727             pc = inst->BranchTarget - 1;
 728          }
 729          break;
 730       case OPCODE_CAL:         /* Call subroutine (conditional) */
 731          if (eval_condition(machine, inst)) {
 732             /* call the subroutine */
 733             if (machine->StackDepth >= MAX_PROGRAM_CALL_DEPTH) {
 734                return GL_TRUE;  /* Per GL_NV_vertex_program2 spec */
 735             }
 736             machine->CallStack[machine->StackDepth++] = pc + 1; /* next inst */
 737             /* Subtract 1 here since we'll do pc++ at end of for-loop */
 738             pc = inst->BranchTarget - 1;
 739          }
 740          break;
 741       case OPCODE_CMP:
 742          {
 743             GLfloat a[4], b[4], c[4], result[4];
 744             fetch_vector4(&inst->SrcReg[0], machine, a);
 745             fetch_vector4(&inst->SrcReg[1], machine, b);
 746             fetch_vector4(&inst->SrcReg[2], machine, c);
 747             result[0] = a[0] < 0.0F ? b[0] : c[0];
 748             result[1] = a[1] < 0.0F ? b[1] : c[1];
 749             result[2] = a[2] < 0.0F ? b[2] : c[2];
 750             result[3] = a[3] < 0.0F ? b[3] : c[3];
 751             store_vector4(inst, machine, result);
 752          }
 753          break;
 754       case OPCODE_COS:
 755          {
 756             GLfloat a[4], result[4];
 757             fetch_vector1(&inst->SrcReg[0], machine, a);
 758             result[0] = result[1] = result[2] = result[3]
 759                = (GLfloat) _mesa_cos(a[0]);
 760             store_vector4(inst, machine, result);
 761          }
 762          break;
 763       case OPCODE_DDX:         /* Partial derivative with respect to X */
 764          {
 765             GLfloat result[4];
 766             fetch_vector4_deriv(ctx, &inst->SrcReg[0], machine,
 767                                 'X', result);
 768             store_vector4(inst, machine, result);
 769          }
 770          break;
 771       case OPCODE_DDY:         /* Partial derivative with respect to Y */
 772          {
 773             GLfloat result[4];
 774             fetch_vector4_deriv(ctx, &inst->SrcReg[0], machine,
 775                                 'Y', result);
 776             store_vector4(inst, machine, result);
 777          }
 778          break;
 779       case OPCODE_DP2:
 780          {
 781             GLfloat a[4], b[4], result[4];
 782             fetch_vector4(&inst->SrcReg[0], machine, a);
 783             fetch_vector4(&inst->SrcReg[1], machine, b);
 784             result[0] = result[1] = result[2] = result[3] = DOT2(a, b);
 785             store_vector4(inst, machine, result);
 786             if (DEBUG_PROG) {
 787                printf("DP2 %g = (%g %g) . (%g %g)\n",
 788                       result[0], a[0], a[1], b[0], b[1]);
 789             }
 790          }
 791          break;
 792       case OPCODE_DP2A:
 793          {
 794             GLfloat a[4], b[4], c, result[4];
 795             fetch_vector4(&inst->SrcReg[0], machine, a);
 796             fetch_vector4(&inst->SrcReg[1], machine, b);
 797             fetch_vector1(&inst->SrcReg[1], machine, &c);
 798             result[0] = result[1] = result[2] = result[3] = DOT2(a, b) + c;
 799             store_vector4(inst, machine, result);
 800             if (DEBUG_PROG) {
 801                printf("DP2A %g = (%g %g) . (%g %g) + %g\n",
 802                       result[0], a[0], a[1], b[0], b[1], c);
 803             }
 804          }
 805          break;
 806       case OPCODE_DP3:
 807          {
 808             GLfloat a[4], b[4], result[4];
 809             fetch_vector4(&inst->SrcReg[0], machine, a);
 810             fetch_vector4(&inst->SrcReg[1], machine, b);
 811             result[0] = result[1] = result[2] = result[3] = DOT3(a, b);
 812             store_vector4(inst, machine, result);
 813             if (DEBUG_PROG) {
 814                printf("DP3 %g = (%g %g %g) . (%g %g %g)\n",
 815                       result[0], a[0], a[1], a[2], b[0], b[1], b[2]);
 816             }
 817          }
 818          break;
 819       case OPCODE_DP4:
 820          {
 821             GLfloat a[4], b[4], result[4];
 822             fetch_vector4(&inst->SrcReg[0], machine, a);
 823             fetch_vector4(&inst->SrcReg[1], machine, b);
 824             result[0] = result[1] = result[2] = result[3] = DOT4(a, b);
 825             store_vector4(inst, machine, result);
 826             if (DEBUG_PROG) {
 827                printf("DP4 %g = (%g, %g %g %g) . (%g, %g %g %g)\n",
 828                       result[0], a[0], a[1], a[2], a[3],
 829                       b[0], b[1], b[2], b[3]);
 830             }
 831          }
 832          break;
 833       case OPCODE_DPH:
 834          {
 835             GLfloat a[4], b[4], result[4];
 836             fetch_vector4(&inst->SrcReg[0], machine, a);
 837             fetch_vector4(&inst->SrcReg[1], machine, b);
 838             result[0] = result[1] = result[2] = result[3] = DOT3(a, b) + b[3];
 839             store_vector4(inst, machine, result);
 840          }
 841          break;
 842       case OPCODE_DST:         /* Distance vector */
 843          {
 844             GLfloat a[4], b[4], result[4];
 845             fetch_vector4(&inst->SrcReg[0], machine, a);
 846             fetch_vector4(&inst->SrcReg[1], machine, b);
 847             result[0] = 1.0F;
 848             result[1] = a[1] * b[1];
 849             result[2] = a[2];
 850             result[3] = b[3];
 851             store_vector4(inst, machine, result);
 852          }
 853          break;
 854       case OPCODE_EXP:
 855          {
 856             GLfloat t[4], q[4], floor_t0;
 857             fetch_vector1(&inst->SrcReg[0], machine, t);
 858             floor_t0 = FLOORF(t[0]);
 859             if (floor_t0 > FLT_MAX_EXP) {
 860                SET_POS_INFINITY(q[0]);
 861                SET_POS_INFINITY(q[2]);
 862             }
 863             else if (floor_t0 < FLT_MIN_EXP) {
 864                q[0] = 0.0F;
 865                q[2] = 0.0F;
 866             }
 867             else {
 868                q[0] = LDEXPF(1.0, (int) floor_t0);
 869                /* Note: GL_NV_vertex_program expects
 870                 * result.z = result.x * APPX(result.y)
 871                 * We do what the ARB extension says.
 872                 */
 873                q[2] = (GLfloat) pow(2.0, t[0]);
 874             }
 875             q[1] = t[0] - floor_t0;
 876             q[3] = 1.0F;
 877             store_vector4( inst, machine, q );
 878          }
 879          break;
 880       case OPCODE_EX2:         /* Exponential base 2 */
 881          {
 882             GLfloat a[4], result[4];
 883             fetch_vector1(&inst->SrcReg[0], machine, a);
 884             result[0] = result[1] = result[2] = result[3] =
 885                (GLfloat) _mesa_pow(2.0, a[0]);
 886             store_vector4(inst, machine, result);
 887          }
 888          break;
 889       case OPCODE_FLR:
 890          {
 891             GLfloat a[4], result[4];
 892             fetch_vector4(&inst->SrcReg[0], machine, a);
 893             result[0] = FLOORF(a[0]);
 894             result[1] = FLOORF(a[1]);
 895             result[2] = FLOORF(a[2]);
 896             result[3] = FLOORF(a[3]);
 897             store_vector4(inst, machine, result);
 898          }
 899          break;
 900       case OPCODE_FRC:
 901          {
 902             GLfloat a[4], result[4];
 903             fetch_vector4(&inst->SrcReg[0], machine, a);
 904             result[0] = a[0] - FLOORF(a[0]);
 905             result[1] = a[1] - FLOORF(a[1]);
 906             result[2] = a[2] - FLOORF(a[2]);
 907             result[3] = a[3] - FLOORF(a[3]);
 908             store_vector4(inst, machine, result);
 909          }
 910          break;
 911       case OPCODE_IF:
 912          {
 913             GLboolean cond;
 914             /* eval condition */
 915             if (inst->SrcReg[0].File != PROGRAM_UNDEFINED) {
 916                GLfloat a[4];
 917                fetch_vector1(&inst->SrcReg[0], machine, a);
 918                cond = (a[0] != 0.0);
 919             }
 920             else {
 921                cond = eval_condition(machine, inst);
 922             }
 923             if (DEBUG_PROG) {
 924                printf("IF: %d\n", cond);
 925             }
 926             /* do if/else */
 927             if (cond) {
 928                /* do if-clause (just continue execution) */
 929             }
 930             else {
 931                /* go to the instruction after ELSE or ENDIF */
 932                assert(inst->BranchTarget >= 0);
 933                pc = inst->BranchTarget - 1;
 934             }
 935          }
 936          break;
 937       case OPCODE_ELSE:
 938          /* goto ENDIF */
 939          assert(inst->BranchTarget >= 0);
 940          pc = inst->BranchTarget - 1;
 941          break;
 942       case OPCODE_ENDIF:
 943          /* nothing */
 944          break;
 945       case OPCODE_KIL_NV:      /* NV_f_p only (conditional) */
 946          if (eval_condition(machine, inst)) {
 947             return GL_FALSE;
 948          }
 949          break;
 950       case OPCODE_KIL:         /* ARB_f_p only */
 951          {
 952             GLfloat a[4];
 953             fetch_vector4(&inst->SrcReg[0], machine, a);
 954             if (a[0] < 0.0F || a[1] < 0.0F || a[2] < 0.0F || a[3] < 0.0F) {
 955                return GL_FALSE;
 956             }
 957          }
 958          break;
 959       case OPCODE_LG2:         /* log base 2 */
 960          {
 961             GLfloat a[4], result[4];
 962             fetch_vector1(&inst->SrcReg[0], machine, a);
 963             result[0] = result[1] = result[2] = result[3] = LOG2(a[0]);
 964             store_vector4(inst, machine, result);
 965          }
 966          break;
 967       case OPCODE_LIT:
 968          {
 969             const GLfloat epsilon = 1.0F / 256.0F;      /* from NV VP spec */
 970             GLfloat a[4], result[4];
 971             fetch_vector4(&inst->SrcReg[0], machine, a);
 972             a[0] = MAX2(a[0], 0.0F);
 973             a[1] = MAX2(a[1], 0.0F);
 974             /* XXX ARB version clamps a[3], NV version doesn't */
 975             a[3] = CLAMP(a[3], -(128.0F - epsilon), (128.0F - epsilon));
 976             result[0] = 1.0F;
 977             result[1] = a[0];
 978             /* XXX we could probably just use pow() here */
 979             if (a[0] > 0.0F) {
 980                if (a[1] == 0.0 && a[3] == 0.0)
 981                   result[2] = 1.0;
 982                else
 983                   result[2] = EXPF(a[3] * LOGF(a[1]));
 984             }
 985             else {
 986                result[2] = 0.0;
 987             }
 988             result[3] = 1.0F;
 989             store_vector4(inst, machine, result);
 990             if (DEBUG_PROG) {
 991                printf("LIT (%g %g %g %g) : (%g %g %g %g)\n",
 992                       result[0], result[1], result[2], result[3],
 993                       a[0], a[1], a[2], a[3]);
 994             }
 995          }
 996          break;
 997       case OPCODE_LOG:         /* log2 of |src|: x = exponent, y = mantissa in [1,2), z = log2, w = 1 */
 998          {
 999             GLfloat t[4], q[4], abs_t0;
1000             fetch_vector1(&inst->SrcReg[0], machine, t);
1001             abs_t0 = FABSF(t[0]);    /* the instruction operates on the magnitude of the operand */
1002             if (abs_t0 != 0.0F) {
1003                /* Since we really can't handle infinite values on VMS
1004                 * like other OSes we'll use __MAXFLOAT to represent
1005                 * infinity.  This may need some tweaking.
1006                 */
1007 #ifdef VMS
1008                if (abs_t0 == __MAXFLOAT)
1009 #else
1010                if (IS_INF_OR_NAN(abs_t0))
1011 #endif
1012                {
1013                   SET_POS_INFINITY(q[0]);
1014                   q[1] = 1.0F;
1015                   SET_POS_INFINITY(q[2]);
1016                }
1017                else {
1018                   int exponent;
1019                   GLfloat mantissa = FREXPF(abs_t0, &exponent); /* use |t|: a negative mantissa would make LOG2(q[1]) invalid */
1020                   q[0] = (GLfloat) (exponent - 1);
1021                   q[1] = (GLfloat) (2.0 * mantissa); /* map [.5, 1) -> [1, 2) */
1022                   q[2] = (GLfloat) (q[0] + LOG2(q[1]));
1023                }
1024             }
1025             else {
1026                SET_NEG_INFINITY(q[0]);  /* log2(0) -> -infinity */
1027                q[1] = 1.0F;
1028                SET_NEG_INFINITY(q[2]);
1029             }
1030             q[3] = 1.0;
1031             store_vector4(inst, machine, q);
1032          }
1033          break;
1034       case OPCODE_LRP:
1035          {
1036             GLfloat a[4], b[4], c[4], result[4];
1037             fetch_vector4(&inst->SrcReg[0], machine, a);
1038             fetch_vector4(&inst->SrcReg[1], machine, b);
1039             fetch_vector4(&inst->SrcReg[2], machine, c);
1040             result[0] = a[0] * b[0] + (1.0F - a[0]) * c[0];
1041             result[1] = a[1] * b[1] + (1.0F - a[1]) * c[1];
1042             result[2] = a[2] * b[2] + (1.0F - a[2]) * c[2];
1043             result[3] = a[3] * b[3] + (1.0F - a[3]) * c[3];
1044             store_vector4(inst, machine, result);
1045             if (DEBUG_PROG) {
1046                printf("LRP (%g %g %g %g) = (%g %g %g %g), "
1047                       "(%g %g %g %g), (%g %g %g %g)\n",
1048                       result[0], result[1], result[2], result[3],
1049                       a[0], a[1], a[2], a[3],
1050                       b[0], b[1], b[2], b[3], c[0], c[1], c[2], c[3]);
1051             }
1052          }
1053          break;
1054       case OPCODE_MAD:
1055          {
1056             GLfloat a[4], b[4], c[4], result[4];
1057             fetch_vector4(&inst->SrcReg[0], machine, a);
1058             fetch_vector4(&inst->SrcReg[1], machine, b);
1059             fetch_vector4(&inst->SrcReg[2], machine, c);
1060             result[0] = a[0] * b[0] + c[0];
1061             result[1] = a[1] * b[1] + c[1];
1062             result[2] = a[2] * b[2] + c[2];
1063             result[3] = a[3] * b[3] + c[3];
1064             store_vector4(inst, machine, result);
1065             if (DEBUG_PROG) {
1066                printf("MAD (%g %g %g %g) = (%g %g %g %g) * "
1067                       "(%g %g %g %g) + (%g %g %g %g)\n",
1068                       result[0], result[1], result[2], result[3],
1069                       a[0], a[1], a[2], a[3],
1070                       b[0], b[1], b[2], b[3], c[0], c[1], c[2], c[3]);
1071             }
1072          }
1073          break;
1074       case OPCODE_MAX:
1075          {
1076             GLfloat a[4], b[4], result[4];
1077             fetch_vector4(&inst->SrcReg[0], machine, a);
1078             fetch_vector4(&inst->SrcReg[1], machine, b);
1079             result[0] = MAX2(a[0], b[0]);
1080             result[1] = MAX2(a[1], b[1]);
1081             result[2] = MAX2(a[2], b[2]);
1082             result[3] = MAX2(a[3], b[3]);
1083             store_vector4(inst, machine, result);
1084             if (DEBUG_PROG) {
1085                printf("MAX (%g %g %g %g) = (%g %g %g %g), (%g %g %g %g)\n",
1086                       result[0], result[1], result[2], result[3],
1087                       a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
1088             }
1089          }
1090          break;
1091       case OPCODE_MIN:
1092          {
1093             GLfloat a[4], b[4], result[4];
1094             fetch_vector4(&inst->SrcReg[0], machine, a);
1095             fetch_vector4(&inst->SrcReg[1], machine, b);
1096             result[0] = MIN2(a[0], b[0]);
1097             result[1] = MIN2(a[1], b[1]);
1098             result[2] = MIN2(a[2], b[2]);
1099             result[3] = MIN2(a[3], b[3]);
1100             store_vector4(inst, machine, result);
1101          }
1102          break;
1103       case OPCODE_MOV:
1104          {
1105             GLfloat result[4];
1106             fetch_vector4(&inst->SrcReg[0], machine, result);
1107             store_vector4(inst, machine, result);
1108             if (DEBUG_PROG) {
1109                printf("MOV (%g %g %g %g)\n",
1110                       result[0], result[1], result[2], result[3]);
1111             }
1112          }
1113          break;
1114       case OPCODE_MUL:
1115          {
1116             GLfloat a[4], b[4], result[4];
1117             fetch_vector4(&inst->SrcReg[0], machine, a);
1118             fetch_vector4(&inst->SrcReg[1], machine, b);
1119             result[0] = a[0] * b[0];
1120             result[1] = a[1] * b[1];
1121             result[2] = a[2] * b[2];
1122             result[3] = a[3] * b[3];
1123             store_vector4(inst, machine, result);
1124             if (DEBUG_PROG) {
1125                printf("MUL (%g %g %g %g) = (%g %g %g %g) * (%g %g %g %g)\n",
1126                       result[0], result[1], result[2], result[3],
1127                       a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
1128             }
1129          }
1130          break;
1131       case OPCODE_NOISE1:
1132          {
1133             GLfloat a[4], result[4];
1134             fetch_vector1(&inst->SrcReg[0], machine, a);
1135             result[0] =
1136                result[1] =
1137                result[2] = result[3] = _slang_library_noise1(a[0]);
1138             store_vector4(inst, machine, result);
1139          }
1140          break;
1141       case OPCODE_NOISE2:
1142          {
1143             GLfloat a[4], result[4];
1144             fetch_vector4(&inst->SrcReg[0], machine, a);
1145             result[0] =
1146                result[1] =
1147                result[2] = result[3] = _slang_library_noise2(a[0], a[1]);
1148             store_vector4(inst, machine, result);
1149          }
1150          break;
1151       case OPCODE_NOISE3:
1152          {
1153             GLfloat a[4], result[4];
1154             fetch_vector4(&inst->SrcReg[0], machine, a);
1155             result[0] =
1156                result[1] =
1157                result[2] =
1158                result[3] = _slang_library_noise3(a[0], a[1], a[2]);
1159             store_vector4(inst, machine, result);
1160          }
1161          break;
1162       case OPCODE_NOISE4:
1163          {
1164             GLfloat a[4], result[4];
1165             fetch_vector4(&inst->SrcReg[0], machine, a);
1166             result[0] =
1167                result[1] =
1168                result[2] =
1169                result[3] = _slang_library_noise4(a[0], a[1], a[2], a[3]);
1170             store_vector4(inst, machine, result);
1171          }
1172          break;
1173       case OPCODE_NOP:
1174          break;
1175       case OPCODE_NOT:         /* bitwise NOT */
1176          {
1177             GLuint a[4], result[4];
1178             fetch_vector4ui(&inst->SrcReg[0], machine, a);
1179             result[0] = ~a[0];
1180             result[1] = ~a[1];
1181             result[2] = ~a[2];
1182             result[3] = ~a[3];
1183             store_vector4ui(inst, machine, result);
1184          }
1185          break;
1186       case OPCODE_NRM3:        /* 3-component normalization: xyz = a.xyz / |a.xyz| */
1187          {
1188             GLfloat a[4], result[4];
1189             GLfloat tmp;             /* squared length first, then the scale factor */
1190             fetch_vector4(&inst->SrcReg[0], machine, a);
1191             tmp = a[0] * a[0] + a[1] * a[1] + a[2] * a[2];
1192             if (tmp != 0.0F)         /* zero vector: keep scale 0 so the result is 0 */
1193                tmp = INV_SQRTF(tmp); /* 1/length; 1.0F/tmp would scale by 1/length^2 */
1194             result[0] = tmp * a[0];
1195             result[1] = tmp * a[1];
1196             result[2] = tmp * a[2];
1197             result[3] = 0.0;  /* undefined, but prevent valgrind warnings */
1198             store_vector4(inst, machine, result);
1199          }
1200          break;
1201       case OPCODE_NRM4:        /* 4-component normalization: xyzw = a / |a| */
1202          {
1203             GLfloat a[4], result[4];
1204             GLfloat tmp;             /* squared length first, then the scale factor */
1205             fetch_vector4(&inst->SrcReg[0], machine, a);
1206             tmp = a[0] * a[0] + a[1] * a[1] + a[2] * a[2] + a[3] * a[3];
1207             if (tmp != 0.0F)         /* zero vector: keep scale 0 so the result is 0 */
1208                tmp = INV_SQRTF(tmp); /* 1/length; 1.0F/tmp would scale by 1/length^2 */
1209             result[0] = tmp * a[0];
1210             result[1] = tmp * a[1];
1211             result[2] = tmp * a[2];
1212             result[3] = tmp * a[3];
1213             store_vector4(inst, machine, result);
1214          }
1215          break;
1216       case OPCODE_OR:          /* bitwise OR */
1217          {
1218             GLuint a[4], b[4], result[4];
1219             fetch_vector4ui(&inst->SrcReg[0], machine, a);
1220             fetch_vector4ui(&inst->SrcReg[1], machine, b);
1221             result[0] = a[0] | b[0];
1222             result[1] = a[1] | b[1];
1223             result[2] = a[2] | b[2];
1224             result[3] = a[3] | b[3];
1225             store_vector4ui(inst, machine, result);
1226          }
1227          break;
1228       case OPCODE_PK2H:        /* pack two 16-bit floats in one 32-bit float */
1229          {
1230             GLfloat a[4];
1231             GLuint result[4];
1232             GLhalfNV hx, hy;         /* half-float encodings of a[0] and a[1] */
1233             fetch_vector4(&inst->SrcReg[0], machine, a);
1234             hx = _mesa_float_to_half(a[0]);
1235             hy = _mesa_float_to_half(a[1]);
1236             result[0] =              /* the packed word is replicated to all four components */
1237             result[1] =
1238             result[2] =
1239             result[3] = (GLuint) hx | ((GLuint) hy << 16); /* widen before shifting; assumes GLhalfNV is 16-bit (would promote to signed int) - TODO confirm */
1240             store_vector4ui(inst, machine, result);
1241          }
1242          break;
1243       case OPCODE_PK2US:       /* pack two GLushorts into one 32-bit float */
1244          {
1245             GLfloat a[4];
1246             GLuint result[4], usx, usy;
1247             fetch_vector4(&inst->SrcReg[0], machine, a);
1248             a[0] = CLAMP(a[0], 0.0F, 1.0F);
1249             a[1] = CLAMP(a[1], 0.0F, 1.0F);
1250             usx = IROUND(a[0] * 65535.0F);
1251             usy = IROUND(a[1] * 65535.0F);
1252             result[0] =
1253             result[1] =
1254             result[2] =
1255             result[3] = usx | (usy << 16);
1256             store_vector4ui(inst, machine, result);
1257          }
1258          break;
1259       case OPCODE_PK4B:        /* pack four GLbytes into one 32-bit float */
1260          {
1261             GLfloat a[4];
1262             GLuint result[4], ubx, uby, ubz, ubw;
1263             fetch_vector4(&inst->SrcReg[0], machine, a);
1264             a[0] = CLAMP(a[0], -128.0F / 127.0F, 1.0F);
1265             a[1] = CLAMP(a[1], -128.0F / 127.0F, 1.0F);
1266             a[2] = CLAMP(a[2], -128.0F / 127.0F, 1.0F);
1267             a[3] = CLAMP(a[3], -128.0F / 127.0F, 1.0F);
1268             ubx = IROUND(127.0F * a[0] + 128.0F);
1269             uby = IROUND(127.0F * a[1] + 128.0F);
1270             ubz = IROUND(127.0F * a[2] + 128.0F);
1271             ubw = IROUND(127.0F * a[3] + 128.0F);
1272             result[0] =
1273             result[1] =
1274             result[2] =
1275             result[3] = ubx | (uby << 8) | (ubz << 16) | (ubw << 24);
1276             store_vector4ui(inst, machine, result);
1277          }
1278          break;
1279       case OPCODE_PK4UB:       /* pack four GLubytes into one 32-bit float */
1280          {
1281             GLfloat a[4];
1282             GLuint result[4], ubx, uby, ubz, ubw;
1283             fetch_vector4(&inst->SrcReg[0], machine, a);
1284             a[0] = CLAMP(a[0], 0.0F, 1.0F);
1285             a[1] = CLAMP(a[1], 0.0F, 1.0F);
1286             a[2] = CLAMP(a[2], 0.0F, 1.0F);
1287             a[3] = CLAMP(a[3], 0.0F, 1.0F);
1288             ubx = IROUND(255.0F * a[0]);
1289             uby = IROUND(255.0F * a[1]);
1290             ubz = IROUND(255.0F * a[2]);
1291             ubw = IROUND(255.0F * a[3]);
1292             result[0] =
1293             result[1] =
1294             result[2] =
1295             result[3] = ubx | (uby << 8) | (ubz << 16) | (ubw << 24);
1296             store_vector4ui(inst, machine, result);
1297          }
1298          break;
1299       case OPCODE_POW:
1300          {
1301             GLfloat a[4], b[4], result[4];
1302             fetch_vector1(&inst->SrcReg[0], machine, a);
1303             fetch_vector1(&inst->SrcReg[1], machine, b);
1304             result[0] = result[1] = result[2] = result[3]
1305                = (GLfloat) _mesa_pow(a[0], b[0]);
1306             store_vector4(inst, machine, result);
1307          }
1308          break;
1309       case OPCODE_RCP:
1310          {
1311             GLfloat a[4], result[4];
1312             fetch_vector1(&inst->SrcReg[0], machine, a);
1313             if (DEBUG_PROG) {
1314                if (a[0] == 0)
1315                   printf("RCP(0)\n");
1316                else if (IS_INF_OR_NAN(a[0]))
1317                   printf("RCP(inf)\n");
1318             }
1319             result[0] = result[1] = result[2] = result[3] = 1.0F / a[0];
1320             store_vector4(inst, machine, result);
1321          }
1322          break;
1323       case OPCODE_RET:         /* return from subroutine (conditional) */
1324          if (eval_condition(machine, inst)) {
1325             if (machine->StackDepth == 0) {
1326                return GL_TRUE;  /* Per GL_NV_vertex_program2 spec */
1327             }
1328             /* subtract one because of pc++ in the for loop */
1329             pc = machine->CallStack[--machine->StackDepth] - 1;
1330          }
1331          break;
1332       case OPCODE_RFL:         /* reflection vector: 2*(axis.dir)/(axis.axis) * axis - dir */
1333          {
1334             GLfloat axis[4], dir[4], result[4], tmpX, tmpW;
1335             fetch_vector4(&inst->SrcReg[0], machine, axis);
1336             fetch_vector4(&inst->SrcReg[1], machine, dir);
1337             tmpW = DOT3(axis, axis);  /* NOTE(review): zero-length axis makes the divide below produce inf/NaN */
1338             tmpX = (2.0F * DOT3(axis, dir)) / tmpW;
1339             result[0] = tmpX * axis[0] - dir[0];
1340             result[1] = tmpX * axis[1] - dir[1];
1341             result[2] = tmpX * axis[2] - dir[2];
1342             result[3] = 0.0F;  /* not computed by RFL; set so store_vector4 is never handed an uninitialized value. XXX enforce in parser! */
1343             store_vector4(inst, machine, result);
1344          }
1345          break;
1346       case OPCODE_RSQ:         /* 1 / sqrt() */
1347          {
1348             GLfloat a[4], result[4];
1349             fetch_vector1(&inst->SrcReg[0], machine, a);
1350             a[0] = FABSF(a[0]);
1351             result[0] = result[1] = result[2] = result[3] = INV_SQRTF(a[0]);
1352             store_vector4(inst, machine, result);
1353             if (DEBUG_PROG) {
1354                printf("RSQ %g = 1/sqrt(|%g|)\n", result[0], a[0]);
1355             }
1356          }
1357          break;
1358       case OPCODE_SCS:         /* sine and cos */
1359          {
1360             GLfloat a[4], result[4];
1361             fetch_vector1(&inst->SrcReg[0], machine, a);
1362             result[0] = (GLfloat) _mesa_cos(a[0]);
1363             result[1] = (GLfloat) _mesa_sin(a[0]);
1364             result[2] = 0.0;    /* undefined! */
1365             result[3] = 0.0;    /* undefined! */
1366             store_vector4(inst, machine, result);
1367          }
1368          break;
1369       case OPCODE_SEQ:         /* set on equal */
1370          {
1371             GLfloat a[4], b[4], result[4];
1372             fetch_vector4(&inst->SrcReg[0], machine, a);
1373             fetch_vector4(&inst->SrcReg[1], machine, b);
1374             result[0] = (a[0] == b[0]) ? 1.0F : 0.0F;
1375             result[1] = (a[1] == b[1]) ? 1.0F : 0.0F;
1376             result[2] = (a[2] == b[2]) ? 1.0F : 0.0F;
1377             result[3] = (a[3] == b[3]) ? 1.0F : 0.0F;
1378             store_vector4(inst, machine, result);
1379             if (DEBUG_PROG) {
1380                printf("SEQ (%g %g %g %g) = (%g %g %g %g) == (%g %g %g %g)\n",
1381                       result[0], result[1], result[2], result[3],
1382                       a[0], a[1], a[2], a[3],
1383                       b[0], b[1], b[2], b[3]);
1384             }
1385          }
1386          break;
1387       case OPCODE_SFL:         /* set false, operands ignored */
1388          {
1389             static const GLfloat result[4] = { 0.0F, 0.0F, 0.0F, 0.0F };
1390             store_vector4(inst, machine, result);
1391          }
1392          break;
1393       case OPCODE_SGE:         /* set on greater or equal */
1394          {
1395             GLfloat a[4], b[4], result[4];
1396             fetch_vector4(&inst->SrcReg[0], machine, a);
1397             fetch_vector4(&inst->SrcReg[1], machine, b);
1398             result[0] = (a[0] >= b[0]) ? 1.0F : 0.0F;
1399             result[1] = (a[1] >= b[1]) ? 1.0F : 0.0F;
1400             result[2] = (a[2] >= b[2]) ? 1.0F : 0.0F;
1401             result[3] = (a[3] >= b[3]) ? 1.0F : 0.0F;
1402             store_vector4(inst, machine, result);
1403             if (DEBUG_PROG) {
1404                printf("SGE (%g %g %g %g) = (%g %g %g %g) >= (%g %g %g %g)\n",
1405                       result[0], result[1], result[2], result[3],
1406                       a[0], a[1], a[2], a[3],
1407                       b[0], b[1], b[2], b[3]);
1408             }
1409          }
1410          break;
1411       case OPCODE_SGT:         /* set on greater */
1412          {
1413             GLfloat a[4], b[4], result[4];
1414             fetch_vector4(&inst->SrcReg[0], machine, a);
1415             fetch_vector4(&inst->SrcReg[1], machine, b);
1416             result[0] = (a[0] > b[0]) ? 1.0F : 0.0F;
1417             result[1] = (a[1] > b[1]) ? 1.0F : 0.0F;
1418             result[2] = (a[2] > b[2]) ? 1.0F : 0.0F;
1419             result[3] = (a[3] > b[3]) ? 1.0F : 0.0F;
1420             store_vector4(inst, machine, result);
1421             if (DEBUG_PROG) {
1422                printf("SGT (%g %g %g %g) = (%g %g %g %g) > (%g %g %g %g)\n",
1423                       result[0], result[1], result[2], result[3],
1424                       a[0], a[1], a[2], a[3],
1425                       b[0], b[1], b[2], b[3]);
1426             }
1427          }
1428          break;
1429       case OPCODE_SIN:
1430          {
1431             GLfloat a[4], result[4];
1432             fetch_vector1(&inst->SrcReg[0], machine, a);
1433             result[0] = result[1] = result[2] = result[3]
1434                = (GLfloat) _mesa_sin(a[0]);
1435             store_vector4(inst, machine, result);
1436          }
1437          break;
1438       case OPCODE_SLE:         /* set on less or equal */
1439          {
1440             GLfloat a[4], b[4], result[4];
1441             fetch_vector4(&inst->SrcReg[0], machine, a);
1442             fetch_vector4(&inst->SrcReg[1], machine, b);
1443             result[0] = (a[0] <= b[0]) ? 1.0F : 0.0F;
1444             result[1] = (a[1] <= b[1]) ? 1.0F : 0.0F;
1445             result[2] = (a[2] <= b[2]) ? 1.0F : 0.0F;
1446             result[3] = (a[3] <= b[3]) ? 1.0F : 0.0F;
1447             store_vector4(inst, machine, result);
1448             if (DEBUG_PROG) {
1449                printf("SLE (%g %g %g %g) = (%g %g %g %g) <= (%g %g %g %g)\n",
1450                       result[0], result[1], result[2], result[3],
1451                       a[0], a[1], a[2], a[3],
1452                       b[0], b[1], b[2], b[3]);
1453             }
1454          }
1455          break;
1456       case OPCODE_SLT:         /* set on less */
1457          {
1458             GLfloat a[4], b[4], result[4];
1459             fetch_vector4(&inst->SrcReg[0], machine, a);
1460             fetch_vector4(&inst->SrcReg[1], machine, b);
1461             result[0] = (a[0] < b[0]) ? 1.0F : 0.0F;
1462             result[1] = (a[1] < b[1]) ? 1.0F : 0.0F;
1463             result[2] = (a[2] < b[2]) ? 1.0F : 0.0F;
1464             result[3] = (a[3] < b[3]) ? 1.0F : 0.0F;
1465             store_vector4(inst, machine, result);
1466             if (DEBUG_PROG) {
1467                printf("SLT (%g %g %g %g) = (%g %g %g %g) < (%g %g %g %g)\n",
1468                       result[0], result[1], result[2], result[3],
1469                       a[0], a[1], a[2], a[3],
1470                       b[0], b[1], b[2], b[3]);
1471             }
1472          }
1473          break;
1474       case OPCODE_SNE:         /* set on not equal */
1475          {
1476             GLfloat a[4], b[4], result[4];
1477             fetch_vector4(&inst->SrcReg[0], machine, a);
1478             fetch_vector4(&inst->SrcReg[1], machine, b);
1479             result[0] = (a[0] != b[0]) ? 1.0F : 0.0F;
1480             result[1] = (a[1] != b[1]) ? 1.0F : 0.0F;
1481             result[2] = (a[2] != b[2]) ? 1.0F : 0.0F;
1482             result[3] = (a[3] != b[3]) ? 1.0F : 0.0F;
1483             store_vector4(inst, machine, result);
1484             if (DEBUG_PROG) {
1485                printf("SNE (%g %g %g %g) = (%g %g %g %g) != (%g %g %g %g)\n",
1486                       result[0], result[1], result[2], result[3],
1487                       a[0], a[1], a[2], a[3],
1488                       b[0], b[1], b[2], b[3]);
1489             }
1490          }
1491          break;
1492       case OPCODE_SSG:         /* set sign (-1, 0 or +1) */
1493          {
1494             GLfloat a[4], result[4];
1495             fetch_vector4(&inst->SrcReg[0], machine, a);
1496             result[0] = (GLfloat) ((a[0] > 0.0F) - (a[0] < 0.0F));
1497             result[1] = (GLfloat) ((a[1] > 0.0F) - (a[1] < 0.0F));
1498             result[2] = (GLfloat) ((a[2] > 0.0F) - (a[2] < 0.0F));
1499             result[3] = (GLfloat) ((a[3] > 0.0F) - (a[3] < 0.0F));
1500             store_vector4(inst, machine, result);
1501          }
1502          break;
1503       case OPCODE_STR:         /* set true, operands ignored */
1504          {
1505             static const GLfloat result[4] = { 1.0F, 1.0F, 1.0F, 1.0F };
1506             store_vector4(inst, machine, result);
1507          }
1508          break;
1509       case OPCODE_SUB:
1510          {
1511             GLfloat a[4], b[4], result[4];
1512             fetch_vector4(&inst->SrcReg[0], machine, a);
1513             fetch_vector4(&inst->SrcReg[1], machine, b);
1514             result[0] = a[0] - b[0];
1515             result[1] = a[1] - b[1];
1516             result[2] = a[2] - b[2];
1517             result[3] = a[3] - b[3];
1518             store_vector4(inst, machine, result);
1519             if (DEBUG_PROG) {
1520                printf("SUB (%g %g %g %g) = (%g %g %g %g) - (%g %g %g %g)\n",
1521                       result[0], result[1], result[2], result[3],
1522                       a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
1523             }
1524          }
1525          break;
1526       case OPCODE_SWZ:         /* extended swizzle */
1527          {
1528             const struct prog_src_register *source = &inst->SrcReg[0];
1529             const GLfloat *src = get_register_pointer(source, machine);
1530             GLfloat result[4];
1531             GLuint i;
1532             for (i = 0; i < 4; i++) {
1533                const GLuint swz = GET_SWZ(source->Swizzle, i);
1534                if (swz == SWIZZLE_ZERO)
1535                   result[i] = 0.0;
1536                else if (swz == SWIZZLE_ONE)
1537                   result[i] = 1.0;
1538                else {
1539                   ASSERT(swz >= 0);
1540                   ASSERT(swz <= 3);
1541                   result[i] = src[swz];
1542                }
1543                if (source->NegateBase & (1 << i))
1544                   result[i] = -result[i];
1545             }
1546             store_vector4(inst, machine, result);
1547          }
1548          break;
1549       case OPCODE_TEX:         /* Both ARB and NV frag prog */
1550          /* Simple texel lookup */
1551          {
1552             GLfloat texcoord[4], color[4];
1553             fetch_vector4(&inst->SrcReg[0], machine, texcoord);
1554
1555             fetch_texel(ctx, machine, inst, texcoord, 0.0, color);
1556
1557             if (DEBUG_PROG) {
1558                printf("TEX (%g, %g, %g, %g) = texture[%d][%g, %g, %g, %g]\n",
1559                       color[0], color[1], color[2], color[3],
1560                       inst->TexSrcUnit,
1561                       texcoord[0], texcoord[1], texcoord[2], texcoord[3]);
1562             }
1563             store_vector4(inst, machine, color);
1564          }
1565          break;
1566       case OPCODE_TXB:         /* GL_ARB_fragment_program only */
1567          /* Texel lookup with LOD bias */
1568          {
1569             const struct gl_texture_unit *texUnit
1570                = &ctx->Texture.Unit[inst->TexSrcUnit];
1571             GLfloat texcoord[4], color[4], lodBias;
1572
1573             fetch_vector4(&inst->SrcReg[0], machine, texcoord);
1574
1575             /* texcoord[3] is the bias to add to lambda */
1576             lodBias = texUnit->LodBias + texcoord[3];
1577             if (texUnit->_Current) {
1578                lodBias += texUnit->_Current->LodBias;
1579             }
1580
1581             fetch_texel(ctx, machine, inst, texcoord, lodBias, color);
1582
1583             store_vector4(inst, machine, color);
1584          }
1585          break;
1586       case OPCODE_TXD:         /* GL_NV_fragment_program only */
1587          /* Texture lookup w/ partial derivatives for LOD */
1588          {
1589             GLfloat texcoord[4], dtdx[4], dtdy[4], color[4];
1590             fetch_vector4(&inst->SrcReg[0], machine, texcoord);
1591             fetch_vector4(&inst->SrcReg[1], machine, dtdx);
1592             fetch_vector4(&inst->SrcReg[2], machine, dtdy);
1593             machine->FetchTexelDeriv(ctx, texcoord, dtdx, dtdy,
1594                                      0.0, /* lodBias */
1595                                      inst->TexSrcUnit, color);
1596             store_vector4(inst, machine, color);
1597          }
1598          break;
1599       case OPCODE_TXP:         /* GL_ARB_fragment_program only */
1600          /* Texture lookup w/ projective divide */
1601          {
1602             GLfloat texcoord[4], color[4];
1603
1604             fetch_vector4(&inst->SrcReg[0], machine, texcoord);
1605             /* Not so sure about this test - if texcoord[3] is
1606              * zero, we'd probably be fine except for an ASSERT in
1607              * IROUND_POS() which gets triggered by the inf values created.
1608              */
1609             if (texcoord[3] != 0.0) {
1610                texcoord[0] /= texcoord[3];
1611                texcoord[1] /= texcoord[3];
1612                texcoord[2] /= texcoord[3];
1613             }
1614
1615             fetch_texel(ctx, machine, inst, texcoord, 0.0, color);
1616
1617             store_vector4(inst, machine, color);
1618          }
1619          break;
1620       case OPCODE_TXP_NV:      /* GL_NV_fragment_program only */
1621          /* Texture lookup w/ projective divide, as above, but do not
1622           * do the divide by w if sampling from a cube map.
1623           */
1624          {
1625             GLfloat texcoord[4], color[4];
1626
1627             fetch_vector4(&inst->SrcReg[0], machine, texcoord);
1628             if (inst->TexSrcTarget != TEXTURE_CUBE_INDEX &&
1629                 texcoord[3] != 0.0) {
1630                texcoord[0] /= texcoord[3];
1631                texcoord[1] /= texcoord[3];
1632                texcoord[2] /= texcoord[3];
1633             }
1634
1635             fetch_texel(ctx, machine, inst, texcoord, 0.0, color);
1636
1637             store_vector4(inst, machine, color);
1638          }
1639          break;
1640       case OPCODE_TRUNC:       /* truncate toward zero */
1641          {
1642             GLfloat a[4], result[4];
1643             fetch_vector4(&inst->SrcReg[0], machine, a);
1644             result[0] = (GLfloat) (GLint) a[0];
1645             result[1] = (GLfloat) (GLint) a[1];
1646             result[2] = (GLfloat) (GLint) a[2];
1647             result[3] = (GLfloat) (GLint) a[3];
1648             store_vector4(inst, machine, result);
1649          }
1650          break;
1651       case OPCODE_UP2H:        /* unpack two 16-bit floats */
1652          {
1653             GLfloat a[4], result[4];
1654             const GLuint *rawBits = (const GLuint *) a;
1655             GLhalfNV hx, hy;
1656             fetch_vector1(&inst->SrcReg[0], machine, a);
1657             hx = rawBits[0] & 0xffff;
1658             hy = rawBits[0] >> 16;
1659             result[0] = result[2] = _mesa_half_to_float(hx);
1660             result[1] = result[3] = _mesa_half_to_float(hy);
1661             store_vector4(inst, machine, result);
1662          }
1663          break;
1664       case OPCODE_UP2US:       /* unpack two GLushorts */
1665          {
1666             GLfloat a[4], result[4];
1667             const GLuint *rawBits = (const GLuint *) a;
1668             GLushort usx, usy;
1669             fetch_vector1(&inst->SrcReg[0], machine, a);
1670             usx = rawBits[0] & 0xffff;
1671             usy = rawBits[0] >> 16;
1672             result[0] = result[2] = usx * (1.0f / 65535.0f);
1673             result[1] = result[3] = usy * (1.0f / 65535.0f);
1674             store_vector4(inst, machine, result);
1675          }
1676          break;
1677       case OPCODE_UP4B:        /* unpack four GLbytes */
1678          {
1679             GLfloat a[4], result[4];
1680             const GLuint *rawBits = (const GLuint *) a;
1681             fetch_vector1(&inst->SrcReg[0], machine, a);
1682             result[0] = (((rawBits[0] >> 0) & 0xff) - 128) / 127.0F;
1683             result[1] = (((rawBits[0] >> 8) & 0xff) - 128) / 127.0F;
1684             result[2] = (((rawBits[0] >> 16) & 0xff) - 128) / 127.0F;
1685             result[3] = (((rawBits[0] >> 24) & 0xff) - 128) / 127.0F;
1686             store_vector4(inst, machine, result);
1687          }
1688          break;
1689       case OPCODE_UP4UB:       /* unpack four GLubytes */
1690          {
1691             GLfloat a[4], result[4];
1692             const GLuint *rawBits = (const GLuint *) a;
1693             fetch_vector1(&inst->SrcReg[0], machine, a);
1694             result[0] = ((rawBits[0] >> 0) & 0xff) / 255.0F;
1695             result[1] = ((rawBits[0] >> 8) & 0xff) / 255.0F;
1696             result[2] = ((rawBits[0] >> 16) & 0xff) / 255.0F;
1697             result[3] = ((rawBits[0] >> 24) & 0xff) / 255.0F;
1698             store_vector4(inst, machine, result);
1699          }
1700          break;
1701       case OPCODE_XOR:         /* bitwise XOR */
1702          {
1703             GLuint a[4], b[4], result[4];
1704             fetch_vector4ui(&inst->SrcReg[0], machine, a);
1705             fetch_vector4ui(&inst->SrcReg[1], machine, b);
1706             result[0] = a[0] ^ b[0];
1707             result[1] = a[1] ^ b[1];
1708             result[2] = a[2] ^ b[2];
1709             result[3] = a[3] ^ b[3];
1710             store_vector4ui(inst, machine, result);
1711          }
1712          break;
1713       case OPCODE_XPD:         /* cross product */
1714          {
1715             GLfloat a[4], b[4], result[4];
1716             fetch_vector4(&inst->SrcReg[0], machine, a);
1717             fetch_vector4(&inst->SrcReg[1], machine, b);
1718             result[0] = a[1] * b[2] - a[2] * b[1];
1719             result[1] = a[2] * b[0] - a[0] * b[2];
1720             result[2] = a[0] * b[1] - a[1] * b[0];
1721             result[3] = 1.0;
1722             store_vector4(inst, machine, result);
1723             if (DEBUG_PROG) {
1724                printf("XPD (%g %g %g %g) = (%g %g %g) X (%g %g %g)\n",
1725                       result[0], result[1], result[2], result[3],
1726                       a[0], a[1], a[2], b[0], b[1], b[2]);
1727             }
1728          }
1729          break;
1730       case OPCODE_X2D:         /* 2-D matrix transform */
1731          {
1732             GLfloat a[4], b[4], c[4], result[4];
1733             fetch_vector4(&inst->SrcReg[0], machine, a);
1734             fetch_vector4(&inst->SrcReg[1], machine, b);
1735             fetch_vector4(&inst->SrcReg[2], machine, c);
1736             result[0] = a[0] + b[0] * c[0] + b[1] * c[1];
1737             result[1] = a[1] + b[0] * c[2] + b[1] * c[3];
1738             result[2] = a[2] + b[0] * c[0] + b[1] * c[1];
1739             result[3] = a[3] + b[0] * c[2] + b[1] * c[3];
1740             store_vector4(inst, machine, result);
1741          }
1742          break;
1743       case OPCODE_PRINT:
1744          {
1745             if (inst->SrcReg[0].File != -1) {
1746                GLfloat a[4];
1747                fetch_vector4(&inst->SrcReg[0], machine, a);
1748                _mesa_printf("%s%g, %g, %g, %g\n", (const char *) inst->Data,
1749                             a[0], a[1], a[2], a[3]);
1750             }
1751             else {
1752                _mesa_printf("%s\n", (const char *) inst->Data);
1753             }
1754          }
1755          break;
1756       case OPCODE_END:
1757          return GL_TRUE;
1758       default:
1759          _mesa_problem(ctx, "Bad opcode %d in _mesa_execute_program",
1760                        inst->Opcode);
1761          return GL_TRUE;        /* return value doesn't matter */
1762       }
1763
1764       numExec++;
1765       if (numExec > maxExec) {
1766          _mesa_problem(ctx, "Infinite loop detected in fragment program");
1767          return GL_TRUE;
1768       }
1769
1770    } /* for pc */
1771
1772 #if FEATURE_MESA_program_debug
1773    CurrentMachine = NULL;
1774 #endif
1775
1776    return GL_TRUE;
1777 }