src/mesa/shader/prog_execute.c

   1 /*
   2  * Mesa 3-D graphics library
   3  * Version:  7.3
   4  *
   5  * Copyright (C) 1999-2008  Brian Paul   All Rights Reserved.
   6  *
   7  * Permission is hereby granted, free of charge, to any person obtaining a
   8  * copy of this software and associated documentation files (the "Software"),
   9  * to deal in the Software without restriction, including without limitation
  10  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  11  * and/or sell copies of the Software, and to permit persons to whom the
  12  * Software is furnished to do so, subject to the following conditions:
  13  *
  14  * The above copyright notice and this permission notice shall be included
  15  * in all copies or substantial portions of the Software.
  16  *
  17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  18  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  20  * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
  21  * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  22  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  23  */
  24
  25 /**
  26  * \file prog_execute.c
  27  * Software interpreter for vertex/fragment programs.
  28  * \author Brian Paul
  29  */
  30
  31 /*
  32  * NOTE: we do everything in single-precision floating point; we don't
  33  * currently observe the single/half/fixed-precision qualifiers.
  34  *
  35  */
  36
  37
  38 #include "main/glheader.h"
  39 #include "main/colormac.h"
  40 #include "main/context.h"
  41 #include "program.h"
  42 #include "prog_execute.h"
  43 #include "prog_instruction.h"
  44 #include "prog_parameter.h"
  45 #include "prog_print.h"
  46 #include "prog_noise.h"
  47
  48
  49 /* debug predicate */
  50 #define DEBUG_PROG 0
  51
  52
  53 /**
  54  * Set x to positive or negative infinity.
  55  */
  56 #if defined(USE_IEEE) || defined(_WIN32)
  57 #define SET_POS_INFINITY(x)  ( *((GLuint *) (void *)&x) = 0x7F800000 )
  58 #define SET_NEG_INFINITY(x)  ( *((GLuint *) (void *)&x) = 0xFF800000 )
  59 #elif defined(VMS)
  60 #define SET_POS_INFINITY(x)  x = __MAXFLOAT
  61 #define SET_NEG_INFINITY(x)  x = -__MAXFLOAT
  62 #else
  63 #define SET_POS_INFINITY(x)  x = (GLfloat) HUGE_VAL
  64 #define SET_NEG_INFINITY(x)  x = (GLfloat) -HUGE_VAL
  65 #endif
  66
  67 #define SET_FLOAT_BITS(x, bits) ((fi_type *) (void *) &(x))->i = bits
  68
  69
  70 static const GLfloat ZeroVec[4] = { 0.0F, 0.0F, 0.0F, 0.0F };
  71
  72
  73
  74 /**
  75  * Return a pointer to the 4-element float vector specified by the given
  76  * source register.
  77  */
  78 static INLINE const GLfloat *
  79 get_src_register_pointer(const struct prog_src_register *source,
  80                          const struct gl_program_machine *machine)
  81 {
  82    const struct gl_program *prog = machine->CurProgram;
  83    GLint reg = source->Index;
  84
  85    if (source->RelAddr) {
  86       /* add address register value to src index/offset */
  87       reg += machine->AddressReg[0][0];
  88       if (reg < 0) {
  89          return ZeroVec;
  90       }
  91    }
  92
  93    switch (source->File) {
  94    case PROGRAM_TEMPORARY:
  95       if (reg >= MAX_PROGRAM_TEMPS)
  96          return ZeroVec;
  97       return machine->Temporaries[reg];
  98
  99    case PROGRAM_INPUT:
 100       if (prog->Target == GL_VERTEX_PROGRAM_ARB) {
 101          if (reg >= VERT_ATTRIB_MAX)
 102             return ZeroVec;
 103          return machine->VertAttribs[reg];
 104       }
 105       else {
 106          if (reg >= FRAG_ATTRIB_MAX)
 107             return ZeroVec;
 108          return machine->Attribs[reg][machine->CurElement];
 109       }
 110
 111    case PROGRAM_OUTPUT:
 112       if (reg >= MAX_PROGRAM_OUTPUTS)
 113          return ZeroVec;
 114       return machine->Outputs[reg];
 115
 116    case PROGRAM_LOCAL_PARAM:
 117       if (reg >= MAX_PROGRAM_LOCAL_PARAMS)
 118          return ZeroVec;
 119       return machine->CurProgram->LocalParams[reg];
 120
 121    case PROGRAM_ENV_PARAM:
 122       if (reg >= MAX_PROGRAM_ENV_PARAMS)
 123          return ZeroVec;
 124       return machine->EnvParams[reg];
 125
 126    case PROGRAM_STATE_VAR:
 127       /* Fallthrough */
 128    case PROGRAM_CONSTANT:
 129       /* Fallthrough */
 130    case PROGRAM_UNIFORM:
 131       /* Fallthrough */
 132    case PROGRAM_NAMED_PARAM:
 133       if (reg >= (GLint) prog->Parameters->NumParameters)
 134          return ZeroVec;
 135       return prog->Parameters->ParameterValues[reg];
 136
 137    default:
 138       _mesa_problem(NULL,
 139          "Invalid src register file %d in get_src_register_pointer()",
 140          source->File);
 141       return NULL;
 142    }
 143 }
 144
 145
 146 /**
 147  * Return a pointer to the 4-element float vector specified by the given
 148  * destination register.
 149  */
 150 static INLINE GLfloat *
 151 get_dst_register_pointer(const struct prog_dst_register *dest,
 152                          struct gl_program_machine *machine)
 153 {
 154    static GLfloat dummyReg[4];
 155    GLint reg = dest->Index;
 156
 157    if (dest->RelAddr) {
 158       /* add address register value to src index/offset */
 159       reg += machine->AddressReg[0][0];
 160       if (reg < 0) {
 161          return dummyReg;
 162       }
 163    }
 164
 165    switch (dest->File) {
 166    case PROGRAM_TEMPORARY:
 167       if (reg >= MAX_PROGRAM_TEMPS)
 168          return dummyReg;
 169       return machine->Temporaries[reg];
 170
 171    case PROGRAM_OUTPUT:
 172       if (reg >= MAX_PROGRAM_OUTPUTS)
 173          return dummyReg;
 174       return machine->Outputs[reg];
 175
 176    case PROGRAM_WRITE_ONLY:
 177       return dummyReg;
 178
 179    default:
 180       _mesa_problem(NULL,
 181          "Invalid dest register file %d in get_dst_register_pointer()",
 182          dest->File);
 183       return NULL;
 184    }
 185 }
 186
 187
 188
 189 #if FEATURE_MESA_program_debug
 190 static struct gl_program_machine *CurrentMachine = NULL;
 191
 192 /**
 193  * For GL_MESA_program_debug.
 194  * Return current value (4*GLfloat) of a program register.
 195  * Called via ctx->Driver.GetProgramRegister().
 196  */
 197 void
 198 _mesa_get_program_register(GLcontext *ctx, enum register_file file,
 199                            GLuint index, GLfloat val[4])
 200 {
 201    if (CurrentMachine) {
 202       struct prog_src_register srcReg;
 203       const GLfloat *src;
 204       srcReg.File = file;
 205       srcReg.Index = index;
 206       src = get_src_register_pointer(&srcReg, CurrentMachine);
 207       COPY_4V(val, src);
 208    }
 209 }
 210 #endif /* FEATURE_MESA_program_debug */
 211
 212
 213 /**
 214  * Fetch a 4-element float vector from the given source register.
 215  * Apply swizzling and negating as needed.
 216  */
 217 static void
 218 fetch_vector4(const struct prog_src_register *source,
 219               const struct gl_program_machine *machine, GLfloat result[4])
 220 {
 221    const GLfloat *src = get_src_register_pointer(source, machine);
 222    ASSERT(src);
 223
 224    if (source->Swizzle == SWIZZLE_NOOP) {
 225       /* no swizzling */
 226       COPY_4V(result, src);
 227    }
 228    else {
 229       ASSERT(GET_SWZ(source->Swizzle, 0) <= 3);
 230       ASSERT(GET_SWZ(source->Swizzle, 1) <= 3);
 231       ASSERT(GET_SWZ(source->Swizzle, 2) <= 3);
 232       ASSERT(GET_SWZ(source->Swizzle, 3) <= 3);
 233       result[0] = src[GET_SWZ(source->Swizzle, 0)];
 234       result[1] = src[GET_SWZ(source->Swizzle, 1)];
 235       result[2] = src[GET_SWZ(source->Swizzle, 2)];
 236       result[3] = src[GET_SWZ(source->Swizzle, 3)];
 237    }
 238
 239    if (source->NegateBase) {
 240       result[0] = -result[0];
 241       result[1] = -result[1];
 242       result[2] = -result[2];
 243       result[3] = -result[3];
 244    }
 245    if (source->Abs) {
 246       result[0] = FABSF(result[0]);
 247       result[1] = FABSF(result[1]);
 248       result[2] = FABSF(result[2]);
 249       result[3] = FABSF(result[3]);
 250    }
 251    if (source->NegateAbs) {
 252       result[0] = -result[0];
 253       result[1] = -result[1];
 254       result[2] = -result[2];
 255       result[3] = -result[3];
 256    }
 257 }
 258
 259
 260 /**
 261  * Fetch a 4-element uint vector from the given source register.
 262  * Apply swizzling but not negation/abs.
 263  */
 264 static void
 265 fetch_vector4ui(const struct prog_src_register *source,
 266                 const struct gl_program_machine *machine, GLuint result[4])
 267 {
 268    const GLuint *src = (GLuint *) get_src_register_pointer(source, machine);
 269    ASSERT(src);
 270
 271    if (source->Swizzle == SWIZZLE_NOOP) {
 272       /* no swizzling */
 273       COPY_4V(result, src);
 274    }
 275    else {
 276       ASSERT(GET_SWZ(source->Swizzle, 0) <= 3);
 277       ASSERT(GET_SWZ(source->Swizzle, 1) <= 3);
 278       ASSERT(GET_SWZ(source->Swizzle, 2) <= 3);
 279       ASSERT(GET_SWZ(source->Swizzle, 3) <= 3);
 280       result[0] = src[GET_SWZ(source->Swizzle, 0)];
 281       result[1] = src[GET_SWZ(source->Swizzle, 1)];
 282       result[2] = src[GET_SWZ(source->Swizzle, 2)];
 283       result[3] = src[GET_SWZ(source->Swizzle, 3)];
 284    }
 285
 286    /* Note: no NegateBase, Abs, NegateAbs here */
 287 }
 288
 289
 290
 291 /**
 292  * Fetch the derivative with respect to X or Y for the given register.
 293  * XXX this currently only works for fragment program input attribs.
 294  */
 295 static void
 296 fetch_vector4_deriv(GLcontext * ctx,
 297                     const struct prog_src_register *source,
 298                     const struct gl_program_machine *machine,
 299                     char xOrY, GLfloat result[4])
 300 {
 301    if (source->File == PROGRAM_INPUT &&
 302        source->Index < (GLint) machine->NumDeriv) {
 303       const GLint col = machine->CurElement;
 304       const GLfloat w = machine->Attribs[FRAG_ATTRIB_WPOS][col][3];
 305       const GLfloat invQ = 1.0f / w;
 306       GLfloat deriv[4];
 307
 308       if (xOrY == 'X') {
 309          deriv[0] = machine->DerivX[source->Index][0] * invQ;
 310          deriv[1] = machine->DerivX[source->Index][1] * invQ;
 311          deriv[2] = machine->DerivX[source->Index][2] * invQ;
 312          deriv[3] = machine->DerivX[source->Index][3] * invQ;
 313       }
 314       else {
 315          deriv[0] = machine->DerivY[source->Index][0] * invQ;
 316          deriv[1] = machine->DerivY[source->Index][1] * invQ;
 317          deriv[2] = machine->DerivY[source->Index][2] * invQ;
 318          deriv[3] = machine->DerivY[source->Index][3] * invQ;
 319       }
 320
 321       result[0] = deriv[GET_SWZ(source->Swizzle, 0)];
 322       result[1] = deriv[GET_SWZ(source->Swizzle, 1)];
 323       result[2] = deriv[GET_SWZ(source->Swizzle, 2)];
 324       result[3] = deriv[GET_SWZ(source->Swizzle, 3)];
 325
 326       if (source->NegateBase) {
 327          result[0] = -result[0];
 328          result[1] = -result[1];
 329          result[2] = -result[2];
 330          result[3] = -result[3];
 331       }
 332       if (source->Abs) {
 333          result[0] = FABSF(result[0]);
 334          result[1] = FABSF(result[1]);
 335          result[2] = FABSF(result[2]);
 336          result[3] = FABSF(result[3]);
 337       }
 338       if (source->NegateAbs) {
 339          result[0] = -result[0];
 340          result[1] = -result[1];
 341          result[2] = -result[2];
 342          result[3] = -result[3];
 343       }
 344    }
 345    else {
 346       ASSIGN_4V(result, 0.0, 0.0, 0.0, 0.0);
 347    }
 348 }
 349
 350
 351 /**
 352  * As above, but only return result[0] element.
 353  */
 354 static void
 355 fetch_vector1(const struct prog_src_register *source,
 356               const struct gl_program_machine *machine, GLfloat result[4])
 357 {
 358    const GLfloat *src = get_src_register_pointer(source, machine);
 359    ASSERT(src);
 360
 361    result[0] = src[GET_SWZ(source->Swizzle, 0)];
 362
 363    if (source->NegateBase) {
 364       result[0] = -result[0];
 365    }
 366    if (source->Abs) {
 367       result[0] = FABSF(result[0]);
 368    }
 369    if (source->NegateAbs) {
 370       result[0] = -result[0];
 371    }
 372 }
 373
 374
 375 /**
 376  * Fetch texel from texture.  Use partial derivatives when possible.
 377  */
 378 static INLINE void
 379 fetch_texel(GLcontext *ctx,
 380             const struct gl_program_machine *machine,
 381             const struct prog_instruction *inst,
 382             const GLfloat texcoord[4], GLfloat lodBias,
 383             GLfloat color[4])
 384 {
 385    const GLuint unit = machine->Samplers[inst->TexSrcUnit];
 386
 387    /* Note: we only have the right derivatives for fragment input attribs.
 388     */
 389    if (machine->NumDeriv > 0 &&
 390        inst->SrcReg[0].File == PROGRAM_INPUT &&
 391        inst->SrcReg[0].Index == FRAG_ATTRIB_TEX0 + inst->TexSrcUnit) {
 392       /* simple texture fetch for which we should have derivatives */
 393       GLuint attr = inst->SrcReg[0].Index;
 394       machine->FetchTexelDeriv(ctx, texcoord,
 395                                machine->DerivX[attr],
 396                                machine->DerivY[attr],
 397                                lodBias, unit, color);
 398    }
 399    else {
 400       machine->FetchTexelLod(ctx, texcoord, lodBias, unit, color);
 401    }
 402 }
 403
 404
 405 /**
 406  * Test value against zero and return GT, LT, EQ or UN if NaN.
 407  */
 408 static INLINE GLuint
 409 generate_cc(float value)
 410 {
 411    if (value != value)
 412       return COND_UN;           /* NaN */
 413    if (value > 0.0F)
 414       return COND_GT;
 415    if (value < 0.0F)
 416       return COND_LT;
 417    return COND_EQ;
 418 }
 419
 420
 421 /**
 422  * Test if the ccMaskRule is satisfied by the given condition code.
 423  * Used to mask destination writes according to the current condition code.
 424  */
 425 static INLINE GLboolean
 426 test_cc(GLuint condCode, GLuint ccMaskRule)
 427 {
 428    switch (ccMaskRule) {
 429    case COND_EQ: return (condCode == COND_EQ);
 430    case COND_NE: return (condCode != COND_EQ);
 431    case COND_LT: return (condCode == COND_LT);
 432    case COND_GE: return (condCode == COND_GT || condCode == COND_EQ);
 433    case COND_LE: return (condCode == COND_LT || condCode == COND_EQ);
 434    case COND_GT: return (condCode == COND_GT);
 435    case COND_TR: return GL_TRUE;
 436    case COND_FL: return GL_FALSE;
 437    default:      return GL_TRUE;
 438    }
 439 }
 440
 441
 442 /**
 443  * Evaluate the 4 condition codes against a predicate and return GL_TRUE
 444  * or GL_FALSE to indicate result.
 445  */
 446 static INLINE GLboolean
 447 eval_condition(const struct gl_program_machine *machine,
 448                const struct prog_instruction *inst)
 449 {
 450    const GLuint swizzle = inst->DstReg.CondSwizzle;
 451    const GLuint condMask = inst->DstReg.CondMask;
 452    if (test_cc(machine->CondCodes[GET_SWZ(swizzle, 0)], condMask) ||
 453        test_cc(machine->CondCodes[GET_SWZ(swizzle, 1)], condMask) ||
 454        test_cc(machine->CondCodes[GET_SWZ(swizzle, 2)], condMask) ||
 455        test_cc(machine->CondCodes[GET_SWZ(swizzle, 3)], condMask)) {
 456       return GL_TRUE;
 457    }
 458    else {
 459       return GL_FALSE;
 460    }
 461 }
 462
 463
 464
 465 /**
 466  * Store 4 floats into a register.  Observe the instructions saturate and
 467  * set-condition-code flags.
 468  */
 469 static void
 470 store_vector4(const struct prog_instruction *inst,
 471               struct gl_program_machine *machine, const GLfloat value[4])
 472 {
 473    const struct prog_dst_register *dstReg = &(inst->DstReg);
 474    const GLboolean clamp = inst->SaturateMode == SATURATE_ZERO_ONE;
 475    GLuint writeMask = dstReg->WriteMask;
 476    GLfloat clampedValue[4];
 477    GLfloat *dst = get_dst_register_pointer(dstReg, machine);
 478
 479 #if 0
 480    if (value[0] > 1.0e10 ||
 481        IS_INF_OR_NAN(value[0]) ||
 482        IS_INF_OR_NAN(value[1]) ||
 483        IS_INF_OR_NAN(value[2]) || IS_INF_OR_NAN(value[3]))
 484       printf("store %g %g %g %g\n", value[0], value[1], value[2], value[3]);
 485 #endif
 486
 487    if (clamp) {
 488       clampedValue[0] = CLAMP(value[0], 0.0F, 1.0F);
 489       clampedValue[1] = CLAMP(value[1], 0.0F, 1.0F);
 490       clampedValue[2] = CLAMP(value[2], 0.0F, 1.0F);
 491       clampedValue[3] = CLAMP(value[3], 0.0F, 1.0F);
 492       value = clampedValue;
 493    }
 494
 495    if (dstReg->CondMask != COND_TR) {
 496       /* condition codes may turn off some writes */
 497       if (writeMask & WRITEMASK_X) {
 498          if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 0)],
 499                       dstReg->CondMask))
 500             writeMask &= ~WRITEMASK_X;
 501       }
 502       if (writeMask & WRITEMASK_Y) {
 503          if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 1)],
 504                       dstReg->CondMask))
 505             writeMask &= ~WRITEMASK_Y;
 506       }
 507       if (writeMask & WRITEMASK_Z) {
 508          if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 2)],
 509                       dstReg->CondMask))
 510             writeMask &= ~WRITEMASK_Z;
 511       }
 512       if (writeMask & WRITEMASK_W) {
 513          if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 3)],
 514                       dstReg->CondMask))
 515             writeMask &= ~WRITEMASK_W;
 516       }
 517    }
 518
 519    if (writeMask & WRITEMASK_X)
 520       dst[0] = value[0];
 521    if (writeMask & WRITEMASK_Y)
 522       dst[1] = value[1];
 523    if (writeMask & WRITEMASK_Z)
 524       dst[2] = value[2];
 525    if (writeMask & WRITEMASK_W)
 526       dst[3] = value[3];
 527
 528    if (inst->CondUpdate) {
 529       if (writeMask & WRITEMASK_X)
 530          machine->CondCodes[0] = generate_cc(value[0]);
 531       if (writeMask & WRITEMASK_Y)
 532          machine->CondCodes[1] = generate_cc(value[1]);
 533       if (writeMask & WRITEMASK_Z)
 534          machine->CondCodes[2] = generate_cc(value[2]);
 535       if (writeMask & WRITEMASK_W)
 536          machine->CondCodes[3] = generate_cc(value[3]);
 537 #if DEBUG_PROG
 538       printf("CondCodes=(%s,%s,%s,%s) for:\n",
 539              _mesa_condcode_string(machine->CondCodes[0]),
 540              _mesa_condcode_string(machine->CondCodes[1]),
 541              _mesa_condcode_string(machine->CondCodes[2]),
 542              _mesa_condcode_string(machine->CondCodes[3]));
 543 #endif
 544    }
 545 }
 546
 547
 548 /**
 549  * Store 4 uints into a register.  Observe the set-condition-code flags.
 550  */
 551 static void
 552 store_vector4ui(const struct prog_instruction *inst,
 553                 struct gl_program_machine *machine, const GLuint value[4])
 554 {
 555    const struct prog_dst_register *dstReg = &(inst->DstReg);
 556    GLuint writeMask = dstReg->WriteMask;
 557    GLuint *dst = (GLuint *) get_dst_register_pointer(dstReg, machine);
 558
 559    if (dstReg->CondMask != COND_TR) {
 560       /* condition codes may turn off some writes */
 561       if (writeMask & WRITEMASK_X) {
 562          if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 0)],
 563                       dstReg->CondMask))
 564             writeMask &= ~WRITEMASK_X;
 565       }
 566       if (writeMask & WRITEMASK_Y) {
 567          if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 1)],
 568                       dstReg->CondMask))
 569             writeMask &= ~WRITEMASK_Y;
 570       }
 571       if (writeMask & WRITEMASK_Z) {
 572          if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 2)],
 573                       dstReg->CondMask))
 574             writeMask &= ~WRITEMASK_Z;
 575       }
 576       if (writeMask & WRITEMASK_W) {
 577          if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 3)],
 578                       dstReg->CondMask))
 579             writeMask &= ~WRITEMASK_W;
 580       }
 581    }
 582
 583    if (writeMask & WRITEMASK_X)
 584       dst[0] = value[0];
 585    if (writeMask & WRITEMASK_Y)
 586       dst[1] = value[1];
 587    if (writeMask & WRITEMASK_Z)
 588       dst[2] = value[2];
 589    if (writeMask & WRITEMASK_W)
 590       dst[3] = value[3];
 591
 592    if (inst->CondUpdate) {
 593       if (writeMask & WRITEMASK_X)
 594          machine->CondCodes[0] = generate_cc(value[0]);
 595       if (writeMask & WRITEMASK_Y)
 596          machine->CondCodes[1] = generate_cc(value[1]);
 597       if (writeMask & WRITEMASK_Z)
 598          machine->CondCodes[2] = generate_cc(value[2]);
 599       if (writeMask & WRITEMASK_W)
 600          machine->CondCodes[3] = generate_cc(value[3]);
 601 #if DEBUG_PROG
 602       printf("CondCodes=(%s,%s,%s,%s) for:\n",
 603              _mesa_condcode_string(machine->CondCodes[0]),
 604              _mesa_condcode_string(machine->CondCodes[1]),
 605              _mesa_condcode_string(machine->CondCodes[2]),
 606              _mesa_condcode_string(machine->CondCodes[3]));
 607 #endif
 608    }
 609 }
 610
 611
 612
 613 /**
 614  * Execute the given vertex/fragment program.
 615  *
 616  * \param ctx  rendering context
 617  * \param program  the program to execute
 618  * \param machine  machine state (must be initialized)
 619  * \return GL_TRUE if program completed or GL_FALSE if program executed KIL.
 620  */
 621 GLboolean
 622 _mesa_execute_program(GLcontext * ctx,
 623                       const struct gl_program *program,
 624                       struct gl_program_machine *machine)
 625 {
 626    const GLuint numInst = program->NumInstructions;
 627    const GLuint maxExec = 10000;
 628    GLuint pc, numExec = 0;
 629
 630    machine->CurProgram = program;
 631
 632    if (DEBUG_PROG) {
 633       printf("execute program %u --------------------\n", program->Id);
 634    }
 635
 636 #if FEATURE_MESA_program_debug
 637    CurrentMachine = machine;
 638 #endif
 639
 640    if (program->Target == GL_VERTEX_PROGRAM_ARB) {
 641       machine->EnvParams = ctx->VertexProgram.Parameters;
 642    }
 643    else {
 644       machine->EnvParams = ctx->FragmentProgram.Parameters;
 645    }
 646
 647    for (pc = 0; pc < numInst; pc++) {
 648       const struct prog_instruction *inst = program->Instructions + pc;
 649
 650 #if FEATURE_MESA_program_debug
 651       if (ctx->FragmentProgram.CallbackEnabled &&
 652           ctx->FragmentProgram.Callback) {
 653          ctx->FragmentProgram.CurrentPosition = inst->StringPos;
 654          ctx->FragmentProgram.Callback(program->Target,
 655                                        ctx->FragmentProgram.CallbackData);
 656       }
 657 #endif
 658
 659       if (DEBUG_PROG) {
 660          _mesa_print_instruction(inst);
 661       }
 662
 663       switch (inst->Opcode) {
 664       case OPCODE_ABS:
 665          {
 666             GLfloat a[4], result[4];
 667             fetch_vector4(&inst->SrcReg[0], machine, a);
 668             result[0] = FABSF(a[0]);
 669             result[1] = FABSF(a[1]);
 670             result[2] = FABSF(a[2]);
 671             result[3] = FABSF(a[3]);
 672             store_vector4(inst, machine, result);
 673          }
 674          break;
 675       case OPCODE_ADD:
 676          {
 677             GLfloat a[4], b[4], result[4];
 678             fetch_vector4(&inst->SrcReg[0], machine, a);
 679             fetch_vector4(&inst->SrcReg[1], machine, b);
 680             result[0] = a[0] + b[0];
 681             result[1] = a[1] + b[1];
 682             result[2] = a[2] + b[2];
 683             result[3] = a[3] + b[3];
 684             store_vector4(inst, machine, result);
 685             if (DEBUG_PROG) {
 686                printf("ADD (%g %g %g %g) = (%g %g %g %g) + (%g %g %g %g)\n",
 687                       result[0], result[1], result[2], result[3],
 688                       a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
 689             }
 690          }
 691          break;
 692       case OPCODE_AND:     /* bitwise AND */
 693          {
 694             GLuint a[4], b[4], result[4];
 695             fetch_vector4ui(&inst->SrcReg[0], machine, a);
 696             fetch_vector4ui(&inst->SrcReg[1], machine, b);
 697             result[0] = a[0] & b[0];
 698             result[1] = a[1] & b[1];
 699             result[2] = a[2] & b[2];
 700             result[3] = a[3] & b[3];
 701             store_vector4ui(inst, machine, result);
 702          }
 703          break;
 704       case OPCODE_ARL:
 705          {
 706             GLfloat t[4];
 707             fetch_vector4(&inst->SrcReg[0], machine, t);
 708             machine->AddressReg[0][0] = IFLOOR(t[0]);
 709          }
 710          break;
 711       case OPCODE_BGNLOOP:
 712          /* no-op */
 713          break;
 714       case OPCODE_ENDLOOP:
 715          /* subtract 1 here since pc is incremented by for(pc) loop */
 716          pc = inst->BranchTarget - 1;   /* go to matching BNGLOOP */
 717          break;
 718       case OPCODE_BGNSUB:      /* begin subroutine */
 719          break;
 720       case OPCODE_ENDSUB:      /* end subroutine */
 721          break;
 722       case OPCODE_BRA:         /* branch (conditional) */
 723          /* fall-through */
 724       case OPCODE_BRK:         /* break out of loop (conditional) */
 725          /* fall-through */
 726       case OPCODE_CONT:        /* continue loop (conditional) */
 727          if (eval_condition(machine, inst)) {
 728             /* take branch */
 729             /* Subtract 1 here since we'll do pc++ at end of for-loop */
 730             pc = inst->BranchTarget - 1;
 731          }
 732          break;
 733       case OPCODE_CAL:         /* Call subroutine (conditional) */
 734          if (eval_condition(machine, inst)) {
 735             /* call the subroutine */
 736             if (machine->StackDepth >= MAX_PROGRAM_CALL_DEPTH) {
 737                return GL_TRUE;  /* Per GL_NV_vertex_program2 spec */
 738             }
 739             machine->CallStack[machine->StackDepth++] = pc + 1; /* next inst */
 740             /* Subtract 1 here since we'll do pc++ at end of for-loop */
 741             pc = inst->BranchTarget - 1;
 742          }
 743          break;
 744       case OPCODE_CMP:
 745          {
 746             GLfloat a[4], b[4], c[4], result[4];
 747             fetch_vector4(&inst->SrcReg[0], machine, a);
 748             fetch_vector4(&inst->SrcReg[1], machine, b);
 749             fetch_vector4(&inst->SrcReg[2], machine, c);
 750             result[0] = a[0] < 0.0F ? b[0] : c[0];
 751             result[1] = a[1] < 0.0F ? b[1] : c[1];
 752             result[2] = a[2] < 0.0F ? b[2] : c[2];
 753             result[3] = a[3] < 0.0F ? b[3] : c[3];
 754             store_vector4(inst, machine, result);
 755          }
 756          break;
 757       case OPCODE_COS:
 758          {
 759             GLfloat a[4], result[4];
 760             fetch_vector1(&inst->SrcReg[0], machine, a);
 761             result[0] = result[1] = result[2] = result[3]
 762                = (GLfloat) _mesa_cos(a[0]);
 763             store_vector4(inst, machine, result);
 764          }
 765          break;
 766       case OPCODE_DDX:         /* Partial derivative with respect to X */
 767          {
 768             GLfloat result[4];
 769             fetch_vector4_deriv(ctx, &inst->SrcReg[0], machine,
 770                                 'X', result);
 771             store_vector4(inst, machine, result);
 772          }
 773          break;
 774       case OPCODE_DDY:         /* Partial derivative with respect to Y */
 775          {
 776             GLfloat result[4];
 777             fetch_vector4_deriv(ctx, &inst->SrcReg[0], machine,
 778                                 'Y', result);
 779             store_vector4(inst, machine, result);
 780          }
 781          break;
 782       case OPCODE_DP2:
 783          {
 784             GLfloat a[4], b[4], result[4];
 785             fetch_vector4(&inst->SrcReg[0], machine, a);
 786             fetch_vector4(&inst->SrcReg[1], machine, b);
 787             result[0] = result[1] = result[2] = result[3] = DOT2(a, b);
 788             store_vector4(inst, machine, result);
 789             if (DEBUG_PROG) {
 790                printf("DP2 %g = (%g %g) . (%g %g)\n",
 791                       result[0], a[0], a[1], b[0], b[1]);
 792             }
 793          }
 794          break;
 795       case OPCODE_DP2A:
 796          {
 797             GLfloat a[4], b[4], c, result[4];
 798             fetch_vector4(&inst->SrcReg[0], machine, a);
 799             fetch_vector4(&inst->SrcReg[1], machine, b);
 800             fetch_vector1(&inst->SrcReg[1], machine, &c);
 801             result[0] = result[1] = result[2] = result[3] = DOT2(a, b) + c;
 802             store_vector4(inst, machine, result);
 803             if (DEBUG_PROG) {
 804                printf("DP2A %g = (%g %g) . (%g %g) + %g\n",
 805                       result[0], a[0], a[1], b[0], b[1], c);
 806             }
 807          }
 808          break;
 809       case OPCODE_DP3:
 810          {
 811             GLfloat a[4], b[4], result[4];
 812             fetch_vector4(&inst->SrcReg[0], machine, a);
 813             fetch_vector4(&inst->SrcReg[1], machine, b);
 814             result[0] = result[1] = result[2] = result[3] = DOT3(a, b);
 815             store_vector4(inst, machine, result);
 816             if (DEBUG_PROG) {
 817                printf("DP3 %g = (%g %g %g) . (%g %g %g)\n",
 818                       result[0], a[0], a[1], a[2], b[0], b[1], b[2]);
 819             }
 820          }
 821          break;
 822       case OPCODE_DP4:
 823          {
 824             GLfloat a[4], b[4], result[4];
 825             fetch_vector4(&inst->SrcReg[0], machine, a);
 826             fetch_vector4(&inst->SrcReg[1], machine, b);
 827             result[0] = result[1] = result[2] = result[3] = DOT4(a, b);
 828             store_vector4(inst, machine, result);
 829             if (DEBUG_PROG) {
 830                printf("DP4 %g = (%g, %g %g %g) . (%g, %g %g %g)\n",
 831                       result[0], a[0], a[1], a[2], a[3],
 832                       b[0], b[1], b[2], b[3]);
 833             }
 834          }
 835          break;
 836       case OPCODE_DPH:
 837          {
 838             GLfloat a[4], b[4], result[4];
 839             fetch_vector4(&inst->SrcReg[0], machine, a);
 840             fetch_vector4(&inst->SrcReg[1], machine, b);
 841             result[0] = result[1] = result[2] = result[3] = DOT3(a, b) + b[3];
 842             store_vector4(inst, machine, result);
 843          }
 844          break;
 845       case OPCODE_DST:         /* Distance vector */
 846          {
 847             GLfloat a[4], b[4], result[4];
 848             fetch_vector4(&inst->SrcReg[0], machine, a);
 849             fetch_vector4(&inst->SrcReg[1], machine, b);
 850             result[0] = 1.0F;
 851             result[1] = a[1] * b[1];
 852             result[2] = a[2];
 853             result[3] = b[3];
 854             store_vector4(inst, machine, result);
 855          }
 856          break;
 857       case OPCODE_EXP:
 858          {
 859             GLfloat t[4], q[4], floor_t0;
 860             fetch_vector1(&inst->SrcReg[0], machine, t);
 861             floor_t0 = FLOORF(t[0]);
 862             if (floor_t0 > FLT_MAX_EXP) {
 863                SET_POS_INFINITY(q[0]);
 864                SET_POS_INFINITY(q[2]);
 865             }
 866             else if (floor_t0 < FLT_MIN_EXP) {
 867                q[0] = 0.0F;
 868                q[2] = 0.0F;
 869             }
 870             else {
 871                q[0] = LDEXPF(1.0, (int) floor_t0);
 872                /* Note: GL_NV_vertex_program expects
 873                 * result.z = result.x * APPX(result.y)
 874                 * We do what the ARB extension says.
 875                 */
 876                q[2] = (GLfloat) pow(2.0, t[0]);
 877             }
 878             q[1] = t[0] - floor_t0;
 879             q[3] = 1.0F;
 880             store_vector4( inst, machine, q );
 881          }
 882          break;
 883       case OPCODE_EX2:         /* Exponential base 2 */
 884          {
 885             GLfloat a[4], result[4];
 886             fetch_vector1(&inst->SrcReg[0], machine, a);
 887             result[0] = result[1] = result[2] = result[3] =
 888                (GLfloat) _mesa_pow(2.0, a[0]);
 889             store_vector4(inst, machine, result);
 890          }
 891          break;
 892       case OPCODE_FLR:
 893          {
 894             GLfloat a[4], result[4];
 895             fetch_vector4(&inst->SrcReg[0], machine, a);
 896             result[0] = FLOORF(a[0]);
 897             result[1] = FLOORF(a[1]);
 898             result[2] = FLOORF(a[2]);
 899             result[3] = FLOORF(a[3]);
 900             store_vector4(inst, machine, result);
 901          }
 902          break;
 903       case OPCODE_FRC:
 904          {
 905             GLfloat a[4], result[4];
 906             fetch_vector4(&inst->SrcReg[0], machine, a);
 907             result[0] = a[0] - FLOORF(a[0]);
 908             result[1] = a[1] - FLOORF(a[1]);
 909             result[2] = a[2] - FLOORF(a[2]);
 910             result[3] = a[3] - FLOORF(a[3]);
 911             store_vector4(inst, machine, result);
 912          }
 913          break;
 914       case OPCODE_IF:
 915          {
 916             GLboolean cond;
 917             /* eval condition */
 918             if (inst->SrcReg[0].File != PROGRAM_UNDEFINED) {
 919                GLfloat a[4];
 920                fetch_vector1(&inst->SrcReg[0], machine, a);
 921                cond = (a[0] != 0.0);
 922             }
 923             else {
 924                cond = eval_condition(machine, inst);
 925             }
 926             if (DEBUG_PROG) {
 927                printf("IF: %d\n", cond);
 928             }
 929             /* do if/else */
 930             if (cond) {
 931                /* do if-clause (just continue execution) */
 932             }
 933             else {
 934                /* go to the instruction after ELSE or ENDIF */
 935                assert(inst->BranchTarget >= 0);
 936                pc = inst->BranchTarget - 1;
 937             }
 938          }
 939          break;
 940       case OPCODE_ELSE:
 941          /* goto ENDIF */
 942          assert(inst->BranchTarget >= 0);
 943          pc = inst->BranchTarget - 1;
 944          break;
 945       case OPCODE_ENDIF:
 946          /* nothing */
 947          break;
 948       case OPCODE_KIL_NV:      /* NV_f_p only (conditional) */
 949          if (eval_condition(machine, inst)) {
 950             return GL_FALSE;
 951          }
 952          break;
 953       case OPCODE_KIL:         /* ARB_f_p only */
 954          {
 955             GLfloat a[4];
 956             fetch_vector4(&inst->SrcReg[0], machine, a);
 957             if (a[0] < 0.0F || a[1] < 0.0F || a[2] < 0.0F || a[3] < 0.0F) {
 958                return GL_FALSE;
 959             }
 960          }
 961          break;
 962       case OPCODE_LG2:         /* log base 2 */
 963          {
 964             GLfloat a[4], result[4];
 965             fetch_vector1(&inst->SrcReg[0], machine, a);
 966             /* The fast LOG2 macro doesn't meet the precision requirements.
 967              */
 968             result[0] = result[1] = result[2] = result[3] =
 969                 (log(a[0]) * 1.442695F);
 970             store_vector4(inst, machine, result);
 971          }
 972          break;
 973       case OPCODE_LIT:
 974          {
 975             const GLfloat epsilon = 1.0F / 256.0F;      /* from NV VP spec */
 976             GLfloat a[4], result[4];
 977             fetch_vector4(&inst->SrcReg[0], machine, a);
 978             a[0] = MAX2(a[0], 0.0F);
 979             a[1] = MAX2(a[1], 0.0F);
 980             /* XXX ARB version clamps a[3], NV version doesn't */
 981             a[3] = CLAMP(a[3], -(128.0F - epsilon), (128.0F - epsilon));
 982             result[0] = 1.0F;
 983             result[1] = a[0];
 984             /* XXX we could probably just use pow() here */
 985             if (a[0] > 0.0F) {
 986                if (a[1] == 0.0 && a[3] == 0.0)
 987                   result[2] = 1.0;
 988                else
 989                   result[2] = EXPF(a[3] * LOGF(a[1]));
 990             }
 991             else {
 992                result[2] = 0.0;
 993             }
 994             result[3] = 1.0F;
 995             store_vector4(inst, machine, result);
 996             if (DEBUG_PROG) {
 997                printf("LIT (%g %g %g %g) : (%g %g %g %g)\n",
 998                       result[0], result[1], result[2], result[3],
 999                       a[0], a[1], a[2], a[3]);
1000             }
1001          }
1002          break;
1003       case OPCODE_LOG:
1004          {
1005             GLfloat t[4], q[4], abs_t0;
1006             fetch_vector1(&inst->SrcReg[0], machine, t);
1007             abs_t0 = FABSF(t[0]);
1008             if (abs_t0 != 0.0F) {
1009                /* Since we really can't handle infinite values on VMS
1010                 * like other OSes we'll use __MAXFLOAT to represent
1011                 * infinity.  This may need some tweaking.
1012                 */
1013 #ifdef VMS
1014                if (abs_t0 == __MAXFLOAT)
1015 #else
1016                if (IS_INF_OR_NAN(abs_t0))
1017 #endif
1018                {
1019                   SET_POS_INFINITY(q[0]);
1020                   q[1] = 1.0F;
1021                   SET_POS_INFINITY(q[2]);
1022                }
1023                else {
1024                   int exponent;
1025                   GLfloat mantissa = FREXPF(t[0], &exponent);
1026                   q[0] = (GLfloat) (exponent - 1);
1027                   q[1] = (GLfloat) (2.0 * mantissa); /* map [.5, 1) -> [1, 2) */
1028
1029                   /* The fast LOG2 macro doesn't meet the precision
1030                    * requirements.
1031                    */
1032                   q[2] = (log(t[0]) * 1.442695F);
1033                }
1034             }
1035             else {
1036                SET_NEG_INFINITY(q[0]);
1037                q[1] = 1.0F;
1038                SET_NEG_INFINITY(q[2]);
1039             }
1040             q[3] = 1.0;
1041             store_vector4(inst, machine, q);
1042          }
1043          break;
1044       case OPCODE_LRP:
1045          {
1046             GLfloat a[4], b[4], c[4], result[4];
1047             fetch_vector4(&inst->SrcReg[0], machine, a);
1048             fetch_vector4(&inst->SrcReg[1], machine, b);
1049             fetch_vector4(&inst->SrcReg[2], machine, c);
1050             result[0] = a[0] * b[0] + (1.0F - a[0]) * c[0];
1051             result[1] = a[1] * b[1] + (1.0F - a[1]) * c[1];
1052             result[2] = a[2] * b[2] + (1.0F - a[2]) * c[2];
1053             result[3] = a[3] * b[3] + (1.0F - a[3]) * c[3];
1054             store_vector4(inst, machine, result);
1055             if (DEBUG_PROG) {
1056                printf("LRP (%g %g %g %g) = (%g %g %g %g), "
1057                       "(%g %g %g %g), (%g %g %g %g)\n",
1058                       result[0], result[1], result[2], result[3],
1059                       a[0], a[1], a[2], a[3],
1060                       b[0], b[1], b[2], b[3], c[0], c[1], c[2], c[3]);
1061             }
1062          }
1063          break;
1064       case OPCODE_MAD:
1065          {
1066             GLfloat a[4], b[4], c[4], result[4];
1067             fetch_vector4(&inst->SrcReg[0], machine, a);
1068             fetch_vector4(&inst->SrcReg[1], machine, b);
1069             fetch_vector4(&inst->SrcReg[2], machine, c);
1070             result[0] = a[0] * b[0] + c[0];
1071             result[1] = a[1] * b[1] + c[1];
1072             result[2] = a[2] * b[2] + c[2];
1073             result[3] = a[3] * b[3] + c[3];
1074             store_vector4(inst, machine, result);
1075             if (DEBUG_PROG) {
1076                printf("MAD (%g %g %g %g) = (%g %g %g %g) * "
1077                       "(%g %g %g %g) + (%g %g %g %g)\n",
1078                       result[0], result[1], result[2], result[3],
1079                       a[0], a[1], a[2], a[3],
1080                       b[0], b[1], b[2], b[3], c[0], c[1], c[2], c[3]);
1081             }
1082          }
1083          break;
1084       case OPCODE_MAX:
1085          {
1086             GLfloat a[4], b[4], result[4];
1087             fetch_vector4(&inst->SrcReg[0], machine, a);
1088             fetch_vector4(&inst->SrcReg[1], machine, b);
1089             result[0] = MAX2(a[0], b[0]);
1090             result[1] = MAX2(a[1], b[1]);
1091             result[2] = MAX2(a[2], b[2]);
1092             result[3] = MAX2(a[3], b[3]);
1093             store_vector4(inst, machine, result);
1094             if (DEBUG_PROG) {
1095                printf("MAX (%g %g %g %g) = (%g %g %g %g), (%g %g %g %g)\n",
1096                       result[0], result[1], result[2], result[3],
1097                       a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
1098             }
1099          }
1100          break;
1101       case OPCODE_MIN:
1102          {
1103             GLfloat a[4], b[4], result[4];
1104             fetch_vector4(&inst->SrcReg[0], machine, a);
1105             fetch_vector4(&inst->SrcReg[1], machine, b);
1106             result[0] = MIN2(a[0], b[0]);
1107             result[1] = MIN2(a[1], b[1]);
1108             result[2] = MIN2(a[2], b[2]);
1109             result[3] = MIN2(a[3], b[3]);
1110             store_vector4(inst, machine, result);
1111          }
1112          break;
1113       case OPCODE_MOV:
1114          {
1115             GLfloat result[4];
1116             fetch_vector4(&inst->SrcReg[0], machine, result);
1117             store_vector4(inst, machine, result);
1118             if (DEBUG_PROG) {
1119                printf("MOV (%g %g %g %g)\n",
1120                       result[0], result[1], result[2], result[3]);
1121             }
1122          }
1123          break;
1124       case OPCODE_MUL:
1125          {
1126             GLfloat a[4], b[4], result[4];
1127             fetch_vector4(&inst->SrcReg[0], machine, a);
1128             fetch_vector4(&inst->SrcReg[1], machine, b);
1129             result[0] = a[0] * b[0];
1130             result[1] = a[1] * b[1];
1131             result[2] = a[2] * b[2];
1132             result[3] = a[3] * b[3];
1133             store_vector4(inst, machine, result);
1134             if (DEBUG_PROG) {
1135                printf("MUL (%g %g %g %g) = (%g %g %g %g) * (%g %g %g %g)\n",
1136                       result[0], result[1], result[2], result[3],
1137                       a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
1138             }
1139          }
1140          break;
1141       case OPCODE_NOISE1:
1142          {
1143             GLfloat a[4], result[4];
1144             fetch_vector1(&inst->SrcReg[0], machine, a);
1145             result[0] =
1146                result[1] =
1147                result[2] =
1148                result[3] = _mesa_noise1(a[0]);
1149             store_vector4(inst, machine, result);
1150          }
1151          break;
1152       case OPCODE_NOISE2:
1153          {
1154             GLfloat a[4], result[4];
1155             fetch_vector4(&inst->SrcReg[0], machine, a);
1156             result[0] =
1157                result[1] =
1158                result[2] = result[3] = _mesa_noise2(a[0], a[1]);
1159             store_vector4(inst, machine, result);
1160          }
1161          break;
1162       case OPCODE_NOISE3:
1163          {
1164             GLfloat a[4], result[4];
1165             fetch_vector4(&inst->SrcReg[0], machine, a);
1166             result[0] =
1167                result[1] =
1168                result[2] =
1169                result[3] = _mesa_noise3(a[0], a[1], a[2]);
1170             store_vector4(inst, machine, result);
1171          }
1172          break;
1173       case OPCODE_NOISE4:
1174          {
1175             GLfloat a[4], result[4];
1176             fetch_vector4(&inst->SrcReg[0], machine, a);
1177             result[0] =
1178                result[1] =
1179                result[2] =
1180                result[3] = _mesa_noise4(a[0], a[1], a[2], a[3]);
1181             store_vector4(inst, machine, result);
1182          }
1183          break;
1184       case OPCODE_NOP:
1185          break;
1186       case OPCODE_NOT:         /* bitwise NOT */
1187          {
1188             GLuint a[4], result[4];
1189             fetch_vector4ui(&inst->SrcReg[0], machine, a);
1190             result[0] = ~a[0];
1191             result[1] = ~a[1];
1192             result[2] = ~a[2];
1193             result[3] = ~a[3];
1194             store_vector4ui(inst, machine, result);
1195          }
1196          break;
1197       case OPCODE_NRM3:        /* 3-component normalization */
1198          {
1199             GLfloat a[4], result[4];
1200             GLfloat tmp;
1201             fetch_vector4(&inst->SrcReg[0], machine, a);
1202             tmp = a[0] * a[0] + a[1] * a[1] + a[2] * a[2];
1203             if (tmp != 0.0F)
1204                tmp = INV_SQRTF(tmp);
1205             result[0] = tmp * a[0];
1206             result[1] = tmp * a[1];
1207             result[2] = tmp * a[2];
1208             result[3] = 0.0;  /* undefined, but prevent valgrind warnings */
1209             store_vector4(inst, machine, result);
1210          }
1211          break;
1212       case OPCODE_NRM4:        /* 4-component normalization */
1213          {
1214             GLfloat a[4], result[4];
1215             GLfloat tmp;
1216             fetch_vector4(&inst->SrcReg[0], machine, a);
1217             tmp = a[0] * a[0] + a[1] * a[1] + a[2] * a[2] + a[3] * a[3];
1218             if (tmp != 0.0F)
1219                tmp = INV_SQRTF(tmp);
1220             result[0] = tmp * a[0];
1221             result[1] = tmp * a[1];
1222             result[2] = tmp * a[2];
1223             result[3] = tmp * a[3];
1224             store_vector4(inst, machine, result);
1225          }
1226          break;
1227       case OPCODE_OR:          /* bitwise OR */
1228          {
1229             GLuint a[4], b[4], result[4];
1230             fetch_vector4ui(&inst->SrcReg[0], machine, a);
1231             fetch_vector4ui(&inst->SrcReg[1], machine, b);
1232             result[0] = a[0] | b[0];
1233             result[1] = a[1] | b[1];
1234             result[2] = a[2] | b[2];
1235             result[3] = a[3] | b[3];
1236             store_vector4ui(inst, machine, result);
1237          }
1238          break;
1239       case OPCODE_PK2H:        /* pack two 16-bit floats in one 32-bit float */
1240          {
1241             GLfloat a[4];
1242             GLuint result[4];
1243             GLhalfNV hx, hy;
1244             fetch_vector4(&inst->SrcReg[0], machine, a);
1245             hx = _mesa_float_to_half(a[0]);
1246             hy = _mesa_float_to_half(a[1]);
1247             result[0] =
1248             result[1] =
1249             result[2] =
1250             result[3] = hx | (hy << 16);
1251             store_vector4ui(inst, machine, result);
1252          }
1253          break;
1254       case OPCODE_PK2US:       /* pack two GLushorts into one 32-bit float */
1255          {
1256             GLfloat a[4];
1257             GLuint result[4], usx, usy;
1258             fetch_vector4(&inst->SrcReg[0], machine, a);
1259             a[0] = CLAMP(a[0], 0.0F, 1.0F);
1260             a[1] = CLAMP(a[1], 0.0F, 1.0F);
1261             usx = IROUND(a[0] * 65535.0F);
1262             usy = IROUND(a[1] * 65535.0F);
1263             result[0] =
1264             result[1] =
1265             result[2] =
1266             result[3] = usx | (usy << 16);
1267             store_vector4ui(inst, machine, result);
1268          }
1269          break;
1270       case OPCODE_PK4B:        /* pack four GLbytes into one 32-bit float */
1271          {
1272             GLfloat a[4];
1273             GLuint result[4], ubx, uby, ubz, ubw;
1274             fetch_vector4(&inst->SrcReg[0], machine, a);
1275             a[0] = CLAMP(a[0], -128.0F / 127.0F, 1.0F);
1276             a[1] = CLAMP(a[1], -128.0F / 127.0F, 1.0F);
1277             a[2] = CLAMP(a[2], -128.0F / 127.0F, 1.0F);
1278             a[3] = CLAMP(a[3], -128.0F / 127.0F, 1.0F);
1279             ubx = IROUND(127.0F * a[0] + 128.0F);
1280             uby = IROUND(127.0F * a[1] + 128.0F);
1281             ubz = IROUND(127.0F * a[2] + 128.0F);
1282             ubw = IROUND(127.0F * a[3] + 128.0F);
1283             result[0] =
1284             result[1] =
1285             result[2] =
1286             result[3] = ubx | (uby << 8) | (ubz << 16) | (ubw << 24);
1287             store_vector4ui(inst, machine, result);
1288          }
1289          break;
1290       case OPCODE_PK4UB:       /* pack four GLubytes into one 32-bit float */
1291          {
1292             GLfloat a[4];
1293             GLuint result[4], ubx, uby, ubz, ubw;
1294             fetch_vector4(&inst->SrcReg[0], machine, a);
1295             a[0] = CLAMP(a[0], 0.0F, 1.0F);
1296             a[1] = CLAMP(a[1], 0.0F, 1.0F);
1297             a[2] = CLAMP(a[2], 0.0F, 1.0F);
1298             a[3] = CLAMP(a[3], 0.0F, 1.0F);
1299             ubx = IROUND(255.0F * a[0]);
1300             uby = IROUND(255.0F * a[1]);
1301             ubz = IROUND(255.0F * a[2]);
1302             ubw = IROUND(255.0F * a[3]);
1303             result[0] =
1304             result[1] =
1305             result[2] =
1306             result[3] = ubx | (uby << 8) | (ubz << 16) | (ubw << 24);
1307             store_vector4ui(inst, machine, result);
1308          }
1309          break;
1310       case OPCODE_POW:
1311          {
1312             GLfloat a[4], b[4], result[4];
1313             fetch_vector1(&inst->SrcReg[0], machine, a);
1314             fetch_vector1(&inst->SrcReg[1], machine, b);
1315             result[0] = result[1] = result[2] = result[3]
1316                = (GLfloat) _mesa_pow(a[0], b[0]);
1317             store_vector4(inst, machine, result);
1318          }
1319          break;
1320       case OPCODE_RCP:
1321          {
1322             GLfloat a[4], result[4];
1323             fetch_vector1(&inst->SrcReg[0], machine, a);
1324             if (DEBUG_PROG) {
1325                if (a[0] == 0)
1326                   printf("RCP(0)\n");
1327                else if (IS_INF_OR_NAN(a[0]))
1328                   printf("RCP(inf)\n");
1329             }
1330             result[0] = result[1] = result[2] = result[3] = 1.0F / a[0];
1331             store_vector4(inst, machine, result);
1332          }
1333          break;
1334       case OPCODE_RET:         /* return from subroutine (conditional) */
1335          if (eval_condition(machine, inst)) {
1336             if (machine->StackDepth == 0) {
1337                return GL_TRUE;  /* Per GL_NV_vertex_program2 spec */
1338             }
1339             /* subtract one because of pc++ in the for loop */
1340             pc = machine->CallStack[--machine->StackDepth] - 1;
1341          }
1342          break;
1343       case OPCODE_RFL:         /* reflection vector */
1344          {
1345             GLfloat axis[4], dir[4], result[4], tmpX, tmpW;
1346             fetch_vector4(&inst->SrcReg[0], machine, axis);
1347             fetch_vector4(&inst->SrcReg[1], machine, dir);
1348             tmpW = DOT3(axis, axis);
1349             tmpX = (2.0F * DOT3(axis, dir)) / tmpW;
1350             result[0] = tmpX * axis[0] - dir[0];
1351             result[1] = tmpX * axis[1] - dir[1];
1352             result[2] = tmpX * axis[2] - dir[2];
1353             /* result[3] is never written! XXX enforce in parser! */
1354             store_vector4(inst, machine, result);
1355          }
1356          break;
1357       case OPCODE_RSQ:         /* 1 / sqrt() */
1358          {
1359             GLfloat a[4], result[4];
1360             fetch_vector1(&inst->SrcReg[0], machine, a);
1361             a[0] = FABSF(a[0]);
1362             result[0] = result[1] = result[2] = result[3] = INV_SQRTF(a[0]);
1363             store_vector4(inst, machine, result);
1364             if (DEBUG_PROG) {
1365                printf("RSQ %g = 1/sqrt(|%g|)\n", result[0], a[0]);
1366             }
1367          }
1368          break;
1369       case OPCODE_SCS:         /* sine and cos */
1370          {
1371             GLfloat a[4], result[4];
1372             fetch_vector1(&inst->SrcReg[0], machine, a);
1373             result[0] = (GLfloat) _mesa_cos(a[0]);
1374             result[1] = (GLfloat) _mesa_sin(a[0]);
1375             result[2] = 0.0;    /* undefined! */
1376             result[3] = 0.0;    /* undefined! */
1377             store_vector4(inst, machine, result);
1378          }
1379          break;
1380       case OPCODE_SEQ:         /* set on equal */
1381          {
1382             GLfloat a[4], b[4], result[4];
1383             fetch_vector4(&inst->SrcReg[0], machine, a);
1384             fetch_vector4(&inst->SrcReg[1], machine, b);
1385             result[0] = (a[0] == b[0]) ? 1.0F : 0.0F;
1386             result[1] = (a[1] == b[1]) ? 1.0F : 0.0F;
1387             result[2] = (a[2] == b[2]) ? 1.0F : 0.0F;
1388             result[3] = (a[3] == b[3]) ? 1.0F : 0.0F;
1389             store_vector4(inst, machine, result);
1390             if (DEBUG_PROG) {
1391                printf("SEQ (%g %g %g %g) = (%g %g %g %g) == (%g %g %g %g)\n",
1392                       result[0], result[1], result[2], result[3],
1393                       a[0], a[1], a[2], a[3],
1394                       b[0], b[1], b[2], b[3]);
1395             }
1396          }
1397          break;
1398       case OPCODE_SFL:         /* set false, operands ignored */
1399          {
1400             static const GLfloat result[4] = { 0.0F, 0.0F, 0.0F, 0.0F };
1401             store_vector4(inst, machine, result);
1402          }
1403          break;
1404       case OPCODE_SGE:         /* set on greater or equal */
1405          {
1406             GLfloat a[4], b[4], result[4];
1407             fetch_vector4(&inst->SrcReg[0], machine, a);
1408             fetch_vector4(&inst->SrcReg[1], machine, b);
1409             result[0] = (a[0] >= b[0]) ? 1.0F : 0.0F;
1410             result[1] = (a[1] >= b[1]) ? 1.0F : 0.0F;
1411             result[2] = (a[2] >= b[2]) ? 1.0F : 0.0F;
1412             result[3] = (a[3] >= b[3]) ? 1.0F : 0.0F;
1413             store_vector4(inst, machine, result);
1414             if (DEBUG_PROG) {
1415                printf("SGE (%g %g %g %g) = (%g %g %g %g) >= (%g %g %g %g)\n",
1416                       result[0], result[1], result[2], result[3],
1417                       a[0], a[1], a[2], a[3],
1418                       b[0], b[1], b[2], b[3]);
1419             }
1420          }
1421          break;
1422       case OPCODE_SGT:         /* set on greater */
1423          {
1424             GLfloat a[4], b[4], result[4];
1425             fetch_vector4(&inst->SrcReg[0], machine, a);
1426             fetch_vector4(&inst->SrcReg[1], machine, b);
1427             result[0] = (a[0] > b[0]) ? 1.0F : 0.0F;
1428             result[1] = (a[1] > b[1]) ? 1.0F : 0.0F;
1429             result[2] = (a[2] > b[2]) ? 1.0F : 0.0F;
1430             result[3] = (a[3] > b[3]) ? 1.0F : 0.0F;
1431             store_vector4(inst, machine, result);
1432             if (DEBUG_PROG) {
1433                printf("SGT (%g %g %g %g) = (%g %g %g %g) > (%g %g %g %g)\n",
1434                       result[0], result[1], result[2], result[3],
1435                       a[0], a[1], a[2], a[3],
1436                       b[0], b[1], b[2], b[3]);
1437             }
1438          }
1439          break;
1440       case OPCODE_SIN:
1441          {
1442             GLfloat a[4], result[4];
1443             fetch_vector1(&inst->SrcReg[0], machine, a);
1444             result[0] = result[1] = result[2] = result[3]
1445                = (GLfloat) _mesa_sin(a[0]);
1446             store_vector4(inst, machine, result);
1447          }
1448          break;
1449       case OPCODE_SLE:         /* set on less or equal */
1450          {
1451             GLfloat a[4], b[4], result[4];
1452             fetch_vector4(&inst->SrcReg[0], machine, a);
1453             fetch_vector4(&inst->SrcReg[1], machine, b);
1454             result[0] = (a[0] <= b[0]) ? 1.0F : 0.0F;
1455             result[1] = (a[1] <= b[1]) ? 1.0F : 0.0F;
1456             result[2] = (a[2] <= b[2]) ? 1.0F : 0.0F;
1457             result[3] = (a[3] <= b[3]) ? 1.0F : 0.0F;
1458             store_vector4(inst, machine, result);
1459             if (DEBUG_PROG) {
1460                printf("SLE (%g %g %g %g) = (%g %g %g %g) <= (%g %g %g %g)\n",
1461                       result[0], result[1], result[2], result[3],
1462                       a[0], a[1], a[2], a[3],
1463                       b[0], b[1], b[2], b[3]);
1464             }
1465          }
1466          break;
1467       case OPCODE_SLT:         /* set on less */
1468          {
1469             GLfloat a[4], b[4], result[4];
1470             fetch_vector4(&inst->SrcReg[0], machine, a);
1471             fetch_vector4(&inst->SrcReg[1], machine, b);
1472             result[0] = (a[0] < b[0]) ? 1.0F : 0.0F;
1473             result[1] = (a[1] < b[1]) ? 1.0F : 0.0F;
1474             result[2] = (a[2] < b[2]) ? 1.0F : 0.0F;
1475             result[3] = (a[3] < b[3]) ? 1.0F : 0.0F;
1476             store_vector4(inst, machine, result);
1477             if (DEBUG_PROG) {
1478                printf("SLT (%g %g %g %g) = (%g %g %g %g) < (%g %g %g %g)\n",
1479                       result[0], result[1], result[2], result[3],
1480                       a[0], a[1], a[2], a[3],
1481                       b[0], b[1], b[2], b[3]);
1482             }
1483          }
1484          break;
1485       case OPCODE_SNE:         /* set on not equal */
1486          {
1487             GLfloat a[4], b[4], result[4];
1488             fetch_vector4(&inst->SrcReg[0], machine, a);
1489             fetch_vector4(&inst->SrcReg[1], machine, b);
1490             result[0] = (a[0] != b[0]) ? 1.0F : 0.0F;
1491             result[1] = (a[1] != b[1]) ? 1.0F : 0.0F;
1492             result[2] = (a[2] != b[2]) ? 1.0F : 0.0F;
1493             result[3] = (a[3] != b[3]) ? 1.0F : 0.0F;
1494             store_vector4(inst, machine, result);
1495             if (DEBUG_PROG) {
1496                printf("SNE (%g %g %g %g) = (%g %g %g %g) != (%g %g %g %g)\n",
1497                       result[0], result[1], result[2], result[3],
1498                       a[0], a[1], a[2], a[3],
1499                       b[0], b[1], b[2], b[3]);
1500             }
1501          }
1502          break;
1503       case OPCODE_SSG:         /* set sign (-1, 0 or +1) */
1504          {
1505             GLfloat a[4], result[4];
1506             fetch_vector4(&inst->SrcReg[0], machine, a);
1507             result[0] = (GLfloat) ((a[0] > 0.0F) - (a[0] < 0.0F));
1508             result[1] = (GLfloat) ((a[1] > 0.0F) - (a[1] < 0.0F));
1509             result[2] = (GLfloat) ((a[2] > 0.0F) - (a[2] < 0.0F));
1510             result[3] = (GLfloat) ((a[3] > 0.0F) - (a[3] < 0.0F));
1511             store_vector4(inst, machine, result);
1512          }
1513          break;
1514       case OPCODE_STR:         /* set true, operands ignored */
1515          {
1516             static const GLfloat result[4] = { 1.0F, 1.0F, 1.0F, 1.0F };
1517             store_vector4(inst, machine, result);
1518          }
1519          break;
1520       case OPCODE_SUB:
1521          {
1522             GLfloat a[4], b[4], result[4];
1523             fetch_vector4(&inst->SrcReg[0], machine, a);
1524             fetch_vector4(&inst->SrcReg[1], machine, b);
1525             result[0] = a[0] - b[0];
1526             result[1] = a[1] - b[1];
1527             result[2] = a[2] - b[2];
1528             result[3] = a[3] - b[3];
1529             store_vector4(inst, machine, result);
1530             if (DEBUG_PROG) {
1531                printf("SUB (%g %g %g %g) = (%g %g %g %g) - (%g %g %g %g)\n",
1532                       result[0], result[1], result[2], result[3],
1533                       a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
1534             }
1535          }
1536          break;
1537       case OPCODE_SWZ:         /* extended swizzle */
1538          {
1539             const struct prog_src_register *source = &inst->SrcReg[0];
1540             const GLfloat *src = get_src_register_pointer(source, machine);
1541             GLfloat result[4];
1542             GLuint i;
1543             for (i = 0; i < 4; i++) {
1544                const GLuint swz = GET_SWZ(source->Swizzle, i);
1545                if (swz == SWIZZLE_ZERO)
1546                   result[i] = 0.0;
1547                else if (swz == SWIZZLE_ONE)
1548                   result[i] = 1.0;
1549                else {
1550                   ASSERT(swz >= 0);
1551                   ASSERT(swz <= 3);
1552                   result[i] = src[swz];
1553                }
1554                if (source->NegateBase & (1 << i))
1555                   result[i] = -result[i];
1556             }
1557             store_vector4(inst, machine, result);
1558          }
1559          break;
1560       case OPCODE_TEX:         /* Both ARB and NV frag prog */
1561          /* Simple texel lookup */
1562          {
1563             GLfloat texcoord[4], color[4];
1564             fetch_vector4(&inst->SrcReg[0], machine, texcoord);
1565
1566             fetch_texel(ctx, machine, inst, texcoord, 0.0, color);
1567
1568             if (DEBUG_PROG) {
1569                printf("TEX (%g, %g, %g, %g) = texture[%d][%g, %g, %g, %g]\n",
1570                       color[0], color[1], color[2], color[3],
1571                       inst->TexSrcUnit,
1572                       texcoord[0], texcoord[1], texcoord[2], texcoord[3]);
1573             }
1574             store_vector4(inst, machine, color);
1575          }
1576          break;
1577       case OPCODE_TXB:         /* GL_ARB_fragment_program only */
1578          /* Texel lookup with LOD bias */
1579          {
1580             const struct gl_texture_unit *texUnit
1581                = &ctx->Texture.Unit[inst->TexSrcUnit];
1582             GLfloat texcoord[4], color[4], lodBias;
1583
1584             fetch_vector4(&inst->SrcReg[0], machine, texcoord);
1585
1586             /* texcoord[3] is the bias to add to lambda */
1587             lodBias = texUnit->LodBias + texcoord[3];
1588             if (texUnit->_Current) {
1589                lodBias += texUnit->_Current->LodBias;
1590             }
1591
1592             fetch_texel(ctx, machine, inst, texcoord, lodBias, color);
1593
1594             store_vector4(inst, machine, color);
1595          }
1596          break;
1597       case OPCODE_TXD:         /* GL_NV_fragment_program only */
1598          /* Texture lookup w/ partial derivatives for LOD */
1599          {
1600             GLfloat texcoord[4], dtdx[4], dtdy[4], color[4];
1601             fetch_vector4(&inst->SrcReg[0], machine, texcoord);
1602             fetch_vector4(&inst->SrcReg[1], machine, dtdx);
1603             fetch_vector4(&inst->SrcReg[2], machine, dtdy);
1604             machine->FetchTexelDeriv(ctx, texcoord, dtdx, dtdy,
1605                                      0.0, /* lodBias */
1606                                      inst->TexSrcUnit, color);
1607             store_vector4(inst, machine, color);
1608          }
1609          break;
1610       case OPCODE_TXP:         /* GL_ARB_fragment_program only */
1611          /* Texture lookup w/ projective divide */
1612          {
1613             GLfloat texcoord[4], color[4];
1614
1615             fetch_vector4(&inst->SrcReg[0], machine, texcoord);
1616             /* Not so sure about this test - if texcoord[3] is
1617              * zero, we'd probably be fine except for an ASSERT in
1618              * IROUND_POS() which gets triggered by the inf values created.
1619              */
1620             if (texcoord[3] != 0.0) {
1621                texcoord[0] /= texcoord[3];
1622                texcoord[1] /= texcoord[3];
1623                texcoord[2] /= texcoord[3];
1624             }
1625
1626             fetch_texel(ctx, machine, inst, texcoord, 0.0, color);
1627
1628             store_vector4(inst, machine, color);
1629          }
1630          break;
1631       case OPCODE_TXP_NV:      /* GL_NV_fragment_program only */
1632          /* Texture lookup w/ projective divide, as above, but do not
1633           * do the divide by w if sampling from a cube map.
1634           */
1635          {
1636             GLfloat texcoord[4], color[4];
1637
1638             fetch_vector4(&inst->SrcReg[0], machine, texcoord);
1639             if (inst->TexSrcTarget != TEXTURE_CUBE_INDEX &&
1640                 texcoord[3] != 0.0) {
1641                texcoord[0] /= texcoord[3];
1642                texcoord[1] /= texcoord[3];
1643                texcoord[2] /= texcoord[3];
1644             }
1645
1646             fetch_texel(ctx, machine, inst, texcoord, 0.0, color);
1647
1648             store_vector4(inst, machine, color);
1649          }
1650          break;
1651       case OPCODE_TRUNC:       /* truncate toward zero */
1652          {
1653             GLfloat a[4], result[4];
1654             fetch_vector4(&inst->SrcReg[0], machine, a);
1655             result[0] = (GLfloat) (GLint) a[0];
1656             result[1] = (GLfloat) (GLint) a[1];
1657             result[2] = (GLfloat) (GLint) a[2];
1658             result[3] = (GLfloat) (GLint) a[3];
1659             store_vector4(inst, machine, result);
1660          }
1661          break;
1662       case OPCODE_UP2H:        /* unpack two 16-bit floats */
1663          {
1664             GLfloat a[4], result[4];
1665             const GLuint *rawBits = (const GLuint *) a;
1666             GLhalfNV hx, hy;
1667             fetch_vector1(&inst->SrcReg[0], machine, a);
1668             hx = rawBits[0] & 0xffff;
1669             hy = rawBits[0] >> 16;
1670             result[0] = result[2] = _mesa_half_to_float(hx);
1671             result[1] = result[3] = _mesa_half_to_float(hy);
1672             store_vector4(inst, machine, result);
1673          }
1674          break;
1675       case OPCODE_UP2US:       /* unpack two GLushorts */
1676          {
1677             GLfloat a[4], result[4];
1678             const GLuint *rawBits = (const GLuint *) a;
1679             GLushort usx, usy;
1680             fetch_vector1(&inst->SrcReg[0], machine, a);
1681             usx = rawBits[0] & 0xffff;
1682             usy = rawBits[0] >> 16;
1683             result[0] = result[2] = usx * (1.0f / 65535.0f);
1684             result[1] = result[3] = usy * (1.0f / 65535.0f);
1685             store_vector4(inst, machine, result);
1686          }
1687          break;
1688       case OPCODE_UP4B:        /* unpack four GLbytes */
1689          {
1690             GLfloat a[4], result[4];
1691             const GLuint *rawBits = (const GLuint *) a;
1692             fetch_vector1(&inst->SrcReg[0], machine, a);
1693             result[0] = (((rawBits[0] >> 0) & 0xff) - 128) / 127.0F;
1694             result[1] = (((rawBits[0] >> 8) & 0xff) - 128) / 127.0F;
1695             result[2] = (((rawBits[0] >> 16) & 0xff) - 128) / 127.0F;
1696             result[3] = (((rawBits[0] >> 24) & 0xff) - 128) / 127.0F;
1697             store_vector4(inst, machine, result);
1698          }
1699          break;
1700       case OPCODE_UP4UB:       /* unpack four GLubytes */
1701          {
1702             GLfloat a[4], result[4];
1703             const GLuint *rawBits = (const GLuint *) a;
1704             fetch_vector1(&inst->SrcReg[0], machine, a);
1705             result[0] = ((rawBits[0] >> 0) & 0xff) / 255.0F;
1706             result[1] = ((rawBits[0] >> 8) & 0xff) / 255.0F;
1707             result[2] = ((rawBits[0] >> 16) & 0xff) / 255.0F;
1708             result[3] = ((rawBits[0] >> 24) & 0xff) / 255.0F;
1709             store_vector4(inst, machine, result);
1710          }
1711          break;
1712       case OPCODE_XOR:         /* bitwise XOR */
1713          {
1714             GLuint a[4], b[4], result[4];
1715             fetch_vector4ui(&inst->SrcReg[0], machine, a);
1716             fetch_vector4ui(&inst->SrcReg[1], machine, b);
1717             result[0] = a[0] ^ b[0];
1718             result[1] = a[1] ^ b[1];
1719             result[2] = a[2] ^ b[2];
1720             result[3] = a[3] ^ b[3];
1721             store_vector4ui(inst, machine, result);
1722          }
1723          break;
1724       case OPCODE_XPD:         /* cross product */
1725          {
1726             GLfloat a[4], b[4], result[4];
1727             fetch_vector4(&inst->SrcReg[0], machine, a);
1728             fetch_vector4(&inst->SrcReg[1], machine, b);
1729             result[0] = a[1] * b[2] - a[2] * b[1];
1730             result[1] = a[2] * b[0] - a[0] * b[2];
1731             result[2] = a[0] * b[1] - a[1] * b[0];
1732             result[3] = 1.0;
1733             store_vector4(inst, machine, result);
1734             if (DEBUG_PROG) {
1735                printf("XPD (%g %g %g %g) = (%g %g %g) X (%g %g %g)\n",
1736                       result[0], result[1], result[2], result[3],
1737                       a[0], a[1], a[2], b[0], b[1], b[2]);
1738             }
1739          }
1740          break;
1741       case OPCODE_X2D:         /* 2-D matrix transform */
1742          {
1743             GLfloat a[4], b[4], c[4], result[4];
1744             fetch_vector4(&inst->SrcReg[0], machine, a);
1745             fetch_vector4(&inst->SrcReg[1], machine, b);
1746             fetch_vector4(&inst->SrcReg[2], machine, c);
1747             result[0] = a[0] + b[0] * c[0] + b[1] * c[1];
1748             result[1] = a[1] + b[0] * c[2] + b[1] * c[3];
1749             result[2] = a[2] + b[0] * c[0] + b[1] * c[1];
1750             result[3] = a[3] + b[0] * c[2] + b[1] * c[3];
1751             store_vector4(inst, machine, result);
1752          }
1753          break;
1754       case OPCODE_PRINT:
1755          {
1756             if (inst->SrcReg[0].File != -1) {
1757                GLfloat a[4];
1758                fetch_vector4(&inst->SrcReg[0], machine, a);
1759                _mesa_printf("%s%g, %g, %g, %g\n", (const char *) inst->Data,
1760                             a[0], a[1], a[2], a[3]);
1761             }
1762             else {
1763                _mesa_printf("%s\n", (const char *) inst->Data);
1764             }
1765          }
1766          break;
1767       case OPCODE_END:
1768          return GL_TRUE;
1769       default:
1770          _mesa_problem(ctx, "Bad opcode %d in _mesa_execute_program",
1771                        inst->Opcode);
1772          return GL_TRUE;        /* return value doesn't matter */
1773       }
1774
1775       numExec++;
1776       if (numExec > maxExec) {
1777          _mesa_problem(ctx, "Infinite loop detected in fragment program");
1778          return GL_TRUE;
1779       }
1780
1781    } /* for pc */
1782
1783 #if FEATURE_MESA_program_debug
1784    CurrentMachine = NULL;
1785 #endif
1786
1787    return GL_TRUE;
1788 }