src/mesa/shader/prog_execute.c

   1 /*
   2  * Mesa 3-D graphics library
   3  * Version:  7.3
   4  *
   5  * Copyright (C) 1999-2008  Brian Paul   All Rights Reserved.
   6  *
   7  * Permission is hereby granted, free of charge, to any person obtaining a
   8  * copy of this software and associated documentation files (the "Software"),
   9  * to deal in the Software without restriction, including without limitation
  10  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  11  * and/or sell copies of the Software, and to permit persons to whom the
  12  * Software is furnished to do so, subject to the following conditions:
  13  *
  14  * The above copyright notice and this permission notice shall be included
  15  * in all copies or substantial portions of the Software.
  16  *
  17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  18  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  20  * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
  21  * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  22  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  23  */
  24
  25 /**
  26  * \file prog_execute.c
  27  * Software interpreter for vertex/fragment programs.
  28  * \author Brian Paul
  29  */
  30
  31 /*
  32  * NOTE: we do everything in single-precision floating point; we don't
  33  * currently observe the single/half/fixed-precision qualifiers.
  34  *
  35  */
  36
  37
  38 #include "main/glheader.h"
  39 #include "main/colormac.h"
  40 #include "main/context.h"
  41 #include "program.h"
  42 #include "prog_execute.h"
  43 #include "prog_instruction.h"
  44 #include "prog_parameter.h"
  45 #include "prog_print.h"
  46 #include "shader/slang/slang_library_noise.h"
  47
  48
/* Debug switch: when nonzero, the interpreter printf()s each instruction
 * and selected results/condition codes while executing a program.
 */
#define DEBUG_PROG 0
  51
  52
  53 /**
  54  * Set x to positive or negative infinity.
  55  */
  56 #if defined(USE_IEEE) || defined(_WIN32)
  57 #define SET_POS_INFINITY(x)  ( *((GLuint *) (void *)&x) = 0x7F800000 )
  58 #define SET_NEG_INFINITY(x)  ( *((GLuint *) (void *)&x) = 0xFF800000 )
  59 #elif defined(VMS)
  60 #define SET_POS_INFINITY(x)  x = __MAXFLOAT
  61 #define SET_NEG_INFINITY(x)  x = -__MAXFLOAT
  62 #else
  63 #define SET_POS_INFINITY(x)  x = (GLfloat) HUGE_VAL
  64 #define SET_NEG_INFINITY(x)  x = (GLfloat) -HUGE_VAL
  65 #endif
  66
  67 #define SET_FLOAT_BITS(x, bits) ((fi_type *) (void *) &(x))->i = bits
  68
  69
/* All-zero vector; get_src_register_pointer() returns it when a source
 * register index falls outside the valid range.
 */
static const GLfloat ZeroVec[4] = { 0.0F, 0.0F, 0.0F, 0.0F };
  71
  72
  73
  74 /**
  75  * Return a pointer to the 4-element float vector specified by the given
  76  * source register.
  77  */
  78 static INLINE const GLfloat *
  79 get_src_register_pointer(const struct prog_src_register *source,
  80                          const struct gl_program_machine *machine)
  81 {
  82    const struct gl_program *prog = machine->CurProgram;
  83    GLint reg = source->Index;
  84
  85    if (source->RelAddr) {
  86       /* add address register value to src index/offset */
  87       reg += machine->AddressReg[0][0];
  88       if (reg < 0) {
  89          return ZeroVec;
  90       }
  91    }
  92
  93    switch (source->File) {
  94    case PROGRAM_TEMPORARY:
  95       if (reg >= MAX_PROGRAM_TEMPS)
  96          return ZeroVec;
  97       return machine->Temporaries[reg];
  98
  99    case PROGRAM_INPUT:
 100       if (prog->Target == GL_VERTEX_PROGRAM_ARB) {
 101          if (reg >= VERT_ATTRIB_MAX)
 102             return ZeroVec;
 103          return machine->VertAttribs[reg];
 104       }
 105       else {
 106          if (reg >= FRAG_ATTRIB_MAX)
 107             return ZeroVec;
 108          return machine->Attribs[reg][machine->CurElement];
 109       }
 110
 111    case PROGRAM_OUTPUT:
 112       if (reg >= MAX_PROGRAM_OUTPUTS)
 113          return ZeroVec;
 114       return machine->Outputs[reg];
 115
 116    case PROGRAM_LOCAL_PARAM:
 117       if (reg >= MAX_PROGRAM_LOCAL_PARAMS)
 118          return ZeroVec;
 119       return machine->CurProgram->LocalParams[reg];
 120
 121    case PROGRAM_ENV_PARAM:
 122       if (reg >= MAX_PROGRAM_ENV_PARAMS)
 123          return ZeroVec;
 124       return machine->EnvParams[reg];
 125
 126    case PROGRAM_STATE_VAR:
 127       /* Fallthrough */
 128    case PROGRAM_CONSTANT:
 129       /* Fallthrough */
 130    case PROGRAM_UNIFORM:
 131       /* Fallthrough */
 132    case PROGRAM_NAMED_PARAM:
 133       if (reg >= (GLint) prog->Parameters->NumParameters)
 134          return ZeroVec;
 135       return prog->Parameters->ParameterValues[reg];
 136
 137    default:
 138       _mesa_problem(NULL,
 139          "Invalid src register file %d in get_src_register_pointer()",
 140          source->File);
 141       return NULL;
 142    }
 143 }
 144
 145
 146 /**
 147  * Return a pointer to the 4-element float vector specified by the given
 148  * destination register.
 149  */
 150 static INLINE GLfloat *
 151 get_dst_register_pointer(const struct prog_dst_register *dest,
 152                          struct gl_program_machine *machine)
 153 {
 154    static GLfloat dummyReg[4];
 155    GLint reg = dest->Index;
 156
 157    if (dest->RelAddr) {
 158       /* add address register value to src index/offset */
 159       reg += machine->AddressReg[0][0];
 160       if (reg < 0) {
 161          return dummyReg;
 162       }
 163    }
 164
 165    switch (dest->File) {
 166    case PROGRAM_TEMPORARY:
 167       if (reg >= MAX_PROGRAM_TEMPS)
 168          return dummyReg;
 169       return machine->Temporaries[reg];
 170
 171    case PROGRAM_OUTPUT:
 172       if (reg >= MAX_PROGRAM_OUTPUTS)
 173          return dummyReg;
 174       return machine->Outputs[reg];
 175
 176    case PROGRAM_WRITE_ONLY:
 177       return dummyReg;
 178
 179    default:
 180       _mesa_problem(NULL,
 181          "Invalid dest register file %d in get_dst_register_pointer()",
 182          dest->File);
 183       return NULL;
 184    }
 185 }
 186
 187
 188
 189 #if FEATURE_MESA_program_debug
 190 static struct gl_program_machine *CurrentMachine = NULL;
 191
 192 /**
 193  * For GL_MESA_program_debug.
 194  * Return current value (4*GLfloat) of a program register.
 195  * Called via ctx->Driver.GetProgramRegister().
 196  */
 197 void
 198 _mesa_get_program_register(GLcontext *ctx, enum register_file file,
 199                            GLuint index, GLfloat val[4])
 200 {
 201    if (CurrentMachine) {
 202       struct prog_src_register srcReg;
 203       const GLfloat *src;
 204       srcReg.File = file;
 205       srcReg.Index = index;
 206       src = get_src_register_pointer(&srcReg, CurrentMachine);
 207       COPY_4V(val, src);
 208    }
 209 }
 210 #endif /* FEATURE_MESA_program_debug */
 211
 212
 213 /**
 214  * Fetch a 4-element float vector from the given source register.
 215  * Apply swizzling and negating as needed.
 216  */
 217 static void
 218 fetch_vector4(const struct prog_src_register *source,
 219               const struct gl_program_machine *machine, GLfloat result[4])
 220 {
 221    const GLfloat *src = get_src_register_pointer(source, machine);
 222    ASSERT(src);
 223
 224    if (source->Swizzle == SWIZZLE_NOOP) {
 225       /* no swizzling */
 226       COPY_4V(result, src);
 227    }
 228    else {
 229       ASSERT(GET_SWZ(source->Swizzle, 0) <= 3);
 230       ASSERT(GET_SWZ(source->Swizzle, 1) <= 3);
 231       ASSERT(GET_SWZ(source->Swizzle, 2) <= 3);
 232       ASSERT(GET_SWZ(source->Swizzle, 3) <= 3);
 233       result[0] = src[GET_SWZ(source->Swizzle, 0)];
 234       result[1] = src[GET_SWZ(source->Swizzle, 1)];
 235       result[2] = src[GET_SWZ(source->Swizzle, 2)];
 236       result[3] = src[GET_SWZ(source->Swizzle, 3)];
 237    }
 238
 239    if (source->NegateBase) {
 240       result[0] = -result[0];
 241       result[1] = -result[1];
 242       result[2] = -result[2];
 243       result[3] = -result[3];
 244    }
 245    if (source->Abs) {
 246       result[0] = FABSF(result[0]);
 247       result[1] = FABSF(result[1]);
 248       result[2] = FABSF(result[2]);
 249       result[3] = FABSF(result[3]);
 250    }
 251    if (source->NegateAbs) {
 252       result[0] = -result[0];
 253       result[1] = -result[1];
 254       result[2] = -result[2];
 255       result[3] = -result[3];
 256    }
 257 }
 258
 259
 260 /**
 261  * Fetch a 4-element uint vector from the given source register.
 262  * Apply swizzling but not negation/abs.
 263  */
 264 static void
 265 fetch_vector4ui(const struct prog_src_register *source,
 266                 const struct gl_program_machine *machine, GLuint result[4])
 267 {
 268    const GLuint *src = (GLuint *) get_src_register_pointer(source, machine);
 269    ASSERT(src);
 270
 271    if (source->Swizzle == SWIZZLE_NOOP) {
 272       /* no swizzling */
 273       COPY_4V(result, src);
 274    }
 275    else {
 276       ASSERT(GET_SWZ(source->Swizzle, 0) <= 3);
 277       ASSERT(GET_SWZ(source->Swizzle, 1) <= 3);
 278       ASSERT(GET_SWZ(source->Swizzle, 2) <= 3);
 279       ASSERT(GET_SWZ(source->Swizzle, 3) <= 3);
 280       result[0] = src[GET_SWZ(source->Swizzle, 0)];
 281       result[1] = src[GET_SWZ(source->Swizzle, 1)];
 282       result[2] = src[GET_SWZ(source->Swizzle, 2)];
 283       result[3] = src[GET_SWZ(source->Swizzle, 3)];
 284    }
 285
 286    /* Note: no NegateBase, Abs, NegateAbs here */
 287 }
 288
 289
 290
 291 /**
 292  * Fetch the derivative with respect to X or Y for the given register.
 293  * XXX this currently only works for fragment program input attribs.
 294  */
 295 static void
 296 fetch_vector4_deriv(GLcontext * ctx,
 297                     const struct prog_src_register *source,
 298                     const struct gl_program_machine *machine,
 299                     char xOrY, GLfloat result[4])
 300 {
 301    if (source->File == PROGRAM_INPUT &&
 302        source->Index < (GLint) machine->NumDeriv) {
 303       const GLint col = machine->CurElement;
 304       const GLfloat w = machine->Attribs[FRAG_ATTRIB_WPOS][col][3];
 305       const GLfloat invQ = 1.0f / w;
 306       GLfloat deriv[4];
 307
 308       if (xOrY == 'X') {
 309          deriv[0] = machine->DerivX[source->Index][0] * invQ;
 310          deriv[1] = machine->DerivX[source->Index][1] * invQ;
 311          deriv[2] = machine->DerivX[source->Index][2] * invQ;
 312          deriv[3] = machine->DerivX[source->Index][3] * invQ;
 313       }
 314       else {
 315          deriv[0] = machine->DerivY[source->Index][0] * invQ;
 316          deriv[1] = machine->DerivY[source->Index][1] * invQ;
 317          deriv[2] = machine->DerivY[source->Index][2] * invQ;
 318          deriv[3] = machine->DerivY[source->Index][3] * invQ;
 319       }
 320
 321       result[0] = deriv[GET_SWZ(source->Swizzle, 0)];
 322       result[1] = deriv[GET_SWZ(source->Swizzle, 1)];
 323       result[2] = deriv[GET_SWZ(source->Swizzle, 2)];
 324       result[3] = deriv[GET_SWZ(source->Swizzle, 3)];
 325
 326       if (source->NegateBase) {
 327          result[0] = -result[0];
 328          result[1] = -result[1];
 329          result[2] = -result[2];
 330          result[3] = -result[3];
 331       }
 332       if (source->Abs) {
 333          result[0] = FABSF(result[0]);
 334          result[1] = FABSF(result[1]);
 335          result[2] = FABSF(result[2]);
 336          result[3] = FABSF(result[3]);
 337       }
 338       if (source->NegateAbs) {
 339          result[0] = -result[0];
 340          result[1] = -result[1];
 341          result[2] = -result[2];
 342          result[3] = -result[3];
 343       }
 344    }
 345    else {
 346       ASSIGN_4V(result, 0.0, 0.0, 0.0, 0.0);
 347    }
 348 }
 349
 350
 351 /**
 352  * As above, but only return result[0] element.
 353  */
 354 static void
 355 fetch_vector1(const struct prog_src_register *source,
 356               const struct gl_program_machine *machine, GLfloat result[4])
 357 {
 358    const GLfloat *src = get_src_register_pointer(source, machine);
 359    ASSERT(src);
 360
 361    result[0] = src[GET_SWZ(source->Swizzle, 0)];
 362
 363    if (source->NegateBase) {
 364       result[0] = -result[0];
 365    }
 366    if (source->Abs) {
 367       result[0] = FABSF(result[0]);
 368    }
 369    if (source->NegateAbs) {
 370       result[0] = -result[0];
 371    }
 372 }
 373
 374
 375 /**
 376  * Fetch texel from texture.  Use partial derivatives when possible.
 377  */
 378 static INLINE void
 379 fetch_texel(GLcontext *ctx,
 380             const struct gl_program_machine *machine,
 381             const struct prog_instruction *inst,
 382             const GLfloat texcoord[4], GLfloat lodBias,
 383             GLfloat color[4])
 384 {
 385    const GLuint unit = machine->Samplers[inst->TexSrcUnit];
 386
 387    /* Note: we only have the right derivatives for fragment input attribs.
 388     */
 389    if (machine->NumDeriv > 0 &&
 390        inst->SrcReg[0].File == PROGRAM_INPUT &&
 391        inst->SrcReg[0].Index == FRAG_ATTRIB_TEX0 + inst->TexSrcUnit) {
 392       /* simple texture fetch for which we should have derivatives */
 393       GLuint attr = inst->SrcReg[0].Index;
 394       machine->FetchTexelDeriv(ctx, texcoord,
 395                                machine->DerivX[attr],
 396                                machine->DerivY[attr],
 397                                lodBias, unit, color);
 398    }
 399    else {
 400       machine->FetchTexelLod(ctx, texcoord, lodBias, unit, color);
 401    }
 402 }
 403
 404
 405 /**
 406  * Test value against zero and return GT, LT, EQ or UN if NaN.
 407  */
 408 static INLINE GLuint
 409 generate_cc(float value)
 410 {
 411    if (value != value)
 412       return COND_UN;           /* NaN */
 413    if (value > 0.0F)
 414       return COND_GT;
 415    if (value < 0.0F)
 416       return COND_LT;
 417    return COND_EQ;
 418 }
 419
 420
 421 /**
 422  * Test if the ccMaskRule is satisfied by the given condition code.
 423  * Used to mask destination writes according to the current condition code.
 424  */
 425 static INLINE GLboolean
 426 test_cc(GLuint condCode, GLuint ccMaskRule)
 427 {
 428    switch (ccMaskRule) {
 429    case COND_EQ: return (condCode == COND_EQ);
 430    case COND_NE: return (condCode != COND_EQ);
 431    case COND_LT: return (condCode == COND_LT);
 432    case COND_GE: return (condCode == COND_GT || condCode == COND_EQ);
 433    case COND_LE: return (condCode == COND_LT || condCode == COND_EQ);
 434    case COND_GT: return (condCode == COND_GT);
 435    case COND_TR: return GL_TRUE;
 436    case COND_FL: return GL_FALSE;
 437    default:      return GL_TRUE;
 438    }
 439 }
 440
 441
 442 /**
 443  * Evaluate the 4 condition codes against a predicate and return GL_TRUE
 444  * or GL_FALSE to indicate result.
 445  */
 446 static INLINE GLboolean
 447 eval_condition(const struct gl_program_machine *machine,
 448                const struct prog_instruction *inst)
 449 {
 450    const GLuint swizzle = inst->DstReg.CondSwizzle;
 451    const GLuint condMask = inst->DstReg.CondMask;
 452    if (test_cc(machine->CondCodes[GET_SWZ(swizzle, 0)], condMask) ||
 453        test_cc(machine->CondCodes[GET_SWZ(swizzle, 1)], condMask) ||
 454        test_cc(machine->CondCodes[GET_SWZ(swizzle, 2)], condMask) ||
 455        test_cc(machine->CondCodes[GET_SWZ(swizzle, 3)], condMask)) {
 456       return GL_TRUE;
 457    }
 458    else {
 459       return GL_FALSE;
 460    }
 461 }
 462
 463
 464
 465 /**
 466  * Store 4 floats into a register.  Observe the instructions saturate and
 467  * set-condition-code flags.
 468  */
 469 static void
 470 store_vector4(const struct prog_instruction *inst,
 471               struct gl_program_machine *machine, const GLfloat value[4])
 472 {
 473    const struct prog_dst_register *dstReg = &(inst->DstReg);
 474    const GLboolean clamp = inst->SaturateMode == SATURATE_ZERO_ONE;
 475    GLuint writeMask = dstReg->WriteMask;
 476    GLfloat clampedValue[4];
 477    GLfloat *dst = get_dst_register_pointer(dstReg, machine);
 478
 479 #if 0
 480    if (value[0] > 1.0e10 ||
 481        IS_INF_OR_NAN(value[0]) ||
 482        IS_INF_OR_NAN(value[1]) ||
 483        IS_INF_OR_NAN(value[2]) || IS_INF_OR_NAN(value[3]))
 484       printf("store %g %g %g %g\n", value[0], value[1], value[2], value[3]);
 485 #endif
 486
 487    if (clamp) {
 488       clampedValue[0] = CLAMP(value[0], 0.0F, 1.0F);
 489       clampedValue[1] = CLAMP(value[1], 0.0F, 1.0F);
 490       clampedValue[2] = CLAMP(value[2], 0.0F, 1.0F);
 491       clampedValue[3] = CLAMP(value[3], 0.0F, 1.0F);
 492       value = clampedValue;
 493    }
 494
 495    if (dstReg->CondMask != COND_TR) {
 496       /* condition codes may turn off some writes */
 497       if (writeMask & WRITEMASK_X) {
 498          if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 0)],
 499                       dstReg->CondMask))
 500             writeMask &= ~WRITEMASK_X;
 501       }
 502       if (writeMask & WRITEMASK_Y) {
 503          if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 1)],
 504                       dstReg->CondMask))
 505             writeMask &= ~WRITEMASK_Y;
 506       }
 507       if (writeMask & WRITEMASK_Z) {
 508          if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 2)],
 509                       dstReg->CondMask))
 510             writeMask &= ~WRITEMASK_Z;
 511       }
 512       if (writeMask & WRITEMASK_W) {
 513          if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 3)],
 514                       dstReg->CondMask))
 515             writeMask &= ~WRITEMASK_W;
 516       }
 517    }
 518
 519    if (writeMask & WRITEMASK_X)
 520       dst[0] = value[0];
 521    if (writeMask & WRITEMASK_Y)
 522       dst[1] = value[1];
 523    if (writeMask & WRITEMASK_Z)
 524       dst[2] = value[2];
 525    if (writeMask & WRITEMASK_W)
 526       dst[3] = value[3];
 527
 528    if (inst->CondUpdate) {
 529       if (writeMask & WRITEMASK_X)
 530          machine->CondCodes[0] = generate_cc(value[0]);
 531       if (writeMask & WRITEMASK_Y)
 532          machine->CondCodes[1] = generate_cc(value[1]);
 533       if (writeMask & WRITEMASK_Z)
 534          machine->CondCodes[2] = generate_cc(value[2]);
 535       if (writeMask & WRITEMASK_W)
 536          machine->CondCodes[3] = generate_cc(value[3]);
 537 #if DEBUG_PROG
 538       printf("CondCodes=(%s,%s,%s,%s) for:\n",
 539              _mesa_condcode_string(machine->CondCodes[0]),
 540              _mesa_condcode_string(machine->CondCodes[1]),
 541              _mesa_condcode_string(machine->CondCodes[2]),
 542              _mesa_condcode_string(machine->CondCodes[3]));
 543 #endif
 544    }
 545 }
 546
 547
 548 /**
 549  * Store 4 uints into a register.  Observe the set-condition-code flags.
 550  */
 551 static void
 552 store_vector4ui(const struct prog_instruction *inst,
 553                 struct gl_program_machine *machine, const GLuint value[4])
 554 {
 555    const struct prog_dst_register *dstReg = &(inst->DstReg);
 556    GLuint writeMask = dstReg->WriteMask;
 557    GLuint *dst = (GLuint *) get_dst_register_pointer(dstReg, machine);
 558
 559    if (dstReg->CondMask != COND_TR) {
 560       /* condition codes may turn off some writes */
 561       if (writeMask & WRITEMASK_X) {
 562          if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 0)],
 563                       dstReg->CondMask))
 564             writeMask &= ~WRITEMASK_X;
 565       }
 566       if (writeMask & WRITEMASK_Y) {
 567          if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 1)],
 568                       dstReg->CondMask))
 569             writeMask &= ~WRITEMASK_Y;
 570       }
 571       if (writeMask & WRITEMASK_Z) {
 572          if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 2)],
 573                       dstReg->CondMask))
 574             writeMask &= ~WRITEMASK_Z;
 575       }
 576       if (writeMask & WRITEMASK_W) {
 577          if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 3)],
 578                       dstReg->CondMask))
 579             writeMask &= ~WRITEMASK_W;
 580       }
 581    }
 582
 583    if (writeMask & WRITEMASK_X)
 584       dst[0] = value[0];
 585    if (writeMask & WRITEMASK_Y)
 586       dst[1] = value[1];
 587    if (writeMask & WRITEMASK_Z)
 588       dst[2] = value[2];
 589    if (writeMask & WRITEMASK_W)
 590       dst[3] = value[3];
 591
 592    if (inst->CondUpdate) {
 593       if (writeMask & WRITEMASK_X)
 594          machine->CondCodes[0] = generate_cc(value[0]);
 595       if (writeMask & WRITEMASK_Y)
 596          machine->CondCodes[1] = generate_cc(value[1]);
 597       if (writeMask & WRITEMASK_Z)
 598          machine->CondCodes[2] = generate_cc(value[2]);
 599       if (writeMask & WRITEMASK_W)
 600          machine->CondCodes[3] = generate_cc(value[3]);
 601 #if DEBUG_PROG
 602       printf("CondCodes=(%s,%s,%s,%s) for:\n",
 603              _mesa_condcode_string(machine->CondCodes[0]),
 604              _mesa_condcode_string(machine->CondCodes[1]),
 605              _mesa_condcode_string(machine->CondCodes[2]),
 606              _mesa_condcode_string(machine->CondCodes[3]));
 607 #endif
 608    }
 609 }
 610
 611
 612
 613 /**
 614  * Execute the given vertex/fragment program.
 615  *
 616  * \param ctx  rendering context
 617  * \param program  the program to execute
 618  * \param machine  machine state (must be initialized)
 619  * \return GL_TRUE if program completed or GL_FALSE if program executed KIL.
 620  */
 621 GLboolean
 622 _mesa_execute_program(GLcontext * ctx,
 623                       const struct gl_program *program,
 624                       struct gl_program_machine *machine)
 625 {
 626    const GLuint numInst = program->NumInstructions;
 627    const GLuint maxExec = 10000;
 628    GLuint pc, numExec = 0;
 629
 630    machine->CurProgram = program;
 631
 632    if (DEBUG_PROG) {
 633       printf("execute program %u --------------------\n", program->Id);
 634    }
 635
 636 #if FEATURE_MESA_program_debug
 637    CurrentMachine = machine;
 638 #endif
 639
 640    if (program->Target == GL_VERTEX_PROGRAM_ARB) {
 641       machine->EnvParams = ctx->VertexProgram.Parameters;
 642    }
 643    else {
 644       machine->EnvParams = ctx->FragmentProgram.Parameters;
 645    }
 646
 647    for (pc = 0; pc < numInst; pc++) {
 648       const struct prog_instruction *inst = program->Instructions + pc;
 649
 650 #if FEATURE_MESA_program_debug
 651       if (ctx->FragmentProgram.CallbackEnabled &&
 652           ctx->FragmentProgram.Callback) {
 653          ctx->FragmentProgram.CurrentPosition = inst->StringPos;
 654          ctx->FragmentProgram.Callback(program->Target,
 655                                        ctx->FragmentProgram.CallbackData);
 656       }
 657 #endif
 658
 659       if (DEBUG_PROG) {
 660          _mesa_print_instruction(inst);
 661       }
 662
 663       switch (inst->Opcode) {
 664       case OPCODE_ABS:
 665          {
 666             GLfloat a[4], result[4];
 667             fetch_vector4(&inst->SrcReg[0], machine, a);
 668             result[0] = FABSF(a[0]);
 669             result[1] = FABSF(a[1]);
 670             result[2] = FABSF(a[2]);
 671             result[3] = FABSF(a[3]);
 672             store_vector4(inst, machine, result);
 673          }
 674          break;
 675       case OPCODE_ADD:
 676          {
 677             GLfloat a[4], b[4], result[4];
 678             fetch_vector4(&inst->SrcReg[0], machine, a);
 679             fetch_vector4(&inst->SrcReg[1], machine, b);
 680             result[0] = a[0] + b[0];
 681             result[1] = a[1] + b[1];
 682             result[2] = a[2] + b[2];
 683             result[3] = a[3] + b[3];
 684             store_vector4(inst, machine, result);
 685             if (DEBUG_PROG) {
 686                printf("ADD (%g %g %g %g) = (%g %g %g %g) + (%g %g %g %g)\n",
 687                       result[0], result[1], result[2], result[3],
 688                       a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
 689             }
 690          }
 691          break;
 692       case OPCODE_AND:     /* bitwise AND */
 693          {
 694             GLuint a[4], b[4], result[4];
 695             fetch_vector4ui(&inst->SrcReg[0], machine, a);
 696             fetch_vector4ui(&inst->SrcReg[1], machine, b);
 697             result[0] = a[0] & b[0];
 698             result[1] = a[1] & b[1];
 699             result[2] = a[2] & b[2];
 700             result[3] = a[3] & b[3];
 701             store_vector4ui(inst, machine, result);
 702          }
 703          break;
 704       case OPCODE_ARL:
 705          {
 706             GLfloat t[4];
 707             fetch_vector4(&inst->SrcReg[0], machine, t);
 708             machine->AddressReg[0][0] = (GLint) FLOORF(t[0]);
 709          }
 710          break;
 711       case OPCODE_BGNLOOP:
 712          /* no-op */
 713          break;
 714       case OPCODE_ENDLOOP:
 715          /* subtract 1 here since pc is incremented by for(pc) loop */
 716          pc = inst->BranchTarget - 1;   /* go to matching BNGLOOP */
 717          break;
 718       case OPCODE_BGNSUB:      /* begin subroutine */
 719          break;
 720       case OPCODE_ENDSUB:      /* end subroutine */
 721          break;
 722       case OPCODE_BRA:         /* branch (conditional) */
 723          /* fall-through */
 724       case OPCODE_BRK:         /* break out of loop (conditional) */
 725          /* fall-through */
 726       case OPCODE_CONT:        /* continue loop (conditional) */
 727          if (eval_condition(machine, inst)) {
 728             /* take branch */
 729             /* Subtract 1 here since we'll do pc++ at end of for-loop */
 730             pc = inst->BranchTarget - 1;
 731          }
 732          break;
 733       case OPCODE_CAL:         /* Call subroutine (conditional) */
 734          if (eval_condition(machine, inst)) {
 735             /* call the subroutine */
 736             if (machine->StackDepth >= MAX_PROGRAM_CALL_DEPTH) {
 737                return GL_TRUE;  /* Per GL_NV_vertex_program2 spec */
 738             }
 739             machine->CallStack[machine->StackDepth++] = pc + 1; /* next inst */
 740             /* Subtract 1 here since we'll do pc++ at end of for-loop */
 741             pc = inst->BranchTarget - 1;
 742          }
 743          break;
 744       case OPCODE_CMP:
 745          {
 746             GLfloat a[4], b[4], c[4], result[4];
 747             fetch_vector4(&inst->SrcReg[0], machine, a);
 748             fetch_vector4(&inst->SrcReg[1], machine, b);
 749             fetch_vector4(&inst->SrcReg[2], machine, c);
 750             result[0] = a[0] < 0.0F ? b[0] : c[0];
 751             result[1] = a[1] < 0.0F ? b[1] : c[1];
 752             result[2] = a[2] < 0.0F ? b[2] : c[2];
 753             result[3] = a[3] < 0.0F ? b[3] : c[3];
 754             store_vector4(inst, machine, result);
 755          }
 756          break;
 757       case OPCODE_COS:
 758          {
 759             GLfloat a[4], result[4];
 760             fetch_vector1(&inst->SrcReg[0], machine, a);
 761             result[0] = result[1] = result[2] = result[3]
 762                = (GLfloat) _mesa_cos(a[0]);
 763             store_vector4(inst, machine, result);
 764          }
 765          break;
 766       case OPCODE_DDX:         /* Partial derivative with respect to X */
 767          {
 768             GLfloat result[4];
 769             fetch_vector4_deriv(ctx, &inst->SrcReg[0], machine,
 770                                 'X', result);
 771             store_vector4(inst, machine, result);
 772          }
 773          break;
 774       case OPCODE_DDY:         /* Partial derivative with respect to Y */
 775          {
 776             GLfloat result[4];
 777             fetch_vector4_deriv(ctx, &inst->SrcReg[0], machine,
 778                                 'Y', result);
 779             store_vector4(inst, machine, result);
 780          }
 781          break;
 782       case OPCODE_DP2:
 783          {
 784             GLfloat a[4], b[4], result[4];
 785             fetch_vector4(&inst->SrcReg[0], machine, a);
 786             fetch_vector4(&inst->SrcReg[1], machine, b);
 787             result[0] = result[1] = result[2] = result[3] = DOT2(a, b);
 788             store_vector4(inst, machine, result);
 789             if (DEBUG_PROG) {
 790                printf("DP2 %g = (%g %g) . (%g %g)\n",
 791                       result[0], a[0], a[1], b[0], b[1]);
 792             }
 793          }
 794          break;
 795       case OPCODE_DP2A:
 796          {
 797             GLfloat a[4], b[4], c, result[4];
 798             fetch_vector4(&inst->SrcReg[0], machine, a);
 799             fetch_vector4(&inst->SrcReg[1], machine, b);
 800             fetch_vector1(&inst->SrcReg[1], machine, &c);
 801             result[0] = result[1] = result[2] = result[3] = DOT2(a, b) + c;
 802             store_vector4(inst, machine, result);
 803             if (DEBUG_PROG) {
 804                printf("DP2A %g = (%g %g) . (%g %g) + %g\n",
 805                       result[0], a[0], a[1], b[0], b[1], c);
 806             }
 807          }
 808          break;
 809       case OPCODE_DP3:
 810          {
 811             GLfloat a[4], b[4], result[4];
 812             fetch_vector4(&inst->SrcReg[0], machine, a);
 813             fetch_vector4(&inst->SrcReg[1], machine, b);
 814             result[0] = result[1] = result[2] = result[3] = DOT3(a, b);
 815             store_vector4(inst, machine, result);
 816             if (DEBUG_PROG) {
 817                printf("DP3 %g = (%g %g %g) . (%g %g %g)\n",
 818                       result[0], a[0], a[1], a[2], b[0], b[1], b[2]);
 819             }
 820          }
 821          break;
 822       case OPCODE_DP4:
 823          {
 824             GLfloat a[4], b[4], result[4];
 825             fetch_vector4(&inst->SrcReg[0], machine, a);
 826             fetch_vector4(&inst->SrcReg[1], machine, b);
 827             result[0] = result[1] = result[2] = result[3] = DOT4(a, b);
 828             store_vector4(inst, machine, result);
 829             if (DEBUG_PROG) {
 830                printf("DP4 %g = (%g, %g %g %g) . (%g, %g %g %g)\n",
 831                       result[0], a[0], a[1], a[2], a[3],
 832                       b[0], b[1], b[2], b[3]);
 833             }
 834          }
 835          break;
 836       case OPCODE_DPH:
 837          {
 838             GLfloat a[4], b[4], result[4];
 839             fetch_vector4(&inst->SrcReg[0], machine, a);
 840             fetch_vector4(&inst->SrcReg[1], machine, b);
 841             result[0] = result[1] = result[2] = result[3] = DOT3(a, b) + b[3];
 842             store_vector4(inst, machine, result);
 843          }
 844          break;
 845       case OPCODE_DST:         /* Distance vector */
 846          {
 847             GLfloat a[4], b[4], result[4];
 848             fetch_vector4(&inst->SrcReg[0], machine, a);
 849             fetch_vector4(&inst->SrcReg[1], machine, b);
 850             result[0] = 1.0F;
 851             result[1] = a[1] * b[1];
 852             result[2] = a[2];
 853             result[3] = b[3];
 854             store_vector4(inst, machine, result);
 855          }
 856          break;
 857       case OPCODE_EXP:
 858          {
 859             GLfloat t[4], q[4], floor_t0;
 860             fetch_vector1(&inst->SrcReg[0], machine, t);
 861             floor_t0 = FLOORF(t[0]);
 862             if (floor_t0 > FLT_MAX_EXP) {
 863                SET_POS_INFINITY(q[0]);
 864                SET_POS_INFINITY(q[2]);
 865             }
 866             else if (floor_t0 < FLT_MIN_EXP) {
 867                q[0] = 0.0F;
 868                q[2] = 0.0F;
 869             }
 870             else {
 871                q[0] = LDEXPF(1.0, (int) floor_t0);
 872                /* Note: GL_NV_vertex_program expects
 873                 * result.z = result.x * APPX(result.y)
 874                 * We do what the ARB extension says.
 875                 */
 876                q[2] = (GLfloat) pow(2.0, t[0]);
 877             }
 878             q[1] = t[0] - floor_t0;
 879             q[3] = 1.0F;
 880             store_vector4( inst, machine, q );
 881          }
 882          break;
 883       case OPCODE_EX2:         /* Exponential base 2 */
 884          {
 885             GLfloat a[4], result[4];
 886             fetch_vector1(&inst->SrcReg[0], machine, a);
 887             result[0] = result[1] = result[2] = result[3] =
 888                (GLfloat) _mesa_pow(2.0, a[0]);
 889             store_vector4(inst, machine, result);
 890          }
 891          break;
 892       case OPCODE_FLR:
 893          {
 894             GLfloat a[4], result[4];
 895             fetch_vector4(&inst->SrcReg[0], machine, a);
 896             result[0] = FLOORF(a[0]);
 897             result[1] = FLOORF(a[1]);
 898             result[2] = FLOORF(a[2]);
 899             result[3] = FLOORF(a[3]);
 900             store_vector4(inst, machine, result);
 901          }
 902          break;
 903       case OPCODE_FRC:
 904          {
 905             GLfloat a[4], result[4];
 906             fetch_vector4(&inst->SrcReg[0], machine, a);
 907             result[0] = a[0] - FLOORF(a[0]);
 908             result[1] = a[1] - FLOORF(a[1]);
 909             result[2] = a[2] - FLOORF(a[2]);
 910             result[3] = a[3] - FLOORF(a[3]);
 911             store_vector4(inst, machine, result);
 912          }
 913          break;
 914       case OPCODE_IF:
 915          {
 916             GLboolean cond;
 917             /* eval condition */
 918             if (inst->SrcReg[0].File != PROGRAM_UNDEFINED) {
 919                GLfloat a[4];
 920                fetch_vector1(&inst->SrcReg[0], machine, a);
 921                cond = (a[0] != 0.0);
 922             }
 923             else {
 924                cond = eval_condition(machine, inst);
 925             }
 926             if (DEBUG_PROG) {
 927                printf("IF: %d\n", cond);
 928             }
 929             /* do if/else */
 930             if (cond) {
 931                /* do if-clause (just continue execution) */
 932             }
 933             else {
 934                /* go to the instruction after ELSE or ENDIF */
 935                assert(inst->BranchTarget >= 0);
 936                pc = inst->BranchTarget - 1;
 937             }
 938          }
 939          break;
 940       case OPCODE_ELSE:
 941          /* goto ENDIF */
 942          assert(inst->BranchTarget >= 0);
 943          pc = inst->BranchTarget - 1;
 944          break;
 945       case OPCODE_ENDIF:
 946          /* nothing */
 947          break;
 948       case OPCODE_KIL_NV:      /* NV_f_p only (conditional) */
 949          if (eval_condition(machine, inst)) {
 950             return GL_FALSE;
 951          }
 952          break;
 953       case OPCODE_KIL:         /* ARB_f_p only */
 954          {
 955             GLfloat a[4];
 956             fetch_vector4(&inst->SrcReg[0], machine, a);
 957             if (a[0] < 0.0F || a[1] < 0.0F || a[2] < 0.0F || a[3] < 0.0F) {
 958                return GL_FALSE;
 959             }
 960          }
 961          break;
 962       case OPCODE_LG2:         /* log base 2 */
 963          {
 964             GLfloat a[4], result[4];
 965             fetch_vector1(&inst->SrcReg[0], machine, a);
 966             result[0] = result[1] = result[2] = result[3] = LOG2(a[0]);
 967             store_vector4(inst, machine, result);
 968          }
 969          break;
 970       case OPCODE_LIT:
 971          {
 972             const GLfloat epsilon = 1.0F / 256.0F;      /* from NV VP spec */
 973             GLfloat a[4], result[4];
 974             fetch_vector4(&inst->SrcReg[0], machine, a);
 975             a[0] = MAX2(a[0], 0.0F);
 976             a[1] = MAX2(a[1], 0.0F);
 977             /* XXX ARB version clamps a[3], NV version doesn't */
 978             a[3] = CLAMP(a[3], -(128.0F - epsilon), (128.0F - epsilon));
 979             result[0] = 1.0F;
 980             result[1] = a[0];
 981             /* XXX we could probably just use pow() here */
 982             if (a[0] > 0.0F) {
 983                if (a[1] == 0.0 && a[3] == 0.0)
 984                   result[2] = 1.0;
 985                else
 986                   result[2] = EXPF(a[3] * LOGF(a[1]));
 987             }
 988             else {
 989                result[2] = 0.0;
 990             }
 991             result[3] = 1.0F;
 992             store_vector4(inst, machine, result);
 993             if (DEBUG_PROG) {
 994                printf("LIT (%g %g %g %g) : (%g %g %g %g)\n",
 995                       result[0], result[1], result[2], result[3],
 996                       a[0], a[1], a[2], a[3]);
 997             }
 998          }
 999          break;
1000       case OPCODE_LOG:
1001          {
1002             GLfloat t[4], q[4], abs_t0;
1003             fetch_vector1(&inst->SrcReg[0], machine, t);
1004             abs_t0 = FABSF(t[0]);
1005             if (abs_t0 != 0.0F) {
1006                /* Since we really can't handle infinite values on VMS
1007                 * like other OSes we'll use __MAXFLOAT to represent
1008                 * infinity.  This may need some tweaking.
1009                 */
1010 #ifdef VMS
1011                if (abs_t0 == __MAXFLOAT)
1012 #else
1013                if (IS_INF_OR_NAN(abs_t0))
1014 #endif
1015                {
1016                   SET_POS_INFINITY(q[0]);
1017                   q[1] = 1.0F;
1018                   SET_POS_INFINITY(q[2]);
1019                }
1020                else {
1021                   int exponent;
1022                   GLfloat mantissa = FREXPF(t[0], &exponent);
1023                   q[0] = (GLfloat) (exponent - 1);
1024                   q[1] = (GLfloat) (2.0 * mantissa); /* map [.5, 1) -> [1, 2) */
1025                   q[2] = (GLfloat) (q[0] + LOG2(q[1]));
1026                }
1027             }
1028             else {
1029                SET_NEG_INFINITY(q[0]);
1030                q[1] = 1.0F;
1031                SET_NEG_INFINITY(q[2]);
1032             }
1033             q[3] = 1.0;
1034             store_vector4(inst, machine, q);
1035          }
1036          break;
1037       case OPCODE_LRP:
1038          {
1039             GLfloat a[4], b[4], c[4], result[4];
1040             fetch_vector4(&inst->SrcReg[0], machine, a);
1041             fetch_vector4(&inst->SrcReg[1], machine, b);
1042             fetch_vector4(&inst->SrcReg[2], machine, c);
1043             result[0] = a[0] * b[0] + (1.0F - a[0]) * c[0];
1044             result[1] = a[1] * b[1] + (1.0F - a[1]) * c[1];
1045             result[2] = a[2] * b[2] + (1.0F - a[2]) * c[2];
1046             result[3] = a[3] * b[3] + (1.0F - a[3]) * c[3];
1047             store_vector4(inst, machine, result);
1048             if (DEBUG_PROG) {
1049                printf("LRP (%g %g %g %g) = (%g %g %g %g), "
1050                       "(%g %g %g %g), (%g %g %g %g)\n",
1051                       result[0], result[1], result[2], result[3],
1052                       a[0], a[1], a[2], a[3],
1053                       b[0], b[1], b[2], b[3], c[0], c[1], c[2], c[3]);
1054             }
1055          }
1056          break;
1057       case OPCODE_MAD:
1058          {
1059             GLfloat a[4], b[4], c[4], result[4];
1060             fetch_vector4(&inst->SrcReg[0], machine, a);
1061             fetch_vector4(&inst->SrcReg[1], machine, b);
1062             fetch_vector4(&inst->SrcReg[2], machine, c);
1063             result[0] = a[0] * b[0] + c[0];
1064             result[1] = a[1] * b[1] + c[1];
1065             result[2] = a[2] * b[2] + c[2];
1066             result[3] = a[3] * b[3] + c[3];
1067             store_vector4(inst, machine, result);
1068             if (DEBUG_PROG) {
1069                printf("MAD (%g %g %g %g) = (%g %g %g %g) * "
1070                       "(%g %g %g %g) + (%g %g %g %g)\n",
1071                       result[0], result[1], result[2], result[3],
1072                       a[0], a[1], a[2], a[3],
1073                       b[0], b[1], b[2], b[3], c[0], c[1], c[2], c[3]);
1074             }
1075          }
1076          break;
1077       case OPCODE_MAX:
1078          {
1079             GLfloat a[4], b[4], result[4];
1080             fetch_vector4(&inst->SrcReg[0], machine, a);
1081             fetch_vector4(&inst->SrcReg[1], machine, b);
1082             result[0] = MAX2(a[0], b[0]);
1083             result[1] = MAX2(a[1], b[1]);
1084             result[2] = MAX2(a[2], b[2]);
1085             result[3] = MAX2(a[3], b[3]);
1086             store_vector4(inst, machine, result);
1087             if (DEBUG_PROG) {
1088                printf("MAX (%g %g %g %g) = (%g %g %g %g), (%g %g %g %g)\n",
1089                       result[0], result[1], result[2], result[3],
1090                       a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
1091             }
1092          }
1093          break;
1094       case OPCODE_MIN:
1095          {
1096             GLfloat a[4], b[4], result[4];
1097             fetch_vector4(&inst->SrcReg[0], machine, a);
1098             fetch_vector4(&inst->SrcReg[1], machine, b);
1099             result[0] = MIN2(a[0], b[0]);
1100             result[1] = MIN2(a[1], b[1]);
1101             result[2] = MIN2(a[2], b[2]);
1102             result[3] = MIN2(a[3], b[3]);
1103             store_vector4(inst, machine, result);
1104          }
1105          break;
1106       case OPCODE_MOV:
1107          {
1108             GLfloat result[4];
1109             fetch_vector4(&inst->SrcReg[0], machine, result);
1110             store_vector4(inst, machine, result);
1111             if (DEBUG_PROG) {
1112                printf("MOV (%g %g %g %g)\n",
1113                       result[0], result[1], result[2], result[3]);
1114             }
1115          }
1116          break;
1117       case OPCODE_MUL:
1118          {
1119             GLfloat a[4], b[4], result[4];
1120             fetch_vector4(&inst->SrcReg[0], machine, a);
1121             fetch_vector4(&inst->SrcReg[1], machine, b);
1122             result[0] = a[0] * b[0];
1123             result[1] = a[1] * b[1];
1124             result[2] = a[2] * b[2];
1125             result[3] = a[3] * b[3];
1126             store_vector4(inst, machine, result);
1127             if (DEBUG_PROG) {
1128                printf("MUL (%g %g %g %g) = (%g %g %g %g) * (%g %g %g %g)\n",
1129                       result[0], result[1], result[2], result[3],
1130                       a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
1131             }
1132          }
1133          break;
1134       case OPCODE_NOISE1:
1135          {
1136             GLfloat a[4], result[4];
1137             fetch_vector1(&inst->SrcReg[0], machine, a);
1138             result[0] =
1139                result[1] =
1140                result[2] = result[3] = _slang_library_noise1(a[0]);
1141             store_vector4(inst, machine, result);
1142          }
1143          break;
1144       case OPCODE_NOISE2:
1145          {
1146             GLfloat a[4], result[4];
1147             fetch_vector4(&inst->SrcReg[0], machine, a);
1148             result[0] =
1149                result[1] =
1150                result[2] = result[3] = _slang_library_noise2(a[0], a[1]);
1151             store_vector4(inst, machine, result);
1152          }
1153          break;
1154       case OPCODE_NOISE3:
1155          {
1156             GLfloat a[4], result[4];
1157             fetch_vector4(&inst->SrcReg[0], machine, a);
1158             result[0] =
1159                result[1] =
1160                result[2] =
1161                result[3] = _slang_library_noise3(a[0], a[1], a[2]);
1162             store_vector4(inst, machine, result);
1163          }
1164          break;
1165       case OPCODE_NOISE4:
1166          {
1167             GLfloat a[4], result[4];
1168             fetch_vector4(&inst->SrcReg[0], machine, a);
1169             result[0] =
1170                result[1] =
1171                result[2] =
1172                result[3] = _slang_library_noise4(a[0], a[1], a[2], a[3]);
1173             store_vector4(inst, machine, result);
1174          }
1175          break;
1176       case OPCODE_NOP:
1177          break;
1178       case OPCODE_NOT:         /* bitwise NOT */
1179          {
1180             GLuint a[4], result[4];
1181             fetch_vector4ui(&inst->SrcReg[0], machine, a);
1182             result[0] = ~a[0];
1183             result[1] = ~a[1];
1184             result[2] = ~a[2];
1185             result[3] = ~a[3];
1186             store_vector4ui(inst, machine, result);
1187          }
1188          break;
1189       case OPCODE_NRM3:        /* 3-component normalization */
1190          {
1191             GLfloat a[4], result[4];
1192             GLfloat tmp;
1193             fetch_vector4(&inst->SrcReg[0], machine, a);
1194             tmp = a[0] * a[0] + a[1] * a[1] + a[2] * a[2];
1195             if (tmp != 0.0F)
1196                tmp = INV_SQRTF(tmp);
1197             result[0] = tmp * a[0];
1198             result[1] = tmp * a[1];
1199             result[2] = tmp * a[2];
1200             result[3] = 0.0;  /* undefined, but prevent valgrind warnings */
1201             store_vector4(inst, machine, result);
1202          }
1203          break;
1204       case OPCODE_NRM4:        /* 4-component normalization */
1205          {
1206             GLfloat a[4], result[4];
1207             GLfloat tmp;
1208             fetch_vector4(&inst->SrcReg[0], machine, a);
1209             tmp = a[0] * a[0] + a[1] * a[1] + a[2] * a[2] + a[3] * a[3];
1210             if (tmp != 0.0F)
1211                tmp = INV_SQRTF(tmp);
1212             result[0] = tmp * a[0];
1213             result[1] = tmp * a[1];
1214             result[2] = tmp * a[2];
1215             result[3] = tmp * a[3];
1216             store_vector4(inst, machine, result);
1217          }
1218          break;
1219       case OPCODE_OR:          /* bitwise OR */
1220          {
1221             GLuint a[4], b[4], result[4];
1222             fetch_vector4ui(&inst->SrcReg[0], machine, a);
1223             fetch_vector4ui(&inst->SrcReg[1], machine, b);
1224             result[0] = a[0] | b[0];
1225             result[1] = a[1] | b[1];
1226             result[2] = a[2] | b[2];
1227             result[3] = a[3] | b[3];
1228             store_vector4ui(inst, machine, result);
1229          }
1230          break;
1231       case OPCODE_PK2H:        /* pack two 16-bit floats in one 32-bit float */
1232          {
1233             GLfloat a[4];
1234             GLuint result[4];
1235             GLhalfNV hx, hy;
1236             fetch_vector4(&inst->SrcReg[0], machine, a);
1237             hx = _mesa_float_to_half(a[0]);
1238             hy = _mesa_float_to_half(a[1]);
1239             result[0] =
1240             result[1] =
1241             result[2] =
1242             result[3] = hx | (hy << 16);
1243             store_vector4ui(inst, machine, result);
1244          }
1245          break;
1246       case OPCODE_PK2US:       /* pack two GLushorts into one 32-bit float */
1247          {
1248             GLfloat a[4];
1249             GLuint result[4], usx, usy;
1250             fetch_vector4(&inst->SrcReg[0], machine, a);
1251             a[0] = CLAMP(a[0], 0.0F, 1.0F);
1252             a[1] = CLAMP(a[1], 0.0F, 1.0F);
1253             usx = IROUND(a[0] * 65535.0F);
1254             usy = IROUND(a[1] * 65535.0F);
1255             result[0] =
1256             result[1] =
1257             result[2] =
1258             result[3] = usx | (usy << 16);
1259             store_vector4ui(inst, machine, result);
1260          }
1261          break;
1262       case OPCODE_PK4B:        /* pack four GLbytes into one 32-bit float */
1263          {
1264             GLfloat a[4];
1265             GLuint result[4], ubx, uby, ubz, ubw;
1266             fetch_vector4(&inst->SrcReg[0], machine, a);
1267             a[0] = CLAMP(a[0], -128.0F / 127.0F, 1.0F);
1268             a[1] = CLAMP(a[1], -128.0F / 127.0F, 1.0F);
1269             a[2] = CLAMP(a[2], -128.0F / 127.0F, 1.0F);
1270             a[3] = CLAMP(a[3], -128.0F / 127.0F, 1.0F);
1271             ubx = IROUND(127.0F * a[0] + 128.0F);
1272             uby = IROUND(127.0F * a[1] + 128.0F);
1273             ubz = IROUND(127.0F * a[2] + 128.0F);
1274             ubw = IROUND(127.0F * a[3] + 128.0F);
1275             result[0] =
1276             result[1] =
1277             result[2] =
1278             result[3] = ubx | (uby << 8) | (ubz << 16) | (ubw << 24);
1279             store_vector4ui(inst, machine, result);
1280          }
1281          break;
1282       case OPCODE_PK4UB:       /* pack four GLubytes into one 32-bit float */
1283          {
1284             GLfloat a[4];
1285             GLuint result[4], ubx, uby, ubz, ubw;
1286             fetch_vector4(&inst->SrcReg[0], machine, a);
1287             a[0] = CLAMP(a[0], 0.0F, 1.0F);
1288             a[1] = CLAMP(a[1], 0.0F, 1.0F);
1289             a[2] = CLAMP(a[2], 0.0F, 1.0F);
1290             a[3] = CLAMP(a[3], 0.0F, 1.0F);
1291             ubx = IROUND(255.0F * a[0]);
1292             uby = IROUND(255.0F * a[1]);
1293             ubz = IROUND(255.0F * a[2]);
1294             ubw = IROUND(255.0F * a[3]);
1295             result[0] =
1296             result[1] =
1297             result[2] =
1298             result[3] = ubx | (uby << 8) | (ubz << 16) | (ubw << 24);
1299             store_vector4ui(inst, machine, result);
1300          }
1301          break;
1302       case OPCODE_POW:
1303          {
1304             GLfloat a[4], b[4], result[4];
1305             fetch_vector1(&inst->SrcReg[0], machine, a);
1306             fetch_vector1(&inst->SrcReg[1], machine, b);
1307             result[0] = result[1] = result[2] = result[3]
1308                = (GLfloat) _mesa_pow(a[0], b[0]);
1309             store_vector4(inst, machine, result);
1310          }
1311          break;
1312       case OPCODE_RCP:
1313          {
1314             GLfloat a[4], result[4];
1315             fetch_vector1(&inst->SrcReg[0], machine, a);
1316             if (DEBUG_PROG) {
1317                if (a[0] == 0)
1318                   printf("RCP(0)\n");
1319                else if (IS_INF_OR_NAN(a[0]))
1320                   printf("RCP(inf)\n");
1321             }
1322             result[0] = result[1] = result[2] = result[3] = 1.0F / a[0];
1323             store_vector4(inst, machine, result);
1324          }
1325          break;
1326       case OPCODE_RET:         /* return from subroutine (conditional) */
1327          if (eval_condition(machine, inst)) {
1328             if (machine->StackDepth == 0) {
1329                return GL_TRUE;  /* Per GL_NV_vertex_program2 spec */
1330             }
1331             /* subtract one because of pc++ in the for loop */
1332             pc = machine->CallStack[--machine->StackDepth] - 1;
1333          }
1334          break;
1335       case OPCODE_RFL:         /* reflection vector */
1336          {
1337             GLfloat axis[4], dir[4], result[4], tmpX, tmpW;
1338             fetch_vector4(&inst->SrcReg[0], machine, axis);
1339             fetch_vector4(&inst->SrcReg[1], machine, dir);
1340             tmpW = DOT3(axis, axis);
1341             tmpX = (2.0F * DOT3(axis, dir)) / tmpW;
1342             result[0] = tmpX * axis[0] - dir[0];
1343             result[1] = tmpX * axis[1] - dir[1];
1344             result[2] = tmpX * axis[2] - dir[2];
1345             /* result[3] is never written! XXX enforce in parser! */
1346             store_vector4(inst, machine, result);
1347          }
1348          break;
1349       case OPCODE_RSQ:         /* 1 / sqrt() */
1350          {
1351             GLfloat a[4], result[4];
1352             fetch_vector1(&inst->SrcReg[0], machine, a);
1353             a[0] = FABSF(a[0]);
1354             result[0] = result[1] = result[2] = result[3] = INV_SQRTF(a[0]);
1355             store_vector4(inst, machine, result);
1356             if (DEBUG_PROG) {
1357                printf("RSQ %g = 1/sqrt(|%g|)\n", result[0], a[0]);
1358             }
1359          }
1360          break;
1361       case OPCODE_SCS:         /* sine and cos */
1362          {
1363             GLfloat a[4], result[4];
1364             fetch_vector1(&inst->SrcReg[0], machine, a);
1365             result[0] = (GLfloat) _mesa_cos(a[0]);
1366             result[1] = (GLfloat) _mesa_sin(a[0]);
1367             result[2] = 0.0;    /* undefined! */
1368             result[3] = 0.0;    /* undefined! */
1369             store_vector4(inst, machine, result);
1370          }
1371          break;
1372       case OPCODE_SEQ:         /* set on equal */
1373          {
1374             GLfloat a[4], b[4], result[4];
1375             fetch_vector4(&inst->SrcReg[0], machine, a);
1376             fetch_vector4(&inst->SrcReg[1], machine, b);
1377             result[0] = (a[0] == b[0]) ? 1.0F : 0.0F;
1378             result[1] = (a[1] == b[1]) ? 1.0F : 0.0F;
1379             result[2] = (a[2] == b[2]) ? 1.0F : 0.0F;
1380             result[3] = (a[3] == b[3]) ? 1.0F : 0.0F;
1381             store_vector4(inst, machine, result);
1382             if (DEBUG_PROG) {
1383                printf("SEQ (%g %g %g %g) = (%g %g %g %g) == (%g %g %g %g)\n",
1384                       result[0], result[1], result[2], result[3],
1385                       a[0], a[1], a[2], a[3],
1386                       b[0], b[1], b[2], b[3]);
1387             }
1388          }
1389          break;
1390       case OPCODE_SFL:         /* set false, operands ignored */
1391          {
1392             static const GLfloat result[4] = { 0.0F, 0.0F, 0.0F, 0.0F };
1393             store_vector4(inst, machine, result);
1394          }
1395          break;
1396       case OPCODE_SGE:         /* set on greater or equal */
1397          {
1398             GLfloat a[4], b[4], result[4];
1399             fetch_vector4(&inst->SrcReg[0], machine, a);
1400             fetch_vector4(&inst->SrcReg[1], machine, b);
1401             result[0] = (a[0] >= b[0]) ? 1.0F : 0.0F;
1402             result[1] = (a[1] >= b[1]) ? 1.0F : 0.0F;
1403             result[2] = (a[2] >= b[2]) ? 1.0F : 0.0F;
1404             result[3] = (a[3] >= b[3]) ? 1.0F : 0.0F;
1405             store_vector4(inst, machine, result);
1406             if (DEBUG_PROG) {
1407                printf("SGE (%g %g %g %g) = (%g %g %g %g) >= (%g %g %g %g)\n",
1408                       result[0], result[1], result[2], result[3],
1409                       a[0], a[1], a[2], a[3],
1410                       b[0], b[1], b[2], b[3]);
1411             }
1412          }
1413          break;
1414       case OPCODE_SGT:         /* set on greater */
1415          {
1416             GLfloat a[4], b[4], result[4];
1417             fetch_vector4(&inst->SrcReg[0], machine, a);
1418             fetch_vector4(&inst->SrcReg[1], machine, b);
1419             result[0] = (a[0] > b[0]) ? 1.0F : 0.0F;
1420             result[1] = (a[1] > b[1]) ? 1.0F : 0.0F;
1421             result[2] = (a[2] > b[2]) ? 1.0F : 0.0F;
1422             result[3] = (a[3] > b[3]) ? 1.0F : 0.0F;
1423             store_vector4(inst, machine, result);
1424             if (DEBUG_PROG) {
1425                printf("SGT (%g %g %g %g) = (%g %g %g %g) > (%g %g %g %g)\n",
1426                       result[0], result[1], result[2], result[3],
1427                       a[0], a[1], a[2], a[3],
1428                       b[0], b[1], b[2], b[3]);
1429             }
1430          }
1431          break;
1432       case OPCODE_SIN:
1433          {
1434             GLfloat a[4], result[4];
1435             fetch_vector1(&inst->SrcReg[0], machine, a);
1436             result[0] = result[1] = result[2] = result[3]
1437                = (GLfloat) _mesa_sin(a[0]);
1438             store_vector4(inst, machine, result);
1439          }
1440          break;
1441       case OPCODE_SLE:         /* set on less or equal */
1442          {
1443             GLfloat a[4], b[4], result[4];
1444             fetch_vector4(&inst->SrcReg[0], machine, a);
1445             fetch_vector4(&inst->SrcReg[1], machine, b);
1446             result[0] = (a[0] <= b[0]) ? 1.0F : 0.0F;
1447             result[1] = (a[1] <= b[1]) ? 1.0F : 0.0F;
1448             result[2] = (a[2] <= b[2]) ? 1.0F : 0.0F;
1449             result[3] = (a[3] <= b[3]) ? 1.0F : 0.0F;
1450             store_vector4(inst, machine, result);
1451             if (DEBUG_PROG) {
1452                printf("SLE (%g %g %g %g) = (%g %g %g %g) <= (%g %g %g %g)\n",
1453                       result[0], result[1], result[2], result[3],
1454                       a[0], a[1], a[2], a[3],
1455                       b[0], b[1], b[2], b[3]);
1456             }
1457          }
1458          break;
1459       case OPCODE_SLT:         /* set on less */
1460          {
1461             GLfloat a[4], b[4], result[4];
1462             fetch_vector4(&inst->SrcReg[0], machine, a);
1463             fetch_vector4(&inst->SrcReg[1], machine, b);
1464             result[0] = (a[0] < b[0]) ? 1.0F : 0.0F;
1465             result[1] = (a[1] < b[1]) ? 1.0F : 0.0F;
1466             result[2] = (a[2] < b[2]) ? 1.0F : 0.0F;
1467             result[3] = (a[3] < b[3]) ? 1.0F : 0.0F;
1468             store_vector4(inst, machine, result);
1469             if (DEBUG_PROG) {
1470                printf("SLT (%g %g %g %g) = (%g %g %g %g) < (%g %g %g %g)\n",
1471                       result[0], result[1], result[2], result[3],
1472                       a[0], a[1], a[2], a[3],
1473                       b[0], b[1], b[2], b[3]);
1474             }
1475          }
1476          break;
1477       case OPCODE_SNE:         /* set on not equal */
1478          {
1479             GLfloat a[4], b[4], result[4];
1480             fetch_vector4(&inst->SrcReg[0], machine, a);
1481             fetch_vector4(&inst->SrcReg[1], machine, b);
1482             result[0] = (a[0] != b[0]) ? 1.0F : 0.0F;
1483             result[1] = (a[1] != b[1]) ? 1.0F : 0.0F;
1484             result[2] = (a[2] != b[2]) ? 1.0F : 0.0F;
1485             result[3] = (a[3] != b[3]) ? 1.0F : 0.0F;
1486             store_vector4(inst, machine, result);
1487             if (DEBUG_PROG) {
1488                printf("SNE (%g %g %g %g) = (%g %g %g %g) != (%g %g %g %g)\n",
1489                       result[0], result[1], result[2], result[3],
1490                       a[0], a[1], a[2], a[3],
1491                       b[0], b[1], b[2], b[3]);
1492             }
1493          }
1494          break;
1495       case OPCODE_SSG:         /* set sign (-1, 0 or +1) */
1496          {
1497             GLfloat a[4], result[4];
1498             fetch_vector4(&inst->SrcReg[0], machine, a);
1499             result[0] = (GLfloat) ((a[0] > 0.0F) - (a[0] < 0.0F));
1500             result[1] = (GLfloat) ((a[1] > 0.0F) - (a[1] < 0.0F));
1501             result[2] = (GLfloat) ((a[2] > 0.0F) - (a[2] < 0.0F));
1502             result[3] = (GLfloat) ((a[3] > 0.0F) - (a[3] < 0.0F));
1503             store_vector4(inst, machine, result);
1504          }
1505          break;
1506       case OPCODE_STR:         /* set true, operands ignored */
1507          {
1508             static const GLfloat result[4] = { 1.0F, 1.0F, 1.0F, 1.0F };
1509             store_vector4(inst, machine, result);
1510          }
1511          break;
1512       case OPCODE_SUB:
1513          {
1514             GLfloat a[4], b[4], result[4];
1515             fetch_vector4(&inst->SrcReg[0], machine, a);
1516             fetch_vector4(&inst->SrcReg[1], machine, b);
1517             result[0] = a[0] - b[0];
1518             result[1] = a[1] - b[1];
1519             result[2] = a[2] - b[2];
1520             result[3] = a[3] - b[3];
1521             store_vector4(inst, machine, result);
1522             if (DEBUG_PROG) {
1523                printf("SUB (%g %g %g %g) = (%g %g %g %g) - (%g %g %g %g)\n",
1524                       result[0], result[1], result[2], result[3],
1525                       a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
1526             }
1527          }
1528          break;
1529       case OPCODE_SWZ:         /* extended swizzle */
1530          {
1531             const struct prog_src_register *source = &inst->SrcReg[0];
1532             const GLfloat *src = get_src_register_pointer(source, machine);
1533             GLfloat result[4];
1534             GLuint i;
1535             for (i = 0; i < 4; i++) {
1536                const GLuint swz = GET_SWZ(source->Swizzle, i);
1537                if (swz == SWIZZLE_ZERO)
1538                   result[i] = 0.0;
1539                else if (swz == SWIZZLE_ONE)
1540                   result[i] = 1.0;
1541                else {
1542                   ASSERT(swz >= 0);
1543                   ASSERT(swz <= 3);
1544                   result[i] = src[swz];
1545                }
1546                if (source->NegateBase & (1 << i))
1547                   result[i] = -result[i];
1548             }
1549             store_vector4(inst, machine, result);
1550          }
1551          break;
1552       case OPCODE_TEX:         /* Both ARB and NV frag prog */
1553          /* Simple texel lookup */
1554          {
1555             GLfloat texcoord[4], color[4];
1556             fetch_vector4(&inst->SrcReg[0], machine, texcoord);
1557
1558             fetch_texel(ctx, machine, inst, texcoord, 0.0, color);
1559
1560             if (DEBUG_PROG) {
1561                printf("TEX (%g, %g, %g, %g) = texture[%d][%g, %g, %g, %g]\n",
1562                       color[0], color[1], color[2], color[3],
1563                       inst->TexSrcUnit,
1564                       texcoord[0], texcoord[1], texcoord[2], texcoord[3]);
1565             }
1566             store_vector4(inst, machine, color);
1567          }
1568          break;
1569       case OPCODE_TXB:         /* GL_ARB_fragment_program only */
1570          /* Texel lookup with LOD bias */
1571          {
1572             const struct gl_texture_unit *texUnit
1573                = &ctx->Texture.Unit[inst->TexSrcUnit];
1574             GLfloat texcoord[4], color[4], lodBias;
1575
1576             fetch_vector4(&inst->SrcReg[0], machine, texcoord);
1577
1578             /* texcoord[3] is the bias to add to lambda */
1579             lodBias = texUnit->LodBias + texcoord[3];
1580             if (texUnit->_Current) {
1581                lodBias += texUnit->_Current->LodBias;
1582             }
1583
1584             fetch_texel(ctx, machine, inst, texcoord, lodBias, color);
1585
1586             store_vector4(inst, machine, color);
1587          }
1588          break;
1589       case OPCODE_TXD:         /* GL_NV_fragment_program only */
1590          /* Texture lookup w/ partial derivatives for LOD */
1591          {
1592             GLfloat texcoord[4], dtdx[4], dtdy[4], color[4];
1593             fetch_vector4(&inst->SrcReg[0], machine, texcoord);
1594             fetch_vector4(&inst->SrcReg[1], machine, dtdx);
1595             fetch_vector4(&inst->SrcReg[2], machine, dtdy);
1596             machine->FetchTexelDeriv(ctx, texcoord, dtdx, dtdy,
1597                                      0.0, /* lodBias */
1598                                      inst->TexSrcUnit, color);
1599             store_vector4(inst, machine, color);
1600          }
1601          break;
1602       case OPCODE_TXP:         /* GL_ARB_fragment_program only */
1603          /* Texture lookup w/ projective divide */
1604          {
1605             GLfloat texcoord[4], color[4];
1606
1607             fetch_vector4(&inst->SrcReg[0], machine, texcoord);
1608             /* Not so sure about this test - if texcoord[3] is
1609              * zero, we'd probably be fine except for an ASSERT in
1610              * IROUND_POS() which gets triggered by the inf values created.
1611              */
1612             if (texcoord[3] != 0.0) {
1613                texcoord[0] /= texcoord[3];
1614                texcoord[1] /= texcoord[3];
1615                texcoord[2] /= texcoord[3];
1616             }
1617
1618             fetch_texel(ctx, machine, inst, texcoord, 0.0, color);
1619
1620             store_vector4(inst, machine, color);
1621          }
1622          break;
1623       case OPCODE_TXP_NV:      /* GL_NV_fragment_program only */
1624          /* Texture lookup w/ projective divide, as above, but do not
1625           * do the divide by w if sampling from a cube map.
1626           */
1627          {
1628             GLfloat texcoord[4], color[4];
1629
1630             fetch_vector4(&inst->SrcReg[0], machine, texcoord);
1631             if (inst->TexSrcTarget != TEXTURE_CUBE_INDEX &&
1632                 texcoord[3] != 0.0) {
1633                texcoord[0] /= texcoord[3];
1634                texcoord[1] /= texcoord[3];
1635                texcoord[2] /= texcoord[3];
1636             }
1637
1638             fetch_texel(ctx, machine, inst, texcoord, 0.0, color);
1639
1640             store_vector4(inst, machine, color);
1641          }
1642          break;
1643       case OPCODE_TRUNC:       /* truncate toward zero */
1644          {
1645             GLfloat a[4], result[4];
1646             fetch_vector4(&inst->SrcReg[0], machine, a);
1647             result[0] = (GLfloat) (GLint) a[0];
1648             result[1] = (GLfloat) (GLint) a[1];
1649             result[2] = (GLfloat) (GLint) a[2];
1650             result[3] = (GLfloat) (GLint) a[3];
1651             store_vector4(inst, machine, result);
1652          }
1653          break;
1654       case OPCODE_UP2H:        /* unpack two 16-bit floats */
1655          {
1656             GLfloat a[4], result[4];
1657             const GLuint *rawBits = (const GLuint *) a;
1658             GLhalfNV hx, hy;
1659             fetch_vector1(&inst->SrcReg[0], machine, a);
1660             hx = rawBits[0] & 0xffff;
1661             hy = rawBits[0] >> 16;
1662             result[0] = result[2] = _mesa_half_to_float(hx);
1663             result[1] = result[3] = _mesa_half_to_float(hy);
1664             store_vector4(inst, machine, result);
1665          }
1666          break;
1667       case OPCODE_UP2US:       /* unpack two GLushorts */
1668          {
1669             GLfloat a[4], result[4];
1670             const GLuint *rawBits = (const GLuint *) a;
1671             GLushort usx, usy;
1672             fetch_vector1(&inst->SrcReg[0], machine, a);
1673             usx = rawBits[0] & 0xffff;
1674             usy = rawBits[0] >> 16;
1675             result[0] = result[2] = usx * (1.0f / 65535.0f);
1676             result[1] = result[3] = usy * (1.0f / 65535.0f);
1677             store_vector4(inst, machine, result);
1678          }
1679          break;
1680       case OPCODE_UP4B:        /* unpack four GLbytes */
1681          {
1682             GLfloat a[4], result[4];
1683             const GLuint *rawBits = (const GLuint *) a;
1684             fetch_vector1(&inst->SrcReg[0], machine, a);
1685             result[0] = (((rawBits[0] >> 0) & 0xff) - 128) / 127.0F;
1686             result[1] = (((rawBits[0] >> 8) & 0xff) - 128) / 127.0F;
1687             result[2] = (((rawBits[0] >> 16) & 0xff) - 128) / 127.0F;
1688             result[3] = (((rawBits[0] >> 24) & 0xff) - 128) / 127.0F;
1689             store_vector4(inst, machine, result);
1690          }
1691          break;
1692       case OPCODE_UP4UB:       /* unpack four GLubytes */
1693          {
1694             GLfloat a[4], result[4];
1695             const GLuint *rawBits = (const GLuint *) a;
1696             fetch_vector1(&inst->SrcReg[0], machine, a);
1697             result[0] = ((rawBits[0] >> 0) & 0xff) / 255.0F;
1698             result[1] = ((rawBits[0] >> 8) & 0xff) / 255.0F;
1699             result[2] = ((rawBits[0] >> 16) & 0xff) / 255.0F;
1700             result[3] = ((rawBits[0] >> 24) & 0xff) / 255.0F;
1701             store_vector4(inst, machine, result);
1702          }
1703          break;
1704       case OPCODE_XOR:         /* bitwise XOR */
1705          {
1706             GLuint a[4], b[4], result[4];
1707             fetch_vector4ui(&inst->SrcReg[0], machine, a);
1708             fetch_vector4ui(&inst->SrcReg[1], machine, b);
1709             result[0] = a[0] ^ b[0];
1710             result[1] = a[1] ^ b[1];
1711             result[2] = a[2] ^ b[2];
1712             result[3] = a[3] ^ b[3];
1713             store_vector4ui(inst, machine, result);
1714          }
1715          break;
1716       case OPCODE_XPD:         /* cross product */
1717          {
1718             GLfloat a[4], b[4], result[4];
1719             fetch_vector4(&inst->SrcReg[0], machine, a);
1720             fetch_vector4(&inst->SrcReg[1], machine, b);
1721             result[0] = a[1] * b[2] - a[2] * b[1];
1722             result[1] = a[2] * b[0] - a[0] * b[2];
1723             result[2] = a[0] * b[1] - a[1] * b[0];
1724             result[3] = 1.0;
1725             store_vector4(inst, machine, result);
1726             if (DEBUG_PROG) {
1727                printf("XPD (%g %g %g %g) = (%g %g %g) X (%g %g %g)\n",
1728                       result[0], result[1], result[2], result[3],
1729                       a[0], a[1], a[2], b[0], b[1], b[2]);
1730             }
1731          }
1732          break;
1733       case OPCODE_X2D:         /* 2-D matrix transform */
1734          {
1735             GLfloat a[4], b[4], c[4], result[4];
1736             fetch_vector4(&inst->SrcReg[0], machine, a);
1737             fetch_vector4(&inst->SrcReg[1], machine, b);
1738             fetch_vector4(&inst->SrcReg[2], machine, c);
1739             result[0] = a[0] + b[0] * c[0] + b[1] * c[1];
1740             result[1] = a[1] + b[0] * c[2] + b[1] * c[3];
1741             result[2] = a[2] + b[0] * c[0] + b[1] * c[1];
1742             result[3] = a[3] + b[0] * c[2] + b[1] * c[3];
1743             store_vector4(inst, machine, result);
1744          }
1745          break;
1746       case OPCODE_PRINT:
1747          {
1748             if (inst->SrcReg[0].File != -1) {
1749                GLfloat a[4];
1750                fetch_vector4(&inst->SrcReg[0], machine, a);
1751                _mesa_printf("%s%g, %g, %g, %g\n", (const char *) inst->Data,
1752                             a[0], a[1], a[2], a[3]);
1753             }
1754             else {
1755                _mesa_printf("%s\n", (const char *) inst->Data);
1756             }
1757          }
1758          break;
1759       case OPCODE_END:
1760          return GL_TRUE;
1761       default:
1762          _mesa_problem(ctx, "Bad opcode %d in _mesa_execute_program",
1763                        inst->Opcode);
1764          return GL_TRUE;        /* return value doesn't matter */
1765       }
1766
1767       numExec++;
1768       if (numExec > maxExec) {
1769          _mesa_problem(ctx, "Infinite loop detected in fragment program");
1770          return GL_TRUE;
1771       }
1772
1773    } /* for pc */
1774
1775 #if FEATURE_MESA_program_debug
1776    CurrentMachine = NULL;
1777 #endif
1778
1779    return GL_TRUE;
1780 }