src/mesa/program/prog_execute.c

   1 /*
   2  * Mesa 3-D graphics library
   3  *
   4  * Copyright (C) 1999-2008  Brian Paul   All Rights Reserved.
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a
   7  * copy of this software and associated documentation files (the "Software"),
   8  * to deal in the Software without restriction, including without limitation
   9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  10  * and/or sell copies of the Software, and to permit persons to whom the
  11  * Software is furnished to do so, subject to the following conditions:
  12  *
  13  * The above copyright notice and this permission notice shall be included
  14  * in all copies or substantial portions of the Software.
  15  *
  16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
  20  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  21  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  22  * OTHER DEALINGS IN THE SOFTWARE.
  23  */
  24
  25 /**
  26  * \file prog_execute.c
  27  * Software interpreter for vertex/fragment programs.
  28  * \author Brian Paul
  29  */
  30
  31 /*
  32  * NOTE: we do everything in single-precision floating point; we don't
  33  * currently observe the single/half/fixed-precision qualifiers.
  34  *
  35  */
  36
  37
  38 #include "c99_math.h"
  39 #include "main/errors.h"
  40 #include "main/glheader.h"
  41 #include "main/macros.h"
  42 #include "main/mtypes.h"
  43 #include "prog_execute.h"
  44 #include "prog_instruction.h"
  45 #include "prog_parameter.h"
  46 #include "prog_print.h"
  47 #include "prog_noise.h"
  48
  49
  50 /* debug predicate */
  51 #define DEBUG_PROG 0
  52
  53
  54 /**
  55  * Set x to positive or negative infinity.
  56  */
  57 #define SET_POS_INFINITY(x)                  \
  58    do {                                      \
  59          fi_type fi;                         \
  60          fi.i = 0x7F800000;                  \
  61          x = fi.f;                           \
  62    } while (0)
  63 #define SET_NEG_INFINITY(x)                  \
  64    do {                                      \
  65          fi_type fi;                         \
  66          fi.i = 0xFF800000;                  \
  67          x = fi.f;                           \
  68    } while (0)
  69
  70 #define SET_FLOAT_BITS(x, bits) ((fi_type *) (void *) &(x))->i = bits
  71
  72
  73 static const GLfloat ZeroVec[4] = { 0.0F, 0.0F, 0.0F, 0.0F };
  74
  75
  76 /**
  77  * Return a pointer to the 4-element float vector specified by the given
  78  * source register.
  79  */
  80 static inline const GLfloat *
  81 get_src_register_pointer(const struct prog_src_register *source,
  82                          const struct gl_program_machine *machine)
  83 {
  84    const struct gl_program *prog = machine->CurProgram;
  85    GLint reg = source->Index;
  86
  87    if (source->RelAddr) {
  88       /* add address register value to src index/offset */
  89       reg += machine->AddressReg[0][0];
  90       if (reg < 0) {
  91          return ZeroVec;
  92       }
  93    }
  94
  95    switch (source->File) {
  96    case PROGRAM_TEMPORARY:
  97       if (reg >= MAX_PROGRAM_TEMPS)
  98          return ZeroVec;
  99       return machine->Temporaries[reg];
 100
 101    case PROGRAM_INPUT:
 102       if (prog->Target == GL_VERTEX_PROGRAM_ARB) {
 103          if (reg >= VERT_ATTRIB_MAX)
 104             return ZeroVec;
 105          return machine->VertAttribs[reg];
 106       }
 107       else {
 108          if (reg >= VARYING_SLOT_MAX)
 109             return ZeroVec;
 110          return machine->Attribs[reg][machine->CurElement];
 111       }
 112
 113    case PROGRAM_OUTPUT:
 114       if (reg >= MAX_PROGRAM_OUTPUTS)
 115          return ZeroVec;
 116       return machine->Outputs[reg];
 117
 118    case PROGRAM_STATE_VAR:
 119       /* Fallthrough */
 120    case PROGRAM_CONSTANT:
 121       /* Fallthrough */
 122    case PROGRAM_UNIFORM: {
 123       if (reg >= (GLint) prog->Parameters->NumParameters)
 124          return ZeroVec;
 125
 126       unsigned pvo = prog->Parameters->ParameterValueOffset[reg];
 127       return (GLfloat *) prog->Parameters->ParameterValues + pvo;
 128    }
 129    case PROGRAM_SYSTEM_VALUE:
 130       assert(reg < (GLint) ARRAY_SIZE(machine->SystemValues));
 131       return machine->SystemValues[reg];
 132
 133    default:
 134       _mesa_problem(NULL,
 135          "Invalid src register file %d in get_src_register_pointer()",
 136          source->File);
 137       return ZeroVec;
 138    }
 139 }
 140
 141
 142 /**
 143  * Return a pointer to the 4-element float vector specified by the given
 144  * destination register.
 145  */
 146 static inline GLfloat *
 147 get_dst_register_pointer(const struct prog_dst_register *dest,
 148                          struct gl_program_machine *machine)
 149 {
 150    static GLfloat dummyReg[4];
 151    GLint reg = dest->Index;
 152
 153    if (dest->RelAddr) {
 154       /* add address register value to src index/offset */
 155       reg += machine->AddressReg[0][0];
 156       if (reg < 0) {
 157          return dummyReg;
 158       }
 159    }
 160
 161    switch (dest->File) {
 162    case PROGRAM_TEMPORARY:
 163       if (reg >= MAX_PROGRAM_TEMPS)
 164          return dummyReg;
 165       return machine->Temporaries[reg];
 166
 167    case PROGRAM_OUTPUT:
 168       if (reg >= MAX_PROGRAM_OUTPUTS)
 169          return dummyReg;
 170       return machine->Outputs[reg];
 171
 172    default:
 173       _mesa_problem(NULL,
 174          "Invalid dest register file %d in get_dst_register_pointer()",
 175          dest->File);
 176       return dummyReg;
 177    }
 178 }
 179
 180
 181
 182 /**
 183  * Fetch a 4-element float vector from the given source register.
 184  * Apply swizzling and negating as needed.
 185  */
 186 static void
 187 fetch_vector4(const struct prog_src_register *source,
 188               const struct gl_program_machine *machine, GLfloat result[4])
 189 {
 190    const GLfloat *src = get_src_register_pointer(source, machine);
 191
 192    if (source->Swizzle == SWIZZLE_NOOP) {
 193       /* no swizzling */
 194       COPY_4V(result, src);
 195    }
 196    else {
 197       assert(GET_SWZ(source->Swizzle, 0) <= 3);
 198       assert(GET_SWZ(source->Swizzle, 1) <= 3);
 199       assert(GET_SWZ(source->Swizzle, 2) <= 3);
 200       assert(GET_SWZ(source->Swizzle, 3) <= 3);
 201       result[0] = src[GET_SWZ(source->Swizzle, 0)];
 202       result[1] = src[GET_SWZ(source->Swizzle, 1)];
 203       result[2] = src[GET_SWZ(source->Swizzle, 2)];
 204       result[3] = src[GET_SWZ(source->Swizzle, 3)];
 205    }
 206
 207    if (source->Negate) {
 208       assert(source->Negate == NEGATE_XYZW);
 209       result[0] = -result[0];
 210       result[1] = -result[1];
 211       result[2] = -result[2];
 212       result[3] = -result[3];
 213    }
 214
 215 #ifdef NAN_CHECK
 216    assert(!IS_INF_OR_NAN(result[0]));
 217    assert(!IS_INF_OR_NAN(result[0]));
 218    assert(!IS_INF_OR_NAN(result[0]));
 219    assert(!IS_INF_OR_NAN(result[0]));
 220 #endif
 221 }
 222
 223
 224 /**
 225  * Fetch the derivative with respect to X or Y for the given register.
 226  * XXX this currently only works for fragment program input attribs.
 227  */
 228 static void
 229 fetch_vector4_deriv(const struct prog_src_register *source,
 230                     const struct gl_program_machine *machine,
 231                     char xOrY, GLfloat result[4])
 232 {
 233    if (source->File == PROGRAM_INPUT &&
 234        source->Index < (GLint) machine->NumDeriv) {
 235       const GLint col = machine->CurElement;
 236       const GLfloat w = machine->Attribs[VARYING_SLOT_POS][col][3];
 237       const GLfloat invQ = 1.0f / w;
 238       GLfloat deriv[4];
 239
 240       if (xOrY == 'X') {
 241          deriv[0] = machine->DerivX[source->Index][0] * invQ;
 242          deriv[1] = machine->DerivX[source->Index][1] * invQ;
 243          deriv[2] = machine->DerivX[source->Index][2] * invQ;
 244          deriv[3] = machine->DerivX[source->Index][3] * invQ;
 245       }
 246       else {
 247          deriv[0] = machine->DerivY[source->Index][0] * invQ;
 248          deriv[1] = machine->DerivY[source->Index][1] * invQ;
 249          deriv[2] = machine->DerivY[source->Index][2] * invQ;
 250          deriv[3] = machine->DerivY[source->Index][3] * invQ;
 251       }
 252
 253       result[0] = deriv[GET_SWZ(source->Swizzle, 0)];
 254       result[1] = deriv[GET_SWZ(source->Swizzle, 1)];
 255       result[2] = deriv[GET_SWZ(source->Swizzle, 2)];
 256       result[3] = deriv[GET_SWZ(source->Swizzle, 3)];
 257
 258       if (source->Negate) {
 259          assert(source->Negate == NEGATE_XYZW);
 260          result[0] = -result[0];
 261          result[1] = -result[1];
 262          result[2] = -result[2];
 263          result[3] = -result[3];
 264       }
 265    }
 266    else {
 267       ASSIGN_4V(result, 0.0, 0.0, 0.0, 0.0);
 268    }
 269 }
 270
 271
 272 /**
 273  * As above, but only return result[0] element.
 274  */
 275 static void
 276 fetch_vector1(const struct prog_src_register *source,
 277               const struct gl_program_machine *machine, GLfloat result[4])
 278 {
 279    const GLfloat *src = get_src_register_pointer(source, machine);
 280
 281    result[0] = src[GET_SWZ(source->Swizzle, 0)];
 282
 283    if (source->Negate) {
 284       result[0] = -result[0];
 285    }
 286 }
 287
 288
 289 /**
 290  * Fetch texel from texture.  Use partial derivatives when possible.
 291  */
 292 static inline void
 293 fetch_texel(struct gl_context *ctx,
 294             const struct gl_program_machine *machine,
 295             const struct prog_instruction *inst,
 296             const GLfloat texcoord[4], GLfloat lodBias,
 297             GLfloat color[4])
 298 {
 299    const GLuint unit = machine->Samplers[inst->TexSrcUnit];
 300
 301    /* Note: we only have the right derivatives for fragment input attribs.
 302     */
 303    if (machine->NumDeriv > 0 &&
 304        inst->SrcReg[0].File == PROGRAM_INPUT &&
 305        inst->SrcReg[0].Index == VARYING_SLOT_TEX0 + inst->TexSrcUnit) {
 306       /* simple texture fetch for which we should have derivatives */
 307       GLuint attr = inst->SrcReg[0].Index;
 308       machine->FetchTexelDeriv(ctx, texcoord,
 309                                machine->DerivX[attr],
 310                                machine->DerivY[attr],
 311                                lodBias, unit, color);
 312    }
 313    else {
 314       machine->FetchTexelLod(ctx, texcoord, lodBias, unit, color);
 315    }
 316 }
 317
 318
 319 /**
 320  * Store 4 floats into a register.  Observe the instructions saturate and
 321  * set-condition-code flags.
 322  */
 323 static void
 324 store_vector4(const struct prog_instruction *inst,
 325               struct gl_program_machine *machine, const GLfloat value[4])
 326 {
 327    const struct prog_dst_register *dstReg = &(inst->DstReg);
 328    const GLboolean clamp = inst->Saturate;
 329    GLuint writeMask = dstReg->WriteMask;
 330    GLfloat clampedValue[4];
 331    GLfloat *dst = get_dst_register_pointer(dstReg, machine);
 332
 333 #if 0
 334    if (value[0] > 1.0e10 ||
 335        IS_INF_OR_NAN(value[0]) ||
 336        IS_INF_OR_NAN(value[1]) ||
 337        IS_INF_OR_NAN(value[2]) || IS_INF_OR_NAN(value[3]))
 338       printf("store %g %g %g %g\n", value[0], value[1], value[2], value[3]);
 339 #endif
 340
 341    if (clamp) {
 342       clampedValue[0] = CLAMP(value[0], 0.0F, 1.0F);
 343       clampedValue[1] = CLAMP(value[1], 0.0F, 1.0F);
 344       clampedValue[2] = CLAMP(value[2], 0.0F, 1.0F);
 345       clampedValue[3] = CLAMP(value[3], 0.0F, 1.0F);
 346       value = clampedValue;
 347    }
 348
 349 #ifdef NAN_CHECK
 350    assert(!IS_INF_OR_NAN(value[0]));
 351    assert(!IS_INF_OR_NAN(value[0]));
 352    assert(!IS_INF_OR_NAN(value[0]));
 353    assert(!IS_INF_OR_NAN(value[0]));
 354 #endif
 355
 356    if (writeMask & WRITEMASK_X)
 357       dst[0] = value[0];
 358    if (writeMask & WRITEMASK_Y)
 359       dst[1] = value[1];
 360    if (writeMask & WRITEMASK_Z)
 361       dst[2] = value[2];
 362    if (writeMask & WRITEMASK_W)
 363       dst[3] = value[3];
 364 }
 365
 366
 367 /**
 368  * Execute the given vertex/fragment program.
 369  *
 370  * \param ctx  rendering context
 371  * \param program  the program to execute
 372  * \param machine  machine state (must be initialized)
 373  * \return GL_TRUE if program completed or GL_FALSE if program executed KIL.
 374  */
 375 GLboolean
 376 _mesa_execute_program(struct gl_context * ctx,
 377                       const struct gl_program *program,
 378                       struct gl_program_machine *machine)
 379 {
 380    const GLuint numInst = program->arb.NumInstructions;
 381    const GLuint maxExec = 65536;
 382    GLuint pc, numExec = 0;
 383
 384    machine->CurProgram = program;
 385
 386    if (DEBUG_PROG) {
 387       printf("execute program %u --------------------\n", program->Id);
 388    }
 389
 390    if (program->Target == GL_VERTEX_PROGRAM_ARB) {
 391       machine->EnvParams = ctx->VertexProgram.Parameters;
 392    }
 393    else {
 394       machine->EnvParams = ctx->FragmentProgram.Parameters;
 395    }
 396
 397    for (pc = 0; pc < numInst; pc++) {
 398       const struct prog_instruction *inst = program->arb.Instructions + pc;
 399
 400       if (DEBUG_PROG) {
 401          _mesa_print_instruction(inst);
 402       }
 403
 404       switch (inst->Opcode) {
 405       case OPCODE_ABS:
 406          {
 407             GLfloat a[4], result[4];
 408             fetch_vector4(&inst->SrcReg[0], machine, a);
 409             result[0] = fabsf(a[0]);
 410             result[1] = fabsf(a[1]);
 411             result[2] = fabsf(a[2]);
 412             result[3] = fabsf(a[3]);
 413             store_vector4(inst, machine, result);
 414          }
 415          break;
 416       case OPCODE_ADD:
 417          {
 418             GLfloat a[4], b[4], result[4];
 419             fetch_vector4(&inst->SrcReg[0], machine, a);
 420             fetch_vector4(&inst->SrcReg[1], machine, b);
 421             result[0] = a[0] + b[0];
 422             result[1] = a[1] + b[1];
 423             result[2] = a[2] + b[2];
 424             result[3] = a[3] + b[3];
 425             store_vector4(inst, machine, result);
 426             if (DEBUG_PROG) {
 427                printf("ADD (%g %g %g %g) = (%g %g %g %g) + (%g %g %g %g)\n",
 428                       result[0], result[1], result[2], result[3],
 429                       a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
 430             }
 431          }
 432          break;
 433       case OPCODE_ARL:
 434          {
 435             GLfloat t[4];
 436             fetch_vector4(&inst->SrcReg[0], machine, t);
 437             machine->AddressReg[0][0] = IFLOOR(t[0]);
 438             if (DEBUG_PROG) {
 439                printf("ARL %d\n", machine->AddressReg[0][0]);
 440             }
 441          }
 442          break;
 443       case OPCODE_BGNLOOP:
 444          /* no-op */
 445          assert(program->arb.Instructions[inst->BranchTarget].Opcode
 446                 == OPCODE_ENDLOOP);
 447          break;
 448       case OPCODE_ENDLOOP:
 449          /* subtract 1 here since pc is incremented by for(pc) loop */
 450          assert(program->arb.Instructions[inst->BranchTarget].Opcode
 451                 == OPCODE_BGNLOOP);
 452          pc = inst->BranchTarget - 1;   /* go to matching BNGLOOP */
 453          break;
 454       case OPCODE_BGNSUB:      /* begin subroutine */
 455          break;
 456       case OPCODE_ENDSUB:      /* end subroutine */
 457          break;
 458       case OPCODE_BRK:         /* break out of loop (conditional) */
 459          assert(program->arb.Instructions[inst->BranchTarget].Opcode
 460                 == OPCODE_ENDLOOP);
 461          /* break out of loop */
 462          /* pc++ at end of for-loop will put us after the ENDLOOP inst */
 463          pc = inst->BranchTarget;
 464          break;
 465       case OPCODE_CONT:        /* continue loop (conditional) */
 466          assert(program->arb.Instructions[inst->BranchTarget].Opcode
 467                 == OPCODE_ENDLOOP);
 468          /* continue at ENDLOOP */
 469          /* Subtract 1 here since we'll do pc++ at end of for-loop */
 470          pc = inst->BranchTarget - 1;
 471          break;
 472       case OPCODE_CAL:         /* Call subroutine (conditional) */
 473          /* call the subroutine */
 474          if (machine->StackDepth >= MAX_PROGRAM_CALL_DEPTH) {
 475             return GL_TRUE;  /* Per GL_NV_vertex_program2 spec */
 476          }
 477          machine->CallStack[machine->StackDepth++] = pc + 1; /* next inst */
 478          /* Subtract 1 here since we'll do pc++ at end of for-loop */
 479          pc = inst->BranchTarget - 1;
 480          break;
 481       case OPCODE_CMP:
 482          {
 483             GLfloat a[4], b[4], c[4], result[4];
 484             fetch_vector4(&inst->SrcReg[0], machine, a);
 485             fetch_vector4(&inst->SrcReg[1], machine, b);
 486             fetch_vector4(&inst->SrcReg[2], machine, c);
 487             result[0] = a[0] < 0.0F ? b[0] : c[0];
 488             result[1] = a[1] < 0.0F ? b[1] : c[1];
 489             result[2] = a[2] < 0.0F ? b[2] : c[2];
 490             result[3] = a[3] < 0.0F ? b[3] : c[3];
 491             store_vector4(inst, machine, result);
 492             if (DEBUG_PROG) {
 493                printf("CMP (%g %g %g %g) = (%g %g %g %g) < 0 ? (%g %g %g %g) : (%g %g %g %g)\n",
 494                       result[0], result[1], result[2], result[3],
 495                       a[0], a[1], a[2], a[3],
 496                       b[0], b[1], b[2], b[3],
 497                       c[0], c[1], c[2], c[3]);
 498             }
 499          }
 500          break;
 501       case OPCODE_COS:
 502          {
 503             GLfloat a[4], result[4];
 504             fetch_vector1(&inst->SrcReg[0], machine, a);
 505             result[0] = result[1] = result[2] = result[3]
 506                = cosf(a[0]);
 507             store_vector4(inst, machine, result);
 508          }
 509          break;
 510       case OPCODE_DDX:         /* Partial derivative with respect to X */
 511          {
 512             GLfloat result[4];
 513             fetch_vector4_deriv(&inst->SrcReg[0], machine, 'X', result);
 514             store_vector4(inst, machine, result);
 515          }
 516          break;
 517       case OPCODE_DDY:         /* Partial derivative with respect to Y */
 518          {
 519             GLfloat result[4];
 520             fetch_vector4_deriv(&inst->SrcReg[0], machine, 'Y', result);
 521             store_vector4(inst, machine, result);
 522          }
 523          break;
 524       case OPCODE_DP2:
 525          {
 526             GLfloat a[4], b[4], result[4];
 527             fetch_vector4(&inst->SrcReg[0], machine, a);
 528             fetch_vector4(&inst->SrcReg[1], machine, b);
 529             result[0] = result[1] = result[2] = result[3] = DOT2(a, b);
 530             store_vector4(inst, machine, result);
 531             if (DEBUG_PROG) {
 532                printf("DP2 %g = (%g %g) . (%g %g)\n",
 533                       result[0], a[0], a[1], b[0], b[1]);
 534             }
 535          }
 536          break;
 537       case OPCODE_DP3:
 538          {
 539             GLfloat a[4], b[4], result[4];
 540             fetch_vector4(&inst->SrcReg[0], machine, a);
 541             fetch_vector4(&inst->SrcReg[1], machine, b);
 542             result[0] = result[1] = result[2] = result[3] = DOT3(a, b);
 543             store_vector4(inst, machine, result);
 544             if (DEBUG_PROG) {
 545                printf("DP3 %g = (%g %g %g) . (%g %g %g)\n",
 546                       result[0], a[0], a[1], a[2], b[0], b[1], b[2]);
 547             }
 548          }
 549          break;
 550       case OPCODE_DP4:
 551          {
 552             GLfloat a[4], b[4], result[4];
 553             fetch_vector4(&inst->SrcReg[0], machine, a);
 554             fetch_vector4(&inst->SrcReg[1], machine, b);
 555             result[0] = result[1] = result[2] = result[3] = DOT4(a, b);
 556             store_vector4(inst, machine, result);
 557             if (DEBUG_PROG) {
 558                printf("DP4 %g = (%g, %g %g %g) . (%g, %g %g %g)\n",
 559                       result[0], a[0], a[1], a[2], a[3],
 560                       b[0], b[1], b[2], b[3]);
 561             }
 562          }
 563          break;
 564       case OPCODE_DPH:
 565          {
 566             GLfloat a[4], b[4], result[4];
 567             fetch_vector4(&inst->SrcReg[0], machine, a);
 568             fetch_vector4(&inst->SrcReg[1], machine, b);
 569             result[0] = result[1] = result[2] = result[3] = DOT3(a, b) + b[3];
 570             store_vector4(inst, machine, result);
 571          }
 572          break;
 573       case OPCODE_DST:         /* Distance vector */
 574          {
 575             GLfloat a[4], b[4], result[4];
 576             fetch_vector4(&inst->SrcReg[0], machine, a);
 577             fetch_vector4(&inst->SrcReg[1], machine, b);
 578             result[0] = 1.0F;
 579             result[1] = a[1] * b[1];
 580             result[2] = a[2];
 581             result[3] = b[3];
 582             store_vector4(inst, machine, result);
 583          }
 584          break;
 585       case OPCODE_EXP:
 586          {
 587             GLfloat t[4], q[4], floor_t0;
 588             fetch_vector1(&inst->SrcReg[0], machine, t);
 589             floor_t0 = floorf(t[0]);
 590             if (floor_t0 > FLT_MAX_EXP) {
 591                SET_POS_INFINITY(q[0]);
 592                SET_POS_INFINITY(q[2]);
 593             }
 594             else if (floor_t0 < FLT_MIN_EXP) {
 595                q[0] = 0.0F;
 596                q[2] = 0.0F;
 597             }
 598             else {
 599                q[0] = ldexpf(1.0, (int) floor_t0);
 600                /* Note: GL_NV_vertex_program expects
 601                 * result.z = result.x * APPX(result.y)
 602                 * We do what the ARB extension says.
 603                 */
 604                q[2] = exp2f(t[0]);
 605             }
 606             q[1] = t[0] - floor_t0;
 607             q[3] = 1.0F;
 608             store_vector4( inst, machine, q );
 609          }
 610          break;
 611       case OPCODE_EX2:         /* Exponential base 2 */
 612          {
 613             GLfloat a[4], result[4], val;
 614             fetch_vector1(&inst->SrcReg[0], machine, a);
 615             val = exp2f(a[0]);
 616             /*
 617             if (IS_INF_OR_NAN(val))
 618                val = 1.0e10;
 619             */
 620             result[0] = result[1] = result[2] = result[3] = val;
 621             store_vector4(inst, machine, result);
 622          }
 623          break;
 624       case OPCODE_FLR:
 625          {
 626             GLfloat a[4], result[4];
 627             fetch_vector4(&inst->SrcReg[0], machine, a);
 628             result[0] = floorf(a[0]);
 629             result[1] = floorf(a[1]);
 630             result[2] = floorf(a[2]);
 631             result[3] = floorf(a[3]);
 632             store_vector4(inst, machine, result);
 633          }
 634          break;
 635       case OPCODE_FRC:
 636          {
 637             GLfloat a[4], result[4];
 638             fetch_vector4(&inst->SrcReg[0], machine, a);
 639             result[0] = a[0] - floorf(a[0]);
 640             result[1] = a[1] - floorf(a[1]);
 641             result[2] = a[2] - floorf(a[2]);
 642             result[3] = a[3] - floorf(a[3]);
 643             store_vector4(inst, machine, result);
 644          }
 645          break;
 646       case OPCODE_IF:
 647          {
 648             GLboolean cond;
 649             assert(program->arb.Instructions[inst->BranchTarget].Opcode
 650                    == OPCODE_ELSE ||
 651                    program->arb.Instructions[inst->BranchTarget].Opcode
 652                    == OPCODE_ENDIF);
 653             /* eval condition */
 654             GLfloat a[4];
 655             fetch_vector1(&inst->SrcReg[0], machine, a);
 656             cond = (a[0] != 0.0F);
 657             if (DEBUG_PROG) {
 658                printf("IF: %d\n", cond);
 659             }
 660             /* do if/else */
 661             if (cond) {
 662                /* do if-clause (just continue execution) */
 663             }
 664             else {
 665                /* go to the instruction after ELSE or ENDIF */
 666                assert(inst->BranchTarget >= 0);
 667                pc = inst->BranchTarget;
 668             }
 669          }
 670          break;
 671       case OPCODE_ELSE:
 672          /* goto ENDIF */
 673          assert(program->arb.Instructions[inst->BranchTarget].Opcode
 674                 == OPCODE_ENDIF);
 675          assert(inst->BranchTarget >= 0);
 676          pc = inst->BranchTarget;
 677          break;
 678       case OPCODE_ENDIF:
 679          /* nothing */
 680          break;
 681       case OPCODE_KIL:         /* ARB_f_p only */
 682          {
 683             GLfloat a[4];
 684             fetch_vector4(&inst->SrcReg[0], machine, a);
 685             if (DEBUG_PROG) {
 686                printf("KIL if (%g %g %g %g) <= 0.0\n",
 687                       a[0], a[1], a[2], a[3]);
 688             }
 689
 690             if (a[0] < 0.0F || a[1] < 0.0F || a[2] < 0.0F || a[3] < 0.0F) {
 691                return GL_FALSE;
 692             }
 693          }
 694          break;
 695       case OPCODE_LG2:         /* log base 2 */
 696          {
 697             GLfloat a[4], result[4], val;
 698             fetch_vector1(&inst->SrcReg[0], machine, a);
 699             /* The fast LOG2 macro doesn't meet the precision requirements.
 700              */
 701             if (a[0] == 0.0F) {
 702                val = -FLT_MAX;
 703             }
 704             else {
 705                val = logf(a[0]) * 1.442695F;
 706             }
 707             result[0] = result[1] = result[2] = result[3] = val;
 708             store_vector4(inst, machine, result);
 709          }
 710          break;
 711       case OPCODE_LIT:
 712          {
 713             const GLfloat epsilon = 1.0F / 256.0F;      /* from NV VP spec */
 714             GLfloat a[4], result[4];
 715             fetch_vector4(&inst->SrcReg[0], machine, a);
 716             a[0] = MAX2(a[0], 0.0F);
 717             a[1] = MAX2(a[1], 0.0F);
 718             /* XXX ARB version clamps a[3], NV version doesn't */
 719             a[3] = CLAMP(a[3], -(128.0F - epsilon), (128.0F - epsilon));
 720             result[0] = 1.0F;
 721             result[1] = a[0];
 722             /* XXX we could probably just use pow() here */
 723             if (a[0] > 0.0F) {
 724                if (a[1] == 0.0F && a[3] == 0.0F)
 725                   result[2] = 1.0F;
 726                else
 727                   result[2] = powf(a[1], a[3]);
 728             }
 729             else {
 730                result[2] = 0.0F;
 731             }
 732             result[3] = 1.0F;
 733             store_vector4(inst, machine, result);
 734             if (DEBUG_PROG) {
 735                printf("LIT (%g %g %g %g) : (%g %g %g %g)\n",
 736                       result[0], result[1], result[2], result[3],
 737                       a[0], a[1], a[2], a[3]);
 738             }
 739          }
 740          break;
 741       case OPCODE_LOG:
 742          {
 743             GLfloat t[4], q[4], abs_t0;
 744             fetch_vector1(&inst->SrcReg[0], machine, t);
 745             abs_t0 = fabsf(t[0]);
 746             if (abs_t0 != 0.0F) {
 747                if (IS_INF_OR_NAN(abs_t0))
 748                {
 749                   SET_POS_INFINITY(q[0]);
 750                   q[1] = 1.0F;
 751                   SET_POS_INFINITY(q[2]);
 752                }
 753                else {
 754                   int exponent;
 755                   GLfloat mantissa = frexpf(t[0], &exponent);
 756                   q[0] = (GLfloat) (exponent - 1);
 757                   q[1] = 2.0F * mantissa; /* map [.5, 1) -> [1, 2) */
 758
 759                   /* The fast LOG2 macro doesn't meet the precision
 760                    * requirements.
 761                    */
 762                   q[2] = logf(t[0]) * 1.442695F;
 763                }
 764             }
 765             else {
 766                SET_NEG_INFINITY(q[0]);
 767                q[1] = 1.0F;
 768                SET_NEG_INFINITY(q[2]);
 769             }
 770             q[3] = 1.0;
 771             store_vector4(inst, machine, q);
 772          }
 773          break;
 774       case OPCODE_LRP:
 775          {
 776             GLfloat a[4], b[4], c[4], result[4];
 777             fetch_vector4(&inst->SrcReg[0], machine, a);
 778             fetch_vector4(&inst->SrcReg[1], machine, b);
 779             fetch_vector4(&inst->SrcReg[2], machine, c);
 780             result[0] = a[0] * b[0] + (1.0F - a[0]) * c[0];
 781             result[1] = a[1] * b[1] + (1.0F - a[1]) * c[1];
 782             result[2] = a[2] * b[2] + (1.0F - a[2]) * c[2];
 783             result[3] = a[3] * b[3] + (1.0F - a[3]) * c[3];
 784             store_vector4(inst, machine, result);
 785             if (DEBUG_PROG) {
 786                printf("LRP (%g %g %g %g) = (%g %g %g %g), "
 787                       "(%g %g %g %g), (%g %g %g %g)\n",
 788                       result[0], result[1], result[2], result[3],
 789                       a[0], a[1], a[2], a[3],
 790                       b[0], b[1], b[2], b[3], c[0], c[1], c[2], c[3]);
 791             }
 792          }
 793          break;
 794       case OPCODE_MAD:
 795          {
 796             GLfloat a[4], b[4], c[4], result[4];
 797             fetch_vector4(&inst->SrcReg[0], machine, a);
 798             fetch_vector4(&inst->SrcReg[1], machine, b);
 799             fetch_vector4(&inst->SrcReg[2], machine, c);
 800             result[0] = a[0] * b[0] + c[0];
 801             result[1] = a[1] * b[1] + c[1];
 802             result[2] = a[2] * b[2] + c[2];
 803             result[3] = a[3] * b[3] + c[3];
 804             store_vector4(inst, machine, result);
 805             if (DEBUG_PROG) {
 806                printf("MAD (%g %g %g %g) = (%g %g %g %g) * "
 807                       "(%g %g %g %g) + (%g %g %g %g)\n",
 808                       result[0], result[1], result[2], result[3],
 809                       a[0], a[1], a[2], a[3],
 810                       b[0], b[1], b[2], b[3], c[0], c[1], c[2], c[3]);
 811             }
 812          }
 813          break;
 814       case OPCODE_MAX:
 815          {
 816             GLfloat a[4], b[4], result[4];
 817             fetch_vector4(&inst->SrcReg[0], machine, a);
 818             fetch_vector4(&inst->SrcReg[1], machine, b);
 819             result[0] = MAX2(a[0], b[0]);
 820             result[1] = MAX2(a[1], b[1]);
 821             result[2] = MAX2(a[2], b[2]);
 822             result[3] = MAX2(a[3], b[3]);
 823             store_vector4(inst, machine, result);
 824             if (DEBUG_PROG) {
 825                printf("MAX (%g %g %g %g) = (%g %g %g %g), (%g %g %g %g)\n",
 826                       result[0], result[1], result[2], result[3],
 827                       a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
 828             }
 829          }
 830          break;
 831       case OPCODE_MIN:
 832          {
 833             GLfloat a[4], b[4], result[4];
 834             fetch_vector4(&inst->SrcReg[0], machine, a);
 835             fetch_vector4(&inst->SrcReg[1], machine, b);
 836             result[0] = MIN2(a[0], b[0]);
 837             result[1] = MIN2(a[1], b[1]);
 838             result[2] = MIN2(a[2], b[2]);
 839             result[3] = MIN2(a[3], b[3]);
 840             store_vector4(inst, machine, result);
 841          }
 842          break;
 843       case OPCODE_MOV:
 844          {
 845             GLfloat result[4];
 846             fetch_vector4(&inst->SrcReg[0], machine, result);
 847             store_vector4(inst, machine, result);
 848             if (DEBUG_PROG) {
 849                printf("MOV (%g %g %g %g)\n",
 850                       result[0], result[1], result[2], result[3]);
 851             }
 852          }
 853          break;
 854       case OPCODE_MUL:
 855          {
 856             GLfloat a[4], b[4], result[4];
 857             fetch_vector4(&inst->SrcReg[0], machine, a);
 858             fetch_vector4(&inst->SrcReg[1], machine, b);
 859             result[0] = a[0] * b[0];
 860             result[1] = a[1] * b[1];
 861             result[2] = a[2] * b[2];
 862             result[3] = a[3] * b[3];
 863             store_vector4(inst, machine, result);
 864             if (DEBUG_PROG) {
 865                printf("MUL (%g %g %g %g) = (%g %g %g %g) * (%g %g %g %g)\n",
 866                       result[0], result[1], result[2], result[3],
 867                       a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
 868             }
 869          }
 870          break;
 871       case OPCODE_NOISE1:
 872          {
 873             GLfloat a[4], result[4];
 874             fetch_vector1(&inst->SrcReg[0], machine, a);
 875             result[0] =
 876                result[1] =
 877                result[2] =
 878                result[3] = _mesa_noise1(a[0]);
 879             store_vector4(inst, machine, result);
 880          }
 881          break;
 882       case OPCODE_NOISE2:
 883          {
 884             GLfloat a[4], result[4];
 885             fetch_vector4(&inst->SrcReg[0], machine, a);
 886             result[0] =
 887                result[1] =
 888                result[2] = result[3] = _mesa_noise2(a[0], a[1]);
 889             store_vector4(inst, machine, result);
 890          }
 891          break;
 892       case OPCODE_NOISE3:
 893          {
 894             GLfloat a[4], result[4];
 895             fetch_vector4(&inst->SrcReg[0], machine, a);
 896             result[0] =
 897                result[1] =
 898                result[2] =
 899                result[3] = _mesa_noise3(a[0], a[1], a[2]);
 900             store_vector4(inst, machine, result);
 901          }
 902          break;
 903       case OPCODE_NOISE4:
 904          {
 905             GLfloat a[4], result[4];
 906             fetch_vector4(&inst->SrcReg[0], machine, a);
 907             result[0] =
 908                result[1] =
 909                result[2] =
 910                result[3] = _mesa_noise4(a[0], a[1], a[2], a[3]);
 911             store_vector4(inst, machine, result);
 912          }
 913          break;
 914       case OPCODE_NOP:
 915          break;
 916       case OPCODE_POW:
 917          {
 918             GLfloat a[4], b[4], result[4];
 919             fetch_vector1(&inst->SrcReg[0], machine, a);
 920             fetch_vector1(&inst->SrcReg[1], machine, b);
 921             result[0] = result[1] = result[2] = result[3]
 922                = powf(a[0], b[0]);
 923             store_vector4(inst, machine, result);
 924          }
 925          break;
 926
 927       case OPCODE_RCP:
 928          {
 929             GLfloat a[4], result[4];
 930             fetch_vector1(&inst->SrcReg[0], machine, a);
 931             if (DEBUG_PROG) {
 932                if (a[0] == 0)
 933                   printf("RCP(0)\n");
 934                else if (IS_INF_OR_NAN(a[0]))
 935                   printf("RCP(inf)\n");
 936             }
 937             result[0] = result[1] = result[2] = result[3] = 1.0F / a[0];
 938             store_vector4(inst, machine, result);
 939          }
 940          break;
 941       case OPCODE_RET:         /* return from subroutine (conditional) */
 942          if (machine->StackDepth == 0) {
 943             return GL_TRUE;  /* Per GL_NV_vertex_program2 spec */
 944          }
 945          /* subtract one because of pc++ in the for loop */
 946          pc = machine->CallStack[--machine->StackDepth] - 1;
 947          break;
 948       case OPCODE_RSQ:         /* 1 / sqrt() */
 949          {
 950             GLfloat a[4], result[4];
 951             fetch_vector1(&inst->SrcReg[0], machine, a);
 952             a[0] = fabsf(a[0]);
 953             result[0] = result[1] = result[2] = result[3] = 1.0f / sqrtf(a[0]);
 954             store_vector4(inst, machine, result);
 955             if (DEBUG_PROG) {
 956                printf("RSQ %g = 1/sqrt(|%g|)\n", result[0], a[0]);
 957             }
 958          }
 959          break;
 960       case OPCODE_SCS:         /* sine and cos */
 961          {
 962             GLfloat a[4], result[4];
 963             fetch_vector1(&inst->SrcReg[0], machine, a);
 964             result[0] = cosf(a[0]);
 965             result[1] = sinf(a[0]);
 966             result[2] = 0.0F;    /* undefined! */
 967             result[3] = 0.0F;    /* undefined! */
 968             store_vector4(inst, machine, result);
 969          }
 970          break;
 971       case OPCODE_SGE:         /* set on greater or equal */
 972          {
 973             GLfloat a[4], b[4], result[4];
 974             fetch_vector4(&inst->SrcReg[0], machine, a);
 975             fetch_vector4(&inst->SrcReg[1], machine, b);
 976             result[0] = (a[0] >= b[0]) ? 1.0F : 0.0F;
 977             result[1] = (a[1] >= b[1]) ? 1.0F : 0.0F;
 978             result[2] = (a[2] >= b[2]) ? 1.0F : 0.0F;
 979             result[3] = (a[3] >= b[3]) ? 1.0F : 0.0F;
 980             store_vector4(inst, machine, result);
 981             if (DEBUG_PROG) {
 982                printf("SGE (%g %g %g %g) = (%g %g %g %g) >= (%g %g %g %g)\n",
 983                       result[0], result[1], result[2], result[3],
 984                       a[0], a[1], a[2], a[3],
 985                       b[0], b[1], b[2], b[3]);
 986             }
 987          }
 988          break;
 989       case OPCODE_SIN:
 990          {
 991             GLfloat a[4], result[4];
 992             fetch_vector1(&inst->SrcReg[0], machine, a);
 993             result[0] = result[1] = result[2] = result[3]
 994                = sinf(a[0]);
 995             store_vector4(inst, machine, result);
 996          }
 997          break;
 998       case OPCODE_SLT:         /* set on less */
 999          {
1000             GLfloat a[4], b[4], result[4];
1001             fetch_vector4(&inst->SrcReg[0], machine, a);
1002             fetch_vector4(&inst->SrcReg[1], machine, b);
1003             result[0] = (a[0] < b[0]) ? 1.0F : 0.0F;
1004             result[1] = (a[1] < b[1]) ? 1.0F : 0.0F;
1005             result[2] = (a[2] < b[2]) ? 1.0F : 0.0F;
1006             result[3] = (a[3] < b[3]) ? 1.0F : 0.0F;
1007             store_vector4(inst, machine, result);
1008             if (DEBUG_PROG) {
1009                printf("SLT (%g %g %g %g) = (%g %g %g %g) < (%g %g %g %g)\n",
1010                       result[0], result[1], result[2], result[3],
1011                       a[0], a[1], a[2], a[3],
1012                       b[0], b[1], b[2], b[3]);
1013             }
1014          }
1015          break;
1016       case OPCODE_SSG:         /* set sign (-1, 0 or +1) */
1017          {
1018             GLfloat a[4], result[4];
1019             fetch_vector4(&inst->SrcReg[0], machine, a);
1020             result[0] = (GLfloat) ((a[0] > 0.0F) - (a[0] < 0.0F));
1021             result[1] = (GLfloat) ((a[1] > 0.0F) - (a[1] < 0.0F));
1022             result[2] = (GLfloat) ((a[2] > 0.0F) - (a[2] < 0.0F));
1023             result[3] = (GLfloat) ((a[3] > 0.0F) - (a[3] < 0.0F));
1024             store_vector4(inst, machine, result);
1025          }
1026          break;
1027       case OPCODE_SUB:
1028          {
1029             GLfloat a[4], b[4], result[4];
1030             fetch_vector4(&inst->SrcReg[0], machine, a);
1031             fetch_vector4(&inst->SrcReg[1], machine, b);
1032             result[0] = a[0] - b[0];
1033             result[1] = a[1] - b[1];
1034             result[2] = a[2] - b[2];
1035             result[3] = a[3] - b[3];
1036             store_vector4(inst, machine, result);
1037             if (DEBUG_PROG) {
1038                printf("SUB (%g %g %g %g) = (%g %g %g %g) - (%g %g %g %g)\n",
1039                       result[0], result[1], result[2], result[3],
1040                       a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
1041             }
1042          }
1043          break;
1044       case OPCODE_SWZ:         /* extended swizzle */
1045          {
1046             const struct prog_src_register *source = &inst->SrcReg[0];
1047             const GLfloat *src = get_src_register_pointer(source, machine);
1048             GLfloat result[4];
1049             GLuint i;
1050             for (i = 0; i < 4; i++) {
1051                const GLuint swz = GET_SWZ(source->Swizzle, i);
1052                if (swz == SWIZZLE_ZERO)
1053                   result[i] = 0.0;
1054                else if (swz == SWIZZLE_ONE)
1055                   result[i] = 1.0;
1056                else {
1057                   assert(swz <= 3);
1058                   result[i] = src[swz];
1059                }
1060                if (source->Negate & (1 << i))
1061                   result[i] = -result[i];
1062             }
1063             store_vector4(inst, machine, result);
1064          }
1065          break;
1066       case OPCODE_TEX:         /* Both ARB and NV frag prog */
1067          /* Simple texel lookup */
1068          {
1069             GLfloat texcoord[4], color[4];
1070             fetch_vector4(&inst->SrcReg[0], machine, texcoord);
1071
1072             /* For TEX, texcoord.Q should not be used and its value should not
1073              * matter (at most, we pass coord.xyz to texture3D() in GLSL).
1074              * Set Q=1 so that FetchTexelDeriv() doesn't get a garbage value
1075              * which is effectively what happens when the texcoord swizzle
1076              * is .xyzz
1077              */
1078             texcoord[3] = 1.0f;
1079
1080             fetch_texel(ctx, machine, inst, texcoord, 0.0, color);
1081
1082             if (DEBUG_PROG) {
1083                printf("TEX (%g, %g, %g, %g) = texture[%d][%g, %g, %g, %g]\n",
1084                       color[0], color[1], color[2], color[3],
1085                       inst->TexSrcUnit,
1086                       texcoord[0], texcoord[1], texcoord[2], texcoord[3]);
1087             }
1088             store_vector4(inst, machine, color);
1089          }
1090          break;
1091       case OPCODE_TXB:         /* GL_ARB_fragment_program only */
1092          /* Texel lookup with LOD bias */
1093          {
1094             GLfloat texcoord[4], color[4], lodBias;
1095
1096             fetch_vector4(&inst->SrcReg[0], machine, texcoord);
1097
1098             /* texcoord[3] is the bias to add to lambda */
1099             lodBias = texcoord[3];
1100
1101             fetch_texel(ctx, machine, inst, texcoord, lodBias, color);
1102
1103             if (DEBUG_PROG) {
1104                printf("TXB (%g, %g, %g, %g) = texture[%d][%g %g %g %g]"
1105                       "  bias %g\n",
1106                       color[0], color[1], color[2], color[3],
1107                       inst->TexSrcUnit,
1108                       texcoord[0],
1109                       texcoord[1],
1110                       texcoord[2],
1111                       texcoord[3],
1112                       lodBias);
1113             }
1114
1115             store_vector4(inst, machine, color);
1116          }
1117          break;
1118       case OPCODE_TXD:
1119          /* Texture lookup w/ partial derivatives for LOD */
1120          {
1121             GLfloat texcoord[4], dtdx[4], dtdy[4], color[4];
1122             fetch_vector4(&inst->SrcReg[0], machine, texcoord);
1123             fetch_vector4(&inst->SrcReg[1], machine, dtdx);
1124             fetch_vector4(&inst->SrcReg[2], machine, dtdy);
1125             machine->FetchTexelDeriv(ctx, texcoord, dtdx, dtdy,
1126                                      0.0, /* lodBias */
1127                                      inst->TexSrcUnit, color);
1128             store_vector4(inst, machine, color);
1129          }
1130          break;
1131       case OPCODE_TXL:
1132          /* Texel lookup with explicit LOD */
1133          {
1134             GLfloat texcoord[4], color[4], lod;
1135
1136             fetch_vector4(&inst->SrcReg[0], machine, texcoord);
1137
1138             /* texcoord[3] is the LOD */
1139             lod = texcoord[3];
1140
1141             machine->FetchTexelLod(ctx, texcoord, lod,
1142                                    machine->Samplers[inst->TexSrcUnit], color);
1143
1144             store_vector4(inst, machine, color);
1145          }
1146          break;
1147       case OPCODE_TXP:         /* GL_ARB_fragment_program only */
1148          /* Texture lookup w/ projective divide */
1149          {
1150             GLfloat texcoord[4], color[4];
1151
1152             fetch_vector4(&inst->SrcReg[0], machine, texcoord);
1153             /* Not so sure about this test - if texcoord[3] is
1154              * zero, we'd probably be fine except for an assert in
1155              * IROUND_POS() which gets triggered by the inf values created.
1156              */
1157             if (texcoord[3] != 0.0F) {
1158                texcoord[0] /= texcoord[3];
1159                texcoord[1] /= texcoord[3];
1160                texcoord[2] /= texcoord[3];
1161             }
1162
1163             fetch_texel(ctx, machine, inst, texcoord, 0.0, color);
1164
1165             store_vector4(inst, machine, color);
1166          }
1167          break;
1168       case OPCODE_TRUNC:       /* truncate toward zero */
1169          {
1170             GLfloat a[4], result[4];
1171             fetch_vector4(&inst->SrcReg[0], machine, a);
1172             result[0] = (GLfloat) (GLint) a[0];
1173             result[1] = (GLfloat) (GLint) a[1];
1174             result[2] = (GLfloat) (GLint) a[2];
1175             result[3] = (GLfloat) (GLint) a[3];
1176             store_vector4(inst, machine, result);
1177          }
1178          break;
1179       case OPCODE_XPD:         /* cross product */
1180          {
1181             GLfloat a[4], b[4], result[4];
1182             fetch_vector4(&inst->SrcReg[0], machine, a);
1183             fetch_vector4(&inst->SrcReg[1], machine, b);
1184             result[0] = a[1] * b[2] - a[2] * b[1];
1185             result[1] = a[2] * b[0] - a[0] * b[2];
1186             result[2] = a[0] * b[1] - a[1] * b[0];
1187             result[3] = 1.0;
1188             store_vector4(inst, machine, result);
1189             if (DEBUG_PROG) {
1190                printf("XPD (%g %g %g %g) = (%g %g %g) X (%g %g %g)\n",
1191                       result[0], result[1], result[2], result[3],
1192                       a[0], a[1], a[2], b[0], b[1], b[2]);
1193             }
1194          }
1195          break;
1196       case OPCODE_END:
1197          return GL_TRUE;
1198       default:
1199          _mesa_problem(ctx, "Bad opcode %d in _mesa_execute_program",
1200                        inst->Opcode);
1201          return GL_TRUE;        /* return value doesn't matter */
1202       }
1203
1204       numExec++;
1205       if (numExec > maxExec) {
1206          static GLboolean reported = GL_FALSE;
1207          if (!reported) {
1208             _mesa_problem(ctx, "Infinite loop detected in fragment program");
1209             reported = GL_TRUE;
1210          }
1211          return GL_TRUE;
1212       }
1213
1214    } /* for pc */
1215
1216    return GL_TRUE;
1217 }