src/mesa/program/prog_execute.c

   1 /*
   2  * Mesa 3-D graphics library
   3  *
   4  * Copyright (C) 1999-2008  Brian Paul   All Rights Reserved.
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a
   7  * copy of this software and associated documentation files (the "Software"),
   8  * to deal in the Software without restriction, including without limitation
   9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  10  * and/or sell copies of the Software, and to permit persons to whom the
  11  * Software is furnished to do so, subject to the following conditions:
  12  *
  13  * The above copyright notice and this permission notice shall be included
  14  * in all copies or substantial portions of the Software.
  15  *
  16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
  20  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  21  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  22  * OTHER DEALINGS IN THE SOFTWARE.
  23  */
  24
  25 /**
  26  * \file prog_execute.c
  27  * Software interpreter for vertex/fragment programs.
  28  * \author Brian Paul
  29  */
  30
  31 /*
  32  * NOTE: we do everything in single-precision floating point; we don't
  33  * currently observe the single/half/fixed-precision qualifiers.
  34  *
  35  */
  36
  37
  38 #include "c99_math.h"
  39 #include "main/glheader.h"
  40 #include "main/colormac.h"
  41 #include "main/macros.h"
  42 #include "prog_execute.h"
  43 #include "prog_instruction.h"
  44 #include "prog_parameter.h"
  45 #include "prog_print.h"
  46 #include "prog_noise.h"
  47
  48
  49 /* debug predicate */
  50 #define DEBUG_PROG 0
  51
  52
  53 /**
  54  * Set x to positive or negative infinity.
  55  */
  56 #define SET_POS_INFINITY(x)                  \
  57    do {                                      \
  58          fi_type fi;                         \
  59          fi.i = 0x7F800000;                  \
  60          x = fi.f;                           \
  61    } while (0)
  62 #define SET_NEG_INFINITY(x)                  \
  63    do {                                      \
  64          fi_type fi;                         \
  65          fi.i = 0xFF800000;                  \
  66          x = fi.f;                           \
  67    } while (0)
  68
  69 #define SET_FLOAT_BITS(x, bits) ((fi_type *) (void *) &(x))->i = bits
  70
  71
  72 static const GLfloat ZeroVec[4] = { 0.0F, 0.0F, 0.0F, 0.0F };
  73
  74
  75 /**
  76  * Return a pointer to the 4-element float vector specified by the given
  77  * source register.
  78  */
  79 static inline const GLfloat *
  80 get_src_register_pointer(const struct prog_src_register *source,
  81                          const struct gl_program_machine *machine)
  82 {
  83    const struct gl_program *prog = machine->CurProgram;
  84    GLint reg = source->Index;
  85
  86    if (source->RelAddr) {
  87       /* add address register value to src index/offset */
  88       reg += machine->AddressReg[0][0];
  89       if (reg < 0) {
  90          return ZeroVec;
  91       }
  92    }
  93
  94    switch (source->File) {
  95    case PROGRAM_TEMPORARY:
  96       if (reg >= MAX_PROGRAM_TEMPS)
  97          return ZeroVec;
  98       return machine->Temporaries[reg];
  99
 100    case PROGRAM_INPUT:
 101       if (prog->Target == GL_VERTEX_PROGRAM_ARB) {
 102          if (reg >= VERT_ATTRIB_MAX)
 103             return ZeroVec;
 104          return machine->VertAttribs[reg];
 105       }
 106       else {
 107          if (reg >= VARYING_SLOT_MAX)
 108             return ZeroVec;
 109          return machine->Attribs[reg][machine->CurElement];
 110       }
 111
 112    case PROGRAM_OUTPUT:
 113       if (reg >= MAX_PROGRAM_OUTPUTS)
 114          return ZeroVec;
 115       return machine->Outputs[reg];
 116
 117    case PROGRAM_STATE_VAR:
 118       /* Fallthrough */
 119    case PROGRAM_CONSTANT:
 120       /* Fallthrough */
 121    case PROGRAM_UNIFORM:
 122       if (reg >= (GLint) prog->Parameters->NumParameters)
 123          return ZeroVec;
 124       return (GLfloat *) prog->Parameters->ParameterValues[reg];
 125
 126    case PROGRAM_SYSTEM_VALUE:
 127       assert(reg < (GLint) Elements(machine->SystemValues));
 128       return machine->SystemValues[reg];
 129
 130    default:
 131       _mesa_problem(NULL,
 132          "Invalid src register file %d in get_src_register_pointer()",
 133          source->File);
 134       return ZeroVec;
 135    }
 136 }
 137
 138
 139 /**
 140  * Return a pointer to the 4-element float vector specified by the given
 141  * destination register.
 142  */
 143 static inline GLfloat *
 144 get_dst_register_pointer(const struct prog_dst_register *dest,
 145                          struct gl_program_machine *machine)
 146 {
 147    static GLfloat dummyReg[4];
 148    GLint reg = dest->Index;
 149
 150    if (dest->RelAddr) {
 151       /* add address register value to src index/offset */
 152       reg += machine->AddressReg[0][0];
 153       if (reg < 0) {
 154          return dummyReg;
 155       }
 156    }
 157
 158    switch (dest->File) {
 159    case PROGRAM_TEMPORARY:
 160       if (reg >= MAX_PROGRAM_TEMPS)
 161          return dummyReg;
 162       return machine->Temporaries[reg];
 163
 164    case PROGRAM_OUTPUT:
 165       if (reg >= MAX_PROGRAM_OUTPUTS)
 166          return dummyReg;
 167       return machine->Outputs[reg];
 168
 169    default:
 170       _mesa_problem(NULL,
 171          "Invalid dest register file %d in get_dst_register_pointer()",
 172          dest->File);
 173       return dummyReg;
 174    }
 175 }
 176
 177
 178
 179 /**
 180  * Fetch a 4-element float vector from the given source register.
 181  * Apply swizzling and negating as needed.
 182  */
 183 static void
 184 fetch_vector4(const struct prog_src_register *source,
 185               const struct gl_program_machine *machine, GLfloat result[4])
 186 {
 187    const GLfloat *src = get_src_register_pointer(source, machine);
 188
 189    if (source->Swizzle == SWIZZLE_NOOP) {
 190       /* no swizzling */
 191       COPY_4V(result, src);
 192    }
 193    else {
 194       assert(GET_SWZ(source->Swizzle, 0) <= 3);
 195       assert(GET_SWZ(source->Swizzle, 1) <= 3);
 196       assert(GET_SWZ(source->Swizzle, 2) <= 3);
 197       assert(GET_SWZ(source->Swizzle, 3) <= 3);
 198       result[0] = src[GET_SWZ(source->Swizzle, 0)];
 199       result[1] = src[GET_SWZ(source->Swizzle, 1)];
 200       result[2] = src[GET_SWZ(source->Swizzle, 2)];
 201       result[3] = src[GET_SWZ(source->Swizzle, 3)];
 202    }
 203
 204    if (source->Abs) {
 205       result[0] = fabsf(result[0]);
 206       result[1] = fabsf(result[1]);
 207       result[2] = fabsf(result[2]);
 208       result[3] = fabsf(result[3]);
 209    }
 210    if (source->Negate) {
 211       assert(source->Negate == NEGATE_XYZW);
 212       result[0] = -result[0];
 213       result[1] = -result[1];
 214       result[2] = -result[2];
 215       result[3] = -result[3];
 216    }
 217
 218 #ifdef NAN_CHECK
 219    assert(!IS_INF_OR_NAN(result[0]));
 220    assert(!IS_INF_OR_NAN(result[0]));
 221    assert(!IS_INF_OR_NAN(result[0]));
 222    assert(!IS_INF_OR_NAN(result[0]));
 223 #endif
 224 }
 225
 226
 227 /**
 228  * Fetch the derivative with respect to X or Y for the given register.
 229  * XXX this currently only works for fragment program input attribs.
 230  */
 231 static void
 232 fetch_vector4_deriv(struct gl_context * ctx,
 233                     const struct prog_src_register *source,
 234                     const struct gl_program_machine *machine,
 235                     char xOrY, GLfloat result[4])
 236 {
 237    if (source->File == PROGRAM_INPUT &&
 238        source->Index < (GLint) machine->NumDeriv) {
 239       const GLint col = machine->CurElement;
 240       const GLfloat w = machine->Attribs[VARYING_SLOT_POS][col][3];
 241       const GLfloat invQ = 1.0f / w;
 242       GLfloat deriv[4];
 243
 244       if (xOrY == 'X') {
 245          deriv[0] = machine->DerivX[source->Index][0] * invQ;
 246          deriv[1] = machine->DerivX[source->Index][1] * invQ;
 247          deriv[2] = machine->DerivX[source->Index][2] * invQ;
 248          deriv[3] = machine->DerivX[source->Index][3] * invQ;
 249       }
 250       else {
 251          deriv[0] = machine->DerivY[source->Index][0] * invQ;
 252          deriv[1] = machine->DerivY[source->Index][1] * invQ;
 253          deriv[2] = machine->DerivY[source->Index][2] * invQ;
 254          deriv[3] = machine->DerivY[source->Index][3] * invQ;
 255       }
 256
 257       result[0] = deriv[GET_SWZ(source->Swizzle, 0)];
 258       result[1] = deriv[GET_SWZ(source->Swizzle, 1)];
 259       result[2] = deriv[GET_SWZ(source->Swizzle, 2)];
 260       result[3] = deriv[GET_SWZ(source->Swizzle, 3)];
 261
 262       if (source->Abs) {
 263          result[0] = fabsf(result[0]);
 264          result[1] = fabsf(result[1]);
 265          result[2] = fabsf(result[2]);
 266          result[3] = fabsf(result[3]);
 267       }
 268       if (source->Negate) {
 269          assert(source->Negate == NEGATE_XYZW);
 270          result[0] = -result[0];
 271          result[1] = -result[1];
 272          result[2] = -result[2];
 273          result[3] = -result[3];
 274       }
 275    }
 276    else {
 277       ASSIGN_4V(result, 0.0, 0.0, 0.0, 0.0);
 278    }
 279 }
 280
 281
 282 /**
 283  * As above, but only return result[0] element.
 284  */
 285 static void
 286 fetch_vector1(const struct prog_src_register *source,
 287               const struct gl_program_machine *machine, GLfloat result[4])
 288 {
 289    const GLfloat *src = get_src_register_pointer(source, machine);
 290
 291    result[0] = src[GET_SWZ(source->Swizzle, 0)];
 292
 293    if (source->Abs) {
 294       result[0] = fabsf(result[0]);
 295    }
 296    if (source->Negate) {
 297       result[0] = -result[0];
 298    }
 299 }
 300
 301
 302 /**
 303  * Fetch texel from texture.  Use partial derivatives when possible.
 304  */
 305 static inline void
 306 fetch_texel(struct gl_context *ctx,
 307             const struct gl_program_machine *machine,
 308             const struct prog_instruction *inst,
 309             const GLfloat texcoord[4], GLfloat lodBias,
 310             GLfloat color[4])
 311 {
 312    const GLuint unit = machine->Samplers[inst->TexSrcUnit];
 313
 314    /* Note: we only have the right derivatives for fragment input attribs.
 315     */
 316    if (machine->NumDeriv > 0 &&
 317        inst->SrcReg[0].File == PROGRAM_INPUT &&
 318        inst->SrcReg[0].Index == VARYING_SLOT_TEX0 + inst->TexSrcUnit) {
 319       /* simple texture fetch for which we should have derivatives */
 320       GLuint attr = inst->SrcReg[0].Index;
 321       machine->FetchTexelDeriv(ctx, texcoord,
 322                                machine->DerivX[attr],
 323                                machine->DerivY[attr],
 324                                lodBias, unit, color);
 325    }
 326    else {
 327       machine->FetchTexelLod(ctx, texcoord, lodBias, unit, color);
 328    }
 329 }
 330
 331
 332 /**
 333  * Test value against zero and return GT, LT, EQ or UN if NaN.
 334  */
 335 static inline GLuint
 336 generate_cc(float value)
 337 {
 338    if (value != value)
 339       return COND_UN;           /* NaN */
 340    if (value > 0.0F)
 341       return COND_GT;
 342    if (value < 0.0F)
 343       return COND_LT;
 344    return COND_EQ;
 345 }
 346
 347
 348 /**
 349  * Test if the ccMaskRule is satisfied by the given condition code.
 350  * Used to mask destination writes according to the current condition code.
 351  */
 352 static inline GLboolean
 353 test_cc(GLuint condCode, GLuint ccMaskRule)
 354 {
 355    switch (ccMaskRule) {
 356    case COND_EQ: return (condCode == COND_EQ);
 357    case COND_NE: return (condCode != COND_EQ);
 358    case COND_LT: return (condCode == COND_LT);
 359    case COND_GE: return (condCode == COND_GT || condCode == COND_EQ);
 360    case COND_LE: return (condCode == COND_LT || condCode == COND_EQ);
 361    case COND_GT: return (condCode == COND_GT);
 362    case COND_TR: return GL_TRUE;
 363    case COND_FL: return GL_FALSE;
 364    default:      return GL_TRUE;
 365    }
 366 }
 367
 368
 369 /**
 370  * Evaluate the 4 condition codes against a predicate and return GL_TRUE
 371  * or GL_FALSE to indicate result.
 372  */
 373 static inline GLboolean
 374 eval_condition(const struct gl_program_machine *machine,
 375                const struct prog_instruction *inst)
 376 {
 377    const GLuint swizzle = inst->DstReg.CondSwizzle;
 378    const GLuint condMask = inst->DstReg.CondMask;
 379    if (test_cc(machine->CondCodes[GET_SWZ(swizzle, 0)], condMask) ||
 380        test_cc(machine->CondCodes[GET_SWZ(swizzle, 1)], condMask) ||
 381        test_cc(machine->CondCodes[GET_SWZ(swizzle, 2)], condMask) ||
 382        test_cc(machine->CondCodes[GET_SWZ(swizzle, 3)], condMask)) {
 383       return GL_TRUE;
 384    }
 385    else {
 386       return GL_FALSE;
 387    }
 388 }
 389
 390
 391
 392 /**
 393  * Store 4 floats into a register.  Observe the instructions saturate and
 394  * set-condition-code flags.
 395  */
 396 static void
 397 store_vector4(const struct prog_instruction *inst,
 398               struct gl_program_machine *machine, const GLfloat value[4])
 399 {
 400    const struct prog_dst_register *dstReg = &(inst->DstReg);
 401    const GLboolean clamp = inst->SaturateMode == SATURATE_ZERO_ONE;
 402    GLuint writeMask = dstReg->WriteMask;
 403    GLfloat clampedValue[4];
 404    GLfloat *dst = get_dst_register_pointer(dstReg, machine);
 405
 406 #if 0
 407    if (value[0] > 1.0e10 ||
 408        IS_INF_OR_NAN(value[0]) ||
 409        IS_INF_OR_NAN(value[1]) ||
 410        IS_INF_OR_NAN(value[2]) || IS_INF_OR_NAN(value[3]))
 411       printf("store %g %g %g %g\n", value[0], value[1], value[2], value[3]);
 412 #endif
 413
 414    if (clamp) {
 415       clampedValue[0] = CLAMP(value[0], 0.0F, 1.0F);
 416       clampedValue[1] = CLAMP(value[1], 0.0F, 1.0F);
 417       clampedValue[2] = CLAMP(value[2], 0.0F, 1.0F);
 418       clampedValue[3] = CLAMP(value[3], 0.0F, 1.0F);
 419       value = clampedValue;
 420    }
 421
 422    if (dstReg->CondMask != COND_TR) {
 423       /* condition codes may turn off some writes */
 424       if (writeMask & WRITEMASK_X) {
 425          if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 0)],
 426                       dstReg->CondMask))
 427             writeMask &= ~WRITEMASK_X;
 428       }
 429       if (writeMask & WRITEMASK_Y) {
 430          if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 1)],
 431                       dstReg->CondMask))
 432             writeMask &= ~WRITEMASK_Y;
 433       }
 434       if (writeMask & WRITEMASK_Z) {
 435          if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 2)],
 436                       dstReg->CondMask))
 437             writeMask &= ~WRITEMASK_Z;
 438       }
 439       if (writeMask & WRITEMASK_W) {
 440          if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 3)],
 441                       dstReg->CondMask))
 442             writeMask &= ~WRITEMASK_W;
 443       }
 444    }
 445
 446 #ifdef NAN_CHECK
 447    assert(!IS_INF_OR_NAN(value[0]));
 448    assert(!IS_INF_OR_NAN(value[0]));
 449    assert(!IS_INF_OR_NAN(value[0]));
 450    assert(!IS_INF_OR_NAN(value[0]));
 451 #endif
 452
 453    if (writeMask & WRITEMASK_X)
 454       dst[0] = value[0];
 455    if (writeMask & WRITEMASK_Y)
 456       dst[1] = value[1];
 457    if (writeMask & WRITEMASK_Z)
 458       dst[2] = value[2];
 459    if (writeMask & WRITEMASK_W)
 460       dst[3] = value[3];
 461
 462    if (inst->CondUpdate) {
 463       if (writeMask & WRITEMASK_X)
 464          machine->CondCodes[0] = generate_cc(value[0]);
 465       if (writeMask & WRITEMASK_Y)
 466          machine->CondCodes[1] = generate_cc(value[1]);
 467       if (writeMask & WRITEMASK_Z)
 468          machine->CondCodes[2] = generate_cc(value[2]);
 469       if (writeMask & WRITEMASK_W)
 470          machine->CondCodes[3] = generate_cc(value[3]);
 471 #if DEBUG_PROG
 472       printf("CondCodes=(%s,%s,%s,%s) for:\n",
 473              _mesa_condcode_string(machine->CondCodes[0]),
 474              _mesa_condcode_string(machine->CondCodes[1]),
 475              _mesa_condcode_string(machine->CondCodes[2]),
 476              _mesa_condcode_string(machine->CondCodes[3]));
 477 #endif
 478    }
 479 }
 480
 481
 482 /**
 483  * Execute the given vertex/fragment program.
 484  *
 485  * \param ctx  rendering context
 486  * \param program  the program to execute
 487  * \param machine  machine state (must be initialized)
 488  * \return GL_TRUE if program completed or GL_FALSE if program executed KIL.
 489  */
 490 GLboolean
 491 _mesa_execute_program(struct gl_context * ctx,
 492                       const struct gl_program *program,
 493                       struct gl_program_machine *machine)
 494 {
 495    const GLuint numInst = program->NumInstructions;
 496    const GLuint maxExec = 65536;
 497    GLuint pc, numExec = 0;
 498
 499    machine->CurProgram = program;
 500
 501    if (DEBUG_PROG) {
 502       printf("execute program %u --------------------\n", program->Id);
 503    }
 504
 505    if (program->Target == GL_VERTEX_PROGRAM_ARB) {
 506       machine->EnvParams = ctx->VertexProgram.Parameters;
 507    }
 508    else {
 509       machine->EnvParams = ctx->FragmentProgram.Parameters;
 510    }
 511
 512    for (pc = 0; pc < numInst; pc++) {
 513       const struct prog_instruction *inst = program->Instructions + pc;
 514
 515       if (DEBUG_PROG) {
 516          _mesa_print_instruction(inst);
 517       }
 518
 519       switch (inst->Opcode) {
 520       case OPCODE_ABS:
 521          {
 522             GLfloat a[4], result[4];
 523             fetch_vector4(&inst->SrcReg[0], machine, a);
 524             result[0] = fabsf(a[0]);
 525             result[1] = fabsf(a[1]);
 526             result[2] = fabsf(a[2]);
 527             result[3] = fabsf(a[3]);
 528             store_vector4(inst, machine, result);
 529          }
 530          break;
 531       case OPCODE_ADD:
 532          {
 533             GLfloat a[4], b[4], result[4];
 534             fetch_vector4(&inst->SrcReg[0], machine, a);
 535             fetch_vector4(&inst->SrcReg[1], machine, b);
 536             result[0] = a[0] + b[0];
 537             result[1] = a[1] + b[1];
 538             result[2] = a[2] + b[2];
 539             result[3] = a[3] + b[3];
 540             store_vector4(inst, machine, result);
 541             if (DEBUG_PROG) {
 542                printf("ADD (%g %g %g %g) = (%g %g %g %g) + (%g %g %g %g)\n",
 543                       result[0], result[1], result[2], result[3],
 544                       a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
 545             }
 546          }
 547          break;
 548       case OPCODE_ARL:
 549          {
 550             GLfloat t[4];
 551             fetch_vector4(&inst->SrcReg[0], machine, t);
 552             machine->AddressReg[0][0] = IFLOOR(t[0]);
 553             if (DEBUG_PROG) {
 554                printf("ARL %d\n", machine->AddressReg[0][0]);
 555             }
 556          }
 557          break;
 558       case OPCODE_BGNLOOP:
 559          /* no-op */
 560          assert(program->Instructions[inst->BranchTarget].Opcode
 561                 == OPCODE_ENDLOOP);
 562          break;
 563       case OPCODE_ENDLOOP:
 564          /* subtract 1 here since pc is incremented by for(pc) loop */
 565          assert(program->Instructions[inst->BranchTarget].Opcode
 566                 == OPCODE_BGNLOOP);
 567          pc = inst->BranchTarget - 1;   /* go to matching BNGLOOP */
 568          break;
 569       case OPCODE_BGNSUB:      /* begin subroutine */
 570          break;
 571       case OPCODE_ENDSUB:      /* end subroutine */
 572          break;
 573       case OPCODE_BRK:         /* break out of loop (conditional) */
 574          assert(program->Instructions[inst->BranchTarget].Opcode
 575                 == OPCODE_ENDLOOP);
 576          if (eval_condition(machine, inst)) {
 577             /* break out of loop */
 578             /* pc++ at end of for-loop will put us after the ENDLOOP inst */
 579             pc = inst->BranchTarget;
 580          }
 581          break;
 582       case OPCODE_CONT:        /* continue loop (conditional) */
 583          assert(program->Instructions[inst->BranchTarget].Opcode
 584                 == OPCODE_ENDLOOP);
 585          if (eval_condition(machine, inst)) {
 586             /* continue at ENDLOOP */
 587             /* Subtract 1 here since we'll do pc++ at end of for-loop */
 588             pc = inst->BranchTarget - 1;
 589          }
 590          break;
 591       case OPCODE_CAL:         /* Call subroutine (conditional) */
 592          if (eval_condition(machine, inst)) {
 593             /* call the subroutine */
 594             if (machine->StackDepth >= MAX_PROGRAM_CALL_DEPTH) {
 595                return GL_TRUE;  /* Per GL_NV_vertex_program2 spec */
 596             }
 597             machine->CallStack[machine->StackDepth++] = pc + 1; /* next inst */
 598             /* Subtract 1 here since we'll do pc++ at end of for-loop */
 599             pc = inst->BranchTarget - 1;
 600          }
 601          break;
 602       case OPCODE_CMP:
 603          {
 604             GLfloat a[4], b[4], c[4], result[4];
 605             fetch_vector4(&inst->SrcReg[0], machine, a);
 606             fetch_vector4(&inst->SrcReg[1], machine, b);
 607             fetch_vector4(&inst->SrcReg[2], machine, c);
 608             result[0] = a[0] < 0.0F ? b[0] : c[0];
 609             result[1] = a[1] < 0.0F ? b[1] : c[1];
 610             result[2] = a[2] < 0.0F ? b[2] : c[2];
 611             result[3] = a[3] < 0.0F ? b[3] : c[3];
 612             store_vector4(inst, machine, result);
 613             if (DEBUG_PROG) {
 614                printf("CMP (%g %g %g %g) = (%g %g %g %g) < 0 ? (%g %g %g %g) : (%g %g %g %g)\n",
 615                       result[0], result[1], result[2], result[3],
 616                       a[0], a[1], a[2], a[3],
 617                       b[0], b[1], b[2], b[3],
 618                       c[0], c[1], c[2], c[3]);
 619             }
 620          }
 621          break;
 622       case OPCODE_COS:
 623          {
 624             GLfloat a[4], result[4];
 625             fetch_vector1(&inst->SrcReg[0], machine, a);
 626             result[0] = result[1] = result[2] = result[3]
 627                = (GLfloat) cos(a[0]);
 628             store_vector4(inst, machine, result);
 629          }
 630          break;
 631       case OPCODE_DDX:         /* Partial derivative with respect to X */
 632          {
 633             GLfloat result[4];
 634             fetch_vector4_deriv(ctx, &inst->SrcReg[0], machine,
 635                                 'X', result);
 636             store_vector4(inst, machine, result);
 637          }
 638          break;
 639       case OPCODE_DDY:         /* Partial derivative with respect to Y */
 640          {
 641             GLfloat result[4];
 642             fetch_vector4_deriv(ctx, &inst->SrcReg[0], machine,
 643                                 'Y', result);
 644             store_vector4(inst, machine, result);
 645          }
 646          break;
 647       case OPCODE_DP2:
 648          {
 649             GLfloat a[4], b[4], result[4];
 650             fetch_vector4(&inst->SrcReg[0], machine, a);
 651             fetch_vector4(&inst->SrcReg[1], machine, b);
 652             result[0] = result[1] = result[2] = result[3] = DOT2(a, b);
 653             store_vector4(inst, machine, result);
 654             if (DEBUG_PROG) {
 655                printf("DP2 %g = (%g %g) . (%g %g)\n",
 656                       result[0], a[0], a[1], b[0], b[1]);
 657             }
 658          }
 659          break;
 660       case OPCODE_DP3:
 661          {
 662             GLfloat a[4], b[4], result[4];
 663             fetch_vector4(&inst->SrcReg[0], machine, a);
 664             fetch_vector4(&inst->SrcReg[1], machine, b);
 665             result[0] = result[1] = result[2] = result[3] = DOT3(a, b);
 666             store_vector4(inst, machine, result);
 667             if (DEBUG_PROG) {
 668                printf("DP3 %g = (%g %g %g) . (%g %g %g)\n",
 669                       result[0], a[0], a[1], a[2], b[0], b[1], b[2]);
 670             }
 671          }
 672          break;
 673       case OPCODE_DP4:
 674          {
 675             GLfloat a[4], b[4], result[4];
 676             fetch_vector4(&inst->SrcReg[0], machine, a);
 677             fetch_vector4(&inst->SrcReg[1], machine, b);
 678             result[0] = result[1] = result[2] = result[3] = DOT4(a, b);
 679             store_vector4(inst, machine, result);
 680             if (DEBUG_PROG) {
 681                printf("DP4 %g = (%g, %g %g %g) . (%g, %g %g %g)\n",
 682                       result[0], a[0], a[1], a[2], a[3],
 683                       b[0], b[1], b[2], b[3]);
 684             }
 685          }
 686          break;
 687       case OPCODE_DPH:
 688          {
 689             GLfloat a[4], b[4], result[4];
 690             fetch_vector4(&inst->SrcReg[0], machine, a);
 691             fetch_vector4(&inst->SrcReg[1], machine, b);
 692             result[0] = result[1] = result[2] = result[3] = DOT3(a, b) + b[3];
 693             store_vector4(inst, machine, result);
 694          }
 695          break;
 696       case OPCODE_DST:         /* Distance vector */
 697          {
 698             GLfloat a[4], b[4], result[4];
 699             fetch_vector4(&inst->SrcReg[0], machine, a);
 700             fetch_vector4(&inst->SrcReg[1], machine, b);
 701             result[0] = 1.0F;
 702             result[1] = a[1] * b[1];
 703             result[2] = a[2];
 704             result[3] = b[3];
 705             store_vector4(inst, machine, result);
 706          }
 707          break;
 708       case OPCODE_EXP:
 709          {
 710             GLfloat t[4], q[4], floor_t0;
 711             fetch_vector1(&inst->SrcReg[0], machine, t);
 712             floor_t0 = floorf(t[0]);
 713             if (floor_t0 > FLT_MAX_EXP) {
 714                SET_POS_INFINITY(q[0]);
 715                SET_POS_INFINITY(q[2]);
 716             }
 717             else if (floor_t0 < FLT_MIN_EXP) {
 718                q[0] = 0.0F;
 719                q[2] = 0.0F;
 720             }
 721             else {
 722                q[0] = ldexpf(1.0, (int) floor_t0);
 723                /* Note: GL_NV_vertex_program expects
 724                 * result.z = result.x * APPX(result.y)
 725                 * We do what the ARB extension says.
 726                 */
 727                q[2] = (GLfloat) pow(2.0, t[0]);
 728             }
 729             q[1] = t[0] - floor_t0;
 730             q[3] = 1.0F;
 731             store_vector4( inst, machine, q );
 732          }
 733          break;
 734       case OPCODE_EX2:         /* Exponential base 2 */
 735          {
 736             GLfloat a[4], result[4], val;
 737             fetch_vector1(&inst->SrcReg[0], machine, a);
 738             val = (GLfloat) pow(2.0, a[0]);
 739             /*
 740             if (IS_INF_OR_NAN(val))
 741                val = 1.0e10;
 742             */
 743             result[0] = result[1] = result[2] = result[3] = val;
 744             store_vector4(inst, machine, result);
 745          }
 746          break;
 747       case OPCODE_FLR:
 748          {
 749             GLfloat a[4], result[4];
 750             fetch_vector4(&inst->SrcReg[0], machine, a);
 751             result[0] = floorf(a[0]);
 752             result[1] = floorf(a[1]);
 753             result[2] = floorf(a[2]);
 754             result[3] = floorf(a[3]);
 755             store_vector4(inst, machine, result);
 756          }
 757          break;
 758       case OPCODE_FRC:
 759          {
 760             GLfloat a[4], result[4];
 761             fetch_vector4(&inst->SrcReg[0], machine, a);
 762             result[0] = a[0] - floorf(a[0]);
 763             result[1] = a[1] - floorf(a[1]);
 764             result[2] = a[2] - floorf(a[2]);
 765             result[3] = a[3] - floorf(a[3]);
 766             store_vector4(inst, machine, result);
 767          }
 768          break;
 769       case OPCODE_IF:
 770          {
 771             GLboolean cond;
 772             assert(program->Instructions[inst->BranchTarget].Opcode
 773                    == OPCODE_ELSE ||
 774                    program->Instructions[inst->BranchTarget].Opcode
 775                    == OPCODE_ENDIF);
 776             /* eval condition */
 777             if (inst->SrcReg[0].File != PROGRAM_UNDEFINED) {
 778                GLfloat a[4];
 779                fetch_vector1(&inst->SrcReg[0], machine, a);
 780                cond = (a[0] != 0.0);
 781             }
 782             else {
 783                cond = eval_condition(machine, inst);
 784             }
 785             if (DEBUG_PROG) {
 786                printf("IF: %d\n", cond);
 787             }
 788             /* do if/else */
 789             if (cond) {
 790                /* do if-clause (just continue execution) */
 791             }
 792             else {
 793                /* go to the instruction after ELSE or ENDIF */
 794                assert(inst->BranchTarget >= 0);
 795                pc = inst->BranchTarget;
 796             }
 797          }
 798          break;
 799       case OPCODE_ELSE:
 800          /* goto ENDIF */
 801          assert(program->Instructions[inst->BranchTarget].Opcode
 802                 == OPCODE_ENDIF);
 803          assert(inst->BranchTarget >= 0);
 804          pc = inst->BranchTarget;
 805          break;
 806       case OPCODE_ENDIF:
 807          /* nothing */
 808          break;
 809       case OPCODE_KIL_NV:      /* NV_f_p only (conditional) */
 810          if (eval_condition(machine, inst)) {
 811             return GL_FALSE;
 812          }
 813          break;
 814       case OPCODE_KIL:         /* ARB_f_p only */
 815          {
 816             GLfloat a[4];
 817             fetch_vector4(&inst->SrcReg[0], machine, a);
 818             if (DEBUG_PROG) {
 819                printf("KIL if (%g %g %g %g) <= 0.0\n",
 820                       a[0], a[1], a[2], a[3]);
 821             }
 822
 823             if (a[0] < 0.0F || a[1] < 0.0F || a[2] < 0.0F || a[3] < 0.0F) {
 824                return GL_FALSE;
 825             }
 826          }
 827          break;
 828       case OPCODE_LG2:         /* log base 2 */
 829          {
 830             GLfloat a[4], result[4], val;
 831             fetch_vector1(&inst->SrcReg[0], machine, a);
 832             /* The fast LOG2 macro doesn't meet the precision requirements.
 833              */
 834             if (a[0] == 0.0F) {
 835                val = -FLT_MAX;
 836             }
 837             else {
 838                val = (float)(log(a[0]) * 1.442695F);
 839             }
 840             result[0] = result[1] = result[2] = result[3] = val;
 841             store_vector4(inst, machine, result);
 842          }
 843          break;
 844       case OPCODE_LIT:
 845          {
 846             const GLfloat epsilon = 1.0F / 256.0F;      /* from NV VP spec */
 847             GLfloat a[4], result[4];
 848             fetch_vector4(&inst->SrcReg[0], machine, a);
 849             a[0] = MAX2(a[0], 0.0F);
 850             a[1] = MAX2(a[1], 0.0F);
 851             /* XXX ARB version clamps a[3], NV version doesn't */
 852             a[3] = CLAMP(a[3], -(128.0F - epsilon), (128.0F - epsilon));
 853             result[0] = 1.0F;
 854             result[1] = a[0];
 855             /* XXX we could probably just use pow() here */
 856             if (a[0] > 0.0F) {
 857                if (a[1] == 0.0 && a[3] == 0.0)
 858                   result[2] = 1.0F;
 859                else
 860                   result[2] = (GLfloat) pow(a[1], a[3]);
 861             }
 862             else {
 863                result[2] = 0.0F;
 864             }
 865             result[3] = 1.0F;
 866             store_vector4(inst, machine, result);
 867             if (DEBUG_PROG) {
 868                printf("LIT (%g %g %g %g) : (%g %g %g %g)\n",
 869                       result[0], result[1], result[2], result[3],
 870                       a[0], a[1], a[2], a[3]);
 871             }
 872          }
 873          break;
 874       case OPCODE_LOG:
 875          {
 876             GLfloat t[4], q[4], abs_t0;
 877             fetch_vector1(&inst->SrcReg[0], machine, t);
 878             abs_t0 = fabsf(t[0]);
 879             if (abs_t0 != 0.0F) {
 880                if (IS_INF_OR_NAN(abs_t0))
 881                {
 882                   SET_POS_INFINITY(q[0]);
 883                   q[1] = 1.0F;
 884                   SET_POS_INFINITY(q[2]);
 885                }
 886                else {
 887                   int exponent;
 888                   GLfloat mantissa = frexpf(t[0], &exponent);
 889                   q[0] = (GLfloat) (exponent - 1);
 890                   q[1] = (GLfloat) (2.0 * mantissa); /* map [.5, 1) -> [1, 2) */
 891
 892                   /* The fast LOG2 macro doesn't meet the precision
 893                    * requirements.
 894                    */
 895                   q[2] = (float)(log(t[0]) * 1.442695F);
 896                }
 897             }
 898             else {
 899                SET_NEG_INFINITY(q[0]);
 900                q[1] = 1.0F;
 901                SET_NEG_INFINITY(q[2]);
 902             }
 903             q[3] = 1.0;
 904             store_vector4(inst, machine, q);
 905          }
 906          break;
 907       case OPCODE_LRP:
 908          {
 909             GLfloat a[4], b[4], c[4], result[4];
 910             fetch_vector4(&inst->SrcReg[0], machine, a);
 911             fetch_vector4(&inst->SrcReg[1], machine, b);
 912             fetch_vector4(&inst->SrcReg[2], machine, c);
 913             result[0] = a[0] * b[0] + (1.0F - a[0]) * c[0];
 914             result[1] = a[1] * b[1] + (1.0F - a[1]) * c[1];
 915             result[2] = a[2] * b[2] + (1.0F - a[2]) * c[2];
 916             result[3] = a[3] * b[3] + (1.0F - a[3]) * c[3];
 917             store_vector4(inst, machine, result);
 918             if (DEBUG_PROG) {
 919                printf("LRP (%g %g %g %g) = (%g %g %g %g), "
 920                       "(%g %g %g %g), (%g %g %g %g)\n",
 921                       result[0], result[1], result[2], result[3],
 922                       a[0], a[1], a[2], a[3],
 923                       b[0], b[1], b[2], b[3], c[0], c[1], c[2], c[3]);
 924             }
 925          }
 926          break;
 927       case OPCODE_MAD:
 928          {
 929             GLfloat a[4], b[4], c[4], result[4];
 930             fetch_vector4(&inst->SrcReg[0], machine, a);
 931             fetch_vector4(&inst->SrcReg[1], machine, b);
 932             fetch_vector4(&inst->SrcReg[2], machine, c);
 933             result[0] = a[0] * b[0] + c[0];
 934             result[1] = a[1] * b[1] + c[1];
 935             result[2] = a[2] * b[2] + c[2];
 936             result[3] = a[3] * b[3] + c[3];
 937             store_vector4(inst, machine, result);
 938             if (DEBUG_PROG) {
 939                printf("MAD (%g %g %g %g) = (%g %g %g %g) * "
 940                       "(%g %g %g %g) + (%g %g %g %g)\n",
 941                       result[0], result[1], result[2], result[3],
 942                       a[0], a[1], a[2], a[3],
 943                       b[0], b[1], b[2], b[3], c[0], c[1], c[2], c[3]);
 944             }
 945          }
 946          break;
 947       case OPCODE_MAX:
 948          {
 949             GLfloat a[4], b[4], result[4];
 950             fetch_vector4(&inst->SrcReg[0], machine, a);
 951             fetch_vector4(&inst->SrcReg[1], machine, b);
 952             result[0] = MAX2(a[0], b[0]);
 953             result[1] = MAX2(a[1], b[1]);
 954             result[2] = MAX2(a[2], b[2]);
 955             result[3] = MAX2(a[3], b[3]);
 956             store_vector4(inst, machine, result);
 957             if (DEBUG_PROG) {
 958                printf("MAX (%g %g %g %g) = (%g %g %g %g), (%g %g %g %g)\n",
 959                       result[0], result[1], result[2], result[3],
 960                       a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
 961             }
 962          }
 963          break;
 964       case OPCODE_MIN:
 965          {
 966             GLfloat a[4], b[4], result[4];
 967             fetch_vector4(&inst->SrcReg[0], machine, a);
 968             fetch_vector4(&inst->SrcReg[1], machine, b);
 969             result[0] = MIN2(a[0], b[0]);
 970             result[1] = MIN2(a[1], b[1]);
 971             result[2] = MIN2(a[2], b[2]);
 972             result[3] = MIN2(a[3], b[3]);
 973             store_vector4(inst, machine, result);
 974          }
 975          break;
 976       case OPCODE_MOV:
 977          {
 978             GLfloat result[4];
 979             fetch_vector4(&inst->SrcReg[0], machine, result);
 980             store_vector4(inst, machine, result);
 981             if (DEBUG_PROG) {
 982                printf("MOV (%g %g %g %g)\n",
 983                       result[0], result[1], result[2], result[3]);
 984             }
 985          }
 986          break;
 987       case OPCODE_MUL:
 988          {
 989             GLfloat a[4], b[4], result[4];
 990             fetch_vector4(&inst->SrcReg[0], machine, a);
 991             fetch_vector4(&inst->SrcReg[1], machine, b);
 992             result[0] = a[0] * b[0];
 993             result[1] = a[1] * b[1];
 994             result[2] = a[2] * b[2];
 995             result[3] = a[3] * b[3];
 996             store_vector4(inst, machine, result);
 997             if (DEBUG_PROG) {
 998                printf("MUL (%g %g %g %g) = (%g %g %g %g) * (%g %g %g %g)\n",
 999                       result[0], result[1], result[2], result[3],
1000                       a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
1001             }
1002          }
1003          break;
1004       case OPCODE_NOISE1:
1005          {
1006             GLfloat a[4], result[4];
1007             fetch_vector1(&inst->SrcReg[0], machine, a);
1008             result[0] =
1009                result[1] =
1010                result[2] =
1011                result[3] = _mesa_noise1(a[0]);
1012             store_vector4(inst, machine, result);
1013          }
1014          break;
1015       case OPCODE_NOISE2:
1016          {
1017             GLfloat a[4], result[4];
1018             fetch_vector4(&inst->SrcReg[0], machine, a);
1019             result[0] =
1020                result[1] =
1021                result[2] = result[3] = _mesa_noise2(a[0], a[1]);
1022             store_vector4(inst, machine, result);
1023          }
1024          break;
1025       case OPCODE_NOISE3:
1026          {
1027             GLfloat a[4], result[4];
1028             fetch_vector4(&inst->SrcReg[0], machine, a);
1029             result[0] =
1030                result[1] =
1031                result[2] =
1032                result[3] = _mesa_noise3(a[0], a[1], a[2]);
1033             store_vector4(inst, machine, result);
1034          }
1035          break;
1036       case OPCODE_NOISE4:
1037          {
1038             GLfloat a[4], result[4];
1039             fetch_vector4(&inst->SrcReg[0], machine, a);
1040             result[0] =
1041                result[1] =
1042                result[2] =
1043                result[3] = _mesa_noise4(a[0], a[1], a[2], a[3]);
1044             store_vector4(inst, machine, result);
1045          }
1046          break;
1047       case OPCODE_NOP:
1048          break;
1049       case OPCODE_POW:
1050          {
1051             GLfloat a[4], b[4], result[4];
1052             fetch_vector1(&inst->SrcReg[0], machine, a);
1053             fetch_vector1(&inst->SrcReg[1], machine, b);
1054             result[0] = result[1] = result[2] = result[3]
1055                = (GLfloat) pow(a[0], b[0]);
1056             store_vector4(inst, machine, result);
1057          }
1058          break;
1059
1060       case OPCODE_RCP:
1061          {
1062             GLfloat a[4], result[4];
1063             fetch_vector1(&inst->SrcReg[0], machine, a);
1064             if (DEBUG_PROG) {
1065                if (a[0] == 0)
1066                   printf("RCP(0)\n");
1067                else if (IS_INF_OR_NAN(a[0]))
1068                   printf("RCP(inf)\n");
1069             }
1070             result[0] = result[1] = result[2] = result[3] = 1.0F / a[0];
1071             store_vector4(inst, machine, result);
1072          }
1073          break;
1074       case OPCODE_RET:         /* return from subroutine (conditional) */
1075          if (eval_condition(machine, inst)) {
1076             if (machine->StackDepth == 0) {
1077                return GL_TRUE;  /* Per GL_NV_vertex_program2 spec */
1078             }
1079             /* subtract one because of pc++ in the for loop */
1080             pc = machine->CallStack[--machine->StackDepth] - 1;
1081          }
1082          break;
1083       case OPCODE_RSQ:         /* 1 / sqrt() */
1084          {
1085             GLfloat a[4], result[4];
1086             fetch_vector1(&inst->SrcReg[0], machine, a);
1087             a[0] = fabsf(a[0]);
1088             result[0] = result[1] = result[2] = result[3] = 1.0f / sqrtf(a[0]);
1089             store_vector4(inst, machine, result);
1090             if (DEBUG_PROG) {
1091                printf("RSQ %g = 1/sqrt(|%g|)\n", result[0], a[0]);
1092             }
1093          }
1094          break;
1095       case OPCODE_SCS:         /* sine and cos */
1096          {
1097             GLfloat a[4], result[4];
1098             fetch_vector1(&inst->SrcReg[0], machine, a);
1099             result[0] = (GLfloat) cos(a[0]);
1100             result[1] = (GLfloat) sin(a[0]);
1101             result[2] = 0.0;    /* undefined! */
1102             result[3] = 0.0;    /* undefined! */
1103             store_vector4(inst, machine, result);
1104          }
1105          break;
1106       case OPCODE_SEQ:         /* set on equal */
1107          {
1108             GLfloat a[4], b[4], result[4];
1109             fetch_vector4(&inst->SrcReg[0], machine, a);
1110             fetch_vector4(&inst->SrcReg[1], machine, b);
1111             result[0] = (a[0] == b[0]) ? 1.0F : 0.0F;
1112             result[1] = (a[1] == b[1]) ? 1.0F : 0.0F;
1113             result[2] = (a[2] == b[2]) ? 1.0F : 0.0F;
1114             result[3] = (a[3] == b[3]) ? 1.0F : 0.0F;
1115             store_vector4(inst, machine, result);
1116             if (DEBUG_PROG) {
1117                printf("SEQ (%g %g %g %g) = (%g %g %g %g) == (%g %g %g %g)\n",
1118                       result[0], result[1], result[2], result[3],
1119                       a[0], a[1], a[2], a[3],
1120                       b[0], b[1], b[2], b[3]);
1121             }
1122          }
1123          break;
1124       case OPCODE_SGE:         /* set on greater or equal */
1125          {
1126             GLfloat a[4], b[4], result[4];
1127             fetch_vector4(&inst->SrcReg[0], machine, a);
1128             fetch_vector4(&inst->SrcReg[1], machine, b);
1129             result[0] = (a[0] >= b[0]) ? 1.0F : 0.0F;
1130             result[1] = (a[1] >= b[1]) ? 1.0F : 0.0F;
1131             result[2] = (a[2] >= b[2]) ? 1.0F : 0.0F;
1132             result[3] = (a[3] >= b[3]) ? 1.0F : 0.0F;
1133             store_vector4(inst, machine, result);
1134             if (DEBUG_PROG) {
1135                printf("SGE (%g %g %g %g) = (%g %g %g %g) >= (%g %g %g %g)\n",
1136                       result[0], result[1], result[2], result[3],
1137                       a[0], a[1], a[2], a[3],
1138                       b[0], b[1], b[2], b[3]);
1139             }
1140          }
1141          break;
1142       case OPCODE_SGT:         /* set on greater */
1143          {
1144             GLfloat a[4], b[4], result[4];
1145             fetch_vector4(&inst->SrcReg[0], machine, a);
1146             fetch_vector4(&inst->SrcReg[1], machine, b);
1147             result[0] = (a[0] > b[0]) ? 1.0F : 0.0F;
1148             result[1] = (a[1] > b[1]) ? 1.0F : 0.0F;
1149             result[2] = (a[2] > b[2]) ? 1.0F : 0.0F;
1150             result[3] = (a[3] > b[3]) ? 1.0F : 0.0F;
1151             store_vector4(inst, machine, result);
1152             if (DEBUG_PROG) {
1153                printf("SGT (%g %g %g %g) = (%g %g %g %g) > (%g %g %g %g)\n",
1154                       result[0], result[1], result[2], result[3],
1155                       a[0], a[1], a[2], a[3],
1156                       b[0], b[1], b[2], b[3]);
1157             }
1158          }
1159          break;
1160       case OPCODE_SIN:
1161          {
1162             GLfloat a[4], result[4];
1163             fetch_vector1(&inst->SrcReg[0], machine, a);
1164             result[0] = result[1] = result[2] = result[3]
1165                = (GLfloat) sin(a[0]);
1166             store_vector4(inst, machine, result);
1167          }
1168          break;
1169       case OPCODE_SLE:         /* set on less or equal */
1170          {
1171             GLfloat a[4], b[4], result[4];
1172             fetch_vector4(&inst->SrcReg[0], machine, a);
1173             fetch_vector4(&inst->SrcReg[1], machine, b);
1174             result[0] = (a[0] <= b[0]) ? 1.0F : 0.0F;
1175             result[1] = (a[1] <= b[1]) ? 1.0F : 0.0F;
1176             result[2] = (a[2] <= b[2]) ? 1.0F : 0.0F;
1177             result[3] = (a[3] <= b[3]) ? 1.0F : 0.0F;
1178             store_vector4(inst, machine, result);
1179             if (DEBUG_PROG) {
1180                printf("SLE (%g %g %g %g) = (%g %g %g %g) <= (%g %g %g %g)\n",
1181                       result[0], result[1], result[2], result[3],
1182                       a[0], a[1], a[2], a[3],
1183                       b[0], b[1], b[2], b[3]);
1184             }
1185          }
1186          break;
1187       case OPCODE_SLT:         /* set on less */
1188          {
1189             GLfloat a[4], b[4], result[4];
1190             fetch_vector4(&inst->SrcReg[0], machine, a);
1191             fetch_vector4(&inst->SrcReg[1], machine, b);
1192             result[0] = (a[0] < b[0]) ? 1.0F : 0.0F;
1193             result[1] = (a[1] < b[1]) ? 1.0F : 0.0F;
1194             result[2] = (a[2] < b[2]) ? 1.0F : 0.0F;
1195             result[3] = (a[3] < b[3]) ? 1.0F : 0.0F;
1196             store_vector4(inst, machine, result);
1197             if (DEBUG_PROG) {
1198                printf("SLT (%g %g %g %g) = (%g %g %g %g) < (%g %g %g %g)\n",
1199                       result[0], result[1], result[2], result[3],
1200                       a[0], a[1], a[2], a[3],
1201                       b[0], b[1], b[2], b[3]);
1202             }
1203          }
1204          break;
1205       case OPCODE_SNE:         /* set on not equal */
1206          {
1207             GLfloat a[4], b[4], result[4];
1208             fetch_vector4(&inst->SrcReg[0], machine, a);
1209             fetch_vector4(&inst->SrcReg[1], machine, b);
1210             result[0] = (a[0] != b[0]) ? 1.0F : 0.0F;
1211             result[1] = (a[1] != b[1]) ? 1.0F : 0.0F;
1212             result[2] = (a[2] != b[2]) ? 1.0F : 0.0F;
1213             result[3] = (a[3] != b[3]) ? 1.0F : 0.0F;
1214             store_vector4(inst, machine, result);
1215             if (DEBUG_PROG) {
1216                printf("SNE (%g %g %g %g) = (%g %g %g %g) != (%g %g %g %g)\n",
1217                       result[0], result[1], result[2], result[3],
1218                       a[0], a[1], a[2], a[3],
1219                       b[0], b[1], b[2], b[3]);
1220             }
1221          }
1222          break;
1223       case OPCODE_SSG:         /* set sign (-1, 0 or +1) */
1224          {
1225             GLfloat a[4], result[4];
1226             fetch_vector4(&inst->SrcReg[0], machine, a);
1227             result[0] = (GLfloat) ((a[0] > 0.0F) - (a[0] < 0.0F));
1228             result[1] = (GLfloat) ((a[1] > 0.0F) - (a[1] < 0.0F));
1229             result[2] = (GLfloat) ((a[2] > 0.0F) - (a[2] < 0.0F));
1230             result[3] = (GLfloat) ((a[3] > 0.0F) - (a[3] < 0.0F));
1231             store_vector4(inst, machine, result);
1232          }
1233          break;
1234       case OPCODE_SUB:
1235          {
1236             GLfloat a[4], b[4], result[4];
1237             fetch_vector4(&inst->SrcReg[0], machine, a);
1238             fetch_vector4(&inst->SrcReg[1], machine, b);
1239             result[0] = a[0] - b[0];
1240             result[1] = a[1] - b[1];
1241             result[2] = a[2] - b[2];
1242             result[3] = a[3] - b[3];
1243             store_vector4(inst, machine, result);
1244             if (DEBUG_PROG) {
1245                printf("SUB (%g %g %g %g) = (%g %g %g %g) - (%g %g %g %g)\n",
1246                       result[0], result[1], result[2], result[3],
1247                       a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
1248             }
1249          }
1250          break;
1251       case OPCODE_SWZ:         /* extended swizzle */
1252          {
1253             const struct prog_src_register *source = &inst->SrcReg[0];
1254             const GLfloat *src = get_src_register_pointer(source, machine);
1255             GLfloat result[4];
1256             GLuint i;
1257             for (i = 0; i < 4; i++) {
1258                const GLuint swz = GET_SWZ(source->Swizzle, i);
1259                if (swz == SWIZZLE_ZERO)
1260                   result[i] = 0.0;
1261                else if (swz == SWIZZLE_ONE)
1262                   result[i] = 1.0;
1263                else {
1264                   assert(swz <= 3);
1265                   result[i] = src[swz];
1266                }
1267                if (source->Negate & (1 << i))
1268                   result[i] = -result[i];
1269             }
1270             store_vector4(inst, machine, result);
1271          }
1272          break;
1273       case OPCODE_TEX:         /* Both ARB and NV frag prog */
1274          /* Simple texel lookup */
1275          {
1276             GLfloat texcoord[4], color[4];
1277             fetch_vector4(&inst->SrcReg[0], machine, texcoord);
1278
1279             /* For TEX, texcoord.Q should not be used and its value should not
1280              * matter (at most, we pass coord.xyz to texture3D() in GLSL).
1281              * Set Q=1 so that FetchTexelDeriv() doesn't get a garbage value
1282              * which is effectively what happens when the texcoord swizzle
1283              * is .xyzz
1284              */
1285             texcoord[3] = 1.0f;
1286
1287             fetch_texel(ctx, machine, inst, texcoord, 0.0, color);
1288
1289             if (DEBUG_PROG) {
1290                printf("TEX (%g, %g, %g, %g) = texture[%d][%g, %g, %g, %g]\n",
1291                       color[0], color[1], color[2], color[3],
1292                       inst->TexSrcUnit,
1293                       texcoord[0], texcoord[1], texcoord[2], texcoord[3]);
1294             }
1295             store_vector4(inst, machine, color);
1296          }
1297          break;
1298       case OPCODE_TXB:         /* GL_ARB_fragment_program only */
1299          /* Texel lookup with LOD bias */
1300          {
1301             GLfloat texcoord[4], color[4], lodBias;
1302
1303             fetch_vector4(&inst->SrcReg[0], machine, texcoord);
1304
1305             /* texcoord[3] is the bias to add to lambda */
1306             lodBias = texcoord[3];
1307
1308             fetch_texel(ctx, machine, inst, texcoord, lodBias, color);
1309
1310             if (DEBUG_PROG) {
1311                printf("TXB (%g, %g, %g, %g) = texture[%d][%g %g %g %g]"
1312                       "  bias %g\n",
1313                       color[0], color[1], color[2], color[3],
1314                       inst->TexSrcUnit,
1315                       texcoord[0],
1316                       texcoord[1],
1317                       texcoord[2],
1318                       texcoord[3],
1319                       lodBias);
1320             }
1321
1322             store_vector4(inst, machine, color);
1323          }
1324          break;
1325       case OPCODE_TXD:         /* GL_NV_fragment_program only */
1326          /* Texture lookup w/ partial derivatives for LOD */
1327          {
1328             GLfloat texcoord[4], dtdx[4], dtdy[4], color[4];
1329             fetch_vector4(&inst->SrcReg[0], machine, texcoord);
1330             fetch_vector4(&inst->SrcReg[1], machine, dtdx);
1331             fetch_vector4(&inst->SrcReg[2], machine, dtdy);
1332             machine->FetchTexelDeriv(ctx, texcoord, dtdx, dtdy,
1333                                      0.0, /* lodBias */
1334                                      inst->TexSrcUnit, color);
1335             store_vector4(inst, machine, color);
1336          }
1337          break;
1338       case OPCODE_TXL:
1339          /* Texel lookup with explicit LOD */
1340          {
1341             GLfloat texcoord[4], color[4], lod;
1342
1343             fetch_vector4(&inst->SrcReg[0], machine, texcoord);
1344
1345             /* texcoord[3] is the LOD */
1346             lod = texcoord[3];
1347
1348             machine->FetchTexelLod(ctx, texcoord, lod,
1349                                    machine->Samplers[inst->TexSrcUnit], color);
1350
1351             store_vector4(inst, machine, color);
1352          }
1353          break;
1354       case OPCODE_TXP:         /* GL_ARB_fragment_program only */
1355          /* Texture lookup w/ projective divide */
1356          {
1357             GLfloat texcoord[4], color[4];
1358
1359             fetch_vector4(&inst->SrcReg[0], machine, texcoord);
1360             /* Not so sure about this test - if texcoord[3] is
1361              * zero, we'd probably be fine except for an assert in
1362              * IROUND_POS() which gets triggered by the inf values created.
1363              */
1364             if (texcoord[3] != 0.0) {
1365                texcoord[0] /= texcoord[3];
1366                texcoord[1] /= texcoord[3];
1367                texcoord[2] /= texcoord[3];
1368             }
1369
1370             fetch_texel(ctx, machine, inst, texcoord, 0.0, color);
1371
1372             store_vector4(inst, machine, color);
1373          }
1374          break;
1375       case OPCODE_TXP_NV:      /* GL_NV_fragment_program only */
1376          /* Texture lookup w/ projective divide, as above, but do not
1377           * do the divide by w if sampling from a cube map.
1378           */
1379          {
1380             GLfloat texcoord[4], color[4];
1381
1382             fetch_vector4(&inst->SrcReg[0], machine, texcoord);
1383             if (inst->TexSrcTarget != TEXTURE_CUBE_INDEX &&
1384                 texcoord[3] != 0.0) {
1385                texcoord[0] /= texcoord[3];
1386                texcoord[1] /= texcoord[3];
1387                texcoord[2] /= texcoord[3];
1388             }
1389
1390             fetch_texel(ctx, machine, inst, texcoord, 0.0, color);
1391
1392             store_vector4(inst, machine, color);
1393          }
1394          break;
1395       case OPCODE_TRUNC:       /* truncate toward zero */
1396          {
1397             GLfloat a[4], result[4];
1398             fetch_vector4(&inst->SrcReg[0], machine, a);
1399             result[0] = (GLfloat) (GLint) a[0];
1400             result[1] = (GLfloat) (GLint) a[1];
1401             result[2] = (GLfloat) (GLint) a[2];
1402             result[3] = (GLfloat) (GLint) a[3];
1403             store_vector4(inst, machine, result);
1404          }
1405          break;
1406       case OPCODE_XPD:         /* cross product */
1407          {
1408             GLfloat a[4], b[4], result[4];
1409             fetch_vector4(&inst->SrcReg[0], machine, a);
1410             fetch_vector4(&inst->SrcReg[1], machine, b);
1411             result[0] = a[1] * b[2] - a[2] * b[1];
1412             result[1] = a[2] * b[0] - a[0] * b[2];
1413             result[2] = a[0] * b[1] - a[1] * b[0];
1414             result[3] = 1.0;
1415             store_vector4(inst, machine, result);
1416             if (DEBUG_PROG) {
1417                printf("XPD (%g %g %g %g) = (%g %g %g) X (%g %g %g)\n",
1418                       result[0], result[1], result[2], result[3],
1419                       a[0], a[1], a[2], b[0], b[1], b[2]);
1420             }
1421          }
1422          break;
1423       case OPCODE_END:
1424          return GL_TRUE;
1425       default:
1426          _mesa_problem(ctx, "Bad opcode %d in _mesa_execute_program",
1427                        inst->Opcode);
1428          return GL_TRUE;        /* return value doesn't matter */
1429       }
1430
1431       numExec++;
1432       if (numExec > maxExec) {
1433          static GLboolean reported = GL_FALSE;
1434          if (!reported) {
1435             _mesa_problem(ctx, "Infinite loop detected in fragment program");
1436             reported = GL_TRUE;
1437          }
1438          return GL_TRUE;
1439       }
1440
1441    } /* for pc */
1442
1443    return GL_TRUE;
1444 }