src/mesa/shader/prog_execute.c

   1 /*
   2  * Mesa 3-D graphics library
   3  * Version:  7.0.3
   4  *
   5  * Copyright (C) 1999-2007  Brian Paul   All Rights Reserved.
   6  *
   7  * Permission is hereby granted, free of charge, to any person obtaining a
   8  * copy of this software and associated documentation files (the "Software"),
   9  * to deal in the Software without restriction, including without limitation
  10  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  11  * and/or sell copies of the Software, and to permit persons to whom the
  12  * Software is furnished to do so, subject to the following conditions:
  13  *
  14  * The above copyright notice and this permission notice shall be included
  15  * in all copies or substantial portions of the Software.
  16  *
  17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  18  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  20  * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
  21  * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  22  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  23  */
  24
  25 /**
  26  * \file prog_execute.c
  27  * Software interpreter for vertex/fragment programs.
  28  * \author Brian Paul
  29  */
  30
  31 /*
  32  * NOTE: we do everything in single-precision floating point; we don't
  33  * currently observe the single/half/fixed-precision qualifiers.
  34  *
  35  */
  36
  37
  38 #include "main/glheader.h"
  39 #include "main/colormac.h"
  40 #include "main/context.h"
  41 #include "program.h"
  42 #include "prog_execute.h"
  43 #include "prog_instruction.h"
  44 #include "prog_parameter.h"
  45 #include "prog_print.h"
  46 #include "shader/slang/slang_library_noise.h"
  47
  48
  49 /* debug predicate */
  50 #define DEBUG_PROG 0
  51
  52
  53 /**
  54  * Set x to positive or negative infinity.
  55  */
  56 #if defined(USE_IEEE) || defined(_WIN32)
  57 #define SET_POS_INFINITY(x)  ( *((GLuint *) (void *)&x) = 0x7F800000 )
  58 #define SET_NEG_INFINITY(x)  ( *((GLuint *) (void *)&x) = 0xFF800000 )
  59 #elif defined(VMS)
  60 #define SET_POS_INFINITY(x)  x = __MAXFLOAT
  61 #define SET_NEG_INFINITY(x)  x = -__MAXFLOAT
  62 #else
  63 #define SET_POS_INFINITY(x)  x = (GLfloat) HUGE_VAL
  64 #define SET_NEG_INFINITY(x)  x = (GLfloat) -HUGE_VAL
  65 #endif
  66
  67 #define SET_FLOAT_BITS(x, bits) ((fi_type *) (void *) &(x))->i = bits
  68
  69
  70 static const GLfloat ZeroVec[4] = { 0.0F, 0.0F, 0.0F, 0.0F };
  71
  72
  73
  74 /**
  75  * Return a pointer to the 4-element float vector specified by the given
  76  * source register.
  77  */
  78 static INLINE const GLfloat *
  79 get_register_pointer(const struct prog_src_register *source,
  80                      const struct gl_program_machine *machine)
  81 {
  82    if (source->RelAddr) {
  83       const GLint reg = source->Index + machine->AddressReg[0][0];
  84       if (source->File == PROGRAM_ENV_PARAM)
  85          if (reg < 0 || reg >= MAX_PROGRAM_ENV_PARAMS)
  86             return ZeroVec;
  87          else
  88             return machine->EnvParams[reg];
  89       else {
  90          const struct gl_program_parameter_list *params;
  91          ASSERT(source->File == PROGRAM_LOCAL_PARAM ||
  92                 source->File == PROGRAM_CONSTANT ||
  93                 source->File == PROGRAM_STATE_VAR ||
  94                 source->File == PROGRAM_UNIFORM);
  95          params = machine->CurProgram->Parameters;
  96          if (reg < 0 || reg >= (GLint)params->NumParameters)
  97             return ZeroVec;
  98          else
  99             return params->ParameterValues[reg];
 100       }
 101    }
 102
 103    switch (source->File) {
 104    case PROGRAM_TEMPORARY:
 105       ASSERT(source->Index < MAX_PROGRAM_TEMPS);
 106       return machine->Temporaries[source->Index];
 107
 108    case PROGRAM_INPUT:
 109       if (machine->CurProgram->Target == GL_VERTEX_PROGRAM_ARB) {
 110          ASSERT(source->Index < VERT_ATTRIB_MAX);
 111          return machine->VertAttribs[source->Index];
 112       }
 113       else {
 114          ASSERT(source->Index < FRAG_ATTRIB_MAX);
 115          return machine->Attribs[source->Index][machine->CurElement];
 116       }
 117
 118    case PROGRAM_OUTPUT:
 119       ASSERT(source->Index < MAX_PROGRAM_OUTPUTS);
 120       return machine->Outputs[source->Index];
 121
 122    case PROGRAM_LOCAL_PARAM:
 123       ASSERT(source->Index < MAX_PROGRAM_LOCAL_PARAMS);
 124       return machine->CurProgram->LocalParams[source->Index];
 125
 126    case PROGRAM_ENV_PARAM:
 127       ASSERT(source->Index < MAX_PROGRAM_ENV_PARAMS);
 128       return machine->EnvParams[source->Index];
 129
 130    case PROGRAM_STATE_VAR:
 131       /* Fallthrough */
 132    case PROGRAM_CONSTANT:
 133       /* Fallthrough */
 134    case PROGRAM_UNIFORM:
 135       /* Fallthrough */
 136    case PROGRAM_NAMED_PARAM:
 137       ASSERT(source->Index <
 138              (GLint) machine->CurProgram->Parameters->NumParameters);
 139       return machine->CurProgram->Parameters->ParameterValues[source->Index];
 140
 141    default:
 142       _mesa_problem(NULL,
 143                     "Invalid input register file %d in get_register_pointer()",
 144                     source->File);
 145       return NULL;
 146    }
 147 }
 148
 149
 150 #if FEATURE_MESA_program_debug
 151 static struct gl_program_machine *CurrentMachine = NULL;
 152
 153 /**
 154  * For GL_MESA_program_debug.
 155  * Return current value (4*GLfloat) of a program register.
 156  * Called via ctx->Driver.GetProgramRegister().
 157  */
 158 void
 159 _mesa_get_program_register(GLcontext *ctx, enum register_file file,
 160                            GLuint index, GLfloat val[4])
 161 {
 162    if (CurrentMachine) {
 163       struct prog_src_register src;
 164       const GLfloat *reg;
 165       src.File = file;
 166       src.Index = index;
 167       reg = get_register_pointer(&src, CurrentMachine);
 168       COPY_4V(val, reg);
 169    }
 170 }
 171 #endif /* FEATURE_MESA_program_debug */
 172
 173
 174 /**
 175  * Fetch a 4-element float vector from the given source register.
 176  * Apply swizzling and negating as needed.
 177  */
 178 static void
 179 fetch_vector4(const struct prog_src_register *source,
 180               const struct gl_program_machine *machine, GLfloat result[4])
 181 {
 182    const GLfloat *src = get_register_pointer(source, machine);
 183    ASSERT(src);
 184
 185    if (source->Swizzle == SWIZZLE_NOOP) {
 186       /* no swizzling */
 187       COPY_4V(result, src);
 188    }
 189    else {
 190       ASSERT(GET_SWZ(source->Swizzle, 0) <= 3);
 191       ASSERT(GET_SWZ(source->Swizzle, 1) <= 3);
 192       ASSERT(GET_SWZ(source->Swizzle, 2) <= 3);
 193       ASSERT(GET_SWZ(source->Swizzle, 3) <= 3);
 194       result[0] = src[GET_SWZ(source->Swizzle, 0)];
 195       result[1] = src[GET_SWZ(source->Swizzle, 1)];
 196       result[2] = src[GET_SWZ(source->Swizzle, 2)];
 197       result[3] = src[GET_SWZ(source->Swizzle, 3)];
 198    }
 199
 200    if (source->NegateBase) {
 201       result[0] = -result[0];
 202       result[1] = -result[1];
 203       result[2] = -result[2];
 204       result[3] = -result[3];
 205    }
 206    if (source->Abs) {
 207       result[0] = FABSF(result[0]);
 208       result[1] = FABSF(result[1]);
 209       result[2] = FABSF(result[2]);
 210       result[3] = FABSF(result[3]);
 211    }
 212    if (source->NegateAbs) {
 213       result[0] = -result[0];
 214       result[1] = -result[1];
 215       result[2] = -result[2];
 216       result[3] = -result[3];
 217    }
 218 }
 219
 220
 221 /**
 222  * Fetch the derivative with respect to X or Y for the given register.
 223  * XXX this currently only works for fragment program input attribs.
 224  */
 225 static void
 226 fetch_vector4_deriv(GLcontext * ctx,
 227                     const struct prog_src_register *source,
 228                     const struct gl_program_machine *machine,
 229                     char xOrY, GLfloat result[4])
 230 {
 231    if (source->File == PROGRAM_INPUT && source->Index < (GLint)machine->NumDeriv) {
 232       const GLint col = machine->CurElement;
 233       const GLfloat w = machine->Attribs[FRAG_ATTRIB_WPOS][col][3];
 234       const GLfloat invQ = 1.0f / w;
 235       GLfloat deriv[4];
 236
 237       if (xOrY == 'X') {
 238          deriv[0] = machine->DerivX[source->Index][0] * invQ;
 239          deriv[1] = machine->DerivX[source->Index][1] * invQ;
 240          deriv[2] = machine->DerivX[source->Index][2] * invQ;
 241          deriv[3] = machine->DerivX[source->Index][3] * invQ;
 242       }
 243       else {
 244          deriv[0] = machine->DerivY[source->Index][0] * invQ;
 245          deriv[1] = machine->DerivY[source->Index][1] * invQ;
 246          deriv[2] = machine->DerivY[source->Index][2] * invQ;
 247          deriv[3] = machine->DerivY[source->Index][3] * invQ;
 248       }
 249
 250       result[0] = deriv[GET_SWZ(source->Swizzle, 0)];
 251       result[1] = deriv[GET_SWZ(source->Swizzle, 1)];
 252       result[2] = deriv[GET_SWZ(source->Swizzle, 2)];
 253       result[3] = deriv[GET_SWZ(source->Swizzle, 3)];
 254
 255       if (source->NegateBase) {
 256          result[0] = -result[0];
 257          result[1] = -result[1];
 258          result[2] = -result[2];
 259          result[3] = -result[3];
 260       }
 261       if (source->Abs) {
 262          result[0] = FABSF(result[0]);
 263          result[1] = FABSF(result[1]);
 264          result[2] = FABSF(result[2]);
 265          result[3] = FABSF(result[3]);
 266       }
 267       if (source->NegateAbs) {
 268          result[0] = -result[0];
 269          result[1] = -result[1];
 270          result[2] = -result[2];
 271          result[3] = -result[3];
 272       }
 273    }
 274    else {
 275       ASSIGN_4V(result, 0.0, 0.0, 0.0, 0.0);
 276    }
 277 }
 278
 279
 280 /**
 281  * As above, but only return result[0] element.
 282  */
 283 static void
 284 fetch_vector1(const struct prog_src_register *source,
 285               const struct gl_program_machine *machine, GLfloat result[4])
 286 {
 287    const GLfloat *src = get_register_pointer(source, machine);
 288    ASSERT(src);
 289
 290    result[0] = src[GET_SWZ(source->Swizzle, 0)];
 291
 292    if (source->NegateBase) {
 293       result[0] = -result[0];
 294    }
 295    if (source->Abs) {
 296       result[0] = FABSF(result[0]);
 297    }
 298    if (source->NegateAbs) {
 299       result[0] = -result[0];
 300    }
 301 }
 302
 303
 304 /**
 305  * Fetch texel from texture.  Use partial derivatives when possible.
 306  */
 307 static INLINE void
 308 fetch_texel(GLcontext *ctx,
 309             const struct gl_program_machine *machine,
 310             const struct prog_instruction *inst,
 311             const GLfloat texcoord[4], GLfloat lodBias,
 312             GLfloat color[4])
 313 {
 314    const GLuint unit = machine->Samplers[inst->TexSrcUnit];
 315
 316    /* Note: we only have the right derivatives for fragment input attribs.
 317     */
 318    if (machine->NumDeriv > 0 &&
 319        inst->SrcReg[0].File == PROGRAM_INPUT &&
 320        inst->SrcReg[0].Index == FRAG_ATTRIB_TEX0 + inst->TexSrcUnit) {
 321       /* simple texture fetch for which we should have derivatives */
 322       GLuint attr = inst->SrcReg[0].Index;
 323       machine->FetchTexelDeriv(ctx, texcoord,
 324                                machine->DerivX[attr],
 325                                machine->DerivY[attr],
 326                                lodBias, unit, color);
 327    }
 328    else {
 329       machine->FetchTexelLod(ctx, texcoord, lodBias, unit, color);
 330    }
 331 }
 332
 333
 334 /**
 335  * Test value against zero and return GT, LT, EQ or UN if NaN.
 336  */
 337 static INLINE GLuint
 338 generate_cc(float value)
 339 {
 340    if (value != value)
 341       return COND_UN;           /* NaN */
 342    if (value > 0.0F)
 343       return COND_GT;
 344    if (value < 0.0F)
 345       return COND_LT;
 346    return COND_EQ;
 347 }
 348
 349
 350 /**
 351  * Test if the ccMaskRule is satisfied by the given condition code.
 352  * Used to mask destination writes according to the current condition code.
 353  */
 354 static INLINE GLboolean
 355 test_cc(GLuint condCode, GLuint ccMaskRule)
 356 {
 357    switch (ccMaskRule) {
 358    case COND_EQ: return (condCode == COND_EQ);
 359    case COND_NE: return (condCode != COND_EQ);
 360    case COND_LT: return (condCode == COND_LT);
 361    case COND_GE: return (condCode == COND_GT || condCode == COND_EQ);
 362    case COND_LE: return (condCode == COND_LT || condCode == COND_EQ);
 363    case COND_GT: return (condCode == COND_GT);
 364    case COND_TR: return GL_TRUE;
 365    case COND_FL: return GL_FALSE;
 366    default:      return GL_TRUE;
 367    }
 368 }
 369
 370
 371 /**
 372  * Evaluate the 4 condition codes against a predicate and return GL_TRUE
 373  * or GL_FALSE to indicate result.
 374  */
 375 static INLINE GLboolean
 376 eval_condition(const struct gl_program_machine *machine,
 377                const struct prog_instruction *inst)
 378 {
 379    const GLuint swizzle = inst->DstReg.CondSwizzle;
 380    const GLuint condMask = inst->DstReg.CondMask;
 381    if (test_cc(machine->CondCodes[GET_SWZ(swizzle, 0)], condMask) ||
 382        test_cc(machine->CondCodes[GET_SWZ(swizzle, 1)], condMask) ||
 383        test_cc(machine->CondCodes[GET_SWZ(swizzle, 2)], condMask) ||
 384        test_cc(machine->CondCodes[GET_SWZ(swizzle, 3)], condMask)) {
 385       return GL_TRUE;
 386    }
 387    else {
 388       return GL_FALSE;
 389    }
 390 }
 391
 392
 393
 394 /**
 395  * Store 4 floats into a register.  Observe the instructions saturate and
 396  * set-condition-code flags.
 397  */
 398 static void
 399 store_vector4(const struct prog_instruction *inst,
 400               struct gl_program_machine *machine, const GLfloat value[4])
 401 {
 402    const struct prog_dst_register *dest = &(inst->DstReg);
 403    const GLboolean clamp = inst->SaturateMode == SATURATE_ZERO_ONE;
 404    GLfloat *dstReg;
 405    GLfloat dummyReg[4];
 406    GLfloat clampedValue[4];
 407    GLuint writeMask = dest->WriteMask;
 408
 409    switch (dest->File) {
 410    case PROGRAM_OUTPUT:
 411       ASSERT(dest->Index < MAX_PROGRAM_OUTPUTS);
 412       dstReg = machine->Outputs[dest->Index];
 413       break;
 414    case PROGRAM_TEMPORARY:
 415       ASSERT(dest->Index < MAX_PROGRAM_TEMPS);
 416       dstReg = machine->Temporaries[dest->Index];
 417       break;
 418    case PROGRAM_WRITE_ONLY:
 419       dstReg = dummyReg;
 420       return;
 421    default:
 422       _mesa_problem(NULL, "bad register file in store_vector4(fp)");
 423       return;
 424    }
 425
 426 #if 0
 427    if (value[0] > 1.0e10 ||
 428        IS_INF_OR_NAN(value[0]) ||
 429        IS_INF_OR_NAN(value[1]) ||
 430        IS_INF_OR_NAN(value[2]) || IS_INF_OR_NAN(value[3]))
 431       printf("store %g %g %g %g\n", value[0], value[1], value[2], value[3]);
 432 #endif
 433
 434    if (clamp) {
 435       clampedValue[0] = CLAMP(value[0], 0.0F, 1.0F);
 436       clampedValue[1] = CLAMP(value[1], 0.0F, 1.0F);
 437       clampedValue[2] = CLAMP(value[2], 0.0F, 1.0F);
 438       clampedValue[3] = CLAMP(value[3], 0.0F, 1.0F);
 439       value = clampedValue;
 440    }
 441
 442    if (dest->CondMask != COND_TR) {
 443       /* condition codes may turn off some writes */
 444       if (writeMask & WRITEMASK_X) {
 445          if (!test_cc(machine->CondCodes[GET_SWZ(dest->CondSwizzle, 0)],
 446                       dest->CondMask))
 447             writeMask &= ~WRITEMASK_X;
 448       }
 449       if (writeMask & WRITEMASK_Y) {
 450          if (!test_cc(machine->CondCodes[GET_SWZ(dest->CondSwizzle, 1)],
 451                       dest->CondMask))
 452             writeMask &= ~WRITEMASK_Y;
 453       }
 454       if (writeMask & WRITEMASK_Z) {
 455          if (!test_cc(machine->CondCodes[GET_SWZ(dest->CondSwizzle, 2)],
 456                       dest->CondMask))
 457             writeMask &= ~WRITEMASK_Z;
 458       }
 459       if (writeMask & WRITEMASK_W) {
 460          if (!test_cc(machine->CondCodes[GET_SWZ(dest->CondSwizzle, 3)],
 461                       dest->CondMask))
 462             writeMask &= ~WRITEMASK_W;
 463       }
 464    }
 465
 466    if (writeMask & WRITEMASK_X)
 467       dstReg[0] = value[0];
 468    if (writeMask & WRITEMASK_Y)
 469       dstReg[1] = value[1];
 470    if (writeMask & WRITEMASK_Z)
 471       dstReg[2] = value[2];
 472    if (writeMask & WRITEMASK_W)
 473       dstReg[3] = value[3];
 474
 475    if (inst->CondUpdate) {
 476       if (writeMask & WRITEMASK_X)
 477          machine->CondCodes[0] = generate_cc(value[0]);
 478       if (writeMask & WRITEMASK_Y)
 479          machine->CondCodes[1] = generate_cc(value[1]);
 480       if (writeMask & WRITEMASK_Z)
 481          machine->CondCodes[2] = generate_cc(value[2]);
 482       if (writeMask & WRITEMASK_W)
 483          machine->CondCodes[3] = generate_cc(value[3]);
 484 #if DEBUG_PROG
 485       printf("CondCodes=(%s,%s,%s,%s) for:\n",
 486              _mesa_condcode_string(machine->CondCodes[0]),
 487              _mesa_condcode_string(machine->CondCodes[1]),
 488              _mesa_condcode_string(machine->CondCodes[2]),
 489              _mesa_condcode_string(machine->CondCodes[3]));
 490 #endif
 491    }
 492 }
 493
 494
 495 /**
 496  * Execute the given vertex/fragment program.
 497  *
 498  * \param ctx  rendering context
 499  * \param program  the program to execute
 500  * \param machine  machine state (must be initialized)
 501  * \return GL_TRUE if program completed or GL_FALSE if program executed KIL.
 502  */
 503 GLboolean
 504 _mesa_execute_program(GLcontext * ctx,
 505                       const struct gl_program *program,
 506                       struct gl_program_machine *machine)
 507 {
 508    const GLuint numInst = program->NumInstructions;
 509    const GLuint maxExec = 10000;
 510    GLuint pc, numExec = 0;
 511
 512    machine->CurProgram = program;
 513
 514    if (DEBUG_PROG) {
 515       printf("execute program %u --------------------\n", program->Id);
 516    }
 517
 518 #if FEATURE_MESA_program_debug
 519    CurrentMachine = machine;
 520 #endif
 521
 522    if (program->Target == GL_VERTEX_PROGRAM_ARB) {
 523       machine->EnvParams = ctx->VertexProgram.Parameters;
 524    }
 525    else {
 526       machine->EnvParams = ctx->FragmentProgram.Parameters;
 527    }
 528
 529    for (pc = 0; pc < numInst; pc++) {
 530       const struct prog_instruction *inst = program->Instructions + pc;
 531
 532 #if FEATURE_MESA_program_debug
 533       if (ctx->FragmentProgram.CallbackEnabled &&
 534           ctx->FragmentProgram.Callback) {
 535          ctx->FragmentProgram.CurrentPosition = inst->StringPos;
 536          ctx->FragmentProgram.Callback(program->Target,
 537                                        ctx->FragmentProgram.CallbackData);
 538       }
 539 #endif
 540
 541       if (DEBUG_PROG) {
 542          _mesa_print_instruction(inst);
 543       }
 544
 545       switch (inst->Opcode) {
 546       case OPCODE_ABS:
 547          {
 548             GLfloat a[4], result[4];
 549             fetch_vector4(&inst->SrcReg[0], machine, a);
 550             result[0] = FABSF(a[0]);
 551             result[1] = FABSF(a[1]);
 552             result[2] = FABSF(a[2]);
 553             result[3] = FABSF(a[3]);
 554             store_vector4(inst, machine, result);
 555          }
 556          break;
 557       case OPCODE_ADD:
 558          {
 559             GLfloat a[4], b[4], result[4];
 560             fetch_vector4(&inst->SrcReg[0], machine, a);
 561             fetch_vector4(&inst->SrcReg[1], machine, b);
 562             result[0] = a[0] + b[0];
 563             result[1] = a[1] + b[1];
 564             result[2] = a[2] + b[2];
 565             result[3] = a[3] + b[3];
 566             store_vector4(inst, machine, result);
 567             if (DEBUG_PROG) {
 568                printf("ADD (%g %g %g %g) = (%g %g %g %g) + (%g %g %g %g)\n",
 569                       result[0], result[1], result[2], result[3],
 570                       a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
 571             }
 572          }
 573          break;
 574       case OPCODE_ARL:
 575          {
 576             GLfloat t[4];
 577             fetch_vector4(&inst->SrcReg[0], machine, t);
 578             machine->AddressReg[0][0] = (GLint) FLOORF(t[0]);
 579          }
 580          break;
 581       case OPCODE_BGNLOOP:
 582          /* no-op */
 583          break;
 584       case OPCODE_ENDLOOP:
 585          /* subtract 1 here since pc is incremented by for(pc) loop */
 586          pc = inst->BranchTarget - 1;   /* go to matching BNGLOOP */
 587          break;
 588       case OPCODE_BGNSUB:      /* begin subroutine */
 589          break;
 590       case OPCODE_ENDSUB:      /* end subroutine */
 591          break;
 592       case OPCODE_BRA:         /* branch (conditional) */
 593          /* fall-through */
 594       case OPCODE_BRK:         /* break out of loop (conditional) */
 595          /* fall-through */
 596       case OPCODE_CONT:        /* continue loop (conditional) */
 597          if (eval_condition(machine, inst)) {
 598             /* take branch */
 599             /* Subtract 1 here since we'll do pc++ at end of for-loop */
 600             pc = inst->BranchTarget - 1;
 601          }
 602          break;
 603       case OPCODE_CAL:         /* Call subroutine (conditional) */
 604          if (eval_condition(machine, inst)) {
 605             /* call the subroutine */
 606             if (machine->StackDepth >= MAX_PROGRAM_CALL_DEPTH) {
 607                return GL_TRUE;  /* Per GL_NV_vertex_program2 spec */
 608             }
 609             machine->CallStack[machine->StackDepth++] = pc + 1; /* next inst */
 610             /* Subtract 1 here since we'll do pc++ at end of for-loop */
 611             pc = inst->BranchTarget - 1;
 612          }
 613          break;
 614       case OPCODE_CMP:
 615          {
 616             GLfloat a[4], b[4], c[4], result[4];
 617             fetch_vector4(&inst->SrcReg[0], machine, a);
 618             fetch_vector4(&inst->SrcReg[1], machine, b);
 619             fetch_vector4(&inst->SrcReg[2], machine, c);
 620             result[0] = a[0] < 0.0F ? b[0] : c[0];
 621             result[1] = a[1] < 0.0F ? b[1] : c[1];
 622             result[2] = a[2] < 0.0F ? b[2] : c[2];
 623             result[3] = a[3] < 0.0F ? b[3] : c[3];
 624             store_vector4(inst, machine, result);
 625          }
 626          break;
 627       case OPCODE_COS:
 628          {
 629             GLfloat a[4], result[4];
 630             fetch_vector1(&inst->SrcReg[0], machine, a);
 631             result[0] = result[1] = result[2] = result[3]
 632                = (GLfloat) _mesa_cos(a[0]);
 633             store_vector4(inst, machine, result);
 634          }
 635          break;
 636       case OPCODE_DDX:         /* Partial derivative with respect to X */
 637          {
 638             GLfloat result[4];
 639             fetch_vector4_deriv(ctx, &inst->SrcReg[0], machine,
 640                                 'X', result);
 641             store_vector4(inst, machine, result);
 642          }
 643          break;
 644       case OPCODE_DDY:         /* Partial derivative with respect to Y */
 645          {
 646             GLfloat result[4];
 647             fetch_vector4_deriv(ctx, &inst->SrcReg[0], machine,
 648                                 'Y', result);
 649             store_vector4(inst, machine, result);
 650          }
 651          break;
 652       case OPCODE_DP3:
 653          {
 654             GLfloat a[4], b[4], result[4];
 655             fetch_vector4(&inst->SrcReg[0], machine, a);
 656             fetch_vector4(&inst->SrcReg[1], machine, b);
 657             result[0] = result[1] = result[2] = result[3] = DOT3(a, b);
 658             store_vector4(inst, machine, result);
 659             if (DEBUG_PROG) {
 660                printf("DP3 %g = (%g %g %g) . (%g %g %g)\n",
 661                       result[0], a[0], a[1], a[2], b[0], b[1], b[2]);
 662             }
 663          }
 664          break;
 665       case OPCODE_DP4:
 666          {
 667             GLfloat a[4], b[4], result[4];
 668             fetch_vector4(&inst->SrcReg[0], machine, a);
 669             fetch_vector4(&inst->SrcReg[1], machine, b);
 670             result[0] = result[1] = result[2] = result[3] = DOT4(a, b);
 671             store_vector4(inst, machine, result);
 672             if (DEBUG_PROG) {
 673                printf("DP4 %g = (%g, %g %g %g) . (%g, %g %g %g)\n",
 674                       result[0], a[0], a[1], a[2], a[3],
 675                       b[0], b[1], b[2], b[3]);
 676             }
 677          }
 678          break;
 679       case OPCODE_DPH:
 680          {
 681             GLfloat a[4], b[4], result[4];
 682             fetch_vector4(&inst->SrcReg[0], machine, a);
 683             fetch_vector4(&inst->SrcReg[1], machine, b);
 684             result[0] = result[1] = result[2] = result[3] =
 685                a[0] * b[0] + a[1] * b[1] + a[2] * b[2] + b[3];
 686             store_vector4(inst, machine, result);
 687          }
 688          break;
 689       case OPCODE_DST:         /* Distance vector */
 690          {
 691             GLfloat a[4], b[4], result[4];
 692             fetch_vector4(&inst->SrcReg[0], machine, a);
 693             fetch_vector4(&inst->SrcReg[1], machine, b);
 694             result[0] = 1.0F;
 695             result[1] = a[1] * b[1];
 696             result[2] = a[2];
 697             result[3] = b[3];
 698             store_vector4(inst, machine, result);
 699          }
 700          break;
 701       case OPCODE_EXP:
 702          {
 703             GLfloat t[4], q[4], floor_t0;
 704             fetch_vector1(&inst->SrcReg[0], machine, t);
 705             floor_t0 = FLOORF(t[0]);
 706             if (floor_t0 > FLT_MAX_EXP) {
 707                SET_POS_INFINITY(q[0]);
 708                SET_POS_INFINITY(q[2]);
 709             }
 710             else if (floor_t0 < FLT_MIN_EXP) {
 711                q[0] = 0.0F;
 712                q[2] = 0.0F;
 713             }
 714             else {
 715                q[0] = LDEXPF(1.0, (int) floor_t0);
 716                /* Note: GL_NV_vertex_program expects
 717                 * result.z = result.x * APPX(result.y)
 718                 * We do what the ARB extension says.
 719                 */
 720                q[2] = (GLfloat) pow(2.0, t[0]);
 721             }
 722             q[1] = t[0] - floor_t0;
 723             q[3] = 1.0F;
 724             store_vector4( inst, machine, q );
 725          }
 726          break;
 727       case OPCODE_EX2:         /* Exponential base 2 */
 728          {
 729             GLfloat a[4], result[4];
 730             fetch_vector1(&inst->SrcReg[0], machine, a);
 731             result[0] = result[1] = result[2] = result[3] =
 732                (GLfloat) _mesa_pow(2.0, a[0]);
 733             store_vector4(inst, machine, result);
 734          }
 735          break;
 736       case OPCODE_FLR:
 737          {
 738             GLfloat a[4], result[4];
 739             fetch_vector4(&inst->SrcReg[0], machine, a);
 740             result[0] = FLOORF(a[0]);
 741             result[1] = FLOORF(a[1]);
 742             result[2] = FLOORF(a[2]);
 743             result[3] = FLOORF(a[3]);
 744             store_vector4(inst, machine, result);
 745          }
 746          break;
 747       case OPCODE_FRC:
 748          {
 749             GLfloat a[4], result[4];
 750             fetch_vector4(&inst->SrcReg[0], machine, a);
 751             result[0] = a[0] - FLOORF(a[0]);
 752             result[1] = a[1] - FLOORF(a[1]);
 753             result[2] = a[2] - FLOORF(a[2]);
 754             result[3] = a[3] - FLOORF(a[3]);
 755             store_vector4(inst, machine, result);
 756          }
 757          break;
 758       case OPCODE_IF:
 759          {
 760             GLboolean cond;
 761             /* eval condition */
 762             if (inst->SrcReg[0].File != PROGRAM_UNDEFINED) {
 763                GLfloat a[4];
 764                fetch_vector1(&inst->SrcReg[0], machine, a);
 765                cond = (a[0] != 0.0);
 766             }
 767             else {
 768                cond = eval_condition(machine, inst);
 769             }
 770             if (DEBUG_PROG) {
 771                printf("IF: %d\n", cond);
 772             }
 773             /* do if/else */
 774             if (cond) {
 775                /* do if-clause (just continue execution) */
 776             }
 777             else {
 778                /* go to the instruction after ELSE or ENDIF */
 779                assert(inst->BranchTarget >= 0);
 780                pc = inst->BranchTarget - 1;
 781             }
 782          }
 783          break;
 784       case OPCODE_ELSE:
 785          /* goto ENDIF */
 786          assert(inst->BranchTarget >= 0);
 787          pc = inst->BranchTarget - 1;
 788          break;
 789       case OPCODE_ENDIF:
 790          /* nothing */
 791          break;
 792       case OPCODE_INT:         /* float to int */
 793          {
 794             GLfloat a[4], result[4];
 795             fetch_vector4(&inst->SrcReg[0], machine, a);
 796             result[0] = (GLfloat) (GLint) a[0];
 797             result[1] = (GLfloat) (GLint) a[1];
 798             result[2] = (GLfloat) (GLint) a[2];
 799             result[3] = (GLfloat) (GLint) a[3];
 800             store_vector4(inst, machine, result);
 801          }
 802          break;
 803       case OPCODE_KIL_NV:      /* NV_f_p only (conditional) */
 804          if (eval_condition(machine, inst)) {
 805             return GL_FALSE;
 806          }
 807          break;
 808       case OPCODE_KIL:         /* ARB_f_p only */
 809          {
 810             GLfloat a[4];
 811             fetch_vector4(&inst->SrcReg[0], machine, a);
 812             if (a[0] < 0.0F || a[1] < 0.0F || a[2] < 0.0F || a[3] < 0.0F) {
 813                return GL_FALSE;
 814             }
 815          }
 816          break;
 817       case OPCODE_LG2:         /* log base 2 */
 818          {
 819             GLfloat a[4], result[4];
 820             fetch_vector1(&inst->SrcReg[0], machine, a);
 821             result[0] = result[1] = result[2] = result[3] = LOG2(a[0]);
 822             store_vector4(inst, machine, result);
 823          }
 824          break;
 825       case OPCODE_LIT:
 826          {
 827             const GLfloat epsilon = 1.0F / 256.0F;      /* from NV VP spec */
 828             GLfloat a[4], result[4];
 829             fetch_vector4(&inst->SrcReg[0], machine, a);
 830             a[0] = MAX2(a[0], 0.0F);
 831             a[1] = MAX2(a[1], 0.0F);
 832             /* XXX ARB version clamps a[3], NV version doesn't */
 833             a[3] = CLAMP(a[3], -(128.0F - epsilon), (128.0F - epsilon));
 834             result[0] = 1.0F;
 835             result[1] = a[0];
 836             /* XXX we could probably just use pow() here */
 837             if (a[0] > 0.0F) {
 838                if (a[1] == 0.0 && a[3] == 0.0)
 839                   result[2] = 1.0;
 840                else
 841                   result[2] = EXPF(a[3] * LOGF(a[1]));
 842             }
 843             else {
 844                result[2] = 0.0;
 845             }
 846             result[3] = 1.0F;
 847             store_vector4(inst, machine, result);
 848             if (DEBUG_PROG) {
 849                printf("LIT (%g %g %g %g) : (%g %g %g %g)\n",
 850                       result[0], result[1], result[2], result[3],
 851                       a[0], a[1], a[2], a[3]);
 852             }
 853          }
 854          break;
 855       case OPCODE_LOG:
 856          {
 857             GLfloat t[4], q[4], abs_t0;
 858             fetch_vector1(&inst->SrcReg[0], machine, t);
 859             abs_t0 = FABSF(t[0]);
 860             if (abs_t0 != 0.0F) {
 861                /* Since we really can't handle infinite values on VMS
 862                 * like other OSes we'll use __MAXFLOAT to represent
 863                 * infinity.  This may need some tweaking.
 864                 */
 865 #ifdef VMS
 866                if (abs_t0 == __MAXFLOAT)
 867 #else
 868                if (IS_INF_OR_NAN(abs_t0))
 869 #endif
 870                {
 871                   SET_POS_INFINITY(q[0]);
 872                   q[1] = 1.0F;
 873                   SET_POS_INFINITY(q[2]);
 874                }
 875                else {
 876                   int exponent;
 877                   GLfloat mantissa = FREXPF(t[0], &exponent);
 878                   q[0] = (GLfloat) (exponent - 1);
 879                   q[1] = (GLfloat) (2.0 * mantissa); /* map [.5, 1) -> [1, 2) */
 880                   q[2] = (GLfloat) (q[0] + LOG2(q[1]));
 881                }
 882             }
 883             else {
 884                SET_NEG_INFINITY(q[0]);
 885                q[1] = 1.0F;
 886                SET_NEG_INFINITY(q[2]);
 887             }
 888             q[3] = 1.0;
 889             store_vector4(inst, machine, q);
 890          }
 891          break;
 892       case OPCODE_LRP:
 893          {
 894             GLfloat a[4], b[4], c[4], result[4];
 895             fetch_vector4(&inst->SrcReg[0], machine, a);
 896             fetch_vector4(&inst->SrcReg[1], machine, b);
 897             fetch_vector4(&inst->SrcReg[2], machine, c);
 898             result[0] = a[0] * b[0] + (1.0F - a[0]) * c[0];
 899             result[1] = a[1] * b[1] + (1.0F - a[1]) * c[1];
 900             result[2] = a[2] * b[2] + (1.0F - a[2]) * c[2];
 901             result[3] = a[3] * b[3] + (1.0F - a[3]) * c[3];
 902             store_vector4(inst, machine, result);
 903             if (DEBUG_PROG) {
 904                printf("LRP (%g %g %g %g) = (%g %g %g %g), "
 905                       "(%g %g %g %g), (%g %g %g %g)\n",
 906                       result[0], result[1], result[2], result[3],
 907                       a[0], a[1], a[2], a[3],
 908                       b[0], b[1], b[2], b[3], c[0], c[1], c[2], c[3]);
 909             }
 910          }
 911          break;
 912       case OPCODE_MAD:
 913          {
 914             GLfloat a[4], b[4], c[4], result[4];
 915             fetch_vector4(&inst->SrcReg[0], machine, a);
 916             fetch_vector4(&inst->SrcReg[1], machine, b);
 917             fetch_vector4(&inst->SrcReg[2], machine, c);
 918             result[0] = a[0] * b[0] + c[0];
 919             result[1] = a[1] * b[1] + c[1];
 920             result[2] = a[2] * b[2] + c[2];
 921             result[3] = a[3] * b[3] + c[3];
 922             store_vector4(inst, machine, result);
 923             if (DEBUG_PROG) {
 924                printf("MAD (%g %g %g %g) = (%g %g %g %g) * "
 925                       "(%g %g %g %g) + (%g %g %g %g)\n",
 926                       result[0], result[1], result[2], result[3],
 927                       a[0], a[1], a[2], a[3],
 928                       b[0], b[1], b[2], b[3], c[0], c[1], c[2], c[3]);
 929             }
 930          }
 931          break;
 932       case OPCODE_MAX:
 933          {
 934             GLfloat a[4], b[4], result[4];
 935             fetch_vector4(&inst->SrcReg[0], machine, a);
 936             fetch_vector4(&inst->SrcReg[1], machine, b);
 937             result[0] = MAX2(a[0], b[0]);
 938             result[1] = MAX2(a[1], b[1]);
 939             result[2] = MAX2(a[2], b[2]);
 940             result[3] = MAX2(a[3], b[3]);
 941             store_vector4(inst, machine, result);
 942             if (DEBUG_PROG) {
 943                printf("MAX (%g %g %g %g) = (%g %g %g %g), (%g %g %g %g)\n",
 944                       result[0], result[1], result[2], result[3],
 945                       a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
 946             }
 947          }
 948          break;
 949       case OPCODE_MIN:
 950          {
 951             GLfloat a[4], b[4], result[4];
 952             fetch_vector4(&inst->SrcReg[0], machine, a);
 953             fetch_vector4(&inst->SrcReg[1], machine, b);
 954             result[0] = MIN2(a[0], b[0]);
 955             result[1] = MIN2(a[1], b[1]);
 956             result[2] = MIN2(a[2], b[2]);
 957             result[3] = MIN2(a[3], b[3]);
 958             store_vector4(inst, machine, result);
 959          }
 960          break;
 961       case OPCODE_MOV:
 962          {
 963             GLfloat result[4];
 964             fetch_vector4(&inst->SrcReg[0], machine, result);
 965             store_vector4(inst, machine, result);
 966             if (DEBUG_PROG) {
 967                printf("MOV (%g %g %g %g)\n",
 968                       result[0], result[1], result[2], result[3]);
 969             }
 970          }
 971          break;
 972       case OPCODE_MUL:
 973          {
 974             GLfloat a[4], b[4], result[4];
 975             fetch_vector4(&inst->SrcReg[0], machine, a);
 976             fetch_vector4(&inst->SrcReg[1], machine, b);
 977             result[0] = a[0] * b[0];
 978             result[1] = a[1] * b[1];
 979             result[2] = a[2] * b[2];
 980             result[3] = a[3] * b[3];
 981             store_vector4(inst, machine, result);
 982             if (DEBUG_PROG) {
 983                printf("MUL (%g %g %g %g) = (%g %g %g %g) * (%g %g %g %g)\n",
 984                       result[0], result[1], result[2], result[3],
 985                       a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
 986             }
 987          }
 988          break;
 989       case OPCODE_NOISE1:
 990          {
 991             GLfloat a[4], result[4];
 992             fetch_vector1(&inst->SrcReg[0], machine, a);
 993             result[0] =
 994                result[1] =
 995                result[2] = result[3] = _slang_library_noise1(a[0]);
 996             store_vector4(inst, machine, result);
 997          }
 998          break;
 999       case OPCODE_NOISE2:
1000          {
1001             GLfloat a[4], result[4];
1002             fetch_vector4(&inst->SrcReg[0], machine, a);
1003             result[0] =
1004                result[1] =
1005                result[2] = result[3] = _slang_library_noise2(a[0], a[1]);
1006             store_vector4(inst, machine, result);
1007          }
1008          break;
1009       case OPCODE_NOISE3:
1010          {
1011             GLfloat a[4], result[4];
1012             fetch_vector4(&inst->SrcReg[0], machine, a);
1013             result[0] =
1014                result[1] =
1015                result[2] =
1016                result[3] = _slang_library_noise3(a[0], a[1], a[2]);
1017             store_vector4(inst, machine, result);
1018          }
1019          break;
1020       case OPCODE_NOISE4:
1021          {
1022             GLfloat a[4], result[4];
1023             fetch_vector4(&inst->SrcReg[0], machine, a);
1024             result[0] =
1025                result[1] =
1026                result[2] =
1027                result[3] = _slang_library_noise4(a[0], a[1], a[2], a[3]);
1028             store_vector4(inst, machine, result);
1029          }
1030          break;
1031       case OPCODE_NOP:
1032          break;
1033       case OPCODE_PK2H:        /* pack two 16-bit floats in one 32-bit float */
1034          {
1035             GLfloat a[4], result[4];
1036             GLhalfNV hx, hy;
1037             GLuint *rawResult = (GLuint *) result;
1038             GLuint twoHalves;
1039             fetch_vector4(&inst->SrcReg[0], machine, a);
1040             hx = _mesa_float_to_half(a[0]);
1041             hy = _mesa_float_to_half(a[1]);
1042             twoHalves = hx | (hy << 16);
1043             rawResult[0] = rawResult[1] = rawResult[2] = rawResult[3]
1044                = twoHalves;
1045             store_vector4(inst, machine, result);
1046          }
1047          break;
1048       case OPCODE_PK2US:       /* pack two GLushorts into one 32-bit float */
1049          {
1050             GLfloat a[4], result[4];
1051             GLuint usx, usy, *rawResult = (GLuint *) result;
1052             fetch_vector4(&inst->SrcReg[0], machine, a);
1053             a[0] = CLAMP(a[0], 0.0F, 1.0F);
1054             a[1] = CLAMP(a[1], 0.0F, 1.0F);
1055             usx = IROUND(a[0] * 65535.0F);
1056             usy = IROUND(a[1] * 65535.0F);
1057             rawResult[0] = rawResult[1] = rawResult[2] = rawResult[3]
1058                = usx | (usy << 16);
1059             store_vector4(inst, machine, result);
1060          }
1061          break;
1062       case OPCODE_PK4B:        /* pack four GLbytes into one 32-bit float */
1063          {
1064             GLfloat a[4], result[4];
1065             GLuint ubx, uby, ubz, ubw, *rawResult = (GLuint *) result;
1066             fetch_vector4(&inst->SrcReg[0], machine, a);
1067             a[0] = CLAMP(a[0], -128.0F / 127.0F, 1.0F);
1068             a[1] = CLAMP(a[1], -128.0F / 127.0F, 1.0F);
1069             a[2] = CLAMP(a[2], -128.0F / 127.0F, 1.0F);
1070             a[3] = CLAMP(a[3], -128.0F / 127.0F, 1.0F);
1071             ubx = IROUND(127.0F * a[0] + 128.0F);
1072             uby = IROUND(127.0F * a[1] + 128.0F);
1073             ubz = IROUND(127.0F * a[2] + 128.0F);
1074             ubw = IROUND(127.0F * a[3] + 128.0F);
1075             rawResult[0] = rawResult[1] = rawResult[2] = rawResult[3]
1076                = ubx | (uby << 8) | (ubz << 16) | (ubw << 24);
1077             store_vector4(inst, machine, result);
1078          }
1079          break;
1080       case OPCODE_PK4UB:       /* pack four GLubytes into one 32-bit float */
1081          {
1082             GLfloat a[4], result[4];
1083             GLuint ubx, uby, ubz, ubw, *rawResult = (GLuint *) result;
1084             fetch_vector4(&inst->SrcReg[0], machine, a);
1085             a[0] = CLAMP(a[0], 0.0F, 1.0F);
1086             a[1] = CLAMP(a[1], 0.0F, 1.0F);
1087             a[2] = CLAMP(a[2], 0.0F, 1.0F);
1088             a[3] = CLAMP(a[3], 0.0F, 1.0F);
1089             ubx = IROUND(255.0F * a[0]);
1090             uby = IROUND(255.0F * a[1]);
1091             ubz = IROUND(255.0F * a[2]);
1092             ubw = IROUND(255.0F * a[3]);
1093             rawResult[0] = rawResult[1] = rawResult[2] = rawResult[3]
1094                = ubx | (uby << 8) | (ubz << 16) | (ubw << 24);
1095             store_vector4(inst, machine, result);
1096          }
1097          break;
1098       case OPCODE_POW:
1099          {
1100             GLfloat a[4], b[4], result[4];
1101             fetch_vector1(&inst->SrcReg[0], machine, a);
1102             fetch_vector1(&inst->SrcReg[1], machine, b);
1103             result[0] = result[1] = result[2] = result[3]
1104                = (GLfloat) _mesa_pow(a[0], b[0]);
1105             store_vector4(inst, machine, result);
1106          }
1107          break;
1108       case OPCODE_RCP:
1109          {
1110             GLfloat a[4], result[4];
1111             fetch_vector1(&inst->SrcReg[0], machine, a);
1112             if (DEBUG_PROG) {
1113                if (a[0] == 0)
1114                   printf("RCP(0)\n");
1115                else if (IS_INF_OR_NAN(a[0]))
1116                   printf("RCP(inf)\n");
1117             }
1118             result[0] = result[1] = result[2] = result[3] = 1.0F / a[0];
1119             store_vector4(inst, machine, result);
1120          }
1121          break;
1122       case OPCODE_RET:         /* return from subroutine (conditional) */
1123          if (eval_condition(machine, inst)) {
1124             if (machine->StackDepth == 0) {
1125                return GL_TRUE;  /* Per GL_NV_vertex_program2 spec */
1126             }
1127             /* subtract one because of pc++ in the for loop */
1128             pc = machine->CallStack[--machine->StackDepth] - 1;
1129          }
1130          break;
1131       case OPCODE_RFL:         /* reflection vector */
1132          {
1133             GLfloat axis[4], dir[4], result[4], tmpX, tmpW;
1134             fetch_vector4(&inst->SrcReg[0], machine, axis);
1135             fetch_vector4(&inst->SrcReg[1], machine, dir);
1136             tmpW = DOT3(axis, axis);
1137             tmpX = (2.0F * DOT3(axis, dir)) / tmpW;
1138             result[0] = tmpX * axis[0] - dir[0];
1139             result[1] = tmpX * axis[1] - dir[1];
1140             result[2] = tmpX * axis[2] - dir[2];
1141             /* result[3] is never written! XXX enforce in parser! */
1142             store_vector4(inst, machine, result);
1143          }
1144          break;
1145       case OPCODE_RSQ:         /* 1 / sqrt() */
1146          {
1147             GLfloat a[4], result[4];
1148             fetch_vector1(&inst->SrcReg[0], machine, a);
1149             a[0] = FABSF(a[0]);
1150             result[0] = result[1] = result[2] = result[3] = INV_SQRTF(a[0]);
1151             store_vector4(inst, machine, result);
1152             if (DEBUG_PROG) {
1153                printf("RSQ %g = 1/sqrt(|%g|)\n", result[0], a[0]);
1154             }
1155          }
1156          break;
1157       case OPCODE_SCS:         /* sine and cos */
1158          {
1159             GLfloat a[4], result[4];
1160             fetch_vector1(&inst->SrcReg[0], machine, a);
1161             result[0] = (GLfloat) _mesa_cos(a[0]);
1162             result[1] = (GLfloat) _mesa_sin(a[0]);
1163             result[2] = 0.0;    /* undefined! */
1164             result[3] = 0.0;    /* undefined! */
1165             store_vector4(inst, machine, result);
1166          }
1167          break;
1168       case OPCODE_SEQ:         /* set on equal */
1169          {
1170             GLfloat a[4], b[4], result[4];
1171             fetch_vector4(&inst->SrcReg[0], machine, a);
1172             fetch_vector4(&inst->SrcReg[1], machine, b);
1173             result[0] = (a[0] == b[0]) ? 1.0F : 0.0F;
1174             result[1] = (a[1] == b[1]) ? 1.0F : 0.0F;
1175             result[2] = (a[2] == b[2]) ? 1.0F : 0.0F;
1176             result[3] = (a[3] == b[3]) ? 1.0F : 0.0F;
1177             store_vector4(inst, machine, result);
1178             if (DEBUG_PROG) {
1179                printf("SEQ (%g %g %g %g) = (%g %g %g %g) == (%g %g %g %g)\n",
1180                       result[0], result[1], result[2], result[3],
1181                       a[0], a[1], a[2], a[3],
1182                       b[0], b[1], b[2], b[3]);
1183             }
1184          }
1185          break;
1186       case OPCODE_SFL:         /* set false, operands ignored */
1187          {
1188             static const GLfloat result[4] = { 0.0F, 0.0F, 0.0F, 0.0F };
1189             store_vector4(inst, machine, result);
1190          }
1191          break;
1192       case OPCODE_SGE:         /* set on greater or equal */
1193          {
1194             GLfloat a[4], b[4], result[4];
1195             fetch_vector4(&inst->SrcReg[0], machine, a);
1196             fetch_vector4(&inst->SrcReg[1], machine, b);
1197             result[0] = (a[0] >= b[0]) ? 1.0F : 0.0F;
1198             result[1] = (a[1] >= b[1]) ? 1.0F : 0.0F;
1199             result[2] = (a[2] >= b[2]) ? 1.0F : 0.0F;
1200             result[3] = (a[3] >= b[3]) ? 1.0F : 0.0F;
1201             store_vector4(inst, machine, result);
1202             if (DEBUG_PROG) {
1203                printf("SGE (%g %g %g %g) = (%g %g %g %g) >= (%g %g %g %g)\n",
1204                       result[0], result[1], result[2], result[3],
1205                       a[0], a[1], a[2], a[3],
1206                       b[0], b[1], b[2], b[3]);
1207             }
1208          }
1209          break;
1210       case OPCODE_SGT:         /* set on greater */
1211          {
1212             GLfloat a[4], b[4], result[4];
1213             fetch_vector4(&inst->SrcReg[0], machine, a);
1214             fetch_vector4(&inst->SrcReg[1], machine, b);
1215             result[0] = (a[0] > b[0]) ? 1.0F : 0.0F;
1216             result[1] = (a[1] > b[1]) ? 1.0F : 0.0F;
1217             result[2] = (a[2] > b[2]) ? 1.0F : 0.0F;
1218             result[3] = (a[3] > b[3]) ? 1.0F : 0.0F;
1219             store_vector4(inst, machine, result);
1220             if (DEBUG_PROG) {
1221                printf("SGT (%g %g %g %g) = (%g %g %g %g) > (%g %g %g %g)\n",
1222                       result[0], result[1], result[2], result[3],
1223                       a[0], a[1], a[2], a[3],
1224                       b[0], b[1], b[2], b[3]);
1225             }
1226          }
1227          break;
1228       case OPCODE_SIN:
1229          {
1230             GLfloat a[4], result[4];
1231             fetch_vector1(&inst->SrcReg[0], machine, a);
1232             result[0] = result[1] = result[2] = result[3]
1233                = (GLfloat) _mesa_sin(a[0]);
1234             store_vector4(inst, machine, result);
1235          }
1236          break;
1237       case OPCODE_SLE:         /* set on less or equal */
1238          {
1239             GLfloat a[4], b[4], result[4];
1240             fetch_vector4(&inst->SrcReg[0], machine, a);
1241             fetch_vector4(&inst->SrcReg[1], machine, b);
1242             result[0] = (a[0] <= b[0]) ? 1.0F : 0.0F;
1243             result[1] = (a[1] <= b[1]) ? 1.0F : 0.0F;
1244             result[2] = (a[2] <= b[2]) ? 1.0F : 0.0F;
1245             result[3] = (a[3] <= b[3]) ? 1.0F : 0.0F;
1246             store_vector4(inst, machine, result);
1247             if (DEBUG_PROG) {
1248                printf("SLE (%g %g %g %g) = (%g %g %g %g) <= (%g %g %g %g)\n",
1249                       result[0], result[1], result[2], result[3],
1250                       a[0], a[1], a[2], a[3],
1251                       b[0], b[1], b[2], b[3]);
1252             }
1253          }
1254          break;
1255       case OPCODE_SLT:         /* set on less */
1256          {
1257             GLfloat a[4], b[4], result[4];
1258             fetch_vector4(&inst->SrcReg[0], machine, a);
1259             fetch_vector4(&inst->SrcReg[1], machine, b);
1260             result[0] = (a[0] < b[0]) ? 1.0F : 0.0F;
1261             result[1] = (a[1] < b[1]) ? 1.0F : 0.0F;
1262             result[2] = (a[2] < b[2]) ? 1.0F : 0.0F;
1263             result[3] = (a[3] < b[3]) ? 1.0F : 0.0F;
1264             store_vector4(inst, machine, result);
1265             if (DEBUG_PROG) {
1266                printf("SLT (%g %g %g %g) = (%g %g %g %g) < (%g %g %g %g)\n",
1267                       result[0], result[1], result[2], result[3],
1268                       a[0], a[1], a[2], a[3],
1269                       b[0], b[1], b[2], b[3]);
1270             }
1271          }
1272          break;
1273       case OPCODE_SNE:         /* set on not equal */
1274          {
1275             GLfloat a[4], b[4], result[4];
1276             fetch_vector4(&inst->SrcReg[0], machine, a);
1277             fetch_vector4(&inst->SrcReg[1], machine, b);
1278             result[0] = (a[0] != b[0]) ? 1.0F : 0.0F;
1279             result[1] = (a[1] != b[1]) ? 1.0F : 0.0F;
1280             result[2] = (a[2] != b[2]) ? 1.0F : 0.0F;
1281             result[3] = (a[3] != b[3]) ? 1.0F : 0.0F;
1282             store_vector4(inst, machine, result);
1283             if (DEBUG_PROG) {
1284                printf("SNE (%g %g %g %g) = (%g %g %g %g) != (%g %g %g %g)\n",
1285                       result[0], result[1], result[2], result[3],
1286                       a[0], a[1], a[2], a[3],
1287                       b[0], b[1], b[2], b[3]);
1288             }
1289          }
1290          break;
1291       case OPCODE_STR:         /* set true, operands ignored */
1292          {
1293             static const GLfloat result[4] = { 1.0F, 1.0F, 1.0F, 1.0F };
1294             store_vector4(inst, machine, result);
1295          }
1296          break;
1297       case OPCODE_SUB:
1298          {
1299             GLfloat a[4], b[4], result[4];
1300             fetch_vector4(&inst->SrcReg[0], machine, a);
1301             fetch_vector4(&inst->SrcReg[1], machine, b);
1302             result[0] = a[0] - b[0];
1303             result[1] = a[1] - b[1];
1304             result[2] = a[2] - b[2];
1305             result[3] = a[3] - b[3];
1306             store_vector4(inst, machine, result);
1307             if (DEBUG_PROG) {
1308                printf("SUB (%g %g %g %g) = (%g %g %g %g) - (%g %g %g %g)\n",
1309                       result[0], result[1], result[2], result[3],
1310                       a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
1311             }
1312          }
1313          break;
1314       case OPCODE_SWZ:         /* extended swizzle */
1315          {
1316             const struct prog_src_register *source = &inst->SrcReg[0];
1317             const GLfloat *src = get_register_pointer(source, machine);
1318             GLfloat result[4];
1319             GLuint i;
1320             for (i = 0; i < 4; i++) {
1321                const GLuint swz = GET_SWZ(source->Swizzle, i);
1322                if (swz == SWIZZLE_ZERO)
1323                   result[i] = 0.0;
1324                else if (swz == SWIZZLE_ONE)
1325                   result[i] = 1.0;
1326                else {
1327                   ASSERT(swz >= 0);
1328                   ASSERT(swz <= 3);
1329                   result[i] = src[swz];
1330                }
1331                if (source->NegateBase & (1 << i))
1332                   result[i] = -result[i];
1333             }
1334             store_vector4(inst, machine, result);
1335          }
1336          break;
1337       case OPCODE_TEX:         /* Both ARB and NV frag prog */
1338          /* Simple texel lookup */
1339          {
1340             GLfloat texcoord[4], color[4];
1341             fetch_vector4(&inst->SrcReg[0], machine, texcoord);
1342
1343             fetch_texel(ctx, machine, inst, texcoord, 0.0, color);
1344
1345             if (DEBUG_PROG) {
1346                printf("TEX (%g, %g, %g, %g) = texture[%d][%g, %g, %g, %g]\n",
1347                       color[0], color[1], color[2], color[3],
1348                       inst->TexSrcUnit,
1349                       texcoord[0], texcoord[1], texcoord[2], texcoord[3]);
1350             }
1351             store_vector4(inst, machine, color);
1352          }
1353          break;
1354       case OPCODE_TXB:         /* GL_ARB_fragment_program only */
1355          /* Texel lookup with LOD bias */
1356          {
1357             const struct gl_texture_unit *texUnit
1358                = &ctx->Texture.Unit[inst->TexSrcUnit];
1359             GLfloat texcoord[4], color[4], lodBias;
1360
1361             fetch_vector4(&inst->SrcReg[0], machine, texcoord);
1362
1363             /* texcoord[3] is the bias to add to lambda */
1364             lodBias = texUnit->LodBias + texcoord[3];
1365             if (texUnit->_Current) {
1366                lodBias += texUnit->_Current->LodBias;
1367             }
1368
1369             fetch_texel(ctx, machine, inst, texcoord, lodBias, color);
1370
1371             store_vector4(inst, machine, color);
1372          }
1373          break;
1374       case OPCODE_TXD:         /* GL_NV_fragment_program only */
1375          /* Texture lookup w/ partial derivatives for LOD */
1376          {
1377             GLfloat texcoord[4], dtdx[4], dtdy[4], color[4];
1378             fetch_vector4(&inst->SrcReg[0], machine, texcoord);
1379             fetch_vector4(&inst->SrcReg[1], machine, dtdx);
1380             fetch_vector4(&inst->SrcReg[2], machine, dtdy);
1381             machine->FetchTexelDeriv(ctx, texcoord, dtdx, dtdy,
1382                                      0.0, /* lodBias */
1383                                      inst->TexSrcUnit, color);
1384             store_vector4(inst, machine, color);
1385          }
1386          break;
1387       case OPCODE_TXP:         /* GL_ARB_fragment_program only */
1388          /* Texture lookup w/ projective divide */
1389          {
1390             GLfloat texcoord[4], color[4];
1391
1392             fetch_vector4(&inst->SrcReg[0], machine, texcoord);
1393             /* Not so sure about this test - if texcoord[3] is
1394              * zero, we'd probably be fine except for an ASSERT in
1395              * IROUND_POS() which gets triggered by the inf values created.
1396              */
1397             if (texcoord[3] != 0.0) {
1398                texcoord[0] /= texcoord[3];
1399                texcoord[1] /= texcoord[3];
1400                texcoord[2] /= texcoord[3];
1401             }
1402
1403             fetch_texel(ctx, machine, inst, texcoord, 0.0, color);
1404
1405             store_vector4(inst, machine, color);
1406          }
1407          break;
1408       case OPCODE_TXP_NV:      /* GL_NV_fragment_program only */
1409          /* Texture lookup w/ projective divide, as above, but do not
1410           * do the divide by w if sampling from a cube map.
1411           */
1412          {
1413             GLfloat texcoord[4], color[4];
1414
1415             fetch_vector4(&inst->SrcReg[0], machine, texcoord);
1416             if (inst->TexSrcTarget != TEXTURE_CUBE_INDEX &&
1417                 texcoord[3] != 0.0) {
1418                texcoord[0] /= texcoord[3];
1419                texcoord[1] /= texcoord[3];
1420                texcoord[2] /= texcoord[3];
1421             }
1422
1423             fetch_texel(ctx, machine, inst, texcoord, 0.0, color);
1424
1425             store_vector4(inst, machine, color);
1426          }
1427          break;
1428       case OPCODE_UP2H:        /* unpack two 16-bit floats */
1429          {
1430             GLfloat a[4], result[4];
1431             const GLuint *rawBits = (const GLuint *) a;
1432             GLhalfNV hx, hy;
1433             fetch_vector1(&inst->SrcReg[0], machine, a);
1434             hx = rawBits[0] & 0xffff;
1435             hy = rawBits[0] >> 16;
1436             result[0] = result[2] = _mesa_half_to_float(hx);
1437             result[1] = result[3] = _mesa_half_to_float(hy);
1438             store_vector4(inst, machine, result);
1439          }
1440          break;
1441       case OPCODE_UP2US:       /* unpack two GLushorts */
1442          {
1443             GLfloat a[4], result[4];
1444             const GLuint *rawBits = (const GLuint *) a;
1445             GLushort usx, usy;
1446             fetch_vector1(&inst->SrcReg[0], machine, a);
1447             usx = rawBits[0] & 0xffff;
1448             usy = rawBits[0] >> 16;
1449             result[0] = result[2] = usx * (1.0f / 65535.0f);
1450             result[1] = result[3] = usy * (1.0f / 65535.0f);
1451             store_vector4(inst, machine, result);
1452          }
1453          break;
1454       case OPCODE_UP4B:        /* unpack four GLbytes */
1455          {
1456             GLfloat a[4], result[4];
1457             const GLuint *rawBits = (const GLuint *) a;
1458             fetch_vector1(&inst->SrcReg[0], machine, a);
1459             result[0] = (((rawBits[0] >> 0) & 0xff) - 128) / 127.0F;
1460             result[1] = (((rawBits[0] >> 8) & 0xff) - 128) / 127.0F;
1461             result[2] = (((rawBits[0] >> 16) & 0xff) - 128) / 127.0F;
1462             result[3] = (((rawBits[0] >> 24) & 0xff) - 128) / 127.0F;
1463             store_vector4(inst, machine, result);
1464          }
1465          break;
1466       case OPCODE_UP4UB:       /* unpack four GLubytes */
1467          {
1468             GLfloat a[4], result[4];
1469             const GLuint *rawBits = (const GLuint *) a;
1470             fetch_vector1(&inst->SrcReg[0], machine, a);
1471             result[0] = ((rawBits[0] >> 0) & 0xff) / 255.0F;
1472             result[1] = ((rawBits[0] >> 8) & 0xff) / 255.0F;
1473             result[2] = ((rawBits[0] >> 16) & 0xff) / 255.0F;
1474             result[3] = ((rawBits[0] >> 24) & 0xff) / 255.0F;
1475             store_vector4(inst, machine, result);
1476          }
1477          break;
1478       case OPCODE_XPD:         /* cross product */
1479          {
1480             GLfloat a[4], b[4], result[4];
1481             fetch_vector4(&inst->SrcReg[0], machine, a);
1482             fetch_vector4(&inst->SrcReg[1], machine, b);
1483             result[0] = a[1] * b[2] - a[2] * b[1];
1484             result[1] = a[2] * b[0] - a[0] * b[2];
1485             result[2] = a[0] * b[1] - a[1] * b[0];
1486             result[3] = 1.0;
1487             store_vector4(inst, machine, result);
1488             if (DEBUG_PROG) {
1489                printf("XPD (%g %g %g %g) = (%g %g %g) X (%g %g %g)\n",
1490                       result[0], result[1], result[2], result[3],
1491                       a[0], a[1], a[2], b[0], b[1], b[2]);
1492             }
1493          }
1494          break;
1495       case OPCODE_X2D:         /* 2-D matrix transform */
1496          {
1497             GLfloat a[4], b[4], c[4], result[4];
1498             fetch_vector4(&inst->SrcReg[0], machine, a);
1499             fetch_vector4(&inst->SrcReg[1], machine, b);
1500             fetch_vector4(&inst->SrcReg[2], machine, c);
1501             result[0] = a[0] + b[0] * c[0] + b[1] * c[1];
1502             result[1] = a[1] + b[0] * c[2] + b[1] * c[3];
1503             result[2] = a[2] + b[0] * c[0] + b[1] * c[1];
1504             result[3] = a[3] + b[0] * c[2] + b[1] * c[3];
1505             store_vector4(inst, machine, result);
1506          }
1507          break;
1508       case OPCODE_PRINT:
1509          {
1510             if (inst->SrcReg[0].File != -1) {
1511                GLfloat a[4];
1512                fetch_vector4(&inst->SrcReg[0], machine, a);
1513                _mesa_printf("%s%g, %g, %g, %g\n", (const char *) inst->Data,
1514                             a[0], a[1], a[2], a[3]);
1515             }
1516             else {
1517                _mesa_printf("%s\n", (const char *) inst->Data);
1518             }
1519          }
1520          break;
1521       case OPCODE_END:
1522          return GL_TRUE;
1523       default:
1524          _mesa_problem(ctx, "Bad opcode %d in _mesa_execute_program",
1525                        inst->Opcode);
1526          return GL_TRUE;        /* return value doesn't matter */
1527       }
1528
1529       numExec++;
1530       if (numExec > maxExec) {
1531          _mesa_problem(ctx, "Infinite loop detected in fragment program");
1532          return GL_TRUE;
1533       }
1534
1535    } /* for pc */
1536
1537 #if FEATURE_MESA_program_debug
1538    CurrentMachine = NULL;
1539 #endif
1540
1541    return GL_TRUE;
1542 }