src/mesa/program/prog_execute.c

   1 /*
   2  * Mesa 3-D graphics library
   3  * Version:  7.3
   4  *
   5  * Copyright (C) 1999-2008  Brian Paul   All Rights Reserved.
   6  *
   7  * Permission is hereby granted, free of charge, to any person obtaining a
   8  * copy of this software and associated documentation files (the "Software"),
   9  * to deal in the Software without restriction, including without limitation
  10  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  11  * and/or sell copies of the Software, and to permit persons to whom the
  12  * Software is furnished to do so, subject to the following conditions:
  13  *
  14  * The above copyright notice and this permission notice shall be included
  15  * in all copies or substantial portions of the Software.
  16  *
  17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  18  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  20  * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
  21  * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  22  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  23  */
  24
  25 /**
  26  * \file prog_execute.c
  27  * Software interpreter for vertex/fragment programs.
  28  * \author Brian Paul
  29  */
  30
  31 /*
  32  * NOTE: we do everything in single-precision floating point; we don't
  33  * currently observe the single/half/fixed-precision qualifiers.
  34  *
  35  */
  36
  37
  38 #include "main/glheader.h"
  39 #include "main/colormac.h"
  40 #include "main/macros.h"
  41 #include "prog_execute.h"
  42 #include "prog_instruction.h"
  43 #include "prog_parameter.h"
  44 #include "prog_print.h"
  45 #include "prog_noise.h"
  46
  47
  48 /* debug predicate */
  49 #define DEBUG_PROG 0
  50
  51
  52 /**
  53  * Set x to positive or negative infinity.
  54  */
  55 #if defined(USE_IEEE) || defined(_WIN32)
  56 #define SET_POS_INFINITY(x)                  \
  57    do {                                      \
  58          fi_type fi;                         \
  59          fi.i = 0x7F800000;                  \
  60          x = fi.f;                           \
  61    } while (0)
  62 #define SET_NEG_INFINITY(x)                  \
  63    do {                                      \
  64          fi_type fi;                         \
  65          fi.i = 0xFF800000;                  \
  66          x = fi.f;                           \
  67    } while (0)
  68 #elif defined(VMS)
  69 #define SET_POS_INFINITY(x)  x = __MAXFLOAT
  70 #define SET_NEG_INFINITY(x)  x = -__MAXFLOAT
  71 #else
  72 #define SET_POS_INFINITY(x)  x = (GLfloat) HUGE_VAL
  73 #define SET_NEG_INFINITY(x)  x = (GLfloat) -HUGE_VAL
  74 #endif
  75
  76 #define SET_FLOAT_BITS(x, bits) ((fi_type *) (void *) &(x))->i = bits
  77
  78
  79 static const GLfloat ZeroVec[4] = { 0.0F, 0.0F, 0.0F, 0.0F };
  80
  81
  82
  83 /**
  84  * Return TRUE for +0 and other positive values, FALSE otherwise.
  85  * Used for RCC opcode.
  86  */
  87 static INLINE GLboolean
  88 positive(float x)
  89 {
  90    fi_type fi;
  91    fi.f = x;
  92    if (fi.i & 0x80000000)
  93       return GL_FALSE;
  94    return GL_TRUE;
  95 }
  96
  97
  98
  99 /**
 100  * Return a pointer to the 4-element float vector specified by the given
 101  * source register.
 102  */
 103 static INLINE const GLfloat *
 104 get_src_register_pointer(const struct prog_src_register *source,
 105                          const struct gl_program_machine *machine)
 106 {
 107    const struct gl_program *prog = machine->CurProgram;
 108    GLint reg = source->Index;
 109
 110    if (source->RelAddr) {
 111       /* add address register value to src index/offset */
 112       reg += machine->AddressReg[0][0];
 113       if (reg < 0) {
 114          return ZeroVec;
 115       }
 116    }
 117
 118    switch (source->File) {
 119    case PROGRAM_TEMPORARY:
 120       if (reg >= MAX_PROGRAM_TEMPS)
 121          return ZeroVec;
 122       return machine->Temporaries[reg];
 123
 124    case PROGRAM_INPUT:
 125       if (prog->Target == GL_VERTEX_PROGRAM_ARB) {
 126          if (reg >= VERT_ATTRIB_MAX)
 127             return ZeroVec;
 128          return machine->VertAttribs[reg];
 129       }
 130       else {
 131          if (reg >= FRAG_ATTRIB_MAX)
 132             return ZeroVec;
 133          return machine->Attribs[reg][machine->CurElement];
 134       }
 135
 136    case PROGRAM_OUTPUT:
 137       if (reg >= MAX_PROGRAM_OUTPUTS)
 138          return ZeroVec;
 139       return machine->Outputs[reg];
 140
 141    case PROGRAM_LOCAL_PARAM:
 142       if (reg >= MAX_PROGRAM_LOCAL_PARAMS)
 143          return ZeroVec;
 144       return machine->CurProgram->LocalParams[reg];
 145
 146    case PROGRAM_ENV_PARAM:
 147       if (reg >= MAX_PROGRAM_ENV_PARAMS)
 148          return ZeroVec;
 149       return machine->EnvParams[reg];
 150
 151    case PROGRAM_STATE_VAR:
 152       /* Fallthrough */
 153    case PROGRAM_CONSTANT:
 154       /* Fallthrough */
 155    case PROGRAM_UNIFORM:
 156       /* Fallthrough */
 157    case PROGRAM_NAMED_PARAM:
 158       if (reg >= (GLint) prog->Parameters->NumParameters)
 159          return ZeroVec;
 160       return prog->Parameters->ParameterValues[reg];
 161
 162    default:
 163       _mesa_problem(NULL,
 164          "Invalid src register file %d in get_src_register_pointer()",
 165          source->File);
 166       return NULL;
 167    }
 168 }
 169
 170
 171 /**
 172  * Return a pointer to the 4-element float vector specified by the given
 173  * destination register.
 174  */
 175 static INLINE GLfloat *
 176 get_dst_register_pointer(const struct prog_dst_register *dest,
 177                          struct gl_program_machine *machine)
 178 {
 179    static GLfloat dummyReg[4];
 180    GLint reg = dest->Index;
 181
 182    if (dest->RelAddr) {
 183       /* add address register value to src index/offset */
 184       reg += machine->AddressReg[0][0];
 185       if (reg < 0) {
 186          return dummyReg;
 187       }
 188    }
 189
 190    switch (dest->File) {
 191    case PROGRAM_TEMPORARY:
 192       if (reg >= MAX_PROGRAM_TEMPS)
 193          return dummyReg;
 194       return machine->Temporaries[reg];
 195
 196    case PROGRAM_OUTPUT:
 197       if (reg >= MAX_PROGRAM_OUTPUTS)
 198          return dummyReg;
 199       return machine->Outputs[reg];
 200
 201    case PROGRAM_WRITE_ONLY:
 202       return dummyReg;
 203
 204    default:
 205       _mesa_problem(NULL,
 206          "Invalid dest register file %d in get_dst_register_pointer()",
 207          dest->File);
 208       return NULL;
 209    }
 210 }
 211
 212
 213
 214 /**
 215  * Fetch a 4-element float vector from the given source register.
 216  * Apply swizzling and negating as needed.
 217  */
 218 static void
 219 fetch_vector4(const struct prog_src_register *source,
 220               const struct gl_program_machine *machine, GLfloat result[4])
 221 {
 222    const GLfloat *src = get_src_register_pointer(source, machine);
 223    ASSERT(src);
 224
 225    if (source->Swizzle == SWIZZLE_NOOP) {
 226       /* no swizzling */
 227       COPY_4V(result, src);
 228    }
 229    else {
 230       ASSERT(GET_SWZ(source->Swizzle, 0) <= 3);
 231       ASSERT(GET_SWZ(source->Swizzle, 1) <= 3);
 232       ASSERT(GET_SWZ(source->Swizzle, 2) <= 3);
 233       ASSERT(GET_SWZ(source->Swizzle, 3) <= 3);
 234       result[0] = src[GET_SWZ(source->Swizzle, 0)];
 235       result[1] = src[GET_SWZ(source->Swizzle, 1)];
 236       result[2] = src[GET_SWZ(source->Swizzle, 2)];
 237       result[3] = src[GET_SWZ(source->Swizzle, 3)];
 238    }
 239
 240    if (source->Abs) {
 241       result[0] = FABSF(result[0]);
 242       result[1] = FABSF(result[1]);
 243       result[2] = FABSF(result[2]);
 244       result[3] = FABSF(result[3]);
 245    }
 246    if (source->Negate) {
 247       ASSERT(source->Negate == NEGATE_XYZW);
 248       result[0] = -result[0];
 249       result[1] = -result[1];
 250       result[2] = -result[2];
 251       result[3] = -result[3];
 252    }
 253
 254 #ifdef NAN_CHECK
 255    assert(!IS_INF_OR_NAN(result[0]));
 256    assert(!IS_INF_OR_NAN(result[0]));
 257    assert(!IS_INF_OR_NAN(result[0]));
 258    assert(!IS_INF_OR_NAN(result[0]));
 259 #endif
 260 }
 261
 262
 263 /**
 264  * Fetch a 4-element uint vector from the given source register.
 265  * Apply swizzling but not negation/abs.
 266  */
 267 static void
 268 fetch_vector4ui(const struct prog_src_register *source,
 269                 const struct gl_program_machine *machine, GLuint result[4])
 270 {
 271    const GLuint *src = (GLuint *) get_src_register_pointer(source, machine);
 272    ASSERT(src);
 273
 274    if (source->Swizzle == SWIZZLE_NOOP) {
 275       /* no swizzling */
 276       COPY_4V(result, src);
 277    }
 278    else {
 279       ASSERT(GET_SWZ(source->Swizzle, 0) <= 3);
 280       ASSERT(GET_SWZ(source->Swizzle, 1) <= 3);
 281       ASSERT(GET_SWZ(source->Swizzle, 2) <= 3);
 282       ASSERT(GET_SWZ(source->Swizzle, 3) <= 3);
 283       result[0] = src[GET_SWZ(source->Swizzle, 0)];
 284       result[1] = src[GET_SWZ(source->Swizzle, 1)];
 285       result[2] = src[GET_SWZ(source->Swizzle, 2)];
 286       result[3] = src[GET_SWZ(source->Swizzle, 3)];
 287    }
 288
 289    /* Note: no Negate or Abs here */
 290 }
 291
 292
 293
 294 /**
 295  * Fetch the derivative with respect to X or Y for the given register.
 296  * XXX this currently only works for fragment program input attribs.
 297  */
 298 static void
 299 fetch_vector4_deriv(struct gl_context * ctx,
 300                     const struct prog_src_register *source,
 301                     const struct gl_program_machine *machine,
 302                     char xOrY, GLfloat result[4])
 303 {
 304    if (source->File == PROGRAM_INPUT &&
 305        source->Index < (GLint) machine->NumDeriv) {
 306       const GLint col = machine->CurElement;
 307       const GLfloat w = machine->Attribs[FRAG_ATTRIB_WPOS][col][3];
 308       const GLfloat invQ = 1.0f / w;
 309       GLfloat deriv[4];
 310
 311       if (xOrY == 'X') {
 312          deriv[0] = machine->DerivX[source->Index][0] * invQ;
 313          deriv[1] = machine->DerivX[source->Index][1] * invQ;
 314          deriv[2] = machine->DerivX[source->Index][2] * invQ;
 315          deriv[3] = machine->DerivX[source->Index][3] * invQ;
 316       }
 317       else {
 318          deriv[0] = machine->DerivY[source->Index][0] * invQ;
 319          deriv[1] = machine->DerivY[source->Index][1] * invQ;
 320          deriv[2] = machine->DerivY[source->Index][2] * invQ;
 321          deriv[3] = machine->DerivY[source->Index][3] * invQ;
 322       }
 323
 324       result[0] = deriv[GET_SWZ(source->Swizzle, 0)];
 325       result[1] = deriv[GET_SWZ(source->Swizzle, 1)];
 326       result[2] = deriv[GET_SWZ(source->Swizzle, 2)];
 327       result[3] = deriv[GET_SWZ(source->Swizzle, 3)];
 328
 329       if (source->Abs) {
 330          result[0] = FABSF(result[0]);
 331          result[1] = FABSF(result[1]);
 332          result[2] = FABSF(result[2]);
 333          result[3] = FABSF(result[3]);
 334       }
 335       if (source->Negate) {
 336          ASSERT(source->Negate == NEGATE_XYZW);
 337          result[0] = -result[0];
 338          result[1] = -result[1];
 339          result[2] = -result[2];
 340          result[3] = -result[3];
 341       }
 342    }
 343    else {
 344       ASSIGN_4V(result, 0.0, 0.0, 0.0, 0.0);
 345    }
 346 }
 347
 348
 349 /**
 350  * As above, but only return result[0] element.
 351  */
 352 static void
 353 fetch_vector1(const struct prog_src_register *source,
 354               const struct gl_program_machine *machine, GLfloat result[4])
 355 {
 356    const GLfloat *src = get_src_register_pointer(source, machine);
 357    ASSERT(src);
 358
 359    result[0] = src[GET_SWZ(source->Swizzle, 0)];
 360
 361    if (source->Abs) {
 362       result[0] = FABSF(result[0]);
 363    }
 364    if (source->Negate) {
 365       result[0] = -result[0];
 366    }
 367 }
 368
 369
 370 static GLuint
 371 fetch_vector1ui(const struct prog_src_register *source,
 372                 const struct gl_program_machine *machine)
 373 {
 374    const GLuint *src = (GLuint *) get_src_register_pointer(source, machine);
 375    return src[GET_SWZ(source->Swizzle, 0)];
 376 }
 377
 378
 379 /**
 380  * Fetch texel from texture.  Use partial derivatives when possible.
 381  */
 382 static INLINE void
 383 fetch_texel(struct gl_context *ctx,
 384             const struct gl_program_machine *machine,
 385             const struct prog_instruction *inst,
 386             const GLfloat texcoord[4], GLfloat lodBias,
 387             GLfloat color[4])
 388 {
 389    const GLuint unit = machine->Samplers[inst->TexSrcUnit];
 390
 391    /* Note: we only have the right derivatives for fragment input attribs.
 392     */
 393    if (machine->NumDeriv > 0 &&
 394        inst->SrcReg[0].File == PROGRAM_INPUT &&
 395        inst->SrcReg[0].Index == FRAG_ATTRIB_TEX0 + inst->TexSrcUnit) {
 396       /* simple texture fetch for which we should have derivatives */
 397       GLuint attr = inst->SrcReg[0].Index;
 398       machine->FetchTexelDeriv(ctx, texcoord,
 399                                machine->DerivX[attr],
 400                                machine->DerivY[attr],
 401                                lodBias, unit, color);
 402    }
 403    else {
 404       machine->FetchTexelLod(ctx, texcoord, lodBias, unit, color);
 405    }
 406 }
 407
 408
 409 /**
 410  * Test value against zero and return GT, LT, EQ or UN if NaN.
 411  */
 412 static INLINE GLuint
 413 generate_cc(float value)
 414 {
 415    if (value != value)
 416       return COND_UN;           /* NaN */
 417    if (value > 0.0F)
 418       return COND_GT;
 419    if (value < 0.0F)
 420       return COND_LT;
 421    return COND_EQ;
 422 }
 423
 424
 425 /**
 426  * Test if the ccMaskRule is satisfied by the given condition code.
 427  * Used to mask destination writes according to the current condition code.
 428  */
 429 static INLINE GLboolean
 430 test_cc(GLuint condCode, GLuint ccMaskRule)
 431 {
 432    switch (ccMaskRule) {
 433    case COND_EQ: return (condCode == COND_EQ);
 434    case COND_NE: return (condCode != COND_EQ);
 435    case COND_LT: return (condCode == COND_LT);
 436    case COND_GE: return (condCode == COND_GT || condCode == COND_EQ);
 437    case COND_LE: return (condCode == COND_LT || condCode == COND_EQ);
 438    case COND_GT: return (condCode == COND_GT);
 439    case COND_TR: return GL_TRUE;
 440    case COND_FL: return GL_FALSE;
 441    default:      return GL_TRUE;
 442    }
 443 }
 444
 445
 446 /**
 447  * Evaluate the 4 condition codes against a predicate and return GL_TRUE
 448  * or GL_FALSE to indicate result.
 449  */
 450 static INLINE GLboolean
 451 eval_condition(const struct gl_program_machine *machine,
 452                const struct prog_instruction *inst)
 453 {
 454    const GLuint swizzle = inst->DstReg.CondSwizzle;
 455    const GLuint condMask = inst->DstReg.CondMask;
 456    if (test_cc(machine->CondCodes[GET_SWZ(swizzle, 0)], condMask) ||
 457        test_cc(machine->CondCodes[GET_SWZ(swizzle, 1)], condMask) ||
 458        test_cc(machine->CondCodes[GET_SWZ(swizzle, 2)], condMask) ||
 459        test_cc(machine->CondCodes[GET_SWZ(swizzle, 3)], condMask)) {
 460       return GL_TRUE;
 461    }
 462    else {
 463       return GL_FALSE;
 464    }
 465 }
 466
 467
 468
 469 /**
 470  * Store 4 floats into a register.  Observe the instructions saturate and
 471  * set-condition-code flags.
 472  */
 473 static void
 474 store_vector4(const struct prog_instruction *inst,
 475               struct gl_program_machine *machine, const GLfloat value[4])
 476 {
 477    const struct prog_dst_register *dstReg = &(inst->DstReg);
 478    const GLboolean clamp = inst->SaturateMode == SATURATE_ZERO_ONE;
 479    GLuint writeMask = dstReg->WriteMask;
 480    GLfloat clampedValue[4];
 481    GLfloat *dst = get_dst_register_pointer(dstReg, machine);
 482
 483 #if 0
 484    if (value[0] > 1.0e10 ||
 485        IS_INF_OR_NAN(value[0]) ||
 486        IS_INF_OR_NAN(value[1]) ||
 487        IS_INF_OR_NAN(value[2]) || IS_INF_OR_NAN(value[3]))
 488       printf("store %g %g %g %g\n", value[0], value[1], value[2], value[3]);
 489 #endif
 490
 491    if (clamp) {
 492       clampedValue[0] = CLAMP(value[0], 0.0F, 1.0F);
 493       clampedValue[1] = CLAMP(value[1], 0.0F, 1.0F);
 494       clampedValue[2] = CLAMP(value[2], 0.0F, 1.0F);
 495       clampedValue[3] = CLAMP(value[3], 0.0F, 1.0F);
 496       value = clampedValue;
 497    }
 498
 499    if (dstReg->CondMask != COND_TR) {
 500       /* condition codes may turn off some writes */
 501       if (writeMask & WRITEMASK_X) {
 502          if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 0)],
 503                       dstReg->CondMask))
 504             writeMask &= ~WRITEMASK_X;
 505       }
 506       if (writeMask & WRITEMASK_Y) {
 507          if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 1)],
 508                       dstReg->CondMask))
 509             writeMask &= ~WRITEMASK_Y;
 510       }
 511       if (writeMask & WRITEMASK_Z) {
 512          if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 2)],
 513                       dstReg->CondMask))
 514             writeMask &= ~WRITEMASK_Z;
 515       }
 516       if (writeMask & WRITEMASK_W) {
 517          if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 3)],
 518                       dstReg->CondMask))
 519             writeMask &= ~WRITEMASK_W;
 520       }
 521    }
 522
 523 #ifdef NAN_CHECK
 524    assert(!IS_INF_OR_NAN(value[0]));
 525    assert(!IS_INF_OR_NAN(value[0]));
 526    assert(!IS_INF_OR_NAN(value[0]));
 527    assert(!IS_INF_OR_NAN(value[0]));
 528 #endif
 529
 530    if (writeMask & WRITEMASK_X)
 531       dst[0] = value[0];
 532    if (writeMask & WRITEMASK_Y)
 533       dst[1] = value[1];
 534    if (writeMask & WRITEMASK_Z)
 535       dst[2] = value[2];
 536    if (writeMask & WRITEMASK_W)
 537       dst[3] = value[3];
 538
 539    if (inst->CondUpdate) {
 540       if (writeMask & WRITEMASK_X)
 541          machine->CondCodes[0] = generate_cc(value[0]);
 542       if (writeMask & WRITEMASK_Y)
 543          machine->CondCodes[1] = generate_cc(value[1]);
 544       if (writeMask & WRITEMASK_Z)
 545          machine->CondCodes[2] = generate_cc(value[2]);
 546       if (writeMask & WRITEMASK_W)
 547          machine->CondCodes[3] = generate_cc(value[3]);
 548 #if DEBUG_PROG
 549       printf("CondCodes=(%s,%s,%s,%s) for:\n",
 550              _mesa_condcode_string(machine->CondCodes[0]),
 551              _mesa_condcode_string(machine->CondCodes[1]),
 552              _mesa_condcode_string(machine->CondCodes[2]),
 553              _mesa_condcode_string(machine->CondCodes[3]));
 554 #endif
 555    }
 556 }
 557
 558
 559 /**
 560  * Store 4 uints into a register.  Observe the set-condition-code flags.
 561  */
 562 static void
 563 store_vector4ui(const struct prog_instruction *inst,
 564                 struct gl_program_machine *machine, const GLuint value[4])
 565 {
 566    const struct prog_dst_register *dstReg = &(inst->DstReg);
 567    GLuint writeMask = dstReg->WriteMask;
 568    GLuint *dst = (GLuint *) get_dst_register_pointer(dstReg, machine);
 569
 570    if (dstReg->CondMask != COND_TR) {
 571       /* condition codes may turn off some writes */
 572       if (writeMask & WRITEMASK_X) {
 573          if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 0)],
 574                       dstReg->CondMask))
 575             writeMask &= ~WRITEMASK_X;
 576       }
 577       if (writeMask & WRITEMASK_Y) {
 578          if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 1)],
 579                       dstReg->CondMask))
 580             writeMask &= ~WRITEMASK_Y;
 581       }
 582       if (writeMask & WRITEMASK_Z) {
 583          if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 2)],
 584                       dstReg->CondMask))
 585             writeMask &= ~WRITEMASK_Z;
 586       }
 587       if (writeMask & WRITEMASK_W) {
 588          if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 3)],
 589                       dstReg->CondMask))
 590             writeMask &= ~WRITEMASK_W;
 591       }
 592    }
 593
 594    if (writeMask & WRITEMASK_X)
 595       dst[0] = value[0];
 596    if (writeMask & WRITEMASK_Y)
 597       dst[1] = value[1];
 598    if (writeMask & WRITEMASK_Z)
 599       dst[2] = value[2];
 600    if (writeMask & WRITEMASK_W)
 601       dst[3] = value[3];
 602
 603    if (inst->CondUpdate) {
 604       if (writeMask & WRITEMASK_X)
 605          machine->CondCodes[0] = generate_cc((float)value[0]);
 606       if (writeMask & WRITEMASK_Y)
 607          machine->CondCodes[1] = generate_cc((float)value[1]);
 608       if (writeMask & WRITEMASK_Z)
 609          machine->CondCodes[2] = generate_cc((float)value[2]);
 610       if (writeMask & WRITEMASK_W)
 611          machine->CondCodes[3] = generate_cc((float)value[3]);
 612 #if DEBUG_PROG
 613       printf("CondCodes=(%s,%s,%s,%s) for:\n",
 614              _mesa_condcode_string(machine->CondCodes[0]),
 615              _mesa_condcode_string(machine->CondCodes[1]),
 616              _mesa_condcode_string(machine->CondCodes[2]),
 617              _mesa_condcode_string(machine->CondCodes[3]));
 618 #endif
 619    }
 620 }
 621
 622
 623
 624 /**
 625  * Execute the given vertex/fragment program.
 626  *
 627  * \param ctx  rendering context
 628  * \param program  the program to execute
 629  * \param machine  machine state (must be initialized)
 630  * \return GL_TRUE if program completed or GL_FALSE if program executed KIL.
 631  */
 632 GLboolean
 633 _mesa_execute_program(struct gl_context * ctx,
 634                       const struct gl_program *program,
 635                       struct gl_program_machine *machine)
 636 {
 637    const GLuint numInst = program->NumInstructions;
 638    const GLuint maxExec = 10000;
 639    GLuint pc, numExec = 0;
 640
 641    machine->CurProgram = program;
 642
 643    if (DEBUG_PROG) {
 644       printf("execute program %u --------------------\n", program->Id);
 645    }
 646
 647    if (program->Target == GL_VERTEX_PROGRAM_ARB) {
 648       machine->EnvParams = ctx->VertexProgram.Parameters;
 649    }
 650    else {
 651       machine->EnvParams = ctx->FragmentProgram.Parameters;
 652    }
 653
 654    for (pc = 0; pc < numInst; pc++) {
 655       const struct prog_instruction *inst = program->Instructions + pc;
 656
 657       if (DEBUG_PROG) {
 658          _mesa_print_instruction(inst);
 659       }
 660
 661       switch (inst->Opcode) {
 662       case OPCODE_ABS:
 663          {
 664             GLfloat a[4], result[4];
 665             fetch_vector4(&inst->SrcReg[0], machine, a);
 666             result[0] = FABSF(a[0]);
 667             result[1] = FABSF(a[1]);
 668             result[2] = FABSF(a[2]);
 669             result[3] = FABSF(a[3]);
 670             store_vector4(inst, machine, result);
 671          }
 672          break;
 673       case OPCODE_ADD:
 674          {
 675             GLfloat a[4], b[4], result[4];
 676             fetch_vector4(&inst->SrcReg[0], machine, a);
 677             fetch_vector4(&inst->SrcReg[1], machine, b);
 678             result[0] = a[0] + b[0];
 679             result[1] = a[1] + b[1];
 680             result[2] = a[2] + b[2];
 681             result[3] = a[3] + b[3];
 682             store_vector4(inst, machine, result);
 683             if (DEBUG_PROG) {
 684                printf("ADD (%g %g %g %g) = (%g %g %g %g) + (%g %g %g %g)\n",
 685                       result[0], result[1], result[2], result[3],
 686                       a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
 687             }
 688          }
 689          break;
 690       case OPCODE_AND:     /* bitwise AND */
 691          {
 692             GLuint a[4], b[4], result[4];
 693             fetch_vector4ui(&inst->SrcReg[0], machine, a);
 694             fetch_vector4ui(&inst->SrcReg[1], machine, b);
 695             result[0] = a[0] & b[0];
 696             result[1] = a[1] & b[1];
 697             result[2] = a[2] & b[2];
 698             result[3] = a[3] & b[3];
 699             store_vector4ui(inst, machine, result);
 700          }
 701          break;
 702       case OPCODE_ARL:
 703          {
 704             GLfloat t[4];
 705             fetch_vector4(&inst->SrcReg[0], machine, t);
 706             machine->AddressReg[0][0] = IFLOOR(t[0]);
 707             if (DEBUG_PROG) {
 708                printf("ARL %d\n", machine->AddressReg[0][0]);
 709             }
 710          }
 711          break;
 712       case OPCODE_BGNLOOP:
 713          /* no-op */
 714          ASSERT(program->Instructions[inst->BranchTarget].Opcode
 715                 == OPCODE_ENDLOOP);
 716          break;
 717       case OPCODE_ENDLOOP:
 718          /* subtract 1 here since pc is incremented by for(pc) loop */
 719          ASSERT(program->Instructions[inst->BranchTarget].Opcode
 720                 == OPCODE_BGNLOOP);
 721          pc = inst->BranchTarget - 1;   /* go to matching BNGLOOP */
 722          break;
 723       case OPCODE_BGNSUB:      /* begin subroutine */
 724          break;
 725       case OPCODE_ENDSUB:      /* end subroutine */
 726          break;
 727       case OPCODE_BRA:         /* branch (conditional) */
 728          if (eval_condition(machine, inst)) {
 729             /* take branch */
 730             /* Subtract 1 here since we'll do pc++ below */
 731             pc = inst->BranchTarget - 1;
 732          }
 733          break;
 734       case OPCODE_BRK:         /* break out of loop (conditional) */
 735          ASSERT(program->Instructions[inst->BranchTarget].Opcode
 736                 == OPCODE_ENDLOOP);
 737          if (eval_condition(machine, inst)) {
 738             /* break out of loop */
 739             /* pc++ at end of for-loop will put us after the ENDLOOP inst */
 740             pc = inst->BranchTarget;
 741          }
 742          break;
 743       case OPCODE_CONT:        /* continue loop (conditional) */
 744          ASSERT(program->Instructions[inst->BranchTarget].Opcode
 745                 == OPCODE_ENDLOOP);
 746          if (eval_condition(machine, inst)) {
 747             /* continue at ENDLOOP */
 748             /* Subtract 1 here since we'll do pc++ at end of for-loop */
 749             pc = inst->BranchTarget - 1;
 750          }
 751          break;
 752       case OPCODE_CAL:         /* Call subroutine (conditional) */
 753          if (eval_condition(machine, inst)) {
 754             /* call the subroutine */
 755             if (machine->StackDepth >= MAX_PROGRAM_CALL_DEPTH) {
 756                return GL_TRUE;  /* Per GL_NV_vertex_program2 spec */
 757             }
 758             machine->CallStack[machine->StackDepth++] = pc + 1; /* next inst */
 759             /* Subtract 1 here since we'll do pc++ at end of for-loop */
 760             pc = inst->BranchTarget - 1;
 761          }
 762          break;
 763       case OPCODE_CMP:
 764          {
 765             GLfloat a[4], b[4], c[4], result[4];
 766             fetch_vector4(&inst->SrcReg[0], machine, a);
 767             fetch_vector4(&inst->SrcReg[1], machine, b);
 768             fetch_vector4(&inst->SrcReg[2], machine, c);
 769             result[0] = a[0] < 0.0F ? b[0] : c[0];
 770             result[1] = a[1] < 0.0F ? b[1] : c[1];
 771             result[2] = a[2] < 0.0F ? b[2] : c[2];
 772             result[3] = a[3] < 0.0F ? b[3] : c[3];
 773             store_vector4(inst, machine, result);
 774             if (DEBUG_PROG) {
 775                printf("CMP (%g %g %g %g) = (%g %g %g %g) < 0 ? (%g %g %g %g) : (%g %g %g %g)\n",
 776                       result[0], result[1], result[2], result[3],
 777                       a[0], a[1], a[2], a[3],
 778                       b[0], b[1], b[2], b[3],
 779                       c[0], c[1], c[2], c[3]);
 780             }
 781          }
 782          break;
 783       case OPCODE_COS:
 784          {
 785             GLfloat a[4], result[4];
 786             fetch_vector1(&inst->SrcReg[0], machine, a);
 787             result[0] = result[1] = result[2] = result[3]
 788                = (GLfloat) cos(a[0]);
 789             store_vector4(inst, machine, result);
 790          }
 791          break;
 792       case OPCODE_DDX:         /* Partial derivative with respect to X */
 793          {
 794             GLfloat result[4];
 795             fetch_vector4_deriv(ctx, &inst->SrcReg[0], machine,
 796                                 'X', result);
 797             store_vector4(inst, machine, result);
 798          }
 799          break;
 800       case OPCODE_DDY:         /* Partial derivative with respect to Y */
 801          {
 802             GLfloat result[4];
 803             fetch_vector4_deriv(ctx, &inst->SrcReg[0], machine,
 804                                 'Y', result);
 805             store_vector4(inst, machine, result);
 806          }
 807          break;
 808       case OPCODE_DP2:
 809          {
 810             GLfloat a[4], b[4], result[4];
 811             fetch_vector4(&inst->SrcReg[0], machine, a);
 812             fetch_vector4(&inst->SrcReg[1], machine, b);
 813             result[0] = result[1] = result[2] = result[3] = DOT2(a, b);
 814             store_vector4(inst, machine, result);
 815             if (DEBUG_PROG) {
 816                printf("DP2 %g = (%g %g) . (%g %g)\n",
 817                       result[0], a[0], a[1], b[0], b[1]);
 818             }
 819          }
 820          break;
 821       case OPCODE_DP2A:
 822          {
 823             GLfloat a[4], b[4], c, result[4];
 824             fetch_vector4(&inst->SrcReg[0], machine, a);
 825             fetch_vector4(&inst->SrcReg[1], machine, b);
 826             fetch_vector1(&inst->SrcReg[1], machine, &c);
 827             result[0] = result[1] = result[2] = result[3] = DOT2(a, b) + c;
 828             store_vector4(inst, machine, result);
 829             if (DEBUG_PROG) {
 830                printf("DP2A %g = (%g %g) . (%g %g) + %g\n",
 831                       result[0], a[0], a[1], b[0], b[1], c);
 832             }
 833          }
 834          break;
 835       case OPCODE_DP3:
 836          {
 837             GLfloat a[4], b[4], result[4];
 838             fetch_vector4(&inst->SrcReg[0], machine, a);
 839             fetch_vector4(&inst->SrcReg[1], machine, b);
 840             result[0] = result[1] = result[2] = result[3] = DOT3(a, b);
 841             store_vector4(inst, machine, result);
 842             if (DEBUG_PROG) {
 843                printf("DP3 %g = (%g %g %g) . (%g %g %g)\n",
 844                       result[0], a[0], a[1], a[2], b[0], b[1], b[2]);
 845             }
 846          }
 847          break;
 848       case OPCODE_DP4:
 849          {
 850             GLfloat a[4], b[4], result[4];
 851             fetch_vector4(&inst->SrcReg[0], machine, a);
 852             fetch_vector4(&inst->SrcReg[1], machine, b);
 853             result[0] = result[1] = result[2] = result[3] = DOT4(a, b);
 854             store_vector4(inst, machine, result);
 855             if (DEBUG_PROG) {
 856                printf("DP4 %g = (%g, %g %g %g) . (%g, %g %g %g)\n",
 857                       result[0], a[0], a[1], a[2], a[3],
 858                       b[0], b[1], b[2], b[3]);
 859             }
 860          }
 861          break;
 862       case OPCODE_DPH:
 863          {
 864             GLfloat a[4], b[4], result[4];
 865             fetch_vector4(&inst->SrcReg[0], machine, a);
 866             fetch_vector4(&inst->SrcReg[1], machine, b);
 867             result[0] = result[1] = result[2] = result[3] = DOT3(a, b) + b[3];
 868             store_vector4(inst, machine, result);
 869          }
 870          break;
 871       case OPCODE_DST:         /* Distance vector */
 872          {
 873             GLfloat a[4], b[4], result[4];
 874             fetch_vector4(&inst->SrcReg[0], machine, a);
 875             fetch_vector4(&inst->SrcReg[1], machine, b);
 876             result[0] = 1.0F;
 877             result[1] = a[1] * b[1];
 878             result[2] = a[2];
 879             result[3] = b[3];
 880             store_vector4(inst, machine, result);
 881          }
 882          break;
 883       case OPCODE_EXP:
 884          {
 885             GLfloat t[4], q[4], floor_t0;
 886             fetch_vector1(&inst->SrcReg[0], machine, t);
 887             floor_t0 = FLOORF(t[0]);
 888             if (floor_t0 > FLT_MAX_EXP) {
 889                SET_POS_INFINITY(q[0]);
 890                SET_POS_INFINITY(q[2]);
 891             }
 892             else if (floor_t0 < FLT_MIN_EXP) {
 893                q[0] = 0.0F;
 894                q[2] = 0.0F;
 895             }
 896             else {
 897                q[0] = LDEXPF(1.0, (int) floor_t0);
 898                /* Note: GL_NV_vertex_program expects
 899                 * result.z = result.x * APPX(result.y)
 900                 * We do what the ARB extension says.
 901                 */
 902                q[2] = (GLfloat) pow(2.0, t[0]);
 903             }
 904             q[1] = t[0] - floor_t0;
 905             q[3] = 1.0F;
 906             store_vector4( inst, machine, q );
 907          }
 908          break;
 909       case OPCODE_EX2:         /* Exponential base 2 */
 910          {
 911             GLfloat a[4], result[4], val;
 912             fetch_vector1(&inst->SrcReg[0], machine, a);
 913             val = (GLfloat) pow(2.0, a[0]);
 914             /*
 915             if (IS_INF_OR_NAN(val))
 916                val = 1.0e10;
 917             */
 918             result[0] = result[1] = result[2] = result[3] = val;
 919             store_vector4(inst, machine, result);
 920          }
 921          break;
 922       case OPCODE_FLR:
 923          {
 924             GLfloat a[4], result[4];
 925             fetch_vector4(&inst->SrcReg[0], machine, a);
 926             result[0] = FLOORF(a[0]);
 927             result[1] = FLOORF(a[1]);
 928             result[2] = FLOORF(a[2]);
 929             result[3] = FLOORF(a[3]);
 930             store_vector4(inst, machine, result);
 931          }
 932          break;
 933       case OPCODE_FRC:
 934          {
 935             GLfloat a[4], result[4];
 936             fetch_vector4(&inst->SrcReg[0], machine, a);
 937             result[0] = a[0] - FLOORF(a[0]);
 938             result[1] = a[1] - FLOORF(a[1]);
 939             result[2] = a[2] - FLOORF(a[2]);
 940             result[3] = a[3] - FLOORF(a[3]);
 941             store_vector4(inst, machine, result);
 942          }
 943          break;
 944       case OPCODE_IF:
 945          {
 946             GLboolean cond;
 947             ASSERT(program->Instructions[inst->BranchTarget].Opcode
 948                    == OPCODE_ELSE ||
 949                    program->Instructions[inst->BranchTarget].Opcode
 950                    == OPCODE_ENDIF);
 951             /* eval condition */
 952             if (inst->SrcReg[0].File != PROGRAM_UNDEFINED) {
 953                GLfloat a[4];
 954                fetch_vector1(&inst->SrcReg[0], machine, a);
 955                cond = (a[0] != 0.0);
 956             }
 957             else {
 958                cond = eval_condition(machine, inst);
 959             }
 960             if (DEBUG_PROG) {
 961                printf("IF: %d\n", cond);
 962             }
 963             /* do if/else */
 964             if (cond) {
 965                /* do if-clause (just continue execution) */
 966             }
 967             else {
 968                /* go to the instruction after ELSE or ENDIF */
 969                assert(inst->BranchTarget >= 0);
 970                pc = inst->BranchTarget;
 971             }
 972          }
 973          break;
 974       case OPCODE_ELSE:
 975          /* goto ENDIF */
 976          ASSERT(program->Instructions[inst->BranchTarget].Opcode
 977                 == OPCODE_ENDIF);
 978          assert(inst->BranchTarget >= 0);
 979          pc = inst->BranchTarget;
 980          break;
 981       case OPCODE_ENDIF:
 982          /* nothing */
 983          break;
 984       case OPCODE_KIL_NV:      /* NV_f_p only (conditional) */
 985          if (eval_condition(machine, inst)) {
 986             return GL_FALSE;
 987          }
 988          break;
 989       case OPCODE_KIL:         /* ARB_f_p only */
 990          {
 991             GLfloat a[4];
 992             fetch_vector4(&inst->SrcReg[0], machine, a);
 993             if (DEBUG_PROG) {
 994                printf("KIL if (%g %g %g %g) < 0.0\n",
 995                       a[0], a[1], a[2], a[3]);
 996             }
 997
 998             if (a[0] < 0.0F || a[1] < 0.0F || a[2] < 0.0F || a[3] < 0.0F) {
 999                return GL_FALSE;   /* any negative component stops execution */
1000             }
1001          }
1002          break;
1003       case OPCODE_LG2:         /* log base 2 */
1004          {
1005             GLfloat a[4], result[4], val;
1006             fetch_vector1(&inst->SrcReg[0], machine, a);
1007             /* The fast LOG2 macro doesn't meet the precision requirements.
1008              */
1009             if (a[0] == 0.0F) {
1010                val = -FLT_MAX;
1011             }
1012             else {
1013                val = (float)(log(a[0]) * 1.442695F);
1014             }
1015             result[0] = result[1] = result[2] = result[3] = val;
1016             store_vector4(inst, machine, result);
1017          }
1018          break;
1019       case OPCODE_LIT:
1020          {
1021             const GLfloat epsilon = 1.0F / 256.0F;      /* from NV VP spec */
1022             GLfloat a[4], result[4];
1023             fetch_vector4(&inst->SrcReg[0], machine, a);
1024             a[0] = MAX2(a[0], 0.0F);
1025             a[1] = MAX2(a[1], 0.0F);
1026             /* XXX ARB version clamps a[3], NV version doesn't */
1027             a[3] = CLAMP(a[3], -(128.0F - epsilon), (128.0F - epsilon));
1028             result[0] = 1.0F;
1029             result[1] = a[0];
1030             /* XXX we could probably just use pow() here */
1031             if (a[0] > 0.0F) {
1032                if (a[1] == 0.0 && a[3] == 0.0)
1033                   result[2] = 1.0F;
1034                else
1035                   result[2] = (GLfloat) pow(a[1], a[3]);
1036             }
1037             else {
1038                result[2] = 0.0F;
1039             }
1040             result[3] = 1.0F;
1041             store_vector4(inst, machine, result);
1042             if (DEBUG_PROG) {
1043                printf("LIT (%g %g %g %g) : (%g %g %g %g)\n",
1044                       result[0], result[1], result[2], result[3],
1045                       a[0], a[1], a[2], a[3]);
1046             }
1047          }
1048          break;
1049       case OPCODE_LOG:         /* exponent, mantissa, log2 of |operand| */
1050          {
1051             GLfloat t[4], q[4], abs_t0;
1052             fetch_vector1(&inst->SrcReg[0], machine, t);
1053             abs_t0 = FABSF(t[0]);
1054             if (abs_t0 != 0.0F) {
1055                /* Since we really can't handle infinite values on VMS
1056                 * like other OSes we'll use __MAXFLOAT to represent
1057                 * infinity.  This may need some tweaking.
1058                 */
1059 #ifdef VMS
1060                if (abs_t0 == __MAXFLOAT)
1061 #else
1062                if (IS_INF_OR_NAN(abs_t0))
1063 #endif
1064                {
1065                   SET_POS_INFINITY(q[0]);
1066                   q[1] = 1.0F;
1067                   SET_POS_INFINITY(q[2]);
1068                }
1069                else {
1070                   int exponent;
1071                   GLfloat mantissa = FREXPF(abs_t0, &exponent);
1072                   q[0] = (GLfloat) (exponent - 1);
1073                   q[1] = (GLfloat) (2.0 * mantissa); /* map [.5, 1) -> [1, 2) */
1074
1075                   /* The fast LOG2 macro doesn't meet the precision
1076                    * requirements.  Use |t| so negative input isn't NaN.
1077                    */
1078                   q[2] = (float)(log(abs_t0) * 1.442695F);
1079                }
1080             }
1081             else {                /* operand is zero: log is -infinity */
1082                SET_NEG_INFINITY(q[0]);
1083                q[1] = 1.0F;
1084                SET_NEG_INFINITY(q[2]);
1085             }
1086             q[3] = 1.0;
1087             store_vector4(inst, machine, q);
1088          }
1089          break;
1090       case OPCODE_LRP:
1091          {
1092             GLfloat a[4], b[4], c[4], result[4];
1093             fetch_vector4(&inst->SrcReg[0], machine, a);
1094             fetch_vector4(&inst->SrcReg[1], machine, b);
1095             fetch_vector4(&inst->SrcReg[2], machine, c);
1096             result[0] = a[0] * b[0] + (1.0F - a[0]) * c[0];
1097             result[1] = a[1] * b[1] + (1.0F - a[1]) * c[1];
1098             result[2] = a[2] * b[2] + (1.0F - a[2]) * c[2];
1099             result[3] = a[3] * b[3] + (1.0F - a[3]) * c[3];
1100             store_vector4(inst, machine, result);
1101             if (DEBUG_PROG) {
1102                printf("LRP (%g %g %g %g) = (%g %g %g %g), "
1103                       "(%g %g %g %g), (%g %g %g %g)\n",
1104                       result[0], result[1], result[2], result[3],
1105                       a[0], a[1], a[2], a[3],
1106                       b[0], b[1], b[2], b[3], c[0], c[1], c[2], c[3]);
1107             }
1108          }
1109          break;
1110       case OPCODE_MAD:
1111          {
1112             GLfloat a[4], b[4], c[4], result[4];
1113             fetch_vector4(&inst->SrcReg[0], machine, a);
1114             fetch_vector4(&inst->SrcReg[1], machine, b);
1115             fetch_vector4(&inst->SrcReg[2], machine, c);
1116             result[0] = a[0] * b[0] + c[0];
1117             result[1] = a[1] * b[1] + c[1];
1118             result[2] = a[2] * b[2] + c[2];
1119             result[3] = a[3] * b[3] + c[3];
1120             store_vector4(inst, machine, result);
1121             if (DEBUG_PROG) {
1122                printf("MAD (%g %g %g %g) = (%g %g %g %g) * "
1123                       "(%g %g %g %g) + (%g %g %g %g)\n",
1124                       result[0], result[1], result[2], result[3],
1125                       a[0], a[1], a[2], a[3],
1126                       b[0], b[1], b[2], b[3], c[0], c[1], c[2], c[3]);
1127             }
1128          }
1129          break;
1130       case OPCODE_MAX:
1131          {
1132             GLfloat a[4], b[4], result[4];
1133             fetch_vector4(&inst->SrcReg[0], machine, a);
1134             fetch_vector4(&inst->SrcReg[1], machine, b);
1135             result[0] = MAX2(a[0], b[0]);
1136             result[1] = MAX2(a[1], b[1]);
1137             result[2] = MAX2(a[2], b[2]);
1138             result[3] = MAX2(a[3], b[3]);
1139             store_vector4(inst, machine, result);
1140             if (DEBUG_PROG) {
1141                printf("MAX (%g %g %g %g) = (%g %g %g %g), (%g %g %g %g)\n",
1142                       result[0], result[1], result[2], result[3],
1143                       a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
1144             }
1145          }
1146          break;
1147       case OPCODE_MIN:
1148          {
1149             GLfloat a[4], b[4], result[4];
1150             fetch_vector4(&inst->SrcReg[0], machine, a);
1151             fetch_vector4(&inst->SrcReg[1], machine, b);
1152             result[0] = MIN2(a[0], b[0]);
1153             result[1] = MIN2(a[1], b[1]);
1154             result[2] = MIN2(a[2], b[2]);
1155             result[3] = MIN2(a[3], b[3]);
1156             store_vector4(inst, machine, result);
1157          }
1158          break;
1159       case OPCODE_MOV:
1160          {
1161             GLfloat result[4];
1162             fetch_vector4(&inst->SrcReg[0], machine, result);
1163             store_vector4(inst, machine, result);
1164             if (DEBUG_PROG) {
1165                printf("MOV (%g %g %g %g)\n",
1166                       result[0], result[1], result[2], result[3]);
1167             }
1168          }
1169          break;
1170       case OPCODE_MUL:
1171          {
1172             GLfloat a[4], b[4], result[4];
1173             fetch_vector4(&inst->SrcReg[0], machine, a);
1174             fetch_vector4(&inst->SrcReg[1], machine, b);
1175             result[0] = a[0] * b[0];
1176             result[1] = a[1] * b[1];
1177             result[2] = a[2] * b[2];
1178             result[3] = a[3] * b[3];
1179             store_vector4(inst, machine, result);
1180             if (DEBUG_PROG) {
1181                printf("MUL (%g %g %g %g) = (%g %g %g %g) * (%g %g %g %g)\n",
1182                       result[0], result[1], result[2], result[3],
1183                       a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
1184             }
1185          }
1186          break;
1187       case OPCODE_NOISE1:
1188          {
1189             GLfloat a[4], result[4];
1190             fetch_vector1(&inst->SrcReg[0], machine, a);
1191             result[0] =
1192                result[1] =
1193                result[2] =
1194                result[3] = _mesa_noise1(a[0]);
1195             store_vector4(inst, machine, result);
1196          }
1197          break;
1198       case OPCODE_NOISE2:
1199          {
1200             GLfloat a[4], result[4];
1201             fetch_vector4(&inst->SrcReg[0], machine, a);
1202             result[0] =
1203                result[1] =
1204                result[2] = result[3] = _mesa_noise2(a[0], a[1]);
1205             store_vector4(inst, machine, result);
1206          }
1207          break;
1208       case OPCODE_NOISE3:
1209          {
1210             GLfloat a[4], result[4];
1211             fetch_vector4(&inst->SrcReg[0], machine, a);
1212             result[0] =
1213                result[1] =
1214                result[2] =
1215                result[3] = _mesa_noise3(a[0], a[1], a[2]);
1216             store_vector4(inst, machine, result);
1217          }
1218          break;
1219       case OPCODE_NOISE4:
1220          {
1221             GLfloat a[4], result[4];
1222             fetch_vector4(&inst->SrcReg[0], machine, a);
1223             result[0] =
1224                result[1] =
1225                result[2] =
1226                result[3] = _mesa_noise4(a[0], a[1], a[2], a[3]);
1227             store_vector4(inst, machine, result);
1228          }
1229          break;
1230       case OPCODE_NOP:
1231          break;
1232       case OPCODE_NOT:         /* bitwise NOT */
1233          {
1234             GLuint a[4], result[4];
1235             fetch_vector4ui(&inst->SrcReg[0], machine, a);
1236             result[0] = ~a[0];
1237             result[1] = ~a[1];
1238             result[2] = ~a[2];
1239             result[3] = ~a[3];
1240             store_vector4ui(inst, machine, result);
1241          }
1242          break;
1243       case OPCODE_NRM3:        /* 3-component normalization */
1244          {
1245             GLfloat a[4], result[4];
1246             GLfloat tmp;
1247             fetch_vector4(&inst->SrcReg[0], machine, a);
1248             tmp = a[0] * a[0] + a[1] * a[1] + a[2] * a[2];
1249             if (tmp != 0.0F)
1250                tmp = INV_SQRTF(tmp);
1251             result[0] = tmp * a[0];
1252             result[1] = tmp * a[1];
1253             result[2] = tmp * a[2];
1254             result[3] = 0.0;  /* undefined, but prevent valgrind warnings */
1255             store_vector4(inst, machine, result);
1256          }
1257          break;
1258       case OPCODE_NRM4:        /* 4-component normalization */
1259          {
1260             GLfloat a[4], result[4];
1261             GLfloat tmp;
1262             fetch_vector4(&inst->SrcReg[0], machine, a);
1263             tmp = a[0] * a[0] + a[1] * a[1] + a[2] * a[2] + a[3] * a[3];
1264             if (tmp != 0.0F)
1265                tmp = INV_SQRTF(tmp);
1266             result[0] = tmp * a[0];
1267             result[1] = tmp * a[1];
1268             result[2] = tmp * a[2];
1269             result[3] = tmp * a[3];
1270             store_vector4(inst, machine, result);
1271          }
1272          break;
1273       case OPCODE_OR:          /* bitwise OR */
1274          {
1275             GLuint a[4], b[4], result[4];
1276             fetch_vector4ui(&inst->SrcReg[0], machine, a);
1277             fetch_vector4ui(&inst->SrcReg[1], machine, b);
1278             result[0] = a[0] | b[0];
1279             result[1] = a[1] | b[1];
1280             result[2] = a[2] | b[2];
1281             result[3] = a[3] | b[3];
1282             store_vector4ui(inst, machine, result);
1283          }
1284          break;
1285       case OPCODE_PK2H:        /* pack two 16-bit floats in one 32-bit float */
1286          {
1287             GLfloat a[4];
1288             GLuint result[4];
1289             GLhalfNV hx, hy;
1290             fetch_vector4(&inst->SrcReg[0], machine, a);
1291             hx = _mesa_float_to_half(a[0]);   /* low 16 bits */
1292             hy = _mesa_float_to_half(a[1]);   /* high 16 bits */
1293             result[0] =
1294             result[1] =
1295             result[2] =
1296             result[3] = (GLuint) hx | ((GLuint) hy << 16);  /* shift as unsigned */
1297             store_vector4ui(inst, machine, result);
1298          }
1299          break;
1300       case OPCODE_PK2US:       /* pack two GLushorts into one 32-bit float */
1301          {
1302             GLfloat a[4];
1303             GLuint result[4], usx, usy;
1304             fetch_vector4(&inst->SrcReg[0], machine, a);
1305             a[0] = CLAMP(a[0], 0.0F, 1.0F);
1306             a[1] = CLAMP(a[1], 0.0F, 1.0F);
1307             usx = IROUND(a[0] * 65535.0F);
1308             usy = IROUND(a[1] * 65535.0F);
1309             result[0] =
1310             result[1] =
1311             result[2] =
1312             result[3] = usx | (usy << 16);
1313             store_vector4ui(inst, machine, result);
1314          }
1315          break;
1316       case OPCODE_PK4B:        /* pack four GLbytes into one 32-bit float */
1317          {
1318             GLfloat a[4];
1319             GLuint result[4], ubx, uby, ubz, ubw;
1320             fetch_vector4(&inst->SrcReg[0], machine, a);
1321             a[0] = CLAMP(a[0], -128.0F / 127.0F, 1.0F);
1322             a[1] = CLAMP(a[1], -128.0F / 127.0F, 1.0F);
1323             a[2] = CLAMP(a[2], -128.0F / 127.0F, 1.0F);
1324             a[3] = CLAMP(a[3], -128.0F / 127.0F, 1.0F);
1325             ubx = IROUND(127.0F * a[0] + 128.0F);
1326             uby = IROUND(127.0F * a[1] + 128.0F);
1327             ubz = IROUND(127.0F * a[2] + 128.0F);
1328             ubw = IROUND(127.0F * a[3] + 128.0F);
1329             result[0] =
1330             result[1] =
1331             result[2] =
1332             result[3] = ubx | (uby << 8) | (ubz << 16) | (ubw << 24);
1333             store_vector4ui(inst, machine, result);
1334          }
1335          break;
1336       case OPCODE_PK4UB:       /* pack four GLubytes into one 32-bit float */
1337          {
1338             GLfloat a[4];
1339             GLuint result[4], ubx, uby, ubz, ubw;
1340             fetch_vector4(&inst->SrcReg[0], machine, a);
1341             a[0] = CLAMP(a[0], 0.0F, 1.0F);
1342             a[1] = CLAMP(a[1], 0.0F, 1.0F);
1343             a[2] = CLAMP(a[2], 0.0F, 1.0F);
1344             a[3] = CLAMP(a[3], 0.0F, 1.0F);
1345             ubx = IROUND(255.0F * a[0]);
1346             uby = IROUND(255.0F * a[1]);
1347             ubz = IROUND(255.0F * a[2]);
1348             ubw = IROUND(255.0F * a[3]);
1349             result[0] =
1350             result[1] =
1351             result[2] =
1352             result[3] = ubx | (uby << 8) | (ubz << 16) | (ubw << 24);
1353             store_vector4ui(inst, machine, result);
1354          }
1355          break;
1356       case OPCODE_POW:
1357          {
1358             GLfloat a[4], b[4], result[4];
1359             fetch_vector1(&inst->SrcReg[0], machine, a);
1360             fetch_vector1(&inst->SrcReg[1], machine, b);
1361             result[0] = result[1] = result[2] = result[3]
1362                = (GLfloat) pow(a[0], b[0]);
1363             store_vector4(inst, machine, result);
1364          }
1365          break;
1366       case OPCODE_RCC:  /* clamped reciprocal */
1367          {
1368             const float largest = 1.884467e+19, smallest = 5.42101e-20; /* NOTE(review): 1/smallest ~= 1.8447e+19; confirm largest */
1369             GLfloat a[4], r, result[4];
1370             fetch_vector1(&inst->SrcReg[0], machine, a);
1371             if (DEBUG_PROG) {
1372                if (a[0] == 0)
1373                   printf("RCC(0)\n");
1374                else if (IS_INF_OR_NAN(a[0]))
1375                   printf("RCC(inf)\n");
1376             }
1377             if (a[0] == 1.0F) {
1378                r = 1.0F;
1379             }
1380             else {
1381                r = 1.0F / a[0];
1382             }
1383             if (positive(r)) {     /* clamp into [smallest, largest] */
1384                if (r > largest) {
1385                   r = largest;
1386                }
1387                else if (r < smallest) {
1388                   r = smallest;
1389                }
1390             }
1391             else {                 /* clamp into [-largest, -smallest] */
1392                if (r < -largest) {
1393                   r = -largest;
1394                }
1395                else if (r > -smallest) {
1396                   r = -smallest;
1397                }
1398             }
1399             result[0] = result[1] = result[2] = result[3] = r;
1400             store_vector4(inst, machine, result);
1401          }
1402          break;
1403
1404       case OPCODE_RCP:
1405          {
1406             GLfloat a[4], result[4];
1407             fetch_vector1(&inst->SrcReg[0], machine, a);
1408             if (DEBUG_PROG) {
1409                if (a[0] == 0)
1410                   printf("RCP(0)\n");
1411                else if (IS_INF_OR_NAN(a[0]))
1412                   printf("RCP(inf)\n");
1413             }
1414             result[0] = result[1] = result[2] = result[3] = 1.0F / a[0];
1415             store_vector4(inst, machine, result);
1416          }
1417          break;
1418       case OPCODE_RET:         /* return from subroutine (conditional) */
1419          if (eval_condition(machine, inst)) {
1420             if (machine->StackDepth == 0) {
1421                return GL_TRUE;  /* Per GL_NV_vertex_program2 spec */
1422             }
1423             /* subtract one because of pc++ in the for loop */
1424             pc = machine->CallStack[--machine->StackDepth] - 1;
1425          }
1426          break;
1427       case OPCODE_RFL:         /* reflection vector */
1428          {
1429             GLfloat axis[4], dir[4], result[4], tmpX, tmpW;
1430             fetch_vector4(&inst->SrcReg[0], machine, axis);
1431             fetch_vector4(&inst->SrcReg[1], machine, dir);
1432             tmpW = DOT3(axis, axis);
1433             tmpX = (2.0F * DOT3(axis, dir)) / tmpW;
1434             result[0] = tmpX * axis[0] - dir[0];
1435             result[1] = tmpX * axis[1] - dir[1];
1436             result[2] = tmpX * axis[2] - dir[2];
1437             result[3] = 0.0F;  /* undefined; zeroed so no uninitialized value is stored. XXX enforce in parser! */
1438             store_vector4(inst, machine, result);
1439          }
1440          break;
1441       case OPCODE_RSQ:         /* 1 / sqrt() */
1442          {
1443             GLfloat a[4], result[4];
1444             fetch_vector1(&inst->SrcReg[0], machine, a);
1445             a[0] = FABSF(a[0]);
1446             result[0] = result[1] = result[2] = result[3] = INV_SQRTF(a[0]);
1447             store_vector4(inst, machine, result);
1448             if (DEBUG_PROG) {
1449                printf("RSQ %g = 1/sqrt(|%g|)\n", result[0], a[0]);
1450             }
1451          }
1452          break;
1453       case OPCODE_SCS:         /* sine and cos */
1454          {
1455             GLfloat a[4], result[4];
1456             fetch_vector1(&inst->SrcReg[0], machine, a);
1457             result[0] = (GLfloat) cos(a[0]);
1458             result[1] = (GLfloat) sin(a[0]);
1459             result[2] = 0.0;    /* undefined! */
1460             result[3] = 0.0;    /* undefined! */
1461             store_vector4(inst, machine, result);
1462          }
1463          break;
1464       case OPCODE_SEQ:         /* set on equal */
1465          {
1466             GLfloat a[4], b[4], result[4];
1467             fetch_vector4(&inst->SrcReg[0], machine, a);
1468             fetch_vector4(&inst->SrcReg[1], machine, b);
1469             result[0] = (a[0] == b[0]) ? 1.0F : 0.0F;
1470             result[1] = (a[1] == b[1]) ? 1.0F : 0.0F;
1471             result[2] = (a[2] == b[2]) ? 1.0F : 0.0F;
1472             result[3] = (a[3] == b[3]) ? 1.0F : 0.0F;
1473             store_vector4(inst, machine, result);
1474             if (DEBUG_PROG) {
1475                printf("SEQ (%g %g %g %g) = (%g %g %g %g) == (%g %g %g %g)\n",
1476                       result[0], result[1], result[2], result[3],
1477                       a[0], a[1], a[2], a[3],
1478                       b[0], b[1], b[2], b[3]);
1479             }
1480          }
1481          break;
1482       case OPCODE_SFL:         /* set false, operands ignored */
1483          {
1484             static const GLfloat result[4] = { 0.0F, 0.0F, 0.0F, 0.0F };
1485             store_vector4(inst, machine, result);
1486          }
1487          break;
1488       case OPCODE_SGE:         /* set on greater or equal */
1489          {
1490             GLfloat a[4], b[4], result[4];
1491             fetch_vector4(&inst->SrcReg[0], machine, a);
1492             fetch_vector4(&inst->SrcReg[1], machine, b);
1493             result[0] = (a[0] >= b[0]) ? 1.0F : 0.0F;
1494             result[1] = (a[1] >= b[1]) ? 1.0F : 0.0F;
1495             result[2] = (a[2] >= b[2]) ? 1.0F : 0.0F;
1496             result[3] = (a[3] >= b[3]) ? 1.0F : 0.0F;
1497             store_vector4(inst, machine, result);
1498             if (DEBUG_PROG) {
1499                printf("SGE (%g %g %g %g) = (%g %g %g %g) >= (%g %g %g %g)\n",
1500                       result[0], result[1], result[2], result[3],
1501                       a[0], a[1], a[2], a[3],
1502                       b[0], b[1], b[2], b[3]);
1503             }
1504          }
1505          break;
1506       case OPCODE_SGT:         /* set on greater */
1507          {
1508             GLfloat a[4], b[4], result[4];
1509             fetch_vector4(&inst->SrcReg[0], machine, a);
1510             fetch_vector4(&inst->SrcReg[1], machine, b);
1511             result[0] = (a[0] > b[0]) ? 1.0F : 0.0F;
1512             result[1] = (a[1] > b[1]) ? 1.0F : 0.0F;
1513             result[2] = (a[2] > b[2]) ? 1.0F : 0.0F;
1514             result[3] = (a[3] > b[3]) ? 1.0F : 0.0F;
1515             store_vector4(inst, machine, result);
1516             if (DEBUG_PROG) {
1517                printf("SGT (%g %g %g %g) = (%g %g %g %g) > (%g %g %g %g)\n",
1518                       result[0], result[1], result[2], result[3],
1519                       a[0], a[1], a[2], a[3],
1520                       b[0], b[1], b[2], b[3]);
1521             }
1522          }
1523          break;
1524       case OPCODE_SIN:
1525          {
1526             GLfloat a[4], result[4];
1527             fetch_vector1(&inst->SrcReg[0], machine, a);
1528             result[0] = result[1] = result[2] = result[3]
1529                = (GLfloat) sin(a[0]);
1530             store_vector4(inst, machine, result);
1531          }
1532          break;
1533       case OPCODE_SLE:         /* set on less or equal */
1534          {
1535             GLfloat a[4], b[4], result[4];
1536             fetch_vector4(&inst->SrcReg[0], machine, a);
1537             fetch_vector4(&inst->SrcReg[1], machine, b);
1538             result[0] = (a[0] <= b[0]) ? 1.0F : 0.0F;
1539             result[1] = (a[1] <= b[1]) ? 1.0F : 0.0F;
1540             result[2] = (a[2] <= b[2]) ? 1.0F : 0.0F;
1541             result[3] = (a[3] <= b[3]) ? 1.0F : 0.0F;
1542             store_vector4(inst, machine, result);
1543             if (DEBUG_PROG) {
1544                printf("SLE (%g %g %g %g) = (%g %g %g %g) <= (%g %g %g %g)\n",
1545                       result[0], result[1], result[2], result[3],
1546                       a[0], a[1], a[2], a[3],
1547                       b[0], b[1], b[2], b[3]);
1548             }
1549          }
1550          break;
1551       case OPCODE_SLT:         /* set on less */
1552          {
1553             GLfloat a[4], b[4], result[4];
1554             fetch_vector4(&inst->SrcReg[0], machine, a);
1555             fetch_vector4(&inst->SrcReg[1], machine, b);
1556             result[0] = (a[0] < b[0]) ? 1.0F : 0.0F;
1557             result[1] = (a[1] < b[1]) ? 1.0F : 0.0F;
1558             result[2] = (a[2] < b[2]) ? 1.0F : 0.0F;
1559             result[3] = (a[3] < b[3]) ? 1.0F : 0.0F;
1560             store_vector4(inst, machine, result);
1561             if (DEBUG_PROG) {
1562                printf("SLT (%g %g %g %g) = (%g %g %g %g) < (%g %g %g %g)\n",
1563                       result[0], result[1], result[2], result[3],
1564                       a[0], a[1], a[2], a[3],
1565                       b[0], b[1], b[2], b[3]);
1566             }
1567          }
1568          break;
1569       case OPCODE_SNE:         /* set on not equal */
1570          {
1571             GLfloat a[4], b[4], result[4];
1572             fetch_vector4(&inst->SrcReg[0], machine, a);
1573             fetch_vector4(&inst->SrcReg[1], machine, b);
1574             result[0] = (a[0] != b[0]) ? 1.0F : 0.0F;
1575             result[1] = (a[1] != b[1]) ? 1.0F : 0.0F;
1576             result[2] = (a[2] != b[2]) ? 1.0F : 0.0F;
1577             result[3] = (a[3] != b[3]) ? 1.0F : 0.0F;
1578             store_vector4(inst, machine, result);
1579             if (DEBUG_PROG) {
1580                printf("SNE (%g %g %g %g) = (%g %g %g %g) != (%g %g %g %g)\n",
1581                       result[0], result[1], result[2], result[3],
1582                       a[0], a[1], a[2], a[3],
1583                       b[0], b[1], b[2], b[3]);
1584             }
1585          }
1586          break;
1587       case OPCODE_SSG:         /* set sign (-1, 0 or +1) */
1588          {
1589             GLfloat a[4], result[4];
1590             fetch_vector4(&inst->SrcReg[0], machine, a);
1591             result[0] = (GLfloat) ((a[0] > 0.0F) - (a[0] < 0.0F));
1592             result[1] = (GLfloat) ((a[1] > 0.0F) - (a[1] < 0.0F));
1593             result[2] = (GLfloat) ((a[2] > 0.0F) - (a[2] < 0.0F));
1594             result[3] = (GLfloat) ((a[3] > 0.0F) - (a[3] < 0.0F));
1595             store_vector4(inst, machine, result);
1596          }
1597          break;
1598       case OPCODE_STR:         /* set true, operands ignored */
1599          {
1600             static const GLfloat result[4] = { 1.0F, 1.0F, 1.0F, 1.0F };
1601             store_vector4(inst, machine, result);
1602          }
1603          break;
1604       case OPCODE_SUB:
1605          {
1606             GLfloat a[4], b[4], result[4];
1607             fetch_vector4(&inst->SrcReg[0], machine, a);
1608             fetch_vector4(&inst->SrcReg[1], machine, b);
1609             result[0] = a[0] - b[0];
1610             result[1] = a[1] - b[1];
1611             result[2] = a[2] - b[2];
1612             result[3] = a[3] - b[3];
1613             store_vector4(inst, machine, result);
1614             if (DEBUG_PROG) {
1615                printf("SUB (%g %g %g %g) = (%g %g %g %g) - (%g %g %g %g)\n",
1616                       result[0], result[1], result[2], result[3],
1617                       a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
1618             }
1619          }
1620          break;
1621       case OPCODE_SWZ:         /* extended swizzle */
1622          {
1623             const struct prog_src_register *source = &inst->SrcReg[0];
1624             const GLfloat *src = get_src_register_pointer(source, machine);
1625             GLfloat result[4];
1626             GLuint i;
1627             for (i = 0; i < 4; i++) {
1628                const GLuint swz = GET_SWZ(source->Swizzle, i);
1629                if (swz == SWIZZLE_ZERO)
1630                   result[i] = 0.0;
1631                else if (swz == SWIZZLE_ONE)
1632                   result[i] = 1.0;
1633                else {
1634                   ASSERT(swz >= 0);
1635                   ASSERT(swz <= 3);
1636                   result[i] = src[swz];
1637                }
1638                if (source->Negate & (1 << i))
1639                   result[i] = -result[i];
1640             }
1641             store_vector4(inst, machine, result);
1642          }
1643          break;
1644       case OPCODE_TEX:         /* Both ARB and NV frag prog */
1645          /* Simple texel lookup */
1646          {
1647             GLfloat texcoord[4], color[4];
1648             fetch_vector4(&inst->SrcReg[0], machine, texcoord);
1649
1650             fetch_texel(ctx, machine, inst, texcoord, 0.0, color);
1651
1652             if (DEBUG_PROG) {
1653                printf("TEX (%g, %g, %g, %g) = texture[%d][%g, %g, %g, %g]\n",
1654                       color[0], color[1], color[2], color[3],
1655                       inst->TexSrcUnit,
1656                       texcoord[0], texcoord[1], texcoord[2], texcoord[3]);
1657             }
1658             store_vector4(inst, machine, color);
1659          }
1660          break;
1661       case OPCODE_TXB:         /* GL_ARB_fragment_program only */
1662          /* Texel lookup with LOD bias */
1663          {
1664             GLfloat texcoord[4], color[4], lodBias;
1665
1666             fetch_vector4(&inst->SrcReg[0], machine, texcoord);
1667
1668             /* texcoord[3] is the bias to add to lambda */
1669             lodBias = texcoord[3];
1670
1671             fetch_texel(ctx, machine, inst, texcoord, lodBias, color);
1672
1673             store_vector4(inst, machine, color);
1674          }
1675          break;
1676       case OPCODE_TXD:         /* GL_NV_fragment_program only */
1677          /* Texture lookup w/ partial derivatives for LOD */
1678          {
1679             GLfloat texcoord[4], dtdx[4], dtdy[4], color[4];
1680             fetch_vector4(&inst->SrcReg[0], machine, texcoord);
1681             fetch_vector4(&inst->SrcReg[1], machine, dtdx);
1682             fetch_vector4(&inst->SrcReg[2], machine, dtdy);
1683             machine->FetchTexelDeriv(ctx, texcoord, dtdx, dtdy,
1684                                      0.0, /* lodBias */
1685                                      inst->TexSrcUnit, color);
1686             store_vector4(inst, machine, color);
1687          }
1688          break;
1689       case OPCODE_TXL:
1690          /* Texel lookup with explicit LOD */
1691          {
1692             GLfloat texcoord[4], color[4], lod;
1693
1694             fetch_vector4(&inst->SrcReg[0], machine, texcoord);
1695
1696             /* texcoord[3] is the LOD */
1697             lod = texcoord[3];
1698
1699             machine->FetchTexelLod(ctx, texcoord, lod,
1700                                    machine->Samplers[inst->TexSrcUnit], color);
1701
1702             store_vector4(inst, machine, color);
1703          }
1704          break;
1705       case OPCODE_TXP:         /* GL_ARB_fragment_program only */
1706          /* Texture lookup w/ projective divide */
1707          {
1708             GLfloat texcoord[4], color[4];
1709
1710             fetch_vector4(&inst->SrcReg[0], machine, texcoord);
1711             /* Not so sure about this test - if texcoord[3] is
1712              * zero, we'd probably be fine except for an ASSERT in
1713              * IROUND_POS() which gets triggered by the inf values created.
1714              */
1715             if (texcoord[3] != 0.0) {
1716                texcoord[0] /= texcoord[3];
1717                texcoord[1] /= texcoord[3];
1718                texcoord[2] /= texcoord[3];
1719             }
1720
1721             fetch_texel(ctx, machine, inst, texcoord, 0.0, color);
1722
1723             store_vector4(inst, machine, color);
1724          }
1725          break;
1726       case OPCODE_TXP_NV:      /* GL_NV_fragment_program only */
1727          /* Texture lookup w/ projective divide, as above, but do not
1728           * do the divide by w if sampling from a cube map.
1729           */
1730          {
1731             GLfloat texcoord[4], color[4];
1732
1733             fetch_vector4(&inst->SrcReg[0], machine, texcoord);
1734             if (inst->TexSrcTarget != TEXTURE_CUBE_INDEX &&
1735                 texcoord[3] != 0.0) {
1736                texcoord[0] /= texcoord[3];
1737                texcoord[1] /= texcoord[3];
1738                texcoord[2] /= texcoord[3];
1739             }
1740
1741             fetch_texel(ctx, machine, inst, texcoord, 0.0, color);
1742
1743             store_vector4(inst, machine, color);
1744          }
1745          break;
1746       case OPCODE_TRUNC:       /* truncate toward zero */
1747          {
1748             GLfloat a[4], result[4];
1749             fetch_vector4(&inst->SrcReg[0], machine, a);
1750             result[0] = (GLfloat) (GLint) a[0];
1751             result[1] = (GLfloat) (GLint) a[1];
1752             result[2] = (GLfloat) (GLint) a[2];
1753             result[3] = (GLfloat) (GLint) a[3];
1754             store_vector4(inst, machine, result);
1755          }
1756          break;
1757       case OPCODE_UP2H:        /* unpack two 16-bit floats */
1758          {
1759             const GLuint raw = fetch_vector1ui(&inst->SrcReg[0], machine);
1760             GLfloat result[4];
1761             GLushort hx, hy;
1762             hx = raw & 0xffff;
1763             hy = raw >> 16;
1764             result[0] = result[2] = _mesa_half_to_float(hx);
1765             result[1] = result[3] = _mesa_half_to_float(hy);
1766             store_vector4(inst, machine, result);
1767          }
1768          break;
1769       case OPCODE_UP2US:       /* unpack two GLushorts */
1770          {
1771             const GLuint raw = fetch_vector1ui(&inst->SrcReg[0], machine);
1772             GLfloat result[4];
1773             GLushort usx, usy;
1774             usx = raw & 0xffff;
1775             usy = raw >> 16;
1776             result[0] = result[2] = usx * (1.0f / 65535.0f);
1777             result[1] = result[3] = usy * (1.0f / 65535.0f);
1778             store_vector4(inst, machine, result);
1779          }
1780          break;
1781       case OPCODE_UP4B:        /* unpack four GLbytes */
1782          {
1783             const GLuint raw = fetch_vector1ui(&inst->SrcReg[0], machine);
1784             GLfloat result[4];
1785             result[0] = (((raw >> 0) & 0xff) - 128) / 127.0F;
1786             result[1] = (((raw >> 8) & 0xff) - 128) / 127.0F;
1787             result[2] = (((raw >> 16) & 0xff) - 128) / 127.0F;
1788             result[3] = (((raw >> 24) & 0xff) - 128) / 127.0F;
1789             store_vector4(inst, machine, result);
1790          }
1791          break;
1792       case OPCODE_UP4UB:       /* unpack four GLubytes */
1793          {
1794             const GLuint raw = fetch_vector1ui(&inst->SrcReg[0], machine);
1795             GLfloat result[4];
1796             result[0] = ((raw >> 0) & 0xff) / 255.0F;
1797             result[1] = ((raw >> 8) & 0xff) / 255.0F;
1798             result[2] = ((raw >> 16) & 0xff) / 255.0F;
1799             result[3] = ((raw >> 24) & 0xff) / 255.0F;
1800             store_vector4(inst, machine, result);
1801          }
1802          break;
1803       case OPCODE_XOR:         /* bitwise XOR */
1804          {
1805             GLuint a[4], b[4], result[4];
1806             fetch_vector4ui(&inst->SrcReg[0], machine, a);
1807             fetch_vector4ui(&inst->SrcReg[1], machine, b);
1808             result[0] = a[0] ^ b[0];
1809             result[1] = a[1] ^ b[1];
1810             result[2] = a[2] ^ b[2];
1811             result[3] = a[3] ^ b[3];
1812             store_vector4ui(inst, machine, result);
1813          }
1814          break;
1815       case OPCODE_XPD:         /* cross product */
1816          {
1817             GLfloat a[4], b[4], result[4];
1818             fetch_vector4(&inst->SrcReg[0], machine, a);
1819             fetch_vector4(&inst->SrcReg[1], machine, b);
1820             result[0] = a[1] * b[2] - a[2] * b[1];
1821             result[1] = a[2] * b[0] - a[0] * b[2];
1822             result[2] = a[0] * b[1] - a[1] * b[0];
1823             result[3] = 1.0;
1824             store_vector4(inst, machine, result);
1825             if (DEBUG_PROG) {
1826                printf("XPD (%g %g %g %g) = (%g %g %g) X (%g %g %g)\n",
1827                       result[0], result[1], result[2], result[3],
1828                       a[0], a[1], a[2], b[0], b[1], b[2]);
1829             }
1830          }
1831          break;
1832       case OPCODE_X2D:         /* 2-D matrix transform */
1833          {
1834             GLfloat a[4], b[4], c[4], result[4];
1835             fetch_vector4(&inst->SrcReg[0], machine, a);
1836             fetch_vector4(&inst->SrcReg[1], machine, b);
1837             fetch_vector4(&inst->SrcReg[2], machine, c);
1838             result[0] = a[0] + b[0] * c[0] + b[1] * c[1];
1839             result[1] = a[1] + b[0] * c[2] + b[1] * c[3];
1840             result[2] = a[2] + b[0] * c[0] + b[1] * c[1];
1841             result[3] = a[3] + b[0] * c[2] + b[1] * c[3];
1842             store_vector4(inst, machine, result);
1843          }
1844          break;
1845       case OPCODE_PRINT:
1846          {
1847             if (inst->SrcReg[0].File != PROGRAM_UNDEFINED) {
1848                GLfloat a[4];
1849                fetch_vector4(&inst->SrcReg[0], machine, a);
1850                printf("%s%g, %g, %g, %g\n", (const char *) inst->Data,
1851                             a[0], a[1], a[2], a[3]);
1852             }
1853             else {
1854                printf("%s\n", (const char *) inst->Data);
1855             }
1856          }
1857          break;
1858       case OPCODE_END:
1859          return GL_TRUE;
1860       default:
1861          _mesa_problem(ctx, "Bad opcode %d in _mesa_execute_program",
1862                        inst->Opcode);
1863          return GL_TRUE;        /* return value doesn't matter */
1864       }
1865
1866       numExec++;
1867       if (numExec > maxExec) {
1868          static GLboolean reported = GL_FALSE;
1869          if (!reported) {
1870             _mesa_problem(ctx, "Infinite loop detected in fragment program");
1871             reported = GL_TRUE;
1872          }
1873          return GL_TRUE;
1874       }
1875
1876    } /* for pc */
1877
1878    return GL_TRUE;
1879 }