src/mesa/program/prog_execute.c

   1 /*
   2  * Mesa 3-D graphics library
   3  * Version:  7.3
   4  *
   5  * Copyright (C) 1999-2008  Brian Paul   All Rights Reserved.
   6  *
   7  * Permission is hereby granted, free of charge, to any person obtaining a
   8  * copy of this software and associated documentation files (the "Software"),
   9  * to deal in the Software without restriction, including without limitation
  10  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  11  * and/or sell copies of the Software, and to permit persons to whom the
  12  * Software is furnished to do so, subject to the following conditions:
  13  *
  14  * The above copyright notice and this permission notice shall be included
  15  * in all copies or substantial portions of the Software.
  16  *
  17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  18  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  20  * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
  21  * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  22  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  23  */
  24
  25 /**
  26  * \file prog_execute.c
  27  * Software interpreter for vertex/fragment programs.
  28  * \author Brian Paul
  29  */
  30
  31 /*
  32  * NOTE: we do everything in single-precision floating point; we don't
  33  * currently observe the single/half/fixed-precision qualifiers.
  34  *
  35  */
  36
  37
  38 #include "main/glheader.h"
  39 #include "main/colormac.h"
  40 #include "main/macros.h"
  41 #include "prog_execute.h"
  42 #include "prog_instruction.h"
  43 #include "prog_parameter.h"
  44 #include "prog_print.h"
  45 #include "prog_noise.h"
  46
  47
  48 /* debug predicate */
  49 #define DEBUG_PROG 0
  50
  51
  52 /**
  53  * Set x to positive or negative infinity.
  54  */
  55 #if defined(USE_IEEE) || defined(_WIN32)
  56 #define SET_POS_INFINITY(x)                  \
  57    do {                                      \
  58          fi_type fi;                         \
  59          fi.i = 0x7F800000;                  \
  60          x = fi.f;                           \
  61    } while (0)
  62 #define SET_NEG_INFINITY(x)                  \
  63    do {                                      \
  64          fi_type fi;                         \
  65          fi.i = 0xFF800000;                  \
  66          x = fi.f;                           \
  67    } while (0)
  68 #elif defined(VMS)
  69 #define SET_POS_INFINITY(x)  x = __MAXFLOAT
  70 #define SET_NEG_INFINITY(x)  x = -__MAXFLOAT
  71 #else
  72 #define SET_POS_INFINITY(x)  x = (GLfloat) HUGE_VAL
  73 #define SET_NEG_INFINITY(x)  x = (GLfloat) -HUGE_VAL
  74 #endif
  75
  76 #define SET_FLOAT_BITS(x, bits) ((fi_type *) (void *) &(x))->i = bits
  77
  78
  79 static const GLfloat ZeroVec[4] = { 0.0F, 0.0F, 0.0F, 0.0F };
  80
  81
  82
  83 /**
  84  * Return TRUE for +0 and other positive values, FALSE otherwise.
  85  * Used for RCC opcode.
  86  */
  87 static INLINE GLboolean
  88 positive(float x)
  89 {
  90    fi_type fi;
  91    fi.f = x;
  92    if (fi.i & 0x80000000)
  93       return GL_FALSE;
  94    return GL_TRUE;
  95 }
  96
  97
  98
  99 /**
 100  * Return a pointer to the 4-element float vector specified by the given
 101  * source register.
 102  */
 103 static INLINE const GLfloat *
 104 get_src_register_pointer(const struct prog_src_register *source,
 105                          const struct gl_program_machine *machine)
 106 {
 107    const struct gl_program *prog = machine->CurProgram;
 108    GLint reg = source->Index;
 109
 110    if (source->RelAddr) {
 111       /* add address register value to src index/offset */
 112       reg += machine->AddressReg[0][0];
 113       if (reg < 0) {
 114          return ZeroVec;
 115       }
 116    }
 117
 118    switch (source->File) {
 119    case PROGRAM_TEMPORARY:
 120       if (reg >= MAX_PROGRAM_TEMPS)
 121          return ZeroVec;
 122       return machine->Temporaries[reg];
 123
 124    case PROGRAM_INPUT:
 125       if (prog->Target == GL_VERTEX_PROGRAM_ARB) {
 126          if (reg >= VERT_ATTRIB_MAX)
 127             return ZeroVec;
 128          return machine->VertAttribs[reg];
 129       }
 130       else {
 131          if (reg >= FRAG_ATTRIB_MAX)
 132             return ZeroVec;
 133          return machine->Attribs[reg][machine->CurElement];
 134       }
 135
 136    case PROGRAM_OUTPUT:
 137       if (reg >= MAX_PROGRAM_OUTPUTS)
 138          return ZeroVec;
 139       return machine->Outputs[reg];
 140
 141    case PROGRAM_LOCAL_PARAM:
 142       if (reg >= MAX_PROGRAM_LOCAL_PARAMS)
 143          return ZeroVec;
 144       return machine->CurProgram->LocalParams[reg];
 145
 146    case PROGRAM_ENV_PARAM:
 147       if (reg >= MAX_PROGRAM_ENV_PARAMS)
 148          return ZeroVec;
 149       return machine->EnvParams[reg];
 150
 151    case PROGRAM_STATE_VAR:
 152       /* Fallthrough */
 153    case PROGRAM_CONSTANT:
 154       /* Fallthrough */
 155    case PROGRAM_UNIFORM:
 156       /* Fallthrough */
 157    case PROGRAM_NAMED_PARAM:
 158       if (reg >= (GLint) prog->Parameters->NumParameters)
 159          return ZeroVec;
 160       return prog->Parameters->ParameterValues[reg];
 161
 162    default:
 163       _mesa_problem(NULL,
 164          "Invalid src register file %d in get_src_register_pointer()",
 165          source->File);
 166       return NULL;
 167    }
 168 }
 169
 170
 171 /**
 172  * Return a pointer to the 4-element float vector specified by the given
 173  * destination register.
 174  */
 175 static INLINE GLfloat *
 176 get_dst_register_pointer(const struct prog_dst_register *dest,
 177                          struct gl_program_machine *machine)
 178 {
 179    static GLfloat dummyReg[4];
 180    GLint reg = dest->Index;
 181
 182    if (dest->RelAddr) {
 183       /* add address register value to src index/offset */
 184       reg += machine->AddressReg[0][0];
 185       if (reg < 0) {
 186          return dummyReg;
 187       }
 188    }
 189
 190    switch (dest->File) {
 191    case PROGRAM_TEMPORARY:
 192       if (reg >= MAX_PROGRAM_TEMPS)
 193          return dummyReg;
 194       return machine->Temporaries[reg];
 195
 196    case PROGRAM_OUTPUT:
 197       if (reg >= MAX_PROGRAM_OUTPUTS)
 198          return dummyReg;
 199       return machine->Outputs[reg];
 200
 201    case PROGRAM_WRITE_ONLY:
 202       return dummyReg;
 203
 204    default:
 205       _mesa_problem(NULL,
 206          "Invalid dest register file %d in get_dst_register_pointer()",
 207          dest->File);
 208       return NULL;
 209    }
 210 }
 211
 212
 213
 214 /**
 215  * Fetch a 4-element float vector from the given source register.
 216  * Apply swizzling and negating as needed.
 217  */
 218 static void
 219 fetch_vector4(const struct prog_src_register *source,
 220               const struct gl_program_machine *machine, GLfloat result[4])
 221 {
 222    const GLfloat *src = get_src_register_pointer(source, machine);
 223    ASSERT(src);
 224
 225    if (source->Swizzle == SWIZZLE_NOOP) {
 226       /* no swizzling */
 227       COPY_4V(result, src);
 228    }
 229    else {
 230       ASSERT(GET_SWZ(source->Swizzle, 0) <= 3);
 231       ASSERT(GET_SWZ(source->Swizzle, 1) <= 3);
 232       ASSERT(GET_SWZ(source->Swizzle, 2) <= 3);
 233       ASSERT(GET_SWZ(source->Swizzle, 3) <= 3);
 234       result[0] = src[GET_SWZ(source->Swizzle, 0)];
 235       result[1] = src[GET_SWZ(source->Swizzle, 1)];
 236       result[2] = src[GET_SWZ(source->Swizzle, 2)];
 237       result[3] = src[GET_SWZ(source->Swizzle, 3)];
 238    }
 239
 240    if (source->Abs) {
 241       result[0] = FABSF(result[0]);
 242       result[1] = FABSF(result[1]);
 243       result[2] = FABSF(result[2]);
 244       result[3] = FABSF(result[3]);
 245    }
 246    if (source->Negate) {
 247       ASSERT(source->Negate == NEGATE_XYZW);
 248       result[0] = -result[0];
 249       result[1] = -result[1];
 250       result[2] = -result[2];
 251       result[3] = -result[3];
 252    }
 253
 254 #ifdef NAN_CHECK
 255    assert(!IS_INF_OR_NAN(result[0]));
 256    assert(!IS_INF_OR_NAN(result[0]));
 257    assert(!IS_INF_OR_NAN(result[0]));
 258    assert(!IS_INF_OR_NAN(result[0]));
 259 #endif
 260 }
 261
 262
 263 /**
 264  * Fetch a 4-element uint vector from the given source register.
 265  * Apply swizzling but not negation/abs.
 266  */
 267 static void
 268 fetch_vector4ui(const struct prog_src_register *source,
 269                 const struct gl_program_machine *machine, GLuint result[4])
 270 {
 271    const GLuint *src = (GLuint *) get_src_register_pointer(source, machine);
 272    ASSERT(src);
 273
 274    if (source->Swizzle == SWIZZLE_NOOP) {
 275       /* no swizzling */
 276       COPY_4V(result, src);
 277    }
 278    else {
 279       ASSERT(GET_SWZ(source->Swizzle, 0) <= 3);
 280       ASSERT(GET_SWZ(source->Swizzle, 1) <= 3);
 281       ASSERT(GET_SWZ(source->Swizzle, 2) <= 3);
 282       ASSERT(GET_SWZ(source->Swizzle, 3) <= 3);
 283       result[0] = src[GET_SWZ(source->Swizzle, 0)];
 284       result[1] = src[GET_SWZ(source->Swizzle, 1)];
 285       result[2] = src[GET_SWZ(source->Swizzle, 2)];
 286       result[3] = src[GET_SWZ(source->Swizzle, 3)];
 287    }
 288
 289    /* Note: no Negate or Abs here */
 290 }
 291
 292
 293
 294 /**
 295  * Fetch the derivative with respect to X or Y for the given register.
 296  * XXX this currently only works for fragment program input attribs.
 297  */
 298 static void
 299 fetch_vector4_deriv(GLcontext * ctx,
 300                     const struct prog_src_register *source,
 301                     const struct gl_program_machine *machine,
 302                     char xOrY, GLfloat result[4])
 303 {
 304    if (source->File == PROGRAM_INPUT &&
 305        source->Index < (GLint) machine->NumDeriv) {
 306       const GLint col = machine->CurElement;
 307       const GLfloat w = machine->Attribs[FRAG_ATTRIB_WPOS][col][3];
 308       const GLfloat invQ = 1.0f / w;
 309       GLfloat deriv[4];
 310
 311       if (xOrY == 'X') {
 312          deriv[0] = machine->DerivX[source->Index][0] * invQ;
 313          deriv[1] = machine->DerivX[source->Index][1] * invQ;
 314          deriv[2] = machine->DerivX[source->Index][2] * invQ;
 315          deriv[3] = machine->DerivX[source->Index][3] * invQ;
 316       }
 317       else {
 318          deriv[0] = machine->DerivY[source->Index][0] * invQ;
 319          deriv[1] = machine->DerivY[source->Index][1] * invQ;
 320          deriv[2] = machine->DerivY[source->Index][2] * invQ;
 321          deriv[3] = machine->DerivY[source->Index][3] * invQ;
 322       }
 323
 324       result[0] = deriv[GET_SWZ(source->Swizzle, 0)];
 325       result[1] = deriv[GET_SWZ(source->Swizzle, 1)];
 326       result[2] = deriv[GET_SWZ(source->Swizzle, 2)];
 327       result[3] = deriv[GET_SWZ(source->Swizzle, 3)];
 328
 329       if (source->Abs) {
 330          result[0] = FABSF(result[0]);
 331          result[1] = FABSF(result[1]);
 332          result[2] = FABSF(result[2]);
 333          result[3] = FABSF(result[3]);
 334       }
 335       if (source->Negate) {
 336          ASSERT(source->Negate == NEGATE_XYZW);
 337          result[0] = -result[0];
 338          result[1] = -result[1];
 339          result[2] = -result[2];
 340          result[3] = -result[3];
 341       }
 342    }
 343    else {
 344       ASSIGN_4V(result, 0.0, 0.0, 0.0, 0.0);
 345    }
 346 }
 347
 348
 349 /**
 350  * As above, but only return result[0] element.
 351  */
 352 static void
 353 fetch_vector1(const struct prog_src_register *source,
 354               const struct gl_program_machine *machine, GLfloat result[4])
 355 {
 356    const GLfloat *src = get_src_register_pointer(source, machine);
 357    ASSERT(src);
 358
 359    result[0] = src[GET_SWZ(source->Swizzle, 0)];
 360
 361    if (source->Abs) {
 362       result[0] = FABSF(result[0]);
 363    }
 364    if (source->Negate) {
 365       result[0] = -result[0];
 366    }
 367 }
 368
 369
 370 static GLuint
 371 fetch_vector1ui(const struct prog_src_register *source,
 372                 const struct gl_program_machine *machine)
 373 {
 374    const GLuint *src = (GLuint *) get_src_register_pointer(source, machine);
 375    return src[GET_SWZ(source->Swizzle, 0)];
 376 }
 377
 378
 379 /**
 380  * Fetch texel from texture.  Use partial derivatives when possible.
 381  */
 382 static INLINE void
 383 fetch_texel(GLcontext *ctx,
 384             const struct gl_program_machine *machine,
 385             const struct prog_instruction *inst,
 386             const GLfloat texcoord[4], GLfloat lodBias,
 387             GLfloat color[4])
 388 {
 389    const GLuint unit = machine->Samplers[inst->TexSrcUnit];
 390
 391    /* Note: we only have the right derivatives for fragment input attribs.
 392     */
 393    if (machine->NumDeriv > 0 &&
 394        inst->SrcReg[0].File == PROGRAM_INPUT &&
 395        inst->SrcReg[0].Index == FRAG_ATTRIB_TEX0 + inst->TexSrcUnit) {
 396       /* simple texture fetch for which we should have derivatives */
 397       GLuint attr = inst->SrcReg[0].Index;
 398       machine->FetchTexelDeriv(ctx, texcoord,
 399                                machine->DerivX[attr],
 400                                machine->DerivY[attr],
 401                                lodBias, unit, color);
 402    }
 403    else {
 404       machine->FetchTexelLod(ctx, texcoord, lodBias, unit, color);
 405    }
 406 }
 407
 408
 409 /**
 410  * Test value against zero and return GT, LT, EQ or UN if NaN.
 411  */
 412 static INLINE GLuint
 413 generate_cc(float value)
 414 {
 415    if (value != value)
 416       return COND_UN;           /* NaN */
 417    if (value > 0.0F)
 418       return COND_GT;
 419    if (value < 0.0F)
 420       return COND_LT;
 421    return COND_EQ;
 422 }
 423
 424
 425 /**
 426  * Test if the ccMaskRule is satisfied by the given condition code.
 427  * Used to mask destination writes according to the current condition code.
 428  */
 429 static INLINE GLboolean
 430 test_cc(GLuint condCode, GLuint ccMaskRule)
 431 {
 432    switch (ccMaskRule) {
 433    case COND_EQ: return (condCode == COND_EQ);
 434    case COND_NE: return (condCode != COND_EQ);
 435    case COND_LT: return (condCode == COND_LT);
 436    case COND_GE: return (condCode == COND_GT || condCode == COND_EQ);
 437    case COND_LE: return (condCode == COND_LT || condCode == COND_EQ);
 438    case COND_GT: return (condCode == COND_GT);
 439    case COND_TR: return GL_TRUE;
 440    case COND_FL: return GL_FALSE;
 441    default:      return GL_TRUE;
 442    }
 443 }
 444
 445
 446 /**
 447  * Evaluate the 4 condition codes against a predicate and return GL_TRUE
 448  * or GL_FALSE to indicate result.
 449  */
 450 static INLINE GLboolean
 451 eval_condition(const struct gl_program_machine *machine,
 452                const struct prog_instruction *inst)
 453 {
 454    const GLuint swizzle = inst->DstReg.CondSwizzle;
 455    const GLuint condMask = inst->DstReg.CondMask;
 456    if (test_cc(machine->CondCodes[GET_SWZ(swizzle, 0)], condMask) ||
 457        test_cc(machine->CondCodes[GET_SWZ(swizzle, 1)], condMask) ||
 458        test_cc(machine->CondCodes[GET_SWZ(swizzle, 2)], condMask) ||
 459        test_cc(machine->CondCodes[GET_SWZ(swizzle, 3)], condMask)) {
 460       return GL_TRUE;
 461    }
 462    else {
 463       return GL_FALSE;
 464    }
 465 }
 466
 467
 468
 469 /**
 470  * Store 4 floats into a register.  Observe the instructions saturate and
 471  * set-condition-code flags.
 472  */
 473 static void
 474 store_vector4(const struct prog_instruction *inst,
 475               struct gl_program_machine *machine, const GLfloat value[4])
 476 {
 477    const struct prog_dst_register *dstReg = &(inst->DstReg);
 478    const GLboolean clamp = inst->SaturateMode == SATURATE_ZERO_ONE;
 479    GLuint writeMask = dstReg->WriteMask;
 480    GLfloat clampedValue[4];
 481    GLfloat *dst = get_dst_register_pointer(dstReg, machine);
 482
 483 #if 0
 484    if (value[0] > 1.0e10 ||
 485        IS_INF_OR_NAN(value[0]) ||
 486        IS_INF_OR_NAN(value[1]) ||
 487        IS_INF_OR_NAN(value[2]) || IS_INF_OR_NAN(value[3]))
 488       printf("store %g %g %g %g\n", value[0], value[1], value[2], value[3]);
 489 #endif
 490
 491    if (clamp) {
 492       clampedValue[0] = CLAMP(value[0], 0.0F, 1.0F);
 493       clampedValue[1] = CLAMP(value[1], 0.0F, 1.0F);
 494       clampedValue[2] = CLAMP(value[2], 0.0F, 1.0F);
 495       clampedValue[3] = CLAMP(value[3], 0.0F, 1.0F);
 496       value = clampedValue;
 497    }
 498
 499    if (dstReg->CondMask != COND_TR) {
 500       /* condition codes may turn off some writes */
 501       if (writeMask & WRITEMASK_X) {
 502          if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 0)],
 503                       dstReg->CondMask))
 504             writeMask &= ~WRITEMASK_X;
 505       }
 506       if (writeMask & WRITEMASK_Y) {
 507          if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 1)],
 508                       dstReg->CondMask))
 509             writeMask &= ~WRITEMASK_Y;
 510       }
 511       if (writeMask & WRITEMASK_Z) {
 512          if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 2)],
 513                       dstReg->CondMask))
 514             writeMask &= ~WRITEMASK_Z;
 515       }
 516       if (writeMask & WRITEMASK_W) {
 517          if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 3)],
 518                       dstReg->CondMask))
 519             writeMask &= ~WRITEMASK_W;
 520       }
 521    }
 522
 523 #ifdef NAN_CHECK
 524    assert(!IS_INF_OR_NAN(value[0]));
 525    assert(!IS_INF_OR_NAN(value[0]));
 526    assert(!IS_INF_OR_NAN(value[0]));
 527    assert(!IS_INF_OR_NAN(value[0]));
 528 #endif
 529
 530    if (writeMask & WRITEMASK_X)
 531       dst[0] = value[0];
 532    if (writeMask & WRITEMASK_Y)
 533       dst[1] = value[1];
 534    if (writeMask & WRITEMASK_Z)
 535       dst[2] = value[2];
 536    if (writeMask & WRITEMASK_W)
 537       dst[3] = value[3];
 538
 539    if (inst->CondUpdate) {
 540       if (writeMask & WRITEMASK_X)
 541          machine->CondCodes[0] = generate_cc(value[0]);
 542       if (writeMask & WRITEMASK_Y)
 543          machine->CondCodes[1] = generate_cc(value[1]);
 544       if (writeMask & WRITEMASK_Z)
 545          machine->CondCodes[2] = generate_cc(value[2]);
 546       if (writeMask & WRITEMASK_W)
 547          machine->CondCodes[3] = generate_cc(value[3]);
 548 #if DEBUG_PROG
 549       printf("CondCodes=(%s,%s,%s,%s) for:\n",
 550              _mesa_condcode_string(machine->CondCodes[0]),
 551              _mesa_condcode_string(machine->CondCodes[1]),
 552              _mesa_condcode_string(machine->CondCodes[2]),
 553              _mesa_condcode_string(machine->CondCodes[3]));
 554 #endif
 555    }
 556 }
 557
 558
 559 /**
 560  * Store 4 uints into a register.  Observe the set-condition-code flags.
 561  */
 562 static void
 563 store_vector4ui(const struct prog_instruction *inst,
 564                 struct gl_program_machine *machine, const GLuint value[4])
 565 {
 566    const struct prog_dst_register *dstReg = &(inst->DstReg);
 567    GLuint writeMask = dstReg->WriteMask;
 568    GLuint *dst = (GLuint *) get_dst_register_pointer(dstReg, machine);
 569
 570    if (dstReg->CondMask != COND_TR) {
 571       /* condition codes may turn off some writes */
 572       if (writeMask & WRITEMASK_X) {
 573          if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 0)],
 574                       dstReg->CondMask))
 575             writeMask &= ~WRITEMASK_X;
 576       }
 577       if (writeMask & WRITEMASK_Y) {
 578          if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 1)],
 579                       dstReg->CondMask))
 580             writeMask &= ~WRITEMASK_Y;
 581       }
 582       if (writeMask & WRITEMASK_Z) {
 583          if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 2)],
 584                       dstReg->CondMask))
 585             writeMask &= ~WRITEMASK_Z;
 586       }
 587       if (writeMask & WRITEMASK_W) {
 588          if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 3)],
 589                       dstReg->CondMask))
 590             writeMask &= ~WRITEMASK_W;
 591       }
 592    }
 593
 594    if (writeMask & WRITEMASK_X)
 595       dst[0] = value[0];
 596    if (writeMask & WRITEMASK_Y)
 597       dst[1] = value[1];
 598    if (writeMask & WRITEMASK_Z)
 599       dst[2] = value[2];
 600    if (writeMask & WRITEMASK_W)
 601       dst[3] = value[3];
 602
 603    if (inst->CondUpdate) {
 604       if (writeMask & WRITEMASK_X)
 605          machine->CondCodes[0] = generate_cc((float)value[0]);
 606       if (writeMask & WRITEMASK_Y)
 607          machine->CondCodes[1] = generate_cc((float)value[1]);
 608       if (writeMask & WRITEMASK_Z)
 609          machine->CondCodes[2] = generate_cc((float)value[2]);
 610       if (writeMask & WRITEMASK_W)
 611          machine->CondCodes[3] = generate_cc((float)value[3]);
 612 #if DEBUG_PROG
 613       printf("CondCodes=(%s,%s,%s,%s) for:\n",
 614              _mesa_condcode_string(machine->CondCodes[0]),
 615              _mesa_condcode_string(machine->CondCodes[1]),
 616              _mesa_condcode_string(machine->CondCodes[2]),
 617              _mesa_condcode_string(machine->CondCodes[3]));
 618 #endif
 619    }
 620 }
 621
 622
 623
 624 /**
 625  * Execute the given vertex/fragment program.
 626  *
 627  * \param ctx  rendering context
 628  * \param program  the program to execute
 629  * \param machine  machine state (must be initialized)
 630  * \return GL_TRUE if program completed or GL_FALSE if program executed KIL.
 631  */
 632 GLboolean
 633 _mesa_execute_program(GLcontext * ctx,
 634                       const struct gl_program *program,
 635                       struct gl_program_machine *machine)
 636 {
 637    const GLuint numInst = program->NumInstructions;
 638    const GLuint maxExec = 10000;
 639    GLuint pc, numExec = 0;
 640
 641    machine->CurProgram = program;
 642
 643    if (DEBUG_PROG) {
 644       printf("execute program %u --------------------\n", program->Id);
 645    }
 646
 647    if (program->Target == GL_VERTEX_PROGRAM_ARB) {
 648       machine->EnvParams = ctx->VertexProgram.Parameters;
 649    }
 650    else {
 651       machine->EnvParams = ctx->FragmentProgram.Parameters;
 652    }
 653
 654    for (pc = 0; pc < numInst; pc++) {
 655       const struct prog_instruction *inst = program->Instructions + pc;
 656
 657       if (DEBUG_PROG) {
 658          _mesa_print_instruction(inst);
 659       }
 660
 661       switch (inst->Opcode) {
 662       case OPCODE_ABS:
 663          {
 664             GLfloat a[4], result[4];
 665             fetch_vector4(&inst->SrcReg[0], machine, a);
 666             result[0] = FABSF(a[0]);
 667             result[1] = FABSF(a[1]);
 668             result[2] = FABSF(a[2]);
 669             result[3] = FABSF(a[3]);
 670             store_vector4(inst, machine, result);
 671          }
 672          break;
 673       case OPCODE_ADD:
 674          {
 675             GLfloat a[4], b[4], result[4];
 676             fetch_vector4(&inst->SrcReg[0], machine, a);
 677             fetch_vector4(&inst->SrcReg[1], machine, b);
 678             result[0] = a[0] + b[0];
 679             result[1] = a[1] + b[1];
 680             result[2] = a[2] + b[2];
 681             result[3] = a[3] + b[3];
 682             store_vector4(inst, machine, result);
 683             if (DEBUG_PROG) {
 684                printf("ADD (%g %g %g %g) = (%g %g %g %g) + (%g %g %g %g)\n",
 685                       result[0], result[1], result[2], result[3],
 686                       a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
 687             }
 688          }
 689          break;
 690       case OPCODE_AND:     /* bitwise AND */
 691          {
 692             GLuint a[4], b[4], result[4];
 693             fetch_vector4ui(&inst->SrcReg[0], machine, a);
 694             fetch_vector4ui(&inst->SrcReg[1], machine, b);
 695             result[0] = a[0] & b[0];
 696             result[1] = a[1] & b[1];
 697             result[2] = a[2] & b[2];
 698             result[3] = a[3] & b[3];
 699             store_vector4ui(inst, machine, result);
 700          }
 701          break;
 702       case OPCODE_ARL:
 703          {
 704             GLfloat t[4];
 705             fetch_vector4(&inst->SrcReg[0], machine, t);
 706             machine->AddressReg[0][0] = IFLOOR(t[0]);
 707             if (DEBUG_PROG) {
 708                printf("ARL %d\n", machine->AddressReg[0][0]);
 709             }
 710          }
 711          break;
 712       case OPCODE_BGNLOOP:
 713          /* no-op */
 714          ASSERT(program->Instructions[inst->BranchTarget].Opcode
 715                 == OPCODE_ENDLOOP);
 716          break;
 717       case OPCODE_ENDLOOP:
 718          /* subtract 1 here since pc is incremented by for(pc) loop */
 719          ASSERT(program->Instructions[inst->BranchTarget].Opcode
 720                 == OPCODE_BGNLOOP);
 721          pc = inst->BranchTarget - 1;   /* go to matching BNGLOOP */
 722          break;
 723       case OPCODE_BGNSUB:      /* begin subroutine */
 724          break;
 725       case OPCODE_ENDSUB:      /* end subroutine */
 726          break;
 727       case OPCODE_BRA:         /* branch (conditional) */
 728          if (eval_condition(machine, inst)) {
 729             /* take branch */
 730             /* Subtract 1 here since we'll do pc++ below */
 731             pc = inst->BranchTarget - 1;
 732          }
 733          break;
 734       case OPCODE_BRK:         /* break out of loop (conditional) */
 735          ASSERT(program->Instructions[inst->BranchTarget].Opcode
 736                 == OPCODE_ENDLOOP);
 737          if (eval_condition(machine, inst)) {
 738             /* break out of loop */
 739             /* pc++ at end of for-loop will put us after the ENDLOOP inst */
 740             pc = inst->BranchTarget;
 741          }
 742          break;
 743       case OPCODE_CONT:        /* continue loop (conditional) */
 744          ASSERT(program->Instructions[inst->BranchTarget].Opcode
 745                 == OPCODE_ENDLOOP);
 746          if (eval_condition(machine, inst)) {
 747             /* continue at ENDLOOP */
 748             /* Subtract 1 here since we'll do pc++ at end of for-loop */
 749             pc = inst->BranchTarget - 1;
 750          }
 751          break;
 752       case OPCODE_CAL:         /* Call subroutine (conditional) */
 753          if (eval_condition(machine, inst)) {
 754             /* call the subroutine */
 755             if (machine->StackDepth >= MAX_PROGRAM_CALL_DEPTH) {
 756                return GL_TRUE;  /* Per GL_NV_vertex_program2 spec */
 757             }
 758             machine->CallStack[machine->StackDepth++] = pc + 1; /* next inst */
 759             /* Subtract 1 here since we'll do pc++ at end of for-loop */
 760             pc = inst->BranchTarget - 1;
 761          }
 762          break;
 763       case OPCODE_CMP:
 764          {
 765             GLfloat a[4], b[4], c[4], result[4];
 766             fetch_vector4(&inst->SrcReg[0], machine, a);
 767             fetch_vector4(&inst->SrcReg[1], machine, b);
 768             fetch_vector4(&inst->SrcReg[2], machine, c);
 769             result[0] = a[0] < 0.0F ? b[0] : c[0];
 770             result[1] = a[1] < 0.0F ? b[1] : c[1];
 771             result[2] = a[2] < 0.0F ? b[2] : c[2];
 772             result[3] = a[3] < 0.0F ? b[3] : c[3];
 773             store_vector4(inst, machine, result);
 774          }
 775          break;
 776       case OPCODE_COS:
 777          {
 778             GLfloat a[4], result[4];
 779             fetch_vector1(&inst->SrcReg[0], machine, a);
 780             result[0] = result[1] = result[2] = result[3]
 781                = (GLfloat) cos(a[0]);
 782             store_vector4(inst, machine, result);
 783          }
 784          break;
 785       case OPCODE_DDX:         /* Partial derivative with respect to X */
 786          {
 787             GLfloat result[4];
 788             fetch_vector4_deriv(ctx, &inst->SrcReg[0], machine,
 789                                 'X', result);
 790             store_vector4(inst, machine, result);
 791          }
 792          break;
 793       case OPCODE_DDY:         /* Partial derivative with respect to Y */
 794          {
 795             GLfloat result[4];
 796             fetch_vector4_deriv(ctx, &inst->SrcReg[0], machine,
 797                                 'Y', result);
 798             store_vector4(inst, machine, result);
 799          }
 800          break;
 801       case OPCODE_DP2:
 802          {
 803             GLfloat a[4], b[4], result[4];
 804             fetch_vector4(&inst->SrcReg[0], machine, a);
 805             fetch_vector4(&inst->SrcReg[1], machine, b);
 806             result[0] = result[1] = result[2] = result[3] = DOT2(a, b);
 807             store_vector4(inst, machine, result);
 808             if (DEBUG_PROG) {
 809                printf("DP2 %g = (%g %g) . (%g %g)\n",
 810                       result[0], a[0], a[1], b[0], b[1]);
 811             }
 812          }
 813          break;
 814       case OPCODE_DP2A:
 815          {
 816             GLfloat a[4], b[4], c, result[4];
 817             fetch_vector4(&inst->SrcReg[0], machine, a);
 818             fetch_vector4(&inst->SrcReg[1], machine, b);
 819             fetch_vector1(&inst->SrcReg[1], machine, &c);
 820             result[0] = result[1] = result[2] = result[3] = DOT2(a, b) + c;
 821             store_vector4(inst, machine, result);
 822             if (DEBUG_PROG) {
 823                printf("DP2A %g = (%g %g) . (%g %g) + %g\n",
 824                       result[0], a[0], a[1], b[0], b[1], c);
 825             }
 826          }
 827          break;
 828       case OPCODE_DP3:
 829          {
 830             GLfloat a[4], b[4], result[4];
 831             fetch_vector4(&inst->SrcReg[0], machine, a);
 832             fetch_vector4(&inst->SrcReg[1], machine, b);
 833             result[0] = result[1] = result[2] = result[3] = DOT3(a, b);
 834             store_vector4(inst, machine, result);
 835             if (DEBUG_PROG) {
 836                printf("DP3 %g = (%g %g %g) . (%g %g %g)\n",
 837                       result[0], a[0], a[1], a[2], b[0], b[1], b[2]);
 838             }
 839          }
 840          break;
 841       case OPCODE_DP4:
 842          {
 843             GLfloat a[4], b[4], result[4];
 844             fetch_vector4(&inst->SrcReg[0], machine, a);
 845             fetch_vector4(&inst->SrcReg[1], machine, b);
 846             result[0] = result[1] = result[2] = result[3] = DOT4(a, b);
 847             store_vector4(inst, machine, result);
 848             if (DEBUG_PROG) {
 849                printf("DP4 %g = (%g, %g %g %g) . (%g, %g %g %g)\n",
 850                       result[0], a[0], a[1], a[2], a[3],
 851                       b[0], b[1], b[2], b[3]);
 852             }
 853          }
 854          break;
 855       case OPCODE_DPH:
 856          {
 857             GLfloat a[4], b[4], result[4];
 858             fetch_vector4(&inst->SrcReg[0], machine, a);
 859             fetch_vector4(&inst->SrcReg[1], machine, b);
 860             result[0] = result[1] = result[2] = result[3] = DOT3(a, b) + b[3];
 861             store_vector4(inst, machine, result);
 862          }
 863          break;
 864       case OPCODE_DST:         /* Distance vector */
 865          {
 866             GLfloat a[4], b[4], result[4];
 867             fetch_vector4(&inst->SrcReg[0], machine, a);
 868             fetch_vector4(&inst->SrcReg[1], machine, b);
 869             result[0] = 1.0F;
 870             result[1] = a[1] * b[1];
 871             result[2] = a[2];
 872             result[3] = b[3];
 873             store_vector4(inst, machine, result);
 874          }
 875          break;
 876       case OPCODE_EXP:
 877          {
 878             GLfloat t[4], q[4], floor_t0;
 879             fetch_vector1(&inst->SrcReg[0], machine, t);
 880             floor_t0 = FLOORF(t[0]);
 881             if (floor_t0 > FLT_MAX_EXP) {
 882                SET_POS_INFINITY(q[0]);
 883                SET_POS_INFINITY(q[2]);
 884             }
 885             else if (floor_t0 < FLT_MIN_EXP) {
 886                q[0] = 0.0F;
 887                q[2] = 0.0F;
 888             }
 889             else {
 890                q[0] = LDEXPF(1.0, (int) floor_t0);
 891                /* Note: GL_NV_vertex_program expects
 892                 * result.z = result.x * APPX(result.y)
 893                 * We do what the ARB extension says.
 894                 */
 895                q[2] = (GLfloat) pow(2.0, t[0]);
 896             }
 897             q[1] = t[0] - floor_t0;
 898             q[3] = 1.0F;
 899             store_vector4( inst, machine, q );
 900          }
 901          break;
 902       case OPCODE_EX2:         /* Exponential base 2 */
 903          {
 904             GLfloat a[4], result[4], val;
 905             fetch_vector1(&inst->SrcReg[0], machine, a);
 906             val = (GLfloat) pow(2.0, a[0]);
 907             /*
 908             if (IS_INF_OR_NAN(val))
 909                val = 1.0e10;
 910             */
 911             result[0] = result[1] = result[2] = result[3] = val;
 912             store_vector4(inst, machine, result);
 913          }
 914          break;
 915       case OPCODE_FLR:
 916          {
 917             GLfloat a[4], result[4];
 918             fetch_vector4(&inst->SrcReg[0], machine, a);
 919             result[0] = FLOORF(a[0]);
 920             result[1] = FLOORF(a[1]);
 921             result[2] = FLOORF(a[2]);
 922             result[3] = FLOORF(a[3]);
 923             store_vector4(inst, machine, result);
 924          }
 925          break;
 926       case OPCODE_FRC:
 927          {
 928             GLfloat a[4], result[4];
 929             fetch_vector4(&inst->SrcReg[0], machine, a);
 930             result[0] = a[0] - FLOORF(a[0]);
 931             result[1] = a[1] - FLOORF(a[1]);
 932             result[2] = a[2] - FLOORF(a[2]);
 933             result[3] = a[3] - FLOORF(a[3]);
 934             store_vector4(inst, machine, result);
 935          }
 936          break;
 937       case OPCODE_IF:
 938          {
 939             GLboolean cond;
 940             ASSERT(program->Instructions[inst->BranchTarget].Opcode
 941                    == OPCODE_ELSE ||
 942                    program->Instructions[inst->BranchTarget].Opcode
 943                    == OPCODE_ENDIF);
 944             /* eval condition */
 945             if (inst->SrcReg[0].File != PROGRAM_UNDEFINED) {
 946                GLfloat a[4];
 947                fetch_vector1(&inst->SrcReg[0], machine, a);
 948                cond = (a[0] != 0.0);
 949             }
 950             else {
 951                cond = eval_condition(machine, inst);
 952             }
 953             if (DEBUG_PROG) {
 954                printf("IF: %d\n", cond);
 955             }
 956             /* do if/else */
 957             if (cond) {
 958                /* do if-clause (just continue execution) */
 959             }
 960             else {
 961                /* go to the instruction after ELSE or ENDIF */
 962                assert(inst->BranchTarget >= 0);
 963                pc = inst->BranchTarget;
 964             }
 965          }
 966          break;
 967       case OPCODE_ELSE:
 968          /* goto ENDIF */
 969          ASSERT(program->Instructions[inst->BranchTarget].Opcode
 970                 == OPCODE_ENDIF);
 971          assert(inst->BranchTarget >= 0);
 972          pc = inst->BranchTarget;
 973          break;
 974       case OPCODE_ENDIF:
 975          /* nothing */
 976          break;
 977       case OPCODE_KIL_NV:      /* NV_f_p only (conditional) */
 978          if (eval_condition(machine, inst)) {
 979             return GL_FALSE;
 980          }
 981          break;
 982       case OPCODE_KIL:         /* ARB_f_p only */
 983          {
 984             GLfloat a[4];
 985             fetch_vector4(&inst->SrcReg[0], machine, a);
 986             if (DEBUG_PROG) {
 987                printf("KIL if (%g %g %g %g) <= 0.0\n",
 988                       a[0], a[1], a[2], a[3]);
 989             }
 990
 991             if (a[0] < 0.0F || a[1] < 0.0F || a[2] < 0.0F || a[3] < 0.0F) {
 992                return GL_FALSE;
 993             }
 994          }
 995          break;
 996       case OPCODE_LG2:         /* log base 2 */
 997          {
 998             GLfloat a[4], result[4], val;
 999             fetch_vector1(&inst->SrcReg[0], machine, a);
1000             /* The fast LOG2 macro doesn't meet the precision requirements.
1001              */
1002             if (a[0] == 0.0F) {
1003                val = -FLT_MAX;
1004             }
1005             else {
1006                val = (float)(log(a[0]) * 1.442695F);
1007             }
1008             result[0] = result[1] = result[2] = result[3] = val;
1009             store_vector4(inst, machine, result);
1010          }
1011          break;
1012       case OPCODE_LIT:
1013          {
1014             const GLfloat epsilon = 1.0F / 256.0F;      /* from NV VP spec */
1015             GLfloat a[4], result[4];
1016             fetch_vector4(&inst->SrcReg[0], machine, a);
1017             a[0] = MAX2(a[0], 0.0F);
1018             a[1] = MAX2(a[1], 0.0F);
1019             /* XXX ARB version clamps a[3], NV version doesn't */
1020             a[3] = CLAMP(a[3], -(128.0F - epsilon), (128.0F - epsilon));
1021             result[0] = 1.0F;
1022             result[1] = a[0];
1023             /* XXX we could probably just use pow() here */
1024             if (a[0] > 0.0F) {
1025                if (a[1] == 0.0 && a[3] == 0.0)
1026                   result[2] = 1.0F;
1027                else
1028                   result[2] = (GLfloat) pow(a[1], a[3]);
1029             }
1030             else {
1031                result[2] = 0.0F;
1032             }
1033             result[3] = 1.0F;
1034             store_vector4(inst, machine, result);
1035             if (DEBUG_PROG) {
1036                printf("LIT (%g %g %g %g) : (%g %g %g %g)\n",
1037                       result[0], result[1], result[2], result[3],
1038                       a[0], a[1], a[2], a[3]);
1039             }
1040          }
1041          break;
1042       case OPCODE_LOG:
1043          {
1044             GLfloat t[4], q[4], abs_t0;
1045             fetch_vector1(&inst->SrcReg[0], machine, t);
1046             abs_t0 = FABSF(t[0]);
1047             if (abs_t0 != 0.0F) {
1048                /* Since we really can't handle infinite values on VMS
1049                 * like other OSes we'll use __MAXFLOAT to represent
1050                 * infinity.  This may need some tweaking.
1051                 */
1052 #ifdef VMS
1053                if (abs_t0 == __MAXFLOAT)
1054 #else
1055                if (IS_INF_OR_NAN(abs_t0))
1056 #endif
1057                {
1058                   SET_POS_INFINITY(q[0]);
1059                   q[1] = 1.0F;
1060                   SET_POS_INFINITY(q[2]);
1061                }
1062                else {
1063                   int exponent;
1064                   GLfloat mantissa = FREXPF(t[0], &exponent);
1065                   q[0] = (GLfloat) (exponent - 1);
1066                   q[1] = (GLfloat) (2.0 * mantissa); /* map [.5, 1) -> [1, 2) */
1067
1068                   /* The fast LOG2 macro doesn't meet the precision
1069                    * requirements.
1070                    */
1071                   q[2] = (float)(log(t[0]) * 1.442695F);
1072                }
1073             }
1074             else {
1075                SET_NEG_INFINITY(q[0]);
1076                q[1] = 1.0F;
1077                SET_NEG_INFINITY(q[2]);
1078             }
1079             q[3] = 1.0;
1080             store_vector4(inst, machine, q);
1081          }
1082          break;
1083       case OPCODE_LRP:
1084          {
1085             GLfloat a[4], b[4], c[4], result[4];
1086             fetch_vector4(&inst->SrcReg[0], machine, a);
1087             fetch_vector4(&inst->SrcReg[1], machine, b);
1088             fetch_vector4(&inst->SrcReg[2], machine, c);
1089             result[0] = a[0] * b[0] + (1.0F - a[0]) * c[0];
1090             result[1] = a[1] * b[1] + (1.0F - a[1]) * c[1];
1091             result[2] = a[2] * b[2] + (1.0F - a[2]) * c[2];
1092             result[3] = a[3] * b[3] + (1.0F - a[3]) * c[3];
1093             store_vector4(inst, machine, result);
1094             if (DEBUG_PROG) {
1095                printf("LRP (%g %g %g %g) = (%g %g %g %g), "
1096                       "(%g %g %g %g), (%g %g %g %g)\n",
1097                       result[0], result[1], result[2], result[3],
1098                       a[0], a[1], a[2], a[3],
1099                       b[0], b[1], b[2], b[3], c[0], c[1], c[2], c[3]);
1100             }
1101          }
1102          break;
1103       case OPCODE_MAD:
1104          {
1105             GLfloat a[4], b[4], c[4], result[4];
1106             fetch_vector4(&inst->SrcReg[0], machine, a);
1107             fetch_vector4(&inst->SrcReg[1], machine, b);
1108             fetch_vector4(&inst->SrcReg[2], machine, c);
1109             result[0] = a[0] * b[0] + c[0];
1110             result[1] = a[1] * b[1] + c[1];
1111             result[2] = a[2] * b[2] + c[2];
1112             result[3] = a[3] * b[3] + c[3];
1113             store_vector4(inst, machine, result);
1114             if (DEBUG_PROG) {
1115                printf("MAD (%g %g %g %g) = (%g %g %g %g) * "
1116                       "(%g %g %g %g) + (%g %g %g %g)\n",
1117                       result[0], result[1], result[2], result[3],
1118                       a[0], a[1], a[2], a[3],
1119                       b[0], b[1], b[2], b[3], c[0], c[1], c[2], c[3]);
1120             }
1121          }
1122          break;
1123       case OPCODE_MAX:
1124          {
1125             GLfloat a[4], b[4], result[4];
1126             fetch_vector4(&inst->SrcReg[0], machine, a);
1127             fetch_vector4(&inst->SrcReg[1], machine, b);
1128             result[0] = MAX2(a[0], b[0]);
1129             result[1] = MAX2(a[1], b[1]);
1130             result[2] = MAX2(a[2], b[2]);
1131             result[3] = MAX2(a[3], b[3]);
1132             store_vector4(inst, machine, result);
1133             if (DEBUG_PROG) {
1134                printf("MAX (%g %g %g %g) = (%g %g %g %g), (%g %g %g %g)\n",
1135                       result[0], result[1], result[2], result[3],
1136                       a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
1137             }
1138          }
1139          break;
1140       case OPCODE_MIN:
1141          {
1142             GLfloat a[4], b[4], result[4];
1143             fetch_vector4(&inst->SrcReg[0], machine, a);
1144             fetch_vector4(&inst->SrcReg[1], machine, b);
1145             result[0] = MIN2(a[0], b[0]);
1146             result[1] = MIN2(a[1], b[1]);
1147             result[2] = MIN2(a[2], b[2]);
1148             result[3] = MIN2(a[3], b[3]);
1149             store_vector4(inst, machine, result);
1150          }
1151          break;
1152       case OPCODE_MOV:
1153          {
1154             GLfloat result[4];
1155             fetch_vector4(&inst->SrcReg[0], machine, result);
1156             store_vector4(inst, machine, result);
1157             if (DEBUG_PROG) {
1158                printf("MOV (%g %g %g %g)\n",
1159                       result[0], result[1], result[2], result[3]);
1160             }
1161          }
1162          break;
1163       case OPCODE_MUL:
1164          {
1165             GLfloat a[4], b[4], result[4];
1166             fetch_vector4(&inst->SrcReg[0], machine, a);
1167             fetch_vector4(&inst->SrcReg[1], machine, b);
1168             result[0] = a[0] * b[0];
1169             result[1] = a[1] * b[1];
1170             result[2] = a[2] * b[2];
1171             result[3] = a[3] * b[3];
1172             store_vector4(inst, machine, result);
1173             if (DEBUG_PROG) {
1174                printf("MUL (%g %g %g %g) = (%g %g %g %g) * (%g %g %g %g)\n",
1175                       result[0], result[1], result[2], result[3],
1176                       a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
1177             }
1178          }
1179          break;
1180       case OPCODE_NOISE1:
1181          {
1182             GLfloat a[4], result[4];
1183             fetch_vector1(&inst->SrcReg[0], machine, a);
1184             result[0] =
1185                result[1] =
1186                result[2] =
1187                result[3] = _mesa_noise1(a[0]);
1188             store_vector4(inst, machine, result);
1189          }
1190          break;
1191       case OPCODE_NOISE2:
1192          {
1193             GLfloat a[4], result[4];
1194             fetch_vector4(&inst->SrcReg[0], machine, a);
1195             result[0] =
1196                result[1] =
1197                result[2] = result[3] = _mesa_noise2(a[0], a[1]);
1198             store_vector4(inst, machine, result);
1199          }
1200          break;
1201       case OPCODE_NOISE3:
1202          {
1203             GLfloat a[4], result[4];
1204             fetch_vector4(&inst->SrcReg[0], machine, a);
1205             result[0] =
1206                result[1] =
1207                result[2] =
1208                result[3] = _mesa_noise3(a[0], a[1], a[2]);
1209             store_vector4(inst, machine, result);
1210          }
1211          break;
1212       case OPCODE_NOISE4:
1213          {
1214             GLfloat a[4], result[4];
1215             fetch_vector4(&inst->SrcReg[0], machine, a);
1216             result[0] =
1217                result[1] =
1218                result[2] =
1219                result[3] = _mesa_noise4(a[0], a[1], a[2], a[3]);
1220             store_vector4(inst, machine, result);
1221          }
1222          break;
1223       case OPCODE_NOP:
1224          break;
1225       case OPCODE_NOT:         /* bitwise NOT */
1226          {
1227             GLuint a[4], result[4];
1228             fetch_vector4ui(&inst->SrcReg[0], machine, a);
1229             result[0] = ~a[0];
1230             result[1] = ~a[1];
1231             result[2] = ~a[2];
1232             result[3] = ~a[3];
1233             store_vector4ui(inst, machine, result);
1234          }
1235          break;
1236       case OPCODE_NRM3:        /* 3-component normalization */
1237          {
1238             GLfloat a[4], result[4];
1239             GLfloat tmp;
1240             fetch_vector4(&inst->SrcReg[0], machine, a);
1241             tmp = a[0] * a[0] + a[1] * a[1] + a[2] * a[2];
1242             if (tmp != 0.0F)
1243                tmp = INV_SQRTF(tmp);
1244             result[0] = tmp * a[0];
1245             result[1] = tmp * a[1];
1246             result[2] = tmp * a[2];
1247             result[3] = 0.0;  /* undefined, but prevent valgrind warnings */
1248             store_vector4(inst, machine, result);
1249          }
1250          break;
1251       case OPCODE_NRM4:        /* 4-component normalization */
1252          {
1253             GLfloat a[4], result[4];
1254             GLfloat tmp;
1255             fetch_vector4(&inst->SrcReg[0], machine, a);
1256             tmp = a[0] * a[0] + a[1] * a[1] + a[2] * a[2] + a[3] * a[3];
1257             if (tmp != 0.0F)
1258                tmp = INV_SQRTF(tmp);
1259             result[0] = tmp * a[0];
1260             result[1] = tmp * a[1];
1261             result[2] = tmp * a[2];
1262             result[3] = tmp * a[3];
1263             store_vector4(inst, machine, result);
1264          }
1265          break;
1266       case OPCODE_OR:          /* bitwise OR */
1267          {
1268             GLuint a[4], b[4], result[4];
1269             fetch_vector4ui(&inst->SrcReg[0], machine, a);
1270             fetch_vector4ui(&inst->SrcReg[1], machine, b);
1271             result[0] = a[0] | b[0];
1272             result[1] = a[1] | b[1];
1273             result[2] = a[2] | b[2];
1274             result[3] = a[3] | b[3];
1275             store_vector4ui(inst, machine, result);
1276          }
1277          break;
1278       case OPCODE_PK2H:        /* pack two 16-bit floats in one 32-bit float */
1279          {
1280             GLfloat a[4];
1281             GLuint result[4];
1282             GLhalfNV hx, hy;
1283             fetch_vector4(&inst->SrcReg[0], machine, a);
1284             hx = _mesa_float_to_half(a[0]);
1285             hy = _mesa_float_to_half(a[1]);
1286             result[0] =
1287             result[1] =
1288             result[2] =
1289             result[3] = hx | (hy << 16);
1290             store_vector4ui(inst, machine, result);
1291          }
1292          break;
1293       case OPCODE_PK2US:       /* pack two GLushorts into one 32-bit float */
1294          {
1295             GLfloat a[4];
1296             GLuint result[4], usx, usy;
1297             fetch_vector4(&inst->SrcReg[0], machine, a);
1298             a[0] = CLAMP(a[0], 0.0F, 1.0F);
1299             a[1] = CLAMP(a[1], 0.0F, 1.0F);
1300             usx = IROUND(a[0] * 65535.0F);
1301             usy = IROUND(a[1] * 65535.0F);
1302             result[0] =
1303             result[1] =
1304             result[2] =
1305             result[3] = usx | (usy << 16);
1306             store_vector4ui(inst, machine, result);
1307          }
1308          break;
1309       case OPCODE_PK4B:        /* pack four GLbytes into one 32-bit float */
1310          {
1311             GLfloat a[4];
1312             GLuint result[4], ubx, uby, ubz, ubw;
1313             fetch_vector4(&inst->SrcReg[0], machine, a);
1314             a[0] = CLAMP(a[0], -128.0F / 127.0F, 1.0F);
1315             a[1] = CLAMP(a[1], -128.0F / 127.0F, 1.0F);
1316             a[2] = CLAMP(a[2], -128.0F / 127.0F, 1.0F);
1317             a[3] = CLAMP(a[3], -128.0F / 127.0F, 1.0F);
1318             ubx = IROUND(127.0F * a[0] + 128.0F);
1319             uby = IROUND(127.0F * a[1] + 128.0F);
1320             ubz = IROUND(127.0F * a[2] + 128.0F);
1321             ubw = IROUND(127.0F * a[3] + 128.0F);
1322             result[0] =
1323             result[1] =
1324             result[2] =
1325             result[3] = ubx | (uby << 8) | (ubz << 16) | (ubw << 24);
1326             store_vector4ui(inst, machine, result);
1327          }
1328          break;
1329       case OPCODE_PK4UB:       /* pack four GLubytes into one 32-bit float */
1330          {
1331             GLfloat a[4];
1332             GLuint result[4], ubx, uby, ubz, ubw;
1333             fetch_vector4(&inst->SrcReg[0], machine, a);
1334             a[0] = CLAMP(a[0], 0.0F, 1.0F);
1335             a[1] = CLAMP(a[1], 0.0F, 1.0F);
1336             a[2] = CLAMP(a[2], 0.0F, 1.0F);
1337             a[3] = CLAMP(a[3], 0.0F, 1.0F);
1338             ubx = IROUND(255.0F * a[0]);
1339             uby = IROUND(255.0F * a[1]);
1340             ubz = IROUND(255.0F * a[2]);
1341             ubw = IROUND(255.0F * a[3]);
1342             result[0] =
1343             result[1] =
1344             result[2] =
1345             result[3] = ubx | (uby << 8) | (ubz << 16) | (ubw << 24);
1346             store_vector4ui(inst, machine, result);
1347          }
1348          break;
1349       case OPCODE_POW:
1350          {
1351             GLfloat a[4], b[4], result[4];
1352             fetch_vector1(&inst->SrcReg[0], machine, a);
1353             fetch_vector1(&inst->SrcReg[1], machine, b);
1354             result[0] = result[1] = result[2] = result[3]
1355                = (GLfloat) pow(a[0], b[0]);
1356             store_vector4(inst, machine, result);
1357          }
1358          break;
1359       case OPCODE_RCC:  /* clamped reciprocal */
1360          {
1361             const float largest = 1.884467e+19, smallest = 5.42101e-20;
1362             GLfloat a[4], r, result[4];
1363             fetch_vector1(&inst->SrcReg[0], machine, a);
1364             if (DEBUG_PROG) {
1365                if (a[0] == 0)
1366                   printf("RCC(0)\n");
1367                else if (IS_INF_OR_NAN(a[0]))
1368                   printf("RCC(inf)\n");
1369             }
1370             if (a[0] == 1.0F) {
1371                r = 1.0F;
1372             }
1373             else {
1374                r = 1.0F / a[0];
1375             }
1376             if (positive(r)) {
1377                if (r > largest) {
1378                   r = largest;
1379                }
1380                else if (r < smallest) {
1381                   r = smallest;
1382                }
1383             }
1384             else {
1385                if (r < -largest) {
1386                   r = -largest;
1387                }
1388                else if (r > -smallest) {
1389                   r = -smallest;
1390                }
1391             }
1392             result[0] = result[1] = result[2] = result[3] = r;
1393             store_vector4(inst, machine, result);
1394          }
1395          break;
1396
1397       case OPCODE_RCP:
1398          {
1399             GLfloat a[4], result[4];
1400             fetch_vector1(&inst->SrcReg[0], machine, a);
1401             if (DEBUG_PROG) {
1402                if (a[0] == 0)
1403                   printf("RCP(0)\n");
1404                else if (IS_INF_OR_NAN(a[0]))
1405                   printf("RCP(inf)\n");
1406             }
1407             result[0] = result[1] = result[2] = result[3] = 1.0F / a[0];
1408             store_vector4(inst, machine, result);
1409          }
1410          break;
1411       case OPCODE_RET:         /* return from subroutine (conditional) */
1412          if (eval_condition(machine, inst)) {
1413             if (machine->StackDepth == 0) {
1414                return GL_TRUE;  /* Per GL_NV_vertex_program2 spec */
1415             }
1416             /* subtract one because of pc++ in the for loop */
1417             pc = machine->CallStack[--machine->StackDepth] - 1;
1418          }
1419          break;
1420       case OPCODE_RFL:         /* reflection vector */
1421          {
1422             GLfloat axis[4], dir[4], result[4], tmpX, tmpW;
1423             fetch_vector4(&inst->SrcReg[0], machine, axis);
1424             fetch_vector4(&inst->SrcReg[1], machine, dir);
1425             tmpW = DOT3(axis, axis);
1426             tmpX = (2.0F * DOT3(axis, dir)) / tmpW;
1427             result[0] = tmpX * axis[0] - dir[0];
1428             result[1] = tmpX * axis[1] - dir[1];
1429             result[2] = tmpX * axis[2] - dir[2];
1430             /* result[3] is never written! XXX enforce in parser! */
1431             store_vector4(inst, machine, result);
1432          }
1433          break;
1434       case OPCODE_RSQ:         /* 1 / sqrt() */
1435          {
1436             GLfloat a[4], result[4];
1437             fetch_vector1(&inst->SrcReg[0], machine, a);
1438             a[0] = FABSF(a[0]);
1439             result[0] = result[1] = result[2] = result[3] = INV_SQRTF(a[0]);
1440             store_vector4(inst, machine, result);
1441             if (DEBUG_PROG) {
1442                printf("RSQ %g = 1/sqrt(|%g|)\n", result[0], a[0]);
1443             }
1444          }
1445          break;
1446       case OPCODE_SCS:         /* sine and cos */
1447          {
1448             GLfloat a[4], result[4];
1449             fetch_vector1(&inst->SrcReg[0], machine, a);
1450             result[0] = (GLfloat) cos(a[0]);
1451             result[1] = (GLfloat) sin(a[0]);
1452             result[2] = 0.0;    /* undefined! */
1453             result[3] = 0.0;    /* undefined! */
1454             store_vector4(inst, machine, result);
1455          }
1456          break;
1457       case OPCODE_SEQ:         /* set on equal */
1458          {
1459             GLfloat a[4], b[4], result[4];
1460             fetch_vector4(&inst->SrcReg[0], machine, a);
1461             fetch_vector4(&inst->SrcReg[1], machine, b);
1462             result[0] = (a[0] == b[0]) ? 1.0F : 0.0F;
1463             result[1] = (a[1] == b[1]) ? 1.0F : 0.0F;
1464             result[2] = (a[2] == b[2]) ? 1.0F : 0.0F;
1465             result[3] = (a[3] == b[3]) ? 1.0F : 0.0F;
1466             store_vector4(inst, machine, result);
1467             if (DEBUG_PROG) {
1468                printf("SEQ (%g %g %g %g) = (%g %g %g %g) == (%g %g %g %g)\n",
1469                       result[0], result[1], result[2], result[3],
1470                       a[0], a[1], a[2], a[3],
1471                       b[0], b[1], b[2], b[3]);
1472             }
1473          }
1474          break;
1475       case OPCODE_SFL:         /* set false, operands ignored */
1476          {
1477             static const GLfloat result[4] = { 0.0F, 0.0F, 0.0F, 0.0F };
1478             store_vector4(inst, machine, result);
1479          }
1480          break;
1481       case OPCODE_SGE:         /* set on greater or equal */
1482          {
1483             GLfloat a[4], b[4], result[4];
1484             fetch_vector4(&inst->SrcReg[0], machine, a);
1485             fetch_vector4(&inst->SrcReg[1], machine, b);
1486             result[0] = (a[0] >= b[0]) ? 1.0F : 0.0F;
1487             result[1] = (a[1] >= b[1]) ? 1.0F : 0.0F;
1488             result[2] = (a[2] >= b[2]) ? 1.0F : 0.0F;
1489             result[3] = (a[3] >= b[3]) ? 1.0F : 0.0F;
1490             store_vector4(inst, machine, result);
1491             if (DEBUG_PROG) {
1492                printf("SGE (%g %g %g %g) = (%g %g %g %g) >= (%g %g %g %g)\n",
1493                       result[0], result[1], result[2], result[3],
1494                       a[0], a[1], a[2], a[3],
1495                       b[0], b[1], b[2], b[3]);
1496             }
1497          }
1498          break;
1499       case OPCODE_SGT:         /* set on greater */
1500          {
1501             GLfloat a[4], b[4], result[4];
1502             fetch_vector4(&inst->SrcReg[0], machine, a);
1503             fetch_vector4(&inst->SrcReg[1], machine, b);
1504             result[0] = (a[0] > b[0]) ? 1.0F : 0.0F;
1505             result[1] = (a[1] > b[1]) ? 1.0F : 0.0F;
1506             result[2] = (a[2] > b[2]) ? 1.0F : 0.0F;
1507             result[3] = (a[3] > b[3]) ? 1.0F : 0.0F;
1508             store_vector4(inst, machine, result);
1509             if (DEBUG_PROG) {
1510                printf("SGT (%g %g %g %g) = (%g %g %g %g) > (%g %g %g %g)\n",
1511                       result[0], result[1], result[2], result[3],
1512                       a[0], a[1], a[2], a[3],
1513                       b[0], b[1], b[2], b[3]);
1514             }
1515          }
1516          break;
1517       case OPCODE_SIN:
1518          {
1519             GLfloat a[4], result[4];
1520             fetch_vector1(&inst->SrcReg[0], machine, a);
1521             result[0] = result[1] = result[2] = result[3]
1522                = (GLfloat) sin(a[0]);
1523             store_vector4(inst, machine, result);
1524          }
1525          break;
1526       case OPCODE_SLE:         /* set on less or equal */
1527          {
1528             GLfloat a[4], b[4], result[4];
1529             fetch_vector4(&inst->SrcReg[0], machine, a);
1530             fetch_vector4(&inst->SrcReg[1], machine, b);
1531             result[0] = (a[0] <= b[0]) ? 1.0F : 0.0F;
1532             result[1] = (a[1] <= b[1]) ? 1.0F : 0.0F;
1533             result[2] = (a[2] <= b[2]) ? 1.0F : 0.0F;
1534             result[3] = (a[3] <= b[3]) ? 1.0F : 0.0F;
1535             store_vector4(inst, machine, result);
1536             if (DEBUG_PROG) {
1537                printf("SLE (%g %g %g %g) = (%g %g %g %g) <= (%g %g %g %g)\n",
1538                       result[0], result[1], result[2], result[3],
1539                       a[0], a[1], a[2], a[3],
1540                       b[0], b[1], b[2], b[3]);
1541             }
1542          }
1543          break;
1544       case OPCODE_SLT:         /* set on less */
1545          {
1546             GLfloat a[4], b[4], result[4];
1547             fetch_vector4(&inst->SrcReg[0], machine, a);
1548             fetch_vector4(&inst->SrcReg[1], machine, b);
1549             result[0] = (a[0] < b[0]) ? 1.0F : 0.0F;
1550             result[1] = (a[1] < b[1]) ? 1.0F : 0.0F;
1551             result[2] = (a[2] < b[2]) ? 1.0F : 0.0F;
1552             result[3] = (a[3] < b[3]) ? 1.0F : 0.0F;
1553             store_vector4(inst, machine, result);
1554             if (DEBUG_PROG) {
1555                printf("SLT (%g %g %g %g) = (%g %g %g %g) < (%g %g %g %g)\n",
1556                       result[0], result[1], result[2], result[3],
1557                       a[0], a[1], a[2], a[3],
1558                       b[0], b[1], b[2], b[3]);
1559             }
1560          }
1561          break;
1562       case OPCODE_SNE:         /* set on not equal */
1563          {
1564             GLfloat a[4], b[4], result[4];
1565             fetch_vector4(&inst->SrcReg[0], machine, a);
1566             fetch_vector4(&inst->SrcReg[1], machine, b);
1567             result[0] = (a[0] != b[0]) ? 1.0F : 0.0F;
1568             result[1] = (a[1] != b[1]) ? 1.0F : 0.0F;
1569             result[2] = (a[2] != b[2]) ? 1.0F : 0.0F;
1570             result[3] = (a[3] != b[3]) ? 1.0F : 0.0F;
1571             store_vector4(inst, machine, result);
1572             if (DEBUG_PROG) {
1573                printf("SNE (%g %g %g %g) = (%g %g %g %g) != (%g %g %g %g)\n",
1574                       result[0], result[1], result[2], result[3],
1575                       a[0], a[1], a[2], a[3],
1576                       b[0], b[1], b[2], b[3]);
1577             }
1578          }
1579          break;
1580       case OPCODE_SSG:         /* set sign (-1, 0 or +1) */
1581          {
1582             GLfloat a[4], result[4];
1583             fetch_vector4(&inst->SrcReg[0], machine, a);
1584             result[0] = (GLfloat) ((a[0] > 0.0F) - (a[0] < 0.0F));
1585             result[1] = (GLfloat) ((a[1] > 0.0F) - (a[1] < 0.0F));
1586             result[2] = (GLfloat) ((a[2] > 0.0F) - (a[2] < 0.0F));
1587             result[3] = (GLfloat) ((a[3] > 0.0F) - (a[3] < 0.0F));
1588             store_vector4(inst, machine, result);
1589          }
1590          break;
1591       case OPCODE_STR:         /* set true, operands ignored */
1592          {
1593             static const GLfloat result[4] = { 1.0F, 1.0F, 1.0F, 1.0F };
1594             store_vector4(inst, machine, result);
1595          }
1596          break;
1597       case OPCODE_SUB:
1598          {
1599             GLfloat a[4], b[4], result[4];
1600             fetch_vector4(&inst->SrcReg[0], machine, a);
1601             fetch_vector4(&inst->SrcReg[1], machine, b);
1602             result[0] = a[0] - b[0];
1603             result[1] = a[1] - b[1];
1604             result[2] = a[2] - b[2];
1605             result[3] = a[3] - b[3];
1606             store_vector4(inst, machine, result);
1607             if (DEBUG_PROG) {
1608                printf("SUB (%g %g %g %g) = (%g %g %g %g) - (%g %g %g %g)\n",
1609                       result[0], result[1], result[2], result[3],
1610                       a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
1611             }
1612          }
1613          break;
1614       case OPCODE_SWZ:         /* extended swizzle */
1615          {
1616             const struct prog_src_register *source = &inst->SrcReg[0];
1617             const GLfloat *src = get_src_register_pointer(source, machine);
1618             GLfloat result[4];
1619             GLuint i;
1620             for (i = 0; i < 4; i++) {
1621                const GLuint swz = GET_SWZ(source->Swizzle, i);
1622                if (swz == SWIZZLE_ZERO)
1623                   result[i] = 0.0;
1624                else if (swz == SWIZZLE_ONE)
1625                   result[i] = 1.0;
1626                else {
1627                   ASSERT(swz >= 0);
1628                   ASSERT(swz <= 3);
1629                   result[i] = src[swz];
1630                }
1631                if (source->Negate & (1 << i))
1632                   result[i] = -result[i];
1633             }
1634             store_vector4(inst, machine, result);
1635          }
1636          break;
1637       case OPCODE_TEX:         /* Both ARB and NV frag prog */
1638          /* Simple texel lookup */
1639          {
1640             GLfloat texcoord[4], color[4];
1641             fetch_vector4(&inst->SrcReg[0], machine, texcoord);
1642
1643             fetch_texel(ctx, machine, inst, texcoord, 0.0, color);
1644
1645             if (DEBUG_PROG) {
1646                printf("TEX (%g, %g, %g, %g) = texture[%d][%g, %g, %g, %g]\n",
1647                       color[0], color[1], color[2], color[3],
1648                       inst->TexSrcUnit,
1649                       texcoord[0], texcoord[1], texcoord[2], texcoord[3]);
1650             }
1651             store_vector4(inst, machine, color);
1652          }
1653          break;
1654       case OPCODE_TXB:         /* GL_ARB_fragment_program only */
1655          /* Texel lookup with LOD bias */
1656          {
1657             GLfloat texcoord[4], color[4], lodBias;
1658
1659             fetch_vector4(&inst->SrcReg[0], machine, texcoord);
1660
1661             /* texcoord[3] is the bias to add to lambda */
1662             lodBias = texcoord[3];
1663
1664             fetch_texel(ctx, machine, inst, texcoord, lodBias, color);
1665
1666             store_vector4(inst, machine, color);
1667          }
1668          break;
1669       case OPCODE_TXD:         /* GL_NV_fragment_program only */
1670          /* Texture lookup w/ partial derivatives for LOD */
1671          {
1672             GLfloat texcoord[4], dtdx[4], dtdy[4], color[4];
1673             fetch_vector4(&inst->SrcReg[0], machine, texcoord);
1674             fetch_vector4(&inst->SrcReg[1], machine, dtdx);
1675             fetch_vector4(&inst->SrcReg[2], machine, dtdy);
1676             machine->FetchTexelDeriv(ctx, texcoord, dtdx, dtdy,
1677                                      0.0, /* lodBias */
1678                                      inst->TexSrcUnit, color);
1679             store_vector4(inst, machine, color);
1680          }
1681          break;
1682       case OPCODE_TXP:         /* GL_ARB_fragment_program only */
1683          /* Texture lookup w/ projective divide */
1684          {
1685             GLfloat texcoord[4], color[4];
1686
1687             fetch_vector4(&inst->SrcReg[0], machine, texcoord);
1688             /* Not so sure about this test - if texcoord[3] is
1689              * zero, we'd probably be fine except for an ASSERT in
1690              * IROUND_POS() which gets triggered by the inf values created.
1691              */
1692             if (texcoord[3] != 0.0) {
1693                texcoord[0] /= texcoord[3];
1694                texcoord[1] /= texcoord[3];
1695                texcoord[2] /= texcoord[3];
1696             }
1697
1698             fetch_texel(ctx, machine, inst, texcoord, 0.0, color);
1699
1700             store_vector4(inst, machine, color);
1701          }
1702          break;
1703       case OPCODE_TXP_NV:      /* GL_NV_fragment_program only */
1704          /* Texture lookup w/ projective divide, as above, but do not
1705           * do the divide by w if sampling from a cube map.
1706           */
1707          {
1708             GLfloat texcoord[4], color[4];
1709
1710             fetch_vector4(&inst->SrcReg[0], machine, texcoord);
1711             if (inst->TexSrcTarget != TEXTURE_CUBE_INDEX &&
1712                 texcoord[3] != 0.0) {
1713                texcoord[0] /= texcoord[3];
1714                texcoord[1] /= texcoord[3];
1715                texcoord[2] /= texcoord[3];
1716             }
1717
1718             fetch_texel(ctx, machine, inst, texcoord, 0.0, color);
1719
1720             store_vector4(inst, machine, color);
1721          }
1722          break;
1723       case OPCODE_TRUNC:       /* truncate toward zero */
1724          {
1725             GLfloat a[4], result[4];
1726             fetch_vector4(&inst->SrcReg[0], machine, a);
1727             result[0] = (GLfloat) (GLint) a[0];
1728             result[1] = (GLfloat) (GLint) a[1];
1729             result[2] = (GLfloat) (GLint) a[2];
1730             result[3] = (GLfloat) (GLint) a[3];
1731             store_vector4(inst, machine, result);
1732          }
1733          break;
1734       case OPCODE_UP2H:        /* unpack two 16-bit floats */
1735          {
1736             const GLuint raw = fetch_vector1ui(&inst->SrcReg[0], machine);
1737             GLfloat result[4];
1738             GLushort hx, hy;
1739             hx = raw & 0xffff;
1740             hy = raw >> 16;
1741             result[0] = result[2] = _mesa_half_to_float(hx);
1742             result[1] = result[3] = _mesa_half_to_float(hy);
1743             store_vector4(inst, machine, result);
1744          }
1745          break;
1746       case OPCODE_UP2US:       /* unpack two GLushorts */
1747          {
1748             const GLuint raw = fetch_vector1ui(&inst->SrcReg[0], machine);
1749             GLfloat result[4];
1750             GLushort usx, usy;
1751             usx = raw & 0xffff;
1752             usy = raw >> 16;
1753             result[0] = result[2] = usx * (1.0f / 65535.0f);
1754             result[1] = result[3] = usy * (1.0f / 65535.0f);
1755             store_vector4(inst, machine, result);
1756          }
1757          break;
1758       case OPCODE_UP4B:        /* unpack four GLbytes */
1759          {
1760             const GLuint raw = fetch_vector1ui(&inst->SrcReg[0], machine);
1761             GLfloat result[4];
1762             result[0] = (((raw >> 0) & 0xff) - 128) / 127.0F;
1763             result[1] = (((raw >> 8) & 0xff) - 128) / 127.0F;
1764             result[2] = (((raw >> 16) & 0xff) - 128) / 127.0F;
1765             result[3] = (((raw >> 24) & 0xff) - 128) / 127.0F;
1766             store_vector4(inst, machine, result);
1767          }
1768          break;
1769       case OPCODE_UP4UB:       /* unpack four GLubytes */
1770          {
1771             const GLuint raw = fetch_vector1ui(&inst->SrcReg[0], machine);
1772             GLfloat result[4];
1773             result[0] = ((raw >> 0) & 0xff) / 255.0F;
1774             result[1] = ((raw >> 8) & 0xff) / 255.0F;
1775             result[2] = ((raw >> 16) & 0xff) / 255.0F;
1776             result[3] = ((raw >> 24) & 0xff) / 255.0F;
1777             store_vector4(inst, machine, result);
1778          }
1779          break;
1780       case OPCODE_XOR:         /* bitwise XOR */
1781          {
1782             GLuint a[4], b[4], result[4];
1783             fetch_vector4ui(&inst->SrcReg[0], machine, a);
1784             fetch_vector4ui(&inst->SrcReg[1], machine, b);
1785             result[0] = a[0] ^ b[0];
1786             result[1] = a[1] ^ b[1];
1787             result[2] = a[2] ^ b[2];
1788             result[3] = a[3] ^ b[3];
1789             store_vector4ui(inst, machine, result);
1790          }
1791          break;
1792       case OPCODE_XPD:         /* cross product */
1793          {
1794             GLfloat a[4], b[4], result[4];
1795             fetch_vector4(&inst->SrcReg[0], machine, a);
1796             fetch_vector4(&inst->SrcReg[1], machine, b);
1797             result[0] = a[1] * b[2] - a[2] * b[1];
1798             result[1] = a[2] * b[0] - a[0] * b[2];
1799             result[2] = a[0] * b[1] - a[1] * b[0];
1800             result[3] = 1.0;
1801             store_vector4(inst, machine, result);
1802             if (DEBUG_PROG) {
1803                printf("XPD (%g %g %g %g) = (%g %g %g) X (%g %g %g)\n",
1804                       result[0], result[1], result[2], result[3],
1805                       a[0], a[1], a[2], b[0], b[1], b[2]);
1806             }
1807          }
1808          break;
1809       case OPCODE_X2D:         /* 2-D matrix transform */
1810          {
1811             GLfloat a[4], b[4], c[4], result[4];
1812             fetch_vector4(&inst->SrcReg[0], machine, a);
1813             fetch_vector4(&inst->SrcReg[1], machine, b);
1814             fetch_vector4(&inst->SrcReg[2], machine, c);
1815             result[0] = a[0] + b[0] * c[0] + b[1] * c[1];
1816             result[1] = a[1] + b[0] * c[2] + b[1] * c[3];
1817             result[2] = a[2] + b[0] * c[0] + b[1] * c[1];
1818             result[3] = a[3] + b[0] * c[2] + b[1] * c[3];
1819             store_vector4(inst, machine, result);
1820          }
1821          break;
1822       case OPCODE_PRINT:
1823          {
1824             if (inst->SrcReg[0].File != PROGRAM_UNDEFINED) {
1825                GLfloat a[4];
1826                fetch_vector4(&inst->SrcReg[0], machine, a);
1827                printf("%s%g, %g, %g, %g\n", (const char *) inst->Data,
1828                             a[0], a[1], a[2], a[3]);
1829             }
1830             else {
1831                printf("%s\n", (const char *) inst->Data);
1832             }
1833          }
1834          break;
1835       case OPCODE_END:
1836          return GL_TRUE;
1837       default:
1838          _mesa_problem(ctx, "Bad opcode %d in _mesa_execute_program",
1839                        inst->Opcode);
1840          return GL_TRUE;        /* return value doesn't matter */
1841       }
1842
1843       numExec++;
1844       if (numExec > maxExec) {
1845          _mesa_problem(ctx, "Infinite loop detected in fragment program");
1846          return GL_TRUE;
1847       }
1848
1849    } /* for pc */
1850
1851    return GL_TRUE;
1852 }