src/mesa/program/prog_execute.c

   1 /*
   2  * Mesa 3-D graphics library
   3  * Version:  7.3
   4  *
   5  * Copyright (C) 1999-2008  Brian Paul   All Rights Reserved.
   6  *
   7  * Permission is hereby granted, free of charge, to any person obtaining a
   8  * copy of this software and associated documentation files (the "Software"),
   9  * to deal in the Software without restriction, including without limitation
  10  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  11  * and/or sell copies of the Software, and to permit persons to whom the
  12  * Software is furnished to do so, subject to the following conditions:
  13  *
  14  * The above copyright notice and this permission notice shall be included
  15  * in all copies or substantial portions of the Software.
  16  *
  17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  18  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  20  * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
  21  * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  22  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  23  */
  24
  25 /**
  26  * \file prog_execute.c
  27  * Software interpreter for vertex/fragment programs.
  28  * \author Brian Paul
  29  */
  30
  31 /*
  32  * NOTE: we do everything in single-precision floating point; we don't
  33  * currently observe the single/half/fixed-precision qualifiers.
  34  *
  35  */
  36
  37
  38 #include "main/glheader.h"
  39 #include "main/colormac.h"
  40 #include "main/macros.h"
  41 #include "prog_execute.h"
  42 #include "prog_instruction.h"
  43 #include "prog_parameter.h"
  44 #include "prog_print.h"
  45 #include "prog_noise.h"
  46
  47
  48 /* debug predicate */
  49 #define DEBUG_PROG 0
  50
  51
  52 /**
  53  * Set x to positive or negative infinity.
  54  */
  55 #if defined(USE_IEEE) || defined(_WIN32)
  56 #define SET_POS_INFINITY(x)                  \
  57    do {                                      \
  58          fi_type fi;                         \
  59          fi.i = 0x7F800000;                  \
  60          x = fi.f;                           \
  61    } while (0)
  62 #define SET_NEG_INFINITY(x)                  \
  63    do {                                      \
  64          fi_type fi;                         \
  65          fi.i = 0xFF800000;                  \
  66          x = fi.f;                           \
  67    } while (0)
  68 #elif defined(VMS)
  69 #define SET_POS_INFINITY(x)  x = __MAXFLOAT
  70 #define SET_NEG_INFINITY(x)  x = -__MAXFLOAT
  71 #else
  72 #define SET_POS_INFINITY(x)  x = (GLfloat) HUGE_VAL
  73 #define SET_NEG_INFINITY(x)  x = (GLfloat) -HUGE_VAL
  74 #endif
  75
  76 #define SET_FLOAT_BITS(x, bits) ((fi_type *) (void *) &(x))->i = bits
  77
  78
  79 static const GLfloat ZeroVec[4] = { 0.0F, 0.0F, 0.0F, 0.0F };
  80
  81
  82
  83 /**
  84  * Return TRUE for +0 and other positive values, FALSE otherwise.
  85  * Used for RCC opcode.
  86  */
  87 static INLINE GLboolean
  88 positive(float x)
  89 {
  90    fi_type fi;
  91    fi.f = x;
  92    if (fi.i & 0x80000000)
  93       return GL_FALSE;
  94    return GL_TRUE;
  95 }
  96
  97
  98
  99 /**
 100  * Return a pointer to the 4-element float vector specified by the given
 101  * source register.
 102  */
 103 static INLINE const GLfloat *
 104 get_src_register_pointer(const struct prog_src_register *source,
 105                          const struct gl_program_machine *machine)
 106 {
 107    const struct gl_program *prog = machine->CurProgram;
 108    GLint reg = source->Index;
 109
 110    if (source->RelAddr) {
 111       /* add address register value to src index/offset */
 112       reg += machine->AddressReg[0][0];
 113       if (reg < 0) {
 114          return ZeroVec;
 115       }
 116    }
 117
 118    switch (source->File) {
 119    case PROGRAM_TEMPORARY:
 120       if (reg >= MAX_PROGRAM_TEMPS)
 121          return ZeroVec;
 122       return machine->Temporaries[reg];
 123
 124    case PROGRAM_INPUT:
 125       if (prog->Target == GL_VERTEX_PROGRAM_ARB) {
 126          if (reg >= VERT_ATTRIB_MAX)
 127             return ZeroVec;
 128          return machine->VertAttribs[reg];
 129       }
 130       else {
 131          if (reg >= FRAG_ATTRIB_MAX)
 132             return ZeroVec;
 133          return machine->Attribs[reg][machine->CurElement];
 134       }
 135
 136    case PROGRAM_OUTPUT:
 137       if (reg >= MAX_PROGRAM_OUTPUTS)
 138          return ZeroVec;
 139       return machine->Outputs[reg];
 140
 141    case PROGRAM_LOCAL_PARAM:
 142       if (reg >= MAX_PROGRAM_LOCAL_PARAMS)
 143          return ZeroVec;
 144       return machine->CurProgram->LocalParams[reg];
 145
 146    case PROGRAM_ENV_PARAM:
 147       if (reg >= MAX_PROGRAM_ENV_PARAMS)
 148          return ZeroVec;
 149       return machine->EnvParams[reg];
 150
 151    case PROGRAM_STATE_VAR:
 152       /* Fallthrough */
 153    case PROGRAM_CONSTANT:
 154       /* Fallthrough */
 155    case PROGRAM_UNIFORM:
 156       /* Fallthrough */
 157    case PROGRAM_NAMED_PARAM:
 158       if (reg >= (GLint) prog->Parameters->NumParameters)
 159          return ZeroVec;
 160       return prog->Parameters->ParameterValues[reg];
 161
 162    case PROGRAM_SYSTEM_VALUE:
 163       assert(reg < Elements(machine->SystemValues));
 164       return machine->SystemValues[reg];
 165
 166    default:
 167       _mesa_problem(NULL,
 168          "Invalid src register file %d in get_src_register_pointer()",
 169          source->File);
 170       return NULL;
 171    }
 172 }
 173
 174
 175 /**
 176  * Return a pointer to the 4-element float vector specified by the given
 177  * destination register.
 178  */
 179 static INLINE GLfloat *
 180 get_dst_register_pointer(const struct prog_dst_register *dest,
 181                          struct gl_program_machine *machine)
 182 {
 183    static GLfloat dummyReg[4];
 184    GLint reg = dest->Index;
 185
 186    if (dest->RelAddr) {
 187       /* add address register value to src index/offset */
 188       reg += machine->AddressReg[0][0];
 189       if (reg < 0) {
 190          return dummyReg;
 191       }
 192    }
 193
 194    switch (dest->File) {
 195    case PROGRAM_TEMPORARY:
 196       if (reg >= MAX_PROGRAM_TEMPS)
 197          return dummyReg;
 198       return machine->Temporaries[reg];
 199
 200    case PROGRAM_OUTPUT:
 201       if (reg >= MAX_PROGRAM_OUTPUTS)
 202          return dummyReg;
 203       return machine->Outputs[reg];
 204
 205    case PROGRAM_WRITE_ONLY:
 206       return dummyReg;
 207
 208    default:
 209       _mesa_problem(NULL,
 210          "Invalid dest register file %d in get_dst_register_pointer()",
 211          dest->File);
 212       return NULL;
 213    }
 214 }
 215
 216
 217
 218 /**
 219  * Fetch a 4-element float vector from the given source register.
 220  * Apply swizzling and negating as needed.
 221  */
 222 static void
 223 fetch_vector4(const struct prog_src_register *source,
 224               const struct gl_program_machine *machine, GLfloat result[4])
 225 {
 226    const GLfloat *src = get_src_register_pointer(source, machine);
 227    ASSERT(src);
 228
 229    if (source->Swizzle == SWIZZLE_NOOP) {
 230       /* no swizzling */
 231       COPY_4V(result, src);
 232    }
 233    else {
 234       ASSERT(GET_SWZ(source->Swizzle, 0) <= 3);
 235       ASSERT(GET_SWZ(source->Swizzle, 1) <= 3);
 236       ASSERT(GET_SWZ(source->Swizzle, 2) <= 3);
 237       ASSERT(GET_SWZ(source->Swizzle, 3) <= 3);
 238       result[0] = src[GET_SWZ(source->Swizzle, 0)];
 239       result[1] = src[GET_SWZ(source->Swizzle, 1)];
 240       result[2] = src[GET_SWZ(source->Swizzle, 2)];
 241       result[3] = src[GET_SWZ(source->Swizzle, 3)];
 242    }
 243
 244    if (source->Abs) {
 245       result[0] = FABSF(result[0]);
 246       result[1] = FABSF(result[1]);
 247       result[2] = FABSF(result[2]);
 248       result[3] = FABSF(result[3]);
 249    }
 250    if (source->Negate) {
 251       ASSERT(source->Negate == NEGATE_XYZW);
 252       result[0] = -result[0];
 253       result[1] = -result[1];
 254       result[2] = -result[2];
 255       result[3] = -result[3];
 256    }
 257
 258 #ifdef NAN_CHECK
 259    assert(!IS_INF_OR_NAN(result[0]));
 260    assert(!IS_INF_OR_NAN(result[0]));
 261    assert(!IS_INF_OR_NAN(result[0]));
 262    assert(!IS_INF_OR_NAN(result[0]));
 263 #endif
 264 }
 265
 266
 267 /**
 268  * Fetch a 4-element uint vector from the given source register.
 269  * Apply swizzling but not negation/abs.
 270  */
 271 static void
 272 fetch_vector4ui(const struct prog_src_register *source,
 273                 const struct gl_program_machine *machine, GLuint result[4])
 274 {
 275    const GLuint *src = (GLuint *) get_src_register_pointer(source, machine);
 276    ASSERT(src);
 277
 278    if (source->Swizzle == SWIZZLE_NOOP) {
 279       /* no swizzling */
 280       COPY_4V(result, src);
 281    }
 282    else {
 283       ASSERT(GET_SWZ(source->Swizzle, 0) <= 3);
 284       ASSERT(GET_SWZ(source->Swizzle, 1) <= 3);
 285       ASSERT(GET_SWZ(source->Swizzle, 2) <= 3);
 286       ASSERT(GET_SWZ(source->Swizzle, 3) <= 3);
 287       result[0] = src[GET_SWZ(source->Swizzle, 0)];
 288       result[1] = src[GET_SWZ(source->Swizzle, 1)];
 289       result[2] = src[GET_SWZ(source->Swizzle, 2)];
 290       result[3] = src[GET_SWZ(source->Swizzle, 3)];
 291    }
 292
 293    /* Note: no Negate or Abs here */
 294 }
 295
 296
 297
 298 /**
 299  * Fetch the derivative with respect to X or Y for the given register.
 300  * XXX this currently only works for fragment program input attribs.
 301  */
 302 static void
 303 fetch_vector4_deriv(struct gl_context * ctx,
 304                     const struct prog_src_register *source,
 305                     const struct gl_program_machine *machine,
 306                     char xOrY, GLfloat result[4])
 307 {
 308    if (source->File == PROGRAM_INPUT &&
 309        source->Index < (GLint) machine->NumDeriv) {
 310       const GLint col = machine->CurElement;
 311       const GLfloat w = machine->Attribs[FRAG_ATTRIB_WPOS][col][3];
 312       const GLfloat invQ = 1.0f / w;
 313       GLfloat deriv[4];
 314
 315       if (xOrY == 'X') {
 316          deriv[0] = machine->DerivX[source->Index][0] * invQ;
 317          deriv[1] = machine->DerivX[source->Index][1] * invQ;
 318          deriv[2] = machine->DerivX[source->Index][2] * invQ;
 319          deriv[3] = machine->DerivX[source->Index][3] * invQ;
 320       }
 321       else {
 322          deriv[0] = machine->DerivY[source->Index][0] * invQ;
 323          deriv[1] = machine->DerivY[source->Index][1] * invQ;
 324          deriv[2] = machine->DerivY[source->Index][2] * invQ;
 325          deriv[3] = machine->DerivY[source->Index][3] * invQ;
 326       }
 327
 328       result[0] = deriv[GET_SWZ(source->Swizzle, 0)];
 329       result[1] = deriv[GET_SWZ(source->Swizzle, 1)];
 330       result[2] = deriv[GET_SWZ(source->Swizzle, 2)];
 331       result[3] = deriv[GET_SWZ(source->Swizzle, 3)];
 332
 333       if (source->Abs) {
 334          result[0] = FABSF(result[0]);
 335          result[1] = FABSF(result[1]);
 336          result[2] = FABSF(result[2]);
 337          result[3] = FABSF(result[3]);
 338       }
 339       if (source->Negate) {
 340          ASSERT(source->Negate == NEGATE_XYZW);
 341          result[0] = -result[0];
 342          result[1] = -result[1];
 343          result[2] = -result[2];
 344          result[3] = -result[3];
 345       }
 346    }
 347    else {
 348       ASSIGN_4V(result, 0.0, 0.0, 0.0, 0.0);
 349    }
 350 }
 351
 352
 353 /**
 354  * As above, but only return result[0] element.
 355  */
 356 static void
 357 fetch_vector1(const struct prog_src_register *source,
 358               const struct gl_program_machine *machine, GLfloat result[4])
 359 {
 360    const GLfloat *src = get_src_register_pointer(source, machine);
 361    ASSERT(src);
 362
 363    result[0] = src[GET_SWZ(source->Swizzle, 0)];
 364
 365    if (source->Abs) {
 366       result[0] = FABSF(result[0]);
 367    }
 368    if (source->Negate) {
 369       result[0] = -result[0];
 370    }
 371 }
 372
 373
 374 static GLuint
 375 fetch_vector1ui(const struct prog_src_register *source,
 376                 const struct gl_program_machine *machine)
 377 {
 378    const GLuint *src = (GLuint *) get_src_register_pointer(source, machine);
 379    return src[GET_SWZ(source->Swizzle, 0)];
 380 }
 381
 382
 383 /**
 384  * Fetch texel from texture.  Use partial derivatives when possible.
 385  */
 386 static INLINE void
 387 fetch_texel(struct gl_context *ctx,
 388             const struct gl_program_machine *machine,
 389             const struct prog_instruction *inst,
 390             const GLfloat texcoord[4], GLfloat lodBias,
 391             GLfloat color[4])
 392 {
 393    const GLuint unit = machine->Samplers[inst->TexSrcUnit];
 394
 395    /* Note: we only have the right derivatives for fragment input attribs.
 396     */
 397    if (machine->NumDeriv > 0 &&
 398        inst->SrcReg[0].File == PROGRAM_INPUT &&
 399        inst->SrcReg[0].Index == FRAG_ATTRIB_TEX0 + inst->TexSrcUnit) {
 400       /* simple texture fetch for which we should have derivatives */
 401       GLuint attr = inst->SrcReg[0].Index;
 402       machine->FetchTexelDeriv(ctx, texcoord,
 403                                machine->DerivX[attr],
 404                                machine->DerivY[attr],
 405                                lodBias, unit, color);
 406    }
 407    else {
 408       machine->FetchTexelLod(ctx, texcoord, lodBias, unit, color);
 409    }
 410 }
 411
 412
 413 /**
 414  * Test value against zero and return GT, LT, EQ or UN if NaN.
 415  */
 416 static INLINE GLuint
 417 generate_cc(float value)
 418 {
 419    if (value != value)
 420       return COND_UN;           /* NaN */
 421    if (value > 0.0F)
 422       return COND_GT;
 423    if (value < 0.0F)
 424       return COND_LT;
 425    return COND_EQ;
 426 }
 427
 428
 429 /**
 430  * Test if the ccMaskRule is satisfied by the given condition code.
 431  * Used to mask destination writes according to the current condition code.
 432  */
 433 static INLINE GLboolean
 434 test_cc(GLuint condCode, GLuint ccMaskRule)
 435 {
 436    switch (ccMaskRule) {
 437    case COND_EQ: return (condCode == COND_EQ);
 438    case COND_NE: return (condCode != COND_EQ);
 439    case COND_LT: return (condCode == COND_LT);
 440    case COND_GE: return (condCode == COND_GT || condCode == COND_EQ);
 441    case COND_LE: return (condCode == COND_LT || condCode == COND_EQ);
 442    case COND_GT: return (condCode == COND_GT);
 443    case COND_TR: return GL_TRUE;
 444    case COND_FL: return GL_FALSE;
 445    default:      return GL_TRUE;
 446    }
 447 }
 448
 449
 450 /**
 451  * Evaluate the 4 condition codes against a predicate and return GL_TRUE
 452  * or GL_FALSE to indicate result.
 453  */
 454 static INLINE GLboolean
 455 eval_condition(const struct gl_program_machine *machine,
 456                const struct prog_instruction *inst)
 457 {
 458    const GLuint swizzle = inst->DstReg.CondSwizzle;
 459    const GLuint condMask = inst->DstReg.CondMask;
 460    if (test_cc(machine->CondCodes[GET_SWZ(swizzle, 0)], condMask) ||
 461        test_cc(machine->CondCodes[GET_SWZ(swizzle, 1)], condMask) ||
 462        test_cc(machine->CondCodes[GET_SWZ(swizzle, 2)], condMask) ||
 463        test_cc(machine->CondCodes[GET_SWZ(swizzle, 3)], condMask)) {
 464       return GL_TRUE;
 465    }
 466    else {
 467       return GL_FALSE;
 468    }
 469 }
 470
 471
 472
 473 /**
 474  * Store 4 floats into a register.  Observe the instructions saturate and
 475  * set-condition-code flags.
 476  */
 477 static void
 478 store_vector4(const struct prog_instruction *inst,
 479               struct gl_program_machine *machine, const GLfloat value[4])
 480 {
 481    const struct prog_dst_register *dstReg = &(inst->DstReg);
 482    const GLboolean clamp = inst->SaturateMode == SATURATE_ZERO_ONE;
 483    GLuint writeMask = dstReg->WriteMask;
 484    GLfloat clampedValue[4];
 485    GLfloat *dst = get_dst_register_pointer(dstReg, machine);
 486
 487 #if 0
 488    if (value[0] > 1.0e10 ||
 489        IS_INF_OR_NAN(value[0]) ||
 490        IS_INF_OR_NAN(value[1]) ||
 491        IS_INF_OR_NAN(value[2]) || IS_INF_OR_NAN(value[3]))
 492       printf("store %g %g %g %g\n", value[0], value[1], value[2], value[3]);
 493 #endif
 494
 495    if (clamp) {
 496       clampedValue[0] = CLAMP(value[0], 0.0F, 1.0F);
 497       clampedValue[1] = CLAMP(value[1], 0.0F, 1.0F);
 498       clampedValue[2] = CLAMP(value[2], 0.0F, 1.0F);
 499       clampedValue[3] = CLAMP(value[3], 0.0F, 1.0F);
 500       value = clampedValue;
 501    }
 502
 503    if (dstReg->CondMask != COND_TR) {
 504       /* condition codes may turn off some writes */
 505       if (writeMask & WRITEMASK_X) {
 506          if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 0)],
 507                       dstReg->CondMask))
 508             writeMask &= ~WRITEMASK_X;
 509       }
 510       if (writeMask & WRITEMASK_Y) {
 511          if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 1)],
 512                       dstReg->CondMask))
 513             writeMask &= ~WRITEMASK_Y;
 514       }
 515       if (writeMask & WRITEMASK_Z) {
 516          if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 2)],
 517                       dstReg->CondMask))
 518             writeMask &= ~WRITEMASK_Z;
 519       }
 520       if (writeMask & WRITEMASK_W) {
 521          if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 3)],
 522                       dstReg->CondMask))
 523             writeMask &= ~WRITEMASK_W;
 524       }
 525    }
 526
 527 #ifdef NAN_CHECK
 528    assert(!IS_INF_OR_NAN(value[0]));
 529    assert(!IS_INF_OR_NAN(value[0]));
 530    assert(!IS_INF_OR_NAN(value[0]));
 531    assert(!IS_INF_OR_NAN(value[0]));
 532 #endif
 533
 534    if (writeMask & WRITEMASK_X)
 535       dst[0] = value[0];
 536    if (writeMask & WRITEMASK_Y)
 537       dst[1] = value[1];
 538    if (writeMask & WRITEMASK_Z)
 539       dst[2] = value[2];
 540    if (writeMask & WRITEMASK_W)
 541       dst[3] = value[3];
 542
 543    if (inst->CondUpdate) {
 544       if (writeMask & WRITEMASK_X)
 545          machine->CondCodes[0] = generate_cc(value[0]);
 546       if (writeMask & WRITEMASK_Y)
 547          machine->CondCodes[1] = generate_cc(value[1]);
 548       if (writeMask & WRITEMASK_Z)
 549          machine->CondCodes[2] = generate_cc(value[2]);
 550       if (writeMask & WRITEMASK_W)
 551          machine->CondCodes[3] = generate_cc(value[3]);
 552 #if DEBUG_PROG
 553       printf("CondCodes=(%s,%s,%s,%s) for:\n",
 554              _mesa_condcode_string(machine->CondCodes[0]),
 555              _mesa_condcode_string(machine->CondCodes[1]),
 556              _mesa_condcode_string(machine->CondCodes[2]),
 557              _mesa_condcode_string(machine->CondCodes[3]));
 558 #endif
 559    }
 560 }
 561
 562
 563 /**
 564  * Store 4 uints into a register.  Observe the set-condition-code flags.
 565  */
 566 static void
 567 store_vector4ui(const struct prog_instruction *inst,
 568                 struct gl_program_machine *machine, const GLuint value[4])
 569 {
 570    const struct prog_dst_register *dstReg = &(inst->DstReg);
 571    GLuint writeMask = dstReg->WriteMask;
 572    GLuint *dst = (GLuint *) get_dst_register_pointer(dstReg, machine);
 573
 574    if (dstReg->CondMask != COND_TR) {
 575       /* condition codes may turn off some writes */
 576       if (writeMask & WRITEMASK_X) {
 577          if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 0)],
 578                       dstReg->CondMask))
 579             writeMask &= ~WRITEMASK_X;
 580       }
 581       if (writeMask & WRITEMASK_Y) {
 582          if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 1)],
 583                       dstReg->CondMask))
 584             writeMask &= ~WRITEMASK_Y;
 585       }
 586       if (writeMask & WRITEMASK_Z) {
 587          if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 2)],
 588                       dstReg->CondMask))
 589             writeMask &= ~WRITEMASK_Z;
 590       }
 591       if (writeMask & WRITEMASK_W) {
 592          if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 3)],
 593                       dstReg->CondMask))
 594             writeMask &= ~WRITEMASK_W;
 595       }
 596    }
 597
 598    if (writeMask & WRITEMASK_X)
 599       dst[0] = value[0];
 600    if (writeMask & WRITEMASK_Y)
 601       dst[1] = value[1];
 602    if (writeMask & WRITEMASK_Z)
 603       dst[2] = value[2];
 604    if (writeMask & WRITEMASK_W)
 605       dst[3] = value[3];
 606
 607    if (inst->CondUpdate) {
 608       if (writeMask & WRITEMASK_X)
 609          machine->CondCodes[0] = generate_cc((float)value[0]);
 610       if (writeMask & WRITEMASK_Y)
 611          machine->CondCodes[1] = generate_cc((float)value[1]);
 612       if (writeMask & WRITEMASK_Z)
 613          machine->CondCodes[2] = generate_cc((float)value[2]);
 614       if (writeMask & WRITEMASK_W)
 615          machine->CondCodes[3] = generate_cc((float)value[3]);
 616 #if DEBUG_PROG
 617       printf("CondCodes=(%s,%s,%s,%s) for:\n",
 618              _mesa_condcode_string(machine->CondCodes[0]),
 619              _mesa_condcode_string(machine->CondCodes[1]),
 620              _mesa_condcode_string(machine->CondCodes[2]),
 621              _mesa_condcode_string(machine->CondCodes[3]));
 622 #endif
 623    }
 624 }
 625
 626
 627
 628 /**
 629  * Execute the given vertex/fragment program.
 630  *
 631  * \param ctx  rendering context
 632  * \param program  the program to execute
 633  * \param machine  machine state (must be initialized)
 634  * \return GL_TRUE if program completed or GL_FALSE if program executed KIL.
 635  */
 636 GLboolean
 637 _mesa_execute_program(struct gl_context * ctx,
 638                       const struct gl_program *program,
 639                       struct gl_program_machine *machine)
 640 {
 641    const GLuint numInst = program->NumInstructions;
 642    const GLuint maxExec = 10000;
 643    GLuint pc, numExec = 0;
 644
 645    machine->CurProgram = program;
 646
 647    if (DEBUG_PROG) {
 648       printf("execute program %u --------------------\n", program->Id);
 649    }
 650
 651    if (program->Target == GL_VERTEX_PROGRAM_ARB) {
 652       machine->EnvParams = ctx->VertexProgram.Parameters;
 653    }
 654    else {
 655       machine->EnvParams = ctx->FragmentProgram.Parameters;
 656    }
 657
 658    for (pc = 0; pc < numInst; pc++) {
 659       const struct prog_instruction *inst = program->Instructions + pc;
 660
 661       if (DEBUG_PROG) {
 662          _mesa_print_instruction(inst);
 663       }
 664
 665       switch (inst->Opcode) {
 666       case OPCODE_ABS:
 667          {
 668             GLfloat a[4], result[4];
 669             fetch_vector4(&inst->SrcReg[0], machine, a);
 670             result[0] = FABSF(a[0]);
 671             result[1] = FABSF(a[1]);
 672             result[2] = FABSF(a[2]);
 673             result[3] = FABSF(a[3]);
 674             store_vector4(inst, machine, result);
 675          }
 676          break;
 677       case OPCODE_ADD:
 678          {
 679             GLfloat a[4], b[4], result[4];
 680             fetch_vector4(&inst->SrcReg[0], machine, a);
 681             fetch_vector4(&inst->SrcReg[1], machine, b);
 682             result[0] = a[0] + b[0];
 683             result[1] = a[1] + b[1];
 684             result[2] = a[2] + b[2];
 685             result[3] = a[3] + b[3];
 686             store_vector4(inst, machine, result);
 687             if (DEBUG_PROG) {
 688                printf("ADD (%g %g %g %g) = (%g %g %g %g) + (%g %g %g %g)\n",
 689                       result[0], result[1], result[2], result[3],
 690                       a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
 691             }
 692          }
 693          break;
 694       case OPCODE_AND:     /* bitwise AND */
 695          {
 696             GLuint a[4], b[4], result[4];
 697             fetch_vector4ui(&inst->SrcReg[0], machine, a);
 698             fetch_vector4ui(&inst->SrcReg[1], machine, b);
 699             result[0] = a[0] & b[0];
 700             result[1] = a[1] & b[1];
 701             result[2] = a[2] & b[2];
 702             result[3] = a[3] & b[3];
 703             store_vector4ui(inst, machine, result);
 704          }
 705          break;
 706       case OPCODE_ARL:
 707          {
 708             GLfloat t[4];
 709             fetch_vector4(&inst->SrcReg[0], machine, t);
 710             machine->AddressReg[0][0] = IFLOOR(t[0]);
 711             if (DEBUG_PROG) {
 712                printf("ARL %d\n", machine->AddressReg[0][0]);
 713             }
 714          }
 715          break;
 716       case OPCODE_BGNLOOP:
 717          /* no-op */
 718          ASSERT(program->Instructions[inst->BranchTarget].Opcode
 719                 == OPCODE_ENDLOOP);
 720          break;
 721       case OPCODE_ENDLOOP:
 722          /* subtract 1 here since pc is incremented by for(pc) loop */
 723          ASSERT(program->Instructions[inst->BranchTarget].Opcode
 724                 == OPCODE_BGNLOOP);
 725          pc = inst->BranchTarget - 1;   /* go to matching BNGLOOP */
 726          break;
 727       case OPCODE_BGNSUB:      /* begin subroutine */
 728          break;
 729       case OPCODE_ENDSUB:      /* end subroutine */
 730          break;
 731       case OPCODE_BRA:         /* branch (conditional) */
 732          if (eval_condition(machine, inst)) {
 733             /* take branch */
 734             /* Subtract 1 here since we'll do pc++ below */
 735             pc = inst->BranchTarget - 1;
 736          }
 737          break;
 738       case OPCODE_BRK:         /* break out of loop (conditional) */
 739          ASSERT(program->Instructions[inst->BranchTarget].Opcode
 740                 == OPCODE_ENDLOOP);
 741          if (eval_condition(machine, inst)) {
 742             /* break out of loop */
 743             /* pc++ at end of for-loop will put us after the ENDLOOP inst */
 744             pc = inst->BranchTarget;
 745          }
 746          break;
 747       case OPCODE_CONT:        /* continue loop (conditional) */
 748          ASSERT(program->Instructions[inst->BranchTarget].Opcode
 749                 == OPCODE_ENDLOOP);
 750          if (eval_condition(machine, inst)) {
 751             /* continue at ENDLOOP */
 752             /* Subtract 1 here since we'll do pc++ at end of for-loop */
 753             pc = inst->BranchTarget - 1;
 754          }
 755          break;
 756       case OPCODE_CAL:         /* Call subroutine (conditional) */
 757          if (eval_condition(machine, inst)) {
 758             /* call the subroutine */
 759             if (machine->StackDepth >= MAX_PROGRAM_CALL_DEPTH) {
 760                return GL_TRUE;  /* Per GL_NV_vertex_program2 spec */
 761             }
 762             machine->CallStack[machine->StackDepth++] = pc + 1; /* next inst */
 763             /* Subtract 1 here since we'll do pc++ at end of for-loop */
 764             pc = inst->BranchTarget - 1;
 765          }
 766          break;
 767       case OPCODE_CMP:
 768          {
 769             GLfloat a[4], b[4], c[4], result[4];
 770             fetch_vector4(&inst->SrcReg[0], machine, a);
 771             fetch_vector4(&inst->SrcReg[1], machine, b);
 772             fetch_vector4(&inst->SrcReg[2], machine, c);
 773             result[0] = a[0] < 0.0F ? b[0] : c[0];
 774             result[1] = a[1] < 0.0F ? b[1] : c[1];
 775             result[2] = a[2] < 0.0F ? b[2] : c[2];
 776             result[3] = a[3] < 0.0F ? b[3] : c[3];
 777             store_vector4(inst, machine, result);
 778             if (DEBUG_PROG) {
 779                printf("CMP (%g %g %g %g) = (%g %g %g %g) < 0 ? (%g %g %g %g) : (%g %g %g %g)\n",
 780                       result[0], result[1], result[2], result[3],
 781                       a[0], a[1], a[2], a[3],
 782                       b[0], b[1], b[2], b[3],
 783                       c[0], c[1], c[2], c[3]);
 784             }
 785          }
 786          break;
 787       case OPCODE_COS:
 788          {
 789             GLfloat a[4], result[4];
 790             fetch_vector1(&inst->SrcReg[0], machine, a);
 791             result[0] = result[1] = result[2] = result[3]
 792                = (GLfloat) cos(a[0]);
 793             store_vector4(inst, machine, result);
 794          }
 795          break;
 796       case OPCODE_DDX:         /* Partial derivative with respect to X */
 797          {
 798             GLfloat result[4];
 799             fetch_vector4_deriv(ctx, &inst->SrcReg[0], machine,
 800                                 'X', result);
 801             store_vector4(inst, machine, result);
 802          }
 803          break;
 804       case OPCODE_DDY:         /* Partial derivative with respect to Y */
 805          {
 806             GLfloat result[4];
 807             fetch_vector4_deriv(ctx, &inst->SrcReg[0], machine,
 808                                 'Y', result);
 809             store_vector4(inst, machine, result);
 810          }
 811          break;
 812       case OPCODE_DP2:
 813          {
 814             GLfloat a[4], b[4], result[4];
 815             fetch_vector4(&inst->SrcReg[0], machine, a);
 816             fetch_vector4(&inst->SrcReg[1], machine, b);
 817             result[0] = result[1] = result[2] = result[3] = DOT2(a, b);
 818             store_vector4(inst, machine, result);
 819             if (DEBUG_PROG) {
 820                printf("DP2 %g = (%g %g) . (%g %g)\n",
 821                       result[0], a[0], a[1], b[0], b[1]);
 822             }
 823          }
 824          break;
 825       case OPCODE_DP2A:
 826          {
 827             GLfloat a[4], b[4], c, result[4];
 828             fetch_vector4(&inst->SrcReg[0], machine, a);
 829             fetch_vector4(&inst->SrcReg[1], machine, b);
 830             fetch_vector1(&inst->SrcReg[1], machine, &c);
 831             result[0] = result[1] = result[2] = result[3] = DOT2(a, b) + c;
 832             store_vector4(inst, machine, result);
 833             if (DEBUG_PROG) {
 834                printf("DP2A %g = (%g %g) . (%g %g) + %g\n",
 835                       result[0], a[0], a[1], b[0], b[1], c);
 836             }
 837          }
 838          break;
 839       case OPCODE_DP3:
 840          {
 841             GLfloat a[4], b[4], result[4];
 842             fetch_vector4(&inst->SrcReg[0], machine, a);
 843             fetch_vector4(&inst->SrcReg[1], machine, b);
 844             result[0] = result[1] = result[2] = result[3] = DOT3(a, b);
 845             store_vector4(inst, machine, result);
 846             if (DEBUG_PROG) {
 847                printf("DP3 %g = (%g %g %g) . (%g %g %g)\n",
 848                       result[0], a[0], a[1], a[2], b[0], b[1], b[2]);
 849             }
 850          }
 851          break;
 852       case OPCODE_DP4:
 853          {
 854             GLfloat a[4], b[4], result[4];
 855             fetch_vector4(&inst->SrcReg[0], machine, a);
 856             fetch_vector4(&inst->SrcReg[1], machine, b);
 857             result[0] = result[1] = result[2] = result[3] = DOT4(a, b);
 858             store_vector4(inst, machine, result);
 859             if (DEBUG_PROG) {
 860                printf("DP4 %g = (%g, %g %g %g) . (%g, %g %g %g)\n",
 861                       result[0], a[0], a[1], a[2], a[3],
 862                       b[0], b[1], b[2], b[3]);
 863             }
 864          }
 865          break;
 866       case OPCODE_DPH:
 867          {
 868             GLfloat a[4], b[4], result[4];
 869             fetch_vector4(&inst->SrcReg[0], machine, a);
 870             fetch_vector4(&inst->SrcReg[1], machine, b);
 871             result[0] = result[1] = result[2] = result[3] = DOT3(a, b) + b[3];
 872             store_vector4(inst, machine, result);
 873          }
 874          break;
 875       case OPCODE_DST:         /* Distance vector */
 876          {
 877             GLfloat a[4], b[4], result[4];
 878             fetch_vector4(&inst->SrcReg[0], machine, a);
 879             fetch_vector4(&inst->SrcReg[1], machine, b);
 880             result[0] = 1.0F;
 881             result[1] = a[1] * b[1];
 882             result[2] = a[2];
 883             result[3] = b[3];
 884             store_vector4(inst, machine, result);
 885          }
 886          break;
 887       case OPCODE_EXP:
 888          {
 889             GLfloat t[4], q[4], floor_t0;
 890             fetch_vector1(&inst->SrcReg[0], machine, t);
 891             floor_t0 = FLOORF(t[0]);
 892             if (floor_t0 > FLT_MAX_EXP) {
 893                SET_POS_INFINITY(q[0]);
 894                SET_POS_INFINITY(q[2]);
 895             }
 896             else if (floor_t0 < FLT_MIN_EXP) {
 897                q[0] = 0.0F;
 898                q[2] = 0.0F;
 899             }
 900             else {
 901                q[0] = LDEXPF(1.0, (int) floor_t0);
 902                /* Note: GL_NV_vertex_program expects
 903                 * result.z = result.x * APPX(result.y)
 904                 * We do what the ARB extension says.
 905                 */
 906                q[2] = (GLfloat) pow(2.0, t[0]);
 907             }
 908             q[1] = t[0] - floor_t0;
 909             q[3] = 1.0F;
 910             store_vector4( inst, machine, q );
 911          }
 912          break;
 913       case OPCODE_EX2:         /* Exponential base 2 */
 914          {
 915             GLfloat a[4], result[4], val;
 916             fetch_vector1(&inst->SrcReg[0], machine, a);
 917             val = (GLfloat) pow(2.0, a[0]);
 918             /*
 919             if (IS_INF_OR_NAN(val))
 920                val = 1.0e10;
 921             */
 922             result[0] = result[1] = result[2] = result[3] = val;
 923             store_vector4(inst, machine, result);
 924          }
 925          break;
 926       case OPCODE_FLR:
 927          {
 928             GLfloat a[4], result[4];
 929             fetch_vector4(&inst->SrcReg[0], machine, a);
 930             result[0] = FLOORF(a[0]);
 931             result[1] = FLOORF(a[1]);
 932             result[2] = FLOORF(a[2]);
 933             result[3] = FLOORF(a[3]);
 934             store_vector4(inst, machine, result);
 935          }
 936          break;
 937       case OPCODE_FRC:
 938          {
 939             GLfloat a[4], result[4];
 940             fetch_vector4(&inst->SrcReg[0], machine, a);
 941             result[0] = a[0] - FLOORF(a[0]);
 942             result[1] = a[1] - FLOORF(a[1]);
 943             result[2] = a[2] - FLOORF(a[2]);
 944             result[3] = a[3] - FLOORF(a[3]);
 945             store_vector4(inst, machine, result);
 946          }
 947          break;
 948       case OPCODE_IF:
 949          {
 950             GLboolean cond;
 951             ASSERT(program->Instructions[inst->BranchTarget].Opcode
 952                    == OPCODE_ELSE ||
 953                    program->Instructions[inst->BranchTarget].Opcode
 954                    == OPCODE_ENDIF);
 955             /* eval condition */
 956             if (inst->SrcReg[0].File != PROGRAM_UNDEFINED) {
 957                GLfloat a[4];
 958                fetch_vector1(&inst->SrcReg[0], machine, a);
 959                cond = (a[0] != 0.0);
 960             }
 961             else {
 962                cond = eval_condition(machine, inst);
 963             }
 964             if (DEBUG_PROG) {
 965                printf("IF: %d\n", cond);
 966             }
 967             /* do if/else */
 968             if (cond) {
 969                /* do if-clause (just continue execution) */
 970             }
 971             else {
 972                /* go to the instruction after ELSE or ENDIF */
 973                assert(inst->BranchTarget >= 0);
 974                pc = inst->BranchTarget;
 975             }
 976          }
 977          break;
 978       case OPCODE_ELSE:
 979          /* goto ENDIF */
 980          ASSERT(program->Instructions[inst->BranchTarget].Opcode
 981                 == OPCODE_ENDIF);
 982          assert(inst->BranchTarget >= 0);
 983          pc = inst->BranchTarget;
 984          break;
 985       case OPCODE_ENDIF:
 986          /* nothing */
 987          break;
 988       case OPCODE_KIL_NV:      /* NV_f_p only (conditional) */
 989          if (eval_condition(machine, inst)) {
 990             return GL_FALSE;
 991          }
 992          break;
 993       case OPCODE_KIL:         /* ARB_f_p only */
 994          {
 995             GLfloat a[4];
 996             fetch_vector4(&inst->SrcReg[0], machine, a);
 997             if (DEBUG_PROG) {
 998                printf("KIL if (%g %g %g %g) < 0.0\n",
 999                       a[0], a[1], a[2], a[3]);
1000             }
1001             /* Stop executing as soon as any component is strictly negative. */
1002             if (a[0] < 0.0F || a[1] < 0.0F || a[2] < 0.0F || a[3] < 0.0F) {
1003                return GL_FALSE;
1004             }
1005          }
1006          break;
1007       case OPCODE_LG2:         /* log base 2 */
1008          {
1009             GLfloat a[4], result[4], val;
1010             fetch_vector1(&inst->SrcReg[0], machine, a);
1011             /* The fast LOG2 macro doesn't meet the precision requirements.
1012              */
1013             if (a[0] == 0.0F) {
1014                val = -FLT_MAX;
1015             }
1016             else {
1017                val = (float)(log(a[0]) * 1.442695F);
1018             }
1019             result[0] = result[1] = result[2] = result[3] = val;
1020             store_vector4(inst, machine, result);
1021          }
1022          break;
1023       case OPCODE_LIT:
1024          {
1025             const GLfloat epsilon = 1.0F / 256.0F;      /* from NV VP spec */
1026             GLfloat a[4], result[4];
1027             fetch_vector4(&inst->SrcReg[0], machine, a);
1028             a[0] = MAX2(a[0], 0.0F);
1029             a[1] = MAX2(a[1], 0.0F);
1030             /* XXX ARB version clamps a[3], NV version doesn't */
1031             a[3] = CLAMP(a[3], -(128.0F - epsilon), (128.0F - epsilon));
1032             result[0] = 1.0F;
1033             result[1] = a[0];
1034             /* XXX we could probably just use pow() here */
1035             if (a[0] > 0.0F) {
1036                if (a[1] == 0.0 && a[3] == 0.0)
1037                   result[2] = 1.0F;
1038                else
1039                   result[2] = (GLfloat) pow(a[1], a[3]);
1040             }
1041             else {
1042                result[2] = 0.0F;
1043             }
1044             result[3] = 1.0F;
1045             store_vector4(inst, machine, result);
1046             if (DEBUG_PROG) {
1047                printf("LIT (%g %g %g %g) : (%g %g %g %g)\n",
1048                       result[0], result[1], result[2], result[3],
1049                       a[0], a[1], a[2], a[3]);
1050             }
1051          }
1052          break;
1053       case OPCODE_LOG:         /* q = (exponent, mantissa, log2, 1) of |src| */
1054          {
1055             GLfloat t[4], q[4], abs_t0;
1056             fetch_vector1(&inst->SrcReg[0], machine, t);
1057             abs_t0 = FABSF(t[0]);
1058             if (abs_t0 != 0.0F) {
1059                /* Since we really can't handle infinite values on VMS
1060                 * like other OSes we'll use __MAXFLOAT to represent
1061                 * infinity.  This may need some tweaking.
1062                 */
1063 #ifdef VMS
1064                if (abs_t0 == __MAXFLOAT)
1065 #else
1066                if (IS_INF_OR_NAN(abs_t0))
1067 #endif
1068                {
1069                   SET_POS_INFINITY(q[0]);
1070                   q[1] = 1.0F;
1071                   SET_POS_INFINITY(q[2]);
1072                }
1073                else {
1074                   int exponent;
1075                   GLfloat mantissa = FREXPF(abs_t0, &exponent);
1076                   q[0] = (GLfloat) (exponent - 1);
1077                   q[1] = (GLfloat) (2.0 * mantissa); /* map [.5, 1) -> [1, 2) */
1078
1079                   /* The fast LOG2 macro doesn't meet the precision
1080                    * requirements.  Work on the magnitude, as the checks
1081                    * above do; 1.442695 is 1/ln(2), i.e. ln -> log2. */
1082                   q[2] = (float)(log(abs_t0) * 1.442695F);
1083                }
1084             }
1085             else {
1086                SET_NEG_INFINITY(q[0]);  /* log2(0) */
1087                q[1] = 1.0F;
1088                SET_NEG_INFINITY(q[2]);
1089             }
1090             q[3] = 1.0;
1091             store_vector4(inst, machine, q);
1092          }
1093          break;
1094       case OPCODE_LRP:
1095          {
1096             GLfloat a[4], b[4], c[4], result[4];
1097             fetch_vector4(&inst->SrcReg[0], machine, a);
1098             fetch_vector4(&inst->SrcReg[1], machine, b);
1099             fetch_vector4(&inst->SrcReg[2], machine, c);
1100             result[0] = a[0] * b[0] + (1.0F - a[0]) * c[0];
1101             result[1] = a[1] * b[1] + (1.0F - a[1]) * c[1];
1102             result[2] = a[2] * b[2] + (1.0F - a[2]) * c[2];
1103             result[3] = a[3] * b[3] + (1.0F - a[3]) * c[3];
1104             store_vector4(inst, machine, result);
1105             if (DEBUG_PROG) {
1106                printf("LRP (%g %g %g %g) = (%g %g %g %g), "
1107                       "(%g %g %g %g), (%g %g %g %g)\n",
1108                       result[0], result[1], result[2], result[3],
1109                       a[0], a[1], a[2], a[3],
1110                       b[0], b[1], b[2], b[3], c[0], c[1], c[2], c[3]);
1111             }
1112          }
1113          break;
1114       case OPCODE_MAD:
1115          {
1116             GLfloat a[4], b[4], c[4], result[4];
1117             fetch_vector4(&inst->SrcReg[0], machine, a);
1118             fetch_vector4(&inst->SrcReg[1], machine, b);
1119             fetch_vector4(&inst->SrcReg[2], machine, c);
1120             result[0] = a[0] * b[0] + c[0];
1121             result[1] = a[1] * b[1] + c[1];
1122             result[2] = a[2] * b[2] + c[2];
1123             result[3] = a[3] * b[3] + c[3];
1124             store_vector4(inst, machine, result);
1125             if (DEBUG_PROG) {
1126                printf("MAD (%g %g %g %g) = (%g %g %g %g) * "
1127                       "(%g %g %g %g) + (%g %g %g %g)\n",
1128                       result[0], result[1], result[2], result[3],
1129                       a[0], a[1], a[2], a[3],
1130                       b[0], b[1], b[2], b[3], c[0], c[1], c[2], c[3]);
1131             }
1132          }
1133          break;
1134       case OPCODE_MAX:
1135          {
1136             GLfloat a[4], b[4], result[4];
1137             fetch_vector4(&inst->SrcReg[0], machine, a);
1138             fetch_vector4(&inst->SrcReg[1], machine, b);
1139             result[0] = MAX2(a[0], b[0]);
1140             result[1] = MAX2(a[1], b[1]);
1141             result[2] = MAX2(a[2], b[2]);
1142             result[3] = MAX2(a[3], b[3]);
1143             store_vector4(inst, machine, result);
1144             if (DEBUG_PROG) {
1145                printf("MAX (%g %g %g %g) = (%g %g %g %g), (%g %g %g %g)\n",
1146                       result[0], result[1], result[2], result[3],
1147                       a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
1148             }
1149          }
1150          break;
1151       case OPCODE_MIN:
1152          {
1153             GLfloat a[4], b[4], result[4];
1154             fetch_vector4(&inst->SrcReg[0], machine, a);
1155             fetch_vector4(&inst->SrcReg[1], machine, b);
1156             result[0] = MIN2(a[0], b[0]);
1157             result[1] = MIN2(a[1], b[1]);
1158             result[2] = MIN2(a[2], b[2]);
1159             result[3] = MIN2(a[3], b[3]);
1160             store_vector4(inst, machine, result);
1161          }
1162          break;
1163       case OPCODE_MOV:
1164          {
1165             GLfloat result[4];
1166             fetch_vector4(&inst->SrcReg[0], machine, result);
1167             store_vector4(inst, machine, result);
1168             if (DEBUG_PROG) {
1169                printf("MOV (%g %g %g %g)\n",
1170                       result[0], result[1], result[2], result[3]);
1171             }
1172          }
1173          break;
1174       case OPCODE_MUL:
1175          {
1176             GLfloat a[4], b[4], result[4];
1177             fetch_vector4(&inst->SrcReg[0], machine, a);
1178             fetch_vector4(&inst->SrcReg[1], machine, b);
1179             result[0] = a[0] * b[0];
1180             result[1] = a[1] * b[1];
1181             result[2] = a[2] * b[2];
1182             result[3] = a[3] * b[3];
1183             store_vector4(inst, machine, result);
1184             if (DEBUG_PROG) {
1185                printf("MUL (%g %g %g %g) = (%g %g %g %g) * (%g %g %g %g)\n",
1186                       result[0], result[1], result[2], result[3],
1187                       a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
1188             }
1189          }
1190          break;
1191       case OPCODE_NOISE1:
1192          {
1193             GLfloat a[4], result[4];
1194             fetch_vector1(&inst->SrcReg[0], machine, a);
1195             result[0] =
1196                result[1] =
1197                result[2] =
1198                result[3] = _mesa_noise1(a[0]);
1199             store_vector4(inst, machine, result);
1200          }
1201          break;
1202       case OPCODE_NOISE2:
1203          {
1204             GLfloat a[4], result[4];
1205             fetch_vector4(&inst->SrcReg[0], machine, a);
1206             result[0] =
1207                result[1] =
1208                result[2] = result[3] = _mesa_noise2(a[0], a[1]);
1209             store_vector4(inst, machine, result);
1210          }
1211          break;
1212       case OPCODE_NOISE3:
1213          {
1214             GLfloat a[4], result[4];
1215             fetch_vector4(&inst->SrcReg[0], machine, a);
1216             result[0] =
1217                result[1] =
1218                result[2] =
1219                result[3] = _mesa_noise3(a[0], a[1], a[2]);
1220             store_vector4(inst, machine, result);
1221          }
1222          break;
1223       case OPCODE_NOISE4:
1224          {
1225             GLfloat a[4], result[4];
1226             fetch_vector4(&inst->SrcReg[0], machine, a);
1227             result[0] =
1228                result[1] =
1229                result[2] =
1230                result[3] = _mesa_noise4(a[0], a[1], a[2], a[3]);
1231             store_vector4(inst, machine, result);
1232          }
1233          break;
1234       case OPCODE_NOP:
1235          break;
1236       case OPCODE_NOT:         /* bitwise NOT */
1237          {
1238             GLuint a[4], result[4];
1239             fetch_vector4ui(&inst->SrcReg[0], machine, a);
1240             result[0] = ~a[0];
1241             result[1] = ~a[1];
1242             result[2] = ~a[2];
1243             result[3] = ~a[3];
1244             store_vector4ui(inst, machine, result);
1245          }
1246          break;
1247       case OPCODE_NRM3:        /* 3-component normalization */
1248          {
1249             GLfloat a[4], result[4];
1250             GLfloat tmp;
1251             fetch_vector4(&inst->SrcReg[0], machine, a);
1252             tmp = a[0] * a[0] + a[1] * a[1] + a[2] * a[2];
1253             if (tmp != 0.0F)
1254                tmp = INV_SQRTF(tmp);
1255             result[0] = tmp * a[0];
1256             result[1] = tmp * a[1];
1257             result[2] = tmp * a[2];
1258             result[3] = 0.0;  /* undefined, but prevent valgrind warnings */
1259             store_vector4(inst, machine, result);
1260          }
1261          break;
1262       case OPCODE_NRM4:        /* 4-component normalization */
1263          {
1264             GLfloat a[4], result[4];
1265             GLfloat tmp;
1266             fetch_vector4(&inst->SrcReg[0], machine, a);
1267             tmp = a[0] * a[0] + a[1] * a[1] + a[2] * a[2] + a[3] * a[3];
1268             if (tmp != 0.0F)
1269                tmp = INV_SQRTF(tmp);
1270             result[0] = tmp * a[0];
1271             result[1] = tmp * a[1];
1272             result[2] = tmp * a[2];
1273             result[3] = tmp * a[3];
1274             store_vector4(inst, machine, result);
1275          }
1276          break;
1277       case OPCODE_OR:          /* bitwise OR */
1278          {
1279             GLuint a[4], b[4], result[4];
1280             fetch_vector4ui(&inst->SrcReg[0], machine, a);
1281             fetch_vector4ui(&inst->SrcReg[1], machine, b);
1282             result[0] = a[0] | b[0];
1283             result[1] = a[1] | b[1];
1284             result[2] = a[2] | b[2];
1285             result[3] = a[3] | b[3];
1286             store_vector4ui(inst, machine, result);
1287          }
1288          break;
1289       case OPCODE_PK2H:        /* pack two 16-bit floats in one 32-bit float */
1290          {
1291             GLfloat a[4];
1292             GLuint result[4];
1293             GLhalfNV hx, hy;
1294             fetch_vector4(&inst->SrcReg[0], machine, a);
1295             hx = _mesa_float_to_half(a[0]);  /* low 16 bits of the packed word */
1296             hy = _mesa_float_to_half(a[1]);  /* high 16 bits of the packed word */
1297             result[0] =
1298             result[1] =
1299             result[2] =  /* widen to GLuint first so the shift is never done in signed int */
1300             result[3] = (GLuint) hx | ((GLuint) hy << 16);
1301             store_vector4ui(inst, machine, result);
1302          }
1303          break;
1304       case OPCODE_PK2US:       /* pack two GLushorts into one 32-bit float */
1305          {
1306             GLfloat a[4];
1307             GLuint result[4], usx, usy;
1308             fetch_vector4(&inst->SrcReg[0], machine, a);
1309             a[0] = CLAMP(a[0], 0.0F, 1.0F);
1310             a[1] = CLAMP(a[1], 0.0F, 1.0F);
1311             usx = IROUND(a[0] * 65535.0F);
1312             usy = IROUND(a[1] * 65535.0F);
1313             result[0] =
1314             result[1] =
1315             result[2] =
1316             result[3] = usx | (usy << 16);
1317             store_vector4ui(inst, machine, result);
1318          }
1319          break;
1320       case OPCODE_PK4B:        /* pack four GLbytes into one 32-bit float */
1321          {
1322             GLfloat a[4];
1323             GLuint result[4], ubx, uby, ubz, ubw;
1324             fetch_vector4(&inst->SrcReg[0], machine, a);
1325             a[0] = CLAMP(a[0], -128.0F / 127.0F, 1.0F);
1326             a[1] = CLAMP(a[1], -128.0F / 127.0F, 1.0F);
1327             a[2] = CLAMP(a[2], -128.0F / 127.0F, 1.0F);
1328             a[3] = CLAMP(a[3], -128.0F / 127.0F, 1.0F);
1329             ubx = IROUND(127.0F * a[0] + 128.0F);
1330             uby = IROUND(127.0F * a[1] + 128.0F);
1331             ubz = IROUND(127.0F * a[2] + 128.0F);
1332             ubw = IROUND(127.0F * a[3] + 128.0F);
1333             result[0] =
1334             result[1] =
1335             result[2] =
1336             result[3] = ubx | (uby << 8) | (ubz << 16) | (ubw << 24);
1337             store_vector4ui(inst, machine, result);
1338          }
1339          break;
1340       case OPCODE_PK4UB:       /* pack four GLubytes into one 32-bit float */
1341          {
1342             GLfloat a[4];
1343             GLuint result[4], ubx, uby, ubz, ubw;
1344             fetch_vector4(&inst->SrcReg[0], machine, a);
1345             a[0] = CLAMP(a[0], 0.0F, 1.0F);
1346             a[1] = CLAMP(a[1], 0.0F, 1.0F);
1347             a[2] = CLAMP(a[2], 0.0F, 1.0F);
1348             a[3] = CLAMP(a[3], 0.0F, 1.0F);
1349             ubx = IROUND(255.0F * a[0]);
1350             uby = IROUND(255.0F * a[1]);
1351             ubz = IROUND(255.0F * a[2]);
1352             ubw = IROUND(255.0F * a[3]);
1353             result[0] =
1354             result[1] =
1355             result[2] =
1356             result[3] = ubx | (uby << 8) | (ubz << 16) | (ubw << 24);
1357             store_vector4ui(inst, machine, result);
1358          }
1359          break;
1360       case OPCODE_POW:
1361          {
1362             GLfloat a[4], b[4], result[4];
1363             fetch_vector1(&inst->SrcReg[0], machine, a);
1364             fetch_vector1(&inst->SrcReg[1], machine, b);
1365             result[0] = result[1] = result[2] = result[3]
1366                = (GLfloat) pow(a[0], b[0]);
1367             store_vector4(inst, machine, result);
1368          }
1369          break;
1370       case OPCODE_RCC:  /* clamped reciprocal */
1371          {
1372             const float largest = 1.884467e+19, smallest = 5.42101e-20;  /* magnitude bounds for r */
1373             GLfloat a[4], r, result[4];
1374             fetch_vector1(&inst->SrcReg[0], machine, a);
1375             if (DEBUG_PROG) {
1376                if (a[0] == 0)
1377                   printf("RCC(0)\n");
1378                else if (IS_INF_OR_NAN(a[0]))
1379                   printf("RCC(inf)\n");
1380             }
1381             if (a[0] == 1.0F) {
1382                r = 1.0F;  /* exact 1.0 input skips the divide */
1383             }
1384             else {
1385                r = 1.0F / a[0];
1386             }
1387             if (positive(r)) {  /* sign test via positive(), a helper defined elsewhere */
1388                if (r > largest) {
1389                   r = largest;
1390                }
1391                else if (r < smallest) {
1392                   r = smallest;
1393                }
1394             }
1395             else {  /* mirror of the clamp above for the other sign */
1396                if (r < -largest) {
1397                   r = -largest;
1398                }
1399                else if (r > -smallest) {
1400                   r = -smallest;
1401                }
1402             }
1403             result[0] = result[1] = result[2] = result[3] = r;  /* same scalar in all lanes */
1404             store_vector4(inst, machine, result);
1405          }
1406          break;
1407
1408       case OPCODE_RCP:
1409          {
1410             GLfloat a[4], result[4];
1411             fetch_vector1(&inst->SrcReg[0], machine, a);
1412             if (DEBUG_PROG) {
1413                if (a[0] == 0)
1414                   printf("RCP(0)\n");
1415                else if (IS_INF_OR_NAN(a[0]))
1416                   printf("RCP(inf)\n");
1417             }
1418             result[0] = result[1] = result[2] = result[3] = 1.0F / a[0];
1419             store_vector4(inst, machine, result);
1420          }
1421          break;
1422       case OPCODE_RET:         /* return from subroutine (conditional) */
1423          if (eval_condition(machine, inst)) {
1424             if (machine->StackDepth == 0) {
1425                return GL_TRUE;  /* Per GL_NV_vertex_program2 spec */
1426             }
1427             /* subtract one because of pc++ in the for loop */
1428             pc = machine->CallStack[--machine->StackDepth] - 1;
1429          }
1430          break;
1431       case OPCODE_RFL:         /* reflection vector */
1432          {
1433             GLfloat axis[4], dir[4], result[4], tmpX, tmpW;
1434             fetch_vector4(&inst->SrcReg[0], machine, axis);
1435             fetch_vector4(&inst->SrcReg[1], machine, dir);
1436             tmpW = DOT3(axis, axis);  /* squared length of axis */
1437             tmpX = (2.0F * DOT3(axis, dir)) / tmpW;
1438             result[0] = tmpX * axis[0] - dir[0];
1439             result[1] = tmpX * axis[1] - dir[1];
1440             result[2] = tmpX * axis[2] - dir[2];
1441             result[3] = 0.0F;  /* w is undefined (XXX enforce in parser); don't pass garbage */
1442             store_vector4(inst, machine, result);
1443          }
1444          break;
1445       case OPCODE_RSQ:         /* 1 / sqrt() */
1446          {
1447             GLfloat a[4], result[4];
1448             fetch_vector1(&inst->SrcReg[0], machine, a);
1449             a[0] = FABSF(a[0]);
1450             result[0] = result[1] = result[2] = result[3] = INV_SQRTF(a[0]);
1451             store_vector4(inst, machine, result);
1452             if (DEBUG_PROG) {
1453                printf("RSQ %g = 1/sqrt(|%g|)\n", result[0], a[0]);
1454             }
1455          }
1456          break;
1457       case OPCODE_SCS:         /* sine and cos */
1458          {
1459             GLfloat a[4], result[4];
1460             fetch_vector1(&inst->SrcReg[0], machine, a);
1461             result[0] = (GLfloat) cos(a[0]);
1462             result[1] = (GLfloat) sin(a[0]);
1463             result[2] = 0.0;    /* undefined! */
1464             result[3] = 0.0;    /* undefined! */
1465             store_vector4(inst, machine, result);
1466          }
1467          break;
1468       case OPCODE_SEQ:         /* set on equal */
1469          {
1470             GLfloat a[4], b[4], result[4];
1471             fetch_vector4(&inst->SrcReg[0], machine, a);
1472             fetch_vector4(&inst->SrcReg[1], machine, b);
1473             result[0] = (a[0] == b[0]) ? 1.0F : 0.0F;
1474             result[1] = (a[1] == b[1]) ? 1.0F : 0.0F;
1475             result[2] = (a[2] == b[2]) ? 1.0F : 0.0F;
1476             result[3] = (a[3] == b[3]) ? 1.0F : 0.0F;
1477             store_vector4(inst, machine, result);
1478             if (DEBUG_PROG) {
1479                printf("SEQ (%g %g %g %g) = (%g %g %g %g) == (%g %g %g %g)\n",
1480                       result[0], result[1], result[2], result[3],
1481                       a[0], a[1], a[2], a[3],
1482                       b[0], b[1], b[2], b[3]);
1483             }
1484          }
1485          break;
1486       case OPCODE_SFL:         /* set false, operands ignored */
1487          {
1488             static const GLfloat result[4] = { 0.0F, 0.0F, 0.0F, 0.0F };
1489             store_vector4(inst, machine, result);
1490          }
1491          break;
1492       case OPCODE_SGE:         /* set on greater or equal */
1493          {
1494             GLfloat a[4], b[4], result[4];
1495             fetch_vector4(&inst->SrcReg[0], machine, a);
1496             fetch_vector4(&inst->SrcReg[1], machine, b);
1497             result[0] = (a[0] >= b[0]) ? 1.0F : 0.0F;
1498             result[1] = (a[1] >= b[1]) ? 1.0F : 0.0F;
1499             result[2] = (a[2] >= b[2]) ? 1.0F : 0.0F;
1500             result[3] = (a[3] >= b[3]) ? 1.0F : 0.0F;
1501             store_vector4(inst, machine, result);
1502             if (DEBUG_PROG) {
1503                printf("SGE (%g %g %g %g) = (%g %g %g %g) >= (%g %g %g %g)\n",
1504                       result[0], result[1], result[2], result[3],
1505                       a[0], a[1], a[2], a[3],
1506                       b[0], b[1], b[2], b[3]);
1507             }
1508          }
1509          break;
1510       case OPCODE_SGT:         /* set on greater */
1511          {
1512             GLfloat a[4], b[4], result[4];
1513             fetch_vector4(&inst->SrcReg[0], machine, a);
1514             fetch_vector4(&inst->SrcReg[1], machine, b);
1515             result[0] = (a[0] > b[0]) ? 1.0F : 0.0F;
1516             result[1] = (a[1] > b[1]) ? 1.0F : 0.0F;
1517             result[2] = (a[2] > b[2]) ? 1.0F : 0.0F;
1518             result[3] = (a[3] > b[3]) ? 1.0F : 0.0F;
1519             store_vector4(inst, machine, result);
1520             if (DEBUG_PROG) {
1521                printf("SGT (%g %g %g %g) = (%g %g %g %g) > (%g %g %g %g)\n",
1522                       result[0], result[1], result[2], result[3],
1523                       a[0], a[1], a[2], a[3],
1524                       b[0], b[1], b[2], b[3]);
1525             }
1526          }
1527          break;
1528       case OPCODE_SIN:
1529          {
1530             GLfloat a[4], result[4];
1531             fetch_vector1(&inst->SrcReg[0], machine, a);
1532             result[0] = result[1] = result[2] = result[3]
1533                = (GLfloat) sin(a[0]);
1534             store_vector4(inst, machine, result);
1535          }
1536          break;
1537       case OPCODE_SLE:         /* set on less or equal */
1538          {
1539             GLfloat a[4], b[4], result[4];
1540             fetch_vector4(&inst->SrcReg[0], machine, a);
1541             fetch_vector4(&inst->SrcReg[1], machine, b);
1542             result[0] = (a[0] <= b[0]) ? 1.0F : 0.0F;
1543             result[1] = (a[1] <= b[1]) ? 1.0F : 0.0F;
1544             result[2] = (a[2] <= b[2]) ? 1.0F : 0.0F;
1545             result[3] = (a[3] <= b[3]) ? 1.0F : 0.0F;
1546             store_vector4(inst, machine, result);
1547             if (DEBUG_PROG) {
1548                printf("SLE (%g %g %g %g) = (%g %g %g %g) <= (%g %g %g %g)\n",
1549                       result[0], result[1], result[2], result[3],
1550                       a[0], a[1], a[2], a[3],
1551                       b[0], b[1], b[2], b[3]);
1552             }
1553          }
1554          break;
1555       case OPCODE_SLT:         /* set on less */
1556          {
1557             GLfloat a[4], b[4], result[4];
1558             fetch_vector4(&inst->SrcReg[0], machine, a);
1559             fetch_vector4(&inst->SrcReg[1], machine, b);
1560             result[0] = (a[0] < b[0]) ? 1.0F : 0.0F;
1561             result[1] = (a[1] < b[1]) ? 1.0F : 0.0F;
1562             result[2] = (a[2] < b[2]) ? 1.0F : 0.0F;
1563             result[3] = (a[3] < b[3]) ? 1.0F : 0.0F;
1564             store_vector4(inst, machine, result);
1565             if (DEBUG_PROG) {
1566                printf("SLT (%g %g %g %g) = (%g %g %g %g) < (%g %g %g %g)\n",
1567                       result[0], result[1], result[2], result[3],
1568                       a[0], a[1], a[2], a[3],
1569                       b[0], b[1], b[2], b[3]);
1570             }
1571          }
1572          break;
1573       case OPCODE_SNE:         /* set on not equal */
1574          {
1575             GLfloat a[4], b[4], result[4];
1576             fetch_vector4(&inst->SrcReg[0], machine, a);
1577             fetch_vector4(&inst->SrcReg[1], machine, b);
1578             result[0] = (a[0] != b[0]) ? 1.0F : 0.0F;
1579             result[1] = (a[1] != b[1]) ? 1.0F : 0.0F;
1580             result[2] = (a[2] != b[2]) ? 1.0F : 0.0F;
1581             result[3] = (a[3] != b[3]) ? 1.0F : 0.0F;
1582             store_vector4(inst, machine, result);
1583             if (DEBUG_PROG) {
1584                printf("SNE (%g %g %g %g) = (%g %g %g %g) != (%g %g %g %g)\n",
1585                       result[0], result[1], result[2], result[3],
1586                       a[0], a[1], a[2], a[3],
1587                       b[0], b[1], b[2], b[3]);
1588             }
1589          }
1590          break;
1591       case OPCODE_SSG:         /* set sign (-1, 0 or +1) */
1592          {
1593             GLfloat a[4], result[4];
1594             fetch_vector4(&inst->SrcReg[0], machine, a);
1595             result[0] = (GLfloat) ((a[0] > 0.0F) - (a[0] < 0.0F));
1596             result[1] = (GLfloat) ((a[1] > 0.0F) - (a[1] < 0.0F));
1597             result[2] = (GLfloat) ((a[2] > 0.0F) - (a[2] < 0.0F));
1598             result[3] = (GLfloat) ((a[3] > 0.0F) - (a[3] < 0.0F));
1599             store_vector4(inst, machine, result);
1600          }
1601          break;
1602       case OPCODE_STR:         /* set true, operands ignored */
1603          {
1604             static const GLfloat result[4] = { 1.0F, 1.0F, 1.0F, 1.0F };
1605             store_vector4(inst, machine, result);
1606          }
1607          break;
1608       case OPCODE_SUB:
1609          {
1610             GLfloat a[4], b[4], result[4];
1611             fetch_vector4(&inst->SrcReg[0], machine, a);
1612             fetch_vector4(&inst->SrcReg[1], machine, b);
1613             result[0] = a[0] - b[0];
1614             result[1] = a[1] - b[1];
1615             result[2] = a[2] - b[2];
1616             result[3] = a[3] - b[3];
1617             store_vector4(inst, machine, result);
1618             if (DEBUG_PROG) {
1619                printf("SUB (%g %g %g %g) = (%g %g %g %g) - (%g %g %g %g)\n",
1620                       result[0], result[1], result[2], result[3],
1621                       a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
1622             }
1623          }
1624          break;
1625       case OPCODE_SWZ:         /* extended swizzle */
1626          {
1627             const struct prog_src_register *source = &inst->SrcReg[0];
1628             const GLfloat *src = get_src_register_pointer(source, machine);
1629             GLfloat result[4];
1630             GLuint i;
1631             for (i = 0; i < 4; i++) {
1632                const GLuint swz = GET_SWZ(source->Swizzle, i);
1633                if (swz == SWIZZLE_ZERO)
1634                   result[i] = 0.0;
1635                else if (swz == SWIZZLE_ONE)
1636                   result[i] = 1.0;
1637                else {
1638                   ASSERT(swz >= 0);
1639                   ASSERT(swz <= 3);
1640                   result[i] = src[swz];
1641                }
1642                if (source->Negate & (1 << i))
1643                   result[i] = -result[i];
1644             }
1645             store_vector4(inst, machine, result);
1646          }
1647          break;
1648       case OPCODE_TEX:         /* Both ARB and NV frag prog */
1649          /* Simple texel lookup */
1650          {
1651             GLfloat texcoord[4], color[4];
1652             fetch_vector4(&inst->SrcReg[0], machine, texcoord);
1653
1654             fetch_texel(ctx, machine, inst, texcoord, 0.0, color);
1655
1656             if (DEBUG_PROG) {
1657                printf("TEX (%g, %g, %g, %g) = texture[%d][%g, %g, %g, %g]\n",
1658                       color[0], color[1], color[2], color[3],
1659                       inst->TexSrcUnit,
1660                       texcoord[0], texcoord[1], texcoord[2], texcoord[3]);
1661             }
1662             store_vector4(inst, machine, color);
1663          }
1664          break;
1665       case OPCODE_TXB:         /* GL_ARB_fragment_program only */
1666          /* Texel lookup with LOD bias */
1667          {
1668             GLfloat texcoord[4], color[4], lodBias;
1669
1670             fetch_vector4(&inst->SrcReg[0], machine, texcoord);
1671
1672             /* texcoord[3] is the bias to add to lambda */
1673             lodBias = texcoord[3];
1674
1675             fetch_texel(ctx, machine, inst, texcoord, lodBias, color);
1676
1677             if (DEBUG_PROG) {
1678                printf("TXB (%g, %g, %g, %g) = texture[%d][%g %g %g %g]"
1679                       "  bias %g\n",
1680                       color[0], color[1], color[2], color[3],
1681                       inst->TexSrcUnit,
1682                       texcoord[0],
1683                       texcoord[1],
1684                       texcoord[2],
1685                       texcoord[3],
1686                       lodBias);
1687             }
1688
1689             store_vector4(inst, machine, color);
1690          }
1691          break;
1692       case OPCODE_TXD:         /* GL_NV_fragment_program only */
1693          /* Texture lookup w/ partial derivatives for LOD */
1694          {
1695             GLfloat texcoord[4], dtdx[4], dtdy[4], color[4];
1696             fetch_vector4(&inst->SrcReg[0], machine, texcoord);
1697             fetch_vector4(&inst->SrcReg[1], machine, dtdx);
1698             fetch_vector4(&inst->SrcReg[2], machine, dtdy);
1699             machine->FetchTexelDeriv(ctx, texcoord, dtdx, dtdy,
1700                                      0.0, /* lodBias */
1701                                      inst->TexSrcUnit, color);
1702             store_vector4(inst, machine, color);
1703          }
1704          break;
1705       case OPCODE_TXL:
1706          /* Texel lookup with explicit LOD */
1707          {
1708             GLfloat texcoord[4], color[4], lod;
1709
1710             fetch_vector4(&inst->SrcReg[0], machine, texcoord);
1711
1712             /* texcoord[3] is the LOD */
1713             lod = texcoord[3];
1714
1715             machine->FetchTexelLod(ctx, texcoord, lod,
1716                                    machine->Samplers[inst->TexSrcUnit], color);
1717
1718             store_vector4(inst, machine, color);
1719          }
1720          break;
1721       case OPCODE_TXP:         /* GL_ARB_fragment_program only */
1722          /* Texture lookup w/ projective divide */
1723          {
1724             GLfloat texcoord[4], color[4];
1725
1726             fetch_vector4(&inst->SrcReg[0], machine, texcoord);
1727             /* Not so sure about this test - if texcoord[3] is
1728              * zero, we'd probably be fine except for an ASSERT in
1729              * IROUND_POS() which gets triggered by the inf values created.
1730              */
1731             if (texcoord[3] != 0.0) {
1732                texcoord[0] /= texcoord[3];
1733                texcoord[1] /= texcoord[3];
1734                texcoord[2] /= texcoord[3];
1735             }
1736
1737             fetch_texel(ctx, machine, inst, texcoord, 0.0, color);
1738
1739             store_vector4(inst, machine, color);
1740          }
1741          break;
1742       case OPCODE_TXP_NV:      /* GL_NV_fragment_program only */
1743          /* Texture lookup w/ projective divide, as above, but do not
1744           * do the divide by w if sampling from a cube map.
1745           */
1746          {
1747             GLfloat texcoord[4], color[4];
1748
1749             fetch_vector4(&inst->SrcReg[0], machine, texcoord);
1750             if (inst->TexSrcTarget != TEXTURE_CUBE_INDEX &&
1751                 texcoord[3] != 0.0) {
1752                texcoord[0] /= texcoord[3];
1753                texcoord[1] /= texcoord[3];
1754                texcoord[2] /= texcoord[3];
1755             }
1756
1757             fetch_texel(ctx, machine, inst, texcoord, 0.0, color);
1758
1759             store_vector4(inst, machine, color);
1760          }
1761          break;
1762       case OPCODE_TRUNC:       /* truncate toward zero */
1763          {
1764             GLfloat a[4], result[4];
1765             fetch_vector4(&inst->SrcReg[0], machine, a);
1766             result[0] = (GLfloat) (GLint) a[0];
1767             result[1] = (GLfloat) (GLint) a[1];
1768             result[2] = (GLfloat) (GLint) a[2];
1769             result[3] = (GLfloat) (GLint) a[3];
1770             store_vector4(inst, machine, result);
1771          }
1772          break;
1773       case OPCODE_UP2H:        /* unpack two 16-bit floats */
1774          {
1775             const GLuint raw = fetch_vector1ui(&inst->SrcReg[0], machine);
1776             GLfloat result[4];
1777             GLushort hx, hy;
1778             hx = raw & 0xffff;
1779             hy = raw >> 16;
1780             result[0] = result[2] = _mesa_half_to_float(hx);
1781             result[1] = result[3] = _mesa_half_to_float(hy);
1782             store_vector4(inst, machine, result);
1783          }
1784          break;
1785       case OPCODE_UP2US:       /* unpack two GLushorts */
1786          {
1787             const GLuint raw = fetch_vector1ui(&inst->SrcReg[0], machine);
1788             GLfloat result[4];
1789             GLushort usx, usy;
1790             usx = raw & 0xffff;
1791             usy = raw >> 16;
1792             result[0] = result[2] = usx * (1.0f / 65535.0f);
1793             result[1] = result[3] = usy * (1.0f / 65535.0f);
1794             store_vector4(inst, machine, result);
1795          }
1796          break;
1797       case OPCODE_UP4B:        /* unpack four GLbytes */
1798          {
1799             const GLuint raw = fetch_vector1ui(&inst->SrcReg[0], machine);
1800             GLfloat result[4];
1801             result[0] = (((raw >> 0) & 0xff) - 128) / 127.0F;
1802             result[1] = (((raw >> 8) & 0xff) - 128) / 127.0F;
1803             result[2] = (((raw >> 16) & 0xff) - 128) / 127.0F;
1804             result[3] = (((raw >> 24) & 0xff) - 128) / 127.0F;
1805             store_vector4(inst, machine, result);
1806          }
1807          break;
1808       case OPCODE_UP4UB:       /* unpack four GLubytes */
1809          {
1810             const GLuint raw = fetch_vector1ui(&inst->SrcReg[0], machine);
1811             GLfloat result[4];
1812             result[0] = ((raw >> 0) & 0xff) / 255.0F;
1813             result[1] = ((raw >> 8) & 0xff) / 255.0F;
1814             result[2] = ((raw >> 16) & 0xff) / 255.0F;
1815             result[3] = ((raw >> 24) & 0xff) / 255.0F;
1816             store_vector4(inst, machine, result);
1817          }
1818          break;
1819       case OPCODE_XOR:         /* bitwise XOR */
1820          {
1821             GLuint a[4], b[4], result[4];
1822             fetch_vector4ui(&inst->SrcReg[0], machine, a);
1823             fetch_vector4ui(&inst->SrcReg[1], machine, b);
1824             result[0] = a[0] ^ b[0];
1825             result[1] = a[1] ^ b[1];
1826             result[2] = a[2] ^ b[2];
1827             result[3] = a[3] ^ b[3];
1828             store_vector4ui(inst, machine, result);
1829          }
1830          break;
1831       case OPCODE_XPD:         /* cross product */
1832          {
1833             GLfloat a[4], b[4], result[4];
1834             fetch_vector4(&inst->SrcReg[0], machine, a);
1835             fetch_vector4(&inst->SrcReg[1], machine, b);
1836             result[0] = a[1] * b[2] - a[2] * b[1];
1837             result[1] = a[2] * b[0] - a[0] * b[2];
1838             result[2] = a[0] * b[1] - a[1] * b[0];
1839             result[3] = 1.0;
1840             store_vector4(inst, machine, result);
1841             if (DEBUG_PROG) {
1842                printf("XPD (%g %g %g %g) = (%g %g %g) X (%g %g %g)\n",
1843                       result[0], result[1], result[2], result[3],
1844                       a[0], a[1], a[2], b[0], b[1], b[2]);
1845             }
1846          }
1847          break;
1848       case OPCODE_X2D:         /* 2-D matrix transform */
1849          {
1850             GLfloat a[4], b[4], c[4], result[4];
1851             fetch_vector4(&inst->SrcReg[0], machine, a);
1852             fetch_vector4(&inst->SrcReg[1], machine, b);
1853             fetch_vector4(&inst->SrcReg[2], machine, c);
1854             result[0] = a[0] + b[0] * c[0] + b[1] * c[1];
1855             result[1] = a[1] + b[0] * c[2] + b[1] * c[3];
1856             result[2] = a[2] + b[0] * c[0] + b[1] * c[1];
1857             result[3] = a[3] + b[0] * c[2] + b[1] * c[3];
1858             store_vector4(inst, machine, result);
1859          }
1860          break;
1861       case OPCODE_PRINT:
1862          {
1863             if (inst->SrcReg[0].File != PROGRAM_UNDEFINED) {
1864                GLfloat a[4];
1865                fetch_vector4(&inst->SrcReg[0], machine, a);
1866                printf("%s%g, %g, %g, %g\n", (const char *) inst->Data,
1867                             a[0], a[1], a[2], a[3]);
1868             }
1869             else {
1870                printf("%s\n", (const char *) inst->Data);
1871             }
1872          }
1873          break;
1874       case OPCODE_END:
1875          return GL_TRUE;
1876       default:
1877          _mesa_problem(ctx, "Bad opcode %d in _mesa_execute_program",
1878                        inst->Opcode);
1879          return GL_TRUE;        /* return value doesn't matter */
1880       }
1881
1882       numExec++;
1883       if (numExec > maxExec) {
1884          static GLboolean reported = GL_FALSE;
1885          if (!reported) {
1886             _mesa_problem(ctx, "Infinite loop detected in fragment program");
1887             reported = GL_TRUE;
1888          }
1889          return GL_TRUE;
1890       }
1891
1892    } /* for pc */
1893
1894    return GL_TRUE;
1895 }