src/mesa/program/prog_execute.c

   1 /*
   2  * Mesa 3-D graphics library
   3  * Version:  7.3
   4  *
   5  * Copyright (C) 1999-2008  Brian Paul   All Rights Reserved.
   6  *
   7  * Permission is hereby granted, free of charge, to any person obtaining a
   8  * copy of this software and associated documentation files (the "Software"),
   9  * to deal in the Software without restriction, including without limitation
  10  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  11  * and/or sell copies of the Software, and to permit persons to whom the
  12  * Software is furnished to do so, subject to the following conditions:
  13  *
  14  * The above copyright notice and this permission notice shall be included
  15  * in all copies or substantial portions of the Software.
  16  *
  17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  18  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  20  * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
  21  * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  22  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  23  */
  24
  25 /**
  26  * \file prog_execute.c
  27  * Software interpreter for vertex/fragment programs.
  28  * \author Brian Paul
  29  */
  30
  31 /*
  32  * NOTE: we do everything in single-precision floating point; we don't
  33  * currently observe the single/half/fixed-precision qualifiers.
  34  *
  35  */
  36
  37
  38 #include "main/glheader.h"
  39 #include "main/colormac.h"
  40 #include "main/context.h"
  41 #include "main/macros.h"
  42 #include "prog_execute.h"
  43 #include "prog_instruction.h"
  44 #include "prog_parameter.h"
  45 #include "prog_print.h"
  46 #include "prog_noise.h"
  47
  48
  49 /* debug predicate */
  50 #define DEBUG_PROG 0
  51
  52
  53 /**
  54  * Set x to positive or negative infinity.
  55  */
  56 #if defined(USE_IEEE) || defined(_WIN32)
  57 #define SET_POS_INFINITY(x)                  \
  58    do {                                      \
  59          fi_type fi;                         \
  60          fi.i = 0x7F800000;                  \
  61          x = fi.f;                           \
  62    } while (0)
  63 #define SET_NEG_INFINITY(x)                  \
  64    do {                                      \
  65          fi_type fi;                         \
  66          fi.i = 0xFF800000;                  \
  67          x = fi.f;                           \
  68    } while (0)
  69 #elif defined(VMS)
  70 #define SET_POS_INFINITY(x)  x = __MAXFLOAT
  71 #define SET_NEG_INFINITY(x)  x = -__MAXFLOAT
  72 #else
  73 #define SET_POS_INFINITY(x)  x = (GLfloat) HUGE_VAL
  74 #define SET_NEG_INFINITY(x)  x = (GLfloat) -HUGE_VAL
  75 #endif
  76
  77 #define SET_FLOAT_BITS(x, bits) ((fi_type *) (void *) &(x))->i = bits
  78
  79
/* All-zero vector; get_src_register_pointer() returns it for out-of-range
 * source register indices.
 */
static const GLfloat ZeroVec[4] = { 0.0F, 0.0F, 0.0F, 0.0F };
  81
  82
  83
  84 /**
  85  * Return TRUE for +0 and other positive values, FALSE otherwise.
  86  * Used for RCC opcode.
  87  */
  88 static INLINE GLboolean
  89 positive(float x)
  90 {
  91    fi_type fi;
  92    fi.f = x;
  93    if (fi.i & 0x80000000)
  94       return GL_FALSE;
  95    return GL_TRUE;
  96 }
  97
  98
  99
 100 /**
 101  * Return a pointer to the 4-element float vector specified by the given
 102  * source register.
 103  */
 104 static INLINE const GLfloat *
 105 get_src_register_pointer(const struct prog_src_register *source,
 106                          const struct gl_program_machine *machine)
 107 {
 108    const struct gl_program *prog = machine->CurProgram;
 109    GLint reg = source->Index;
 110
 111    if (source->RelAddr) {
 112       /* add address register value to src index/offset */
 113       reg += machine->AddressReg[0][0];
 114       if (reg < 0) {
 115          return ZeroVec;
 116       }
 117    }
 118
 119    switch (source->File) {
 120    case PROGRAM_TEMPORARY:
 121       if (reg >= MAX_PROGRAM_TEMPS)
 122          return ZeroVec;
 123       return machine->Temporaries[reg];
 124
 125    case PROGRAM_INPUT:
 126       if (prog->Target == GL_VERTEX_PROGRAM_ARB) {
 127          if (reg >= VERT_ATTRIB_MAX)
 128             return ZeroVec;
 129          return machine->VertAttribs[reg];
 130       }
 131       else {
 132          if (reg >= FRAG_ATTRIB_MAX)
 133             return ZeroVec;
 134          return machine->Attribs[reg][machine->CurElement];
 135       }
 136
 137    case PROGRAM_OUTPUT:
 138       if (reg >= MAX_PROGRAM_OUTPUTS)
 139          return ZeroVec;
 140       return machine->Outputs[reg];
 141
 142    case PROGRAM_LOCAL_PARAM:
 143       if (reg >= MAX_PROGRAM_LOCAL_PARAMS)
 144          return ZeroVec;
 145       return machine->CurProgram->LocalParams[reg];
 146
 147    case PROGRAM_ENV_PARAM:
 148       if (reg >= MAX_PROGRAM_ENV_PARAMS)
 149          return ZeroVec;
 150       return machine->EnvParams[reg];
 151
 152    case PROGRAM_STATE_VAR:
 153       /* Fallthrough */
 154    case PROGRAM_CONSTANT:
 155       /* Fallthrough */
 156    case PROGRAM_UNIFORM:
 157       /* Fallthrough */
 158    case PROGRAM_NAMED_PARAM:
 159       if (reg >= (GLint) prog->Parameters->NumParameters)
 160          return ZeroVec;
 161       return prog->Parameters->ParameterValues[reg];
 162
 163    default:
 164       _mesa_problem(NULL,
 165          "Invalid src register file %d in get_src_register_pointer()",
 166          source->File);
 167       return NULL;
 168    }
 169 }
 170
 171
 172 /**
 173  * Return a pointer to the 4-element float vector specified by the given
 174  * destination register.
 175  */
 176 static INLINE GLfloat *
 177 get_dst_register_pointer(const struct prog_dst_register *dest,
 178                          struct gl_program_machine *machine)
 179 {
 180    static GLfloat dummyReg[4];
 181    GLint reg = dest->Index;
 182
 183    if (dest->RelAddr) {
 184       /* add address register value to src index/offset */
 185       reg += machine->AddressReg[0][0];
 186       if (reg < 0) {
 187          return dummyReg;
 188       }
 189    }
 190
 191    switch (dest->File) {
 192    case PROGRAM_TEMPORARY:
 193       if (reg >= MAX_PROGRAM_TEMPS)
 194          return dummyReg;
 195       return machine->Temporaries[reg];
 196
 197    case PROGRAM_OUTPUT:
 198       if (reg >= MAX_PROGRAM_OUTPUTS)
 199          return dummyReg;
 200       return machine->Outputs[reg];
 201
 202    case PROGRAM_WRITE_ONLY:
 203       return dummyReg;
 204
 205    default:
 206       _mesa_problem(NULL,
 207          "Invalid dest register file %d in get_dst_register_pointer()",
 208          dest->File);
 209       return NULL;
 210    }
 211 }
 212
 213
 214
 215 /**
 216  * Fetch a 4-element float vector from the given source register.
 217  * Apply swizzling and negating as needed.
 218  */
 219 static void
 220 fetch_vector4(const struct prog_src_register *source,
 221               const struct gl_program_machine *machine, GLfloat result[4])
 222 {
 223    const GLfloat *src = get_src_register_pointer(source, machine);
 224    ASSERT(src);
 225
 226    if (source->Swizzle == SWIZZLE_NOOP) {
 227       /* no swizzling */
 228       COPY_4V(result, src);
 229    }
 230    else {
 231       ASSERT(GET_SWZ(source->Swizzle, 0) <= 3);
 232       ASSERT(GET_SWZ(source->Swizzle, 1) <= 3);
 233       ASSERT(GET_SWZ(source->Swizzle, 2) <= 3);
 234       ASSERT(GET_SWZ(source->Swizzle, 3) <= 3);
 235       result[0] = src[GET_SWZ(source->Swizzle, 0)];
 236       result[1] = src[GET_SWZ(source->Swizzle, 1)];
 237       result[2] = src[GET_SWZ(source->Swizzle, 2)];
 238       result[3] = src[GET_SWZ(source->Swizzle, 3)];
 239    }
 240
 241    if (source->Abs) {
 242       result[0] = FABSF(result[0]);
 243       result[1] = FABSF(result[1]);
 244       result[2] = FABSF(result[2]);
 245       result[3] = FABSF(result[3]);
 246    }
 247    if (source->Negate) {
 248       ASSERT(source->Negate == NEGATE_XYZW);
 249       result[0] = -result[0];
 250       result[1] = -result[1];
 251       result[2] = -result[2];
 252       result[3] = -result[3];
 253    }
 254
 255 #ifdef NAN_CHECK
 256    assert(!IS_INF_OR_NAN(result[0]));
 257    assert(!IS_INF_OR_NAN(result[0]));
 258    assert(!IS_INF_OR_NAN(result[0]));
 259    assert(!IS_INF_OR_NAN(result[0]));
 260 #endif
 261 }
 262
 263
 264 /**
 265  * Fetch a 4-element uint vector from the given source register.
 266  * Apply swizzling but not negation/abs.
 267  */
 268 static void
 269 fetch_vector4ui(const struct prog_src_register *source,
 270                 const struct gl_program_machine *machine, GLuint result[4])
 271 {
 272    const GLuint *src = (GLuint *) get_src_register_pointer(source, machine);
 273    ASSERT(src);
 274
 275    if (source->Swizzle == SWIZZLE_NOOP) {
 276       /* no swizzling */
 277       COPY_4V(result, src);
 278    }
 279    else {
 280       ASSERT(GET_SWZ(source->Swizzle, 0) <= 3);
 281       ASSERT(GET_SWZ(source->Swizzle, 1) <= 3);
 282       ASSERT(GET_SWZ(source->Swizzle, 2) <= 3);
 283       ASSERT(GET_SWZ(source->Swizzle, 3) <= 3);
 284       result[0] = src[GET_SWZ(source->Swizzle, 0)];
 285       result[1] = src[GET_SWZ(source->Swizzle, 1)];
 286       result[2] = src[GET_SWZ(source->Swizzle, 2)];
 287       result[3] = src[GET_SWZ(source->Swizzle, 3)];
 288    }
 289
 290    /* Note: no Negate or Abs here */
 291 }
 292
 293
 294
 295 /**
 296  * Fetch the derivative with respect to X or Y for the given register.
 297  * XXX this currently only works for fragment program input attribs.
 298  */
 299 static void
 300 fetch_vector4_deriv(GLcontext * ctx,
 301                     const struct prog_src_register *source,
 302                     const struct gl_program_machine *machine,
 303                     char xOrY, GLfloat result[4])
 304 {
 305    if (source->File == PROGRAM_INPUT &&
 306        source->Index < (GLint) machine->NumDeriv) {
 307       const GLint col = machine->CurElement;
 308       const GLfloat w = machine->Attribs[FRAG_ATTRIB_WPOS][col][3];
 309       const GLfloat invQ = 1.0f / w;
 310       GLfloat deriv[4];
 311
 312       if (xOrY == 'X') {
 313          deriv[0] = machine->DerivX[source->Index][0] * invQ;
 314          deriv[1] = machine->DerivX[source->Index][1] * invQ;
 315          deriv[2] = machine->DerivX[source->Index][2] * invQ;
 316          deriv[3] = machine->DerivX[source->Index][3] * invQ;
 317       }
 318       else {
 319          deriv[0] = machine->DerivY[source->Index][0] * invQ;
 320          deriv[1] = machine->DerivY[source->Index][1] * invQ;
 321          deriv[2] = machine->DerivY[source->Index][2] * invQ;
 322          deriv[3] = machine->DerivY[source->Index][3] * invQ;
 323       }
 324
 325       result[0] = deriv[GET_SWZ(source->Swizzle, 0)];
 326       result[1] = deriv[GET_SWZ(source->Swizzle, 1)];
 327       result[2] = deriv[GET_SWZ(source->Swizzle, 2)];
 328       result[3] = deriv[GET_SWZ(source->Swizzle, 3)];
 329
 330       if (source->Abs) {
 331          result[0] = FABSF(result[0]);
 332          result[1] = FABSF(result[1]);
 333          result[2] = FABSF(result[2]);
 334          result[3] = FABSF(result[3]);
 335       }
 336       if (source->Negate) {
 337          ASSERT(source->Negate == NEGATE_XYZW);
 338          result[0] = -result[0];
 339          result[1] = -result[1];
 340          result[2] = -result[2];
 341          result[3] = -result[3];
 342       }
 343    }
 344    else {
 345       ASSIGN_4V(result, 0.0, 0.0, 0.0, 0.0);
 346    }
 347 }
 348
 349
 350 /**
 351  * As above, but only return result[0] element.
 352  */
 353 static void
 354 fetch_vector1(const struct prog_src_register *source,
 355               const struct gl_program_machine *machine, GLfloat result[4])
 356 {
 357    const GLfloat *src = get_src_register_pointer(source, machine);
 358    ASSERT(src);
 359
 360    result[0] = src[GET_SWZ(source->Swizzle, 0)];
 361
 362    if (source->Abs) {
 363       result[0] = FABSF(result[0]);
 364    }
 365    if (source->Negate) {
 366       result[0] = -result[0];
 367    }
 368 }
 369
 370
 371 static GLuint
 372 fetch_vector1ui(const struct prog_src_register *source,
 373                 const struct gl_program_machine *machine)
 374 {
 375    const GLuint *src = (GLuint *) get_src_register_pointer(source, machine);
 376    return src[GET_SWZ(source->Swizzle, 0)];
 377 }
 378
 379
 380 /**
 381  * Fetch texel from texture.  Use partial derivatives when possible.
 382  */
 383 static INLINE void
 384 fetch_texel(GLcontext *ctx,
 385             const struct gl_program_machine *machine,
 386             const struct prog_instruction *inst,
 387             const GLfloat texcoord[4], GLfloat lodBias,
 388             GLfloat color[4])
 389 {
 390    const GLuint unit = machine->Samplers[inst->TexSrcUnit];
 391
 392    /* Note: we only have the right derivatives for fragment input attribs.
 393     */
 394    if (machine->NumDeriv > 0 &&
 395        inst->SrcReg[0].File == PROGRAM_INPUT &&
 396        inst->SrcReg[0].Index == FRAG_ATTRIB_TEX0 + inst->TexSrcUnit) {
 397       /* simple texture fetch for which we should have derivatives */
 398       GLuint attr = inst->SrcReg[0].Index;
 399       machine->FetchTexelDeriv(ctx, texcoord,
 400                                machine->DerivX[attr],
 401                                machine->DerivY[attr],
 402                                lodBias, unit, color);
 403    }
 404    else {
 405       machine->FetchTexelLod(ctx, texcoord, lodBias, unit, color);
 406    }
 407 }
 408
 409
 410 /**
 411  * Test value against zero and return GT, LT, EQ or UN if NaN.
 412  */
 413 static INLINE GLuint
 414 generate_cc(float value)
 415 {
 416    if (value != value)
 417       return COND_UN;           /* NaN */
 418    if (value > 0.0F)
 419       return COND_GT;
 420    if (value < 0.0F)
 421       return COND_LT;
 422    return COND_EQ;
 423 }
 424
 425
 426 /**
 427  * Test if the ccMaskRule is satisfied by the given condition code.
 428  * Used to mask destination writes according to the current condition code.
 429  */
 430 static INLINE GLboolean
 431 test_cc(GLuint condCode, GLuint ccMaskRule)
 432 {
 433    switch (ccMaskRule) {
 434    case COND_EQ: return (condCode == COND_EQ);
 435    case COND_NE: return (condCode != COND_EQ);
 436    case COND_LT: return (condCode == COND_LT);
 437    case COND_GE: return (condCode == COND_GT || condCode == COND_EQ);
 438    case COND_LE: return (condCode == COND_LT || condCode == COND_EQ);
 439    case COND_GT: return (condCode == COND_GT);
 440    case COND_TR: return GL_TRUE;
 441    case COND_FL: return GL_FALSE;
 442    default:      return GL_TRUE;
 443    }
 444 }
 445
 446
 447 /**
 448  * Evaluate the 4 condition codes against a predicate and return GL_TRUE
 449  * or GL_FALSE to indicate result.
 450  */
 451 static INLINE GLboolean
 452 eval_condition(const struct gl_program_machine *machine,
 453                const struct prog_instruction *inst)
 454 {
 455    const GLuint swizzle = inst->DstReg.CondSwizzle;
 456    const GLuint condMask = inst->DstReg.CondMask;
 457    if (test_cc(machine->CondCodes[GET_SWZ(swizzle, 0)], condMask) ||
 458        test_cc(machine->CondCodes[GET_SWZ(swizzle, 1)], condMask) ||
 459        test_cc(machine->CondCodes[GET_SWZ(swizzle, 2)], condMask) ||
 460        test_cc(machine->CondCodes[GET_SWZ(swizzle, 3)], condMask)) {
 461       return GL_TRUE;
 462    }
 463    else {
 464       return GL_FALSE;
 465    }
 466 }
 467
 468
 469
 470 /**
 471  * Store 4 floats into a register.  Observe the instructions saturate and
 472  * set-condition-code flags.
 473  */
 474 static void
 475 store_vector4(const struct prog_instruction *inst,
 476               struct gl_program_machine *machine, const GLfloat value[4])
 477 {
 478    const struct prog_dst_register *dstReg = &(inst->DstReg);
 479    const GLboolean clamp = inst->SaturateMode == SATURATE_ZERO_ONE;
 480    GLuint writeMask = dstReg->WriteMask;
 481    GLfloat clampedValue[4];
 482    GLfloat *dst = get_dst_register_pointer(dstReg, machine);
 483
 484 #if 0
 485    if (value[0] > 1.0e10 ||
 486        IS_INF_OR_NAN(value[0]) ||
 487        IS_INF_OR_NAN(value[1]) ||
 488        IS_INF_OR_NAN(value[2]) || IS_INF_OR_NAN(value[3]))
 489       printf("store %g %g %g %g\n", value[0], value[1], value[2], value[3]);
 490 #endif
 491
 492    if (clamp) {
 493       clampedValue[0] = CLAMP(value[0], 0.0F, 1.0F);
 494       clampedValue[1] = CLAMP(value[1], 0.0F, 1.0F);
 495       clampedValue[2] = CLAMP(value[2], 0.0F, 1.0F);
 496       clampedValue[3] = CLAMP(value[3], 0.0F, 1.0F);
 497       value = clampedValue;
 498    }
 499
 500    if (dstReg->CondMask != COND_TR) {
 501       /* condition codes may turn off some writes */
 502       if (writeMask & WRITEMASK_X) {
 503          if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 0)],
 504                       dstReg->CondMask))
 505             writeMask &= ~WRITEMASK_X;
 506       }
 507       if (writeMask & WRITEMASK_Y) {
 508          if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 1)],
 509                       dstReg->CondMask))
 510             writeMask &= ~WRITEMASK_Y;
 511       }
 512       if (writeMask & WRITEMASK_Z) {
 513          if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 2)],
 514                       dstReg->CondMask))
 515             writeMask &= ~WRITEMASK_Z;
 516       }
 517       if (writeMask & WRITEMASK_W) {
 518          if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 3)],
 519                       dstReg->CondMask))
 520             writeMask &= ~WRITEMASK_W;
 521       }
 522    }
 523
 524 #ifdef NAN_CHECK
 525    assert(!IS_INF_OR_NAN(value[0]));
 526    assert(!IS_INF_OR_NAN(value[0]));
 527    assert(!IS_INF_OR_NAN(value[0]));
 528    assert(!IS_INF_OR_NAN(value[0]));
 529 #endif
 530
 531    if (writeMask & WRITEMASK_X)
 532       dst[0] = value[0];
 533    if (writeMask & WRITEMASK_Y)
 534       dst[1] = value[1];
 535    if (writeMask & WRITEMASK_Z)
 536       dst[2] = value[2];
 537    if (writeMask & WRITEMASK_W)
 538       dst[3] = value[3];
 539
 540    if (inst->CondUpdate) {
 541       if (writeMask & WRITEMASK_X)
 542          machine->CondCodes[0] = generate_cc(value[0]);
 543       if (writeMask & WRITEMASK_Y)
 544          machine->CondCodes[1] = generate_cc(value[1]);
 545       if (writeMask & WRITEMASK_Z)
 546          machine->CondCodes[2] = generate_cc(value[2]);
 547       if (writeMask & WRITEMASK_W)
 548          machine->CondCodes[3] = generate_cc(value[3]);
 549 #if DEBUG_PROG
 550       printf("CondCodes=(%s,%s,%s,%s) for:\n",
 551              _mesa_condcode_string(machine->CondCodes[0]),
 552              _mesa_condcode_string(machine->CondCodes[1]),
 553              _mesa_condcode_string(machine->CondCodes[2]),
 554              _mesa_condcode_string(machine->CondCodes[3]));
 555 #endif
 556    }
 557 }
 558
 559
 560 /**
 561  * Store 4 uints into a register.  Observe the set-condition-code flags.
 562  */
 563 static void
 564 store_vector4ui(const struct prog_instruction *inst,
 565                 struct gl_program_machine *machine, const GLuint value[4])
 566 {
 567    const struct prog_dst_register *dstReg = &(inst->DstReg);
 568    GLuint writeMask = dstReg->WriteMask;
 569    GLuint *dst = (GLuint *) get_dst_register_pointer(dstReg, machine);
 570
 571    if (dstReg->CondMask != COND_TR) {
 572       /* condition codes may turn off some writes */
 573       if (writeMask & WRITEMASK_X) {
 574          if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 0)],
 575                       dstReg->CondMask))
 576             writeMask &= ~WRITEMASK_X;
 577       }
 578       if (writeMask & WRITEMASK_Y) {
 579          if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 1)],
 580                       dstReg->CondMask))
 581             writeMask &= ~WRITEMASK_Y;
 582       }
 583       if (writeMask & WRITEMASK_Z) {
 584          if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 2)],
 585                       dstReg->CondMask))
 586             writeMask &= ~WRITEMASK_Z;
 587       }
 588       if (writeMask & WRITEMASK_W) {
 589          if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 3)],
 590                       dstReg->CondMask))
 591             writeMask &= ~WRITEMASK_W;
 592       }
 593    }
 594
 595    if (writeMask & WRITEMASK_X)
 596       dst[0] = value[0];
 597    if (writeMask & WRITEMASK_Y)
 598       dst[1] = value[1];
 599    if (writeMask & WRITEMASK_Z)
 600       dst[2] = value[2];
 601    if (writeMask & WRITEMASK_W)
 602       dst[3] = value[3];
 603
 604    if (inst->CondUpdate) {
 605       if (writeMask & WRITEMASK_X)
 606          machine->CondCodes[0] = generate_cc((float)value[0]);
 607       if (writeMask & WRITEMASK_Y)
 608          machine->CondCodes[1] = generate_cc((float)value[1]);
 609       if (writeMask & WRITEMASK_Z)
 610          machine->CondCodes[2] = generate_cc((float)value[2]);
 611       if (writeMask & WRITEMASK_W)
 612          machine->CondCodes[3] = generate_cc((float)value[3]);
 613 #if DEBUG_PROG
 614       printf("CondCodes=(%s,%s,%s,%s) for:\n",
 615              _mesa_condcode_string(machine->CondCodes[0]),
 616              _mesa_condcode_string(machine->CondCodes[1]),
 617              _mesa_condcode_string(machine->CondCodes[2]),
 618              _mesa_condcode_string(machine->CondCodes[3]));
 619 #endif
 620    }
 621 }
 622
 623
 624
 625 /**
 626  * Execute the given vertex/fragment program.
 627  *
 628  * \param ctx  rendering context
 629  * \param program  the program to execute
 630  * \param machine  machine state (must be initialized)
 631  * \return GL_TRUE if program completed or GL_FALSE if program executed KIL.
 632  */
 633 GLboolean
 634 _mesa_execute_program(GLcontext * ctx,
 635                       const struct gl_program *program,
 636                       struct gl_program_machine *machine)
 637 {
 638    const GLuint numInst = program->NumInstructions;
 639    const GLuint maxExec = 10000;
 640    GLuint pc, numExec = 0;
 641
 642    machine->CurProgram = program;
 643
 644    if (DEBUG_PROG) {
 645       printf("execute program %u --------------------\n", program->Id);
 646    }
 647
 648    if (program->Target == GL_VERTEX_PROGRAM_ARB) {
 649       machine->EnvParams = ctx->VertexProgram.Parameters;
 650    }
 651    else {
 652       machine->EnvParams = ctx->FragmentProgram.Parameters;
 653    }
 654
 655    for (pc = 0; pc < numInst; pc++) {
 656       const struct prog_instruction *inst = program->Instructions + pc;
 657
 658       if (DEBUG_PROG) {
 659          _mesa_print_instruction(inst);
 660       }
 661
 662       switch (inst->Opcode) {
 663       case OPCODE_ABS:
 664          {
 665             GLfloat a[4], result[4];
 666             fetch_vector4(&inst->SrcReg[0], machine, a);
 667             result[0] = FABSF(a[0]);
 668             result[1] = FABSF(a[1]);
 669             result[2] = FABSF(a[2]);
 670             result[3] = FABSF(a[3]);
 671             store_vector4(inst, machine, result);
 672          }
 673          break;
 674       case OPCODE_ADD:
 675          {
 676             GLfloat a[4], b[4], result[4];
 677             fetch_vector4(&inst->SrcReg[0], machine, a);
 678             fetch_vector4(&inst->SrcReg[1], machine, b);
 679             result[0] = a[0] + b[0];
 680             result[1] = a[1] + b[1];
 681             result[2] = a[2] + b[2];
 682             result[3] = a[3] + b[3];
 683             store_vector4(inst, machine, result);
 684             if (DEBUG_PROG) {
 685                printf("ADD (%g %g %g %g) = (%g %g %g %g) + (%g %g %g %g)\n",
 686                       result[0], result[1], result[2], result[3],
 687                       a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
 688             }
 689          }
 690          break;
 691       case OPCODE_AND:     /* bitwise AND */
 692          {
 693             GLuint a[4], b[4], result[4];
 694             fetch_vector4ui(&inst->SrcReg[0], machine, a);
 695             fetch_vector4ui(&inst->SrcReg[1], machine, b);
 696             result[0] = a[0] & b[0];
 697             result[1] = a[1] & b[1];
 698             result[2] = a[2] & b[2];
 699             result[3] = a[3] & b[3];
 700             store_vector4ui(inst, machine, result);
 701          }
 702          break;
 703       case OPCODE_ARL:
 704          {
 705             GLfloat t[4];
 706             fetch_vector4(&inst->SrcReg[0], machine, t);
 707             machine->AddressReg[0][0] = IFLOOR(t[0]);
 708             if (DEBUG_PROG) {
 709                printf("ARL %d\n", machine->AddressReg[0][0]);
 710             }
 711          }
 712          break;
 713       case OPCODE_BGNLOOP:
 714          /* no-op */
 715          ASSERT(program->Instructions[inst->BranchTarget].Opcode
 716                 == OPCODE_ENDLOOP);
 717          break;
 718       case OPCODE_ENDLOOP:
 719          /* subtract 1 here since pc is incremented by for(pc) loop */
 720          ASSERT(program->Instructions[inst->BranchTarget].Opcode
 721                 == OPCODE_BGNLOOP);
 722          pc = inst->BranchTarget - 1;   /* go to matching BNGLOOP */
 723          break;
 724       case OPCODE_BGNSUB:      /* begin subroutine */
 725          break;
 726       case OPCODE_ENDSUB:      /* end subroutine */
 727          break;
 728       case OPCODE_BRA:         /* branch (conditional) */
 729          if (eval_condition(machine, inst)) {
 730             /* take branch */
 731             /* Subtract 1 here since we'll do pc++ below */
 732             pc = inst->BranchTarget - 1;
 733          }
 734          break;
 735       case OPCODE_BRK:         /* break out of loop (conditional) */
 736          ASSERT(program->Instructions[inst->BranchTarget].Opcode
 737                 == OPCODE_ENDLOOP);
 738          if (eval_condition(machine, inst)) {
 739             /* break out of loop */
 740             /* pc++ at end of for-loop will put us after the ENDLOOP inst */
 741             pc = inst->BranchTarget;
 742          }
 743          break;
 744       case OPCODE_CONT:        /* continue loop (conditional) */
 745          ASSERT(program->Instructions[inst->BranchTarget].Opcode
 746                 == OPCODE_ENDLOOP);
 747          if (eval_condition(machine, inst)) {
 748             /* continue at ENDLOOP */
 749             /* Subtract 1 here since we'll do pc++ at end of for-loop */
 750             pc = inst->BranchTarget - 1;
 751          }
 752          break;
 753       case OPCODE_CAL:         /* Call subroutine (conditional) */
 754          if (eval_condition(machine, inst)) {
 755             /* call the subroutine */
 756             if (machine->StackDepth >= MAX_PROGRAM_CALL_DEPTH) {
 757                return GL_TRUE;  /* Per GL_NV_vertex_program2 spec */
 758             }
 759             machine->CallStack[machine->StackDepth++] = pc + 1; /* next inst */
 760             /* Subtract 1 here since we'll do pc++ at end of for-loop */
 761             pc = inst->BranchTarget - 1;
 762          }
 763          break;
 764       case OPCODE_CMP:
 765          {
 766             GLfloat a[4], b[4], c[4], result[4];
 767             fetch_vector4(&inst->SrcReg[0], machine, a);
 768             fetch_vector4(&inst->SrcReg[1], machine, b);
 769             fetch_vector4(&inst->SrcReg[2], machine, c);
 770             result[0] = a[0] < 0.0F ? b[0] : c[0];
 771             result[1] = a[1] < 0.0F ? b[1] : c[1];
 772             result[2] = a[2] < 0.0F ? b[2] : c[2];
 773             result[3] = a[3] < 0.0F ? b[3] : c[3];
 774             store_vector4(inst, machine, result);
 775          }
 776          break;
 777       case OPCODE_COS:
 778          {
 779             GLfloat a[4], result[4];
 780             fetch_vector1(&inst->SrcReg[0], machine, a);
 781             result[0] = result[1] = result[2] = result[3]
 782                = (GLfloat) cos(a[0]);
 783             store_vector4(inst, machine, result);
 784          }
 785          break;
 786       case OPCODE_DDX:         /* Partial derivative with respect to X */
 787          {
 788             GLfloat result[4];
 789             fetch_vector4_deriv(ctx, &inst->SrcReg[0], machine,
 790                                 'X', result);
 791             store_vector4(inst, machine, result);
 792          }
 793          break;
 794       case OPCODE_DDY:         /* Partial derivative with respect to Y */
 795          {
 796             GLfloat result[4];
 797             fetch_vector4_deriv(ctx, &inst->SrcReg[0], machine,
 798                                 'Y', result);
 799             store_vector4(inst, machine, result);
 800          }
 801          break;
 802       case OPCODE_DP2:
 803          {
 804             GLfloat a[4], b[4], result[4];
 805             fetch_vector4(&inst->SrcReg[0], machine, a);
 806             fetch_vector4(&inst->SrcReg[1], machine, b);
 807             result[0] = result[1] = result[2] = result[3] = DOT2(a, b);
 808             store_vector4(inst, machine, result);
 809             if (DEBUG_PROG) {
 810                printf("DP2 %g = (%g %g) . (%g %g)\n",
 811                       result[0], a[0], a[1], b[0], b[1]);
 812             }
 813          }
 814          break;
 815       case OPCODE_DP2A:
 816          {
 817             GLfloat a[4], b[4], c, result[4];
 818             fetch_vector4(&inst->SrcReg[0], machine, a);
 819             fetch_vector4(&inst->SrcReg[1], machine, b);
 820             fetch_vector1(&inst->SrcReg[1], machine, &c);
 821             result[0] = result[1] = result[2] = result[3] = DOT2(a, b) + c;
 822             store_vector4(inst, machine, result);
 823             if (DEBUG_PROG) {
 824                printf("DP2A %g = (%g %g) . (%g %g) + %g\n",
 825                       result[0], a[0], a[1], b[0], b[1], c);
 826             }
 827          }
 828          break;
 829       case OPCODE_DP3:
 830          {
 831             GLfloat a[4], b[4], result[4];
 832             fetch_vector4(&inst->SrcReg[0], machine, a);
 833             fetch_vector4(&inst->SrcReg[1], machine, b);
 834             result[0] = result[1] = result[2] = result[3] = DOT3(a, b);
 835             store_vector4(inst, machine, result);
 836             if (DEBUG_PROG) {
 837                printf("DP3 %g = (%g %g %g) . (%g %g %g)\n",
 838                       result[0], a[0], a[1], a[2], b[0], b[1], b[2]);
 839             }
 840          }
 841          break;
 842       case OPCODE_DP4:
 843          {
 844             GLfloat a[4], b[4], result[4];
 845             fetch_vector4(&inst->SrcReg[0], machine, a);
 846             fetch_vector4(&inst->SrcReg[1], machine, b);
 847             result[0] = result[1] = result[2] = result[3] = DOT4(a, b);
 848             store_vector4(inst, machine, result);
 849             if (DEBUG_PROG) {
 850                printf("DP4 %g = (%g, %g %g %g) . (%g, %g %g %g)\n",
 851                       result[0], a[0], a[1], a[2], a[3],
 852                       b[0], b[1], b[2], b[3]);
 853             }
 854          }
 855          break;
 856       case OPCODE_DPH:
 857          {
 858             GLfloat a[4], b[4], result[4];
 859             fetch_vector4(&inst->SrcReg[0], machine, a);
 860             fetch_vector4(&inst->SrcReg[1], machine, b);
 861             result[0] = result[1] = result[2] = result[3] = DOT3(a, b) + b[3];
 862             store_vector4(inst, machine, result);
 863          }
 864          break;
 865       case OPCODE_DST:         /* Distance vector */
 866          {
 867             GLfloat a[4], b[4], result[4];
 868             fetch_vector4(&inst->SrcReg[0], machine, a);
 869             fetch_vector4(&inst->SrcReg[1], machine, b);
 870             result[0] = 1.0F;
 871             result[1] = a[1] * b[1];
 872             result[2] = a[2];
 873             result[3] = b[3];
 874             store_vector4(inst, machine, result);
 875          }
 876          break;
 877       case OPCODE_EXP:
 878          {
 879             GLfloat t[4], q[4], floor_t0;
 880             fetch_vector1(&inst->SrcReg[0], machine, t);
 881             floor_t0 = FLOORF(t[0]);
 882             if (floor_t0 > FLT_MAX_EXP) {
 883                SET_POS_INFINITY(q[0]);
 884                SET_POS_INFINITY(q[2]);
 885             }
 886             else if (floor_t0 < FLT_MIN_EXP) {
 887                q[0] = 0.0F;
 888                q[2] = 0.0F;
 889             }
 890             else {
 891                q[0] = LDEXPF(1.0, (int) floor_t0);
 892                /* Note: GL_NV_vertex_program expects
 893                 * result.z = result.x * APPX(result.y)
 894                 * We do what the ARB extension says.
 895                 */
 896                q[2] = (GLfloat) pow(2.0, t[0]);
 897             }
 898             q[1] = t[0] - floor_t0;
 899             q[3] = 1.0F;
 900             store_vector4( inst, machine, q );
 901          }
 902          break;
 903       case OPCODE_EX2:         /* Exponential base 2 */
 904          {
 905             GLfloat a[4], result[4], val;
 906             fetch_vector1(&inst->SrcReg[0], machine, a);
 907             val = (GLfloat) pow(2.0, a[0]);
 908             /*
 909             if (IS_INF_OR_NAN(val))
 910                val = 1.0e10;
 911             */
 912             result[0] = result[1] = result[2] = result[3] = val;
 913             store_vector4(inst, machine, result);
 914          }
 915          break;
 916       case OPCODE_FLR:
 917          {
 918             GLfloat a[4], result[4];
 919             fetch_vector4(&inst->SrcReg[0], machine, a);
 920             result[0] = FLOORF(a[0]);
 921             result[1] = FLOORF(a[1]);
 922             result[2] = FLOORF(a[2]);
 923             result[3] = FLOORF(a[3]);
 924             store_vector4(inst, machine, result);
 925          }
 926          break;
 927       case OPCODE_FRC:
 928          {
 929             GLfloat a[4], result[4];
 930             fetch_vector4(&inst->SrcReg[0], machine, a);
 931             result[0] = a[0] - FLOORF(a[0]);
 932             result[1] = a[1] - FLOORF(a[1]);
 933             result[2] = a[2] - FLOORF(a[2]);
 934             result[3] = a[3] - FLOORF(a[3]);
 935             store_vector4(inst, machine, result);
 936          }
 937          break;
 938       case OPCODE_IF:
 939          {
 940             GLboolean cond;
 941             ASSERT(program->Instructions[inst->BranchTarget].Opcode
 942                    == OPCODE_ELSE ||
 943                    program->Instructions[inst->BranchTarget].Opcode
 944                    == OPCODE_ENDIF);
 945             /* eval condition */
 946             if (inst->SrcReg[0].File != PROGRAM_UNDEFINED) {
 947                GLfloat a[4];
 948                fetch_vector1(&inst->SrcReg[0], machine, a);
 949                cond = (a[0] != 0.0);
 950             }
 951             else {
 952                cond = eval_condition(machine, inst);
 953             }
 954             if (DEBUG_PROG) {
 955                printf("IF: %d\n", cond);
 956             }
 957             /* do if/else */
 958             if (cond) {
 959                /* do if-clause (just continue execution) */
 960             }
 961             else {
 962                /* go to the instruction after ELSE or ENDIF */
 963                assert(inst->BranchTarget >= 0);
 964                pc = inst->BranchTarget;
 965             }
 966          }
 967          break;
 968       case OPCODE_ELSE:
 969          /* goto ENDIF */
 970          ASSERT(program->Instructions[inst->BranchTarget].Opcode
 971                 == OPCODE_ENDIF);
 972          assert(inst->BranchTarget >= 0);
 973          pc = inst->BranchTarget;
 974          break;
 975       case OPCODE_ENDIF:
 976          /* nothing */
 977          break;
 978       case OPCODE_KIL_NV:      /* NV_f_p only (conditional) */
 979          if (eval_condition(machine, inst)) {
 980             return GL_FALSE;
 981          }
 982          break;
 983       case OPCODE_KIL:         /* ARB_f_p only */
 984          {
 985             GLfloat a[4];
 986             fetch_vector4(&inst->SrcReg[0], machine, a);
 987             if (DEBUG_PROG) {
 988                printf("KIL if (%g %g %g %g) <= 0.0\n",
 989                       a[0], a[1], a[2], a[3]);
 990             }
 991
 992             if (a[0] < 0.0F || a[1] < 0.0F || a[2] < 0.0F || a[3] < 0.0F) {
 993                return GL_FALSE;
 994             }
 995          }
 996          break;
 997       case OPCODE_LG2:         /* log base 2 */
 998          {
 999             GLfloat a[4], result[4], val;
1000             fetch_vector1(&inst->SrcReg[0], machine, a);
1001             /* The fast LOG2 macro doesn't meet the precision requirements.
1002              */
1003             if (a[0] == 0.0F) {
1004                val = -FLT_MAX;
1005             }
1006             else {
1007                val = (float)(log(a[0]) * 1.442695F);
1008             }
1009             result[0] = result[1] = result[2] = result[3] = val;
1010             store_vector4(inst, machine, result);
1011          }
1012          break;
1013       case OPCODE_LIT:
1014          {
1015             const GLfloat epsilon = 1.0F / 256.0F;      /* from NV VP spec */
1016             GLfloat a[4], result[4];
1017             fetch_vector4(&inst->SrcReg[0], machine, a);
1018             a[0] = MAX2(a[0], 0.0F);
1019             a[1] = MAX2(a[1], 0.0F);
1020             /* XXX ARB version clamps a[3], NV version doesn't */
1021             a[3] = CLAMP(a[3], -(128.0F - epsilon), (128.0F - epsilon));
1022             result[0] = 1.0F;
1023             result[1] = a[0];
1024             /* XXX we could probably just use pow() here */
1025             if (a[0] > 0.0F) {
1026                if (a[1] == 0.0 && a[3] == 0.0)
1027                   result[2] = 1.0F;
1028                else
1029                   result[2] = (GLfloat) pow(a[1], a[3]);
1030             }
1031             else {
1032                result[2] = 0.0F;
1033             }
1034             result[3] = 1.0F;
1035             store_vector4(inst, machine, result);
1036             if (DEBUG_PROG) {
1037                printf("LIT (%g %g %g %g) : (%g %g %g %g)\n",
1038                       result[0], result[1], result[2], result[3],
1039                       a[0], a[1], a[2], a[3]);
1040             }
1041          }
1042          break;
1043       case OPCODE_LOG:
1044          {
1045             GLfloat t[4], q[4], abs_t0;
1046             fetch_vector1(&inst->SrcReg[0], machine, t);
1047             abs_t0 = FABSF(t[0]);
1048             if (abs_t0 != 0.0F) {
1049                /* Since we really can't handle infinite values on VMS
1050                 * like other OSes we'll use __MAXFLOAT to represent
1051                 * infinity.  This may need some tweaking.
1052                 */
1053 #ifdef VMS
1054                if (abs_t0 == __MAXFLOAT)
1055 #else
1056                if (IS_INF_OR_NAN(abs_t0))
1057 #endif
1058                {
1059                   SET_POS_INFINITY(q[0]);
1060                   q[1] = 1.0F;
1061                   SET_POS_INFINITY(q[2]);
1062                }
1063                else {
1064                   int exponent;
1065                   GLfloat mantissa = FREXPF(t[0], &exponent);
1066                   q[0] = (GLfloat) (exponent - 1);
1067                   q[1] = (GLfloat) (2.0 * mantissa); /* map [.5, 1) -> [1, 2) */
1068
1069                   /* The fast LOG2 macro doesn't meet the precision
1070                    * requirements.
1071                    */
1072                   q[2] = (float)(log(t[0]) * 1.442695F);
1073                }
1074             }
1075             else {
1076                SET_NEG_INFINITY(q[0]);
1077                q[1] = 1.0F;
1078                SET_NEG_INFINITY(q[2]);
1079             }
1080             q[3] = 1.0;
1081             store_vector4(inst, machine, q);
1082          }
1083          break;
1084       case OPCODE_LRP:
1085          {
1086             GLfloat a[4], b[4], c[4], result[4];
1087             fetch_vector4(&inst->SrcReg[0], machine, a);
1088             fetch_vector4(&inst->SrcReg[1], machine, b);
1089             fetch_vector4(&inst->SrcReg[2], machine, c);
1090             result[0] = a[0] * b[0] + (1.0F - a[0]) * c[0];
1091             result[1] = a[1] * b[1] + (1.0F - a[1]) * c[1];
1092             result[2] = a[2] * b[2] + (1.0F - a[2]) * c[2];
1093             result[3] = a[3] * b[3] + (1.0F - a[3]) * c[3];
1094             store_vector4(inst, machine, result);
1095             if (DEBUG_PROG) {
1096                printf("LRP (%g %g %g %g) = (%g %g %g %g), "
1097                       "(%g %g %g %g), (%g %g %g %g)\n",
1098                       result[0], result[1], result[2], result[3],
1099                       a[0], a[1], a[2], a[3],
1100                       b[0], b[1], b[2], b[3], c[0], c[1], c[2], c[3]);
1101             }
1102          }
1103          break;
1104       case OPCODE_MAD:
1105          {
1106             GLfloat a[4], b[4], c[4], result[4];
1107             fetch_vector4(&inst->SrcReg[0], machine, a);
1108             fetch_vector4(&inst->SrcReg[1], machine, b);
1109             fetch_vector4(&inst->SrcReg[2], machine, c);
1110             result[0] = a[0] * b[0] + c[0];
1111             result[1] = a[1] * b[1] + c[1];
1112             result[2] = a[2] * b[2] + c[2];
1113             result[3] = a[3] * b[3] + c[3];
1114             store_vector4(inst, machine, result);
1115             if (DEBUG_PROG) {
1116                printf("MAD (%g %g %g %g) = (%g %g %g %g) * "
1117                       "(%g %g %g %g) + (%g %g %g %g)\n",
1118                       result[0], result[1], result[2], result[3],
1119                       a[0], a[1], a[2], a[3],
1120                       b[0], b[1], b[2], b[3], c[0], c[1], c[2], c[3]);
1121             }
1122          }
1123          break;
1124       case OPCODE_MAX:
1125          {
1126             GLfloat a[4], b[4], result[4];
1127             fetch_vector4(&inst->SrcReg[0], machine, a);
1128             fetch_vector4(&inst->SrcReg[1], machine, b);
1129             result[0] = MAX2(a[0], b[0]);
1130             result[1] = MAX2(a[1], b[1]);
1131             result[2] = MAX2(a[2], b[2]);
1132             result[3] = MAX2(a[3], b[3]);
1133             store_vector4(inst, machine, result);
1134             if (DEBUG_PROG) {
1135                printf("MAX (%g %g %g %g) = (%g %g %g %g), (%g %g %g %g)\n",
1136                       result[0], result[1], result[2], result[3],
1137                       a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
1138             }
1139          }
1140          break;
1141       case OPCODE_MIN:
1142          {
1143             GLfloat a[4], b[4], result[4];
1144             fetch_vector4(&inst->SrcReg[0], machine, a);
1145             fetch_vector4(&inst->SrcReg[1], machine, b);
1146             result[0] = MIN2(a[0], b[0]);
1147             result[1] = MIN2(a[1], b[1]);
1148             result[2] = MIN2(a[2], b[2]);
1149             result[3] = MIN2(a[3], b[3]);
1150             store_vector4(inst, machine, result);
1151          }
1152          break;
1153       case OPCODE_MOV:
1154          {
1155             GLfloat result[4];
1156             fetch_vector4(&inst->SrcReg[0], machine, result);
1157             store_vector4(inst, machine, result);
1158             if (DEBUG_PROG) {
1159                printf("MOV (%g %g %g %g)\n",
1160                       result[0], result[1], result[2], result[3]);
1161             }
1162          }
1163          break;
1164       case OPCODE_MUL:
1165          {
1166             GLfloat a[4], b[4], result[4];
1167             fetch_vector4(&inst->SrcReg[0], machine, a);
1168             fetch_vector4(&inst->SrcReg[1], machine, b);
1169             result[0] = a[0] * b[0];
1170             result[1] = a[1] * b[1];
1171             result[2] = a[2] * b[2];
1172             result[3] = a[3] * b[3];
1173             store_vector4(inst, machine, result);
1174             if (DEBUG_PROG) {
1175                printf("MUL (%g %g %g %g) = (%g %g %g %g) * (%g %g %g %g)\n",
1176                       result[0], result[1], result[2], result[3],
1177                       a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
1178             }
1179          }
1180          break;
1181       case OPCODE_NOISE1:
1182          {
1183             GLfloat a[4], result[4];
1184             fetch_vector1(&inst->SrcReg[0], machine, a);
1185             result[0] =
1186                result[1] =
1187                result[2] =
1188                result[3] = _mesa_noise1(a[0]);
1189             store_vector4(inst, machine, result);
1190          }
1191          break;
1192       case OPCODE_NOISE2:
1193          {
1194             GLfloat a[4], result[4];
1195             fetch_vector4(&inst->SrcReg[0], machine, a);
1196             result[0] =
1197                result[1] =
1198                result[2] = result[3] = _mesa_noise2(a[0], a[1]);
1199             store_vector4(inst, machine, result);
1200          }
1201          break;
1202       case OPCODE_NOISE3:
1203          {
1204             GLfloat a[4], result[4];
1205             fetch_vector4(&inst->SrcReg[0], machine, a);
1206             result[0] =
1207                result[1] =
1208                result[2] =
1209                result[3] = _mesa_noise3(a[0], a[1], a[2]);
1210             store_vector4(inst, machine, result);
1211          }
1212          break;
1213       case OPCODE_NOISE4:
1214          {
1215             GLfloat a[4], result[4];
1216             fetch_vector4(&inst->SrcReg[0], machine, a);
1217             result[0] =
1218                result[1] =
1219                result[2] =
1220                result[3] = _mesa_noise4(a[0], a[1], a[2], a[3]);
1221             store_vector4(inst, machine, result);
1222          }
1223          break;
1224       case OPCODE_NOP:
1225          break;
1226       case OPCODE_NOT:         /* bitwise NOT */
1227          {
1228             GLuint a[4], result[4];
1229             fetch_vector4ui(&inst->SrcReg[0], machine, a);
1230             result[0] = ~a[0];
1231             result[1] = ~a[1];
1232             result[2] = ~a[2];
1233             result[3] = ~a[3];
1234             store_vector4ui(inst, machine, result);
1235          }
1236          break;
1237       case OPCODE_NRM3:        /* 3-component normalization */
1238          {
1239             GLfloat a[4], result[4];
1240             GLfloat tmp;
1241             fetch_vector4(&inst->SrcReg[0], machine, a);
1242             tmp = a[0] * a[0] + a[1] * a[1] + a[2] * a[2];
1243             if (tmp != 0.0F)
1244                tmp = INV_SQRTF(tmp);
1245             result[0] = tmp * a[0];
1246             result[1] = tmp * a[1];
1247             result[2] = tmp * a[2];
1248             result[3] = 0.0;  /* undefined, but prevent valgrind warnings */
1249             store_vector4(inst, machine, result);
1250          }
1251          break;
1252       case OPCODE_NRM4:        /* 4-component normalization */
1253          {
1254             GLfloat a[4], result[4];
1255             GLfloat tmp;
1256             fetch_vector4(&inst->SrcReg[0], machine, a);
1257             tmp = a[0] * a[0] + a[1] * a[1] + a[2] * a[2] + a[3] * a[3];
1258             if (tmp != 0.0F)
1259                tmp = INV_SQRTF(tmp);
1260             result[0] = tmp * a[0];
1261             result[1] = tmp * a[1];
1262             result[2] = tmp * a[2];
1263             result[3] = tmp * a[3];
1264             store_vector4(inst, machine, result);
1265          }
1266          break;
1267       case OPCODE_OR:          /* bitwise OR */
1268          {
1269             GLuint a[4], b[4], result[4];
1270             fetch_vector4ui(&inst->SrcReg[0], machine, a);
1271             fetch_vector4ui(&inst->SrcReg[1], machine, b);
1272             result[0] = a[0] | b[0];
1273             result[1] = a[1] | b[1];
1274             result[2] = a[2] | b[2];
1275             result[3] = a[3] | b[3];
1276             store_vector4ui(inst, machine, result);
1277          }
1278          break;
1279       case OPCODE_PK2H:        /* pack two 16-bit floats in one 32-bit float */
1280          {
1281             GLfloat a[4];
1282             GLuint result[4];
1283             GLhalfNV hx, hy;
1284             fetch_vector4(&inst->SrcReg[0], machine, a);
1285             hx = _mesa_float_to_half(a[0]);
1286             hy = _mesa_float_to_half(a[1]);
1287             result[0] =
1288             result[1] =
1289             result[2] =
1290             result[3] = hx | (hy << 16);
1291             store_vector4ui(inst, machine, result);
1292          }
1293          break;
1294       case OPCODE_PK2US:       /* pack two GLushorts into one 32-bit float */
1295          {
1296             GLfloat a[4];
1297             GLuint result[4], usx, usy;
1298             fetch_vector4(&inst->SrcReg[0], machine, a);
1299             a[0] = CLAMP(a[0], 0.0F, 1.0F);
1300             a[1] = CLAMP(a[1], 0.0F, 1.0F);
1301             usx = IROUND(a[0] * 65535.0F);
1302             usy = IROUND(a[1] * 65535.0F);
1303             result[0] =
1304             result[1] =
1305             result[2] =
1306             result[3] = usx | (usy << 16);
1307             store_vector4ui(inst, machine, result);
1308          }
1309          break;
1310       case OPCODE_PK4B:        /* pack four GLbytes into one 32-bit float */
1311          {
1312             GLfloat a[4];
1313             GLuint result[4], ubx, uby, ubz, ubw;
1314             fetch_vector4(&inst->SrcReg[0], machine, a);
1315             a[0] = CLAMP(a[0], -128.0F / 127.0F, 1.0F);
1316             a[1] = CLAMP(a[1], -128.0F / 127.0F, 1.0F);
1317             a[2] = CLAMP(a[2], -128.0F / 127.0F, 1.0F);
1318             a[3] = CLAMP(a[3], -128.0F / 127.0F, 1.0F);
1319             ubx = IROUND(127.0F * a[0] + 128.0F);
1320             uby = IROUND(127.0F * a[1] + 128.0F);
1321             ubz = IROUND(127.0F * a[2] + 128.0F);
1322             ubw = IROUND(127.0F * a[3] + 128.0F);
1323             result[0] =
1324             result[1] =
1325             result[2] =
1326             result[3] = ubx | (uby << 8) | (ubz << 16) | (ubw << 24);
1327             store_vector4ui(inst, machine, result);
1328          }
1329          break;
1330       case OPCODE_PK4UB:       /* pack four GLubytes into one 32-bit float */
1331          {
1332             GLfloat a[4];
1333             GLuint result[4], ubx, uby, ubz, ubw;
1334             fetch_vector4(&inst->SrcReg[0], machine, a);
1335             a[0] = CLAMP(a[0], 0.0F, 1.0F);
1336             a[1] = CLAMP(a[1], 0.0F, 1.0F);
1337             a[2] = CLAMP(a[2], 0.0F, 1.0F);
1338             a[3] = CLAMP(a[3], 0.0F, 1.0F);
1339             ubx = IROUND(255.0F * a[0]);
1340             uby = IROUND(255.0F * a[1]);
1341             ubz = IROUND(255.0F * a[2]);
1342             ubw = IROUND(255.0F * a[3]);
1343             result[0] =
1344             result[1] =
1345             result[2] =
1346             result[3] = ubx | (uby << 8) | (ubz << 16) | (ubw << 24);
1347             store_vector4ui(inst, machine, result);
1348          }
1349          break;
1350       case OPCODE_POW:
1351          {
1352             GLfloat a[4], b[4], result[4];
1353             fetch_vector1(&inst->SrcReg[0], machine, a);
1354             fetch_vector1(&inst->SrcReg[1], machine, b);
1355             result[0] = result[1] = result[2] = result[3]
1356                = (GLfloat) pow(a[0], b[0]);
1357             store_vector4(inst, machine, result);
1358          }
1359          break;
1360       case OPCODE_RCC:  /* clamped reciprocal */
1361          {
1362             const float largest = 1.884467e+19, smallest = 5.42101e-20;
1363             GLfloat a[4], r, result[4];
1364             fetch_vector1(&inst->SrcReg[0], machine, a);
1365             if (DEBUG_PROG) {
1366                if (a[0] == 0)
1367                   printf("RCC(0)\n");
1368                else if (IS_INF_OR_NAN(a[0]))
1369                   printf("RCC(inf)\n");
1370             }
1371             if (a[0] == 1.0F) {
1372                r = 1.0F;
1373             }
1374             else {
1375                r = 1.0F / a[0];
1376             }
1377             if (positive(r)) {
1378                if (r > largest) {
1379                   r = largest;
1380                }
1381                else if (r < smallest) {
1382                   r = smallest;
1383                }
1384             }
1385             else {
1386                if (r < -largest) {
1387                   r = -largest;
1388                }
1389                else if (r > -smallest) {
1390                   r = -smallest;
1391                }
1392             }
1393             result[0] = result[1] = result[2] = result[3] = r;
1394             store_vector4(inst, machine, result);
1395          }
1396          break;
1397
1398       case OPCODE_RCP:
1399          {
1400             GLfloat a[4], result[4];
1401             fetch_vector1(&inst->SrcReg[0], machine, a);
1402             if (DEBUG_PROG) {
1403                if (a[0] == 0)
1404                   printf("RCP(0)\n");
1405                else if (IS_INF_OR_NAN(a[0]))
1406                   printf("RCP(inf)\n");
1407             }
1408             result[0] = result[1] = result[2] = result[3] = 1.0F / a[0];
1409             store_vector4(inst, machine, result);
1410          }
1411          break;
1412       case OPCODE_RET:         /* return from subroutine (conditional) */
1413          if (eval_condition(machine, inst)) {
1414             if (machine->StackDepth == 0) {
1415                return GL_TRUE;  /* Per GL_NV_vertex_program2 spec */
1416             }
1417             /* subtract one because of pc++ in the for loop */
1418             pc = machine->CallStack[--machine->StackDepth] - 1;
1419          }
1420          break;
1421       case OPCODE_RFL:         /* reflection vector */
1422          {
1423             GLfloat axis[4], dir[4], result[4], tmpX, tmpW;
1424             fetch_vector4(&inst->SrcReg[0], machine, axis);
1425             fetch_vector4(&inst->SrcReg[1], machine, dir);
1426             tmpW = DOT3(axis, axis);
1427             tmpX = (2.0F * DOT3(axis, dir)) / tmpW;
1428             result[0] = tmpX * axis[0] - dir[0];
1429             result[1] = tmpX * axis[1] - dir[1];
1430             result[2] = tmpX * axis[2] - dir[2];
1431             /* result[3] is never written! XXX enforce in parser! */
1432             store_vector4(inst, machine, result);
1433          }
1434          break;
1435       case OPCODE_RSQ:         /* 1 / sqrt() */
1436          {
1437             GLfloat a[4], result[4];
1438             fetch_vector1(&inst->SrcReg[0], machine, a);
1439             a[0] = FABSF(a[0]);
1440             result[0] = result[1] = result[2] = result[3] = INV_SQRTF(a[0]);
1441             store_vector4(inst, machine, result);
1442             if (DEBUG_PROG) {
1443                printf("RSQ %g = 1/sqrt(|%g|)\n", result[0], a[0]);
1444             }
1445          }
1446          break;
1447       case OPCODE_SCS:         /* sine and cos */
1448          {
1449             GLfloat a[4], result[4];
1450             fetch_vector1(&inst->SrcReg[0], machine, a);
1451             result[0] = (GLfloat) cos(a[0]);
1452             result[1] = (GLfloat) sin(a[0]);
1453             result[2] = 0.0;    /* undefined! */
1454             result[3] = 0.0;    /* undefined! */
1455             store_vector4(inst, machine, result);
1456          }
1457          break;
1458       case OPCODE_SEQ:         /* set on equal */
1459          {
1460             GLfloat a[4], b[4], result[4];
1461             fetch_vector4(&inst->SrcReg[0], machine, a);
1462             fetch_vector4(&inst->SrcReg[1], machine, b);
1463             result[0] = (a[0] == b[0]) ? 1.0F : 0.0F;
1464             result[1] = (a[1] == b[1]) ? 1.0F : 0.0F;
1465             result[2] = (a[2] == b[2]) ? 1.0F : 0.0F;
1466             result[3] = (a[3] == b[3]) ? 1.0F : 0.0F;
1467             store_vector4(inst, machine, result);
1468             if (DEBUG_PROG) {
1469                printf("SEQ (%g %g %g %g) = (%g %g %g %g) == (%g %g %g %g)\n",
1470                       result[0], result[1], result[2], result[3],
1471                       a[0], a[1], a[2], a[3],
1472                       b[0], b[1], b[2], b[3]);
1473             }
1474          }
1475          break;
1476       case OPCODE_SFL:         /* set false, operands ignored */
1477          {
1478             static const GLfloat result[4] = { 0.0F, 0.0F, 0.0F, 0.0F };
1479             store_vector4(inst, machine, result);
1480          }
1481          break;
1482       case OPCODE_SGE:         /* set on greater or equal */
1483          {
1484             GLfloat a[4], b[4], result[4];
1485             fetch_vector4(&inst->SrcReg[0], machine, a);
1486             fetch_vector4(&inst->SrcReg[1], machine, b);
1487             result[0] = (a[0] >= b[0]) ? 1.0F : 0.0F;
1488             result[1] = (a[1] >= b[1]) ? 1.0F : 0.0F;
1489             result[2] = (a[2] >= b[2]) ? 1.0F : 0.0F;
1490             result[3] = (a[3] >= b[3]) ? 1.0F : 0.0F;
1491             store_vector4(inst, machine, result);
1492             if (DEBUG_PROG) {
1493                printf("SGE (%g %g %g %g) = (%g %g %g %g) >= (%g %g %g %g)\n",
1494                       result[0], result[1], result[2], result[3],
1495                       a[0], a[1], a[2], a[3],
1496                       b[0], b[1], b[2], b[3]);
1497             }
1498          }
1499          break;
1500       case OPCODE_SGT:         /* set on greater */
1501          {
1502             GLfloat a[4], b[4], result[4];
1503             fetch_vector4(&inst->SrcReg[0], machine, a);
1504             fetch_vector4(&inst->SrcReg[1], machine, b);
1505             result[0] = (a[0] > b[0]) ? 1.0F : 0.0F;
1506             result[1] = (a[1] > b[1]) ? 1.0F : 0.0F;
1507             result[2] = (a[2] > b[2]) ? 1.0F : 0.0F;
1508             result[3] = (a[3] > b[3]) ? 1.0F : 0.0F;
1509             store_vector4(inst, machine, result);
1510             if (DEBUG_PROG) {
1511                printf("SGT (%g %g %g %g) = (%g %g %g %g) > (%g %g %g %g)\n",
1512                       result[0], result[1], result[2], result[3],
1513                       a[0], a[1], a[2], a[3],
1514                       b[0], b[1], b[2], b[3]);
1515             }
1516          }
1517          break;
1518       case OPCODE_SIN:
1519          {
1520             GLfloat a[4], result[4];
1521             fetch_vector1(&inst->SrcReg[0], machine, a);
1522             result[0] = result[1] = result[2] = result[3]
1523                = (GLfloat) sin(a[0]);
1524             store_vector4(inst, machine, result);
1525          }
1526          break;
1527       case OPCODE_SLE:         /* set on less or equal */
1528          {
1529             GLfloat a[4], b[4], result[4];
1530             fetch_vector4(&inst->SrcReg[0], machine, a);
1531             fetch_vector4(&inst->SrcReg[1], machine, b);
1532             result[0] = (a[0] <= b[0]) ? 1.0F : 0.0F;
1533             result[1] = (a[1] <= b[1]) ? 1.0F : 0.0F;
1534             result[2] = (a[2] <= b[2]) ? 1.0F : 0.0F;
1535             result[3] = (a[3] <= b[3]) ? 1.0F : 0.0F;
1536             store_vector4(inst, machine, result);
1537             if (DEBUG_PROG) {
1538                printf("SLE (%g %g %g %g) = (%g %g %g %g) <= (%g %g %g %g)\n",
1539                       result[0], result[1], result[2], result[3],
1540                       a[0], a[1], a[2], a[3],
1541                       b[0], b[1], b[2], b[3]);
1542             }
1543          }
1544          break;
1545       case OPCODE_SLT:         /* set on less */
1546          {
1547             GLfloat a[4], b[4], result[4];
1548             fetch_vector4(&inst->SrcReg[0], machine, a);
1549             fetch_vector4(&inst->SrcReg[1], machine, b);
1550             result[0] = (a[0] < b[0]) ? 1.0F : 0.0F;
1551             result[1] = (a[1] < b[1]) ? 1.0F : 0.0F;
1552             result[2] = (a[2] < b[2]) ? 1.0F : 0.0F;
1553             result[3] = (a[3] < b[3]) ? 1.0F : 0.0F;
1554             store_vector4(inst, machine, result);
1555             if (DEBUG_PROG) {
1556                printf("SLT (%g %g %g %g) = (%g %g %g %g) < (%g %g %g %g)\n",
1557                       result[0], result[1], result[2], result[3],
1558                       a[0], a[1], a[2], a[3],
1559                       b[0], b[1], b[2], b[3]);
1560             }
1561          }
1562          break;
1563       case OPCODE_SNE:         /* set on not equal */
1564          {
1565             GLfloat a[4], b[4], result[4];
1566             fetch_vector4(&inst->SrcReg[0], machine, a);
1567             fetch_vector4(&inst->SrcReg[1], machine, b);
1568             result[0] = (a[0] != b[0]) ? 1.0F : 0.0F;
1569             result[1] = (a[1] != b[1]) ? 1.0F : 0.0F;
1570             result[2] = (a[2] != b[2]) ? 1.0F : 0.0F;
1571             result[3] = (a[3] != b[3]) ? 1.0F : 0.0F;
1572             store_vector4(inst, machine, result);
1573             if (DEBUG_PROG) {
1574                printf("SNE (%g %g %g %g) = (%g %g %g %g) != (%g %g %g %g)\n",
1575                       result[0], result[1], result[2], result[3],
1576                       a[0], a[1], a[2], a[3],
1577                       b[0], b[1], b[2], b[3]);
1578             }
1579          }
1580          break;
1581       case OPCODE_SSG:         /* set sign (-1, 0 or +1) */
1582          {
1583             GLfloat a[4], result[4];
1584             fetch_vector4(&inst->SrcReg[0], machine, a);
1585             result[0] = (GLfloat) ((a[0] > 0.0F) - (a[0] < 0.0F));
1586             result[1] = (GLfloat) ((a[1] > 0.0F) - (a[1] < 0.0F));
1587             result[2] = (GLfloat) ((a[2] > 0.0F) - (a[2] < 0.0F));
1588             result[3] = (GLfloat) ((a[3] > 0.0F) - (a[3] < 0.0F));
1589             store_vector4(inst, machine, result);
1590          }
1591          break;
1592       case OPCODE_STR:         /* set true, operands ignored */
1593          {
1594             static const GLfloat result[4] = { 1.0F, 1.0F, 1.0F, 1.0F };
1595             store_vector4(inst, machine, result);
1596          }
1597          break;
1598       case OPCODE_SUB:
1599          {
1600             GLfloat a[4], b[4], result[4];
1601             fetch_vector4(&inst->SrcReg[0], machine, a);
1602             fetch_vector4(&inst->SrcReg[1], machine, b);
1603             result[0] = a[0] - b[0];
1604             result[1] = a[1] - b[1];
1605             result[2] = a[2] - b[2];
1606             result[3] = a[3] - b[3];
1607             store_vector4(inst, machine, result);
1608             if (DEBUG_PROG) {
1609                printf("SUB (%g %g %g %g) = (%g %g %g %g) - (%g %g %g %g)\n",
1610                       result[0], result[1], result[2], result[3],
1611                       a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
1612             }
1613          }
1614          break;
1615       case OPCODE_SWZ:         /* extended swizzle */
1616          {
1617             const struct prog_src_register *source = &inst->SrcReg[0];
1618             const GLfloat *src = get_src_register_pointer(source, machine);
1619             GLfloat result[4];
1620             GLuint i;
1621             for (i = 0; i < 4; i++) {
1622                const GLuint swz = GET_SWZ(source->Swizzle, i);
1623                if (swz == SWIZZLE_ZERO)
1624                   result[i] = 0.0;
1625                else if (swz == SWIZZLE_ONE)
1626                   result[i] = 1.0;
1627                else {
1628                   ASSERT(swz >= 0);
1629                   ASSERT(swz <= 3);
1630                   result[i] = src[swz];
1631                }
1632                if (source->Negate & (1 << i))
1633                   result[i] = -result[i];
1634             }
1635             store_vector4(inst, machine, result);
1636          }
1637          break;
1638       case OPCODE_TEX:         /* Both ARB and NV frag prog */
1639          /* Simple texel lookup */
1640          {
1641             GLfloat texcoord[4], color[4];
1642             fetch_vector4(&inst->SrcReg[0], machine, texcoord);
1643
1644             fetch_texel(ctx, machine, inst, texcoord, 0.0, color);
1645
1646             if (DEBUG_PROG) {
1647                printf("TEX (%g, %g, %g, %g) = texture[%d][%g, %g, %g, %g]\n",
1648                       color[0], color[1], color[2], color[3],
1649                       inst->TexSrcUnit,
1650                       texcoord[0], texcoord[1], texcoord[2], texcoord[3]);
1651             }
1652             store_vector4(inst, machine, color);
1653          }
1654          break;
1655       case OPCODE_TXB:         /* GL_ARB_fragment_program only */
1656          /* Texel lookup with LOD bias */
1657          {
1658             GLfloat texcoord[4], color[4], lodBias;
1659
1660             fetch_vector4(&inst->SrcReg[0], machine, texcoord);
1661
1662             /* texcoord[3] is the bias to add to lambda */
1663             lodBias = texcoord[3];
1664
1665             fetch_texel(ctx, machine, inst, texcoord, lodBias, color);
1666
1667             store_vector4(inst, machine, color);
1668          }
1669          break;
1670       case OPCODE_TXD:         /* GL_NV_fragment_program only */
1671          /* Texture lookup w/ partial derivatives for LOD */
1672          {
1673             GLfloat texcoord[4], dtdx[4], dtdy[4], color[4];
1674             fetch_vector4(&inst->SrcReg[0], machine, texcoord);
1675             fetch_vector4(&inst->SrcReg[1], machine, dtdx);
1676             fetch_vector4(&inst->SrcReg[2], machine, dtdy);
1677             machine->FetchTexelDeriv(ctx, texcoord, dtdx, dtdy,
1678                                      0.0, /* lodBias */
1679                                      inst->TexSrcUnit, color);
1680             store_vector4(inst, machine, color);
1681          }
1682          break;
1683       case OPCODE_TXP:         /* GL_ARB_fragment_program only */
1684          /* Texture lookup w/ projective divide */
1685          {
1686             GLfloat texcoord[4], color[4];
1687
1688             fetch_vector4(&inst->SrcReg[0], machine, texcoord);
1689             /* Not so sure about this test - if texcoord[3] is
1690              * zero, we'd probably be fine except for an ASSERT in
1691              * IROUND_POS() which gets triggered by the inf values created.
1692              */
1693             if (texcoord[3] != 0.0) {
1694                texcoord[0] /= texcoord[3];
1695                texcoord[1] /= texcoord[3];
1696                texcoord[2] /= texcoord[3];
1697             }
1698
1699             fetch_texel(ctx, machine, inst, texcoord, 0.0, color);
1700
1701             store_vector4(inst, machine, color);
1702          }
1703          break;
1704       case OPCODE_TXP_NV:      /* GL_NV_fragment_program only */
1705          /* Texture lookup w/ projective divide, as above, but do not
1706           * do the divide by w if sampling from a cube map.
1707           */
1708          {
1709             GLfloat texcoord[4], color[4];
1710
1711             fetch_vector4(&inst->SrcReg[0], machine, texcoord);
1712             if (inst->TexSrcTarget != TEXTURE_CUBE_INDEX &&
1713                 texcoord[3] != 0.0) {
1714                texcoord[0] /= texcoord[3];
1715                texcoord[1] /= texcoord[3];
1716                texcoord[2] /= texcoord[3];
1717             }
1718
1719             fetch_texel(ctx, machine, inst, texcoord, 0.0, color);
1720
1721             store_vector4(inst, machine, color);
1722          }
1723          break;
1724       case OPCODE_TRUNC:       /* truncate toward zero */
1725          {
1726             GLfloat a[4], result[4];
1727             fetch_vector4(&inst->SrcReg[0], machine, a);
1728             result[0] = (GLfloat) (GLint) a[0];
1729             result[1] = (GLfloat) (GLint) a[1];
1730             result[2] = (GLfloat) (GLint) a[2];
1731             result[3] = (GLfloat) (GLint) a[3];
1732             store_vector4(inst, machine, result);
1733          }
1734          break;
1735       case OPCODE_UP2H:        /* unpack two 16-bit floats */
1736          {
1737             const GLuint raw = fetch_vector1ui(&inst->SrcReg[0], machine);
1738             GLfloat result[4];
1739             GLushort hx, hy;
1740             hx = raw & 0xffff;
1741             hy = raw >> 16;
1742             result[0] = result[2] = _mesa_half_to_float(hx);
1743             result[1] = result[3] = _mesa_half_to_float(hy);
1744             store_vector4(inst, machine, result);
1745          }
1746          break;
1747       case OPCODE_UP2US:       /* unpack two GLushorts */
1748          {
1749             const GLuint raw = fetch_vector1ui(&inst->SrcReg[0], machine);
1750             GLfloat result[4];
1751             GLushort usx, usy;
1752             usx = raw & 0xffff;
1753             usy = raw >> 16;
1754             result[0] = result[2] = usx * (1.0f / 65535.0f);
1755             result[1] = result[3] = usy * (1.0f / 65535.0f);
1756             store_vector4(inst, machine, result);
1757          }
1758          break;
1759       case OPCODE_UP4B:        /* unpack four GLbytes */
1760          {
1761             const GLuint raw = fetch_vector1ui(&inst->SrcReg[0], machine);
1762             GLfloat result[4];
1763             result[0] = (((raw >> 0) & 0xff) - 128) / 127.0F;
1764             result[1] = (((raw >> 8) & 0xff) - 128) / 127.0F;
1765             result[2] = (((raw >> 16) & 0xff) - 128) / 127.0F;
1766             result[3] = (((raw >> 24) & 0xff) - 128) / 127.0F;
1767             store_vector4(inst, machine, result);
1768          }
1769          break;
1770       case OPCODE_UP4UB:       /* unpack four GLubytes */
1771          {
1772             const GLuint raw = fetch_vector1ui(&inst->SrcReg[0], machine);
1773             GLfloat result[4];
1774             result[0] = ((raw >> 0) & 0xff) / 255.0F;
1775             result[1] = ((raw >> 8) & 0xff) / 255.0F;
1776             result[2] = ((raw >> 16) & 0xff) / 255.0F;
1777             result[3] = ((raw >> 24) & 0xff) / 255.0F;
1778             store_vector4(inst, machine, result);
1779          }
1780          break;
1781       case OPCODE_XOR:         /* bitwise XOR */
1782          {
1783             GLuint a[4], b[4], result[4];
1784             fetch_vector4ui(&inst->SrcReg[0], machine, a);
1785             fetch_vector4ui(&inst->SrcReg[1], machine, b);
1786             result[0] = a[0] ^ b[0];
1787             result[1] = a[1] ^ b[1];
1788             result[2] = a[2] ^ b[2];
1789             result[3] = a[3] ^ b[3];
1790             store_vector4ui(inst, machine, result);
1791          }
1792          break;
1793       case OPCODE_XPD:         /* cross product */
1794          {
1795             GLfloat a[4], b[4], result[4];
1796             fetch_vector4(&inst->SrcReg[0], machine, a);
1797             fetch_vector4(&inst->SrcReg[1], machine, b);
1798             result[0] = a[1] * b[2] - a[2] * b[1];
1799             result[1] = a[2] * b[0] - a[0] * b[2];
1800             result[2] = a[0] * b[1] - a[1] * b[0];
1801             result[3] = 1.0;
1802             store_vector4(inst, machine, result);
1803             if (DEBUG_PROG) {
1804                printf("XPD (%g %g %g %g) = (%g %g %g) X (%g %g %g)\n",
1805                       result[0], result[1], result[2], result[3],
1806                       a[0], a[1], a[2], b[0], b[1], b[2]);
1807             }
1808          }
1809          break;
1810       case OPCODE_X2D:         /* 2-D matrix transform */
1811          {
1812             GLfloat a[4], b[4], c[4], result[4];
1813             fetch_vector4(&inst->SrcReg[0], machine, a);
1814             fetch_vector4(&inst->SrcReg[1], machine, b);
1815             fetch_vector4(&inst->SrcReg[2], machine, c);
1816             result[0] = a[0] + b[0] * c[0] + b[1] * c[1];
1817             result[1] = a[1] + b[0] * c[2] + b[1] * c[3];
1818             result[2] = a[2] + b[0] * c[0] + b[1] * c[1];
1819             result[3] = a[3] + b[0] * c[2] + b[1] * c[3];
1820             store_vector4(inst, machine, result);
1821          }
1822          break;
1823       case OPCODE_PRINT:
1824          {
1825             if (inst->SrcReg[0].File != PROGRAM_UNDEFINED) {
1826                GLfloat a[4];
1827                fetch_vector4(&inst->SrcReg[0], machine, a);
1828                printf("%s%g, %g, %g, %g\n", (const char *) inst->Data,
1829                             a[0], a[1], a[2], a[3]);
1830             }
1831             else {
1832                printf("%s\n", (const char *) inst->Data);
1833             }
1834          }
1835          break;
1836       case OPCODE_END:
1837          return GL_TRUE;
1838       default:
1839          _mesa_problem(ctx, "Bad opcode %d in _mesa_execute_program",
1840                        inst->Opcode);
1841          return GL_TRUE;        /* return value doesn't matter */
1842       }
1843
1844       numExec++;
1845       if (numExec > maxExec) {
1846          _mesa_problem(ctx, "Infinite loop detected in fragment program");
1847          return GL_TRUE;
1848       }
1849
1850    } /* for pc */
1851
1852    return GL_TRUE;
1853 }