src/mesa/main/nvvertexec.c

   1 /*
   2  * Mesa 3-D graphics library
   3  * Version:  6.0
   4  *
   5  * Copyright (C) 1999-2004  Brian Paul   All Rights Reserved.
   6  *
   7  * Permission is hereby granted, free of charge, to any person obtaining a
   8  * copy of this software and associated documentation files (the "Software"),
   9  * to deal in the Software without restriction, including without limitation
  10  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  11  * and/or sell copies of the Software, and to permit persons to whom the
  12  * Software is furnished to do so, subject to the following conditions:
  13  *
  14  * The above copyright notice and this permission notice shall be included
  15  * in all copies or substantial portions of the Software.
  16  *
  17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  18  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  20  * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
  21  * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  22  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  23  */
  24
  25 /**
  26  * \file nvvertexec.c
  27  * Code to execute vertex programs.
  28  * \author Brian Paul
  29  */
  30
  31 #include "glheader.h"
  32 #include "context.h"
  33 #include "imports.h"
  34 #include "macros.h"
  35 #include "mtypes.h"
  36 #include "nvvertexec.h"
  37 #include "nvvertprog.h"
  38 #include "program.h"
  39 #include "math/m_matrix.h"
  40
  41
  42 static const GLfloat zeroVec[4] = { 0, 0, 0, 0 };
  43
  44
  45 /**
  46  * Load/initialize the vertex program registers.
  47  * This needs to be done per vertex.
  48  */
  49 void
  50 _mesa_init_vp_registers(GLcontext *ctx)
  51 {
  52    GLuint i;
  53
  54    /* Input registers get initialized from the current vertex attribs */
  55    MEMCPY(ctx->VertexProgram.Inputs, ctx->Current.Attrib,
  56           VERT_ATTRIB_MAX * 4 * sizeof(GLfloat));
  57
  58    /* Output and temp regs are initialized to [0,0,0,1] */
  59    for (i = 0; i < MAX_NV_VERTEX_PROGRAM_OUTPUTS; i++) {
  60       ASSIGN_4V(ctx->VertexProgram.Outputs[i], 0.0F, 0.0F, 0.0F, 1.0F);
  61    }
  62    for (i = 0; i < MAX_NV_VERTEX_PROGRAM_TEMPS; i++) {
  63       ASSIGN_4V(ctx->VertexProgram.Temporaries[i], 0.0F, 0.0F, 0.0F, 1.0F);
  64    }
  65
  66    /* The program parameters aren't touched */
  67    /* XXX: This should be moved to glBegin() time, but its safe (and slow!)
  68     * here - Karl
  69     */
  70    if (ctx->VertexProgram.Current->Parameters) {
  71       /* Grab the state */
  72       _mesa_load_state_parameters(ctx, ctx->VertexProgram.Current->Parameters);
  73
  74       /* And copy it into the program state */
  75       for (i=0; i<ctx->VertexProgram.Current->Parameters->NumParameters; i++) {
  76          MEMCPY(ctx->VertexProgram.Parameters[i],
  77                 &ctx->VertexProgram.Current->Parameters->Parameters[i].Values,
  78                 4*sizeof(GLfloat));
  79       }
  80    }
  81 }
  82
  83
  84
  85 /**
  86  * Copy the 16 elements of a matrix into four consecutive program
  87  * registers starting at 'pos'.
  88  */
  89 static void
  90 load_matrix(GLfloat registers[][4], GLuint pos, const GLfloat mat[16])
  91 {
  92    GLuint i;
  93    for (i = 0; i < 4; i++) {
  94       registers[pos + i][0] = mat[0 + i];
  95       registers[pos + i][1] = mat[4 + i];
  96       registers[pos + i][2] = mat[8 + i];
  97       registers[pos + i][3] = mat[12 + i];
  98    }
  99 }
 100
 101
 102 /**
 103  * As above, but transpose the matrix.
 104  */
 105 static void
 106 load_transpose_matrix(GLfloat registers[][4], GLuint pos,
 107                       const GLfloat mat[16])
 108 {
 109    MEMCPY(registers[pos], mat, 16 * sizeof(GLfloat));
 110 }
 111
 112
 113 /**
 114  * Load all currently tracked matrices into the program registers.
 115  * This needs to be done per glBegin/glEnd.
 116  */
 117 void
 118 _mesa_init_tracked_matrices(GLcontext *ctx)
 119 {
 120    GLuint i;
 121
 122    for (i = 0; i < MAX_NV_VERTEX_PROGRAM_PARAMS / 4; i++) {
 123       /* point 'mat' at source matrix */
 124       GLmatrix *mat;
 125       if (ctx->VertexProgram.TrackMatrix[i] == GL_MODELVIEW) {
 126          mat = ctx->ModelviewMatrixStack.Top;
 127       }
 128       else if (ctx->VertexProgram.TrackMatrix[i] == GL_PROJECTION) {
 129          mat = ctx->ProjectionMatrixStack.Top;
 130       }
 131       else if (ctx->VertexProgram.TrackMatrix[i] == GL_TEXTURE) {
 132          mat = ctx->TextureMatrixStack[ctx->Texture.CurrentUnit].Top;
 133       }
 134       else if (ctx->VertexProgram.TrackMatrix[i] == GL_COLOR) {
 135          mat = ctx->ColorMatrixStack.Top;
 136       }
 137       else if (ctx->VertexProgram.TrackMatrix[i]==GL_MODELVIEW_PROJECTION_NV) {
 138          /* XXX verify the combined matrix is up to date */
 139          mat = &ctx->_ModelProjectMatrix;
 140       }
 141       else if (ctx->VertexProgram.TrackMatrix[i] >= GL_MATRIX0_NV &&
 142                ctx->VertexProgram.TrackMatrix[i] <= GL_MATRIX7_NV) {
 143          GLuint n = ctx->VertexProgram.TrackMatrix[i] - GL_MATRIX0_NV;
 144          ASSERT(n < MAX_PROGRAM_MATRICES);
 145          mat = ctx->ProgramMatrixStack[n].Top;
 146       }
 147       else {
 148          /* no matrix is tracked, but we leave the register values as-is */
 149          assert(ctx->VertexProgram.TrackMatrix[i] == GL_NONE);
 150          continue;
 151       }
 152
 153       /* load the matrix */
 154       if (ctx->VertexProgram.TrackMatrixTransform[i] == GL_IDENTITY_NV) {
 155          load_matrix(ctx->VertexProgram.Parameters, i*4, mat->m);
 156       }
 157       else if (ctx->VertexProgram.TrackMatrixTransform[i] == GL_INVERSE_NV) {
 158          _math_matrix_analyse(mat); /* update the inverse */
 159          assert((mat->flags & MAT_DIRTY_INVERSE) == 0);
 160          load_matrix(ctx->VertexProgram.Parameters, i*4, mat->inv);
 161       }
 162       else if (ctx->VertexProgram.TrackMatrixTransform[i] == GL_TRANSPOSE_NV) {
 163          load_transpose_matrix(ctx->VertexProgram.Parameters, i*4, mat->m);
 164       }
 165       else {
 166          assert(ctx->VertexProgram.TrackMatrixTransform[i]
 167                 == GL_INVERSE_TRANSPOSE_NV);
 168          _math_matrix_analyse(mat); /* update the inverse */
 169          assert((mat->flags & MAT_DIRTY_INVERSE) == 0);
 170          load_transpose_matrix(ctx->VertexProgram.Parameters, i*4, mat->inv);
 171       }
 172    }
 173 }
 174
 175
 176
 177 /**
 178  * For debugging.  Dump the current vertex program machine registers.
 179  */
 180 void
 181 _mesa_dump_vp_state( const struct vertex_program_state *state )
 182 {
 183    int i;
 184    _mesa_printf("VertexIn:\n");
 185    for (i = 0; i < MAX_NV_VERTEX_PROGRAM_INPUTS; i++) {
 186       _mesa_printf("%d: %f %f %f %f   ", i,
 187                    state->Inputs[i][0],
 188                    state->Inputs[i][1],
 189                    state->Inputs[i][2],
 190                    state->Inputs[i][3]);
 191    }
 192    _mesa_printf("\n");
 193
 194    _mesa_printf("VertexOut:\n");
 195    for (i = 0; i < MAX_NV_VERTEX_PROGRAM_OUTPUTS; i++) {
 196       _mesa_printf("%d: %f %f %f %f   ", i,
 197                   state->Outputs[i][0],
 198                   state->Outputs[i][1],
 199                   state->Outputs[i][2],
 200                   state->Outputs[i][3]);
 201    }
 202    _mesa_printf("\n");
 203
 204    _mesa_printf("Registers:\n");
 205    for (i = 0; i < MAX_NV_VERTEX_PROGRAM_TEMPS; i++) {
 206       _mesa_printf("%d: %f %f %f %f   ", i,
 207                   state->Temporaries[i][0],
 208                   state->Temporaries[i][1],
 209                   state->Temporaries[i][2],
 210                   state->Temporaries[i][3]);
 211    }
 212    _mesa_printf("\n");
 213
 214    _mesa_printf("Parameters:\n");
 215    for (i = 0; i < MAX_NV_VERTEX_PROGRAM_PARAMS; i++) {
 216       _mesa_printf("%d: %f %f %f %f   ", i,
 217                   state->Parameters[i][0],
 218                   state->Parameters[i][1],
 219                   state->Parameters[i][2],
 220                   state->Parameters[i][3]);
 221    }
 222    _mesa_printf("\n");
 223 }
 224
 225
 226
 227 /**
 228  * Return a pointer to the 4-element float vector specified by the given
 229  * source register.
 230  */
 231 static INLINE const GLfloat *
 232 get_register_pointer( const struct vp_src_register *source,
 233                       const struct vertex_program_state *state )
 234 {
 235    if (source->RelAddr) {
 236       const GLint reg = source->Index + state->AddressReg[0];
 237       ASSERT( (source->File == PROGRAM_ENV_PARAM) ||
 238         (source->File == PROGRAM_STATE_VAR) );
 239       if (reg < 0 || reg > MAX_NV_VERTEX_PROGRAM_PARAMS)
 240          return zeroVec;
 241       else
 242          return state->Parameters[reg];
 243    }
 244    else {
 245       switch (source->File) {
 246          case PROGRAM_TEMPORARY:
 247             return state->Temporaries[source->Index];
 248          case PROGRAM_INPUT:
 249             return state->Inputs[source->Index];
 250          case PROGRAM_LOCAL_PARAM:
 251             /* XXX fix */
 252             return state->Temporaries[source->Index];
 253          case PROGRAM_ENV_PARAM:
 254             return state->Parameters[source->Index];
 255          case PROGRAM_STATE_VAR:
 256             return state->Parameters[source->Index];
 257          default:
 258             _mesa_problem(NULL,
 259                           "Bad source register file in fetch_vector4(vp)");
 260             return NULL;
 261       }
 262    }
 263    return NULL;
 264 }
 265
 266
 267 /**
 268  * Fetch a 4-element float vector from the given source register.
 269  * Apply swizzling and negating as needed.
 270  */
 271 static INLINE void
 272 fetch_vector4( const struct vp_src_register *source,
 273                const struct vertex_program_state *state,
 274                GLfloat result[4] )
 275 {
 276    const GLfloat *src = get_register_pointer(source, state);
 277
 278    if (source->Negate) {
 279       result[0] = -src[source->Swizzle[0]];
 280       result[1] = -src[source->Swizzle[1]];
 281       result[2] = -src[source->Swizzle[2]];
 282       result[3] = -src[source->Swizzle[3]];
 283    }
 284    else {
 285       result[0] = src[source->Swizzle[0]];
 286       result[1] = src[source->Swizzle[1]];
 287       result[2] = src[source->Swizzle[2]];
 288       result[3] = src[source->Swizzle[3]];
 289    }
 290 }
 291
 292
 293
 294 /**
 295  * As above, but only return result[0] element.
 296  */
 297 static INLINE void
 298 fetch_vector1( const struct vp_src_register *source,
 299                const struct vertex_program_state *state,
 300                GLfloat result[4] )
 301 {
 302    const GLfloat *src = get_register_pointer(source, state);
 303
 304    if (source->Negate) {
 305       result[0] = -src[source->Swizzle[0]];
 306    }
 307    else {
 308       result[0] = src[source->Swizzle[0]];
 309    }
 310 }
 311
 312
 313 /**
 314  * Store 4 floats into a register.
 315  */
 316 static void
 317 store_vector4( const struct vp_dst_register *dest,
 318                struct vertex_program_state *state,
 319                const GLfloat value[4] )
 320 {
 321    GLfloat *dst;
 322    switch (dest->File) {
 323       case PROGRAM_TEMPORARY:
 324          dst = state->Temporaries[dest->Index];
 325          break;
 326       case PROGRAM_OUTPUT:
 327          dst = state->Outputs[dest->Index];
 328          break;
 329       default:
 330          _mesa_problem(NULL, "Invalid register file in fetch_vector1(vp)");
 331          return;
 332    }
 333
 334    if (dest->WriteMask[0])
 335       dst[0] = value[0];
 336    if (dest->WriteMask[1])
 337       dst[1] = value[1];
 338    if (dest->WriteMask[2])
 339       dst[2] = value[2];
 340    if (dest->WriteMask[3])
 341       dst[3] = value[3];
 342 }
 343
 344
 345 /**
 346  * Set x to positive or negative infinity.
 347  */
 348 #if defined(USE_IEEE) || defined(_WIN32)
 349 #define SET_POS_INFINITY(x)  ( *((GLuint *) &x) = 0x7F800000 )
 350 #define SET_NEG_INFINITY(x)  ( *((GLuint *) &x) = 0xFF800000 )
 351 #elif defined(VMS)
 352 #define SET_POS_INFINITY(x)  x = __MAXFLOAT
 353 #define SET_NEG_INFINITY(x)  x = -__MAXFLOAT
 354 #else
 355 #define SET_POS_INFINITY(x)  x = (GLfloat) HUGE_VAL
 356 #define SET_NEG_INFINITY(x)  x = (GLfloat) -HUGE_VAL
 357 #endif
 358
 359 #define SET_FLOAT_BITS(x, bits) ((fi_type *) &(x))->i = bits
 360
 361
 362 /**
 363  * Execute the given vertex program
 364  */
 365 void
 366 _mesa_exec_vertex_program(GLcontext *ctx, const struct vertex_program *program)
 367 {
 368    struct vertex_program_state *state = &ctx->VertexProgram;
 369    const struct vp_instruction *inst;
 370
 371    ctx->_CurrentProgram = GL_VERTEX_PROGRAM_ARB; /* or NV, doesn't matter */
 372
 373    /* If the program is position invariant, multiply the input
 374     * position and the MVP matrix and stick it into the output pos slot
 375     */
 376    if (ctx->VertexProgram.Current->IsPositionInvariant) {
 377       TRANSFORM_POINT( ctx->VertexProgram.Outputs[0],
 378                        ctx->_ModelProjectMatrix.m,
 379                        ctx->VertexProgram.Inputs[0]);
 380
 381       /* XXX: This could go elsewhere */
 382       ctx->VertexProgram.Current->OutputsWritten |= 0x1;
 383    }
 384
 385    for (inst = program->Instructions; /*inst->Opcode != VP_OPCODE_END*/; inst++) {
 386
 387       if (ctx->VertexProgram.CallbackEnabled &&
 388           ctx->VertexProgram.Callback) {
 389          ctx->VertexProgram.CurrentPosition = inst->StringPos;
 390          ctx->VertexProgram.Callback(program->Base.Target,
 391                                      ctx->VertexProgram.CallbackData);
 392       }
 393
 394       switch (inst->Opcode) {
 395          case VP_OPCODE_MOV:
 396             {
 397                GLfloat t[4];
 398                fetch_vector4( &inst->SrcReg[0], state, t );
 399                store_vector4( &inst->DstReg, state, t );
 400             }
 401             break;
 402          case VP_OPCODE_LIT:
 403             {
 404                const GLfloat epsilon = 1.0e-5F; /* XXX fix? */
 405                GLfloat t[4], lit[4];
 406                fetch_vector4( &inst->SrcReg[0], state, t );
 407                if (t[3] < -(128.0F - epsilon))
 408                    t[3] = - (128.0F - epsilon);
 409                else if (t[3] > 128.0F - epsilon)
 410                   t[3] = 128.0F - epsilon;
 411                if (t[0] < 0.0)
 412                   t[0] = 0.0;
 413                if (t[1] < 0.0)
 414                   t[1] = 0.0;
 415                lit[0] = 1.0;
 416                lit[1] = t[0];
 417                lit[2] = (t[0] > 0.0) ? (GLfloat) exp(t[3] * log(t[1])) : 0.0F;
 418                lit[3] = 1.0;
 419                store_vector4( &inst->DstReg, state, lit );
 420             }
 421             break;
 422          case VP_OPCODE_RCP:
 423             {
 424                GLfloat t[4];
 425                fetch_vector1( &inst->SrcReg[0], state, t );
 426                if (t[0] != 1.0F)
 427                   t[0] = 1.0F / t[0];  /* div by zero is infinity! */
 428                t[1] = t[2] = t[3] = t[0];
 429                store_vector4( &inst->DstReg, state, t );
 430             }
 431             break;
 432          case VP_OPCODE_RSQ:
 433             {
 434                GLfloat t[4];
 435                fetch_vector1( &inst->SrcReg[0], state, t );
 436                t[0] = INV_SQRTF(FABSF(t[0]));
 437                t[1] = t[2] = t[3] = t[0];
 438                store_vector4( &inst->DstReg, state, t );
 439             }
 440             break;
 441          case VP_OPCODE_EXP:
 442             {
 443                GLfloat t[4], q[4], floor_t0;
 444                fetch_vector1( &inst->SrcReg[0], state, t );
 445                floor_t0 = (float) floor(t[0]);
 446                if (floor_t0 > FLT_MAX_EXP) {
 447                   SET_POS_INFINITY(q[0]);
 448                   SET_POS_INFINITY(q[2]);
 449                }
 450                else if (floor_t0 < FLT_MIN_EXP) {
 451                   q[0] = 0.0F;
 452                   q[2] = 0.0F;
 453                }
 454                else {
 455 #ifdef USE_IEEE
 456                   GLint ii = (GLint) floor_t0;
 457                   ii = (ii < 23) + 0x3f800000;
 458                   SET_FLOAT_BITS(q[0], ii);
 459                   q[0] = *((GLfloat *) &ii);
 460 #else
 461                   q[0] = (GLfloat) pow(2.0, floor_t0);
 462 #endif
 463                   q[2] = (GLfloat) (q[0] * LOG2(q[1]));
 464                }
 465                q[1] = t[0] - floor_t0;
 466                q[3] = 1.0F;
 467                store_vector4( &inst->DstReg, state, q );
 468             }
 469             break;
 470          case VP_OPCODE_LOG:
 471             {
 472                GLfloat t[4], q[4], abs_t0;
 473                fetch_vector1( &inst->SrcReg[0], state, t );
 474                abs_t0 = (GLfloat) fabs(t[0]);
 475                if (abs_t0 != 0.0F) {
 476                   /* Since we really can't handle infinite values on VMS
 477                    * like other OSes we'll use __MAXFLOAT to represent
 478                    * infinity.  This may need some tweaking.
 479                    */
 480 #ifdef VMS
 481                   if (abs_t0 == __MAXFLOAT)
 482 #else
 483                   if (IS_INF_OR_NAN(abs_t0))
 484 #endif
 485                   {
 486                      SET_POS_INFINITY(q[0]);
 487                      q[1] = 1.0F;
 488                      SET_POS_INFINITY(q[2]);
 489                   }
 490                   else {
 491                      int exponent;
 492                      double mantissa = frexp(t[0], &exponent);
 493                      q[0] = (GLfloat) (exponent - 1);
 494                      q[1] = (GLfloat) (2.0 * mantissa); /* map [.5, 1) -> [1, 2) */
 495                      q[2] = (GLfloat) (q[0] + LOG2(q[1]));
 496                   }
 497                   }
 498                else {
 499                   SET_NEG_INFINITY(q[0]);
 500                   q[1] = 1.0F;
 501                   SET_NEG_INFINITY(q[2]);
 502                }
 503                q[3] = 1.0;
 504                store_vector4( &inst->DstReg, state, q );
 505             }
 506             break;
 507          case VP_OPCODE_MUL:
 508             {
 509                GLfloat t[4], u[4], prod[4];
 510                fetch_vector4( &inst->SrcReg[0], state, t );
 511                fetch_vector4( &inst->SrcReg[1], state, u );
 512                prod[0] = t[0] * u[0];
 513                prod[1] = t[1] * u[1];
 514                prod[2] = t[2] * u[2];
 515                prod[3] = t[3] * u[3];
 516                store_vector4( &inst->DstReg, state, prod );
 517             }
 518             break;
 519          case VP_OPCODE_ADD:
 520             {
 521                GLfloat t[4], u[4], sum[4];
 522                fetch_vector4( &inst->SrcReg[0], state, t );
 523                fetch_vector4( &inst->SrcReg[1], state, u );
 524                sum[0] = t[0] + u[0];
 525                sum[1] = t[1] + u[1];
 526                sum[2] = t[2] + u[2];
 527                sum[3] = t[3] + u[3];
 528                store_vector4( &inst->DstReg, state, sum );
 529             }
 530             break;
 531          case VP_OPCODE_DP3:
 532             {
 533                GLfloat t[4], u[4], dot[4];
 534                fetch_vector4( &inst->SrcReg[0], state, t );
 535                fetch_vector4( &inst->SrcReg[1], state, u );
 536                dot[0] = t[0] * u[0] + t[1] * u[1] + t[2] * u[2];
 537                dot[1] = dot[2] = dot[3] = dot[0];
 538                store_vector4( &inst->DstReg, state, dot );
 539             }
 540             break;
 541          case VP_OPCODE_DP4:
 542             {
 543                GLfloat t[4], u[4], dot[4];
 544                fetch_vector4( &inst->SrcReg[0], state, t );
 545                fetch_vector4( &inst->SrcReg[1], state, u );
 546                dot[0] = t[0] * u[0] + t[1] * u[1] + t[2] * u[2] + t[3] * u[3];
 547                dot[1] = dot[2] = dot[3] = dot[0];
 548                store_vector4( &inst->DstReg, state, dot );
 549             }
 550             break;
 551          case VP_OPCODE_DST:
 552             {
 553                GLfloat t[4], u[4], dst[4];
 554                fetch_vector4( &inst->SrcReg[0], state, t );
 555                fetch_vector4( &inst->SrcReg[1], state, u );
 556                dst[0] = 1.0F;
 557                dst[1] = t[1] * u[1];
 558                dst[2] = t[2];
 559                dst[3] = u[3];
 560                store_vector4( &inst->DstReg, state, dst );
 561             }
 562             break;
 563          case VP_OPCODE_MIN:
 564             {
 565                GLfloat t[4], u[4], min[4];
 566                fetch_vector4( &inst->SrcReg[0], state, t );
 567                fetch_vector4( &inst->SrcReg[1], state, u );
 568                min[0] = (t[0] < u[0]) ? t[0] : u[0];
 569                min[1] = (t[1] < u[1]) ? t[1] : u[1];
 570                min[2] = (t[2] < u[2]) ? t[2] : u[2];
 571                min[3] = (t[3] < u[3]) ? t[3] : u[3];
 572                store_vector4( &inst->DstReg, state, min );
 573             }
 574             break;
 575          case VP_OPCODE_MAX:
 576             {
 577                GLfloat t[4], u[4], max[4];
 578                fetch_vector4( &inst->SrcReg[0], state, t );
 579                fetch_vector4( &inst->SrcReg[1], state, u );
 580                max[0] = (t[0] > u[0]) ? t[0] : u[0];
 581                max[1] = (t[1] > u[1]) ? t[1] : u[1];
 582                max[2] = (t[2] > u[2]) ? t[2] : u[2];
 583                max[3] = (t[3] > u[3]) ? t[3] : u[3];
 584                store_vector4( &inst->DstReg, state, max );
 585             }
 586             break;
 587          case VP_OPCODE_SLT:
 588             {
 589                GLfloat t[4], u[4], slt[4];
 590                fetch_vector4( &inst->SrcReg[0], state, t );
 591                fetch_vector4( &inst->SrcReg[1], state, u );
 592                slt[0] = (t[0] < u[0]) ? 1.0F : 0.0F;
 593                slt[1] = (t[1] < u[1]) ? 1.0F : 0.0F;
 594                slt[2] = (t[2] < u[2]) ? 1.0F : 0.0F;
 595                slt[3] = (t[3] < u[3]) ? 1.0F : 0.0F;
 596                store_vector4( &inst->DstReg, state, slt );
 597             }
 598             break;
 599          case VP_OPCODE_SGE:
 600             {
 601                GLfloat t[4], u[4], sge[4];
 602                fetch_vector4( &inst->SrcReg[0], state, t );
 603                fetch_vector4( &inst->SrcReg[1], state, u );
 604                sge[0] = (t[0] >= u[0]) ? 1.0F : 0.0F;
 605                sge[1] = (t[1] >= u[1]) ? 1.0F : 0.0F;
 606                sge[2] = (t[2] >= u[2]) ? 1.0F : 0.0F;
 607                sge[3] = (t[3] >= u[3]) ? 1.0F : 0.0F;
 608                store_vector4( &inst->DstReg, state, sge );
 609             }
 610             break;
 611          case VP_OPCODE_MAD:
 612             {
 613                GLfloat t[4], u[4], v[4], sum[4];
 614                fetch_vector4( &inst->SrcReg[0], state, t );
 615                fetch_vector4( &inst->SrcReg[1], state, u );
 616                fetch_vector4( &inst->SrcReg[2], state, v );
 617                sum[0] = t[0] * u[0] + v[0];
 618                sum[1] = t[1] * u[1] + v[1];
 619                sum[2] = t[2] * u[2] + v[2];
 620                sum[3] = t[3] * u[3] + v[3];
 621                store_vector4( &inst->DstReg, state, sum );
 622             }
 623             break;
 624          case VP_OPCODE_ARL:
 625             {
 626                GLfloat t[4];
 627                fetch_vector4( &inst->SrcReg[0], state, t );
 628                state->AddressReg[0] = (GLint) floor(t[0]);
 629             }
 630             break;
 631          case VP_OPCODE_DPH:
 632             {
 633                GLfloat t[4], u[4], dot[4];
 634                fetch_vector4( &inst->SrcReg[0], state, t );
 635                fetch_vector4( &inst->SrcReg[1], state, u );
 636                dot[0] = t[0] * u[0] + t[1] * u[1] + t[2] * u[2] + u[3];
 637                dot[1] = dot[2] = dot[3] = dot[0];
 638                store_vector4( &inst->DstReg, state, dot );
 639             }
 640             break;
 641          case VP_OPCODE_RCC:
 642             {
 643                GLfloat t[4], u;
 644                fetch_vector1( &inst->SrcReg[0], state, t );
 645                if (t[0] == 1.0F)
 646                   u = 1.0F;
 647                else
 648                   u = 1.0F / t[0];
 649                if (u > 0.0F) {
 650                   if (u > 1.884467e+019F) {
 651                      u = 1.884467e+019F;  /* IEEE 32-bit binary value 0x5F800000 */
 652                   }
 653                   else if (u < 5.42101e-020F) {
 654                      u = 5.42101e-020F;   /* IEEE 32-bit binary value 0x1F800000 */
 655                   }
 656                }
 657                else {
 658                   if (u < -1.884467e+019F) {
 659                      u = -1.884467e+019F; /* IEEE 32-bit binary value 0xDF800000 */
 660                   }
 661                   else if (u > -5.42101e-020F) {
 662                      u = -5.42101e-020F;  /* IEEE 32-bit binary value 0x9F800000 */
 663                   }
 664                }
 665                t[0] = t[1] = t[2] = t[3] = u;
 666                store_vector4( &inst->DstReg, state, t );
 667             }
 668             break;
 669          case VP_OPCODE_SUB: /* GL_NV_vertex_program1_1 */
 670             {
 671                GLfloat t[4], u[4], sum[4];
 672                fetch_vector4( &inst->SrcReg[0], state, t );
 673                fetch_vector4( &inst->SrcReg[1], state, u );
 674                sum[0] = t[0] - u[0];
 675                sum[1] = t[1] - u[1];
 676                sum[2] = t[2] - u[2];
 677                sum[3] = t[3] - u[3];
 678                store_vector4( &inst->DstReg, state, sum );
 679             }
 680             break;
 681          case VP_OPCODE_ABS: /* GL_NV_vertex_program1_1 */
 682             {
 683                GLfloat t[4];
 684                fetch_vector4( &inst->SrcReg[0], state, t );
 685                if (t[0] < 0.0)  t[0] = -t[0];
 686                if (t[1] < 0.0)  t[1] = -t[1];
 687                if (t[2] < 0.0)  t[2] = -t[2];
 688                if (t[3] < 0.0)  t[3] = -t[3];
 689                store_vector4( &inst->DstReg, state, t );
 690             }
 691             break;
 692          case VP_OPCODE_FLR: /* GL_ARB_vertex_program */
 693             {
 694                GLfloat t[4];
 695                fetch_vector4( &inst->SrcReg[0], state, t );
 696                t[0] = FLOORF(t[0]);
 697                t[1] = FLOORF(t[1]);
 698                t[2] = FLOORF(t[2]);
 699                t[3] = FLOORF(t[3]);
 700                store_vector4( &inst->DstReg, state, t );
 701             }
 702             break;
 703          case VP_OPCODE_FRC: /* GL_ARB_vertex_program */
 704             {
 705                GLfloat t[4];
 706                fetch_vector4( &inst->SrcReg[0], state, t );
 707                t[0] = t[0] - FLOORF(t[0]);
 708                t[1] = t[1] - FLOORF(t[1]);
 709                t[2] = t[2] - FLOORF(t[2]);
 710                t[3] = t[3] - FLOORF(t[3]);
 711                store_vector4( &inst->DstReg, state, t );
 712             }
 713             break;
 714          case VP_OPCODE_EX2: /* GL_ARB_vertex_program */
 715             {
 716                GLfloat t[4];
 717                fetch_vector1( &inst->SrcReg[0], state, t );
 718                t[0] = t[1] = t[2] = t[3] = (GLfloat)_mesa_pow(2.0, t[0]);
 719                store_vector4( &inst->DstReg, state, t );
 720             }
 721             break;
 722          case VP_OPCODE_LG2: /* GL_ARB_vertex_program */
 723             {
 724                GLfloat t[4];
 725                fetch_vector1( &inst->SrcReg[0], state, t );
 726                t[0] = t[1] = t[2] = t[3] = LOG2(t[0]);
 727                store_vector4( &inst->DstReg, state, t );
 728             }
 729             break;
 730          case VP_OPCODE_POW: /* GL_ARB_vertex_program */
 731             {
 732                GLfloat t[4], u[4];
 733                fetch_vector1( &inst->SrcReg[0], state, t );
 734                fetch_vector1( &inst->SrcReg[1], state, u );
 735                t[0] = t[1] = t[2] = t[3] = (GLfloat)_mesa_pow(t[0], u[0]);
 736                store_vector4( &inst->DstReg, state, t );
 737             }
 738             break;
 739          case VP_OPCODE_XPD: /* GL_ARB_vertex_program */
 740             {
 741                GLfloat t[4], u[4], cross[4];
 742                fetch_vector4( &inst->SrcReg[0], state, t );
 743                fetch_vector4( &inst->SrcReg[1], state, u );
 744                cross[0] = t[1] * u[2] - t[2] * u[1];
 745                cross[1] = t[2] * u[0] - t[0] * u[2];
 746                cross[2] = t[0] * u[1] - t[1] * u[0];
 747                store_vector4( &inst->DstReg, state, cross );
 748             }
 749             break;
 750          case VP_OPCODE_SWZ: /* GL_ARB_vertex_program */
 751             {
 752                const struct vp_src_register *source = &inst->SrcReg[0];
 753                const GLfloat *src = get_register_pointer(source, state);
 754                GLfloat result[4];
 755                GLuint i;
 756
 757                /* do extended swizzling here */
 758                for (i = 0; i < 3; i++) {
 759                   if (source->Swizzle[i] == SWIZZLE_ZERO)
 760                      result[i] = 0.0;
 761                   else if (source->Swizzle[i] == SWIZZLE_ONE)
 762                      result[i] = -1.0;
 763                   else
 764                      result[i] = -src[source->Swizzle[i]];
 765                   if (source->Negate)
 766                      result[i] = -result[i];
 767                }
 768                store_vector4( &inst->DstReg, state, result );
 769             }
 770             break;
 771
 772          case VP_OPCODE_END:
 773             ctx->_CurrentProgram = 0;
 774             return;
 775          default:
 776             /* bad instruction opcode */
 777             _mesa_problem(ctx, "Bad VP Opcode in _mesa_exec_vertex_program");
 778             ctx->_CurrentProgram = 0;
 779             return;
 780       } /* switch */
 781    } /* for */
 782
 783    ctx->_CurrentProgram = 0;
 784 }
 785
 786
 787
 788 /**
 789 Thoughts on vertex program optimization:
 790
 791 The obvious thing to do is to compile the vertex program into X86/SSE/3DNow!
 792 assembly code.  That will probably be a lot of work.
 793
 794 Another approach might be to replace the vp_instruction->Opcode field with
 795 a pointer to a specialized C function which executes the instruction.
 796 In particular we can write functions which skip swizzling, negating,
 797 masking, relative addressing, etc. when they're not needed.
 798
 799 For example:
 800
 801 void simple_add( struct vp_instruction *inst )
 802 {
 803    GLfloat *sum = machine->Registers[inst->DstReg.Register];
 804    GLfloat *a = machine->Registers[inst->SrcReg[0].Register];
 805    GLfloat *b = machine->Registers[inst->SrcReg[1].Register];
 806    sum[0] = a[0] + b[0];
 807    sum[1] = a[1] + b[1];
 808    sum[2] = a[2] + b[2];
 809    sum[3] = a[3] + b[3];
 810 }
 811
 812 */
 813
 814 /*
 815
 816 KW:
 817
 818 A first step would be to 'vectorize' the programs in the same way as
 819 the normal transformation code in the tnl module.  Thus each opcode
 820 takes zero or more input vectors (registers) and produces one or more
 821 output vectors.
 822
  823 These operations would initially be coded in C, with machine-specific
 824 assembly following, as is currently the case for matrix
 825 transformations in the math/ directory.  The preprocessing scheme for
 826 selecting simpler operations Brian describes above would also work
 827 here.
 828
 829 This should give reasonable performance without excessive effort.
 830
 831 */