src/mesa/main/nvvertexec.c

   1 /*
   2  * Mesa 3-D graphics library
   3  * Version:  5.1
   4  *
   5  * Copyright (C) 1999-2003  Brian Paul   All Rights Reserved.
   6  *
   7  * Permission is hereby granted, free of charge, to any person obtaining a
   8  * copy of this software and associated documentation files (the "Software"),
   9  * to deal in the Software without restriction, including without limitation
  10  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  11  * and/or sell copies of the Software, and to permit persons to whom the
  12  * Software is furnished to do so, subject to the following conditions:
  13  *
  14  * The above copyright notice and this permission notice shall be included
  15  * in all copies or substantial portions of the Software.
  16  *
  17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  18  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  20  * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
  21  * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  22  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  23  */
  24
  25 /**
  26  * \file nvvertexec.c
  27  * Code to execute vertex programs.
  28  * \author Brian Paul
  29  */
  30
  31 #include "glheader.h"
  32 #include "context.h"
  33 #include "imports.h"
  34 #include "macros.h"
  35 #include "mtypes.h"
  36 #include "nvvertexec.h"
  37 #include "nvvertprog.h"
  38 #include "program.h"
  39 #include "math/m_matrix.h"
  40
  41
  42 static const GLfloat zeroVec[4] = { 0, 0, 0, 0 };
  43
  44
  45 /**
  46  * Load/initialize the vertex program registers.
  47  * This needs to be done per vertex.
  48  */
  49 void
  50 _mesa_init_vp_registers(GLcontext *ctx)
  51 {
  52    GLuint i;
  53
  54    /* Input registers get initialized from the current vertex attribs */
  55    MEMCPY(ctx->VertexProgram.Inputs, ctx->Current.Attrib,
  56           VERT_ATTRIB_MAX * 4 * sizeof(GLfloat));
  57
  58    /* Output and temp regs are initialized to [0,0,0,1] */
  59    for (i = 0; i < MAX_NV_VERTEX_PROGRAM_OUTPUTS; i++) {
  60       ASSIGN_4V(ctx->VertexProgram.Outputs[i], 0.0F, 0.0F, 0.0F, 1.0F);
  61    }
  62    for (i = 0; i < MAX_NV_VERTEX_PROGRAM_TEMPS; i++) {
  63       ASSIGN_4V(ctx->VertexProgram.Temporaries[i], 0.0F, 0.0F, 0.0F, 1.0F);
  64    }
  65
  66    /* The program parameters aren't touched */
  67    /* XXX: This should be moved to glBegin() time, but its safe (and slow!)
  68     * here - Karl
  69     */
  70    if (ctx->VertexProgram.Current->Parameters) {
  71       /* Grab the state */
  72       _mesa_load_state_parameters(ctx, ctx->VertexProgram.Current->Parameters);
  73
  74       /* And copy it into the program state */
  75       for (i=0; i<ctx->VertexProgram.Current->Parameters->NumParameters; i++) {
  76          MEMCPY(ctx->VertexProgram.Parameters[i],
  77                 &ctx->VertexProgram.Current->Parameters->Parameters[i].Values,
  78                 4*sizeof(GLfloat));
  79       }
  80    }
  81 }
  82
  83
  84
  85 /**
  86  * Copy the 16 elements of a matrix into four consecutive program
  87  * registers starting at 'pos'.
  88  */
  89 static void
  90 load_matrix(GLfloat registers[][4], GLuint pos, const GLfloat mat[16])
  91 {
  92    GLuint i;
  93    for (i = 0; i < 4; i++) {
  94       registers[pos + i][0] = mat[0 + i];
  95       registers[pos + i][1] = mat[4 + i];
  96       registers[pos + i][2] = mat[8 + i];
  97       registers[pos + i][3] = mat[12 + i];
  98    }
  99 }
 100
 101
 102 /**
 103  * As above, but transpose the matrix.
 104  */
 105 static void
 106 load_transpose_matrix(GLfloat registers[][4], GLuint pos,
 107                       const GLfloat mat[16])
 108 {
 109    MEMCPY(registers[pos], mat, 16 * sizeof(GLfloat));
 110 }
 111
 112
 113 /**
 114  * Load all currently tracked matrices into the program registers.
 115  * This needs to be done per glBegin/glEnd.
 116  */
 117 void
 118 _mesa_init_tracked_matrices(GLcontext *ctx)
 119 {
 120    GLuint i;
 121
 122    for (i = 0; i < MAX_NV_VERTEX_PROGRAM_PARAMS / 4; i++) {
 123       /* point 'mat' at source matrix */
 124       GLmatrix *mat;
 125       if (ctx->VertexProgram.TrackMatrix[i] == GL_MODELVIEW) {
 126          mat = ctx->ModelviewMatrixStack.Top;
 127       }
 128       else if (ctx->VertexProgram.TrackMatrix[i] == GL_PROJECTION) {
 129          mat = ctx->ProjectionMatrixStack.Top;
 130       }
 131       else if (ctx->VertexProgram.TrackMatrix[i] == GL_TEXTURE) {
 132          mat = ctx->TextureMatrixStack[ctx->Texture.CurrentUnit].Top;
 133       }
 134       else if (ctx->VertexProgram.TrackMatrix[i] == GL_COLOR) {
 135          mat = ctx->ColorMatrixStack.Top;
 136       }
 137       else if (ctx->VertexProgram.TrackMatrix[i]==GL_MODELVIEW_PROJECTION_NV) {
 138          /* XXX verify the combined matrix is up to date */
 139          mat = &ctx->_ModelProjectMatrix;
 140       }
 141       else if (ctx->VertexProgram.TrackMatrix[i] >= GL_MATRIX0_NV &&
 142                ctx->VertexProgram.TrackMatrix[i] <= GL_MATRIX7_NV) {
 143          GLuint n = ctx->VertexProgram.TrackMatrix[i] - GL_MATRIX0_NV;
 144          ASSERT(n < MAX_PROGRAM_MATRICES);
 145          mat = ctx->ProgramMatrixStack[n].Top;
 146       }
 147       else {
 148          /* no matrix is tracked, but we leave the register values as-is */
 149          assert(ctx->VertexProgram.TrackMatrix[i] == GL_NONE);
 150          continue;
 151       }
 152
 153       /* load the matrix */
 154       if (ctx->VertexProgram.TrackMatrixTransform[i] == GL_IDENTITY_NV) {
 155          load_matrix(ctx->VertexProgram.Parameters, i*4, mat->m);
 156       }
 157       else if (ctx->VertexProgram.TrackMatrixTransform[i] == GL_INVERSE_NV) {
 158          _math_matrix_analyse(mat); /* update the inverse */
 159          assert((mat->flags & MAT_DIRTY_INVERSE) == 0);
 160          load_matrix(ctx->VertexProgram.Parameters, i*4, mat->inv);
 161       }
 162       else if (ctx->VertexProgram.TrackMatrixTransform[i] == GL_TRANSPOSE_NV) {
 163          load_transpose_matrix(ctx->VertexProgram.Parameters, i*4, mat->m);
 164       }
 165       else {
 166          assert(ctx->VertexProgram.TrackMatrixTransform[i]
 167                 == GL_INVERSE_TRANSPOSE_NV);
 168          _math_matrix_analyse(mat); /* update the inverse */
 169          assert((mat->flags & MAT_DIRTY_INVERSE) == 0);
 170          load_transpose_matrix(ctx->VertexProgram.Parameters, i*4, mat->inv);
 171       }
 172    }
 173 }
 174
 175
 176
 177 /**
 178  * For debugging.  Dump the current vertex program machine registers.
 179  */
 180 void
 181 _mesa_dump_vp_state( const struct vertex_program_state *state )
 182 {
 183    int i;
 184    _mesa_printf("VertexIn:\n");
 185    for (i = 0; i < MAX_NV_VERTEX_PROGRAM_INPUTS; i++) {
 186       _mesa_printf("%d: %f %f %f %f   ", i,
 187                    state->Inputs[i][0],
 188                    state->Inputs[i][1],
 189                    state->Inputs[i][2],
 190                    state->Inputs[i][3]);
 191    }
 192    _mesa_printf("\n");
 193
 194    _mesa_printf("VertexOut:\n");
 195    for (i = 0; i < MAX_NV_VERTEX_PROGRAM_OUTPUTS; i++) {
 196       _mesa_printf("%d: %f %f %f %f   ", i,
 197                   state->Outputs[i][0],
 198                   state->Outputs[i][1],
 199                   state->Outputs[i][2],
 200                   state->Outputs[i][3]);
 201    }
 202    _mesa_printf("\n");
 203
 204    _mesa_printf("Registers:\n");
 205    for (i = 0; i < MAX_NV_VERTEX_PROGRAM_TEMPS; i++) {
 206       _mesa_printf("%d: %f %f %f %f   ", i,
 207                   state->Temporaries[i][0],
 208                   state->Temporaries[i][1],
 209                   state->Temporaries[i][2],
 210                   state->Temporaries[i][3]);
 211    }
 212    _mesa_printf("\n");
 213
 214    _mesa_printf("Parameters:\n");
 215    for (i = 0; i < MAX_NV_VERTEX_PROGRAM_PARAMS; i++) {
 216       _mesa_printf("%d: %f %f %f %f   ", i,
 217                   state->Parameters[i][0],
 218                   state->Parameters[i][1],
 219                   state->Parameters[i][2],
 220                   state->Parameters[i][3]);
 221    }
 222    _mesa_printf("\n");
 223 }
 224
 225
 226
 227 /**
 228  * Return a pointer to the 4-element float vector specified by the given
 229  * source register.
 230  */
 231 static INLINE const GLfloat *
 232 get_register_pointer( const struct vp_src_register *source,
 233                       const struct vertex_program_state *state )
 234 {
 235    if (source->RelAddr) {
 236       const GLint reg = source->Index + state->AddressReg[0];
 237       ASSERT( (source->File == PROGRAM_ENV_PARAM) ||
 238         (source->File == PROGRAM_STATE_VAR) );
 239       if (reg < 0 || reg > MAX_NV_VERTEX_PROGRAM_PARAMS)
 240          return zeroVec;
 241       else
 242          return state->Parameters[reg];
 243    }
 244    else {
 245       switch (source->File) {
 246          case PROGRAM_TEMPORARY:
 247             return state->Temporaries[source->Index];
 248          case PROGRAM_INPUT:
 249             return state->Inputs[source->Index];
 250          case PROGRAM_LOCAL_PARAM:
 251             /* XXX fix */
 252             return state->Temporaries[source->Index];
 253          case PROGRAM_ENV_PARAM:
 254             return state->Parameters[source->Index];
 255          case PROGRAM_STATE_VAR:
 256             return state->Parameters[source->Index];
 257          default:
 258             _mesa_problem(NULL,
 259                           "Bad source register file in fetch_vector4(vp)");
 260             return NULL;
 261       }
 262    }
 263    return NULL;
 264 }
 265
 266
 267 /**
 268  * Fetch a 4-element float vector from the given source register.
 269  * Apply swizzling and negating as needed.
 270  */
 271 static INLINE void
 272 fetch_vector4( const struct vp_src_register *source,
 273                const struct vertex_program_state *state,
 274                GLfloat result[4] )
 275 {
 276    const GLfloat *src = get_register_pointer(source, state);
 277
 278    if (source->Negate) {
 279       result[0] = -src[source->Swizzle[0]];
 280       result[1] = -src[source->Swizzle[1]];
 281       result[2] = -src[source->Swizzle[2]];
 282       result[3] = -src[source->Swizzle[3]];
 283    }
 284    else {
 285       result[0] = src[source->Swizzle[0]];
 286       result[1] = src[source->Swizzle[1]];
 287       result[2] = src[source->Swizzle[2]];
 288       result[3] = src[source->Swizzle[3]];
 289    }
 290 }
 291
 292
 293
 294 /**
 295  * As above, but only return result[0] element.
 296  */
 297 static INLINE void
 298 fetch_vector1( const struct vp_src_register *source,
 299                const struct vertex_program_state *state,
 300                GLfloat result[4] )
 301 {
 302    const GLfloat *src = get_register_pointer(source, state);
 303
 304    if (source->Negate) {
 305       result[0] = -src[source->Swizzle[0]];
 306    }
 307    else {
 308       result[0] = src[source->Swizzle[0]];
 309    }
 310 }
 311
 312
 313 /**
 314  * Store 4 floats into a register.
 315  */
 316 static void
 317 store_vector4( const struct vp_dst_register *dest,
 318                struct vertex_program_state *state,
 319                const GLfloat value[4] )
 320 {
 321    GLfloat *dst;
 322    switch (dest->File) {
 323       case PROGRAM_TEMPORARY:
 324          dst = state->Temporaries[dest->Index];
 325          break;
 326       case PROGRAM_OUTPUT:
 327          dst = state->Outputs[dest->Index];
 328          break;
 329       default:
 330          _mesa_problem(NULL, "Invalid register file in fetch_vector1(vp)");
 331          return;
 332    }
 333
 334    if (dest->WriteMask[0])
 335       dst[0] = value[0];
 336    if (dest->WriteMask[1])
 337       dst[1] = value[1];
 338    if (dest->WriteMask[2])
 339       dst[2] = value[2];
 340    if (dest->WriteMask[3])
 341       dst[3] = value[3];
 342 }
 343
 344
 345 /**
 346  * Set x to positive or negative infinity.
 347  */
 348 #if defined(USE_IEEE) || defined(_WIN32)
 349 #define SET_POS_INFINITY(x)  ( *((GLuint *) &x) = 0x7F800000 )
 350 #define SET_NEG_INFINITY(x)  ( *((GLuint *) &x) = 0xFF800000 )
 351 #elif defined(VMS)
 352 #define SET_POS_INFINITY(x)  x = __MAXFLOAT
 353 #define SET_NEG_INFINITY(x)  x = -__MAXFLOAT
 354 #else
 355 #define SET_POS_INFINITY(x)  x = (GLfloat) HUGE_VAL
 356 #define SET_NEG_INFINITY(x)  x = (GLfloat) -HUGE_VAL
 357 #endif
 358
 359 #define SET_FLOAT_BITS(x, bits) ((fi_type *) &(x))->i = bits
 360
 361
 362 /**
 363  * Execute the given vertex program
 364  */
 365 void
 366 _mesa_exec_vertex_program(GLcontext *ctx, const struct vertex_program *program)
 367 {
 368    struct vertex_program_state *state = &ctx->VertexProgram;
 369    const struct vp_instruction *inst;
 370
 371    ctx->_CurrentProgram = GL_VERTEX_PROGRAM_ARB; /* or NV, doesn't matter */
 372
 373    /* If the program is position invariant, multiply the input
 374     * position and the MVP matrix and stick it into the output pos slot
 375     */
 376    if (ctx->VertexProgram.Current->IsPositionInvariant) {
 377       TRANSFORM_POINT( ctx->VertexProgram.Outputs[0],
 378                        ctx->_ModelProjectMatrix.m,
 379                        ctx->VertexProgram.Inputs[0]);
 380
 381       /* XXX: This could go elsewhere */
 382       ctx->VertexProgram.Current->OutputsWritten |= 0x1;
 383    }
 384
 385
 386
 387    for (inst = program->Instructions; inst->Opcode != VP_OPCODE_END; inst++) {
 388
 389       if (ctx->VertexProgram.CallbackEnabled &&
 390           ctx->VertexProgram.Callback) {
 391          ctx->VertexProgram.CurrentPosition = inst->StringPos;
 392          ctx->VertexProgram.Callback(program->Base.Target,
 393                                      ctx->VertexProgram.CallbackData);
 394       }
 395
 396       switch (inst->Opcode) {
 397          case VP_OPCODE_MOV:
 398             {
 399                GLfloat t[4];
 400                fetch_vector4( &inst->SrcReg[0], state, t );
 401                store_vector4( &inst->DstReg, state, t );
 402             }
 403             break;
 404          case VP_OPCODE_LIT:
 405             {
 406                const GLfloat epsilon = 1.0e-5F; /* XXX fix? */
 407                GLfloat t[4], lit[4];
 408                fetch_vector4( &inst->SrcReg[0], state, t );
 409                if (t[3] < -(128.0F - epsilon))
 410                    t[3] = - (128.0F - epsilon);
 411                else if (t[3] > 128.0F - epsilon)
 412                   t[3] = 128.0F - epsilon;
 413                if (t[0] < 0.0)
 414                   t[0] = 0.0;
 415                if (t[1] < 0.0)
 416                   t[1] = 0.0;
 417                lit[0] = 1.0;
 418                lit[1] = t[0];
 419                lit[2] = (t[0] > 0.0) ? (GLfloat) exp(t[3] * log(t[1])) : 0.0F;
 420                lit[3] = 1.0;
 421                store_vector4( &inst->DstReg, state, lit );
 422             }
 423             break;
 424          case VP_OPCODE_RCP:
 425             {
 426                GLfloat t[4];
 427                fetch_vector1( &inst->SrcReg[0], state, t );
 428                if (t[0] != 1.0F)
 429                   t[0] = 1.0F / t[0];  /* div by zero is infinity! */
 430                t[1] = t[2] = t[3] = t[0];
 431                store_vector4( &inst->DstReg, state, t );
 432             }
 433             break;
 434          case VP_OPCODE_RSQ:
 435             {
 436                GLfloat t[4];
 437                fetch_vector1( &inst->SrcReg[0], state, t );
 438                t[0] = INV_SQRTF(FABSF(t[0]));
 439                t[1] = t[2] = t[3] = t[0];
 440                store_vector4( &inst->DstReg, state, t );
 441             }
 442             break;
 443          case VP_OPCODE_EXP:
 444             {
 445                GLfloat t[4], q[4], floor_t0;
 446                fetch_vector1( &inst->SrcReg[0], state, t );
 447                floor_t0 = (float) floor(t[0]);
 448                if (floor_t0 > FLT_MAX_EXP) {
 449                   SET_POS_INFINITY(q[0]);
 450                   SET_POS_INFINITY(q[2]);
 451                }
 452                else if (floor_t0 < FLT_MIN_EXP) {
 453                   q[0] = 0.0F;
 454                   q[2] = 0.0F;
 455                }
 456                else {
 457 #ifdef USE_IEEE
 458                   GLint ii = (GLint) floor_t0;
 459                   ii = (ii < 23) + 0x3f800000;
 460                   SET_FLOAT_BITS(q[0], ii);
 461                   q[0] = *((GLfloat *) &ii);
 462 #else
 463                   q[0] = (GLfloat) pow(2.0, floor_t0);
 464 #endif
 465                   q[2] = (GLfloat) (q[0] * LOG2(q[1]));
 466                }
 467                q[1] = t[0] - floor_t0;
 468                q[3] = 1.0F;
 469                store_vector4( &inst->DstReg, state, q );
 470             }
 471             break;
 472          case VP_OPCODE_LOG:
 473             {
 474                GLfloat t[4], q[4], abs_t0;
 475                fetch_vector1( &inst->SrcReg[0], state, t );
 476                abs_t0 = (GLfloat) fabs(t[0]);
 477                if (abs_t0 != 0.0F) {
 478                   /* Since we really can't handle infinite values on VMS
 479                    * like other OSes we'll use __MAXFLOAT to represent
 480                    * infinity.  This may need some tweaking.
 481                    */
 482 #ifdef VMS
 483                   if (abs_t0 == __MAXFLOAT)
 484 #else
 485                   if (IS_INF_OR_NAN(abs_t0))
 486 #endif
 487                   {
 488                      SET_POS_INFINITY(q[0]);
 489                      q[1] = 1.0F;
 490                      SET_POS_INFINITY(q[2]);
 491                   }
 492                   else {
 493                      int exponent;
 494                      double mantissa = frexp(t[0], &exponent);
 495                      q[0] = (GLfloat) (exponent - 1);
 496                      q[1] = (GLfloat) (2.0 * mantissa); /* map [.5, 1) -> [1, 2) */
 497                      q[2] = (GLfloat) (q[0] + LOG2(q[1]));
 498                   }
 499                   }
 500                else {
 501                   SET_NEG_INFINITY(q[0]);
 502                   q[1] = 1.0F;
 503                   SET_NEG_INFINITY(q[2]);
 504                }
 505                q[3] = 1.0;
 506                store_vector4( &inst->DstReg, state, q );
 507             }
 508             break;
 509          case VP_OPCODE_MUL:
 510             {
 511                GLfloat t[4], u[4], prod[4];
 512                fetch_vector4( &inst->SrcReg[0], state, t );
 513                fetch_vector4( &inst->SrcReg[1], state, u );
 514                prod[0] = t[0] * u[0];
 515                prod[1] = t[1] * u[1];
 516                prod[2] = t[2] * u[2];
 517                prod[3] = t[3] * u[3];
 518                store_vector4( &inst->DstReg, state, prod );
 519             }
 520             break;
 521          case VP_OPCODE_ADD:
 522             {
 523                GLfloat t[4], u[4], sum[4];
 524                fetch_vector4( &inst->SrcReg[0], state, t );
 525                fetch_vector4( &inst->SrcReg[1], state, u );
 526                sum[0] = t[0] + u[0];
 527                sum[1] = t[1] + u[1];
 528                sum[2] = t[2] + u[2];
 529                sum[3] = t[3] + u[3];
 530                store_vector4( &inst->DstReg, state, sum );
 531             }
 532             break;
 533          case VP_OPCODE_DP3:
 534             {
 535                GLfloat t[4], u[4], dot[4];
 536                fetch_vector4( &inst->SrcReg[0], state, t );
 537                fetch_vector4( &inst->SrcReg[1], state, u );
 538                dot[0] = t[0] * u[0] + t[1] * u[1] + t[2] * u[2];
 539                dot[1] = dot[2] = dot[3] = dot[0];
 540                store_vector4( &inst->DstReg, state, dot );
 541             }
 542             break;
 543          case VP_OPCODE_DP4:
 544             {
 545                GLfloat t[4], u[4], dot[4];
 546                fetch_vector4( &inst->SrcReg[0], state, t );
 547                fetch_vector4( &inst->SrcReg[1], state, u );
 548                dot[0] = t[0] * u[0] + t[1] * u[1] + t[2] * u[2] + t[3] * u[3];
 549                dot[1] = dot[2] = dot[3] = dot[0];
 550                store_vector4( &inst->DstReg, state, dot );
 551             }
 552             break;
 553          case VP_OPCODE_DST:
 554             {
 555                GLfloat t[4], u[4], dst[4];
 556                fetch_vector4( &inst->SrcReg[0], state, t );
 557                fetch_vector4( &inst->SrcReg[1], state, u );
 558                dst[0] = 1.0F;
 559                dst[1] = t[1] * u[1];
 560                dst[2] = t[2];
 561                dst[3] = u[3];
 562                store_vector4( &inst->DstReg, state, dst );
 563             }
 564             break;
 565          case VP_OPCODE_MIN:
 566             {
 567                GLfloat t[4], u[4], min[4];
 568                fetch_vector4( &inst->SrcReg[0], state, t );
 569                fetch_vector4( &inst->SrcReg[1], state, u );
 570                min[0] = (t[0] < u[0]) ? t[0] : u[0];
 571                min[1] = (t[1] < u[1]) ? t[1] : u[1];
 572                min[2] = (t[2] < u[2]) ? t[2] : u[2];
 573                min[3] = (t[3] < u[3]) ? t[3] : u[3];
 574                store_vector4( &inst->DstReg, state, min );
 575             }
 576             break;
 577          case VP_OPCODE_MAX:
 578             {
 579                GLfloat t[4], u[4], max[4];
 580                fetch_vector4( &inst->SrcReg[0], state, t );
 581                fetch_vector4( &inst->SrcReg[1], state, u );
 582                max[0] = (t[0] > u[0]) ? t[0] : u[0];
 583                max[1] = (t[1] > u[1]) ? t[1] : u[1];
 584                max[2] = (t[2] > u[2]) ? t[2] : u[2];
 585                max[3] = (t[3] > u[3]) ? t[3] : u[3];
 586                store_vector4( &inst->DstReg, state, max );
 587             }
 588             break;
 589          case VP_OPCODE_SLT:
 590             {
 591                GLfloat t[4], u[4], slt[4];
 592                fetch_vector4( &inst->SrcReg[0], state, t );
 593                fetch_vector4( &inst->SrcReg[1], state, u );
 594                slt[0] = (t[0] < u[0]) ? 1.0F : 0.0F;
 595                slt[1] = (t[1] < u[1]) ? 1.0F : 0.0F;
 596                slt[2] = (t[2] < u[2]) ? 1.0F : 0.0F;
 597                slt[3] = (t[3] < u[3]) ? 1.0F : 0.0F;
 598                store_vector4( &inst->DstReg, state, slt );
 599             }
 600             break;
 601          case VP_OPCODE_SGE:
 602             {
 603                GLfloat t[4], u[4], sge[4];
 604                fetch_vector4( &inst->SrcReg[0], state, t );
 605                fetch_vector4( &inst->SrcReg[1], state, u );
 606                sge[0] = (t[0] >= u[0]) ? 1.0F : 0.0F;
 607                sge[1] = (t[1] >= u[1]) ? 1.0F : 0.0F;
 608                sge[2] = (t[2] >= u[2]) ? 1.0F : 0.0F;
 609                sge[3] = (t[3] >= u[3]) ? 1.0F : 0.0F;
 610                store_vector4( &inst->DstReg, state, sge );
 611             }
 612             break;
 613          case VP_OPCODE_MAD:
 614             {
 615                GLfloat t[4], u[4], v[4], sum[4];
 616                fetch_vector4( &inst->SrcReg[0], state, t );
 617                fetch_vector4( &inst->SrcReg[1], state, u );
 618                fetch_vector4( &inst->SrcReg[2], state, v );
 619                sum[0] = t[0] * u[0] + v[0];
 620                sum[1] = t[1] * u[1] + v[1];
 621                sum[2] = t[2] * u[2] + v[2];
 622                sum[3] = t[3] * u[3] + v[3];
 623                store_vector4( &inst->DstReg, state, sum );
 624             }
 625             break;
 626          case VP_OPCODE_ARL:
 627             {
 628                GLfloat t[4];
 629                fetch_vector4( &inst->SrcReg[0], state, t );
 630                state->AddressReg[0] = (GLint) floor(t[0]);
 631             }
 632             break;
 633          case VP_OPCODE_DPH:
 634             {
 635                GLfloat t[4], u[4], dot[4];
 636                fetch_vector4( &inst->SrcReg[0], state, t );
 637                fetch_vector4( &inst->SrcReg[1], state, u );
 638                dot[0] = t[0] * u[0] + t[1] * u[1] + t[2] * u[2] + u[3];
 639                dot[1] = dot[2] = dot[3] = dot[0];
 640                store_vector4( &inst->DstReg, state, dot );
 641             }
 642             break;
 643          case VP_OPCODE_RCC:
 644             {
 645                GLfloat t[4], u;
 646                fetch_vector1( &inst->SrcReg[0], state, t );
 647                if (t[0] == 1.0F)
 648                   u = 1.0F;
 649                else
 650                   u = 1.0F / t[0];
 651                if (u > 0.0F) {
 652                   if (u > 1.884467e+019F) {
 653                      u = 1.884467e+019F;  /* IEEE 32-bit binary value 0x5F800000 */
 654                   }
 655                   else if (u < 5.42101e-020F) {
 656                      u = 5.42101e-020F;   /* IEEE 32-bit binary value 0x1F800000 */
 657                   }
 658                }
 659                else {
 660                   if (u < -1.884467e+019F) {
 661                      u = -1.884467e+019F; /* IEEE 32-bit binary value 0xDF800000 */
 662                   }
 663                   else if (u > -5.42101e-020F) {
 664                      u = -5.42101e-020F;  /* IEEE 32-bit binary value 0x9F800000 */
 665                   }
 666                }
 667                t[0] = t[1] = t[2] = t[3] = u;
 668                store_vector4( &inst->DstReg, state, t );
 669             }
 670             break;
 671          case VP_OPCODE_SUB: /* GL_NV_vertex_program1_1 */
 672             {
 673                GLfloat t[4], u[4], sum[4];
 674                fetch_vector4( &inst->SrcReg[0], state, t );
 675                fetch_vector4( &inst->SrcReg[1], state, u );
 676                sum[0] = t[0] - u[0];
 677                sum[1] = t[1] - u[1];
 678                sum[2] = t[2] - u[2];
 679                sum[3] = t[3] - u[3];
 680                store_vector4( &inst->DstReg, state, sum );
 681             }
 682             break;
 683          case VP_OPCODE_ABS: /* GL_NV_vertex_program1_1 */
 684             {
 685                GLfloat t[4];
 686                fetch_vector4( &inst->SrcReg[0], state, t );
 687                if (t[0] < 0.0)  t[0] = -t[0];
 688                if (t[1] < 0.0)  t[1] = -t[1];
 689                if (t[2] < 0.0)  t[2] = -t[2];
 690                if (t[3] < 0.0)  t[3] = -t[3];
 691                store_vector4( &inst->DstReg, state, t );
 692             }
 693             break;
 694          case VP_OPCODE_FLR: /* GL_ARB_vertex_program */
 695             {
 696                GLfloat t[4];
 697                fetch_vector4( &inst->SrcReg[0], state, t );
 698                t[0] = FLOORF(t[0]);
 699                t[1] = FLOORF(t[1]);
 700                t[2] = FLOORF(t[2]);
 701                t[3] = FLOORF(t[3]);
 702                store_vector4( &inst->DstReg, state, t );
 703             }
 704             break;
 705          case VP_OPCODE_FRC: /* GL_ARB_vertex_program */
 706             {
 707                GLfloat t[4];
 708                fetch_vector4( &inst->SrcReg[0], state, t );
 709                t[0] = t[0] - FLOORF(t[0]);
 710                t[1] = t[1] - FLOORF(t[1]);
 711                t[2] = t[2] - FLOORF(t[2]);
 712                t[3] = t[3] - FLOORF(t[3]);
 713                store_vector4( &inst->DstReg, state, t );
 714             }
 715             break;
 716          case VP_OPCODE_EX2: /* GL_ARB_vertex_program */
 717             {
 718                GLfloat t[4];
 719                fetch_vector1( &inst->SrcReg[0], state, t );
 720                t[0] = t[1] = t[2] = t[3] = (GLfloat)_mesa_pow(2.0, t[0]);
 721                store_vector4( &inst->DstReg, state, t );
 722             }
 723             break;
 724          case VP_OPCODE_LG2: /* GL_ARB_vertex_program */
 725             {
 726                GLfloat t[4];
 727                fetch_vector1( &inst->SrcReg[0], state, t );
 728                t[0] = t[1] = t[2] = t[3] = LOG2(t[0]);
 729                store_vector4( &inst->DstReg, state, t );
 730             }
 731             break;
 732          case VP_OPCODE_POW: /* GL_ARB_vertex_program */
 733             {
 734                GLfloat t[4], u[4];
 735                fetch_vector1( &inst->SrcReg[0], state, t );
 736                fetch_vector1( &inst->SrcReg[1], state, u );
 737                t[0] = t[1] = t[2] = t[3] = (GLfloat)_mesa_pow(t[0], u[0]);
 738                store_vector4( &inst->DstReg, state, t );
 739             }
 740             break;
 741          case VP_OPCODE_XPD: /* GL_ARB_vertex_program */
 742             {
 743                GLfloat t[4], u[4], cross[4];
 744                fetch_vector4( &inst->SrcReg[0], state, t );
 745                fetch_vector4( &inst->SrcReg[1], state, u );
 746                cross[0] = t[1] * u[2] - t[2] * u[1];
 747                cross[1] = t[2] * u[0] - t[0] * u[2];
 748                cross[2] = t[0] * u[1] - t[1] * u[0];
 749                store_vector4( &inst->DstReg, state, cross );
 750             }
 751             break;
 752          case VP_OPCODE_SWZ: /* GL_ARB_vertex_program */
 753             {
 754                const struct vp_src_register *source = &inst->SrcReg[0];
 755                const GLfloat *src = get_register_pointer(source, state);
 756                GLfloat result[4];
 757                GLuint i;
 758
 759                /* do extended swizzling here */
 760                for (i = 0; i < 3; i++) {
 761                   if (source->Swizzle[i] == SWIZZLE_ZERO)
 762                      result[i] = 0.0;
 763                   else if (source->Swizzle[i] == SWIZZLE_ONE)
 764                      result[i] = -1.0;
 765                   else
 766                      result[i] = -src[source->Swizzle[i]];
 767                   if (source->Negate)
 768                      result[i] = -result[i];
 769                }
 770                store_vector4( &inst->DstReg, state, result );
 771             }
 772             break;
 773
 774          case VP_OPCODE_END:
 775             ctx->_CurrentProgram = 0;
 776             return;
 777          default:
 778             /* bad instruction opcode */
 779             _mesa_problem(ctx, "Bad VP Opcode in _mesa_exec_vertex_program");
 780             ctx->_CurrentProgram = 0;
 781             return;
 782       } /* switch */
 783    } /* for */
 784
 785    ctx->_CurrentProgram = 0;
 786 }
 787
 788
 789
 790 /**
 791 Thoughts on vertex program optimization:
 792
 793 The obvious thing to do is to compile the vertex program into X86/SSE/3DNow!
 794 assembly code.  That will probably be a lot of work.
 795
 796 Another approach might be to replace the vp_instruction->Opcode field with
 797 a pointer to a specialized C function which executes the instruction.
 798 In particular we can write functions which skip swizzling, negating,
 799 masking, relative addressing, etc. when they're not needed.
 800
 801 For example:
 802
 803 void simple_add( struct vp_instruction *inst )
 804 {
 805    GLfloat *sum = machine->Registers[inst->DstReg.Register];
 806    GLfloat *a = machine->Registers[inst->SrcReg[0].Register];
 807    GLfloat *b = machine->Registers[inst->SrcReg[1].Register];
 808    sum[0] = a[0] + b[0];
 809    sum[1] = a[1] + b[1];
 810    sum[2] = a[2] + b[2];
 811    sum[3] = a[3] + b[3];
 812 }
 813
 814 */
 815
 816 /*
 817
 818 KW:
 819
 820 A first step would be to 'vectorize' the programs in the same way as
 821 the normal transformation code in the tnl module.  Thus each opcode
 822 takes zero or more input vectors (registers) and produces one or more
 823 output vectors.
 824
 825 These operations would initially be coded in C, with machine-specific
 826 assembly following, as is currently the case for matrix
 827 transformations in the math/ directory.  The preprocessing scheme for
 828 selecting simpler operations Brian describes above would also work
 829 here.
 830
 831 This should give reasonable performance without excessive effort.
 832
 833 */