src/mesa/main/nvvertexec.c

   1 /*
   2  * Mesa 3-D graphics library
   3  * Version:  5.1
   4  *
   5  * Copyright (C) 1999-2003  Brian Paul   All Rights Reserved.
   6  *
   7  * Permission is hereby granted, free of charge, to any person obtaining a
   8  * copy of this software and associated documentation files (the "Software"),
   9  * to deal in the Software without restriction, including without limitation
  10  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  11  * and/or sell copies of the Software, and to permit persons to whom the
  12  * Software is furnished to do so, subject to the following conditions:
  13  *
  14  * The above copyright notice and this permission notice shall be included
  15  * in all copies or substantial portions of the Software.
  16  *
  17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  18  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  20  * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
  21  * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  22  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  23  */
  24
  25 /**
  26  * \file nvvertexec.c
  27  * Code to execute vertex programs.
  28  * \author Brian Paul
  29  */
  30
  31 #include "glheader.h"
  32 #include "context.h"
  33 #include "imports.h"
  34 #include "macros.h"
  35 #include "mtypes.h"
  36 #include "nvvertexec.h"
  37 #include "nvvertprog.h"
  38 #include "program.h"
  39 #include "math/m_matrix.h"
  40
  41
  42 static const GLfloat zeroVec[4] = { 0, 0, 0, 0 };
  43
  44
  45 /**
  46  * Load/initialize the vertex program registers.
  47  * This needs to be done per vertex.
  48  */
  49 void
  50 _mesa_init_vp_registers(GLcontext *ctx)
  51 {
  52    GLuint i;
  53
  54    /* Input registers get initialized from the current vertex attribs */
  55    MEMCPY(ctx->VertexProgram.Inputs, ctx->Current.Attrib,
  56           VERT_ATTRIB_MAX * 4 * sizeof(GLfloat));
  57
  58    /* Output and temp regs are initialized to [0,0,0,1] */
  59    for (i = 0; i < MAX_NV_VERTEX_PROGRAM_OUTPUTS; i++) {
  60       ASSIGN_4V(ctx->VertexProgram.Outputs[i], 0.0F, 0.0F, 0.0F, 1.0F);
  61    }
  62    for (i = 0; i < MAX_NV_VERTEX_PROGRAM_TEMPS; i++) {
  63       ASSIGN_4V(ctx->VertexProgram.Temporaries[i], 0.0F, 0.0F, 0.0F, 1.0F);
  64    }
  65
  66    /* The program parameters aren't touched */
  67    /* XXX: This should be moved to glBegin() time, but its safe (and slow!)
  68     * here - Karl
  69     */
  70    if (ctx->VertexProgram.Current->Parameters) {
  71       /* Grab the state */
  72       _mesa_load_state_parameters(ctx, ctx->VertexProgram.Current->Parameters);
  73
  74       /* And copy it into the program state */
  75       for (i=0; i<ctx->VertexProgram.Current->Parameters->NumParameters; i++) {
  76          MEMCPY(ctx->VertexProgram.Parameters[i],
  77                 &ctx->VertexProgram.Current->Parameters->Parameters[i].Values,
  78                 4*sizeof(GLfloat));
  79       }
  80    }
  81 }
  82
  83
  84
  85 /**
  86  * Copy the 16 elements of a matrix into four consecutive program
  87  * registers starting at 'pos'.
  88  */
  89 static void
  90 load_matrix(GLfloat registers[][4], GLuint pos, const GLfloat mat[16])
  91 {
  92    GLuint i;
  93    for (i = 0; i < 4; i++) {
  94       registers[pos + i][0] = mat[0 + i];
  95       registers[pos + i][1] = mat[4 + i];
  96       registers[pos + i][2] = mat[8 + i];
  97       registers[pos + i][3] = mat[12 + i];
  98    }
  99 }
 100
 101
 102 /**
 103  * As above, but transpose the matrix.
 104  */
 105 static void
 106 load_transpose_matrix(GLfloat registers[][4], GLuint pos,
 107                       const GLfloat mat[16])
 108 {
 109    MEMCPY(registers[pos], mat, 16 * sizeof(GLfloat));
 110 }
 111
 112
 113 /**
 114  * Load all currently tracked matrices into the program registers.
 115  * This needs to be done per glBegin/glEnd.
 116  */
 117 void
 118 _mesa_init_tracked_matrices(GLcontext *ctx)
 119 {
 120    GLuint i;
 121
 122    for (i = 0; i < MAX_NV_VERTEX_PROGRAM_PARAMS / 4; i++) {
 123       /* point 'mat' at source matrix */
 124       GLmatrix *mat;
 125       if (ctx->VertexProgram.TrackMatrix[i] == GL_MODELVIEW) {
 126          mat = ctx->ModelviewMatrixStack.Top;
 127       }
 128       else if (ctx->VertexProgram.TrackMatrix[i] == GL_PROJECTION) {
 129          mat = ctx->ProjectionMatrixStack.Top;
 130       }
 131       else if (ctx->VertexProgram.TrackMatrix[i] == GL_TEXTURE) {
 132          mat = ctx->TextureMatrixStack[ctx->Texture.CurrentUnit].Top;
 133       }
 134       else if (ctx->VertexProgram.TrackMatrix[i] == GL_COLOR) {
 135          mat = ctx->ColorMatrixStack.Top;
 136       }
 137       else if (ctx->VertexProgram.TrackMatrix[i]==GL_MODELVIEW_PROJECTION_NV) {
 138          /* XXX verify the combined matrix is up to date */
 139          mat = &ctx->_ModelProjectMatrix;
 140       }
 141       else if (ctx->VertexProgram.TrackMatrix[i] >= GL_MATRIX0_NV &&
 142                ctx->VertexProgram.TrackMatrix[i] <= GL_MATRIX7_NV) {
 143          GLuint n = ctx->VertexProgram.TrackMatrix[i] - GL_MATRIX0_NV;
 144          ASSERT(n < MAX_PROGRAM_MATRICES);
 145          mat = ctx->ProgramMatrixStack[n].Top;
 146       }
 147       else {
 148          /* no matrix is tracked, but we leave the register values as-is */
 149          assert(ctx->VertexProgram.TrackMatrix[i] == GL_NONE);
 150          continue;
 151       }
 152
 153       /* load the matrix */
 154       if (ctx->VertexProgram.TrackMatrixTransform[i] == GL_IDENTITY_NV) {
 155          load_matrix(ctx->VertexProgram.Parameters, i*4, mat->m);
 156       }
 157       else if (ctx->VertexProgram.TrackMatrixTransform[i] == GL_INVERSE_NV) {
 158          _math_matrix_analyse(mat); /* update the inverse */
 159          assert((mat->flags & MAT_DIRTY_INVERSE) == 0);
 160          load_matrix(ctx->VertexProgram.Parameters, i*4, mat->inv);
 161       }
 162       else if (ctx->VertexProgram.TrackMatrixTransform[i] == GL_TRANSPOSE_NV) {
 163          load_transpose_matrix(ctx->VertexProgram.Parameters, i*4, mat->m);
 164       }
 165       else {
 166          assert(ctx->VertexProgram.TrackMatrixTransform[i]
 167                 == GL_INVERSE_TRANSPOSE_NV);
 168          _math_matrix_analyse(mat); /* update the inverse */
 169          assert((mat->flags & MAT_DIRTY_INVERSE) == 0);
 170          load_transpose_matrix(ctx->VertexProgram.Parameters, i*4, mat->inv);
 171       }
 172    }
 173 }
 174
 175
 176
 177 /**
 178  * For debugging.  Dump the current vertex program machine registers.
 179  */
 180 void
 181 _mesa_dump_vp_state( const struct vertex_program_state *state )
 182 {
 183    int i;
 184    _mesa_printf("VertexIn:\n");
 185    for (i = 0; i < MAX_NV_VERTEX_PROGRAM_INPUTS; i++) {
 186       _mesa_printf("%d: %f %f %f %f   ", i,
 187                    state->Inputs[i][0],
 188                    state->Inputs[i][1],
 189                    state->Inputs[i][2],
 190                    state->Inputs[i][3]);
 191    }
 192    _mesa_printf("\n");
 193
 194    _mesa_printf("VertexOut:\n");
 195    for (i = 0; i < MAX_NV_VERTEX_PROGRAM_OUTPUTS; i++) {
 196       _mesa_printf("%d: %f %f %f %f   ", i,
 197                   state->Outputs[i][0],
 198                   state->Outputs[i][1],
 199                   state->Outputs[i][2],
 200                   state->Outputs[i][3]);
 201    }
 202    _mesa_printf("\n");
 203
 204    _mesa_printf("Registers:\n");
 205    for (i = 0; i < MAX_NV_VERTEX_PROGRAM_TEMPS; i++) {
 206       _mesa_printf("%d: %f %f %f %f   ", i,
 207                   state->Temporaries[i][0],
 208                   state->Temporaries[i][1],
 209                   state->Temporaries[i][2],
 210                   state->Temporaries[i][3]);
 211    }
 212    _mesa_printf("\n");
 213
 214    _mesa_printf("Parameters:\n");
 215    for (i = 0; i < MAX_NV_VERTEX_PROGRAM_PARAMS; i++) {
 216       _mesa_printf("%d: %f %f %f %f   ", i,
 217                   state->Parameters[i][0],
 218                   state->Parameters[i][1],
 219                   state->Parameters[i][2],
 220                   state->Parameters[i][3]);
 221    }
 222    _mesa_printf("\n");
 223 }
 224
 225
 226
 227 /**
 228  * Return a pointer to the 4-element float vector specified by the given
 229  * source register.
 230  */
 231 static INLINE const GLfloat *
 232 get_register_pointer( const struct vp_src_register *source,
 233                       const struct vertex_program_state *state )
 234 {
 235    if (source->RelAddr) {
 236       const GLint reg = source->Index + state->AddressReg[0];
 237       ASSERT(source->File == PROGRAM_ENV_PARAM);
 238       if (reg < 0 || reg > MAX_NV_VERTEX_PROGRAM_PARAMS)
 239          return zeroVec;
 240       else
 241          return state->Parameters[reg];
 242    }
 243    else {
 244       switch (source->File) {
 245          case PROGRAM_TEMPORARY:
 246             return state->Temporaries[source->Index];
 247          case PROGRAM_INPUT:
 248             return state->Inputs[source->Index];
 249          case PROGRAM_LOCAL_PARAM:
 250             /* XXX fix */
 251             return state->Temporaries[source->Index];
 252          case PROGRAM_ENV_PARAM:
 253             return state->Parameters[source->Index];
 254          case PROGRAM_STATE_VAR:
 255             return state->Parameters[source->Index];
 256          default:
 257             _mesa_problem(NULL,
 258                           "Bad source register file in fetch_vector4(vp)");
 259             return NULL;
 260       }
 261    }
 262    return NULL;
 263 }
 264
 265
 266 /**
 267  * Fetch a 4-element float vector from the given source register.
 268  * Apply swizzling and negating as needed.
 269  */
 270 static INLINE void
 271 fetch_vector4( const struct vp_src_register *source,
 272                const struct vertex_program_state *state,
 273                GLfloat result[4] )
 274 {
 275    const GLfloat *src = get_register_pointer(source, state);
 276
 277    if (source->Negate) {
 278       result[0] = -src[source->Swizzle[0]];
 279       result[1] = -src[source->Swizzle[1]];
 280       result[2] = -src[source->Swizzle[2]];
 281       result[3] = -src[source->Swizzle[3]];
 282    }
 283    else {
 284       result[0] = src[source->Swizzle[0]];
 285       result[1] = src[source->Swizzle[1]];
 286       result[2] = src[source->Swizzle[2]];
 287       result[3] = src[source->Swizzle[3]];
 288    }
 289 }
 290
 291
 292
 293 /**
 294  * As above, but only return result[0] element.
 295  */
 296 static INLINE void
 297 fetch_vector1( const struct vp_src_register *source,
 298                const struct vertex_program_state *state,
 299                GLfloat result[4] )
 300 {
 301    const GLfloat *src = get_register_pointer(source, state);
 302
 303    if (source->Negate) {
 304       result[0] = -src[source->Swizzle[0]];
 305    }
 306    else {
 307       result[0] = src[source->Swizzle[0]];
 308    }
 309 }
 310
 311
 312 /**
 313  * Store 4 floats into a register.
 314  */
 315 static void
 316 store_vector4( const struct vp_dst_register *dest,
 317                struct vertex_program_state *state,
 318                const GLfloat value[4] )
 319 {
 320    GLfloat *dst;
 321    switch (dest->File) {
 322       case PROGRAM_TEMPORARY:
 323          dst = state->Temporaries[dest->Index];
 324          break;
 325       case PROGRAM_OUTPUT:
 326          dst = state->Outputs[dest->Index];
 327          break;
 328       default:
 329          _mesa_problem(NULL, "Invalid register file in fetch_vector1(vp)");
 330          return;
 331    }
 332
 333    if (dest->WriteMask[0])
 334       dst[0] = value[0];
 335    if (dest->WriteMask[1])
 336       dst[1] = value[1];
 337    if (dest->WriteMask[2])
 338       dst[2] = value[2];
 339    if (dest->WriteMask[3])
 340       dst[3] = value[3];
 341 }
 342
 343
 344 /**
 345  * Set x to positive or negative infinity.
 346  */
 347 #if defined(USE_IEEE) || defined(_WIN32)
 348 #define SET_POS_INFINITY(x)  ( *((GLuint *) &x) = 0x7F800000 )
 349 #define SET_NEG_INFINITY(x)  ( *((GLuint *) &x) = 0xFF800000 )
 350 #elif defined(VMS)
 351 #define SET_POS_INFINITY(x)  x = __MAXFLOAT
 352 #define SET_NEG_INFINITY(x)  x = -__MAXFLOAT
 353 #else
 354 #define SET_POS_INFINITY(x)  x = (GLfloat) HUGE_VAL
 355 #define SET_NEG_INFINITY(x)  x = (GLfloat) -HUGE_VAL
 356 #endif
 357
 358 #define SET_FLOAT_BITS(x, bits) ((fi_type *) &(x))->i = bits
 359
 360
 361 /**
 362  * Execute the given vertex program
 363  */
 364 void
 365 _mesa_exec_vertex_program(GLcontext *ctx, const struct vertex_program *program)
 366 {
 367    struct vertex_program_state *state = &ctx->VertexProgram;
 368    const struct vp_instruction *inst;
 369
 370    ctx->_CurrentProgram = GL_VERTEX_PROGRAM_ARB; /* or NV, doesn't matter */
 371
 372    for (inst = program->Instructions; inst->Opcode != VP_OPCODE_END; inst++) {
 373
 374       if (ctx->VertexProgram.CallbackEnabled &&
 375           ctx->VertexProgram.Callback) {
 376          ctx->VertexProgram.CurrentPosition = inst->StringPos;
 377          ctx->VertexProgram.Callback(program->Base.Target,
 378                                      ctx->VertexProgram.CallbackData);
 379       }
 380
 381       switch (inst->Opcode) {
 382          case VP_OPCODE_MOV:
 383             {
 384                GLfloat t[4];
 385                fetch_vector4( &inst->SrcReg[0], state, t );
 386                store_vector4( &inst->DstReg, state, t );
 387             }
 388             break;
 389          case VP_OPCODE_LIT:
 390             {
 391                const GLfloat epsilon = 1.0e-5F; /* XXX fix? */
 392                GLfloat t[4], lit[4];
 393                fetch_vector4( &inst->SrcReg[0], state, t );
 394                if (t[3] < -(128.0F - epsilon))
 395                    t[3] = - (128.0F - epsilon);
 396                else if (t[3] > 128.0F - epsilon)
 397                   t[3] = 128.0F - epsilon;
 398                if (t[0] < 0.0)
 399                   t[0] = 0.0;
 400                if (t[1] < 0.0)
 401                   t[1] = 0.0;
 402                lit[0] = 1.0;
 403                lit[1] = t[0];
 404                lit[2] = (t[0] > 0.0) ? (GLfloat) exp(t[3] * log(t[1])) : 0.0F;
 405                lit[3] = 1.0;
 406                store_vector4( &inst->DstReg, state, lit );
 407             }
 408             break;
 409          case VP_OPCODE_RCP:
 410             {
 411                GLfloat t[4];
 412                fetch_vector1( &inst->SrcReg[0], state, t );
 413                if (t[0] != 1.0F)
 414                   t[0] = 1.0F / t[0];  /* div by zero is infinity! */
 415                t[1] = t[2] = t[3] = t[0];
 416                store_vector4( &inst->DstReg, state, t );
 417             }
 418             break;
 419          case VP_OPCODE_RSQ:
 420             {
 421                GLfloat t[4];
 422                fetch_vector1( &inst->SrcReg[0], state, t );
 423                t[0] = INV_SQRTF(FABSF(t[0]));
 424                t[1] = t[2] = t[3] = t[0];
 425                store_vector4( &inst->DstReg, state, t );
 426             }
 427             break;
 428          case VP_OPCODE_EXP:
 429             {
 430                GLfloat t[4], q[4], floor_t0;
 431                fetch_vector1( &inst->SrcReg[0], state, t );
 432                floor_t0 = (float) floor(t[0]);
 433                if (floor_t0 > FLT_MAX_EXP) {
 434                   SET_POS_INFINITY(q[0]);
 435                   SET_POS_INFINITY(q[2]);
 436                }
 437                else if (floor_t0 < FLT_MIN_EXP) {
 438                   q[0] = 0.0F;
 439                   q[2] = 0.0F;
 440                }
 441                else {
 442 #ifdef USE_IEEE
 443                   GLint ii = (GLint) floor_t0;
 444                   ii = (ii < 23) + 0x3f800000;
 445                   SET_FLOAT_BITS(q[0], ii);
 446                   q[0] = *((GLfloat *) &ii);
 447 #else
 448                   q[0] = (GLfloat) pow(2.0, floor_t0);
 449 #endif
 450                   q[2] = (GLfloat) (q[0] * LOG2(q[1]));
 451                }
 452                q[1] = t[0] - floor_t0;
 453                q[3] = 1.0F;
 454                store_vector4( &inst->DstReg, state, q );
 455             }
 456             break;
 457          case VP_OPCODE_LOG:
 458             {
 459                GLfloat t[4], q[4], abs_t0;
 460                fetch_vector1( &inst->SrcReg[0], state, t );
 461                abs_t0 = (GLfloat) fabs(t[0]);
 462                if (abs_t0 != 0.0F) {
 463                   /* Since we really can't handle infinite values on VMS
 464                    * like other OSes we'll use __MAXFLOAT to represent
 465                    * infinity.  This may need some tweaking.
 466                    */
 467 #ifdef VMS
 468                   if (abs_t0 == __MAXFLOAT)
 469 #else
 470                   if (IS_INF_OR_NAN(abs_t0))
 471 #endif
 472                   {
 473                      SET_POS_INFINITY(q[0]);
 474                      q[1] = 1.0F;
 475                      SET_POS_INFINITY(q[2]);
 476                   }
 477                   else {
 478                      int exponent;
 479                      double mantissa = frexp(t[0], &exponent);
 480                      q[0] = (GLfloat) (exponent - 1);
 481                      q[1] = (GLfloat) (2.0 * mantissa); /* map [.5, 1) -> [1, 2) */
 482                      q[2] = (GLfloat) (q[0] + LOG2(q[1]));
 483                   }
 484                   }
 485                else {
 486                   SET_NEG_INFINITY(q[0]);
 487                   q[1] = 1.0F;
 488                   SET_NEG_INFINITY(q[2]);
 489                }
 490                q[3] = 1.0;
 491                store_vector4( &inst->DstReg, state, q );
 492             }
 493             break;
 494          case VP_OPCODE_MUL:
 495             {
 496                GLfloat t[4], u[4], prod[4];
 497                fetch_vector4( &inst->SrcReg[0], state, t );
 498                fetch_vector4( &inst->SrcReg[1], state, u );
 499                prod[0] = t[0] * u[0];
 500                prod[1] = t[1] * u[1];
 501                prod[2] = t[2] * u[2];
 502                prod[3] = t[3] * u[3];
 503                store_vector4( &inst->DstReg, state, prod );
 504             }
 505             break;
 506          case VP_OPCODE_ADD:
 507             {
 508                GLfloat t[4], u[4], sum[4];
 509                fetch_vector4( &inst->SrcReg[0], state, t );
 510                fetch_vector4( &inst->SrcReg[1], state, u );
 511                sum[0] = t[0] + u[0];
 512                sum[1] = t[1] + u[1];
 513                sum[2] = t[2] + u[2];
 514                sum[3] = t[3] + u[3];
 515                store_vector4( &inst->DstReg, state, sum );
 516             }
 517             break;
 518          case VP_OPCODE_DP3:
 519             {
 520                GLfloat t[4], u[4], dot[4];
 521                fetch_vector4( &inst->SrcReg[0], state, t );
 522                fetch_vector4( &inst->SrcReg[1], state, u );
 523                dot[0] = t[0] * u[0] + t[1] * u[1] + t[2] * u[2];
 524                dot[1] = dot[2] = dot[3] = dot[0];
 525                store_vector4( &inst->DstReg, state, dot );
 526             }
 527             break;
 528          case VP_OPCODE_DP4:
 529             {
 530                GLfloat t[4], u[4], dot[4];
 531                fetch_vector4( &inst->SrcReg[0], state, t );
 532                fetch_vector4( &inst->SrcReg[1], state, u );
 533                dot[0] = t[0] * u[0] + t[1] * u[1] + t[2] * u[2] + t[3] * u[3];
 534                dot[1] = dot[2] = dot[3] = dot[0];
 535                store_vector4( &inst->DstReg, state, dot );
 536             }
 537             break;
 538          case VP_OPCODE_DST:
 539             {
 540                GLfloat t[4], u[4], dst[4];
 541                fetch_vector4( &inst->SrcReg[0], state, t );
 542                fetch_vector4( &inst->SrcReg[1], state, u );
 543                dst[0] = 1.0F;
 544                dst[1] = t[1] * u[1];
 545                dst[2] = t[2];
 546                dst[3] = u[3];
 547                store_vector4( &inst->DstReg, state, dst );
 548             }
 549             break;
 550          case VP_OPCODE_MIN:
 551             {
 552                GLfloat t[4], u[4], min[4];
 553                fetch_vector4( &inst->SrcReg[0], state, t );
 554                fetch_vector4( &inst->SrcReg[1], state, u );
 555                min[0] = (t[0] < u[0]) ? t[0] : u[0];
 556                min[1] = (t[1] < u[1]) ? t[1] : u[1];
 557                min[2] = (t[2] < u[2]) ? t[2] : u[2];
 558                min[3] = (t[3] < u[3]) ? t[3] : u[3];
 559                store_vector4( &inst->DstReg, state, min );
 560             }
 561             break;
 562          case VP_OPCODE_MAX:
 563             {
 564                GLfloat t[4], u[4], max[4];
 565                fetch_vector4( &inst->SrcReg[0], state, t );
 566                fetch_vector4( &inst->SrcReg[1], state, u );
 567                max[0] = (t[0] > u[0]) ? t[0] : u[0];
 568                max[1] = (t[1] > u[1]) ? t[1] : u[1];
 569                max[2] = (t[2] > u[2]) ? t[2] : u[2];
 570                max[3] = (t[3] > u[3]) ? t[3] : u[3];
 571                store_vector4( &inst->DstReg, state, max );
 572             }
 573             break;
 574          case VP_OPCODE_SLT:
 575             {
 576                GLfloat t[4], u[4], slt[4];
 577                fetch_vector4( &inst->SrcReg[0], state, t );
 578                fetch_vector4( &inst->SrcReg[1], state, u );
 579                slt[0] = (t[0] < u[0]) ? 1.0F : 0.0F;
 580                slt[1] = (t[1] < u[1]) ? 1.0F : 0.0F;
 581                slt[2] = (t[2] < u[2]) ? 1.0F : 0.0F;
 582                slt[3] = (t[3] < u[3]) ? 1.0F : 0.0F;
 583                store_vector4( &inst->DstReg, state, slt );
 584             }
 585             break;
 586          case VP_OPCODE_SGE:
 587             {
 588                GLfloat t[4], u[4], sge[4];
 589                fetch_vector4( &inst->SrcReg[0], state, t );
 590                fetch_vector4( &inst->SrcReg[1], state, u );
 591                sge[0] = (t[0] >= u[0]) ? 1.0F : 0.0F;
 592                sge[1] = (t[1] >= u[1]) ? 1.0F : 0.0F;
 593                sge[2] = (t[2] >= u[2]) ? 1.0F : 0.0F;
 594                sge[3] = (t[3] >= u[3]) ? 1.0F : 0.0F;
 595                store_vector4( &inst->DstReg, state, sge );
 596             }
 597             break;
 598          case VP_OPCODE_MAD:
 599             {
 600                GLfloat t[4], u[4], v[4], sum[4];
 601                fetch_vector4( &inst->SrcReg[0], state, t );
 602                fetch_vector4( &inst->SrcReg[1], state, u );
 603                fetch_vector4( &inst->SrcReg[2], state, v );
 604                sum[0] = t[0] * u[0] + v[0];
 605                sum[1] = t[1] * u[1] + v[1];
 606                sum[2] = t[2] * u[2] + v[2];
 607                sum[3] = t[3] * u[3] + v[3];
 608                store_vector4( &inst->DstReg, state, sum );
 609             }
 610             break;
 611          case VP_OPCODE_ARL:
 612             {
 613                GLfloat t[4];
 614                fetch_vector4( &inst->SrcReg[0], state, t );
 615                state->AddressReg[0] = (GLint) floor(t[0]);
 616             }
 617             break;
 618          case VP_OPCODE_DPH:
 619             {
 620                GLfloat t[4], u[4], dot[4];
 621                fetch_vector4( &inst->SrcReg[0], state, t );
 622                fetch_vector4( &inst->SrcReg[1], state, u );
 623                dot[0] = t[0] * u[0] + t[1] * u[1] + t[2] * u[2] + u[3];
 624                dot[1] = dot[2] = dot[3] = dot[0];
 625                store_vector4( &inst->DstReg, state, dot );
 626             }
 627             break;
 628          case VP_OPCODE_RCC:
 629             {
 630                GLfloat t[4], u;
 631                fetch_vector1( &inst->SrcReg[0], state, t );
 632                if (t[0] == 1.0F)
 633                   u = 1.0F;
 634                else
 635                   u = 1.0F / t[0];
 636                if (u > 0.0F) {
 637                   if (u > 1.884467e+019F) {
 638                      u = 1.884467e+019F;  /* IEEE 32-bit binary value 0x5F800000 */
 639                   }
 640                   else if (u < 5.42101e-020F) {
 641                      u = 5.42101e-020F;   /* IEEE 32-bit binary value 0x1F800000 */
 642                   }
 643                }
 644                else {
 645                   if (u < -1.884467e+019F) {
 646                      u = -1.884467e+019F; /* IEEE 32-bit binary value 0xDF800000 */
 647                   }
 648                   else if (u > -5.42101e-020F) {
 649                      u = -5.42101e-020F;  /* IEEE 32-bit binary value 0x9F800000 */
 650                   }
 651                }
 652                t[0] = t[1] = t[2] = t[3] = u;
 653                store_vector4( &inst->DstReg, state, t );
 654             }
 655             break;
 656          case VP_OPCODE_SUB: /* GL_NV_vertex_program1_1 */
 657             {
 658                GLfloat t[4], u[4], sum[4];
 659                fetch_vector4( &inst->SrcReg[0], state, t );
 660                fetch_vector4( &inst->SrcReg[1], state, u );
 661                sum[0] = t[0] - u[0];
 662                sum[1] = t[1] - u[1];
 663                sum[2] = t[2] - u[2];
 664                sum[3] = t[3] - u[3];
 665                store_vector4( &inst->DstReg, state, sum );
 666             }
 667             break;
 668          case VP_OPCODE_ABS: /* GL_NV_vertex_program1_1 */
 669             {
 670                GLfloat t[4];
 671                fetch_vector4( &inst->SrcReg[0], state, t );
 672                if (t[0] < 0.0)  t[0] = -t[0];
 673                if (t[1] < 0.0)  t[1] = -t[1];
 674                if (t[2] < 0.0)  t[2] = -t[2];
 675                if (t[3] < 0.0)  t[3] = -t[3];
 676                store_vector4( &inst->DstReg, state, t );
 677             }
 678             break;
 679          case VP_OPCODE_FLR: /* GL_ARB_vertex_program */
 680             {
 681                GLfloat t[4];
 682                fetch_vector4( &inst->SrcReg[0], state, t );
 683                t[0] = FLOORF(t[0]);
 684                t[1] = FLOORF(t[1]);
 685                t[2] = FLOORF(t[2]);
 686                t[3] = FLOORF(t[3]);
 687                store_vector4( &inst->DstReg, state, t );
 688             }
 689             break;
 690          case VP_OPCODE_FRC: /* GL_ARB_vertex_program */
 691             {
 692                GLfloat t[4];
 693                fetch_vector4( &inst->SrcReg[0], state, t );
 694                t[0] = t[0] - FLOORF(t[0]);
 695                t[1] = t[1] - FLOORF(t[1]);
 696                t[2] = t[2] - FLOORF(t[2]);
 697                t[3] = t[3] - FLOORF(t[3]);
 698                store_vector4( &inst->DstReg, state, t );
 699             }
 700             break;
 701          case VP_OPCODE_EX2: /* GL_ARB_vertex_program */
 702             {
 703                GLfloat t[4];
 704                fetch_vector1( &inst->SrcReg[0], state, t );
 705                t[0] = t[1] = t[2] = t[3] = (GLfloat)_mesa_pow(2.0, t[0]);
 706                store_vector4( &inst->DstReg, state, t );
 707             }
 708             break;
 709          case VP_OPCODE_LG2: /* GL_ARB_vertex_program */
 710             {
 711                GLfloat t[4];
 712                fetch_vector1( &inst->SrcReg[0], state, t );
 713                t[0] = t[1] = t[2] = t[3] = LOG2(t[0]);
 714                store_vector4( &inst->DstReg, state, t );
 715             }
 716             break;
 717          case VP_OPCODE_POW: /* GL_ARB_vertex_program */
 718             {
 719                GLfloat t[4], u[4];
 720                fetch_vector1( &inst->SrcReg[0], state, t );
 721                fetch_vector1( &inst->SrcReg[1], state, u );
 722                t[0] = t[1] = t[2] = t[3] = (GLfloat)_mesa_pow(t[0], u[0]);
 723                store_vector4( &inst->DstReg, state, t );
 724             }
 725             break;
 726          case VP_OPCODE_XPD: /* GL_ARB_vertex_program */
 727             {
 728                GLfloat t[4], u[4], cross[4];
 729                fetch_vector4( &inst->SrcReg[0], state, t );
 730                fetch_vector4( &inst->SrcReg[1], state, u );
 731                cross[0] = t[1] * u[2] - t[2] * u[1];
 732                cross[1] = t[2] * u[0] - t[0] * u[2];
 733                cross[2] = t[0] * u[1] - t[1] * u[0];
 734                store_vector4( &inst->DstReg, state, cross );
 735             }
 736             break;
 737          case VP_OPCODE_SWZ: /* GL_ARB_vertex_program */
 738             {
 739                const struct vp_src_register *source = &inst->SrcReg[0];
 740                const GLfloat *src = get_register_pointer(source, state);
 741                GLfloat result[4];
 742                GLuint i;
 743
 744                /* do extended swizzling here */
 745                for (i = 0; i < 3; i++) {
 746                   if (source->Swizzle[i] == SWIZZLE_ZERO)
 747                      result[i] = 0.0;
 748                   else if (source->Swizzle[i] == SWIZZLE_ONE)
 749                      result[i] = -1.0;
 750                   else
 751                      result[i] = -src[source->Swizzle[i]];
 752                   if (source->Negate)
 753                      result[i] = -result[i];
 754                }
 755                store_vector4( &inst->DstReg, state, result );
 756             }
 757             break;
 758
 759          case VP_OPCODE_END:
 760             ctx->_CurrentProgram = 0;
 761             return;
 762          default:
 763             /* bad instruction opcode */
 764             _mesa_problem(ctx, "Bad VP Opcode in _mesa_exec_vertex_program");
 765             ctx->_CurrentProgram = 0;
 766             return;
 767       } /* switch */
 768    } /* for */
 769
 770    ctx->_CurrentProgram = 0;
 771 }
 772
 773
 774
 775 /**
 776 Thoughts on vertex program optimization:
 777
 778 The obvious thing to do is to compile the vertex program into X86/SSE/3DNow!
 779 assembly code.  That will probably be a lot of work.
 780
 781 Another approach might be to replace the vp_instruction->Opcode field with
 782 a pointer to a specialized C function which executes the instruction.
 783 In particular we can write functions which skip swizzling, negating,
 784 masking, relative addressing, etc. when they're not needed.
 785
 786 For example:
 787
 788 void simple_add( struct vp_instruction *inst )
 789 {
 790    GLfloat *sum = machine->Registers[inst->DstReg.Register];
 791    GLfloat *a = machine->Registers[inst->SrcReg[0].Register];
 792    GLfloat *b = machine->Registers[inst->SrcReg[1].Register];
 793    sum[0] = a[0] + b[0];
 794    sum[1] = a[1] + b[1];
 795    sum[2] = a[2] + b[2];
 796    sum[3] = a[3] + b[3];
 797 }
 798
 799 */
 800
 801 /*
 802
 803 KW:
 804
 805 A first step would be to 'vectorize' the programs in the same way as
 806 the normal transformation code in the tnl module.  Thus each opcode
 807 takes zero or more input vectors (registers) and produces one or more
 808 output vectors.
 809
 810 These operations would initially be coded in C, with machine-specific
 811 assembly following, as is currently the case for matrix
 812 transformations in the math/ directory.  The preprocessing scheme for
 813 selecting simpler operations Brian describes above would also work
 814 here.
 815
 816 This should give reasonable performance without excessive effort.
 817
 818 */