src/mesa/main/nvvertexec.c

   1 /* $Id: nvvertexec.c,v 1.5 2003/03/29 16:04:31 brianp Exp $ */
   2
   3 /*
   4  * Mesa 3-D graphics library
   5  * Version:  5.1
   6  *
   7  * Copyright (C) 1999-2003  Brian Paul   All Rights Reserved.
   8  *
   9  * Permission is hereby granted, free of charge, to any person obtaining a
  10  * copy of this software and associated documentation files (the "Software"),
  11  * to deal in the Software without restriction, including without limitation
  12  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  13  * and/or sell copies of the Software, and to permit persons to whom the
  14  * Software is furnished to do so, subject to the following conditions:
  15  *
  16  * The above copyright notice and this permission notice shall be included
  17  * in all copies or substantial portions of the Software.
  18  *
  19  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  20  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  21  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  22  * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
  23  * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  24  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25  */
  26
  27 /**
  28  * \file nvvertexec.c
  29  * \brief Code to execute vertex programs.
  30  * \author Brian Paul
  31  */
  32
  33 #include "glheader.h"
  34 #include "context.h"
  35 #include "imports.h"
  36 #include "macros.h"
  37 #include "mtypes.h"
  38 #include "nvvertexec.h"
  39 #include "nvvertprog.h"
  40 #include "math/m_matrix.h"
  41
  42
  43 /**
  44  * Load/initialize the vertex program registers.
  45  * This needs to be done per vertex.
  46  */
  47 void
  48 _mesa_init_vp_registers(GLcontext *ctx)
  49 {
  50    struct vp_machine *machine = &(ctx->VertexProgram.Machine);
  51    GLuint i;
  52
  53    /* Input registers get initialized from the current vertex attribs */
  54    MEMCPY(machine->Registers[VP_INPUT_REG_START],
  55           ctx->Current.Attrib,
  56           16 * 4 * sizeof(GLfloat));
  57
  58    /* Output and temp regs are initialized to [0,0,0,1] */
  59    for (i = VP_OUTPUT_REG_START; i <= VP_OUTPUT_REG_END; i++) {
  60       machine->Registers[i][0] = 0.0F;
  61       machine->Registers[i][1] = 0.0F;
  62       machine->Registers[i][2] = 0.0F;
  63       machine->Registers[i][3] = 1.0F;
  64    }
  65    for (i = VP_TEMP_REG_START; i <= VP_TEMP_REG_END; i++) {
  66       machine->Registers[i][0] = 0.0F;
  67       machine->Registers[i][1] = 0.0F;
  68       machine->Registers[i][2] = 0.0F;
  69       machine->Registers[i][3] = 1.0F;
  70    }
  71
  72    /* The program regs aren't touched */
  73 }
  74
  75
  76
  77 /**
  78  * Copy the 16 elements of a matrix into four consecutive program
  79  * registers starting at 'pos'.
  80  */
  81 static void
  82 load_matrix(GLfloat registers[][4], GLuint pos, const GLfloat mat[16])
  83 {
  84    GLuint i;
  85    pos += VP_PROG_REG_START;
  86    for (i = 0; i < 4; i++) {
  87       registers[pos + i][0] = mat[0 + i];
  88       registers[pos + i][1] = mat[4 + i];
  89       registers[pos + i][2] = mat[8 + i];
  90       registers[pos + i][3] = mat[12 + i];
  91    }
  92 }
  93
  94
  95 /**
  96  * As above, but transpose the matrix.
  97  */
  98 static void
  99 load_transpose_matrix(GLfloat registers[][4], GLuint pos,
 100                       const GLfloat mat[16])
 101 {
 102    pos += VP_PROG_REG_START;
 103    MEMCPY(registers[pos], mat, 16 * sizeof(GLfloat));
 104 }
 105
 106
 107 /**
 108  * Load all currently tracked matrices into the program registers.
 109  * This needs to be done per glBegin/glEnd.
 110  */
 111 void
 112 _mesa_init_tracked_matrices(GLcontext *ctx)
 113 {
 114    GLuint i;
 115
 116    for (i = 0; i < VP_NUM_PROG_REGS / 4; i++) {
 117       /* point 'mat' at source matrix */
 118       GLmatrix *mat;
 119       if (ctx->VertexProgram.TrackMatrix[i] == GL_MODELVIEW) {
 120          mat = ctx->ModelviewMatrixStack.Top;
 121       }
 122       else if (ctx->VertexProgram.TrackMatrix[i] == GL_PROJECTION) {
 123          mat = ctx->ProjectionMatrixStack.Top;
 124       }
 125       else if (ctx->VertexProgram.TrackMatrix[i] == GL_TEXTURE) {
 126          mat = ctx->TextureMatrixStack[ctx->Texture.CurrentUnit].Top;
 127       }
 128       else if (ctx->VertexProgram.TrackMatrix[i] == GL_COLOR) {
 129          mat = ctx->ColorMatrixStack.Top;
 130       }
 131       else if (ctx->VertexProgram.TrackMatrix[i]==GL_MODELVIEW_PROJECTION_NV) {
 132          /* XXX verify the combined matrix is up to date */
 133          mat = &ctx->_ModelProjectMatrix;
 134       }
 135       else if (ctx->VertexProgram.TrackMatrix[i] >= GL_MATRIX0_NV &&
 136                ctx->VertexProgram.TrackMatrix[i] <= GL_MATRIX7_NV) {
 137          GLuint n = ctx->VertexProgram.TrackMatrix[i] - GL_MATRIX0_NV;
 138          ASSERT(n < MAX_PROGRAM_MATRICES);
 139          mat = ctx->ProgramMatrixStack[n].Top;
 140       }
 141       else {
 142          /* no matrix is tracked, but we leave the register values as-is */
 143          assert(ctx->VertexProgram.TrackMatrix[i] == GL_NONE);
 144          continue;
 145       }
 146
 147       /* load the matrix */
 148       if (ctx->VertexProgram.TrackMatrixTransform[i] == GL_IDENTITY_NV) {
 149          load_matrix(ctx->VertexProgram.Machine.Registers, i*4, mat->m);
 150       }
 151       else if (ctx->VertexProgram.TrackMatrixTransform[i] == GL_INVERSE_NV) {
 152          _math_matrix_analyse(mat); /* update the inverse */
 153          assert((mat->flags & MAT_DIRTY_INVERSE) == 0);
 154          load_matrix(ctx->VertexProgram.Machine.Registers, i*4, mat->inv);
 155       }
 156       else if (ctx->VertexProgram.TrackMatrixTransform[i] == GL_TRANSPOSE_NV) {
 157          load_transpose_matrix(ctx->VertexProgram.Machine.Registers, i*4, mat->m);
 158       }
 159       else {
 160          assert(ctx->VertexProgram.TrackMatrixTransform[i]
 161                 == GL_INVERSE_TRANSPOSE_NV);
 162          _math_matrix_analyse(mat); /* update the inverse */
 163          assert((mat->flags & MAT_DIRTY_INVERSE) == 0);
 164          load_transpose_matrix(ctx->VertexProgram.Machine.Registers,
 165                                i*4, mat->inv);
 166       }
 167    }
 168 }
 169
 170
 171
 172 /**
 173  * For debugging.  Dump the current vertex program machine registers.
 174  */
 175 void
 176 _mesa_dump_vp_machine( const struct vp_machine *machine )
 177 {
 178    int i;
 179    _mesa_printf("VertexIn:\n");
 180    for (i = 0; i < VP_NUM_INPUT_REGS; i++) {
 181       _mesa_printf("%d: %f %f %f %f   ", i,
 182              machine->Registers[i + VP_INPUT_REG_START][0],
 183              machine->Registers[i + VP_INPUT_REG_START][1],
 184              machine->Registers[i + VP_INPUT_REG_START][2],
 185              machine->Registers[i + VP_INPUT_REG_START][3]);
 186    }
 187    _mesa_printf("\n");
 188
 189    _mesa_printf("VertexOut:\n");
 190    for (i = 0; i < VP_NUM_OUTPUT_REGS; i++) {
 191       _mesa_printf("%d: %f %f %f %f   ", i,
 192              machine->Registers[i + VP_OUTPUT_REG_START][0],
 193              machine->Registers[i + VP_OUTPUT_REG_START][1],
 194              machine->Registers[i + VP_OUTPUT_REG_START][2],
 195              machine->Registers[i + VP_OUTPUT_REG_START][3]);
 196    }
 197    _mesa_printf("\n");
 198
 199    _mesa_printf("Registers:\n");
 200    for (i = 0; i < VP_NUM_TEMP_REGS; i++) {
 201       _mesa_printf("%d: %f %f %f %f   ", i,
 202              machine->Registers[i + VP_TEMP_REG_START][0],
 203              machine->Registers[i + VP_TEMP_REG_START][1],
 204              machine->Registers[i + VP_TEMP_REG_START][2],
 205              machine->Registers[i + VP_TEMP_REG_START][3]);
 206    }
 207    _mesa_printf("\n");
 208
 209    _mesa_printf("Parameters:\n");
 210    for (i = 0; i < VP_NUM_PROG_REGS; i++) {
 211       _mesa_printf("%d: %f %f %f %f   ", i,
 212              machine->Registers[i + VP_PROG_REG_START][0],
 213              machine->Registers[i + VP_PROG_REG_START][1],
 214              machine->Registers[i + VP_PROG_REG_START][2],
 215              machine->Registers[i + VP_PROG_REG_START][3]);
 216    }
 217    _mesa_printf("\n");
 218 }
 219
 220
 221 /**
 222  * Fetch a 4-element float vector from the given source register.
 223  * Apply swizzling and negating as needed.
 224  */
 225 static void
 226 fetch_vector4( const struct vp_src_register *source,
 227                const struct vp_machine *machine,
 228                GLfloat result[4] )
 229 {
 230    static const GLfloat zero[4] = { 0, 0, 0, 0 };
 231    const GLfloat *src;
 232
 233    if (source->RelAddr) {
 234       const GLint reg = source->Register + machine->AddressReg;
 235       if (reg < 0 || reg > MAX_NV_VERTEX_PROGRAM_PARAMS)
 236          src = zero;
 237       else
 238          src = machine->Registers[VP_PROG_REG_START + reg];
 239    }
 240    else {
 241       src = machine->Registers[source->Register];
 242    }
 243
 244    if (source->Negate) {
 245       result[0] = -src[source->Swizzle[0]];
 246       result[1] = -src[source->Swizzle[1]];
 247       result[2] = -src[source->Swizzle[2]];
 248       result[3] = -src[source->Swizzle[3]];
 249    }
 250    else {
 251       result[0] = src[source->Swizzle[0]];
 252       result[1] = src[source->Swizzle[1]];
 253       result[2] = src[source->Swizzle[2]];
 254       result[3] = src[source->Swizzle[3]];
 255    }
 256 }
 257
 258
 259 /**
 260  * As above, but only return result[0] element.
 261  */
 262 static void
 263 fetch_vector1( const struct vp_src_register *source,
 264                const struct vp_machine *machine,
 265                GLfloat result[4] )
 266 {
 267    static const GLfloat zero[4] = { 0, 0, 0, 0 };
 268    const GLfloat *src;
 269
 270    if (source->RelAddr) {
 271       const GLint reg = source->Register + machine->AddressReg;
 272       if (reg < 0 || reg > MAX_NV_VERTEX_PROGRAM_PARAMS)
 273          src = zero;
 274       else
 275          src = machine->Registers[VP_PROG_REG_START + reg];
 276    }
 277    else {
 278       src = machine->Registers[source->Register];
 279    }
 280
 281    if (source->Negate) {
 282       result[0] = -src[source->Swizzle[0]];
 283    }
 284    else {
 285       result[0] = src[source->Swizzle[0]];
 286    }
 287 }
 288
 289
 290 /**
 291  * Store 4 floats into a register.
 292  */
 293 static void
 294 store_vector4( const struct vp_dst_register *dest, struct vp_machine *machine,
 295                const GLfloat value[4] )
 296 {
 297    GLfloat *dst = machine->Registers[dest->Register];
 298
 299    if (dest->WriteMask[0])
 300       dst[0] = value[0];
 301    if (dest->WriteMask[1])
 302       dst[1] = value[1];
 303    if (dest->WriteMask[2])
 304       dst[2] = value[2];
 305    if (dest->WriteMask[3])
 306       dst[3] = value[3];
 307 }
 308
 309
 310 /**
 311  * Set x to positive or negative infinity.
 312  */
 313 #ifdef USE_IEEE
 314 #define SET_POS_INFINITY(x)  ( *((GLuint *) &x) = 0x7F800000 )
 315 #define SET_NEG_INFINITY(x)  ( *((GLuint *) &x) = 0xFF800000 )
 316 #elif defined(VMS)
 317 #define SET_POS_INFINITY(x)  x = __MAXFLOAT
 318 #define SET_NEG_INFINITY(x)  x = -__MAXFLOAT
 319 #else
 320 #define SET_POS_INFINITY(x)  x = (GLfloat) HUGE_VAL
 321 #define SET_NEG_INFINITY(x)  x = (GLfloat) -HUGE_VAL
 322 #endif
 323
 324 #define SET_FLOAT_BITS(x, bits) ((fi_type *) &(x))->i = bits
 325
 326
 327 /**
 328  * Execute the given vertex program
 329  */
 330 void
 331 _mesa_exec_vertex_program(GLcontext *ctx, const struct vertex_program *program)
 332 {
 333    struct vp_machine *machine = &ctx->VertexProgram.Machine;
 334    const struct vp_instruction *inst;
 335
 336    for (inst = program->Instructions; inst->Opcode != VP_OPCODE_END; inst++) {
 337       switch (inst->Opcode) {
 338          case VP_OPCODE_MOV:
 339             {
 340                GLfloat t[4];
 341                fetch_vector4( &inst->SrcReg[0], machine, t );
 342                store_vector4( &inst->DstReg, machine, t );
 343             }
 344             break;
 345          case VP_OPCODE_LIT:
 346             {
 347                const GLfloat epsilon = 1.0e-5F; /* XXX fix? */
 348                GLfloat t[4], lit[4];
 349                fetch_vector4( &inst->SrcReg[0], machine, t );
 350                if (t[3] < -(128.0F - epsilon))
 351                    t[3] = - (128.0F - epsilon);
 352                else if (t[3] > 128.0F - epsilon)
 353                   t[3] = 128.0F - epsilon;
 354                if (t[0] < 0.0)
 355                   t[0] = 0.0;
 356                if (t[1] < 0.0)
 357                   t[1] = 0.0;
 358                lit[0] = 1.0;
 359                lit[1] = t[0];
 360                lit[2] = (t[0] > 0.0) ? (GLfloat) exp(t[3] * log(t[1])) : 0.0F;
 361                lit[3] = 1.0;
 362                store_vector4( &inst->DstReg, machine, lit );
 363             }
 364             break;
 365          case VP_OPCODE_RCP:
 366             {
 367                GLfloat t[4];
 368                fetch_vector1( &inst->SrcReg[0], machine, t );
 369                if (t[0] != 1.0F)
 370                   t[0] = 1.0F / t[0];  /* div by zero is infinity! */
 371                t[1] = t[2] = t[3] = t[0];
 372                store_vector4( &inst->DstReg, machine, t );
 373             }
 374             break;
 375          case VP_OPCODE_RSQ:
 376             {
 377                GLfloat t[4];
 378                fetch_vector1( &inst->SrcReg[0], machine, t );
 379                t[0] = INV_SQRTF(FABSF(t[0]));
 380                t[1] = t[2] = t[3] = t[0];
 381                store_vector4( &inst->DstReg, machine, t );
 382             }
 383             break;
 384          case VP_OPCODE_EXP:
 385             {
 386                GLfloat t[4], q[4], floor_t0;
 387                fetch_vector1( &inst->SrcReg[0], machine, t );
 388                floor_t0 = (float) floor(t[0]);
 389                if (floor_t0 > FLT_MAX_EXP) {
 390                   SET_POS_INFINITY(q[0]);
 391                   SET_POS_INFINITY(q[2]);
 392                }
 393                else if (floor_t0 < FLT_MIN_EXP) {
 394                   q[0] = 0.0F;
 395                   q[2] = 0.0F;
 396                }
 397                else {
 398 #ifdef USE_IEEE
 399                   GLint ii = (GLint) floor_t0;
 400                   ii = (ii < 23) + 0x3f800000;
 401                   SET_FLOAT_BITS(q[0], ii);
 402                   q[0] = *((GLfloat *) &ii);
 403 #else
 404                   q[0] = (GLfloat) pow(2.0, floor_t0);
 405 #endif
 406                   q[2] = (GLfloat) (q[0] * LOG2(q[1]));
 407                }
 408                q[1] = t[0] - floor_t0;
 409                q[3] = 1.0F;
 410                store_vector4( &inst->DstReg, machine, q );
 411             }
 412             break;
 413          case VP_OPCODE_LOG:
 414             {
 415                GLfloat t[4], q[4], abs_t0;
 416                fetch_vector1( &inst->SrcReg[0], machine, t );
 417                abs_t0 = (GLfloat) fabs(t[0]);
 418                if (abs_t0 != 0.0F) {
 419                   /* Since we really can't handle infinite values on VMS
 420                    * like other OSes we'll use __MAXFLOAT to represent
 421                    * infinity.  This may need some tweaking.
 422                    */
 423 #ifdef VMS
 424                   if (abs_t0 == __MAXFLOAT) {
 425 #else
 426                   if (IS_INF_OR_NAN(abs_t0)) {
 427 #endif
 428                      SET_POS_INFINITY(q[0]);
 429                      q[1] = 1.0F;
 430                      SET_POS_INFINITY(q[2]);
 431                   }
 432                   else {
 433                      int exponent;
 434                      double mantissa = frexp(t[0], &exponent);
 435                      q[0] = (GLfloat) (exponent - 1);
 436                      q[1] = (GLfloat) (2.0 * mantissa); /* map [.5, 1) -> [1, 2) */
 437                      q[2] = (GLfloat) (q[0] + LOG2(q[1]));
 438                   }
 439                }
 440                else {
 441                   SET_NEG_INFINITY(q[0]);
 442                   q[1] = 1.0F;
 443                   SET_NEG_INFINITY(q[2]);
 444                }
 445                q[3] = 1.0;
 446                store_vector4( &inst->DstReg, machine, q );
 447             }
 448             break;
 449          case VP_OPCODE_MUL:
 450             {
 451                GLfloat t[4], u[4], prod[4];
 452                fetch_vector4( &inst->SrcReg[0], machine, t );
 453                fetch_vector4( &inst->SrcReg[1], machine, u );
 454                prod[0] = t[0] * u[0];
 455                prod[1] = t[1] * u[1];
 456                prod[2] = t[2] * u[2];
 457                prod[3] = t[3] * u[3];
 458                store_vector4( &inst->DstReg, machine, prod );
 459             }
 460             break;
 461          case VP_OPCODE_ADD:
 462             {
 463                GLfloat t[4], u[4], sum[4];
 464                fetch_vector4( &inst->SrcReg[0], machine, t );
 465                fetch_vector4( &inst->SrcReg[1], machine, u );
 466                sum[0] = t[0] + u[0];
 467                sum[1] = t[1] + u[1];
 468                sum[2] = t[2] + u[2];
 469                sum[3] = t[3] + u[3];
 470                store_vector4( &inst->DstReg, machine, sum );
 471             }
 472             break;
 473          case VP_OPCODE_DP3:
 474             {
 475                GLfloat t[4], u[4], dot[4];
 476                fetch_vector4( &inst->SrcReg[0], machine, t );
 477                fetch_vector4( &inst->SrcReg[1], machine, u );
 478                dot[0] = t[0] * u[0] + t[1] * u[1] + t[2] * u[2];
 479                dot[1] = dot[2] = dot[3] = dot[0];
 480                store_vector4( &inst->DstReg, machine, dot );
 481             }
 482             break;
 483          case VP_OPCODE_DP4:
 484             {
 485                GLfloat t[4], u[4], dot[4];
 486                fetch_vector4( &inst->SrcReg[0], machine, t );
 487                fetch_vector4( &inst->SrcReg[1], machine, u );
 488                dot[0] = t[0] * u[0] + t[1] * u[1] + t[2] * u[2] + t[3] * u[3];
 489                dot[1] = dot[2] = dot[3] = dot[0];
 490                store_vector4( &inst->DstReg, machine, dot );
 491             }
 492             break;
 493          case VP_OPCODE_DST:
 494             {
 495                GLfloat t[4], u[4], dst[4];
 496                fetch_vector4( &inst->SrcReg[0], machine, t );
 497                fetch_vector4( &inst->SrcReg[1], machine, u );
 498                dst[0] = 1.0F;
 499                dst[1] = t[1] * u[1];
 500                dst[2] = t[2];
 501                dst[3] = u[3];
 502                store_vector4( &inst->DstReg, machine, dst );
 503             }
 504             break;
 505          case VP_OPCODE_MIN:
 506             {
 507                GLfloat t[4], u[4], min[4];
 508                fetch_vector4( &inst->SrcReg[0], machine, t );
 509                fetch_vector4( &inst->SrcReg[1], machine, u );
 510                min[0] = (t[0] < u[0]) ? t[0] : u[0];
 511                min[1] = (t[1] < u[1]) ? t[1] : u[1];
 512                min[2] = (t[2] < u[2]) ? t[2] : u[2];
 513                min[3] = (t[3] < u[3]) ? t[3] : u[3];
 514                store_vector4( &inst->DstReg, machine, min );
 515             }
 516             break;
 517          case VP_OPCODE_MAX:
 518             {
 519                GLfloat t[4], u[4], max[4];
 520                fetch_vector4( &inst->SrcReg[0], machine, t );
 521                fetch_vector4( &inst->SrcReg[1], machine, u );
 522                max[0] = (t[0] > u[0]) ? t[0] : u[0];
 523                max[1] = (t[1] > u[1]) ? t[1] : u[1];
 524                max[2] = (t[2] > u[2]) ? t[2] : u[2];
 525                max[3] = (t[3] > u[3]) ? t[3] : u[3];
 526                store_vector4( &inst->DstReg, machine, max );
 527             }
 528             break;
 529          case VP_OPCODE_SLT:
 530             {
 531                GLfloat t[4], u[4], slt[4];
 532                fetch_vector4( &inst->SrcReg[0], machine, t );
 533                fetch_vector4( &inst->SrcReg[1], machine, u );
 534                slt[0] = (t[0] < u[0]) ? 1.0F : 0.0F;
 535                slt[1] = (t[1] < u[1]) ? 1.0F : 0.0F;
 536                slt[2] = (t[2] < u[2]) ? 1.0F : 0.0F;
 537                slt[3] = (t[3] < u[3]) ? 1.0F : 0.0F;
 538                store_vector4( &inst->DstReg, machine, slt );
 539             }
 540             break;
 541          case VP_OPCODE_SGE:
 542             {
 543                GLfloat t[4], u[4], sge[4];
 544                fetch_vector4( &inst->SrcReg[0], machine, t );
 545                fetch_vector4( &inst->SrcReg[1], machine, u );
 546                sge[0] = (t[0] >= u[0]) ? 1.0F : 0.0F;
 547                sge[1] = (t[1] >= u[1]) ? 1.0F : 0.0F;
 548                sge[2] = (t[2] >= u[2]) ? 1.0F : 0.0F;
 549                sge[3] = (t[3] >= u[3]) ? 1.0F : 0.0F;
 550                store_vector4( &inst->DstReg, machine, sge );
 551             }
 552             break;
 553          case VP_OPCODE_MAD:
 554             {
 555                GLfloat t[4], u[4], v[4], sum[4];
 556                fetch_vector4( &inst->SrcReg[0], machine, t );
 557                fetch_vector4( &inst->SrcReg[1], machine, u );
 558                fetch_vector4( &inst->SrcReg[2], machine, v );
 559                sum[0] = t[0] * u[0] + v[0];
 560                sum[1] = t[1] * u[1] + v[1];
 561                sum[2] = t[2] * u[2] + v[2];
 562                sum[3] = t[3] * u[3] + v[3];
 563                store_vector4( &inst->DstReg, machine, sum );
 564             }
 565             break;
 566          case VP_OPCODE_ARL:
 567             {
 568                GLfloat t[4];
 569                fetch_vector4( &inst->SrcReg[0], machine, t );
 570                machine->AddressReg = (GLint) floor(t[0]);
 571             }
 572             break;
 573          case VP_OPCODE_DPH:
 574             {
 575                GLfloat t[4], u[4], dot[4];
 576                fetch_vector4( &inst->SrcReg[0], machine, t );
 577                fetch_vector4( &inst->SrcReg[1], machine, u );
 578                dot[0] = t[0] * u[0] + t[1] * u[1] + t[2] * u[2] + u[3];
 579                dot[1] = dot[2] = dot[3] = dot[0];
 580                store_vector4( &inst->DstReg, machine, dot );
 581             }
 582             break;
 583          case VP_OPCODE_RCC:
 584             {
 585                GLfloat t[4], u;
 586                fetch_vector1( &inst->SrcReg[0], machine, t );
 587                if (t[0] == 1.0F)
 588                   u = 1.0F;
 589                else
 590                   u = 1.0F / t[0];
 591                if (u > 0.0F) {
 592                   if (u > 1.884467e+019F) {
 593                      u = 1.884467e+019F;  /* IEEE 32-bit binary value 0x5F800000 */
 594                   }
 595                   else if (u < 5.42101e-020F) {
 596                      u = 5.42101e-020F;   /* IEEE 32-bit binary value 0x1F800000 */
 597                   }
 598                }
 599                else {
 600                   if (u < -1.884467e+019F) {
 601                      u = -1.884467e+019F; /* IEEE 32-bit binary value 0xDF800000 */
 602                   }
 603                   else if (u > -5.42101e-020F) {
 604                      u = -5.42101e-020F;  /* IEEE 32-bit binary value 0x9F800000 */
 605                   }
 606                }
 607                t[0] = t[1] = t[2] = t[3] = u;
 608                store_vector4( &inst->DstReg, machine, t );
 609             }
 610             break;
 611          case VP_OPCODE_SUB:
 612             {
 613                GLfloat t[4], u[4], sum[4];
 614                fetch_vector4( &inst->SrcReg[0], machine, t );
 615                fetch_vector4( &inst->SrcReg[1], machine, u );
 616                sum[0] = t[0] - u[0];
 617                sum[1] = t[1] - u[1];
 618                sum[2] = t[2] - u[2];
 619                sum[3] = t[3] - u[3];
 620                store_vector4( &inst->DstReg, machine, sum );
 621             }
 622             break;
 623          case VP_OPCODE_ABS:
 624             {
 625                GLfloat t[4];
 626                fetch_vector4( &inst->SrcReg[0], machine, t );
 627                if (t[0] < 0.0)  t[0] = -t[0];
 628                if (t[1] < 0.0)  t[1] = -t[1];
 629                if (t[2] < 0.0)  t[2] = -t[2];
 630                if (t[3] < 0.0)  t[3] = -t[3];
 631                store_vector4( &inst->DstReg, machine, t );
 632             }
 633             break;
 634
 635          case VP_OPCODE_END:
 636             return;
 637          default:
 638             /* bad instruction opcode */
 639             _mesa_problem(ctx, "Bad VP Opcode in _mesa_exec_vertex_program");
 640             return;
 641       }
 642    }
 643 }
 644
 645
 646
 647 /**
 648 Thoughts on vertex program optimization:
 649
 650 The obvious thing to do is to compile the vertex program into X86/SSE/3DNow!
 651 assembly code.  That will probably be a lot of work.
 652
 653 Another approach might be to replace the vp_instruction->Opcode field with
 654 a pointer to a specialized C function which executes the instruction.
 655 In particular we can write functions which skip swizzling, negating,
 656 masking, relative addressing, etc. when they're not needed.
 657
 658 For example:
 659
 660 void simple_add( struct vp_instruction *inst )
 661 {
 662    GLfloat *sum = machine->Registers[inst->DstReg.Register];
 663    GLfloat *a = machine->Registers[inst->SrcReg[0].Register];
 664    GLfloat *b = machine->Registers[inst->SrcReg[1].Register];
 665    sum[0] = a[0] + b[0];
 666    sum[1] = a[1] + b[1];
 667    sum[2] = a[2] + b[2];
 668    sum[3] = a[3] + b[3];
 669 }
 670
 671 */
 672
 673 /*
 674
 675 KW:
 676
 677 A first step would be to 'vectorize' the programs in the same way as
 678 the normal transformation code in the tnl module.  Thus each opcode
 679 takes zero or more input vectors (registers) and produces one or more
 680 output vectors.
 681
  682 These operations would initially be coded in C, with machine-specific
 683 assembly following, as is currently the case for matrix
 684 transformations in the math/ directory.  The preprocessing scheme for
 685 selecting simpler operations Brian describes above would also work
 686 here.
 687
 688 This should give reasonable performance without excessive effort.
 689
 690 */