src/mesa/swrast/s_nvfragprog.c

   1 /* $Id: s_nvfragprog.c,v 1.4 2003/02/25 19:29:43 brianp Exp $ */
   2
   3 /*
   4  * Mesa 3-D graphics library
   5  * Version:  5.1
   6  *
   7  * Copyright (C) 1999-2003  Brian Paul   All Rights Reserved.
   8  *
   9  * Permission is hereby granted, free of charge, to any person obtaining a
  10  * copy of this software and associated documentation files (the "Software"),
  11  * to deal in the Software without restriction, including without limitation
  12  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  13  * and/or sell copies of the Software, and to permit persons to whom the
  14  * Software is furnished to do so, subject to the following conditions:
  15  *
  16  * The above copyright notice and this permission notice shall be included
  17  * in all copies or substantial portions of the Software.
  18  *
  19  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  20  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  21  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  22  * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
  23  * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  24  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25  */
  26
  27
  28
  29 #include "glheader.h"
  30 #include "colormac.h"
  31 #include "context.h"
  32 #include "nvfragprog.h"
  33 #include "macros.h"
  34 #include "mmath.h"
  35
  36 #include "s_nvfragprog.h"
  37
  38
  39
  40 /**
  41  * Fetch a texel.
  42  */
  43 static void
  44 fetch_texel( GLcontext *ctx, const GLfloat texcoord[4], GLuint unit,
  45              GLuint targetIndex, GLfloat color[4] )
  46 {
  47    const GLfloat *lambda = NULL;
  48    GLchan rgba[4];
  49    SWcontext *swrast = SWRAST_CONTEXT(ctx);
  50    const struct gl_texture_object *texObj;
  51
  52    switch (targetIndex) {
  53       case TEXTURE_1D_INDEX:
  54          texObj = ctx->Texture.Unit[unit].Current1D;
  55          break;
  56       case TEXTURE_2D_INDEX:
  57          texObj = ctx->Texture.Unit[unit].Current2D;
  58          break;
  59       case TEXTURE_3D_INDEX:
  60          texObj = ctx->Texture.Unit[unit].Current3D;
  61          break;
  62       case TEXTURE_CUBE_INDEX:
  63          texObj = ctx->Texture.Unit[unit].CurrentCubeMap;
  64          break;
  65       case TEXTURE_RECT_INDEX:
  66          texObj = ctx->Texture.Unit[unit].CurrentRect;
  67          break;
  68       default:
  69          _mesa_problem(ctx, "Invalid target in fetch_texel");
  70    }
  71
  72    swrast->TextureSample[unit](ctx, unit, texObj, 1,
  73                                (const GLfloat (*)[4]) &texcoord,
  74                                lambda, &rgba);
  75 }
  76
  77
  78 /**
  79  * Fetch a texel w/ partial derivatives.
  80  */
  81 static void
  82 fetch_texel_deriv( GLcontext *ctx, const GLfloat texcoord[4],
  83                    const GLfloat dtdx[4], const GLfloat dtdy[4],
  84                    GLuint unit, GLuint targetIndex, GLfloat color[4] )
  85 {
  86    /* XXX to do */
  87
  88 }
  89
  90
  91
  92 /**
  93  * Fetch a 4-element float vector from the given source register.
  94  * Apply swizzling and negating as needed.
  95  */
  96 static void
  97 fetch_vector4( const struct fp_src_register *source,
  98                const struct fp_machine *machine,
  99                GLfloat result[4] )
 100 {
 101    const GLfloat *src;
 102
 103    /*
 104    if (source->RelAddr) {
 105       GLint reg = source->Register + machine->AddressReg;
 106       if (reg < VP_PROG_REG_START || reg > VP_PROG_REG_END)
 107          src = zero;
 108       else
 109          src = machine->Registers[reg];
 110    }
 111    else
 112    */
 113
 114    src = machine->Registers[source->Register];
 115
 116    result[0] = src[source->Swizzle[0]];
 117    result[1] = src[source->Swizzle[1]];
 118    result[2] = src[source->Swizzle[2]];
 119    result[3] = src[source->Swizzle[3]];
 120
 121    if (source->NegateBase) {
 122       result[0] = -result[0];
 123       result[1] = -result[1];
 124       result[2] = -result[2];
 125       result[3] = -result[3];
 126    }
 127    if (source->Abs) {
 128       result[0] = FABSF(result[0]);
 129       result[1] = FABSF(result[1]);
 130       result[2] = FABSF(result[2]);
 131       result[3] = FABSF(result[3]);
 132    }
 133    if (source->NegateAbs) {
 134       result[0] = -result[0];
 135       result[1] = -result[1];
 136       result[2] = -result[2];
 137       result[3] = -result[3];
 138    }
 139 }
 140
 141
 142 /**
 143  * As above, but only return result[0] element.
 144  */
 145 static void
 146 fetch_vector1( const struct fp_src_register *source,
 147                const struct fp_machine *machine,
 148                GLfloat result[4] )
 149 {
 150    const GLfloat *src;
 151
 152    /*
 153    if (source->RelAddr) {
 154       GLint reg = source->Register + machine->AddressReg;
 155       if (reg < VP_PROG_REG_START || reg > VP_PROG_REG_END)
 156          src = zero;
 157       else
 158          src = machine->Registers[reg];
 159    }
 160    else
 161    */
 162
 163    src = machine->Registers[source->Register];
 164
 165    result[0] = src[source->Swizzle[0]];
 166
 167    if (source->NegateBase) {
 168       result[0] = -result[0];
 169    }
 170    if (source->Abs) {
 171       result[0] = FABSF(result[0]);
 172    }
 173    if (source->NegateAbs) {
 174       result[0] = -result[0];
 175    }
 176 }
 177
 178
 179 /*
 180  * Test value against zero and return GT, LT, EQ or UN if NaN.
 181  */
 182 static INLINE GLuint
 183 generate_cc( float value )
 184 {
 185    if (value != value)
 186       return COND_UN;  /* NaN */
 187    if (value > 0.0F)
 188       return COND_GT;
 189    if (value < 0.0F)
 190       return COND_LT;
 191    return COND_EQ;
 192 }
 193
 194 /*
 195  * Test if the ccMaskRule is satisfied by the given condition code.
 196  * Used to mask destination writes according to the current condition codee.
 197  */
 198 static INLINE GLboolean
 199 test_cc(GLuint condCode, GLuint ccMaskRule)
 200 {
 201    switch (ccMaskRule) {
 202    case COND_EQ: return (condCode == COND_EQ);
 203    case COND_NE: return (condCode != COND_EQ);
 204    case COND_LT: return (condCode == COND_LT);
 205    case COND_GE: return (condCode == COND_GT || condCode == COND_EQ);
 206    case COND_LE: return (condCode == COND_LT || condCode == COND_EQ);
 207    case COND_GT: return (condCode == COND_GT);
 208    case COND_TR: return GL_TRUE;
 209    case COND_FL: return GL_FALSE;
 210    default:      return GL_TRUE;
 211    }
 212 }
 213
 214
 215 /**
 216  * Store 4 floats into a register.  Observe the instructions saturate and
 217  * set-condition-code flags.
 218  */
 219 static void
 220 store_vector4( const struct fp_instruction *inst,
 221                struct fp_machine *machine,
 222                const GLfloat value[4] )
 223 {
 224    const struct fp_dst_register *dest = &(inst->DstReg);
 225    const GLboolean clamp = inst->Saturate;
 226    const GLboolean updateCC = inst->UpdateCondRegister;
 227    GLfloat *dstReg = machine->Registers[dest->Register];
 228    GLfloat clampedValue[4];
 229    const GLboolean *writeMask = dest->WriteMask;
 230    GLboolean condWriteMask[4];
 231
 232    if (clamp) {
 233       clampedValue[0] = CLAMP(value[0], 0.0F, 1.0F);
 234       clampedValue[1] = CLAMP(value[1], 0.0F, 1.0F);
 235       clampedValue[2] = CLAMP(value[2], 0.0F, 1.0F);
 236       clampedValue[3] = CLAMP(value[3], 0.0F, 1.0F);
 237       value = clampedValue;
 238    }
 239
 240    if (dest->CondMask != COND_TR) {
 241       condWriteMask[0] = writeMask[0]
 242          && test_cc(machine->CondCodes[dest->CondSwizzle[0]], dest->CondMask);
 243       condWriteMask[1] = writeMask[1]
 244          && test_cc(machine->CondCodes[dest->CondSwizzle[1]], dest->CondMask);
 245       condWriteMask[2] = writeMask[2]
 246          && test_cc(machine->CondCodes[dest->CondSwizzle[2]], dest->CondMask);
 247       condWriteMask[3] = writeMask[3]
 248          && test_cc(machine->CondCodes[dest->CondSwizzle[3]], dest->CondMask);
 249       writeMask = condWriteMask;
 250    }
 251
 252    if (writeMask[0]) {
 253       dstReg[0] = value[0];
 254       if (updateCC)
 255          machine->CondCodes[0] = generate_cc(value[0]);
 256    }
 257    if (writeMask[1]) {
 258       dstReg[1] = value[1];
 259       if (updateCC)
 260          machine->CondCodes[1] = generate_cc(value[1]);
 261    }
 262    if (writeMask[2]) {
 263       dstReg[2] = value[2];
 264       if (updateCC)
 265          machine->CondCodes[2] = generate_cc(value[2]);
 266    }
 267    if (writeMask[3]) {
 268       dstReg[3] = value[3];
 269       if (updateCC)
 270          machine->CondCodes[3] = generate_cc(value[3]);
 271    }
 272 }
 273
 274
/**
 * Execute the given fragment program.
 * \return GL_TRUE if program completed or GL_FALSE if program executed KIL.
 */
 279 static GLboolean
 280 execute_program(GLcontext *ctx, const struct fragment_program *program)
 281 {
 282    struct fp_machine *machine = &ctx->FragmentProgram.Machine;
 283    const struct fp_instruction *inst;
 284
 285    for (inst = program->Instructions; inst->Opcode != FP_OPCODE_END; inst++) {
 286       switch (inst->Opcode) {
 287          case FP_OPCODE_ADD:
 288             {
 289                GLfloat a[4], b[4], result[4];
 290                fetch_vector4( &inst->SrcReg[0], machine, a );
 291                fetch_vector4( &inst->SrcReg[1], machine, b );
 292                result[0] = a[0] + b[0];
 293                result[1] = a[1] + b[1];
 294                result[2] = a[2] + b[2];
 295                result[3] = a[3] + b[3];
 296                store_vector4( inst, machine, result );
 297             }
 298             break;
 299          case FP_OPCODE_COS:
 300             {
 301                GLfloat a[4], result[4];
 302                fetch_vector1( &inst->SrcReg[0], machine, a );
 303                result[0] = result[1] = result[2] = result[3] = cos(a[0]);
 304                store_vector4( inst, machine, result );
 305             }
 306             break;
 307          case FP_OPCODE_DDX: /* Partial derivative with respect to X */
 308             {
 309                GLfloat a[4], result[4];
 310                fetch_vector4( &inst->SrcReg[0], machine, a );
 311                result[0] = 0; /* XXX fix */
 312                result[1] = 0;
 313                result[2] = 0;
 314                result[3] = 0;
 315                store_vector4( inst, machine, result );
 316             }
 317             break;
 318          case FP_OPCODE_DDY: /* Partial derivative with respect to Y */
 319             {
 320                GLfloat a[4], result[4];
 321                fetch_vector4( &inst->SrcReg[0], machine, a );
 322                result[0] = 0; /* XXX fix */
 323                result[1] = 0;
 324                result[2] = 0;
 325                result[3] = 0;
 326                store_vector4( inst, machine, result );
 327             }
 328             break;
 329          case FP_OPCODE_DP3:
 330             {
 331                GLfloat a[4], b[4], result[4];
 332                fetch_vector4( &inst->SrcReg[0], machine, a );
 333                fetch_vector4( &inst->SrcReg[1], machine, b );
 334                result[0] = result[1] = result[2] = result[3] =
 335                   a[0] + b[0] + a[1] * b[1] + a[2] * b[2];
 336                store_vector4( inst, machine, result );
 337             }
 338             break;
 339          case FP_OPCODE_DP4:
 340             {
 341                GLfloat a[4], b[4], result[4];
 342                fetch_vector4( &inst->SrcReg[0], machine, a );
 343                fetch_vector4( &inst->SrcReg[1], machine, b );
 344                result[0] = result[1] = result[2] = result[3] =
 345                   a[0] + b[0] + a[1] * b[1] + a[2] * b[2] + a[3] * b[3];
 346                store_vector4( inst, machine, result );
 347             }
 348             break;
 349          case FP_OPCODE_DST: /* Distance vector */
 350             {
 351                GLfloat a[4], b[4], result[4];
 352                fetch_vector4( &inst->SrcReg[0], machine, a );
 353                fetch_vector4( &inst->SrcReg[1], machine, b );
 354                result[0] = 1.0F;
 355                result[1] = a[1] * b[1];
 356                result[2] = a[2];
 357                result[3] = b[3];
 358                store_vector4( inst, machine, result );
 359             }
 360             break;
 361          case FP_OPCODE_EX2: /* Exponential base 2 */
 362             {
 363                GLfloat a[4], result[4];
 364                fetch_vector1( &inst->SrcReg[0], machine, a );
 365                result[0] = result[1] = result[2] = result[3] =
 366                   (GLfloat) pow(2.0, a[0]);
 367                store_vector4( inst, machine, result );
 368             }
 369             break;
 370          case FP_OPCODE_FLR:
 371             {
 372                GLfloat a[4], result[4];
 373                fetch_vector4( &inst->SrcReg[0], machine, a );
 374                result[0] = FLOORF(a[0]);
 375                result[1] = FLOORF(a[1]);
 376                result[2] = FLOORF(a[2]);
 377                result[3] = FLOORF(a[3]);
 378                store_vector4( inst, machine, result );
 379             }
 380             break;
 381          case FP_OPCODE_FRC:
 382             {
 383                GLfloat a[4], result[4];
 384                fetch_vector4( &inst->SrcReg[0], machine, a );
 385                result[0] = a[0] - FLOORF(a[0]);
 386                result[1] = a[1] - FLOORF(a[1]);
 387                result[2] = a[2] - FLOORF(a[2]);
 388                result[3] = a[3] - FLOORF(a[3]);
 389                store_vector4( inst, machine, result );
 390             }
 391             break;
 392          case FP_OPCODE_KIL:
 393             {
 394                const GLuint *swizzle = inst->DstReg.CondSwizzle;
 395                const GLuint condMask = inst->DstReg.CondMask;
 396                if (test_cc(machine->CondCodes[swizzle[0]], condMask) ||
 397                    test_cc(machine->CondCodes[swizzle[1]], condMask) ||
 398                    test_cc(machine->CondCodes[swizzle[2]], condMask) ||
 399                    test_cc(machine->CondCodes[swizzle[3]], condMask))
 400                   return GL_FALSE;
 401             }
 402             break;
 403          case FP_OPCODE_LG2:  /* log base 2 */
 404             {
 405                GLfloat a[4], result[4];
 406                fetch_vector1( &inst->SrcReg[0], machine, a );
 407                result[0] = result[1] = result[2] = result[3]
 408                   = LOG2(a[0]);
 409                store_vector4( inst, machine, result );
 410             }
 411             break;
 412          case FP_OPCODE_LIT:
 413             {
 414                GLfloat a[4], result[4];
 415                fetch_vector4( &inst->SrcReg[0], machine, a );
 416                if (a[0] < 0.0F)
 417                   a[0] = 0.0F;
 418                if (a[1] < 0.0F)
 419                   a[1] = 0.0F;
 420                result[0] = 1.0F;
 421                result[1] = a[0];
 422                result[2] = (a[0] > 0.0) ? pow(2.0, a[3]) : 0.0F;
 423                result[3] = 1.0F;
 424                store_vector4( inst, machine, result );
 425             }
 426             break;
 427          case FP_OPCODE_LRP:
 428             {
 429                GLfloat a[4], b[4], c[4], result[4];
 430                fetch_vector4( &inst->SrcReg[0], machine, a );
 431                fetch_vector4( &inst->SrcReg[1], machine, b );
 432                fetch_vector4( &inst->SrcReg[2], machine, c );
 433                result[0] = a[0] * b[0] + (1.0F - a[0]) * c[0];
 434                result[1] = a[1] * b[1] + (1.0F - a[1]) * c[1];
 435                result[2] = a[2] * b[2] + (1.0F - a[2]) * c[2];
 436                result[3] = a[3] * b[3] + (1.0F - a[3]) * c[3];
 437                store_vector4( inst, machine, result );
 438             }
 439             break;
 440          case FP_OPCODE_MAD:
 441             {
 442                GLfloat a[4], b[4], c[4], result[4];
 443                fetch_vector4( &inst->SrcReg[0], machine, a );
 444                fetch_vector4( &inst->SrcReg[1], machine, b );
 445                fetch_vector4( &inst->SrcReg[2], machine, c );
 446                result[0] = a[0] * b[0] + c[0];
 447                result[1] = a[1] * b[1] + c[1];
 448                result[2] = a[2] * b[2] + c[2];
 449                result[3] = a[3] * b[3] + c[3];
 450                store_vector4( inst, machine, result );
 451             }
 452             break;
 453          case FP_OPCODE_MAX:
 454             {
 455                GLfloat a[4], b[4], result[4];
 456                fetch_vector4( &inst->SrcReg[0], machine, a );
 457                fetch_vector4( &inst->SrcReg[1], machine, b );
 458                result[0] = MAX2(a[0], b[0]);
 459                result[1] = MAX2(a[1], b[1]);
 460                result[2] = MAX2(a[2], b[2]);
 461                result[3] = MAX2(a[3], b[3]);
 462                store_vector4( inst, machine, result );
 463             }
 464             break;
 465          case FP_OPCODE_MIN:
 466             {
 467                GLfloat a[4], b[4], result[4];
 468                fetch_vector4( &inst->SrcReg[0], machine, a );
 469                fetch_vector4( &inst->SrcReg[1], machine, b );
 470                result[0] = MIN2(a[0], b[0]);
 471                result[1] = MIN2(a[1], b[1]);
 472                result[2] = MIN2(a[2], b[2]);
 473                result[3] = MIN2(a[3], b[3]);
 474                store_vector4( inst, machine, result );
 475             }
 476             break;
 477          case FP_OPCODE_MOV:
 478             {
 479                GLfloat result[4];
 480                fetch_vector4( &inst->SrcReg[0], machine, result );
 481                store_vector4( inst, machine, result );
 482             }
 483             break;
 484          case FP_OPCODE_MUL:
 485             {
 486                GLfloat a[4], b[4], result[4];
 487                fetch_vector4( &inst->SrcReg[0], machine, a );
 488                fetch_vector4( &inst->SrcReg[1], machine, b );
 489                result[0] = a[0] * b[0];
 490                result[1] = a[1] * b[1];
 491                result[2] = a[2] * b[2];
 492                result[3] = a[3] * b[3];
 493                store_vector4( inst, machine, result );
 494             }
 495             break;
 496          case FP_OPCODE_PK2H: /* pack two 16-bit floats */
 497             /* XXX this is probably wrong */
 498             {
 499                GLfloat a[4], result[4];
 500                const GLuint *rawBits = (const GLuint *) a;
 501                GLuint *rawResult = (GLuint *) result;
 502                fetch_vector4( &inst->SrcReg[0], machine, a );
 503                rawResult[0] = rawResult[1] = rawResult[2] = rawResult[3]
 504                   = rawBits[0] | (rawBits[1] << 16);
 505                store_vector4( inst, machine, result );
 506             }
 507             break;
 508          case FP_OPCODE_PK2US: /* pack two GLushorts */
 509             {
 510                GLfloat a[4], result[4];
 511                GLuint usx, usy, *rawResult = (GLuint *) result;
 512                fetch_vector4( &inst->SrcReg[0], machine, a );
 513                a[0] = CLAMP(a[0], 0.0F, 1.0F);
 514                a[1] = CLAMP(a[0], 0.0F, 1.0F);
 515                usx = IROUND(a[0] * 65535.0F);
 516                usy = IROUND(a[1] * 65535.0F);
 517                rawResult[0] = rawResult[1] = rawResult[2] = rawResult[3]
 518                   = usx | (usy << 16);
 519                store_vector4( inst, machine, result );
 520             }
 521             break;
 522          case FP_OPCODE_PK4B: /* pack four GLbytes */
 523             {
 524                GLfloat a[4], result[4];
 525                GLuint ubx, uby, ubz, ubw, *rawResult = (GLuint *) result;
 526                fetch_vector4( &inst->SrcReg[0], machine, a );
 527                a[0] = CLAMP(a[0], -128.0F / 127.0F, 1.0F);
 528                a[1] = CLAMP(a[1], -128.0F / 127.0F, 1.0F);
 529                a[2] = CLAMP(a[2], -128.0F / 127.0F, 1.0F);
 530                a[3] = CLAMP(a[3], -128.0F / 127.0F, 1.0F);
 531                ubx = IROUND(127.0F * a[0] + 128.0F);
 532                uby = IROUND(127.0F * a[1] + 128.0F);
 533                ubz = IROUND(127.0F * a[2] + 128.0F);
 534                ubw = IROUND(127.0F * a[3] + 128.0F);
 535                rawResult[0] = rawResult[1] = rawResult[2] = rawResult[3]
 536                   = ubx | (uby << 8) | (ubz << 16) | (ubw << 24);
 537                store_vector4( inst, machine, result );
 538             }
 539             break;
 540          case FP_OPCODE_PK4UB: /* pack four GLubytes */
 541             {
 542                GLfloat a[4], result[4];
 543                GLuint ubx, uby, ubz, ubw, *rawResult = (GLuint *) result;
 544                fetch_vector4( &inst->SrcReg[0], machine, a );
 545                a[0] = CLAMP(a[0], 0.0F, 1.0F);
 546                a[1] = CLAMP(a[1], 0.0F, 1.0F);
 547                a[2] = CLAMP(a[2], 0.0F, 1.0F);
 548                a[3] = CLAMP(a[3], 0.0F, 1.0F);
 549                ubx = IROUND(255.0F * a[0]);
 550                uby = IROUND(255.0F * a[1]);
 551                ubz = IROUND(255.0F * a[2]);
 552                ubw = IROUND(255.0F * a[3]);
 553                rawResult[0] = rawResult[1] = rawResult[2] = rawResult[3]
 554                   = ubx | (uby << 8) | (ubz << 16) | (ubw << 24);
 555                store_vector4( inst, machine, result );
 556             }
 557             break;
 558          case FP_OPCODE_POW:
 559             {
 560                GLfloat a[4], b[4], result[4];
 561                fetch_vector1( &inst->SrcReg[0], machine, a );
 562                fetch_vector1( &inst->SrcReg[1], machine, b );
 563                result[0] = result[1] = result[2] = result[3]
 564                   = pow(a[0], b[0]);
 565                store_vector4( inst, machine, result );
 566             }
 567             break;
 568          case FP_OPCODE_RCP:
 569             {
 570                GLfloat a[4], result[4];
 571                fetch_vector1( &inst->SrcReg[0], machine, a );
 572                result[0] = result[1] = result[2] = result[3]
 573                   = 1.0F / a[0];
 574                store_vector4( inst, machine, result );
 575             }
 576             break;
 577          case FP_OPCODE_RFL:
 578             {
 579                GLfloat axis[4], dir[4], result[4], tmp[4];
 580                fetch_vector4( &inst->SrcReg[0], machine, axis );
 581                fetch_vector4( &inst->SrcReg[1], machine, dir );
 582                tmp[3] = axis[0] * axis[0]
 583                       + axis[1] * axis[1]
 584                       + axis[2] * axis[2];
 585                tmp[0] = (2.0F * (axis[0] * dir[0] +
 586                                  axis[1] * dir[1] +
 587                                  axis[2] * dir[2])) / tmp[3];
 588                result[0] = tmp[0] * axis[0] - dir[0];
 589                result[1] = tmp[0] * axis[1] - dir[1];
 590                result[2] = tmp[0] * axis[2] - dir[2];
 591                /* result[3] is never written! XXX enforce in parser! */
 592                store_vector4( inst, machine, result );
 593             }
 594             break;
 595          case FP_OPCODE_RSQ: /* 1 / sqrt() */
 596             {
 597                GLfloat a[4], result[4];
 598                fetch_vector1( &inst->SrcReg[0], machine, a );
 599                result[0] = result[1] = result[2] = result[3]
 600                   = 1.0F / GL_SQRT(a[0]);
 601                store_vector4( inst, machine, result );
 602             }
 603             break;
 604          case FP_OPCODE_SEQ: /* set on equal */
 605             {
 606                GLfloat a[4], b[4], result[4];
 607                fetch_vector4( &inst->SrcReg[0], machine, a );
 608                fetch_vector4( &inst->SrcReg[1], machine, b );
 609                result[0] = (a[0] == b[0]) ? 1.0F : 0.0F;
 610                result[1] = (a[1] == b[1]) ? 1.0F : 0.0F;
 611                result[2] = (a[2] == b[2]) ? 1.0F : 0.0F;
 612                result[3] = (a[3] == b[3]) ? 1.0F : 0.0F;
 613                store_vector4( inst, machine, result );
 614             }
 615             break;
 616          case FP_OPCODE_SFL: /* set false, operands ignored */
 617             {
 618                static const GLfloat result[4] = { 0.0F, 0.0F, 0.0F, 0.0F };
 619                store_vector4( inst, machine, result );
 620             }
 621             break;
 622          case FP_OPCODE_SGE: /* set on greater or equal */
 623             {
 624                GLfloat a[4], b[4], result[4];
 625                fetch_vector4( &inst->SrcReg[0], machine, a );
 626                fetch_vector4( &inst->SrcReg[1], machine, b );
 627                result[0] = (a[0] >= b[0]) ? 1.0F : 0.0F;
 628                result[1] = (a[1] >= b[1]) ? 1.0F : 0.0F;
 629                result[2] = (a[2] >= b[2]) ? 1.0F : 0.0F;
 630                result[3] = (a[3] >= b[3]) ? 1.0F : 0.0F;
 631                store_vector4( inst, machine, result );
 632             }
 633             break;
 634          case FP_OPCODE_SGT: /* set on greater */
 635             {
 636                GLfloat a[4], b[4], result[4];
 637                fetch_vector4( &inst->SrcReg[0], machine, a );
 638                fetch_vector4( &inst->SrcReg[1], machine, b );
 639                result[0] = (a[0] > b[0]) ? 1.0F : 0.0F;
 640                result[1] = (a[1] > b[1]) ? 1.0F : 0.0F;
 641                result[2] = (a[2] > b[2]) ? 1.0F : 0.0F;
 642                result[3] = (a[3] > b[3]) ? 1.0F : 0.0F;
 643                store_vector4( inst, machine, result );
 644             }
 645             break;
 646          case FP_OPCODE_SIN:
 647             {
 648                GLfloat a[4], result[4];
 649                fetch_vector1( &inst->SrcReg[0], machine, a );
 650                result[0] = result[1] = result[2] = result[3] = sin(a[0]);
 651                store_vector4( inst, machine, result );
 652             }
 653             break;
 654          case FP_OPCODE_SLE: /* set on less or equal */
 655             {
 656                GLfloat a[4], b[4], result[4];
 657                fetch_vector4( &inst->SrcReg[0], machine, a );
 658                fetch_vector4( &inst->SrcReg[1], machine, b );
 659                result[0] = (a[0] <= b[0]) ? 1.0F : 0.0F;
 660                result[1] = (a[1] <= b[1]) ? 1.0F : 0.0F;
 661                result[2] = (a[2] <= b[2]) ? 1.0F : 0.0F;
 662                result[3] = (a[3] <= b[3]) ? 1.0F : 0.0F;
 663                store_vector4( inst, machine, result );
 664             }
 665             break;
 666          case FP_OPCODE_SLT: /* set on less */
 667             {
 668                GLfloat a[4], b[4], result[4];
 669                fetch_vector4( &inst->SrcReg[0], machine, a );
 670                fetch_vector4( &inst->SrcReg[1], machine, b );
 671                result[0] = (a[0] < b[0]) ? 1.0F : 0.0F;
 672                result[1] = (a[1] < b[1]) ? 1.0F : 0.0F;
 673                result[2] = (a[2] < b[2]) ? 1.0F : 0.0F;
 674                result[3] = (a[3] < b[3]) ? 1.0F : 0.0F;
 675                store_vector4( inst, machine, result );
 676             }
 677             break;
 678          case FP_OPCODE_SNE: /* set on not equal */
 679             {
 680                GLfloat a[4], b[4], result[4];
 681                fetch_vector4( &inst->SrcReg[0], machine, a );
 682                fetch_vector4( &inst->SrcReg[1], machine, b );
 683                result[0] = (a[0] != b[0]) ? 1.0F : 0.0F;
 684                result[1] = (a[1] != b[1]) ? 1.0F : 0.0F;
 685                result[2] = (a[2] != b[2]) ? 1.0F : 0.0F;
 686                result[3] = (a[3] != b[3]) ? 1.0F : 0.0F;
 687                store_vector4( inst, machine, result );
 688             }
 689             break;
 690          case FP_OPCODE_STR: /* set true, operands ignored */
 691             {
 692                static const GLfloat result[4] = { 1.0F, 1.0F, 1.0F, 1.0F };
 693                store_vector4( inst, machine, result );
 694             }
 695             break;
 696          case FP_OPCODE_SUB:
 697             {
 698                GLfloat a[4], b[4], result[4];
 699                fetch_vector4( &inst->SrcReg[0], machine, a );
 700                fetch_vector4( &inst->SrcReg[1], machine, b );
 701                result[0] = a[0] - b[0];
 702                result[1] = a[1] - b[1];
 703                result[2] = a[2] - b[2];
 704                result[3] = a[3] - b[3];
 705                store_vector4( inst, machine, result );
 706             }
 707             break;
 708          case FP_OPCODE_TEX:
 709             /* Texel lookup */
 710             {
 711                GLfloat texcoord[4], color[4];
 712                fetch_vector4( &inst->SrcReg[0], machine, texcoord );
 713                fetch_texel( ctx, texcoord, inst->TexSrcUnit,
 714                             inst->TexSrcIndex, color );
 715                store_vector4( inst, machine, color );
 716             }
 717             break;
 718          case FP_OPCODE_TXD:
 719             /* Texture lookup w/ partial derivatives for LOD */
 720             {
 721                GLfloat texcoord[4], dtdx[4], dtdy[4], color[4];
 722                fetch_vector4( &inst->SrcReg[0], machine, texcoord );
 723                fetch_vector4( &inst->SrcReg[1], machine, dtdx );
 724                fetch_vector4( &inst->SrcReg[2], machine, dtdy );
 725                fetch_texel_deriv( ctx, texcoord, dtdx, dtdy, inst->TexSrcUnit,
 726                                   inst->TexSrcIndex, color );
 727                store_vector4( inst, machine, color );
 728             }
 729             break;
 730          case FP_OPCODE_TXP:
 731             /* Texture lookup w/ perspective divide */
 732             {
 733                GLfloat texcoord[4], color[4];
 734                fetch_vector4( &inst->SrcReg[0], machine, texcoord );
 735                texcoord[0] /= texcoord[3];
 736                texcoord[1] /= texcoord[3];
 737                texcoord[2] /= texcoord[3];
 738                fetch_texel( ctx, texcoord, inst->TexSrcUnit,
 739                             inst->TexSrcIndex, color );
 740                store_vector4( inst, machine, color );
 741             }
 742             break;
 743          case FP_OPCODE_UP2H: /* unpack two 16-bit floats */
 744             /* XXX this is probably wrong */
 745             {
 746                GLfloat a[4], result[4];
 747                const GLuint *rawBits = (const GLuint *) a;
 748                GLuint *rawResult = (GLuint *) result;
 749                fetch_vector1( &inst->SrcReg[0], machine, a );
 750                rawResult[0] = rawBits[0] & 0xffff;
 751                rawResult[1] = (rawBits[0] >> 16) & 0xffff;
 752                rawResult[2] = rawBits[0] & 0xffff;
 753                rawResult[3] = (rawBits[0] >> 16) & 0xffff;
 754                store_vector4( inst, machine, result );
 755             }
 756             break;
 757          case FP_OPCODE_UP2US: /* unpack two GLushorts */
 758             {
 759                GLfloat a[4], result[4];
 760                const GLuint *rawBits = (const GLuint *) a;
 761                fetch_vector1( &inst->SrcReg[0], machine, a );
 762                result[0] = (GLfloat) ((rawBits[0] >>  0) & 0xffff) / 65535.0F;
 763                result[1] = (GLfloat) ((rawBits[0] >> 16) & 0xffff) / 65535.0F;
 764                result[2] = result[0];
 765                result[3] = result[1];
 766                store_vector4( inst, machine, result );
 767             }
 768             break;
 769          case FP_OPCODE_UP4B: /* unpack four GLbytes */
 770             {
 771                GLfloat a[4], result[4];
 772                const GLuint *rawBits = (const GLuint *) a;
 773                fetch_vector1( &inst->SrcReg[0], machine, a );
 774                result[0] = (((rawBits[0] >>  0) & 0xff) - 128) / 127.0F;
 775                result[0] = (((rawBits[0] >>  8) & 0xff) - 128) / 127.0F;
 776                result[0] = (((rawBits[0] >> 16) & 0xff) - 128) / 127.0F;
 777                result[0] = (((rawBits[0] >> 24) & 0xff) - 128) / 127.0F;
 778                store_vector4( inst, machine, result );
 779             }
 780             break;
 781          case FP_OPCODE_UP4UB: /* unpack four GLubytes */
 782             {
 783                GLfloat a[4], result[4];
 784                const GLuint *rawBits = (const GLuint *) a;
 785                fetch_vector1( &inst->SrcReg[0], machine, a );
 786                result[0] = ((rawBits[0] >>  0) & 0xff) / 255.0F;
 787                result[0] = ((rawBits[0] >>  8) & 0xff) / 255.0F;
 788                result[0] = ((rawBits[0] >> 16) & 0xff) / 255.0F;
 789                result[0] = ((rawBits[0] >> 24) & 0xff) / 255.0F;
 790                store_vector4( inst, machine, result );
 791             }
 792             break;
 793          case FP_OPCODE_X2D: /* 2-D matrix transform */
 794             {
 795                GLfloat a[4], b[4], c[4], result[4];
 796                fetch_vector4( &inst->SrcReg[0], machine, a );
 797                fetch_vector4( &inst->SrcReg[1], machine, b );
 798                fetch_vector4( &inst->SrcReg[2], machine, c );
 799                result[0] = a[0] + b[0] * c[0] + b[1] * c[1];
 800                result[1] = a[1] + b[0] * c[2] + b[1] * c[3];
 801                result[2] = a[2] + b[0] * c[0] + b[1] * c[1];
 802                result[3] = a[3] + b[0] * c[2] + b[1] * c[3];
 803                store_vector4( inst, machine, result );
 804             }
 805             break;
 806          default:
 807             _mesa_problem(ctx, "Bad opcode in _mesa_exec_fragment_program");
 808             return GL_TRUE; /* return value doesn't matter */
 809       }
 810    }
 811    return GL_TRUE;
 812 }
 813
 814
 815
 816 void
 817 _swrast_exec_nv_fragment_program( GLcontext *ctx, struct sw_span *span )
 818 {
 819    GLuint i;
 820
 821    for (i = 0; i < span->end; i++) {
 822       if (span->array->mask[i]) {
 823          GLfloat *wpos = ctx->FragmentProgram.Machine.Registers[0];
 824          GLfloat *col0 = ctx->FragmentProgram.Machine.Registers[1];
 825          GLfloat *col1 = ctx->FragmentProgram.Machine.Registers[2];
 826          GLfloat *fogc = ctx->FragmentProgram.Machine.Registers[3];
 827          const GLfloat *colOut = ctx->FragmentProgram.Machine.Registers[FP_OUTPUT_REG_START];
 828          GLuint j;
 829
 830          /* Clear temporary registers XXX use memzero() */
 831          _mesa_bzero(ctx->FragmentProgram.Machine.Registers +FP_TEMP_REG_START,
 832                      MAX_NV_FRAGMENT_PROGRAM_TEMPS * 4 * sizeof(GLfloat));
 833
 834          /*
 835           * Load input registers - yes this is all very inefficient for now.
 836           */
 837          wpos[0] = span->x + i;
 838          wpos[1] = span->y + i;
 839          wpos[2] = (GLfloat) span->array->z[i] / ctx->DepthMaxF;
 840          wpos[3] = 1.0; /* XXX should be 1/w */
 841
 842          col0[0] = CHAN_TO_FLOAT(span->array->rgba[i][RCOMP]);
 843          col0[1] = CHAN_TO_FLOAT(span->array->rgba[i][GCOMP]);
 844          col0[2] = CHAN_TO_FLOAT(span->array->rgba[i][BCOMP]);
 845          col0[3] = CHAN_TO_FLOAT(span->array->rgba[i][ACOMP]);
 846
 847          col1[0] = CHAN_TO_FLOAT(span->array->spec[i][RCOMP]);
 848          col1[1] = CHAN_TO_FLOAT(span->array->spec[i][GCOMP]);
 849          col1[2] = CHAN_TO_FLOAT(span->array->spec[i][BCOMP]);
 850          col1[3] = CHAN_TO_FLOAT(span->array->spec[i][ACOMP]);
 851
 852          fogc[0] = span->array->fog[i];
 853          fogc[1] = 0.0F;
 854          fogc[2] = 0.0F;
 855          fogc[3] = 0.0F;
 856
 857          for (j = 0; j < ctx->Const.MaxTextureCoordUnits; j++) {
 858             if (ctx->Texture.Unit[j]._ReallyEnabled) {
 859                COPY_4V(ctx->FragmentProgram.Machine.Registers[4 + j],
 860                        span->array->texcoords[j][i]);
 861             }
 862             else {
 863                COPY_4V(ctx->FragmentProgram.Machine.Registers[4 + j],
 864                        ctx->Current.Attrib[VERT_ATTRIB_TEX0 + j]);
 865             }
 866          }
 867
 868          if (!execute_program(ctx, ctx->FragmentProgram.Current))
 869             span->array->mask[i] = GL_FALSE;  /* killed fragment */
 870
 871          /* Store output registers */
 872          UNCLAMPED_FLOAT_TO_CHAN(span->array->rgba[i][RCOMP], colOut[0]);
 873          UNCLAMPED_FLOAT_TO_CHAN(span->array->rgba[i][GCOMP], colOut[1]);
 874          UNCLAMPED_FLOAT_TO_CHAN(span->array->rgba[i][BCOMP], colOut[2]);
 875          UNCLAMPED_FLOAT_TO_CHAN(span->array->rgba[i][ACOMP], colOut[3]);
 876          /* depth value */
 877          if (ctx->FragmentProgram.Current->OutputsWritten & 2)
 878             span->array->z[i] = IROUND(ctx->FragmentProgram.Machine.Registers[FP_OUTPUT_REG_START + 2][0] * ctx->DepthMaxF);
 879       }
 880    }
 881 }
 882