src/mesa/swrast/s_nvfragprog.c

   1 /* $Id: s_nvfragprog.c,v 1.1 2003/01/14 04:57:47 brianp Exp $ */
   2
   3 /*
   4  * Mesa 3-D graphics library
   5  * Version:  5.1
   6  *
   7  * Copyright (C) 1999-2002  Brian Paul   All Rights Reserved.
   8  *
   9  * Permission is hereby granted, free of charge, to any person obtaining a
  10  * copy of this software and associated documentation files (the "Software"),
  11  * to deal in the Software without restriction, including without limitation
  12  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  13  * and/or sell copies of the Software, and to permit persons to whom the
  14  * Software is furnished to do so, subject to the following conditions:
  15  *
  16  * The above copyright notice and this permission notice shall be included
  17  * in all copies or substantial portions of the Software.
  18  *
  19  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  20  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  21  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  22  * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
  23  * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  24  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25  */
  26
  27
  28
  29 #include "glheader.h"
  30 #include "colormac.h"
  31 #include "context.h"
  32 #include "nvfragprog.h"
  33 #include "macros.h"
  34 #include "mmath.h"
  35
  36 #include "s_nvfragprog.h"
  37
  38
  39 /**
  40  * Fetch a 4-element float vector from the given source register.
  41  * Apply swizzling and negating as needed.
  42  */
  43 static void
  44 fetch_vector4( const struct fp_src_register *source,
  45                const struct fp_machine *machine,
  46                GLfloat result[4] )
  47 {
  48    const GLfloat *src;
  49
  50    /*
  51    if (source->RelAddr) {
  52       GLint reg = source->Register + machine->AddressReg;
  53       if (reg < VP_PROG_REG_START || reg > VP_PROG_REG_END)
  54          src = zero;
  55       else
  56          src = machine->Registers[reg];
  57    }
  58    else
  59    */
  60
  61    src = machine->Registers[source->Register];
  62
  63    result[0] = src[source->Swizzle[0]];
  64    result[1] = src[source->Swizzle[1]];
  65    result[2] = src[source->Swizzle[2]];
  66    result[3] = src[source->Swizzle[3]];
  67
  68    if (source->NegateBase) {
  69       result[0] = -result[0];
  70       result[1] = -result[1];
  71       result[2] = -result[2];
  72       result[3] = -result[3];
  73    }
  74    if (source->Abs) {
  75       result[0] = FABSF(result[0]);
  76       result[1] = FABSF(result[1]);
  77       result[2] = FABSF(result[2]);
  78       result[3] = FABSF(result[3]);
  79    }
  80    if (source->NegateAbs) {
  81       result[0] = -result[0];
  82       result[1] = -result[1];
  83       result[2] = -result[2];
  84       result[3] = -result[3];
  85    }
  86 }
  87
  88
  89 /**
  90  * As above, but only return result[0] element.
  91  */
  92 static void
  93 fetch_vector1( const struct fp_src_register *source,
  94                const struct fp_machine *machine,
  95                GLfloat result[4] )
  96 {
  97    const GLfloat *src;
  98
  99    /*
 100    if (source->RelAddr) {
 101       GLint reg = source->Register + machine->AddressReg;
 102       if (reg < VP_PROG_REG_START || reg > VP_PROG_REG_END)
 103          src = zero;
 104       else
 105          src = machine->Registers[reg];
 106    }
 107    else
 108    */
 109
 110    src = machine->Registers[source->Register];
 111
 112    result[0] = src[source->Swizzle[0]];
 113
 114    if (source->NegateBase) {
 115       result[0] = -result[0];
 116    }
 117    if (source->Abs) {
 118       result[0] = FABSF(result[0]);
 119    }
 120    if (source->NegateAbs) {
 121       result[0] = -result[0];
 122    }
 123 }
 124
 125
 126 /*
 127  * Test value against zero and return GT, LT, EQ or UN if NaN.
 128  */
 129 static INLINE GLuint
 130 generate_cc( float value )
 131 {
 132    if (value != value)
 133       return COND_UN;  /* NaN */
 134    if (value > 0.0F)
 135       return COND_GT;
 136    if (value < 0.0F)
 137       return COND_LT;
 138    return COND_EQ;
 139 }
 140
 141 /*
 142  * Test if the ccMaskRule is satisfied by the given condition code.
 143  * Used to mask destination writes according to the current condition codee.
 144  */
 145 static INLINE GLboolean
 146 test_cc(GLuint condCode, GLuint ccMaskRule)
 147 {
 148    switch (ccMaskRule) {
 149    case COND_EQ: return (condCode == COND_EQ);
 150    case COND_NE: return (condCode != COND_EQ);
 151    case COND_LT: return (condCode == COND_LT);
 152    case COND_GE: return (condCode == COND_GT || condCode == COND_EQ);
 153    case COND_LE: return (condCode == COND_LT || condCode == COND_EQ);
 154    case COND_GT: return (condCode == COND_GT);
 155    case COND_TR: return GL_TRUE;
 156    case COND_FL: return GL_FALSE;
 157    default:      return GL_TRUE;
 158    }
 159 }
 160
 161
 162 /**
 163  * Store 4 floats into a register.
 164  */
 165 static void
 166 store_vector4( const struct fp_dst_register *dest, struct fp_machine *machine,
 167                const GLfloat value[4], GLboolean clamp, GLboolean updateCC )
 168 {
 169    GLfloat *dstReg = machine->Registers[dest->Register];
 170    GLfloat clampedValue[4];
 171    const GLboolean *writeMask = dest->WriteMask;
 172    GLboolean condWriteMask[4];
 173
 174    if (clamp) {
 175       clampedValue[0] = CLAMP(value[0], 0.0F, 1.0F);
 176       clampedValue[1] = CLAMP(value[1], 0.0F, 1.0F);
 177       clampedValue[2] = CLAMP(value[2], 0.0F, 1.0F);
 178       clampedValue[3] = CLAMP(value[3], 0.0F, 1.0F);
 179       value = clampedValue;
 180    }
 181
 182    if (dest->CondMask != COND_TR) {
 183       condWriteMask[0] = writeMask[0]
 184          && test_cc(machine->CondCodes[dest->CondSwizzle[0]], dest->CondMask);
 185       condWriteMask[1] = writeMask[1]
 186          && test_cc(machine->CondCodes[dest->CondSwizzle[1]], dest->CondMask);
 187       condWriteMask[2] = writeMask[2]
 188          && test_cc(machine->CondCodes[dest->CondSwizzle[2]], dest->CondMask);
 189       condWriteMask[3] = writeMask[3]
 190          && test_cc(machine->CondCodes[dest->CondSwizzle[3]], dest->CondMask);
 191       writeMask = condWriteMask;
 192    }
 193
 194    if (writeMask[0]) {
 195       dstReg[0] = value[0];
 196       if (updateCC)
 197          machine->CondCodes[0] = generate_cc(value[0]);
 198    }
 199    if (writeMask[1]) {
 200       dstReg[1] = value[1];
 201       if (updateCC)
 202          machine->CondCodes[1] = generate_cc(value[1]);
 203    }
 204    if (writeMask[2]) {
 205       dstReg[2] = value[2];
 206       if (updateCC)
 207          machine->CondCodes[2] = generate_cc(value[2]);
 208    }
 209    if (writeMask[3]) {
 210       dstReg[3] = value[3];
 211       if (updateCC)
 212          machine->CondCodes[3] = generate_cc(value[3]);
 213    }
 214 }
 215
 216
 217 /**
 218  * Execute the given vertex program
 219  */
 220 static void
 221 execute_program(GLcontext *ctx, const struct fragment_program *program)
 222 {
 223    struct fp_machine *machine = &ctx->FragmentProgram.Machine;
 224    const struct fp_instruction *inst;
 225
 226    for (inst = program->Instructions; inst->Opcode != FP_OPCODE_END; inst++) {
 227       switch (inst->Opcode) {
 228          case FP_OPCODE_ADD:
 229             {
 230                GLfloat a[4], b[4], result[4];
 231                fetch_vector4( &inst->SrcReg[0], machine, a );
 232                fetch_vector4( &inst->SrcReg[1], machine, b );
 233                result[0] = a[0] + b[0];
 234                result[1] = a[1] + b[1];
 235                result[2] = a[2] + b[2];
 236                result[3] = a[3] + b[3];
 237                store_vector4( &inst->DstReg, machine, result, inst->Saturate,
 238                               inst->UpdateCondRegister );
 239             }
 240             break;
 241          case FP_OPCODE_COS:
 242             {
 243                GLfloat a[4], result[4];
 244                fetch_vector1( &inst->SrcReg[0], machine, a );
 245                result[0] = result[1] = result[2] = result[3] = cos(a[0]);
 246                store_vector4( &inst->DstReg, machine, result, inst->Saturate,
 247                               inst->UpdateCondRegister );
 248             }
 249             break;
 250          case FP_OPCODE_DP3:
 251             {
 252                GLfloat a[4], b[4], result[4];
 253                fetch_vector4( &inst->SrcReg[0], machine, a );
 254                fetch_vector4( &inst->SrcReg[1], machine, b );
 255                result[0] = result[1] = result[2] = result[3] =
 256                   a[0] + b[0] + a[1] * b[1] + a[2] * b[2];
 257                store_vector4( &inst->DstReg, machine, result, inst->Saturate,
 258                               inst->UpdateCondRegister );
 259             }
 260             break;
 261          case FP_OPCODE_DP4:
 262             {
 263                GLfloat a[4], b[4], result[4];
 264                fetch_vector4( &inst->SrcReg[0], machine, a );
 265                fetch_vector4( &inst->SrcReg[1], machine, b );
 266                result[0] = result[1] = result[2] = result[3] =
 267                   a[0] + b[0] + a[1] * b[1] + a[2] * b[2] + a[3] * b[3];
 268                store_vector4( &inst->DstReg, machine, result, inst->Saturate,
 269                               inst->UpdateCondRegister );
 270             }
 271             break;
 272          case FP_OPCODE_KIL:
 273             {
 274                const GLuint *swizzle = inst->DstReg.CondSwizzle;
 275                const GLuint condMask = inst->DstReg.CondMask;
 276                if (test_cc(machine->CondCodes[swizzle[0]], condMask) ||
 277                    test_cc(machine->CondCodes[swizzle[1]], condMask) ||
 278                    test_cc(machine->CondCodes[swizzle[2]], condMask) ||
 279                    test_cc(machine->CondCodes[swizzle[3]], condMask))
 280                   return;
 281             }
 282             break;
 283          case FP_OPCODE_LRP:
 284             {
 285                GLfloat a[4], b[4], c[4], result[4];
 286                fetch_vector4( &inst->SrcReg[0], machine, a );
 287                fetch_vector4( &inst->SrcReg[1], machine, b );
 288                fetch_vector4( &inst->SrcReg[2], machine, c );
 289                result[0] = a[0] * b[0] + (1.0F - a[0]) * c[0];
 290                result[1] = a[1] * b[1] + (1.0F - a[1]) * c[1];
 291                result[2] = a[2] * b[2] + (1.0F - a[2]) * c[2];
 292                result[3] = a[3] * b[3] + (1.0F - a[3]) * c[3];
 293                store_vector4( &inst->DstReg, machine, result, inst->Saturate,
 294                               inst->UpdateCondRegister );
 295             }
 296             break;
 297          case FP_OPCODE_MOV:
 298             {
 299                GLfloat t[4];
 300                fetch_vector4( &inst->SrcReg[0], machine, t );
 301                store_vector4( &inst->DstReg, machine, t, inst->Saturate,
 302                               inst->UpdateCondRegister );
 303             }
 304             break;
 305          case FP_OPCODE_SEQ:
 306             {
 307                GLfloat a[4], b[4], result[4];
 308                fetch_vector4( &inst->SrcReg[0], machine, a );
 309                fetch_vector4( &inst->SrcReg[1], machine, b );
 310                result[0] = (a[0] == b[0]) ? 1.0F : 0.0F;
 311                result[1] = (a[1] == b[1]) ? 1.0F : 0.0F;
 312                result[2] = (a[2] == b[2]) ? 1.0F : 0.0F;
 313                result[3] = (a[3] == b[3]) ? 1.0F : 0.0F;
 314                store_vector4( &inst->DstReg, machine, result, inst->Saturate,
 315                               inst->UpdateCondRegister );
 316             }
 317             break;
 318          default:
 319             _mesa_problem(ctx, "Bad opcode in _mesa_exec_fragment_program");
 320             return;
 321       }
 322    }
 323
 324 }
 325
 326
 327
 328 void
 329 _swrast_exec_nv_fragment_program( GLcontext *ctx, struct sw_span *span )
 330 {
 331    GLuint i;
 332
 333    for (i = 0; i < span->end; i++) {
 334       GLfloat *wpos = ctx->FragmentProgram.Machine.Registers[0];
 335       GLfloat *col0 = ctx->FragmentProgram.Machine.Registers[1];
 336       GLfloat *col1 = ctx->FragmentProgram.Machine.Registers[2];
 337       GLfloat *fogc = ctx->FragmentProgram.Machine.Registers[3];
 338       const GLfloat *colOut = ctx->FragmentProgram.Machine.Registers[FP_OUTPUT_REG_START];
 339       GLuint j;
 340
 341       /* Clear temporary registers */
 342       for (j = 0; j < MAX_NV_FRAGMENT_PROGRAM_TEMPS; j++) {
 343          ctx->FragmentProgram.Machine.Registers[FP_TEMP_REG_START+j][0] = 0.0F;
 344          ctx->FragmentProgram.Machine.Registers[FP_TEMP_REG_START+j][1] = 0.0F;
 345          ctx->FragmentProgram.Machine.Registers[FP_TEMP_REG_START+j][2] = 0.0F;
 346          ctx->FragmentProgram.Machine.Registers[FP_TEMP_REG_START+j][3] = 0.0F;
 347       }
 348
 349       /* Load input registers */
 350       wpos[0] = span->x + i;
 351       wpos[1] = span->y + i;
 352       wpos[2] = span->array->z[i];
 353       wpos[3] = 1.0;
 354
 355       col0[0] = CHAN_TO_FLOAT(span->array->rgba[i][RCOMP]);
 356       col0[1] = CHAN_TO_FLOAT(span->array->rgba[i][GCOMP]);
 357       col0[2] = CHAN_TO_FLOAT(span->array->rgba[i][BCOMP]);
 358       col0[3] = CHAN_TO_FLOAT(span->array->rgba[i][ACOMP]);
 359
 360       col1[0] = CHAN_TO_FLOAT(span->array->spec[i][RCOMP]);
 361       col1[1] = CHAN_TO_FLOAT(span->array->spec[i][GCOMP]);
 362       col1[2] = CHAN_TO_FLOAT(span->array->spec[i][BCOMP]);
 363       col1[3] = CHAN_TO_FLOAT(span->array->spec[i][ACOMP]);
 364
 365       fogc[0] = span->array->fog[i];
 366
 367       execute_program(ctx, ctx->FragmentProgram.Current);
 368
 369       /* Store output registers */
 370       UNCLAMPED_FLOAT_TO_CHAN(span->array->rgba[i][RCOMP], colOut[0]);
 371       UNCLAMPED_FLOAT_TO_CHAN(span->array->rgba[i][GCOMP], colOut[1]);
 372       UNCLAMPED_FLOAT_TO_CHAN(span->array->rgba[i][BCOMP], colOut[2]);
 373       UNCLAMPED_FLOAT_TO_CHAN(span->array->rgba[i][ACOMP], colOut[3]);
 374    }
 375 }
 376