src/gallium/auxiliary/tgsi/tgsi_exec.c

   1 /**************************************************************************
   2  *
   3  * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
   4  * All Rights Reserved.
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a
   7  * copy of this software and associated documentation files (the
   8  * "Software"), to deal in the Software without restriction, including
   9  * without limitation the rights to use, copy, modify, merge, publish,
  10  * distribute, sub license, and/or sell copies of the Software, and to
  11  * permit persons to whom the Software is furnished to do so, subject to
  12  * the following conditions:
  13  *
  14  * The above copyright notice and this permission notice (including the
  15  * next paragraph) shall be included in all copies or substantial portions
  16  * of the Software.
  17  *
  18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  21  * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
  22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25  *
  26  **************************************************************************/
  27
  28 /**
  29  * TGSI interpreter/executor.
  30  *
  31  * Flow control information:
  32  *
  33  * Since we operate on 'quads' (4 pixels or 4 vertices in parallel)
  34  * flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special
  35  * care since a condition may be true for some quad components but false
  36  * for other components.
  37  *
  38  * We basically execute all statements (even if they're in the part of
  39  * an IF/ELSE clause that's "not taken") and use a special mask to
  40  * control writing to destination registers.  This is the ExecMask.
  41  * See store_dest().
  42  *
 * The ExecMask is computed from four other masks (CondMask, LoopMask,
 * ContMask and FuncMask; see UPDATE_EXEC_MASK), which are maintained by
 * the flow control instructions (e.g. IF/ELSE/ENDIF, LOOP/ENDLOOP and CONT).
  46  *
  47  *
  48  * Authors:
  49  *   Michal Krol
  50  *   Brian Paul
  51  */
  52
  53 #include "pipe/p_compiler.h"
  54 #include "pipe/p_state.h"
  55 #include "pipe/p_shader_tokens.h"
  56 #include "tgsi/tgsi_parse.h"
  57 #include "tgsi/tgsi_util.h"
  58 #include "tgsi_exec.h"
  59 #include "util/u_memory.h"
  60 #include "util/u_math.h"
  61
  62 #define FAST_MATH 1
  63
  64 #define TILE_TOP_LEFT     0
  65 #define TILE_TOP_RIGHT    1
  66 #define TILE_BOTTOM_LEFT  2
  67 #define TILE_BOTTOM_RIGHT 3
  68
  69 #define CHAN_X  0
  70 #define CHAN_Y  1
  71 #define CHAN_Z  2
  72 #define CHAN_W  3
  73
  74 /*
  75  * Shorthand locations of various utility registers (_I = Index, _C = Channel)
  76  */
  77 #define TEMP_0_I           TGSI_EXEC_TEMP_00000000_I
  78 #define TEMP_0_C           TGSI_EXEC_TEMP_00000000_C
  79 #define TEMP_7F_I          TGSI_EXEC_TEMP_7FFFFFFF_I
  80 #define TEMP_7F_C          TGSI_EXEC_TEMP_7FFFFFFF_C
  81 #define TEMP_80_I          TGSI_EXEC_TEMP_80000000_I
  82 #define TEMP_80_C          TGSI_EXEC_TEMP_80000000_C
  83 #define TEMP_FF_I          TGSI_EXEC_TEMP_FFFFFFFF_I
  84 #define TEMP_FF_C          TGSI_EXEC_TEMP_FFFFFFFF_C
  85 #define TEMP_1_I           TGSI_EXEC_TEMP_ONE_I
  86 #define TEMP_1_C           TGSI_EXEC_TEMP_ONE_C
  87 #define TEMP_2_I           TGSI_EXEC_TEMP_TWO_I
  88 #define TEMP_2_C           TGSI_EXEC_TEMP_TWO_C
  89 #define TEMP_128_I         TGSI_EXEC_TEMP_128_I
  90 #define TEMP_128_C         TGSI_EXEC_TEMP_128_C
  91 #define TEMP_M128_I        TGSI_EXEC_TEMP_MINUS_128_I
  92 #define TEMP_M128_C        TGSI_EXEC_TEMP_MINUS_128_C
  93 #define TEMP_KILMASK_I     TGSI_EXEC_TEMP_KILMASK_I
  94 #define TEMP_KILMASK_C     TGSI_EXEC_TEMP_KILMASK_C
  95 #define TEMP_OUTPUT_I      TGSI_EXEC_TEMP_OUTPUT_I
  96 #define TEMP_OUTPUT_C      TGSI_EXEC_TEMP_OUTPUT_C
  97 #define TEMP_PRIMITIVE_I   TGSI_EXEC_TEMP_PRIMITIVE_I
  98 #define TEMP_PRIMITIVE_C   TGSI_EXEC_TEMP_PRIMITIVE_C
  99 #define TEMP_CC_I          TGSI_EXEC_TEMP_CC_I
 100 #define TEMP_CC_C          TGSI_EXEC_TEMP_CC_C
 101 #define TEMP_3_I           TGSI_EXEC_TEMP_THREE_I
 102 #define TEMP_3_C           TGSI_EXEC_TEMP_THREE_C
 103 #define TEMP_HALF_I        TGSI_EXEC_TEMP_HALF_I
 104 #define TEMP_HALF_C        TGSI_EXEC_TEMP_HALF_C
 105 #define TEMP_R0            TGSI_EXEC_TEMP_R0
 106
 107 #define IS_CHANNEL_ENABLED(INST, CHAN)\
 108    ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN)))
 109
 110 #define IS_CHANNEL_ENABLED2(INST, CHAN)\
 111    ((INST).FullDstRegisters[1].DstRegister.WriteMask & (1 << (CHAN)))
 112
 113 #define FOR_EACH_ENABLED_CHANNEL(INST, CHAN)\
 114    for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)\
 115       if (IS_CHANNEL_ENABLED( INST, CHAN ))
 116
 117 #define FOR_EACH_ENABLED_CHANNEL2(INST, CHAN)\
 118    for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)\
 119       if (IS_CHANNEL_ENABLED2( INST, CHAN ))
 120
 121
 122 /** The execution mask depends on the conditional mask and the loop mask */
 123 #define UPDATE_EXEC_MASK(MACH) \
 124       MACH->ExecMask = MACH->CondMask & MACH->LoopMask & MACH->ContMask & MACH->FuncMask
 125
 126 /**
 127  * Initialize machine state by expanding tokens to full instructions,
 128  * allocating temporary storage, setting up constants, etc.
 129  * After this, we can call tgsi_exec_machine_run() many times.
 130  */
 131 void
 132 tgsi_exec_machine_bind_shader(
 133    struct tgsi_exec_machine *mach,
 134    const struct tgsi_token *tokens,
 135    uint numSamplers,
 136    struct tgsi_sampler **samplers)
 137 {
 138    uint k;
 139    struct tgsi_parse_context parse;
 140    struct tgsi_exec_labels *labels = &mach->Labels;
 141    struct tgsi_full_instruction *instructions;
 142    struct tgsi_full_declaration *declarations;
 143    uint maxInstructions = 10, numInstructions = 0;
 144    uint maxDeclarations = 10, numDeclarations = 0;
 145    uint instno = 0;
 146
 147 #if 0
 148    tgsi_dump(tokens, 0);
 149 #endif
 150
 151    util_init_math();
 152
 153    mach->Tokens = tokens;
 154    mach->Samplers = samplers;
 155
 156    k = tgsi_parse_init (&parse, mach->Tokens);
 157    if (k != TGSI_PARSE_OK) {
 158       debug_printf( "Problem parsing!\n" );
 159       return;
 160    }
 161
 162    mach->Processor = parse.FullHeader.Processor.Processor;
 163    mach->ImmLimit = 0;
 164    labels->count = 0;
 165
 166    declarations = (struct tgsi_full_declaration *)
 167       MALLOC( maxDeclarations * sizeof(struct tgsi_full_declaration) );
 168
 169    if (!declarations) {
 170       return;
 171    }
 172
 173    instructions = (struct tgsi_full_instruction *)
 174       MALLOC( maxInstructions * sizeof(struct tgsi_full_instruction) );
 175
 176    if (!instructions) {
 177       FREE( declarations );
 178       return;
 179    }
 180
 181    while( !tgsi_parse_end_of_tokens( &parse ) ) {
 182       uint pointer = parse.Position;
 183       uint i;
 184
 185       tgsi_parse_token( &parse );
 186       switch( parse.FullToken.Token.Type ) {
 187       case TGSI_TOKEN_TYPE_DECLARATION:
 188          /* save expanded declaration */
 189          if (numDeclarations == maxDeclarations) {
 190             declarations = REALLOC(declarations,
 191                                    maxDeclarations
 192                                    * sizeof(struct tgsi_full_declaration),
 193                                    (maxDeclarations + 10)
 194                                    * sizeof(struct tgsi_full_declaration));
 195             maxDeclarations += 10;
 196          }
 197          memcpy(declarations + numDeclarations,
 198                 &parse.FullToken.FullDeclaration,
 199                 sizeof(declarations[0]));
 200          numDeclarations++;
 201          break;
 202
 203       case TGSI_TOKEN_TYPE_IMMEDIATE:
 204          {
 205             uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
 206             assert( size % 4 == 0 );
 207             assert( mach->ImmLimit + size / 4 <= TGSI_EXEC_NUM_IMMEDIATES );
 208
 209             for( i = 0; i < size; i++ ) {
 210                mach->Imms[mach->ImmLimit + i / 4][i % 4] =
 211                   parse.FullToken.FullImmediate.u.ImmediateFloat32[i].Float;
 212             }
 213             mach->ImmLimit += size / 4;
 214          }
 215          break;
 216
 217       case TGSI_TOKEN_TYPE_INSTRUCTION:
 218          assert( labels->count < MAX_LABELS );
 219
 220          labels->labels[labels->count][0] = instno;
 221          labels->labels[labels->count][1] = pointer;
 222          labels->count++;
 223
 224          /* save expanded instruction */
 225          if (numInstructions == maxInstructions) {
 226             instructions = REALLOC(instructions,
 227                                    maxInstructions
 228                                    * sizeof(struct tgsi_full_instruction),
 229                                    (maxInstructions + 10)
 230                                    * sizeof(struct tgsi_full_instruction));
 231             maxInstructions += 10;
 232          }
 233          memcpy(instructions + numInstructions,
 234                 &parse.FullToken.FullInstruction,
 235                 sizeof(instructions[0]));
 236          numInstructions++;
 237          break;
 238
 239       default:
 240          assert( 0 );
 241       }
 242    }
 243    tgsi_parse_free (&parse);
 244
 245    if (mach->Declarations) {
 246       FREE( mach->Declarations );
 247    }
 248    mach->Declarations = declarations;
 249    mach->NumDeclarations = numDeclarations;
 250
 251    if (mach->Instructions) {
 252       FREE( mach->Instructions );
 253    }
 254    mach->Instructions = instructions;
 255    mach->NumInstructions = numInstructions;
 256 }
 257
 258
 259 void
 260 tgsi_exec_machine_init(
 261    struct tgsi_exec_machine *mach )
 262 {
 263    uint i;
 264
 265    mach->Temps = (struct tgsi_exec_vector *) tgsi_align_128bit( mach->_Temps);
 266    mach->Addrs = &mach->Temps[TGSI_EXEC_TEMP_ADDR];
 267
 268    /* Setup constants. */
 269    for( i = 0; i < 4; i++ ) {
 270       mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].u[i] = 0x00000000;
 271       mach->Temps[TEMP_7F_I].xyzw[TEMP_7F_C].u[i] = 0x7FFFFFFF;
 272       mach->Temps[TEMP_80_I].xyzw[TEMP_80_C].u[i] = 0x80000000;
 273       mach->Temps[TEMP_FF_I].xyzw[TEMP_FF_C].u[i] = 0xFFFFFFFF;
 274       mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].f[i] = 1.0f;
 275       mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].f[i] = 2.0f;
 276       mach->Temps[TEMP_128_I].xyzw[TEMP_128_C].f[i] = 128.0f;
 277       mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C].f[i] = -128.0f;
 278       mach->Temps[TEMP_3_I].xyzw[TEMP_3_C].f[i] = 3.0f;
 279       mach->Temps[TEMP_HALF_I].xyzw[TEMP_HALF_C].f[i] = 0.5f;
 280    }
 281 }
 282
 283
 284 void
 285 tgsi_exec_machine_free_data(struct tgsi_exec_machine *mach)
 286 {
 287    if (mach->Instructions) {
 288       FREE(mach->Instructions);
 289       mach->Instructions = NULL;
 290       mach->NumInstructions = 0;
 291    }
 292    if (mach->Declarations) {
 293       FREE(mach->Declarations);
 294       mach->Declarations = NULL;
 295       mach->NumDeclarations = 0;
 296    }
 297 }
 298
 299
 300 static void
 301 micro_abs(
 302    union tgsi_exec_channel *dst,
 303    const union tgsi_exec_channel *src )
 304 {
 305    dst->f[0] = fabsf( src->f[0] );
 306    dst->f[1] = fabsf( src->f[1] );
 307    dst->f[2] = fabsf( src->f[2] );
 308    dst->f[3] = fabsf( src->f[3] );
 309 }
 310
 311 static void
 312 micro_add(
 313    union tgsi_exec_channel *dst,
 314    const union tgsi_exec_channel *src0,
 315    const union tgsi_exec_channel *src1 )
 316 {
 317    dst->f[0] = src0->f[0] + src1->f[0];
 318    dst->f[1] = src0->f[1] + src1->f[1];
 319    dst->f[2] = src0->f[2] + src1->f[2];
 320    dst->f[3] = src0->f[3] + src1->f[3];
 321 }
 322
 323 #if 0
 324 static void
 325 micro_iadd(
 326    union tgsi_exec_channel *dst,
 327    const union tgsi_exec_channel *src0,
 328    const union tgsi_exec_channel *src1 )
 329 {
 330    dst->i[0] = src0->i[0] + src1->i[0];
 331    dst->i[1] = src0->i[1] + src1->i[1];
 332    dst->i[2] = src0->i[2] + src1->i[2];
 333    dst->i[3] = src0->i[3] + src1->i[3];
 334 }
 335 #endif
 336
 337 static void
 338 micro_and(
 339    union tgsi_exec_channel *dst,
 340    const union tgsi_exec_channel *src0,
 341    const union tgsi_exec_channel *src1 )
 342 {
 343    dst->u[0] = src0->u[0] & src1->u[0];
 344    dst->u[1] = src0->u[1] & src1->u[1];
 345    dst->u[2] = src0->u[2] & src1->u[2];
 346    dst->u[3] = src0->u[3] & src1->u[3];
 347 }
 348
 349 static void
 350 micro_ceil(
 351    union tgsi_exec_channel *dst,
 352    const union tgsi_exec_channel *src )
 353 {
 354    dst->f[0] = ceilf( src->f[0] );
 355    dst->f[1] = ceilf( src->f[1] );
 356    dst->f[2] = ceilf( src->f[2] );
 357    dst->f[3] = ceilf( src->f[3] );
 358 }
 359
 360 static void
 361 micro_cos(
 362    union tgsi_exec_channel *dst,
 363    const union tgsi_exec_channel *src )
 364 {
 365    dst->f[0] = cosf( src->f[0] );
 366    dst->f[1] = cosf( src->f[1] );
 367    dst->f[2] = cosf( src->f[2] );
 368    dst->f[3] = cosf( src->f[3] );
 369 }
 370
 371 static void
 372 micro_ddx(
 373    union tgsi_exec_channel *dst,
 374    const union tgsi_exec_channel *src )
 375 {
 376    dst->f[0] =
 377    dst->f[1] =
 378    dst->f[2] =
 379    dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_BOTTOM_LEFT];
 380 }
 381
 382 static void
 383 micro_ddy(
 384    union tgsi_exec_channel *dst,
 385    const union tgsi_exec_channel *src )
 386 {
 387    dst->f[0] =
 388    dst->f[1] =
 389    dst->f[2] =
 390    dst->f[3] = src->f[TILE_TOP_LEFT] - src->f[TILE_BOTTOM_LEFT];
 391 }
 392
 393 static void
 394 micro_div(
 395    union tgsi_exec_channel *dst,
 396    const union tgsi_exec_channel *src0,
 397    const union tgsi_exec_channel *src1 )
 398 {
 399    if (src1->f[0] != 0) {
 400       dst->f[0] = src0->f[0] / src1->f[0];
 401    }
 402    if (src1->f[1] != 0) {
 403       dst->f[1] = src0->f[1] / src1->f[1];
 404    }
 405    if (src1->f[2] != 0) {
 406       dst->f[2] = src0->f[2] / src1->f[2];
 407    }
 408    if (src1->f[3] != 0) {
 409       dst->f[3] = src0->f[3] / src1->f[3];
 410    }
 411 }
 412
 413 #if 0
 414 static void
 415 micro_udiv(
 416    union tgsi_exec_channel *dst,
 417    const union tgsi_exec_channel *src0,
 418    const union tgsi_exec_channel *src1 )
 419 {
 420    dst->u[0] = src0->u[0] / src1->u[0];
 421    dst->u[1] = src0->u[1] / src1->u[1];
 422    dst->u[2] = src0->u[2] / src1->u[2];
 423    dst->u[3] = src0->u[3] / src1->u[3];
 424 }
 425 #endif
 426
 427 static void
 428 micro_eq(
 429    union tgsi_exec_channel *dst,
 430    const union tgsi_exec_channel *src0,
 431    const union tgsi_exec_channel *src1,
 432    const union tgsi_exec_channel *src2,
 433    const union tgsi_exec_channel *src3 )
 434 {
 435    dst->f[0] = src0->f[0] == src1->f[0] ? src2->f[0] : src3->f[0];
 436    dst->f[1] = src0->f[1] == src1->f[1] ? src2->f[1] : src3->f[1];
 437    dst->f[2] = src0->f[2] == src1->f[2] ? src2->f[2] : src3->f[2];
 438    dst->f[3] = src0->f[3] == src1->f[3] ? src2->f[3] : src3->f[3];
 439 }
 440
 441 #if 0
 442 static void
 443 micro_ieq(
 444    union tgsi_exec_channel *dst,
 445    const union tgsi_exec_channel *src0,
 446    const union tgsi_exec_channel *src1,
 447    const union tgsi_exec_channel *src2,
 448    const union tgsi_exec_channel *src3 )
 449 {
 450    dst->i[0] = src0->i[0] == src1->i[0] ? src2->i[0] : src3->i[0];
 451    dst->i[1] = src0->i[1] == src1->i[1] ? src2->i[1] : src3->i[1];
 452    dst->i[2] = src0->i[2] == src1->i[2] ? src2->i[2] : src3->i[2];
 453    dst->i[3] = src0->i[3] == src1->i[3] ? src2->i[3] : src3->i[3];
 454 }
 455 #endif
 456
 457 static void
 458 micro_exp2(
 459    union tgsi_exec_channel *dst,
 460    const union tgsi_exec_channel *src)
 461 {
 462 #if FAST_MATH
 463    dst->f[0] = util_fast_exp2( src->f[0] );
 464    dst->f[1] = util_fast_exp2( src->f[1] );
 465    dst->f[2] = util_fast_exp2( src->f[2] );
 466    dst->f[3] = util_fast_exp2( src->f[3] );
 467 #else
 468    dst->f[0] = powf( 2.0f, src->f[0] );
 469    dst->f[1] = powf( 2.0f, src->f[1] );
 470    dst->f[2] = powf( 2.0f, src->f[2] );
 471    dst->f[3] = powf( 2.0f, src->f[3] );
 472 #endif
 473 }
 474
 475 #if 0
 476 static void
 477 micro_f2ut(
 478    union tgsi_exec_channel *dst,
 479    const union tgsi_exec_channel *src )
 480 {
 481    dst->u[0] = (uint) src->f[0];
 482    dst->u[1] = (uint) src->f[1];
 483    dst->u[2] = (uint) src->f[2];
 484    dst->u[3] = (uint) src->f[3];
 485 }
 486 #endif
 487
 488 static void
 489 micro_flr(
 490    union tgsi_exec_channel *dst,
 491    const union tgsi_exec_channel *src )
 492 {
 493    dst->f[0] = floorf( src->f[0] );
 494    dst->f[1] = floorf( src->f[1] );
 495    dst->f[2] = floorf( src->f[2] );
 496    dst->f[3] = floorf( src->f[3] );
 497 }
 498
 499 static void
 500 micro_frc(
 501    union tgsi_exec_channel *dst,
 502    const union tgsi_exec_channel *src )
 503 {
 504    dst->f[0] = src->f[0] - floorf( src->f[0] );
 505    dst->f[1] = src->f[1] - floorf( src->f[1] );
 506    dst->f[2] = src->f[2] - floorf( src->f[2] );
 507    dst->f[3] = src->f[3] - floorf( src->f[3] );
 508 }
 509
 510 static void
 511 micro_i2f(
 512    union tgsi_exec_channel *dst,
 513    const union tgsi_exec_channel *src )
 514 {
 515    dst->f[0] = (float) src->i[0];
 516    dst->f[1] = (float) src->i[1];
 517    dst->f[2] = (float) src->i[2];
 518    dst->f[3] = (float) src->i[3];
 519 }
 520
 521 static void
 522 micro_lg2(
 523    union tgsi_exec_channel *dst,
 524    const union tgsi_exec_channel *src )
 525 {
 526 #if FAST_MATH
 527    dst->f[0] = util_fast_log2( src->f[0] );
 528    dst->f[1] = util_fast_log2( src->f[1] );
 529    dst->f[2] = util_fast_log2( src->f[2] );
 530    dst->f[3] = util_fast_log2( src->f[3] );
 531 #else
 532    dst->f[0] = logf( src->f[0] ) * 1.442695f;
 533    dst->f[1] = logf( src->f[1] ) * 1.442695f;
 534    dst->f[2] = logf( src->f[2] ) * 1.442695f;
 535    dst->f[3] = logf( src->f[3] ) * 1.442695f;
 536 #endif
 537 }
 538
 539 static void
 540 micro_le(
 541    union tgsi_exec_channel *dst,
 542    const union tgsi_exec_channel *src0,
 543    const union tgsi_exec_channel *src1,
 544    const union tgsi_exec_channel *src2,
 545    const union tgsi_exec_channel *src3 )
 546 {
 547    dst->f[0] = src0->f[0] <= src1->f[0] ? src2->f[0] : src3->f[0];
 548    dst->f[1] = src0->f[1] <= src1->f[1] ? src2->f[1] : src3->f[1];
 549    dst->f[2] = src0->f[2] <= src1->f[2] ? src2->f[2] : src3->f[2];
 550    dst->f[3] = src0->f[3] <= src1->f[3] ? src2->f[3] : src3->f[3];
 551 }
 552
 553 static void
 554 micro_lt(
 555    union tgsi_exec_channel *dst,
 556    const union tgsi_exec_channel *src0,
 557    const union tgsi_exec_channel *src1,
 558    const union tgsi_exec_channel *src2,
 559    const union tgsi_exec_channel *src3 )
 560 {
 561    dst->f[0] = src0->f[0] < src1->f[0] ? src2->f[0] : src3->f[0];
 562    dst->f[1] = src0->f[1] < src1->f[1] ? src2->f[1] : src3->f[1];
 563    dst->f[2] = src0->f[2] < src1->f[2] ? src2->f[2] : src3->f[2];
 564    dst->f[3] = src0->f[3] < src1->f[3] ? src2->f[3] : src3->f[3];
 565 }
 566
 567 #if 0
 568 static void
 569 micro_ilt(
 570    union tgsi_exec_channel *dst,
 571    const union tgsi_exec_channel *src0,
 572    const union tgsi_exec_channel *src1,
 573    const union tgsi_exec_channel *src2,
 574    const union tgsi_exec_channel *src3 )
 575 {
 576    dst->i[0] = src0->i[0] < src1->i[0] ? src2->i[0] : src3->i[0];
 577    dst->i[1] = src0->i[1] < src1->i[1] ? src2->i[1] : src3->i[1];
 578    dst->i[2] = src0->i[2] < src1->i[2] ? src2->i[2] : src3->i[2];
 579    dst->i[3] = src0->i[3] < src1->i[3] ? src2->i[3] : src3->i[3];
 580 }
 581 #endif
 582
 583 #if 0
 584 static void
 585 micro_ult(
 586    union tgsi_exec_channel *dst,
 587    const union tgsi_exec_channel *src0,
 588    const union tgsi_exec_channel *src1,
 589    const union tgsi_exec_channel *src2,
 590    const union tgsi_exec_channel *src3 )
 591 {
 592    dst->u[0] = src0->u[0] < src1->u[0] ? src2->u[0] : src3->u[0];
 593    dst->u[1] = src0->u[1] < src1->u[1] ? src2->u[1] : src3->u[1];
 594    dst->u[2] = src0->u[2] < src1->u[2] ? src2->u[2] : src3->u[2];
 595    dst->u[3] = src0->u[3] < src1->u[3] ? src2->u[3] : src3->u[3];
 596 }
 597 #endif
 598
 599 static void
 600 micro_max(
 601    union tgsi_exec_channel *dst,
 602    const union tgsi_exec_channel *src0,
 603    const union tgsi_exec_channel *src1 )
 604 {
 605    dst->f[0] = src0->f[0] > src1->f[0] ? src0->f[0] : src1->f[0];
 606    dst->f[1] = src0->f[1] > src1->f[1] ? src0->f[1] : src1->f[1];
 607    dst->f[2] = src0->f[2] > src1->f[2] ? src0->f[2] : src1->f[2];
 608    dst->f[3] = src0->f[3] > src1->f[3] ? src0->f[3] : src1->f[3];
 609 }
 610
 611 #if 0
 612 static void
 613 micro_imax(
 614    union tgsi_exec_channel *dst,
 615    const union tgsi_exec_channel *src0,
 616    const union tgsi_exec_channel *src1 )
 617 {
 618    dst->i[0] = src0->i[0] > src1->i[0] ? src0->i[0] : src1->i[0];
 619    dst->i[1] = src0->i[1] > src1->i[1] ? src0->i[1] : src1->i[1];
 620    dst->i[2] = src0->i[2] > src1->i[2] ? src0->i[2] : src1->i[2];
 621    dst->i[3] = src0->i[3] > src1->i[3] ? src0->i[3] : src1->i[3];
 622 }
 623 #endif
 624
 625 #if 0
 626 static void
 627 micro_umax(
 628    union tgsi_exec_channel *dst,
 629    const union tgsi_exec_channel *src0,
 630    const union tgsi_exec_channel *src1 )
 631 {
 632    dst->u[0] = src0->u[0] > src1->u[0] ? src0->u[0] : src1->u[0];
 633    dst->u[1] = src0->u[1] > src1->u[1] ? src0->u[1] : src1->u[1];
 634    dst->u[2] = src0->u[2] > src1->u[2] ? src0->u[2] : src1->u[2];
 635    dst->u[3] = src0->u[3] > src1->u[3] ? src0->u[3] : src1->u[3];
 636 }
 637 #endif
 638
 639 static void
 640 micro_min(
 641    union tgsi_exec_channel *dst,
 642    const union tgsi_exec_channel *src0,
 643    const union tgsi_exec_channel *src1 )
 644 {
 645    dst->f[0] = src0->f[0] < src1->f[0] ? src0->f[0] : src1->f[0];
 646    dst->f[1] = src0->f[1] < src1->f[1] ? src0->f[1] : src1->f[1];
 647    dst->f[2] = src0->f[2] < src1->f[2] ? src0->f[2] : src1->f[2];
 648    dst->f[3] = src0->f[3] < src1->f[3] ? src0->f[3] : src1->f[3];
 649 }
 650
 651 #if 0
 652 static void
 653 micro_imin(
 654    union tgsi_exec_channel *dst,
 655    const union tgsi_exec_channel *src0,
 656    const union tgsi_exec_channel *src1 )
 657 {
 658    dst->i[0] = src0->i[0] < src1->i[0] ? src0->i[0] : src1->i[0];
 659    dst->i[1] = src0->i[1] < src1->i[1] ? src0->i[1] : src1->i[1];
 660    dst->i[2] = src0->i[2] < src1->i[2] ? src0->i[2] : src1->i[2];
 661    dst->i[3] = src0->i[3] < src1->i[3] ? src0->i[3] : src1->i[3];
 662 }
 663 #endif
 664
 665 #if 0
 666 static void
 667 micro_umin(
 668    union tgsi_exec_channel *dst,
 669    const union tgsi_exec_channel *src0,
 670    const union tgsi_exec_channel *src1 )
 671 {
 672    dst->u[0] = src0->u[0] < src1->u[0] ? src0->u[0] : src1->u[0];
 673    dst->u[1] = src0->u[1] < src1->u[1] ? src0->u[1] : src1->u[1];
 674    dst->u[2] = src0->u[2] < src1->u[2] ? src0->u[2] : src1->u[2];
 675    dst->u[3] = src0->u[3] < src1->u[3] ? src0->u[3] : src1->u[3];
 676 }
 677 #endif
 678
 679 #if 0
 680 static void
 681 micro_umod(
 682    union tgsi_exec_channel *dst,
 683    const union tgsi_exec_channel *src0,
 684    const union tgsi_exec_channel *src1 )
 685 {
 686    dst->u[0] = src0->u[0] % src1->u[0];
 687    dst->u[1] = src0->u[1] % src1->u[1];
 688    dst->u[2] = src0->u[2] % src1->u[2];
 689    dst->u[3] = src0->u[3] % src1->u[3];
 690 }
 691 #endif
 692
 693 static void
 694 micro_mul(
 695    union tgsi_exec_channel *dst,
 696    const union tgsi_exec_channel *src0,
 697    const union tgsi_exec_channel *src1 )
 698 {
 699    dst->f[0] = src0->f[0] * src1->f[0];
 700    dst->f[1] = src0->f[1] * src1->f[1];
 701    dst->f[2] = src0->f[2] * src1->f[2];
 702    dst->f[3] = src0->f[3] * src1->f[3];
 703 }
 704
 705 #if 0
 706 static void
 707 micro_imul(
 708    union tgsi_exec_channel *dst,
 709    const union tgsi_exec_channel *src0,
 710    const union tgsi_exec_channel *src1 )
 711 {
 712    dst->i[0] = src0->i[0] * src1->i[0];
 713    dst->i[1] = src0->i[1] * src1->i[1];
 714    dst->i[2] = src0->i[2] * src1->i[2];
 715    dst->i[3] = src0->i[3] * src1->i[3];
 716 }
 717 #endif
 718
 719 #if 0
 720 static void
 721 micro_imul64(
 722    union tgsi_exec_channel *dst0,
 723    union tgsi_exec_channel *dst1,
 724    const union tgsi_exec_channel *src0,
 725    const union tgsi_exec_channel *src1 )
 726 {
 727    dst1->i[0] = src0->i[0] * src1->i[0];
 728    dst1->i[1] = src0->i[1] * src1->i[1];
 729    dst1->i[2] = src0->i[2] * src1->i[2];
 730    dst1->i[3] = src0->i[3] * src1->i[3];
 731    dst0->i[0] = 0;
 732    dst0->i[1] = 0;
 733    dst0->i[2] = 0;
 734    dst0->i[3] = 0;
 735 }
 736 #endif
 737
 738 #if 0
 739 static void
 740 micro_umul64(
 741    union tgsi_exec_channel *dst0,
 742    union tgsi_exec_channel *dst1,
 743    const union tgsi_exec_channel *src0,
 744    const union tgsi_exec_channel *src1 )
 745 {
 746    dst1->u[0] = src0->u[0] * src1->u[0];
 747    dst1->u[1] = src0->u[1] * src1->u[1];
 748    dst1->u[2] = src0->u[2] * src1->u[2];
 749    dst1->u[3] = src0->u[3] * src1->u[3];
 750    dst0->u[0] = 0;
 751    dst0->u[1] = 0;
 752    dst0->u[2] = 0;
 753    dst0->u[3] = 0;
 754 }
 755 #endif
 756
 757
 758 #if 0
 759 static void
 760 micro_movc(
 761    union tgsi_exec_channel *dst,
 762    const union tgsi_exec_channel *src0,
 763    const union tgsi_exec_channel *src1,
 764    const union tgsi_exec_channel *src2 )
 765 {
 766    dst->u[0] = src0->u[0] ? src1->u[0] : src2->u[0];
 767    dst->u[1] = src0->u[1] ? src1->u[1] : src2->u[1];
 768    dst->u[2] = src0->u[2] ? src1->u[2] : src2->u[2];
 769    dst->u[3] = src0->u[3] ? src1->u[3] : src2->u[3];
 770 }
 771 #endif
 772
 773 static void
 774 micro_neg(
 775    union tgsi_exec_channel *dst,
 776    const union tgsi_exec_channel *src )
 777 {
 778    dst->f[0] = -src->f[0];
 779    dst->f[1] = -src->f[1];
 780    dst->f[2] = -src->f[2];
 781    dst->f[3] = -src->f[3];
 782 }
 783
 784 #if 0
 785 static void
 786 micro_ineg(
 787    union tgsi_exec_channel *dst,
 788    const union tgsi_exec_channel *src )
 789 {
 790    dst->i[0] = -src->i[0];
 791    dst->i[1] = -src->i[1];
 792    dst->i[2] = -src->i[2];
 793    dst->i[3] = -src->i[3];
 794 }
 795 #endif
 796
 797 static void
 798 micro_not(
 799    union tgsi_exec_channel *dst,
 800    const union tgsi_exec_channel *src )
 801 {
 802    dst->u[0] = ~src->u[0];
 803    dst->u[1] = ~src->u[1];
 804    dst->u[2] = ~src->u[2];
 805    dst->u[3] = ~src->u[3];
 806 }
 807
 808 static void
 809 micro_or(
 810    union tgsi_exec_channel *dst,
 811    const union tgsi_exec_channel *src0,
 812    const union tgsi_exec_channel *src1 )
 813 {
 814    dst->u[0] = src0->u[0] | src1->u[0];
 815    dst->u[1] = src0->u[1] | src1->u[1];
 816    dst->u[2] = src0->u[2] | src1->u[2];
 817    dst->u[3] = src0->u[3] | src1->u[3];
 818 }
 819
 820 static void
 821 micro_pow(
 822    union tgsi_exec_channel *dst,
 823    const union tgsi_exec_channel *src0,
 824    const union tgsi_exec_channel *src1 )
 825 {
 826 #if FAST_MATH
 827    dst->f[0] = util_fast_pow( src0->f[0], src1->f[0] );
 828    dst->f[1] = util_fast_pow( src0->f[1], src1->f[1] );
 829    dst->f[2] = util_fast_pow( src0->f[2], src1->f[2] );
 830    dst->f[3] = util_fast_pow( src0->f[3], src1->f[3] );
 831 #else
 832    dst->f[0] = powf( src0->f[0], src1->f[0] );
 833    dst->f[1] = powf( src0->f[1], src1->f[1] );
 834    dst->f[2] = powf( src0->f[2], src1->f[2] );
 835    dst->f[3] = powf( src0->f[3], src1->f[3] );
 836 #endif
 837 }
 838
 839 static void
 840 micro_rnd(
 841    union tgsi_exec_channel *dst,
 842    const union tgsi_exec_channel *src )
 843 {
 844    dst->f[0] = floorf( src->f[0] + 0.5f );
 845    dst->f[1] = floorf( src->f[1] + 0.5f );
 846    dst->f[2] = floorf( src->f[2] + 0.5f );
 847    dst->f[3] = floorf( src->f[3] + 0.5f );
 848 }
 849
 850 static void
 851 micro_sgn(
 852    union tgsi_exec_channel *dst,
 853    const union tgsi_exec_channel *src )
 854 {
 855    dst->f[0] = src->f[0] < 0.0f ? -1.0f : src->f[0] > 0.0f ? 1.0f : 0.0f;
 856    dst->f[1] = src->f[1] < 0.0f ? -1.0f : src->f[1] > 0.0f ? 1.0f : 0.0f;
 857    dst->f[2] = src->f[2] < 0.0f ? -1.0f : src->f[2] > 0.0f ? 1.0f : 0.0f;
 858    dst->f[3] = src->f[3] < 0.0f ? -1.0f : src->f[3] > 0.0f ? 1.0f : 0.0f;
 859 }
 860
 861 static void
 862 micro_shl(
 863    union tgsi_exec_channel *dst,
 864    const union tgsi_exec_channel *src0,
 865    const union tgsi_exec_channel *src1 )
 866 {
 867    dst->i[0] = src0->i[0] << src1->i[0];
 868    dst->i[1] = src0->i[1] << src1->i[1];
 869    dst->i[2] = src0->i[2] << src1->i[2];
 870    dst->i[3] = src0->i[3] << src1->i[3];
 871 }
 872
 873 static void
 874 micro_ishr(
 875    union tgsi_exec_channel *dst,
 876    const union tgsi_exec_channel *src0,
 877    const union tgsi_exec_channel *src1 )
 878 {
 879    dst->i[0] = src0->i[0] >> src1->i[0];
 880    dst->i[1] = src0->i[1] >> src1->i[1];
 881    dst->i[2] = src0->i[2] >> src1->i[2];
 882    dst->i[3] = src0->i[3] >> src1->i[3];
 883 }
 884
 885 static void
 886 micro_trunc(
 887    union tgsi_exec_channel *dst,
 888    const union tgsi_exec_channel *src0 )
 889 {
 890    dst->f[0] = (float) (int) src0->f[0];
 891    dst->f[1] = (float) (int) src0->f[1];
 892    dst->f[2] = (float) (int) src0->f[2];
 893    dst->f[3] = (float) (int) src0->f[3];
 894 }
 895
 896 #if 0
 897 static void
 898 micro_ushr(
 899    union tgsi_exec_channel *dst,
 900    const union tgsi_exec_channel *src0,
 901    const union tgsi_exec_channel *src1 )
 902 {
 903    dst->u[0] = src0->u[0] >> src1->u[0];
 904    dst->u[1] = src0->u[1] >> src1->u[1];
 905    dst->u[2] = src0->u[2] >> src1->u[2];
 906    dst->u[3] = src0->u[3] >> src1->u[3];
 907 }
 908 #endif
 909
 910 static void
 911 micro_sin(
 912    union tgsi_exec_channel *dst,
 913    const union tgsi_exec_channel *src )
 914 {
 915    dst->f[0] = sinf( src->f[0] );
 916    dst->f[1] = sinf( src->f[1] );
 917    dst->f[2] = sinf( src->f[2] );
 918    dst->f[3] = sinf( src->f[3] );
 919 }
 920
 921 static void
 922 micro_sqrt( union tgsi_exec_channel *dst,
 923             const union tgsi_exec_channel *src )
 924 {
 925    dst->f[0] = sqrtf( src->f[0] );
 926    dst->f[1] = sqrtf( src->f[1] );
 927    dst->f[2] = sqrtf( src->f[2] );
 928    dst->f[3] = sqrtf( src->f[3] );
 929 }
 930
 931 static void
 932 micro_sub(
 933    union tgsi_exec_channel *dst,
 934    const union tgsi_exec_channel *src0,
 935    const union tgsi_exec_channel *src1 )
 936 {
 937    dst->f[0] = src0->f[0] - src1->f[0];
 938    dst->f[1] = src0->f[1] - src1->f[1];
 939    dst->f[2] = src0->f[2] - src1->f[2];
 940    dst->f[3] = src0->f[3] - src1->f[3];
 941 }
 942
 943 #if 0
 944 static void
 945 micro_u2f(
 946    union tgsi_exec_channel *dst,
 947    const union tgsi_exec_channel *src )
 948 {
 949    dst->f[0] = (float) src->u[0];
 950    dst->f[1] = (float) src->u[1];
 951    dst->f[2] = (float) src->u[2];
 952    dst->f[3] = (float) src->u[3];
 953 }
 954 #endif
 955
 956 static void
 957 micro_xor(
 958    union tgsi_exec_channel *dst,
 959    const union tgsi_exec_channel *src0,
 960    const union tgsi_exec_channel *src1 )
 961 {
 962    dst->u[0] = src0->u[0] ^ src1->u[0];
 963    dst->u[1] = src0->u[1] ^ src1->u[1];
 964    dst->u[2] = src0->u[2] ^ src1->u[2];
 965    dst->u[3] = src0->u[3] ^ src1->u[3];
 966 }
 967
 968 static void
 969 fetch_src_file_channel(
 970    const struct tgsi_exec_machine *mach,
 971    const uint file,
 972    const uint swizzle,
 973    const union tgsi_exec_channel *index,
 974    union tgsi_exec_channel *chan )
 975 {
 976    switch( swizzle ) {
 977    case TGSI_EXTSWIZZLE_X:
 978    case TGSI_EXTSWIZZLE_Y:
 979    case TGSI_EXTSWIZZLE_Z:
 980    case TGSI_EXTSWIZZLE_W:
 981       switch( file ) {
 982       case TGSI_FILE_CONSTANT:
 983          assert(mach->Consts);
 984          if (index->i[0] < 0)
 985             chan->f[0] = 0.0f;
 986          else
 987             chan->f[0] = mach->Consts[index->i[0]][swizzle];
 988          if (index->i[1] < 0)
 989             chan->f[1] = 0.0f;
 990          else
 991             chan->f[1] = mach->Consts[index->i[1]][swizzle];
 992          if (index->i[2] < 0)
 993             chan->f[2] = 0.0f;
 994          else
 995             chan->f[2] = mach->Consts[index->i[2]][swizzle];
 996          if (index->i[3] < 0)
 997             chan->f[3] = 0.0f;
 998          else
 999             chan->f[3] = mach->Consts[index->i[3]][swizzle];
1000          break;
1001
1002       case TGSI_FILE_INPUT:
1003          chan->u[0] = mach->Inputs[index->i[0]].xyzw[swizzle].u[0];
1004          chan->u[1] = mach->Inputs[index->i[1]].xyzw[swizzle].u[1];
1005          chan->u[2] = mach->Inputs[index->i[2]].xyzw[swizzle].u[2];
1006          chan->u[3] = mach->Inputs[index->i[3]].xyzw[swizzle].u[3];
1007          break;
1008
1009       case TGSI_FILE_TEMPORARY:
1010          assert(index->i[0] < TGSI_EXEC_NUM_TEMPS);
1011          chan->u[0] = mach->Temps[index->i[0]].xyzw[swizzle].u[0];
1012          chan->u[1] = mach->Temps[index->i[1]].xyzw[swizzle].u[1];
1013          chan->u[2] = mach->Temps[index->i[2]].xyzw[swizzle].u[2];
1014          chan->u[3] = mach->Temps[index->i[3]].xyzw[swizzle].u[3];
1015          break;
1016
1017       case TGSI_FILE_IMMEDIATE:
1018          assert( index->i[0] < (int) mach->ImmLimit );
1019          chan->f[0] = mach->Imms[index->i[0]][swizzle];
1020          assert( index->i[1] < (int) mach->ImmLimit );
1021          chan->f[1] = mach->Imms[index->i[1]][swizzle];
1022          assert( index->i[2] < (int) mach->ImmLimit );
1023          chan->f[2] = mach->Imms[index->i[2]][swizzle];
1024          assert( index->i[3] < (int) mach->ImmLimit );
1025          chan->f[3] = mach->Imms[index->i[3]][swizzle];
1026          break;
1027
1028       case TGSI_FILE_ADDRESS:
1029          chan->u[0] = mach->Addrs[index->i[0]].xyzw[swizzle].u[0];
1030          chan->u[1] = mach->Addrs[index->i[1]].xyzw[swizzle].u[1];
1031          chan->u[2] = mach->Addrs[index->i[2]].xyzw[swizzle].u[2];
1032          chan->u[3] = mach->Addrs[index->i[3]].xyzw[swizzle].u[3];
1033          break;
1034
1035       case TGSI_FILE_OUTPUT:
1036          /* vertex/fragment output vars can be read too */
1037          chan->u[0] = mach->Outputs[index->i[0]].xyzw[swizzle].u[0];
1038          chan->u[1] = mach->Outputs[index->i[1]].xyzw[swizzle].u[1];
1039          chan->u[2] = mach->Outputs[index->i[2]].xyzw[swizzle].u[2];
1040          chan->u[3] = mach->Outputs[index->i[3]].xyzw[swizzle].u[3];
1041          break;
1042
1043       default:
1044          assert( 0 );
1045       }
1046       break;
1047
1048    case TGSI_EXTSWIZZLE_ZERO:
1049       *chan = mach->Temps[TEMP_0_I].xyzw[TEMP_0_C];
1050       break;
1051
1052    case TGSI_EXTSWIZZLE_ONE:
1053       *chan = mach->Temps[TEMP_1_I].xyzw[TEMP_1_C];
1054       break;
1055
1056    default:
1057       assert( 0 );
1058    }
1059 }
1060
1061 static void
1062 fetch_source(
1063    const struct tgsi_exec_machine *mach,
1064    union tgsi_exec_channel *chan,
1065    const struct tgsi_full_src_register *reg,
1066    const uint chan_index )
1067 {
1068    union tgsi_exec_channel index;
1069    uint swizzle;
1070
1071    /* We start with a direct index into a register file.
1072     *
1073     *    file[1],
1074     *    where:
1075     *       file = SrcRegister.File
1076     *       [1] = SrcRegister.Index
1077     */
1078    index.i[0] =
1079    index.i[1] =
1080    index.i[2] =
1081    index.i[3] = reg->SrcRegister.Index;
1082
1083    /* There is an extra source register that indirectly subscripts
1084     * a register file. The direct index now becomes an offset
1085     * that is being added to the indirect register.
1086     *
1087     *    file[ind[2].x+1],
1088     *    where:
1089     *       ind = SrcRegisterInd.File
1090     *       [2] = SrcRegisterInd.Index
1091     *       .x = SrcRegisterInd.SwizzleX
1092     */
1093    if (reg->SrcRegister.Indirect) {
1094       union tgsi_exec_channel index2;
1095       union tgsi_exec_channel indir_index;
1096       const uint execmask = mach->ExecMask;
1097       uint i;
1098
1099       /* which address register (always zero now) */
1100       index2.i[0] =
1101       index2.i[1] =
1102       index2.i[2] =
1103       index2.i[3] = reg->SrcRegisterInd.Index;
1104
1105       /* get current value of address register[swizzle] */
1106       swizzle = tgsi_util_get_src_register_swizzle( &reg->SrcRegisterInd, CHAN_X );
1107       fetch_src_file_channel(
1108          mach,
1109          reg->SrcRegisterInd.File,
1110          swizzle,
1111          &index2,
1112          &indir_index );
1113
1114       /* add value of address register to the offset */
1115       index.i[0] += (int) indir_index.f[0];
1116       index.i[1] += (int) indir_index.f[1];
1117       index.i[2] += (int) indir_index.f[2];
1118       index.i[3] += (int) indir_index.f[3];
1119
1120       /* for disabled execution channels, zero-out the index to
1121        * avoid using a potential garbage value.
1122        */
1123       for (i = 0; i < QUAD_SIZE; i++) {
1124          if ((execmask & (1 << i)) == 0)
1125             index.i[i] = 0;
1126       }
1127    }
1128
1129    /* There is an extra source register that is a second
1130     * subscript to a register file. Effectively it means that
1131     * the register file is actually a 2D array of registers.
1132     *
1133     *    file[1][3] == file[1*sizeof(file[1])+3],
1134     *    where:
1135     *       [3] = SrcRegisterDim.Index
1136     */
1137    if (reg->SrcRegister.Dimension) {
1138       /* The size of the first-order array depends on the register file type.
1139        * We need to multiply the index to the first array to get an effective,
1140        * "flat" index that points to the beginning of the second-order array.
1141        */
1142       switch (reg->SrcRegister.File) {
1143       case TGSI_FILE_INPUT:
1144          index.i[0] *= TGSI_EXEC_MAX_INPUT_ATTRIBS;
1145          index.i[1] *= TGSI_EXEC_MAX_INPUT_ATTRIBS;
1146          index.i[2] *= TGSI_EXEC_MAX_INPUT_ATTRIBS;
1147          index.i[3] *= TGSI_EXEC_MAX_INPUT_ATTRIBS;
1148          break;
1149       case TGSI_FILE_CONSTANT:
1150          index.i[0] *= TGSI_EXEC_MAX_CONST_BUFFER;
1151          index.i[1] *= TGSI_EXEC_MAX_CONST_BUFFER;
1152          index.i[2] *= TGSI_EXEC_MAX_CONST_BUFFER;
1153          index.i[3] *= TGSI_EXEC_MAX_CONST_BUFFER;
1154          break;
1155       default:
1156          assert( 0 );
1157       }
1158
1159       index.i[0] += reg->SrcRegisterDim.Index;
1160       index.i[1] += reg->SrcRegisterDim.Index;
1161       index.i[2] += reg->SrcRegisterDim.Index;
1162       index.i[3] += reg->SrcRegisterDim.Index;
1163
1164       /* Again, the second subscript index can be addressed indirectly
1165        * identically to the first one.
1166        * Nothing stops us from indirectly addressing the indirect register,
1167        * but there is no need for that, so we won't exercise it.
1168        *
1169        *    file[1][ind[4].y+3],
1170        *    where:
1171        *       ind = SrcRegisterDimInd.File
1172        *       [4] = SrcRegisterDimInd.Index
1173        *       .y = SrcRegisterDimInd.SwizzleX
1174        */
1175       if (reg->SrcRegisterDim.Indirect) {
1176          union tgsi_exec_channel index2;
1177          union tgsi_exec_channel indir_index;
1178          const uint execmask = mach->ExecMask;
1179          uint i;
1180
1181          index2.i[0] =
1182          index2.i[1] =
1183          index2.i[2] =
1184          index2.i[3] = reg->SrcRegisterDimInd.Index;
1185
1186          swizzle = tgsi_util_get_src_register_swizzle( &reg->SrcRegisterDimInd, CHAN_X );
1187          fetch_src_file_channel(
1188             mach,
1189             reg->SrcRegisterDimInd.File,
1190             swizzle,
1191             &index2,
1192             &indir_index );
1193
1194          index.i[0] += (int) indir_index.f[0];
1195          index.i[1] += (int) indir_index.f[1];
1196          index.i[2] += (int) indir_index.f[2];
1197          index.i[3] += (int) indir_index.f[3];
1198
1199          /* for disabled execution channels, zero-out the index to
1200           * avoid using a potential garbage value.
1201           */
1202          for (i = 0; i < QUAD_SIZE; i++) {
1203             if ((execmask & (1 << i)) == 0)
1204                index.i[i] = 0;
1205          }
1206       }
1207
1208       /* If by any chance there was a need for a 3D array of register
1209        * files, we would have to check whether SrcRegisterDim is followed
1210        * by a dimension register and continue the saga.
1211        */
1212    }
1213
1214    swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index );
1215    fetch_src_file_channel(
1216       mach,
1217       reg->SrcRegister.File,
1218       swizzle,
1219       &index,
1220       chan );
1221
1222    switch (tgsi_util_get_full_src_register_sign_mode( reg, chan_index )) {
1223    case TGSI_UTIL_SIGN_CLEAR:
1224       micro_abs( chan, chan );
1225       break;
1226
1227    case TGSI_UTIL_SIGN_SET:
1228       micro_abs( chan, chan );
1229       micro_neg( chan, chan );
1230       break;
1231
1232    case TGSI_UTIL_SIGN_TOGGLE:
1233       micro_neg( chan, chan );
1234       break;
1235
1236    case TGSI_UTIL_SIGN_KEEP:
1237       break;
1238    }
1239
1240    if (reg->SrcRegisterExtMod.Complement) {
1241       micro_sub( chan, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], chan );
1242    }
1243 }
1244
1245 static void
1246 store_dest(
1247    struct tgsi_exec_machine *mach,
1248    const union tgsi_exec_channel *chan,
1249    const struct tgsi_full_dst_register *reg,
1250    const struct tgsi_full_instruction *inst,
1251    uint chan_index )
1252 {
1253    uint i;
1254    union tgsi_exec_channel null;
1255    union tgsi_exec_channel *dst;
1256    uint execmask = mach->ExecMask;
1257
1258    switch (reg->DstRegister.File) {
1259    case TGSI_FILE_NULL:
1260       dst = &null;
1261       break;
1262
1263    case TGSI_FILE_OUTPUT:
1264       dst = &mach->Outputs[mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0]
1265                            + reg->DstRegister.Index].xyzw[chan_index];
1266       break;
1267
1268    case TGSI_FILE_TEMPORARY:
1269       assert( reg->DstRegister.Index < TGSI_EXEC_NUM_TEMPS );
1270       dst = &mach->Temps[reg->DstRegister.Index].xyzw[chan_index];
1271       break;
1272
1273    case TGSI_FILE_ADDRESS:
1274       dst = &mach->Addrs[reg->DstRegister.Index].xyzw[chan_index];
1275       break;
1276
1277    default:
1278       assert( 0 );
1279       return;
1280    }
1281
1282    if (inst->InstructionExtNv.CondFlowEnable) {
1283       union tgsi_exec_channel *cc = &mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C];
1284       uint swizzle;
1285       uint shift;
1286       uint mask;
1287       uint test;
1288
1289       /* Only CC0 supported.
1290        */
1291       assert( inst->InstructionExtNv.CondFlowIndex < 1 );
1292
1293       switch (chan_index) {
1294       case CHAN_X:
1295          swizzle = inst->InstructionExtNv.CondSwizzleX;
1296          break;
1297       case CHAN_Y:
1298          swizzle = inst->InstructionExtNv.CondSwizzleY;
1299          break;
1300       case CHAN_Z:
1301          swizzle = inst->InstructionExtNv.CondSwizzleZ;
1302          break;
1303       case CHAN_W:
1304          swizzle = inst->InstructionExtNv.CondSwizzleW;
1305          break;
1306       default:
1307          assert( 0 );
1308          return;
1309       }
1310
1311       switch (swizzle) {
1312       case TGSI_SWIZZLE_X:
1313          shift = TGSI_EXEC_CC_X_SHIFT;
1314          mask = TGSI_EXEC_CC_X_MASK;
1315          break;
1316       case TGSI_SWIZZLE_Y:
1317          shift = TGSI_EXEC_CC_Y_SHIFT;
1318          mask = TGSI_EXEC_CC_Y_MASK;
1319          break;
1320       case TGSI_SWIZZLE_Z:
1321          shift = TGSI_EXEC_CC_Z_SHIFT;
1322          mask = TGSI_EXEC_CC_Z_MASK;
1323          break;
1324       case TGSI_SWIZZLE_W:
1325          shift = TGSI_EXEC_CC_W_SHIFT;
1326          mask = TGSI_EXEC_CC_W_MASK;
1327          break;
1328       default:
1329          assert( 0 );
1330          return;
1331       }
1332
1333       switch (inst->InstructionExtNv.CondMask) {
1334       case TGSI_CC_GT:
1335          test = ~(TGSI_EXEC_CC_GT << shift) & mask;
1336          for (i = 0; i < QUAD_SIZE; i++)
1337             if (cc->u[i] & test)
1338                execmask &= ~(1 << i);
1339          break;
1340
1341       case TGSI_CC_EQ:
1342          test = ~(TGSI_EXEC_CC_EQ << shift) & mask;
1343          for (i = 0; i < QUAD_SIZE; i++)
1344             if (cc->u[i] & test)
1345                execmask &= ~(1 << i);
1346          break;
1347
1348       case TGSI_CC_LT:
1349          test = ~(TGSI_EXEC_CC_LT << shift) & mask;
1350          for (i = 0; i < QUAD_SIZE; i++)
1351             if (cc->u[i] & test)
1352                execmask &= ~(1 << i);
1353          break;
1354
1355       case TGSI_CC_GE:
1356          test = ~((TGSI_EXEC_CC_GT | TGSI_EXEC_CC_EQ) << shift) & mask;
1357          for (i = 0; i < QUAD_SIZE; i++)
1358             if (cc->u[i] & test)
1359                execmask &= ~(1 << i);
1360          break;
1361
1362       case TGSI_CC_LE:
1363          test = ~((TGSI_EXEC_CC_LT | TGSI_EXEC_CC_EQ) << shift) & mask;
1364          for (i = 0; i < QUAD_SIZE; i++)
1365             if (cc->u[i] & test)
1366                execmask &= ~(1 << i);
1367          break;
1368
1369       case TGSI_CC_NE:
1370          test = ~((TGSI_EXEC_CC_GT | TGSI_EXEC_CC_LT | TGSI_EXEC_CC_UN) << shift) & mask;
1371          for (i = 0; i < QUAD_SIZE; i++)
1372             if (cc->u[i] & test)
1373                execmask &= ~(1 << i);
1374          break;
1375
1376       case TGSI_CC_TR:
1377          break;
1378
1379       case TGSI_CC_FL:
1380          for (i = 0; i < QUAD_SIZE; i++)
1381             execmask &= ~(1 << i);
1382          break;
1383
1384       default:
1385          assert( 0 );
1386          return;
1387       }
1388    }
1389
1390    switch (inst->Instruction.Saturate) {
1391    case TGSI_SAT_NONE:
1392       for (i = 0; i < QUAD_SIZE; i++)
1393          if (execmask & (1 << i))
1394             dst->i[i] = chan->i[i];
1395       break;
1396
1397    case TGSI_SAT_ZERO_ONE:
1398       for (i = 0; i < QUAD_SIZE; i++)
1399          if (execmask & (1 << i)) {
1400             if (chan->f[i] < 0.0f)
1401                dst->f[i] = 0.0f;
1402             else if (chan->f[i] > 1.0f)
1403                dst->f[i] = 1.0f;
1404             else
1405                dst->i[i] = chan->i[i];
1406          }
1407       break;
1408
1409    case TGSI_SAT_MINUS_PLUS_ONE:
1410       for (i = 0; i < QUAD_SIZE; i++)
1411          if (execmask & (1 << i)) {
1412             if (chan->f[i] < -1.0f)
1413                dst->f[i] = -1.0f;
1414             else if (chan->f[i] > 1.0f)
1415                dst->f[i] = 1.0f;
1416             else
1417                dst->i[i] = chan->i[i];
1418          }
1419       break;
1420
1421    default:
1422       assert( 0 );
1423    }
1424
1425    if (inst->InstructionExtNv.CondDstUpdate) {
1426       union tgsi_exec_channel *cc = &mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C];
1427       uint shift;
1428       uint mask;
1429
1430       /* Only CC0 supported.
1431        */
1432       assert( inst->InstructionExtNv.CondDstIndex < 1 );
1433
1434       switch (chan_index) {
1435       case CHAN_X:
1436          shift = TGSI_EXEC_CC_X_SHIFT;
1437          mask = ~TGSI_EXEC_CC_X_MASK;
1438          break;
1439       case CHAN_Y:
1440          shift = TGSI_EXEC_CC_Y_SHIFT;
1441          mask = ~TGSI_EXEC_CC_Y_MASK;
1442          break;
1443       case CHAN_Z:
1444          shift = TGSI_EXEC_CC_Z_SHIFT;
1445          mask = ~TGSI_EXEC_CC_Z_MASK;
1446          break;
1447       case CHAN_W:
1448          shift = TGSI_EXEC_CC_W_SHIFT;
1449          mask = ~TGSI_EXEC_CC_W_MASK;
1450          break;
1451       default:
1452          assert( 0 );
1453          return;
1454       }
1455
1456       for (i = 0; i < QUAD_SIZE; i++)
1457          if (execmask & (1 << i)) {
1458             cc->u[i] &= mask;
1459             if (dst->f[i] < 0.0f)
1460                cc->u[i] |= TGSI_EXEC_CC_LT << shift;
1461             else if (dst->f[i] > 0.0f)
1462                cc->u[i] |= TGSI_EXEC_CC_GT << shift;
1463             else if (dst->f[i] == 0.0f)
1464                cc->u[i] |= TGSI_EXEC_CC_EQ << shift;
1465             else
1466                cc->u[i] |= TGSI_EXEC_CC_UN << shift;
1467          }
1468    }
1469 }
1470
/*
 * Shorthands for operand access.  Both expect variables named `mach`
 * (the machine) and `inst` (the current instruction) to be in scope where
 * the macro is used.
 *
 *   FETCH: read channel CHAN of source operand INDEX into *VAL.
 *   STORE: write *VAL to channel CHAN of destination operand INDEX.
 *
 * Arguments are parenthesized so that any expression can be passed safely.
 */
#define FETCH(VAL,INDEX,CHAN)\
    fetch_source (mach, (VAL), &inst->FullSrcRegisters[(INDEX)], (CHAN))

#define STORE(VAL,INDEX,CHAN)\
    store_dest (mach, (VAL), &inst->FullDstRegisters[(INDEX)], inst, (CHAN) )
1476
1477
1478 /**
1479  * Execute ARB-style KIL which is predicated by a src register.
1480  * Kill fragment if any of the four values is less than zero.
1481  */
1482 static void
1483 exec_kil(struct tgsi_exec_machine *mach,
1484          const struct tgsi_full_instruction *inst)
1485 {
1486    uint uniquemask;
1487    uint chan_index;
1488    uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
1489    union tgsi_exec_channel r[1];
1490
1491    /* This mask stores component bits that were already tested. Note that
1492     * we test if the value is less than zero, so 1.0 and 0.0 need not to be
1493     * tested. */
1494    uniquemask = (1 << TGSI_EXTSWIZZLE_ZERO) | (1 << TGSI_EXTSWIZZLE_ONE);
1495
1496    for (chan_index = 0; chan_index < 4; chan_index++)
1497    {
1498       uint swizzle;
1499       uint i;
1500
1501       /* unswizzle channel */
1502       swizzle = tgsi_util_get_full_src_register_extswizzle (
1503                         &inst->FullSrcRegisters[0],
1504                         chan_index);
1505
1506       /* check if the component has not been already tested */
1507       if (uniquemask & (1 << swizzle))
1508          continue;
1509       uniquemask |= 1 << swizzle;
1510
1511       FETCH(&r[0], 0, chan_index);
1512       for (i = 0; i < 4; i++)
1513          if (r[0].f[i] < 0.0f)
1514             kilmask |= 1 << i;
1515    }
1516
1517    mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask;
1518 }
1519
1520 /**
1521  * Execute NVIDIA-style KIL which is predicated by a condition code.
1522  * Kill fragment if the condition code is TRUE.
1523  */
1524 static void
1525 exec_kilp(struct tgsi_exec_machine *mach,
1526           const struct tgsi_full_instruction *inst)
1527 {
1528    uint kilmask; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
1529
1530    if (inst->InstructionExtNv.CondFlowEnable) {
1531       uint swizzle[4];
1532       uint chan_index;
1533
1534       kilmask = 0x0;
1535
1536       swizzle[0] = inst->InstructionExtNv.CondSwizzleX;
1537       swizzle[1] = inst->InstructionExtNv.CondSwizzleY;
1538       swizzle[2] = inst->InstructionExtNv.CondSwizzleZ;
1539       swizzle[3] = inst->InstructionExtNv.CondSwizzleW;
1540
1541       for (chan_index = 0; chan_index < 4; chan_index++)
1542       {
1543          uint i;
1544
1545          for (i = 0; i < 4; i++) {
1546             /* TODO: evaluate the condition code */
1547             if (0)
1548                kilmask |= 1 << i;
1549          }
1550       }
1551    }
1552    else {
1553       /* "unconditional" kil */
1554       kilmask = mach->ExecMask;
1555    }
1556    mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask;
1557 }
1558
1559
1560 /*
1561  * Fetch a four texture samples using STR texture coordinates.
1562  */
1563 static void
1564 fetch_texel( struct tgsi_sampler *sampler,
1565              const union tgsi_exec_channel *s,
1566              const union tgsi_exec_channel *t,
1567              const union tgsi_exec_channel *p,
1568              float lodbias,  /* XXX should be float[4] */
1569              union tgsi_exec_channel *r,
1570              union tgsi_exec_channel *g,
1571              union tgsi_exec_channel *b,
1572              union tgsi_exec_channel *a )
1573 {
1574    uint j;
1575    float rgba[NUM_CHANNELS][QUAD_SIZE];
1576
1577    sampler->get_samples(sampler, s->f, t->f, p->f, lodbias, rgba);
1578
1579    for (j = 0; j < 4; j++) {
1580       r->f[j] = rgba[0][j];
1581       g->f[j] = rgba[1][j];
1582       b->f[j] = rgba[2][j];
1583       a->f[j] = rgba[3][j];
1584    }
1585 }
1586
1587
1588 static void
1589 exec_tex(struct tgsi_exec_machine *mach,
1590          const struct tgsi_full_instruction *inst,
1591          boolean biasLod,
1592          boolean projected)
1593 {
1594    const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index;
1595    union tgsi_exec_channel r[4];
1596    uint chan_index;
1597    float lodBias;
1598
1599    /*   debug_printf("Sampler %u unit %u\n", sampler, unit); */
1600
1601    switch (inst->InstructionExtTexture.Texture) {
1602    case TGSI_TEXTURE_1D:
1603    case TGSI_TEXTURE_SHADOW1D:
1604
1605       FETCH(&r[0], 0, CHAN_X);
1606
1607       if (projected) {
1608          FETCH(&r[1], 0, CHAN_W);
1609          micro_div( &r[0], &r[0], &r[1] );
1610       }
1611
1612       if (biasLod) {
1613          FETCH(&r[1], 0, CHAN_W);
1614          lodBias = r[2].f[0];
1615       }
1616       else
1617          lodBias = 0.0;
1618
1619       fetch_texel(mach->Samplers[unit],
1620                   &r[0], NULL, NULL, lodBias,  /* S, T, P, BIAS */
1621                   &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
1622       break;
1623
1624    case TGSI_TEXTURE_2D:
1625    case TGSI_TEXTURE_RECT:
1626    case TGSI_TEXTURE_SHADOW2D:
1627    case TGSI_TEXTURE_SHADOWRECT:
1628
1629       FETCH(&r[0], 0, CHAN_X);
1630       FETCH(&r[1], 0, CHAN_Y);
1631       FETCH(&r[2], 0, CHAN_Z);
1632
1633       if (projected) {
1634          FETCH(&r[3], 0, CHAN_W);
1635          micro_div( &r[0], &r[0], &r[3] );
1636          micro_div( &r[1], &r[1], &r[3] );
1637          micro_div( &r[2], &r[2], &r[3] );
1638       }
1639
1640       if (biasLod) {
1641          FETCH(&r[3], 0, CHAN_W);
1642          lodBias = r[3].f[0];
1643       }
1644       else
1645          lodBias = 0.0;
1646
1647       fetch_texel(mach->Samplers[unit],
1648                   &r[0], &r[1], &r[2], lodBias,  /* inputs */
1649                   &r[0], &r[1], &r[2], &r[3]);  /* outputs */
1650       break;
1651
1652    case TGSI_TEXTURE_3D:
1653    case TGSI_TEXTURE_CUBE:
1654
1655       FETCH(&r[0], 0, CHAN_X);
1656       FETCH(&r[1], 0, CHAN_Y);
1657       FETCH(&r[2], 0, CHAN_Z);
1658
1659       if (projected) {
1660          FETCH(&r[3], 0, CHAN_W);
1661          micro_div( &r[0], &r[0], &r[3] );
1662          micro_div( &r[1], &r[1], &r[3] );
1663          micro_div( &r[2], &r[2], &r[3] );
1664       }
1665
1666       if (biasLod) {
1667          FETCH(&r[3], 0, CHAN_W);
1668          lodBias = r[3].f[0];
1669       }
1670       else
1671          lodBias = 0.0;
1672
1673       fetch_texel(mach->Samplers[unit],
1674                   &r[0], &r[1], &r[2], lodBias,
1675                   &r[0], &r[1], &r[2], &r[3]);
1676       break;
1677
1678    default:
1679       assert (0);
1680    }
1681
1682    FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1683       STORE( &r[chan_index], 0, chan_index );
1684    }
1685 }
1686
1687
1688 /**
1689  * Evaluate a constant-valued coefficient at the position of the
1690  * current quad.
1691  */
1692 static void
1693 eval_constant_coef(
1694    struct tgsi_exec_machine *mach,
1695    unsigned attrib,
1696    unsigned chan )
1697 {
1698    unsigned i;
1699
1700    for( i = 0; i < QUAD_SIZE; i++ ) {
1701       mach->Inputs[attrib].xyzw[chan].f[i] = mach->InterpCoefs[attrib].a0[chan];
1702    }
1703 }
1704
1705 /**
1706  * Evaluate a linear-valued coefficient at the position of the
1707  * current quad.
1708  */
1709 static void
1710 eval_linear_coef(
1711    struct tgsi_exec_machine *mach,
1712    unsigned attrib,
1713    unsigned chan )
1714 {
1715    const float x = mach->QuadPos.xyzw[0].f[0];
1716    const float y = mach->QuadPos.xyzw[1].f[0];
1717    const float dadx = mach->InterpCoefs[attrib].dadx[chan];
1718    const float dady = mach->InterpCoefs[attrib].dady[chan];
1719    const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
1720    mach->Inputs[attrib].xyzw[chan].f[0] = a0;
1721    mach->Inputs[attrib].xyzw[chan].f[1] = a0 + dadx;
1722    mach->Inputs[attrib].xyzw[chan].f[2] = a0 + dady;
1723    mach->Inputs[attrib].xyzw[chan].f[3] = a0 + dadx + dady;
1724 }
1725
1726 /**
1727  * Evaluate a perspective-valued coefficient at the position of the
1728  * current quad.
1729  */
1730 static void
1731 eval_perspective_coef(
1732    struct tgsi_exec_machine *mach,
1733    unsigned attrib,
1734    unsigned chan )
1735 {
1736    const float x = mach->QuadPos.xyzw[0].f[0];
1737    const float y = mach->QuadPos.xyzw[1].f[0];
1738    const float dadx = mach->InterpCoefs[attrib].dadx[chan];
1739    const float dady = mach->InterpCoefs[attrib].dady[chan];
1740    const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
1741    const float *w = mach->QuadPos.xyzw[3].f;
1742    /* divide by W here */
1743    mach->Inputs[attrib].xyzw[chan].f[0] = a0 / w[0];
1744    mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / w[1];
1745    mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / w[2];
1746    mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / w[3];
1747 }
1748
1749
/**
 * Signature shared by the eval_*_coef() interpolators: evaluate channel
 * `chan` of input attribute `attrib` for the current quad.
 */
typedef void (*eval_coef_func)( struct tgsi_exec_machine *mach,
                                unsigned attrib,
                                unsigned chan );
1754
1755 static void
1756 exec_declaration(
1757    struct tgsi_exec_machine *mach,
1758    const struct tgsi_full_declaration *decl )
1759 {
1760    if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) {
1761       if( decl->Declaration.File == TGSI_FILE_INPUT ) {
1762          unsigned first, last, mask;
1763          eval_coef_func eval;
1764
1765          first = decl->DeclarationRange.First;
1766          last = decl->DeclarationRange.Last;
1767          mask = decl->Declaration.UsageMask;
1768
1769          switch( decl->Declaration.Interpolate ) {
1770          case TGSI_INTERPOLATE_CONSTANT:
1771             eval = eval_constant_coef;
1772             break;
1773
1774          case TGSI_INTERPOLATE_LINEAR:
1775             eval = eval_linear_coef;
1776             break;
1777
1778          case TGSI_INTERPOLATE_PERSPECTIVE:
1779             eval = eval_perspective_coef;
1780             break;
1781
1782          default:
1783             eval = NULL;
1784             assert( 0 );
1785          }
1786
1787          if( mask == TGSI_WRITEMASK_XYZW ) {
1788             unsigned i, j;
1789
1790             for( i = first; i <= last; i++ ) {
1791                for( j = 0; j < NUM_CHANNELS; j++ ) {
1792                   eval( mach, i, j );
1793                }
1794             }
1795          }
1796          else {
1797             unsigned i, j;
1798
1799             for( j = 0; j < NUM_CHANNELS; j++ ) {
1800                if( mask & (1 << j) ) {
1801                   for( i = first; i <= last; i++ ) {
1802                      eval( mach, i, j );
1803                   }
1804                }
1805             }
1806          }
1807       }
1808    }
1809 }
1810
1811 static void
1812 exec_instruction(
1813    struct tgsi_exec_machine *mach,
1814    const struct tgsi_full_instruction *inst,
1815    int *pc )
1816 {
1817    uint chan_index;
1818    union tgsi_exec_channel r[8];
1819
1820    (*pc)++;
1821
1822    switch (inst->Instruction.Opcode) {
1823    case TGSI_OPCODE_ARL:
1824    /* TGSI_OPCODE_FLOOR */
1825    /* TGSI_OPCODE_FLR */
1826       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1827          FETCH( &r[0], 0, chan_index );
1828          micro_flr( &r[0], &r[0] );
1829          STORE( &r[0], 0, chan_index );
1830       }
1831       break;
1832
1833    case TGSI_OPCODE_MOV:
1834    case TGSI_OPCODE_SWZ:
1835       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1836          FETCH( &r[0], 0, chan_index );
1837          STORE( &r[0], 0, chan_index );
1838       }
1839       break;
1840
1841    case TGSI_OPCODE_LIT:
1842       if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
1843          STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X );
1844       }
1845
1846       if (IS_CHANNEL_ENABLED( *inst, CHAN_Y ) || IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1847          FETCH( &r[0], 0, CHAN_X );
1848          if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
1849             micro_max( &r[0], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
1850             STORE( &r[0], 0, CHAN_Y );
1851          }
1852
1853          if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1854             FETCH( &r[1], 0, CHAN_Y );
1855             micro_max( &r[1], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
1856
1857             FETCH( &r[2], 0, CHAN_W );
1858             micro_min( &r[2], &r[2], &mach->Temps[TEMP_128_I].xyzw[TEMP_128_C] );
1859             micro_max( &r[2], &r[2], &mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C] );
1860             micro_pow( &r[1], &r[1], &r[2] );
1861             micro_lt( &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
1862             STORE( &r[0], 0, CHAN_Z );
1863          }
1864       }
1865
1866       if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
1867          STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
1868       }
1869       break;
1870
1871    case TGSI_OPCODE_RCP:
1872    /* TGSI_OPCODE_RECIP */
1873       FETCH( &r[0], 0, CHAN_X );
1874       micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] );
1875       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1876          STORE( &r[0], 0, chan_index );
1877       }
1878       break;
1879
1880    case TGSI_OPCODE_RSQ:
1881    /* TGSI_OPCODE_RECIPSQRT */
1882       FETCH( &r[0], 0, CHAN_X );
1883       micro_abs( &r[0], &r[0] );
1884       micro_sqrt( &r[0], &r[0] );
1885       micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] );
1886       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1887          STORE( &r[0], 0, chan_index );
1888       }
1889       break;
1890
1891    case TGSI_OPCODE_EXP:
1892       FETCH( &r[0], 0, CHAN_X );
1893       micro_flr( &r[1], &r[0] );  /* r1 = floor(r0) */
1894       if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
1895          micro_exp2( &r[2], &r[1] );       /* r2 = 2 ^ r1 */
1896          STORE( &r[2], 0, CHAN_X );        /* store r2 */
1897       }
1898       if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
1899          micro_sub( &r[2], &r[0], &r[1] ); /* r2 = r0 - r1 */
1900          STORE( &r[2], 0, CHAN_Y );        /* store r2 */
1901       }
1902       if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1903          micro_exp2( &r[2], &r[0] );       /* r2 = 2 ^ r0 */
1904          STORE( &r[2], 0, CHAN_Z );        /* store r2 */
1905       }
1906       if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
1907          STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
1908       }
1909       break;
1910
1911    case TGSI_OPCODE_LOG:
1912       FETCH( &r[0], 0, CHAN_X );
1913       micro_abs( &r[2], &r[0] );  /* r2 = abs(r0) */
1914       micro_lg2( &r[1], &r[2] );  /* r1 = lg2(r2) */
1915       micro_flr( &r[0], &r[1] );  /* r0 = floor(r1) */
1916       if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
1917          STORE( &r[0], 0, CHAN_X );
1918       }
1919       if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
1920          micro_exp2( &r[0], &r[0] );       /* r0 = 2 ^ r0 */
1921          micro_div( &r[0], &r[2], &r[0] ); /* r0 = r2 / r0 */
1922          STORE( &r[0], 0, CHAN_Y );
1923       }
1924       if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1925          STORE( &r[1], 0, CHAN_Z );
1926       }
1927       if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
1928          STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
1929       }
1930       break;
1931
1932    case TGSI_OPCODE_MUL:
1933       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index )
1934       {
1935          FETCH(&r[0], 0, chan_index);
1936          FETCH(&r[1], 1, chan_index);
1937
1938          micro_mul( &r[0], &r[0], &r[1] );
1939
1940          STORE(&r[0], 0, chan_index);
1941       }
1942       break;
1943
1944    case TGSI_OPCODE_ADD:
1945       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1946          FETCH( &r[0], 0, chan_index );
1947          FETCH( &r[1], 1, chan_index );
1948          micro_add( &r[0], &r[0], &r[1] );
1949          STORE( &r[0], 0, chan_index );
1950       }
1951       break;
1952
1953    case TGSI_OPCODE_DP3:
1954    /* TGSI_OPCODE_DOT3 */
1955       FETCH( &r[0], 0, CHAN_X );
1956       FETCH( &r[1], 1, CHAN_X );
1957       micro_mul( &r[0], &r[0], &r[1] );
1958
1959       FETCH( &r[1], 0, CHAN_Y );
1960       FETCH( &r[2], 1, CHAN_Y );
1961       micro_mul( &r[1], &r[1], &r[2] );
1962       micro_add( &r[0], &r[0], &r[1] );
1963
1964       FETCH( &r[1], 0, CHAN_Z );
1965       FETCH( &r[2], 1, CHAN_Z );
1966       micro_mul( &r[1], &r[1], &r[2] );
1967       micro_add( &r[0], &r[0], &r[1] );
1968
1969       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1970          STORE( &r[0], 0, chan_index );
1971       }
1972       break;
1973
1974     case TGSI_OPCODE_DP4:
1975     /* TGSI_OPCODE_DOT4 */
1976        FETCH(&r[0], 0, CHAN_X);
1977        FETCH(&r[1], 1, CHAN_X);
1978
1979        micro_mul( &r[0], &r[0], &r[1] );
1980
1981        FETCH(&r[1], 0, CHAN_Y);
1982        FETCH(&r[2], 1, CHAN_Y);
1983
1984        micro_mul( &r[1], &r[1], &r[2] );
1985        micro_add( &r[0], &r[0], &r[1] );
1986
1987        FETCH(&r[1], 0, CHAN_Z);
1988        FETCH(&r[2], 1, CHAN_Z);
1989
1990        micro_mul( &r[1], &r[1], &r[2] );
1991        micro_add( &r[0], &r[0], &r[1] );
1992
1993        FETCH(&r[1], 0, CHAN_W);
1994        FETCH(&r[2], 1, CHAN_W);
1995
1996        micro_mul( &r[1], &r[1], &r[2] );
1997        micro_add( &r[0], &r[0], &r[1] );
1998
1999       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2000          STORE( &r[0], 0, chan_index );
2001       }
2002       break;
2003
2004    case TGSI_OPCODE_DST:
2005       if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
2006          STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X );
2007       }
2008
2009       if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
2010          FETCH( &r[0], 0, CHAN_Y );
2011          FETCH( &r[1], 1, CHAN_Y);
2012          micro_mul( &r[0], &r[0], &r[1] );
2013          STORE( &r[0], 0, CHAN_Y );
2014       }
2015
2016       if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
2017          FETCH( &r[0], 0, CHAN_Z );
2018          STORE( &r[0], 0, CHAN_Z );
2019       }
2020
2021       if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
2022          FETCH( &r[0], 1, CHAN_W );
2023          STORE( &r[0], 0, CHAN_W );
2024       }
2025       break;
2026
2027    case TGSI_OPCODE_MIN:
2028       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2029          FETCH(&r[0], 0, chan_index);
2030          FETCH(&r[1], 1, chan_index);
2031
2032          /* XXX use micro_min()?? */
2033          micro_lt( &r[0], &r[0], &r[1], &r[0], &r[1] );
2034
2035          STORE(&r[0], 0, chan_index);
2036       }
2037       break;
2038
2039    case TGSI_OPCODE_MAX:
2040       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2041          FETCH(&r[0], 0, chan_index);
2042          FETCH(&r[1], 1, chan_index);
2043
2044          /* XXX use micro_max()?? */
2045          micro_lt( &r[0], &r[0], &r[1], &r[1], &r[0] );
2046
2047          STORE(&r[0], 0, chan_index );
2048       }
2049       break;
2050
2051    case TGSI_OPCODE_SLT:
2052    /* TGSI_OPCODE_SETLT */
2053       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2054          FETCH( &r[0], 0, chan_index );
2055          FETCH( &r[1], 1, chan_index );
2056          micro_lt( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
2057          STORE( &r[0], 0, chan_index );
2058       }
2059       break;
2060
2061    case TGSI_OPCODE_SGE:
2062    /* TGSI_OPCODE_SETGE */
2063       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2064          FETCH( &r[0], 0, chan_index );
2065          FETCH( &r[1], 1, chan_index );
2066          micro_le( &r[0], &r[1], &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
2067          STORE( &r[0], 0, chan_index );
2068       }
2069       break;
2070
2071    case TGSI_OPCODE_MAD:
2072    /* TGSI_OPCODE_MADD */
2073       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2074          FETCH( &r[0], 0, chan_index );
2075          FETCH( &r[1], 1, chan_index );
2076          micro_mul( &r[0], &r[0], &r[1] );
2077          FETCH( &r[1], 2, chan_index );
2078          micro_add( &r[0], &r[0], &r[1] );
2079          STORE( &r[0], 0, chan_index );
2080       }
2081       break;
2082
2083    case TGSI_OPCODE_SUB:
2084       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2085          FETCH(&r[0], 0, chan_index);
2086          FETCH(&r[1], 1, chan_index);
2087
2088          micro_sub( &r[0], &r[0], &r[1] );
2089
2090          STORE(&r[0], 0, chan_index);
2091       }
2092       break;
2093
2094    case TGSI_OPCODE_LERP:
2095    /* TGSI_OPCODE_LRP */
2096       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2097          FETCH(&r[0], 0, chan_index);
2098          FETCH(&r[1], 1, chan_index);
2099          FETCH(&r[2], 2, chan_index);
2100
2101          micro_sub( &r[1], &r[1], &r[2] );
2102          micro_mul( &r[0], &r[0], &r[1] );
2103          micro_add( &r[0], &r[0], &r[2] );
2104
2105          STORE(&r[0], 0, chan_index);
2106       }
2107       break;
2108
2109    case TGSI_OPCODE_CND:
2110       FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2111          FETCH(&r[0], 0, chan_index);
2112          FETCH(&r[1], 1, chan_index);
2113          FETCH(&r[2], 2, chan_index);
2114          micro_lt(&r[0], &mach->Temps[TEMP_HALF_I].xyzw[TEMP_HALF_C], &r[2], &r[0], &r[1]);
2115          STORE(&r[0], 0, chan_index);
2116       }
2117       break;
2118
2119    case TGSI_OPCODE_CND0:
2120       FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2121          FETCH(&r[0], 0, chan_index);
2122          FETCH(&r[1], 1, chan_index);
2123          FETCH(&r[2], 2, chan_index);
2124          micro_le(&r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[2], &r[0], &r[1]);
2125          STORE(&r[0], 0, chan_index);
2126       }
2127       break;
2128
2129    case TGSI_OPCODE_DOT2ADD:
2130    /* TGSI_OPCODE_DP2A */
2131       FETCH( &r[0], 0, CHAN_X );
2132       FETCH( &r[1], 1, CHAN_X );
2133       micro_mul( &r[0], &r[0], &r[1] );
2134
2135       FETCH( &r[1], 0, CHAN_Y );
2136       FETCH( &r[2], 1, CHAN_Y );
2137       micro_mul( &r[1], &r[1], &r[2] );
2138       micro_add( &r[0], &r[0], &r[1] );
2139
2140       FETCH( &r[2], 2, CHAN_X );
2141       micro_add( &r[0], &r[0], &r[2] );
2142
2143       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2144          STORE( &r[0], 0, chan_index );
2145       }
2146       break;
2147
2148    case TGSI_OPCODE_INDEX:
2149       /* XXX: considered for removal */
2150       assert (0);
2151       break;
2152
2153    case TGSI_OPCODE_NEGATE:
2154       /* XXX: considered for removal */
2155       assert (0);
2156       break;
2157
2158    case TGSI_OPCODE_FRAC:
2159    /* TGSI_OPCODE_FRC */
2160       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2161          FETCH( &r[0], 0, chan_index );
2162          micro_frc( &r[0], &r[0] );
2163          STORE( &r[0], 0, chan_index );
2164       }
2165       break;
2166
2167    case TGSI_OPCODE_CLAMP:
2168       FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2169          FETCH(&r[0], 0, chan_index);
2170          FETCH(&r[1], 1, chan_index);
2171          micro_max(&r[0], &r[0], &r[1]);
2172          FETCH(&r[1], 2, chan_index);
2173          micro_min(&r[0], &r[0], &r[1]);
2174          STORE(&r[0], 0, chan_index);
2175       }
2176       break;
2177
2178    case TGSI_OPCODE_ROUND:
2179    case TGSI_OPCODE_ARR:
2180       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2181          FETCH( &r[0], 0, chan_index );
2182          micro_rnd( &r[0], &r[0] );
2183          STORE( &r[0], 0, chan_index );
2184       }
2185       break;
2186
2187    case TGSI_OPCODE_EXPBASE2:
2188    /* TGSI_OPCODE_EX2 */
2189       FETCH(&r[0], 0, CHAN_X);
2190
2191 #if FAST_MATH
2192       micro_exp2( &r[0], &r[0] );
2193 #else
2194       micro_pow( &r[0], &mach->Temps[TEMP_2_I].xyzw[TEMP_2_C], &r[0] );
2195 #endif
2196
2197       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2198          STORE( &r[0], 0, chan_index );
2199       }
2200       break;
2201
2202    case TGSI_OPCODE_LOGBASE2:
2203    /* TGSI_OPCODE_LG2 */
2204       FETCH( &r[0], 0, CHAN_X );
2205       micro_lg2( &r[0], &r[0] );
2206       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2207          STORE( &r[0], 0, chan_index );
2208       }
2209       break;
2210
2211    case TGSI_OPCODE_POWER:
2212    /* TGSI_OPCODE_POW */
2213       FETCH(&r[0], 0, CHAN_X);
2214       FETCH(&r[1], 1, CHAN_X);
2215
2216       micro_pow( &r[0], &r[0], &r[1] );
2217
2218       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2219          STORE( &r[0], 0, chan_index );
2220       }
2221       break;
2222
2223    case TGSI_OPCODE_CROSSPRODUCT:
2224    /* TGSI_OPCODE_XPD */
2225       FETCH(&r[0], 0, CHAN_Y);
2226       FETCH(&r[1], 1, CHAN_Z);
2227
2228       micro_mul( &r[2], &r[0], &r[1] );
2229
2230       FETCH(&r[3], 0, CHAN_Z);
2231       FETCH(&r[4], 1, CHAN_Y);
2232
2233       micro_mul( &r[5], &r[3], &r[4] );
2234       micro_sub( &r[2], &r[2], &r[5] );
2235
2236       if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
2237          STORE( &r[2], 0, CHAN_X );
2238       }
2239
2240       FETCH(&r[2], 1, CHAN_X);
2241
2242       micro_mul( &r[3], &r[3], &r[2] );
2243
2244       FETCH(&r[5], 0, CHAN_X);
2245
2246       micro_mul( &r[1], &r[1], &r[5] );
2247       micro_sub( &r[3], &r[3], &r[1] );
2248
2249       if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
2250          STORE( &r[3], 0, CHAN_Y );
2251       }
2252
2253       micro_mul( &r[5], &r[5], &r[4] );
2254       micro_mul( &r[0], &r[0], &r[2] );
2255       micro_sub( &r[5], &r[5], &r[0] );
2256
2257       if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
2258          STORE( &r[5], 0, CHAN_Z );
2259       }
2260
2261       if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
2262          STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
2263       }
2264       break;
2265
2266     case TGSI_OPCODE_MULTIPLYMATRIX:
2267        /* XXX: considered for removal */
2268        assert (0);
2269        break;
2270
2271     case TGSI_OPCODE_ABS:
2272        FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2273           FETCH(&r[0], 0, chan_index);
2274
2275           micro_abs( &r[0], &r[0] );
2276
2277           STORE(&r[0], 0, chan_index);
2278        }
2279        break;
2280
2281    case TGSI_OPCODE_RCC:
2282       assert (0);
2283       break;
2284
2285    case TGSI_OPCODE_DPH:
2286       FETCH(&r[0], 0, CHAN_X);
2287       FETCH(&r[1], 1, CHAN_X);
2288
2289       micro_mul( &r[0], &r[0], &r[1] );
2290
2291       FETCH(&r[1], 0, CHAN_Y);
2292       FETCH(&r[2], 1, CHAN_Y);
2293
2294       micro_mul( &r[1], &r[1], &r[2] );
2295       micro_add( &r[0], &r[0], &r[1] );
2296
2297       FETCH(&r[1], 0, CHAN_Z);
2298       FETCH(&r[2], 1, CHAN_Z);
2299
2300       micro_mul( &r[1], &r[1], &r[2] );
2301       micro_add( &r[0], &r[0], &r[1] );
2302
2303       FETCH(&r[1], 1, CHAN_W);
2304
2305       micro_add( &r[0], &r[0], &r[1] );
2306
2307       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2308          STORE( &r[0], 0, chan_index );
2309       }
2310       break;
2311
2312    case TGSI_OPCODE_COS:
2313       FETCH(&r[0], 0, CHAN_X);
2314
2315       micro_cos( &r[0], &r[0] );
2316
2317       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2318          STORE( &r[0], 0, chan_index );
2319       }
2320       break;
2321
2322    case TGSI_OPCODE_DDX:
2323       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2324          FETCH( &r[0], 0, chan_index );
2325          micro_ddx( &r[0], &r[0] );
2326          STORE( &r[0], 0, chan_index );
2327       }
2328       break;
2329
2330    case TGSI_OPCODE_DDY:
2331       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2332          FETCH( &r[0], 0, chan_index );
2333          micro_ddy( &r[0], &r[0] );
2334          STORE( &r[0], 0, chan_index );
2335       }
2336       break;
2337
2338    case TGSI_OPCODE_KILP:
2339       exec_kilp (mach, inst);
2340       break;
2341
2342    case TGSI_OPCODE_KIL:
2343       exec_kil (mach, inst);
2344       break;
2345
2346    case TGSI_OPCODE_PK2H:
2347       assert (0);
2348       break;
2349
2350    case TGSI_OPCODE_PK2US:
2351       assert (0);
2352       break;
2353
2354    case TGSI_OPCODE_PK4B:
2355       assert (0);
2356       break;
2357
2358    case TGSI_OPCODE_PK4UB:
2359       assert (0);
2360       break;
2361
2362    case TGSI_OPCODE_RFL:
2363       assert (0);
2364       break;
2365
2366    case TGSI_OPCODE_SEQ:
2367       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2368          FETCH( &r[0], 0, chan_index );
2369          FETCH( &r[1], 1, chan_index );
2370          micro_eq( &r[0], &r[0], &r[1],
2371                    &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C],
2372                    &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
2373          STORE( &r[0], 0, chan_index );
2374       }
2375       break;
2376
2377    case TGSI_OPCODE_SFL:
2378       assert (0);
2379       break;
2380
2381    case TGSI_OPCODE_SGT:
2382       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2383          FETCH( &r[0], 0, chan_index );
2384          FETCH( &r[1], 1, chan_index );
2385          micro_le( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] );
2386          STORE( &r[0], 0, chan_index );
2387       }
2388       break;
2389
2390    case TGSI_OPCODE_SIN:
2391       FETCH( &r[0], 0, CHAN_X );
2392       micro_sin( &r[0], &r[0] );
2393       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2394          STORE( &r[0], 0, chan_index );
2395       }
2396       break;
2397
2398    case TGSI_OPCODE_SLE:
2399       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2400          FETCH( &r[0], 0, chan_index );
2401          FETCH( &r[1], 1, chan_index );
2402          micro_le( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
2403          STORE( &r[0], 0, chan_index );
2404       }
2405       break;
2406
2407    case TGSI_OPCODE_SNE:
2408       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2409          FETCH( &r[0], 0, chan_index );
2410          FETCH( &r[1], 1, chan_index );
2411          micro_eq( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] );
2412          STORE( &r[0], 0, chan_index );
2413       }
2414       break;
2415
2416    case TGSI_OPCODE_STR:
2417       assert (0);
2418       break;
2419
2420    case TGSI_OPCODE_TEX:
2421       /* simple texture lookup */
2422       /* src[0] = texcoord */
2423       /* src[1] = sampler unit */
2424       exec_tex(mach, inst, FALSE, FALSE);
2425       break;
2426
2427    case TGSI_OPCODE_TXB:
2428       /* Texture lookup with lod bias */
2429       /* src[0] = texcoord (src[0].w = LOD bias) */
2430       /* src[1] = sampler unit */
2431       exec_tex(mach, inst, TRUE, FALSE);
2432       break;
2433
2434    case TGSI_OPCODE_TXD:
2435       /* Texture lookup with explict partial derivatives */
2436       /* src[0] = texcoord */
2437       /* src[1] = d[strq]/dx */
2438       /* src[2] = d[strq]/dy */
2439       /* src[3] = sampler unit */
2440       assert (0);
2441       break;
2442
2443    case TGSI_OPCODE_TXL:
2444       /* Texture lookup with explit LOD */
2445       /* src[0] = texcoord (src[0].w = LOD) */
2446       /* src[1] = sampler unit */
2447       exec_tex(mach, inst, TRUE, FALSE);
2448       break;
2449
2450    case TGSI_OPCODE_TXP:
2451       /* Texture lookup with projection */
2452       /* src[0] = texcoord (src[0].w = projection) */
2453       /* src[1] = sampler unit */
2454       exec_tex(mach, inst, FALSE, TRUE);
2455       break;
2456
2457    case TGSI_OPCODE_UP2H:
2458       assert (0);
2459       break;
2460
2461    case TGSI_OPCODE_UP2US:
2462       assert (0);
2463       break;
2464
2465    case TGSI_OPCODE_UP4B:
2466       assert (0);
2467       break;
2468
2469    case TGSI_OPCODE_UP4UB:
2470       assert (0);
2471       break;
2472
2473    case TGSI_OPCODE_X2D:
2474       assert (0);
2475       break;
2476
2477    case TGSI_OPCODE_ARA:
2478       assert (0);
2479       break;
2480
2481    case TGSI_OPCODE_BRA:
2482       assert (0);
2483       break;
2484
2485    case TGSI_OPCODE_CAL:
2486       /* skip the call if no execution channels are enabled */
2487       if (mach->ExecMask) {
2488          /* do the call */
2489
2490          /* push the Cond, Loop, Cont stacks */
2491          assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
2492          mach->CondStack[mach->CondStackTop++] = mach->CondMask;
2493          assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
2494          mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
2495          assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
2496          mach->ContStack[mach->ContStackTop++] = mach->ContMask;
2497
2498          assert(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING);
2499          mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask;
2500
2501          /* note that PC was already incremented above */
2502          mach->CallStack[mach->CallStackTop++] = *pc;
2503          *pc = inst->InstructionExtLabel.Label;
2504       }
2505       break;
2506
2507    case TGSI_OPCODE_RET:
2508       mach->FuncMask &= ~mach->ExecMask;
2509       UPDATE_EXEC_MASK(mach);
2510
2511       if (mach->FuncMask == 0x0) {
2512          /* really return now (otherwise, keep executing */
2513
2514          if (mach->CallStackTop == 0) {
2515             /* returning from main() */
2516             *pc = -1;
2517             return;
2518          }
2519          *pc = mach->CallStack[--mach->CallStackTop];
2520
2521          /* pop the Cond, Loop, Cont stacks */
2522          assert(mach->CondStackTop > 0);
2523          mach->CondMask = mach->CondStack[--mach->CondStackTop];
2524          assert(mach->LoopStackTop > 0);
2525          mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
2526          assert(mach->ContStackTop > 0);
2527          mach->ContMask = mach->ContStack[--mach->ContStackTop];
2528          assert(mach->FuncStackTop > 0);
2529          mach->FuncMask = mach->FuncStack[--mach->FuncStackTop];
2530
2531          UPDATE_EXEC_MASK(mach);
2532       }
2533       break;
2534
2535    case TGSI_OPCODE_SSG:
2536    /* TGSI_OPCODE_SGN */
2537       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2538          FETCH( &r[0], 0, chan_index );
2539          micro_sgn( &r[0], &r[0] );
2540          STORE( &r[0], 0, chan_index );
2541       }
2542       break;
2543
2544    case TGSI_OPCODE_CMP:
2545       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2546          FETCH(&r[0], 0, chan_index);
2547          FETCH(&r[1], 1, chan_index);
2548          FETCH(&r[2], 2, chan_index);
2549
2550          micro_lt( &r[0], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[1], &r[2] );
2551
2552          STORE(&r[0], 0, chan_index);
2553       }
2554       break;
2555
2556    case TGSI_OPCODE_SCS:
2557       if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) || IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
2558          FETCH( &r[0], 0, CHAN_X );
2559       }
2560       if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) ) {
2561          micro_cos( &r[1], &r[0] );
2562          STORE( &r[1], 0, CHAN_X );
2563       }
2564       if( IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
2565          micro_sin( &r[1], &r[0] );
2566          STORE( &r[1], 0, CHAN_Y );
2567       }
2568       if( IS_CHANNEL_ENABLED( *inst, CHAN_Z ) ) {
2569          STORE( &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, CHAN_Z );
2570       }
2571       if( IS_CHANNEL_ENABLED( *inst, CHAN_W ) ) {
2572          STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
2573       }
2574       break;
2575
2576    case TGSI_OPCODE_NRM:
2577       /* 3-component vector normalize */
2578       {
2579          union tgsi_exec_channel tmp, dot;
2580
2581          /* tmp = dp3(src0, src0): */
2582          FETCH( &r[0], 0, CHAN_X );
2583          micro_mul( &tmp, &r[0], &r[0] );
2584
2585          FETCH( &r[1], 0, CHAN_Y );
2586          micro_mul( &dot, &r[1], &r[1] );
2587          micro_add( &tmp, &tmp, &dot );
2588
2589          FETCH( &r[2], 0, CHAN_Z );
2590          micro_mul( &dot, &r[2], &r[2] );
2591          micro_add( &tmp, &tmp, &dot );
2592
2593          /* tmp = 1 / sqrt(tmp) */
2594          micro_sqrt( &tmp, &tmp );
2595          micro_div( &tmp, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &tmp );
2596
2597          /* note: w channel is undefined */
2598          FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2599             /* chan = chan * tmp */
2600             micro_mul( &r[chan_index], &tmp, &r[chan_index] );
2601             STORE( &r[chan_index], 0, chan_index );
2602          }
2603       }
2604       break;
2605
2606    case TGSI_OPCODE_NRM4:
2607       /* 4-component vector normalize */
2608       {
2609          union tgsi_exec_channel tmp, dot;
2610
2611          /* tmp = dp4(src0, src0): */
2612          FETCH( &r[0], 0, CHAN_X );
2613          micro_mul( &tmp, &r[0], &r[0] );
2614
2615          FETCH( &r[1], 0, CHAN_Y );
2616          micro_mul( &dot, &r[1], &r[1] );
2617          micro_add( &tmp, &tmp, &dot );
2618
2619          FETCH( &r[2], 0, CHAN_Z );
2620          micro_mul( &dot, &r[2], &r[2] );
2621          micro_add( &tmp, &tmp, &dot );
2622
2623          FETCH( &r[3], 0, CHAN_W );
2624          micro_mul( &dot, &r[3], &r[3] );
2625          micro_add( &tmp, &tmp, &dot );
2626
2627          /* tmp = 1 / sqrt(tmp) */
2628          micro_sqrt( &tmp, &tmp );
2629          micro_div( &tmp, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &tmp );
2630
2631          FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2632             /* chan = chan * tmp */
2633             micro_mul( &r[chan_index], &tmp, &r[chan_index] );
2634             STORE( &r[chan_index], 0, chan_index );
2635          }
2636       }
2637       break;
2638
2639    case TGSI_OPCODE_DIV:
2640       assert( 0 );
2641       break;
2642
2643    case TGSI_OPCODE_DP2:
2644       FETCH( &r[0], 0, CHAN_X );
2645       FETCH( &r[1], 1, CHAN_X );
2646       micro_mul( &r[0], &r[0], &r[1] );
2647
2648       FETCH( &r[1], 0, CHAN_Y );
2649       FETCH( &r[2], 1, CHAN_Y );
2650       micro_mul( &r[1], &r[1], &r[2] );
2651       micro_add( &r[0], &r[0], &r[1] );
2652
2653       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2654          STORE( &r[0], 0, chan_index );
2655       }
2656       break;
2657
2658    case TGSI_OPCODE_IF:
2659       /* push CondMask */
2660       assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
2661       mach->CondStack[mach->CondStackTop++] = mach->CondMask;
2662       FETCH( &r[0], 0, CHAN_X );
2663       /* update CondMask */
2664       if( ! r[0].u[0] ) {
2665          mach->CondMask &= ~0x1;
2666       }
2667       if( ! r[0].u[1] ) {
2668          mach->CondMask &= ~0x2;
2669       }
2670       if( ! r[0].u[2] ) {
2671          mach->CondMask &= ~0x4;
2672       }
2673       if( ! r[0].u[3] ) {
2674          mach->CondMask &= ~0x8;
2675       }
2676       UPDATE_EXEC_MASK(mach);
2677       /* Todo: If CondMask==0, jump to ELSE */
2678       break;
2679
2680    case TGSI_OPCODE_ELSE:
2681       /* invert CondMask wrt previous mask */
2682       {
2683          uint prevMask;
2684          assert(mach->CondStackTop > 0);
2685          prevMask = mach->CondStack[mach->CondStackTop - 1];
2686          mach->CondMask = ~mach->CondMask & prevMask;
2687          UPDATE_EXEC_MASK(mach);
2688          /* Todo: If CondMask==0, jump to ENDIF */
2689       }
2690       break;
2691
2692    case TGSI_OPCODE_ENDIF:
2693       /* pop CondMask */
2694       assert(mach->CondStackTop > 0);
2695       mach->CondMask = mach->CondStack[--mach->CondStackTop];
2696       UPDATE_EXEC_MASK(mach);
2697       break;
2698
2699    case TGSI_OPCODE_END:
2700       /* halt execution */
2701       *pc = -1;
2702       break;
2703
2704    case TGSI_OPCODE_REP:
2705       assert (0);
2706       break;
2707
2708    case TGSI_OPCODE_ENDREP:
2709        assert (0);
2710        break;
2711
2712    case TGSI_OPCODE_PUSHA:
2713       assert (0);
2714       break;
2715
2716    case TGSI_OPCODE_POPA:
2717       assert (0);
2718       break;
2719
2720    case TGSI_OPCODE_CEIL:
2721       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2722          FETCH( &r[0], 0, chan_index );
2723          micro_ceil( &r[0], &r[0] );
2724          STORE( &r[0], 0, chan_index );
2725       }
2726       break;
2727
2728    case TGSI_OPCODE_I2F:
2729       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2730          FETCH( &r[0], 0, chan_index );
2731          micro_i2f( &r[0], &r[0] );
2732          STORE( &r[0], 0, chan_index );
2733       }
2734       break;
2735
2736    case TGSI_OPCODE_NOT:
2737       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2738          FETCH( &r[0], 0, chan_index );
2739          micro_not( &r[0], &r[0] );
2740          STORE( &r[0], 0, chan_index );
2741       }
2742       break;
2743
2744    case TGSI_OPCODE_TRUNC:
2745       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2746          FETCH( &r[0], 0, chan_index );
2747          micro_trunc( &r[0], &r[0] );
2748          STORE( &r[0], 0, chan_index );
2749       }
2750       break;
2751
2752    case TGSI_OPCODE_SHL:
2753       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2754          FETCH( &r[0], 0, chan_index );
2755          FETCH( &r[1], 1, chan_index );
2756          micro_shl( &r[0], &r[0], &r[1] );
2757          STORE( &r[0], 0, chan_index );
2758       }
2759       break;
2760
2761    case TGSI_OPCODE_SHR:
2762       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2763          FETCH( &r[0], 0, chan_index );
2764          FETCH( &r[1], 1, chan_index );
2765          micro_ishr( &r[0], &r[0], &r[1] );
2766          STORE( &r[0], 0, chan_index );
2767       }
2768       break;
2769
2770    case TGSI_OPCODE_AND:
2771       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2772          FETCH( &r[0], 0, chan_index );
2773          FETCH( &r[1], 1, chan_index );
2774          micro_and( &r[0], &r[0], &r[1] );
2775          STORE( &r[0], 0, chan_index );
2776       }
2777       break;
2778
2779    case TGSI_OPCODE_OR:
2780       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2781          FETCH( &r[0], 0, chan_index );
2782          FETCH( &r[1], 1, chan_index );
2783          micro_or( &r[0], &r[0], &r[1] );
2784          STORE( &r[0], 0, chan_index );
2785       }
2786       break;
2787
2788    case TGSI_OPCODE_MOD:
2789       assert (0);
2790       break;
2791
2792    case TGSI_OPCODE_XOR:
2793       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2794          FETCH( &r[0], 0, chan_index );
2795          FETCH( &r[1], 1, chan_index );
2796          micro_xor( &r[0], &r[0], &r[1] );
2797          STORE( &r[0], 0, chan_index );
2798       }
2799       break;
2800
2801    case TGSI_OPCODE_SAD:
2802       assert (0);
2803       break;
2804
2805    case TGSI_OPCODE_TXF:
2806       assert (0);
2807       break;
2808
2809    case TGSI_OPCODE_TXQ:
2810       assert (0);
2811       break;
2812
2813    case TGSI_OPCODE_EMIT:
2814       mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += 16;
2815       mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++;
2816       break;
2817
2818    case TGSI_OPCODE_ENDPRIM:
2819       mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]++;
2820       mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]] = 0;
2821       break;
2822
2823    case TGSI_OPCODE_LOOP:
2824       /* fall-through (for now) */
2825    case TGSI_OPCODE_BGNLOOP2:
2826       /* push LoopMask and ContMasks */
2827       assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
2828       mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
2829       assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
2830       mach->ContStack[mach->ContStackTop++] = mach->ContMask;
2831       break;
2832
2833    case TGSI_OPCODE_ENDLOOP:
2834       /* fall-through (for now at least) */
2835    case TGSI_OPCODE_ENDLOOP2:
2836       /* Restore ContMask, but don't pop */
2837       assert(mach->ContStackTop > 0);
2838       mach->ContMask = mach->ContStack[mach->ContStackTop - 1];
2839       UPDATE_EXEC_MASK(mach);
2840       if (mach->ExecMask) {
2841          /* repeat loop: jump to instruction just past BGNLOOP */
2842          *pc = inst->InstructionExtLabel.Label + 1;
2843       }
2844       else {
2845          /* exit loop: pop LoopMask */
2846          assert(mach->LoopStackTop > 0);
2847          mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
2848          /* pop ContMask */
2849          assert(mach->ContStackTop > 0);
2850          mach->ContMask = mach->ContStack[--mach->ContStackTop];
2851       }
2852       UPDATE_EXEC_MASK(mach);
2853       break;
2854
2855    case TGSI_OPCODE_BRK:
2856       /* turn off loop channels for each enabled exec channel */
2857       mach->LoopMask &= ~mach->ExecMask;
2858       /* Todo: if mach->LoopMask == 0, jump to end of loop */
2859       UPDATE_EXEC_MASK(mach);
2860       break;
2861
2862    case TGSI_OPCODE_CONT:
2863       /* turn off cont channels for each enabled exec channel */
2864       mach->ContMask &= ~mach->ExecMask;
2865       /* Todo: if mach->LoopMask == 0, jump to end of loop */
2866       UPDATE_EXEC_MASK(mach);
2867       break;
2868
2869    case TGSI_OPCODE_BGNSUB:
2870       /* no-op */
2871       break;
2872
2873    case TGSI_OPCODE_ENDSUB:
2874       /* no-op */
2875       break;
2876
2877    case TGSI_OPCODE_NOISE1:
2878       assert( 0 );
2879       break;
2880
2881    case TGSI_OPCODE_NOISE2:
2882       assert( 0 );
2883       break;
2884
2885    case TGSI_OPCODE_NOISE3:
2886       assert( 0 );
2887       break;
2888
2889    case TGSI_OPCODE_NOISE4:
2890       assert( 0 );
2891       break;
2892
2893    case TGSI_OPCODE_NOP:
2894       break;
2895
2896    default:
2897       assert( 0 );
2898    }
2899 }
2900
2901
2902 /**
2903  * Run TGSI interpreter.
2904  * \return bitmask of "alive" quad components
2905  */
2906 uint
2907 tgsi_exec_machine_run( struct tgsi_exec_machine *mach )
2908 {
2909    uint i;
2910    int pc = 0;
2911
2912    mach->CondMask = 0xf;
2913    mach->LoopMask = 0xf;
2914    mach->ContMask = 0xf;
2915    mach->FuncMask = 0xf;
2916    mach->ExecMask = 0xf;
2917
2918    mach->CondStackTop = 0; /* temporarily subvert this assertion */
2919    assert(mach->CondStackTop == 0);
2920    assert(mach->LoopStackTop == 0);
2921    assert(mach->ContStackTop == 0);
2922    assert(mach->CallStackTop == 0);
2923
2924    mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0;
2925    mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] = 0;
2926
2927    if( mach->Processor == TGSI_PROCESSOR_GEOMETRY ) {
2928       mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0] = 0;
2929       mach->Primitives[0] = 0;
2930    }
2931
2932    for (i = 0; i < QUAD_SIZE; i++) {
2933       mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C].u[i] =
2934          (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_X_SHIFT) |
2935          (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_Y_SHIFT) |
2936          (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_Z_SHIFT) |
2937          (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_W_SHIFT);
2938    }
2939
2940    /* execute declarations (interpolants) */
2941    for (i = 0; i < mach->NumDeclarations; i++) {
2942       exec_declaration( mach, mach->Declarations+i );
2943    }
2944
2945    /* execute instructions, until pc is set to -1 */
2946    while (pc != -1) {
2947       assert(pc < (int) mach->NumInstructions);
2948       exec_instruction( mach, mach->Instructions + pc, &pc );
2949    }
2950
2951 #if 0
2952    /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */
2953    if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) {
2954       /*
2955        * Scale back depth component.
2956        */
2957       for (i = 0; i < 4; i++)
2958          mach->Outputs[0].xyzw[2].f[i] *= ctx->DrawBuffer->_DepthMaxF;
2959    }
2960 #endif
2961
2962    return ~mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
2963 }