svga: test register W component in emit_kil()
[mesa.git] / src / gallium / drivers / svga / svga_tgsi_insn.c
1 /**********************************************************
2 * Copyright 2008-2009 VMware, Inc. All rights reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person
5 * obtaining a copy of this software and associated documentation
6 * files (the "Software"), to deal in the Software without
7 * restriction, including without limitation the rights to use, copy,
8 * modify, merge, publish, distribute, sublicense, and/or sell copies
9 * of the Software, and to permit persons to whom the Software is
10 * furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice shall be
13 * included in all copies or substantial portions of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
19 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
20 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
21 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 *
24 **********************************************************/
25
26
27 #include "pipe/p_shader_tokens.h"
28 #include "tgsi/tgsi_dump.h"
29 #include "tgsi/tgsi_parse.h"
30 #include "util/u_memory.h"
31 #include "util/u_math.h"
32
33 #include "svga_tgsi_emit.h"
34 #include "svga_context.h"
35
36
37 static boolean emit_vs_postamble( struct svga_shader_emitter *emit );
38 static boolean emit_ps_postamble( struct svga_shader_emitter *emit );
39
40
41
42
43 static unsigned
44 translate_opcode(
45 uint opcode )
46 {
47 switch (opcode) {
48 case TGSI_OPCODE_ABS: return SVGA3DOP_ABS;
49 case TGSI_OPCODE_ADD: return SVGA3DOP_ADD;
50 case TGSI_OPCODE_BREAKC: return SVGA3DOP_BREAKC;
51 case TGSI_OPCODE_DP2A: return SVGA3DOP_DP2ADD;
52 case TGSI_OPCODE_DP3: return SVGA3DOP_DP3;
53 case TGSI_OPCODE_DP4: return SVGA3DOP_DP4;
54 case TGSI_OPCODE_FRC: return SVGA3DOP_FRC;
55 case TGSI_OPCODE_MAD: return SVGA3DOP_MAD;
56 case TGSI_OPCODE_MAX: return SVGA3DOP_MAX;
57 case TGSI_OPCODE_MIN: return SVGA3DOP_MIN;
58 case TGSI_OPCODE_MOV: return SVGA3DOP_MOV;
59 case TGSI_OPCODE_MUL: return SVGA3DOP_MUL;
60 case TGSI_OPCODE_NOP: return SVGA3DOP_NOP;
61 case TGSI_OPCODE_NRM4: return SVGA3DOP_NRM;
62 default:
63 debug_printf("Unknown opcode %u\n", opcode);
64 assert( 0 );
65 return SVGA3DOP_LAST_INST;
66 }
67 }
68
69
70 static unsigned translate_file( unsigned file )
71 {
72 switch (file) {
73 case TGSI_FILE_TEMPORARY: return SVGA3DREG_TEMP;
74 case TGSI_FILE_INPUT: return SVGA3DREG_INPUT;
75 case TGSI_FILE_OUTPUT: return SVGA3DREG_OUTPUT; /* VS3.0+ only */
76 case TGSI_FILE_IMMEDIATE: return SVGA3DREG_CONST;
77 case TGSI_FILE_CONSTANT: return SVGA3DREG_CONST;
78 case TGSI_FILE_SAMPLER: return SVGA3DREG_SAMPLER;
79 case TGSI_FILE_ADDRESS: return SVGA3DREG_ADDR;
80 default:
81 assert( 0 );
82 return SVGA3DREG_TEMP;
83 }
84 }
85
86
87
88
89
90
91 static SVGA3dShaderDestToken
92 translate_dst_register( struct svga_shader_emitter *emit,
93 const struct tgsi_full_instruction *insn,
94 unsigned idx )
95 {
96 const struct tgsi_full_dst_register *reg = &insn->Dst[idx];
97 SVGA3dShaderDestToken dest;
98
99 switch (reg->Register.File) {
100 case TGSI_FILE_OUTPUT:
101 /* Output registers encode semantic information in their name.
102 * Need to look up a table built at decl time:
103 */
104 dest = emit->output_map[reg->Register.Index];
105 break;
106
107 default:
108 dest = dst_register( translate_file( reg->Register.File ),
109 reg->Register.Index );
110 break;
111 }
112
113 dest.mask = reg->Register.WriteMask;
114 assert(dest.mask);
115
116 if (insn->Instruction.Saturate)
117 dest.dstMod = SVGA3DDSTMOD_SATURATE;
118
119 return dest;
120 }
121
122
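/* Compose a swizzle on top of src's existing swizzle: each requested
 * x/y/z/w selector picks a component out of the swizzle the register
 * already carries, so chained swizzles behave as expected.
 */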
123 static struct src_register
124 swizzle( struct src_register src,
125 int x,
126 int y,
127 int z,
128 int w )
129 {
130 x = (src.base.swizzle >> (x * 2)) & 0x3;
131 y = (src.base.swizzle >> (y * 2)) & 0x3;
132 z = (src.base.swizzle >> (z * 2)) & 0x3;
133 w = (src.base.swizzle >> (w * 2)) & 0x3;
134
135 src.base.swizzle = TRANSLATE_SWIZZLE(x,y,z,w);
136
137 return src;
138 }
139
140 static struct src_register
141 scalar( struct src_register src,
142 int comp )
143 {
144 return swizzle( src, comp, comp, comp, comp );
145 }
146
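/* Helpers for the "fake ARL" path: the shader scan records constants that
 * feed ARL in emit->arl_consts. When the current ARL matches one of them,
 * relative constant addressing is adjusted by that value (see
 * translate_src_register() and emit_fake_arl()).
 */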
147 static INLINE boolean
148 svga_arl_needs_adjustment( const struct svga_shader_emitter *emit )
149 {
150 int i;
151
152 for (i = 0; i < emit->num_arl_consts; ++i) {
153 if (emit->arl_consts[i].arl_num == emit->current_arl)
154 return TRUE;
155 }
156 return FALSE;
157 }
158
159 static INLINE int
160 svga_arl_adjustment( const struct svga_shader_emitter *emit )
161 {
162 int i;
163
164 for (i = 0; i < emit->num_arl_consts; ++i) {
165 if (emit->arl_consts[i].arl_num == emit->current_arl)
166 return emit->arl_consts[i].number;
167 }
168 return 0;
169 }
170
171 static struct src_register
172 translate_src_register( const struct svga_shader_emitter *emit,
173 const struct tgsi_full_src_register *reg )
174 {
175 struct src_register src;
176
177 switch (reg->Register.File) {
178 case TGSI_FILE_INPUT:
179 /* Input registers are referred to by their semantic name rather
180 * than by index. Use the mapping built up from the decls:
181 */
182 src = emit->input_map[reg->Register.Index];
183 break;
184
185 case TGSI_FILE_IMMEDIATE:
186 /* Immediates are appended after TGSI constants in the D3D
187 * constant buffer.
188 */
189 src = src_register( translate_file( reg->Register.File ),
190 reg->Register.Index +
191 emit->imm_start );
192 break;
193
194 default:
195 src = src_register( translate_file( reg->Register.File ),
196 reg->Register.Index );
197
198 break;
199 }
200
201 /* Indirect addressing.
202 */
203 if (reg->Register.Indirect) {
204 if (emit->unit == PIPE_SHADER_FRAGMENT) {
205 /* Pixel shaders have only loop registers for relative
206 * addressing into inputs. Ignore the redundant address
207 * register; the contents of aL should be in sync with it.
208 */
209 if (reg->Register.File == TGSI_FILE_INPUT) {
210 src.base.relAddr = 1;
211 src.indirect = src_token(SVGA3DREG_LOOP, 0);
212 }
213 }
214 else {
215 /* Constant buffers only.
216 */
217 if (reg->Register.File == TGSI_FILE_CONSTANT) {
218 /* we shift the offset towards the minimum */
219 if (svga_arl_needs_adjustment( emit )) {
220 src.base.num -= svga_arl_adjustment( emit );
221 }
222 src.base.relAddr = 1;
223
224 /* Not really sure what should go in the second token:
225 */
226 src.indirect = src_token( SVGA3DREG_ADDR,
227 reg->Indirect.Index );
228
229 src.indirect.swizzle = SWIZZLE_XXXX;
230 }
231 }
232 }
233
234 src = swizzle( src,
235 reg->Register.SwizzleX,
236 reg->Register.SwizzleY,
237 reg->Register.SwizzleZ,
238 reg->Register.SwizzleW );
239
240 /* src.mod isn't a bitfield, unfortunately:
241 * See tgsi_util_get_full_src_register_sign_mode for implementation details.
242 */
243 if (reg->Register.Absolute) {
244 if (reg->Register.Negate)
245 src.base.srcMod = SVGA3DSRCMOD_ABSNEG;
246 else
247 src.base.srcMod = SVGA3DSRCMOD_ABS;
248 }
249 else {
250 if (reg->Register.Negate)
251 src.base.srcMod = SVGA3DSRCMOD_NEG;
252 else
253 src.base.srcMod = SVGA3DSRCMOD_NONE;
254 }
255
256 return src;
257 }
258
259
260 /*
261 * Allocate and return a new temporary register.
262 */
263 static INLINE SVGA3dShaderDestToken
264 get_temp( struct svga_shader_emitter *emit )
265 {
266 int i = emit->nr_hw_temp + emit->internal_temp_count++;
267
268 return dst_register( SVGA3DREG_TEMP, i );
269 }
270
271 /* Release a single temp. Currently only effective if it was the last
272 * allocated temp, otherwise release will be delayed until the next
273 * call to reset_temp_regs().
274 */
275 static INLINE void
276 release_temp( struct svga_shader_emitter *emit,
277 SVGA3dShaderDestToken temp )
278 {
279 if (temp.num == emit->internal_temp_count - 1)
280 emit->internal_temp_count--;
281 }
282
283 static void reset_temp_regs( struct svga_shader_emitter *emit )
284 {
285 emit->internal_temp_count = 0;
286 }
287
288
289 /* Replace the src with the temporary specified in the dst, but copying
290 * only the necessary channels, and preserving the original swizzle (which is
291 * important given that several opcodes have constraints in the allowed
292 * swizzles).
293 */
294 static boolean emit_repl( struct svga_shader_emitter *emit,
295 SVGA3dShaderDestToken dst,
296 struct src_register *src0)
297 {
298 unsigned src0_swizzle;
299 unsigned chan;
300
301 assert(SVGA3dShaderGetRegType(dst.value) == SVGA3DREG_TEMP);
302
303 src0_swizzle = src0->base.swizzle;
304
305 dst.mask = 0;
306 for (chan = 0; chan < 4; ++chan) {
307 unsigned swizzle = (src0_swizzle >> (chan *2)) & 0x3;
308 dst.mask |= 1 << swizzle;
309 }
310 assert(dst.mask);
311
312 src0->base.swizzle = SVGA3DSWIZZLE_NONE;
313
314 if (!emit_op1( emit, inst_token( SVGA3DOP_MOV ), dst, *src0 ))
315 return FALSE;
316
317 *src0 = src( dst );
318 src0->base.swizzle = src0_swizzle;
319
320 return TRUE;
321 }
322
323
324 static boolean submit_op0( struct svga_shader_emitter *emit,
325 SVGA3dShaderInstToken inst,
326 SVGA3dShaderDestToken dest )
327 {
328 return (emit_instruction( emit, inst ) &&
329 emit_dst( emit, dest ));
330 }
331
332 static boolean submit_op1( struct svga_shader_emitter *emit,
333 SVGA3dShaderInstToken inst,
334 SVGA3dShaderDestToken dest,
335 struct src_register src0 )
336 {
337 return emit_op1( emit, inst, dest, src0 );
338 }
339
340
341 /* SVGA shaders may not refer to >1 constant register in a single
342 * instruction. This function checks for that usage and inserts a
343 * move to temporary if detected.
344 *
345 * The same applies to input registers -- at most a single input
346 * register may be read by any instruction.
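 * For example, "ADD dst, c0, c1" is rewritten here as "MOV tmp, c0"
 * followed by "ADD dst, tmp, c1"; emit_repl() copies only the channels
 * that src0's swizzle actually reads.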
347 */
348 static boolean submit_op2( struct svga_shader_emitter *emit,
349 SVGA3dShaderInstToken inst,
350 SVGA3dShaderDestToken dest,
351 struct src_register src0,
352 struct src_register src1 )
353 {
354 SVGA3dShaderDestToken temp;
355 SVGA3dShaderRegType type0, type1;
356 boolean need_temp = FALSE;
357
358 temp.value = 0;
359 type0 = SVGA3dShaderGetRegType( src0.base.value );
360 type1 = SVGA3dShaderGetRegType( src1.base.value );
361
362 if (type0 == SVGA3DREG_CONST &&
363 type1 == SVGA3DREG_CONST &&
364 src0.base.num != src1.base.num)
365 need_temp = TRUE;
366
367 if (type0 == SVGA3DREG_INPUT &&
368 type1 == SVGA3DREG_INPUT &&
369 src0.base.num != src1.base.num)
370 need_temp = TRUE;
371
372 if (need_temp) {
373 temp = get_temp( emit );
374
375 if (!emit_repl( emit, temp, &src0 ))
376 return FALSE;
377 }
378
379 if (!emit_op2( emit, inst, dest, src0, src1 ))
380 return FALSE;
381
382 if (need_temp)
383 release_temp( emit, temp );
384
385 return TRUE;
386 }
387
388
389 /* SVGA shaders may not refer to >1 constant register in a single
390 * instruction. This function checks for that usage and inserts a
391 * move to temporary if detected.
392 */
393 static boolean submit_op3( struct svga_shader_emitter *emit,
394 SVGA3dShaderInstToken inst,
395 SVGA3dShaderDestToken dest,
396 struct src_register src0,
397 struct src_register src1,
398 struct src_register src2 )
399 {
400 SVGA3dShaderDestToken temp0;
401 SVGA3dShaderDestToken temp1;
402 boolean need_temp0 = FALSE;
403 boolean need_temp1 = FALSE;
404 SVGA3dShaderRegType type0, type1, type2;
405
406 temp0.value = 0;
407 temp1.value = 0;
408 type0 = SVGA3dShaderGetRegType( src0.base.value );
409 type1 = SVGA3dShaderGetRegType( src1.base.value );
410 type2 = SVGA3dShaderGetRegType( src2.base.value );
411
412 if (inst.op != SVGA3DOP_SINCOS) {
413 if (type0 == SVGA3DREG_CONST &&
414 ((type1 == SVGA3DREG_CONST && src0.base.num != src1.base.num) ||
415 (type2 == SVGA3DREG_CONST && src0.base.num != src2.base.num)))
416 need_temp0 = TRUE;
417
418 if (type1 == SVGA3DREG_CONST &&
419 (type2 == SVGA3DREG_CONST && src1.base.num != src2.base.num))
420 need_temp1 = TRUE;
421 }
422
423 if (type0 == SVGA3DREG_INPUT &&
424 ((type1 == SVGA3DREG_INPUT && src0.base.num != src1.base.num) ||
425 (type2 == SVGA3DREG_INPUT && src0.base.num != src2.base.num)))
426 need_temp0 = TRUE;
427
428 if (type1 == SVGA3DREG_INPUT &&
429 (type2 == SVGA3DREG_INPUT && src1.base.num != src2.base.num))
430 need_temp1 = TRUE;
431
432 if (need_temp0) {
433 temp0 = get_temp( emit );
434
435 if (!emit_repl( emit, temp0, &src0 ))
436 return FALSE;
437 }
438
439 if (need_temp1) {
440 temp1 = get_temp( emit );
441
442 if (!emit_repl( emit, temp1, &src1 ))
443 return FALSE;
444 }
445
446 if (!emit_op3( emit, inst, dest, src0, src1, src2 ))
447 return FALSE;
448
449 if (need_temp1)
450 release_temp( emit, temp1 );
451 if (need_temp0)
452 release_temp( emit, temp0 );
453 return TRUE;
454 }
455
456
457
458
459 /* SVGA shaders may not refer to >1 constant register in a single
460 * instruction. This function checks for that usage and inserts a
461 * move to temporary if detected.
462 */
463 static boolean submit_op4( struct svga_shader_emitter *emit,
464 SVGA3dShaderInstToken inst,
465 SVGA3dShaderDestToken dest,
466 struct src_register src0,
467 struct src_register src1,
468 struct src_register src2,
469 struct src_register src3)
470 {
471 SVGA3dShaderDestToken temp0;
472 SVGA3dShaderDestToken temp3;
473 boolean need_temp0 = FALSE;
474 boolean need_temp3 = FALSE;
475 SVGA3dShaderRegType type0, type1, type2, type3;
476
477 temp0.value = 0;
478 temp3.value = 0;
479 type0 = SVGA3dShaderGetRegType( src0.base.value );
480 type1 = SVGA3dShaderGetRegType( src1.base.value );
481 type2 = SVGA3dShaderGetRegType( src2.base.value );
482 type3 = SVGA3dShaderGetRegType( src3.base.value );
483
484 /* Make life a little easier - this is only used by the TXD
485 * instruction, which always has a sampler (never a constant/input
486 * reg) in its second slot:
487 */
488 assert(type1 == SVGA3DREG_SAMPLER);
489
490 if (type0 == SVGA3DREG_CONST &&
491 ((type3 == SVGA3DREG_CONST && src0.base.num != src3.base.num) ||
492 (type2 == SVGA3DREG_CONST && src0.base.num != src2.base.num)))
493 need_temp0 = TRUE;
494
495 if (type3 == SVGA3DREG_CONST &&
496 (type2 == SVGA3DREG_CONST && src3.base.num != src2.base.num))
497 need_temp3 = TRUE;
498
499 if (type0 == SVGA3DREG_INPUT &&
500 ((type3 == SVGA3DREG_INPUT && src0.base.num != src3.base.num) ||
501 (type2 == SVGA3DREG_INPUT && src0.base.num != src2.base.num)))
502 need_temp0 = TRUE;
503
504 if (type3 == SVGA3DREG_INPUT &&
505 (type2 == SVGA3DREG_INPUT && src3.base.num != src2.base.num))
506 need_temp3 = TRUE;
507
508 if (need_temp0) {
509 temp0 = get_temp( emit );
510
511 if (!emit_repl( emit, temp0, &src0 ))
512 return FALSE;
513 }
514
515 if (need_temp3) {
516 temp3 = get_temp( emit );
517
518 if (!emit_repl( emit, temp3, &src3 ))
519 return FALSE;
520 }
521
522 if (!emit_op4( emit, inst, dest, src0, src1, src2, src3 ))
523 return FALSE;
524
525 if (need_temp3)
526 release_temp( emit, temp3 );
527 if (need_temp0)
528 release_temp( emit, temp0 );
529 return TRUE;
530 }
531
532
533 static boolean alias_src_dst( struct src_register src,
534 SVGA3dShaderDestToken dst )
535 {
536 if (src.base.num != dst.num)
537 return FALSE;
538
539 if (SVGA3dShaderGetRegType(dst.value) !=
540 SVGA3dShaderGetRegType(src.base.value))
541 return FALSE;
542
543 return TRUE;
544 }
545
546
547 static boolean submit_lrp(struct svga_shader_emitter *emit,
548 SVGA3dShaderDestToken dst,
549 struct src_register src0,
550 struct src_register src1,
551 struct src_register src2)
552 {
553 SVGA3dShaderDestToken tmp;
554 boolean need_dst_tmp = FALSE;
555
556 /* The dst reg must be a temporary, and not be the same as src0 or src2 */
557 if (SVGA3dShaderGetRegType(dst.value) != SVGA3DREG_TEMP ||
558 alias_src_dst(src0, dst) ||
559 alias_src_dst(src2, dst))
560 need_dst_tmp = TRUE;
561
562 if (need_dst_tmp) {
563 tmp = get_temp( emit );
564 tmp.mask = dst.mask;
565 }
566 else {
567 tmp = dst;
568 }
569
570 if (!submit_op3(emit, inst_token( SVGA3DOP_LRP ), tmp, src0, src1, src2))
571 return FALSE;
572
573 if (need_dst_tmp) {
574 if (!submit_op1(emit, inst_token( SVGA3DOP_MOV ), dst, src( tmp )))
575 return FALSE;
576 }
577
578 return TRUE;
579 }
580
581
582 static boolean emit_def_const( struct svga_shader_emitter *emit,
583 SVGA3dShaderConstType type,
584 unsigned idx,
585 float a,
586 float b,
587 float c,
588 float d )
589 {
590 SVGA3DOpDefArgs def;
591 SVGA3dShaderInstToken opcode;
592
593 switch (type) {
594 case SVGA3D_CONST_TYPE_FLOAT:
595 opcode = inst_token( SVGA3DOP_DEF );
596 def.dst = dst_register( SVGA3DREG_CONST, idx );
597 def.constValues[0] = a;
598 def.constValues[1] = b;
599 def.constValues[2] = c;
600 def.constValues[3] = d;
601 break;
602 case SVGA3D_CONST_TYPE_INT:
603 opcode = inst_token( SVGA3DOP_DEFI );
604 def.dst = dst_register( SVGA3DREG_CONSTINT, idx );
605 def.constIValues[0] = (int)a;
606 def.constIValues[1] = (int)b;
607 def.constIValues[2] = (int)c;
608 def.constIValues[3] = (int)d;
609 break;
610 default:
611 assert(0);
612 opcode = inst_token( SVGA3DOP_NOP );
613 break;
614 }
615
616 if (!emit_instruction(emit, opcode) ||
617 !svga_shader_emit_dwords( emit, def.values, Elements(def.values)))
618 return FALSE;
619
620 return TRUE;
621 }
622
623 static INLINE boolean
624 create_zero_immediate( struct svga_shader_emitter *emit )
625 {
626 unsigned idx = emit->nr_hw_float_const++;
627
628 /* Emit the constant (0, 0, -1, 1) and use swizzling to generate
629 * other useful vectors.
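 * E.g. a .xxxw swizzle yields {0, 0, 0, 1} (get_zero_immediate) and
 * .wwwz yields {1, 1, 1, -1} (get_pos_neg_one_immediate).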
630 */
631 if (!emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT,
632 idx, 0, 0, -1, 1 ))
633 return FALSE;
634
635 emit->zero_immediate_idx = idx;
636 emit->created_zero_immediate = TRUE;
637
638 return TRUE;
639 }
640
641 static INLINE boolean
642 create_loop_const( struct svga_shader_emitter *emit )
643 {
644 unsigned idx = emit->nr_hw_int_const++;
645
646 if (!emit_def_const( emit, SVGA3D_CONST_TYPE_INT, idx,
647 255, /* iteration count */
648 0, /* initial value */
649 1, /* step size */
650 0 /* not used, must be 0 */))
651 return FALSE;
652
653 emit->loop_const_idx = idx;
654 emit->created_loop_const = TRUE;
655
656 return TRUE;
657 }
658
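/* Emit the two constant vectors that the SM2.0 SINCOS macro-op expects as
 * extra operands; the values appear to match the standard D3D SINCOS
 * coefficient constants (Taylor-series terms).
 */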
659 static INLINE boolean
660 create_sincos_consts( struct svga_shader_emitter *emit )
661 {
662 unsigned idx = emit->nr_hw_float_const++;
663
664 if (!emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT, idx,
665 -1.5500992e-006f,
666 -2.1701389e-005f,
667 0.0026041667f,
668 0.00026041668f ))
669 return FALSE;
670
671 emit->sincos_consts_idx = idx;
672 idx = emit->nr_hw_float_const++;
673
674 if (!emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT, idx,
675 -0.020833334f,
676 -0.12500000f,
677 1.0f,
678 0.50000000f ))
679 return FALSE;
680
681 emit->created_sincos_consts = TRUE;
682
683 return TRUE;
684 }
685
686 static INLINE boolean
687 create_arl_consts( struct svga_shader_emitter *emit )
688 {
689 int i;
690
691 for (i = 0; i < emit->num_arl_consts; i += 4) {
692 int j;
693 unsigned idx = emit->nr_hw_float_const++;
694 float vals[4];
695 for (j = 0; j < 4 && (j + i) < emit->num_arl_consts; ++j) {
696 vals[j] = emit->arl_consts[i + j].number;
697 emit->arl_consts[i + j].idx = idx;
698 switch (j) {
699 case 0:
700 emit->arl_consts[i + 0].swizzle = TGSI_SWIZZLE_X;
701 break;
702 case 1:
703 emit->arl_consts[i + 1].swizzle = TGSI_SWIZZLE_Y;
704 break;
705 case 2:
706 emit->arl_consts[i + 2].swizzle = TGSI_SWIZZLE_Z;
707 break;
708 case 3:
709 emit->arl_consts[i + 3].swizzle = TGSI_SWIZZLE_W;
710 break;
711 }
712 }
713 while (j < 4)
714 vals[j++] = 0;
715
716 if (!emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT, idx,
717 vals[0], vals[1],
718 vals[2], vals[3]))
719 return FALSE;
720 }
721
722 return TRUE;
723 }
724
725 static INLINE struct src_register
726 get_vface( struct svga_shader_emitter *emit )
727 {
728 assert(emit->emitted_vface);
729 return src_register(SVGA3DREG_MISCTYPE,
730 SVGA3DMISCREG_FACE);
731 }
732
733 /* returns {0, 0, 0, 1} immediate */
734 static INLINE struct src_register
735 get_zero_immediate( struct svga_shader_emitter *emit )
736 {
737 assert(emit->created_zero_immediate);
738 assert(emit->zero_immediate_idx >= 0);
739 return swizzle(src_register( SVGA3DREG_CONST,
740 emit->zero_immediate_idx),
741 0, 0, 0, 3);
742 }
743
744 /* returns {1, 1, 1, -1} immediate */
745 static INLINE struct src_register
746 get_pos_neg_one_immediate( struct svga_shader_emitter *emit )
747 {
748 assert(emit->created_zero_immediate);
749 assert(emit->zero_immediate_idx >= 0);
750 return swizzle(src_register( SVGA3DREG_CONST,
751 emit->zero_immediate_idx),
752 3, 3, 3, 2);
753 }
754
755 /* returns the loop const */
756 static INLINE struct src_register
757 get_loop_const( struct svga_shader_emitter *emit )
758 {
759 assert(emit->created_loop_const);
760 assert(emit->loop_const_idx >= 0);
761 return src_register( SVGA3DREG_CONSTINT,
762 emit->loop_const_idx );
763 }
764
765 /* returns a sincos const */
766 static INLINE struct src_register
767 get_sincos_const( struct svga_shader_emitter *emit,
768 unsigned index )
769 {
770 assert(emit->created_sincos_consts);
771 assert(emit->sincos_consts_idx >= 0);
772 assert(index == 0 || index == 1);
773 return src_register( SVGA3DREG_CONST,
774 emit->sincos_consts_idx + index );
775 }
776
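/* Return a scalar src register pointing at the constant slot/component
 * that holds the value of the constant feeding the current ARL
 * (recorded in emit->arl_consts).
 */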
777 static INLINE struct src_register
778 get_fake_arl_const( struct svga_shader_emitter *emit )
779 {
780 struct src_register reg;
781 int idx = 0, swizzle = 0, i;
782
783 for (i = 0; i < emit->num_arl_consts; ++ i) {
784 if (emit->arl_consts[i].arl_num == emit->current_arl) {
785 idx = emit->arl_consts[i].idx;
786 swizzle = emit->arl_consts[i].swizzle;
787 }
788 }
789
790 reg = src_register( SVGA3DREG_CONST, idx );
791 return scalar(reg, swizzle);
792 }
793
794 static INLINE struct src_register
795 get_tex_dimensions( struct svga_shader_emitter *emit, int sampler_num )
796 {
797 int idx;
798 struct src_register reg;
799
800 /* the width/height indexes start right after constants */
801 idx = emit->key.fkey.tex[sampler_num].width_height_idx +
802 emit->info.file_max[TGSI_FILE_CONSTANT] + 1;
803
804 reg = src_register( SVGA3DREG_CONST, idx );
805 return reg;
806 }
807
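/* Emulate ARL when the address comes from a known constant: add the
 * recorded constant value to the source, then load the address register
 * with MOVA. Used together with the index adjustment done in
 * translate_src_register().
 */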
808 static boolean emit_fake_arl(struct svga_shader_emitter *emit,
809 const struct tgsi_full_instruction *insn)
810 {
811 const struct src_register src0 = translate_src_register(
812 emit, &insn->Src[0] );
813 struct src_register src1 = get_fake_arl_const( emit );
814 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
815 SVGA3dShaderDestToken tmp = get_temp( emit );
816
817 if (!submit_op1(emit, inst_token( SVGA3DOP_MOV ), tmp, src0))
818 return FALSE;
819
820 if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ), tmp, src( tmp ),
821 src1))
822 return FALSE;
823
824 /* replicate the original swizzle */
825 src1 = src(tmp);
826 src1.base.swizzle = src0.base.swizzle;
827
828 return submit_op1( emit, inst_token( SVGA3DOP_MOVA ),
829 dst, src1 );
830 }
831
832 static boolean emit_if(struct svga_shader_emitter *emit,
833 const struct tgsi_full_instruction *insn)
834 {
835 struct src_register src0 = translate_src_register(
836 emit, &insn->Src[0] );
837 struct src_register zero = get_zero_immediate( emit );
838 SVGA3dShaderInstToken if_token = inst_token( SVGA3DOP_IFC );
839
840 if_token.control = SVGA3DOPCOMPC_NE;
841 zero = scalar(zero, TGSI_SWIZZLE_X);
842
843 if (SVGA3dShaderGetRegType(src0.base.value) == SVGA3DREG_CONST) {
844 /*
845 * Max different constant registers readable per IFC instruction is 1.
846 */
847
848 SVGA3dShaderDestToken tmp = get_temp( emit );
849
850 if (!submit_op1(emit, inst_token( SVGA3DOP_MOV ), tmp, src0))
851 return FALSE;
852
853 src0 = scalar(src( tmp ), TGSI_SWIZZLE_X);
854 }
855
856 emit->dynamic_branching_level++;
857
858 return (emit_instruction( emit, if_token ) &&
859 emit_src( emit, src0 ) &&
860 emit_src( emit, zero ) );
861 }
862
863 static boolean emit_endif(struct svga_shader_emitter *emit,
864 const struct tgsi_full_instruction *insn)
865 {
866 emit->dynamic_branching_level--;
867
868 return (emit_instruction( emit,
869 inst_token( SVGA3DOP_ENDIF )));
870 }
871
872 static boolean emit_else(struct svga_shader_emitter *emit,
873 const struct tgsi_full_instruction *insn)
874 {
875 return (emit_instruction( emit,
876 inst_token( SVGA3DOP_ELSE )));
877 }
878
879 /* Translate the following TGSI FLR instruction.
880 * FLR DST, SRC
881 * To the following SVGA3D instruction sequence.
882 * FRC TMP, SRC
883 * SUB DST, SRC, TMP
884 */
885 static boolean emit_floor(struct svga_shader_emitter *emit,
886 const struct tgsi_full_instruction *insn )
887 {
888 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
889 const struct src_register src0 = translate_src_register(
890 emit, &insn->Src[0] );
891 SVGA3dShaderDestToken temp = get_temp( emit );
892
893 /* FRC TMP, SRC */
894 if (!submit_op1( emit, inst_token( SVGA3DOP_FRC ), temp, src0 ))
895 return FALSE;
896
897 /* SUB DST, SRC, TMP */
898 if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ), dst, src0,
899 negate( src( temp ) ) ))
900 return FALSE;
901
902 return TRUE;
903 }
904
905
906 /* Translate the following TGSI CMP instruction.
907 * CMP DST, SRC0, SRC1, SRC2
908 * To the following SVGA3D instruction sequence.
909 * CMP DST, SRC0, SRC2, SRC1
910 */
911 static boolean emit_cmp(struct svga_shader_emitter *emit,
912 const struct tgsi_full_instruction *insn )
913 {
914 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
915 const struct src_register src0 = translate_src_register(
916 emit, &insn->Src[0] );
917 const struct src_register src1 = translate_src_register(
918 emit, &insn->Src[1] );
919 const struct src_register src2 = translate_src_register(
920 emit, &insn->Src[2] );
921
922 if (emit->unit == PIPE_SHADER_VERTEX) {
923 SVGA3dShaderDestToken temp = get_temp(emit);
924 struct src_register zero = scalar(get_zero_immediate(emit), TGSI_SWIZZLE_X);
925
926 /* Since vertex shaders don't support the CMP instruction,
927 * simulate it with SLT and LRP instructions.
928 * SLT TMP, SRC0, 0.0
929 * LRP DST, TMP, SRC1, SRC2
930 */
931 if (!submit_op2(emit, inst_token(SVGA3DOP_SLT), temp, src0, zero))
932 return FALSE;
933 return submit_lrp(emit, dst, src(temp), src1, src2);
934 }
935
936 /* CMP DST, SRC0, SRC2, SRC1 */
937 return submit_op3( emit, inst_token( SVGA3DOP_CMP ), dst, src0, src2, src1);
938 }
939
940
941
942 /* Translate the following TGSI DIV instruction.
943 * DIV DST.xy, SRC0, SRC1
944 * To the following SVGA3D instruction sequence.
945 * RCP TMP.x, SRC1.xxxx
946 * RCP TMP.y, SRC1.yyyy
947 * MUL DST.xy, SRC0, TMP
948 */
949 static boolean emit_div(struct svga_shader_emitter *emit,
950 const struct tgsi_full_instruction *insn )
951 {
952 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
953 const struct src_register src0 = translate_src_register(
954 emit, &insn->Src[0] );
955 const struct src_register src1 = translate_src_register(
956 emit, &insn->Src[1] );
957 SVGA3dShaderDestToken temp = get_temp( emit );
958 int i;
959
960 /* For each enabled element, perform a RCP instruction. Note that
961 * RCP is scalar in SVGA3D:
962 */
963 for (i = 0; i < 4; i++) {
964 unsigned channel = 1 << i;
965 if (dst.mask & channel) {
966 /* RCP TMP.?, SRC1.???? */
967 if (!submit_op1( emit, inst_token( SVGA3DOP_RCP ),
968 writemask(temp, channel),
969 scalar(src1, i) ))
970 return FALSE;
971 }
972 }
973
974 /* Then multiply them out with a single mul:
975 *
976 * MUL DST, SRC0, TMP
977 */
978 if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ), dst, src0,
979 src( temp ) ))
980 return FALSE;
981
982 return TRUE;
983 }
984
985 /* Translate the following TGSI DP2 instruction.
986 * DP2 DST, SRC1, SRC2
987 * To the following SVGA3D instruction sequence.
988 * MUL TMP, SRC1, SRC2
989 * ADD DST, TMP.xxxx, TMP.yyyy
990 */
991 static boolean emit_dp2(struct svga_shader_emitter *emit,
992 const struct tgsi_full_instruction *insn )
993 {
994 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
995 const struct src_register src0 = translate_src_register(
996 emit, &insn->Src[0] );
997 const struct src_register src1 = translate_src_register(
998 emit, &insn->Src[1] );
999 SVGA3dShaderDestToken temp = get_temp( emit );
1000 struct src_register temp_src0, temp_src1;
1001
1002 /* MUL TMP, SRC1, SRC2 */
1003 if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ), temp, src0, src1 ))
1004 return FALSE;
1005
1006 temp_src0 = scalar(src( temp ), TGSI_SWIZZLE_X);
1007 temp_src1 = scalar(src( temp ), TGSI_SWIZZLE_Y);
1008
1009 /* ADD DST, TMP.xxxx, TMP.yyyy */
1010 if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ), dst,
1011 temp_src0, temp_src1 ))
1012 return FALSE;
1013
1014 return TRUE;
1015 }
1016
1017
1018 /* Translate the following TGSI DPH instruction.
1019 * DPH DST, SRC1, SRC2
1020 * To the following SVGA3D instruction sequence.
1021 * DP3 TMP, SRC1, SRC2
1022 * ADD DST, TMP, SRC2.wwww
1023 */
1024 static boolean emit_dph(struct svga_shader_emitter *emit,
1025 const struct tgsi_full_instruction *insn )
1026 {
1027 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1028 const struct src_register src0 = translate_src_register(
1029 emit, &insn->Src[0] );
1030 struct src_register src1 = translate_src_register(
1031 emit, &insn->Src[1] );
1032 SVGA3dShaderDestToken temp = get_temp( emit );
1033
1034 /* DP3 TMP, SRC1, SRC2 */
1035 if (!submit_op2( emit, inst_token( SVGA3DOP_DP3 ), temp, src0, src1 ))
1036 return FALSE;
1037
1038 src1 = scalar(src1, TGSI_SWIZZLE_W);
1039
1040 /* ADD DST, TMP, SRC2.wwww */
1041 if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ), dst,
1042 src( temp ), src1 ))
1043 return FALSE;
1044
1045 return TRUE;
1046 }
1047
1048 /* Translate the following TGSI NRM instruction.
1049 * NRM DST, SRC
1050 * To the following SVGA3D instruction sequence.
1051 * DP3 TMP, SRC, SRC
1052 * RSQ TMP, TMP
1053 * MUL DST, SRC, TMP
1054 */
1055 static boolean emit_nrm(struct svga_shader_emitter *emit,
1056 const struct tgsi_full_instruction *insn )
1057 {
1058 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1059 const struct src_register src0 = translate_src_register(
1060 emit, &insn->Src[0] );
1061 SVGA3dShaderDestToken temp = get_temp( emit );
1062
1063 /* DP3 TMP, SRC, SRC */
1064 if (!submit_op2( emit, inst_token( SVGA3DOP_DP3 ), temp, src0, src0 ))
1065 return FALSE;
1066
1067 /* RSQ TMP, TMP */
1068 if (!submit_op1( emit, inst_token( SVGA3DOP_RSQ ), temp, src( temp )))
1069 return FALSE;
1070
1071 /* MUL DST, SRC, TMP */
1072 if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ), dst,
1073 src0, src( temp )))
1074 return FALSE;
1075
1076 return TRUE;
1077
1078 }
1079
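/* Emit the SINCOS instruction. On SM3.0 it takes just dst and a scalar
 * source; on SM2.0 the two coefficient constants from
 * create_sincos_consts() must be passed as extra operands.
 */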
1080 static boolean do_emit_sincos(struct svga_shader_emitter *emit,
1081 SVGA3dShaderDestToken dst,
1082 struct src_register src0)
1083 {
1084 src0 = scalar(src0, TGSI_SWIZZLE_X);
1085
1086 if (emit->use_sm30) {
1087 return submit_op1( emit, inst_token( SVGA3DOP_SINCOS ),
1088 dst, src0 );
1089 } else {
1090 struct src_register const1 = get_sincos_const( emit, 0 );
1091 struct src_register const2 = get_sincos_const( emit, 1 );
1092
1093 return submit_op3( emit, inst_token( SVGA3DOP_SINCOS ),
1094 dst, src0, const1, const2 );
1095 }
1096 }
1097
1098 static boolean emit_sincos(struct svga_shader_emitter *emit,
1099 const struct tgsi_full_instruction *insn)
1100 {
1101 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1102 struct src_register src0 = translate_src_register(
1103 emit, &insn->Src[0] );
1104 SVGA3dShaderDestToken temp = get_temp( emit );
1105
1106 /* SCS TMP SRC */
1107 if (!do_emit_sincos(emit, writemask(temp, TGSI_WRITEMASK_XY), src0 ))
1108 return FALSE;
1109
1110 /* MOV DST TMP */
1111 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), dst, src( temp ) ))
1112 return FALSE;
1113
1114 return TRUE;
1115 }
1116
1117 /*
1118 * SCS TMP SRC
1119 * MOV DST TMP.yyyy
1120 */
1121 static boolean emit_sin(struct svga_shader_emitter *emit,
1122 const struct tgsi_full_instruction *insn )
1123 {
1124 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1125 struct src_register src0 = translate_src_register(
1126 emit, &insn->Src[0] );
1127 SVGA3dShaderDestToken temp = get_temp( emit );
1128
1129 /* SCS TMP SRC */
1130 if (!do_emit_sincos(emit, writemask(temp, TGSI_WRITEMASK_Y), src0))
1131 return FALSE;
1132
1133 src0 = scalar(src( temp ), TGSI_SWIZZLE_Y);
1134
1135 /* MOV DST TMP.yyyy */
1136 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), dst, src0 ))
1137 return FALSE;
1138
1139 return TRUE;
1140 }
1141
1142 /*
1143 * SCS TMP SRC
1144 * MOV DST TMP.xxxx
1145 */
1146 static boolean emit_cos(struct svga_shader_emitter *emit,
1147 const struct tgsi_full_instruction *insn )
1148 {
1149 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1150 struct src_register src0 = translate_src_register(
1151 emit, &insn->Src[0] );
1152 SVGA3dShaderDestToken temp = get_temp( emit );
1153
1154 /* SCS TMP SRC */
1155 if (!do_emit_sincos( emit, writemask(temp, TGSI_WRITEMASK_X), src0 ))
1156 return FALSE;
1157
1158 src0 = scalar(src( temp ), TGSI_SWIZZLE_X);
1159
1160 /* MOV DST TMP.xxxx */
1161 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), dst, src0 ))
1162 return FALSE;
1163
1164 return TRUE;
1165 }
1166
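/* Translate TGSI SSG (set sign). Vertex shaders use the native SGN
 * instruction (which needs two scratch temps); pixel shaders build the
 * result as CMP(src,1,0) + CMP(-src,-1,0), i.e. +1, -1 or 0.
 */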
1167 static boolean emit_ssg(struct svga_shader_emitter *emit,
1168 const struct tgsi_full_instruction *insn )
1169 {
1170 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1171 struct src_register src0 = translate_src_register(
1172 emit, &insn->Src[0] );
1173 SVGA3dShaderDestToken temp0 = get_temp( emit );
1174 SVGA3dShaderDestToken temp1 = get_temp( emit );
1175 struct src_register zero, one;
1176
1177 if (emit->unit == PIPE_SHADER_VERTEX) {
1178 /* SGN DST, SRC0, TMP0, TMP1 */
1179 return submit_op3( emit, inst_token( SVGA3DOP_SGN ), dst, src0,
1180 src( temp0 ), src( temp1 ) );
1181 }
1182
1183 zero = get_zero_immediate( emit );
1184 one = scalar( zero, TGSI_SWIZZLE_W );
1185 zero = scalar( zero, TGSI_SWIZZLE_X );
1186
1187 /* CMP TMP0, SRC0, one, zero */
1188 if (!submit_op3( emit, inst_token( SVGA3DOP_CMP ),
1189 writemask( temp0, dst.mask ), src0, one, zero ))
1190 return FALSE;
1191
1192 /* CMP TMP1, negate(SRC0), negate(one), zero */
1193 if (!submit_op3( emit, inst_token( SVGA3DOP_CMP ),
1194 writemask( temp1, dst.mask ), negate( src0 ), negate( one ),
1195 zero ))
1196 return FALSE;
1197
1198 /* ADD DST, TMP0, TMP1 */
1199 return submit_op2( emit, inst_token( SVGA3DOP_ADD ), dst, src( temp0 ),
1200 src( temp1 ) );
1201 }
1202
1203 /*
1204 * ADD DST, SRC0, negate(SRC1)
1205 */
1206 static boolean emit_sub(struct svga_shader_emitter *emit,
1207 const struct tgsi_full_instruction *insn)
1208 {
1209 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1210 struct src_register src0 = translate_src_register(
1211 emit, &insn->Src[0] );
1212 struct src_register src1 = translate_src_register(
1213 emit, &insn->Src[1] );
1214
1215 src1 = negate(src1);
1216
1217 if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ), dst,
1218 src0, src1 ))
1219 return FALSE;
1220
1221 return TRUE;
1222 }
1223
1224
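/* Translate TGSI KIL: discard the fragment if any selected component is
 * negative. SVGA3D TEXKILL only examines the x/y/z components of its
 * register, so if the source's W component is not already replicated
 * into x/y/z by the swizzle, a second TEXKILL is emitted on src.wwww.
 */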
1225 static boolean emit_kil(struct svga_shader_emitter *emit,
1226 const struct tgsi_full_instruction *insn )
1227 {
1228 const struct tgsi_full_src_register *reg = &insn->Src[0];
1229 struct src_register src0, srcIn;
1230 /* is the W component tested in another position? */
1231 const boolean w_tested = (reg->Register.SwizzleW == reg->Register.SwizzleX ||
1232 reg->Register.SwizzleW == reg->Register.SwizzleY ||
1233 reg->Register.SwizzleW == reg->Register.SwizzleZ);
1234 const boolean special = (reg->Register.Absolute ||
1235 reg->Register.Negate ||
1236 reg->Register.Indirect ||
1237 reg->Register.SwizzleX != 0 ||
1238 reg->Register.SwizzleY != 1 ||
1239 reg->Register.SwizzleZ != 2 ||
1240 reg->Register.File != TGSI_FILE_TEMPORARY);
1241 SVGA3dShaderDestToken temp;
1242
1243 src0 = srcIn = translate_src_register( emit, reg );
1244
1245 if (special || !w_tested) {
1246 /* need a temp reg */
1247 temp = get_temp( emit );
1248 }
1249
1250 if (special) {
1251 /* move the source into a temp register */
1252 submit_op1( emit, inst_token( SVGA3DOP_MOV ),
1253 writemask( temp, TGSI_WRITEMASK_XYZ ),
1254 src0 );
1255
1256 src0 = src( temp );
1257 }
1258
1259 /* do the texkill (on the xyz components) */
1260 if (!submit_op0( emit, inst_token( SVGA3DOP_TEXKILL ), dst(src0) ))
1261 return FALSE;
1262
1263 if (!w_tested) {
1264 /* need to emit a second texkill to test the W component */
1265 /* put src.wwww into temp register */
1266 if (!submit_op1(emit,
1267 inst_token( SVGA3DOP_MOV ),
1268 writemask( temp, TGSI_WRITEMASK_XYZ ),
1269 scalar(srcIn, TGSI_SWIZZLE_W)))
1270 return FALSE;
1271
1272 /* second texkill */
1273 if (!submit_op0( emit, inst_token( SVGA3DOP_TEXKILL ), temp ))
1274 return FALSE;
1275 }
1276
1277 return TRUE;
1278 }
1279
1280
1281 /* The Mesa state tracker always emits KILP as an unconditional
1282 * KIL. */
1283 static boolean emit_kilp(struct svga_shader_emitter *emit,
1284 const struct tgsi_full_instruction *insn )
1285 {
1286 SVGA3dShaderInstToken inst;
1287 SVGA3dShaderDestToken temp;
1288 struct src_register one = scalar( get_zero_immediate( emit ),
1289 TGSI_SWIZZLE_W );
1290
1291 inst = inst_token( SVGA3DOP_TEXKILL );
1292
1293 /* texkill doesn't allow negation on the operand, so let's move
1294 * the negation of {1} into a temp register */
1295 temp = get_temp( emit );
1296 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), temp,
1297 negate( one ) ))
1298 return FALSE;
1299
1300 return submit_op0( emit, inst, temp );
1301 }
1302
1303 /* Implement conditionals by initializing destination reg to 'fail',
1304 * then set predicate reg with UFOP_SETP, then move 'pass' to dest
1305 * based on predicate reg.
1306 *
1307 * SETP src0, cmp, src1 -- do this first to avoid aliasing problems.
1308 * MOV dst, fail
1309 * MOV dst, pass, p0
1310 */
1311 static boolean
1312 emit_conditional(struct svga_shader_emitter *emit,
1313 unsigned compare_func,
1314 SVGA3dShaderDestToken dst,
1315 struct src_register src0,
1316 struct src_register src1,
1317 struct src_register pass,
1318 struct src_register fail)
1319 {
1320 SVGA3dShaderDestToken pred_reg = dst_register( SVGA3DREG_PREDICATE, 0 );
1321 SVGA3dShaderInstToken setp_token, mov_token;
1322 setp_token = inst_token( SVGA3DOP_SETP );
1323
1324 switch (compare_func) {
1325 case PIPE_FUNC_NEVER:
1326 return submit_op1( emit, inst_token( SVGA3DOP_MOV ),
1327 dst, fail );
1328 break;
1329 case PIPE_FUNC_LESS:
1330 setp_token.control = SVGA3DOPCOMP_LT;
1331 break;
1332 case PIPE_FUNC_EQUAL:
1333 setp_token.control = SVGA3DOPCOMP_EQ;
1334 break;
1335 case PIPE_FUNC_LEQUAL:
1336 setp_token.control = SVGA3DOPCOMP_LE;
1337 break;
1338 case PIPE_FUNC_GREATER:
1339 setp_token.control = SVGA3DOPCOMP_GT;
1340 break;
1341 case PIPE_FUNC_NOTEQUAL:
1342 setp_token.control = SVGA3DOPCOMPC_NE;
1343 break;
1344 case PIPE_FUNC_GEQUAL:
1345 setp_token.control = SVGA3DOPCOMP_GE;
1346 break;
1347 case PIPE_FUNC_ALWAYS:
1348 return submit_op1( emit, inst_token( SVGA3DOP_MOV ),
1349 dst, pass );
1350 break;
1351 }
1352
1353 /* SETP src0, COMPOP, src1 */
1354 if (!submit_op2( emit, setp_token, pred_reg,
1355 src0, src1 ))
1356 return FALSE;
1357
1358 mov_token = inst_token( SVGA3DOP_MOV );
1359
1360 /* MOV dst, fail */
1361 if (!submit_op1( emit, mov_token, dst,
1362 fail ))
1363 return FALSE;
1364
1365 /* MOV dst, pass (predicated)
1366 *
1367 * Note that the predicate reg (and possible modifiers) is passed
1368 * as the first source argument.
1369 */
1370 mov_token.predicated = 1;
1371 if (!submit_op2( emit, mov_token, dst,
1372 src( pred_reg ), pass ))
1373 return FALSE;
1374
1375 return TRUE;
1376 }
1377
1378
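/* Emit code that sets dst to 1.0 where compare_func(src0, src1) passes
 * and 0.0 where it fails. Vertex shaders can use SGE/SLT directly for
 * some functions; otherwise fall back to the predicated sequence in
 * emit_conditional().
 */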
1379 static boolean
1380 emit_select(struct svga_shader_emitter *emit,
1381 unsigned compare_func,
1382 SVGA3dShaderDestToken dst,
1383 struct src_register src0,
1384 struct src_register src1 )
1385 {
1386 /* There are some SVGA instructions which implement some selects
1387 * directly, but they are only available in the vertex shader.
1388 */
1389 if (emit->unit == PIPE_SHADER_VERTEX) {
1390 switch (compare_func) {
1391 case PIPE_FUNC_GEQUAL:
1392 return submit_op2( emit, inst_token( SVGA3DOP_SGE ), dst, src0, src1 );
1393 case PIPE_FUNC_LEQUAL:
1394 return submit_op2( emit, inst_token( SVGA3DOP_SGE ), dst, src1, src0 );
1395 case PIPE_FUNC_GREATER:
1396 return submit_op2( emit, inst_token( SVGA3DOP_SLT ), dst, src1, src0 );
1397 case PIPE_FUNC_LESS:
1398 return submit_op2( emit, inst_token( SVGA3DOP_SLT ), dst, src0, src1 );
1399 default:
1400 break;
1401 }
1402 }
1403
1404
1405 /* Otherwise, need to use the setp approach:
1406 */
1407 {
1408 struct src_register one, zero;
1409 /* zero immediate is 0,0,0,1 */
1410 zero = get_zero_immediate( emit );
1411 one = scalar( zero, TGSI_SWIZZLE_W );
1412 zero = scalar( zero, TGSI_SWIZZLE_X );
1413
1414 return emit_conditional(
1415 emit,
1416 compare_func,
1417 dst,
1418 src0,
1419 src1,
1420 one, zero);
1421 }
1422 }
1423
1424
1425 static boolean emit_select_op(struct svga_shader_emitter *emit,
1426 unsigned compare,
1427 const struct tgsi_full_instruction *insn)
1428 {
1429 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1430 struct src_register src0 = translate_src_register(
1431 emit, &insn->Src[0] );
1432 struct src_register src1 = translate_src_register(
1433 emit, &insn->Src[1] );
1434
1435 return emit_select( emit, compare, dst, src0, src1 );
1436 }
1437
1438
1439 /* Translate two-operand texture instructions (TEX, TXP, TXB, TXL) to SVGA3D representation.
1440 */
1441 static boolean emit_tex2(struct svga_shader_emitter *emit,
1442 const struct tgsi_full_instruction *insn,
1443 SVGA3dShaderDestToken dst )
1444 {
1445 SVGA3dShaderInstToken inst;
1446 struct src_register texcoord;
1447 struct src_register sampler;
1448 SVGA3dShaderDestToken tmp;
1449
1450 inst.value = 0;
1451
1452 switch (insn->Instruction.Opcode) {
1453 case TGSI_OPCODE_TEX:
1454 inst.op = SVGA3DOP_TEX;
1455 break;
1456 case TGSI_OPCODE_TXP:
1457 inst.op = SVGA3DOP_TEX;
1458 inst.control = SVGA3DOPCONT_PROJECT;
1459 break;
1460 case TGSI_OPCODE_TXB:
1461 inst.op = SVGA3DOP_TEX;
1462 inst.control = SVGA3DOPCONT_BIAS;
1463 break;
1464 case TGSI_OPCODE_TXL:
1465 inst.op = SVGA3DOP_TEXLDL;
1466 break;
1467 default:
1468 assert(0);
1469 return FALSE;
1470 }
1471
1472 texcoord = translate_src_register( emit, &insn->Src[0] );
1473 sampler = translate_src_register( emit, &insn->Src[1] );
1474
1475 if (emit->key.fkey.tex[sampler.base.num].unnormalized ||
1476 emit->dynamic_branching_level > 0)
1477 tmp = get_temp( emit );
1478
1479 /* Can't do mipmapping inside dynamic branch constructs. Force LOD
1480 * zero in that case.
1481 */
1482 if (emit->dynamic_branching_level > 0 &&
1483 inst.op == SVGA3DOP_TEX &&
1484 SVGA3dShaderGetRegType(texcoord.base.value) == SVGA3DREG_TEMP) {
1485 struct src_register zero = get_zero_immediate( emit );
1486
1487 /* MOV tmp, texcoord */
1488 if (!submit_op1( emit,
1489 inst_token( SVGA3DOP_MOV ),
1490 tmp,
1491 texcoord ))
1492 return FALSE;
1493
1494 /* MOV tmp.w, zero */
1495 if (!submit_op1( emit,
1496 inst_token( SVGA3DOP_MOV ),
1497 writemask( tmp, TGSI_WRITEMASK_W ),
1498 scalar( zero, TGSI_SWIZZLE_X )))
1499 return FALSE;
1500
1501 texcoord = src( tmp );
1502 inst.op = SVGA3DOP_TEXLDL;
1503 }
1504
1505 /* Explicit normalization of texcoords:
1506 */
1507 if (emit->key.fkey.tex[sampler.base.num].unnormalized) {
1508 struct src_register wh = get_tex_dimensions( emit, sampler.base.num );
1509
1510 /* MUL tmp, SRC0, WH */
1511 if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ),
1512 tmp, texcoord, wh ))
1513 return FALSE;
1514
1515 texcoord = src( tmp );
1516 }
1517
1518 return submit_op2( emit, inst, dst, texcoord, sampler );
1519 }
1520
1521
1522
1523
1524 /* Translate four-operand texture instructions (TXD) to SVGA3D representation.
1525 */
1526 static boolean emit_tex4(struct svga_shader_emitter *emit,
1527 const struct tgsi_full_instruction *insn,
1528 SVGA3dShaderDestToken dst )
1529 {
1530 SVGA3dShaderInstToken inst;
1531 struct src_register texcoord;
1532 struct src_register ddx;
1533 struct src_register ddy;
1534 struct src_register sampler;
1535
1536 texcoord = translate_src_register( emit, &insn->Src[0] );
1537 ddx = translate_src_register( emit, &insn->Src[1] );
1538 ddy = translate_src_register( emit, &insn->Src[2] );
1539 sampler = translate_src_register( emit, &insn->Src[3] );
1540
1541 inst.value = 0;
1542
1543 switch (insn->Instruction.Opcode) {
1544 case TGSI_OPCODE_TXD:
1545 inst.op = SVGA3DOP_TEXLDD; /* 4 args! */
1546 break;
1547 default:
1548 assert(0);
1549 return FALSE;
1550 }
1551
1552 return submit_op4( emit, inst, dst, texcoord, sampler, ddx, ddy );
1553 }
1554
1555
1556 /**
1557 * Emit texture swizzle code.
1558 */
1559 static boolean emit_tex_swizzle( struct svga_shader_emitter *emit,
1560 SVGA3dShaderDestToken dst,
1561 struct src_register src,
1562 unsigned swizzle_x,
1563 unsigned swizzle_y,
1564 unsigned swizzle_z,
1565 unsigned swizzle_w)
1566 {
1567 const unsigned swizzleIn[4] = {swizzle_x, swizzle_y, swizzle_z, swizzle_w};
1568 unsigned srcSwizzle[4];
1569 unsigned srcWritemask = 0x0, zeroWritemask = 0x0, oneWritemask = 0x0;
1570 int i;
1571
1572 /* build writemasks and srcSwizzle terms */
1573 for (i = 0; i < 4; i++) {
1574 if (swizzleIn[i] == PIPE_SWIZZLE_ZERO) {
1575 srcSwizzle[i] = TGSI_SWIZZLE_X + i;
1576 zeroWritemask |= (1 << i);
1577 }
1578 else if (swizzleIn[i] == PIPE_SWIZZLE_ONE) {
1579 srcSwizzle[i] = TGSI_SWIZZLE_X + i;
1580 oneWritemask |= (1 << i);
1581 }
1582 else {
1583 srcSwizzle[i] = swizzleIn[i];
1584 srcWritemask |= (1 << i);
1585 }
1586 }
1587
1588 /* write x/y/z/w comps */
1589 if (dst.mask & srcWritemask) {
1590 if (!submit_op1(emit,
1591 inst_token(SVGA3DOP_MOV),
1592 writemask(dst, srcWritemask),
1593 swizzle(src,
1594 srcSwizzle[0],
1595 srcSwizzle[1],
1596 srcSwizzle[2],
1597 srcSwizzle[3])))
1598 return FALSE;
1599 }
1600
1601 /* write 0 comps */
1602 if (dst.mask & zeroWritemask) {
1603 if (!submit_op1(emit,
1604 inst_token(SVGA3DOP_MOV),
1605 writemask(dst, zeroWritemask),
1606 scalar(get_zero_immediate(emit), TGSI_SWIZZLE_X)))
1607 return FALSE;
1608 }
1609
1610 /* write 1 comps */
1611 if (dst.mask & oneWritemask) {
1612 if (!submit_op1(emit,
1613 inst_token(SVGA3DOP_MOV),
1614 writemask(dst, oneWritemask),
1615 scalar(get_zero_immediate(emit), TGSI_SWIZZLE_W)))
1616 return FALSE;
1617 }
1618
1619 return TRUE;
1620 }
1621
1622
1623 static boolean emit_tex(struct svga_shader_emitter *emit,
1624 const struct tgsi_full_instruction *insn )
1625 {
1626 SVGA3dShaderDestToken dst =
1627 translate_dst_register( emit, insn, 0 );
1628 struct src_register src0 =
1629 translate_src_register( emit, &insn->Src[0] );
1630 struct src_register src1 =
1631 translate_src_register( emit, &insn->Src[1] );
1632
1633 SVGA3dShaderDestToken tex_result;
1634 const unsigned unit = src1.base.num;
1635
1636 /* check for shadow samplers */
1637 boolean compare = (emit->key.fkey.tex[unit].compare_mode ==
1638 PIPE_TEX_COMPARE_R_TO_TEXTURE);
1639
1640 /* texture swizzle */
1641 boolean swizzle = (emit->key.fkey.tex[unit].swizzle_r != PIPE_SWIZZLE_RED ||
1642 emit->key.fkey.tex[unit].swizzle_g != PIPE_SWIZZLE_GREEN ||
1643 emit->key.fkey.tex[unit].swizzle_b != PIPE_SWIZZLE_BLUE ||
1644 emit->key.fkey.tex[unit].swizzle_a != PIPE_SWIZZLE_ALPHA);
1645
1646 /* If doing compare processing or tex swizzle, need to put fetched color into
1647 * a temporary so it can be used as a source later on.
1648 */
1649 if (compare ||
1650 swizzle ||
1651 (!emit->use_sm30 && dst.mask != TGSI_WRITEMASK_XYZW) ) {
1652 tex_result = get_temp( emit );
1653 }
1654 else {
1655 tex_result = dst;
1656 }
1657
1658 switch(insn->Instruction.Opcode) {
1659 case TGSI_OPCODE_TEX:
1660 case TGSI_OPCODE_TXB:
1661 case TGSI_OPCODE_TXP:
1662 case TGSI_OPCODE_TXL:
1663 if (!emit_tex2( emit, insn, tex_result ))
1664 return FALSE;
1665 break;
1666 case TGSI_OPCODE_TXD:
1667 if (!emit_tex4( emit, insn, tex_result ))
1668 return FALSE;
1669 break;
1670 default:
1671 assert(0);
1672 }
1673
1674
1675 if (compare) {
1676 SVGA3dShaderDestToken dst2;
1677
1678 if (swizzle)
1679 dst2 = tex_result;
1680 else
1681 dst2 = dst;
1682
1683 if (dst.mask & TGSI_WRITEMASK_XYZ) {
1684 SVGA3dShaderDestToken src0_zdivw = get_temp( emit );
1685 /* When sampling a depth texture, the result of the comparison is in
1686 * the Y component.
1687 */
1688 struct src_register tex_src_x = scalar(src(tex_result), TGSI_SWIZZLE_Y);
1689 struct src_register r_coord;
1690
1691 if (insn->Instruction.Opcode == TGSI_OPCODE_TXP) {
1692 /* Divide texcoord R by Q */
1693 if (!submit_op1( emit, inst_token( SVGA3DOP_RCP ),
1694 writemask(src0_zdivw, TGSI_WRITEMASK_X),
1695 scalar(src0, TGSI_SWIZZLE_W) ))
1696 return FALSE;
1697
1698 if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ),
1699 writemask(src0_zdivw, TGSI_WRITEMASK_X),
1700 scalar(src0, TGSI_SWIZZLE_Z),
1701 scalar(src(src0_zdivw), TGSI_SWIZZLE_X) ))
1702 return FALSE;
1703
1704 r_coord = scalar(src(src0_zdivw), TGSI_SWIZZLE_X);
1705 }
1706 else {
1707 r_coord = scalar(src0, TGSI_SWIZZLE_Z);
1708 }
1709
1710 /* Compare texture sample value against R component of texcoord */
1711 if (!emit_select(emit,
1712 emit->key.fkey.tex[unit].compare_func,
1713 writemask( dst2, TGSI_WRITEMASK_XYZ ),
1714 r_coord,
1715 tex_src_x))
1716 return FALSE;
1717 }
1718
1719 if (dst.mask & TGSI_WRITEMASK_W) {
1720 struct src_register one =
1721 scalar( get_zero_immediate( emit ), TGSI_SWIZZLE_W );
1722
1723 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
1724 writemask( dst2, TGSI_WRITEMASK_W ),
1725 one ))
1726 return FALSE;
1727 }
1728 }
1729
1730 if (swizzle) {
1731 /* swizzle from tex_result to dst */
1732 emit_tex_swizzle(emit,
1733 dst, src(tex_result),
1734 emit->key.fkey.tex[unit].swizzle_r,
1735 emit->key.fkey.tex[unit].swizzle_g,
1736 emit->key.fkey.tex[unit].swizzle_b,
1737 emit->key.fkey.tex[unit].swizzle_a);
1738 }
1739
1740 if (!emit->use_sm30 &&
1741 dst.mask != TGSI_WRITEMASK_XYZW &&
1742 !compare &&
1743 !swizzle) {
1744 /* Pre-SM3.0, a TEX instruction can't have a writemask. Apply it as a
1745 * separate MOV step here.
1746 */
1747 if (!emit_op1( emit, inst_token( SVGA3DOP_MOV ), dst, src(tex_result) ))
1748 return FALSE;
1749 }
1750
1751 return TRUE;
1752 }
1753
1754 static boolean emit_bgnloop2( struct svga_shader_emitter *emit,
1755 const struct tgsi_full_instruction *insn )
1756 {
1757 SVGA3dShaderInstToken inst = inst_token( SVGA3DOP_LOOP );
1758 struct src_register loop_reg = src_register( SVGA3DREG_LOOP, 0 );
1759 struct src_register const_int = get_loop_const( emit );
1760
1761 emit->dynamic_branching_level++;
1762
1763 return (emit_instruction( emit, inst ) &&
1764 emit_src( emit, loop_reg ) &&
1765 emit_src( emit, const_int ) );
1766 }
1767
1768 static boolean emit_endloop2( struct svga_shader_emitter *emit,
1769 const struct tgsi_full_instruction *insn )
1770 {
1771 SVGA3dShaderInstToken inst = inst_token( SVGA3DOP_ENDLOOP );
1772
1773 emit->dynamic_branching_level--;
1774
1775 return emit_instruction( emit, inst );
1776 }
1777
1778 static boolean emit_brk( struct svga_shader_emitter *emit,
1779 const struct tgsi_full_instruction *insn )
1780 {
1781 SVGA3dShaderInstToken inst = inst_token( SVGA3DOP_BREAK );
1782 return emit_instruction( emit, inst );
1783 }
1784
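/* Emit a single-operand scalar SVGA3D instruction: the source is forced
 * to a .xxxx swizzle since these opcodes read only one component.
 */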
1785 static boolean emit_scalar_op1( struct svga_shader_emitter *emit,
1786 unsigned opcode,
1787 const struct tgsi_full_instruction *insn )
1788 {
1789 SVGA3dShaderInstToken inst;
1790 SVGA3dShaderDestToken dst;
1791 struct src_register src;
1792
1793 inst = inst_token( opcode );
1794 dst = translate_dst_register( emit, insn, 0 );
1795 src = translate_src_register( emit, &insn->Src[0] );
1796 src = scalar( src, TGSI_SWIZZLE_X );
1797
1798 return submit_op1( emit, inst, dst, src );
1799 }
1800
1801
1802 static boolean emit_simple_instruction(struct svga_shader_emitter *emit,
1803 unsigned opcode,
1804 const struct tgsi_full_instruction *insn )
1805 {
1806 const struct tgsi_full_src_register *src = insn->Src;
1807 SVGA3dShaderInstToken inst;
1808 SVGA3dShaderDestToken dst;
1809
1810 inst = inst_token( opcode );
1811 dst = translate_dst_register( emit, insn, 0 );
1812
1813 switch (insn->Instruction.NumSrcRegs) {
1814 case 0:
1815 return submit_op0( emit, inst, dst );
1816 case 1:
1817 return submit_op1( emit, inst, dst,
1818 translate_src_register( emit, &src[0] ));
1819 case 2:
1820 return submit_op2( emit, inst, dst,
1821 translate_src_register( emit, &src[0] ),
1822 translate_src_register( emit, &src[1] ) );
1823 case 3:
1824 return submit_op3( emit, inst, dst,
1825 translate_src_register( emit, &src[0] ),
1826 translate_src_register( emit, &src[1] ),
1827 translate_src_register( emit, &src[2] ) );
1828 default:
1829 assert(0);
1830 return FALSE;
1831 }
1832 }
1833
1834
1835 static boolean emit_deriv(struct svga_shader_emitter *emit,
1836 const struct tgsi_full_instruction *insn )
1837 {
1838 if (emit->dynamic_branching_level > 0 &&
1839 insn->Src[0].Register.File == TGSI_FILE_TEMPORARY)
1840 {
1841 struct src_register zero = get_zero_immediate( emit );
1842 SVGA3dShaderDestToken dst =
1843 translate_dst_register( emit, insn, 0 );
1844
1845 /* Deriv opcodes not valid inside dynamic branching, workaround
1846 * by zeroing out the destination.
1847 */
1848 if (!submit_op1(emit,
1849 inst_token( SVGA3DOP_MOV ),
1850 dst,
1851 scalar(zero, TGSI_SWIZZLE_X)))
1852 return FALSE;
1853
1854 return TRUE;
1855 }
1856 else {
1857 unsigned opcode;
1858 const struct tgsi_full_src_register *reg = &insn->Src[0];
1859 SVGA3dShaderInstToken inst;
1860 SVGA3dShaderDestToken dst;
1861 struct src_register src0;
1862
1863 switch (insn->Instruction.Opcode) {
1864 case TGSI_OPCODE_DDX:
1865 opcode = SVGA3DOP_DSX;
1866 break;
1867 case TGSI_OPCODE_DDY:
1868 opcode = SVGA3DOP_DSY;
1869 break;
1870 default:
1871 return FALSE;
1872 }
1873
1874 inst = inst_token( opcode );
1875 dst = translate_dst_register( emit, insn, 0 );
1876 src0 = translate_src_register( emit, reg );
1877
1878 /* We cannot use negate or abs on source to dsx/dsy instruction.
1879 */
1880 if (reg->Register.Absolute ||
1881 reg->Register.Negate) {
1882 SVGA3dShaderDestToken temp = get_temp( emit );
1883
1884 if (!emit_repl( emit, temp, &src0 ))
1885 return FALSE;
1886 }
1887
1888 return submit_op1( emit, inst, dst, src0 );
1889 }
1890 }
1891
1892 static boolean emit_arl(struct svga_shader_emitter *emit,
1893 const struct tgsi_full_instruction *insn)
1894 {
1895 ++emit->current_arl;
1896 if (emit->unit == PIPE_SHADER_FRAGMENT) {
1897 /* MOVA not present in pixel shader instruction set.
1898 * Ignore this instruction altogether since it is
1899 * only used for loop counters -- and for that
1900 * we reference aL directly.
1901 */
1902 return TRUE;
1903 }
1904 if (svga_arl_needs_adjustment( emit )) {
1905 return emit_fake_arl( emit, insn );
1906 } else {
1907 /* no need to adjust, just emit straight arl */
1908 return emit_simple_instruction(emit, SVGA3DOP_MOVA, insn);
1909 }
1910 }
1911
1912 static boolean emit_pow(struct svga_shader_emitter *emit,
1913 const struct tgsi_full_instruction *insn)
1914 {
1915 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1916 struct src_register src0 = translate_src_register(
1917 emit, &insn->Src[0] );
1918 struct src_register src1 = translate_src_register(
1919 emit, &insn->Src[1] );
1920 boolean need_tmp = FALSE;
1921
1922 /* POW can only output to a temporary */
1923 if (insn->Dst[0].Register.File != TGSI_FILE_TEMPORARY)
1924 need_tmp = TRUE;
1925
1926 /* POW src1 must not be the same register as dst */
1927 if (alias_src_dst( src1, dst ))
1928 need_tmp = TRUE;
1929
1930 /* it's a scalar op */
1931 src0 = scalar( src0, TGSI_SWIZZLE_X );
1932 src1 = scalar( src1, TGSI_SWIZZLE_X );
1933
1934 if (need_tmp) {
1935 SVGA3dShaderDestToken tmp = writemask(get_temp( emit ), TGSI_WRITEMASK_X );
1936
1937 if (!submit_op2(emit, inst_token( SVGA3DOP_POW ), tmp, src0, src1))
1938 return FALSE;
1939
1940 return submit_op1(emit, inst_token( SVGA3DOP_MOV ), dst, scalar(src(tmp), 0) );
1941 }
1942 else {
1943 return submit_op2(emit, inst_token( SVGA3DOP_POW ), dst, src0, src1);
1944 }
1945 }
1946
1947 static boolean emit_xpd(struct svga_shader_emitter *emit,
1948 const struct tgsi_full_instruction *insn)
1949 {
1950 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1951 const struct src_register src0 = translate_src_register(
1952 emit, &insn->Src[0] );
1953 const struct src_register src1 = translate_src_register(
1954 emit, &insn->Src[1] );
1955 boolean need_dst_tmp = FALSE;
1956
1957 /* XPD can only output to a temporary */
1958 if (SVGA3dShaderGetRegType(dst.value) != SVGA3DREG_TEMP)
1959 need_dst_tmp = TRUE;
1960
1961 /* The dst reg must not be the same as src0 or src1*/
1962 if (alias_src_dst(src0, dst) ||
1963 alias_src_dst(src1, dst))
1964 need_dst_tmp = TRUE;
1965
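   /* Illustrative expansion (assuming the usual DX9 CRS restrictions):
    *   CRS tmp.xyz, src0, src1     -- or CRS dst.xyz when no temp is needed
    *   MOV dst,     tmp            -- only when a temp was used
    *   MOV dst.w,   1.0            -- only when dst.w is written (see below)
    */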
1966 if (need_dst_tmp) {
1967 SVGA3dShaderDestToken tmp = get_temp( emit );
1968
1969 /* Obey DX9 restrictions on mask:
1970 */
1971 tmp.mask = dst.mask & TGSI_WRITEMASK_XYZ;
1972
1973 if (!submit_op2(emit, inst_token( SVGA3DOP_CRS ), tmp, src0, src1))
1974 return FALSE;
1975
1976 if (!submit_op1(emit, inst_token( SVGA3DOP_MOV ), dst, src( tmp )))
1977 return FALSE;
1978 }
1979 else {
1980 if (!submit_op2(emit, inst_token( SVGA3DOP_CRS ), dst, src0, src1))
1981 return FALSE;
1982 }
1983
1984 /* Need to emit 1.0 to dst.w?
1985 */
1986 if (dst.mask & TGSI_WRITEMASK_W) {
1987 struct src_register zero = get_zero_immediate( emit );
1988
1989 if (!submit_op1(emit,
1990 inst_token( SVGA3DOP_MOV ),
1991 writemask(dst, TGSI_WRITEMASK_W),
1992 zero))
1993 return FALSE;
1994 }
1995
1996 return TRUE;
1997 }
1998
1999
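/* TGSI LRP computes dst = src0 * src1 + (1 - src0) * src2; the expansion
 * into SVGA3D instructions (and any source/destination aliasing
 * workarounds) is handled by submit_lrp().
 */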
2000 static boolean emit_lrp(struct svga_shader_emitter *emit,
2001 const struct tgsi_full_instruction *insn)
2002 {
2003 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
2004 const struct src_register src0 = translate_src_register(
2005 emit, &insn->Src[0] );
2006 const struct src_register src1 = translate_src_register(
2007 emit, &insn->Src[1] );
2008 const struct src_register src2 = translate_src_register(
2009 emit, &insn->Src[2] );
2010
2011 return submit_lrp(emit, dst, src0, src1, src2);
2012 }
2013
2014
2015 static boolean emit_dst_insn(struct svga_shader_emitter *emit,
2016 const struct tgsi_full_instruction *insn )
2017 {
2018 if (emit->unit == PIPE_SHADER_VERTEX) {
2019 /* SVGA/DX9 has a DST instruction, but only for vertex shaders:
2020 */
2021 return emit_simple_instruction(emit, SVGA3DOP_DST, insn);
2022 }
2023 else {
2024
2025 /* result[0] = 1 * 1;
2026 * result[1] = a[1] * b[1];
2027 * result[2] = a[2] * 1;
2028 * result[3] = 1 * b[3];
2029 */
2030
2031 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
2032 SVGA3dShaderDestToken tmp;
2033 const struct src_register src0 = translate_src_register(
2034 emit, &insn->Src[0] );
2035 const struct src_register src1 = translate_src_register(
2036 emit, &insn->Src[1] );
2037 struct src_register zero = get_zero_immediate( emit );
2038 boolean need_tmp = FALSE;
2039
2040 if (SVGA3dShaderGetRegType(dst.value) != SVGA3DREG_TEMP ||
2041 alias_src_dst(src0, dst) ||
2042 alias_src_dst(src1, dst))
2043 need_tmp = TRUE;
2044
2045 if (need_tmp) {
2046 tmp = get_temp( emit );
2047 }
2048 else {
2049 tmp = dst;
2050 }
2051
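      /* Illustrative emitted sequence (subject to the write mask):
       *   MOV tmp.xw, {0,0,0,1}.w     -- 1.0
       *   MOV tmp.yz, src0
       *   MUL tmp.yw, tmp, src1
       *   MOV dst,    tmp             -- only when a temporary was used
       */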
2052 /* tmp.xw = 1.0
2053 */
2054 if (tmp.mask & TGSI_WRITEMASK_XW) {
2055 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
2056 writemask(tmp, TGSI_WRITEMASK_XW ),
2057 scalar( zero, 3 )))
2058 return FALSE;
2059 }
2060
2061 /* tmp.yz = src0
2062 */
2063 if (tmp.mask & TGSI_WRITEMASK_YZ) {
2064 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
2065 writemask(tmp, TGSI_WRITEMASK_YZ ),
2066 src0))
2067 return FALSE;
2068 }
2069
2070 /* tmp.yw = tmp * src1
2071 */
2072 if (tmp.mask & TGSI_WRITEMASK_YW) {
2073 if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ),
2074 writemask(tmp, TGSI_WRITEMASK_YW ),
2075 src(tmp),
2076 src1))
2077 return FALSE;
2078 }
2079
2080 /* dst = tmp
2081 */
2082 if (need_tmp) {
2083 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
2084 dst,
2085 src(tmp)))
2086 return FALSE;
2087 }
2088 }
2089
2090 return TRUE;
2091 }
2092
2093
2094 static boolean emit_exp(struct svga_shader_emitter *emit,
2095 const struct tgsi_full_instruction *insn)
2096 {
2097 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
2098 struct src_register src0 =
2099 translate_src_register( emit, &insn->Src[0] );
2100 struct src_register zero = get_zero_immediate( emit );
2101 SVGA3dShaderDestToken fraction;
2102
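   /* For reference, TGSI EXP expects (per the classic ARB_vertex_program
    * definition):
    *   dst.x = 2 ^ floor(src0.x)
    *   dst.y = src0.x - floor(src0.x)
    *   dst.z = 2 ^ src0.x            (reduced precision is acceptable)
    *   dst.w = 1.0
    * Only components present in the write mask are actually computed.
    */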
2103 if (dst.mask & TGSI_WRITEMASK_Y)
2104 fraction = dst;
2105 else if (dst.mask & TGSI_WRITEMASK_X)
2106 fraction = get_temp( emit );
2107 else
2108 fraction.value = 0;
2109
2110    /* If x or y is being written, compute src0 - floor(src0); the x
2111     * result below also needs this fraction. */
2112 if (dst.mask & TGSI_WRITEMASK_XY) {
2113 if (!submit_op1( emit, inst_token( SVGA3DOP_FRC ),
2114 writemask( fraction, TGSI_WRITEMASK_Y ),
2115 src0 ))
2116 return FALSE;
2117 }
2118
2119 /* If x is being written, fill it with 2 ^ floor(src0).
2120 */
2121 if (dst.mask & TGSI_WRITEMASK_X) {
2122 if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ),
2123 writemask( dst, TGSI_WRITEMASK_X ),
2124 src0,
2125 scalar( negate( src( fraction ) ), TGSI_SWIZZLE_Y ) ) )
2126 return FALSE;
2127
2128 if (!submit_op1( emit, inst_token( SVGA3DOP_EXP ),
2129 writemask( dst, TGSI_WRITEMASK_X ),
2130 scalar( src( dst ), TGSI_SWIZZLE_X ) ) )
2131 return FALSE;
2132
2133 if (!(dst.mask & TGSI_WRITEMASK_Y))
2134 release_temp( emit, fraction );
2135 }
2136
2137 /* If z is being written, fill it with 2 ^ src0 (partial precision).
2138 */
2139 if (dst.mask & TGSI_WRITEMASK_Z) {
2140 if (!submit_op1( emit, inst_token( SVGA3DOP_EXPP ),
2141 writemask( dst, TGSI_WRITEMASK_Z ),
2142 src0 ) )
2143 return FALSE;
2144 }
2145
2146 /* If w is being written, fill it with one.
2147 */
2148 if (dst.mask & TGSI_WRITEMASK_W) {
2149 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
2150 writemask(dst, TGSI_WRITEMASK_W),
2151 scalar( zero, TGSI_SWIZZLE_W ) ))
2152 return FALSE;
2153 }
2154
2155 return TRUE;
2156 }
2157
2158 static boolean emit_lit(struct svga_shader_emitter *emit,
2159 const struct tgsi_full_instruction *insn )
2160 {
2161 if (emit->unit == PIPE_SHADER_VERTEX) {
2162 /* SVGA/DX9 has a LIT instruction, but only for vertex shaders:
2163 */
2164 return emit_simple_instruction(emit, SVGA3DOP_LIT, insn);
2165 }
2166 else {
2167
2168       /* D3D vs. GL semantics can be fairly easily accommodated by
2169 * variations on this sequence.
2170 *
2171 * GL:
2172 * tmp.y = src.x
2173 * tmp.z = pow(src.y,src.w)
2174 * p0 = src0.xxxx > 0
2175 * result = zero.wxxw
2176 * (p0) result.yz = tmp
2177 *
2178 * D3D:
2179 * tmp.y = src.x
2180 * tmp.z = pow(src.y,src.w)
2181 * p0 = src0.xxyy > 0
2182 * result = zero.wxxw
2183 * (p0) result.yz = tmp
2184 *
2185 * Will implement the GL version for now.
2186 */
2187
2188 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
2189 SVGA3dShaderDestToken tmp = get_temp( emit );
2190 const struct src_register src0 = translate_src_register(
2191 emit, &insn->Src[0] );
2192 struct src_register zero = get_zero_immediate( emit );
2193
2194 /* tmp = pow(src.y, src.w)
2195 */
2196 if (dst.mask & TGSI_WRITEMASK_Z) {
2197 if (!submit_op2(emit, inst_token( SVGA3DOP_POW ),
2198 tmp,
2199 scalar(src0, 1),
2200 scalar(src0, 3)))
2201 return FALSE;
2202 }
2203
2204 /* tmp.y = src.x
2205 */
2206 if (dst.mask & TGSI_WRITEMASK_Y) {
2207 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
2208 writemask(tmp, TGSI_WRITEMASK_Y ),
2209 scalar(src0, 0)))
2210 return FALSE;
2211 }
2212
2213       /* Can't quite do this with emit_conditional() due to the extra
2214 * writemask on the predicated mov:
2215 */
2216 {
2217 SVGA3dShaderDestToken pred_reg = dst_register( SVGA3DREG_PREDICATE, 0 );
2218 SVGA3dShaderInstToken setp_token, mov_token;
2219 struct src_register predsrc;
2220
2221 setp_token = inst_token( SVGA3DOP_SETP );
2222 mov_token = inst_token( SVGA3DOP_MOV );
2223
2224 setp_token.control = SVGA3DOPCOMP_GT;
2225
2226 /* D3D vs GL semantics:
2227 */
2228 if (0)
2229 predsrc = swizzle(src0, 0, 0, 1, 1); /* D3D */
2230 else
2231 predsrc = swizzle(src0, 0, 0, 0, 0); /* GL */
2232
2233          /* SETP predsrc, GT, {0}.x   (src0.xxxx for GL, src0.xxyy for D3D) */
2234 if (!submit_op2( emit, setp_token, pred_reg,
2235 predsrc,
2236 swizzle(zero, 0, 0, 0, 0) ))
2237 return FALSE;
2238
2239 /* MOV dst, fail */
2240 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), dst,
2241 swizzle(zero, 3, 0, 0, 3 )))
2242 return FALSE;
2243
2244 /* MOV dst.yz, tmp (predicated)
2245 *
2246 * Note that the predicate reg (and possible modifiers) is passed
2247 * as the first source argument.
2248 */
2249 if (dst.mask & TGSI_WRITEMASK_YZ) {
2250 mov_token.predicated = 1;
2251 if (!submit_op2( emit, mov_token,
2252 writemask(dst, TGSI_WRITEMASK_YZ),
2253 src( pred_reg ), src( tmp ) ))
2254 return FALSE;
2255 }
2256 }
2257 }
2258
2259 return TRUE;
2260 }
2261
2262
2263
2264
2265 static boolean emit_ex2( struct svga_shader_emitter *emit,
2266 const struct tgsi_full_instruction *insn )
2267 {
2268 SVGA3dShaderInstToken inst;
2269 SVGA3dShaderDestToken dst;
2270 struct src_register src0;
2271
2272 inst = inst_token( SVGA3DOP_EXP );
2273 dst = translate_dst_register( emit, insn, 0 );
2274 src0 = translate_src_register( emit, &insn->Src[0] );
2275 src0 = scalar( src0, TGSI_SWIZZLE_X );
2276
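   /* EXP computes a scalar 2^x.  When the destination write mask is not
    * .xyzw, the result is computed into a temporary and its .x component
    * is then MOVed into the masked destination.
    */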
2277 if (dst.mask != TGSI_WRITEMASK_XYZW) {
2278 SVGA3dShaderDestToken tmp = get_temp( emit );
2279
2280 if (!submit_op1( emit, inst, tmp, src0 ))
2281 return FALSE;
2282
2283 return submit_op1( emit, inst_token( SVGA3DOP_MOV ),
2284 dst,
2285 scalar( src( tmp ), TGSI_SWIZZLE_X ) );
2286 }
2287
2288 return submit_op1( emit, inst, dst, src0 );
2289 }
2290
2291
2292 static boolean emit_log(struct svga_shader_emitter *emit,
2293 const struct tgsi_full_instruction *insn)
2294 {
2295 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
2296 struct src_register src0 =
2297 translate_src_register( emit, &insn->Src[0] );
2298 struct src_register zero = get_zero_immediate( emit );
2299 SVGA3dShaderDestToken abs_tmp;
2300 struct src_register abs_src0;
2301 SVGA3dShaderDestToken log2_abs;
2302
2303 abs_tmp.value = 0;
2304
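   /* For reference, TGSI LOG expects (per the classic ARB_vertex_program
    * definition):
    *   dst.x = floor(log2(|src0.x|))
    *   dst.y = |src0.x| / 2 ^ floor(log2(|src0.x|))
    *   dst.z = log2(|src0.x|)
    *   dst.w = 1.0
    * Only components present in the write mask are actually computed.
    */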
2305 if (dst.mask & TGSI_WRITEMASK_Z)
2306 log2_abs = dst;
2307 else if (dst.mask & TGSI_WRITEMASK_XY)
2308 log2_abs = get_temp( emit );
2309 else
2310 log2_abs.value = 0;
2311
2312    /* If x, y or z is being written, compute log2( abs( src0 ) ); the x
2313     * and y results below are derived from it. */
2314 if (dst.mask & TGSI_WRITEMASK_XYZ) {
2315 if (!src0.base.srcMod || src0.base.srcMod == SVGA3DSRCMOD_ABS)
2316 abs_src0 = src0;
2317 else {
2318 abs_tmp = get_temp( emit );
2319
2320 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
2321 abs_tmp,
2322 src0 ) )
2323 return FALSE;
2324
2325 abs_src0 = src( abs_tmp );
2326 }
2327
2328 abs_src0 = absolute( scalar( abs_src0, TGSI_SWIZZLE_X ) );
2329
2330 if (!submit_op1( emit, inst_token( SVGA3DOP_LOG ),
2331 writemask( log2_abs, TGSI_WRITEMASK_Z ),
2332 abs_src0 ) )
2333 return FALSE;
2334 }
2335
2336 if (dst.mask & TGSI_WRITEMASK_XY) {
2337 SVGA3dShaderDestToken floor_log2;
2338
2339 if (dst.mask & TGSI_WRITEMASK_X)
2340 floor_log2 = dst;
2341 else
2342 floor_log2 = get_temp( emit );
2343
2344       /* Compute floor( log2( abs( src0 ) ) ) as log2_abs - frc( log2_abs );
2345        * it goes to dst.x if x is written, otherwise to a temp (y needs it too). */
2346 if (!submit_op1( emit, inst_token( SVGA3DOP_FRC ),
2347 writemask( floor_log2, TGSI_WRITEMASK_X ),
2348 scalar( src( log2_abs ), TGSI_SWIZZLE_Z ) ) )
2349 return FALSE;
2350
2351 if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ),
2352 writemask( floor_log2, TGSI_WRITEMASK_X ),
2353 scalar( src( log2_abs ), TGSI_SWIZZLE_Z ),
2354 negate( src( floor_log2 ) ) ) )
2355 return FALSE;
2356
2357 /* If y is being written, fill it with
2358 * abs ( src0 ) / ( 2 ^ floor( log2( abs( src0 ) ) ) ).
2359 */
2360 if (dst.mask & TGSI_WRITEMASK_Y) {
2361 if (!submit_op1( emit, inst_token( SVGA3DOP_EXP ),
2362 writemask( dst, TGSI_WRITEMASK_Y ),
2363 negate( scalar( src( floor_log2 ),
2364 TGSI_SWIZZLE_X ) ) ) )
2365 return FALSE;
2366
2367 if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ),
2368 writemask( dst, TGSI_WRITEMASK_Y ),
2369 src( dst ),
2370 abs_src0 ) )
2371 return FALSE;
2372 }
2373
2374 if (!(dst.mask & TGSI_WRITEMASK_X))
2375 release_temp( emit, floor_log2 );
2376
2377 if (!(dst.mask & TGSI_WRITEMASK_Z))
2378 release_temp( emit, log2_abs );
2379 }
2380
2381 if (dst.mask & TGSI_WRITEMASK_XYZ && src0.base.srcMod &&
2382 src0.base.srcMod != SVGA3DSRCMOD_ABS)
2383 release_temp( emit, abs_tmp );
2384
2385 /* If w is being written, fill it with one.
2386 */
2387 if (dst.mask & TGSI_WRITEMASK_W) {
2388 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
2389 writemask(dst, TGSI_WRITEMASK_W),
2390 scalar( zero, TGSI_SWIZZLE_W ) ))
2391 return FALSE;
2392 }
2393
2394 return TRUE;
2395 }
2396
2397
2398 static boolean emit_bgnsub( struct svga_shader_emitter *emit,
2399 unsigned position,
2400 const struct tgsi_full_instruction *insn )
2401 {
2402 unsigned i;
2403
2404 /* Note that we've finished the main function and are now emitting
2405 * subroutines. This affects how we terminate the generated
2406 * shader.
2407 */
2408 emit->in_main_func = FALSE;
2409
2410 for (i = 0; i < emit->nr_labels; i++) {
2411 if (emit->label[i] == position) {
2412 return (emit_instruction( emit, inst_token( SVGA3DOP_RET ) ) &&
2413 emit_instruction( emit, inst_token( SVGA3DOP_LABEL ) ) &&
2414 emit_src( emit, src_register( SVGA3DREG_LABEL, i )));
2415 }
2416 }
2417
2418 assert(0);
2419 return TRUE;
2420 }
2421
2422 static boolean emit_call( struct svga_shader_emitter *emit,
2423 const struct tgsi_full_instruction *insn )
2424 {
2425 unsigned position = insn->Label.Label;
2426 unsigned i;
2427
2428 for (i = 0; i < emit->nr_labels; i++) {
2429 if (emit->label[i] == position)
2430 break;
2431 }
2432
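   /* Reuse an existing label slot if this subroutine has been called
    * before; otherwise allocate a new one (bounded by the fixed-size
    * emit->label[] array) so that CALL and the matching LABEL emitted in
    * emit_bgnsub() refer to the same index.
    */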
2433 if (emit->nr_labels == Elements(emit->label))
2434 return FALSE;
2435
2436 if (i == emit->nr_labels) {
2437 emit->label[i] = position;
2438 emit->nr_labels++;
2439 }
2440
2441 return (emit_instruction( emit, inst_token( SVGA3DOP_CALL ) ) &&
2442 emit_src( emit, src_register( SVGA3DREG_LABEL, i )));
2443 }
2444
2445
2446 static boolean emit_end( struct svga_shader_emitter *emit )
2447 {
2448 if (emit->unit == PIPE_SHADER_VERTEX) {
2449 return emit_vs_postamble( emit );
2450 }
2451 else {
2452 return emit_ps_postamble( emit );
2453 }
2454 }
2455
2456
2457
2458 static boolean svga_emit_instruction( struct svga_shader_emitter *emit,
2459 unsigned position,
2460 const struct tgsi_full_instruction *insn )
2461 {
2462 switch (insn->Instruction.Opcode) {
2463
2464 case TGSI_OPCODE_ARL:
2465 return emit_arl( emit, insn );
2466
2467 case TGSI_OPCODE_TEX:
2468 case TGSI_OPCODE_TXB:
2469 case TGSI_OPCODE_TXP:
2470 case TGSI_OPCODE_TXL:
2471 case TGSI_OPCODE_TXD:
2472 return emit_tex( emit, insn );
2473
2474 case TGSI_OPCODE_DDX:
2475 case TGSI_OPCODE_DDY:
2476 return emit_deriv( emit, insn );
2477
2478 case TGSI_OPCODE_BGNSUB:
2479 return emit_bgnsub( emit, position, insn );
2480
2481 case TGSI_OPCODE_ENDSUB:
2482 return TRUE;
2483
2484 case TGSI_OPCODE_CAL:
2485 return emit_call( emit, insn );
2486
2487 case TGSI_OPCODE_FLR:
2488 case TGSI_OPCODE_TRUNC: /* should be TRUNC, not FLR */
2489 return emit_floor( emit, insn );
2490
2491 case TGSI_OPCODE_CMP:
2492 return emit_cmp( emit, insn );
2493
2494 case TGSI_OPCODE_DIV:
2495 return emit_div( emit, insn );
2496
2497 case TGSI_OPCODE_DP2:
2498 return emit_dp2( emit, insn );
2499
2500 case TGSI_OPCODE_DPH:
2501 return emit_dph( emit, insn );
2502
2503 case TGSI_OPCODE_NRM:
2504 return emit_nrm( emit, insn );
2505
2506 case TGSI_OPCODE_COS:
2507 return emit_cos( emit, insn );
2508
2509 case TGSI_OPCODE_SIN:
2510 return emit_sin( emit, insn );
2511
2512 case TGSI_OPCODE_SCS:
2513 return emit_sincos( emit, insn );
2514
2515 case TGSI_OPCODE_END:
2516 /* TGSI always finishes the main func with an END */
2517 return emit_end( emit );
2518
2519 case TGSI_OPCODE_KIL:
2520 return emit_kil( emit, insn );
2521
2522 /* Selection opcodes. The underlying language is fairly
2523 * non-orthogonal about these.
2524 */
2525 case TGSI_OPCODE_SEQ:
2526 return emit_select_op( emit, PIPE_FUNC_EQUAL, insn );
2527
2528 case TGSI_OPCODE_SNE:
2529 return emit_select_op( emit, PIPE_FUNC_NOTEQUAL, insn );
2530
2531 case TGSI_OPCODE_SGT:
2532 return emit_select_op( emit, PIPE_FUNC_GREATER, insn );
2533
2534 case TGSI_OPCODE_SGE:
2535 return emit_select_op( emit, PIPE_FUNC_GEQUAL, insn );
2536
2537 case TGSI_OPCODE_SLT:
2538 return emit_select_op( emit, PIPE_FUNC_LESS, insn );
2539
2540 case TGSI_OPCODE_SLE:
2541 return emit_select_op( emit, PIPE_FUNC_LEQUAL, insn );
2542
2543 case TGSI_OPCODE_SUB:
2544 return emit_sub( emit, insn );
2545
2546 case TGSI_OPCODE_POW:
2547 return emit_pow( emit, insn );
2548
2549 case TGSI_OPCODE_EX2:
2550 return emit_ex2( emit, insn );
2551
2552 case TGSI_OPCODE_EXP:
2553 return emit_exp( emit, insn );
2554
2555 case TGSI_OPCODE_LOG:
2556 return emit_log( emit, insn );
2557
2558 case TGSI_OPCODE_LG2:
2559 return emit_scalar_op1( emit, SVGA3DOP_LOG, insn );
2560
2561 case TGSI_OPCODE_RSQ:
2562 return emit_scalar_op1( emit, SVGA3DOP_RSQ, insn );
2563
2564 case TGSI_OPCODE_RCP:
2565 return emit_scalar_op1( emit, SVGA3DOP_RCP, insn );
2566
2567 case TGSI_OPCODE_CONT:
2568 case TGSI_OPCODE_RET:
2569 /* This is a noop -- we tell mesa that we can't support RET
2570 * within a function (early return), so this will always be
2571 * followed by an ENDSUB.
2572 */
2573 return TRUE;
2574
2575 /* These aren't actually used by any of the frontends we care
2576 * about:
2577 */
2578 case TGSI_OPCODE_CLAMP:
2579 case TGSI_OPCODE_ROUND:
2580 case TGSI_OPCODE_AND:
2581 case TGSI_OPCODE_OR:
2582 case TGSI_OPCODE_I2F:
2583 case TGSI_OPCODE_NOT:
2584 case TGSI_OPCODE_SHL:
2585 case TGSI_OPCODE_ISHR:
2586 case TGSI_OPCODE_XOR:
2587 return FALSE;
2588
2589 case TGSI_OPCODE_IF:
2590 return emit_if( emit, insn );
2591 case TGSI_OPCODE_ELSE:
2592 return emit_else( emit, insn );
2593 case TGSI_OPCODE_ENDIF:
2594 return emit_endif( emit, insn );
2595
2596 case TGSI_OPCODE_BGNLOOP:
2597 return emit_bgnloop2( emit, insn );
2598 case TGSI_OPCODE_ENDLOOP:
2599 return emit_endloop2( emit, insn );
2600 case TGSI_OPCODE_BRK:
2601 return emit_brk( emit, insn );
2602
2603 case TGSI_OPCODE_XPD:
2604 return emit_xpd( emit, insn );
2605
2606 case TGSI_OPCODE_KILP:
2607 return emit_kilp( emit, insn );
2608
2609 case TGSI_OPCODE_DST:
2610 return emit_dst_insn( emit, insn );
2611
2612 case TGSI_OPCODE_LIT:
2613 return emit_lit( emit, insn );
2614
2615 case TGSI_OPCODE_LRP:
2616 return emit_lrp( emit, insn );
2617
2618 case TGSI_OPCODE_SSG:
2619 return emit_ssg( emit, insn );
2620
2621 default: {
2622 unsigned opcode = translate_opcode(insn->Instruction.Opcode);
2623
2624 if (opcode == SVGA3DOP_LAST_INST)
2625 return FALSE;
2626
2627 if (!emit_simple_instruction( emit, opcode, insn ))
2628 return FALSE;
2629 }
2630 }
2631
2632 return TRUE;
2633 }
2634
2635
2636 static boolean svga_emit_immediate( struct svga_shader_emitter *emit,
2637 struct tgsi_full_immediate *imm)
2638 {
2639 static const float id[4] = {0,0,0,1};
2640 float value[4];
2641 unsigned i;
2642
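   /* TGSI immediates may supply fewer than four components; the missing
    * ones are padded from the identity vector {0,0,0,1} so the hardware
    * constant is always a full float4.
    */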
2643 assert(1 <= imm->Immediate.NrTokens && imm->Immediate.NrTokens <= 5);
2644 for (i = 0; i < imm->Immediate.NrTokens - 1; i++)
2645 value[i] = imm->u[i].Float;
2646
2647 for ( ; i < 4; i++ )
2648 value[i] = id[i];
2649
2650 return emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT,
2651 emit->imm_start + emit->internal_imm_count++,
2652 value[0], value[1], value[2], value[3]);
2653 }
2654
2655 static boolean make_immediate( struct svga_shader_emitter *emit,
2656 float a,
2657 float b,
2658 float c,
2659 float d,
2660 struct src_register *out )
2661 {
2662 unsigned idx = emit->nr_hw_float_const++;
2663
2664 if (!emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT,
2665 idx, a, b, c, d ))
2666 return FALSE;
2667
2668 *out = src_register( SVGA3DREG_CONST, idx );
2669
2670 return TRUE;
2671 }
2672
2673 static boolean emit_vs_preamble( struct svga_shader_emitter *emit )
2674 {
2675 if (!emit->key.vkey.need_prescale) {
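      /* The {0, 0, 0.5, 0.5} constant is only needed by the non-prescale
       * path of emit_vs_postamble(), where a DP4 against it remaps the GL
       * clip-space Z range ([-w,w]) to the D3D convention ([0,w]).
       */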
2676 if (!make_immediate( emit, 0, 0, .5, .5,
2677 &emit->imm_0055))
2678 return FALSE;
2679 }
2680
2681 return TRUE;
2682 }
2683
2684 static boolean emit_ps_preamble( struct svga_shader_emitter *emit )
2685 {
2686 unsigned i;
2687
2688 /* For SM20, need to initialize the temporaries we're using to hold
2689 * color outputs to some value. Shaders which don't set all of
2690 * these values are likely to be rejected by the DX9 runtime.
2691 */
2692 if (!emit->use_sm30) {
2693 struct src_register zero = get_zero_immediate( emit );
2694 for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) {
2695 if (SVGA3dShaderGetRegType(emit->true_col[i].value) != 0) {
2696
2697 if (!submit_op1( emit,
2698 inst_token(SVGA3DOP_MOV),
2699 emit->temp_col[i],
2700 zero ))
2701 return FALSE;
2702 }
2703 }
2704 } else if (emit->ps_reads_pos && emit->info.reads_z) {
2705 /*
2706 * Assemble the position from various bits of inputs. Depth and W are
2707     * passed in a texcoord because D3D's vPos does not hold Z or W.
2708     * Also fix up the perspective interpolation.
2709 *
2710 * temp_pos.xy = vPos.xy
2711 * temp_pos.w = rcp(texcoord1.w);
2712 * temp_pos.z = texcoord1.z * temp_pos.w;
2713 */
2714 if (!submit_op1( emit,
2715 inst_token(SVGA3DOP_MOV),
2716 writemask( emit->ps_temp_pos, TGSI_WRITEMASK_XY ),
2717 emit->ps_true_pos ))
2718 return FALSE;
2719
2720 if (!submit_op1( emit,
2721 inst_token(SVGA3DOP_RCP),
2722 writemask( emit->ps_temp_pos, TGSI_WRITEMASK_W ),
2723 scalar( emit->ps_depth_pos, TGSI_SWIZZLE_W ) ))
2724 return FALSE;
2725
2726 if (!submit_op2( emit,
2727 inst_token(SVGA3DOP_MUL),
2728 writemask( emit->ps_temp_pos, TGSI_WRITEMASK_Z ),
2729 scalar( emit->ps_depth_pos, TGSI_SWIZZLE_Z ),
2730 scalar( src(emit->ps_temp_pos), TGSI_SWIZZLE_W ) ))
2731 return FALSE;
2732 }
2733
2734 return TRUE;
2735 }
2736
2737 static boolean emit_ps_postamble( struct svga_shader_emitter *emit )
2738 {
2739 unsigned i;
2740
2741 /* PS oDepth is incredibly fragile and it's very hard to catch the
2742 * types of usage that break it during shader emit. Easier just to
2743 * redirect the main program to a temporary and then only touch
2744 * oDepth with a hand-crafted MOV below.
2745 */
2746 if (SVGA3dShaderGetRegType(emit->true_pos.value) != 0) {
2747
2748 if (!submit_op1( emit,
2749 inst_token(SVGA3DOP_MOV),
2750 emit->true_pos,
2751 scalar(src(emit->temp_pos), TGSI_SWIZZLE_Z) ))
2752 return FALSE;
2753 }
2754
2755 /* Similarly for SM20 color outputs... Luckily SM30 isn't so
2756 * fragile.
2757 */
2758 for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) {
2759 if (SVGA3dShaderGetRegType(emit->true_col[i].value) != 0) {
2760
2761 /* Potentially override output colors with white for XOR
2762 * logicop workaround.
2763 */
2764 if (emit->unit == PIPE_SHADER_FRAGMENT &&
2765 emit->key.fkey.white_fragments) {
2766
2767 struct src_register one = scalar( get_zero_immediate( emit ),
2768 TGSI_SWIZZLE_W );
2769
2770 if (!submit_op1( emit,
2771 inst_token(SVGA3DOP_MOV),
2772 emit->true_col[i],
2773 one ))
2774 return FALSE;
2775 }
2776 else {
2777 if (!submit_op1( emit,
2778 inst_token(SVGA3DOP_MOV),
2779 emit->true_col[i],
2780 src(emit->temp_col[i]) ))
2781 return FALSE;
2782 }
2783 }
2784 }
2785
2786 return TRUE;
2787 }
2788
2789 static boolean emit_vs_postamble( struct svga_shader_emitter *emit )
2790 {
2791 /* PSIZ output is incredibly fragile and it's very hard to catch
2792 * the types of usage that break it during shader emit. Easier
2793 * just to redirect the main program to a temporary and then only
2794 * touch PSIZ with a hand-crafted MOV below.
2795 */
2796 if (SVGA3dShaderGetRegType(emit->true_psiz.value) != 0) {
2797
2798 if (!submit_op1( emit,
2799 inst_token(SVGA3DOP_MOV),
2800 emit->true_psiz,
2801 scalar(src(emit->temp_psiz), TGSI_SWIZZLE_X) ))
2802 return FALSE;
2803 }
2804
2805 /* Need to perform various manipulations on vertex position to cope
2806 * with the different GL and D3D clip spaces.
2807 */
2808 if (emit->key.vkey.need_prescale) {
2809 SVGA3dShaderDestToken temp_pos = emit->temp_pos;
2810 SVGA3dShaderDestToken depth = emit->depth_pos;
2811 SVGA3dShaderDestToken pos = emit->true_pos;
2812 unsigned offset = emit->info.file_max[TGSI_FILE_CONSTANT] + 1;
2813 struct src_register prescale_scale = src_register( SVGA3DREG_CONST,
2814 offset + 0 );
2815 struct src_register prescale_trans = src_register( SVGA3DREG_CONST,
2816 offset + 1 );
2817
2818 if (!submit_op1( emit,
2819 inst_token(SVGA3DOP_MOV),
2820 writemask(depth, TGSI_WRITEMASK_W),
2821 scalar(src(temp_pos), TGSI_SWIZZLE_W) ))
2822 return FALSE;
2823
2824 /* MUL temp_pos.xyz, temp_pos, prescale.scale
2825 * MAD result.position, temp_pos.wwww, prescale.trans, temp_pos
2826 * --> Note that prescale.trans.w == 0
2827 */
2828 if (!submit_op2( emit,
2829 inst_token(SVGA3DOP_MUL),
2830 writemask(temp_pos, TGSI_WRITEMASK_XYZ),
2831 src(temp_pos),
2832 prescale_scale ))
2833 return FALSE;
2834
2835 if (!submit_op3( emit,
2836 inst_token(SVGA3DOP_MAD),
2837 pos,
2838 swizzle(src(temp_pos), 3, 3, 3, 3),
2839 prescale_trans,
2840 src(temp_pos)))
2841 return FALSE;
2842
2843 /* Also write to depth value */
2844 if (!submit_op3( emit,
2845 inst_token(SVGA3DOP_MAD),
2846 writemask(depth, TGSI_WRITEMASK_Z),
2847 swizzle(src(temp_pos), 3, 3, 3, 3),
2848 prescale_trans,
2849 src(temp_pos) ))
2850 return FALSE;
2851 }
2852 else {
2853 SVGA3dShaderDestToken temp_pos = emit->temp_pos;
2854 SVGA3dShaderDestToken depth = emit->depth_pos;
2855 SVGA3dShaderDestToken pos = emit->true_pos;
2856 struct src_register imm_0055 = emit->imm_0055;
2857
2858 /* Adjust GL clipping coordinate space to hardware (D3D-style):
2859 *
2860 * DP4 temp_pos.z, {0,0,.5,.5}, temp_pos
2861 * MOV result.position, temp_pos
2862 */
2863 if (!submit_op2( emit,
2864 inst_token(SVGA3DOP_DP4),
2865 writemask(temp_pos, TGSI_WRITEMASK_Z),
2866 imm_0055,
2867 src(temp_pos) ))
2868 return FALSE;
2869
2870 if (!submit_op1( emit,
2871 inst_token(SVGA3DOP_MOV),
2872 pos,
2873 src(temp_pos) ))
2874 return FALSE;
2875
2876 /* Move the manipulated depth into the extra texcoord reg */
2877 if (!submit_op1( emit,
2878 inst_token(SVGA3DOP_MOV),
2879 writemask(depth, TGSI_WRITEMASK_ZW),
2880 src(temp_pos) ))
2881 return FALSE;
2882 }
2883
2884 return TRUE;
2885 }
2886
2887 /*
2888 0: IF VFACE :4
2889 1: COLOR = FrontColor;
2890 2: ELSE
2891 3: COLOR = BackColor;
2892 4: ENDIF
2893 */
2894 static boolean emit_light_twoside( struct svga_shader_emitter *emit )
2895 {
2896 struct src_register vface, zero;
2897 struct src_register front[2];
2898 struct src_register back[2];
2899 SVGA3dShaderDestToken color[2];
2900 int count = emit->internal_color_count;
2901 int i;
2902 SVGA3dShaderInstToken if_token;
2903
2904 if (count == 0)
2905 return TRUE;
2906
2907 vface = get_vface( emit );
2908 zero = get_zero_immediate( emit );
2909
2910 /* Can't use get_temp() to allocate the color reg as such
2911 * temporaries will be reclaimed after each instruction by the call
2912 * to reset_temp_regs().
2913 */
2914 for (i = 0; i < count; i++) {
2915 color[i] = dst_register( SVGA3DREG_TEMP,
2916 emit->nr_hw_temp++ );
2917
2918 front[i] = emit->input_map[emit->internal_color_idx[i]];
2919
2920 /* Back is always the next input:
2921 */
2922 back[i] = front[i];
2923 back[i].base.num = front[i].base.num + 1;
2924
2925 /* Reassign the input_map to the actual front-face color:
2926 */
2927 emit->input_map[emit->internal_color_idx[i]] = src(color[i]);
2928 }
2929
2930 if_token = inst_token( SVGA3DOP_IFC );
2931
2932 if (emit->key.fkey.front_ccw)
2933 if_token.control = SVGA3DOPCOMP_LT;
2934 else
2935 if_token.control = SVGA3DOPCOMP_GT;
2936
2937 zero = scalar(zero, TGSI_SWIZZLE_X);
2938
2939 if (!(emit_instruction( emit, if_token ) &&
2940 emit_src( emit, vface ) &&
2941 emit_src( emit, zero ) ))
2942 return FALSE;
2943
2944 for (i = 0; i < count; i++) {
2945 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), color[i], front[i] ))
2946 return FALSE;
2947 }
2948
2949 if (!(emit_instruction( emit, inst_token( SVGA3DOP_ELSE))))
2950 return FALSE;
2951
2952 for (i = 0; i < count; i++) {
2953 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), color[i], back[i] ))
2954 return FALSE;
2955 }
2956
2957 if (!emit_instruction( emit, inst_token( SVGA3DOP_ENDIF ) ))
2958 return FALSE;
2959
2960 return TRUE;
2961 }
2962
2963 /*
2964 0: SETP_GT TEMP, VFACE, 0
2965 where TEMP is a fake frontface register
2966 */
2967 static boolean emit_frontface( struct svga_shader_emitter *emit )
2968 {
2969 struct src_register vface, zero;
2970 SVGA3dShaderDestToken temp;
2971 struct src_register pass, fail;
2972
2973 vface = get_vface( emit );
2974 zero = get_zero_immediate( emit );
2975
2976 /* Can't use get_temp() to allocate the fake frontface reg as such
2977 * temporaries will be reclaimed after each instruction by the call
2978 * to reset_temp_regs().
2979 */
2980 temp = dst_register( SVGA3DREG_TEMP,
2981 emit->nr_hw_temp++ );
2982
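   /* The zero/one immediate appears to hold {0,0,0,1}, so .w supplies the
    * "is front-facing" value (1.0) and .x the back-facing value (0.0);
    * front_ccw merely swaps which one is selected on pass vs. fail.
    */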
2983 if (emit->key.fkey.front_ccw) {
2984 pass = scalar( zero, TGSI_SWIZZLE_X );
2985 fail = scalar( zero, TGSI_SWIZZLE_W );
2986 } else {
2987 pass = scalar( zero, TGSI_SWIZZLE_W );
2988 fail = scalar( zero, TGSI_SWIZZLE_X );
2989 }
2990
2991 if (!emit_conditional(emit, PIPE_FUNC_GREATER,
2992 temp, vface, scalar( zero, TGSI_SWIZZLE_X ),
2993 pass, fail))
2994 return FALSE;
2995
2996 /* Reassign the input_map to the actual front-face color:
2997 */
2998 emit->input_map[emit->internal_frontface_idx] = src(temp);
2999
3000 return TRUE;
3001 }
3002
3003
3004 /**
3005 * Emit code to invert the T component of the incoming texture coordinate.
3006 * This is used for drawing point sprites when
3007 * pipe_rasterizer_state::sprite_coord_mode == PIPE_SPRITE_COORD_LOWER_LEFT.
3008 */
3009 static boolean emit_inverted_texcoords( struct svga_shader_emitter *emit )
3010 {
3011 struct src_register zero = get_zero_immediate(emit);
3012 struct src_register pos_neg_one = get_pos_neg_one_immediate( emit );
3013 unsigned inverted_texcoords = emit->inverted_texcoords;
3014
3015 while (inverted_texcoords) {
3016 const unsigned unit = ffs(inverted_texcoords) - 1;
3017
3018 assert(emit->inverted_texcoords & (1 << unit));
3019
3020 assert(unit < Elements(emit->ps_true_texcoord));
3021
3022 assert(unit < Elements(emit->ps_inverted_texcoord_input));
3023
3024 assert(emit->ps_inverted_texcoord_input[unit]
3025 < Elements(emit->input_map));
3026
3027 /* inverted = coord * (1, -1, 1, 1) + (0, 1, 0, 0) */
3028 if (!submit_op3(emit,
3029 inst_token(SVGA3DOP_MAD),
3030 dst(emit->ps_inverted_texcoord[unit]),
3031 emit->ps_true_texcoord[unit],
3032 swizzle(pos_neg_one, 0, 3, 0, 0), /* (1, -1, 1, 1) */
3033 swizzle(zero, 0, 3, 0, 0))) /* (0, 1, 0, 0) */
3034 return FALSE;
3035
3036 /* Reassign the input_map entry to the new texcoord register */
3037 emit->input_map[emit->ps_inverted_texcoord_input[unit]] =
3038 emit->ps_inverted_texcoord[unit];
3039
3040 inverted_texcoords &= ~(1 << unit);
3041 }
3042
3043 return TRUE;
3044 }
3045
3046
3047 static INLINE boolean
3048 needs_to_create_zero( struct svga_shader_emitter *emit )
3049 {
3050 int i;
3051
3052 if (emit->unit == PIPE_SHADER_FRAGMENT) {
3053 if (!emit->use_sm30)
3054 return TRUE;
3055
3056 if (emit->key.fkey.light_twoside)
3057 return TRUE;
3058
3059 if (emit->key.fkey.white_fragments)
3060 return TRUE;
3061
3062 if (emit->emit_frontface)
3063 return TRUE;
3064
3065 if (emit->info.opcode_count[TGSI_OPCODE_DST] >= 1 ||
3066 emit->info.opcode_count[TGSI_OPCODE_SSG] >= 1 ||
3067 emit->info.opcode_count[TGSI_OPCODE_LIT] >= 1)
3068 return TRUE;
3069
3070 if (emit->inverted_texcoords)
3071 return TRUE;
3072
3073 /* look for any PIPE_SWIZZLE_ZERO/ONE terms */
3074 for (i = 0; i < emit->key.fkey.num_textures; i++) {
3075 if (emit->key.fkey.tex[i].swizzle_r > PIPE_SWIZZLE_ALPHA ||
3076 emit->key.fkey.tex[i].swizzle_g > PIPE_SWIZZLE_ALPHA ||
3077 emit->key.fkey.tex[i].swizzle_b > PIPE_SWIZZLE_ALPHA ||
3078 emit->key.fkey.tex[i].swizzle_a > PIPE_SWIZZLE_ALPHA)
3079 return TRUE;
3080 }
3081 }
3082
3083 if (emit->unit == PIPE_SHADER_VERTEX) {
3084 if (emit->info.opcode_count[TGSI_OPCODE_CMP] >= 1)
3085 return TRUE;
3086 }
3087
3088 if (emit->info.opcode_count[TGSI_OPCODE_IF] >= 1 ||
3089 emit->info.opcode_count[TGSI_OPCODE_BGNLOOP] >= 1 ||
3090 emit->info.opcode_count[TGSI_OPCODE_DDX] >= 1 ||
3091 emit->info.opcode_count[TGSI_OPCODE_DDY] >= 1 ||
3092 emit->info.opcode_count[TGSI_OPCODE_SGE] >= 1 ||
3093 emit->info.opcode_count[TGSI_OPCODE_SGT] >= 1 ||
3094 emit->info.opcode_count[TGSI_OPCODE_SLE] >= 1 ||
3095 emit->info.opcode_count[TGSI_OPCODE_SLT] >= 1 ||
3096 emit->info.opcode_count[TGSI_OPCODE_SNE] >= 1 ||
3097 emit->info.opcode_count[TGSI_OPCODE_SEQ] >= 1 ||
3098 emit->info.opcode_count[TGSI_OPCODE_EXP] >= 1 ||
3099 emit->info.opcode_count[TGSI_OPCODE_LOG] >= 1 ||
3100 emit->info.opcode_count[TGSI_OPCODE_XPD] >= 1 ||
3101 emit->info.opcode_count[TGSI_OPCODE_KILP] >= 1)
3102 return TRUE;
3103
3104 for (i = 0; i < emit->key.fkey.num_textures; i++) {
3105 if (emit->key.fkey.tex[i].compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE)
3106 return TRUE;
3107 }
3108
3109 return FALSE;
3110 }
3111
3112 static INLINE boolean
3113 needs_to_create_loop_const( struct svga_shader_emitter *emit )
3114 {
3115 return (emit->info.opcode_count[TGSI_OPCODE_BGNLOOP] >= 1);
3116 }
3117
3118 static INLINE boolean
3119 needs_to_create_sincos_consts( struct svga_shader_emitter *emit )
3120 {
3121 return !emit->use_sm30 && (emit->info.opcode_count[TGSI_OPCODE_SIN] >= 1 ||
3122 emit->info.opcode_count[TGSI_OPCODE_COS] >= 1 ||
3123 emit->info.opcode_count[TGSI_OPCODE_SCS] >= 1);
3124 }
3125
3126 static INLINE boolean
3127 needs_to_create_arl_consts( struct svga_shader_emitter *emit )
3128 {
3129 return (emit->num_arl_consts > 0);
3130 }
3131
3132 static INLINE boolean
3133 pre_parse_add_indirect( struct svga_shader_emitter *emit,
3134 int num, int current_arl)
3135 {
3136 int i;
3137 assert(num < 0);
3138
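   /* Record, per ARL instance, the most negative constant offset seen with
    * indirect addressing; this is presumably what create_arl_consts() uses
    * later to emit a suitable bias constant.
    */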
3139 for (i = 0; i < emit->num_arl_consts; ++i) {
3140 if (emit->arl_consts[i].arl_num == current_arl)
3141 break;
3142 }
3143 /* new entry */
3144 if (emit->num_arl_consts == i) {
3145 ++emit->num_arl_consts;
3146 }
3147 emit->arl_consts[i].number = (emit->arl_consts[i].number > num) ?
3148 num :
3149 emit->arl_consts[i].number;
3150 emit->arl_consts[i].arl_num = current_arl;
3151 return TRUE;
3152 }
3153
3154 static boolean
3155 pre_parse_instruction( struct svga_shader_emitter *emit,
3156 const struct tgsi_full_instruction *insn,
3157 int current_arl)
3158 {
3159 if (insn->Src[0].Register.Indirect &&
3160 insn->Src[0].Indirect.File == TGSI_FILE_ADDRESS) {
3161 const struct tgsi_full_src_register *reg = &insn->Src[0];
3162 if (reg->Register.Index < 0) {
3163 pre_parse_add_indirect(emit, reg->Register.Index, current_arl);
3164 }
3165 }
3166
3167 if (insn->Src[1].Register.Indirect &&
3168 insn->Src[1].Indirect.File == TGSI_FILE_ADDRESS) {
3169 const struct tgsi_full_src_register *reg = &insn->Src[1];
3170 if (reg->Register.Index < 0) {
3171 pre_parse_add_indirect(emit, reg->Register.Index, current_arl);
3172 }
3173 }
3174
3175 if (insn->Src[2].Register.Indirect &&
3176 insn->Src[2].Indirect.File == TGSI_FILE_ADDRESS) {
3177 const struct tgsi_full_src_register *reg = &insn->Src[2];
3178 if (reg->Register.Index < 0) {
3179 pre_parse_add_indirect(emit, reg->Register.Index, current_arl);
3180 }
3181 }
3182
3183 return TRUE;
3184 }
3185
3186 static boolean
3187 pre_parse_tokens( struct svga_shader_emitter *emit,
3188 const struct tgsi_token *tokens )
3189 {
3190 struct tgsi_parse_context parse;
3191 int current_arl = 0;
3192
3193 tgsi_parse_init( &parse, tokens );
3194
3195 while (!tgsi_parse_end_of_tokens( &parse )) {
3196 tgsi_parse_token( &parse );
3197 switch (parse.FullToken.Token.Type) {
3198 case TGSI_TOKEN_TYPE_IMMEDIATE:
3199 case TGSI_TOKEN_TYPE_DECLARATION:
3200 break;
3201 case TGSI_TOKEN_TYPE_INSTRUCTION:
3202 if (parse.FullToken.FullInstruction.Instruction.Opcode ==
3203 TGSI_OPCODE_ARL) {
3204 ++current_arl;
3205 }
3206 if (!pre_parse_instruction( emit, &parse.FullToken.FullInstruction,
3207 current_arl ))
3208 return FALSE;
3209 break;
3210 default:
3211 break;
3212 }
3213
3214 }
3215 return TRUE;
3216 }
3217
3218 static boolean svga_shader_emit_helpers( struct svga_shader_emitter *emit )
3220 {
3221 if (needs_to_create_zero( emit )) {
3222 create_zero_immediate( emit );
3223 }
3224 if (needs_to_create_loop_const( emit )) {
3225 create_loop_const( emit );
3226 }
3227 if (needs_to_create_sincos_consts( emit )) {
3228 create_sincos_consts( emit );
3229 }
3230 if (needs_to_create_arl_consts( emit )) {
3231 create_arl_consts( emit );
3232 }
3233
3234 if (emit->unit == PIPE_SHADER_FRAGMENT) {
3235 if (!emit_ps_preamble( emit ))
3236 return FALSE;
3237
3238 if (emit->key.fkey.light_twoside) {
3239 if (!emit_light_twoside( emit ))
3240 return FALSE;
3241 }
3242 if (emit->emit_frontface) {
3243 if (!emit_frontface( emit ))
3244 return FALSE;
3245 }
3246 if (emit->inverted_texcoords) {
3247 if (!emit_inverted_texcoords( emit ))
3248 return FALSE;
3249 }
3250 }
3251
3252 return TRUE;
3253 }
3254
3255 boolean svga_shader_emit_instructions( struct svga_shader_emitter *emit,
3256 const struct tgsi_token *tokens )
3257 {
3258 struct tgsi_parse_context parse;
3259 boolean ret = TRUE;
3260 boolean helpers_emitted = FALSE;
3261 unsigned line_nr = 0;
3262
3263 tgsi_parse_init( &parse, tokens );
3264 emit->internal_imm_count = 0;
3265
3266 if (emit->unit == PIPE_SHADER_VERTEX) {
3267 ret = emit_vs_preamble( emit );
3268 if (!ret)
3269 goto done;
3270 }
3271
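   /* First pass over the token stream: gather information (such as
    * negative indirect constant offsets per ARL) that must be known before
    * any instructions are emitted.
    */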
3272 pre_parse_tokens(emit, tokens);
3273
3274 while (!tgsi_parse_end_of_tokens( &parse )) {
3275 tgsi_parse_token( &parse );
3276
3277 switch (parse.FullToken.Token.Type) {
3278 case TGSI_TOKEN_TYPE_IMMEDIATE:
3279 ret = svga_emit_immediate( emit, &parse.FullToken.FullImmediate );
3280 if (!ret)
3281 goto done;
3282 break;
3283
3284 case TGSI_TOKEN_TYPE_DECLARATION:
3285 if (emit->use_sm30)
3286 ret = svga_translate_decl_sm30( emit, &parse.FullToken.FullDeclaration );
3287 else
3288 ret = svga_translate_decl_sm20( emit, &parse.FullToken.FullDeclaration );
3289 if (!ret)
3290 goto done;
3291 break;
3292
3293 case TGSI_TOKEN_TYPE_INSTRUCTION:
3294 if (!helpers_emitted) {
3295 if (!svga_shader_emit_helpers( emit ))
3296 goto done;
3297 helpers_emitted = TRUE;
3298 }
3299 ret = svga_emit_instruction( emit,
3300 line_nr++,
3301 &parse.FullToken.FullInstruction );
3302 if (!ret)
3303 goto done;
3304 break;
3305 default:
3306 break;
3307 }
3308
3309 reset_temp_regs( emit );
3310 }
3311
3312    /* If we finished while emitting a subroutine, terminate it here with
3313     * RET; the hardware doesn't tolerate subroutines that aren't
3314     * terminated with RET before the final END.
3315     */
3316 if (!emit->in_main_func) {
3317 ret = emit_instruction( emit, inst_token( SVGA3DOP_RET ) );
3318 if (!ret)
3319 goto done;
3320 }
3321
3322 assert(emit->dynamic_branching_level == 0);
3323
3324 /* Need to terminate the whole shader:
3325 */
3326 ret = emit_instruction( emit, inst_token( SVGA3DOP_END ) );
3327 if (!ret)
3328 goto done;
3329
3330 done:
3331 tgsi_parse_free( &parse );
3332 return ret;
3333 }
3334