src/gallium/drivers/svga/svga_tgsi_insn.c
1 /**********************************************************
2 * Copyright 2008-2009 VMware, Inc. All rights reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person
5 * obtaining a copy of this software and associated documentation
6 * files (the "Software"), to deal in the Software without
7 * restriction, including without limitation the rights to use, copy,
8 * modify, merge, publish, distribute, sublicense, and/or sell copies
9 * of the Software, and to permit persons to whom the Software is
10 * furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice shall be
13 * included in all copies or substantial portions of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
19 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
20 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
21 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 *
24 **********************************************************/
25
26
27 #include "pipe/p_shader_tokens.h"
28 #include "tgsi/tgsi_parse.h"
29 #include "util/u_memory.h"
30
31 #include "svga_tgsi_emit.h"
32 #include "svga_context.h"
33
34
35 static boolean emit_vs_postamble( struct svga_shader_emitter *emit );
36 static boolean emit_ps_postamble( struct svga_shader_emitter *emit );
37
38
39
40
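/* Map a TGSI opcode to the equivalent SVGA3D shader opcode, for the
 * opcodes that translate one-to-one.  Opcodes that need expansion are
 * handled by the emit_* helpers further below.
 */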
41 static unsigned
42 translate_opcode(
43 uint opcode )
44 {
45 switch (opcode) {
46 case TGSI_OPCODE_ABS: return SVGA3DOP_ABS;
47 case TGSI_OPCODE_ADD: return SVGA3DOP_ADD;
48 case TGSI_OPCODE_BREAKC: return SVGA3DOP_BREAKC;
49 case TGSI_OPCODE_DP2A: return SVGA3DOP_DP2ADD;
50 case TGSI_OPCODE_DP3: return SVGA3DOP_DP3;
51 case TGSI_OPCODE_DP4: return SVGA3DOP_DP4;
52 case TGSI_OPCODE_ENDFOR: return SVGA3DOP_ENDLOOP;
53 case TGSI_OPCODE_FRC: return SVGA3DOP_FRC;
54 case TGSI_OPCODE_BGNFOR: return SVGA3DOP_LOOP;
55 case TGSI_OPCODE_MAD: return SVGA3DOP_MAD;
56 case TGSI_OPCODE_MAX: return SVGA3DOP_MAX;
57 case TGSI_OPCODE_MIN: return SVGA3DOP_MIN;
58 case TGSI_OPCODE_MOV: return SVGA3DOP_MOV;
59 case TGSI_OPCODE_MUL: return SVGA3DOP_MUL;
60 case TGSI_OPCODE_NOP: return SVGA3DOP_NOP;
61 case TGSI_OPCODE_NRM4: return SVGA3DOP_NRM;
62 case TGSI_OPCODE_SSG: return SVGA3DOP_SGN;
63 default:
64 debug_printf("Unknown opcode %u\n", opcode);
65 assert( 0 );
66 return SVGA3DOP_LAST_INST;
67 }
68 }
69
70
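/* Map a TGSI register file to the corresponding SVGA3D register file.
 */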
71 static unsigned translate_file( unsigned file )
72 {
73 switch (file) {
74 case TGSI_FILE_TEMPORARY: return SVGA3DREG_TEMP;
75 case TGSI_FILE_INPUT: return SVGA3DREG_INPUT;
76 case TGSI_FILE_OUTPUT: return SVGA3DREG_OUTPUT; /* VS3.0+ only */
77 case TGSI_FILE_IMMEDIATE: return SVGA3DREG_CONST;
78 case TGSI_FILE_CONSTANT: return SVGA3DREG_CONST;
79 case TGSI_FILE_SAMPLER: return SVGA3DREG_SAMPLER;
80 case TGSI_FILE_ADDRESS: return SVGA3DREG_ADDR;
81 default:
82 assert( 0 );
83 return SVGA3DREG_TEMP;
84 }
85 }
86
87
88
89
90
91
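/* Translate a TGSI destination register into an SVGA3D destination token,
 * applying the output semantic mapping, write mask and saturate modifier.
 */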
92 static SVGA3dShaderDestToken
93 translate_dst_register( struct svga_shader_emitter *emit,
94 const struct tgsi_full_instruction *insn,
95 unsigned idx )
96 {
97 const struct tgsi_full_dst_register *reg = &insn->Dst[idx];
98 SVGA3dShaderDestToken dest;
99
100 switch (reg->Register.File) {
101 case TGSI_FILE_OUTPUT:
102 /* Output registers encode semantic information in their name.
103 * Need to look up a table built at decl time:
104 */
105 dest = emit->output_map[reg->Register.Index];
106 break;
107
108 default:
109 dest = dst_register( translate_file( reg->Register.File ),
110 reg->Register.Index );
111 break;
112 }
113
114 dest.mask = reg->Register.WriteMask;
115 assert(dest.mask);
116
117 if (insn->Instruction.Saturate)
118 dest.dstMod = SVGA3DDSTMOD_SATURATE;
119
120 return dest;
121 }
122
123
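/* Apply a swizzle to a source register, composing it with the swizzle
 * already present on that register.  scalar() below replicates a single
 * component across all four channels.
 */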
124 static struct src_register
125 swizzle( struct src_register src,
126 int x,
127 int y,
128 int z,
129 int w )
130 {
131 x = (src.base.swizzle >> (x * 2)) & 0x3;
132 y = (src.base.swizzle >> (y * 2)) & 0x3;
133 z = (src.base.swizzle >> (z * 2)) & 0x3;
134 w = (src.base.swizzle >> (w * 2)) & 0x3;
135
136 src.base.swizzle = TRANSLATE_SWIZZLE(x,y,z,w);
137
138 return src;
139 }
140
141 static struct src_register
142 scalar( struct src_register src,
143 int comp )
144 {
145 return swizzle( src, comp, comp, comp, comp );
146 }
147
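/* Returns TRUE if the current ARL instruction addresses one of the
 * constants recorded in arl_consts, in which case it is emulated by
 * emit_fake_arl().
 */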
148 static INLINE boolean
149 svga_arl_needs_adjustment( const struct svga_shader_emitter *emit )
150 {
151 int i;
152
153 for (i = 0; i < emit->num_arl_consts; ++i) {
154 if (emit->arl_consts[i].arl_num == emit->current_arl)
155 return TRUE;
156 }
157 return FALSE;
158 }
159
160 static INLINE int
161 svga_arl_adjustment( const struct svga_shader_emitter *emit )
162 {
163 int i;
164
165 for (i = 0; i < emit->num_arl_consts; ++i) {
166 if (emit->arl_consts[i].arl_num == emit->current_arl)
167 return emit->arl_consts[i].number;
168 }
169 return 0;
170 }
171
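/* Translate a TGSI source register into an SVGA3D source register,
 * handling the input semantic mapping, immediate relocation, indirect
 * addressing, swizzles and negate/absolute modifiers.
 */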
172 static struct src_register
173 translate_src_register( const struct svga_shader_emitter *emit,
174 const struct tgsi_full_src_register *reg )
175 {
176 struct src_register src;
177
178 switch (reg->Register.File) {
179 case TGSI_FILE_INPUT:
180 /* Input registers are referred to by their semantic name rather
181 * than by index. Use the mapping built up from the decls:
182 */
183 src = emit->input_map[reg->Register.Index];
184 break;
185
186 case TGSI_FILE_IMMEDIATE:
187 /* Immediates are appended after TGSI constants in the D3D
188 * constant buffer.
189 */
190 src = src_register( translate_file( reg->Register.File ),
191 reg->Register.Index +
192 emit->imm_start );
193 break;
194
195 default:
196 src = src_register( translate_file( reg->Register.File ),
197 reg->Register.Index );
198
199 break;
200 }
201
202 /* Indirect addressing (for constant buffer lookups only)
203 */
204 if (reg->Register.Indirect)
205 {
206 /* we shift the offset towards the minimum */
207 if (svga_arl_needs_adjustment( emit )) {
208 src.base.num -= svga_arl_adjustment( emit );
209 }
210 src.base.relAddr = 1;
211
212 /* Not really sure what should go in the second token:
213 */
214 src.indirect = src_token( SVGA3DREG_ADDR,
215 reg->Indirect.Index );
216
217 src.indirect.swizzle = SWIZZLE_XXXX;
218 }
219
220 src = swizzle( src,
221 reg->Register.SwizzleX,
222 reg->Register.SwizzleY,
223 reg->Register.SwizzleZ,
224 reg->Register.SwizzleW );
225
226 /* src.mod isn't a bitfield, unfortunately:
227 * See tgsi_util_get_full_src_register_sign_mode for implementation details.
228 */
229 if (reg->Register.Absolute) {
230 if (reg->Register.Negate)
231 src.base.srcMod = SVGA3DSRCMOD_ABSNEG;
232 else
233 src.base.srcMod = SVGA3DSRCMOD_ABS;
234 }
235 else {
236 if (reg->Register.Negate)
237 src.base.srcMod = SVGA3DSRCMOD_NEG;
238 else
239 src.base.srcMod = SVGA3DSRCMOD_NONE;
240 }
241
242 return src;
243 }
244
245
246 /*
247 * Get a new temporary register.
248 */
249 static INLINE SVGA3dShaderDestToken
250 get_temp( struct svga_shader_emitter *emit )
251 {
252 int i = emit->nr_hw_temp + emit->internal_temp_count++;
253
254 return dst_register( SVGA3DREG_TEMP, i );
255 }
256
257 /* Release a single temp. Currently only effective if it was the last
258 * allocated temp, otherwise release will be delayed until the next
259 * call to reset_temp_regs().
260 */
261 static INLINE void
262 release_temp( struct svga_shader_emitter *emit,
263 SVGA3dShaderDestToken temp )
264 {
265 if (temp.num == emit->internal_temp_count - 1)
266 emit->internal_temp_count--;
267 }
268
269 static void reset_temp_regs( struct svga_shader_emitter *emit )
270 {
271 emit->internal_temp_count = 0;
272 }
273
274
275 static boolean submit_op0( struct svga_shader_emitter *emit,
276 SVGA3dShaderInstToken inst,
277 SVGA3dShaderDestToken dest )
278 {
279 return (emit_instruction( emit, inst ) &&
280 emit_dst( emit, dest ));
281 }
282
283 static boolean submit_op1( struct svga_shader_emitter *emit,
284 SVGA3dShaderInstToken inst,
285 SVGA3dShaderDestToken dest,
286 struct src_register src0 )
287 {
288 return emit_op1( emit, inst, dest, src0 );
289 }
290
291
292 /* SVGA shaders may not refer to >1 constant register in a single
293 * instruction. This function checks for that usage and inserts a
294 * move to temporary if detected.
295 *
296 * The same applies to input registers -- at most a single input
297 * register may be read by any instruction.
298 */
299 static boolean submit_op2( struct svga_shader_emitter *emit,
300 SVGA3dShaderInstToken inst,
301 SVGA3dShaderDestToken dest,
302 struct src_register src0,
303 struct src_register src1 )
304 {
305 SVGA3dShaderDestToken temp;
306 SVGA3dShaderRegType type0, type1;
307 boolean need_temp = FALSE;
308
309 temp.value = 0;
310 type0 = SVGA3dShaderGetRegType( src0.base.value );
311 type1 = SVGA3dShaderGetRegType( src1.base.value );
312
313 if (type0 == SVGA3DREG_CONST &&
314 type1 == SVGA3DREG_CONST &&
315 src0.base.num != src1.base.num)
316 need_temp = TRUE;
317
318 if (type0 == SVGA3DREG_INPUT &&
319 type1 == SVGA3DREG_INPUT &&
320 src0.base.num != src1.base.num)
321 need_temp = TRUE;
322
323 if (need_temp)
324 {
325 temp = get_temp( emit );
326
327 if (!emit_op1( emit, inst_token( SVGA3DOP_MOV ), temp, src0 ))
328 return FALSE;
329
330 src0 = src( temp );
331 }
332
333 if (!emit_op2( emit, inst, dest, src0, src1 ))
334 return FALSE;
335
336 if (need_temp)
337 release_temp( emit, temp );
338
339 return TRUE;
340 }
341
342
343 /* SVGA shaders may not refer to >1 constant register in a single
344 * instruction. This function checks for that usage and inserts a
345 * move to temporary if detected.
346 */
347 static boolean submit_op3( struct svga_shader_emitter *emit,
348 SVGA3dShaderInstToken inst,
349 SVGA3dShaderDestToken dest,
350 struct src_register src0,
351 struct src_register src1,
352 struct src_register src2 )
353 {
354 SVGA3dShaderDestToken temp0;
355 SVGA3dShaderDestToken temp1;
356 boolean need_temp0 = FALSE;
357 boolean need_temp1 = FALSE;
358 SVGA3dShaderRegType type0, type1, type2;
359
360 temp0.value = 0;
361 temp1.value = 0;
362 type0 = SVGA3dShaderGetRegType( src0.base.value );
363 type1 = SVGA3dShaderGetRegType( src1.base.value );
364 type2 = SVGA3dShaderGetRegType( src2.base.value );
365
366 if (inst.op != SVGA3DOP_SINCOS) {
367 if (type0 == SVGA3DREG_CONST &&
368 ((type1 == SVGA3DREG_CONST && src0.base.num != src1.base.num) ||
369 (type2 == SVGA3DREG_CONST && src0.base.num != src2.base.num)))
370 need_temp0 = TRUE;
371
372 if (type1 == SVGA3DREG_CONST &&
373 (type2 == SVGA3DREG_CONST && src1.base.num != src2.base.num))
374 need_temp1 = TRUE;
375 }
376
377 if (type0 == SVGA3DREG_INPUT &&
378 ((type1 == SVGA3DREG_INPUT && src0.base.num != src1.base.num) ||
379 (type2 == SVGA3DREG_INPUT && src0.base.num != src2.base.num)))
380 need_temp0 = TRUE;
381
382 if (type1 == SVGA3DREG_INPUT &&
383 (type2 == SVGA3DREG_INPUT && src1.base.num != src2.base.num))
384 need_temp1 = TRUE;
385
386 if (need_temp0)
387 {
388 temp0 = get_temp( emit );
389
390 if (!emit_op1( emit, inst_token( SVGA3DOP_MOV ), temp0, src0 ))
391 return FALSE;
392
393 src0 = src( temp0 );
394 }
395
396 if (need_temp1)
397 {
398 temp1 = get_temp( emit );
399
400 if (!emit_op1( emit, inst_token( SVGA3DOP_MOV ), temp1, src1 ))
401 return FALSE;
402
403 src1 = src( temp1 );
404 }
405
406 if (!emit_op3( emit, inst, dest, src0, src1, src2 ))
407 return FALSE;
408
409 if (need_temp1)
410 release_temp( emit, temp1 );
411 if (need_temp0)
412 release_temp( emit, temp0 );
413 return TRUE;
414 }
415
416
417
418
419 /* SVGA shaders may not refer to >1 constant register in a single
420 * instruction. This function checks for that usage and inserts a
421 * move to temporary if detected.
422 */
423 static boolean submit_op4( struct svga_shader_emitter *emit,
424 SVGA3dShaderInstToken inst,
425 SVGA3dShaderDestToken dest,
426 struct src_register src0,
427 struct src_register src1,
428 struct src_register src2,
429 struct src_register src3)
430 {
431 SVGA3dShaderDestToken temp0;
432 SVGA3dShaderDestToken temp3;
433 boolean need_temp0 = FALSE;
434 boolean need_temp3 = FALSE;
435 SVGA3dShaderRegType type0, type1, type2, type3;
436
437 temp0.value = 0;
438 temp3.value = 0;
439 type0 = SVGA3dShaderGetRegType( src0.base.value );
440 type1 = SVGA3dShaderGetRegType( src1.base.value );
441 type2 = SVGA3dShaderGetRegType( src2.base.value );
442 type3 = SVGA3dShaderGetRegType( src3.base.value );
443
444 /* Make life a little easier - this is only used by the TXD
445 * instruction, which is guaranteed to have a sampler (rather than a
446 * constant or input reg) in at least one slot:
447 */
448 assert(type1 == SVGA3DREG_SAMPLER);
449
450 if (type0 == SVGA3DREG_CONST &&
451 ((type3 == SVGA3DREG_CONST && src0.base.num != src3.base.num) ||
452 (type2 == SVGA3DREG_CONST && src0.base.num != src2.base.num)))
453 need_temp0 = TRUE;
454
455 if (type3 == SVGA3DREG_CONST &&
456 (type2 == SVGA3DREG_CONST && src3.base.num != src2.base.num))
457 need_temp3 = TRUE;
458
459 if (type0 == SVGA3DREG_INPUT &&
460 ((type3 == SVGA3DREG_INPUT && src0.base.num != src3.base.num) ||
461 (type2 == SVGA3DREG_INPUT && src0.base.num != src2.base.num)))
462 need_temp0 = TRUE;
463
464 if (type3 == SVGA3DREG_INPUT &&
465 (type2 == SVGA3DREG_INPUT && src3.base.num != src2.base.num))
466 need_temp3 = TRUE;
467
468 if (need_temp0)
469 {
470 temp0 = get_temp( emit );
471
472 if (!emit_op1( emit, inst_token( SVGA3DOP_MOV ), temp0, src0 ))
473 return FALSE;
474
475 src0 = src( temp0 );
476 }
477
478 if (need_temp3)
479 {
480 temp3 = get_temp( emit );
481
482 if (!emit_op1( emit, inst_token( SVGA3DOP_MOV ), temp3, src3 ))
483 return FALSE;
484
485 src3 = src( temp3 );
486 }
487
488 if (!emit_op4( emit, inst, dest, src0, src1, src2, src3 ))
489 return FALSE;
490
491 if (need_temp3)
492 release_temp( emit, temp3 );
493 if (need_temp0)
494 release_temp( emit, temp0 );
495 return TRUE;
496 }
497
498
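/* Emit a DEF (float) or DEFI (integer) instruction defining a literal
 * constant register in the shader.
 */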
499 static boolean emit_def_const( struct svga_shader_emitter *emit,
500 SVGA3dShaderConstType type,
501 unsigned idx,
502 float a,
503 float b,
504 float c,
505 float d )
506 {
507 SVGA3DOpDefArgs def;
508 SVGA3dShaderInstToken opcode;
509
510 switch (type) {
511 case SVGA3D_CONST_TYPE_FLOAT:
512 opcode = inst_token( SVGA3DOP_DEF );
513 def.dst = dst_register( SVGA3DREG_CONST, idx );
514 def.constValues[0] = a;
515 def.constValues[1] = b;
516 def.constValues[2] = c;
517 def.constValues[3] = d;
518 break;
519 case SVGA3D_CONST_TYPE_INT:
520 opcode = inst_token( SVGA3DOP_DEFI );
521 def.dst = dst_register( SVGA3DREG_CONSTINT, idx );
522 def.constIValues[0] = (int)a;
523 def.constIValues[1] = (int)b;
524 def.constIValues[2] = (int)c;
525 def.constIValues[3] = (int)d;
526 break;
527 default:
528 assert(0);
529 opcode = inst_token( SVGA3DOP_NOP );
530 break;
531 }
532
533 if (!emit_instruction(emit, opcode) ||
534 !svga_shader_emit_dwords( emit, def.values, Elements(def.values)))
535 return FALSE;
536
537 return TRUE;
538 }
539
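/* Emit the {0, 0, 0, 1} float constant used throughout the translator
 * as a ready-made source of 0.0 (x/y/z) and 1.0 (w).
 */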
540 static INLINE boolean
541 create_zero_immediate( struct svga_shader_emitter *emit )
542 {
543 unsigned idx = emit->nr_hw_const++;
544
545 if (!emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT,
546 idx, 0, 0, 0, 1 ))
547 return FALSE;
548
549 emit->zero_immediate_idx = idx;
550 emit->created_zero_immediate = TRUE;
551
552 return TRUE;
553 }
554
555 static INLINE boolean
556 create_loop_const( struct svga_shader_emitter *emit )
557 {
558 unsigned idx = emit->nr_hw_const++;
559
560 if (!emit_def_const( emit, SVGA3D_CONST_TYPE_INT, idx,
561 255, /* iteration count */
562 0, /* initial value */
563 1, /* step size */
564 0 /* not used, must be 0 */))
565 return FALSE;
566
567 emit->loop_const_idx = idx;
568 emit->created_loop_const = TRUE;
569
570 return TRUE;
571 }
572
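/* Emit the two constant registers of coefficients that SINCOS requires
 * as extra operands on pre-SM3.0 targets (see do_emit_sincos).
 */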
573 static INLINE boolean
574 create_sincos_consts( struct svga_shader_emitter *emit )
575 {
576 unsigned idx = emit->nr_hw_const++;
577
578 if (!emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT, idx,
579 -1.5500992e-006f,
580 -2.1701389e-005f,
581 0.0026041667f,
582 0.00026041668f ))
583 return FALSE;
584
585 emit->sincos_consts_idx = idx;
586 idx = emit->nr_hw_const++;
587
588 if (!emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT, idx,
589 -0.020833334f,
590 -0.12500000f,
591 1.0f,
592 0.50000000f ))
593 return FALSE;
594
595 emit->created_sincos_consts = TRUE;
596
597 return TRUE;
598 }
599
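/* Pack the constants referenced through relative (ARL) addressing into
 * hardware float constants, four values per register, recording the
 * constant index and component used for each.
 */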
600 static INLINE boolean
601 create_arl_consts( struct svga_shader_emitter *emit )
602 {
603 int i;
604
605 for (i = 0; i < emit->num_arl_consts; i += 4) {
606 int j;
607 unsigned idx = emit->nr_hw_const++;
608 float vals[4];
609 for (j = 0; j < 4 && (j + i) < emit->num_arl_consts; ++j) {
610 vals[j] = emit->arl_consts[i + j].number;
611 emit->arl_consts[i + j].idx = idx;
612 switch (j) {
613 case 0:
614 emit->arl_consts[i + 0].swizzle = TGSI_SWIZZLE_X;
615 break;
616 case 1:
617 emit->arl_consts[i + 1].swizzle = TGSI_SWIZZLE_Y;
618 break;
619 case 2:
620 emit->arl_consts[i + 2].swizzle = TGSI_SWIZZLE_Z;
621 break;
622 case 3:
623 emit->arl_consts[i + 3].swizzle = TGSI_SWIZZLE_W;
624 break;
625 }
626 }
627 while (j < 4)
628 vals[j++] = 0;
629
630 if (!emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT, idx,
631 vals[0], vals[1],
632 vals[2], vals[3]))
633 return FALSE;
634 }
635
636 return TRUE;
637 }
638
639 static INLINE struct src_register
640 get_vface( struct svga_shader_emitter *emit )
641 {
642 assert(emit->emitted_vface);
643 return src_register(SVGA3DREG_MISCTYPE,
644 SVGA3DMISCREG_FACE);
645 }
646
647 /* returns {0, 0, 0, 1} immediate */
648 static INLINE struct src_register
649 get_zero_immediate( struct svga_shader_emitter *emit )
650 {
651 assert(emit->created_zero_immediate);
652 assert(emit->zero_immediate_idx >= 0);
653 return src_register( SVGA3DREG_CONST,
654 emit->zero_immediate_idx );
655 }
656
657 /* returns the loop const */
658 static INLINE struct src_register
659 get_loop_const( struct svga_shader_emitter *emit )
660 {
661 assert(emit->created_loop_const);
662 assert(emit->loop_const_idx >= 0);
663 return src_register( SVGA3DREG_CONSTINT,
664 emit->loop_const_idx );
665 }
666
667 /* returns a sincos const */
668 static INLINE struct src_register
669 get_sincos_const( struct svga_shader_emitter *emit,
670 unsigned index )
671 {
672 assert(emit->created_sincos_consts);
673 assert(emit->sincos_consts_idx >= 0);
674 assert(index == 0 || index == 1);
675 return src_register( SVGA3DREG_CONST,
676 emit->sincos_consts_idx + index );
677 }
678
679 static INLINE struct src_register
680 get_fake_arl_const( struct svga_shader_emitter *emit )
681 {
682 struct src_register reg;
683 int idx = 0, swizzle = 0, i;
684
685 for (i = 0; i < emit->num_arl_consts; ++ i) {
686 if (emit->arl_consts[i].arl_num == emit->current_arl) {
687 idx = emit->arl_consts[i].idx;
688 swizzle = emit->arl_consts[i].swizzle;
689 }
690 }
691
692 reg = src_register( SVGA3DREG_CONST, idx );
693 return scalar(reg, swizzle);
694 }
695
696 static INLINE struct src_register
697 get_tex_dimensions( struct svga_shader_emitter *emit, int sampler_num )
698 {
699 int idx;
700 struct src_register reg;
701
702 /* the width/height indexes start right after constants */
703 idx = emit->key.fkey.tex[sampler_num].width_height_idx +
704 emit->info.file_max[TGSI_FILE_CONSTANT] + 1;
705
706 reg = src_register( SVGA3DREG_CONST, idx );
707 return reg;
708 }
709
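/* Emulate ARL for relative constant addressing: add the recorded offset
 * constant to the address value in a temporary, then emit MOVA with the
 * original swizzle.
 */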
710 static boolean emit_fake_arl(struct svga_shader_emitter *emit,
711 const struct tgsi_full_instruction *insn)
712 {
713 const struct src_register src0 = translate_src_register(
714 emit, &insn->Src[0] );
715 struct src_register src1 = get_fake_arl_const( emit );
716 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
717 SVGA3dShaderDestToken tmp = get_temp( emit );
718
719 if (!submit_op1(emit, inst_token( SVGA3DOP_MOV ), tmp, src0))
720 return FALSE;
721
722 if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ), tmp, src( tmp ),
723 src1))
724 return FALSE;
725
726 /* replicate the original swizzle */
727 src1 = src(tmp);
728 src1.base.swizzle = src0.base.swizzle;
729
730 return submit_op1( emit, inst_token( SVGA3DOP_MOVA ),
731 dst, src1 );
732 }
733
734 static boolean emit_if(struct svga_shader_emitter *emit,
735 const struct tgsi_full_instruction *insn)
736 {
737 const struct src_register src = translate_src_register(
738 emit, &insn->Src[0] );
739 struct src_register zero = get_zero_immediate( emit );
740 SVGA3dShaderInstToken if_token = inst_token( SVGA3DOP_IFC );
741
742 if_token.control = SVGA3DOPCOMPC_NE;
743 zero = scalar(zero, TGSI_SWIZZLE_X);
744
745 emit->dynamic_branching_level++;
746
747 return (emit_instruction( emit, if_token ) &&
748 emit_src( emit, src ) &&
749 emit_src( emit, zero ) );
750 }
751
752 static boolean emit_endif(struct svga_shader_emitter *emit,
753 const struct tgsi_full_instruction *insn)
754 {
755 emit->dynamic_branching_level--;
756
757 return (emit_instruction( emit,
758 inst_token( SVGA3DOP_ENDIF )));
759 }
760
761 static boolean emit_else(struct svga_shader_emitter *emit,
762 const struct tgsi_full_instruction *insn)
763 {
764 return (emit_instruction( emit,
765 inst_token( SVGA3DOP_ELSE )));
766 }
767
768 /* Translate the following TGSI FLR instruction.
769 * FLR DST, SRC
770 * To the following SVGA3D instruction sequence.
771 * FRC TMP, SRC
772 * SUB DST, SRC, TMP
773 */
774 static boolean emit_floor(struct svga_shader_emitter *emit,
775 const struct tgsi_full_instruction *insn )
776 {
777 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
778 const struct src_register src0 = translate_src_register(
779 emit, &insn->Src[0] );
780 SVGA3dShaderDestToken temp = get_temp( emit );
781
782 /* FRC TMP, SRC */
783 if (!submit_op1( emit, inst_token( SVGA3DOP_FRC ), temp, src0 ))
784 return FALSE;
785
786 /* SUB DST, SRC, TMP */
787 if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ), dst, src0,
788 negate( src( temp ) ) ))
789 return FALSE;
790
791 return TRUE;
792 }
793
794
795 /* Translate the following TGSI CMP instruction.
796 * CMP DST, SRC0, SRC1, SRC2
797 * To the following SVGA3D instruction sequence.
798 * CMP DST, SRC0, SRC2, SRC1
799 */
800 static boolean emit_cmp(struct svga_shader_emitter *emit,
801 const struct tgsi_full_instruction *insn )
802 {
803 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
804 const struct src_register src0 = translate_src_register(
805 emit, &insn->Src[0] );
806 const struct src_register src1 = translate_src_register(
807 emit, &insn->Src[1] );
808 const struct src_register src2 = translate_src_register(
809 emit, &insn->Src[2] );
810
811 /* CMP DST, SRC0, SRC2, SRC1 */
812 return submit_op3( emit, inst_token( SVGA3DOP_CMP ), dst, src0, src2, src1);
813 }
814
815
816
817 /* Translate the following TGSI DIV instruction.
818 * DIV DST.xy, SRC0, SRC1
819 * To the following SVGA3D instruction sequence.
820 * RCP TMP.x, SRC1.xxxx
821 * RCP TMP.y, SRC1.yyyy
822 * MUL DST.xy, SRC0, TMP
823 */
824 static boolean emit_div(struct svga_shader_emitter *emit,
825 const struct tgsi_full_instruction *insn )
826 {
827 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
828 const struct src_register src0 = translate_src_register(
829 emit, &insn->Src[0] );
830 const struct src_register src1 = translate_src_register(
831 emit, &insn->Src[1] );
832 SVGA3dShaderDestToken temp = get_temp( emit );
833 int i;
834
835 /* For each enabled element, perform an RCP instruction. Note that
836 * RCP is scalar in SVGA3D:
837 */
838 for (i = 0; i < 4; i++) {
839 unsigned channel = 1 << i;
840 if (dst.mask & channel) {
841 /* RCP TMP.?, SRC1.???? */
842 if (!submit_op1( emit, inst_token( SVGA3DOP_RCP ),
843 writemask(temp, channel),
844 scalar(src1, i) ))
845 return FALSE;
846 }
847 }
848
849 /* Then multiply them out with a single mul:
850 *
851 * MUL DST, SRC0, TMP
852 */
853 if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ), dst, src0,
854 src( temp ) ))
855 return FALSE;
856
857 return TRUE;
858 }
859
860 /* Translate the following TGSI DP2 instruction.
861 * DP2 DST, SRC1, SRC2
862 * To the following SVGA3D instruction sequence.
863 * MUL TMP, SRC1, SRC2
864 * ADD DST, TMP.xxxx, TMP.yyyy
865 */
866 static boolean emit_dp2(struct svga_shader_emitter *emit,
867 const struct tgsi_full_instruction *insn )
868 {
869 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
870 const struct src_register src0 = translate_src_register(
871 emit, &insn->Src[0] );
872 const struct src_register src1 = translate_src_register(
873 emit, &insn->Src[1] );
874 SVGA3dShaderDestToken temp = get_temp( emit );
875 struct src_register temp_src0, temp_src1;
876
877 /* MUL TMP, SRC1, SRC2 */
878 if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ), temp, src0, src1 ))
879 return FALSE;
880
881 temp_src0 = scalar(src( temp ), TGSI_SWIZZLE_X);
882 temp_src1 = scalar(src( temp ), TGSI_SWIZZLE_Y);
883
884 /* ADD DST, TMP.xxxx, TMP.yyyy */
885 if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ), dst,
886 temp_src0, temp_src1 ))
887 return FALSE;
888
889 return TRUE;
890 }
891
892
893 /* Translate the following TGSI DPH instruction.
894 * DPH DST, SRC1, SRC2
895 * To the following SVGA3D instruction sequence.
896 * DP3 TMP, SRC1, SRC2
897 * ADD DST, TMP, SRC2.wwww
898 */
899 static boolean emit_dph(struct svga_shader_emitter *emit,
900 const struct tgsi_full_instruction *insn )
901 {
902 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
903 const struct src_register src0 = translate_src_register(
904 emit, &insn->Src[0] );
905 struct src_register src1 = translate_src_register(
906 emit, &insn->Src[1] );
907 SVGA3dShaderDestToken temp = get_temp( emit );
908
909 /* DP3 TMP, SRC1, SRC2 */
910 if (!submit_op2( emit, inst_token( SVGA3DOP_DP3 ), temp, src0, src1 ))
911 return FALSE;
912
913 src1 = scalar(src1, TGSI_SWIZZLE_W);
914
915 /* ADD DST, TMP, SRC2.wwww */
916 if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ), dst,
917 src( temp ), src1 ))
918 return FALSE;
919
920 return TRUE;
921 }
922
923 /* Translate the following TGSI NRM instruction.
924 * NRM DST, SRC
925 * To the following SVGA3D instruction sequence.
926 * DP3 TMP, SRC, SRC
927 * RSQ TMP, TMP
928 * MUL DST, SRC, TMP
929 */
930 static boolean emit_nrm(struct svga_shader_emitter *emit,
931 const struct tgsi_full_instruction *insn )
932 {
933 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
934 const struct src_register src0 = translate_src_register(
935 emit, &insn->Src[0] );
936 SVGA3dShaderDestToken temp = get_temp( emit );
937
938 /* DP3 TMP, SRC, SRC */
939 if (!submit_op2( emit, inst_token( SVGA3DOP_DP3 ), temp, src0, src0 ))
940 return FALSE;
941
942 /* RSQ TMP, TMP */
943 if (!submit_op1( emit, inst_token( SVGA3DOP_RSQ ), temp, src( temp )))
944 return FALSE;
945
946 /* MUL DST, SRC, TMP */
947 if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ), dst,
948 src0, src( temp )))
949 return FALSE;
950
951 return TRUE;
952
953 }
954
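/* Emit a SINCOS instruction.  On SM3.0 this is a single op; on older
 * targets the two coefficient constants created by
 * create_sincos_consts() must be passed as extra sources.
 */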
955 static boolean do_emit_sincos(struct svga_shader_emitter *emit,
956 SVGA3dShaderDestToken dst,
957 struct src_register src0)
958 {
959 src0 = scalar(src0, TGSI_SWIZZLE_X);
960
961 if (emit->use_sm30) {
962 return submit_op1( emit, inst_token( SVGA3DOP_SINCOS ),
963 dst, src0 );
964 } else {
965 struct src_register const1 = get_sincos_const( emit, 0 );
966 struct src_register const2 = get_sincos_const( emit, 1 );
967
968 return submit_op3( emit, inst_token( SVGA3DOP_SINCOS ),
969 dst, src0, const1, const2 );
970 }
971 }
972
973 static boolean emit_sincos(struct svga_shader_emitter *emit,
974 const struct tgsi_full_instruction *insn)
975 {
976 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
977 struct src_register src0 = translate_src_register(
978 emit, &insn->Src[0] );
979 SVGA3dShaderDestToken temp = get_temp( emit );
980
981 /* SCS TMP SRC */
982 if (!do_emit_sincos(emit, writemask(temp, TGSI_WRITEMASK_XY), src0 ))
983 return FALSE;
984
985 /* MOV DST TMP */
986 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), dst, src( temp ) ))
987 return FALSE;
988
989 return TRUE;
990 }
991
992 /*
993 * SCS TMP SRC
994 * MOV DST TMP.yyyy
995 */
996 static boolean emit_sin(struct svga_shader_emitter *emit,
997 const struct tgsi_full_instruction *insn )
998 {
999 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1000 struct src_register src0 = translate_src_register(
1001 emit, &insn->Src[0] );
1002 SVGA3dShaderDestToken temp = get_temp( emit );
1003
1004 /* SCS TMP SRC */
1005 if (!do_emit_sincos(emit, writemask(temp, TGSI_WRITEMASK_Y), src0))
1006 return FALSE;
1007
1008 src0 = scalar(src( temp ), TGSI_SWIZZLE_Y);
1009
1010 /* MOV DST TMP.yyyy */
1011 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), dst, src0 ))
1012 return FALSE;
1013
1014 return TRUE;
1015 }
1016
1017 /*
1018 * SCS TMP SRC
1019 * MOV DST TMP.xxxx
1020 */
1021 static boolean emit_cos(struct svga_shader_emitter *emit,
1022 const struct tgsi_full_instruction *insn )
1023 {
1024 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1025 struct src_register src0 = translate_src_register(
1026 emit, &insn->Src[0] );
1027 SVGA3dShaderDestToken temp = get_temp( emit );
1028
1029 /* SCS TMP SRC */
1030 if (!do_emit_sincos( emit, writemask(temp, TGSI_WRITEMASK_X), src0 ))
1031 return FALSE;
1032
1033 src0 = scalar(src( temp ), TGSI_SWIZZLE_X);
1034
1035 /* MOV DST TMP.xxxx */
1036 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), dst, src0 ))
1037 return FALSE;
1038
1039 return TRUE;
1040 }
1041
1042
1043 /*
1044 * ADD DST, SRC0, negate(SRC1)
1045 */
1046 static boolean emit_sub(struct svga_shader_emitter *emit,
1047 const struct tgsi_full_instruction *insn)
1048 {
1049 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1050 struct src_register src0 = translate_src_register(
1051 emit, &insn->Src[0] );
1052 struct src_register src1 = translate_src_register(
1053 emit, &insn->Src[1] );
1054
1055 src1 = negate(src1);
1056
1057 if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ), dst,
1058 src0, src1 ))
1059 return FALSE;
1060
1061 return TRUE;
1062 }
1063
1064
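/* Translate TGSI KIL to SVGA3D TEXKILL.  TEXKILL takes no source
 * modifiers or swizzles and must read a temporary register, so the
 * operand is copied to a temp first when necessary.
 */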
1065 static boolean emit_kil(struct svga_shader_emitter *emit,
1066 const struct tgsi_full_instruction *insn )
1067 {
1068 SVGA3dShaderInstToken inst;
1069 const struct tgsi_full_src_register *reg = &insn->Src[0];
1070 struct src_register src0;
1071
1072 inst = inst_token( SVGA3DOP_TEXKILL );
1073 src0 = translate_src_register( emit, reg );
1074
1075 if (reg->Register.Absolute ||
1076 reg->Register.Negate ||
1077 reg->Register.Indirect ||
1078 reg->Register.SwizzleX != 0 ||
1079 reg->Register.SwizzleY != 1 ||
1080 reg->Register.SwizzleZ != 2 ||
1081 reg->Register.File != TGSI_FILE_TEMPORARY)
1082 {
1083 SVGA3dShaderDestToken temp = get_temp( emit );
1084
1085 submit_op1( emit, inst_token( SVGA3DOP_MOV ), temp, src0 );
1086 src0 = src( temp );
1087 }
1088
1089 return submit_op0( emit, inst, dst(src0) );
1090 }
1091
1092
1093 /* The Mesa state tracker always emits KILP as an unconditional
1094 * KIL. */
1095 static boolean emit_kilp(struct svga_shader_emitter *emit,
1096 const struct tgsi_full_instruction *insn )
1097 {
1098 SVGA3dShaderInstToken inst;
1099 SVGA3dShaderDestToken temp;
1100 struct src_register one = scalar( get_zero_immediate( emit ),
1101 TGSI_SWIZZLE_W );
1102
1103 inst = inst_token( SVGA3DOP_TEXKILL );
1104
1105 /* texkill doesn't allow negation on the operand, so let's move the
1106 * negation of {1} to a temp register */
1107 temp = get_temp( emit );
1108 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), temp,
1109 negate( one ) ))
1110 return FALSE;
1111
1112 return submit_op0( emit, inst, temp );
1113 }
1114
1115 /* Implement conditionals by initializing destination reg to 'fail',
1116 * then set predicate reg with UFOP_SETP, then move 'pass' to dest
1117 * based on predicate reg.
1118 *
1119 * SETP src0, cmp, src1 -- do this first to avoid aliasing problems.
1120 * MOV dst, fail
1121 * MOV dst, pass, p0
1122 */
1123 static boolean
1124 emit_conditional(struct svga_shader_emitter *emit,
1125 unsigned compare_func,
1126 SVGA3dShaderDestToken dst,
1127 struct src_register src0,
1128 struct src_register src1,
1129 struct src_register pass,
1130 struct src_register fail)
1131 {
1132 SVGA3dShaderDestToken pred_reg = dst_register( SVGA3DREG_PREDICATE, 0 );
1133 SVGA3dShaderInstToken setp_token, mov_token;
1134 setp_token = inst_token( SVGA3DOP_SETP );
1135
1136 switch (compare_func) {
1137 case PIPE_FUNC_NEVER:
1138 return submit_op1( emit, inst_token( SVGA3DOP_MOV ),
1139 dst, fail );
1140 break;
1141 case PIPE_FUNC_LESS:
1142 setp_token.control = SVGA3DOPCOMP_LT;
1143 break;
1144 case PIPE_FUNC_EQUAL:
1145 setp_token.control = SVGA3DOPCOMP_EQ;
1146 break;
1147 case PIPE_FUNC_LEQUAL:
1148 setp_token.control = SVGA3DOPCOMP_LE;
1149 break;
1150 case PIPE_FUNC_GREATER:
1151 setp_token.control = SVGA3DOPCOMP_GT;
1152 break;
1153 case PIPE_FUNC_NOTEQUAL:
1154 setp_token.control = SVGA3DOPCOMPC_NE;
1155 break;
1156 case PIPE_FUNC_GEQUAL:
1157 setp_token.control = SVGA3DOPCOMP_GE;
1158 break;
1159 case PIPE_FUNC_ALWAYS:
1160 return submit_op1( emit, inst_token( SVGA3DOP_MOV ),
1161 dst, pass );
1162 break;
1163 }
1164
1165 /* SETP src0, COMPOP, src1 */
1166 if (!submit_op2( emit, setp_token, pred_reg,
1167 src0, src1 ))
1168 return FALSE;
1169
1170 mov_token = inst_token( SVGA3DOP_MOV );
1171
1172 /* MOV dst, fail */
1173 if (!submit_op1( emit, mov_token, dst,
1174 fail ))
1175 return FALSE;
1176
1177 /* MOV dst, pass (predicated)
1178 *
1179 * Note that the predicate reg (and possible modifiers) is passed
1180 * as the first source argument.
1181 */
1182 mov_token.predicated = 1;
1183 if (!submit_op2( emit, mov_token, dst,
1184 src( pred_reg ), pass ))
1185 return FALSE;
1186
1187 return TRUE;
1188 }
1189
1190
1191 static boolean
1192 emit_select(struct svga_shader_emitter *emit,
1193 unsigned compare_func,
1194 SVGA3dShaderDestToken dst,
1195 struct src_register src0,
1196 struct src_register src1 )
1197 {
1198 /* There are some SVGA instructions which implement some selects
1199 * directly, but they are only available in the vertex shader.
1200 */
1201 if (emit->unit == PIPE_SHADER_VERTEX) {
1202 switch (compare_func) {
1203 case PIPE_FUNC_GEQUAL:
1204 return submit_op2( emit, inst_token( SVGA3DOP_SGE ), dst, src0, src1 );
1205 case PIPE_FUNC_LEQUAL:
1206 return submit_op2( emit, inst_token( SVGA3DOP_SGE ), dst, src1, src0 );
1207 case PIPE_FUNC_GREATER:
1208 return submit_op2( emit, inst_token( SVGA3DOP_SLT ), dst, src1, src0 );
1209 case PIPE_FUNC_LESS:
1210 return submit_op2( emit, inst_token( SVGA3DOP_SLT ), dst, src0, src1 );
1211 default:
1212 break;
1213 }
1214 }
1215
1216
1217 /* Otherwise, need to use the setp approach:
1218 */
1219 {
1220 struct src_register one, zero;
1221 /* zero immediate is 0,0,0,1 */
1222 zero = get_zero_immediate( emit );
1223 one = scalar( zero, TGSI_SWIZZLE_W );
1224 zero = scalar( zero, TGSI_SWIZZLE_X );
1225
1226 return emit_conditional(
1227 emit,
1228 compare_func,
1229 dst,
1230 src0,
1231 src1,
1232 one, zero);
1233 }
1234 }
1235
1236
1237 static boolean emit_select_op(struct svga_shader_emitter *emit,
1238 unsigned compare,
1239 const struct tgsi_full_instruction *insn)
1240 {
1241 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1242 struct src_register src0 = translate_src_register(
1243 emit, &insn->Src[0] );
1244 struct src_register src1 = translate_src_register(
1245 emit, &insn->Src[1] );
1246
1247 return emit_select( emit, compare, dst, src0, src1 );
1248 }
1249
1250
1251 /* Translate texture instructions to SVGA3D representation.
1252 */
1253 static boolean emit_tex2(struct svga_shader_emitter *emit,
1254 const struct tgsi_full_instruction *insn,
1255 SVGA3dShaderDestToken dst )
1256 {
1257 SVGA3dShaderInstToken inst;
1258 struct src_register texcoord;
1259 struct src_register sampler;
1260 SVGA3dShaderDestToken tmp;
1261
1262 inst.value = 0;
1263
1264 switch (insn->Instruction.Opcode) {
1265 case TGSI_OPCODE_TEX:
1266 inst.op = SVGA3DOP_TEX;
1267 break;
1268 case TGSI_OPCODE_TXP:
1269 inst.op = SVGA3DOP_TEX;
1270 inst.control = SVGA3DOPCONT_PROJECT;
1271 break;
1272 case TGSI_OPCODE_TXB:
1273 inst.op = SVGA3DOP_TEX;
1274 inst.control = SVGA3DOPCONT_BIAS;
1275 break;
1276 case TGSI_OPCODE_TXL:
1277 inst.op = SVGA3DOP_TEXLDL;
1278 break;
1279 default:
1280 assert(0);
1281 return FALSE;
1282 }
1283
1284 texcoord = translate_src_register( emit, &insn->Src[0] );
1285 sampler = translate_src_register( emit, &insn->Src[1] );
1286
1287 if (emit->key.fkey.tex[sampler.base.num].unnormalized ||
1288 emit->dynamic_branching_level > 0)
1289 tmp = get_temp( emit );
1290
1291 /* Can't do mipmapping inside dynamic branch constructs. Force LOD
1292 * zero in that case.
1293 */
1294 if (emit->dynamic_branching_level > 0 &&
1295 inst.op == SVGA3DOP_TEX &&
1296 SVGA3dShaderGetRegType(texcoord.base.value) == SVGA3DREG_TEMP) {
1297 struct src_register zero = get_zero_immediate( emit );
1298
1299 /* MOV tmp, texcoord */
1300 if (!submit_op1( emit,
1301 inst_token( SVGA3DOP_MOV ),
1302 tmp,
1303 texcoord ))
1304 return FALSE;
1305
1306 /* MOV tmp.w, zero */
1307 if (!submit_op1( emit,
1308 inst_token( SVGA3DOP_MOV ),
1309 writemask( tmp, TGSI_WRITEMASK_W ),
1310 scalar( zero, TGSI_SWIZZLE_X )))
1311 return FALSE;
1312
1313 texcoord = src( tmp );
1314 inst.op = SVGA3DOP_TEXLDL;
1315 }
1316
1317 /* Explicit normalization of texcoords:
1318 */
1319 if (emit->key.fkey.tex[sampler.base.num].unnormalized) {
1320 struct src_register wh = get_tex_dimensions( emit, sampler.base.num );
1321
1322 /* MUL tmp, SRC0, WH */
1323 if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ),
1324 tmp, texcoord, wh ))
1325 return FALSE;
1326
1327 texcoord = src( tmp );
1328 }
1329
1330 return submit_op2( emit, inst, dst, texcoord, sampler );
1331 }
1332
1333
1334
1335
1336 /* Translate texture instructions to SVGA3D representation.
1337 */
1338 static boolean emit_tex4(struct svga_shader_emitter *emit,
1339 const struct tgsi_full_instruction *insn,
1340 SVGA3dShaderDestToken dst )
1341 {
1342 SVGA3dShaderInstToken inst;
1343 struct src_register texcoord;
1344 struct src_register ddx;
1345 struct src_register ddy;
1346 struct src_register sampler;
1347
1348 texcoord = translate_src_register( emit, &insn->Src[0] );
1349 ddx = translate_src_register( emit, &insn->Src[1] );
1350 ddy = translate_src_register( emit, &insn->Src[2] );
1351 sampler = translate_src_register( emit, &insn->Src[3] );
1352
1353 inst.value = 0;
1354
1355 switch (insn->Instruction.Opcode) {
1356 case TGSI_OPCODE_TXD:
1357 inst.op = SVGA3DOP_TEXLDD; /* 4 args! */
1358 break;
1359 default:
1360 assert(0);
1361 return FALSE;
1362 }
1363
1364 return submit_op4( emit, inst, dst, texcoord, sampler, ddx, ddy );
1365 }
1366
1367
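/* Translate a TGSI texture-sampling instruction: TEX/TXB/TXP/TXL are
 * handled by emit_tex2 and TXD by emit_tex4, with extra shadow-compare
 * processing for depth-compare samplers and a temporary result register
 * when needed.
 */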
1368 static boolean emit_tex(struct svga_shader_emitter *emit,
1369 const struct tgsi_full_instruction *insn )
1370 {
1371 SVGA3dShaderDestToken dst =
1372 translate_dst_register( emit, insn, 0 );
1373 struct src_register src0 =
1374 translate_src_register( emit, &insn->Src[0] );
1375 struct src_register src1 =
1376 translate_src_register( emit, &insn->Src[1] );
1377
1378 SVGA3dShaderDestToken tex_result;
1379
1380 /* check for shadow samplers */
1381 boolean compare = (emit->key.fkey.tex[src1.base.num].compare_mode ==
1382 PIPE_TEX_COMPARE_R_TO_TEXTURE);
1383
1384
1385 /* If doing compare processing, need to put this value into a
1386 * temporary so it can be used as a source later on.
1387 */
1388 if (compare ||
1389 (!emit->use_sm30 && dst.mask != TGSI_WRITEMASK_XYZW) ) {
1390 tex_result = get_temp( emit );
1391 }
1392 else {
1393 tex_result = dst;
1394 }
1395
1396 switch(insn->Instruction.Opcode) {
1397 case TGSI_OPCODE_TEX:
1398 case TGSI_OPCODE_TXB:
1399 case TGSI_OPCODE_TXP:
1400 case TGSI_OPCODE_TXL:
1401 if (!emit_tex2( emit, insn, tex_result ))
1402 return FALSE;
1403 break;
1404 case TGSI_OPCODE_TXD:
1405 if (!emit_tex4( emit, insn, tex_result ))
1406 return FALSE;
1407 break;
1408 default:
1409 assert(0);
1410 }
1411
1412
1413 if (compare) {
1414 if (dst.mask & TGSI_WRITEMASK_XYZ) {
1415 SVGA3dShaderDestToken src0_zdivw = get_temp( emit );
1416 struct src_register tex_src_x = scalar(src(tex_result), TGSI_SWIZZLE_Y);
1417
1418 /* Divide texcoord R by Q */
1419 if (!submit_op1( emit, inst_token( SVGA3DOP_RCP ),
1420 writemask(src0_zdivw, TGSI_WRITEMASK_X),
1421 scalar(src0, TGSI_SWIZZLE_W) ))
1422 return FALSE;
1423
1424 if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ),
1425 writemask(src0_zdivw, TGSI_WRITEMASK_X),
1426 scalar(src0, TGSI_SWIZZLE_Z),
1427 scalar(src(src0_zdivw), TGSI_SWIZZLE_X) ))
1428 return FALSE;
1429
1430 if (!emit_select(
1431 emit,
1432 emit->key.fkey.tex[src1.base.num].compare_func,
1433 writemask( dst, TGSI_WRITEMASK_XYZ ),
1434 scalar(src(src0_zdivw), TGSI_SWIZZLE_X),
1435 tex_src_x))
1436 return FALSE;
1437 }
1438
1439 if (dst.mask & TGSI_WRITEMASK_W) {
1440 struct src_register one =
1441 scalar( get_zero_immediate( emit ), TGSI_SWIZZLE_W );
1442
1443 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
1444 writemask( dst, TGSI_WRITEMASK_W ),
1445 one ))
1446 return FALSE;
1447 }
1448
1449 return TRUE;
1450 }
1451 else if (!emit->use_sm30 && dst.mask != TGSI_WRITEMASK_XYZW)
1452 {
1453 if (!emit_op1( emit, inst_token( SVGA3DOP_MOV ), dst, src(tex_result) ))
1454 return FALSE;
1455 }
1456
1457 return TRUE;
1458 }
1459
1460 static boolean emit_bgnloop2( struct svga_shader_emitter *emit,
1461 const struct tgsi_full_instruction *insn )
1462 {
1463 SVGA3dShaderInstToken inst = inst_token( SVGA3DOP_LOOP );
1464 struct src_register loop_reg = src_register( SVGA3DREG_LOOP, 0 );
1465 struct src_register const_int = get_loop_const( emit );
1466
1467 emit->dynamic_branching_level++;
1468
1469 return (emit_instruction( emit, inst ) &&
1470 emit_src( emit, loop_reg ) &&
1471 emit_src( emit, const_int ) );
1472 }
1473
1474 static boolean emit_endloop2( struct svga_shader_emitter *emit,
1475 const struct tgsi_full_instruction *insn )
1476 {
1477 SVGA3dShaderInstToken inst = inst_token( SVGA3DOP_ENDLOOP );
1478
1479 emit->dynamic_branching_level--;
1480
1481 return emit_instruction( emit, inst );
1482 }
1483
1484 static boolean emit_brk( struct svga_shader_emitter *emit,
1485 const struct tgsi_full_instruction *insn )
1486 {
1487 SVGA3dShaderInstToken inst = inst_token( SVGA3DOP_BREAK );
1488 return emit_instruction( emit, inst );
1489 }
1490
1491 static boolean emit_scalar_op1( struct svga_shader_emitter *emit,
1492 unsigned opcode,
1493 const struct tgsi_full_instruction *insn )
1494 {
1495 SVGA3dShaderInstToken inst;
1496 SVGA3dShaderDestToken dst;
1497 struct src_register src;
1498
1499 inst = inst_token( opcode );
1500 dst = translate_dst_register( emit, insn, 0 );
1501 src = translate_src_register( emit, &insn->Src[0] );
1502 src = scalar( src, TGSI_SWIZZLE_X );
1503
1504 return submit_op1( emit, inst, dst, src );
1505 }
1506
1507
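/* Emit an instruction that maps directly onto a single SVGA3D opcode,
 * translating the destination and up to three source registers.
 */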
1508 static boolean emit_simple_instruction(struct svga_shader_emitter *emit,
1509 unsigned opcode,
1510 const struct tgsi_full_instruction *insn )
1511 {
1512 const struct tgsi_full_src_register *src = insn->Src;
1513 SVGA3dShaderInstToken inst;
1514 SVGA3dShaderDestToken dst;
1515
1516 inst = inst_token( opcode );
1517 dst = translate_dst_register( emit, insn, 0 );
1518
1519 switch (insn->Instruction.NumSrcRegs) {
1520 case 0:
1521 return submit_op0( emit, inst, dst );
1522 case 1:
1523 return submit_op1( emit, inst, dst,
1524 translate_src_register( emit, &src[0] ));
1525 case 2:
1526 return submit_op2( emit, inst, dst,
1527 translate_src_register( emit, &src[0] ),
1528 translate_src_register( emit, &src[1] ) );
1529 case 3:
1530 return submit_op3( emit, inst, dst,
1531 translate_src_register( emit, &src[0] ),
1532 translate_src_register( emit, &src[1] ),
1533 translate_src_register( emit, &src[2] ) );
1534 default:
1535 assert(0);
1536 return FALSE;
1537 }
1538 }
1539
1540
1541 static boolean emit_deriv(struct svga_shader_emitter *emit,
1542 const struct tgsi_full_instruction *insn )
1543 {
1544 if (emit->dynamic_branching_level > 0 &&
1545 insn->Src[0].Register.File == TGSI_FILE_TEMPORARY)
1546 {
1547 struct src_register zero = get_zero_immediate( emit );
1548 SVGA3dShaderDestToken dst =
1549 translate_dst_register( emit, insn, 0 );
1550
1551 /* Deriv opcodes are not valid inside dynamic branching; work around
1552 * this by zeroing out the destination.
1553 */
1554 if (!submit_op1(emit,
1555 inst_token( SVGA3DOP_MOV ),
1556 dst,
1557 scalar(zero, TGSI_SWIZZLE_X)))
1558 return FALSE;
1559
1560 return TRUE;
1561 }
1562 else {
1563 unsigned opcode;
1564
1565 switch (insn->Instruction.Opcode) {
1566 case TGSI_OPCODE_DDX:
1567 opcode = SVGA3DOP_DSX;
1568 break;
1569 case TGSI_OPCODE_DDY:
1570 opcode = SVGA3DOP_DSY;
1571 break;
1572 default:
1573 return FALSE;
1574 }
1575
1576 return emit_simple_instruction( emit, opcode, insn );
1577 }
1578 }
1579
1580 static boolean emit_arl(struct svga_shader_emitter *emit,
1581 const struct tgsi_full_instruction *insn)
1582 {
1583 ++emit->current_arl;
1584 if (svga_arl_needs_adjustment( emit )) {
1585 return emit_fake_arl( emit, insn );
1586 } else {
1587 /* no need to adjust, just emit straight arl */
1588 return emit_simple_instruction(emit, SVGA3DOP_MOVA, insn);
1589 }
1590 }
1591
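/* Returns TRUE if the source and destination refer to the same register
 * (same register file and index).
 */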
1592 static boolean alias_src_dst( struct src_register src,
1593 SVGA3dShaderDestToken dst )
1594 {
1595 if (src.base.num != dst.num)
1596 return FALSE;
1597
1598 if (SVGA3dShaderGetRegType(dst.value) !=
1599 SVGA3dShaderGetRegType(src.base.value))
1600 return FALSE;
1601
1602 return TRUE;
1603 }
1604
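/* Translate TGSI POW.  POW is a scalar operation whose result must go to
 * a temporary that does not alias src1, so route through a temp when
 * needed.
 */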
1605 static boolean emit_pow(struct svga_shader_emitter *emit,
1606 const struct tgsi_full_instruction *insn)
1607 {
1608 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1609 struct src_register src0 = translate_src_register(
1610 emit, &insn->Src[0] );
1611 struct src_register src1 = translate_src_register(
1612 emit, &insn->Src[1] );
1613 boolean need_tmp = FALSE;
1614
1615 /* POW can only output to a temporary */
1616 if (insn->Dst[0].Register.File != TGSI_FILE_TEMPORARY)
1617 need_tmp = TRUE;
1618
1619 /* POW src1 must not be the same register as dst */
1620 if (alias_src_dst( src1, dst ))
1621 need_tmp = TRUE;
1622
1623 /* it's a scalar op */
1624 src0 = scalar( src0, TGSI_SWIZZLE_X );
1625 src1 = scalar( src1, TGSI_SWIZZLE_X );
1626
1627 if (need_tmp) {
1628 SVGA3dShaderDestToken tmp = writemask(get_temp( emit ), TGSI_WRITEMASK_X );
1629
1630 if (!submit_op2(emit, inst_token( SVGA3DOP_POW ), tmp, src0, src1))
1631 return FALSE;
1632
1633 return submit_op1(emit, inst_token( SVGA3DOP_MOV ), dst, scalar(src(tmp), 0) );
1634 }
1635 else {
1636 return submit_op2(emit, inst_token( SVGA3DOP_POW ), dst, src0, src1);
1637 }
1638 }
1639
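/* Translate TGSI XPD to SVGA3D CRS, honouring the CRS restrictions on
 * destination register and writemask, and writing 1.0 to dst.w when
 * requested.
 */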
1640 static boolean emit_xpd(struct svga_shader_emitter *emit,
1641 const struct tgsi_full_instruction *insn)
1642 {
1643 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1644 const struct src_register src0 = translate_src_register(
1645 emit, &insn->Src[0] );
1646 const struct src_register src1 = translate_src_register(
1647 emit, &insn->Src[1] );
1648 boolean need_dst_tmp = FALSE;
1649
1650 /* XPD can only output to a temporary */
1651 if (SVGA3dShaderGetRegType(dst.value) != SVGA3DREG_TEMP)
1652 need_dst_tmp = TRUE;
1653
1654 /* The dst reg must not be the same as src0 or src1 */
1655 if (alias_src_dst(src0, dst) ||
1656 alias_src_dst(src1, dst))
1657 need_dst_tmp = TRUE;
1658
1659 if (need_dst_tmp) {
1660 SVGA3dShaderDestToken tmp = get_temp( emit );
1661
1662 /* Obey DX9 restrictions on mask:
1663 */
1664 tmp.mask = dst.mask & TGSI_WRITEMASK_XYZ;
1665
1666 if (!submit_op2(emit, inst_token( SVGA3DOP_CRS ), tmp, src0, src1))
1667 return FALSE;
1668
1669 if (!submit_op1(emit, inst_token( SVGA3DOP_MOV ), dst, src( tmp )))
1670 return FALSE;
1671 }
1672 else {
1673 if (!submit_op2(emit, inst_token( SVGA3DOP_CRS ), dst, src0, src1))
1674 return FALSE;
1675 }
1676
1677 /* Need to emit 1.0 to dst.w?
1678 */
1679 if (dst.mask & TGSI_WRITEMASK_W) {
1680 struct src_register zero = get_zero_immediate( emit );
1681
1682 if (!submit_op1(emit,
1683 inst_token( SVGA3DOP_MOV ),
1684 writemask(dst, TGSI_WRITEMASK_W),
1685 zero))
1686 return FALSE;
1687 }
1688
1689 return TRUE;
1690 }
1691
1692
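/* Translate TGSI LRP to SVGA3D LRP, going through a temporary when the
 * destination aliases src0 or src2.
 */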
1693 static boolean emit_lrp(struct svga_shader_emitter *emit,
1694 const struct tgsi_full_instruction *insn)
1695 {
1696 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1697 SVGA3dShaderDestToken tmp;
1698 const struct src_register src0 = translate_src_register(
1699 emit, &insn->Src[0] );
1700 const struct src_register src1 = translate_src_register(
1701 emit, &insn->Src[1] );
1702 const struct src_register src2 = translate_src_register(
1703 emit, &insn->Src[2] );
1704 boolean need_dst_tmp = FALSE;
1705
1706 /* The dst reg must not be the same as src0 or src2 */
1707 if (alias_src_dst(src0, dst) ||
1708 alias_src_dst(src2, dst))
1709 need_dst_tmp = TRUE;
1710
1711 if (need_dst_tmp) {
1712 tmp = get_temp( emit );
1713 tmp.mask = dst.mask;
1714 }
1715 else {
1716 tmp = dst;
1717 }
1718
1719 if (!submit_op3(emit, inst_token( SVGA3DOP_LRP ), tmp, src0, src1, src2))
1720 return FALSE;
1721
1722 if (need_dst_tmp) {
1723 if (!submit_op1(emit, inst_token( SVGA3DOP_MOV ), dst, src( tmp )))
1724 return FALSE;
1725 }
1726
1727 return TRUE;
1728 }
1729
1730
1731 static boolean emit_dst_insn(struct svga_shader_emitter *emit,
1732 const struct tgsi_full_instruction *insn )
1733 {
1734 if (emit->unit == PIPE_SHADER_VERTEX) {
1735 /* SVGA/DX9 has a DST instruction, but only for vertex shaders:
1736 */
1737 return emit_simple_instruction(emit, SVGA3DOP_DST, insn);
1738 }
1739 else {
1740
1741 /* result[0] = 1 * 1;
1742 * result[1] = a[1] * b[1];
1743 * result[2] = a[2] * 1;
1744 * result[3] = 1 * b[3];
1745 */
1746
1747 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1748 SVGA3dShaderDestToken tmp;
1749 const struct src_register src0 = translate_src_register(
1750 emit, &insn->Src[0] );
1751 const struct src_register src1 = translate_src_register(
1752 emit, &insn->Src[1] );
1753 struct src_register zero = get_zero_immediate( emit );
1754 boolean need_tmp = FALSE;
1755
1756 if (SVGA3dShaderGetRegType(dst.value) != SVGA3DREG_TEMP ||
1757 alias_src_dst(src0, dst) ||
1758 alias_src_dst(src1, dst))
1759 need_tmp = TRUE;
1760
1761 if (need_tmp) {
1762 tmp = get_temp( emit );
1763 }
1764 else {
1765 tmp = dst;
1766 }
1767
1768 /* tmp.xw = 1.0
1769 */
1770 if (tmp.mask & TGSI_WRITEMASK_XW) {
1771 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
1772 writemask(tmp, TGSI_WRITEMASK_XW ),
1773 scalar( zero, 3 )))
1774 return FALSE;
1775 }
1776
1777 /* tmp.yz = src0
1778 */
1779 if (tmp.mask & TGSI_WRITEMASK_YZ) {
1780 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
1781 writemask(tmp, TGSI_WRITEMASK_YZ ),
1782 src0))
1783 return FALSE;
1784 }
1785
1786 /* tmp.yw = tmp * src1
1787 */
1788 if (tmp.mask & TGSI_WRITEMASK_YW) {
1789 if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ),
1790 writemask(tmp, TGSI_WRITEMASK_YW ),
1791 src(tmp),
1792 src1))
1793 return FALSE;
1794 }
1795
1796 /* dst = tmp
1797 */
1798 if (need_tmp) {
1799 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
1800 dst,
1801 src(tmp)))
1802 return FALSE;
1803 }
1804 }
1805
1806 return TRUE;
1807 }
1808
1809
1810 static boolean emit_exp(struct svga_shader_emitter *emit,
1811 const struct tgsi_full_instruction *insn)
1812 {
1813 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1814 struct src_register src0 =
1815 translate_src_register( emit, &insn->Src[0] );
1816 struct src_register zero = get_zero_immediate( emit );
1817 SVGA3dShaderDestToken fraction;
1818
1819 if (dst.mask & TGSI_WRITEMASK_Y)
1820 fraction = dst;
1821 else if (dst.mask & TGSI_WRITEMASK_X)
1822 fraction = get_temp( emit );
1823 else
1824 fraction.value = 0;
1825
1826 /* If x or y is being written, fill the fraction reg with src0 - floor(src0).
1827 */
1828 if (dst.mask & TGSI_WRITEMASK_XY) {
1829 if (!submit_op1( emit, inst_token( SVGA3DOP_FRC ),
1830 writemask( fraction, TGSI_WRITEMASK_Y ),
1831 src0 ))
1832 return FALSE;
1833 }
1834
1835 /* If x is being written, fill it with 2 ^ floor(src0).
1836 */
1837 if (dst.mask & TGSI_WRITEMASK_X) {
1838 if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ),
1839 writemask( dst, TGSI_WRITEMASK_X ),
1840 src0,
1841 scalar( negate( src( fraction ) ), TGSI_SWIZZLE_Y ) ) )
1842 return FALSE;
1843
1844 if (!submit_op1( emit, inst_token( SVGA3DOP_EXP ),
1845 writemask( dst, TGSI_WRITEMASK_X ),
1846 scalar( src( dst ), TGSI_SWIZZLE_X ) ) )
1847 return FALSE;
1848
1849 if (!(dst.mask & TGSI_WRITEMASK_Y))
1850 release_temp( emit, fraction );
1851 }
1852
1853 /* If z is being written, fill it with 2 ^ src0 (partial precision).
1854 */
1855 if (dst.mask & TGSI_WRITEMASK_Z) {
1856 if (!submit_op1( emit, inst_token( SVGA3DOP_EXPP ),
1857 writemask( dst, TGSI_WRITEMASK_Z ),
1858 src0 ) )
1859 return FALSE;
1860 }
1861
1862 /* If w is being written, fill it with one.
1863 */
1864 if (dst.mask & TGSI_WRITEMASK_W) {
1865 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
1866 writemask(dst, TGSI_WRITEMASK_W),
1867 scalar( zero, TGSI_SWIZZLE_W ) ))
1868 return FALSE;
1869 }
1870
1871 return TRUE;
1872 }
1873
1874 static boolean emit_lit(struct svga_shader_emitter *emit,
1875 const struct tgsi_full_instruction *insn )
1876 {
1877 if (emit->unit == PIPE_SHADER_VERTEX) {
1878 /* SVGA/DX9 has a LIT instruction, but only for vertex shaders:
1879 */
1880 return emit_simple_instruction(emit, SVGA3DOP_LIT, insn);
1881 }
1882 else {
1883
1884 /* D3D vs. GL semantics can be fairly easily accommodated by
1885 * variations on this sequence.
1886 *
1887 * GL:
1888 * tmp.y = src.x
1889 * tmp.z = pow(src.y,src.w)
1890 * p0 = src0.xxxx > 0
1891 * result = zero.wxxw
1892 * (p0) result.yz = tmp
1893 *
1894 * D3D:
1895 * tmp.y = src.x
1896 * tmp.z = pow(src.y,src.w)
1897 * p0 = src0.xxyy > 0
1898 * result = zero.wxxw
1899 * (p0) result.yz = tmp
1900 *
1901 * Will implement the GL version for now.
1902 */
1903
1904 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1905 SVGA3dShaderDestToken tmp = get_temp( emit );
1906 const struct src_register src0 = translate_src_register(
1907 emit, &insn->Src[0] );
1908 struct src_register zero = get_zero_immediate( emit );
1909
1910 /* tmp = pow(src.y, src.w)
1911 */
1912 if (dst.mask & TGSI_WRITEMASK_Z) {
1913 if (!submit_op2(emit, inst_token( SVGA3DOP_POW ),
1914 tmp,
1915 scalar(src0, 1),
1916 scalar(src0, 3)))
1917 return FALSE;
1918 }
1919
1920 /* tmp.y = src.x
1921 */
1922 if (dst.mask & TGSI_WRITEMASK_Y) {
1923 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
1924 writemask(tmp, TGSI_WRITEMASK_Y ),
1925 scalar(src0, 0)))
1926 return FALSE;
1927 }
1928
1929 /* Can't quite do this with emit_conditional() due to the extra
1930 * writemask on the predicated mov:
1931 */
1932 {
1933 SVGA3dShaderDestToken pred_reg = dst_register( SVGA3DREG_PREDICATE, 0 );
1934 SVGA3dShaderInstToken setp_token, mov_token;
1935 struct src_register predsrc;
1936
1937 setp_token = inst_token( SVGA3DOP_SETP );
1938 mov_token = inst_token( SVGA3DOP_MOV );
1939
1940 setp_token.control = SVGA3DOPCOMP_GT;
1941
1942 /* D3D vs GL semantics:
1943 */
1944 if (0)
1945 predsrc = swizzle(src0, 0, 0, 1, 1); /* D3D */
1946 else
1947 predsrc = swizzle(src0, 0, 0, 0, 0); /* GL */
1948
1949 /* SETP src0.xxyy, GT, {0}.x */
1950 if (!submit_op2( emit, setp_token, pred_reg,
1951 predsrc,
1952 swizzle(zero, 0, 0, 0, 0) ))
1953 return FALSE;
1954
1955 /* MOV dst, fail */
1956 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), dst,
1957 swizzle(zero, 3, 0, 0, 3 )))
1958 return FALSE;
1959
1960 /* MOV dst.yz, tmp (predicated)
1961 *
1962 * Note that the predicate reg (and possible modifiers) is passed
1963 * as the first source argument.
1964 */
1965 if (dst.mask & TGSI_WRITEMASK_YZ) {
1966 mov_token.predicated = 1;
1967 if (!submit_op2( emit, mov_token,
1968 writemask(dst, TGSI_WRITEMASK_YZ),
1969 src( pred_reg ), src( tmp ) ))
1970 return FALSE;
1971 }
1972 }
1973 }
1974
1975 return TRUE;
1976 }
1977
1978
1979
1980
1981 static boolean emit_ex2( struct svga_shader_emitter *emit,
1982 const struct tgsi_full_instruction *insn )
1983 {
1984 SVGA3dShaderInstToken inst;
1985 SVGA3dShaderDestToken dst;
1986 struct src_register src0;
1987
1988 inst = inst_token( SVGA3DOP_EXP );
1989 dst = translate_dst_register( emit, insn, 0 );
1990 src0 = translate_src_register( emit, &insn->Src[0] );
1991 src0 = scalar( src0, TGSI_SWIZZLE_X );
1992
1993 if (dst.mask != TGSI_WRITEMASK_XYZW) {
1994 SVGA3dShaderDestToken tmp = get_temp( emit );
1995
1996 if (!submit_op1( emit, inst, tmp, src0 ))
1997 return FALSE;
1998
1999 return submit_op1( emit, inst_token( SVGA3DOP_MOV ),
2000 dst,
2001 scalar( src( tmp ), TGSI_SWIZZLE_X ) );
2002 }
2003
2004 return submit_op1( emit, inst, dst, src0 );
2005 }
2006
2007
2008 static boolean emit_log(struct svga_shader_emitter *emit,
2009 const struct tgsi_full_instruction *insn)
2010 {
2011 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
2012 struct src_register src0 =
2013 translate_src_register( emit, &insn->Src[0] );
2014 struct src_register zero = get_zero_immediate( emit );
2015 SVGA3dShaderDestToken abs_tmp;
2016 struct src_register abs_src0;
2017 SVGA3dShaderDestToken log2_abs;
2018
2019 abs_tmp.value = 0;
2020
2021 if (dst.mask & TGSI_WRITEMASK_Z)
2022 log2_abs = dst;
2023 else if (dst.mask & TGSI_WRITEMASK_XY)
2024 log2_abs = get_temp( emit );
2025 else
2026 log2_abs.value = 0;
2027
2028    /* If x, y or z is written, compute log2( abs( src0 ) ) into z of the
2029     * destination (or into a temporary if z itself isn't written).  */
2030 if (dst.mask & TGSI_WRITEMASK_XYZ) {
2031 if (!src0.base.srcMod || src0.base.srcMod == SVGA3DSRCMOD_ABS)
2032 abs_src0 = src0;
2033 else {
2034 abs_tmp = get_temp( emit );
2035
2036 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
2037 abs_tmp,
2038 src0 ) )
2039 return FALSE;
2040
2041 abs_src0 = src( abs_tmp );
2042 }
2043
2044 abs_src0 = absolute( scalar( abs_src0, TGSI_SWIZZLE_X ) );
2045
2046 if (!submit_op1( emit, inst_token( SVGA3DOP_LOG ),
2047 writemask( log2_abs, TGSI_WRITEMASK_Z ),
2048 abs_src0 ) )
2049 return FALSE;
2050 }
2051
2052 if (dst.mask & TGSI_WRITEMASK_XY) {
2053 SVGA3dShaderDestToken floor_log2;
2054
2055 if (dst.mask & TGSI_WRITEMASK_X)
2056 floor_log2 = dst;
2057 else
2058 floor_log2 = get_temp( emit );
2059
2060       /* Compute floor( log2( abs( src0 ) ) ) into floor_log2.x: FRC takes
2061        * the fractional part, which the ADD below then subtracts off.  */
2062 if (!submit_op1( emit, inst_token( SVGA3DOP_FRC ),
2063 writemask( floor_log2, TGSI_WRITEMASK_X ),
2064 scalar( src( log2_abs ), TGSI_SWIZZLE_Z ) ) )
2065 return FALSE;
2066
2067 if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ),
2068 writemask( floor_log2, TGSI_WRITEMASK_X ),
2069 scalar( src( log2_abs ), TGSI_SWIZZLE_Z ),
2070 negate( src( floor_log2 ) ) ) )
2071 return FALSE;
2072
2073 /* If y is being written, fill it with
2074 * abs ( src0 ) / ( 2 ^ floor( log2( abs( src0 ) ) ) ).
2075 */
2076 if (dst.mask & TGSI_WRITEMASK_Y) {
2077 if (!submit_op1( emit, inst_token( SVGA3DOP_EXP ),
2078 writemask( dst, TGSI_WRITEMASK_Y ),
2079 negate( scalar( src( floor_log2 ),
2080 TGSI_SWIZZLE_X ) ) ) )
2081 return FALSE;
2082
2083 if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ),
2084 writemask( dst, TGSI_WRITEMASK_Y ),
2085 src( dst ),
2086 abs_src0 ) )
2087 return FALSE;
2088 }
2089
2090 if (!(dst.mask & TGSI_WRITEMASK_X))
2091 release_temp( emit, floor_log2 );
2092
2093 if (!(dst.mask & TGSI_WRITEMASK_Z))
2094 release_temp( emit, log2_abs );
2095 }
2096
2097    if ((dst.mask & TGSI_WRITEMASK_XYZ) && src0.base.srcMod &&
2098        src0.base.srcMod != SVGA3DSRCMOD_ABS)
2099 release_temp( emit, abs_tmp );
2100
2101 /* If w is being written, fill it with one.
2102 */
2103 if (dst.mask & TGSI_WRITEMASK_W) {
2104 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
2105 writemask(dst, TGSI_WRITEMASK_W),
2106 scalar( zero, TGSI_SWIZZLE_W ) ))
2107 return FALSE;
2108 }
2109
2110 return TRUE;
2111 }
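
/* For reference only: a scalar C model of the LOG expansion implemented
 * above (x = floor(log2|s|), y = |s| / 2^x, z = log2|s|, w = 1).  Purely
 * illustrative; it assumes <math.h>, which this file does not include,
 * and is compiled out.
 */
#if 0
static void
log_reference(float s, float dst[4])
{
   const float log2_abs = log2f(fabsf(s));              /* LOG             */
   const float frc      = log2_abs - floorf(log2_abs);  /* FRC             */
   const float floor_l2 = log2_abs - frc;               /* ADD (== floor)  */

   dst[0] = floor_l2;                     /* x = floor(log2(|s|))          */
   dst[1] = fabsf(s) * exp2f(-floor_l2);  /* y = |s| / 2^x  (EXP then MUL) */
   dst[2] = log2_abs;                     /* z = log2(|s|)                 */
   dst[3] = 1.0f;                         /* w = 1                         */
}
#endif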
2112
2113
2114 static boolean emit_bgnsub( struct svga_shader_emitter *emit,
2115 unsigned position,
2116 const struct tgsi_full_instruction *insn )
2117 {
2118 unsigned i;
2119
2120 /* Note that we've finished the main function and are now emitting
2121 * subroutines. This affects how we terminate the generated
2122 * shader.
2123 */
2124 emit->in_main_func = FALSE;
2125
2126 for (i = 0; i < emit->nr_labels; i++) {
2127 if (emit->label[i] == position) {
2128 return (emit_instruction( emit, inst_token( SVGA3DOP_RET ) ) &&
2129 emit_instruction( emit, inst_token( SVGA3DOP_LABEL ) ) &&
2130 emit_src( emit, src_register( SVGA3DREG_LABEL, i )));
2131 }
2132 }
2133
2134 assert(0);
2135 return TRUE;
2136 }
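
/* Note: a BGNSUB at instruction `position' is thus emitted as a RET
 * (terminating the preceding function) followed by a LABEL whose index was
 * assigned by emit_call() below for the CAL targeting this position.
 */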
2137
2138 static boolean emit_call( struct svga_shader_emitter *emit,
2139 const struct tgsi_full_instruction *insn )
2140 {
2141 unsigned position = insn->Label.Label;
2142 unsigned i;
2143
2144 for (i = 0; i < emit->nr_labels; i++) {
2145 if (emit->label[i] == position)
2146 break;
2147 }
2148
2149    if (i == emit->nr_labels) {
2150       /* New label: fail if the label table is already full. */
2151       if (emit->nr_labels == Elements(emit->label))
2152          return FALSE;
2153       emit->label[i] = position;
2154       emit->nr_labels++;
2155    }
2156
2157 return (emit_instruction( emit, inst_token( SVGA3DOP_CALL ) ) &&
2158 emit_src( emit, src_register( SVGA3DREG_LABEL, i )));
2159 }
2160
2161
2162 static boolean emit_end( struct svga_shader_emitter *emit )
2163 {
2164 if (emit->unit == PIPE_SHADER_VERTEX) {
2165 return emit_vs_postamble( emit );
2166 }
2167 else {
2168 return emit_ps_postamble( emit );
2169 }
2170 }
2171
2172
2173
2174 static boolean svga_emit_instruction( struct svga_shader_emitter *emit,
2175 unsigned position,
2176 const struct tgsi_full_instruction *insn )
2177 {
2178 switch (insn->Instruction.Opcode) {
2179
2180 case TGSI_OPCODE_ARL:
2181 return emit_arl( emit, insn );
2182
2183 case TGSI_OPCODE_TEX:
2184 case TGSI_OPCODE_TXB:
2185 case TGSI_OPCODE_TXP:
2186 case TGSI_OPCODE_TXL:
2187 case TGSI_OPCODE_TXD:
2188 return emit_tex( emit, insn );
2189
2190 case TGSI_OPCODE_DDX:
2191 case TGSI_OPCODE_DDY:
2192 return emit_deriv( emit, insn );
2193
2194 case TGSI_OPCODE_BGNSUB:
2195 return emit_bgnsub( emit, position, insn );
2196
2197 case TGSI_OPCODE_ENDSUB:
2198 return TRUE;
2199
2200 case TGSI_OPCODE_CAL:
2201 return emit_call( emit, insn );
2202
2203 case TGSI_OPCODE_FLR:
2204    case TGSI_OPCODE_TRUNC:        /* should truncate toward zero, but we only implement FLR */
2205 return emit_floor( emit, insn );
2206
2207 case TGSI_OPCODE_CMP:
2208 return emit_cmp( emit, insn );
2209
2210 case TGSI_OPCODE_DIV:
2211 return emit_div( emit, insn );
2212
2213 case TGSI_OPCODE_DP2:
2214 return emit_dp2( emit, insn );
2215
2216 case TGSI_OPCODE_DPH:
2217 return emit_dph( emit, insn );
2218
2219 case TGSI_OPCODE_NRM:
2220 return emit_nrm( emit, insn );
2221
2222 case TGSI_OPCODE_COS:
2223 return emit_cos( emit, insn );
2224
2225 case TGSI_OPCODE_SIN:
2226 return emit_sin( emit, insn );
2227
2228 case TGSI_OPCODE_SCS:
2229 return emit_sincos( emit, insn );
2230
2231 case TGSI_OPCODE_END:
2232 /* TGSI always finishes the main func with an END */
2233 return emit_end( emit );
2234
2235 case TGSI_OPCODE_KIL:
2236 return emit_kil( emit, insn );
2237
2238 /* Selection opcodes. The underlying language is fairly
2239 * non-orthogonal about these.
2240 */
2241 case TGSI_OPCODE_SEQ:
2242 return emit_select_op( emit, PIPE_FUNC_EQUAL, insn );
2243
2244 case TGSI_OPCODE_SNE:
2245 return emit_select_op( emit, PIPE_FUNC_NOTEQUAL, insn );
2246
2247 case TGSI_OPCODE_SGT:
2248 return emit_select_op( emit, PIPE_FUNC_GREATER, insn );
2249
2250 case TGSI_OPCODE_SGE:
2251 return emit_select_op( emit, PIPE_FUNC_GEQUAL, insn );
2252
2253 case TGSI_OPCODE_SLT:
2254 return emit_select_op( emit, PIPE_FUNC_LESS, insn );
2255
2256 case TGSI_OPCODE_SLE:
2257 return emit_select_op( emit, PIPE_FUNC_LEQUAL, insn );
2258
2259 case TGSI_OPCODE_SUB:
2260 return emit_sub( emit, insn );
2261
2262 case TGSI_OPCODE_POW:
2263 return emit_pow( emit, insn );
2264
2265 case TGSI_OPCODE_EX2:
2266 return emit_ex2( emit, insn );
2267
2268 case TGSI_OPCODE_EXP:
2269 return emit_exp( emit, insn );
2270
2271 case TGSI_OPCODE_LOG:
2272 return emit_log( emit, insn );
2273
2274 case TGSI_OPCODE_LG2:
2275 return emit_scalar_op1( emit, SVGA3DOP_LOG, insn );
2276
2277 case TGSI_OPCODE_RSQ:
2278 return emit_scalar_op1( emit, SVGA3DOP_RSQ, insn );
2279
2280 case TGSI_OPCODE_RCP:
2281 return emit_scalar_op1( emit, SVGA3DOP_RCP, insn );
2282
2283 case TGSI_OPCODE_CONT:
2284 case TGSI_OPCODE_RET:
2285       /* These are no-ops here -- we tell Mesa that we can't support
2286        * RET within a function (early return), so a RET will always be
2287        * immediately followed by an ENDSUB.
2288        */
2289 return TRUE;
2290
2291 /* These aren't actually used by any of the frontends we care
2292 * about:
2293 */
2294 case TGSI_OPCODE_CLAMP:
2295 case TGSI_OPCODE_ROUND:
2296 case TGSI_OPCODE_AND:
2297 case TGSI_OPCODE_OR:
2298 case TGSI_OPCODE_I2F:
2299 case TGSI_OPCODE_NOT:
2300 case TGSI_OPCODE_SHL:
2301 case TGSI_OPCODE_ISHR:
2302 case TGSI_OPCODE_XOR:
2303 return FALSE;
2304
2305 case TGSI_OPCODE_IF:
2306 return emit_if( emit, insn );
2307 case TGSI_OPCODE_ELSE:
2308 return emit_else( emit, insn );
2309 case TGSI_OPCODE_ENDIF:
2310 return emit_endif( emit, insn );
2311
2312 case TGSI_OPCODE_BGNLOOP:
2313 return emit_bgnloop2( emit, insn );
2314 case TGSI_OPCODE_ENDLOOP:
2315 return emit_endloop2( emit, insn );
2316 case TGSI_OPCODE_BRK:
2317 return emit_brk( emit, insn );
2318
2319 case TGSI_OPCODE_XPD:
2320 return emit_xpd( emit, insn );
2321
2322 case TGSI_OPCODE_KILP:
2323 return emit_kilp( emit, insn );
2324
2325 case TGSI_OPCODE_DST:
2326 return emit_dst_insn( emit, insn );
2327
2328 case TGSI_OPCODE_LIT:
2329 return emit_lit( emit, insn );
2330
2331 case TGSI_OPCODE_LRP:
2332 return emit_lrp( emit, insn );
2333
2334 default: {
2335 unsigned opcode = translate_opcode(insn->Instruction.Opcode);
2336
2337 if (opcode == SVGA3DOP_LAST_INST)
2338 return FALSE;
2339
2340 if (!emit_simple_instruction( emit, opcode, insn ))
2341 return FALSE;
2342 }
2343 }
2344
2345 return TRUE;
2346 }
2347
2348
2349 static boolean svga_emit_immediate( struct svga_shader_emitter *emit,
2350 struct tgsi_full_immediate *imm)
2351 {
2352 static const float id[4] = {0,0,0,1};
2353 float value[4];
2354 unsigned i;
2355
2356 assert(1 <= imm->Immediate.NrTokens && imm->Immediate.NrTokens <= 5);
2357 for (i = 0; i < imm->Immediate.NrTokens - 1; i++)
2358 value[i] = imm->u[i].Float;
2359
2360 for ( ; i < 4; i++ )
2361 value[i] = id[i];
2362
2363 return emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT,
2364 emit->imm_start + emit->internal_imm_count++,
2365 value[0], value[1], value[2], value[3]);
2366 }
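
/* Example (illustrative): a TGSI immediate declared with only two components,
 * say { 0.5, 2.0 }, is emitted as the constant { 0.5, 2.0, 0.0, 1.0 }; the
 * missing components are filled in from id[] above.
 */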
2367
2368 static boolean make_immediate( struct svga_shader_emitter *emit,
2369 float a,
2370 float b,
2371 float c,
2372 float d,
2373 struct src_register *out )
2374 {
2375 unsigned idx = emit->nr_hw_const++;
2376
2377 if (!emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT,
2378 idx, a, b, c, d ))
2379 return FALSE;
2380
2381 *out = src_register( SVGA3DREG_CONST, idx );
2382
2383 return TRUE;
2384 }
2385
2386 static boolean emit_vs_preamble( struct svga_shader_emitter *emit )
2387 {
2388 if (!emit->key.vkey.need_prescale) {
2389 if (!make_immediate( emit, 0, 0, .5, .5,
2390 &emit->imm_0055))
2391 return FALSE;
2392 }
2393
2394 return TRUE;
2395 }
2396
2397 static boolean emit_ps_preamble( struct svga_shader_emitter *emit )
2398 {
2399 unsigned i;
2400
2401 /* For SM20, need to initialize the temporaries we're using to hold
2402 * color outputs to some value. Shaders which don't set all of
2403 * these values are likely to be rejected by the DX9 runtime.
2404 */
2405 if (!emit->use_sm30) {
2406 struct src_register zero = get_zero_immediate( emit );
2407 for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) {
2408 if (SVGA3dShaderGetRegType(emit->true_col[i].value) != 0) {
2409
2410 if (!submit_op1( emit,
2411 inst_token(SVGA3DOP_MOV),
2412 emit->temp_col[i],
2413 zero ))
2414 return FALSE;
2415 }
2416 }
2417 }
2418
2419 return TRUE;
2420 }
2421
2422 static boolean emit_ps_postamble( struct svga_shader_emitter *emit )
2423 {
2424 unsigned i;
2425
2426 /* PS oDepth is incredibly fragile and it's very hard to catch the
2427 * types of usage that break it during shader emit. Easier just to
2428 * redirect the main program to a temporary and then only touch
2429 * oDepth with a hand-crafted MOV below.
2430 */
2431 if (SVGA3dShaderGetRegType(emit->true_pos.value) != 0) {
2432
2433 if (!submit_op1( emit,
2434 inst_token(SVGA3DOP_MOV),
2435 emit->true_pos,
2436 scalar(src(emit->temp_pos), TGSI_SWIZZLE_Z) ))
2437 return FALSE;
2438 }
2439
2440 /* Similarly for SM20 color outputs... Luckily SM30 isn't so
2441 * fragile.
2442 */
2443 for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) {
2444 if (SVGA3dShaderGetRegType(emit->true_col[i].value) != 0) {
2445
2446          /* Potentially override output colors with white for the XOR
2447           * logicop workaround.
2448           */
2449 if (emit->unit == PIPE_SHADER_FRAGMENT &&
2450 emit->key.fkey.white_fragments) {
2451
2452 struct src_register one = scalar( get_zero_immediate( emit ),
2453 TGSI_SWIZZLE_W );
2454
2455 if (!submit_op1( emit,
2456 inst_token(SVGA3DOP_MOV),
2457 emit->true_col[i],
2458 one ))
2459 return FALSE;
2460 }
2461 else {
2462 if (!submit_op1( emit,
2463 inst_token(SVGA3DOP_MOV),
2464 emit->true_col[i],
2465 src(emit->temp_col[i]) ))
2466 return FALSE;
2467 }
2468 }
2469 }
2470
2471 return TRUE;
2472 }
2473
2474 static boolean emit_vs_postamble( struct svga_shader_emitter *emit )
2475 {
2476 /* PSIZ output is incredibly fragile and it's very hard to catch
2477 * the types of usage that break it during shader emit. Easier
2478 * just to redirect the main program to a temporary and then only
2479 * touch PSIZ with a hand-crafted MOV below.
2480 */
2481 if (SVGA3dShaderGetRegType(emit->true_psiz.value) != 0) {
2482
2483 if (!submit_op1( emit,
2484 inst_token(SVGA3DOP_MOV),
2485 emit->true_psiz,
2486 scalar(src(emit->temp_psiz), TGSI_SWIZZLE_X) ))
2487 return FALSE;
2488 }
2489
2490 /* Need to perform various manipulations on vertex position to cope
2491 * with the different GL and D3D clip spaces.
2492 */
2493 if (emit->key.vkey.need_prescale) {
2494 SVGA3dShaderDestToken temp_pos = emit->temp_pos;
2495 SVGA3dShaderDestToken pos = emit->true_pos;
2496 unsigned offset = emit->info.file_max[TGSI_FILE_CONSTANT] + 1;
2497 struct src_register prescale_scale = src_register( SVGA3DREG_CONST,
2498 offset + 0 );
2499 struct src_register prescale_trans = src_register( SVGA3DREG_CONST,
2500 offset + 1 );
2501
2502 /* MUL temp_pos.xyz, temp_pos, prescale.scale
2503 * MAD result.position, temp_pos.wwww, prescale.trans, temp_pos
2504 * --> Note that prescale.trans.w == 0
2505 */
2506 if (!submit_op2( emit,
2507 inst_token(SVGA3DOP_MUL),
2508 writemask(temp_pos, TGSI_WRITEMASK_XYZ),
2509 src(temp_pos),
2510 prescale_scale ))
2511 return FALSE;
2512
2513 if (!submit_op3( emit,
2514 inst_token(SVGA3DOP_MAD),
2515 pos,
2516 swizzle(src(temp_pos), 3, 3, 3, 3),
2517 prescale_trans,
2518 src(temp_pos)))
2519 return FALSE;
2520 }
2521 else {
2522 SVGA3dShaderDestToken temp_pos = emit->temp_pos;
2523 SVGA3dShaderDestToken pos = emit->true_pos;
2524 struct src_register imm_0055 = emit->imm_0055;
2525
2526       /* Adjust GL clip space (z in [-w,w]) to hardware/D3D clip space
2527        * (z in [0,w]):
2528        * DP4 temp_pos.z, {0,0,.5,.5}, temp_pos   (z' = 0.5*z + 0.5*w)
2529        * MOV result.position, temp_pos
2530        */
2531 if (!submit_op2( emit,
2532 inst_token(SVGA3DOP_DP4),
2533 writemask(temp_pos, TGSI_WRITEMASK_Z),
2534 imm_0055,
2535 src(temp_pos) ))
2536 return FALSE;
2537
2538 if (!submit_op1( emit,
2539 inst_token(SVGA3DOP_MOV),
2540 pos,
2541 src(temp_pos) ))
2542 return FALSE;
2543 }
2544
2545 return TRUE;
2546 }
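
/* For reference only: what the two postamble paths above compute.  Purely
 * illustrative and compiled out; "pos_in" is the position written by the
 * main program, "scale"/"trans" stand in for the prescale constants.
 */
#if 0
static void
vs_postamble_reference(const float pos_in[4], const float scale[4],
                       const float trans[4], boolean need_prescale,
                       float pos_out[4])
{
   int i;

   if (need_prescale) {
      /* MUL temp.xyz, temp, scale;  MAD out, temp.wwww, trans, temp
       * (trans.w == 0, so out.w == temp.w)
       */
      for (i = 0; i < 3; i++)
         pos_out[i] = pos_in[i] * scale[i] + pos_in[3] * trans[i];
      pos_out[3] = pos_in[3];
   }
   else {
      /* DP4 temp.z, {0,0,.5,.5}, temp;  MOV out, temp
       * i.e. remap GL z in [-w,w] to D3D z in [0,w].
       */
      pos_out[0] = pos_in[0];
      pos_out[1] = pos_in[1];
      pos_out[2] = 0.5f * pos_in[2] + 0.5f * pos_in[3];
      pos_out[3] = pos_in[3];
   }
}
#endif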
2547
2548 /*
2549 0: IF VFACE :4
2550 1: COLOR = FrontColor;
2551 2: ELSE
2552 3: COLOR = BackColor;
2553 4: ENDIF
2554 */
2555 static boolean emit_light_twoside( struct svga_shader_emitter *emit )
2556 {
2557 struct src_register vface, zero;
2558 struct src_register front[2];
2559 struct src_register back[2];
2560 SVGA3dShaderDestToken color[2];
2561 int count = emit->internal_color_count;
2562 int i;
2563 SVGA3dShaderInstToken if_token;
2564
2565 if (count == 0)
2566 return TRUE;
2567
2568 vface = get_vface( emit );
2569 zero = get_zero_immediate( emit );
2570
2571 /* Can't use get_temp() to allocate the color reg as such
2572 * temporaries will be reclaimed after each instruction by the call
2573 * to reset_temp_regs().
2574 */
2575 for (i = 0; i < count; i++) {
2576 color[i] = dst_register( SVGA3DREG_TEMP,
2577 emit->nr_hw_temp++ );
2578
2579 front[i] = emit->input_map[emit->internal_color_idx[i]];
2580
2581 /* Back is always the next input:
2582 */
2583 back[i] = front[i];
2584 back[i].base.num = front[i].base.num + 1;
2585
2586 /* Reassign the input_map to the actual front-face color:
2587 */
2588 emit->input_map[emit->internal_color_idx[i]] = src(color[i]);
2589 }
2590
2591 if_token = inst_token( SVGA3DOP_IFC );
2592
2593 if (emit->key.fkey.front_cw)
2594 if_token.control = SVGA3DOPCOMP_GT;
2595 else
2596 if_token.control = SVGA3DOPCOMP_LT;
2597
2598 zero = scalar(zero, TGSI_SWIZZLE_X);
2599
2600 if (!(emit_instruction( emit, if_token ) &&
2601 emit_src( emit, vface ) &&
2602 emit_src( emit, zero ) ))
2603 return FALSE;
2604
2605 for (i = 0; i < count; i++) {
2606 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), color[i], front[i] ))
2607 return FALSE;
2608 }
2609
2610 if (!(emit_instruction( emit, inst_token( SVGA3DOP_ELSE))))
2611 return FALSE;
2612
2613 for (i = 0; i < count; i++) {
2614 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), color[i], back[i] ))
2615 return FALSE;
2616 }
2617
2618 if (!emit_instruction( emit, inst_token( SVGA3DOP_ENDIF ) ))
2619 return FALSE;
2620
2621 return TRUE;
2622 }
2623
2624 /*
2625 0: SETP_GT TEMP, VFACE, 0
2626 where TEMP is a fake frontface register
2627 */
2628 static boolean emit_frontface( struct svga_shader_emitter *emit )
2629 {
2630 struct src_register vface, zero;
2631 SVGA3dShaderDestToken temp;
2632 struct src_register pass, fail;
2633
2634 vface = get_vface( emit );
2635 zero = get_zero_immediate( emit );
2636
2637 /* Can't use get_temp() to allocate the fake frontface reg as such
2638 * temporaries will be reclaimed after each instruction by the call
2639 * to reset_temp_regs().
2640 */
2641 temp = dst_register( SVGA3DREG_TEMP,
2642 emit->nr_hw_temp++ );
2643
2644 if (emit->key.fkey.front_cw) {
2645 pass = scalar( zero, TGSI_SWIZZLE_W );
2646 fail = scalar( zero, TGSI_SWIZZLE_X );
2647 } else {
2648 pass = scalar( zero, TGSI_SWIZZLE_X );
2649 fail = scalar( zero, TGSI_SWIZZLE_W );
2650 }
2651
2652 if (!emit_conditional(emit, PIPE_FUNC_GREATER,
2653 temp, vface, scalar( zero, TGSI_SWIZZLE_X ),
2654 pass, fail))
2655 return FALSE;
2656
2657 /* Reassign the input_map to the actual front-face color:
2658 */
2659 emit->input_map[emit->internal_frontface_idx] = src(temp);
2660
2661 return TRUE;
2662 }
2663
2664 static INLINE boolean
2665 needs_to_create_zero( struct svga_shader_emitter *emit )
2666 {
2667 int i;
2668
2669 if (emit->unit == PIPE_SHADER_FRAGMENT) {
2670 if (!emit->use_sm30)
2671 return TRUE;
2672
2673 if (emit->key.fkey.light_twoside)
2674 return TRUE;
2675
2676 if (emit->key.fkey.white_fragments)
2677 return TRUE;
2678
2679 if (emit->emit_frontface)
2680 return TRUE;
2681
2682 if (emit->info.opcode_count[TGSI_OPCODE_DST] >= 1 ||
2683 emit->info.opcode_count[TGSI_OPCODE_LIT] >= 1)
2684 return TRUE;
2685 }
2686
2687 if (emit->info.opcode_count[TGSI_OPCODE_IF] >= 1 ||
2688 emit->info.opcode_count[TGSI_OPCODE_BGNLOOP] >= 1 ||
2689 emit->info.opcode_count[TGSI_OPCODE_BGNFOR] >= 1 ||
2690 emit->info.opcode_count[TGSI_OPCODE_DDX] >= 1 ||
2691 emit->info.opcode_count[TGSI_OPCODE_DDY] >= 1 ||
2692 emit->info.opcode_count[TGSI_OPCODE_SGE] >= 1 ||
2693 emit->info.opcode_count[TGSI_OPCODE_SGT] >= 1 ||
2694 emit->info.opcode_count[TGSI_OPCODE_SLE] >= 1 ||
2695 emit->info.opcode_count[TGSI_OPCODE_SLT] >= 1 ||
2696 emit->info.opcode_count[TGSI_OPCODE_SNE] >= 1 ||
2697 emit->info.opcode_count[TGSI_OPCODE_SEQ] >= 1 ||
2698 emit->info.opcode_count[TGSI_OPCODE_EXP] >= 1 ||
2699 emit->info.opcode_count[TGSI_OPCODE_LOG] >= 1 ||
2700 emit->info.opcode_count[TGSI_OPCODE_XPD] >= 1 ||
2701 emit->info.opcode_count[TGSI_OPCODE_KILP] >= 1)
2702 return TRUE;
2703
2704 for (i = 0; i < emit->key.fkey.num_textures; i++) {
2705 if (emit->key.fkey.tex[i].compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE)
2706 return TRUE;
2707 }
2708
2709 return FALSE;
2710 }
2711
2712 static INLINE boolean
2713 needs_to_create_loop_const( struct svga_shader_emitter *emit )
2714 {
2715 return (emit->info.opcode_count[TGSI_OPCODE_BGNLOOP] >= 1);
2716 }
2717
2718 static INLINE boolean
2719 needs_to_create_sincos_consts( struct svga_shader_emitter *emit )
2720 {
2721 return !emit->use_sm30 && (emit->info.opcode_count[TGSI_OPCODE_SIN] >= 1 ||
2722 emit->info.opcode_count[TGSI_OPCODE_COS] >= 1 ||
2723 emit->info.opcode_count[TGSI_OPCODE_SCS] >= 1);
2724 }
2725
2726 static INLINE boolean
2727 needs_to_create_arl_consts( struct svga_shader_emitter *emit )
2728 {
2729 return (emit->num_arl_consts > 0);
2730 }
2731
2732 static INLINE boolean
2733 pre_parse_add_indirect( struct svga_shader_emitter *emit,
2734 int num, int current_arl)
2735 {
2736 int i;
2737 assert(num < 0);
2738
2739 for (i = 0; i < emit->num_arl_consts; ++i) {
2740 if (emit->arl_consts[i].arl_num == current_arl)
2741 break;
2742 }
2743 /* new entry */
2744 if (emit->num_arl_consts == i) {
2745 ++emit->num_arl_consts;
2746 }
2747 emit->arl_consts[i].number = (emit->arl_consts[i].number > num) ?
2748 num :
2749 emit->arl_consts[i].number;
2750 emit->arl_consts[i].arl_num = current_arl;
2751 return TRUE;
2752 }
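
/* Example (illustrative): an instruction reading CONST[ADDR[0].x - 3] causes
 * pre_parse_add_indirect() to be called with num == -3; the most negative
 * offset seen for each ARL is recorded here, presumably so the emitter can
 * later compensate for negative relative addressing (see create_arl_consts()).
 */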
2753
2754 static boolean
2755 pre_parse_instruction( struct svga_shader_emitter *emit,
2756 const struct tgsi_full_instruction *insn,
2757 int current_arl)
2758 {
2759 if (insn->Src[0].Register.Indirect &&
2760 insn->Src[0].Indirect.File == TGSI_FILE_ADDRESS) {
2761 const struct tgsi_full_src_register *reg = &insn->Src[0];
2762 if (reg->Register.Index < 0) {
2763 pre_parse_add_indirect(emit, reg->Register.Index, current_arl);
2764 }
2765 }
2766
2767 if (insn->Src[1].Register.Indirect &&
2768 insn->Src[1].Indirect.File == TGSI_FILE_ADDRESS) {
2769 const struct tgsi_full_src_register *reg = &insn->Src[1];
2770 if (reg->Register.Index < 0) {
2771 pre_parse_add_indirect(emit, reg->Register.Index, current_arl);
2772 }
2773 }
2774
2775 if (insn->Src[2].Register.Indirect &&
2776 insn->Src[2].Indirect.File == TGSI_FILE_ADDRESS) {
2777 const struct tgsi_full_src_register *reg = &insn->Src[2];
2778 if (reg->Register.Index < 0) {
2779 pre_parse_add_indirect(emit, reg->Register.Index, current_arl);
2780 }
2781 }
2782
2783 return TRUE;
2784 }
2785
2786 static boolean
2787 pre_parse_tokens( struct svga_shader_emitter *emit,
2788 const struct tgsi_token *tokens )
2789 {
2790 struct tgsi_parse_context parse;
2791 int current_arl = 0;
2792
2793 tgsi_parse_init( &parse, tokens );
2794
2795 while (!tgsi_parse_end_of_tokens( &parse )) {
2796 tgsi_parse_token( &parse );
2797 switch (parse.FullToken.Token.Type) {
2798 case TGSI_TOKEN_TYPE_IMMEDIATE:
2799 case TGSI_TOKEN_TYPE_DECLARATION:
2800 break;
2801 case TGSI_TOKEN_TYPE_INSTRUCTION:
2802 if (parse.FullToken.FullInstruction.Instruction.Opcode ==
2803 TGSI_OPCODE_ARL) {
2804 ++current_arl;
2805 }
2806 if (!pre_parse_instruction( emit, &parse.FullToken.FullInstruction,
2807 current_arl ))
2808 return FALSE;
2809 break;
2810 default:
2811 break;
2812 }
2813
2814 }
2815 return TRUE;
2816 }
2817
2818 static boolean svga_shader_emit_helpers( struct svga_shader_emitter *emit )
2819 {
2820 
2821 if (needs_to_create_zero( emit )) {
2822 create_zero_immediate( emit );
2823 }
2824 if (needs_to_create_loop_const( emit )) {
2825 create_loop_const( emit );
2826 }
2827 if (needs_to_create_sincos_consts( emit )) {
2828 create_sincos_consts( emit );
2829 }
2830 if (needs_to_create_arl_consts( emit )) {
2831 create_arl_consts( emit );
2832 }
2833
2834 if (emit->unit == PIPE_SHADER_FRAGMENT) {
2835 if (!emit_ps_preamble( emit ))
2836 return FALSE;
2837
2838 if (emit->key.fkey.light_twoside) {
2839 if (!emit_light_twoside( emit ))
2840 return FALSE;
2841 }
2842 if (emit->emit_frontface) {
2843 if (!emit_frontface( emit ))
2844 return FALSE;
2845 }
2846 }
2847
2848 return TRUE;
2849 }
2850
2851 boolean svga_shader_emit_instructions( struct svga_shader_emitter *emit,
2852 const struct tgsi_token *tokens )
2853 {
2854 struct tgsi_parse_context parse;
2855 boolean ret = TRUE;
2856 boolean helpers_emitted = FALSE;
2857 unsigned line_nr = 0;
2858
2859 tgsi_parse_init( &parse, tokens );
2860 emit->internal_imm_count = 0;
2861
2862 if (emit->unit == PIPE_SHADER_VERTEX) {
2863 ret = emit_vs_preamble( emit );
2864 if (!ret)
2865 goto done;
2866 }
2867
2868 pre_parse_tokens(emit, tokens);
2869
2870 while (!tgsi_parse_end_of_tokens( &parse )) {
2871 tgsi_parse_token( &parse );
2872
2873 switch (parse.FullToken.Token.Type) {
2874 case TGSI_TOKEN_TYPE_IMMEDIATE:
2875 ret = svga_emit_immediate( emit, &parse.FullToken.FullImmediate );
2876 if (!ret)
2877 goto done;
2878 break;
2879
2880 case TGSI_TOKEN_TYPE_DECLARATION:
2881 if (emit->use_sm30)
2882 ret = svga_translate_decl_sm30( emit, &parse.FullToken.FullDeclaration );
2883 else
2884 ret = svga_translate_decl_sm20( emit, &parse.FullToken.FullDeclaration );
2885 if (!ret)
2886 goto done;
2887 break;
2888
2889 case TGSI_TOKEN_TYPE_INSTRUCTION:
2890 if (!helpers_emitted) {
2891 if (!svga_shader_emit_helpers( emit ))
2892 goto done;
2893 helpers_emitted = TRUE;
2894 }
2895 ret = svga_emit_instruction( emit,
2896 line_nr++,
2897 &parse.FullToken.FullInstruction );
2898 if (!ret)
2899 goto done;
2900 break;
2901 default:
2902 break;
2903 }
2904
2905 reset_temp_regs( emit );
2906 }
2907
2908    /* If we finished emitting inside a subroutine, terminate it with an
2909     * explicit RET.  The hardware doesn't tolerate subroutines that
2910     * aren't terminated with RET before the final END.
2911     */
2912 if (!emit->in_main_func) {
2913 ret = emit_instruction( emit, inst_token( SVGA3DOP_RET ) );
2914 if (!ret)
2915 goto done;
2916 }
2917
2918 assert(emit->dynamic_branching_level == 0);
2919
2920 /* Need to terminate the whole shader:
2921 */
2922 ret = emit_instruction( emit, inst_token( SVGA3DOP_END ) );
2923 if (!ret)
2924 goto done;
2925
2926 done:
2927 assert(ret);
2928 tgsi_parse_free( &parse );
2929 return ret;
2930 }
2931