svga: handle missing PIPE_CAP_x queries
[mesa.git] / src / gallium / drivers / svga / svga_tgsi_insn.c
1 /**********************************************************
2 * Copyright 2008-2009 VMware, Inc. All rights reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person
5 * obtaining a copy of this software and associated documentation
6 * files (the "Software"), to deal in the Software without
7 * restriction, including without limitation the rights to use, copy,
8 * modify, merge, publish, distribute, sublicense, and/or sell copies
9 * of the Software, and to permit persons to whom the Software is
10 * furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice shall be
13 * included in all copies or substantial portions of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
19 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
20 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
21 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 *
24 **********************************************************/
25
26
27 #include "pipe/p_shader_tokens.h"
28 #include "tgsi/tgsi_dump.h"
29 #include "tgsi/tgsi_parse.h"
30 #include "util/u_memory.h"
31 #include "util/u_math.h"
32
33 #include "svga_tgsi_emit.h"
34 #include "svga_context.h"
35
36
37 static boolean emit_vs_postamble( struct svga_shader_emitter *emit );
38 static boolean emit_ps_postamble( struct svga_shader_emitter *emit );
39
40
41
42
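/* Map a TGSI opcode onto the SVGA3D opcode it corresponds to directly.
 * Only opcodes with a one-to-one equivalent are listed here; the rest
 * are lowered to instruction sequences by the emit_* helpers below.
 */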
43 static unsigned
44 translate_opcode(
45 uint opcode )
46 {
47 switch (opcode) {
48 case TGSI_OPCODE_ABS: return SVGA3DOP_ABS;
49 case TGSI_OPCODE_ADD: return SVGA3DOP_ADD;
50 case TGSI_OPCODE_BREAKC: return SVGA3DOP_BREAKC;
51 case TGSI_OPCODE_DP2A: return SVGA3DOP_DP2ADD;
52 case TGSI_OPCODE_DP3: return SVGA3DOP_DP3;
53 case TGSI_OPCODE_DP4: return SVGA3DOP_DP4;
54 case TGSI_OPCODE_FRC: return SVGA3DOP_FRC;
55 case TGSI_OPCODE_MAD: return SVGA3DOP_MAD;
56 case TGSI_OPCODE_MAX: return SVGA3DOP_MAX;
57 case TGSI_OPCODE_MIN: return SVGA3DOP_MIN;
58 case TGSI_OPCODE_MOV: return SVGA3DOP_MOV;
59 case TGSI_OPCODE_MUL: return SVGA3DOP_MUL;
60 case TGSI_OPCODE_NOP: return SVGA3DOP_NOP;
61 case TGSI_OPCODE_NRM4: return SVGA3DOP_NRM;
62 default:
 63          debug_printf("Unknown opcode %u\n", opcode);
64 assert( 0 );
65 return SVGA3DOP_LAST_INST;
66 }
67 }
68
69
70 static unsigned translate_file( unsigned file )
71 {
72 switch (file) {
73 case TGSI_FILE_TEMPORARY: return SVGA3DREG_TEMP;
74 case TGSI_FILE_INPUT: return SVGA3DREG_INPUT;
75 case TGSI_FILE_OUTPUT: return SVGA3DREG_OUTPUT; /* VS3.0+ only */
76 case TGSI_FILE_IMMEDIATE: return SVGA3DREG_CONST;
77 case TGSI_FILE_CONSTANT: return SVGA3DREG_CONST;
78 case TGSI_FILE_SAMPLER: return SVGA3DREG_SAMPLER;
79 case TGSI_FILE_ADDRESS: return SVGA3DREG_ADDR;
80 default:
81 assert( 0 );
82 return SVGA3DREG_TEMP;
83 }
84 }
85
86
87
88
89
90
91 static SVGA3dShaderDestToken
92 translate_dst_register( struct svga_shader_emitter *emit,
93 const struct tgsi_full_instruction *insn,
94 unsigned idx )
95 {
96 const struct tgsi_full_dst_register *reg = &insn->Dst[idx];
97 SVGA3dShaderDestToken dest;
98
99 switch (reg->Register.File) {
100 case TGSI_FILE_OUTPUT:
101 /* Output registers encode semantic information in their name.
 102        * Need to look up a table built at decl time:
103 */
104 dest = emit->output_map[reg->Register.Index];
105 break;
106
107 default:
108 {
109 unsigned index = reg->Register.Index;
110 assert(index < SVGA3D_TEMPREG_MAX);
111 index = MIN2(index, SVGA3D_TEMPREG_MAX - 1);
112 dest = dst_register(translate_file(reg->Register.File), index);
113 }
114 break;
115 }
116
117 dest.mask = reg->Register.WriteMask;
118 assert(dest.mask);
119
120 if (insn->Instruction.Saturate)
121 dest.dstMod = SVGA3DDSTMOD_SATURATE;
122
123 return dest;
124 }
125
126
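/* Apply a swizzle on top of whatever swizzle 'src' already carries.
 * E.g. if src currently selects .yzwx, requesting (X, X, X, X) here
 * produces a source that selects .yyyy.
 */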
127 static struct src_register
128 swizzle( struct src_register src,
129 int x,
130 int y,
131 int z,
132 int w )
133 {
134 x = (src.base.swizzle >> (x * 2)) & 0x3;
135 y = (src.base.swizzle >> (y * 2)) & 0x3;
136 z = (src.base.swizzle >> (z * 2)) & 0x3;
137 w = (src.base.swizzle >> (w * 2)) & 0x3;
138
139 src.base.swizzle = TRANSLATE_SWIZZLE(x,y,z,w);
140
141 return src;
142 }
143
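/* Replicate a single component of 'src' across all four channels,
 * e.g. scalar(src, TGSI_SWIZZLE_W) yields .wwww.
 */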
144 static struct src_register
145 scalar( struct src_register src,
146 int comp )
147 {
148 return swizzle( src, comp, comp, comp, comp );
149 }
150
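/* Return TRUE if the ARL instruction currently being translated has an
 * entry in the arl_consts table, in which case emit_arl() goes through
 * emit_fake_arl() instead of emitting a plain MOVA.
 */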
151 static INLINE boolean
152 svga_arl_needs_adjustment( const struct svga_shader_emitter *emit )
153 {
154 int i;
155
156 for (i = 0; i < emit->num_arl_consts; ++i) {
157 if (emit->arl_consts[i].arl_num == emit->current_arl)
158 return TRUE;
159 }
160 return FALSE;
161 }
162
163 static INLINE int
164 svga_arl_adjustment( const struct svga_shader_emitter *emit )
165 {
166 int i;
167
168 for (i = 0; i < emit->num_arl_consts; ++i) {
169 if (emit->arl_consts[i].arl_num == emit->current_arl)
170 return emit->arl_consts[i].number;
171 }
172 return 0;
173 }
174
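/* Translate a TGSI source operand (register file, index, swizzle,
 * negate/absolute modifiers and optional indirect addressing) into an
 * SVGA3D src_register.
 */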
175 static struct src_register
176 translate_src_register( const struct svga_shader_emitter *emit,
177 const struct tgsi_full_src_register *reg )
178 {
179 struct src_register src;
180
181 switch (reg->Register.File) {
182 case TGSI_FILE_INPUT:
183 /* Input registers are referred to by their semantic name rather
 184        * than by index. Use the mapping built up from the decls:
185 */
186 src = emit->input_map[reg->Register.Index];
187 break;
188
189 case TGSI_FILE_IMMEDIATE:
190 /* Immediates are appended after TGSI constants in the D3D
191 * constant buffer.
192 */
193 src = src_register( translate_file( reg->Register.File ),
194 reg->Register.Index +
195 emit->imm_start );
196 break;
197
198 default:
199 src = src_register( translate_file( reg->Register.File ),
200 reg->Register.Index );
201
202 break;
203 }
204
205 /* Indirect addressing.
206 */
207 if (reg->Register.Indirect) {
208 if (emit->unit == PIPE_SHADER_FRAGMENT) {
209 /* Pixel shaders have only loop registers for relative
210 * addressing into inputs. Ignore the redundant address
211 * register, the contents of aL should be in sync with it.
212 */
213 if (reg->Register.File == TGSI_FILE_INPUT) {
214 src.base.relAddr = 1;
215 src.indirect = src_token(SVGA3DREG_LOOP, 0);
216 }
217 }
218 else {
219 /* Constant buffers only.
220 */
221 if (reg->Register.File == TGSI_FILE_CONSTANT) {
222 /* we shift the offset towards the minimum */
223 if (svga_arl_needs_adjustment( emit )) {
224 src.base.num -= svga_arl_adjustment( emit );
225 }
226 src.base.relAddr = 1;
227
228 /* Not really sure what should go in the second token:
229 */
230 src.indirect = src_token( SVGA3DREG_ADDR,
231 reg->Indirect.Index );
232
233 src.indirect.swizzle = SWIZZLE_XXXX;
234 }
235 }
236 }
237
238 src = swizzle( src,
239 reg->Register.SwizzleX,
240 reg->Register.SwizzleY,
241 reg->Register.SwizzleZ,
242 reg->Register.SwizzleW );
243
244 /* src.mod isn't a bitfield, unfortunately:
245 * See tgsi_util_get_full_src_register_sign_mode for implementation details.
246 */
247 if (reg->Register.Absolute) {
248 if (reg->Register.Negate)
249 src.base.srcMod = SVGA3DSRCMOD_ABSNEG;
250 else
251 src.base.srcMod = SVGA3DSRCMOD_ABS;
252 }
253 else {
254 if (reg->Register.Negate)
255 src.base.srcMod = SVGA3DSRCMOD_NEG;
256 else
257 src.base.srcMod = SVGA3DSRCMOD_NONE;
258 }
259
260 return src;
261 }
262
263
264 /*
265 * Get a temporary register.
266 * Note: if we exceed the temporary register limit we just use
267 * register SVGA3D_TEMPREG_MAX - 1.
268 */
269 static INLINE SVGA3dShaderDestToken
270 get_temp( struct svga_shader_emitter *emit )
271 {
272 int i = emit->nr_hw_temp + emit->internal_temp_count++;
273 assert(i < SVGA3D_TEMPREG_MAX);
274 i = MIN2(i, SVGA3D_TEMPREG_MAX - 1);
275 return dst_register( SVGA3DREG_TEMP, i );
276 }
277
278 /* Release a single temp. Currently only effective if it was the last
279 * allocated temp, otherwise release will be delayed until the next
280 * call to reset_temp_regs().
281 */
282 static INLINE void
283 release_temp( struct svga_shader_emitter *emit,
284 SVGA3dShaderDestToken temp )
285 {
286 if (temp.num == emit->internal_temp_count - 1)
287 emit->internal_temp_count--;
288 }
289
290 static void reset_temp_regs( struct svga_shader_emitter *emit )
291 {
292 emit->internal_temp_count = 0;
293 }
294
295
296 /* Replace the src with the temporary specified in the dst, but copying
297 * only the necessary channels, and preserving the original swizzle (which is
298 * important given that several opcodes have constraints in the allowed
299 * swizzles).
300 */
301 static boolean emit_repl( struct svga_shader_emitter *emit,
302 SVGA3dShaderDestToken dst,
303 struct src_register *src0)
304 {
305 unsigned src0_swizzle;
306 unsigned chan;
307
308 assert(SVGA3dShaderGetRegType(dst.value) == SVGA3DREG_TEMP);
309
310 src0_swizzle = src0->base.swizzle;
311
312 dst.mask = 0;
313 for (chan = 0; chan < 4; ++chan) {
314 unsigned swizzle = (src0_swizzle >> (chan *2)) & 0x3;
315 dst.mask |= 1 << swizzle;
316 }
317 assert(dst.mask);
318
319 src0->base.swizzle = SVGA3DSWIZZLE_NONE;
320
321 if (!emit_op1( emit, inst_token( SVGA3DOP_MOV ), dst, *src0 ))
322 return FALSE;
323
324 *src0 = src( dst );
325 src0->base.swizzle = src0_swizzle;
326
327 return TRUE;
328 }
329
330
331 static boolean submit_op0( struct svga_shader_emitter *emit,
332 SVGA3dShaderInstToken inst,
333 SVGA3dShaderDestToken dest )
334 {
335 return (emit_instruction( emit, inst ) &&
336 emit_dst( emit, dest ));
337 }
338
339 static boolean submit_op1( struct svga_shader_emitter *emit,
340 SVGA3dShaderInstToken inst,
341 SVGA3dShaderDestToken dest,
342 struct src_register src0 )
343 {
344 return emit_op1( emit, inst, dest, src0 );
345 }
346
347
348 /* SVGA shaders may not refer to >1 constant register in a single
349 * instruction. This function checks for that usage and inserts a
350 * move to temporary if detected.
351 *
352 * The same applies to input registers -- at most a single input
353 * register may be read by any instruction.
354 */
355 static boolean submit_op2( struct svga_shader_emitter *emit,
356 SVGA3dShaderInstToken inst,
357 SVGA3dShaderDestToken dest,
358 struct src_register src0,
359 struct src_register src1 )
360 {
361 SVGA3dShaderDestToken temp;
362 SVGA3dShaderRegType type0, type1;
363 boolean need_temp = FALSE;
364
365 temp.value = 0;
366 type0 = SVGA3dShaderGetRegType( src0.base.value );
367 type1 = SVGA3dShaderGetRegType( src1.base.value );
368
369 if (type0 == SVGA3DREG_CONST &&
370 type1 == SVGA3DREG_CONST &&
371 src0.base.num != src1.base.num)
372 need_temp = TRUE;
373
374 if (type0 == SVGA3DREG_INPUT &&
375 type1 == SVGA3DREG_INPUT &&
376 src0.base.num != src1.base.num)
377 need_temp = TRUE;
378
379 if (need_temp) {
380 temp = get_temp( emit );
381
382 if (!emit_repl( emit, temp, &src0 ))
383 return FALSE;
384 }
385
386 if (!emit_op2( emit, inst, dest, src0, src1 ))
387 return FALSE;
388
389 if (need_temp)
390 release_temp( emit, temp );
391
392 return TRUE;
393 }
394
395
396 /* SVGA shaders may not refer to >1 constant register in a single
397 * instruction. This function checks for that usage and inserts a
398 * move to temporary if detected.
399 */
400 static boolean submit_op3( struct svga_shader_emitter *emit,
401 SVGA3dShaderInstToken inst,
402 SVGA3dShaderDestToken dest,
403 struct src_register src0,
404 struct src_register src1,
405 struct src_register src2 )
406 {
407 SVGA3dShaderDestToken temp0;
408 SVGA3dShaderDestToken temp1;
409 boolean need_temp0 = FALSE;
410 boolean need_temp1 = FALSE;
411 SVGA3dShaderRegType type0, type1, type2;
412
413 temp0.value = 0;
414 temp1.value = 0;
415 type0 = SVGA3dShaderGetRegType( src0.base.value );
416 type1 = SVGA3dShaderGetRegType( src1.base.value );
417 type2 = SVGA3dShaderGetRegType( src2.base.value );
418
419 if (inst.op != SVGA3DOP_SINCOS) {
420 if (type0 == SVGA3DREG_CONST &&
421 ((type1 == SVGA3DREG_CONST && src0.base.num != src1.base.num) ||
422 (type2 == SVGA3DREG_CONST && src0.base.num != src2.base.num)))
423 need_temp0 = TRUE;
424
425 if (type1 == SVGA3DREG_CONST &&
426 (type2 == SVGA3DREG_CONST && src1.base.num != src2.base.num))
427 need_temp1 = TRUE;
428 }
429
430 if (type0 == SVGA3DREG_INPUT &&
431 ((type1 == SVGA3DREG_INPUT && src0.base.num != src1.base.num) ||
432 (type2 == SVGA3DREG_INPUT && src0.base.num != src2.base.num)))
433 need_temp0 = TRUE;
434
435 if (type1 == SVGA3DREG_INPUT &&
436 (type2 == SVGA3DREG_INPUT && src1.base.num != src2.base.num))
437 need_temp1 = TRUE;
438
439 if (need_temp0) {
440 temp0 = get_temp( emit );
441
442 if (!emit_repl( emit, temp0, &src0 ))
443 return FALSE;
444 }
445
446 if (need_temp1) {
447 temp1 = get_temp( emit );
448
449 if (!emit_repl( emit, temp1, &src1 ))
450 return FALSE;
451 }
452
453 if (!emit_op3( emit, inst, dest, src0, src1, src2 ))
454 return FALSE;
455
456 if (need_temp1)
457 release_temp( emit, temp1 );
458 if (need_temp0)
459 release_temp( emit, temp0 );
460 return TRUE;
461 }
462
463
464
465
466 /* SVGA shaders may not refer to >1 constant register in a single
467 * instruction. This function checks for that usage and inserts a
468 * move to temporary if detected.
469 */
470 static boolean submit_op4( struct svga_shader_emitter *emit,
471 SVGA3dShaderInstToken inst,
472 SVGA3dShaderDestToken dest,
473 struct src_register src0,
474 struct src_register src1,
475 struct src_register src2,
476 struct src_register src3)
477 {
478 SVGA3dShaderDestToken temp0;
479 SVGA3dShaderDestToken temp3;
480 boolean need_temp0 = FALSE;
481 boolean need_temp3 = FALSE;
482 SVGA3dShaderRegType type0, type1, type2, type3;
483
484 temp0.value = 0;
485 temp3.value = 0;
486 type0 = SVGA3dShaderGetRegType( src0.base.value );
487 type1 = SVGA3dShaderGetRegType( src1.base.value );
488 type2 = SVGA3dShaderGetRegType( src2.base.value );
 489    type3 = SVGA3dShaderGetRegType( src3.base.value );
490
491 /* Make life a little easier - this is only used by the TXD
492 * instruction which is guaranteed not to have a constant/input reg
493 * in one slot at least:
494 */
495 assert(type1 == SVGA3DREG_SAMPLER);
496
497 if (type0 == SVGA3DREG_CONST &&
498 ((type3 == SVGA3DREG_CONST && src0.base.num != src3.base.num) ||
499 (type2 == SVGA3DREG_CONST && src0.base.num != src2.base.num)))
500 need_temp0 = TRUE;
501
502 if (type3 == SVGA3DREG_CONST &&
503 (type2 == SVGA3DREG_CONST && src3.base.num != src2.base.num))
504 need_temp3 = TRUE;
505
506 if (type0 == SVGA3DREG_INPUT &&
507 ((type3 == SVGA3DREG_INPUT && src0.base.num != src3.base.num) ||
508 (type2 == SVGA3DREG_INPUT && src0.base.num != src2.base.num)))
509 need_temp0 = TRUE;
510
511 if (type3 == SVGA3DREG_INPUT &&
512 (type2 == SVGA3DREG_INPUT && src3.base.num != src2.base.num))
513 need_temp3 = TRUE;
514
515 if (need_temp0) {
516 temp0 = get_temp( emit );
517
518 if (!emit_repl( emit, temp0, &src0 ))
519 return FALSE;
520 }
521
522 if (need_temp3) {
523 temp3 = get_temp( emit );
524
525 if (!emit_repl( emit, temp3, &src3 ))
526 return FALSE;
527 }
528
529 if (!emit_op4( emit, inst, dest, src0, src1, src2, src3 ))
530 return FALSE;
531
532 if (need_temp3)
533 release_temp( emit, temp3 );
534 if (need_temp0)
535 release_temp( emit, temp0 );
536 return TRUE;
537 }
538
539
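/* Return TRUE if 'src' and 'dst' refer to the same hardware register
 * (same register type and index).
 */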
540 static boolean alias_src_dst( struct src_register src,
541 SVGA3dShaderDestToken dst )
542 {
543 if (src.base.num != dst.num)
544 return FALSE;
545
546 if (SVGA3dShaderGetRegType(dst.value) !=
547 SVGA3dShaderGetRegType(src.base.value))
548 return FALSE;
549
550 return TRUE;
551 }
552
553
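/* Emit an LRP instruction.  DX9 requires its destination to be a
 * temporary register distinct from src0 and src2, so route the result
 * through a scratch temp and a final MOV when necessary.
 */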
554 static boolean submit_lrp(struct svga_shader_emitter *emit,
555 SVGA3dShaderDestToken dst,
556 struct src_register src0,
557 struct src_register src1,
558 struct src_register src2)
559 {
560 SVGA3dShaderDestToken tmp;
561 boolean need_dst_tmp = FALSE;
562
563 /* The dst reg must be a temporary, and not be the same as src0 or src2 */
564 if (SVGA3dShaderGetRegType(dst.value) != SVGA3DREG_TEMP ||
565 alias_src_dst(src0, dst) ||
566 alias_src_dst(src2, dst))
567 need_dst_tmp = TRUE;
568
569 if (need_dst_tmp) {
570 tmp = get_temp( emit );
571 tmp.mask = dst.mask;
572 }
573 else {
574 tmp = dst;
575 }
576
577 if (!submit_op3(emit, inst_token( SVGA3DOP_LRP ), tmp, src0, src1, src2))
578 return FALSE;
579
580 if (need_dst_tmp) {
581 if (!submit_op1(emit, inst_token( SVGA3DOP_MOV ), dst, src( tmp )))
582 return FALSE;
583 }
584
585 return TRUE;
586 }
587
588
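/* Emit a DEF (float) or DEFI (integer) token defining an immediate
 * constant register with the four given values.
 */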
589 static boolean emit_def_const( struct svga_shader_emitter *emit,
590 SVGA3dShaderConstType type,
591 unsigned idx,
592 float a,
593 float b,
594 float c,
595 float d )
596 {
597 SVGA3DOpDefArgs def;
598 SVGA3dShaderInstToken opcode;
599
600 switch (type) {
601 case SVGA3D_CONST_TYPE_FLOAT:
602 opcode = inst_token( SVGA3DOP_DEF );
603 def.dst = dst_register( SVGA3DREG_CONST, idx );
604 def.constValues[0] = a;
605 def.constValues[1] = b;
606 def.constValues[2] = c;
607 def.constValues[3] = d;
608 break;
609 case SVGA3D_CONST_TYPE_INT:
610 opcode = inst_token( SVGA3DOP_DEFI );
611 def.dst = dst_register( SVGA3DREG_CONSTINT, idx );
612 def.constIValues[0] = (int)a;
613 def.constIValues[1] = (int)b;
614 def.constIValues[2] = (int)c;
615 def.constIValues[3] = (int)d;
616 break;
617 default:
618 assert(0);
619 opcode = inst_token( SVGA3DOP_NOP );
620 break;
621 }
622
623 if (!emit_instruction(emit, opcode) ||
624 !svga_shader_emit_dwords( emit, def.values, Elements(def.values)))
625 return FALSE;
626
627 return TRUE;
628 }
629
630 static INLINE boolean
631 create_zero_immediate( struct svga_shader_emitter *emit )
632 {
633 unsigned idx = emit->nr_hw_float_const++;
634
635 /* Emit the constant (0, 0, -1, 1) and use swizzling to generate
636 * other useful vectors.
637 */
638 if (!emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT,
639 idx, 0, 0, -1, 1 ))
640 return FALSE;
641
642 emit->zero_immediate_idx = idx;
643 emit->created_zero_immediate = TRUE;
644
645 return TRUE;
646 }
647
648 static INLINE boolean
649 create_loop_const( struct svga_shader_emitter *emit )
650 {
651 unsigned idx = emit->nr_hw_int_const++;
652
653 if (!emit_def_const( emit, SVGA3D_CONST_TYPE_INT, idx,
654 255, /* iteration count */
655 0, /* initial value */
656 1, /* step size */
657 0 /* not used, must be 0 */))
658 return FALSE;
659
660 emit->loop_const_idx = idx;
661 emit->created_loop_const = TRUE;
662
663 return TRUE;
664 }
665
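/* Pack the constants referenced via ARL-relative addressing into
 * hardware float constants, four values per register, and record the
 * hardware constant index for each entry so get_fake_arl_const() can
 * locate it later.
 */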
666 static INLINE boolean
667 create_arl_consts( struct svga_shader_emitter *emit )
668 {
669 int i;
670
671 for (i = 0; i < emit->num_arl_consts; i += 4) {
672 int j;
673 unsigned idx = emit->nr_hw_float_const++;
674 float vals[4];
675 for (j = 0; j < 4 && (j + i) < emit->num_arl_consts; ++j) {
676 vals[j] = emit->arl_consts[i + j].number;
677 emit->arl_consts[i + j].idx = idx;
678 switch (j) {
679 case 0:
680 emit->arl_consts[i + 0].swizzle = TGSI_SWIZZLE_X;
681 break;
682 case 1:
683 emit->arl_consts[i + 0].swizzle = TGSI_SWIZZLE_Y;
684 break;
685 case 2:
686 emit->arl_consts[i + 0].swizzle = TGSI_SWIZZLE_Z;
687 break;
688 case 3:
689 emit->arl_consts[i + 0].swizzle = TGSI_SWIZZLE_W;
690 break;
691 }
692 }
693 while (j < 4)
694 vals[j++] = 0;
695
696 if (!emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT, idx,
697 vals[0], vals[1],
698 vals[2], vals[3]))
699 return FALSE;
700 }
701
702 return TRUE;
703 }
704
705 static INLINE struct src_register
706 get_vface( struct svga_shader_emitter *emit )
707 {
708 assert(emit->emitted_vface);
709 return src_register(SVGA3DREG_MISCTYPE,
710 SVGA3DMISCREG_FACE);
711 }
712
713 /* returns {0, 0, 0, 1} immediate */
714 static INLINE struct src_register
715 get_zero_immediate( struct svga_shader_emitter *emit )
716 {
717 assert(emit->created_zero_immediate);
718 assert(emit->zero_immediate_idx >= 0);
719 return swizzle(src_register( SVGA3DREG_CONST,
720 emit->zero_immediate_idx),
721 0, 0, 0, 3);
722 }
723
724 /* returns {1, 1, 1, -1} immediate */
725 static INLINE struct src_register
726 get_pos_neg_one_immediate( struct svga_shader_emitter *emit )
727 {
728 assert(emit->created_zero_immediate);
729 assert(emit->zero_immediate_idx >= 0);
730 return swizzle(src_register( SVGA3DREG_CONST,
731 emit->zero_immediate_idx),
732 3, 3, 3, 2);
733 }
734
735 /* returns the loop const */
736 static INLINE struct src_register
737 get_loop_const( struct svga_shader_emitter *emit )
738 {
739 assert(emit->created_loop_const);
740 assert(emit->loop_const_idx >= 0);
741 return src_register( SVGA3DREG_CONSTINT,
742 emit->loop_const_idx );
743 }
744
745 static INLINE struct src_register
746 get_fake_arl_const( struct svga_shader_emitter *emit )
747 {
748 struct src_register reg;
749 int idx = 0, swizzle = 0, i;
750
751 for (i = 0; i < emit->num_arl_consts; ++ i) {
752 if (emit->arl_consts[i].arl_num == emit->current_arl) {
753 idx = emit->arl_consts[i].idx;
754 swizzle = emit->arl_consts[i].swizzle;
755 }
756 }
757
758 reg = src_register( SVGA3DREG_CONST, idx );
759 return scalar(reg, swizzle);
760 }
761
762 static INLINE struct src_register
763 get_tex_dimensions( struct svga_shader_emitter *emit, int sampler_num )
764 {
765 int idx;
766 struct src_register reg;
767
768 /* the width/height indexes start right after constants */
769 idx = emit->key.fkey.tex[sampler_num].width_height_idx +
770 emit->info.file_max[TGSI_FILE_CONSTANT] + 1;
771
772 reg = src_register( SVGA3DREG_CONST, idx );
773 return reg;
774 }
775
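/* Emit ARL when svga_arl_needs_adjustment() reports that the address
 * must be biased: copy the source into a temp, ADD the packed constant
 * from get_fake_arl_const(), then load the result with MOVA.
 */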
776 static boolean emit_fake_arl(struct svga_shader_emitter *emit,
777 const struct tgsi_full_instruction *insn)
778 {
779 const struct src_register src0 = translate_src_register(
780 emit, &insn->Src[0] );
781 struct src_register src1 = get_fake_arl_const( emit );
782 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
783 SVGA3dShaderDestToken tmp = get_temp( emit );
784
785 if (!submit_op1(emit, inst_token( SVGA3DOP_MOV ), tmp, src0))
786 return FALSE;
787
788 if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ), tmp, src( tmp ),
789 src1))
790 return FALSE;
791
792 /* replicate the original swizzle */
793 src1 = src(tmp);
794 src1.base.swizzle = src0.base.swizzle;
795
796 return submit_op1( emit, inst_token( SVGA3DOP_MOVA ),
797 dst, src1 );
798 }
799
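/* Translate TGSI IF into an SVGA IFC instruction that compares the
 * condition against zero with NE.  A constant-register condition is
 * first copied into a temp, since IFC may only read one constant
 * register and the zero immediate already occupies that slot.
 */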
800 static boolean emit_if(struct svga_shader_emitter *emit,
801 const struct tgsi_full_instruction *insn)
802 {
803 struct src_register src0 = translate_src_register(
804 emit, &insn->Src[0] );
805 struct src_register zero = get_zero_immediate( emit );
806 SVGA3dShaderInstToken if_token = inst_token( SVGA3DOP_IFC );
807
808 if_token.control = SVGA3DOPCOMPC_NE;
809 zero = scalar(zero, TGSI_SWIZZLE_X);
810
811 if (SVGA3dShaderGetRegType(src0.base.value) == SVGA3DREG_CONST) {
812 /*
813 * Max different constant registers readable per IFC instruction is 1.
814 */
815
816 SVGA3dShaderDestToken tmp = get_temp( emit );
817
818 if (!submit_op1(emit, inst_token( SVGA3DOP_MOV ), tmp, src0))
819 return FALSE;
820
821 src0 = scalar(src( tmp ), TGSI_SWIZZLE_X);
822 }
823
824 emit->dynamic_branching_level++;
825
826 return (emit_instruction( emit, if_token ) &&
827 emit_src( emit, src0 ) &&
828 emit_src( emit, zero ) );
829 }
830
831 static boolean emit_endif(struct svga_shader_emitter *emit,
832 const struct tgsi_full_instruction *insn)
833 {
834 emit->dynamic_branching_level--;
835
836 return (emit_instruction( emit,
837 inst_token( SVGA3DOP_ENDIF )));
838 }
839
840 static boolean emit_else(struct svga_shader_emitter *emit,
841 const struct tgsi_full_instruction *insn)
842 {
843 return (emit_instruction( emit,
844 inst_token( SVGA3DOP_ELSE )));
845 }
846
847 /* Translate the following TGSI FLR instruction.
848 * FLR DST, SRC
849 * To the following SVGA3D instruction sequence.
850 * FRC TMP, SRC
851 * SUB DST, SRC, TMP
852 */
853 static boolean emit_floor(struct svga_shader_emitter *emit,
854 const struct tgsi_full_instruction *insn )
855 {
856 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
857 const struct src_register src0 = translate_src_register(
858 emit, &insn->Src[0] );
859 SVGA3dShaderDestToken temp = get_temp( emit );
860
861 /* FRC TMP, SRC */
862 if (!submit_op1( emit, inst_token( SVGA3DOP_FRC ), temp, src0 ))
863 return FALSE;
864
865 /* SUB DST, SRC, TMP */
866 if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ), dst, src0,
867 negate( src( temp ) ) ))
868 return FALSE;
869
870 return TRUE;
871 }
872
873
874 /* Translate the following TGSI CEIL instruction.
875 * CEIL DST, SRC
876 * To the following SVGA3D instruction sequence.
877 * FRC TMP, -SRC
878 * ADD DST, SRC, TMP
879 */
880 static boolean emit_ceil(struct svga_shader_emitter *emit,
881 const struct tgsi_full_instruction *insn)
882 {
883 SVGA3dShaderDestToken dst = translate_dst_register(emit, insn, 0);
884 const struct src_register src0 = translate_src_register(emit, &insn->Src[0]);
885 SVGA3dShaderDestToken temp = get_temp(emit);
886
887 /* FRC TMP, -SRC */
888 if (!submit_op1(emit, inst_token(SVGA3DOP_FRC), temp, negate(src0)))
889 return FALSE;
890
891 /* ADD DST, SRC, TMP */
892 if (!submit_op2(emit, inst_token(SVGA3DOP_ADD), dst, src0, src(temp)))
893 return FALSE;
894
895 return TRUE;
896 }
897
898
899 /* Translate the following TGSI CMP instruction.
900 * CMP DST, SRC0, SRC1, SRC2
901 * To the following SVGA3D instruction sequence.
902 * CMP DST, SRC0, SRC2, SRC1
903 */
904 static boolean emit_cmp(struct svga_shader_emitter *emit,
905 const struct tgsi_full_instruction *insn )
906 {
907 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
908 const struct src_register src0 = translate_src_register(
909 emit, &insn->Src[0] );
910 const struct src_register src1 = translate_src_register(
911 emit, &insn->Src[1] );
912 const struct src_register src2 = translate_src_register(
913 emit, &insn->Src[2] );
914
915 if (emit->unit == PIPE_SHADER_VERTEX) {
916 SVGA3dShaderDestToken temp = get_temp(emit);
917 struct src_register zero = scalar(get_zero_immediate(emit), TGSI_SWIZZLE_X);
918
919 /* Since vertex shaders don't support the CMP instruction,
920 * simulate it with SLT and LRP instructions.
921 * SLT TMP, SRC0, 0.0
922 * LRP DST, TMP, SRC1, SRC2
923 */
924 if (!submit_op2(emit, inst_token(SVGA3DOP_SLT), temp, src0, zero))
925 return FALSE;
926 return submit_lrp(emit, dst, src(temp), src1, src2);
927 }
928
929 /* CMP DST, SRC0, SRC2, SRC1 */
930 return submit_op3( emit, inst_token( SVGA3DOP_CMP ), dst, src0, src2, src1);
931 }
932
933
934
935 /* Translate the following TGSI DIV instruction.
936 * DIV DST.xy, SRC0, SRC1
937 * To the following SVGA3D instruction sequence.
938 * RCP TMP.x, SRC1.xxxx
939 * RCP TMP.y, SRC1.yyyy
940 * MUL DST.xy, SRC0, TMP
941 */
942 static boolean emit_div(struct svga_shader_emitter *emit,
943 const struct tgsi_full_instruction *insn )
944 {
945 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
946 const struct src_register src0 = translate_src_register(
947 emit, &insn->Src[0] );
948 const struct src_register src1 = translate_src_register(
949 emit, &insn->Src[1] );
950 SVGA3dShaderDestToken temp = get_temp( emit );
951 int i;
952
953 /* For each enabled element, perform a RCP instruction. Note that
954 * RCP is scalar in SVGA3D:
955 */
956 for (i = 0; i < 4; i++) {
957 unsigned channel = 1 << i;
958 if (dst.mask & channel) {
959 /* RCP TMP.?, SRC1.???? */
960 if (!submit_op1( emit, inst_token( SVGA3DOP_RCP ),
961 writemask(temp, channel),
962 scalar(src1, i) ))
963 return FALSE;
964 }
965 }
966
967 /* Then multiply them out with a single mul:
968 *
969 * MUL DST, SRC0, TMP
970 */
971 if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ), dst, src0,
972 src( temp ) ))
973 return FALSE;
974
975 return TRUE;
976 }
977
978 /* Translate the following TGSI DP2 instruction.
 979  * DP2 DST, SRC0, SRC1
 980  * To the following SVGA3D instruction sequence.
 981  * MUL TMP, SRC0, SRC1
982 * ADD DST, TMP.xxxx, TMP.yyyy
983 */
984 static boolean emit_dp2(struct svga_shader_emitter *emit,
985 const struct tgsi_full_instruction *insn )
986 {
987 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
988 const struct src_register src0 = translate_src_register(
989 emit, &insn->Src[0] );
990 const struct src_register src1 = translate_src_register(
991 emit, &insn->Src[1] );
992 SVGA3dShaderDestToken temp = get_temp( emit );
993 struct src_register temp_src0, temp_src1;
994
 995    /* MUL TMP, SRC0, SRC1 */
996 if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ), temp, src0, src1 ))
997 return FALSE;
998
999 temp_src0 = scalar(src( temp ), TGSI_SWIZZLE_X);
1000 temp_src1 = scalar(src( temp ), TGSI_SWIZZLE_Y);
1001
1002 /* ADD DST, TMP.xxxx, TMP.yyyy */
1003 if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ), dst,
1004 temp_src0, temp_src1 ))
1005 return FALSE;
1006
1007 return TRUE;
1008 }
1009
1010
1011 /* Translate the following TGSI DPH instruction.
 1012  * DPH DST, SRC0, SRC1
 1013  * To the following SVGA3D instruction sequence.
 1014  * DP3 TMP, SRC0, SRC1
 1015  * ADD DST, TMP, SRC1.wwww
1016 */
1017 static boolean emit_dph(struct svga_shader_emitter *emit,
1018 const struct tgsi_full_instruction *insn )
1019 {
1020 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1021 const struct src_register src0 = translate_src_register(
1022 emit, &insn->Src[0] );
1023 struct src_register src1 = translate_src_register(
1024 emit, &insn->Src[1] );
1025 SVGA3dShaderDestToken temp = get_temp( emit );
1026
 1027    /* DP3 TMP, SRC0, SRC1 */
1028 if (!submit_op2( emit, inst_token( SVGA3DOP_DP3 ), temp, src0, src1 ))
1029 return FALSE;
1030
1031 src1 = scalar(src1, TGSI_SWIZZLE_W);
1032
 1033    /* ADD DST, TMP, SRC1.wwww */
1034 if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ), dst,
1035 src( temp ), src1 ))
1036 return FALSE;
1037
1038 return TRUE;
1039 }
1040
 1041 /* Translate the following TGSI NRM instruction.
1042 * NRM DST, SRC
1043 * To the following SVGA3D instruction sequence.
1044 * DP3 TMP, SRC, SRC
1045 * RSQ TMP, TMP
1046 * MUL DST, SRC, TMP
1047 */
1048 static boolean emit_nrm(struct svga_shader_emitter *emit,
1049 const struct tgsi_full_instruction *insn )
1050 {
1051 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1052 const struct src_register src0 = translate_src_register(
1053 emit, &insn->Src[0] );
1054 SVGA3dShaderDestToken temp = get_temp( emit );
1055
1056 /* DP3 TMP, SRC, SRC */
1057 if (!submit_op2( emit, inst_token( SVGA3DOP_DP3 ), temp, src0, src0 ))
1058 return FALSE;
1059
1060 /* RSQ TMP, TMP */
1061 if (!submit_op1( emit, inst_token( SVGA3DOP_RSQ ), temp, src( temp )))
1062 return FALSE;
1063
1064 /* MUL DST, SRC, TMP */
1065 if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ), dst,
1066 src0, src( temp )))
1067 return FALSE;
1068
1069 return TRUE;
1070
1071 }
1072
1073 static boolean do_emit_sincos(struct svga_shader_emitter *emit,
1074 SVGA3dShaderDestToken dst,
1075 struct src_register src0)
1076 {
1077 src0 = scalar(src0, TGSI_SWIZZLE_X);
1078
1079 return submit_op1( emit, inst_token( SVGA3DOP_SINCOS ),
1080 dst, src0 );
1081 }
1082
1083 static boolean emit_sincos(struct svga_shader_emitter *emit,
1084 const struct tgsi_full_instruction *insn)
1085 {
1086 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1087 struct src_register src0 = translate_src_register(
1088 emit, &insn->Src[0] );
1089 SVGA3dShaderDestToken temp = get_temp( emit );
1090
1091 /* SCS TMP SRC */
1092 if (!do_emit_sincos(emit, writemask(temp, TGSI_WRITEMASK_XY), src0 ))
1093 return FALSE;
1094
1095 /* MOV DST TMP */
1096 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), dst, src( temp ) ))
1097 return FALSE;
1098
1099 return TRUE;
1100 }
1101
1102 /*
1103 * SCS TMP SRC
1104 * MOV DST TMP.yyyy
1105 */
1106 static boolean emit_sin(struct svga_shader_emitter *emit,
1107 const struct tgsi_full_instruction *insn )
1108 {
1109 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1110 struct src_register src0 = translate_src_register(
1111 emit, &insn->Src[0] );
1112 SVGA3dShaderDestToken temp = get_temp( emit );
1113
1114 /* SCS TMP SRC */
1115 if (!do_emit_sincos(emit, writemask(temp, TGSI_WRITEMASK_Y), src0))
1116 return FALSE;
1117
1118 src0 = scalar(src( temp ), TGSI_SWIZZLE_Y);
1119
1120 /* MOV DST TMP.yyyy */
1121 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), dst, src0 ))
1122 return FALSE;
1123
1124 return TRUE;
1125 }
1126
1127 /*
1128 * SCS TMP SRC
1129 * MOV DST TMP.xxxx
1130 */
1131 static boolean emit_cos(struct svga_shader_emitter *emit,
1132 const struct tgsi_full_instruction *insn )
1133 {
1134 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1135 struct src_register src0 = translate_src_register(
1136 emit, &insn->Src[0] );
1137 SVGA3dShaderDestToken temp = get_temp( emit );
1138
1139 /* SCS TMP SRC */
1140 if (!do_emit_sincos( emit, writemask(temp, TGSI_WRITEMASK_X), src0 ))
1141 return FALSE;
1142
1143 src0 = scalar(src( temp ), TGSI_SWIZZLE_X);
1144
1145 /* MOV DST TMP.xxxx */
1146 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), dst, src0 ))
1147 return FALSE;
1148
1149 return TRUE;
1150 }
1151
1152 static boolean emit_ssg(struct svga_shader_emitter *emit,
1153 const struct tgsi_full_instruction *insn )
1154 {
1155 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1156 struct src_register src0 = translate_src_register(
1157 emit, &insn->Src[0] );
1158 SVGA3dShaderDestToken temp0 = get_temp( emit );
1159 SVGA3dShaderDestToken temp1 = get_temp( emit );
1160 struct src_register zero, one;
1161
1162 if (emit->unit == PIPE_SHADER_VERTEX) {
1163 /* SGN DST, SRC0, TMP0, TMP1 */
1164 return submit_op3( emit, inst_token( SVGA3DOP_SGN ), dst, src0,
1165 src( temp0 ), src( temp1 ) );
1166 }
1167
1168 zero = get_zero_immediate( emit );
1169 one = scalar( zero, TGSI_SWIZZLE_W );
1170 zero = scalar( zero, TGSI_SWIZZLE_X );
1171
1172 /* CMP TMP0, SRC0, one, zero */
1173 if (!submit_op3( emit, inst_token( SVGA3DOP_CMP ),
1174 writemask( temp0, dst.mask ), src0, one, zero ))
1175 return FALSE;
1176
1177 /* CMP TMP1, negate(SRC0), negate(one), zero */
1178 if (!submit_op3( emit, inst_token( SVGA3DOP_CMP ),
1179 writemask( temp1, dst.mask ), negate( src0 ), negate( one ),
1180 zero ))
1181 return FALSE;
1182
1183 /* ADD DST, TMP0, TMP1 */
1184 return submit_op2( emit, inst_token( SVGA3DOP_ADD ), dst, src( temp0 ),
1185 src( temp1 ) );
1186 }
1187
1188 /*
 1189  * ADD DST, SRC0, negate(SRC1)
1190 */
1191 static boolean emit_sub(struct svga_shader_emitter *emit,
1192 const struct tgsi_full_instruction *insn)
1193 {
1194 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1195 struct src_register src0 = translate_src_register(
1196 emit, &insn->Src[0] );
1197 struct src_register src1 = translate_src_register(
1198 emit, &insn->Src[1] );
1199
1200 src1 = negate(src1);
1201
1202 if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ), dst,
1203 src0, src1 ))
1204 return FALSE;
1205
1206 return TRUE;
1207 }
1208
1209
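/* Translate conditional fragment kill (TGSI KIL).  TEXKILL discards
 * the fragment when any of its operand's .xyz components is negative,
 * so if the W component selects a channel that isn't already covered,
 * it is replicated into a temp and tested with a second TEXKILL.
 */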
1210 static boolean emit_kil(struct svga_shader_emitter *emit,
1211 const struct tgsi_full_instruction *insn )
1212 {
1213 const struct tgsi_full_src_register *reg = &insn->Src[0];
1214 struct src_register src0, srcIn;
1215 /* is the W component tested in another position? */
1216 const boolean w_tested = (reg->Register.SwizzleW == reg->Register.SwizzleX ||
1217 reg->Register.SwizzleW == reg->Register.SwizzleY ||
1218 reg->Register.SwizzleW == reg->Register.SwizzleZ);
1219 const boolean special = (reg->Register.Absolute ||
1220 reg->Register.Negate ||
1221 reg->Register.Indirect ||
1222 reg->Register.SwizzleX != 0 ||
1223 reg->Register.SwizzleY != 1 ||
1224 reg->Register.SwizzleZ != 2 ||
1225 reg->Register.File != TGSI_FILE_TEMPORARY);
1226 SVGA3dShaderDestToken temp;
1227
1228 src0 = srcIn = translate_src_register( emit, reg );
1229
1230 if (special || !w_tested) {
1231 /* need a temp reg */
1232 temp = get_temp( emit );
1233 }
1234
1235 if (special) {
1236 /* move the source into a temp register */
1237 submit_op1( emit, inst_token( SVGA3DOP_MOV ),
1238 writemask( temp, TGSI_WRITEMASK_XYZ ),
1239 src0 );
1240
1241 src0 = src( temp );
1242 }
1243
1244 /* do the texkill (on the xyz components) */
1245 if (!submit_op0( emit, inst_token( SVGA3DOP_TEXKILL ), dst(src0) ))
1246 return FALSE;
1247
1248 if (!w_tested) {
1249 /* need to emit a second texkill to test the W component */
1250 /* put src.wwww into temp register */
1251 if (!submit_op1(emit,
1252 inst_token( SVGA3DOP_MOV ),
1253 writemask( temp, TGSI_WRITEMASK_XYZ ),
1254 scalar(srcIn, TGSI_SWIZZLE_W)))
1255 return FALSE;
1256
1257 /* second texkill */
1258 if (!submit_op0( emit, inst_token( SVGA3DOP_TEXKILL ), temp ))
1259 return FALSE;
1260 }
1261
1262 return TRUE;
1263 }
1264
1265
 1266 /* The mesa state tracker always emits KILP as an unconditional
 1267  * KIL. */
1268 static boolean emit_kilp(struct svga_shader_emitter *emit,
1269 const struct tgsi_full_instruction *insn )
1270 {
1271 SVGA3dShaderInstToken inst;
1272 SVGA3dShaderDestToken temp;
1273 struct src_register one = scalar( get_zero_immediate( emit ),
1274 TGSI_SWIZZLE_W );
1275
1276 inst = inst_token( SVGA3DOP_TEXKILL );
1277
 1278    /* texkill doesn't allow negation on the operand, so let's move
1279 * negation of {1} to a temp register */
1280 temp = get_temp( emit );
1281 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), temp,
1282 negate( one ) ))
1283 return FALSE;
1284
1285 return submit_op0( emit, inst, temp );
1286 }
1287
1288 /* Implement conditionals by initializing destination reg to 'fail',
1289 * then set predicate reg with UFOP_SETP, then move 'pass' to dest
1290 * based on predicate reg.
1291 *
1292 * SETP src0, cmp, src1 -- do this first to avoid aliasing problems.
1293 * MOV dst, fail
1294 * MOV dst, pass, p0
1295 */
1296 static boolean
1297 emit_conditional(struct svga_shader_emitter *emit,
1298 unsigned compare_func,
1299 SVGA3dShaderDestToken dst,
1300 struct src_register src0,
1301 struct src_register src1,
1302 struct src_register pass,
1303 struct src_register fail)
1304 {
1305 SVGA3dShaderDestToken pred_reg = dst_register( SVGA3DREG_PREDICATE, 0 );
1306 SVGA3dShaderInstToken setp_token, mov_token;
1307 setp_token = inst_token( SVGA3DOP_SETP );
1308
1309 switch (compare_func) {
1310 case PIPE_FUNC_NEVER:
1311 return submit_op1( emit, inst_token( SVGA3DOP_MOV ),
1312 dst, fail );
1313 break;
1314 case PIPE_FUNC_LESS:
1315 setp_token.control = SVGA3DOPCOMP_LT;
1316 break;
1317 case PIPE_FUNC_EQUAL:
1318 setp_token.control = SVGA3DOPCOMP_EQ;
1319 break;
1320 case PIPE_FUNC_LEQUAL:
1321 setp_token.control = SVGA3DOPCOMP_LE;
1322 break;
1323 case PIPE_FUNC_GREATER:
1324 setp_token.control = SVGA3DOPCOMP_GT;
1325 break;
1326 case PIPE_FUNC_NOTEQUAL:
1327 setp_token.control = SVGA3DOPCOMPC_NE;
1328 break;
1329 case PIPE_FUNC_GEQUAL:
1330 setp_token.control = SVGA3DOPCOMP_GE;
1331 break;
1332 case PIPE_FUNC_ALWAYS:
1333 return submit_op1( emit, inst_token( SVGA3DOP_MOV ),
1334 dst, pass );
1335 break;
1336 }
1337
1338 /* SETP src0, COMPOP, src1 */
1339 if (!submit_op2( emit, setp_token, pred_reg,
1340 src0, src1 ))
1341 return FALSE;
1342
1343 mov_token = inst_token( SVGA3DOP_MOV );
1344
1345 /* MOV dst, fail */
1346 if (!submit_op1( emit, mov_token, dst,
1347 fail ))
1348 return FALSE;
1349
1350 /* MOV dst, pass (predicated)
1351 *
1352 * Note that the predicate reg (and possible modifiers) is passed
1353 * as the first source argument.
1354 */
1355 mov_token.predicated = 1;
1356 if (!submit_op2( emit, mov_token, dst,
1357 src( pred_reg ), pass ))
1358 return FALSE;
1359
1360 return TRUE;
1361 }
1362
1363
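/* Emit dst = (src0 <compare_func> src1) ? 1.0 : 0.0.  Vertex shaders
 * can use SGE/SLT directly for some comparison functions; everything
 * else falls back to the predicated SETP sequence in emit_conditional().
 */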
1364 static boolean
1365 emit_select(struct svga_shader_emitter *emit,
1366 unsigned compare_func,
1367 SVGA3dShaderDestToken dst,
1368 struct src_register src0,
1369 struct src_register src1 )
1370 {
1371 /* There are some SVGA instructions which implement some selects
1372 * directly, but they are only available in the vertex shader.
1373 */
1374 if (emit->unit == PIPE_SHADER_VERTEX) {
1375 switch (compare_func) {
1376 case PIPE_FUNC_GEQUAL:
1377 return submit_op2( emit, inst_token( SVGA3DOP_SGE ), dst, src0, src1 );
1378 case PIPE_FUNC_LEQUAL:
1379 return submit_op2( emit, inst_token( SVGA3DOP_SGE ), dst, src1, src0 );
1380 case PIPE_FUNC_GREATER:
1381 return submit_op2( emit, inst_token( SVGA3DOP_SLT ), dst, src1, src0 );
1382 case PIPE_FUNC_LESS:
1383 return submit_op2( emit, inst_token( SVGA3DOP_SLT ), dst, src0, src1 );
1384 default:
1385 break;
1386 }
1387 }
1388
1389
1390 /* Otherwise, need to use the setp approach:
1391 */
1392 {
1393 struct src_register one, zero;
1394 /* zero immediate is 0,0,0,1 */
1395 zero = get_zero_immediate( emit );
1396 one = scalar( zero, TGSI_SWIZZLE_W );
1397 zero = scalar( zero, TGSI_SWIZZLE_X );
1398
1399 return emit_conditional(
1400 emit,
1401 compare_func,
1402 dst,
1403 src0,
1404 src1,
1405 one, zero);
1406 }
1407 }
1408
1409
1410 static boolean emit_select_op(struct svga_shader_emitter *emit,
1411 unsigned compare,
1412 const struct tgsi_full_instruction *insn)
1413 {
1414 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1415 struct src_register src0 = translate_src_register(
1416 emit, &insn->Src[0] );
1417 struct src_register src1 = translate_src_register(
1418 emit, &insn->Src[1] );
1419
1420 return emit_select( emit, compare, dst, src0, src1 );
1421 }
1422
1423
1424 /* Translate texture instructions to SVGA3D representation.
1425 */
1426 static boolean emit_tex2(struct svga_shader_emitter *emit,
1427 const struct tgsi_full_instruction *insn,
1428 SVGA3dShaderDestToken dst )
1429 {
1430 SVGA3dShaderInstToken inst;
1431 struct src_register texcoord;
1432 struct src_register sampler;
1433 SVGA3dShaderDestToken tmp;
1434
1435 inst.value = 0;
1436
1437 switch (insn->Instruction.Opcode) {
1438 case TGSI_OPCODE_TEX:
1439 inst.op = SVGA3DOP_TEX;
1440 break;
1441 case TGSI_OPCODE_TXP:
1442 inst.op = SVGA3DOP_TEX;
1443 inst.control = SVGA3DOPCONT_PROJECT;
1444 break;
1445 case TGSI_OPCODE_TXB:
1446 inst.op = SVGA3DOP_TEX;
1447 inst.control = SVGA3DOPCONT_BIAS;
1448 break;
1449 case TGSI_OPCODE_TXL:
1450 inst.op = SVGA3DOP_TEXLDL;
1451 break;
1452 default:
1453 assert(0);
1454 return FALSE;
1455 }
1456
1457 texcoord = translate_src_register( emit, &insn->Src[0] );
1458 sampler = translate_src_register( emit, &insn->Src[1] );
1459
1460 if (emit->key.fkey.tex[sampler.base.num].unnormalized ||
1461 emit->dynamic_branching_level > 0)
1462 tmp = get_temp( emit );
1463
1464 /* Can't do mipmapping inside dynamic branch constructs. Force LOD
1465 * zero in that case.
1466 */
1467 if (emit->dynamic_branching_level > 0 &&
1468 inst.op == SVGA3DOP_TEX &&
1469 SVGA3dShaderGetRegType(texcoord.base.value) == SVGA3DREG_TEMP) {
1470 struct src_register zero = get_zero_immediate( emit );
1471
1472 /* MOV tmp, texcoord */
1473 if (!submit_op1( emit,
1474 inst_token( SVGA3DOP_MOV ),
1475 tmp,
1476 texcoord ))
1477 return FALSE;
1478
1479 /* MOV tmp.w, zero */
1480 if (!submit_op1( emit,
1481 inst_token( SVGA3DOP_MOV ),
1482 writemask( tmp, TGSI_WRITEMASK_W ),
1483 scalar( zero, TGSI_SWIZZLE_X )))
1484 return FALSE;
1485
1486 texcoord = src( tmp );
1487 inst.op = SVGA3DOP_TEXLDL;
1488 }
1489
1490 /* Explicit normalization of texcoords:
1491 */
1492 if (emit->key.fkey.tex[sampler.base.num].unnormalized) {
1493 struct src_register wh = get_tex_dimensions( emit, sampler.base.num );
1494
1495 /* MUL tmp, SRC0, WH */
1496 if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ),
1497 tmp, texcoord, wh ))
1498 return FALSE;
1499
1500 texcoord = src( tmp );
1501 }
1502
1503 return submit_op2( emit, inst, dst, texcoord, sampler );
1504 }
1505
1506
1507
1508
1509 /* Translate texture instructions to SVGA3D representation.
1510 */
1511 static boolean emit_tex4(struct svga_shader_emitter *emit,
1512 const struct tgsi_full_instruction *insn,
1513 SVGA3dShaderDestToken dst )
1514 {
1515 SVGA3dShaderInstToken inst;
1516 struct src_register texcoord;
1517 struct src_register ddx;
1518 struct src_register ddy;
1519 struct src_register sampler;
1520
1521 texcoord = translate_src_register( emit, &insn->Src[0] );
1522 ddx = translate_src_register( emit, &insn->Src[1] );
1523 ddy = translate_src_register( emit, &insn->Src[2] );
1524 sampler = translate_src_register( emit, &insn->Src[3] );
1525
1526 inst.value = 0;
1527
1528 switch (insn->Instruction.Opcode) {
1529 case TGSI_OPCODE_TXD:
1530 inst.op = SVGA3DOP_TEXLDD; /* 4 args! */
1531 break;
1532 default:
1533 assert(0);
1534 return FALSE;
1535 }
1536
1537 return submit_op4( emit, inst, dst, texcoord, sampler, ddx, ddy );
1538 }
1539
1540
1541 /**
1542 * Emit texture swizzle code.
1543 */
1544 static boolean emit_tex_swizzle( struct svga_shader_emitter *emit,
1545 SVGA3dShaderDestToken dst,
1546 struct src_register src,
1547 unsigned swizzle_x,
1548 unsigned swizzle_y,
1549 unsigned swizzle_z,
1550 unsigned swizzle_w)
1551 {
1552 const unsigned swizzleIn[4] = {swizzle_x, swizzle_y, swizzle_z, swizzle_w};
1553 unsigned srcSwizzle[4];
1554 unsigned srcWritemask = 0x0, zeroWritemask = 0x0, oneWritemask = 0x0;
1555 int i;
1556
1557 /* build writemasks and srcSwizzle terms */
1558 for (i = 0; i < 4; i++) {
1559 if (swizzleIn[i] == PIPE_SWIZZLE_ZERO) {
1560 srcSwizzle[i] = TGSI_SWIZZLE_X + i;
1561 zeroWritemask |= (1 << i);
1562 }
1563 else if (swizzleIn[i] == PIPE_SWIZZLE_ONE) {
1564 srcSwizzle[i] = TGSI_SWIZZLE_X + i;
1565 oneWritemask |= (1 << i);
1566 }
1567 else {
1568 srcSwizzle[i] = swizzleIn[i];
1569 srcWritemask |= (1 << i);
1570 }
1571 }
1572
1573 /* write x/y/z/w comps */
1574 if (dst.mask & srcWritemask) {
1575 if (!submit_op1(emit,
1576 inst_token(SVGA3DOP_MOV),
1577 writemask(dst, srcWritemask),
1578 swizzle(src,
1579 srcSwizzle[0],
1580 srcSwizzle[1],
1581 srcSwizzle[2],
1582 srcSwizzle[3])))
1583 return FALSE;
1584 }
1585
1586 /* write 0 comps */
1587 if (dst.mask & zeroWritemask) {
1588 if (!submit_op1(emit,
1589 inst_token(SVGA3DOP_MOV),
1590 writemask(dst, zeroWritemask),
1591 scalar(get_zero_immediate(emit), TGSI_SWIZZLE_X)))
1592 return FALSE;
1593 }
1594
1595 /* write 1 comps */
1596 if (dst.mask & oneWritemask) {
1597 if (!submit_op1(emit,
1598 inst_token(SVGA3DOP_MOV),
1599 writemask(dst, oneWritemask),
1600 scalar(get_zero_immediate(emit), TGSI_SWIZZLE_W)))
1601 return FALSE;
1602 }
1603
1604 return TRUE;
1605 }
1606
1607
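/* Top-level texture instruction translation: emit the texture sample
 * via emit_tex2()/emit_tex4(), then apply any shadow comparison,
 * texture swizzle or saturation required by the shader key.
 */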
1608 static boolean emit_tex(struct svga_shader_emitter *emit,
1609 const struct tgsi_full_instruction *insn )
1610 {
1611 SVGA3dShaderDestToken dst =
1612 translate_dst_register( emit, insn, 0 );
1613 struct src_register src0 =
1614 translate_src_register( emit, &insn->Src[0] );
1615 struct src_register src1 =
1616 translate_src_register( emit, &insn->Src[1] );
1617
1618 SVGA3dShaderDestToken tex_result;
1619 const unsigned unit = src1.base.num;
1620
1621 /* check for shadow samplers */
1622 boolean compare = (emit->key.fkey.tex[unit].compare_mode ==
1623 PIPE_TEX_COMPARE_R_TO_TEXTURE);
1624
1625 /* texture swizzle */
1626 boolean swizzle = (emit->key.fkey.tex[unit].swizzle_r != PIPE_SWIZZLE_RED ||
1627 emit->key.fkey.tex[unit].swizzle_g != PIPE_SWIZZLE_GREEN ||
1628 emit->key.fkey.tex[unit].swizzle_b != PIPE_SWIZZLE_BLUE ||
1629 emit->key.fkey.tex[unit].swizzle_a != PIPE_SWIZZLE_ALPHA);
1630
1631 boolean saturate = insn->Instruction.Saturate != TGSI_SAT_NONE;
1632
1633 /* If doing compare processing or tex swizzle or saturation, we need to put
1634 * the fetched color into a temporary so it can be used as a source later on.
1635 */
1636 if (compare || swizzle || saturate) {
1637 tex_result = get_temp( emit );
1638 }
1639 else {
1640 tex_result = dst;
1641 }
1642
1643 switch(insn->Instruction.Opcode) {
1644 case TGSI_OPCODE_TEX:
1645 case TGSI_OPCODE_TXB:
1646 case TGSI_OPCODE_TXP:
1647 case TGSI_OPCODE_TXL:
1648 if (!emit_tex2( emit, insn, tex_result ))
1649 return FALSE;
1650 break;
1651 case TGSI_OPCODE_TXD:
1652 if (!emit_tex4( emit, insn, tex_result ))
1653 return FALSE;
1654 break;
1655 default:
1656 assert(0);
1657 }
1658
1659
1660 if (compare) {
1661 SVGA3dShaderDestToken dst2;
1662
1663 if (swizzle || saturate)
1664 dst2 = tex_result;
1665 else
1666 dst2 = dst;
1667
1668 if (dst.mask & TGSI_WRITEMASK_XYZ) {
1669 SVGA3dShaderDestToken src0_zdivw = get_temp( emit );
1670 /* When sampling a depth texture, the result of the comparison is in
1671 * the Y component.
1672 */
1673 struct src_register tex_src_x = scalar(src(tex_result), TGSI_SWIZZLE_Y);
1674 struct src_register r_coord;
1675
1676 if (insn->Instruction.Opcode == TGSI_OPCODE_TXP) {
1677 /* Divide texcoord R by Q */
1678 if (!submit_op1( emit, inst_token( SVGA3DOP_RCP ),
1679 writemask(src0_zdivw, TGSI_WRITEMASK_X),
1680 scalar(src0, TGSI_SWIZZLE_W) ))
1681 return FALSE;
1682
1683 if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ),
1684 writemask(src0_zdivw, TGSI_WRITEMASK_X),
1685 scalar(src0, TGSI_SWIZZLE_Z),
1686 scalar(src(src0_zdivw), TGSI_SWIZZLE_X) ))
1687 return FALSE;
1688
1689 r_coord = scalar(src(src0_zdivw), TGSI_SWIZZLE_X);
1690 }
1691 else {
1692 r_coord = scalar(src0, TGSI_SWIZZLE_Z);
1693 }
1694
1695 /* Compare texture sample value against R component of texcoord */
1696 if (!emit_select(emit,
1697 emit->key.fkey.tex[unit].compare_func,
1698 writemask( dst2, TGSI_WRITEMASK_XYZ ),
1699 r_coord,
1700 tex_src_x))
1701 return FALSE;
1702 }
1703
1704 if (dst.mask & TGSI_WRITEMASK_W) {
1705 struct src_register one =
1706 scalar( get_zero_immediate( emit ), TGSI_SWIZZLE_W );
1707
1708 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
1709 writemask( dst2, TGSI_WRITEMASK_W ),
1710 one ))
1711 return FALSE;
1712 }
1713 }
1714
1715 if (saturate && !swizzle) {
1716 /* MOV_SAT real_dst, dst */
1717 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), dst, src(tex_result) ))
1718 return FALSE;
1719 }
1720 else if (swizzle) {
1721 /* swizzle from tex_result to dst (handles saturation too, if any) */
1722 emit_tex_swizzle(emit,
1723 dst, src(tex_result),
1724 emit->key.fkey.tex[unit].swizzle_r,
1725 emit->key.fkey.tex[unit].swizzle_g,
1726 emit->key.fkey.tex[unit].swizzle_b,
1727 emit->key.fkey.tex[unit].swizzle_a);
1728 }
1729
1730 return TRUE;
1731 }
1732
1733 static boolean emit_bgnloop2( struct svga_shader_emitter *emit,
1734 const struct tgsi_full_instruction *insn )
1735 {
1736 SVGA3dShaderInstToken inst = inst_token( SVGA3DOP_LOOP );
1737 struct src_register loop_reg = src_register( SVGA3DREG_LOOP, 0 );
1738 struct src_register const_int = get_loop_const( emit );
1739
1740 emit->dynamic_branching_level++;
1741
1742 return (emit_instruction( emit, inst ) &&
1743 emit_src( emit, loop_reg ) &&
1744 emit_src( emit, const_int ) );
1745 }
1746
1747 static boolean emit_endloop2( struct svga_shader_emitter *emit,
1748 const struct tgsi_full_instruction *insn )
1749 {
1750 SVGA3dShaderInstToken inst = inst_token( SVGA3DOP_ENDLOOP );
1751
1752 emit->dynamic_branching_level--;
1753
1754 return emit_instruction( emit, inst );
1755 }
1756
1757 static boolean emit_brk( struct svga_shader_emitter *emit,
1758 const struct tgsi_full_instruction *insn )
1759 {
1760 SVGA3dShaderInstToken inst = inst_token( SVGA3DOP_BREAK );
1761 return emit_instruction( emit, inst );
1762 }
1763
1764 static boolean emit_scalar_op1( struct svga_shader_emitter *emit,
1765 unsigned opcode,
1766 const struct tgsi_full_instruction *insn )
1767 {
1768 SVGA3dShaderInstToken inst;
1769 SVGA3dShaderDestToken dst;
1770 struct src_register src;
1771
1772 inst = inst_token( opcode );
1773 dst = translate_dst_register( emit, insn, 0 );
1774 src = translate_src_register( emit, &insn->Src[0] );
1775 src = scalar( src, TGSI_SWIZZLE_X );
1776
1777 return submit_op1( emit, inst, dst, src );
1778 }
1779
1780
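/* Emit a TGSI instruction that maps directly onto a single SVGA3D
 * opcode, dispatching on the number of source operands (0-3).
 */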
1781 static boolean emit_simple_instruction(struct svga_shader_emitter *emit,
1782 unsigned opcode,
1783 const struct tgsi_full_instruction *insn )
1784 {
1785 const struct tgsi_full_src_register *src = insn->Src;
1786 SVGA3dShaderInstToken inst;
1787 SVGA3dShaderDestToken dst;
1788
1789 inst = inst_token( opcode );
1790 dst = translate_dst_register( emit, insn, 0 );
1791
1792 switch (insn->Instruction.NumSrcRegs) {
1793 case 0:
1794 return submit_op0( emit, inst, dst );
1795 case 1:
1796 return submit_op1( emit, inst, dst,
1797 translate_src_register( emit, &src[0] ));
1798 case 2:
1799 return submit_op2( emit, inst, dst,
1800 translate_src_register( emit, &src[0] ),
1801 translate_src_register( emit, &src[1] ) );
1802 case 3:
1803 return submit_op3( emit, inst, dst,
1804 translate_src_register( emit, &src[0] ),
1805 translate_src_register( emit, &src[1] ),
1806 translate_src_register( emit, &src[2] ) );
1807 default:
1808 assert(0);
1809 return FALSE;
1810 }
1811 }
1812
1813
1814 static boolean emit_deriv(struct svga_shader_emitter *emit,
1815 const struct tgsi_full_instruction *insn )
1816 {
1817 if (emit->dynamic_branching_level > 0 &&
1818 insn->Src[0].Register.File == TGSI_FILE_TEMPORARY)
1819 {
1820 struct src_register zero = get_zero_immediate( emit );
1821 SVGA3dShaderDestToken dst =
1822 translate_dst_register( emit, insn, 0 );
1823
1824 /* Deriv opcodes not valid inside dynamic branching, workaround
1825 * by zeroing out the destination.
1826 */
1827 if (!submit_op1(emit,
1828 inst_token( SVGA3DOP_MOV ),
1829 dst,
1830 scalar(zero, TGSI_SWIZZLE_X)))
1831 return FALSE;
1832
1833 return TRUE;
1834 }
1835 else {
1836 unsigned opcode;
1837 const struct tgsi_full_src_register *reg = &insn->Src[0];
1838 SVGA3dShaderInstToken inst;
1839 SVGA3dShaderDestToken dst;
1840 struct src_register src0;
1841
1842 switch (insn->Instruction.Opcode) {
1843 case TGSI_OPCODE_DDX:
1844 opcode = SVGA3DOP_DSX;
1845 break;
1846 case TGSI_OPCODE_DDY:
1847 opcode = SVGA3DOP_DSY;
1848 break;
1849 default:
1850 return FALSE;
1851 }
1852
1853 inst = inst_token( opcode );
1854 dst = translate_dst_register( emit, insn, 0 );
1855 src0 = translate_src_register( emit, reg );
1856
 1857       /* We cannot use negate or abs modifiers on the source of a dsx/dsy instruction.
 1858        */
1859 if (reg->Register.Absolute ||
1860 reg->Register.Negate) {
1861 SVGA3dShaderDestToken temp = get_temp( emit );
1862
1863 if (!emit_repl( emit, temp, &src0 ))
1864 return FALSE;
1865 }
1866
1867 return submit_op1( emit, inst, dst, src0 );
1868 }
1869 }
1870
1871 static boolean emit_arl(struct svga_shader_emitter *emit,
1872 const struct tgsi_full_instruction *insn)
1873 {
1874 ++emit->current_arl;
1875 if (emit->unit == PIPE_SHADER_FRAGMENT) {
1876 /* MOVA not present in pixel shader instruction set.
1877 * Ignore this instruction altogether since it is
1878 * only used for loop counters -- and for that
1879 * we reference aL directly.
1880 */
1881 return TRUE;
1882 }
1883 if (svga_arl_needs_adjustment( emit )) {
1884 return emit_fake_arl( emit, insn );
1885 } else {
1886 /* no need to adjust, just emit straight arl */
1887 return emit_simple_instruction(emit, SVGA3DOP_MOVA, insn);
1888 }
1889 }
1890
1891 static boolean emit_pow(struct svga_shader_emitter *emit,
1892 const struct tgsi_full_instruction *insn)
1893 {
1894 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1895 struct src_register src0 = translate_src_register(
1896 emit, &insn->Src[0] );
1897 struct src_register src1 = translate_src_register(
1898 emit, &insn->Src[1] );
1899 boolean need_tmp = FALSE;
1900
1901 /* POW can only output to a temporary */
1902 if (insn->Dst[0].Register.File != TGSI_FILE_TEMPORARY)
1903 need_tmp = TRUE;
1904
1905 /* POW src1 must not be the same register as dst */
1906 if (alias_src_dst( src1, dst ))
1907 need_tmp = TRUE;
1908
1909 /* it's a scalar op */
1910 src0 = scalar( src0, TGSI_SWIZZLE_X );
1911 src1 = scalar( src1, TGSI_SWIZZLE_X );
1912
1913 if (need_tmp) {
1914 SVGA3dShaderDestToken tmp = writemask(get_temp( emit ), TGSI_WRITEMASK_X );
1915
1916 if (!submit_op2(emit, inst_token( SVGA3DOP_POW ), tmp, src0, src1))
1917 return FALSE;
1918
1919 return submit_op1(emit, inst_token( SVGA3DOP_MOV ), dst, scalar(src(tmp), 0) );
1920 }
1921 else {
1922 return submit_op2(emit, inst_token( SVGA3DOP_POW ), dst, src0, src1);
1923 }
1924 }
1925
1926 static boolean emit_xpd(struct svga_shader_emitter *emit,
1927 const struct tgsi_full_instruction *insn)
1928 {
1929 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1930 const struct src_register src0 = translate_src_register(
1931 emit, &insn->Src[0] );
1932 const struct src_register src1 = translate_src_register(
1933 emit, &insn->Src[1] );
1934 boolean need_dst_tmp = FALSE;
1935
1936 /* XPD can only output to a temporary */
1937 if (SVGA3dShaderGetRegType(dst.value) != SVGA3DREG_TEMP)
1938 need_dst_tmp = TRUE;
1939
1940 /* The dst reg must not be the same as src0 or src1*/
1941 if (alias_src_dst(src0, dst) ||
1942 alias_src_dst(src1, dst))
1943 need_dst_tmp = TRUE;
1944
1945 if (need_dst_tmp) {
1946 SVGA3dShaderDestToken tmp = get_temp( emit );
1947
1948 /* Obey DX9 restrictions on mask:
1949 */
1950 tmp.mask = dst.mask & TGSI_WRITEMASK_XYZ;
1951
1952 if (!submit_op2(emit, inst_token( SVGA3DOP_CRS ), tmp, src0, src1))
1953 return FALSE;
1954
1955 if (!submit_op1(emit, inst_token( SVGA3DOP_MOV ), dst, src( tmp )))
1956 return FALSE;
1957 }
1958 else {
1959 if (!submit_op2(emit, inst_token( SVGA3DOP_CRS ), dst, src0, src1))
1960 return FALSE;
1961 }
1962
1963 /* Need to emit 1.0 to dst.w?
1964 */
1965 if (dst.mask & TGSI_WRITEMASK_W) {
1966 struct src_register zero = get_zero_immediate( emit );
1967
1968 if (!submit_op1(emit,
1969 inst_token( SVGA3DOP_MOV ),
1970 writemask(dst, TGSI_WRITEMASK_W),
1971 zero))
1972 return FALSE;
1973 }
1974
1975 return TRUE;
1976 }
1977
1978
1979 static boolean emit_lrp(struct svga_shader_emitter *emit,
1980 const struct tgsi_full_instruction *insn)
1981 {
1982 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1983 const struct src_register src0 = translate_src_register(
1984 emit, &insn->Src[0] );
1985 const struct src_register src1 = translate_src_register(
1986 emit, &insn->Src[1] );
1987 const struct src_register src2 = translate_src_register(
1988 emit, &insn->Src[2] );
1989
1990 return submit_lrp(emit, dst, src0, src1, src2);
1991 }
1992
1993
1994 static boolean emit_dst_insn(struct svga_shader_emitter *emit,
1995 const struct tgsi_full_instruction *insn )
1996 {
1997 if (emit->unit == PIPE_SHADER_VERTEX) {
1998 /* SVGA/DX9 has a DST instruction, but only for vertex shaders:
1999 */
2000 return emit_simple_instruction(emit, SVGA3DOP_DST, insn);
2001 }
2002 else {
2003
2004 /* result[0] = 1 * 1;
2005 * result[1] = a[1] * b[1];
2006 * result[2] = a[2] * 1;
2007 * result[3] = 1 * b[3];
2008 */
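      /* Implemented as:
       *   MOV tmp.xw, 1.0          (from the zero/one immediate's w component)
       *   MOV tmp.yz, src0
       *   MUL tmp.yw, tmp, src1
       *   MOV dst, tmp             (only when a temporary was needed)
       */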
2009
2010 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
2011 SVGA3dShaderDestToken tmp;
2012 const struct src_register src0 = translate_src_register(
2013 emit, &insn->Src[0] );
2014 const struct src_register src1 = translate_src_register(
2015 emit, &insn->Src[1] );
2016 struct src_register zero = get_zero_immediate( emit );
2017 boolean need_tmp = FALSE;
2018
2019 if (SVGA3dShaderGetRegType(dst.value) != SVGA3DREG_TEMP ||
2020 alias_src_dst(src0, dst) ||
2021 alias_src_dst(src1, dst))
2022 need_tmp = TRUE;
2023
2024 if (need_tmp) {
2025 tmp = get_temp( emit );
2026 }
2027 else {
2028 tmp = dst;
2029 }
2030
2031 /* tmp.xw = 1.0
2032 */
2033 if (tmp.mask & TGSI_WRITEMASK_XW) {
2034 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
2035 writemask(tmp, TGSI_WRITEMASK_XW ),
2036 scalar( zero, 3 )))
2037 return FALSE;
2038 }
2039
2040 /* tmp.yz = src0
2041 */
2042 if (tmp.mask & TGSI_WRITEMASK_YZ) {
2043 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
2044 writemask(tmp, TGSI_WRITEMASK_YZ ),
2045 src0))
2046 return FALSE;
2047 }
2048
2049 /* tmp.yw = tmp * src1
2050 */
2051 if (tmp.mask & TGSI_WRITEMASK_YW) {
2052 if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ),
2053 writemask(tmp, TGSI_WRITEMASK_YW ),
2054 src(tmp),
2055 src1))
2056 return FALSE;
2057 }
2058
2059 /* dst = tmp
2060 */
2061 if (need_tmp) {
2062 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
2063 dst,
2064 src(tmp)))
2065 return FALSE;
2066 }
2067 }
2068
2069 return TRUE;
2070 }
2071
2072
2073 static boolean emit_exp(struct svga_shader_emitter *emit,
2074 const struct tgsi_full_instruction *insn)
2075 {
2076 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
2077 struct src_register src0 =
2078 translate_src_register( emit, &insn->Src[0] );
2079 struct src_register zero = get_zero_immediate( emit );
2080 SVGA3dShaderDestToken fraction;
2081
2082 if (dst.mask & TGSI_WRITEMASK_Y)
2083 fraction = dst;
2084 else if (dst.mask & TGSI_WRITEMASK_X)
2085 fraction = get_temp( emit );
2086 else
2087 fraction.value = 0;
2088
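   /* TGSI EXP semantics:
    *   dst.x = 2^floor(src0),  dst.y = src0 - floor(src0),
    *   dst.z = 2^src0 (partial precision),  dst.w = 1.0
    * built below from FRC, ADD, EXP, EXPP and MOV.
    */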
2089    /* If x or y is being written, compute src0 - floor(src0); y receives it
2090     * directly and x uses it below to form floor(src0). */
2091 if (dst.mask & TGSI_WRITEMASK_XY) {
2092 if (!submit_op1( emit, inst_token( SVGA3DOP_FRC ),
2093 writemask( fraction, TGSI_WRITEMASK_Y ),
2094 src0 ))
2095 return FALSE;
2096 }
2097
2098 /* If x is being written, fill it with 2 ^ floor(src0).
2099 */
2100 if (dst.mask & TGSI_WRITEMASK_X) {
2101 if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ),
2102 writemask( dst, TGSI_WRITEMASK_X ),
2103 src0,
2104 scalar( negate( src( fraction ) ), TGSI_SWIZZLE_Y ) ) )
2105 return FALSE;
2106
2107 if (!submit_op1( emit, inst_token( SVGA3DOP_EXP ),
2108 writemask( dst, TGSI_WRITEMASK_X ),
2109 scalar( src( dst ), TGSI_SWIZZLE_X ) ) )
2110 return FALSE;
2111
2112 if (!(dst.mask & TGSI_WRITEMASK_Y))
2113 release_temp( emit, fraction );
2114 }
2115
2116 /* If z is being written, fill it with 2 ^ src0 (partial precision).
2117 */
2118 if (dst.mask & TGSI_WRITEMASK_Z) {
2119 if (!submit_op1( emit, inst_token( SVGA3DOP_EXPP ),
2120 writemask( dst, TGSI_WRITEMASK_Z ),
2121 src0 ) )
2122 return FALSE;
2123 }
2124
2125 /* If w is being written, fill it with one.
2126 */
2127 if (dst.mask & TGSI_WRITEMASK_W) {
2128 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
2129 writemask(dst, TGSI_WRITEMASK_W),
2130 scalar( zero, TGSI_SWIZZLE_W ) ))
2131 return FALSE;
2132 }
2133
2134 return TRUE;
2135 }
2136
2137 static boolean emit_lit(struct svga_shader_emitter *emit,
2138 const struct tgsi_full_instruction *insn )
2139 {
2140 if (emit->unit == PIPE_SHADER_VERTEX) {
2141 /* SVGA/DX9 has a LIT instruction, but only for vertex shaders:
2142 */
2143 return emit_simple_instruction(emit, SVGA3DOP_LIT, insn);
2144 }
2145 else {
2146
2147       /* D3D vs. GL semantics can be fairly easily accommodated by
2148 * variations on this sequence.
2149 *
2150 * GL:
2151 * tmp.y = src.x
2152 * tmp.z = pow(src.y,src.w)
2153 * p0 = src0.xxxx > 0
2154 * result = zero.wxxw
2155 * (p0) result.yz = tmp
2156 *
2157 * D3D:
2158 * tmp.y = src.x
2159 * tmp.z = pow(src.y,src.w)
2160 * p0 = src0.xxyy > 0
2161 * result = zero.wxxw
2162 * (p0) result.yz = tmp
2163 *
2164 * Will implement the GL version for now.
2165 */
2166
2167 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
2168 SVGA3dShaderDestToken tmp = get_temp( emit );
2169 const struct src_register src0 = translate_src_register(
2170 emit, &insn->Src[0] );
2171 struct src_register zero = get_zero_immediate( emit );
2172
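      /* The zero/one immediate is {0, 0, 0, 1}, so zero.wxxw used below is
       * (1, 0, 0, 1) -- the LIT result when src.x <= 0.
       */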
2173 /* tmp = pow(src.y, src.w)
2174 */
2175 if (dst.mask & TGSI_WRITEMASK_Z) {
2176 if (!submit_op2(emit, inst_token( SVGA3DOP_POW ),
2177 tmp,
2178 scalar(src0, 1),
2179 scalar(src0, 3)))
2180 return FALSE;
2181 }
2182
2183 /* tmp.y = src.x
2184 */
2185 if (dst.mask & TGSI_WRITEMASK_Y) {
2186 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
2187 writemask(tmp, TGSI_WRITEMASK_Y ),
2188 scalar(src0, 0)))
2189 return FALSE;
2190 }
2191
2192       /* Can't quite do this with emit_conditional due to the extra
2193 * writemask on the predicated mov:
2194 */
2195 {
2196 SVGA3dShaderDestToken pred_reg = dst_register( SVGA3DREG_PREDICATE, 0 );
2197 SVGA3dShaderInstToken setp_token, mov_token;
2198 struct src_register predsrc;
2199
2200 setp_token = inst_token( SVGA3DOP_SETP );
2201 mov_token = inst_token( SVGA3DOP_MOV );
2202
2203 setp_token.control = SVGA3DOPCOMP_GT;
2204
2205 /* D3D vs GL semantics:
2206 */
2207 if (0)
2208 predsrc = swizzle(src0, 0, 0, 1, 1); /* D3D */
2209 else
2210 predsrc = swizzle(src0, 0, 0, 0, 0); /* GL */
2211
2212          /* SETP p0, predsrc, GT, {0}.x */
2213 if (!submit_op2( emit, setp_token, pred_reg,
2214 predsrc,
2215 swizzle(zero, 0, 0, 0, 0) ))
2216 return FALSE;
2217
2218 /* MOV dst, fail */
2219 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), dst,
2220 swizzle(zero, 3, 0, 0, 3 )))
2221 return FALSE;
2222
2223 /* MOV dst.yz, tmp (predicated)
2224 *
2225 * Note that the predicate reg (and possible modifiers) is passed
2226 * as the first source argument.
2227 */
2228 if (dst.mask & TGSI_WRITEMASK_YZ) {
2229 mov_token.predicated = 1;
2230 if (!submit_op2( emit, mov_token,
2231 writemask(dst, TGSI_WRITEMASK_YZ),
2232 src( pred_reg ), src( tmp ) ))
2233 return FALSE;
2234 }
2235 }
2236 }
2237
2238 return TRUE;
2239 }
2240
2241
2242
2243
2244 static boolean emit_ex2( struct svga_shader_emitter *emit,
2245 const struct tgsi_full_instruction *insn )
2246 {
2247 SVGA3dShaderInstToken inst;
2248 SVGA3dShaderDestToken dst;
2249 struct src_register src0;
2250
2251 inst = inst_token( SVGA3DOP_EXP );
2252 dst = translate_dst_register( emit, insn, 0 );
2253 src0 = translate_src_register( emit, &insn->Src[0] );
2254 src0 = scalar( src0, TGSI_SWIZZLE_X );
2255
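   /* EX2 is a scalar operation.  With a partial writemask the result is
    * computed into a temporary and then replicated into dst with a MOV.
    */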
2256 if (dst.mask != TGSI_WRITEMASK_XYZW) {
2257 SVGA3dShaderDestToken tmp = get_temp( emit );
2258
2259 if (!submit_op1( emit, inst, tmp, src0 ))
2260 return FALSE;
2261
2262 return submit_op1( emit, inst_token( SVGA3DOP_MOV ),
2263 dst,
2264 scalar( src( tmp ), TGSI_SWIZZLE_X ) );
2265 }
2266
2267 return submit_op1( emit, inst, dst, src0 );
2268 }
2269
2270
2271 static boolean emit_log(struct svga_shader_emitter *emit,
2272 const struct tgsi_full_instruction *insn)
2273 {
2274 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
2275 struct src_register src0 =
2276 translate_src_register( emit, &insn->Src[0] );
2277 struct src_register zero = get_zero_immediate( emit );
2278 SVGA3dShaderDestToken abs_tmp;
2279 struct src_register abs_src0;
2280 SVGA3dShaderDestToken log2_abs;
2281
2282 abs_tmp.value = 0;
2283
2284 if (dst.mask & TGSI_WRITEMASK_Z)
2285 log2_abs = dst;
2286 else if (dst.mask & TGSI_WRITEMASK_XY)
2287 log2_abs = get_temp( emit );
2288 else
2289 log2_abs.value = 0;
2290
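   /* TGSI LOG semantics:
    *   dst.x = floor(log2(|src0|)),  dst.y = |src0| / 2^floor(log2(|src0|)),
    *   dst.z = log2(|src0|),  dst.w = 1.0
    * assembled below from LOG, FRC, ADD, EXP, MUL and MOV.
    */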
2291    /* If x, y or z is being written we need log2( abs( src0 ) ); z
2292     * receives it directly. */
2293 if (dst.mask & TGSI_WRITEMASK_XYZ) {
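      /* A source with any modifier other than ABS is first copied to a plain
       * temporary so the absolute-value modifier can be applied cleanly below.
       */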
2294 if (!src0.base.srcMod || src0.base.srcMod == SVGA3DSRCMOD_ABS)
2295 abs_src0 = src0;
2296 else {
2297 abs_tmp = get_temp( emit );
2298
2299 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
2300 abs_tmp,
2301 src0 ) )
2302 return FALSE;
2303
2304 abs_src0 = src( abs_tmp );
2305 }
2306
2307 abs_src0 = absolute( scalar( abs_src0, TGSI_SWIZZLE_X ) );
2308
2309 if (!submit_op1( emit, inst_token( SVGA3DOP_LOG ),
2310 writemask( log2_abs, TGSI_WRITEMASK_Z ),
2311 abs_src0 ) )
2312 return FALSE;
2313 }
2314
2315 if (dst.mask & TGSI_WRITEMASK_XY) {
2316 SVGA3dShaderDestToken floor_log2;
2317
2318 if (dst.mask & TGSI_WRITEMASK_X)
2319 floor_log2 = dst;
2320 else
2321 floor_log2 = get_temp( emit );
2322
2323       /* Compute floor( log2( abs( src0 ) ) ) into floor_log2, which aliases
2324        * dst when x is being written. */
2325 if (!submit_op1( emit, inst_token( SVGA3DOP_FRC ),
2326 writemask( floor_log2, TGSI_WRITEMASK_X ),
2327 scalar( src( log2_abs ), TGSI_SWIZZLE_Z ) ) )
2328 return FALSE;
2329
2330 if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ),
2331 writemask( floor_log2, TGSI_WRITEMASK_X ),
2332 scalar( src( log2_abs ), TGSI_SWIZZLE_Z ),
2333 negate( src( floor_log2 ) ) ) )
2334 return FALSE;
2335
2336 /* If y is being written, fill it with
2337 * abs ( src0 ) / ( 2 ^ floor( log2( abs( src0 ) ) ) ).
2338 */
2339 if (dst.mask & TGSI_WRITEMASK_Y) {
2340 if (!submit_op1( emit, inst_token( SVGA3DOP_EXP ),
2341 writemask( dst, TGSI_WRITEMASK_Y ),
2342 negate( scalar( src( floor_log2 ),
2343 TGSI_SWIZZLE_X ) ) ) )
2344 return FALSE;
2345
2346 if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ),
2347 writemask( dst, TGSI_WRITEMASK_Y ),
2348 src( dst ),
2349 abs_src0 ) )
2350 return FALSE;
2351 }
2352
2353 if (!(dst.mask & TGSI_WRITEMASK_X))
2354 release_temp( emit, floor_log2 );
2355
2356 if (!(dst.mask & TGSI_WRITEMASK_Z))
2357 release_temp( emit, log2_abs );
2358 }
2359
2360    if ((dst.mask & TGSI_WRITEMASK_XYZ) && src0.base.srcMod &&
2361 src0.base.srcMod != SVGA3DSRCMOD_ABS)
2362 release_temp( emit, abs_tmp );
2363
2364 /* If w is being written, fill it with one.
2365 */
2366 if (dst.mask & TGSI_WRITEMASK_W) {
2367 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
2368 writemask(dst, TGSI_WRITEMASK_W),
2369 scalar( zero, TGSI_SWIZZLE_W ) ))
2370 return FALSE;
2371 }
2372
2373 return TRUE;
2374 }
2375
2376
2377 static boolean emit_bgnsub( struct svga_shader_emitter *emit,
2378 unsigned position,
2379 const struct tgsi_full_instruction *insn )
2380 {
2381 unsigned i;
2382
2383 /* Note that we've finished the main function and are now emitting
2384 * subroutines. This affects how we terminate the generated
2385 * shader.
2386 */
2387 emit->in_main_func = FALSE;
2388
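   /* Close the preceding code with RET, then open this subroutine with
    * LABEL <n>, where <n> is the label index recorded by emit_call.
    */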
2389 for (i = 0; i < emit->nr_labels; i++) {
2390 if (emit->label[i] == position) {
2391 return (emit_instruction( emit, inst_token( SVGA3DOP_RET ) ) &&
2392 emit_instruction( emit, inst_token( SVGA3DOP_LABEL ) ) &&
2393 emit_src( emit, src_register( SVGA3DREG_LABEL, i )));
2394 }
2395 }
2396
2397 assert(0);
2398 return TRUE;
2399 }
2400
2401 static boolean emit_call( struct svga_shader_emitter *emit,
2402 const struct tgsi_full_instruction *insn )
2403 {
2404 unsigned position = insn->Label.Label;
2405 unsigned i;
2406
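   /* Find (or allocate) a label index for the call target; emit_bgnsub
    * emits the matching LABEL when the target subroutine is reached.
    */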
2407 for (i = 0; i < emit->nr_labels; i++) {
2408 if (emit->label[i] == position)
2409 break;
2410 }
2411
2412 if (emit->nr_labels == Elements(emit->label))
2413 return FALSE;
2414
2415 if (i == emit->nr_labels) {
2416 emit->label[i] = position;
2417 emit->nr_labels++;
2418 }
2419
2420 return (emit_instruction( emit, inst_token( SVGA3DOP_CALL ) ) &&
2421 emit_src( emit, src_register( SVGA3DREG_LABEL, i )));
2422 }
2423
2424
2425 static boolean emit_end( struct svga_shader_emitter *emit )
2426 {
2427 if (emit->unit == PIPE_SHADER_VERTEX) {
2428 return emit_vs_postamble( emit );
2429 }
2430 else {
2431 return emit_ps_postamble( emit );
2432 }
2433 }
2434
2435
2436
2437 static boolean svga_emit_instruction( struct svga_shader_emitter *emit,
2438 unsigned position,
2439 const struct tgsi_full_instruction *insn )
2440 {
2441 switch (insn->Instruction.Opcode) {
2442
2443 case TGSI_OPCODE_ARL:
2444 return emit_arl( emit, insn );
2445
2446 case TGSI_OPCODE_TEX:
2447 case TGSI_OPCODE_TXB:
2448 case TGSI_OPCODE_TXP:
2449 case TGSI_OPCODE_TXL:
2450 case TGSI_OPCODE_TXD:
2451 return emit_tex( emit, insn );
2452
2453 case TGSI_OPCODE_DDX:
2454 case TGSI_OPCODE_DDY:
2455 return emit_deriv( emit, insn );
2456
2457 case TGSI_OPCODE_BGNSUB:
2458 return emit_bgnsub( emit, position, insn );
2459
2460 case TGSI_OPCODE_ENDSUB:
2461 return TRUE;
2462
2463 case TGSI_OPCODE_CAL:
2464 return emit_call( emit, insn );
2465
2466 case TGSI_OPCODE_FLR:
2467 case TGSI_OPCODE_TRUNC: /* should be TRUNC, not FLR */
2468 return emit_floor( emit, insn );
2469
2470 case TGSI_OPCODE_CEIL:
2471 return emit_ceil( emit, insn );
2472
2473 case TGSI_OPCODE_CMP:
2474 return emit_cmp( emit, insn );
2475
2476 case TGSI_OPCODE_DIV:
2477 return emit_div( emit, insn );
2478
2479 case TGSI_OPCODE_DP2:
2480 return emit_dp2( emit, insn );
2481
2482 case TGSI_OPCODE_DPH:
2483 return emit_dph( emit, insn );
2484
2485 case TGSI_OPCODE_NRM:
2486 return emit_nrm( emit, insn );
2487
2488 case TGSI_OPCODE_COS:
2489 return emit_cos( emit, insn );
2490
2491 case TGSI_OPCODE_SIN:
2492 return emit_sin( emit, insn );
2493
2494 case TGSI_OPCODE_SCS:
2495 return emit_sincos( emit, insn );
2496
2497 case TGSI_OPCODE_END:
2498 /* TGSI always finishes the main func with an END */
2499 return emit_end( emit );
2500
2501 case TGSI_OPCODE_KIL:
2502 return emit_kil( emit, insn );
2503
2504 /* Selection opcodes. The underlying language is fairly
2505 * non-orthogonal about these.
2506 */
2507 case TGSI_OPCODE_SEQ:
2508 return emit_select_op( emit, PIPE_FUNC_EQUAL, insn );
2509
2510 case TGSI_OPCODE_SNE:
2511 return emit_select_op( emit, PIPE_FUNC_NOTEQUAL, insn );
2512
2513 case TGSI_OPCODE_SGT:
2514 return emit_select_op( emit, PIPE_FUNC_GREATER, insn );
2515
2516 case TGSI_OPCODE_SGE:
2517 return emit_select_op( emit, PIPE_FUNC_GEQUAL, insn );
2518
2519 case TGSI_OPCODE_SLT:
2520 return emit_select_op( emit, PIPE_FUNC_LESS, insn );
2521
2522 case TGSI_OPCODE_SLE:
2523 return emit_select_op( emit, PIPE_FUNC_LEQUAL, insn );
2524
2525 case TGSI_OPCODE_SUB:
2526 return emit_sub( emit, insn );
2527
2528 case TGSI_OPCODE_POW:
2529 return emit_pow( emit, insn );
2530
2531 case TGSI_OPCODE_EX2:
2532 return emit_ex2( emit, insn );
2533
2534 case TGSI_OPCODE_EXP:
2535 return emit_exp( emit, insn );
2536
2537 case TGSI_OPCODE_LOG:
2538 return emit_log( emit, insn );
2539
2540 case TGSI_OPCODE_LG2:
2541 return emit_scalar_op1( emit, SVGA3DOP_LOG, insn );
2542
2543 case TGSI_OPCODE_RSQ:
2544 return emit_scalar_op1( emit, SVGA3DOP_RSQ, insn );
2545
2546 case TGSI_OPCODE_RCP:
2547 return emit_scalar_op1( emit, SVGA3DOP_RCP, insn );
2548
2549 case TGSI_OPCODE_CONT:
2550 case TGSI_OPCODE_RET:
2551 /* This is a noop -- we tell mesa that we can't support RET
2552 * within a function (early return), so this will always be
2553 * followed by an ENDSUB.
2554 */
2555 return TRUE;
2556
2557 /* These aren't actually used by any of the frontends we care
2558 * about:
2559 */
2560 case TGSI_OPCODE_CLAMP:
2561 case TGSI_OPCODE_ROUND:
2562 case TGSI_OPCODE_AND:
2563 case TGSI_OPCODE_OR:
2564 case TGSI_OPCODE_I2F:
2565 case TGSI_OPCODE_NOT:
2566 case TGSI_OPCODE_SHL:
2567 case TGSI_OPCODE_ISHR:
2568 case TGSI_OPCODE_XOR:
2569 return FALSE;
2570
2571 case TGSI_OPCODE_IF:
2572 return emit_if( emit, insn );
2573 case TGSI_OPCODE_ELSE:
2574 return emit_else( emit, insn );
2575 case TGSI_OPCODE_ENDIF:
2576 return emit_endif( emit, insn );
2577
2578 case TGSI_OPCODE_BGNLOOP:
2579 return emit_bgnloop2( emit, insn );
2580 case TGSI_OPCODE_ENDLOOP:
2581 return emit_endloop2( emit, insn );
2582 case TGSI_OPCODE_BRK:
2583 return emit_brk( emit, insn );
2584
2585 case TGSI_OPCODE_XPD:
2586 return emit_xpd( emit, insn );
2587
2588 case TGSI_OPCODE_KILP:
2589 return emit_kilp( emit, insn );
2590
2591 case TGSI_OPCODE_DST:
2592 return emit_dst_insn( emit, insn );
2593
2594 case TGSI_OPCODE_LIT:
2595 return emit_lit( emit, insn );
2596
2597 case TGSI_OPCODE_LRP:
2598 return emit_lrp( emit, insn );
2599
2600 case TGSI_OPCODE_SSG:
2601 return emit_ssg( emit, insn );
2602
2603 default: {
2604 unsigned opcode = translate_opcode(insn->Instruction.Opcode);
2605
2606 if (opcode == SVGA3DOP_LAST_INST)
2607 return FALSE;
2608
2609 if (!emit_simple_instruction( emit, opcode, insn ))
2610 return FALSE;
2611 }
2612 }
2613
2614 return TRUE;
2615 }
2616
2617
2618 static boolean svga_emit_immediate( struct svga_shader_emitter *emit,
2619 struct tgsi_full_immediate *imm)
2620 {
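   /* Immediates with fewer than four components are padded out with the
    * identity vector {0, 0, 0, 1}.
    */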
2621 static const float id[4] = {0,0,0,1};
2622 float value[4];
2623 unsigned i;
2624
2625 assert(1 <= imm->Immediate.NrTokens && imm->Immediate.NrTokens <= 5);
2626 for (i = 0; i < imm->Immediate.NrTokens - 1; i++)
2627 value[i] = imm->u[i].Float;
2628
2629 for ( ; i < 4; i++ )
2630 value[i] = id[i];
2631
2632 return emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT,
2633 emit->imm_start + emit->internal_imm_count++,
2634 value[0], value[1], value[2], value[3]);
2635 }
2636
2637 static boolean make_immediate( struct svga_shader_emitter *emit,
2638 float a,
2639 float b,
2640 float c,
2641 float d,
2642 struct src_register *out )
2643 {
2644 unsigned idx = emit->nr_hw_float_const++;
2645
2646 if (!emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT,
2647 idx, a, b, c, d ))
2648 return FALSE;
2649
2650 *out = src_register( SVGA3DREG_CONST, idx );
2651
2652 return TRUE;
2653 }
2654
2655 static boolean emit_vs_preamble( struct svga_shader_emitter *emit )
2656 {
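   /* The {0, 0, 0.5, 0.5} immediate is used by the non-prescale path of
    * emit_vs_postamble to remap GL clip-space z to the D3D [0,1] range
    * (DP4 temp_pos.z, {0,0,.5,.5}, temp_pos).
    */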
2657 if (!emit->key.vkey.need_prescale) {
2658 if (!make_immediate( emit, 0, 0, .5, .5,
2659 &emit->imm_0055))
2660 return FALSE;
2661 }
2662
2663 return TRUE;
2664 }
2665
2666 static boolean emit_ps_preamble( struct svga_shader_emitter *emit )
2667 {
2668 if (emit->ps_reads_pos && emit->info.reads_z) {
2669 /*
2670 * Assemble the position from various bits of inputs. Depth and W are
2671        * passed in a texcoord because D3D's vPos does not hold Z or W.
2672 * Also fixup the perspective interpolation.
2673 *
2674 * temp_pos.xy = vPos.xy
2675 * temp_pos.w = rcp(texcoord1.w);
2676 * temp_pos.z = texcoord1.z * temp_pos.w;
2677 */
2678 if (!submit_op1( emit,
2679 inst_token(SVGA3DOP_MOV),
2680 writemask( emit->ps_temp_pos, TGSI_WRITEMASK_XY ),
2681 emit->ps_true_pos ))
2682 return FALSE;
2683
2684 if (!submit_op1( emit,
2685 inst_token(SVGA3DOP_RCP),
2686 writemask( emit->ps_temp_pos, TGSI_WRITEMASK_W ),
2687 scalar( emit->ps_depth_pos, TGSI_SWIZZLE_W ) ))
2688 return FALSE;
2689
2690 if (!submit_op2( emit,
2691 inst_token(SVGA3DOP_MUL),
2692 writemask( emit->ps_temp_pos, TGSI_WRITEMASK_Z ),
2693 scalar( emit->ps_depth_pos, TGSI_SWIZZLE_Z ),
2694 scalar( src(emit->ps_temp_pos), TGSI_SWIZZLE_W ) ))
2695 return FALSE;
2696 }
2697
2698 return TRUE;
2699 }
2700
2701 static boolean emit_ps_postamble( struct svga_shader_emitter *emit )
2702 {
2703 unsigned i;
2704
2705 /* PS oDepth is incredibly fragile and it's very hard to catch the
2706 * types of usage that break it during shader emit. Easier just to
2707 * redirect the main program to a temporary and then only touch
2708 * oDepth with a hand-crafted MOV below.
2709 */
2710 if (SVGA3dShaderGetRegType(emit->true_pos.value) != 0) {
2711
2712 if (!submit_op1( emit,
2713 inst_token(SVGA3DOP_MOV),
2714 emit->true_pos,
2715 scalar(src(emit->temp_pos), TGSI_SWIZZLE_Z) ))
2716 return FALSE;
2717 }
2718
2719 for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) {
2720 if (SVGA3dShaderGetRegType(emit->true_col[i].value) != 0) {
2721
2722 /* Potentially override output colors with white for XOR
2723 * logicop workaround.
2724 */
2725 if (emit->unit == PIPE_SHADER_FRAGMENT &&
2726 emit->key.fkey.white_fragments) {
2727
2728 struct src_register one = scalar( get_zero_immediate( emit ),
2729 TGSI_SWIZZLE_W );
2730
2731 if (!submit_op1( emit,
2732 inst_token(SVGA3DOP_MOV),
2733 emit->true_col[i],
2734 one ))
2735 return FALSE;
2736 }
2737 else {
2738 if (!submit_op1( emit,
2739 inst_token(SVGA3DOP_MOV),
2740 emit->true_col[i],
2741 src(emit->temp_col[i]) ))
2742 return FALSE;
2743 }
2744 }
2745 }
2746
2747 return TRUE;
2748 }
2749
2750 static boolean emit_vs_postamble( struct svga_shader_emitter *emit )
2751 {
2752 /* PSIZ output is incredibly fragile and it's very hard to catch
2753 * the types of usage that break it during shader emit. Easier
2754 * just to redirect the main program to a temporary and then only
2755 * touch PSIZ with a hand-crafted MOV below.
2756 */
2757 if (SVGA3dShaderGetRegType(emit->true_psiz.value) != 0) {
2758
2759 if (!submit_op1( emit,
2760 inst_token(SVGA3DOP_MOV),
2761 emit->true_psiz,
2762 scalar(src(emit->temp_psiz), TGSI_SWIZZLE_X) ))
2763 return FALSE;
2764 }
2765
2766 /* Need to perform various manipulations on vertex position to cope
2767 * with the different GL and D3D clip spaces.
2768 */
2769 if (emit->key.vkey.need_prescale) {
2770 SVGA3dShaderDestToken temp_pos = emit->temp_pos;
2771 SVGA3dShaderDestToken depth = emit->depth_pos;
2772 SVGA3dShaderDestToken pos = emit->true_pos;
2773 unsigned offset = emit->info.file_max[TGSI_FILE_CONSTANT] + 1;
2774 struct src_register prescale_scale = src_register( SVGA3DREG_CONST,
2775 offset + 0 );
2776 struct src_register prescale_trans = src_register( SVGA3DREG_CONST,
2777 offset + 1 );
2778
2779 if (!submit_op1( emit,
2780 inst_token(SVGA3DOP_MOV),
2781 writemask(depth, TGSI_WRITEMASK_W),
2782 scalar(src(temp_pos), TGSI_SWIZZLE_W) ))
2783 return FALSE;
2784
2785 /* MUL temp_pos.xyz, temp_pos, prescale.scale
2786 * MAD result.position, temp_pos.wwww, prescale.trans, temp_pos
2787 * --> Note that prescale.trans.w == 0
2788 */
2789 if (!submit_op2( emit,
2790 inst_token(SVGA3DOP_MUL),
2791 writemask(temp_pos, TGSI_WRITEMASK_XYZ),
2792 src(temp_pos),
2793 prescale_scale ))
2794 return FALSE;
2795
2796 if (!submit_op3( emit,
2797 inst_token(SVGA3DOP_MAD),
2798 pos,
2799 swizzle(src(temp_pos), 3, 3, 3, 3),
2800 prescale_trans,
2801 src(temp_pos)))
2802 return FALSE;
2803
2804 /* Also write to depth value */
2805 if (!submit_op3( emit,
2806 inst_token(SVGA3DOP_MAD),
2807 writemask(depth, TGSI_WRITEMASK_Z),
2808 swizzle(src(temp_pos), 3, 3, 3, 3),
2809 prescale_trans,
2810 src(temp_pos) ))
2811 return FALSE;
2812 }
2813 else {
2814 SVGA3dShaderDestToken temp_pos = emit->temp_pos;
2815 SVGA3dShaderDestToken depth = emit->depth_pos;
2816 SVGA3dShaderDestToken pos = emit->true_pos;
2817 struct src_register imm_0055 = emit->imm_0055;
2818
2819 /* Adjust GL clipping coordinate space to hardware (D3D-style):
2820 *
2821 * DP4 temp_pos.z, {0,0,.5,.5}, temp_pos
2822 * MOV result.position, temp_pos
2823 */
2824 if (!submit_op2( emit,
2825 inst_token(SVGA3DOP_DP4),
2826 writemask(temp_pos, TGSI_WRITEMASK_Z),
2827 imm_0055,
2828 src(temp_pos) ))
2829 return FALSE;
2830
2831 if (!submit_op1( emit,
2832 inst_token(SVGA3DOP_MOV),
2833 pos,
2834 src(temp_pos) ))
2835 return FALSE;
2836
2837 /* Move the manipulated depth into the extra texcoord reg */
2838 if (!submit_op1( emit,
2839 inst_token(SVGA3DOP_MOV),
2840 writemask(depth, TGSI_WRITEMASK_ZW),
2841 src(temp_pos) ))
2842 return FALSE;
2843 }
2844
2845 return TRUE;
2846 }
2847
2848 /*
2849 0: IF VFACE :4
2850 1: COLOR = FrontColor;
2851 2: ELSE
2852 3: COLOR = BackColor;
2853 4: ENDIF
2854 */
2855 static boolean emit_light_twoside( struct svga_shader_emitter *emit )
2856 {
2857 struct src_register vface, zero;
2858 struct src_register front[2];
2859 struct src_register back[2];
2860 SVGA3dShaderDestToken color[2];
2861 int count = emit->internal_color_count;
2862 int i;
2863 SVGA3dShaderInstToken if_token;
2864
2865 if (count == 0)
2866 return TRUE;
2867
2868 vface = get_vface( emit );
2869 zero = get_zero_immediate( emit );
2870
2871 /* Can't use get_temp() to allocate the color reg as such
2872 * temporaries will be reclaimed after each instruction by the call
2873 * to reset_temp_regs().
2874 */
2875 for (i = 0; i < count; i++) {
2876 color[i] = dst_register( SVGA3DREG_TEMP,
2877 emit->nr_hw_temp++ );
2878
2879 front[i] = emit->input_map[emit->internal_color_idx[i]];
2880
2881 /* Back is always the next input:
2882 */
2883 back[i] = front[i];
2884 back[i].base.num = front[i].base.num + 1;
2885
2886 /* Reassign the input_map to the actual front-face color:
2887 */
2888 emit->input_map[emit->internal_color_idx[i]] = src(color[i]);
2889 }
2890
2891 if_token = inst_token( SVGA3DOP_IFC );
2892
2893 if (emit->key.fkey.front_ccw)
2894 if_token.control = SVGA3DOPCOMP_LT;
2895 else
2896 if_token.control = SVGA3DOPCOMP_GT;
2897
2898 zero = scalar(zero, TGSI_SWIZZLE_X);
2899
2900 if (!(emit_instruction( emit, if_token ) &&
2901 emit_src( emit, vface ) &&
2902 emit_src( emit, zero ) ))
2903 return FALSE;
2904
2905 for (i = 0; i < count; i++) {
2906 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), color[i], front[i] ))
2907 return FALSE;
2908 }
2909
2910 if (!(emit_instruction( emit, inst_token( SVGA3DOP_ELSE))))
2911 return FALSE;
2912
2913 for (i = 0; i < count; i++) {
2914 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), color[i], back[i] ))
2915 return FALSE;
2916 }
2917
2918 if (!emit_instruction( emit, inst_token( SVGA3DOP_ENDIF ) ))
2919 return FALSE;
2920
2921 return TRUE;
2922 }
2923
2924 /*
2925 0: SETP_GT TEMP, VFACE, 0
2926 where TEMP is a fake frontface register
2927 */
2928 static boolean emit_frontface( struct svga_shader_emitter *emit )
2929 {
2930 struct src_register vface, zero;
2931 SVGA3dShaderDestToken temp;
2932 struct src_register pass, fail;
2933
2934 vface = get_vface( emit );
2935 zero = get_zero_immediate( emit );
2936
2937 /* Can't use get_temp() to allocate the fake frontface reg as such
2938 * temporaries will be reclaimed after each instruction by the call
2939 * to reset_temp_regs().
2940 */
2941 temp = dst_register( SVGA3DREG_TEMP,
2942 emit->nr_hw_temp++ );
2943
2944 if (emit->key.fkey.front_ccw) {
2945 pass = scalar( zero, TGSI_SWIZZLE_X );
2946 fail = scalar( zero, TGSI_SWIZZLE_W );
2947 } else {
2948 pass = scalar( zero, TGSI_SWIZZLE_W );
2949 fail = scalar( zero, TGSI_SWIZZLE_X );
2950 }
2951
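   /* temp = (vface > 0) ? pass : fail.  pass/fail are chosen above so that
    * the fake front-face register reads 1.0 for front-facing fragments and
    * 0.0 otherwise, regardless of the front_ccw winding convention.
    */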
2952 if (!emit_conditional(emit, PIPE_FUNC_GREATER,
2953 temp, vface, scalar( zero, TGSI_SWIZZLE_X ),
2954 pass, fail))
2955 return FALSE;
2956
2957 /* Reassign the input_map to the actual front-face color:
2958 */
2959 emit->input_map[emit->internal_frontface_idx] = src(temp);
2960
2961 return TRUE;
2962 }
2963
2964
2965 /**
2966 * Emit code to invert the T component of the incoming texture coordinate.
2967 * This is used for drawing point sprites when
2968 * pipe_rasterizer_state::sprite_coord_mode == PIPE_SPRITE_COORD_LOWER_LEFT.
2969 */
2970 static boolean emit_inverted_texcoords( struct svga_shader_emitter *emit )
2971 {
2972 struct src_register zero = get_zero_immediate(emit);
2973 struct src_register pos_neg_one = get_pos_neg_one_immediate( emit );
2974 unsigned inverted_texcoords = emit->inverted_texcoords;
2975
2976 while (inverted_texcoords) {
2977 const unsigned unit = ffs(inverted_texcoords) - 1;
2978
2979 assert(emit->inverted_texcoords & (1 << unit));
2980
2981 assert(unit < Elements(emit->ps_true_texcoord));
2982
2983 assert(unit < Elements(emit->ps_inverted_texcoord_input));
2984
2985 assert(emit->ps_inverted_texcoord_input[unit]
2986 < Elements(emit->input_map));
2987
2988 /* inverted = coord * (1, -1, 1, 1) + (0, 1, 0, 0) */
2989 if (!submit_op3(emit,
2990 inst_token(SVGA3DOP_MAD),
2991 dst(emit->ps_inverted_texcoord[unit]),
2992 emit->ps_true_texcoord[unit],
2993 swizzle(pos_neg_one, 0, 3, 0, 0), /* (1, -1, 1, 1) */
2994 swizzle(zero, 0, 3, 0, 0))) /* (0, 1, 0, 0) */
2995 return FALSE;
2996
2997 /* Reassign the input_map entry to the new texcoord register */
2998 emit->input_map[emit->ps_inverted_texcoord_input[unit]] =
2999 emit->ps_inverted_texcoord[unit];
3000
3001 inverted_texcoords &= ~(1 << unit);
3002 }
3003
3004 return TRUE;
3005 }
3006
3007
3008 static INLINE boolean
3009 needs_to_create_zero( struct svga_shader_emitter *emit )
3010 {
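   /* Determine whether any of the code paths below will reference the
    * {0, 0, 0, 1} zero/one immediate, so it can be defined up front in
    * svga_shader_emit_helpers.
    */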
3011 int i;
3012
3013 if (emit->unit == PIPE_SHADER_FRAGMENT) {
3014 if (emit->key.fkey.light_twoside)
3015 return TRUE;
3016
3017 if (emit->key.fkey.white_fragments)
3018 return TRUE;
3019
3020 if (emit->emit_frontface)
3021 return TRUE;
3022
3023 if (emit->info.opcode_count[TGSI_OPCODE_DST] >= 1 ||
3024 emit->info.opcode_count[TGSI_OPCODE_SSG] >= 1 ||
3025 emit->info.opcode_count[TGSI_OPCODE_LIT] >= 1)
3026 return TRUE;
3027
3028 if (emit->inverted_texcoords)
3029 return TRUE;
3030
3031 /* look for any PIPE_SWIZZLE_ZERO/ONE terms */
3032 for (i = 0; i < emit->key.fkey.num_textures; i++) {
3033 if (emit->key.fkey.tex[i].swizzle_r > PIPE_SWIZZLE_ALPHA ||
3034 emit->key.fkey.tex[i].swizzle_g > PIPE_SWIZZLE_ALPHA ||
3035 emit->key.fkey.tex[i].swizzle_b > PIPE_SWIZZLE_ALPHA ||
3036 emit->key.fkey.tex[i].swizzle_a > PIPE_SWIZZLE_ALPHA)
3037 return TRUE;
3038 }
3039 }
3040
3041 if (emit->unit == PIPE_SHADER_VERTEX) {
3042 if (emit->info.opcode_count[TGSI_OPCODE_CMP] >= 1)
3043 return TRUE;
3044 }
3045
3046 if (emit->info.opcode_count[TGSI_OPCODE_IF] >= 1 ||
3047 emit->info.opcode_count[TGSI_OPCODE_BGNLOOP] >= 1 ||
3048 emit->info.opcode_count[TGSI_OPCODE_DDX] >= 1 ||
3049 emit->info.opcode_count[TGSI_OPCODE_DDY] >= 1 ||
3050 emit->info.opcode_count[TGSI_OPCODE_SGE] >= 1 ||
3051 emit->info.opcode_count[TGSI_OPCODE_SGT] >= 1 ||
3052 emit->info.opcode_count[TGSI_OPCODE_SLE] >= 1 ||
3053 emit->info.opcode_count[TGSI_OPCODE_SLT] >= 1 ||
3054 emit->info.opcode_count[TGSI_OPCODE_SNE] >= 1 ||
3055 emit->info.opcode_count[TGSI_OPCODE_SEQ] >= 1 ||
3056 emit->info.opcode_count[TGSI_OPCODE_EXP] >= 1 ||
3057 emit->info.opcode_count[TGSI_OPCODE_LOG] >= 1 ||
3058 emit->info.opcode_count[TGSI_OPCODE_XPD] >= 1 ||
3059 emit->info.opcode_count[TGSI_OPCODE_KILP] >= 1)
3060 return TRUE;
3061
3062 for (i = 0; i < emit->key.fkey.num_textures; i++) {
3063 if (emit->key.fkey.tex[i].compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE)
3064 return TRUE;
3065 }
3066
3067 return FALSE;
3068 }
3069
3070 static INLINE boolean
3071 needs_to_create_loop_const( struct svga_shader_emitter *emit )
3072 {
3073 return (emit->info.opcode_count[TGSI_OPCODE_BGNLOOP] >= 1);
3074 }
3075
3076 static INLINE boolean
3077 needs_to_create_arl_consts( struct svga_shader_emitter *emit )
3078 {
3079 return (emit->num_arl_consts > 0);
3080 }
3081
3082 static INLINE boolean
3083 pre_parse_add_indirect( struct svga_shader_emitter *emit,
3084 int num, int current_arl)
3085 {
3086 int i;
3087 assert(num < 0);
3088
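   /* Remember the most negative relative index used with each ARL so that
    * create_arl_consts can later set up a compensating constant.
    */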
3089 for (i = 0; i < emit->num_arl_consts; ++i) {
3090 if (emit->arl_consts[i].arl_num == current_arl)
3091 break;
3092 }
3093 /* new entry */
3094 if (emit->num_arl_consts == i) {
3095 ++emit->num_arl_consts;
3096 }
3097 emit->arl_consts[i].number = (emit->arl_consts[i].number > num) ?
3098 num :
3099 emit->arl_consts[i].number;
3100 emit->arl_consts[i].arl_num = current_arl;
3101 return TRUE;
3102 }
3103
3104 static boolean
3105 pre_parse_instruction( struct svga_shader_emitter *emit,
3106 const struct tgsi_full_instruction *insn,
3107 int current_arl)
3108 {
3109 if (insn->Src[0].Register.Indirect &&
3110 insn->Src[0].Indirect.File == TGSI_FILE_ADDRESS) {
3111 const struct tgsi_full_src_register *reg = &insn->Src[0];
3112 if (reg->Register.Index < 0) {
3113 pre_parse_add_indirect(emit, reg->Register.Index, current_arl);
3114 }
3115 }
3116
3117 if (insn->Src[1].Register.Indirect &&
3118 insn->Src[1].Indirect.File == TGSI_FILE_ADDRESS) {
3119 const struct tgsi_full_src_register *reg = &insn->Src[1];
3120 if (reg->Register.Index < 0) {
3121 pre_parse_add_indirect(emit, reg->Register.Index, current_arl);
3122 }
3123 }
3124
3125 if (insn->Src[2].Register.Indirect &&
3126 insn->Src[2].Indirect.File == TGSI_FILE_ADDRESS) {
3127 const struct tgsi_full_src_register *reg = &insn->Src[2];
3128 if (reg->Register.Index < 0) {
3129 pre_parse_add_indirect(emit, reg->Register.Index, current_arl);
3130 }
3131 }
3132
3133 return TRUE;
3134 }
3135
3136 static boolean
3137 pre_parse_tokens( struct svga_shader_emitter *emit,
3138 const struct tgsi_token *tokens )
3139 {
3140 struct tgsi_parse_context parse;
3141 int current_arl = 0;
3142
3143 tgsi_parse_init( &parse, tokens );
3144
3145 while (!tgsi_parse_end_of_tokens( &parse )) {
3146 tgsi_parse_token( &parse );
3147 switch (parse.FullToken.Token.Type) {
3148 case TGSI_TOKEN_TYPE_IMMEDIATE:
3149 case TGSI_TOKEN_TYPE_DECLARATION:
3150 break;
3151 case TGSI_TOKEN_TYPE_INSTRUCTION:
3152 if (parse.FullToken.FullInstruction.Instruction.Opcode ==
3153 TGSI_OPCODE_ARL) {
3154 ++current_arl;
3155 }
3156 if (!pre_parse_instruction( emit, &parse.FullToken.FullInstruction,
3157 current_arl ))
3158 return FALSE;
3159 break;
3160 default:
3161 break;
3162 }
3163
3164 }
3165 return TRUE;
3166 }
3167
3168 static boolean svga_shader_emit_helpers( struct svga_shader_emitter *emit )
3169
3170 {
3171 if (needs_to_create_zero( emit )) {
3172 create_zero_immediate( emit );
3173 }
3174 if (needs_to_create_loop_const( emit )) {
3175 create_loop_const( emit );
3176 }
3177 if (needs_to_create_arl_consts( emit )) {
3178 create_arl_consts( emit );
3179 }
3180
3181 if (emit->unit == PIPE_SHADER_FRAGMENT) {
3182 if (!emit_ps_preamble( emit ))
3183 return FALSE;
3184
3185 if (emit->key.fkey.light_twoside) {
3186 if (!emit_light_twoside( emit ))
3187 return FALSE;
3188 }
3189 if (emit->emit_frontface) {
3190 if (!emit_frontface( emit ))
3191 return FALSE;
3192 }
3193 if (emit->inverted_texcoords) {
3194 if (!emit_inverted_texcoords( emit ))
3195 return FALSE;
3196 }
3197 }
3198
3199 return TRUE;
3200 }
3201
3202 boolean svga_shader_emit_instructions( struct svga_shader_emitter *emit,
3203 const struct tgsi_token *tokens )
3204 {
3205 struct tgsi_parse_context parse;
3206 boolean ret = TRUE;
3207 boolean helpers_emitted = FALSE;
3208 unsigned line_nr = 0;
3209
3210 tgsi_parse_init( &parse, tokens );
3211 emit->internal_imm_count = 0;
3212
3213 if (emit->unit == PIPE_SHADER_VERTEX) {
3214 ret = emit_vs_preamble( emit );
3215 if (!ret)
3216 goto done;
3217 }
3218
3219 pre_parse_tokens(emit, tokens);
3220
3221 while (!tgsi_parse_end_of_tokens( &parse )) {
3222 tgsi_parse_token( &parse );
3223
3224 switch (parse.FullToken.Token.Type) {
3225 case TGSI_TOKEN_TYPE_IMMEDIATE:
3226 ret = svga_emit_immediate( emit, &parse.FullToken.FullImmediate );
3227 if (!ret)
3228 goto done;
3229 break;
3230
3231 case TGSI_TOKEN_TYPE_DECLARATION:
3232 ret = svga_translate_decl_sm30( emit, &parse.FullToken.FullDeclaration );
3233 if (!ret)
3234 goto done;
3235 break;
3236
3237 case TGSI_TOKEN_TYPE_INSTRUCTION:
3238 if (!helpers_emitted) {
3239 if (!svga_shader_emit_helpers( emit ))
3240 goto done;
3241 helpers_emitted = TRUE;
3242 }
3243 ret = svga_emit_instruction( emit,
3244 line_nr++,
3245 &parse.FullToken.FullInstruction );
3246 if (!ret)
3247 goto done;
3248 break;
3249 default:
3250 break;
3251 }
3252
3253 reset_temp_regs( emit );
3254 }
3255
3256    /* Need to terminate the current subroutine.  Note that the
3257     * hardware requires the final sub-routine to be terminated with
3258     * RET before the closing END.
3259 */
3260 if (!emit->in_main_func) {
3261 ret = emit_instruction( emit, inst_token( SVGA3DOP_RET ) );
3262 if (!ret)
3263 goto done;
3264 }
3265
3266 assert(emit->dynamic_branching_level == 0);
3267
3268 /* Need to terminate the whole shader:
3269 */
3270 ret = emit_instruction( emit, inst_token( SVGA3DOP_END ) );
3271 if (!ret)
3272 goto done;
3273
3274 done:
3275 tgsi_parse_free( &parse );
3276 return ret;
3277 }
3278