mesa.git: src/gallium/drivers/svga/svga_tgsi_insn.c
1 /**********************************************************
2 * Copyright 2008-2009 VMware, Inc. All rights reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person
5 * obtaining a copy of this software and associated documentation
6 * files (the "Software"), to deal in the Software without
7 * restriction, including without limitation the rights to use, copy,
8 * modify, merge, publish, distribute, sublicense, and/or sell copies
9 * of the Software, and to permit persons to whom the Software is
10 * furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice shall be
13 * included in all copies or substantial portions of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
19 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
20 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
21 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 *
24 **********************************************************/
25
26
27 #include "pipe/p_shader_tokens.h"
28 #include "tgsi/tgsi_dump.h"
29 #include "tgsi/tgsi_parse.h"
30 #include "util/u_memory.h"
31 #include "util/u_math.h"
32 #include "util/u_pstipple.h"
33
34 #include "svga_tgsi_emit.h"
35 #include "svga_context.h"
36
37
38 static boolean emit_vs_postamble( struct svga_shader_emitter *emit );
39 static boolean emit_ps_postamble( struct svga_shader_emitter *emit );
40
41
42 static unsigned
43 translate_opcode(uint opcode)
44 {
45 switch (opcode) {
46 case TGSI_OPCODE_ABS: return SVGA3DOP_ABS;
47 case TGSI_OPCODE_ADD: return SVGA3DOP_ADD;
48 case TGSI_OPCODE_DP2A: return SVGA3DOP_DP2ADD;
49 case TGSI_OPCODE_DP3: return SVGA3DOP_DP3;
50 case TGSI_OPCODE_DP4: return SVGA3DOP_DP4;
51 case TGSI_OPCODE_FRC: return SVGA3DOP_FRC;
52 case TGSI_OPCODE_MAD: return SVGA3DOP_MAD;
53 case TGSI_OPCODE_MAX: return SVGA3DOP_MAX;
54 case TGSI_OPCODE_MIN: return SVGA3DOP_MIN;
55 case TGSI_OPCODE_MOV: return SVGA3DOP_MOV;
56 case TGSI_OPCODE_MUL: return SVGA3DOP_MUL;
57 case TGSI_OPCODE_NOP: return SVGA3DOP_NOP;
58 default:
59 assert(!"svga: unexpected opcode in translate_opcode()");
60 return SVGA3DOP_LAST_INST;
61 }
62 }
63
64
65 static unsigned
66 translate_file(unsigned file)
67 {
68 switch (file) {
69 case TGSI_FILE_TEMPORARY: return SVGA3DREG_TEMP;
70 case TGSI_FILE_INPUT: return SVGA3DREG_INPUT;
71 case TGSI_FILE_OUTPUT: return SVGA3DREG_OUTPUT; /* VS3.0+ only */
72 case TGSI_FILE_IMMEDIATE: return SVGA3DREG_CONST;
73 case TGSI_FILE_CONSTANT: return SVGA3DREG_CONST;
74 case TGSI_FILE_SAMPLER: return SVGA3DREG_SAMPLER;
75 case TGSI_FILE_ADDRESS: return SVGA3DREG_ADDR;
76 default:
77 assert(!"svga: unexpected register file in translate_file()");
78 return SVGA3DREG_TEMP;
79 }
80 }
81
82
83 /**
84 * Translate a TGSI destination register to an SVGA3DShaderDestToken.
85 * \param insn the TGSI instruction
86  * \param idx which TGSI dest register to translate (usually, if not always, zero)
87 */
88 static SVGA3dShaderDestToken
89 translate_dst_register( struct svga_shader_emitter *emit,
90 const struct tgsi_full_instruction *insn,
91 unsigned idx )
92 {
93 const struct tgsi_full_dst_register *reg = &insn->Dst[idx];
94 SVGA3dShaderDestToken dest;
95
96 switch (reg->Register.File) {
97 case TGSI_FILE_OUTPUT:
98       /* Output registers encode semantic information in their names.
99        * Need to look up a table built at decl time:
100 */
101 dest = emit->output_map[reg->Register.Index];
102 emit->num_output_writes++;
103 break;
104
105 default:
106 {
107 unsigned index = reg->Register.Index;
108 assert(index < SVGA3D_TEMPREG_MAX);
109 index = MIN2(index, SVGA3D_TEMPREG_MAX - 1);
110 dest = dst_register(translate_file(reg->Register.File), index);
111 }
112 break;
113 }
114
115 if (reg->Register.Indirect) {
116 debug_warning("Indirect indexing of dest registers is not supported!\n");
117 }
118
119 dest.mask = reg->Register.WriteMask;
120 assert(dest.mask);
121
122 if (insn->Instruction.Saturate)
123 dest.dstMod = SVGA3DDSTMOD_SATURATE;
124
125 return dest;
126 }
127
128
129 /**
130 * Apply a swizzle to a src_register, returning a new src_register
131 * Ex: swizzle(SRC.ZZYY, SWIZZLE_Z, SWIZZLE_W, SWIZZLE_X, SWIZZLE_Y)
132 * would return SRC.YYZZ
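 *
 * (Each component selector occupies two bits of src.base.swizzle; the code
 * below extracts the selector for each requested channel and repacks the
 * four selectors with TRANSLATE_SWIZZLE.)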
133 */
134 static struct src_register
135 swizzle(struct src_register src,
136 unsigned x, unsigned y, unsigned z, unsigned w)
137 {
138 assert(x < 4);
139 assert(y < 4);
140 assert(z < 4);
141 assert(w < 4);
142 x = (src.base.swizzle >> (x * 2)) & 0x3;
143 y = (src.base.swizzle >> (y * 2)) & 0x3;
144 z = (src.base.swizzle >> (z * 2)) & 0x3;
145 w = (src.base.swizzle >> (w * 2)) & 0x3;
146
147 src.base.swizzle = TRANSLATE_SWIZZLE(x, y, z, w);
148
149 return src;
150 }
151
152
153 /**
154 * Apply a "scalar" swizzle to a src_register returning a new
155 * src_register where all the swizzle terms are the same.
156 * Ex: scalar(SRC.WZYX, SWIZZLE_Y) would return SRC.ZZZZ
157 */
158 static struct src_register
159 scalar(struct src_register src, unsigned comp)
160 {
161 assert(comp < 4);
162 return swizzle( src, comp, comp, comp, comp );
163 }
164
165
166 static boolean
167 svga_arl_needs_adjustment( const struct svga_shader_emitter *emit )
168 {
169 unsigned i;
170
171 for (i = 0; i < emit->num_arl_consts; ++i) {
172 if (emit->arl_consts[i].arl_num == emit->current_arl)
173 return TRUE;
174 }
175 return FALSE;
176 }
177
178
179 static int
180 svga_arl_adjustment( const struct svga_shader_emitter *emit )
181 {
182 unsigned i;
183
184 for (i = 0; i < emit->num_arl_consts; ++i) {
185 if (emit->arl_consts[i].arl_num == emit->current_arl)
186 return emit->arl_consts[i].number;
187 }
188 return 0;
189 }
190
191
192 /**
193 * Translate a TGSI src register to a src_register.
194 */
195 static struct src_register
196 translate_src_register( const struct svga_shader_emitter *emit,
197 const struct tgsi_full_src_register *reg )
198 {
199 struct src_register src;
200
201 switch (reg->Register.File) {
202 case TGSI_FILE_INPUT:
203 /* Input registers are referred to by their semantic name rather
204       * than by index.  Use the mapping built up from the decls:
205 */
206 src = emit->input_map[reg->Register.Index];
207 break;
208
209 case TGSI_FILE_IMMEDIATE:
210 /* Immediates are appended after TGSI constants in the D3D
211 * constant buffer.
212 */
213 src = src_register( translate_file( reg->Register.File ),
214 reg->Register.Index + emit->imm_start );
215 break;
216
217 default:
218 src = src_register( translate_file( reg->Register.File ),
219 reg->Register.Index );
220 break;
221 }
222
223 /* Indirect addressing.
224 */
225 if (reg->Register.Indirect) {
226 if (emit->unit == PIPE_SHADER_FRAGMENT) {
227 /* Pixel shaders have only loop registers for relative
228 * addressing into inputs. Ignore the redundant address
229          * register; the contents of aL should be in sync with it.
230 */
231 if (reg->Register.File == TGSI_FILE_INPUT) {
232 src.base.relAddr = 1;
233 src.indirect = src_token(SVGA3DREG_LOOP, 0);
234 }
235 }
236 else {
237 /* Constant buffers only.
238 */
239 if (reg->Register.File == TGSI_FILE_CONSTANT) {
240 /* we shift the offset towards the minimum */
241 if (svga_arl_needs_adjustment( emit )) {
242 src.base.num -= svga_arl_adjustment( emit );
243 }
244 src.base.relAddr = 1;
245
246 /* Not really sure what should go in the second token:
247 */
248 src.indirect = src_token( SVGA3DREG_ADDR,
249 reg->Indirect.Index );
250
251 src.indirect.swizzle = SWIZZLE_XXXX;
252 }
253 }
254 }
255
256 src = swizzle( src,
257 reg->Register.SwizzleX,
258 reg->Register.SwizzleY,
259 reg->Register.SwizzleZ,
260 reg->Register.SwizzleW );
261
262 /* src.mod isn't a bitfield, unfortunately:
263 * See tgsi_util_get_full_src_register_sign_mode for implementation details.
264 */
265 if (reg->Register.Absolute) {
266 if (reg->Register.Negate)
267 src.base.srcMod = SVGA3DSRCMOD_ABSNEG;
268 else
269 src.base.srcMod = SVGA3DSRCMOD_ABS;
270 }
271 else {
272 if (reg->Register.Negate)
273 src.base.srcMod = SVGA3DSRCMOD_NEG;
274 else
275 src.base.srcMod = SVGA3DSRCMOD_NONE;
276 }
277
278 return src;
279 }
280
281
282 /*
283 * Get a temporary register.
284 * Note: if we exceed the temporary register limit we just use
285 * register SVGA3D_TEMPREG_MAX - 1.
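 *
 * Internal temps are allocated on top of the shader's own temporaries
 * (emit->nr_hw_temp) and are recycled via reset_temp_regs(), which is
 * presumably called between instructions.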
286 */
287 static SVGA3dShaderDestToken
288 get_temp( struct svga_shader_emitter *emit )
289 {
290 int i = emit->nr_hw_temp + emit->internal_temp_count++;
291 if (i >= SVGA3D_TEMPREG_MAX) {
292 debug_warn_once("svga: Too many temporary registers used in shader\n");
293 i = SVGA3D_TEMPREG_MAX - 1;
294 }
295 return dst_register( SVGA3DREG_TEMP, i );
296 }
297
298
299 /**
300 * Release a single temp. Currently only effective if it was the last
301 * allocated temp, otherwise release will be delayed until the next
302 * call to reset_temp_regs().
303 */
304 static void
305 release_temp( struct svga_shader_emitter *emit,
306 SVGA3dShaderDestToken temp )
307 {
308 if (temp.num == emit->internal_temp_count - 1)
309 emit->internal_temp_count--;
310 }
311
312
313 /**
314 * Release all temps.
315 */
316 static void
317 reset_temp_regs(struct svga_shader_emitter *emit)
318 {
319 emit->internal_temp_count = 0;
320 }
321
322
323 /** Emit bytecode for a src_register */
324 static boolean
325 emit_src(struct svga_shader_emitter *emit, const struct src_register src)
326 {
327 if (src.base.relAddr) {
328 assert(src.base.reserved0);
329 assert(src.indirect.reserved0);
330 return (svga_shader_emit_dword( emit, src.base.value ) &&
331 svga_shader_emit_dword( emit, src.indirect.value ));
332 }
333 else {
334 assert(src.base.reserved0);
335 return svga_shader_emit_dword( emit, src.base.value );
336 }
337 }
338
339
340 /** Emit bytecode for a dst_register */
341 static boolean
342 emit_dst(struct svga_shader_emitter *emit, SVGA3dShaderDestToken dest)
343 {
344 assert(dest.reserved0);
345 assert(dest.mask);
346 return svga_shader_emit_dword( emit, dest.value );
347 }
348
349
350 /** Emit bytecode for a 1-operand instruction */
351 static boolean
352 emit_op1(struct svga_shader_emitter *emit,
353 SVGA3dShaderInstToken inst,
354 SVGA3dShaderDestToken dest,
355 struct src_register src0)
356 {
357 return (emit_instruction(emit, inst) &&
358 emit_dst(emit, dest) &&
359 emit_src(emit, src0));
360 }
361
362
363 /** Emit bytecode for a 2-operand instruction */
364 static boolean
365 emit_op2(struct svga_shader_emitter *emit,
366 SVGA3dShaderInstToken inst,
367 SVGA3dShaderDestToken dest,
368 struct src_register src0,
369 struct src_register src1)
370 {
371 return (emit_instruction(emit, inst) &&
372 emit_dst(emit, dest) &&
373 emit_src(emit, src0) &&
374 emit_src(emit, src1));
375 }
376
377
378 /** Emit bytecode for a 3-operand instruction */
379 static boolean
380 emit_op3(struct svga_shader_emitter *emit,
381 SVGA3dShaderInstToken inst,
382 SVGA3dShaderDestToken dest,
383 struct src_register src0,
384 struct src_register src1,
385 struct src_register src2)
386 {
387 return (emit_instruction(emit, inst) &&
388 emit_dst(emit, dest) &&
389 emit_src(emit, src0) &&
390 emit_src(emit, src1) &&
391 emit_src(emit, src2));
392 }
393
394
395 /** Emit bytecode for a 4-operand instruction */
396 static boolean
397 emit_op4(struct svga_shader_emitter *emit,
398 SVGA3dShaderInstToken inst,
399 SVGA3dShaderDestToken dest,
400 struct src_register src0,
401 struct src_register src1,
402 struct src_register src2,
403 struct src_register src3)
404 {
405 return (emit_instruction(emit, inst) &&
406 emit_dst(emit, dest) &&
407 emit_src(emit, src0) &&
408 emit_src(emit, src1) &&
409 emit_src(emit, src2) &&
410 emit_src(emit, src3));
411 }
412
413
414 /**
415 * Apply the absolute value modifier to the given src_register, returning
416 * a new src_register.
417 */
418 static struct src_register
419 absolute(struct src_register src)
420 {
421 src.base.srcMod = SVGA3DSRCMOD_ABS;
422 return src;
423 }
424
425
426 /**
427 * Apply the negation modifier to the given src_register, returning
428 * a new src_register.
429 */
430 static struct src_register
431 negate(struct src_register src)
432 {
433 switch (src.base.srcMod) {
434 case SVGA3DSRCMOD_ABS:
435 src.base.srcMod = SVGA3DSRCMOD_ABSNEG;
436 break;
437 case SVGA3DSRCMOD_ABSNEG:
438 src.base.srcMod = SVGA3DSRCMOD_ABS;
439 break;
440 case SVGA3DSRCMOD_NEG:
441 src.base.srcMod = SVGA3DSRCMOD_NONE;
442 break;
443 case SVGA3DSRCMOD_NONE:
444 src.base.srcMod = SVGA3DSRCMOD_NEG;
445 break;
446 }
447 return src;
448 }
449
450
451
452 /* Replace the src with the temporary specified in the dst, but copying
453 * only the necessary channels, and preserving the original swizzle (which is
454  * important given that several opcodes have constraints on the allowed
455 * swizzles).
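 *
 * E.g. (illustrative) if the source is read as SRC.yyzx, only the x, y and
 * z channels of the temporary need to be written, and the .yyzx swizzle is
 * then re-applied to the temporary when it replaces the source.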
456 */
457 static boolean
458 emit_repl(struct svga_shader_emitter *emit,
459 SVGA3dShaderDestToken dst,
460 struct src_register *src0)
461 {
462 unsigned src0_swizzle;
463 unsigned chan;
464
465 assert(SVGA3dShaderGetRegType(dst.value) == SVGA3DREG_TEMP);
466
467 src0_swizzle = src0->base.swizzle;
468
469 dst.mask = 0;
470 for (chan = 0; chan < 4; ++chan) {
471 unsigned swizzle = (src0_swizzle >> (chan *2)) & 0x3;
472 dst.mask |= 1 << swizzle;
473 }
474 assert(dst.mask);
475
476 src0->base.swizzle = SVGA3DSWIZZLE_NONE;
477
478 if (!emit_op1( emit, inst_token( SVGA3DOP_MOV ), dst, *src0 ))
479 return FALSE;
480
481 *src0 = src( dst );
482 src0->base.swizzle = src0_swizzle;
483
484 return TRUE;
485 }
486
487
488 /**
489 * Submit/emit an instruction with zero operands.
490 */
491 static boolean
492 submit_op0(struct svga_shader_emitter *emit,
493 SVGA3dShaderInstToken inst,
494 SVGA3dShaderDestToken dest)
495 {
496 return (emit_instruction( emit, inst ) &&
497 emit_dst( emit, dest ));
498 }
499
500
501 /**
502 * Submit/emit an instruction with one operand.
503 */
504 static boolean
505 submit_op1(struct svga_shader_emitter *emit,
506 SVGA3dShaderInstToken inst,
507 SVGA3dShaderDestToken dest,
508 struct src_register src0)
509 {
510 return emit_op1( emit, inst, dest, src0 );
511 }
512
513
514 /**
515 * Submit/emit an instruction with two operands.
516 *
517 * SVGA shaders may not refer to >1 constant register in a single
518 * instruction. This function checks for that usage and inserts a
519 * move to temporary if detected.
520 *
521 * The same applies to input registers -- at most a single input
522 * register may be read by any instruction.
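 *
 * For example (illustrative only), something like
 *    ADD TEMP[0], CONST[0], CONST[1]
 * would be emitted as
 *    MOV TEMP[t], CONST[0]
 *    ADD TEMP[0], TEMP[t], CONST[1]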
523 */
524 static boolean
525 submit_op2(struct svga_shader_emitter *emit,
526 SVGA3dShaderInstToken inst,
527 SVGA3dShaderDestToken dest,
528 struct src_register src0,
529 struct src_register src1)
530 {
531 SVGA3dShaderDestToken temp;
532 SVGA3dShaderRegType type0, type1;
533 boolean need_temp = FALSE;
534
535 temp.value = 0;
536 type0 = SVGA3dShaderGetRegType( src0.base.value );
537 type1 = SVGA3dShaderGetRegType( src1.base.value );
538
539 if (type0 == SVGA3DREG_CONST &&
540 type1 == SVGA3DREG_CONST &&
541 src0.base.num != src1.base.num)
542 need_temp = TRUE;
543
544 if (type0 == SVGA3DREG_INPUT &&
545 type1 == SVGA3DREG_INPUT &&
546 src0.base.num != src1.base.num)
547 need_temp = TRUE;
548
549 if (need_temp) {
550 temp = get_temp( emit );
551
552 if (!emit_repl( emit, temp, &src0 ))
553 return FALSE;
554 }
555
556 if (!emit_op2( emit, inst, dest, src0, src1 ))
557 return FALSE;
558
559 if (need_temp)
560 release_temp( emit, temp );
561
562 return TRUE;
563 }
564
565
566 /**
567 * Submit/emit an instruction with three operands.
568 *
569 * SVGA shaders may not refer to >1 constant register in a single
570 * instruction. This function checks for that usage and inserts a
571 * move to temporary if detected.
572 */
573 static boolean
574 submit_op3(struct svga_shader_emitter *emit,
575 SVGA3dShaderInstToken inst,
576 SVGA3dShaderDestToken dest,
577 struct src_register src0,
578 struct src_register src1,
579 struct src_register src2)
580 {
581 SVGA3dShaderDestToken temp0;
582 SVGA3dShaderDestToken temp1;
583 boolean need_temp0 = FALSE;
584 boolean need_temp1 = FALSE;
585 SVGA3dShaderRegType type0, type1, type2;
586
587 temp0.value = 0;
588 temp1.value = 0;
589 type0 = SVGA3dShaderGetRegType( src0.base.value );
590 type1 = SVGA3dShaderGetRegType( src1.base.value );
591 type2 = SVGA3dShaderGetRegType( src2.base.value );
592
593 if (inst.op != SVGA3DOP_SINCOS) {
594 if (type0 == SVGA3DREG_CONST &&
595 ((type1 == SVGA3DREG_CONST && src0.base.num != src1.base.num) ||
596 (type2 == SVGA3DREG_CONST && src0.base.num != src2.base.num)))
597 need_temp0 = TRUE;
598
599 if (type1 == SVGA3DREG_CONST &&
600 (type2 == SVGA3DREG_CONST && src1.base.num != src2.base.num))
601 need_temp1 = TRUE;
602 }
603
604 if (type0 == SVGA3DREG_INPUT &&
605 ((type1 == SVGA3DREG_INPUT && src0.base.num != src1.base.num) ||
606 (type2 == SVGA3DREG_INPUT && src0.base.num != src2.base.num)))
607 need_temp0 = TRUE;
608
609 if (type1 == SVGA3DREG_INPUT &&
610 (type2 == SVGA3DREG_INPUT && src1.base.num != src2.base.num))
611 need_temp1 = TRUE;
612
613 if (need_temp0) {
614 temp0 = get_temp( emit );
615
616 if (!emit_repl( emit, temp0, &src0 ))
617 return FALSE;
618 }
619
620 if (need_temp1) {
621 temp1 = get_temp( emit );
622
623 if (!emit_repl( emit, temp1, &src1 ))
624 return FALSE;
625 }
626
627 if (!emit_op3( emit, inst, dest, src0, src1, src2 ))
628 return FALSE;
629
630 if (need_temp1)
631 release_temp( emit, temp1 );
632 if (need_temp0)
633 release_temp( emit, temp0 );
634 return TRUE;
635 }
636
637
638 /**
639 * Submit/emit an instruction with four operands.
640 *
641 * SVGA shaders may not refer to >1 constant register in a single
642 * instruction. This function checks for that usage and inserts a
643 * move to temporary if detected.
644 */
645 static boolean
646 submit_op4(struct svga_shader_emitter *emit,
647 SVGA3dShaderInstToken inst,
648 SVGA3dShaderDestToken dest,
649 struct src_register src0,
650 struct src_register src1,
651 struct src_register src2,
652 struct src_register src3)
653 {
654 SVGA3dShaderDestToken temp0;
655 SVGA3dShaderDestToken temp3;
656 boolean need_temp0 = FALSE;
657 boolean need_temp3 = FALSE;
658 SVGA3dShaderRegType type0, type1, type2, type3;
659
660 temp0.value = 0;
661 temp3.value = 0;
662 type0 = SVGA3dShaderGetRegType( src0.base.value );
663 type1 = SVGA3dShaderGetRegType( src1.base.value );
664 type2 = SVGA3dShaderGetRegType( src2.base.value );
665    type3 = SVGA3dShaderGetRegType( src3.base.value );
666
667 /* Make life a little easier - this is only used by the TXD
668 * instruction which is guaranteed not to have a constant/input reg
669 * in one slot at least:
670 */
671 assert(type1 == SVGA3DREG_SAMPLER);
672
673 if (type0 == SVGA3DREG_CONST &&
674 ((type3 == SVGA3DREG_CONST && src0.base.num != src3.base.num) ||
675 (type2 == SVGA3DREG_CONST && src0.base.num != src2.base.num)))
676 need_temp0 = TRUE;
677
678 if (type3 == SVGA3DREG_CONST &&
679 (type2 == SVGA3DREG_CONST && src3.base.num != src2.base.num))
680 need_temp3 = TRUE;
681
682 if (type0 == SVGA3DREG_INPUT &&
683 ((type3 == SVGA3DREG_INPUT && src0.base.num != src3.base.num) ||
684 (type2 == SVGA3DREG_INPUT && src0.base.num != src2.base.num)))
685 need_temp0 = TRUE;
686
687 if (type3 == SVGA3DREG_INPUT &&
688 (type2 == SVGA3DREG_INPUT && src3.base.num != src2.base.num))
689 need_temp3 = TRUE;
690
691 if (need_temp0) {
692 temp0 = get_temp( emit );
693
694 if (!emit_repl( emit, temp0, &src0 ))
695 return FALSE;
696 }
697
698 if (need_temp3) {
699 temp3 = get_temp( emit );
700
701 if (!emit_repl( emit, temp3, &src3 ))
702 return FALSE;
703 }
704
705 if (!emit_op4( emit, inst, dest, src0, src1, src2, src3 ))
706 return FALSE;
707
708 if (need_temp3)
709 release_temp( emit, temp3 );
710 if (need_temp0)
711 release_temp( emit, temp0 );
712 return TRUE;
713 }
714
715
716 /**
717 * Do the src and dest registers refer to the same register?
718 */
719 static boolean
720 alias_src_dst(struct src_register src,
721 SVGA3dShaderDestToken dst)
722 {
723 if (src.base.num != dst.num)
724 return FALSE;
725
726 if (SVGA3dShaderGetRegType(dst.value) !=
727 SVGA3dShaderGetRegType(src.base.value))
728 return FALSE;
729
730 return TRUE;
731 }
732
733
734 /**
735 * Helper for emitting SVGA immediate values using the SVGA3DOP_DEF[I]
736 * instructions.
737 */
738 static boolean
739 emit_def_const(struct svga_shader_emitter *emit,
740 SVGA3dShaderConstType type,
741 unsigned idx, float a, float b, float c, float d)
742 {
743 SVGA3DOpDefArgs def;
744 SVGA3dShaderInstToken opcode;
745
746 switch (type) {
747 case SVGA3D_CONST_TYPE_FLOAT:
748 opcode = inst_token( SVGA3DOP_DEF );
749 def.dst = dst_register( SVGA3DREG_CONST, idx );
750 def.constValues[0] = a;
751 def.constValues[1] = b;
752 def.constValues[2] = c;
753 def.constValues[3] = d;
754 break;
755 case SVGA3D_CONST_TYPE_INT:
756 opcode = inst_token( SVGA3DOP_DEFI );
757 def.dst = dst_register( SVGA3DREG_CONSTINT, idx );
758 def.constIValues[0] = (int)a;
759 def.constIValues[1] = (int)b;
760 def.constIValues[2] = (int)c;
761 def.constIValues[3] = (int)d;
762 break;
763 default:
764 assert(0);
765 opcode = inst_token( SVGA3DOP_NOP );
766 break;
767 }
768
769 if (!emit_instruction(emit, opcode) ||
770 !svga_shader_emit_dwords( emit, def.values, Elements(def.values)))
771 return FALSE;
772
773 return TRUE;
774 }
775
776
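/**
 * Emit the integer constant used by LOOP instructions: at most 255
 * iterations, starting at 0 and stepping by 1 (see get_loop_const()).
 */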
777 static boolean
778 create_loop_const( struct svga_shader_emitter *emit )
779 {
780 unsigned idx = emit->nr_hw_int_const++;
781
782 if (!emit_def_const( emit, SVGA3D_CONST_TYPE_INT, idx,
783 255, /* iteration count */
784 0, /* initial value */
785 1, /* step size */
786 0 /* not used, must be 0 */))
787 return FALSE;
788
789 emit->loop_const_idx = idx;
790 emit->created_loop_const = TRUE;
791
792 return TRUE;
793 }
794
795 static boolean
796 create_arl_consts( struct svga_shader_emitter *emit )
797 {
798 int i;
799
800 for (i = 0; i < emit->num_arl_consts; i += 4) {
801 int j;
802 unsigned idx = emit->nr_hw_float_const++;
803 float vals[4];
804 for (j = 0; j < 4 && (j + i) < emit->num_arl_consts; ++j) {
805 vals[j] = (float) emit->arl_consts[i + j].number;
806 emit->arl_consts[i + j].idx = idx;
807 switch (j) {
808 case 0:
809 emit->arl_consts[i + 0].swizzle = TGSI_SWIZZLE_X;
810 break;
811 case 1:
812 emit->arl_consts[i + 0].swizzle = TGSI_SWIZZLE_Y;
813 break;
814 case 2:
815 emit->arl_consts[i + 0].swizzle = TGSI_SWIZZLE_Z;
816 break;
817 case 3:
818 emit->arl_consts[i + 0].swizzle = TGSI_SWIZZLE_W;
819 break;
820 }
821 }
822 while (j < 4)
823 vals[j++] = 0;
824
825 if (!emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT, idx,
826 vals[0], vals[1],
827 vals[2], vals[3]))
828 return FALSE;
829 }
830
831 return TRUE;
832 }
833
834
835 /**
836  * Return the register which holds the pixel shader's front/back-
837 * facing value.
838 */
839 static struct src_register
840 get_vface( struct svga_shader_emitter *emit )
841 {
842 assert(emit->emitted_vface);
843 return src_register(SVGA3DREG_MISCTYPE, SVGA3DMISCREG_FACE);
844 }
845
846
847 /**
848 * Create/emit a "common" constant with values {0, 0.5, -1, 1}.
849 * We can swizzle this to produce other useful constants such as
850 * {0, 0, 0, 0}, {1, 1, 1, 1}, etc.
851 */
852 static boolean
853 create_common_immediate( struct svga_shader_emitter *emit )
854 {
855 unsigned idx = emit->nr_hw_float_const++;
856
857 /* Emit the constant (0, 0.5, -1, 1) and use swizzling to generate
858 * other useful vectors.
859 */
860 if (!emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT,
861 idx, 0.0f, 0.5f, -1.0f, 1.0f ))
862 return FALSE;
863 emit->common_immediate_idx[0] = idx;
864 idx++;
865
866 /* Emit constant {2, 0, 0, 0} (only the 2 is used for now) */
867 if (emit->key.vs.adjust_attrib_range) {
868 if (!emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT,
869 idx, 2.0f, 0.0f, 0.0f, 0.0f ))
870 return FALSE;
871 emit->common_immediate_idx[1] = idx;
872 }
873 else {
874 emit->common_immediate_idx[1] = -1;
875 }
876
877 emit->created_common_immediate = TRUE;
878
879 return TRUE;
880 }
881
882
883 /**
884 * Return swizzle/position for the given value in the "common" immediate.
885 */
886 static inline unsigned
887 common_immediate_swizzle(float value)
888 {
889 if (value == 0.0f)
890 return TGSI_SWIZZLE_X;
891 else if (value == 0.5f)
892 return TGSI_SWIZZLE_Y;
893 else if (value == -1.0f)
894 return TGSI_SWIZZLE_Z;
895 else if (value == 1.0f)
896 return TGSI_SWIZZLE_W;
897 else {
898 assert(!"illegal value in common_immediate_swizzle");
899 return TGSI_SWIZZLE_X;
900 }
901 }
902
903
904 /**
905  * Returns an immediate reg where all the terms are either 0, 0.5, -1 or 1
906 */
907 static struct src_register
908 get_immediate(struct svga_shader_emitter *emit,
909 float x, float y, float z, float w)
910 {
911 unsigned sx = common_immediate_swizzle(x);
912 unsigned sy = common_immediate_swizzle(y);
913 unsigned sz = common_immediate_swizzle(z);
914 unsigned sw = common_immediate_swizzle(w);
915 assert(emit->created_common_immediate);
916 assert(emit->common_immediate_idx[0] >= 0);
917 return swizzle(src_register(SVGA3DREG_CONST, emit->common_immediate_idx[0]),
918 sx, sy, sz, sw);
919 }
920
921
922 /**
923 * returns {0, 0, 0, 0} immediate
924 */
925 static struct src_register
926 get_zero_immediate( struct svga_shader_emitter *emit )
927 {
928 assert(emit->created_common_immediate);
929 assert(emit->common_immediate_idx[0] >= 0);
930 return swizzle(src_register( SVGA3DREG_CONST,
931 emit->common_immediate_idx[0]),
932 0, 0, 0, 0);
933 }
934
935
936 /**
937 * returns {1, 1, 1, 1} immediate
938 */
939 static struct src_register
940 get_one_immediate( struct svga_shader_emitter *emit )
941 {
942 assert(emit->created_common_immediate);
943 assert(emit->common_immediate_idx[0] >= 0);
944 return swizzle(src_register( SVGA3DREG_CONST,
945 emit->common_immediate_idx[0]),
946 3, 3, 3, 3);
947 }
948
949
950 /**
951 * returns {0.5, 0.5, 0.5, 0.5} immediate
952 */
953 static struct src_register
954 get_half_immediate( struct svga_shader_emitter *emit )
955 {
956 assert(emit->created_common_immediate);
957 assert(emit->common_immediate_idx[0] >= 0);
958 return swizzle(src_register(SVGA3DREG_CONST, emit->common_immediate_idx[0]),
959 1, 1, 1, 1);
960 }
961
962
963 /**
964 * returns {2, 2, 2, 2} immediate
965 */
966 static struct src_register
967 get_two_immediate( struct svga_shader_emitter *emit )
968 {
969 /* Note we use the second common immediate here */
970 assert(emit->created_common_immediate);
971 assert(emit->common_immediate_idx[1] >= 0);
972 return swizzle(src_register( SVGA3DREG_CONST,
973 emit->common_immediate_idx[1]),
974 0, 0, 0, 0);
975 }
976
977
978 /**
979 * returns the loop const
980 */
981 static struct src_register
982 get_loop_const( struct svga_shader_emitter *emit )
983 {
984 assert(emit->created_loop_const);
985 assert(emit->loop_const_idx >= 0);
986 return src_register( SVGA3DREG_CONSTINT,
987 emit->loop_const_idx );
988 }
989
990
991 static struct src_register
992 get_fake_arl_const( struct svga_shader_emitter *emit )
993 {
994 struct src_register reg;
995 int idx = 0, swizzle = 0, i;
996
997 for (i = 0; i < emit->num_arl_consts; ++ i) {
998 if (emit->arl_consts[i].arl_num == emit->current_arl) {
999 idx = emit->arl_consts[i].idx;
1000 swizzle = emit->arl_consts[i].swizzle;
1001 }
1002 }
1003
1004 reg = src_register( SVGA3DREG_CONST, idx );
1005 return scalar(reg, swizzle);
1006 }
1007
1008
1009 /**
1010 * Return a register which holds the width and height of the texture
1011 * currently bound to the given sampler.
1012 */
1013 static struct src_register
1014 get_tex_dimensions( struct svga_shader_emitter *emit, int sampler_num )
1015 {
1016 int idx;
1017 struct src_register reg;
1018
1019 /* the width/height indexes start right after constants */
1020 idx = emit->key.tex[sampler_num].width_height_idx +
1021 emit->info.file_max[TGSI_FILE_CONSTANT] + 1;
1022
1023 reg = src_register( SVGA3DREG_CONST, idx );
1024 return reg;
1025 }
1026
1027
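/**
 * Emit a "fake" ARL: copy the source into a temp, add the per-ARL bias
 * constant (see get_fake_arl_const), then MOVA the result into the address
 * register with the original swizzle.  translate_src_register() compensates
 * for the bias by subtracting svga_arl_adjustment() from relative constant
 * indices.
 */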
1028 static boolean
1029 emit_fake_arl(struct svga_shader_emitter *emit,
1030 const struct tgsi_full_instruction *insn)
1031 {
1032 const struct src_register src0 =
1033 translate_src_register(emit, &insn->Src[0] );
1034 struct src_register src1 = get_fake_arl_const( emit );
1035 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1036 SVGA3dShaderDestToken tmp = get_temp( emit );
1037
1038 if (!submit_op1(emit, inst_token( SVGA3DOP_MOV ), tmp, src0))
1039 return FALSE;
1040
1041 if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ), tmp, src( tmp ),
1042 src1))
1043 return FALSE;
1044
1045 /* replicate the original swizzle */
1046 src1 = src(tmp);
1047 src1.base.swizzle = src0.base.swizzle;
1048
1049 return submit_op1( emit, inst_token( SVGA3DOP_MOVA ),
1050 dst, src1 );
1051 }
1052
1053
1054 static boolean
1055 emit_if(struct svga_shader_emitter *emit,
1056 const struct tgsi_full_instruction *insn)
1057 {
1058 struct src_register src0 =
1059 translate_src_register(emit, &insn->Src[0]);
1060 struct src_register zero = get_zero_immediate(emit);
1061 SVGA3dShaderInstToken if_token = inst_token( SVGA3DOP_IFC );
1062
1063 if_token.control = SVGA3DOPCOMPC_NE;
1064
1065 if (SVGA3dShaderGetRegType(src0.base.value) == SVGA3DREG_CONST) {
1066 /*
1067        * An IFC instruction can read at most one distinct constant register.
1068 */
1069 SVGA3dShaderDestToken tmp = get_temp( emit );
1070
1071 if (!submit_op1(emit, inst_token( SVGA3DOP_MOV ), tmp, src0))
1072 return FALSE;
1073
1074 src0 = scalar(src( tmp ), TGSI_SWIZZLE_X);
1075 }
1076
1077 emit->dynamic_branching_level++;
1078
1079 return (emit_instruction( emit, if_token ) &&
1080 emit_src( emit, src0 ) &&
1081 emit_src( emit, zero ) );
1082 }
1083
1084
1085 static boolean
1086 emit_else(struct svga_shader_emitter *emit,
1087 const struct tgsi_full_instruction *insn)
1088 {
1089 return emit_instruction(emit, inst_token(SVGA3DOP_ELSE));
1090 }
1091
1092
1093 static boolean
1094 emit_endif(struct svga_shader_emitter *emit,
1095 const struct tgsi_full_instruction *insn)
1096 {
1097 emit->dynamic_branching_level--;
1098
1099 return emit_instruction(emit, inst_token(SVGA3DOP_ENDIF));
1100 }
1101
1102
1103 /**
1104 * Translate the following TGSI FLR instruction.
1105 * FLR DST, SRC
1106 * To the following SVGA3D instruction sequence.
1107 * FRC TMP, SRC
1108 * SUB DST, SRC, TMP
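 *   (using the identity floor(x) = x - frac(x))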
1109 */
1110 static boolean
1111 emit_floor(struct svga_shader_emitter *emit,
1112 const struct tgsi_full_instruction *insn )
1113 {
1114 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1115 const struct src_register src0 =
1116 translate_src_register(emit, &insn->Src[0] );
1117 SVGA3dShaderDestToken temp = get_temp( emit );
1118
1119 /* FRC TMP, SRC */
1120 if (!submit_op1( emit, inst_token( SVGA3DOP_FRC ), temp, src0 ))
1121 return FALSE;
1122
1123 /* SUB DST, SRC, TMP */
1124 if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ), dst, src0,
1125 negate( src( temp ) ) ))
1126 return FALSE;
1127
1128 return TRUE;
1129 }
1130
1131
1132 /**
1133 * Translate the following TGSI CEIL instruction.
1134 * CEIL DST, SRC
1135 * To the following SVGA3D instruction sequence.
1136 * FRC TMP, -SRC
1137 * ADD DST, SRC, TMP
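 *   (using the identity ceil(x) = x + frac(-x))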
1138 */
1139 static boolean
1140 emit_ceil(struct svga_shader_emitter *emit,
1141 const struct tgsi_full_instruction *insn)
1142 {
1143 SVGA3dShaderDestToken dst = translate_dst_register(emit, insn, 0);
1144 const struct src_register src0 =
1145 translate_src_register(emit, &insn->Src[0]);
1146 SVGA3dShaderDestToken temp = get_temp(emit);
1147
1148 /* FRC TMP, -SRC */
1149 if (!submit_op1(emit, inst_token(SVGA3DOP_FRC), temp, negate(src0)))
1150 return FALSE;
1151
1152 /* ADD DST, SRC, TMP */
1153 if (!submit_op2(emit, inst_token(SVGA3DOP_ADD), dst, src0, src(temp)))
1154 return FALSE;
1155
1156 return TRUE;
1157 }
1158
1159
1160 /**
1161 * Translate the following TGSI DIV instruction.
1162 * DIV DST.xy, SRC0, SRC1
1163 * To the following SVGA3D instruction sequence.
1164 * RCP TMP.x, SRC1.xxxx
1165 * RCP TMP.y, SRC1.yyyy
1166 * MUL DST.xy, SRC0, TMP
1167 */
1168 static boolean
1169 emit_div(struct svga_shader_emitter *emit,
1170 const struct tgsi_full_instruction *insn )
1171 {
1172 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1173 const struct src_register src0 =
1174 translate_src_register(emit, &insn->Src[0] );
1175 const struct src_register src1 =
1176 translate_src_register(emit, &insn->Src[1] );
1177 SVGA3dShaderDestToken temp = get_temp( emit );
1178 unsigned i;
1179
1180 /* For each enabled element, perform a RCP instruction. Note that
1181 * RCP is scalar in SVGA3D:
1182 */
1183 for (i = 0; i < 4; i++) {
1184 unsigned channel = 1 << i;
1185 if (dst.mask & channel) {
1186 /* RCP TMP.?, SRC1.???? */
1187 if (!submit_op1( emit, inst_token( SVGA3DOP_RCP ),
1188 writemask(temp, channel),
1189 scalar(src1, i) ))
1190 return FALSE;
1191 }
1192 }
1193
1194 /* Vector mul:
1195 * MUL DST, SRC0, TMP
1196 */
1197 if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ), dst, src0,
1198 src( temp ) ))
1199 return FALSE;
1200
1201 return TRUE;
1202 }
1203
1204
1205 /**
1206 * Translate the following TGSI DP2 instruction.
1207 * DP2 DST, SRC1, SRC2
1208 * To the following SVGA3D instruction sequence.
1209 * MUL TMP, SRC1, SRC2
1210 * ADD DST, TMP.xxxx, TMP.yyyy
1211 */
1212 static boolean
1213 emit_dp2(struct svga_shader_emitter *emit,
1214 const struct tgsi_full_instruction *insn )
1215 {
1216 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1217 const struct src_register src0 =
1218 translate_src_register(emit, &insn->Src[0]);
1219 const struct src_register src1 =
1220 translate_src_register(emit, &insn->Src[1]);
1221 SVGA3dShaderDestToken temp = get_temp( emit );
1222 struct src_register temp_src0, temp_src1;
1223
1224 /* MUL TMP, SRC1, SRC2 */
1225 if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ), temp, src0, src1 ))
1226 return FALSE;
1227
1228 temp_src0 = scalar(src( temp ), TGSI_SWIZZLE_X);
1229 temp_src1 = scalar(src( temp ), TGSI_SWIZZLE_Y);
1230
1231 /* ADD DST, TMP.xxxx, TMP.yyyy */
1232 if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ), dst,
1233 temp_src0, temp_src1 ))
1234 return FALSE;
1235
1236 return TRUE;
1237 }
1238
1239
1240 /**
1241 * Translate the following TGSI DPH instruction.
1242 * DPH DST, SRC1, SRC2
1243 * To the following SVGA3D instruction sequence.
1244 * DP3 TMP, SRC1, SRC2
1245 * ADD DST, TMP, SRC2.wwww
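 *   (i.e. the homogeneous dot product SRC1.xyz . SRC2.xyz + SRC2.w)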
1246 */
1247 static boolean
1248 emit_dph(struct svga_shader_emitter *emit,
1249 const struct tgsi_full_instruction *insn )
1250 {
1251 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1252 const struct src_register src0 = translate_src_register(
1253 emit, &insn->Src[0] );
1254 struct src_register src1 =
1255 translate_src_register(emit, &insn->Src[1]);
1256 SVGA3dShaderDestToken temp = get_temp( emit );
1257
1258 /* DP3 TMP, SRC1, SRC2 */
1259 if (!submit_op2( emit, inst_token( SVGA3DOP_DP3 ), temp, src0, src1 ))
1260 return FALSE;
1261
1262 src1 = scalar(src1, TGSI_SWIZZLE_W);
1263
1264 /* ADD DST, TMP, SRC2.wwww */
1265 if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ), dst,
1266 src( temp ), src1 ))
1267 return FALSE;
1268
1269 return TRUE;
1270 }
1271
1272
1273 /**
1274 * Sine / Cosine helper function.
1275 */
1276 static boolean
1277 do_emit_sincos(struct svga_shader_emitter *emit,
1278 SVGA3dShaderDestToken dst,
1279 struct src_register src0)
1280 {
1281 src0 = scalar(src0, TGSI_SWIZZLE_X);
1282 return submit_op1(emit, inst_token(SVGA3DOP_SINCOS), dst, src0);
1283 }
1284
1285
1286 /**
1287  * Translate/emit a TGSI SIN, COS or SCS instruction.
1288 */
1289 static boolean
1290 emit_sincos(struct svga_shader_emitter *emit,
1291 const struct tgsi_full_instruction *insn)
1292 {
1293 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1294 struct src_register src0 = translate_src_register(emit, &insn->Src[0]);
1295 SVGA3dShaderDestToken temp = get_temp( emit );
1296
1297 /* SCS TMP SRC */
1298 if (!do_emit_sincos(emit, writemask(temp, TGSI_WRITEMASK_XY), src0 ))
1299 return FALSE;
1300
1301 /* MOV DST TMP */
1302 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), dst, src( temp ) ))
1303 return FALSE;
1304
1305 return TRUE;
1306 }
1307
1308
1309 /**
1310 * Translate TGSI SIN instruction into:
1311 * SCS TMP SRC
1312 * MOV DST TMP.yyyy
1313 */
1314 static boolean
1315 emit_sin(struct svga_shader_emitter *emit,
1316 const struct tgsi_full_instruction *insn )
1317 {
1318 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1319 struct src_register src0 =
1320 translate_src_register(emit, &insn->Src[0] );
1321 SVGA3dShaderDestToken temp = get_temp( emit );
1322
1323 /* SCS TMP SRC */
1324 if (!do_emit_sincos(emit, writemask(temp, TGSI_WRITEMASK_Y), src0))
1325 return FALSE;
1326
1327 src0 = scalar(src( temp ), TGSI_SWIZZLE_Y);
1328
1329 /* MOV DST TMP.yyyy */
1330 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), dst, src0 ))
1331 return FALSE;
1332
1333 return TRUE;
1334 }
1335
1336
1337 /*
1338 * Translate TGSI COS instruction into:
1339 * SCS TMP SRC
1340 * MOV DST TMP.xxxx
1341 */
1342 static boolean
1343 emit_cos(struct svga_shader_emitter *emit,
1344 const struct tgsi_full_instruction *insn)
1345 {
1346 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1347 struct src_register src0 =
1348 translate_src_register(emit, &insn->Src[0] );
1349 SVGA3dShaderDestToken temp = get_temp( emit );
1350
1351 /* SCS TMP SRC */
1352 if (!do_emit_sincos( emit, writemask(temp, TGSI_WRITEMASK_X), src0 ))
1353 return FALSE;
1354
1355 src0 = scalar(src( temp ), TGSI_SWIZZLE_X);
1356
1357 /* MOV DST TMP.xxxx */
1358 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), dst, src0 ))
1359 return FALSE;
1360
1361 return TRUE;
1362 }
1363
1364
1365 /**
1366 * Translate/emit TGSI SSG (Set Sign: -1, 0, +1) instruction.
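 *
 * In the vertex shader this maps directly to SGN; in the fragment shader it
 * is expanded roughly as
 *   CMP TMP0, SRC,  {1},  {0}
 *   CMP TMP1, -SRC, {-1}, {0}
 *   ADD DST, TMP0, TMP1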
1367 */
1368 static boolean
1369 emit_ssg(struct svga_shader_emitter *emit,
1370 const struct tgsi_full_instruction *insn)
1371 {
1372 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1373 struct src_register src0 =
1374 translate_src_register(emit, &insn->Src[0] );
1375 SVGA3dShaderDestToken temp0 = get_temp( emit );
1376 SVGA3dShaderDestToken temp1 = get_temp( emit );
1377 struct src_register zero, one;
1378
1379 if (emit->unit == PIPE_SHADER_VERTEX) {
1380 /* SGN DST, SRC0, TMP0, TMP1 */
1381 return submit_op3( emit, inst_token( SVGA3DOP_SGN ), dst, src0,
1382 src( temp0 ), src( temp1 ) );
1383 }
1384
1385 one = get_one_immediate(emit);
1386 zero = get_zero_immediate(emit);
1387
1388 /* CMP TMP0, SRC0, one, zero */
1389 if (!submit_op3( emit, inst_token( SVGA3DOP_CMP ),
1390 writemask( temp0, dst.mask ), src0, one, zero ))
1391 return FALSE;
1392
1393 /* CMP TMP1, negate(SRC0), negate(one), zero */
1394 if (!submit_op3( emit, inst_token( SVGA3DOP_CMP ),
1395 writemask( temp1, dst.mask ), negate( src0 ), negate( one ),
1396 zero ))
1397 return FALSE;
1398
1399 /* ADD DST, TMP0, TMP1 */
1400 return submit_op2( emit, inst_token( SVGA3DOP_ADD ), dst, src( temp0 ),
1401 src( temp1 ) );
1402 }
1403
1404
1405 /**
1406 * Translate/emit TGSI SUB instruction as:
1407 * ADD DST, SRC0, negate(SRC1)
1408 */
1409 static boolean
1410 emit_sub(struct svga_shader_emitter *emit,
1411 const struct tgsi_full_instruction *insn)
1412 {
1413 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1414 struct src_register src0 = translate_src_register(
1415 emit, &insn->Src[0] );
1416 struct src_register src1 = translate_src_register(
1417 emit, &insn->Src[1] );
1418
1419 src1 = negate(src1);
1420
1421 if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ), dst,
1422 src0, src1 ))
1423 return FALSE;
1424
1425 return TRUE;
1426 }
1427
1428
1429 /**
1430 * Translate/emit KILL_IF instruction (kill if any of X,Y,Z,W are negative).
1431 */
1432 static boolean
1433 emit_kill_if(struct svga_shader_emitter *emit,
1434 const struct tgsi_full_instruction *insn)
1435 {
1436 const struct tgsi_full_src_register *reg = &insn->Src[0];
1437 struct src_register src0, srcIn;
1438 const boolean special = (reg->Register.Absolute ||
1439 reg->Register.Negate ||
1440 reg->Register.Indirect ||
1441 reg->Register.SwizzleX != 0 ||
1442 reg->Register.SwizzleY != 1 ||
1443 reg->Register.SwizzleZ != 2 ||
1444 reg->Register.File != TGSI_FILE_TEMPORARY);
1445 SVGA3dShaderDestToken temp;
1446
1447 src0 = srcIn = translate_src_register( emit, reg );
1448
1449 if (special) {
1450 /* need a temp reg */
1451 temp = get_temp( emit );
1452 }
1453
1454 if (special) {
1455 /* move the source into a temp register */
1456 submit_op1(emit, inst_token(SVGA3DOP_MOV), temp, src0);
1457
1458 src0 = src( temp );
1459 }
1460
1461 /* Do the texkill by checking if any of the XYZW components are < 0.
1462     * Note that ps_2_0 and later take XYZW into consideration, while ps_1_x
1463 * only used XYZ. The MSDN documentation about this is incorrect.
1464 */
1465 if (!submit_op0( emit, inst_token( SVGA3DOP_TEXKILL ), dst(src0) ))
1466 return FALSE;
1467
1468 return TRUE;
1469 }
1470
1471
1472 /**
1473 * Translate/emit unconditional kill instruction (usually found inside
1474 * an IF/ELSE/ENDIF block).
1475 */
1476 static boolean
1477 emit_kill(struct svga_shader_emitter *emit,
1478 const struct tgsi_full_instruction *insn)
1479 {
1480 SVGA3dShaderDestToken temp;
1481 struct src_register one = get_one_immediate(emit);
1482 SVGA3dShaderInstToken inst = inst_token( SVGA3DOP_TEXKILL );
1483
1484    /* texkill doesn't allow negation on the operand, so let's move
1485     * the negated {1} into a temp register */
1486 temp = get_temp( emit );
1487 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), temp,
1488 negate( one ) ))
1489 return FALSE;
1490
1491 return submit_op0( emit, inst, temp );
1492 }
1493
1494
1495 /**
1496 * Test if r1 and r2 are the same register.
1497 */
1498 static boolean
1499 same_register(struct src_register r1, struct src_register r2)
1500 {
1501 return (r1.base.num == r2.base.num &&
1502 r1.base.type_upper == r2.base.type_upper &&
1503 r1.base.type_lower == r2.base.type_lower);
1504 }
1505
1506
1507
1508 /**
1509 * Implement conditionals by initializing destination reg to 'fail',
1510 * then set predicate reg with UFOP_SETP, then move 'pass' to dest
1511  * then set the predicate reg with SETP, then move 'pass' to dest
1512 *
1513 * SETP src0, cmp, src1 -- do this first to avoid aliasing problems.
1514 * MOV dst, fail
1515 * MOV dst, pass, p0
1516 */
1517 static boolean
1518 emit_conditional(struct svga_shader_emitter *emit,
1519 unsigned compare_func,
1520 SVGA3dShaderDestToken dst,
1521 struct src_register src0,
1522 struct src_register src1,
1523 struct src_register pass,
1524 struct src_register fail)
1525 {
1526 SVGA3dShaderDestToken pred_reg = dst_register( SVGA3DREG_PREDICATE, 0 );
1527 SVGA3dShaderInstToken setp_token;
1528
1529 switch (compare_func) {
1530 case PIPE_FUNC_NEVER:
1531 return submit_op1( emit, inst_token( SVGA3DOP_MOV ),
1532 dst, fail );
1533 break;
1534 case PIPE_FUNC_LESS:
1535 setp_token = inst_token_setp(SVGA3DOPCOMP_LT);
1536 break;
1537 case PIPE_FUNC_EQUAL:
1538 setp_token = inst_token_setp(SVGA3DOPCOMP_EQ);
1539 break;
1540 case PIPE_FUNC_LEQUAL:
1541 setp_token = inst_token_setp(SVGA3DOPCOMP_LE);
1542 break;
1543 case PIPE_FUNC_GREATER:
1544 setp_token = inst_token_setp(SVGA3DOPCOMP_GT);
1545 break;
1546 case PIPE_FUNC_NOTEQUAL:
1547 setp_token = inst_token_setp(SVGA3DOPCOMPC_NE);
1548 break;
1549 case PIPE_FUNC_GEQUAL:
1550 setp_token = inst_token_setp(SVGA3DOPCOMP_GE);
1551 break;
1552 case PIPE_FUNC_ALWAYS:
1553 return submit_op1( emit, inst_token( SVGA3DOP_MOV ),
1554 dst, pass );
1555 break;
1556 }
1557
1558 if (same_register(src(dst), pass)) {
1559 /* We'll get bad results if the dst and pass registers are the same
1560 * so use a temp register containing pass.
1561 */
1562 SVGA3dShaderDestToken temp = get_temp(emit);
1563 if (!submit_op1(emit, inst_token(SVGA3DOP_MOV), temp, pass))
1564 return FALSE;
1565 pass = src(temp);
1566 }
1567
1568 /* SETP src0, COMPOP, src1 */
1569 if (!submit_op2( emit, setp_token, pred_reg,
1570 src0, src1 ))
1571 return FALSE;
1572
1573 /* MOV dst, fail */
1574 if (!submit_op1(emit, inst_token(SVGA3DOP_MOV), dst, fail))
1575 return FALSE;
1576
1577 /* MOV dst, pass (predicated)
1578 *
1579 * Note that the predicate reg (and possible modifiers) is passed
1580 * as the first source argument.
1581 */
1582 if (!submit_op2(emit,
1583 inst_token_predicated(SVGA3DOP_MOV), dst,
1584 src(pred_reg), pass))
1585 return FALSE;
1586
1587 return TRUE;
1588 }
1589
1590
1591 /**
1592  * Helper for emitting 'selection' commands.  Basically:
1593 * if (src0 OP src1)
1594 * dst = 1.0;
1595 * else
1596 * dst = 0.0;
1597 */
1598 static boolean
1599 emit_select(struct svga_shader_emitter *emit,
1600 unsigned compare_func,
1601 SVGA3dShaderDestToken dst,
1602 struct src_register src0,
1603 struct src_register src1 )
1604 {
1605 /* There are some SVGA instructions which implement some selects
1606 * directly, but they are only available in the vertex shader.
1607 */
1608 if (emit->unit == PIPE_SHADER_VERTEX) {
1609 switch (compare_func) {
1610 case PIPE_FUNC_GEQUAL:
1611 return submit_op2( emit, inst_token( SVGA3DOP_SGE ), dst, src0, src1 );
1612 case PIPE_FUNC_LEQUAL:
1613 return submit_op2( emit, inst_token( SVGA3DOP_SGE ), dst, src1, src0 );
1614 case PIPE_FUNC_GREATER:
1615 return submit_op2( emit, inst_token( SVGA3DOP_SLT ), dst, src1, src0 );
1616 case PIPE_FUNC_LESS:
1617 return submit_op2( emit, inst_token( SVGA3DOP_SLT ), dst, src0, src1 );
1618 default:
1619 break;
1620 }
1621 }
1622
1623 /* Otherwise, need to use the setp approach:
1624 */
1625 {
1626 struct src_register one, zero;
1627       /* zero immediate is {0, 0, 0, 0} */
1628 zero = get_zero_immediate(emit);
1629 one = get_one_immediate(emit);
1630
1631 return emit_conditional(emit, compare_func, dst, src0, src1, one, zero);
1632 }
1633 }
1634
1635
1636 /**
1637 * Translate/emit a TGSI SEQ, SNE, SLT, SGE, etc. instruction.
1638 */
1639 static boolean
1640 emit_select_op(struct svga_shader_emitter *emit,
1641 unsigned compare,
1642 const struct tgsi_full_instruction *insn)
1643 {
1644 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1645 struct src_register src0 = translate_src_register(
1646 emit, &insn->Src[0] );
1647 struct src_register src1 = translate_src_register(
1648 emit, &insn->Src[1] );
1649
1650 return emit_select( emit, compare, dst, src0, src1 );
1651 }
1652
1653
1654 /**
1655 * Translate TGSI CMP instruction. Component-wise:
1656 * dst = (src0 < 0.0) ? src1 : src2
1657 */
1658 static boolean
1659 emit_cmp(struct svga_shader_emitter *emit,
1660 const struct tgsi_full_instruction *insn)
1661 {
1662 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1663 const struct src_register src0 =
1664 translate_src_register(emit, &insn->Src[0] );
1665 const struct src_register src1 =
1666 translate_src_register(emit, &insn->Src[1] );
1667 const struct src_register src2 =
1668 translate_src_register(emit, &insn->Src[2] );
1669
1670 if (emit->unit == PIPE_SHADER_VERTEX) {
1671 struct src_register zero = get_zero_immediate(emit);
1672 /* We used to simulate CMP with SLT+LRP. But that didn't work when
1673 * src1 or src2 was Inf/NaN. In particular, GLSL sqrt(0) failed
1674 * because it involves a CMP to handle the 0 case.
1675 * Use a conditional expression instead.
1676 */
1677 return emit_conditional(emit, PIPE_FUNC_LESS, dst,
1678 src0, zero, src1, src2);
1679 }
1680 else {
1681 assert(emit->unit == PIPE_SHADER_FRAGMENT);
1682
1683 /* CMP DST, SRC0, SRC2, SRC1 */
1684 return submit_op3( emit, inst_token( SVGA3DOP_CMP ), dst,
1685 src0, src2, src1);
1686 }
1687 }
1688
1689
1690 /**
1691 * Translate/emit 2-operand (coord, sampler) texture instructions.
1692 */
1693 static boolean
1694 emit_tex2(struct svga_shader_emitter *emit,
1695 const struct tgsi_full_instruction *insn,
1696 SVGA3dShaderDestToken dst)
1697 {
1698 SVGA3dShaderInstToken inst;
1699 struct src_register texcoord;
1700 struct src_register sampler;
1701 SVGA3dShaderDestToken tmp;
1702
1703 inst.value = 0;
1704
1705 switch (insn->Instruction.Opcode) {
1706 case TGSI_OPCODE_TEX:
1707 inst.op = SVGA3DOP_TEX;
1708 break;
1709 case TGSI_OPCODE_TXP:
1710 inst.op = SVGA3DOP_TEX;
1711 inst.control = SVGA3DOPCONT_PROJECT;
1712 break;
1713 case TGSI_OPCODE_TXB:
1714 inst.op = SVGA3DOP_TEX;
1715 inst.control = SVGA3DOPCONT_BIAS;
1716 break;
1717 case TGSI_OPCODE_TXL:
1718 inst.op = SVGA3DOP_TEXLDL;
1719 break;
1720 default:
1721 assert(0);
1722 return FALSE;
1723 }
1724
1725 texcoord = translate_src_register( emit, &insn->Src[0] );
1726 sampler = translate_src_register( emit, &insn->Src[1] );
1727
1728 if (emit->key.tex[sampler.base.num].unnormalized ||
1729 emit->dynamic_branching_level > 0)
1730 tmp = get_temp( emit );
1731
1732 /* Can't do mipmapping inside dynamic branch constructs. Force LOD
1733 * zero in that case.
1734 */
1735 if (emit->dynamic_branching_level > 0 &&
1736 inst.op == SVGA3DOP_TEX &&
1737 SVGA3dShaderGetRegType(texcoord.base.value) == SVGA3DREG_TEMP) {
1738 struct src_register zero = get_zero_immediate(emit);
1739
1740 /* MOV tmp, texcoord */
1741 if (!submit_op1( emit,
1742 inst_token( SVGA3DOP_MOV ),
1743 tmp,
1744 texcoord ))
1745 return FALSE;
1746
1747 /* MOV tmp.w, zero */
1748 if (!submit_op1( emit,
1749 inst_token( SVGA3DOP_MOV ),
1750 writemask( tmp, TGSI_WRITEMASK_W ),
1751 zero ))
1752 return FALSE;
1753
1754 texcoord = src( tmp );
1755 inst.op = SVGA3DOP_TEXLDL;
1756 }
1757
1758 /* Explicit normalization of texcoords:
1759 */
1760 if (emit->key.tex[sampler.base.num].unnormalized) {
1761 struct src_register wh = get_tex_dimensions( emit, sampler.base.num );
1762
1763 /* MUL tmp, SRC0, WH */
1764 if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ),
1765 tmp, texcoord, wh ))
1766 return FALSE;
1767
1768 texcoord = src( tmp );
1769 }
1770
1771 return submit_op2( emit, inst, dst, texcoord, sampler );
1772 }
1773
1774
1775 /**
1776 * Translate/emit 4-operand (coord, ddx, ddy, sampler) texture instructions.
1777 */
1778 static boolean
1779 emit_tex4(struct svga_shader_emitter *emit,
1780 const struct tgsi_full_instruction *insn,
1781 SVGA3dShaderDestToken dst )
1782 {
1783 SVGA3dShaderInstToken inst;
1784 struct src_register texcoord;
1785 struct src_register ddx;
1786 struct src_register ddy;
1787 struct src_register sampler;
1788
1789 texcoord = translate_src_register( emit, &insn->Src[0] );
1790 ddx = translate_src_register( emit, &insn->Src[1] );
1791 ddy = translate_src_register( emit, &insn->Src[2] );
1792 sampler = translate_src_register( emit, &insn->Src[3] );
1793
1794 inst.value = 0;
1795
1796 switch (insn->Instruction.Opcode) {
1797 case TGSI_OPCODE_TXD:
1798 inst.op = SVGA3DOP_TEXLDD; /* 4 args! */
1799 break;
1800 default:
1801 assert(0);
1802 return FALSE;
1803 }
1804
1805 return submit_op4( emit, inst, dst, texcoord, sampler, ddx, ddy );
1806 }
1807
1808
1809 /**
1810 * Emit texture swizzle code. We do this here since SVGA samplers don't
1811 * directly support swizzles.
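 *
 * Components that select the constant 0 or 1 (PIPE_SWIZZLE_ZERO/ONE) are
 * written from the common immediate; the remaining components are written
 * with a single swizzled MOV.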
1812 */
1813 static boolean
1814 emit_tex_swizzle(struct svga_shader_emitter *emit,
1815 SVGA3dShaderDestToken dst,
1816 struct src_register src,
1817 unsigned swizzle_x,
1818 unsigned swizzle_y,
1819 unsigned swizzle_z,
1820 unsigned swizzle_w)
1821 {
1822 const unsigned swizzleIn[4] = {swizzle_x, swizzle_y, swizzle_z, swizzle_w};
1823 unsigned srcSwizzle[4];
1824 unsigned srcWritemask = 0x0, zeroWritemask = 0x0, oneWritemask = 0x0;
1825 unsigned i;
1826
1827 /* build writemasks and srcSwizzle terms */
1828 for (i = 0; i < 4; i++) {
1829 if (swizzleIn[i] == PIPE_SWIZZLE_ZERO) {
1830 srcSwizzle[i] = TGSI_SWIZZLE_X + i;
1831 zeroWritemask |= (1 << i);
1832 }
1833 else if (swizzleIn[i] == PIPE_SWIZZLE_ONE) {
1834 srcSwizzle[i] = TGSI_SWIZZLE_X + i;
1835 oneWritemask |= (1 << i);
1836 }
1837 else {
1838 srcSwizzle[i] = swizzleIn[i];
1839 srcWritemask |= (1 << i);
1840 }
1841 }
1842
1843 /* write x/y/z/w comps */
1844 if (dst.mask & srcWritemask) {
1845 if (!submit_op1(emit,
1846 inst_token(SVGA3DOP_MOV),
1847 writemask(dst, srcWritemask),
1848 swizzle(src,
1849 srcSwizzle[0],
1850 srcSwizzle[1],
1851 srcSwizzle[2],
1852 srcSwizzle[3])))
1853 return FALSE;
1854 }
1855
1856 /* write 0 comps */
1857 if (dst.mask & zeroWritemask) {
1858 if (!submit_op1(emit,
1859 inst_token(SVGA3DOP_MOV),
1860 writemask(dst, zeroWritemask),
1861 get_zero_immediate(emit)))
1862 return FALSE;
1863 }
1864
1865 /* write 1 comps */
1866 if (dst.mask & oneWritemask) {
1867 if (!submit_op1(emit,
1868 inst_token(SVGA3DOP_MOV),
1869 writemask(dst, oneWritemask),
1870 get_one_immediate(emit)))
1871 return FALSE;
1872 }
1873
1874 return TRUE;
1875 }
1876
1877
1878 /**
1879 * Translate/emit a TGSI texture sample instruction.
1880 */
1881 static boolean
1882 emit_tex(struct svga_shader_emitter *emit,
1883 const struct tgsi_full_instruction *insn)
1884 {
1885 SVGA3dShaderDestToken dst =
1886 translate_dst_register( emit, insn, 0 );
1887 struct src_register src0 =
1888 translate_src_register( emit, &insn->Src[0] );
1889 struct src_register src1 =
1890 translate_src_register( emit, &insn->Src[1] );
1891
1892 SVGA3dShaderDestToken tex_result;
1893 const unsigned unit = src1.base.num;
1894
1895 /* check for shadow samplers */
1896 boolean compare = (emit->key.tex[unit].compare_mode ==
1897 PIPE_TEX_COMPARE_R_TO_TEXTURE);
1898
1899 /* texture swizzle */
1900 boolean swizzle = (emit->key.tex[unit].swizzle_r != PIPE_SWIZZLE_RED ||
1901 emit->key.tex[unit].swizzle_g != PIPE_SWIZZLE_GREEN ||
1902 emit->key.tex[unit].swizzle_b != PIPE_SWIZZLE_BLUE ||
1903 emit->key.tex[unit].swizzle_a != PIPE_SWIZZLE_ALPHA);
1904
1905 boolean saturate = insn->Instruction.Saturate;
1906
1907 /* If doing compare processing or tex swizzle or saturation, we need to put
1908 * the fetched color into a temporary so it can be used as a source later on.
1909 */
1910 if (compare || swizzle || saturate) {
1911 tex_result = get_temp( emit );
1912 }
1913 else {
1914 tex_result = dst;
1915 }
1916
1917 switch(insn->Instruction.Opcode) {
1918 case TGSI_OPCODE_TEX:
1919 case TGSI_OPCODE_TXB:
1920 case TGSI_OPCODE_TXP:
1921 case TGSI_OPCODE_TXL:
1922 if (!emit_tex2( emit, insn, tex_result ))
1923 return FALSE;
1924 break;
1925 case TGSI_OPCODE_TXD:
1926 if (!emit_tex4( emit, insn, tex_result ))
1927 return FALSE;
1928 break;
1929 default:
1930 assert(0);
1931 }
1932
1933 if (compare) {
1934 SVGA3dShaderDestToken dst2;
1935
1936 if (swizzle || saturate)
1937 dst2 = tex_result;
1938 else
1939 dst2 = dst;
1940
1941 if (dst.mask & TGSI_WRITEMASK_XYZ) {
1942 SVGA3dShaderDestToken src0_zdivw = get_temp( emit );
1943 /* When sampling a depth texture, the result of the comparison is in
1944 * the Y component.
1945 */
1946 struct src_register tex_src_x = scalar(src(tex_result), TGSI_SWIZZLE_Y);
1947 struct src_register r_coord;
1948
1949 if (insn->Instruction.Opcode == TGSI_OPCODE_TXP) {
1950 /* Divide texcoord R by Q */
1951 if (!submit_op1( emit, inst_token( SVGA3DOP_RCP ),
1952 writemask(src0_zdivw, TGSI_WRITEMASK_X),
1953 scalar(src0, TGSI_SWIZZLE_W) ))
1954 return FALSE;
1955
1956 if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ),
1957 writemask(src0_zdivw, TGSI_WRITEMASK_X),
1958 scalar(src0, TGSI_SWIZZLE_Z),
1959 scalar(src(src0_zdivw), TGSI_SWIZZLE_X) ))
1960 return FALSE;
1961
1962 r_coord = scalar(src(src0_zdivw), TGSI_SWIZZLE_X);
1963 }
1964 else {
1965 r_coord = scalar(src0, TGSI_SWIZZLE_Z);
1966 }
1967
1968 /* Compare texture sample value against R component of texcoord */
1969 if (!emit_select(emit,
1970 emit->key.tex[unit].compare_func,
1971 writemask( dst2, TGSI_WRITEMASK_XYZ ),
1972 r_coord,
1973 tex_src_x))
1974 return FALSE;
1975 }
1976
1977 if (dst.mask & TGSI_WRITEMASK_W) {
1978 struct src_register one = get_one_immediate(emit);
1979
1980 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
1981 writemask( dst2, TGSI_WRITEMASK_W ),
1982 one ))
1983 return FALSE;
1984 }
1985 }
1986
1987 if (saturate && !swizzle) {
1988 /* MOV_SAT real_dst, dst */
1989 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), dst, src(tex_result) ))
1990 return FALSE;
1991 }
1992 else if (swizzle) {
1993 /* swizzle from tex_result to dst (handles saturation too, if any) */
1994 emit_tex_swizzle(emit,
1995 dst, src(tex_result),
1996 emit->key.tex[unit].swizzle_r,
1997 emit->key.tex[unit].swizzle_g,
1998 emit->key.tex[unit].swizzle_b,
1999 emit->key.tex[unit].swizzle_a);
2000 }
2001
2002 return TRUE;
2003 }
2004
2005
2006 static boolean
2007 emit_bgnloop(struct svga_shader_emitter *emit,
2008 const struct tgsi_full_instruction *insn)
2009 {
2010 SVGA3dShaderInstToken inst = inst_token( SVGA3DOP_LOOP );
2011 struct src_register loop_reg = src_register( SVGA3DREG_LOOP, 0 );
2012 struct src_register const_int = get_loop_const( emit );
2013
2014 emit->dynamic_branching_level++;
2015
2016 return (emit_instruction( emit, inst ) &&
2017 emit_src( emit, loop_reg ) &&
2018 emit_src( emit, const_int ) );
2019 }
2020
2021
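/**
* Translate/emit TGSI ENDLOOP (closes the innermost LOOP).
*/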
2022 static boolean
2023 emit_endloop(struct svga_shader_emitter *emit,
2024 const struct tgsi_full_instruction *insn)
2025 {
2026 SVGA3dShaderInstToken inst = inst_token( SVGA3DOP_ENDLOOP );
2027
2028 emit->dynamic_branching_level--;
2029
2030 return emit_instruction( emit, inst );
2031 }
2032
2033
2034 /**
2035 * Translate/emit TGSI BREAK (out of loop) instruction.
2036 */
2037 static boolean
2038 emit_brk(struct svga_shader_emitter *emit,
2039 const struct tgsi_full_instruction *insn)
2040 {
2041 SVGA3dShaderInstToken inst = inst_token( SVGA3DOP_BREAK );
2042 return emit_instruction( emit, inst );
2043 }
2044
2045
2046 /**
2047  * Emit a simple instruction which operates on one scalar value (not
2048 * a vector). Ex: LG2, RCP, RSQ.
2049 */
2050 static boolean
2051 emit_scalar_op1(struct svga_shader_emitter *emit,
2052 unsigned opcode,
2053 const struct tgsi_full_instruction *insn)
2054 {
2055 SVGA3dShaderInstToken inst;
2056 SVGA3dShaderDestToken dst;
2057 struct src_register src;
2058
2059 inst = inst_token( opcode );
2060 dst = translate_dst_register( emit, insn, 0 );
2061 src = translate_src_register( emit, &insn->Src[0] );
2062 src = scalar( src, TGSI_SWIZZLE_X );
2063
2064 return submit_op1( emit, inst, dst, src );
2065 }
2066
2067
2068 /**
2069 * Translate/emit a simple instruction (one which has no special-case
2070 * code) such as ADD, MUL, MIN, MAX.
2071 */
2072 static boolean
2073 emit_simple_instruction(struct svga_shader_emitter *emit,
2074 unsigned opcode,
2075 const struct tgsi_full_instruction *insn)
2076 {
2077 const struct tgsi_full_src_register *src = insn->Src;
2078 SVGA3dShaderInstToken inst;
2079 SVGA3dShaderDestToken dst;
2080
2081 inst = inst_token( opcode );
2082 dst = translate_dst_register( emit, insn, 0 );
2083
2084 switch (insn->Instruction.NumSrcRegs) {
2085 case 0:
2086 return submit_op0( emit, inst, dst );
2087 case 1:
2088 return submit_op1( emit, inst, dst,
2089 translate_src_register( emit, &src[0] ));
2090 case 2:
2091 return submit_op2( emit, inst, dst,
2092 translate_src_register( emit, &src[0] ),
2093 translate_src_register( emit, &src[1] ) );
2094 case 3:
2095 return submit_op3( emit, inst, dst,
2096 translate_src_register( emit, &src[0] ),
2097 translate_src_register( emit, &src[1] ),
2098 translate_src_register( emit, &src[2] ) );
2099 default:
2100 assert(0);
2101 return FALSE;
2102 }
2103 }
2104
2105
2106 /**
2107  * TGSI_OPCODE_MOV is only special-cased here to detect the
2108 * svga_fragment_shader::constant_color_output case.
2109 */
2110 static boolean
2111 emit_mov(struct svga_shader_emitter *emit,
2112 const struct tgsi_full_instruction *insn)
2113 {
2114 const struct tgsi_full_src_register *src = &insn->Src[0];
2115 const struct tgsi_full_dst_register *dst = &insn->Dst[0];
2116
2117 if (emit->unit == PIPE_SHADER_FRAGMENT &&
2118 dst->Register.File == TGSI_FILE_OUTPUT &&
2119 dst->Register.Index == 0 &&
2120 src->Register.File == TGSI_FILE_CONSTANT &&
2121 !src->Register.Indirect) {
2122 emit->constant_color_output = TRUE;
2123 }
2124
2125 return emit_simple_instruction(emit, SVGA3DOP_MOV, insn);
2126 }
2127
2128
2129 /**
2130 * Translate/emit TGSI DDX, DDY instructions.
2131 */
2132 static boolean
2133 emit_deriv(struct svga_shader_emitter *emit,
2134 const struct tgsi_full_instruction *insn )
2135 {
2136 if (emit->dynamic_branching_level > 0 &&
2137 insn->Src[0].Register.File == TGSI_FILE_TEMPORARY)
2138 {
2139 SVGA3dShaderDestToken dst =
2140 translate_dst_register( emit, insn, 0 );
2141
2142       /* Deriv opcodes aren't valid inside dynamic branching; work around
2143        * this by zeroing out the destination.
2144 */
2145 if (!submit_op1(emit,
2146 inst_token( SVGA3DOP_MOV ),
2147 dst,
2148 get_zero_immediate(emit)))
2149 return FALSE;
2150
2151 return TRUE;
2152 }
2153 else {
2154 unsigned opcode;
2155 const struct tgsi_full_src_register *reg = &insn->Src[0];
2156 SVGA3dShaderInstToken inst;
2157 SVGA3dShaderDestToken dst;
2158 struct src_register src0;
2159
2160 switch (insn->Instruction.Opcode) {
2161 case TGSI_OPCODE_DDX:
2162 opcode = SVGA3DOP_DSX;
2163 break;
2164 case TGSI_OPCODE_DDY:
2165 opcode = SVGA3DOP_DSY;
2166 break;
2167 default:
2168 return FALSE;
2169 }
2170
2171 inst = inst_token( opcode );
2172 dst = translate_dst_register( emit, insn, 0 );
2173 src0 = translate_src_register( emit, reg );
2174
2175       /* We cannot use negate or abs modifiers on the source of a dsx/dsy instruction.
2176 */
2177 if (reg->Register.Absolute ||
2178 reg->Register.Negate) {
2179 SVGA3dShaderDestToken temp = get_temp( emit );
2180
2181 if (!emit_repl( emit, temp, &src0 ))
2182 return FALSE;
2183 }
2184
2185 return submit_op1( emit, inst, dst, src0 );
2186 }
2187 }
2188
2189
2190 /**
2191  * Translate/emit ARL (Address Register Load) instruction, which moves
2192  * a value into the special 'address' register.  It is used to implement
2193  * indirect/variable indexing into arrays.
2194 */
2195 static boolean
2196 emit_arl(struct svga_shader_emitter *emit,
2197 const struct tgsi_full_instruction *insn)
2198 {
2199 ++emit->current_arl;
2200 if (emit->unit == PIPE_SHADER_FRAGMENT) {
2201 /* MOVA not present in pixel shader instruction set.
2202 * Ignore this instruction altogether since it is
2203 * only used for loop counters -- and for that
2204 * we reference aL directly.
2205 */
2206 return TRUE;
2207 }
2208 if (svga_arl_needs_adjustment( emit )) {
2209 return emit_fake_arl( emit, insn );
2210 } else {
2211 /* no need to adjust, just emit straight arl */
2212 return emit_simple_instruction(emit, SVGA3DOP_MOVA, insn);
2213 }
2214 }
2215
2216
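/**
* Translate/emit TGSI POW instruction: dst = pow(src0.x, src1.x),
* replicated to all written channels.
*/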
2217 static boolean
2218 emit_pow(struct svga_shader_emitter *emit,
2219 const struct tgsi_full_instruction *insn)
2220 {
2221 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
2222 struct src_register src0 = translate_src_register(
2223 emit, &insn->Src[0] );
2224 struct src_register src1 = translate_src_register(
2225 emit, &insn->Src[1] );
2226 boolean need_tmp = FALSE;
2227
2228 /* POW can only output to a temporary */
2229 if (insn->Dst[0].Register.File != TGSI_FILE_TEMPORARY)
2230 need_tmp = TRUE;
2231
2232 /* POW src1 must not be the same register as dst */
2233 if (alias_src_dst( src1, dst ))
2234 need_tmp = TRUE;
2235
2236 /* it's a scalar op */
2237 src0 = scalar( src0, TGSI_SWIZZLE_X );
2238 src1 = scalar( src1, TGSI_SWIZZLE_X );
2239
2240 if (need_tmp) {
2241 SVGA3dShaderDestToken tmp =
2242 writemask(get_temp( emit ), TGSI_WRITEMASK_X );
2243
2244 if (!submit_op2(emit, inst_token( SVGA3DOP_POW ), tmp, src0, src1))
2245 return FALSE;
2246
2247 return submit_op1(emit, inst_token( SVGA3DOP_MOV ),
2248 dst, scalar(src(tmp), 0) );
2249 }
2250 else {
2251 return submit_op2(emit, inst_token( SVGA3DOP_POW ), dst, src0, src1);
2252 }
2253 }
2254
2255
2256 /**
2257 * Translate/emit TGSI XPD (vector cross product) instruction.
2258 */
2259 static boolean
2260 emit_xpd(struct svga_shader_emitter *emit,
2261 const struct tgsi_full_instruction *insn)
2262 {
2263 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
2264 const struct src_register src0 = translate_src_register(
2265 emit, &insn->Src[0] );
2266 const struct src_register src1 = translate_src_register(
2267 emit, &insn->Src[1] );
2268 boolean need_dst_tmp = FALSE;
2269
2270 /* XPD can only output to a temporary */
2271 if (SVGA3dShaderGetRegType(dst.value) != SVGA3DREG_TEMP)
2272 need_dst_tmp = TRUE;
2273
2274    /* The dst reg must not be the same as src0 or src1 */
2275 if (alias_src_dst(src0, dst) ||
2276 alias_src_dst(src1, dst))
2277 need_dst_tmp = TRUE;
2278
2279 if (need_dst_tmp) {
2280 SVGA3dShaderDestToken tmp = get_temp( emit );
2281
2282 /* Obey DX9 restrictions on mask:
2283 */
2284 tmp.mask = dst.mask & TGSI_WRITEMASK_XYZ;
2285
2286 if (!submit_op2(emit, inst_token( SVGA3DOP_CRS ), tmp, src0, src1))
2287 return FALSE;
2288
2289 if (!submit_op1(emit, inst_token( SVGA3DOP_MOV ), dst, src( tmp )))
2290 return FALSE;
2291 }
2292 else {
2293 if (!submit_op2(emit, inst_token( SVGA3DOP_CRS ), dst, src0, src1))
2294 return FALSE;
2295 }
2296
2297 /* Need to emit 1.0 to dst.w?
2298 */
2299 if (dst.mask & TGSI_WRITEMASK_W) {
2300 struct src_register one = get_one_immediate( emit );
2301
2302 if (!submit_op1(emit,
2303 inst_token( SVGA3DOP_MOV ),
2304 writemask(dst, TGSI_WRITEMASK_W),
2305 one))
2306 return FALSE;
2307 }
2308
2309 return TRUE;
2310 }
2311
2312
2313 /**
2314 * Emit a LRP (linear interpolation) instruction.
2315 */
2316 static boolean
2317 submit_lrp(struct svga_shader_emitter *emit,
2318 SVGA3dShaderDestToken dst,
2319 struct src_register src0,
2320 struct src_register src1,
2321 struct src_register src2)
2322 {
2323 SVGA3dShaderDestToken tmp;
2324 boolean need_dst_tmp = FALSE;
2325
2326 /* The dst reg must be a temporary, and not be the same as src0 or src2 */
2327 if (SVGA3dShaderGetRegType(dst.value) != SVGA3DREG_TEMP ||
2328 alias_src_dst(src0, dst) ||
2329 alias_src_dst(src2, dst))
2330 need_dst_tmp = TRUE;
2331
2332 if (need_dst_tmp) {
2333 tmp = get_temp( emit );
2334 tmp.mask = dst.mask;
2335 }
2336 else {
2337 tmp = dst;
2338 }
2339
2340 if (!submit_op3(emit, inst_token( SVGA3DOP_LRP ), tmp, src0, src1, src2))
2341 return FALSE;
2342
2343 if (need_dst_tmp) {
2344 if (!submit_op1(emit, inst_token( SVGA3DOP_MOV ), dst, src( tmp )))
2345 return FALSE;
2346 }
2347
2348 return TRUE;
2349 }
2350
2351
2352 /**
2353 * Translate/emit LRP (Linear Interpolation) instruction.
2354 */
2355 static boolean
2356 emit_lrp(struct svga_shader_emitter *emit,
2357 const struct tgsi_full_instruction *insn)
2358 {
2359 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
2360 const struct src_register src0 = translate_src_register(
2361 emit, &insn->Src[0] );
2362 const struct src_register src1 = translate_src_register(
2363 emit, &insn->Src[1] );
2364 const struct src_register src2 = translate_src_register(
2365 emit, &insn->Src[2] );
2366
2367 return submit_lrp(emit, dst, src0, src1, src2);
2368 }
2369
2370 /**
2371  * Translate/emit DST (distance vector) instruction.
2372 */
2373 static boolean
2374 emit_dst_insn(struct svga_shader_emitter *emit,
2375 const struct tgsi_full_instruction *insn)
2376 {
2377 if (emit->unit == PIPE_SHADER_VERTEX) {
2378 /* SVGA/DX9 has a DST instruction, but only for vertex shaders:
2379 */
2380 return emit_simple_instruction(emit, SVGA3DOP_DST, insn);
2381 }
2382 else {
2383 /* result[0] = 1 * 1;
2384 * result[1] = a[1] * b[1];
2385 * result[2] = a[2] * 1;
2386 * result[3] = 1 * b[3];
2387 */
2388 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
2389 SVGA3dShaderDestToken tmp;
2390 const struct src_register src0 = translate_src_register(
2391 emit, &insn->Src[0] );
2392 const struct src_register src1 = translate_src_register(
2393 emit, &insn->Src[1] );
2394 boolean need_tmp = FALSE;
2395
2396 if (SVGA3dShaderGetRegType(dst.value) != SVGA3DREG_TEMP ||
2397 alias_src_dst(src0, dst) ||
2398 alias_src_dst(src1, dst))
2399 need_tmp = TRUE;
2400
2401 if (need_tmp) {
2402 tmp = get_temp( emit );
2403 }
2404 else {
2405 tmp = dst;
2406 }
2407
2408 /* tmp.xw = 1.0
2409 */
2410 if (tmp.mask & TGSI_WRITEMASK_XW) {
2411 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
2412 writemask(tmp, TGSI_WRITEMASK_XW ),
2413 get_one_immediate(emit)))
2414 return FALSE;
2415 }
2416
2417 /* tmp.yz = src0
2418 */
2419 if (tmp.mask & TGSI_WRITEMASK_YZ) {
2420 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
2421 writemask(tmp, TGSI_WRITEMASK_YZ ),
2422 src0))
2423 return FALSE;
2424 }
2425
2426 /* tmp.yw = tmp * src1
2427 */
2428 if (tmp.mask & TGSI_WRITEMASK_YW) {
2429 if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ),
2430 writemask(tmp, TGSI_WRITEMASK_YW ),
2431 src(tmp),
2432 src1))
2433 return FALSE;
2434 }
2435
2436 /* dst = tmp
2437 */
2438 if (need_tmp) {
2439 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
2440 dst,
2441 src(tmp)))
2442 return FALSE;
2443 }
2444 }
2445
2446 return TRUE;
2447 }
2448
2449
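/**
* Translate/emit TGSI EXP instruction:
*   dst.x = 2 ^ floor(src0.x)
*   dst.y = src0.x - floor(src0.x)
*   dst.z = 2 ^ src0.x   (partial precision)
*   dst.w = 1.0
*/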
2450 static boolean
2451 emit_exp(struct svga_shader_emitter *emit,
2452 const struct tgsi_full_instruction *insn)
2453 {
2454 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
2455 struct src_register src0 =
2456 translate_src_register( emit, &insn->Src[0] );
2457 SVGA3dShaderDestToken fraction;
2458
2459 if (dst.mask & TGSI_WRITEMASK_Y)
2460 fraction = dst;
2461 else if (dst.mask & TGSI_WRITEMASK_X)
2462 fraction = get_temp( emit );
2463 else
2464 fraction.value = 0;
2465
2466    /* If x or y is being written, compute src0 - floor(src0); the fraction is needed for both.
2467     */
2468 if (dst.mask & TGSI_WRITEMASK_XY) {
2469 if (!submit_op1( emit, inst_token( SVGA3DOP_FRC ),
2470 writemask( fraction, TGSI_WRITEMASK_Y ),
2471 src0 ))
2472 return FALSE;
2473 }
2474
2475 /* If x is being written, fill it with 2 ^ floor(src0).
2476 */
2477 if (dst.mask & TGSI_WRITEMASK_X) {
2478 if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ),
2479 writemask( dst, TGSI_WRITEMASK_X ),
2480 src0,
2481 scalar( negate( src( fraction ) ), TGSI_SWIZZLE_Y ) ) )
2482 return FALSE;
2483
2484 if (!submit_op1( emit, inst_token( SVGA3DOP_EXP ),
2485 writemask( dst, TGSI_WRITEMASK_X ),
2486 scalar( src( dst ), TGSI_SWIZZLE_X ) ) )
2487 return FALSE;
2488
2489 if (!(dst.mask & TGSI_WRITEMASK_Y))
2490 release_temp( emit, fraction );
2491 }
2492
2493 /* If z is being written, fill it with 2 ^ src0 (partial precision).
2494 */
2495 if (dst.mask & TGSI_WRITEMASK_Z) {
2496 if (!submit_op1( emit, inst_token( SVGA3DOP_EXPP ),
2497 writemask( dst, TGSI_WRITEMASK_Z ),
2498 src0 ) )
2499 return FALSE;
2500 }
2501
2502 /* If w is being written, fill it with one.
2503 */
2504 if (dst.mask & TGSI_WRITEMASK_W) {
2505 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
2506 writemask(dst, TGSI_WRITEMASK_W),
2507 get_one_immediate(emit)))
2508 return FALSE;
2509 }
2510
2511 return TRUE;
2512 }
2513
2514
2515 /**
2516 * Translate/emit LIT (Lighting helper) instruction.
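 * LIT computes:
 *   dst.x = 1
 *   dst.y = (src.x > 0) ? src.x : 0
 *   dst.z = (src.x > 0) ? pow(src.y, src.w) : 0
 *   dst.w = 1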
2517 */
2518 static boolean
2519 emit_lit(struct svga_shader_emitter *emit,
2520 const struct tgsi_full_instruction *insn)
2521 {
2522 if (emit->unit == PIPE_SHADER_VERTEX) {
2523 /* SVGA/DX9 has a LIT instruction, but only for vertex shaders:
2524 */
2525 return emit_simple_instruction(emit, SVGA3DOP_LIT, insn);
2526 }
2527 else {
2528       /* D3D vs. GL semantics can be fairly easily accommodated by
2529 * variations on this sequence.
2530 *
2531 * GL:
2532 * tmp.y = src.x
2533 * tmp.z = pow(src.y,src.w)
2534 * p0 = src0.xxxx > 0
2535 * result = zero.wxxw
2536 * (p0) result.yz = tmp
2537 *
2538 * D3D:
2539 * tmp.y = src.x
2540 * tmp.z = pow(src.y,src.w)
2541 * p0 = src0.xxyy > 0
2542 * result = zero.wxxw
2543 * (p0) result.yz = tmp
2544 *
2545 * Will implement the GL version for now.
2546 */
2547 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
2548 SVGA3dShaderDestToken tmp = get_temp( emit );
2549 const struct src_register src0 = translate_src_register(
2550 emit, &insn->Src[0] );
2551
2552 /* tmp = pow(src.y, src.w)
2553 */
2554 if (dst.mask & TGSI_WRITEMASK_Z) {
2555 if (!submit_op2(emit, inst_token( SVGA3DOP_POW ),
2556 tmp,
2557 scalar(src0, 1),
2558 scalar(src0, 3)))
2559 return FALSE;
2560 }
2561
2562 /* tmp.y = src.x
2563 */
2564 if (dst.mask & TGSI_WRITEMASK_Y) {
2565 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
2566 writemask(tmp, TGSI_WRITEMASK_Y ),
2567 scalar(src0, 0)))
2568 return FALSE;
2569 }
2570
2571       /* Can't quite do this with emit_conditional() due to the extra
2572 * writemask on the predicated mov:
2573 */
2574 {
2575 SVGA3dShaderDestToken pred_reg = dst_register( SVGA3DREG_PREDICATE, 0 );
2576 struct src_register predsrc;
2577
2578 /* D3D vs GL semantics:
2579 */
2580 if (0)
2581 predsrc = swizzle(src0, 0, 0, 1, 1); /* D3D */
2582 else
2583 predsrc = swizzle(src0, 0, 0, 0, 0); /* GL */
2584
2585          /* SETP predsrc, GT, {0}.x  (predsrc is src0.xxxx for GL) */
2586 if (!submit_op2( emit,
2587 inst_token_setp(SVGA3DOPCOMP_GT),
2588 pred_reg,
2589 predsrc,
2590 get_zero_immediate(emit)))
2591 return FALSE;
2592
2593 /* MOV dst, fail */
2594 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), dst,
2595 get_immediate(emit, 1.0f, 0.0f, 0.0f, 1.0f)))
2596 return FALSE;
2597
2598 /* MOV dst.yz, tmp (predicated)
2599 *
2600 * Note that the predicate reg (and possible modifiers) is passed
2601 * as the first source argument.
2602 */
2603 if (dst.mask & TGSI_WRITEMASK_YZ) {
2604 if (!submit_op2( emit,
2605 inst_token_predicated(SVGA3DOP_MOV),
2606 writemask(dst, TGSI_WRITEMASK_YZ),
2607 src( pred_reg ), src( tmp ) ))
2608 return FALSE;
2609 }
2610 }
2611 }
2612
2613 return TRUE;
2614 }
2615
2616
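/**
* Translate/emit TGSI EX2 instruction: dst = 2 ^ src0.x, replicated to
* all written channels.
*/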
2617 static boolean
2618 emit_ex2(struct svga_shader_emitter *emit,
2619 const struct tgsi_full_instruction *insn)
2620 {
2621 SVGA3dShaderInstToken inst;
2622 SVGA3dShaderDestToken dst;
2623 struct src_register src0;
2624
2625 inst = inst_token( SVGA3DOP_EXP );
2626 dst = translate_dst_register( emit, insn, 0 );
2627 src0 = translate_src_register( emit, &insn->Src[0] );
2628 src0 = scalar( src0, TGSI_SWIZZLE_X );
2629
2630 if (dst.mask != TGSI_WRITEMASK_XYZW) {
2631 SVGA3dShaderDestToken tmp = get_temp( emit );
2632
2633 if (!submit_op1( emit, inst, tmp, src0 ))
2634 return FALSE;
2635
2636 return submit_op1( emit, inst_token( SVGA3DOP_MOV ),
2637 dst,
2638 scalar( src( tmp ), TGSI_SWIZZLE_X ) );
2639 }
2640
2641 return submit_op1( emit, inst, dst, src0 );
2642 }
2643
2644
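/**
* Translate/emit TGSI LOG instruction:
*   dst.x = floor(log2( abs(src0.x) ))
*   dst.y = abs(src0.x) / (2 ^ floor(log2( abs(src0.x) )))
*   dst.z = log2( abs(src0.x) )
*   dst.w = 1.0
*/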
2645 static boolean
2646 emit_log(struct svga_shader_emitter *emit,
2647 const struct tgsi_full_instruction *insn)
2648 {
2649 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
2650 struct src_register src0 =
2651 translate_src_register( emit, &insn->Src[0] );
2652 SVGA3dShaderDestToken abs_tmp;
2653 struct src_register abs_src0;
2654 SVGA3dShaderDestToken log2_abs;
2655
2656 abs_tmp.value = 0;
2657
2658 if (dst.mask & TGSI_WRITEMASK_Z)
2659 log2_abs = dst;
2660 else if (dst.mask & TGSI_WRITEMASK_XY)
2661 log2_abs = get_temp( emit );
2662 else
2663 log2_abs.value = 0;
2664
2665    /* Compute log2( abs( src0 ) ) if x, y or z is being written (x and y need it too).
2666     */
2667 if (dst.mask & TGSI_WRITEMASK_XYZ) {
2668 if (!src0.base.srcMod || src0.base.srcMod == SVGA3DSRCMOD_ABS)
2669 abs_src0 = src0;
2670 else {
2671 abs_tmp = get_temp( emit );
2672
2673 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
2674 abs_tmp,
2675 src0 ) )
2676 return FALSE;
2677
2678 abs_src0 = src( abs_tmp );
2679 }
2680
2681 abs_src0 = absolute( scalar( abs_src0, TGSI_SWIZZLE_X ) );
2682
2683 if (!submit_op1( emit, inst_token( SVGA3DOP_LOG ),
2684 writemask( log2_abs, TGSI_WRITEMASK_Z ),
2685 abs_src0 ) )
2686 return FALSE;
2687 }
2688
2689 if (dst.mask & TGSI_WRITEMASK_XY) {
2690 SVGA3dShaderDestToken floor_log2;
2691
2692 if (dst.mask & TGSI_WRITEMASK_X)
2693 floor_log2 = dst;
2694 else
2695 floor_log2 = get_temp( emit );
2696
2697 /* If x is being written, fill it with floor( log2( abs( src0 ) ) ).
2698 */
2699 if (!submit_op1( emit, inst_token( SVGA3DOP_FRC ),
2700 writemask( floor_log2, TGSI_WRITEMASK_X ),
2701 scalar( src( log2_abs ), TGSI_SWIZZLE_Z ) ) )
2702 return FALSE;
2703
2704 if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ),
2705 writemask( floor_log2, TGSI_WRITEMASK_X ),
2706 scalar( src( log2_abs ), TGSI_SWIZZLE_Z ),
2707 negate( src( floor_log2 ) ) ) )
2708 return FALSE;
2709
2710 /* If y is being written, fill it with
2711 * abs ( src0 ) / ( 2 ^ floor( log2( abs( src0 ) ) ) ).
2712 */
2713 if (dst.mask & TGSI_WRITEMASK_Y) {
2714 if (!submit_op1( emit, inst_token( SVGA3DOP_EXP ),
2715 writemask( dst, TGSI_WRITEMASK_Y ),
2716 negate( scalar( src( floor_log2 ),
2717 TGSI_SWIZZLE_X ) ) ) )
2718 return FALSE;
2719
2720 if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ),
2721 writemask( dst, TGSI_WRITEMASK_Y ),
2722 src( dst ),
2723 abs_src0 ) )
2724 return FALSE;
2725 }
2726
2727 if (!(dst.mask & TGSI_WRITEMASK_X))
2728 release_temp( emit, floor_log2 );
2729
2730 if (!(dst.mask & TGSI_WRITEMASK_Z))
2731 release_temp( emit, log2_abs );
2732 }
2733
2734    if ((dst.mask & TGSI_WRITEMASK_XYZ) && src0.base.srcMod &&
2735 src0.base.srcMod != SVGA3DSRCMOD_ABS)
2736 release_temp( emit, abs_tmp );
2737
2738 /* If w is being written, fill it with one.
2739 */
2740 if (dst.mask & TGSI_WRITEMASK_W) {
2741 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
2742 writemask(dst, TGSI_WRITEMASK_W),
2743 get_one_immediate(emit)))
2744 return FALSE;
2745 }
2746
2747 return TRUE;
2748 }
2749
2750
2751 /**
2752 * Translate TGSI TRUNC or ROUND instruction.
2753 * We need to truncate toward zero. Ex: trunc(-1.9) = -1
2754 * Different approaches are needed for VS versus PS.
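* For example, the ROUND path below computes round(-2.3) as
* t0 = abs(-2.3) + 0.5 = 2.8, t1 = t0 - frc(t0) = 2.0, then multiplies
* by the sign of the source to give -2.0.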
2755 */
2756 static boolean
2757 emit_trunc_round(struct svga_shader_emitter *emit,
2758 const struct tgsi_full_instruction *insn,
2759 boolean round)
2760 {
2761 SVGA3dShaderDestToken dst = translate_dst_register(emit, insn, 0);
2762 const struct src_register src0 =
2763 translate_src_register(emit, &insn->Src[0] );
2764 SVGA3dShaderDestToken t1 = get_temp(emit);
2765
2766 if (round) {
2767 SVGA3dShaderDestToken t0 = get_temp(emit);
2768 struct src_register half = get_half_immediate(emit);
2769
2770 /* t0 = abs(src0) + 0.5 */
2771 if (!submit_op2(emit, inst_token(SVGA3DOP_ADD), t0,
2772 absolute(src0), half))
2773 return FALSE;
2774
2775 /* t1 = fract(t0) */
2776 if (!submit_op1(emit, inst_token(SVGA3DOP_FRC), t1, src(t0)))
2777 return FALSE;
2778
2779 /* t1 = t0 - t1 */
2780 if (!submit_op2(emit, inst_token(SVGA3DOP_ADD), t1, src(t0),
2781 negate(src(t1))))
2782 return FALSE;
2783 }
2784 else {
2785 /* trunc */
2786
2787 /* t1 = fract(abs(src0)) */
2788 if (!submit_op1(emit, inst_token(SVGA3DOP_FRC), t1, absolute(src0)))
2789 return FALSE;
2790
2791 /* t1 = abs(src0) - t1 */
2792 if (!submit_op2(emit, inst_token(SVGA3DOP_ADD), t1, absolute(src0),
2793 negate(src(t1))))
2794 return FALSE;
2795 }
2796
2797 /*
2798 * Now we need to multiply t1 by the sign of the original value.
2799 */
2800 if (emit->unit == PIPE_SHADER_VERTEX) {
2801 /* For VS: use SGN instruction */
2802 /* Need two extra/dummy registers: */
2803 SVGA3dShaderDestToken t2 = get_temp(emit), t3 = get_temp(emit),
2804 t4 = get_temp(emit);
2805
2806 /* t2 = sign(src0) */
2807 if (!submit_op3(emit, inst_token(SVGA3DOP_SGN), t2, src0,
2808 src(t3), src(t4)))
2809 return FALSE;
2810
2811 /* dst = t1 * t2 */
2812 if (!submit_op2(emit, inst_token(SVGA3DOP_MUL), dst, src(t1), src(t2)))
2813 return FALSE;
2814 }
2815 else {
2816 /* For FS: Use CMP instruction */
2817 return submit_op3(emit, inst_token( SVGA3DOP_CMP ), dst,
2818 src0, src(t1), negate(src(t1)));
2819 }
2820
2821 return TRUE;
2822 }
2823
2824
2825 /**
2826 * Translate/emit "begin subroutine" instruction/marker/label.
2827 */
2828 static boolean
2829 emit_bgnsub(struct svga_shader_emitter *emit,
2830 unsigned position,
2831 const struct tgsi_full_instruction *insn)
2832 {
2833 unsigned i;
2834
2835 /* Note that we've finished the main function and are now emitting
2836 * subroutines. This affects how we terminate the generated
2837 * shader.
2838 */
2839 emit->in_main_func = FALSE;
2840
2841 for (i = 0; i < emit->nr_labels; i++) {
2842 if (emit->label[i] == position) {
2843 return (emit_instruction( emit, inst_token( SVGA3DOP_RET ) ) &&
2844 emit_instruction( emit, inst_token( SVGA3DOP_LABEL ) ) &&
2845 emit_src( emit, src_register( SVGA3DREG_LABEL, i )));
2846 }
2847 }
2848
2849 assert(0);
2850 return TRUE;
2851 }
2852
2853
2854 /**
2855 * Translate/emit subroutine call instruction.
2856 */
2857 static boolean
2858 emit_call(struct svga_shader_emitter *emit,
2859 const struct tgsi_full_instruction *insn)
2860 {
2861 unsigned position = insn->Label.Label;
2862 unsigned i;
2863
2864 for (i = 0; i < emit->nr_labels; i++) {
2865 if (emit->label[i] == position)
2866 break;
2867 }
2868
2869 if (emit->nr_labels == Elements(emit->label))
2870 return FALSE;
2871
2872 if (i == emit->nr_labels) {
2873 emit->label[i] = position;
2874 emit->nr_labels++;
2875 }
2876
2877 return (emit_instruction( emit, inst_token( SVGA3DOP_CALL ) ) &&
2878 emit_src( emit, src_register( SVGA3DREG_LABEL, i )));
2879 }
2880
2881
2882 /**
2883 * Called at the end of the shader. Actually, emit special "fix-up"
2884 * code for the vertex/fragment shader.
2885 */
2886 static boolean
2887 emit_end(struct svga_shader_emitter *emit)
2888 {
2889 if (emit->unit == PIPE_SHADER_VERTEX) {
2890 return emit_vs_postamble( emit );
2891 }
2892 else {
2893 return emit_ps_postamble( emit );
2894 }
2895 }
2896
2897
2898 /**
2899 * Translate any TGSI instruction to SVGA.
2900 */
2901 static boolean
2902 svga_emit_instruction(struct svga_shader_emitter *emit,
2903 unsigned position,
2904 const struct tgsi_full_instruction *insn)
2905 {
2906 switch (insn->Instruction.Opcode) {
2907
2908 case TGSI_OPCODE_ARL:
2909 return emit_arl( emit, insn );
2910
2911 case TGSI_OPCODE_TEX:
2912 case TGSI_OPCODE_TXB:
2913 case TGSI_OPCODE_TXP:
2914 case TGSI_OPCODE_TXL:
2915 case TGSI_OPCODE_TXD:
2916 return emit_tex( emit, insn );
2917
2918 case TGSI_OPCODE_DDX:
2919 case TGSI_OPCODE_DDY:
2920 return emit_deriv( emit, insn );
2921
2922 case TGSI_OPCODE_BGNSUB:
2923 return emit_bgnsub( emit, position, insn );
2924
2925 case TGSI_OPCODE_ENDSUB:
2926 return TRUE;
2927
2928 case TGSI_OPCODE_CAL:
2929 return emit_call( emit, insn );
2930
2931 case TGSI_OPCODE_FLR:
2932 return emit_floor( emit, insn );
2933
2934 case TGSI_OPCODE_TRUNC:
2935 return emit_trunc_round( emit, insn, FALSE );
2936
2937 case TGSI_OPCODE_ROUND:
2938 return emit_trunc_round( emit, insn, TRUE );
2939
2940 case TGSI_OPCODE_CEIL:
2941 return emit_ceil( emit, insn );
2942
2943 case TGSI_OPCODE_CMP:
2944 return emit_cmp( emit, insn );
2945
2946 case TGSI_OPCODE_DIV:
2947 return emit_div( emit, insn );
2948
2949 case TGSI_OPCODE_DP2:
2950 return emit_dp2( emit, insn );
2951
2952 case TGSI_OPCODE_DPH:
2953 return emit_dph( emit, insn );
2954
2955 case TGSI_OPCODE_COS:
2956 return emit_cos( emit, insn );
2957
2958 case TGSI_OPCODE_SIN:
2959 return emit_sin( emit, insn );
2960
2961 case TGSI_OPCODE_SCS:
2962 return emit_sincos( emit, insn );
2963
2964 case TGSI_OPCODE_END:
2965 /* TGSI always finishes the main func with an END */
2966 return emit_end( emit );
2967
2968 case TGSI_OPCODE_KILL_IF:
2969 return emit_kill_if( emit, insn );
2970
2971 /* Selection opcodes. The underlying language is fairly
2972 * non-orthogonal about these.
2973 */
2974 case TGSI_OPCODE_SEQ:
2975 return emit_select_op( emit, PIPE_FUNC_EQUAL, insn );
2976
2977 case TGSI_OPCODE_SNE:
2978 return emit_select_op( emit, PIPE_FUNC_NOTEQUAL, insn );
2979
2980 case TGSI_OPCODE_SGT:
2981 return emit_select_op( emit, PIPE_FUNC_GREATER, insn );
2982
2983 case TGSI_OPCODE_SGE:
2984 return emit_select_op( emit, PIPE_FUNC_GEQUAL, insn );
2985
2986 case TGSI_OPCODE_SLT:
2987 return emit_select_op( emit, PIPE_FUNC_LESS, insn );
2988
2989 case TGSI_OPCODE_SLE:
2990 return emit_select_op( emit, PIPE_FUNC_LEQUAL, insn );
2991
2992 case TGSI_OPCODE_SUB:
2993 return emit_sub( emit, insn );
2994
2995 case TGSI_OPCODE_POW:
2996 return emit_pow( emit, insn );
2997
2998 case TGSI_OPCODE_EX2:
2999 return emit_ex2( emit, insn );
3000
3001 case TGSI_OPCODE_EXP:
3002 return emit_exp( emit, insn );
3003
3004 case TGSI_OPCODE_LOG:
3005 return emit_log( emit, insn );
3006
3007 case TGSI_OPCODE_LG2:
3008 return emit_scalar_op1( emit, SVGA3DOP_LOG, insn );
3009
3010 case TGSI_OPCODE_RSQ:
3011 return emit_scalar_op1( emit, SVGA3DOP_RSQ, insn );
3012
3013 case TGSI_OPCODE_RCP:
3014 return emit_scalar_op1( emit, SVGA3DOP_RCP, insn );
3015
3016 case TGSI_OPCODE_CONT:
3017 /* not expected (we return PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED = 0) */
3018 return FALSE;
3019
3020 case TGSI_OPCODE_RET:
3021 /* This is a noop -- we tell mesa that we can't support RET
3022 * within a function (early return), so this will always be
3023 * followed by an ENDSUB.
3024 */
3025 return TRUE;
3026
3027 /* These aren't actually used by any of the frontends we care
3028 * about:
3029 */
3030 case TGSI_OPCODE_CLAMP:
3031 case TGSI_OPCODE_AND:
3032 case TGSI_OPCODE_OR:
3033 case TGSI_OPCODE_I2F:
3034 case TGSI_OPCODE_NOT:
3035 case TGSI_OPCODE_SHL:
3036 case TGSI_OPCODE_ISHR:
3037 case TGSI_OPCODE_XOR:
3038 return FALSE;
3039
3040 case TGSI_OPCODE_IF:
3041 return emit_if( emit, insn );
3042 case TGSI_OPCODE_ELSE:
3043 return emit_else( emit, insn );
3044 case TGSI_OPCODE_ENDIF:
3045 return emit_endif( emit, insn );
3046
3047 case TGSI_OPCODE_BGNLOOP:
3048 return emit_bgnloop( emit, insn );
3049 case TGSI_OPCODE_ENDLOOP:
3050 return emit_endloop( emit, insn );
3051 case TGSI_OPCODE_BRK:
3052 return emit_brk( emit, insn );
3053
3054 case TGSI_OPCODE_XPD:
3055 return emit_xpd( emit, insn );
3056
3057 case TGSI_OPCODE_KILL:
3058 return emit_kill( emit, insn );
3059
3060 case TGSI_OPCODE_DST:
3061 return emit_dst_insn( emit, insn );
3062
3063 case TGSI_OPCODE_LIT:
3064 return emit_lit( emit, insn );
3065
3066 case TGSI_OPCODE_LRP:
3067 return emit_lrp( emit, insn );
3068
3069 case TGSI_OPCODE_SSG:
3070 return emit_ssg( emit, insn );
3071
3072 case TGSI_OPCODE_MOV:
3073 return emit_mov( emit, insn );
3074
3075 default:
3076 {
3077 unsigned opcode = translate_opcode(insn->Instruction.Opcode);
3078
3079 if (opcode == SVGA3DOP_LAST_INST)
3080 return FALSE;
3081
3082 if (!emit_simple_instruction( emit, opcode, insn ))
3083 return FALSE;
3084 }
3085 }
3086
3087 return TRUE;
3088 }
3089
3090
3091 /**
3092 * Translate/emit a TGSI IMMEDIATE declaration.
3093 * An immediate vector is a constant that's hard-coded into the shader.
3094 */
3095 static boolean
3096 svga_emit_immediate(struct svga_shader_emitter *emit,
3097 const struct tgsi_full_immediate *imm)
3098 {
3099 static const float id[4] = {0,0,0,1};
3100 float value[4];
3101 unsigned i;
3102
3103 assert(1 <= imm->Immediate.NrTokens && imm->Immediate.NrTokens <= 5);
3104 for (i = 0; i < imm->Immediate.NrTokens - 1; i++) {
3105 float f = imm->u[i].Float;
3106 value[i] = util_is_inf_or_nan(f) ? 0.0f : f;
3107 }
3108
3109    /* If the immediate has fewer than four values, fill in the remaining
3110 * positions from id={0,0,0,1}.
3111 */
3112 for ( ; i < 4; i++ )
3113 value[i] = id[i];
3114
3115 return emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT,
3116 emit->imm_start + emit->internal_imm_count++,
3117 value[0], value[1], value[2], value[3]);
3118 }
3119
3120
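/**
* Allocate the next hardware float constant, emit a DEF for it with the
* given values and return a src_register which references it.
*/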
3121 static boolean
3122 make_immediate(struct svga_shader_emitter *emit,
3123 float a, float b, float c, float d,
3124 struct src_register *out )
3125 {
3126 unsigned idx = emit->nr_hw_float_const++;
3127
3128 if (!emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT,
3129 idx, a, b, c, d ))
3130 return FALSE;
3131
3132 *out = src_register( SVGA3DREG_CONST, idx );
3133
3134 return TRUE;
3135 }
3136
3137
3138 /**
3139 * Emit special VS instructions at top of shader.
3140 */
3141 static boolean
3142 emit_vs_preamble(struct svga_shader_emitter *emit)
3143 {
3144 if (!emit->key.vs.need_prescale) {
3145 if (!make_immediate( emit, 0, 0, .5, .5,
3146 &emit->imm_0055))
3147 return FALSE;
3148 }
3149
3150 return TRUE;
3151 }
3152
3153
3154 /**
3155 * Emit special PS instructions at top of shader.
3156 */
3157 static boolean
3158 emit_ps_preamble(struct svga_shader_emitter *emit)
3159 {
3160 if (emit->ps_reads_pos && emit->info.reads_z) {
3161 /*
3162        * Assemble the position from various bits of inputs. Depth and W are
3163        * passed in a texcoord because D3D's vPos does not hold Z or W.
3164        * Also fix up the perspective interpolation.
3165 *
3166 * temp_pos.xy = vPos.xy
3167 * temp_pos.w = rcp(texcoord1.w);
3168 * temp_pos.z = texcoord1.z * temp_pos.w;
3169 */
3170 if (!submit_op1( emit,
3171 inst_token(SVGA3DOP_MOV),
3172 writemask( emit->ps_temp_pos, TGSI_WRITEMASK_XY ),
3173 emit->ps_true_pos ))
3174 return FALSE;
3175
3176 if (!submit_op1( emit,
3177 inst_token(SVGA3DOP_RCP),
3178 writemask( emit->ps_temp_pos, TGSI_WRITEMASK_W ),
3179 scalar( emit->ps_depth_pos, TGSI_SWIZZLE_W ) ))
3180 return FALSE;
3181
3182 if (!submit_op2( emit,
3183 inst_token(SVGA3DOP_MUL),
3184 writemask( emit->ps_temp_pos, TGSI_WRITEMASK_Z ),
3185 scalar( emit->ps_depth_pos, TGSI_SWIZZLE_Z ),
3186 scalar( src(emit->ps_temp_pos), TGSI_SWIZZLE_W ) ))
3187 return FALSE;
3188 }
3189
3190 return TRUE;
3191 }
3192
3193
3194 /**
3195 * Emit special PS instructions at end of shader.
3196 */
3197 static boolean
3198 emit_ps_postamble(struct svga_shader_emitter *emit)
3199 {
3200 unsigned i;
3201
3202 /* PS oDepth is incredibly fragile and it's very hard to catch the
3203 * types of usage that break it during shader emit. Easier just to
3204 * redirect the main program to a temporary and then only touch
3205 * oDepth with a hand-crafted MOV below.
3206 */
3207 if (SVGA3dShaderGetRegType(emit->true_pos.value) != 0) {
3208 if (!submit_op1( emit,
3209 inst_token(SVGA3DOP_MOV),
3210 emit->true_pos,
3211 scalar(src(emit->temp_pos), TGSI_SWIZZLE_Z) ))
3212 return FALSE;
3213 }
3214
3215 for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) {
3216 if (SVGA3dShaderGetRegType(emit->true_color_output[i].value) != 0) {
3217 /* Potentially override output colors with white for XOR
3218 * logicop workaround.
3219 */
3220 if (emit->unit == PIPE_SHADER_FRAGMENT &&
3221 emit->key.fs.white_fragments) {
3222 struct src_register one = get_one_immediate(emit);
3223
3224 if (!submit_op1( emit,
3225 inst_token(SVGA3DOP_MOV),
3226 emit->true_color_output[i],
3227 one ))
3228 return FALSE;
3229 }
3230 else if (emit->unit == PIPE_SHADER_FRAGMENT &&
3231 i < emit->key.fs.write_color0_to_n_cbufs) {
3232 /* Write temp color output [0] to true output [i] */
3233 if (!submit_op1(emit, inst_token(SVGA3DOP_MOV),
3234 emit->true_color_output[i],
3235 src(emit->temp_color_output[0]))) {
3236 return FALSE;
3237 }
3238 }
3239 else {
3240 if (!submit_op1( emit,
3241 inst_token(SVGA3DOP_MOV),
3242 emit->true_color_output[i],
3243 src(emit->temp_color_output[i]) ))
3244 return FALSE;
3245 }
3246 }
3247 }
3248
3249 return TRUE;
3250 }
3251
3252
3253 /**
3254 * Emit special VS instructions at end of shader.
3255 */
3256 static boolean
3257 emit_vs_postamble(struct svga_shader_emitter *emit)
3258 {
3259 /* PSIZ output is incredibly fragile and it's very hard to catch
3260 * the types of usage that break it during shader emit. Easier
3261 * just to redirect the main program to a temporary and then only
3262 * touch PSIZ with a hand-crafted MOV below.
3263 */
3264 if (SVGA3dShaderGetRegType(emit->true_psiz.value) != 0) {
3265 if (!submit_op1( emit,
3266 inst_token(SVGA3DOP_MOV),
3267 emit->true_psiz,
3268 scalar(src(emit->temp_psiz), TGSI_SWIZZLE_X) ))
3269 return FALSE;
3270 }
3271
3272 /* Need to perform various manipulations on vertex position to cope
3273 * with the different GL and D3D clip spaces.
3274 */
3275 if (emit->key.vs.need_prescale) {
3276 SVGA3dShaderDestToken temp_pos = emit->temp_pos;
3277 SVGA3dShaderDestToken depth = emit->depth_pos;
3278 SVGA3dShaderDestToken pos = emit->true_pos;
3279 unsigned offset = emit->info.file_max[TGSI_FILE_CONSTANT] + 1;
3280 struct src_register prescale_scale = src_register( SVGA3DREG_CONST,
3281 offset + 0 );
3282 struct src_register prescale_trans = src_register( SVGA3DREG_CONST,
3283 offset + 1 );
3284
3285 if (!submit_op1( emit,
3286 inst_token(SVGA3DOP_MOV),
3287 writemask(depth, TGSI_WRITEMASK_W),
3288 scalar(src(temp_pos), TGSI_SWIZZLE_W) ))
3289 return FALSE;
3290
3291 /* MUL temp_pos.xyz, temp_pos, prescale.scale
3292 * MAD result.position, temp_pos.wwww, prescale.trans, temp_pos
3293 * --> Note that prescale.trans.w == 0
3294 */
3295 if (!submit_op2( emit,
3296 inst_token(SVGA3DOP_MUL),
3297 writemask(temp_pos, TGSI_WRITEMASK_XYZ),
3298 src(temp_pos),
3299 prescale_scale ))
3300 return FALSE;
3301
3302 if (!submit_op3( emit,
3303 inst_token(SVGA3DOP_MAD),
3304 pos,
3305 swizzle(src(temp_pos), 3, 3, 3, 3),
3306 prescale_trans,
3307 src(temp_pos)))
3308 return FALSE;
3309
3310 /* Also write to depth value */
3311 if (!submit_op3( emit,
3312 inst_token(SVGA3DOP_MAD),
3313 writemask(depth, TGSI_WRITEMASK_Z),
3314 swizzle(src(temp_pos), 3, 3, 3, 3),
3315 prescale_trans,
3316 src(temp_pos) ))
3317 return FALSE;
3318 }
3319 else {
3320 SVGA3dShaderDestToken temp_pos = emit->temp_pos;
3321 SVGA3dShaderDestToken depth = emit->depth_pos;
3322 SVGA3dShaderDestToken pos = emit->true_pos;
3323 struct src_register imm_0055 = emit->imm_0055;
3324
3325 /* Adjust GL clipping coordinate space to hardware (D3D-style):
3326 *
3327 * DP4 temp_pos.z, {0,0,.5,.5}, temp_pos
3328 * MOV result.position, temp_pos
3329 */
3330 if (!submit_op2( emit,
3331 inst_token(SVGA3DOP_DP4),
3332 writemask(temp_pos, TGSI_WRITEMASK_Z),
3333 imm_0055,
3334 src(temp_pos) ))
3335 return FALSE;
3336
3337 if (!submit_op1( emit,
3338 inst_token(SVGA3DOP_MOV),
3339 pos,
3340 src(temp_pos) ))
3341 return FALSE;
3342
3343 /* Move the manipulated depth into the extra texcoord reg */
3344 if (!submit_op1( emit,
3345 inst_token(SVGA3DOP_MOV),
3346 writemask(depth, TGSI_WRITEMASK_ZW),
3347 src(temp_pos) ))
3348 return FALSE;
3349 }
3350
3351 return TRUE;
3352 }
3353
3354
3355 /**
3356 * For the pixel shader: emit the code which chooses the front
3357 * or back face color depending on triangle orientation.
3358 * This happens at the top of the fragment shader.
3359 *
3360 * 0: IF VFACE :4
3361 * 1: COLOR = FrontColor;
3362 * 2: ELSE
3363 * 3: COLOR = BackColor;
3364 * 4: ENDIF
3365 */
3366 static boolean
3367 emit_light_twoside(struct svga_shader_emitter *emit)
3368 {
3369 struct src_register vface, zero;
3370 struct src_register front[2];
3371 struct src_register back[2];
3372 SVGA3dShaderDestToken color[2];
3373 int count = emit->internal_color_count;
3374 unsigned i;
3375 SVGA3dShaderInstToken if_token;
3376
3377 if (count == 0)
3378 return TRUE;
3379
3380 vface = get_vface( emit );
3381 zero = get_zero_immediate(emit);
3382
3383 /* Can't use get_temp() to allocate the color reg as such
3384 * temporaries will be reclaimed after each instruction by the call
3385 * to reset_temp_regs().
3386 */
3387 for (i = 0; i < count; i++) {
3388 color[i] = dst_register( SVGA3DREG_TEMP, emit->nr_hw_temp++ );
3389 front[i] = emit->input_map[emit->internal_color_idx[i]];
3390
3391 /* Back is always the next input:
3392 */
3393 back[i] = front[i];
3394 back[i].base.num = front[i].base.num + 1;
3395
3396 /* Reassign the input_map to the actual front-face color:
3397 */
3398 emit->input_map[emit->internal_color_idx[i]] = src(color[i]);
3399 }
3400
3401 if_token = inst_token( SVGA3DOP_IFC );
3402
3403 if (emit->key.fs.front_ccw)
3404 if_token.control = SVGA3DOPCOMP_LT;
3405 else
3406 if_token.control = SVGA3DOPCOMP_GT;
3407
3408 if (!(emit_instruction( emit, if_token ) &&
3409 emit_src( emit, vface ) &&
3410 emit_src( emit, zero ) ))
3411 return FALSE;
3412
3413 for (i = 0; i < count; i++) {
3414 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), color[i], front[i] ))
3415 return FALSE;
3416 }
3417
3418 if (!(emit_instruction( emit, inst_token( SVGA3DOP_ELSE))))
3419 return FALSE;
3420
3421 for (i = 0; i < count; i++) {
3422 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), color[i], back[i] ))
3423 return FALSE;
3424 }
3425
3426 if (!emit_instruction( emit, inst_token( SVGA3DOP_ENDIF ) ))
3427 return FALSE;
3428
3429 return TRUE;
3430 }
3431
3432
3433 /**
3434 * Emit special setup code for the front/back face register in the FS.
3435 * 0: SETP_GT TEMP, VFACE, 0
3436 * where TEMP is a fake frontface register
3437 */
3438 static boolean
3439 emit_frontface(struct svga_shader_emitter *emit)
3440 {
3441 struct src_register vface;
3442 SVGA3dShaderDestToken temp;
3443 struct src_register pass, fail;
3444
3445 vface = get_vface( emit );
3446
3447 /* Can't use get_temp() to allocate the fake frontface reg as such
3448 * temporaries will be reclaimed after each instruction by the call
3449 * to reset_temp_regs().
3450 */
3451 temp = dst_register( SVGA3DREG_TEMP,
3452 emit->nr_hw_temp++ );
3453
3454 if (emit->key.fs.front_ccw) {
3455 pass = get_zero_immediate(emit);
3456 fail = get_one_immediate(emit);
3457 } else {
3458 pass = get_one_immediate(emit);
3459 fail = get_zero_immediate(emit);
3460 }
3461
3462 if (!emit_conditional(emit, PIPE_FUNC_GREATER,
3463 temp, vface, get_zero_immediate(emit),
3464 pass, fail))
3465 return FALSE;
3466
3467    /* Reassign the input_map entry to the fake front-face register:
3468 */
3469 emit->input_map[emit->internal_frontface_idx] = src(temp);
3470
3471 return TRUE;
3472 }
3473
3474
3475 /**
3476 * Emit code to invert the T component of the incoming texture coordinate.
3477 * This is used for drawing point sprites when
3478 * pipe_rasterizer_state::sprite_coord_mode == PIPE_SPRITE_COORD_LOWER_LEFT.
3479 */
3480 static boolean
3481 emit_inverted_texcoords(struct svga_shader_emitter *emit)
3482 {
3483 unsigned inverted_texcoords = emit->inverted_texcoords;
3484
3485 while (inverted_texcoords) {
3486 const unsigned unit = ffs(inverted_texcoords) - 1;
3487
3488 assert(emit->inverted_texcoords & (1 << unit));
3489
3490 assert(unit < Elements(emit->ps_true_texcoord));
3491
3492 assert(unit < Elements(emit->ps_inverted_texcoord_input));
3493
3494 assert(emit->ps_inverted_texcoord_input[unit]
3495 < Elements(emit->input_map));
3496
3497 /* inverted = coord * (1, -1, 1, 1) + (0, 1, 0, 0) */
3498 if (!submit_op3(emit,
3499 inst_token(SVGA3DOP_MAD),
3500 dst(emit->ps_inverted_texcoord[unit]),
3501 emit->ps_true_texcoord[unit],
3502 get_immediate(emit, 1.0f, -1.0f, 1.0f, 1.0f),
3503 get_immediate(emit, 0.0f, 1.0f, 0.0f, 0.0f)))
3504 return FALSE;
3505
3506 /* Reassign the input_map entry to the new texcoord register */
3507 emit->input_map[emit->ps_inverted_texcoord_input[unit]] =
3508 emit->ps_inverted_texcoord[unit];
3509
3510 inverted_texcoords &= ~(1 << unit);
3511 }
3512
3513 return TRUE;
3514 }
3515
3516
3517 /**
3518 * Emit code to adjust vertex shader inputs/attributes:
3519 * - Change range from [0,1] to [-1,1] (for normalized byte/short attribs).
3520 * - Set attrib W component = 1.
3521 */
3522 static boolean
3523 emit_adjusted_vertex_attribs(struct svga_shader_emitter *emit)
3524 {
3525 unsigned adjust_mask = (emit->key.vs.adjust_attrib_range |
3526 emit->key.vs.adjust_attrib_w_1);
3527
3528 while (adjust_mask) {
3529 /* Adjust vertex attrib range and/or set W component = 1 */
3530 const unsigned index = u_bit_scan(&adjust_mask);
3531 struct src_register tmp;
3532
3533 /* allocate a temp reg */
3534 tmp = src_register(SVGA3DREG_TEMP, emit->nr_hw_temp);
3535 emit->nr_hw_temp++;
3536
3537 if (emit->key.vs.adjust_attrib_range & (1 << index)) {
3538 /* The vertex input/attribute is supposed to be a signed value in
3539 * the range [-1,1] but we actually fetched/converted it to the
3540 * range [0,1]. This most likely happens when the app specifies a
3541 * signed byte attribute but we interpreted it as unsigned bytes.
3542 * See also svga_translate_vertex_format().
3543 *
3544 * Here, we emit some extra instructions to adjust
3545 * the attribute values from [0,1] to [-1,1].
3546 *
3547 * The adjustment we implement is:
3548 * new_attrib = attrib * 2.0;
3549 * if (attrib >= 0.5)
3550 * new_attrib = new_attrib - 2.0;
3551 * This isn't exactly right (it's off by a bit or so) but close enough.
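* For example, a signed byte of -64 arrives as 192/255 ~= 0.753;
* 0.753 * 2.0 - 2.0 = -0.494, close to the exact value -64/127 ~= -0.504.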
3552 */
3553 SVGA3dShaderDestToken pred_reg = dst_register(SVGA3DREG_PREDICATE, 0);
3554
3555 /* tmp = attrib * 2.0 */
3556 if (!submit_op2(emit,
3557 inst_token(SVGA3DOP_MUL),
3558 dst(tmp),
3559 emit->input_map[index],
3560 get_two_immediate(emit)))
3561 return FALSE;
3562
3563 /* pred = (attrib >= 0.5) */
3564 if (!submit_op2(emit,
3565 inst_token_setp(SVGA3DOPCOMP_GE),
3566 pred_reg,
3567 emit->input_map[index], /* vert attrib */
3568 get_half_immediate(emit))) /* 0.5 */
3569 return FALSE;
3570
3571 /* sub(pred) tmp, tmp, 2.0 */
3572 if (!submit_op3(emit,
3573 inst_token_predicated(SVGA3DOP_SUB),
3574 dst(tmp),
3575 src(pred_reg),
3576 tmp,
3577 get_two_immediate(emit)))
3578 return FALSE;
3579 }
3580 else {
3581 /* just copy the vertex input attrib to the temp register */
3582 if (!submit_op1(emit,
3583 inst_token(SVGA3DOP_MOV),
3584 dst(tmp),
3585 emit->input_map[index]))
3586 return FALSE;
3587 }
3588
3589 if (emit->key.vs.adjust_attrib_w_1 & (1 << index)) {
3590 /* move 1 into W position of tmp */
3591 if (!submit_op1(emit,
3592 inst_token(SVGA3DOP_MOV),
3593 writemask(dst(tmp), TGSI_WRITEMASK_W),
3594 get_one_immediate(emit)))
3595 return FALSE;
3596 }
3597
3598 /* Reassign the input_map entry to the new tmp register */
3599 emit->input_map[index] = tmp;
3600 }
3601
3602 return TRUE;
3603 }
3604
3605
3606 /**
3607 * Determine if we need to create the "common" immediate value which is
3608 * used for generating useful vector constants such as {0,0,0,0} and
3609 * {1,1,1,1}.
3610 * We could just do this all the time except that we want to conserve
3611 * registers whenever possible.
3612 */
3613 static boolean
3614 needs_to_create_common_immediate(const struct svga_shader_emitter *emit)
3615 {
3616 unsigned i;
3617
3618 if (emit->unit == PIPE_SHADER_FRAGMENT) {
3619 if (emit->key.fs.light_twoside)
3620 return TRUE;
3621
3622 if (emit->key.fs.white_fragments)
3623 return TRUE;
3624
3625 if (emit->emit_frontface)
3626 return TRUE;
3627
3628 if (emit->info.opcode_count[TGSI_OPCODE_DST] >= 1 ||
3629 emit->info.opcode_count[TGSI_OPCODE_SSG] >= 1 ||
3630 emit->info.opcode_count[TGSI_OPCODE_LIT] >= 1)
3631 return TRUE;
3632
3633 if (emit->inverted_texcoords)
3634 return TRUE;
3635
3636 /* look for any PIPE_SWIZZLE_ZERO/ONE terms */
3637 for (i = 0; i < emit->key.num_textures; i++) {
3638 if (emit->key.tex[i].swizzle_r > PIPE_SWIZZLE_ALPHA ||
3639 emit->key.tex[i].swizzle_g > PIPE_SWIZZLE_ALPHA ||
3640 emit->key.tex[i].swizzle_b > PIPE_SWIZZLE_ALPHA ||
3641 emit->key.tex[i].swizzle_a > PIPE_SWIZZLE_ALPHA)
3642 return TRUE;
3643 }
3644
3645 for (i = 0; i < emit->key.num_textures; i++) {
3646 if (emit->key.tex[i].compare_mode
3647 == PIPE_TEX_COMPARE_R_TO_TEXTURE)
3648 return TRUE;
3649 }
3650 }
3651 else if (emit->unit == PIPE_SHADER_VERTEX) {
3652 if (emit->info.opcode_count[TGSI_OPCODE_CMP] >= 1)
3653 return TRUE;
3654 if (emit->key.vs.adjust_attrib_range ||
3655 emit->key.vs.adjust_attrib_w_1)
3656 return TRUE;
3657 }
3658
3659 if (emit->info.opcode_count[TGSI_OPCODE_IF] >= 1 ||
3660 emit->info.opcode_count[TGSI_OPCODE_BGNLOOP] >= 1 ||
3661 emit->info.opcode_count[TGSI_OPCODE_DDX] >= 1 ||
3662 emit->info.opcode_count[TGSI_OPCODE_DDY] >= 1 ||
3663 emit->info.opcode_count[TGSI_OPCODE_ROUND] >= 1 ||
3664 emit->info.opcode_count[TGSI_OPCODE_SGE] >= 1 ||
3665 emit->info.opcode_count[TGSI_OPCODE_SGT] >= 1 ||
3666 emit->info.opcode_count[TGSI_OPCODE_SLE] >= 1 ||
3667 emit->info.opcode_count[TGSI_OPCODE_SLT] >= 1 ||
3668 emit->info.opcode_count[TGSI_OPCODE_SNE] >= 1 ||
3669 emit->info.opcode_count[TGSI_OPCODE_SEQ] >= 1 ||
3670 emit->info.opcode_count[TGSI_OPCODE_EXP] >= 1 ||
3671 emit->info.opcode_count[TGSI_OPCODE_LOG] >= 1 ||
3672 emit->info.opcode_count[TGSI_OPCODE_XPD] >= 1 ||
3673 emit->info.opcode_count[TGSI_OPCODE_KILL] >= 1)
3674 return TRUE;
3675
3676 return FALSE;
3677 }
3678
3679
3680 /**
3681 * Do we need to create a looping constant?
3682 */
3683 static boolean
3684 needs_to_create_loop_const(const struct svga_shader_emitter *emit)
3685 {
3686 return (emit->info.opcode_count[TGSI_OPCODE_BGNLOOP] >= 1);
3687 }
3688
3689
3690 static boolean
3691 needs_to_create_arl_consts(const struct svga_shader_emitter *emit)
3692 {
3693 return (emit->num_arl_consts > 0);
3694 }
3695
3696
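/**
* Remember the most negative relative constant index seen for the given
* ARL so that compensating constants can be set up later (see
* create_arl_consts()).
*/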
3697 static boolean
3698 pre_parse_add_indirect( struct svga_shader_emitter *emit,
3699 int num, int current_arl)
3700 {
3701 unsigned i;
3702 assert(num < 0);
3703
3704 for (i = 0; i < emit->num_arl_consts; ++i) {
3705 if (emit->arl_consts[i].arl_num == current_arl)
3706 break;
3707 }
3708 /* new entry */
3709 if (emit->num_arl_consts == i) {
3710 ++emit->num_arl_consts;
3711 }
3712 emit->arl_consts[i].number = (emit->arl_consts[i].number > num) ?
3713 num :
3714 emit->arl_consts[i].number;
3715 emit->arl_consts[i].arl_num = current_arl;
3716 return TRUE;
3717 }
3718
3719
3720 static boolean
3721 pre_parse_instruction( struct svga_shader_emitter *emit,
3722 const struct tgsi_full_instruction *insn,
3723 int current_arl)
3724 {
3725 if (insn->Src[0].Register.Indirect &&
3726 insn->Src[0].Indirect.File == TGSI_FILE_ADDRESS) {
3727 const struct tgsi_full_src_register *reg = &insn->Src[0];
3728 if (reg->Register.Index < 0) {
3729 pre_parse_add_indirect(emit, reg->Register.Index, current_arl);
3730 }
3731 }
3732
3733 if (insn->Src[1].Register.Indirect &&
3734 insn->Src[1].Indirect.File == TGSI_FILE_ADDRESS) {
3735 const struct tgsi_full_src_register *reg = &insn->Src[1];
3736 if (reg->Register.Index < 0) {
3737 pre_parse_add_indirect(emit, reg->Register.Index, current_arl);
3738 }
3739 }
3740
3741 if (insn->Src[2].Register.Indirect &&
3742 insn->Src[2].Indirect.File == TGSI_FILE_ADDRESS) {
3743 const struct tgsi_full_src_register *reg = &insn->Src[2];
3744 if (reg->Register.Index < 0) {
3745 pre_parse_add_indirect(emit, reg->Register.Index, current_arl);
3746 }
3747 }
3748
3749 return TRUE;
3750 }
3751
3752
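/**
* Pre-scan the shader tokens for indirect (ARL-relative) source registers
* with negative constant indices (see pre_parse_add_indirect()).
*/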
3753 static boolean
3754 pre_parse_tokens( struct svga_shader_emitter *emit,
3755 const struct tgsi_token *tokens )
3756 {
3757 struct tgsi_parse_context parse;
3758 int current_arl = 0;
3759
3760 tgsi_parse_init( &parse, tokens );
3761
3762 while (!tgsi_parse_end_of_tokens( &parse )) {
3763 tgsi_parse_token( &parse );
3764 switch (parse.FullToken.Token.Type) {
3765 case TGSI_TOKEN_TYPE_IMMEDIATE:
3766 case TGSI_TOKEN_TYPE_DECLARATION:
3767 break;
3768 case TGSI_TOKEN_TYPE_INSTRUCTION:
3769 if (parse.FullToken.FullInstruction.Instruction.Opcode ==
3770 TGSI_OPCODE_ARL) {
3771 ++current_arl;
3772 }
3773 if (!pre_parse_instruction( emit, &parse.FullToken.FullInstruction,
3774 current_arl ))
3775 return FALSE;
3776 break;
3777 default:
3778 break;
3779 }
3780
3781 }
3782 return TRUE;
3783 }
3784
3785
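/**
* Emit the helper constants (common immediates, loop constant, ARL
* constants) and the unit-specific setup code which must precede the
* first translated instruction.
*/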
3786 static boolean
3787 svga_shader_emit_helpers(struct svga_shader_emitter *emit)
3788 {
3789 if (needs_to_create_common_immediate( emit )) {
3790 create_common_immediate( emit );
3791 }
3792 if (needs_to_create_loop_const( emit )) {
3793 create_loop_const( emit );
3794 }
3795 if (needs_to_create_arl_consts( emit )) {
3796 create_arl_consts( emit );
3797 }
3798
3799 if (emit->unit == PIPE_SHADER_FRAGMENT) {
3800 if (!emit_ps_preamble( emit ))
3801 return FALSE;
3802
3803 if (emit->key.fs.light_twoside) {
3804 if (!emit_light_twoside( emit ))
3805 return FALSE;
3806 }
3807 if (emit->emit_frontface) {
3808 if (!emit_frontface( emit ))
3809 return FALSE;
3810 }
3811 if (emit->inverted_texcoords) {
3812 if (!emit_inverted_texcoords( emit ))
3813 return FALSE;
3814 }
3815 }
3816 else {
3817 assert(emit->unit == PIPE_SHADER_VERTEX);
3818       if (emit->key.vs.adjust_attrib_range ||
3819           emit->key.vs.adjust_attrib_w_1) {
3820          if (!emit_adjusted_vertex_attribs(emit))
3821             return FALSE;
3822       }
3823 }
3824 }
3825
3826 return TRUE;
3827 }
3828
3829
3830 /**
3831  * This is the main entry point into the TGSI instruction translator.
3832 * Translate TGSI shader tokens into an SVGA shader.
3833 */
3834 boolean
3835 svga_shader_emit_instructions(struct svga_shader_emitter *emit,
3836 const struct tgsi_token *tokens)
3837 {
3838 struct tgsi_parse_context parse;
3839 const struct tgsi_token *new_tokens = NULL;
3840 boolean ret = TRUE;
3841 boolean helpers_emitted = FALSE;
3842 unsigned line_nr = 0;
3843
3844 if (emit->unit == PIPE_SHADER_FRAGMENT && emit->key.fs.pstipple) {
3845 unsigned unit;
3846
3847 new_tokens = util_pstipple_create_fragment_shader(tokens, &unit, 0,
3848 TGSI_FILE_INPUT);
3849
3850 if (new_tokens) {
3851 /* Setup texture state for stipple */
3852 emit->key.tex[unit].texture_target = PIPE_TEXTURE_2D;
3853 emit->key.tex[unit].swizzle_r = TGSI_SWIZZLE_X;
3854 emit->key.tex[unit].swizzle_g = TGSI_SWIZZLE_Y;
3855 emit->key.tex[unit].swizzle_b = TGSI_SWIZZLE_Z;
3856 emit->key.tex[unit].swizzle_a = TGSI_SWIZZLE_W;
3857
3858 emit->pstipple_sampler_unit = unit;
3859
3860 tokens = new_tokens;
3861 }
3862 }
3863
3864 tgsi_parse_init( &parse, tokens );
3865 emit->internal_imm_count = 0;
3866
3867 if (emit->unit == PIPE_SHADER_VERTEX) {
3868 ret = emit_vs_preamble( emit );
3869 if (!ret)
3870 goto done;
3871 }
3872
3873 pre_parse_tokens(emit, tokens);
3874
3875 while (!tgsi_parse_end_of_tokens( &parse )) {
3876 tgsi_parse_token( &parse );
3877
3878 switch (parse.FullToken.Token.Type) {
3879 case TGSI_TOKEN_TYPE_IMMEDIATE:
3880 ret = svga_emit_immediate( emit, &parse.FullToken.FullImmediate );
3881 if (!ret)
3882 goto done;
3883 break;
3884
3885 case TGSI_TOKEN_TYPE_DECLARATION:
3886 ret = svga_translate_decl_sm30( emit, &parse.FullToken.FullDeclaration );
3887 if (!ret)
3888 goto done;
3889 break;
3890
3891 case TGSI_TOKEN_TYPE_INSTRUCTION:
3892 if (!helpers_emitted) {
3893 if (!svga_shader_emit_helpers( emit ))
3894 goto done;
3895 helpers_emitted = TRUE;
3896 }
3897 ret = svga_emit_instruction( emit,
3898 line_nr++,
3899 &parse.FullToken.FullInstruction );
3900 if (!ret)
3901 goto done;
3902 break;
3903 default:
3904 break;
3905 }
3906
3907 reset_temp_regs( emit );
3908 }
3909
3910    /* Need to terminate the current subroutine. The hardware requires
3911     * the final sub-routine to end with RET, followed by the shader's
3912     * END token.
3913 */
3914 if (!emit->in_main_func) {
3915 ret = emit_instruction( emit, inst_token( SVGA3DOP_RET ) );
3916 if (!ret)
3917 goto done;
3918 }
3919
3920 assert(emit->dynamic_branching_level == 0);
3921
3922 /* Need to terminate the whole shader:
3923 */
3924 ret = emit_instruction( emit, inst_token( SVGA3DOP_END ) );
3925 if (!ret)
3926 goto done;
3927
3928 done:
3929 tgsi_parse_free( &parse );
3930 if (new_tokens) {
3931 tgsi_free_tokens(new_tokens);
3932 }
3933
3934 return ret;
3935 }