src/gallium/drivers/svga/svga_tgsi_insn.c

   1 /**********************************************************
   2  * Copyright 2008-2009 VMware, Inc.  All rights reserved.
   3  *
   4  * Permission is hereby granted, free of charge, to any person
   5  * obtaining a copy of this software and associated documentation
   6  * files (the "Software"), to deal in the Software without
   7  * restriction, including without limitation the rights to use, copy,
   8  * modify, merge, publish, distribute, sublicense, and/or sell copies
   9  * of the Software, and to permit persons to whom the Software is
  10  * furnished to do so, subject to the following conditions:
  11  *
  12  * The above copyright notice and this permission notice shall be
  13  * included in all copies or substantial portions of the Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  16  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  17  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  18  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  19  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  20  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  21  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  22  * SOFTWARE.
  23  *
  24  **********************************************************/
  25
  26
  27 #include "pipe/p_shader_tokens.h"
  28 #include "tgsi/tgsi_dump.h"
  29 #include "tgsi/tgsi_parse.h"
  30 #include "util/u_memory.h"
  31 #include "util/u_math.h"
  32 #include "util/u_pstipple.h"
  33
  34 #include "svga_tgsi_emit.h"
  35 #include "svga_context.h"
  36
  37
  38 static boolean emit_vs_postamble( struct svga_shader_emitter *emit );
  39 static boolean emit_ps_postamble( struct svga_shader_emitter *emit );
  40
  41
  42 static unsigned
  43 translate_opcode(uint opcode)
  44 {
  45    switch (opcode) {
  46    case TGSI_OPCODE_ABS:        return SVGA3DOP_ABS;
  47    case TGSI_OPCODE_ADD:        return SVGA3DOP_ADD;
  48    case TGSI_OPCODE_DP2A:       return SVGA3DOP_DP2ADD;
  49    case TGSI_OPCODE_DP3:        return SVGA3DOP_DP3;
  50    case TGSI_OPCODE_DP4:        return SVGA3DOP_DP4;
  51    case TGSI_OPCODE_FRC:        return SVGA3DOP_FRC;
  52    case TGSI_OPCODE_MAD:        return SVGA3DOP_MAD;
  53    case TGSI_OPCODE_MAX:        return SVGA3DOP_MAX;
  54    case TGSI_OPCODE_MIN:        return SVGA3DOP_MIN;
  55    case TGSI_OPCODE_MOV:        return SVGA3DOP_MOV;
  56    case TGSI_OPCODE_MUL:        return SVGA3DOP_MUL;
  57    case TGSI_OPCODE_NOP:        return SVGA3DOP_NOP;
  58    default:
  59       assert(!"svga: unexpected opcode in translate_opcode()");
  60       return SVGA3DOP_LAST_INST;
  61    }
  62 }
  63
  64
  65 static unsigned
  66 translate_file(unsigned file)
  67 {
  68    switch (file) {
  69    case TGSI_FILE_TEMPORARY: return SVGA3DREG_TEMP;
  70    case TGSI_FILE_INPUT:     return SVGA3DREG_INPUT;
  71    case TGSI_FILE_OUTPUT:    return SVGA3DREG_OUTPUT; /* VS3.0+ only */
  72    case TGSI_FILE_IMMEDIATE: return SVGA3DREG_CONST;
  73    case TGSI_FILE_CONSTANT:  return SVGA3DREG_CONST;
  74    case TGSI_FILE_SAMPLER:   return SVGA3DREG_SAMPLER;
  75    case TGSI_FILE_ADDRESS:   return SVGA3DREG_ADDR;
  76    default:
  77       assert(!"svga: unexpected register file in translate_file()");
  78       return SVGA3DREG_TEMP;
  79    }
  80 }
  81
  82
  83 /**
  84  * Translate a TGSI destination register to an SVGA3DShaderDestToken.
  85  * \param insn  the TGSI instruction
  86  * \param idx  which TGSI dest register to translate (usually (always?) zero)
  87  */
  88 static SVGA3dShaderDestToken
  89 translate_dst_register( struct svga_shader_emitter *emit,
  90                         const struct tgsi_full_instruction *insn,
  91                         unsigned idx )
  92 {
  93    const struct tgsi_full_dst_register *reg = &insn->Dst[idx];
  94    SVGA3dShaderDestToken dest;
  95
  96    switch (reg->Register.File) {
  97    case TGSI_FILE_OUTPUT:
  98       /* Output registers encode semantic information in their name.
  99        * Need to lookup a table built at decl time:
 100        */
 101       dest = emit->output_map[reg->Register.Index];
 102       emit->num_output_writes++;
 103       break;
 104
 105    default:
 106       {
 107          unsigned index = reg->Register.Index;
 108          assert(index < SVGA3D_TEMPREG_MAX);
 109          index = MIN2(index, SVGA3D_TEMPREG_MAX - 1);
 110          dest = dst_register(translate_file(reg->Register.File), index);
 111       }
 112       break;
 113    }
 114
 115    if (reg->Register.Indirect) {
 116       debug_warning("Indirect indexing of dest registers is not supported!\n");
 117    }
 118
 119    dest.mask = reg->Register.WriteMask;
 120    assert(dest.mask);
 121
 122    if (insn->Instruction.Saturate)
 123       dest.dstMod = SVGA3DDSTMOD_SATURATE;
 124
 125    return dest;
 126 }
 127
 128
 129 /**
 130  * Apply a swizzle to a src_register, returning a new src_register
 131  * Ex: swizzle(SRC.ZZYY, SWIZZLE_Z, SWIZZLE_W, SWIZZLE_X, SWIZZLE_Y)
 132  * would return SRC.YYZZ
 133  */
 134 static struct src_register
 135 swizzle(struct src_register src,
 136         unsigned x, unsigned y, unsigned z, unsigned w)
 137 {
 138    assert(x < 4);
 139    assert(y < 4);
 140    assert(z < 4);
 141    assert(w < 4);
 142    x = (src.base.swizzle >> (x * 2)) & 0x3;
 143    y = (src.base.swizzle >> (y * 2)) & 0x3;
 144    z = (src.base.swizzle >> (z * 2)) & 0x3;
 145    w = (src.base.swizzle >> (w * 2)) & 0x3;
 146
 147    src.base.swizzle = TRANSLATE_SWIZZLE(x, y, z, w);
 148
 149    return src;
 150 }
 151
 152
 153 /**
 154  * Apply a "scalar" swizzle to a src_register returning a new
 155  * src_register where all the swizzle terms are the same.
 156  * Ex: scalar(SRC.WZYX, SWIZZLE_Y) would return SRC.ZZZZ
 157  */
 158 static struct src_register
 159 scalar(struct src_register src, unsigned comp)
 160 {
 161    assert(comp < 4);
 162    return swizzle( src, comp, comp, comp, comp );
 163 }
 164
 165
 166 static boolean
 167 svga_arl_needs_adjustment( const struct svga_shader_emitter *emit )
 168 {
 169    unsigned i;
 170
 171    for (i = 0; i < emit->num_arl_consts; ++i) {
 172       if (emit->arl_consts[i].arl_num == emit->current_arl)
 173          return TRUE;
 174    }
 175    return FALSE;
 176 }
 177
 178
 179 static int
 180 svga_arl_adjustment( const struct svga_shader_emitter *emit )
 181 {
 182    unsigned i;
 183
 184    for (i = 0; i < emit->num_arl_consts; ++i) {
 185       if (emit->arl_consts[i].arl_num == emit->current_arl)
 186          return emit->arl_consts[i].number;
 187    }
 188    return 0;
 189 }
 190
 191
 192 /**
 193  * Translate a TGSI src register to a src_register.
 194  */
 195 static struct src_register
 196 translate_src_register( const struct svga_shader_emitter *emit,
 197                         const struct tgsi_full_src_register *reg )
 198 {
 199    struct src_register src;
 200
 201    switch (reg->Register.File) {
 202    case TGSI_FILE_INPUT:
 203       /* Input registers are referred to by their semantic name rather
 204        * than by index.  Use the mapping build up from the decls:
 205        */
 206       src = emit->input_map[reg->Register.Index];
 207       break;
 208
 209    case TGSI_FILE_IMMEDIATE:
 210       /* Immediates are appended after TGSI constants in the D3D
 211        * constant buffer.
 212        */
 213       src = src_register( translate_file( reg->Register.File ),
 214                           reg->Register.Index + emit->imm_start );
 215       break;
 216
 217    default:
 218       src = src_register( translate_file( reg->Register.File ),
 219                           reg->Register.Index );
 220       break;
 221    }
 222
 223    /* Indirect addressing.
 224     */
 225    if (reg->Register.Indirect) {
 226       if (emit->unit == PIPE_SHADER_FRAGMENT) {
 227          /* Pixel shaders have only loop registers for relative
 228           * addressing into inputs. Ignore the redundant address
 229           * register, the contents of aL should be in sync with it.
 230           */
 231          if (reg->Register.File == TGSI_FILE_INPUT) {
 232             src.base.relAddr = 1;
 233             src.indirect = src_token(SVGA3DREG_LOOP, 0);
 234          }
 235       }
 236       else {
 237          /* Constant buffers only.
 238           */
 239          if (reg->Register.File == TGSI_FILE_CONSTANT) {
 240             /* we shift the offset towards the minimum */
 241             if (svga_arl_needs_adjustment( emit )) {
 242                src.base.num -= svga_arl_adjustment( emit );
 243             }
 244             src.base.relAddr = 1;
 245
 246             /* Not really sure what should go in the second token:
 247              */
 248             src.indirect = src_token( SVGA3DREG_ADDR,
 249                                       reg->Indirect.Index );
 250
 251             src.indirect.swizzle = SWIZZLE_XXXX;
 252          }
 253       }
 254    }
 255
 256    src = swizzle( src,
 257                   reg->Register.SwizzleX,
 258                   reg->Register.SwizzleY,
 259                   reg->Register.SwizzleZ,
 260                   reg->Register.SwizzleW );
 261
 262    /* src.mod isn't a bitfield, unfortunately:
 263     * See tgsi_util_get_full_src_register_sign_mode for implementation details.
 264     */
 265    if (reg->Register.Absolute) {
 266       if (reg->Register.Negate)
 267          src.base.srcMod = SVGA3DSRCMOD_ABSNEG;
 268       else
 269          src.base.srcMod = SVGA3DSRCMOD_ABS;
 270    }
 271    else {
 272       if (reg->Register.Negate)
 273          src.base.srcMod = SVGA3DSRCMOD_NEG;
 274       else
 275          src.base.srcMod = SVGA3DSRCMOD_NONE;
 276    }
 277
 278    return src;
 279 }
 280
 281
 282 /*
 283  * Get a temporary register.
 284  * Note: if we exceed the temporary register limit we just use
 285  * register SVGA3D_TEMPREG_MAX - 1.
 286  */
 287 static SVGA3dShaderDestToken
 288 get_temp( struct svga_shader_emitter *emit )
 289 {
 290    int i = emit->nr_hw_temp + emit->internal_temp_count++;
 291    if (i >= SVGA3D_TEMPREG_MAX) {
 292       debug_warn_once("svga: Too many temporary registers used in shader\n");
 293       i = SVGA3D_TEMPREG_MAX - 1;
 294    }
 295    return dst_register( SVGA3DREG_TEMP, i );
 296 }
 297
 298
 299 /**
 300  * Release a single temp.  Currently only effective if it was the last
 301  * allocated temp, otherwise release will be delayed until the next
 302  * call to reset_temp_regs().
 303  */
 304 static void
 305 release_temp( struct svga_shader_emitter *emit,
 306               SVGA3dShaderDestToken temp )
 307 {
 308    if (temp.num == emit->internal_temp_count - 1)
 309       emit->internal_temp_count--;
 310 }
 311
 312
 313 /**
 314  * Release all temps.
 315  */
 316 static void
 317 reset_temp_regs(struct svga_shader_emitter *emit)
 318 {
 319    emit->internal_temp_count = 0;
 320 }
 321
 322
 323 /** Emit bytecode for a src_register */
 324 static boolean
 325 emit_src(struct svga_shader_emitter *emit, const struct src_register src)
 326 {
 327    if (src.base.relAddr) {
 328       assert(src.base.reserved0);
 329       assert(src.indirect.reserved0);
 330       return (svga_shader_emit_dword( emit, src.base.value ) &&
 331               svga_shader_emit_dword( emit, src.indirect.value ));
 332    }
 333    else {
 334       assert(src.base.reserved0);
 335       return svga_shader_emit_dword( emit, src.base.value );
 336    }
 337 }
 338
 339
 340 /** Emit bytecode for a dst_register */
 341 static boolean
 342 emit_dst(struct svga_shader_emitter *emit, SVGA3dShaderDestToken dest)
 343 {
 344    assert(dest.reserved0);
 345    assert(dest.mask);
 346    return svga_shader_emit_dword( emit, dest.value );
 347 }
 348
 349
 350 /** Emit bytecode for a 1-operand instruction */
 351 static boolean
 352 emit_op1(struct svga_shader_emitter *emit,
 353          SVGA3dShaderInstToken inst,
 354          SVGA3dShaderDestToken dest,
 355          struct src_register src0)
 356 {
 357    return (emit_instruction(emit, inst) &&
 358            emit_dst(emit, dest) &&
 359            emit_src(emit, src0));
 360 }
 361
 362
 363 /** Emit bytecode for a 2-operand instruction */
 364 static boolean
 365 emit_op2(struct svga_shader_emitter *emit,
 366          SVGA3dShaderInstToken inst,
 367          SVGA3dShaderDestToken dest,
 368          struct src_register src0,
 369          struct src_register src1)
 370 {
 371    return (emit_instruction(emit, inst) &&
 372            emit_dst(emit, dest) &&
 373            emit_src(emit, src0) &&
 374            emit_src(emit, src1));
 375 }
 376
 377
 378 /** Emit bytecode for a 3-operand instruction */
 379 static boolean
 380 emit_op3(struct svga_shader_emitter *emit,
 381          SVGA3dShaderInstToken inst,
 382          SVGA3dShaderDestToken dest,
 383          struct src_register src0,
 384          struct src_register src1,
 385          struct src_register src2)
 386 {
 387    return (emit_instruction(emit, inst) &&
 388            emit_dst(emit, dest) &&
 389            emit_src(emit, src0) &&
 390            emit_src(emit, src1) &&
 391            emit_src(emit, src2));
 392 }
 393
 394
 395 /** Emit bytecode for a 4-operand instruction */
 396 static boolean
 397 emit_op4(struct svga_shader_emitter *emit,
 398          SVGA3dShaderInstToken inst,
 399          SVGA3dShaderDestToken dest,
 400          struct src_register src0,
 401          struct src_register src1,
 402          struct src_register src2,
 403          struct src_register src3)
 404 {
 405    return (emit_instruction(emit, inst) &&
 406            emit_dst(emit, dest) &&
 407            emit_src(emit, src0) &&
 408            emit_src(emit, src1) &&
 409            emit_src(emit, src2) &&
 410            emit_src(emit, src3));
 411 }
 412
 413
 414 /**
 415  * Apply the absolute value modifier to the given src_register, returning
 416  * a new src_register.
 417  */
 418 static struct src_register
 419 absolute(struct src_register src)
 420 {
 421    src.base.srcMod = SVGA3DSRCMOD_ABS;
 422    return src;
 423 }
 424
 425
 426 /**
 427  * Apply the negation modifier to the given src_register, returning
 428  * a new src_register.
 429  */
 430 static struct src_register
 431 negate(struct src_register src)
 432 {
 433    switch (src.base.srcMod) {
 434    case SVGA3DSRCMOD_ABS:
 435       src.base.srcMod = SVGA3DSRCMOD_ABSNEG;
 436       break;
 437    case SVGA3DSRCMOD_ABSNEG:
 438       src.base.srcMod = SVGA3DSRCMOD_ABS;
 439       break;
 440    case SVGA3DSRCMOD_NEG:
 441       src.base.srcMod = SVGA3DSRCMOD_NONE;
 442       break;
 443    case SVGA3DSRCMOD_NONE:
 444       src.base.srcMod = SVGA3DSRCMOD_NEG;
 445       break;
 446    }
 447    return src;
 448 }
 449
 450
 451
 452 /* Replace the src with the temporary specified in the dst, but copying
 453  * only the necessary channels, and preserving the original swizzle (which is
 454  * important given that several opcodes have constraints in the allowed
 455  * swizzles).
 456  */
 457 static boolean
 458 emit_repl(struct svga_shader_emitter *emit,
 459           SVGA3dShaderDestToken dst,
 460           struct src_register *src0)
 461 {
 462    unsigned src0_swizzle;
 463    unsigned chan;
 464
 465    assert(SVGA3dShaderGetRegType(dst.value) == SVGA3DREG_TEMP);
 466
 467    src0_swizzle = src0->base.swizzle;
 468
 469    dst.mask = 0;
 470    for (chan = 0; chan < 4; ++chan) {
 471       unsigned swizzle = (src0_swizzle >> (chan *2)) & 0x3;
 472       dst.mask |= 1 << swizzle;
 473    }
 474    assert(dst.mask);
 475
 476    src0->base.swizzle = SVGA3DSWIZZLE_NONE;
 477
 478    if (!emit_op1( emit, inst_token( SVGA3DOP_MOV ), dst, *src0 ))
 479       return FALSE;
 480
 481    *src0 = src( dst );
 482    src0->base.swizzle = src0_swizzle;
 483
 484    return TRUE;
 485 }
 486
 487
 488 /**
 489  * Submit/emit an instruction with zero operands.
 490  */
 491 static boolean
 492 submit_op0(struct svga_shader_emitter *emit,
 493            SVGA3dShaderInstToken inst,
 494            SVGA3dShaderDestToken dest)
 495 {
 496    return (emit_instruction( emit, inst ) &&
 497            emit_dst( emit, dest ));
 498 }
 499
 500
 501 /**
 502  * Submit/emit an instruction with one operand.
 503  */
 504 static boolean
 505 submit_op1(struct svga_shader_emitter *emit,
 506            SVGA3dShaderInstToken inst,
 507            SVGA3dShaderDestToken dest,
 508            struct src_register src0)
 509 {
 510    return emit_op1( emit, inst, dest, src0 );
 511 }
 512
 513
 514 /**
 515  * Submit/emit an instruction with two operands.
 516  *
 517  * SVGA shaders may not refer to >1 constant register in a single
 518  * instruction.  This function checks for that usage and inserts a
 519  * move to temporary if detected.
 520  *
 521  * The same applies to input registers -- at most a single input
 522  * register may be read by any instruction.
 523  */
 524 static boolean
 525 submit_op2(struct svga_shader_emitter *emit,
 526            SVGA3dShaderInstToken inst,
 527            SVGA3dShaderDestToken dest,
 528            struct src_register src0,
 529            struct src_register src1)
 530 {
 531    SVGA3dShaderDestToken temp;
 532    SVGA3dShaderRegType type0, type1;
 533    boolean need_temp = FALSE;
 534
 535    temp.value = 0;
 536    type0 = SVGA3dShaderGetRegType( src0.base.value );
 537    type1 = SVGA3dShaderGetRegType( src1.base.value );
 538
 539    if (type0 == SVGA3DREG_CONST &&
 540        type1 == SVGA3DREG_CONST &&
 541        src0.base.num != src1.base.num)
 542       need_temp = TRUE;
 543
 544    if (type0 == SVGA3DREG_INPUT &&
 545        type1 == SVGA3DREG_INPUT &&
 546        src0.base.num != src1.base.num)
 547       need_temp = TRUE;
 548
 549    if (need_temp) {
 550       temp = get_temp( emit );
 551
 552       if (!emit_repl( emit, temp, &src0 ))
 553          return FALSE;
 554    }
 555
 556    if (!emit_op2( emit, inst, dest, src0, src1 ))
 557       return FALSE;
 558
 559    if (need_temp)
 560       release_temp( emit, temp );
 561
 562    return TRUE;
 563 }
 564
 565
 566 /**
 567  * Submit/emit an instruction with three operands.
 568  *
 569  * SVGA shaders may not refer to >1 constant register in a single
 570  * instruction.  This function checks for that usage and inserts a
 571  * move to temporary if detected.
 572  */
 573 static boolean
 574 submit_op3(struct svga_shader_emitter *emit,
 575            SVGA3dShaderInstToken inst,
 576            SVGA3dShaderDestToken dest,
 577            struct src_register src0,
 578            struct src_register src1,
 579            struct src_register src2)
 580 {
 581    SVGA3dShaderDestToken temp0;
 582    SVGA3dShaderDestToken temp1;
 583    boolean need_temp0 = FALSE;
 584    boolean need_temp1 = FALSE;
 585    SVGA3dShaderRegType type0, type1, type2;
 586
 587    temp0.value = 0;
 588    temp1.value = 0;
 589    type0 = SVGA3dShaderGetRegType( src0.base.value );
 590    type1 = SVGA3dShaderGetRegType( src1.base.value );
 591    type2 = SVGA3dShaderGetRegType( src2.base.value );
 592
 593    if (inst.op != SVGA3DOP_SINCOS) {
 594       if (type0 == SVGA3DREG_CONST &&
 595           ((type1 == SVGA3DREG_CONST && src0.base.num != src1.base.num) ||
 596            (type2 == SVGA3DREG_CONST && src0.base.num != src2.base.num)))
 597          need_temp0 = TRUE;
 598
 599       if (type1 == SVGA3DREG_CONST &&
 600           (type2 == SVGA3DREG_CONST && src1.base.num != src2.base.num))
 601          need_temp1 = TRUE;
 602    }
 603
 604    if (type0 == SVGA3DREG_INPUT &&
 605        ((type1 == SVGA3DREG_INPUT && src0.base.num != src1.base.num) ||
 606         (type2 == SVGA3DREG_INPUT && src0.base.num != src2.base.num)))
 607       need_temp0 = TRUE;
 608
 609    if (type1 == SVGA3DREG_INPUT &&
 610        (type2 == SVGA3DREG_INPUT && src1.base.num != src2.base.num))
 611       need_temp1 = TRUE;
 612
 613    if (need_temp0) {
 614       temp0 = get_temp( emit );
 615
 616       if (!emit_repl( emit, temp0, &src0 ))
 617          return FALSE;
 618    }
 619
 620    if (need_temp1) {
 621       temp1 = get_temp( emit );
 622
 623       if (!emit_repl( emit, temp1, &src1 ))
 624          return FALSE;
 625    }
 626
 627    if (!emit_op3( emit, inst, dest, src0, src1, src2 ))
 628       return FALSE;
 629
 630    if (need_temp1)
 631       release_temp( emit, temp1 );
 632    if (need_temp0)
 633       release_temp( emit, temp0 );
 634    return TRUE;
 635 }
 636
 637
 638 /**
 639  * Submit/emit an instruction with four operands.
 640  *
 641  * SVGA shaders may not refer to >1 constant register in a single
 642  * instruction.  This function checks for that usage and inserts a
 643  * move to temporary if detected.
 644  */
 645 static boolean
 646 submit_op4(struct svga_shader_emitter *emit,
 647            SVGA3dShaderInstToken inst,
 648            SVGA3dShaderDestToken dest,
 649            struct src_register src0,
 650            struct src_register src1,
 651            struct src_register src2,
 652            struct src_register src3)
 653 {
 654    SVGA3dShaderDestToken temp0;
 655    SVGA3dShaderDestToken temp3;
 656    boolean need_temp0 = FALSE;
 657    boolean need_temp3 = FALSE;
 658    SVGA3dShaderRegType type0, type1, type2, type3;
 659
 660    temp0.value = 0;
 661    temp3.value = 0;
 662    type0 = SVGA3dShaderGetRegType( src0.base.value );
 663    type1 = SVGA3dShaderGetRegType( src1.base.value );
 664    type2 = SVGA3dShaderGetRegType( src2.base.value );
 665    type3 = SVGA3dShaderGetRegType( src2.base.value );
 666
 667    /* Make life a little easier - this is only used by the TXD
 668     * instruction which is guaranteed not to have a constant/input reg
 669     * in one slot at least:
 670     */
 671    assert(type1 == SVGA3DREG_SAMPLER);
 672    (void) type1;
 673
 674    if (type0 == SVGA3DREG_CONST &&
 675        ((type3 == SVGA3DREG_CONST && src0.base.num != src3.base.num) ||
 676         (type2 == SVGA3DREG_CONST && src0.base.num != src2.base.num)))
 677       need_temp0 = TRUE;
 678
 679    if (type3 == SVGA3DREG_CONST &&
 680        (type2 == SVGA3DREG_CONST && src3.base.num != src2.base.num))
 681       need_temp3 = TRUE;
 682
 683    if (type0 == SVGA3DREG_INPUT &&
 684        ((type3 == SVGA3DREG_INPUT && src0.base.num != src3.base.num) ||
 685         (type2 == SVGA3DREG_INPUT && src0.base.num != src2.base.num)))
 686       need_temp0 = TRUE;
 687
 688    if (type3 == SVGA3DREG_INPUT &&
 689        (type2 == SVGA3DREG_INPUT && src3.base.num != src2.base.num))
 690       need_temp3 = TRUE;
 691
 692    if (need_temp0) {
 693       temp0 = get_temp( emit );
 694
 695       if (!emit_repl( emit, temp0, &src0 ))
 696          return FALSE;
 697    }
 698
 699    if (need_temp3) {
 700       temp3 = get_temp( emit );
 701
 702       if (!emit_repl( emit, temp3, &src3 ))
 703          return FALSE;
 704    }
 705
 706    if (!emit_op4( emit, inst, dest, src0, src1, src2, src3 ))
 707       return FALSE;
 708
 709    if (need_temp3)
 710       release_temp( emit, temp3 );
 711    if (need_temp0)
 712       release_temp( emit, temp0 );
 713    return TRUE;
 714 }
 715
 716
 717 /**
 718  * Do the src and dest registers refer to the same register?
 719  */
 720 static boolean
 721 alias_src_dst(struct src_register src,
 722               SVGA3dShaderDestToken dst)
 723 {
 724    if (src.base.num != dst.num)
 725       return FALSE;
 726
 727    if (SVGA3dShaderGetRegType(dst.value) !=
 728        SVGA3dShaderGetRegType(src.base.value))
 729       return FALSE;
 730
 731    return TRUE;
 732 }
 733
 734
 735 /**
 736  * Helper for emitting SVGA immediate values using the SVGA3DOP_DEF[I]
 737  * instructions.
 738  */
 739 static boolean
 740 emit_def_const(struct svga_shader_emitter *emit,
 741                SVGA3dShaderConstType type,
 742                unsigned idx, float a, float b, float c, float d)
 743 {
 744    SVGA3DOpDefArgs def;
 745    SVGA3dShaderInstToken opcode;
 746
 747    switch (type) {
 748    case SVGA3D_CONST_TYPE_FLOAT:
 749       opcode = inst_token( SVGA3DOP_DEF );
 750       def.dst = dst_register( SVGA3DREG_CONST, idx );
 751       def.constValues[0] = a;
 752       def.constValues[1] = b;
 753       def.constValues[2] = c;
 754       def.constValues[3] = d;
 755       break;
 756    case SVGA3D_CONST_TYPE_INT:
 757       opcode = inst_token( SVGA3DOP_DEFI );
 758       def.dst = dst_register( SVGA3DREG_CONSTINT, idx );
 759       def.constIValues[0] = (int)a;
 760       def.constIValues[1] = (int)b;
 761       def.constIValues[2] = (int)c;
 762       def.constIValues[3] = (int)d;
 763       break;
 764    default:
 765       assert(0);
 766       opcode = inst_token( SVGA3DOP_NOP );
 767       break;
 768    }
 769
 770    if (!emit_instruction(emit, opcode) ||
 771        !svga_shader_emit_dwords( emit, def.values, ARRAY_SIZE(def.values)))
 772       return FALSE;
 773
 774    return TRUE;
 775 }
 776
 777
 778 static boolean
 779 create_loop_const( struct svga_shader_emitter *emit )
 780 {
 781    unsigned idx = emit->nr_hw_int_const++;
 782
 783    if (!emit_def_const( emit, SVGA3D_CONST_TYPE_INT, idx,
 784                         255, /* iteration count */
 785                         0, /* initial value */
 786                         1, /* step size */
 787                         0 /* not used, must be 0 */))
 788       return FALSE;
 789
 790    emit->loop_const_idx = idx;
 791    emit->created_loop_const = TRUE;
 792
 793    return TRUE;
 794 }
 795
 796 static boolean
 797 create_arl_consts( struct svga_shader_emitter *emit )
 798 {
 799    int i;
 800
 801    for (i = 0; i < emit->num_arl_consts; i += 4) {
 802       int j;
 803       unsigned idx = emit->nr_hw_float_const++;
 804       float vals[4];
 805       for (j = 0; j < 4 && (j + i) < emit->num_arl_consts; ++j) {
 806          vals[j] = (float) emit->arl_consts[i + j].number;
 807          emit->arl_consts[i + j].idx = idx;
 808          switch (j) {
 809          case 0:
 810             emit->arl_consts[i + 0].swizzle = TGSI_SWIZZLE_X;
 811             break;
 812          case 1:
 813             emit->arl_consts[i + 0].swizzle = TGSI_SWIZZLE_Y;
 814             break;
 815          case 2:
 816             emit->arl_consts[i + 0].swizzle = TGSI_SWIZZLE_Z;
 817             break;
 818          case 3:
 819             emit->arl_consts[i + 0].swizzle = TGSI_SWIZZLE_W;
 820             break;
 821          }
 822       }
 823       while (j < 4)
 824          vals[j++] = 0;
 825
 826       if (!emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT, idx,
 827                            vals[0], vals[1],
 828                            vals[2], vals[3]))
 829          return FALSE;
 830    }
 831
 832    return TRUE;
 833 }
 834
 835
 836 /**
 837  * Return the register which holds the pixel shaders front/back-
 838  * facing value.
 839  */
 840 static struct src_register
 841 get_vface( struct svga_shader_emitter *emit )
 842 {
 843    assert(emit->emitted_vface);
 844    return src_register(SVGA3DREG_MISCTYPE, SVGA3DMISCREG_FACE);
 845 }
 846
 847
 848 /**
 849  * Create/emit a "common" constant with values {0, 0.5, -1, 1}.
 850  * We can swizzle this to produce other useful constants such as
 851  * {0, 0, 0, 0}, {1, 1, 1, 1}, etc.
 852  */
 853 static boolean
 854 create_common_immediate( struct svga_shader_emitter *emit )
 855 {
 856    unsigned idx = emit->nr_hw_float_const++;
 857
 858    /* Emit the constant (0, 0.5, -1, 1) and use swizzling to generate
 859     * other useful vectors.
 860     */
 861    if (!emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT,
 862                         idx, 0.0f, 0.5f, -1.0f, 1.0f ))
 863       return FALSE;
 864    emit->common_immediate_idx[0] = idx;
 865    idx++;
 866
 867    /* Emit constant {2, 0, 0, 0} (only the 2 is used for now) */
 868    if (emit->key.vs.adjust_attrib_range) {
 869       if (!emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT,
 870                            idx, 2.0f, 0.0f, 0.0f, 0.0f ))
 871          return FALSE;
 872       emit->common_immediate_idx[1] = idx;
 873    }
 874    else {
 875       emit->common_immediate_idx[1] = -1;
 876    }
 877
 878    emit->created_common_immediate = TRUE;
 879
 880    return TRUE;
 881 }
 882
 883
 884 /**
 885  * Return swizzle/position for the given value in the "common" immediate.
 886  */
 887 static inline unsigned
 888 common_immediate_swizzle(float value)
 889 {
 890    if (value == 0.0f)
 891       return TGSI_SWIZZLE_X;
 892    else if (value == 0.5f)
 893       return TGSI_SWIZZLE_Y;
 894    else if (value == -1.0f)
 895       return TGSI_SWIZZLE_Z;
 896    else if (value == 1.0f)
 897       return TGSI_SWIZZLE_W;
 898    else {
 899       assert(!"illegal value in common_immediate_swizzle");
 900       return TGSI_SWIZZLE_X;
 901    }
 902 }
 903
 904
 905 /**
 906  * Returns an immediate reg where all the terms are either 0, 1, 2 or 0.5
 907  */
 908 static struct src_register
 909 get_immediate(struct svga_shader_emitter *emit,
 910               float x, float y, float z, float w)
 911 {
 912    unsigned sx = common_immediate_swizzle(x);
 913    unsigned sy = common_immediate_swizzle(y);
 914    unsigned sz = common_immediate_swizzle(z);
 915    unsigned sw = common_immediate_swizzle(w);
 916    assert(emit->created_common_immediate);
 917    assert(emit->common_immediate_idx[0] >= 0);
 918    return swizzle(src_register(SVGA3DREG_CONST, emit->common_immediate_idx[0]),
 919                   sx, sy, sz, sw);
 920 }
 921
 922
 923 /**
 924  * returns {0, 0, 0, 0} immediate
 925  */
 926 static struct src_register
 927 get_zero_immediate( struct svga_shader_emitter *emit )
 928 {
 929    assert(emit->created_common_immediate);
 930    assert(emit->common_immediate_idx[0] >= 0);
 931    return swizzle(src_register( SVGA3DREG_CONST,
 932                                 emit->common_immediate_idx[0]),
 933                   0, 0, 0, 0);
 934 }
 935
 936
 937 /**
 938  * returns {1, 1, 1, 1} immediate
 939  */
 940 static struct src_register
 941 get_one_immediate( struct svga_shader_emitter *emit )
 942 {
 943    assert(emit->created_common_immediate);
 944    assert(emit->common_immediate_idx[0] >= 0);
 945    return swizzle(src_register( SVGA3DREG_CONST,
 946                                 emit->common_immediate_idx[0]),
 947                   3, 3, 3, 3);
 948 }
 949
 950
 951 /**
 952  * returns {0.5, 0.5, 0.5, 0.5} immediate
 953  */
 954 static struct src_register
 955 get_half_immediate( struct svga_shader_emitter *emit )
 956 {
 957    assert(emit->created_common_immediate);
 958    assert(emit->common_immediate_idx[0] >= 0);
 959    return swizzle(src_register(SVGA3DREG_CONST, emit->common_immediate_idx[0]),
 960                   1, 1, 1, 1);
 961 }
 962
 963
 964 /**
 965  * returns {2, 2, 2, 2} immediate
 966  */
 967 static struct src_register
 968 get_two_immediate( struct svga_shader_emitter *emit )
 969 {
 970    /* Note we use the second common immediate here */
 971    assert(emit->created_common_immediate);
 972    assert(emit->common_immediate_idx[1] >= 0);
 973    return swizzle(src_register( SVGA3DREG_CONST,
 974                                 emit->common_immediate_idx[1]),
 975                   0, 0, 0, 0);
 976 }
 977
 978
 979 /**
 980  * returns the loop const
 981  */
 982 static struct src_register
 983 get_loop_const( struct svga_shader_emitter *emit )
 984 {
 985    assert(emit->created_loop_const);
 986    assert(emit->loop_const_idx >= 0);
 987    return src_register( SVGA3DREG_CONSTINT,
 988                         emit->loop_const_idx );
 989 }
 990
 991
 992 static struct src_register
 993 get_fake_arl_const( struct svga_shader_emitter *emit )
 994 {
 995    struct src_register reg;
 996    int idx = 0, swizzle = 0, i;
 997
 998    for (i = 0; i < emit->num_arl_consts; ++ i) {
 999       if (emit->arl_consts[i].arl_num == emit->current_arl) {
1000          idx = emit->arl_consts[i].idx;
1001          swizzle = emit->arl_consts[i].swizzle;
1002       }
1003    }
1004
1005    reg = src_register( SVGA3DREG_CONST, idx );
1006    return scalar(reg, swizzle);
1007 }
1008
1009
1010 /**
1011  * Return a register which holds the width and height of the texture
1012  * currently bound to the given sampler.
1013  */
1014 static struct src_register
1015 get_tex_dimensions( struct svga_shader_emitter *emit, int sampler_num )
1016 {
1017    int idx;
1018    struct src_register reg;
1019
1020    /* the width/height indexes start right after constants */
1021    idx = emit->key.tex[sampler_num].width_height_idx +
1022          emit->info.file_max[TGSI_FILE_CONSTANT] + 1;
1023
1024    reg = src_register( SVGA3DREG_CONST, idx );
1025    return reg;
1026 }
1027
1028
1029 static boolean
1030 emit_fake_arl(struct svga_shader_emitter *emit,
1031               const struct tgsi_full_instruction *insn)
1032 {
1033    const struct src_register src0 =
1034       translate_src_register(emit, &insn->Src[0] );
1035    struct src_register src1 = get_fake_arl_const( emit );
1036    SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1037    SVGA3dShaderDestToken tmp = get_temp( emit );
1038
1039    if (!submit_op1(emit, inst_token( SVGA3DOP_MOV ), tmp, src0))
1040       return FALSE;
1041
1042    if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ), tmp, src( tmp ),
1043                     src1))
1044       return FALSE;
1045
1046    /* replicate the original swizzle */
1047    src1 = src(tmp);
1048    src1.base.swizzle = src0.base.swizzle;
1049
1050    return submit_op1( emit, inst_token( SVGA3DOP_MOVA ),
1051                       dst, src1 );
1052 }
1053
1054
1055 static boolean
1056 emit_if(struct svga_shader_emitter *emit,
1057         const struct tgsi_full_instruction *insn)
1058 {
1059    struct src_register src0 =
1060       translate_src_register(emit, &insn->Src[0]);
1061    struct src_register zero = get_zero_immediate(emit);
1062    SVGA3dShaderInstToken if_token = inst_token( SVGA3DOP_IFC );
1063
1064    if_token.control = SVGA3DOPCOMPC_NE;
1065
1066    if (SVGA3dShaderGetRegType(src0.base.value) == SVGA3DREG_CONST) {
1067       /*
1068        * Max different constant registers readable per IFC instruction is 1.
1069        */
1070       SVGA3dShaderDestToken tmp = get_temp( emit );
1071
1072       if (!submit_op1(emit, inst_token( SVGA3DOP_MOV ), tmp, src0))
1073          return FALSE;
1074
1075       src0 = scalar(src( tmp ), TGSI_SWIZZLE_X);
1076    }
1077
1078    emit->dynamic_branching_level++;
1079
1080    return (emit_instruction( emit, if_token ) &&
1081            emit_src( emit, src0 ) &&
1082            emit_src( emit, zero ) );
1083 }
1084
1085
1086 static boolean
1087 emit_else(struct svga_shader_emitter *emit,
1088           const struct tgsi_full_instruction *insn)
1089 {
1090    return emit_instruction(emit, inst_token(SVGA3DOP_ELSE));
1091 }
1092
1093
1094 static boolean
1095 emit_endif(struct svga_shader_emitter *emit,
1096            const struct tgsi_full_instruction *insn)
1097 {
1098    emit->dynamic_branching_level--;
1099
1100    return emit_instruction(emit, inst_token(SVGA3DOP_ENDIF));
1101 }
1102
1103
1104 /**
1105  * Translate the following TGSI FLR instruction.
1106  *    FLR  DST, SRC
1107  * To the following SVGA3D instruction sequence.
1108  *    FRC  TMP, SRC
1109  *    SUB  DST, SRC, TMP
1110  */
1111 static boolean
1112 emit_floor(struct svga_shader_emitter *emit,
1113            const struct tgsi_full_instruction *insn )
1114 {
1115    SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1116    const struct src_register src0 =
1117       translate_src_register(emit, &insn->Src[0] );
1118    SVGA3dShaderDestToken temp = get_temp( emit );
1119
1120    /* FRC  TMP, SRC */
1121    if (!submit_op1( emit, inst_token( SVGA3DOP_FRC ), temp, src0 ))
1122       return FALSE;
1123
1124    /* SUB  DST, SRC, TMP */
1125    if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ), dst, src0,
1126                     negate( src( temp ) ) ))
1127       return FALSE;
1128
1129    return TRUE;
1130 }
1131
1132
1133 /**
1134  * Translate the following TGSI CEIL instruction.
1135  *    CEIL  DST, SRC
1136  * To the following SVGA3D instruction sequence.
1137  *    FRC  TMP, -SRC
1138  *    ADD  DST, SRC, TMP
1139  */
1140 static boolean
1141 emit_ceil(struct svga_shader_emitter *emit,
1142           const struct tgsi_full_instruction *insn)
1143 {
1144    SVGA3dShaderDestToken dst = translate_dst_register(emit, insn, 0);
1145    const struct src_register src0 =
1146       translate_src_register(emit, &insn->Src[0]);
1147    SVGA3dShaderDestToken temp = get_temp(emit);
1148
1149    /* FRC  TMP, -SRC */
1150    if (!submit_op1(emit, inst_token(SVGA3DOP_FRC), temp, negate(src0)))
1151       return FALSE;
1152
1153    /* ADD DST, SRC, TMP */
1154    if (!submit_op2(emit, inst_token(SVGA3DOP_ADD), dst, src0, src(temp)))
1155       return FALSE;
1156
1157    return TRUE;
1158 }
1159
1160
1161 /**
1162  * Translate the following TGSI DIV instruction.
1163  *    DIV  DST.xy, SRC0, SRC1
1164  * To the following SVGA3D instruction sequence.
1165  *    RCP  TMP.x, SRC1.xxxx
1166  *    RCP  TMP.y, SRC1.yyyy
1167  *    MUL  DST.xy, SRC0, TMP
1168  */
1169 static boolean
1170 emit_div(struct svga_shader_emitter *emit,
1171          const struct tgsi_full_instruction *insn )
1172 {
1173    SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1174    const struct src_register src0 =
1175       translate_src_register(emit, &insn->Src[0] );
1176    const struct src_register src1 =
1177       translate_src_register(emit, &insn->Src[1] );
1178    SVGA3dShaderDestToken temp = get_temp( emit );
1179    unsigned i;
1180
1181    /* For each enabled element, perform a RCP instruction.  Note that
1182     * RCP is scalar in SVGA3D:
1183     */
1184    for (i = 0; i < 4; i++) {
1185       unsigned channel = 1 << i;
1186       if (dst.mask & channel) {
1187          /* RCP  TMP.?, SRC1.???? */
1188          if (!submit_op1( emit, inst_token( SVGA3DOP_RCP ),
1189                           writemask(temp, channel),
1190                           scalar(src1, i) ))
1191             return FALSE;
1192       }
1193    }
1194
1195    /* Vector mul:
1196     * MUL  DST, SRC0, TMP
1197     */
1198    if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ), dst, src0,
1199                     src( temp ) ))
1200       return FALSE;
1201
1202    return TRUE;
1203 }
1204
1205
1206 /**
1207  * Translate the following TGSI DP2 instruction.
1208  *    DP2  DST, SRC1, SRC2
1209  * To the following SVGA3D instruction sequence.
1210  *    MUL  TMP, SRC1, SRC2
1211  *    ADD  DST, TMP.xxxx, TMP.yyyy
1212  */
1213 static boolean
1214 emit_dp2(struct svga_shader_emitter *emit,
1215          const struct tgsi_full_instruction *insn )
1216 {
1217    SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1218    const struct src_register src0 =
1219       translate_src_register(emit, &insn->Src[0]);
1220    const struct src_register src1 =
1221       translate_src_register(emit, &insn->Src[1]);
1222    SVGA3dShaderDestToken temp = get_temp( emit );
1223    struct src_register temp_src0, temp_src1;
1224
1225    /* MUL  TMP, SRC1, SRC2 */
1226    if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ), temp, src0, src1 ))
1227       return FALSE;
1228
1229    temp_src0 = scalar(src( temp ), TGSI_SWIZZLE_X);
1230    temp_src1 = scalar(src( temp ), TGSI_SWIZZLE_Y);
1231
1232    /* ADD  DST, TMP.xxxx, TMP.yyyy */
1233    if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ), dst,
1234                     temp_src0, temp_src1 ))
1235       return FALSE;
1236
1237    return TRUE;
1238 }
1239
1240
1241 /**
1242  * Translate the following TGSI DPH instruction.
1243  *    DPH  DST, SRC1, SRC2
1244  * To the following SVGA3D instruction sequence.
1245  *    DP3  TMP, SRC1, SRC2
1246  *    ADD  DST, TMP, SRC2.wwww
1247  */
1248 static boolean
1249 emit_dph(struct svga_shader_emitter *emit,
1250          const struct tgsi_full_instruction *insn )
1251 {
1252    SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1253    const struct src_register src0 = translate_src_register(
1254       emit, &insn->Src[0] );
1255    struct src_register src1 =
1256       translate_src_register(emit, &insn->Src[1]);
1257    SVGA3dShaderDestToken temp = get_temp( emit );
1258
1259    /* DP3  TMP, SRC1, SRC2 */
1260    if (!submit_op2( emit, inst_token( SVGA3DOP_DP3 ), temp, src0, src1 ))
1261       return FALSE;
1262
1263    src1 = scalar(src1, TGSI_SWIZZLE_W);
1264
1265    /* ADD  DST, TMP, SRC2.wwww */
1266    if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ), dst,
1267                     src( temp ), src1 ))
1268       return FALSE;
1269
1270    return TRUE;
1271 }
1272
1273
1274 /**
1275  * Sine / Cosine helper function.
1276  */
1277 static boolean
1278 do_emit_sincos(struct svga_shader_emitter *emit,
1279                SVGA3dShaderDestToken dst,
1280                struct src_register src0)
1281 {
1282    src0 = scalar(src0, TGSI_SWIZZLE_X);
1283    return submit_op1(emit, inst_token(SVGA3DOP_SINCOS), dst, src0);
1284 }
1285
1286
1287 /**
1288  * Translate/emit a TGSI SIN, COS or CSC instruction.
1289  */
1290 static boolean
1291 emit_sincos(struct svga_shader_emitter *emit,
1292             const struct tgsi_full_instruction *insn)
1293 {
1294    SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1295    struct src_register src0 = translate_src_register(emit, &insn->Src[0]);
1296    SVGA3dShaderDestToken temp = get_temp( emit );
1297
1298    /* SCS TMP SRC */
1299    if (!do_emit_sincos(emit, writemask(temp, TGSI_WRITEMASK_XY), src0 ))
1300       return FALSE;
1301
1302    /* MOV DST TMP */
1303    if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), dst, src( temp ) ))
1304       return FALSE;
1305
1306    return TRUE;
1307 }
1308
1309
1310 /**
1311  * Translate TGSI SIN instruction into:
1312  * SCS TMP SRC
1313  * MOV DST TMP.yyyy
1314  */
1315 static boolean
1316 emit_sin(struct svga_shader_emitter *emit,
1317          const struct tgsi_full_instruction *insn )
1318 {
1319    SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1320    struct src_register src0 =
1321       translate_src_register(emit, &insn->Src[0] );
1322    SVGA3dShaderDestToken temp = get_temp( emit );
1323
1324    /* SCS TMP SRC */
1325    if (!do_emit_sincos(emit, writemask(temp, TGSI_WRITEMASK_Y), src0))
1326       return FALSE;
1327
1328    src0 = scalar(src( temp ), TGSI_SWIZZLE_Y);
1329
1330    /* MOV DST TMP.yyyy */
1331    if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), dst, src0 ))
1332       return FALSE;
1333
1334    return TRUE;
1335 }
1336
1337
1338 /*
1339  * Translate TGSI COS instruction into:
1340  * SCS TMP SRC
1341  * MOV DST TMP.xxxx
1342  */
1343 static boolean
1344 emit_cos(struct svga_shader_emitter *emit,
1345          const struct tgsi_full_instruction *insn)
1346 {
1347    SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1348    struct src_register src0 =
1349       translate_src_register(emit, &insn->Src[0] );
1350    SVGA3dShaderDestToken temp = get_temp( emit );
1351
1352    /* SCS TMP SRC */
1353    if (!do_emit_sincos( emit, writemask(temp, TGSI_WRITEMASK_X), src0 ))
1354       return FALSE;
1355
1356    src0 = scalar(src( temp ), TGSI_SWIZZLE_X);
1357
1358    /* MOV DST TMP.xxxx */
1359    if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), dst, src0 ))
1360       return FALSE;
1361
1362    return TRUE;
1363 }
1364
1365
1366 /**
1367  * Translate/emit TGSI SSG (Set Sign: -1, 0, +1) instruction.
1368  */
1369 static boolean
1370 emit_ssg(struct svga_shader_emitter *emit,
1371          const struct tgsi_full_instruction *insn)
1372 {
1373    SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1374    struct src_register src0 =
1375       translate_src_register(emit, &insn->Src[0] );
1376    SVGA3dShaderDestToken temp0 = get_temp( emit );
1377    SVGA3dShaderDestToken temp1 = get_temp( emit );
1378    struct src_register zero, one;
1379
1380    if (emit->unit == PIPE_SHADER_VERTEX) {
1381       /* SGN  DST, SRC0, TMP0, TMP1 */
1382       return submit_op3( emit, inst_token( SVGA3DOP_SGN ), dst, src0,
1383                          src( temp0 ), src( temp1 ) );
1384    }
1385
1386    one = get_one_immediate(emit);
1387    zero = get_zero_immediate(emit);
1388
1389    /* CMP  TMP0, SRC0, one, zero */
1390    if (!submit_op3( emit, inst_token( SVGA3DOP_CMP ),
1391                     writemask( temp0, dst.mask ), src0, one, zero ))
1392       return FALSE;
1393
1394    /* CMP  TMP1, negate(SRC0), negate(one), zero */
1395    if (!submit_op3( emit, inst_token( SVGA3DOP_CMP ),
1396                     writemask( temp1, dst.mask ), negate( src0 ), negate( one ),
1397                     zero ))
1398       return FALSE;
1399
1400    /* ADD  DST, TMP0, TMP1 */
1401    return submit_op2( emit, inst_token( SVGA3DOP_ADD ), dst, src( temp0 ),
1402                       src( temp1 ) );
1403 }
1404
1405
1406 /**
1407  * Translate/emit TGSI SUB instruction as:
1408  * ADD DST, SRC0, negate(SRC1)
1409  */
1410 static boolean
1411 emit_sub(struct svga_shader_emitter *emit,
1412          const struct tgsi_full_instruction *insn)
1413 {
1414    SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1415    struct src_register src0 = translate_src_register(
1416       emit, &insn->Src[0] );
1417    struct src_register src1 = translate_src_register(
1418       emit, &insn->Src[1] );
1419
1420    src1 = negate(src1);
1421
1422    if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ), dst,
1423                     src0, src1 ))
1424       return FALSE;
1425
1426    return TRUE;
1427 }
1428
1429
1430 /**
1431  * Translate/emit KILL_IF instruction (kill if any of X,Y,Z,W are negative).
1432  */
1433 static boolean
1434 emit_kill_if(struct svga_shader_emitter *emit,
1435              const struct tgsi_full_instruction *insn)
1436 {
1437    const struct tgsi_full_src_register *reg = &insn->Src[0];
1438    struct src_register src0, srcIn;
1439    const boolean special = (reg->Register.Absolute ||
1440                             reg->Register.Negate ||
1441                             reg->Register.Indirect ||
1442                             reg->Register.SwizzleX != 0 ||
1443                             reg->Register.SwizzleY != 1 ||
1444                             reg->Register.SwizzleZ != 2 ||
1445                             reg->Register.File != TGSI_FILE_TEMPORARY);
1446    SVGA3dShaderDestToken temp;
1447
1448    src0 = srcIn = translate_src_register( emit, reg );
1449
1450    if (special) {
1451       /* need a temp reg */
1452       temp = get_temp( emit );
1453    }
1454
1455    if (special) {
1456       /* move the source into a temp register */
1457       submit_op1(emit, inst_token(SVGA3DOP_MOV), temp, src0);
1458
1459       src0 = src( temp );
1460    }
1461
1462    /* Do the texkill by checking if any of the XYZW components are < 0.
1463     * Note that ps_2_0 and later take XYZW in consideration, while ps_1_x
1464     * only used XYZ.  The MSDN documentation about this is incorrect.
1465     */
1466    if (!submit_op0( emit, inst_token( SVGA3DOP_TEXKILL ), dst(src0) ))
1467       return FALSE;
1468
1469    return TRUE;
1470 }
1471
1472
1473 /**
1474  * Translate/emit unconditional kill instruction (usually found inside
1475  * an IF/ELSE/ENDIF block).
1476  */
1477 static boolean
1478 emit_kill(struct svga_shader_emitter *emit,
1479           const struct tgsi_full_instruction *insn)
1480 {
1481    SVGA3dShaderDestToken temp;
1482    struct src_register one = get_one_immediate(emit);
1483    SVGA3dShaderInstToken inst = inst_token( SVGA3DOP_TEXKILL );
1484
1485    /* texkill doesn't allow negation on the operand so lets move
1486     * negation of {1} to a temp register */
1487    temp = get_temp( emit );
1488    if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), temp,
1489                     negate( one ) ))
1490       return FALSE;
1491
1492    return submit_op0( emit, inst, temp );
1493 }
1494
1495
1496 /**
1497  * Test if r1 and r2 are the same register.
1498  */
1499 static boolean
1500 same_register(struct src_register r1, struct src_register r2)
1501 {
1502    return (r1.base.num == r2.base.num &&
1503            r1.base.type_upper == r2.base.type_upper &&
1504            r1.base.type_lower == r2.base.type_lower);
1505 }
1506
1507
1508
1509 /**
1510  * Implement conditionals by initializing destination reg to 'fail',
1511  * then set predicate reg with UFOP_SETP, then move 'pass' to dest
1512  * based on predicate reg.
1513  *
1514  * SETP src0, cmp, src1  -- do this first to avoid aliasing problems.
1515  * MOV dst, fail
1516  * MOV dst, pass, p0
1517  */
1518 static boolean
1519 emit_conditional(struct svga_shader_emitter *emit,
1520                  unsigned compare_func,
1521                  SVGA3dShaderDestToken dst,
1522                  struct src_register src0,
1523                  struct src_register src1,
1524                  struct src_register pass,
1525                  struct src_register fail)
1526 {
1527    SVGA3dShaderDestToken pred_reg = dst_register( SVGA3DREG_PREDICATE, 0 );
1528    SVGA3dShaderInstToken setp_token;
1529
1530    switch (compare_func) {
1531    case PIPE_FUNC_NEVER:
1532       return submit_op1( emit, inst_token( SVGA3DOP_MOV ),
1533                          dst, fail );
1534       break;
1535    case PIPE_FUNC_LESS:
1536       setp_token = inst_token_setp(SVGA3DOPCOMP_LT);
1537       break;
1538    case PIPE_FUNC_EQUAL:
1539       setp_token = inst_token_setp(SVGA3DOPCOMP_EQ);
1540       break;
1541    case PIPE_FUNC_LEQUAL:
1542       setp_token = inst_token_setp(SVGA3DOPCOMP_LE);
1543       break;
1544    case PIPE_FUNC_GREATER:
1545       setp_token = inst_token_setp(SVGA3DOPCOMP_GT);
1546       break;
1547    case PIPE_FUNC_NOTEQUAL:
1548       setp_token = inst_token_setp(SVGA3DOPCOMPC_NE);
1549       break;
1550    case PIPE_FUNC_GEQUAL:
1551       setp_token = inst_token_setp(SVGA3DOPCOMP_GE);
1552       break;
1553    case PIPE_FUNC_ALWAYS:
1554       return submit_op1( emit, inst_token( SVGA3DOP_MOV ),
1555                          dst, pass );
1556       break;
1557    }
1558
1559    if (same_register(src(dst), pass)) {
1560       /* We'll get bad results if the dst and pass registers are the same
1561        * so use a temp register containing pass.
1562        */
1563       SVGA3dShaderDestToken temp = get_temp(emit);
1564       if (!submit_op1(emit, inst_token(SVGA3DOP_MOV), temp, pass))
1565          return FALSE;
1566       pass = src(temp);
1567    }
1568
1569    /* SETP src0, COMPOP, src1 */
1570    if (!submit_op2( emit, setp_token, pred_reg,
1571                     src0, src1 ))
1572       return FALSE;
1573
1574    /* MOV dst, fail */
1575    if (!submit_op1(emit, inst_token(SVGA3DOP_MOV), dst, fail))
1576       return FALSE;
1577
1578    /* MOV dst, pass (predicated)
1579     *
1580     * Note that the predicate reg (and possible modifiers) is passed
1581     * as the first source argument.
1582     */
1583    if (!submit_op2(emit,
1584                    inst_token_predicated(SVGA3DOP_MOV), dst,
1585                    src(pred_reg), pass))
1586       return FALSE;
1587
1588    return TRUE;
1589 }
1590
1591
1592 /**
1593  * Helper for emiting 'selection' commands.  Basically:
1594  * if (src0 OP src1)
1595  *    dst = 1.0;
1596  * else
1597  *    dst = 0.0;
1598  */
1599 static boolean
1600 emit_select(struct svga_shader_emitter *emit,
1601             unsigned compare_func,
1602             SVGA3dShaderDestToken dst,
1603             struct src_register src0,
1604             struct src_register src1 )
1605 {
1606    /* There are some SVGA instructions which implement some selects
1607     * directly, but they are only available in the vertex shader.
1608     */
1609    if (emit->unit == PIPE_SHADER_VERTEX) {
1610       switch (compare_func) {
1611       case PIPE_FUNC_GEQUAL:
1612          return submit_op2( emit, inst_token( SVGA3DOP_SGE ), dst, src0, src1 );
1613       case PIPE_FUNC_LEQUAL:
1614          return submit_op2( emit, inst_token( SVGA3DOP_SGE ), dst, src1, src0 );
1615       case PIPE_FUNC_GREATER:
1616          return submit_op2( emit, inst_token( SVGA3DOP_SLT ), dst, src1, src0 );
1617       case PIPE_FUNC_LESS:
1618          return submit_op2( emit, inst_token( SVGA3DOP_SLT ), dst, src0, src1 );
1619       default:
1620          break;
1621       }
1622    }
1623
1624    /* Otherwise, need to use the setp approach:
1625     */
1626    {
1627       struct src_register one, zero;
1628       /* zero immediate is 0,0,0,1 */
1629       zero = get_zero_immediate(emit);
1630       one = get_one_immediate(emit);
1631
1632       return emit_conditional(emit, compare_func, dst, src0, src1, one, zero);
1633    }
1634 }
1635
1636
1637 /**
1638  * Translate/emit a TGSI SEQ, SNE, SLT, SGE, etc. instruction.
1639  */
1640 static boolean
1641 emit_select_op(struct svga_shader_emitter *emit,
1642                unsigned compare,
1643                const struct tgsi_full_instruction *insn)
1644 {
1645    SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1646    struct src_register src0 = translate_src_register(
1647       emit, &insn->Src[0] );
1648    struct src_register src1 = translate_src_register(
1649       emit, &insn->Src[1] );
1650
1651    return emit_select( emit, compare, dst, src0, src1 );
1652 }
1653
1654
1655 /**
1656  * Translate TGSI CMP instruction.  Component-wise:
1657  * dst = (src0 < 0.0) ? src1 : src2
1658  */
1659 static boolean
1660 emit_cmp(struct svga_shader_emitter *emit,
1661          const struct tgsi_full_instruction *insn)
1662 {
1663    SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1664    const struct src_register src0 =
1665       translate_src_register(emit, &insn->Src[0] );
1666    const struct src_register src1 =
1667       translate_src_register(emit, &insn->Src[1] );
1668    const struct src_register src2 =
1669       translate_src_register(emit, &insn->Src[2] );
1670
1671    if (emit->unit == PIPE_SHADER_VERTEX) {
1672       struct src_register zero = get_zero_immediate(emit);
1673       /* We used to simulate CMP with SLT+LRP.  But that didn't work when
1674        * src1 or src2 was Inf/NaN.  In particular, GLSL sqrt(0) failed
1675        * because it involves a CMP to handle the 0 case.
1676        * Use a conditional expression instead.
1677        */
1678       return emit_conditional(emit, PIPE_FUNC_LESS, dst,
1679                               src0, zero, src1, src2);
1680    }
1681    else {
1682       assert(emit->unit == PIPE_SHADER_FRAGMENT);
1683
1684       /* CMP  DST, SRC0, SRC2, SRC1 */
1685       return submit_op3( emit, inst_token( SVGA3DOP_CMP ), dst,
1686                          src0, src2, src1);
1687    }
1688 }
1689
1690
1691 /**
1692  * Translate/emit 2-operand (coord, sampler) texture instructions.
1693  */
1694 static boolean
1695 emit_tex2(struct svga_shader_emitter *emit,
1696           const struct tgsi_full_instruction *insn,
1697           SVGA3dShaderDestToken dst)
1698 {
1699    SVGA3dShaderInstToken inst;
1700    struct src_register texcoord;
1701    struct src_register sampler;
1702    SVGA3dShaderDestToken tmp;
1703
1704    inst.value = 0;
1705
1706    switch (insn->Instruction.Opcode) {
1707    case TGSI_OPCODE_TEX:
1708       inst.op = SVGA3DOP_TEX;
1709       break;
1710    case TGSI_OPCODE_TXP:
1711       inst.op = SVGA3DOP_TEX;
1712       inst.control = SVGA3DOPCONT_PROJECT;
1713       break;
1714    case TGSI_OPCODE_TXB:
1715       inst.op = SVGA3DOP_TEX;
1716       inst.control = SVGA3DOPCONT_BIAS;
1717       break;
1718    case TGSI_OPCODE_TXL:
1719       inst.op = SVGA3DOP_TEXLDL;
1720       break;
1721    default:
1722       assert(0);
1723       return FALSE;
1724    }
1725
1726    texcoord = translate_src_register( emit, &insn->Src[0] );
1727    sampler = translate_src_register( emit, &insn->Src[1] );
1728
1729    if (emit->key.tex[sampler.base.num].unnormalized ||
1730        emit->dynamic_branching_level > 0)
1731       tmp = get_temp( emit );
1732
1733    /* Can't do mipmapping inside dynamic branch constructs.  Force LOD
1734     * zero in that case.
1735     */
1736    if (emit->dynamic_branching_level > 0 &&
1737        inst.op == SVGA3DOP_TEX &&
1738        SVGA3dShaderGetRegType(texcoord.base.value) == SVGA3DREG_TEMP) {
1739       struct src_register zero = get_zero_immediate(emit);
1740
1741       /* MOV  tmp, texcoord */
1742       if (!submit_op1( emit,
1743                        inst_token( SVGA3DOP_MOV ),
1744                        tmp,
1745                        texcoord ))
1746          return FALSE;
1747
1748       /* MOV  tmp.w, zero */
1749       if (!submit_op1( emit,
1750                        inst_token( SVGA3DOP_MOV ),
1751                        writemask( tmp, TGSI_WRITEMASK_W ),
1752                        zero ))
1753          return FALSE;
1754
1755       texcoord = src( tmp );
1756       inst.op = SVGA3DOP_TEXLDL;
1757    }
1758
1759    /* Explicit normalization of texcoords:
1760     */
1761    if (emit->key.tex[sampler.base.num].unnormalized) {
1762       struct src_register wh = get_tex_dimensions( emit, sampler.base.num );
1763
1764       /* MUL  tmp, SRC0, WH */
1765       if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ),
1766                        tmp, texcoord, wh ))
1767          return FALSE;
1768
1769       texcoord = src( tmp );
1770    }
1771
1772    return submit_op2( emit, inst, dst, texcoord, sampler );
1773 }
1774
1775
1776 /**
1777  * Translate/emit 4-operand (coord, ddx, ddy, sampler) texture instructions.
1778  */
1779 static boolean
1780 emit_tex4(struct svga_shader_emitter *emit,
1781           const struct tgsi_full_instruction *insn,
1782           SVGA3dShaderDestToken dst )
1783 {
1784    SVGA3dShaderInstToken inst;
1785    struct src_register texcoord;
1786    struct src_register ddx;
1787    struct src_register ddy;
1788    struct src_register sampler;
1789
1790    texcoord = translate_src_register( emit, &insn->Src[0] );
1791    ddx      = translate_src_register( emit, &insn->Src[1] );
1792    ddy      = translate_src_register( emit, &insn->Src[2] );
1793    sampler  = translate_src_register( emit, &insn->Src[3] );
1794
1795    inst.value = 0;
1796
1797    switch (insn->Instruction.Opcode) {
1798    case TGSI_OPCODE_TXD:
1799       inst.op = SVGA3DOP_TEXLDD; /* 4 args! */
1800       break;
1801    default:
1802       assert(0);
1803       return FALSE;
1804    }
1805
1806    return submit_op4( emit, inst, dst, texcoord, sampler, ddx, ddy );
1807 }
1808
1809
1810 /**
1811  * Emit texture swizzle code.  We do this here since SVGA samplers don't
1812  * directly support swizzles.
1813  */
1814 static boolean
1815 emit_tex_swizzle(struct svga_shader_emitter *emit,
1816                  SVGA3dShaderDestToken dst,
1817                  struct src_register src,
1818                  unsigned swizzle_x,
1819                  unsigned swizzle_y,
1820                  unsigned swizzle_z,
1821                  unsigned swizzle_w)
1822 {
1823    const unsigned swizzleIn[4] = {swizzle_x, swizzle_y, swizzle_z, swizzle_w};
1824    unsigned srcSwizzle[4];
1825    unsigned srcWritemask = 0x0, zeroWritemask = 0x0, oneWritemask = 0x0;
1826    unsigned i;
1827
1828    /* build writemasks and srcSwizzle terms */
1829    for (i = 0; i < 4; i++) {
1830       if (swizzleIn[i] == PIPE_SWIZZLE_0) {
1831          srcSwizzle[i] = TGSI_SWIZZLE_X + i;
1832          zeroWritemask |= (1 << i);
1833       }
1834       else if (swizzleIn[i] == PIPE_SWIZZLE_1) {
1835          srcSwizzle[i] = TGSI_SWIZZLE_X + i;
1836          oneWritemask |= (1 << i);
1837       }
1838       else {
1839          srcSwizzle[i] = swizzleIn[i];
1840          srcWritemask |= (1 << i);
1841       }
1842    }
1843
1844    /* write x/y/z/w comps */
1845    if (dst.mask & srcWritemask) {
1846       if (!submit_op1(emit,
1847                       inst_token(SVGA3DOP_MOV),
1848                       writemask(dst, srcWritemask),
1849                       swizzle(src,
1850                               srcSwizzle[0],
1851                               srcSwizzle[1],
1852                               srcSwizzle[2],
1853                               srcSwizzle[3])))
1854          return FALSE;
1855    }
1856
1857    /* write 0 comps */
1858    if (dst.mask & zeroWritemask) {
1859       if (!submit_op1(emit,
1860                       inst_token(SVGA3DOP_MOV),
1861                       writemask(dst, zeroWritemask),
1862                       get_zero_immediate(emit)))
1863          return FALSE;
1864    }
1865
1866    /* write 1 comps */
1867    if (dst.mask & oneWritemask) {
1868       if (!submit_op1(emit,
1869                       inst_token(SVGA3DOP_MOV),
1870                       writemask(dst, oneWritemask),
1871                       get_one_immediate(emit)))
1872          return FALSE;
1873    }
1874
1875    return TRUE;
1876 }
1877
1878
1879 /**
1880  * Translate/emit a TGSI texture sample instruction.
1881  */
1882 static boolean
1883 emit_tex(struct svga_shader_emitter *emit,
1884          const struct tgsi_full_instruction *insn)
1885 {
1886    SVGA3dShaderDestToken dst =
1887       translate_dst_register( emit, insn, 0 );
1888    struct src_register src0 =
1889       translate_src_register( emit, &insn->Src[0] );
1890    struct src_register src1 =
1891       translate_src_register( emit, &insn->Src[1] );
1892
1893    SVGA3dShaderDestToken tex_result;
1894    const unsigned unit = src1.base.num;
1895
1896    /* check for shadow samplers */
1897    boolean compare = (emit->key.tex[unit].compare_mode ==
1898                       PIPE_TEX_COMPARE_R_TO_TEXTURE);
1899
1900    /* texture swizzle */
1901    boolean swizzle = (emit->key.tex[unit].swizzle_r != PIPE_SWIZZLE_X ||
1902                       emit->key.tex[unit].swizzle_g != PIPE_SWIZZLE_Y ||
1903                       emit->key.tex[unit].swizzle_b != PIPE_SWIZZLE_Z ||
1904                       emit->key.tex[unit].swizzle_a != PIPE_SWIZZLE_W);
1905
1906    boolean saturate = insn->Instruction.Saturate;
1907
1908    /* If doing compare processing or tex swizzle or saturation, we need to put
1909     * the fetched color into a temporary so it can be used as a source later on.
1910     */
1911    if (compare || swizzle || saturate) {
1912       tex_result = get_temp( emit );
1913    }
1914    else {
1915       tex_result = dst;
1916    }
1917
1918    switch(insn->Instruction.Opcode) {
1919    case TGSI_OPCODE_TEX:
1920    case TGSI_OPCODE_TXB:
1921    case TGSI_OPCODE_TXP:
1922    case TGSI_OPCODE_TXL:
1923       if (!emit_tex2( emit, insn, tex_result ))
1924          return FALSE;
1925       break;
1926    case TGSI_OPCODE_TXD:
1927       if (!emit_tex4( emit, insn, tex_result ))
1928          return FALSE;
1929       break;
1930    default:
1931       assert(0);
1932    }
1933
1934    if (compare) {
1935       SVGA3dShaderDestToken dst2;
1936
1937       if (swizzle || saturate)
1938          dst2 = tex_result;
1939       else
1940          dst2 = dst;
1941
1942       if (dst.mask & TGSI_WRITEMASK_XYZ) {
1943          SVGA3dShaderDestToken src0_zdivw = get_temp( emit );
1944          /* When sampling a depth texture, the result of the comparison is in
1945           * the Y component.
1946           */
1947          struct src_register tex_src_x = scalar(src(tex_result), TGSI_SWIZZLE_Y);
1948          struct src_register r_coord;
1949
1950          if (insn->Instruction.Opcode == TGSI_OPCODE_TXP) {
1951             /* Divide texcoord R by Q */
1952             if (!submit_op1( emit, inst_token( SVGA3DOP_RCP ),
1953                              writemask(src0_zdivw, TGSI_WRITEMASK_X),
1954                              scalar(src0, TGSI_SWIZZLE_W) ))
1955                return FALSE;
1956
1957             if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ),
1958                              writemask(src0_zdivw, TGSI_WRITEMASK_X),
1959                              scalar(src0, TGSI_SWIZZLE_Z),
1960                              scalar(src(src0_zdivw), TGSI_SWIZZLE_X) ))
1961                return FALSE;
1962
1963             r_coord = scalar(src(src0_zdivw), TGSI_SWIZZLE_X);
1964          }
1965          else {
1966             r_coord = scalar(src0, TGSI_SWIZZLE_Z);
1967          }
1968
1969          /* Compare texture sample value against R component of texcoord */
1970          if (!emit_select(emit,
1971                           emit->key.tex[unit].compare_func,
1972                           writemask( dst2, TGSI_WRITEMASK_XYZ ),
1973                           r_coord,
1974                           tex_src_x))
1975             return FALSE;
1976       }
1977
1978       if (dst.mask & TGSI_WRITEMASK_W) {
1979          struct src_register one = get_one_immediate(emit);
1980
1981         if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
1982                          writemask( dst2, TGSI_WRITEMASK_W ),
1983                          one ))
1984            return FALSE;
1985       }
1986    }
1987
1988    if (saturate && !swizzle) {
1989       /* MOV_SAT real_dst, dst */
1990       if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), dst, src(tex_result) ))
1991          return FALSE;
1992    }
1993    else if (swizzle) {
1994       /* swizzle from tex_result to dst (handles saturation too, if any) */
1995       emit_tex_swizzle(emit,
1996                        dst, src(tex_result),
1997                        emit->key.tex[unit].swizzle_r,
1998                        emit->key.tex[unit].swizzle_g,
1999                        emit->key.tex[unit].swizzle_b,
2000                        emit->key.tex[unit].swizzle_a);
2001    }
2002
2003    return TRUE;
2004 }
2005
2006
2007 static boolean
2008 emit_bgnloop(struct svga_shader_emitter *emit,
2009              const struct tgsi_full_instruction *insn)
2010 {
2011    SVGA3dShaderInstToken inst = inst_token( SVGA3DOP_LOOP );
2012    struct src_register loop_reg = src_register( SVGA3DREG_LOOP, 0 );
2013    struct src_register const_int = get_loop_const( emit );
2014
2015    emit->dynamic_branching_level++;
2016
2017    return (emit_instruction( emit, inst ) &&
2018            emit_src( emit, loop_reg ) &&
2019            emit_src( emit, const_int ) );
2020 }
2021
2022
2023 static boolean
2024 emit_endloop(struct svga_shader_emitter *emit,
2025              const struct tgsi_full_instruction *insn)
2026 {
2027    SVGA3dShaderInstToken inst = inst_token( SVGA3DOP_ENDLOOP );
2028
2029    emit->dynamic_branching_level--;
2030
2031    return emit_instruction( emit, inst );
2032 }
2033
2034
2035 /**
2036  * Translate/emit TGSI BREAK (out of loop) instruction.
2037  */
2038 static boolean
2039 emit_brk(struct svga_shader_emitter *emit,
2040          const struct tgsi_full_instruction *insn)
2041 {
2042    SVGA3dShaderInstToken inst = inst_token( SVGA3DOP_BREAK );
2043    return emit_instruction( emit, inst );
2044 }
2045
2046
2047 /**
2048  * Emit simple instruction which operates on one scalar value (not
2049  * a vector).  Ex: LG2, RCP, RSQ.
2050  */
2051 static boolean
2052 emit_scalar_op1(struct svga_shader_emitter *emit,
2053                 unsigned opcode,
2054                 const struct tgsi_full_instruction *insn)
2055 {
2056    SVGA3dShaderInstToken inst;
2057    SVGA3dShaderDestToken dst;
2058    struct src_register src;
2059
2060    inst = inst_token( opcode );
2061    dst = translate_dst_register( emit, insn, 0 );
2062    src = translate_src_register( emit, &insn->Src[0] );
2063    src = scalar( src, TGSI_SWIZZLE_X );
2064
2065    return submit_op1( emit, inst, dst, src );
2066 }
2067
2068
2069 /**
2070  * Translate/emit a simple instruction (one which has no special-case
2071  * code) such as ADD, MUL, MIN, MAX.
2072  */
2073 static boolean
2074 emit_simple_instruction(struct svga_shader_emitter *emit,
2075                         unsigned opcode,
2076                         const struct tgsi_full_instruction *insn)
2077 {
2078    const struct tgsi_full_src_register *src = insn->Src;
2079    SVGA3dShaderInstToken inst;
2080    SVGA3dShaderDestToken dst;
2081
2082    inst = inst_token( opcode );
2083    dst = translate_dst_register( emit, insn, 0 );
2084
2085    switch (insn->Instruction.NumSrcRegs) {
2086    case 0:
2087       return submit_op0( emit, inst, dst );
2088    case 1:
2089       return submit_op1( emit, inst, dst,
2090                          translate_src_register( emit, &src[0] ));
2091    case 2:
2092       return submit_op2( emit, inst, dst,
2093                          translate_src_register( emit, &src[0] ),
2094                          translate_src_register( emit, &src[1] ) );
2095    case 3:
2096       return submit_op3( emit, inst, dst,
2097                          translate_src_register( emit, &src[0] ),
2098                          translate_src_register( emit, &src[1] ),
2099                          translate_src_register( emit, &src[2] ) );
2100    default:
2101       assert(0);
2102       return FALSE;
2103    }
2104 }
2105
2106
2107 /**
2108  * TGSI_OPCODE_MOVE is only special-cased here to detect the
2109  * svga_fragment_shader::constant_color_output case.
2110  */
2111 static boolean
2112 emit_mov(struct svga_shader_emitter *emit,
2113          const struct tgsi_full_instruction *insn)
2114 {
2115    const struct tgsi_full_src_register *src = &insn->Src[0];
2116    const struct tgsi_full_dst_register *dst = &insn->Dst[0];
2117
2118    if (emit->unit == PIPE_SHADER_FRAGMENT &&
2119        dst->Register.File == TGSI_FILE_OUTPUT &&
2120        dst->Register.Index == 0 &&
2121        src->Register.File == TGSI_FILE_CONSTANT &&
2122        !src->Register.Indirect) {
2123       emit->constant_color_output = TRUE;
2124    }
2125
2126    return emit_simple_instruction(emit, SVGA3DOP_MOV, insn);
2127 }
2128
2129
2130 /**
2131  * Translate/emit TGSI DDX, DDY instructions.
2132  */
2133 static boolean
2134 emit_deriv(struct svga_shader_emitter *emit,
2135            const struct tgsi_full_instruction *insn )
2136 {
2137    if (emit->dynamic_branching_level > 0 &&
2138        insn->Src[0].Register.File == TGSI_FILE_TEMPORARY)
2139    {
2140       SVGA3dShaderDestToken dst =
2141          translate_dst_register( emit, insn, 0 );
2142
2143       /* Deriv opcodes not valid inside dynamic branching, workaround
2144        * by zeroing out the destination.
2145        */
2146       if (!submit_op1(emit,
2147                       inst_token( SVGA3DOP_MOV ),
2148                       dst,
2149                       get_zero_immediate(emit)))
2150          return FALSE;
2151
2152       return TRUE;
2153    }
2154    else {
2155       unsigned opcode;
2156       const struct tgsi_full_src_register *reg = &insn->Src[0];
2157       SVGA3dShaderInstToken inst;
2158       SVGA3dShaderDestToken dst;
2159       struct src_register src0;
2160
2161       switch (insn->Instruction.Opcode) {
2162       case TGSI_OPCODE_DDX:
2163          opcode = SVGA3DOP_DSX;
2164          break;
2165       case TGSI_OPCODE_DDY:
2166          opcode = SVGA3DOP_DSY;
2167          break;
2168       default:
2169          return FALSE;
2170       }
2171
2172       inst = inst_token( opcode );
2173       dst = translate_dst_register( emit, insn, 0 );
2174       src0 = translate_src_register( emit, reg );
2175
2176       /* We cannot use negate or abs on source to dsx/dsy instruction.
2177        */
2178       if (reg->Register.Absolute ||
2179           reg->Register.Negate) {
2180          SVGA3dShaderDestToken temp = get_temp( emit );
2181
2182          if (!emit_repl( emit, temp, &src0 ))
2183             return FALSE;
2184       }
2185
2186       return submit_op1( emit, inst, dst, src0 );
2187    }
2188 }
2189
2190
2191 /**
2192  * Translate/emit ARL (Address Register Load) instruction.  Used to
2193  * move a value into the special 'address' register.  Used to implement
2194  * indirect/variable indexing into arrays.
2195  */
2196 static boolean
2197 emit_arl(struct svga_shader_emitter *emit,
2198          const struct tgsi_full_instruction *insn)
2199 {
2200    ++emit->current_arl;
2201    if (emit->unit == PIPE_SHADER_FRAGMENT) {
2202       /* MOVA not present in pixel shader instruction set.
2203        * Ignore this instruction altogether since it is
2204        * only used for loop counters -- and for that
2205        * we reference aL directly.
2206        */
2207       return TRUE;
2208    }
2209    if (svga_arl_needs_adjustment( emit )) {
2210       return emit_fake_arl( emit, insn );
2211    } else {
2212       /* no need to adjust, just emit straight arl */
2213       return emit_simple_instruction(emit, SVGA3DOP_MOVA, insn);
2214    }
2215 }
2216
2217
2218 static boolean
2219 emit_pow(struct svga_shader_emitter *emit,
2220          const struct tgsi_full_instruction *insn)
2221 {
2222    SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
2223    struct src_register src0 = translate_src_register(
2224       emit, &insn->Src[0] );
2225    struct src_register src1 = translate_src_register(
2226       emit, &insn->Src[1] );
2227    boolean need_tmp = FALSE;
2228
2229    /* POW can only output to a temporary */
2230    if (insn->Dst[0].Register.File != TGSI_FILE_TEMPORARY)
2231       need_tmp = TRUE;
2232
2233    /* POW src1 must not be the same register as dst */
2234    if (alias_src_dst( src1, dst ))
2235       need_tmp = TRUE;
2236
2237    /* it's a scalar op */
2238    src0 = scalar( src0, TGSI_SWIZZLE_X );
2239    src1 = scalar( src1, TGSI_SWIZZLE_X );
2240
2241    if (need_tmp) {
2242       SVGA3dShaderDestToken tmp =
2243          writemask(get_temp( emit ), TGSI_WRITEMASK_X );
2244
2245       if (!submit_op2(emit, inst_token( SVGA3DOP_POW ), tmp, src0, src1))
2246          return FALSE;
2247
2248       return submit_op1(emit, inst_token( SVGA3DOP_MOV ),
2249                         dst, scalar(src(tmp), 0) );
2250    }
2251    else {
2252       return submit_op2(emit, inst_token( SVGA3DOP_POW ), dst, src0, src1);
2253    }
2254 }
2255
2256
2257 /**
2258  * Translate/emit TGSI XPD (vector cross product) instruction.
2259  */
2260 static boolean
2261 emit_xpd(struct svga_shader_emitter *emit,
2262          const struct tgsi_full_instruction *insn)
2263 {
2264    SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
2265    const struct src_register src0 = translate_src_register(
2266       emit, &insn->Src[0] );
2267    const struct src_register src1 = translate_src_register(
2268       emit, &insn->Src[1] );
2269    boolean need_dst_tmp = FALSE;
2270
2271    /* XPD can only output to a temporary */
2272    if (SVGA3dShaderGetRegType(dst.value) != SVGA3DREG_TEMP)
2273       need_dst_tmp = TRUE;
2274
2275    /* The dst reg must not be the same as src0 or src1*/
2276    if (alias_src_dst(src0, dst) ||
2277        alias_src_dst(src1, dst))
2278       need_dst_tmp = TRUE;
2279
2280    if (need_dst_tmp) {
2281       SVGA3dShaderDestToken tmp = get_temp( emit );
2282
2283       /* Obey DX9 restrictions on mask:
2284        */
2285       tmp.mask = dst.mask & TGSI_WRITEMASK_XYZ;
2286
2287       if (!submit_op2(emit, inst_token( SVGA3DOP_CRS ), tmp, src0, src1))
2288          return FALSE;
2289
2290       if (!submit_op1(emit, inst_token( SVGA3DOP_MOV ), dst, src( tmp )))
2291          return FALSE;
2292    }
2293    else {
2294       if (!submit_op2(emit, inst_token( SVGA3DOP_CRS ), dst, src0, src1))
2295          return FALSE;
2296    }
2297
2298    /* Need to emit 1.0 to dst.w?
2299     */
2300    if (dst.mask & TGSI_WRITEMASK_W) {
2301       struct src_register one = get_one_immediate( emit );
2302
2303       if (!submit_op1(emit,
2304                       inst_token( SVGA3DOP_MOV ),
2305                       writemask(dst, TGSI_WRITEMASK_W),
2306                       one))
2307          return FALSE;
2308    }
2309
2310    return TRUE;
2311 }
2312
2313
2314 /**
2315  * Emit a LRP (linear interpolation) instruction.
2316  */
2317 static boolean
2318 submit_lrp(struct svga_shader_emitter *emit,
2319            SVGA3dShaderDestToken dst,
2320            struct src_register src0,
2321            struct src_register src1,
2322            struct src_register src2)
2323 {
2324    SVGA3dShaderDestToken tmp;
2325    boolean need_dst_tmp = FALSE;
2326
2327    /* The dst reg must be a temporary, and not be the same as src0 or src2 */
2328    if (SVGA3dShaderGetRegType(dst.value) != SVGA3DREG_TEMP ||
2329        alias_src_dst(src0, dst) ||
2330        alias_src_dst(src2, dst))
2331       need_dst_tmp = TRUE;
2332
2333    if (need_dst_tmp) {
2334       tmp = get_temp( emit );
2335       tmp.mask = dst.mask;
2336    }
2337    else {
2338       tmp = dst;
2339    }
2340
2341    if (!submit_op3(emit, inst_token( SVGA3DOP_LRP ), tmp, src0, src1, src2))
2342       return FALSE;
2343
2344    if (need_dst_tmp) {
2345       if (!submit_op1(emit, inst_token( SVGA3DOP_MOV ), dst, src( tmp )))
2346          return FALSE;
2347    }
2348
2349    return TRUE;
2350 }
2351
2352
2353 /**
2354  * Translate/emit LRP (Linear Interpolation) instruction.
2355  */
2356 static boolean
2357 emit_lrp(struct svga_shader_emitter *emit,
2358          const struct tgsi_full_instruction *insn)
2359 {
2360    SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
2361    const struct src_register src0 = translate_src_register(
2362       emit, &insn->Src[0] );
2363    const struct src_register src1 = translate_src_register(
2364       emit, &insn->Src[1] );
2365    const struct src_register src2 = translate_src_register(
2366       emit, &insn->Src[2] );
2367
2368    return submit_lrp(emit, dst, src0, src1, src2);
2369 }
2370
2371 /**
2372  * Translate/emit DST (Distance function) instruction.
2373  */
2374 static boolean
2375 emit_dst_insn(struct svga_shader_emitter *emit,
2376               const struct tgsi_full_instruction *insn)
2377 {
2378    if (emit->unit == PIPE_SHADER_VERTEX) {
2379       /* SVGA/DX9 has a DST instruction, but only for vertex shaders:
2380        */
2381       return emit_simple_instruction(emit, SVGA3DOP_DST, insn);
2382    }
2383    else {
2384       /* result[0] = 1    * 1;
2385        * result[1] = a[1] * b[1];
2386        * result[2] = a[2] * 1;
2387        * result[3] = 1    * b[3];
2388        */
2389       SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
2390       SVGA3dShaderDestToken tmp;
2391       const struct src_register src0 = translate_src_register(
2392          emit, &insn->Src[0] );
2393       const struct src_register src1 = translate_src_register(
2394          emit, &insn->Src[1] );
2395       boolean need_tmp = FALSE;
2396
2397       if (SVGA3dShaderGetRegType(dst.value) != SVGA3DREG_TEMP ||
2398           alias_src_dst(src0, dst) ||
2399           alias_src_dst(src1, dst))
2400          need_tmp = TRUE;
2401
2402       if (need_tmp) {
2403          tmp = get_temp( emit );
2404       }
2405       else {
2406          tmp = dst;
2407       }
2408
2409       /* tmp.xw = 1.0
2410        */
2411       if (tmp.mask & TGSI_WRITEMASK_XW) {
2412          if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
2413                           writemask(tmp, TGSI_WRITEMASK_XW ),
2414                           get_one_immediate(emit)))
2415             return FALSE;
2416       }
2417
2418       /* tmp.yz = src0
2419        */
2420       if (tmp.mask & TGSI_WRITEMASK_YZ) {
2421          if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
2422                           writemask(tmp, TGSI_WRITEMASK_YZ ),
2423                           src0))
2424             return FALSE;
2425       }
2426
2427       /* tmp.yw = tmp * src1
2428        */
2429       if (tmp.mask & TGSI_WRITEMASK_YW) {
2430          if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ),
2431                           writemask(tmp, TGSI_WRITEMASK_YW ),
2432                           src(tmp),
2433                           src1))
2434             return FALSE;
2435       }
2436
2437       /* dst = tmp
2438        */
2439       if (need_tmp) {
2440          if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
2441                           dst,
2442                           src(tmp)))
2443             return FALSE;
2444       }
2445    }
2446
2447    return TRUE;
2448 }
2449
2450
2451 static boolean
2452 emit_exp(struct svga_shader_emitter *emit,
2453          const struct tgsi_full_instruction *insn)
2454 {
2455    SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
2456    struct src_register src0 =
2457       translate_src_register( emit, &insn->Src[0] );
2458    SVGA3dShaderDestToken fraction;
2459
2460    if (dst.mask & TGSI_WRITEMASK_Y)
2461       fraction = dst;
2462    else if (dst.mask & TGSI_WRITEMASK_X)
2463       fraction = get_temp( emit );
2464    else
2465       fraction.value = 0;
2466
2467    /* If y is being written, fill it with src0 - floor(src0).
2468     */
2469    if (dst.mask & TGSI_WRITEMASK_XY) {
2470       if (!submit_op1( emit, inst_token( SVGA3DOP_FRC ),
2471                        writemask( fraction, TGSI_WRITEMASK_Y ),
2472                        src0 ))
2473          return FALSE;
2474    }
2475
2476    /* If x is being written, fill it with 2 ^ floor(src0).
2477     */
2478    if (dst.mask & TGSI_WRITEMASK_X) {
2479       if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ),
2480                        writemask( dst, TGSI_WRITEMASK_X ),
2481                        src0,
2482                        scalar( negate( src( fraction ) ), TGSI_SWIZZLE_Y ) ) )
2483          return FALSE;
2484
2485       if (!submit_op1( emit, inst_token( SVGA3DOP_EXP ),
2486                        writemask( dst, TGSI_WRITEMASK_X ),
2487                        scalar( src( dst ), TGSI_SWIZZLE_X ) ) )
2488          return FALSE;
2489
2490       if (!(dst.mask & TGSI_WRITEMASK_Y))
2491          release_temp( emit, fraction );
2492    }
2493
2494    /* If z is being written, fill it with 2 ^ src0 (partial precision).
2495     */
2496    if (dst.mask & TGSI_WRITEMASK_Z) {
2497       if (!submit_op1( emit, inst_token( SVGA3DOP_EXPP ),
2498                        writemask( dst, TGSI_WRITEMASK_Z ),
2499                        src0 ) )
2500          return FALSE;
2501    }
2502
2503    /* If w is being written, fill it with one.
2504     */
2505    if (dst.mask & TGSI_WRITEMASK_W) {
2506       if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
2507                        writemask(dst, TGSI_WRITEMASK_W),
2508                        get_one_immediate(emit)))
2509          return FALSE;
2510    }
2511
2512    return TRUE;
2513 }
2514
2515
2516 /**
2517  * Translate/emit LIT (Lighting helper) instruction.
2518  */
2519 static boolean
2520 emit_lit(struct svga_shader_emitter *emit,
2521          const struct tgsi_full_instruction *insn)
2522 {
2523    if (emit->unit == PIPE_SHADER_VERTEX) {
2524       /* SVGA/DX9 has a LIT instruction, but only for vertex shaders:
2525        */
2526       return emit_simple_instruction(emit, SVGA3DOP_LIT, insn);
2527    }
2528    else {
2529       /* D3D vs. GL semantics can be fairly easily accomodated by
2530        * variations on this sequence.
2531        *
2532        * GL:
2533        *   tmp.y = src.x
2534        *   tmp.z = pow(src.y,src.w)
2535        *   p0 = src0.xxxx > 0
2536        *   result = zero.wxxw
2537        *   (p0) result.yz = tmp
2538        *
2539        * D3D:
2540        *   tmp.y = src.x
2541        *   tmp.z = pow(src.y,src.w)
2542        *   p0 = src0.xxyy > 0
2543        *   result = zero.wxxw
2544        *   (p0) result.yz = tmp
2545        *
2546        * Will implement the GL version for now.
2547        */
2548       SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
2549       SVGA3dShaderDestToken tmp = get_temp( emit );
2550       const struct src_register src0 = translate_src_register(
2551          emit, &insn->Src[0] );
2552
2553       /* tmp = pow(src.y, src.w)
2554        */
2555       if (dst.mask & TGSI_WRITEMASK_Z) {
2556          if (!submit_op2(emit, inst_token( SVGA3DOP_POW ),
2557                          tmp,
2558                          scalar(src0, 1),
2559                          scalar(src0, 3)))
2560             return FALSE;
2561       }
2562
2563       /* tmp.y = src.x
2564        */
2565       if (dst.mask & TGSI_WRITEMASK_Y) {
2566          if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
2567                           writemask(tmp, TGSI_WRITEMASK_Y ),
2568                           scalar(src0, 0)))
2569             return FALSE;
2570       }
2571
2572       /* Can't quite do this with emit conditional due to the extra
2573        * writemask on the predicated mov:
2574        */
2575       {
2576          SVGA3dShaderDestToken pred_reg = dst_register( SVGA3DREG_PREDICATE, 0 );
2577          struct src_register predsrc;
2578
2579          /* D3D vs GL semantics:
2580           */
2581          if (0)
2582             predsrc = swizzle(src0, 0, 0, 1, 1); /* D3D */
2583          else
2584             predsrc = swizzle(src0, 0, 0, 0, 0); /* GL */
2585
2586          /* SETP src0.xxyy, GT, {0}.x */
2587          if (!submit_op2( emit,
2588                           inst_token_setp(SVGA3DOPCOMP_GT),
2589                           pred_reg,
2590                           predsrc,
2591                           get_zero_immediate(emit)))
2592             return FALSE;
2593
2594          /* MOV dst, fail */
2595          if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), dst,
2596                           get_immediate(emit, 1.0f, 0.0f, 0.0f, 1.0f)))
2597              return FALSE;
2598
2599          /* MOV dst.yz, tmp (predicated)
2600           *
2601           * Note that the predicate reg (and possible modifiers) is passed
2602           * as the first source argument.
2603           */
2604          if (dst.mask & TGSI_WRITEMASK_YZ) {
2605             if (!submit_op2( emit,
2606                              inst_token_predicated(SVGA3DOP_MOV),
2607                              writemask(dst, TGSI_WRITEMASK_YZ),
2608                              src( pred_reg ), src( tmp ) ))
2609                return FALSE;
2610          }
2611       }
2612    }
2613
2614    return TRUE;
2615 }
2616
2617
2618 static boolean
2619 emit_ex2(struct svga_shader_emitter *emit,
2620          const struct tgsi_full_instruction *insn)
2621 {
2622    SVGA3dShaderInstToken inst;
2623    SVGA3dShaderDestToken dst;
2624    struct src_register src0;
2625
2626    inst = inst_token( SVGA3DOP_EXP );
2627    dst = translate_dst_register( emit, insn, 0 );
2628    src0 = translate_src_register( emit, &insn->Src[0] );
2629    src0 = scalar( src0, TGSI_SWIZZLE_X );
2630
2631    if (dst.mask != TGSI_WRITEMASK_XYZW) {
2632       SVGA3dShaderDestToken tmp = get_temp( emit );
2633
2634       if (!submit_op1( emit, inst, tmp, src0 ))
2635          return FALSE;
2636
2637       return submit_op1( emit, inst_token( SVGA3DOP_MOV ),
2638                          dst,
2639                          scalar( src( tmp ), TGSI_SWIZZLE_X ) );
2640    }
2641
2642    return submit_op1( emit, inst, dst, src0 );
2643 }
2644
2645
2646 static boolean
2647 emit_log(struct svga_shader_emitter *emit,
2648          const struct tgsi_full_instruction *insn)
2649 {
2650    SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
2651    struct src_register src0 =
2652       translate_src_register( emit, &insn->Src[0] );
2653    SVGA3dShaderDestToken abs_tmp;
2654    struct src_register abs_src0;
2655    SVGA3dShaderDestToken log2_abs;
2656
2657    abs_tmp.value = 0;
2658
2659    if (dst.mask & TGSI_WRITEMASK_Z)
2660       log2_abs = dst;
2661    else if (dst.mask & TGSI_WRITEMASK_XY)
2662       log2_abs = get_temp( emit );
2663    else
2664       log2_abs.value = 0;
2665
2666    /* If z is being written, fill it with log2( abs( src0 ) ).
2667     */
2668    if (dst.mask & TGSI_WRITEMASK_XYZ) {
2669       if (!src0.base.srcMod || src0.base.srcMod == SVGA3DSRCMOD_ABS)
2670          abs_src0 = src0;
2671       else {
2672          abs_tmp = get_temp( emit );
2673
2674          if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
2675                           abs_tmp,
2676                           src0 ) )
2677             return FALSE;
2678
2679          abs_src0 = src( abs_tmp );
2680       }
2681
2682       abs_src0 = absolute( scalar( abs_src0, TGSI_SWIZZLE_X ) );
2683
2684       if (!submit_op1( emit, inst_token( SVGA3DOP_LOG ),
2685                        writemask( log2_abs, TGSI_WRITEMASK_Z ),
2686                        abs_src0 ) )
2687          return FALSE;
2688    }
2689
2690    if (dst.mask & TGSI_WRITEMASK_XY) {
2691       SVGA3dShaderDestToken floor_log2;
2692
2693       if (dst.mask & TGSI_WRITEMASK_X)
2694          floor_log2 = dst;
2695       else
2696          floor_log2 = get_temp( emit );
2697
2698       /* If x is being written, fill it with floor( log2( abs( src0 ) ) ).
2699        */
2700       if (!submit_op1( emit, inst_token( SVGA3DOP_FRC ),
2701                        writemask( floor_log2, TGSI_WRITEMASK_X ),
2702                        scalar( src( log2_abs ), TGSI_SWIZZLE_Z ) ) )
2703          return FALSE;
2704
2705       if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ),
2706                        writemask( floor_log2, TGSI_WRITEMASK_X ),
2707                        scalar( src( log2_abs ), TGSI_SWIZZLE_Z ),
2708                        negate( src( floor_log2 ) ) ) )
2709          return FALSE;
2710
2711       /* If y is being written, fill it with
2712        * abs ( src0 ) / ( 2 ^ floor( log2( abs( src0 ) ) ) ).
2713        */
2714       if (dst.mask & TGSI_WRITEMASK_Y) {
2715          if (!submit_op1( emit, inst_token( SVGA3DOP_EXP ),
2716                           writemask( dst, TGSI_WRITEMASK_Y ),
2717                           negate( scalar( src( floor_log2 ),
2718                                           TGSI_SWIZZLE_X ) ) ) )
2719             return FALSE;
2720
2721          if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ),
2722                           writemask( dst, TGSI_WRITEMASK_Y ),
2723                           src( dst ),
2724                           abs_src0 ) )
2725             return FALSE;
2726       }
2727
2728       if (!(dst.mask & TGSI_WRITEMASK_X))
2729          release_temp( emit, floor_log2 );
2730
2731       if (!(dst.mask & TGSI_WRITEMASK_Z))
2732          release_temp( emit, log2_abs );
2733    }
2734
2735    if (dst.mask & TGSI_WRITEMASK_XYZ && src0.base.srcMod &&
2736        src0.base.srcMod != SVGA3DSRCMOD_ABS)
2737       release_temp( emit, abs_tmp );
2738
2739    /* If w is being written, fill it with one.
2740     */
2741    if (dst.mask & TGSI_WRITEMASK_W) {
2742       if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
2743                        writemask(dst, TGSI_WRITEMASK_W),
2744                        get_one_immediate(emit)))
2745          return FALSE;
2746    }
2747
2748    return TRUE;
2749 }
2750
2751
2752 /**
2753  * Translate TGSI TRUNC or ROUND instruction.
2754  * We need to truncate toward zero. Ex: trunc(-1.9) = -1
2755  * Different approaches are needed for VS versus PS.
2756  */
2757 static boolean
2758 emit_trunc_round(struct svga_shader_emitter *emit,
2759                  const struct tgsi_full_instruction *insn,
2760                  boolean round)
2761 {
2762    SVGA3dShaderDestToken dst = translate_dst_register(emit, insn, 0);
2763    const struct src_register src0 =
2764       translate_src_register(emit, &insn->Src[0] );
2765    SVGA3dShaderDestToken t1 = get_temp(emit);
2766
2767    if (round) {
2768       SVGA3dShaderDestToken t0 = get_temp(emit);
2769       struct src_register half = get_half_immediate(emit);
2770
2771       /* t0 = abs(src0) + 0.5 */
2772       if (!submit_op2(emit, inst_token(SVGA3DOP_ADD), t0,
2773                       absolute(src0), half))
2774          return FALSE;
2775
2776       /* t1 = fract(t0) */
2777       if (!submit_op1(emit, inst_token(SVGA3DOP_FRC), t1, src(t0)))
2778          return FALSE;
2779
2780       /* t1 = t0 - t1 */
2781       if (!submit_op2(emit, inst_token(SVGA3DOP_ADD), t1, src(t0),
2782                       negate(src(t1))))
2783          return FALSE;
2784    }
2785    else {
2786       /* trunc */
2787
2788       /* t1 = fract(abs(src0)) */
2789       if (!submit_op1(emit, inst_token(SVGA3DOP_FRC), t1, absolute(src0)))
2790          return FALSE;
2791
2792       /* t1 = abs(src0) - t1 */
2793       if (!submit_op2(emit, inst_token(SVGA3DOP_ADD), t1, absolute(src0),
2794                       negate(src(t1))))
2795          return FALSE;
2796    }
2797
2798    /*
2799     * Now we need to multiply t1 by the sign of the original value.
2800    */
2801    if (emit->unit == PIPE_SHADER_VERTEX) {
2802       /* For VS: use SGN instruction */
2803       /* Need two extra/dummy registers: */
2804       SVGA3dShaderDestToken t2 = get_temp(emit), t3 = get_temp(emit),
2805          t4 = get_temp(emit);
2806
2807       /* t2 = sign(src0) */
2808       if (!submit_op3(emit, inst_token(SVGA3DOP_SGN), t2, src0,
2809                       src(t3), src(t4)))
2810          return FALSE;
2811
2812       /* dst = t1 * t2 */
2813       if (!submit_op2(emit, inst_token(SVGA3DOP_MUL), dst, src(t1), src(t2)))
2814          return FALSE;
2815    }
2816    else {
2817       /* For FS: Use CMP instruction */
2818       return submit_op3(emit, inst_token( SVGA3DOP_CMP ), dst,
2819                         src0, src(t1), negate(src(t1)));
2820    }
2821
2822    return TRUE;
2823 }
2824
2825
2826 /**
2827  * Translate/emit "begin subroutine" instruction/marker/label.
2828  */
2829 static boolean
2830 emit_bgnsub(struct svga_shader_emitter *emit,
2831             unsigned position,
2832             const struct tgsi_full_instruction *insn)
2833 {
2834    unsigned i;
2835
2836    /* Note that we've finished the main function and are now emitting
2837     * subroutines.  This affects how we terminate the generated
2838     * shader.
2839     */
2840    emit->in_main_func = FALSE;
2841
2842    for (i = 0; i < emit->nr_labels; i++) {
2843       if (emit->label[i] == position) {
2844          return (emit_instruction( emit, inst_token( SVGA3DOP_RET ) ) &&
2845                  emit_instruction( emit, inst_token( SVGA3DOP_LABEL ) ) &&
2846                  emit_src( emit, src_register( SVGA3DREG_LABEL, i )));
2847       }
2848    }
2849
2850    assert(0);
2851    return TRUE;
2852 }
2853
2854
2855 /**
2856  * Translate/emit subroutine call instruction.
2857  */
2858 static boolean
2859 emit_call(struct svga_shader_emitter *emit,
2860           const struct tgsi_full_instruction *insn)
2861 {
2862    unsigned position = insn->Label.Label;
2863    unsigned i;
2864
2865    for (i = 0; i < emit->nr_labels; i++) {
2866       if (emit->label[i] == position)
2867          break;
2868    }
2869
2870    if (emit->nr_labels == ARRAY_SIZE(emit->label))
2871       return FALSE;
2872
2873    if (i == emit->nr_labels) {
2874       emit->label[i] = position;
2875       emit->nr_labels++;
2876    }
2877
2878    return (emit_instruction( emit, inst_token( SVGA3DOP_CALL ) ) &&
2879            emit_src( emit, src_register( SVGA3DREG_LABEL, i )));
2880 }
2881
2882
2883 /**
2884  * Called at the end of the shader.  Actually, emit special "fix-up"
2885  * code for the vertex/fragment shader.
2886  */
2887 static boolean
2888 emit_end(struct svga_shader_emitter *emit)
2889 {
2890    if (emit->unit == PIPE_SHADER_VERTEX) {
2891       return emit_vs_postamble( emit );
2892    }
2893    else {
2894       return emit_ps_postamble( emit );
2895    }
2896 }
2897
2898
2899 /**
2900  * Translate any TGSI instruction to SVGA.
2901  */
2902 static boolean
2903 svga_emit_instruction(struct svga_shader_emitter *emit,
2904                       unsigned position,
2905                       const struct tgsi_full_instruction *insn)
2906 {
2907    switch (insn->Instruction.Opcode) {
2908
2909    case TGSI_OPCODE_ARL:
2910       return emit_arl( emit, insn );
2911
2912    case TGSI_OPCODE_TEX:
2913    case TGSI_OPCODE_TXB:
2914    case TGSI_OPCODE_TXP:
2915    case TGSI_OPCODE_TXL:
2916    case TGSI_OPCODE_TXD:
2917       return emit_tex( emit, insn );
2918
2919    case TGSI_OPCODE_DDX:
2920    case TGSI_OPCODE_DDY:
2921       return emit_deriv( emit, insn );
2922
2923    case TGSI_OPCODE_BGNSUB:
2924       return emit_bgnsub( emit, position, insn );
2925
2926    case TGSI_OPCODE_ENDSUB:
2927       return TRUE;
2928
2929    case TGSI_OPCODE_CAL:
2930       return emit_call( emit, insn );
2931
2932    case TGSI_OPCODE_FLR:
2933       return emit_floor( emit, insn );
2934
2935    case TGSI_OPCODE_TRUNC:
2936       return emit_trunc_round( emit, insn, FALSE );
2937
2938    case TGSI_OPCODE_ROUND:
2939       return emit_trunc_round( emit, insn, TRUE );
2940
2941    case TGSI_OPCODE_CEIL:
2942       return emit_ceil( emit, insn );
2943
2944    case TGSI_OPCODE_CMP:
2945       return emit_cmp( emit, insn );
2946
2947    case TGSI_OPCODE_DIV:
2948       return emit_div( emit, insn );
2949
2950    case TGSI_OPCODE_DP2:
2951       return emit_dp2( emit, insn );
2952
2953    case TGSI_OPCODE_DPH:
2954       return emit_dph( emit, insn );
2955
2956    case TGSI_OPCODE_COS:
2957       return emit_cos( emit, insn );
2958
2959    case TGSI_OPCODE_SIN:
2960       return emit_sin( emit, insn );
2961
2962    case TGSI_OPCODE_SCS:
2963       return emit_sincos( emit, insn );
2964
2965    case TGSI_OPCODE_END:
2966       /* TGSI always finishes the main func with an END */
2967       return emit_end( emit );
2968
2969    case TGSI_OPCODE_KILL_IF:
2970       return emit_kill_if( emit, insn );
2971
2972       /* Selection opcodes.  The underlying language is fairly
2973        * non-orthogonal about these.
2974        */
2975    case TGSI_OPCODE_SEQ:
2976       return emit_select_op( emit, PIPE_FUNC_EQUAL, insn );
2977
2978    case TGSI_OPCODE_SNE:
2979       return emit_select_op( emit, PIPE_FUNC_NOTEQUAL, insn );
2980
2981    case TGSI_OPCODE_SGT:
2982       return emit_select_op( emit, PIPE_FUNC_GREATER, insn );
2983
2984    case TGSI_OPCODE_SGE:
2985       return emit_select_op( emit, PIPE_FUNC_GEQUAL, insn );
2986
2987    case TGSI_OPCODE_SLT:
2988       return emit_select_op( emit, PIPE_FUNC_LESS, insn );
2989
2990    case TGSI_OPCODE_SLE:
2991       return emit_select_op( emit, PIPE_FUNC_LEQUAL, insn );
2992
2993    case TGSI_OPCODE_SUB:
2994       return emit_sub( emit, insn );
2995
2996    case TGSI_OPCODE_POW:
2997       return emit_pow( emit, insn );
2998
2999    case TGSI_OPCODE_EX2:
3000       return emit_ex2( emit, insn );
3001
3002    case TGSI_OPCODE_EXP:
3003       return emit_exp( emit, insn );
3004
3005    case TGSI_OPCODE_LOG:
3006       return emit_log( emit, insn );
3007
3008    case TGSI_OPCODE_LG2:
3009       return emit_scalar_op1( emit, SVGA3DOP_LOG, insn );
3010
3011    case TGSI_OPCODE_RSQ:
3012       return emit_scalar_op1( emit, SVGA3DOP_RSQ, insn );
3013
3014    case TGSI_OPCODE_RCP:
3015       return emit_scalar_op1( emit, SVGA3DOP_RCP, insn );
3016
3017    case TGSI_OPCODE_CONT:
3018       /* not expected (we return PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED = 0) */
3019       return FALSE;
3020
3021    case TGSI_OPCODE_RET:
3022       /* This is a noop -- we tell mesa that we can't support RET
3023        * within a function (early return), so this will always be
3024        * followed by an ENDSUB.
3025        */
3026       return TRUE;
3027
3028       /* These aren't actually used by any of the frontends we care
3029        * about:
3030        */
3031    case TGSI_OPCODE_CLAMP:
3032    case TGSI_OPCODE_AND:
3033    case TGSI_OPCODE_OR:
3034    case TGSI_OPCODE_I2F:
3035    case TGSI_OPCODE_NOT:
3036    case TGSI_OPCODE_SHL:
3037    case TGSI_OPCODE_ISHR:
3038    case TGSI_OPCODE_XOR:
3039       return FALSE;
3040
3041    case TGSI_OPCODE_IF:
3042       return emit_if( emit, insn );
3043    case TGSI_OPCODE_ELSE:
3044       return emit_else( emit, insn );
3045    case TGSI_OPCODE_ENDIF:
3046       return emit_endif( emit, insn );
3047
3048    case TGSI_OPCODE_BGNLOOP:
3049       return emit_bgnloop( emit, insn );
3050    case TGSI_OPCODE_ENDLOOP:
3051       return emit_endloop( emit, insn );
3052    case TGSI_OPCODE_BRK:
3053       return emit_brk( emit, insn );
3054
3055    case TGSI_OPCODE_XPD:
3056       return emit_xpd( emit, insn );
3057
3058    case TGSI_OPCODE_KILL:
3059       return emit_kill( emit, insn );
3060
3061    case TGSI_OPCODE_DST:
3062       return emit_dst_insn( emit, insn );
3063
3064    case TGSI_OPCODE_LIT:
3065       return emit_lit( emit, insn );
3066
3067    case TGSI_OPCODE_LRP:
3068       return emit_lrp( emit, insn );
3069
3070    case TGSI_OPCODE_SSG:
3071       return emit_ssg( emit, insn );
3072
3073    case TGSI_OPCODE_MOV:
3074       return emit_mov( emit, insn );
3075
3076    default:
3077       {
3078          unsigned opcode = translate_opcode(insn->Instruction.Opcode);
3079
3080          if (opcode == SVGA3DOP_LAST_INST)
3081             return FALSE;
3082
3083          if (!emit_simple_instruction( emit, opcode, insn ))
3084             return FALSE;
3085       }
3086    }
3087
3088    return TRUE;
3089 }
3090
3091
3092 /**
3093  * Translate/emit a TGSI IMMEDIATE declaration.
3094  * An immediate vector is a constant that's hard-coded into the shader.
3095  */
3096 static boolean
3097 svga_emit_immediate(struct svga_shader_emitter *emit,
3098                     const struct tgsi_full_immediate *imm)
3099 {
3100    static const float id[4] = {0,0,0,1};
3101    float value[4];
3102    unsigned i;
3103
3104    assert(1 <= imm->Immediate.NrTokens && imm->Immediate.NrTokens <= 5);
3105    for (i = 0; i < imm->Immediate.NrTokens - 1; i++) {
3106       float f = imm->u[i].Float;
3107       value[i] = util_is_inf_or_nan(f) ? 0.0f : f;
3108    }
3109
3110    /* If the immediate has less than four values, fill in the remaining
3111     * positions from id={0,0,0,1}.
3112     */
3113    for ( ; i < 4; i++ )
3114       value[i] = id[i];
3115
3116    return emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT,
3117                           emit->imm_start + emit->internal_imm_count++,
3118                           value[0], value[1], value[2], value[3]);
3119 }
3120
3121
3122 static boolean
3123 make_immediate(struct svga_shader_emitter *emit,
3124                float a, float b, float c, float d,
3125                struct src_register *out )
3126 {
3127    unsigned idx = emit->nr_hw_float_const++;
3128
3129    if (!emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT,
3130                         idx, a, b, c, d ))
3131       return FALSE;
3132
3133    *out = src_register( SVGA3DREG_CONST, idx );
3134
3135    return TRUE;
3136 }
3137
3138
3139 /**
3140  * Emit special VS instructions at top of shader.
3141  */
3142 static boolean
3143 emit_vs_preamble(struct svga_shader_emitter *emit)
3144 {
3145    if (!emit->key.vs.need_prescale) {
3146       if (!make_immediate( emit, 0, 0, .5, .5,
3147                            &emit->imm_0055))
3148          return FALSE;
3149    }
3150
3151    return TRUE;
3152 }
3153
3154
3155 /**
3156  * Emit special PS instructions at top of shader.
3157  */
3158 static boolean
3159 emit_ps_preamble(struct svga_shader_emitter *emit)
3160 {
3161    if (emit->ps_reads_pos && emit->info.reads_z) {
3162       /*
3163        * Assemble the position from various bits of inputs. Depth and W are
3164        * passed in a texcoord this is due to D3D's vPos not hold Z or W.
3165        * Also fixup the perspective interpolation.
3166        *
3167        * temp_pos.xy = vPos.xy
3168        * temp_pos.w = rcp(texcoord1.w);
3169        * temp_pos.z = texcoord1.z * temp_pos.w;
3170        */
3171       if (!submit_op1( emit,
3172                        inst_token(SVGA3DOP_MOV),
3173                        writemask( emit->ps_temp_pos, TGSI_WRITEMASK_XY ),
3174                        emit->ps_true_pos ))
3175          return FALSE;
3176
3177       if (!submit_op1( emit,
3178                        inst_token(SVGA3DOP_RCP),
3179                        writemask( emit->ps_temp_pos, TGSI_WRITEMASK_W ),
3180                        scalar( emit->ps_depth_pos, TGSI_SWIZZLE_W ) ))
3181          return FALSE;
3182
3183       if (!submit_op2( emit,
3184                        inst_token(SVGA3DOP_MUL),
3185                        writemask( emit->ps_temp_pos, TGSI_WRITEMASK_Z ),
3186                        scalar( emit->ps_depth_pos, TGSI_SWIZZLE_Z ),
3187                        scalar( src(emit->ps_temp_pos), TGSI_SWIZZLE_W ) ))
3188          return FALSE;
3189    }
3190
3191    return TRUE;
3192 }
3193
3194
3195 /**
3196  * Emit special PS instructions at end of shader.
3197  */
3198 static boolean
3199 emit_ps_postamble(struct svga_shader_emitter *emit)
3200 {
3201    unsigned i;
3202
3203    /* PS oDepth is incredibly fragile and it's very hard to catch the
3204     * types of usage that break it during shader emit.  Easier just to
3205     * redirect the main program to a temporary and then only touch
3206     * oDepth with a hand-crafted MOV below.
3207     */
3208    if (SVGA3dShaderGetRegType(emit->true_pos.value) != 0) {
3209       if (!submit_op1( emit,
3210                        inst_token(SVGA3DOP_MOV),
3211                        emit->true_pos,
3212                        scalar(src(emit->temp_pos), TGSI_SWIZZLE_Z) ))
3213          return FALSE;
3214    }
3215
3216    for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) {
3217       if (SVGA3dShaderGetRegType(emit->true_color_output[i].value) != 0) {
3218          /* Potentially override output colors with white for XOR
3219           * logicop workaround.
3220           */
3221          if (emit->unit == PIPE_SHADER_FRAGMENT &&
3222              emit->key.fs.white_fragments) {
3223             struct src_register one = get_one_immediate(emit);
3224
3225             if (!submit_op1( emit,
3226                              inst_token(SVGA3DOP_MOV),
3227                              emit->true_color_output[i],
3228                              one ))
3229                return FALSE;
3230          }
3231          else if (emit->unit == PIPE_SHADER_FRAGMENT &&
3232                   i < emit->key.fs.write_color0_to_n_cbufs) {
3233             /* Write temp color output [0] to true output [i] */
3234             if (!submit_op1(emit, inst_token(SVGA3DOP_MOV),
3235                             emit->true_color_output[i],
3236                             src(emit->temp_color_output[0]))) {
3237                return FALSE;
3238             }
3239          }
3240          else {
3241             if (!submit_op1( emit,
3242                              inst_token(SVGA3DOP_MOV),
3243                              emit->true_color_output[i],
3244                              src(emit->temp_color_output[i]) ))
3245                return FALSE;
3246          }
3247       }
3248    }
3249
3250    return TRUE;
3251 }
3252
3253
3254 /**
3255  * Emit special VS instructions at end of shader.
3256  */
3257 static boolean
3258 emit_vs_postamble(struct svga_shader_emitter *emit)
3259 {
3260    /* PSIZ output is incredibly fragile and it's very hard to catch
3261     * the types of usage that break it during shader emit.  Easier
3262     * just to redirect the main program to a temporary and then only
3263     * touch PSIZ with a hand-crafted MOV below.
3264     */
3265    if (SVGA3dShaderGetRegType(emit->true_psiz.value) != 0) {
3266       if (!submit_op1( emit,
3267                        inst_token(SVGA3DOP_MOV),
3268                        emit->true_psiz,
3269                        scalar(src(emit->temp_psiz), TGSI_SWIZZLE_X) ))
3270          return FALSE;
3271    }
3272
3273    /* Need to perform various manipulations on vertex position to cope
3274     * with the different GL and D3D clip spaces.
3275     */
3276    if (emit->key.vs.need_prescale) {
3277       SVGA3dShaderDestToken temp_pos = emit->temp_pos;
3278       SVGA3dShaderDestToken depth = emit->depth_pos;
3279       SVGA3dShaderDestToken pos = emit->true_pos;
3280       unsigned offset = emit->info.file_max[TGSI_FILE_CONSTANT] + 1;
3281       struct src_register prescale_scale = src_register( SVGA3DREG_CONST,
3282                                                          offset + 0 );
3283       struct src_register prescale_trans = src_register( SVGA3DREG_CONST,
3284                                                          offset + 1 );
3285
3286       if (!submit_op1( emit,
3287                        inst_token(SVGA3DOP_MOV),
3288                        writemask(depth, TGSI_WRITEMASK_W),
3289                        scalar(src(temp_pos), TGSI_SWIZZLE_W) ))
3290          return FALSE;
3291
3292       /* MUL temp_pos.xyz,    temp_pos,      prescale.scale
3293        * MAD result.position, temp_pos.wwww, prescale.trans, temp_pos
3294        *   --> Note that prescale.trans.w == 0
3295        */
3296       if (!submit_op2( emit,
3297                        inst_token(SVGA3DOP_MUL),
3298                        writemask(temp_pos, TGSI_WRITEMASK_XYZ),
3299                        src(temp_pos),
3300                        prescale_scale ))
3301          return FALSE;
3302
3303       if (!submit_op3( emit,
3304                        inst_token(SVGA3DOP_MAD),
3305                        pos,
3306                        swizzle(src(temp_pos), 3, 3, 3, 3),
3307                        prescale_trans,
3308                        src(temp_pos)))
3309          return FALSE;
3310
3311       /* Also write to depth value */
3312       if (!submit_op3( emit,
3313                        inst_token(SVGA3DOP_MAD),
3314                        writemask(depth, TGSI_WRITEMASK_Z),
3315                        swizzle(src(temp_pos), 3, 3, 3, 3),
3316                        prescale_trans,
3317                        src(temp_pos) ))
3318          return FALSE;
3319    }
3320    else {
3321       SVGA3dShaderDestToken temp_pos = emit->temp_pos;
3322       SVGA3dShaderDestToken depth = emit->depth_pos;
3323       SVGA3dShaderDestToken pos = emit->true_pos;
3324       struct src_register imm_0055 = emit->imm_0055;
3325
3326       /* Adjust GL clipping coordinate space to hardware (D3D-style):
3327        *
3328        * DP4 temp_pos.z, {0,0,.5,.5}, temp_pos
3329        * MOV result.position, temp_pos
3330        */
3331       if (!submit_op2( emit,
3332                        inst_token(SVGA3DOP_DP4),
3333                        writemask(temp_pos, TGSI_WRITEMASK_Z),
3334                        imm_0055,
3335                        src(temp_pos) ))
3336          return FALSE;
3337
3338       if (!submit_op1( emit,
3339                        inst_token(SVGA3DOP_MOV),
3340                        pos,
3341                        src(temp_pos) ))
3342          return FALSE;
3343
3344       /* Move the manipulated depth into the extra texcoord reg */
3345       if (!submit_op1( emit,
3346                        inst_token(SVGA3DOP_MOV),
3347                        writemask(depth, TGSI_WRITEMASK_ZW),
3348                        src(temp_pos) ))
3349          return FALSE;
3350    }
3351
3352    return TRUE;
3353 }
3354
3355
3356 /**
3357  * For the pixel shader: emit the code which chooses the front
3358  * or back face color depending on triangle orientation.
3359  * This happens at the top of the fragment shader.
3360  *
3361  *  0: IF VFACE :4
3362  *  1:   COLOR = FrontColor;
3363  *  2: ELSE
3364  *  3:   COLOR = BackColor;
3365  *  4: ENDIF
3366  */
3367 static boolean
3368 emit_light_twoside(struct svga_shader_emitter *emit)
3369 {
3370    struct src_register vface, zero;
3371    struct src_register front[2];
3372    struct src_register back[2];
3373    SVGA3dShaderDestToken color[2];
3374    int count = emit->internal_color_count;
3375    unsigned i;
3376    SVGA3dShaderInstToken if_token;
3377
3378    if (count == 0)
3379       return TRUE;
3380
3381    vface = get_vface( emit );
3382    zero = get_zero_immediate(emit);
3383
3384    /* Can't use get_temp() to allocate the color reg as such
3385     * temporaries will be reclaimed after each instruction by the call
3386     * to reset_temp_regs().
3387     */
3388    for (i = 0; i < count; i++) {
3389       color[i] = dst_register( SVGA3DREG_TEMP, emit->nr_hw_temp++ );
3390       front[i] = emit->input_map[emit->internal_color_idx[i]];
3391
3392       /* Back is always the next input:
3393        */
3394       back[i] = front[i];
3395       back[i].base.num = front[i].base.num + 1;
3396
3397       /* Reassign the input_map to the actual front-face color:
3398        */
3399       emit->input_map[emit->internal_color_idx[i]] = src(color[i]);
3400    }
3401
3402    if_token = inst_token( SVGA3DOP_IFC );
3403
3404    if (emit->key.fs.front_ccw)
3405       if_token.control = SVGA3DOPCOMP_LT;
3406    else
3407       if_token.control = SVGA3DOPCOMP_GT;
3408
3409    if (!(emit_instruction( emit, if_token ) &&
3410          emit_src( emit, vface ) &&
3411          emit_src( emit, zero ) ))
3412       return FALSE;
3413
3414    for (i = 0; i < count; i++) {
3415       if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), color[i], front[i] ))
3416          return FALSE;
3417    }
3418
3419    if (!(emit_instruction( emit, inst_token( SVGA3DOP_ELSE))))
3420       return FALSE;
3421
3422    for (i = 0; i < count; i++) {
3423       if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), color[i], back[i] ))
3424          return FALSE;
3425    }
3426
3427    if (!emit_instruction( emit, inst_token( SVGA3DOP_ENDIF ) ))
3428       return FALSE;
3429
3430    return TRUE;
3431 }
3432
3433
3434 /**
3435  * Emit special setup code for the front/back face register in the FS.
3436  *  0: SETP_GT TEMP, VFACE, 0
3437  *  where TEMP is a fake frontface register
3438  */
3439 static boolean
3440 emit_frontface(struct svga_shader_emitter *emit)
3441 {
3442    struct src_register vface;
3443    SVGA3dShaderDestToken temp;
3444    struct src_register pass, fail;
3445
3446    vface = get_vface( emit );
3447
3448    /* Can't use get_temp() to allocate the fake frontface reg as such
3449     * temporaries will be reclaimed after each instruction by the call
3450     * to reset_temp_regs().
3451     */
3452    temp = dst_register( SVGA3DREG_TEMP,
3453                         emit->nr_hw_temp++ );
3454
3455    if (emit->key.fs.front_ccw) {
3456       pass = get_zero_immediate(emit);
3457       fail = get_one_immediate(emit);
3458    } else {
3459       pass = get_one_immediate(emit);
3460       fail = get_zero_immediate(emit);
3461    }
3462
3463    if (!emit_conditional(emit, PIPE_FUNC_GREATER,
3464                          temp, vface, get_zero_immediate(emit),
3465                          pass, fail))
3466       return FALSE;
3467
3468    /* Reassign the input_map to the actual front-face color:
3469     */
3470    emit->input_map[emit->internal_frontface_idx] = src(temp);
3471
3472    return TRUE;
3473 }
3474
3475
3476 /**
3477  * Emit code to invert the T component of the incoming texture coordinate.
3478  * This is used for drawing point sprites when
3479  * pipe_rasterizer_state::sprite_coord_mode == PIPE_SPRITE_COORD_LOWER_LEFT.
3480  */
3481 static boolean
3482 emit_inverted_texcoords(struct svga_shader_emitter *emit)
3483 {
3484    unsigned inverted_texcoords = emit->inverted_texcoords;
3485
3486    while (inverted_texcoords) {
3487       const unsigned unit = ffs(inverted_texcoords) - 1;
3488
3489       assert(emit->inverted_texcoords & (1 << unit));
3490
3491       assert(unit < ARRAY_SIZE(emit->ps_true_texcoord));
3492
3493       assert(unit < ARRAY_SIZE(emit->ps_inverted_texcoord_input));
3494
3495       assert(emit->ps_inverted_texcoord_input[unit]
3496              < ARRAY_SIZE(emit->input_map));
3497
3498       /* inverted = coord * (1, -1, 1, 1) + (0, 1, 0, 0) */
3499       if (!submit_op3(emit,
3500                       inst_token(SVGA3DOP_MAD),
3501                       dst(emit->ps_inverted_texcoord[unit]),
3502                       emit->ps_true_texcoord[unit],
3503                       get_immediate(emit, 1.0f, -1.0f, 1.0f, 1.0f),
3504                       get_immediate(emit, 0.0f, 1.0f, 0.0f, 0.0f)))
3505          return FALSE;
3506
3507       /* Reassign the input_map entry to the new texcoord register */
3508       emit->input_map[emit->ps_inverted_texcoord_input[unit]] =
3509          emit->ps_inverted_texcoord[unit];
3510
3511       inverted_texcoords &= ~(1 << unit);
3512    }
3513
3514    return TRUE;
3515 }
3516
3517
3518 /**
3519  * Emit code to adjust vertex shader inputs/attributes:
3520  * - Change range from [0,1] to [-1,1] (for normalized byte/short attribs).
3521  * - Set attrib W component = 1.
3522  */
3523 static boolean
3524 emit_adjusted_vertex_attribs(struct svga_shader_emitter *emit)
3525 {
3526    unsigned adjust_mask = (emit->key.vs.adjust_attrib_range |
3527                            emit->key.vs.adjust_attrib_w_1);
3528
3529    while (adjust_mask) {
3530       /* Adjust vertex attrib range and/or set W component = 1 */
3531       const unsigned index = u_bit_scan(&adjust_mask);
3532       struct src_register tmp;
3533
3534       /* allocate a temp reg */
3535       tmp = src_register(SVGA3DREG_TEMP, emit->nr_hw_temp);
3536       emit->nr_hw_temp++;
3537
3538       if (emit->key.vs.adjust_attrib_range & (1 << index)) {
3539          /* The vertex input/attribute is supposed to be a signed value in
3540           * the range [-1,1] but we actually fetched/converted it to the
3541           * range [0,1].  This most likely happens when the app specifies a
3542           * signed byte attribute but we interpreted it as unsigned bytes.
3543           * See also svga_translate_vertex_format().
3544           *
3545           * Here, we emit some extra instructions to adjust
3546           * the attribute values from [0,1] to [-1,1].
3547           *
3548           * The adjustment we implement is:
3549           *   new_attrib = attrib * 2.0;
3550           *   if (attrib >= 0.5)
3551           *      new_attrib = new_attrib - 2.0;
3552           * This isn't exactly right (it's off by a bit or so) but close enough.
3553           */
3554          SVGA3dShaderDestToken pred_reg = dst_register(SVGA3DREG_PREDICATE, 0);
3555
3556          /* tmp = attrib * 2.0 */
3557          if (!submit_op2(emit,
3558                          inst_token(SVGA3DOP_MUL),
3559                          dst(tmp),
3560                          emit->input_map[index],
3561                          get_two_immediate(emit)))
3562             return FALSE;
3563
3564          /* pred = (attrib >= 0.5) */
3565          if (!submit_op2(emit,
3566                          inst_token_setp(SVGA3DOPCOMP_GE),
3567                          pred_reg,
3568                          emit->input_map[index],  /* vert attrib */
3569                          get_half_immediate(emit)))  /* 0.5 */
3570             return FALSE;
3571
3572          /* sub(pred) tmp, tmp, 2.0 */
3573          if (!submit_op3(emit,
3574                          inst_token_predicated(SVGA3DOP_SUB),
3575                          dst(tmp),
3576                          src(pred_reg),
3577                          tmp,
3578                          get_two_immediate(emit)))
3579             return FALSE;
3580       }
3581       else {
3582          /* just copy the vertex input attrib to the temp register */
3583          if (!submit_op1(emit,
3584                          inst_token(SVGA3DOP_MOV),
3585                          dst(tmp),
3586                          emit->input_map[index]))
3587             return FALSE;
3588       }
3589
3590       if (emit->key.vs.adjust_attrib_w_1 & (1 << index)) {
3591          /* move 1 into W position of tmp */
3592          if (!submit_op1(emit,
3593                          inst_token(SVGA3DOP_MOV),
3594                          writemask(dst(tmp), TGSI_WRITEMASK_W),
3595                          get_one_immediate(emit)))
3596             return FALSE;
3597       }
3598
3599       /* Reassign the input_map entry to the new tmp register */
3600       emit->input_map[index] = tmp;
3601    }
3602
3603    return TRUE;
3604 }
3605
3606
3607 /**
3608  * Determine if we need to create the "common" immediate value which is
3609  * used for generating useful vector constants such as {0,0,0,0} and
3610  * {1,1,1,1}.
3611  * We could just do this all the time except that we want to conserve
3612  * registers whenever possible.
3613  */
3614 static boolean
3615 needs_to_create_common_immediate(const struct svga_shader_emitter *emit)
3616 {
3617    unsigned i;
3618
3619    if (emit->unit == PIPE_SHADER_FRAGMENT) {
3620       if (emit->key.fs.light_twoside)
3621          return TRUE;
3622
3623       if (emit->key.fs.white_fragments)
3624          return TRUE;
3625
3626       if (emit->emit_frontface)
3627          return TRUE;
3628
3629       if (emit->info.opcode_count[TGSI_OPCODE_DST] >= 1 ||
3630           emit->info.opcode_count[TGSI_OPCODE_SSG] >= 1 ||
3631           emit->info.opcode_count[TGSI_OPCODE_LIT] >= 1)
3632          return TRUE;
3633
3634       if (emit->inverted_texcoords)
3635          return TRUE;
3636
3637       /* look for any PIPE_SWIZZLE_0/ONE terms */
3638       for (i = 0; i < emit->key.num_textures; i++) {
3639          if (emit->key.tex[i].swizzle_r > PIPE_SWIZZLE_W ||
3640              emit->key.tex[i].swizzle_g > PIPE_SWIZZLE_W ||
3641              emit->key.tex[i].swizzle_b > PIPE_SWIZZLE_W ||
3642              emit->key.tex[i].swizzle_a > PIPE_SWIZZLE_W)
3643             return TRUE;
3644       }
3645
3646       for (i = 0; i < emit->key.num_textures; i++) {
3647          if (emit->key.tex[i].compare_mode
3648              == PIPE_TEX_COMPARE_R_TO_TEXTURE)
3649             return TRUE;
3650       }
3651    }
3652    else if (emit->unit == PIPE_SHADER_VERTEX) {
3653       if (emit->info.opcode_count[TGSI_OPCODE_CMP] >= 1)
3654          return TRUE;
3655       if (emit->key.vs.adjust_attrib_range ||
3656           emit->key.vs.adjust_attrib_w_1)
3657          return TRUE;
3658    }
3659
3660    if (emit->info.opcode_count[TGSI_OPCODE_IF] >= 1 ||
3661        emit->info.opcode_count[TGSI_OPCODE_BGNLOOP] >= 1 ||
3662        emit->info.opcode_count[TGSI_OPCODE_DDX] >= 1 ||
3663        emit->info.opcode_count[TGSI_OPCODE_DDY] >= 1 ||
3664        emit->info.opcode_count[TGSI_OPCODE_ROUND] >= 1 ||
3665        emit->info.opcode_count[TGSI_OPCODE_SGE] >= 1 ||
3666        emit->info.opcode_count[TGSI_OPCODE_SGT] >= 1 ||
3667        emit->info.opcode_count[TGSI_OPCODE_SLE] >= 1 ||
3668        emit->info.opcode_count[TGSI_OPCODE_SLT] >= 1 ||
3669        emit->info.opcode_count[TGSI_OPCODE_SNE] >= 1 ||
3670        emit->info.opcode_count[TGSI_OPCODE_SEQ] >= 1 ||
3671        emit->info.opcode_count[TGSI_OPCODE_EXP] >= 1 ||
3672        emit->info.opcode_count[TGSI_OPCODE_LOG] >= 1 ||
3673        emit->info.opcode_count[TGSI_OPCODE_XPD] >= 1 ||
3674        emit->info.opcode_count[TGSI_OPCODE_KILL] >= 1)
3675       return TRUE;
3676
3677    return FALSE;
3678 }
3679
3680
3681 /**
3682  * Do we need to create a looping constant?
3683  */
3684 static boolean
3685 needs_to_create_loop_const(const struct svga_shader_emitter *emit)
3686 {
3687    return (emit->info.opcode_count[TGSI_OPCODE_BGNLOOP] >= 1);
3688 }
3689
3690
3691 static boolean
3692 needs_to_create_arl_consts(const struct svga_shader_emitter *emit)
3693 {
3694    return (emit->num_arl_consts > 0);
3695 }
3696
3697
3698 static boolean
3699 pre_parse_add_indirect( struct svga_shader_emitter *emit,
3700                         int num, int current_arl)
3701 {
3702    unsigned i;
3703    assert(num < 0);
3704
3705    for (i = 0; i < emit->num_arl_consts; ++i) {
3706       if (emit->arl_consts[i].arl_num == current_arl)
3707          break;
3708    }
3709    /* new entry */
3710    if (emit->num_arl_consts == i) {
3711       ++emit->num_arl_consts;
3712    }
3713    emit->arl_consts[i].number = (emit->arl_consts[i].number > num) ?
3714                                 num :
3715                                 emit->arl_consts[i].number;
3716    emit->arl_consts[i].arl_num = current_arl;
3717    return TRUE;
3718 }
3719
3720
3721 static boolean
3722 pre_parse_instruction( struct svga_shader_emitter *emit,
3723                        const struct tgsi_full_instruction *insn,
3724                        int current_arl)
3725 {
3726    if (insn->Src[0].Register.Indirect &&
3727        insn->Src[0].Indirect.File == TGSI_FILE_ADDRESS) {
3728       const struct tgsi_full_src_register *reg = &insn->Src[0];
3729       if (reg->Register.Index < 0) {
3730          pre_parse_add_indirect(emit, reg->Register.Index, current_arl);
3731       }
3732    }
3733
3734    if (insn->Src[1].Register.Indirect &&
3735        insn->Src[1].Indirect.File == TGSI_FILE_ADDRESS) {
3736       const struct tgsi_full_src_register *reg = &insn->Src[1];
3737       if (reg->Register.Index < 0) {
3738          pre_parse_add_indirect(emit, reg->Register.Index, current_arl);
3739       }
3740    }
3741
3742    if (insn->Src[2].Register.Indirect &&
3743        insn->Src[2].Indirect.File == TGSI_FILE_ADDRESS) {
3744       const struct tgsi_full_src_register *reg = &insn->Src[2];
3745       if (reg->Register.Index < 0) {
3746          pre_parse_add_indirect(emit, reg->Register.Index, current_arl);
3747       }
3748    }
3749
3750    return TRUE;
3751 }
3752
3753
3754 static boolean
3755 pre_parse_tokens( struct svga_shader_emitter *emit,
3756                   const struct tgsi_token *tokens )
3757 {
3758    struct tgsi_parse_context parse;
3759    int current_arl = 0;
3760
3761    tgsi_parse_init( &parse, tokens );
3762
3763    while (!tgsi_parse_end_of_tokens( &parse )) {
3764       tgsi_parse_token( &parse );
3765       switch (parse.FullToken.Token.Type) {
3766       case TGSI_TOKEN_TYPE_IMMEDIATE:
3767       case TGSI_TOKEN_TYPE_DECLARATION:
3768          break;
3769       case TGSI_TOKEN_TYPE_INSTRUCTION:
3770          if (parse.FullToken.FullInstruction.Instruction.Opcode ==
3771              TGSI_OPCODE_ARL) {
3772             ++current_arl;
3773          }
3774          if (!pre_parse_instruction( emit, &parse.FullToken.FullInstruction,
3775                                      current_arl ))
3776             return FALSE;
3777          break;
3778       default:
3779          break;
3780       }
3781
3782    }
3783    return TRUE;
3784 }
3785
3786
3787 static boolean
3788 svga_shader_emit_helpers(struct svga_shader_emitter *emit)
3789 {
3790    if (needs_to_create_common_immediate( emit )) {
3791       create_common_immediate( emit );
3792    }
3793    if (needs_to_create_loop_const( emit )) {
3794       create_loop_const( emit );
3795    }
3796    if (needs_to_create_arl_consts( emit )) {
3797       create_arl_consts( emit );
3798    }
3799
3800    if (emit->unit == PIPE_SHADER_FRAGMENT) {
3801       if (!svga_shader_emit_samplers_decl( emit ))
3802          return FALSE;
3803
3804       if (!emit_ps_preamble( emit ))
3805          return FALSE;
3806
3807       if (emit->key.fs.light_twoside) {
3808          if (!emit_light_twoside( emit ))
3809             return FALSE;
3810       }
3811       if (emit->emit_frontface) {
3812          if (!emit_frontface( emit ))
3813             return FALSE;
3814       }
3815       if (emit->inverted_texcoords) {
3816          if (!emit_inverted_texcoords( emit ))
3817             return FALSE;
3818       }
3819    }
3820    else {
3821       assert(emit->unit == PIPE_SHADER_VERTEX);
3822       if (emit->key.vs.adjust_attrib_range) {
3823          if (!emit_adjusted_vertex_attribs(emit) ||
3824              emit->key.vs.adjust_attrib_w_1) {
3825             return FALSE;
3826          }
3827       }
3828    }
3829
3830    return TRUE;
3831 }
3832
3833
3834 /**
3835  * This is the main entrypoint into the TGSI instruction translater.
3836  * Translate TGSI shader tokens into an SVGA shader.
3837  */
3838 boolean
3839 svga_shader_emit_instructions(struct svga_shader_emitter *emit,
3840                               const struct tgsi_token *tokens)
3841 {
3842    struct tgsi_parse_context parse;
3843    const struct tgsi_token *new_tokens = NULL;
3844    boolean ret = TRUE;
3845    boolean helpers_emitted = FALSE;
3846    unsigned line_nr = 0;
3847
3848    if (emit->unit == PIPE_SHADER_FRAGMENT && emit->key.fs.pstipple) {
3849       unsigned unit;
3850
3851       new_tokens = util_pstipple_create_fragment_shader(tokens, &unit, 0,
3852                                                         TGSI_FILE_INPUT);
3853
3854       if (new_tokens) {
3855          /* Setup texture state for stipple */
3856          emit->sampler_target[unit] = TGSI_TEXTURE_2D;
3857          emit->key.tex[unit].swizzle_r = TGSI_SWIZZLE_X;
3858          emit->key.tex[unit].swizzle_g = TGSI_SWIZZLE_Y;
3859          emit->key.tex[unit].swizzle_b = TGSI_SWIZZLE_Z;
3860          emit->key.tex[unit].swizzle_a = TGSI_SWIZZLE_W;
3861
3862          emit->pstipple_sampler_unit = unit;
3863
3864          tokens = new_tokens;
3865       }
3866    }
3867
3868    tgsi_parse_init( &parse, tokens );
3869    emit->internal_imm_count = 0;
3870
3871    if (emit->unit == PIPE_SHADER_VERTEX) {
3872       ret = emit_vs_preamble( emit );
3873       if (!ret)
3874          goto done;
3875    }
3876
3877    pre_parse_tokens(emit, tokens);
3878
3879    while (!tgsi_parse_end_of_tokens( &parse )) {
3880       tgsi_parse_token( &parse );
3881
3882       switch (parse.FullToken.Token.Type) {
3883       case TGSI_TOKEN_TYPE_IMMEDIATE:
3884          ret = svga_emit_immediate( emit, &parse.FullToken.FullImmediate );
3885          if (!ret)
3886             goto done;
3887          break;
3888
3889       case TGSI_TOKEN_TYPE_DECLARATION:
3890          ret = svga_translate_decl_sm30( emit, &parse.FullToken.FullDeclaration );
3891          if (!ret)
3892             goto done;
3893          break;
3894
3895       case TGSI_TOKEN_TYPE_INSTRUCTION:
3896          if (!helpers_emitted) {
3897             if (!svga_shader_emit_helpers( emit ))
3898                goto done;
3899             helpers_emitted = TRUE;
3900          }
3901          ret = svga_emit_instruction( emit,
3902                                       line_nr++,
3903                                       &parse.FullToken.FullInstruction );
3904          if (!ret)
3905             goto done;
3906          break;
3907       default:
3908          break;
3909       }
3910
3911       reset_temp_regs( emit );
3912    }
3913
3914    /* Need to terminate the current subroutine.  Note that the
3915     * hardware doesn't tolerate shaders without sub-routines
3916     * terminating with RET+END.
3917     */
3918    if (!emit->in_main_func) {
3919       ret = emit_instruction( emit, inst_token( SVGA3DOP_RET ) );
3920       if (!ret)
3921          goto done;
3922    }
3923
3924    assert(emit->dynamic_branching_level == 0);
3925
3926    /* Need to terminate the whole shader:
3927     */
3928    ret = emit_instruction( emit, inst_token( SVGA3DOP_END ) );
3929    if (!ret)
3930       goto done;
3931
3932 done:
3933    tgsi_parse_free( &parse );
3934    if (new_tokens) {
3935       tgsi_free_tokens(new_tokens);
3936    }
3937
3938    return ret;
3939 }