[mesa.git] / src / gallium / drivers / svga / svga_tgsi_insn.c
1 /**********************************************************
2 * Copyright 2008-2009 VMware, Inc. All rights reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person
5 * obtaining a copy of this software and associated documentation
6 * files (the "Software"), to deal in the Software without
7 * restriction, including without limitation the rights to use, copy,
8 * modify, merge, publish, distribute, sublicense, and/or sell copies
9 * of the Software, and to permit persons to whom the Software is
10 * furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice shall be
13 * included in all copies or substantial portions of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
19 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
20 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
21 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 *
24 **********************************************************/
25
26
27 #include "pipe/p_shader_tokens.h"
28 #include "tgsi/tgsi_parse.h"
29 #include "util/u_memory.h"
30
31 #include "svga_tgsi_emit.h"
32 #include "svga_context.h"
33
34
35 static boolean emit_vs_postamble( struct svga_shader_emitter *emit );
36 static boolean emit_ps_postamble( struct svga_shader_emitter *emit );
37
38
39
40
41 static unsigned
42 translate_opcode(
43 uint opcode )
44 {
45 switch (opcode) {
46 case TGSI_OPCODE_ABS: return SVGA3DOP_ABS;
47 case TGSI_OPCODE_ADD: return SVGA3DOP_ADD;
48 case TGSI_OPCODE_BREAKC: return SVGA3DOP_BREAKC;
49 case TGSI_OPCODE_DP2A: return SVGA3DOP_DP2ADD;
50 case TGSI_OPCODE_DP3: return SVGA3DOP_DP3;
51 case TGSI_OPCODE_DP4: return SVGA3DOP_DP4;
52 case TGSI_OPCODE_FRC: return SVGA3DOP_FRC;
53 case TGSI_OPCODE_MAD: return SVGA3DOP_MAD;
54 case TGSI_OPCODE_MAX: return SVGA3DOP_MAX;
55 case TGSI_OPCODE_MIN: return SVGA3DOP_MIN;
56 case TGSI_OPCODE_MOV: return SVGA3DOP_MOV;
57 case TGSI_OPCODE_MUL: return SVGA3DOP_MUL;
58 case TGSI_OPCODE_NOP: return SVGA3DOP_NOP;
59 case TGSI_OPCODE_NRM4: return SVGA3DOP_NRM;
60 case TGSI_OPCODE_SSG: return SVGA3DOP_SGN;
61 default:
62       debug_printf("Unknown opcode %u\n", opcode);
63 assert( 0 );
64 return SVGA3DOP_LAST_INST;
65 }
66 }
67
68
69 static unsigned translate_file( unsigned file )
70 {
71 switch (file) {
72 case TGSI_FILE_TEMPORARY: return SVGA3DREG_TEMP;
73 case TGSI_FILE_INPUT: return SVGA3DREG_INPUT;
74 case TGSI_FILE_OUTPUT: return SVGA3DREG_OUTPUT; /* VS3.0+ only */
75 case TGSI_FILE_IMMEDIATE: return SVGA3DREG_CONST;
76 case TGSI_FILE_CONSTANT: return SVGA3DREG_CONST;
77 case TGSI_FILE_SAMPLER: return SVGA3DREG_SAMPLER;
78 case TGSI_FILE_ADDRESS: return SVGA3DREG_ADDR;
79 default:
80 assert( 0 );
81 return SVGA3DREG_TEMP;
82 }
83 }
84
85
86
87
88
89
90 static SVGA3dShaderDestToken
91 translate_dst_register( struct svga_shader_emitter *emit,
92 const struct tgsi_full_instruction *insn,
93 unsigned idx )
94 {
95 const struct tgsi_full_dst_register *reg = &insn->Dst[idx];
96 SVGA3dShaderDestToken dest;
97
98 switch (reg->Register.File) {
99 case TGSI_FILE_OUTPUT:
100 /* Output registers encode semantic information in their name.
101        * Need to look up a table built at declaration time:
102 */
103 dest = emit->output_map[reg->Register.Index];
104 break;
105
106 default:
107 dest = dst_register( translate_file( reg->Register.File ),
108 reg->Register.Index );
109 break;
110 }
111
112 dest.mask = reg->Register.WriteMask;
113 assert(dest.mask);
114
115 if (insn->Instruction.Saturate)
116 dest.dstMod = SVGA3DDSTMOD_SATURATE;
117
118 return dest;
119 }
120
121
122 static struct src_register
123 swizzle( struct src_register src,
124 int x,
125 int y,
126 int z,
127 int w )
128 {
129 x = (src.base.swizzle >> (x * 2)) & 0x3;
130 y = (src.base.swizzle >> (y * 2)) & 0x3;
131 z = (src.base.swizzle >> (z * 2)) & 0x3;
132 w = (src.base.swizzle >> (w * 2)) & 0x3;
133
134 src.base.swizzle = TRANSLATE_SWIZZLE(x,y,z,w);
135
136 return src;
137 }
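/* Illustrative note (not part of the original code): swizzle() composes the
 * requested selection with whatever swizzle the source already carries.  For
 * example, if src already selects .wzyx, then swizzle(src, 0, 0, 0, 0) --
 * i.e. scalar(src, TGSI_SWIZZLE_X) -- yields .wwww rather than .xxxx, because
 * each requested component is looked up in the existing 2-bit-per-channel
 * swizzle field.
 */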
138
139 static struct src_register
140 scalar( struct src_register src,
141 int comp )
142 {
143 return swizzle( src, comp, comp, comp, comp );
144 }
145
146 static INLINE boolean
147 svga_arl_needs_adjustment( const struct svga_shader_emitter *emit )
148 {
149 int i;
150
151 for (i = 0; i < emit->num_arl_consts; ++i) {
152 if (emit->arl_consts[i].arl_num == emit->current_arl)
153 return TRUE;
154 }
155 return FALSE;
156 }
157
158 static INLINE int
159 svga_arl_adjustment( const struct svga_shader_emitter *emit )
160 {
161 int i;
162
163 for (i = 0; i < emit->num_arl_consts; ++i) {
164 if (emit->arl_consts[i].arl_num == emit->current_arl)
165 return emit->arl_consts[i].number;
166 }
167 return 0;
168 }
169
170 static struct src_register
171 translate_src_register( const struct svga_shader_emitter *emit,
172 const struct tgsi_full_src_register *reg )
173 {
174 struct src_register src;
175
176 switch (reg->Register.File) {
177 case TGSI_FILE_INPUT:
178 /* Input registers are referred to by their semantic name rather
179        * than by index.  Use the mapping built up from the decls:
180 */
181 src = emit->input_map[reg->Register.Index];
182 break;
183
184 case TGSI_FILE_IMMEDIATE:
185 /* Immediates are appended after TGSI constants in the D3D
186 * constant buffer.
187 */
188 src = src_register( translate_file( reg->Register.File ),
189 reg->Register.Index +
190 emit->imm_start );
191 break;
192
193 default:
194 src = src_register( translate_file( reg->Register.File ),
195 reg->Register.Index );
196
197 break;
198 }
199
200 /* Indirect addressing.
201 */
202 if (reg->Register.Indirect) {
203 if (emit->unit == PIPE_SHADER_FRAGMENT) {
204 /* Pixel shaders have only loop registers for relative
205 * addressing into inputs. Ignore the redundant address
206 * register, the contents of aL should be in sync with it.
207 */
208 if (reg->Register.File == TGSI_FILE_INPUT) {
209 src.base.relAddr = 1;
210 src.indirect = src_token(SVGA3DREG_LOOP, 0);
211 }
212 }
213 else {
214 /* Constant buffers only.
215 */
216 if (reg->Register.File == TGSI_FILE_CONSTANT) {
217 /* we shift the offset towards the minimum */
218 if (svga_arl_needs_adjustment( emit )) {
219 src.base.num -= svga_arl_adjustment( emit );
220 }
221 src.base.relAddr = 1;
222
223 /* Not really sure what should go in the second token:
224 */
225 src.indirect = src_token( SVGA3DREG_ADDR,
226 reg->Indirect.Index );
227
228 src.indirect.swizzle = SWIZZLE_XXXX;
229 }
230 }
231 }
232
233 src = swizzle( src,
234 reg->Register.SwizzleX,
235 reg->Register.SwizzleY,
236 reg->Register.SwizzleZ,
237 reg->Register.SwizzleW );
238
239 /* src.mod isn't a bitfield, unfortunately:
240 * See tgsi_util_get_full_src_register_sign_mode for implementation details.
241 */
242 if (reg->Register.Absolute) {
243 if (reg->Register.Negate)
244 src.base.srcMod = SVGA3DSRCMOD_ABSNEG;
245 else
246 src.base.srcMod = SVGA3DSRCMOD_ABS;
247 }
248 else {
249 if (reg->Register.Negate)
250 src.base.srcMod = SVGA3DSRCMOD_NEG;
251 else
252 src.base.srcMod = SVGA3DSRCMOD_NONE;
253 }
254
255 return src;
256 }
257
258
259 /*
260     * Get a new temporary register.
261 */
262 static INLINE SVGA3dShaderDestToken
263 get_temp( struct svga_shader_emitter *emit )
264 {
265 int i = emit->nr_hw_temp + emit->internal_temp_count++;
266
267 return dst_register( SVGA3DREG_TEMP, i );
268 }
269
270 /* Release a single temp. Currently only effective if it was the last
271 * allocated temp, otherwise release will be delayed until the next
272 * call to reset_temp_regs().
273 */
274 static INLINE void
275 release_temp( struct svga_shader_emitter *emit,
276 SVGA3dShaderDestToken temp )
277 {
278 if (temp.num == emit->internal_temp_count - 1)
279 emit->internal_temp_count--;
280 }
281
282 static void reset_temp_regs( struct svga_shader_emitter *emit )
283 {
284 emit->internal_temp_count = 0;
285 }
286
287
288 static boolean submit_op0( struct svga_shader_emitter *emit,
289 SVGA3dShaderInstToken inst,
290 SVGA3dShaderDestToken dest )
291 {
292 return (emit_instruction( emit, inst ) &&
293 emit_dst( emit, dest ));
294 }
295
296 static boolean submit_op1( struct svga_shader_emitter *emit,
297 SVGA3dShaderInstToken inst,
298 SVGA3dShaderDestToken dest,
299 struct src_register src0 )
300 {
301 return emit_op1( emit, inst, dest, src0 );
302 }
303
304
305 /* SVGA shaders may not refer to >1 constant register in a single
306 * instruction. This function checks for that usage and inserts a
307 * move to temporary if detected.
308 *
309 * The same applies to input registers -- at most a single input
310 * register may be read by any instruction.
311 */
312 static boolean submit_op2( struct svga_shader_emitter *emit,
313 SVGA3dShaderInstToken inst,
314 SVGA3dShaderDestToken dest,
315 struct src_register src0,
316 struct src_register src1 )
317 {
318 SVGA3dShaderDestToken temp;
319 SVGA3dShaderRegType type0, type1;
320 boolean need_temp = FALSE;
321
322 temp.value = 0;
323 type0 = SVGA3dShaderGetRegType( src0.base.value );
324 type1 = SVGA3dShaderGetRegType( src1.base.value );
325
326 if (type0 == SVGA3DREG_CONST &&
327 type1 == SVGA3DREG_CONST &&
328 src0.base.num != src1.base.num)
329 need_temp = TRUE;
330
331 if (type0 == SVGA3DREG_INPUT &&
332 type1 == SVGA3DREG_INPUT &&
333 src0.base.num != src1.base.num)
334 need_temp = TRUE;
335
336 if (need_temp)
337 {
338 temp = get_temp( emit );
339
340 if (!emit_op1( emit, inst_token( SVGA3DOP_MOV ), temp, src0 ))
341 return FALSE;
342
343 src0 = src( temp );
344 }
345
346 if (!emit_op2( emit, inst, dest, src0, src1 ))
347 return FALSE;
348
349 if (need_temp)
350 release_temp( emit, temp );
351
352 return TRUE;
353 }
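/* Sketch of the rewrite performed above (illustrative, not emitted verbatim):
 * a TGSI "ADD dst, CONST[0], CONST[1]" would map to "add dst, c0, c1", which
 * reads two different constant registers in a single instruction.  submit_op2
 * instead emits:
 *    mov  rTMP, c0
 *    add  dst, rTMP, c1
 * The same rewrite is applied when two different input registers are read.
 */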
354
355
356 /* SVGA shaders may not refer to >1 constant register in a single
357 * instruction. This function checks for that usage and inserts a
358 * move to temporary if detected.
359 */
360 static boolean submit_op3( struct svga_shader_emitter *emit,
361 SVGA3dShaderInstToken inst,
362 SVGA3dShaderDestToken dest,
363 struct src_register src0,
364 struct src_register src1,
365 struct src_register src2 )
366 {
367 SVGA3dShaderDestToken temp0;
368 SVGA3dShaderDestToken temp1;
369 boolean need_temp0 = FALSE;
370 boolean need_temp1 = FALSE;
371 SVGA3dShaderRegType type0, type1, type2;
372
373 temp0.value = 0;
374 temp1.value = 0;
375 type0 = SVGA3dShaderGetRegType( src0.base.value );
376 type1 = SVGA3dShaderGetRegType( src1.base.value );
377 type2 = SVGA3dShaderGetRegType( src2.base.value );
378
379 if (inst.op != SVGA3DOP_SINCOS) {
380 if (type0 == SVGA3DREG_CONST &&
381 ((type1 == SVGA3DREG_CONST && src0.base.num != src1.base.num) ||
382 (type2 == SVGA3DREG_CONST && src0.base.num != src2.base.num)))
383 need_temp0 = TRUE;
384
385 if (type1 == SVGA3DREG_CONST &&
386 (type2 == SVGA3DREG_CONST && src1.base.num != src2.base.num))
387 need_temp1 = TRUE;
388 }
389
390 if (type0 == SVGA3DREG_INPUT &&
391 ((type1 == SVGA3DREG_INPUT && src0.base.num != src1.base.num) ||
392 (type2 == SVGA3DREG_INPUT && src0.base.num != src2.base.num)))
393 need_temp0 = TRUE;
394
395 if (type1 == SVGA3DREG_INPUT &&
396 (type2 == SVGA3DREG_INPUT && src1.base.num != src2.base.num))
397 need_temp1 = TRUE;
398
399 if (need_temp0)
400 {
401 temp0 = get_temp( emit );
402
403 if (!emit_op1( emit, inst_token( SVGA3DOP_MOV ), temp0, src0 ))
404 return FALSE;
405
406 src0 = src( temp0 );
407 }
408
409 if (need_temp1)
410 {
411 temp1 = get_temp( emit );
412
413 if (!emit_op1( emit, inst_token( SVGA3DOP_MOV ), temp1, src1 ))
414 return FALSE;
415
416 src1 = src( temp1 );
417 }
418
419 if (!emit_op3( emit, inst, dest, src0, src1, src2 ))
420 return FALSE;
421
422 if (need_temp1)
423 release_temp( emit, temp1 );
424 if (need_temp0)
425 release_temp( emit, temp0 );
426 return TRUE;
427 }
428
429
430
431
432 /* SVGA shaders may not refer to >1 constant register in a single
433 * instruction. This function checks for that usage and inserts a
434 * move to temporary if detected.
435 */
436 static boolean submit_op4( struct svga_shader_emitter *emit,
437 SVGA3dShaderInstToken inst,
438 SVGA3dShaderDestToken dest,
439 struct src_register src0,
440 struct src_register src1,
441 struct src_register src2,
442 struct src_register src3)
443 {
444 SVGA3dShaderDestToken temp0;
445 SVGA3dShaderDestToken temp3;
446 boolean need_temp0 = FALSE;
447 boolean need_temp3 = FALSE;
448 SVGA3dShaderRegType type0, type1, type2, type3;
449
450 temp0.value = 0;
451 temp3.value = 0;
452 type0 = SVGA3dShaderGetRegType( src0.base.value );
453 type1 = SVGA3dShaderGetRegType( src1.base.value );
454 type2 = SVGA3dShaderGetRegType( src2.base.value );
455    type3 = SVGA3dShaderGetRegType( src3.base.value );
456
457 /* Make life a little easier - this is only used by the TXD
458     * instruction, which is guaranteed to have the sampler (neither a
459     * constant nor an input reg) in at least one slot:
460 */
461 assert(type1 == SVGA3DREG_SAMPLER);
462
463 if (type0 == SVGA3DREG_CONST &&
464 ((type3 == SVGA3DREG_CONST && src0.base.num != src3.base.num) ||
465 (type2 == SVGA3DREG_CONST && src0.base.num != src2.base.num)))
466 need_temp0 = TRUE;
467
468 if (type3 == SVGA3DREG_CONST &&
469 (type2 == SVGA3DREG_CONST && src3.base.num != src2.base.num))
470 need_temp3 = TRUE;
471
472 if (type0 == SVGA3DREG_INPUT &&
473 ((type3 == SVGA3DREG_INPUT && src0.base.num != src3.base.num) ||
474 (type2 == SVGA3DREG_INPUT && src0.base.num != src2.base.num)))
475 need_temp0 = TRUE;
476
477 if (type3 == SVGA3DREG_INPUT &&
478 (type2 == SVGA3DREG_INPUT && src3.base.num != src2.base.num))
479 need_temp3 = TRUE;
480
481 if (need_temp0)
482 {
483 temp0 = get_temp( emit );
484
485 if (!emit_op1( emit, inst_token( SVGA3DOP_MOV ), temp0, src0 ))
486 return FALSE;
487
488 src0 = src( temp0 );
489 }
490
491 if (need_temp3)
492 {
493 temp3 = get_temp( emit );
494
495 if (!emit_op1( emit, inst_token( SVGA3DOP_MOV ), temp3, src3 ))
496 return FALSE;
497
498 src3 = src( temp3 );
499 }
500
501 if (!emit_op4( emit, inst, dest, src0, src1, src2, src3 ))
502 return FALSE;
503
504 if (need_temp3)
505 release_temp( emit, temp3 );
506 if (need_temp0)
507 release_temp( emit, temp0 );
508 return TRUE;
509 }
510
511
512 static boolean emit_def_const( struct svga_shader_emitter *emit,
513 SVGA3dShaderConstType type,
514 unsigned idx,
515 float a,
516 float b,
517 float c,
518 float d )
519 {
520 SVGA3DOpDefArgs def;
521 SVGA3dShaderInstToken opcode;
522
523 switch (type) {
524 case SVGA3D_CONST_TYPE_FLOAT:
525 opcode = inst_token( SVGA3DOP_DEF );
526 def.dst = dst_register( SVGA3DREG_CONST, idx );
527 def.constValues[0] = a;
528 def.constValues[1] = b;
529 def.constValues[2] = c;
530 def.constValues[3] = d;
531 break;
532 case SVGA3D_CONST_TYPE_INT:
533 opcode = inst_token( SVGA3DOP_DEFI );
534 def.dst = dst_register( SVGA3DREG_CONSTINT, idx );
535 def.constIValues[0] = (int)a;
536 def.constIValues[1] = (int)b;
537 def.constIValues[2] = (int)c;
538 def.constIValues[3] = (int)d;
539 break;
540 default:
541 assert(0);
542 opcode = inst_token( SVGA3DOP_NOP );
543 break;
544 }
545
546 if (!emit_instruction(emit, opcode) ||
547 !svga_shader_emit_dwords( emit, def.values, Elements(def.values)))
548 return FALSE;
549
550 return TRUE;
551 }
552
553 static INLINE boolean
554 create_zero_immediate( struct svga_shader_emitter *emit )
555 {
556 unsigned idx = emit->nr_hw_float_const++;
557
558 if (!emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT,
559 idx, 0, 0, 0, 1 ))
560 return FALSE;
561
562 emit->zero_immediate_idx = idx;
563 emit->created_zero_immediate = TRUE;
564
565 return TRUE;
566 }
567
568 static INLINE boolean
569 create_loop_const( struct svga_shader_emitter *emit )
570 {
571 unsigned idx = emit->nr_hw_int_const++;
572
573 if (!emit_def_const( emit, SVGA3D_CONST_TYPE_INT, idx,
574 255, /* iteration count */
575 0, /* initial value */
576 1, /* step size */
577 0 /* not used, must be 0 */))
578 return FALSE;
579
580 emit->loop_const_idx = idx;
581 emit->created_loop_const = TRUE;
582
583 return TRUE;
584 }
585
586 static INLINE boolean
587 create_sincos_consts( struct svga_shader_emitter *emit )
588 {
589 unsigned idx = emit->nr_hw_float_const++;
590
591 if (!emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT, idx,
592 -1.5500992e-006f,
593 -2.1701389e-005f,
594 0.0026041667f,
595 0.00026041668f ))
596 return FALSE;
597
598 emit->sincos_consts_idx = idx;
599 idx = emit->nr_hw_float_const++;
600
601 if (!emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT, idx,
602 -0.020833334f,
603 -0.12500000f,
604 1.0f,
605 0.50000000f ))
606 return FALSE;
607
608 emit->created_sincos_consts = TRUE;
609
610 return TRUE;
611 }
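/* Note (assumption, not stated in the original source): these two vectors
 * appear to match the D3DSINCOSCONST1/D3DSINCOSCONST2 polynomial coefficients
 * required by the D3D9 SINCOS macro expansion for the 2.x shader profiles;
 * SM3.0 SINCOS takes no extra constant arguments, which is why
 * do_emit_sincos() only passes them when !use_sm30.
 */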
612
613 static INLINE boolean
614 create_arl_consts( struct svga_shader_emitter *emit )
615 {
616 int i;
617
618 for (i = 0; i < emit->num_arl_consts; i += 4) {
619 int j;
620 unsigned idx = emit->nr_hw_float_const++;
621 float vals[4];
622 for (j = 0; j < 4 && (j + i) < emit->num_arl_consts; ++j) {
623 vals[j] = emit->arl_consts[i + j].number;
624 emit->arl_consts[i + j].idx = idx;
625 switch (j) {
626 case 0:
627 emit->arl_consts[i + 0].swizzle = TGSI_SWIZZLE_X;
628 break;
629 case 1:
630             emit->arl_consts[i + j].swizzle = TGSI_SWIZZLE_Y;
631 break;
632 case 2:
633             emit->arl_consts[i + j].swizzle = TGSI_SWIZZLE_Z;
634 break;
635 case 3:
636             emit->arl_consts[i + j].swizzle = TGSI_SWIZZLE_W;
637 break;
638 }
639 }
640 while (j < 4)
641 vals[j++] = 0;
642
643 if (!emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT, idx,
644 vals[0], vals[1],
645 vals[2], vals[3]))
646 return FALSE;
647 }
648
649 return TRUE;
650 }
651
652 static INLINE struct src_register
653 get_vface( struct svga_shader_emitter *emit )
654 {
655 assert(emit->emitted_vface);
656 return src_register(SVGA3DREG_MISCTYPE,
657 SVGA3DMISCREG_FACE);
658 }
659
660 /* returns {0, 0, 0, 1} immediate */
661 static INLINE struct src_register
662 get_zero_immediate( struct svga_shader_emitter *emit )
663 {
664 assert(emit->created_zero_immediate);
665 assert(emit->zero_immediate_idx >= 0);
666 return src_register( SVGA3DREG_CONST,
667 emit->zero_immediate_idx );
668 }
669
670 /* returns the loop const */
671 static INLINE struct src_register
672 get_loop_const( struct svga_shader_emitter *emit )
673 {
674 assert(emit->created_loop_const);
675 assert(emit->loop_const_idx >= 0);
676 return src_register( SVGA3DREG_CONSTINT,
677 emit->loop_const_idx );
678 }
679
680 /* returns a sincos const */
681 static INLINE struct src_register
682 get_sincos_const( struct svga_shader_emitter *emit,
683 unsigned index )
684 {
685 assert(emit->created_sincos_consts);
686 assert(emit->sincos_consts_idx >= 0);
687 assert(index == 0 || index == 1);
688 return src_register( SVGA3DREG_CONST,
689 emit->sincos_consts_idx + index );
690 }
691
692 static INLINE struct src_register
693 get_fake_arl_const( struct svga_shader_emitter *emit )
694 {
695 struct src_register reg;
696 int idx = 0, swizzle = 0, i;
697
698 for (i = 0; i < emit->num_arl_consts; ++ i) {
699 if (emit->arl_consts[i].arl_num == emit->current_arl) {
700 idx = emit->arl_consts[i].idx;
701 swizzle = emit->arl_consts[i].swizzle;
702 }
703 }
704
705 reg = src_register( SVGA3DREG_CONST, idx );
706 return scalar(reg, swizzle);
707 }
708
709 static INLINE struct src_register
710 get_tex_dimensions( struct svga_shader_emitter *emit, int sampler_num )
711 {
712 int idx;
713 struct src_register reg;
714
715 /* the width/height indexes start right after constants */
716 idx = emit->key.fkey.tex[sampler_num].width_height_idx +
717 emit->info.file_max[TGSI_FILE_CONSTANT] + 1;
718
719 reg = src_register( SVGA3DREG_CONST, idx );
720 return reg;
721 }
722
723 static boolean emit_fake_arl(struct svga_shader_emitter *emit,
724 const struct tgsi_full_instruction *insn)
725 {
726 const struct src_register src0 = translate_src_register(
727 emit, &insn->Src[0] );
728 struct src_register src1 = get_fake_arl_const( emit );
729 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
730 SVGA3dShaderDestToken tmp = get_temp( emit );
731
732 if (!submit_op1(emit, inst_token( SVGA3DOP_MOV ), tmp, src0))
733 return FALSE;
734
735 if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ), tmp, src( tmp ),
736 src1))
737 return FALSE;
738
739 /* replicate the original swizzle */
740 src1 = src(tmp);
741 src1.base.swizzle = src0.base.swizzle;
742
743 return submit_op1( emit, inst_token( SVGA3DOP_MOVA ),
744 dst, src1 );
745 }
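/* Descriptive note: translate_src_register() subtracts svga_arl_adjustment()
 * from the base index of relatively-addressed constants ("shift the offset
 * towards the minimum"), and the sequence above adds that same per-ARL
 * constant back into the value loaded into the address register, so the
 * effective address (a0 + base) is unchanged.
 */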
746
747 static boolean emit_if(struct svga_shader_emitter *emit,
748 const struct tgsi_full_instruction *insn)
749 {
750 const struct src_register src = translate_src_register(
751 emit, &insn->Src[0] );
752 struct src_register zero = get_zero_immediate( emit );
753 SVGA3dShaderInstToken if_token = inst_token( SVGA3DOP_IFC );
754
755 if_token.control = SVGA3DOPCOMPC_NE;
756 zero = scalar(zero, TGSI_SWIZZLE_X);
757
758 emit->dynamic_branching_level++;
759
760 return (emit_instruction( emit, if_token ) &&
761 emit_src( emit, src ) &&
762 emit_src( emit, zero ) );
763 }
764
765 static boolean emit_endif(struct svga_shader_emitter *emit,
766 const struct tgsi_full_instruction *insn)
767 {
768 emit->dynamic_branching_level--;
769
770 return (emit_instruction( emit,
771 inst_token( SVGA3DOP_ENDIF )));
772 }
773
774 static boolean emit_else(struct svga_shader_emitter *emit,
775 const struct tgsi_full_instruction *insn)
776 {
777 return (emit_instruction( emit,
778 inst_token( SVGA3DOP_ELSE )));
779 }
780
781 /* Translate the following TGSI FLR instruction.
782 * FLR DST, SRC
783 * To the following SVGA3D instruction sequence.
784 * FRC TMP, SRC
785 * SUB DST, SRC, TMP
786 */
787 static boolean emit_floor(struct svga_shader_emitter *emit,
788 const struct tgsi_full_instruction *insn )
789 {
790 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
791 const struct src_register src0 = translate_src_register(
792 emit, &insn->Src[0] );
793 SVGA3dShaderDestToken temp = get_temp( emit );
794
795 /* FRC TMP, SRC */
796 if (!submit_op1( emit, inst_token( SVGA3DOP_FRC ), temp, src0 ))
797 return FALSE;
798
799 /* SUB DST, SRC, TMP */
800 if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ), dst, src0,
801 negate( src( temp ) ) ))
802 return FALSE;
803
804 return TRUE;
805 }
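/* Worked example: for SRC = -1.3, FRC yields -1.3 - floor(-1.3) = 0.7, and
 * SRC - TMP = -1.3 - 0.7 = -2.0 = floor(-1.3).  The "SUB" in the header
 * comment is emitted as an ADD with a negated source modifier.
 */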
806
807
808 /* Translate the following TGSI CMP instruction.
809 * CMP DST, SRC0, SRC1, SRC2
810 * To the following SVGA3D instruction sequence.
811  *    CMP DST, SRC0, SRC2, SRC1  (operands swapped because TGSI CMP selects SRC1 where SRC0 < 0, while the SVGA3D CMP selects it where SRC0 >= 0)
812 */
813 static boolean emit_cmp(struct svga_shader_emitter *emit,
814 const struct tgsi_full_instruction *insn )
815 {
816 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
817 const struct src_register src0 = translate_src_register(
818 emit, &insn->Src[0] );
819 const struct src_register src1 = translate_src_register(
820 emit, &insn->Src[1] );
821 const struct src_register src2 = translate_src_register(
822 emit, &insn->Src[2] );
823
824 if (emit->unit == PIPE_SHADER_VERTEX) {
825 SVGA3dShaderDestToken temp = get_temp(emit);
826 struct src_register zero = scalar(get_zero_immediate(emit), TGSI_SWIZZLE_X);
827
828 /* Since vertex shaders don't support the CMP instruction,
829 * simulate it with SLT and LRP instructions.
830 * SLT TMP, SRC0, 0.0
831 * LRP DST, TMP, SRC1, SRC2
832 */
833 if (!submit_op2(emit, inst_token(SVGA3DOP_SLT), temp, src0, zero))
834 return FALSE;
835 return submit_op3(emit, inst_token(SVGA3DOP_LRP), dst, src(temp), src1, src2);
836 }
837
838 /* CMP DST, SRC0, SRC2, SRC1 */
839 return submit_op3( emit, inst_token( SVGA3DOP_CMP ), dst, src0, src2, src1);
840 }
841
842
843
844 /* Translate the following TGSI DIV instruction.
845 * DIV DST.xy, SRC0, SRC1
846 * To the following SVGA3D instruction sequence.
847 * RCP TMP.x, SRC1.xxxx
848 * RCP TMP.y, SRC1.yyyy
849 * MUL DST.xy, SRC0, TMP
850 */
851 static boolean emit_div(struct svga_shader_emitter *emit,
852 const struct tgsi_full_instruction *insn )
853 {
854 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
855 const struct src_register src0 = translate_src_register(
856 emit, &insn->Src[0] );
857 const struct src_register src1 = translate_src_register(
858 emit, &insn->Src[1] );
859 SVGA3dShaderDestToken temp = get_temp( emit );
860 int i;
861
862 /* For each enabled element, perform a RCP instruction. Note that
863 * RCP is scalar in SVGA3D:
864 */
865 for (i = 0; i < 4; i++) {
866 unsigned channel = 1 << i;
867 if (dst.mask & channel) {
868 /* RCP TMP.?, SRC1.???? */
869 if (!submit_op1( emit, inst_token( SVGA3DOP_RCP ),
870 writemask(temp, channel),
871 scalar(src1, i) ))
872 return FALSE;
873 }
874 }
875
876 /* Then multiply them out with a single mul:
877 *
878 * MUL DST, SRC0, TMP
879 */
880 if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ), dst, src0,
881 src( temp ) ))
882 return FALSE;
883
884 return TRUE;
885 }
886
887 /* Translate the following TGSI DP2 instruction.
888 * DP2 DST, SRC1, SRC2
889 * To the following SVGA3D instruction sequence.
890 * MUL TMP, SRC1, SRC2
891 * ADD DST, TMP.xxxx, TMP.yyyy
892 */
893 static boolean emit_dp2(struct svga_shader_emitter *emit,
894 const struct tgsi_full_instruction *insn )
895 {
896 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
897 const struct src_register src0 = translate_src_register(
898 emit, &insn->Src[0] );
899 const struct src_register src1 = translate_src_register(
900 emit, &insn->Src[1] );
901 SVGA3dShaderDestToken temp = get_temp( emit );
902 struct src_register temp_src0, temp_src1;
903
904 /* MUL TMP, SRC1, SRC2 */
905 if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ), temp, src0, src1 ))
906 return FALSE;
907
908 temp_src0 = scalar(src( temp ), TGSI_SWIZZLE_X);
909 temp_src1 = scalar(src( temp ), TGSI_SWIZZLE_Y);
910
911 /* ADD DST, TMP.xxxx, TMP.yyyy */
912 if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ), dst,
913 temp_src0, temp_src1 ))
914 return FALSE;
915
916 return TRUE;
917 }
918
919
920 /* Translate the following TGSI DPH instruction.
921 * DPH DST, SRC1, SRC2
922 * To the following SVGA3D instruction sequence.
923 * DP3 TMP, SRC1, SRC2
924 * ADD DST, TMP, SRC2.wwww
925 */
926 static boolean emit_dph(struct svga_shader_emitter *emit,
927 const struct tgsi_full_instruction *insn )
928 {
929 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
930 const struct src_register src0 = translate_src_register(
931 emit, &insn->Src[0] );
932 struct src_register src1 = translate_src_register(
933 emit, &insn->Src[1] );
934 SVGA3dShaderDestToken temp = get_temp( emit );
935
936 /* DP3 TMP, SRC1, SRC2 */
937 if (!submit_op2( emit, inst_token( SVGA3DOP_DP3 ), temp, src0, src1 ))
938 return FALSE;
939
940 src1 = scalar(src1, TGSI_SWIZZLE_W);
941
942 /* ADD DST, TMP, SRC2.wwww */
943 if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ), dst,
944 src( temp ), src1 ))
945 return FALSE;
946
947 return TRUE;
948 }
949
950 /* Translate the following TGSI NRM instruction.
951 * NRM DST, SRC
952 * To the following SVGA3D instruction sequence.
953 * DP3 TMP, SRC, SRC
954 * RSQ TMP, TMP
955 * MUL DST, SRC, TMP
956 */
957 static boolean emit_nrm(struct svga_shader_emitter *emit,
958 const struct tgsi_full_instruction *insn )
959 {
960 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
961 const struct src_register src0 = translate_src_register(
962 emit, &insn->Src[0] );
963 SVGA3dShaderDestToken temp = get_temp( emit );
964
965 /* DP3 TMP, SRC, SRC */
966 if (!submit_op2( emit, inst_token( SVGA3DOP_DP3 ), temp, src0, src0 ))
967 return FALSE;
968
969 /* RSQ TMP, TMP */
970 if (!submit_op1( emit, inst_token( SVGA3DOP_RSQ ), temp, src( temp )))
971 return FALSE;
972
973 /* MUL DST, SRC, TMP */
974 if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ), dst,
975 src0, src( temp )))
976 return FALSE;
977
978 return TRUE;
979
980 }
981
982 static boolean do_emit_sincos(struct svga_shader_emitter *emit,
983 SVGA3dShaderDestToken dst,
984 struct src_register src0)
985 {
986 src0 = scalar(src0, TGSI_SWIZZLE_X);
987
988 if (emit->use_sm30) {
989 return submit_op1( emit, inst_token( SVGA3DOP_SINCOS ),
990 dst, src0 );
991 } else {
992 struct src_register const1 = get_sincos_const( emit, 0 );
993 struct src_register const2 = get_sincos_const( emit, 1 );
994
995 return submit_op3( emit, inst_token( SVGA3DOP_SINCOS ),
996 dst, src0, const1, const2 );
997 }
998 }
999
1000 static boolean emit_sincos(struct svga_shader_emitter *emit,
1001 const struct tgsi_full_instruction *insn)
1002 {
1003 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1004 struct src_register src0 = translate_src_register(
1005 emit, &insn->Src[0] );
1006 SVGA3dShaderDestToken temp = get_temp( emit );
1007
1008 /* SCS TMP SRC */
1009 if (!do_emit_sincos(emit, writemask(temp, TGSI_WRITEMASK_XY), src0 ))
1010 return FALSE;
1011
1012 /* MOV DST TMP */
1013 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), dst, src( temp ) ))
1014 return FALSE;
1015
1016 return TRUE;
1017 }
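/* Note: as with the D3D sincos instruction, the cosine is written to .x and
 * the sine to .y, which matches the TGSI SCS convention; emit_cos() and
 * emit_sin() below pick out the respective component.
 */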
1018
1019 /*
1020 * SCS TMP SRC
1021 * MOV DST TMP.yyyy
1022 */
1023 static boolean emit_sin(struct svga_shader_emitter *emit,
1024 const struct tgsi_full_instruction *insn )
1025 {
1026 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1027 struct src_register src0 = translate_src_register(
1028 emit, &insn->Src[0] );
1029 SVGA3dShaderDestToken temp = get_temp( emit );
1030
1031 /* SCS TMP SRC */
1032 if (!do_emit_sincos(emit, writemask(temp, TGSI_WRITEMASK_Y), src0))
1033 return FALSE;
1034
1035 src0 = scalar(src( temp ), TGSI_SWIZZLE_Y);
1036
1037 /* MOV DST TMP.yyyy */
1038 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), dst, src0 ))
1039 return FALSE;
1040
1041 return TRUE;
1042 }
1043
1044 /*
1045 * SCS TMP SRC
1046 * MOV DST TMP.xxxx
1047 */
1048 static boolean emit_cos(struct svga_shader_emitter *emit,
1049 const struct tgsi_full_instruction *insn )
1050 {
1051 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1052 struct src_register src0 = translate_src_register(
1053 emit, &insn->Src[0] );
1054 SVGA3dShaderDestToken temp = get_temp( emit );
1055
1056 /* SCS TMP SRC */
1057 if (!do_emit_sincos( emit, writemask(temp, TGSI_WRITEMASK_X), src0 ))
1058 return FALSE;
1059
1060 src0 = scalar(src( temp ), TGSI_SWIZZLE_X);
1061
1062 /* MOV DST TMP.xxxx */
1063 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), dst, src0 ))
1064 return FALSE;
1065
1066 return TRUE;
1067 }
1068
1069
1070 /*
1071  * ADD DST, SRC0, negate(SRC1)
1072 */
1073 static boolean emit_sub(struct svga_shader_emitter *emit,
1074 const struct tgsi_full_instruction *insn)
1075 {
1076 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1077 struct src_register src0 = translate_src_register(
1078 emit, &insn->Src[0] );
1079 struct src_register src1 = translate_src_register(
1080 emit, &insn->Src[1] );
1081
1082 src1 = negate(src1);
1083
1084 if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ), dst,
1085 src0, src1 ))
1086 return FALSE;
1087
1088 return TRUE;
1089 }
1090
1091
1092 static boolean emit_kil(struct svga_shader_emitter *emit,
1093 const struct tgsi_full_instruction *insn )
1094 {
1095 SVGA3dShaderInstToken inst;
1096 const struct tgsi_full_src_register *reg = &insn->Src[0];
1097 struct src_register src0;
1098
1099 inst = inst_token( SVGA3DOP_TEXKILL );
1100 src0 = translate_src_register( emit, reg );
1101
1102 if (reg->Register.Absolute ||
1103 reg->Register.Negate ||
1104 reg->Register.Indirect ||
1105 reg->Register.SwizzleX != 0 ||
1106 reg->Register.SwizzleY != 1 ||
1107 reg->Register.SwizzleZ != 2 ||
1108 reg->Register.File != TGSI_FILE_TEMPORARY)
1109 {
1110 SVGA3dShaderDestToken temp = get_temp( emit );
1111
1112 submit_op1( emit, inst_token( SVGA3DOP_MOV ), temp, src0 );
1113 src0 = src( temp );
1114 }
1115
1116 return submit_op0( emit, inst, dst(src0) );
1117 }
1118
1119
1120 /* The Mesa state tracker always emits KILP as an unconditional
1121  * KIL. */
1122 static boolean emit_kilp(struct svga_shader_emitter *emit,
1123 const struct tgsi_full_instruction *insn )
1124 {
1125 SVGA3dShaderInstToken inst;
1126 SVGA3dShaderDestToken temp;
1127 struct src_register one = scalar( get_zero_immediate( emit ),
1128 TGSI_SWIZZLE_W );
1129
1130 inst = inst_token( SVGA3DOP_TEXKILL );
1131
1132    /* texkill doesn't allow negation on the operand, so let's move
1133 * negation of {1} to a temp register */
1134 temp = get_temp( emit );
1135 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), temp,
1136 negate( one ) ))
1137 return FALSE;
1138
1139 return submit_op0( emit, inst, temp );
1140 }
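/* Why the negated constant works (descriptive note): TEXKILL discards the
 * fragment when any of the register's first three components is negative,
 * so a temp holding -1 in every channel kills unconditionally.
 */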
1141
1142 /* Implement conditionals by initializing destination reg to 'fail',
1143  * then setting the predicate reg with SETP, then moving 'pass' to dest
1144  * based on the predicate reg.
1145 *
1146 * SETP src0, cmp, src1 -- do this first to avoid aliasing problems.
1147 * MOV dst, fail
1148 * MOV dst, pass, p0
1149 */
1150 static boolean
1151 emit_conditional(struct svga_shader_emitter *emit,
1152 unsigned compare_func,
1153 SVGA3dShaderDestToken dst,
1154 struct src_register src0,
1155 struct src_register src1,
1156 struct src_register pass,
1157 struct src_register fail)
1158 {
1159 SVGA3dShaderDestToken pred_reg = dst_register( SVGA3DREG_PREDICATE, 0 );
1160 SVGA3dShaderInstToken setp_token, mov_token;
1161 setp_token = inst_token( SVGA3DOP_SETP );
1162
1163 switch (compare_func) {
1164 case PIPE_FUNC_NEVER:
1165 return submit_op1( emit, inst_token( SVGA3DOP_MOV ),
1166 dst, fail );
1167 break;
1168 case PIPE_FUNC_LESS:
1169 setp_token.control = SVGA3DOPCOMP_LT;
1170 break;
1171 case PIPE_FUNC_EQUAL:
1172 setp_token.control = SVGA3DOPCOMP_EQ;
1173 break;
1174 case PIPE_FUNC_LEQUAL:
1175 setp_token.control = SVGA3DOPCOMP_LE;
1176 break;
1177 case PIPE_FUNC_GREATER:
1178 setp_token.control = SVGA3DOPCOMP_GT;
1179 break;
1180 case PIPE_FUNC_NOTEQUAL:
1181 setp_token.control = SVGA3DOPCOMPC_NE;
1182 break;
1183 case PIPE_FUNC_GEQUAL:
1184 setp_token.control = SVGA3DOPCOMP_GE;
1185 break;
1186 case PIPE_FUNC_ALWAYS:
1187 return submit_op1( emit, inst_token( SVGA3DOP_MOV ),
1188 dst, pass );
1189 break;
1190 }
1191
1192 /* SETP src0, COMPOP, src1 */
1193 if (!submit_op2( emit, setp_token, pred_reg,
1194 src0, src1 ))
1195 return FALSE;
1196
1197 mov_token = inst_token( SVGA3DOP_MOV );
1198
1199 /* MOV dst, fail */
1200 if (!submit_op1( emit, mov_token, dst,
1201 fail ))
1202 return FALSE;
1203
1204 /* MOV dst, pass (predicated)
1205 *
1206 * Note that the predicate reg (and possible modifiers) is passed
1207 * as the first source argument.
1208 */
1209 mov_token.predicated = 1;
1210 if (!submit_op2( emit, mov_token, dst,
1211 src( pred_reg ), pass ))
1212 return FALSE;
1213
1214 return TRUE;
1215 }
1216
1217
1218 static boolean
1219 emit_select(struct svga_shader_emitter *emit,
1220 unsigned compare_func,
1221 SVGA3dShaderDestToken dst,
1222 struct src_register src0,
1223 struct src_register src1 )
1224 {
1225 /* There are some SVGA instructions which implement some selects
1226 * directly, but they are only available in the vertex shader.
1227 */
1228 if (emit->unit == PIPE_SHADER_VERTEX) {
1229 switch (compare_func) {
1230 case PIPE_FUNC_GEQUAL:
1231 return submit_op2( emit, inst_token( SVGA3DOP_SGE ), dst, src0, src1 );
1232 case PIPE_FUNC_LEQUAL:
1233 return submit_op2( emit, inst_token( SVGA3DOP_SGE ), dst, src1, src0 );
1234 case PIPE_FUNC_GREATER:
1235 return submit_op2( emit, inst_token( SVGA3DOP_SLT ), dst, src1, src0 );
1236 case PIPE_FUNC_LESS:
1237 return submit_op2( emit, inst_token( SVGA3DOP_SLT ), dst, src0, src1 );
1238 default:
1239 break;
1240 }
1241 }
1242
1243
1244 /* Otherwise, need to use the setp approach:
1245 */
1246 {
1247 struct src_register one, zero;
1248 /* zero immediate is 0,0,0,1 */
1249 zero = get_zero_immediate( emit );
1250 one = scalar( zero, TGSI_SWIZZLE_W );
1251 zero = scalar( zero, TGSI_SWIZZLE_X );
1252
1253 return emit_conditional(
1254 emit,
1255 compare_func,
1256 dst,
1257 src0,
1258 src1,
1259 one, zero);
1260 }
1261 }
1262
1263
1264 static boolean emit_select_op(struct svga_shader_emitter *emit,
1265 unsigned compare,
1266 const struct tgsi_full_instruction *insn)
1267 {
1268 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1269 struct src_register src0 = translate_src_register(
1270 emit, &insn->Src[0] );
1271 struct src_register src1 = translate_src_register(
1272 emit, &insn->Src[1] );
1273
1274 return emit_select( emit, compare, dst, src0, src1 );
1275 }
1276
1277
1278 /* Translate texture instructions to SVGA3D representation.
1279 */
1280 static boolean emit_tex2(struct svga_shader_emitter *emit,
1281 const struct tgsi_full_instruction *insn,
1282 SVGA3dShaderDestToken dst )
1283 {
1284 SVGA3dShaderInstToken inst;
1285 struct src_register texcoord;
1286 struct src_register sampler;
1287 SVGA3dShaderDestToken tmp;
1288
1289 inst.value = 0;
1290
1291 switch (insn->Instruction.Opcode) {
1292 case TGSI_OPCODE_TEX:
1293 inst.op = SVGA3DOP_TEX;
1294 break;
1295 case TGSI_OPCODE_TXP:
1296 inst.op = SVGA3DOP_TEX;
1297 inst.control = SVGA3DOPCONT_PROJECT;
1298 break;
1299 case TGSI_OPCODE_TXB:
1300 inst.op = SVGA3DOP_TEX;
1301 inst.control = SVGA3DOPCONT_BIAS;
1302 break;
1303 case TGSI_OPCODE_TXL:
1304 inst.op = SVGA3DOP_TEXLDL;
1305 break;
1306 default:
1307 assert(0);
1308 return FALSE;
1309 }
1310
1311 texcoord = translate_src_register( emit, &insn->Src[0] );
1312 sampler = translate_src_register( emit, &insn->Src[1] );
1313
1314 if (emit->key.fkey.tex[sampler.base.num].unnormalized ||
1315 emit->dynamic_branching_level > 0)
1316 tmp = get_temp( emit );
1317
1318 /* Can't do mipmapping inside dynamic branch constructs. Force LOD
1319 * zero in that case.
1320 */
1321 if (emit->dynamic_branching_level > 0 &&
1322 inst.op == SVGA3DOP_TEX &&
1323 SVGA3dShaderGetRegType(texcoord.base.value) == SVGA3DREG_TEMP) {
1324 struct src_register zero = get_zero_immediate( emit );
1325
1326 /* MOV tmp, texcoord */
1327 if (!submit_op1( emit,
1328 inst_token( SVGA3DOP_MOV ),
1329 tmp,
1330 texcoord ))
1331 return FALSE;
1332
1333 /* MOV tmp.w, zero */
1334 if (!submit_op1( emit,
1335 inst_token( SVGA3DOP_MOV ),
1336 writemask( tmp, TGSI_WRITEMASK_W ),
1337 scalar( zero, TGSI_SWIZZLE_X )))
1338 return FALSE;
1339
1340 texcoord = src( tmp );
1341 inst.op = SVGA3DOP_TEXLDL;
1342 }
1343
1344 /* Explicit normalization of texcoords:
1345 */
1346 if (emit->key.fkey.tex[sampler.base.num].unnormalized) {
1347 struct src_register wh = get_tex_dimensions( emit, sampler.base.num );
1348
1349 /* MUL tmp, SRC0, WH */
1350 if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ),
1351 tmp, texcoord, wh ))
1352 return FALSE;
1353
1354 texcoord = src( tmp );
1355 }
1356
1357 return submit_op2( emit, inst, dst, texcoord, sampler );
1358 }
1359
1360
1361
1362
1363 /* Translate texture instructions to SVGA3D representation.
1364 */
1365 static boolean emit_tex4(struct svga_shader_emitter *emit,
1366 const struct tgsi_full_instruction *insn,
1367 SVGA3dShaderDestToken dst )
1368 {
1369 SVGA3dShaderInstToken inst;
1370 struct src_register texcoord;
1371 struct src_register ddx;
1372 struct src_register ddy;
1373 struct src_register sampler;
1374
1375 texcoord = translate_src_register( emit, &insn->Src[0] );
1376 ddx = translate_src_register( emit, &insn->Src[1] );
1377 ddy = translate_src_register( emit, &insn->Src[2] );
1378 sampler = translate_src_register( emit, &insn->Src[3] );
1379
1380 inst.value = 0;
1381
1382 switch (insn->Instruction.Opcode) {
1383 case TGSI_OPCODE_TXD:
1384 inst.op = SVGA3DOP_TEXLDD; /* 4 args! */
1385 break;
1386 default:
1387 assert(0);
1388 return FALSE;
1389 }
1390
1391 return submit_op4( emit, inst, dst, texcoord, sampler, ddx, ddy );
1392 }
1393
1394
1395 static boolean emit_tex(struct svga_shader_emitter *emit,
1396 const struct tgsi_full_instruction *insn )
1397 {
1398 SVGA3dShaderDestToken dst =
1399 translate_dst_register( emit, insn, 0 );
1400 struct src_register src0 =
1401 translate_src_register( emit, &insn->Src[0] );
1402 struct src_register src1 =
1403 translate_src_register( emit, &insn->Src[1] );
1404
1405 SVGA3dShaderDestToken tex_result;
1406
1407 /* check for shadow samplers */
1408 boolean compare = (emit->key.fkey.tex[src1.base.num].compare_mode ==
1409 PIPE_TEX_COMPARE_R_TO_TEXTURE);
1410
1411
1412 /* If doing compare processing, need to put this value into a
1413 * temporary so it can be used as a source later on.
1414 */
1415 if (compare ||
1416 (!emit->use_sm30 && dst.mask != TGSI_WRITEMASK_XYZW) ) {
1417 tex_result = get_temp( emit );
1418 }
1419 else {
1420 tex_result = dst;
1421 }
1422
1423 switch(insn->Instruction.Opcode) {
1424 case TGSI_OPCODE_TEX:
1425 case TGSI_OPCODE_TXB:
1426 case TGSI_OPCODE_TXP:
1427 case TGSI_OPCODE_TXL:
1428 if (!emit_tex2( emit, insn, tex_result ))
1429 return FALSE;
1430 break;
1431 case TGSI_OPCODE_TXD:
1432 if (!emit_tex4( emit, insn, tex_result ))
1433 return FALSE;
1434 break;
1435 default:
1436 assert(0);
1437 }
1438
1439
1440 if (compare) {
1441 if (dst.mask & TGSI_WRITEMASK_XYZ) {
1442 SVGA3dShaderDestToken src0_zdivw = get_temp( emit );
1443 struct src_register tex_src_x = scalar(src(tex_result), TGSI_SWIZZLE_Y);
1444
1445 /* Divide texcoord R by Q */
1446 if (!submit_op1( emit, inst_token( SVGA3DOP_RCP ),
1447 writemask(src0_zdivw, TGSI_WRITEMASK_X),
1448 scalar(src0, TGSI_SWIZZLE_W) ))
1449 return FALSE;
1450
1451 if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ),
1452 writemask(src0_zdivw, TGSI_WRITEMASK_X),
1453 scalar(src0, TGSI_SWIZZLE_Z),
1454 scalar(src(src0_zdivw), TGSI_SWIZZLE_X) ))
1455 return FALSE;
1456
1457 if (!emit_select(
1458 emit,
1459 emit->key.fkey.tex[src1.base.num].compare_func,
1460 writemask( dst, TGSI_WRITEMASK_XYZ ),
1461 scalar(src(src0_zdivw), TGSI_SWIZZLE_X),
1462 tex_src_x))
1463 return FALSE;
1464 }
1465
1466 if (dst.mask & TGSI_WRITEMASK_W) {
1467 struct src_register one =
1468 scalar( get_zero_immediate( emit ), TGSI_SWIZZLE_W );
1469
1470 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
1471 writemask( dst, TGSI_WRITEMASK_W ),
1472 one ))
1473 return FALSE;
1474 }
1475
1476 return TRUE;
1477 }
1478 else if (!emit->use_sm30 && dst.mask != TGSI_WRITEMASK_XYZW)
1479 {
1480 if (!emit_op1( emit, inst_token( SVGA3DOP_MOV ), dst, src(tex_result) ))
1481 return FALSE;
1482 }
1483
1484 return TRUE;
1485 }
1486
1487 static boolean emit_bgnloop2( struct svga_shader_emitter *emit,
1488 const struct tgsi_full_instruction *insn )
1489 {
1490 SVGA3dShaderInstToken inst = inst_token( SVGA3DOP_LOOP );
1491 struct src_register loop_reg = src_register( SVGA3DREG_LOOP, 0 );
1492 struct src_register const_int = get_loop_const( emit );
1493
1494 emit->dynamic_branching_level++;
1495
1496 return (emit_instruction( emit, inst ) &&
1497 emit_src( emit, loop_reg ) &&
1498 emit_src( emit, const_int ) );
1499 }
1500
1501 static boolean emit_endloop2( struct svga_shader_emitter *emit,
1502 const struct tgsi_full_instruction *insn )
1503 {
1504 SVGA3dShaderInstToken inst = inst_token( SVGA3DOP_ENDLOOP );
1505
1506 emit->dynamic_branching_level--;
1507
1508 return emit_instruction( emit, inst );
1509 }
1510
1511 static boolean emit_brk( struct svga_shader_emitter *emit,
1512 const struct tgsi_full_instruction *insn )
1513 {
1514 SVGA3dShaderInstToken inst = inst_token( SVGA3DOP_BREAK );
1515 return emit_instruction( emit, inst );
1516 }
1517
1518 static boolean emit_scalar_op1( struct svga_shader_emitter *emit,
1519 unsigned opcode,
1520 const struct tgsi_full_instruction *insn )
1521 {
1522 SVGA3dShaderInstToken inst;
1523 SVGA3dShaderDestToken dst;
1524 struct src_register src;
1525
1526 inst = inst_token( opcode );
1527 dst = translate_dst_register( emit, insn, 0 );
1528 src = translate_src_register( emit, &insn->Src[0] );
1529 src = scalar( src, TGSI_SWIZZLE_X );
1530
1531 return submit_op1( emit, inst, dst, src );
1532 }
1533
1534
1535 static boolean emit_simple_instruction(struct svga_shader_emitter *emit,
1536 unsigned opcode,
1537 const struct tgsi_full_instruction *insn )
1538 {
1539 const struct tgsi_full_src_register *src = insn->Src;
1540 SVGA3dShaderInstToken inst;
1541 SVGA3dShaderDestToken dst;
1542
1543 inst = inst_token( opcode );
1544 dst = translate_dst_register( emit, insn, 0 );
1545
1546 switch (insn->Instruction.NumSrcRegs) {
1547 case 0:
1548 return submit_op0( emit, inst, dst );
1549 case 1:
1550 return submit_op1( emit, inst, dst,
1551 translate_src_register( emit, &src[0] ));
1552 case 2:
1553 return submit_op2( emit, inst, dst,
1554 translate_src_register( emit, &src[0] ),
1555 translate_src_register( emit, &src[1] ) );
1556 case 3:
1557 return submit_op3( emit, inst, dst,
1558 translate_src_register( emit, &src[0] ),
1559 translate_src_register( emit, &src[1] ),
1560 translate_src_register( emit, &src[2] ) );
1561 default:
1562 assert(0);
1563 return FALSE;
1564 }
1565 }
1566
1567
1568 static boolean emit_deriv(struct svga_shader_emitter *emit,
1569 const struct tgsi_full_instruction *insn )
1570 {
1571 if (emit->dynamic_branching_level > 0 &&
1572 insn->Src[0].Register.File == TGSI_FILE_TEMPORARY)
1573 {
1574 struct src_register zero = get_zero_immediate( emit );
1575 SVGA3dShaderDestToken dst =
1576 translate_dst_register( emit, insn, 0 );
1577
1578       /* Deriv opcodes are not valid inside dynamic branching; work around this
1579 * by zeroing out the destination.
1580 */
1581 if (!submit_op1(emit,
1582 inst_token( SVGA3DOP_MOV ),
1583 dst,
1584 scalar(zero, TGSI_SWIZZLE_X)))
1585 return FALSE;
1586
1587 return TRUE;
1588 }
1589 else {
1590 unsigned opcode;
1591
1592 switch (insn->Instruction.Opcode) {
1593 case TGSI_OPCODE_DDX:
1594 opcode = SVGA3DOP_DSX;
1595 break;
1596 case TGSI_OPCODE_DDY:
1597 opcode = SVGA3DOP_DSY;
1598 break;
1599 default:
1600 return FALSE;
1601 }
1602
1603 return emit_simple_instruction( emit, opcode, insn );
1604 }
1605 }
1606
1607 static boolean emit_arl(struct svga_shader_emitter *emit,
1608 const struct tgsi_full_instruction *insn)
1609 {
1610 ++emit->current_arl;
1611 if (emit->unit == PIPE_SHADER_FRAGMENT) {
1612 /* MOVA not present in pixel shader instruction set.
1613 * Ignore this instruction altogether since it is
1614 * only used for loop counters -- and for that
1615 * we reference aL directly.
1616 */
1617 return TRUE;
1618 }
1619 if (svga_arl_needs_adjustment( emit )) {
1620 return emit_fake_arl( emit, insn );
1621 } else {
1622 /* no need to adjust, just emit straight arl */
1623 return emit_simple_instruction(emit, SVGA3DOP_MOVA, insn);
1624 }
1625 }
1626
1627 static boolean alias_src_dst( struct src_register src,
1628 SVGA3dShaderDestToken dst )
1629 {
1630 if (src.base.num != dst.num)
1631 return FALSE;
1632
1633 if (SVGA3dShaderGetRegType(dst.value) !=
1634 SVGA3dShaderGetRegType(src.base.value))
1635 return FALSE;
1636
1637 return TRUE;
1638 }
1639
1640 static boolean emit_pow(struct svga_shader_emitter *emit,
1641 const struct tgsi_full_instruction *insn)
1642 {
1643 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1644 struct src_register src0 = translate_src_register(
1645 emit, &insn->Src[0] );
1646 struct src_register src1 = translate_src_register(
1647 emit, &insn->Src[1] );
1648 boolean need_tmp = FALSE;
1649
1650 /* POW can only output to a temporary */
1651 if (insn->Dst[0].Register.File != TGSI_FILE_TEMPORARY)
1652 need_tmp = TRUE;
1653
1654 /* POW src1 must not be the same register as dst */
1655 if (alias_src_dst( src1, dst ))
1656 need_tmp = TRUE;
1657
1658 /* it's a scalar op */
1659 src0 = scalar( src0, TGSI_SWIZZLE_X );
1660 src1 = scalar( src1, TGSI_SWIZZLE_X );
1661
1662 if (need_tmp) {
1663 SVGA3dShaderDestToken tmp = writemask(get_temp( emit ), TGSI_WRITEMASK_X );
1664
1665 if (!submit_op2(emit, inst_token( SVGA3DOP_POW ), tmp, src0, src1))
1666 return FALSE;
1667
1668 return submit_op1(emit, inst_token( SVGA3DOP_MOV ), dst, scalar(src(tmp), 0) );
1669 }
1670 else {
1671 return submit_op2(emit, inst_token( SVGA3DOP_POW ), dst, src0, src1);
1672 }
1673 }
1674
1675 static boolean emit_xpd(struct svga_shader_emitter *emit,
1676 const struct tgsi_full_instruction *insn)
1677 {
1678 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1679 const struct src_register src0 = translate_src_register(
1680 emit, &insn->Src[0] );
1681 const struct src_register src1 = translate_src_register(
1682 emit, &insn->Src[1] );
1683 boolean need_dst_tmp = FALSE;
1684
1685 /* XPD can only output to a temporary */
1686 if (SVGA3dShaderGetRegType(dst.value) != SVGA3DREG_TEMP)
1687 need_dst_tmp = TRUE;
1688
1689    /* The dst reg must not be the same as src0 or src1 */
1690 if (alias_src_dst(src0, dst) ||
1691 alias_src_dst(src1, dst))
1692 need_dst_tmp = TRUE;
1693
1694 if (need_dst_tmp) {
1695 SVGA3dShaderDestToken tmp = get_temp( emit );
1696
1697 /* Obey DX9 restrictions on mask:
1698 */
1699 tmp.mask = dst.mask & TGSI_WRITEMASK_XYZ;
1700
1701 if (!submit_op2(emit, inst_token( SVGA3DOP_CRS ), tmp, src0, src1))
1702 return FALSE;
1703
1704 if (!submit_op1(emit, inst_token( SVGA3DOP_MOV ), dst, src( tmp )))
1705 return FALSE;
1706 }
1707 else {
1708 if (!submit_op2(emit, inst_token( SVGA3DOP_CRS ), dst, src0, src1))
1709 return FALSE;
1710 }
1711
1712 /* Need to emit 1.0 to dst.w?
1713 */
1714 if (dst.mask & TGSI_WRITEMASK_W) {
1715 struct src_register zero = get_zero_immediate( emit );
1716
1717 if (!submit_op1(emit,
1718 inst_token( SVGA3DOP_MOV ),
1719 writemask(dst, TGSI_WRITEMASK_W),
1720 zero))
1721 return FALSE;
1722 }
1723
1724 return TRUE;
1725 }
1726
1727
1728 static boolean emit_lrp(struct svga_shader_emitter *emit,
1729 const struct tgsi_full_instruction *insn)
1730 {
1731 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1732 SVGA3dShaderDestToken tmp;
1733 const struct src_register src0 = translate_src_register(
1734 emit, &insn->Src[0] );
1735 const struct src_register src1 = translate_src_register(
1736 emit, &insn->Src[1] );
1737 const struct src_register src2 = translate_src_register(
1738 emit, &insn->Src[2] );
1739 boolean need_dst_tmp = FALSE;
1740
1741 /* The dst reg must not be the same as src0 or src2 */
1742 if (alias_src_dst(src0, dst) ||
1743 alias_src_dst(src2, dst))
1744 need_dst_tmp = TRUE;
1745
1746 if (need_dst_tmp) {
1747 tmp = get_temp( emit );
1748 tmp.mask = dst.mask;
1749 }
1750 else {
1751 tmp = dst;
1752 }
1753
1754 if (!submit_op3(emit, inst_token( SVGA3DOP_LRP ), tmp, src0, src1, src2))
1755 return FALSE;
1756
1757 if (need_dst_tmp) {
1758 if (!submit_op1(emit, inst_token( SVGA3DOP_MOV ), dst, src( tmp )))
1759 return FALSE;
1760 }
1761
1762 return TRUE;
1763 }
1764
1765
1766 static boolean emit_dst_insn(struct svga_shader_emitter *emit,
1767 const struct tgsi_full_instruction *insn )
1768 {
1769 if (emit->unit == PIPE_SHADER_VERTEX) {
1770 /* SVGA/DX9 has a DST instruction, but only for vertex shaders:
1771 */
1772 return emit_simple_instruction(emit, SVGA3DOP_DST, insn);
1773 }
1774 else {
1775
1776 /* result[0] = 1 * 1;
1777 * result[1] = a[1] * b[1];
1778 * result[2] = a[2] * 1;
1779 * result[3] = 1 * b[3];
1780 */
1781
1782 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1783 SVGA3dShaderDestToken tmp;
1784 const struct src_register src0 = translate_src_register(
1785 emit, &insn->Src[0] );
1786 const struct src_register src1 = translate_src_register(
1787 emit, &insn->Src[1] );
1788 struct src_register zero = get_zero_immediate( emit );
1789 boolean need_tmp = FALSE;
1790
1791 if (SVGA3dShaderGetRegType(dst.value) != SVGA3DREG_TEMP ||
1792 alias_src_dst(src0, dst) ||
1793 alias_src_dst(src1, dst))
1794 need_tmp = TRUE;
1795
1796 if (need_tmp) {
1797 tmp = get_temp( emit );
1798 }
1799 else {
1800 tmp = dst;
1801 }
1802
1803 /* tmp.xw = 1.0
1804 */
1805 if (tmp.mask & TGSI_WRITEMASK_XW) {
1806 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
1807 writemask(tmp, TGSI_WRITEMASK_XW ),
1808 scalar( zero, 3 )))
1809 return FALSE;
1810 }
1811
1812 /* tmp.yz = src0
1813 */
1814 if (tmp.mask & TGSI_WRITEMASK_YZ) {
1815 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
1816 writemask(tmp, TGSI_WRITEMASK_YZ ),
1817 src0))
1818 return FALSE;
1819 }
1820
1821 /* tmp.yw = tmp * src1
1822 */
1823 if (tmp.mask & TGSI_WRITEMASK_YW) {
1824 if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ),
1825 writemask(tmp, TGSI_WRITEMASK_YW ),
1826 src(tmp),
1827 src1))
1828 return FALSE;
1829 }
1830
1831 /* dst = tmp
1832 */
1833 if (need_tmp) {
1834 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
1835 dst,
1836 src(tmp)))
1837 return FALSE;
1838 }
1839 }
1840
1841 return TRUE;
1842 }
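/* Background note (typical usage, not taken from this file): DST is commonly
 * used for distance attenuation, with src0 = (-, d*d, d*d, -) and
 * src1 = (-, 1/d, -, 1/d), giving the vector (1, d, d*d, 1/d) that is then
 * dotted with the attenuation coefficients.
 */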
1843
1844
1845 static boolean emit_exp(struct svga_shader_emitter *emit,
1846 const struct tgsi_full_instruction *insn)
1847 {
1848 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1849 struct src_register src0 =
1850 translate_src_register( emit, &insn->Src[0] );
1851 struct src_register zero = get_zero_immediate( emit );
1852 SVGA3dShaderDestToken fraction;
1853
1854 if (dst.mask & TGSI_WRITEMASK_Y)
1855 fraction = dst;
1856 else if (dst.mask & TGSI_WRITEMASK_X)
1857 fraction = get_temp( emit );
1858 else
1859 fraction.value = 0;
1860
1861 /* If y is being written, fill it with src0 - floor(src0).
1862 */
1863 if (dst.mask & TGSI_WRITEMASK_XY) {
1864 if (!submit_op1( emit, inst_token( SVGA3DOP_FRC ),
1865 writemask( fraction, TGSI_WRITEMASK_Y ),
1866 src0 ))
1867 return FALSE;
1868 }
1869
1870 /* If x is being written, fill it with 2 ^ floor(src0).
1871 */
1872 if (dst.mask & TGSI_WRITEMASK_X) {
1873 if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ),
1874 writemask( dst, TGSI_WRITEMASK_X ),
1875 src0,
1876 scalar( negate( src( fraction ) ), TGSI_SWIZZLE_Y ) ) )
1877 return FALSE;
1878
1879 if (!submit_op1( emit, inst_token( SVGA3DOP_EXP ),
1880 writemask( dst, TGSI_WRITEMASK_X ),
1881 scalar( src( dst ), TGSI_SWIZZLE_X ) ) )
1882 return FALSE;
1883
1884 if (!(dst.mask & TGSI_WRITEMASK_Y))
1885 release_temp( emit, fraction );
1886 }
1887
1888 /* If z is being written, fill it with 2 ^ src0 (partial precision).
1889 */
1890 if (dst.mask & TGSI_WRITEMASK_Z) {
1891 if (!submit_op1( emit, inst_token( SVGA3DOP_EXPP ),
1892 writemask( dst, TGSI_WRITEMASK_Z ),
1893 src0 ) )
1894 return FALSE;
1895 }
1896
1897 /* If w is being written, fill it with one.
1898 */
1899 if (dst.mask & TGSI_WRITEMASK_W) {
1900 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
1901 writemask(dst, TGSI_WRITEMASK_W),
1902 scalar( zero, TGSI_SWIZZLE_W ) ))
1903 return FALSE;
1904 }
1905
1906 return TRUE;
1907 }
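/* The decomposition above relies on 2^x = 2^floor(x) * 2^frac(x): per the
 * TGSI EXP definition, dst.x = 2^floor(src), dst.y = frac(src),
 * dst.z ~= 2^src (partial precision via EXPP), dst.w = 1.0.
 * Worked example: src = 2.5 gives dst = (4.0, 0.5, ~5.657, 1.0).
 */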
1908
1909 static boolean emit_lit(struct svga_shader_emitter *emit,
1910 const struct tgsi_full_instruction *insn )
1911 {
1912 if (emit->unit == PIPE_SHADER_VERTEX) {
1913 /* SVGA/DX9 has a LIT instruction, but only for vertex shaders:
1914 */
1915 return emit_simple_instruction(emit, SVGA3DOP_LIT, insn);
1916 }
1917 else {
1918
1919       /* D3D vs. GL semantics can be fairly easily accommodated by
1920 * variations on this sequence.
1921 *
1922 * GL:
1923 * tmp.y = src.x
1924 * tmp.z = pow(src.y,src.w)
1925 * p0 = src0.xxxx > 0
1926 * result = zero.wxxw
1927 * (p0) result.yz = tmp
1928 *
1929 * D3D:
1930 * tmp.y = src.x
1931 * tmp.z = pow(src.y,src.w)
1932 * p0 = src0.xxyy > 0
1933 * result = zero.wxxw
1934 * (p0) result.yz = tmp
1935 *
1936 * Will implement the GL version for now.
1937 */
1938
1939 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1940 SVGA3dShaderDestToken tmp = get_temp( emit );
1941 const struct src_register src0 = translate_src_register(
1942 emit, &insn->Src[0] );
1943 struct src_register zero = get_zero_immediate( emit );
1944
1945 /* tmp = pow(src.y, src.w)
1946 */
1947 if (dst.mask & TGSI_WRITEMASK_Z) {
1948 if (!submit_op2(emit, inst_token( SVGA3DOP_POW ),
1949 tmp,
1950 scalar(src0, 1),
1951 scalar(src0, 3)))
1952 return FALSE;
1953 }
1954
1955 /* tmp.y = src.x
1956 */
1957 if (dst.mask & TGSI_WRITEMASK_Y) {
1958 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
1959 writemask(tmp, TGSI_WRITEMASK_Y ),
1960 scalar(src0, 0)))
1961 return FALSE;
1962 }
1963
1964       /* Can't quite do this with emit_conditional() due to the extra
1965 * writemask on the predicated mov:
1966 */
1967 {
1968 SVGA3dShaderDestToken pred_reg = dst_register( SVGA3DREG_PREDICATE, 0 );
1969 SVGA3dShaderInstToken setp_token, mov_token;
1970 struct src_register predsrc;
1971
1972 setp_token = inst_token( SVGA3DOP_SETP );
1973 mov_token = inst_token( SVGA3DOP_MOV );
1974
1975 setp_token.control = SVGA3DOPCOMP_GT;
1976
1977 /* D3D vs GL semantics:
1978 */
1979 if (0)
1980 predsrc = swizzle(src0, 0, 0, 1, 1); /* D3D */
1981 else
1982 predsrc = swizzle(src0, 0, 0, 0, 0); /* GL */
1983
1984 /* SETP src0.xxyy, GT, {0}.x */
1985 if (!submit_op2( emit, setp_token, pred_reg,
1986 predsrc,
1987 swizzle(zero, 0, 0, 0, 0) ))
1988 return FALSE;
1989
1990 /* MOV dst, fail */
1991 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), dst,
1992 swizzle(zero, 3, 0, 0, 3 )))
1993 return FALSE;
1994
1995 /* MOV dst.yz, tmp (predicated)
1996 *
1997 * Note that the predicate reg (and possible modifiers) is passed
1998 * as the first source argument.
1999 */
2000 if (dst.mask & TGSI_WRITEMASK_YZ) {
2001 mov_token.predicated = 1;
2002 if (!submit_op2( emit, mov_token,
2003 writemask(dst, TGSI_WRITEMASK_YZ),
2004 src( pred_reg ), src( tmp ) ))
2005 return FALSE;
2006 }
2007 }
2008 }
2009
2010 return TRUE;
2011 }
2012
2013
2014
2015
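/* TGSI EX2 is emitted as the scalar EXP instruction.  With a partial
 * destination writemask the result is computed into a temporary first
 * and then MOVed (as tmp.x) into the requested components.
 */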
2016 static boolean emit_ex2( struct svga_shader_emitter *emit,
2017 const struct tgsi_full_instruction *insn )
2018 {
2019 SVGA3dShaderInstToken inst;
2020 SVGA3dShaderDestToken dst;
2021 struct src_register src0;
2022
2023 inst = inst_token( SVGA3DOP_EXP );
2024 dst = translate_dst_register( emit, insn, 0 );
2025 src0 = translate_src_register( emit, &insn->Src[0] );
2026 src0 = scalar( src0, TGSI_SWIZZLE_X );
2027
2028 if (dst.mask != TGSI_WRITEMASK_XYZW) {
2029 SVGA3dShaderDestToken tmp = get_temp( emit );
2030
2031 if (!submit_op1( emit, inst, tmp, src0 ))
2032 return FALSE;
2033
2034 return submit_op1( emit, inst_token( SVGA3DOP_MOV ),
2035 dst,
2036 scalar( src( tmp ), TGSI_SWIZZLE_X ) );
2037 }
2038
2039 return submit_op1( emit, inst, dst, src0 );
2040 }
2041
2042
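/* Expand the TGSI LOG macro, roughly:
 *
 *   dst.x = floor( log2( abs( src0.x ) ) )
 *   dst.y = abs( src0.x ) / ( 2 ^ floor( log2( abs( src0.x ) ) ) )
 *   dst.z = log2( abs( src0.x ) )
 *   dst.w = 1
 */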
2043 static boolean emit_log(struct svga_shader_emitter *emit,
2044 const struct tgsi_full_instruction *insn)
2045 {
2046 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
2047 struct src_register src0 =
2048 translate_src_register( emit, &insn->Src[0] );
2049 struct src_register zero = get_zero_immediate( emit );
2050 SVGA3dShaderDestToken abs_tmp;
2051 struct src_register abs_src0;
2052 SVGA3dShaderDestToken log2_abs;
2053
2054 abs_tmp.value = 0;
2055
2056 if (dst.mask & TGSI_WRITEMASK_Z)
2057 log2_abs = dst;
2058 else if (dst.mask & TGSI_WRITEMASK_XY)
2059 log2_abs = get_temp( emit );
2060 else
2061 log2_abs.value = 0;
2062
2063 /* If any of x, y or z is being written, compute log2( abs( src0 ) ) into log2_abs.z.
2064 */
2065 if (dst.mask & TGSI_WRITEMASK_XYZ) {
2066 if (!src0.base.srcMod || src0.base.srcMod == SVGA3DSRCMOD_ABS)
2067 abs_src0 = src0;
2068 else {
2069 abs_tmp = get_temp( emit );
2070
2071 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
2072 abs_tmp,
2073 src0 ) )
2074 return FALSE;
2075
2076 abs_src0 = src( abs_tmp );
2077 }
2078
2079 abs_src0 = absolute( scalar( abs_src0, TGSI_SWIZZLE_X ) );
2080
2081 if (!submit_op1( emit, inst_token( SVGA3DOP_LOG ),
2082 writemask( log2_abs, TGSI_WRITEMASK_Z ),
2083 abs_src0 ) )
2084 return FALSE;
2085 }
2086
2087 if (dst.mask & TGSI_WRITEMASK_XY) {
2088 SVGA3dShaderDestToken floor_log2;
2089
2090 if (dst.mask & TGSI_WRITEMASK_X)
2091 floor_log2 = dst;
2092 else
2093 floor_log2 = get_temp( emit );
2094
2095 /* Fill floor_log2.x with floor( log2( abs( src0 ) ) ), computed as log2_abs.z - frac( log2_abs.z ).
2096 */
2097 if (!submit_op1( emit, inst_token( SVGA3DOP_FRC ),
2098 writemask( floor_log2, TGSI_WRITEMASK_X ),
2099 scalar( src( log2_abs ), TGSI_SWIZZLE_Z ) ) )
2100 return FALSE;
2101
2102 if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ),
2103 writemask( floor_log2, TGSI_WRITEMASK_X ),
2104 scalar( src( log2_abs ), TGSI_SWIZZLE_Z ),
2105 negate( src( floor_log2 ) ) ) )
2106 return FALSE;
2107
2108 /* If y is being written, fill it with
2109 * abs ( src0 ) / ( 2 ^ floor( log2( abs( src0 ) ) ) ).
2110 */
2111 if (dst.mask & TGSI_WRITEMASK_Y) {
2112 if (!submit_op1( emit, inst_token( SVGA3DOP_EXP ),
2113 writemask( dst, TGSI_WRITEMASK_Y ),
2114 negate( scalar( src( floor_log2 ),
2115 TGSI_SWIZZLE_X ) ) ) )
2116 return FALSE;
2117
2118 if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ),
2119 writemask( dst, TGSI_WRITEMASK_Y ),
2120 src( dst ),
2121 abs_src0 ) )
2122 return FALSE;
2123 }
2124
2125 if (!(dst.mask & TGSI_WRITEMASK_X))
2126 release_temp( emit, floor_log2 );
2127
2128 if (!(dst.mask & TGSI_WRITEMASK_Z))
2129 release_temp( emit, log2_abs );
2130 }
2131
2132 if ((dst.mask & TGSI_WRITEMASK_XYZ) && src0.base.srcMod &&
2133 src0.base.srcMod != SVGA3DSRCMOD_ABS)
2134 release_temp( emit, abs_tmp );
2135
2136 /* If w is being written, fill it with one.
2137 */
2138 if (dst.mask & TGSI_WRITEMASK_W) {
2139 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
2140 writemask(dst, TGSI_WRITEMASK_W),
2141 scalar( zero, TGSI_SWIZZLE_W ) ))
2142 return FALSE;
2143 }
2144
2145 return TRUE;
2146 }
2147
2148
2149 static boolean emit_bgnsub( struct svga_shader_emitter *emit,
2150 unsigned position,
2151 const struct tgsi_full_instruction *insn )
2152 {
2153 unsigned i;
2154
2155 /* Note that we've finished the main function and are now emitting
2156 * subroutines. This affects how we terminate the generated
2157 * shader.
2158 */
2159 emit->in_main_func = FALSE;
2160
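/* Look up the label index that emit_call() assigned to this position,
 * then emit RET (closing the previous function) followed by the
 * matching LABEL.
 */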
2161 for (i = 0; i < emit->nr_labels; i++) {
2162 if (emit->label[i] == position) {
2163 return (emit_instruction( emit, inst_token( SVGA3DOP_RET ) ) &&
2164 emit_instruction( emit, inst_token( SVGA3DOP_LABEL ) ) &&
2165 emit_src( emit, src_register( SVGA3DREG_LABEL, i )));
2166 }
2167 }
2168
2169 assert(0);
2170 return TRUE;
2171 }
2172
2173 static boolean emit_call( struct svga_shader_emitter *emit,
2174 const struct tgsi_full_instruction *insn )
2175 {
2176 unsigned position = insn->Label.Label;
2177 unsigned i;
2178
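/* Reuse the label index previously assigned to this subroutine, or
 * allocate a new one; emit_bgnsub() emits the matching LABEL when the
 * subroutine body is reached.
 */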
2179 for (i = 0; i < emit->nr_labels; i++) {
2180 if (emit->label[i] == position)
2181 break;
2182 }
2183
2184 if (emit->nr_labels == Elements(emit->label))
2185 return FALSE;
2186
2187 if (i == emit->nr_labels) {
2188 emit->label[i] = position;
2189 emit->nr_labels++;
2190 }
2191
2192 return (emit_instruction( emit, inst_token( SVGA3DOP_CALL ) ) &&
2193 emit_src( emit, src_register( SVGA3DREG_LABEL, i )));
2194 }
2195
2196
2197 static boolean emit_end( struct svga_shader_emitter *emit )
2198 {
2199 if (emit->unit == PIPE_SHADER_VERTEX) {
2200 return emit_vs_postamble( emit );
2201 }
2202 else {
2203 return emit_ps_postamble( emit );
2204 }
2205 }
2206
2207
2208
2209 static boolean svga_emit_instruction( struct svga_shader_emitter *emit,
2210 unsigned position,
2211 const struct tgsi_full_instruction *insn )
2212 {
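/* 'position' is the index of this instruction within the token stream;
 * it is what CAL's label operand refers to (see emit_call() and
 * emit_bgnsub() above).
 */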
2213 switch (insn->Instruction.Opcode) {
2214
2215 case TGSI_OPCODE_ARL:
2216 return emit_arl( emit, insn );
2217
2218 case TGSI_OPCODE_TEX:
2219 case TGSI_OPCODE_TXB:
2220 case TGSI_OPCODE_TXP:
2221 case TGSI_OPCODE_TXL:
2222 case TGSI_OPCODE_TXD:
2223 return emit_tex( emit, insn );
2224
2225 case TGSI_OPCODE_DDX:
2226 case TGSI_OPCODE_DDY:
2227 return emit_deriv( emit, insn );
2228
2229 case TGSI_OPCODE_BGNSUB:
2230 return emit_bgnsub( emit, position, insn );
2231
2232 case TGSI_OPCODE_ENDSUB:
2233 return TRUE;
2234
2235 case TGSI_OPCODE_CAL:
2236 return emit_call( emit, insn );
2237
2238 case TGSI_OPCODE_FLR:
2239 case TGSI_OPCODE_TRUNC: /* should truncate toward zero, not floor -- wrong for negative inputs */
2240 return emit_floor( emit, insn );
2241
2242 case TGSI_OPCODE_CMP:
2243 return emit_cmp( emit, insn );
2244
2245 case TGSI_OPCODE_DIV:
2246 return emit_div( emit, insn );
2247
2248 case TGSI_OPCODE_DP2:
2249 return emit_dp2( emit, insn );
2250
2251 case TGSI_OPCODE_DPH:
2252 return emit_dph( emit, insn );
2253
2254 case TGSI_OPCODE_NRM:
2255 return emit_nrm( emit, insn );
2256
2257 case TGSI_OPCODE_COS:
2258 return emit_cos( emit, insn );
2259
2260 case TGSI_OPCODE_SIN:
2261 return emit_sin( emit, insn );
2262
2263 case TGSI_OPCODE_SCS:
2264 return emit_sincos( emit, insn );
2265
2266 case TGSI_OPCODE_END:
2267 /* TGSI always finishes the main func with an END */
2268 return emit_end( emit );
2269
2270 case TGSI_OPCODE_KIL:
2271 return emit_kil( emit, insn );
2272
2273 /* Selection opcodes. The underlying language is fairly
2274 * non-orthogonal about these.
2275 */
2276 case TGSI_OPCODE_SEQ:
2277 return emit_select_op( emit, PIPE_FUNC_EQUAL, insn );
2278
2279 case TGSI_OPCODE_SNE:
2280 return emit_select_op( emit, PIPE_FUNC_NOTEQUAL, insn );
2281
2282 case TGSI_OPCODE_SGT:
2283 return emit_select_op( emit, PIPE_FUNC_GREATER, insn );
2284
2285 case TGSI_OPCODE_SGE:
2286 return emit_select_op( emit, PIPE_FUNC_GEQUAL, insn );
2287
2288 case TGSI_OPCODE_SLT:
2289 return emit_select_op( emit, PIPE_FUNC_LESS, insn );
2290
2291 case TGSI_OPCODE_SLE:
2292 return emit_select_op( emit, PIPE_FUNC_LEQUAL, insn );
2293
2294 case TGSI_OPCODE_SUB:
2295 return emit_sub( emit, insn );
2296
2297 case TGSI_OPCODE_POW:
2298 return emit_pow( emit, insn );
2299
2300 case TGSI_OPCODE_EX2:
2301 return emit_ex2( emit, insn );
2302
2303 case TGSI_OPCODE_EXP:
2304 return emit_exp( emit, insn );
2305
2306 case TGSI_OPCODE_LOG:
2307 return emit_log( emit, insn );
2308
2309 case TGSI_OPCODE_LG2:
2310 return emit_scalar_op1( emit, SVGA3DOP_LOG, insn );
2311
2312 case TGSI_OPCODE_RSQ:
2313 return emit_scalar_op1( emit, SVGA3DOP_RSQ, insn );
2314
2315 case TGSI_OPCODE_RCP:
2316 return emit_scalar_op1( emit, SVGA3DOP_RCP, insn );
2317
2318 case TGSI_OPCODE_CONT:
2319 case TGSI_OPCODE_RET:
2320 /* This is a no-op -- we tell Mesa that we can't support RET
2321 * within a function (early return), so a RET will always be
2322 * immediately followed by an ENDSUB.
2323 */
2324 return TRUE;
2325
2326 /* These aren't actually used by any of the frontends we care
2327 * about:
2328 */
2329 case TGSI_OPCODE_CLAMP:
2330 case TGSI_OPCODE_ROUND:
2331 case TGSI_OPCODE_AND:
2332 case TGSI_OPCODE_OR:
2333 case TGSI_OPCODE_I2F:
2334 case TGSI_OPCODE_NOT:
2335 case TGSI_OPCODE_SHL:
2336 case TGSI_OPCODE_ISHR:
2337 case TGSI_OPCODE_XOR:
2338 return FALSE;
2339
2340 case TGSI_OPCODE_IF:
2341 return emit_if( emit, insn );
2342 case TGSI_OPCODE_ELSE:
2343 return emit_else( emit, insn );
2344 case TGSI_OPCODE_ENDIF:
2345 return emit_endif( emit, insn );
2346
2347 case TGSI_OPCODE_BGNLOOP:
2348 return emit_bgnloop2( emit, insn );
2349 case TGSI_OPCODE_ENDLOOP:
2350 return emit_endloop2( emit, insn );
2351 case TGSI_OPCODE_BRK:
2352 return emit_brk( emit, insn );
2353
2354 case TGSI_OPCODE_XPD:
2355 return emit_xpd( emit, insn );
2356
2357 case TGSI_OPCODE_KILP:
2358 return emit_kilp( emit, insn );
2359
2360 case TGSI_OPCODE_DST:
2361 return emit_dst_insn( emit, insn );
2362
2363 case TGSI_OPCODE_LIT:
2364 return emit_lit( emit, insn );
2365
2366 case TGSI_OPCODE_LRP:
2367 return emit_lrp( emit, insn );
2368
2369 default: {
2370 unsigned opcode = translate_opcode(insn->Instruction.Opcode);
2371
2372 if (opcode == SVGA3DOP_LAST_INST)
2373 return FALSE;
2374
2375 if (!emit_simple_instruction( emit, opcode, insn ))
2376 return FALSE;
2377 }
2378 }
2379
2380 return TRUE;
2381 }
2382
2383
2384 static boolean svga_emit_immediate( struct svga_shader_emitter *emit,
2385 struct tgsi_full_immediate *imm)
2386 {
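/* TGSI immediates may provide fewer than four components; pad the
 * missing ones with {0,0,0,1} so that w defaults to 1.
 */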
2387 static const float id[4] = {0,0,0,1};
2388 float value[4];
2389 unsigned i;
2390
2391 assert(1 <= imm->Immediate.NrTokens && imm->Immediate.NrTokens <= 5);
2392 for (i = 0; i < imm->Immediate.NrTokens - 1; i++)
2393 value[i] = imm->u[i].Float;
2394
2395 for ( ; i < 4; i++ )
2396 value[i] = id[i];
2397
2398 return emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT,
2399 emit->imm_start + emit->internal_imm_count++,
2400 value[0], value[1], value[2], value[3]);
2401 }
2402
2403 static boolean make_immediate( struct svga_shader_emitter *emit,
2404 float a,
2405 float b,
2406 float c,
2407 float d,
2408 struct src_register *out )
2409 {
2410 unsigned idx = emit->nr_hw_float_const++;
2411
2412 if (!emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT,
2413 idx, a, b, c, d ))
2414 return FALSE;
2415
2416 *out = src_register( SVGA3DREG_CONST, idx );
2417
2418 return TRUE;
2419 }
2420
2421 static boolean emit_vs_preamble( struct svga_shader_emitter *emit )
2422 {
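/* Without a prescale transform, the postamble needs the {0, 0, 0.5, 0.5}
 * constant to remap GL clip-space z to the D3D-style [0,1] range
 * (see emit_vs_postamble()).
 */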
2423 if (!emit->key.vkey.need_prescale) {
2424 if (!make_immediate( emit, 0, 0, .5, .5,
2425 &emit->imm_0055))
2426 return FALSE;
2427 }
2428
2429 return TRUE;
2430 }
2431
2432 static boolean emit_ps_preamble( struct svga_shader_emitter *emit )
2433 {
2434 unsigned i;
2435
2436 /* For SM20, the temporaries used to hold color outputs must be
2437 * initialized to a defined value (zero here); shaders which don't
2438 * write all of them are likely to be rejected by the DX9 runtime.
2439 */
2440 if (!emit->use_sm30) {
2441 struct src_register zero = get_zero_immediate( emit );
2442 for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) {
2443 if (SVGA3dShaderGetRegType(emit->true_col[i].value) != 0) {
2444
2445 if (!submit_op1( emit,
2446 inst_token(SVGA3DOP_MOV),
2447 emit->temp_col[i],
2448 zero ))
2449 return FALSE;
2450 }
2451 }
2452 }
2453
2454 return TRUE;
2455 }
2456
2457 static boolean emit_ps_postamble( struct svga_shader_emitter *emit )
2458 {
2459 unsigned i;
2460
2461 /* PS oDepth is incredibly fragile and it's very hard to catch the
2462 * types of usage that break it during shader emit. Easier just to
2463 * redirect the main program to a temporary and then only touch
2464 * oDepth with a hand-crafted MOV below.
2465 */
2466 if (SVGA3dShaderGetRegType(emit->true_pos.value) != 0) {
2467
2468 if (!submit_op1( emit,
2469 inst_token(SVGA3DOP_MOV),
2470 emit->true_pos,
2471 scalar(src(emit->temp_pos), TGSI_SWIZZLE_Z) ))
2472 return FALSE;
2473 }
2474
2475 /* Similarly for SM20 color outputs... Luckily SM30 isn't so
2476 * fragile.
2477 */
2478 for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) {
2479 if (SVGA3dShaderGetRegType(emit->true_col[i].value) != 0) {
2480
2481 /* Potentially override the output colors with white, as part of
2482 * the XOR logicop workaround.
2483 */
2484 if (emit->unit == PIPE_SHADER_FRAGMENT &&
2485 emit->key.fkey.white_fragments) {
2486
2487 struct src_register one = scalar( get_zero_immediate( emit ),
2488 TGSI_SWIZZLE_W );
2489
2490 if (!submit_op1( emit,
2491 inst_token(SVGA3DOP_MOV),
2492 emit->true_col[i],
2493 one ))
2494 return FALSE;
2495 }
2496 else {
2497 if (!submit_op1( emit,
2498 inst_token(SVGA3DOP_MOV),
2499 emit->true_col[i],
2500 src(emit->temp_col[i]) ))
2501 return FALSE;
2502 }
2503 }
2504 }
2505
2506 return TRUE;
2507 }
2508
2509 static boolean emit_vs_postamble( struct svga_shader_emitter *emit )
2510 {
2511 /* PSIZ output is incredibly fragile and it's very hard to catch
2512 * the types of usage that break it during shader emit. Easier
2513 * just to redirect the main program to a temporary and then only
2514 * touch PSIZ with a hand-crafted MOV below.
2515 */
2516 if (SVGA3dShaderGetRegType(emit->true_psiz.value) != 0) {
2517
2518 if (!submit_op1( emit,
2519 inst_token(SVGA3DOP_MOV),
2520 emit->true_psiz,
2521 scalar(src(emit->temp_psiz), TGSI_SWIZZLE_X) ))
2522 return FALSE;
2523 }
2524
2525 /* Need to perform various manipulations on vertex position to cope
2526 * with the different GL and D3D clip spaces.
2527 */
2528 if (emit->key.vkey.need_prescale) {
2529 SVGA3dShaderDestToken temp_pos = emit->temp_pos;
2530 SVGA3dShaderDestToken pos = emit->true_pos;
2531 unsigned offset = emit->info.file_max[TGSI_FILE_CONSTANT] + 1;
2532 struct src_register prescale_scale = src_register( SVGA3DREG_CONST,
2533 offset + 0 );
2534 struct src_register prescale_trans = src_register( SVGA3DREG_CONST,
2535 offset + 1 );
2536
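/* prescale_scale and prescale_trans live in the constant registers
 * immediately after the shader's own constant file, hence the
 * file_max[TGSI_FILE_CONSTANT] + 1 offset.
 */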
2537 /* MUL temp_pos.xyz, temp_pos, prescale.scale
2538 * MAD result.position, temp_pos.wwww, prescale.trans, temp_pos
2539 * --> Note that prescale.trans.w == 0
2540 */
2541 if (!submit_op2( emit,
2542 inst_token(SVGA3DOP_MUL),
2543 writemask(temp_pos, TGSI_WRITEMASK_XYZ),
2544 src(temp_pos),
2545 prescale_scale ))
2546 return FALSE;
2547
2548 if (!submit_op3( emit,
2549 inst_token(SVGA3DOP_MAD),
2550 pos,
2551 swizzle(src(temp_pos), 3, 3, 3, 3),
2552 prescale_trans,
2553 src(temp_pos)))
2554 return FALSE;
2555 }
2556 else {
2557 SVGA3dShaderDestToken temp_pos = emit->temp_pos;
2558 SVGA3dShaderDestToken pos = emit->true_pos;
2559 struct src_register imm_0055 = emit->imm_0055;
2560
2561 /* Adjust GL clipping coordinate space to hardware (D3D-style):
2562 *
2563 * DP4 temp_pos.z, {0,0,.5,.5}, temp_pos
2564 * MOV result.position, temp_pos
2565 */
2566 if (!submit_op2( emit,
2567 inst_token(SVGA3DOP_DP4),
2568 writemask(temp_pos, TGSI_WRITEMASK_Z),
2569 imm_0055,
2570 src(temp_pos) ))
2571 return FALSE;
2572
2573 if (!submit_op1( emit,
2574 inst_token(SVGA3DOP_MOV),
2575 pos,
2576 src(temp_pos) ))
2577 return FALSE;
2578 }
2579
2580 return TRUE;
2581 }
2582
2583 /*
2584 0: IF VFACE :4
2585 1: COLOR = FrontColor;
2586 2: ELSE
2587 3: COLOR = BackColor;
2588 4: ENDIF
2589 */
2590 static boolean emit_light_twoside( struct svga_shader_emitter *emit )
2591 {
2592 struct src_register vface, zero;
2593 struct src_register front[2];
2594 struct src_register back[2];
2595 SVGA3dShaderDestToken color[2];
2596 int count = emit->internal_color_count;
2597 int i;
2598 SVGA3dShaderInstToken if_token;
2599
2600 if (count == 0)
2601 return TRUE;
2602
2603 vface = get_vface( emit );
2604 zero = get_zero_immediate( emit );
2605
2606 /* Can't use get_temp() to allocate the color reg as such
2607 * temporaries will be reclaimed after each instruction by the call
2608 * to reset_temp_regs().
2609 */
2610 for (i = 0; i < count; i++) {
2611 color[i] = dst_register( SVGA3DREG_TEMP,
2612 emit->nr_hw_temp++ );
2613
2614 front[i] = emit->input_map[emit->internal_color_idx[i]];
2615
2616 /* Back is always the next input:
2617 */
2618 back[i] = front[i];
2619 back[i].base.num = front[i].base.num + 1;
2620
2621 /* Reassign the input_map to the actual front-face color:
2622 */
2623 emit->input_map[emit->internal_color_idx[i]] = src(color[i]);
2624 }
2625
2626 if_token = inst_token( SVGA3DOP_IFC );
2627
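/* The sign of the VFACE comparison depends on the winding convention,
 * so flip it according to key.fkey.front_ccw.
 */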
2628 if (emit->key.fkey.front_ccw)
2629 if_token.control = SVGA3DOPCOMP_LT;
2630 else
2631 if_token.control = SVGA3DOPCOMP_GT;
2632
2633 zero = scalar(zero, TGSI_SWIZZLE_X);
2634
2635 if (!(emit_instruction( emit, if_token ) &&
2636 emit_src( emit, vface ) &&
2637 emit_src( emit, zero ) ))
2638 return FALSE;
2639
2640 for (i = 0; i < count; i++) {
2641 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), color[i], front[i] ))
2642 return FALSE;
2643 }
2644
2645 if (!(emit_instruction( emit, inst_token( SVGA3DOP_ELSE))))
2646 return FALSE;
2647
2648 for (i = 0; i < count; i++) {
2649 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), color[i], back[i] ))
2650 return FALSE;
2651 }
2652
2653 if (!emit_instruction( emit, inst_token( SVGA3DOP_ENDIF ) ))
2654 return FALSE;
2655
2656 return TRUE;
2657 }
2658
2659 /*
2660 0: SETP_GT TEMP, VFACE, 0
2661 where TEMP is a fake frontface register
2662 */
2663 static boolean emit_frontface( struct svga_shader_emitter *emit )
2664 {
2665 struct src_register vface, zero;
2666 SVGA3dShaderDestToken temp;
2667 struct src_register pass, fail;
2668
2669 vface = get_vface( emit );
2670 zero = get_zero_immediate( emit );
2671
2672 /* Can't use get_temp() to allocate the fake frontface reg as such
2673 * temporaries will be reclaimed after each instruction by the call
2674 * to reset_temp_regs().
2675 */
2676 temp = dst_register( SVGA3DREG_TEMP,
2677 emit->nr_hw_temp++ );
2678
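/* Select 1.0 ({0}.w) or 0.0 ({0}.x) for the fake frontface value, with
 * the pass/fail choice flipped by key.fkey.front_ccw.
 */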
2679 if (emit->key.fkey.front_ccw) {
2680 pass = scalar( zero, TGSI_SWIZZLE_X );
2681 fail = scalar( zero, TGSI_SWIZZLE_W );
2682 } else {
2683 pass = scalar( zero, TGSI_SWIZZLE_W );
2684 fail = scalar( zero, TGSI_SWIZZLE_X );
2685 }
2686
2687 if (!emit_conditional(emit, PIPE_FUNC_GREATER,
2688 temp, vface, scalar( zero, TGSI_SWIZZLE_X ),
2689 pass, fail))
2690 return FALSE;
2691
2692 /* Reassign the input_map to the actual front-face color:
2693 */
2694 emit->input_map[emit->internal_frontface_idx] = src(temp);
2695
2696 return TRUE;
2697 }
2698
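/* Decide whether the {0, 0, 0, 1} helper immediate is needed by any of
 * the instruction expansions above, so svga_shader_emit_helpers() can
 * define it up front.
 */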
2699 static INLINE boolean
2700 needs_to_create_zero( struct svga_shader_emitter *emit )
2701 {
2702 int i;
2703
2704 if (emit->unit == PIPE_SHADER_FRAGMENT) {
2705 if (!emit->use_sm30)
2706 return TRUE;
2707
2708 if (emit->key.fkey.light_twoside)
2709 return TRUE;
2710
2711 if (emit->key.fkey.white_fragments)
2712 return TRUE;
2713
2714 if (emit->emit_frontface)
2715 return TRUE;
2716
2717 if (emit->info.opcode_count[TGSI_OPCODE_DST] >= 1 ||
2718 emit->info.opcode_count[TGSI_OPCODE_LIT] >= 1)
2719 return TRUE;
2720 }
2721
2722 if (emit->unit == PIPE_SHADER_VERTEX) {
2723 if (emit->info.opcode_count[TGSI_OPCODE_CMP] >= 1)
2724 return TRUE;
2725 }
2726
2727 if (emit->info.opcode_count[TGSI_OPCODE_IF] >= 1 ||
2728 emit->info.opcode_count[TGSI_OPCODE_BGNLOOP] >= 1 ||
2729 emit->info.opcode_count[TGSI_OPCODE_DDX] >= 1 ||
2730 emit->info.opcode_count[TGSI_OPCODE_DDY] >= 1 ||
2731 emit->info.opcode_count[TGSI_OPCODE_SGE] >= 1 ||
2732 emit->info.opcode_count[TGSI_OPCODE_SGT] >= 1 ||
2733 emit->info.opcode_count[TGSI_OPCODE_SLE] >= 1 ||
2734 emit->info.opcode_count[TGSI_OPCODE_SLT] >= 1 ||
2735 emit->info.opcode_count[TGSI_OPCODE_SNE] >= 1 ||
2736 emit->info.opcode_count[TGSI_OPCODE_SEQ] >= 1 ||
2737 emit->info.opcode_count[TGSI_OPCODE_EXP] >= 1 ||
2738 emit->info.opcode_count[TGSI_OPCODE_LOG] >= 1 ||
2739 emit->info.opcode_count[TGSI_OPCODE_XPD] >= 1 ||
2740 emit->info.opcode_count[TGSI_OPCODE_KILP] >= 1)
2741 return TRUE;
2742
2743 for (i = 0; i < emit->key.fkey.num_textures; i++) {
2744 if (emit->key.fkey.tex[i].compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE)
2745 return TRUE;
2746 }
2747
2748 return FALSE;
2749 }
2750
2751 static INLINE boolean
2752 needs_to_create_loop_const( struct svga_shader_emitter *emit )
2753 {
2754 return (emit->info.opcode_count[TGSI_OPCODE_BGNLOOP] >= 1);
2755 }
2756
2757 static INLINE boolean
2758 needs_to_create_sincos_consts( struct svga_shader_emitter *emit )
2759 {
2760 return !emit->use_sm30 && (emit->info.opcode_count[TGSI_OPCODE_SIN] >= 1 ||
2761 emit->info.opcode_count[TGSI_OPCODE_COS] >= 1 ||
2762 emit->info.opcode_count[TGSI_OPCODE_SCS] >= 1);
2763 }
2764
2765 static INLINE boolean
2766 needs_to_create_arl_consts( struct svga_shader_emitter *emit )
2767 {
2768 return (emit->num_arl_consts > 0);
2769 }
2770
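/* The pre-parse pass below scans the token stream for negative constant
 * indices used with ARL-based indirect addressing, recording the most
 * negative offset seen for each ARL; needs_to_create_arl_consts() and
 * create_arl_consts() consume this information later.
 */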
2771 static INLINE boolean
2772 pre_parse_add_indirect( struct svga_shader_emitter *emit,
2773 int num, int current_arl)
2774 {
2775 int i;
2776 assert(num < 0);
2777
2778 for (i = 0; i < emit->num_arl_consts; ++i) {
2779 if (emit->arl_consts[i].arl_num == current_arl)
2780 break;
2781 }
2782 /* new entry */
2783 if (emit->num_arl_consts == i) {
2784 ++emit->num_arl_consts;
2785 }
2786 emit->arl_consts[i].number = (emit->arl_consts[i].number > num) ?
2787 num :
2788 emit->arl_consts[i].number;
2789 emit->arl_consts[i].arl_num = current_arl;
2790 return TRUE;
2791 }
2792
2793 static boolean
2794 pre_parse_instruction( struct svga_shader_emitter *emit,
2795 const struct tgsi_full_instruction *insn,
2796 int current_arl)
2797 {
2798 if (insn->Src[0].Register.Indirect &&
2799 insn->Src[0].Indirect.File == TGSI_FILE_ADDRESS) {
2800 const struct tgsi_full_src_register *reg = &insn->Src[0];
2801 if (reg->Register.Index < 0) {
2802 pre_parse_add_indirect(emit, reg->Register.Index, current_arl);
2803 }
2804 }
2805
2806 if (insn->Src[1].Register.Indirect &&
2807 insn->Src[1].Indirect.File == TGSI_FILE_ADDRESS) {
2808 const struct tgsi_full_src_register *reg = &insn->Src[1];
2809 if (reg->Register.Index < 0) {
2810 pre_parse_add_indirect(emit, reg->Register.Index, current_arl);
2811 }
2812 }
2813
2814 if (insn->Src[2].Register.Indirect &&
2815 insn->Src[2].Indirect.File == TGSI_FILE_ADDRESS) {
2816 const struct tgsi_full_src_register *reg = &insn->Src[2];
2817 if (reg->Register.Index < 0) {
2818 pre_parse_add_indirect(emit, reg->Register.Index, current_arl);
2819 }
2820 }
2821
2822 return TRUE;
2823 }
2824
2825 static boolean
2826 pre_parse_tokens( struct svga_shader_emitter *emit,
2827 const struct tgsi_token *tokens )
2828 {
2829 struct tgsi_parse_context parse;
2830 int current_arl = 0;
2831
2832 tgsi_parse_init( &parse, tokens );
2833
2834 while (!tgsi_parse_end_of_tokens( &parse )) {
2835 tgsi_parse_token( &parse );
2836 switch (parse.FullToken.Token.Type) {
2837 case TGSI_TOKEN_TYPE_IMMEDIATE:
2838 case TGSI_TOKEN_TYPE_DECLARATION:
2839 break;
2840 case TGSI_TOKEN_TYPE_INSTRUCTION:
2841 if (parse.FullToken.FullInstruction.Instruction.Opcode ==
2842 TGSI_OPCODE_ARL) {
2843 ++current_arl;
2844 }
2845 if (!pre_parse_instruction( emit, &parse.FullToken.FullInstruction,
2846 current_arl ))
2847 return FALSE;
2848 break;
2849 default:
2850 break;
2851 }
2852
2853 }
2854 return TRUE;
2855 }
2856
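/* Emit the helper constants and input fixups (zero immediate, loop and
 * sincos constants, two-sided lighting, frontface) that the instruction
 * translations rely on.  Called lazily just before the first instruction
 * is translated.
 */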
2857 static boolean svga_shader_emit_helpers( struct svga_shader_emitter *emit )
2858
2859 {
2860 if (needs_to_create_zero( emit )) {
2861 create_zero_immediate( emit );
2862 }
2863 if (needs_to_create_loop_const( emit )) {
2864 create_loop_const( emit );
2865 }
2866 if (needs_to_create_sincos_consts( emit )) {
2867 create_sincos_consts( emit );
2868 }
2869 if (needs_to_create_arl_consts( emit )) {
2870 create_arl_consts( emit );
2871 }
2872
2873 if (emit->unit == PIPE_SHADER_FRAGMENT) {
2874 if (!emit_ps_preamble( emit ))
2875 return FALSE;
2876
2877 if (emit->key.fkey.light_twoside) {
2878 if (!emit_light_twoside( emit ))
2879 return FALSE;
2880 }
2881 if (emit->emit_frontface) {
2882 if (!emit_frontface( emit ))
2883 return FALSE;
2884 }
2885 }
2886
2887 return TRUE;
2888 }
2889
2890 boolean svga_shader_emit_instructions( struct svga_shader_emitter *emit,
2891 const struct tgsi_token *tokens )
2892 {
2893 struct tgsi_parse_context parse;
2894 boolean ret = TRUE;
2895 boolean helpers_emitted = FALSE;
2896 unsigned line_nr = 0;
2897
2898 tgsi_parse_init( &parse, tokens );
2899 emit->internal_imm_count = 0;
2900
2901 if (emit->unit == PIPE_SHADER_VERTEX) {
2902 ret = emit_vs_preamble( emit );
2903 if (!ret)
2904 goto done;
2905 }
2906
2907 pre_parse_tokens(emit, tokens);
2908
2909 while (!tgsi_parse_end_of_tokens( &parse )) {
2910 tgsi_parse_token( &parse );
2911
2912 switch (parse.FullToken.Token.Type) {
2913 case TGSI_TOKEN_TYPE_IMMEDIATE:
2914 ret = svga_emit_immediate( emit, &parse.FullToken.FullImmediate );
2915 if (!ret)
2916 goto done;
2917 break;
2918
2919 case TGSI_TOKEN_TYPE_DECLARATION:
2920 if (emit->use_sm30)
2921 ret = svga_translate_decl_sm30( emit, &parse.FullToken.FullDeclaration );
2922 else
2923 ret = svga_translate_decl_sm20( emit, &parse.FullToken.FullDeclaration );
2924 if (!ret)
2925 goto done;
2926 break;
2927
2928 case TGSI_TOKEN_TYPE_INSTRUCTION:
2929 if (!helpers_emitted) {
2930 if (!svga_shader_emit_helpers( emit ))
2931 goto done;
2932 helpers_emitted = TRUE;
2933 }
2934 ret = svga_emit_instruction( emit,
2935 line_nr++,
2936 &parse.FullToken.FullInstruction );
2937 if (!ret)
2938 goto done;
2939 break;
2940 default:
2941 break;
2942 }
2943
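/* Temporaries handed out by get_temp() are only valid for the duration
 * of a single TGSI instruction; reclaim them here.
 */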
2944 reset_temp_regs( emit );
2945 }
2946
2947 /* If we finished inside a subroutine rather than the main function,
2948 * terminate it here: the hardware rejects shaders whose subroutines
2949 * don't end with RET followed by END.
2950 */
2951 if (!emit->in_main_func) {
2952 ret = emit_instruction( emit, inst_token( SVGA3DOP_RET ) );
2953 if (!ret)
2954 goto done;
2955 }
2956
2957 assert(emit->dynamic_branching_level == 0);
2958
2959 /* Need to terminate the whole shader:
2960 */
2961 ret = emit_instruction( emit, inst_token( SVGA3DOP_END ) );
2962 if (!ret)
2963 goto done;
2964
2965 done:
2966 assert(ret);
2967 tgsi_parse_free( &parse );
2968 return ret;
2969 }
2970