src/mesa/state_tracker/st_mesa_to_tgsi.c

   1 /**************************************************************************
   2  *
   3  * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
   4  * All Rights Reserved.
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a
   7  * copy of this software and associated documentation files (the
   8  * "Software"), to deal in the Software without restriction, including
   9  * without limitation the rights to use, copy, modify, merge, publish,
  10  * distribute, sub license, and/or sell copies of the Software, and to
  11  * permit persons to whom the Software is furnished to do so, subject to
  12  * the following conditions:
  13  *
  14  * The above copyright notice and this permission notice (including the
  15  * next paragraph) shall be included in all copies or substantial portions
  16  * of the Software.
  17  *
  18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  21  * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
  22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25  *
  26  **************************************************************************/
  27
  28 /*
  29  * \author
  30  * Michal Krol,
  31  * Keith Whitwell
  32  */
  33
  34 #include "pipe/p_compiler.h"
  35 #include "pipe/p_shader_tokens.h"
  36 #include "pipe/p_state.h"
  37 #include "pipe/p_context.h"
  38 #include "tgsi/tgsi_ureg.h"
  39 #include "st_mesa_to_tgsi.h"
  40 #include "st_context.h"
  41 #include "program/prog_instruction.h"
  42 #include "program/prog_parameter.h"
  43 #include "util/u_debug.h"
  44 #include "util/u_math.h"
  45 #include "util/u_memory.h"
  46
  47
  48 #define PROGRAM_ANY_CONST ((1 << PROGRAM_LOCAL_PARAM) |  \
  49                            (1 << PROGRAM_ENV_PARAM) |    \
  50                            (1 << PROGRAM_STATE_VAR) |    \
  51                            (1 << PROGRAM_NAMED_PARAM) |  \
  52                            (1 << PROGRAM_CONSTANT) |     \
  53                            (1 << PROGRAM_UNIFORM))
  54
  55
  56 struct label {
  57    unsigned branch_target;
  58    unsigned token;
  59 };
  60
  61
  62 /**
  63  * Intermediate state used during shader translation.
  64  */
  65 struct st_translate {
  66    struct ureg_program *ureg;
  67
  68    struct ureg_dst temps[MAX_PROGRAM_TEMPS];
  69    struct ureg_src *constants;
  70    struct ureg_dst outputs[PIPE_MAX_SHADER_OUTPUTS];
  71    struct ureg_src inputs[PIPE_MAX_SHADER_INPUTS];
  72    struct ureg_dst address[1];
  73    struct ureg_src samplers[PIPE_MAX_SAMPLERS];
  74
  75    /* Extra info for handling point size clamping in vertex shader */
  76    struct ureg_dst pointSizeResult; /**< Actual point size output register */
  77    struct ureg_src pointSizeConst;  /**< Point size range constant register */
  78    GLint pointSizeOutIndex;         /**< Temp point size output register */
  79    GLboolean prevInstWrotePointSize;
  80
  81    const GLuint *inputMapping;
  82    const GLuint *outputMapping;
  83
  84    /* For every instruction that contains a label (eg CALL), keep
  85     * details so that we can go back afterwards and emit the correct
  86     * tgsi instruction number for each label.
  87     */
  88    struct label *labels;
  89    unsigned labels_size;
  90    unsigned labels_count;
  91
  92    /* Keep a record of the tgsi instruction number that each mesa
  93     * instruction starts at, will be used to fix up labels after
  94     * translation.
  95     */
  96    unsigned *insn;
  97    unsigned insn_size;
  98    unsigned insn_count;
  99
 100    unsigned procType;  /**< TGSI_PROCESSOR_VERTEX/FRAGMENT */
 101
 102    boolean error;
 103 };
 104
 105
 106 /**
 107  * Make note of a branch to a label in the TGSI code.
 108  * After we've emitted all instructions, we'll go over the list
 109  * of labels built here and patch the TGSI code with the actual
 110  * location of each label.
 111  */
 112 static unsigned *get_label( struct st_translate *t,
 113                             unsigned branch_target )
 114 {
 115    unsigned i;
 116
 117    if (t->labels_count + 1 >= t->labels_size) {
 118       unsigned old_size = t->labels_size;
 119       t->labels_size = 1 << (util_logbase2(t->labels_size) + 1);
 120       t->labels = REALLOC( t->labels,
 121                            old_size * sizeof t->labels[0],
 122                            t->labels_size * sizeof t->labels[0] );
 123       if (t->labels == NULL) {
 124          static unsigned dummy;
 125          t->error = TRUE;
 126          return &dummy;
 127       }
 128    }
 129
 130    i = t->labels_count++;
 131    t->labels[i].branch_target = branch_target;
 132    return &t->labels[i].token;
 133 }
 134
 135
 136 /**
 137  * Called prior to emitting the TGSI code for each Mesa instruction.
 138  * Allocate additional space for instructions if needed.
 139  * Update the insn[] array so the next Mesa instruction points to
 140  * the next TGSI instruction.
 141  */
 142 static void set_insn_start( struct st_translate *t,
 143                             unsigned start )
 144 {
 145    if (t->insn_count + 1 >= t->insn_size) {
 146       unsigned old_size = t->insn_size;
 147       t->insn_size = 1 << (util_logbase2(t->insn_size) + 1);
 148       t->insn = REALLOC( t->insn,
 149                          old_size * sizeof t->insn[0],
 150                          t->insn_size * sizeof t->insn[0] );
 151       if (t->insn == NULL) {
 152          t->error = TRUE;
 153          return;
 154       }
 155    }
 156
 157    t->insn[t->insn_count++] = start;
 158 }
 159
 160
 161 /**
 162  * Map a Mesa dst register to a TGSI ureg_dst register.
 163  */
 164 static struct ureg_dst
 165 dst_register( struct st_translate *t,
 166               gl_register_file file,
 167               GLuint index )
 168 {
 169    switch( file ) {
 170    case PROGRAM_UNDEFINED:
 171       return ureg_dst_undef();
 172
 173    case PROGRAM_TEMPORARY:
 174       if (ureg_dst_is_undef(t->temps[index]))
 175          t->temps[index] = ureg_DECL_temporary( t->ureg );
 176
 177       return t->temps[index];
 178
 179    case PROGRAM_OUTPUT:
 180       if (t->procType == TGSI_PROCESSOR_VERTEX && index == VERT_RESULT_PSIZ)
 181          t->prevInstWrotePointSize = GL_TRUE;
 182
 183       if (t->procType == TGSI_PROCESSOR_VERTEX)
 184          assert(index < VERT_RESULT_MAX);
 185       else if (t->procType == TGSI_PROCESSOR_FRAGMENT)
 186          assert(index < FRAG_RESULT_MAX);
 187       else
 188          assert(index < GEOM_RESULT_MAX);
 189
 190       assert(t->outputMapping[index] < Elements(t->outputs));
 191
 192       return t->outputs[t->outputMapping[index]];
 193
 194    case PROGRAM_ADDRESS:
 195       return t->address[index];
 196
 197    default:
 198       debug_assert( 0 );
 199       return ureg_dst_undef();
 200    }
 201 }
 202
 203
 204 /**
 205  * Map a Mesa src register to a TGSI ureg_src register.
 206  */
 207 static struct ureg_src
 208 src_register( struct st_translate *t,
 209               gl_register_file file,
 210               GLint index )
 211 {
 212    switch( file ) {
 213    case PROGRAM_UNDEFINED:
 214       return ureg_src_undef();
 215
 216    case PROGRAM_TEMPORARY:
 217       ASSERT(index >= 0);
 218       if (ureg_dst_is_undef(t->temps[index]))
 219          t->temps[index] = ureg_DECL_temporary( t->ureg );
 220       assert(index < Elements(t->temps));
 221       return ureg_src(t->temps[index]);
 222
 223    case PROGRAM_NAMED_PARAM:
 224    case PROGRAM_ENV_PARAM:
 225    case PROGRAM_LOCAL_PARAM:
 226    case PROGRAM_UNIFORM:
 227       ASSERT(index >= 0);
 228       return t->constants[index];
 229    case PROGRAM_STATE_VAR:
 230    case PROGRAM_CONSTANT:       /* ie, immediate */
 231       if (index < 0)
 232          return ureg_DECL_constant( t->ureg, 0 );
 233       else
 234          return t->constants[index];
 235
 236    case PROGRAM_INPUT:
 237       assert(t->inputMapping[index] < Elements(t->inputs));
 238       return t->inputs[t->inputMapping[index]];
 239
 240    case PROGRAM_OUTPUT:
 241       assert(t->outputMapping[index] < Elements(t->outputs));
 242       return ureg_src(t->outputs[t->outputMapping[index]]); /* not needed? */
 243
 244    case PROGRAM_ADDRESS:
 245       return ureg_src(t->address[index]);
 246
 247    default:
 248       debug_assert( 0 );
 249       return ureg_src_undef();
 250    }
 251 }
 252
 253
 254 /**
 255  * Map mesa texture target to TGSI texture target.
 256  */
 257 static unsigned
 258 translate_texture_target( GLuint textarget,
 259                           GLboolean shadow )
 260 {
 261    if (shadow) {
 262       switch( textarget ) {
 263       case TEXTURE_1D_INDEX:   return TGSI_TEXTURE_SHADOW1D;
 264       case TEXTURE_2D_INDEX:   return TGSI_TEXTURE_SHADOW2D;
 265       case TEXTURE_RECT_INDEX: return TGSI_TEXTURE_SHADOWRECT;
 266       default: break;
 267       }
 268    }
 269
 270    switch( textarget ) {
 271    case TEXTURE_1D_INDEX:   return TGSI_TEXTURE_1D;
 272    case TEXTURE_2D_INDEX:   return TGSI_TEXTURE_2D;
 273    case TEXTURE_3D_INDEX:   return TGSI_TEXTURE_3D;
 274    case TEXTURE_CUBE_INDEX: return TGSI_TEXTURE_CUBE;
 275    case TEXTURE_RECT_INDEX: return TGSI_TEXTURE_RECT;
 276    default:
 277       debug_assert( 0 );
 278       return TGSI_TEXTURE_1D;
 279    }
 280 }
 281
 282
 283 /**
 284  * Create a TGSI ureg_dst register from a Mesa dest register.
 285  */
 286 static struct ureg_dst
 287 translate_dst( struct st_translate *t,
 288                const struct prog_dst_register *DstReg,
 289                boolean saturate )
 290 {
 291    struct ureg_dst dst = dst_register( t,
 292                                        DstReg->File,
 293                                        DstReg->Index );
 294
 295    dst = ureg_writemask( dst,
 296                          DstReg->WriteMask );
 297
 298    if (saturate)
 299       dst = ureg_saturate( dst );
 300
 301    if (DstReg->RelAddr)
 302       dst = ureg_dst_indirect( dst, ureg_src(t->address[0]) );
 303
 304    return dst;
 305 }
 306
 307
 308 /**
 309  * Create a TGSI ureg_src register from a Mesa src register.
 310  */
 311 static struct ureg_src
 312 translate_src( struct st_translate *t,
 313                const struct prog_src_register *SrcReg )
 314 {
 315    struct ureg_src src = src_register( t, SrcReg->File, SrcReg->Index );
 316
 317    if (t->procType == TGSI_PROCESSOR_GEOMETRY && SrcReg->HasIndex2) {
 318       src = src_register( t, SrcReg->File, SrcReg->Index2 );
 319       if (SrcReg->RelAddr2)
 320          src = ureg_src_dimension_indirect( src, ureg_src(t->address[0]),
 321                                             SrcReg->Index);
 322       else
 323          src = ureg_src_dimension( src, SrcReg->Index);
 324    }
 325
 326    src = ureg_swizzle( src,
 327                        GET_SWZ( SrcReg->Swizzle, 0 ) & 0x3,
 328                        GET_SWZ( SrcReg->Swizzle, 1 ) & 0x3,
 329                        GET_SWZ( SrcReg->Swizzle, 2 ) & 0x3,
 330                        GET_SWZ( SrcReg->Swizzle, 3 ) & 0x3);
 331
 332    if (SrcReg->Negate == NEGATE_XYZW)
 333       src = ureg_negate(src);
 334
 335    if (SrcReg->Abs)
 336       src = ureg_abs(src);
 337
 338    if (SrcReg->RelAddr) {
 339       src = ureg_src_indirect( src, ureg_src(t->address[0]));
 340       if (SrcReg->File != PROGRAM_INPUT &&
 341           SrcReg->File != PROGRAM_OUTPUT) {
 342          /* If SrcReg->Index was negative, it was set to zero in
 343           * src_register().  Reassign it now.  But don't do this
 344           * for input/output regs since they get remapped while
 345           * const buffers don't.
 346           */
 347          src.Index = SrcReg->Index;
 348       }
 349    }
 350
 351    return src;
 352 }
 353
 354
 355 static struct ureg_src swizzle_4v( struct ureg_src src,
 356                                    const unsigned *swz )
 357 {
 358    return ureg_swizzle( src, swz[0], swz[1], swz[2], swz[3] );
 359 }
 360
 361
 362 /**
 363  * Translate a SWZ instruction into a MOV, MUL or MAD instruction.  EG:
 364  *
 365  *   SWZ dst, src.x-y10
 366  *
 367  * becomes:
 368  *
 369  *   MAD dst {1,-1,0,0}, src.xyxx, {0,0,1,0}
 370  */
 371 static void emit_swz( struct st_translate *t,
 372                       struct ureg_dst dst,
 373                       const struct prog_src_register *SrcReg )
 374 {
 375    struct ureg_program *ureg = t->ureg;
 376    struct ureg_src src = src_register( t, SrcReg->File, SrcReg->Index );
 377
 378    unsigned negate_mask =  SrcReg->Negate;
 379
 380    unsigned one_mask = ((GET_SWZ(SrcReg->Swizzle, 0) == SWIZZLE_ONE) << 0 |
 381                         (GET_SWZ(SrcReg->Swizzle, 1) == SWIZZLE_ONE) << 1 |
 382                         (GET_SWZ(SrcReg->Swizzle, 2) == SWIZZLE_ONE) << 2 |
 383                         (GET_SWZ(SrcReg->Swizzle, 3) == SWIZZLE_ONE) << 3);
 384
 385    unsigned zero_mask = ((GET_SWZ(SrcReg->Swizzle, 0) == SWIZZLE_ZERO) << 0 |
 386                          (GET_SWZ(SrcReg->Swizzle, 1) == SWIZZLE_ZERO) << 1 |
 387                          (GET_SWZ(SrcReg->Swizzle, 2) == SWIZZLE_ZERO) << 2 |
 388                          (GET_SWZ(SrcReg->Swizzle, 3) == SWIZZLE_ZERO) << 3);
 389
 390    unsigned negative_one_mask = one_mask & negate_mask;
 391    unsigned positive_one_mask = one_mask & ~negate_mask;
 392
 393    struct ureg_src imm;
 394    unsigned i;
 395    unsigned mul_swizzle[4] = {0,0,0,0};
 396    unsigned add_swizzle[4] = {0,0,0,0};
 397    unsigned src_swizzle[4] = {0,0,0,0};
 398    boolean need_add = FALSE;
 399    boolean need_mul = FALSE;
 400
 401    if (dst.WriteMask == 0)
 402       return;
 403
 404    /* Is this just a MOV?
 405     */
 406    if (zero_mask == 0 &&
 407        one_mask == 0 &&
 408        (negate_mask == 0 || negate_mask == TGSI_WRITEMASK_XYZW))
 409    {
 410       ureg_MOV( ureg, dst, translate_src( t, SrcReg ));
 411       return;
 412    }
 413
 414 #define IMM_ZERO    0
 415 #define IMM_ONE     1
 416 #define IMM_NEG_ONE 2
 417
 418    imm = ureg_imm3f( ureg, 0, 1, -1 );
 419
 420    for (i = 0; i < 4; i++) {
 421       unsigned bit = 1 << i;
 422
 423       if (dst.WriteMask & bit) {
 424          if (positive_one_mask & bit) {
 425             mul_swizzle[i] = IMM_ZERO;
 426             add_swizzle[i] = IMM_ONE;
 427             need_add = TRUE;
 428          }
 429          else if (negative_one_mask & bit) {
 430             mul_swizzle[i] = IMM_ZERO;
 431             add_swizzle[i] = IMM_NEG_ONE;
 432             need_add = TRUE;
 433          }
 434          else if (zero_mask & bit) {
 435             mul_swizzle[i] = IMM_ZERO;
 436             add_swizzle[i] = IMM_ZERO;
 437             need_add = TRUE;
 438          }
 439          else {
 440             add_swizzle[i] = IMM_ZERO;
 441             src_swizzle[i] = GET_SWZ(SrcReg->Swizzle, i);
 442             need_mul = TRUE;
 443             if (negate_mask & bit) {
 444                mul_swizzle[i] = IMM_NEG_ONE;
 445             }
 446             else {
 447                mul_swizzle[i] = IMM_ONE;
 448             }
 449          }
 450       }
 451    }
 452
 453    if (need_mul && need_add) {
 454       ureg_MAD( ureg,
 455                 dst,
 456                 swizzle_4v( src, src_swizzle ),
 457                 swizzle_4v( imm, mul_swizzle ),
 458                 swizzle_4v( imm, add_swizzle ) );
 459    }
 460    else if (need_mul) {
 461       ureg_MUL( ureg,
 462                 dst,
 463                 swizzle_4v( src, src_swizzle ),
 464                 swizzle_4v( imm, mul_swizzle ) );
 465    }
 466    else if (need_add) {
 467       ureg_MOV( ureg,
 468                 dst,
 469                 swizzle_4v( imm, add_swizzle ) );
 470    }
 471    else {
 472       debug_assert(0);
 473    }
 474
 475 #undef IMM_ZERO
 476 #undef IMM_ONE
 477 #undef IMM_NEG_ONE
 478 }
 479
 480
 481 /**
 482  * Negate the value of DDY to match GL semantics where (0,0) is the
 483  * lower-left corner of the window.
 484  * Note that the GL_ARB_fragment_coord_conventions extension will
 485  * effect this someday.
 486  */
 487 static void emit_ddy( struct st_translate *t,
 488                       struct ureg_dst dst,
 489                       const struct prog_src_register *SrcReg )
 490 {
 491    struct ureg_program *ureg = t->ureg;
 492    struct ureg_src src = translate_src( t, SrcReg );
 493    src = ureg_negate( src );
 494    ureg_DDY( ureg, dst, src );
 495 }
 496
 497
 498
 499 static unsigned
 500 translate_opcode( unsigned op )
 501 {
 502    switch( op ) {
 503    case OPCODE_ARL:
 504       return TGSI_OPCODE_ARL;
 505    case OPCODE_ABS:
 506       return TGSI_OPCODE_ABS;
 507    case OPCODE_ADD:
 508       return TGSI_OPCODE_ADD;
 509    case OPCODE_BGNLOOP:
 510       return TGSI_OPCODE_BGNLOOP;
 511    case OPCODE_BGNSUB:
 512       return TGSI_OPCODE_BGNSUB;
 513    case OPCODE_BRA:
 514       return TGSI_OPCODE_BRA;
 515    case OPCODE_BRK:
 516       return TGSI_OPCODE_BRK;
 517    case OPCODE_CAL:
 518       return TGSI_OPCODE_CAL;
 519    case OPCODE_CMP:
 520       return TGSI_OPCODE_CMP;
 521    case OPCODE_CONT:
 522       return TGSI_OPCODE_CONT;
 523    case OPCODE_COS:
 524       return TGSI_OPCODE_COS;
 525    case OPCODE_DDX:
 526       return TGSI_OPCODE_DDX;
 527    case OPCODE_DDY:
 528       return TGSI_OPCODE_DDY;
 529    case OPCODE_DP2:
 530       return TGSI_OPCODE_DP2;
 531    case OPCODE_DP2A:
 532       return TGSI_OPCODE_DP2A;
 533    case OPCODE_DP3:
 534       return TGSI_OPCODE_DP3;
 535    case OPCODE_DP4:
 536       return TGSI_OPCODE_DP4;
 537    case OPCODE_DPH:
 538       return TGSI_OPCODE_DPH;
 539    case OPCODE_DST:
 540       return TGSI_OPCODE_DST;
 541    case OPCODE_ELSE:
 542       return TGSI_OPCODE_ELSE;
 543    case OPCODE_EMIT_VERTEX:
 544       return TGSI_OPCODE_EMIT;
 545    case OPCODE_END_PRIMITIVE:
 546       return TGSI_OPCODE_ENDPRIM;
 547    case OPCODE_ENDIF:
 548       return TGSI_OPCODE_ENDIF;
 549    case OPCODE_ENDLOOP:
 550       return TGSI_OPCODE_ENDLOOP;
 551    case OPCODE_ENDSUB:
 552       return TGSI_OPCODE_ENDSUB;
 553    case OPCODE_EX2:
 554       return TGSI_OPCODE_EX2;
 555    case OPCODE_EXP:
 556       return TGSI_OPCODE_EXP;
 557    case OPCODE_FLR:
 558       return TGSI_OPCODE_FLR;
 559    case OPCODE_FRC:
 560       return TGSI_OPCODE_FRC;
 561    case OPCODE_IF:
 562       return TGSI_OPCODE_IF;
 563    case OPCODE_TRUNC:
 564       return TGSI_OPCODE_TRUNC;
 565    case OPCODE_KIL:
 566       return TGSI_OPCODE_KIL;
 567    case OPCODE_KIL_NV:
 568       return TGSI_OPCODE_KILP;
 569    case OPCODE_LG2:
 570       return TGSI_OPCODE_LG2;
 571    case OPCODE_LOG:
 572       return TGSI_OPCODE_LOG;
 573    case OPCODE_LIT:
 574       return TGSI_OPCODE_LIT;
 575    case OPCODE_LRP:
 576       return TGSI_OPCODE_LRP;
 577    case OPCODE_MAD:
 578       return TGSI_OPCODE_MAD;
 579    case OPCODE_MAX:
 580       return TGSI_OPCODE_MAX;
 581    case OPCODE_MIN:
 582       return TGSI_OPCODE_MIN;
 583    case OPCODE_MOV:
 584       return TGSI_OPCODE_MOV;
 585    case OPCODE_MUL:
 586       return TGSI_OPCODE_MUL;
 587    case OPCODE_NOP:
 588       return TGSI_OPCODE_NOP;
 589    case OPCODE_NRM3:
 590       return TGSI_OPCODE_NRM;
 591    case OPCODE_NRM4:
 592       return TGSI_OPCODE_NRM4;
 593    case OPCODE_POW:
 594       return TGSI_OPCODE_POW;
 595    case OPCODE_RCP:
 596       return TGSI_OPCODE_RCP;
 597    case OPCODE_RET:
 598       return TGSI_OPCODE_RET;
 599    case OPCODE_RSQ:
 600       return TGSI_OPCODE_RSQ;
 601    case OPCODE_SCS:
 602       return TGSI_OPCODE_SCS;
 603    case OPCODE_SEQ:
 604       return TGSI_OPCODE_SEQ;
 605    case OPCODE_SGE:
 606       return TGSI_OPCODE_SGE;
 607    case OPCODE_SGT:
 608       return TGSI_OPCODE_SGT;
 609    case OPCODE_SIN:
 610       return TGSI_OPCODE_SIN;
 611    case OPCODE_SLE:
 612       return TGSI_OPCODE_SLE;
 613    case OPCODE_SLT:
 614       return TGSI_OPCODE_SLT;
 615    case OPCODE_SNE:
 616       return TGSI_OPCODE_SNE;
 617    case OPCODE_SSG:
 618       return TGSI_OPCODE_SSG;
 619    case OPCODE_SUB:
 620       return TGSI_OPCODE_SUB;
 621    case OPCODE_TEX:
 622       return TGSI_OPCODE_TEX;
 623    case OPCODE_TXB:
 624       return TGSI_OPCODE_TXB;
 625    case OPCODE_TXD:
 626       return TGSI_OPCODE_TXD;
 627    case OPCODE_TXL:
 628       return TGSI_OPCODE_TXL;
 629    case OPCODE_TXP:
 630       return TGSI_OPCODE_TXP;
 631    case OPCODE_XPD:
 632       return TGSI_OPCODE_XPD;
 633    case OPCODE_END:
 634       return TGSI_OPCODE_END;
 635    default:
 636       debug_assert( 0 );
 637       return TGSI_OPCODE_NOP;
 638    }
 639 }
 640
 641
 642 static void
 643 compile_instruction(
 644    struct st_translate *t,
 645    const struct prog_instruction *inst )
 646 {
 647    struct ureg_program *ureg = t->ureg;
 648    GLuint i;
 649    struct ureg_dst dst[1];
 650    struct ureg_src src[4];
 651    unsigned num_dst;
 652    unsigned num_src;
 653
 654    num_dst = _mesa_num_inst_dst_regs( inst->Opcode );
 655    num_src = _mesa_num_inst_src_regs( inst->Opcode );
 656
 657    if (num_dst)
 658       dst[0] = translate_dst( t,
 659                               &inst->DstReg,
 660                               inst->SaturateMode );
 661
 662    for (i = 0; i < num_src; i++)
 663       src[i] = translate_src( t, &inst->SrcReg[i] );
 664
 665    switch( inst->Opcode ) {
 666    case OPCODE_SWZ:
 667       emit_swz( t, dst[0], &inst->SrcReg[0] );
 668       return;
 669
 670    case OPCODE_BGNLOOP:
 671    case OPCODE_CAL:
 672    case OPCODE_ELSE:
 673    case OPCODE_ENDLOOP:
 674    case OPCODE_IF:
 675       debug_assert(num_dst == 0);
 676       ureg_label_insn( ureg,
 677                        translate_opcode( inst->Opcode ),
 678                        src, num_src,
 679                        get_label( t, inst->BranchTarget ));
 680       return;
 681
 682    case OPCODE_TEX:
 683    case OPCODE_TXB:
 684    case OPCODE_TXD:
 685    case OPCODE_TXL:
 686    case OPCODE_TXP:
 687       src[num_src++] = t->samplers[inst->TexSrcUnit];
 688       ureg_tex_insn( ureg,
 689                      translate_opcode( inst->Opcode ),
 690                      dst, num_dst,
 691                      translate_texture_target( inst->TexSrcTarget,
 692                                                inst->TexShadow ),
 693                      src, num_src );
 694       return;
 695
 696    case OPCODE_SCS:
 697       dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XY );
 698       ureg_insn( ureg,
 699                  translate_opcode( inst->Opcode ),
 700                  dst, num_dst,
 701                  src, num_src );
 702       break;
 703
 704    case OPCODE_XPD:
 705       dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XYZ );
 706       ureg_insn( ureg,
 707                  translate_opcode( inst->Opcode ),
 708                  dst, num_dst,
 709                  src, num_src );
 710       break;
 711
 712    case OPCODE_NOISE1:
 713    case OPCODE_NOISE2:
 714    case OPCODE_NOISE3:
 715    case OPCODE_NOISE4:
 716       /* At some point, a motivated person could add a better
 717        * implementation of noise.  Currently not even the nvidia
 718        * binary drivers do anything more than this.  In any case, the
 719        * place to do this is in the GL state tracker, not the poor
 720        * driver.
 721        */
 722       ureg_MOV( ureg, dst[0], ureg_imm1f(ureg, 0.5) );
 723       break;
 724
 725    case OPCODE_DDY:
 726       emit_ddy( t, dst[0], &inst->SrcReg[0] );
 727       break;
 728
 729    default:
 730       ureg_insn( ureg,
 731                  translate_opcode( inst->Opcode ),
 732                  dst, num_dst,
 733                  src, num_src );
 734       break;
 735    }
 736 }
 737
 738
 739 /**
 740  * Emit the TGSI instructions to adjust the WPOS pixel center convention
 741  */
 742 static void
 743 emit_adjusted_wpos( struct st_translate *t,
 744                     const struct gl_program *program, GLfloat value)
 745 {
 746    struct ureg_program *ureg = t->ureg;
 747    struct ureg_dst wpos_temp = ureg_DECL_temporary(ureg);
 748    struct ureg_src wpos_input = t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]];
 749
 750    /* Note that we bias X and Y and pass Z and W through unchanged.
 751     * The shader might also use gl_FragCoord.w and .z.
 752     */
 753    ureg_ADD(ureg, wpos_temp, wpos_input,
 754             ureg_imm4f(ureg, value, value, 0.0f, 0.0f));
 755
 756    t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]] = ureg_src(wpos_temp);
 757 }
 758
 759
 760 /**
 761  * Emit the TGSI instructions for inverting the WPOS y coordinate.
 762  */
 763 static void
 764 emit_inverted_wpos( struct st_translate *t,
 765                     const struct gl_program *program )
 766 {
 767    struct ureg_program *ureg = t->ureg;
 768
 769    /* Fragment program uses fragment position input.
 770     * Need to replace instances of INPUT[WPOS] with temp T
 771     * where T = INPUT[WPOS] by y is inverted.
 772     */
 773    static const gl_state_index winSizeState[STATE_LENGTH]
 774       = { STATE_INTERNAL, STATE_FB_SIZE, 0, 0, 0 };
 775
 776    /* XXX: note we are modifying the incoming shader here!  Need to
 777     * do this before emitting the constant decls below, or this
 778     * will be missed:
 779     */
 780    unsigned winHeightConst = _mesa_add_state_reference(program->Parameters,
 781                                                        winSizeState);
 782
 783    struct ureg_src winsize = ureg_DECL_constant( ureg, winHeightConst );
 784    struct ureg_dst wpos_temp;
 785    struct ureg_src wpos_input = t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]];
 786
 787    /* MOV wpos_temp, input[wpos]
 788     */
 789    if (wpos_input.File == TGSI_FILE_TEMPORARY)
 790       wpos_temp = ureg_dst(wpos_input);
 791    else {
 792       wpos_temp = ureg_DECL_temporary( ureg );
 793       ureg_MOV( ureg, wpos_temp, wpos_input );
 794    }
 795
 796    /* SUB wpos_temp.y, winsize_const, wpos_input
 797     */
 798    ureg_SUB( ureg,
 799              ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y ),
 800              winsize,
 801              wpos_input);
 802
 803    /* Use wpos_temp as position input from here on:
 804     */
 805    t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]] = ureg_src(wpos_temp);
 806 }
 807
 808
 809 /**
 810  * Emit fragment position/ooordinate code.
 811  */
 812 static void
 813 emit_wpos(struct st_context *st,
 814           struct st_translate *t,
 815           const struct gl_program *program,
 816           struct ureg_program *ureg)
 817 {
 818    const struct gl_fragment_program *fp =
 819       (const struct gl_fragment_program *) program;
 820    struct pipe_screen *pscreen = st->pipe->screen;
 821    boolean invert = FALSE;
 822
 823    if (fp->OriginUpperLeft) {
 824       if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT)) {
 825       }
 826       else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT)) {
 827          ureg_property_fs_coord_origin(ureg, TGSI_FS_COORD_ORIGIN_LOWER_LEFT);
 828          invert = TRUE;
 829       }
 830       else
 831          assert(0);
 832    }
 833    else {
 834       if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT))
 835          ureg_property_fs_coord_origin(ureg, TGSI_FS_COORD_ORIGIN_LOWER_LEFT);
 836       else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT))
 837          invert = TRUE;
 838       else
 839          assert(0);
 840    }
 841
 842    if (fp->PixelCenterInteger) {
 843       if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER))
 844          ureg_property_fs_coord_pixel_center(ureg, TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
 845       else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER))
 846          emit_adjusted_wpos(t, program, invert ? 0.5f : -0.5f);
 847       else
 848          assert(0);
 849    }
 850    else {
 851       if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER)) {
 852       }
 853       else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER)) {
 854          ureg_property_fs_coord_pixel_center(ureg, TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
 855          emit_adjusted_wpos(t, program, invert ? -0.5f : 0.5f);
 856       }
 857       else
 858          assert(0);
 859    }
 860
 861    /* we invert after adjustment so that we avoid the MOV to temporary,
 862     * and reuse the adjustment ADD instead */
 863    if (invert)
 864       emit_inverted_wpos(t, program);
 865 }
 866
 867
 868 /**
 869  * OpenGL's fragment gl_FrontFace input is 1 for front-facing, 0 for back.
 870  * TGSI uses +1 for front, -1 for back.
 871  * This function converts the TGSI value to the GL value.  Simply clamping/
 872  * saturating the value to [0,1] does the job.
 873  */
 874 static void
 875 emit_face_var( struct st_translate *t,
 876                const struct gl_program *program )
 877 {
 878    struct ureg_program *ureg = t->ureg;
 879    struct ureg_dst face_temp = ureg_DECL_temporary( ureg );
 880    struct ureg_src face_input = t->inputs[t->inputMapping[FRAG_ATTRIB_FACE]];
 881
 882    /* MOV_SAT face_temp, input[face]
 883     */
 884    face_temp = ureg_saturate( face_temp );
 885    ureg_MOV( ureg, face_temp, face_input );
 886
 887    /* Use face_temp as face input from here on:
 888     */
 889    t->inputs[t->inputMapping[FRAG_ATTRIB_FACE]] = ureg_src(face_temp);
 890 }
 891
 892
 893 static void
 894 emit_edgeflags( struct st_translate *t,
 895                  const struct gl_program *program )
 896 {
 897    struct ureg_program *ureg = t->ureg;
 898    struct ureg_dst edge_dst = t->outputs[t->outputMapping[VERT_RESULT_EDGE]];
 899    struct ureg_src edge_src = t->inputs[t->inputMapping[VERT_ATTRIB_EDGEFLAG]];
 900
 901    ureg_MOV( ureg, edge_dst, edge_src );
 902 }
 903
 904
 905 /**
 906  * Translate Mesa program to TGSI format.
 907  * \param program  the program to translate
 908  * \param numInputs  number of input registers used
 909  * \param inputMapping  maps Mesa fragment program inputs to TGSI generic
 910  *                      input indexes
 911  * \param inputSemanticName  the TGSI_SEMANTIC flag for each input
 912  * \param inputSemanticIndex  the semantic index (ex: which texcoord) for
 913  *                            each input
 914  * \param interpMode  the TGSI_INTERPOLATE_LINEAR/PERSP mode for each input
 915  * \param numOutputs  number of output registers used
 916  * \param outputMapping  maps Mesa fragment program outputs to TGSI
 917  *                       generic outputs
 918  * \param outputSemanticName  the TGSI_SEMANTIC flag for each output
 919  * \param outputSemanticIndex  the semantic index (ex: which texcoord) for
 920  *                             each output
 921  *
 922  * \return  PIPE_OK or PIPE_ERROR_OUT_OF_MEMORY
 923  */
 924 enum pipe_error
 925 st_translate_mesa_program(
 926    GLcontext *ctx,
 927    uint procType,
 928    struct ureg_program *ureg,
 929    const struct gl_program *program,
 930    GLuint numInputs,
 931    const GLuint inputMapping[],
 932    const ubyte inputSemanticName[],
 933    const ubyte inputSemanticIndex[],
 934    const GLuint interpMode[],
 935    GLuint numOutputs,
 936    const GLuint outputMapping[],
 937    const ubyte outputSemanticName[],
 938    const ubyte outputSemanticIndex[],
 939    boolean passthrough_edgeflags )
 940 {
 941    struct st_translate translate, *t;
 942    unsigned i;
 943    enum pipe_error ret = PIPE_OK;
 944
 945    assert(numInputs <= Elements(t->inputs));
 946    assert(numOutputs <= Elements(t->outputs));
 947
 948    t = &translate;
 949    memset(t, 0, sizeof *t);
 950
 951    t->procType = procType;
 952    t->inputMapping = inputMapping;
 953    t->outputMapping = outputMapping;
 954    t->ureg = ureg;
 955    t->pointSizeOutIndex = -1;
 956    t->prevInstWrotePointSize = GL_FALSE;
 957
 958    /*_mesa_print_program(program);*/
 959
 960    /*
 961     * Declare input attributes.
 962     */
 963    if (procType == TGSI_PROCESSOR_FRAGMENT) {
 964       for (i = 0; i < numInputs; i++) {
 965          if (program->InputFlags[0] & PROG_PARAM_BIT_CYL_WRAP) {
 966             t->inputs[i] = ureg_DECL_fs_input_cyl(ureg,
 967                                                   inputSemanticName[i],
 968                                                   inputSemanticIndex[i],
 969                                                   interpMode[i],
 970                                                   TGSI_CYLINDRICAL_WRAP_X);
 971          }
 972          else {
 973             t->inputs[i] = ureg_DECL_fs_input(ureg,
 974                                               inputSemanticName[i],
 975                                               inputSemanticIndex[i],
 976                                               interpMode[i]);
 977          }
 978       }
 979
 980       if (program->InputsRead & FRAG_BIT_WPOS) {
 981          /* Must do this after setting up t->inputs, and before
 982           * emitting constant references, below:
 983           */
 984          emit_wpos(st_context(ctx), t, program, ureg);
 985       }
 986
 987       if (program->InputsRead & FRAG_BIT_FACE) {
 988          emit_face_var( t, program );
 989       }
 990
 991       /*
 992        * Declare output attributes.
 993        */
 994       for (i = 0; i < numOutputs; i++) {
 995          switch (outputSemanticName[i]) {
 996          case TGSI_SEMANTIC_POSITION:
 997             t->outputs[i] = ureg_DECL_output( ureg,
 998                                               TGSI_SEMANTIC_POSITION, /* Z / Depth */
 999                                               outputSemanticIndex[i] );
1000
1001             t->outputs[i] = ureg_writemask( t->outputs[i],
1002                                             TGSI_WRITEMASK_Z );
1003             break;
1004          case TGSI_SEMANTIC_COLOR:
1005             t->outputs[i] = ureg_DECL_output( ureg,
1006                                               TGSI_SEMANTIC_COLOR,
1007                                               outputSemanticIndex[i] );
1008             break;
1009          default:
1010             debug_assert(0);
1011             return 0;
1012          }
1013       }
1014    }
1015    else if (procType == TGSI_PROCESSOR_GEOMETRY) {
1016       for (i = 0; i < numInputs; i++) {
1017          t->inputs[i] = ureg_DECL_gs_input(ureg,
1018                                            i,
1019                                            inputSemanticName[i],
1020                                            inputSemanticIndex[i]);
1021       }
1022
1023       for (i = 0; i < numOutputs; i++) {
1024          t->outputs[i] = ureg_DECL_output( ureg,
1025                                            outputSemanticName[i],
1026                                            outputSemanticIndex[i] );
1027       }
1028    }
1029    else {
1030       assert(procType == TGSI_PROCESSOR_VERTEX);
1031
1032       for (i = 0; i < numInputs; i++) {
1033          t->inputs[i] = ureg_DECL_vs_input(ureg, i);
1034       }
1035
1036       for (i = 0; i < numOutputs; i++) {
1037          t->outputs[i] = ureg_DECL_output( ureg,
1038                                            outputSemanticName[i],
1039                                            outputSemanticIndex[i] );
1040          if ((outputSemanticName[i] == TGSI_SEMANTIC_PSIZE) && program->Id) {
1041             /* Writing to the point size result register requires special
1042              * handling to implement clamping.
1043              */
1044             static const gl_state_index pointSizeClampState[STATE_LENGTH]
1045                = { STATE_INTERNAL, STATE_POINT_SIZE_IMPL_CLAMP, 0, 0, 0 };
1046                /* XXX: note we are modifying the incoming shader here!  Need to
1047                * do this before emitting the constant decls below, or this
1048                * will be missed:
1049                */
1050             unsigned pointSizeClampConst =
1051                _mesa_add_state_reference(program->Parameters,
1052                                          pointSizeClampState);
1053             struct ureg_dst psizregtemp = ureg_DECL_temporary( ureg );
1054             t->pointSizeConst = ureg_DECL_constant( ureg, pointSizeClampConst );
1055             t->pointSizeResult = t->outputs[i];
1056             t->pointSizeOutIndex = i;
1057             t->outputs[i] = psizregtemp;
1058          }
1059       }
1060       if (passthrough_edgeflags)
1061          emit_edgeflags( t, program );
1062    }
1063
1064    /* Declare address register.
1065     */
1066    if (program->NumAddressRegs > 0) {
1067       debug_assert( program->NumAddressRegs == 1 );
1068       t->address[0] = ureg_DECL_address( ureg );
1069    }
1070
1071    if (program->IndirectRegisterFiles & (1 << PROGRAM_TEMPORARY)) {
1072       /* If temps are accessed with indirect addressing, declare temporaries
1073        * in sequential order.  Else, we declare them on demand elsewhere.
1074        */
1075       for (i = 0; i < program->NumTemporaries; i++) {
1076          /* XXX use TGSI_FILE_TEMPORARY_ARRAY when it's supported by ureg */
1077          t->temps[i] = ureg_DECL_temporary( t->ureg );
1078       }
1079    }
1080
1081    /* Emit constants and immediates.  Mesa uses a single index space
1082     * for these, so we put all the translated regs in t->constants.
1083     */
1084    if (program->Parameters) {
1085       t->constants = CALLOC( program->Parameters->NumParameters,
1086                              sizeof t->constants[0] );
1087       if (t->constants == NULL) {
1088          ret = PIPE_ERROR_OUT_OF_MEMORY;
1089          goto out;
1090       }
1091
1092       for (i = 0; i < program->Parameters->NumParameters; i++) {
1093          switch (program->Parameters->Parameters[i].Type) {
1094          case PROGRAM_ENV_PARAM:
1095          case PROGRAM_LOCAL_PARAM:
1096          case PROGRAM_STATE_VAR:
1097          case PROGRAM_NAMED_PARAM:
1098          case PROGRAM_UNIFORM:
1099             t->constants[i] = ureg_DECL_constant( ureg, i );
1100             break;
1101
1102             /* Emit immediates only when there's no indirect addressing of
1103              * the const buffer.
1104              * FIXME: Be smarter and recognize param arrays:
1105              * indirect addressing is only valid within the referenced
1106              * array.
1107              */
1108          case PROGRAM_CONSTANT:
1109             if (program->IndirectRegisterFiles & PROGRAM_ANY_CONST)
1110                t->constants[i] = ureg_DECL_constant( ureg, i );
1111             else
1112                t->constants[i] =
1113                   ureg_DECL_immediate( ureg,
1114                                        program->Parameters->ParameterValues[i],
1115                                        4 );
1116             break;
1117          default:
1118             break;
1119          }
1120       }
1121    }
1122
1123    /* texture samplers */
1124    for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) {
1125       if (program->SamplersUsed & (1 << i)) {
1126          t->samplers[i] = ureg_DECL_sampler( ureg, i );
1127       }
1128    }
1129
1130    /* Emit each instruction in turn:
1131     */
1132    for (i = 0; i < program->NumInstructions; i++) {
1133       set_insn_start( t, ureg_get_instruction_number( ureg ));
1134       compile_instruction( t, &program->Instructions[i] );
1135
1136       if (t->prevInstWrotePointSize && program->Id) {
1137          /* The previous instruction wrote to the (fake) vertex point size
1138           * result register.  Now we need to clamp that value to the min/max
1139           * point size range, putting the result into the real point size
1140           * register.
1141           * Note that we can't do this easily at the end of program due to
1142           * possible early return.
1143           */
1144          set_insn_start( t, ureg_get_instruction_number( ureg ));
1145          ureg_MAX( t->ureg,
1146                    ureg_writemask(t->outputs[t->pointSizeOutIndex], WRITEMASK_X),
1147                    ureg_src(t->outputs[t->pointSizeOutIndex]),
1148                    ureg_swizzle(t->pointSizeConst, 1,1,1,1));
1149          ureg_MIN( t->ureg, ureg_writemask(t->pointSizeResult, WRITEMASK_X),
1150                    ureg_src(t->outputs[t->pointSizeOutIndex]),
1151                    ureg_swizzle(t->pointSizeConst, 2,2,2,2));
1152       }
1153       t->prevInstWrotePointSize = GL_FALSE;
1154    }
1155
1156    /* Fix up all emitted labels:
1157     */
1158    for (i = 0; i < t->labels_count; i++) {
1159       ureg_fixup_label( ureg,
1160                         t->labels[i].token,
1161                         t->insn[t->labels[i].branch_target] );
1162    }
1163
1164 out:
1165    FREE(t->insn);
1166    FREE(t->labels);
1167    FREE(t->constants);
1168
1169    if (t->error) {
1170       debug_printf("%s: translate error flag set\n", __FUNCTION__);
1171    }
1172
1173    return ret;
1174 }
1175
1176
/**
 * Release a token array through the FREE() macro.
 *
 * Tokens must not be released with plain free(); otherwise the builtin
 * gallium malloc debugging will get confused.
 *
 * \param tokens  the token array to release
 */
void
st_free_tokens(const struct tgsi_token *tokens)
{
   /* Drop the const qualifier so the pointer can be handed to FREE(). */
   void *mem = (void *) tokens;

   FREE(mem);
}