1 /**********************************************************
2 * Copyright 1998-2013 VMware, Inc. All rights reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person
5 * obtaining a copy of this software and associated documentation
6 * files (the "Software"), to deal in the Software without
7 * restriction, including without limitation the rights to use, copy,
8 * modify, merge, publish, distribute, sublicense, and/or sell copies
9 * of the Software, and to permit persons to whom the Software is
10 * furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice shall be
13 * included in all copies or substantial portions of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
19 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
20 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
21 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 *
24 **********************************************************/
25
26 /**
27 * @file svga_tgsi_vgpu10.c
28 *
29 * TGSI -> VGPU10 shader translation.
30 *
31 * \author Mingcheng Chen
32 * \author Brian Paul
33 */
34
35 #include "pipe/p_compiler.h"
36 #include "pipe/p_shader_tokens.h"
37 #include "pipe/p_defines.h"
38 #include "tgsi/tgsi_build.h"
39 #include "tgsi/tgsi_dump.h"
40 #include "tgsi/tgsi_info.h"
41 #include "tgsi/tgsi_parse.h"
42 #include "tgsi/tgsi_scan.h"
43 #include "tgsi/tgsi_strings.h"
44 #include "tgsi/tgsi_two_side.h"
45 #include "tgsi/tgsi_aa_point.h"
46 #include "tgsi/tgsi_util.h"
47 #include "util/u_math.h"
48 #include "util/u_memory.h"
49 #include "util/u_bitmask.h"
50 #include "util/u_debug.h"
51 #include "util/u_pstipple.h"
52
53 #include "svga_context.h"
54 #include "svga_debug.h"
55 #include "svga_link.h"
56 #include "svga_shader.h"
57 #include "svga_tgsi.h"
58
59 #include "VGPU10ShaderTokens.h"
60
61
62 #define INVALID_INDEX 99999
63 #define MAX_INTERNAL_TEMPS 3
64 #define MAX_SYSTEM_VALUES 4
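/* Each immediate is stored as a 4-component vector (see the immediates[]
 * array below), hence the divide by 4.
 */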
65 #define MAX_IMMEDIATE_COUNT \
66 (VGPU10_MAX_IMMEDIATE_CONSTANT_BUFFER_ELEMENT_COUNT/4)
67 #define MAX_TEMP_ARRAYS 64 /* Enough? */
68
69
70 /**
71 * Clipping is complicated. There are four different cases that we
72 * handle during VS/GS shader translation:
73 */
74 enum clipping_mode
75 {
76 CLIP_NONE, /**< No clipping enabled */
77 CLIP_LEGACY, /**< The shader has no clipping declarations or code but
78 * one or more user-defined clip planes are enabled. We
79 * generate extra code to emit clip distances.
80 */
81 CLIP_DISTANCE, /**< The shader already declares clip distance output
82 * registers and has code to write to them.
83 */
84 CLIP_VERTEX /**< The shader declares a clip vertex output register and
85 * has code that writes to the register. We convert the
86 * clipvertex position into one or more clip distances.
87 */
88 };
89
90
91 /* Shader signature info */
92 struct svga_shader_signature
93 {
94 SVGA3dDXShaderSignatureHeader header;
95 SVGA3dDXShaderSignatureEntry inputs[PIPE_MAX_SHADER_INPUTS];
96 SVGA3dDXShaderSignatureEntry outputs[PIPE_MAX_SHADER_OUTPUTS];
97 SVGA3dDXShaderSignatureEntry patchConstants[PIPE_MAX_SHADER_OUTPUTS];
98 };
99
100 static inline void
101 set_shader_signature_entry(SVGA3dDXShaderSignatureEntry *e,
102 unsigned index,
103 SVGA3dDXSignatureSemanticName sgnName,
104 unsigned mask,
105 SVGA3dDXSignatureRegisterComponentType compType,
106 SVGA3dDXSignatureMinPrecision minPrecision)
107 {
108 e->registerIndex = index;
109 e->semanticName = sgnName;
110 e->mask = mask;
111 e->componentType = compType;
112 e->minPrecision = minPrecision;
113 }
114
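/* This table is indexed by TGSI_SEMANTIC_x values, in enum order. TGSI
 * semantics with no VGPU10 counterpart map to
 * SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED.
 */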
115 static const SVGA3dDXSignatureSemanticName
116 tgsi_semantic_to_sgn_name[TGSI_SEMANTIC_COUNT] = {
117 SVGADX_SIGNATURE_SEMANTIC_NAME_POSITION,
118 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
119 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
120 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
121 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
122 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
123 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
124 SVGADX_SIGNATURE_SEMANTIC_NAME_IS_FRONT_FACE,
125 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
126 SVGADX_SIGNATURE_SEMANTIC_NAME_PRIMITIVE_ID,
127 SVGADX_SIGNATURE_SEMANTIC_NAME_INSTANCE_ID,
128 SVGADX_SIGNATURE_SEMANTIC_NAME_VERTEX_ID,
129 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
130 SVGADX_SIGNATURE_SEMANTIC_NAME_CLIP_DISTANCE,
131 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
132 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
133 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
134 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
135 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
136 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
137 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
138 SVGADX_SIGNATURE_SEMANTIC_NAME_VIEWPORT_ARRAY_INDEX,
139 SVGADX_SIGNATURE_SEMANTIC_NAME_RENDER_TARGET_ARRAY_INDEX,
140 SVGADX_SIGNATURE_SEMANTIC_NAME_SAMPLE_INDEX,
141 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
142 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
143 SVGADX_SIGNATURE_SEMANTIC_NAME_INSTANCE_ID,
144 SVGADX_SIGNATURE_SEMANTIC_NAME_VERTEX_ID,
145 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
146 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
147 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
148 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
149 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
150 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
151 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
152 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
153 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
154 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
155 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
156 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
157 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
158 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
159 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
160 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
161 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED
162 };
163
164
165 /**
166 * Map tgsi semantic name to SVGA signature semantic name
167 */
168 static inline SVGA3dDXSignatureSemanticName
169 map_tgsi_semantic_to_sgn_name(enum tgsi_semantic name)
170 {
171 assert(name < TGSI_SEMANTIC_COUNT);
172
173 /* Do a few asserts here to spot check the mapping */
174 assert(tgsi_semantic_to_sgn_name[TGSI_SEMANTIC_PRIMID] ==
175 SVGADX_SIGNATURE_SEMANTIC_NAME_PRIMITIVE_ID);
176 assert(tgsi_semantic_to_sgn_name[TGSI_SEMANTIC_VIEWPORT_INDEX] ==
177 SVGADX_SIGNATURE_SEMANTIC_NAME_VIEWPORT_ARRAY_INDEX);
178 assert(tgsi_semantic_to_sgn_name[TGSI_SEMANTIC_INVOCATIONID] ==
179 SVGADX_SIGNATURE_SEMANTIC_NAME_INSTANCE_ID);
180
181 return tgsi_semantic_to_sgn_name[name];
182 }
183
184
185 struct svga_shader_emitter_v10
186 {
187 /* The token output buffer */
188 unsigned size;
189 char *buf;
190 char *ptr;
191
192 /* Information about the shader and state (does not change) */
193 struct svga_compile_key key;
194 struct tgsi_shader_info info;
195 unsigned unit;
196 unsigned version; /**< Either 40 or 41 at this time */
197
198 unsigned cur_tgsi_token; /**< current tgsi token position */
199 unsigned inst_start_token;
200 boolean discard_instruction; /**< throw away current instruction? */
201 boolean reemit_instruction; /**< reemit current instruction */
202 boolean skip_instruction; /**< skip current instruction */
203
204 union tgsi_immediate_data immediates[MAX_IMMEDIATE_COUNT][4];
205 double (*immediates_dbl)[2];
206 unsigned num_immediates; /**< Number of immediates emitted */
207 unsigned common_immediate_pos[10]; /**< literals for common immediates */
208 unsigned num_common_immediates;
209 boolean immediates_emitted;
210
211 unsigned num_outputs; /**< includes any extra outputs */
212 /** The first extra output is reserved for
213 * the non-adjusted vertex position, for
214 * stream output purposes
215 */
216
217 /* Temporary Registers */
218 unsigned num_shader_temps; /**< num of temps used by original shader */
219 unsigned internal_temp_count; /**< currently allocated internal temps */
220 struct {
221 unsigned start, size;
222 } temp_arrays[MAX_TEMP_ARRAYS];
223 unsigned num_temp_arrays;
224
225 /** Map TGSI temp registers to VGPU10 temp array IDs and indexes */
226 struct {
227 unsigned arrayId, index;
228 boolean initialized;
229 } temp_map[VGPU10_MAX_TEMPS]; /**< arrayId, element */
230
231 unsigned initialize_temp_index;
232
233 /** Number of constants used by original shader for each constant buffer.
234 * The size should probably always match that of svga_state.constbufs.
235 */
236 unsigned num_shader_consts[SVGA_MAX_CONST_BUFS];
237
238 /* Samplers */
239 unsigned num_samplers;
240 boolean sampler_view[PIPE_MAX_SAMPLERS]; /**< True if sampler view exists */
241 ubyte sampler_target[PIPE_MAX_SAMPLERS]; /**< TGSI_TEXTURE_x */
242 ubyte sampler_return_type[PIPE_MAX_SAMPLERS]; /**< TGSI_RETURN_TYPE_x */
243
244 /* Index Range declaration */
245 struct {
246 unsigned start_index;
247 unsigned count;
248 boolean required;
249 unsigned operandType;
250 unsigned size;
251 unsigned dim;
252 } index_range;
253
254 /* Address regs (really implemented with temps) */
255 unsigned num_address_regs;
256 unsigned address_reg_index[MAX_VGPU10_ADDR_REGS];
257
258 /* Output register usage masks */
259 ubyte output_usage_mask[PIPE_MAX_SHADER_OUTPUTS];
260
261 /* To map TGSI system value index to VGPU shader input indexes */
262 ubyte system_value_indexes[MAX_SYSTEM_VALUES];
263
264 struct {
265 /* vertex position scale/translation */
266 unsigned out_index; /**< the real position output reg */
267 unsigned tmp_index; /**< the fake/temp position output reg */
268 unsigned so_index; /**< the non-adjusted position output reg */
269 unsigned prescale_cbuf_index; /* index to the const buf for prescale */
270 unsigned prescale_scale_index, prescale_trans_index;
271 unsigned num_prescale; /* number of prescale factors in the const buf */
272 unsigned viewport_index;
273 unsigned need_prescale:1;
274 unsigned have_prescale:1;
275 } vposition;
276
277 /* For vertex shaders only */
278 struct {
279 /* viewport constant */
280 unsigned viewport_index;
281
282 unsigned vertex_id_bias_index;
283 unsigned vertex_id_sys_index;
284 unsigned vertex_id_tmp_index;
285
286 /* temp index of adjusted vertex attributes */
287 unsigned adjusted_input[PIPE_MAX_SHADER_INPUTS];
288 } vs;
289
290 /* For fragment shaders only */
291 struct {
292 unsigned color_out_index[PIPE_MAX_COLOR_BUFS]; /**< the real color output regs */
293 unsigned num_color_outputs;
294 unsigned color_tmp_index; /**< fake/temp color output reg */
295 unsigned alpha_ref_index; /**< immediate constant for alpha ref */
296
297 /* front-face */
298 unsigned face_input_index; /**< real fragment shader face reg (bool) */
299 unsigned face_tmp_index; /**< temp face reg converted to -1 / +1 */
300
301 unsigned pstipple_sampler_unit;
302
303 unsigned fragcoord_input_index; /**< real fragment position input reg */
304 unsigned fragcoord_tmp_index; /**< 1/w modified position temp reg */
305
306 unsigned sample_id_sys_index; /**< TGSI index of sample id sys value */
307
308 unsigned sample_pos_sys_index; /**< TGSI index of sample pos sys value */
309 unsigned sample_pos_tmp_index; /**< which temp reg has the sample pos */
310
311 /** TGSI index of sample mask input sys value */
312 unsigned sample_mask_in_sys_index;
313
314 /** Which texture units are doing shadow comparison in the FS code */
315 unsigned shadow_compare_units;
316
317 /* layer */
318 unsigned layer_input_index; /**< TGSI index of layer */
319 unsigned layer_imm_index; /**< immediate for default layer 0 */
320 } fs;
321
322 /* For geometry shaders only */
323 struct {
324 VGPU10_PRIMITIVE prim_type;/**< VGPU10 primitive type */
325 VGPU10_PRIMITIVE_TOPOLOGY prim_topology; /**< VGPU10 primitive topology */
326 unsigned input_size; /**< size of input arrays */
327 unsigned prim_id_index; /**< primitive id register index */
328 unsigned max_out_vertices; /**< maximum number of output vertices */
329 unsigned invocations;
330 unsigned invocation_id_sys_index;
331
332 unsigned viewport_index_out_index;
333 unsigned viewport_index_tmp_index;
334 } gs;
335
336 /* For tessellation control shaders only */
337 struct {
338 unsigned vertices_per_patch_index; /**< vertices_per_patch system value index */
339 unsigned imm_index; /**< immediate for tcs */
340 unsigned invocation_id_sys_index; /**< invocation id */
341 unsigned invocation_id_tmp_index;
342 unsigned instruction_token_pos; /* token pos for the first instruction */
343 unsigned control_point_input_index; /* control point input register index */
344 unsigned control_point_addr_index; /* control point input address register */
345 unsigned control_point_out_index; /* control point output register index */
346 unsigned control_point_tmp_index; /* control point temporary register */
347 unsigned control_point_out_count; /* control point output count */
348 boolean control_point_phase; /* true if in control point phase */
349 boolean fork_phase_add_signature; /* true if needs to add signature in fork phase */
350 unsigned patch_generic_out_count; /* per-patch generic output count */
351 unsigned patch_generic_out_index; /* per-patch generic output register index */
352 unsigned patch_generic_tmp_index; /* per-patch generic temporary register index */
353 unsigned prim_id_index; /* primitive id */
354 struct {
355 unsigned out_index; /* real tessinner output register */
356 unsigned temp_index; /* tessinner temp register */
357 unsigned tgsi_index; /* tgsi tessinner output register */
358 } inner;
359 struct {
360 unsigned out_index; /* real tessouter output register */
361 unsigned temp_index; /* tessouter temp register */
362 unsigned tgsi_index; /* tgsi tessouter output register */
363 } outer;
364 } tcs;
365
366 /* For tessellation evaluation shaders only */
367 struct {
368 enum pipe_prim_type prim_mode;
369 enum pipe_tess_spacing spacing;
370 boolean vertices_order_cw;
371 boolean point_mode;
372 unsigned tesscoord_sys_index;
373 unsigned prim_id_index; /* primitive id */
374 struct {
375 unsigned in_index; /* real tessinner input register */
376 unsigned temp_index; /* tessinner temp register */
377 unsigned tgsi_index; /* tgsi tessinner input register */
378 } inner;
379 struct {
380 unsigned in_index; /* real tessouter input register */
381 unsigned temp_index; /* tessouter temp register */
382 unsigned tgsi_index; /* tgsi tessouter input register */
383 } outer;
384 } tes;
385
386 /* For vertex or geometry shaders */
387 enum clipping_mode clip_mode;
388 unsigned clip_dist_out_index; /**< clip distance output register index */
389 unsigned clip_dist_tmp_index; /**< clip distance temporary register */
390 unsigned clip_dist_so_index; /**< clip distance shadow copy */
391
392 /** Index of temporary holding the clipvertex coordinate */
393 unsigned clip_vertex_out_index; /**< clip vertex output register index */
394 unsigned clip_vertex_tmp_index; /**< clip vertex temporary index */
395
396 /* user clip plane constant slot indexes */
397 unsigned clip_plane_const[PIPE_MAX_CLIP_PLANES];
398
399 unsigned num_output_writes;
400 boolean constant_color_output;
401
402 boolean uses_flat_interp;
403
404 unsigned reserved_token; /* index to the reserved token */
405 boolean uses_precise_qualifier;
406
407 /* For all shaders: const reg index for RECT coord scaling */
408 unsigned texcoord_scale_index[PIPE_MAX_SAMPLERS];
409
410 /* For all shaders: const reg index for texture buffer size */
411 unsigned texture_buffer_size_index[PIPE_MAX_SAMPLERS];
412
413 /* VS/TCS/TES/GS/FS Linkage info */
414 struct shader_linkage linkage;
415 struct tgsi_shader_info *prevShaderInfo;
416
417 /* Shader signature */
418 struct svga_shader_signature signature;
419
420 bool register_overflow; /**< Set if we exceed a VGPU10 register limit */
421
422 /* For pipe_debug_message */
423 struct pipe_debug_callback svga_debug_callback;
424
425 /* current loop depth in shader */
426 unsigned current_loop_depth;
427 };
428
429
430 static void emit_tcs_input_declarations(struct svga_shader_emitter_v10 *emit);
431 static void emit_tcs_output_declarations(struct svga_shader_emitter_v10 *emit);
432 static boolean emit_temporaries_declaration(struct svga_shader_emitter_v10 *emit);
433 static boolean emit_constant_declaration(struct svga_shader_emitter_v10 *emit);
434 static boolean emit_sampler_declarations(struct svga_shader_emitter_v10 *emit);
435 static boolean emit_resource_declarations(struct svga_shader_emitter_v10 *emit);
436 static boolean emit_vgpu10_immediates_block(struct svga_shader_emitter_v10 *emit);
437 static boolean emit_index_range_declaration(struct svga_shader_emitter_v10 *emit);
438 static void emit_temp_prescale_instructions(struct svga_shader_emitter_v10 *emit);
439
440 static boolean
441 emit_post_helpers(struct svga_shader_emitter_v10 *emit);
442
443 static boolean
444 emit_vertex(struct svga_shader_emitter_v10 *emit,
445 const struct tgsi_full_instruction *inst);
446
447 static boolean
448 emit_vgpu10_instruction(struct svga_shader_emitter_v10 *emit,
449 unsigned inst_number,
450 const struct tgsi_full_instruction *inst);
451
452 static void
453 emit_input_declaration(struct svga_shader_emitter_v10 *emit,
454 unsigned opcodeType, unsigned operandType,
455 unsigned dim, unsigned index, unsigned size,
456 unsigned name, unsigned numComp,
457 unsigned selMode, unsigned usageMask,
458 unsigned interpMode,
459 boolean addSignature,
460 SVGA3dDXSignatureSemanticName sgnName);
461
462 static void
463 create_temp_array(struct svga_shader_emitter_v10 *emit,
464 unsigned arrayID, unsigned first, unsigned count,
465 unsigned startIndex);
466
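/* Scratch buffer that expand() falls back to when reallocation fails, so
 * that subsequent emit calls still have a valid destination while the
 * failure propagates up as FALSE.
 */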
467 static char err_buf[128];
468
469 static boolean
470 expand(struct svga_shader_emitter_v10 *emit)
471 {
472 char *new_buf;
473 unsigned newsize = emit->size * 2;
474
475 if (emit->buf != err_buf)
476 new_buf = REALLOC(emit->buf, emit->size, newsize);
477 else
478 new_buf = NULL;
479
480 if (!new_buf) {
481 emit->ptr = err_buf;
482 emit->buf = err_buf;
483 emit->size = sizeof(err_buf);
484 return FALSE;
485 }
486
487 emit->size = newsize;
488 emit->ptr = new_buf + (emit->ptr - emit->buf);
489 emit->buf = new_buf;
490 return TRUE;
491 }
492
493 /**
494 * Create and initialize a new svga_shader_emitter_v10 object.
495 */
496 static struct svga_shader_emitter_v10 *
497 alloc_emitter(void)
498 {
499 struct svga_shader_emitter_v10 *emit = CALLOC(1, sizeof(*emit));
500
501 if (!emit)
502 return NULL;
503
504 /* to initialize the output buffer */
505 emit->size = 512;
506 if (!expand(emit)) {
507 FREE(emit);
508 return NULL;
509 }
510 return emit;
511 }
512
513 /**
514 * Free an svga_shader_emitter_v10 object.
515 */
516 static void
517 free_emitter(struct svga_shader_emitter_v10 *emit)
518 {
519 assert(emit);
520 FREE(emit->buf); /* will be NULL if translation succeeded */
521 FREE(emit);
522 }
523
524 static inline boolean
525 reserve(struct svga_shader_emitter_v10 *emit,
526 unsigned nr_dwords)
527 {
528 while (emit->ptr - emit->buf + nr_dwords * sizeof(uint32) >= emit->size) {
529 if (!expand(emit))
530 return FALSE;
531 }
532
533 return TRUE;
534 }
535
536 static boolean
537 emit_dword(struct svga_shader_emitter_v10 *emit, uint32 dword)
538 {
539 if (!reserve(emit, 1))
540 return FALSE;
541
542 *(uint32 *)emit->ptr = dword;
543 emit->ptr += sizeof dword;
544 return TRUE;
545 }
546
547 static boolean
548 emit_dwords(struct svga_shader_emitter_v10 *emit,
549 const uint32 *dwords,
550 unsigned nr)
551 {
552 if (!reserve(emit, nr))
553 return FALSE;
554
555 memcpy(emit->ptr, dwords, nr * sizeof *dwords);
556 emit->ptr += nr * sizeof *dwords;
557 return TRUE;
558 }
559
560 /** Return the number of tokens in the emitter's buffer */
561 static unsigned
562 emit_get_num_tokens(const struct svga_shader_emitter_v10 *emit)
563 {
564 return (emit->ptr - emit->buf) / sizeof(unsigned);
565 }
566
567
568 /**
569 * Check for register overflow. If we overflow we'll set an
570 * error flag. This function can be called for register declarations
571 * or for registers used as src/dst instruction operands.
572 * \param operandType register type. One of VGPU10_OPERAND_TYPE_x
573 * or VGPU10_OPCODE_DCL_x
574 * \param index the register index
575 */
576 static void
577 check_register_index(struct svga_shader_emitter_v10 *emit,
578 unsigned operandType, unsigned index)
579 {
580 bool overflow_before = emit->register_overflow;
581
582 switch (operandType) {
583 case VGPU10_OPERAND_TYPE_TEMP:
584 case VGPU10_OPERAND_TYPE_INDEXABLE_TEMP:
585 case VGPU10_OPCODE_DCL_TEMPS:
586 if (index >= VGPU10_MAX_TEMPS) {
587 emit->register_overflow = TRUE;
588 }
589 break;
590 case VGPU10_OPERAND_TYPE_CONSTANT_BUFFER:
591 case VGPU10_OPCODE_DCL_CONSTANT_BUFFER:
592 if (index >= VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT) {
593 emit->register_overflow = TRUE;
594 }
595 break;
596 case VGPU10_OPERAND_TYPE_INPUT:
597 case VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID:
598 case VGPU10_OPCODE_DCL_INPUT:
599 case VGPU10_OPCODE_DCL_INPUT_SGV:
600 case VGPU10_OPCODE_DCL_INPUT_SIV:
601 case VGPU10_OPCODE_DCL_INPUT_PS:
602 case VGPU10_OPCODE_DCL_INPUT_PS_SGV:
603 case VGPU10_OPCODE_DCL_INPUT_PS_SIV:
604 if ((emit->unit == PIPE_SHADER_VERTEX &&
605 index >= VGPU10_MAX_VS_INPUTS) ||
606 (emit->unit == PIPE_SHADER_GEOMETRY &&
607 index >= VGPU10_MAX_GS_INPUTS) ||
608 (emit->unit == PIPE_SHADER_FRAGMENT &&
609 index >= VGPU10_MAX_FS_INPUTS) ||
610 (emit->unit == PIPE_SHADER_TESS_CTRL &&
611 index >= VGPU11_MAX_HS_INPUT_CONTROL_POINTS) ||
612 (emit->unit == PIPE_SHADER_TESS_EVAL &&
613 index >= VGPU11_MAX_DS_INPUT_CONTROL_POINTS)) {
614 emit->register_overflow = TRUE;
615 }
616 break;
617 case VGPU10_OPERAND_TYPE_OUTPUT:
618 case VGPU10_OPCODE_DCL_OUTPUT:
619 case VGPU10_OPCODE_DCL_OUTPUT_SGV:
620 case VGPU10_OPCODE_DCL_OUTPUT_SIV:
621 /* Note: in the tcs we skip two output indices for the
622 * tessinner/outer levels. The implementation will not exceed
623 * the declared output count, but it allows the index to go beyond
624 * VGPU11_MAX_HS_OUTPUTS.
625 * The index will never be >= VGPU11_MAX_HS_OUTPUTS + 2.
626 */
627 if ((emit->unit == PIPE_SHADER_VERTEX &&
628 index >= VGPU10_MAX_VS_OUTPUTS) ||
629 (emit->unit == PIPE_SHADER_GEOMETRY &&
630 index >= VGPU10_MAX_GS_OUTPUTS) ||
631 (emit->unit == PIPE_SHADER_FRAGMENT &&
632 index >= VGPU10_MAX_FS_OUTPUTS) ||
633 (emit->unit == PIPE_SHADER_TESS_CTRL &&
634 index >= VGPU11_MAX_HS_OUTPUTS + 2) ||
635 (emit->unit == PIPE_SHADER_TESS_EVAL &&
636 index >= VGPU11_MAX_DS_OUTPUTS)) {
637 emit->register_overflow = TRUE;
638 }
639 break;
640 case VGPU10_OPERAND_TYPE_SAMPLER:
641 case VGPU10_OPCODE_DCL_SAMPLER:
642 if (index >= VGPU10_MAX_SAMPLERS) {
643 emit->register_overflow = TRUE;
644 }
645 break;
646 case VGPU10_OPERAND_TYPE_RESOURCE:
647 case VGPU10_OPCODE_DCL_RESOURCE:
648 if (index >= VGPU10_MAX_RESOURCES) {
649 emit->register_overflow = TRUE;
650 }
651 break;
652 case VGPU10_OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER:
653 if (index >= MAX_IMMEDIATE_COUNT) {
654 emit->register_overflow = TRUE;
655 }
656 break;
657 case VGPU10_OPERAND_TYPE_OUTPUT_COVERAGE_MASK:
658 /* nothing */
659 break;
660 default:
661 assert(0);
662 ; /* nothing */
663 }
664
665 if (emit->register_overflow && !overflow_before) {
666 debug_printf("svga: vgpu10 register overflow (reg %u, index %u)\n",
667 operandType, index);
668 }
669 }
670
671
672 /**
673 * Examine misc state to determine the clipping mode.
674 */
675 static void
676 determine_clipping_mode(struct svga_shader_emitter_v10 *emit)
677 {
678 /* num_written_clipdistance in the shader info for a tessellation
679 * control shader is always 0 because TGSI_PROPERTY_NUM_CLIPDIST_ENABLED
680 * is not defined for that shader stage. So we go through all the output
681 * declarations to set num_written_clipdistance. This is just to determine
682 * the clipping mode.
683 */
684 if (emit->unit == PIPE_SHADER_TESS_CTRL) {
685 unsigned i;
686 for (i = 0; i < emit->info.num_outputs; i++) {
687 if (emit->info.output_semantic_name[i] == TGSI_SEMANTIC_CLIPDIST) {
688 emit->info.num_written_clipdistance =
689 4 * (emit->info.output_semantic_index[i] + 1);
690 }
691 }
692 }
693
694 if (emit->info.num_written_clipdistance > 0) {
695 emit->clip_mode = CLIP_DISTANCE;
696 }
697 else if (emit->info.writes_clipvertex) {
698 emit->clip_mode = CLIP_VERTEX;
699 }
700 else if (emit->key.clip_plane_enable && emit->key.last_vertex_stage) {
701 /*
702 * Only the last shader in the vertex processing stage needs to
703 * handle the legacy clip mode.
704 */
705 emit->clip_mode = CLIP_LEGACY;
706 }
707 else {
708 emit->clip_mode = CLIP_NONE;
709 }
710 }
711
712
713 /**
714 * For clip distance register declarations and clip distance register
715 * writes we need to mask the declaration usage or instruction writemask
716 * (respectively) against the set of really-enabled clipping planes.
717 *
718 * The piglit test spec/glsl-1.30/execution/clipping/vs-clip-distance-enables
719 * has a VS that writes to all 8 clip distance registers, but the plane enable
720 * flags are a subset of that.
721 *
722 * This function is used to apply the plane enable flags to the register
723 * declaration or instruction writemask.
724 *
725 * \param writemask the declaration usage mask or instruction writemask
726 * \param clip_reg_index which clip plane register is being declared/written.
727 * The legal values are 0 and 1 (two clip planes per
728 * register, for a total of 8 clip planes)
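 *
 * For example, with emit->key.clip_plane_enable == 0x3f (planes 0..5
 * enabled) and clip_reg_index == 1, only the x/y components (planes 4
 * and 5) of the writemask are kept.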
729 */
730 static unsigned
731 apply_clip_plane_mask(struct svga_shader_emitter_v10 *emit,
732 unsigned writemask, unsigned clip_reg_index)
733 {
734 unsigned shift;
735
736 assert(clip_reg_index < 2);
737
738 /* four clip planes per clip register: */
739 shift = clip_reg_index * 4;
740 writemask &= ((emit->key.clip_plane_enable >> shift) & 0xf);
741
742 return writemask;
743 }
744
745
746 /**
747 * Translate gallium shader type into VGPU10 type.
748 */
749 static VGPU10_PROGRAM_TYPE
750 translate_shader_type(unsigned type)
751 {
752 switch (type) {
753 case PIPE_SHADER_VERTEX:
754 return VGPU10_VERTEX_SHADER;
755 case PIPE_SHADER_GEOMETRY:
756 return VGPU10_GEOMETRY_SHADER;
757 case PIPE_SHADER_FRAGMENT:
758 return VGPU10_PIXEL_SHADER;
759 case PIPE_SHADER_TESS_CTRL:
760 return VGPU10_HULL_SHADER;
761 case PIPE_SHADER_TESS_EVAL:
762 return VGPU10_DOMAIN_SHADER;
763 case PIPE_SHADER_COMPUTE:
764 return VGPU10_COMPUTE_SHADER;
765 default:
766 assert(!"Unexpected shader type");
767 return VGPU10_VERTEX_SHADER;
768 }
769 }
770
771
772 /**
773 * Translate a TGSI_OPCODE_x into a VGPU10_OPCODE_x
774 * Note: we only need to translate the opcodes for "simple" instructions,
775 * as seen below. All other opcodes are handled/translated specially.
776 */
777 static VGPU10_OPCODE_TYPE
778 translate_opcode(enum tgsi_opcode opcode)
779 {
780 switch (opcode) {
781 case TGSI_OPCODE_MOV:
782 return VGPU10_OPCODE_MOV;
783 case TGSI_OPCODE_MUL:
784 return VGPU10_OPCODE_MUL;
785 case TGSI_OPCODE_ADD:
786 return VGPU10_OPCODE_ADD;
787 case TGSI_OPCODE_DP3:
788 return VGPU10_OPCODE_DP3;
789 case TGSI_OPCODE_DP4:
790 return VGPU10_OPCODE_DP4;
791 case TGSI_OPCODE_MIN:
792 return VGPU10_OPCODE_MIN;
793 case TGSI_OPCODE_MAX:
794 return VGPU10_OPCODE_MAX;
795 case TGSI_OPCODE_MAD:
796 return VGPU10_OPCODE_MAD;
797 case TGSI_OPCODE_SQRT:
798 return VGPU10_OPCODE_SQRT;
799 case TGSI_OPCODE_FRC:
800 return VGPU10_OPCODE_FRC;
801 case TGSI_OPCODE_FLR:
802 return VGPU10_OPCODE_ROUND_NI;
803 case TGSI_OPCODE_FSEQ:
804 return VGPU10_OPCODE_EQ;
805 case TGSI_OPCODE_FSGE:
806 return VGPU10_OPCODE_GE;
807 case TGSI_OPCODE_FSNE:
808 return VGPU10_OPCODE_NE;
809 case TGSI_OPCODE_DDX:
810 return VGPU10_OPCODE_DERIV_RTX;
811 case TGSI_OPCODE_DDY:
812 return VGPU10_OPCODE_DERIV_RTY;
813 case TGSI_OPCODE_RET:
814 return VGPU10_OPCODE_RET;
815 case TGSI_OPCODE_DIV:
816 return VGPU10_OPCODE_DIV;
817 case TGSI_OPCODE_IDIV:
818 return VGPU10_OPCODE_VMWARE;
819 case TGSI_OPCODE_DP2:
820 return VGPU10_OPCODE_DP2;
821 case TGSI_OPCODE_BRK:
822 return VGPU10_OPCODE_BREAK;
823 case TGSI_OPCODE_IF:
824 return VGPU10_OPCODE_IF;
825 case TGSI_OPCODE_ELSE:
826 return VGPU10_OPCODE_ELSE;
827 case TGSI_OPCODE_ENDIF:
828 return VGPU10_OPCODE_ENDIF;
829 case TGSI_OPCODE_CEIL:
830 return VGPU10_OPCODE_ROUND_PI;
831 case TGSI_OPCODE_I2F:
832 return VGPU10_OPCODE_ITOF;
833 case TGSI_OPCODE_NOT:
834 return VGPU10_OPCODE_NOT;
835 case TGSI_OPCODE_TRUNC:
836 return VGPU10_OPCODE_ROUND_Z;
837 case TGSI_OPCODE_SHL:
838 return VGPU10_OPCODE_ISHL;
839 case TGSI_OPCODE_AND:
840 return VGPU10_OPCODE_AND;
841 case TGSI_OPCODE_OR:
842 return VGPU10_OPCODE_OR;
843 case TGSI_OPCODE_XOR:
844 return VGPU10_OPCODE_XOR;
845 case TGSI_OPCODE_CONT:
846 return VGPU10_OPCODE_CONTINUE;
847 case TGSI_OPCODE_EMIT:
848 return VGPU10_OPCODE_EMIT;
849 case TGSI_OPCODE_ENDPRIM:
850 return VGPU10_OPCODE_CUT;
851 case TGSI_OPCODE_BGNLOOP:
852 return VGPU10_OPCODE_LOOP;
853 case TGSI_OPCODE_ENDLOOP:
854 return VGPU10_OPCODE_ENDLOOP;
855 case TGSI_OPCODE_ENDSUB:
856 return VGPU10_OPCODE_RET;
857 case TGSI_OPCODE_NOP:
858 return VGPU10_OPCODE_NOP;
859 case TGSI_OPCODE_END:
860 return VGPU10_OPCODE_RET;
861 case TGSI_OPCODE_F2I:
862 return VGPU10_OPCODE_FTOI;
863 case TGSI_OPCODE_IMAX:
864 return VGPU10_OPCODE_IMAX;
865 case TGSI_OPCODE_IMIN:
866 return VGPU10_OPCODE_IMIN;
867 case TGSI_OPCODE_UDIV:
868 case TGSI_OPCODE_UMOD:
869 case TGSI_OPCODE_MOD:
870 return VGPU10_OPCODE_UDIV;
871 case TGSI_OPCODE_IMUL_HI:
872 return VGPU10_OPCODE_IMUL;
873 case TGSI_OPCODE_INEG:
874 return VGPU10_OPCODE_INEG;
875 case TGSI_OPCODE_ISHR:
876 return VGPU10_OPCODE_ISHR;
877 case TGSI_OPCODE_ISGE:
878 return VGPU10_OPCODE_IGE;
879 case TGSI_OPCODE_ISLT:
880 return VGPU10_OPCODE_ILT;
881 case TGSI_OPCODE_F2U:
882 return VGPU10_OPCODE_FTOU;
883 case TGSI_OPCODE_UADD:
884 return VGPU10_OPCODE_IADD;
885 case TGSI_OPCODE_U2F:
886 return VGPU10_OPCODE_UTOF;
887 case TGSI_OPCODE_UCMP:
888 return VGPU10_OPCODE_MOVC;
889 case TGSI_OPCODE_UMAD:
890 return VGPU10_OPCODE_UMAD;
891 case TGSI_OPCODE_UMAX:
892 return VGPU10_OPCODE_UMAX;
893 case TGSI_OPCODE_UMIN:
894 return VGPU10_OPCODE_UMIN;
895 case TGSI_OPCODE_UMUL:
896 case TGSI_OPCODE_UMUL_HI:
897 return VGPU10_OPCODE_UMUL;
898 case TGSI_OPCODE_USEQ:
899 return VGPU10_OPCODE_IEQ;
900 case TGSI_OPCODE_USGE:
901 return VGPU10_OPCODE_UGE;
902 case TGSI_OPCODE_USHR:
903 return VGPU10_OPCODE_USHR;
904 case TGSI_OPCODE_USLT:
905 return VGPU10_OPCODE_ULT;
906 case TGSI_OPCODE_USNE:
907 return VGPU10_OPCODE_INE;
908 case TGSI_OPCODE_SWITCH:
909 return VGPU10_OPCODE_SWITCH;
910 case TGSI_OPCODE_CASE:
911 return VGPU10_OPCODE_CASE;
912 case TGSI_OPCODE_DEFAULT:
913 return VGPU10_OPCODE_DEFAULT;
914 case TGSI_OPCODE_ENDSWITCH:
915 return VGPU10_OPCODE_ENDSWITCH;
916 case TGSI_OPCODE_FSLT:
917 return VGPU10_OPCODE_LT;
918 case TGSI_OPCODE_ROUND:
919 return VGPU10_OPCODE_ROUND_NE;
920 /* Begin SM5 opcodes */
921 case TGSI_OPCODE_F2D:
922 return VGPU10_OPCODE_FTOD;
923 case TGSI_OPCODE_D2F:
924 return VGPU10_OPCODE_DTOF;
925 case TGSI_OPCODE_DMUL:
926 return VGPU10_OPCODE_DMUL;
927 case TGSI_OPCODE_DADD:
928 return VGPU10_OPCODE_DADD;
929 case TGSI_OPCODE_DMAX:
930 return VGPU10_OPCODE_DMAX;
931 case TGSI_OPCODE_DMIN:
932 return VGPU10_OPCODE_DMIN;
933 case TGSI_OPCODE_DSEQ:
934 return VGPU10_OPCODE_DEQ;
935 case TGSI_OPCODE_DSGE:
936 return VGPU10_OPCODE_DGE;
937 case TGSI_OPCODE_DSLT:
938 return VGPU10_OPCODE_DLT;
939 case TGSI_OPCODE_DSNE:
940 return VGPU10_OPCODE_DNE;
941 case TGSI_OPCODE_IBFE:
942 return VGPU10_OPCODE_IBFE;
943 case TGSI_OPCODE_UBFE:
944 return VGPU10_OPCODE_UBFE;
945 case TGSI_OPCODE_BFI:
946 return VGPU10_OPCODE_BFI;
947 case TGSI_OPCODE_BREV:
948 return VGPU10_OPCODE_BFREV;
949 case TGSI_OPCODE_POPC:
950 return VGPU10_OPCODE_COUNTBITS;
951 case TGSI_OPCODE_LSB:
952 return VGPU10_OPCODE_FIRSTBIT_LO;
953 case TGSI_OPCODE_IMSB:
954 return VGPU10_OPCODE_FIRSTBIT_SHI;
955 case TGSI_OPCODE_UMSB:
956 return VGPU10_OPCODE_FIRSTBIT_HI;
957 case TGSI_OPCODE_INTERP_CENTROID:
958 return VGPU10_OPCODE_EVAL_CENTROID;
959 case TGSI_OPCODE_INTERP_SAMPLE:
960 return VGPU10_OPCODE_EVAL_SAMPLE_INDEX;
961 case TGSI_OPCODE_BARRIER:
962 return VGPU10_OPCODE_SYNC;
963
964 /* DX11.1 Opcodes */
965 case TGSI_OPCODE_DDIV:
966 return VGPU10_OPCODE_DDIV;
967 case TGSI_OPCODE_DRCP:
968 return VGPU10_OPCODE_DRCP;
969 case TGSI_OPCODE_D2I:
970 return VGPU10_OPCODE_DTOI;
971 case TGSI_OPCODE_D2U:
972 return VGPU10_OPCODE_DTOU;
973 case TGSI_OPCODE_I2D:
974 return VGPU10_OPCODE_ITOD;
975 case TGSI_OPCODE_U2D:
976 return VGPU10_OPCODE_UTOD;
977
978 case TGSI_OPCODE_SAMPLE_POS:
979 /* Note: we never actually get this opcode because there's no GLSL
980 * function to query multisample resource sample positions. There's
981 * only the TGSI_SEMANTIC_SAMPLEPOS system value which contains the
982 * position of the current sample in the render target.
983 */
984 /* FALL-THROUGH */
985 case TGSI_OPCODE_SAMPLE_INFO:
986 /* NOTE: we never actually get this opcode because the GLSL compiler
987 * implements the gl_NumSamples variable with a simple constant in the
988 * constant buffer.
989 */
990 /* FALL-THROUGH */
991 default:
992 assert(!"Unexpected TGSI opcode in translate_opcode()");
993 return VGPU10_OPCODE_NOP;
994 }
995 }
996
997
998 /**
999 * Translate a TGSI register file type into a VGPU10 operand type.
1000 * \param array is the TGSI_FILE_TEMPORARY register an array?
1001 */
1002 static VGPU10_OPERAND_TYPE
1003 translate_register_file(enum tgsi_file_type file, boolean array)
1004 {
1005 switch (file) {
1006 case TGSI_FILE_CONSTANT:
1007 return VGPU10_OPERAND_TYPE_CONSTANT_BUFFER;
1008 case TGSI_FILE_INPUT:
1009 return VGPU10_OPERAND_TYPE_INPUT;
1010 case TGSI_FILE_OUTPUT:
1011 return VGPU10_OPERAND_TYPE_OUTPUT;
1012 case TGSI_FILE_TEMPORARY:
1013 return array ? VGPU10_OPERAND_TYPE_INDEXABLE_TEMP
1014 : VGPU10_OPERAND_TYPE_TEMP;
1015 case TGSI_FILE_IMMEDIATE:
1016 /* all immediates are 32-bit values at this time, so
1017 * VGPU10_OPERAND_TYPE_IMMEDIATE64 is not needed.
1018 */
1019 return VGPU10_OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER;
1020 case TGSI_FILE_SAMPLER:
1021 return VGPU10_OPERAND_TYPE_SAMPLER;
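/* TGSI system values are generally remapped to VGPU10 input registers
 * (see system_value_indexes in emit_src_register()), hence the INPUT
 * operand type here.
 */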
1022 case TGSI_FILE_SYSTEM_VALUE:
1023 return VGPU10_OPERAND_TYPE_INPUT;
1024
1025 /* XXX TODO more cases to finish */
1026
1027 default:
1028 assert(!"Bad tgsi register file!");
1029 return VGPU10_OPERAND_TYPE_NULL;
1030 }
1031 }
1032
1033
1034 /**
1035 * Emit a null dst register
1036 */
1037 static void
1038 emit_null_dst_register(struct svga_shader_emitter_v10 *emit)
1039 {
1040 VGPU10OperandToken0 operand;
1041
1042 operand.value = 0;
1043 operand.operandType = VGPU10_OPERAND_TYPE_NULL;
1044 operand.numComponents = VGPU10_OPERAND_0_COMPONENT;
1045
1046 emit_dword(emit, operand.value);
1047 }
1048
1049
1050 /**
1051 * If the given register is a temporary, return the array ID.
1052 * Else return zero.
1053 */
1054 static unsigned
1055 get_temp_array_id(const struct svga_shader_emitter_v10 *emit,
1056 enum tgsi_file_type file, unsigned index)
1057 {
1058 if (file == TGSI_FILE_TEMPORARY) {
1059 return emit->temp_map[index].arrayId;
1060 }
1061 else {
1062 return 0;
1063 }
1064 }
1065
1066
1067 /**
1068 * If the given register is a temporary, convert the index from a TGSI
1069 * TEMPORARY index to a VGPU10 temp index.
1070 */
1071 static unsigned
1072 remap_temp_index(const struct svga_shader_emitter_v10 *emit,
1073 enum tgsi_file_type file, unsigned index)
1074 {
1075 if (file == TGSI_FILE_TEMPORARY) {
1076 return emit->temp_map[index].index;
1077 }
1078 else {
1079 return index;
1080 }
1081 }
1082
1083
1084 /**
1085 * Setup the operand0 fields related to indexing (1D, 2D, relative, etc).
1086 * Note: the operandType field must already be initialized.
1087 * \param file the register file being accessed
1088 * \param indirect using indirect addressing of the register file?
1089 * \param index2D if true, 2-D indexing is being used (const or temp registers)
1090 * \param indirect2D if true, 2-D indirect indexing being used (for const buf)
1091 */
1092 static VGPU10OperandToken0
1093 setup_operand0_indexing(struct svga_shader_emitter_v10 *emit,
1094 VGPU10OperandToken0 operand0,
1095 enum tgsi_file_type file,
1096 boolean indirect,
1097 boolean index2D, bool indirect2D)
1098 {
1099 VGPU10_OPERAND_INDEX_REPRESENTATION index0Rep, index1Rep;
1100 VGPU10_OPERAND_INDEX_DIMENSION indexDim;
1101
1102 /*
1103 * Compute index dimensions
1104 */
1105 if (operand0.operandType == VGPU10_OPERAND_TYPE_IMMEDIATE32 ||
1106 operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID ||
1107 operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_GS_INSTANCE_ID ||
1108 operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_THREAD_ID ||
1109 operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP ||
1110 operand0.operandType == VGPU10_OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID) {
1111 /* there's no swizzle for in-line immediates */
1112 indexDim = VGPU10_OPERAND_INDEX_0D;
1113 assert(operand0.selectionMode == 0);
1114 }
1115 else if (operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_DOMAIN_POINT) {
1116 indexDim = VGPU10_OPERAND_INDEX_0D;
1117 }
1118 else {
1119 indexDim = index2D ? VGPU10_OPERAND_INDEX_2D : VGPU10_OPERAND_INDEX_1D;
1120 }
1121
1122 /*
1123 * Compute index representation(s) (immediate vs relative).
1124 */
1125 if (indexDim == VGPU10_OPERAND_INDEX_2D) {
1126 index0Rep = indirect2D ? VGPU10_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE
1127 : VGPU10_OPERAND_INDEX_IMMEDIATE32;
1128
1129 index1Rep = indirect ? VGPU10_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE
1130 : VGPU10_OPERAND_INDEX_IMMEDIATE32;
1131 }
1132 else if (indexDim == VGPU10_OPERAND_INDEX_1D) {
1133 index0Rep = indirect ? VGPU10_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE
1134 : VGPU10_OPERAND_INDEX_IMMEDIATE32;
1135
1136 index1Rep = 0;
1137 }
1138 else {
1139 index0Rep = 0;
1140 index1Rep = 0;
1141 }
1142
1143 operand0.indexDimension = indexDim;
1144 operand0.index0Representation = index0Rep;
1145 operand0.index1Representation = index1Rep;
1146
1147 return operand0;
1148 }
1149
1150
1151 /**
1152 * Emit the operand for expressing an address register for indirect indexing.
1153 * Note that the address register is really just a temp register.
1154 * \param addr_reg_index which address register to use
1155 */
1156 static void
1157 emit_indirect_register(struct svga_shader_emitter_v10 *emit,
1158 unsigned addr_reg_index)
1159 {
1160 unsigned tmp_reg_index;
1161 VGPU10OperandToken0 operand0;
1162
1163 assert(addr_reg_index < MAX_VGPU10_ADDR_REGS);
1164
1165 tmp_reg_index = emit->address_reg_index[addr_reg_index];
1166
1167 /* operand0 is a simple temporary register, selecting one component */
1168 operand0.value = 0;
1169 operand0.operandType = VGPU10_OPERAND_TYPE_TEMP;
1170 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1171 operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
1172 operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
1173 operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE;
1174 operand0.swizzleX = 0;
1175 operand0.swizzleY = 1;
1176 operand0.swizzleZ = 2;
1177 operand0.swizzleW = 3;
1178
1179 emit_dword(emit, operand0.value);
1180 emit_dword(emit, remap_temp_index(emit, TGSI_FILE_TEMPORARY, tmp_reg_index));
1181 }
1182
1183
1184 /**
1185 * Translate the dst register of a TGSI instruction and emit VGPU10 tokens.
1186 * \param emit the emitter context
1187 * \param reg the TGSI dst register to translate
1188 */
1189 static void
1190 emit_dst_register(struct svga_shader_emitter_v10 *emit,
1191 const struct tgsi_full_dst_register *reg)
1192 {
1193 enum tgsi_file_type file = reg->Register.File;
1194 unsigned index = reg->Register.Index;
1195 const enum tgsi_semantic sem_name = emit->info.output_semantic_name[index];
1196 const unsigned sem_index = emit->info.output_semantic_index[index];
1197 unsigned writemask = reg->Register.WriteMask;
1198 const boolean indirect = reg->Register.Indirect;
1199 unsigned tempArrayId = get_temp_array_id(emit, file, index);
1200 boolean index2d = reg->Register.Dimension || tempArrayId > 0;
1201 VGPU10OperandToken0 operand0;
1202
1203 if (file == TGSI_FILE_TEMPORARY) {
1204 emit->temp_map[index].initialized = TRUE;
1205 }
1206
1207 if (file == TGSI_FILE_OUTPUT) {
1208 if (emit->unit == PIPE_SHADER_VERTEX ||
1209 emit->unit == PIPE_SHADER_GEOMETRY ||
1210 emit->unit == PIPE_SHADER_TESS_EVAL) {
1211 if (index == emit->vposition.out_index &&
1212 emit->vposition.tmp_index != INVALID_INDEX) {
1213 /* replace OUTPUT[POS] with TEMP[POS]. We need to store the
1214 * vertex position result in a temporary so that we can modify
1215 * it in the post_helper() code.
1216 */
1217 file = TGSI_FILE_TEMPORARY;
1218 index = emit->vposition.tmp_index;
1219 }
1220 else if (sem_name == TGSI_SEMANTIC_CLIPDIST &&
1221 emit->clip_dist_tmp_index != INVALID_INDEX) {
1222 /* replace OUTPUT[CLIPDIST] with TEMP[CLIPDIST].
1223 * We store the clip distance in a temporary first, then
1224 * we'll copy it to the shadow copy and to CLIPDIST with the
1225 * enabled planes mask in emit_clip_distance_instructions().
1226 */
1227 file = TGSI_FILE_TEMPORARY;
1228 index = emit->clip_dist_tmp_index + sem_index;
1229 }
1230 else if (sem_name == TGSI_SEMANTIC_CLIPVERTEX &&
1231 emit->clip_vertex_tmp_index != INVALID_INDEX) {
1232 /* replace the CLIPVERTEX output register with a temporary */
1233 assert(emit->clip_mode == CLIP_VERTEX);
1234 assert(sem_index == 0);
1235 file = TGSI_FILE_TEMPORARY;
1236 index = emit->clip_vertex_tmp_index;
1237 }
1238 else if (sem_name == TGSI_SEMANTIC_COLOR &&
1239 emit->key.clamp_vertex_color) {
1240
1241 /* set the saturate modifier of the instruction
1242 * to clamp the vertex color.
1243 */
1244 VGPU10OpcodeToken0 *token =
1245 (VGPU10OpcodeToken0 *)emit->buf + emit->inst_start_token;
1246 token->saturate = TRUE;
1247 }
1248 else if (sem_name == TGSI_SEMANTIC_VIEWPORT_INDEX &&
1249 emit->gs.viewport_index_out_index != INVALID_INDEX) {
1250 file = TGSI_FILE_TEMPORARY;
1251 index = emit->gs.viewport_index_tmp_index;
1252 }
1253 }
1254 else if (emit->unit == PIPE_SHADER_FRAGMENT) {
1255 if (sem_name == TGSI_SEMANTIC_POSITION) {
1256 /* Fragment depth output register */
1257 operand0.value = 0;
1258 operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT_DEPTH;
1259 operand0.indexDimension = VGPU10_OPERAND_INDEX_0D;
1260 operand0.numComponents = VGPU10_OPERAND_1_COMPONENT;
1261 emit_dword(emit, operand0.value);
1262 return;
1263 }
1264 else if (sem_name == TGSI_SEMANTIC_SAMPLEMASK) {
1265 /* Fragment sample mask output */
1266 operand0.value = 0;
1267 operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT_COVERAGE_MASK;
1268 operand0.indexDimension = VGPU10_OPERAND_INDEX_0D;
1269 operand0.numComponents = VGPU10_OPERAND_1_COMPONENT;
1270 emit_dword(emit, operand0.value);
1271 return;
1272 }
1273 else if (index == emit->fs.color_out_index[0] &&
1274 emit->fs.color_tmp_index != INVALID_INDEX) {
1275 /* replace OUTPUT[COLOR] with TEMP[COLOR]. We need to store the
1276 * fragment color result in a temporary so that we can read it
1277 * in the post_helper() code.
1278 */
1279 file = TGSI_FILE_TEMPORARY;
1280 index = emit->fs.color_tmp_index;
1281 }
1282 else {
1283 /* Typically, for fragment shaders, the output register index
1284 * matches the color semantic index. But not when we write to
1285 * the fragment depth register. In that case, OUT[0] will be
1286 * fragdepth and OUT[1] will be the 0th color output. We need
1287 * to use the semantic index for color outputs.
1288 */
1289 assert(sem_name == TGSI_SEMANTIC_COLOR);
1290 index = emit->info.output_semantic_index[index];
1291
1292 emit->num_output_writes++;
1293 }
1294 }
1295 else if (emit->unit == PIPE_SHADER_TESS_CTRL) {
1296 if (index == emit->tcs.inner.tgsi_index) {
1297 /* replace OUTPUT[TESSLEVEL] with temp. We store it in a
1298 * temporary for now so that it can be stored into the appropriate
1299 * registers in post_helper() in the patch constant phase.
1300 */
1301 if (emit->tcs.control_point_phase) {
1302 /* Discard writing into tessfactor in control point phase */
1303 emit->discard_instruction = TRUE;
1304 }
1305 else {
1306 file = TGSI_FILE_TEMPORARY;
1307 index = emit->tcs.inner.temp_index;
1308 }
1309 }
1310 else if (index == emit->tcs.outer.tgsi_index) {
1311 /* replace OUTPUT[TESSLEVEL] with temp. We store it in a
1312 * temporary for now so that it can be stored into the appropriate
1313 * registers in post_helper().
1314 */
1315 if (emit->tcs.control_point_phase) {
1316 /* Discard writing into tessfactor in control point phase */
1317 emit->discard_instruction = TRUE;
1318 }
1319 else {
1320 file = TGSI_FILE_TEMPORARY;
1321 index = emit->tcs.outer.temp_index;
1322 }
1323 }
1324 else if (index >= emit->tcs.patch_generic_out_index &&
1325 index < (emit->tcs.patch_generic_out_index +
1326 emit->tcs.patch_generic_out_count)) {
1327 if (emit->tcs.control_point_phase) {
1328 /* Discard writing into generic patch constant outputs in
1329 control point phase */
1330 emit->discard_instruction = TRUE;
1331 }
1332 else {
1333 if (emit->reemit_instruction) {
1334 /* Store results of reemitted instruction in temporary register. */
1335 file = TGSI_FILE_TEMPORARY;
1336 index = emit->tcs.patch_generic_tmp_index +
1337 (index - emit->tcs.patch_generic_out_index);
1338 /**
1339 * Temporaries for patch constant data can be declared
1340 * as indexable temporaries.
1341 */
1342 tempArrayId = get_temp_array_id(emit, file, index);
1343 index2d = tempArrayId > 0;
1344
1345 emit->reemit_instruction = FALSE;
1346 }
1347 else {
1348 /* If per-patch outputs are read in the shader, we
1349 * reemit the instruction and store the results in temporaries
1350 * in the patch constant phase. */
1351 if (emit->info.reads_perpatch_outputs) {
1352 emit->reemit_instruction = TRUE;
1353 }
1354 }
1355 }
1356 }
1357 else if (reg->Register.Dimension) {
1358 /* Only control point outputs are declared 2D in tgsi */
1359 if (emit->tcs.control_point_phase) {
1360 if (emit->reemit_instruction) {
1361 /* Store results of reemitted instruction in temporary register. */
1362 index2d = FALSE;
1363 file = TGSI_FILE_TEMPORARY;
1364 index = emit->tcs.control_point_tmp_index +
1365 (index - emit->tcs.control_point_out_index);
1366 emit->reemit_instruction = FALSE;
1367 }
1368 else {
1369 /* The mapped control point outputs are 1-D */
1370 index2d = FALSE;
1371 if (emit->info.reads_pervertex_outputs) {
1372 /* If per-vertex outputs are read in the shader, we
1373 * reemit the instruction and store the results in temporaries
1374 * in the control point phase. */
1375 emit->reemit_instruction = TRUE;
1376 }
1377 }
1378
1379 if (sem_name == TGSI_SEMANTIC_CLIPDIST &&
1380 emit->clip_dist_tmp_index != INVALID_INDEX) {
1381 /* replace OUTPUT[CLIPDIST] with TEMP[CLIPDIST].
1382 * We store the clip distance in a temporary first, then
1383 * we'll copy it to the shadow copy and to CLIPDIST with the
1384 * enabled planes mask in emit_clip_distance_instructions().
1385 */
1386 file = TGSI_FILE_TEMPORARY;
1387 index = emit->clip_dist_tmp_index + sem_index;
1388 }
1389 else if (sem_name == TGSI_SEMANTIC_CLIPVERTEX &&
1390 emit->clip_vertex_tmp_index != INVALID_INDEX) {
1391 /* replace the CLIPVERTEX output register with a temporary */
1392 assert(emit->clip_mode == CLIP_VERTEX);
1393 assert(sem_index == 0);
1394 file = TGSI_FILE_TEMPORARY;
1395 index = emit->clip_vertex_tmp_index;
1396 }
1397 }
1398 else {
1399 /* Discard writing into control point outputs in
1400 patch constant phase */
1401 emit->discard_instruction = TRUE;
1402 }
1403 }
1404 }
1405 }
1406
1407 /* init operand tokens to all zero */
1408 operand0.value = 0;
1409
1410 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1411
1412 /* the operand has a writemask */
1413 operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE;
1414
1415 /* Which of the four dest components to write to. Note that we can use a
1416 * simple assignment here since TGSI writemasks match VGPU10 writemasks.
1417 */
1418 STATIC_ASSERT(TGSI_WRITEMASK_X == VGPU10_OPERAND_4_COMPONENT_MASK_X);
1419 operand0.mask = writemask;
1420
1421 /* translate TGSI register file type to VGPU10 operand type */
1422 operand0.operandType = translate_register_file(file, tempArrayId > 0);
1423
1424 check_register_index(emit, operand0.operandType, index);
1425
1426 operand0 = setup_operand0_indexing(emit, operand0, file, indirect,
1427 index2d, FALSE);
1428
1429 /* Emit tokens */
1430 emit_dword(emit, operand0.value);
1431 if (tempArrayId > 0) {
1432 emit_dword(emit, tempArrayId);
1433 }
1434
1435 emit_dword(emit, remap_temp_index(emit, file, index));
1436
1437 if (indirect) {
1438 emit_indirect_register(emit, reg->Indirect.Index);
1439 }
1440 }
1441
1442
1443 /**
1444 * Check if a temporary register needs to be initialized. We can only
1445 * tell when the shader does not use indirect addressing of temporaries
1446 * and the temporary is not used inside a loop; with indirect addressing
1447 * or loops we cannot determine whether a temporary is initialized or not.
1448 */
1449 static boolean
1450 need_temp_reg_initialization(struct svga_shader_emitter_v10 *emit,
1451 unsigned index)
1452 {
1453 if (!(emit->info.indirect_files & (1u << TGSI_FILE_TEMPORARY))
1454 && emit->current_loop_depth == 0) {
1455 if (!emit->temp_map[index].initialized &&
1456 emit->temp_map[index].index < emit->num_shader_temps) {
1457 return TRUE;
1458 }
1459 }
1460
1461 return FALSE;
1462 }
1463
1464
1465 /**
1466 * Translate a src register of a TGSI instruction and emit VGPU10 tokens.
1467 * In quite a few cases, we do register substitution. For example, if
1468 * the TGSI register is the front/back-face register, we replace that with
1469 * a temp register containing a value we computed earlier.
1470 */
1471 static void
1472 emit_src_register(struct svga_shader_emitter_v10 *emit,
1473 const struct tgsi_full_src_register *reg)
1474 {
1475 enum tgsi_file_type file = reg->Register.File;
1476 unsigned index = reg->Register.Index;
1477 const boolean indirect = reg->Register.Indirect;
1478 unsigned tempArrayId = get_temp_array_id(emit, file, index);
1479 boolean index2d = (reg->Register.Dimension ||
1480 tempArrayId > 0 ||
1481 file == TGSI_FILE_CONSTANT);
1482 unsigned index2 = tempArrayId > 0 ? tempArrayId : reg->Dimension.Index;
1483 boolean indirect2d = reg->Dimension.Indirect;
1484 unsigned swizzleX = reg->Register.SwizzleX;
1485 unsigned swizzleY = reg->Register.SwizzleY;
1486 unsigned swizzleZ = reg->Register.SwizzleZ;
1487 unsigned swizzleW = reg->Register.SwizzleW;
1488 const boolean absolute = reg->Register.Absolute;
1489 const boolean negate = reg->Register.Negate;
1490 VGPU10OperandToken0 operand0;
1491 VGPU10OperandToken1 operand1;
1492
1493 operand0.value = operand1.value = 0;
1494
1495 if (emit->unit == PIPE_SHADER_FRAGMENT){
1496 if (file == TGSI_FILE_INPUT) {
1497 if (index == emit->fs.face_input_index) {
1498 /* Replace INPUT[FACE] with TEMP[FACE] */
1499 file = TGSI_FILE_TEMPORARY;
1500 index = emit->fs.face_tmp_index;
1501 }
1502 else if (index == emit->fs.fragcoord_input_index) {
1503 /* Replace INPUT[POSITION] with TEMP[POSITION] */
1504 file = TGSI_FILE_TEMPORARY;
1505 index = emit->fs.fragcoord_tmp_index;
1506 }
1507 else if (index == emit->fs.layer_input_index) {
1508 /* Replace INPUT[LAYER] with zero.x */
1509 file = TGSI_FILE_IMMEDIATE;
1510 index = emit->fs.layer_imm_index;
1511 swizzleX = swizzleY = swizzleZ = swizzleW = TGSI_SWIZZLE_X;
1512 }
1513 else {
1514 /* We remap fragment shader inputs so that FS input indexes
1515 * match up with VS/GS output indexes.
1516 */
1517 index = emit->linkage.input_map[index];
1518 }
1519 }
1520 else if (file == TGSI_FILE_SYSTEM_VALUE) {
1521 if (index == emit->fs.sample_pos_sys_index) {
1522 assert(emit->version >= 41);
1523 /* Current sample position is in a temp register */
1524 file = TGSI_FILE_TEMPORARY;
1525 index = emit->fs.sample_pos_tmp_index;
1526 }
1527 else if (index == emit->fs.sample_mask_in_sys_index) {
1528 /* Emitted as vCoverage0.x */
1529 /* According to GLSL spec, the gl_SampleMaskIn array has ceil(s / 32)
1530 * elements where s is the maximum number of color samples supported
1531 * by the implementation. With the current implementation, we should not
1532 * have more than one element, so assert if Index != 0.
1533 */
1534 assert((!reg->Register.Indirect && reg->Register.Index == 0) ||
1535 reg->Register.Indirect);
1536 operand0.value = 0;
1537 operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_COVERAGE_MASK;
1538 operand0.indexDimension = VGPU10_OPERAND_INDEX_0D;
1539 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1540 operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE;
1541 emit_dword(emit, operand0.value);
1542 return;
1543 }
1544 else {
1545 /* Map the TGSI system value to a VGPU10 input register */
1546 assert(index < ARRAY_SIZE(emit->system_value_indexes));
1547 file = TGSI_FILE_INPUT;
1548 index = emit->system_value_indexes[index];
1549 }
1550 }
1551 }
1552 else if (emit->unit == PIPE_SHADER_GEOMETRY) {
1553 if (file == TGSI_FILE_INPUT) {
1554 if (index == emit->gs.prim_id_index) {
1555 operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
1556 operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID;
1557 }
1558 index = emit->linkage.input_map[index];
1559 }
1560 else if (file == TGSI_FILE_SYSTEM_VALUE &&
1561 index == emit->gs.invocation_id_sys_index) {
1562 /* Emitted as vGSInstanceID0.x */
1563 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1564 operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_GS_INSTANCE_ID;
1565 index = 0;
1566 }
1567 }
1568 else if (emit->unit == PIPE_SHADER_VERTEX) {
1569 if (file == TGSI_FILE_INPUT) {
1570 /* if input is adjusted... */
1571 if ((emit->key.vs.adjust_attrib_w_1 |
1572 emit->key.vs.adjust_attrib_itof |
1573 emit->key.vs.adjust_attrib_utof |
1574 emit->key.vs.attrib_is_bgra |
1575 emit->key.vs.attrib_puint_to_snorm |
1576 emit->key.vs.attrib_puint_to_uscaled |
1577 emit->key.vs.attrib_puint_to_sscaled) & (1 << index)) {
1578 file = TGSI_FILE_TEMPORARY;
1579 index = emit->vs.adjusted_input[index];
1580 }
1581 }
1582 else if (file == TGSI_FILE_SYSTEM_VALUE) {
1583 if (index == emit->vs.vertex_id_sys_index &&
1584 emit->vs.vertex_id_tmp_index != INVALID_INDEX) {
1585 file = TGSI_FILE_TEMPORARY;
1586 index = emit->vs.vertex_id_tmp_index;
1587 swizzleX = swizzleY = swizzleZ = swizzleW = TGSI_SWIZZLE_X;
1588 }
1589 else {
1590 /* Map the TGSI system value to a VGPU10 input register */
1591 assert(index < ARRAY_SIZE(emit->system_value_indexes));
1592 file = TGSI_FILE_INPUT;
1593 index = emit->system_value_indexes[index];
1594 }
1595 }
1596 }
1597 else if (emit->unit == PIPE_SHADER_TESS_CTRL) {
1598
1599 if (file == TGSI_FILE_SYSTEM_VALUE) {
1600 if (index == emit->tcs.vertices_per_patch_index) {
1601 /**
1602 * if source register is the system value for vertices_per_patch,
1603 * replace it with the immediate.
1604 */
1605 file = TGSI_FILE_IMMEDIATE;
1606 index = emit->tcs.imm_index;
1607 swizzleX = swizzleY = swizzleZ = swizzleW = TGSI_SWIZZLE_X;
1608 }
1609 else if (index == emit->tcs.invocation_id_sys_index) {
1610 if (emit->tcs.control_point_phase) {
1611 /**
1612 * Emitted as vOutputControlPointID.x
1613 */
1614 operand0.numComponents = VGPU10_OPERAND_1_COMPONENT;
1615 operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID;
1616 operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE;
1617 operand0.mask = 0;
1618 emit_dword(emit, operand0.value);
1619 return;
1620 }
1621 else {
1622 /* There is no control point ID input declaration in
1623 * the patch constant phase of the hull shader.
1624 * Since for now we are emitting all instructions in
1625 * the patch constant phase, we are replacing the
1626 * control point ID reference with the immediate 0.
1627 */
1628 file = TGSI_FILE_IMMEDIATE;
1629 index = emit->tcs.imm_index;
1630 swizzleX = swizzleY = swizzleZ = swizzleW = TGSI_SWIZZLE_W;
1631 }
1632 }
1633 else if (index == emit->tcs.prim_id_index) {
1634 /**
1635 * Emitted as vPrim.x
1636 */
1637 operand0.numComponents = VGPU10_OPERAND_1_COMPONENT;
1638 operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID;
1639 index = 0;
1640 }
1641 }
1642 else if (file == TGSI_FILE_INPUT) {
1643 index = emit->linkage.input_map[index];
1644 if (!emit->tcs.control_point_phase) {
1645 /* Emitted as vicp */
1646 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1647 operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT;
1648 assert(reg->Register.Dimension);
1649 }
1650 }
1651 else if (file == TGSI_FILE_OUTPUT) {
1652 if ((index >= emit->tcs.patch_generic_out_index &&
1653 index < (emit->tcs.patch_generic_out_index +
1654 emit->tcs.patch_generic_out_count)) ||
1655 index == emit->tcs.inner.tgsi_index ||
1656 index == emit->tcs.outer.tgsi_index) {
1657 if (emit->tcs.control_point_phase) {
1658 emit->discard_instruction = TRUE;
1659 }
1660 else {
1661 /* The device doesn't allow reading from an output register,
1662 * so use the corresponding temporary register as the source. */
1663 file = TGSI_FILE_TEMPORARY;
1664 if (index == emit->tcs.inner.tgsi_index) {
1665 index = emit->tcs.inner.temp_index;
1666 }
1667 else if (index == emit->tcs.outer.tgsi_index) {
1668 index = emit->tcs.outer.temp_index;
1669 }
1670 else {
1671 index = emit->tcs.patch_generic_tmp_index +
1672 (index - emit->tcs.patch_generic_out_index);
1673 }
1674
1675 /**
1676 * Temporaries for patch constant data can be declared
1677 * as indexable temporaries.
1678 */
1679 tempArrayId = get_temp_array_id(emit, file, index);
1680 index2d = tempArrayId > 0;
1681 index2 = tempArrayId > 0 ? tempArrayId : reg->Dimension.Index;
1682 }
1683 }
1684 else if (index2d) {
1685 if (emit->tcs.control_point_phase) {
1686 /* The device doesn't allow reading from an output register,
1687 * so use the corresponding temporary register as the source. */
1688 file = TGSI_FILE_TEMPORARY;
1689 index2d = FALSE;
1690 index = emit->tcs.control_point_tmp_index +
1691 (index - emit->tcs.control_point_out_index);
1692 }
1693 else {
1694 emit->discard_instruction = TRUE;
1695 }
1696 }
1697 }
1698 }
1699 else if (emit->unit == PIPE_SHADER_TESS_EVAL) {
1700 if (file == TGSI_FILE_SYSTEM_VALUE) {
1701 if (index == emit->tes.tesscoord_sys_index) {
1702 /**
1703 * Emitted as vDomain
1704 */
1705 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1706 operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_DOMAIN_POINT;
1707 index = 0;
1708 }
1709 else if (index == emit->tes.inner.tgsi_index) {
1710 file = TGSI_FILE_TEMPORARY;
1711 index = emit->tes.inner.temp_index;
1712 }
1713 else if (index == emit->tes.outer.tgsi_index) {
1714 file = TGSI_FILE_TEMPORARY;
1715 index = emit->tes.outer.temp_index;
1716 }
1717 else if (index == emit->tes.prim_id_index) {
1718 /**
1719 * Emitted as vPrim.x
1720 */
1721 operand0.numComponents = VGPU10_OPERAND_1_COMPONENT;
1722 operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID;
1723 index = 0;
1724 }
1725
1726 }
1727 else if (file == TGSI_FILE_INPUT) {
1728 if (index2d) {
1729 /* 2D input is emitted as vcp (input control point). */
1730 operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT;
1731 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1732
1733 /* index specifies the element index and is remapped
1734 * to align with the tcs output index.
1735 */
1736 index = emit->linkage.input_map[index];
1737
1738 assert(index2 < emit->key.tes.vertices_per_patch);
1739 }
1740 else {
1741 if (index < emit->key.tes.tessfactor_index)
1742 /* index specifies the generic patch index.
1743 * Remapped to match up with the tcs output index.
1744 */
1745 index = emit->linkage.input_map[index];
1746
1747 operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT;
1748 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1749 }
1750 }
1751 }
1752
1753 if (file == TGSI_FILE_ADDRESS) {
1754 index = emit->address_reg_index[index];
1755 file = TGSI_FILE_TEMPORARY;
1756 }
1757
1758 if (file == TGSI_FILE_TEMPORARY) {
1759 if (need_temp_reg_initialization(emit, index)) {
1760 emit->initialize_temp_index = index;
1761 emit->discard_instruction = TRUE;
1762 }
1763 }
1764
1765 if (operand0.value == 0) {
1766 /* if operand0 was not set above for a special case, do the general
1767 * case now.
1768 */
1769 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1770 operand0.operandType = translate_register_file(file, tempArrayId > 0);
1771 }
1772 operand0 = setup_operand0_indexing(emit, operand0, file, indirect,
1773 index2d, indirect2d);
1774
1775 if (operand0.operandType != VGPU10_OPERAND_TYPE_IMMEDIATE32 &&
1776 operand0.operandType != VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID) {
1777 /* there's no swizzle for in-line immediates */
1778 if (swizzleX == swizzleY &&
1779 swizzleX == swizzleZ &&
1780 swizzleX == swizzleW) {
1781 operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE;
1782 }
1783 else {
1784 operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE;
1785 }
1786
1787 operand0.swizzleX = swizzleX;
1788 operand0.swizzleY = swizzleY;
1789 operand0.swizzleZ = swizzleZ;
1790 operand0.swizzleW = swizzleW;
1791
1792 if (absolute || negate) {
1793 operand0.extended = 1;
1794 operand1.extendedOperandType = VGPU10_EXTENDED_OPERAND_MODIFIER;
1795 if (absolute && !negate)
1796 operand1.operandModifier = VGPU10_OPERAND_MODIFIER_ABS;
1797 if (!absolute && negate)
1798 operand1.operandModifier = VGPU10_OPERAND_MODIFIER_NEG;
1799 if (absolute && negate)
1800 operand1.operandModifier = VGPU10_OPERAND_MODIFIER_ABSNEG;
1801 }
1802 }
1803
1804 /* Emit the operand tokens */
1805 emit_dword(emit, operand0.value);
1806 if (operand0.extended)
1807 emit_dword(emit, operand1.value);
1808
1809 if (operand0.operandType == VGPU10_OPERAND_TYPE_IMMEDIATE32) {
1810 /* Emit the four float/int in-line immediate values */
1811 unsigned *c;
1812 assert(index < ARRAY_SIZE(emit->immediates));
1813 assert(file == TGSI_FILE_IMMEDIATE);
1814 assert(swizzleX < 4);
1815 assert(swizzleY < 4);
1816 assert(swizzleZ < 4);
1817 assert(swizzleW < 4);
1818 c = (unsigned *) emit->immediates[index];
1819 emit_dword(emit, c[swizzleX]);
1820 emit_dword(emit, c[swizzleY]);
1821 emit_dword(emit, c[swizzleZ]);
1822 emit_dword(emit, c[swizzleW]);
1823 }
1824 else if (operand0.indexDimension >= VGPU10_OPERAND_INDEX_1D) {
1825 /* Emit the register index(es) */
1826 if (index2d) {
1827 emit_dword(emit, index2);
1828
1829 if (indirect2d) {
1830 emit_indirect_register(emit, reg->DimIndirect.Index);
1831 }
1832 }
1833
1834 emit_dword(emit, remap_temp_index(emit, file, index));
1835
1836 if (indirect) {
1837 emit_indirect_register(emit, reg->Indirect.Index);
1838 }
1839 }
1840 }
1841
1842
1843 /**
1844 * Emit a resource operand (for use with a SAMPLE instruction).
1845 */
1846 static void
1847 emit_resource_register(struct svga_shader_emitter_v10 *emit,
1848 unsigned resource_number)
1849 {
1850 VGPU10OperandToken0 operand0;
1851
1852 check_register_index(emit, VGPU10_OPERAND_TYPE_RESOURCE, resource_number);
1853
1854 /* init */
1855 operand0.value = 0;
1856
1857 operand0.operandType = VGPU10_OPERAND_TYPE_RESOURCE;
1858 operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
1859 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1860 operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE;
1861 operand0.swizzleX = VGPU10_COMPONENT_X;
1862 operand0.swizzleY = VGPU10_COMPONENT_Y;
1863 operand0.swizzleZ = VGPU10_COMPONENT_Z;
1864 operand0.swizzleW = VGPU10_COMPONENT_W;
1865
1866 emit_dword(emit, operand0.value);
1867 emit_dword(emit, resource_number);
1868 }
1869
1870
1871 /**
1872 * Emit a sampler operand (for use with a SAMPLE instruction).
1873 */
1874 static void
1875 emit_sampler_register(struct svga_shader_emitter_v10 *emit,
1876 unsigned sampler_number)
1877 {
1878 VGPU10OperandToken0 operand0;
1879
1880 check_register_index(emit, VGPU10_OPERAND_TYPE_SAMPLER, sampler_number);
1881
1882 /* init */
1883 operand0.value = 0;
1884
1885 operand0.operandType = VGPU10_OPERAND_TYPE_SAMPLER;
1886 operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
1887
1888 emit_dword(emit, operand0.value);
1889 emit_dword(emit, sampler_number);
1890 }
1891
1892
1893 /**
1894 * Emit an operand which reads the IS_FRONT_FACING register.
1895 */
1896 static void
1897 emit_face_register(struct svga_shader_emitter_v10 *emit)
1898 {
1899 VGPU10OperandToken0 operand0;
1900 unsigned index = emit->linkage.input_map[emit->fs.face_input_index];
1901
1902 /* init */
1903 operand0.value = 0;
1904
1905 operand0.operandType = VGPU10_OPERAND_TYPE_INPUT;
1906 operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
1907 operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE;
1908 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1909
1910 operand0.swizzleX = VGPU10_COMPONENT_X;
1911 operand0.swizzleY = VGPU10_COMPONENT_X;
1912 operand0.swizzleZ = VGPU10_COMPONENT_X;
1913 operand0.swizzleW = VGPU10_COMPONENT_X;
1914
1915 emit_dword(emit, operand0.value);
1916 emit_dword(emit, index);
1917 }
1918
1919
1920 /**
1921 * Emit tokens for the "rasterizer" register used by the SAMPLE_POS
1922 * instruction.
1923 */
1924 static void
1925 emit_rasterizer_register(struct svga_shader_emitter_v10 *emit)
1926 {
1927 VGPU10OperandToken0 operand0;
1928
1929 /* init */
1930 operand0.value = 0;
1931
1932 /* No register index for the rasterizer (there's only one) */
1933 operand0.operandType = VGPU10_OPERAND_TYPE_RASTERIZER;
1934 operand0.indexDimension = VGPU10_OPERAND_INDEX_0D;
1935 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1936 operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE;
1937 operand0.swizzleX = VGPU10_COMPONENT_X;
1938 operand0.swizzleY = VGPU10_COMPONENT_Y;
1939 operand0.swizzleZ = VGPU10_COMPONENT_Z;
1940 operand0.swizzleW = VGPU10_COMPONENT_W;
1941
1942 emit_dword(emit, operand0.value);
1943 }
1944
1945
1946 /**
1947 * Emit tokens for the "stream" register used by the
1948 * DCL_STREAM, CUT_STREAM, EMIT_STREAM instructions.
1949 */
1950 static void
1951 emit_stream_register(struct svga_shader_emitter_v10 *emit, unsigned index)
1952 {
1953 VGPU10OperandToken0 operand0;
1954
1955 /* init */
1956 operand0.value = 0;
1957
1958 /* The stream register is addressed with a 1D index (emitted below) */
1959 operand0.operandType = VGPU10_OPERAND_TYPE_STREAM;
1960 operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
1961 operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
1962
1963 emit_dword(emit, operand0.value);
1964 emit_dword(emit, index);
1965 }
1966
1967
1968 /**
1969 * Emit the token for a VGPU10 opcode, with precise parameter.
1970 * \param saturate clamp result to [0,1]?
1971 */
1972 static void
1973 emit_opcode_precise(struct svga_shader_emitter_v10 *emit,
1974 unsigned vgpu10_opcode, boolean saturate, boolean precise)
1975 {
1976 VGPU10OpcodeToken0 token0;
1977
1978 token0.value = 0; /* init all fields to zero */
1979 token0.opcodeType = vgpu10_opcode;
1980 token0.instructionLength = 0; /* Filled in by end_emit_instruction() */
1981 token0.saturate = saturate;
1982
1983 /* Mesa's GLSL IR -> TGSI translator will set the TGSI precise flag for
1984 * 'invariant' declarations. Only set preciseValues=1 if we have SM5.
1985 */
1986 token0.preciseValues = precise && emit->version >= 50;
1987
1988 emit_dword(emit, token0.value);
1989
1990 emit->uses_precise_qualifier |= token0.preciseValues;
1991 }
1992
1993
1994 /**
1995 * Emit the token for a VGPU10 opcode.
1996 * \param saturate clamp result to [0,1]?
1997 */
1998 static void
1999 emit_opcode(struct svga_shader_emitter_v10 *emit,
2000 unsigned vgpu10_opcode, boolean saturate)
2001 {
2002 emit_opcode_precise(emit, vgpu10_opcode, saturate, FALSE);
2003 }
2004
2005
2006 /**
2007 * Emit the token for a VGPU10 resinfo instruction.
2008 * \param modifier return type modifier, _uint or _rcpFloat.
2009 * TODO: We may want to remove this parameter if it will
2010 * only ever be used as _uint.
2011 */
2012 static void
2013 emit_opcode_resinfo(struct svga_shader_emitter_v10 *emit,
2014 VGPU10_RESINFO_RETURN_TYPE modifier)
2015 {
2016 VGPU10OpcodeToken0 token0;
2017
2018 token0.value = 0; /* init all fields to zero */
2019 token0.opcodeType = VGPU10_OPCODE_RESINFO;
2020 token0.instructionLength = 0; /* Filled in by end_emit_instruction() */
2021 token0.resinfoReturnType = modifier;
2022
2023 emit_dword(emit, token0.value);
2024 }
2025
2026
2027 /**
2028 * Emit opcode tokens for a texture sample instruction. Texture instructions
2029 * can be rather complicated (texel offsets, etc.), so we have this specialized
2030 * function.
2031 */
2032 static void
2033 emit_sample_opcode(struct svga_shader_emitter_v10 *emit,
2034 unsigned vgpu10_opcode, boolean saturate,
2035 const int offsets[3])
2036 {
2037 VGPU10OpcodeToken0 token0;
2038 VGPU10OpcodeToken1 token1;
2039
2040 token0.value = 0; /* init all fields to zero */
2041 token0.opcodeType = vgpu10_opcode;
2042 token0.instructionLength = 0; /* Filled in by end_emit_instruction() */
2043 token0.saturate = saturate;
2044
2045 if (offsets[0] || offsets[1] || offsets[2]) {
2046 assert(offsets[0] >= VGPU10_MIN_TEXEL_FETCH_OFFSET);
2047 assert(offsets[1] >= VGPU10_MIN_TEXEL_FETCH_OFFSET);
2048 assert(offsets[2] >= VGPU10_MIN_TEXEL_FETCH_OFFSET);
2049 assert(offsets[0] <= VGPU10_MAX_TEXEL_FETCH_OFFSET);
2050 assert(offsets[1] <= VGPU10_MAX_TEXEL_FETCH_OFFSET);
2051 assert(offsets[2] <= VGPU10_MAX_TEXEL_FETCH_OFFSET);
2052
2053 token0.extended = 1;
2054 token1.value = 0;
2055 token1.opcodeType = VGPU10_EXTENDED_OPCODE_SAMPLE_CONTROLS;
2056 token1.offsetU = offsets[0];
2057 token1.offsetV = offsets[1];
2058 token1.offsetW = offsets[2];
2059 }
2060
2061 emit_dword(emit, token0.value);
2062 if (token0.extended) {
2063 emit_dword(emit, token1.value);
2064 }
2065 }
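
/* Illustrative emission sequence for a SAMPLE-style instruction, where
 * 'offsets' and 'unit' stand in for the caller's texel offsets and
 * sampler unit.  The destination and coordinate operands are emitted
 * by other helpers in this file and are elided ("...") here:
 *
 *    begin_emit_instruction(emit);
 *    emit_sample_opcode(emit, VGPU10_OPCODE_SAMPLE, FALSE, offsets);
 *    ... emit the destination and texcoord operands ...
 *    emit_resource_register(emit, unit);
 *    emit_sampler_register(emit, unit);
 *    end_emit_instruction(emit);
 */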
2066
2067
2068 /**
2069 * Emit a DISCARD opcode token.
2070 * If nonzero is set, we'll discard the fragment if the X component is not 0.
2071 * Otherwise, we'll discard the fragment if the X component is 0.
2072 */
2073 static void
2074 emit_discard_opcode(struct svga_shader_emitter_v10 *emit, boolean nonzero)
2075 {
2076 VGPU10OpcodeToken0 opcode0;
2077
2078 opcode0.value = 0;
2079 opcode0.opcodeType = VGPU10_OPCODE_DISCARD;
2080 if (nonzero)
2081 opcode0.testBoolean = VGPU10_INSTRUCTION_TEST_NONZERO;
2082
2083 emit_dword(emit, opcode0.value);
2084 }
2085
2086
2087 /**
2088 * We need to call this before we begin emitting a VGPU10 instruction.
2089 */
2090 static void
2091 begin_emit_instruction(struct svga_shader_emitter_v10 *emit)
2092 {
2093 assert(emit->inst_start_token == 0);
2094 /* Save location of the instruction's VGPU10OpcodeToken0 token.
2095 * Note, we can't save a pointer because it would become invalid if
2096 * we have to realloc the output buffer.
2097 */
2098 emit->inst_start_token = emit_get_num_tokens(emit);
2099 }
2100
2101
2102 /**
2103 * We need to call this after we emit the last token of a VGPU10 instruction.
2104 * This function patches in the opcode token's instructionLength field.
2105 */
2106 static void
2107 end_emit_instruction(struct svga_shader_emitter_v10 *emit)
2108 {
2109 VGPU10OpcodeToken0 *tokens = (VGPU10OpcodeToken0 *) emit->buf;
2110 unsigned inst_length;
2111
2112 assert(emit->inst_start_token > 0);
2113
2114 if (emit->discard_instruction) {
2115 /* Back up the emit->ptr to where this instruction started so
2116 * that we discard the current instruction.
2117 */
2118 emit->ptr = (char *) (tokens + emit->inst_start_token);
2119 }
2120 else {
2121 /* Compute instruction length and patch that into the start of
2122 * the instruction.
2123 */
2124 inst_length = emit_get_num_tokens(emit) - emit->inst_start_token;
2125
2126 assert(inst_length > 0);
2127
2128 tokens[emit->inst_start_token].instructionLength = inst_length;
2129 }
2130
2131 emit->inst_start_token = 0; /* reset to zero for error checking */
2132 emit->discard_instruction = FALSE;
2133 }
2134
2135
2136 /**
2137 * Return index for a free temporary register.
2138 */
2139 static unsigned
2140 get_temp_index(struct svga_shader_emitter_v10 *emit)
2141 {
2142 assert(emit->internal_temp_count < MAX_INTERNAL_TEMPS);
2143 return emit->num_shader_temps + emit->internal_temp_count++;
2144 }
2145
2146
2147 /**
2148 * Release the temporaries which were generated by get_temp_index().
2149 */
2150 static void
2151 free_temp_indexes(struct svga_shader_emitter_v10 *emit)
2152 {
2153 emit->internal_temp_count = 0;
2154 }
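
/* Typical usage (illustrative): allocate scratch registers with
 * get_temp_index() while expanding a single TGSI instruction, then
 * call free_temp_indexes() once that expansion is finished so the
 * same internal temp slots (at most MAX_INTERNAL_TEMPS) can be
 * reused by the next instruction.
 */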
2155
2156
2157 /**
2158 * Create a tgsi_full_src_register.
2159 */
2160 static struct tgsi_full_src_register
2161 make_src_reg(enum tgsi_file_type file, unsigned index)
2162 {
2163 struct tgsi_full_src_register reg;
2164
2165 memset(&reg, 0, sizeof(reg));
2166 reg.Register.File = file;
2167 reg.Register.Index = index;
2168 reg.Register.SwizzleX = TGSI_SWIZZLE_X;
2169 reg.Register.SwizzleY = TGSI_SWIZZLE_Y;
2170 reg.Register.SwizzleZ = TGSI_SWIZZLE_Z;
2171 reg.Register.SwizzleW = TGSI_SWIZZLE_W;
2172 return reg;
2173 }
2174
2175
2176 /**
2177 * Create a tgsi_full_src_register with a swizzle such that all four
2178 * vector components have the same scalar value.
2179 */
2180 static struct tgsi_full_src_register
2181 make_src_scalar_reg(enum tgsi_file_type file, unsigned index, unsigned component)
2182 {
2183 struct tgsi_full_src_register reg;
2184
2185 assert(component >= TGSI_SWIZZLE_X);
2186 assert(component <= TGSI_SWIZZLE_W);
2187
2188 memset(&reg, 0, sizeof(reg));
2189 reg.Register.File = file;
2190 reg.Register.Index = index;
2191 reg.Register.SwizzleX =
2192 reg.Register.SwizzleY =
2193 reg.Register.SwizzleZ =
2194 reg.Register.SwizzleW = component;
2195 return reg;
2196 }
2197
2198
2199 /**
2200 * Create a tgsi_full_src_register for a temporary.
2201 */
2202 static struct tgsi_full_src_register
2203 make_src_temp_reg(unsigned index)
2204 {
2205 return make_src_reg(TGSI_FILE_TEMPORARY, index);
2206 }
2207
2208
2209 /**
2210 * Create a tgsi_full_src_register for a constant.
2211 */
2212 static struct tgsi_full_src_register
2213 make_src_const_reg(unsigned index)
2214 {
2215 return make_src_reg(TGSI_FILE_CONSTANT, index);
2216 }
2217
2218
2219 /**
2220 * Create a tgsi_full_src_register for an immediate constant.
2221 */
2222 static struct tgsi_full_src_register
2223 make_src_immediate_reg(unsigned index)
2224 {
2225 return make_src_reg(TGSI_FILE_IMMEDIATE, index);
2226 }
2227
2228
2229 /**
2230 * Create a tgsi_full_dst_register.
2231 */
2232 static struct tgsi_full_dst_register
2233 make_dst_reg(enum tgsi_file_type file, unsigned index)
2234 {
2235 struct tgsi_full_dst_register reg;
2236
2237 memset(&reg, 0, sizeof(reg));
2238 reg.Register.File = file;
2239 reg.Register.Index = index;
2240 reg.Register.WriteMask = TGSI_WRITEMASK_XYZW;
2241 return reg;
2242 }
2243
2244
2245 /**
2246 * Create a tgsi_full_dst_register for a temporary.
2247 */
2248 static struct tgsi_full_dst_register
2249 make_dst_temp_reg(unsigned index)
2250 {
2251 return make_dst_reg(TGSI_FILE_TEMPORARY, index);
2252 }
2253
2254
2255 /**
2256 * Create a tgsi_full_dst_register for an output.
2257 */
2258 static struct tgsi_full_dst_register
2259 make_dst_output_reg(unsigned index)
2260 {
2261 return make_dst_reg(TGSI_FILE_OUTPUT, index);
2262 }
2263
2264
2265 /**
2266 * Create negated tgsi_full_src_register.
2267 */
2268 static struct tgsi_full_src_register
2269 negate_src(const struct tgsi_full_src_register *reg)
2270 {
2271 struct tgsi_full_src_register neg = *reg;
2272 neg.Register.Negate = !reg->Register.Negate;
2273 return neg;
2274 }
2275
2276 /**
2277 * Create absolute value of a tgsi_full_src_register.
2278 */
2279 static struct tgsi_full_src_register
2280 absolute_src(const struct tgsi_full_src_register *reg)
2281 {
2282 struct tgsi_full_src_register absolute = *reg;
2283 absolute.Register.Absolute = 1;
2284 return absolute;
2285 }
2286
2287
2288 /** Return the named swizzle term from the src register */
2289 static inline unsigned
2290 get_swizzle(const struct tgsi_full_src_register *reg, enum tgsi_swizzle term)
2291 {
2292 switch (term) {
2293 case TGSI_SWIZZLE_X:
2294 return reg->Register.SwizzleX;
2295 case TGSI_SWIZZLE_Y:
2296 return reg->Register.SwizzleY;
2297 case TGSI_SWIZZLE_Z:
2298 return reg->Register.SwizzleZ;
2299 case TGSI_SWIZZLE_W:
2300 return reg->Register.SwizzleW;
2301 default:
2302 assert(!"Bad swizzle");
2303 return TGSI_SWIZZLE_X;
2304 }
2305 }
2306
2307
2308 /**
2309 * Create swizzled tgsi_full_src_register.
2310 */
2311 static struct tgsi_full_src_register
2312 swizzle_src(const struct tgsi_full_src_register *reg,
2313 enum tgsi_swizzle swizzleX, enum tgsi_swizzle swizzleY,
2314 enum tgsi_swizzle swizzleZ, enum tgsi_swizzle swizzleW)
2315 {
2316 struct tgsi_full_src_register swizzled = *reg;
2317 /* Note: we swizzle the current swizzle */
2318 swizzled.Register.SwizzleX = get_swizzle(reg, swizzleX);
2319 swizzled.Register.SwizzleY = get_swizzle(reg, swizzleY);
2320 swizzled.Register.SwizzleZ = get_swizzle(reg, swizzleZ);
2321 swizzled.Register.SwizzleW = get_swizzle(reg, swizzleW);
2322 return swizzled;
2323 }
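
/* For example, if 'reg' has the identity swizzle .xyzw, then
 * swizzle_src(&reg, TGSI_SWIZZLE_W, TGSI_SWIZZLE_Z,
 *             TGSI_SWIZZLE_Y, TGSI_SWIZZLE_X)
 * returns a copy of 'reg' with the swizzle .wzyx.
 */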
2324
2325
2326 /**
2327 * Create swizzled tgsi_full_src_register where all the swizzle
2328 * terms are the same.
2329 */
2330 static struct tgsi_full_src_register
2331 scalar_src(const struct tgsi_full_src_register *reg, enum tgsi_swizzle swizzle)
2332 {
2333 struct tgsi_full_src_register swizzled = *reg;
2334 /* Note: we swizzle the current swizzle */
2335 swizzled.Register.SwizzleX =
2336 swizzled.Register.SwizzleY =
2337 swizzled.Register.SwizzleZ =
2338 swizzled.Register.SwizzleW = get_swizzle(reg, swizzle);
2339 return swizzled;
2340 }
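
/* For example, if 'reg' is TEMP[0].xyzw, then
 * scalar_src(&reg, TGSI_SWIZZLE_Y) returns TEMP[0].yyyy.
 */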
2341
2342
2343 /**
2344 * Create new tgsi_full_dst_register with writemask.
2345 * \param mask bitmask of TGSI_WRITEMASK_[XYZW]
2346 */
2347 static struct tgsi_full_dst_register
2348 writemask_dst(const struct tgsi_full_dst_register *reg, unsigned mask)
2349 {
2350 struct tgsi_full_dst_register masked = *reg;
2351 masked.Register.WriteMask = mask;
2352 return masked;
2353 }
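
/* For example, writemask_dst(&dst, TGSI_WRITEMASK_XY) returns a copy
 * of 'dst' which only writes the X and Y components.
 */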
2354
2355
2356 /**
2357 * Check if the register's swizzle is XXXX, YYYY, ZZZZ, or WWWW.
2358 */
2359 static boolean
2360 same_swizzle_terms(const struct tgsi_full_src_register *reg)
2361 {
2362 return (reg->Register.SwizzleX == reg->Register.SwizzleY &&
2363 reg->Register.SwizzleY == reg->Register.SwizzleZ &&
2364 reg->Register.SwizzleZ == reg->Register.SwizzleW);
2365 }
2366
2367
2368 /**
2369 * Search the vector for the value 'x' and return its position.
2370 */
2371 static int
2372 find_imm_in_vec4(const union tgsi_immediate_data vec[4],
2373 union tgsi_immediate_data x)
2374 {
2375 unsigned i;
2376 for (i = 0; i < 4; i++) {
2377 if (vec[i].Int == x.Int)
2378 return i;
2379 }
2380 return -1;
2381 }
2382
2383
2384 /**
2385 * Helper used by make_immediate_reg(), make_immediate_reg_4().
2386 */
2387 static int
2388 find_immediate(struct svga_shader_emitter_v10 *emit,
2389 union tgsi_immediate_data x, unsigned startIndex)
2390 {
2391 const unsigned endIndex = emit->num_immediates;
2392 unsigned i;
2393
2394 assert(emit->immediates_emitted);
2395
2396 /* Search each immediate vector's components for the value x */
2397 for (i = startIndex; i < endIndex; i++) {
2398 if (x.Int == emit->immediates[i][0].Int ||
2399 x.Int == emit->immediates[i][1].Int ||
2400 x.Int == emit->immediates[i][2].Int ||
2401 x.Int == emit->immediates[i][3].Int) {
2402 return i;
2403 }
2404 }
2405 /* Should never try to use an immediate value that wasn't pre-declared */
2406 assert(!"find_immediate() failed!");
2407 return -1;
2408 }
2409
2410
2411 /**
2412 * As above, but search for a double[2] pair.
2413 */
2414 static int
2415 find_immediate_dbl(struct svga_shader_emitter_v10 *emit,
2416 double x, double y)
2417 {
2418 const unsigned endIndex = emit->num_immediates;
2419 unsigned i;
2420
2421 assert(emit->immediates_emitted);
2422
2423 /* Search the double immediates for the (x, y) pair */
2424 for (i = 0; i < endIndex; i++) {
2425 if (x == emit->immediates_dbl[i][0] &&
2426 y == emit->immediates_dbl[i][1]) {
2427 return i;
2428 }
2429 }
2430 /* Should never try to use an immediate value that wasn't pre-declared */
2431 assert(!"find_immediate_dbl() failed!");
2432 return -1;
2433 }
2434
2435
2436
2437 /**
2438 * Return a tgsi_full_src_register for an immediate/literal
2439 * union tgsi_immediate_data[4] value.
2440 * Note: the values must have been previously declared/allocated in
2441 * emit_pre_helpers(). Also, all of x, y, z, w must be located in the same
2442 * vec4 immediate.
2443 */
2444 static struct tgsi_full_src_register
2445 make_immediate_reg_4(struct svga_shader_emitter_v10 *emit,
2446 const union tgsi_immediate_data imm[4])
2447 {
2448 struct tgsi_full_src_register reg;
2449 unsigned i;
2450
2451 for (i = 0; i < emit->num_common_immediates; i++) {
2452 /* search for first component value */
2453 int immpos = find_immediate(emit, imm[0], i);
2454 int x, y, z, w;
2455
2456 assert(immpos >= 0);
2457
2458 /* find remaining components within the immediate vector */
2459 x = find_imm_in_vec4(emit->immediates[immpos], imm[0]);
2460 y = find_imm_in_vec4(emit->immediates[immpos], imm[1]);
2461 z = find_imm_in_vec4(emit->immediates[immpos], imm[2]);
2462 w = find_imm_in_vec4(emit->immediates[immpos], imm[3]);
2463
2464 if (x >=0 && y >= 0 && z >= 0 && w >= 0) {
2465 /* found them all */
2466 memset(&reg, 0, sizeof(reg));
2467 reg.Register.File = TGSI_FILE_IMMEDIATE;
2468 reg.Register.Index = immpos;
2469 reg.Register.SwizzleX = x;
2470 reg.Register.SwizzleY = y;
2471 reg.Register.SwizzleZ = z;
2472 reg.Register.SwizzleW = w;
2473 return reg;
2474 }
2475 /* else, keep searching */
2476 }
2477
2478 assert(!"Failed to find immediate register!");
2479
2480 /* Just return IMM[0].xxxx */
2481 memset(&reg, 0, sizeof(reg));
2482 reg.Register.File = TGSI_FILE_IMMEDIATE;
2483 return reg;
2484 }
2485
2486
2487 /**
2488 * Return a tgsi_full_src_register for an immediate/literal
2489 * union tgsi_immediate_data value of the form {value, value, value, value}.
2490 * \sa make_immediate_reg_4() regarding allowed values.
2491 */
2492 static struct tgsi_full_src_register
2493 make_immediate_reg(struct svga_shader_emitter_v10 *emit,
2494 union tgsi_immediate_data value)
2495 {
2496 struct tgsi_full_src_register reg;
2497 int immpos = find_immediate(emit, value, 0);
2498
2499 assert(immpos >= 0);
2500
2501 memset(&reg, 0, sizeof(reg));
2502 reg.Register.File = TGSI_FILE_IMMEDIATE;
2503 reg.Register.Index = immpos;
2504 reg.Register.SwizzleX =
2505 reg.Register.SwizzleY =
2506 reg.Register.SwizzleZ =
2507 reg.Register.SwizzleW = find_imm_in_vec4(emit->immediates[immpos], value);
2508
2509 return reg;
2510 }
2511
2512
2513 /**
2514 * Return a tgsi_full_src_register for an immediate/literal float[4] value.
2515 * \sa make_immediate_reg_4() regarding allowed values.
2516 */
2517 static struct tgsi_full_src_register
2518 make_immediate_reg_float4(struct svga_shader_emitter_v10 *emit,
2519 float x, float y, float z, float w)
2520 {
2521 union tgsi_immediate_data imm[4];
2522 imm[0].Float = x;
2523 imm[1].Float = y;
2524 imm[2].Float = z;
2525 imm[3].Float = w;
2526 return make_immediate_reg_4(emit, imm);
2527 }
2528
2529
2530 /**
2531 * Return a tgsi_full_src_register for an immediate/literal float value
2532 * of the form {value, value, value, value}.
2533 * \sa make_immediate_reg_4() regarding allowed values.
2534 */
2535 static struct tgsi_full_src_register
2536 make_immediate_reg_float(struct svga_shader_emitter_v10 *emit, float value)
2537 {
2538 union tgsi_immediate_data imm;
2539 imm.Float = value;
2540 return make_immediate_reg(emit, imm);
2541 }
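
/* For example, make_immediate_reg_float(emit, 0.5f) returns a source
 * register selecting a 0.5f component of one of the pre-declared
 * immediate vec4s (alloc_common_immediates() always allocates
 * {0.0, 1.0, 0.5, -1.0}, so 0.5f is always available).
 */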
2542
2543
2544 /**
2545 * Return a tgsi_full_src_register for an immediate/literal int[4] vector.
2546 */
2547 static struct tgsi_full_src_register
2548 make_immediate_reg_int4(struct svga_shader_emitter_v10 *emit,
2549 int x, int y, int z, int w)
2550 {
2551 union tgsi_immediate_data imm[4];
2552 imm[0].Int = x;
2553 imm[1].Int = y;
2554 imm[2].Int = z;
2555 imm[3].Int = w;
2556 return make_immediate_reg_4(emit, imm);
2557 }
2558
2559
2560 /**
2561 * Return a tgsi_full_src_register for an immediate/literal int value
2562 * of the form {value, value, value, value}.
2563 * \sa make_immediate_reg_4() regarding allowed values.
2564 */
2565 static struct tgsi_full_src_register
2566 make_immediate_reg_int(struct svga_shader_emitter_v10 *emit, int value)
2567 {
2568 union tgsi_immediate_data imm;
2569 imm.Int = value;
2570 return make_immediate_reg(emit, imm);
2571 }
2572
2573
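/**
 * Return a tgsi_full_src_register for an immediate/literal double value
 * of the form {value, value}. The value must have been previously
 * allocated with alloc_immediate_double2().
 */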
2574 static struct tgsi_full_src_register
2575 make_immediate_reg_double(struct svga_shader_emitter_v10 *emit, double value)
2576 {
2577 struct tgsi_full_src_register reg;
2578 int immpos = find_immediate_dbl(emit, value, value);
2579
2580 assert(immpos >= 0);
2581
2582 memset(&reg, 0, sizeof(reg));
2583 reg.Register.File = TGSI_FILE_IMMEDIATE;
2584 reg.Register.Index = immpos;
2585 reg.Register.SwizzleX = TGSI_SWIZZLE_X;
2586 reg.Register.SwizzleY = TGSI_SWIZZLE_Y;
2587 reg.Register.SwizzleZ = TGSI_SWIZZLE_Z;
2588 reg.Register.SwizzleW = TGSI_SWIZZLE_W;
2589
2590 return reg;
2591 }
2592
2593
2594 /**
2595 * Allocate space for a union tgsi_immediate_data[4] immediate.
2596 * \return the index/position of the immediate.
2597 */
2598 static unsigned
2599 alloc_immediate_4(struct svga_shader_emitter_v10 *emit,
2600 const union tgsi_immediate_data imm[4])
2601 {
2602 unsigned n = emit->num_immediates++;
2603 assert(!emit->immediates_emitted);
2604 assert(n < ARRAY_SIZE(emit->immediates));
2605 emit->immediates[n][0] = imm[0];
2606 emit->immediates[n][1] = imm[1];
2607 emit->immediates[n][2] = imm[2];
2608 emit->immediates[n][3] = imm[3];
2609 return n;
2610 }
2611
2612
2613 /**
2614 * Allocate space for a float[4] immediate.
2615 * \return the index/position of the immediate.
2616 */
2617 static unsigned
2618 alloc_immediate_float4(struct svga_shader_emitter_v10 *emit,
2619 float x, float y, float z, float w)
2620 {
2621 union tgsi_immediate_data imm[4];
2622 imm[0].Float = x;
2623 imm[1].Float = y;
2624 imm[2].Float = z;
2625 imm[3].Float = w;
2626 return alloc_immediate_4(emit, imm);
2627 }
2628
2629
2630 /**
2631 * Allocate space for an int[4] immediate.
2632 * \return the index/position of the immediate.
2633 */
2634 static unsigned
2635 alloc_immediate_int4(struct svga_shader_emitter_v10 *emit,
2636 int x, int y, int z, int w)
2637 {
2638 union tgsi_immediate_data imm[4];
2639 imm[0].Int = x;
2640 imm[1].Int = y;
2641 imm[2].Int = z;
2642 imm[3].Int = w;
2643 return alloc_immediate_4(emit, imm);
2644 }
2645
2646
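/**
 * Allocate space for a double[2] immediate.
 * \return the index/position of the immediate.
 */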
2647 static unsigned
2648 alloc_immediate_double2(struct svga_shader_emitter_v10 *emit,
2649 double x, double y)
2650 {
2651 unsigned n = emit->num_immediates++;
2652 assert(!emit->immediates_emitted);
2653 assert(n < ARRAY_SIZE(emit->immediates));
2654 emit->immediates_dbl[n][0] = x;
2655 emit->immediates_dbl[n][1] = y;
2656 return n;
2657
2658 }
2659
2660
2661 /**
2662 * Allocate a shader input to store a system value.
2663 */
2664 static unsigned
2665 alloc_system_value_index(struct svga_shader_emitter_v10 *emit, unsigned index)
2666 {
2667 const unsigned n = emit->linkage.input_map_max + 1 + index;
2668 assert(index < ARRAY_SIZE(emit->system_value_indexes));
2669 emit->system_value_indexes[index] = n;
2670 return n;
2671 }
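
/* For example, if the highest linked input register is 3
 * (input_map_max == 3), system value index 0 is assigned to
 * input register 3 + 1 + 0 = 4.
 */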
2672
2673
2674 /**
2675 * Translate a TGSI immediate value (union tgsi_immediate_data[4]) to VGPU10.
2676 */
2677 static boolean
2678 emit_vgpu10_immediate(struct svga_shader_emitter_v10 *emit,
2679 const struct tgsi_full_immediate *imm)
2680 {
2681 /* We don't actually emit any code here. We just save the
2682 * immediate values and emit them later.
2683 */
2684 alloc_immediate_4(emit, imm->u);
2685 return TRUE;
2686 }
2687
2688
2689 /**
2690 * Emit a VGPU10_CUSTOMDATA_DCL_IMMEDIATE_CONSTANT_BUFFER block
2691 * containing all the immediate values previously allocated
2692 * with alloc_immediate_4().
2693 */
2694 static boolean
2695 emit_vgpu10_immediates_block(struct svga_shader_emitter_v10 *emit)
2696 {
2697 VGPU10OpcodeToken0 token;
2698
2699 assert(!emit->immediates_emitted);
2700
2701 token.value = 0;
2702 token.opcodeType = VGPU10_OPCODE_CUSTOMDATA;
2703 token.customDataClass = VGPU10_CUSTOMDATA_DCL_IMMEDIATE_CONSTANT_BUFFER;
2704
2705 /* Note: no begin/end_emit_instruction() calls */
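/* The block layout is: the CUSTOMDATA opcode token, then the total
 * length in dwords (2 header dwords + 4 dwords per vec4 immediate),
 * then the packed immediate data itself.
 */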
2706 emit_dword(emit, token.value);
2707 emit_dword(emit, 2 + 4 * emit->num_immediates);
2708 emit_dwords(emit, (unsigned *) emit->immediates, 4 * emit->num_immediates);
2709
2710 emit->immediates_emitted = TRUE;
2711
2712 return TRUE;
2713 }
2714
2715
2716 /**
2717 * Translate a fragment shader's TGSI_INTERPOLATE_x mode to a vgpu10
2718 * interpolation mode.
2719 * \return a VGPU10_INTERPOLATION_x value
2720 */
2721 static unsigned
2722 translate_interpolation(const struct svga_shader_emitter_v10 *emit,
2723 enum tgsi_interpolate_mode interp,
2724 enum tgsi_interpolate_loc interpolate_loc)
2725 {
2726 if (interp == TGSI_INTERPOLATE_COLOR) {
2727 interp = emit->key.fs.flatshade ?
2728 TGSI_INTERPOLATE_CONSTANT : TGSI_INTERPOLATE_PERSPECTIVE;
2729 }
2730
2731 switch (interp) {
2732 case TGSI_INTERPOLATE_CONSTANT:
2733 return VGPU10_INTERPOLATION_CONSTANT;
2734 case TGSI_INTERPOLATE_LINEAR:
2735 if (interpolate_loc == TGSI_INTERPOLATE_LOC_CENTROID) {
2736 return VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_CENTROID;
2737 } else if (interpolate_loc == TGSI_INTERPOLATE_LOC_SAMPLE &&
2738 emit->version >= 41) {
2739 return VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_SAMPLE;
2740 } else {
2741 return VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE;
2742 }
2743 break;
2744 case TGSI_INTERPOLATE_PERSPECTIVE:
2745 if (interpolate_loc == TGSI_INTERPOLATE_LOC_CENTROID) {
2746 return VGPU10_INTERPOLATION_LINEAR_CENTROID;
2747 } else if (interpolate_loc == TGSI_INTERPOLATE_LOC_SAMPLE &&
2748 emit->version >= 41) {
2749 return VGPU10_INTERPOLATION_LINEAR_SAMPLE;
2750 } else {
2751 return VGPU10_INTERPOLATION_LINEAR;
2752 }
2753 break;
2754 default:
2755 assert(!"Unexpected interpolation mode");
2756 return VGPU10_INTERPOLATION_CONSTANT;
2757 }
2758 }
2759
2760
2761 /**
2762 * Translate a TGSI property to VGPU10.
2763 * Don't emit any instructions yet; we only need to gather the primitive property
2764 * information. The output primitive topology might be changed later. The
2765 * final property instructions will be emitted as part of the pre-helper code.
2766 */
2767 static boolean
2768 emit_vgpu10_property(struct svga_shader_emitter_v10 *emit,
2769 const struct tgsi_full_property *prop)
2770 {
2771 static const VGPU10_PRIMITIVE primType[] = {
2772 VGPU10_PRIMITIVE_POINT, /* PIPE_PRIM_POINTS */
2773 VGPU10_PRIMITIVE_LINE, /* PIPE_PRIM_LINES */
2774 VGPU10_PRIMITIVE_LINE, /* PIPE_PRIM_LINE_LOOP */
2775 VGPU10_PRIMITIVE_LINE, /* PIPE_PRIM_LINE_STRIP */
2776 VGPU10_PRIMITIVE_TRIANGLE, /* PIPE_PRIM_TRIANGLES */
2777 VGPU10_PRIMITIVE_TRIANGLE, /* PIPE_PRIM_TRIANGLE_STRIP */
2778 VGPU10_PRIMITIVE_TRIANGLE, /* PIPE_PRIM_TRIANGLE_FAN */
2779 VGPU10_PRIMITIVE_UNDEFINED, /* PIPE_PRIM_QUADS */
2780 VGPU10_PRIMITIVE_UNDEFINED, /* PIPE_PRIM_QUAD_STRIP */
2781 VGPU10_PRIMITIVE_UNDEFINED, /* PIPE_PRIM_POLYGON */
2782 VGPU10_PRIMITIVE_LINE_ADJ, /* PIPE_PRIM_LINES_ADJACENCY */
2783 VGPU10_PRIMITIVE_LINE_ADJ, /* PIPE_PRIM_LINE_STRIP_ADJACENCY */
2784 VGPU10_PRIMITIVE_TRIANGLE_ADJ, /* PIPE_PRIM_TRIANGLES_ADJACENCY */
2785 VGPU10_PRIMITIVE_TRIANGLE_ADJ /* PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY */
2786 };
2787
2788 static const VGPU10_PRIMITIVE_TOPOLOGY primTopology[] = {
2789 VGPU10_PRIMITIVE_TOPOLOGY_POINTLIST, /* PIPE_PRIM_POINTS */
2790 VGPU10_PRIMITIVE_TOPOLOGY_LINELIST, /* PIPE_PRIM_LINES */
2791 VGPU10_PRIMITIVE_TOPOLOGY_LINELIST, /* PIPE_PRIM_LINE_LOOP */
2792 VGPU10_PRIMITIVE_TOPOLOGY_LINESTRIP, /* PIPE_PRIM_LINE_STRIP */
2793 VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLELIST, /* PIPE_PRIM_TRIANGLES */
2794 VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP, /* PIPE_PRIM_TRIANGLE_STRIP */
2795 VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP, /* PIPE_PRIM_TRIANGLE_FAN */
2796 VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED, /* PIPE_PRIM_QUADS */
2797 VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED, /* PIPE_PRIM_QUAD_STRIP */
2798 VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED, /* PIPE_PRIM_POLYGON */
2799 VGPU10_PRIMITIVE_TOPOLOGY_LINELIST_ADJ, /* PIPE_PRIM_LINES_ADJACENCY */
2800 VGPU10_PRIMITIVE_TOPOLOGY_LINELIST_ADJ, /* PIPE_PRIM_LINE_STRIP_ADJACENCY */
2801 VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLELIST_ADJ, /* PIPE_PRIM_TRIANGLES_ADJACENCY */
2802 VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP_ADJ /* PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY */
2803 };
2804
2805 static const unsigned inputArraySize[] = {
2806 0, /* VGPU10_PRIMITIVE_UNDEFINED */
2807 1, /* VGPU10_PRIMITIVE_POINT */
2808 2, /* VGPU10_PRIMITIVE_LINE */
2809 3, /* VGPU10_PRIMITIVE_TRIANGLE */
2810 0,
2811 0,
2812 4, /* VGPU10_PRIMITIVE_LINE_ADJ */
2813 6 /* VGPU10_PRIMITIVE_TRIANGLE_ADJ */
2814 };
2815
2816 switch (prop->Property.PropertyName) {
2817 case TGSI_PROPERTY_GS_INPUT_PRIM:
2818 assert(prop->u[0].Data < ARRAY_SIZE(primType));
2819 emit->gs.prim_type = primType[prop->u[0].Data];
2820 assert(emit->gs.prim_type != VGPU10_PRIMITIVE_UNDEFINED);
2821 emit->gs.input_size = inputArraySize[emit->gs.prim_type];
2822 break;
2823
2824 case TGSI_PROPERTY_GS_OUTPUT_PRIM:
2825 assert(prop->u[0].Data < ARRAY_SIZE(primTopology));
2826 emit->gs.prim_topology = primTopology[prop->u[0].Data];
2827 assert(emit->gs.prim_topology != VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED);
2828 break;
2829
2830 case TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES:
2831 emit->gs.max_out_vertices = prop->u[0].Data;
2832 break;
2833
2834 case TGSI_PROPERTY_GS_INVOCATIONS:
2835 emit->gs.invocations = prop->u[0].Data;
2836 break;
2837
2838 case TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS:
2839 case TGSI_PROPERTY_NEXT_SHADER:
2840 case TGSI_PROPERTY_NUM_CLIPDIST_ENABLED:
2841 /* no-op */
2842 break;
2843
2844 case TGSI_PROPERTY_TCS_VERTICES_OUT:
2845 /* This info is already captured in the shader key */
2846 break;
2847
2848 case TGSI_PROPERTY_TES_PRIM_MODE:
2849 emit->tes.prim_mode = prop->u[0].Data;
2850 break;
2851
2852 case TGSI_PROPERTY_TES_SPACING:
2853 emit->tes.spacing = prop->u[0].Data;
2854 break;
2855
2856 case TGSI_PROPERTY_TES_VERTEX_ORDER_CW:
2857 emit->tes.vertices_order_cw = prop->u[0].Data;
2858 break;
2859
2860 case TGSI_PROPERTY_TES_POINT_MODE:
2861 emit->tes.point_mode = prop->u[0].Data;
2862 break;
2863
2864 default:
2865 debug_printf("Unexpected TGSI property %s\n",
2866 tgsi_property_names[prop->Property.PropertyName]);
2867 }
2868
2869 return TRUE;
2870 }
2871
2872
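/**
 * A helper to emit a single property declaration instruction,
 * optionally followed by one immediate data dword.
 */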
2873 static void
2874 emit_property_instruction(struct svga_shader_emitter_v10 *emit,
2875 VGPU10OpcodeToken0 opcode0, unsigned nData,
2876 unsigned data)
2877 {
2878 begin_emit_instruction(emit);
2879 emit_dword(emit, opcode0.value);
2880 if (nData)
2881 emit_dword(emit, data);
2882 end_emit_instruction(emit);
2883 }
2884
2885
2886 /**
2887 * Emit property instructions
2888 */
2889 static void
2890 emit_property_instructions(struct svga_shader_emitter_v10 *emit)
2891 {
2892 VGPU10OpcodeToken0 opcode0;
2893
2894 assert(emit->unit == PIPE_SHADER_GEOMETRY);
2895
2896 /* emit input primitive type declaration */
2897 opcode0.value = 0;
2898 opcode0.opcodeType = VGPU10_OPCODE_DCL_GS_INPUT_PRIMITIVE;
2899 opcode0.primitive = emit->gs.prim_type;
2900 emit_property_instruction(emit, opcode0, 0, 0);
2901
2902 /* emit max output vertices */
2903 opcode0.value = 0;
2904 opcode0.opcodeType = VGPU10_OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT;
2905 emit_property_instruction(emit, opcode0, 1, emit->gs.max_out_vertices);
2906
2907 if (emit->version >= 50 && emit->gs.invocations > 0) {
2908 opcode0.value = 0;
2909 opcode0.opcodeType = VGPU10_OPCODE_DCL_GS_INSTANCE_COUNT;
2910 emit_property_instruction(emit, opcode0, 1, emit->gs.invocations);
2911 }
2912 }
2913
2914
2915 /**
2916 * A helper function to declare the tessellator domain in a hull shader
2917 * or in a domain shader.
2918 */
2919 static void
2920 emit_tessellator_domain(struct svga_shader_emitter_v10 *emit,
2921 enum pipe_prim_type prim_mode)
2922 {
2923 VGPU10OpcodeToken0 opcode0;
2924
2925 opcode0.value = 0;
2926 opcode0.opcodeType = VGPU10_OPCODE_DCL_TESS_DOMAIN;
2927 switch (prim_mode) {
2928 case PIPE_PRIM_QUADS:
2929 case PIPE_PRIM_LINES:
2930 opcode0.tessDomain = VGPU10_TESSELLATOR_DOMAIN_QUAD;
2931 break;
2932 case PIPE_PRIM_TRIANGLES:
2933 opcode0.tessDomain = VGPU10_TESSELLATOR_DOMAIN_TRI;
2934 break;
2935 default:
2936 debug_printf("Invalid tessellator prim mode %d\n", prim_mode);
2937 opcode0.tessDomain = VGPU10_TESSELLATOR_DOMAIN_UNDEFINED;
2938 }
2939 begin_emit_instruction(emit);
2940 emit_dword(emit, opcode0.value);
2941 end_emit_instruction(emit);
2942 }
2943
2944
2945 /**
2946 * Emit domain shader declarations.
2947 */
2948 static void
2949 emit_domain_shader_declarations(struct svga_shader_emitter_v10 *emit)
2950 {
2951 VGPU10OpcodeToken0 opcode0;
2952
2953 assert(emit->unit == PIPE_SHADER_TESS_EVAL);
2954
2955 /* Emit the input control point count */
2956 assert(emit->key.tes.vertices_per_patch >= 0 &&
2957 emit->key.tes.vertices_per_patch <= 32);
2958
2959 opcode0.value = 0;
2960 opcode0.opcodeType = VGPU10_OPCODE_DCL_INPUT_CONTROL_POINT_COUNT;
2961 opcode0.controlPointCount = emit->key.tes.vertices_per_patch;
2962 begin_emit_instruction(emit);
2963 emit_dword(emit, opcode0.value);
2964 end_emit_instruction(emit);
2965
2966 emit_tessellator_domain(emit, emit->tes.prim_mode);
2967 }
2968
2969
2970 /**
2971 * Some common values like 0.0, 1.0, 0.5, etc. are frequently needed
2972 * to implement some instructions. We pre-allocate those values here
2973 * in the immediate constant buffer.
2974 */
2975 static void
2976 alloc_common_immediates(struct svga_shader_emitter_v10 *emit)
2977 {
2978 unsigned n = 0;
2979
2980 emit->common_immediate_pos[n++] =
2981 alloc_immediate_float4(emit, 0.0f, 1.0f, 0.5f, -1.0f);
2982
2983 if (emit->info.opcode_count[TGSI_OPCODE_LIT] > 0) {
2984 emit->common_immediate_pos[n++] =
2985 alloc_immediate_float4(emit, 128.0f, -128.0f, 0.0f, 0.0f);
2986 }
2987
2988 emit->common_immediate_pos[n++] =
2989 alloc_immediate_int4(emit, 0, 1, 0, -1);
2990
2991 if (emit->info.opcode_count[TGSI_OPCODE_IMSB] > 0 ||
2992 emit->info.opcode_count[TGSI_OPCODE_UMSB] > 0) {
2993 emit->common_immediate_pos[n++] =
2994 alloc_immediate_int4(emit, 31, 0, 0, 0);
2995 }
2996
2997 if (emit->info.opcode_count[TGSI_OPCODE_UBFE] > 0 ||
2998 emit->info.opcode_count[TGSI_OPCODE_IBFE] > 0 ||
2999 emit->info.opcode_count[TGSI_OPCODE_BFI] > 0) {
3000 emit->common_immediate_pos[n++] =
3001 alloc_immediate_int4(emit, 32, 0, 0, 0);
3002 }
3003
3004 if (emit->key.vs.attrib_puint_to_snorm) {
3005 emit->common_immediate_pos[n++] =
3006 alloc_immediate_float4(emit, -2.0f, 2.0f, 3.0f, -1.66666f);
3007 }
3008
3009 if (emit->key.vs.attrib_puint_to_uscaled) {
3010 emit->common_immediate_pos[n++] =
3011 alloc_immediate_float4(emit, 1023.0f, 3.0f, 0.0f, 0.0f);
3012 }
3013
3014 if (emit->key.vs.attrib_puint_to_sscaled) {
3015 emit->common_immediate_pos[n++] =
3016 alloc_immediate_int4(emit, 22, 12, 2, 0);
3017
3018 emit->common_immediate_pos[n++] =
3019 alloc_immediate_int4(emit, 22, 30, 0, 0);
3020 }
3021
3022 if (emit->vposition.num_prescale > 1) {
3023 unsigned i;
3024 for (i = 0; i < emit->vposition.num_prescale; i+=4) {
3025 emit->common_immediate_pos[n++] =
3026 alloc_immediate_int4(emit, i, i+1, i+2, i+3);
3027 }
3028 }
3029
3030 emit->immediates_dbl = (double (*)[2]) emit->immediates;
3031
3032 if (emit->info.opcode_count[TGSI_OPCODE_DNEG] > 0) {
3033 emit->common_immediate_pos[n++] =
3034 alloc_immediate_double2(emit, -1.0, -1.0);
3035 }
3036
3037 if (emit->info.opcode_count[TGSI_OPCODE_DSQRT] > 0) {
3038 emit->common_immediate_pos[n++] =
3039 alloc_immediate_double2(emit, 0.0, 0.0);
3040 emit->common_immediate_pos[n++] =
3041 alloc_immediate_double2(emit, 1.0, 1.0);
3042 }
3043
3044 if (emit->info.opcode_count[TGSI_OPCODE_INTERP_OFFSET] > 0) {
3045 emit->common_immediate_pos[n++] =
3046 alloc_immediate_float4(emit, 16.0f, -16.0f, 0.0, 0.0);
3047 }
3048
3049 assert(n <= ARRAY_SIZE(emit->common_immediate_pos));
3050
3051 unsigned i;
3052
3053 for (i = 0; i < PIPE_MAX_SAMPLERS; i++) {
3054 if (emit->key.tex[i].texel_bias) {
3055 /* Replace the 0.0f placeholders if more immediate float values are needed */
3056 emit->common_immediate_pos[n++] =
3057 alloc_immediate_float4(emit, 0.0001f, 0.0f, 0.0f, 0.0f);
3058 break;
3059 }
3060 }
3061
3062 assert(n <= ARRAY_SIZE(emit->common_immediate_pos));
3063 emit->num_common_immediates = n;
3064 }
3065
3066
3067 /**
3068 * Emit hull shader declarations.
3069 */
3070 static void
3071 emit_hull_shader_declarations(struct svga_shader_emitter_v10 *emit)
3072 {
3073 VGPU10OpcodeToken0 opcode0;
3074
3075 /* Emit the input control point count */
3076 assert(emit->key.tcs.vertices_per_patch > 0 &&
3077 emit->key.tcs.vertices_per_patch <= 32);
3078
3079 opcode0.value = 0;
3080 opcode0.opcodeType = VGPU10_OPCODE_DCL_INPUT_CONTROL_POINT_COUNT;
3081 opcode0.controlPointCount = emit->key.tcs.vertices_per_patch;
3082 begin_emit_instruction(emit);
3083 emit_dword(emit, opcode0.value);
3084 end_emit_instruction(emit);
3085
3086 /* Emit the output control point count */
3087 assert(emit->key.tcs.vertices_out >= 0 && emit->key.tcs.vertices_out <= 32);
3088
3089 opcode0.value = 0;
3090 opcode0.opcodeType = VGPU10_OPCODE_DCL_OUTPUT_CONTROL_POINT_COUNT;
3091 opcode0.controlPointCount = emit->key.tcs.vertices_out;
3092 begin_emit_instruction(emit);
3093 emit_dword(emit, opcode0.value);
3094 end_emit_instruction(emit);
3095
3096 /* Emit tessellator domain */
3097 emit_tessellator_domain(emit, emit->key.tcs.prim_mode);
3098
3099 /* Emit tessellator output primitive */
3100 opcode0.value = 0;
3101 opcode0.opcodeType = VGPU10_OPCODE_DCL_TESS_OUTPUT_PRIMITIVE;
3102 if (emit->key.tcs.point_mode) {
3103 opcode0.tessOutputPrimitive = VGPU10_TESSELLATOR_OUTPUT_POINT;
3104 }
3105 else if (emit->key.tcs.prim_mode == PIPE_PRIM_LINES) {
3106 opcode0.tessOutputPrimitive = VGPU10_TESSELLATOR_OUTPUT_LINE;
3107 }
3108 else {
3109 assert(emit->key.tcs.prim_mode == PIPE_PRIM_QUADS ||
3110 emit->key.tcs.prim_mode == PIPE_PRIM_TRIANGLES);
3111
3112 if (emit->key.tcs.vertices_order_cw)
3113 opcode0.tessOutputPrimitive = VGPU10_TESSELLATOR_OUTPUT_TRIANGLE_CCW;
3114 else
3115 opcode0.tessOutputPrimitive = VGPU10_TESSELLATOR_OUTPUT_TRIANGLE_CW;
3116 }
3117 begin_emit_instruction(emit);
3118 emit_dword(emit, opcode0.value);
3119 end_emit_instruction(emit);
3120
3121 /* Emit tessellator partitioning */
3122 opcode0.value = 0;
3123 opcode0.opcodeType = VGPU10_OPCODE_DCL_TESS_PARTITIONING;
3124 switch (emit->key.tcs.spacing) {
3125 case PIPE_TESS_SPACING_FRACTIONAL_ODD:
3126 opcode0.tessPartitioning = VGPU10_TESSELLATOR_PARTITIONING_FRACTIONAL_ODD;
3127 break;
3128 case PIPE_TESS_SPACING_FRACTIONAL_EVEN:
3129 opcode0.tessPartitioning = VGPU10_TESSELLATOR_PARTITIONING_FRACTIONAL_EVEN;
3130 break;
3131 case PIPE_TESS_SPACING_EQUAL:
3132 opcode0.tessPartitioning = VGPU10_TESSELLATOR_PARTITIONING_INTEGER;
3133 break;
3134 default:
3135 debug_printf("invalid tessellator spacing %d\n", emit->key.tcs.spacing);
3136 opcode0.tessPartitioning = VGPU10_TESSELLATOR_PARTITIONING_UNDEFINED;
3137 }
3138 begin_emit_instruction(emit);
3139 emit_dword(emit, opcode0.value);
3140 end_emit_instruction(emit);
3141
3142 /* Declare constant registers */
3143 emit_constant_declaration(emit);
3144
3145 /* Declare samplers and resources */
3146 emit_sampler_declarations(emit);
3147 emit_resource_declarations(emit);
3148
3149 alloc_common_immediates(emit);
3150
3151 int nVertices = emit->key.tcs.vertices_per_patch;
3152 emit->tcs.imm_index =
3153 alloc_immediate_int4(emit, nVertices, nVertices, nVertices, 0);
3154
3155 /* Now, emit the constant block containing all the immediates
3156 * declared by the shader, as well as the extra ones seen above.
3157 */
3158 emit_vgpu10_immediates_block(emit);
3159
3160 }
3161
3162
3163 /**
3164 * A helper function to determine if the control point phase is needed.
3165 * Returns TRUE if there is control point output.
3166 */
3167 static boolean
3168 needs_control_point_phase(struct svga_shader_emitter_v10 *emit)
3169 {
3170 unsigned i;
3171
3172 assert(emit->unit == PIPE_SHADER_TESS_CTRL);
3173
3174 /* If output control point count does not match the input count,
3175 * we need a control point phase to explicitly set the output control
3176 * points.
3177 */
3178 if ((emit->key.tcs.vertices_per_patch != emit->key.tcs.vertices_out) &&
3179 emit->key.tcs.vertices_out)
3180 return TRUE;
3181
3182 for (i = 0; i < emit->info.num_outputs; i++) {
3183 switch (emit->info.output_semantic_name[i]) {
3184 case TGSI_SEMANTIC_PATCH:
3185 case TGSI_SEMANTIC_TESSOUTER:
3186 case TGSI_SEMANTIC_TESSINNER:
3187 break;
3188 default:
3189 return TRUE;
3190 }
3191 }
3192 return FALSE;
3193 }
3194
3195
3196 /**
3197 * A helper function to add the shader signature for the passthrough control
3198 * point phase. The HLSL compiler also generates this signature for the
3199 * passthrough control point phase, and it is needed by the Metal renderer.
3200 */
3201 static void
3202 emit_passthrough_control_point_signature(struct svga_shader_emitter_v10 *emit)
3203 {
3204 struct svga_shader_signature *sgn = &emit->signature;
3205 SVGA3dDXShaderSignatureEntry *sgnEntry;
3206 unsigned i;
3207
3208 for (i = 0; i < emit->info.num_inputs; i++) {
3209 unsigned index = emit->linkage.input_map[i];
3210 enum tgsi_semantic sem_name = emit->info.input_semantic_name[i];
3211
3212 sgnEntry = &sgn->inputs[sgn->header.numInputSignatures++];
3213
3214 set_shader_signature_entry(sgnEntry, index,
3215 tgsi_semantic_to_sgn_name[sem_name],
3216 VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
3217 SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN,
3218 SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT);
3219
3220 sgnEntry = &sgn->outputs[sgn->header.numOutputSignatures++];
3221
3222 set_shader_signature_entry(sgnEntry, i,
3223 tgsi_semantic_to_sgn_name[sem_name],
3224 VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
3225 SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN,
3226 SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT);
3227 }
3228 }
3229
3230
3231 /**
3232 * A helper function to emit an instruction to start the control point phase
3233 * in the hull shader.
3234 */
3235 static void
3236 emit_control_point_phase_instruction(struct svga_shader_emitter_v10 *emit)
3237 {
3238 VGPU10OpcodeToken0 opcode0;
3239
3240 opcode0.value = 0;
3241 opcode0.opcodeType = VGPU10_OPCODE_HS_CONTROL_POINT_PHASE;
3242 begin_emit_instruction(emit);
3243 emit_dword(emit, opcode0.value);
3244 end_emit_instruction(emit);
3245 }
3246
3247
3248 /**
3249 * Start the hull shader control point phase
3250 */
3251 static boolean
3252 emit_hull_shader_control_point_phase(struct svga_shader_emitter_v10 *emit)
3253 {
3254 /* If there is no control point output, skip the control point phase. */
3255 if (!needs_control_point_phase(emit)) {
3256 if (!emit->key.tcs.vertices_out) {
3257 /**
3258 * If the tcs does not explicitly generate any control point output
3259 * and the tes does not use any input control point, then
3260 * emit an empty control point phase with zero output control
3261 * point count.
3262 */
3263 emit_control_point_phase_instruction(emit);
3264
3265 /**
3266 * Since this is an empty control point phase, we will need to
3267 * add input signatures when we parse the tcs again in the
3268 * patch constant phase.
3269 */
3270 emit->tcs.fork_phase_add_signature = TRUE;
3271 }
3272 else {
3273 /**
3274 * Before skipping the control point phase, add the signature for
3275 * the passthrough control point.
3276 */
3277 emit_passthrough_control_point_signature(emit);
3278 }
3279 return FALSE;
3280 }
3281
3282 /* Start the control point phase in the hull shader */
3283 emit_control_point_phase_instruction(emit);
3284
3285 /* Declare the output control point ID */
3286 if (emit->tcs.invocation_id_sys_index == INVALID_INDEX) {
3287 /* Add invocation id declaration if it does not exist */
3288 emit->tcs.invocation_id_sys_index = emit->info.num_system_values + 1;
3289 }
3290
3291 emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
3292 VGPU10_OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID,
3293 VGPU10_OPERAND_INDEX_0D,
3294 0, 1,
3295 VGPU10_NAME_UNDEFINED,
3296 VGPU10_OPERAND_0_COMPONENT, 0,
3297 0,
3298 VGPU10_INTERPOLATION_CONSTANT, TRUE,
3299 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED);
3300
3301 if (emit->tcs.prim_id_index != INVALID_INDEX) {
3302 emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
3303 VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID,
3304 VGPU10_OPERAND_INDEX_0D,
3305 0, 1,
3306 VGPU10_NAME_UNDEFINED,
3307 VGPU10_OPERAND_0_COMPONENT,
3308 VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
3309 0,
3310 VGPU10_INTERPOLATION_UNDEFINED, TRUE,
3311 SVGADX_SIGNATURE_SEMANTIC_NAME_PRIMITIVE_ID);
3312 }
3313
3314 return TRUE;
3315 }
3316
3317
3318 /**
3319 * Start the hull shader patch constant phase and
3320 * do the second pass of the tcs translation and emit
3321 * the relevant declarations and instructions for this phase.
3322 */
3323 static boolean
3324 emit_hull_shader_patch_constant_phase(struct svga_shader_emitter_v10 *emit,
3325 struct tgsi_parse_context *parse)
3326 {
3327 unsigned inst_number = 0;
3328 boolean ret = TRUE;
3329 VGPU10OpcodeToken0 opcode0;
3330
3331 emit->skip_instruction = FALSE;
3332
3333 /* Start the patch constant phase */
3334 opcode0.value = 0;
3335 opcode0.opcodeType = VGPU10_OPCODE_HS_FORK_PHASE;
3336 begin_emit_instruction(emit);
3337 emit_dword(emit, opcode0.value);
3338 end_emit_instruction(emit);
3339
3340 /* Set the current phase to patch constant phase */
3341 emit->tcs.control_point_phase = FALSE;
3342
3343 if (emit->tcs.prim_id_index != INVALID_INDEX) {
3344 emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
3345 VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID,
3346 VGPU10_OPERAND_INDEX_0D,
3347 0, 1,
3348 VGPU10_NAME_UNDEFINED,
3349 VGPU10_OPERAND_0_COMPONENT,
3350 VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
3351 0,
3352 VGPU10_INTERPOLATION_UNDEFINED, TRUE,
3353 SVGADX_SIGNATURE_SEMANTIC_NAME_PRIMITIVE_ID);
3354 }
3355
3356 /* Emit declarations for this phase */
3357 emit->index_range.required =
3358 emit->info.indirect_files & (1 << TGSI_FILE_INPUT) ? TRUE : FALSE;
3359 emit_tcs_input_declarations(emit);
3360
3361 if (emit->index_range.start_index != INVALID_INDEX) {
3362 emit_index_range_declaration(emit);
3363 }
3364
3365 emit->index_range.required =
3366 emit->info.indirect_files & (1 << TGSI_FILE_OUTPUT) ? TRUE : FALSE;
3367 emit_tcs_output_declarations(emit);
3368
3369 if (emit->index_range.start_index != INVALID_INDEX) {
3370 emit_index_range_declaration(emit);
3371 }
3372 emit->index_range.required = FALSE;
3373
3374 emit_temporaries_declaration(emit);
3375
3376 /* Reset the token position to the first instruction token
3377 * in preparation for the second pass of the shader
3378 */
3379 parse->Position = emit->tcs.instruction_token_pos;
3380
3381 while (!tgsi_parse_end_of_tokens(parse)) {
3382 tgsi_parse_token(parse);
3383
3384 assert(parse->FullToken.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION);
3385 ret = emit_vgpu10_instruction(emit, inst_number++,
3386 &parse->FullToken.FullInstruction);
3387
3388 /* Usually this applies to the TCS only. If the shader reads a patch
3389 * constant output in the fork phase, we must re-emit all instructions
3390 * that write to patch constant outputs in the fork phase so that the
3391 * results are also stored in temporaries.
3392 */
3393 if (emit->reemit_instruction) {
3394 assert(emit->unit == PIPE_SHADER_TESS_CTRL);
3395 ret = emit_vgpu10_instruction(emit, inst_number,
3396 &parse->FullToken.FullInstruction);
3397 }
3398
3399 if (!ret)
3400 return FALSE;
3401 }
3402
3403 return TRUE;
3404 }
3405
3406
3407 /**
3408 * Emit index range declaration.
3409 */
3410 static boolean
3411 emit_index_range_declaration(struct svga_shader_emitter_v10 *emit)
3412 {
3413 if (emit->version < 50)
3414 return TRUE;
3415
3416 assert(emit->index_range.start_index != INVALID_INDEX);
3417 assert(emit->index_range.count != 0);
3418 assert(emit->index_range.required);
3419 assert(emit->index_range.operandType != VGPU10_NUM_OPERANDS);
3420 assert(emit->index_range.dim != 0);
3421 assert(emit->index_range.size != 0);
3422
3423 VGPU10OpcodeToken0 opcode0;
3424 VGPU10OperandToken0 operand0;
3425
3426 opcode0.value = 0;
3427 opcode0.opcodeType = VGPU10_OPCODE_DCL_INDEX_RANGE;
3428
3429 operand0.value = 0;
3430 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
3431 operand0.indexDimension = emit->index_range.dim;
3432 operand0.operandType = emit->index_range.operandType;
3433 operand0.mask = VGPU10_OPERAND_4_COMPONENT_MASK_ALL;
3434 operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
3435
3436 if (emit->index_range.dim == VGPU10_OPERAND_INDEX_2D)
3437 operand0.index1Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
3438
3439 begin_emit_instruction(emit);
3440 emit_dword(emit, opcode0.value);
3441 emit_dword(emit, operand0.value);
3442
3443 if (emit->index_range.dim == VGPU10_OPERAND_INDEX_2D) {
3444 emit_dword(emit, emit->index_range.size);
3445 emit_dword(emit, emit->index_range.start_index);
3446 emit_dword(emit, emit->index_range.count);
3447 }
3448 else {
3449 emit_dword(emit, emit->index_range.start_index);
3450 emit_dword(emit, emit->index_range.count);
3451 }
3452
3453 end_emit_instruction(emit);
3454
3455 /* Reset the fields in the emit->index_range struct, except
3456 * emit->index_range.required, which will be reset afterwards.
3457 */
3458 emit->index_range.count = 0;
3459 emit->index_range.operandType = VGPU10_NUM_OPERANDS;
3460 emit->index_range.start_index = INVALID_INDEX;
3461 emit->index_range.size = 0;
3462 emit->index_range.dim = 0;
3463
3464 return TRUE;
3465 }
3466
3467
3468 /**
3469 * Emit a vgpu10 declaration "instruction".
3470 * \param index the register index
3471 * \param size array size of the operand. In most cases, it is 1,
3472 * but for inputs to geometry shader, the array size varies
3473 * depending on the primitive type.
3474 */
3475 static void
3476 emit_decl_instruction(struct svga_shader_emitter_v10 *emit,
3477 VGPU10OpcodeToken0 opcode0,
3478 VGPU10OperandToken0 operand0,
3479 VGPU10NameToken name_token,
3480 unsigned index, unsigned size)
3481 {
3482 assert(opcode0.opcodeType);
3483 assert(operand0.mask ||
3484 (operand0.operandType == VGPU10_OPERAND_TYPE_OUTPUT) ||
3485 (operand0.operandType == VGPU10_OPERAND_TYPE_OUTPUT_DEPTH) ||
3486 (operand0.operandType == VGPU10_OPERAND_TYPE_OUTPUT_COVERAGE_MASK) ||
3487 (operand0.operandType == VGPU10_OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID) ||
3488 (operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID) ||
3489 (operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_GS_INSTANCE_ID) ||
3490 (operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_COVERAGE_MASK) ||
3491 (operand0.operandType == VGPU10_OPERAND_TYPE_STREAM));
3492
3493 begin_emit_instruction(emit);
3494 emit_dword(emit, opcode0.value);
3495
3496 emit_dword(emit, operand0.value);
3497
3498 if (operand0.indexDimension == VGPU10_OPERAND_INDEX_1D) {
3499 /* Next token is the index of the register to declare */
3500 emit_dword(emit, index);
3501 }
3502 else if (operand0.indexDimension >= VGPU10_OPERAND_INDEX_2D) {
3503 /* Next token is the size of the register */
3504 emit_dword(emit, size);
3505
3506 /* Followed by the index of the register */
3507 emit_dword(emit, index);
3508 }
3509
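   /* System-value declarations (DCL_*_SIV/_SGV) carry an extra name token */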
3510 if (name_token.value) {
3511 emit_dword(emit, name_token.value);
3512 }
3513
3514 end_emit_instruction(emit);
3515 }
3516
3517
3518 /**
3519 * Emit the declaration for a shader input.
3520 * \param opcodeType opcode type, one of VGPU10_OPCODE_DCL_INPUTx
3521 * \param operandType operand type, one of VGPU10_OPERAND_TYPE_INPUT_x
3522 * \param dim index dimension
3523 * \param index the input register index
3524 * \param size array size of the operand. In most cases, it is 1,
3525 * but for inputs to geometry shader, the array size varies
3526 * depending on the primitive type. For tessellation control
3527 * shader, the array size is the vertex count per patch.
3528 * \param name one of VGPU10_NAME_x
3529  * \param numComp number of components
3530 * \param selMode component selection mode
3531 * \param usageMask bitfield of VGPU10_OPERAND_4_COMPONENT_MASK_x values
3532 * \param interpMode interpolation mode
3533 */
3534 static void
3535 emit_input_declaration(struct svga_shader_emitter_v10 *emit,
3536 VGPU10_OPCODE_TYPE opcodeType,
3537 VGPU10_OPERAND_TYPE operandType,
3538 VGPU10_OPERAND_INDEX_DIMENSION dim,
3539 unsigned index, unsigned size,
3540 VGPU10_SYSTEM_NAME name,
3541 VGPU10_OPERAND_NUM_COMPONENTS numComp,
3542 VGPU10_OPERAND_4_COMPONENT_SELECTION_MODE selMode,
3543 unsigned usageMask,
3544 VGPU10_INTERPOLATION_MODE interpMode,
3545 boolean addSignature,
3546 SVGA3dDXSignatureSemanticName sgnName)
3547 {
3548 VGPU10OpcodeToken0 opcode0;
3549 VGPU10OperandToken0 operand0;
3550 VGPU10NameToken name_token;
3551
3552 assert(usageMask <= VGPU10_OPERAND_4_COMPONENT_MASK_ALL);
3553 assert(opcodeType == VGPU10_OPCODE_DCL_INPUT ||
3554 opcodeType == VGPU10_OPCODE_DCL_INPUT_SIV ||
3555 opcodeType == VGPU10_OPCODE_DCL_INPUT_SGV ||
3556 opcodeType == VGPU10_OPCODE_DCL_INPUT_PS ||
3557 opcodeType == VGPU10_OPCODE_DCL_INPUT_PS_SIV ||
3558 opcodeType == VGPU10_OPCODE_DCL_INPUT_PS_SGV);
3559 assert(operandType == VGPU10_OPERAND_TYPE_INPUT ||
3560 operandType == VGPU10_OPERAND_TYPE_INPUT_GS_INSTANCE_ID ||
3561 operandType == VGPU10_OPERAND_TYPE_INPUT_COVERAGE_MASK ||
3562 operandType == VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID ||
3563 operandType == VGPU10_OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID ||
3564 operandType == VGPU10_OPERAND_TYPE_INPUT_DOMAIN_POINT ||
3565 operandType == VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT ||
3566 operandType == VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT ||
3567 operandType == VGPU10_OPERAND_TYPE_INPUT_THREAD_ID ||
3568 operandType == VGPU10_OPERAND_TYPE_INPUT_THREAD_GROUP_ID ||
3569 operandType == VGPU10_OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP);
3570
3571 assert(numComp <= VGPU10_OPERAND_4_COMPONENT);
3572 assert(selMode <= VGPU10_OPERAND_4_COMPONENT_MASK_MODE);
3573 assert(dim <= VGPU10_OPERAND_INDEX_3D);
3574 assert(name == VGPU10_NAME_UNDEFINED ||
3575 name == VGPU10_NAME_POSITION ||
3576 name == VGPU10_NAME_INSTANCE_ID ||
3577 name == VGPU10_NAME_VERTEX_ID ||
3578 name == VGPU10_NAME_PRIMITIVE_ID ||
3579 name == VGPU10_NAME_IS_FRONT_FACE ||
3580 name == VGPU10_NAME_SAMPLE_INDEX ||
3581 name == VGPU10_NAME_RENDER_TARGET_ARRAY_INDEX ||
3582 name == VGPU10_NAME_VIEWPORT_ARRAY_INDEX);
3583
3584 assert(interpMode == VGPU10_INTERPOLATION_UNDEFINED ||
3585 interpMode == VGPU10_INTERPOLATION_CONSTANT ||
3586 interpMode == VGPU10_INTERPOLATION_LINEAR ||
3587 interpMode == VGPU10_INTERPOLATION_LINEAR_CENTROID ||
3588 interpMode == VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE ||
3589 interpMode == VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_CENTROID ||
3590 interpMode == VGPU10_INTERPOLATION_LINEAR_SAMPLE ||
3591 interpMode == VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_SAMPLE);
3592
3593 check_register_index(emit, opcodeType, index);
3594
3595 opcode0.value = operand0.value = name_token.value = 0;
3596
3597 opcode0.opcodeType = opcodeType;
3598 opcode0.interpolationMode = interpMode;
3599
3600 operand0.operandType = operandType;
3601 operand0.numComponents = numComp;
3602 operand0.selectionMode = selMode;
3603 operand0.mask = usageMask;
3604 operand0.indexDimension = dim;
3605 operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
3606 if (dim == VGPU10_OPERAND_INDEX_2D)
3607 operand0.index1Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
3608
3609 name_token.name = name;
3610
3611 emit_decl_instruction(emit, opcode0, operand0, name_token, index, size);
3612
3613 if (addSignature) {
3614 struct svga_shader_signature *sgn = &emit->signature;
3615 if (operandType == VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT) {
3616 /* Set patch constant signature */
3617 SVGA3dDXShaderSignatureEntry *sgnEntry =
3618 &sgn->patchConstants[sgn->header.numPatchConstantSignatures++];
3619 set_shader_signature_entry(sgnEntry, index,
3620 sgnName, usageMask,
3621 SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN,
3622 SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT);
3623
3624 } else if (operandType == VGPU10_OPERAND_TYPE_INPUT ||
3625 operandType == VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT) {
3626 /* Set input signature */
3627 SVGA3dDXShaderSignatureEntry *sgnEntry =
3628 &sgn->inputs[sgn->header.numInputSignatures++];
3629 set_shader_signature_entry(sgnEntry, index,
3630 sgnName, usageMask,
3631 SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN,
3632 SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT);
3633 }
3634 }
3635
3636 if (emit->index_range.required) {
3637 /* Here, index_range declaration is only applicable for opcodeType
3638 * VGPU10_OPCODE_DCL_INPUT and VGPU10_OPCODE_DCL_INPUT_PS and
3639 * for operandType VGPU10_OPERAND_TYPE_INPUT,
3640 * VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT and
3641 * VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT.
3642 */
3643 if ((opcodeType != VGPU10_OPCODE_DCL_INPUT &&
3644 opcodeType != VGPU10_OPCODE_DCL_INPUT_PS) ||
3645 (operandType != VGPU10_OPERAND_TYPE_INPUT &&
3646 operandType != VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT &&
3647 operandType != VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT)) {
3648 if (emit->index_range.start_index != INVALID_INDEX) {
3649 emit_index_range_declaration(emit);
3650 }
3651 return;
3652 }
3653
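      /* Coalesce contiguous declarations of the same operandType into a
       * single index range: start a new range, extend the current one, or
       * flush it and start over when continuity is broken.
       */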
3654 if (emit->index_range.operandType == VGPU10_NUM_OPERANDS) {
3655          /* Need to record a new index_range */
3656 emit->index_range.count = 1;
3657 emit->index_range.operandType = operandType;
3658 emit->index_range.start_index = index;
3659 emit->index_range.size = size;
3660 emit->index_range.dim = dim;
3661 }
3662 else if (index !=
3663 (emit->index_range.start_index + emit->index_range.count) ||
3664 emit->index_range.operandType != operandType) {
3665          /* The input index is not contiguous with the current index range,
3666           * or the operandType differs from the range's operandType. Emit
3667           * the current index_range first, then start recording the next one.
3668           */
3669 emit_index_range_declaration(emit);
3670
3671 emit->index_range.count = 1;
3672 emit->index_range.operandType = operandType;
3673 emit->index_range.start_index = index;
3674 emit->index_range.size = size;
3675 emit->index_range.dim = dim;
3676 }
3677 else if (emit->index_range.operandType == operandType) {
3678          /* The input index is contiguous with the index range and the
3679           * operandType matches, so just increment the index range count.
3680           */
3681 emit->index_range.count++;
3682 }
3683 }
3684 }
3685
3686
3687 /**
3688 * Emit the declaration for a shader output.
3689 * \param type one of VGPU10_OPCODE_DCL_OUTPUTx
3690 * \param index the output register index
3691 * \param name one of VGPU10_NAME_x
3692 * \param usageMask bitfield of VGPU10_OPERAND_4_COMPONENT_MASK_x values
3693 */
3694 static void
3695 emit_output_declaration(struct svga_shader_emitter_v10 *emit,
3696 VGPU10_OPCODE_TYPE type, unsigned index,
3697 VGPU10_SYSTEM_NAME name,
3698 unsigned writemask,
3699 boolean addSignature,
3700 SVGA3dDXSignatureSemanticName sgnName)
3701 {
3702 VGPU10OpcodeToken0 opcode0;
3703 VGPU10OperandToken0 operand0;
3704 VGPU10NameToken name_token;
3705
3706 assert(writemask <= VGPU10_OPERAND_4_COMPONENT_MASK_ALL);
3707 assert(type == VGPU10_OPCODE_DCL_OUTPUT ||
3708 type == VGPU10_OPCODE_DCL_OUTPUT_SGV ||
3709 type == VGPU10_OPCODE_DCL_OUTPUT_SIV);
3710 assert(name == VGPU10_NAME_UNDEFINED ||
3711 name == VGPU10_NAME_POSITION ||
3712 name == VGPU10_NAME_PRIMITIVE_ID ||
3713 name == VGPU10_NAME_RENDER_TARGET_ARRAY_INDEX ||
3714 name == VGPU10_NAME_VIEWPORT_ARRAY_INDEX ||
3715 name == VGPU10_NAME_CLIP_DISTANCE);
3716
3717 check_register_index(emit, type, index);
3718
3719 opcode0.value = operand0.value = name_token.value = 0;
3720
3721 opcode0.opcodeType = type;
3722 operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT;
3723 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
3724 operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE;
3725 operand0.mask = writemask;
3726 operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
3727 operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
3728
3729 name_token.name = name;
3730
3731 emit_decl_instruction(emit, opcode0, operand0, name_token, index, 1);
3732
3733 /* Capture output signature */
3734 if (addSignature) {
3735 struct svga_shader_signature *sgn = &emit->signature;
3736 SVGA3dDXShaderSignatureEntry *sgnEntry =
3737 &sgn->outputs[sgn->header.numOutputSignatures++];
3738 set_shader_signature_entry(sgnEntry, index,
3739 sgnName, writemask,
3740 SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN,
3741 SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT);
3742 }
3743
3744 if (emit->index_range.required) {
3745 /* Here, index_range declaration is only applicable for opcodeType
3746 * VGPU10_OPCODE_DCL_OUTPUT and for operandType
3747 * VGPU10_OPERAND_TYPE_OUTPUT.
3748 */
3749 if (type != VGPU10_OPCODE_DCL_OUTPUT) {
3750 if (emit->index_range.start_index != INVALID_INDEX) {
3751 emit_index_range_declaration(emit);
3752 }
3753 return;
3754 }
3755
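      /* Same coalescing logic as for inputs: start, extend, or flush and
       * restart the current output index range.
       */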
3756 if (emit->index_range.operandType == VGPU10_NUM_OPERANDS) {
3757          /* Need to record a new index_range */
3758 emit->index_range.count = 1;
3759 emit->index_range.operandType = VGPU10_OPERAND_TYPE_OUTPUT;
3760 emit->index_range.start_index = index;
3761 emit->index_range.size = 1;
3762 emit->index_range.dim = VGPU10_OPERAND_INDEX_1D;
3763 }
3764 else if (index !=
3765 (emit->index_range.start_index + emit->index_range.count)) {
3766          /* The output index is not contiguous with the current index range.
3767           * Emit the current index_range first, then start recording the
3768           * next one.
3769           */
3770 emit_index_range_declaration(emit);
3771
3772 emit->index_range.count = 1;
3773 emit->index_range.operandType = VGPU10_OPERAND_TYPE_OUTPUT;
3774 emit->index_range.start_index = index;
3775 emit->index_range.size = 1;
3776 emit->index_range.dim = VGPU10_OPERAND_INDEX_1D;
3777 }
3778 else {
3779          /* The output index is contiguous with the index range, so just
3780           * increment the index range count.
3781           */
3782 emit->index_range.count++;
3783 }
3784 }
3785 }
3786
3787
3788 /**
3789 * Emit the declaration for the fragment depth output.
3790 */
3791 static void
3792 emit_fragdepth_output_declaration(struct svga_shader_emitter_v10 *emit)
3793 {
3794 VGPU10OpcodeToken0 opcode0;
3795 VGPU10OperandToken0 operand0;
3796 VGPU10NameToken name_token;
3797
3798 assert(emit->unit == PIPE_SHADER_FRAGMENT);
3799
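   /* oDepth is a scalar register with no index (0-D, single component) */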
3800 opcode0.value = operand0.value = name_token.value = 0;
3801
3802 opcode0.opcodeType = VGPU10_OPCODE_DCL_OUTPUT;
3803 operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT_DEPTH;
3804 operand0.numComponents = VGPU10_OPERAND_1_COMPONENT;
3805 operand0.indexDimension = VGPU10_OPERAND_INDEX_0D;
3806 operand0.mask = 0;
3807
3808 emit_decl_instruction(emit, opcode0, operand0, name_token, 0, 1);
3809 }
3810
3811
3812 /**
3813 * Emit the declaration for the fragment sample mask/coverage output.
3814 */
3815 static void
3816 emit_samplemask_output_declaration(struct svga_shader_emitter_v10 *emit)
3817 {
3818 VGPU10OpcodeToken0 opcode0;
3819 VGPU10OperandToken0 operand0;
3820 VGPU10NameToken name_token;
3821
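   /* The coverage mask output (oMask) requires SM4.1 or later */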
3822 assert(emit->unit == PIPE_SHADER_FRAGMENT);
3823 assert(emit->version >= 41);
3824
3825 opcode0.value = operand0.value = name_token.value = 0;
3826
3827 opcode0.opcodeType = VGPU10_OPCODE_DCL_OUTPUT;
3828 operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT_COVERAGE_MASK;
3829 operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
3830 operand0.indexDimension = VGPU10_OPERAND_INDEX_0D;
3831 operand0.mask = 0;
3832
3833 emit_decl_instruction(emit, opcode0, operand0, name_token, 0, 1);
3834 }
3835
3836
3837 /**
3838 * Emit output declarations for fragment shader.
3839 */
3840 static void
3841 emit_fs_output_declarations(struct svga_shader_emitter_v10 *emit)
3842 {
3843 unsigned int i;
3844
3845 for (i = 0; i < emit->info.num_outputs; i++) {
3846 /*const unsigned usage_mask = emit->info.output_usage_mask[i];*/
3847 const enum tgsi_semantic semantic_name =
3848 emit->info.output_semantic_name[i];
3849 const unsigned semantic_index = emit->info.output_semantic_index[i];
3850 unsigned index = i;
3851
3852 if (semantic_name == TGSI_SEMANTIC_COLOR) {
3853 assert(semantic_index < ARRAY_SIZE(emit->fs.color_out_index));
3854
3855 emit->fs.color_out_index[semantic_index] = index;
3856
3857 emit->fs.num_color_outputs = MAX2(emit->fs.num_color_outputs,
3858 index + 1);
3859
3860 /* The semantic index is the shader's color output/buffer index */
3861 emit_output_declaration(emit,
3862 VGPU10_OPCODE_DCL_OUTPUT, semantic_index,
3863 VGPU10_NAME_UNDEFINED,
3864 VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
3865 TRUE,
3866 map_tgsi_semantic_to_sgn_name(semantic_name));
3867
3868 if (semantic_index == 0) {
3869 if (emit->key.fs.write_color0_to_n_cbufs > 1) {
3870 /* Emit declarations for the additional color outputs
3871 * for broadcasting.
3872 */
3873 unsigned j;
3874 for (j = 1; j < emit->key.fs.write_color0_to_n_cbufs; j++) {
3875 /* Allocate a new output index */
3876 unsigned idx = emit->info.num_outputs + j - 1;
3877 emit->fs.color_out_index[j] = idx;
3878 emit_output_declaration(emit,
3879 VGPU10_OPCODE_DCL_OUTPUT, idx,
3880 VGPU10_NAME_UNDEFINED,
3881 VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
3882 TRUE,
3883 map_tgsi_semantic_to_sgn_name(semantic_name));
3884 emit->info.output_semantic_index[idx] = j;
3885 }
3886
3887 emit->fs.num_color_outputs =
3888 emit->key.fs.write_color0_to_n_cbufs;
3889 }
3890 }
3891 }
3892 else if (semantic_name == TGSI_SEMANTIC_POSITION) {
3893 /* Fragment depth output */
3894 emit_fragdepth_output_declaration(emit);
3895 }
3896 else if (semantic_name == TGSI_SEMANTIC_SAMPLEMASK) {
3897 /* Sample mask output */
3898 emit_samplemask_output_declaration(emit);
3899 }
3900 else {
3901 assert(!"Bad output semantic name");
3902 }
3903 }
3904 }
3905
3906
3907 /**
3908 * Emit common output declaration for vertex processing.
3909 */
3910 static void
3911 emit_vertex_output_declaration(struct svga_shader_emitter_v10 *emit,
3912 unsigned index, unsigned writemask,
3913 boolean addSignature)
3914 {
3915 const enum tgsi_semantic semantic_name =
3916 emit->info.output_semantic_name[index];
3917 const unsigned semantic_index = emit->info.output_semantic_index[index];
3918 unsigned name, type;
3919 unsigned final_mask = VGPU10_OPERAND_4_COMPONENT_MASK_ALL;
3920
3921 assert(emit->unit != PIPE_SHADER_FRAGMENT &&
3922 emit->unit != PIPE_SHADER_COMPUTE);
3923
3924 switch (semantic_name) {
3925 case TGSI_SEMANTIC_POSITION:
3926 if (emit->unit == PIPE_SHADER_TESS_CTRL) {
3927 /* position will be declared in control point only */
3928 assert(emit->tcs.control_point_phase);
3929 type = VGPU10_OPCODE_DCL_OUTPUT;
3930 name = VGPU10_NAME_UNDEFINED;
3931 emit_output_declaration(emit, type, index, name, final_mask, TRUE,
3932 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED);
3933 return;
3934 }
3935 else {
3936 type = VGPU10_OPCODE_DCL_OUTPUT_SIV;
3937 name = VGPU10_NAME_POSITION;
3938 }
3939 /* Save the index of the vertex position output register */
3940 emit->vposition.out_index = index;
3941 break;
3942 case TGSI_SEMANTIC_CLIPDIST:
3943 type = VGPU10_OPCODE_DCL_OUTPUT_SIV;
3944 name = VGPU10_NAME_CLIP_DISTANCE;
3945 /* save the starting index of the clip distance output register */
3946 if (semantic_index == 0)
3947 emit->clip_dist_out_index = index;
3948 final_mask = apply_clip_plane_mask(emit, writemask, semantic_index);
3949 if (final_mask == 0x0)
3950 return; /* discard this do-nothing declaration */
3951 break;
3952 case TGSI_SEMANTIC_CLIPVERTEX:
3953 type = VGPU10_OPCODE_DCL_OUTPUT;
3954 name = VGPU10_NAME_UNDEFINED;
3955 emit->clip_vertex_out_index = index;
3956 break;
3957 default:
3958 /* generic output */
3959 type = VGPU10_OPCODE_DCL_OUTPUT;
3960 name = VGPU10_NAME_UNDEFINED;
3961 }
3962
3963 emit_output_declaration(emit, type, index, name, final_mask, addSignature,
3964 map_tgsi_semantic_to_sgn_name(semantic_name));
3965 }
3966
3967
3968 /**
3969 * Emit declaration for outputs in vertex shader.
3970 */
3971 static void
3972 emit_vs_output_declarations(struct svga_shader_emitter_v10 *emit)
3973 {
3974 unsigned i;
3975 for (i = 0; i < emit->info.num_outputs; i++) {
3976 emit_vertex_output_declaration(emit, i, emit->output_usage_mask[i], TRUE);
3977 }
3978 }
3979
3980
3981 /**
3982 * A helper function to determine the writemask for an output
3983 * for the specified stream.
3984 */
3985 static unsigned
3986 output_writemask_for_stream(unsigned stream, ubyte output_streams,
3987 ubyte output_usagemask)
3988 {
3989 unsigned i;
3990 unsigned writemask = 0;
3991
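   /* Each component's stream ID occupies two bits of output_streams:
    * component i lives in bits [2i+1:2i].  For example, output_streams =
    * 0x1B (binary 00 01 10 11) puts x in stream 3, y in stream 2,
    * z in stream 1 and w in stream 0.
    */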
3992 for (i = 0; i < 4; i++) {
3993 if ((output_streams & 0x3) == stream)
3994 writemask |= (VGPU10_OPERAND_4_COMPONENT_MASK_X << i);
3995 output_streams >>= 2;
3996 }
3997 return writemask & output_usagemask;
3998 }
3999
4000
4001 /**
4002 * Emit declaration for outputs in geometry shader.
4003 */
4004 static void
4005 emit_gs_output_declarations(struct svga_shader_emitter_v10 *emit)
4006 {
4007 unsigned i;
4008 VGPU10OpcodeToken0 opcode0;
4009 unsigned numStreamsSupported = 1;
4010 int s;
4011
4012 if (emit->version >= 50) {
4013 numStreamsSupported = ARRAY_SIZE(emit->info.num_stream_output_components);
4014 }
4015
4016    /**
4017     * Emit the streams in reverse order so that we finish with
4018     * stream 0; any auxiliary output declarations will then go
4019     * to stream 0.
4020     */
4021 for (s = numStreamsSupported-1; s >= 0; s--) {
4022
4023 if (emit->info.num_stream_output_components[s] == 0)
4024 continue;
4025
4026 if (emit->version >= 50) {
4027 /* DCL_STREAM stream */
4028 begin_emit_instruction(emit);
4029 emit_opcode(emit, VGPU10_OPCODE_DCL_STREAM, FALSE);
4030 emit_stream_register(emit, s);
4031 end_emit_instruction(emit);
4032 }
4033
4034 /* emit output primitive topology declaration */
4035 opcode0.value = 0;
4036 opcode0.opcodeType = VGPU10_OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY;
4037 opcode0.primitiveTopology = emit->gs.prim_topology;
4038 emit_property_instruction(emit, opcode0, 0, 0);
4039
4040 for (i = 0; i < emit->info.num_outputs; i++) {
4041 unsigned writemask;
4042
4043 /* find out the writemask for this stream */
4044 writemask = output_writemask_for_stream(s, emit->info.output_streams[i],
4045 emit->output_usage_mask[i]);
4046
4047 if (writemask) {
4048 enum tgsi_semantic semantic_name =
4049 emit->info.output_semantic_name[i];
4050
4051 /* TODO: Still need to take care of a special case where a
4052 * single varying spans across multiple output registers.
4053 */
4054 switch(semantic_name) {
4055 case TGSI_SEMANTIC_PRIMID:
4056 emit_output_declaration(emit,
4057 VGPU10_OPCODE_DCL_OUTPUT_SGV, i,
4058 VGPU10_NAME_PRIMITIVE_ID,
4059 VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
4060 FALSE,
4061 map_tgsi_semantic_to_sgn_name(semantic_name));
4062 break;
4063 case TGSI_SEMANTIC_LAYER:
4064 emit_output_declaration(emit,
4065 VGPU10_OPCODE_DCL_OUTPUT_SIV, i,
4066 VGPU10_NAME_RENDER_TARGET_ARRAY_INDEX,
4067 VGPU10_OPERAND_4_COMPONENT_MASK_X,
4068 FALSE,
4069 map_tgsi_semantic_to_sgn_name(semantic_name));
4070 break;
4071 case TGSI_SEMANTIC_VIEWPORT_INDEX:
4072 emit_output_declaration(emit,
4073 VGPU10_OPCODE_DCL_OUTPUT_SIV, i,
4074 VGPU10_NAME_VIEWPORT_ARRAY_INDEX,
4075 VGPU10_OPERAND_4_COMPONENT_MASK_X,
4076 FALSE,
4077 map_tgsi_semantic_to_sgn_name(semantic_name));
4078 emit->gs.viewport_index_out_index = i;
4079 break;
4080 default:
4081 emit_vertex_output_declaration(emit, i, writemask, FALSE);
4082 }
4083 }
4084 }
4085 }
4086
4087 /* For geometry shader outputs, it is possible the same register is
4088 * declared multiple times for different streams. So to avoid
4089 * redundant signature entries, geometry shader output signature is done
4090 * outside of the declaration.
4091 */
4092 struct svga_shader_signature *sgn = &emit->signature;
4093 SVGA3dDXShaderSignatureEntry *sgnEntry;
4094
4095 for (i = 0; i < emit->info.num_outputs; i++) {
4096 if (emit->output_usage_mask[i]) {
4097 enum tgsi_semantic sem_name = emit->info.output_semantic_name[i];
4098
4099 sgnEntry = &sgn->outputs[sgn->header.numOutputSignatures++];
4100 set_shader_signature_entry(sgnEntry, i,
4101 map_tgsi_semantic_to_sgn_name(sem_name),
4102 emit->output_usage_mask[i],
4103 SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN,
4104 SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT);
4105 }
4106 }
4107 }
4108
4109
4110 /**
4111 * Emit the declaration for the tess inner/outer output.
4112 * \param opcodeType either VGPU10_OPCODE_DCL_OUTPUT_SIV or _INPUT_SIV
4113 * \param operandType either VGPU10_OPERAND_TYPE_OUTPUT or _INPUT
4114 * \param name VGPU10_NAME_FINAL_*_TESSFACTOR value
4115 */
4116 static void
4117 emit_tesslevel_declaration(struct svga_shader_emitter_v10 *emit,
4118 unsigned index, unsigned opcodeType,
4119 unsigned operandType, VGPU10_SYSTEM_NAME name,
4120 SVGA3dDXSignatureSemanticName sgnName)
4121 {
4122 VGPU10OpcodeToken0 opcode0;
4123 VGPU10OperandToken0 operand0;
4124 VGPU10NameToken name_token;
4125
4126 assert(emit->version >= 50);
4127 assert(name >= VGPU10_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR ||
4128 (emit->key.tcs.prim_mode == PIPE_PRIM_LINES &&
4129 name == VGPU10_NAME_UNDEFINED));
4130 assert(name <= VGPU10_NAME_FINAL_LINE_DENSITY_TESSFACTOR);
4131
4132 assert(operandType == VGPU10_OPERAND_TYPE_OUTPUT ||
4133 operandType == VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT);
4134
4135 opcode0.value = operand0.value = name_token.value = 0;
4136
4137 opcode0.opcodeType = opcodeType;
4138 operand0.operandType = operandType;
4139 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
4140 operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
4141 operand0.mask = VGPU10_OPERAND_4_COMPONENT_MASK_X;
4142 operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE;
4143 operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
4144
4145 name_token.name = name;
4146 emit_decl_instruction(emit, opcode0, operand0, name_token, index, 1);
4147
4148 /* Capture patch constant signature */
4149 struct svga_shader_signature *sgn = &emit->signature;
4150 SVGA3dDXShaderSignatureEntry *sgnEntry =
4151 &sgn->patchConstants[sgn->header.numPatchConstantSignatures++];
4152 set_shader_signature_entry(sgnEntry, index,
4153 sgnName, VGPU10_OPERAND_4_COMPONENT_MASK_X,
4154 SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN,
4155 SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT);
4156 }
4157
4158
4159 /**
4160 * Emit output declarations for tessellation control shader.
4161 */
4162 static void
4163 emit_tcs_output_declarations(struct svga_shader_emitter_v10 *emit)
4164 {
4165 unsigned int i;
4166 unsigned outputIndex = emit->num_outputs;
4167 struct svga_shader_signature *sgn = &emit->signature;
4168
4169    /**
4170     * Reset patch_generic_out_count so it is not counted twice, since
4171     * this function is called twice: once for the control point phase
4172     * and once for the patch constant phase.
4173     */
4174 emit->tcs.patch_generic_out_count = 0;
4175
4176 for (i = 0; i < emit->info.num_outputs; i++) {
4177 unsigned index = i;
4178 const enum tgsi_semantic semantic_name =
4179 emit->info.output_semantic_name[i];
4180
4181 switch (semantic_name) {
4182 case TGSI_SEMANTIC_TESSINNER:
4183 emit->tcs.inner.tgsi_index = i;
4184
4185 /* skip per-patch output declarations in control point phase */
4186 if (emit->tcs.control_point_phase)
4187 break;
4188
4189 emit->tcs.inner.out_index = outputIndex;
4190 switch (emit->key.tcs.prim_mode) {
4191 case PIPE_PRIM_QUADS:
4192 emit_tesslevel_declaration(emit, outputIndex++,
4193 VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
4194 VGPU10_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR,
4195 SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR);
4196
4197 emit_tesslevel_declaration(emit, outputIndex++,
4198 VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
4199 VGPU10_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR,
4200 SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR);
4201 break;
4202 case PIPE_PRIM_TRIANGLES:
4203 emit_tesslevel_declaration(emit, outputIndex++,
4204 VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
4205 VGPU10_NAME_FINAL_TRI_INSIDE_TESSFACTOR,
4206 SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_INSIDE_TESSFACTOR);
4207 break;
4208 case PIPE_PRIM_LINES:
4209 break;
4210 default:
4211 debug_printf("Unsupported primitive type");
4212 }
4213 break;
4214
4215 case TGSI_SEMANTIC_TESSOUTER:
4216 emit->tcs.outer.tgsi_index = i;
4217
4218 /* skip per-patch output declarations in control point phase */
4219 if (emit->tcs.control_point_phase)
4220 break;
4221
4222 emit->tcs.outer.out_index = outputIndex;
4223 switch (emit->key.tcs.prim_mode) {
4224 case PIPE_PRIM_QUADS:
4225 for (int j = 0; j < 4; j++) {
4226 emit_tesslevel_declaration(emit, outputIndex++,
4227 VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
4228 VGPU10_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR + j,
4229 SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR + j);
4230 }
4231 break;
4232 case PIPE_PRIM_TRIANGLES:
4233 for (int j = 0; j < 3; j++) {
4234 emit_tesslevel_declaration(emit, outputIndex++,
4235 VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
4236 VGPU10_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR + j,
4237 SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR + j);
4238 }
4239 break;
4240 case PIPE_PRIM_LINES:
4241 for (int j = 0; j < 2; j++) {
4242 emit_tesslevel_declaration(emit, outputIndex++,
4243 VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
4244 VGPU10_NAME_FINAL_LINE_DETAIL_TESSFACTOR + j,
4245 SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_LINE_DETAIL_TESSFACTOR + j);
4246 }
4247 break;
4248 default:
4249 debug_printf("Unsupported primitive type");
4250 }
4251 break;
4252
4253 case TGSI_SEMANTIC_PATCH:
4254 if (emit->tcs.patch_generic_out_index == INVALID_INDEX)
4255             emit->tcs.patch_generic_out_index = i;
4256 emit->tcs.patch_generic_out_count++;
4257
4258 /* skip per-patch output declarations in control point phase */
4259 if (emit->tcs.control_point_phase)
4260 break;
4261
4262 emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT, index,
4263 VGPU10_NAME_UNDEFINED,
4264 VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
4265 FALSE,
4266 map_tgsi_semantic_to_sgn_name(semantic_name));
4267
4268 SVGA3dDXShaderSignatureEntry *sgnEntry =
4269 &sgn->patchConstants[sgn->header.numPatchConstantSignatures++];
4270 set_shader_signature_entry(sgnEntry, index,
4271 map_tgsi_semantic_to_sgn_name(semantic_name),
4272 VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
4273 SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN,
4274 SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT);
4275
4276 break;
4277
4278 default:
4279 /* save the starting index of control point outputs */
4280 if (emit->tcs.control_point_out_index == INVALID_INDEX)
4281 emit->tcs.control_point_out_index = i;
4282 emit->tcs.control_point_out_count++;
4283
4284 /* skip control point output declarations in patch constant phase */
4285 if (!emit->tcs.control_point_phase)
4286 break;
4287
4288 emit_vertex_output_declaration(emit, i, emit->output_usage_mask[i],
4289 TRUE);
4290
4291 }
4292 }
4293
4294 if (emit->tcs.control_point_phase) {
4295 /**
4296 * Add missing control point output in control point phase.
4297 */
4298 if (emit->tcs.control_point_out_index == INVALID_INDEX) {
4299 /* use register index after tessellation factors */
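         /* quads: 4 outer + 2 inner = 6 factors, triangles: 3 outer +
          * 1 inner = 4 factors, isolines: 2 outer = 2 factors.
          */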
4300 switch (emit->key.tcs.prim_mode) {
4301 case PIPE_PRIM_QUADS:
4302 emit->tcs.control_point_out_index = outputIndex + 6;
4303 break;
4304 case PIPE_PRIM_TRIANGLES:
4305 emit->tcs.control_point_out_index = outputIndex + 4;
4306 break;
4307 default:
4308 emit->tcs.control_point_out_index = outputIndex + 2;
4309 break;
4310 }
4311 emit->tcs.control_point_out_count++;
4312 emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT_SIV,
4313 emit->tcs.control_point_out_index,
4314 VGPU10_NAME_POSITION,
4315 VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
4316 TRUE,
4317 SVGADX_SIGNATURE_SEMANTIC_NAME_POSITION);
4318
4319          /* If the TCS does not write any control point outputs, we can
4320           * end the hull shader control point phase here after emitting
4321           * the default control point output.
4322           */
4323 emit->skip_instruction = TRUE;
4324 }
4325 }
4326 else {
4327 if (emit->tcs.outer.out_index == INVALID_INDEX) {
4328       /* Since the TCS did not declare the outer tess level output
4329        * registers, declare them here for the patch constant phase only.
4330        */
4331 emit->tcs.outer.out_index = outputIndex;
4332 if (emit->key.tcs.prim_mode == PIPE_PRIM_QUADS) {
4333 for (int i = 0; i < 4; i++) {
4334 emit_tesslevel_declaration(emit, outputIndex++,
4335 VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
4336 VGPU10_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR + i,
4337 SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR + i);
4338 }
4339 }
4340 else if (emit->key.tcs.prim_mode == PIPE_PRIM_TRIANGLES) {
4341 for (int i = 0; i < 3; i++) {
4342 emit_tesslevel_declaration(emit, outputIndex++,
4343 VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
4344 VGPU10_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR + i,
4345 SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR + i);
4346 }
4347 }
4348 }
4349
4350 if (emit->tcs.inner.out_index == INVALID_INDEX) {
4351       /* Since the TCS did not declare the inner tess level output
4352        * registers, declare them here.
4353        */
4354 emit->tcs.inner.out_index = outputIndex;
4355 if (emit->key.tcs.prim_mode == PIPE_PRIM_QUADS) {
4356 emit_tesslevel_declaration(emit, outputIndex++,
4357 VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
4358 VGPU10_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR,
4359 SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR);
4360 emit_tesslevel_declaration(emit, outputIndex++,
4361 VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
4362 VGPU10_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR,
4363 SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR);
4364 }
4365 else if (emit->key.tcs.prim_mode == PIPE_PRIM_TRIANGLES) {
4366 emit_tesslevel_declaration(emit, outputIndex++,
4367 VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
4368 VGPU10_NAME_FINAL_TRI_INSIDE_TESSFACTOR,
4369 SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_INSIDE_TESSFACTOR);
4370 }
4371 }
4372 }
4373 emit->num_outputs = outputIndex;
4374 }
4375
4376
4377 /**
4378 * Emit output declarations for tessellation evaluation shader.
4379 */
4380 static void
4381 emit_tes_output_declarations(struct svga_shader_emitter_v10 *emit)
4382 {
4383 unsigned int i;
4384
4385 for (i = 0; i < emit->info.num_outputs; i++) {
4386 emit_vertex_output_declaration(emit, i, emit->output_usage_mask[i], TRUE);
4387 }
4388 }
4389
4390
4391 /**
4392 * Emit the declaration for a system value input/output.
4393 */
4394 static void
4395 emit_system_value_declaration(struct svga_shader_emitter_v10 *emit,
4396 enum tgsi_semantic semantic_name, unsigned index)
4397 {
4398 switch (semantic_name) {
4399 case TGSI_SEMANTIC_INSTANCEID:
4400 index = alloc_system_value_index(emit, index);
4401 emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT_SIV,
4402 VGPU10_OPERAND_TYPE_INPUT,
4403 VGPU10_OPERAND_INDEX_1D,
4404 index, 1,
4405 VGPU10_NAME_INSTANCE_ID,
4406 VGPU10_OPERAND_4_COMPONENT,
4407 VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
4408 VGPU10_OPERAND_4_COMPONENT_MASK_X,
4409 VGPU10_INTERPOLATION_UNDEFINED, TRUE,
4410 map_tgsi_semantic_to_sgn_name(semantic_name));
4411 break;
4412 case TGSI_SEMANTIC_VERTEXID:
4413 emit->vs.vertex_id_sys_index = index;
4414 index = alloc_system_value_index(emit, index);
4415 emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT_SIV,
4416 VGPU10_OPERAND_TYPE_INPUT,
4417 VGPU10_OPERAND_INDEX_1D,
4418 index, 1,
4419 VGPU10_NAME_VERTEX_ID,
4420 VGPU10_OPERAND_4_COMPONENT,
4421 VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
4422 VGPU10_OPERAND_4_COMPONENT_MASK_X,
4423 VGPU10_INTERPOLATION_UNDEFINED, TRUE,
4424 map_tgsi_semantic_to_sgn_name(semantic_name));
4425 break;
4426 case TGSI_SEMANTIC_SAMPLEID:
4427 assert(emit->unit == PIPE_SHADER_FRAGMENT);
4428 emit->fs.sample_id_sys_index = index;
4429 index = alloc_system_value_index(emit, index);
4430 emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT_PS_SIV,
4431 VGPU10_OPERAND_TYPE_INPUT,
4432 VGPU10_OPERAND_INDEX_1D,
4433 index, 1,
4434 VGPU10_NAME_SAMPLE_INDEX,
4435 VGPU10_OPERAND_4_COMPONENT,
4436 VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
4437 VGPU10_OPERAND_4_COMPONENT_MASK_X,
4438 VGPU10_INTERPOLATION_CONSTANT, TRUE,
4439 map_tgsi_semantic_to_sgn_name(semantic_name));
4440 break;
4441 case TGSI_SEMANTIC_SAMPLEPOS:
4442 /* This system value contains the position of the current sample
4443 * when using per-sample shading. We implement this by calling
4444 * the VGPU10_OPCODE_SAMPLE_POS instruction with the current sample
4445 * index as the argument. See emit_sample_position_instructions().
4446 */
4447 assert(emit->version >= 41);
4448 emit->fs.sample_pos_sys_index = index;
4449 index = alloc_system_value_index(emit, index);
4450 break;
4451 case TGSI_SEMANTIC_INVOCATIONID:
4452       /* Note: the invocation id input is mapped to a different register
4453        * depending on the shader type. In a GS it is mapped to
4454        * vGSInstanceID#; in a TCS it is mapped to vOutputControlPointID#.
4455        * In both cases the mapped name is unique rather than a generic
4456        * input name ("v#"), so there is no need to remap the index
4457        * value.
4458        */
4459 assert(emit->unit == PIPE_SHADER_GEOMETRY ||
4460 emit->unit == PIPE_SHADER_TESS_CTRL);
4461 assert(emit->version >= 50);
4462
4463 if (emit->unit == PIPE_SHADER_GEOMETRY) {
4464 emit->gs.invocation_id_sys_index = index;
4465 emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
4466 VGPU10_OPERAND_TYPE_INPUT_GS_INSTANCE_ID,
4467 VGPU10_OPERAND_INDEX_0D,
4468 index, 1,
4469 VGPU10_NAME_UNDEFINED,
4470 VGPU10_OPERAND_0_COMPONENT,
4471 VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
4472 0,
4473 VGPU10_INTERPOLATION_UNDEFINED, TRUE,
4474 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED);
4475 } else if (emit->unit == PIPE_SHADER_TESS_CTRL) {
4476 /* The emission of the control point id will be done
4477 * in the control point phase in emit_hull_shader_control_point_phase().
4478 */
4479 emit->tcs.invocation_id_sys_index = index;
4480 }
4481 break;
4482 case TGSI_SEMANTIC_SAMPLEMASK:
4483 /* Note: the PS sample mask input has a unique name ("vCoverage#")
4484 * rather than just a generic input name ("v#") so no need to remap the
4485 * index value.
4486 */
4487 assert(emit->unit == PIPE_SHADER_FRAGMENT);
4488 assert(emit->version >= 50);
4489 emit->fs.sample_mask_in_sys_index = index;
4490 emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
4491 VGPU10_OPERAND_TYPE_INPUT_COVERAGE_MASK,
4492 VGPU10_OPERAND_INDEX_0D,
4493 index, 1,
4494 VGPU10_NAME_UNDEFINED,
4495 VGPU10_OPERAND_1_COMPONENT,
4496 VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
4497 0,
4498 VGPU10_INTERPOLATION_CONSTANT, TRUE,
4499 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED);
4500 break;
4501 case TGSI_SEMANTIC_TESSCOORD:
4502 assert(emit->version >= 50);
4503
4504 unsigned usageMask = 0;
4505
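      /* The domain point has three components (barycentric coordinates)
       * for triangles and two (u, v) for quads and isolines.
       */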
4506 if (emit->tes.prim_mode == PIPE_PRIM_TRIANGLES) {
4507 usageMask = VGPU10_OPERAND_4_COMPONENT_MASK_XYZ;
4508 }
4509 else if (emit->tes.prim_mode == PIPE_PRIM_LINES ||
4510 emit->tes.prim_mode == PIPE_PRIM_QUADS) {
4511 usageMask = VGPU10_OPERAND_4_COMPONENT_MASK_XY;
4512 }
4513
4514 emit->tes.tesscoord_sys_index = index;
4515 emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
4516 VGPU10_OPERAND_TYPE_INPUT_DOMAIN_POINT,
4517 VGPU10_OPERAND_INDEX_0D,
4518 index, 1,
4519 VGPU10_NAME_UNDEFINED,
4520 VGPU10_OPERAND_4_COMPONENT,
4521 VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
4522 usageMask,
4523 VGPU10_INTERPOLATION_UNDEFINED, TRUE,
4524 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED);
4525 break;
4526 case TGSI_SEMANTIC_TESSINNER:
4527 assert(emit->version >= 50);
4528 emit->tes.inner.tgsi_index = index;
4529 break;
4530 case TGSI_SEMANTIC_TESSOUTER:
4531 assert(emit->version >= 50);
4532 emit->tes.outer.tgsi_index = index;
4533 break;
4534 case TGSI_SEMANTIC_VERTICESIN:
4535 assert(emit->unit == PIPE_SHADER_TESS_CTRL);
4536 assert(emit->version >= 50);
4537
4538 /* save the system value index */
4539 emit->tcs.vertices_per_patch_index = index;
4540 break;
4541 case TGSI_SEMANTIC_PRIMID:
4542 assert(emit->version >= 50);
4543 if (emit->unit == PIPE_SHADER_TESS_CTRL) {
4544 emit->tcs.prim_id_index = index;
4545 }
4546 else if (emit->unit == PIPE_SHADER_TESS_EVAL) {
4547 emit->tes.prim_id_index = index;
4548 emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
4549 VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID,
4550 VGPU10_OPERAND_INDEX_0D,
4551 index, 1,
4552 VGPU10_NAME_UNDEFINED,
4553 VGPU10_OPERAND_0_COMPONENT,
4554 VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
4555 0,
4556 VGPU10_INTERPOLATION_UNDEFINED, TRUE,
4557 map_tgsi_semantic_to_sgn_name(semantic_name));
4558 }
4559 break;
4560 default:
4561       debug_printf("unexpected system value semantic %u / %s\n",
4562                    semantic_name, tgsi_semantic_names[semantic_name]);
4563 }
4564 }
4565
4566 /**
4567 * Translate a TGSI declaration to VGPU10.
4568 */
4569 static boolean
4570 emit_vgpu10_declaration(struct svga_shader_emitter_v10 *emit,
4571 const struct tgsi_full_declaration *decl)
4572 {
4573 switch (decl->Declaration.File) {
4574 case TGSI_FILE_INPUT:
4575 /* do nothing - see emit_input_declarations() */
4576 return TRUE;
4577
4578 case TGSI_FILE_OUTPUT:
4579 assert(decl->Range.First == decl->Range.Last);
4580 emit->output_usage_mask[decl->Range.First] = decl->Declaration.UsageMask;
4581 return TRUE;
4582
4583 case TGSI_FILE_TEMPORARY:
4584 /* Don't declare the temps here. Just keep track of how many
4585 * and emit the declaration later.
4586 */
4587 if (decl->Declaration.Array) {
4588 /* Indexed temporary array. Save the start index of the array
4589 * and the size of the array.
4590 */
4591 const unsigned arrayID = MIN2(decl->Array.ArrayID, MAX_TEMP_ARRAYS);
4592 assert(arrayID < ARRAY_SIZE(emit->temp_arrays));
4593
4594 /* Save this array so we can emit the declaration for it later */
4595 create_temp_array(emit, arrayID, decl->Range.First,
4596 decl->Range.Last - decl->Range.First + 1,
4597 decl->Range.First);
4598 }
4599
4600 /* for all temps, indexed or not, keep track of highest index */
4601 emit->num_shader_temps = MAX2(emit->num_shader_temps,
4602 decl->Range.Last + 1);
4603 return TRUE;
4604
4605 case TGSI_FILE_CONSTANT:
4606 /* Don't declare constants here. Just keep track and emit later. */
4607 {
4608 unsigned constbuf = 0, num_consts;
4609 if (decl->Declaration.Dimension) {
4610 constbuf = decl->Dim.Index2D;
4611 }
4612          /* Assert here because a shader with an out-of-bounds constbuf
4613           * index should never have linked in the first place, so we
4614           * should not reach this point.
4615           */
4616 assert(constbuf < ARRAY_SIZE(emit->num_shader_consts));
4617
4618 num_consts = MAX2(emit->num_shader_consts[constbuf],
4619 decl->Range.Last + 1);
4620
4621 if (num_consts > VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT) {
4622 debug_printf("Warning: constant buffer is declared to size [%u]"
4623 " but [%u] is the limit.\n",
4624 num_consts,
4625 VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT);
4626 }
4627 /* The linker doesn't enforce the max UBO size so we clamp here */
4628 emit->num_shader_consts[constbuf] =
4629 MIN2(num_consts, VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT);
4630 }
4631 return TRUE;
4632
4633 case TGSI_FILE_IMMEDIATE:
4634 assert(!"TGSI_FILE_IMMEDIATE not handled yet!");
4635 return FALSE;
4636
4637 case TGSI_FILE_SYSTEM_VALUE:
4638 emit_system_value_declaration(emit, decl->Semantic.Name,
4639 decl->Range.First);
4640 return TRUE;
4641
4642 case TGSI_FILE_SAMPLER:
4643 /* Don't declare samplers here. Just keep track and emit later. */
4644 emit->num_samplers = MAX2(emit->num_samplers, decl->Range.Last + 1);
4645 return TRUE;
4646
4647 #if 0
4648 case TGSI_FILE_RESOURCE:
4649 /*opcode0.opcodeType = VGPU10_OPCODE_DCL_RESOURCE;*/
4650 /* XXX more, VGPU10_RETURN_TYPE_FLOAT */
4651 assert(!"TGSI_FILE_RESOURCE not handled yet");
4652 return FALSE;
4653 #endif
4654
4655 case TGSI_FILE_ADDRESS:
4656 emit->num_address_regs = MAX2(emit->num_address_regs,
4657 decl->Range.Last + 1);
4658 return TRUE;
4659
4660 case TGSI_FILE_SAMPLER_VIEW:
4661 {
4662 unsigned unit = decl->Range.First;
4663 assert(decl->Range.First == decl->Range.Last);
4664 emit->sampler_target[unit] = decl->SamplerView.Resource;
4665 /* Note: we can ignore YZW return types for now */
4666 emit->sampler_return_type[unit] = decl->SamplerView.ReturnTypeX;
4667 emit->sampler_view[unit] = TRUE;
4668 }
4669 return TRUE;
4670
4671 default:
4672 assert(!"Unexpected type of declaration");
4673 return FALSE;
4674 }
4675 }
4676
4677
4678
4679 /**
4680 * Emit input declarations for fragment shader.
4681 */
4682 static void
4683 emit_fs_input_declarations(struct svga_shader_emitter_v10 *emit)
4684 {
4685 unsigned i;
4686
4687 for (i = 0; i < emit->linkage.num_inputs; i++) {
4688 enum tgsi_semantic semantic_name = emit->info.input_semantic_name[i];
4689 unsigned usage_mask = emit->info.input_usage_mask[i];
4690 unsigned index = emit->linkage.input_map[i];
4691 unsigned type, interpolationMode, name;
4692 unsigned mask = VGPU10_OPERAND_4_COMPONENT_MASK_ALL;
4693
4694 if (usage_mask == 0)
4695 continue; /* register is not actually used */
4696
4697 if (semantic_name == TGSI_SEMANTIC_POSITION) {
4698 /* fragment position input */
4699 type = VGPU10_OPCODE_DCL_INPUT_PS_SGV;
4700 interpolationMode = VGPU10_INTERPOLATION_LINEAR;
4701 name = VGPU10_NAME_POSITION;
4702 if (usage_mask & TGSI_WRITEMASK_W) {
4703 /* we need to replace use of 'w' with '1/w' */
4704 emit->fs.fragcoord_input_index = i;
4705 }
4706 }
4707 else if (semantic_name == TGSI_SEMANTIC_FACE) {
4708 /* fragment front-facing input */
4709 type = VGPU10_OPCODE_DCL_INPUT_PS_SGV;
4710 interpolationMode = VGPU10_INTERPOLATION_CONSTANT;
4711 name = VGPU10_NAME_IS_FRONT_FACE;
4712 emit->fs.face_input_index = i;
4713 }
4714 else if (semantic_name == TGSI_SEMANTIC_PRIMID) {
4715 /* primitive ID */
4716 type = VGPU10_OPCODE_DCL_INPUT_PS_SGV;
4717 interpolationMode = VGPU10_INTERPOLATION_CONSTANT;
4718 name = VGPU10_NAME_PRIMITIVE_ID;
4719 }
4720 else if (semantic_name == TGSI_SEMANTIC_SAMPLEID) {
4721 /* sample index / ID */
4722 type = VGPU10_OPCODE_DCL_INPUT_PS_SGV;
4723 interpolationMode = VGPU10_INTERPOLATION_CONSTANT;
4724 name = VGPU10_NAME_SAMPLE_INDEX;
4725 }
4726 else if (semantic_name == TGSI_SEMANTIC_LAYER) {
4727 /* render target array index */
4728 if (emit->key.fs.layer_to_zero) {
4729 /**
4730 * The shader from the previous stage does not write to layer,
4731 * so reading the layer index in fragment shader should return 0.
4732 */
4733 emit->fs.layer_input_index = i;
4734 continue;
4735 } else {
4736 type = VGPU10_OPCODE_DCL_INPUT_PS_SGV;
4737 interpolationMode = VGPU10_INTERPOLATION_CONSTANT;
4738 name = VGPU10_NAME_RENDER_TARGET_ARRAY_INDEX;
4739 mask = VGPU10_OPERAND_4_COMPONENT_MASK_X;
4740 }
4741 }
4742 else if (semantic_name == TGSI_SEMANTIC_VIEWPORT_INDEX) {
4743 /* viewport index */
4744 type = VGPU10_OPCODE_DCL_INPUT_PS_SGV;
4745 interpolationMode = VGPU10_INTERPOLATION_CONSTANT;
4746 name = VGPU10_NAME_VIEWPORT_ARRAY_INDEX;
4747 mask = VGPU10_OPERAND_4_COMPONENT_MASK_X;
4748 }
4749 else {
4750 /* general fragment input */
4751 type = VGPU10_OPCODE_DCL_INPUT_PS;
4752 interpolationMode =
4753 translate_interpolation(emit,
4754 emit->info.input_interpolate[i],
4755 emit->info.input_interpolate_loc[i]);
4756
4757 /* keeps track if flat interpolation mode is being used */
4758 emit->uses_flat_interp = emit->uses_flat_interp ||
4759 (interpolationMode == VGPU10_INTERPOLATION_CONSTANT);
4760
4761 name = VGPU10_NAME_UNDEFINED;
4762 }
4763
4764 emit_input_declaration(emit, type,
4765 VGPU10_OPERAND_TYPE_INPUT,
4766 VGPU10_OPERAND_INDEX_1D, index, 1,
4767 name,
4768 VGPU10_OPERAND_4_COMPONENT,
4769 VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
4770 mask,
4771 interpolationMode, TRUE,
4772 map_tgsi_semantic_to_sgn_name(semantic_name));
4773 }
4774 }
4775
4776
4777 /**
4778 * Emit input declarations for vertex shader.
4779 */
4780 static void
4781 emit_vs_input_declarations(struct svga_shader_emitter_v10 *emit)
4782 {
4783 unsigned i;
4784
4785 for (i = 0; i < emit->info.file_max[TGSI_FILE_INPUT] + 1; i++) {
4786 unsigned usage_mask = emit->info.input_usage_mask[i];
4787 unsigned index = i;
4788
4789 if (usage_mask == 0)
4790 continue; /* register is not actually used */
4791
4792 emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
4793 VGPU10_OPERAND_TYPE_INPUT,
4794 VGPU10_OPERAND_INDEX_1D, index, 1,
4795 VGPU10_NAME_UNDEFINED,
4796 VGPU10_OPERAND_4_COMPONENT,
4797 VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
4798 VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
4799 VGPU10_INTERPOLATION_UNDEFINED, TRUE,
4800 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED);
4801 }
4802 }
4803
4804
4805 /**
4806 * Emit input declarations for geometry shader.
4807 */
4808 static void
4809 emit_gs_input_declarations(struct svga_shader_emitter_v10 *emit)
4810 {
4811 unsigned i;
4812
4813 for (i = 0; i < emit->info.num_inputs; i++) {
4814 enum tgsi_semantic semantic_name = emit->info.input_semantic_name[i];
4815 unsigned usage_mask = emit->info.input_usage_mask[i];
4816 unsigned index = emit->linkage.input_map[i];
4817 unsigned opcodeType, operandType;
4818 unsigned numComp, selMode;
4819 unsigned name;
4820 unsigned dim;
4821
4822 if (usage_mask == 0)
4823 continue; /* register is not actually used */
4824
4825 opcodeType = VGPU10_OPCODE_DCL_INPUT;
4826 operandType = VGPU10_OPERAND_TYPE_INPUT;
4827 numComp = VGPU10_OPERAND_4_COMPONENT;
4828 selMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE;
4829 name = VGPU10_NAME_UNDEFINED;
4830
4831 /* all geometry shader inputs are two dimensional except
4832 * gl_PrimitiveID
4833 */
4834 dim = VGPU10_OPERAND_INDEX_2D;
4835
4836 if (semantic_name == TGSI_SEMANTIC_PRIMID) {
4837 /* Primitive ID */
4838 operandType = VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID;
4839 dim = VGPU10_OPERAND_INDEX_0D;
4840 numComp = VGPU10_OPERAND_0_COMPONENT;
4841 selMode = 0;
4842
4843          /* Also save the register index so we can check for the primitive
4844           * id when emitting a src register. We need to modify the operand
4845           * type and index dimension when emitting the primitive id src reg.
4846           */
4847 emit->gs.prim_id_index = i;
4848 }
4849 else if (semantic_name == TGSI_SEMANTIC_POSITION) {
4850 /* vertex position input */
4851 opcodeType = VGPU10_OPCODE_DCL_INPUT_SIV;
4852 name = VGPU10_NAME_POSITION;
4853 }
4854
4855 emit_input_declaration(emit, opcodeType, operandType,
4856 dim, index,
4857 emit->gs.input_size,
4858 name,
4859 numComp, selMode,
4860 VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
4861 VGPU10_INTERPOLATION_UNDEFINED, TRUE,
4862 map_tgsi_semantic_to_sgn_name(semantic_name));
4863 }
4864 }
4865
4866
4867 /**
4868 * Emit input declarations for tessellation control shader.
4869 */
4870 static void
4871 emit_tcs_input_declarations(struct svga_shader_emitter_v10 *emit)
4872 {
4873 unsigned i;
4874 unsigned size = emit->key.tcs.vertices_per_patch;
4875 unsigned indicesMask = 0;
4876 boolean addSignature = TRUE;
4877
4878 if (!emit->tcs.control_point_phase)
4879 addSignature = emit->tcs.fork_phase_add_signature;
4880
4881 for (i = 0; i < emit->info.num_inputs; i++) {
4882 unsigned usage_mask = emit->info.input_usage_mask[i];
4883 unsigned index = emit->linkage.input_map[i];
4884 enum tgsi_semantic semantic_name = emit->info.input_semantic_name[i];
4885 VGPU10_SYSTEM_NAME name = VGPU10_NAME_UNDEFINED;
4886 VGPU10_OPERAND_TYPE operandType = VGPU10_OPERAND_TYPE_INPUT;
4887 SVGA3dDXSignatureSemanticName sgn_name =
4888 map_tgsi_semantic_to_sgn_name(semantic_name);
4889
4890 /* indices that are declared */
4891 indicesMask |= 1 << index;
4892
4893 if (semantic_name == TGSI_SEMANTIC_POSITION ||
4894 index == emit->linkage.position_index) {
4895 /* save the input control point index for later use */
4896 emit->tcs.control_point_input_index = i;
4897 }
4898 else if (usage_mask == 0) {
4899 continue; /* register is not actually used */
4900 }
4901 else if (semantic_name == TGSI_SEMANTIC_CLIPDIST) {
4902          /* The shadow copy is used here, so set the signature name
4903           * to UNDEFINED.
4904           */
4905 sgn_name = SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED;
4906 }
4907
4908       /* In the patch constant phase, input control points are declared as
4909        * vicp registers rather than v registers.
4910        */
4911 if (!emit->tcs.control_point_phase) {
4912 operandType = VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT;
4913 }
4914
4915 /* Tessellation control shader inputs are two dimensional.
4916 * The array size is determined by the patch vertex count.
4917 */
4918 emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
4919 operandType,
4920 VGPU10_OPERAND_INDEX_2D,
4921 index, size, name,
4922 VGPU10_OPERAND_4_COMPONENT,
4923 VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
4924 VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
4925 VGPU10_INTERPOLATION_UNDEFINED,
4926 addSignature, sgn_name);
4927 }
4928
4929 if (emit->tcs.control_point_phase) {
4930 if (emit->tcs.control_point_input_index == INVALID_INDEX) {
4931
4932 /* Add input control point declaration if it does not exist */
4933 if ((indicesMask & (1 << emit->linkage.position_index)) == 0) {
4934 emit->linkage.input_map[emit->linkage.num_inputs] =
4935 emit->linkage.position_index;
4936 emit->tcs.control_point_input_index = emit->linkage.num_inputs++;
4937
4938 emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
4939 VGPU10_OPERAND_TYPE_INPUT,
4940 VGPU10_OPERAND_INDEX_2D,
4941 emit->linkage.position_index,
4942 emit->key.tcs.vertices_per_patch,
4943 VGPU10_NAME_UNDEFINED,
4944 VGPU10_OPERAND_4_COMPONENT,
4945 VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
4946 VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
4947 VGPU10_INTERPOLATION_UNDEFINED, TRUE,
4948 SVGADX_SIGNATURE_SEMANTIC_NAME_POSITION);
4949 }
4950 }
4951
4952 /* Also add an address register for the indirection to the
4953 * input control points
4954 */
4955 emit->tcs.control_point_addr_index = emit->num_address_regs++;
4956 }
4957 }
4958
4959
4960 static void
4961 emit_tessfactor_input_declarations(struct svga_shader_emitter_v10 *emit)
4962 {
4963
4964 /* In tcs, tess factors are emitted as extra outputs.
4965 * The starting register index for the tess factors is captured
4966 * in the compile key.
4967 */
4968 unsigned inputIndex = emit->key.tes.tessfactor_index;
4969
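   /* Declare only the factors the TES actually reads: up to 4 outer and
    * 2 inner factors for quads, 3 outer and 1 inner for triangles, and
    * 2 outer (detail/density) for isolines.
    */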
4970 if (emit->tes.prim_mode == PIPE_PRIM_QUADS) {
4971 if (emit->key.tes.need_tessouter) {
4972 emit->tes.outer.in_index = inputIndex;
4973 for (int i = 0; i < 4; i++) {
4974 emit_tesslevel_declaration(emit, inputIndex++,
4975 VGPU10_OPCODE_DCL_INPUT_SIV,
4976 VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT,
4977 VGPU10_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR + i,
4978 SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR + i);
4979 }
4980 }
4981
4982 if (emit->key.tes.need_tessinner) {
4983 emit->tes.inner.in_index = inputIndex;
4984 emit_tesslevel_declaration(emit, inputIndex++,
4985 VGPU10_OPCODE_DCL_INPUT_SIV,
4986 VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT,
4987 VGPU10_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR,
4988 SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR);
4989
4990 emit_tesslevel_declaration(emit, inputIndex++,
4991 VGPU10_OPCODE_DCL_INPUT_SIV,
4992 VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT,
4993 VGPU10_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR,
4994 SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR);
4995 }
4996 }
4997 else if (emit->tes.prim_mode == PIPE_PRIM_TRIANGLES) {
4998 if (emit->key.tes.need_tessouter) {
4999 emit->tes.outer.in_index = inputIndex;
5000 for (int i = 0; i < 3; i++) {
5001 emit_tesslevel_declaration(emit, inputIndex++,
5002 VGPU10_OPCODE_DCL_INPUT_SIV,
5003 VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT,
5004 VGPU10_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR + i,
5005 SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR + i);
5006 }
5007 }
5008
5009 if (emit->key.tes.need_tessinner) {
5010 emit->tes.inner.in_index = inputIndex;
5011 emit_tesslevel_declaration(emit, inputIndex++,
5012 VGPU10_OPCODE_DCL_INPUT_SIV,
5013 VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT,
5014 VGPU10_NAME_FINAL_TRI_INSIDE_TESSFACTOR,
5015 SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_INSIDE_TESSFACTOR);
5016 }
5017 }
5018 else if (emit->tes.prim_mode == PIPE_PRIM_LINES) {
5019 if (emit->key.tes.need_tessouter) {
5020 emit->tes.outer.in_index = inputIndex;
5021 emit_tesslevel_declaration(emit, inputIndex++,
5022 VGPU10_OPCODE_DCL_INPUT_SIV,
5023 VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT,
5024 VGPU10_NAME_FINAL_LINE_DETAIL_TESSFACTOR,
5025 SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_LINE_DETAIL_TESSFACTOR);
5026
5027 emit_tesslevel_declaration(emit, inputIndex++,
5028 VGPU10_OPCODE_DCL_INPUT_SIV,
5029 VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT,
5030 VGPU10_NAME_FINAL_LINE_DENSITY_TESSFACTOR,
5031 SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_LINE_DENSITY_TESSFACTOR);
5032 }
5033 }
5034 }
5035
5036
5037 /**
5038 * Emit input declarations for tessellation evaluation shader.
5039 */
5040 static void
5041 emit_tes_input_declarations(struct svga_shader_emitter_v10 *emit)
5042 {
5043 unsigned i;
5044
5045 for (i = 0; i < emit->info.num_inputs; i++) {
5046 unsigned usage_mask = emit->info.input_usage_mask[i];
5047 unsigned index = emit->linkage.input_map[i];
5048 unsigned size;
5049 const enum tgsi_semantic semantic_name =
5050 emit->info.input_semantic_name[i];
5051 SVGA3dDXSignatureSemanticName sgn_name;
5052 VGPU10_OPERAND_TYPE operandType;
5053 VGPU10_OPERAND_INDEX_DIMENSION dim;
5054
5055 if (usage_mask == 0)
5056 usage_mask = 1; /* at least set usage mask to one */
5057
5058 if (semantic_name == TGSI_SEMANTIC_PATCH) {
5059 operandType = VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT;
5060 dim = VGPU10_OPERAND_INDEX_1D;
5061 size = 1;
5062 sgn_name = map_tgsi_semantic_to_sgn_name(semantic_name);
5063 }
5064 else {
5065 operandType = VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT;
5066 dim = VGPU10_OPERAND_INDEX_2D;
5067 size = emit->key.tes.vertices_per_patch;
5068 sgn_name = SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED;
5069 }
5070
5071 emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT, operandType,
5072 dim, index, size, VGPU10_NAME_UNDEFINED,
5073 VGPU10_OPERAND_4_COMPONENT,
5074 VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
5075 VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
5076 VGPU10_INTERPOLATION_UNDEFINED,
5077 TRUE, sgn_name);
5078 }
5079
5080 emit_tessfactor_input_declarations(emit);
5081
5082 /* DX spec requires DS input controlpoint/patch-constant signatures to match
5083 * the HS output controlpoint/patch-constant signatures exactly.
5084 * Add missing input declarations even if they are not used in the shader.
5085 */
5086 if (emit->linkage.num_inputs < emit->linkage.prevShader.num_outputs) {
5087 struct tgsi_shader_info *prevInfo = emit->prevShaderInfo;
5088 for (i = 0; i < emit->linkage.prevShader.num_outputs; i++) {
5089
5090 /* If a tcs output does not have a corresponding input register in
5091 * tes, add one.
5092 */
5093 if (emit->linkage.prevShader.output_map[i] >
5094 emit->linkage.input_map_max) {
5095 const enum tgsi_semantic sem_name = prevInfo->output_semantic_name[i];
5096
5097 if (sem_name == TGSI_SEMANTIC_PATCH) {
5098 emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
5099 VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT,
5100 VGPU10_OPERAND_INDEX_1D,
5101 i, 1, VGPU10_NAME_UNDEFINED,
5102 VGPU10_OPERAND_4_COMPONENT,
5103 VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
5104 VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
5105 VGPU10_INTERPOLATION_UNDEFINED,
5106 TRUE,
5107 map_tgsi_semantic_to_sgn_name(sem_name));
5108
5109 } else if (sem_name != TGSI_SEMANTIC_TESSINNER &&
5110 sem_name != TGSI_SEMANTIC_TESSOUTER) {
5111 emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
5112 VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT,
5113 VGPU10_OPERAND_INDEX_2D,
5114 i, emit->key.tes.vertices_per_patch,
5115 VGPU10_NAME_UNDEFINED,
5116 VGPU10_OPERAND_4_COMPONENT,
5117 VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
5118 VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
5119 VGPU10_INTERPOLATION_UNDEFINED,
5120 TRUE,
5121 map_tgsi_semantic_to_sgn_name(sem_name));
5122 }
5123 /* tessellation factors are taken care of in
5124 * emit_tessfactor_input_declarations().
5125 */
5126 }
5127 }
5128 }
5129 }
5130
5131
5132 /**
5133 * Emit all input declarations.
5134 */
5135 static boolean
5136 emit_input_declarations(struct svga_shader_emitter_v10 *emit)
5137 {
5138 emit->index_range.required =
5139 emit->info.indirect_files & (1 << TGSI_FILE_INPUT) ? TRUE : FALSE;
5140
5141 switch (emit->unit) {
5142 case PIPE_SHADER_FRAGMENT:
5143 emit_fs_input_declarations(emit);
5144 break;
5145 case PIPE_SHADER_GEOMETRY:
5146 emit_gs_input_declarations(emit);
5147 break;
5148 case PIPE_SHADER_VERTEX:
5149 emit_vs_input_declarations(emit);
5150 break;
5151 case PIPE_SHADER_TESS_CTRL:
5152 emit_tcs_input_declarations(emit);
5153 break;
5154 case PIPE_SHADER_TESS_EVAL:
5155 emit_tes_input_declarations(emit);
5156 break;
5157 case PIPE_SHADER_COMPUTE:
5158 //XXX emit_cs_input_declarations(emit);
5159 break;
5160 default:
5161 assert(0);
5162 }
5163
5164 if (emit->index_range.start_index != INVALID_INDEX) {
5165 emit_index_range_declaration(emit);
5166 }
5167 emit->index_range.required = FALSE;
5168 return TRUE;
5169 }
5170
5171
5172 /**
5173 * Emit all output declarations.
5174 */
5175 static boolean
5176 emit_output_declarations(struct svga_shader_emitter_v10 *emit)
5177 {
5178 emit->index_range.required =
5179 emit->info.indirect_files & (1 << TGSI_FILE_OUTPUT) ? TRUE : FALSE;
5180
5181 switch (emit->unit) {
5182 case PIPE_SHADER_FRAGMENT:
5183 emit_fs_output_declarations(emit);
5184 break;
5185 case PIPE_SHADER_GEOMETRY:
5186 emit_gs_output_declarations(emit);
5187 break;
5188 case PIPE_SHADER_VERTEX:
5189 emit_vs_output_declarations(emit);
5190 break;
5191 case PIPE_SHADER_TESS_CTRL:
5192 emit_tcs_output_declarations(emit);
5193 break;
5194 case PIPE_SHADER_TESS_EVAL:
5195 emit_tes_output_declarations(emit);
5196 break;
5197 case PIPE_SHADER_COMPUTE:
5198 //XXX emit_cs_output_declarations(emit);
5199 break;
5200 default:
5201 assert(0);
5202 }
5203
5204 if (emit->vposition.so_index != INVALID_INDEX &&
5205 emit->vposition.out_index != INVALID_INDEX) {
5206
5207 assert(emit->unit != PIPE_SHADER_FRAGMENT);
5208
5209 /* Emit the declaration for the non-adjusted vertex position
5210 * for stream output purposes.
5211 */
5212 emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT,
5213 emit->vposition.so_index,
5214 VGPU10_NAME_UNDEFINED,
5215 VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
5216 TRUE,
5217 SVGADX_SIGNATURE_SEMANTIC_NAME_POSITION);
5218 }
5219
5220 if (emit->clip_dist_so_index != INVALID_INDEX &&
5221 emit->clip_dist_out_index != INVALID_INDEX) {
5222
5223 assert(emit->unit != PIPE_SHADER_FRAGMENT);
5224
5225 /* Emit the declaration for the clip distance shadow copy which
5226 * will be used for stream output and for the clip distance
5227 * varying variable.
5228 */
5229 emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT,
5230 emit->clip_dist_so_index,
5231 VGPU10_NAME_UNDEFINED,
5232 emit->output_usage_mask[emit->clip_dist_out_index],
5233 TRUE,
5234 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED);
5235
5236 if (emit->info.num_written_clipdistance > 4) {
5237 /* for the second clip distance register, each handles 4 planes */
5238 emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT,
5239 emit->clip_dist_so_index + 1,
5240 VGPU10_NAME_UNDEFINED,
5241 emit->output_usage_mask[emit->clip_dist_out_index+1],
5242 TRUE,
5243 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED);
5244 }
5245 }
5246
5247 if (emit->index_range.start_index != INVALID_INDEX) {
5248 emit_index_range_declaration(emit);
5249 }
5250 emit->index_range.required = FALSE;
5251 return TRUE;
5252 }
5253
5254
5255 /**
5256 * A helper function to create a temporary indexable array
5257 * and initialize the corresponding entries in the temp_map array.
5258 */
5259 static void
5260 create_temp_array(struct svga_shader_emitter_v10 *emit,
5261 unsigned arrayID, unsigned first, unsigned count,
5262 unsigned startIndex)
5263 {
5264 unsigned i, tempIndex = startIndex;
5265
5266 emit->num_temp_arrays = MAX2(emit->num_temp_arrays, arrayID + 1);
5267 assert(emit->num_temp_arrays <= MAX_TEMP_ARRAYS);
5268 emit->num_temp_arrays = MIN2(emit->num_temp_arrays, MAX_TEMP_ARRAYS);
5269
5270 emit->temp_arrays[arrayID].start = first;
5271 emit->temp_arrays[arrayID].size = count;
5272
5273 /* Fill in the temp_map entries for this temp array */
5274 for (i = 0; i < count; i++, tempIndex++) {
5275 emit->temp_map[tempIndex].arrayId = arrayID;
5276 emit->temp_map[tempIndex].index = i;
5277 }
5278 }
5279
5280
5281 /**
5282 * Emit the declaration for the temporary registers.
5283 */
5284 static boolean
5285 emit_temporaries_declaration(struct svga_shader_emitter_v10 *emit)
5286 {
5287 unsigned total_temps, reg, i;
5288
5289 total_temps = emit->num_shader_temps;
5290
5291 /* If there is indirect access to non-indexable temps in the shader,
5292 * convert those temps to indexable temps. This works around a bug
5293 * in the GLSL->TGSI translator exposed in piglit test
5294 * glsl-1.20/execution/fs-const-array-of-struct-of-array.shader_test.
5295 * Internal temps added by the driver remain as non-indexable temps.
5296 */
5297 if ((emit->info.indirect_files & (1 << TGSI_FILE_TEMPORARY)) &&
5298 emit->num_temp_arrays == 0) {
5299 create_temp_array(emit, 1, 0, total_temps, 0);
5300 }
5301
5302 /* Allocate extra temps for specially-implemented instructions,
5303 * such as LIT.
5304 */
5305 total_temps += MAX_INTERNAL_TEMPS;
5306
5307 /* Allocate extra temps for clip distance or clip vertex.
5308 */
5309 if (emit->clip_mode == CLIP_DISTANCE) {
5310 /* We need to write the clip distance to a temporary register
5311 * first. Then it will be copied to the shadow copy for
5312 * the clip distance varying variable and for stream output.
5313 * It will also be copied to the actual CLIPDIST register
5314 * according to the enabled clip planes.
5315 */
5316 emit->clip_dist_tmp_index = total_temps++;
5317 if (emit->info.num_written_clipdistance > 4)
5318 total_temps++; /* second clip register */
5319 }
5320 else if (emit->clip_mode == CLIP_VERTEX && emit->key.last_vertex_stage) {
5321 /* If the current shader is in the last vertex processing stage,
5322 * we need to convert the TGSI CLIPVERTEX output to one or more
5323 * clip distances. Allocate a temp reg for the clipvertex here.
5324 */
5325 assert(emit->info.writes_clipvertex > 0);
5326 emit->clip_vertex_tmp_index = total_temps;
5327 total_temps++;
5328 }
5329
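   /* Allocate a temp for the vertex id */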
5330 if (emit->info.uses_vertexid) {
5331 assert(emit->unit == PIPE_SHADER_VERTEX);
5332 emit->vs.vertex_id_tmp_index = total_temps++;
5333 }
5334
5335 if (emit->unit == PIPE_SHADER_VERTEX || emit->unit == PIPE_SHADER_GEOMETRY) {
5336 if (emit->vposition.need_prescale || emit->key.vs.undo_viewport ||
5337 emit->key.clip_plane_enable ||
5338 emit->vposition.so_index != INVALID_INDEX) {
5339 emit->vposition.tmp_index = total_temps;
5340 total_temps += 1;
5341 }
5342
5343 if (emit->vposition.need_prescale) {
5344 emit->vposition.prescale_scale_index = total_temps++;
5345 emit->vposition.prescale_trans_index = total_temps++;
5346 }
5347
5348 if (emit->unit == PIPE_SHADER_VERTEX) {
5349 unsigned attrib_mask = (emit->key.vs.adjust_attrib_w_1 |
5350 emit->key.vs.adjust_attrib_itof |
5351 emit->key.vs.adjust_attrib_utof |
5352 emit->key.vs.attrib_is_bgra |
5353 emit->key.vs.attrib_puint_to_snorm |
5354 emit->key.vs.attrib_puint_to_uscaled |
5355 emit->key.vs.attrib_puint_to_sscaled);
5356 while (attrib_mask) {
5357 unsigned index = u_bit_scan(&attrib_mask);
5358 emit->vs.adjusted_input[index] = total_temps++;
5359 }
5360 }
5361 else if (emit->unit == PIPE_SHADER_GEOMETRY) {
5362 if (emit->key.gs.writes_viewport_index)
5363 emit->gs.viewport_index_tmp_index = total_temps++;
5364 }
5365 }
5366 else if (emit->unit == PIPE_SHADER_FRAGMENT) {
5367 if (emit->key.fs.alpha_func != SVGA3D_CMP_ALWAYS ||
5368 emit->key.fs.write_color0_to_n_cbufs > 1) {
5369 /* Allocate a temp to hold the output color */
5370 emit->fs.color_tmp_index = total_temps;
5371 total_temps += 1;
5372 }
5373
5374 if (emit->fs.face_input_index != INVALID_INDEX) {
5375 /* Allocate a temp for the +/-1 face register */
5376 emit->fs.face_tmp_index = total_temps;
5377 total_temps += 1;
5378 }
5379
5380 if (emit->fs.fragcoord_input_index != INVALID_INDEX) {
5381 /* Allocate a temp for modified fragment position register */
5382 emit->fs.fragcoord_tmp_index = total_temps;
5383 total_temps += 1;
5384 }
5385
5386 if (emit->fs.sample_pos_sys_index != INVALID_INDEX) {
5387 /* Allocate a temp for the sample position */
5388 emit->fs.sample_pos_tmp_index = total_temps++;
5389 }
5390 }
5391 else if (emit->unit == PIPE_SHADER_TESS_EVAL) {
5392 if (emit->vposition.need_prescale) {
5393 emit->vposition.tmp_index = total_temps++;
5394 emit->vposition.prescale_scale_index = total_temps++;
5395 emit->vposition.prescale_trans_index = total_temps++;
5396 }
5397
5398 if (emit->tes.inner.tgsi_index) {
5399 emit->tes.inner.temp_index = total_temps;
5400 total_temps += 1;
5401 }
5402
5403 if (emit->tes.outer.tgsi_index) {
5404 emit->tes.outer.temp_index = total_temps;
5405 total_temps += 1;
5406 }
5407 }
5408 else if (emit->unit == PIPE_SHADER_TESS_CTRL) {
5409 if (emit->tcs.inner.tgsi_index != INVALID_INDEX) {
5410 if (!emit->tcs.control_point_phase) {
5411 emit->tcs.inner.temp_index = total_temps;
5412 total_temps += 1;
5413 }
5414 }
5415 if (emit->tcs.outer.tgsi_index != INVALID_INDEX) {
5416 if (!emit->tcs.control_point_phase) {
5417 emit->tcs.outer.temp_index = total_temps;
5418 total_temps += 1;
5419 }
5420 }
5421
5422 if (emit->tcs.control_point_phase &&
5423 emit->info.reads_pervertex_outputs) {
5424 emit->tcs.control_point_tmp_index = total_temps;
5425 total_temps += emit->tcs.control_point_out_count;
5426 }
5427 else if (!emit->tcs.control_point_phase &&
5428 emit->info.reads_perpatch_outputs) {
5429
5430 /* If there is indirect access to the patch constant outputs
5431 * in the control point phase, then an indexable temporary array
5432 * will be created for these patch constant outputs.
5433 * Note, indirect access can only be applicable to
5434 * patch constant outputs in the control point phase.
5435 */
5436 if (emit->info.indirect_files & (1 << TGSI_FILE_OUTPUT)) {
5437 unsigned arrayID =
5438 emit->num_temp_arrays ? emit->num_temp_arrays : 1;
5439 create_temp_array(emit, arrayID, 0,
5440 emit->tcs.patch_generic_out_count, total_temps);
5441 }
5442 emit->tcs.patch_generic_tmp_index = total_temps;
5443 total_temps += emit->tcs.patch_generic_out_count;
5444 }
5445
5446 emit->tcs.invocation_id_tmp_index = total_temps++;
5447 }
5448
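   /* Allocate temps to serve as the address registers (for ARL/UARL) */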
5449 for (i = 0; i < emit->num_address_regs; i++) {
5450 emit->address_reg_index[i] = total_temps++;
5451 }
5452
5453 /* Initialize the temp_map array which maps TGSI temp indexes to VGPU10
5454 * temp indexes. Basically, we compact all the non-array temp register
5455 * indexes into a consecutive series.
5456 *
5457 * Before, we may have some TGSI declarations like:
5458 * DCL TEMP[0..1], LOCAL
5459 * DCL TEMP[2..4], ARRAY(1), LOCAL
5460 * DCL TEMP[5..7], ARRAY(2), LOCAL
5461 * plus, some extra temps, like TEMP[8], TEMP[9] for misc things
5462 *
5463 * After, we'll have a map like this:
5464 * temp_map[0] = { array 0, index 0 }
5465 * temp_map[1] = { array 0, index 1 }
5466 * temp_map[2] = { array 1, index 0 }
5467 * temp_map[3] = { array 1, index 1 }
5468 * temp_map[4] = { array 1, index 2 }
5469 * temp_map[5] = { array 2, index 0 }
5470 * temp_map[6] = { array 2, index 1 }
5471 * temp_map[7] = { array 2, index 2 }
5472 * temp_map[8] = { array 0, index 2 }
5473 * temp_map[9] = { array 0, index 3 }
5474 *
5475 * We'll declare two arrays of 3 elements, plus a set of four non-indexed
5476 * temps numbered 0..3
5477 *
5478 * Any time we emit a temporary register index, we'll have to use the
5479 * temp_map[] table to convert the TGSI index to the VGPU10 index.
5480 *
5481 * Finally, we recompute the total_temps value here.
5482 */
5483 reg = 0;
5484 for (i = 0; i < total_temps; i++) {
5485 if (emit->temp_map[i].arrayId == 0) {
5486 emit->temp_map[i].index = reg++;
5487 }
5488 }
5489
5490 if (0) {
5491 debug_printf("total_temps %u\n", total_temps);
5492 for (i = 0; i < total_temps; i++) {
5493 debug_printf("temp %u -> array %u index %u\n",
5494 i, emit->temp_map[i].arrayId, emit->temp_map[i].index);
5495 }
5496 }
5497
5498 total_temps = reg;
5499
5500 /* Emit declaration of ordinary temp registers */
5501 if (total_temps > 0) {
5502 VGPU10OpcodeToken0 opcode0;
5503
5504 opcode0.value = 0;
5505 opcode0.opcodeType = VGPU10_OPCODE_DCL_TEMPS;
5506
5507 begin_emit_instruction(emit);
5508 emit_dword(emit, opcode0.value);
5509 emit_dword(emit, total_temps);
5510 end_emit_instruction(emit);
5511 }
5512
5513 /* Emit declarations for indexable temp arrays. Skip 0th entry since
5514 * it's unused.
5515 */
5516 for (i = 1; i < emit->num_temp_arrays; i++) {
5517 unsigned num_temps = emit->temp_arrays[i].size;
5518
5519 if (num_temps > 0) {
5520 VGPU10OpcodeToken0 opcode0;
5521
5522 opcode0.value = 0;
5523 opcode0.opcodeType = VGPU10_OPCODE_DCL_INDEXABLE_TEMP;
5524
5525 begin_emit_instruction(emit);
5526 emit_dword(emit, opcode0.value);
5527 emit_dword(emit, i); /* which array */
5528 emit_dword(emit, num_temps);
5529 emit_dword(emit, 4); /* num components */
5530 end_emit_instruction(emit);
5531
5532 total_temps += num_temps;
5533 }
5534 }
5535
5536 /* Check that the grand total of all regular and indexed temps is
5537 * under the limit.
5538 */
5539 check_register_index(emit, VGPU10_OPCODE_DCL_TEMPS, total_temps - 1);
5540
5541 return TRUE;
5542 }
5543
5544
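/**
 * Emit the declaration for constant buffer 0, allocating room for the
 * "extra" constants (prescale factors, viewport, clip planes, texcoord
 * scales, etc.) after the shader's own constants, then declare any
 * remaining constant buffers (UBOs).
 */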
5545 static boolean
5546 emit_constant_declaration(struct svga_shader_emitter_v10 *emit)
5547 {
5548 VGPU10OpcodeToken0 opcode0;
5549 VGPU10OperandToken0 operand0;
5550 unsigned total_consts, i;
5551
5552 opcode0.value = 0;
5553 opcode0.opcodeType = VGPU10_OPCODE_DCL_CONSTANT_BUFFER;
5554 opcode0.accessPattern = VGPU10_CB_IMMEDIATE_INDEXED;
5555 /* XXX or, access pattern = VGPU10_CB_DYNAMIC_INDEXED */
5556
5557 operand0.value = 0;
5558 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
5559 operand0.indexDimension = VGPU10_OPERAND_INDEX_2D;
5560 operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
5561 operand0.index1Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
5562 operand0.operandType = VGPU10_OPERAND_TYPE_CONSTANT_BUFFER;
5563 operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE;
5564 operand0.swizzleX = 0;
5565 operand0.swizzleY = 1;
5566 operand0.swizzleZ = 2;
5567 operand0.swizzleW = 3;
5568
5569 /**
5570 * Emit declaration for constant buffer [0]. We also allocate
5571 * room for the extra constants here.
5572 */
5573 total_consts = emit->num_shader_consts[0];
5574
5575 /* Now, allocate constant slots for the "extra" constants.
5576 * Note: it's critical that these extra constant locations
5577 * exactly match what's emitted by the "extra" constants code
5578 * in svga_state_constants.c
5579 */
5580
5581 /* Vertex position scale/translation */
5582 if (emit->vposition.need_prescale) {
5583 emit->vposition.prescale_cbuf_index = total_consts;
5584 total_consts += (2 * emit->vposition.num_prescale);
5585 }
5586
5587 if (emit->unit == PIPE_SHADER_VERTEX) {
5588 if (emit->key.vs.undo_viewport) {
5589 emit->vs.viewport_index = total_consts++;
5590 }
5591 if (emit->key.vs.need_vertex_id_bias) {
5592 emit->vs.vertex_id_bias_index = total_consts++;
5593 }
5594 }
5595
5596 /* user-defined clip planes */
5597 if (emit->key.clip_plane_enable) {
5598 unsigned n = util_bitcount(emit->key.clip_plane_enable);
5599 assert(emit->unit != PIPE_SHADER_FRAGMENT &&
5600 emit->unit != PIPE_SHADER_COMPUTE);
5601 for (i = 0; i < n; i++) {
5602 emit->clip_plane_const[i] = total_consts++;
5603 }
5604 }
5605
5606 for (i = 0; i < emit->num_samplers; i++) {
5607
5608 if (emit->sampler_view[i]) {
5609
5610 /* Texcoord scale factors for RECT textures */
5611 if (emit->key.tex[i].unnormalized) {
5612 emit->texcoord_scale_index[i] = total_consts++;
5613 }
5614
5615 /* Texture buffer sizes */
5616 if (emit->sampler_target[i] == TGSI_TEXTURE_BUFFER) {
5617 emit->texture_buffer_size_index[i] = total_consts++;
5618 }
5619 }
5620 }
5621
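   /* Emit the constant buffer 0 declaration with the total element count */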
5622 if (total_consts > 0) {
5623 begin_emit_instruction(emit);
5624 emit_dword(emit, opcode0.value);
5625 emit_dword(emit, operand0.value);
5626 emit_dword(emit, 0); /* which const buffer slot */
5627 emit_dword(emit, total_consts);
5628 end_emit_instruction(emit);
5629 }
5630
5631 /* Declare remaining constant buffers (UBOs) */
5632 for (i = 1; i < ARRAY_SIZE(emit->num_shader_consts); i++) {
5633 if (emit->num_shader_consts[i] > 0) {
5634 begin_emit_instruction(emit);
5635 emit_dword(emit, opcode0.value);
5636 emit_dword(emit, operand0.value);
5637 emit_dword(emit, i); /* which const buffer slot */
5638 emit_dword(emit, emit->num_shader_consts[i]);
5639 end_emit_instruction(emit);
5640 }
5641 }
5642
5643 return TRUE;
5644 }
5645
5646
5647 /**
5648 * Emit declarations for samplers.
5649 */
5650 static boolean
5651 emit_sampler_declarations(struct svga_shader_emitter_v10 *emit)
5652 {
5653 unsigned i;
5654
5655 for (i = 0; i < emit->num_samplers; i++) {
5656 VGPU10OpcodeToken0 opcode0;
5657 VGPU10OperandToken0 operand0;
5658
5659 opcode0.value = 0;
5660 opcode0.opcodeType = VGPU10_OPCODE_DCL_SAMPLER;
5661 opcode0.samplerMode = VGPU10_SAMPLER_MODE_DEFAULT;
5662
5663 operand0.value = 0;
5664 operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
5665 operand0.operandType = VGPU10_OPERAND_TYPE_SAMPLER;
5666 operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
5667 operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
5668
5669 begin_emit_instruction(emit);
5670 emit_dword(emit, opcode0.value);
5671 emit_dword(emit, operand0.value);
5672 emit_dword(emit, i);
5673 end_emit_instruction(emit);
5674 }
5675
5676 return TRUE;
5677 }
5678
5679
5680 /**
5681 * Translate TGSI_TEXTURE_x to VGPU10_RESOURCE_DIMENSION_x.
5682 */
5683 static unsigned
5684 tgsi_texture_to_resource_dimension(enum tgsi_texture_type target,
5685 unsigned num_samples,
5686 boolean is_array)
5687 {
5688 if (target == TGSI_TEXTURE_2D_MSAA && num_samples < 2) {
5689 target = TGSI_TEXTURE_2D;
5690 }
5691 else if (target == TGSI_TEXTURE_2D_ARRAY_MSAA && num_samples < 2) {
5692 target = TGSI_TEXTURE_2D_ARRAY;
5693 }
5694
5695 switch (target) {
5696 case TGSI_TEXTURE_BUFFER:
5697 return VGPU10_RESOURCE_DIMENSION_BUFFER;
5698 case TGSI_TEXTURE_1D:
5699 return VGPU10_RESOURCE_DIMENSION_TEXTURE1D;
5700 case TGSI_TEXTURE_2D:
5701 case TGSI_TEXTURE_RECT:
5702 return VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
5703 case TGSI_TEXTURE_3D:
5704 return VGPU10_RESOURCE_DIMENSION_TEXTURE3D;
5705 case TGSI_TEXTURE_CUBE:
5706 case TGSI_TEXTURE_SHADOWCUBE:
5707 return VGPU10_RESOURCE_DIMENSION_TEXTURECUBE;
5708 case TGSI_TEXTURE_SHADOW1D:
5709 return VGPU10_RESOURCE_DIMENSION_TEXTURE1D;
5710 case TGSI_TEXTURE_SHADOW2D:
5711 case TGSI_TEXTURE_SHADOWRECT:
5712 return VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
5713 case TGSI_TEXTURE_1D_ARRAY:
5714 case TGSI_TEXTURE_SHADOW1D_ARRAY:
5715 return is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURE1DARRAY
5716 : VGPU10_RESOURCE_DIMENSION_TEXTURE1D;
5717 case TGSI_TEXTURE_2D_ARRAY:
5718 case TGSI_TEXTURE_SHADOW2D_ARRAY:
5719 return is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURE2DARRAY
5720 : VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
5721 case TGSI_TEXTURE_2D_MSAA:
5722 return VGPU10_RESOURCE_DIMENSION_TEXTURE2DMS;
5723 case TGSI_TEXTURE_2D_ARRAY_MSAA:
5724 return is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURE2DMSARRAY
5725 : VGPU10_RESOURCE_DIMENSION_TEXTURE2DMS;
5726 case TGSI_TEXTURE_CUBE_ARRAY:
5727 case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
5728 return is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURECUBEARRAY
5729 : VGPU10_RESOURCE_DIMENSION_TEXTURECUBE;
5730 default:
5731 assert(!"Unexpected resource type");
5732 return VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
5733 }
5734 }
5735
5736
5737 /**
5738 * Given a tgsi_return_type, return true iff it is an integer type.
5739 */
5740 static boolean
5741 is_integer_type(enum tgsi_return_type type)
5742 {
5743 switch (type) {
5744 case TGSI_RETURN_TYPE_SINT:
5745 case TGSI_RETURN_TYPE_UINT:
5746 return TRUE;
5747 case TGSI_RETURN_TYPE_FLOAT:
5748 case TGSI_RETURN_TYPE_UNORM:
5749 case TGSI_RETURN_TYPE_SNORM:
5750 return FALSE;
5751 case TGSI_RETURN_TYPE_COUNT:
5752 default:
5753 assert(!"is_integer_type: Unknown tgsi_return_type");
5754 return FALSE;
5755 }
5756 }
5757
5758
5759 /**
5760 * Emit declarations for resources.
5761 * XXX When we're sure that all TGSI shaders will be generated with
5762 * sampler view declarations (Ex: DCL SVIEW[n], 2D, UINT) we may
5763 * rework this code.
5764 */
5765 static boolean
5766 emit_resource_declarations(struct svga_shader_emitter_v10 *emit)
5767 {
5768 unsigned i;
5769
5770 /* Emit resource decl for each sampler */
5771 for (i = 0; i < emit->num_samplers; i++) {
5772 VGPU10OpcodeToken0 opcode0;
5773 VGPU10OperandToken0 operand0;
5774 VGPU10ResourceReturnTypeToken return_type;
5775 VGPU10_RESOURCE_RETURN_TYPE rt;
5776
5777 opcode0.value = 0;
5778 opcode0.opcodeType = VGPU10_OPCODE_DCL_RESOURCE;
5779 opcode0.resourceDimension =
5780 tgsi_texture_to_resource_dimension(emit->sampler_target[i],
5781 emit->key.tex[i].num_samples,
5782 emit->key.tex[i].is_array);
5783 opcode0.sampleCount = emit->key.tex[i].num_samples;
5784 operand0.value = 0;
5785 operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
5786 operand0.operandType = VGPU10_OPERAND_TYPE_RESOURCE;
5787 operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
5788 operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
5789
5790 #if 1
5791 /* convert TGSI_RETURN_TYPE_x to VGPU10_RETURN_TYPE_x */
5792 STATIC_ASSERT(VGPU10_RETURN_TYPE_UNORM == TGSI_RETURN_TYPE_UNORM + 1);
5793 STATIC_ASSERT(VGPU10_RETURN_TYPE_SNORM == TGSI_RETURN_TYPE_SNORM + 1);
5794 STATIC_ASSERT(VGPU10_RETURN_TYPE_SINT == TGSI_RETURN_TYPE_SINT + 1);
5795 STATIC_ASSERT(VGPU10_RETURN_TYPE_UINT == TGSI_RETURN_TYPE_UINT + 1);
5796 STATIC_ASSERT(VGPU10_RETURN_TYPE_FLOAT == TGSI_RETURN_TYPE_FLOAT + 1);
5797 assert(emit->sampler_return_type[i] <= TGSI_RETURN_TYPE_FLOAT);
5798 rt = emit->sampler_return_type[i] + 1;
5799 #else
5800 switch (emit->sampler_return_type[i]) {
5801 case TGSI_RETURN_TYPE_UNORM: rt = VGPU10_RETURN_TYPE_UNORM; break;
5802 case TGSI_RETURN_TYPE_SNORM: rt = VGPU10_RETURN_TYPE_SNORM; break;
5803 case TGSI_RETURN_TYPE_SINT: rt = VGPU10_RETURN_TYPE_SINT; break;
5804 case TGSI_RETURN_TYPE_UINT: rt = VGPU10_RETURN_TYPE_UINT; break;
5805 case TGSI_RETURN_TYPE_FLOAT: rt = VGPU10_RETURN_TYPE_FLOAT; break;
5806 case TGSI_RETURN_TYPE_COUNT:
5807 default:
5808 rt = VGPU10_RETURN_TYPE_FLOAT;
5809 assert(!"emit_resource_declarations: Unknown tgsi_return_type");
5810 }
5811 #endif
5812
5813 return_type.value = 0;
5814 return_type.component0 = rt;
5815 return_type.component1 = rt;
5816 return_type.component2 = rt;
5817 return_type.component3 = rt;
5818
5819 begin_emit_instruction(emit);
5820 emit_dword(emit, opcode0.value);
5821 emit_dword(emit, operand0.value);
5822 emit_dword(emit, i);
5823 emit_dword(emit, return_type.value);
5824 end_emit_instruction(emit);
5825 }
5826
5827 return TRUE;
5828 }
5829
5830 /**
5831 * Emit instruction with n=1, 2 or 3 source registers.
5832 */
5833 static void
5834 emit_instruction_opn(struct svga_shader_emitter_v10 *emit,
5835 unsigned opcode,
5836 const struct tgsi_full_dst_register *dst,
5837 const struct tgsi_full_src_register *src1,
5838 const struct tgsi_full_src_register *src2,
5839 const struct tgsi_full_src_register *src3,
5840 boolean saturate, bool precise)
5841 {
5842 begin_emit_instruction(emit);
5843 emit_opcode_precise(emit, opcode, saturate, precise);
5844 emit_dst_register(emit, dst);
5845 emit_src_register(emit, src1);
5846 if (src2) {
5847 emit_src_register(emit, src2);
5848 }
5849 if (src3) {
5850 emit_src_register(emit, src3);
5851 }
5852 end_emit_instruction(emit);
5853 }
5854
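/** Emit an instruction with one source register. */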
5855 static void
5856 emit_instruction_op1(struct svga_shader_emitter_v10 *emit,
5857 unsigned opcode,
5858 const struct tgsi_full_dst_register *dst,
5859 const struct tgsi_full_src_register *src)
5860 {
5861 emit_instruction_opn(emit, opcode, dst, src, NULL, NULL, FALSE, FALSE);
5862 }
5863
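/** Emit an instruction with two source registers. */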
5864 static void
5865 emit_instruction_op2(struct svga_shader_emitter_v10 *emit,
5866 VGPU10_OPCODE_TYPE opcode,
5867 const struct tgsi_full_dst_register *dst,
5868 const struct tgsi_full_src_register *src1,
5869 const struct tgsi_full_src_register *src2)
5870 {
5871 emit_instruction_opn(emit, opcode, dst, src1, src2, NULL, FALSE, FALSE);
5872 }
5873
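/** Emit an instruction with three source registers. */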
5874 static void
5875 emit_instruction_op3(struct svga_shader_emitter_v10 *emit,
5876 VGPU10_OPCODE_TYPE opcode,
5877 const struct tgsi_full_dst_register *dst,
5878 const struct tgsi_full_src_register *src1,
5879 const struct tgsi_full_src_register *src2,
5880 const struct tgsi_full_src_register *src3)
5881 {
5882 emit_instruction_opn(emit, opcode, dst, src1, src2, src3, FALSE, FALSE);
5883 }
5884
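/** Emit an instruction with no destination or source registers. */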
5885 static void
5886 emit_instruction_op0(struct svga_shader_emitter_v10 *emit,
5887 VGPU10_OPCODE_TYPE opcode)
5888 {
5889 begin_emit_instruction(emit);
5890 emit_opcode(emit, opcode, FALSE);
5891 end_emit_instruction(emit);
5892 }
5893
5894 /**
5895 * Tessellation inner/outer levels need to be stored into the
5896 * appropriate registers depending on prim_mode.
5897 */
5898 static void
5899 store_tesslevels(struct svga_shader_emitter_v10 *emit)
5900 {
5901 int i;
5902
5903 /* Tessellation levels are required outputs of the hull shader.
5904 * Emit the inner/outer tessellation levels, either from the
5905 * values provided in the TCS or from the fallback default value of 1.0.
5906 */
5907 if (emit->key.tcs.prim_mode == PIPE_PRIM_QUADS) {
5908 struct tgsi_full_src_register temp_src;
5909
5910 if (emit->tcs.inner.tgsi_index != INVALID_INDEX)
5911 temp_src = make_src_temp_reg(emit->tcs.inner.temp_index);
5912 else
5913 temp_src = make_immediate_reg_float(emit, 1.0f);
5914
5915 for (i = 0; i < 2; i++) {
5916 struct tgsi_full_src_register src =
5917 scalar_src(&temp_src, TGSI_SWIZZLE_X + i);
5918 struct tgsi_full_dst_register dst =
5919 make_dst_reg(TGSI_FILE_OUTPUT, emit->tcs.inner.out_index + i);
5920 dst = writemask_dst(&dst, TGSI_WRITEMASK_X);
5921 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
5922 }
5923
5924 if (emit->tcs.outer.tgsi_index != INVALID_INDEX)
5925 temp_src = make_src_temp_reg(emit->tcs.outer.temp_index);
5926 else
5927 temp_src = make_immediate_reg_float(emit, 1.0f);
5928
5929 for (i = 0; i < 4; i++) {
5930 struct tgsi_full_src_register src =
5931 scalar_src(&temp_src, TGSI_SWIZZLE_X + i);
5932 struct tgsi_full_dst_register dst =
5933 make_dst_reg(TGSI_FILE_OUTPUT, emit->tcs.outer.out_index + i);
5934 dst = writemask_dst(&dst, TGSI_WRITEMASK_X);
5935 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
5936 }
5937 }
5938 else if (emit->key.tcs.prim_mode == PIPE_PRIM_TRIANGLES) {
5939 struct tgsi_full_src_register temp_src;
5940
5941 if (emit->tcs.inner.tgsi_index != INVALID_INDEX)
5942 temp_src = make_src_temp_reg(emit->tcs.inner.temp_index);
5943 else
5944 temp_src = make_immediate_reg_float(emit, 1.0f);
5945
5946 struct tgsi_full_src_register src =
5947 scalar_src(&temp_src, TGSI_SWIZZLE_X);
5948 struct tgsi_full_dst_register dst =
5949 make_dst_reg(TGSI_FILE_OUTPUT, emit->tcs.inner.out_index);
5950 dst = writemask_dst(&dst, TGSI_WRITEMASK_X);
5951 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
5952
5953 if (emit->tcs.outer.tgsi_index != INVALID_INDEX)
5954 temp_src = make_src_temp_reg(emit->tcs.outer.temp_index);
5955 else
5956 temp_src = make_immediate_reg_float(emit, 1.0f);
5957
5958 for (i = 0; i < 3; i++) {
5959 struct tgsi_full_src_register src =
5960 scalar_src(&temp_src, TGSI_SWIZZLE_X + i);
5961 struct tgsi_full_dst_register dst =
5962 make_dst_reg(TGSI_FILE_OUTPUT, emit->tcs.outer.out_index + i);
5963 dst = writemask_dst(&dst, TGSI_WRITEMASK_X);
5964 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
5965 }
5966 }
5967 else if (emit->key.tcs.prim_mode == PIPE_PRIM_LINES) {
5968 if (emit->tcs.outer.tgsi_index != INVALID_INDEX) {
5969 struct tgsi_full_src_register temp_src =
5970 make_src_temp_reg(emit->tcs.outer.temp_index);
5971 for (i = 0; i < 2; i++) {
5972 struct tgsi_full_src_register src =
5973 scalar_src(&temp_src, TGSI_SWIZZLE_X + i);
5974 struct tgsi_full_dst_register dst =
5975 make_dst_reg(TGSI_FILE_OUTPUT,
5976 emit->tcs.outer.out_index + i);
5977 dst = writemask_dst(&dst, TGSI_WRITEMASK_X);
5978 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
5979 }
5980 }
5981 }
5982 else {
5983 debug_printf("Unsupported primitive type");
5984 }
5985 }
5986
5987
5988 /**
5989 * Emit the actual clip distance instructions used for clipping:
5990 * copy the clip distances from the temporary registers to the
5991 * CLIPDIST registers, using the enabled-planes mask as the writemask.
5992 * Also copy the clip distances from the temporary to the clip distance
5993 * shadow copy register, which will be read as an input by the next shader.
5994 */
5995 static void
5996 emit_clip_distance_instructions(struct svga_shader_emitter_v10 *emit)
5997 {
5998 struct tgsi_full_src_register tmp_clip_dist_src;
5999 struct tgsi_full_dst_register clip_dist_dst;
6000
6001 unsigned i;
6002 unsigned clip_plane_enable = emit->key.clip_plane_enable;
6003 unsigned clip_dist_tmp_index = emit->clip_dist_tmp_index;
6004 int num_written_clipdist = emit->info.num_written_clipdistance;
6005
6006 assert(emit->clip_dist_out_index != INVALID_INDEX);
6007 assert(emit->clip_dist_tmp_index != INVALID_INDEX);
6008
6009 /**
6010 * Temporarily reset the temporary clip dist register index so
6011 * that the copy to the real clip dist register will not
6012 * attempt to copy to the temporary register again.
6013 */
6014 emit->clip_dist_tmp_index = INVALID_INDEX;
6015
6016 for (i = 0; i < 2 && num_written_clipdist > 0; i++, num_written_clipdist-=4) {
6017
6018 tmp_clip_dist_src = make_src_temp_reg(clip_dist_tmp_index + i);
6019
6020 /**
6021 * Copy to the shadow copy for use by the varying variable and
6022 * stream output. All clip distances will be written regardless
6023 * of the enabled clipping planes.
6024 */
6025 clip_dist_dst = make_dst_reg(TGSI_FILE_OUTPUT,
6026 emit->clip_dist_so_index + i);
6027
6028 /* MOV clip_dist_so, tmp_clip_dist */
6029 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &clip_dist_dst,
6030 &tmp_clip_dist_src);
6031
6032 /**
6033 * Copy the clip distances for the enabled clipping planes
6034 * to the CLIPDIST registers used for clipping.
6035 */
6036 if (clip_plane_enable & 0xf) {
6037 clip_dist_dst = make_dst_reg(TGSI_FILE_OUTPUT,
6038 emit->clip_dist_out_index + i);
6039 clip_dist_dst = writemask_dst(&clip_dist_dst, clip_plane_enable & 0xf);
6040
6041 /* MOV CLIPDIST, tmp_clip_dist */
6042 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &clip_dist_dst,
6043 &tmp_clip_dist_src);
6044 }
6045 /* four clip planes per clip register */
6046 clip_plane_enable >>= 4;
6047 }
6048 /**
6049 * set the temporary clip dist register index back to the
6050 * temporary index for the next vertex
6051 */
6052 emit->clip_dist_tmp_index = clip_dist_tmp_index;
6053 }
6054
6055 /* Declare clip distance output registers for user-defined clip planes
6056 * or the TGSI_CLIPVERTEX output.
6057 */
6058 static void
6059 emit_clip_distance_declarations(struct svga_shader_emitter_v10 *emit)
6060 {
6061 unsigned num_clip_planes = util_bitcount(emit->key.clip_plane_enable);
6062 unsigned index = emit->num_outputs;
6063 unsigned plane_mask;
6064
6065 assert(emit->unit != PIPE_SHADER_FRAGMENT);
6066 assert(num_clip_planes <= 8);
6067
6068 if (emit->clip_mode != CLIP_LEGACY &&
6069 emit->clip_mode != CLIP_VERTEX) {
6070 return;
6071 }
6072
6073 if (num_clip_planes == 0)
6074 return;
6075
6076 /* Convert clip vertex to clip distances only in the last vertex stage */
6077 if (!emit->key.last_vertex_stage)
6078 return;
6079
6080 /* Declare one or two clip output registers. The number of components
6081 * in the mask reflects the number of clip planes. For example, if 5
6082 * clip planes are needed, we'll declare outputs similar to:
6083 * dcl_output_siv o2.xyzw, clip_distance
6084 * dcl_output_siv o3.x, clip_distance
6085 */
6086 emit->clip_dist_out_index = index; /* save the starting clip dist reg index */
6087
6088 plane_mask = (1 << num_clip_planes) - 1;
6089 if (plane_mask & 0xf) {
6090 unsigned cmask = plane_mask & VGPU10_OPERAND_4_COMPONENT_MASK_ALL;
6091 emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT_SIV, index,
6092 VGPU10_NAME_CLIP_DISTANCE, cmask, TRUE,
6093 SVGADX_SIGNATURE_SEMANTIC_NAME_CLIP_DISTANCE);
6094 emit->num_outputs++;
6095 }
6096 if (plane_mask & 0xf0) {
6097 unsigned cmask = (plane_mask >> 4) & VGPU10_OPERAND_4_COMPONENT_MASK_ALL;
6098 emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT_SIV, index + 1,
6099 VGPU10_NAME_CLIP_DISTANCE, cmask, TRUE,
6100 SVGADX_SIGNATURE_SEMANTIC_NAME_CLIP_DISTANCE);
6101 emit->num_outputs++;
6102 }
6103 }
6104
6105
6106 /**
6107 * Emit the instructions for writing to the clip distance registers
6108 * to handle legacy/automatic clip planes.
6109 * For each clip plane, the distance is the dot product of the vertex
6110 * position (found in TEMP[vpos_tmp_index]) and the clip plane coefficients.
6111 * This is not used when the shader already has explicit CLIPVERTEX or
6112 * CLIPDISTANCE output registers declared.
6113 */
6114 static void
6115 emit_clip_distance_from_vpos(struct svga_shader_emitter_v10 *emit,
6116 unsigned vpos_tmp_index)
6117 {
6118 unsigned i, num_clip_planes = util_bitcount(emit->key.clip_plane_enable);
6119
6120 assert(emit->clip_mode == CLIP_LEGACY);
6121 assert(num_clip_planes <= 8);
6122
6123 assert(emit->unit == PIPE_SHADER_VERTEX ||
6124 emit->unit == PIPE_SHADER_GEOMETRY ||
6125 emit->unit == PIPE_SHADER_TESS_EVAL);
6126
6127 for (i = 0; i < num_clip_planes; i++) {
6128 struct tgsi_full_dst_register dst;
6129 struct tgsi_full_src_register plane_src, vpos_src;
6130 unsigned reg_index = emit->clip_dist_out_index + i / 4;
6131 unsigned comp = i % 4;
6132 unsigned writemask = VGPU10_OPERAND_4_COMPONENT_MASK_X << comp;
6133
6134 /* create dst, src regs */
6135 dst = make_dst_reg(TGSI_FILE_OUTPUT, reg_index);
6136 dst = writemask_dst(&dst, writemask);
6137
6138 plane_src = make_src_const_reg(emit->clip_plane_const[i]);
6139 vpos_src = make_src_temp_reg(vpos_tmp_index);
6140
6141 /* DP4 clip_dist, plane, vpos */
6142 emit_instruction_op2(emit, VGPU10_OPCODE_DP4, &dst,
6143 &plane_src, &vpos_src);
6144 }
6145 }
6146
6147
6148 /**
6149 * Emit the instructions for computing the clip distance results from
6150 * the clip vertex temporary.
6151 * For each clip plane, the distance is the dot product of the clip vertex
6152 * position (found in a temp reg) and the clip plane coefficients.
6153 */
6154 static void
6155 emit_clip_vertex_instructions(struct svga_shader_emitter_v10 *emit)
6156 {
6157 const unsigned num_clip = util_bitcount(emit->key.clip_plane_enable);
6158 unsigned i;
6159 struct tgsi_full_dst_register dst;
6160 struct tgsi_full_src_register clipvert_src;
6161 const unsigned clip_vertex_tmp = emit->clip_vertex_tmp_index;
6162
6163 assert(emit->unit == PIPE_SHADER_VERTEX ||
6164 emit->unit == PIPE_SHADER_GEOMETRY ||
6165 emit->unit == PIPE_SHADER_TESS_EVAL);
6166
6167 assert(emit->clip_mode == CLIP_VERTEX);
6168
6169 clipvert_src = make_src_temp_reg(clip_vertex_tmp);
6170
6171 for (i = 0; i < num_clip; i++) {
6172 struct tgsi_full_src_register plane_src;
6173 unsigned reg_index = emit->clip_dist_out_index + i / 4;
6174 unsigned comp = i % 4;
6175 unsigned writemask = VGPU10_OPERAND_4_COMPONENT_MASK_X << comp;
6176
6177 /* create dst, src regs */
6178 dst = make_dst_reg(TGSI_FILE_OUTPUT, reg_index);
6179 dst = writemask_dst(&dst, writemask);
6180
6181 plane_src = make_src_const_reg(emit->clip_plane_const[i]);
6182
6183 /* DP4 clip_dist, plane, clipvert */
6184 emit_instruction_op2(emit, VGPU10_OPCODE_DP4, &dst,
6185 &plane_src, &clipvert_src);
6186 }
6187
6188 /* copy temporary clip vertex register to the clip vertex register */
6189
6190 assert(emit->clip_vertex_out_index != INVALID_INDEX);
6191
6192 /**
6193 * Temporarily reset the temporary clip vertex register index so
6194 * that the copy to the clip vertex register will not attempt
6195 * to copy to the temporary register again.
6196 */
6197 emit->clip_vertex_tmp_index = INVALID_INDEX;
6198
6199 /* MOV clip_vertex, clip_vertex_tmp */
6200 dst = make_dst_reg(TGSI_FILE_OUTPUT, emit->clip_vertex_out_index);
6201 emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
6202 &dst, &clipvert_src);
6203
6204 /**
6205 * set the temporary clip vertex register index back to the
6206 * temporary index for the next vertex
6207 */
6208 emit->clip_vertex_tmp_index = clip_vertex_tmp;
6209 }
6210
6211 /**
6212 * Emit code to convert RGBA to BGRA
6213 */
6214 static void
6215 emit_swap_r_b(struct svga_shader_emitter_v10 *emit,
6216 const struct tgsi_full_dst_register *dst,
6217 const struct tgsi_full_src_register *src)
6218 {
6219 struct tgsi_full_src_register bgra_src =
6220 swizzle_src(src, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_X, TGSI_SWIZZLE_W);
6221
6222 begin_emit_instruction(emit);
6223 emit_opcode(emit, VGPU10_OPCODE_MOV, FALSE);
6224 emit_dst_register(emit, dst);
6225 emit_src_register(emit, &bgra_src);
6226 end_emit_instruction(emit);
6227 }
6228
6229
6230 /** Convert from 10_10_10_2 normalized to 10_10_10_2_snorm */
6231 static void
6232 emit_puint_to_snorm(struct svga_shader_emitter_v10 *emit,
6233 const struct tgsi_full_dst_register *dst,
6234 const struct tgsi_full_src_register *src)
6235 {
6236 struct tgsi_full_src_register half = make_immediate_reg_float(emit, 0.5f);
6237 struct tgsi_full_src_register two =
6238 make_immediate_reg_float4(emit, 2.0f, 2.0f, 2.0f, 3.0f);
6239 struct tgsi_full_src_register neg_two =
6240 make_immediate_reg_float4(emit, -2.0f, -2.0f, -2.0f, -1.66666f);
6241
6242 unsigned val_tmp = get_temp_index(emit);
6243 struct tgsi_full_dst_register val_dst = make_dst_temp_reg(val_tmp);
6244 struct tgsi_full_src_register val_src = make_src_temp_reg(val_tmp);
6245
6246 unsigned bias_tmp = get_temp_index(emit);
6247 struct tgsi_full_dst_register bias_dst = make_dst_temp_reg(bias_tmp);
6248 struct tgsi_full_src_register bias_src = make_src_temp_reg(bias_tmp);
6249
6250 /* val = src * 2.0 */
6251 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &val_dst, src, &two);
6252
6253 /* bias = src > 0.5 */
6254 emit_instruction_op2(emit, VGPU10_OPCODE_GE, &bias_dst, src, &half);
6255
6256 /* bias = bias & -2.0 */
6257 emit_instruction_op2(emit, VGPU10_OPCODE_AND, &bias_dst,
6258 &bias_src, &neg_two);
6259
6260 /* dst = val + bias */
6261 emit_instruction_op2(emit, VGPU10_OPCODE_ADD, dst,
6262 &val_src, &bias_src);
6263
6264 free_temp_indexes(emit);
6265 }
6266
6267
6268 /** Convert from 10_10_10_2_unorm to 10_10_10_2_uscaled */
6269 static void
6270 emit_puint_to_uscaled(struct svga_shader_emitter_v10 *emit,
6271 const struct tgsi_full_dst_register *dst,
6272 const struct tgsi_full_src_register *src)
6273 {
6274 struct tgsi_full_src_register scale =
6275 make_immediate_reg_float4(emit, 1023.0f, 1023.0f, 1023.0f, 3.0f);
6276
6277 /* dst = src * scale */
6278 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, dst, src, &scale);
6279 }
6280
6281
6282 /** Convert from R32_UINT to 10_10_10_2_sscaled */
6283 static void
6284 emit_puint_to_sscaled(struct svga_shader_emitter_v10 *emit,
6285 const struct tgsi_full_dst_register *dst,
6286 const struct tgsi_full_src_register *src)
6287 {
6288 struct tgsi_full_src_register lshift =
6289 make_immediate_reg_int4(emit, 22, 12, 2, 0);
6290 struct tgsi_full_src_register rshift =
6291 make_immediate_reg_int4(emit, 22, 22, 22, 30);
6292
6293 struct tgsi_full_src_register src_xxxx = scalar_src(src, TGSI_SWIZZLE_X);
6294
6295 unsigned tmp = get_temp_index(emit);
6296 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
6297 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
6298
6299 /*
6300 * r = (pixel << 22) >> 22; # signed int in [-512, 511]
6301 * g = (pixel << 12) >> 22; # signed int in [-512, 511]
6302 * b = (pixel << 2) >> 22; # signed int in [-512, 511]
6303 * a = (pixel << 0) >> 30; # signed int in [-2, 1]
6304 * dst = i_to_f(r,g,b,a); # convert to float
6305 */
6306 emit_instruction_op2(emit, VGPU10_OPCODE_ISHL, &tmp_dst,
6307 &src_xxxx, &lshift);
6308 emit_instruction_op2(emit, VGPU10_OPCODE_ISHR, &tmp_dst,
6309 &tmp_src, &rshift);
6310 emit_instruction_op1(emit, VGPU10_OPCODE_ITOF, dst, &tmp_src);
6311
6312 free_temp_indexes(emit);
6313 }
6314
6315
6316 /**
6317 * Emit code for TGSI_OPCODE_ARL or TGSI_OPCODE_UARL instruction.
6318 */
6319 static boolean
6320 emit_arl_uarl(struct svga_shader_emitter_v10 *emit,
6321 const struct tgsi_full_instruction *inst)
6322 {
6323 unsigned index = inst->Dst[0].Register.Index;
6324 struct tgsi_full_dst_register dst;
6325 VGPU10_OPCODE_TYPE opcode;
6326
6327 assert(index < MAX_VGPU10_ADDR_REGS);
6328 dst = make_dst_temp_reg(emit->address_reg_index[index]);
6329 dst = writemask_dst(&dst, inst->Dst[0].Register.WriteMask);
6330
6331 /* ARL dst, s0
6332 * Translates into:
6333 * FTOI address_tmp, s0
6334 *
6335 * UARL dst, s0
6336 * Translates into:
6337 * MOV address_tmp, s0
6338 */
6339 if (inst->Instruction.Opcode == TGSI_OPCODE_ARL)
6340 opcode = VGPU10_OPCODE_FTOI;
6341 else
6342 opcode = VGPU10_OPCODE_MOV;
6343
6344 emit_instruction_op1(emit, opcode, &dst, &inst->Src[0]);
6345
6346 return TRUE;
6347 }
6348
6349
6350 /**
6351 * Emit code for TGSI_OPCODE_CAL instruction.
6352 */
6353 static boolean
6354 emit_cal(struct svga_shader_emitter_v10 *emit,
6355 const struct tgsi_full_instruction *inst)
6356 {
6357 unsigned label = inst->Label.Label;
6358 VGPU10OperandToken0 operand;
6359 operand.value = 0;
6360 operand.operandType = VGPU10_OPERAND_TYPE_LABEL;
6361
6362 begin_emit_instruction(emit);
6363 emit_dword(emit, operand.value);
6364 emit_dword(emit, label);
6365 end_emit_instruction(emit);
6366
6367 return TRUE;
6368 }
6369
6370
6371 /**
6372 * Emit code for TGSI_OPCODE_IABS instruction.
6373 */
6374 static boolean
6375 emit_iabs(struct svga_shader_emitter_v10 *emit,
6376 const struct tgsi_full_instruction *inst)
6377 {
6378 /* dst.x = (src0.x < 0) ? -src0.x : src0.x
6379 * dst.y = (src0.y < 0) ? -src0.y : src0.y
6380 * dst.z = (src0.z < 0) ? -src0.z : src0.z
6381 * dst.w = (src0.w < 0) ? -src0.w : src0.w
6382 *
6383 * Translates into
6384 * IMAX dst, src, neg(src)
6385 */
6386 struct tgsi_full_src_register neg_src = negate_src(&inst->Src[0]);
6387 emit_instruction_op2(emit, VGPU10_OPCODE_IMAX, &inst->Dst[0],
6388 &inst->Src[0], &neg_src);
6389
6390 return TRUE;
6391 }
6392
6393
6394 /**
6395 * Emit code for TGSI_OPCODE_CMP instruction.
6396 */
6397 static boolean
6398 emit_cmp(struct svga_shader_emitter_v10 *emit,
6399 const struct tgsi_full_instruction *inst)
6400 {
6401 /* dst.x = (src0.x < 0) ? src1.x : src2.x
6402 * dst.y = (src0.y < 0) ? src1.y : src2.y
6403 * dst.z = (src0.z < 0) ? src1.z : src2.z
6404 * dst.w = (src0.w < 0) ? src1.w : src2.w
6405 *
6406 * Translates into
6407 * LT tmp, src0, 0.0
6408 * MOVC dst, tmp, src1, src2
6409 */
6410 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
6411 unsigned tmp = get_temp_index(emit);
6412 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
6413 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
6414
6415 emit_instruction_opn(emit, VGPU10_OPCODE_LT, &tmp_dst,
6416 &inst->Src[0], &zero, NULL, FALSE,
6417 inst->Instruction.Precise);
6418 emit_instruction_opn(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0],
6419 &tmp_src, &inst->Src[1], &inst->Src[2],
6420 inst->Instruction.Saturate, FALSE);
6421
6422 free_temp_indexes(emit);
6423
6424 return TRUE;
6425 }
6426
6427
6428 /**
6429 * Emit code for TGSI_OPCODE_DST instruction.
6430 */
6431 static boolean
6432 emit_dst(struct svga_shader_emitter_v10 *emit,
6433 const struct tgsi_full_instruction *inst)
6434 {
6435 /*
6436 * dst.x = 1
6437 * dst.y = src0.y * src1.y
6438 * dst.z = src0.z
6439 * dst.w = src1.w
6440 */
6441
6442 struct tgsi_full_src_register s0_yyyy =
6443 scalar_src(&inst->Src[0], TGSI_SWIZZLE_Y);
6444 struct tgsi_full_src_register s0_zzzz =
6445 scalar_src(&inst->Src[0], TGSI_SWIZZLE_Z);
6446 struct tgsi_full_src_register s1_yyyy =
6447 scalar_src(&inst->Src[1], TGSI_SWIZZLE_Y);
6448 struct tgsi_full_src_register s1_wwww =
6449 scalar_src(&inst->Src[1], TGSI_SWIZZLE_W);
6450
6451 /*
6452 * If dst and either src0 or src1 are the same, we need
6453 * to create a temporary for it and insert an extra move.
6454 */
6455 unsigned tmp_move = get_temp_index(emit);
6456 struct tgsi_full_src_register move_src = make_src_temp_reg(tmp_move);
6457 struct tgsi_full_dst_register move_dst = make_dst_temp_reg(tmp_move);
6458
6459 /* MOV dst.x, 1.0 */
6460 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
6461 struct tgsi_full_dst_register dst_x =
6462 writemask_dst(&move_dst, TGSI_WRITEMASK_X);
6463 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
6464
6465 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_x, &one);
6466 }
6467
6468 /* MUL dst.y, s0.y, s1.y */
6469 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
6470 struct tgsi_full_dst_register dst_y =
6471 writemask_dst(&move_dst, TGSI_WRITEMASK_Y);
6472
6473 emit_instruction_opn(emit, VGPU10_OPCODE_MUL, &dst_y, &s0_yyyy,
6474 &s1_yyyy, NULL, inst->Instruction.Saturate,
6475 inst->Instruction.Precise);
6476 }
6477
6478 /* MOV dst.z, s0.z */
6479 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
6480 struct tgsi_full_dst_register dst_z =
6481 writemask_dst(&move_dst, TGSI_WRITEMASK_Z);
6482
6483 emit_instruction_opn(emit, VGPU10_OPCODE_MOV,
6484 &dst_z, &s0_zzzz, NULL, NULL,
6485 inst->Instruction.Saturate,
6486 inst->Instruction.Precise);
6487 }
6488
6489 /* MOV dst.w, s1.w */
6490 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
6491 struct tgsi_full_dst_register dst_w =
6492 writemask_dst(&move_dst, TGSI_WRITEMASK_W);
6493
6494 emit_instruction_opn(emit, VGPU10_OPCODE_MOV,
6495 &dst_w, &s1_wwww, NULL, NULL,
6496 inst->Instruction.Saturate,
6497 inst->Instruction.Precise);
6498 }
6499
6500 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &move_src);
6501 free_temp_indexes(emit);
6502
6503 return TRUE;
6504 }
6505
6506
6507 /**
6508 * A helper function to return the stream index as specified in
6509 * the immediate register
6510 */
6511 static inline unsigned
6512 find_stream_index(struct svga_shader_emitter_v10 *emit,
6513 const struct tgsi_full_src_register *src)
6514 {
6515 return emit->immediates[src->Register.Index][src->Register.SwizzleX].Int;
6516 }
6517
6518
6519 /**
6520 * Emit code for TGSI_OPCODE_ENDPRIM (GS only)
6521 */
6522 static boolean
6523 emit_endprim(struct svga_shader_emitter_v10 *emit,
6524 const struct tgsi_full_instruction *inst)
6525 {
6526 assert(emit->unit == PIPE_SHADER_GEOMETRY);
6527
6528 begin_emit_instruction(emit);
6529 if (emit->version >= 50) {
6530 unsigned streamIndex = find_stream_index(emit, &inst->Src[0]);
6531
6532 if (emit->info.num_stream_output_components[streamIndex] == 0) {
6533 /**
6534 * If there is no output for this stream, discard this instruction.
6535 */
6536 emit->discard_instruction = TRUE;
6537 }
6538 else {
6539 emit_opcode(emit, VGPU10_OPCODE_CUT_STREAM, FALSE);
6540 assert(inst->Src[0].Register.File == TGSI_FILE_IMMEDIATE);
6541 emit_stream_register(emit, streamIndex);
6542 }
6543 }
6544 else {
6545 emit_opcode(emit, VGPU10_OPCODE_CUT, FALSE);
6546 }
6547 end_emit_instruction(emit);
6548 return TRUE;
6549 }
6550
6551
6552 /**
6553 * Emit code for TGSI_OPCODE_EX2 (2^x) instruction.
6554 */
6555 static boolean
6556 emit_ex2(struct svga_shader_emitter_v10 *emit,
6557 const struct tgsi_full_instruction *inst)
6558 {
6559 /* Note that TGSI_OPCODE_EX2 computes only one value from src.x
6560 * while VGPU10 computes four values.
6561 *
6562 * dst = EX2(src):
6563 * dst.xyzw = 2.0 ^ src.x
6564 */
6565
6566 struct tgsi_full_src_register src_xxxx =
6567 swizzle_src(&inst->Src[0], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
6568 TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
6569
6570 /* EXP dst, s0.xxxx */
6571 emit_instruction_opn(emit, VGPU10_OPCODE_EXP, &inst->Dst[0], &src_xxxx,
6572 NULL, NULL,
6573 inst->Instruction.Saturate,
6574 inst->Instruction.Precise);
6575
6576 return TRUE;
6577 }
6578
6579
6580 /**
6581 * Emit code for TGSI_OPCODE_EXP instruction.
6582 */
6583 static boolean
6584 emit_exp(struct svga_shader_emitter_v10 *emit,
6585 const struct tgsi_full_instruction *inst)
6586 {
6587 /*
6588 * dst.x = 2 ^ floor(s0.x)
6589 * dst.y = s0.x - floor(s0.x)
6590 * dst.z = 2 ^ s0.x
6591 * dst.w = 1.0
6592 */
6593
6594 struct tgsi_full_src_register src_xxxx =
6595 scalar_src(&inst->Src[0], TGSI_SWIZZLE_X);
6596 unsigned tmp = get_temp_index(emit);
6597 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
6598 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
6599
6600 /*
6601 * If dst and src are the same we need to create
6602 * a temporary for it and insert an extra move.
6603 */
6604 unsigned tmp_move = get_temp_index(emit);
6605 struct tgsi_full_src_register move_src = make_src_temp_reg(tmp_move);
6606 struct tgsi_full_dst_register move_dst = make_dst_temp_reg(tmp_move);
6607
6608 /* only use X component of temp reg */
6609 tmp_dst = writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
6610 tmp_src = scalar_src(&tmp_src, TGSI_SWIZZLE_X);
6611
6612 /* ROUND_NI tmp.x, s0.x */
6613 emit_instruction_op1(emit, VGPU10_OPCODE_ROUND_NI, &tmp_dst,
6614 &src_xxxx); /* round to -infinity */
6615
6616 /* EXP dst.x, tmp.x */
6617 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
6618 struct tgsi_full_dst_register dst_x =
6619 writemask_dst(&move_dst, TGSI_WRITEMASK_X);
6620
6621 emit_instruction_opn(emit, VGPU10_OPCODE_EXP, &dst_x, &tmp_src,
6622 NULL, NULL,
6623 inst->Instruction.Saturate,
6624 inst->Instruction.Precise);
6625 }
6626
6627 /* ADD dst.y, s0.x, -tmp */
6628 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
6629 struct tgsi_full_dst_register dst_y =
6630 writemask_dst(&move_dst, TGSI_WRITEMASK_Y);
6631 struct tgsi_full_src_register neg_tmp_src = negate_src(&tmp_src);
6632
6633 emit_instruction_opn(emit, VGPU10_OPCODE_ADD, &dst_y, &src_xxxx,
6634 &neg_tmp_src, NULL,
6635 inst->Instruction.Saturate,
6636 inst->Instruction.Precise);
6637 }
6638
6639 /* EXP dst.z, s0.x */
6640 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
6641 struct tgsi_full_dst_register dst_z =
6642 writemask_dst(&move_dst, TGSI_WRITEMASK_Z);
6643
6644 emit_instruction_opn(emit, VGPU10_OPCODE_EXP, &dst_z, &src_xxxx,
6645 NULL, NULL,
6646 inst->Instruction.Saturate,
6647 inst->Instruction.Precise);
6648 }
6649
6650 /* MOV dst.w, 1.0 */
6651 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
6652 struct tgsi_full_dst_register dst_w =
6653 writemask_dst(&move_dst, TGSI_WRITEMASK_W);
6654 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
6655
6656 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_w, &one);
6657 }
6658
6659 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &move_src);
6660
6661 free_temp_indexes(emit);
6662
6663 return TRUE;
6664 }
6665
6666
6667 /**
6668 * Emit code for TGSI_OPCODE_IF instruction.
6669 */
6670 static boolean
6671 emit_if(struct svga_shader_emitter_v10 *emit,
6672 const struct tgsi_full_src_register *src)
6673 {
6674 VGPU10OpcodeToken0 opcode0;
6675
6676 /* The src register should be a scalar */
6677 assert(src->Register.SwizzleX == src->Register.SwizzleY &&
6678 src->Register.SwizzleX == src->Register.SwizzleZ &&
6679 src->Register.SwizzleX == src->Register.SwizzleW);
6680
6681 /* The only special thing here is that we need to set the
6682 * VGPU10_INSTRUCTION_TEST_NONZERO flag since we want to test if
6683 * src.x is non-zero.
6684 */
6685 opcode0.value = 0;
6686 opcode0.opcodeType = VGPU10_OPCODE_IF;
6687 opcode0.testBoolean = VGPU10_INSTRUCTION_TEST_NONZERO;
6688
6689 begin_emit_instruction(emit);
6690 emit_dword(emit, opcode0.value);
6691 emit_src_register(emit, src);
6692 end_emit_instruction(emit);
6693
6694 return TRUE;
6695 }
6696
6697
6698 /**
6699 * Emit code for TGSI_OPCODE_KILL_IF instruction (kill fragment if any of
6700 * the register components are negative).
6701 */
6702 static boolean
6703 emit_kill_if(struct svga_shader_emitter_v10 *emit,
6704 const struct tgsi_full_instruction *inst)
6705 {
6706 unsigned tmp = get_temp_index(emit);
6707 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
6708 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
6709
6710 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
6711
6712 struct tgsi_full_dst_register tmp_dst_x =
6713 writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
6714 struct tgsi_full_src_register tmp_src_xxxx =
6715 scalar_src(&tmp_src, TGSI_SWIZZLE_X);
6716
6717 /* tmp = src[0] < 0.0 */
6718 emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp_dst, &inst->Src[0], &zero);
6719
6720 if (!same_swizzle_terms(&inst->Src[0])) {
6721 /* If the swizzle is not XXXX, YYYY, ZZZZ or WWWW we need to
6722 * logically OR the swizzle terms. Most uses of KILL_IF only
6723 * test one channel so it's good to avoid these extra steps.
6724 */
6725 struct tgsi_full_src_register tmp_src_yyyy =
6726 scalar_src(&tmp_src, TGSI_SWIZZLE_Y);
6727 struct tgsi_full_src_register tmp_src_zzzz =
6728 scalar_src(&tmp_src, TGSI_SWIZZLE_Z);
6729 struct tgsi_full_src_register tmp_src_wwww =
6730 scalar_src(&tmp_src, TGSI_SWIZZLE_W);
6731
6732 emit_instruction_op2(emit, VGPU10_OPCODE_OR, &tmp_dst_x, &tmp_src_xxxx,
6733 &tmp_src_yyyy);
6734 emit_instruction_op2(emit, VGPU10_OPCODE_OR, &tmp_dst_x, &tmp_src_xxxx,
6735 &tmp_src_zzzz);
6736 emit_instruction_op2(emit, VGPU10_OPCODE_OR, &tmp_dst_x, &tmp_src_xxxx,
6737 &tmp_src_wwww);
6738 }
6739
6740 begin_emit_instruction(emit);
6741 emit_discard_opcode(emit, TRUE); /* discard if tmp.x is non-zero */
6742 emit_src_register(emit, &tmp_src_xxxx);
6743 end_emit_instruction(emit);
6744
6745 free_temp_indexes(emit);
6746
6747 return TRUE;
6748 }
6749
6750
6751 /**
6752 * Emit code for TGSI_OPCODE_KILL instruction (unconditional discard).
6753 */
6754 static boolean
6755 emit_kill(struct svga_shader_emitter_v10 *emit,
6756 const struct tgsi_full_instruction *inst)
6757 {
6758 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
6759
6760 /* DISCARD if 0.0 is zero */
6761 begin_emit_instruction(emit);
6762 emit_discard_opcode(emit, FALSE);
6763 emit_src_register(emit, &zero);
6764 end_emit_instruction(emit);
6765
6766 return TRUE;
6767 }
6768
6769
6770 /**
6771 * Emit code for TGSI_OPCODE_LG2 instruction.
6772 */
6773 static boolean
6774 emit_lg2(struct svga_shader_emitter_v10 *emit,
6775 const struct tgsi_full_instruction *inst)
6776 {
6777 /* Note that TGSI_OPCODE_LG2 computes only one value from src.x
6778 * while VGPU10 computes four values.
6779 *
6780 * dst = LG2(src):
6781 * dst.xyzw = log2(src.x)
6782 */
6783
6784 struct tgsi_full_src_register src_xxxx =
6785 swizzle_src(&inst->Src[0], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
6786 TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
6787
6788 /* LOG dst, s0.xxxx */
6789 emit_instruction_opn(emit, VGPU10_OPCODE_LOG,
6790 &inst->Dst[0], &src_xxxx, NULL, NULL,
6791 inst->Instruction.Saturate,
6792 inst->Instruction.Precise);
6793
6794 return TRUE;
6795 }
6796
6797
6798 /**
6799 * Emit code for TGSI_OPCODE_LIT instruction.
6800 */
6801 static boolean
6802 emit_lit(struct svga_shader_emitter_v10 *emit,
6803 const struct tgsi_full_instruction *inst)
6804 {
6805 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
6806
6807 /*
6808 * If dst and src are the same register we need to write
6809 * to a temporary and insert an extra move at the end.
6810 */
6811 unsigned tmp_move = get_temp_index(emit);
6812 struct tgsi_full_src_register move_src = make_src_temp_reg(tmp_move);
6813 struct tgsi_full_dst_register move_dst = make_dst_temp_reg(tmp_move);
6814
6815 /*
6816 * dst.x = 1
6817 * dst.y = max(src.x, 0)
6818 * dst.z = (src.x > 0) ? max(src.y, 0)^{clamp(src.w, -128, 128)} : 0
6819 * dst.w = 1
6820 */
6821
6822 /* MOV dst.x, 1.0 */
6823 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
6824 struct tgsi_full_dst_register dst_x =
6825 writemask_dst(&move_dst, TGSI_WRITEMASK_X);
6826 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_x, &one);
6827 }
6828
6829 /* MOV dst.w, 1.0 */
6830 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
6831 struct tgsi_full_dst_register dst_w =
6832 writemask_dst(&move_dst, TGSI_WRITEMASK_W);
6833 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_w, &one);
6834 }
6835
6836 /* MAX dst.y, src.x, 0.0 */
6837 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
6838 struct tgsi_full_dst_register dst_y =
6839 writemask_dst(&move_dst, TGSI_WRITEMASK_Y);
6840 struct tgsi_full_src_register zero =
6841 make_immediate_reg_float(emit, 0.0f);
6842 struct tgsi_full_src_register src_xxxx =
6843 swizzle_src(&inst->Src[0], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
6844 TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
6845
6846 emit_instruction_opn(emit, VGPU10_OPCODE_MAX, &dst_y, &src_xxxx,
6847 &zero, NULL, inst->Instruction.Saturate, FALSE);
6848 }
6849
6850 /*
6851 * tmp1 = clamp(src.w, -128, 128);
6852 * MAX tmp1, src.w, -128
6853 * MIN tmp1, tmp1, 128
6854 *
6855 * tmp2 = max(src.y, 0);
6856 * MAX tmp2, src.y, 0
6857 *
6858 * tmp1 = pow(tmp2, tmp1);
6859 * LOG tmp2, tmp2
6860 * MUL tmp1, tmp2, tmp1
6861 * EXP tmp1, tmp1
6862 *
6863 * tmp1 = (src.w == 0) ? 1 : tmp1;
6864 * EQ tmp2, 0, src.w
6865 * MOVC tmp1, tmp2, 1.0, tmp1
6866 *
6867 * dst.z = (0 < src.x) ? tmp1 : 0;
6868 * LT tmp2, 0, src.x
6869 * MOVC dst.z, tmp2, tmp1, 0.0
6870 */
6871 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
6872 struct tgsi_full_dst_register dst_z =
6873 writemask_dst(&move_dst, TGSI_WRITEMASK_Z);
6874
6875 unsigned tmp1 = get_temp_index(emit);
6876 struct tgsi_full_src_register tmp1_src = make_src_temp_reg(tmp1);
6877 struct tgsi_full_dst_register tmp1_dst = make_dst_temp_reg(tmp1);
6878 unsigned tmp2 = get_temp_index(emit);
6879 struct tgsi_full_src_register tmp2_src = make_src_temp_reg(tmp2);
6880 struct tgsi_full_dst_register tmp2_dst = make_dst_temp_reg(tmp2);
6881
6882 struct tgsi_full_src_register src_xxxx =
6883 scalar_src(&inst->Src[0], TGSI_SWIZZLE_X);
6884 struct tgsi_full_src_register src_yyyy =
6885 scalar_src(&inst->Src[0], TGSI_SWIZZLE_Y);
6886 struct tgsi_full_src_register src_wwww =
6887 scalar_src(&inst->Src[0], TGSI_SWIZZLE_W);
6888
6889 struct tgsi_full_src_register zero =
6890 make_immediate_reg_float(emit, 0.0f);
6891 struct tgsi_full_src_register lowerbound =
6892 make_immediate_reg_float(emit, -128.0f);
6893 struct tgsi_full_src_register upperbound =
6894 make_immediate_reg_float(emit, 128.0f);
6895
6896 emit_instruction_op2(emit, VGPU10_OPCODE_MAX, &tmp1_dst, &src_wwww,
6897 &lowerbound);
6898 emit_instruction_op2(emit, VGPU10_OPCODE_MIN, &tmp1_dst, &tmp1_src,
6899 &upperbound);
6900 emit_instruction_op2(emit, VGPU10_OPCODE_MAX, &tmp2_dst, &src_yyyy,
6901 &zero);
6902
6903 /* POW tmp1, tmp2, tmp1 */
6904 /* LOG tmp2, tmp2 */
6905 emit_instruction_op1(emit, VGPU10_OPCODE_LOG, &tmp2_dst, &tmp2_src);
6906
6907 /* MUL tmp1, tmp2, tmp1 */
6908 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp1_dst, &tmp2_src,
6909 &tmp1_src);
6910
6911 /* EXP tmp1, tmp1 */
6912 emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &tmp1_dst, &tmp1_src);
6913
6914 /* EQ tmp2, 0, src.w */
6915 emit_instruction_op2(emit, VGPU10_OPCODE_EQ, &tmp2_dst, &zero, &src_wwww);
6916 /* MOVC tmp1, tmp2, 1.0, tmp1 */
6917 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &tmp1_dst,
6918 &tmp2_src, &one, &tmp1_src);
6919
6920 /* LT tmp2, 0, src.x */
6921 emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp2_dst, &zero, &src_xxxx);
6922 /* MOVC dst.z, tmp2, tmp1, 0.0 */
6923 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &dst_z,
6924 &tmp2_src, &tmp1_src, &zero);
6925 }
6926
6927 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &move_src);
6928 free_temp_indexes(emit);
6929
6930 return TRUE;
6931 }
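
/*
 * Worked example (illustrative only): for src = (4.0, 2.0, _, 3.0) the
 * formulas above give dst = (1.0, 4.0, 8.0, 1.0), since
 * dst.y = max(4.0, 0) = 4.0 and dst.z = max(2.0, 0)^clamp(3.0, -128, 128)
 * = 2^3 = 8.0.
 */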
6932
6933
6934 /**
6935 * Emit Level Of Detail Query (LODQ) instruction.
6936 */
6937 static boolean
6938 emit_lodq(struct svga_shader_emitter_v10 *emit,
6939 const struct tgsi_full_instruction *inst)
6940 {
6941 const uint unit = inst->Src[1].Register.Index;
6942
6943 assert(emit->version >= 41);
6944
6945 /* LOD dst, coord, resource, sampler */
6946 begin_emit_instruction(emit);
6947 emit_opcode(emit, VGPU10_OPCODE_LOD, FALSE);
6948 emit_dst_register(emit, &inst->Dst[0]);
6949 emit_src_register(emit, &inst->Src[0]); /* coord */
6950 emit_resource_register(emit, unit);
6951 emit_sampler_register(emit, unit);
6952 end_emit_instruction(emit);
6953
6954 return TRUE;
6955 }
6956
6957
6958 /**
6959 * Emit code for TGSI_OPCODE_LOG instruction.
6960 */
6961 static boolean
6962 emit_log(struct svga_shader_emitter_v10 *emit,
6963 const struct tgsi_full_instruction *inst)
6964 {
6965 /*
6966 * dst.x = floor(lg2(abs(s0.x)))
6967 * dst.y = abs(s0.x) / (2 ^ floor(lg2(abs(s0.x))))
6968 * dst.z = lg2(abs(s0.x))
6969 * dst.w = 1.0
6970 */
6971
6972 struct tgsi_full_src_register src_xxxx =
6973 scalar_src(&inst->Src[0], TGSI_SWIZZLE_X);
6974 unsigned tmp = get_temp_index(emit);
6975 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
6976 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
6977 struct tgsi_full_src_register abs_src_xxxx = absolute_src(&src_xxxx);
6978
6979 /* only use X component of temp reg */
6980 tmp_dst = writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
6981 tmp_src = scalar_src(&tmp_src, TGSI_SWIZZLE_X);
6982
6983 /* LOG tmp.x, abs(s0.x) */
6984 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XYZ) {
6985 emit_instruction_op1(emit, VGPU10_OPCODE_LOG, &tmp_dst, &abs_src_xxxx);
6986 }
6987
6988 /* MOV dst.z, tmp.x */
6989 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
6990 struct tgsi_full_dst_register dst_z =
6991 writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_Z);
6992
6993 emit_instruction_opn(emit, VGPU10_OPCODE_MOV,
6994 &dst_z, &tmp_src, NULL, NULL,
6995 inst->Instruction.Saturate, FALSE);
6996 }
6997
6998 /* FLR tmp.x, tmp.x */
6999 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) {
7000 emit_instruction_op1(emit, VGPU10_OPCODE_ROUND_NI, &tmp_dst, &tmp_src);
7001 }
7002
7003 /* MOV dst.x, tmp.x */
7004 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
7005 struct tgsi_full_dst_register dst_x =
7006 writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_X);
7007
7008 emit_instruction_opn(emit, VGPU10_OPCODE_MOV,
7009 &dst_x, &tmp_src, NULL, NULL,
7010 inst->Instruction.Saturate, FALSE);
7011 }
7012
7013 /* EXP tmp.x, tmp.x */
7014 /* DIV dst.y, abs(s0.x), tmp.x */
7015 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
7016 struct tgsi_full_dst_register dst_y =
7017 writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_Y);
7018
7019 emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &tmp_dst, &tmp_src);
7020 emit_instruction_opn(emit, VGPU10_OPCODE_DIV, &dst_y, &abs_src_xxxx,
7021 &tmp_src, NULL, inst->Instruction.Saturate, FALSE);
7022 }
7023
7024 /* MOV dst.w, 1.0 */
7025 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
7026 struct tgsi_full_dst_register dst_w =
7027 writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_W);
7028 struct tgsi_full_src_register one =
7029 make_immediate_reg_float(emit, 1.0f);
7030
7031 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_w, &one);
7032 }
7033
7034 free_temp_indexes(emit);
7035
7036 return TRUE;
7037 }
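
/*
 * Worked example (illustrative only): for s0.x = 12.0 the sequence above
 * produces dst.z = lg2(12) ~= 3.585, dst.x = floor(3.585) = 3.0,
 * dst.y = 12 / 2^3 = 1.5 and dst.w = 1.0.
 */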
7038
7039
7040 /**
7041 * Emit code for TGSI_OPCODE_LRP instruction.
7042 */
7043 static boolean
7044 emit_lrp(struct svga_shader_emitter_v10 *emit,
7045 const struct tgsi_full_instruction *inst)
7046 {
7047 /* dst = LRP(s0, s1, s2):
7048 * dst = s0 * (s1 - s2) + s2
7049 * Translates into:
7050 * ADD tmp, s1, -s2; tmp = s1 - s2
7051 * MAD dst, s0, tmp, s2; dst = s0 * tmp + s2
7052 */
7053 unsigned tmp = get_temp_index(emit);
7054 struct tgsi_full_src_register src_tmp = make_src_temp_reg(tmp);
7055 struct tgsi_full_dst_register dst_tmp = make_dst_temp_reg(tmp);
7056 struct tgsi_full_src_register neg_src2 = negate_src(&inst->Src[2]);
7057
7058 /* ADD tmp, s1, -s2 */
7059 emit_instruction_opn(emit, VGPU10_OPCODE_ADD, &dst_tmp,
7060 &inst->Src[1], &neg_src2, NULL, FALSE,
7061 inst->Instruction.Precise);
7062
7063 /* MAD dst, s0, tmp, s2 */
7064 emit_instruction_opn(emit, VGPU10_OPCODE_MAD, &inst->Dst[0],
7065 &inst->Src[0], &src_tmp, &inst->Src[2],
7066 inst->Instruction.Saturate,
7067 inst->Instruction.Precise);
7068
7069 free_temp_indexes(emit);
7070
7071 return TRUE;
7072 }
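
/*
 * Worked example (illustrative only): LRP with s0 = 0.25, s1 = 10.0 and
 * s2 = 2.0 emits ADD tmp, 10.0, -2.0 (tmp = 8.0) followed by
 * MAD dst, 0.25, tmp, 2.0, giving dst = 4.0, i.e. a linear blend from s2
 * toward s1 by the factor s0.
 */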
7073
7074
7075 /**
7076 * Emit code for TGSI_OPCODE_POW instruction.
7077 */
7078 static boolean
7079 emit_pow(struct svga_shader_emitter_v10 *emit,
7080 const struct tgsi_full_instruction *inst)
7081 {
7082 /* Note that TGSI_OPCODE_POW computes only one value from src0.x and
7083 * src1.x while VGPU10 computes four values.
7084 *
7085 * dst = POW(src0, src1):
7086 * dst.xyzw = src0.x ^ src1.x
7087 */
7088 unsigned tmp = get_temp_index(emit);
7089 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
7090 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
7091 struct tgsi_full_src_register src0_xxxx =
7092 swizzle_src(&inst->Src[0], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
7093 TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
7094 struct tgsi_full_src_register src1_xxxx =
7095 swizzle_src(&inst->Src[1], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
7096 TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
7097
7098 /* LOG tmp, s0.xxxx */
7099 emit_instruction_opn(emit, VGPU10_OPCODE_LOG,
7100 &tmp_dst, &src0_xxxx, NULL, NULL,
7101 FALSE, inst->Instruction.Precise);
7102
7103 /* MUL tmp, tmp, s1.xxxx */
7104 emit_instruction_opn(emit, VGPU10_OPCODE_MUL,
7105 &tmp_dst, &tmp_src, &src1_xxxx, NULL,
7106 FALSE, inst->Instruction.Precise);
7107
7108 /* EXP dst, tmp */
7109 emit_instruction_opn(emit, VGPU10_OPCODE_EXP,
7110 &inst->Dst[0], &tmp_src, NULL, NULL,
7111 inst->Instruction.Saturate,
7112 inst->Instruction.Precise);
7113
7114 /* free tmp */
7115 free_temp_indexes(emit);
7116
7117 return TRUE;
7118 }
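
/*
 * Worked example (illustrative only): POW with src0.x = 2.0 and
 * src1.x = 10.0 computes LOG tmp = 1.0, MUL tmp = 10.0, EXP dst = 1024.0,
 * i.e. dst.xyzw = 2^10 via the identity x^y = 2^(y * log2(x)).
 */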
7119
7120
7121 /**
7122 * Emit code for TGSI_OPCODE_RCP (reciprocal) instruction.
7123 */
7124 static boolean
7125 emit_rcp(struct svga_shader_emitter_v10 *emit,
7126 const struct tgsi_full_instruction *inst)
7127 {
7128 if (emit->version >= 50) {
7129 /* Use the RCP instruction directly. But VGPU10_OPCODE_RCP is
7130 * component-wise while TGSI_OPCODE_RCP computes dst.xyzw = 1.0 / src.x,
7131 * so we need to manipulate the src register's swizzle.
7132 */
7133 struct tgsi_full_src_register src = inst->Src[0];
7134 src.Register.SwizzleY =
7135 src.Register.SwizzleZ =
7136 src.Register.SwizzleW = src.Register.SwizzleX;
7137
7138 begin_emit_instruction(emit);
7139 emit_opcode_precise(emit, VGPU10_OPCODE_RCP,
7140 inst->Instruction.Saturate,
7141 inst->Instruction.Precise);
7142 emit_dst_register(emit, &inst->Dst[0]);
7143 emit_src_register(emit, &src);
7144 end_emit_instruction(emit);
7145 }
7146 else {
7147 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
7148
7149 unsigned tmp = get_temp_index(emit);
7150 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
7151 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
7152
7153 struct tgsi_full_dst_register tmp_dst_x =
7154 writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
7155 struct tgsi_full_src_register tmp_src_xxxx =
7156 scalar_src(&tmp_src, TGSI_SWIZZLE_X);
7157
7158 /* DIV tmp.x, 1.0, s0 */
7159 emit_instruction_opn(emit, VGPU10_OPCODE_DIV,
7160 &tmp_dst_x, &one, &inst->Src[0], NULL,
7161 FALSE, inst->Instruction.Precise);
7162
7163 /* MOV dst, tmp.xxxx */
7164 emit_instruction_opn(emit, VGPU10_OPCODE_MOV,
7165 &inst->Dst[0], &tmp_src_xxxx, NULL, NULL,
7166 inst->Instruction.Saturate,
7167 inst->Instruction.Precise);
7168
7169 free_temp_indexes(emit);
7170 }
7171
7172 return TRUE;
7173 }
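
/*
 * Illustrative sketch of the two paths above: when emit->version >= 50,
 * RCP dst, src.xxxx is emitted directly (the first swizzle component is
 * replicated across .yzw by rewriting the source swizzle), while on older
 * devices the same result comes from
 *   DIV tmp.x, {1.0}, src
 *   MOV dst, tmp.xxxx
 */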
7174
7175
7176 /**
7177 * Emit code for TGSI_OPCODE_RSQ instruction.
7178 */
7179 static boolean
7180 emit_rsq(struct svga_shader_emitter_v10 *emit,
7181 const struct tgsi_full_instruction *inst)
7182 {
7183 /* dst = RSQ(src):
7184 * dst.xyzw = 1 / sqrt(src.x)
7185 * Translates into:
7186 * RSQ tmp, src.x
7187 * MOV dst, tmp.xxxx
7188 */
7189
7190 unsigned tmp = get_temp_index(emit);
7191 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
7192 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
7193
7194 struct tgsi_full_dst_register tmp_dst_x =
7195 writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
7196 struct tgsi_full_src_register tmp_src_xxxx =
7197 scalar_src(&tmp_src, TGSI_SWIZZLE_X);
7198
7199 /* RSQ tmp, src.x */
7200 emit_instruction_opn(emit, VGPU10_OPCODE_RSQ,
7201 &tmp_dst_x, &inst->Src[0], NULL, NULL,
7202 FALSE, inst->Instruction.Precise);
7203
7204 /* MOV dst, tmp.xxxx */
7205 emit_instruction_opn(emit, VGPU10_OPCODE_MOV,
7206 &inst->Dst[0], &tmp_src_xxxx, NULL, NULL,
7207 inst->Instruction.Saturate,
7208 inst->Instruction.Precise);
7209
7210 /* free tmp */
7211 free_temp_indexes(emit);
7212
7213 return TRUE;
7214 }
7215
7216
7217 /**
7218 * Emit code for TGSI_OPCODE_SEQ (Set Equal) instruction.
7219 */
7220 static boolean
7221 emit_seq(struct svga_shader_emitter_v10 *emit,
7222 const struct tgsi_full_instruction *inst)
7223 {
7224 /* dst = SEQ(s0, s1):
7225 * dst = s0 == s1 ? 1.0 : 0.0 (per component)
7226 * Translates into:
7227 * EQ tmp, s0, s1; tmp = s0 == s1 ? 0xffffffff : 0 (per comp)
7228 * MOVC dst, tmp, 1.0, 0.0; dst = tmp ? 1.0 : 0.0 (per component)
7229 */
7230 unsigned tmp = get_temp_index(emit);
7231 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
7232 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
7233 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
7234 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
7235
7236 /* EQ tmp, s0, s1 */
7237 emit_instruction_op2(emit, VGPU10_OPCODE_EQ, &tmp_dst, &inst->Src[0],
7238 &inst->Src[1]);
7239
7240 /* MOVC dst, tmp, one, zero */
7241 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src,
7242 &one, &zero);
7243
7244 free_temp_indexes(emit);
7245
7246 return TRUE;
7247 }
7248
7249
7250 /**
7251 * Emit code for TGSI_OPCODE_SGE (Set Greater than or Equal) instruction.
7252 */
7253 static boolean
7254 emit_sge(struct svga_shader_emitter_v10 *emit,
7255 const struct tgsi_full_instruction *inst)
7256 {
7257 /* dst = SGE(s0, s1):
7258 * dst = s0 >= s1 ? 1.0 : 0.0 (per component)
7259 * Translates into:
7260 * GE tmp, s0, s1; tmp = s0 >= s1 ? 0xffffffff : 0 (per comp)
7261 * MOVC dst, tmp, 1.0, 0.0; dst = tmp ? 1.0 : 0.0 (per component)
7262 */
7263 unsigned tmp = get_temp_index(emit);
7264 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
7265 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
7266 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
7267 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
7268
7269 /* GE tmp, s0, s1 */
7270 emit_instruction_op2(emit, VGPU10_OPCODE_GE, &tmp_dst, &inst->Src[0],
7271 &inst->Src[1]);
7272
7273 /* MOVC dst, tmp, one, zero */
7274 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src,
7275 &one, &zero);
7276
7277 free_temp_indexes(emit);
7278
7279 return TRUE;
7280 }
7281
7282
7283 /**
7284 * Emit code for TGSI_OPCODE_SGT (Set Greater than) instruction.
7285 */
7286 static boolean
7287 emit_sgt(struct svga_shader_emitter_v10 *emit,
7288 const struct tgsi_full_instruction *inst)
7289 {
7290 /* dst = SGT(s0, s1):
7291 * dst = s0 > s1 ? 1.0 : 0.0 (per component)
7292 * Translates into:
7293 * LT tmp, s1, s0; tmp = s1 < s0 ? 0xffffffff : 0 (per comp)
7294 * MOVC dst, tmp, 1.0, 0.0; dst = tmp ? 1.0 : 0.0 (per component)
7295 */
7296 unsigned tmp = get_temp_index(emit);
7297 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
7298 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
7299 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
7300 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
7301
7302 /* LT tmp, s1, s0 */
7303 emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp_dst, &inst->Src[1],
7304 &inst->Src[0]);
7305
7306 /* MOVC dst, tmp, one, zero */
7307 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src,
7308 &one, &zero);
7309
7310 free_temp_indexes(emit);
7311
7312 return TRUE;
7313 }
7314
7315
7316 /**
7317 * Emit code for TGSI_OPCODE_SIN and TGSI_OPCODE_COS instructions.
7318 */
7319 static boolean
7320 emit_sincos(struct svga_shader_emitter_v10 *emit,
7321 const struct tgsi_full_instruction *inst)
7322 {
7323 unsigned tmp = get_temp_index(emit);
7324 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
7325 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
7326
7327 struct tgsi_full_src_register tmp_src_xxxx =
7328 scalar_src(&tmp_src, TGSI_SWIZZLE_X);
7329 struct tgsi_full_dst_register tmp_dst_x =
7330 writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
7331
7332 begin_emit_instruction(emit);
7333 emit_opcode(emit, VGPU10_OPCODE_SINCOS, FALSE);
7334
7335 if (inst->Instruction.Opcode == TGSI_OPCODE_SIN)
7336 {
7337 emit_dst_register(emit, &tmp_dst_x); /* first destination register */
7338 emit_null_dst_register(emit); /* second destination register */
7339 }
7340 else {
7341 emit_null_dst_register(emit);
7342 emit_dst_register(emit, &tmp_dst_x);
7343 }
7344
7345 emit_src_register(emit, &inst->Src[0]);
7346 end_emit_instruction(emit);
7347
7348 emit_instruction_opn(emit, VGPU10_OPCODE_MOV,
7349 &inst->Dst[0], &tmp_src_xxxx, NULL, NULL,
7350 inst->Instruction.Saturate,
7351 inst->Instruction.Precise);
7352
7353 free_temp_indexes(emit);
7354
7355 return TRUE;
7356 }
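
/*
 * Illustrative sketch of the sequence above for TGSI_OPCODE_SIN:
 *   SINCOS tmp.x, null, src     ; sine goes to the first destination
 *   MOV    dst, tmp.xxxx
 * For TGSI_OPCODE_COS the two destinations are swapped so the cosine
 * result lands in tmp.x instead.
 */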
7357
7358
7359 /**
7360 * Emit code for TGSI_OPCODE_SLE (Set Less than or Equal) instruction.
7361 */
7362 static boolean
7363 emit_sle(struct svga_shader_emitter_v10 *emit,
7364 const struct tgsi_full_instruction *inst)
7365 {
7366 /* dst = SLE(s0, s1):
7367 * dst = s0 <= s1 ? 1.0 : 0.0 (per component)
7368 * Translates into:
7369 * GE tmp, s1, s0; tmp = s1 >= s0 ? 0xffffffff : 0 (per comp)
7370 * MOVC dst, tmp, 1.0, 0.0; dst = tmp ? 1.0 : 0.0 (per component)
7371 */
7372 unsigned tmp = get_temp_index(emit);
7373 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
7374 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
7375 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
7376 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
7377
7378 /* GE tmp, s1, s0 */
7379 emit_instruction_op2(emit, VGPU10_OPCODE_GE, &tmp_dst, &inst->Src[1],
7380 &inst->Src[0]);
7381
7382 /* MOVC dst, tmp, one, zero */
7383 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src,
7384 &one, &zero);
7385
7386 free_temp_indexes(emit);
7387
7388 return TRUE;
7389 }
7390
7391
7392 /**
7393 * Emit code for TGSI_OPCODE_SLT (Set Less than) instruction.
7394 */
7395 static boolean
7396 emit_slt(struct svga_shader_emitter_v10 *emit,
7397 const struct tgsi_full_instruction *inst)
7398 {
7399 /* dst = SLT(s0, s1):
7400 * dst = s0 < s1 ? 1.0 : 0.0 (per component)
7401 * Translates into:
7402 * LT tmp, s0, s1; tmp = s0 < s1 ? 0xffffffff : 0 (per comp)
7403 * MOVC dst, tmp, 1.0, 0.0; dst = tmp ? 1.0 : 0.0 (per component)
7404 */
7405 unsigned tmp = get_temp_index(emit);
7406 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
7407 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
7408 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
7409 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
7410
7411 /* LT tmp, s0, s1 */
7412 emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp_dst, &inst->Src[0],
7413 &inst->Src[1]);
7414
7415 /* MOVC dst, tmp, one, zero */
7416 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src,
7417 &one, &zero);
7418
7419 free_temp_indexes(emit);
7420
7421 return TRUE;
7422 }
7423
7424
7425 /**
7426 * Emit code for TGSI_OPCODE_SNE (Set Not Equal) instruction.
7427 */
7428 static boolean
7429 emit_sne(struct svga_shader_emitter_v10 *emit,
7430 const struct tgsi_full_instruction *inst)
7431 {
7432 /* dst = SNE(s0, s1):
7433 * dst = s0 != s1 ? 1.0 : 0.0 (per component)
7434 * Translates into:
7435 * NE tmp, s0, s1; tmp = s0 != s1 ? 0xffffffff : 0 (per comp)
7436 * MOVC dst, tmp, 1.0, 0.0; dst = tmp ? 1.0 : 0.0 (per component)
7437 */
7438 unsigned tmp = get_temp_index(emit);
7439 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
7440 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
7441 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
7442 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
7443
7444 /* NE tmp, s0, s1 */
7445 emit_instruction_op2(emit, VGPU10_OPCODE_NE, &tmp_dst, &inst->Src[0],
7446 &inst->Src[1]);
7447
7448 /* MOVC dst, tmp, one, zero */
7449 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src,
7450 &one, &zero);
7451
7452 free_temp_indexes(emit);
7453
7454 return TRUE;
7455 }
7456
7457
7458 /**
7459 * Emit code for TGSI_OPCODE_SSG (Set Sign) instruction.
7460 */
7461 static boolean
7462 emit_ssg(struct svga_shader_emitter_v10 *emit,
7463 const struct tgsi_full_instruction *inst)
7464 {
7465 /* dst.x = (src.x > 0.0) ? 1.0 : (src.x < 0.0) ? -1.0 : 0.0
7466 * dst.y = (src.y > 0.0) ? 1.0 : (src.y < 0.0) ? -1.0 : 0.0
7467 * dst.z = (src.z > 0.0) ? 1.0 : (src.z < 0.0) ? -1.0 : 0.0
7468 * dst.w = (src.w > 0.0) ? 1.0 : (src.w < 0.0) ? -1.0 : 0.0
7469 * Translates into:
7470 * LT tmp1, src, zero; tmp1 = src < zero ? 0xffffffff : 0 (per comp)
7471 * MOVC tmp2, tmp1, -1.0, 0.0; tmp2 = tmp1 ? -1.0 : 0.0 (per component)
7472 * LT tmp1, zero, src; tmp1 = zero < src ? 0xffffffff : 0 (per comp)
7473 * MOVC dst, tmp1, 1.0, tmp2; dst = tmp1 ? 1.0 : tmp2 (per component)
7474 */
7475 struct tgsi_full_src_register zero =
7476 make_immediate_reg_float(emit, 0.0f);
7477 struct tgsi_full_src_register one =
7478 make_immediate_reg_float(emit, 1.0f);
7479 struct tgsi_full_src_register neg_one =
7480 make_immediate_reg_float(emit, -1.0f);
7481
7482 unsigned tmp1 = get_temp_index(emit);
7483 struct tgsi_full_src_register tmp1_src = make_src_temp_reg(tmp1);
7484 struct tgsi_full_dst_register tmp1_dst = make_dst_temp_reg(tmp1);
7485
7486 unsigned tmp2 = get_temp_index(emit);
7487 struct tgsi_full_src_register tmp2_src = make_src_temp_reg(tmp2);
7488 struct tgsi_full_dst_register tmp2_dst = make_dst_temp_reg(tmp2);
7489
7490 emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp1_dst, &inst->Src[0],
7491 &zero);
7492 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &tmp2_dst, &tmp1_src,
7493 &neg_one, &zero);
7494 emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp1_dst, &zero,
7495 &inst->Src[0]);
7496 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp1_src,
7497 &one, &tmp2_src);
7498
7499 free_temp_indexes(emit);
7500
7501 return TRUE;
7502 }
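
/*
 * Worked example (illustrative only): for src = (-3.0, 0.0, 5.0, 0.25)
 * the sequence above yields dst = (-1.0, 0.0, 1.0, 1.0).
 */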
7503
7504
7505 /**
7506 * Emit code for TGSI_OPCODE_ISSG (Integer Set Sign) instruction.
7507 */
7508 static boolean
7509 emit_issg(struct svga_shader_emitter_v10 *emit,
7510 const struct tgsi_full_instruction *inst)
7511 {
7512 /* dst.x = (src.x > 0) ? 1 : (src.x < 0) ? -1 : 0
7513 * dst.y = (src.y > 0) ? 1 : (src.y < 0) ? -1 : 0
7514 * dst.z = (src.z > 0) ? 1 : (src.z < 0) ? -1 : 0
7515 * dst.w = (src.w > 0) ? 1 : (src.w < 0) ? -1 : 0
7516 * Translates into:
7517 * ILT tmp1, src, 0 tmp1 = src < 0 ? -1 : 0 (per component)
7518 * ILT tmp2, 0, src tmp2 = 0 < src ? -1 : 0 (per component)
7519 * IADD dst, tmp1, neg(tmp2) dst = tmp1 - tmp2 (per component)
7520 */
7521 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
7522
7523 unsigned tmp1 = get_temp_index(emit);
7524 struct tgsi_full_src_register tmp1_src = make_src_temp_reg(tmp1);
7525 struct tgsi_full_dst_register tmp1_dst = make_dst_temp_reg(tmp1);
7526
7527 unsigned tmp2 = get_temp_index(emit);
7528 struct tgsi_full_src_register tmp2_src = make_src_temp_reg(tmp2);
7529 struct tgsi_full_dst_register tmp2_dst = make_dst_temp_reg(tmp2);
7530
7531 struct tgsi_full_src_register neg_tmp2 = negate_src(&tmp2_src);
7532
7533 emit_instruction_op2(emit, VGPU10_OPCODE_ILT, &tmp1_dst,
7534 &inst->Src[0], &zero);
7535 emit_instruction_op2(emit, VGPU10_OPCODE_ILT, &tmp2_dst,
7536 &zero, &inst->Src[0]);
7537 emit_instruction_op2(emit, VGPU10_OPCODE_IADD, &inst->Dst[0],
7538 &tmp1_src, &neg_tmp2);
7539
7540 free_temp_indexes(emit);
7541
7542 return TRUE;
7543 }
7544
7545
7546 /**
7547 * Emit a comparison instruction. The dest register will get
7548 * 0 or ~0 values depending on the outcome of comparing src0 to src1.
7549 */
7550 static void
7551 emit_comparison(struct svga_shader_emitter_v10 *emit,
7552 SVGA3dCmpFunc func,
7553 const struct tgsi_full_dst_register *dst,
7554 const struct tgsi_full_src_register *src0,
7555 const struct tgsi_full_src_register *src1)
7556 {
7557 struct tgsi_full_src_register immediate;
7558 VGPU10OpcodeToken0 opcode0;
7559 boolean swapSrc = FALSE;
7560
7561 /* Sanity checks for svga vs. gallium enums */
7562 STATIC_ASSERT(SVGA3D_CMP_LESS == (PIPE_FUNC_LESS + 1));
7563 STATIC_ASSERT(SVGA3D_CMP_GREATEREQUAL == (PIPE_FUNC_GEQUAL + 1));
7564
7565 opcode0.value = 0;
7566
7567 switch (func) {
7568 case SVGA3D_CMP_NEVER:
7569 immediate = make_immediate_reg_int(emit, 0);
7570 /* MOV dst, {0} */
7571 begin_emit_instruction(emit);
7572 emit_dword(emit, VGPU10_OPCODE_MOV);
7573 emit_dst_register(emit, dst);
7574 emit_src_register(emit, &immediate);
7575 end_emit_instruction(emit);
7576 return;
7577 case SVGA3D_CMP_ALWAYS:
7578 immediate = make_immediate_reg_int(emit, -1);
7579 /* MOV dst, {-1} */
7580 begin_emit_instruction(emit);
7581 emit_dword(emit, VGPU10_OPCODE_MOV);
7582 emit_dst_register(emit, dst);
7583 emit_src_register(emit, &immediate);
7584 end_emit_instruction(emit);
7585 return;
7586 case SVGA3D_CMP_LESS:
7587 opcode0.opcodeType = VGPU10_OPCODE_LT;
7588 break;
7589 case SVGA3D_CMP_EQUAL:
7590 opcode0.opcodeType = VGPU10_OPCODE_EQ;
7591 break;
7592 case SVGA3D_CMP_LESSEQUAL:
7593 opcode0.opcodeType = VGPU10_OPCODE_GE;
7594 swapSrc = TRUE;
7595 break;
7596 case SVGA3D_CMP_GREATER:
7597 opcode0.opcodeType = VGPU10_OPCODE_LT;
7598 swapSrc = TRUE;
7599 break;
7600 case SVGA3D_CMP_NOTEQUAL:
7601 opcode0.opcodeType = VGPU10_OPCODE_NE;
7602 break;
7603 case SVGA3D_CMP_GREATEREQUAL:
7604 opcode0.opcodeType = VGPU10_OPCODE_GE;
7605 break;
7606 default:
7607 assert(!"Unexpected comparison mode");
7608 opcode0.opcodeType = VGPU10_OPCODE_EQ;
7609 }
7610
7611 begin_emit_instruction(emit);
7612 emit_dword(emit, opcode0.value);
7613 emit_dst_register(emit, dst);
7614 if (swapSrc) {
7615 emit_src_register(emit, src1);
7616 emit_src_register(emit, src0);
7617 }
7618 else {
7619 emit_src_register(emit, src0);
7620 emit_src_register(emit, src1);
7621 }
7622 end_emit_instruction(emit);
7623 }
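
/*
 * Illustrative examples of the mapping above: SVGA3D_CMP_LESSEQUAL(a, b)
 * is emitted as GE dst, b, a (operands swapped), SVGA3D_CMP_GREATER(a, b)
 * as LT dst, b, a, and SVGA3D_CMP_NEVER simply becomes MOV dst, {0}.
 */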
7624
7625
7626 /**
7627 * Get texel/address offsets for a texture instruction.
7628 */
7629 static void
7630 get_texel_offsets(const struct svga_shader_emitter_v10 *emit,
7631 const struct tgsi_full_instruction *inst, int offsets[3])
7632 {
7633 if (inst->Texture.NumOffsets == 1) {
7634 /* According to the OpenGL Shading Language spec, the offsets can only
7635 * come from a previously-declared immediate/literal.
7636 */
7637 const struct tgsi_texture_offset *off = inst->TexOffsets;
7638 const unsigned index = off[0].Index;
7639 const unsigned swizzleX = off[0].SwizzleX;
7640 const unsigned swizzleY = off[0].SwizzleY;
7641 const unsigned swizzleZ = off[0].SwizzleZ;
7642 const union tgsi_immediate_data *imm = emit->immediates[index];
7643
7644 assert(inst->TexOffsets[0].File == TGSI_FILE_IMMEDIATE);
7645
7646 offsets[0] = imm[swizzleX].Int;
7647 offsets[1] = imm[swizzleY].Int;
7648 offsets[2] = imm[swizzleZ].Int;
7649 }
7650 else {
7651 offsets[0] = offsets[1] = offsets[2] = 0;
7652 }
7653 }
7654
7655
7656 /**
7657 * Set up the coordinate register for texture sampling.
7658 * When we're sampling from a RECT texture we have to scale the
7659 * unnormalized coordinate to a normalized coordinate.
7660 * We do that by multiplying the coordinate by an "extra" constant.
7661 * An alternative would be to use the RESINFO instruction to query the
7662 * texture's size.
7663 */
7664 static struct tgsi_full_src_register
7665 setup_texcoord(struct svga_shader_emitter_v10 *emit,
7666 unsigned unit,
7667 const struct tgsi_full_src_register *coord)
7668 {
7669 if (emit->sampler_view[unit] && emit->key.tex[unit].unnormalized) {
7670 unsigned scale_index = emit->texcoord_scale_index[unit];
7671 unsigned tmp = get_temp_index(emit);
7672 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
7673 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
7674 struct tgsi_full_src_register scale_src = make_src_const_reg(scale_index);
7675
7676 if (emit->key.tex[unit].texel_bias) {
7677 /* To work around a texture coordinate rounding issue, a 0.0001
7678 * offset is added. This fixes the piglit test fbo-blit-scaled-linear. */
7679 struct tgsi_full_src_register offset =
7680 make_immediate_reg_float(emit, 0.0001f);
7681
7682 /* ADD tmp, coord, offset */
7683 emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &tmp_dst,
7684 coord, &offset);
7685 /* MUL tmp, tmp, scale */
7686 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_dst,
7687 &tmp_src, &scale_src);
7688 }
7689 else {
7690 /* MUL tmp, coord, const[] */
7691 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_dst,
7692 coord, &scale_src);
7693 }
7694 return tmp_src;
7695 }
7696 else {
7697 /* use texcoord as-is */
7698 return *coord;
7699 }
7700 }
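
/*
 * Illustrative sketch of the unnormalized-coordinate path above with
 * texel_bias enabled:
 *   ADD tmp, coord, {0.0001}
 *   MUL tmp, tmp, const[scale_index]   ; per-unit scale constant
 * and the temp register is then used as the texture coordinate.
 */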
7701
7702
7703 /**
7704 * For SAMPLE_C instructions, emit the extra src register which indicates
7705 * the reference/comparison value.
7706 */
7707 static void
7708 emit_tex_compare_refcoord(struct svga_shader_emitter_v10 *emit,
7709 enum tgsi_texture_type target,
7710 const struct tgsi_full_src_register *coord)
7711 {
7712 struct tgsi_full_src_register coord_src_ref;
7713 int component;
7714
7715 assert(tgsi_is_shadow_target(target));
7716
7717 component = tgsi_util_get_shadow_ref_src_index(target) % 4;
7718 assert(component >= 0);
7719
7720 coord_src_ref = scalar_src(coord, component);
7721
7722 emit_src_register(emit, &coord_src_ref);
7723 }
7724
7725
7726 /**
7727 * Info for implementing texture swizzles.
7728 * The begin_tex_swizzle(), get_tex_swizzle_dst() and end_tex_swizzle()
7729 * functions use this to encapsulate the extra steps needed to perform
7730 * a texture swizzle, or shadow/depth comparisons.
7731 * The shadow/depth comparison is only done here for the cases where
7732 * there's no VGPU10 opcode (like texture bias lookup w/ shadow compare).
7733 */
7734 struct tex_swizzle_info
7735 {
7736 boolean swizzled;
7737 boolean shadow_compare;
7738 unsigned unit;
7739 enum tgsi_texture_type texture_target; /**< TGSI_TEXTURE_x */
7740 struct tgsi_full_src_register tmp_src;
7741 struct tgsi_full_dst_register tmp_dst;
7742 const struct tgsi_full_dst_register *inst_dst;
7743 const struct tgsi_full_src_register *coord_src;
7744 };
7745
7746
7747 /**
7748 * Do setup for handling texture swizzles or shadow compares.
7749 * \param unit the texture unit
7750 * \param inst the TGSI texture instruction
7751 * \param shadow_compare do shadow/depth comparison?
7752 * \param swz returns the swizzle info
7753 */
7754 static void
7755 begin_tex_swizzle(struct svga_shader_emitter_v10 *emit,
7756 unsigned unit,
7757 const struct tgsi_full_instruction *inst,
7758 boolean shadow_compare,
7759 struct tex_swizzle_info *swz)
7760 {
7761 swz->swizzled = (emit->key.tex[unit].swizzle_r != TGSI_SWIZZLE_X ||
7762 emit->key.tex[unit].swizzle_g != TGSI_SWIZZLE_Y ||
7763 emit->key.tex[unit].swizzle_b != TGSI_SWIZZLE_Z ||
7764 emit->key.tex[unit].swizzle_a != TGSI_SWIZZLE_W);
7765
7766 swz->shadow_compare = shadow_compare;
7767 swz->texture_target = inst->Texture.Texture;
7768
7769 if (swz->swizzled || shadow_compare) {
7770 /* Allocate temp register for the result of the SAMPLE instruction
7771 * and the source of the MOV/compare/swizzle instructions.
7772 */
7773 unsigned tmp = get_temp_index(emit);
7774 swz->tmp_src = make_src_temp_reg(tmp);
7775 swz->tmp_dst = make_dst_temp_reg(tmp);
7776
7777 swz->unit = unit;
7778 }
7779 swz->inst_dst = &inst->Dst[0];
7780 swz->coord_src = &inst->Src[0];
7781
7782 emit->fs.shadow_compare_units |= shadow_compare << unit;
7783 }
7784
7785
7786 /**
7787 * Returns the register to put the SAMPLE instruction results into.
7788 * This will either be the original instruction dst reg (if no swizzle
7789 * and no shadow comparison) or a temporary reg otherwise.
7790 */
7791 static const struct tgsi_full_dst_register *
7792 get_tex_swizzle_dst(const struct tex_swizzle_info *swz)
7793 {
7794 return (swz->swizzled || swz->shadow_compare)
7795 ? &swz->tmp_dst : swz->inst_dst;
7796 }
7797
7798
7799 /**
7800 * This emits the MOV instruction that actually implements a texture swizzle
7801 * and/or shadow comparison.
7802 */
7803 static void
7804 end_tex_swizzle(struct svga_shader_emitter_v10 *emit,
7805 const struct tex_swizzle_info *swz)
7806 {
7807 if (swz->shadow_compare) {
7808 /* Emit extra instructions to compare the fetched texel value against
7809 * a texture coordinate component. The result of the comparison
7810 * is 0.0 or 1.0.
7811 */
7812 struct tgsi_full_src_register coord_src;
7813 struct tgsi_full_src_register texel_src =
7814 scalar_src(&swz->tmp_src, TGSI_SWIZZLE_X);
7815 struct tgsi_full_src_register one =
7816 make_immediate_reg_float(emit, 1.0f);
7817 /* convert gallium comparison func to SVGA comparison func */
7818 SVGA3dCmpFunc compare_func = emit->key.tex[swz->unit].compare_func + 1;
7819
7820 int component =
7821 tgsi_util_get_shadow_ref_src_index(swz->texture_target) % 4;
7822 assert(component >= 0);
7823 coord_src = scalar_src(swz->coord_src, component);
7824
7825 /* COMPARE tmp, coord, texel */
7826 emit_comparison(emit, compare_func,
7827 &swz->tmp_dst, &coord_src, &texel_src);
7828
7829 /* AND dest, tmp, {1.0} */
7830 begin_emit_instruction(emit);
7831 emit_opcode(emit, VGPU10_OPCODE_AND, FALSE);
7832 if (swz->swizzled) {
7833 emit_dst_register(emit, &swz->tmp_dst);
7834 }
7835 else {
7836 emit_dst_register(emit, swz->inst_dst);
7837 }
7838 emit_src_register(emit, &swz->tmp_src);
7839 emit_src_register(emit, &one);
7840 end_emit_instruction(emit);
7841 }
7842
7843 if (swz->swizzled) {
7844 unsigned swz_r = emit->key.tex[swz->unit].swizzle_r;
7845 unsigned swz_g = emit->key.tex[swz->unit].swizzle_g;
7846 unsigned swz_b = emit->key.tex[swz->unit].swizzle_b;
7847 unsigned swz_a = emit->key.tex[swz->unit].swizzle_a;
7848 unsigned writemask_0 = 0, writemask_1 = 0;
7849 boolean int_tex = is_integer_type(emit->sampler_return_type[swz->unit]);
7850
7851 /* Swizzle w/out zero/one terms */
7852 struct tgsi_full_src_register src_swizzled =
7853 swizzle_src(&swz->tmp_src,
7854 swz_r < PIPE_SWIZZLE_0 ? swz_r : PIPE_SWIZZLE_X,
7855 swz_g < PIPE_SWIZZLE_0 ? swz_g : PIPE_SWIZZLE_Y,
7856 swz_b < PIPE_SWIZZLE_0 ? swz_b : PIPE_SWIZZLE_Z,
7857 swz_a < PIPE_SWIZZLE_0 ? swz_a : PIPE_SWIZZLE_W);
7858
7859 /* MOV dst, color(tmp).<swizzle> */
7860 emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
7861 swz->inst_dst, &src_swizzled);
7862
7863 /* handle swizzle zero terms */
7864 writemask_0 = (((swz_r == PIPE_SWIZZLE_0) << 0) |
7865 ((swz_g == PIPE_SWIZZLE_0) << 1) |
7866 ((swz_b == PIPE_SWIZZLE_0) << 2) |
7867 ((swz_a == PIPE_SWIZZLE_0) << 3));
7868 writemask_0 &= swz->inst_dst->Register.WriteMask;
7869
7870 if (writemask_0) {
7871 struct tgsi_full_src_register zero = int_tex ?
7872 make_immediate_reg_int(emit, 0) :
7873 make_immediate_reg_float(emit, 0.0f);
7874 struct tgsi_full_dst_register dst =
7875 writemask_dst(swz->inst_dst, writemask_0);
7876
7877 /* MOV dst.writemask_0, {0,0,0,0} */
7878 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &zero);
7879 }
7880
7881 /* handle swizzle one terms */
7882 writemask_1 = (((swz_r == PIPE_SWIZZLE_1) << 0) |
7883 ((swz_g == PIPE_SWIZZLE_1) << 1) |
7884 ((swz_b == PIPE_SWIZZLE_1) << 2) |
7885 ((swz_a == PIPE_SWIZZLE_1) << 3));
7886 writemask_1 &= swz->inst_dst->Register.WriteMask;
7887
7888 if (writemask_1) {
7889 struct tgsi_full_src_register one = int_tex ?
7890 make_immediate_reg_int(emit, 1) :
7891 make_immediate_reg_float(emit, 1.0f);
7892 struct tgsi_full_dst_register dst =
7893 writemask_dst(swz->inst_dst, writemask_1);
7894
7895 /* MOV dst.writemask_1, {1,1,1,1} */
7896 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &one);
7897 }
7898 }
7899 }
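
/*
 * Illustrative example of the swizzle handling above: for a view swizzle
 * of (Z, Y, X, 1) on a float texture the emitted code is
 *   MOV dst, tmp.zyxw              ; zero/one terms map to a dummy comp
 *   MOV dst.w, {1.0, 1.0, 1.0, 1.0}
 * with no extra instruction needed since there are no zero terms here.
 */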
7900
7901
7902 /**
7903 * Emit code for TGSI_OPCODE_SAMPLE instruction.
7904 */
7905 static boolean
7906 emit_sample(struct svga_shader_emitter_v10 *emit,
7907 const struct tgsi_full_instruction *inst)
7908 {
7909 const unsigned resource_unit = inst->Src[1].Register.Index;
7910 const unsigned sampler_unit = inst->Src[2].Register.Index;
7911 struct tgsi_full_src_register coord;
7912 int offsets[3];
7913 struct tex_swizzle_info swz_info;
7914
7915 begin_tex_swizzle(emit, sampler_unit, inst, FALSE, &swz_info);
7916
7917 get_texel_offsets(emit, inst, offsets);
7918
7919 coord = setup_texcoord(emit, resource_unit, &inst->Src[0]);
7920
7921 /* SAMPLE dst, coord(s0), resource, sampler */
7922 begin_emit_instruction(emit);
7923
7924 /* NOTE: for non-fragment shaders, we should use VGPU10_OPCODE_SAMPLE_L
7925 * with LOD=0. But our virtual GPU accepts this as-is.
7926 */
7927 emit_sample_opcode(emit, VGPU10_OPCODE_SAMPLE,
7928 inst->Instruction.Saturate, offsets);
7929 emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
7930 emit_src_register(emit, &coord);
7931 emit_resource_register(emit, resource_unit);
7932 emit_sampler_register(emit, sampler_unit);
7933 end_emit_instruction(emit);
7934
7935 end_tex_swizzle(emit, &swz_info);
7936
7937 free_temp_indexes(emit);
7938
7939 return TRUE;
7940 }
7941
7942
7943 /**
7944 * Check if a texture instruction is valid.
7945 * An example of an invalid texture instruction is doing shadow comparison
7946 * with an integer-valued texture.
7947 * If we detect an invalid texture instruction, we replace it with:
7948 * MOV dst, {1,1,1,1};
7949 * \return TRUE if valid, FALSE if invalid.
7950 */
7951 static boolean
7952 is_valid_tex_instruction(struct svga_shader_emitter_v10 *emit,
7953 const struct tgsi_full_instruction *inst)
7954 {
7955 const unsigned unit = inst->Src[1].Register.Index;
7956 const enum tgsi_texture_type target = inst->Texture.Texture;
7957 boolean valid = TRUE;
7958
7959 if (tgsi_is_shadow_target(target) &&
7960 is_integer_type(emit->sampler_return_type[unit])) {
7961 debug_printf("Invalid SAMPLE_C with an integer texture!\n");
7962 valid = FALSE;
7963 }
7964 /* XXX we might check for other conditions here in the future */
7965
7966 if (!valid) {
7967 /* emit a MOV dst, {1,1,1,1} instruction. */
7968 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
7969 begin_emit_instruction(emit);
7970 emit_opcode(emit, VGPU10_OPCODE_MOV, FALSE);
7971 emit_dst_register(emit, &inst->Dst[0]);
7972 emit_src_register(emit, &one);
7973 end_emit_instruction(emit);
7974 }
7975
7976 return valid;
7977 }
7978
7979
7980 /**
7981 * Emit code for TGSI_OPCODE_TEX (simple texture lookup)
7982 */
7983 static boolean
7984 emit_tex(struct svga_shader_emitter_v10 *emit,
7985 const struct tgsi_full_instruction *inst)
7986 {
7987 const uint unit = inst->Src[1].Register.Index;
7988 const enum tgsi_texture_type target = inst->Texture.Texture;
7989 VGPU10_OPCODE_TYPE opcode;
7990 struct tgsi_full_src_register coord;
7991 int offsets[3];
7992 struct tex_swizzle_info swz_info;
7993
7994 /* check that the sampler returns a float */
7995 if (!is_valid_tex_instruction(emit, inst))
7996 return TRUE;
7997
7998 begin_tex_swizzle(emit, unit, inst, FALSE, &swz_info);
7999
8000 get_texel_offsets(emit, inst, offsets);
8001
8002 coord = setup_texcoord(emit, unit, &inst->Src[0]);
8003
8004 /* SAMPLE dst, coord(s0), resource, sampler */
8005 begin_emit_instruction(emit);
8006
8007 if (tgsi_is_shadow_target(target))
8008 opcode = VGPU10_OPCODE_SAMPLE_C;
8009 else
8010 opcode = VGPU10_OPCODE_SAMPLE;
8011
8012 emit_sample_opcode(emit, opcode, inst->Instruction.Saturate, offsets);
8013 emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
8014 emit_src_register(emit, &coord);
8015 emit_resource_register(emit, unit);
8016 emit_sampler_register(emit, unit);
8017 if (opcode == VGPU10_OPCODE_SAMPLE_C) {
8018 emit_tex_compare_refcoord(emit, target, &coord);
8019 }
8020 end_emit_instruction(emit);
8021
8022 end_tex_swizzle(emit, &swz_info);
8023
8024 free_temp_indexes(emit);
8025
8026 return TRUE;
8027 }
8028
8029 /**
8030 * Emit code for TGSI_OPCODE_TG4 (texture lookup for texture gather)
8031 */
8032 static boolean
8033 emit_tg4(struct svga_shader_emitter_v10 *emit,
8034 const struct tgsi_full_instruction *inst)
8035 {
8036 const uint unit = inst->Src[2].Register.Index;
8037 struct tgsi_full_src_register src;
8038 struct tgsi_full_src_register offset_src, sampler, ref;
8039 int offsets[3];
8040
8041 /* check that the sampler returns a float */
8042 if (!is_valid_tex_instruction(emit, inst))
8043 return TRUE;
8044
8045 if (emit->version >= 50) {
8046 unsigned target = inst->Texture.Texture;
8047 int index = inst->Src[1].Register.Index;
8048 const union tgsi_immediate_data *imm = emit->immediates[index];
8049 int select_comp = imm[inst->Src[1].Register.SwizzleX].Int;
8050 unsigned select_swizzle = PIPE_SWIZZLE_X;
8051
8052 if (!tgsi_is_shadow_target(target)) {
8053 switch (select_comp) {
8054 case 0:
8055 select_swizzle = emit->key.tex[unit].swizzle_r;
8056 break;
8057 case 1:
8058 select_swizzle = emit->key.tex[unit].swizzle_g;
8059 break;
8060 case 2:
8061 select_swizzle = emit->key.tex[unit].swizzle_b;
8062 break;
8063 case 3:
8064 select_swizzle = emit->key.tex[unit].swizzle_a;
8065 break;
8066 default:
8067 assert(!"Unexpected component in texture gather swizzle");
8068 }
8069 }
8070 else {
8071 select_swizzle = emit->key.tex[unit].swizzle_r;
8072 }
8073
8074 if (select_swizzle == PIPE_SWIZZLE_1) {
8075 src = make_immediate_reg_float(emit, 1.0);
8076 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &src);
8077 return TRUE;
8078 }
8079 else if (select_swizzle == PIPE_SWIZZLE_0) {
8080 src = make_immediate_reg_float(emit, 0.0);
8081 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &src);
8082 return TRUE;
8083 }
8084
8085 src = setup_texcoord(emit, unit, &inst->Src[0]);
8086
8087 /* GATHER4 dst, coord, resource, sampler */
8088 /* GATHER4_C dst, coord, resource, sampler, ref */
8089 /* GATHER4_PO dst, coord, offset, resource, sampler */
8090 /* GATHER4_PO_C dst, coord, offset, resource, sampler, ref */
8091 begin_emit_instruction(emit);
8092 if (inst->Texture.NumOffsets == 1) {
8093 if (tgsi_is_shadow_target(target)) {
8094 emit_opcode(emit, VGPU10_OPCODE_GATHER4_PO_C,
8095 inst->Instruction.Saturate);
8096 }
8097 else {
8098 emit_opcode(emit, VGPU10_OPCODE_GATHER4_PO,
8099 inst->Instruction.Saturate);
8100 }
8101 }
8102 else {
8103 if (tgsi_is_shadow_target(target)) {
8104 emit_opcode(emit, VGPU10_OPCODE_GATHER4_C,
8105 inst->Instruction.Saturate);
8106 }
8107 else {
8108 emit_opcode(emit, VGPU10_OPCODE_GATHER4,
8109 inst->Instruction.Saturate);
8110 }
8111 }
8112
8113 emit_dst_register(emit, &inst->Dst[0]);
8114 emit_src_register(emit, &src);
8115 if (inst->Texture.NumOffsets == 1) {
8116 /* offset */
8117 offset_src = make_src_reg(inst->TexOffsets[0].File,
8118 inst->TexOffsets[0].Index);
8119 offset_src = swizzle_src(&offset_src, inst->TexOffsets[0].SwizzleX,
8120 inst->TexOffsets[0].SwizzleY,
8121 inst->TexOffsets[0].SwizzleZ,
8122 TGSI_SWIZZLE_W);
8123 emit_src_register(emit, &offset_src);
8124 }
8125
8126 /* resource */
8127 emit_resource_register(emit, unit);
8128
8129 /* sampler */
8130 sampler = make_src_reg(TGSI_FILE_SAMPLER, unit);
8131 sampler.Register.SwizzleX =
8132 sampler.Register.SwizzleY =
8133 sampler.Register.SwizzleZ =
8134 sampler.Register.SwizzleW = select_swizzle;
8135 emit_src_register(emit, &sampler);
8136
8137 if (tgsi_is_shadow_target(target)) {
8138 /* ref */
8139 if (target == TGSI_TEXTURE_SHADOWCUBE_ARRAY) {
8140 ref = scalar_src(&inst->Src[1], TGSI_SWIZZLE_X);
8141 emit_tex_compare_refcoord(emit, target, &ref);
8142 }
8143 else {
8144 emit_tex_compare_refcoord(emit, target, &src);
8145 }
8146 }
8147
8148 end_emit_instruction(emit);
8149 free_temp_indexes(emit);
8150 }
8151 else {
8152 /* Only a single channel is supported in SM4_1 and we report
8153 * PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS = 1.
8154 * Only the 0th component will be gathered.
8155 */
8156 switch (emit->key.tex[unit].swizzle_r) {
8157 case PIPE_SWIZZLE_X:
8158 get_texel_offsets(emit, inst, offsets);
8159 src = setup_texcoord(emit, unit, &inst->Src[0]);
8160
8161 /* Gather dst, coord, resource, sampler */
8162 begin_emit_instruction(emit);
8163 emit_sample_opcode(emit, VGPU10_OPCODE_GATHER4,
8164 inst->Instruction.Saturate, offsets);
8165 emit_dst_register(emit, &inst->Dst[0]);
8166 emit_src_register(emit, &src);
8167 emit_resource_register(emit, unit);
8168
8169 /* sampler */
8170 sampler = make_src_reg(TGSI_FILE_SAMPLER, unit);
8171 sampler.Register.SwizzleX =
8172 sampler.Register.SwizzleY =
8173 sampler.Register.SwizzleZ =
8174 sampler.Register.SwizzleW = PIPE_SWIZZLE_X;
8175 emit_src_register(emit, &sampler);
8176
8177 end_emit_instruction(emit);
8178 break;
8179 case PIPE_SWIZZLE_W:
8180 case PIPE_SWIZZLE_1:
8181 src = make_immediate_reg_float(emit, 1.0);
8182 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &src);
8183 break;
8184 case PIPE_SWIZZLE_Y:
8185 case PIPE_SWIZZLE_Z:
8186 case PIPE_SWIZZLE_0:
8187 default:
8188 src = make_immediate_reg_float(emit, 0.0);
8189 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &src);
8190 break;
8191 }
8192 }
8193
8194 return TRUE;
8195 }
8196
8197
8198
8199 /**
8200 * Emit code for TGSI_OPCODE_TEX2 (texture lookup for shadow cube map arrays)
8201 */
8202 static boolean
8203 emit_tex2(struct svga_shader_emitter_v10 *emit,
8204 const struct tgsi_full_instruction *inst)
8205 {
8206 const uint unit = inst->Src[2].Register.Index;
8207 unsigned target = inst->Texture.Texture;
8208 struct tgsi_full_src_register coord, ref;
8209 int offsets[3];
8210 struct tex_swizzle_info swz_info;
8211
8212 /* check that the sampler returns a float */
8213 if (!is_valid_tex_instruction(emit, inst))
8214 return TRUE;
8215
8216 begin_tex_swizzle(emit, unit, inst, FALSE, &swz_info);
8217
8218 get_texel_offsets(emit, inst, offsets);
8219
8220 coord = setup_texcoord(emit, unit, &inst->Src[0]);
8221 ref = scalar_src(&inst->Src[1], TGSI_SWIZZLE_X);
8222
8223 /* SAMPLE_C dst, coord, resource, sampler, ref */
8224 begin_emit_instruction(emit);
8225 emit_sample_opcode(emit, VGPU10_OPCODE_SAMPLE_C,
8226 inst->Instruction.Saturate, offsets);
8227 emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
8228 emit_src_register(emit, &coord);
8229 emit_resource_register(emit, unit);
8230 emit_sampler_register(emit, unit);
8231 emit_tex_compare_refcoord(emit, target, &ref);
8232 end_emit_instruction(emit);
8233
8234 end_tex_swizzle(emit, &swz_info);
8235
8236 free_temp_indexes(emit);
8237
8238 return TRUE;
8239 }
8240
8241
8242 /**
8243 * Emit code for TGSI_OPCODE_TXP (projective texture)
8244 */
8245 static boolean
8246 emit_txp(struct svga_shader_emitter_v10 *emit,
8247 const struct tgsi_full_instruction *inst)
8248 {
8249 const uint unit = inst->Src[1].Register.Index;
8250 const enum tgsi_texture_type target = inst->Texture.Texture;
8251 VGPU10_OPCODE_TYPE opcode;
8252 int offsets[3];
8253 unsigned tmp = get_temp_index(emit);
8254 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
8255 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
8256 struct tgsi_full_src_register src0_wwww =
8257 scalar_src(&inst->Src[0], TGSI_SWIZZLE_W);
8258 struct tgsi_full_src_register coord;
8259 struct tex_swizzle_info swz_info;
8260
8261 /* check that the sampler returns a float */
8262 if (!is_valid_tex_instruction(emit, inst))
8263 return TRUE;
8264
8265 begin_tex_swizzle(emit, unit, inst, FALSE, &swz_info);
8266
8267 get_texel_offsets(emit, inst, offsets);
8268
8269 coord = setup_texcoord(emit, unit, &inst->Src[0]);
8270
8271 /* DIV tmp, coord, coord.wwww */
8272 emit_instruction_op2(emit, VGPU10_OPCODE_DIV, &tmp_dst,
8273 &coord, &src0_wwww);
8274
8275 /* SAMPLE dst, coord(tmp), resource, sampler */
8276 begin_emit_instruction(emit);
8277
8278 if (tgsi_is_shadow_target(target))
8279 /* NOTE: for non-fragment shaders, we should use
8280 * VGPU10_OPCODE_SAMPLE_C_LZ, but our virtual GPU accepts this as-is.
8281 */
8282 opcode = VGPU10_OPCODE_SAMPLE_C;
8283 else
8284 opcode = VGPU10_OPCODE_SAMPLE;
8285
8286 emit_sample_opcode(emit, opcode, inst->Instruction.Saturate, offsets);
8287 emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
8288 emit_src_register(emit, &tmp_src); /* projected coord */
8289 emit_resource_register(emit, unit);
8290 emit_sampler_register(emit, unit);
8291 if (opcode == VGPU10_OPCODE_SAMPLE_C) {
8292 emit_tex_compare_refcoord(emit, target, &tmp_src);
8293 }
8294 end_emit_instruction(emit);
8295
8296 end_tex_swizzle(emit, &swz_info);
8297
8298 free_temp_indexes(emit);
8299
8300 return TRUE;
8301 }
8302
8303
8304 /**
8305 * Emit code for TGSI_OPCODE_TXD (explicit derivatives)
8306 */
8307 static boolean
8308 emit_txd(struct svga_shader_emitter_v10 *emit,
8309 const struct tgsi_full_instruction *inst)
8310 {
8311 const uint unit = inst->Src[3].Register.Index;
8312 const enum tgsi_texture_type target = inst->Texture.Texture;
8313 int offsets[3];
8314 struct tgsi_full_src_register coord;
8315 struct tex_swizzle_info swz_info;
8316
8317 begin_tex_swizzle(emit, unit, inst, tgsi_is_shadow_target(target),
8318 &swz_info);
8319
8320 get_texel_offsets(emit, inst, offsets);
8321
8322 coord = setup_texcoord(emit, unit, &inst->Src[0]);
8323
8324 /* SAMPLE_D dst, coord(s0), resource, sampler, Xderiv(s1), Yderiv(s2) */
8325 begin_emit_instruction(emit);
8326 emit_sample_opcode(emit, VGPU10_OPCODE_SAMPLE_D,
8327 inst->Instruction.Saturate, offsets);
8328 emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
8329 emit_src_register(emit, &coord);
8330 emit_resource_register(emit, unit);
8331 emit_sampler_register(emit, unit);
8332 emit_src_register(emit, &inst->Src[1]); /* Xderiv */
8333 emit_src_register(emit, &inst->Src[2]); /* Yderiv */
8334 end_emit_instruction(emit);
8335
8336 end_tex_swizzle(emit, &swz_info);
8337
8338 free_temp_indexes(emit);
8339
8340 return TRUE;
8341 }
8342
8343
8344 /**
8345 * Emit code for TGSI_OPCODE_TXF (texel fetch)
8346 */
8347 static boolean
8348 emit_txf(struct svga_shader_emitter_v10 *emit,
8349 const struct tgsi_full_instruction *inst)
8350 {
8351 const uint unit = inst->Src[1].Register.Index;
8352 const boolean msaa = tgsi_is_msaa_target(inst->Texture.Texture)
8353 && emit->key.tex[unit].num_samples > 1;
8354 int offsets[3];
8355 struct tex_swizzle_info swz_info;
8356
8357 begin_tex_swizzle(emit, unit, inst, FALSE, &swz_info);
8358
8359 get_texel_offsets(emit, inst, offsets);
8360
8361 if (msaa) {
8362 assert(emit->key.tex[unit].num_samples > 1);
8363
8364 /* Fetch one sample from an MSAA texture */
8365 struct tgsi_full_src_register sampleIndex =
8366 scalar_src(&inst->Src[0], TGSI_SWIZZLE_W);
8367 /* LD_MS dst, coord(s0), resource, sampleIndex */
8368 begin_emit_instruction(emit);
8369 emit_sample_opcode(emit, VGPU10_OPCODE_LD_MS,
8370 inst->Instruction.Saturate, offsets);
8371 emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
8372 emit_src_register(emit, &inst->Src[0]);
8373 emit_resource_register(emit, unit);
8374 emit_src_register(emit, &sampleIndex);
8375 end_emit_instruction(emit);
8376 }
8377 else {
8378 /* Fetch one texel specified by integer coordinate */
8379 /* LD dst, coord(s0), resource */
8380 begin_emit_instruction(emit);
8381 emit_sample_opcode(emit, VGPU10_OPCODE_LD,
8382 inst->Instruction.Saturate, offsets);
8383 emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
8384 emit_src_register(emit, &inst->Src[0]);
8385 emit_resource_register(emit, unit);
8386 end_emit_instruction(emit);
8387 }
8388
8389 end_tex_swizzle(emit, &swz_info);
8390
8391 free_temp_indexes(emit);
8392
8393 return TRUE;
8394 }
8395
8396
8397 /**
8398 * Emit code for TGSI_OPCODE_TXL (explicit LOD) or TGSI_OPCODE_TXB (LOD bias)
8399 * or TGSI_OPCODE_TXB2 (for cube shadow maps).
8400 */
8401 static boolean
8402 emit_txl_txb(struct svga_shader_emitter_v10 *emit,
8403 const struct tgsi_full_instruction *inst)
8404 {
8405 const enum tgsi_texture_type target = inst->Texture.Texture;
8406 VGPU10_OPCODE_TYPE opcode;
8407 unsigned unit;
8408 int offsets[3];
8409 struct tgsi_full_src_register coord, lod_bias;
8410 struct tex_swizzle_info swz_info;
8411
8412 assert(inst->Instruction.Opcode == TGSI_OPCODE_TXL ||
8413 inst->Instruction.Opcode == TGSI_OPCODE_TXB ||
8414 inst->Instruction.Opcode == TGSI_OPCODE_TXB2);
8415
8416 if (inst->Instruction.Opcode == TGSI_OPCODE_TXB2) {
8417 lod_bias = scalar_src(&inst->Src[1], TGSI_SWIZZLE_X);
8418 unit = inst->Src[2].Register.Index;
8419 }
8420 else {
8421 lod_bias = scalar_src(&inst->Src[0], TGSI_SWIZZLE_W);
8422 unit = inst->Src[1].Register.Index;
8423 }
8424
8425 begin_tex_swizzle(emit, unit, inst, tgsi_is_shadow_target(target),
8426 &swz_info);
8427
8428 get_texel_offsets(emit, inst, offsets);
8429
8430 coord = setup_texcoord(emit, unit, &inst->Src[0]);
8431
8432 /* SAMPLE_L/B dst, coord(s0), resource, sampler, lod(s3) */
8433 begin_emit_instruction(emit);
8434 if (inst->Instruction.Opcode == TGSI_OPCODE_TXL) {
8435 opcode = VGPU10_OPCODE_SAMPLE_L;
8436 }
8437 else {
8438 opcode = VGPU10_OPCODE_SAMPLE_B;
8439 }
8440 emit_sample_opcode(emit, opcode, inst->Instruction.Saturate, offsets);
8441 emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
8442 emit_src_register(emit, &coord);
8443 emit_resource_register(emit, unit);
8444 emit_sampler_register(emit, unit);
8445 emit_src_register(emit, &lod_bias);
8446 end_emit_instruction(emit);
8447
8448 end_tex_swizzle(emit, &swz_info);
8449
8450 free_temp_indexes(emit);
8451
8452 return TRUE;
8453 }
8454
8455
8456 /**
8457 * Emit code for TGSI_OPCODE_TXL2 (explicit LOD) for cube map arrays.
8458 */
8459 static boolean
8460 emit_txl2(struct svga_shader_emitter_v10 *emit,
8461 const struct tgsi_full_instruction *inst)
8462 {
8463 unsigned target = inst->Texture.Texture;
8464 unsigned opcode, unit;
8465 int offsets[3];
8466 struct tgsi_full_src_register coord, lod;
8467 struct tex_swizzle_info swz_info;
8468
8469 assert(inst->Instruction.Opcode == TGSI_OPCODE_TXL2);
8470
8471 lod = scalar_src(&inst->Src[1], TGSI_SWIZZLE_X);
8472 unit = inst->Src[2].Register.Index;
8473
8474 begin_tex_swizzle(emit, unit, inst, tgsi_is_shadow_target(target),
8475 &swz_info);
8476
8477 get_texel_offsets(emit, inst, offsets);
8478
8479 coord = setup_texcoord(emit, unit, &inst->Src[0]);
8480
8481 /* SAMPLE_L dst, coord(s0), resource, sampler, lod(s3) */
8482 begin_emit_instruction(emit);
8483 opcode = VGPU10_OPCODE_SAMPLE_L;
8484 emit_sample_opcode(emit, opcode, inst->Instruction.Saturate, offsets);
8485 emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
8486 emit_src_register(emit, &coord);
8487 emit_resource_register(emit, unit);
8488 emit_sampler_register(emit, unit);
8489 emit_src_register(emit, &lod);
8490 end_emit_instruction(emit);
8491
8492 end_tex_swizzle(emit, &swz_info);
8493
8494 free_temp_indexes(emit);
8495
8496 return TRUE;
8497 }
8498
8499
8500 /**
8501 * Emit code for TGSI_OPCODE_TXQ (texture query) instruction.
8502 */
8503 static boolean
8504 emit_txq(struct svga_shader_emitter_v10 *emit,
8505 const struct tgsi_full_instruction *inst)
8506 {
8507 const uint unit = inst->Src[1].Register.Index;
8508
8509 if (emit->sampler_target[unit] == TGSI_TEXTURE_BUFFER) {
8510 /* RESINFO does not support querying texture buffers, so we instead
8511 * store texture buffer sizes in shader constants and copy the size to
8512 * the destination register to implement TXQ.
8513 * MOV dst, const[texture_buffer_size_index[unit]]
8514 */
8515 struct tgsi_full_src_register size_src =
8516 make_src_const_reg(emit->texture_buffer_size_index[unit]);
8517 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &size_src);
8518 } else {
8519 /* RESINFO dst, srcMipLevel, resource */
8520 begin_emit_instruction(emit);
8521 emit_opcode_resinfo(emit, VGPU10_RESINFO_RETURN_UINT);
8522 emit_dst_register(emit, &inst->Dst[0]);
8523 emit_src_register(emit, &inst->Src[0]);
8524 emit_resource_register(emit, unit);
8525 end_emit_instruction(emit);
8526 }
8527
8528 free_temp_indexes(emit);
8529
8530 return TRUE;
8531 }
8532
8533
8534 /**
8535 * Does this opcode produce a double-precision result?
8536 * XXX perhaps move this to a TGSI utility.
8537 */
8538 static bool
8539 opcode_has_dbl_dst(unsigned opcode)
8540 {
8541 switch (opcode) {
8542 case TGSI_OPCODE_F2D:
8543 case TGSI_OPCODE_DABS:
8544 case TGSI_OPCODE_DADD:
8545 case TGSI_OPCODE_DFRAC:
8546 case TGSI_OPCODE_DMAX:
8547 case TGSI_OPCODE_DMIN:
8548 case TGSI_OPCODE_DMUL:
8549 case TGSI_OPCODE_DNEG:
8550 case TGSI_OPCODE_I2D:
8551 case TGSI_OPCODE_U2D:
8552 // XXX more TBD
8553 return true;
8554 default:
8555 return false;
8556 }
8557 }
8558
8559
8560 /**
8561 * Does this opcode use double-precision source registers?
8562 */
8563 static bool
8564 opcode_has_dbl_src(unsigned opcode)
8565 {
8566 switch (opcode) {
8567 case TGSI_OPCODE_D2F:
8568 case TGSI_OPCODE_DABS:
8569 case TGSI_OPCODE_DADD:
8570 case TGSI_OPCODE_DFRAC:
8571 case TGSI_OPCODE_DMAX:
8572 case TGSI_OPCODE_DMIN:
8573 case TGSI_OPCODE_DMUL:
8574 case TGSI_OPCODE_DNEG:
8575 case TGSI_OPCODE_D2I:
8576 case TGSI_OPCODE_D2U:
8577 // XXX more TBD
8578 return true;
8579 default:
8580 return false;
8581 }
8582 }
8583
8584
8585 /**
8586 * Check that the swizzle for reading from a double-precision register
8587 * is valid.
8588 */
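/*
 * A double operand occupies two 32-bit channels, so each swizzle pair must
 * select either .xy or .zw. For example, swizzles like .xyzw, .xyxy, .zwxy
 * and .zwzw satisfy the asserts below, while something like .yzwx does not.
 */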
8589 static void
8590 check_double_src_swizzle(const struct tgsi_full_src_register *reg)
8591 {
8592 assert((reg->Register.SwizzleX == PIPE_SWIZZLE_X &&
8593 reg->Register.SwizzleY == PIPE_SWIZZLE_Y) ||
8594 (reg->Register.SwizzleX == PIPE_SWIZZLE_Z &&
8595 reg->Register.SwizzleY == PIPE_SWIZZLE_W));
8596
8597 assert((reg->Register.SwizzleZ == PIPE_SWIZZLE_X &&
8598 reg->Register.SwizzleW == PIPE_SWIZZLE_Y) ||
8599 (reg->Register.SwizzleZ == PIPE_SWIZZLE_Z &&
8600 reg->Register.SwizzleW == PIPE_SWIZZLE_W));
8601 }
8602
8603
8604 /**
8605 * Check that the writemask for a double-precision instruction is valid.
8606 */
8607 static void
8608 check_double_dst_writemask(const struct tgsi_full_instruction *inst)
8609 {
8610 ASSERTED unsigned writemask = inst->Dst[0].Register.WriteMask;
8611
8612 switch (inst->Instruction.Opcode) {
8613 case TGSI_OPCODE_DABS:
8614 case TGSI_OPCODE_DADD:
8615 case TGSI_OPCODE_DFRAC:
8616 case TGSI_OPCODE_DNEG:
8617 case TGSI_OPCODE_DMAD:
8618 case TGSI_OPCODE_DMAX:
8619 case TGSI_OPCODE_DMIN:
8620 case TGSI_OPCODE_DMUL:
8621 case TGSI_OPCODE_DRCP:
8622 case TGSI_OPCODE_DSQRT:
8623 case TGSI_OPCODE_F2D:
8624 assert(writemask == TGSI_WRITEMASK_XYZW ||
8625 writemask == TGSI_WRITEMASK_XY ||
8626 writemask == TGSI_WRITEMASK_ZW);
8627 break;
8628 case TGSI_OPCODE_DSEQ:
8629 case TGSI_OPCODE_DSGE:
8630 case TGSI_OPCODE_DSNE:
8631 case TGSI_OPCODE_DSLT:
8632 case TGSI_OPCODE_D2I:
8633 case TGSI_OPCODE_D2U:
8634 /* Write to 1 or 2 components only */
8635 assert(util_bitcount(writemask) <= 2);
8636 break;
8637 default:
8638 /* XXX this list may be incomplete */
8639 ;
8640 }
8641 }
8642
8643
8644 /**
8645 * Double-precision absolute value.
8646 */
8647 static boolean
8648 emit_dabs(struct svga_shader_emitter_v10 *emit,
8649 const struct tgsi_full_instruction *inst)
8650 {
8651 assert(emit->version >= 50);
8652 check_double_src_swizzle(&inst->Src[0]);
8653 check_double_dst_writemask(inst);
8654
8655 struct tgsi_full_src_register abs_src = absolute_src(&inst->Src[0]);
8656
8657 /* DMOV dst, |src| */
8658 emit_instruction_op1(emit, VGPU10_OPCODE_DMOV, &inst->Dst[0], &abs_src);
8659
8660 return TRUE;
8661 }
8662
8663
8664 /**
8665 * Double-precision negation
8666 */
8667 static boolean
8668 emit_dneg(struct svga_shader_emitter_v10 *emit,
8669 const struct tgsi_full_instruction *inst)
8670 {
8671 assert(emit->version >= 50);
8672 check_double_src_swizzle(&inst->Src[0]);
8673 check_double_dst_writemask(inst);
8674
8675 struct tgsi_full_src_register neg_src = negate_src(&inst->Src[0]);
8676
8677 /* DMOV dst, -src */
8678 emit_instruction_op1(emit, VGPU10_OPCODE_DMOV, &inst->Dst[0], &neg_src);
8679
8680 return TRUE;
8681 }
8682
8683
8684 /**
8685 * SM5 has no DMAD opcode. Implement the multiply-add with DMUL/DADD.
8686 */
8687 static boolean
8688 emit_dmad(struct svga_shader_emitter_v10 *emit,
8689 const struct tgsi_full_instruction *inst)
8690 {
8691 assert(emit->version >= 50);
8692 check_double_src_swizzle(&inst->Src[0]);
8693 check_double_src_swizzle(&inst->Src[1]);
8694 check_double_src_swizzle(&inst->Src[2]);
8695 check_double_dst_writemask(inst);
8696
8697 unsigned tmp = get_temp_index(emit);
8698 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
8699 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
8700
8701 /* DMUL tmp, src[0], src[1] */
8702 emit_instruction_opn(emit, VGPU10_OPCODE_DMUL,
8703 &tmp_dst, &inst->Src[0], &inst->Src[1], NULL,
8704 FALSE, inst->Instruction.Precise);
8705
8706 /* DADD dst, tmp, src[2] */
8707 emit_instruction_opn(emit, VGPU10_OPCODE_DADD,
8708 &inst->Dst[0], &tmp_src, &inst->Src[2], NULL,
8709 inst->Instruction.Saturate, inst->Instruction.Precise);
8710 free_temp_indexes(emit);
8711
8712 return TRUE;
8713 }
8714
8715
8716 /**
8717 * Double precision reciprocal square root
8718 */
8719 static boolean
8720 emit_drsq(struct svga_shader_emitter_v10 *emit,
8721 const struct tgsi_full_dst_register *dst,
8722 const struct tgsi_full_src_register *src)
8723 {
8724 assert(emit->version >= 50);
8725
8726 VGPU10OpcodeToken0 token0;
8727 begin_emit_instruction(emit);
8728
8729 token0.value = 0;
8730 token0.opcodeType = VGPU10_OPCODE_VMWARE;
8731 token0.vmwareOpcodeType = VGPU10_VMWARE_OPCODE_DRSQ;
8732 emit_dword(emit, token0.value);
8733
8734 emit_dst_register(emit, dst);
8735
8736 check_double_src_swizzle(src);
8737 emit_src_register(emit, src);
8738
8739 end_emit_instruction(emit);
8740
8741 return TRUE;
8742 }
8743
8744
8745 /**
8746 * There is no SM5 opcode for double precision square root.
8747 * It will be implemented with DRSQ.
8748 * dst = src * DRSQ(src)
8749 */
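/*
 * Rough sketch of the emitted sequence (for illustration):
 *   DEQ   cond.xy, 0.0, src         (cond = src == 0)
 *   DMOVC tmp, cond.xyxy, 1.0, src  (tmp  = cond ? 1.0 : src)
 *   DRSQ  tmp, tmp
 *   DMUL  dst, tmp, src             (a zero src yields 0 * rsq(1) = 0)
 */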
8750 static boolean
8751 emit_dsqrt(struct svga_shader_emitter_v10 *emit,
8752 const struct tgsi_full_instruction *inst)
8753 {
8754 assert(emit->version >= 50);
8755
8756 check_double_src_swizzle(&inst->Src[0]);
8757
8758 /* temporary register to hold the source */
8759 unsigned tmp = get_temp_index(emit);
8760 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
8761 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
8762
8763 /* temporary register to hold the DEQ result */
8764 unsigned tmp_cond = get_temp_index(emit);
8765 struct tgsi_full_dst_register tmp_cond_dst = make_dst_temp_reg(tmp_cond);
8766 struct tgsi_full_dst_register tmp_cond_dst_xy =
8767 writemask_dst(&tmp_cond_dst, TGSI_WRITEMASK_X | TGSI_WRITEMASK_Y);
8768 struct tgsi_full_src_register tmp_cond_src = make_src_temp_reg(tmp_cond);
8769 struct tgsi_full_src_register tmp_cond_src_xy =
8770 swizzle_src(&tmp_cond_src,
8771 PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y,
8772 PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y);
8773
8774 /* The reciprocal square root of zero yields INF.
8775 * So if the source is 0, we replace it with 1 in the tmp register.
8776 * The subsequent multiplication by the original source (which is zero)
8777 * will then yield 0 in the result.
8778 */
8779
8780 /* tmp = (src == 0) ? 1.0 : src;
8781 * DEQ tmp_cond, 0, src
8782 * DMOVC tmp, tmp_cond, 1.0, src
8783 */
8784 struct tgsi_full_src_register zero =
8785 make_immediate_reg_double(emit, 0);
8786
8787 struct tgsi_full_src_register one =
8788 make_immediate_reg_double(emit, 1.0);
8789
8790 emit_instruction_op2(emit, VGPU10_OPCODE_DEQ, &tmp_cond_dst_xy,
8791 &zero, &inst->Src[0]);
8792 emit_instruction_op3(emit, VGPU10_OPCODE_DMOVC, &tmp_dst,
8793 &tmp_cond_src_xy, &one, &inst->Src[0]);
8794
8795 struct tgsi_full_dst_register tmp_rsq_dst = make_dst_temp_reg(tmp);
8796 struct tgsi_full_src_register tmp_rsq_src = make_src_temp_reg(tmp);
8797
8798 /* DRSQ tmp_rsq, tmp */
8799 emit_drsq(emit, &tmp_rsq_dst, &tmp_src);
8800
8801 /* DMUL dst, tmp_rsq, src[0] */
8802 emit_instruction_op2(emit, VGPU10_OPCODE_DMUL, &inst->Dst[0],
8803 &tmp_rsq_src, &inst->Src[0]);
8804
8805 free_temp_indexes(emit);
8806
8807 return TRUE;
8808 }
8809
8810
8811 static boolean
8812 emit_interp_offset(struct svga_shader_emitter_v10 *emit,
8813 const struct tgsi_full_instruction *inst)
8814 {
8815 assert(emit->version >= 50);
8816
8817 /* The src1.xy offset is a float with values in the range [-0.5, 0.5]
8818 * where (0,0) is the center of the pixel. We need to translate that
8819 * into an integer offset on a 16x16 grid in the range [-8/16, 7/16].
8820 * Also need to flip the Y axis (I think).
8821 */
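/* For example (illustrative): an offset of (0.25, -0.25) is multiplied by
 * {16, -16} to give (4.0, 4.0), which FTOI truncates to the integer grid
 * offset (4, 4) that EVAL_SNAPPED consumes.
 */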
8822 unsigned tmp = get_temp_index(emit);
8823 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
8824 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
8825 struct tgsi_full_dst_register tmp_dst_xy =
8826 writemask_dst(&tmp_dst, TGSI_WRITEMASK_X | TGSI_WRITEMASK_Y);
8827 struct tgsi_full_src_register const16 =
8828 make_immediate_reg_float4(emit, 16.0f, -16.0, 0, 0);
8829
8830 /* MUL tmp.xy, src1, {16, -16, 0, 0} */
8831 emit_instruction_op2(emit, VGPU10_OPCODE_MUL,
8832 &tmp_dst_xy, &inst->Src[1], &const16);
8833
8834 /* FTOI tmp.xy, tmp */
8835 emit_instruction_op1(emit, VGPU10_OPCODE_FTOI, &tmp_dst_xy, &tmp_src);
8836
8837 /* EVAL_SNAPPED dst, src0, tmp */
8838 emit_instruction_op2(emit, VGPU10_OPCODE_EVAL_SNAPPED,
8839 &inst->Dst[0], &inst->Src[0], &tmp_src);
8840
8841 free_temp_indexes(emit);
8842
8843 return TRUE;
8844 }
8845
8846
8847 /**
8848 * Emit a simple instruction (like ADD, MUL, MIN, etc).
8849 */
8850 static boolean
8851 emit_simple(struct svga_shader_emitter_v10 *emit,
8852 const struct tgsi_full_instruction *inst)
8853 {
8854 const enum tgsi_opcode opcode = inst->Instruction.Opcode;
8855 const struct tgsi_opcode_info *op = tgsi_get_opcode_info(opcode);
8856 const bool dbl_dst = opcode_has_dbl_dst(inst->Instruction.Opcode);
8857 const bool dbl_src = opcode_has_dbl_src(inst->Instruction.Opcode);
8858 unsigned i;
8859
8860 if (inst->Instruction.Opcode == TGSI_OPCODE_BGNLOOP) {
8861 emit->current_loop_depth++;
8862 }
8863 else if (inst->Instruction.Opcode == TGSI_OPCODE_ENDLOOP) {
8864 emit->current_loop_depth--;
8865 }
8866
8867 begin_emit_instruction(emit);
8868 emit_opcode_precise(emit, translate_opcode(inst->Instruction.Opcode),
8869 inst->Instruction.Saturate,
8870 inst->Instruction.Precise);
8871 for (i = 0; i < op->num_dst; i++) {
8872 if (dbl_dst) {
8873 check_double_dst_writemask(inst);
8874 }
8875 emit_dst_register(emit, &inst->Dst[i]);
8876 }
8877 for (i = 0; i < op->num_src; i++) {
8878 if (dbl_src) {
8879 check_double_src_swizzle(&inst->Src[i]);
8880 }
8881 emit_src_register(emit, &inst->Src[i]);
8882 }
8883 end_emit_instruction(emit);
8884
8885 return TRUE;
8886 }
8887
8888
8889 /**
8890 * Emit MSB instruction (like IMSB, UMSB).
8891 *
8892 * GLSL counts the bit index starting from the LSB, whereas SM5's
8893 * firstbit_hi/shi instructions count it starting from the MSB.
8894 * To get the GLSL-style result from the SM5 device, we return
8895 * (31 - index) whenever the returned index is not -1.
8896 */
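/*
 * For example (illustrative): for an input of 0x10, SM5 firstbit_hi returns
 * 27 (the bit index counted from the MSB), while GLSL findMSB() expects 4;
 * 31 - 27 = 4 recovers the GLSL result.
 */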
8897 static boolean
8898 emit_msb(struct svga_shader_emitter_v10 *emit,
8899 const struct tgsi_full_instruction *inst)
8900 {
8901 const struct tgsi_full_dst_register *index_dst = &inst->Dst[0];
8902
8903 assert(index_dst->Register.File != TGSI_FILE_OUTPUT);
8904
8905 struct tgsi_full_src_register index_src =
8906 make_src_reg(index_dst->Register.File, index_dst->Register.Index);
8907 struct tgsi_full_src_register imm31 =
8908 make_immediate_reg_int(emit, 31);
8909 imm31 = scalar_src(&imm31, TGSI_SWIZZLE_X);
8910 struct tgsi_full_src_register neg_one =
8911 make_immediate_reg_int(emit, -1);
8912 neg_one = scalar_src(&neg_one, TGSI_SWIZZLE_X);
8913 unsigned tmp = get_temp_index(emit);
8914 const struct tgsi_full_dst_register tmp_dst =
8915 make_dst_temp_reg(tmp);
8916 const struct tgsi_full_dst_register tmp_dst_x =
8917 writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
8918 const struct tgsi_full_src_register tmp_src_x =
8919 make_src_scalar_reg(TGSI_FILE_TEMPORARY, tmp, TGSI_SWIZZLE_X);
8920 int writemask = TGSI_WRITEMASK_X;
8921 int src_swizzle = TGSI_SWIZZLE_X;
8922 int dst_writemask = index_dst->Register.WriteMask;
8923
8924 emit_simple(emit, inst);
8925
8926 /* index conversion from SM5 to GLSL */
8927 while (writemask & dst_writemask) {
8928 struct tgsi_full_src_register index_src_comp =
8929 scalar_src(&index_src, src_swizzle);
8930 struct tgsi_full_dst_register index_dst_comp =
8931 writemask_dst(index_dst, writemask);
8932
8933 /* check if index_src_comp != -1 */
8934 emit_instruction_op2(emit, VGPU10_OPCODE_INE,
8935 &tmp_dst_x, &index_src_comp, &neg_one);
8936
8937 /* if */
8938 emit_if(emit, &tmp_src_x);
8939
8940 index_src_comp = negate_src(&index_src_comp);
8941 /* dst = 31 - index:  IADD dst, imm{31}, -index */
8942 emit_instruction_op2(emit, VGPU10_OPCODE_IADD,
8943 &index_dst_comp, &imm31, &index_src_comp);
8944
8945 /* endif */
8946 emit_instruction_op0(emit, VGPU10_OPCODE_ENDIF);
8947
8948 writemask = writemask << 1;
8949 src_swizzle = src_swizzle + 1;
8950 }
8951 free_temp_indexes(emit);
8952 return TRUE;
8953 }
8954
8955
8956 /**
8957 * Emit a BFE instruction (like UBFE, IBFE).
8958 * tgsi representation:
8959 * U/IBFE dst, value, offset, width
8960 * SM5 representation:
8961 * U/IBFE dst, width, offset, value
8962 * Note: SM5 has width & offset range (0-31);
8963 * whereas GLSL has width & offset range (0-32)
8964 */
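/*
 * Rough sketch of the emitted code (for illustration):
 *   IEQ  cond1.x, width, 32
 *   IEQ  cond2.x, offset, 0
 *   AND  cond2.x, cond2.x, cond1.x
 *   IF cond2.x
 *     MOV dst, value              (GLSL: a full 32-bit extract returns value)
 *   ELSE
 *     U/IBFE dst, width, offset, value
 *   ENDIF
 */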
8965 static boolean
8966 emit_bfe(struct svga_shader_emitter_v10 *emit,
8967 const struct tgsi_full_instruction *inst)
8968 {
8969 const enum tgsi_opcode opcode = inst->Instruction.Opcode;
8970 struct tgsi_full_src_register imm32 = make_immediate_reg_int(emit, 32);
8971 imm32 = scalar_src(&imm32, TGSI_SWIZZLE_X);
8972 struct tgsi_full_src_register zero = make_immediate_reg_int(emit, 0);
8973 zero = scalar_src(&zero, TGSI_SWIZZLE_X);
8974
8975 unsigned tmp1 = get_temp_index(emit);
8976 const struct tgsi_full_dst_register cond1_dst = make_dst_temp_reg(tmp1);
8977 const struct tgsi_full_dst_register cond1_dst_x =
8978 writemask_dst(&cond1_dst, TGSI_WRITEMASK_X);
8979 const struct tgsi_full_src_register cond1_src_x =
8980 make_src_scalar_reg(TGSI_FILE_TEMPORARY, tmp1, TGSI_SWIZZLE_X);
8981
8982 unsigned tmp2 = get_temp_index(emit);
8983 const struct tgsi_full_dst_register cond2_dst = make_dst_temp_reg(tmp2);
8984 const struct tgsi_full_dst_register cond2_dst_x =
8985 writemask_dst(&cond2_dst, TGSI_WRITEMASK_X);
8986 const struct tgsi_full_src_register cond2_src_x =
8987 make_src_scalar_reg(TGSI_FILE_TEMPORARY, tmp2, TGSI_SWIZZLE_X);
8988
8989 /**
8990 * In SM5, when width = 32 and offset = 0, it returns 0.
8991 * GLSL, on the other hand, expects the value to be copied as-is to dst.
8992 */
8993
8994 /* cond1 = width == 32 */
8995 emit_instruction_op2(emit, VGPU10_OPCODE_IEQ,
8996 &cond1_dst_x, &inst->Src[2], &imm32);
8997
8998 /* cond2 = offset == 0 */
8999 emit_instruction_op2(emit, VGPU10_OPCODE_IEQ,
9000 &cond2_dst_x, &inst->Src[1], &zero);
9001
9002 /* cond2 = cond1 & cond2 */
9003 emit_instruction_op2(emit, VGPU10_OPCODE_AND, &cond2_dst_x,
9004 &cond2_src_x,
9005 &cond1_src_x);
9006 /* IF */
9007 emit_if(emit, &cond2_src_x);
9008
9009 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0],
9010 &inst->Src[0]);
9011
9012 /* ELSE */
9013 emit_instruction_op0(emit, VGPU10_OPCODE_ELSE);
9014
9015 /* U/IBFE dst, width, offset, value */
9016 emit_instruction_op3(emit, translate_opcode(opcode), &inst->Dst[0],
9017 &inst->Src[2], &inst->Src[1], &inst->Src[0]);
9018
9019 /* ENDIF */
9020 emit_instruction_op0(emit, VGPU10_OPCODE_ENDIF);
9021
9022 free_temp_indexes(emit);
9023 return TRUE;
9024 }
9025
9026
9027 /**
9028 * Emit BFI instruction
9029 * tgsi representation:
9030 * BFI dst, base, insert, offset, width
9031 * SM5 representation:
9032 * BFI dst, width, offset, insert, base
9033 * Note: SM5 has width & offset range (0-31);
9034 * whereas GLSL has width & offset range (0-32)
9035 */
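/*
 * Rough sketch of the emitted code (for illustration):
 *   IEQ  cond1.x, width, 32
 *   IEQ  cond2.x, offset, 0
 *   AND  cond2.x, cond2.x, cond1.x
 *   IF cond2.x
 *     MOV dst, insert             (GLSL: a full 32-bit insert returns insert)
 *   ELSE
 *     BFI dst, width, offset, insert, base
 *   ENDIF
 */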
9036 static boolean
9037 emit_bfi(struct svga_shader_emitter_v10 *emit,
9038 const struct tgsi_full_instruction *inst)
9039 {
9040 const enum tgsi_opcode opcode = inst->Instruction.Opcode;
9041 struct tgsi_full_src_register imm32 = make_immediate_reg_int(emit, 32);
9042 imm32 = scalar_src(&imm32, TGSI_SWIZZLE_X);
9043
9044 struct tgsi_full_src_register zero = make_immediate_reg_int(emit, 0);
9045 zero = scalar_src(&zero, TGSI_SWIZZLE_X);
9046
9047 unsigned tmp1 = get_temp_index(emit);
9048 const struct tgsi_full_dst_register cond1_dst = make_dst_temp_reg(tmp1);
9049 const struct tgsi_full_dst_register cond1_dst_x =
9050 writemask_dst(&cond1_dst, TGSI_WRITEMASK_X);
9051 const struct tgsi_full_src_register cond1_src_x =
9052 make_src_scalar_reg(TGSI_FILE_TEMPORARY, tmp1, TGSI_SWIZZLE_X);
9053
9054 unsigned tmp2 = get_temp_index(emit);
9055 const struct tgsi_full_dst_register cond2_dst = make_dst_temp_reg(tmp2);
9056 const struct tgsi_full_dst_register cond2_dst_x =
9057 writemask_dst(&cond2_dst, TGSI_WRITEMASK_X);
9058 const struct tgsi_full_src_register cond2_src_x =
9059 make_src_scalar_reg(TGSI_FILE_TEMPORARY, tmp2, TGSI_SWIZZLE_X);
9060
9061 /**
9062 * In SM5, when width = 32 and offset = 0, it returns 0.
9063 * GLSL, on the other hand, expects the insert value to be copied as-is to dst.
9064 */
9065
9066 /* cond1 = width == 32 */
9067 emit_instruction_op2(emit, VGPU10_OPCODE_IEQ,
9068 &cond1_dst_x, &inst->Src[3], &imm32);
9069
9070 /* cond2 = offset == 0 */
9071 emit_instruction_op2(emit, VGPU10_OPCODE_IEQ,
9072 &cond2_dst_x, &inst->Src[2], &zero);
9073
9074 /* cond2 = cond1 & cond2 */
9075 emit_instruction_op2(emit, VGPU10_OPCODE_AND,
9076 &cond2_dst_x, &cond2_src_x, &cond1_src_x);
9077
9078 /* if */
9079 emit_if(emit, &cond2_src_x);
9080
9081 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0],
9082 &inst->Src[1]);
9083
9084 /* else */
9085 emit_instruction_op0(emit, VGPU10_OPCODE_ELSE);
9086
9087 /* BFI dst, width, offset, insert, base */
9088 begin_emit_instruction(emit);
9089 emit_opcode(emit, translate_opcode(opcode), inst->Instruction.Saturate);
9090 emit_dst_register(emit, &inst->Dst[0]);
9091 emit_src_register(emit, &inst->Src[3]);
9092 emit_src_register(emit, &inst->Src[2]);
9093 emit_src_register(emit, &inst->Src[1]);
9094 emit_src_register(emit, &inst->Src[0]);
9095 end_emit_instruction(emit);
9096
9097 /* endif */
9098 emit_instruction_op0(emit, VGPU10_OPCODE_ENDIF);
9099
9100 free_temp_indexes(emit);
9101 return TRUE;
9102 }
9103
9104
9105 /**
9106 * We only special case the MOV instruction to try to detect constant
9107 * color writes in the fragment shader.
9108 */
9109 static boolean
9110 emit_mov(struct svga_shader_emitter_v10 *emit,
9111 const struct tgsi_full_instruction *inst)
9112 {
9113 const struct tgsi_full_src_register *src = &inst->Src[0];
9114 const struct tgsi_full_dst_register *dst = &inst->Dst[0];
9115
9116 if (emit->unit == PIPE_SHADER_FRAGMENT &&
9117 dst->Register.File == TGSI_FILE_OUTPUT &&
9118 dst->Register.Index == 0 &&
9119 src->Register.File == TGSI_FILE_CONSTANT &&
9120 !src->Register.Indirect) {
9121 emit->constant_color_output = TRUE;
9122 }
9123
9124 return emit_simple(emit, inst);
9125 }
9126
9127
9128 /**
9129 * Emit a simple VGPU10 instruction which writes to multiple dest registers,
9130 * where TGSI only uses one dest register.
9131 */
9132 static boolean
9133 emit_simple_1dst(struct svga_shader_emitter_v10 *emit,
9134 const struct tgsi_full_instruction *inst,
9135 unsigned dst_count,
9136 unsigned dst_index)
9137 {
9138 const enum tgsi_opcode opcode = inst->Instruction.Opcode;
9139 const struct tgsi_opcode_info *op = tgsi_get_opcode_info(opcode);
9140 unsigned i;
9141
9142 begin_emit_instruction(emit);
9143 emit_opcode(emit, translate_opcode(opcode), inst->Instruction.Saturate);
9144
9145 for (i = 0; i < dst_count; i++) {
9146 if (i == dst_index) {
9147 emit_dst_register(emit, &inst->Dst[0]);
9148 } else {
9149 emit_null_dst_register(emit);
9150 }
9151 }
9152
9153 for (i = 0; i < op->num_src; i++) {
9154 emit_src_register(emit, &inst->Src[i]);
9155 }
9156 end_emit_instruction(emit);
9157
9158 return TRUE;
9159 }
9160
9161
9162 /**
9163 * Emit a vmware specific VGPU10 instruction.
9164 */
9165 static boolean
9166 emit_vmware(struct svga_shader_emitter_v10 *emit,
9167 const struct tgsi_full_instruction *inst,
9168 VGPU10_VMWARE_OPCODE_TYPE subopcode)
9169 {
9170 VGPU10OpcodeToken0 token0;
9171 const enum tgsi_opcode opcode = inst->Instruction.Opcode;
9172 const struct tgsi_opcode_info *op = tgsi_get_opcode_info(opcode);
9173 const bool dbl_dst = opcode_has_dbl_dst(inst->Instruction.Opcode);
9174 const bool dbl_src = opcode_has_dbl_src(inst->Instruction.Opcode);
9175
9176 unsigned i;
9177
9178 begin_emit_instruction(emit);
9179
9180 assert((subopcode > 0 && emit->version >= 50) || subopcode == 0);
9181
9182 token0.value = 0;
9183 token0.opcodeType = VGPU10_OPCODE_VMWARE;
9184 token0.vmwareOpcodeType = subopcode;
9185 emit_dword(emit, token0.value);
9186
9187 if (subopcode == VGPU10_VMWARE_OPCODE_IDIV) {
9188 /* IDIV only uses the first dest register. */
9189 emit_dst_register(emit, &inst->Dst[0]);
9190 emit_null_dst_register(emit);
9191 } else {
9192 for (i = 0; i < op->num_dst; i++) {
9193 if (dbl_dst) {
9194 check_double_dst_writemask(inst);
9195 }
9196 emit_dst_register(emit, &inst->Dst[i]);
9197 }
9198 }
9199
9200 for (i = 0; i < op->num_src; i++) {
9201 if (dbl_src) {
9202 check_double_src_swizzle(&inst->Src[i]);
9203 }
9204 emit_src_register(emit, &inst->Src[i]);
9205 }
9206 end_emit_instruction(emit);
9207
9208 return TRUE;
9209 }
9210
9211
9212 /**
9213 * Translate a single TGSI instruction to VGPU10.
9214 */
9215 static boolean
9216 emit_vgpu10_instruction(struct svga_shader_emitter_v10 *emit,
9217 unsigned inst_number,
9218 const struct tgsi_full_instruction *inst)
9219 {
9220 const enum tgsi_opcode opcode = inst->Instruction.Opcode;
9221
9222 if (emit->skip_instruction)
9223 return TRUE;
9224
9225 switch (opcode) {
9226 case TGSI_OPCODE_ADD:
9227 case TGSI_OPCODE_AND:
9228 case TGSI_OPCODE_BGNLOOP:
9229 case TGSI_OPCODE_BRK:
9230 case TGSI_OPCODE_CEIL:
9231 case TGSI_OPCODE_CONT:
9232 case TGSI_OPCODE_DDX:
9233 case TGSI_OPCODE_DDY:
9234 case TGSI_OPCODE_DIV:
9235 case TGSI_OPCODE_DP2:
9236 case TGSI_OPCODE_DP3:
9237 case TGSI_OPCODE_DP4:
9238 case TGSI_OPCODE_ELSE:
9239 case TGSI_OPCODE_ENDIF:
9240 case TGSI_OPCODE_ENDLOOP:
9241 case TGSI_OPCODE_ENDSUB:
9242 case TGSI_OPCODE_F2I:
9243 case TGSI_OPCODE_F2U:
9244 case TGSI_OPCODE_FLR:
9245 case TGSI_OPCODE_FRC:
9246 case TGSI_OPCODE_FSEQ:
9247 case TGSI_OPCODE_FSGE:
9248 case TGSI_OPCODE_FSLT:
9249 case TGSI_OPCODE_FSNE:
9250 case TGSI_OPCODE_I2F:
9251 case TGSI_OPCODE_IMAX:
9252 case TGSI_OPCODE_IMIN:
9253 case TGSI_OPCODE_INEG:
9254 case TGSI_OPCODE_ISGE:
9255 case TGSI_OPCODE_ISHR:
9256 case TGSI_OPCODE_ISLT:
9257 case TGSI_OPCODE_MAD:
9258 case TGSI_OPCODE_MAX:
9259 case TGSI_OPCODE_MIN:
9260 case TGSI_OPCODE_MUL:
9261 case TGSI_OPCODE_NOP:
9262 case TGSI_OPCODE_NOT:
9263 case TGSI_OPCODE_OR:
9264 case TGSI_OPCODE_UADD:
9265 case TGSI_OPCODE_USEQ:
9266 case TGSI_OPCODE_USGE:
9267 case TGSI_OPCODE_USLT:
9268 case TGSI_OPCODE_UMIN:
9269 case TGSI_OPCODE_UMAD:
9270 case TGSI_OPCODE_UMAX:
9271 case TGSI_OPCODE_ROUND:
9272 case TGSI_OPCODE_SQRT:
9273 case TGSI_OPCODE_SHL:
9274 case TGSI_OPCODE_TRUNC:
9275 case TGSI_OPCODE_U2F:
9276 case TGSI_OPCODE_UCMP:
9277 case TGSI_OPCODE_USHR:
9278 case TGSI_OPCODE_USNE:
9279 case TGSI_OPCODE_XOR:
9280 /* Begin SM5 opcodes */
9281 case TGSI_OPCODE_F2D:
9282 case TGSI_OPCODE_D2F:
9283 case TGSI_OPCODE_DADD:
9284 case TGSI_OPCODE_DMUL:
9285 case TGSI_OPCODE_DMAX:
9286 case TGSI_OPCODE_DMIN:
9287 case TGSI_OPCODE_DSGE:
9288 case TGSI_OPCODE_DSLT:
9289 case TGSI_OPCODE_DSEQ:
9290 case TGSI_OPCODE_DSNE:
9291 case TGSI_OPCODE_BREV:
9292 case TGSI_OPCODE_POPC:
9293 case TGSI_OPCODE_LSB:
9294 case TGSI_OPCODE_INTERP_CENTROID:
9295 case TGSI_OPCODE_INTERP_SAMPLE:
9296 /* simple instructions */
9297 return emit_simple(emit, inst);
9298 case TGSI_OPCODE_RET:
9299 if (emit->unit == PIPE_SHADER_TESS_CTRL &&
9300 !emit->tcs.control_point_phase) {
9301
9302 /* store the tessellation levels in the patch constant phase only */
9303 store_tesslevels(emit);
9304 }
9305 return emit_simple(emit, inst);
9306
9307 case TGSI_OPCODE_IMSB:
9308 case TGSI_OPCODE_UMSB:
9309 return emit_msb(emit, inst);
9310 case TGSI_OPCODE_IBFE:
9311 case TGSI_OPCODE_UBFE:
9312 return emit_bfe(emit, inst);
9313 case TGSI_OPCODE_BFI:
9314 return emit_bfi(emit, inst);
9315 case TGSI_OPCODE_MOV:
9316 return emit_mov(emit, inst);
9317 case TGSI_OPCODE_EMIT:
9318 return emit_vertex(emit, inst);
9319 case TGSI_OPCODE_ENDPRIM:
9320 return emit_endprim(emit, inst);
9321 case TGSI_OPCODE_IABS:
9322 return emit_iabs(emit, inst);
9323 case TGSI_OPCODE_ARL:
9324 /* fall-through */
9325 case TGSI_OPCODE_UARL:
9326 return emit_arl_uarl(emit, inst);
9327 case TGSI_OPCODE_BGNSUB:
9328 /* no-op */
9329 return TRUE;
9330 case TGSI_OPCODE_CAL:
9331 return emit_cal(emit, inst);
9332 case TGSI_OPCODE_CMP:
9333 return emit_cmp(emit, inst);
9334 case TGSI_OPCODE_COS:
9335 return emit_sincos(emit, inst);
9336 case TGSI_OPCODE_DST:
9337 return emit_dst(emit, inst);
9338 case TGSI_OPCODE_EX2:
9339 return emit_ex2(emit, inst);
9340 case TGSI_OPCODE_EXP:
9341 return emit_exp(emit, inst);
9342 case TGSI_OPCODE_IF:
9343 return emit_if(emit, &inst->Src[0]);
9344 case TGSI_OPCODE_KILL:
9345 return emit_kill(emit, inst);
9346 case TGSI_OPCODE_KILL_IF:
9347 return emit_kill_if(emit, inst);
9348 case TGSI_OPCODE_LG2:
9349 return emit_lg2(emit, inst);
9350 case TGSI_OPCODE_LIT:
9351 return emit_lit(emit, inst);
9352 case TGSI_OPCODE_LODQ:
9353 return emit_lodq(emit, inst);
9354 case TGSI_OPCODE_LOG:
9355 return emit_log(emit, inst);
9356 case TGSI_OPCODE_LRP:
9357 return emit_lrp(emit, inst);
9358 case TGSI_OPCODE_POW:
9359 return emit_pow(emit, inst);
9360 case TGSI_OPCODE_RCP:
9361 return emit_rcp(emit, inst);
9362 case TGSI_OPCODE_RSQ:
9363 return emit_rsq(emit, inst);
9364 case TGSI_OPCODE_SAMPLE:
9365 return emit_sample(emit, inst);
9366 case TGSI_OPCODE_SEQ:
9367 return emit_seq(emit, inst);
9368 case TGSI_OPCODE_SGE:
9369 return emit_sge(emit, inst);
9370 case TGSI_OPCODE_SGT:
9371 return emit_sgt(emit, inst);
9372 case TGSI_OPCODE_SIN:
9373 return emit_sincos(emit, inst);
9374 case TGSI_OPCODE_SLE:
9375 return emit_sle(emit, inst);
9376 case TGSI_OPCODE_SLT:
9377 return emit_slt(emit, inst);
9378 case TGSI_OPCODE_SNE:
9379 return emit_sne(emit, inst);
9380 case TGSI_OPCODE_SSG:
9381 return emit_ssg(emit, inst);
9382 case TGSI_OPCODE_ISSG:
9383 return emit_issg(emit, inst);
9384 case TGSI_OPCODE_TEX:
9385 return emit_tex(emit, inst);
9386 case TGSI_OPCODE_TG4:
9387 return emit_tg4(emit, inst);
9388 case TGSI_OPCODE_TEX2:
9389 return emit_tex2(emit, inst);
9390 case TGSI_OPCODE_TXP:
9391 return emit_txp(emit, inst);
9392 case TGSI_OPCODE_TXB:
9393 case TGSI_OPCODE_TXB2:
9394 case TGSI_OPCODE_TXL:
9395 return emit_txl_txb(emit, inst);
9396 case TGSI_OPCODE_TXD:
9397 return emit_txd(emit, inst);
9398 case TGSI_OPCODE_TXF:
9399 return emit_txf(emit, inst);
9400 case TGSI_OPCODE_TXL2:
9401 return emit_txl2(emit, inst);
9402 case TGSI_OPCODE_TXQ:
9403 return emit_txq(emit, inst);
9404 case TGSI_OPCODE_UIF:
9405 return emit_if(emit, &inst->Src[0]);
9406 case TGSI_OPCODE_UMUL_HI:
9407 case TGSI_OPCODE_IMUL_HI:
9408 case TGSI_OPCODE_UDIV:
9409 /* These cases use only the FIRST of two destination registers */
9410 return emit_simple_1dst(emit, inst, 2, 0);
9411 case TGSI_OPCODE_IDIV:
9412 return emit_vmware(emit, inst, VGPU10_VMWARE_OPCODE_IDIV);
9413 case TGSI_OPCODE_UMUL:
9414 case TGSI_OPCODE_UMOD:
9415 case TGSI_OPCODE_MOD:
9416 /* These cases use only the SECOND of two destination registers */
9417 return emit_simple_1dst(emit, inst, 2, 1);
9418
9419 /* Begin SM5 opcodes */
9420 case TGSI_OPCODE_DABS:
9421 return emit_dabs(emit, inst);
9422 case TGSI_OPCODE_DNEG:
9423 return emit_dneg(emit, inst);
9424 case TGSI_OPCODE_DRCP:
9425 return emit_simple(emit, inst);
9426 case TGSI_OPCODE_DSQRT:
9427 return emit_dsqrt(emit, inst);
9428 case TGSI_OPCODE_DMAD:
9429 return emit_dmad(emit, inst);
9430 case TGSI_OPCODE_DFRAC:
9431 return emit_vmware(emit, inst, VGPU10_VMWARE_OPCODE_DFRC);
9432 case TGSI_OPCODE_D2I:
9433 case TGSI_OPCODE_D2U:
9434 return emit_simple(emit, inst);
9435 case TGSI_OPCODE_I2D:
9436 case TGSI_OPCODE_U2D:
9437 return emit_simple(emit, inst);
9438 case TGSI_OPCODE_DRSQ:
9439 return emit_drsq(emit, &inst->Dst[0], &inst->Src[0]);
9440 case TGSI_OPCODE_DDIV:
9441 return emit_simple(emit, inst);
9442 case TGSI_OPCODE_INTERP_OFFSET:
9443 return emit_interp_offset(emit, inst);
9444
9445 /* The following opcodes should never be seen here. We return zero
9446 * for all the PIPE_CAP_TGSI_DROUND_SUPPORTED, DFRACEXP_DLDEXP_SUPPORTED,
9447 * FMA_SUPPORTED, LDEXP_SUPPORTED queries.
9448 */
9449 case TGSI_OPCODE_FMA:
9450 case TGSI_OPCODE_LDEXP:
9451 case TGSI_OPCODE_DSSG:
9452 case TGSI_OPCODE_DFRACEXP:
9453 case TGSI_OPCODE_DLDEXP:
9454 case TGSI_OPCODE_DTRUNC:
9455 case TGSI_OPCODE_DCEIL:
9456 case TGSI_OPCODE_DFLR:
9457 debug_printf("Unexpected TGSI opcode %s. "
9458 "Should have been translated away by the GLSL compiler.\n",
9459 tgsi_get_opcode_name(opcode));
9460 return FALSE;
9461
9462 case TGSI_OPCODE_LOAD:
9463 case TGSI_OPCODE_STORE:
9464 case TGSI_OPCODE_ATOMAND:
9465 case TGSI_OPCODE_ATOMCAS:
9466 case TGSI_OPCODE_ATOMIMAX:
9467 case TGSI_OPCODE_ATOMIMIN:
9468 case TGSI_OPCODE_ATOMOR:
9469 case TGSI_OPCODE_ATOMUADD:
9470 case TGSI_OPCODE_ATOMUMAX:
9471 case TGSI_OPCODE_ATOMUMIN:
9472 case TGSI_OPCODE_ATOMXCHG:
9473 case TGSI_OPCODE_ATOMXOR:
9474 return FALSE;
9475 case TGSI_OPCODE_BARRIER:
9476 if (emit->unit == PIPE_SHADER_TESS_CTRL) {
9477 /* The SM5 device doesn't support BARRIER in the TCS. If a barrier is
9478 * used in the shader, do nothing for this opcode and continue with
9479 * the rest of the shader translation.
9480 */
9481 pipe_debug_message(&emit->svga_debug_callback, INFO,
9482 "barrier instruction is not supported in tessellation control shader\n");
9483 return TRUE;
9484 }
9485 else {
9486 return emit_simple(emit, inst);
9487 }
9488
9489 case TGSI_OPCODE_END:
9490 if (!emit_post_helpers(emit))
9491 return FALSE;
9492 return emit_simple(emit, inst);
9493
9494 default:
9495 debug_printf("Unimplemented tgsi instruction %s\n",
9496 tgsi_get_opcode_name(opcode));
9497 return FALSE;
9498 }
9499
9500 return TRUE;
9501 }
9502
9503
9504 /**
9505 * Emit the extra instructions to adjust the vertex position.
9506 * There are two possible adjustments:
9507 * 1. Converting from Gallium to VGPU10 coordinate space by applying the
9508 * "prescale" and "pretranslate" values.
9509 * 2. Undoing the viewport transformation when we use the swtnl/draw path.
9510 * The temporary register holding the vertex position is given by emit->vposition.tmp_index.
9511 */
9512 static void
9513 emit_vpos_instructions(struct svga_shader_emitter_v10 *emit)
9514 {
9515 struct tgsi_full_src_register tmp_pos_src;
9516 struct tgsi_full_dst_register pos_dst;
9517 const unsigned vs_pos_tmp_index = emit->vposition.tmp_index;
9518
9519 /* Don't bother to emit any extra vertex instructions if vertex position is
9520 * not written out
9521 */
9522 if (emit->vposition.out_index == INVALID_INDEX)
9523 return;
9524
9525 /**
9526 * Reset the temporary vertex position register index
9527 * so that emit_dst_register() will use the real vertex position output
9528 */
9529 emit->vposition.tmp_index = INVALID_INDEX;
9530
9531 tmp_pos_src = make_src_temp_reg(vs_pos_tmp_index);
9532 pos_dst = make_dst_output_reg(emit->vposition.out_index);
9533
9534 /* If the non-adjusted vertex position register index
9535 * is valid, copy the vertex position from the temporary
9536 * vertex position register before it is modified by the
9537 * prescale computation.
9538 */
9539 if (emit->vposition.so_index != INVALID_INDEX) {
9540 struct tgsi_full_dst_register pos_so_dst =
9541 make_dst_output_reg(emit->vposition.so_index);
9542
9543 /* MOV pos_so, tmp_pos */
9544 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &pos_so_dst, &tmp_pos_src);
9545 }
9546
9547 if (emit->vposition.need_prescale) {
9548 /* This code adjusts the vertex position to match the VGPU10 convention.
9549 * If p is the position computed by the shader (usually by applying the
9550 * modelview and projection matrices), the new position q is computed by:
9551 *
9552 * q.x = p.w * trans.x + p.x * scale.x
9553 * q.y = p.w * trans.y + p.y * scale.y
9554 * q.z = p.w * trans.z + p.z * scale.z;
9555 * q.w = p.w * trans.w + p.w;
9556 */
9557 struct tgsi_full_src_register tmp_pos_src_w =
9558 scalar_src(&tmp_pos_src, TGSI_SWIZZLE_W);
9559 struct tgsi_full_dst_register tmp_pos_dst =
9560 make_dst_temp_reg(vs_pos_tmp_index);
9561 struct tgsi_full_dst_register tmp_pos_dst_xyz =
9562 writemask_dst(&tmp_pos_dst, TGSI_WRITEMASK_XYZ);
9563
9564 struct tgsi_full_src_register prescale_scale =
9565 make_src_temp_reg(emit->vposition.prescale_scale_index);
9566 struct tgsi_full_src_register prescale_trans =
9567 make_src_temp_reg(emit->vposition.prescale_trans_index);
9568
9569 /* MUL tmp_pos.xyz, tmp_pos, prescale.scale */
9570 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_pos_dst_xyz,
9571 &tmp_pos_src, &prescale_scale);
9572
9573 /* MAD pos, tmp_pos.wwww, prescale.trans, tmp_pos */
9574 emit_instruction_op3(emit, VGPU10_OPCODE_MAD, &pos_dst, &tmp_pos_src_w,
9575 &prescale_trans, &tmp_pos_src);
9576 }
9577 else if (emit->key.vs.undo_viewport) {
9578 /* This code computes the final vertex position from the temporary
9579 * vertex position by undoing the viewport transformation and the
9580 * divide-by-W operation (we convert window coords back to clip coords).
9581 * This is needed when we use the 'draw' module for fallbacks.
9582 * If p is the temp pos in window coords, then the NDC coord q is:
9583 * q.x = (p.x - vp.x_trans) / vp.x_scale * p.w
9584 * q.y = (p.y - vp.y_trans) / vp.y_scale * p.w
9585 * q.z = p.z * p.w
9586 * q.w = p.w
9587 * CONST[vs_viewport_index] contains:
9588 * { 1/vp.x_scale, 1/vp.y_scale, -vp.x_trans, -vp.y_trans }
9589 */
9590 struct tgsi_full_dst_register tmp_pos_dst =
9591 make_dst_temp_reg(vs_pos_tmp_index);
9592 struct tgsi_full_dst_register tmp_pos_dst_xy =
9593 writemask_dst(&tmp_pos_dst, TGSI_WRITEMASK_XY);
9594 struct tgsi_full_src_register tmp_pos_src_wwww =
9595 scalar_src(&tmp_pos_src, TGSI_SWIZZLE_W);
9596
9597 struct tgsi_full_dst_register pos_dst_xyz =
9598 writemask_dst(&pos_dst, TGSI_WRITEMASK_XYZ);
9599 struct tgsi_full_dst_register pos_dst_w =
9600 writemask_dst(&pos_dst, TGSI_WRITEMASK_W);
9601
9602 struct tgsi_full_src_register vp_xyzw =
9603 make_src_const_reg(emit->vs.viewport_index);
9604 struct tgsi_full_src_register vp_zwww =
9605 swizzle_src(&vp_xyzw, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W,
9606 TGSI_SWIZZLE_W, TGSI_SWIZZLE_W);
9607
9608 /* ADD tmp_pos.xy, tmp_pos.xy, viewport.zwww */
9609 emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &tmp_pos_dst_xy,
9610 &tmp_pos_src, &vp_zwww);
9611
9612 /* MUL tmp_pos.xy, tmp_pos, viewport.xyzw */
9613 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_pos_dst_xy,
9614 &tmp_pos_src, &vp_xyzw);
9615
9616 /* MUL pos.xyz, tmp_pos.xyz, tmp_pos.www */
9617 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &pos_dst_xyz,
9618 &tmp_pos_src, &tmp_pos_src_wwww);
9619
9620 /* MOV pos.w, tmp_pos.w */
9621 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &pos_dst_w, &tmp_pos_src);
9622 }
9623 else if (vs_pos_tmp_index != INVALID_INDEX) {
9624 /* This code is to handle the case where the temporary vertex
9625 * position register is created when the vertex shader has stream
9626 * output and prescale is disabled because rasterization is to be
9627 * discarded.
9628 */
9629 struct tgsi_full_dst_register pos_dst =
9630 make_dst_output_reg(emit->vposition.out_index);
9631
9632 /* MOV pos, tmp_pos */
9633 begin_emit_instruction(emit);
9634 emit_opcode(emit, VGPU10_OPCODE_MOV, FALSE);
9635 emit_dst_register(emit, &pos_dst);
9636 emit_src_register(emit, &tmp_pos_src);
9637 end_emit_instruction(emit);
9638 }
9639
9640 /* Restore original vposition.tmp_index value for the next GS vertex.
9641 * It doesn't matter for VS.
9642 */
9643 emit->vposition.tmp_index = vs_pos_tmp_index;
9644 }
9645
9646 static void
9647 emit_clipping_instructions(struct svga_shader_emitter_v10 *emit)
9648 {
9649 if (emit->clip_mode == CLIP_DISTANCE) {
9650 /* Copy from copy distance temporary to CLIPDIST & the shadow copy */
9651 emit_clip_distance_instructions(emit);
9652
9653 } else if (emit->clip_mode == CLIP_VERTEX &&
9654 emit->key.last_vertex_stage) {
9655 /* Convert TGSI CLIPVERTEX to CLIPDIST */
9656 emit_clip_vertex_instructions(emit);
9657 }
9658
9659 /**
9660 * Emit the vertex position and handle legacy user clip planes only if
9661 * there is a valid vertex position register index.
9662 * This handles the case where the shader doesn't output a vertex
9663 * position; in that case there is no need to emit any more
9664 * vertex instructions.
9665 */
9666 if (emit->vposition.out_index == INVALID_INDEX)
9667 return;
9668
9669 /**
9670 * Emit per-vertex clipping instructions for legacy user defined clip planes.
9671 * NOTE: we must emit the clip distance instructions before the
9672 * emit_vpos_instructions() call since the later function will change
9673 * the TEMP[vs_pos_tmp_index] value.
9674 */
9675 if (emit->clip_mode == CLIP_LEGACY && emit->key.last_vertex_stage) {
9676 /* Emit CLIPDIST for legacy user defined clip planes */
9677 emit_clip_distance_from_vpos(emit, emit->vposition.tmp_index);
9678 }
9679 }
9680
9681
9682 /**
9683 * Emit extra per-vertex instructions. This includes clip-coordinate
9684 * space conversion and computing clip distances. This is called for
9685 * each GS emit-vertex instruction and at the end of VS translation.
9686 */
9687 static void
9688 emit_vertex_instructions(struct svga_shader_emitter_v10 *emit)
9689 {
9690 /* Emit clipping instructions based on clipping mode */
9691 emit_clipping_instructions(emit);
9692
9693 /* Emit vertex position instructions */
9694 emit_vpos_instructions(emit);
9695 }
9696
9697
9698 /**
9699 * Translate the TGSI_OPCODE_EMIT GS instruction.
9700 */
9701 static boolean
9702 emit_vertex(struct svga_shader_emitter_v10 *emit,
9703 const struct tgsi_full_instruction *inst)
9704 {
9705 boolean ret = TRUE;
9706
9707 assert(emit->unit == PIPE_SHADER_GEOMETRY);
9708
9709 /**
9710 * Emit the viewport array index for the first vertex.
9711 */
9712 if (emit->gs.viewport_index_out_index != INVALID_INDEX) {
9713 struct tgsi_full_dst_register viewport_index_out =
9714 make_dst_output_reg(emit->gs.viewport_index_out_index);
9715 struct tgsi_full_dst_register viewport_index_out_x =
9716 writemask_dst(&viewport_index_out, TGSI_WRITEMASK_X);
9717 struct tgsi_full_src_register viewport_index_tmp =
9718 make_src_temp_reg(emit->gs.viewport_index_tmp_index);
9719
9720 /* Set the out index to INVALID_INDEX, so it will not
9721 * be assigned to a temp again in emit_dst_register, and
9722 * the viewport index will not be assigned again in the
9723 * subsequent vertices.
9724 */
9725 emit->gs.viewport_index_out_index = INVALID_INDEX;
9726 emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
9727 &viewport_index_out_x, &viewport_index_tmp);
9728 }
9729
9730 /**
9731 * Find the stream index associated with this emit vertex instruction.
9732 */
9733 assert(inst->Src[0].Register.File == TGSI_FILE_IMMEDIATE);
9734 unsigned streamIndex = find_stream_index(emit, &inst->Src[0]);
9735
9736 /**
9737 * According to the ARB_gpu_shader5 spec, the built-in geometry shader
9738 * outputs are always associated with vertex stream zero.
9739 * So emit the extra vertex instructions for position or clip distance
9740 * for stream zero only.
9741 */
9742 if (streamIndex == 0) {
9743 /**
9744 * Before emitting vertex instructions, emit the temporaries for
9745 * the prescale constants based on the viewport index if needed.
9746 */
9747 if (emit->vposition.need_prescale && !emit->vposition.have_prescale)
9748 emit_temp_prescale_instructions(emit);
9749
9750 emit_vertex_instructions(emit);
9751 }
9752
9753 begin_emit_instruction(emit);
9754 if (emit->version >= 50) {
9755 if (emit->info.num_stream_output_components[streamIndex] == 0) {
9756 /**
9757 * If there is no output for this stream, discard this instruction.
9758 */
9759 emit->discard_instruction = TRUE;
9760 }
9761 else {
9762 emit_opcode(emit, VGPU10_OPCODE_EMIT_STREAM, FALSE);
9763 emit_stream_register(emit, streamIndex);
9764 }
9765 }
9766 else {
9767 emit_opcode(emit, VGPU10_OPCODE_EMIT, FALSE);
9768 }
9769 end_emit_instruction(emit);
9770
9771 return ret;
9772 }
9773
9774
9775 /**
9776 * Emit the extra code to convert from VGPU10's boolean front-face
9777 * register to TGSI's signed front-face register.
9778 *
9779 * TODO: Make temporary front-face register a scalar.
9780 */
9781 static void
9782 emit_frontface_instructions(struct svga_shader_emitter_v10 *emit)
9783 {
9784 assert(emit->unit == PIPE_SHADER_FRAGMENT);
9785
9786 if (emit->fs.face_input_index != INVALID_INDEX) {
9787 /* convert vgpu10 boolean face register to gallium +/-1 value */
9788 struct tgsi_full_dst_register tmp_dst =
9789 make_dst_temp_reg(emit->fs.face_tmp_index);
9790 struct tgsi_full_src_register one =
9791 make_immediate_reg_float(emit, 1.0f);
9792 struct tgsi_full_src_register neg_one =
9793 make_immediate_reg_float(emit, -1.0f);
9794
9795 /* MOVC face_tmp, IS_FRONT_FACE.x, 1.0, -1.0 */
9796 begin_emit_instruction(emit);
9797 emit_opcode(emit, VGPU10_OPCODE_MOVC, FALSE);
9798 emit_dst_register(emit, &tmp_dst);
9799 emit_face_register(emit);
9800 emit_src_register(emit, &one);
9801 emit_src_register(emit, &neg_one);
9802 end_emit_instruction(emit);
9803 }
9804 }
9805
9806
9807 /**
9808 * Emit the extra code to convert from VGPU10's fragcoord.w value to 1/w.
9809 */
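/*
 * For example (illustrative): a fragment with window position (100.5, 20.5)
 * and clip-space w of 2.0 ends up with fragcoord_tmp = (100.5, 20.5, z, 0.5),
 * i.e. the .w component holds 1/w as TGSI expects.
 */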
9810 static void
9811 emit_fragcoord_instructions(struct svga_shader_emitter_v10 *emit)
9812 {
9813 assert(emit->unit == PIPE_SHADER_FRAGMENT);
9814
9815 if (emit->fs.fragcoord_input_index != INVALID_INDEX) {
9816 struct tgsi_full_dst_register tmp_dst =
9817 make_dst_temp_reg(emit->fs.fragcoord_tmp_index);
9818 struct tgsi_full_dst_register tmp_dst_xyz =
9819 writemask_dst(&tmp_dst, TGSI_WRITEMASK_XYZ);
9820 struct tgsi_full_dst_register tmp_dst_w =
9821 writemask_dst(&tmp_dst, TGSI_WRITEMASK_W);
9822 struct tgsi_full_src_register one =
9823 make_immediate_reg_float(emit, 1.0f);
9824 struct tgsi_full_src_register fragcoord =
9825 make_src_reg(TGSI_FILE_INPUT, emit->fs.fragcoord_input_index);
9826
9827 /* save the input index */
9828 unsigned fragcoord_input_index = emit->fs.fragcoord_input_index;
9829 /* set to invalid to prevent substitution in emit_src_register() */
9830 emit->fs.fragcoord_input_index = INVALID_INDEX;
9831
9832 /* MOV fragcoord_tmp.xyz, fragcoord.xyz */
9833 begin_emit_instruction(emit);
9834 emit_opcode(emit, VGPU10_OPCODE_MOV, FALSE);
9835 emit_dst_register(emit, &tmp_dst_xyz);
9836 emit_src_register(emit, &fragcoord);
9837 end_emit_instruction(emit);
9838
9839 /* DIV fragcoord_tmp.w, 1.0, fragcoord.w */
9840 begin_emit_instruction(emit);
9841 emit_opcode(emit, VGPU10_OPCODE_DIV, FALSE);
9842 emit_dst_register(emit, &tmp_dst_w);
9843 emit_src_register(emit, &one);
9844 emit_src_register(emit, &fragcoord);
9845 end_emit_instruction(emit);
9846
9847 /* restore saved value */
9848 emit->fs.fragcoord_input_index = fragcoord_input_index;
9849 }
9850 }
9851
9852
9853 /**
9854 * Emit the extra code to get the current sample position value and
9855 * put it into a temp register.
9856 */
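/*
 * Note (illustrative): SAMPLE_POS returns offsets relative to the pixel
 * center in D3D's [-0.5, 0.5] range; adding 0.5 shifts them into GL's
 * [0, 1) gl_SamplePosition convention, e.g. (-0.25, 0.25) becomes
 * (0.25, 0.75).
 */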
9857 static void
9858 emit_sample_position_instructions(struct svga_shader_emitter_v10 *emit)
9859 {
9860 assert(emit->unit == PIPE_SHADER_FRAGMENT);
9861
9862 if (emit->fs.sample_pos_sys_index != INVALID_INDEX) {
9863 assert(emit->version >= 41);
9864
9865 struct tgsi_full_dst_register tmp_dst =
9866 make_dst_temp_reg(emit->fs.sample_pos_tmp_index);
9867 struct tgsi_full_src_register half =
9868 make_immediate_reg_float4(emit, 0.5, 0.5, 0.0, 0.0);
9869
9870 struct tgsi_full_src_register tmp_src =
9871 make_src_temp_reg(emit->fs.sample_pos_tmp_index);
9872 struct tgsi_full_src_register sample_index_reg =
9873 make_src_scalar_reg(TGSI_FILE_SYSTEM_VALUE,
9874 emit->fs.sample_id_sys_index, TGSI_SWIZZLE_X);
9875
9876 /* The first src register is a shader resource (if we want a
9877 * multisampled resource sample position) or the rasterizer register
9878 * (if we want the current sample position in the color buffer). We
9879 * want the latter.
9880 */
9881
9882 /* SAMPLE_POS dst, RASTERIZER, sampleIndex */
9883 begin_emit_instruction(emit);
9884 emit_opcode(emit, VGPU10_OPCODE_SAMPLE_POS, FALSE);
9885 emit_dst_register(emit, &tmp_dst);
9886 emit_rasterizer_register(emit);
9887 emit_src_register(emit, &sample_index_reg);
9888 end_emit_instruction(emit);
9889
9890 /* Convert from D3D coords to GL coords by adding 0.5 bias */
9891 /* ADD dst, dst, half */
9892 begin_emit_instruction(emit);
9893 emit_opcode(emit, VGPU10_OPCODE_ADD, FALSE);
9894 emit_dst_register(emit, &tmp_dst);
9895 emit_src_register(emit, &tmp_src);
9896 emit_src_register(emit, &half);
9897 end_emit_instruction(emit);
9898 }
9899 }
9900
9901
9902 /**
9903 * Emit extra instructions to adjust VS inputs/attributes. This can
9904 * mean casting a vertex attribute from int to float or setting the
9905 * W component to 1, or both.
9906 */
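/*
 * For example (illustrative), an attribute that needs both an int-to-float
 * cast and W forced to 1 expands to roughly:
 *   ITOF tmp, INPUT[index]
 *   MOV  tmp.w, 1.0
 */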
9907 static void
9908 emit_vertex_attrib_instructions(struct svga_shader_emitter_v10 *emit)
9909 {
9910 const unsigned save_w_1_mask = emit->key.vs.adjust_attrib_w_1;
9911 const unsigned save_itof_mask = emit->key.vs.adjust_attrib_itof;
9912 const unsigned save_utof_mask = emit->key.vs.adjust_attrib_utof;
9913 const unsigned save_is_bgra_mask = emit->key.vs.attrib_is_bgra;
9914 const unsigned save_puint_to_snorm_mask = emit->key.vs.attrib_puint_to_snorm;
9915 const unsigned save_puint_to_uscaled_mask = emit->key.vs.attrib_puint_to_uscaled;
9916 const unsigned save_puint_to_sscaled_mask = emit->key.vs.attrib_puint_to_sscaled;
9917
9918 unsigned adjust_mask = (save_w_1_mask |
9919 save_itof_mask |
9920 save_utof_mask |
9921 save_is_bgra_mask |
9922 save_puint_to_snorm_mask |
9923 save_puint_to_uscaled_mask |
9924 save_puint_to_sscaled_mask);
9925
9926 assert(emit->unit == PIPE_SHADER_VERTEX);
9927
9928 if (adjust_mask) {
9929 struct tgsi_full_src_register one =
9930 make_immediate_reg_float(emit, 1.0f);
9931
9932 struct tgsi_full_src_register one_int =
9933 make_immediate_reg_int(emit, 1);
9934
9935 /* We need to turn off these bitmasks while emitting the
9936 * instructions below, then restore them afterward.
9937 */
9938 emit->key.vs.adjust_attrib_w_1 = 0;
9939 emit->key.vs.adjust_attrib_itof = 0;
9940 emit->key.vs.adjust_attrib_utof = 0;
9941 emit->key.vs.attrib_is_bgra = 0;
9942 emit->key.vs.attrib_puint_to_snorm = 0;
9943 emit->key.vs.attrib_puint_to_uscaled = 0;
9944 emit->key.vs.attrib_puint_to_sscaled = 0;
9945
9946 while (adjust_mask) {
9947 unsigned index = u_bit_scan(&adjust_mask);
9948
9949 /* skip the instruction if this vertex attribute is not being used */
9950 if (emit->info.input_usage_mask[index] == 0)
9951 continue;
9952
9953 unsigned tmp = emit->vs.adjusted_input[index];
9954 struct tgsi_full_src_register input_src =
9955 make_src_reg(TGSI_FILE_INPUT, index);
9956
9957 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
9958 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
9959 struct tgsi_full_dst_register tmp_dst_w =
9960 writemask_dst(&tmp_dst, TGSI_WRITEMASK_W);
9961
9962 /* ITOF/UTOF/MOV tmp, input[index] */
9963 if (save_itof_mask & (1 << index)) {
9964 emit_instruction_op1(emit, VGPU10_OPCODE_ITOF,
9965 &tmp_dst, &input_src);
9966 }
9967 else if (save_utof_mask & (1 << index)) {
9968 emit_instruction_op1(emit, VGPU10_OPCODE_UTOF,
9969 &tmp_dst, &input_src);
9970 }
9971 else if (save_puint_to_snorm_mask & (1 << index)) {
9972 emit_puint_to_snorm(emit, &tmp_dst, &input_src);
9973 }
9974 else if (save_puint_to_uscaled_mask & (1 << index)) {
9975 emit_puint_to_uscaled(emit, &tmp_dst, &input_src);
9976 }
9977 else if (save_puint_to_sscaled_mask & (1 << index)) {
9978 emit_puint_to_sscaled(emit, &tmp_dst, &input_src);
9979 }
9980 else {
9981 assert((save_w_1_mask | save_is_bgra_mask) & (1 << index));
9982 emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
9983 &tmp_dst, &input_src);
9984 }
9985
9986 if (save_is_bgra_mask & (1 << index)) {
9987 emit_swap_r_b(emit, &tmp_dst, &tmp_src);
9988 }
9989
9990 if (save_w_1_mask & (1 << index)) {
9991 /* MOV tmp.w, 1.0 */
9992 if (emit->key.vs.attrib_is_pure_int & (1 << index)) {
9993 emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
9994 &tmp_dst_w, &one_int);
9995 }
9996 else {
9997 emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
9998 &tmp_dst_w, &one);
9999 }
10000 }
10001 }
10002
10003 emit->key.vs.adjust_attrib_w_1 = save_w_1_mask;
10004 emit->key.vs.adjust_attrib_itof = save_itof_mask;
10005 emit->key.vs.adjust_attrib_utof = save_utof_mask;
10006 emit->key.vs.attrib_is_bgra = save_is_bgra_mask;
10007 emit->key.vs.attrib_puint_to_snorm = save_puint_to_snorm_mask;
10008 emit->key.vs.attrib_puint_to_uscaled = save_puint_to_uscaled_mask;
10009 emit->key.vs.attrib_puint_to_sscaled = save_puint_to_sscaled_mask;
10010 }
10011 }
10012
10013
10014 /* Find the zero-value immediate for the default layer index */
10015 static void
10016 emit_default_layer_instructions(struct svga_shader_emitter_v10 *emit)
10017 {
10018 assert(emit->unit == PIPE_SHADER_FRAGMENT);
10019
10020 /* immediate for default layer index 0 */
10021 if (emit->fs.layer_input_index != INVALID_INDEX) {
10022 union tgsi_immediate_data imm;
10023 imm.Int = 0;
10024 emit->fs.layer_imm_index = find_immediate(emit, imm, 0);
10025 }
10026 }
10027
10028
10029 static void
10030 emit_temp_prescale_from_cbuf(struct svga_shader_emitter_v10 *emit,
10031 unsigned cbuf_index,
10032 struct tgsi_full_dst_register *scale,
10033 struct tgsi_full_dst_register *translate)
10034 {
10035 struct tgsi_full_src_register scale_cbuf = make_src_const_reg(cbuf_index);
10036 struct tgsi_full_src_register trans_cbuf = make_src_const_reg(cbuf_index+1);
10037
10038 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, scale, &scale_cbuf);
10039 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, translate, &trans_cbuf);
10040 }
10041
10042
10043 /**
10044 * A recursive helper function to find the prescale from the constant buffer
10045 */
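/*
 * For example, with num_prescale == 3 the recursion expands to roughly
 * (illustrative):
 *   GE tmp, vp_index, 0
 *   IF tmp
 *     <load prescale[0]>
 *   ELSE
 *     EQ tmp, vp_index, 1
 *     IF tmp
 *       <load prescale[1]>
 *     ELSE
 *       EQ tmp, vp_index, 2
 *       IF tmp
 *         <load prescale[2]>
 *       ENDIF
 *     ENDIF
 *   ENDIF
 */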
10046 static void
10047 find_prescale_from_cbuf(struct svga_shader_emitter_v10 *emit,
10048 unsigned index, unsigned num_prescale,
10049 struct tgsi_full_src_register *vp_index,
10050 struct tgsi_full_dst_register *scale,
10051 struct tgsi_full_dst_register *translate,
10052 struct tgsi_full_src_register *tmp_src,
10053 struct tgsi_full_dst_register *tmp_dst)
10054 {
10055 if (num_prescale == 0)
10056 return;
10057
10058 if (index > 0) {
10059 /* ELSE */
10060 emit_instruction_op0(emit, VGPU10_OPCODE_ELSE);
10061 }
10062
10063 struct tgsi_full_src_register index_src =
10064 make_immediate_reg_int(emit, index);
10065
10066 if (index == 0) {
10067 /* GE tmp, vp_index, index */
10068 emit_instruction_op2(emit, VGPU10_OPCODE_GE, tmp_dst,
10069 vp_index, &index_src);
10070 } else {
10071 /* EQ tmp, vp_index, index */
10072 emit_instruction_op2(emit, VGPU10_OPCODE_EQ, tmp_dst,
10073 vp_index, &index_src);
10074 }
10075
10076 /* IF tmp */
10077 emit_if(emit, tmp_src);
10078 emit_temp_prescale_from_cbuf(emit,
10079 emit->vposition.prescale_cbuf_index + 2 * index,
10080 scale, translate);
10081
10082 find_prescale_from_cbuf(emit, index+1, num_prescale-1,
10083 vp_index, scale, translate,
10084 tmp_src, tmp_dst);
10085
10086 /* ENDIF */
10087 emit_instruction_op0(emit, VGPU10_OPCODE_ENDIF);
10088 }
10089
10090
10091 /**
10092 * This helper function emits instructions to set the prescale
10093 * and translate temporaries to the correct constants from the
10094 * constant buffer according to the designated viewport.
10095 */
10096 static void
10097 emit_temp_prescale_instructions(struct svga_shader_emitter_v10 *emit)
10098 {
10099 struct tgsi_full_dst_register prescale_scale =
10100 make_dst_temp_reg(emit->vposition.prescale_scale_index);
10101 struct tgsi_full_dst_register prescale_translate =
10102 make_dst_temp_reg(emit->vposition.prescale_trans_index);
10103
10104 unsigned prescale_cbuf_index = emit->vposition.prescale_cbuf_index;
10105
10106 if (emit->vposition.num_prescale == 1) {
10107 emit_temp_prescale_from_cbuf(emit,
10108 prescale_cbuf_index,
10109 &prescale_scale, &prescale_translate);
10110 } else {
10111 /**
10112 * Since the SM5 device does not support dynamic indexing, we need
10113 * an if-else chain to find the prescale constants for the
10114 * specified viewport.
10115 */
10116 struct tgsi_full_src_register vp_index_src =
10117 make_src_temp_reg(emit->gs.viewport_index_tmp_index);
10118
10119 struct tgsi_full_src_register vp_index_src_x =
10120 scalar_src(&vp_index_src, TGSI_SWIZZLE_X);
10121
10122 unsigned tmp = get_temp_index(emit);
10123 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
10124 struct tgsi_full_src_register tmp_src_x =
10125 scalar_src(&tmp_src, TGSI_SWIZZLE_X);
10126 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
10127
10128 find_prescale_from_cbuf(emit, 0, emit->vposition.num_prescale,
10129 &vp_index_src_x,
10130 &prescale_scale, &prescale_translate,
10131 &tmp_src_x, &tmp_dst);
10132 }
10133
10134 /* Mark that the prescale temporaries have been emitted */
10135 emit->vposition.have_prescale = 1;
10136 }
10137
10138
10139 /**
10140 * A helper function to emit an instruction in a vertex shader to add a bias
10141 * to the VertexID system value. This patches the VertexID in the SVGA vertex
10142 * shader to include the base vertex of an indexed primitive or the start index
10143 * of a non-indexed primitive.
10144 */
10145 static void
10146 emit_vertex_id_nobase_instruction(struct svga_shader_emitter_v10 *emit)
10147 {
10148 struct tgsi_full_src_register vertex_id_bias_index =
10149 make_src_const_reg(emit->vs.vertex_id_bias_index);
10150 struct tgsi_full_src_register vertex_id_sys_src =
10151 make_src_reg(TGSI_FILE_SYSTEM_VALUE, emit->vs.vertex_id_sys_index);
10152 struct tgsi_full_src_register vertex_id_sys_src_x =
10153 scalar_src(&vertex_id_sys_src, TGSI_SWIZZLE_X);
10154 struct tgsi_full_dst_register vertex_id_tmp_dst =
10155 make_dst_temp_reg(emit->vs.vertex_id_tmp_index);
10156
10157 /* IADD vertex_id_tmp, vertex_id_sys, vertex_id_bias */
10158 unsigned vertex_id_tmp_index = emit->vs.vertex_id_tmp_index;
10159 emit->vs.vertex_id_tmp_index = INVALID_INDEX;
10160 emit_instruction_opn(emit, VGPU10_OPCODE_IADD, &vertex_id_tmp_dst,
10161 &vertex_id_sys_src_x, &vertex_id_bias_index, NULL, FALSE,
10162 FALSE);
10163 emit->vs.vertex_id_tmp_index = vertex_id_tmp_index;
10164 }
10165
10166 /**
10167 * A hull shader must have control point outputs, but a tessellation
10168 * control shader can return without writing to a control point output.
10169 * In that case, the control point output is assumed to be a passthrough
10170 * of the control point input.
10171 * This helper function writes out a control point output first, in case
10172 * the tessellation control shader returns before writing a
10173 * control point output.
10174 */
10175 static void
10176 emit_tcs_default_control_point_output(struct svga_shader_emitter_v10 *emit)
10177 {
10178 assert(emit->unit == PIPE_SHADER_TESS_CTRL);
10179 assert(emit->tcs.control_point_phase);
10180 assert(emit->tcs.control_point_input_index != INVALID_INDEX);
10181 assert(emit->tcs.control_point_out_index != INVALID_INDEX);
10182 assert(emit->tcs.invocation_id_sys_index != INVALID_INDEX);
10183
10184 /* UARL ADDR[INDEX].x INVOCATION.xxxx */
10185
10186 struct tgsi_full_src_register invocation_src;
10187 struct tgsi_full_dst_register addr_dst;
10188 struct tgsi_full_dst_register addr_dst_x;
10189 unsigned addr_tmp;
10190
10191 addr_tmp = emit->address_reg_index[emit->tcs.control_point_addr_index];
10192 addr_dst = make_dst_temp_reg(addr_tmp);
10193 addr_dst_x = writemask_dst(&addr_dst, TGSI_WRITEMASK_X);
10194
10195 invocation_src = make_src_reg(TGSI_FILE_SYSTEM_VALUE,
10196 emit->tcs.invocation_id_sys_index);
10197
10198 begin_emit_instruction(emit);
10199 emit_opcode_precise(emit, VGPU10_OPCODE_MOV, FALSE, FALSE);
10200 emit_dst_register(emit, &addr_dst_x);
10201 emit_src_register(emit, &invocation_src);
10202 end_emit_instruction(emit);
10203
10204
10205 /* MOV OUTPUT INPUT[ADDR[INDEX].x][POSITION] */
10206
10207 struct tgsi_full_src_register input_control_point;
10208 struct tgsi_full_dst_register output_control_point;
10209
10210 input_control_point = make_src_reg(TGSI_FILE_INPUT,
10211 emit->tcs.control_point_input_index);
10212 input_control_point.Register.Dimension = 1;
10213 input_control_point.Dimension.Indirect = 1;
10214 input_control_point.DimIndirect.File = TGSI_FILE_ADDRESS;
10215 input_control_point.DimIndirect.Index = emit->tcs.control_point_addr_index;
10216 output_control_point =
10217 make_dst_output_reg(emit->tcs.control_point_out_index);
10218
10219 begin_emit_instruction(emit);
10220 emit_opcode_precise(emit, VGPU10_OPCODE_MOV, FALSE, FALSE);
10221 emit_dst_register(emit, &output_control_point);
10222 emit_src_register(emit, &input_control_point);
10223 end_emit_instruction(emit);
10224 }
10225
10226 /**
10227  * This function constructs temporary tessfactors from the VGPU10*_TESSFACTOR
10228  * values in the domain shader. SM5 exposes the tessfactors as scalar floating
10229  * point values, whereas TGSI treats them as vectors. This function builds a
10230  * temporary tessfactor vector, matching TGSI_SEMANTIC_TESSINNER/OUTER, filled
10231  * with the VGPU10*_TESSFACTOR values. This constructed vector is used wherever
10232  * TGSI_SEMANTIC_TESSINNER/OUTER appears in the shader.
10233 */
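/*
 * Summary of the mapping implemented below, where inner_in[i] / outer_in[i]
 * denote INPUT[tes.inner.in_index + i] / INPUT[tes.outer.in_index + i]:
 *
 *   quads:     outer.xyzw <- outer_in[0..3].x,  inner.xy <- inner_in[0..1].x
 *   triangles: outer.xyz  <- outer_in[0..2].x,  inner.x  <- inner_in[0].x
 *   isolines:  outer.xy   <- outer_in[0..1].x,  inner    <- 1.0 (see below)
 */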
10234 static void
10235 emit_temp_tessfactor_instructions(struct svga_shader_emitter_v10 *emit)
10236 {
10237 struct tgsi_full_src_register src;
10238 struct tgsi_full_dst_register dst;
10239
10240 if (emit->tes.inner.tgsi_index != INVALID_INDEX) {
10241 dst = make_dst_temp_reg(emit->tes.inner.temp_index);
10242
10243 switch (emit->tes.prim_mode) {
10244 case PIPE_PRIM_QUADS:
10245 src = make_src_scalar_reg(TGSI_FILE_INPUT,
10246 emit->tes.inner.in_index + 1, TGSI_SWIZZLE_X);
10247 dst = writemask_dst(&dst, TGSI_WRITEMASK_Y);
10248 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
10249 /* fallthrough */
10250 case PIPE_PRIM_TRIANGLES:
10251 src = make_src_scalar_reg(TGSI_FILE_INPUT,
10252 emit->tes.inner.in_index, TGSI_SWIZZLE_X);
10253 dst = writemask_dst(&dst, TGSI_WRITEMASK_X);
10254 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
10255 break;
10256 case PIPE_PRIM_LINES:
10257 /**
10258        * Per the SM5 spec, InsideTessFactor is unused for isolines.
10259        * In fact, GLSL's inner tessellation level has no meaning for isolines,
10260        * but if an application tries to read it in the TES when the primitive
10261        * type is isolines, return vec(1.0f) instead of letting the driver
10262        * fault on the access.
10263 */
10264 src = make_immediate_reg_float(emit, 1.0f);
10265 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
10266 break;
10267 default:
10268 break;
10269 }
10270 }
10271
10272 if (emit->tes.outer.tgsi_index != INVALID_INDEX) {
10273 dst = make_dst_temp_reg(emit->tes.outer.temp_index);
10274
10275 switch (emit->tes.prim_mode) {
10276 case PIPE_PRIM_QUADS:
10277 src = make_src_scalar_reg(TGSI_FILE_INPUT,
10278 emit->tes.outer.in_index + 3, TGSI_SWIZZLE_X);
10279 dst = writemask_dst(&dst, TGSI_WRITEMASK_W);
10280 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
10281 /* fallthrough */
10282 case PIPE_PRIM_TRIANGLES:
10283 src = make_src_scalar_reg(TGSI_FILE_INPUT,
10284 emit->tes.outer.in_index + 2, TGSI_SWIZZLE_X);
10285 dst = writemask_dst(&dst, TGSI_WRITEMASK_Z);
10286 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
10287 /* fallthrough */
10288 case PIPE_PRIM_LINES:
10289 src = make_src_scalar_reg(TGSI_FILE_INPUT,
10290 emit->tes.outer.in_index + 1, TGSI_SWIZZLE_X);
10291 dst = writemask_dst(&dst, TGSI_WRITEMASK_Y);
10292 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
10293
10294 src = make_src_scalar_reg(TGSI_FILE_INPUT,
10295 emit->tes.outer.in_index, TGSI_SWIZZLE_X);
10296 dst = writemask_dst(&dst, TGSI_WRITEMASK_X);
10297 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
10298
10299 break;
10300 default:
10301 break;
10302 }
10303 }
10304 }
10305
10306
10307 static void
10308 emit_initialize_temp_instruction(struct svga_shader_emitter_v10 *emit)
10309 {
10310 struct tgsi_full_src_register src;
10311 struct tgsi_full_dst_register dst;
10312 unsigned vgpu10_temp_index = remap_temp_index(emit, TGSI_FILE_TEMPORARY,
10313 emit->initialize_temp_index);
10314 src = make_immediate_reg_float(emit, 0.0f);
10315 dst = make_dst_temp_reg(vgpu10_temp_index);
10316 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
10317 emit->temp_map[emit->initialize_temp_index].initialized = TRUE;
10318 emit->initialize_temp_index = INVALID_INDEX;
10319 }
10320
10321
10322 /**
10323 * Emit any extra/helper declarations/code that we might need between
10324 * the declaration section and code section.
10325 */
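/*
 * Rough order of what is emitted here:
 *   1. per-stage properties/declarations (GS properties, HS/DS declarations)
 *   2. input and output declarations
 *   3. temporary register declarations
 *   4. constants, samplers, resources and immediates (skipped for the TCS,
 *      which already declared them in hs_decls)
 *   5. clip distance declarations (non-FS) and the alpha-ref immediate (FS)
 *   6. per-stage helper instructions: frontface/fragcoord/sample-position
 *      for the FS, vertex attribs and VertexID bias for the VS, temp
 *      tessfactors for the TES, the default control point output for the
 *      TCS, and the prescale temporaries when a single prescale is used
 */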
10326 static boolean
10327 emit_pre_helpers(struct svga_shader_emitter_v10 *emit)
10328 {
10329 /* Properties */
10330 if (emit->unit == PIPE_SHADER_GEOMETRY)
10331 emit_property_instructions(emit);
10332 else if (emit->unit == PIPE_SHADER_TESS_CTRL) {
10333 emit_hull_shader_declarations(emit);
10334
10335 /* Save the position of the first instruction token so that we can
10336 * do a second pass of the instructions for the patch constant phase.
10337 */
10338 emit->tcs.instruction_token_pos = emit->cur_tgsi_token;
10339 emit->tcs.fork_phase_add_signature = FALSE;
10340
10341 if (!emit_hull_shader_control_point_phase(emit)) {
10342 emit->skip_instruction = TRUE;
10343 return TRUE;
10344 }
10345
10346 /* Set the current tcs phase to control point phase */
10347 emit->tcs.control_point_phase = TRUE;
10348 }
10349 else if (emit->unit == PIPE_SHADER_TESS_EVAL) {
10350 emit_domain_shader_declarations(emit);
10351 }
10352
10353 /* Declare inputs */
10354 if (!emit_input_declarations(emit))
10355 return FALSE;
10356
10357 /* Declare outputs */
10358 if (!emit_output_declarations(emit))
10359 return FALSE;
10360
10361 /* Declare temporary registers */
10362 emit_temporaries_declaration(emit);
10363
10364 /* For PIPE_SHADER_TESS_CTRL, constants, samplers, resources and immediates
10365 * will already be declared in hs_decls (emit_hull_shader_declarations)
10366 */
10367 if (emit->unit != PIPE_SHADER_TESS_CTRL) {
10368 /* Declare constant registers */
10369 emit_constant_declaration(emit);
10370
10371 /* Declare samplers and resources */
10372 emit_sampler_declarations(emit);
10373 emit_resource_declarations(emit);
10374
10375 alloc_common_immediates(emit);
10376       /* The constant block containing all the immediates declared by the
10377        * shader, as well as the extra ones allocated above, is emitted below.
10378 */
10379 }
10380
10381 if (emit->unit != PIPE_SHADER_FRAGMENT) {
10382 /*
10383 * Declare clip distance output registers for ClipVertex or
10384 * user defined planes
10385 */
10386 emit_clip_distance_declarations(emit);
10387 }
10388
10389 if (emit->unit == PIPE_SHADER_FRAGMENT &&
10390 emit->key.fs.alpha_func != SVGA3D_CMP_ALWAYS) {
10391 float alpha = emit->key.fs.alpha_ref;
10392 emit->fs.alpha_ref_index =
10393 alloc_immediate_float4(emit, alpha, alpha, alpha, alpha);
10394 }
10395
10396 if (emit->unit != PIPE_SHADER_TESS_CTRL) {
10397 /**
10398 * For PIPE_SHADER_TESS_CTRL, immediates are already declared in
10399 * hs_decls
10400 */
10401 emit_vgpu10_immediates_block(emit);
10402 }
10403 else {
10404 emit_tcs_default_control_point_output(emit);
10405 }
10406
10407 if (emit->unit == PIPE_SHADER_FRAGMENT) {
10408 emit_frontface_instructions(emit);
10409 emit_fragcoord_instructions(emit);
10410 emit_sample_position_instructions(emit);
10411 emit_default_layer_instructions(emit);
10412 }
10413 else if (emit->unit == PIPE_SHADER_VERTEX) {
10414 emit_vertex_attrib_instructions(emit);
10415
10416 if (emit->info.uses_vertexid)
10417 emit_vertex_id_nobase_instruction(emit);
10418 }
10419 else if (emit->unit == PIPE_SHADER_TESS_EVAL) {
10420 emit_temp_tessfactor_instructions(emit);
10421 }
10422
10423 /**
10424    * For a geometry shader that writes to the viewport index, the prescale
10425    * temporaries will be set up at the first vertex emission instead.
10426 */
10427 if (emit->vposition.need_prescale && emit->vposition.num_prescale == 1)
10428 emit_temp_prescale_instructions(emit);
10429
10430 return TRUE;
10431 }
10432
10433
10434 /**
10435 * The device has no direct support for the pipe_blend_state::alpha_to_one
10436 * option so we implement it here with shader code.
10437 *
10438 * Note that this is kind of pointless, actually. Here we're clobbering
10439 * the alpha value with 1.0. So if alpha-to-coverage is enabled, we'll wind
10440 * up with 100% coverage. That's almost certainly not what the user wants.
10441 * The work-around is to add extra shader code to compute coverage from alpha
10442 * and write it to the coverage output register (if the user's shader doesn't
10443 * do so already). We'll probably do that in the future.
10444 */
10445 static void
10446 emit_alpha_to_one_instructions(struct svga_shader_emitter_v10 *emit,
10447 unsigned fs_color_tmp_index)
10448 {
10449 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
10450 unsigned i;
10451
10452 /* Note: it's not 100% clear from the spec if we're supposed to clobber
10453 * the alpha for all render targets. But that's what NVIDIA does and
10454 * that's what Piglit tests.
10455 */
10456 for (i = 0; i < emit->fs.num_color_outputs; i++) {
10457 struct tgsi_full_dst_register color_dst;
10458
10459 if (fs_color_tmp_index != INVALID_INDEX && i == 0) {
10460 /* write to the temp color register */
10461 color_dst = make_dst_temp_reg(fs_color_tmp_index);
10462 }
10463 else {
10464 /* write directly to the color[i] output */
10465 color_dst = make_dst_output_reg(emit->fs.color_out_index[i]);
10466 }
10467
10468 color_dst = writemask_dst(&color_dst, TGSI_WRITEMASK_W);
10469
10470 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &color_dst, &one);
10471 }
10472 }
10473
10474
10475 /**
10476 * Emit alpha test code. This compares TEMP[fs_color_tmp_index].w
10477 * against the alpha reference value and discards the fragment if the
10478 * comparison fails.
10479 */
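/*
 * A sketch of the generated code (illustrative):
 *
 *   tmp.x = (TEMP[fs_color_tmp_index].w  <alpha_func>  alpha_ref)
 *   DISCARD if tmp.x == 0
 *   MOV OUTPUT[color0], TEMP[fs_color_tmp_index]   ; only when no broadcast
 *                                                  ; pass follows
 */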
10480 static void
10481 emit_alpha_test_instructions(struct svga_shader_emitter_v10 *emit,
10482 unsigned fs_color_tmp_index)
10483 {
10484 /* compare output color's alpha to alpha ref and kill */
10485 unsigned tmp = get_temp_index(emit);
10486 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
10487 struct tgsi_full_src_register tmp_src_x =
10488 scalar_src(&tmp_src, TGSI_SWIZZLE_X);
10489 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
10490 struct tgsi_full_src_register color_src =
10491 make_src_temp_reg(fs_color_tmp_index);
10492 struct tgsi_full_src_register color_src_w =
10493 scalar_src(&color_src, TGSI_SWIZZLE_W);
10494 struct tgsi_full_src_register ref_src =
10495 make_src_immediate_reg(emit->fs.alpha_ref_index);
10496 struct tgsi_full_dst_register color_dst =
10497 make_dst_output_reg(emit->fs.color_out_index[0]);
10498
10499 assert(emit->unit == PIPE_SHADER_FRAGMENT);
10500
10501 /* dst = src0 'alpha_func' src1 */
10502 emit_comparison(emit, emit->key.fs.alpha_func, &tmp_dst,
10503 &color_src_w, &ref_src);
10504
10505 /* DISCARD if dst.x == 0 */
10506 begin_emit_instruction(emit);
10507 emit_discard_opcode(emit, FALSE); /* discard if src0.x is zero */
10508 emit_src_register(emit, &tmp_src_x);
10509 end_emit_instruction(emit);
10510
10511 /* If we don't need to broadcast the color below, emit the final color here.
10512 */
10513 if (emit->key.fs.write_color0_to_n_cbufs <= 1) {
10514 /* MOV output.color, tempcolor */
10515 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &color_dst, &color_src);
10516 }
10517
10518 free_temp_indexes(emit);
10519 }
10520
10521
10522 /**
10523 * Emit instructions for writing a single color output to multiple
10524 * color buffers.
10525  * This is used when the TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS property
10526  * is set (or when key.fs.white_fragments is true) and the number of
10527  * render targets is greater than one.
10528 * \param fs_color_tmp_index index of the temp register that holds the
10529 * color to broadcast.
10530 */
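/*
 * Effectively (illustrative):
 *
 *   src = key.fs.white_fragments ? (1,1,1,1) : TEMP[fs_color_tmp_index]
 *   for i in 0 .. write_color0_to_n_cbufs - 1:
 *       MOV OUTPUT[color_out[i]], src
 */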
10531 static void
10532 emit_broadcast_color_instructions(struct svga_shader_emitter_v10 *emit,
10533 unsigned fs_color_tmp_index)
10534 {
10535 const unsigned n = emit->key.fs.write_color0_to_n_cbufs;
10536 unsigned i;
10537 struct tgsi_full_src_register color_src;
10538
10539 if (emit->key.fs.white_fragments) {
10540 /* set all color outputs to white */
10541 color_src = make_immediate_reg_float(emit, 1.0f);
10542 }
10543 else {
10544 /* set all color outputs to TEMP[fs_color_tmp_index] */
10545 assert(fs_color_tmp_index != INVALID_INDEX);
10546 color_src = make_src_temp_reg(fs_color_tmp_index);
10547 }
10548
10549 assert(emit->unit == PIPE_SHADER_FRAGMENT);
10550
10551 for (i = 0; i < n; i++) {
10552 unsigned output_reg = emit->fs.color_out_index[i];
10553 struct tgsi_full_dst_register color_dst =
10554 make_dst_output_reg(output_reg);
10555
10556 /* Fill in this semantic here since we'll use it later in
10557 * emit_dst_register().
10558 */
10559 emit->info.output_semantic_name[output_reg] = TGSI_SEMANTIC_COLOR;
10560
10561 /* MOV output.color[i], tempcolor */
10562 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &color_dst, &color_src);
10563 }
10564 }
10565
10566
10567 /**
10568 * Emit extra helper code after the original shader code, but before the
10569 * last END/RET instruction.
10570 * For vertex shaders this means emitting the extra code to apply the
10571 * prescale scale/translation.
10572 */
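/*
 * Per-stage summary of the work done here:
 *   VS/TES: emit the vertex position epilogue (prescale etc.)
 *   FS:     alpha-to-one, alpha test and color broadcast as required by the
 *           compile key, after disabling the color-temp substitution
 *   TCS:    store the tessellation levels (patch constant phase) or emit
 *           the clipping instructions (control point phase)
 */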
10573 static boolean
10574 emit_post_helpers(struct svga_shader_emitter_v10 *emit)
10575 {
10576 if (emit->unit == PIPE_SHADER_VERTEX) {
10577 emit_vertex_instructions(emit);
10578 }
10579 else if (emit->unit == PIPE_SHADER_FRAGMENT) {
10580 const unsigned fs_color_tmp_index = emit->fs.color_tmp_index;
10581
10582 assert(!(emit->key.fs.white_fragments &&
10583 emit->key.fs.write_color0_to_n_cbufs == 0));
10584
10585 /* We no longer want emit_dst_register() to substitute the
10586 * temporary fragment color register for the real color output.
10587 */
10588 emit->fs.color_tmp_index = INVALID_INDEX;
10589
10590 if (emit->key.fs.alpha_to_one) {
10591 emit_alpha_to_one_instructions(emit, fs_color_tmp_index);
10592 }
10593 if (emit->key.fs.alpha_func != SVGA3D_CMP_ALWAYS) {
10594 emit_alpha_test_instructions(emit, fs_color_tmp_index);
10595 }
10596 if (emit->key.fs.write_color0_to_n_cbufs > 1 ||
10597 emit->key.fs.white_fragments) {
10598 emit_broadcast_color_instructions(emit, fs_color_tmp_index);
10599 }
10600 }
10601 else if (emit->unit == PIPE_SHADER_TESS_CTRL) {
10602 if (!emit->tcs.control_point_phase) {
10603 /* store the tessellation levels in the patch constant phase only */
10604 store_tesslevels(emit);
10605 }
10606 else {
10607 emit_clipping_instructions(emit);
10608 }
10609 }
10610 else if (emit->unit == PIPE_SHADER_TESS_EVAL) {
10611 emit_vertex_instructions(emit);
10612 }
10613
10614 return TRUE;
10615 }
10616
10617
10618 /**
10619 * Translate the TGSI tokens into VGPU10 tokens.
10620 */
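/*
 * This is a single pass over the TGSI token stream: immediates,
 * declarations and properties are translated as they are encountered, and
 * the extra "pre helper" code is emitted once, just before the first
 * instruction.  For the TCS, a second pass over the saved instruction
 * tokens then generates the patch constant phase.
 */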
10621 static boolean
10622 emit_vgpu10_instructions(struct svga_shader_emitter_v10 *emit,
10623 const struct tgsi_token *tokens)
10624 {
10625 struct tgsi_parse_context parse;
10626 boolean ret = TRUE;
10627 boolean pre_helpers_emitted = FALSE;
10628 unsigned inst_number = 0;
10629
10630 tgsi_parse_init(&parse, tokens);
10631
10632 while (!tgsi_parse_end_of_tokens(&parse)) {
10633
10634 /* Save the current tgsi token starting position */
10635 emit->cur_tgsi_token = parse.Position;
10636
10637 tgsi_parse_token(&parse);
10638
10639 switch (parse.FullToken.Token.Type) {
10640 case TGSI_TOKEN_TYPE_IMMEDIATE:
10641 ret = emit_vgpu10_immediate(emit, &parse.FullToken.FullImmediate);
10642 if (!ret)
10643 goto done;
10644 break;
10645
10646 case TGSI_TOKEN_TYPE_DECLARATION:
10647 ret = emit_vgpu10_declaration(emit, &parse.FullToken.FullDeclaration);
10648 if (!ret)
10649 goto done;
10650 break;
10651
10652 case TGSI_TOKEN_TYPE_INSTRUCTION:
10653 if (!pre_helpers_emitted) {
10654 ret = emit_pre_helpers(emit);
10655 if (!ret)
10656 goto done;
10657 pre_helpers_emitted = TRUE;
10658 }
10659 ret = emit_vgpu10_instruction(emit, inst_number++,
10660 &parse.FullToken.FullInstruction);
10661
10662          /* Usually this applies to the TCS only. If the shader reads control
10663           * point outputs in the control point phase, we must re-emit all
10664           * instructions that write a control point output in the control point
10665           * phase so that the results are also stored in temporaries.
10666 */
10667 if (emit->reemit_instruction) {
10668 assert(emit->unit == PIPE_SHADER_TESS_CTRL);
10669 ret = emit_vgpu10_instruction(emit, inst_number,
10670 &parse.FullToken.FullInstruction);
10671 }
10672 else if (emit->initialize_temp_index != INVALID_INDEX) {
10673 emit_initialize_temp_instruction(emit);
10674 emit->initialize_temp_index = INVALID_INDEX;
10675 ret = emit_vgpu10_instruction(emit, inst_number - 1,
10676 &parse.FullToken.FullInstruction);
10677 }
10678
10679 if (!ret)
10680 goto done;
10681 break;
10682
10683 case TGSI_TOKEN_TYPE_PROPERTY:
10684 ret = emit_vgpu10_property(emit, &parse.FullToken.FullProperty);
10685 if (!ret)
10686 goto done;
10687 break;
10688
10689 default:
10690 break;
10691 }
10692 }
10693
10694 if (emit->unit == PIPE_SHADER_TESS_CTRL) {
10695 ret = emit_hull_shader_patch_constant_phase(emit, &parse);
10696 }
10697
10698 done:
10699 tgsi_parse_free(&parse);
10700 return ret;
10701 }
10702
10703
10704 /**
10705 * Emit the first VGPU10 shader tokens.
10706 */
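/*
 * Layout of the tokens emitted here (sketch):
 *
 *   [0]  version / program-type token
 *   [1]  total shader length in tokens (zero for now, patched later in
 *        emit_vgpu10_tail())
 *   SM5:  HS_DECLS marker (hull shaders only), then DCL_GLOBAL_FLAGS with
 *         enableDoublePrecisionFloatOps set
 *   SM4+: a reserved NOP token, later rewritten to DCL_GLOBAL_FLAGS
 *         (refactoringAllowed) if the shader uses no 'precise' operations
 */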
10707 static boolean
10708 emit_vgpu10_header(struct svga_shader_emitter_v10 *emit)
10709 {
10710 VGPU10ProgramToken ptoken;
10711
10712 /* First token: VGPU10ProgramToken (version info, program type (VS,GS,PS)) */
10713 ptoken.value = 0; /* init whole token to zero */
10714 ptoken.majorVersion = emit->version / 10;
10715 ptoken.minorVersion = emit->version % 10;
10716 ptoken.programType = translate_shader_type(emit->unit);
10717 if (!emit_dword(emit, ptoken.value))
10718 return FALSE;
10719
10720 /* Second token: total length of shader, in tokens. We can't fill this
10721 * in until we're all done. Emit zero for now.
10722 */
10723 if (!emit_dword(emit, 0))
10724 return FALSE;
10725
10726 if (emit->version >= 50) {
10727 VGPU10OpcodeToken0 token;
10728
10729 if (emit->unit == PIPE_SHADER_TESS_CTRL) {
10730 /* For hull shader, we need to start the declarations phase first before
10731 * emitting any declarations including the global flags.
10732 */
10733 token.value = 0;
10734 token.opcodeType = VGPU10_OPCODE_HS_DECLS;
10735 begin_emit_instruction(emit);
10736 emit_dword(emit, token.value);
10737 end_emit_instruction(emit);
10738 }
10739
10740 /* Emit global flags */
10741 token.value = 0; /* init whole token to zero */
10742 token.opcodeType = VGPU10_OPCODE_DCL_GLOBAL_FLAGS;
10743 token.enableDoublePrecisionFloatOps = 1; /* set bit */
10744 token.instructionLength = 1;
10745 if (!emit_dword(emit, token.value))
10746 return FALSE;
10747 }
10748
10749 if (emit->version >= 40) {
10750 VGPU10OpcodeToken0 token;
10751
10752       /* Reserved for a global flag such as refactoringAllowed.
10753        * If the shader does not use the precise qualifier, we will set the
10754        * refactoringAllowed global flag; otherwise, we will leave the reserved
10755        * token as a NOP.
10756 */
10757 emit->reserved_token = (emit->ptr - emit->buf) / sizeof(VGPU10OpcodeToken0);
10758 token.value = 0;
10759 token.opcodeType = VGPU10_OPCODE_NOP;
10760 token.instructionLength = 1;
10761 if (!emit_dword(emit, token.value))
10762 return FALSE;
10763 }
10764
10765 return TRUE;
10766 }
10767
10768
10769 static boolean
10770 emit_vgpu10_tail(struct svga_shader_emitter_v10 *emit)
10771 {
10772 VGPU10ProgramToken *tokens;
10773
10774 /* Replace the second token with total shader length */
10775 tokens = (VGPU10ProgramToken *) emit->buf;
10776 tokens[1].value = emit_get_num_tokens(emit);
10777
10778 if (emit->version >= 40 && !emit->uses_precise_qualifier) {
10779 /* Replace the reserved token with the RefactoringAllowed global flag */
10780 VGPU10OpcodeToken0 *ptoken;
10781
10782 ptoken = (VGPU10OpcodeToken0 *)&tokens[emit->reserved_token];
10783 assert(ptoken->opcodeType == VGPU10_OPCODE_NOP);
10784 ptoken->opcodeType = VGPU10_OPCODE_DCL_GLOBAL_FLAGS;
10785 ptoken->refactoringAllowed = 1;
10786 }
10787
10788 return TRUE;
10789 }
10790
10791
10792 /**
10793 * Modify the FS to read the BCOLORs and use the FACE register
10794 * to choose between the front/back colors.
10795 */
10796 static const struct tgsi_token *
10797 transform_fs_twoside(const struct tgsi_token *tokens)
10798 {
10799 if (0) {
10800 debug_printf("Before tgsi_add_two_side ------------------\n");
10801 tgsi_dump(tokens,0);
10802 }
10803 tokens = tgsi_add_two_side(tokens);
10804 if (0) {
10805 debug_printf("After tgsi_add_two_side ------------------\n");
10806 tgsi_dump(tokens, 0);
10807 }
10808 return tokens;
10809 }
10810
10811
10812 /**
10813 * Modify the FS to do polygon stipple.
10814 */
10815 static const struct tgsi_token *
10816 transform_fs_pstipple(struct svga_shader_emitter_v10 *emit,
10817 const struct tgsi_token *tokens)
10818 {
10819 const struct tgsi_token *new_tokens;
10820 unsigned unit;
10821
10822 if (0) {
10823 debug_printf("Before pstipple ------------------\n");
10824 tgsi_dump(tokens,0);
10825 }
10826
10827 new_tokens = util_pstipple_create_fragment_shader(tokens, &unit, 0,
10828 TGSI_FILE_INPUT);
10829
10830 emit->fs.pstipple_sampler_unit = unit;
10831
10832 /* Setup texture state for stipple */
10833 emit->sampler_target[unit] = TGSI_TEXTURE_2D;
10834 emit->key.tex[unit].swizzle_r = TGSI_SWIZZLE_X;
10835 emit->key.tex[unit].swizzle_g = TGSI_SWIZZLE_Y;
10836 emit->key.tex[unit].swizzle_b = TGSI_SWIZZLE_Z;
10837 emit->key.tex[unit].swizzle_a = TGSI_SWIZZLE_W;
10838
10839 if (0) {
10840 debug_printf("After pstipple ------------------\n");
10841 tgsi_dump(new_tokens, 0);
10842 }
10843
10844 return new_tokens;
10845 }
10846
10847 /**
10848  * Modify the FS to support anti-aliased points.
10849 */
10850 static const struct tgsi_token *
10851 transform_fs_aapoint(const struct tgsi_token *tokens,
10852 int aa_coord_index)
10853 {
10854 if (0) {
10855 debug_printf("Before tgsi_add_aa_point ------------------\n");
10856 tgsi_dump(tokens,0);
10857 }
10858 tokens = tgsi_add_aa_point(tokens, aa_coord_index);
10859 if (0) {
10860 debug_printf("After tgsi_add_aa_point ------------------\n");
10861 tgsi_dump(tokens, 0);
10862 }
10863 return tokens;
10864 }
10865
10866
10867 /**
10868 * A helper function to determine the shader in the previous stage and
10869 * then call the linker function to determine the input mapping for this
10870 * shader to match the output indices from the shader in the previous stage.
10871 */
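/*
 * Previous-stage selection, as implemented below:
 *   FS  <- GS, else TES, else VS
 *   GS  <- TES, else VS
 *   TES <- TCS
 *   TCS <- VS
 *   VS  <- none (only the input map maximum is fixed up)
 */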
10872 static void
10873 compute_input_mapping(struct svga_context *svga,
10874 struct svga_shader_emitter_v10 *emit,
10875 enum pipe_shader_type unit)
10876 {
10877 struct svga_shader *prevShader = NULL; /* shader in the previous stage */
10878
10879 if (unit == PIPE_SHADER_FRAGMENT) {
10880 prevShader = svga->curr.gs ?
10881 &svga->curr.gs->base : (svga->curr.tes ?
10882 &svga->curr.tes->base : &svga->curr.vs->base);
10883 } else if (unit == PIPE_SHADER_GEOMETRY) {
10884 prevShader = svga->curr.tes ? &svga->curr.tes->base : &svga->curr.vs->base;
10885 } else if (unit == PIPE_SHADER_TESS_EVAL) {
10886 assert(svga->curr.tcs);
10887 prevShader = &svga->curr.tcs->base;
10888 } else if (unit == PIPE_SHADER_TESS_CTRL) {
10889 assert(svga->curr.vs);
10890 prevShader = &svga->curr.vs->base;
10891 }
10892
10893 if (prevShader != NULL) {
10894 svga_link_shaders(&prevShader->info, &emit->info, &emit->linkage);
10895 emit->prevShaderInfo = &prevShader->info;
10896 }
10897 else {
10898 /**
10899        * Since the vertex shader does not need to go through the linker to
10900        * establish the input map, we need to make sure the highest input
10901        * register index is set properly here.
10902 */
10903 emit->linkage.input_map_max = MAX2((int)emit->linkage.input_map_max,
10904 emit->info.file_max[TGSI_FILE_INPUT]);
10905 }
10906 }
10907
10908
10909 /**
10910 * Copies the shader signature info to the shader variant
10911 */
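/*
 * The signature blob copied into the variant is laid out contiguously as:
 *
 *   SVGA3dDXShaderSignatureHeader
 *   SVGA3dDXShaderSignatureEntry inputs[numInputSignatures]
 *   SVGA3dDXShaderSignatureEntry outputs[numOutputSignatures]
 *   SVGA3dDXShaderSignatureEntry patchConstants[numPatchConstantSignatures]
 */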
10912 static void
10913 copy_shader_signature(struct svga_shader_signature *sgn,
10914 struct svga_shader_variant *variant)
10915 {
10916 SVGA3dDXShaderSignatureHeader *header = &sgn->header;
10917
10918 /* Calculate the signature length */
10919 variant->signatureLen = sizeof(SVGA3dDXShaderSignatureHeader) +
10920 (header->numInputSignatures +
10921 header->numOutputSignatures +
10922 header->numPatchConstantSignatures) *
10923 sizeof(SVGA3dDXShaderSignatureEntry);
10924
10925 /* Allocate buffer for the signature info */
10926 variant->signature =
10927 (SVGA3dDXShaderSignatureHeader *)CALLOC(1, variant->signatureLen);
10928
10929 char *sgnBuf = (char *)variant->signature;
10930 unsigned sgnLen;
10931
10932 /* Copy the signature info to the shader variant structure */
10933 memcpy(sgnBuf, &sgn->header, sizeof(SVGA3dDXShaderSignatureHeader));
10934 sgnBuf += sizeof(SVGA3dDXShaderSignatureHeader);
10935
10936 if (header->numInputSignatures) {
10937 sgnLen =
10938 header->numInputSignatures * sizeof(SVGA3dDXShaderSignatureEntry);
10939 memcpy(sgnBuf, &sgn->inputs[0], sgnLen);
10940 sgnBuf += sgnLen;
10941 }
10942
10943 if (header->numOutputSignatures) {
10944 sgnLen =
10945 header->numOutputSignatures * sizeof(SVGA3dDXShaderSignatureEntry);
10946 memcpy(sgnBuf, &sgn->outputs[0], sgnLen);
10947 sgnBuf += sgnLen;
10948 }
10949
10950 if (header->numPatchConstantSignatures) {
10951 sgnLen =
10952 header->numPatchConstantSignatures * sizeof(SVGA3dDXShaderSignatureEntry);
10953 memcpy(sgnBuf, &sgn->patchConstants[0], sgnLen);
10954 }
10955 }
10956
10957
10958 /**
10959  * This is the main entrypoint for the TGSI -> VGPU10 translator.
10960 */
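/*
 * High-level flow (see the code below for details):
 *   1. allocate and initialize the emitter; pick the target version
 *      (SM4.0 / SM4.1 / SM5) from the device capabilities
 *   2. for fragment shaders, apply the two-side / pstipple / aa-point TGSI
 *      transforms requested by the compile key
 *   3. scan the (possibly transformed) tokens, compute the input mapping
 *      against the previous stage and determine the clipping mode
 *   4. emit the VGPU10 header, instructions and tail
 *   5. package the result into a svga_shader_variant (including the shader
 *      signature on SM5 devices)
 */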
10961 struct svga_shader_variant *
10962 svga_tgsi_vgpu10_translate(struct svga_context *svga,
10963 const struct svga_shader *shader,
10964 const struct svga_compile_key *key,
10965 enum pipe_shader_type unit)
10966 {
10967 struct svga_shader_variant *variant = NULL;
10968 struct svga_shader_emitter_v10 *emit;
10969 const struct tgsi_token *tokens = shader->tokens;
10970
10971 (void) make_immediate_reg_double; /* unused at this time */
10972
10973 assert(unit == PIPE_SHADER_VERTEX ||
10974 unit == PIPE_SHADER_GEOMETRY ||
10975 unit == PIPE_SHADER_FRAGMENT ||
10976 unit == PIPE_SHADER_TESS_CTRL ||
10977 unit == PIPE_SHADER_TESS_EVAL ||
10978 unit == PIPE_SHADER_COMPUTE);
10979
10980 /* These two flags cannot be used together */
10981 assert(key->vs.need_prescale + key->vs.undo_viewport <= 1);
10982
10983 SVGA_STATS_TIME_PUSH(svga_sws(svga), SVGA_STATS_TIME_TGSIVGPU10TRANSLATE);
10984 /*
10985 * Setup the code emitter
10986 */
10987 emit = alloc_emitter();
10988 if (!emit)
10989 goto done;
10990
10991 emit->unit = unit;
10992 if (svga_have_sm5(svga)) {
10993 emit->version = 50;
10994 } else if (svga_have_sm4_1(svga)) {
10995 emit->version = 41;
10996 } else {
10997 emit->version = 40;
10998 }
10999
11000 emit->signature.header.headerVersion = SVGADX_SIGNATURE_HEADER_VERSION_0;
11001
11002 emit->key = *key;
11003
11004 emit->vposition.need_prescale = (emit->key.vs.need_prescale ||
11005 emit->key.gs.need_prescale ||
11006 emit->key.tes.need_prescale);
11007
11008 /* Determine how many prescale factors in the constant buffer */
11009 emit->vposition.num_prescale = 1;
11010 if (emit->vposition.need_prescale && emit->key.gs.writes_viewport_index) {
11011 assert(emit->unit == PIPE_SHADER_GEOMETRY);
11012 emit->vposition.num_prescale = emit->key.gs.num_prescale;
11013 }
11014
11015 emit->vposition.tmp_index = INVALID_INDEX;
11016 emit->vposition.so_index = INVALID_INDEX;
11017 emit->vposition.out_index = INVALID_INDEX;
11018
11019 emit->vs.vertex_id_sys_index = INVALID_INDEX;
11020 emit->vs.vertex_id_tmp_index = INVALID_INDEX;
11021 emit->vs.vertex_id_bias_index = INVALID_INDEX;
11022
11023 emit->fs.color_tmp_index = INVALID_INDEX;
11024 emit->fs.face_input_index = INVALID_INDEX;
11025 emit->fs.fragcoord_input_index = INVALID_INDEX;
11026 emit->fs.sample_id_sys_index = INVALID_INDEX;
11027 emit->fs.sample_pos_sys_index = INVALID_INDEX;
11028 emit->fs.sample_mask_in_sys_index = INVALID_INDEX;
11029 emit->fs.layer_input_index = INVALID_INDEX;
11030 emit->fs.layer_imm_index = INVALID_INDEX;
11031
11032 emit->gs.prim_id_index = INVALID_INDEX;
11033 emit->gs.invocation_id_sys_index = INVALID_INDEX;
11034 emit->gs.viewport_index_out_index = INVALID_INDEX;
11035 emit->gs.viewport_index_tmp_index = INVALID_INDEX;
11036
11037 emit->tcs.vertices_per_patch_index = INVALID_INDEX;
11038 emit->tcs.invocation_id_sys_index = INVALID_INDEX;
11039 emit->tcs.control_point_input_index = INVALID_INDEX;
11040 emit->tcs.control_point_addr_index = INVALID_INDEX;
11041 emit->tcs.control_point_out_index = INVALID_INDEX;
11042 emit->tcs.control_point_tmp_index = INVALID_INDEX;
11043 emit->tcs.control_point_out_count = 0;
11044 emit->tcs.inner.out_index = INVALID_INDEX;
11046 emit->tcs.inner.temp_index = INVALID_INDEX;
11047 emit->tcs.inner.tgsi_index = INVALID_INDEX;
11048 emit->tcs.outer.out_index = INVALID_INDEX;
11049 emit->tcs.outer.temp_index = INVALID_INDEX;
11050 emit->tcs.outer.tgsi_index = INVALID_INDEX;
11051 emit->tcs.patch_generic_out_count = 0;
11052 emit->tcs.patch_generic_out_index = INVALID_INDEX;
11053 emit->tcs.patch_generic_tmp_index = INVALID_INDEX;
11054 emit->tcs.prim_id_index = INVALID_INDEX;
11055
11056 emit->tes.tesscoord_sys_index = INVALID_INDEX;
11057 emit->tes.inner.in_index = INVALID_INDEX;
11058 emit->tes.inner.temp_index = INVALID_INDEX;
11059 emit->tes.inner.tgsi_index = INVALID_INDEX;
11060 emit->tes.outer.in_index = INVALID_INDEX;
11061 emit->tes.outer.temp_index = INVALID_INDEX;
11062 emit->tes.outer.tgsi_index = INVALID_INDEX;
11063 emit->tes.prim_id_index = INVALID_INDEX;
11064
11065 emit->clip_dist_out_index = INVALID_INDEX;
11066 emit->clip_dist_tmp_index = INVALID_INDEX;
11067 emit->clip_dist_so_index = INVALID_INDEX;
11068 emit->clip_vertex_out_index = INVALID_INDEX;
11069 emit->clip_vertex_tmp_index = INVALID_INDEX;
11070 emit->svga_debug_callback = svga->debug.callback;
11071
11072 emit->index_range.start_index = INVALID_INDEX;
11073 emit->index_range.count = 0;
11074 emit->index_range.required = FALSE;
11075 emit->index_range.operandType = VGPU10_NUM_OPERANDS;
11076 emit->index_range.dim = 0;
11077 emit->index_range.size = 0;
11078
11079 emit->current_loop_depth = 0;
11080
11081 emit->initialize_temp_index = INVALID_INDEX;
11082
11083 if (emit->key.fs.alpha_func == SVGA3D_CMP_INVALID) {
11084 emit->key.fs.alpha_func = SVGA3D_CMP_ALWAYS;
11085 }
11086
11087 if (unit == PIPE_SHADER_FRAGMENT) {
11088 if (key->fs.light_twoside) {
11089 tokens = transform_fs_twoside(tokens);
11090 }
11091 if (key->fs.pstipple) {
11092 const struct tgsi_token *new_tokens =
11093 transform_fs_pstipple(emit, tokens);
11094 if (tokens != shader->tokens) {
11095 /* free the two-sided shader tokens */
11096 tgsi_free_tokens(tokens);
11097 }
11098 tokens = new_tokens;
11099 }
11100 if (key->fs.aa_point) {
11101 tokens = transform_fs_aapoint(tokens, key->fs.aa_point_coord_index);
11102 }
11103 }
11104
11105 if (SVGA_DEBUG & DEBUG_TGSI) {
11106 debug_printf("#####################################\n");
11107 debug_printf("### TGSI Shader %u\n", shader->id);
11108 tgsi_dump(tokens, 0);
11109 }
11110
11111 /**
11112 * Rescan the header if the token string is different from the one
11113 * included in the shader; otherwise, the header info is already up-to-date
11114 */
11115 if (tokens != shader->tokens) {
11116 tgsi_scan_shader(tokens, &emit->info);
11117 } else {
11118 emit->info = shader->info;
11119 }
11120
11121 emit->num_outputs = emit->info.num_outputs;
11122
11123 /**
11124  * Compute the input mapping to match the outputs from the shader
11125  * in the previous stage.
11126 */
11127 compute_input_mapping(svga, emit, unit);
11128
11129 determine_clipping_mode(emit);
11130
11131 if (unit == PIPE_SHADER_GEOMETRY || unit == PIPE_SHADER_VERTEX ||
11132 unit == PIPE_SHADER_TESS_CTRL || unit == PIPE_SHADER_TESS_EVAL) {
11133 if (shader->stream_output != NULL || emit->clip_mode == CLIP_DISTANCE) {
11134       /* If there are stream output declarations associated
11135        * with this shader, or the shader writes to ClipDistance,
11136        * then reserve extra registers for the non-adjusted vertex position
11137        * and the ClipDistance shadow copy.
11138 */
11139 emit->vposition.so_index = emit->num_outputs++;
11140
11141 if (emit->clip_mode == CLIP_DISTANCE) {
11142 emit->clip_dist_so_index = emit->num_outputs++;
11143 if (emit->info.num_written_clipdistance > 4)
11144 emit->num_outputs++;
11145 }
11146 }
11147 }
11148
11149 /*
11150 * Do actual shader translation.
11151 */
11152 if (!emit_vgpu10_header(emit)) {
11153 debug_printf("svga: emit VGPU10 header failed\n");
11154 goto cleanup;
11155 }
11156
11157 if (!emit_vgpu10_instructions(emit, tokens)) {
11158 debug_printf("svga: emit VGPU10 instructions failed\n");
11159 goto cleanup;
11160 }
11161
11162 if (!emit_vgpu10_tail(emit)) {
11163 debug_printf("svga: emit VGPU10 tail failed\n");
11164 goto cleanup;
11165 }
11166
11167 if (emit->register_overflow) {
11168 goto cleanup;
11169 }
11170
11171 /*
11172 * Create, initialize the 'variant' object.
11173 */
11174 variant = svga_new_shader_variant(svga, unit);
11175 if (!variant)
11176 goto cleanup;
11177
11178 variant->shader = shader;
11179 variant->nr_tokens = emit_get_num_tokens(emit);
11180 variant->tokens = (const unsigned *)emit->buf;
11181
11182 /* Copy shader signature info to the shader variant */
11183 if (svga_have_sm5(svga)) {
11184 copy_shader_signature(&emit->signature, variant);
11185 }
11186
11187 emit->buf = NULL; /* buffer is no longer owned by the emitter context */
11188 memcpy(&variant->key, key, sizeof(*key));
11189 variant->id = UTIL_BITMASK_INVALID_INDEX;
11190
11191    /* The starting offset of the extra constants is the number of
11192     * constants declared in the shader.
11193 */
11194 variant->extra_const_start = emit->num_shader_consts[0];
11195 if (key->gs.wide_point) {
11196 /**
11197 * The extra constant added in the transformed shader
11198 * for inverse viewport scale is to be supplied by the driver.
11199 * So the extra constant starting offset needs to be reduced by 1.
11200 */
11201 assert(variant->extra_const_start > 0);
11202 variant->extra_const_start--;
11203 }
11204
11205 if (unit == PIPE_SHADER_FRAGMENT) {
11206 struct svga_fs_variant *fs_variant = svga_fs_variant(variant);
11207
11208 fs_variant->pstipple_sampler_unit = emit->fs.pstipple_sampler_unit;
11209
11210 /* If there was exactly one write to a fragment shader output register
11211 * and it came from a constant buffer, we know all fragments will have
11212 * the same color (except for blending).
11213 */
11214 fs_variant->constant_color_output =
11215 emit->constant_color_output && emit->num_output_writes == 1;
11216
11217       /** Keep track in the variant whether flat interpolation is used
11218        * for any of the varyings.
11219 */
11220 fs_variant->uses_flat_interp = emit->uses_flat_interp;
11221
11222 fs_variant->fs_shadow_compare_units = emit->fs.shadow_compare_units;
11223 }
11224 else if (unit == PIPE_SHADER_TESS_EVAL) {
11225 struct svga_tes_variant *tes_variant = svga_tes_variant(variant);
11226
11227 /* Keep track in the tes variant some of the layout parameters.
11228 * These parameters will be referenced by the tcs to emit
11229 * the necessary declarations for the hull shader.
11230 */
11231 tes_variant->prim_mode = emit->tes.prim_mode;
11232 tes_variant->spacing = emit->tes.spacing;
11233 tes_variant->vertices_order_cw = emit->tes.vertices_order_cw;
11234 tes_variant->point_mode = emit->tes.point_mode;
11235 }
11236
11237
11238 if (tokens != shader->tokens) {
11239 tgsi_free_tokens(tokens);
11240 }
11241
11242 cleanup:
11243 free_emitter(emit);
11244
11245 done:
11246 SVGA_STATS_TIME_POP(svga_sws(svga));
11247 return variant;
11248 }