src/mesa/state_tracker/st_glsl_to_tgsi.cpp

   1 /*
   2  * Copyright (C) 2005-2007  Brian Paul   All Rights Reserved.
   3  * Copyright (C) 2008  VMware, Inc.   All Rights Reserved.
   4  * Copyright © 2010 Intel Corporation
   5  * Copyright © 2011 Bryan Cain
   6  *
   7  * Permission is hereby granted, free of charge, to any person obtaining a
   8  * copy of this software and associated documentation files (the "Software"),
   9  * to deal in the Software without restriction, including without limitation
  10  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  11  * and/or sell copies of the Software, and to permit persons to whom the
  12  * Software is furnished to do so, subject to the following conditions:
  13  *
  14  * The above copyright notice and this permission notice (including the next
  15  * paragraph) shall be included in all copies or substantial portions of the
  16  * Software.
  17  *
  18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  19  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  20  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  21  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  22  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  23  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  24  * DEALINGS IN THE SOFTWARE.
  25  */
  26
/**
 * \file st_glsl_to_tgsi.cpp
 *
 * Translate GLSL IR to TGSI.
 */
  32
  33 #include "st_glsl_to_tgsi.h"
  34
  35 #include "compiler/glsl/glsl_parser_extras.h"
  36 #include "compiler/glsl/ir_optimization.h"
  37 #include "compiler/glsl/program.h"
  38
  39 #include "main/errors.h"
  40 #include "main/shaderobj.h"
  41 #include "main/uniforms.h"
  42 #include "main/shaderapi.h"
  43 #include "main/shaderimage.h"
  44 #include "program/prog_instruction.h"
  45
  46 #include "pipe/p_context.h"
  47 #include "pipe/p_screen.h"
  48 #include "tgsi/tgsi_ureg.h"
  49 #include "tgsi/tgsi_info.h"
  50 #include "util/u_math.h"
  51 #include "util/u_memory.h"
  52 #include "st_program.h"
  53 #include "st_mesa_to_tgsi.h"
  54 #include "st_format.h"
  55 #include "st_glsl_types.h"
  56 #include "st_nir.h"
  57
  58
/* Bitmask with one bit set for each of the constant-like register files:
 * state variables, constants and uniforms.
 */
#define PROGRAM_ANY_CONST ((1 << PROGRAM_STATE_VAR) |    \
                           (1 << PROGRAM_CONSTANT) |     \
                           (1 << PROGRAM_UNIFORM))

/* Capacity of glsl_to_tgsi_instruction::tex_offsets. */
#define MAX_GLSL_TEXTURE_OFFSET 4

/* The two register classes name each other in their converting
 * constructors, so they are declared before either is defined.
 */
class st_src_reg;
class st_dst_reg;

/* Defined further down in this file; swizzle_for_type() needs it first. */
static int swizzle_for_size(int size);
  69
  70 static int swizzle_for_type(const glsl_type *type, int component = 0)
  71 {
  72    unsigned num_elements = 4;
  73
  74    if (type) {
  75       type = type->without_array();
  76       if (type->is_scalar() || type->is_vector() || type->is_matrix())
  77          num_elements = type->vector_elements;
  78    }
  79
  80    int swizzle = swizzle_for_size(num_elements);
  81    assert(num_elements + component <= 4);
  82
  83    swizzle += component * MAKE_SWIZZLE4(1, 1, 1, 1);
  84    return swizzle;
  85 }
  86
  87 /**
  88  * This struct is a corresponding struct to TGSI ureg_src.
  89  */
  90 class st_src_reg {
  91 public:
  92    st_src_reg(gl_register_file file, int index, const glsl_type *type,
  93               int component = 0)
  94    {
  95       this->file = file;
  96       this->index = index;
  97       this->swizzle = swizzle_for_type(type, component);
  98       this->negate = 0;
  99       this->index2D = 0;
 100       this->type = type ? type->base_type : GLSL_TYPE_ERROR;
 101       this->reladdr = NULL;
 102       this->reladdr2 = NULL;
 103       this->has_index2 = false;
 104       this->double_reg2 = false;
 105       this->array_id = 0;
 106       this->is_double_vertex_input = false;
 107    }
 108
 109    st_src_reg(gl_register_file file, int index, enum glsl_base_type type)
 110    {
 111       this->type = type;
 112       this->file = file;
 113       this->index = index;
 114       this->index2D = 0;
 115       this->swizzle = SWIZZLE_XYZW;
 116       this->negate = 0;
 117       this->reladdr = NULL;
 118       this->reladdr2 = NULL;
 119       this->has_index2 = false;
 120       this->double_reg2 = false;
 121       this->array_id = 0;
 122       this->is_double_vertex_input = false;
 123    }
 124
 125    st_src_reg(gl_register_file file, int index, enum glsl_base_type type, int index2D)
 126    {
 127       this->type = type;
 128       this->file = file;
 129       this->index = index;
 130       this->index2D = index2D;
 131       this->swizzle = SWIZZLE_XYZW;
 132       this->negate = 0;
 133       this->reladdr = NULL;
 134       this->reladdr2 = NULL;
 135       this->has_index2 = false;
 136       this->double_reg2 = false;
 137       this->array_id = 0;
 138       this->is_double_vertex_input = false;
 139    }
 140
 141    st_src_reg()
 142    {
 143       this->type = GLSL_TYPE_ERROR;
 144       this->file = PROGRAM_UNDEFINED;
 145       this->index = 0;
 146       this->index2D = 0;
 147       this->swizzle = 0;
 148       this->negate = 0;
 149       this->reladdr = NULL;
 150       this->reladdr2 = NULL;
 151       this->has_index2 = false;
 152       this->double_reg2 = false;
 153       this->array_id = 0;
 154       this->is_double_vertex_input = false;
 155    }
 156
 157    explicit st_src_reg(st_dst_reg reg);
 158
 159    gl_register_file file; /**< PROGRAM_* from Mesa */
 160    int index; /**< temporary index, VERT_ATTRIB_*, VARYING_SLOT_*, etc. */
 161    int index2D;
 162    GLuint swizzle; /**< SWIZZLE_XYZWONEZERO swizzles from Mesa. */
 163    int negate; /**< NEGATE_XYZW mask from mesa */
 164    enum glsl_base_type type; /** GLSL_TYPE_* from GLSL IR (enum glsl_base_type) */
 165    /** Register index should be offset by the integer in this reg. */
 166    st_src_reg *reladdr;
 167    st_src_reg *reladdr2;
 168    bool has_index2;
 169    /*
 170     * Is this the second half of a double register pair?
 171     * currently used for input mapping only.
 172     */
 173    bool double_reg2;
 174    unsigned array_id;
 175    bool is_double_vertex_input;
 176 };
 177
 178 class st_dst_reg {
 179 public:
 180    st_dst_reg(gl_register_file file, int writemask, enum glsl_base_type type, int index)
 181    {
 182       this->file = file;
 183       this->index = index;
 184       this->index2D = 0;
 185       this->writemask = writemask;
 186       this->reladdr = NULL;
 187       this->reladdr2 = NULL;
 188       this->has_index2 = false;
 189       this->type = type;
 190       this->array_id = 0;
 191    }
 192
 193    st_dst_reg(gl_register_file file, int writemask, enum glsl_base_type type)
 194    {
 195       this->file = file;
 196       this->index = 0;
 197       this->index2D = 0;
 198       this->writemask = writemask;
 199       this->reladdr = NULL;
 200       this->reladdr2 = NULL;
 201       this->has_index2 = false;
 202       this->type = type;
 203       this->array_id = 0;
 204    }
 205
 206    st_dst_reg()
 207    {
 208       this->type = GLSL_TYPE_ERROR;
 209       this->file = PROGRAM_UNDEFINED;
 210       this->index = 0;
 211       this->index2D = 0;
 212       this->writemask = 0;
 213       this->reladdr = NULL;
 214       this->reladdr2 = NULL;
 215       this->has_index2 = false;
 216       this->array_id = 0;
 217    }
 218
 219    explicit st_dst_reg(st_src_reg reg);
 220
 221    gl_register_file file; /**< PROGRAM_* from Mesa */
 222    int index; /**< temporary index, VERT_ATTRIB_*, VARYING_SLOT_*, etc. */
 223    int index2D;
 224    int writemask; /**< Bitfield of WRITEMASK_[XYZW] */
 225    enum glsl_base_type type; /** GLSL_TYPE_* from GLSL IR (enum glsl_base_type) */
 226    /** Register index should be offset by the integer in this reg. */
 227    st_src_reg *reladdr;
 228    st_src_reg *reladdr2;
 229    bool has_index2;
 230    unsigned array_id;
 231 };
 232
 233 st_src_reg::st_src_reg(st_dst_reg reg)
 234 {
 235    this->type = reg.type;
 236    this->file = reg.file;
 237    this->index = reg.index;
 238    this->swizzle = SWIZZLE_XYZW;
 239    this->negate = 0;
 240    this->reladdr = reg.reladdr;
 241    this->index2D = reg.index2D;
 242    this->reladdr2 = reg.reladdr2;
 243    this->has_index2 = reg.has_index2;
 244    this->double_reg2 = false;
 245    this->array_id = reg.array_id;
 246    this->is_double_vertex_input = false;
 247 }
 248
 249 st_dst_reg::st_dst_reg(st_src_reg reg)
 250 {
 251    this->type = reg.type;
 252    this->file = reg.file;
 253    this->index = reg.index;
 254    this->writemask = WRITEMASK_XYZW;
 255    this->reladdr = reg.reladdr;
 256    this->index2D = reg.index2D;
 257    this->reladdr2 = reg.reladdr2;
 258    this->has_index2 = reg.has_index2;
 259    this->array_id = reg.array_id;
 260 }
 261
/**
 * One instruction in the intermediate list built by glsl_to_tgsi_visitor.
 *
 * Instances are created by emit_asm() and linked into the visitor's
 * instruction list through the exec_node base class.
 */
class glsl_to_tgsi_instruction : public exec_node {
public:
   DECLARE_RALLOC_CXX_OPERATORS(glsl_to_tgsi_instruction)

   unsigned op; /**< opcode as returned by get_opcode() in emit_asm() */
   st_dst_reg dst[2]; /**< destination operands; unused ones are undef_dst */
   st_src_reg src[4]; /**< source operands; unused ones are undef_src */
   /** Pointer to the ir source this tree came from for debugging */
   ir_instruction *ir;
   GLboolean cond_update;
   bool saturate;
   /** Set by emit_asm() on instructions that went through its 64-bit split. */
   bool is_64bit_expanded;
   st_src_reg sampler; /**< sampler register */
   int sampler_base;
   int sampler_array_size; /**< 1-based size of sampler array, 1 if not array */
   int tex_target; /**< One of TEXTURE_*_INDEX */
   glsl_base_type tex_type; /**< emit_asm() defaults this to GLSL_TYPE_FLOAT */
   GLboolean tex_shadow;
   unsigned image_format;

   st_src_reg tex_offsets[MAX_GLSL_TEXTURE_OFFSET];
   unsigned tex_offset_num_offset;
   int dead_mask; /**< Used in dead code elimination */

   st_src_reg buffer; /**< buffer register */
   unsigned buffer_access; /**< buffer access type */

   class function_entry *function; /* Set on TGSI_OPCODE_CAL or TGSI_OPCODE_BGNSUB */
   const struct tgsi_opcode_info *info; /**< tgsi_get_opcode_info(op), set in emit_asm() */
};
 292
/**
 * Records which register (file + index) backs an ir_variable.
 *
 * Entries live in glsl_to_tgsi_visitor::variables.
 */
class variable_storage : public exec_node {
public:
   /** \p component always starts at 0; \p array_id defaults to 0. */
   variable_storage(ir_variable *var, gl_register_file file, int index,
                    unsigned array_id = 0)
      : file(file), index(index), component(0), var(var), array_id(array_id)
   {
      /* empty */
   }

   gl_register_file file;
   int index;

   /* Explicit component location. This is given in terms of the GLSL-style
    * swizzles where each double is a single component, i.e. for 64-bit types
    * it can only be 0 or 1.
    */
   int component;
   ir_variable *var; /* variable that maps to this, if any */
   unsigned array_id;
};
 313
 314 class immediate_storage : public exec_node {
 315 public:
 316    immediate_storage(gl_constant_value *values, int size32, int type)
 317    {
 318       memcpy(this->values, values, size32 * sizeof(gl_constant_value));
 319       this->size32 = size32;
 320       this->type = type;
 321    }
 322
 323    /* doubles are stored across 2 gl_constant_values */
 324    gl_constant_value values[4];
 325    int size32; /**< Number of 32-bit components (1-4) */
 326    int type; /**< GL_DOUBLE, GL_FLOAT, GL_INT, GL_BOOL, or GL_UNSIGNED_INT */
 327 };
 328
/**
 * Bookkeeping for one GLSL function signature.
 *
 * Entries live in glsl_to_tgsi_visitor::function_signatures.
 */
class function_entry : public exec_node {
public:
   /** The GLSL IR signature this entry describes. */
   ir_function_signature *sig;

   /**
    * identifier of this function signature used by the program.
    *
    * At the point that TGSI instructions for function calls are
    * generated, we don't know the address of the first instruction of
    * the function body.  So we make the BranchTarget that is called a
    * small integer and rewrite them during set_branchtargets().
    */
   int sig_id;

   /**
    * Pointer to first instruction of the function body.
    *
    * Set during function body emits after main() is processed.
    */
   glsl_to_tgsi_instruction *bgn_inst;

   /**
    * Index of the first instruction of the function body in actual TGSI.
    *
    * Set after conversion from glsl_to_tgsi_instruction to TGSI.
    */
   int inst;

   /** Storage for the return value. */
   st_src_reg return_reg;
};
 360
/* Placeholder operands in the PROGRAM_UNDEFINED file; they are the default
 * values for the optional dst/src arguments of emit_asm().
 *
 * NOTE(review): undef_dst passes SWIZZLE_NOOP in the constructor's writemask
 * position -- confirm that value is intended as a writemask.
 */
static st_src_reg undef_src = st_src_reg(PROGRAM_UNDEFINED, 0, GLSL_TYPE_ERROR);
static st_dst_reg undef_dst = st_dst_reg(PROGRAM_UNDEFINED, SWIZZLE_NOOP, GLSL_TYPE_ERROR);
 363
/* One shader input or output declaration, as stored in
 * glsl_to_tgsi_visitor::inputs and glsl_to_tgsi_visitor::outputs.
 */
struct inout_decl {
   unsigned mesa_index;
   unsigned array_id; /* TGSI ArrayID; 1-based: 0 means not an array */
   unsigned size;
   enum glsl_base_type base_type; /* reported by find_array_type() */
   ubyte usage_mask; /* GLSL-style usage-mask,  i.e. single bit per double */
};
 371
 372 static struct inout_decl *
 373 find_inout_array(struct inout_decl *decls, unsigned count, unsigned array_id)
 374 {
 375    assert(array_id != 0);
 376
 377    for (unsigned i = 0; i < count; i++) {
 378       struct inout_decl *decl = &decls[i];
 379
 380       if (array_id == decl->array_id) {
 381          return decl;
 382       }
 383    }
 384
 385    return NULL;
 386 }
 387
 388 static enum glsl_base_type
 389 find_array_type(struct inout_decl *decls, unsigned count, unsigned array_id)
 390 {
 391    if (!array_id)
 392       return GLSL_TYPE_ERROR;
 393    struct inout_decl *decl = find_inout_array(decls, count, array_id);
 394    if (decl)
 395       return decl->base_type;
 396    return GLSL_TYPE_ERROR;
 397 }
 398
/* One (old, new) register-number pair; an array of these is the argument of
 * glsl_to_tgsi_visitor::rename_temp_registers().
 */
struct rename_reg_pair {
   int old_reg;
   int new_reg;
};
 403
/**
 * IR visitor that walks GLSL IR and builds a list of
 * glsl_to_tgsi_instruction, together with the variable, immediate and
 * function-signature tables those instructions refer to.
 */
struct glsl_to_tgsi_visitor : public ir_visitor {
public:
   glsl_to_tgsi_visitor();
   ~glsl_to_tgsi_visitor();

   function_entry *current_function;

   struct gl_context *ctx;
   struct gl_program *prog;
   struct gl_shader_program *shader_program;
   struct gl_linked_shader *shader;
   struct gl_shader_compiler_options *options;

   int next_temp;

   unsigned *array_sizes;
   unsigned max_num_arrays;
   unsigned next_array;

   /* Input and output declarations; searched by array id through
    * find_inout_array() / find_array_type().
    */
   struct inout_decl inputs[4 * PIPE_MAX_SHADER_INPUTS];
   unsigned num_inputs;
   unsigned num_input_arrays;
   struct inout_decl outputs[4 * PIPE_MAX_SHADER_OUTPUTS];
   unsigned num_outputs;
   unsigned num_output_arrays;

   int num_address_regs;
   uint32_t samplers_used;
   glsl_base_type sampler_types[PIPE_MAX_SAMPLERS];
   int sampler_targets[PIPE_MAX_SAMPLERS];   /**< One of TGSI_TEXTURE_* */
   int buffers_used;
   int images_used;
   int image_targets[PIPE_MAX_SHADER_IMAGES];
   unsigned image_formats[PIPE_MAX_SHADER_IMAGES];
   /** Set by emit_asm() when a constant-file operand is relatively addressed. */
   bool indirect_addr_consts;
   int wpos_transform_const;

   int glsl_version;
   bool native_integers;
   bool have_sqrt;
   bool have_fma;
   bool use_shared_memory;

   variable_storage *find_variable_storage(ir_variable *var);

   int add_constant(gl_register_file file, gl_constant_value values[8],
                    int size, int datatype, GLuint *swizzle_out);

   function_entry *get_function_signature(ir_function_signature *sig);

   st_src_reg get_temp(const glsl_type *type);
   void reladdr_to_temp(ir_instruction *ir, st_src_reg *reg, int *num_reladdr);

   st_src_reg st_src_reg_for_double(double val);
   st_src_reg st_src_reg_for_float(float val);
   st_src_reg st_src_reg_for_int(int val);
   st_src_reg st_src_reg_for_type(enum glsl_base_type type, int val);

   /**
    * \name Visit methods
    *
    * As typical for the visitor pattern, there must be one \c visit method for
    * each concrete subclass of \c ir_instruction.  Virtual base classes within
    * the hierarchy should not have \c visit methods.
    */
   /*@{*/
   virtual void visit(ir_variable *);
   virtual void visit(ir_loop *);
   virtual void visit(ir_loop_jump *);
   virtual void visit(ir_function_signature *);
   virtual void visit(ir_function *);
   virtual void visit(ir_expression *);
   virtual void visit(ir_swizzle *);
   virtual void visit(ir_dereference_variable  *);
   virtual void visit(ir_dereference_array *);
   virtual void visit(ir_dereference_record *);
   virtual void visit(ir_assignment *);
   virtual void visit(ir_constant *);
   virtual void visit(ir_call *);
   virtual void visit(ir_return *);
   virtual void visit(ir_discard *);
   virtual void visit(ir_texture *);
   virtual void visit(ir_if *);
   virtual void visit(ir_emit_vertex *);
   virtual void visit(ir_end_primitive *);
   virtual void visit(ir_barrier *);
   /*@}*/

   void visit_expression(ir_expression *, st_src_reg *) ATTRIBUTE_NOINLINE;

   void visit_atomic_counter_intrinsic(ir_call *);
   void visit_ssbo_intrinsic(ir_call *);
   void visit_membar_intrinsic(ir_call *);
   void visit_shared_intrinsic(ir_call *);
   void visit_image_intrinsic(ir_call *);

   st_src_reg result;

   /** List of variable_storage */
   exec_list variables;

   /** List of immediate_storage */
   exec_list immediates;
   unsigned num_immediates;

   /** List of function_entry */
   exec_list function_signatures;
   int next_signature_id;

   /** List of glsl_to_tgsi_instruction */
   exec_list instructions;

   /**
    * Append an instruction with a single destination.  Forwards to the
    * two-destination overload with undef_dst as the second destination.
    */
   glsl_to_tgsi_instruction *emit_asm(ir_instruction *ir, unsigned op,
                                      st_dst_reg dst = undef_dst,
                                      st_src_reg src0 = undef_src,
                                      st_src_reg src1 = undef_src,
                                      st_src_reg src2 = undef_src,
                                      st_src_reg src3 = undef_src);

   /** Append an instruction with two destinations. */
   glsl_to_tgsi_instruction *emit_asm(ir_instruction *ir, unsigned op,
                                      st_dst_reg dst, st_dst_reg dst1,
                                      st_src_reg src0 = undef_src,
                                      st_src_reg src1 = undef_src,
                                      st_src_reg src2 = undef_src,
                                      st_src_reg src3 = undef_src);

   /** Pick the opcode variant matching the operand types; used by emit_asm(). */
   unsigned get_opcode(ir_instruction *ir, unsigned op,
                    st_dst_reg dst,
                    st_src_reg src0, st_src_reg src1);

   /**
    * Emit the correct dot-product instruction for the type of arguments
    */
   glsl_to_tgsi_instruction *emit_dp(ir_instruction *ir,
                                     st_dst_reg dst,
                                     st_src_reg src0,
                                     st_src_reg src1,
                                     unsigned elements);

   void emit_scalar(ir_instruction *ir, unsigned op,
                    st_dst_reg dst, st_src_reg src0);

   void emit_scalar(ir_instruction *ir, unsigned op,
                    st_dst_reg dst, st_src_reg src0, st_src_reg src1);

   void emit_arl(ir_instruction *ir, st_dst_reg dst, st_src_reg src0);

   void get_deref_offsets(ir_dereference *ir,
                          unsigned *array_size,
                          unsigned *base,
                          unsigned *index,
                          st_src_reg *reladdr);
  void calc_deref_offsets(ir_dereference *head,
                          ir_dereference *tail,
                          unsigned *array_elements,
                          unsigned *base,
                          unsigned *index,
                          st_src_reg *indirect,
                          unsigned *location);

   bool try_emit_mad(ir_expression *ir,
              int mul_operand);
   bool try_emit_mad_for_and_not(ir_expression *ir,
              int mul_operand);

   void emit_swz(ir_expression *ir);

   bool process_move_condition(ir_rvalue *ir);

   void simplify_cmp(void);

   void rename_temp_registers(int num_renames, struct rename_reg_pair *renames);
   void get_first_temp_read(int *first_reads);
   void get_last_temp_read_first_temp_write(int *last_reads, int *first_writes);
   void get_last_temp_write(int *last_writes);

   void copy_propagate(void);
   int eliminate_dead_code(void);

   void merge_two_dsts(void);
   void merge_registers(void);
   void renumber_registers(void);

   void emit_block_mov(ir_assignment *ir, const struct glsl_type *type,
                       st_dst_reg *l, st_src_reg *r,
                       st_src_reg *cond, bool cond_swap);

   /** Allocation context passed to new(mem_ctx) when creating instructions. */
   void *mem_ctx;
};
 593
/* Destinations in the PROGRAM_ADDRESS file at indices 0, 1 and 2, each
 * writing only the X channel.  emit_asm() loads address_reg from a
 * destination's reladdr and address_reg2 from its reladdr2.
 */
static st_dst_reg address_reg = st_dst_reg(PROGRAM_ADDRESS, WRITEMASK_X, GLSL_TYPE_FLOAT, 0);
static st_dst_reg address_reg2 = st_dst_reg(PROGRAM_ADDRESS, WRITEMASK_X, GLSL_TYPE_FLOAT, 1);
static st_dst_reg sampler_reladdr = st_dst_reg(PROGRAM_ADDRESS, WRITEMASK_X, GLSL_TYPE_FLOAT, 2);
 597
 598 static void
 599 fail_link(struct gl_shader_program *prog, const char *fmt, ...) PRINTFLIKE(2, 3);
 600
 601 static void
 602 fail_link(struct gl_shader_program *prog, const char *fmt, ...)
 603 {
 604    va_list args;
 605    va_start(args, fmt);
 606    ralloc_vasprintf_append(&prog->InfoLog, fmt, args);
 607    va_end(args);
 608
 609    prog->LinkStatus = GL_FALSE;
 610 }
 611
 612 static int
 613 swizzle_for_size(int size)
 614 {
 615    static const int size_swizzles[4] = {
 616       MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
 617       MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y),
 618       MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z),
 619       MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W),
 620    };
 621
 622    assert((size >= 1) && (size <= 4));
 623    return size_swizzles[size - 1];
 624 }
 625
 626 static bool
 627 is_resource_instruction(unsigned opcode)
 628 {
 629    switch (opcode) {
 630    case TGSI_OPCODE_RESQ:
 631    case TGSI_OPCODE_LOAD:
 632    case TGSI_OPCODE_ATOMUADD:
 633    case TGSI_OPCODE_ATOMXCHG:
 634    case TGSI_OPCODE_ATOMCAS:
 635    case TGSI_OPCODE_ATOMAND:
 636    case TGSI_OPCODE_ATOMOR:
 637    case TGSI_OPCODE_ATOMXOR:
 638    case TGSI_OPCODE_ATOMUMIN:
 639    case TGSI_OPCODE_ATOMUMAX:
 640    case TGSI_OPCODE_ATOMIMIN:
 641    case TGSI_OPCODE_ATOMIMAX:
 642       return true;
 643    default:
 644       return false;
 645    }
 646 }
 647
 648 static unsigned
 649 num_inst_dst_regs(const glsl_to_tgsi_instruction *op)
 650 {
 651    return op->info->num_dst;
 652 }
 653
 654 static unsigned
 655 num_inst_src_regs(const glsl_to_tgsi_instruction *op)
 656 {
 657    return op->info->is_tex || is_resource_instruction(op->op) ?
 658       op->info->num_src - 1 : op->info->num_src;
 659 }
 660
/**
 * Append an instruction with up to two destinations and four sources to the
 * instruction list.
 *
 * The opcode is first resolved through get_opcode().  Relative addressing is
 * then set up: source reladdrs are handed to reladdr_to_temp() and
 * destination reladdrs are loaded into the address registers with
 * emit_arl().  If either destination or src0 is 64-bit, the instruction is
 * split into one instruction per bit set in the selected writemask.
 *
 * \return the last instruction appended.
 *
 * NOTE(review): on the 64-bit path with an empty writemask the loop never
 * runs, so nothing is appended and NULL is returned.
 */
glsl_to_tgsi_instruction *
glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, unsigned op,
                               st_dst_reg dst, st_dst_reg dst1,
                               st_src_reg src0, st_src_reg src1,
                               st_src_reg src2, st_src_reg src3)
{
   glsl_to_tgsi_instruction *inst = new(mem_ctx) glsl_to_tgsi_instruction();
   int num_reladdr = 0, i, j;
   bool dst_is_64bit[2];

   op = get_opcode(ir, op, dst, src0, src1);

   /* If we have to do relative addressing, we want to load the ARL
    * reg directly for one of the regs, and preload the other reladdr
    * sources into temps.
    */
   num_reladdr += dst.reladdr != NULL || dst.reladdr2;
   num_reladdr += dst1.reladdr != NULL || dst1.reladdr2;
   num_reladdr += src0.reladdr != NULL || src0.reladdr2 != NULL;
   num_reladdr += src1.reladdr != NULL || src1.reladdr2 != NULL;
   num_reladdr += src2.reladdr != NULL || src2.reladdr2 != NULL;
   num_reladdr += src3.reladdr != NULL || src3.reladdr2 != NULL;

   /* Sources are handed over in reverse order, last operand first. */
   reladdr_to_temp(ir, &src3, &num_reladdr);
   reladdr_to_temp(ir, &src2, &num_reladdr);
   reladdr_to_temp(ir, &src1, &num_reladdr);
   reladdr_to_temp(ir, &src0, &num_reladdr);

   if (dst.reladdr || dst.reladdr2) {
      if (dst.reladdr)
         emit_arl(ir, address_reg, *dst.reladdr);
      if (dst.reladdr2)
         emit_arl(ir, address_reg2, *dst.reladdr2);
      num_reladdr--;
   }
   /* NOTE(review): the count above also includes dst1.reladdr2, but only
    * dst1.reladdr is handled here -- confirm dst1 never carries reladdr2
    * alone, otherwise the assert below fires.
    */
   if (dst1.reladdr) {
      emit_arl(ir, address_reg, *dst1.reladdr);
      num_reladdr--;
   }
   assert(num_reladdr == 0);

   inst->op = op;
   inst->info = tgsi_get_opcode_info(op);
   inst->dst[0] = dst;
   inst->dst[1] = dst1;
   inst->src[0] = src0;
   inst->src[1] = src1;
   inst->src[2] = src2;
   inst->src[3] = src3;
   inst->is_64bit_expanded = false;
   inst->ir = ir;
   inst->dead_mask = 0;
   /* default to float, for paths where this is not initialized
    * (since 0==UINT which is likely wrong):
    */
   inst->tex_type = GLSL_TYPE_FLOAT;

   inst->function = NULL;

   /* Update indirect addressing status used by TGSI */
   if (dst.reladdr || dst.reladdr2) {
      switch(dst.file) {
      case PROGRAM_STATE_VAR:
      case PROGRAM_CONSTANT:
      case PROGRAM_UNIFORM:
         this->indirect_addr_consts = true;
         break;
      case PROGRAM_IMMEDIATE:
         assert(!"immediates should not have indirect addressing");
         break;
      default:
         break;
      }
   }
   else {
      /* Only reladdr is inspected for sources, not reladdr2. */
      for (i = 0; i < 4; i++) {
         if(inst->src[i].reladdr) {
            switch(inst->src[i].file) {
            case PROGRAM_STATE_VAR:
            case PROGRAM_CONSTANT:
            case PROGRAM_UNIFORM:
               this->indirect_addr_consts = true;
               break;
            case PROGRAM_IMMEDIATE:
               assert(!"immediates should not have indirect addressing");
               break;
            default:
               break;
            }
         }
      }
   }

   /*
    * This section contains the double processing.
    * GLSL just represents doubles as single channel values,
    * however most HW and TGSI represent doubles as pairs of register channels.
    *
    * so we have to fixup destination writemask/index and src swizzle/indexes.
    * dest writemasks need to translate from single channel write mask
    * to a dual-channel writemask, but also need to modify the index,
    * if we are touching the Z,W fields in the pre-translated writemask.
    *
    * src channels have similar index modifications along with swizzle
    * changes so we pick the XY, ZW pairs from the correct index.
    *
    * GLSL [0].x -> TGSI [0].xy
    * GLSL [0].y -> TGSI [0].zw
    * GLSL [0].z -> TGSI [1].xy
    * GLSL [0].w -> TGSI [1].zw
    */
   for (j = 0; j < 2; j++) {
      dst_is_64bit[j] = glsl_base_type_is_64bit(inst->dst[j].type);
      /* An output array's own type is GLSL_TYPE_ARRAY, so look up the base
       * type recorded in its output declaration instead.
       */
      if (!dst_is_64bit[j] && inst->dst[j].file == PROGRAM_OUTPUT && inst->dst[j].type == GLSL_TYPE_ARRAY) {
         enum glsl_base_type type = find_array_type(this->outputs, this->num_outputs, inst->dst[j].array_id);
         if (glsl_base_type_is_64bit(type))
            dst_is_64bit[j] = true;
      }
   }

   /* Of the sources, only src[0] is checked when deciding whether to split. */
   if (dst_is_64bit[0] || dst_is_64bit[1] ||
       glsl_base_type_is_64bit(inst->src[0].type)) {
      glsl_to_tgsi_instruction *dinst = NULL;
      int initial_src_swz[4], initial_src_idx[4];
      int initial_dst_idx[2], initial_dst_writemask[2];
      /* select the writemask for dst0 or dst1 */
      unsigned writemask = inst->dst[1].file == PROGRAM_UNDEFINED ? inst->dst[0].writemask : inst->dst[1].writemask;

      /* copy out the writemask, index and swizzles for all src/dsts. */
      for (j = 0; j < 2; j++) {
         initial_dst_writemask[j] = inst->dst[j].writemask;
         initial_dst_idx[j] = inst->dst[j].index;
      }

      for (j = 0; j < 4; j++) {
         initial_src_swz[j] = inst->src[j].swizzle;
         initial_src_idx[j] = inst->src[j].index;
      }

      /*
       * scan all the components in the dst writemask
       * generate an instruction for each of them if required.
       */
      st_src_reg addr;
      while (writemask) {

         /* This i shadows the function-level i: it is the GLSL channel
          * (0..3) being expanded in this iteration.
          */
         int i = u_bit_scan(&writemask);

         /* before emitting the instruction, see if we have to adjust store
          * address */
         if (i > 1 && inst->op == TGSI_OPCODE_STORE &&
             addr.file == PROGRAM_UNDEFINED) {
            /* We have to advance the buffer address by 16 */
            addr = get_temp(glsl_type::uint_type);
            emit_asm(ir, TGSI_OPCODE_UADD, st_dst_reg(addr),
                     inst->src[0], st_src_reg_for_int(16));
         }


         /* first time use previous instruction */
         if (dinst == NULL) {
            dinst = inst;
         } else {
            /* create a new instruction for subsequent attempts */
            dinst = new(mem_ctx) glsl_to_tgsi_instruction();
            *dinst = *inst;
            /* The copy must not inherit the original's list links. */
            dinst->next = NULL;
            dinst->prev = NULL;
         }
         this->instructions.push_tail(dinst);
         dinst->is_64bit_expanded = true;

         /* modify the destination if we are splitting */
         for (j = 0; j < 2; j++) {
            if (dst_is_64bit[j]) {
               /* Odd channels (y, w) land in ZW, even ones (x, z) in XY. */
               dinst->dst[j].writemask = (i & 1) ? WRITEMASK_ZW : WRITEMASK_XY;
               dinst->dst[j].index = initial_dst_idx[j];
               if (i > 1) {
                  /* Channels z and w live in the second slot: stores use the
                   * advanced address, everything else the next register.
                   */
                  if (dinst->op == TGSI_OPCODE_STORE) {
                     dinst->src[0] = addr;
                  } else {
                     dinst->dst[j].index++;
                  }
               }
            } else {
               /* if we aren't writing to a double, just get the bit of the initial writemask
                  for this channel */
               dinst->dst[j].writemask = initial_dst_writemask[j] & (1 << i);
            }
         }

         /* modify the src registers */
         for (j = 0; j < 4; j++) {
            int swz = GET_SWZ(initial_src_swz[j], i);

            if (glsl_base_type_is_64bit(dinst->src[j].type)) {
               dinst->src[j].index = initial_src_idx[j];
               if (swz > 1) {
                  dinst->src[j].double_reg2 = true;
                  dinst->src[j].index++;
               }

               if (swz & 1)
                  dinst->src[j].swizzle = MAKE_SWIZZLE4(SWIZZLE_Z, SWIZZLE_W, SWIZZLE_Z, SWIZZLE_W);
               else
                  dinst->src[j].swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_X, SWIZZLE_Y);

            } else {
               /* some opcodes are special case in what they use as sources
                  - F2D is a float src0, DLDEXP is integer src1 */
               if (op == TGSI_OPCODE_F2D ||
                   op == TGSI_OPCODE_DLDEXP ||
                   (op == TGSI_OPCODE_UCMP && dst_is_64bit[0])) {
                  dinst->src[j].swizzle = MAKE_SWIZZLE4(swz, swz, swz, swz);
               }
            }
         }
      }
      inst = dinst;
   } else {
      this->instructions.push_tail(inst);
   }


   return inst;
}
 887
 888 glsl_to_tgsi_instruction *
 889 glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, unsigned op,
 890                                st_dst_reg dst,
 891                                st_src_reg src0, st_src_reg src1,
 892                                st_src_reg src2, st_src_reg src3)
 893 {
 894    return emit_asm(ir, op, dst, undef_dst, src0, src1, src2, src3);
 895 }
 896
 897 /**
 898  * Determines whether to use an integer, unsigned integer, or float opcode
 899  * based on the operands and input opcode, then emits the result.
 900  */
 901 unsigned
 902 glsl_to_tgsi_visitor::get_opcode(ir_instruction *ir, unsigned op,
 903                                  st_dst_reg dst,
 904                                  st_src_reg src0, st_src_reg src1)
 905 {
 906    enum glsl_base_type type = GLSL_TYPE_FLOAT;
 907
 908    if (op == TGSI_OPCODE_MOV)
 909        return op;
 910
 911    assert(src0.type != GLSL_TYPE_ARRAY);
 912    assert(src0.type != GLSL_TYPE_STRUCT);
 913    assert(src1.type != GLSL_TYPE_ARRAY);
 914    assert(src1.type != GLSL_TYPE_STRUCT);
 915
 916    if (is_resource_instruction(op))
 917       type = src1.type;
 918    else if (src0.type == GLSL_TYPE_DOUBLE || src1.type == GLSL_TYPE_DOUBLE)
 919       type = GLSL_TYPE_DOUBLE;
 920    else if (src0.type == GLSL_TYPE_FLOAT || src1.type == GLSL_TYPE_FLOAT)
 921       type = GLSL_TYPE_FLOAT;
 922    else if (native_integers)
 923       type = src0.type == GLSL_TYPE_BOOL ? GLSL_TYPE_INT : src0.type;
 924
 925 #define case5(c, f, i, u, d)                    \
 926    case TGSI_OPCODE_##c: \
 927       if (type == GLSL_TYPE_DOUBLE)           \
 928          op = TGSI_OPCODE_##d; \
 929       else if (type == GLSL_TYPE_INT)       \
 930          op = TGSI_OPCODE_##i; \
 931       else if (type == GLSL_TYPE_UINT) \
 932          op = TGSI_OPCODE_##u; \
 933       else \
 934          op = TGSI_OPCODE_##f; \
 935       break;
 936
 937 #define case4(c, f, i, u)                    \
 938    case TGSI_OPCODE_##c: \
 939       if (type == GLSL_TYPE_INT) \
 940          op = TGSI_OPCODE_##i; \
 941       else if (type == GLSL_TYPE_UINT) \
 942          op = TGSI_OPCODE_##u; \
 943       else \
 944          op = TGSI_OPCODE_##f; \
 945       break;
 946
 947 #define case3(f, i, u)  case4(f, f, i, u)
 948 #define case4d(f, i, u, d)  case5(f, f, i, u, d)
 949 #define case3fid(f, i, d) case5(f, f, i, i, d)
 950 #define case2fi(f, i)   case4(f, f, i, i)
 951 #define case2iu(i, u)   case4(i, LAST, i, u)
 952
 953 #define casecomp(c, f, i, u, d)                   \
 954    case TGSI_OPCODE_##c: \
 955       if (type == GLSL_TYPE_DOUBLE) \
 956          op = TGSI_OPCODE_##d; \
 957       else if (type == GLSL_TYPE_INT || type == GLSL_TYPE_SUBROUTINE)       \
 958          op = TGSI_OPCODE_##i; \
 959       else if (type == GLSL_TYPE_UINT) \
 960          op = TGSI_OPCODE_##u; \
 961       else if (native_integers) \
 962          op = TGSI_OPCODE_##f; \
 963       else \
 964          op = TGSI_OPCODE_##c; \
 965       break;
 966
 967    switch(op) {
 968       case3fid(ADD, UADD, DADD);
 969       case3fid(MUL, UMUL, DMUL);
 970       case3fid(MAD, UMAD, DMAD);
 971       case3fid(FMA, UMAD, DFMA);
 972       case3(DIV, IDIV, UDIV);
 973       case4d(MAX, IMAX, UMAX, DMAX);
 974       case4d(MIN, IMIN, UMIN, DMIN);
 975       case2iu(MOD, UMOD);
 976
 977       casecomp(SEQ, FSEQ, USEQ, USEQ, DSEQ);
 978       casecomp(SNE, FSNE, USNE, USNE, DSNE);
 979       casecomp(SGE, FSGE, ISGE, USGE, DSGE);
 980       casecomp(SLT, FSLT, ISLT, USLT, DSLT);
 981
 982       case2iu(ISHR, USHR);
 983
 984       case3fid(SSG, ISSG, DSSG);
 985       case3fid(ABS, IABS, DABS);
 986
 987       case2iu(IBFE, UBFE);
 988       case2iu(IMSB, UMSB);
 989       case2iu(IMUL_HI, UMUL_HI);
 990
 991       case3fid(SQRT, SQRT, DSQRT);
 992
 993       case3fid(RCP, RCP, DRCP);
 994       case3fid(RSQ, RSQ, DRSQ);
 995
 996       case3fid(FRC, FRC, DFRAC);
 997       case3fid(TRUNC, TRUNC, DTRUNC);
 998       case3fid(CEIL, CEIL, DCEIL);
 999       case3fid(FLR, FLR, DFLR);
1000       case3fid(ROUND, ROUND, DROUND);
1001
1002       case2iu(ATOMIMAX, ATOMUMAX);
1003       case2iu(ATOMIMIN, ATOMUMIN);
1004
1005       default: break;
1006    }
1007
1008    assert(op != TGSI_OPCODE_LAST);
1009    return op;
1010 }
1011
1012 glsl_to_tgsi_instruction *
1013 glsl_to_tgsi_visitor::emit_dp(ir_instruction *ir,
1014                               st_dst_reg dst, st_src_reg src0, st_src_reg src1,
1015                               unsigned elements)
1016 {
1017    static const unsigned dot_opcodes[] = {
1018       TGSI_OPCODE_DP2, TGSI_OPCODE_DP3, TGSI_OPCODE_DP4
1019    };
1020
1021    return emit_asm(ir, dot_opcodes[elements - 2], dst, src0, src1);
1022 }
1023
1024 /**
1025  * Emits TGSI scalar opcodes to produce unique answers across channels.
1026  *
1027  * Some TGSI opcodes are scalar-only, like ARB_fp/vp.  The src X
1028  * channel determines the result across all channels.  So to do a vec4
1029  * of this operation, we want to emit a scalar per source channel used
1030  * to produce dest channels.
1031  */
1032 void
1033 glsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, unsigned op,
1034                                   st_dst_reg dst,
1035                                   st_src_reg orig_src0, st_src_reg orig_src1)
1036 {
1037    int i, j;
1038    int done_mask = ~dst.writemask;
1039
1040    /* TGSI RCP is a scalar operation splatting results to all channels,
1041     * like ARB_fp/vp.  So emit as many RCPs as necessary to cover our
1042     * dst channels.
1043     */
1044    for (i = 0; i < 4; i++) {
1045       GLuint this_mask = (1 << i);
1046       st_src_reg src0 = orig_src0;
1047       st_src_reg src1 = orig_src1;
1048
1049       if (done_mask & this_mask)
1050          continue;
1051
1052       GLuint src0_swiz = GET_SWZ(src0.swizzle, i);
1053       GLuint src1_swiz = GET_SWZ(src1.swizzle, i);
1054       for (j = i + 1; j < 4; j++) {
1055          /* If there is another enabled component in the destination that is
1056           * derived from the same inputs, generate its value on this pass as
1057           * well.
1058           */
1059          if (!(done_mask & (1 << j)) &&
1060              GET_SWZ(src0.swizzle, j) == src0_swiz &&
1061              GET_SWZ(src1.swizzle, j) == src1_swiz) {
1062             this_mask |= (1 << j);
1063          }
1064       }
1065       src0.swizzle = MAKE_SWIZZLE4(src0_swiz, src0_swiz,
1066                                    src0_swiz, src0_swiz);
1067       src1.swizzle = MAKE_SWIZZLE4(src1_swiz, src1_swiz,
1068                                    src1_swiz, src1_swiz);
1069
1070       dst.writemask = this_mask;
1071       emit_asm(ir, op, dst, src0, src1);
1072       done_mask |= this_mask;
1073    }
1074 }
1075
1076 void
1077 glsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, unsigned op,
1078                                   st_dst_reg dst, st_src_reg src0)
1079 {
1080    st_src_reg undef = undef_src;
1081
1082    undef.swizzle = SWIZZLE_XXXX;
1083
1084    emit_scalar(ir, op, dst, src0, undef);
1085 }
1086
1087 void
1088 glsl_to_tgsi_visitor::emit_arl(ir_instruction *ir,
1089                                st_dst_reg dst, st_src_reg src0)
1090 {
1091    int op = TGSI_OPCODE_ARL;
1092
1093    if (src0.type == GLSL_TYPE_INT || src0.type == GLSL_TYPE_UINT)
1094       op = TGSI_OPCODE_UARL;
1095
1096    assert(dst.file == PROGRAM_ADDRESS);
1097    if (dst.index >= this->num_address_regs)
1098       this->num_address_regs = dst.index + 1;
1099
1100    emit_asm(NULL, op, dst, src0);
1101 }
1102
1103 int
1104 glsl_to_tgsi_visitor::add_constant(gl_register_file file,
1105                                    gl_constant_value values[8], int size, int datatype,
1106                                    GLuint *swizzle_out)
1107 {
1108    if (file == PROGRAM_CONSTANT) {
1109       return _mesa_add_typed_unnamed_constant(this->prog->Parameters, values,
1110                                               size, datatype, swizzle_out);
1111    }
1112
1113    assert(file == PROGRAM_IMMEDIATE);
1114
1115    int index = 0;
1116    immediate_storage *entry;
1117    int size32 = size * (datatype == GL_DOUBLE ? 2 : 1);
1118    int i;
1119
1120    /* Search immediate storage to see if we already have an identical
1121     * immediate that we can use instead of adding a duplicate entry.
1122     */
1123    foreach_in_list(immediate_storage, entry, &this->immediates) {
1124       immediate_storage *tmp = entry;
1125
1126       for (i = 0; i * 4 < size32; i++) {
1127          int slot_size = MIN2(size32 - (i * 4), 4);
1128          if (tmp->type != datatype || tmp->size32 != slot_size)
1129             break;
1130          if (memcmp(tmp->values, &values[i * 4],
1131                     slot_size * sizeof(gl_constant_value)))
1132             break;
1133
1134          /* Everything matches, keep going until the full size is matched */
1135          tmp = (immediate_storage *)tmp->next;
1136       }
1137
1138       /* The full value matched */
1139       if (i * 4 >= size32)
1140          return index;
1141
1142       index++;
1143    }
1144
1145    for (i = 0; i * 4 < size32; i++) {
1146       int slot_size = MIN2(size32 - (i * 4), 4);
1147       /* Add this immediate to the list. */
1148       entry = new(mem_ctx) immediate_storage(&values[i * 4], slot_size, datatype);
1149       this->immediates.push_tail(entry);
1150       this->num_immediates++;
1151    }
1152    return index;
1153 }
1154
1155 st_src_reg
1156 glsl_to_tgsi_visitor::st_src_reg_for_float(float val)
1157 {
1158    st_src_reg src(PROGRAM_IMMEDIATE, -1, GLSL_TYPE_FLOAT);
1159    union gl_constant_value uval;
1160
1161    uval.f = val;
1162    src.index = add_constant(src.file, &uval, 1, GL_FLOAT, &src.swizzle);
1163
1164    return src;
1165 }
1166
1167 st_src_reg
1168 glsl_to_tgsi_visitor::st_src_reg_for_double(double val)
1169 {
1170    st_src_reg src(PROGRAM_IMMEDIATE, -1, GLSL_TYPE_DOUBLE);
1171    union gl_constant_value uval[2];
1172
1173    memcpy(uval, &val, sizeof(uval));
1174    src.index = add_constant(src.file, uval, 1, GL_DOUBLE, &src.swizzle);
1175    src.swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_X, SWIZZLE_Y);
1176    return src;
1177 }
1178
1179 st_src_reg
1180 glsl_to_tgsi_visitor::st_src_reg_for_int(int val)
1181 {
1182    st_src_reg src(PROGRAM_IMMEDIATE, -1, GLSL_TYPE_INT);
1183    union gl_constant_value uval;
1184
1185    assert(native_integers);
1186
1187    uval.i = val;
1188    src.index = add_constant(src.file, &uval, 1, GL_INT, &src.swizzle);
1189
1190    return src;
1191 }
1192
1193 st_src_reg
1194 glsl_to_tgsi_visitor::st_src_reg_for_type(enum glsl_base_type type, int val)
1195 {
1196    if (native_integers)
1197       return type == GLSL_TYPE_FLOAT ? st_src_reg_for_float(val) :
1198                                        st_src_reg_for_int(val);
1199    else
1200       return st_src_reg_for_float(val);
1201 }
1202
1203 static int
1204 attrib_type_size(const struct glsl_type *type, bool is_vs_input)
1205 {
1206    return st_glsl_attrib_type_size(type, is_vs_input);
1207 }
1208
1209 static int
1210 type_size(const struct glsl_type *type)
1211 {
1212    return st_glsl_type_size(type);
1213 }
1214
1215 /**
1216  * If the given GLSL type is an array or matrix or a structure containing
1217  * an array/matrix member, return true.  Else return false.
1218  *
1219  * This is used to determine which kind of temp storage (PROGRAM_TEMPORARY
1220  * or PROGRAM_ARRAY) should be used for variables of this type.  Anytime
1221  * we have an array that might be indexed with a variable, we need to use
1222  * the later storage type.
1223  */
1224 static bool
1225 type_has_array_or_matrix(const glsl_type *type)
1226 {
1227    if (type->is_array() || type->is_matrix())
1228       return true;
1229
1230    if (type->is_record()) {
1231       for (unsigned i = 0; i < type->length; i++) {
1232          if (type_has_array_or_matrix(type->fields.structure[i].type)) {
1233             return true;
1234          }
1235       }
1236    }
1237
1238    return false;
1239 }
1240
1241
1242 /**
1243  * In the initial pass of codegen, we assign temporary numbers to
1244  * intermediate results.  (not SSA -- variable assignments will reuse
1245  * storage).
1246  */
1247 st_src_reg
1248 glsl_to_tgsi_visitor::get_temp(const glsl_type *type)
1249 {
1250    st_src_reg src;
1251
1252    src.type = native_integers ? type->base_type : GLSL_TYPE_FLOAT;
1253    src.reladdr = NULL;
1254    src.negate = 0;
1255
1256    if (!options->EmitNoIndirectTemp && type_has_array_or_matrix(type)) {
1257       if (next_array >= max_num_arrays) {
1258          max_num_arrays += 32;
1259          array_sizes = (unsigned*)
1260             realloc(array_sizes, sizeof(array_sizes[0]) * max_num_arrays);
1261       }
1262
1263       src.file = PROGRAM_ARRAY;
1264       src.index = next_array << 16 | 0x8000;
1265       array_sizes[next_array] = type_size(type);
1266       ++next_array;
1267
1268    } else {
1269       src.file = PROGRAM_TEMPORARY;
1270       src.index = next_temp;
1271       next_temp += type_size(type);
1272    }
1273
1274    if (type->is_array() || type->is_record()) {
1275       src.swizzle = SWIZZLE_NOOP;
1276    } else {
1277       src.swizzle = swizzle_for_size(type->vector_elements);
1278    }
1279
1280    return src;
1281 }
1282
1283 variable_storage *
1284 glsl_to_tgsi_visitor::find_variable_storage(ir_variable *var)
1285 {
1286
1287    foreach_in_list(variable_storage, entry, &this->variables) {
1288       if (entry->var == var)
1289          return entry;
1290    }
1291
1292    return NULL;
1293 }
1294
1295 void
1296 glsl_to_tgsi_visitor::visit(ir_variable *ir)
1297 {
1298    if (strcmp(ir->name, "gl_FragCoord") == 0) {
1299       struct gl_fragment_program *fp = (struct gl_fragment_program *)this->prog;
1300
1301       fp->OriginUpperLeft = ir->data.origin_upper_left;
1302       fp->PixelCenterInteger = ir->data.pixel_center_integer;
1303    }
1304
1305    if (ir->data.mode == ir_var_uniform && strncmp(ir->name, "gl_", 3) == 0) {
1306       unsigned int i;
1307       const ir_state_slot *const slots = ir->get_state_slots();
1308       assert(slots != NULL);
1309
1310       /* Check if this statevar's setup in the STATE file exactly
1311        * matches how we'll want to reference it as a
1312        * struct/array/whatever.  If not, then we need to move it into
1313        * temporary storage and hope that it'll get copy-propagated
1314        * out.
1315        */
1316       for (i = 0; i < ir->get_num_state_slots(); i++) {
1317          if (slots[i].swizzle != SWIZZLE_XYZW) {
1318             break;
1319          }
1320       }
1321
1322       variable_storage *storage;
1323       st_dst_reg dst;
1324       if (i == ir->get_num_state_slots()) {
1325          /* We'll set the index later. */
1326          storage = new(mem_ctx) variable_storage(ir, PROGRAM_STATE_VAR, -1);
1327          this->variables.push_tail(storage);
1328
1329          dst = undef_dst;
1330       } else {
1331          /* The variable_storage constructor allocates slots based on the size
1332           * of the type.  However, this had better match the number of state
1333           * elements that we're going to copy into the new temporary.
1334           */
1335          assert((int) ir->get_num_state_slots() == type_size(ir->type));
1336
1337          dst = st_dst_reg(get_temp(ir->type));
1338
1339          storage = new(mem_ctx) variable_storage(ir, dst.file, dst.index);
1340
1341          this->variables.push_tail(storage);
1342       }
1343
1344
1345       for (unsigned int i = 0; i < ir->get_num_state_slots(); i++) {
1346          int index = _mesa_add_state_reference(this->prog->Parameters,
1347                                                (gl_state_index *)slots[i].tokens);
1348
1349          if (storage->file == PROGRAM_STATE_VAR) {
1350             if (storage->index == -1) {
1351                storage->index = index;
1352             } else {
1353                assert(index == storage->index + (int)i);
1354             }
1355          } else {
1356             /* We use GLSL_TYPE_FLOAT here regardless of the actual type of
1357              * the data being moved since MOV does not care about the type of
1358              * data it is moving, and we don't want to declare registers with
1359              * array or struct types.
1360              */
1361             st_src_reg src(PROGRAM_STATE_VAR, index, GLSL_TYPE_FLOAT);
1362             src.swizzle = slots[i].swizzle;
1363             emit_asm(ir, TGSI_OPCODE_MOV, dst, src);
1364             /* even a float takes up a whole vec4 reg in a struct/array. */
1365             dst.index++;
1366          }
1367       }
1368
1369       if (storage->file == PROGRAM_TEMPORARY &&
1370           dst.index != storage->index + (int) ir->get_num_state_slots()) {
1371          fail_link(this->shader_program,
1372                   "failed to load builtin uniform `%s'  (%d/%d regs loaded)\n",
1373                   ir->name, dst.index - storage->index,
1374                   type_size(ir->type));
1375       }
1376    }
1377 }
1378
1379 void
1380 glsl_to_tgsi_visitor::visit(ir_loop *ir)
1381 {
1382    emit_asm(NULL, TGSI_OPCODE_BGNLOOP);
1383
1384    visit_exec_list(&ir->body_instructions, this);
1385
1386    emit_asm(NULL, TGSI_OPCODE_ENDLOOP);
1387 }
1388
1389 void
1390 glsl_to_tgsi_visitor::visit(ir_loop_jump *ir)
1391 {
1392    switch (ir->mode) {
1393    case ir_loop_jump::jump_break:
1394       emit_asm(NULL, TGSI_OPCODE_BRK);
1395       break;
1396    case ir_loop_jump::jump_continue:
1397       emit_asm(NULL, TGSI_OPCODE_CONT);
1398       break;
1399    }
1400 }
1401
1402
1403 void
1404 glsl_to_tgsi_visitor::visit(ir_function_signature *ir)
1405 {
1406    assert(0);
1407    (void)ir;
1408 }
1409
1410 void
1411 glsl_to_tgsi_visitor::visit(ir_function *ir)
1412 {
1413    /* Ignore function bodies other than main() -- we shouldn't see calls to
1414     * them since they should all be inlined before we get to glsl_to_tgsi.
1415     */
1416    if (strcmp(ir->name, "main") == 0) {
1417       const ir_function_signature *sig;
1418       exec_list empty;
1419
1420       sig = ir->matching_signature(NULL, &empty, false);
1421
1422       assert(sig);
1423
1424       foreach_in_list(ir_instruction, ir, &sig->body) {
1425          ir->accept(this);
1426       }
1427    }
1428 }
1429
1430 bool
1431 glsl_to_tgsi_visitor::try_emit_mad(ir_expression *ir, int mul_operand)
1432 {
1433    int nonmul_operand = 1 - mul_operand;
1434    st_src_reg a, b, c;
1435    st_dst_reg result_dst;
1436
1437    ir_expression *expr = ir->operands[mul_operand]->as_expression();
1438    if (!expr || expr->operation != ir_binop_mul)
1439       return false;
1440
1441    expr->operands[0]->accept(this);
1442    a = this->result;
1443    expr->operands[1]->accept(this);
1444    b = this->result;
1445    ir->operands[nonmul_operand]->accept(this);
1446    c = this->result;
1447
1448    this->result = get_temp(ir->type);
1449    result_dst = st_dst_reg(this->result);
1450    result_dst.writemask = (1 << ir->type->vector_elements) - 1;
1451    emit_asm(ir, TGSI_OPCODE_MAD, result_dst, a, b, c);
1452
1453    return true;
1454 }
1455
1456 /**
1457  * Emit MAD(a, -b, a) instead of AND(a, NOT(b))
1458  *
1459  * The logic values are 1.0 for true and 0.0 for false.  Logical-and is
1460  * implemented using multiplication, and logical-or is implemented using
1461  * addition.  Logical-not can be implemented as (true - x), or (1.0 - x).
1462  * As result, the logical expression (a & !b) can be rewritten as:
1463  *
1464  *     - a * !b
1465  *     - a * (1 - b)
1466  *     - (a * 1) - (a * b)
1467  *     - a + -(a * b)
1468  *     - a + (a * -b)
1469  *
1470  * This final expression can be implemented as a single MAD(a, -b, a)
1471  * instruction.
1472  */
1473 bool
1474 glsl_to_tgsi_visitor::try_emit_mad_for_and_not(ir_expression *ir, int try_operand)
1475 {
1476    const int other_operand = 1 - try_operand;
1477    st_src_reg a, b;
1478
1479    ir_expression *expr = ir->operands[try_operand]->as_expression();
1480    if (!expr || expr->operation != ir_unop_logic_not)
1481       return false;
1482
1483    ir->operands[other_operand]->accept(this);
1484    a = this->result;
1485    expr->operands[0]->accept(this);
1486    b = this->result;
1487
1488    b.negate = ~b.negate;
1489
1490    this->result = get_temp(ir->type);
1491    emit_asm(ir, TGSI_OPCODE_MAD, st_dst_reg(this->result), a, b, a);
1492
1493    return true;
1494 }
1495
1496 void
1497 glsl_to_tgsi_visitor::reladdr_to_temp(ir_instruction *ir,
1498                                       st_src_reg *reg, int *num_reladdr)
1499 {
1500    if (!reg->reladdr && !reg->reladdr2)
1501       return;
1502
1503    if (reg->reladdr) emit_arl(ir, address_reg, *reg->reladdr);
1504    if (reg->reladdr2) emit_arl(ir, address_reg2, *reg->reladdr2);
1505
1506    if (*num_reladdr != 1) {
1507       st_src_reg temp = get_temp(reg->type == GLSL_TYPE_DOUBLE ? glsl_type::dvec4_type : glsl_type::vec4_type);
1508
1509       emit_asm(ir, TGSI_OPCODE_MOV, st_dst_reg(temp), *reg);
1510       *reg = temp;
1511    }
1512
1513    (*num_reladdr)--;
1514 }
1515
1516 void
1517 glsl_to_tgsi_visitor::visit(ir_expression *ir)
1518 {
1519    st_src_reg op[ARRAY_SIZE(ir->operands)];
1520
1521    /* Quick peephole: Emit MAD(a, b, c) instead of ADD(MUL(a, b), c)
1522     */
1523    if (ir->operation == ir_binop_add) {
1524       if (try_emit_mad(ir, 1))
1525          return;
1526       if (try_emit_mad(ir, 0))
1527          return;
1528    }
1529
1530    /* Quick peephole: Emit OPCODE_MAD(-a, -b, a) instead of AND(a, NOT(b))
1531     */
1532    if (!native_integers && ir->operation == ir_binop_logic_and) {
1533       if (try_emit_mad_for_and_not(ir, 1))
1534          return;
1535       if (try_emit_mad_for_and_not(ir, 0))
1536          return;
1537    }
1538
1539    if (ir->operation == ir_quadop_vector)
1540       assert(!"ir_quadop_vector should have been lowered");
1541
1542    for (unsigned int operand = 0; operand < ir->get_num_operands(); operand++) {
1543       this->result.file = PROGRAM_UNDEFINED;
1544       ir->operands[operand]->accept(this);
1545       if (this->result.file == PROGRAM_UNDEFINED) {
1546          printf("Failed to get tree for expression operand:\n");
1547          ir->operands[operand]->print();
1548          printf("\n");
1549          exit(1);
1550       }
1551       op[operand] = this->result;
1552
1553       /* Matrix expression operands should have been broken down to vector
1554        * operations already.
1555        */
1556       assert(!ir->operands[operand]->type->is_matrix());
1557    }
1558
1559    visit_expression(ir, op);
1560 }
1561
1562 /* The non-recursive part of the expression visitor lives in a separate
1563  * function and should be prevented from being inlined, to avoid a stack
1564  * explosion when deeply nested expressions are visited.
1565  */
1566 void
1567 glsl_to_tgsi_visitor::visit_expression(ir_expression* ir, st_src_reg *op)
1568 {
1569    st_src_reg result_src;
1570    st_dst_reg result_dst;
1571
1572    int vector_elements = ir->operands[0]->type->vector_elements;
1573    if (ir->operands[1]) {
1574       vector_elements = MAX2(vector_elements,
1575                              ir->operands[1]->type->vector_elements);
1576    }
1577
1578    this->result.file = PROGRAM_UNDEFINED;
1579
1580    /* Storage for our result.  Ideally for an assignment we'd be using
1581     * the actual storage for the result here, instead.
1582     */
1583    result_src = get_temp(ir->type);
1584    /* convenience for the emit functions below. */
1585    result_dst = st_dst_reg(result_src);
1586    /* Limit writes to the channels that will be used by result_src later.
1587     * This does limit this temp's use as a temporary for multi-instruction
1588     * sequences.
1589     */
1590    result_dst.writemask = (1 << ir->type->vector_elements) - 1;
1591
1592    switch (ir->operation) {
1593    case ir_unop_logic_not:
1594       if (result_dst.type != GLSL_TYPE_FLOAT)
1595          emit_asm(ir, TGSI_OPCODE_NOT, result_dst, op[0]);
1596       else {
1597          /* Previously 'SEQ dst, src, 0.0' was used for this.  However, many
1598           * older GPUs implement SEQ using multiple instructions (i915 uses two
1599           * SGE instructions and a MUL instruction).  Since our logic values are
1600           * 0.0 and 1.0, 1-x also implements !x.
1601           */
1602          op[0].negate = ~op[0].negate;
1603          emit_asm(ir, TGSI_OPCODE_ADD, result_dst, op[0], st_src_reg_for_float(1.0));
1604       }
1605       break;
1606    case ir_unop_neg:
1607       if (result_dst.type == GLSL_TYPE_INT || result_dst.type == GLSL_TYPE_UINT)
1608          emit_asm(ir, TGSI_OPCODE_INEG, result_dst, op[0]);
1609       else if (result_dst.type == GLSL_TYPE_DOUBLE)
1610          emit_asm(ir, TGSI_OPCODE_DNEG, result_dst, op[0]);
1611       else {
1612          op[0].negate = ~op[0].negate;
1613          result_src = op[0];
1614       }
1615       break;
1616    case ir_unop_subroutine_to_int:
1617       emit_asm(ir, TGSI_OPCODE_MOV, result_dst, op[0]);
1618       break;
1619    case ir_unop_abs:
1620       emit_asm(ir, TGSI_OPCODE_ABS, result_dst, op[0]);
1621       break;
1622    case ir_unop_sign:
1623       emit_asm(ir, TGSI_OPCODE_SSG, result_dst, op[0]);
1624       break;
1625    case ir_unop_rcp:
1626       emit_scalar(ir, TGSI_OPCODE_RCP, result_dst, op[0]);
1627       break;
1628
1629    case ir_unop_exp2:
1630       emit_scalar(ir, TGSI_OPCODE_EX2, result_dst, op[0]);
1631       break;
1632    case ir_unop_exp:
1633    case ir_unop_log:
1634       assert(!"not reached: should be handled by ir_explog_to_explog2");
1635       break;
1636    case ir_unop_log2:
1637       emit_scalar(ir, TGSI_OPCODE_LG2, result_dst, op[0]);
1638       break;
1639    case ir_unop_sin:
1640       emit_scalar(ir, TGSI_OPCODE_SIN, result_dst, op[0]);
1641       break;
1642    case ir_unop_cos:
1643       emit_scalar(ir, TGSI_OPCODE_COS, result_dst, op[0]);
1644       break;
1645    case ir_unop_saturate: {
1646       glsl_to_tgsi_instruction *inst;
1647       inst = emit_asm(ir, TGSI_OPCODE_MOV, result_dst, op[0]);
1648       inst->saturate = true;
1649       break;
1650    }
1651
1652    case ir_unop_dFdx:
1653    case ir_unop_dFdx_coarse:
1654       emit_asm(ir, TGSI_OPCODE_DDX, result_dst, op[0]);
1655       break;
1656    case ir_unop_dFdx_fine:
1657       emit_asm(ir, TGSI_OPCODE_DDX_FINE, result_dst, op[0]);
1658       break;
1659    case ir_unop_dFdy:
1660    case ir_unop_dFdy_coarse:
1661    case ir_unop_dFdy_fine:
1662    {
1663       /* The X component contains 1 or -1 depending on whether the framebuffer
1664        * is a FBO or the window system buffer, respectively.
1665        * It is then multiplied with the source operand of DDY.
1666        */
1667       static const gl_state_index transform_y_state[STATE_LENGTH]
1668          = { STATE_INTERNAL, STATE_FB_WPOS_Y_TRANSFORM };
1669
1670       unsigned transform_y_index =
1671          _mesa_add_state_reference(this->prog->Parameters,
1672                                    transform_y_state);
1673
1674       st_src_reg transform_y = st_src_reg(PROGRAM_STATE_VAR,
1675                                           transform_y_index,
1676                                           glsl_type::vec4_type);
1677       transform_y.swizzle = SWIZZLE_XXXX;
1678
1679       st_src_reg temp = get_temp(glsl_type::vec4_type);
1680
1681       emit_asm(ir, TGSI_OPCODE_MUL, st_dst_reg(temp), transform_y, op[0]);
1682       emit_asm(ir, ir->operation == ir_unop_dFdy_fine ?
1683            TGSI_OPCODE_DDY_FINE : TGSI_OPCODE_DDY, result_dst, temp);
1684       break;
1685    }
1686
1687    case ir_unop_frexp_sig:
1688       emit_asm(ir, TGSI_OPCODE_DFRACEXP, result_dst, undef_dst, op[0]);
1689       break;
1690
1691    case ir_unop_frexp_exp:
1692       emit_asm(ir, TGSI_OPCODE_DFRACEXP, undef_dst, result_dst, op[0]);
1693       break;
1694
1695    case ir_unop_noise: {
1696       /* At some point, a motivated person could add a better
1697        * implementation of noise.  Currently not even the nvidia
1698        * binary drivers do anything more than this.  In any case, the
1699        * place to do this is in the GL state tracker, not the poor
1700        * driver.
1701        */
1702       emit_asm(ir, TGSI_OPCODE_MOV, result_dst, st_src_reg_for_float(0.5));
1703       break;
1704    }
1705
1706    case ir_binop_add:
1707       emit_asm(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]);
1708       break;
1709    case ir_binop_sub:
1710       emit_asm(ir, TGSI_OPCODE_SUB, result_dst, op[0], op[1]);
1711       break;
1712
1713    case ir_binop_mul:
1714       emit_asm(ir, TGSI_OPCODE_MUL, result_dst, op[0], op[1]);
1715       break;
1716    case ir_binop_div:
1717       if (result_dst.type == GLSL_TYPE_FLOAT || result_dst.type == GLSL_TYPE_DOUBLE)
1718          assert(!"not reached: should be handled by ir_div_to_mul_rcp");
1719       else
1720          emit_asm(ir, TGSI_OPCODE_DIV, result_dst, op[0], op[1]);
1721       break;
1722    case ir_binop_mod:
1723       if (result_dst.type == GLSL_TYPE_FLOAT)
1724          assert(!"ir_binop_mod should have been converted to b * fract(a/b)");
1725       else
1726          emit_asm(ir, TGSI_OPCODE_MOD, result_dst, op[0], op[1]);
1727       break;
1728
1729    case ir_binop_less:
1730       emit_asm(ir, TGSI_OPCODE_SLT, result_dst, op[0], op[1]);
1731       break;
1732    case ir_binop_greater:
1733       emit_asm(ir, TGSI_OPCODE_SLT, result_dst, op[1], op[0]);
1734       break;
1735    case ir_binop_lequal:
1736       emit_asm(ir, TGSI_OPCODE_SGE, result_dst, op[1], op[0]);
1737       break;
1738    case ir_binop_gequal:
1739       emit_asm(ir, TGSI_OPCODE_SGE, result_dst, op[0], op[1]);
1740       break;
1741    case ir_binop_equal:
1742       emit_asm(ir, TGSI_OPCODE_SEQ, result_dst, op[0], op[1]);
1743       break;
1744    case ir_binop_nequal:
1745       emit_asm(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]);
1746       break;
1747    case ir_binop_all_equal:
1748       /* "==" operator producing a scalar boolean. */
1749       if (ir->operands[0]->type->is_vector() ||
1750           ir->operands[1]->type->is_vector()) {
1751          st_src_reg temp = get_temp(native_integers ?
1752                                     glsl_type::uvec4_type :
1753                                     glsl_type::vec4_type);
1754
1755          if (native_integers) {
1756             st_dst_reg temp_dst = st_dst_reg(temp);
1757             st_src_reg temp1 = st_src_reg(temp), temp2 = st_src_reg(temp);
1758
1759             if (ir->operands[0]->type->is_boolean() &&
1760                 ir->operands[1]->as_constant() &&
1761                 ir->operands[1]->as_constant()->is_one()) {
1762                emit_asm(ir, TGSI_OPCODE_MOV, st_dst_reg(temp), op[0]);
1763             } else {
1764                emit_asm(ir, TGSI_OPCODE_SEQ, st_dst_reg(temp), op[0], op[1]);
1765             }
1766
1767             /* Emit 1-3 AND operations to combine the SEQ results. */
1768             switch (ir->operands[0]->type->vector_elements) {
1769             case 2:
1770                break;
1771             case 3:
1772                temp_dst.writemask = WRITEMASK_Y;
1773                temp1.swizzle = SWIZZLE_YYYY;
1774                temp2.swizzle = SWIZZLE_ZZZZ;
1775                emit_asm(ir, TGSI_OPCODE_AND, temp_dst, temp1, temp2);
1776                break;
1777             case 4:
1778                temp_dst.writemask = WRITEMASK_X;
1779                temp1.swizzle = SWIZZLE_XXXX;
1780                temp2.swizzle = SWIZZLE_YYYY;
1781                emit_asm(ir, TGSI_OPCODE_AND, temp_dst, temp1, temp2);
1782                temp_dst.writemask = WRITEMASK_Y;
1783                temp1.swizzle = SWIZZLE_ZZZZ;
1784                temp2.swizzle = SWIZZLE_WWWW;
1785                emit_asm(ir, TGSI_OPCODE_AND, temp_dst, temp1, temp2);
1786             }
1787
1788             temp1.swizzle = SWIZZLE_XXXX;
1789             temp2.swizzle = SWIZZLE_YYYY;
1790             emit_asm(ir, TGSI_OPCODE_AND, result_dst, temp1, temp2);
1791          } else {
1792             emit_asm(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]);
1793
1794             /* After the dot-product, the value will be an integer on the
1795              * range [0,4].  Zero becomes 1.0, and positive values become zero.
1796              */
1797             emit_dp(ir, result_dst, temp, temp, vector_elements);
1798
1799             /* Negating the result of the dot-product gives values on the range
1800              * [-4, 0].  Zero becomes 1.0, and negative values become zero.
1801              * This is achieved using SGE.
1802              */
1803             st_src_reg sge_src = result_src;
1804             sge_src.negate = ~sge_src.negate;
1805             emit_asm(ir, TGSI_OPCODE_SGE, result_dst, sge_src, st_src_reg_for_float(0.0));
1806          }
1807       } else {
1808          emit_asm(ir, TGSI_OPCODE_SEQ, result_dst, op[0], op[1]);
1809       }
1810       break;
1811    case ir_binop_any_nequal:
1812       /* "!=" operator producing a scalar boolean. */
1813       if (ir->operands[0]->type->is_vector() ||
1814           ir->operands[1]->type->is_vector()) {
1815          st_src_reg temp = get_temp(native_integers ?
1816                                     glsl_type::uvec4_type :
1817                                     glsl_type::vec4_type);
1818          if (ir->operands[0]->type->is_boolean() &&
1819              ir->operands[1]->as_constant() &&
1820              ir->operands[1]->as_constant()->is_zero()) {
1821             emit_asm(ir, TGSI_OPCODE_MOV, st_dst_reg(temp), op[0]);
1822          } else {
1823             emit_asm(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]);
1824          }
1825
1826          if (native_integers) {
1827             st_dst_reg temp_dst = st_dst_reg(temp);
1828             st_src_reg temp1 = st_src_reg(temp), temp2 = st_src_reg(temp);
1829
1830             /* Emit 1-3 OR operations to combine the SNE results. */
1831             switch (ir->operands[0]->type->vector_elements) {
1832             case 2:
1833                break;
1834             case 3:
1835                temp_dst.writemask = WRITEMASK_Y;
1836                temp1.swizzle = SWIZZLE_YYYY;
1837                temp2.swizzle = SWIZZLE_ZZZZ;
1838                emit_asm(ir, TGSI_OPCODE_OR, temp_dst, temp1, temp2);
1839                break;
1840             case 4:
1841                temp_dst.writemask = WRITEMASK_X;
1842                temp1.swizzle = SWIZZLE_XXXX;
1843                temp2.swizzle = SWIZZLE_YYYY;
1844                emit_asm(ir, TGSI_OPCODE_OR, temp_dst, temp1, temp2);
1845                temp_dst.writemask = WRITEMASK_Y;
1846                temp1.swizzle = SWIZZLE_ZZZZ;
1847                temp2.swizzle = SWIZZLE_WWWW;
1848                emit_asm(ir, TGSI_OPCODE_OR, temp_dst, temp1, temp2);
1849             }
1850
1851             temp1.swizzle = SWIZZLE_XXXX;
1852             temp2.swizzle = SWIZZLE_YYYY;
1853             emit_asm(ir, TGSI_OPCODE_OR, result_dst, temp1, temp2);
1854          } else {
1855             /* After the dot-product, the value will be an integer on the
1856              * range [0,4].  Zero stays zero, and positive values become 1.0.
1857              */
1858             glsl_to_tgsi_instruction *const dp =
1859                   emit_dp(ir, result_dst, temp, temp, vector_elements);
1860             if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
1861                /* The clamping to [0,1] can be done for free in the fragment
1862                 * shader with a saturate.
1863                 */
1864                dp->saturate = true;
1865             } else {
1866                /* Negating the result of the dot-product gives values on the range
1867                 * [-4, 0].  Zero stays zero, and negative values become 1.0.  This
1868                 * achieved using SLT.
1869                 */
1870                st_src_reg slt_src = result_src;
1871                slt_src.negate = ~slt_src.negate;
1872                emit_asm(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0));
1873             }
1874          }
1875       } else {
1876          emit_asm(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]);
1877       }
1878       break;
1879
1880    case ir_binop_logic_xor:
1881       if (native_integers)
1882          emit_asm(ir, TGSI_OPCODE_XOR, result_dst, op[0], op[1]);
1883       else
1884          emit_asm(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]);
1885       break;
1886
1887    case ir_binop_logic_or: {
1888       if (native_integers) {
1889          /* If integers are used as booleans, we can use an actual "or"
1890           * instruction.
1891           */
1892          assert(native_integers);
1893          emit_asm(ir, TGSI_OPCODE_OR, result_dst, op[0], op[1]);
1894       } else {
1895          /* After the addition, the value will be an integer on the
1896           * range [0,2].  Zero stays zero, and positive values become 1.0.
1897           */
1898          glsl_to_tgsi_instruction *add =
1899             emit_asm(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]);
1900          if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
1901             /* The clamping to [0,1] can be done for free in the fragment
1902              * shader with a saturate if floats are being used as boolean values.
1903              */
1904             add->saturate = true;
1905          } else {
1906             /* Negating the result of the addition gives values on the range
1907              * [-2, 0].  Zero stays zero, and negative values become 1.0.  This
1908              * is achieved using SLT.
1909              */
1910             st_src_reg slt_src = result_src;
1911             slt_src.negate = ~slt_src.negate;
1912             emit_asm(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0));
1913          }
1914       }
1915       break;
1916    }
1917
1918    case ir_binop_logic_and:
1919       /* If native integers are disabled, the bool args are stored as float 0.0
1920        * or 1.0, so "mul" gives us "and".  If they're enabled, just use the
1921        * actual AND opcode.
1922        */
1923       if (native_integers)
1924          emit_asm(ir, TGSI_OPCODE_AND, result_dst, op[0], op[1]);
1925       else
1926          emit_asm(ir, TGSI_OPCODE_MUL, result_dst, op[0], op[1]);
1927       break;
1928
1929    case ir_binop_dot:
1930       assert(ir->operands[0]->type->is_vector());
1931       assert(ir->operands[0]->type == ir->operands[1]->type);
1932       emit_dp(ir, result_dst, op[0], op[1],
1933               ir->operands[0]->type->vector_elements);
1934       break;
1935
1936    case ir_unop_sqrt:
1937       if (have_sqrt) {
1938          emit_scalar(ir, TGSI_OPCODE_SQRT, result_dst, op[0]);
1939       } else {
1940          /* This is the only instruction sequence that makes the game "Risen"
1941           * render correctly. ABS is not required for the game, but since GLSL
1942           * declares negative values as "undefined", allowing us to do whatever
1943           * we want, I choose to use ABS to match DX9 and pre-GLSL RSQ
1944           * behavior.
1945           */
1946          emit_scalar(ir, TGSI_OPCODE_ABS, result_dst, op[0]);
1947          emit_scalar(ir, TGSI_OPCODE_RSQ, result_dst, result_src);
1948          emit_scalar(ir, TGSI_OPCODE_RCP, result_dst, result_src);
1949       }
1950       break;
1951    case ir_unop_rsq:
1952       emit_scalar(ir, TGSI_OPCODE_RSQ, result_dst, op[0]);
1953       break;
1954    case ir_unop_i2f:
1955       if (native_integers) {
1956          emit_asm(ir, TGSI_OPCODE_I2F, result_dst, op[0]);
1957          break;
1958       }
1959       /* fallthrough to next case otherwise */
1960    case ir_unop_b2f:
1961       if (native_integers) {
1962          emit_asm(ir, TGSI_OPCODE_AND, result_dst, op[0], st_src_reg_for_float(1.0));
1963          break;
1964       }
1965       /* fallthrough to next case otherwise */
1966    case ir_unop_i2u:
1967    case ir_unop_u2i:
1968       /* Converting between signed and unsigned integers is a no-op. */
1969       result_src = op[0];
1970       result_src.type = result_dst.type;
1971       break;
1972    case ir_unop_b2i:
1973       if (native_integers) {
1974          /* Booleans are stored as integers using ~0 for true and 0 for false.
1975           * GLSL requires that int(bool) return 1 for true and 0 for false.
1976           * This conversion is done with AND, but it could be done with NEG.
1977           */
1978          emit_asm(ir, TGSI_OPCODE_AND, result_dst, op[0], st_src_reg_for_int(1));
1979       } else {
1980          /* Booleans and integers are both stored as floats when native
1981           * integers are disabled.
1982           */
1983          result_src = op[0];
1984       }
1985       break;
1986    case ir_unop_f2i:
1987       if (native_integers)
1988          emit_asm(ir, TGSI_OPCODE_F2I, result_dst, op[0]);
1989       else
1990          emit_asm(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]);
1991       break;
1992    case ir_unop_f2u:
1993       if (native_integers)
1994          emit_asm(ir, TGSI_OPCODE_F2U, result_dst, op[0]);
1995       else
1996          emit_asm(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]);
1997       break;
1998    case ir_unop_bitcast_f2i:
1999    case ir_unop_bitcast_f2u:
2000       /* Make sure we don't propagate the negate modifier to integer opcodes. */
2001       if (op[0].negate)
2002          emit_asm(ir, TGSI_OPCODE_MOV, result_dst, op[0]);
2003       else
2004          result_src = op[0];
2005       result_src.type = ir->operation == ir_unop_bitcast_f2i ? GLSL_TYPE_INT :
2006                                                                GLSL_TYPE_UINT;
2007       break;
2008    case ir_unop_bitcast_i2f:
2009    case ir_unop_bitcast_u2f:
2010       result_src = op[0];
2011       result_src.type = GLSL_TYPE_FLOAT;
2012       break;
2013    case ir_unop_f2b:
2014       emit_asm(ir, TGSI_OPCODE_SNE, result_dst, op[0], st_src_reg_for_float(0.0));
2015       break;
2016    case ir_unop_d2b:
2017       emit_asm(ir, TGSI_OPCODE_SNE, result_dst, op[0], st_src_reg_for_double(0.0));
2018       break;
2019    case ir_unop_i2b:
2020       if (native_integers)
2021          emit_asm(ir, TGSI_OPCODE_USNE, result_dst, op[0], st_src_reg_for_int(0));
2022       else
2023          emit_asm(ir, TGSI_OPCODE_SNE, result_dst, op[0], st_src_reg_for_float(0.0));
2024       break;
2025    case ir_unop_trunc:
2026       emit_asm(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]);
2027       break;
2028    case ir_unop_ceil:
2029       emit_asm(ir, TGSI_OPCODE_CEIL, result_dst, op[0]);
2030       break;
2031    case ir_unop_floor:
2032       emit_asm(ir, TGSI_OPCODE_FLR, result_dst, op[0]);
2033       break;
2034    case ir_unop_round_even:
2035       emit_asm(ir, TGSI_OPCODE_ROUND, result_dst, op[0]);
2036       break;
2037    case ir_unop_fract:
2038       emit_asm(ir, TGSI_OPCODE_FRC, result_dst, op[0]);
2039       break;
2040
2041    case ir_binop_min:
2042       emit_asm(ir, TGSI_OPCODE_MIN, result_dst, op[0], op[1]);
2043       break;
2044    case ir_binop_max:
2045       emit_asm(ir, TGSI_OPCODE_MAX, result_dst, op[0], op[1]);
2046       break;
2047    case ir_binop_pow:
2048       emit_scalar(ir, TGSI_OPCODE_POW, result_dst, op[0], op[1]);
2049       break;
2050
2051    case ir_unop_bit_not:
2052       if (native_integers) {
2053          emit_asm(ir, TGSI_OPCODE_NOT, result_dst, op[0]);
2054          break;
2055       }
2056    case ir_unop_u2f:
2057       if (native_integers) {
2058          emit_asm(ir, TGSI_OPCODE_U2F, result_dst, op[0]);
2059          break;
2060       }
2061    case ir_binop_lshift:
2062       if (native_integers) {
2063          emit_asm(ir, TGSI_OPCODE_SHL, result_dst, op[0], op[1]);
2064          break;
2065       }
2066    case ir_binop_rshift:
2067       if (native_integers) {
2068          emit_asm(ir, TGSI_OPCODE_ISHR, result_dst, op[0], op[1]);
2069          break;
2070       }
2071    case ir_binop_bit_and:
2072       if (native_integers) {
2073          emit_asm(ir, TGSI_OPCODE_AND, result_dst, op[0], op[1]);
2074          break;
2075       }
2076    case ir_binop_bit_xor:
2077       if (native_integers) {
2078          emit_asm(ir, TGSI_OPCODE_XOR, result_dst, op[0], op[1]);
2079          break;
2080       }
2081    case ir_binop_bit_or:
2082       if (native_integers) {
2083          emit_asm(ir, TGSI_OPCODE_OR, result_dst, op[0], op[1]);
2084          break;
2085       }
2086
2087       assert(!"GLSL 1.30 features unsupported");
2088       break;
2089
2090    case ir_binop_ubo_load: {
2091       ir_constant *const_uniform_block = ir->operands[0]->as_constant();
2092       ir_constant *const_offset_ir = ir->operands[1]->as_constant();
2093       unsigned const_offset = const_offset_ir ? const_offset_ir->value.u[0] : 0;
2094       unsigned const_block = const_uniform_block ? const_uniform_block->value.u[0] + 1 : 0;
2095       st_src_reg index_reg = get_temp(glsl_type::uint_type);
2096       st_src_reg cbuf;
2097
2098       cbuf.type = ir->type->base_type;
2099       cbuf.file = PROGRAM_CONSTANT;
2100       cbuf.index = 0;
2101       cbuf.reladdr = NULL;
2102       cbuf.negate = 0;
2103
2104       assert(ir->type->is_vector() || ir->type->is_scalar());
2105
2106       if (const_offset_ir) {
2107          /* Constant index into constant buffer */
2108          cbuf.reladdr = NULL;
2109          cbuf.index = const_offset / 16;
2110       }
2111       else {
2112          /* Relative/variable index into constant buffer */
2113          emit_asm(ir, TGSI_OPCODE_USHR, st_dst_reg(index_reg), op[1],
2114               st_src_reg_for_int(4));
2115          cbuf.reladdr = ralloc(mem_ctx, st_src_reg);
2116          memcpy(cbuf.reladdr, &index_reg, sizeof(index_reg));
2117       }
2118
2119       if (const_uniform_block) {
2120          /* Constant constant buffer */
2121          cbuf.reladdr2 = NULL;
2122          cbuf.index2D = const_block;
2123          cbuf.has_index2 = true;
2124       }
2125       else {
2126          /* Relative/variable constant buffer */
2127          cbuf.reladdr2 = ralloc(mem_ctx, st_src_reg);
2128          cbuf.index2D = 1;
2129          memcpy(cbuf.reladdr2, &op[0], sizeof(st_src_reg));
2130          cbuf.has_index2 = true;
2131       }
2132
2133       cbuf.swizzle = swizzle_for_size(ir->type->vector_elements);
2134       if (glsl_base_type_is_64bit(cbuf.type))
2135          cbuf.swizzle += MAKE_SWIZZLE4(const_offset % 16 / 8,
2136                                        const_offset % 16 / 8,
2137                                        const_offset % 16 / 8,
2138                                        const_offset % 16 / 8);
2139       else
2140          cbuf.swizzle += MAKE_SWIZZLE4(const_offset % 16 / 4,
2141                                        const_offset % 16 / 4,
2142                                        const_offset % 16 / 4,
2143                                        const_offset % 16 / 4);
2144
2145       if (ir->type->base_type == GLSL_TYPE_BOOL) {
2146          emit_asm(ir, TGSI_OPCODE_USNE, result_dst, cbuf, st_src_reg_for_int(0));
2147       } else {
2148          emit_asm(ir, TGSI_OPCODE_MOV, result_dst, cbuf);
2149       }
2150       break;
2151    }
2152    case ir_triop_lrp:
2153       /* note: we have to reorder the three args here */
2154       emit_asm(ir, TGSI_OPCODE_LRP, result_dst, op[2], op[1], op[0]);
2155       break;
2156    case ir_triop_csel:
2157       if (this->ctx->Const.NativeIntegers)
2158          emit_asm(ir, TGSI_OPCODE_UCMP, result_dst, op[0], op[1], op[2]);
2159       else {
2160          op[0].negate = ~op[0].negate;
2161          emit_asm(ir, TGSI_OPCODE_CMP, result_dst, op[0], op[1], op[2]);
2162       }
2163       break;
2164    case ir_triop_bitfield_extract:
2165       emit_asm(ir, TGSI_OPCODE_IBFE, result_dst, op[0], op[1], op[2]);
2166       break;
2167    case ir_quadop_bitfield_insert:
2168       emit_asm(ir, TGSI_OPCODE_BFI, result_dst, op[0], op[1], op[2], op[3]);
2169       break;
2170    case ir_unop_bitfield_reverse:
2171       emit_asm(ir, TGSI_OPCODE_BREV, result_dst, op[0]);
2172       break;
2173    case ir_unop_bit_count:
2174       emit_asm(ir, TGSI_OPCODE_POPC, result_dst, op[0]);
2175       break;
2176    case ir_unop_find_msb:
2177       emit_asm(ir, TGSI_OPCODE_IMSB, result_dst, op[0]);
2178       break;
2179    case ir_unop_find_lsb:
2180       emit_asm(ir, TGSI_OPCODE_LSB, result_dst, op[0]);
2181       break;
2182    case ir_binop_imul_high:
2183       emit_asm(ir, TGSI_OPCODE_IMUL_HI, result_dst, op[0], op[1]);
2184       break;
2185    case ir_triop_fma:
2186       /* In theory, MAD is incorrect here. */
2187       if (have_fma)
2188          emit_asm(ir, TGSI_OPCODE_FMA, result_dst, op[0], op[1], op[2]);
2189       else
2190          emit_asm(ir, TGSI_OPCODE_MAD, result_dst, op[0], op[1], op[2]);
2191       break;
2192    case ir_unop_interpolate_at_centroid:
2193       emit_asm(ir, TGSI_OPCODE_INTERP_CENTROID, result_dst, op[0]);
2194       break;
2195    case ir_binop_interpolate_at_offset: {
2196       /* The y coordinate needs to be flipped for the default fb */
2197       static const gl_state_index transform_y_state[STATE_LENGTH]
2198          = { STATE_INTERNAL, STATE_FB_WPOS_Y_TRANSFORM };
2199
2200       unsigned transform_y_index =
2201          _mesa_add_state_reference(this->prog->Parameters,
2202                                    transform_y_state);
2203
2204       st_src_reg transform_y = st_src_reg(PROGRAM_STATE_VAR,
2205                                           transform_y_index,
2206                                           glsl_type::vec4_type);
2207       transform_y.swizzle = SWIZZLE_XXXX;
2208
2209       st_src_reg temp = get_temp(glsl_type::vec2_type);
2210       st_dst_reg temp_dst = st_dst_reg(temp);
2211
2212       emit_asm(ir, TGSI_OPCODE_MOV, temp_dst, op[1]);
2213       temp_dst.writemask = WRITEMASK_Y;
2214       emit_asm(ir, TGSI_OPCODE_MUL, temp_dst, transform_y, op[1]);
2215       emit_asm(ir, TGSI_OPCODE_INTERP_OFFSET, result_dst, op[0], temp);
2216       break;
2217    }
2218    case ir_binop_interpolate_at_sample:
2219       emit_asm(ir, TGSI_OPCODE_INTERP_SAMPLE, result_dst, op[0], op[1]);
2220       break;
2221
2222    case ir_unop_d2f:
2223       emit_asm(ir, TGSI_OPCODE_D2F, result_dst, op[0]);
2224       break;
2225    case ir_unop_f2d:
2226       emit_asm(ir, TGSI_OPCODE_F2D, result_dst, op[0]);
2227       break;
2228    case ir_unop_d2i:
2229       emit_asm(ir, TGSI_OPCODE_D2I, result_dst, op[0]);
2230       break;
2231    case ir_unop_i2d:
2232       emit_asm(ir, TGSI_OPCODE_I2D, result_dst, op[0]);
2233       break;
2234    case ir_unop_d2u:
2235       emit_asm(ir, TGSI_OPCODE_D2U, result_dst, op[0]);
2236       break;
2237    case ir_unop_u2d:
2238       emit_asm(ir, TGSI_OPCODE_U2D, result_dst, op[0]);
2239       break;
2240    case ir_unop_unpack_double_2x32:
2241    case ir_unop_pack_double_2x32:
2242       emit_asm(ir, TGSI_OPCODE_MOV, result_dst, op[0]);
2243       break;
2244
2245    case ir_binop_ldexp:
2246       if (ir->operands[0]->type->base_type == GLSL_TYPE_DOUBLE) {
2247          emit_asm(ir, TGSI_OPCODE_DLDEXP, result_dst, op[0], op[1]);
2248       } else {
2249          assert(!"Invalid ldexp for non-double opcode in glsl_to_tgsi_visitor::visit()");
2250       }
2251       break;
2252
2253    case ir_unop_pack_half_2x16:
2254       emit_asm(ir, TGSI_OPCODE_PK2H, result_dst, op[0]);
2255       break;
2256    case ir_unop_unpack_half_2x16:
2257       emit_asm(ir, TGSI_OPCODE_UP2H, result_dst, op[0]);
2258       break;
2259
2260    case ir_unop_get_buffer_size: {
2261       ir_constant *const_offset = ir->operands[0]->as_constant();
2262       st_src_reg buffer(
2263             PROGRAM_BUFFER,
2264             ctx->Const.Program[shader->Stage].MaxAtomicBuffers +
2265             (const_offset ? const_offset->value.u[0] : 0),
2266             GLSL_TYPE_UINT);
2267       if (!const_offset) {
2268          buffer.reladdr = ralloc(mem_ctx, st_src_reg);
2269          *buffer.reladdr = op[0];
2270          emit_arl(ir, sampler_reladdr, op[0]);
2271       }
2272       emit_asm(ir, TGSI_OPCODE_RESQ, result_dst)->buffer = buffer;
2273       break;
2274    }
2275
2276    case ir_unop_vote_any:
2277       emit_asm(ir, TGSI_OPCODE_VOTE_ANY, result_dst, op[0]);
2278       break;
2279    case ir_unop_vote_all:
2280       emit_asm(ir, TGSI_OPCODE_VOTE_ALL, result_dst, op[0]);
2281       break;
2282    case ir_unop_vote_eq:
2283       emit_asm(ir, TGSI_OPCODE_VOTE_EQ, result_dst, op[0]);
2284       break;
2285
2286    case ir_unop_pack_snorm_2x16:
2287    case ir_unop_pack_unorm_2x16:
2288    case ir_unop_pack_snorm_4x8:
2289    case ir_unop_pack_unorm_4x8:
2290
2291    case ir_unop_unpack_snorm_2x16:
2292    case ir_unop_unpack_unorm_2x16:
2293    case ir_unop_unpack_snorm_4x8:
2294    case ir_unop_unpack_unorm_4x8:
2295
2296    case ir_quadop_vector:
2297    case ir_binop_vector_extract:
2298    case ir_triop_vector_insert:
2299    case ir_binop_carry:
2300    case ir_binop_borrow:
2301    case ir_unop_ssbo_unsized_array_length:
2302       /* This operation is not supported, or should have already been handled.
2303        */
2304       assert(!"Invalid ir opcode in glsl_to_tgsi_visitor::visit()");
2305       break;
2306    }
2307
2308    this->result = result_src;
2309 }
2310
2311
2312 void
2313 glsl_to_tgsi_visitor::visit(ir_swizzle *ir)
2314 {
2315    st_src_reg src;
2316    int i;
2317    int swizzle[4];
2318
2319    /* Note that this is only swizzles in expressions, not those on the left
2320     * hand side of an assignment, which do write masking.  See ir_assignment
2321     * for that.
2322     */
2323
2324    ir->val->accept(this);
2325    src = this->result;
2326    assert(src.file != PROGRAM_UNDEFINED);
2327    assert(ir->type->vector_elements > 0);
2328
2329    for (i = 0; i < 4; i++) {
2330       if (i < ir->type->vector_elements) {
2331          switch (i) {
2332          case 0:
2333             swizzle[i] = GET_SWZ(src.swizzle, ir->mask.x);
2334             break;
2335          case 1:
2336             swizzle[i] = GET_SWZ(src.swizzle, ir->mask.y);
2337             break;
2338          case 2:
2339             swizzle[i] = GET_SWZ(src.swizzle, ir->mask.z);
2340             break;
2341          case 3:
2342             swizzle[i] = GET_SWZ(src.swizzle, ir->mask.w);
2343             break;
2344          }
2345       } else {
2346          /* If the type is smaller than a vec4, replicate the last
2347           * channel out.
2348           */
2349          swizzle[i] = swizzle[ir->type->vector_elements - 1];
2350       }
2351    }
2352
2353    src.swizzle = MAKE_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]);
2354
2355    this->result = src;
2356 }
2357
2358 /* Test if the variable is an array. Note that geometry and
2359  * tessellation shader inputs are outputs are always arrays (except
2360  * for patch inputs), so only the array element type is considered.
2361  */
2362 static bool
2363 is_inout_array(unsigned stage, ir_variable *var, bool *remove_array)
2364 {
2365    const glsl_type *type = var->type;
2366
2367    *remove_array = false;
2368
2369    if ((stage == MESA_SHADER_VERTEX && var->data.mode == ir_var_shader_in) ||
2370        (stage == MESA_SHADER_FRAGMENT && var->data.mode == ir_var_shader_out))
2371       return false;
2372
2373    if (((stage == MESA_SHADER_GEOMETRY && var->data.mode == ir_var_shader_in) ||
2374         (stage == MESA_SHADER_TESS_EVAL && var->data.mode == ir_var_shader_in) ||
2375         stage == MESA_SHADER_TESS_CTRL) &&
2376        !var->data.patch) {
2377       if (!var->type->is_array())
2378          return false; /* a system value probably */
2379
2380       type = var->type->fields.array;
2381       *remove_array = true;
2382    }
2383
2384    return type->is_array() || type->is_matrix();
2385 }
2386
2387 void
2388 glsl_to_tgsi_visitor::visit(ir_dereference_variable *ir)
2389 {
2390    variable_storage *entry = find_variable_storage(ir->var);
2391    ir_variable *var = ir->var;
2392    bool remove_array;
2393
2394    if (!entry) {
2395       switch (var->data.mode) {
2396       case ir_var_uniform:
2397          entry = new(mem_ctx) variable_storage(var, PROGRAM_UNIFORM,
2398                                                var->data.param_index);
2399          this->variables.push_tail(entry);
2400          break;
2401       case ir_var_shader_in: {
2402          /* The linker assigns locations for varyings and attributes,
2403           * including deprecated builtins (like gl_Color), user-assign
2404           * generic attributes (glBindVertexLocation), and
2405           * user-defined varyings.
2406           */
2407          assert(var->data.location != -1);
2408
2409          const glsl_type *type_without_array = var->type->without_array();
2410          struct inout_decl *decl = &inputs[num_inputs];
2411          unsigned component = var->data.location_frac;
2412          unsigned num_components;
2413          num_inputs++;
2414
2415          if (type_without_array->is_64bit())
2416             component = component / 2;
2417          if (type_without_array->vector_elements)
2418             num_components = type_without_array->vector_elements;
2419          else
2420             num_components = 4;
2421
2422          decl->mesa_index = var->data.location;
2423          decl->base_type = type_without_array->base_type;
2424          decl->usage_mask = u_bit_consecutive(component, num_components);
2425
2426          if (is_inout_array(shader->Stage, var, &remove_array)) {
2427             decl->array_id = num_input_arrays + 1;
2428             num_input_arrays++;
2429          } else {
2430             decl->array_id = 0;
2431          }
2432
2433          if (remove_array)
2434             decl->size = type_size(var->type->fields.array);
2435          else
2436             decl->size = type_size(var->type);
2437
2438          entry = new(mem_ctx) variable_storage(var,
2439                                                PROGRAM_INPUT,
2440                                                decl->mesa_index,
2441                                                decl->array_id);
2442          entry->component = component;
2443
2444          this->variables.push_tail(entry);
2445          break;
2446       }
2447       case ir_var_shader_out: {
2448          assert(var->data.location != -1);
2449
2450          const glsl_type *type_without_array = var->type->without_array();
2451          struct inout_decl *decl = &outputs[num_outputs];
2452          unsigned component = var->data.location_frac;
2453          unsigned num_components;
2454          num_outputs++;
2455
2456          if (type_without_array->is_64bit())
2457             component = component / 2;
2458          if (type_without_array->vector_elements)
2459             num_components = type_without_array->vector_elements;
2460          else
2461             num_components = 4;
2462
2463          decl->mesa_index = var->data.location + FRAG_RESULT_MAX * var->data.index;
2464          decl->base_type = type_without_array->base_type;
2465          decl->usage_mask = u_bit_consecutive(component, num_components);
2466
2467          if (is_inout_array(shader->Stage, var, &remove_array)) {
2468             decl->array_id = num_output_arrays + 1;
2469             num_output_arrays++;
2470          } else {
2471             decl->array_id = 0;
2472          }
2473
2474          if (remove_array)
2475             decl->size = type_size(var->type->fields.array);
2476          else
2477             decl->size = type_size(var->type);
2478
2479          entry = new(mem_ctx) variable_storage(var,
2480                                                PROGRAM_OUTPUT,
2481                                                decl->mesa_index,
2482                                                decl->array_id);
2483          entry->component = component;
2484
2485          this->variables.push_tail(entry);
2486          break;
2487       }
2488       case ir_var_system_value:
2489          entry = new(mem_ctx) variable_storage(var,
2490                                                PROGRAM_SYSTEM_VALUE,
2491                                                var->data.location);
2492          break;
2493       case ir_var_auto:
2494       case ir_var_temporary:
2495          st_src_reg src = get_temp(var->type);
2496
2497          entry = new(mem_ctx) variable_storage(var, src.file, src.index);
2498          this->variables.push_tail(entry);
2499
2500          break;
2501       }
2502
2503       if (!entry) {
2504          printf("Failed to make storage for %s\n", var->name);
2505          exit(1);
2506       }
2507    }
2508
2509    this->result = st_src_reg(entry->file, entry->index, var->type, entry->component);
2510    this->result.array_id = entry->array_id;
2511    if (this->shader->Stage == MESA_SHADER_VERTEX && var->data.mode == ir_var_shader_in && var->type->is_double())
2512       this->result.is_double_vertex_input = true;
2513    if (!native_integers)
2514       this->result.type = GLSL_TYPE_FLOAT;
2515 }
2516
2517 static void
2518 shrink_array_declarations(struct inout_decl *decls, unsigned count,
2519                           GLbitfield64* usage_mask,
2520                           GLbitfield64 double_usage_mask,
2521                           GLbitfield* patch_usage_mask)
2522 {
2523    unsigned i;
2524    int j;
2525
2526    /* Fix array declarations by removing unused array elements at both ends
2527     * of the arrays. For example, mat4[3] where only mat[1] is used.
2528     */
2529    for (i = 0; i < count; i++) {
2530       struct inout_decl *decl = &decls[i];
2531       if (!decl->array_id)
2532          continue;
2533
2534       /* Shrink the beginning. */
2535       for (j = 0; j < (int)decl->size; j++) {
2536          if (decl->mesa_index >= VARYING_SLOT_PATCH0) {
2537             if (*patch_usage_mask &
2538                 BITFIELD64_BIT(decl->mesa_index - VARYING_SLOT_PATCH0 + j))
2539                break;
2540          }
2541          else {
2542             if (*usage_mask & BITFIELD64_BIT(decl->mesa_index+j))
2543                break;
2544             if (double_usage_mask & BITFIELD64_BIT(decl->mesa_index+j-1))
2545                break;
2546          }
2547
2548          decl->mesa_index++;
2549          decl->size--;
2550          j--;
2551       }
2552
2553       /* Shrink the end. */
2554       for (j = decl->size-1; j >= 0; j--) {
2555          if (decl->mesa_index >= VARYING_SLOT_PATCH0) {
2556             if (*patch_usage_mask &
2557                 BITFIELD64_BIT(decl->mesa_index - VARYING_SLOT_PATCH0 + j))
2558                break;
2559          }
2560          else {
2561             if (*usage_mask & BITFIELD64_BIT(decl->mesa_index+j))
2562                break;
2563             if (double_usage_mask & BITFIELD64_BIT(decl->mesa_index+j-1))
2564                break;
2565          }
2566
2567          decl->size--;
2568       }
2569
2570       /* When not all entries of an array are accessed, we mark them as used
2571        * here anyway, to ensure that the input/output mapping logic doesn't get
2572        * confused.
2573        *
2574        * TODO This happens when an array isn't used via indirect access, which
2575        * some game ports do (at least eON-based). There is an optimization
2576        * opportunity here by replacing the array declaration with non-array
2577        * declarations of those slots that are actually used.
2578        */
2579       for (j = 1; j < (int)decl->size; ++j) {
2580          if (decl->mesa_index >= VARYING_SLOT_PATCH0)
2581             *patch_usage_mask |= BITFIELD64_BIT(decl->mesa_index - VARYING_SLOT_PATCH0 + j);
2582          else
2583             *usage_mask |= BITFIELD64_BIT(decl->mesa_index + j);
2584       }
2585    }
2586 }
2587
2588 void
2589 glsl_to_tgsi_visitor::visit(ir_dereference_array *ir)
2590 {
2591    ir_constant *index;
2592    st_src_reg src;
2593    int element_size = type_size(ir->type);
2594    bool is_2D = false;
2595
2596    index = ir->array_index->constant_expression_value();
2597
2598    ir->array->accept(this);
2599    src = this->result;
2600
2601    if (ir->array->ir_type != ir_type_dereference_array) {
2602       switch (this->prog->Target) {
2603       case GL_TESS_CONTROL_PROGRAM_NV:
2604          is_2D = (src.file == PROGRAM_INPUT || src.file == PROGRAM_OUTPUT) &&
2605                  !ir->variable_referenced()->data.patch;
2606          break;
2607       case GL_TESS_EVALUATION_PROGRAM_NV:
2608          is_2D = src.file == PROGRAM_INPUT &&
2609                  !ir->variable_referenced()->data.patch;
2610          break;
2611       case GL_GEOMETRY_PROGRAM_NV:
2612          is_2D = src.file == PROGRAM_INPUT;
2613          break;
2614       }
2615    }
2616
2617    if (is_2D)
2618       element_size = 1;
2619
2620    if (index) {
2621
2622       if (this->prog->Target == GL_VERTEX_PROGRAM_ARB &&
2623           src.file == PROGRAM_INPUT)
2624          element_size = attrib_type_size(ir->type, true);
2625       if (is_2D) {
2626          src.index2D = index->value.i[0];
2627          src.has_index2 = true;
2628       } else
2629          src.index += index->value.i[0] * element_size;
2630    } else {
2631       /* Variable index array dereference.  It eats the "vec4" of the
2632        * base of the array and an index that offsets the TGSI register
2633        * index.
2634        */
2635       ir->array_index->accept(this);
2636
2637       st_src_reg index_reg;
2638
2639       if (element_size == 1) {
2640          index_reg = this->result;
2641       } else {
2642          index_reg = get_temp(native_integers ?
2643                               glsl_type::int_type : glsl_type::float_type);
2644
2645          emit_asm(ir, TGSI_OPCODE_MUL, st_dst_reg(index_reg),
2646               this->result, st_src_reg_for_type(index_reg.type, element_size));
2647       }
2648
2649       /* If there was already a relative address register involved, add the
2650        * new and the old together to get the new offset.
2651        */
2652       if (!is_2D && src.reladdr != NULL) {
2653          st_src_reg accum_reg = get_temp(native_integers ?
2654                                 glsl_type::int_type : glsl_type::float_type);
2655
2656          emit_asm(ir, TGSI_OPCODE_ADD, st_dst_reg(accum_reg),
2657               index_reg, *src.reladdr);
2658
2659          index_reg = accum_reg;
2660       }
2661
2662       if (is_2D) {
2663          src.reladdr2 = ralloc(mem_ctx, st_src_reg);
2664          memcpy(src.reladdr2, &index_reg, sizeof(index_reg));
2665          src.index2D = 0;
2666          src.has_index2 = true;
2667       } else {
2668          src.reladdr = ralloc(mem_ctx, st_src_reg);
2669          memcpy(src.reladdr, &index_reg, sizeof(index_reg));
2670       }
2671    }
2672
2673    /* Change the register type to the element type of the array. */
2674    src.type = ir->type->base_type;
2675
2676    this->result = src;
2677 }
2678
2679 void
2680 glsl_to_tgsi_visitor::visit(ir_dereference_record *ir)
2681 {
2682    unsigned int i;
2683    const glsl_type *struct_type = ir->record->type;
2684    int offset = 0;
2685
2686    ir->record->accept(this);
2687
2688    for (i = 0; i < struct_type->length; i++) {
2689       if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0)
2690          break;
2691       offset += type_size(struct_type->fields.structure[i].type);
2692    }
2693
2694    /* If the type is smaller than a vec4, replicate the last channel out. */
2695    if (ir->type->is_scalar() || ir->type->is_vector())
2696       this->result.swizzle = swizzle_for_size(ir->type->vector_elements);
2697    else
2698       this->result.swizzle = SWIZZLE_NOOP;
2699
2700    this->result.index += offset;
2701    this->result.type = ir->type->base_type;
2702 }
2703
2704 /**
2705  * We want to be careful in assignment setup to hit the actual storage
2706  * instead of potentially using a temporary like we might with the
2707  * ir_dereference handler.
2708  */
2709 static st_dst_reg
2710 get_assignment_lhs(ir_dereference *ir, glsl_to_tgsi_visitor *v, int *component)
2711 {
2712    /* The LHS must be a dereference.  If the LHS is a variable indexed array
2713     * access of a vector, it must be separated into a series conditional moves
2714     * before reaching this point (see ir_vec_index_to_cond_assign).
2715     */
2716    assert(ir->as_dereference());
2717    ir_dereference_array *deref_array = ir->as_dereference_array();
2718    if (deref_array) {
2719       assert(!deref_array->array->type->is_vector());
2720    }
2721
2722    /* Use the rvalue deref handler for the most part.  We write swizzles using
2723     * the writemask, but we do extract the base component for enhanced layouts
2724     * from the source swizzle.
2725     */
2726    ir->accept(v);
2727    *component = GET_SWZ(v->result.swizzle, 0);
2728    return st_dst_reg(v->result);
2729 }
2730
2731 /**
2732  * Process the condition of a conditional assignment
2733  *
2734  * Examines the condition of a conditional assignment to generate the optimal
2735  * first operand of a \c CMP instruction.  If the condition is a relational
2736  * operator with 0 (e.g., \c ir_binop_less), the value being compared will be
2737  * used as the source for the \c CMP instruction.  Otherwise the comparison
2738  * is processed to a boolean result, and the boolean result is used as the
2739  * operand to the CMP instruction.
2740  */
2741 bool
2742 glsl_to_tgsi_visitor::process_move_condition(ir_rvalue *ir)
2743 {
2744    ir_rvalue *src_ir = ir;
2745    bool negate = true;
2746    bool switch_order = false;
2747
2748    ir_expression *const expr = ir->as_expression();
2749
2750    if (native_integers) {
2751       if ((expr != NULL) && (expr->get_num_operands() == 2)) {
2752          enum glsl_base_type type = expr->operands[0]->type->base_type;
2753          if (type == GLSL_TYPE_INT || type == GLSL_TYPE_UINT ||
2754              type == GLSL_TYPE_BOOL) {
2755             if (expr->operation == ir_binop_equal) {
2756                if (expr->operands[0]->is_zero()) {
2757                   src_ir = expr->operands[1];
2758                   switch_order = true;
2759                }
2760                else if (expr->operands[1]->is_zero()) {
2761                   src_ir = expr->operands[0];
2762                   switch_order = true;
2763                }
2764             }
2765             else if (expr->operation == ir_binop_nequal) {
2766                if (expr->operands[0]->is_zero()) {
2767                   src_ir = expr->operands[1];
2768                }
2769                else if (expr->operands[1]->is_zero()) {
2770                   src_ir = expr->operands[0];
2771                }
2772             }
2773          }
2774       }
2775
2776       src_ir->accept(this);
2777       return switch_order;
2778    }
2779
2780    if ((expr != NULL) && (expr->get_num_operands() == 2)) {
2781       bool zero_on_left = false;
2782
2783       if (expr->operands[0]->is_zero()) {
2784          src_ir = expr->operands[1];
2785          zero_on_left = true;
2786       } else if (expr->operands[1]->is_zero()) {
2787          src_ir = expr->operands[0];
2788          zero_on_left = false;
2789       }
2790
2791       /*      a is -  0  +            -  0  +
2792        * (a <  0)  T  F  F  ( a < 0)  T  F  F
2793        * (0 <  a)  F  F  T  (-a < 0)  F  F  T
2794        * (a <= 0)  T  T  F  (-a < 0)  F  F  T  (swap order of other operands)
2795        * (0 <= a)  F  T  T  ( a < 0)  T  F  F  (swap order of other operands)
2796        * (a >  0)  F  F  T  (-a < 0)  F  F  T
2797        * (0 >  a)  T  F  F  ( a < 0)  T  F  F
2798        * (a >= 0)  F  T  T  ( a < 0)  T  F  F  (swap order of other operands)
2799        * (0 >= a)  T  T  F  (-a < 0)  F  F  T  (swap order of other operands)
2800        *
2801        * Note that exchanging the order of 0 and 'a' in the comparison simply
2802        * means that the value of 'a' should be negated.
2803        */
2804       if (src_ir != ir) {
2805          switch (expr->operation) {
2806          case ir_binop_less:
2807             switch_order = false;
2808             negate = zero_on_left;
2809             break;
2810
2811          case ir_binop_greater:
2812             switch_order = false;
2813             negate = !zero_on_left;
2814             break;
2815
2816          case ir_binop_lequal:
2817             switch_order = true;
2818             negate = !zero_on_left;
2819             break;
2820
2821          case ir_binop_gequal:
2822             switch_order = true;
2823             negate = zero_on_left;
2824             break;
2825
2826          default:
2827             /* This isn't the right kind of comparison afterall, so make sure
2828              * the whole condition is visited.
2829              */
2830             src_ir = ir;
2831             break;
2832          }
2833       }
2834    }
2835
2836    src_ir->accept(this);
2837
2838    /* We use the TGSI_OPCODE_CMP (a < 0 ? b : c) for conditional moves, and the
2839     * condition we produced is 0.0 or 1.0.  By flipping the sign, we can
2840     * choose which value TGSI_OPCODE_CMP produces without an extra instruction
2841     * computing the condition.
2842     */
2843    if (negate)
2844       this->result.negate = ~this->result.negate;
2845
2846    return switch_order;
2847 }
2848
2849 void
2850 glsl_to_tgsi_visitor::emit_block_mov(ir_assignment *ir, const struct glsl_type *type,
2851                                      st_dst_reg *l, st_src_reg *r,
2852                                      st_src_reg *cond, bool cond_swap)
2853 {
2854    if (type->base_type == GLSL_TYPE_STRUCT) {
2855       for (unsigned int i = 0; i < type->length; i++) {
2856          emit_block_mov(ir, type->fields.structure[i].type, l, r,
2857                         cond, cond_swap);
2858       }
2859       return;
2860    }
2861
2862    if (type->is_array()) {
2863       for (unsigned int i = 0; i < type->length; i++) {
2864          emit_block_mov(ir, type->fields.array, l, r, cond, cond_swap);
2865       }
2866       return;
2867    }
2868
2869    if (type->is_matrix()) {
2870       const struct glsl_type *vec_type;
2871
2872       vec_type = glsl_type::get_instance(type->is_double() ? GLSL_TYPE_DOUBLE : GLSL_TYPE_FLOAT,
2873                                          type->vector_elements, 1);
2874
2875       for (int i = 0; i < type->matrix_columns; i++) {
2876          emit_block_mov(ir, vec_type, l, r, cond, cond_swap);
2877       }
2878       return;
2879    }
2880
2881    assert(type->is_scalar() || type->is_vector());
2882
2883    r->type = type->base_type;
2884    if (cond) {
2885       st_src_reg l_src = st_src_reg(*l);
2886       l_src.swizzle = swizzle_for_size(type->vector_elements);
2887
2888       if (native_integers) {
2889          emit_asm(ir, TGSI_OPCODE_UCMP, *l, *cond,
2890               cond_swap ? l_src : *r,
2891               cond_swap ? *r : l_src);
2892       } else {
2893          emit_asm(ir, TGSI_OPCODE_CMP, *l, *cond,
2894               cond_swap ? l_src : *r,
2895               cond_swap ? *r : l_src);
2896       }
2897    } else {
2898       emit_asm(ir, TGSI_OPCODE_MOV, *l, *r);
2899    }
2900    l->index++;
2901    r->index++;
2902    if (type->is_dual_slot()) {
2903       l->index++;
2904       if (r->is_double_vertex_input == false)
2905          r->index++;
2906    }
2907 }
2908
2909 void
2910 glsl_to_tgsi_visitor::visit(ir_assignment *ir)
2911 {
2912    int dst_component;
2913    st_dst_reg l;
2914    st_src_reg r;
2915
2916    ir->rhs->accept(this);
2917    r = this->result;
2918
2919    l = get_assignment_lhs(ir->lhs, this, &dst_component);
2920
2921    {
2922       int swizzles[4];
2923       int first_enabled_chan = 0;
2924       int rhs_chan = 0;
2925       ir_variable *variable = ir->lhs->variable_referenced();
2926
2927       if (shader->Stage == MESA_SHADER_FRAGMENT &&
2928           variable->data.mode == ir_var_shader_out &&
2929           (variable->data.location == FRAG_RESULT_DEPTH ||
2930            variable->data.location == FRAG_RESULT_STENCIL)) {
2931          assert(ir->lhs->type->is_scalar());
2932          assert(ir->write_mask == WRITEMASK_X);
2933
2934          if (variable->data.location == FRAG_RESULT_DEPTH)
2935             l.writemask = WRITEMASK_Z;
2936          else {
2937             assert(variable->data.location == FRAG_RESULT_STENCIL);
2938             l.writemask = WRITEMASK_Y;
2939          }
2940       } else if (ir->write_mask == 0) {
2941          assert(!ir->lhs->type->is_scalar() && !ir->lhs->type->is_vector());
2942
2943          if (ir->lhs->type->is_array() || ir->lhs->type->is_matrix()) {
2944             unsigned num_elements = ir->lhs->type->without_array()->vector_elements;
2945             l.writemask = u_bit_consecutive(0, num_elements);
2946          } else {
2947             l.writemask = WRITEMASK_XYZW;
2948          }
2949       } else {
2950          l.writemask = ir->write_mask;
2951       }
2952
2953       for (int i = 0; i < 4; i++) {
2954          if (l.writemask & (1 << i)) {
2955             first_enabled_chan = GET_SWZ(r.swizzle, i);
2956             break;
2957          }
2958       }
2959
2960       l.writemask = l.writemask << dst_component;
2961
2962       /* Swizzle a small RHS vector into the channels being written.
2963        *
2964        * glsl ir treats write_mask as dictating how many channels are
2965        * present on the RHS while TGSI treats write_mask as just
2966        * showing which channels of the vec4 RHS get written.
2967        */
2968       for (int i = 0; i < 4; i++) {
2969          if (l.writemask & (1 << i))
2970             swizzles[i] = GET_SWZ(r.swizzle, rhs_chan++);
2971          else
2972             swizzles[i] = first_enabled_chan;
2973       }
2974       r.swizzle = MAKE_SWIZZLE4(swizzles[0], swizzles[1],
2975                                 swizzles[2], swizzles[3]);
2976    }
2977
2978    assert(l.file != PROGRAM_UNDEFINED);
2979    assert(r.file != PROGRAM_UNDEFINED);
2980
2981    if (ir->condition) {
2982       const bool switch_order = this->process_move_condition(ir->condition);
2983       st_src_reg condition = this->result;
2984
2985       emit_block_mov(ir, ir->lhs->type, &l, &r, &condition, switch_order);
2986    } else if (ir->rhs->as_expression() &&
2987               this->instructions.get_tail() &&
2988               ir->rhs == ((glsl_to_tgsi_instruction *)this->instructions.get_tail())->ir &&
2989               !((glsl_to_tgsi_instruction *)this->instructions.get_tail())->is_64bit_expanded &&
2990               type_size(ir->lhs->type) == 1 &&
2991               l.writemask == ((glsl_to_tgsi_instruction *)this->instructions.get_tail())->dst[0].writemask) {
2992       /* To avoid emitting an extra MOV when assigning an expression to a
2993        * variable, emit the last instruction of the expression again, but
2994        * replace the destination register with the target of the assignment.
2995        * Dead code elimination will remove the original instruction.
2996        */
2997       glsl_to_tgsi_instruction *inst, *new_inst;
2998       inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail();
2999       new_inst = emit_asm(ir, inst->op, l, inst->src[0], inst->src[1], inst->src[2], inst->src[3]);
3000       new_inst->saturate = inst->saturate;
3001       inst->dead_mask = inst->dst[0].writemask;
3002    } else {
3003       emit_block_mov(ir, ir->rhs->type, &l, &r, NULL, false);
3004    }
3005 }
3006
3007
/**
 * Translate a constant into a source register, left in this->result.
 *
 * Scalar and vector constants are registered through add_constant().
 * Struct, array and matrix constants are materialized into a temporary by
 * emitting one MOV per register slot, and the temporary is returned.
 */
void
glsl_to_tgsi_visitor::visit(ir_constant *ir)
{
   st_src_reg src;
   /* Scratch storage for the scalar/vector path, sized for four doubles and
    * accessed through a gl_constant_value pointer.
    */
   GLdouble stack_vals[4] = { 0 };
   gl_constant_value *values = (gl_constant_value *) stack_vals;
   GLenum gl_type = GL_NONE;
   unsigned int i;
   /* Function-static nesting counter: non-zero while the elements of an
    * array constant are being visited (see the array branch below).  In that
    * case constants are placed in PROGRAM_CONSTANT instead of
    * PROGRAM_IMMEDIATE.  Note that `file` is sampled once, on entry.
    */
   static int in_array = 0;
   gl_register_file file = in_array ? PROGRAM_CONSTANT : PROGRAM_IMMEDIATE;

   /* Unfortunately, 4 floats is all we can get into
    * _mesa_add_typed_unnamed_constant.  So, make a temp to store an
    * aggregate constant and move each constant value into it.  If we
    * get lucky, copy propagation will eliminate the extra moves.
    */
   if (ir->type->base_type == GLSL_TYPE_STRUCT) {
      st_src_reg temp_base = get_temp(ir->type);
      st_dst_reg temp = st_dst_reg(temp_base);

      /* Visit every field and copy it slot by slot into the temporary. */
      foreach_in_list(ir_constant, field_value, &ir->components) {
         int size = type_size(field_value->type);

         assert(size > 0);

         field_value->accept(this);
         src = this->result;

         for (i = 0; i < (unsigned int)size; i++) {
            emit_asm(ir, TGSI_OPCODE_MOV, temp, src);

            src.index++;
            temp.index++;
         }
      }
      this->result = temp_base;
      return;
   }

   if (ir->type->is_array()) {
      st_src_reg temp_base = get_temp(ir->type);
      st_dst_reg temp = st_dst_reg(temp_base);
      int size = type_size(ir->type->fields.array);

      assert(size > 0);
      /* Mark the nested visits below as being inside an array constant. */
      in_array++;

      for (i = 0; i < ir->type->length; i++) {
         ir->array_elements[i]->accept(this);
         src = this->result;
         for (int j = 0; j < size; j++) {
            emit_asm(ir, TGSI_OPCODE_MOV, temp, src);

            src.index++;
            temp.index++;
         }
      }
      this->result = temp_base;
      in_array--;
      return;
   }

   if (ir->type->is_matrix()) {
      st_src_reg mat = get_temp(ir->type);
      st_dst_reg mat_column = st_dst_reg(mat);

      /* Each column is added as its own constant and moved into the
       * temporary; mat_column.index is advanced once per column at the
       * bottom of the loop.
       */
      for (i = 0; i < ir->type->matrix_columns; i++) {
         switch (ir->type->base_type) {
         case GLSL_TYPE_FLOAT:
            values = (gl_constant_value *) &ir->value.f[i * ir->type->vector_elements];

            src = st_src_reg(file, -1, ir->type->base_type);
            src.index = add_constant(file,
                                     values,
                                     ir->type->vector_elements,
                                     GL_FLOAT,
                                     &src.swizzle);
            emit_asm(ir, TGSI_OPCODE_MOV, mat_column, src);
            break;
         case GLSL_TYPE_DOUBLE:
            values = (gl_constant_value *) &ir->value.d[i * ir->type->vector_elements];
            src = st_src_reg(file, -1, ir->type->base_type);
            src.index = add_constant(file,
                                     values,
                                     ir->type->vector_elements,
                                     GL_DOUBLE,
                                     &src.swizzle);
            /* First MOV: writes XY (two or more components) or X (one
             * component) from the first constant slot.
             */
            if (ir->type->vector_elements >= 2) {
               mat_column.writemask = WRITEMASK_XY;
               src.swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_X, SWIZZLE_Y);
               emit_asm(ir, TGSI_OPCODE_MOV, mat_column, src);
            } else {
               mat_column.writemask = WRITEMASK_X;
               src.swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X);
               emit_asm(ir, TGSI_OPCODE_MOV, mat_column, src);
            }
            src.index++;
            /* Second MOV for 3- and 4-component columns: writes ZW or Z from
             * the following constant slot.  These columns also advance
             * mat_column.index one extra time.
             */
            if (ir->type->vector_elements > 2) {
               if (ir->type->vector_elements == 4) {
                  mat_column.writemask = WRITEMASK_ZW;
                  src.swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_X, SWIZZLE_Y);
                  emit_asm(ir, TGSI_OPCODE_MOV, mat_column, src);
               } else {
                  mat_column.writemask = WRITEMASK_Z;
                  src.swizzle = MAKE_SWIZZLE4(SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y);
                  emit_asm(ir, TGSI_OPCODE_MOV, mat_column, src);
                  mat_column.writemask = WRITEMASK_XYZW;
                  src.swizzle = SWIZZLE_XYZW;
               }
               mat_column.index++;
            }
            break;
         default:
            unreachable("Illegal matrix constant type.\n");
            break;
         }
         mat_column.index++;
      }
      this->result = mat;
      return;
   }

   /* Scalar/vector constant: convert the components into `values` according
    * to the base type, then register them with add_constant() below.
    */
   switch (ir->type->base_type) {
   case GLSL_TYPE_FLOAT:
      gl_type = GL_FLOAT;
      for (i = 0; i < ir->type->vector_elements; i++) {
         values[i].f = ir->value.f[i];
      }
      break;
   case GLSL_TYPE_DOUBLE:
      gl_type = GL_DOUBLE;
      /* Each double occupies two consecutive gl_constant_value entries. */
      for (i = 0; i < ir->type->vector_elements; i++) {
         memcpy(&values[i * 2], &ir->value.d[i], sizeof(double));
      }
      break;
   case GLSL_TYPE_UINT:
      /* Without native integers the value is stored as a float. */
      gl_type = native_integers ? GL_UNSIGNED_INT : GL_FLOAT;
      for (i = 0; i < ir->type->vector_elements; i++) {
         if (native_integers)
            values[i].u = ir->value.u[i];
         else
            values[i].f = ir->value.u[i];
      }
      break;
   case GLSL_TYPE_INT:
      /* Without native integers the value is stored as a float. */
      gl_type = native_integers ? GL_INT : GL_FLOAT;
      for (i = 0; i < ir->type->vector_elements; i++) {
         if (native_integers)
            values[i].i = ir->value.i[i];
         else
            values[i].f = ir->value.i[i];
      }
      break;
   case GLSL_TYPE_BOOL:
      gl_type = native_integers ? GL_BOOL : GL_FLOAT;
      /* True is encoded as the context's UniformBooleanTrue bit pattern. */
      for (i = 0; i < ir->type->vector_elements; i++) {
         values[i].u = ir->value.b[i] ? ctx->Const.UniformBooleanTrue : 0;
      }
      break;
   default:
      assert(!"Non-float/uint/int/bool constant");
   }

   this->result = st_src_reg(file, -1, ir->type);
   this->result.index = add_constant(file,
                                     values,
                                     ir->type->vector_elements,
                                     gl_type,
                                     &this->result.swizzle);
}
3178
3179 function_entry *
3180 glsl_to_tgsi_visitor::get_function_signature(ir_function_signature *sig)
3181 {
3182    foreach_in_list_use_after(function_entry, entry, &this->function_signatures) {
3183       if (entry->sig == sig)
3184          return entry;
3185    }
3186
3187    entry = ralloc(mem_ctx, function_entry);
3188    entry->sig = sig;
3189    entry->sig_id = this->next_signature_id++;
3190    entry->bgn_inst = NULL;
3191
3192    /* Allocate storage for all the parameters. */
3193    foreach_in_list(ir_variable, param, &sig->parameters) {
3194       variable_storage *storage;
3195
3196       storage = find_variable_storage(param);
3197       assert(!storage);
3198
3199       st_src_reg src = get_temp(param->type);
3200
3201       storage = new(mem_ctx) variable_storage(param, src.file, src.index);
3202       this->variables.push_tail(storage);
3203    }
3204
3205    if (!sig->return_type->is_void()) {
3206       entry->return_reg = get_temp(sig->return_type);
3207    } else {
3208       entry->return_reg = undef_src;
3209    }
3210
3211    this->function_signatures.push_tail(entry);
3212    return entry;
3213 }
3214
3215 void
3216 glsl_to_tgsi_visitor::visit_atomic_counter_intrinsic(ir_call *ir)
3217 {
3218    exec_node *param = ir->actual_parameters.get_head();
3219    ir_dereference *deref = static_cast<ir_dereference *>(param);
3220    ir_variable *location = deref->variable_referenced();
3221
3222    st_src_reg buffer(
3223          PROGRAM_BUFFER, location->data.binding, GLSL_TYPE_ATOMIC_UINT);
3224
3225    /* Calculate the surface offset */
3226    st_src_reg offset;
3227    unsigned array_size = 0, base = 0, index = 0;
3228
3229    get_deref_offsets(deref, &array_size, &base, &index, &offset);
3230
3231    if (offset.file != PROGRAM_UNDEFINED) {
3232       emit_asm(ir, TGSI_OPCODE_MUL, st_dst_reg(offset),
3233                offset, st_src_reg_for_int(ATOMIC_COUNTER_SIZE));
3234       emit_asm(ir, TGSI_OPCODE_ADD, st_dst_reg(offset),
3235                offset, st_src_reg_for_int(location->data.offset + index * ATOMIC_COUNTER_SIZE));
3236    } else {
3237       offset = st_src_reg_for_int(location->data.offset + index * ATOMIC_COUNTER_SIZE);
3238    }
3239
3240    ir->return_deref->accept(this);
3241    st_dst_reg dst(this->result);
3242    dst.writemask = WRITEMASK_X;
3243
3244    glsl_to_tgsi_instruction *inst;
3245
3246    if (ir->callee->intrinsic_id == ir_intrinsic_atomic_counter_read) {
3247       inst = emit_asm(ir, TGSI_OPCODE_LOAD, dst, offset);
3248    } else if (ir->callee->intrinsic_id == ir_intrinsic_atomic_counter_increment) {
3249       inst = emit_asm(ir, TGSI_OPCODE_ATOMUADD, dst, offset,
3250                       st_src_reg_for_int(1));
3251    } else if (ir->callee->intrinsic_id == ir_intrinsic_atomic_counter_predecrement) {
3252       inst = emit_asm(ir, TGSI_OPCODE_ATOMUADD, dst, offset,
3253                       st_src_reg_for_int(-1));
3254       emit_asm(ir, TGSI_OPCODE_ADD, dst, this->result, st_src_reg_for_int(-1));
3255    } else {
3256       param = param->get_next();
3257       ir_rvalue *val = ((ir_instruction *)param)->as_rvalue();
3258       val->accept(this);
3259
3260       st_src_reg data = this->result, data2 = undef_src;
3261       unsigned opcode;
3262       switch (ir->callee->intrinsic_id) {
3263       case ir_intrinsic_atomic_counter_add:
3264          opcode = TGSI_OPCODE_ATOMUADD;
3265          break;
3266       case ir_intrinsic_atomic_counter_min:
3267          opcode = TGSI_OPCODE_ATOMIMIN;
3268          break;
3269       case ir_intrinsic_atomic_counter_max:
3270          opcode = TGSI_OPCODE_ATOMIMAX;
3271          break;
3272       case ir_intrinsic_atomic_counter_and:
3273          opcode = TGSI_OPCODE_ATOMAND;
3274          break;
3275       case ir_intrinsic_atomic_counter_or:
3276          opcode = TGSI_OPCODE_ATOMOR;
3277          break;
3278       case ir_intrinsic_atomic_counter_xor:
3279          opcode = TGSI_OPCODE_ATOMXOR;
3280          break;
3281       case ir_intrinsic_atomic_counter_exchange:
3282          opcode = TGSI_OPCODE_ATOMXCHG;
3283          break;
3284       case ir_intrinsic_atomic_counter_comp_swap: {
3285          opcode = TGSI_OPCODE_ATOMCAS;
3286          param = param->get_next();
3287          val = ((ir_instruction *)param)->as_rvalue();
3288          val->accept(this);
3289          data2 = this->result;
3290          break;
3291       }
3292       default:
3293          assert(!"Unexpected intrinsic");
3294          return;
3295       }
3296
3297       inst = emit_asm(ir, opcode, dst, offset, data, data2);
3298    }
3299
3300    inst->buffer = buffer;
3301 }
3302
3303 void
3304 glsl_to_tgsi_visitor::visit_ssbo_intrinsic(ir_call *ir)
3305 {
3306    exec_node *param = ir->actual_parameters.get_head();
3307
3308    ir_rvalue *block = ((ir_instruction *)param)->as_rvalue();
3309
3310    param = param->get_next();
3311    ir_rvalue *offset = ((ir_instruction *)param)->as_rvalue();
3312
3313    ir_constant *const_block = block->as_constant();
3314
3315    st_src_reg buffer(
3316          PROGRAM_BUFFER,
3317          ctx->Const.Program[shader->Stage].MaxAtomicBuffers +
3318          (const_block ? const_block->value.u[0] : 0),
3319          GLSL_TYPE_UINT);
3320
3321    if (!const_block) {
3322       block->accept(this);
3323       buffer.reladdr = ralloc(mem_ctx, st_src_reg);
3324       *buffer.reladdr = this->result;
3325       emit_arl(ir, sampler_reladdr, this->result);
3326    }
3327
3328    /* Calculate the surface offset */
3329    offset->accept(this);
3330    st_src_reg off = this->result;
3331
3332    st_dst_reg dst = undef_dst;
3333    if (ir->return_deref) {
3334       ir->return_deref->accept(this);
3335       dst = st_dst_reg(this->result);
3336       dst.writemask = (1 << ir->return_deref->type->vector_elements) - 1;
3337    }
3338
3339    glsl_to_tgsi_instruction *inst;
3340
3341    if (ir->callee->intrinsic_id == ir_intrinsic_ssbo_load) {
3342       inst = emit_asm(ir, TGSI_OPCODE_LOAD, dst, off);
3343       if (dst.type == GLSL_TYPE_BOOL)
3344          emit_asm(ir, TGSI_OPCODE_USNE, dst, st_src_reg(dst), st_src_reg_for_int(0));
3345    } else if (ir->callee->intrinsic_id == ir_intrinsic_ssbo_store) {
3346       param = param->get_next();
3347       ir_rvalue *val = ((ir_instruction *)param)->as_rvalue();
3348       val->accept(this);
3349
3350       param = param->get_next();
3351       ir_constant *write_mask = ((ir_instruction *)param)->as_constant();
3352       assert(write_mask);
3353       dst.writemask = write_mask->value.u[0];
3354
3355       dst.type = this->result.type;
3356       inst = emit_asm(ir, TGSI_OPCODE_STORE, dst, off, this->result);
3357    } else {
3358       param = param->get_next();
3359       ir_rvalue *val = ((ir_instruction *)param)->as_rvalue();
3360       val->accept(this);
3361
3362       st_src_reg data = this->result, data2 = undef_src;
3363       unsigned opcode;
3364       switch (ir->callee->intrinsic_id) {
3365       case ir_intrinsic_ssbo_atomic_add:
3366          opcode = TGSI_OPCODE_ATOMUADD;
3367          break;
3368       case ir_intrinsic_ssbo_atomic_min:
3369          opcode = TGSI_OPCODE_ATOMIMIN;
3370          break;
3371       case ir_intrinsic_ssbo_atomic_max:
3372          opcode = TGSI_OPCODE_ATOMIMAX;
3373          break;
3374       case ir_intrinsic_ssbo_atomic_and:
3375          opcode = TGSI_OPCODE_ATOMAND;
3376          break;
3377       case ir_intrinsic_ssbo_atomic_or:
3378          opcode = TGSI_OPCODE_ATOMOR;
3379          break;
3380       case ir_intrinsic_ssbo_atomic_xor:
3381          opcode = TGSI_OPCODE_ATOMXOR;
3382          break;
3383       case ir_intrinsic_ssbo_atomic_exchange:
3384          opcode = TGSI_OPCODE_ATOMXCHG;
3385          break;
3386       case ir_intrinsic_ssbo_atomic_comp_swap:
3387          opcode = TGSI_OPCODE_ATOMCAS;
3388          param = param->get_next();
3389          val = ((ir_instruction *)param)->as_rvalue();
3390          val->accept(this);
3391          data2 = this->result;
3392          break;
3393       default:
3394          assert(!"Unexpected intrinsic");
3395          return;
3396       }
3397
3398       inst = emit_asm(ir, opcode, dst, off, data, data2);
3399    }
3400
3401    param = param->get_next();
3402    ir_constant *access = NULL;
3403    if (!param->is_tail_sentinel()) {
3404       access = ((ir_instruction *)param)->as_constant();
3405       assert(access);
3406    }
3407
3408    /* The emit_asm() might have actually split the op into pieces, e.g. for
3409     * double stores. We have to go back and fix up all the generated ops.
3410     */
3411    unsigned op = inst->op;
3412    do {
3413       inst->buffer = buffer;
3414       if (access)
3415          inst->buffer_access = access->value.u[0];
3416       inst = (glsl_to_tgsi_instruction *)inst->get_prev();
3417       if (inst->op == TGSI_OPCODE_UADD)
3418          inst = (glsl_to_tgsi_instruction *)inst->get_prev();
3419    } while (inst && inst->op == op && inst->buffer.file == PROGRAM_UNDEFINED);
3420 }
3421
/**
 * Translate a GLSL memory-barrier intrinsic call into one TGSI MEMBAR
 * instruction.
 *
 * The callee's intrinsic_id selects the set of TGSI_MEMBAR_* flags, which is
 * passed as the single source operand built by st_src_reg_for_int(); the
 * instruction has no destination (undef_dst).  An intrinsic id that is not
 * listed here hits the assert in the default case and emits nothing.
 *
 * \param ir  the intrinsic call being translated
 */
3422 void
3423 glsl_to_tgsi_visitor::visit_membar_intrinsic(ir_call *ir)
3424 {
3425    switch (ir->callee->intrinsic_id) {
3426    case ir_intrinsic_memory_barrier:
           /* Covers shader buffers, atomic buffers, images and shared memory. */
3427       emit_asm(ir, TGSI_OPCODE_MEMBAR, undef_dst,
3428                st_src_reg_for_int(TGSI_MEMBAR_SHADER_BUFFER |
3429                                   TGSI_MEMBAR_ATOMIC_BUFFER |
3430                                   TGSI_MEMBAR_SHADER_IMAGE |
3431                                   TGSI_MEMBAR_SHARED));
3432       break;
3433    case ir_intrinsic_memory_barrier_atomic_counter:
3434       emit_asm(ir, TGSI_OPCODE_MEMBAR, undef_dst,
3435                st_src_reg_for_int(TGSI_MEMBAR_ATOMIC_BUFFER));
3436       break;
3437    case ir_intrinsic_memory_barrier_buffer:
3438       emit_asm(ir, TGSI_OPCODE_MEMBAR, undef_dst,
3439                st_src_reg_for_int(TGSI_MEMBAR_SHADER_BUFFER));
3440       break;
3441    case ir_intrinsic_memory_barrier_image:
3442       emit_asm(ir, TGSI_OPCODE_MEMBAR, undef_dst,
3443                st_src_reg_for_int(TGSI_MEMBAR_SHADER_IMAGE));
3444       break;
3445    case ir_intrinsic_memory_barrier_shared:
3446       emit_asm(ir, TGSI_OPCODE_MEMBAR, undef_dst,
3447                st_src_reg_for_int(TGSI_MEMBAR_SHARED));
3448       break;
3449    case ir_intrinsic_group_memory_barrier:
           /* Same four flags as ir_intrinsic_memory_barrier, plus
            * TGSI_MEMBAR_THREAD_GROUP.
            */
3450       emit_asm(ir, TGSI_OPCODE_MEMBAR, undef_dst,
3451                st_src_reg_for_int(TGSI_MEMBAR_SHADER_BUFFER |
3452                                   TGSI_MEMBAR_ATOMIC_BUFFER |
3453                                   TGSI_MEMBAR_SHADER_IMAGE |
3454                                   TGSI_MEMBAR_SHARED |
3455                                   TGSI_MEMBAR_THREAD_GROUP));
3456       break;
3457    default:
3458       assert(!"Unexpected memory barrier intrinsic");
3459    }
3460 }
3461
/**
 * Translate a shared-variable intrinsic call (load, store or atomic) into a
 * TGSI LOAD, STORE or ATOM* instruction.
 *
 * Every emitted instruction gets its buffer field set to a PROGRAM_MEMORY
 * register with index 0.  The actual parameters are consumed in order:
 *   - first:  the offset, evaluated and used as the first source operand;
 *   - store:  the value to write, then a constant holding the writemask;
 *   - atomic: the data operand, and for comp_swap one more data operand.
 *
 * If the call has a return dereference it becomes the destination, with one
 * writemask bit per vector element of the returned type.  An intrinsic id
 * that matches none of the handled cases asserts and returns without
 * emitting anything.
 *
 * \param ir  the intrinsic call being translated
 */
3462 void
3463 glsl_to_tgsi_visitor::visit_shared_intrinsic(ir_call *ir)
3464 {
3465    exec_node *param = ir->actual_parameters.get_head();
3466
3467    ir_rvalue *offset = ((ir_instruction *)param)->as_rvalue();
3468
3469    st_src_reg buffer(PROGRAM_MEMORY, 0, GLSL_TYPE_UINT);
3470
3471    /* Calculate the surface offset */
3472    offset->accept(this);
3473    st_src_reg off = this->result;
3474
        /* Destination stays undefined unless the call returns a value. */
3475    st_dst_reg dst = undef_dst;
3476    if (ir->return_deref) {
3477       ir->return_deref->accept(this);
3478       dst = st_dst_reg(this->result);
3479       dst.writemask = (1 << ir->return_deref->type->vector_elements) - 1;
3480    }
3481
3482    glsl_to_tgsi_instruction *inst;
3483
3484    if (ir->callee->intrinsic_id == ir_intrinsic_shared_load) {
3485       inst = emit_asm(ir, TGSI_OPCODE_LOAD, dst, off);
3486       inst->buffer = buffer;
3487    } else if (ir->callee->intrinsic_id == ir_intrinsic_shared_store) {
           /* Second parameter: the value to store; it is left in this->result. */
3488       param = param->get_next();
3489       ir_rvalue *val = ((ir_instruction *)param)->as_rvalue();
3490       val->accept(this);
3491
           /* Third parameter: constant whose first component is the writemask. */
3492       param = param->get_next();
3493       ir_constant *write_mask = ((ir_instruction *)param)->as_constant();
3494       assert(write_mask);
3495       dst.writemask = write_mask->value.u[0];
3496
           /* The destination takes the type of the stored value. */
3497       dst.type = this->result.type;
3498       inst = emit_asm(ir, TGSI_OPCODE_STORE, dst, off, this->result);
3499       inst->buffer = buffer;
3500    } else {
           /* Atomic operation: second parameter is the data operand. */
3501       param = param->get_next();
3502       ir_rvalue *val = ((ir_instruction *)param)->as_rvalue();
3503       val->accept(this);
3504
3505       st_src_reg data = this->result, data2 = undef_src;
3506       unsigned opcode;
3507       switch (ir->callee->intrinsic_id) {
3508       case ir_intrinsic_shared_atomic_add:
3509          opcode = TGSI_OPCODE_ATOMUADD;
3510          break;
3511       case ir_intrinsic_shared_atomic_min:
3512          opcode = TGSI_OPCODE_ATOMIMIN;
3513          break;
3514       case ir_intrinsic_shared_atomic_max:
3515          opcode = TGSI_OPCODE_ATOMIMAX;
3516          break;
3517       case ir_intrinsic_shared_atomic_and:
3518          opcode = TGSI_OPCODE_ATOMAND;
3519          break;
3520       case ir_intrinsic_shared_atomic_or:
3521          opcode = TGSI_OPCODE_ATOMOR;
3522          break;
3523       case ir_intrinsic_shared_atomic_xor:
3524          opcode = TGSI_OPCODE_ATOMXOR;
3525          break;
3526       case ir_intrinsic_shared_atomic_exchange:
3527          opcode = TGSI_OPCODE_ATOMXCHG;
3528          break;
3529       case ir_intrinsic_shared_atomic_comp_swap:
              /* Compare-and-swap is the only atomic that reads a further
               * parameter; it becomes the extra source operand data2.
               */
3530          opcode = TGSI_OPCODE_ATOMCAS;
3531          param = param->get_next();
3532          val = ((ir_instruction *)param)->as_rvalue();
3533          val->accept(this);
3534          data2 = this->result;
3535          break;
3536       default:
3537          assert(!"Unexpected intrinsic");
3538          return;
3539       }
3540
3541       inst = emit_asm(ir, opcode, dst, off, data, data2);
3542       inst->buffer = buffer;
3543    }
3544 }
3545
/**
 * Translate an image intrinsic call (size/samples query, load, store or
 * atomic) into TGSI.
 *
 * The first actual parameter is the image dereference.  get_deref_offsets()
 * resolves it to an image index plus array size/base and, for a
 * non-constant array index, an indirect register that is copied into
 * image.reladdr and handed to emit_arl().
 *
 * Three shapes of output are produced:
 *   - image_size:    a RESQ writing the XYZ channels of the destination;
 *   - image_samples: a RESQ writing W of a temporary, then a MOV of that W
 *                    channel into the destination;
 *   - everything else: the coordinate (and, for multisample images, the
 *                    sample index) is copied into an ivec4 temporary, up to
 *                    two further parameters become extra source operands,
 *                    and a LOAD/STORE/ATOM* instruction is emitted.
 *
 * Afterwards the image register, array size/base, texture target, image
 * format and the coherent/restrict/volatile access bits are attached to
 * `inst`.  Note that for image_samples `inst` is the RESQ, not the trailing
 * MOV.
 *
 * \param ir  the intrinsic call being translated
 */
3546 void
3547 glsl_to_tgsi_visitor::visit_image_intrinsic(ir_call *ir)
3548 {
3549    exec_node *param = ir->actual_parameters.get_head();
3550
3551    ir_dereference *img = (ir_dereference *)param;
3552    const ir_variable *imgvar = img->variable_referenced();
3553    const glsl_type *type = imgvar->type->without_array();
3554    unsigned sampler_array_size = 1, sampler_base = 0;
3555
3556    st_src_reg reladdr;
3557    st_src_reg image(PROGRAM_IMAGE, 0, GLSL_TYPE_UINT);
3558
        /* image.index is accumulated into by get_deref_offsets(); it starts
         * at the 0 given to the st_src_reg constructor above.
         */
3559    get_deref_offsets(img, &sampler_array_size, &sampler_base,
3560                      (unsigned int *)&image.index, &reladdr);
3561    if (reladdr.file != PROGRAM_UNDEFINED) {
           /* Indirectly indexed image array: keep a heap copy of the indirect
            * register on the image operand and pass it to emit_arl().
            */
3562       image.reladdr = ralloc(mem_ctx, st_src_reg);
3563       *image.reladdr = reladdr;
3564       emit_arl(ir, sampler_reladdr, reladdr);
3565    }
3566
        /* Destination stays undefined unless the call returns a value. */
3567    st_dst_reg dst = undef_dst;
3568    if (ir->return_deref) {
3569       ir->return_deref->accept(this);
3570       dst = st_dst_reg(this->result);
3571       dst.writemask = (1 << ir->return_deref->type->vector_elements) - 1;
3572    }
3573
3574    glsl_to_tgsi_instruction *inst;
3575
3576    if (ir->callee->intrinsic_id == ir_intrinsic_image_size) {
3577       dst.writemask = WRITEMASK_XYZ;
3578       inst = emit_asm(ir, TGSI_OPCODE_RESQ, dst);
3579    } else if (ir->callee->intrinsic_id == ir_intrinsic_image_samples) {
           /* Query into the W channel of a temporary, then broadcast that
            * channel into the real destination.
            */
3580       st_src_reg res = get_temp(glsl_type::ivec4_type);
3581       st_dst_reg dstres = st_dst_reg(res);
3582       dstres.writemask = WRITEMASK_W;
3583       inst = emit_asm(ir, TGSI_OPCODE_RESQ, dstres);
3584       res.swizzle = SWIZZLE_WWWW;
3585       emit_asm(ir, TGSI_OPCODE_MOV, dst, res);
3586    } else {
3587       st_src_reg arg1 = undef_src, arg2 = undef_src;
3588       st_src_reg coord;
3589       st_dst_reg coord_dst;
           /* Copy the coordinate parameter into a fresh ivec4 temporary,
            * writing only as many channels as the image type has coordinate
            * components.
            */
3590       coord = get_temp(glsl_type::ivec4_type);
3591       coord_dst = st_dst_reg(coord);
3592       coord_dst.writemask = (1 << type->coordinate_components()) - 1;
3593       param = param->get_next();
3594       ((ir_dereference *)param)->accept(this);
3595       emit_asm(ir, TGSI_OPCODE_MOV, coord_dst, this->result);
           /* Build the read swizzle: start from XXXX and OR in the Y and Z
            * selectors (at bit offsets 3 and 6) only for the components that
            * were written above.  The cases fall through deliberately.
            */
3596       coord.swizzle = SWIZZLE_XXXX;
3597       switch (type->coordinate_components()) {
3598       case 4: assert(!"unexpected coord count");
3599       /* fallthrough */
3600       case 3: coord.swizzle |= SWIZZLE_Z << 6;
3601       /* fallthrough */
3602       case 2: coord.swizzle |= SWIZZLE_Y << 3;
3603       }
3604
3605       if (type->sampler_dimensionality == GLSL_SAMPLER_DIM_MS) {
              /* Multisample image: the next parameter is the sample index,
               * which is stored in the W channel of the coordinate.
               */
3606          param = param->get_next();
3607          ((ir_dereference *)param)->accept(this);
3608          st_src_reg sample = this->result;
3609          sample.swizzle = SWIZZLE_XXXX;
3610          coord_dst.writemask = WRITEMASK_W;
3611          emit_asm(ir, TGSI_OPCODE_MOV, coord_dst, sample);
3612          coord.swizzle |= SWIZZLE_W << 9;
3613       }
3614
           /* Up to two remaining parameters become the extra source operands;
            * absent ones stay undef_src.
            */
3615       param = param->get_next();
3616       if (!param->is_tail_sentinel()) {
3617          ((ir_dereference *)param)->accept(this);
3618          arg1 = this->result;
3619          param = param->get_next();
3620       }
3621
3622       if (!param->is_tail_sentinel()) {
3623          ((ir_dereference *)param)->accept(this);
3624          arg2 = this->result;
3625          param = param->get_next();
3626       }
3627
3628       assert(param->is_tail_sentinel());
3629
3630       unsigned opcode;
3631       switch (ir->callee->intrinsic_id) {
3632       case ir_intrinsic_image_load:
3633          opcode = TGSI_OPCODE_LOAD;
3634          break;
3635       case ir_intrinsic_image_store:
3636          opcode = TGSI_OPCODE_STORE;
3637          break;
3638       case ir_intrinsic_image_atomic_add:
3639          opcode = TGSI_OPCODE_ATOMUADD;
3640          break;
3641       case ir_intrinsic_image_atomic_min:
3642          opcode = TGSI_OPCODE_ATOMIMIN;
3643          break;
3644       case ir_intrinsic_image_atomic_max:
3645          opcode = TGSI_OPCODE_ATOMIMAX;
3646          break;
3647       case ir_intrinsic_image_atomic_and:
3648          opcode = TGSI_OPCODE_ATOMAND;
3649          break;
3650       case ir_intrinsic_image_atomic_or:
3651          opcode = TGSI_OPCODE_ATOMOR;
3652          break;
3653       case ir_intrinsic_image_atomic_xor:
3654          opcode = TGSI_OPCODE_ATOMXOR;
3655          break;
3656       case ir_intrinsic_image_atomic_exchange:
3657          opcode = TGSI_OPCODE_ATOMXCHG;
3658          break;
3659       case ir_intrinsic_image_atomic_comp_swap:
3660          opcode = TGSI_OPCODE_ATOMCAS;
3661          break;
3662       default:
3663          assert(!"Unexpected intrinsic");
3664          return;
3665       }
3666
3667       inst = emit_asm(ir, opcode, dst, coord, arg1, arg2);
           /* A store has no return dereference, so force a full writemask on
            * the emitted instruction's destination.
            */
3668       if (opcode == TGSI_OPCODE_STORE)
3669          inst->dst[0].writemask = WRITEMASK_XYZW;
3670    }
3671
        /* Attach the image resource and its array range to the instruction. */
3672    inst->buffer = image;
3673    inst->sampler_array_size = sampler_array_size;
3674    inst->sampler_base = sampler_base;
3675
        /* Map the GLSL image dimensionality (and array-ness) to a texture
         * target index.
         */
3676    switch (type->sampler_dimensionality) {
3677    case GLSL_SAMPLER_DIM_1D:
3678       inst->tex_target = (type->sampler_array)
3679          ? TEXTURE_1D_ARRAY_INDEX : TEXTURE_1D_INDEX;
3680       break;
3681    case GLSL_SAMPLER_DIM_2D:
3682       inst->tex_target = (type->sampler_array)
3683          ? TEXTURE_2D_ARRAY_INDEX : TEXTURE_2D_INDEX;
3684       break;
3685    case GLSL_SAMPLER_DIM_3D:
3686       inst->tex_target = TEXTURE_3D_INDEX;
3687       break;
3688    case GLSL_SAMPLER_DIM_CUBE:
3689       inst->tex_target = (type->sampler_array)
3690          ? TEXTURE_CUBE_ARRAY_INDEX : TEXTURE_CUBE_INDEX;
3691       break;
3692    case GLSL_SAMPLER_DIM_RECT:
3693       inst->tex_target = TEXTURE_RECT_INDEX;
3694       break;
3695    case GLSL_SAMPLER_DIM_BUF:
3696       inst->tex_target = TEXTURE_BUFFER_INDEX;
3697       break;
3698    case GLSL_SAMPLER_DIM_EXTERNAL:
3699       inst->tex_target = TEXTURE_EXTERNAL_INDEX;
3700       break;
3701    case GLSL_SAMPLER_DIM_MS:
3702       inst->tex_target = (type->sampler_array)
3703          ? TEXTURE_2D_MULTISAMPLE_ARRAY_INDEX : TEXTURE_2D_MULTISAMPLE_INDEX;
3704       break;
3705    default:
3706       assert(!"Should not get here.");
3707    }
3708
3709    inst->image_format = st_mesa_format_to_pipe_format(st_context(ctx),
3710          _mesa_get_shader_image_format(imgvar->data.image_format));
3711
        /* Carry the image variable's memory qualifiers over as access flags. */
3712    if (imgvar->data.image_coherent)
3713       inst->buffer_access |= TGSI_MEMORY_COHERENT;
3714    if (imgvar->data.image_restrict)
3715       inst->buffer_access |= TGSI_MEMORY_RESTRICT;
3716    if (imgvar->data.image_volatile)
3717       inst->buffer_access |= TGSI_MEMORY_VOLATILE;
3718 }
3719
/**
 * Translate a function call.
 *
 * Calls to intrinsics are dispatched to the matching visit_*_intrinsic()
 * helper and return immediately; the generic_* and shader_clock intrinsics
 * are treated as unreachable here.
 *
 * For a regular function (ir_intrinsic_invalid) the sequence is:
 *   1. copy each in/inout actual parameter into the storage of the matching
 *      formal parameter, one MOV per type_size() slot;
 *   2. emit a CAL instruction whose function field is the callee's entry;
 *   3. copy each out/inout formal parameter back into its actual parameter;
 *   4. set this->result to the callee entry's return register.
 *
 * \param ir  the call being translated
 */
3720 void
3721 glsl_to_tgsi_visitor::visit(ir_call *ir)
3722 {
3723    glsl_to_tgsi_instruction *call_inst;
3724    ir_function_signature *sig = ir->callee;
3725    function_entry *entry;
3726    int i;
3727
3728    /* Filter out intrinsics */
3729    switch (sig->intrinsic_id) {
3730    case ir_intrinsic_invalid:
           /* Not an intrinsic: fall out of the switch to the call path below. */
3731       break;
3732
3733    case ir_intrinsic_atomic_counter_read:
3734    case ir_intrinsic_atomic_counter_increment:
3735    case ir_intrinsic_atomic_counter_predecrement:
3736    case ir_intrinsic_atomic_counter_add:
3737    case ir_intrinsic_atomic_counter_min:
3738    case ir_intrinsic_atomic_counter_max:
3739    case ir_intrinsic_atomic_counter_and:
3740    case ir_intrinsic_atomic_counter_or:
3741    case ir_intrinsic_atomic_counter_xor:
3742    case ir_intrinsic_atomic_counter_exchange:
3743    case ir_intrinsic_atomic_counter_comp_swap:
3744       visit_atomic_counter_intrinsic(ir);
3745       return;
3746
3747    case ir_intrinsic_ssbo_load:
3748    case ir_intrinsic_ssbo_store:
3749    case ir_intrinsic_ssbo_atomic_add:
3750    case ir_intrinsic_ssbo_atomic_min:
3751    case ir_intrinsic_ssbo_atomic_max:
3752    case ir_intrinsic_ssbo_atomic_and:
3753    case ir_intrinsic_ssbo_atomic_or:
3754    case ir_intrinsic_ssbo_atomic_xor:
3755    case ir_intrinsic_ssbo_atomic_exchange:
3756    case ir_intrinsic_ssbo_atomic_comp_swap:
3757       visit_ssbo_intrinsic(ir);
3758       return;
3759
3760    case ir_intrinsic_memory_barrier:
3761    case ir_intrinsic_memory_barrier_atomic_counter:
3762    case ir_intrinsic_memory_barrier_buffer:
3763    case ir_intrinsic_memory_barrier_image:
3764    case ir_intrinsic_memory_barrier_shared:
3765    case ir_intrinsic_group_memory_barrier:
3766       visit_membar_intrinsic(ir);
3767       return;
3768
3769    case ir_intrinsic_shared_load:
3770    case ir_intrinsic_shared_store:
3771    case ir_intrinsic_shared_atomic_add:
3772    case ir_intrinsic_shared_atomic_min:
3773    case ir_intrinsic_shared_atomic_max:
3774    case ir_intrinsic_shared_atomic_and:
3775    case ir_intrinsic_shared_atomic_or:
3776    case ir_intrinsic_shared_atomic_xor:
3777    case ir_intrinsic_shared_atomic_exchange:
3778    case ir_intrinsic_shared_atomic_comp_swap:
3779       visit_shared_intrinsic(ir);
3780       return;
3781
3782    case ir_intrinsic_image_load:
3783    case ir_intrinsic_image_store:
3784    case ir_intrinsic_image_atomic_add:
3785    case ir_intrinsic_image_atomic_min:
3786    case ir_intrinsic_image_atomic_max:
3787    case ir_intrinsic_image_atomic_and:
3788    case ir_intrinsic_image_atomic_or:
3789    case ir_intrinsic_image_atomic_xor:
3790    case ir_intrinsic_image_atomic_exchange:
3791    case ir_intrinsic_image_atomic_comp_swap:
3792    case ir_intrinsic_image_size:
3793    case ir_intrinsic_image_samples:
3794       visit_image_intrinsic(ir);
3795       return;
3796
        /* These intrinsics are not expected to reach this visitor. */
3797    case ir_intrinsic_generic_load:
3798    case ir_intrinsic_generic_store:
3799    case ir_intrinsic_generic_atomic_add:
3800    case ir_intrinsic_generic_atomic_and:
3801    case ir_intrinsic_generic_atomic_or:
3802    case ir_intrinsic_generic_atomic_xor:
3803    case ir_intrinsic_generic_atomic_min:
3804    case ir_intrinsic_generic_atomic_max:
3805    case ir_intrinsic_generic_atomic_exchange:
3806    case ir_intrinsic_generic_atomic_comp_swap:
3807    case ir_intrinsic_shader_clock:
3808       unreachable("Invalid intrinsic");
3809    }
3810
3811    entry = get_function_signature(sig);
3812    /* Process in parameters. */
3813    foreach_two_lists(formal_node, &sig->parameters,
3814                      actual_node, &ir->actual_parameters) {
3815       ir_rvalue *param_rval = (ir_rvalue *) actual_node;
3816       ir_variable *param = (ir_variable *) formal_node;
3817
3818       if (param->data.mode == ir_var_function_in ||
3819           param->data.mode == ir_var_function_inout) {
3820          variable_storage *storage = find_variable_storage(param);
3821          assert(storage);
3822
              /* Evaluate the argument; r is the source of the copy. */
3823          param_rval->accept(this);
3824          st_src_reg r = this->result;
3825
              /* l addresses the formal parameter's storage. */
3826          st_dst_reg l;
3827          l.file = storage->file;
3828          l.index = storage->index;
3829          l.reladdr = NULL;
3830          l.writemask = WRITEMASK_XYZW;
3831
              /* One MOV per slot, advancing both register indices together. */
3832          for (i = 0; i < type_size(param->type); i++) {
3833             emit_asm(ir, TGSI_OPCODE_MOV, l, r);
3834             l.index++;
3835             r.index++;
3836          }
3837       }
3838    }
3839
3840    /* Emit call instruction */
3841    call_inst = emit_asm(ir, TGSI_OPCODE_CAL);
3842    call_inst->function = entry;
3843
3844    /* Process out parameters. */
3845    foreach_two_lists(formal_node, &sig->parameters,
3846                      actual_node, &ir->actual_parameters) {
3847       ir_rvalue *param_rval = (ir_rvalue *) actual_node;
3848       ir_variable *param = (ir_variable *) formal_node;
3849
3850       if (param->data.mode == ir_var_function_out ||
3851           param->data.mode == ir_var_function_inout) {
3852          variable_storage *storage = find_variable_storage(param);
3853          assert(storage);
3854
              /* r reads the formal parameter's storage unswizzled and
               * unnegated.
               */
3855          st_src_reg r;
3856          r.file = storage->file;
3857          r.index = storage->index;
3858          r.reladdr = NULL;
3859          r.swizzle = SWIZZLE_NOOP;
3860          r.negate = 0;
3861
              /* The evaluated actual parameter is the destination of the
               * copy-back.
               */
3862          param_rval->accept(this);
3863          st_dst_reg l = st_dst_reg(this->result);
3864
3865          for (i = 0; i < type_size(param->type); i++) {
3866             emit_asm(ir, TGSI_OPCODE_MOV, l, r);
3867             l.index++;
3868             r.index++;
3869          }
3870       }
3871    }
3872
3873    /* Process return value. */
3874    this->result = entry->return_reg;
3875 }
3876
/**
 * Recursively walk a dereference chain, accumulating the constant index,
 * indirect index, array element count and uniform location it implies.
 *
 * The walk starts at \p tail and recurses into the inner dereference
 * (the record of a record dereference, the array of an array dereference)
 * until it reaches any other dereference type, where it stops.
 *
 * \param head            passed unchanged to the recursive calls; not
 *                        otherwise used in this function
 * \param tail            dereference to process at this step
 * \param array_elements  in/out: running product of the lengths of the array
 *                        types walked so far; used as the stride for the
 *                        index at the current level
 * \param base            passed unchanged to the recursive calls; not
 *                        otherwise used in this function
 * \param index           in/out: sum of constant array indices, each scaled
 *                        by the stride in effect at its level
 * \param indirect        in/out: register holding the sum of the scaled
 *                        non-constant indices; left untouched if every index
 *                        is constant
 * \param location        in/out: incremented by the record location offset
 *                        of every struct field traversed
 */
3877 void
3878 glsl_to_tgsi_visitor::calc_deref_offsets(ir_dereference *head,
3879                                          ir_dereference *tail,
3880                                          unsigned *array_elements,
3881                                          unsigned *base,
3882                                          unsigned *index,
3883                                          st_src_reg *indirect,
3884                                          unsigned *location)
3885 {
3886    switch (tail->ir_type) {
3887    case ir_type_dereference_record: {
3888       ir_dereference_record *deref_record = tail->as_dereference_record();
3889       const glsl_type *struct_type = deref_record->record->type;
3890       int field_index = deref_record->record->type->field_index(deref_record->field);
3891
           /* Handle the enclosing record first, then add this field's offset. */
3892       calc_deref_offsets(head, deref_record->record->as_dereference(), array_elements, base, index, indirect, location);
3893
3894       assert(field_index >= 0);
3895       *location += struct_type->record_location_offset(field_index);
3896       break;
3897    }
3898
3899    case ir_type_dereference_array: {
3900       ir_dereference_array *deref_arr = tail->as_dereference_array();
3901       ir_constant *array_index = deref_arr->array_index->constant_expression_value();
3902
3903       if (!array_index) {
              /* Non-constant index: compute index * stride into the X channel
               * of a temporary (a plain MOV when the stride is 1).
               */
3904          st_src_reg temp_reg;
3905          st_dst_reg temp_dst;
3906
3907          temp_reg = get_temp(glsl_type::uint_type);
3908          temp_dst = st_dst_reg(temp_reg);
3909          temp_dst.writemask = 1;
3910
3911          deref_arr->array_index->accept(this);
3912          if (*array_elements != 1)
3913             emit_asm(NULL, TGSI_OPCODE_MUL, temp_dst, this->result, st_src_reg_for_int(*array_elements));
3914          else
3915             emit_asm(NULL, TGSI_OPCODE_MOV, temp_dst, this->result);
3916
              /* The first indirect term becomes the indirect register; later
               * terms are added into it in place.
               */
3917          if (indirect->file == PROGRAM_UNDEFINED)
3918             *indirect = temp_reg;
3919          else {
3920             temp_dst = st_dst_reg(*indirect);
3921             temp_dst.writemask = 1;
3922             emit_asm(NULL, TGSI_OPCODE_ADD, temp_dst, *indirect, temp_reg);
3923          }
3924       } else
              /* Constant index: fold it, scaled by the stride, into *index. */
3925          *index += array_index->value.u[0] * *array_elements;
3926
           /* Grow the stride by this array's length before moving to the
            * enclosing (outer) array level.
            */
3927       *array_elements *= deref_arr->array->type->length;
3928
3929       calc_deref_offsets(head, deref_arr->array->as_dereference(), array_elements, base, index, indirect, location);
3930       break;
3931    }
3932    default:
           /* Any other dereference type ends the walk. */
3933       break;
3934    }
3935 }
3936
/**
 * Resolve a dereference of an opaque-typed variable to an index, an array
 * base/size and an optional indirect register.
 *
 * \param ir          dereference to resolve; it must reference a variable
 *                    (asserted)
 * \param array_size  out: number of array elements spanned when the access
 *                    is indirect, otherwise 1
 * \param base        out: first index of the spanned range
 * \param index       in/out: the constant part of the index is ADDED to the
 *                    value already stored here; this function does not reset
 *                    it, so the caller supplies the starting value
 * \param reladdr     out: zeroed and set to PROGRAM_UNDEFINED first, then
 *                    filled by calc_deref_offsets() with the indirect index
 *                    register if any array index is non-constant
 *
 * When the variable's (field-adjusted) location is not 0xffffffff, the
 * opaque index recorded in UniformStorage for the current shader stage is
 * added to both *base and *index.
 */
3937 void
3938 glsl_to_tgsi_visitor::get_deref_offsets(ir_dereference *ir,
3939                                         unsigned *array_size,
3940                                         unsigned *base,
3941                                         unsigned *index,
3942                                         st_src_reg *reladdr)
3943 {
3944    GLuint shader = _mesa_program_enum_to_shader_stage(this->prog->Target);
3945    unsigned location = 0;
3946    ir_variable *var = ir->variable_referenced();
3947
        /* Start with "no indirect addressing". */
3948    memset(reladdr, 0, sizeof(*reladdr));
3949    reladdr->file = PROGRAM_UNDEFINED;
3950
3951    *base = 0;
3952    *array_size = 1;
3953
3954    assert(var);
3955    location = var->data.location;
3956    calc_deref_offsets(ir, ir, array_size, base, index, reladdr, &location);
3957
3958    /*
3959     * If we end up with no indirect then adjust the base to the index,
3960     * and set the array size to 1.
3961     */
3962    if (reladdr->file == PROGRAM_UNDEFINED) {
3963       *base = *index;
3964       *array_size = 1;
3965    }
3966
        /* 0xffffffff is treated as "no location"; otherwise offset both
         * results by the opaque index stored for this shader stage.
         */
3967    if (location != 0xffffffff) {
3968       *base += this->shader_program->UniformStorage[location].opaque[shader].index;
3969       *index += this->shader_program->UniformStorage[location].opaque[shader].index;
3970    }
3971 }
3972
/**
 * Translate a texture operation into a TGSI texture instruction.
 *
 * Outline:
 *   1. Copy the coordinate (if any) into a vec4 temporary and evaluate the
 *      projector (if any).
 *   2. Per ir->op, pick the TGSI opcode and evaluate the extra operands
 *      (LOD/bias, gradients, offsets, sample index, gather component).
 *   3. Apply projection: TEX becomes TXP with the projector in coord.w; for
 *      every other opcode the divide is done by hand (RCP + MUL).
 *   4. Place the shadow comparator, the sample index or the LOD/bias in the
 *      coordinate channel (or separate register) used for that opcode.
 *   5. Resolve the sampler dereference and emit the instruction.
 *   6. Fill in sampler index/array range, indirect address, offsets, texture
 *      target and result type on the emitted instruction.
 *
 * On return this->result is the temporary holding the texture result.
 *
 * \param ir  the texture operation being translated
 */
3973 void
3974 glsl_to_tgsi_visitor::visit(ir_texture *ir)
3975 {
3976    st_src_reg result_src, coord, cube_sc, lod_info, projector, dx, dy;
3977    st_src_reg offset[MAX_GLSL_TEXTURE_OFFSET], sample_index, component;
3978    st_src_reg levels_src, reladdr;
3979    st_dst_reg result_dst, coord_dst, cube_sc_dst;
3980    glsl_to_tgsi_instruction *inst = NULL;
3981    unsigned opcode = TGSI_OPCODE_NOP;
3982    const glsl_type *sampler_type = ir->sampler->type;
3983    unsigned sampler_array_size = 1, sampler_index = 0, sampler_base = 0;
3984    bool is_cube_array = false;
3985    unsigned i;
3986
3987    /* if we are a cube array sampler */
3988    if ((sampler_type->sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE &&
3989         sampler_type->sampler_array)) {
3990       is_cube_array = true;
3991    }
3992
3993    if (ir->coordinate) {
3994       ir->coordinate->accept(this);
3995
3996       /* Put our coords in a temp.  We'll need to modify them for shadow,
3997        * projection, or LOD, so the only case we'd use it as-is is if
3998        * we're doing plain old texturing.  The optimization passes on
3999        * glsl_to_tgsi_visitor should handle cleaning up our mess in that case.
4000        */
4001       coord = get_temp(glsl_type::vec4_type);
4002       coord_dst = st_dst_reg(coord);
4003       coord_dst.writemask = (1 << ir->coordinate->type->vector_elements) - 1;
4004       emit_asm(ir, TGSI_OPCODE_MOV, coord_dst, this->result);
4005    }
4006
4007    if (ir->projector) {
4008       ir->projector->accept(this);
4009       projector = this->result;
4010    }
4011
4012    /* Storage for our result.  Ideally for an assignment we'd be using
4013     * the actual storage for the result here, instead.
4014     */
4015    result_src = get_temp(ir->type);
4016    result_dst = st_dst_reg(result_src);
4017
        /* Select the opcode and evaluate the operands specific to each
         * texture operation.
         */
4018    switch (ir->op) {
4019    case ir_tex:
           /* Shadow cube arrays use the two-operand TEX2 form. */
4020       opcode = (is_cube_array && ir->shadow_comparitor) ? TGSI_OPCODE_TEX2 : TGSI_OPCODE_TEX;
4021       if (ir->offset) {
4022          ir->offset->accept(this);
4023          offset[0] = this->result;
4024       }
4025       break;
4026    case ir_txb:
           /* Cube arrays and samplerCubeShadow pass the bias as a separate
            * operand (TXB2) instead of in coord.w.
            */
4027       if (is_cube_array ||
4028           sampler_type == glsl_type::samplerCubeShadow_type) {
4029          opcode = TGSI_OPCODE_TXB2;
4030       }
4031       else {
4032          opcode = TGSI_OPCODE_TXB;
4033       }
4034       ir->lod_info.bias->accept(this);
4035       lod_info = this->result;
4036       if (ir->offset) {
4037          ir->offset->accept(this);
4038          offset[0] = this->result;
4039       }
4040       break;
4041    case ir_txl:
4042       opcode = is_cube_array ? TGSI_OPCODE_TXL2 : TGSI_OPCODE_TXL;
4043       ir->lod_info.lod->accept(this);
4044       lod_info = this->result;
4045       if (ir->offset) {
4046          ir->offset->accept(this);
4047          offset[0] = this->result;
4048       }
4049       break;
4050    case ir_txd:
4051       opcode = TGSI_OPCODE_TXD;
4052       ir->lod_info.grad.dPdx->accept(this);
4053       dx = this->result;
4054       ir->lod_info.grad.dPdy->accept(this);
4055       dy = this->result;
4056       if (ir->offset) {
4057          ir->offset->accept(this);
4058          offset[0] = this->result;
4059       }
4060       break;
4061    case ir_txs:
4062       opcode = TGSI_OPCODE_TXQ;
4063       ir->lod_info.lod->accept(this);
4064       lod_info = this->result;
4065       break;
4066    case ir_query_levels:
           /* Also a TXQ, but without a LOD operand; the query result goes to
            * a separate temporary and is extracted after emission below.
            */
4067       opcode = TGSI_OPCODE_TXQ;
4068       lod_info = undef_src;
4069       levels_src = get_temp(ir->type);
4070       break;
4071    case ir_txf:
4072       opcode = TGSI_OPCODE_TXF;
4073       ir->lod_info.lod->accept(this);
4074       lod_info = this->result;
4075       if (ir->offset) {
4076          ir->offset->accept(this);
4077          offset[0] = this->result;
4078       }
4079       break;
4080    case ir_txf_ms:
4081       opcode = TGSI_OPCODE_TXF;
4082       ir->lod_info.sample_index->accept(this);
4083       sample_index = this->result;
4084       break;
4085    case ir_tg4:
4086       opcode = TGSI_OPCODE_TG4;
4087       ir->lod_info.component->accept(this);
4088       component = this->result;
4089       if (ir->offset) {
4090          ir->offset->accept(this);
4091          if (ir->offset->type->base_type == GLSL_TYPE_ARRAY) {
                 /* Array of offsets: produce one source register per
                  * element, each type_size(elt_type) slots after the
                  * previous one.
                  */
4092             const glsl_type *elt_type = ir->offset->type->fields.array;
4093             for (i = 0; i < ir->offset->type->length; i++) {
4094                offset[i] = this->result;
4095                offset[i].index += i * type_size(elt_type);
4096                offset[i].type = elt_type->base_type;
4097                offset[i].swizzle = swizzle_for_size(elt_type->vector_elements);
4098             }
4099          } else {
4100             offset[0] = this->result;
4101          }
4102       }
4103       break;
4104    case ir_lod:
4105       opcode = TGSI_OPCODE_LODQ;
4106       break;
4107    case ir_texture_samples:
4108       opcode = TGSI_OPCODE_TXQS;
4109       break;
4110    case ir_samples_identical:
4111       unreachable("Unexpected ir_samples_identical opcode");
4112    }
4113
4114    if (ir->projector) {
4115       if (opcode == TGSI_OPCODE_TEX) {
4116          /* Slot the projector in as the last component of the coord. */
4117          coord_dst.writemask = WRITEMASK_W;
4118          emit_asm(ir, TGSI_OPCODE_MOV, coord_dst, projector);
4119          coord_dst.writemask = WRITEMASK_XYZW;
4120          opcode = TGSI_OPCODE_TXP;
4121       } else {
4122          st_src_reg coord_w = coord;
4123          coord_w.swizzle = SWIZZLE_WWWW;
4124
4125          /* For the other TEX opcodes there's no projective version
4126           * since the last slot is taken up by LOD info.  Do the
4127           * projective divide now.
4128           */
4129          coord_dst.writemask = WRITEMASK_W;
4130          emit_asm(ir, TGSI_OPCODE_RCP, coord_dst, projector);
4131
4132          /* In the case where we have to project the coordinates "by hand,"
4133           * the shadow comparator value must also be projected.
4134           */
4135          st_src_reg tmp_src = coord;
4136          if (ir->shadow_comparitor) {
4137             /* Slot the shadow value in as the second to last component of the
4138              * coord.
4139              */
4140             ir->shadow_comparitor->accept(this);
4141
4142             tmp_src = get_temp(glsl_type::vec4_type);
4143             st_dst_reg tmp_dst = st_dst_reg(tmp_src);
4144
4145             /* Projective division not allowed for array samplers. */
4146             assert(!sampler_type->sampler_array);
4147
4148             tmp_dst.writemask = WRITEMASK_Z;
4149             emit_asm(ir, TGSI_OPCODE_MOV, tmp_dst, this->result);
4150
4151             tmp_dst.writemask = WRITEMASK_XY;
4152             emit_asm(ir, TGSI_OPCODE_MOV, tmp_dst, coord);
4153          }
4154
              /* coord.xyz = tmp_src.xyz * (1 / projector), using the
               * reciprocal written to coord.w above.
               */
4155          coord_dst.writemask = WRITEMASK_XYZ;
4156          emit_asm(ir, TGSI_OPCODE_MUL, coord_dst, tmp_src, coord_w);
4157
4158          coord_dst.writemask = WRITEMASK_XYZW;
4159          coord.swizzle = SWIZZLE_XYZW;
4160       }
4161    }
4162
4163    /* If projection is done and the opcode is not TGSI_OPCODE_TXP, then the shadow
4164     * comparator was put in the correct place (and projected) by the code,
4165     * above, that handles by-hand projection.
4166     */
4167    if (ir->shadow_comparitor && (!ir->projector || opcode == TGSI_OPCODE_TXP)) {
4168       /* Slot the shadow value in as the second to last component of the
4169        * coord.
4170        */
4171       ir->shadow_comparitor->accept(this);
4172
4173       if (is_cube_array) {
              /* Cube arrays carry the comparator in the X channel of a
               * separate register, which is passed as an extra operand when
               * the instruction is emitted.  The second writemask assignment
               * below re-stores the same value and has no effect.
               */
4174          cube_sc = get_temp(glsl_type::float_type);
4175          cube_sc_dst = st_dst_reg(cube_sc);
4176          cube_sc_dst.writemask = WRITEMASK_X;
4177          emit_asm(ir, TGSI_OPCODE_MOV, cube_sc_dst, this->result);
4178          cube_sc_dst.writemask = WRITEMASK_X;
4179       }
4180       else {
              /* 2D arrays and cubes take the comparator in W; all other
               * sampler types take it in Z.
               */
4181          if ((sampler_type->sampler_dimensionality == GLSL_SAMPLER_DIM_2D &&
4182               sampler_type->sampler_array) ||
4183              sampler_type->sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE) {
4184             coord_dst.writemask = WRITEMASK_W;
4185          } else {
4186             coord_dst.writemask = WRITEMASK_Z;
4187          }
4188          emit_asm(ir, TGSI_OPCODE_MOV, coord_dst, this->result);
4189          coord_dst.writemask = WRITEMASK_XYZW;
4190       }
4191    }
4192
4193    if (ir->op == ir_txf_ms) {
           /* Multisample fetch: the sample index goes in coord.w. */
4194       coord_dst.writemask = WRITEMASK_W;
4195       emit_asm(ir, TGSI_OPCODE_MOV, coord_dst, sample_index);
4196       coord_dst.writemask = WRITEMASK_XYZW;
4197    } else if (opcode == TGSI_OPCODE_TXL || opcode == TGSI_OPCODE_TXB ||
4198        opcode == TGSI_OPCODE_TXF) {
4199       /* TGSI stores LOD or LOD bias in the last channel of the coords. */
4200       coord_dst.writemask = WRITEMASK_W;
4201       emit_asm(ir, TGSI_OPCODE_MOV, coord_dst, lod_info);
4202       coord_dst.writemask = WRITEMASK_XYZW;
4203    }
4204
        /* Resolve the sampler dereference; a non-constant array index yields
         * an indirect register that is handed to emit_arl().
         */
4205    get_deref_offsets(ir->sampler, &sampler_array_size, &sampler_base,
4206                      &sampler_index, &reladdr);
4207    if (reladdr.file != PROGRAM_UNDEFINED)
4208       emit_arl(ir, sampler_reladdr, reladdr);
4209
        /* Emit the texture instruction with the operand list that matches
         * its opcode.  `inst` always refers to the texture instruction
         * itself, never to a follow-up MOV.
         */
4210    if (opcode == TGSI_OPCODE_TXD)
4211       inst = emit_asm(ir, opcode, result_dst, coord, dx, dy);
4212    else if (opcode == TGSI_OPCODE_TXQ) {
4213       if (ir->op == ir_query_levels) {
4214          /* the level is stored in W */
4215          inst = emit_asm(ir, opcode, st_dst_reg(levels_src), lod_info);
4216          result_dst.writemask = WRITEMASK_X;
4217          levels_src.swizzle = SWIZZLE_WWWW;
4218          emit_asm(ir, TGSI_OPCODE_MOV, result_dst, levels_src);
4219       } else
4220          inst = emit_asm(ir, opcode, result_dst, lod_info);
4221    } else if (opcode == TGSI_OPCODE_TXQS) {
4222       inst = emit_asm(ir, opcode, result_dst);
4223    } else if (opcode == TGSI_OPCODE_TXF) {
4224       inst = emit_asm(ir, opcode, result_dst, coord);
4225    } else if (opcode == TGSI_OPCODE_TXL2 || opcode == TGSI_OPCODE_TXB2) {
4226       inst = emit_asm(ir, opcode, result_dst, coord, lod_info);
4227    } else if (opcode == TGSI_OPCODE_TEX2) {
4228       inst = emit_asm(ir, opcode, result_dst, coord, cube_sc);
4229    } else if (opcode == TGSI_OPCODE_TG4) {
4230       if (is_cube_array && ir->shadow_comparitor) {
4231          inst = emit_asm(ir, opcode, result_dst, coord, cube_sc);
4232       } else {
4233          inst = emit_asm(ir, opcode, result_dst, coord, component);
4234       }
4235    } else
4236       inst = emit_asm(ir, opcode, result_dst, coord);
4237
4238    if (ir->shadow_comparitor)
4239       inst->tex_shadow = GL_TRUE;
4240
4241    inst->sampler.index = sampler_index;
4242    inst->sampler_array_size = sampler_array_size;
4243    inst->sampler_base = sampler_base;
4244
4245    if (reladdr.file != PROGRAM_UNDEFINED) {
           /* Keep a heap copy of the indirect register on the instruction. */
4246       inst->sampler.reladdr = ralloc(mem_ctx, st_src_reg);
4247       memcpy(inst->sampler.reladdr, &reladdr, sizeof(reladdr));
4248    }
4249
4250    if (ir->offset) {
           /* Copy offsets up to the first entry whose file is still
            * PROGRAM_UNDEFINED and record how many were copied.
            */
4251       for (i = 0; i < MAX_GLSL_TEXTURE_OFFSET && offset[i].file != PROGRAM_UNDEFINED; i++)
4252          inst->tex_offsets[i] = offset[i];
4253       inst->tex_offset_num_offset = i;
4254    }
4255
        /* Map the sampler dimensionality (and array-ness) to a texture target
         * index.
         */
4256    switch (sampler_type->sampler_dimensionality) {
4257    case GLSL_SAMPLER_DIM_1D:
4258       inst->tex_target = (sampler_type->sampler_array)
4259          ? TEXTURE_1D_ARRAY_INDEX : TEXTURE_1D_INDEX;
4260       break;
4261    case GLSL_SAMPLER_DIM_2D:
4262       inst->tex_target = (sampler_type->sampler_array)
4263          ? TEXTURE_2D_ARRAY_INDEX : TEXTURE_2D_INDEX;
4264       break;
4265    case GLSL_SAMPLER_DIM_3D:
4266       inst->tex_target = TEXTURE_3D_INDEX;
4267       break;
4268    case GLSL_SAMPLER_DIM_CUBE:
4269       inst->tex_target = (sampler_type->sampler_array)
4270          ? TEXTURE_CUBE_ARRAY_INDEX : TEXTURE_CUBE_INDEX;
4271       break;
4272    case GLSL_SAMPLER_DIM_RECT:
4273       inst->tex_target = TEXTURE_RECT_INDEX;
4274       break;
4275    case GLSL_SAMPLER_DIM_BUF:
4276       inst->tex_target = TEXTURE_BUFFER_INDEX;
4277       break;
4278    case GLSL_SAMPLER_DIM_EXTERNAL:
4279       inst->tex_target = TEXTURE_EXTERNAL_INDEX;
4280       break;
4281    case GLSL_SAMPLER_DIM_MS:
4282       inst->tex_target = (sampler_type->sampler_array)
4283          ? TEXTURE_2D_MULTISAMPLE_ARRAY_INDEX : TEXTURE_2D_MULTISAMPLE_INDEX;
4284       break;
4285    default:
4286       assert(!"Should not get here.");
4287    }
4288
4289    inst->tex_type = ir->type->base_type;
4290
4291    this->result = result_src;
4292 }
4293
4294 void
4295 glsl_to_tgsi_visitor::visit(ir_return *ir)
4296 {
4297    if (ir->get_value()) {
4298       st_dst_reg l;
4299       int i;
4300
4301       assert(current_function);
4302
4303       ir->get_value()->accept(this);
4304       st_src_reg r = this->result;
4305
4306       l = st_dst_reg(current_function->return_reg);
4307
4308       for (i = 0; i < type_size(current_function->sig->return_type); i++) {
4309          emit_asm(ir, TGSI_OPCODE_MOV, l, r);
4310          l.index++;
4311          r.index++;
4312       }
4313    }
4314
4315    emit_asm(ir, TGSI_OPCODE_RET);
4316 }
4317
4318 void
4319 glsl_to_tgsi_visitor::visit(ir_discard *ir)
4320 {
4321    if (ir->condition) {
4322       ir->condition->accept(this);
4323       st_src_reg condition = this->result;
4324
4325       /* Convert the bool condition to a float so we can negate. */
4326       if (native_integers) {
4327          st_src_reg temp = get_temp(ir->condition->type);
4328          emit_asm(ir, TGSI_OPCODE_AND, st_dst_reg(temp),
4329               condition, st_src_reg_for_float(1.0));
4330          condition = temp;
4331       }
4332
4333       condition.negate = ~condition.negate;
4334       emit_asm(ir, TGSI_OPCODE_KILL_IF, undef_dst, condition);
4335    } else {
4336       /* unconditional kil */
4337       emit_asm(ir, TGSI_OPCODE_KILL);
4338    }
4339 }
4340
4341 void
4342 glsl_to_tgsi_visitor::visit(ir_if *ir)
4343 {
4344    unsigned if_opcode;
4345    glsl_to_tgsi_instruction *if_inst;
4346
4347    ir->condition->accept(this);
4348    assert(this->result.file != PROGRAM_UNDEFINED);
4349
4350    if_opcode = native_integers ? TGSI_OPCODE_UIF : TGSI_OPCODE_IF;
4351
4352    if_inst = emit_asm(ir->condition, if_opcode, undef_dst, this->result);
4353
4354    this->instructions.push_tail(if_inst);
4355
4356    visit_exec_list(&ir->then_instructions, this);
4357
4358    if (!ir->else_instructions.is_empty()) {
4359       emit_asm(ir->condition, TGSI_OPCODE_ELSE);
4360       visit_exec_list(&ir->else_instructions, this);
4361    }
4362
4363    if_inst = emit_asm(ir->condition, TGSI_OPCODE_ENDIF);
4364 }
4365
4366
4367 void
4368 glsl_to_tgsi_visitor::visit(ir_emit_vertex *ir)
4369 {
4370    assert(this->prog->Target == GL_GEOMETRY_PROGRAM_NV);
4371
4372    ir->stream->accept(this);
4373    emit_asm(ir, TGSI_OPCODE_EMIT, undef_dst, this->result);
4374 }
4375
4376 void
4377 glsl_to_tgsi_visitor::visit(ir_end_primitive *ir)
4378 {
4379    assert(this->prog->Target == GL_GEOMETRY_PROGRAM_NV);
4380
4381    ir->stream->accept(this);
4382    emit_asm(ir, TGSI_OPCODE_ENDPRIM, undef_dst, this->result);
4383 }
4384
4385 void
4386 glsl_to_tgsi_visitor::visit(ir_barrier *ir)
4387 {
4388    assert(this->prog->Target == GL_TESS_CONTROL_PROGRAM_NV ||
4389           this->prog->Target == GL_COMPUTE_PROGRAM_NV);
4390
4391    emit_asm(ir, TGSI_OPCODE_BARRIER);
4392 }
4393
4394 glsl_to_tgsi_visitor::glsl_to_tgsi_visitor()
4395 {
4396    STATIC_ASSERT(sizeof(samplers_used) * 8 >= PIPE_MAX_SAMPLERS);
4397
4398    result.file = PROGRAM_UNDEFINED;
4399    next_temp = 1;
4400    array_sizes = NULL;
4401    max_num_arrays = 0;
4402    next_array = 0;
4403    num_inputs = 0;
4404    num_outputs = 0;
4405    num_input_arrays = 0;
4406    num_output_arrays = 0;
4407    next_signature_id = 1;
4408    num_immediates = 0;
4409    current_function = NULL;
4410    num_address_regs = 0;
4411    samplers_used = 0;
4412    buffers_used = 0;
4413    images_used = 0;
4414    indirect_addr_consts = false;
4415    wpos_transform_const = -1;
4416    glsl_version = 0;
4417    native_integers = false;
4418    mem_ctx = ralloc_context(NULL);
4419    ctx = NULL;
4420    prog = NULL;
4421    shader_program = NULL;
4422    shader = NULL;
4423    options = NULL;
4424    have_sqrt = false;
4425    have_fma = false;
4426    use_shared_memory = false;
4427 }
4428
4429 glsl_to_tgsi_visitor::~glsl_to_tgsi_visitor()
4430 {
4431    free(array_sizes);
4432    ralloc_free(mem_ctx);
4433 }
4434
4435 extern "C" void free_glsl_to_tgsi_visitor(glsl_to_tgsi_visitor *v)
4436 {
4437    delete v;
4438 }
4439
4440
4441 /**
4442  * Count resources used by the given gpu program (number of texture
4443  * samplers, etc).
4444  */
4445 static void
4446 count_resources(glsl_to_tgsi_visitor *v, gl_program *prog)
4447 {
4448    v->samplers_used = 0;
4449    v->buffers_used = 0;
4450    v->images_used = 0;
4451
4452    foreach_in_list(glsl_to_tgsi_instruction, inst, &v->instructions) {
4453       if (inst->info->is_tex) {
4454          for (int i = 0; i < inst->sampler_array_size; i++) {
4455             unsigned idx = inst->sampler_base + i;
4456             v->samplers_used |= 1u << idx;
4457
4458             debug_assert(idx < (int)ARRAY_SIZE(v->sampler_types));
4459             v->sampler_types[idx] = inst->tex_type;
4460             v->sampler_targets[idx] =
4461                st_translate_texture_target(inst->tex_target, inst->tex_shadow);
4462
4463             if (inst->tex_shadow) {
4464                prog->ShadowSamplers |= 1 << (inst->sampler.index + i);
4465             }
4466          }
4467       }
4468
4469       if (inst->tex_target == TEXTURE_EXTERNAL_INDEX)
4470          prog->ExternalSamplersUsed |= 1 << inst->sampler.index;
4471
4472       if (inst->buffer.file != PROGRAM_UNDEFINED && (
4473                 is_resource_instruction(inst->op) ||
4474                 inst->op == TGSI_OPCODE_STORE)) {
4475          if (inst->buffer.file == PROGRAM_BUFFER) {
4476             v->buffers_used |= 1 << inst->buffer.index;
4477          } else if (inst->buffer.file == PROGRAM_MEMORY) {
4478             v->use_shared_memory = true;
4479          } else {
4480             assert(inst->buffer.file == PROGRAM_IMAGE);
4481             for (int i = 0; i < inst->sampler_array_size; i++) {
4482                unsigned idx = inst->sampler_base + i;
4483                v->images_used |= 1 << idx;
4484                v->image_targets[idx] =
4485                   st_translate_texture_target(inst->tex_target, false);
4486                v->image_formats[idx] = inst->image_format;
4487             }
4488          }
4489       }
4490    }
4491    prog->SamplersUsed = v->samplers_used;
4492
4493    if (v->shader_program != NULL)
4494       _mesa_update_shader_textures_used(v->shader_program, prog);
4495 }
4496
4497 /**
4498  * Returns the mask of channels (bitmask of WRITEMASK_X,Y,Z,W) which
4499  * are read from the given src in this instruction
4500  */
4501 static int
4502 get_src_arg_mask(st_dst_reg dst, st_src_reg src)
4503 {
4504    int read_mask = 0, comp;
4505
4506    /* Now, given the src swizzle and the written channels, find which
4507     * components are actually read
4508     */
4509    for (comp = 0; comp < 4; ++comp) {
4510       const unsigned coord = GET_SWZ(src.swizzle, comp);
4511       assert(coord < 4);
4512       if (dst.writemask & (1 << comp) && coord <= SWIZZLE_W)
4513          read_mask |= 1 << coord;
4514    }
4515
4516    return read_mask;
4517 }
4518
4519 /**
4520  * This pass replaces CMP T0, T1 T2 T0 with MOV T0, T2 when the CMP
4521  * instruction is the first instruction to write to register T0.  There are
4522  * several lowering passes done in GLSL IR (e.g. branches and
4523  * relative addressing) that create a large number of conditional assignments
4524  * that ir_to_mesa converts to CMP instructions like the one mentioned above.
4525  *
4526  * Here is why this conversion is safe:
4527  * CMP T0, T1 T2 T0 can be expanded to:
4528  * if (T1 < 0.0)
4529  *   MOV T0, T2;
4530  * else
4531  *   MOV T0, T0;
4532  *
4533  * If (T1 < 0.0) evaluates to true then our replacement MOV T0, T2 is the same
4534  * as the original program.  If (T1 < 0.0) evaluates to false, executing
4535  * MOV T0, T0 will store a garbage value in T0 since T0 is uninitialized.
4536  * Therefore, it doesn't matter that we are replacing MOV T0, T0 with MOV T0, T2
4537  * because any instruction that was going to read from T0 after this was going
4538  * to read a garbage value anyway.
4539  */
4540 void
4541 glsl_to_tgsi_visitor::simplify_cmp(void)
4542 {
4543    int tempWritesSize = 0;
4544    unsigned *tempWrites = NULL;
4545    unsigned outputWrites[VARYING_SLOT_TESS_MAX];
4546
4547    memset(outputWrites, 0, sizeof(outputWrites));
4548
4549    foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) {
4550       unsigned prevWriteMask = 0;
4551
4552       /* Give up if we encounter relative addressing or flow control. */
4553       if (inst->dst[0].reladdr || inst->dst[0].reladdr2 ||
4554           inst->dst[1].reladdr || inst->dst[1].reladdr2 ||
4555           tgsi_get_opcode_info(inst->op)->is_branch ||
4556           inst->op == TGSI_OPCODE_BGNSUB ||
4557           inst->op == TGSI_OPCODE_CONT ||
4558           inst->op == TGSI_OPCODE_END ||
4559           inst->op == TGSI_OPCODE_ENDSUB ||
4560           inst->op == TGSI_OPCODE_RET) {
4561          break;
4562       }
4563
4564       if (inst->dst[0].file == PROGRAM_OUTPUT) {
4565          assert(inst->dst[0].index < (signed)ARRAY_SIZE(outputWrites));
4566          prevWriteMask = outputWrites[inst->dst[0].index];
4567          outputWrites[inst->dst[0].index] |= inst->dst[0].writemask;
4568       } else if (inst->dst[0].file == PROGRAM_TEMPORARY) {
4569          if (inst->dst[0].index >= tempWritesSize) {
4570             const int inc = 4096;
4571
4572             tempWrites = (unsigned*)
4573                          realloc(tempWrites,
4574                                  (tempWritesSize + inc) * sizeof(unsigned));
4575             if (!tempWrites)
4576                return;
4577
4578             memset(tempWrites + tempWritesSize, 0, inc * sizeof(unsigned));
4579             tempWritesSize += inc;
4580          }
4581
4582          prevWriteMask = tempWrites[inst->dst[0].index];
4583          tempWrites[inst->dst[0].index] |= inst->dst[0].writemask;
4584       } else
4585          continue;
4586
4587       /* For a CMP to be considered a conditional write, the destination
4588        * register and source register two must be the same. */
4589       if (inst->op == TGSI_OPCODE_CMP
4590           && !(inst->dst[0].writemask & prevWriteMask)
4591           && inst->src[2].file == inst->dst[0].file
4592           && inst->src[2].index == inst->dst[0].index
4593           && inst->dst[0].writemask == get_src_arg_mask(inst->dst[0], inst->src[2])) {
4594
4595          inst->op = TGSI_OPCODE_MOV;
4596          inst->info = tgsi_get_opcode_info(inst->op);
4597          inst->src[0] = inst->src[1];
4598       }
4599    }
4600
4601    free(tempWrites);
4602 }
4603
4604 /* Replaces all references to a temporary register index with another index. */
4605 void
4606 glsl_to_tgsi_visitor::rename_temp_registers(int num_renames, struct rename_reg_pair *renames)
4607 {
4608    foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) {
4609       unsigned j;
4610       int k;
4611       for (j = 0; j < num_inst_src_regs(inst); j++) {
4612          if (inst->src[j].file == PROGRAM_TEMPORARY)
4613             for (k = 0; k < num_renames; k++)
4614                if (inst->src[j].index == renames[k].old_reg)
4615                   inst->src[j].index = renames[k].new_reg;
4616       }
4617
4618       for (j = 0; j < inst->tex_offset_num_offset; j++) {
4619          if (inst->tex_offsets[j].file == PROGRAM_TEMPORARY)
4620             for (k = 0; k < num_renames; k++)
4621                if (inst->tex_offsets[j].index == renames[k].old_reg)
4622                   inst->tex_offsets[j].index = renames[k].new_reg;
4623       }
4624
4625       for (j = 0; j < num_inst_dst_regs(inst); j++) {
4626          if (inst->dst[j].file == PROGRAM_TEMPORARY)
4627              for (k = 0; k < num_renames; k++)
4628                 if (inst->dst[j].index == renames[k].old_reg)
4629                    inst->dst[j].index = renames[k].new_reg;
4630       }
4631    }
4632 }
4633
4634 void
4635 glsl_to_tgsi_visitor::get_first_temp_read(int *first_reads)
4636 {
4637    int depth = 0; /* loop depth */
4638    int loop_start = -1; /* index of the first active BGNLOOP (if any) */
4639    unsigned i = 0, j;
4640
4641    foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) {
4642       for (j = 0; j < num_inst_src_regs(inst); j++) {
4643          if (inst->src[j].file == PROGRAM_TEMPORARY) {
4644             if (first_reads[inst->src[j].index] == -1)
4645                 first_reads[inst->src[j].index] = (depth == 0) ? i : loop_start;
4646          }
4647       }
4648       for (j = 0; j < inst->tex_offset_num_offset; j++) {
4649          if (inst->tex_offsets[j].file == PROGRAM_TEMPORARY) {
4650             if (first_reads[inst->tex_offsets[j].index] == -1)
4651                first_reads[inst->tex_offsets[j].index] = (depth == 0) ? i : loop_start;
4652          }
4653       }
4654       if (inst->op == TGSI_OPCODE_BGNLOOP) {
4655          if(depth++ == 0)
4656             loop_start = i;
4657       } else if (inst->op == TGSI_OPCODE_ENDLOOP) {
4658          if (--depth == 0)
4659             loop_start = -1;
4660       }
4661       assert(depth >= 0);
4662       i++;
4663    }
4664 }
4665
4666 void
4667 glsl_to_tgsi_visitor::get_last_temp_read_first_temp_write(int *last_reads, int *first_writes)
4668 {
4669    int depth = 0; /* loop depth */
4670    int loop_start = -1; /* index of the first active BGNLOOP (if any) */
4671    unsigned i = 0, j;
4672    int k;
4673    foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) {
4674       for (j = 0; j < num_inst_src_regs(inst); j++) {
4675          if (inst->src[j].file == PROGRAM_TEMPORARY)
4676             last_reads[inst->src[j].index] = (depth == 0) ? i : -2;
4677       }
4678       for (j = 0; j < num_inst_dst_regs(inst); j++) {
4679          if (inst->dst[j].file == PROGRAM_TEMPORARY) {
4680             if (first_writes[inst->dst[j].index] == -1)
4681                first_writes[inst->dst[j].index] = (depth == 0) ? i : loop_start;
4682             last_reads[inst->dst[j].index] = (depth == 0) ? i : -2;
4683          }
4684       }
4685       for (j = 0; j < inst->tex_offset_num_offset; j++) {
4686          if (inst->tex_offsets[j].file == PROGRAM_TEMPORARY)
4687             last_reads[inst->tex_offsets[j].index] = (depth == 0) ? i : -2;
4688       }
4689       if (inst->op == TGSI_OPCODE_BGNLOOP) {
4690          if(depth++ == 0)
4691             loop_start = i;
4692       } else if (inst->op == TGSI_OPCODE_ENDLOOP) {
4693          if (--depth == 0) {
4694             loop_start = -1;
4695             for (k = 0; k < this->next_temp; k++) {
4696                if (last_reads[k] == -2) {
4697                   last_reads[k] = i;
4698                }
4699             }
4700          }
4701       }
4702       assert(depth >= 0);
4703       i++;
4704    }
4705 }
4706
4707 void
4708 glsl_to_tgsi_visitor::get_last_temp_write(int *last_writes)
4709 {
4710    int depth = 0; /* loop depth */
4711    int i = 0, k;
4712    unsigned j;
4713
4714    foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) {
4715       for (j = 0; j < num_inst_dst_regs(inst); j++) {
4716          if (inst->dst[j].file == PROGRAM_TEMPORARY)
4717             last_writes[inst->dst[j].index] = (depth == 0) ? i : -2;
4718       }
4719
4720       if (inst->op == TGSI_OPCODE_BGNLOOP)
4721          depth++;
4722       else if (inst->op == TGSI_OPCODE_ENDLOOP)
4723          if (--depth == 0) {
4724             for (k = 0; k < this->next_temp; k++) {
4725                if (last_writes[k] == -2) {
4726                   last_writes[k] = i;
4727                }
4728             }
4729          }
4730       assert(depth >= 0);
4731       i++;
4732    }
4733 }
4734
/*
 * On a basic block basis, tracks available PROGRAM_TEMPORARY register
 * channels for copy propagation and updates following instructions to
 * use the original versions.
 *
 * The glsl_to_tgsi_visitor lazily produces code assuming that this pass
 * will occur.  As an example, a TXP production before this pass:
 *
 * 0: MOV TEMP[1], INPUT[4].xyyy;
 * 1: MOV TEMP[1].w, INPUT[4].wwww;
 * 2: TXP TEMP[2], TEMP[1], texture[0], 2D;
 *
 * and after:
 *
 * 0: MOV TEMP[1], INPUT[4].xyyy;
 * 1: MOV TEMP[1].w, INPUT[4].wwww;
 * 2: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D;
 *
 * which allows for dead code elimination on TEMP[1]'s writes.
 *
 * Bookkeeping: acp[4 * temp + chan] holds the MOV instruction whose result
 * is currently available in that channel of that temporary (or NULL), and
 * acp_level[] holds the if-nesting level at which the entry was recorded.
 * Both tables are allocated from mem_ctx and freed before returning.
 */
void
glsl_to_tgsi_visitor::copy_propagate(void)
{
   glsl_to_tgsi_instruction **acp = rzalloc_array(mem_ctx,
                                                  glsl_to_tgsi_instruction *,
                                                  this->next_temp * 4);
   int *acp_level = rzalloc_array(mem_ctx, int, this->next_temp * 4);
   /* Current if/else nesting depth. */
   int level = 0;

   foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) {
      assert(inst->dst[0].file != PROGRAM_TEMPORARY
             || inst->dst[0].index < this->next_temp);

      /* First, do any copy propagation possible into the src regs.
       * Only the first three source operands are considered.
       */
      for (int r = 0; r < 3; r++) {
         glsl_to_tgsi_instruction *first = NULL;
         bool good = true;
         /* Only meaningful once the operand is known to be a temporary,
          * which is checked right below before acp_base is used.
          */
         int acp_base = inst->src[r].index * 4;

         if (inst->src[r].file != PROGRAM_TEMPORARY ||
             inst->src[r].reladdr ||
             inst->src[r].reladdr2)
            continue;

         /* See if we can find entries in the ACP consisting of MOVs
          * from the same src register for all the swizzled channels
          * of this src register reference.
          */
         for (int i = 0; i < 4; i++) {
            int src_chan = GET_SWZ(inst->src[r].swizzle, i);
            glsl_to_tgsi_instruction *copy_chan = acp[acp_base + src_chan];

            if (!copy_chan) {
               good = false;
               break;
            }

            assert(acp_level[acp_base + src_chan] <= level);

            if (!first) {
               first = copy_chan;
            } else {
               /* Every channel must have been copied from the same
                * register as the first one.
                */
               if (first->src[0].file != copy_chan->src[0].file ||
                   first->src[0].index != copy_chan->src[0].index ||
                   first->src[0].double_reg2 != copy_chan->src[0].double_reg2 ||
                   first->src[0].index2D != copy_chan->src[0].index2D) {
                  good = false;
                  break;
               }
            }
         }

         if (good) {
            /* We've now validated that we can copy-propagate to
             * replace this src register reference.  Do it.
             */
            inst->src[r].file = first->src[0].file;
            inst->src[r].index = first->src[0].index;
            inst->src[r].index2D = first->src[0].index2D;
            inst->src[r].has_index2 = first->src[0].has_index2;
            inst->src[r].double_reg2 = first->src[0].double_reg2;
            inst->src[r].array_id = first->src[0].array_id;

            /* Compose the operand's swizzle with the swizzle of the MOV
             * that produced each selected channel.  acp_base still refers
             * to the original temporary, so the lookups stay valid even
             * though file/index were just overwritten.
             */
            int swizzle = 0;
            for (int i = 0; i < 4; i++) {
               int src_chan = GET_SWZ(inst->src[r].swizzle, i);
               glsl_to_tgsi_instruction *copy_inst = acp[acp_base + src_chan];
               swizzle |= (GET_SWZ(copy_inst->src[0].swizzle, src_chan) << (3 * i));
            }
            inst->src[r].swizzle = swizzle;
         }
      }

      /* Second, invalidate ACP entries that this instruction makes stale. */
      switch (inst->op) {
      case TGSI_OPCODE_BGNLOOP:
      case TGSI_OPCODE_ENDLOOP:
         /* End of a basic block, clear the ACP entirely. */
         memset(acp, 0, sizeof(*acp) * this->next_temp * 4);
         break;

      case TGSI_OPCODE_IF:
      case TGSI_OPCODE_UIF:
         ++level;
         break;

      case TGSI_OPCODE_ENDIF:
      case TGSI_OPCODE_ELSE:
         /* Clear all channels written inside the block from the ACP, but
          * leaving those that were not touched.
          */
         for (int r = 0; r < this->next_temp; r++) {
            for (int c = 0; c < 4; c++) {
               if (!acp[4 * r + c])
                  continue;

               if (acp_level[4 * r + c] >= level)
                  acp[4 * r + c] = NULL;
            }
         }
         /* ELSE stays at the same nesting level; only ENDIF leaves it. */
         if (inst->op == TGSI_OPCODE_ENDIF)
            --level;
         break;

      default:
         /* Continuing the block, clear any written channels from
          * the ACP.  Both destination slots are examined.
          */
         for (int d = 0; d < 2; d++) {
            if (inst->dst[d].file == PROGRAM_TEMPORARY && inst->dst[d].reladdr) {
               /* Any temporary might be written, so no copy propagation
                * across this instruction.
                */
               memset(acp, 0, sizeof(*acp) * this->next_temp * 4);
            } else if (inst->dst[d].file == PROGRAM_OUTPUT &&
                       inst->dst[d].reladdr) {
               /* Any output might be written, so no copy propagation
                * from outputs across this instruction.
                */
               for (int r = 0; r < this->next_temp; r++) {
                  for (int c = 0; c < 4; c++) {
                     if (!acp[4 * r + c])
                        continue;

                     if (acp[4 * r + c]->src[0].file == PROGRAM_OUTPUT)
                        acp[4 * r + c] = NULL;
                  }
               }
            } else if (inst->dst[d].file == PROGRAM_TEMPORARY ||
                       inst->dst[d].file == PROGRAM_OUTPUT) {
               /* Clear where it's used as dst. */
               if (inst->dst[d].file == PROGRAM_TEMPORARY) {
                  for (int c = 0; c < 4; c++) {
                     if (inst->dst[d].writemask & (1 << c))
                        acp[4 * inst->dst[d].index + c] = NULL;
                  }
               }

               /* Clear where it's used as src. */
               for (int r = 0; r < this->next_temp; r++) {
                  for (int c = 0; c < 4; c++) {
                     if (!acp[4 * r + c])
                        continue;

                     /* Channel of the copy's source that feeds channel c. */
                     int src_chan = GET_SWZ(acp[4 * r + c]->src[0].swizzle, c);

                     if (acp[4 * r + c]->src[0].file == inst->dst[d].file &&
                         acp[4 * r + c]->src[0].index == inst->dst[d].index &&
                         inst->dst[d].writemask & (1 << src_chan)) {
                        acp[4 * r + c] = NULL;
                     }
                  }
               }
            }
         }
         break;
      }

      /* If this is a copy, add it to the ACP.  Only plain MOVs into a
       * directly addressed temporary qualify: no self-copies, no saturate,
       * no negate, no relative addressing and no PROGRAM_ARRAY source.
       */
      if (inst->op == TGSI_OPCODE_MOV &&
          inst->dst[0].file == PROGRAM_TEMPORARY &&
          !(inst->dst[0].file == inst->src[0].file &&
             inst->dst[0].index == inst->src[0].index) &&
          !inst->dst[0].reladdr &&
          !inst->dst[0].reladdr2 &&
          !inst->saturate &&
          inst->src[0].file != PROGRAM_ARRAY &&
          !inst->src[0].reladdr &&
          !inst->src[0].reladdr2 &&
          !inst->src[0].negate) {
         for (int i = 0; i < 4; i++) {
            if (inst->dst[0].writemask & (1 << i)) {
               acp[4 * inst->dst[0].index + i] = inst;
               acp_level[4 * inst->dst[0].index + i] = level;
            }
         }
      }
   }

   ralloc_free(acp_level);
   ralloc_free(acp);
}
4936
/*
 * On a basic block basis, tracks available PROGRAM_TEMPORARY registers for dead
 * code elimination.
 *
 * The glsl_to_tgsi_visitor lazily produces code assuming that this pass
 * will occur.  As an example, a TXP production after copy propagation but
 * before this pass:
 *
 * 0: MOV TEMP[1], INPUT[4].xyyy;
 * 1: MOV TEMP[1].w, INPUT[4].wwww;
 * 2: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D;
 *
 * and after this pass:
 *
 * 0: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D;
 *
 * Bookkeeping: writes[4 * temp + chan] holds the instruction that last wrote
 * that channel and has not been read since (or NULL), and write_level[]
 * holds the if-nesting level recorded for that write.  Both tables are
 * allocated from mem_ctx and freed before returning.
 *
 * Returns the number of instructions that were removed entirely.
 */
int
glsl_to_tgsi_visitor::eliminate_dead_code(void)
{
   glsl_to_tgsi_instruction **writes = rzalloc_array(mem_ctx,
                                                     glsl_to_tgsi_instruction *,
                                                     this->next_temp * 4);
   int *write_level = rzalloc_array(mem_ctx, int, this->next_temp * 4);
   /* Current if/else nesting depth. */
   int level = 0;
   /* Number of instructions deleted so far; this is the return value. */
   int removed = 0;

   foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) {
      assert(inst->dst[0].file != PROGRAM_TEMPORARY
             || inst->dst[0].index < this->next_temp);

      switch (inst->op) {
      case TGSI_OPCODE_BGNLOOP:
      case TGSI_OPCODE_ENDLOOP:
      case TGSI_OPCODE_CONT:
      case TGSI_OPCODE_BRK:
         /* End of a basic block, clear the write array entirely.
          *
          * This keeps us from killing dead code when the writes are
          * on either side of a loop, even when the register isn't touched
          * inside the loop.  However, glsl_to_tgsi_visitor doesn't seem to emit
          * dead code of this type, so it shouldn't make a difference as long as
          * the dead code elimination pass in the GLSL compiler does its job.
          */
         memset(writes, 0, sizeof(*writes) * this->next_temp * 4);
         break;

      case TGSI_OPCODE_ENDIF:
      case TGSI_OPCODE_ELSE:
         /* Promote the recorded level of all channels written inside the
          * preceding if or else block to the level above the if/else block.
          */
         for (int r = 0; r < this->next_temp; r++) {
            for (int c = 0; c < 4; c++) {
               if (!writes[4 * r + c])
                  continue;

               if (write_level[4 * r + c] == level)
                  write_level[4 * r + c] = level-1;
            }
         }
         /* ELSE stays at the same nesting level; only ENDIF leaves it. */
         if(inst->op == TGSI_OPCODE_ENDIF)
            --level;
         break;

      case TGSI_OPCODE_IF:
      case TGSI_OPCODE_UIF:
         ++level;
         /* fallthrough to default case to mark the condition as read */
      default:
         /* Continuing the block, clear any channels from the write array that
          * are read by this instruction.
          */
         for (unsigned i = 0; i < ARRAY_SIZE(inst->src); i++) {
            if (inst->src[i].file == PROGRAM_TEMPORARY && inst->src[i].reladdr){
               /* Any temporary might be read, so no dead code elimination
                * across this instruction.
                */
               memset(writes, 0, sizeof(*writes) * this->next_temp * 4);
            } else if (inst->src[i].file == PROGRAM_TEMPORARY) {
               /* Clear where it's used as src. */
               int src_chans = 1 << GET_SWZ(inst->src[i].swizzle, 0);
               src_chans |= 1 << GET_SWZ(inst->src[i].swizzle, 1);
               src_chans |= 1 << GET_SWZ(inst->src[i].swizzle, 2);
               src_chans |= 1 << GET_SWZ(inst->src[i].swizzle, 3);

               for (int c = 0; c < 4; c++) {
                  if (src_chans & (1 << c))
                     writes[4 * inst->src[i].index + c] = NULL;
               }
            }
         }
         /* Texture offsets are reads too and get the same treatment. */
         for (unsigned i = 0; i < inst->tex_offset_num_offset; i++) {
            if (inst->tex_offsets[i].file == PROGRAM_TEMPORARY && inst->tex_offsets[i].reladdr){
               /* Any temporary might be read, so no dead code elimination
                * across this instruction.
                */
               memset(writes, 0, sizeof(*writes) * this->next_temp * 4);
            } else if (inst->tex_offsets[i].file == PROGRAM_TEMPORARY) {
               /* Clear where it's used as src. */
               int src_chans = 1 << GET_SWZ(inst->tex_offsets[i].swizzle, 0);
               src_chans |= 1 << GET_SWZ(inst->tex_offsets[i].swizzle, 1);
               src_chans |= 1 << GET_SWZ(inst->tex_offsets[i].swizzle, 2);
               src_chans |= 1 << GET_SWZ(inst->tex_offsets[i].swizzle, 3);

               for (int c = 0; c < 4; c++) {
                  if (src_chans & (1 << c))
                     writes[4 * inst->tex_offsets[i].index + c] = NULL;
               }
            }
         }
         break;
      }

      /* If this instruction writes to a temporary, add it to the write array.
       * If there is already an instruction in the write array for one or more
       * of the channels, flag that channel write as dead.
       */
      for (unsigned i = 0; i < ARRAY_SIZE(inst->dst); i++) {
         if (inst->dst[i].file == PROGRAM_TEMPORARY &&
             !inst->dst[i].reladdr) {
            for (int c = 0; c < 4; c++) {
               if (inst->dst[i].writemask & (1 << c)) {
                  if (writes[4 * inst->dst[i].index + c]) {
                     /* A pending write recorded at a shallower nesting level
                      * is kept as is: it is neither flagged dead nor replaced
                      * by this write.
                      */
                     if (write_level[4 * inst->dst[i].index + c] < level)
                        continue;
                     else
                        writes[4 * inst->dst[i].index + c]->dead_mask |= (1 << c);
                  }
                  writes[4 * inst->dst[i].index + c] = inst;
                  write_level[4 * inst->dst[i].index + c] = level;
               }
            }
         }
      }
   }

   /* Anything still in the write array at this point is dead code. */
   for (int r = 0; r < this->next_temp; r++) {
      for (int c = 0; c < 4; c++) {
         glsl_to_tgsi_instruction *inst = writes[4 * r + c];
         if (inst)
            inst->dead_mask |= (1 << c);
      }
   }

   /* Now actually remove the instructions that are completely dead and update
    * the writemask of other instructions with dead channels.  Only dst[0] is
    * examined here.
    */
   foreach_in_list_safe(glsl_to_tgsi_instruction, inst, &this->instructions) {
      if (!inst->dead_mask || !inst->dst[0].writemask)
         continue;
      /* No amount of dead masks should remove memory stores */
      if (inst->info->is_store)
         continue;

      if ((inst->dst[0].writemask & ~inst->dead_mask) == 0) {
         /* Every written channel is dead: drop the whole instruction. */
         inst->remove();
         delete inst;
         removed++;
      } else {
         if (glsl_base_type_is_64bit(inst->dst[0].type)) {
            /* For 64-bit destinations the writemask is only narrowed when
             * the dead channels are exactly the XY or the ZW pair
             * (presumably because a 64-bit value occupies a channel pair).
             */
            if (inst->dead_mask == WRITEMASK_XY ||
                inst->dead_mask == WRITEMASK_ZW)
               inst->dst[0].writemask &= ~(inst->dead_mask);
         } else
            inst->dst[0].writemask &= ~(inst->dead_mask);
      }
   }

   ralloc_free(write_level);
   ralloc_free(writes);

   return removed;
}
5111
5112 /* merge DFRACEXP instructions into one. */
5113 void
5114 glsl_to_tgsi_visitor::merge_two_dsts(void)
5115 {
5116    foreach_in_list_safe(glsl_to_tgsi_instruction, inst, &this->instructions) {
5117       glsl_to_tgsi_instruction *inst2;
5118       bool merged;
5119       if (num_inst_dst_regs(inst) != 2)
5120          continue;
5121
5122       if (inst->dst[0].file != PROGRAM_UNDEFINED &&
5123           inst->dst[1].file != PROGRAM_UNDEFINED)
5124          continue;
5125
5126       inst2 = (glsl_to_tgsi_instruction *) inst->next;
5127       do {
5128
5129          if (inst->src[0].file == inst2->src[0].file &&
5130              inst->src[0].index == inst2->src[0].index &&
5131              inst->src[0].type == inst2->src[0].type &&
5132              inst->src[0].swizzle == inst2->src[0].swizzle)
5133             break;
5134          inst2 = (glsl_to_tgsi_instruction *) inst2->next;
5135       } while (inst2);
5136
5137       if (!inst2)
5138          continue;
5139       merged = false;
5140       if (inst->dst[0].file == PROGRAM_UNDEFINED) {
5141          merged = true;
5142          inst->dst[0] = inst2->dst[0];
5143       } else if (inst->dst[1].file == PROGRAM_UNDEFINED) {
5144          inst->dst[1] = inst2->dst[1];
5145          merged = true;
5146       }
5147
5148       if (merged) {
5149          inst2->remove();
5150          delete inst2;
5151       }
5152    }
5153 }
5154
5155 /* Merges temporary registers together where possible to reduce the number of
5156  * registers needed to run a program.
5157  *
5158  * Produces optimal code only after copy propagation and dead code elimination
5159  * have been run. */
5160 void
5161 glsl_to_tgsi_visitor::merge_registers(void)
5162 {
5163    int *last_reads = rzalloc_array(mem_ctx, int, this->next_temp);
5164    int *first_writes = rzalloc_array(mem_ctx, int, this->next_temp);
5165    struct rename_reg_pair *renames = rzalloc_array(mem_ctx, struct rename_reg_pair, this->next_temp);
5166    int i, j;
5167    int num_renames = 0;
5168
5169    /* Read the indices of the last read and first write to each temp register
5170     * into an array so that we don't have to traverse the instruction list as
5171     * much. */
5172    for (i = 0; i < this->next_temp; i++) {
5173       last_reads[i] = -1;
5174       first_writes[i] = -1;
5175    }
5176    get_last_temp_read_first_temp_write(last_reads, first_writes);
5177
5178    /* Start looking for registers with non-overlapping usages that can be
5179     * merged together. */
5180    for (i = 0; i < this->next_temp; i++) {
5181       /* Don't touch unused registers. */
5182       if (last_reads[i] < 0 || first_writes[i] < 0) continue;
5183
5184       for (j = 0; j < this->next_temp; j++) {
5185          /* Don't touch unused registers. */
5186          if (last_reads[j] < 0 || first_writes[j] < 0) continue;
5187
5188          /* We can merge the two registers if the first write to j is after or
5189           * in the same instruction as the last read from i.  Note that the
5190           * register at index i will always be used earlier or at the same time
5191           * as the register at index j. */
5192          if (first_writes[i] <= first_writes[j] &&
5193              last_reads[i] <= first_writes[j]) {
5194             renames[num_renames].old_reg = j;
5195             renames[num_renames].new_reg = i;
5196             num_renames++;
5197
5198             /* Update the first_writes and last_reads arrays with the new
5199              * values for the merged register index, and mark the newly unused
5200              * register index as such. */
5201             assert(last_reads[j] >= last_reads[i]);
5202             last_reads[i] = last_reads[j];
5203             first_writes[j] = -1;
5204             last_reads[j] = -1;
5205          }
5206       }
5207    }
5208
5209    rename_temp_registers(num_renames, renames);
5210    ralloc_free(renames);
5211    ralloc_free(last_reads);
5212    ralloc_free(first_writes);
5213 }
5214
5215 /* Reassign indices to temporary registers by reusing unused indices created
5216  * by optimization passes. */
5217 void
5218 glsl_to_tgsi_visitor::renumber_registers(void)
5219 {
5220    int i = 0;
5221    int new_index = 0;
5222    int *first_reads = rzalloc_array(mem_ctx, int, this->next_temp);
5223    struct rename_reg_pair *renames = rzalloc_array(mem_ctx, struct rename_reg_pair, this->next_temp);
5224    int num_renames = 0;
5225    for (i = 0; i < this->next_temp; i++) {
5226       first_reads[i] = -1;
5227    }
5228    get_first_temp_read(first_reads);
5229
5230    for (i = 0; i < this->next_temp; i++) {
5231       if (first_reads[i] < 0) continue;
5232       if (i != new_index) {
5233          renames[num_renames].old_reg = i;
5234          renames[num_renames].new_reg = new_index;
5235          num_renames++;
5236       }
5237       new_index++;
5238    }
5239
5240    rename_temp_registers(num_renames, renames);
5241    this->next_temp = new_index;
5242    ralloc_free(renames);
5243    ralloc_free(first_reads);
5244 }
5245
5246 /* ------------------------- TGSI conversion stuff -------------------------- */
/**
 * One pending label fix-up, as recorded by get_label().
 */
struct label {
   /* Value handed to get_label(): the callee's sig_id for CAL, 0 for the
    * other label-carrying opcodes (see compile_tgsi_instruction()). */
   unsigned branch_target;
   /* Storage whose address get_label() returns so that ureg_label_insn()
    * can fill it in. */
   unsigned token;
};
5251
/**
 * Intermediate state used during shader translation.
 *
 * Holds the ureg program being built together with the lookup tables that
 * map glsl_to_tgsi register files and indices to ureg registers.
 */
struct st_translate {
   struct ureg_program *ureg;

   /* TGSI temporaries, indexed by the glsl_to_tgsi temporary index.  The
    * array is grown and its entries declared on demand by dst_register();
    * temps_size is the number of allocated entries. */
   unsigned temps_size;
   struct ureg_dst *temps;

   /* Temporary arrays, declared lazily by dst_register() using the sizes in
    * array_sizes[]. */
   struct ureg_dst *arrays;
   unsigned num_temp_arrays;
   struct ureg_src *constants;
   int num_constants;
   struct ureg_src *immediates;
   int num_immediates;
   struct ureg_dst outputs[PIPE_MAX_SHADER_OUTPUTS];
   struct ureg_src inputs[PIPE_MAX_SHADER_INPUTS];
   /* Address registers: [0] is used for reladdr, [1] for reladdr2 (the 2D
    * index) and [2] for indirect sampler/buffer/image access. */
   struct ureg_dst address[3];
   struct ureg_src samplers[PIPE_MAX_SAMPLERS];
   struct ureg_src buffers[PIPE_MAX_SHADER_BUFFERS];
   struct ureg_src images[PIPE_MAX_SHADER_IMAGES];
   struct ureg_src systemValues[SYSTEM_VALUE_MAX];
   struct ureg_src shared_memory;
   struct tgsi_texture_offset tex_offsets[MAX_GLSL_TEXTURE_OFFSET];
   unsigned *array_sizes;
   struct inout_decl *input_decls;
   unsigned num_input_decls;
   struct inout_decl *output_decls;
   unsigned num_output_decls;

   /* Translate a Mesa input/output index into a slot of inputs[] /
    * outputs[] above. */
   const GLuint *inputMapping;
   const GLuint *outputMapping;

   /* For every instruction that contains a label (eg CALL), keep
    * details so that we can go back afterwards and emit the correct
    * tgsi instruction number for each label.
    *
    * labels_size is the allocated capacity, labels_count the number of
    * entries in use (both maintained by get_label()).
    */
   struct label *labels;
   unsigned labels_size;
   unsigned labels_count;

   /* Keep a record of the tgsi instruction number that each mesa
    * instruction starts at, will be used to fix up labels after
    * translation.
    *
    * insn_size is the allocated capacity, insn_count the number of entries
    * in use (both maintained by set_insn_start()).
    */
   unsigned *insn;
   unsigned insn_size;
   unsigned insn_count;

   unsigned procType;  /**< PIPE_SHADER_x (vertex, fragment, tess ctrl/eval, ...) */

   /* Set to TRUE by get_label() / set_insn_start() when an allocation
    * fails. */
   boolean error;
};
5305
5306 /** Map Mesa's SYSTEM_VALUE_x to TGSI_SEMANTIC_x */
5307 unsigned
5308 _mesa_sysval_to_semantic(unsigned sysval)
5309 {
5310    switch (sysval) {
5311    /* Vertex shader */
5312    case SYSTEM_VALUE_VERTEX_ID:
5313       return TGSI_SEMANTIC_VERTEXID;
5314    case SYSTEM_VALUE_INSTANCE_ID:
5315       return TGSI_SEMANTIC_INSTANCEID;
5316    case SYSTEM_VALUE_VERTEX_ID_ZERO_BASE:
5317       return TGSI_SEMANTIC_VERTEXID_NOBASE;
5318    case SYSTEM_VALUE_BASE_VERTEX:
5319       return TGSI_SEMANTIC_BASEVERTEX;
5320    case SYSTEM_VALUE_BASE_INSTANCE:
5321       return TGSI_SEMANTIC_BASEINSTANCE;
5322    case SYSTEM_VALUE_DRAW_ID:
5323       return TGSI_SEMANTIC_DRAWID;
5324
5325    /* Geometry shader */
5326    case SYSTEM_VALUE_INVOCATION_ID:
5327       return TGSI_SEMANTIC_INVOCATIONID;
5328
5329    /* Fragment shader */
5330    case SYSTEM_VALUE_FRAG_COORD:
5331       return TGSI_SEMANTIC_POSITION;
5332    case SYSTEM_VALUE_FRONT_FACE:
5333       return TGSI_SEMANTIC_FACE;
5334    case SYSTEM_VALUE_SAMPLE_ID:
5335       return TGSI_SEMANTIC_SAMPLEID;
5336    case SYSTEM_VALUE_SAMPLE_POS:
5337       return TGSI_SEMANTIC_SAMPLEPOS;
5338    case SYSTEM_VALUE_SAMPLE_MASK_IN:
5339       return TGSI_SEMANTIC_SAMPLEMASK;
5340    case SYSTEM_VALUE_HELPER_INVOCATION:
5341       return TGSI_SEMANTIC_HELPER_INVOCATION;
5342
5343    /* Tessellation shader */
5344    case SYSTEM_VALUE_TESS_COORD:
5345       return TGSI_SEMANTIC_TESSCOORD;
5346    case SYSTEM_VALUE_VERTICES_IN:
5347       return TGSI_SEMANTIC_VERTICESIN;
5348    case SYSTEM_VALUE_PRIMITIVE_ID:
5349       return TGSI_SEMANTIC_PRIMID;
5350    case SYSTEM_VALUE_TESS_LEVEL_OUTER:
5351       return TGSI_SEMANTIC_TESSOUTER;
5352    case SYSTEM_VALUE_TESS_LEVEL_INNER:
5353       return TGSI_SEMANTIC_TESSINNER;
5354
5355    /* Compute shader */
5356    case SYSTEM_VALUE_LOCAL_INVOCATION_ID:
5357       return TGSI_SEMANTIC_THREAD_ID;
5358    case SYSTEM_VALUE_WORK_GROUP_ID:
5359       return TGSI_SEMANTIC_BLOCK_ID;
5360    case SYSTEM_VALUE_NUM_WORK_GROUPS:
5361       return TGSI_SEMANTIC_GRID_SIZE;
5362    case SYSTEM_VALUE_LOCAL_GROUP_SIZE:
5363       return TGSI_SEMANTIC_BLOCK_SIZE;
5364
5365    /* Unhandled */
5366    case SYSTEM_VALUE_LOCAL_INVOCATION_INDEX:
5367    case SYSTEM_VALUE_GLOBAL_INVOCATION_ID:
5368    case SYSTEM_VALUE_VERTEX_CNT:
5369    default:
5370       assert(!"Unexpected SYSTEM_VALUE_ enum");
5371       return TGSI_SEMANTIC_COUNT;
5372    }
5373 }
5374
5375
5376 /**
5377  * Make note of a branch to a label in the TGSI code.
5378  * After we've emitted all instructions, we'll go over the list
5379  * of labels built here and patch the TGSI code with the actual
5380  * location of each label.
5381  */
5382 static unsigned *get_label(struct st_translate *t, unsigned branch_target)
5383 {
5384    unsigned i;
5385
5386    if (t->labels_count + 1 >= t->labels_size) {
5387       t->labels_size = 1 << (util_logbase2(t->labels_size) + 1);
5388       t->labels = (struct label *)realloc(t->labels,
5389                                           t->labels_size * sizeof(struct label));
5390       if (t->labels == NULL) {
5391          static unsigned dummy;
5392          t->error = TRUE;
5393          return &dummy;
5394       }
5395    }
5396
5397    i = t->labels_count++;
5398    t->labels[i].branch_target = branch_target;
5399    return &t->labels[i].token;
5400 }
5401
5402 /**
5403  * Called prior to emitting the TGSI code for each instruction.
5404  * Allocate additional space for instructions if needed.
5405  * Update the insn[] array so the next glsl_to_tgsi_instruction points to
5406  * the next TGSI instruction.
5407  */
5408 static void set_insn_start(struct st_translate *t, unsigned start)
5409 {
5410    if (t->insn_count + 1 >= t->insn_size) {
5411       t->insn_size = 1 << (util_logbase2(t->insn_size) + 1);
5412       t->insn = (unsigned *)realloc(t->insn, t->insn_size * sizeof(t->insn[0]));
5413       if (t->insn == NULL) {
5414          t->error = TRUE;
5415          return;
5416       }
5417    }
5418
5419    t->insn[t->insn_count++] = start;
5420 }
5421
5422 /**
5423  * Map a glsl_to_tgsi constant/immediate to a TGSI immediate.
5424  */
5425 static struct ureg_src
5426 emit_immediate(struct st_translate *t,
5427                gl_constant_value values[4],
5428                int type, int size)
5429 {
5430    struct ureg_program *ureg = t->ureg;
5431
5432    switch(type)
5433    {
5434    case GL_FLOAT:
5435       return ureg_DECL_immediate(ureg, &values[0].f, size);
5436    case GL_DOUBLE:
5437       return ureg_DECL_immediate_f64(ureg, (double *)&values[0].f, size);
5438    case GL_INT:
5439       return ureg_DECL_immediate_int(ureg, &values[0].i, size);
5440    case GL_UNSIGNED_INT:
5441    case GL_BOOL:
5442       return ureg_DECL_immediate_uint(ureg, &values[0].u, size);
5443    default:
5444       assert(!"should not get here - type must be float, int, uint, or bool");
5445       return ureg_src_undef();
5446    }
5447 }
5448
5449 /**
5450  * Map a glsl_to_tgsi dst register to a TGSI ureg_dst register.
5451  */
5452 static struct ureg_dst
5453 dst_register(struct st_translate *t, gl_register_file file, unsigned index,
5454              unsigned array_id)
5455 {
5456    unsigned array;
5457
5458    switch(file) {
5459    case PROGRAM_UNDEFINED:
5460       return ureg_dst_undef();
5461
5462    case PROGRAM_TEMPORARY:
5463       /* Allocate space for temporaries on demand. */
5464       if (index >= t->temps_size) {
5465          const int inc = align(index - t->temps_size + 1, 4096);
5466
5467          t->temps = (struct ureg_dst*)
5468                     realloc(t->temps,
5469                             (t->temps_size + inc) * sizeof(struct ureg_dst));
5470          if (!t->temps)
5471             return ureg_dst_undef();
5472
5473          memset(t->temps + t->temps_size, 0, inc * sizeof(struct ureg_dst));
5474          t->temps_size += inc;
5475       }
5476
5477       if (ureg_dst_is_undef(t->temps[index]))
5478          t->temps[index] = ureg_DECL_local_temporary(t->ureg);
5479
5480       return t->temps[index];
5481
5482    case PROGRAM_ARRAY:
5483       array = index >> 16;
5484
5485       assert(array < t->num_temp_arrays);
5486
5487       if (ureg_dst_is_undef(t->arrays[array]))
5488          t->arrays[array] = ureg_DECL_array_temporary(
5489             t->ureg, t->array_sizes[array], TRUE);
5490
5491       return ureg_dst_array_offset(t->arrays[array],
5492                                    (int)(index & 0xFFFF) - 0x8000);
5493
5494    case PROGRAM_OUTPUT:
5495       if (!array_id) {
5496          if (t->procType == PIPE_SHADER_FRAGMENT)
5497             assert(index < 2 * FRAG_RESULT_MAX);
5498          else if (t->procType == PIPE_SHADER_TESS_CTRL ||
5499                   t->procType == PIPE_SHADER_TESS_EVAL)
5500             assert(index < VARYING_SLOT_TESS_MAX);
5501          else
5502             assert(index < VARYING_SLOT_MAX);
5503
5504          assert(t->outputMapping[index] < ARRAY_SIZE(t->outputs));
5505          assert(t->outputs[t->outputMapping[index]].File != TGSI_FILE_NULL);
5506          return t->outputs[t->outputMapping[index]];
5507       }
5508       else {
5509          struct inout_decl *decl = find_inout_array(t->output_decls, t->num_output_decls, array_id);
5510          unsigned mesa_index = decl->mesa_index;
5511          int slot = t->outputMapping[mesa_index];
5512
5513          assert(slot != -1 && t->outputs[slot].File == TGSI_FILE_OUTPUT);
5514
5515          struct ureg_dst dst = t->outputs[slot];
5516          dst.ArrayID = array_id;
5517          return ureg_dst_array_offset(dst, index - mesa_index);
5518       }
5519
5520    case PROGRAM_ADDRESS:
5521       return t->address[index];
5522
5523    default:
5524       assert(!"unknown dst register file");
5525       return ureg_dst_undef();
5526    }
5527 }
5528
5529 /**
5530  * Map a glsl_to_tgsi src register to a TGSI ureg_src register.
5531  */
5532 static struct ureg_src
5533 src_register(struct st_translate *t, const st_src_reg *reg)
5534 {
5535    int index = reg->index;
5536    int double_reg2 = reg->double_reg2 ? 1 : 0;
5537
5538    switch(reg->file) {
5539    case PROGRAM_UNDEFINED:
5540       return ureg_imm4f(t->ureg, 0, 0, 0, 0);
5541
5542    case PROGRAM_TEMPORARY:
5543    case PROGRAM_ARRAY:
5544    case PROGRAM_OUTPUT:
5545       return ureg_src(dst_register(t, reg->file, reg->index, reg->array_id));
5546
5547    case PROGRAM_UNIFORM:
5548       assert(reg->index >= 0);
5549       return reg->index < t->num_constants ?
5550                t->constants[reg->index] : ureg_imm4f(t->ureg, 0, 0, 0, 0);
5551    case PROGRAM_STATE_VAR:
5552    case PROGRAM_CONSTANT:       /* ie, immediate */
5553       if (reg->has_index2)
5554          return ureg_src_register(TGSI_FILE_CONSTANT, reg->index);
5555       else
5556          return reg->index >= 0 && reg->index < t->num_constants ?
5557                   t->constants[reg->index] : ureg_imm4f(t->ureg, 0, 0, 0, 0);
5558
5559    case PROGRAM_IMMEDIATE:
5560       assert(reg->index >= 0 && reg->index < t->num_immediates);
5561       return t->immediates[reg->index];
5562
5563    case PROGRAM_INPUT:
5564       /* GLSL inputs are 64-bit containers, so we have to
5565        * map back to the original index and add the offset after
5566        * mapping. */
5567       index -= double_reg2;
5568       if (!reg->array_id) {
5569          assert(t->inputMapping[index] < ARRAY_SIZE(t->inputs));
5570          assert(t->inputs[t->inputMapping[index]].File != TGSI_FILE_NULL);
5571          return t->inputs[t->inputMapping[index] + double_reg2];
5572       }
5573       else {
5574          struct inout_decl *decl = find_inout_array(t->input_decls, t->num_input_decls, reg->array_id);
5575          unsigned mesa_index = decl->mesa_index;
5576          int slot = t->inputMapping[mesa_index];
5577
5578          assert(slot != -1 && t->inputs[slot].File == TGSI_FILE_INPUT);
5579
5580          struct ureg_src src = t->inputs[slot];
5581          src.ArrayID = reg->array_id;
5582          return ureg_src_array_offset(src, index + double_reg2 - mesa_index);
5583       }
5584
5585    case PROGRAM_ADDRESS:
5586       return ureg_src(t->address[reg->index]);
5587
5588    case PROGRAM_SYSTEM_VALUE:
5589       assert(reg->index < (int) ARRAY_SIZE(t->systemValues));
5590       return t->systemValues[reg->index];
5591
5592    default:
5593       assert(!"unknown src register file");
5594       return ureg_src_undef();
5595    }
5596 }
5597
5598 /**
5599  * Create a TGSI ureg_dst register from an st_dst_reg.
5600  */
5601 static struct ureg_dst
5602 translate_dst(struct st_translate *t,
5603               const st_dst_reg *dst_reg,
5604               bool saturate)
5605 {
5606    struct ureg_dst dst = dst_register(t, dst_reg->file, dst_reg->index,
5607                                       dst_reg->array_id);
5608
5609    if (dst.File == TGSI_FILE_NULL)
5610       return dst;
5611
5612    dst = ureg_writemask(dst, dst_reg->writemask);
5613
5614    if (saturate)
5615       dst = ureg_saturate(dst);
5616
5617    if (dst_reg->reladdr != NULL) {
5618       assert(dst_reg->file != PROGRAM_TEMPORARY);
5619       dst = ureg_dst_indirect(dst, ureg_src(t->address[0]));
5620    }
5621
5622    if (dst_reg->has_index2) {
5623       if (dst_reg->reladdr2)
5624          dst = ureg_dst_dimension_indirect(dst, ureg_src(t->address[1]),
5625                                            dst_reg->index2D);
5626       else
5627          dst = ureg_dst_dimension(dst, dst_reg->index2D);
5628    }
5629
5630    return dst;
5631 }
5632
5633 /**
5634  * Create a TGSI ureg_src register from an st_src_reg.
5635  */
5636 static struct ureg_src
5637 translate_src(struct st_translate *t, const st_src_reg *src_reg)
5638 {
5639    struct ureg_src src = src_register(t, src_reg);
5640
5641    if (src_reg->has_index2) {
5642       /* 2D indexes occur with geometry shader inputs (attrib, vertex)
5643        * and UBO constant buffers (buffer, position).
5644        */
5645       if (src_reg->reladdr2)
5646          src = ureg_src_dimension_indirect(src, ureg_src(t->address[1]),
5647                                            src_reg->index2D);
5648       else
5649          src = ureg_src_dimension(src, src_reg->index2D);
5650    }
5651
5652    src = ureg_swizzle(src,
5653                       GET_SWZ(src_reg->swizzle, 0) & 0x3,
5654                       GET_SWZ(src_reg->swizzle, 1) & 0x3,
5655                       GET_SWZ(src_reg->swizzle, 2) & 0x3,
5656                       GET_SWZ(src_reg->swizzle, 3) & 0x3);
5657
5658    if ((src_reg->negate & 0xf) == NEGATE_XYZW)
5659       src = ureg_negate(src);
5660
5661    if (src_reg->reladdr != NULL) {
5662       assert(src_reg->file != PROGRAM_TEMPORARY);
5663       src = ureg_src_indirect(src, ureg_src(t->address[0]));
5664    }
5665
5666    return src;
5667 }
5668
5669 static struct tgsi_texture_offset
5670 translate_tex_offset(struct st_translate *t,
5671                      const st_src_reg *in_offset, int idx)
5672 {
5673    struct tgsi_texture_offset offset;
5674    struct ureg_src imm_src;
5675    struct ureg_dst dst;
5676    int array;
5677
5678    switch (in_offset->file) {
5679    case PROGRAM_IMMEDIATE:
5680       assert(in_offset->index >= 0 && in_offset->index < t->num_immediates);
5681       imm_src = t->immediates[in_offset->index];
5682
5683       offset.File = imm_src.File;
5684       offset.Index = imm_src.Index;
5685       offset.SwizzleX = imm_src.SwizzleX;
5686       offset.SwizzleY = imm_src.SwizzleY;
5687       offset.SwizzleZ = imm_src.SwizzleZ;
5688       offset.Padding = 0;
5689       break;
5690    case PROGRAM_INPUT:
5691       imm_src = t->inputs[t->inputMapping[in_offset->index]];
5692       offset.File = imm_src.File;
5693       offset.Index = imm_src.Index;
5694       offset.SwizzleX = GET_SWZ(in_offset->swizzle, 0);
5695       offset.SwizzleY = GET_SWZ(in_offset->swizzle, 1);
5696       offset.SwizzleZ = GET_SWZ(in_offset->swizzle, 2);
5697       offset.Padding = 0;
5698       break;
5699    case PROGRAM_TEMPORARY:
5700       imm_src = ureg_src(t->temps[in_offset->index]);
5701       offset.File = imm_src.File;
5702       offset.Index = imm_src.Index;
5703       offset.SwizzleX = GET_SWZ(in_offset->swizzle, 0);
5704       offset.SwizzleY = GET_SWZ(in_offset->swizzle, 1);
5705       offset.SwizzleZ = GET_SWZ(in_offset->swizzle, 2);
5706       offset.Padding = 0;
5707       break;
5708    case PROGRAM_ARRAY:
5709       array = in_offset->index >> 16;
5710
5711       assert(array >= 0);
5712       assert(array < (int)t->num_temp_arrays);
5713
5714       dst = t->arrays[array];
5715       offset.File = dst.File;
5716       offset.Index = dst.Index + (in_offset->index & 0xFFFF) - 0x8000;
5717       offset.SwizzleX = GET_SWZ(in_offset->swizzle, 0);
5718       offset.SwizzleY = GET_SWZ(in_offset->swizzle, 1);
5719       offset.SwizzleZ = GET_SWZ(in_offset->swizzle, 2);
5720       offset.Padding = 0;
5721       break;
5722    default:
5723       break;
5724    }
5725    return offset;
5726 }
5727
5728 static void
5729 compile_tgsi_instruction(struct st_translate *t,
5730                          const glsl_to_tgsi_instruction *inst)
5731 {
5732    struct ureg_program *ureg = t->ureg;
5733    int i;
5734    struct ureg_dst dst[2];
5735    struct ureg_src src[4];
5736    struct tgsi_texture_offset texoffsets[MAX_GLSL_TEXTURE_OFFSET];
5737
5738    int num_dst;
5739    int num_src;
5740    unsigned tex_target = 0;
5741
5742    num_dst = num_inst_dst_regs(inst);
5743    num_src = num_inst_src_regs(inst);
5744
5745    for (i = 0; i < num_dst; i++)
5746       dst[i] = translate_dst(t,
5747                              &inst->dst[i],
5748                              inst->saturate);
5749
5750    for (i = 0; i < num_src; i++)
5751       src[i] = translate_src(t, &inst->src[i]);
5752
5753    switch(inst->op) {
5754    case TGSI_OPCODE_BGNLOOP:
5755    case TGSI_OPCODE_CAL:
5756    case TGSI_OPCODE_ELSE:
5757    case TGSI_OPCODE_ENDLOOP:
5758    case TGSI_OPCODE_IF:
5759    case TGSI_OPCODE_UIF:
5760       assert(num_dst == 0);
5761       ureg_label_insn(ureg,
5762                       inst->op,
5763                       src, num_src,
5764                       get_label(t,
5765                                 inst->op == TGSI_OPCODE_CAL ? inst->function->sig_id : 0));
5766       return;
5767
5768    case TGSI_OPCODE_TEX:
5769    case TGSI_OPCODE_TXB:
5770    case TGSI_OPCODE_TXD:
5771    case TGSI_OPCODE_TXL:
5772    case TGSI_OPCODE_TXP:
5773    case TGSI_OPCODE_TXQ:
5774    case TGSI_OPCODE_TXQS:
5775    case TGSI_OPCODE_TXF:
5776    case TGSI_OPCODE_TEX2:
5777    case TGSI_OPCODE_TXB2:
5778    case TGSI_OPCODE_TXL2:
5779    case TGSI_OPCODE_TG4:
5780    case TGSI_OPCODE_LODQ:
5781       src[num_src] = t->samplers[inst->sampler.index];
5782       assert(src[num_src].File != TGSI_FILE_NULL);
5783       if (inst->sampler.reladdr)
5784          src[num_src] =
5785             ureg_src_indirect(src[num_src], ureg_src(t->address[2]));
5786       num_src++;
5787       for (i = 0; i < (int)inst->tex_offset_num_offset; i++) {
5788          texoffsets[i] = translate_tex_offset(t, &inst->tex_offsets[i], i);
5789       }
5790       tex_target = st_translate_texture_target(inst->tex_target, inst->tex_shadow);
5791
5792       ureg_tex_insn(ureg,
5793                     inst->op,
5794                     dst, num_dst,
5795                     tex_target,
5796                     texoffsets, inst->tex_offset_num_offset,
5797                     src, num_src);
5798       return;
5799
5800    case TGSI_OPCODE_RESQ:
5801    case TGSI_OPCODE_LOAD:
5802    case TGSI_OPCODE_ATOMUADD:
5803    case TGSI_OPCODE_ATOMXCHG:
5804    case TGSI_OPCODE_ATOMCAS:
5805    case TGSI_OPCODE_ATOMAND:
5806    case TGSI_OPCODE_ATOMOR:
5807    case TGSI_OPCODE_ATOMXOR:
5808    case TGSI_OPCODE_ATOMUMIN:
5809    case TGSI_OPCODE_ATOMUMAX:
5810    case TGSI_OPCODE_ATOMIMIN:
5811    case TGSI_OPCODE_ATOMIMAX:
5812       for (i = num_src - 1; i >= 0; i--)
5813          src[i + 1] = src[i];
5814       num_src++;
5815       if (inst->buffer.file == PROGRAM_MEMORY) {
5816          src[0] = t->shared_memory;
5817       } else if (inst->buffer.file == PROGRAM_BUFFER) {
5818          src[0] = t->buffers[inst->buffer.index];
5819       } else {
5820          src[0] = t->images[inst->buffer.index];
5821          tex_target = st_translate_texture_target(inst->tex_target, inst->tex_shadow);
5822       }
5823       if (inst->buffer.reladdr)
5824          src[0] = ureg_src_indirect(src[0], ureg_src(t->address[2]));
5825       assert(src[0].File != TGSI_FILE_NULL);
5826       ureg_memory_insn(ureg, inst->op, dst, num_dst, src, num_src,
5827                        inst->buffer_access,
5828                        tex_target, inst->image_format);
5829       break;
5830
5831    case TGSI_OPCODE_STORE:
5832       if (inst->buffer.file == PROGRAM_MEMORY) {
5833          dst[0] = ureg_dst(t->shared_memory);
5834       } else if (inst->buffer.file == PROGRAM_BUFFER) {
5835          dst[0] = ureg_dst(t->buffers[inst->buffer.index]);
5836       } else {
5837          dst[0] = ureg_dst(t->images[inst->buffer.index]);
5838          tex_target = st_translate_texture_target(inst->tex_target, inst->tex_shadow);
5839       }
5840       dst[0] = ureg_writemask(dst[0], inst->dst[0].writemask);
5841       if (inst->buffer.reladdr)
5842          dst[0] = ureg_dst_indirect(dst[0], ureg_src(t->address[2]));
5843       assert(dst[0].File != TGSI_FILE_NULL);
5844       ureg_memory_insn(ureg, inst->op, dst, num_dst, src, num_src,
5845                        inst->buffer_access,
5846                        tex_target, inst->image_format);
5847       break;
5848
5849    case TGSI_OPCODE_SCS:
5850       dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XY);
5851       ureg_insn(ureg, inst->op, dst, num_dst, src, num_src);
5852       break;
5853
5854    default:
5855       ureg_insn(ureg,
5856                 inst->op,
5857                 dst, num_dst,
5858                 src, num_src);
5859       break;
5860    }
5861 }
5862
5863 /**
5864  * Emit the TGSI instructions for inverting and adjusting WPOS.
5865  * This code is unavoidable because it also depends on whether
5866  * a FBO is bound (STATE_FB_WPOS_Y_TRANSFORM).
5867  */
5868 static void
5869 emit_wpos_adjustment(struct gl_context *ctx,
5870                      struct st_translate *t,
5871                      int wpos_transform_const,
5872                      boolean invert,
5873                      GLfloat adjX, GLfloat adjY[2])
5874 {
5875    struct ureg_program *ureg = t->ureg;
5876
5877    assert(wpos_transform_const >= 0);
5878
5879    /* Fragment program uses fragment position input.
5880     * Need to replace instances of INPUT[WPOS] with temp T
5881     * where T = INPUT[WPOS] is inverted by Y.
5882     */
5883    struct ureg_src wpostrans = ureg_DECL_constant(ureg, wpos_transform_const);
5884    struct ureg_dst wpos_temp = ureg_DECL_temporary( ureg );
5885    struct ureg_src *wpos =
5886       ctx->Const.GLSLFragCoordIsSysVal ?
5887          &t->systemValues[SYSTEM_VALUE_FRAG_COORD] :
5888          &t->inputs[t->inputMapping[VARYING_SLOT_POS]];
5889    struct ureg_src wpos_input = *wpos;
5890
5891    /* First, apply the coordinate shift: */
5892    if (adjX || adjY[0] || adjY[1]) {
5893       if (adjY[0] != adjY[1]) {
5894          /* Adjust the y coordinate by adjY[1] or adjY[0] respectively
5895           * depending on whether inversion is actually going to be applied
5896           * or not, which is determined by testing against the inversion
5897           * state variable used below, which will be either +1 or -1.
5898           */
5899          struct ureg_dst adj_temp = ureg_DECL_local_temporary(ureg);
5900
5901          ureg_CMP(ureg, adj_temp,
5902                   ureg_scalar(wpostrans, invert ? 2 : 0),
5903                   ureg_imm4f(ureg, adjX, adjY[0], 0.0f, 0.0f),
5904                   ureg_imm4f(ureg, adjX, adjY[1], 0.0f, 0.0f));
5905          ureg_ADD(ureg, wpos_temp, wpos_input, ureg_src(adj_temp));
5906       } else {
5907          ureg_ADD(ureg, wpos_temp, wpos_input,
5908                   ureg_imm4f(ureg, adjX, adjY[0], 0.0f, 0.0f));
5909       }
5910       wpos_input = ureg_src(wpos_temp);
5911    } else {
5912       /* MOV wpos_temp, input[wpos]
5913        */
5914       ureg_MOV( ureg, wpos_temp, wpos_input );
5915    }
5916
5917    /* Now the conditional y flip: STATE_FB_WPOS_Y_TRANSFORM.xy/zw will be
5918     * inversion/identity, or the other way around if we're drawing to an FBO.
5919     */
5920    if (invert) {
5921       /* MAD wpos_temp.y, wpos_input, wpostrans.xxxx, wpostrans.yyyy
5922        */
5923       ureg_MAD( ureg,
5924                 ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y ),
5925                 wpos_input,
5926                 ureg_scalar(wpostrans, 0),
5927                 ureg_scalar(wpostrans, 1));
5928    } else {
5929       /* MAD wpos_temp.y, wpos_input, wpostrans.zzzz, wpostrans.wwww
5930        */
5931       ureg_MAD( ureg,
5932                 ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y ),
5933                 wpos_input,
5934                 ureg_scalar(wpostrans, 2),
5935                 ureg_scalar(wpostrans, 3));
5936    }
5937
5938    /* Use wpos_temp as position input from here on:
5939     */
5940    *wpos = ureg_src(wpos_temp);
5941 }
5942
5943
5944 /**
5945  * Emit fragment position/ooordinate code.
5946  */
5947 static void
5948 emit_wpos(struct st_context *st,
5949           struct st_translate *t,
5950           const struct gl_program *program,
5951           struct ureg_program *ureg,
5952           int wpos_transform_const)
5953 {
5954    const struct gl_fragment_program *fp =
5955       (const struct gl_fragment_program *) program;
5956    struct pipe_screen *pscreen = st->pipe->screen;
5957    GLfloat adjX = 0.0f;
5958    GLfloat adjY[2] = { 0.0f, 0.0f };
5959    boolean invert = FALSE;
5960
5961    /* Query the pixel center conventions supported by the pipe driver and set
5962     * adjX, adjY to help out if it cannot handle the requested one internally.
5963     *
5964     * The bias of the y-coordinate depends on whether y-inversion takes place
5965     * (adjY[1]) or not (adjY[0]), which is in turn dependent on whether we are
5966     * drawing to an FBO (causes additional inversion), and whether the pipe
5967     * driver origin and the requested origin differ (the latter condition is
5968     * stored in the 'invert' variable).
5969     *
5970     * For height = 100 (i = integer, h = half-integer, l = lower, u = upper):
5971     *
5972     * center shift only:
5973     * i -> h: +0.5
5974     * h -> i: -0.5
5975     *
5976     * inversion only:
5977     * l,i -> u,i: ( 0.0 + 1.0) * -1 + 100 = 99
5978     * l,h -> u,h: ( 0.5 + 0.0) * -1 + 100 = 99.5
5979     * u,i -> l,i: (99.0 + 1.0) * -1 + 100 = 0
5980     * u,h -> l,h: (99.5 + 0.0) * -1 + 100 = 0.5
5981     *
5982     * inversion and center shift:
5983     * l,i -> u,h: ( 0.0 + 0.5) * -1 + 100 = 99.5
5984     * l,h -> u,i: ( 0.5 + 0.5) * -1 + 100 = 99
5985     * u,i -> l,h: (99.0 + 0.5) * -1 + 100 = 0.5
5986     * u,h -> l,i: (99.5 + 0.5) * -1 + 100 = 0
5987     */
5988    if (fp->OriginUpperLeft) {
5989       /* Fragment shader wants origin in upper-left */
5990       if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT)) {
5991          /* the driver supports upper-left origin */
5992       }
5993       else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT)) {
5994          /* the driver supports lower-left origin, need to invert Y */
5995          ureg_property(ureg, TGSI_PROPERTY_FS_COORD_ORIGIN,
5996                        TGSI_FS_COORD_ORIGIN_LOWER_LEFT);
5997          invert = TRUE;
5998       }
5999       else
6000          assert(0);
6001    }
6002    else {
6003       /* Fragment shader wants origin in lower-left */
6004       if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT))
6005          /* the driver supports lower-left origin */
6006          ureg_property(ureg, TGSI_PROPERTY_FS_COORD_ORIGIN,
6007                        TGSI_FS_COORD_ORIGIN_LOWER_LEFT);
6008       else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT))
6009          /* the driver supports upper-left origin, need to invert Y */
6010          invert = TRUE;
6011       else
6012          assert(0);
6013    }
6014
6015    if (fp->PixelCenterInteger) {
6016       /* Fragment shader wants pixel center integer */
6017       if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER)) {
6018          /* the driver supports pixel center integer */
6019          adjY[1] = 1.0f;
6020          ureg_property(ureg, TGSI_PROPERTY_FS_COORD_PIXEL_CENTER,
6021                        TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
6022       }
6023       else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER)) {
6024          /* the driver supports pixel center half integer, need to bias X,Y */
6025          adjX = -0.5f;
6026          adjY[0] = -0.5f;
6027          adjY[1] = 0.5f;
6028       }
6029       else
6030          assert(0);
6031    }
6032    else {
6033       /* Fragment shader wants pixel center half integer */
6034       if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER)) {
6035          /* the driver supports pixel center half integer */
6036       }
6037       else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER)) {
6038          /* the driver supports pixel center integer, need to bias X,Y */
6039          adjX = adjY[0] = adjY[1] = 0.5f;
6040          ureg_property(ureg, TGSI_PROPERTY_FS_COORD_PIXEL_CENTER,
6041                        TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
6042       }
6043       else
6044          assert(0);
6045    }
6046
6047    /* we invert after adjustment so that we avoid the MOV to temporary,
6048     * and reuse the adjustment ADD instead */
6049    emit_wpos_adjustment(st->ctx, t, wpos_transform_const, invert, adjX, adjY);
6050 }
6051
6052 /**
6053  * OpenGL's fragment gl_FrontFace input is 1 for front-facing, 0 for back.
6054  * TGSI uses +1 for front, -1 for back.
6055  * This function converts the TGSI value to the GL value.  Simply clamping/
6056  * saturating the value to [0,1] does the job.
6057  */
6058 static void
6059 emit_face_var(struct gl_context *ctx, struct st_translate *t)
6060 {
6061    struct ureg_program *ureg = t->ureg;
6062    struct ureg_dst face_temp = ureg_DECL_temporary(ureg);
6063    struct ureg_src face_input = t->inputs[t->inputMapping[VARYING_SLOT_FACE]];
6064
6065    if (ctx->Const.NativeIntegers) {
6066       ureg_FSGE(ureg, face_temp, face_input, ureg_imm1f(ureg, 0));
6067    }
6068    else {
6069       /* MOV_SAT face_temp, input[face] */
6070       ureg_MOV(ureg, ureg_saturate(face_temp), face_input);
6071    }
6072
6073    /* Use face_temp as face input from here on: */
6074    t->inputs[t->inputMapping[VARYING_SLOT_FACE]] = ureg_src(face_temp);
6075 }
6076
6077 static void
6078 emit_compute_block_size(const struct gl_program *program,
6079                         struct ureg_program *ureg) {
6080    const struct gl_compute_program *cp =
6081       (const struct gl_compute_program *)program;
6082
6083    ureg_property(ureg, TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH,
6084                        cp->LocalSize[0]);
6085    ureg_property(ureg, TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT,
6086                        cp->LocalSize[1]);
6087    ureg_property(ureg, TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH,
6088                        cp->LocalSize[2]);
6089 }
6090
6091 /**
6092  * Translate intermediate IR (glsl_to_tgsi_instruction) to TGSI format.
6093  * \param program  the program to translate
6094  * \param numInputs  number of input registers used
6095  * \param inputMapping  maps Mesa fragment program inputs to TGSI generic
6096  *                      input indexes
6097  * \param inputSemanticName  the TGSI_SEMANTIC flag for each input
6098  * \param inputSemanticIndex  the semantic index (ex: which texcoord) for
6099  *                            each input
6100  * \param interpMode  the TGSI_INTERPOLATE_LINEAR/PERSP mode for each input
6101  * \param interpLocation the TGSI_INTERPOLATE_LOC_* location for each input
6102  * \param numOutputs  number of output registers used
6103  * \param outputMapping  maps Mesa fragment program outputs to TGSI
6104  *                       generic outputs
6105  * \param outputSemanticName  the TGSI_SEMANTIC flag for each output
6106  * \param outputSemanticIndex  the semantic index (ex: which texcoord) for
6107  *                             each output
6108  *
6109  * \return  PIPE_OK or PIPE_ERROR_OUT_OF_MEMORY
6110  */
6111 extern "C" enum pipe_error
6112 st_translate_program(
6113    struct gl_context *ctx,
6114    uint procType,
6115    struct ureg_program *ureg,
6116    glsl_to_tgsi_visitor *program,
6117    const struct gl_program *proginfo,
6118    GLuint numInputs,
6119    const GLuint inputMapping[],
6120    const GLuint inputSlotToAttr[],
6121    const ubyte inputSemanticName[],
6122    const ubyte inputSemanticIndex[],
6123    const GLuint interpMode[],
6124    const GLuint interpLocation[],
6125    GLuint numOutputs,
6126    const GLuint outputMapping[],
6127    const GLuint outputSlotToAttr[],
6128    const ubyte outputSemanticName[],
6129    const ubyte outputSemanticIndex[])
6130 {
6131    struct st_translate *t;
6132    unsigned i;
6133    struct gl_program_constants *frag_const =
6134       &ctx->Const.Program[MESA_SHADER_FRAGMENT];
6135    enum pipe_error ret = PIPE_OK;
6136
6137    assert(numInputs <= ARRAY_SIZE(t->inputs));
6138    assert(numOutputs <= ARRAY_SIZE(t->outputs));
6139
6140    t = CALLOC_STRUCT(st_translate);
6141    if (!t) {
6142       ret = PIPE_ERROR_OUT_OF_MEMORY;
6143       goto out;
6144    }
6145
6146    t->procType = procType;
6147    t->inputMapping = inputMapping;
6148    t->outputMapping = outputMapping;
6149    t->ureg = ureg;
6150    t->num_temp_arrays = program->next_array;
6151    if (t->num_temp_arrays)
6152       t->arrays = (struct ureg_dst*)
6153                   calloc(t->num_temp_arrays, sizeof(t->arrays[0]));
6154
6155    /*
6156     * Declare input attributes.
6157     */
6158    switch (procType) {
6159    case PIPE_SHADER_FRAGMENT:
6160    case PIPE_SHADER_GEOMETRY:
6161    case PIPE_SHADER_TESS_EVAL:
6162    case PIPE_SHADER_TESS_CTRL:
6163       for (i = 0; i < program->num_inputs; ++i) {
6164          struct inout_decl *decl = &program->inputs[i];
6165          unsigned slot = inputMapping[decl->mesa_index];
6166          struct ureg_src src;
6167          ubyte tgsi_usage_mask = decl->usage_mask;
6168
6169          if (glsl_base_type_is_64bit(decl->base_type)) {
6170             if (tgsi_usage_mask == 1)
6171                tgsi_usage_mask = TGSI_WRITEMASK_XY;
6172             else if (tgsi_usage_mask == 2)
6173                tgsi_usage_mask = TGSI_WRITEMASK_ZW;
6174             else
6175                tgsi_usage_mask = TGSI_WRITEMASK_XYZW;
6176          }
6177
6178          src = ureg_DECL_fs_input_cyl_centroid_layout(ureg,
6179                   inputSemanticName[slot], inputSemanticIndex[slot],
6180                   interpMode ? interpMode[slot] : 0, 0, interpLocation ? interpLocation[slot] : 0,
6181                   slot, tgsi_usage_mask, decl->array_id, decl->size);
6182
6183          for (unsigned j = 0; j < decl->size; ++j) {
6184             if (t->inputs[slot + j].File != TGSI_FILE_INPUT) {
6185                /* The ArrayID is set up in dst_register */
6186                t->inputs[slot + j] = src;
6187                t->inputs[slot + j].ArrayID = 0;
6188                t->inputs[slot + j].Index += j;
6189             }
6190          }
6191       }
6192       break;
6193    case PIPE_SHADER_VERTEX:
6194       for (i = 0; i < numInputs; i++) {
6195          t->inputs[i] = ureg_DECL_vs_input(ureg, i);
6196       }
6197       break;
6198    case PIPE_SHADER_COMPUTE:
6199       break;
6200    default:
6201       assert(0);
6202    }
6203
6204    /*
6205     * Declare output attributes.
6206     */
6207    switch (procType) {
6208    case PIPE_SHADER_FRAGMENT:
6209    case PIPE_SHADER_COMPUTE:
6210       break;
6211    case PIPE_SHADER_GEOMETRY:
6212    case PIPE_SHADER_TESS_EVAL:
6213    case PIPE_SHADER_TESS_CTRL:
6214    case PIPE_SHADER_VERTEX:
6215       for (i = 0; i < program->num_outputs; ++i) {
6216          struct inout_decl *decl = &program->outputs[i];
6217          unsigned slot = outputMapping[decl->mesa_index];
6218          struct ureg_dst dst;
6219          ubyte tgsi_usage_mask = decl->usage_mask;
6220
6221          if (glsl_base_type_is_64bit(decl->base_type)) {
6222             if (tgsi_usage_mask == 1)
6223                tgsi_usage_mask = TGSI_WRITEMASK_XY;
6224             else if (tgsi_usage_mask == 2)
6225                tgsi_usage_mask = TGSI_WRITEMASK_ZW;
6226             else
6227                tgsi_usage_mask = TGSI_WRITEMASK_XYZW;
6228          }
6229
6230          dst = ureg_DECL_output_layout(ureg,
6231                      outputSemanticName[slot], outputSemanticIndex[slot],
6232                      slot, tgsi_usage_mask, decl->array_id, decl->size);
6233
6234          for (unsigned j = 0; j < decl->size; ++j) {
6235             if (t->outputs[slot + j].File != TGSI_FILE_OUTPUT) {
6236                /* The ArrayID is set up in dst_register */
6237                t->outputs[slot + j] = dst;
6238                t->outputs[slot + j].ArrayID = 0;
6239                t->outputs[slot + j].Index += j;
6240             }
6241          }
6242       }
6243       break;
6244    default:
6245       assert(0);
6246    }
6247
6248    if (procType == PIPE_SHADER_FRAGMENT) {
6249       if (program->shader->info.EarlyFragmentTests)
6250          ureg_property(ureg, TGSI_PROPERTY_FS_EARLY_DEPTH_STENCIL, 1);
6251
6252       if (proginfo->InputsRead & VARYING_BIT_POS) {
6253           /* Must do this after setting up t->inputs. */
6254           emit_wpos(st_context(ctx), t, proginfo, ureg,
6255                     program->wpos_transform_const);
6256       }
6257
6258       if (proginfo->InputsRead & VARYING_BIT_FACE)
6259          emit_face_var(ctx, t);
6260
6261       for (i = 0; i < numOutputs; i++) {
6262          switch (outputSemanticName[i]) {
6263          case TGSI_SEMANTIC_POSITION:
6264             t->outputs[i] = ureg_DECL_output(ureg,
6265                                              TGSI_SEMANTIC_POSITION, /* Z/Depth */
6266                                              outputSemanticIndex[i]);
6267             t->outputs[i] = ureg_writemask(t->outputs[i], TGSI_WRITEMASK_Z);
6268             break;
6269          case TGSI_SEMANTIC_STENCIL:
6270             t->outputs[i] = ureg_DECL_output(ureg,
6271                                              TGSI_SEMANTIC_STENCIL, /* Stencil */
6272                                              outputSemanticIndex[i]);
6273             t->outputs[i] = ureg_writemask(t->outputs[i], TGSI_WRITEMASK_Y);
6274             break;
6275          case TGSI_SEMANTIC_COLOR:
6276             t->outputs[i] = ureg_DECL_output(ureg,
6277                                              TGSI_SEMANTIC_COLOR,
6278                                              outputSemanticIndex[i]);
6279             break;
6280          case TGSI_SEMANTIC_SAMPLEMASK:
6281             t->outputs[i] = ureg_DECL_output(ureg,
6282                                              TGSI_SEMANTIC_SAMPLEMASK,
6283                                              outputSemanticIndex[i]);
6284             /* TODO: If we ever support more than 32 samples, this will have
6285              * to become an array.
6286              */
6287             t->outputs[i] = ureg_writemask(t->outputs[i], TGSI_WRITEMASK_X);
6288             break;
6289          default:
6290             assert(!"fragment shader outputs must be POSITION/STENCIL/COLOR");
6291             ret = PIPE_ERROR_BAD_INPUT;
6292             goto out;
6293          }
6294       }
6295    }
6296    else if (procType == PIPE_SHADER_VERTEX) {
6297       for (i = 0; i < numOutputs; i++) {
6298          if (outputSemanticName[i] == TGSI_SEMANTIC_FOG) {
6299             /* force register to contain a fog coordinate in the form (F, 0, 0, 1). */
6300             ureg_MOV(ureg,
6301                      ureg_writemask(t->outputs[i], TGSI_WRITEMASK_YZW),
6302                      ureg_imm4f(ureg, 0.0f, 0.0f, 0.0f, 1.0f));
6303             t->outputs[i] = ureg_writemask(t->outputs[i], TGSI_WRITEMASK_X);
6304          }
6305       }
6306    }
6307
6308    if (procType == PIPE_SHADER_COMPUTE) {
6309       emit_compute_block_size(proginfo, ureg);
6310    }
6311
6312    /* Declare address register.
6313     */
6314    if (program->num_address_regs > 0) {
6315       assert(program->num_address_regs <= 3);
6316       for (int i = 0; i < program->num_address_regs; i++)
6317          t->address[i] = ureg_DECL_address(ureg);
6318    }
6319
6320    /* Declare misc input registers
6321     */
6322    {
6323       GLbitfield sysInputs = proginfo->SystemValuesRead;
6324
6325       for (i = 0; sysInputs; i++) {
6326          if (sysInputs & (1 << i)) {
6327             unsigned semName = _mesa_sysval_to_semantic(i);
6328
6329             t->systemValues[i] = ureg_DECL_system_value(ureg, semName, 0);
6330
6331             if (semName == TGSI_SEMANTIC_INSTANCEID ||
6332                 semName == TGSI_SEMANTIC_VERTEXID) {
6333                /* From Gallium perspective, these system values are always
6334                 * integer, and require native integer support.  However, if
6335                 * native integer is supported on the vertex stage but not the
6336                 * pixel stage (e.g, i915g + draw), Mesa will generate IR that
6337                 * assumes these system values are floats. To resolve the
6338                 * inconsistency, we insert a U2F.
6339                 */
6340                struct st_context *st = st_context(ctx);
6341                struct pipe_screen *pscreen = st->pipe->screen;
6342                assert(procType == PIPE_SHADER_VERTEX);
6343                assert(pscreen->get_shader_param(pscreen, PIPE_SHADER_VERTEX, PIPE_SHADER_CAP_INTEGERS));
6344                (void) pscreen;
6345                if (!ctx->Const.NativeIntegers) {
6346                   struct ureg_dst temp = ureg_DECL_local_temporary(t->ureg);
6347                   ureg_U2F( t->ureg, ureg_writemask(temp, TGSI_WRITEMASK_X), t->systemValues[i]);
6348                   t->systemValues[i] = ureg_scalar(ureg_src(temp), 0);
6349                }
6350             }
6351
6352             if (procType == PIPE_SHADER_FRAGMENT &&
6353                 semName == TGSI_SEMANTIC_POSITION)
6354                emit_wpos(st_context(ctx), t, proginfo, ureg,
6355                          program->wpos_transform_const);
6356
6357             sysInputs &= ~(1 << i);
6358          }
6359       }
6360    }
6361
6362    t->array_sizes = program->array_sizes;
6363    t->input_decls = program->inputs;
6364    t->num_input_decls = program->num_inputs;
6365    t->output_decls = program->outputs;
6366    t->num_output_decls = program->num_outputs;
6367
6368    /* Emit constants and uniforms.  TGSI uses a single index space for these,
6369     * so we put all the translated regs in t->constants.
6370     */
6371    if (proginfo->Parameters) {
6372       t->constants = (struct ureg_src *)
6373          calloc(proginfo->Parameters->NumParameters, sizeof(t->constants[0]));
6374       if (t->constants == NULL) {
6375          ret = PIPE_ERROR_OUT_OF_MEMORY;
6376          goto out;
6377       }
6378       t->num_constants = proginfo->Parameters->NumParameters;
6379
6380       for (i = 0; i < proginfo->Parameters->NumParameters; i++) {
6381          switch (proginfo->Parameters->Parameters[i].Type) {
6382          case PROGRAM_STATE_VAR:
6383          case PROGRAM_UNIFORM:
6384             t->constants[i] = ureg_DECL_constant(ureg, i);
6385             break;
6386
6387          /* Emit immediates for PROGRAM_CONSTANT only when there's no indirect
6388           * addressing of the const buffer.
6389           * FIXME: Be smarter and recognize param arrays:
6390           * indirect addressing is only valid within the referenced
6391           * array.
6392           */
6393          case PROGRAM_CONSTANT:
6394             if (program->indirect_addr_consts)
6395                t->constants[i] = ureg_DECL_constant(ureg, i);
6396             else
6397                t->constants[i] = emit_immediate(t,
6398                                                 proginfo->Parameters->ParameterValues[i],
6399                                                 proginfo->Parameters->Parameters[i].DataType,
6400                                                 4);
6401             break;
6402          default:
6403             break;
6404          }
6405       }
6406    }
6407
6408    if (program->shader) {
6409       unsigned num_ubos = program->shader->NumUniformBlocks;
6410
6411       for (i = 0; i < num_ubos; i++) {
6412          unsigned size = program->shader->UniformBlocks[i]->UniformBufferSize;
6413          unsigned num_const_vecs = (size + 15) / 16;
6414          unsigned first, last;
6415          assert(num_const_vecs > 0);
6416          first = 0;
6417          last = num_const_vecs > 0 ? num_const_vecs - 1 : 0;
6418          ureg_DECL_constant2D(t->ureg, first, last, i + 1);
6419       }
6420    }
6421
6422    /* Emit immediate values.
6423     */
6424    t->immediates = (struct ureg_src *)
6425       calloc(program->num_immediates, sizeof(struct ureg_src));
6426    if (t->immediates == NULL) {
6427       ret = PIPE_ERROR_OUT_OF_MEMORY;
6428       goto out;
6429    }
6430    t->num_immediates = program->num_immediates;
6431
6432    i = 0;
6433    foreach_in_list(immediate_storage, imm, &program->immediates) {
6434       assert(i < program->num_immediates);
6435       t->immediates[i++] = emit_immediate(t, imm->values, imm->type, imm->size32);
6436    }
6437    assert(i == program->num_immediates);
6438
6439    /* texture samplers */
6440    for (i = 0; i < frag_const->MaxTextureImageUnits; i++) {
6441       if (program->samplers_used & (1u << i)) {
6442          unsigned type;
6443
6444          t->samplers[i] = ureg_DECL_sampler(ureg, i);
6445
6446          switch (program->sampler_types[i]) {
6447          case GLSL_TYPE_INT:
6448             type = TGSI_RETURN_TYPE_SINT;
6449             break;
6450          case GLSL_TYPE_UINT:
6451             type = TGSI_RETURN_TYPE_UINT;
6452             break;
6453          case GLSL_TYPE_FLOAT:
6454             type = TGSI_RETURN_TYPE_FLOAT;
6455             break;
6456          default:
6457             unreachable("not reached");
6458          }
6459
6460          ureg_DECL_sampler_view( ureg, i, program->sampler_targets[i],
6461                                  type, type, type, type );
6462       }
6463    }
6464
6465    for (i = 0; i < frag_const->MaxAtomicBuffers; i++) {
6466       if (program->buffers_used & (1 << i)) {
6467          t->buffers[i] = ureg_DECL_buffer(ureg, i, true);
6468       }
6469    }
6470
6471    for (; i < frag_const->MaxAtomicBuffers + frag_const->MaxShaderStorageBlocks;
6472         i++) {
6473       if (program->buffers_used & (1 << i)) {
6474          t->buffers[i] = ureg_DECL_buffer(ureg, i, false);
6475       }
6476    }
6477
6478    if (program->use_shared_memory)
6479       t->shared_memory = ureg_DECL_memory(ureg, TGSI_MEMORY_TYPE_SHARED);
6480
6481    for (i = 0; i < program->shader->NumImages; i++) {
6482       if (program->images_used & (1 << i)) {
6483          t->images[i] = ureg_DECL_image(ureg, i,
6484                                         program->image_targets[i],
6485                                         program->image_formats[i],
6486                                         true, false);
6487       }
6488    }
6489
6490    /* Emit each instruction in turn:
6491     */
6492    foreach_in_list(glsl_to_tgsi_instruction, inst, &program->instructions) {
6493       set_insn_start(t, ureg_get_instruction_number(ureg));
6494       compile_tgsi_instruction(t, inst);
6495    }
6496
6497    /* Fix up all emitted labels:
6498     */
6499    for (i = 0; i < t->labels_count; i++) {
6500       ureg_fixup_label(ureg, t->labels[i].token,
6501                        t->insn[t->labels[i].branch_target]);
6502    }
6503
6504    /* Set the next shader stage hint for VS and TES. */
6505    switch (procType) {
6506    case PIPE_SHADER_VERTEX:
6507    case PIPE_SHADER_TESS_EVAL:
6508       if (program->shader_program->SeparateShader)
6509          break;
6510
6511       for (i = program->shader->Stage+1; i <= MESA_SHADER_FRAGMENT; i++) {
6512          if (program->shader_program->_LinkedShaders[i]) {
6513             unsigned next;
6514
6515             switch (i) {
6516             case MESA_SHADER_TESS_CTRL:
6517                next = PIPE_SHADER_TESS_CTRL;
6518                break;
6519             case MESA_SHADER_TESS_EVAL:
6520                next = PIPE_SHADER_TESS_EVAL;
6521                break;
6522             case MESA_SHADER_GEOMETRY:
6523                next = PIPE_SHADER_GEOMETRY;
6524                break;
6525             case MESA_SHADER_FRAGMENT:
6526                next = PIPE_SHADER_FRAGMENT;
6527                break;
6528             default:
6529                assert(0);
6530                continue;
6531             }
6532
6533             ureg_set_next_shader_processor(ureg, next);
6534             break;
6535          }
6536       }
6537       break;
6538    }
6539
6540 out:
6541    if (t) {
6542       free(t->arrays);
6543       free(t->temps);
6544       free(t->insn);
6545       free(t->labels);
6546       free(t->constants);
6547       t->num_constants = 0;
6548       free(t->immediates);
6549       t->num_immediates = 0;
6550
6551       if (t->error) {
6552          debug_printf("%s: translate error flag set\n", __func__);
6553       }
6554
6555       FREE(t);
6556    }
6557
6558    return ret;
6559 }
6560 /* ----------------------------- End TGSI code ------------------------------ */
6561
6562
6563 /**
6564  * Convert a shader's GLSL IR into a Mesa gl_program, although without
6565  * generating Mesa IR.
6566  */
6567 static struct gl_program *
6568 get_mesa_program_tgsi(struct gl_context *ctx,
6569                       struct gl_shader_program *shader_program,
6570                       struct gl_linked_shader *shader)
6571 {
6572    glsl_to_tgsi_visitor* v;
6573    struct gl_program *prog;
6574    GLenum target = _mesa_shader_stage_to_program(shader->Stage);
6575    bool progress;
6576    struct gl_shader_compiler_options *options =
6577          &ctx->Const.ShaderCompilerOptions[shader->Stage];
6578    struct pipe_screen *pscreen = ctx->st->pipe->screen;
6579    enum pipe_shader_type ptarget = st_shader_stage_to_ptarget(shader->Stage);
6580
6581    validate_ir_tree(shader->ir);
6582
6583    prog = ctx->Driver.NewProgram(ctx, target, shader_program->Name);
6584    if (!prog)
6585       return NULL;
6586    prog->Parameters = _mesa_new_parameter_list();
6587    v = new glsl_to_tgsi_visitor();
6588    v->ctx = ctx;
6589    v->prog = prog;
6590    v->shader_program = shader_program;
6591    v->shader = shader;
6592    v->options = options;
6593    v->glsl_version = ctx->Const.GLSLVersion;
6594    v->native_integers = ctx->Const.NativeIntegers;
6595
6596    v->have_sqrt = pscreen->get_shader_param(pscreen, ptarget,
6597                                             PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED);
6598    v->have_fma = pscreen->get_shader_param(pscreen, ptarget,
6599                                            PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED);
6600
6601    _mesa_copy_linked_program_data(shader->Stage, shader_program, prog);
6602    _mesa_generate_parameters_list_for_uniforms(shader_program, shader,
6603                                                prog->Parameters);
6604
6605    /* Remove reads from output registers. */
6606    lower_output_reads(shader->Stage, shader->ir);
6607
6608    /* Emit intermediate IR for main(). */
6609    visit_exec_list(shader->ir, v);
6610
6611    /* Now emit bodies for any functions that were used. */
6612    do {
6613       progress = GL_FALSE;
6614
6615       foreach_in_list(function_entry, entry, &v->function_signatures) {
6616          if (!entry->bgn_inst) {
6617             v->current_function = entry;
6618
6619             entry->bgn_inst = v->emit_asm(NULL, TGSI_OPCODE_BGNSUB);
6620             entry->bgn_inst->function = entry;
6621
6622             visit_exec_list(&entry->sig->body, v);
6623
6624             glsl_to_tgsi_instruction *last;
6625             last = (glsl_to_tgsi_instruction *)v->instructions.get_tail();
6626             if (last->op != TGSI_OPCODE_RET)
6627                v->emit_asm(NULL, TGSI_OPCODE_RET);
6628
6629             glsl_to_tgsi_instruction *end;
6630             end = v->emit_asm(NULL, TGSI_OPCODE_ENDSUB);
6631             end->function = entry;
6632
6633             progress = GL_TRUE;
6634          }
6635       }
6636    } while (progress);
6637
6638 #if 0
6639    /* Print out some information (for debugging purposes) used by the
6640     * optimization passes. */
6641    {
6642       int i;
6643       int *first_writes = rzalloc_array(v->mem_ctx, int, v->next_temp);
6644       int *first_reads = rzalloc_array(v->mem_ctx, int, v->next_temp);
6645       int *last_writes = rzalloc_array(v->mem_ctx, int, v->next_temp);
6646       int *last_reads = rzalloc_array(v->mem_ctx, int, v->next_temp);
6647
6648       for (i = 0; i < v->next_temp; i++) {
6649          first_writes[i] = -1;
6650          first_reads[i] = -1;
6651          last_writes[i] = -1;
6652          last_reads[i] = -1;
6653       }
6654       v->get_first_temp_read(first_reads);
6655       v->get_last_temp_read_first_temp_write(last_reads, first_writes);
6656       v->get_last_temp_write(last_writes);
6657       for (i = 0; i < v->next_temp; i++)
6658          printf("Temp %d: FR=%3d FW=%3d LR=%3d LW=%3d\n", i, first_reads[i],
6659                 first_writes[i],
6660                 last_reads[i],
6661                 last_writes[i]);
6662       ralloc_free(first_writes);
6663       ralloc_free(first_reads);
6664       ralloc_free(last_writes);
6665       ralloc_free(last_reads);
6666    }
6667 #endif
6668
6669    /* Perform optimizations on the instructions in the glsl_to_tgsi_visitor. */
6670    v->simplify_cmp();
6671
6672    if (shader->Stage != MESA_SHADER_TESS_CTRL &&
6673        shader->Stage != MESA_SHADER_TESS_EVAL)
6674       v->copy_propagate();
6675
6676    while (v->eliminate_dead_code());
6677
6678    v->merge_two_dsts();
6679    v->merge_registers();
6680    v->renumber_registers();
6681
6682    /* Write the END instruction. */
6683    v->emit_asm(NULL, TGSI_OPCODE_END);
6684
6685    if (ctx->_Shader->Flags & GLSL_DUMP) {
6686       _mesa_log("\n");
6687       _mesa_log("GLSL IR for linked %s program %d:\n",
6688              _mesa_shader_stage_to_string(shader->Stage),
6689              shader_program->Name);
6690       _mesa_print_ir(_mesa_get_log_file(), shader->ir, NULL);
6691       _mesa_log("\n\n");
6692    }
6693
6694    prog->Instructions = NULL;
6695    prog->NumInstructions = 0;
6696
6697    do_set_program_inouts(shader->ir, prog, shader->Stage);
6698    shrink_array_declarations(v->inputs, v->num_inputs,
6699                              &prog->InputsRead, prog->DoubleInputsRead, &prog->PatchInputsRead);
6700    shrink_array_declarations(v->outputs, v->num_outputs,
6701                              &prog->OutputsWritten, 0ULL, &prog->PatchOutputsWritten);
6702    count_resources(v, prog);
6703
6704    /* The GLSL IR won't be needed anymore. */
6705    ralloc_free(shader->ir);
6706    shader->ir = NULL;
6707
6708    /* This must be done before the uniform storage is associated. */
6709    if (shader->Stage == MESA_SHADER_FRAGMENT &&
6710        (prog->InputsRead & VARYING_BIT_POS ||
6711         prog->SystemValuesRead & (1 << SYSTEM_VALUE_FRAG_COORD))) {
6712       static const gl_state_index wposTransformState[STATE_LENGTH] = {
6713          STATE_INTERNAL, STATE_FB_WPOS_Y_TRANSFORM
6714       };
6715
6716       v->wpos_transform_const = _mesa_add_state_reference(prog->Parameters,
6717                                                           wposTransformState);
6718    }
6719
6720    _mesa_reference_program(ctx, &shader->Program, prog);
6721
6722    /* Avoid reallocation of the program parameter list, because the uniform
6723     * storage is only associated with the original parameter list.
6724     * This should be enough for Bitmap and DrawPixels constants.
6725     */
6726    _mesa_reserve_parameter_storage(prog->Parameters, 8);
6727
6728    /* This has to be done last.  Any operation the can cause
6729     * prog->ParameterValues to get reallocated (e.g., anything that adds a
6730     * program constant) has to happen before creating this linkage.
6731     */
6732    _mesa_associate_uniform_storage(ctx, shader_program, prog->Parameters);
6733    if (!shader_program->LinkStatus) {
6734       free_glsl_to_tgsi_visitor(v);
6735       return NULL;
6736    }
6737
6738    struct st_vertex_program *stvp;
6739    struct st_fragment_program *stfp;
6740    struct st_geometry_program *stgp;
6741    struct st_tessctrl_program *sttcp;
6742    struct st_tesseval_program *sttep;
6743    struct st_compute_program *stcp;
6744
6745    switch (shader->Stage) {
6746    case MESA_SHADER_VERTEX:
6747       stvp = (struct st_vertex_program *)prog;
6748       stvp->glsl_to_tgsi = v;
6749       break;
6750    case MESA_SHADER_FRAGMENT:
6751       stfp = (struct st_fragment_program *)prog;
6752       stfp->glsl_to_tgsi = v;
6753       break;
6754    case MESA_SHADER_GEOMETRY:
6755       stgp = (struct st_geometry_program *)prog;
6756       stgp->glsl_to_tgsi = v;
6757       break;
6758    case MESA_SHADER_TESS_CTRL:
6759       sttcp = (struct st_tessctrl_program *)prog;
6760       sttcp->glsl_to_tgsi = v;
6761       break;
6762    case MESA_SHADER_TESS_EVAL:
6763       sttep = (struct st_tesseval_program *)prog;
6764       sttep->glsl_to_tgsi = v;
6765       break;
6766    case MESA_SHADER_COMPUTE:
6767       stcp = (struct st_compute_program *)prog;
6768       stcp->glsl_to_tgsi = v;
6769       break;
6770    default:
6771       assert(!"should not be reached");
6772       return NULL;
6773    }
6774
6775    return prog;
6776 }
6777
6778 static void
6779 set_affected_state_flags(uint64_t *states,
6780                          struct gl_program *prog,
6781                          struct gl_linked_shader *shader,
6782                          uint64_t new_constants,
6783                          uint64_t new_sampler_views,
6784                          uint64_t new_samplers,
6785                          uint64_t new_images,
6786                          uint64_t new_ubos,
6787                          uint64_t new_ssbos,
6788                          uint64_t new_atomics)
6789 {
6790    if (prog->Parameters->NumParameters)
6791       *states |= new_constants;
6792
6793    if (shader->num_samplers)
6794       *states |= new_sampler_views | new_samplers;
6795
6796    if (shader->NumImages)
6797       *states |= new_images;
6798
6799    if (shader->NumUniformBlocks)
6800       *states |= new_ubos;
6801
6802    if (shader->NumShaderStorageBlocks)
6803       *states |= new_ssbos;
6804
6805    if (shader->NumAtomicBuffers)
6806       *states |= new_atomics;
6807 }
6808
6809 static struct gl_program *
6810 get_mesa_program(struct gl_context *ctx,
6811                  struct gl_shader_program *shader_program,
6812                  struct gl_linked_shader *shader)
6813 {
6814    struct pipe_screen *pscreen = ctx->st->pipe->screen;
6815    enum pipe_shader_type ptarget = st_shader_stage_to_ptarget(shader->Stage);
6816    enum pipe_shader_ir preferred_ir = (enum pipe_shader_ir)
6817       pscreen->get_shader_param(pscreen, ptarget, PIPE_SHADER_CAP_PREFERRED_IR);
6818    struct gl_program *prog = NULL;
6819
6820    if (preferred_ir == PIPE_SHADER_IR_NIR) {
6821       /* TODO only for GLSL VS/FS for now: */
6822       switch (shader->Stage) {
6823       case MESA_SHADER_VERTEX:
6824       case MESA_SHADER_FRAGMENT:
6825          prog = st_nir_get_mesa_program(ctx, shader_program, shader);
6826       default:
6827          break;
6828       }
6829    } else {
6830       prog = get_mesa_program_tgsi(ctx, shader_program, shader);
6831    }
6832
6833    if (prog) {
6834       uint64_t *states;
6835
6836       /* This determines which states will be updated when the shader is
6837        * bound.
6838        */
6839       switch (shader->Stage) {
6840       case MESA_SHADER_VERTEX:
6841          states = &((struct st_vertex_program*)prog)->affected_states;
6842
6843          *states = ST_NEW_VS_STATE |
6844                    ST_NEW_RASTERIZER |
6845                    ST_NEW_VERTEX_ARRAYS;
6846
6847          set_affected_state_flags(states, prog, shader,
6848                                   ST_NEW_VS_CONSTANTS,
6849                                   ST_NEW_VS_SAMPLER_VIEWS,
6850                                   ST_NEW_RENDER_SAMPLERS,
6851                                   ST_NEW_VS_IMAGES,
6852                                   ST_NEW_VS_UBOS,
6853                                   ST_NEW_VS_SSBOS,
6854                                   ST_NEW_VS_ATOMICS);
6855          break;
6856
6857       case MESA_SHADER_TESS_CTRL:
6858          states = &((struct st_tessctrl_program*)prog)->affected_states;
6859
6860          *states = ST_NEW_TCS_STATE;
6861
6862          set_affected_state_flags(states, prog, shader,
6863                                   ST_NEW_TCS_CONSTANTS,
6864                                   ST_NEW_TCS_SAMPLER_VIEWS,
6865                                   ST_NEW_RENDER_SAMPLERS,
6866                                   ST_NEW_TCS_IMAGES,
6867                                   ST_NEW_TCS_UBOS,
6868                                   ST_NEW_TCS_SSBOS,
6869                                   ST_NEW_TCS_ATOMICS);
6870          break;
6871
6872       case MESA_SHADER_TESS_EVAL:
6873          states = &((struct st_tesseval_program*)prog)->affected_states;
6874
6875          *states = ST_NEW_TES_STATE |
6876                    ST_NEW_RASTERIZER;
6877
6878          set_affected_state_flags(states, prog, shader,
6879                                   ST_NEW_TES_CONSTANTS,
6880                                   ST_NEW_TES_SAMPLER_VIEWS,
6881                                   ST_NEW_RENDER_SAMPLERS,
6882                                   ST_NEW_TES_IMAGES,
6883                                   ST_NEW_TES_UBOS,
6884                                   ST_NEW_TES_SSBOS,
6885                                   ST_NEW_TES_ATOMICS);
6886          break;
6887
6888       case MESA_SHADER_GEOMETRY:
6889          states = &((struct st_geometry_program*)prog)->affected_states;
6890
6891          *states = ST_NEW_GS_STATE |
6892                    ST_NEW_RASTERIZER;
6893
6894          set_affected_state_flags(states, prog, shader,
6895                                   ST_NEW_GS_CONSTANTS,
6896                                   ST_NEW_GS_SAMPLER_VIEWS,
6897                                   ST_NEW_RENDER_SAMPLERS,
6898                                   ST_NEW_GS_IMAGES,
6899                                   ST_NEW_GS_UBOS,
6900                                   ST_NEW_GS_SSBOS,
6901                                   ST_NEW_GS_ATOMICS);
6902          break;
6903
6904       case MESA_SHADER_FRAGMENT:
6905          states = &((struct st_fragment_program*)prog)->affected_states;
6906
6907          /* gl_FragCoord and glDrawPixels always use constants. */
6908          *states = ST_NEW_FS_STATE |
6909                    ST_NEW_SAMPLE_SHADING |
6910                    ST_NEW_FS_CONSTANTS;
6911
6912          set_affected_state_flags(states, prog, shader,
6913                                   ST_NEW_FS_CONSTANTS,
6914                                   ST_NEW_FS_SAMPLER_VIEWS,
6915                                   ST_NEW_RENDER_SAMPLERS,
6916                                   ST_NEW_FS_IMAGES,
6917                                   ST_NEW_FS_UBOS,
6918                                   ST_NEW_FS_SSBOS,
6919                                   ST_NEW_FS_ATOMICS);
6920          break;
6921
6922       case MESA_SHADER_COMPUTE:
6923          states = &((struct st_compute_program*)prog)->affected_states;
6924
6925          *states = ST_NEW_CS_STATE;
6926
6927          set_affected_state_flags(states, prog, shader,
6928                                   ST_NEW_CS_CONSTANTS,
6929                                   ST_NEW_CS_SAMPLER_VIEWS,
6930                                   ST_NEW_CS_SAMPLERS,
6931                                   ST_NEW_CS_IMAGES,
6932                                   ST_NEW_CS_UBOS,
6933                                   ST_NEW_CS_SSBOS,
6934                                   ST_NEW_CS_ATOMICS);
6935          break;
6936
6937       default:
6938          unreachable("unhandled shader stage");
6939       }
6940    }
6941
6942    return prog;
6943 }
6944
6945
6946 extern "C" {
6947
6948 /**
6949  * Link a shader.
6950  * Called via ctx->Driver.LinkShader()
6951  * This actually involves converting GLSL IR into an intermediate TGSI-like IR
6952  * with code lowering and other optimizations.
6953  */
6954 GLboolean
6955 st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
6956 {
6957    struct pipe_screen *pscreen = ctx->st->pipe->screen;
6958    assert(prog->LinkStatus);
6959
6960    for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
6961       if (prog->_LinkedShaders[i] == NULL)
6962          continue;
6963
6964       bool progress;
6965       exec_list *ir = prog->_LinkedShaders[i]->ir;
6966       gl_shader_stage stage = prog->_LinkedShaders[i]->Stage;
6967       const struct gl_shader_compiler_options *options =
6968             &ctx->Const.ShaderCompilerOptions[stage];
6969       enum pipe_shader_type ptarget = st_shader_stage_to_ptarget(stage);
6970       bool have_dround = pscreen->get_shader_param(pscreen, ptarget,
6971                                                    PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED);
6972       bool have_dfrexp = pscreen->get_shader_param(pscreen, ptarget,
6973                                                    PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED);
6974
6975       /* If there are forms of indirect addressing that the driver
6976        * cannot handle, perform the lowering pass.
6977        */
6978       if (options->EmitNoIndirectInput || options->EmitNoIndirectOutput ||
6979           options->EmitNoIndirectTemp || options->EmitNoIndirectUniform) {
6980          lower_variable_index_to_cond_assign(prog->_LinkedShaders[i]->Stage, ir,
6981                                              options->EmitNoIndirectInput,
6982                                              options->EmitNoIndirectOutput,
6983                                              options->EmitNoIndirectTemp,
6984                                              options->EmitNoIndirectUniform);
6985       }
6986
6987       if (ctx->Extensions.ARB_shading_language_packing) {
6988          unsigned lower_inst = LOWER_PACK_SNORM_2x16 |
6989                                LOWER_UNPACK_SNORM_2x16 |
6990                                LOWER_PACK_UNORM_2x16 |
6991                                LOWER_UNPACK_UNORM_2x16 |
6992                                LOWER_PACK_SNORM_4x8 |
6993                                LOWER_UNPACK_SNORM_4x8 |
6994                                LOWER_UNPACK_UNORM_4x8 |
6995                                LOWER_PACK_UNORM_4x8;
6996
6997          if (ctx->Extensions.ARB_gpu_shader5)
6998             lower_inst |= LOWER_PACK_USE_BFI |
6999                           LOWER_PACK_USE_BFE;
7000          if (!ctx->st->has_half_float_packing)
7001             lower_inst |= LOWER_PACK_HALF_2x16 |
7002                           LOWER_UNPACK_HALF_2x16;
7003
7004          lower_packing_builtins(ir, lower_inst);
7005       }
7006
7007       if (!pscreen->get_param(pscreen, PIPE_CAP_TEXTURE_GATHER_OFFSETS))
7008          lower_offset_arrays(ir);
7009       do_mat_op_to_vec(ir);
7010       lower_instructions(ir,
7011                          MOD_TO_FLOOR |
7012                          DIV_TO_MUL_RCP |
7013                          EXP_TO_EXP2 |
7014                          LOG_TO_LOG2 |
7015                          LDEXP_TO_ARITH |
7016                          (have_dfrexp ? 0 : DFREXP_DLDEXP_TO_ARITH) |
7017                          CARRY_TO_ARITH |
7018                          BORROW_TO_ARITH |
7019                          (have_dround ? 0 : DOPS_TO_DFRAC) |
7020                          (options->EmitNoPow ? POW_TO_EXP2 : 0) |
7021                          (!ctx->Const.NativeIntegers ? INT_DIV_TO_MUL_RCP : 0) |
7022                          (options->EmitNoSat ? SAT_TO_CLAMP : 0) |
7023                          /* Assume that if ARB_gpu_shader5 is not supported
7024                           * then all of the extended integer functions need
7025                           * lowering.  It may be necessary to add some caps
7026                           * for individual instructions.
7027                           */
7028                          (!ctx->Extensions.ARB_gpu_shader5
7029                           ? BIT_COUNT_TO_MATH |
7030                             EXTRACT_TO_SHIFTS |
7031                             INSERT_TO_SHIFTS |
7032                             REVERSE_TO_SHIFTS |
7033                             FIND_LSB_TO_FLOAT_CAST |
7034                             FIND_MSB_TO_FLOAT_CAST |
7035                             IMUL_HIGH_TO_MUL
7036                           : 0));
7037
7038       do_vec_index_to_cond_assign(ir);
7039       lower_vector_insert(ir, true);
7040       lower_quadop_vector(ir, false);
7041       lower_noise(ir);
7042       if (options->MaxIfDepth == 0) {
7043          lower_discard(ir);
7044       }
7045
7046       do {
7047          progress = false;
7048
7049          progress = do_lower_jumps(ir, true, true, options->EmitNoMainReturn, options->EmitNoCont, options->EmitNoLoops) || progress;
7050
7051          progress = do_common_optimization(ir, true, true, options,
7052                                            ctx->Const.NativeIntegers)
7053            || progress;
7054
7055          progress = lower_if_to_cond_assign(ir, options->MaxIfDepth) || progress;
7056
7057       } while (progress);
7058
7059       validate_ir_tree(ir);
7060    }
7061
7062    build_program_resource_list(ctx, prog);
7063
7064    for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
7065       struct gl_program *linked_prog;
7066
7067       if (prog->_LinkedShaders[i] == NULL)
7068          continue;
7069
7070       linked_prog = get_mesa_program(ctx, prog, prog->_LinkedShaders[i]);
7071
7072       if (linked_prog) {
7073          _mesa_reference_program(ctx, &prog->_LinkedShaders[i]->Program,
7074                                  linked_prog);
7075          if (!ctx->Driver.ProgramStringNotify(ctx,
7076                                               _mesa_shader_stage_to_program(i),
7077                                               linked_prog)) {
7078             _mesa_reference_program(ctx, &prog->_LinkedShaders[i]->Program,
7079                                     NULL);
7080             _mesa_reference_program(ctx, &linked_prog, NULL);
7081             return GL_FALSE;
7082          }
7083       }
7084
7085       _mesa_reference_program(ctx, &linked_prog, NULL);
7086    }
7087
7088    return GL_TRUE;
7089 }
7090
7091 void
7092 st_translate_stream_output_info(glsl_to_tgsi_visitor *glsl_to_tgsi,
7093                                 const GLuint outputMapping[],
7094                                 struct pipe_stream_output_info *so)
7095 {
7096    struct gl_transform_feedback_info *info =
7097       &glsl_to_tgsi->shader_program->LinkedTransformFeedback;
7098    st_translate_stream_output_info2(info, outputMapping, so);
7099 }
7100
7101 void
7102 st_translate_stream_output_info2(struct gl_transform_feedback_info *info,
7103                                 const GLuint outputMapping[],
7104                                 struct pipe_stream_output_info *so)
7105 {
7106    unsigned i;
7107
7108    for (i = 0; i < info->NumOutputs; i++) {
7109       so->output[i].register_index =
7110          outputMapping[info->Outputs[i].OutputRegister];
7111       so->output[i].start_component = info->Outputs[i].ComponentOffset;
7112       so->output[i].num_components = info->Outputs[i].NumComponents;
7113       so->output[i].output_buffer = info->Outputs[i].OutputBuffer;
7114       so->output[i].dst_offset = info->Outputs[i].DstOffset;
7115       so->output[i].stream = info->Outputs[i].StreamId;
7116    }
7117
7118    for (i = 0; i < PIPE_MAX_SO_BUFFERS; i++) {
7119       so->stride[i] = info->Buffers[i].Stride;
7120    }
7121    so->num_outputs = info->NumOutputs;
7122 }
7123
7124 } /* extern "C" */