src/mesa/state_tracker/st_glsl_to_tgsi.cpp

   1 /*
   2  * Copyright (C) 2005-2007  Brian Paul   All Rights Reserved.
   3  * Copyright (C) 2008  VMware, Inc.   All Rights Reserved.
   4  * Copyright © 2010 Intel Corporation
   5  * Copyright © 2011 Bryan Cain
   6  *
   7  * Permission is hereby granted, free of charge, to any person obtaining a
   8  * copy of this software and associated documentation files (the "Software"),
   9  * to deal in the Software without restriction, including without limitation
  10  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  11  * and/or sell copies of the Software, and to permit persons to whom the
  12  * Software is furnished to do so, subject to the following conditions:
  13  *
  14  * The above copyright notice and this permission notice (including the next
  15  * paragraph) shall be included in all copies or substantial portions of the
  16  * Software.
  17  *
  18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  19  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  20  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  21  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  22  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  23  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  24  * DEALINGS IN THE SOFTWARE.
  25  */
  26
/**
 * \file st_glsl_to_tgsi.cpp
 *
 * Translate GLSL IR to TGSI.
 */
  32
  33 #include "st_glsl_to_tgsi.h"
  34
  35 #include "compiler/glsl/glsl_parser_extras.h"
  36 #include "compiler/glsl/ir_optimization.h"
  37 #include "compiler/glsl/program.h"
  38
  39 #include "main/errors.h"
  40 #include "main/shaderobj.h"
  41 #include "main/uniforms.h"
  42 #include "main/shaderapi.h"
  43 #include "main/shaderimage.h"
  44 #include "program/prog_instruction.h"
  45
  46 #include "pipe/p_context.h"
  47 #include "pipe/p_screen.h"
  48 #include "tgsi/tgsi_ureg.h"
  49 #include "tgsi/tgsi_info.h"
  50 #include "util/u_math.h"
  51 #include "util/u_memory.h"
  52 #include "st_program.h"
  53 #include "st_mesa_to_tgsi.h"
  54 #include "st_format.h"
  55
  56
/**
 * Bitmask with one bit set for each of the PROGRAM_STATE_VAR,
 * PROGRAM_CONSTANT and PROGRAM_UNIFORM register files; the bit position is
 * the numeric value of the corresponding PROGRAM_* file.
 */
#define PROGRAM_ANY_CONST ((1 << PROGRAM_STATE_VAR) |    \
                           (1 << PROGRAM_CONSTANT) |     \
                           (1 << PROGRAM_UNIFORM))

/** Number of entries in glsl_to_tgsi_instruction::tex_offsets. */
#define MAX_GLSL_TEXTURE_OFFSET 4

/* Forward declarations: each register class declares a converting
 * constructor that takes the other class by value.
 */
class st_src_reg;
class st_dst_reg;

/* Defined further down; the inline st_src_reg constructor calls it. */
static int swizzle_for_size(int size);
  67
  68 /**
  69  * This struct is a corresponding struct to TGSI ureg_src.
  70  */
  71 class st_src_reg {
  72 public:
  73    st_src_reg(gl_register_file file, int index, const glsl_type *type)
  74    {
  75       this->file = file;
  76       this->index = index;
  77       if (type && (type->is_scalar() || type->is_vector() || type->is_matrix()))
  78          this->swizzle = swizzle_for_size(type->vector_elements);
  79       else
  80          this->swizzle = SWIZZLE_XYZW;
  81       this->negate = 0;
  82       this->index2D = 0;
  83       this->type = type ? type->base_type : GLSL_TYPE_ERROR;
  84       this->reladdr = NULL;
  85       this->reladdr2 = NULL;
  86       this->has_index2 = false;
  87       this->double_reg2 = false;
  88       this->array_id = 0;
  89       this->is_double_vertex_input = false;
  90    }
  91
  92    st_src_reg(gl_register_file file, int index, int type)
  93    {
  94       this->type = type;
  95       this->file = file;
  96       this->index = index;
  97       this->index2D = 0;
  98       this->swizzle = SWIZZLE_XYZW;
  99       this->negate = 0;
 100       this->reladdr = NULL;
 101       this->reladdr2 = NULL;
 102       this->has_index2 = false;
 103       this->double_reg2 = false;
 104       this->array_id = 0;
 105       this->is_double_vertex_input = false;
 106    }
 107
 108    st_src_reg(gl_register_file file, int index, int type, int index2D)
 109    {
 110       this->type = type;
 111       this->file = file;
 112       this->index = index;
 113       this->index2D = index2D;
 114       this->swizzle = SWIZZLE_XYZW;
 115       this->negate = 0;
 116       this->reladdr = NULL;
 117       this->reladdr2 = NULL;
 118       this->has_index2 = false;
 119       this->double_reg2 = false;
 120       this->array_id = 0;
 121       this->is_double_vertex_input = false;
 122    }
 123
 124    st_src_reg()
 125    {
 126       this->type = GLSL_TYPE_ERROR;
 127       this->file = PROGRAM_UNDEFINED;
 128       this->index = 0;
 129       this->index2D = 0;
 130       this->swizzle = 0;
 131       this->negate = 0;
 132       this->reladdr = NULL;
 133       this->reladdr2 = NULL;
 134       this->has_index2 = false;
 135       this->double_reg2 = false;
 136       this->array_id = 0;
 137       this->is_double_vertex_input = false;
 138    }
 139
 140    explicit st_src_reg(st_dst_reg reg);
 141
 142    gl_register_file file; /**< PROGRAM_* from Mesa */
 143    int index; /**< temporary index, VERT_ATTRIB_*, VARYING_SLOT_*, etc. */
 144    int index2D;
 145    GLuint swizzle; /**< SWIZZLE_XYZWONEZERO swizzles from Mesa. */
 146    int negate; /**< NEGATE_XYZW mask from mesa */
 147    int type; /** GLSL_TYPE_* from GLSL IR (enum glsl_base_type) */
 148    /** Register index should be offset by the integer in this reg. */
 149    st_src_reg *reladdr;
 150    st_src_reg *reladdr2;
 151    bool has_index2;
 152    /*
 153     * Is this the second half of a double register pair?
 154     * currently used for input mapping only.
 155     */
 156    bool double_reg2;
 157    unsigned array_id;
 158    bool is_double_vertex_input;
 159 };
 160
 161 class st_dst_reg {
 162 public:
 163    st_dst_reg(gl_register_file file, int writemask, int type, int index)
 164    {
 165       this->file = file;
 166       this->index = index;
 167       this->index2D = 0;
 168       this->writemask = writemask;
 169       this->reladdr = NULL;
 170       this->reladdr2 = NULL;
 171       this->has_index2 = false;
 172       this->type = type;
 173       this->array_id = 0;
 174    }
 175
 176    st_dst_reg(gl_register_file file, int writemask, int type)
 177    {
 178       this->file = file;
 179       this->index = 0;
 180       this->index2D = 0;
 181       this->writemask = writemask;
 182       this->reladdr = NULL;
 183       this->reladdr2 = NULL;
 184       this->has_index2 = false;
 185       this->type = type;
 186       this->array_id = 0;
 187    }
 188
 189    st_dst_reg()
 190    {
 191       this->type = GLSL_TYPE_ERROR;
 192       this->file = PROGRAM_UNDEFINED;
 193       this->index = 0;
 194       this->index2D = 0;
 195       this->writemask = 0;
 196       this->reladdr = NULL;
 197       this->reladdr2 = NULL;
 198       this->has_index2 = false;
 199       this->array_id = 0;
 200    }
 201
 202    explicit st_dst_reg(st_src_reg reg);
 203
 204    gl_register_file file; /**< PROGRAM_* from Mesa */
 205    int index; /**< temporary index, VERT_ATTRIB_*, VARYING_SLOT_*, etc. */
 206    int index2D;
 207    int writemask; /**< Bitfield of WRITEMASK_[XYZW] */
 208    int type; /** GLSL_TYPE_* from GLSL IR (enum glsl_base_type) */
 209    /** Register index should be offset by the integer in this reg. */
 210    st_src_reg *reladdr;
 211    st_src_reg *reladdr2;
 212    bool has_index2;
 213    unsigned array_id;
 214 };
 215
 216 st_src_reg::st_src_reg(st_dst_reg reg)
 217 {
 218    this->type = reg.type;
 219    this->file = reg.file;
 220    this->index = reg.index;
 221    this->swizzle = SWIZZLE_XYZW;
 222    this->negate = 0;
 223    this->reladdr = reg.reladdr;
 224    this->index2D = reg.index2D;
 225    this->reladdr2 = reg.reladdr2;
 226    this->has_index2 = reg.has_index2;
 227    this->double_reg2 = false;
 228    this->array_id = reg.array_id;
 229    this->is_double_vertex_input = false;
 230 }
 231
 232 st_dst_reg::st_dst_reg(st_src_reg reg)
 233 {
 234    this->type = reg.type;
 235    this->file = reg.file;
 236    this->index = reg.index;
 237    this->writemask = WRITEMASK_XYZW;
 238    this->reladdr = reg.reladdr;
 239    this->index2D = reg.index2D;
 240    this->reladdr2 = reg.reladdr2;
 241    this->has_index2 = reg.has_index2;
 242    this->array_id = reg.array_id;
 243 }
 244
/**
 * One instruction in the visitor's intermediate instruction stream.
 *
 * Instances are allocated and filled in by glsl_to_tgsi_visitor::emit_asm()
 * and linked into glsl_to_tgsi_visitor::instructions through the exec_node
 * base class.
 */
class glsl_to_tgsi_instruction : public exec_node {
public:
   DECLARE_RALLOC_CXX_OPERATORS(glsl_to_tgsi_instruction)

   unsigned op; /**< opcode, as returned by get_opcode() in emit_asm() */
   st_dst_reg dst[2]; /**< up to two destinations; unused ones are undefined */
   st_src_reg src[4]; /**< up to four sources; unused ones are undefined */
   /** Pointer to the ir source this tree came from for debugging */
   ir_instruction *ir;
   GLboolean cond_update;
   bool saturate;
   st_src_reg sampler; /**< sampler register */
   int sampler_base;
   int sampler_array_size; /**< 1-based size of sampler array, 1 if not array */
   int tex_target; /**< One of TEXTURE_*_INDEX */
   glsl_base_type tex_type; /**< emit_asm() defaults this to GLSL_TYPE_FLOAT */
   GLboolean tex_shadow;
   unsigned image_format;

   st_src_reg tex_offsets[MAX_GLSL_TEXTURE_OFFSET];
   unsigned tex_offset_num_offset;
   int dead_mask; /**< Used in dead code elimination */

   st_src_reg buffer; /**< buffer register */
   unsigned buffer_access; /**< buffer access type */

   class function_entry *function; /* Set on TGSI_OPCODE_CAL or TGSI_OPCODE_BGNSUB */
   const struct tgsi_opcode_info *info; /**< tgsi_get_opcode_info(op), set by emit_asm() */
};
 274
 275 class variable_storage : public exec_node {
 276 public:
 277    variable_storage(ir_variable *var, gl_register_file file, int index,
 278                     unsigned array_id = 0)
 279       : file(file), index(index), var(var), array_id(array_id)
 280    {
 281       /* empty */
 282    }
 283
 284    gl_register_file file;
 285    int index;
 286    ir_variable *var; /* variable that maps to this, if any */
 287    unsigned array_id;
 288 };
 289
 290 class immediate_storage : public exec_node {
 291 public:
 292    immediate_storage(gl_constant_value *values, int size32, int type)
 293    {
 294       memcpy(this->values, values, size32 * sizeof(gl_constant_value));
 295       this->size32 = size32;
 296       this->type = type;
 297    }
 298
 299    /* doubles are stored across 2 gl_constant_values */
 300    gl_constant_value values[4];
 301    int size32; /**< Number of 32-bit components (1-4) */
 302    int type; /**< GL_DOUBLE, GL_FLOAT, GL_INT, GL_BOOL, or GL_UNSIGNED_INT */
 303 };
 304
/**
 * List node with the bookkeeping for one GLSL function signature: its
 * identifier, where its body starts, and the register holding its return
 * value.
 */
class function_entry : public exec_node {
public:
   /** The GLSL IR signature this entry describes. */
   ir_function_signature *sig;

   /**
    * identifier of this function signature used by the program.
    *
    * At the point that TGSI instructions for function calls are
    * generated, we don't know the address of the first instruction of
    * the function body.  So we make the BranchTarget that is called a
    * small integer and rewrite them during set_branchtargets().
    */
   int sig_id;

   /**
    * Pointer to first instruction of the function body.
    *
    * Set during function body emits after main() is processed.
    */
   glsl_to_tgsi_instruction *bgn_inst;

   /**
    * Index of the first instruction of the function body in actual TGSI.
    *
    * Set after conversion from glsl_to_tgsi_instruction to TGSI.
    */
   int inst;

   /** Storage for the return value. */
   st_src_reg return_reg;
};
 336
/* Placeholder operands in the PROGRAM_UNDEFINED file; they are the default
 * arguments of emit_asm() for operand slots the caller does not supply.
 *
 * NOTE(review): undef_dst passes SWIZZLE_NOOP in the constructor's writemask
 * position, so its writemask is not a plain WRITEMASK_* value.  Presumably
 * harmless because consumers test the file first — confirm before relying on
 * the writemask of an undefined destination.
 */
static st_src_reg undef_src = st_src_reg(PROGRAM_UNDEFINED, 0, GLSL_TYPE_ERROR);
static st_dst_reg undef_dst = st_dst_reg(PROGRAM_UNDEFINED, SWIZZLE_NOOP, GLSL_TYPE_ERROR);
 339
/**
 * Description of one declared array; the visitor keeps one table of these
 * for inputs and one for outputs.
 */
struct array_decl {
   unsigned mesa_index; /* NOTE(review): presumably the first Mesa register index of the array — confirm */
   unsigned array_id;   /**< key that find_array_type() searches for */
   unsigned array_size;
   unsigned array_type; /**< GLSL_TYPE_* value returned by find_array_type() */
};
 346
 347 static unsigned
 348 find_array_type(struct array_decl *arrays, unsigned count, unsigned array_id)
 349 {
 350    unsigned i;
 351
 352    for (i = 0; i < count; i++) {
 353       struct array_decl *decl = &arrays[i];
 354
 355       if (array_id == decl->array_id) {
 356          return decl->array_type;
 357       }
 358    }
 359    return GLSL_TYPE_ERROR;
 360 }
 361
/**
 * One register renaming: the register number before and after.  Arrays of
 * these are passed to glsl_to_tgsi_visitor::rename_temp_registers().
 */
struct rename_reg_pair {
   int old_reg;
   int new_reg;
};
 366
/**
 * IR visitor that walks GLSL IR and builds a list of
 * glsl_to_tgsi_instruction objects, together with the variable, immediate
 * and function tables those instructions refer to.
 */
struct glsl_to_tgsi_visitor : public ir_visitor {
public:
   glsl_to_tgsi_visitor();
   ~glsl_to_tgsi_visitor();

   function_entry *current_function;

   struct gl_context *ctx;
   struct gl_program *prog;
   struct gl_shader_program *shader_program;
   struct gl_shader *shader;
   struct gl_shader_compiler_options *options;

   int next_temp;

   unsigned *array_sizes;
   unsigned max_num_arrays;
   unsigned next_array;

   struct array_decl input_arrays[PIPE_MAX_SHADER_INPUTS];
   unsigned num_input_arrays;
   /** Searched by emit_asm() through find_array_type(). */
   struct array_decl output_arrays[PIPE_MAX_SHADER_OUTPUTS];
   unsigned num_output_arrays;

   int num_address_regs;
   uint32_t samplers_used;
   glsl_base_type sampler_types[PIPE_MAX_SAMPLERS];
   int sampler_targets[PIPE_MAX_SAMPLERS];   /**< One of TGSI_TEXTURE_* */
   int buffers_used;
   int images_used;
   int image_targets[PIPE_MAX_SHADER_IMAGES];
   unsigned image_formats[PIPE_MAX_SHADER_IMAGES];
   /**
    * Set by emit_asm() when a relatively addressed operand lives in the
    * PROGRAM_STATE_VAR, PROGRAM_CONSTANT or PROGRAM_UNIFORM file.
    */
   bool indirect_addr_consts;
   int wpos_transform_const;

   int glsl_version;
   /** Consulted by get_opcode() when picking the opcode variant. */
   bool native_integers;
   bool have_sqrt;
   bool have_fma;
   bool use_shared_memory;

   variable_storage *find_variable_storage(ir_variable *var);

   int add_constant(gl_register_file file, gl_constant_value values[8],
                    int size, int datatype, GLuint *swizzle_out);

   function_entry *get_function_signature(ir_function_signature *sig);

   st_src_reg get_temp(const glsl_type *type);
   void reladdr_to_temp(ir_instruction *ir, st_src_reg *reg, int *num_reladdr);

   st_src_reg st_src_reg_for_double(double val);
   st_src_reg st_src_reg_for_float(float val);
   st_src_reg st_src_reg_for_int(int val);
   st_src_reg st_src_reg_for_type(int type, int val);

   /**
    * \name Visit methods
    *
    * As typical for the visitor pattern, there must be one \c visit method for
    * each concrete subclass of \c ir_instruction.  Virtual base classes within
    * the hierarchy should not have \c visit methods.
    */
   /*@{*/
   virtual void visit(ir_variable *);
   virtual void visit(ir_loop *);
   virtual void visit(ir_loop_jump *);
   virtual void visit(ir_function_signature *);
   virtual void visit(ir_function *);
   virtual void visit(ir_expression *);
   virtual void visit(ir_swizzle *);
   virtual void visit(ir_dereference_variable  *);
   virtual void visit(ir_dereference_array *);
   virtual void visit(ir_dereference_record *);
   virtual void visit(ir_assignment *);
   virtual void visit(ir_constant *);
   virtual void visit(ir_call *);
   virtual void visit(ir_return *);
   virtual void visit(ir_discard *);
   virtual void visit(ir_texture *);
   virtual void visit(ir_if *);
   virtual void visit(ir_emit_vertex *);
   virtual void visit(ir_end_primitive *);
   virtual void visit(ir_barrier *);
   /*@}*/

   /* Helpers for the individual families of intrinsic calls. */
   void visit_atomic_counter_intrinsic(ir_call *);
   void visit_ssbo_intrinsic(ir_call *);
   void visit_membar_intrinsic(ir_call *);
   void visit_shared_intrinsic(ir_call *);
   void visit_image_intrinsic(ir_call *);

   st_src_reg result;

   /** List of variable_storage */
   exec_list variables;

   /** List of immediate_storage */
   exec_list immediates;
   unsigned num_immediates;

   /** List of function_entry */
   exec_list function_signatures;
   int next_signature_id;

   /** List of glsl_to_tgsi_instruction */
   exec_list instructions;

   /**
    * Single-destination form; forwards to the two-destination overload
    * with undef_dst as the second destination.
    */
   glsl_to_tgsi_instruction *emit_asm(ir_instruction *ir, unsigned op,
                                      st_dst_reg dst = undef_dst,
                                      st_src_reg src0 = undef_src,
                                      st_src_reg src1 = undef_src,
                                      st_src_reg src2 = undef_src,
                                      st_src_reg src3 = undef_src);

   /**
    * Two-destination form: builds the instruction(s) and appends them to
    * \c instructions.
    */
   glsl_to_tgsi_instruction *emit_asm(ir_instruction *ir, unsigned op,
                                      st_dst_reg dst, st_dst_reg dst1,
                                      st_src_reg src0 = undef_src,
                                      st_src_reg src1 = undef_src,
                                      st_src_reg src2 = undef_src,
                                      st_src_reg src3 = undef_src);

   /**
    * Choose the float / integer / unsigned / double variant of \p op from
    * the operand types.
    */
   unsigned get_opcode(ir_instruction *ir, unsigned op,
                    st_dst_reg dst,
                    st_src_reg src0, st_src_reg src1);

   /**
    * Emit the correct dot-product instruction for the type of arguments
    */
   glsl_to_tgsi_instruction *emit_dp(ir_instruction *ir,
                                     st_dst_reg dst,
                                     st_src_reg src0,
                                     st_src_reg src1,
                                     unsigned elements);

   void emit_scalar(ir_instruction *ir, unsigned op,
                    st_dst_reg dst, st_src_reg src0);

   void emit_scalar(ir_instruction *ir, unsigned op,
                    st_dst_reg dst, st_src_reg src0, st_src_reg src1);

   void emit_arl(ir_instruction *ir, st_dst_reg dst, st_src_reg src0);

   void get_deref_offsets(ir_dereference *ir,
                          unsigned *array_size,
                          unsigned *base,
                          unsigned *index,
                          st_src_reg *reladdr);
  void calc_deref_offsets(ir_dereference *head,
                          ir_dereference *tail,
                          unsigned *array_elements,
                          unsigned *base,
                          unsigned *index,
                          st_src_reg *indirect,
                          unsigned *location);

   bool try_emit_mad(ir_expression *ir,
              int mul_operand);
   bool try_emit_mad_for_and_not(ir_expression *ir,
              int mul_operand);

   void emit_swz(ir_expression *ir);

   bool process_move_condition(ir_rvalue *ir);

   void simplify_cmp(void);

   void rename_temp_registers(int num_renames, struct rename_reg_pair *renames);
   void get_first_temp_read(int *first_reads);
   void get_last_temp_read_first_temp_write(int *last_reads, int *first_writes);
   void get_last_temp_write(int *last_writes);

   void copy_propagate(void);
   int eliminate_dead_code(void);

   void merge_two_dsts(void);
   void merge_registers(void);
   void renumber_registers(void);

   void emit_block_mov(ir_assignment *ir, const struct glsl_type *type,
                       st_dst_reg *l, st_src_reg *r,
                       st_src_reg *cond, bool cond_swap);

   /** Context handed to placement new when emit_asm() allocates instructions. */
   void *mem_ctx;
};
 552
/* Address registers (PROGRAM_ADDRESS file, X channel only), at indices 0, 1
 * and 2.  emit_asm() loads address_reg from a destination's reladdr and
 * address_reg2 from its reladdr2 via emit_arl().
 * NOTE(review): sampler_reladdr is presumably reserved for indirect sampler
 * indexing — its use is not visible in this part of the file.
 */
static st_dst_reg address_reg = st_dst_reg(PROGRAM_ADDRESS, WRITEMASK_X, GLSL_TYPE_FLOAT, 0);
static st_dst_reg address_reg2 = st_dst_reg(PROGRAM_ADDRESS, WRITEMASK_X, GLSL_TYPE_FLOAT, 1);
static st_dst_reg sampler_reladdr = st_dst_reg(PROGRAM_ADDRESS, WRITEMASK_X, GLSL_TYPE_FLOAT, 2);
 556
 557 static void
 558 fail_link(struct gl_shader_program *prog, const char *fmt, ...) PRINTFLIKE(2, 3);
 559
 560 static void
 561 fail_link(struct gl_shader_program *prog, const char *fmt, ...)
 562 {
 563    va_list args;
 564    va_start(args, fmt);
 565    ralloc_vasprintf_append(&prog->InfoLog, fmt, args);
 566    va_end(args);
 567
 568    prog->LinkStatus = GL_FALSE;
 569 }
 570
 571 static int
 572 swizzle_for_size(int size)
 573 {
 574    static const int size_swizzles[4] = {
 575       MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
 576       MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y),
 577       MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z),
 578       MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W),
 579    };
 580
 581    assert((size >= 1) && (size <= 4));
 582    return size_swizzles[size - 1];
 583 }
 584
 585 static bool
 586 is_resource_instruction(unsigned opcode)
 587 {
 588    switch (opcode) {
 589    case TGSI_OPCODE_RESQ:
 590    case TGSI_OPCODE_LOAD:
 591    case TGSI_OPCODE_ATOMUADD:
 592    case TGSI_OPCODE_ATOMXCHG:
 593    case TGSI_OPCODE_ATOMCAS:
 594    case TGSI_OPCODE_ATOMAND:
 595    case TGSI_OPCODE_ATOMOR:
 596    case TGSI_OPCODE_ATOMXOR:
 597    case TGSI_OPCODE_ATOMUMIN:
 598    case TGSI_OPCODE_ATOMUMAX:
 599    case TGSI_OPCODE_ATOMIMIN:
 600    case TGSI_OPCODE_ATOMIMAX:
 601       return true;
 602    default:
 603       return false;
 604    }
 605 }
 606
/**
 * Number of destination registers of an instruction, taken from its opcode
 * info (op->info->num_dst).
 */
static unsigned
num_inst_dst_regs(const glsl_to_tgsi_instruction *op)
{
   return op->info->num_dst;
}
 612
 613 static unsigned
 614 num_inst_src_regs(const glsl_to_tgsi_instruction *op)
 615 {
 616    return op->info->is_tex || is_resource_instruction(op->op) ?
 617       op->info->num_src - 1 : op->info->num_src;
 618 }
 619
 620 glsl_to_tgsi_instruction *
 621 glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, unsigned op,
 622                                st_dst_reg dst, st_dst_reg dst1,
 623                                st_src_reg src0, st_src_reg src1,
 624                                st_src_reg src2, st_src_reg src3)
 625 {
 626    glsl_to_tgsi_instruction *inst = new(mem_ctx) glsl_to_tgsi_instruction();
 627    int num_reladdr = 0, i, j;
 628    bool dst_is_double[2];
 629
 630    op = get_opcode(ir, op, dst, src0, src1);
 631
 632    /* If we have to do relative addressing, we want to load the ARL
 633     * reg directly for one of the regs, and preload the other reladdr
 634     * sources into temps.
 635     */
 636    num_reladdr += dst.reladdr != NULL || dst.reladdr2;
 637    num_reladdr += dst1.reladdr != NULL || dst1.reladdr2;
 638    num_reladdr += src0.reladdr != NULL || src0.reladdr2 != NULL;
 639    num_reladdr += src1.reladdr != NULL || src1.reladdr2 != NULL;
 640    num_reladdr += src2.reladdr != NULL || src2.reladdr2 != NULL;
 641    num_reladdr += src3.reladdr != NULL || src3.reladdr2 != NULL;
 642
 643    reladdr_to_temp(ir, &src3, &num_reladdr);
 644    reladdr_to_temp(ir, &src2, &num_reladdr);
 645    reladdr_to_temp(ir, &src1, &num_reladdr);
 646    reladdr_to_temp(ir, &src0, &num_reladdr);
 647
 648    if (dst.reladdr || dst.reladdr2) {
 649       if (dst.reladdr)
 650          emit_arl(ir, address_reg, *dst.reladdr);
 651       if (dst.reladdr2)
 652          emit_arl(ir, address_reg2, *dst.reladdr2);
 653       num_reladdr--;
 654    }
 655    if (dst1.reladdr) {
 656       emit_arl(ir, address_reg, *dst1.reladdr);
 657       num_reladdr--;
 658    }
 659    assert(num_reladdr == 0);
 660
 661    inst->op = op;
 662    inst->info = tgsi_get_opcode_info(op);
 663    inst->dst[0] = dst;
 664    inst->dst[1] = dst1;
 665    inst->src[0] = src0;
 666    inst->src[1] = src1;
 667    inst->src[2] = src2;
 668    inst->src[3] = src3;
 669    inst->ir = ir;
 670    inst->dead_mask = 0;
 671    /* default to float, for paths where this is not initialized
 672     * (since 0==UINT which is likely wrong):
 673     */
 674    inst->tex_type = GLSL_TYPE_FLOAT;
 675
 676    inst->function = NULL;
 677
 678    /* Update indirect addressing status used by TGSI */
 679    if (dst.reladdr || dst.reladdr2) {
 680       switch(dst.file) {
 681       case PROGRAM_STATE_VAR:
 682       case PROGRAM_CONSTANT:
 683       case PROGRAM_UNIFORM:
 684          this->indirect_addr_consts = true;
 685          break;
 686       case PROGRAM_IMMEDIATE:
 687          assert(!"immediates should not have indirect addressing");
 688          break;
 689       default:
 690          break;
 691       }
 692    }
 693    else {
 694       for (i = 0; i < 4; i++) {
 695          if(inst->src[i].reladdr) {
 696             switch(inst->src[i].file) {
 697             case PROGRAM_STATE_VAR:
 698             case PROGRAM_CONSTANT:
 699             case PROGRAM_UNIFORM:
 700                this->indirect_addr_consts = true;
 701                break;
 702             case PROGRAM_IMMEDIATE:
 703                assert(!"immediates should not have indirect addressing");
 704                break;
 705             default:
 706                break;
 707             }
 708          }
 709       }
 710    }
 711
 712    /*
 713     * This section contains the double processing.
 714     * GLSL just represents doubles as single channel values,
 715     * however most HW and TGSI represent doubles as pairs of register channels.
 716     *
 717     * so we have to fixup destination writemask/index and src swizzle/indexes.
 718     * dest writemasks need to translate from single channel write mask
 719     * to a dual-channel writemask, but also need to modify the index,
 720     * if we are touching the Z,W fields in the pre-translated writemask.
 721     *
 722     * src channels have similiar index modifications along with swizzle
 723     * changes to we pick the XY, ZW pairs from the correct index.
 724     *
 725     * GLSL [0].x -> TGSI [0].xy
 726     * GLSL [0].y -> TGSI [0].zw
 727     * GLSL [0].z -> TGSI [1].xy
 728     * GLSL [0].w -> TGSI [1].zw
 729     */
 730    for (j = 0; j < 2; j++) {
 731       dst_is_double[j] = false;
 732       if (inst->dst[j].type == GLSL_TYPE_DOUBLE)
 733          dst_is_double[j] = true;
 734       else if (inst->dst[j].file == PROGRAM_OUTPUT && inst->dst[j].type == GLSL_TYPE_ARRAY) {
 735          unsigned type = find_array_type(this->output_arrays, this->num_output_arrays, inst->dst[j].array_id);
 736          if (type == GLSL_TYPE_DOUBLE)
 737             dst_is_double[j] = true;
 738       }
 739    }
 740
 741    if (dst_is_double[0] || dst_is_double[1] ||
 742        inst->src[0].type == GLSL_TYPE_DOUBLE) {
 743       glsl_to_tgsi_instruction *dinst = NULL;
 744       int initial_src_swz[4], initial_src_idx[4];
 745       int initial_dst_idx[2], initial_dst_writemask[2];
 746       /* select the writemask for dst0 or dst1 */
 747       unsigned writemask = inst->dst[1].file == PROGRAM_UNDEFINED ? inst->dst[0].writemask : inst->dst[1].writemask;
 748
 749       /* copy out the writemask, index and swizzles for all src/dsts. */
 750       for (j = 0; j < 2; j++) {
 751          initial_dst_writemask[j] = inst->dst[j].writemask;
 752          initial_dst_idx[j] = inst->dst[j].index;
 753       }
 754
 755       for (j = 0; j < 4; j++) {
 756          initial_src_swz[j] = inst->src[j].swizzle;
 757          initial_src_idx[j] = inst->src[j].index;
 758       }
 759
 760       /*
 761        * scan all the components in the dst writemask
 762        * generate an instruction for each of them if required.
 763        */
 764       st_src_reg addr;
 765       while (writemask) {
 766
 767          int i = u_bit_scan(&writemask);
 768
 769          /* before emitting the instruction, see if we have to adjust store
 770           * address */
 771          if (i > 1 && inst->op == TGSI_OPCODE_STORE &&
 772              addr.file == PROGRAM_UNDEFINED) {
 773             /* We have to advance the buffer address by 16 */
 774             addr = get_temp(glsl_type::uint_type);
 775             emit_asm(ir, TGSI_OPCODE_UADD, st_dst_reg(addr),
 776                      inst->src[0], st_src_reg_for_int(16));
 777          }
 778
 779
 780          /* first time use previous instruction */
 781          if (dinst == NULL) {
 782             dinst = inst;
 783          } else {
 784             /* create a new instructions for subsequent attempts */
 785             dinst = new(mem_ctx) glsl_to_tgsi_instruction();
 786             *dinst = *inst;
 787             dinst->next = NULL;
 788             dinst->prev = NULL;
 789          }
 790          this->instructions.push_tail(dinst);
 791
 792          /* modify the destination if we are splitting */
 793          for (j = 0; j < 2; j++) {
 794             if (dst_is_double[j]) {
 795                dinst->dst[j].writemask = (i & 1) ? WRITEMASK_ZW : WRITEMASK_XY;
 796                dinst->dst[j].index = initial_dst_idx[j];
 797                if (i > 1) {
 798                   if (dinst->op == TGSI_OPCODE_STORE) {
 799                      dinst->src[0] = addr;
 800                   } else {
 801                      dinst->dst[j].index++;
 802                   }
 803                }
 804             } else {
 805                /* if we aren't writing to a double, just get the bit of the initial writemask
 806                   for this channel */
 807                dinst->dst[j].writemask = initial_dst_writemask[j] & (1 << i);
 808             }
 809          }
 810
 811          /* modify the src registers */
 812          for (j = 0; j < 4; j++) {
 813             int swz = GET_SWZ(initial_src_swz[j], i);
 814
 815             if (dinst->src[j].type == GLSL_TYPE_DOUBLE) {
 816                dinst->src[j].index = initial_src_idx[j];
 817                if (swz > 1) {
 818                   dinst->src[j].double_reg2 = true;
 819                   dinst->src[j].index++;
 820                }
 821
 822                if (swz & 1)
 823                   dinst->src[j].swizzle = MAKE_SWIZZLE4(SWIZZLE_Z, SWIZZLE_W, SWIZZLE_Z, SWIZZLE_W);
 824                else
 825                   dinst->src[j].swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_X, SWIZZLE_Y);
 826
 827             } else {
 828                /* some opcodes are special case in what they use as sources
 829                   - F2D is a float src0, DLDEXP is integer src1 */
 830                if (op == TGSI_OPCODE_F2D ||
 831                    op == TGSI_OPCODE_DLDEXP ||
 832                    (op == TGSI_OPCODE_UCMP && dst_is_double[0])) {
 833                   dinst->src[j].swizzle = MAKE_SWIZZLE4(swz, swz, swz, swz);
 834                }
 835             }
 836          }
 837       }
 838       inst = dinst;
 839    } else {
 840       this->instructions.push_tail(inst);
 841    }
 842
 843
 844    return inst;
 845 }
 846
/**
 * Single-destination convenience overload: forwards to the two-destination
 * emit_asm() with undef_dst as the second destination and returns its
 * result unchanged.
 */
glsl_to_tgsi_instruction *
glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, unsigned op,
                               st_dst_reg dst,
                               st_src_reg src0, st_src_reg src1,
                               st_src_reg src2, st_src_reg src3)
{
   return emit_asm(ir, op, dst, undef_dst, src0, src1, src2, src3);
}
 855
/**
 * Determines whether to use an integer, unsigned integer, float or double
 * opcode based on the operand types and the input opcode, and returns the
 * selected opcode.  Nothing is emitted here.
 *
 * \param ir    unused in this function
 * \param op    generic TGSI opcode to specialize
 * \param dst   unused in this function
 * \param src0  first source operand; its type drives the selection
 * \param src1  second source operand; its type drives the selection
 * \return the type-specific opcode, or \c op unchanged when it is MOV or
 *         has no entry in the table below
 */
unsigned
glsl_to_tgsi_visitor::get_opcode(ir_instruction *ir, unsigned op,
                                 st_dst_reg dst,
                                 st_src_reg src0, st_src_reg src1)
{
   int type = GLSL_TYPE_FLOAT;

   /* MOV is returned as-is, before any operand type checks. */
   if (op == TGSI_OPCODE_MOV)
       return op;

   assert(src0.type != GLSL_TYPE_ARRAY);
   assert(src0.type != GLSL_TYPE_STRUCT);
   assert(src1.type != GLSL_TYPE_ARRAY);
   assert(src1.type != GLSL_TYPE_STRUCT);

   /* Pick the type that selects the opcode variant:
    *  - resource instructions use the type of src1,
    *  - otherwise double wins over float if either source has that type,
    *  - otherwise, with native integers, src0's type is used (bool is
    *    treated as int),
    *  - otherwise the GLSL_TYPE_FLOAT default stays.
    */
   if (is_resource_instruction(op))
      type = src1.type;
   else if (src0.type == GLSL_TYPE_DOUBLE || src1.type == GLSL_TYPE_DOUBLE)
      type = GLSL_TYPE_DOUBLE;
   else if (src0.type == GLSL_TYPE_FLOAT || src1.type == GLSL_TYPE_FLOAT)
      type = GLSL_TYPE_FLOAT;
   else if (native_integers)
      type = src0.type == GLSL_TYPE_BOOL ? GLSL_TYPE_INT : src0.type;

/* case5: opcode c maps to d (double), i (int), u (uint), else f. */
#define case5(c, f, i, u, d)                    \
   case TGSI_OPCODE_##c: \
      if (type == GLSL_TYPE_DOUBLE)           \
         op = TGSI_OPCODE_##d; \
      else if (type == GLSL_TYPE_INT)       \
         op = TGSI_OPCODE_##i; \
      else if (type == GLSL_TYPE_UINT) \
         op = TGSI_OPCODE_##u; \
      else \
         op = TGSI_OPCODE_##f; \
      break;

/* case4: like case5 but without a double variant. */
#define case4(c, f, i, u)                    \
   case TGSI_OPCODE_##c: \
      if (type == GLSL_TYPE_INT) \
         op = TGSI_OPCODE_##i; \
      else if (type == GLSL_TYPE_UINT) \
         op = TGSI_OPCODE_##u; \
      else \
         op = TGSI_OPCODE_##f; \
      break;

/* Shorthands built on case4/case5.  case2iu has no float variant: it maps
 * the float slot to TGSI_OPCODE_LAST, which the assert at the end of the
 * function rejects.
 */
#define case3(f, i, u)  case4(f, f, i, u)
#define case4d(f, i, u, d)  case5(f, f, i, u, d)
#define case3fid(f, i, d) case5(f, f, i, i, d)
#define case2fi(f, i)   case4(f, f, i, i)
#define case2iu(i, u)   case4(i, LAST, i, u)

/* casecomp: comparison opcodes.  Subroutine types are handled like int.
 * For any remaining type, f is used with native integers and the original
 * opcode c is kept without them.
 */
#define casecomp(c, f, i, u, d)                   \
   case TGSI_OPCODE_##c: \
      if (type == GLSL_TYPE_DOUBLE) \
         op = TGSI_OPCODE_##d; \
      else if (type == GLSL_TYPE_INT || type == GLSL_TYPE_SUBROUTINE)       \
         op = TGSI_OPCODE_##i; \
      else if (type == GLSL_TYPE_UINT) \
         op = TGSI_OPCODE_##u; \
      else if (native_integers) \
         op = TGSI_OPCODE_##f; \
      else \
         op = TGSI_OPCODE_##c; \
      break;

   switch(op) {
      case3fid(ADD, UADD, DADD);
      case3fid(MUL, UMUL, DMUL);
      case3fid(MAD, UMAD, DMAD);
      case3fid(FMA, UMAD, DFMA);
      case3(DIV, IDIV, UDIV);
      case4d(MAX, IMAX, UMAX, DMAX);
      case4d(MIN, IMIN, UMIN, DMIN);
      case2iu(MOD, UMOD);

      casecomp(SEQ, FSEQ, USEQ, USEQ, DSEQ);
      casecomp(SNE, FSNE, USNE, USNE, DSNE);
      casecomp(SGE, FSGE, ISGE, USGE, DSGE);
      casecomp(SLT, FSLT, ISLT, USLT, DSLT);

      case2iu(ISHR, USHR);

      case3fid(SSG, ISSG, DSSG);
      case3fid(ABS, IABS, DABS);

      case2iu(IBFE, UBFE);
      case2iu(IMSB, UMSB);
      case2iu(IMUL_HI, UMUL_HI);

      case3fid(SQRT, SQRT, DSQRT);

      case3fid(RCP, RCP, DRCP);
      case3fid(RSQ, RSQ, DRSQ);

      case3fid(FRC, FRC, DFRAC);
      case3fid(TRUNC, TRUNC, DTRUNC);
      case3fid(CEIL, CEIL, DCEIL);
      case3fid(FLR, FLR, DFLR);
      case3fid(ROUND, ROUND, DROUND);

      case2iu(ATOMIMAX, ATOMUMAX);
      case2iu(ATOMIMIN, ATOMUMIN);

      /* Opcodes without an entry above are returned unchanged. */
      default: break;
   }

   /* Fires when an integer-only opcode (case2iu) was requested with a
    * non-integer type.
    */
   assert(op != TGSI_OPCODE_LAST);
   return op;
}
 970
 971 glsl_to_tgsi_instruction *
 972 glsl_to_tgsi_visitor::emit_dp(ir_instruction *ir,
 973                               st_dst_reg dst, st_src_reg src0, st_src_reg src1,
 974                               unsigned elements)
 975 {
 976    static const unsigned dot_opcodes[] = {
 977       TGSI_OPCODE_DP2, TGSI_OPCODE_DP3, TGSI_OPCODE_DP4
 978    };
 979
 980    return emit_asm(ir, dot_opcodes[elements - 2], dst, src0, src1);
 981 }
 982
 983 /**
 984  * Emits TGSI scalar opcodes to produce unique answers across channels.
 985  *
 986  * Some TGSI opcodes are scalar-only, like ARB_fp/vp.  The src X
 987  * channel determines the result across all channels.  So to do a vec4
 988  * of this operation, we want to emit a scalar per source channel used
 989  * to produce dest channels.
 990  */
 991 void
 992 glsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, unsigned op,
 993                                   st_dst_reg dst,
 994                                   st_src_reg orig_src0, st_src_reg orig_src1)
 995 {
 996    int i, j;
 997    int done_mask = ~dst.writemask;
 998
 999    /* TGSI RCP is a scalar operation splatting results to all channels,
1000     * like ARB_fp/vp.  So emit as many RCPs as necessary to cover our
1001     * dst channels.
1002     */
1003    for (i = 0; i < 4; i++) {
1004       GLuint this_mask = (1 << i);
1005       st_src_reg src0 = orig_src0;
1006       st_src_reg src1 = orig_src1;
1007
1008       if (done_mask & this_mask)
1009          continue;
1010
1011       GLuint src0_swiz = GET_SWZ(src0.swizzle, i);
1012       GLuint src1_swiz = GET_SWZ(src1.swizzle, i);
1013       for (j = i + 1; j < 4; j++) {
1014          /* If there is another enabled component in the destination that is
1015           * derived from the same inputs, generate its value on this pass as
1016           * well.
1017           */
1018          if (!(done_mask & (1 << j)) &&
1019              GET_SWZ(src0.swizzle, j) == src0_swiz &&
1020              GET_SWZ(src1.swizzle, j) == src1_swiz) {
1021             this_mask |= (1 << j);
1022          }
1023       }
1024       src0.swizzle = MAKE_SWIZZLE4(src0_swiz, src0_swiz,
1025                                    src0_swiz, src0_swiz);
1026       src1.swizzle = MAKE_SWIZZLE4(src1_swiz, src1_swiz,
1027                                    src1_swiz, src1_swiz);
1028
1029       dst.writemask = this_mask;
1030       emit_asm(ir, op, dst, src0, src1);
1031       done_mask |= this_mask;
1032    }
1033 }
1034
1035 void
1036 glsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, unsigned op,
1037                                   st_dst_reg dst, st_src_reg src0)
1038 {
1039    st_src_reg undef = undef_src;
1040
1041    undef.swizzle = SWIZZLE_XXXX;
1042
1043    emit_scalar(ir, op, dst, src0, undef);
1044 }
1045
1046 void
1047 glsl_to_tgsi_visitor::emit_arl(ir_instruction *ir,
1048                                st_dst_reg dst, st_src_reg src0)
1049 {
1050    int op = TGSI_OPCODE_ARL;
1051
1052    if (src0.type == GLSL_TYPE_INT || src0.type == GLSL_TYPE_UINT)
1053       op = TGSI_OPCODE_UARL;
1054
1055    assert(dst.file == PROGRAM_ADDRESS);
1056    if (dst.index >= this->num_address_regs)
1057       this->num_address_regs = dst.index + 1;
1058
1059    emit_asm(NULL, op, dst, src0);
1060 }
1061
1062 int
1063 glsl_to_tgsi_visitor::add_constant(gl_register_file file,
1064                                    gl_constant_value values[8], int size, int datatype,
1065                                    GLuint *swizzle_out)
1066 {
1067    if (file == PROGRAM_CONSTANT) {
1068       return _mesa_add_typed_unnamed_constant(this->prog->Parameters, values,
1069                                               size, datatype, swizzle_out);
1070    }
1071
1072    assert(file == PROGRAM_IMMEDIATE);
1073
1074    int index = 0;
1075    immediate_storage *entry;
1076    int size32 = size * (datatype == GL_DOUBLE ? 2 : 1);
1077    int i;
1078
1079    /* Search immediate storage to see if we already have an identical
1080     * immediate that we can use instead of adding a duplicate entry.
1081     */
1082    foreach_in_list(immediate_storage, entry, &this->immediates) {
1083       immediate_storage *tmp = entry;
1084
1085       for (i = 0; i * 4 < size32; i++) {
1086          int slot_size = MIN2(size32 - (i * 4), 4);
1087          if (tmp->type != datatype || tmp->size32 != slot_size)
1088             break;
1089          if (memcmp(tmp->values, &values[i * 4],
1090                     slot_size * sizeof(gl_constant_value)))
1091             break;
1092
1093          /* Everything matches, keep going until the full size is matched */
1094          tmp = (immediate_storage *)tmp->next;
1095       }
1096
1097       /* The full value matched */
1098       if (i * 4 >= size32)
1099          return index;
1100
1101       index++;
1102    }
1103
1104    for (i = 0; i * 4 < size32; i++) {
1105       int slot_size = MIN2(size32 - (i * 4), 4);
1106       /* Add this immediate to the list. */
1107       entry = new(mem_ctx) immediate_storage(&values[i * 4], slot_size, datatype);
1108       this->immediates.push_tail(entry);
1109       this->num_immediates++;
1110    }
1111    return index;
1112 }
1113
1114 st_src_reg
1115 glsl_to_tgsi_visitor::st_src_reg_for_float(float val)
1116 {
1117    st_src_reg src(PROGRAM_IMMEDIATE, -1, GLSL_TYPE_FLOAT);
1118    union gl_constant_value uval;
1119
1120    uval.f = val;
1121    src.index = add_constant(src.file, &uval, 1, GL_FLOAT, &src.swizzle);
1122
1123    return src;
1124 }
1125
1126 st_src_reg
1127 glsl_to_tgsi_visitor::st_src_reg_for_double(double val)
1128 {
1129    st_src_reg src(PROGRAM_IMMEDIATE, -1, GLSL_TYPE_DOUBLE);
1130    union gl_constant_value uval[2];
1131
1132    uval[0].u = *(uint32_t *)&val;
1133    uval[1].u = *(((uint32_t *)&val) + 1);
1134    src.index = add_constant(src.file, uval, 1, GL_DOUBLE, &src.swizzle);
1135
1136    return src;
1137 }
1138
1139 st_src_reg
1140 glsl_to_tgsi_visitor::st_src_reg_for_int(int val)
1141 {
1142    st_src_reg src(PROGRAM_IMMEDIATE, -1, GLSL_TYPE_INT);
1143    union gl_constant_value uval;
1144
1145    assert(native_integers);
1146
1147    uval.i = val;
1148    src.index = add_constant(src.file, &uval, 1, GL_INT, &src.swizzle);
1149
1150    return src;
1151 }
1152
1153 st_src_reg
1154 glsl_to_tgsi_visitor::st_src_reg_for_type(int type, int val)
1155 {
1156    if (native_integers)
1157       return type == GLSL_TYPE_FLOAT ? st_src_reg_for_float(val) :
1158                                        st_src_reg_for_int(val);
1159    else
1160       return st_src_reg_for_float(val);
1161 }
1162
1163 static int
1164 attrib_type_size(const struct glsl_type *type, bool is_vs_input)
1165 {
1166    unsigned int i;
1167    int size;
1168
1169    switch (type->base_type) {
1170    case GLSL_TYPE_UINT:
1171    case GLSL_TYPE_INT:
1172    case GLSL_TYPE_FLOAT:
1173    case GLSL_TYPE_BOOL:
1174       if (type->is_matrix()) {
1175          return type->matrix_columns;
1176       } else {
1177          /* Regardless of size of vector, it gets a vec4. This is bad
1178           * packing for things like floats, but otherwise arrays become a
1179           * mess.  Hopefully a later pass over the code can pack scalars
1180           * down if appropriate.
1181           */
1182          return 1;
1183       }
1184       break;
1185    case GLSL_TYPE_DOUBLE:
1186       if (type->is_matrix()) {
1187          if (type->vector_elements <= 2 || is_vs_input)
1188             return type->matrix_columns;
1189          else
1190             return type->matrix_columns * 2;
1191       } else {
1192          /* For doubles if we have a double or dvec2 they fit in one
1193           * vec4, else they need 2 vec4s.
1194           */
1195          if (type->vector_elements <= 2 || is_vs_input)
1196             return 1;
1197          else
1198             return 2;
1199       }
1200       break;
1201    case GLSL_TYPE_ARRAY:
1202       assert(type->length > 0);
1203       return attrib_type_size(type->fields.array, is_vs_input) * type->length;
1204    case GLSL_TYPE_STRUCT:
1205       size = 0;
1206       for (i = 0; i < type->length; i++) {
1207          size += attrib_type_size(type->fields.structure[i].type, is_vs_input);
1208       }
1209       return size;
1210    case GLSL_TYPE_SAMPLER:
1211    case GLSL_TYPE_IMAGE:
1212    case GLSL_TYPE_SUBROUTINE:
1213       /* Samplers take up one slot in UNIFORMS[], but they're baked in
1214        * at link time.
1215        */
1216       return 1;
1217    case GLSL_TYPE_ATOMIC_UINT:
1218    case GLSL_TYPE_INTERFACE:
1219    case GLSL_TYPE_VOID:
1220    case GLSL_TYPE_ERROR:
1221    case GLSL_TYPE_FUNCTION:
1222       assert(!"Invalid type in type_size");
1223       break;
1224    }
1225    return 0;
1226 }
1227
1228 static int
1229 type_size(const struct glsl_type *type)
1230 {
1231   return attrib_type_size(type, false);
1232 }
1233
1234 /**
1235  * If the given GLSL type is an array or matrix or a structure containing
1236  * an array/matrix member, return true.  Else return false.
1237  *
1238  * This is used to determine which kind of temp storage (PROGRAM_TEMPORARY
1239  * or PROGRAM_ARRAY) should be used for variables of this type.  Anytime
1240  * we have an array that might be indexed with a variable, we need to use
1241  * the later storage type.
1242  */
1243 static bool
1244 type_has_array_or_matrix(const glsl_type *type)
1245 {
1246    if (type->is_array() || type->is_matrix())
1247       return true;
1248
1249    if (type->is_record()) {
1250       for (unsigned i = 0; i < type->length; i++) {
1251          if (type_has_array_or_matrix(type->fields.structure[i].type)) {
1252             return true;
1253          }
1254       }
1255    }
1256
1257    return false;
1258 }
1259
1260
1261 /**
1262  * In the initial pass of codegen, we assign temporary numbers to
1263  * intermediate results.  (not SSA -- variable assignments will reuse
1264  * storage).
1265  */
1266 st_src_reg
1267 glsl_to_tgsi_visitor::get_temp(const glsl_type *type)
1268 {
1269    st_src_reg src;
1270
1271    src.type = native_integers ? type->base_type : GLSL_TYPE_FLOAT;
1272    src.reladdr = NULL;
1273    src.negate = 0;
1274
1275    if (!options->EmitNoIndirectTemp && type_has_array_or_matrix(type)) {
1276       if (next_array >= max_num_arrays) {
1277          max_num_arrays += 32;
1278          array_sizes = (unsigned*)
1279             realloc(array_sizes, sizeof(array_sizes[0]) * max_num_arrays);
1280       }
1281
1282       src.file = PROGRAM_ARRAY;
1283       src.index = next_array << 16 | 0x8000;
1284       array_sizes[next_array] = type_size(type);
1285       ++next_array;
1286
1287    } else {
1288       src.file = PROGRAM_TEMPORARY;
1289       src.index = next_temp;
1290       next_temp += type_size(type);
1291    }
1292
1293    if (type->is_array() || type->is_record()) {
1294       src.swizzle = SWIZZLE_NOOP;
1295    } else {
1296       src.swizzle = swizzle_for_size(type->vector_elements);
1297    }
1298
1299    return src;
1300 }
1301
1302 variable_storage *
1303 glsl_to_tgsi_visitor::find_variable_storage(ir_variable *var)
1304 {
1305
1306    foreach_in_list(variable_storage, entry, &this->variables) {
1307       if (entry->var == var)
1308          return entry;
1309    }
1310
1311    return NULL;
1312 }
1313
1314 void
1315 glsl_to_tgsi_visitor::visit(ir_variable *ir)
1316 {
1317    if (strcmp(ir->name, "gl_FragCoord") == 0) {
1318       struct gl_fragment_program *fp = (struct gl_fragment_program *)this->prog;
1319
1320       fp->OriginUpperLeft = ir->data.origin_upper_left;
1321       fp->PixelCenterInteger = ir->data.pixel_center_integer;
1322    }
1323
1324    if (ir->data.mode == ir_var_uniform && strncmp(ir->name, "gl_", 3) == 0) {
1325       unsigned int i;
1326       const ir_state_slot *const slots = ir->get_state_slots();
1327       assert(slots != NULL);
1328
1329       /* Check if this statevar's setup in the STATE file exactly
1330        * matches how we'll want to reference it as a
1331        * struct/array/whatever.  If not, then we need to move it into
1332        * temporary storage and hope that it'll get copy-propagated
1333        * out.
1334        */
1335       for (i = 0; i < ir->get_num_state_slots(); i++) {
1336          if (slots[i].swizzle != SWIZZLE_XYZW) {
1337             break;
1338          }
1339       }
1340
1341       variable_storage *storage;
1342       st_dst_reg dst;
1343       if (i == ir->get_num_state_slots()) {
1344          /* We'll set the index later. */
1345          storage = new(mem_ctx) variable_storage(ir, PROGRAM_STATE_VAR, -1);
1346          this->variables.push_tail(storage);
1347
1348          dst = undef_dst;
1349       } else {
1350          /* The variable_storage constructor allocates slots based on the size
1351           * of the type.  However, this had better match the number of state
1352           * elements that we're going to copy into the new temporary.
1353           */
1354          assert((int) ir->get_num_state_slots() == type_size(ir->type));
1355
1356          dst = st_dst_reg(get_temp(ir->type));
1357
1358          storage = new(mem_ctx) variable_storage(ir, dst.file, dst.index);
1359
1360          this->variables.push_tail(storage);
1361       }
1362
1363
1364       for (unsigned int i = 0; i < ir->get_num_state_slots(); i++) {
1365          int index = _mesa_add_state_reference(this->prog->Parameters,
1366                                                (gl_state_index *)slots[i].tokens);
1367
1368          if (storage->file == PROGRAM_STATE_VAR) {
1369             if (storage->index == -1) {
1370                storage->index = index;
1371             } else {
1372                assert(index == storage->index + (int)i);
1373             }
1374          } else {
1375             /* We use GLSL_TYPE_FLOAT here regardless of the actual type of
1376              * the data being moved since MOV does not care about the type of
1377              * data it is moving, and we don't want to declare registers with
1378              * array or struct types.
1379              */
1380             st_src_reg src(PROGRAM_STATE_VAR, index, GLSL_TYPE_FLOAT);
1381             src.swizzle = slots[i].swizzle;
1382             emit_asm(ir, TGSI_OPCODE_MOV, dst, src);
1383             /* even a float takes up a whole vec4 reg in a struct/array. */
1384             dst.index++;
1385          }
1386       }
1387
1388       if (storage->file == PROGRAM_TEMPORARY &&
1389           dst.index != storage->index + (int) ir->get_num_state_slots()) {
1390          fail_link(this->shader_program,
1391                   "failed to load builtin uniform `%s'  (%d/%d regs loaded)\n",
1392                   ir->name, dst.index - storage->index,
1393                   type_size(ir->type));
1394       }
1395    }
1396 }
1397
1398 void
1399 glsl_to_tgsi_visitor::visit(ir_loop *ir)
1400 {
1401    emit_asm(NULL, TGSI_OPCODE_BGNLOOP);
1402
1403    visit_exec_list(&ir->body_instructions, this);
1404
1405    emit_asm(NULL, TGSI_OPCODE_ENDLOOP);
1406 }
1407
1408 void
1409 glsl_to_tgsi_visitor::visit(ir_loop_jump *ir)
1410 {
1411    switch (ir->mode) {
1412    case ir_loop_jump::jump_break:
1413       emit_asm(NULL, TGSI_OPCODE_BRK);
1414       break;
1415    case ir_loop_jump::jump_continue:
1416       emit_asm(NULL, TGSI_OPCODE_CONT);
1417       break;
1418    }
1419 }
1420
1421
1422 void
1423 glsl_to_tgsi_visitor::visit(ir_function_signature *ir)
1424 {
1425    assert(0);
1426    (void)ir;
1427 }
1428
1429 void
1430 glsl_to_tgsi_visitor::visit(ir_function *ir)
1431 {
1432    /* Ignore function bodies other than main() -- we shouldn't see calls to
1433     * them since they should all be inlined before we get to glsl_to_tgsi.
1434     */
1435    if (strcmp(ir->name, "main") == 0) {
1436       const ir_function_signature *sig;
1437       exec_list empty;
1438
1439       sig = ir->matching_signature(NULL, &empty, false);
1440
1441       assert(sig);
1442
1443       foreach_in_list(ir_instruction, ir, &sig->body) {
1444          ir->accept(this);
1445       }
1446    }
1447 }
1448
1449 bool
1450 glsl_to_tgsi_visitor::try_emit_mad(ir_expression *ir, int mul_operand)
1451 {
1452    int nonmul_operand = 1 - mul_operand;
1453    st_src_reg a, b, c;
1454    st_dst_reg result_dst;
1455
1456    ir_expression *expr = ir->operands[mul_operand]->as_expression();
1457    if (!expr || expr->operation != ir_binop_mul)
1458       return false;
1459
1460    expr->operands[0]->accept(this);
1461    a = this->result;
1462    expr->operands[1]->accept(this);
1463    b = this->result;
1464    ir->operands[nonmul_operand]->accept(this);
1465    c = this->result;
1466
1467    this->result = get_temp(ir->type);
1468    result_dst = st_dst_reg(this->result);
1469    result_dst.writemask = (1 << ir->type->vector_elements) - 1;
1470    emit_asm(ir, TGSI_OPCODE_MAD, result_dst, a, b, c);
1471
1472    return true;
1473 }
1474
1475 /**
1476  * Emit MAD(a, -b, a) instead of AND(a, NOT(b))
1477  *
1478  * The logic values are 1.0 for true and 0.0 for false.  Logical-and is
1479  * implemented using multiplication, and logical-or is implemented using
1480  * addition.  Logical-not can be implemented as (true - x), or (1.0 - x).
1481  * As result, the logical expression (a & !b) can be rewritten as:
1482  *
1483  *     - a * !b
1484  *     - a * (1 - b)
1485  *     - (a * 1) - (a * b)
1486  *     - a + -(a * b)
1487  *     - a + (a * -b)
1488  *
1489  * This final expression can be implemented as a single MAD(a, -b, a)
1490  * instruction.
1491  */
1492 bool
1493 glsl_to_tgsi_visitor::try_emit_mad_for_and_not(ir_expression *ir, int try_operand)
1494 {
1495    const int other_operand = 1 - try_operand;
1496    st_src_reg a, b;
1497
1498    ir_expression *expr = ir->operands[try_operand]->as_expression();
1499    if (!expr || expr->operation != ir_unop_logic_not)
1500       return false;
1501
1502    ir->operands[other_operand]->accept(this);
1503    a = this->result;
1504    expr->operands[0]->accept(this);
1505    b = this->result;
1506
1507    b.negate = ~b.negate;
1508
1509    this->result = get_temp(ir->type);
1510    emit_asm(ir, TGSI_OPCODE_MAD, st_dst_reg(this->result), a, b, a);
1511
1512    return true;
1513 }
1514
1515 void
1516 glsl_to_tgsi_visitor::reladdr_to_temp(ir_instruction *ir,
1517                                       st_src_reg *reg, int *num_reladdr)
1518 {
1519    if (!reg->reladdr && !reg->reladdr2)
1520       return;
1521
1522    if (reg->reladdr) emit_arl(ir, address_reg, *reg->reladdr);
1523    if (reg->reladdr2) emit_arl(ir, address_reg2, *reg->reladdr2);
1524
1525    if (*num_reladdr != 1) {
1526       st_src_reg temp = get_temp(reg->type == GLSL_TYPE_DOUBLE ? glsl_type::dvec4_type : glsl_type::vec4_type);
1527
1528       emit_asm(ir, TGSI_OPCODE_MOV, st_dst_reg(temp), *reg);
1529       *reg = temp;
1530    }
1531
1532    (*num_reladdr)--;
1533 }
1534
1535 void
1536 glsl_to_tgsi_visitor::visit(ir_expression *ir)
1537 {
1538    unsigned int operand;
1539    st_src_reg op[ARRAY_SIZE(ir->operands)];
1540    st_src_reg result_src;
1541    st_dst_reg result_dst;
1542
1543    /* Quick peephole: Emit MAD(a, b, c) instead of ADD(MUL(a, b), c)
1544     */
1545    if (ir->operation == ir_binop_add) {
1546       if (try_emit_mad(ir, 1))
1547          return;
1548       if (try_emit_mad(ir, 0))
1549          return;
1550    }
1551
1552    /* Quick peephole: Emit OPCODE_MAD(-a, -b, a) instead of AND(a, NOT(b))
1553     */
1554    if (!native_integers && ir->operation == ir_binop_logic_and) {
1555       if (try_emit_mad_for_and_not(ir, 1))
1556          return;
1557       if (try_emit_mad_for_and_not(ir, 0))
1558          return;
1559    }
1560
1561    if (ir->operation == ir_quadop_vector)
1562       assert(!"ir_quadop_vector should have been lowered");
1563
1564    for (operand = 0; operand < ir->get_num_operands(); operand++) {
1565       this->result.file = PROGRAM_UNDEFINED;
1566       ir->operands[operand]->accept(this);
1567       if (this->result.file == PROGRAM_UNDEFINED) {
1568          printf("Failed to get tree for expression operand:\n");
1569          ir->operands[operand]->print();
1570          printf("\n");
1571          exit(1);
1572       }
1573       op[operand] = this->result;
1574
1575       /* Matrix expression operands should have been broken down to vector
1576        * operations already.
1577        */
1578       assert(!ir->operands[operand]->type->is_matrix());
1579    }
1580
1581    int vector_elements = ir->operands[0]->type->vector_elements;
1582    if (ir->operands[1]) {
1583       vector_elements = MAX2(vector_elements,
1584                              ir->operands[1]->type->vector_elements);
1585    }
1586
1587    this->result.file = PROGRAM_UNDEFINED;
1588
1589    /* Storage for our result.  Ideally for an assignment we'd be using
1590     * the actual storage for the result here, instead.
1591     */
1592    result_src = get_temp(ir->type);
1593    /* convenience for the emit functions below. */
1594    result_dst = st_dst_reg(result_src);
1595    /* Limit writes to the channels that will be used by result_src later.
1596     * This does limit this temp's use as a temporary for multi-instruction
1597     * sequences.
1598     */
1599    result_dst.writemask = (1 << ir->type->vector_elements) - 1;
1600
1601    switch (ir->operation) {
1602    case ir_unop_logic_not:
1603       if (result_dst.type != GLSL_TYPE_FLOAT)
1604          emit_asm(ir, TGSI_OPCODE_NOT, result_dst, op[0]);
1605       else {
1606          /* Previously 'SEQ dst, src, 0.0' was used for this.  However, many
1607           * older GPUs implement SEQ using multiple instructions (i915 uses two
1608           * SGE instructions and a MUL instruction).  Since our logic values are
1609           * 0.0 and 1.0, 1-x also implements !x.
1610           */
1611          op[0].negate = ~op[0].negate;
1612          emit_asm(ir, TGSI_OPCODE_ADD, result_dst, op[0], st_src_reg_for_float(1.0));
1613       }
1614       break;
1615    case ir_unop_neg:
1616       if (result_dst.type == GLSL_TYPE_INT || result_dst.type == GLSL_TYPE_UINT)
1617          emit_asm(ir, TGSI_OPCODE_INEG, result_dst, op[0]);
1618       else if (result_dst.type == GLSL_TYPE_DOUBLE)
1619          emit_asm(ir, TGSI_OPCODE_DNEG, result_dst, op[0]);
1620       else {
1621          op[0].negate = ~op[0].negate;
1622          result_src = op[0];
1623       }
1624       break;
1625    case ir_unop_subroutine_to_int:
1626       emit_asm(ir, TGSI_OPCODE_MOV, result_dst, op[0]);
1627       break;
1628    case ir_unop_abs:
1629       emit_asm(ir, TGSI_OPCODE_ABS, result_dst, op[0]);
1630       break;
1631    case ir_unop_sign:
1632       emit_asm(ir, TGSI_OPCODE_SSG, result_dst, op[0]);
1633       break;
1634    case ir_unop_rcp:
1635       emit_scalar(ir, TGSI_OPCODE_RCP, result_dst, op[0]);
1636       break;
1637
1638    case ir_unop_exp2:
1639       emit_scalar(ir, TGSI_OPCODE_EX2, result_dst, op[0]);
1640       break;
1641    case ir_unop_exp:
1642    case ir_unop_log:
1643       assert(!"not reached: should be handled by ir_explog_to_explog2");
1644       break;
1645    case ir_unop_log2:
1646       emit_scalar(ir, TGSI_OPCODE_LG2, result_dst, op[0]);
1647       break;
1648    case ir_unop_sin:
1649       emit_scalar(ir, TGSI_OPCODE_SIN, result_dst, op[0]);
1650       break;
1651    case ir_unop_cos:
1652       emit_scalar(ir, TGSI_OPCODE_COS, result_dst, op[0]);
1653       break;
1654    case ir_unop_saturate: {
1655       glsl_to_tgsi_instruction *inst;
1656       inst = emit_asm(ir, TGSI_OPCODE_MOV, result_dst, op[0]);
1657       inst->saturate = true;
1658       break;
1659    }
1660
1661    case ir_unop_dFdx:
1662    case ir_unop_dFdx_coarse:
1663       emit_asm(ir, TGSI_OPCODE_DDX, result_dst, op[0]);
1664       break;
1665    case ir_unop_dFdx_fine:
1666       emit_asm(ir, TGSI_OPCODE_DDX_FINE, result_dst, op[0]);
1667       break;
1668    case ir_unop_dFdy:
1669    case ir_unop_dFdy_coarse:
1670    case ir_unop_dFdy_fine:
1671    {
1672       /* The X component contains 1 or -1 depending on whether the framebuffer
1673        * is a FBO or the window system buffer, respectively.
1674        * It is then multiplied with the source operand of DDY.
1675        */
1676       static const gl_state_index transform_y_state[STATE_LENGTH]
1677          = { STATE_INTERNAL, STATE_FB_WPOS_Y_TRANSFORM };
1678
1679       unsigned transform_y_index =
1680          _mesa_add_state_reference(this->prog->Parameters,
1681                                    transform_y_state);
1682
1683       st_src_reg transform_y = st_src_reg(PROGRAM_STATE_VAR,
1684                                           transform_y_index,
1685                                           glsl_type::vec4_type);
1686       transform_y.swizzle = SWIZZLE_XXXX;
1687
1688       st_src_reg temp = get_temp(glsl_type::vec4_type);
1689
1690       emit_asm(ir, TGSI_OPCODE_MUL, st_dst_reg(temp), transform_y, op[0]);
1691       emit_asm(ir, ir->operation == ir_unop_dFdy_fine ?
1692            TGSI_OPCODE_DDY_FINE : TGSI_OPCODE_DDY, result_dst, temp);
1693       break;
1694    }
1695
1696    case ir_unop_frexp_sig:
1697       emit_asm(ir, TGSI_OPCODE_DFRACEXP, result_dst, undef_dst, op[0]);
1698       break;
1699
1700    case ir_unop_frexp_exp:
1701       emit_asm(ir, TGSI_OPCODE_DFRACEXP, undef_dst, result_dst, op[0]);
1702       break;
1703
1704    case ir_unop_noise: {
1705       /* At some point, a motivated person could add a better
1706        * implementation of noise.  Currently not even the nvidia
1707        * binary drivers do anything more than this.  In any case, the
1708        * place to do this is in the GL state tracker, not the poor
1709        * driver.
1710        */
1711       emit_asm(ir, TGSI_OPCODE_MOV, result_dst, st_src_reg_for_float(0.5));
1712       break;
1713    }
1714
1715    case ir_binop_add:
1716       emit_asm(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]);
1717       break;
1718    case ir_binop_sub:
1719       emit_asm(ir, TGSI_OPCODE_SUB, result_dst, op[0], op[1]);
1720       break;
1721
1722    case ir_binop_mul:
1723       emit_asm(ir, TGSI_OPCODE_MUL, result_dst, op[0], op[1]);
1724       break;
1725    case ir_binop_div:
1726       if (result_dst.type == GLSL_TYPE_FLOAT || result_dst.type == GLSL_TYPE_DOUBLE)
1727          assert(!"not reached: should be handled by ir_div_to_mul_rcp");
1728       else
1729          emit_asm(ir, TGSI_OPCODE_DIV, result_dst, op[0], op[1]);
1730       break;
1731    case ir_binop_mod:
1732       if (result_dst.type == GLSL_TYPE_FLOAT)
1733          assert(!"ir_binop_mod should have been converted to b * fract(a/b)");
1734       else
1735          emit_asm(ir, TGSI_OPCODE_MOD, result_dst, op[0], op[1]);
1736       break;
1737
1738    case ir_binop_less:
1739       emit_asm(ir, TGSI_OPCODE_SLT, result_dst, op[0], op[1]);
1740       break;
1741    case ir_binop_greater:
1742       emit_asm(ir, TGSI_OPCODE_SLT, result_dst, op[1], op[0]);
1743       break;
1744    case ir_binop_lequal:
1745       emit_asm(ir, TGSI_OPCODE_SGE, result_dst, op[1], op[0]);
1746       break;
1747    case ir_binop_gequal:
1748       emit_asm(ir, TGSI_OPCODE_SGE, result_dst, op[0], op[1]);
1749       break;
1750    case ir_binop_equal:
1751       emit_asm(ir, TGSI_OPCODE_SEQ, result_dst, op[0], op[1]);
1752       break;
1753    case ir_binop_nequal:
1754       emit_asm(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]);
1755       break;
1756    case ir_binop_all_equal:
1757       /* "==" operator producing a scalar boolean. */
1758       if (ir->operands[0]->type->is_vector() ||
1759           ir->operands[1]->type->is_vector()) {
1760          st_src_reg temp = get_temp(native_integers ?
1761                                     glsl_type::uvec4_type :
1762                                     glsl_type::vec4_type);
1763
1764          if (native_integers) {
1765             st_dst_reg temp_dst = st_dst_reg(temp);
1766             st_src_reg temp1 = st_src_reg(temp), temp2 = st_src_reg(temp);
1767
1768             if (ir->operands[0]->type->is_boolean() &&
1769                 ir->operands[1]->as_constant() &&
1770                 ir->operands[1]->as_constant()->is_one()) {
1771                emit_asm(ir, TGSI_OPCODE_MOV, st_dst_reg(temp), op[0]);
1772             } else {
1773                emit_asm(ir, TGSI_OPCODE_SEQ, st_dst_reg(temp), op[0], op[1]);
1774             }
1775
1776             /* Emit 1-3 AND operations to combine the SEQ results. */
1777             switch (ir->operands[0]->type->vector_elements) {
1778             case 2:
1779                break;
1780             case 3:
1781                temp_dst.writemask = WRITEMASK_Y;
1782                temp1.swizzle = SWIZZLE_YYYY;
1783                temp2.swizzle = SWIZZLE_ZZZZ;
1784                emit_asm(ir, TGSI_OPCODE_AND, temp_dst, temp1, temp2);
1785                break;
1786             case 4:
1787                temp_dst.writemask = WRITEMASK_X;
1788                temp1.swizzle = SWIZZLE_XXXX;
1789                temp2.swizzle = SWIZZLE_YYYY;
1790                emit_asm(ir, TGSI_OPCODE_AND, temp_dst, temp1, temp2);
1791                temp_dst.writemask = WRITEMASK_Y;
1792                temp1.swizzle = SWIZZLE_ZZZZ;
1793                temp2.swizzle = SWIZZLE_WWWW;
1794                emit_asm(ir, TGSI_OPCODE_AND, temp_dst, temp1, temp2);
1795             }
1796
1797             temp1.swizzle = SWIZZLE_XXXX;
1798             temp2.swizzle = SWIZZLE_YYYY;
1799             emit_asm(ir, TGSI_OPCODE_AND, result_dst, temp1, temp2);
1800          } else {
1801             emit_asm(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]);
1802
1803             /* After the dot-product, the value will be an integer on the
1804              * range [0,4].  Zero becomes 1.0, and positive values become zero.
1805              */
1806             emit_dp(ir, result_dst, temp, temp, vector_elements);
1807
1808             /* Negating the result of the dot-product gives values on the range
1809              * [-4, 0].  Zero becomes 1.0, and negative values become zero.
1810              * This is achieved using SGE.
1811              */
1812             st_src_reg sge_src = result_src;
1813             sge_src.negate = ~sge_src.negate;
1814             emit_asm(ir, TGSI_OPCODE_SGE, result_dst, sge_src, st_src_reg_for_float(0.0));
1815          }
1816       } else {
1817          emit_asm(ir, TGSI_OPCODE_SEQ, result_dst, op[0], op[1]);
1818       }
1819       break;
1820    case ir_binop_any_nequal:
1821       /* "!=" operator producing a scalar boolean. */
1822       if (ir->operands[0]->type->is_vector() ||
1823           ir->operands[1]->type->is_vector()) {
1824          st_src_reg temp = get_temp(native_integers ?
1825                                     glsl_type::uvec4_type :
1826                                     glsl_type::vec4_type);
1827          if (ir->operands[0]->type->is_boolean() &&
1828              ir->operands[1]->as_constant() &&
1829              ir->operands[1]->as_constant()->is_zero()) {
1830             emit_asm(ir, TGSI_OPCODE_MOV, st_dst_reg(temp), op[0]);
1831          } else {
1832             emit_asm(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]);
1833          }
1834
1835          if (native_integers) {
1836             st_dst_reg temp_dst = st_dst_reg(temp);
1837             st_src_reg temp1 = st_src_reg(temp), temp2 = st_src_reg(temp);
1838
1839             /* Emit 1-3 OR operations to combine the SNE results. */
1840             switch (ir->operands[0]->type->vector_elements) {
1841             case 2:
1842                break;
1843             case 3:
1844                temp_dst.writemask = WRITEMASK_Y;
1845                temp1.swizzle = SWIZZLE_YYYY;
1846                temp2.swizzle = SWIZZLE_ZZZZ;
1847                emit_asm(ir, TGSI_OPCODE_OR, temp_dst, temp1, temp2);
1848                break;
1849             case 4:
1850                temp_dst.writemask = WRITEMASK_X;
1851                temp1.swizzle = SWIZZLE_XXXX;
1852                temp2.swizzle = SWIZZLE_YYYY;
1853                emit_asm(ir, TGSI_OPCODE_OR, temp_dst, temp1, temp2);
1854                temp_dst.writemask = WRITEMASK_Y;
1855                temp1.swizzle = SWIZZLE_ZZZZ;
1856                temp2.swizzle = SWIZZLE_WWWW;
1857                emit_asm(ir, TGSI_OPCODE_OR, temp_dst, temp1, temp2);
1858             }
1859
1860             temp1.swizzle = SWIZZLE_XXXX;
1861             temp2.swizzle = SWIZZLE_YYYY;
1862             emit_asm(ir, TGSI_OPCODE_OR, result_dst, temp1, temp2);
1863          } else {
1864             /* After the dot-product, the value will be an integer on the
1865              * range [0,4].  Zero stays zero, and positive values become 1.0.
1866              */
1867             glsl_to_tgsi_instruction *const dp =
1868                   emit_dp(ir, result_dst, temp, temp, vector_elements);
1869             if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
1870                /* The clamping to [0,1] can be done for free in the fragment
1871                 * shader with a saturate.
1872                 */
1873                dp->saturate = true;
1874             } else {
1875                /* Negating the result of the dot-product gives values on the range
1876                 * [-4, 0].  Zero stays zero, and negative values become 1.0.  This
1877                 * achieved using SLT.
1878                 */
1879                st_src_reg slt_src = result_src;
1880                slt_src.negate = ~slt_src.negate;
1881                emit_asm(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0));
1882             }
1883          }
1884       } else {
1885          emit_asm(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]);
1886       }
1887       break;
1888
1889    case ir_binop_logic_xor:
1890       if (native_integers)
1891          emit_asm(ir, TGSI_OPCODE_XOR, result_dst, op[0], op[1]);
1892       else
1893          emit_asm(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]);
1894       break;
1895
1896    case ir_binop_logic_or: {
1897       if (native_integers) {
1898          /* If integers are used as booleans, we can use an actual "or"
1899           * instruction.
1900           */
1901          assert(native_integers);
1902          emit_asm(ir, TGSI_OPCODE_OR, result_dst, op[0], op[1]);
1903       } else {
1904          /* After the addition, the value will be an integer on the
1905           * range [0,2].  Zero stays zero, and positive values become 1.0.
1906           */
1907          glsl_to_tgsi_instruction *add =
1908             emit_asm(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]);
1909          if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
1910             /* The clamping to [0,1] can be done for free in the fragment
1911              * shader with a saturate if floats are being used as boolean values.
1912              */
1913             add->saturate = true;
1914          } else {
1915             /* Negating the result of the addition gives values on the range
1916              * [-2, 0].  Zero stays zero, and negative values become 1.0.  This
1917              * is achieved using SLT.
1918              */
1919             st_src_reg slt_src = result_src;
1920             slt_src.negate = ~slt_src.negate;
1921             emit_asm(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0));
1922          }
1923       }
1924       break;
1925    }
1926
1927    case ir_binop_logic_and:
1928       /* If native integers are disabled, the bool args are stored as float 0.0
1929        * or 1.0, so "mul" gives us "and".  If they're enabled, just use the
1930        * actual AND opcode.
1931        */
1932       if (native_integers)
1933          emit_asm(ir, TGSI_OPCODE_AND, result_dst, op[0], op[1]);
1934       else
1935          emit_asm(ir, TGSI_OPCODE_MUL, result_dst, op[0], op[1]);
1936       break;
1937
1938    case ir_binop_dot:
1939       assert(ir->operands[0]->type->is_vector());
1940       assert(ir->operands[0]->type == ir->operands[1]->type);
1941       emit_dp(ir, result_dst, op[0], op[1],
1942               ir->operands[0]->type->vector_elements);
1943       break;
1944
1945    case ir_unop_sqrt:
1946       if (have_sqrt) {
1947          emit_scalar(ir, TGSI_OPCODE_SQRT, result_dst, op[0]);
1948       } else {
1949          /* sqrt(x) = x * rsq(x). */
1950          emit_scalar(ir, TGSI_OPCODE_RSQ, result_dst, op[0]);
1951          emit_asm(ir, TGSI_OPCODE_MUL, result_dst, result_src, op[0]);
1952          /* For incoming channels <= 0, set the result to 0. */
1953          op[0].negate = ~op[0].negate;
1954          emit_asm(ir, TGSI_OPCODE_CMP, result_dst,
1955               op[0], result_src, st_src_reg_for_float(0.0));
1956       }
1957       break;
1958    case ir_unop_rsq:
1959       emit_scalar(ir, TGSI_OPCODE_RSQ, result_dst, op[0]);
1960       break;
1961    case ir_unop_i2f:
1962       if (native_integers) {
1963          emit_asm(ir, TGSI_OPCODE_I2F, result_dst, op[0]);
1964          break;
1965       }
1966       /* fallthrough to next case otherwise */
1967    case ir_unop_b2f:
1968       if (native_integers) {
1969          emit_asm(ir, TGSI_OPCODE_AND, result_dst, op[0], st_src_reg_for_float(1.0));
1970          break;
1971       }
1972       /* fallthrough to next case otherwise */
1973    case ir_unop_i2u:
1974    case ir_unop_u2i:
1975       /* Converting between signed and unsigned integers is a no-op. */
1976       result_src = op[0];
1977       result_src.type = result_dst.type;
1978       break;
1979    case ir_unop_b2i:
1980       if (native_integers) {
1981          /* Booleans are stored as integers using ~0 for true and 0 for false.
1982           * GLSL requires that int(bool) return 1 for true and 0 for false.
1983           * This conversion is done with AND, but it could be done with NEG.
1984           */
1985          emit_asm(ir, TGSI_OPCODE_AND, result_dst, op[0], st_src_reg_for_int(1));
1986       } else {
1987          /* Booleans and integers are both stored as floats when native
1988           * integers are disabled.
1989           */
1990          result_src = op[0];
1991       }
1992       break;
1993    case ir_unop_f2i:
1994       if (native_integers)
1995          emit_asm(ir, TGSI_OPCODE_F2I, result_dst, op[0]);
1996       else
1997          emit_asm(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]);
1998       break;
1999    case ir_unop_f2u:
2000       if (native_integers)
2001          emit_asm(ir, TGSI_OPCODE_F2U, result_dst, op[0]);
2002       else
2003          emit_asm(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]);
2004       break;
2005    case ir_unop_bitcast_f2i:
2006       result_src = op[0];
2007       result_src.type = GLSL_TYPE_INT;
2008       break;
2009    case ir_unop_bitcast_f2u:
2010       result_src = op[0];
2011       result_src.type = GLSL_TYPE_UINT;
2012       break;
2013    case ir_unop_bitcast_i2f:
2014    case ir_unop_bitcast_u2f:
2015       result_src = op[0];
2016       result_src.type = GLSL_TYPE_FLOAT;
2017       break;
2018    case ir_unop_f2b:
2019       emit_asm(ir, TGSI_OPCODE_SNE, result_dst, op[0], st_src_reg_for_float(0.0));
2020       break;
2021    case ir_unop_d2b:
2022       emit_asm(ir, TGSI_OPCODE_SNE, result_dst, op[0], st_src_reg_for_double(0.0));
2023       break;
2024    case ir_unop_i2b:
2025       if (native_integers)
2026          emit_asm(ir, TGSI_OPCODE_USNE, result_dst, op[0], st_src_reg_for_int(0));
2027       else
2028          emit_asm(ir, TGSI_OPCODE_SNE, result_dst, op[0], st_src_reg_for_float(0.0));
2029       break;
2030    case ir_unop_trunc:
2031       emit_asm(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]);
2032       break;
2033    case ir_unop_ceil:
2034       emit_asm(ir, TGSI_OPCODE_CEIL, result_dst, op[0]);
2035       break;
2036    case ir_unop_floor:
2037       emit_asm(ir, TGSI_OPCODE_FLR, result_dst, op[0]);
2038       break;
2039    case ir_unop_round_even:
2040       emit_asm(ir, TGSI_OPCODE_ROUND, result_dst, op[0]);
2041       break;
2042    case ir_unop_fract:
2043       emit_asm(ir, TGSI_OPCODE_FRC, result_dst, op[0]);
2044       break;
2045
2046    case ir_binop_min:
2047       emit_asm(ir, TGSI_OPCODE_MIN, result_dst, op[0], op[1]);
2048       break;
2049    case ir_binop_max:
2050       emit_asm(ir, TGSI_OPCODE_MAX, result_dst, op[0], op[1]);
2051       break;
2052    case ir_binop_pow:
2053       emit_scalar(ir, TGSI_OPCODE_POW, result_dst, op[0], op[1]);
2054       break;
2055
2056    case ir_unop_bit_not:
2057       if (native_integers) {
2058          emit_asm(ir, TGSI_OPCODE_NOT, result_dst, op[0]);
2059          break;
2060       }
2061    case ir_unop_u2f:
2062       if (native_integers) {
2063          emit_asm(ir, TGSI_OPCODE_U2F, result_dst, op[0]);
2064          break;
2065       }
2066    case ir_binop_lshift:
2067       if (native_integers) {
2068          emit_asm(ir, TGSI_OPCODE_SHL, result_dst, op[0], op[1]);
2069          break;
2070       }
2071    case ir_binop_rshift:
2072       if (native_integers) {
2073          emit_asm(ir, TGSI_OPCODE_ISHR, result_dst, op[0], op[1]);
2074          break;
2075       }
2076    case ir_binop_bit_and:
2077       if (native_integers) {
2078          emit_asm(ir, TGSI_OPCODE_AND, result_dst, op[0], op[1]);
2079          break;
2080       }
2081    case ir_binop_bit_xor:
2082       if (native_integers) {
2083          emit_asm(ir, TGSI_OPCODE_XOR, result_dst, op[0], op[1]);
2084          break;
2085       }
2086    case ir_binop_bit_or:
2087       if (native_integers) {
2088          emit_asm(ir, TGSI_OPCODE_OR, result_dst, op[0], op[1]);
2089          break;
2090       }
2091
2092       assert(!"GLSL 1.30 features unsupported");
2093       break;
2094
2095    case ir_binop_ubo_load: {
2096       ir_constant *const_uniform_block = ir->operands[0]->as_constant();
2097       ir_constant *const_offset_ir = ir->operands[1]->as_constant();
2098       unsigned const_offset = const_offset_ir ? const_offset_ir->value.u[0] : 0;
2099       unsigned const_block = const_uniform_block ? const_uniform_block->value.u[0] + 1 : 0;
2100       st_src_reg index_reg = get_temp(glsl_type::uint_type);
2101       st_src_reg cbuf;
2102
2103       cbuf.type = ir->type->base_type;
2104       cbuf.file = PROGRAM_CONSTANT;
2105       cbuf.index = 0;
2106       cbuf.reladdr = NULL;
2107       cbuf.negate = 0;
2108
2109       assert(ir->type->is_vector() || ir->type->is_scalar());
2110
2111       if (const_offset_ir) {
2112          /* Constant index into constant buffer */
2113          cbuf.reladdr = NULL;
2114          cbuf.index = const_offset / 16;
2115       }
2116       else {
2117          /* Relative/variable index into constant buffer */
2118          emit_asm(ir, TGSI_OPCODE_USHR, st_dst_reg(index_reg), op[1],
2119               st_src_reg_for_int(4));
2120          cbuf.reladdr = ralloc(mem_ctx, st_src_reg);
2121          memcpy(cbuf.reladdr, &index_reg, sizeof(index_reg));
2122       }
2123
2124       if (const_uniform_block) {
2125          /* Constant constant buffer */
2126          cbuf.reladdr2 = NULL;
2127          cbuf.index2D = const_block;
2128          cbuf.has_index2 = true;
2129       }
2130       else {
2131          /* Relative/variable constant buffer */
2132          cbuf.reladdr2 = ralloc(mem_ctx, st_src_reg);
2133          cbuf.index2D = 1;
2134          memcpy(cbuf.reladdr2, &op[0], sizeof(st_src_reg));
2135          cbuf.has_index2 = true;
2136       }
2137
2138       cbuf.swizzle = swizzle_for_size(ir->type->vector_elements);
2139       if (cbuf.type == GLSL_TYPE_DOUBLE)
2140          cbuf.swizzle += MAKE_SWIZZLE4(const_offset % 16 / 8,
2141                                        const_offset % 16 / 8,
2142                                        const_offset % 16 / 8,
2143                                        const_offset % 16 / 8);
2144       else
2145          cbuf.swizzle += MAKE_SWIZZLE4(const_offset % 16 / 4,
2146                                        const_offset % 16 / 4,
2147                                        const_offset % 16 / 4,
2148                                        const_offset % 16 / 4);
2149
2150       if (ir->type->base_type == GLSL_TYPE_BOOL) {
2151          emit_asm(ir, TGSI_OPCODE_USNE, result_dst, cbuf, st_src_reg_for_int(0));
2152       } else {
2153          emit_asm(ir, TGSI_OPCODE_MOV, result_dst, cbuf);
2154       }
2155       break;
2156    }
2157    case ir_triop_lrp:
2158       /* note: we have to reorder the three args here */
2159       emit_asm(ir, TGSI_OPCODE_LRP, result_dst, op[2], op[1], op[0]);
2160       break;
2161    case ir_triop_csel:
2162       if (this->ctx->Const.NativeIntegers)
2163          emit_asm(ir, TGSI_OPCODE_UCMP, result_dst, op[0], op[1], op[2]);
2164       else {
2165          op[0].negate = ~op[0].negate;
2166          emit_asm(ir, TGSI_OPCODE_CMP, result_dst, op[0], op[1], op[2]);
2167       }
2168       break;
2169    case ir_triop_bitfield_extract:
2170       emit_asm(ir, TGSI_OPCODE_IBFE, result_dst, op[0], op[1], op[2]);
2171       break;
2172    case ir_quadop_bitfield_insert:
2173       emit_asm(ir, TGSI_OPCODE_BFI, result_dst, op[0], op[1], op[2], op[3]);
2174       break;
2175    case ir_unop_bitfield_reverse:
2176       emit_asm(ir, TGSI_OPCODE_BREV, result_dst, op[0]);
2177       break;
2178    case ir_unop_bit_count:
2179       emit_asm(ir, TGSI_OPCODE_POPC, result_dst, op[0]);
2180       break;
2181    case ir_unop_find_msb:
2182       emit_asm(ir, TGSI_OPCODE_IMSB, result_dst, op[0]);
2183       break;
2184    case ir_unop_find_lsb:
2185       emit_asm(ir, TGSI_OPCODE_LSB, result_dst, op[0]);
2186       break;
2187    case ir_binop_imul_high:
2188       emit_asm(ir, TGSI_OPCODE_IMUL_HI, result_dst, op[0], op[1]);
2189       break;
2190    case ir_triop_fma:
2191       /* In theory, MAD is incorrect here. */
2192       if (have_fma)
2193          emit_asm(ir, TGSI_OPCODE_FMA, result_dst, op[0], op[1], op[2]);
2194       else
2195          emit_asm(ir, TGSI_OPCODE_MAD, result_dst, op[0], op[1], op[2]);
2196       break;
2197    case ir_unop_interpolate_at_centroid:
2198       emit_asm(ir, TGSI_OPCODE_INTERP_CENTROID, result_dst, op[0]);
2199       break;
2200    case ir_binop_interpolate_at_offset:
2201       emit_asm(ir, TGSI_OPCODE_INTERP_OFFSET, result_dst, op[0], op[1]);
2202       break;
2203    case ir_binop_interpolate_at_sample:
2204       emit_asm(ir, TGSI_OPCODE_INTERP_SAMPLE, result_dst, op[0], op[1]);
2205       break;
2206
2207    case ir_unop_d2f:
2208       emit_asm(ir, TGSI_OPCODE_D2F, result_dst, op[0]);
2209       break;
2210    case ir_unop_f2d:
2211       emit_asm(ir, TGSI_OPCODE_F2D, result_dst, op[0]);
2212       break;
2213    case ir_unop_d2i:
2214       emit_asm(ir, TGSI_OPCODE_D2I, result_dst, op[0]);
2215       break;
2216    case ir_unop_i2d:
2217       emit_asm(ir, TGSI_OPCODE_I2D, result_dst, op[0]);
2218       break;
2219    case ir_unop_d2u:
2220       emit_asm(ir, TGSI_OPCODE_D2U, result_dst, op[0]);
2221       break;
2222    case ir_unop_u2d:
2223       emit_asm(ir, TGSI_OPCODE_U2D, result_dst, op[0]);
2224       break;
2225    case ir_unop_unpack_double_2x32:
2226    case ir_unop_pack_double_2x32:
2227       emit_asm(ir, TGSI_OPCODE_MOV, result_dst, op[0]);
2228       break;
2229
2230    case ir_binop_ldexp:
2231       if (ir->operands[0]->type->base_type == GLSL_TYPE_DOUBLE) {
2232          emit_asm(ir, TGSI_OPCODE_DLDEXP, result_dst, op[0], op[1]);
2233       } else {
2234          assert(!"Invalid ldexp for non-double opcode in glsl_to_tgsi_visitor::visit()");
2235       }
2236       break;
2237
2238    case ir_unop_pack_half_2x16:
2239       emit_asm(ir, TGSI_OPCODE_PK2H, result_dst, op[0]);
2240       break;
2241    case ir_unop_unpack_half_2x16:
2242       emit_asm(ir, TGSI_OPCODE_UP2H, result_dst, op[0]);
2243       break;
2244
2245    case ir_unop_get_buffer_size: {
2246       ir_constant *const_offset = ir->operands[0]->as_constant();
2247       st_src_reg buffer(
2248             PROGRAM_BUFFER,
2249             ctx->Const.Program[shader->Stage].MaxAtomicBuffers +
2250             (const_offset ? const_offset->value.u[0] : 0),
2251             GLSL_TYPE_UINT);
2252       if (!const_offset) {
2253          buffer.reladdr = ralloc(mem_ctx, st_src_reg);
2254          memcpy(buffer.reladdr, &sampler_reladdr, sizeof(sampler_reladdr));
2255          emit_arl(ir, sampler_reladdr, op[0]);
2256       }
2257       emit_asm(ir, TGSI_OPCODE_RESQ, result_dst)->buffer = buffer;
2258       break;
2259    }
2260
2261    case ir_unop_pack_snorm_2x16:
2262    case ir_unop_pack_unorm_2x16:
2263    case ir_unop_pack_snorm_4x8:
2264    case ir_unop_pack_unorm_4x8:
2265
2266    case ir_unop_unpack_snorm_2x16:
2267    case ir_unop_unpack_unorm_2x16:
2268    case ir_unop_unpack_snorm_4x8:
2269    case ir_unop_unpack_unorm_4x8:
2270
2271    case ir_quadop_vector:
2272    case ir_binop_vector_extract:
2273    case ir_triop_vector_insert:
2274    case ir_binop_carry:
2275    case ir_binop_borrow:
2276    case ir_unop_ssbo_unsized_array_length:
2277       /* This operation is not supported, or should have already been handled.
2278        */
2279       assert(!"Invalid ir opcode in glsl_to_tgsi_visitor::visit()");
2280       break;
2281    }
2282
2283    this->result = result_src;
2284 }
2285
2286
2287 void
2288 glsl_to_tgsi_visitor::visit(ir_swizzle *ir)
2289 {
2290    st_src_reg src;
2291    int i;
2292    int swizzle[4];
2293
2294    /* Note that this is only swizzles in expressions, not those on the left
2295     * hand side of an assignment, which do write masking.  See ir_assignment
2296     * for that.
2297     */
2298
2299    ir->val->accept(this);
2300    src = this->result;
2301    assert(src.file != PROGRAM_UNDEFINED);
2302    assert(ir->type->vector_elements > 0);
2303
2304    for (i = 0; i < 4; i++) {
2305       if (i < ir->type->vector_elements) {
2306          switch (i) {
2307          case 0:
2308             swizzle[i] = GET_SWZ(src.swizzle, ir->mask.x);
2309             break;
2310          case 1:
2311             swizzle[i] = GET_SWZ(src.swizzle, ir->mask.y);
2312             break;
2313          case 2:
2314             swizzle[i] = GET_SWZ(src.swizzle, ir->mask.z);
2315             break;
2316          case 3:
2317             swizzle[i] = GET_SWZ(src.swizzle, ir->mask.w);
2318             break;
2319          }
2320       } else {
2321          /* If the type is smaller than a vec4, replicate the last
2322           * channel out.
2323           */
2324          swizzle[i] = swizzle[ir->type->vector_elements - 1];
2325       }
2326    }
2327
2328    src.swizzle = MAKE_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]);
2329
2330    this->result = src;
2331 }
2332
2333 /* Test if the variable is an array. Note that geometry and
2334  * tessellation shader inputs are outputs are always arrays (except
2335  * for patch inputs), so only the array element type is considered.
2336  */
2337 static bool
2338 is_inout_array(unsigned stage, ir_variable *var, bool *is_2d)
2339 {
2340    const glsl_type *type = var->type;
2341
2342    if ((stage == MESA_SHADER_VERTEX && var->data.mode == ir_var_shader_in) ||
2343        (stage == MESA_SHADER_FRAGMENT && var->data.mode == ir_var_shader_out))
2344       return false;
2345
2346    *is_2d = false;
2347
2348    if (((stage == MESA_SHADER_GEOMETRY && var->data.mode == ir_var_shader_in) ||
2349         (stage == MESA_SHADER_TESS_EVAL && var->data.mode == ir_var_shader_in) ||
2350         stage == MESA_SHADER_TESS_CTRL) &&
2351        !var->data.patch) {
2352       if (!var->type->is_array())
2353          return false; /* a system value probably */
2354
2355       type = var->type->fields.array;
2356       *is_2d = true;
2357    }
2358
2359    return type->is_array() || type->is_matrix();
2360 }
2361
2362 void
2363 glsl_to_tgsi_visitor::visit(ir_dereference_variable *ir)
2364 {
2365    variable_storage *entry = find_variable_storage(ir->var);
2366    ir_variable *var = ir->var;
2367    bool is_2d;
2368
2369    if (!entry) {
2370       switch (var->data.mode) {
2371       case ir_var_uniform:
2372          entry = new(mem_ctx) variable_storage(var, PROGRAM_UNIFORM,
2373                                                var->data.param_index);
2374          this->variables.push_tail(entry);
2375          break;
2376       case ir_var_shader_in:
2377          /* The linker assigns locations for varyings and attributes,
2378           * including deprecated builtins (like gl_Color), user-assign
2379           * generic attributes (glBindVertexLocation), and
2380           * user-defined varyings.
2381           */
2382          assert(var->data.location != -1);
2383
2384          if (is_inout_array(shader->Stage, var, &is_2d)) {
2385             struct array_decl *decl = &input_arrays[num_input_arrays];
2386
2387             decl->mesa_index = var->data.location;
2388             decl->array_id = num_input_arrays + 1;
2389             if (is_2d) {
2390                decl->array_size = type_size(var->type->fields.array);
2391                decl->array_type = var->type->fields.array->without_array()->base_type;
2392             } else {
2393                decl->array_size = type_size(var->type);
2394                decl->array_type = var->type->without_array()->base_type;
2395             }
2396             num_input_arrays++;
2397
2398             entry = new(mem_ctx) variable_storage(var,
2399                                                   PROGRAM_INPUT,
2400                                                   var->data.location,
2401                                                   decl->array_id);
2402          }
2403          else {
2404             entry = new(mem_ctx) variable_storage(var,
2405                                                   PROGRAM_INPUT,
2406                                                   var->data.location);
2407          }
2408          this->variables.push_tail(entry);
2409          break;
2410       case ir_var_shader_out:
2411          assert(var->data.location != -1);
2412
2413          if (is_inout_array(shader->Stage, var, &is_2d)) {
2414             struct array_decl *decl = &output_arrays[num_output_arrays];
2415
2416             decl->mesa_index = var->data.location;
2417             decl->array_id = num_output_arrays + 1;
2418             if (is_2d) {
2419                decl->array_size = type_size(var->type->fields.array);
2420                decl->array_type = var->type->fields.array->without_array()->base_type;
2421             } else {
2422                decl->array_size = type_size(var->type);
2423                decl->array_type = var->type->without_array()->base_type;
2424             }
2425             num_output_arrays++;
2426
2427             entry = new(mem_ctx) variable_storage(var,
2428                                                   PROGRAM_OUTPUT,
2429                                                   var->data.location,
2430                                                   decl->array_id);
2431          }
2432          else {
2433             entry = new(mem_ctx) variable_storage(var,
2434                                                   PROGRAM_OUTPUT,
2435                                                   var->data.location
2436                                                   + var->data.index);
2437          }
2438          this->variables.push_tail(entry);
2439          break;
2440       case ir_var_system_value:
2441          entry = new(mem_ctx) variable_storage(var,
2442                                                PROGRAM_SYSTEM_VALUE,
2443                                                var->data.location);
2444          break;
2445       case ir_var_auto:
2446       case ir_var_temporary:
2447          st_src_reg src = get_temp(var->type);
2448
2449          entry = new(mem_ctx) variable_storage(var, src.file, src.index);
2450          this->variables.push_tail(entry);
2451
2452          break;
2453       }
2454
2455       if (!entry) {
2456          printf("Failed to make storage for %s\n", var->name);
2457          exit(1);
2458       }
2459    }
2460
2461    this->result = st_src_reg(entry->file, entry->index, var->type);
2462    this->result.array_id = entry->array_id;
2463    if (this->shader->Stage == MESA_SHADER_VERTEX && var->data.mode == ir_var_shader_in && var->type->is_double())
2464       this->result.is_double_vertex_input = true;
2465    if (!native_integers)
2466       this->result.type = GLSL_TYPE_FLOAT;
2467 }
2468
2469 static void
2470 shrink_array_declarations(struct array_decl *arrays, unsigned count,
2471                           GLbitfield64 usage_mask,
2472                           GLbitfield64 double_usage_mask,
2473                           GLbitfield patch_usage_mask)
2474 {
2475    unsigned i, j;
2476
2477    /* Fix array declarations by removing unused array elements at both ends
2478     * of the arrays. For example, mat4[3] where only mat[1] is used.
2479     */
2480    for (i = 0; i < count; i++) {
2481       struct array_decl *decl = &arrays[i];
2482
2483       /* Shrink the beginning. */
2484       for (j = 0; j < decl->array_size; j++) {
2485          if (decl->mesa_index >= VARYING_SLOT_PATCH0) {
2486             if (patch_usage_mask &
2487                 BITFIELD64_BIT(decl->mesa_index - VARYING_SLOT_PATCH0 + j))
2488                break;
2489          }
2490          else {
2491             if (usage_mask & BITFIELD64_BIT(decl->mesa_index+j))
2492                break;
2493             if (double_usage_mask & BITFIELD64_BIT(decl->mesa_index+j-1))
2494                break;
2495          }
2496
2497          decl->mesa_index++;
2498          decl->array_size--;
2499          j--;
2500       }
2501
2502       /* Shrink the end. */
2503       for (j = decl->array_size-1; j >= 0; j--) {
2504          if (decl->mesa_index >= VARYING_SLOT_PATCH0) {
2505             if (patch_usage_mask &
2506                 BITFIELD64_BIT(decl->mesa_index - VARYING_SLOT_PATCH0 + j))
2507                break;
2508          }
2509          else {
2510             if (usage_mask & BITFIELD64_BIT(decl->mesa_index+j))
2511                break;
2512             if (double_usage_mask & BITFIELD64_BIT(decl->mesa_index+j-1))
2513                break;
2514          }
2515
2516          decl->array_size--;
2517       }
2518    }
2519 }
2520
2521 void
2522 glsl_to_tgsi_visitor::visit(ir_dereference_array *ir)
2523 {
2524    ir_constant *index;
2525    st_src_reg src;
2526    int element_size = type_size(ir->type);
2527    bool is_2D = false;
2528
2529    index = ir->array_index->constant_expression_value();
2530
2531    ir->array->accept(this);
2532    src = this->result;
2533
2534    if (ir->array->ir_type != ir_type_dereference_array) {
2535       switch (this->prog->Target) {
2536       case GL_TESS_CONTROL_PROGRAM_NV:
2537          is_2D = (src.file == PROGRAM_INPUT || src.file == PROGRAM_OUTPUT) &&
2538                  !ir->variable_referenced()->data.patch;
2539          break;
2540       case GL_TESS_EVALUATION_PROGRAM_NV:
2541          is_2D = src.file == PROGRAM_INPUT &&
2542                  !ir->variable_referenced()->data.patch;
2543          break;
2544       case GL_GEOMETRY_PROGRAM_NV:
2545          is_2D = src.file == PROGRAM_INPUT;
2546          break;
2547       }
2548    }
2549
2550    if (is_2D)
2551       element_size = 1;
2552
2553    if (index) {
2554
2555       if (this->prog->Target == GL_VERTEX_PROGRAM_ARB &&
2556           src.file == PROGRAM_INPUT)
2557          element_size = attrib_type_size(ir->type, true);
2558       if (is_2D) {
2559          src.index2D = index->value.i[0];
2560          src.has_index2 = true;
2561       } else
2562          src.index += index->value.i[0] * element_size;
2563    } else {
2564       /* Variable index array dereference.  It eats the "vec4" of the
2565        * base of the array and an index that offsets the TGSI register
2566        * index.
2567        */
2568       ir->array_index->accept(this);
2569
2570       st_src_reg index_reg;
2571
2572       if (element_size == 1) {
2573          index_reg = this->result;
2574       } else {
2575          index_reg = get_temp(native_integers ?
2576                               glsl_type::int_type : glsl_type::float_type);
2577
2578          emit_asm(ir, TGSI_OPCODE_MUL, st_dst_reg(index_reg),
2579               this->result, st_src_reg_for_type(index_reg.type, element_size));
2580       }
2581
2582       /* If there was already a relative address register involved, add the
2583        * new and the old together to get the new offset.
2584        */
2585       if (!is_2D && src.reladdr != NULL) {
2586          st_src_reg accum_reg = get_temp(native_integers ?
2587                                 glsl_type::int_type : glsl_type::float_type);
2588
2589          emit_asm(ir, TGSI_OPCODE_ADD, st_dst_reg(accum_reg),
2590               index_reg, *src.reladdr);
2591
2592          index_reg = accum_reg;
2593       }
2594
2595       if (is_2D) {
2596          src.reladdr2 = ralloc(mem_ctx, st_src_reg);
2597          memcpy(src.reladdr2, &index_reg, sizeof(index_reg));
2598          src.index2D = 0;
2599          src.has_index2 = true;
2600       } else {
2601          src.reladdr = ralloc(mem_ctx, st_src_reg);
2602          memcpy(src.reladdr, &index_reg, sizeof(index_reg));
2603       }
2604    }
2605
2606    /* If the type is smaller than a vec4, replicate the last channel out. */
2607    if (ir->type->is_scalar() || ir->type->is_vector())
2608       src.swizzle = swizzle_for_size(ir->type->vector_elements);
2609    else
2610       src.swizzle = SWIZZLE_NOOP;
2611
2612    /* Change the register type to the element type of the array. */
2613    src.type = ir->type->base_type;
2614
2615    this->result = src;
2616 }
2617
2618 void
2619 glsl_to_tgsi_visitor::visit(ir_dereference_record *ir)
2620 {
2621    unsigned int i;
2622    const glsl_type *struct_type = ir->record->type;
2623    int offset = 0;
2624
2625    ir->record->accept(this);
2626
2627    for (i = 0; i < struct_type->length; i++) {
2628       if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0)
2629          break;
2630       offset += type_size(struct_type->fields.structure[i].type);
2631    }
2632
2633    /* If the type is smaller than a vec4, replicate the last channel out. */
2634    if (ir->type->is_scalar() || ir->type->is_vector())
2635       this->result.swizzle = swizzle_for_size(ir->type->vector_elements);
2636    else
2637       this->result.swizzle = SWIZZLE_NOOP;
2638
2639    this->result.index += offset;
2640    this->result.type = ir->type->base_type;
2641 }
2642
2643 /**
2644  * We want to be careful in assignment setup to hit the actual storage
2645  * instead of potentially using a temporary like we might with the
2646  * ir_dereference handler.
2647  */
2648 static st_dst_reg
2649 get_assignment_lhs(ir_dereference *ir, glsl_to_tgsi_visitor *v)
2650 {
2651    /* The LHS must be a dereference.  If the LHS is a variable indexed array
2652     * access of a vector, it must be separated into a series conditional moves
2653     * before reaching this point (see ir_vec_index_to_cond_assign).
2654     */
2655    assert(ir->as_dereference());
2656    ir_dereference_array *deref_array = ir->as_dereference_array();
2657    if (deref_array) {
2658       assert(!deref_array->array->type->is_vector());
2659    }
2660
2661    /* Use the rvalue deref handler for the most part.  We'll ignore
2662     * swizzles in it and write swizzles using writemask, though.
2663     */
2664    ir->accept(v);
2665    return st_dst_reg(v->result);
2666 }
2667
2668 /**
2669  * Process the condition of a conditional assignment
2670  *
2671  * Examines the condition of a conditional assignment to generate the optimal
2672  * first operand of a \c CMP instruction.  If the condition is a relational
2673  * operator with 0 (e.g., \c ir_binop_less), the value being compared will be
2674  * used as the source for the \c CMP instruction.  Otherwise the comparison
2675  * is processed to a boolean result, and the boolean result is used as the
2676  * operand to the CMP instruction.
2677  */
2678 bool
2679 glsl_to_tgsi_visitor::process_move_condition(ir_rvalue *ir)
2680 {
2681    ir_rvalue *src_ir = ir;
2682    bool negate = true;
2683    bool switch_order = false;
2684
2685    ir_expression *const expr = ir->as_expression();
2686
2687    if (native_integers) {
2688       if ((expr != NULL) && (expr->get_num_operands() == 2)) {
2689          enum glsl_base_type type = expr->operands[0]->type->base_type;
2690          if (type == GLSL_TYPE_INT || type == GLSL_TYPE_UINT ||
2691              type == GLSL_TYPE_BOOL) {
2692             if (expr->operation == ir_binop_equal) {
2693                if (expr->operands[0]->is_zero()) {
2694                   src_ir = expr->operands[1];
2695                   switch_order = true;
2696                }
2697                else if (expr->operands[1]->is_zero()) {
2698                   src_ir = expr->operands[0];
2699                   switch_order = true;
2700                }
2701             }
2702             else if (expr->operation == ir_binop_nequal) {
2703                if (expr->operands[0]->is_zero()) {
2704                   src_ir = expr->operands[1];
2705                }
2706                else if (expr->operands[1]->is_zero()) {
2707                   src_ir = expr->operands[0];
2708                }
2709             }
2710          }
2711       }
2712
2713       src_ir->accept(this);
2714       return switch_order;
2715    }
2716
2717    if ((expr != NULL) && (expr->get_num_operands() == 2)) {
2718       bool zero_on_left = false;
2719
2720       if (expr->operands[0]->is_zero()) {
2721          src_ir = expr->operands[1];
2722          zero_on_left = true;
2723       } else if (expr->operands[1]->is_zero()) {
2724          src_ir = expr->operands[0];
2725          zero_on_left = false;
2726       }
2727
2728       /*      a is -  0  +            -  0  +
2729        * (a <  0)  T  F  F  ( a < 0)  T  F  F
2730        * (0 <  a)  F  F  T  (-a < 0)  F  F  T
2731        * (a <= 0)  T  T  F  (-a < 0)  F  F  T  (swap order of other operands)
2732        * (0 <= a)  F  T  T  ( a < 0)  T  F  F  (swap order of other operands)
2733        * (a >  0)  F  F  T  (-a < 0)  F  F  T
2734        * (0 >  a)  T  F  F  ( a < 0)  T  F  F
2735        * (a >= 0)  F  T  T  ( a < 0)  T  F  F  (swap order of other operands)
2736        * (0 >= a)  T  T  F  (-a < 0)  F  F  T  (swap order of other operands)
2737        *
2738        * Note that exchanging the order of 0 and 'a' in the comparison simply
2739        * means that the value of 'a' should be negated.
2740        */
2741       if (src_ir != ir) {
2742          switch (expr->operation) {
2743          case ir_binop_less:
2744             switch_order = false;
2745             negate = zero_on_left;
2746             break;
2747
2748          case ir_binop_greater:
2749             switch_order = false;
2750             negate = !zero_on_left;
2751             break;
2752
2753          case ir_binop_lequal:
2754             switch_order = true;
2755             negate = !zero_on_left;
2756             break;
2757
2758          case ir_binop_gequal:
2759             switch_order = true;
2760             negate = zero_on_left;
2761             break;
2762
2763          default:
2764             /* This isn't the right kind of comparison afterall, so make sure
2765              * the whole condition is visited.
2766              */
2767             src_ir = ir;
2768             break;
2769          }
2770       }
2771    }
2772
2773    src_ir->accept(this);
2774
2775    /* We use the TGSI_OPCODE_CMP (a < 0 ? b : c) for conditional moves, and the
2776     * condition we produced is 0.0 or 1.0.  By flipping the sign, we can
2777     * choose which value TGSI_OPCODE_CMP produces without an extra instruction
2778     * computing the condition.
2779     */
2780    if (negate)
2781       this->result.negate = ~this->result.negate;
2782
2783    return switch_order;
2784 }
2785
2786 void
2787 glsl_to_tgsi_visitor::emit_block_mov(ir_assignment *ir, const struct glsl_type *type,
2788                                      st_dst_reg *l, st_src_reg *r,
2789                                      st_src_reg *cond, bool cond_swap)
2790 {
2791    if (type->base_type == GLSL_TYPE_STRUCT) {
2792       for (unsigned int i = 0; i < type->length; i++) {
2793          emit_block_mov(ir, type->fields.structure[i].type, l, r,
2794                         cond, cond_swap);
2795       }
2796       return;
2797    }
2798
2799    if (type->is_array()) {
2800       for (unsigned int i = 0; i < type->length; i++) {
2801          emit_block_mov(ir, type->fields.array, l, r, cond, cond_swap);
2802       }
2803       return;
2804    }
2805
2806    if (type->is_matrix()) {
2807       const struct glsl_type *vec_type;
2808
2809       vec_type = glsl_type::get_instance(type->is_double() ? GLSL_TYPE_DOUBLE : GLSL_TYPE_FLOAT,
2810                                          type->vector_elements, 1);
2811
2812       for (int i = 0; i < type->matrix_columns; i++) {
2813          emit_block_mov(ir, vec_type, l, r, cond, cond_swap);
2814       }
2815       return;
2816    }
2817
2818    assert(type->is_scalar() || type->is_vector());
2819
2820    r->type = type->base_type;
2821    if (cond) {
2822       st_src_reg l_src = st_src_reg(*l);
2823       l_src.swizzle = swizzle_for_size(type->vector_elements);
2824
2825       if (native_integers) {
2826          emit_asm(ir, TGSI_OPCODE_UCMP, *l, *cond,
2827               cond_swap ? l_src : *r,
2828               cond_swap ? *r : l_src);
2829       } else {
2830          emit_asm(ir, TGSI_OPCODE_CMP, *l, *cond,
2831               cond_swap ? l_src : *r,
2832               cond_swap ? *r : l_src);
2833       }
2834    } else {
2835       emit_asm(ir, TGSI_OPCODE_MOV, *l, *r);
2836    }
2837    l->index++;
2838    r->index++;
2839    if (type->is_dual_slot_double()) {
2840       l->index++;
2841       if (r->is_double_vertex_input == false)
2842          r->index++;
2843    }
2844 }
2845
2846 void
2847 glsl_to_tgsi_visitor::visit(ir_assignment *ir)
2848 {
2849    st_dst_reg l;
2850    st_src_reg r;
2851
2852    ir->rhs->accept(this);
2853    r = this->result;
2854
2855    l = get_assignment_lhs(ir->lhs, this);
2856
2857    /* FINISHME: This should really set to the correct maximal writemask for each
2858     * FINISHME: component written (in the loops below).  This case can only
2859     * FINISHME: occur for matrices, arrays, and structures.
2860     */
2861    if (ir->write_mask == 0) {
2862       assert(!ir->lhs->type->is_scalar() && !ir->lhs->type->is_vector());
2863
2864       if (ir->lhs->type->is_array() || ir->lhs->type->without_array()->is_matrix()) {
2865          if (ir->lhs->type->without_array()->is_double()) {
2866             switch (ir->lhs->type->without_array()->vector_elements) {
2867             case 1:
2868                l.writemask = WRITEMASK_X;
2869                break;
2870             case 2:
2871                l.writemask = WRITEMASK_XY;
2872                break;
2873             case 3:
2874                l.writemask = WRITEMASK_XYZ;
2875                break;
2876             case 4:
2877                l.writemask = WRITEMASK_XYZW;
2878                break;
2879             }
2880          } else
2881             l.writemask = WRITEMASK_XYZW;
2882       }
2883    } else if (ir->lhs->type->is_scalar() &&
2884               !ir->lhs->type->is_double() &&
2885               ir->lhs->variable_referenced()->data.mode == ir_var_shader_out) {
2886       /* FINISHME: This hack makes writing to gl_FragDepth, which lives in the
2887        * FINISHME: W component of fragment shader output zero, work correctly.
2888        */
2889       l.writemask = WRITEMASK_XYZW;
2890    } else {
2891       int swizzles[4];
2892       int first_enabled_chan = 0;
2893       int rhs_chan = 0;
2894
2895       l.writemask = ir->write_mask;
2896
2897       for (int i = 0; i < 4; i++) {
2898          if (l.writemask & (1 << i)) {
2899             first_enabled_chan = GET_SWZ(r.swizzle, i);
2900             break;
2901          }
2902       }
2903
2904       /* Swizzle a small RHS vector into the channels being written.
2905        *
2906        * glsl ir treats write_mask as dictating how many channels are
2907        * present on the RHS while TGSI treats write_mask as just
2908        * showing which channels of the vec4 RHS get written.
2909        */
2910       for (int i = 0; i < 4; i++) {
2911          if (l.writemask & (1 << i))
2912             swizzles[i] = GET_SWZ(r.swizzle, rhs_chan++);
2913          else
2914             swizzles[i] = first_enabled_chan;
2915       }
2916       r.swizzle = MAKE_SWIZZLE4(swizzles[0], swizzles[1],
2917                                 swizzles[2], swizzles[3]);
2918    }
2919
2920    assert(l.file != PROGRAM_UNDEFINED);
2921    assert(r.file != PROGRAM_UNDEFINED);
2922
2923    if (ir->condition) {
2924       const bool switch_order = this->process_move_condition(ir->condition);
2925       st_src_reg condition = this->result;
2926
2927       emit_block_mov(ir, ir->lhs->type, &l, &r, &condition, switch_order);
2928    } else if (ir->rhs->as_expression() &&
2929               this->instructions.get_tail() &&
2930               ir->rhs == ((glsl_to_tgsi_instruction *)this->instructions.get_tail())->ir &&
2931               type_size(ir->lhs->type) == 1 &&
2932               l.writemask == ((glsl_to_tgsi_instruction *)this->instructions.get_tail())->dst[0].writemask) {
2933       /* To avoid emitting an extra MOV when assigning an expression to a
2934        * variable, emit the last instruction of the expression again, but
2935        * replace the destination register with the target of the assignment.
2936        * Dead code elimination will remove the original instruction.
2937        */
2938       glsl_to_tgsi_instruction *inst, *new_inst;
2939       inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail();
2940       new_inst = emit_asm(ir, inst->op, l, inst->src[0], inst->src[1], inst->src[2], inst->src[3]);
2941       new_inst->saturate = inst->saturate;
2942       inst->dead_mask = inst->dst[0].writemask;
2943    } else {
2944       emit_block_mov(ir, ir->rhs->type, &l, &r, NULL, false);
2945    }
2946 }
2947
2948
2949 void
2950 glsl_to_tgsi_visitor::visit(ir_constant *ir)
2951 {
2952    st_src_reg src;
2953    GLdouble stack_vals[4] = { 0 };
2954    gl_constant_value *values = (gl_constant_value *) stack_vals;
2955    GLenum gl_type = GL_NONE;
2956    unsigned int i;
2957    static int in_array = 0;
2958    gl_register_file file = in_array ? PROGRAM_CONSTANT : PROGRAM_IMMEDIATE;
2959
2960    /* Unfortunately, 4 floats is all we can get into
2961     * _mesa_add_typed_unnamed_constant.  So, make a temp to store an
2962     * aggregate constant and move each constant value into it.  If we
2963     * get lucky, copy propagation will eliminate the extra moves.
2964     */
2965    if (ir->type->base_type == GLSL_TYPE_STRUCT) {
2966       st_src_reg temp_base = get_temp(ir->type);
2967       st_dst_reg temp = st_dst_reg(temp_base);
2968
2969       foreach_in_list(ir_constant, field_value, &ir->components) {
2970          int size = type_size(field_value->type);
2971
2972          assert(size > 0);
2973
2974          field_value->accept(this);
2975          src = this->result;
2976
2977          for (i = 0; i < (unsigned int)size; i++) {
2978             emit_asm(ir, TGSI_OPCODE_MOV, temp, src);
2979
2980             src.index++;
2981             temp.index++;
2982          }
2983       }
2984       this->result = temp_base;
2985       return;
2986    }
2987
2988    if (ir->type->is_array()) {
2989       st_src_reg temp_base = get_temp(ir->type);
2990       st_dst_reg temp = st_dst_reg(temp_base);
2991       int size = type_size(ir->type->fields.array);
2992
2993       assert(size > 0);
2994       in_array++;
2995
2996       for (i = 0; i < ir->type->length; i++) {
2997          ir->array_elements[i]->accept(this);
2998          src = this->result;
2999          for (int j = 0; j < size; j++) {
3000             emit_asm(ir, TGSI_OPCODE_MOV, temp, src);
3001
3002             src.index++;
3003             temp.index++;
3004          }
3005       }
3006       this->result = temp_base;
3007       in_array--;
3008       return;
3009    }
3010
3011    if (ir->type->is_matrix()) {
3012       st_src_reg mat = get_temp(ir->type);
3013       st_dst_reg mat_column = st_dst_reg(mat);
3014
3015       for (i = 0; i < ir->type->matrix_columns; i++) {
3016          switch (ir->type->base_type) {
3017          case GLSL_TYPE_FLOAT:
3018             values = (gl_constant_value *) &ir->value.f[i * ir->type->vector_elements];
3019
3020             src = st_src_reg(file, -1, ir->type->base_type);
3021             src.index = add_constant(file,
3022                                      values,
3023                                      ir->type->vector_elements,
3024                                      GL_FLOAT,
3025                                      &src.swizzle);
3026             emit_asm(ir, TGSI_OPCODE_MOV, mat_column, src);
3027             break;
3028          case GLSL_TYPE_DOUBLE:
3029             values = (gl_constant_value *) &ir->value.d[i * ir->type->vector_elements];
3030             src = st_src_reg(file, -1, ir->type->base_type);
3031             src.index = add_constant(file,
3032                                      values,
3033                                      ir->type->vector_elements,
3034                                      GL_DOUBLE,
3035                                      &src.swizzle);
3036             if (ir->type->vector_elements >= 2) {
3037                mat_column.writemask = WRITEMASK_XY;
3038                src.swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_X, SWIZZLE_Y);
3039                emit_asm(ir, TGSI_OPCODE_MOV, mat_column, src);
3040             } else {
3041                mat_column.writemask = WRITEMASK_X;
3042                src.swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X);
3043                emit_asm(ir, TGSI_OPCODE_MOV, mat_column, src);
3044             }
3045             src.index++;
3046             if (ir->type->vector_elements > 2) {
3047                if (ir->type->vector_elements == 4) {
3048                   mat_column.writemask = WRITEMASK_ZW;
3049                   src.swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_X, SWIZZLE_Y);
3050                   emit_asm(ir, TGSI_OPCODE_MOV, mat_column, src);
3051                } else {
3052                   mat_column.writemask = WRITEMASK_Z;
3053                   src.swizzle = MAKE_SWIZZLE4(SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y);
3054                   emit_asm(ir, TGSI_OPCODE_MOV, mat_column, src);
3055                   mat_column.writemask = WRITEMASK_XYZW;
3056                   src.swizzle = SWIZZLE_XYZW;
3057                }
3058                mat_column.index++;
3059             }
3060             break;
3061          default:
3062             unreachable("Illegal matrix constant type.\n");
3063             break;
3064          }
3065          mat_column.index++;
3066       }
3067       this->result = mat;
3068       return;
3069    }
3070
3071    switch (ir->type->base_type) {
3072    case GLSL_TYPE_FLOAT:
3073       gl_type = GL_FLOAT;
3074       for (i = 0; i < ir->type->vector_elements; i++) {
3075          values[i].f = ir->value.f[i];
3076       }
3077       break;
3078    case GLSL_TYPE_DOUBLE:
3079       gl_type = GL_DOUBLE;
3080       for (i = 0; i < ir->type->vector_elements; i++) {
3081          values[i * 2].i = *(uint32_t *)&ir->value.d[i];
3082          values[i * 2 + 1].i = *(((uint32_t *)&ir->value.d[i]) + 1);
3083       }
3084       break;
3085    case GLSL_TYPE_UINT:
3086       gl_type = native_integers ? GL_UNSIGNED_INT : GL_FLOAT;
3087       for (i = 0; i < ir->type->vector_elements; i++) {
3088          if (native_integers)
3089             values[i].u = ir->value.u[i];
3090          else
3091             values[i].f = ir->value.u[i];
3092       }
3093       break;
3094    case GLSL_TYPE_INT:
3095       gl_type = native_integers ? GL_INT : GL_FLOAT;
3096       for (i = 0; i < ir->type->vector_elements; i++) {
3097          if (native_integers)
3098             values[i].i = ir->value.i[i];
3099          else
3100             values[i].f = ir->value.i[i];
3101       }
3102       break;
3103    case GLSL_TYPE_BOOL:
3104       gl_type = native_integers ? GL_BOOL : GL_FLOAT;
3105       for (i = 0; i < ir->type->vector_elements; i++) {
3106          values[i].u = ir->value.b[i] ? ctx->Const.UniformBooleanTrue : 0;
3107       }
3108       break;
3109    default:
3110       assert(!"Non-float/uint/int/bool constant");
3111    }
3112
3113    this->result = st_src_reg(file, -1, ir->type);
3114    this->result.index = add_constant(file,
3115                                      values,
3116                                      ir->type->vector_elements,
3117                                      gl_type,
3118                                      &this->result.swizzle);
3119 }
3120
3121 function_entry *
3122 glsl_to_tgsi_visitor::get_function_signature(ir_function_signature *sig)
3123 {
3124    foreach_in_list_use_after(function_entry, entry, &this->function_signatures) {
3125       if (entry->sig == sig)
3126          return entry;
3127    }
3128
3129    entry = ralloc(mem_ctx, function_entry);
3130    entry->sig = sig;
3131    entry->sig_id = this->next_signature_id++;
3132    entry->bgn_inst = NULL;
3133
3134    /* Allocate storage for all the parameters. */
3135    foreach_in_list(ir_variable, param, &sig->parameters) {
3136       variable_storage *storage;
3137
3138       storage = find_variable_storage(param);
3139       assert(!storage);
3140
3141       st_src_reg src = get_temp(param->type);
3142
3143       storage = new(mem_ctx) variable_storage(param, src.file, src.index);
3144       this->variables.push_tail(storage);
3145    }
3146
3147    if (!sig->return_type->is_void()) {
3148       entry->return_reg = get_temp(sig->return_type);
3149    } else {
3150       entry->return_reg = undef_src;
3151    }
3152
3153    this->function_signatures.push_tail(entry);
3154    return entry;
3155 }
3156
3157 void
3158 glsl_to_tgsi_visitor::visit_atomic_counter_intrinsic(ir_call *ir)
3159 {
3160    const char *callee = ir->callee->function_name();
3161    exec_node *param = ir->actual_parameters.get_head();
3162    ir_dereference *deref = static_cast<ir_dereference *>(param);
3163    ir_variable *location = deref->variable_referenced();
3164
3165    st_src_reg buffer(
3166          PROGRAM_BUFFER, location->data.binding, GLSL_TYPE_ATOMIC_UINT);
3167
3168    /* Calculate the surface offset */
3169    st_src_reg offset;
3170    unsigned array_size = 0, base = 0, index = 0;
3171
3172    get_deref_offsets(deref, &array_size, &base, &index, &offset);
3173
3174    if (offset.file != PROGRAM_UNDEFINED) {
3175       emit_asm(ir, TGSI_OPCODE_MUL, st_dst_reg(offset),
3176                offset, st_src_reg_for_int(ATOMIC_COUNTER_SIZE));
3177       emit_asm(ir, TGSI_OPCODE_ADD, st_dst_reg(offset),
3178                offset, st_src_reg_for_int(location->data.offset + index * ATOMIC_COUNTER_SIZE));
3179    } else {
3180       offset = st_src_reg_for_int(location->data.offset + index * ATOMIC_COUNTER_SIZE);
3181    }
3182
3183    ir->return_deref->accept(this);
3184    st_dst_reg dst(this->result);
3185    dst.writemask = WRITEMASK_X;
3186
3187    glsl_to_tgsi_instruction *inst;
3188
3189    if (!strcmp("__intrinsic_atomic_read", callee)) {
3190       inst = emit_asm(ir, TGSI_OPCODE_LOAD, dst, offset);
3191    } else if (!strcmp("__intrinsic_atomic_increment", callee)) {
3192       inst = emit_asm(ir, TGSI_OPCODE_ATOMUADD, dst, offset,
3193                       st_src_reg_for_int(1));
3194    } else if (!strcmp("__intrinsic_atomic_predecrement", callee)) {
3195       inst = emit_asm(ir, TGSI_OPCODE_ATOMUADD, dst, offset,
3196                       st_src_reg_for_int(-1));
3197       emit_asm(ir, TGSI_OPCODE_ADD, dst, this->result, st_src_reg_for_int(-1));
3198    } else {
3199       param = param->get_next();
3200       ir_rvalue *val = ((ir_instruction *)param)->as_rvalue();
3201       val->accept(this);
3202
3203       st_src_reg data = this->result, data2 = undef_src;
3204       unsigned opcode;
3205       if (!strcmp("__intrinsic_atomic_add", callee))
3206          opcode = TGSI_OPCODE_ATOMUADD;
3207       else if (!strcmp("__intrinsic_atomic_min", callee))
3208          opcode = TGSI_OPCODE_ATOMIMIN;
3209       else if (!strcmp("__intrinsic_atomic_max", callee))
3210          opcode = TGSI_OPCODE_ATOMIMAX;
3211       else if (!strcmp("__intrinsic_atomic_and", callee))
3212          opcode = TGSI_OPCODE_ATOMAND;
3213       else if (!strcmp("__intrinsic_atomic_or", callee))
3214          opcode = TGSI_OPCODE_ATOMOR;
3215       else if (!strcmp("__intrinsic_atomic_xor", callee))
3216          opcode = TGSI_OPCODE_ATOMXOR;
3217       else if (!strcmp("__intrinsic_atomic_exchange", callee))
3218          opcode = TGSI_OPCODE_ATOMXCHG;
3219       else if (!strcmp("__intrinsic_atomic_comp_swap", callee)) {
3220          opcode = TGSI_OPCODE_ATOMCAS;
3221          param = param->get_next();
3222          val = ((ir_instruction *)param)->as_rvalue();
3223          val->accept(this);
3224          data2 = this->result;
3225       } else if (!strcmp("__intrinsic_atomic_sub", callee)) {
3226          opcode = TGSI_OPCODE_ATOMUADD;
3227          st_src_reg res = get_temp(glsl_type::uvec4_type);
3228          st_dst_reg dstres = st_dst_reg(res);
3229          dstres.writemask = dst.writemask;
3230          emit_asm(ir, TGSI_OPCODE_INEG, dstres, data);
3231          data = res;
3232       } else {
3233          assert(!"Unexpected intrinsic");
3234          return;
3235       }
3236
3237       inst = emit_asm(ir, opcode, dst, offset, data, data2);
3238    }
3239
3240    inst->buffer = buffer;
3241 }
3242
3243 void
3244 glsl_to_tgsi_visitor::visit_ssbo_intrinsic(ir_call *ir)
3245 {
3246    const char *callee = ir->callee->function_name();
3247    exec_node *param = ir->actual_parameters.get_head();
3248
3249    ir_rvalue *block = ((ir_instruction *)param)->as_rvalue();
3250
3251    param = param->get_next();
3252    ir_rvalue *offset = ((ir_instruction *)param)->as_rvalue();
3253
3254    ir_constant *const_block = block->as_constant();
3255
3256    st_src_reg buffer(
3257          PROGRAM_BUFFER,
3258          ctx->Const.Program[shader->Stage].MaxAtomicBuffers +
3259          (const_block ? const_block->value.u[0] : 0),
3260          GLSL_TYPE_UINT);
3261
3262    if (!const_block) {
3263       block->accept(this);
3264       emit_arl(ir, sampler_reladdr, this->result);
3265       buffer.reladdr = ralloc(mem_ctx, st_src_reg);
3266       memcpy(buffer.reladdr, &sampler_reladdr, sizeof(sampler_reladdr));
3267    }
3268
3269    /* Calculate the surface offset */
3270    offset->accept(this);
3271    st_src_reg off = this->result;
3272
3273    st_dst_reg dst = undef_dst;
3274    if (ir->return_deref) {
3275       ir->return_deref->accept(this);
3276       dst = st_dst_reg(this->result);
3277       dst.writemask = (1 << ir->return_deref->type->vector_elements) - 1;
3278    }
3279
3280    glsl_to_tgsi_instruction *inst;
3281
3282    if (!strcmp("__intrinsic_load_ssbo", callee)) {
3283       inst = emit_asm(ir, TGSI_OPCODE_LOAD, dst, off);
3284       if (dst.type == GLSL_TYPE_BOOL)
3285          emit_asm(ir, TGSI_OPCODE_USNE, dst, st_src_reg(dst), st_src_reg_for_int(0));
3286    } else if (!strcmp("__intrinsic_store_ssbo", callee)) {
3287       param = param->get_next();
3288       ir_rvalue *val = ((ir_instruction *)param)->as_rvalue();
3289       val->accept(this);
3290
3291       param = param->get_next();
3292       ir_constant *write_mask = ((ir_instruction *)param)->as_constant();
3293       assert(write_mask);
3294       dst.writemask = write_mask->value.u[0];
3295
3296       dst.type = this->result.type;
3297       inst = emit_asm(ir, TGSI_OPCODE_STORE, dst, off, this->result);
3298    } else {
3299       param = param->get_next();
3300       ir_rvalue *val = ((ir_instruction *)param)->as_rvalue();
3301       val->accept(this);
3302
3303       st_src_reg data = this->result, data2 = undef_src;
3304       unsigned opcode;
3305       if (!strcmp("__intrinsic_atomic_add_ssbo", callee))
3306          opcode = TGSI_OPCODE_ATOMUADD;
3307       else if (!strcmp("__intrinsic_atomic_min_ssbo", callee))
3308          opcode = TGSI_OPCODE_ATOMIMIN;
3309       else if (!strcmp("__intrinsic_atomic_max_ssbo", callee))
3310          opcode = TGSI_OPCODE_ATOMIMAX;
3311       else if (!strcmp("__intrinsic_atomic_and_ssbo", callee))
3312          opcode = TGSI_OPCODE_ATOMAND;
3313       else if (!strcmp("__intrinsic_atomic_or_ssbo", callee))
3314          opcode = TGSI_OPCODE_ATOMOR;
3315       else if (!strcmp("__intrinsic_atomic_xor_ssbo", callee))
3316          opcode = TGSI_OPCODE_ATOMXOR;
3317       else if (!strcmp("__intrinsic_atomic_exchange_ssbo", callee))
3318          opcode = TGSI_OPCODE_ATOMXCHG;
3319       else if (!strcmp("__intrinsic_atomic_comp_swap_ssbo", callee)) {
3320          opcode = TGSI_OPCODE_ATOMCAS;
3321          param = param->get_next();
3322          val = ((ir_instruction *)param)->as_rvalue();
3323          val->accept(this);
3324          data2 = this->result;
3325       } else {
3326          assert(!"Unexpected intrinsic");
3327          return;
3328       }
3329
3330       inst = emit_asm(ir, opcode, dst, off, data, data2);
3331    }
3332
3333    param = param->get_next();
3334    ir_constant *access = NULL;
3335    if (!param->is_tail_sentinel()) {
3336       access = ((ir_instruction *)param)->as_constant();
3337       assert(access);
3338    }
3339
3340    /* The emit_asm() might have actually split the op into pieces, e.g. for
3341     * double stores. We have to go back and fix up all the generated ops.
3342     */
3343    unsigned op = inst->op;
3344    do {
3345       inst->buffer = buffer;
3346       if (access)
3347          inst->buffer_access = access->value.u[0];
3348       inst = (glsl_to_tgsi_instruction *)inst->get_prev();
3349       if (inst->op == TGSI_OPCODE_UADD)
3350          inst = (glsl_to_tgsi_instruction *)inst->get_prev();
3351    } while (inst && inst->buffer.file == PROGRAM_UNDEFINED && inst->op == op);
3352 }
3353
3354 void
3355 glsl_to_tgsi_visitor::visit_membar_intrinsic(ir_call *ir)
3356 {
3357    const char *callee = ir->callee->function_name();
3358
3359    if (!strcmp("__intrinsic_memory_barrier", callee))
3360       emit_asm(ir, TGSI_OPCODE_MEMBAR, undef_dst,
3361                st_src_reg_for_int(TGSI_MEMBAR_SHADER_BUFFER |
3362                                   TGSI_MEMBAR_ATOMIC_BUFFER |
3363                                   TGSI_MEMBAR_SHADER_IMAGE |
3364                                   TGSI_MEMBAR_SHARED));
3365    else if (!strcmp("__intrinsic_memory_barrier_atomic_counter", callee))
3366       emit_asm(ir, TGSI_OPCODE_MEMBAR, undef_dst,
3367                st_src_reg_for_int(TGSI_MEMBAR_ATOMIC_BUFFER));
3368    else if (!strcmp("__intrinsic_memory_barrier_buffer", callee))
3369       emit_asm(ir, TGSI_OPCODE_MEMBAR, undef_dst,
3370                st_src_reg_for_int(TGSI_MEMBAR_SHADER_BUFFER));
3371    else if (!strcmp("__intrinsic_memory_barrier_image", callee))
3372       emit_asm(ir, TGSI_OPCODE_MEMBAR, undef_dst,
3373                st_src_reg_for_int(TGSI_MEMBAR_SHADER_IMAGE));
3374    else if (!strcmp("__intrinsic_memory_barrier_shared", callee))
3375       emit_asm(ir, TGSI_OPCODE_MEMBAR, undef_dst,
3376                st_src_reg_for_int(TGSI_MEMBAR_SHARED));
3377    else if (!strcmp("__intrinsic_group_memory_barrier", callee))
3378       emit_asm(ir, TGSI_OPCODE_MEMBAR, undef_dst,
3379                st_src_reg_for_int(TGSI_MEMBAR_SHADER_BUFFER |
3380                                   TGSI_MEMBAR_ATOMIC_BUFFER |
3381                                   TGSI_MEMBAR_SHADER_IMAGE |
3382                                   TGSI_MEMBAR_SHARED |
3383                                   TGSI_MEMBAR_THREAD_GROUP));
3384    else
3385       assert(!"Unexpected memory barrier intrinsic");
3386 }
3387
3388 void
3389 glsl_to_tgsi_visitor::visit_shared_intrinsic(ir_call *ir)
3390 {
3391    const char *callee = ir->callee->function_name();
3392    exec_node *param = ir->actual_parameters.get_head();
3393
3394    ir_rvalue *offset = ((ir_instruction *)param)->as_rvalue();
3395
3396    st_src_reg buffer(PROGRAM_MEMORY, 0, GLSL_TYPE_UINT);
3397
3398    /* Calculate the surface offset */
3399    offset->accept(this);
3400    st_src_reg off = this->result;
3401
3402    st_dst_reg dst = undef_dst;
3403    if (ir->return_deref) {
3404       ir->return_deref->accept(this);
3405       dst = st_dst_reg(this->result);
3406       dst.writemask = (1 << ir->return_deref->type->vector_elements) - 1;
3407    }
3408
3409    glsl_to_tgsi_instruction *inst;
3410
3411    if (!strcmp("__intrinsic_load_shared", callee)) {
3412       inst = emit_asm(ir, TGSI_OPCODE_LOAD, dst, off);
3413       inst->buffer = buffer;
3414    } else if (!strcmp("__intrinsic_store_shared", callee)) {
3415       param = param->get_next();
3416       ir_rvalue *val = ((ir_instruction *)param)->as_rvalue();
3417       val->accept(this);
3418
3419       param = param->get_next();
3420       ir_constant *write_mask = ((ir_instruction *)param)->as_constant();
3421       assert(write_mask);
3422       dst.writemask = write_mask->value.u[0];
3423
3424       dst.type = this->result.type;
3425       inst = emit_asm(ir, TGSI_OPCODE_STORE, dst, off, this->result);
3426       inst->buffer = buffer;
3427    } else {
3428       param = param->get_next();
3429       ir_rvalue *val = ((ir_instruction *)param)->as_rvalue();
3430       val->accept(this);
3431
3432       st_src_reg data = this->result, data2 = undef_src;
3433       unsigned opcode;
3434       if (!strcmp("__intrinsic_atomic_add_shared", callee))
3435          opcode = TGSI_OPCODE_ATOMUADD;
3436       else if (!strcmp("__intrinsic_atomic_min_shared", callee))
3437          opcode = TGSI_OPCODE_ATOMIMIN;
3438       else if (!strcmp("__intrinsic_atomic_max_shared", callee))
3439          opcode = TGSI_OPCODE_ATOMIMAX;
3440       else if (!strcmp("__intrinsic_atomic_and_shared", callee))
3441          opcode = TGSI_OPCODE_ATOMAND;
3442       else if (!strcmp("__intrinsic_atomic_or_shared", callee))
3443          opcode = TGSI_OPCODE_ATOMOR;
3444       else if (!strcmp("__intrinsic_atomic_xor_shared", callee))
3445          opcode = TGSI_OPCODE_ATOMXOR;
3446       else if (!strcmp("__intrinsic_atomic_exchange_shared", callee))
3447          opcode = TGSI_OPCODE_ATOMXCHG;
3448       else if (!strcmp("__intrinsic_atomic_comp_swap_shared", callee)) {
3449          opcode = TGSI_OPCODE_ATOMCAS;
3450          param = param->get_next();
3451          val = ((ir_instruction *)param)->as_rvalue();
3452          val->accept(this);
3453          data2 = this->result;
3454       } else {
3455          assert(!"Unexpected intrinsic");
3456          return;
3457       }
3458
3459       inst = emit_asm(ir, opcode, dst, off, data, data2);
3460       inst->buffer = buffer;
3461    }
3462 }
3463
3464 void
3465 glsl_to_tgsi_visitor::visit_image_intrinsic(ir_call *ir)
3466 {
3467    const char *callee = ir->callee->function_name();
3468    exec_node *param = ir->actual_parameters.get_head();
3469
3470    ir_dereference *img = (ir_dereference *)param;
3471    const ir_variable *imgvar = img->variable_referenced();
3472    const glsl_type *type = imgvar->type->without_array();
3473    unsigned sampler_array_size = 1, sampler_base = 0;
3474
3475    st_src_reg reladdr;
3476    st_src_reg image(PROGRAM_IMAGE, 0, GLSL_TYPE_UINT);
3477
3478    get_deref_offsets(img, &sampler_array_size, &sampler_base,
3479                      (unsigned int *)&image.index, &reladdr);
3480    if (reladdr.file != PROGRAM_UNDEFINED) {
3481       emit_arl(ir, sampler_reladdr, reladdr);
3482       image.reladdr = ralloc(mem_ctx, st_src_reg);
3483       memcpy(image.reladdr, &sampler_reladdr, sizeof(reladdr));
3484    }
3485
3486    st_dst_reg dst = undef_dst;
3487    if (ir->return_deref) {
3488       ir->return_deref->accept(this);
3489       dst = st_dst_reg(this->result);
3490       dst.writemask = (1 << ir->return_deref->type->vector_elements) - 1;
3491    }
3492
3493    glsl_to_tgsi_instruction *inst;
3494
3495    if (!strcmp("__intrinsic_image_size", callee)) {
3496       dst.writemask = WRITEMASK_XYZ;
3497       inst = emit_asm(ir, TGSI_OPCODE_RESQ, dst);
3498    } else if (!strcmp("__intrinsic_image_samples", callee)) {
3499       st_src_reg res = get_temp(glsl_type::ivec4_type);
3500       st_dst_reg dstres = st_dst_reg(res);
3501       dstres.writemask = WRITEMASK_W;
3502       emit_asm(ir, TGSI_OPCODE_RESQ, dstres);
3503       res.swizzle = SWIZZLE_WWWW;
3504       inst = emit_asm(ir, TGSI_OPCODE_MOV, dst, res);
3505    } else {
3506       st_src_reg arg1 = undef_src, arg2 = undef_src;
3507       st_src_reg coord;
3508       st_dst_reg coord_dst;
3509       coord = get_temp(glsl_type::ivec4_type);
3510       coord_dst = st_dst_reg(coord);
3511       coord_dst.writemask = (1 << type->coordinate_components()) - 1;
3512       param = param->get_next();
3513       ((ir_dereference *)param)->accept(this);
3514       emit_asm(ir, TGSI_OPCODE_MOV, coord_dst, this->result);
3515       coord.swizzle = SWIZZLE_XXXX;
3516       switch (type->coordinate_components()) {
3517       case 4: assert(!"unexpected coord count");
3518       /* fallthrough */
3519       case 3: coord.swizzle |= SWIZZLE_Z << 6;
3520       /* fallthrough */
3521       case 2: coord.swizzle |= SWIZZLE_Y << 3;
3522       }
3523
3524       if (type->sampler_dimensionality == GLSL_SAMPLER_DIM_MS) {
3525          param = param->get_next();
3526          ((ir_dereference *)param)->accept(this);
3527          st_src_reg sample = this->result;
3528          sample.swizzle = SWIZZLE_XXXX;
3529          coord_dst.writemask = WRITEMASK_W;
3530          emit_asm(ir, TGSI_OPCODE_MOV, coord_dst, sample);
3531          coord.swizzle |= SWIZZLE_W << 9;
3532       }
3533
3534       param = param->get_next();
3535       if (!param->is_tail_sentinel()) {
3536          ((ir_dereference *)param)->accept(this);
3537          arg1 = this->result;
3538          param = param->get_next();
3539       }
3540
3541       if (!param->is_tail_sentinel()) {
3542          ((ir_dereference *)param)->accept(this);
3543          arg2 = this->result;
3544          param = param->get_next();
3545       }
3546
3547       assert(param->is_tail_sentinel());
3548
3549       unsigned opcode;
3550       if (!strcmp("__intrinsic_image_load", callee))
3551          opcode = TGSI_OPCODE_LOAD;
3552       else if (!strcmp("__intrinsic_image_store", callee))
3553          opcode = TGSI_OPCODE_STORE;
3554       else if (!strcmp("__intrinsic_image_atomic_add", callee))
3555          opcode = TGSI_OPCODE_ATOMUADD;
3556       else if (!strcmp("__intrinsic_image_atomic_min", callee))
3557          opcode = TGSI_OPCODE_ATOMIMIN;
3558       else if (!strcmp("__intrinsic_image_atomic_max", callee))
3559          opcode = TGSI_OPCODE_ATOMIMAX;
3560       else if (!strcmp("__intrinsic_image_atomic_and", callee))
3561          opcode = TGSI_OPCODE_ATOMAND;
3562       else if (!strcmp("__intrinsic_image_atomic_or", callee))
3563          opcode = TGSI_OPCODE_ATOMOR;
3564       else if (!strcmp("__intrinsic_image_atomic_xor", callee))
3565          opcode = TGSI_OPCODE_ATOMXOR;
3566       else if (!strcmp("__intrinsic_image_atomic_exchange", callee))
3567          opcode = TGSI_OPCODE_ATOMXCHG;
3568       else if (!strcmp("__intrinsic_image_atomic_comp_swap", callee))
3569          opcode = TGSI_OPCODE_ATOMCAS;
3570       else {
3571          assert(!"Unexpected intrinsic");
3572          return;
3573       }
3574
3575       inst = emit_asm(ir, opcode, dst, coord, arg1, arg2);
3576       if (opcode == TGSI_OPCODE_STORE)
3577          inst->dst[0].writemask = WRITEMASK_XYZW;
3578    }
3579
3580    inst->buffer = image;
3581    inst->sampler_array_size = sampler_array_size;
3582    inst->sampler_base = sampler_base;
3583
3584    switch (type->sampler_dimensionality) {
3585    case GLSL_SAMPLER_DIM_1D:
3586       inst->tex_target = (type->sampler_array)
3587          ? TEXTURE_1D_ARRAY_INDEX : TEXTURE_1D_INDEX;
3588       break;
3589    case GLSL_SAMPLER_DIM_2D:
3590       inst->tex_target = (type->sampler_array)
3591          ? TEXTURE_2D_ARRAY_INDEX : TEXTURE_2D_INDEX;
3592       break;
3593    case GLSL_SAMPLER_DIM_3D:
3594       inst->tex_target = TEXTURE_3D_INDEX;
3595       break;
3596    case GLSL_SAMPLER_DIM_CUBE:
3597       inst->tex_target = (type->sampler_array)
3598          ? TEXTURE_CUBE_ARRAY_INDEX : TEXTURE_CUBE_INDEX;
3599       break;
3600    case GLSL_SAMPLER_DIM_RECT:
3601       inst->tex_target = TEXTURE_RECT_INDEX;
3602       break;
3603    case GLSL_SAMPLER_DIM_BUF:
3604       inst->tex_target = TEXTURE_BUFFER_INDEX;
3605       break;
3606    case GLSL_SAMPLER_DIM_EXTERNAL:
3607       inst->tex_target = TEXTURE_EXTERNAL_INDEX;
3608       break;
3609    case GLSL_SAMPLER_DIM_MS:
3610       inst->tex_target = (type->sampler_array)
3611          ? TEXTURE_2D_MULTISAMPLE_ARRAY_INDEX : TEXTURE_2D_MULTISAMPLE_INDEX;
3612       break;
3613    default:
3614       assert(!"Should not get here.");
3615    }
3616
3617    inst->image_format = st_mesa_format_to_pipe_format(st_context(ctx),
3618          _mesa_get_shader_image_format(imgvar->data.image_format));
3619
3620    if (imgvar->data.image_coherent)
3621       inst->buffer_access |= TGSI_MEMORY_COHERENT;
3622    if (imgvar->data.image_restrict)
3623       inst->buffer_access |= TGSI_MEMORY_RESTRICT;
3624    if (imgvar->data.image_volatile)
3625       inst->buffer_access |= TGSI_MEMORY_VOLATILE;
3626 }
3627
3628 void
3629 glsl_to_tgsi_visitor::visit(ir_call *ir)
3630 {
3631    glsl_to_tgsi_instruction *call_inst;
3632    ir_function_signature *sig = ir->callee;
3633    const char *callee = sig->function_name();
3634    function_entry *entry;
3635    int i;
3636
3637    /* Filter out intrinsics */
3638    if (!strcmp("__intrinsic_atomic_read", callee) ||
3639        !strcmp("__intrinsic_atomic_increment", callee) ||
3640        !strcmp("__intrinsic_atomic_predecrement", callee) ||
3641        !strcmp("__intrinsic_atomic_add", callee) ||
3642        !strcmp("__intrinsic_atomic_sub", callee) ||
3643        !strcmp("__intrinsic_atomic_min", callee) ||
3644        !strcmp("__intrinsic_atomic_max", callee) ||
3645        !strcmp("__intrinsic_atomic_and", callee) ||
3646        !strcmp("__intrinsic_atomic_or", callee) ||
3647        !strcmp("__intrinsic_atomic_xor", callee) ||
3648        !strcmp("__intrinsic_atomic_exchange", callee) ||
3649        !strcmp("__intrinsic_atomic_comp_swap", callee)) {
3650       visit_atomic_counter_intrinsic(ir);
3651       return;
3652    }
3653
3654    if (!strcmp("__intrinsic_load_ssbo", callee) ||
3655        !strcmp("__intrinsic_store_ssbo", callee) ||
3656        !strcmp("__intrinsic_atomic_add_ssbo", callee) ||
3657        !strcmp("__intrinsic_atomic_min_ssbo", callee) ||
3658        !strcmp("__intrinsic_atomic_max_ssbo", callee) ||
3659        !strcmp("__intrinsic_atomic_and_ssbo", callee) ||
3660        !strcmp("__intrinsic_atomic_or_ssbo", callee) ||
3661        !strcmp("__intrinsic_atomic_xor_ssbo", callee) ||
3662        !strcmp("__intrinsic_atomic_exchange_ssbo", callee) ||
3663        !strcmp("__intrinsic_atomic_comp_swap_ssbo", callee)) {
3664       visit_ssbo_intrinsic(ir);
3665       return;
3666    }
3667
3668    if (!strcmp("__intrinsic_memory_barrier", callee) ||
3669        !strcmp("__intrinsic_memory_barrier_atomic_counter", callee) ||
3670        !strcmp("__intrinsic_memory_barrier_buffer", callee) ||
3671        !strcmp("__intrinsic_memory_barrier_image", callee) ||
3672        !strcmp("__intrinsic_memory_barrier_shared", callee) ||
3673        !strcmp("__intrinsic_group_memory_barrier", callee)) {
3674       visit_membar_intrinsic(ir);
3675       return;
3676    }
3677
3678    if (!strcmp("__intrinsic_load_shared", callee) ||
3679        !strcmp("__intrinsic_store_shared", callee) ||
3680        !strcmp("__intrinsic_atomic_add_shared", callee) ||
3681        !strcmp("__intrinsic_atomic_min_shared", callee) ||
3682        !strcmp("__intrinsic_atomic_max_shared", callee) ||
3683        !strcmp("__intrinsic_atomic_and_shared", callee) ||
3684        !strcmp("__intrinsic_atomic_or_shared", callee) ||
3685        !strcmp("__intrinsic_atomic_xor_shared", callee) ||
3686        !strcmp("__intrinsic_atomic_exchange_shared", callee) ||
3687        !strcmp("__intrinsic_atomic_comp_swap_shared", callee)) {
3688       visit_shared_intrinsic(ir);
3689       return;
3690    }
3691
3692    if (!strcmp("__intrinsic_image_load", callee) ||
3693        !strcmp("__intrinsic_image_store", callee) ||
3694        !strcmp("__intrinsic_image_atomic_add", callee) ||
3695        !strcmp("__intrinsic_image_atomic_min", callee) ||
3696        !strcmp("__intrinsic_image_atomic_max", callee) ||
3697        !strcmp("__intrinsic_image_atomic_and", callee) ||
3698        !strcmp("__intrinsic_image_atomic_or", callee) ||
3699        !strcmp("__intrinsic_image_atomic_xor", callee) ||
3700        !strcmp("__intrinsic_image_atomic_exchange", callee) ||
3701        !strcmp("__intrinsic_image_atomic_comp_swap", callee) ||
3702        !strcmp("__intrinsic_image_size", callee) ||
3703        !strcmp("__intrinsic_image_samples", callee)) {
3704       visit_image_intrinsic(ir);
3705       return;
3706    }
3707
3708    entry = get_function_signature(sig);
3709    /* Process in parameters. */
3710    foreach_two_lists(formal_node, &sig->parameters,
3711                      actual_node, &ir->actual_parameters) {
3712       ir_rvalue *param_rval = (ir_rvalue *) actual_node;
3713       ir_variable *param = (ir_variable *) formal_node;
3714
3715       if (param->data.mode == ir_var_function_in ||
3716           param->data.mode == ir_var_function_inout) {
3717          variable_storage *storage = find_variable_storage(param);
3718          assert(storage);
3719
3720          param_rval->accept(this);
3721          st_src_reg r = this->result;
3722
3723          st_dst_reg l;
3724          l.file = storage->file;
3725          l.index = storage->index;
3726          l.reladdr = NULL;
3727          l.writemask = WRITEMASK_XYZW;
3728
3729          for (i = 0; i < type_size(param->type); i++) {
3730             emit_asm(ir, TGSI_OPCODE_MOV, l, r);
3731             l.index++;
3732             r.index++;
3733          }
3734       }
3735    }
3736
3737    /* Emit call instruction */
3738    call_inst = emit_asm(ir, TGSI_OPCODE_CAL);
3739    call_inst->function = entry;
3740
3741    /* Process out parameters. */
3742    foreach_two_lists(formal_node, &sig->parameters,
3743                      actual_node, &ir->actual_parameters) {
3744       ir_rvalue *param_rval = (ir_rvalue *) actual_node;
3745       ir_variable *param = (ir_variable *) formal_node;
3746
3747       if (param->data.mode == ir_var_function_out ||
3748           param->data.mode == ir_var_function_inout) {
3749          variable_storage *storage = find_variable_storage(param);
3750          assert(storage);
3751
3752          st_src_reg r;
3753          r.file = storage->file;
3754          r.index = storage->index;
3755          r.reladdr = NULL;
3756          r.swizzle = SWIZZLE_NOOP;
3757          r.negate = 0;
3758
3759          param_rval->accept(this);
3760          st_dst_reg l = st_dst_reg(this->result);
3761
3762          for (i = 0; i < type_size(param->type); i++) {
3763             emit_asm(ir, TGSI_OPCODE_MOV, l, r);
3764             l.index++;
3765             r.index++;
3766          }
3767       }
3768    }
3769
3770    /* Process return value. */
3771    this->result = entry->return_reg;
3772 }
3773
3774 void
3775 glsl_to_tgsi_visitor::calc_deref_offsets(ir_dereference *head,
3776                                          ir_dereference *tail,
3777                                          unsigned *array_elements,
3778                                          unsigned *base,
3779                                          unsigned *index,
3780                                          st_src_reg *indirect,
3781                                          unsigned *location)
3782 {
3783    switch (tail->ir_type) {
3784    case ir_type_dereference_record: {
3785       ir_dereference_record *deref_record = tail->as_dereference_record();
3786       const glsl_type *struct_type = deref_record->record->type;
3787       int field_index = deref_record->record->type->field_index(deref_record->field);
3788
3789       calc_deref_offsets(head, deref_record->record->as_dereference(), array_elements, base, index, indirect, location);
3790
3791       assert(field_index >= 0);
3792       *location += struct_type->record_location_offset(field_index);
3793       break;
3794    }
3795
3796    case ir_type_dereference_array: {
3797       ir_dereference_array *deref_arr = tail->as_dereference_array();
3798       ir_constant *array_index = deref_arr->array_index->constant_expression_value();
3799
3800       if (!array_index) {
3801          st_src_reg temp_reg;
3802          st_dst_reg temp_dst;
3803
3804          temp_reg = get_temp(glsl_type::uint_type);
3805          temp_dst = st_dst_reg(temp_reg);
3806          temp_dst.writemask = 1;
3807
3808          deref_arr->array_index->accept(this);
3809          if (*array_elements != 1)
3810             emit_asm(NULL, TGSI_OPCODE_MUL, temp_dst, this->result, st_src_reg_for_int(*array_elements));
3811          else
3812             emit_asm(NULL, TGSI_OPCODE_MOV, temp_dst, this->result);
3813
3814          if (indirect->file == PROGRAM_UNDEFINED)
3815             *indirect = temp_reg;
3816          else {
3817             temp_dst = st_dst_reg(*indirect);
3818             temp_dst.writemask = 1;
3819             emit_asm(NULL, TGSI_OPCODE_ADD, temp_dst, *indirect, temp_reg);
3820          }
3821       } else
3822          *index += array_index->value.u[0] * *array_elements;
3823
3824       *array_elements *= deref_arr->array->type->length;
3825
3826       calc_deref_offsets(head, deref_arr->array->as_dereference(), array_elements, base, index, indirect, location);
3827       break;
3828    }
3829    default:
3830       break;
3831    }
3832 }
3833
3834 void
3835 glsl_to_tgsi_visitor::get_deref_offsets(ir_dereference *ir,
3836                                         unsigned *array_size,
3837                                         unsigned *base,
3838                                         unsigned *index,
3839                                         st_src_reg *reladdr)
3840 {
3841    GLuint shader = _mesa_program_enum_to_shader_stage(this->prog->Target);
3842    unsigned location = 0;
3843    ir_variable *var = ir->variable_referenced();
3844
3845    memset(reladdr, 0, sizeof(*reladdr));
3846    reladdr->file = PROGRAM_UNDEFINED;
3847
3848    *base = 0;
3849    *array_size = 1;
3850
3851    assert(var);
3852    location = var->data.location;
3853    calc_deref_offsets(ir, ir, array_size, base, index, reladdr, &location);
3854
3855    /*
3856     * If we end up with no indirect then adjust the base to the index,
3857     * and set the array size to 1.
3858     */
3859    if (reladdr->file == PROGRAM_UNDEFINED) {
3860       *base = *index;
3861       *array_size = 1;
3862    }
3863
3864    if (location != 0xffffffff) {
3865       *base += this->shader_program->UniformStorage[location].opaque[shader].index;
3866       *index += this->shader_program->UniformStorage[location].opaque[shader].index;
3867    }
3868 }
3869
3870 void
3871 glsl_to_tgsi_visitor::visit(ir_texture *ir)
3872 {
3873    st_src_reg result_src, coord, cube_sc, lod_info, projector, dx, dy;
3874    st_src_reg offset[MAX_GLSL_TEXTURE_OFFSET], sample_index, component;
3875    st_src_reg levels_src, reladdr;
3876    st_dst_reg result_dst, coord_dst, cube_sc_dst;
3877    glsl_to_tgsi_instruction *inst = NULL;
3878    unsigned opcode = TGSI_OPCODE_NOP;
3879    const glsl_type *sampler_type = ir->sampler->type;
3880    unsigned sampler_array_size = 1, sampler_index = 0, sampler_base = 0;
3881    bool is_cube_array = false;
3882    unsigned i;
3883
3884    /* if we are a cube array sampler */
3885    if ((sampler_type->sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE &&
3886         sampler_type->sampler_array)) {
3887       is_cube_array = true;
3888    }
3889
3890    if (ir->coordinate) {
3891       ir->coordinate->accept(this);
3892
3893       /* Put our coords in a temp.  We'll need to modify them for shadow,
3894        * projection, or LOD, so the only case we'd use it as is is if
3895        * we're doing plain old texturing.  The optimization passes on
3896        * glsl_to_tgsi_visitor should handle cleaning up our mess in that case.
3897        */
3898       coord = get_temp(glsl_type::vec4_type);
3899       coord_dst = st_dst_reg(coord);
3900       coord_dst.writemask = (1 << ir->coordinate->type->vector_elements) - 1;
3901       emit_asm(ir, TGSI_OPCODE_MOV, coord_dst, this->result);
3902    }
3903
3904    if (ir->projector) {
3905       ir->projector->accept(this);
3906       projector = this->result;
3907    }
3908
3909    /* Storage for our result.  Ideally for an assignment we'd be using
3910     * the actual storage for the result here, instead.
3911     */
3912    result_src = get_temp(ir->type);
3913    result_dst = st_dst_reg(result_src);
3914
3915    switch (ir->op) {
3916    case ir_tex:
3917       opcode = (is_cube_array && ir->shadow_comparitor) ? TGSI_OPCODE_TEX2 : TGSI_OPCODE_TEX;
3918       if (ir->offset) {
3919          ir->offset->accept(this);
3920          offset[0] = this->result;
3921       }
3922       break;
3923    case ir_txb:
3924       if (is_cube_array ||
3925           sampler_type == glsl_type::samplerCubeShadow_type) {
3926          opcode = TGSI_OPCODE_TXB2;
3927       }
3928       else {
3929          opcode = TGSI_OPCODE_TXB;
3930       }
3931       ir->lod_info.bias->accept(this);
3932       lod_info = this->result;
3933       if (ir->offset) {
3934          ir->offset->accept(this);
3935          offset[0] = this->result;
3936       }
3937       break;
3938    case ir_txl:
3939       opcode = is_cube_array ? TGSI_OPCODE_TXL2 : TGSI_OPCODE_TXL;
3940       ir->lod_info.lod->accept(this);
3941       lod_info = this->result;
3942       if (ir->offset) {
3943          ir->offset->accept(this);
3944          offset[0] = this->result;
3945       }
3946       break;
3947    case ir_txd:
3948       opcode = TGSI_OPCODE_TXD;
3949       ir->lod_info.grad.dPdx->accept(this);
3950       dx = this->result;
3951       ir->lod_info.grad.dPdy->accept(this);
3952       dy = this->result;
3953       if (ir->offset) {
3954          ir->offset->accept(this);
3955          offset[0] = this->result;
3956       }
3957       break;
3958    case ir_txs:
3959       opcode = TGSI_OPCODE_TXQ;
3960       ir->lod_info.lod->accept(this);
3961       lod_info = this->result;
3962       break;
3963    case ir_query_levels:
3964       opcode = TGSI_OPCODE_TXQ;
3965       lod_info = undef_src;
3966       levels_src = get_temp(ir->type);
3967       break;
3968    case ir_txf:
3969       opcode = TGSI_OPCODE_TXF;
3970       ir->lod_info.lod->accept(this);
3971       lod_info = this->result;
3972       if (ir->offset) {
3973          ir->offset->accept(this);
3974          offset[0] = this->result;
3975       }
3976       break;
3977    case ir_txf_ms:
3978       opcode = TGSI_OPCODE_TXF;
3979       ir->lod_info.sample_index->accept(this);
3980       sample_index = this->result;
3981       break;
3982    case ir_tg4:
3983       opcode = TGSI_OPCODE_TG4;
3984       ir->lod_info.component->accept(this);
3985       component = this->result;
3986       if (ir->offset) {
3987          ir->offset->accept(this);
3988          if (ir->offset->type->base_type == GLSL_TYPE_ARRAY) {
3989             const glsl_type *elt_type = ir->offset->type->fields.array;
3990             for (i = 0; i < ir->offset->type->length; i++) {
3991                offset[i] = this->result;
3992                offset[i].index += i * type_size(elt_type);
3993                offset[i].type = elt_type->base_type;
3994                offset[i].swizzle = swizzle_for_size(elt_type->vector_elements);
3995             }
3996          } else {
3997             offset[0] = this->result;
3998          }
3999       }
4000       break;
4001    case ir_lod:
4002       opcode = TGSI_OPCODE_LODQ;
4003       break;
4004    case ir_texture_samples:
4005       opcode = TGSI_OPCODE_TXQS;
4006       break;
4007    case ir_samples_identical:
4008       unreachable("Unexpected ir_samples_identical opcode");
4009    }
4010
4011    if (ir->projector) {
4012       if (opcode == TGSI_OPCODE_TEX) {
4013          /* Slot the projector in as the last component of the coord. */
4014          coord_dst.writemask = WRITEMASK_W;
4015          emit_asm(ir, TGSI_OPCODE_MOV, coord_dst, projector);
4016          coord_dst.writemask = WRITEMASK_XYZW;
4017          opcode = TGSI_OPCODE_TXP;
4018       } else {
4019          st_src_reg coord_w = coord;
4020          coord_w.swizzle = SWIZZLE_WWWW;
4021
4022          /* For the other TEX opcodes there's no projective version
4023           * since the last slot is taken up by LOD info.  Do the
4024           * projective divide now.
4025           */
4026          coord_dst.writemask = WRITEMASK_W;
4027          emit_asm(ir, TGSI_OPCODE_RCP, coord_dst, projector);
4028
4029          /* In the case where we have to project the coordinates "by hand,"
4030           * the shadow comparator value must also be projected.
4031           */
4032          st_src_reg tmp_src = coord;
4033          if (ir->shadow_comparitor) {
4034             /* Slot the shadow value in as the second to last component of the
4035              * coord.
4036              */
4037             ir->shadow_comparitor->accept(this);
4038
4039             tmp_src = get_temp(glsl_type::vec4_type);
4040             st_dst_reg tmp_dst = st_dst_reg(tmp_src);
4041
4042             /* Projective division not allowed for array samplers. */
4043             assert(!sampler_type->sampler_array);
4044
4045             tmp_dst.writemask = WRITEMASK_Z;
4046             emit_asm(ir, TGSI_OPCODE_MOV, tmp_dst, this->result);
4047
4048             tmp_dst.writemask = WRITEMASK_XY;
4049             emit_asm(ir, TGSI_OPCODE_MOV, tmp_dst, coord);
4050          }
4051
4052          coord_dst.writemask = WRITEMASK_XYZ;
4053          emit_asm(ir, TGSI_OPCODE_MUL, coord_dst, tmp_src, coord_w);
4054
4055          coord_dst.writemask = WRITEMASK_XYZW;
4056          coord.swizzle = SWIZZLE_XYZW;
4057       }
4058    }
4059
4060    /* If projection is done and the opcode is not TGSI_OPCODE_TXP, then the shadow
4061     * comparator was put in the correct place (and projected) by the code,
4062     * above, that handles by-hand projection.
4063     */
4064    if (ir->shadow_comparitor && (!ir->projector || opcode == TGSI_OPCODE_TXP)) {
4065       /* Slot the shadow value in as the second to last component of the
4066        * coord.
4067        */
4068       ir->shadow_comparitor->accept(this);
4069
4070       if (is_cube_array) {
4071          cube_sc = get_temp(glsl_type::float_type);
4072          cube_sc_dst = st_dst_reg(cube_sc);
4073          cube_sc_dst.writemask = WRITEMASK_X;
4074          emit_asm(ir, TGSI_OPCODE_MOV, cube_sc_dst, this->result);
4075          cube_sc_dst.writemask = WRITEMASK_X;
4076       }
4077       else {
4078          if ((sampler_type->sampler_dimensionality == GLSL_SAMPLER_DIM_2D &&
4079               sampler_type->sampler_array) ||
4080              sampler_type->sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE) {
4081             coord_dst.writemask = WRITEMASK_W;
4082          } else {
4083             coord_dst.writemask = WRITEMASK_Z;
4084          }
4085          emit_asm(ir, TGSI_OPCODE_MOV, coord_dst, this->result);
4086          coord_dst.writemask = WRITEMASK_XYZW;
4087       }
4088    }
4089
4090    if (ir->op == ir_txf_ms) {
4091       coord_dst.writemask = WRITEMASK_W;
4092       emit_asm(ir, TGSI_OPCODE_MOV, coord_dst, sample_index);
4093       coord_dst.writemask = WRITEMASK_XYZW;
4094    } else if (opcode == TGSI_OPCODE_TXL || opcode == TGSI_OPCODE_TXB ||
4095        opcode == TGSI_OPCODE_TXF) {
4096       /* TGSI stores LOD or LOD bias in the last channel of the coords. */
4097       coord_dst.writemask = WRITEMASK_W;
4098       emit_asm(ir, TGSI_OPCODE_MOV, coord_dst, lod_info);
4099       coord_dst.writemask = WRITEMASK_XYZW;
4100    }
4101
4102    get_deref_offsets(ir->sampler, &sampler_array_size, &sampler_base,
4103                      &sampler_index, &reladdr);
4104    if (reladdr.file != PROGRAM_UNDEFINED)
4105       emit_arl(ir, sampler_reladdr, reladdr);
4106
4107    if (opcode == TGSI_OPCODE_TXD)
4108       inst = emit_asm(ir, opcode, result_dst, coord, dx, dy);
4109    else if (opcode == TGSI_OPCODE_TXQ) {
4110       if (ir->op == ir_query_levels) {
4111          /* the level is stored in W */
4112          inst = emit_asm(ir, opcode, st_dst_reg(levels_src), lod_info);
4113          result_dst.writemask = WRITEMASK_X;
4114          levels_src.swizzle = SWIZZLE_WWWW;
4115          emit_asm(ir, TGSI_OPCODE_MOV, result_dst, levels_src);
4116       } else
4117          inst = emit_asm(ir, opcode, result_dst, lod_info);
4118    } else if (opcode == TGSI_OPCODE_TXQS) {
4119       inst = emit_asm(ir, opcode, result_dst);
4120    } else if (opcode == TGSI_OPCODE_TXF) {
4121       inst = emit_asm(ir, opcode, result_dst, coord);
4122    } else if (opcode == TGSI_OPCODE_TXL2 || opcode == TGSI_OPCODE_TXB2) {
4123       inst = emit_asm(ir, opcode, result_dst, coord, lod_info);
4124    } else if (opcode == TGSI_OPCODE_TEX2) {
4125       inst = emit_asm(ir, opcode, result_dst, coord, cube_sc);
4126    } else if (opcode == TGSI_OPCODE_TG4) {
4127       if (is_cube_array && ir->shadow_comparitor) {
4128          inst = emit_asm(ir, opcode, result_dst, coord, cube_sc);
4129       } else {
4130          inst = emit_asm(ir, opcode, result_dst, coord, component);
4131       }
4132    } else
4133       inst = emit_asm(ir, opcode, result_dst, coord);
4134
4135    if (ir->shadow_comparitor)
4136       inst->tex_shadow = GL_TRUE;
4137
4138    inst->sampler.index = sampler_index;
4139    inst->sampler_array_size = sampler_array_size;
4140    inst->sampler_base = sampler_base;
4141
4142    if (reladdr.file != PROGRAM_UNDEFINED) {
4143       inst->sampler.reladdr = ralloc(mem_ctx, st_src_reg);
4144       memcpy(inst->sampler.reladdr, &reladdr, sizeof(reladdr));
4145    }
4146
4147    if (ir->offset) {
4148       for (i = 0; i < MAX_GLSL_TEXTURE_OFFSET && offset[i].file != PROGRAM_UNDEFINED; i++)
4149          inst->tex_offsets[i] = offset[i];
4150       inst->tex_offset_num_offset = i;
4151    }
4152
4153    switch (sampler_type->sampler_dimensionality) {
4154    case GLSL_SAMPLER_DIM_1D:
4155       inst->tex_target = (sampler_type->sampler_array)
4156          ? TEXTURE_1D_ARRAY_INDEX : TEXTURE_1D_INDEX;
4157       break;
4158    case GLSL_SAMPLER_DIM_2D:
4159       inst->tex_target = (sampler_type->sampler_array)
4160          ? TEXTURE_2D_ARRAY_INDEX : TEXTURE_2D_INDEX;
4161       break;
4162    case GLSL_SAMPLER_DIM_3D:
4163       inst->tex_target = TEXTURE_3D_INDEX;
4164       break;
4165    case GLSL_SAMPLER_DIM_CUBE:
4166       inst->tex_target = (sampler_type->sampler_array)
4167          ? TEXTURE_CUBE_ARRAY_INDEX : TEXTURE_CUBE_INDEX;
4168       break;
4169    case GLSL_SAMPLER_DIM_RECT:
4170       inst->tex_target = TEXTURE_RECT_INDEX;
4171       break;
4172    case GLSL_SAMPLER_DIM_BUF:
4173       inst->tex_target = TEXTURE_BUFFER_INDEX;
4174       break;
4175    case GLSL_SAMPLER_DIM_EXTERNAL:
4176       inst->tex_target = TEXTURE_EXTERNAL_INDEX;
4177       break;
4178    case GLSL_SAMPLER_DIM_MS:
4179       inst->tex_target = (sampler_type->sampler_array)
4180          ? TEXTURE_2D_MULTISAMPLE_ARRAY_INDEX : TEXTURE_2D_MULTISAMPLE_INDEX;
4181       break;
4182    default:
4183       assert(!"Should not get here.");
4184    }
4185
4186    inst->tex_type = ir->type->base_type;
4187
4188    this->result = result_src;
4189 }
4190
4191 void
4192 glsl_to_tgsi_visitor::visit(ir_return *ir)
4193 {
4194    if (ir->get_value()) {
4195       st_dst_reg l;
4196       int i;
4197
4198       assert(current_function);
4199
4200       ir->get_value()->accept(this);
4201       st_src_reg r = this->result;
4202
4203       l = st_dst_reg(current_function->return_reg);
4204
4205       for (i = 0; i < type_size(current_function->sig->return_type); i++) {
4206          emit_asm(ir, TGSI_OPCODE_MOV, l, r);
4207          l.index++;
4208          r.index++;
4209       }
4210    }
4211
4212    emit_asm(ir, TGSI_OPCODE_RET);
4213 }
4214
4215 void
4216 glsl_to_tgsi_visitor::visit(ir_discard *ir)
4217 {
4218    if (ir->condition) {
4219       ir->condition->accept(this);
4220       st_src_reg condition = this->result;
4221
4222       /* Convert the bool condition to a float so we can negate. */
4223       if (native_integers) {
4224          st_src_reg temp = get_temp(ir->condition->type);
4225          emit_asm(ir, TGSI_OPCODE_AND, st_dst_reg(temp),
4226               condition, st_src_reg_for_float(1.0));
4227          condition = temp;
4228       }
4229
4230       condition.negate = ~condition.negate;
4231       emit_asm(ir, TGSI_OPCODE_KILL_IF, undef_dst, condition);
4232    } else {
4233       /* unconditional kil */
4234       emit_asm(ir, TGSI_OPCODE_KILL);
4235    }
4236 }
4237
4238 void
4239 glsl_to_tgsi_visitor::visit(ir_if *ir)
4240 {
4241    unsigned if_opcode;
4242    glsl_to_tgsi_instruction *if_inst;
4243
4244    ir->condition->accept(this);
4245    assert(this->result.file != PROGRAM_UNDEFINED);
4246
4247    if_opcode = native_integers ? TGSI_OPCODE_UIF : TGSI_OPCODE_IF;
4248
4249    if_inst = emit_asm(ir->condition, if_opcode, undef_dst, this->result);
4250
4251    this->instructions.push_tail(if_inst);
4252
4253    visit_exec_list(&ir->then_instructions, this);
4254
4255    if (!ir->else_instructions.is_empty()) {
4256       emit_asm(ir->condition, TGSI_OPCODE_ELSE);
4257       visit_exec_list(&ir->else_instructions, this);
4258    }
4259
4260    if_inst = emit_asm(ir->condition, TGSI_OPCODE_ENDIF);
4261 }
4262
4263
4264 void
4265 glsl_to_tgsi_visitor::visit(ir_emit_vertex *ir)
4266 {
4267    assert(this->prog->Target == GL_GEOMETRY_PROGRAM_NV);
4268
4269    ir->stream->accept(this);
4270    emit_asm(ir, TGSI_OPCODE_EMIT, undef_dst, this->result);
4271 }
4272
4273 void
4274 glsl_to_tgsi_visitor::visit(ir_end_primitive *ir)
4275 {
4276    assert(this->prog->Target == GL_GEOMETRY_PROGRAM_NV);
4277
4278    ir->stream->accept(this);
4279    emit_asm(ir, TGSI_OPCODE_ENDPRIM, undef_dst, this->result);
4280 }
4281
4282 void
4283 glsl_to_tgsi_visitor::visit(ir_barrier *ir)
4284 {
4285    assert(this->prog->Target == GL_TESS_CONTROL_PROGRAM_NV ||
4286           this->prog->Target == GL_COMPUTE_PROGRAM_NV);
4287
4288    emit_asm(ir, TGSI_OPCODE_BARRIER);
4289 }
4290
4291 glsl_to_tgsi_visitor::glsl_to_tgsi_visitor()
4292 {
4293    STATIC_ASSERT(sizeof(samplers_used) * 8 >= PIPE_MAX_SAMPLERS);
4294
4295    result.file = PROGRAM_UNDEFINED;
4296    next_temp = 1;
4297    array_sizes = NULL;
4298    max_num_arrays = 0;
4299    next_array = 0;
4300    num_input_arrays = 0;
4301    num_output_arrays = 0;
4302    next_signature_id = 1;
4303    num_immediates = 0;
4304    current_function = NULL;
4305    num_address_regs = 0;
4306    samplers_used = 0;
4307    buffers_used = 0;
4308    images_used = 0;
4309    indirect_addr_consts = false;
4310    wpos_transform_const = -1;
4311    glsl_version = 0;
4312    native_integers = false;
4313    mem_ctx = ralloc_context(NULL);
4314    ctx = NULL;
4315    prog = NULL;
4316    shader_program = NULL;
4317    shader = NULL;
4318    options = NULL;
4319    have_sqrt = false;
4320    have_fma = false;
4321    use_shared_memory = false;
4322 }
4323
4324 glsl_to_tgsi_visitor::~glsl_to_tgsi_visitor()
4325 {
4326    free(array_sizes);
4327    ralloc_free(mem_ctx);
4328 }
4329
4330 extern "C" void free_glsl_to_tgsi_visitor(glsl_to_tgsi_visitor *v)
4331 {
4332    delete v;
4333 }
4334
4335
4336 /**
4337  * Count resources used by the given gpu program (number of texture
4338  * samplers, etc).
4339  */
4340 static void
4341 count_resources(glsl_to_tgsi_visitor *v, gl_program *prog)
4342 {
4343    v->samplers_used = 0;
4344    v->buffers_used = 0;
4345    v->images_used = 0;
4346
4347    foreach_in_list(glsl_to_tgsi_instruction, inst, &v->instructions) {
4348       if (inst->info->is_tex) {
4349          for (int i = 0; i < inst->sampler_array_size; i++) {
4350             unsigned idx = inst->sampler_base + i;
4351             v->samplers_used |= 1u << idx;
4352
4353             debug_assert(idx < (int)ARRAY_SIZE(v->sampler_types));
4354             v->sampler_types[idx] = inst->tex_type;
4355             v->sampler_targets[idx] =
4356                st_translate_texture_target(inst->tex_target, inst->tex_shadow);
4357
4358             if (inst->tex_shadow) {
4359                prog->ShadowSamplers |= 1 << (inst->sampler.index + i);
4360             }
4361          }
4362       }
4363       if (inst->buffer.file != PROGRAM_UNDEFINED && (
4364                 is_resource_instruction(inst->op) ||
4365                 inst->op == TGSI_OPCODE_STORE)) {
4366          if (inst->buffer.file == PROGRAM_BUFFER) {
4367             v->buffers_used |= 1 << inst->buffer.index;
4368          } else if (inst->buffer.file == PROGRAM_MEMORY) {
4369             v->use_shared_memory = true;
4370          } else {
4371             assert(inst->buffer.file == PROGRAM_IMAGE);
4372             for (int i = 0; i < inst->sampler_array_size; i++) {
4373                unsigned idx = inst->sampler_base + i;
4374                v->images_used |= 1 << idx;
4375                v->image_targets[idx] =
4376                   st_translate_texture_target(inst->tex_target, false);
4377                v->image_formats[idx] = inst->image_format;
4378             }
4379          }
4380       }
4381    }
4382    prog->SamplersUsed = v->samplers_used;
4383
4384    if (v->shader_program != NULL)
4385       _mesa_update_shader_textures_used(v->shader_program, prog);
4386 }
4387
4388 /**
4389  * Returns the mask of channels (bitmask of WRITEMASK_X,Y,Z,W) which
4390  * are read from the given src in this instruction
4391  */
4392 static int
4393 get_src_arg_mask(st_dst_reg dst, st_src_reg src)
4394 {
4395    int read_mask = 0, comp;
4396
4397    /* Now, given the src swizzle and the written channels, find which
4398     * components are actually read
4399     */
4400    for (comp = 0; comp < 4; ++comp) {
4401       const unsigned coord = GET_SWZ(src.swizzle, comp);
4402       assert(coord < 4);
4403       if (dst.writemask & (1 << comp) && coord <= SWIZZLE_W)
4404          read_mask |= 1 << coord;
4405    }
4406
4407    return read_mask;
4408 }
4409
4410 /**
4411  * This pass replaces CMP T0, T1 T2 T0 with MOV T0, T2 when the CMP
4412  * instruction is the first instruction to write to register T0.  There are
4413  * several lowering passes done in GLSL IR (e.g. branches and
4414  * relative addressing) that create a large number of conditional assignments
4415  * that ir_to_mesa converts to CMP instructions like the one mentioned above.
4416  *
4417  * Here is why this conversion is safe:
4418  * CMP T0, T1 T2 T0 can be expanded to:
4419  * if (T1 < 0.0)
4420  *   MOV T0, T2;
4421  * else
4422  *   MOV T0, T0;
4423  *
4424  * If (T1 < 0.0) evaluates to true then our replacement MOV T0, T2 is the same
4425  * as the original program.  If (T1 < 0.0) evaluates to false, executing
4426  * MOV T0, T0 will store a garbage value in T0 since T0 is uninitialized.
4427  * Therefore, it doesn't matter that we are replacing MOV T0, T0 with MOV T0, T2
4428  * because any instruction that was going to read from T0 after this was going
4429  * to read a garbage value anyway.
4430  */
4431 void
4432 glsl_to_tgsi_visitor::simplify_cmp(void)
4433 {
4434    int tempWritesSize = 0;
4435    unsigned *tempWrites = NULL;
4436    unsigned outputWrites[VARYING_SLOT_TESS_MAX];
4437
4438    memset(outputWrites, 0, sizeof(outputWrites));
4439
4440    foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) {
4441       unsigned prevWriteMask = 0;
4442
4443       /* Give up if we encounter relative addressing or flow control. */
4444       if (inst->dst[0].reladdr || inst->dst[0].reladdr2 ||
4445           inst->dst[1].reladdr || inst->dst[1].reladdr2 ||
4446           tgsi_get_opcode_info(inst->op)->is_branch ||
4447           inst->op == TGSI_OPCODE_BGNSUB ||
4448           inst->op == TGSI_OPCODE_CONT ||
4449           inst->op == TGSI_OPCODE_END ||
4450           inst->op == TGSI_OPCODE_ENDSUB ||
4451           inst->op == TGSI_OPCODE_RET) {
4452          break;
4453       }
4454
4455       if (inst->dst[0].file == PROGRAM_OUTPUT) {
4456          assert(inst->dst[0].index < (signed)ARRAY_SIZE(outputWrites));
4457          prevWriteMask = outputWrites[inst->dst[0].index];
4458          outputWrites[inst->dst[0].index] |= inst->dst[0].writemask;
4459       } else if (inst->dst[0].file == PROGRAM_TEMPORARY) {
4460          if (inst->dst[0].index >= tempWritesSize) {
4461             const int inc = 4096;
4462
4463             tempWrites = (unsigned*)
4464                          realloc(tempWrites,
4465                                  (tempWritesSize + inc) * sizeof(unsigned));
4466             if (!tempWrites)
4467                return;
4468
4469             memset(tempWrites + tempWritesSize, 0, inc * sizeof(unsigned));
4470             tempWritesSize += inc;
4471          }
4472
4473          prevWriteMask = tempWrites[inst->dst[0].index];
4474          tempWrites[inst->dst[0].index] |= inst->dst[0].writemask;
4475       } else
4476          continue;
4477
4478       /* For a CMP to be considered a conditional write, the destination
4479        * register and source register two must be the same. */
4480       if (inst->op == TGSI_OPCODE_CMP
4481           && !(inst->dst[0].writemask & prevWriteMask)
4482           && inst->src[2].file == inst->dst[0].file
4483           && inst->src[2].index == inst->dst[0].index
4484           && inst->dst[0].writemask == get_src_arg_mask(inst->dst[0], inst->src[2])) {
4485
4486          inst->op = TGSI_OPCODE_MOV;
4487          inst->src[0] = inst->src[1];
4488       }
4489    }
4490
4491    free(tempWrites);
4492 }
4493
4494 /* Replaces all references to a temporary register index with another index. */
4495 void
4496 glsl_to_tgsi_visitor::rename_temp_registers(int num_renames, struct rename_reg_pair *renames)
4497 {
4498    foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) {
4499       unsigned j;
4500       int k;
4501       for (j = 0; j < num_inst_src_regs(inst); j++) {
4502          if (inst->src[j].file == PROGRAM_TEMPORARY)
4503             for (k = 0; k < num_renames; k++)
4504                if (inst->src[j].index == renames[k].old_reg)
4505                   inst->src[j].index = renames[k].new_reg;
4506       }
4507
4508       for (j = 0; j < inst->tex_offset_num_offset; j++) {
4509          if (inst->tex_offsets[j].file == PROGRAM_TEMPORARY)
4510             for (k = 0; k < num_renames; k++)
4511                if (inst->tex_offsets[j].index == renames[k].old_reg)
4512                   inst->tex_offsets[j].index = renames[k].new_reg;
4513       }
4514
4515       for (j = 0; j < num_inst_dst_regs(inst); j++) {
4516          if (inst->dst[j].file == PROGRAM_TEMPORARY)
4517              for (k = 0; k < num_renames; k++)
4518                 if (inst->dst[j].index == renames[k].old_reg)
4519                    inst->dst[j].index = renames[k].new_reg;
4520       }
4521    }
4522 }
4523
4524 void
4525 glsl_to_tgsi_visitor::get_first_temp_read(int *first_reads)
4526 {
4527    int depth = 0; /* loop depth */
4528    int loop_start = -1; /* index of the first active BGNLOOP (if any) */
4529    unsigned i = 0, j;
4530
4531    foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) {
4532       for (j = 0; j < num_inst_src_regs(inst); j++) {
4533          if (inst->src[j].file == PROGRAM_TEMPORARY) {
4534             if (first_reads[inst->src[j].index] == -1)
4535                 first_reads[inst->src[j].index] = (depth == 0) ? i : loop_start;
4536          }
4537       }
4538       for (j = 0; j < inst->tex_offset_num_offset; j++) {
4539          if (inst->tex_offsets[j].file == PROGRAM_TEMPORARY) {
4540             if (first_reads[inst->tex_offsets[j].index] == -1)
4541                first_reads[inst->tex_offsets[j].index] = (depth == 0) ? i : loop_start;
4542          }
4543       }
4544       if (inst->op == TGSI_OPCODE_BGNLOOP) {
4545          if(depth++ == 0)
4546             loop_start = i;
4547       } else if (inst->op == TGSI_OPCODE_ENDLOOP) {
4548          if (--depth == 0)
4549             loop_start = -1;
4550       }
4551       assert(depth >= 0);
4552       i++;
4553    }
4554 }
4555
4556 void
4557 glsl_to_tgsi_visitor::get_last_temp_read_first_temp_write(int *last_reads, int *first_writes)
4558 {
4559    int depth = 0; /* loop depth */
4560    int loop_start = -1; /* index of the first active BGNLOOP (if any) */
4561    unsigned i = 0, j;
4562    int k;
4563    foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) {
4564       for (j = 0; j < num_inst_src_regs(inst); j++) {
4565          if (inst->src[j].file == PROGRAM_TEMPORARY)
4566             last_reads[inst->src[j].index] = (depth == 0) ? i : -2;
4567       }
4568       for (j = 0; j < num_inst_dst_regs(inst); j++) {
4569          if (inst->dst[j].file == PROGRAM_TEMPORARY) {
4570             if (first_writes[inst->dst[j].index] == -1)
4571                first_writes[inst->dst[j].index] = (depth == 0) ? i : loop_start;
4572             last_reads[inst->dst[j].index] = (depth == 0) ? i : -2;
4573          }
4574       }
4575       for (j = 0; j < inst->tex_offset_num_offset; j++) {
4576          if (inst->tex_offsets[j].file == PROGRAM_TEMPORARY)
4577             last_reads[inst->tex_offsets[j].index] = (depth == 0) ? i : -2;
4578       }
4579       if (inst->op == TGSI_OPCODE_BGNLOOP) {
4580          if(depth++ == 0)
4581             loop_start = i;
4582       } else if (inst->op == TGSI_OPCODE_ENDLOOP) {
4583          if (--depth == 0) {
4584             loop_start = -1;
4585             for (k = 0; k < this->next_temp; k++) {
4586                if (last_reads[k] == -2) {
4587                   last_reads[k] = i;
4588                }
4589             }
4590          }
4591       }
4592       assert(depth >= 0);
4593       i++;
4594    }
4595 }
4596
4597 void
4598 glsl_to_tgsi_visitor::get_last_temp_write(int *last_writes)
4599 {
4600    int depth = 0; /* loop depth */
4601    int i = 0, k;
4602    unsigned j;
4603
4604    foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) {
4605       for (j = 0; j < num_inst_dst_regs(inst); j++) {
4606          if (inst->dst[j].file == PROGRAM_TEMPORARY)
4607             last_writes[inst->dst[j].index] = (depth == 0) ? i : -2;
4608       }
4609
4610       if (inst->op == TGSI_OPCODE_BGNLOOP)
4611          depth++;
4612       else if (inst->op == TGSI_OPCODE_ENDLOOP)
4613          if (--depth == 0) {
4614             for (k = 0; k < this->next_temp; k++) {
4615                if (last_writes[k] == -2) {
4616                   last_writes[k] = i;
4617                }
4618             }
4619          }
4620       assert(depth >= 0);
4621       i++;
4622    }
4623 }
4624
4625 /*
4626  * On a basic block basis, tracks available PROGRAM_TEMPORARY register
4627  * channels for copy propagation and updates following instructions to
4628  * use the original versions.
4629  *
4630  * The glsl_to_tgsi_visitor lazily produces code assuming that this pass
4631  * will occur.  As an example, a TXP production before this pass:
4632  *
4633  * 0: MOV TEMP[1], INPUT[4].xyyy;
4634  * 1: MOV TEMP[1].w, INPUT[4].wwww;
4635  * 2: TXP TEMP[2], TEMP[1], texture[0], 2D;
4636  *
4637  * and after:
4638  *
4639  * 0: MOV TEMP[1], INPUT[4].xyyy;
4640  * 1: MOV TEMP[1].w, INPUT[4].wwww;
4641  * 2: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D;
4642  *
4643  * which allows for dead code elimination on TEMP[1]'s writes.
4644  */
4645 void
4646 glsl_to_tgsi_visitor::copy_propagate(void)
4647 {
4648    glsl_to_tgsi_instruction **acp = rzalloc_array(mem_ctx,
4649                                                   glsl_to_tgsi_instruction *,
4650                                                   this->next_temp * 4);
4651    int *acp_level = rzalloc_array(mem_ctx, int, this->next_temp * 4);
4652    int level = 0;
4653
4654    foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) {
4655       assert(inst->dst[0].file != PROGRAM_TEMPORARY
4656              || inst->dst[0].index < this->next_temp);
4657
4658       /* First, do any copy propagation possible into the src regs. */
4659       for (int r = 0; r < 3; r++) {
4660          glsl_to_tgsi_instruction *first = NULL;
4661          bool good = true;
4662          int acp_base = inst->src[r].index * 4;
4663
4664          if (inst->src[r].file != PROGRAM_TEMPORARY ||
4665              inst->src[r].reladdr ||
4666              inst->src[r].reladdr2)
4667             continue;
4668
4669          /* See if we can find entries in the ACP consisting of MOVs
4670           * from the same src register for all the swizzled channels
4671           * of this src register reference.
4672           */
4673          for (int i = 0; i < 4; i++) {
4674             int src_chan = GET_SWZ(inst->src[r].swizzle, i);
4675             glsl_to_tgsi_instruction *copy_chan = acp[acp_base + src_chan];
4676
4677             if (!copy_chan) {
4678                good = false;
4679                break;
4680             }
4681
4682             assert(acp_level[acp_base + src_chan] <= level);
4683
4684             if (!first) {
4685                first = copy_chan;
4686             } else {
4687                if (first->src[0].file != copy_chan->src[0].file ||
4688                    first->src[0].index != copy_chan->src[0].index ||
4689                    first->src[0].double_reg2 != copy_chan->src[0].double_reg2 ||
4690                    first->src[0].index2D != copy_chan->src[0].index2D) {
4691                   good = false;
4692                   break;
4693                }
4694             }
4695          }
4696
4697          if (good) {
4698             /* We've now validated that we can copy-propagate to
4699              * replace this src register reference.  Do it.
4700              */
4701             inst->src[r].file = first->src[0].file;
4702             inst->src[r].index = first->src[0].index;
4703             inst->src[r].index2D = first->src[0].index2D;
4704             inst->src[r].has_index2 = first->src[0].has_index2;
4705             inst->src[r].double_reg2 = first->src[0].double_reg2;
4706             inst->src[r].array_id = first->src[0].array_id;
4707
4708             int swizzle = 0;
4709             for (int i = 0; i < 4; i++) {
4710                int src_chan = GET_SWZ(inst->src[r].swizzle, i);
4711                glsl_to_tgsi_instruction *copy_inst = acp[acp_base + src_chan];
4712                swizzle |= (GET_SWZ(copy_inst->src[0].swizzle, src_chan) << (3 * i));
4713             }
4714             inst->src[r].swizzle = swizzle;
4715          }
4716       }
4717
4718       switch (inst->op) {
4719       case TGSI_OPCODE_BGNLOOP:
4720       case TGSI_OPCODE_ENDLOOP:
4721          /* End of a basic block, clear the ACP entirely. */
4722          memset(acp, 0, sizeof(*acp) * this->next_temp * 4);
4723          break;
4724
4725       case TGSI_OPCODE_IF:
4726       case TGSI_OPCODE_UIF:
4727          ++level;
4728          break;
4729
4730       case TGSI_OPCODE_ENDIF:
4731       case TGSI_OPCODE_ELSE:
4732          /* Clear all channels written inside the block from the ACP, but
4733           * leaving those that were not touched.
4734           */
4735          for (int r = 0; r < this->next_temp; r++) {
4736             for (int c = 0; c < 4; c++) {
4737                if (!acp[4 * r + c])
4738                   continue;
4739
4740                if (acp_level[4 * r + c] >= level)
4741                   acp[4 * r + c] = NULL;
4742             }
4743          }
4744          if (inst->op == TGSI_OPCODE_ENDIF)
4745             --level;
4746          break;
4747
4748       default:
4749          /* Continuing the block, clear any written channels from
4750           * the ACP.
4751           */
4752          for (int d = 0; d < 2; d++) {
4753             if (inst->dst[d].file == PROGRAM_TEMPORARY && inst->dst[d].reladdr) {
4754                /* Any temporary might be written, so no copy propagation
4755                 * across this instruction.
4756                 */
4757                memset(acp, 0, sizeof(*acp) * this->next_temp * 4);
4758             } else if (inst->dst[d].file == PROGRAM_OUTPUT &&
4759                        inst->dst[d].reladdr) {
4760                /* Any output might be written, so no copy propagation
4761                 * from outputs across this instruction.
4762                 */
4763                for (int r = 0; r < this->next_temp; r++) {
4764                   for (int c = 0; c < 4; c++) {
4765                      if (!acp[4 * r + c])
4766                         continue;
4767
4768                      if (acp[4 * r + c]->src[0].file == PROGRAM_OUTPUT)
4769                         acp[4 * r + c] = NULL;
4770                   }
4771                }
4772             } else if (inst->dst[d].file == PROGRAM_TEMPORARY ||
4773                        inst->dst[d].file == PROGRAM_OUTPUT) {
4774                /* Clear where it's used as dst. */
4775                if (inst->dst[d].file == PROGRAM_TEMPORARY) {
4776                   for (int c = 0; c < 4; c++) {
4777                      if (inst->dst[d].writemask & (1 << c))
4778                         acp[4 * inst->dst[d].index + c] = NULL;
4779                   }
4780                }
4781
4782                /* Clear where it's used as src. */
4783                for (int r = 0; r < this->next_temp; r++) {
4784                   for (int c = 0; c < 4; c++) {
4785                      if (!acp[4 * r + c])
4786                         continue;
4787
4788                      int src_chan = GET_SWZ(acp[4 * r + c]->src[0].swizzle, c);
4789
4790                      if (acp[4 * r + c]->src[0].file == inst->dst[d].file &&
4791                          acp[4 * r + c]->src[0].index == inst->dst[d].index &&
4792                          inst->dst[d].writemask & (1 << src_chan)) {
4793                         acp[4 * r + c] = NULL;
4794                      }
4795                   }
4796                }
4797             }
4798          }
4799          break;
4800       }
4801
4802       /* If this is a copy, add it to the ACP. */
4803       if (inst->op == TGSI_OPCODE_MOV &&
4804           inst->dst[0].file == PROGRAM_TEMPORARY &&
4805           !(inst->dst[0].file == inst->src[0].file &&
4806              inst->dst[0].index == inst->src[0].index) &&
4807           !inst->dst[0].reladdr &&
4808           !inst->dst[0].reladdr2 &&
4809           !inst->saturate &&
4810           inst->src[0].file != PROGRAM_ARRAY &&
4811           !inst->src[0].reladdr &&
4812           !inst->src[0].reladdr2 &&
4813           !inst->src[0].negate) {
4814          for (int i = 0; i < 4; i++) {
4815             if (inst->dst[0].writemask & (1 << i)) {
4816                acp[4 * inst->dst[0].index + i] = inst;
4817                acp_level[4 * inst->dst[0].index + i] = level;
4818             }
4819          }
4820       }
4821    }
4822
4823    ralloc_free(acp_level);
4824    ralloc_free(acp);
4825 }
4826
4827 /*
4828  * On a basic block basis, tracks available PROGRAM_TEMPORARY registers for dead
4829  * code elimination.
4830  *
4831  * The glsl_to_tgsi_visitor lazily produces code assuming that this pass
4832  * will occur.  As an example, a TXP production after copy propagation but
4833  * before this pass:
4834  *
4835  * 0: MOV TEMP[1], INPUT[4].xyyy;
4836  * 1: MOV TEMP[1].w, INPUT[4].wwww;
4837  * 2: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D;
4838  *
4839  * and after this pass:
4840  *
4841  * 0: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D;
4842  */
4843 int
4844 glsl_to_tgsi_visitor::eliminate_dead_code(void)
4845 {
4846    glsl_to_tgsi_instruction **writes = rzalloc_array(mem_ctx,
4847                                                      glsl_to_tgsi_instruction *,
4848                                                      this->next_temp * 4);
4849    int *write_level = rzalloc_array(mem_ctx, int, this->next_temp * 4);
4850    int level = 0;
4851    int removed = 0;
4852
4853    foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) {
4854       assert(inst->dst[0].file != PROGRAM_TEMPORARY
4855              || inst->dst[0].index < this->next_temp);
4856
4857       switch (inst->op) {
4858       case TGSI_OPCODE_BGNLOOP:
4859       case TGSI_OPCODE_ENDLOOP:
4860       case TGSI_OPCODE_CONT:
4861       case TGSI_OPCODE_BRK:
4862          /* End of a basic block, clear the write array entirely.
4863           *
4864           * This keeps us from killing dead code when the writes are
4865           * on either side of a loop, even when the register isn't touched
4866           * inside the loop.  However, glsl_to_tgsi_visitor doesn't seem to emit
4867           * dead code of this type, so it shouldn't make a difference as long as
4868           * the dead code elimination pass in the GLSL compiler does its job.
4869           */
4870          memset(writes, 0, sizeof(*writes) * this->next_temp * 4);
4871          break;
4872
4873       case TGSI_OPCODE_ENDIF:
4874       case TGSI_OPCODE_ELSE:
4875          /* Promote the recorded level of all channels written inside the
4876           * preceding if or else block to the level above the if/else block.
4877           */
4878          for (int r = 0; r < this->next_temp; r++) {
4879             for (int c = 0; c < 4; c++) {
4880                if (!writes[4 * r + c])
4881                   continue;
4882
4883                if (write_level[4 * r + c] == level)
4884                   write_level[4 * r + c] = level-1;
4885             }
4886          }
4887          if(inst->op == TGSI_OPCODE_ENDIF)
4888             --level;
4889          break;
4890
4891       case TGSI_OPCODE_IF:
4892       case TGSI_OPCODE_UIF:
4893          ++level;
4894          /* fallthrough to default case to mark the condition as read */
4895       default:
4896          /* Continuing the block, clear any channels from the write array that
4897           * are read by this instruction.
4898           */
4899          for (unsigned i = 0; i < ARRAY_SIZE(inst->src); i++) {
4900             if (inst->src[i].file == PROGRAM_TEMPORARY && inst->src[i].reladdr){
4901                /* Any temporary might be read, so no dead code elimination
4902                 * across this instruction.
4903                 */
4904                memset(writes, 0, sizeof(*writes) * this->next_temp * 4);
4905             } else if (inst->src[i].file == PROGRAM_TEMPORARY) {
4906                /* Clear where it's used as src. */
4907                int src_chans = 1 << GET_SWZ(inst->src[i].swizzle, 0);
4908                src_chans |= 1 << GET_SWZ(inst->src[i].swizzle, 1);
4909                src_chans |= 1 << GET_SWZ(inst->src[i].swizzle, 2);
4910                src_chans |= 1 << GET_SWZ(inst->src[i].swizzle, 3);
4911
4912                for (int c = 0; c < 4; c++) {
4913                   if (src_chans & (1 << c))
4914                      writes[4 * inst->src[i].index + c] = NULL;
4915                }
4916             }
4917          }
4918          for (unsigned i = 0; i < inst->tex_offset_num_offset; i++) {
4919             if (inst->tex_offsets[i].file == PROGRAM_TEMPORARY && inst->tex_offsets[i].reladdr){
4920                /* Any temporary might be read, so no dead code elimination
4921                 * across this instruction.
4922                 */
4923                memset(writes, 0, sizeof(*writes) * this->next_temp * 4);
4924             } else if (inst->tex_offsets[i].file == PROGRAM_TEMPORARY) {
4925                /* Clear where it's used as src. */
4926                int src_chans = 1 << GET_SWZ(inst->tex_offsets[i].swizzle, 0);
4927                src_chans |= 1 << GET_SWZ(inst->tex_offsets[i].swizzle, 1);
4928                src_chans |= 1 << GET_SWZ(inst->tex_offsets[i].swizzle, 2);
4929                src_chans |= 1 << GET_SWZ(inst->tex_offsets[i].swizzle, 3);
4930
4931                for (int c = 0; c < 4; c++) {
4932                   if (src_chans & (1 << c))
4933                      writes[4 * inst->tex_offsets[i].index + c] = NULL;
4934                }
4935             }
4936          }
4937          break;
4938       }
4939
4940       /* If this instruction writes to a temporary, add it to the write array.
4941        * If there is already an instruction in the write array for one or more
4942        * of the channels, flag that channel write as dead.
4943        */
4944       for (unsigned i = 0; i < ARRAY_SIZE(inst->dst); i++) {
4945          if (inst->dst[i].file == PROGRAM_TEMPORARY &&
4946              !inst->dst[i].reladdr) {
4947             for (int c = 0; c < 4; c++) {
4948                if (inst->dst[i].writemask & (1 << c)) {
4949                   if (writes[4 * inst->dst[i].index + c]) {
4950                      if (write_level[4 * inst->dst[i].index + c] < level)
4951                         continue;
4952                      else
4953                         writes[4 * inst->dst[i].index + c]->dead_mask |= (1 << c);
4954                   }
4955                   writes[4 * inst->dst[i].index + c] = inst;
4956                   write_level[4 * inst->dst[i].index + c] = level;
4957                }
4958             }
4959          }
4960       }
4961    }
4962
4963    /* Anything still in the write array at this point is dead code. */
4964    for (int r = 0; r < this->next_temp; r++) {
4965       for (int c = 0; c < 4; c++) {
4966          glsl_to_tgsi_instruction *inst = writes[4 * r + c];
4967          if (inst)
4968             inst->dead_mask |= (1 << c);
4969       }
4970    }
4971
4972    /* Now actually remove the instructions that are completely dead and update
4973     * the writemask of other instructions with dead channels.
4974     */
4975    foreach_in_list_safe(glsl_to_tgsi_instruction, inst, &this->instructions) {
4976       if (!inst->dead_mask || !inst->dst[0].writemask)
4977          continue;
4978       /* No amount of dead masks should remove memory stores */
4979       if (inst->info->is_store)
4980          continue;
4981
4982       if ((inst->dst[0].writemask & ~inst->dead_mask) == 0) {
4983          inst->remove();
4984          delete inst;
4985          removed++;
4986       } else {
4987          if (inst->dst[0].type == GLSL_TYPE_DOUBLE) {
4988             if (inst->dead_mask == WRITEMASK_XY ||
4989                 inst->dead_mask == WRITEMASK_ZW)
4990                inst->dst[0].writemask &= ~(inst->dead_mask);
4991          } else
4992             inst->dst[0].writemask &= ~(inst->dead_mask);
4993       }
4994    }
4995
4996    ralloc_free(write_level);
4997    ralloc_free(writes);
4998
4999    return removed;
5000 }
5001
5002 /* merge DFRACEXP instructions into one. */
5003 void
5004 glsl_to_tgsi_visitor::merge_two_dsts(void)
5005 {
5006    foreach_in_list_safe(glsl_to_tgsi_instruction, inst, &this->instructions) {
5007       glsl_to_tgsi_instruction *inst2;
5008       bool merged;
5009       if (num_inst_dst_regs(inst) != 2)
5010          continue;
5011
5012       if (inst->dst[0].file != PROGRAM_UNDEFINED &&
5013           inst->dst[1].file != PROGRAM_UNDEFINED)
5014          continue;
5015
5016       inst2 = (glsl_to_tgsi_instruction *) inst->next;
5017       do {
5018
5019          if (inst->src[0].file == inst2->src[0].file &&
5020              inst->src[0].index == inst2->src[0].index &&
5021              inst->src[0].type == inst2->src[0].type &&
5022              inst->src[0].swizzle == inst2->src[0].swizzle)
5023             break;
5024          inst2 = (glsl_to_tgsi_instruction *) inst2->next;
5025       } while (inst2);
5026
5027       if (!inst2)
5028          continue;
5029       merged = false;
5030       if (inst->dst[0].file == PROGRAM_UNDEFINED) {
5031          merged = true;
5032          inst->dst[0] = inst2->dst[0];
5033       } else if (inst->dst[1].file == PROGRAM_UNDEFINED) {
5034          inst->dst[1] = inst2->dst[1];
5035          merged = true;
5036       }
5037
5038       if (merged) {
5039          inst2->remove();
5040          delete inst2;
5041       }
5042    }
5043 }
5044
5045 /* Merges temporary registers together where possible to reduce the number of
5046  * registers needed to run a program.
5047  *
5048  * Produces optimal code only after copy propagation and dead code elimination
5049  * have been run. */
5050 void
5051 glsl_to_tgsi_visitor::merge_registers(void)
5052 {
5053    int *last_reads = rzalloc_array(mem_ctx, int, this->next_temp);
5054    int *first_writes = rzalloc_array(mem_ctx, int, this->next_temp);
5055    struct rename_reg_pair *renames = rzalloc_array(mem_ctx, struct rename_reg_pair, this->next_temp);
5056    int i, j;
5057    int num_renames = 0;
5058
5059    /* Read the indices of the last read and first write to each temp register
5060     * into an array so that we don't have to traverse the instruction list as
5061     * much. */
5062    for (i = 0; i < this->next_temp; i++) {
5063       last_reads[i] = -1;
5064       first_writes[i] = -1;
5065    }
5066    get_last_temp_read_first_temp_write(last_reads, first_writes);
5067
5068    /* Start looking for registers with non-overlapping usages that can be
5069     * merged together. */
5070    for (i = 0; i < this->next_temp; i++) {
5071       /* Don't touch unused registers. */
5072       if (last_reads[i] < 0 || first_writes[i] < 0) continue;
5073
5074       for (j = 0; j < this->next_temp; j++) {
5075          /* Don't touch unused registers. */
5076          if (last_reads[j] < 0 || first_writes[j] < 0) continue;
5077
5078          /* We can merge the two registers if the first write to j is after or
5079           * in the same instruction as the last read from i.  Note that the
5080           * register at index i will always be used earlier or at the same time
5081           * as the register at index j. */
5082          if (first_writes[i] <= first_writes[j] &&
5083              last_reads[i] <= first_writes[j]) {
5084             renames[num_renames].old_reg = j;
5085             renames[num_renames].new_reg = i;
5086             num_renames++;
5087
5088             /* Update the first_writes and last_reads arrays with the new
5089              * values for the merged register index, and mark the newly unused
5090              * register index as such. */
5091             assert(last_reads[j] >= last_reads[i]);
5092             last_reads[i] = last_reads[j];
5093             first_writes[j] = -1;
5094             last_reads[j] = -1;
5095          }
5096       }
5097    }
5098
5099    rename_temp_registers(num_renames, renames);
5100    ralloc_free(renames);
5101    ralloc_free(last_reads);
5102    ralloc_free(first_writes);
5103 }
5104
5105 /* Reassign indices to temporary registers by reusing unused indices created
5106  * by optimization passes. */
5107 void
5108 glsl_to_tgsi_visitor::renumber_registers(void)
5109 {
5110    int i = 0;
5111    int new_index = 0;
5112    int *first_reads = rzalloc_array(mem_ctx, int, this->next_temp);
5113    struct rename_reg_pair *renames = rzalloc_array(mem_ctx, struct rename_reg_pair, this->next_temp);
5114    int num_renames = 0;
5115    for (i = 0; i < this->next_temp; i++) {
5116       first_reads[i] = -1;
5117    }
5118    get_first_temp_read(first_reads);
5119
5120    for (i = 0; i < this->next_temp; i++) {
5121       if (first_reads[i] < 0) continue;
5122       if (i != new_index) {
5123          renames[num_renames].old_reg = i;
5124          renames[num_renames].new_reg = new_index;
5125          num_renames++;
5126       }
5127       new_index++;
5128    }
5129
5130    rename_temp_registers(num_renames, renames);
5131    this->next_temp = new_index;
5132    ralloc_free(renames);
5133    ralloc_free(first_reads);
5134 }
5135
5136 /* ------------------------- TGSI conversion stuff -------------------------- */
/**
 * One recorded branch, as created by get_label().
 */
struct label {
   /** Branch target passed to get_label() when the label was recorded. */
   unsigned branch_target;

   /** Storage whose address get_label() returns to its caller. */
   unsigned token;
};
5141
5142 /**
5143  * Intermediate state used during shader translation.
5144  */
5145 struct st_translate {
5146    struct ureg_program *ureg;
5147
5148    unsigned temps_size;
5149    struct ureg_dst *temps;
5150
5151    struct ureg_dst *arrays;
5152    unsigned num_temp_arrays;
5153    struct ureg_src *constants;
5154    int num_constants;
5155    struct ureg_src *immediates;
5156    int num_immediates;
5157    struct ureg_dst outputs[PIPE_MAX_SHADER_OUTPUTS];
5158    struct ureg_src inputs[PIPE_MAX_SHADER_INPUTS];
5159    struct ureg_dst address[3];
5160    struct ureg_src samplers[PIPE_MAX_SAMPLERS];
5161    struct ureg_src buffers[PIPE_MAX_SHADER_BUFFERS];
5162    struct ureg_src images[PIPE_MAX_SHADER_IMAGES];
5163    struct ureg_src systemValues[SYSTEM_VALUE_MAX];
5164    struct ureg_src shared_memory;
5165    struct tgsi_texture_offset tex_offsets[MAX_GLSL_TEXTURE_OFFSET];
5166    unsigned *array_sizes;
5167    struct array_decl *input_arrays;
5168    struct array_decl *output_arrays;
5169
5170    const GLuint *inputMapping;
5171    const GLuint *outputMapping;
5172
5173    /* For every instruction that contains a label (eg CALL), keep
5174     * details so that we can go back afterwards and emit the correct
5175     * tgsi instruction number for each label.
5176     */
5177    struct label *labels;
5178    unsigned labels_size;
5179    unsigned labels_count;
5180
5181    /* Keep a record of the tgsi instruction number that each mesa
5182     * instruction starts at, will be used to fix up labels after
5183     * translation.
5184     */
5185    unsigned *insn;
5186    unsigned insn_size;
5187    unsigned insn_count;
5188
5189    unsigned procType;  /**< TGSI_PROCESSOR_VERTEX/FRAGMENT */
5190
5191    boolean error;
5192 };
5193
5194 /** Map Mesa's SYSTEM_VALUE_x to TGSI_SEMANTIC_x */
5195 unsigned
5196 _mesa_sysval_to_semantic(unsigned sysval)
5197 {
5198    switch (sysval) {
5199    /* Vertex shader */
5200    case SYSTEM_VALUE_VERTEX_ID:
5201       return TGSI_SEMANTIC_VERTEXID;
5202    case SYSTEM_VALUE_INSTANCE_ID:
5203       return TGSI_SEMANTIC_INSTANCEID;
5204    case SYSTEM_VALUE_VERTEX_ID_ZERO_BASE:
5205       return TGSI_SEMANTIC_VERTEXID_NOBASE;
5206    case SYSTEM_VALUE_BASE_VERTEX:
5207       return TGSI_SEMANTIC_BASEVERTEX;
5208    case SYSTEM_VALUE_BASE_INSTANCE:
5209       return TGSI_SEMANTIC_BASEINSTANCE;
5210    case SYSTEM_VALUE_DRAW_ID:
5211       return TGSI_SEMANTIC_DRAWID;
5212
5213    /* Geometry shader */
5214    case SYSTEM_VALUE_INVOCATION_ID:
5215       return TGSI_SEMANTIC_INVOCATIONID;
5216
5217    /* Fragment shader */
5218    case SYSTEM_VALUE_FRAG_COORD:
5219       return TGSI_SEMANTIC_POSITION;
5220    case SYSTEM_VALUE_FRONT_FACE:
5221       return TGSI_SEMANTIC_FACE;
5222    case SYSTEM_VALUE_SAMPLE_ID:
5223       return TGSI_SEMANTIC_SAMPLEID;
5224    case SYSTEM_VALUE_SAMPLE_POS:
5225       return TGSI_SEMANTIC_SAMPLEPOS;
5226    case SYSTEM_VALUE_SAMPLE_MASK_IN:
5227       return TGSI_SEMANTIC_SAMPLEMASK;
5228    case SYSTEM_VALUE_HELPER_INVOCATION:
5229       return TGSI_SEMANTIC_HELPER_INVOCATION;
5230
5231    /* Tessellation shader */
5232    case SYSTEM_VALUE_TESS_COORD:
5233       return TGSI_SEMANTIC_TESSCOORD;
5234    case SYSTEM_VALUE_VERTICES_IN:
5235       return TGSI_SEMANTIC_VERTICESIN;
5236    case SYSTEM_VALUE_PRIMITIVE_ID:
5237       return TGSI_SEMANTIC_PRIMID;
5238    case SYSTEM_VALUE_TESS_LEVEL_OUTER:
5239       return TGSI_SEMANTIC_TESSOUTER;
5240    case SYSTEM_VALUE_TESS_LEVEL_INNER:
5241       return TGSI_SEMANTIC_TESSINNER;
5242
5243    /* Compute shader */
5244    case SYSTEM_VALUE_LOCAL_INVOCATION_ID:
5245       return TGSI_SEMANTIC_THREAD_ID;
5246    case SYSTEM_VALUE_WORK_GROUP_ID:
5247       return TGSI_SEMANTIC_BLOCK_ID;
5248    case SYSTEM_VALUE_NUM_WORK_GROUPS:
5249       return TGSI_SEMANTIC_GRID_SIZE;
5250
5251    /* Unhandled */
5252    case SYSTEM_VALUE_LOCAL_INVOCATION_INDEX:
5253    case SYSTEM_VALUE_GLOBAL_INVOCATION_ID:
5254    case SYSTEM_VALUE_VERTEX_CNT:
5255    default:
5256       assert(!"Unexpected SYSTEM_VALUE_ enum");
5257       return TGSI_SEMANTIC_COUNT;
5258    }
5259 }
5260
5261
5262 /**
5263  * Make note of a branch to a label in the TGSI code.
5264  * After we've emitted all instructions, we'll go over the list
5265  * of labels built here and patch the TGSI code with the actual
5266  * location of each label.
5267  */
5268 static unsigned *get_label(struct st_translate *t, unsigned branch_target)
5269 {
5270    unsigned i;
5271
5272    if (t->labels_count + 1 >= t->labels_size) {
5273       t->labels_size = 1 << (util_logbase2(t->labels_size) + 1);
5274       t->labels = (struct label *)realloc(t->labels,
5275                                           t->labels_size * sizeof(struct label));
5276       if (t->labels == NULL) {
5277          static unsigned dummy;
5278          t->error = TRUE;
5279          return &dummy;
5280       }
5281    }
5282
5283    i = t->labels_count++;
5284    t->labels[i].branch_target = branch_target;
5285    return &t->labels[i].token;
5286 }
5287
5288 /**
5289  * Called prior to emitting the TGSI code for each instruction.
5290  * Allocate additional space for instructions if needed.
5291  * Update the insn[] array so the next glsl_to_tgsi_instruction points to
5292  * the next TGSI instruction.
5293  */
5294 static void set_insn_start(struct st_translate *t, unsigned start)
5295 {
5296    if (t->insn_count + 1 >= t->insn_size) {
5297       t->insn_size = 1 << (util_logbase2(t->insn_size) + 1);
5298       t->insn = (unsigned *)realloc(t->insn, t->insn_size * sizeof(t->insn[0]));
5299       if (t->insn == NULL) {
5300          t->error = TRUE;
5301          return;
5302       }
5303    }
5304
5305    t->insn[t->insn_count++] = start;
5306 }
5307
5308 /**
5309  * Map a glsl_to_tgsi constant/immediate to a TGSI immediate.
5310  */
5311 static struct ureg_src
5312 emit_immediate(struct st_translate *t,
5313                gl_constant_value values[4],
5314                int type, int size)
5315 {
5316    struct ureg_program *ureg = t->ureg;
5317
5318    switch(type)
5319    {
5320    case GL_FLOAT:
5321       return ureg_DECL_immediate(ureg, &values[0].f, size);
5322    case GL_DOUBLE:
5323       return ureg_DECL_immediate_f64(ureg, (double *)&values[0].f, size);
5324    case GL_INT:
5325       return ureg_DECL_immediate_int(ureg, &values[0].i, size);
5326    case GL_UNSIGNED_INT:
5327    case GL_BOOL:
5328       return ureg_DECL_immediate_uint(ureg, &values[0].u, size);
5329    default:
5330       assert(!"should not get here - type must be float, int, uint, or bool");
5331       return ureg_src_undef();
5332    }
5333 }
5334
5335 /**
5336  * Map a glsl_to_tgsi dst register to a TGSI ureg_dst register.
5337  */
5338 static struct ureg_dst
5339 dst_register(struct st_translate *t, gl_register_file file, unsigned index,
5340              unsigned array_id)
5341 {
5342    unsigned array;
5343
5344    switch(file) {
5345    case PROGRAM_UNDEFINED:
5346       return ureg_dst_undef();
5347
5348    case PROGRAM_TEMPORARY:
5349       /* Allocate space for temporaries on demand. */
5350       if (index >= t->temps_size) {
5351          const int inc = 4096;
5352
5353          t->temps = (struct ureg_dst*)
5354                     realloc(t->temps,
5355                             (t->temps_size + inc) * sizeof(struct ureg_dst));
5356          if (!t->temps)
5357             return ureg_dst_undef();
5358
5359          memset(t->temps + t->temps_size, 0, inc * sizeof(struct ureg_dst));
5360          t->temps_size += inc;
5361       }
5362
5363       if (ureg_dst_is_undef(t->temps[index]))
5364          t->temps[index] = ureg_DECL_local_temporary(t->ureg);
5365
5366       return t->temps[index];
5367
5368    case PROGRAM_ARRAY:
5369       array = index >> 16;
5370
5371       assert(array < t->num_temp_arrays);
5372
5373       if (ureg_dst_is_undef(t->arrays[array]))
5374          t->arrays[array] = ureg_DECL_array_temporary(
5375             t->ureg, t->array_sizes[array], TRUE);
5376
5377       return ureg_dst_array_offset(t->arrays[array],
5378                                    (int)(index & 0xFFFF) - 0x8000);
5379
5380    case PROGRAM_OUTPUT:
5381       if (!array_id) {
5382          if (t->procType == TGSI_PROCESSOR_FRAGMENT)
5383             assert(index < FRAG_RESULT_MAX);
5384          else if (t->procType == TGSI_PROCESSOR_TESS_CTRL ||
5385                   t->procType == TGSI_PROCESSOR_TESS_EVAL)
5386             assert(index < VARYING_SLOT_TESS_MAX);
5387          else
5388             assert(index < VARYING_SLOT_MAX);
5389
5390          assert(t->outputMapping[index] < ARRAY_SIZE(t->outputs));
5391          assert(t->outputs[t->outputMapping[index]].File != TGSI_FILE_NULL);
5392          return t->outputs[t->outputMapping[index]];
5393       }
5394       else {
5395          struct array_decl *decl = &t->output_arrays[array_id-1];
5396          unsigned mesa_index = decl->mesa_index;
5397          int slot = t->outputMapping[mesa_index];
5398
5399          assert(slot != -1 && t->outputs[slot].File == TGSI_FILE_OUTPUT);
5400          assert(t->outputs[slot].ArrayID == array_id);
5401          return ureg_dst_array_offset(t->outputs[slot], index - mesa_index);
5402       }
5403
5404    case PROGRAM_ADDRESS:
5405       return t->address[index];
5406
5407    default:
5408       assert(!"unknown dst register file");
5409       return ureg_dst_undef();
5410    }
5411 }
5412
5413 /**
5414  * Map a glsl_to_tgsi src register to a TGSI ureg_src register.
5415  */
5416 static struct ureg_src
5417 src_register(struct st_translate *t, const st_src_reg *reg)
5418 {
5419    int index = reg->index;
5420    int double_reg2 = reg->double_reg2 ? 1 : 0;
5421
5422    switch(reg->file) {
5423    case PROGRAM_UNDEFINED:
5424       return ureg_imm4f(t->ureg, 0, 0, 0, 0);
5425
5426    case PROGRAM_TEMPORARY:
5427    case PROGRAM_ARRAY:
5428    case PROGRAM_OUTPUT:
5429       return ureg_src(dst_register(t, reg->file, reg->index, reg->array_id));
5430
5431    case PROGRAM_UNIFORM:
5432       assert(reg->index >= 0);
5433       return reg->index < t->num_constants ?
5434                t->constants[reg->index] : ureg_imm4f(t->ureg, 0, 0, 0, 0);
5435    case PROGRAM_STATE_VAR:
5436    case PROGRAM_CONSTANT:       /* ie, immediate */
5437       if (reg->has_index2)
5438          return ureg_src_register(TGSI_FILE_CONSTANT, reg->index);
5439       else
5440          return reg->index >= 0 && reg->index < t->num_constants ?
5441                   t->constants[reg->index] : ureg_imm4f(t->ureg, 0, 0, 0, 0);
5442
5443    case PROGRAM_IMMEDIATE:
5444       assert(reg->index >= 0 && reg->index < t->num_immediates);
5445       return t->immediates[reg->index];
5446
5447    case PROGRAM_INPUT:
5448       /* GLSL inputs are 64-bit containers, so we have to
5449        * map back to the original index and add the offset after
5450        * mapping. */
5451       index -= double_reg2;
5452       if (!reg->array_id) {
5453          assert(t->inputMapping[index] < ARRAY_SIZE(t->inputs));
5454          assert(t->inputs[t->inputMapping[index]].File != TGSI_FILE_NULL);
5455          return t->inputs[t->inputMapping[index] + double_reg2];
5456       }
5457       else {
5458          struct array_decl *decl = &t->input_arrays[reg->array_id-1];
5459          unsigned mesa_index = decl->mesa_index;
5460          int slot = t->inputMapping[mesa_index];
5461
5462          assert(slot != -1 && t->inputs[slot].File == TGSI_FILE_INPUT);
5463          assert(t->inputs[slot].ArrayID == reg->array_id);
5464          return ureg_src_array_offset(t->inputs[slot], index + double_reg2 - mesa_index);
5465       }
5466
5467    case PROGRAM_ADDRESS:
5468       return ureg_src(t->address[reg->index]);
5469
5470    case PROGRAM_SYSTEM_VALUE:
5471       assert(reg->index < (int) ARRAY_SIZE(t->systemValues));
5472       return t->systemValues[reg->index];
5473
5474    default:
5475       assert(!"unknown src register file");
5476       return ureg_src_undef();
5477    }
5478 }
5479
5480 /**
5481  * Create a TGSI ureg_dst register from an st_dst_reg.
5482  */
5483 static struct ureg_dst
5484 translate_dst(struct st_translate *t,
5485               const st_dst_reg *dst_reg,
5486               bool saturate)
5487 {
5488    struct ureg_dst dst = dst_register(t, dst_reg->file, dst_reg->index,
5489                                       dst_reg->array_id);
5490
5491    if (dst.File == TGSI_FILE_NULL)
5492       return dst;
5493
5494    dst = ureg_writemask(dst, dst_reg->writemask);
5495
5496    if (saturate)
5497       dst = ureg_saturate(dst);
5498
5499    if (dst_reg->reladdr != NULL) {
5500       assert(dst_reg->file != PROGRAM_TEMPORARY);
5501       dst = ureg_dst_indirect(dst, ureg_src(t->address[0]));
5502    }
5503
5504    if (dst_reg->has_index2) {
5505       if (dst_reg->reladdr2)
5506          dst = ureg_dst_dimension_indirect(dst, ureg_src(t->address[1]),
5507                                            dst_reg->index2D);
5508       else
5509          dst = ureg_dst_dimension(dst, dst_reg->index2D);
5510    }
5511
5512    return dst;
5513 }
5514
5515 /**
5516  * Create a TGSI ureg_src register from an st_src_reg.
5517  */
5518 static struct ureg_src
5519 translate_src(struct st_translate *t, const st_src_reg *src_reg)
5520 {
5521    struct ureg_src src = src_register(t, src_reg);
5522
5523    if (src_reg->has_index2) {
5524       /* 2D indexes occur with geometry shader inputs (attrib, vertex)
5525        * and UBO constant buffers (buffer, position).
5526        */
5527       if (src_reg->reladdr2)
5528          src = ureg_src_dimension_indirect(src, ureg_src(t->address[1]),
5529                                            src_reg->index2D);
5530       else
5531          src = ureg_src_dimension(src, src_reg->index2D);
5532    }
5533
5534    src = ureg_swizzle(src,
5535                       GET_SWZ(src_reg->swizzle, 0) & 0x3,
5536                       GET_SWZ(src_reg->swizzle, 1) & 0x3,
5537                       GET_SWZ(src_reg->swizzle, 2) & 0x3,
5538                       GET_SWZ(src_reg->swizzle, 3) & 0x3);
5539
5540    if ((src_reg->negate & 0xf) == NEGATE_XYZW)
5541       src = ureg_negate(src);
5542
5543    if (src_reg->reladdr != NULL) {
5544       assert(src_reg->file != PROGRAM_TEMPORARY);
5545       src = ureg_src_indirect(src, ureg_src(t->address[0]));
5546    }
5547
5548    return src;
5549 }
5550
5551 static struct tgsi_texture_offset
5552 translate_tex_offset(struct st_translate *t,
5553                      const st_src_reg *in_offset, int idx)
5554 {
5555    struct tgsi_texture_offset offset;
5556    struct ureg_src imm_src;
5557    struct ureg_dst dst;
5558    int array;
5559
5560    switch (in_offset->file) {
5561    case PROGRAM_IMMEDIATE:
5562       assert(in_offset->index >= 0 && in_offset->index < t->num_immediates);
5563       imm_src = t->immediates[in_offset->index];
5564
5565       offset.File = imm_src.File;
5566       offset.Index = imm_src.Index;
5567       offset.SwizzleX = imm_src.SwizzleX;
5568       offset.SwizzleY = imm_src.SwizzleY;
5569       offset.SwizzleZ = imm_src.SwizzleZ;
5570       offset.Padding = 0;
5571       break;
5572    case PROGRAM_TEMPORARY:
5573       imm_src = ureg_src(t->temps[in_offset->index]);
5574       offset.File = imm_src.File;
5575       offset.Index = imm_src.Index;
5576       offset.SwizzleX = GET_SWZ(in_offset->swizzle, 0);
5577       offset.SwizzleY = GET_SWZ(in_offset->swizzle, 1);
5578       offset.SwizzleZ = GET_SWZ(in_offset->swizzle, 2);
5579       offset.Padding = 0;
5580       break;
5581    case PROGRAM_ARRAY:
5582       array = in_offset->index >> 16;
5583
5584       assert(array >= 0);
5585       assert(array < (int)t->num_temp_arrays);
5586
5587       dst = t->arrays[array];
5588       offset.File = dst.File;
5589       offset.Index = dst.Index + (in_offset->index & 0xFFFF) - 0x8000;
5590       offset.SwizzleX = GET_SWZ(in_offset->swizzle, 0);
5591       offset.SwizzleY = GET_SWZ(in_offset->swizzle, 1);
5592       offset.SwizzleZ = GET_SWZ(in_offset->swizzle, 2);
5593       offset.Padding = 0;
5594       break;
5595    default:
5596       break;
5597    }
5598    return offset;
5599 }
5600
5601 static void
5602 compile_tgsi_instruction(struct st_translate *t,
5603                          const glsl_to_tgsi_instruction *inst)
5604 {
5605    struct ureg_program *ureg = t->ureg;
5606    int i;
5607    struct ureg_dst dst[2];
5608    struct ureg_src src[4];
5609    struct tgsi_texture_offset texoffsets[MAX_GLSL_TEXTURE_OFFSET];
5610
5611    int num_dst;
5612    int num_src;
5613    unsigned tex_target = 0;
5614
5615    num_dst = num_inst_dst_regs(inst);
5616    num_src = num_inst_src_regs(inst);
5617
5618    for (i = 0; i < num_dst; i++)
5619       dst[i] = translate_dst(t,
5620                              &inst->dst[i],
5621                              inst->saturate);
5622
5623    for (i = 0; i < num_src; i++)
5624       src[i] = translate_src(t, &inst->src[i]);
5625
5626    switch(inst->op) {
5627    case TGSI_OPCODE_BGNLOOP:
5628    case TGSI_OPCODE_CAL:
5629    case TGSI_OPCODE_ELSE:
5630    case TGSI_OPCODE_ENDLOOP:
5631    case TGSI_OPCODE_IF:
5632    case TGSI_OPCODE_UIF:
5633       assert(num_dst == 0);
5634       ureg_label_insn(ureg,
5635                       inst->op,
5636                       src, num_src,
5637                       get_label(t,
5638                                 inst->op == TGSI_OPCODE_CAL ? inst->function->sig_id : 0));
5639       return;
5640
5641    case TGSI_OPCODE_TEX:
5642    case TGSI_OPCODE_TXB:
5643    case TGSI_OPCODE_TXD:
5644    case TGSI_OPCODE_TXL:
5645    case TGSI_OPCODE_TXP:
5646    case TGSI_OPCODE_TXQ:
5647    case TGSI_OPCODE_TXQS:
5648    case TGSI_OPCODE_TXF:
5649    case TGSI_OPCODE_TEX2:
5650    case TGSI_OPCODE_TXB2:
5651    case TGSI_OPCODE_TXL2:
5652    case TGSI_OPCODE_TG4:
5653    case TGSI_OPCODE_LODQ:
5654       src[num_src] = t->samplers[inst->sampler.index];
5655       assert(src[num_src].File != TGSI_FILE_NULL);
5656       if (inst->sampler.reladdr)
5657          src[num_src] =
5658             ureg_src_indirect(src[num_src], ureg_src(t->address[2]));
5659       num_src++;
5660       for (i = 0; i < (int)inst->tex_offset_num_offset; i++) {
5661          texoffsets[i] = translate_tex_offset(t, &inst->tex_offsets[i], i);
5662       }
5663       tex_target = st_translate_texture_target(inst->tex_target, inst->tex_shadow);
5664
5665       ureg_tex_insn(ureg,
5666                     inst->op,
5667                     dst, num_dst,
5668                     tex_target,
5669                     texoffsets, inst->tex_offset_num_offset,
5670                     src, num_src);
5671       return;
5672
5673    case TGSI_OPCODE_RESQ:
5674    case TGSI_OPCODE_LOAD:
5675    case TGSI_OPCODE_ATOMUADD:
5676    case TGSI_OPCODE_ATOMXCHG:
5677    case TGSI_OPCODE_ATOMCAS:
5678    case TGSI_OPCODE_ATOMAND:
5679    case TGSI_OPCODE_ATOMOR:
5680    case TGSI_OPCODE_ATOMXOR:
5681    case TGSI_OPCODE_ATOMUMIN:
5682    case TGSI_OPCODE_ATOMUMAX:
5683    case TGSI_OPCODE_ATOMIMIN:
5684    case TGSI_OPCODE_ATOMIMAX:
5685       for (i = num_src - 1; i >= 0; i--)
5686          src[i + 1] = src[i];
5687       num_src++;
5688       if (inst->buffer.file == PROGRAM_MEMORY) {
5689          src[0] = t->shared_memory;
5690       } else if (inst->buffer.file == PROGRAM_BUFFER) {
5691          src[0] = t->buffers[inst->buffer.index];
5692       } else {
5693          src[0] = t->images[inst->buffer.index];
5694          tex_target = st_translate_texture_target(inst->tex_target, inst->tex_shadow);
5695       }
5696       if (inst->buffer.reladdr)
5697          src[0] = ureg_src_indirect(src[0], ureg_src(t->address[2]));
5698       assert(src[0].File != TGSI_FILE_NULL);
5699       ureg_memory_insn(ureg, inst->op, dst, num_dst, src, num_src,
5700                        inst->buffer_access,
5701                        tex_target, inst->image_format);
5702       break;
5703
5704    case TGSI_OPCODE_STORE:
5705       if (inst->buffer.file == PROGRAM_MEMORY) {
5706          dst[0] = ureg_dst(t->shared_memory);
5707       } else if (inst->buffer.file == PROGRAM_BUFFER) {
5708          dst[0] = ureg_dst(t->buffers[inst->buffer.index]);
5709       } else {
5710          dst[0] = ureg_dst(t->images[inst->buffer.index]);
5711          tex_target = st_translate_texture_target(inst->tex_target, inst->tex_shadow);
5712       }
5713       dst[0] = ureg_writemask(dst[0], inst->dst[0].writemask);
5714       if (inst->buffer.reladdr)
5715          dst[0] = ureg_dst_indirect(dst[0], ureg_src(t->address[2]));
5716       assert(dst[0].File != TGSI_FILE_NULL);
5717       ureg_memory_insn(ureg, inst->op, dst, num_dst, src, num_src,
5718                        inst->buffer_access,
5719                        tex_target, inst->image_format);
5720       break;
5721
5722    case TGSI_OPCODE_SCS:
5723       dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XY);
5724       ureg_insn(ureg, inst->op, dst, num_dst, src, num_src);
5725       break;
5726
5727    default:
5728       ureg_insn(ureg,
5729                 inst->op,
5730                 dst, num_dst,
5731                 src, num_src);
5732       break;
5733    }
5734 }
5735
5736 /**
5737  * Emit the TGSI instructions for inverting and adjusting WPOS.
5738  * This code is unavoidable because it also depends on whether
5739  * a FBO is bound (STATE_FB_WPOS_Y_TRANSFORM).
5740  */
5741 static void
5742 emit_wpos_adjustment(struct gl_context *ctx,
5743                      struct st_translate *t,
5744                      int wpos_transform_const,
5745                      boolean invert,
5746                      GLfloat adjX, GLfloat adjY[2])
5747 {
5748    struct ureg_program *ureg = t->ureg;
5749
5750    assert(wpos_transform_const >= 0);
5751
5752    /* Fragment program uses fragment position input.
5753     * Need to replace instances of INPUT[WPOS] with temp T
5754     * where T = INPUT[WPOS] is inverted by Y.
5755     */
5756    struct ureg_src wpostrans = ureg_DECL_constant(ureg, wpos_transform_const);
5757    struct ureg_dst wpos_temp = ureg_DECL_temporary( ureg );
5758    struct ureg_src *wpos =
5759       ctx->Const.GLSLFragCoordIsSysVal ?
5760          &t->systemValues[SYSTEM_VALUE_FRAG_COORD] :
5761          &t->inputs[t->inputMapping[VARYING_SLOT_POS]];
5762    struct ureg_src wpos_input = *wpos;
5763
5764    /* First, apply the coordinate shift: */
5765    if (adjX || adjY[0] || adjY[1]) {
5766       if (adjY[0] != adjY[1]) {
5767          /* Adjust the y coordinate by adjY[1] or adjY[0] respectively
5768           * depending on whether inversion is actually going to be applied
5769           * or not, which is determined by testing against the inversion
5770           * state variable used below, which will be either +1 or -1.
5771           */
5772          struct ureg_dst adj_temp = ureg_DECL_local_temporary(ureg);
5773
5774          ureg_CMP(ureg, adj_temp,
5775                   ureg_scalar(wpostrans, invert ? 2 : 0),
5776                   ureg_imm4f(ureg, adjX, adjY[0], 0.0f, 0.0f),
5777                   ureg_imm4f(ureg, adjX, adjY[1], 0.0f, 0.0f));
5778          ureg_ADD(ureg, wpos_temp, wpos_input, ureg_src(adj_temp));
5779       } else {
5780          ureg_ADD(ureg, wpos_temp, wpos_input,
5781                   ureg_imm4f(ureg, adjX, adjY[0], 0.0f, 0.0f));
5782       }
5783       wpos_input = ureg_src(wpos_temp);
5784    } else {
5785       /* MOV wpos_temp, input[wpos]
5786        */
5787       ureg_MOV( ureg, wpos_temp, wpos_input );
5788    }
5789
5790    /* Now the conditional y flip: STATE_FB_WPOS_Y_TRANSFORM.xy/zw will be
5791     * inversion/identity, or the other way around if we're drawing to an FBO.
5792     */
5793    if (invert) {
5794       /* MAD wpos_temp.y, wpos_input, wpostrans.xxxx, wpostrans.yyyy
5795        */
5796       ureg_MAD( ureg,
5797                 ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y ),
5798                 wpos_input,
5799                 ureg_scalar(wpostrans, 0),
5800                 ureg_scalar(wpostrans, 1));
5801    } else {
5802       /* MAD wpos_temp.y, wpos_input, wpostrans.zzzz, wpostrans.wwww
5803        */
5804       ureg_MAD( ureg,
5805                 ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y ),
5806                 wpos_input,
5807                 ureg_scalar(wpostrans, 2),
5808                 ureg_scalar(wpostrans, 3));
5809    }
5810
5811    /* Use wpos_temp as position input from here on:
5812     */
5813    *wpos = ureg_src(wpos_temp);
5814 }
5815
5816
5817 /**
5818  * Emit fragment position/ooordinate code.
5819  */
5820 static void
5821 emit_wpos(struct st_context *st,
5822           struct st_translate *t,
5823           const struct gl_program *program,
5824           struct ureg_program *ureg,
5825           int wpos_transform_const)
5826 {
5827    const struct gl_fragment_program *fp =
5828       (const struct gl_fragment_program *) program;
5829    struct pipe_screen *pscreen = st->pipe->screen;
5830    GLfloat adjX = 0.0f;
5831    GLfloat adjY[2] = { 0.0f, 0.0f };
5832    boolean invert = FALSE;
5833
5834    /* Query the pixel center conventions supported by the pipe driver and set
5835     * adjX, adjY to help out if it cannot handle the requested one internally.
5836     *
5837     * The bias of the y-coordinate depends on whether y-inversion takes place
5838     * (adjY[1]) or not (adjY[0]), which is in turn dependent on whether we are
5839     * drawing to an FBO (causes additional inversion), and whether the the pipe
5840     * driver origin and the requested origin differ (the latter condition is
5841     * stored in the 'invert' variable).
5842     *
5843     * For height = 100 (i = integer, h = half-integer, l = lower, u = upper):
5844     *
5845     * center shift only:
5846     * i -> h: +0.5
5847     * h -> i: -0.5
5848     *
5849     * inversion only:
5850     * l,i -> u,i: ( 0.0 + 1.0) * -1 + 100 = 99
5851     * l,h -> u,h: ( 0.5 + 0.0) * -1 + 100 = 99.5
5852     * u,i -> l,i: (99.0 + 1.0) * -1 + 100 = 0
5853     * u,h -> l,h: (99.5 + 0.0) * -1 + 100 = 0.5
5854     *
5855     * inversion and center shift:
5856     * l,i -> u,h: ( 0.0 + 0.5) * -1 + 100 = 99.5
5857     * l,h -> u,i: ( 0.5 + 0.5) * -1 + 100 = 99
5858     * u,i -> l,h: (99.0 + 0.5) * -1 + 100 = 0.5
5859     * u,h -> l,i: (99.5 + 0.5) * -1 + 100 = 0
5860     */
5861    if (fp->OriginUpperLeft) {
5862       /* Fragment shader wants origin in upper-left */
5863       if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT)) {
5864          /* the driver supports upper-left origin */
5865       }
5866       else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT)) {
5867          /* the driver supports lower-left origin, need to invert Y */
5868          ureg_property(ureg, TGSI_PROPERTY_FS_COORD_ORIGIN,
5869                        TGSI_FS_COORD_ORIGIN_LOWER_LEFT);
5870          invert = TRUE;
5871       }
5872       else
5873          assert(0);
5874    }
5875    else {
5876       /* Fragment shader wants origin in lower-left */
5877       if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT))
5878          /* the driver supports lower-left origin */
5879          ureg_property(ureg, TGSI_PROPERTY_FS_COORD_ORIGIN,
5880                        TGSI_FS_COORD_ORIGIN_LOWER_LEFT);
5881       else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT))
5882          /* the driver supports upper-left origin, need to invert Y */
5883          invert = TRUE;
5884       else
5885          assert(0);
5886    }
5887
5888    if (fp->PixelCenterInteger) {
5889       /* Fragment shader wants pixel center integer */
5890       if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER)) {
5891          /* the driver supports pixel center integer */
5892          adjY[1] = 1.0f;
5893          ureg_property(ureg, TGSI_PROPERTY_FS_COORD_PIXEL_CENTER,
5894                        TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
5895       }
5896       else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER)) {
5897          /* the driver supports pixel center half integer, need to bias X,Y */
5898          adjX = -0.5f;
5899          adjY[0] = -0.5f;
5900          adjY[1] = 0.5f;
5901       }
5902       else
5903          assert(0);
5904    }
5905    else {
5906       /* Fragment shader wants pixel center half integer */
5907       if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER)) {
5908          /* the driver supports pixel center half integer */
5909       }
5910       else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER)) {
5911          /* the driver supports pixel center integer, need to bias X,Y */
5912          adjX = adjY[0] = adjY[1] = 0.5f;
5913          ureg_property(ureg, TGSI_PROPERTY_FS_COORD_PIXEL_CENTER,
5914                        TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
5915       }
5916       else
5917          assert(0);
5918    }
5919
5920    /* we invert after adjustment so that we avoid the MOV to temporary,
5921     * and reuse the adjustment ADD instead */
5922    emit_wpos_adjustment(st->ctx, t, wpos_transform_const, invert, adjX, adjY);
5923 }
5924
5925 /**
5926  * OpenGL's fragment gl_FrontFace input is 1 for front-facing, 0 for back.
5927  * TGSI uses +1 for front, -1 for back.
5928  * This function converts the TGSI value to the GL value.  Simply clamping/
5929  * saturating the value to [0,1] does the job.
5930  */
5931 static void
5932 emit_face_var(struct gl_context *ctx, struct st_translate *t)
5933 {
5934    struct ureg_program *ureg = t->ureg;
5935    struct ureg_dst face_temp = ureg_DECL_temporary(ureg);
5936    struct ureg_src face_input = t->inputs[t->inputMapping[VARYING_SLOT_FACE]];
5937
5938    if (ctx->Const.NativeIntegers) {
5939       ureg_FSGE(ureg, face_temp, face_input, ureg_imm1f(ureg, 0));
5940    }
5941    else {
5942       /* MOV_SAT face_temp, input[face] */
5943       ureg_MOV(ureg, ureg_saturate(face_temp), face_input);
5944    }
5945
5946    /* Use face_temp as face input from here on: */
5947    t->inputs[t->inputMapping[VARYING_SLOT_FACE]] = ureg_src(face_temp);
5948 }
5949
5950 static bool
5951 find_array(unsigned attr, struct array_decl *arrays, unsigned count,
5952            unsigned *array_id, unsigned *array_size)
5953 {
5954    unsigned i;
5955
5956    for (i = 0; i < count; i++) {
5957       struct array_decl *decl = &arrays[i];
5958
5959       if (attr == decl->mesa_index) {
5960          *array_id = decl->array_id;
5961          *array_size = decl->array_size;
5962          assert(*array_size);
5963          return true;
5964       }
5965    }
5966    return false;
5967 }
5968
5969 static void
5970 emit_compute_block_size(const struct gl_program *program,
5971                         struct ureg_program *ureg) {
5972    const struct gl_compute_program *cp =
5973       (const struct gl_compute_program *)program;
5974
5975    ureg_property(ureg, TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH,
5976                        cp->LocalSize[0]);
5977    ureg_property(ureg, TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT,
5978                        cp->LocalSize[1]);
5979    ureg_property(ureg, TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH,
5980                        cp->LocalSize[2]);
5981 }
5982
5983 /**
5984  * Translate intermediate IR (glsl_to_tgsi_instruction) to TGSI format.
5985  * \param program  the program to translate
5986  * \param numInputs  number of input registers used
5987  * \param inputMapping  maps Mesa fragment program inputs to TGSI generic
5988  *                      input indexes
5989  * \param inputSemanticName  the TGSI_SEMANTIC flag for each input
5990  * \param inputSemanticIndex  the semantic index (ex: which texcoord) for
5991  *                            each input
5992  * \param interpMode  the TGSI_INTERPOLATE_LINEAR/PERSP mode for each input
5993  * \param interpLocation the TGSI_INTERPOLATE_LOC_* location for each input
5994  * \param numOutputs  number of output registers used
5995  * \param outputMapping  maps Mesa fragment program outputs to TGSI
5996  *                       generic outputs
5997  * \param outputSemanticName  the TGSI_SEMANTIC flag for each output
5998  * \param outputSemanticIndex  the semantic index (ex: which texcoord) for
5999  *                             each output
6000  *
6001  * \return  PIPE_OK or PIPE_ERROR_OUT_OF_MEMORY
6002  */
6003 extern "C" enum pipe_error
6004 st_translate_program(
6005    struct gl_context *ctx,
6006    uint procType,
6007    struct ureg_program *ureg,
6008    glsl_to_tgsi_visitor *program,
6009    const struct gl_program *proginfo,
6010    GLuint numInputs,
6011    const GLuint inputMapping[],
6012    const GLuint inputSlotToAttr[],
6013    const ubyte inputSemanticName[],
6014    const ubyte inputSemanticIndex[],
6015    const GLuint interpMode[],
6016    const GLuint interpLocation[],
6017    GLuint numOutputs,
6018    const GLuint outputMapping[],
6019    const GLuint outputSlotToAttr[],
6020    const ubyte outputSemanticName[],
6021    const ubyte outputSemanticIndex[])
6022 {
6023    struct st_translate *t;
6024    unsigned i;
6025    struct gl_program_constants *frag_const =
6026       &ctx->Const.Program[MESA_SHADER_FRAGMENT];
6027    enum pipe_error ret = PIPE_OK;
6028
6029    assert(numInputs <= ARRAY_SIZE(t->inputs));
6030    assert(numOutputs <= ARRAY_SIZE(t->outputs));
6031
6032    t = CALLOC_STRUCT(st_translate);
6033    if (!t) {
6034       ret = PIPE_ERROR_OUT_OF_MEMORY;
6035       goto out;
6036    }
6037
6038    t->procType = procType;
6039    t->inputMapping = inputMapping;
6040    t->outputMapping = outputMapping;
6041    t->ureg = ureg;
6042    t->num_temp_arrays = program->next_array;
6043    if (t->num_temp_arrays)
6044       t->arrays = (struct ureg_dst*)
6045                   calloc(1, sizeof(t->arrays[0]) * t->num_temp_arrays);
6046
6047    /*
6048     * Declare input attributes.
6049     */
6050    switch (procType) {
6051    case TGSI_PROCESSOR_FRAGMENT:
6052       for (i = 0; i < numInputs; i++) {
6053          unsigned array_id = 0;
6054          unsigned array_size;
6055
6056          if (find_array(inputSlotToAttr[i], program->input_arrays,
6057                         program->num_input_arrays, &array_id, &array_size)) {
6058             /* We've found an array. Declare it so. */
6059             t->inputs[i] = ureg_DECL_fs_input_cyl_centroid(ureg,
6060                               inputSemanticName[i], inputSemanticIndex[i],
6061                               interpMode[i], 0, interpLocation[i],
6062                               array_id, array_size);
6063             i += array_size - 1;
6064          }
6065          else {
6066             t->inputs[i] = ureg_DECL_fs_input_cyl_centroid(ureg,
6067                               inputSemanticName[i], inputSemanticIndex[i],
6068                               interpMode[i], 0, interpLocation[i], 0, 1);
6069          }
6070       }
6071       break;
6072    case TGSI_PROCESSOR_GEOMETRY:
6073    case TGSI_PROCESSOR_TESS_EVAL:
6074    case TGSI_PROCESSOR_TESS_CTRL:
6075       for (i = 0; i < numInputs; i++) {
6076          unsigned array_id = 0;
6077          unsigned array_size;
6078
6079          if (find_array(inputSlotToAttr[i], program->input_arrays,
6080                         program->num_input_arrays, &array_id, &array_size)) {
6081             /* We've found an array. Declare it so. */
6082             t->inputs[i] = ureg_DECL_input(ureg, inputSemanticName[i],
6083                                            inputSemanticIndex[i],
6084                                            array_id, array_size);
6085             i += array_size - 1;
6086          }
6087          else {
6088             t->inputs[i] = ureg_DECL_input(ureg, inputSemanticName[i],
6089                                            inputSemanticIndex[i], 0, 1);
6090          }
6091       }
6092       break;
6093    case TGSI_PROCESSOR_VERTEX:
6094       for (i = 0; i < numInputs; i++) {
6095          t->inputs[i] = ureg_DECL_vs_input(ureg, i);
6096       }
6097       break;
6098    case TGSI_PROCESSOR_COMPUTE:
6099       break;
6100    default:
6101       assert(0);
6102    }
6103
6104    /*
6105     * Declare output attributes.
6106     */
6107    switch (procType) {
6108    case TGSI_PROCESSOR_FRAGMENT:
6109    case TGSI_PROCESSOR_COMPUTE:
6110       break;
6111    case TGSI_PROCESSOR_GEOMETRY:
6112    case TGSI_PROCESSOR_TESS_EVAL:
6113    case TGSI_PROCESSOR_TESS_CTRL:
6114    case TGSI_PROCESSOR_VERTEX:
6115       for (i = 0; i < numOutputs; i++) {
6116          unsigned array_id = 0;
6117          unsigned array_size;
6118
6119          if (find_array(outputSlotToAttr[i], program->output_arrays,
6120                         program->num_output_arrays, &array_id, &array_size)) {
6121             /* We've found an array. Declare it so. */
6122             t->outputs[i] = ureg_DECL_output_array(ureg,
6123                                                    outputSemanticName[i],
6124                                                    outputSemanticIndex[i],
6125                                                    array_id, array_size);
6126             i += array_size - 1;
6127          }
6128          else {
6129             t->outputs[i] = ureg_DECL_output(ureg,
6130                                              outputSemanticName[i],
6131                                              outputSemanticIndex[i]);
6132          }
6133       }
6134       break;
6135    default:
6136       assert(0);
6137    }
6138
6139    if (procType == TGSI_PROCESSOR_FRAGMENT) {
6140       if (program->shader->EarlyFragmentTests)
6141          ureg_property(ureg, TGSI_PROPERTY_FS_EARLY_DEPTH_STENCIL, 1);
6142
6143       if (proginfo->InputsRead & VARYING_BIT_POS) {
6144           /* Must do this after setting up t->inputs. */
6145           emit_wpos(st_context(ctx), t, proginfo, ureg,
6146                     program->wpos_transform_const);
6147       }
6148
6149       if (proginfo->InputsRead & VARYING_BIT_FACE)
6150          emit_face_var(ctx, t);
6151
6152       for (i = 0; i < numOutputs; i++) {
6153          switch (outputSemanticName[i]) {
6154          case TGSI_SEMANTIC_POSITION:
6155             t->outputs[i] = ureg_DECL_output(ureg,
6156                                              TGSI_SEMANTIC_POSITION, /* Z/Depth */
6157                                              outputSemanticIndex[i]);
6158             t->outputs[i] = ureg_writemask(t->outputs[i], TGSI_WRITEMASK_Z);
6159             break;
6160          case TGSI_SEMANTIC_STENCIL:
6161             t->outputs[i] = ureg_DECL_output(ureg,
6162                                              TGSI_SEMANTIC_STENCIL, /* Stencil */
6163                                              outputSemanticIndex[i]);
6164             t->outputs[i] = ureg_writemask(t->outputs[i], TGSI_WRITEMASK_Y);
6165             break;
6166          case TGSI_SEMANTIC_COLOR:
6167             t->outputs[i] = ureg_DECL_output(ureg,
6168                                              TGSI_SEMANTIC_COLOR,
6169                                              outputSemanticIndex[i]);
6170             break;
6171          case TGSI_SEMANTIC_SAMPLEMASK:
6172             t->outputs[i] = ureg_DECL_output(ureg,
6173                                              TGSI_SEMANTIC_SAMPLEMASK,
6174                                              outputSemanticIndex[i]);
6175             /* TODO: If we ever support more than 32 samples, this will have
6176              * to become an array.
6177              */
6178             t->outputs[i] = ureg_writemask(t->outputs[i], TGSI_WRITEMASK_X);
6179             break;
6180          default:
6181             assert(!"fragment shader outputs must be POSITION/STENCIL/COLOR");
6182             ret = PIPE_ERROR_BAD_INPUT;
6183             goto out;
6184          }
6185       }
6186    }
6187    else if (procType == TGSI_PROCESSOR_VERTEX) {
6188       for (i = 0; i < numOutputs; i++) {
6189          if (outputSemanticName[i] == TGSI_SEMANTIC_FOG) {
6190             /* force register to contain a fog coordinate in the form (F, 0, 0, 1). */
6191             ureg_MOV(ureg,
6192                      ureg_writemask(t->outputs[i], TGSI_WRITEMASK_YZW),
6193                      ureg_imm4f(ureg, 0.0f, 0.0f, 0.0f, 1.0f));
6194             t->outputs[i] = ureg_writemask(t->outputs[i], TGSI_WRITEMASK_X);
6195          }
6196       }
6197    }
6198
6199    if (procType == TGSI_PROCESSOR_COMPUTE) {
6200       emit_compute_block_size(proginfo, ureg);
6201    }
6202
6203    /* Declare address register.
6204     */
6205    if (program->num_address_regs > 0) {
6206       assert(program->num_address_regs <= 3);
6207       for (int i = 0; i < program->num_address_regs; i++)
6208          t->address[i] = ureg_DECL_address(ureg);
6209    }
6210
6211    /* Declare misc input registers
6212     */
6213    {
6214       GLbitfield sysInputs = proginfo->SystemValuesRead;
6215
6216       for (i = 0; sysInputs; i++) {
6217          if (sysInputs & (1 << i)) {
6218             unsigned semName = _mesa_sysval_to_semantic(i);
6219
6220             t->systemValues[i] = ureg_DECL_system_value(ureg, semName, 0);
6221
6222             if (semName == TGSI_SEMANTIC_INSTANCEID ||
6223                 semName == TGSI_SEMANTIC_VERTEXID) {
6224                /* From Gallium perspective, these system values are always
6225                 * integer, and require native integer support.  However, if
6226                 * native integer is supported on the vertex stage but not the
6227                 * pixel stage (e.g, i915g + draw), Mesa will generate IR that
6228                 * assumes these system values are floats. To resolve the
6229                 * inconsistency, we insert a U2F.
6230                 */
6231                struct st_context *st = st_context(ctx);
6232                struct pipe_screen *pscreen = st->pipe->screen;
6233                assert(procType == TGSI_PROCESSOR_VERTEX);
6234                assert(pscreen->get_shader_param(pscreen, PIPE_SHADER_VERTEX, PIPE_SHADER_CAP_INTEGERS));
6235                (void) pscreen;
6236                if (!ctx->Const.NativeIntegers) {
6237                   struct ureg_dst temp = ureg_DECL_local_temporary(t->ureg);
6238                   ureg_U2F( t->ureg, ureg_writemask(temp, TGSI_WRITEMASK_X), t->systemValues[i]);
6239                   t->systemValues[i] = ureg_scalar(ureg_src(temp), 0);
6240                }
6241             }
6242
6243             if (procType == TGSI_PROCESSOR_FRAGMENT &&
6244                 semName == TGSI_SEMANTIC_POSITION)
6245                emit_wpos(st_context(ctx), t, proginfo, ureg,
6246                          program->wpos_transform_const);
6247
6248             sysInputs &= ~(1 << i);
6249          }
6250       }
6251    }
6252
6253    t->array_sizes = program->array_sizes;
6254    t->input_arrays = program->input_arrays;
6255    t->output_arrays = program->output_arrays;
6256
6257    /* Emit constants and uniforms.  TGSI uses a single index space for these,
6258     * so we put all the translated regs in t->constants.
6259     */
6260    if (proginfo->Parameters) {
6261       t->constants = (struct ureg_src *)
6262          calloc(proginfo->Parameters->NumParameters, sizeof(t->constants[0]));
6263       if (t->constants == NULL) {
6264          ret = PIPE_ERROR_OUT_OF_MEMORY;
6265          goto out;
6266       }
6267       t->num_constants = proginfo->Parameters->NumParameters;
6268
6269       for (i = 0; i < proginfo->Parameters->NumParameters; i++) {
6270          switch (proginfo->Parameters->Parameters[i].Type) {
6271          case PROGRAM_STATE_VAR:
6272          case PROGRAM_UNIFORM:
6273             t->constants[i] = ureg_DECL_constant(ureg, i);
6274             break;
6275
6276          /* Emit immediates for PROGRAM_CONSTANT only when there's no indirect
6277           * addressing of the const buffer.
6278           * FIXME: Be smarter and recognize param arrays:
6279           * indirect addressing is only valid within the referenced
6280           * array.
6281           */
6282          case PROGRAM_CONSTANT:
6283             if (program->indirect_addr_consts)
6284                t->constants[i] = ureg_DECL_constant(ureg, i);
6285             else
6286                t->constants[i] = emit_immediate(t,
6287                                                 proginfo->Parameters->ParameterValues[i],
6288                                                 proginfo->Parameters->Parameters[i].DataType,
6289                                                 4);
6290             break;
6291          default:
6292             break;
6293          }
6294       }
6295    }
6296
6297    if (program->shader) {
6298       unsigned num_ubos = program->shader->NumUniformBlocks;
6299
6300       for (i = 0; i < num_ubos; i++) {
6301          unsigned size = program->shader->UniformBlocks[i]->UniformBufferSize;
6302          unsigned num_const_vecs = (size + 15) / 16;
6303          unsigned first, last;
6304          assert(num_const_vecs > 0);
6305          first = 0;
6306          last = num_const_vecs > 0 ? num_const_vecs - 1 : 0;
6307          ureg_DECL_constant2D(t->ureg, first, last, i + 1);
6308       }
6309    }
6310
6311    /* Emit immediate values.
6312     */
6313    t->immediates = (struct ureg_src *)
6314       calloc(program->num_immediates, sizeof(struct ureg_src));
6315    if (t->immediates == NULL) {
6316       ret = PIPE_ERROR_OUT_OF_MEMORY;
6317       goto out;
6318    }
6319    t->num_immediates = program->num_immediates;
6320
6321    i = 0;
6322    foreach_in_list(immediate_storage, imm, &program->immediates) {
6323       assert(i < program->num_immediates);
6324       t->immediates[i++] = emit_immediate(t, imm->values, imm->type, imm->size32);
6325    }
6326    assert(i == program->num_immediates);
6327
6328    /* texture samplers */
6329    for (i = 0; i < frag_const->MaxTextureImageUnits; i++) {
6330       if (program->samplers_used & (1u << i)) {
6331          unsigned type;
6332
6333          t->samplers[i] = ureg_DECL_sampler(ureg, i);
6334
6335          switch (program->sampler_types[i]) {
6336          case GLSL_TYPE_INT:
6337             type = TGSI_RETURN_TYPE_SINT;
6338             break;
6339          case GLSL_TYPE_UINT:
6340             type = TGSI_RETURN_TYPE_UINT;
6341             break;
6342          case GLSL_TYPE_FLOAT:
6343             type = TGSI_RETURN_TYPE_FLOAT;
6344             break;
6345          default:
6346             unreachable("not reached");
6347          }
6348
6349          ureg_DECL_sampler_view( ureg, i, program->sampler_targets[i],
6350                                  type, type, type, type );
6351       }
6352    }
6353
6354    for (i = 0; i < frag_const->MaxAtomicBuffers; i++) {
6355       if (program->buffers_used & (1 << i)) {
6356          t->buffers[i] = ureg_DECL_buffer(ureg, i, true);
6357       }
6358    }
6359
6360    for (; i < frag_const->MaxAtomicBuffers + frag_const->MaxShaderStorageBlocks;
6361         i++) {
6362       if (program->buffers_used & (1 << i)) {
6363          t->buffers[i] = ureg_DECL_buffer(ureg, i, false);
6364       }
6365    }
6366
6367    if (program->use_shared_memory)
6368       t->shared_memory = ureg_DECL_memory(ureg, TGSI_MEMORY_TYPE_SHARED);
6369
6370    for (i = 0; i < program->shader->NumImages; i++) {
6371       if (program->images_used & (1 << i)) {
6372          t->images[i] = ureg_DECL_image(ureg, i,
6373                                         program->image_targets[i],
6374                                         program->image_formats[i],
6375                                         true, false);
6376       }
6377    }
6378
6379    /* Emit each instruction in turn:
6380     */
6381    foreach_in_list(glsl_to_tgsi_instruction, inst, &program->instructions) {
6382       set_insn_start(t, ureg_get_instruction_number(ureg));
6383       compile_tgsi_instruction(t, inst);
6384    }
6385
6386    /* Fix up all emitted labels:
6387     */
6388    for (i = 0; i < t->labels_count; i++) {
6389       ureg_fixup_label(ureg, t->labels[i].token,
6390                        t->insn[t->labels[i].branch_target]);
6391    }
6392
6393    /* Set the next shader stage hint for VS and TES. */
6394    switch (procType) {
6395    case TGSI_PROCESSOR_VERTEX:
6396    case TGSI_PROCESSOR_TESS_EVAL:
6397       if (program->shader_program->SeparateShader)
6398          break;
6399
6400       for (i = program->shader->Stage+1; i <= MESA_SHADER_FRAGMENT; i++) {
6401          if (program->shader_program->_LinkedShaders[i]) {
6402             unsigned next;
6403
6404             switch (i) {
6405             case MESA_SHADER_TESS_CTRL:
6406                next = TGSI_PROCESSOR_TESS_CTRL;
6407                break;
6408             case MESA_SHADER_TESS_EVAL:
6409                next = TGSI_PROCESSOR_TESS_EVAL;
6410                break;
6411             case MESA_SHADER_GEOMETRY:
6412                next = TGSI_PROCESSOR_GEOMETRY;
6413                break;
6414             case MESA_SHADER_FRAGMENT:
6415                next = TGSI_PROCESSOR_FRAGMENT;
6416                break;
6417             default:
6418                assert(0);
6419                continue;
6420             }
6421
6422             ureg_set_next_shader_processor(ureg, next);
6423             break;
6424          }
6425       }
6426       break;
6427    }
6428
6429 out:
6430    if (t) {
6431       free(t->arrays);
6432       free(t->temps);
6433       free(t->insn);
6434       free(t->labels);
6435       free(t->constants);
6436       t->num_constants = 0;
6437       free(t->immediates);
6438       t->num_immediates = 0;
6439
6440       if (t->error) {
6441          debug_printf("%s: translate error flag set\n", __func__);
6442       }
6443
6444       FREE(t);
6445    }
6446
6447    return ret;
6448 }
6449 /* ----------------------------- End TGSI code ------------------------------ */
6450
6451
6452 /**
6453  * Convert a shader's GLSL IR into a Mesa gl_program, although without
6454  * generating Mesa IR.
6455  */
6456 static struct gl_program *
6457 get_mesa_program(struct gl_context *ctx,
6458                  struct gl_shader_program *shader_program,
6459                  struct gl_shader *shader)
6460 {
6461    glsl_to_tgsi_visitor* v;
6462    struct gl_program *prog;
6463    GLenum target = _mesa_shader_stage_to_program(shader->Stage);
6464    bool progress;
6465    struct gl_shader_compiler_options *options =
6466          &ctx->Const.ShaderCompilerOptions[_mesa_shader_enum_to_shader_stage(shader->Type)];
6467    struct pipe_screen *pscreen = ctx->st->pipe->screen;
6468    unsigned ptarget = st_shader_stage_to_ptarget(shader->Stage);
6469
6470    validate_ir_tree(shader->ir);
6471
6472    prog = ctx->Driver.NewProgram(ctx, target, shader_program->Name);
6473    if (!prog)
6474       return NULL;
6475    prog->Parameters = _mesa_new_parameter_list();
6476    v = new glsl_to_tgsi_visitor();
6477    v->ctx = ctx;
6478    v->prog = prog;
6479    v->shader_program = shader_program;
6480    v->shader = shader;
6481    v->options = options;
6482    v->glsl_version = ctx->Const.GLSLVersion;
6483    v->native_integers = ctx->Const.NativeIntegers;
6484
6485    v->have_sqrt = pscreen->get_shader_param(pscreen, ptarget,
6486                                             PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED);
6487    v->have_fma = pscreen->get_shader_param(pscreen, ptarget,
6488                                            PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED);
6489
6490    _mesa_copy_linked_program_data(shader->Stage, shader_program, prog);
6491    _mesa_generate_parameters_list_for_uniforms(shader_program, shader,
6492                                                prog->Parameters);
6493
6494    /* Remove reads from output registers. */
6495    lower_output_reads(shader->Stage, shader->ir);
6496
6497    /* Emit intermediate IR for main(). */
6498    visit_exec_list(shader->ir, v);
6499
6500    /* Now emit bodies for any functions that were used. */
6501    do {
6502       progress = GL_FALSE;
6503
6504       foreach_in_list(function_entry, entry, &v->function_signatures) {
6505          if (!entry->bgn_inst) {
6506             v->current_function = entry;
6507
6508             entry->bgn_inst = v->emit_asm(NULL, TGSI_OPCODE_BGNSUB);
6509             entry->bgn_inst->function = entry;
6510
6511             visit_exec_list(&entry->sig->body, v);
6512
6513             glsl_to_tgsi_instruction *last;
6514             last = (glsl_to_tgsi_instruction *)v->instructions.get_tail();
6515             if (last->op != TGSI_OPCODE_RET)
6516                v->emit_asm(NULL, TGSI_OPCODE_RET);
6517
6518             glsl_to_tgsi_instruction *end;
6519             end = v->emit_asm(NULL, TGSI_OPCODE_ENDSUB);
6520             end->function = entry;
6521
6522             progress = GL_TRUE;
6523          }
6524       }
6525    } while (progress);
6526
6527 #if 0
6528    /* Print out some information (for debugging purposes) used by the
6529     * optimization passes. */
6530    {
6531       int i;
6532       int *first_writes = rzalloc_array(v->mem_ctx, int, v->next_temp);
6533       int *first_reads = rzalloc_array(v->mem_ctx, int, v->next_temp);
6534       int *last_writes = rzalloc_array(v->mem_ctx, int, v->next_temp);
6535       int *last_reads = rzalloc_array(v->mem_ctx, int, v->next_temp);
6536
6537       for (i = 0; i < v->next_temp; i++) {
6538          first_writes[i] = -1;
6539          first_reads[i] = -1;
6540          last_writes[i] = -1;
6541          last_reads[i] = -1;
6542       }
6543       v->get_first_temp_read(first_reads);
6544       v->get_last_temp_read_first_temp_write(last_reads, first_writes);
6545       v->get_last_temp_write(last_writes);
6546       for (i = 0; i < v->next_temp; i++)
6547          printf("Temp %d: FR=%3d FW=%3d LR=%3d LW=%3d\n", i, first_reads[i],
6548                 first_writes[i],
6549                 last_reads[i],
6550                 last_writes[i]);
6551       ralloc_free(first_writes);
6552       ralloc_free(first_reads);
6553       ralloc_free(last_writes);
6554       ralloc_free(last_reads);
6555    }
6556 #endif
6557
6558    /* Perform optimizations on the instructions in the glsl_to_tgsi_visitor. */
6559    v->simplify_cmp();
6560
6561    if (shader->Type != GL_TESS_CONTROL_SHADER &&
6562        shader->Type != GL_TESS_EVALUATION_SHADER)
6563       v->copy_propagate();
6564
6565    while (v->eliminate_dead_code());
6566
6567    v->merge_two_dsts();
6568    v->merge_registers();
6569    v->renumber_registers();
6570
6571    /* Write the END instruction. */
6572    v->emit_asm(NULL, TGSI_OPCODE_END);
6573
6574    if (ctx->_Shader->Flags & GLSL_DUMP) {
6575       _mesa_log("\n");
6576       _mesa_log("GLSL IR for linked %s program %d:\n",
6577              _mesa_shader_stage_to_string(shader->Stage),
6578              shader_program->Name);
6579       _mesa_print_ir(_mesa_get_log_file(), shader->ir, NULL);
6580       _mesa_log("\n\n");
6581    }
6582
6583    prog->Instructions = NULL;
6584    prog->NumInstructions = 0;
6585
6586    do_set_program_inouts(shader->ir, prog, shader->Stage);
6587    shrink_array_declarations(v->input_arrays, v->num_input_arrays,
6588                              prog->InputsRead, prog->DoubleInputsRead, prog->PatchInputsRead);
6589    shrink_array_declarations(v->output_arrays, v->num_output_arrays,
6590                              prog->OutputsWritten, 0ULL, prog->PatchOutputsWritten);
6591    count_resources(v, prog);
6592
6593    /* The GLSL IR won't be needed anymore. */
6594    ralloc_free(shader->ir);
6595    shader->ir = NULL;
6596
6597    /* This must be done before the uniform storage is associated. */
6598    if (shader->Type == GL_FRAGMENT_SHADER &&
6599        (prog->InputsRead & VARYING_BIT_POS ||
6600         prog->SystemValuesRead & (1 << SYSTEM_VALUE_FRAG_COORD))) {
6601       static const gl_state_index wposTransformState[STATE_LENGTH] = {
6602          STATE_INTERNAL, STATE_FB_WPOS_Y_TRANSFORM
6603       };
6604
6605       v->wpos_transform_const = _mesa_add_state_reference(prog->Parameters,
6606                                                           wposTransformState);
6607    }
6608
6609    _mesa_reference_program(ctx, &shader->Program, prog);
6610
6611    /* Avoid reallocation of the program parameter list, because the uniform
6612     * storage is only associated with the original parameter list.
6613     * This should be enough for Bitmap and DrawPixels constants.
6614     */
6615    _mesa_reserve_parameter_storage(prog->Parameters, 8);
6616
6617    /* This has to be done last.  Any operation the can cause
6618     * prog->ParameterValues to get reallocated (e.g., anything that adds a
6619     * program constant) has to happen before creating this linkage.
6620     */
6621    _mesa_associate_uniform_storage(ctx, shader_program, prog->Parameters);
6622    if (!shader_program->LinkStatus) {
6623       free_glsl_to_tgsi_visitor(v);
6624       return NULL;
6625    }
6626
6627    struct st_vertex_program *stvp;
6628    struct st_fragment_program *stfp;
6629    struct st_geometry_program *stgp;
6630    struct st_tessctrl_program *sttcp;
6631    struct st_tesseval_program *sttep;
6632    struct st_compute_program *stcp;
6633
6634    switch (shader->Type) {
6635    case GL_VERTEX_SHADER:
6636       stvp = (struct st_vertex_program *)prog;
6637       stvp->glsl_to_tgsi = v;
6638       break;
6639    case GL_FRAGMENT_SHADER:
6640       stfp = (struct st_fragment_program *)prog;
6641       stfp->glsl_to_tgsi = v;
6642       break;
6643    case GL_GEOMETRY_SHADER:
6644       stgp = (struct st_geometry_program *)prog;
6645       stgp->glsl_to_tgsi = v;
6646       break;
6647    case GL_TESS_CONTROL_SHADER:
6648       sttcp = (struct st_tessctrl_program *)prog;
6649       sttcp->glsl_to_tgsi = v;
6650       break;
6651    case GL_TESS_EVALUATION_SHADER:
6652       sttep = (struct st_tesseval_program *)prog;
6653       sttep->glsl_to_tgsi = v;
6654       break;
6655    case GL_COMPUTE_SHADER:
6656       stcp = (struct st_compute_program *)prog;
6657       stcp->glsl_to_tgsi = v;
6658       break;
6659    default:
6660       assert(!"should not be reached");
6661       return NULL;
6662    }
6663
6664    return prog;
6665 }
6666
6667 extern "C" {
6668
6669 static void
6670 st_dump_program_for_shader_db(struct gl_context *ctx,
6671                               struct gl_shader_program *prog)
6672 {
6673    /* Dump only successfully compiled and linked shaders to the specified
6674     * file. This is for shader-db.
6675     *
6676     * These options allow some pre-processing of shaders while dumping,
6677     * because some apps have ill-formed shaders.
6678     */
6679    const char *dump_filename = os_get_option("ST_DUMP_SHADERS");
6680    const char *insert_directives = os_get_option("ST_DUMP_INSERT");
6681
6682    if (dump_filename && prog->Name != 0) {
6683       FILE *f = fopen(dump_filename, "a");
6684
6685       if (f) {
6686          for (unsigned i = 0; i < prog->NumShaders; i++) {
6687             const struct gl_shader *sh = prog->Shaders[i];
6688             const char *source;
6689             bool skip_version = false;
6690
6691             if (!sh)
6692                continue;
6693
6694             source = sh->Source;
6695
6696             /* This string mustn't be changed. shader-db uses it to find
6697              * where the shader begins.
6698              */
6699             fprintf(f, "GLSL %s shader %d source for linked program %d:\n",
6700                     _mesa_shader_stage_to_string(sh->Stage),
6701                     i, prog->Name);
6702
6703             /* Dump the forced version if set. */
6704             if (ctx->Const.ForceGLSLVersion) {
6705                fprintf(f, "#version %i\n", ctx->Const.ForceGLSLVersion);
6706                skip_version = true;
6707             }
6708
6709             /* Insert directives (optional). */
6710             if (insert_directives) {
6711                if (!ctx->Const.ForceGLSLVersion && prog->Version)
6712                   fprintf(f, "#version %i\n", prog->Version);
6713                fprintf(f, "%s\n", insert_directives);
6714                skip_version = true;
6715             }
6716
6717             if (skip_version && strncmp(source, "#version ", 9) == 0) {
6718                const char *next_line = strstr(source, "\n");
6719
6720                if (next_line)
6721                   source = next_line + 1;
6722                else
6723                   continue;
6724             }
6725
6726             fprintf(f, "%s", source);
6727             fprintf(f, "\n");
6728          }
6729          fclose(f);
6730       }
6731    }
6732 }
6733
6734 /**
6735  * Link a shader.
6736  * Called via ctx->Driver.LinkShader()
6737  * This actually involves converting GLSL IR into an intermediate TGSI-like IR
6738  * with code lowering and other optimizations.
6739  */
6740 GLboolean
6741 st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
6742 {
6743    struct pipe_screen *pscreen = ctx->st->pipe->screen;
6744    assert(prog->LinkStatus);
6745
6746    for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
6747       if (prog->_LinkedShaders[i] == NULL)
6748          continue;
6749
6750       bool progress;
6751       exec_list *ir = prog->_LinkedShaders[i]->ir;
6752       gl_shader_stage stage = _mesa_shader_enum_to_shader_stage(prog->_LinkedShaders[i]->Type);
6753       const struct gl_shader_compiler_options *options =
6754             &ctx->Const.ShaderCompilerOptions[stage];
6755       unsigned ptarget = st_shader_stage_to_ptarget(stage);
6756       bool have_dround = pscreen->get_shader_param(pscreen, ptarget,
6757                                                    PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED);
6758       bool have_dfrexp = pscreen->get_shader_param(pscreen, ptarget,
6759                                                    PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED);
6760
6761       /* If there are forms of indirect addressing that the driver
6762        * cannot handle, perform the lowering pass.
6763        */
6764       if (options->EmitNoIndirectInput || options->EmitNoIndirectOutput ||
6765           options->EmitNoIndirectTemp || options->EmitNoIndirectUniform) {
6766          lower_variable_index_to_cond_assign(prog->_LinkedShaders[i]->Stage, ir,
6767                                              options->EmitNoIndirectInput,
6768                                              options->EmitNoIndirectOutput,
6769                                              options->EmitNoIndirectTemp,
6770                                              options->EmitNoIndirectUniform);
6771       }
6772
6773       if (ctx->Extensions.ARB_shading_language_packing) {
6774          unsigned lower_inst = LOWER_PACK_SNORM_2x16 |
6775                                LOWER_UNPACK_SNORM_2x16 |
6776                                LOWER_PACK_UNORM_2x16 |
6777                                LOWER_UNPACK_UNORM_2x16 |
6778                                LOWER_PACK_SNORM_4x8 |
6779                                LOWER_UNPACK_SNORM_4x8 |
6780                                LOWER_UNPACK_UNORM_4x8 |
6781                                LOWER_PACK_UNORM_4x8;
6782
6783          if (ctx->Extensions.ARB_gpu_shader5)
6784             lower_inst |= LOWER_PACK_USE_BFI |
6785                           LOWER_PACK_USE_BFE;
6786          if (!ctx->st->has_half_float_packing)
6787             lower_inst |= LOWER_PACK_HALF_2x16 |
6788                           LOWER_UNPACK_HALF_2x16;
6789
6790          lower_packing_builtins(ir, lower_inst);
6791       }
6792
6793       if (!pscreen->get_param(pscreen, PIPE_CAP_TEXTURE_GATHER_OFFSETS))
6794          lower_offset_arrays(ir);
6795       do_mat_op_to_vec(ir);
6796       lower_instructions(ir,
6797                          MOD_TO_FLOOR |
6798                          DIV_TO_MUL_RCP |
6799                          EXP_TO_EXP2 |
6800                          LOG_TO_LOG2 |
6801                          LDEXP_TO_ARITH |
6802                          (have_dfrexp ? 0 : DFREXP_DLDEXP_TO_ARITH) |
6803                          CARRY_TO_ARITH |
6804                          BORROW_TO_ARITH |
6805                          (have_dround ? 0 : DOPS_TO_DFRAC) |
6806                          (options->EmitNoPow ? POW_TO_EXP2 : 0) |
6807                          (!ctx->Const.NativeIntegers ? INT_DIV_TO_MUL_RCP : 0) |
6808                          (options->EmitNoSat ? SAT_TO_CLAMP : 0));
6809
6810       do_vec_index_to_cond_assign(ir);
6811       lower_vector_insert(ir, true);
6812       lower_quadop_vector(ir, false);
6813       lower_noise(ir);
6814       if (options->MaxIfDepth == 0) {
6815          lower_discard(ir);
6816       }
6817
6818       do {
6819          progress = false;
6820
6821          progress = do_lower_jumps(ir, true, true, options->EmitNoMainReturn, options->EmitNoCont, options->EmitNoLoops) || progress;
6822
6823          progress = do_common_optimization(ir, true, true, options,
6824                                            ctx->Const.NativeIntegers)
6825            || progress;
6826
6827          progress = lower_if_to_cond_assign(ir, options->MaxIfDepth) || progress;
6828
6829       } while (progress);
6830
6831       validate_ir_tree(ir);
6832    }
6833
6834    build_program_resource_list(ctx, prog);
6835
6836    for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
6837       struct gl_program *linked_prog;
6838
6839       if (prog->_LinkedShaders[i] == NULL)
6840          continue;
6841
6842       linked_prog = get_mesa_program(ctx, prog, prog->_LinkedShaders[i]);
6843
6844       if (linked_prog) {
6845          _mesa_reference_program(ctx, &prog->_LinkedShaders[i]->Program,
6846                                  linked_prog);
6847          if (!ctx->Driver.ProgramStringNotify(ctx,
6848                                               _mesa_shader_stage_to_program(i),
6849                                               linked_prog)) {
6850             _mesa_reference_program(ctx, &prog->_LinkedShaders[i]->Program,
6851                                     NULL);
6852             _mesa_reference_program(ctx, &linked_prog, NULL);
6853             return GL_FALSE;
6854          }
6855       }
6856
6857       _mesa_reference_program(ctx, &linked_prog, NULL);
6858    }
6859
6860    st_dump_program_for_shader_db(ctx, prog);
6861    return GL_TRUE;
6862 }
6863
6864 void
6865 st_translate_stream_output_info(glsl_to_tgsi_visitor *glsl_to_tgsi,
6866                                 const GLuint outputMapping[],
6867                                 struct pipe_stream_output_info *so)
6868 {
6869    unsigned i;
6870    struct gl_transform_feedback_info *info =
6871       &glsl_to_tgsi->shader_program->LinkedTransformFeedback;
6872
6873    for (i = 0; i < info->NumOutputs; i++) {
6874       so->output[i].register_index =
6875          outputMapping[info->Outputs[i].OutputRegister];
6876       so->output[i].start_component = info->Outputs[i].ComponentOffset;
6877       so->output[i].num_components = info->Outputs[i].NumComponents;
6878       so->output[i].output_buffer = info->Outputs[i].OutputBuffer;
6879       so->output[i].dst_offset = info->Outputs[i].DstOffset;
6880       so->output[i].stream = info->Outputs[i].StreamId;
6881    }
6882
6883    for (i = 0; i < PIPE_MAX_SO_BUFFERS; i++) {
6884       so->stride[i] = info->Buffers[i].Stride;
6885    }
6886    so->num_outputs = info->NumOutputs;
6887 }
6888
6889 } /* extern "C" */