src/mesa/state_tracker/st_glsl_to_tgsi.cpp

   1 /*
   2  * Copyright (C) 2005-2007  Brian Paul   All Rights Reserved.
   3  * Copyright (C) 2008  VMware, Inc.   All Rights Reserved.
   4  * Copyright © 2010 Intel Corporation
   5  * Copyright © 2011 Bryan Cain
   6  *
   7  * Permission is hereby granted, free of charge, to any person obtaining a
   8  * copy of this software and associated documentation files (the "Software"),
   9  * to deal in the Software without restriction, including without limitation
  10  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  11  * and/or sell copies of the Software, and to permit persons to whom the
  12  * Software is furnished to do so, subject to the following conditions:
  13  *
  14  * The above copyright notice and this permission notice (including the next
  15  * paragraph) shall be included in all copies or substantial portions of the
  16  * Software.
  17  *
  18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  19  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  20  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  21  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  22  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  23  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  24  * DEALINGS IN THE SOFTWARE.
  25  */
  26
  27 /**
 * \file st_glsl_to_tgsi.cpp
  29  *
  30  * Translate GLSL IR to TGSI.
  31  */
  32
  33 #include "st_glsl_to_tgsi.h"
  34
  35 #include "compiler/glsl/glsl_parser_extras.h"
  36 #include "compiler/glsl/ir_optimization.h"
  37 #include "compiler/glsl/program.h"
  38
  39 #include "main/errors.h"
  40 #include "main/shaderobj.h"
  41 #include "main/uniforms.h"
  42 #include "main/shaderapi.h"
  43 #include "main/shaderimage.h"
  44 #include "program/prog_instruction.h"
  45
  46 #include "pipe/p_context.h"
  47 #include "pipe/p_screen.h"
  48 #include "tgsi/tgsi_ureg.h"
  49 #include "tgsi/tgsi_info.h"
  50 #include "util/u_math.h"
  51 #include "util/u_memory.h"
  52 #include "st_program.h"
  53 #include "st_mesa_to_tgsi.h"
  54 #include "st_format.h"
  55
  56
/** Bitmask with one bit set for each of the constant-like register files
 * (PROGRAM_STATE_VAR, PROGRAM_CONSTANT and PROGRAM_UNIFORM).
 */
#define PROGRAM_ANY_CONST ((1 << PROGRAM_STATE_VAR) |    \
                           (1 << PROGRAM_CONSTANT) |     \
                           (1 << PROGRAM_UNIFORM))

/** Number of entries in glsl_to_tgsi_instruction::tex_offsets. */
#define MAX_GLSL_TEXTURE_OFFSET 4

/* The two register classes have constructors that convert from each other,
 * so both names must be visible before either class is defined.
 */
class st_src_reg;
class st_dst_reg;

/* Defined further down; needed early by the st_src_reg constructor. */
static int swizzle_for_size(int size);
  67
  68 /**
  69  * This struct is a corresponding struct to TGSI ureg_src.
  70  */
  71 class st_src_reg {
  72 public:
  73    st_src_reg(gl_register_file file, int index, const glsl_type *type)
  74    {
  75       this->file = file;
  76       this->index = index;
  77       if (type && (type->is_scalar() || type->is_vector() || type->is_matrix()))
  78          this->swizzle = swizzle_for_size(type->vector_elements);
  79       else
  80          this->swizzle = SWIZZLE_XYZW;
  81       this->negate = 0;
  82       this->index2D = 0;
  83       this->type = type ? type->base_type : GLSL_TYPE_ERROR;
  84       this->reladdr = NULL;
  85       this->reladdr2 = NULL;
  86       this->has_index2 = false;
  87       this->double_reg2 = false;
  88       this->array_id = 0;
  89       this->is_double_vertex_input = false;
  90    }
  91
  92    st_src_reg(gl_register_file file, int index, int type)
  93    {
  94       this->type = type;
  95       this->file = file;
  96       this->index = index;
  97       this->index2D = 0;
  98       this->swizzle = SWIZZLE_XYZW;
  99       this->negate = 0;
 100       this->reladdr = NULL;
 101       this->reladdr2 = NULL;
 102       this->has_index2 = false;
 103       this->double_reg2 = false;
 104       this->array_id = 0;
 105       this->is_double_vertex_input = false;
 106    }
 107
 108    st_src_reg(gl_register_file file, int index, int type, int index2D)
 109    {
 110       this->type = type;
 111       this->file = file;
 112       this->index = index;
 113       this->index2D = index2D;
 114       this->swizzle = SWIZZLE_XYZW;
 115       this->negate = 0;
 116       this->reladdr = NULL;
 117       this->reladdr2 = NULL;
 118       this->has_index2 = false;
 119       this->double_reg2 = false;
 120       this->array_id = 0;
 121       this->is_double_vertex_input = false;
 122    }
 123
 124    st_src_reg()
 125    {
 126       this->type = GLSL_TYPE_ERROR;
 127       this->file = PROGRAM_UNDEFINED;
 128       this->index = 0;
 129       this->index2D = 0;
 130       this->swizzle = 0;
 131       this->negate = 0;
 132       this->reladdr = NULL;
 133       this->reladdr2 = NULL;
 134       this->has_index2 = false;
 135       this->double_reg2 = false;
 136       this->array_id = 0;
 137       this->is_double_vertex_input = false;
 138    }
 139
 140    explicit st_src_reg(st_dst_reg reg);
 141
 142    gl_register_file file; /**< PROGRAM_* from Mesa */
 143    int index; /**< temporary index, VERT_ATTRIB_*, VARYING_SLOT_*, etc. */
 144    int index2D;
 145    GLuint swizzle; /**< SWIZZLE_XYZWONEZERO swizzles from Mesa. */
 146    int negate; /**< NEGATE_XYZW mask from mesa */
 147    int type; /** GLSL_TYPE_* from GLSL IR (enum glsl_base_type) */
 148    /** Register index should be offset by the integer in this reg. */
 149    st_src_reg *reladdr;
 150    st_src_reg *reladdr2;
 151    bool has_index2;
 152    /*
 153     * Is this the second half of a double register pair?
 154     * currently used for input mapping only.
 155     */
 156    bool double_reg2;
 157    unsigned array_id;
 158    bool is_double_vertex_input;
 159 };
 160
 161 class st_dst_reg {
 162 public:
 163    st_dst_reg(gl_register_file file, int writemask, int type, int index)
 164    {
 165       this->file = file;
 166       this->index = index;
 167       this->index2D = 0;
 168       this->writemask = writemask;
 169       this->reladdr = NULL;
 170       this->reladdr2 = NULL;
 171       this->has_index2 = false;
 172       this->type = type;
 173       this->array_id = 0;
 174    }
 175
 176    st_dst_reg(gl_register_file file, int writemask, int type)
 177    {
 178       this->file = file;
 179       this->index = 0;
 180       this->index2D = 0;
 181       this->writemask = writemask;
 182       this->reladdr = NULL;
 183       this->reladdr2 = NULL;
 184       this->has_index2 = false;
 185       this->type = type;
 186       this->array_id = 0;
 187    }
 188
 189    st_dst_reg()
 190    {
 191       this->type = GLSL_TYPE_ERROR;
 192       this->file = PROGRAM_UNDEFINED;
 193       this->index = 0;
 194       this->index2D = 0;
 195       this->writemask = 0;
 196       this->reladdr = NULL;
 197       this->reladdr2 = NULL;
 198       this->has_index2 = false;
 199       this->array_id = 0;
 200    }
 201
 202    explicit st_dst_reg(st_src_reg reg);
 203
 204    gl_register_file file; /**< PROGRAM_* from Mesa */
 205    int index; /**< temporary index, VERT_ATTRIB_*, VARYING_SLOT_*, etc. */
 206    int index2D;
 207    int writemask; /**< Bitfield of WRITEMASK_[XYZW] */
 208    int type; /** GLSL_TYPE_* from GLSL IR (enum glsl_base_type) */
 209    /** Register index should be offset by the integer in this reg. */
 210    st_src_reg *reladdr;
 211    st_src_reg *reladdr2;
 212    bool has_index2;
 213    unsigned array_id;
 214 };
 215
 216 st_src_reg::st_src_reg(st_dst_reg reg)
 217 {
 218    this->type = reg.type;
 219    this->file = reg.file;
 220    this->index = reg.index;
 221    this->swizzle = SWIZZLE_XYZW;
 222    this->negate = 0;
 223    this->reladdr = reg.reladdr;
 224    this->index2D = reg.index2D;
 225    this->reladdr2 = reg.reladdr2;
 226    this->has_index2 = reg.has_index2;
 227    this->double_reg2 = false;
 228    this->array_id = reg.array_id;
 229    this->is_double_vertex_input = false;
 230 }
 231
 232 st_dst_reg::st_dst_reg(st_src_reg reg)
 233 {
 234    this->type = reg.type;
 235    this->file = reg.file;
 236    this->index = reg.index;
 237    this->writemask = WRITEMASK_XYZW;
 238    this->reladdr = reg.reladdr;
 239    this->index2D = reg.index2D;
 240    this->reladdr2 = reg.reladdr2;
 241    this->has_index2 = reg.has_index2;
 242    this->array_id = reg.array_id;
 243 }
 244
/**
 * One instruction in the visitor's intermediate instruction list.
 *
 * Instances are allocated by glsl_to_tgsi_visitor::emit_asm(), which fills
 * in op, info, dst[], src[], ir, dead_mask, tex_type and function; the
 * remaining fields are left for the caller to set.
 */
class glsl_to_tgsi_instruction : public exec_node {
public:
   DECLARE_RALLOC_CXX_OPERATORS(glsl_to_tgsi_instruction)

   unsigned op; /**< TGSI_OPCODE_* as resolved by get_opcode() */
   st_dst_reg dst[2]; /**< destinations; dst[1] is undefined when unused */
   st_src_reg src[4]; /**< sources; unused slots are undefined registers */
   /** Pointer to the ir source this tree came from for debugging */
   ir_instruction *ir;
   GLboolean cond_update;
   bool saturate;
   st_src_reg sampler; /**< sampler register */
   int sampler_base;
   int sampler_array_size; /**< 1-based size of sampler array, 1 if not array */
   int tex_target; /**< One of TEXTURE_*_INDEX */
   glsl_base_type tex_type; /**< emit_asm() defaults this to GLSL_TYPE_FLOAT */
   GLboolean tex_shadow;
   unsigned image_format;

   st_src_reg tex_offsets[MAX_GLSL_TEXTURE_OFFSET];
   unsigned tex_offset_num_offset;
   int dead_mask; /**< Used in dead code elimination */

   st_src_reg buffer; /**< buffer register */
   unsigned buffer_access; /**< buffer access type */

   class function_entry *function; /* Set on TGSI_OPCODE_CAL or TGSI_OPCODE_BGNSUB */
   const struct tgsi_opcode_info *info; /**< result of tgsi_get_opcode_info(op) */
};
 274
 275 class variable_storage : public exec_node {
 276 public:
 277    variable_storage(ir_variable *var, gl_register_file file, int index,
 278                     unsigned array_id = 0)
 279       : file(file), index(index), var(var), array_id(array_id)
 280    {
 281       /* empty */
 282    }
 283
 284    gl_register_file file;
 285    int index;
 286    ir_variable *var; /* variable that maps to this, if any */
 287    unsigned array_id;
 288 };
 289
 290 class immediate_storage : public exec_node {
 291 public:
 292    immediate_storage(gl_constant_value *values, int size32, int type)
 293    {
 294       memcpy(this->values, values, size32 * sizeof(gl_constant_value));
 295       this->size32 = size32;
 296       this->type = type;
 297    }
 298
 299    /* doubles are stored across 2 gl_constant_values */
 300    gl_constant_value values[4];
 301    int size32; /**< Number of 32-bit components (1-4) */
 302    int type; /**< GL_DOUBLE, GL_FLOAT, GL_INT, GL_BOOL, or GL_UNSIGNED_INT */
 303 };
 304
/**
 * List node describing one GLSL function signature known to the visitor:
 * its IR signature, the small integer id used to refer to it, where its
 * body starts, and the register holding its return value.
 */
class function_entry : public exec_node {
public:
   /** IR signature this entry describes. */
   ir_function_signature *sig;

   /**
    * identifier of this function signature used by the program.
    *
    * At the point that TGSI instructions for function calls are
    * generated, we don't know the address of the first instruction of
    * the function body.  So we make the BranchTarget that is called a
    * small integer and rewrite them during set_branchtargets().
    */
   int sig_id;

   /**
    * Pointer to first instruction of the function body.
    *
    * Set during function body emits after main() is processed.
    */
   glsl_to_tgsi_instruction *bgn_inst;

   /**
    * Index of the first instruction of the function body in actual TGSI.
    *
    * Set after conversion from glsl_to_tgsi_instruction to TGSI.
    */
   int inst;

   /** Storage for the return value. */
   st_src_reg return_reg;
};
 336
/* Placeholder registers in the PROGRAM_UNDEFINED file.  They are the default
 * arguments of emit_asm() for operands an instruction does not use.
 *
 * NOTE(review): the second argument of this st_dst_reg constructor is the
 * writemask, yet a swizzle constant (SWIZZLE_NOOP) is passed for undef_dst.
 * Confirm whether that is intentional before relying on undef_dst.writemask.
 */
static st_src_reg undef_src = st_src_reg(PROGRAM_UNDEFINED, 0, GLSL_TYPE_ERROR);
static st_dst_reg undef_dst = st_dst_reg(PROGRAM_UNDEFINED, SWIZZLE_NOOP, GLSL_TYPE_ERROR);
 339
/**
 * Description of one declared input or output array, as stored in
 * glsl_to_tgsi_visitor::input_arrays / output_arrays.
 */
struct array_decl {
   unsigned mesa_index;
   unsigned array_id;   /**< key matched by find_array_type() */
   unsigned array_size;
   unsigned array_type; /**< compared against GLSL_TYPE_* values by users of find_array_type() */
};
 346
 347 static unsigned
 348 find_array_type(struct array_decl *arrays, unsigned count, unsigned array_id)
 349 {
 350    unsigned i;
 351
 352    for (i = 0; i < count; i++) {
 353       struct array_decl *decl = &arrays[i];
 354
 355       if (array_id == decl->array_id) {
 356          return decl->array_type;
 357       }
 358    }
 359    return GLSL_TYPE_ERROR;
 360 }
 361
/**
 * One entry of the table handed to
 * glsl_to_tgsi_visitor::rename_temp_registers().
 */
struct rename_reg_pair {
   int old_reg; /* presumably the register index to be replaced -- confirm in rename_temp_registers() */
   int new_reg; /* presumably the index that replaces old_reg -- confirm in rename_temp_registers() */
};
 366
/**
 * IR visitor that walks GLSL IR and accumulates the result as a list of
 * glsl_to_tgsi_instruction (see \c instructions), together with the
 * variable, immediate and function-signature bookkeeping that goes with it.
 */
struct glsl_to_tgsi_visitor : public ir_visitor {
public:
   glsl_to_tgsi_visitor();
   ~glsl_to_tgsi_visitor();

   function_entry *current_function;

   struct gl_context *ctx;
   struct gl_program *prog;
   struct gl_shader_program *shader_program;
   struct gl_shader *shader;
   struct gl_shader_compiler_options *options;

   int next_temp;

   unsigned *array_sizes;
   unsigned max_num_arrays;
   unsigned next_array;

   /* Declared input/output arrays; output_arrays is consulted by emit_asm()
    * through find_array_type() to detect double-typed outputs.
    */
   struct array_decl input_arrays[PIPE_MAX_SHADER_INPUTS];
   unsigned num_input_arrays;
   struct array_decl output_arrays[PIPE_MAX_SHADER_OUTPUTS];
   unsigned num_output_arrays;

   int num_address_regs;
   int samplers_used;
   glsl_base_type sampler_types[PIPE_MAX_SAMPLERS];
   int sampler_targets[PIPE_MAX_SAMPLERS];   /**< One of TGSI_TEXTURE_* */
   int buffers_used;
   int images_used;
   int image_targets[PIPE_MAX_SHADER_IMAGES];
   unsigned image_formats[PIPE_MAX_SHADER_IMAGES];
   /** Set by emit_asm() when a relatively-addressed operand lives in the
    * PROGRAM_STATE_VAR, PROGRAM_CONSTANT or PROGRAM_UNIFORM file.
    */
   bool indirect_addr_consts;
   int wpos_transform_const;

   int glsl_version;
   /** When set, get_opcode() takes the operation type from src0 for
    * non-float, non-double operands (bool is treated as int).
    */
   bool native_integers;
   bool have_sqrt;
   bool have_fma;
   bool use_shared_memory;

   variable_storage *find_variable_storage(ir_variable *var);

   int add_constant(gl_register_file file, gl_constant_value values[8],
                    int size, int datatype, GLuint *swizzle_out);

   function_entry *get_function_signature(ir_function_signature *sig);

   st_src_reg get_temp(const glsl_type *type);
   void reladdr_to_temp(ir_instruction *ir, st_src_reg *reg, int *num_reladdr);

   st_src_reg st_src_reg_for_double(double val);
   st_src_reg st_src_reg_for_float(float val);
   st_src_reg st_src_reg_for_int(int val);
   st_src_reg st_src_reg_for_type(int type, int val);

   /**
    * \name Visit methods
    *
    * As typical for the visitor pattern, there must be one \c visit method for
    * each concrete subclass of \c ir_instruction.  Virtual base classes within
    * the hierarchy should not have \c visit methods.
    */
   /*@{*/
   virtual void visit(ir_variable *);
   virtual void visit(ir_loop *);
   virtual void visit(ir_loop_jump *);
   virtual void visit(ir_function_signature *);
   virtual void visit(ir_function *);
   virtual void visit(ir_expression *);
   virtual void visit(ir_swizzle *);
   virtual void visit(ir_dereference_variable  *);
   virtual void visit(ir_dereference_array *);
   virtual void visit(ir_dereference_record *);
   virtual void visit(ir_assignment *);
   virtual void visit(ir_constant *);
   virtual void visit(ir_call *);
   virtual void visit(ir_return *);
   virtual void visit(ir_discard *);
   virtual void visit(ir_texture *);
   virtual void visit(ir_if *);
   virtual void visit(ir_emit_vertex *);
   virtual void visit(ir_end_primitive *);
   virtual void visit(ir_barrier *);
   /*@}*/

   void visit_atomic_counter_intrinsic(ir_call *);
   void visit_ssbo_intrinsic(ir_call *);
   void visit_membar_intrinsic(ir_call *);
   void visit_shared_intrinsic(ir_call *);
   void visit_image_intrinsic(ir_call *);

   st_src_reg result;

   /** List of variable_storage */
   exec_list variables;

   /** List of immediate_storage */
   exec_list immediates;
   unsigned num_immediates;

   /** List of function_entry */
   exec_list function_signatures;
   int next_signature_id;

   /** List of glsl_to_tgsi_instruction */
   exec_list instructions;

   /**
    * Single-destination form: forwards to the two-destination overload
    * with an undefined second destination.
    */
   glsl_to_tgsi_instruction *emit_asm(ir_instruction *ir, unsigned op,
                                      st_dst_reg dst = undef_dst,
                                      st_src_reg src0 = undef_src,
                                      st_src_reg src1 = undef_src,
                                      st_src_reg src2 = undef_src,
                                      st_src_reg src3 = undef_src);

   /**
    * Two-destination form: builds the instruction, appends it to
    * \c instructions and returns it (the last one, if it was split).
    */
   glsl_to_tgsi_instruction *emit_asm(ir_instruction *ir, unsigned op,
                                      st_dst_reg dst, st_dst_reg dst1,
                                      st_src_reg src0 = undef_src,
                                      st_src_reg src1 = undef_src,
                                      st_src_reg src2 = undef_src,
                                      st_src_reg src3 = undef_src);

   unsigned get_opcode(ir_instruction *ir, unsigned op,
                    st_dst_reg dst,
                    st_src_reg src0, st_src_reg src1);

   /**
    * Emit the correct dot-product instruction for the type of arguments
    */
   glsl_to_tgsi_instruction *emit_dp(ir_instruction *ir,
                                     st_dst_reg dst,
                                     st_src_reg src0,
                                     st_src_reg src1,
                                     unsigned elements);

   void emit_scalar(ir_instruction *ir, unsigned op,
                    st_dst_reg dst, st_src_reg src0);

   void emit_scalar(ir_instruction *ir, unsigned op,
                    st_dst_reg dst, st_src_reg src0, st_src_reg src1);

   void emit_arl(ir_instruction *ir, st_dst_reg dst, st_src_reg src0);

   void get_deref_offsets(ir_dereference *ir,
                          unsigned *array_size,
                          unsigned *base,
                          unsigned *index,
                          st_src_reg *reladdr);
  void calc_deref_offsets(ir_dereference *head,
                          ir_dereference *tail,
                          unsigned *array_elements,
                          unsigned *base,
                          unsigned *index,
                          st_src_reg *indirect,
                          unsigned *location);

   bool try_emit_mad(ir_expression *ir,
              int mul_operand);
   bool try_emit_mad_for_and_not(ir_expression *ir,
              int mul_operand);

   void emit_swz(ir_expression *ir);

   bool process_move_condition(ir_rvalue *ir);

   void simplify_cmp(void);

   void rename_temp_registers(int num_renames, struct rename_reg_pair *renames);
   void get_first_temp_read(int *first_reads);
   void get_last_temp_read_first_temp_write(int *last_reads, int *first_writes);
   void get_last_temp_write(int *last_writes);

   void copy_propagate(void);
   int eliminate_dead_code(void);

   void merge_two_dsts(void);
   void merge_registers(void);
   void renumber_registers(void);

   void emit_block_mov(ir_assignment *ir, const struct glsl_type *type,
                       st_dst_reg *l, st_src_reg *r,
                       st_src_reg *cond, bool cond_swap);

   /** Allocation context passed to placement new when creating instructions. */
   void *mem_ctx;
};
 552
/* Destination registers in the PROGRAM_ADDRESS file (X channel only), at
 * indices 0, 1 and 2.  emit_asm() loads a destination's reladdr into
 * address_reg and its reladdr2 into address_reg2.
 */
static st_dst_reg address_reg = st_dst_reg(PROGRAM_ADDRESS, WRITEMASK_X, GLSL_TYPE_FLOAT, 0);
static st_dst_reg address_reg2 = st_dst_reg(PROGRAM_ADDRESS, WRITEMASK_X, GLSL_TYPE_FLOAT, 1);
static st_dst_reg sampler_reladdr = st_dst_reg(PROGRAM_ADDRESS, WRITEMASK_X, GLSL_TYPE_FLOAT, 2);
 556
 557 static void
 558 fail_link(struct gl_shader_program *prog, const char *fmt, ...) PRINTFLIKE(2, 3);
 559
 560 static void
 561 fail_link(struct gl_shader_program *prog, const char *fmt, ...)
 562 {
 563    va_list args;
 564    va_start(args, fmt);
 565    ralloc_vasprintf_append(&prog->InfoLog, fmt, args);
 566    va_end(args);
 567
 568    prog->LinkStatus = GL_FALSE;
 569 }
 570
 571 static int
 572 swizzle_for_size(int size)
 573 {
 574    static const int size_swizzles[4] = {
 575       MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
 576       MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y),
 577       MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z),
 578       MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W),
 579    };
 580
 581    assert((size >= 1) && (size <= 4));
 582    return size_swizzles[size - 1];
 583 }
 584
 585 static bool
 586 is_resource_instruction(unsigned opcode)
 587 {
 588    switch (opcode) {
 589    case TGSI_OPCODE_RESQ:
 590    case TGSI_OPCODE_LOAD:
 591    case TGSI_OPCODE_ATOMUADD:
 592    case TGSI_OPCODE_ATOMXCHG:
 593    case TGSI_OPCODE_ATOMCAS:
 594    case TGSI_OPCODE_ATOMAND:
 595    case TGSI_OPCODE_ATOMOR:
 596    case TGSI_OPCODE_ATOMXOR:
 597    case TGSI_OPCODE_ATOMUMIN:
 598    case TGSI_OPCODE_ATOMUMAX:
 599    case TGSI_OPCODE_ATOMIMIN:
 600    case TGSI_OPCODE_ATOMIMAX:
 601       return true;
 602    default:
 603       return false;
 604    }
 605 }
 606
 607 static unsigned
 608 num_inst_dst_regs(const glsl_to_tgsi_instruction *op)
 609 {
 610    return op->info->num_dst;
 611 }
 612
 613 static unsigned
 614 num_inst_src_regs(const glsl_to_tgsi_instruction *op)
 615 {
 616    return op->info->is_tex || is_resource_instruction(op->op) ?
 617       op->info->num_src - 1 : op->info->num_src;
 618 }
 619
/**
 * Build an instruction with up to two destinations and four sources and
 * append it to this->instructions.
 *
 * Steps, in order:
 *  - resolve the final opcode with get_opcode();
 *  - deal with relative addressing: every source is handed to
 *    reladdr_to_temp(), and the destination's reladdr/reladdr2 are loaded
 *    into address_reg/address_reg2 with emit_arl();
 *  - fill in the instruction fields;
 *  - flag indirect_addr_consts when a relatively-addressed operand lives in
 *    a constant-like register file;
 *  - when a destination or src[0] is double-typed, split the instruction
 *    into one instruction per enabled writemask channel.
 *
 * \return the instruction that was appended; when the instruction was
 *         split, the last one appended.
 */
glsl_to_tgsi_instruction *
glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, unsigned op,
                               st_dst_reg dst, st_dst_reg dst1,
                               st_src_reg src0, st_src_reg src1,
                               st_src_reg src2, st_src_reg src3)
{
   glsl_to_tgsi_instruction *inst = new(mem_ctx) glsl_to_tgsi_instruction();
   int num_reladdr = 0, i, j;
   bool dst_is_double[2];

   op = get_opcode(ir, op, dst, src0, src1);

   /* If we have to do relative addressing, we want to load the ARL
    * reg directly for one of the regs, and preload the other reladdr
    * sources into temps.
    */
   num_reladdr += dst.reladdr != NULL || dst.reladdr2;
   num_reladdr += dst1.reladdr != NULL || dst1.reladdr2;
   num_reladdr += src0.reladdr != NULL || src0.reladdr2 != NULL;
   num_reladdr += src1.reladdr != NULL || src1.reladdr2 != NULL;
   num_reladdr += src2.reladdr != NULL || src2.reladdr2 != NULL;
   num_reladdr += src3.reladdr != NULL || src3.reladdr2 != NULL;

   /* Sources are processed last-to-first; each call may adjust num_reladdr. */
   reladdr_to_temp(ir, &src3, &num_reladdr);
   reladdr_to_temp(ir, &src2, &num_reladdr);
   reladdr_to_temp(ir, &src1, &num_reladdr);
   reladdr_to_temp(ir, &src0, &num_reladdr);

   if (dst.reladdr || dst.reladdr2) {
      if (dst.reladdr)
         emit_arl(ir, address_reg, *dst.reladdr);
      if (dst.reladdr2)
         emit_arl(ir, address_reg2, *dst.reladdr2);
      num_reladdr--;
   }
   /* NOTE(review): dst1.reladdr2 was counted above but is not handled here,
    * and dst1.reladdr is loaded into the same address_reg as dst.reladdr --
    * confirm that callers never rely on either case.
    */
   if (dst1.reladdr) {
      emit_arl(ir, address_reg, *dst1.reladdr);
      num_reladdr--;
   }
   assert(num_reladdr == 0);

   inst->op = op;
   inst->info = tgsi_get_opcode_info(op);
   inst->dst[0] = dst;
   inst->dst[1] = dst1;
   inst->src[0] = src0;
   inst->src[1] = src1;
   inst->src[2] = src2;
   inst->src[3] = src3;
   inst->ir = ir;
   inst->dead_mask = 0;
   /* default to float, for paths where this is not initialized
    * (since 0==UINT which is likely wrong):
    */
   inst->tex_type = GLSL_TYPE_FLOAT;

   inst->function = NULL;

   /* Update indirect addressing status used by TGSI */
   if (dst.reladdr || dst.reladdr2) {
      switch(dst.file) {
      case PROGRAM_STATE_VAR:
      case PROGRAM_CONSTANT:
      case PROGRAM_UNIFORM:
         this->indirect_addr_consts = true;
         break;
      case PROGRAM_IMMEDIATE:
         assert(!"immediates should not have indirect addressing");
         break;
      default:
         break;
      }
   }
   else {
      /* NOTE(review): sources are only inspected when the destination has no
       * relative address, and only their reladdr (not reladdr2) is checked.
       */
      for (i = 0; i < 4; i++) {
         if(inst->src[i].reladdr) {
            switch(inst->src[i].file) {
            case PROGRAM_STATE_VAR:
            case PROGRAM_CONSTANT:
            case PROGRAM_UNIFORM:
               this->indirect_addr_consts = true;
               break;
            case PROGRAM_IMMEDIATE:
               assert(!"immediates should not have indirect addressing");
               break;
            default:
               break;
            }
         }
      }
   }

   /*
    * This section contains the double processing.
    * GLSL just represents doubles as single channel values,
    * however most HW and TGSI represent doubles as pairs of register channels.
    *
    * So we have to fix up destination writemask/index and src swizzle/indexes.
    * Dest writemasks need to translate from a single-channel writemask
    * to a dual-channel writemask, but we also need to modify the index
    * if we are touching the Z,W fields in the pre-translated writemask.
    *
    * Src channels have similar index modifications along with swizzle
    * changes so that we pick the XY, ZW pairs from the correct index.
    *
    * GLSL [0].x -> TGSI [0].xy
    * GLSL [0].y -> TGSI [0].zw
    * GLSL [0].z -> TGSI [1].xy
    * GLSL [0].w -> TGSI [1].zw
    */
   for (j = 0; j < 2; j++) {
      dst_is_double[j] = false;
      if (inst->dst[j].type == GLSL_TYPE_DOUBLE)
         dst_is_double[j] = true;
      else if (inst->dst[j].file == PROGRAM_OUTPUT && inst->dst[j].type == GLSL_TYPE_ARRAY) {
         /* Output arrays carry their element type in the array declaration. */
         unsigned type = find_array_type(this->output_arrays, this->num_output_arrays, inst->dst[j].array_id);
         if (type == GLSL_TYPE_DOUBLE)
            dst_is_double[j] = true;
      }
   }

   if (dst_is_double[0] || dst_is_double[1] ||
       inst->src[0].type == GLSL_TYPE_DOUBLE) {
      glsl_to_tgsi_instruction *dinst = NULL;
      int initial_src_swz[4], initial_src_idx[4];
      int initial_dst_idx[2], initial_dst_writemask[2];
      /* select the writemask for dst0 or dst1 */
      unsigned writemask = inst->dst[1].file == PROGRAM_UNDEFINED ? inst->dst[0].writemask : inst->dst[1].writemask;

      /* copy out the writemask, index and swizzles for all src/dsts. */
      for (j = 0; j < 2; j++) {
         initial_dst_writemask[j] = inst->dst[j].writemask;
         initial_dst_idx[j] = inst->dst[j].index;
      }

      for (j = 0; j < 4; j++) {
         initial_src_swz[j] = inst->src[j].swizzle;
         initial_src_idx[j] = inst->src[j].index;
      }

      /*
       * scan all the components in the dst writemask
       * generate an instruction for each of them if required.
       */
      st_src_reg addr;
      while (writemask) {

         /* Channel being processed; this local shadows the outer 'i'. */
         int i = u_bit_scan(&writemask);

         /* before emitting the instruction, see if we have to adjust store
          * address */
         if (i > 1 && inst->op == TGSI_OPCODE_STORE &&
             addr.file == PROGRAM_UNDEFINED) {
            /* We have to advance the buffer address by 16 */
            addr = get_temp(glsl_type::uint_type);
            emit_asm(ir, TGSI_OPCODE_UADD, st_dst_reg(addr),
                     inst->src[0], st_src_reg_for_int(16));
         }


         /* first time use previous instruction */
         if (dinst == NULL) {
            dinst = inst;
         } else {
            /* create a new instruction for each subsequent channel */
            dinst = new(mem_ctx) glsl_to_tgsi_instruction();
            *dinst = *inst;
            /* the copy must not inherit the original's list links */
            dinst->next = NULL;
            dinst->prev = NULL;
         }
         this->instructions.push_tail(dinst);

         /* modify the destination if we are splitting */
         for (j = 0; j < 2; j++) {
            if (dst_is_double[j]) {
               dinst->dst[j].writemask = (i & 1) ? WRITEMASK_ZW : WRITEMASK_XY;
               dinst->dst[j].index = initial_dst_idx[j];
               if (i > 1) {
                  if (dinst->op == TGSI_OPCODE_STORE) {
                     dinst->src[0] = addr;
                  } else {
                     dinst->dst[j].index++;
                  }
               }
            } else {
               /* if we aren't writing to a double, just get the bit of the initial writemask
                  for this channel */
               dinst->dst[j].writemask = initial_dst_writemask[j] & (1 << i);
            }
         }

         /* modify the src registers */
         for (j = 0; j < 4; j++) {
            int swz = GET_SWZ(initial_src_swz[j], i);

            if (dinst->src[j].type == GLSL_TYPE_DOUBLE) {
               dinst->src[j].index = initial_src_idx[j];
               if (swz > 1) {
                  dinst->src[j].double_reg2 = true;
                  dinst->src[j].index++;
               }

               if (swz & 1)
                  dinst->src[j].swizzle = MAKE_SWIZZLE4(SWIZZLE_Z, SWIZZLE_W, SWIZZLE_Z, SWIZZLE_W);
               else
                  dinst->src[j].swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_X, SWIZZLE_Y);

            } else {
               /* some opcodes are special case in what they use as sources
                  - F2D is a float src0, DLDEXP is integer src1 */
               if (op == TGSI_OPCODE_F2D ||
                   op == TGSI_OPCODE_DLDEXP ||
                   (op == TGSI_OPCODE_UCMP && dst_is_double[0])) {
                  dinst->src[j].swizzle = MAKE_SWIZZLE4(swz, swz, swz, swz);
               }
            }
         }
      }
      inst = dinst;
   } else {
      this->instructions.push_tail(inst);
   }


   return inst;
}
 846
 847 glsl_to_tgsi_instruction *
 848 glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, unsigned op,
 849                                st_dst_reg dst,
 850                                st_src_reg src0, st_src_reg src1,
 851                                st_src_reg src2, st_src_reg src3)
 852 {
 853    return emit_asm(ir, op, dst, undef_dst, src0, src1, src2, src3);
 854 }
 855
 856 /**
 857  * Determines whether to use an integer, unsigned integer, or float opcode
 858  * based on the operands and input opcode, then emits the result.
 859  */
 860 unsigned
 861 glsl_to_tgsi_visitor::get_opcode(ir_instruction *ir, unsigned op,
 862                                  st_dst_reg dst,
 863                                  st_src_reg src0, st_src_reg src1)
 864 {
 865    int type = GLSL_TYPE_FLOAT;
 866
 867    if (op == TGSI_OPCODE_MOV)
 868        return op;
 869
 870    assert(src0.type != GLSL_TYPE_ARRAY);
 871    assert(src0.type != GLSL_TYPE_STRUCT);
 872    assert(src1.type != GLSL_TYPE_ARRAY);
 873    assert(src1.type != GLSL_TYPE_STRUCT);
 874
 875    if (is_resource_instruction(op))
 876       type = src1.type;
 877    else if (src0.type == GLSL_TYPE_DOUBLE || src1.type == GLSL_TYPE_DOUBLE)
 878       type = GLSL_TYPE_DOUBLE;
 879    else if (src0.type == GLSL_TYPE_FLOAT || src1.type == GLSL_TYPE_FLOAT)
 880       type = GLSL_TYPE_FLOAT;
 881    else if (native_integers)
 882       type = src0.type == GLSL_TYPE_BOOL ? GLSL_TYPE_INT : src0.type;
 883
 884 #define case5(c, f, i, u, d)                    \
 885    case TGSI_OPCODE_##c: \
 886       if (type == GLSL_TYPE_DOUBLE)           \
 887          op = TGSI_OPCODE_##d; \
 888       else if (type == GLSL_TYPE_INT)       \
 889          op = TGSI_OPCODE_##i; \
 890       else if (type == GLSL_TYPE_UINT) \
 891          op = TGSI_OPCODE_##u; \
 892       else \
 893          op = TGSI_OPCODE_##f; \
 894       break;
 895
 896 #define case4(c, f, i, u)                    \
 897    case TGSI_OPCODE_##c: \
 898       if (type == GLSL_TYPE_INT) \
 899          op = TGSI_OPCODE_##i; \
 900       else if (type == GLSL_TYPE_UINT) \
 901          op = TGSI_OPCODE_##u; \
 902       else \
 903          op = TGSI_OPCODE_##f; \
 904       break;
 905
 906 #define case3(f, i, u)  case4(f, f, i, u)
 907 #define case4d(f, i, u, d)  case5(f, f, i, u, d)
 908 #define case3fid(f, i, d) case5(f, f, i, i, d)
 909 #define case2fi(f, i)   case4(f, f, i, i)
 910 #define case2iu(i, u)   case4(i, LAST, i, u)
 911
 912 #define casecomp(c, f, i, u, d)                   \
 913    case TGSI_OPCODE_##c: \
 914       if (type == GLSL_TYPE_DOUBLE) \
 915          op = TGSI_OPCODE_##d; \
 916       else if (type == GLSL_TYPE_INT || type == GLSL_TYPE_SUBROUTINE)       \
 917          op = TGSI_OPCODE_##i; \
 918       else if (type == GLSL_TYPE_UINT) \
 919          op = TGSI_OPCODE_##u; \
 920       else if (native_integers) \
 921          op = TGSI_OPCODE_##f; \
 922       else \
 923          op = TGSI_OPCODE_##c; \
 924       break;
 925
 926    switch(op) {
 927       case3fid(ADD, UADD, DADD);
 928       case3fid(MUL, UMUL, DMUL);
 929       case3fid(MAD, UMAD, DMAD);
 930       case3fid(FMA, UMAD, DFMA);
 931       case3(DIV, IDIV, UDIV);
 932       case4d(MAX, IMAX, UMAX, DMAX);
 933       case4d(MIN, IMIN, UMIN, DMIN);
 934       case2iu(MOD, UMOD);
 935
 936       casecomp(SEQ, FSEQ, USEQ, USEQ, DSEQ);
 937       casecomp(SNE, FSNE, USNE, USNE, DSNE);
 938       casecomp(SGE, FSGE, ISGE, USGE, DSGE);
 939       casecomp(SLT, FSLT, ISLT, USLT, DSLT);
 940
 941       case2iu(ISHR, USHR);
 942
 943       case3fid(SSG, ISSG, DSSG);
 944       case3fid(ABS, IABS, DABS);
 945
 946       case2iu(IBFE, UBFE);
 947       case2iu(IMSB, UMSB);
 948       case2iu(IMUL_HI, UMUL_HI);
 949
 950       case3fid(SQRT, SQRT, DSQRT);
 951
 952       case3fid(RCP, RCP, DRCP);
 953       case3fid(RSQ, RSQ, DRSQ);
 954
 955       case3fid(FRC, FRC, DFRAC);
 956       case3fid(TRUNC, TRUNC, DTRUNC);
 957       case3fid(CEIL, CEIL, DCEIL);
 958       case3fid(FLR, FLR, DFLR);
 959       case3fid(ROUND, ROUND, DROUND);
 960
 961       case2iu(ATOMIMAX, ATOMUMAX);
 962       case2iu(ATOMIMIN, ATOMUMIN);
 963
 964       default: break;
 965    }
 966
 967    assert(op != TGSI_OPCODE_LAST);
 968    return op;
 969 }
 970
 971 glsl_to_tgsi_instruction *
 972 glsl_to_tgsi_visitor::emit_dp(ir_instruction *ir,
 973                               st_dst_reg dst, st_src_reg src0, st_src_reg src1,
 974                               unsigned elements)
 975 {
 976    static const unsigned dot_opcodes[] = {
 977       TGSI_OPCODE_DP2, TGSI_OPCODE_DP3, TGSI_OPCODE_DP4
 978    };
 979
 980    return emit_asm(ir, dot_opcodes[elements - 2], dst, src0, src1);
 981 }
 982
 983 /**
 984  * Emits TGSI scalar opcodes to produce unique answers across channels.
 985  *
 986  * Some TGSI opcodes are scalar-only, like ARB_fp/vp.  The src X
 987  * channel determines the result across all channels.  So to do a vec4
 988  * of this operation, we want to emit a scalar per source channel used
 989  * to produce dest channels.
 990  */
 991 void
 992 glsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, unsigned op,
 993                                   st_dst_reg dst,
 994                                   st_src_reg orig_src0, st_src_reg orig_src1)
 995 {
 996    int i, j;
 997    int done_mask = ~dst.writemask;
 998
 999    /* TGSI RCP is a scalar operation splatting results to all channels,
1000     * like ARB_fp/vp.  So emit as many RCPs as necessary to cover our
1001     * dst channels.
1002     */
1003    for (i = 0; i < 4; i++) {
1004       GLuint this_mask = (1 << i);
1005       st_src_reg src0 = orig_src0;
1006       st_src_reg src1 = orig_src1;
1007
1008       if (done_mask & this_mask)
1009          continue;
1010
1011       GLuint src0_swiz = GET_SWZ(src0.swizzle, i);
1012       GLuint src1_swiz = GET_SWZ(src1.swizzle, i);
1013       for (j = i + 1; j < 4; j++) {
1014          /* If there is another enabled component in the destination that is
1015           * derived from the same inputs, generate its value on this pass as
1016           * well.
1017           */
1018          if (!(done_mask & (1 << j)) &&
1019              GET_SWZ(src0.swizzle, j) == src0_swiz &&
1020              GET_SWZ(src1.swizzle, j) == src1_swiz) {
1021             this_mask |= (1 << j);
1022          }
1023       }
1024       src0.swizzle = MAKE_SWIZZLE4(src0_swiz, src0_swiz,
1025                                    src0_swiz, src0_swiz);
1026       src1.swizzle = MAKE_SWIZZLE4(src1_swiz, src1_swiz,
1027                                    src1_swiz, src1_swiz);
1028
1029       dst.writemask = this_mask;
1030       emit_asm(ir, op, dst, src0, src1);
1031       done_mask |= this_mask;
1032    }
1033 }
1034
1035 void
1036 glsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, unsigned op,
1037                                   st_dst_reg dst, st_src_reg src0)
1038 {
1039    st_src_reg undef = undef_src;
1040
1041    undef.swizzle = SWIZZLE_XXXX;
1042
1043    emit_scalar(ir, op, dst, src0, undef);
1044 }
1045
1046 void
1047 glsl_to_tgsi_visitor::emit_arl(ir_instruction *ir,
1048                                st_dst_reg dst, st_src_reg src0)
1049 {
1050    int op = TGSI_OPCODE_ARL;
1051
1052    if (src0.type == GLSL_TYPE_INT || src0.type == GLSL_TYPE_UINT)
1053       op = TGSI_OPCODE_UARL;
1054
1055    assert(dst.file == PROGRAM_ADDRESS);
1056    if (dst.index >= this->num_address_regs)
1057       this->num_address_regs = dst.index + 1;
1058
1059    emit_asm(NULL, op, dst, src0);
1060 }
1061
1062 int
1063 glsl_to_tgsi_visitor::add_constant(gl_register_file file,
1064                                    gl_constant_value values[8], int size, int datatype,
1065                                    GLuint *swizzle_out)
1066 {
1067    if (file == PROGRAM_CONSTANT) {
1068       return _mesa_add_typed_unnamed_constant(this->prog->Parameters, values,
1069                                               size, datatype, swizzle_out);
1070    }
1071
1072    assert(file == PROGRAM_IMMEDIATE);
1073
1074    int index = 0;
1075    immediate_storage *entry;
1076    int size32 = size * (datatype == GL_DOUBLE ? 2 : 1);
1077    int i;
1078
1079    /* Search immediate storage to see if we already have an identical
1080     * immediate that we can use instead of adding a duplicate entry.
1081     */
1082    foreach_in_list(immediate_storage, entry, &this->immediates) {
1083       immediate_storage *tmp = entry;
1084
1085       for (i = 0; i * 4 < size32; i++) {
1086          int slot_size = MIN2(size32 - (i * 4), 4);
1087          if (tmp->type != datatype || tmp->size32 != slot_size)
1088             break;
1089          if (memcmp(tmp->values, &values[i * 4],
1090                     slot_size * sizeof(gl_constant_value)))
1091             break;
1092
1093          /* Everything matches, keep going until the full size is matched */
1094          tmp = (immediate_storage *)tmp->next;
1095       }
1096
1097       /* The full value matched */
1098       if (i * 4 >= size32)
1099          return index;
1100
1101       index++;
1102    }
1103
1104    for (i = 0; i * 4 < size32; i++) {
1105       int slot_size = MIN2(size32 - (i * 4), 4);
1106       /* Add this immediate to the list. */
1107       entry = new(mem_ctx) immediate_storage(&values[i * 4], slot_size, datatype);
1108       this->immediates.push_tail(entry);
1109       this->num_immediates++;
1110    }
1111    return index;
1112 }
1113
1114 st_src_reg
1115 glsl_to_tgsi_visitor::st_src_reg_for_float(float val)
1116 {
1117    st_src_reg src(PROGRAM_IMMEDIATE, -1, GLSL_TYPE_FLOAT);
1118    union gl_constant_value uval;
1119
1120    uval.f = val;
1121    src.index = add_constant(src.file, &uval, 1, GL_FLOAT, &src.swizzle);
1122
1123    return src;
1124 }
1125
1126 st_src_reg
1127 glsl_to_tgsi_visitor::st_src_reg_for_double(double val)
1128 {
1129    st_src_reg src(PROGRAM_IMMEDIATE, -1, GLSL_TYPE_DOUBLE);
1130    union gl_constant_value uval[2];
1131
1132    uval[0].u = *(uint32_t *)&val;
1133    uval[1].u = *(((uint32_t *)&val) + 1);
1134    src.index = add_constant(src.file, uval, 1, GL_DOUBLE, &src.swizzle);
1135
1136    return src;
1137 }
1138
1139 st_src_reg
1140 glsl_to_tgsi_visitor::st_src_reg_for_int(int val)
1141 {
1142    st_src_reg src(PROGRAM_IMMEDIATE, -1, GLSL_TYPE_INT);
1143    union gl_constant_value uval;
1144
1145    assert(native_integers);
1146
1147    uval.i = val;
1148    src.index = add_constant(src.file, &uval, 1, GL_INT, &src.swizzle);
1149
1150    return src;
1151 }
1152
1153 st_src_reg
1154 glsl_to_tgsi_visitor::st_src_reg_for_type(int type, int val)
1155 {
1156    if (native_integers)
1157       return type == GLSL_TYPE_FLOAT ? st_src_reg_for_float(val) :
1158                                        st_src_reg_for_int(val);
1159    else
1160       return st_src_reg_for_float(val);
1161 }
1162
1163 static int
1164 attrib_type_size(const struct glsl_type *type, bool is_vs_input)
1165 {
1166    unsigned int i;
1167    int size;
1168
1169    switch (type->base_type) {
1170    case GLSL_TYPE_UINT:
1171    case GLSL_TYPE_INT:
1172    case GLSL_TYPE_FLOAT:
1173    case GLSL_TYPE_BOOL:
1174       if (type->is_matrix()) {
1175          return type->matrix_columns;
1176       } else {
1177          /* Regardless of size of vector, it gets a vec4. This is bad
1178           * packing for things like floats, but otherwise arrays become a
1179           * mess.  Hopefully a later pass over the code can pack scalars
1180           * down if appropriate.
1181           */
1182          return 1;
1183       }
1184       break;
1185    case GLSL_TYPE_DOUBLE:
1186       if (type->is_matrix()) {
1187          if (type->vector_elements <= 2 || is_vs_input)
1188             return type->matrix_columns;
1189          else
1190             return type->matrix_columns * 2;
1191       } else {
1192          /* For doubles if we have a double or dvec2 they fit in one
1193           * vec4, else they need 2 vec4s.
1194           */
1195          if (type->vector_elements <= 2 || is_vs_input)
1196             return 1;
1197          else
1198             return 2;
1199       }
1200       break;
1201    case GLSL_TYPE_ARRAY:
1202       assert(type->length > 0);
1203       return attrib_type_size(type->fields.array, is_vs_input) * type->length;
1204    case GLSL_TYPE_STRUCT:
1205       size = 0;
1206       for (i = 0; i < type->length; i++) {
1207          size += attrib_type_size(type->fields.structure[i].type, is_vs_input);
1208       }
1209       return size;
1210    case GLSL_TYPE_SAMPLER:
1211    case GLSL_TYPE_IMAGE:
1212    case GLSL_TYPE_SUBROUTINE:
1213       /* Samplers take up one slot in UNIFORMS[], but they're baked in
1214        * at link time.
1215        */
1216       return 1;
1217    case GLSL_TYPE_ATOMIC_UINT:
1218    case GLSL_TYPE_INTERFACE:
1219    case GLSL_TYPE_VOID:
1220    case GLSL_TYPE_ERROR:
1221    case GLSL_TYPE_FUNCTION:
1222       assert(!"Invalid type in type_size");
1223       break;
1224    }
1225    return 0;
1226 }
1227
1228 static int
1229 type_size(const struct glsl_type *type)
1230 {
1231   return attrib_type_size(type, false);
1232 }
1233
1234 /**
1235  * If the given GLSL type is an array or matrix or a structure containing
1236  * an array/matrix member, return true.  Else return false.
1237  *
1238  * This is used to determine which kind of temp storage (PROGRAM_TEMPORARY
1239  * or PROGRAM_ARRAY) should be used for variables of this type.  Anytime
1240  * we have an array that might be indexed with a variable, we need to use
1241  * the later storage type.
1242  */
1243 static bool
1244 type_has_array_or_matrix(const glsl_type *type)
1245 {
1246    if (type->is_array() || type->is_matrix())
1247       return true;
1248
1249    if (type->is_record()) {
1250       for (unsigned i = 0; i < type->length; i++) {
1251          if (type_has_array_or_matrix(type->fields.structure[i].type)) {
1252             return true;
1253          }
1254       }
1255    }
1256
1257    return false;
1258 }
1259
1260
1261 /**
1262  * In the initial pass of codegen, we assign temporary numbers to
1263  * intermediate results.  (not SSA -- variable assignments will reuse
1264  * storage).
1265  */
1266 st_src_reg
1267 glsl_to_tgsi_visitor::get_temp(const glsl_type *type)
1268 {
1269    st_src_reg src;
1270
1271    src.type = native_integers ? type->base_type : GLSL_TYPE_FLOAT;
1272    src.reladdr = NULL;
1273    src.negate = 0;
1274
1275    if (!options->EmitNoIndirectTemp && type_has_array_or_matrix(type)) {
1276       if (next_array >= max_num_arrays) {
1277          max_num_arrays += 32;
1278          array_sizes = (unsigned*)
1279             realloc(array_sizes, sizeof(array_sizes[0]) * max_num_arrays);
1280       }
1281
1282       src.file = PROGRAM_ARRAY;
1283       src.index = next_array << 16 | 0x8000;
1284       array_sizes[next_array] = type_size(type);
1285       ++next_array;
1286
1287    } else {
1288       src.file = PROGRAM_TEMPORARY;
1289       src.index = next_temp;
1290       next_temp += type_size(type);
1291    }
1292
1293    if (type->is_array() || type->is_record()) {
1294       src.swizzle = SWIZZLE_NOOP;
1295    } else {
1296       src.swizzle = swizzle_for_size(type->vector_elements);
1297    }
1298
1299    return src;
1300 }
1301
1302 variable_storage *
1303 glsl_to_tgsi_visitor::find_variable_storage(ir_variable *var)
1304 {
1305
1306    foreach_in_list(variable_storage, entry, &this->variables) {
1307       if (entry->var == var)
1308          return entry;
1309    }
1310
1311    return NULL;
1312 }
1313
1314 void
1315 glsl_to_tgsi_visitor::visit(ir_variable *ir)
1316 {
1317    if (strcmp(ir->name, "gl_FragCoord") == 0) {
1318       struct gl_fragment_program *fp = (struct gl_fragment_program *)this->prog;
1319
1320       fp->OriginUpperLeft = ir->data.origin_upper_left;
1321       fp->PixelCenterInteger = ir->data.pixel_center_integer;
1322    }
1323
1324    if (ir->data.mode == ir_var_uniform && strncmp(ir->name, "gl_", 3) == 0) {
1325       unsigned int i;
1326       const ir_state_slot *const slots = ir->get_state_slots();
1327       assert(slots != NULL);
1328
1329       /* Check if this statevar's setup in the STATE file exactly
1330        * matches how we'll want to reference it as a
1331        * struct/array/whatever.  If not, then we need to move it into
1332        * temporary storage and hope that it'll get copy-propagated
1333        * out.
1334        */
1335       for (i = 0; i < ir->get_num_state_slots(); i++) {
1336          if (slots[i].swizzle != SWIZZLE_XYZW) {
1337             break;
1338          }
1339       }
1340
1341       variable_storage *storage;
1342       st_dst_reg dst;
1343       if (i == ir->get_num_state_slots()) {
1344          /* We'll set the index later. */
1345          storage = new(mem_ctx) variable_storage(ir, PROGRAM_STATE_VAR, -1);
1346          this->variables.push_tail(storage);
1347
1348          dst = undef_dst;
1349       } else {
1350          /* The variable_storage constructor allocates slots based on the size
1351           * of the type.  However, this had better match the number of state
1352           * elements that we're going to copy into the new temporary.
1353           */
1354          assert((int) ir->get_num_state_slots() == type_size(ir->type));
1355
1356          dst = st_dst_reg(get_temp(ir->type));
1357
1358          storage = new(mem_ctx) variable_storage(ir, dst.file, dst.index);
1359
1360          this->variables.push_tail(storage);
1361       }
1362
1363
1364       for (unsigned int i = 0; i < ir->get_num_state_slots(); i++) {
1365          int index = _mesa_add_state_reference(this->prog->Parameters,
1366                                                (gl_state_index *)slots[i].tokens);
1367
1368          if (storage->file == PROGRAM_STATE_VAR) {
1369             if (storage->index == -1) {
1370                storage->index = index;
1371             } else {
1372                assert(index == storage->index + (int)i);
1373             }
1374          } else {
1375             /* We use GLSL_TYPE_FLOAT here regardless of the actual type of
1376              * the data being moved since MOV does not care about the type of
1377              * data it is moving, and we don't want to declare registers with
1378              * array or struct types.
1379              */
1380             st_src_reg src(PROGRAM_STATE_VAR, index, GLSL_TYPE_FLOAT);
1381             src.swizzle = slots[i].swizzle;
1382             emit_asm(ir, TGSI_OPCODE_MOV, dst, src);
1383             /* even a float takes up a whole vec4 reg in a struct/array. */
1384             dst.index++;
1385          }
1386       }
1387
1388       if (storage->file == PROGRAM_TEMPORARY &&
1389           dst.index != storage->index + (int) ir->get_num_state_slots()) {
1390          fail_link(this->shader_program,
1391                   "failed to load builtin uniform `%s'  (%d/%d regs loaded)\n",
1392                   ir->name, dst.index - storage->index,
1393                   type_size(ir->type));
1394       }
1395    }
1396 }
1397
1398 void
1399 glsl_to_tgsi_visitor::visit(ir_loop *ir)
1400 {
1401    emit_asm(NULL, TGSI_OPCODE_BGNLOOP);
1402
1403    visit_exec_list(&ir->body_instructions, this);
1404
1405    emit_asm(NULL, TGSI_OPCODE_ENDLOOP);
1406 }
1407
1408 void
1409 glsl_to_tgsi_visitor::visit(ir_loop_jump *ir)
1410 {
1411    switch (ir->mode) {
1412    case ir_loop_jump::jump_break:
1413       emit_asm(NULL, TGSI_OPCODE_BRK);
1414       break;
1415    case ir_loop_jump::jump_continue:
1416       emit_asm(NULL, TGSI_OPCODE_CONT);
1417       break;
1418    }
1419 }
1420
1421
1422 void
1423 glsl_to_tgsi_visitor::visit(ir_function_signature *ir)
1424 {
1425    assert(0);
1426    (void)ir;
1427 }
1428
1429 void
1430 glsl_to_tgsi_visitor::visit(ir_function *ir)
1431 {
1432    /* Ignore function bodies other than main() -- we shouldn't see calls to
1433     * them since they should all be inlined before we get to glsl_to_tgsi.
1434     */
1435    if (strcmp(ir->name, "main") == 0) {
1436       const ir_function_signature *sig;
1437       exec_list empty;
1438
1439       sig = ir->matching_signature(NULL, &empty, false);
1440
1441       assert(sig);
1442
1443       foreach_in_list(ir_instruction, ir, &sig->body) {
1444          ir->accept(this);
1445       }
1446    }
1447 }
1448
1449 bool
1450 glsl_to_tgsi_visitor::try_emit_mad(ir_expression *ir, int mul_operand)
1451 {
1452    int nonmul_operand = 1 - mul_operand;
1453    st_src_reg a, b, c;
1454    st_dst_reg result_dst;
1455
1456    ir_expression *expr = ir->operands[mul_operand]->as_expression();
1457    if (!expr || expr->operation != ir_binop_mul)
1458       return false;
1459
1460    expr->operands[0]->accept(this);
1461    a = this->result;
1462    expr->operands[1]->accept(this);
1463    b = this->result;
1464    ir->operands[nonmul_operand]->accept(this);
1465    c = this->result;
1466
1467    this->result = get_temp(ir->type);
1468    result_dst = st_dst_reg(this->result);
1469    result_dst.writemask = (1 << ir->type->vector_elements) - 1;
1470    emit_asm(ir, TGSI_OPCODE_MAD, result_dst, a, b, c);
1471
1472    return true;
1473 }
1474
1475 /**
1476  * Emit MAD(a, -b, a) instead of AND(a, NOT(b))
1477  *
1478  * The logic values are 1.0 for true and 0.0 for false.  Logical-and is
1479  * implemented using multiplication, and logical-or is implemented using
1480  * addition.  Logical-not can be implemented as (true - x), or (1.0 - x).
1481  * As result, the logical expression (a & !b) can be rewritten as:
1482  *
1483  *     - a * !b
1484  *     - a * (1 - b)
1485  *     - (a * 1) - (a * b)
1486  *     - a + -(a * b)
1487  *     - a + (a * -b)
1488  *
1489  * This final expression can be implemented as a single MAD(a, -b, a)
1490  * instruction.
1491  */
1492 bool
1493 glsl_to_tgsi_visitor::try_emit_mad_for_and_not(ir_expression *ir, int try_operand)
1494 {
1495    const int other_operand = 1 - try_operand;
1496    st_src_reg a, b;
1497
1498    ir_expression *expr = ir->operands[try_operand]->as_expression();
1499    if (!expr || expr->operation != ir_unop_logic_not)
1500       return false;
1501
1502    ir->operands[other_operand]->accept(this);
1503    a = this->result;
1504    expr->operands[0]->accept(this);
1505    b = this->result;
1506
1507    b.negate = ~b.negate;
1508
1509    this->result = get_temp(ir->type);
1510    emit_asm(ir, TGSI_OPCODE_MAD, st_dst_reg(this->result), a, b, a);
1511
1512    return true;
1513 }
1514
1515 void
1516 glsl_to_tgsi_visitor::reladdr_to_temp(ir_instruction *ir,
1517                                       st_src_reg *reg, int *num_reladdr)
1518 {
1519    if (!reg->reladdr && !reg->reladdr2)
1520       return;
1521
1522    if (reg->reladdr) emit_arl(ir, address_reg, *reg->reladdr);
1523    if (reg->reladdr2) emit_arl(ir, address_reg2, *reg->reladdr2);
1524
1525    if (*num_reladdr != 1) {
1526       st_src_reg temp = get_temp(reg->type == GLSL_TYPE_DOUBLE ? glsl_type::dvec4_type : glsl_type::vec4_type);
1527
1528       emit_asm(ir, TGSI_OPCODE_MOV, st_dst_reg(temp), *reg);
1529       *reg = temp;
1530    }
1531
1532    (*num_reladdr)--;
1533 }
1534
1535 void
1536 glsl_to_tgsi_visitor::visit(ir_expression *ir)
1537 {
1538    unsigned int operand;
1539    st_src_reg op[ARRAY_SIZE(ir->operands)];
1540    st_src_reg result_src;
1541    st_dst_reg result_dst;
1542
1543    /* Quick peephole: Emit MAD(a, b, c) instead of ADD(MUL(a, b), c)
1544     */
1545    if (ir->operation == ir_binop_add) {
1546       if (try_emit_mad(ir, 1))
1547          return;
1548       if (try_emit_mad(ir, 0))
1549          return;
1550    }
1551
1552    /* Quick peephole: Emit OPCODE_MAD(-a, -b, a) instead of AND(a, NOT(b))
1553     */
1554    if (!native_integers && ir->operation == ir_binop_logic_and) {
1555       if (try_emit_mad_for_and_not(ir, 1))
1556          return;
1557       if (try_emit_mad_for_and_not(ir, 0))
1558          return;
1559    }
1560
1561    if (ir->operation == ir_quadop_vector)
1562       assert(!"ir_quadop_vector should have been lowered");
1563
1564    for (operand = 0; operand < ir->get_num_operands(); operand++) {
1565       this->result.file = PROGRAM_UNDEFINED;
1566       ir->operands[operand]->accept(this);
1567       if (this->result.file == PROGRAM_UNDEFINED) {
1568          printf("Failed to get tree for expression operand:\n");
1569          ir->operands[operand]->print();
1570          printf("\n");
1571          exit(1);
1572       }
1573       op[operand] = this->result;
1574
1575       /* Matrix expression operands should have been broken down to vector
1576        * operations already.
1577        */
1578       assert(!ir->operands[operand]->type->is_matrix());
1579    }
1580
1581    int vector_elements = ir->operands[0]->type->vector_elements;
1582    if (ir->operands[1]) {
1583       vector_elements = MAX2(vector_elements,
1584                              ir->operands[1]->type->vector_elements);
1585    }
1586
1587    this->result.file = PROGRAM_UNDEFINED;
1588
1589    /* Storage for our result.  Ideally for an assignment we'd be using
1590     * the actual storage for the result here, instead.
1591     */
1592    result_src = get_temp(ir->type);
1593    /* convenience for the emit functions below. */
1594    result_dst = st_dst_reg(result_src);
1595    /* Limit writes to the channels that will be used by result_src later.
1596     * This does limit this temp's use as a temporary for multi-instruction
1597     * sequences.
1598     */
1599    result_dst.writemask = (1 << ir->type->vector_elements) - 1;
1600
1601    switch (ir->operation) {
1602    case ir_unop_logic_not:
1603       if (result_dst.type != GLSL_TYPE_FLOAT)
1604          emit_asm(ir, TGSI_OPCODE_NOT, result_dst, op[0]);
1605       else {
1606          /* Previously 'SEQ dst, src, 0.0' was used for this.  However, many
1607           * older GPUs implement SEQ using multiple instructions (i915 uses two
1608           * SGE instructions and a MUL instruction).  Since our logic values are
1609           * 0.0 and 1.0, 1-x also implements !x.
1610           */
1611          op[0].negate = ~op[0].negate;
1612          emit_asm(ir, TGSI_OPCODE_ADD, result_dst, op[0], st_src_reg_for_float(1.0));
1613       }
1614       break;
1615    case ir_unop_neg:
1616       if (result_dst.type == GLSL_TYPE_INT || result_dst.type == GLSL_TYPE_UINT)
1617          emit_asm(ir, TGSI_OPCODE_INEG, result_dst, op[0]);
1618       else if (result_dst.type == GLSL_TYPE_DOUBLE)
1619          emit_asm(ir, TGSI_OPCODE_DNEG, result_dst, op[0]);
1620       else {
1621          op[0].negate = ~op[0].negate;
1622          result_src = op[0];
1623       }
1624       break;
1625    case ir_unop_subroutine_to_int:
1626       emit_asm(ir, TGSI_OPCODE_MOV, result_dst, op[0]);
1627       break;
1628    case ir_unop_abs:
1629       emit_asm(ir, TGSI_OPCODE_ABS, result_dst, op[0]);
1630       break;
1631    case ir_unop_sign:
1632       emit_asm(ir, TGSI_OPCODE_SSG, result_dst, op[0]);
1633       break;
1634    case ir_unop_rcp:
1635       emit_scalar(ir, TGSI_OPCODE_RCP, result_dst, op[0]);
1636       break;
1637
1638    case ir_unop_exp2:
1639       emit_scalar(ir, TGSI_OPCODE_EX2, result_dst, op[0]);
1640       break;
1641    case ir_unop_exp:
1642    case ir_unop_log:
1643       assert(!"not reached: should be handled by ir_explog_to_explog2");
1644       break;
1645    case ir_unop_log2:
1646       emit_scalar(ir, TGSI_OPCODE_LG2, result_dst, op[0]);
1647       break;
1648    case ir_unop_sin:
1649       emit_scalar(ir, TGSI_OPCODE_SIN, result_dst, op[0]);
1650       break;
1651    case ir_unop_cos:
1652       emit_scalar(ir, TGSI_OPCODE_COS, result_dst, op[0]);
1653       break;
1654    case ir_unop_saturate: {
1655       glsl_to_tgsi_instruction *inst;
1656       inst = emit_asm(ir, TGSI_OPCODE_MOV, result_dst, op[0]);
1657       inst->saturate = true;
1658       break;
1659    }
1660
1661    case ir_unop_dFdx:
1662    case ir_unop_dFdx_coarse:
1663       emit_asm(ir, TGSI_OPCODE_DDX, result_dst, op[0]);
1664       break;
1665    case ir_unop_dFdx_fine:
1666       emit_asm(ir, TGSI_OPCODE_DDX_FINE, result_dst, op[0]);
1667       break;
1668    case ir_unop_dFdy:
1669    case ir_unop_dFdy_coarse:
1670    case ir_unop_dFdy_fine:
1671    {
1672       /* The X component contains 1 or -1 depending on whether the framebuffer
1673        * is a FBO or the window system buffer, respectively.
1674        * It is then multiplied with the source operand of DDY.
1675        */
1676       static const gl_state_index transform_y_state[STATE_LENGTH]
1677          = { STATE_INTERNAL, STATE_FB_WPOS_Y_TRANSFORM };
1678
1679       unsigned transform_y_index =
1680          _mesa_add_state_reference(this->prog->Parameters,
1681                                    transform_y_state);
1682
1683       st_src_reg transform_y = st_src_reg(PROGRAM_STATE_VAR,
1684                                           transform_y_index,
1685                                           glsl_type::vec4_type);
1686       transform_y.swizzle = SWIZZLE_XXXX;
1687
1688       st_src_reg temp = get_temp(glsl_type::vec4_type);
1689
1690       emit_asm(ir, TGSI_OPCODE_MUL, st_dst_reg(temp), transform_y, op[0]);
1691       emit_asm(ir, ir->operation == ir_unop_dFdy_fine ?
1692            TGSI_OPCODE_DDY_FINE : TGSI_OPCODE_DDY, result_dst, temp);
1693       break;
1694    }
1695
1696    case ir_unop_frexp_sig:
1697       emit_asm(ir, TGSI_OPCODE_DFRACEXP, result_dst, undef_dst, op[0]);
1698       break;
1699
1700    case ir_unop_frexp_exp:
1701       emit_asm(ir, TGSI_OPCODE_DFRACEXP, undef_dst, result_dst, op[0]);
1702       break;
1703
1704    case ir_unop_noise: {
1705       /* At some point, a motivated person could add a better
1706        * implementation of noise.  Currently not even the nvidia
1707        * binary drivers do anything more than this.  In any case, the
1708        * place to do this is in the GL state tracker, not the poor
1709        * driver.
1710        */
1711       emit_asm(ir, TGSI_OPCODE_MOV, result_dst, st_src_reg_for_float(0.5));
1712       break;
1713    }
1714
1715    case ir_binop_add:
1716       emit_asm(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]);
1717       break;
1718    case ir_binop_sub:
1719       emit_asm(ir, TGSI_OPCODE_SUB, result_dst, op[0], op[1]);
1720       break;
1721
1722    case ir_binop_mul:
1723       emit_asm(ir, TGSI_OPCODE_MUL, result_dst, op[0], op[1]);
1724       break;
1725    case ir_binop_div:
1726       if (result_dst.type == GLSL_TYPE_FLOAT || result_dst.type == GLSL_TYPE_DOUBLE)
1727          assert(!"not reached: should be handled by ir_div_to_mul_rcp");
1728       else
1729          emit_asm(ir, TGSI_OPCODE_DIV, result_dst, op[0], op[1]);
1730       break;
1731    case ir_binop_mod:
1732       if (result_dst.type == GLSL_TYPE_FLOAT)
1733          assert(!"ir_binop_mod should have been converted to b * fract(a/b)");
1734       else
1735          emit_asm(ir, TGSI_OPCODE_MOD, result_dst, op[0], op[1]);
1736       break;
1737
1738    case ir_binop_less:
1739       emit_asm(ir, TGSI_OPCODE_SLT, result_dst, op[0], op[1]);
1740       break;
1741    case ir_binop_greater:
1742       emit_asm(ir, TGSI_OPCODE_SLT, result_dst, op[1], op[0]);
1743       break;
1744    case ir_binop_lequal:
1745       emit_asm(ir, TGSI_OPCODE_SGE, result_dst, op[1], op[0]);
1746       break;
1747    case ir_binop_gequal:
1748       emit_asm(ir, TGSI_OPCODE_SGE, result_dst, op[0], op[1]);
1749       break;
1750    case ir_binop_equal:
1751       emit_asm(ir, TGSI_OPCODE_SEQ, result_dst, op[0], op[1]);
1752       break;
1753    case ir_binop_nequal:
1754       emit_asm(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]);
1755       break;
1756    case ir_binop_all_equal:
1757       /* "==" operator producing a scalar boolean. */
1758       if (ir->operands[0]->type->is_vector() ||
1759           ir->operands[1]->type->is_vector()) {
1760          st_src_reg temp = get_temp(native_integers ?
1761                                     glsl_type::uvec4_type :
1762                                     glsl_type::vec4_type);
1763
1764          if (native_integers) {
1765             st_dst_reg temp_dst = st_dst_reg(temp);
1766             st_src_reg temp1 = st_src_reg(temp), temp2 = st_src_reg(temp);
1767
1768             if (ir->operands[0]->type->is_boolean() &&
1769                 ir->operands[1]->as_constant() &&
1770                 ir->operands[1]->as_constant()->is_one()) {
1771                emit_asm(ir, TGSI_OPCODE_MOV, st_dst_reg(temp), op[0]);
1772             } else {
1773                emit_asm(ir, TGSI_OPCODE_SEQ, st_dst_reg(temp), op[0], op[1]);
1774             }
1775
1776             /* Emit 1-3 AND operations to combine the SEQ results. */
1777             switch (ir->operands[0]->type->vector_elements) {
1778             case 2:
1779                break;
1780             case 3:
1781                temp_dst.writemask = WRITEMASK_Y;
1782                temp1.swizzle = SWIZZLE_YYYY;
1783                temp2.swizzle = SWIZZLE_ZZZZ;
1784                emit_asm(ir, TGSI_OPCODE_AND, temp_dst, temp1, temp2);
1785                break;
1786             case 4:
1787                temp_dst.writemask = WRITEMASK_X;
1788                temp1.swizzle = SWIZZLE_XXXX;
1789                temp2.swizzle = SWIZZLE_YYYY;
1790                emit_asm(ir, TGSI_OPCODE_AND, temp_dst, temp1, temp2);
1791                temp_dst.writemask = WRITEMASK_Y;
1792                temp1.swizzle = SWIZZLE_ZZZZ;
1793                temp2.swizzle = SWIZZLE_WWWW;
1794                emit_asm(ir, TGSI_OPCODE_AND, temp_dst, temp1, temp2);
1795             }
1796
1797             temp1.swizzle = SWIZZLE_XXXX;
1798             temp2.swizzle = SWIZZLE_YYYY;
1799             emit_asm(ir, TGSI_OPCODE_AND, result_dst, temp1, temp2);
1800          } else {
1801             emit_asm(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]);
1802
1803             /* After the dot-product, the value will be an integer on the
1804              * range [0,4].  Zero becomes 1.0, and positive values become zero.
1805              */
1806             emit_dp(ir, result_dst, temp, temp, vector_elements);
1807
1808             /* Negating the result of the dot-product gives values on the range
1809              * [-4, 0].  Zero becomes 1.0, and negative values become zero.
1810              * This is achieved using SGE.
1811              */
1812             st_src_reg sge_src = result_src;
1813             sge_src.negate = ~sge_src.negate;
1814             emit_asm(ir, TGSI_OPCODE_SGE, result_dst, sge_src, st_src_reg_for_float(0.0));
1815          }
1816       } else {
1817          emit_asm(ir, TGSI_OPCODE_SEQ, result_dst, op[0], op[1]);
1818       }
1819       break;
1820    case ir_binop_any_nequal:
1821       /* "!=" operator producing a scalar boolean. */
1822       if (ir->operands[0]->type->is_vector() ||
1823           ir->operands[1]->type->is_vector()) {
1824          st_src_reg temp = get_temp(native_integers ?
1825                                     glsl_type::uvec4_type :
1826                                     glsl_type::vec4_type);
1827          if (ir->operands[0]->type->is_boolean() &&
1828              ir->operands[1]->as_constant() &&
1829              ir->operands[1]->as_constant()->is_zero()) {
1830             emit_asm(ir, TGSI_OPCODE_MOV, st_dst_reg(temp), op[0]);
1831          } else {
1832             emit_asm(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]);
1833          }
1834
1835          if (native_integers) {
1836             st_dst_reg temp_dst = st_dst_reg(temp);
1837             st_src_reg temp1 = st_src_reg(temp), temp2 = st_src_reg(temp);
1838
1839             /* Emit 1-3 OR operations to combine the SNE results. */
1840             switch (ir->operands[0]->type->vector_elements) {
1841             case 2:
1842                break;
1843             case 3:
1844                temp_dst.writemask = WRITEMASK_Y;
1845                temp1.swizzle = SWIZZLE_YYYY;
1846                temp2.swizzle = SWIZZLE_ZZZZ;
1847                emit_asm(ir, TGSI_OPCODE_OR, temp_dst, temp1, temp2);
1848                break;
1849             case 4:
1850                temp_dst.writemask = WRITEMASK_X;
1851                temp1.swizzle = SWIZZLE_XXXX;
1852                temp2.swizzle = SWIZZLE_YYYY;
1853                emit_asm(ir, TGSI_OPCODE_OR, temp_dst, temp1, temp2);
1854                temp_dst.writemask = WRITEMASK_Y;
1855                temp1.swizzle = SWIZZLE_ZZZZ;
1856                temp2.swizzle = SWIZZLE_WWWW;
1857                emit_asm(ir, TGSI_OPCODE_OR, temp_dst, temp1, temp2);
1858             }
1859
1860             temp1.swizzle = SWIZZLE_XXXX;
1861             temp2.swizzle = SWIZZLE_YYYY;
1862             emit_asm(ir, TGSI_OPCODE_OR, result_dst, temp1, temp2);
1863          } else {
1864             /* After the dot-product, the value will be an integer on the
1865              * range [0,4].  Zero stays zero, and positive values become 1.0.
1866              */
1867             glsl_to_tgsi_instruction *const dp =
1868                   emit_dp(ir, result_dst, temp, temp, vector_elements);
1869             if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
1870                /* The clamping to [0,1] can be done for free in the fragment
1871                 * shader with a saturate.
1872                 */
1873                dp->saturate = true;
1874             } else {
1875                /* Negating the result of the dot-product gives values on the range
1876                 * [-4, 0].  Zero stays zero, and negative values become 1.0.  This
1877                 * achieved using SLT.
1878                 */
1879                st_src_reg slt_src = result_src;
1880                slt_src.negate = ~slt_src.negate;
1881                emit_asm(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0));
1882             }
1883          }
1884       } else {
1885          emit_asm(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]);
1886       }
1887       break;
1888
1889    case ir_binop_logic_xor:
1890       if (native_integers)
1891          emit_asm(ir, TGSI_OPCODE_XOR, result_dst, op[0], op[1]);
1892       else
1893          emit_asm(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]);
1894       break;
1895
1896    case ir_binop_logic_or: {
1897       if (native_integers) {
1898          /* If integers are used as booleans, we can use an actual "or"
1899           * instruction.
1900           */
1901          assert(native_integers);
1902          emit_asm(ir, TGSI_OPCODE_OR, result_dst, op[0], op[1]);
1903       } else {
1904          /* After the addition, the value will be an integer on the
1905           * range [0,2].  Zero stays zero, and positive values become 1.0.
1906           */
1907          glsl_to_tgsi_instruction *add =
1908             emit_asm(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]);
1909          if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
1910             /* The clamping to [0,1] can be done for free in the fragment
1911              * shader with a saturate if floats are being used as boolean values.
1912              */
1913             add->saturate = true;
1914          } else {
1915             /* Negating the result of the addition gives values on the range
1916              * [-2, 0].  Zero stays zero, and negative values become 1.0.  This
1917              * is achieved using SLT.
1918              */
1919             st_src_reg slt_src = result_src;
1920             slt_src.negate = ~slt_src.negate;
1921             emit_asm(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0));
1922          }
1923       }
1924       break;
1925    }
1926
1927    case ir_binop_logic_and:
1928       /* If native integers are disabled, the bool args are stored as float 0.0
1929        * or 1.0, so "mul" gives us "and".  If they're enabled, just use the
1930        * actual AND opcode.
1931        */
1932       if (native_integers)
1933          emit_asm(ir, TGSI_OPCODE_AND, result_dst, op[0], op[1]);
1934       else
1935          emit_asm(ir, TGSI_OPCODE_MUL, result_dst, op[0], op[1]);
1936       break;
1937
1938    case ir_binop_dot:
1939       assert(ir->operands[0]->type->is_vector());
1940       assert(ir->operands[0]->type == ir->operands[1]->type);
1941       emit_dp(ir, result_dst, op[0], op[1],
1942               ir->operands[0]->type->vector_elements);
1943       break;
1944
1945    case ir_unop_sqrt:
1946       if (have_sqrt) {
1947          emit_scalar(ir, TGSI_OPCODE_SQRT, result_dst, op[0]);
1948       } else {
1949          /* sqrt(x) = x * rsq(x). */
1950          emit_scalar(ir, TGSI_OPCODE_RSQ, result_dst, op[0]);
1951          emit_asm(ir, TGSI_OPCODE_MUL, result_dst, result_src, op[0]);
1952          /* For incoming channels <= 0, set the result to 0. */
1953          op[0].negate = ~op[0].negate;
1954          emit_asm(ir, TGSI_OPCODE_CMP, result_dst,
1955               op[0], result_src, st_src_reg_for_float(0.0));
1956       }
1957       break;
1958    case ir_unop_rsq:
1959       emit_scalar(ir, TGSI_OPCODE_RSQ, result_dst, op[0]);
1960       break;
1961    case ir_unop_i2f:
1962       if (native_integers) {
1963          emit_asm(ir, TGSI_OPCODE_I2F, result_dst, op[0]);
1964          break;
1965       }
1966       /* fallthrough to next case otherwise */
1967    case ir_unop_b2f:
1968       if (native_integers) {
1969          emit_asm(ir, TGSI_OPCODE_AND, result_dst, op[0], st_src_reg_for_float(1.0));
1970          break;
1971       }
1972       /* fallthrough to next case otherwise */
1973    case ir_unop_i2u:
1974    case ir_unop_u2i:
1975       /* Converting between signed and unsigned integers is a no-op. */
1976       result_src = op[0];
1977       result_src.type = result_dst.type;
1978       break;
1979    case ir_unop_b2i:
1980       if (native_integers) {
1981          /* Booleans are stored as integers using ~0 for true and 0 for false.
1982           * GLSL requires that int(bool) return 1 for true and 0 for false.
1983           * This conversion is done with AND, but it could be done with NEG.
1984           */
1985          emit_asm(ir, TGSI_OPCODE_AND, result_dst, op[0], st_src_reg_for_int(1));
1986       } else {
1987          /* Booleans and integers are both stored as floats when native
1988           * integers are disabled.
1989           */
1990          result_src = op[0];
1991       }
1992       break;
1993    case ir_unop_f2i:
1994       if (native_integers)
1995          emit_asm(ir, TGSI_OPCODE_F2I, result_dst, op[0]);
1996       else
1997          emit_asm(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]);
1998       break;
1999    case ir_unop_f2u:
2000       if (native_integers)
2001          emit_asm(ir, TGSI_OPCODE_F2U, result_dst, op[0]);
2002       else
2003          emit_asm(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]);
2004       break;
2005    case ir_unop_bitcast_f2i:
2006       result_src = op[0];
2007       result_src.type = GLSL_TYPE_INT;
2008       break;
2009    case ir_unop_bitcast_f2u:
2010       result_src = op[0];
2011       result_src.type = GLSL_TYPE_UINT;
2012       break;
2013    case ir_unop_bitcast_i2f:
2014    case ir_unop_bitcast_u2f:
2015       result_src = op[0];
2016       result_src.type = GLSL_TYPE_FLOAT;
2017       break;
2018    case ir_unop_f2b:
2019       emit_asm(ir, TGSI_OPCODE_SNE, result_dst, op[0], st_src_reg_for_float(0.0));
2020       break;
2021    case ir_unop_d2b:
2022       emit_asm(ir, TGSI_OPCODE_SNE, result_dst, op[0], st_src_reg_for_double(0.0));
2023       break;
2024    case ir_unop_i2b:
2025       if (native_integers)
2026          emit_asm(ir, TGSI_OPCODE_USNE, result_dst, op[0], st_src_reg_for_int(0));
2027       else
2028          emit_asm(ir, TGSI_OPCODE_SNE, result_dst, op[0], st_src_reg_for_float(0.0));
2029       break;
2030    case ir_unop_trunc:
2031       emit_asm(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]);
2032       break;
2033    case ir_unop_ceil:
2034       emit_asm(ir, TGSI_OPCODE_CEIL, result_dst, op[0]);
2035       break;
2036    case ir_unop_floor:
2037       emit_asm(ir, TGSI_OPCODE_FLR, result_dst, op[0]);
2038       break;
2039    case ir_unop_round_even:
2040       emit_asm(ir, TGSI_OPCODE_ROUND, result_dst, op[0]);
2041       break;
2042    case ir_unop_fract:
2043       emit_asm(ir, TGSI_OPCODE_FRC, result_dst, op[0]);
2044       break;
2045
2046    case ir_binop_min:
2047       emit_asm(ir, TGSI_OPCODE_MIN, result_dst, op[0], op[1]);
2048       break;
2049    case ir_binop_max:
2050       emit_asm(ir, TGSI_OPCODE_MAX, result_dst, op[0], op[1]);
2051       break;
2052    case ir_binop_pow:
2053       emit_scalar(ir, TGSI_OPCODE_POW, result_dst, op[0], op[1]);
2054       break;
2055
2056    case ir_unop_bit_not:
2057       if (native_integers) {
2058          emit_asm(ir, TGSI_OPCODE_NOT, result_dst, op[0]);
2059          break;
2060       }
2061    case ir_unop_u2f:
2062       if (native_integers) {
2063          emit_asm(ir, TGSI_OPCODE_U2F, result_dst, op[0]);
2064          break;
2065       }
2066    case ir_binop_lshift:
2067       if (native_integers) {
2068          emit_asm(ir, TGSI_OPCODE_SHL, result_dst, op[0], op[1]);
2069          break;
2070       }
2071    case ir_binop_rshift:
2072       if (native_integers) {
2073          emit_asm(ir, TGSI_OPCODE_ISHR, result_dst, op[0], op[1]);
2074          break;
2075       }
2076    case ir_binop_bit_and:
2077       if (native_integers) {
2078          emit_asm(ir, TGSI_OPCODE_AND, result_dst, op[0], op[1]);
2079          break;
2080       }
2081    case ir_binop_bit_xor:
2082       if (native_integers) {
2083          emit_asm(ir, TGSI_OPCODE_XOR, result_dst, op[0], op[1]);
2084          break;
2085       }
2086    case ir_binop_bit_or:
2087       if (native_integers) {
2088          emit_asm(ir, TGSI_OPCODE_OR, result_dst, op[0], op[1]);
2089          break;
2090       }
2091
2092       assert(!"GLSL 1.30 features unsupported");
2093       break;
2094
2095    case ir_binop_ubo_load: {
2096       ir_constant *const_uniform_block = ir->operands[0]->as_constant();
2097       ir_constant *const_offset_ir = ir->operands[1]->as_constant();
2098       unsigned const_offset = const_offset_ir ? const_offset_ir->value.u[0] : 0;
2099       unsigned const_block = const_uniform_block ? const_uniform_block->value.u[0] + 1 : 0;
2100       st_src_reg index_reg = get_temp(glsl_type::uint_type);
2101       st_src_reg cbuf;
2102
2103       cbuf.type = ir->type->base_type;
2104       cbuf.file = PROGRAM_CONSTANT;
2105       cbuf.index = 0;
2106       cbuf.reladdr = NULL;
2107       cbuf.negate = 0;
2108
2109       assert(ir->type->is_vector() || ir->type->is_scalar());
2110
2111       if (const_offset_ir) {
2112          /* Constant index into constant buffer */
2113          cbuf.reladdr = NULL;
2114          cbuf.index = const_offset / 16;
2115       }
2116       else {
2117          /* Relative/variable index into constant buffer */
2118          emit_asm(ir, TGSI_OPCODE_USHR, st_dst_reg(index_reg), op[1],
2119               st_src_reg_for_int(4));
2120          cbuf.reladdr = ralloc(mem_ctx, st_src_reg);
2121          memcpy(cbuf.reladdr, &index_reg, sizeof(index_reg));
2122       }
2123
2124       if (const_uniform_block) {
2125          /* Constant constant buffer */
2126          cbuf.reladdr2 = NULL;
2127          cbuf.index2D = const_block;
2128          cbuf.has_index2 = true;
2129       }
2130       else {
2131          /* Relative/variable constant buffer */
2132          cbuf.reladdr2 = ralloc(mem_ctx, st_src_reg);
2133          cbuf.index2D = 1;
2134          memcpy(cbuf.reladdr2, &op[0], sizeof(st_src_reg));
2135          cbuf.has_index2 = true;
2136       }
2137
2138       cbuf.swizzle = swizzle_for_size(ir->type->vector_elements);
2139       if (cbuf.type == GLSL_TYPE_DOUBLE)
2140          cbuf.swizzle += MAKE_SWIZZLE4(const_offset % 16 / 8,
2141                                        const_offset % 16 / 8,
2142                                        const_offset % 16 / 8,
2143                                        const_offset % 16 / 8);
2144       else
2145          cbuf.swizzle += MAKE_SWIZZLE4(const_offset % 16 / 4,
2146                                        const_offset % 16 / 4,
2147                                        const_offset % 16 / 4,
2148                                        const_offset % 16 / 4);
2149
2150       if (ir->type->base_type == GLSL_TYPE_BOOL) {
2151          emit_asm(ir, TGSI_OPCODE_USNE, result_dst, cbuf, st_src_reg_for_int(0));
2152       } else {
2153          emit_asm(ir, TGSI_OPCODE_MOV, result_dst, cbuf);
2154       }
2155       break;
2156    }
2157    case ir_triop_lrp:
2158       /* note: we have to reorder the three args here */
2159       emit_asm(ir, TGSI_OPCODE_LRP, result_dst, op[2], op[1], op[0]);
2160       break;
2161    case ir_triop_csel:
2162       if (this->ctx->Const.NativeIntegers)
2163          emit_asm(ir, TGSI_OPCODE_UCMP, result_dst, op[0], op[1], op[2]);
2164       else {
2165          op[0].negate = ~op[0].negate;
2166          emit_asm(ir, TGSI_OPCODE_CMP, result_dst, op[0], op[1], op[2]);
2167       }
2168       break;
2169    case ir_triop_bitfield_extract:
2170       emit_asm(ir, TGSI_OPCODE_IBFE, result_dst, op[0], op[1], op[2]);
2171       break;
2172    case ir_quadop_bitfield_insert:
2173       emit_asm(ir, TGSI_OPCODE_BFI, result_dst, op[0], op[1], op[2], op[3]);
2174       break;
2175    case ir_unop_bitfield_reverse:
2176       emit_asm(ir, TGSI_OPCODE_BREV, result_dst, op[0]);
2177       break;
2178    case ir_unop_bit_count:
2179       emit_asm(ir, TGSI_OPCODE_POPC, result_dst, op[0]);
2180       break;
2181    case ir_unop_find_msb:
2182       emit_asm(ir, TGSI_OPCODE_IMSB, result_dst, op[0]);
2183       break;
2184    case ir_unop_find_lsb:
2185       emit_asm(ir, TGSI_OPCODE_LSB, result_dst, op[0]);
2186       break;
2187    case ir_binop_imul_high:
2188       emit_asm(ir, TGSI_OPCODE_IMUL_HI, result_dst, op[0], op[1]);
2189       break;
2190    case ir_triop_fma:
2191       /* In theory, MAD is incorrect here. */
2192       if (have_fma)
2193          emit_asm(ir, TGSI_OPCODE_FMA, result_dst, op[0], op[1], op[2]);
2194       else
2195          emit_asm(ir, TGSI_OPCODE_MAD, result_dst, op[0], op[1], op[2]);
2196       break;
2197    case ir_unop_interpolate_at_centroid:
2198       emit_asm(ir, TGSI_OPCODE_INTERP_CENTROID, result_dst, op[0]);
2199       break;
2200    case ir_binop_interpolate_at_offset:
2201       emit_asm(ir, TGSI_OPCODE_INTERP_OFFSET, result_dst, op[0], op[1]);
2202       break;
2203    case ir_binop_interpolate_at_sample:
2204       emit_asm(ir, TGSI_OPCODE_INTERP_SAMPLE, result_dst, op[0], op[1]);
2205       break;
2206
2207    case ir_unop_d2f:
2208       emit_asm(ir, TGSI_OPCODE_D2F, result_dst, op[0]);
2209       break;
2210    case ir_unop_f2d:
2211       emit_asm(ir, TGSI_OPCODE_F2D, result_dst, op[0]);
2212       break;
2213    case ir_unop_d2i:
2214       emit_asm(ir, TGSI_OPCODE_D2I, result_dst, op[0]);
2215       break;
2216    case ir_unop_i2d:
2217       emit_asm(ir, TGSI_OPCODE_I2D, result_dst, op[0]);
2218       break;
2219    case ir_unop_d2u:
2220       emit_asm(ir, TGSI_OPCODE_D2U, result_dst, op[0]);
2221       break;
2222    case ir_unop_u2d:
2223       emit_asm(ir, TGSI_OPCODE_U2D, result_dst, op[0]);
2224       break;
2225    case ir_unop_unpack_double_2x32:
2226    case ir_unop_pack_double_2x32:
2227       emit_asm(ir, TGSI_OPCODE_MOV, result_dst, op[0]);
2228       break;
2229
2230    case ir_binop_ldexp:
2231       if (ir->operands[0]->type->base_type == GLSL_TYPE_DOUBLE) {
2232          emit_asm(ir, TGSI_OPCODE_DLDEXP, result_dst, op[0], op[1]);
2233       } else {
2234          assert(!"Invalid ldexp for non-double opcode in glsl_to_tgsi_visitor::visit()");
2235       }
2236       break;
2237
2238    case ir_unop_pack_half_2x16:
2239       emit_asm(ir, TGSI_OPCODE_PK2H, result_dst, op[0]);
2240       break;
2241    case ir_unop_unpack_half_2x16:
2242       emit_asm(ir, TGSI_OPCODE_UP2H, result_dst, op[0]);
2243       break;
2244
2245    case ir_unop_get_buffer_size: {
2246       ir_constant *const_offset = ir->operands[0]->as_constant();
2247       st_src_reg buffer(
2248             PROGRAM_BUFFER,
2249             ctx->Const.Program[shader->Stage].MaxAtomicBuffers +
2250             (const_offset ? const_offset->value.u[0] : 0),
2251             GLSL_TYPE_UINT);
2252       if (!const_offset) {
2253          buffer.reladdr = ralloc(mem_ctx, st_src_reg);
2254          memcpy(buffer.reladdr, &sampler_reladdr, sizeof(sampler_reladdr));
2255          emit_arl(ir, sampler_reladdr, op[0]);
2256       }
2257       emit_asm(ir, TGSI_OPCODE_RESQ, result_dst)->buffer = buffer;
2258       break;
2259    }
2260
2261    case ir_unop_pack_snorm_2x16:
2262    case ir_unop_pack_unorm_2x16:
2263    case ir_unop_pack_snorm_4x8:
2264    case ir_unop_pack_unorm_4x8:
2265
2266    case ir_unop_unpack_snorm_2x16:
2267    case ir_unop_unpack_unorm_2x16:
2268    case ir_unop_unpack_snorm_4x8:
2269    case ir_unop_unpack_unorm_4x8:
2270
2271    case ir_quadop_vector:
2272    case ir_binop_vector_extract:
2273    case ir_triop_vector_insert:
2274    case ir_binop_carry:
2275    case ir_binop_borrow:
2276    case ir_unop_ssbo_unsized_array_length:
2277       /* This operation is not supported, or should have already been handled.
2278        */
2279       assert(!"Invalid ir opcode in glsl_to_tgsi_visitor::visit()");
2280       break;
2281    }
2282
2283    this->result = result_src;
2284 }
2285
2286
2287 void
2288 glsl_to_tgsi_visitor::visit(ir_swizzle *ir)
2289 {
2290    st_src_reg src;
2291    int i;
2292    int swizzle[4];
2293
2294    /* Note that this is only swizzles in expressions, not those on the left
2295     * hand side of an assignment, which do write masking.  See ir_assignment
2296     * for that.
2297     */
2298
2299    ir->val->accept(this);
2300    src = this->result;
2301    assert(src.file != PROGRAM_UNDEFINED);
2302    assert(ir->type->vector_elements > 0);
2303
2304    for (i = 0; i < 4; i++) {
2305       if (i < ir->type->vector_elements) {
2306          switch (i) {
2307          case 0:
2308             swizzle[i] = GET_SWZ(src.swizzle, ir->mask.x);
2309             break;
2310          case 1:
2311             swizzle[i] = GET_SWZ(src.swizzle, ir->mask.y);
2312             break;
2313          case 2:
2314             swizzle[i] = GET_SWZ(src.swizzle, ir->mask.z);
2315             break;
2316          case 3:
2317             swizzle[i] = GET_SWZ(src.swizzle, ir->mask.w);
2318             break;
2319          }
2320       } else {
2321          /* If the type is smaller than a vec4, replicate the last
2322           * channel out.
2323           */
2324          swizzle[i] = swizzle[ir->type->vector_elements - 1];
2325       }
2326    }
2327
2328    src.swizzle = MAKE_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]);
2329
2330    this->result = src;
2331 }
2332
2333 /* Test if the variable is an array. Note that geometry and
2334  * tessellation shader inputs are outputs are always arrays (except
2335  * for patch inputs), so only the array element type is considered.
2336  */
2337 static bool
2338 is_inout_array(unsigned stage, ir_variable *var, bool *is_2d)
2339 {
2340    const glsl_type *type = var->type;
2341
2342    if ((stage == MESA_SHADER_VERTEX && var->data.mode == ir_var_shader_in) ||
2343        (stage == MESA_SHADER_FRAGMENT && var->data.mode == ir_var_shader_out))
2344       return false;
2345
2346    *is_2d = false;
2347
2348    if (((stage == MESA_SHADER_GEOMETRY && var->data.mode == ir_var_shader_in) ||
2349         (stage == MESA_SHADER_TESS_EVAL && var->data.mode == ir_var_shader_in) ||
2350         stage == MESA_SHADER_TESS_CTRL) &&
2351        !var->data.patch) {
2352       if (!var->type->is_array())
2353          return false; /* a system value probably */
2354
2355       type = var->type->fields.array;
2356       *is_2d = true;
2357    }
2358
2359    return type->is_array() || type->is_matrix();
2360 }
2361
2362 void
2363 glsl_to_tgsi_visitor::visit(ir_dereference_variable *ir)
2364 {
2365    variable_storage *entry = find_variable_storage(ir->var);
2366    ir_variable *var = ir->var;
2367    bool is_2d;
2368
2369    if (!entry) {
2370       switch (var->data.mode) {
2371       case ir_var_uniform:
2372          entry = new(mem_ctx) variable_storage(var, PROGRAM_UNIFORM,
2373                                                var->data.param_index);
2374          this->variables.push_tail(entry);
2375          break;
2376       case ir_var_shader_in:
2377          /* The linker assigns locations for varyings and attributes,
2378           * including deprecated builtins (like gl_Color), user-assign
2379           * generic attributes (glBindVertexLocation), and
2380           * user-defined varyings.
2381           */
2382          assert(var->data.location != -1);
2383
2384          if (is_inout_array(shader->Stage, var, &is_2d)) {
2385             struct array_decl *decl = &input_arrays[num_input_arrays];
2386
2387             decl->mesa_index = var->data.location;
2388             decl->array_id = num_input_arrays + 1;
2389             if (is_2d) {
2390                decl->array_size = type_size(var->type->fields.array);
2391                decl->array_type = var->type->fields.array->without_array()->base_type;
2392             } else {
2393                decl->array_size = type_size(var->type);
2394                decl->array_type = var->type->without_array()->base_type;
2395             }
2396             num_input_arrays++;
2397
2398             entry = new(mem_ctx) variable_storage(var,
2399                                                   PROGRAM_INPUT,
2400                                                   var->data.location,
2401                                                   decl->array_id);
2402          }
2403          else {
2404             entry = new(mem_ctx) variable_storage(var,
2405                                                   PROGRAM_INPUT,
2406                                                   var->data.location);
2407          }
2408          this->variables.push_tail(entry);
2409          break;
2410       case ir_var_shader_out:
2411          assert(var->data.location != -1);
2412
2413          if (is_inout_array(shader->Stage, var, &is_2d)) {
2414             struct array_decl *decl = &output_arrays[num_output_arrays];
2415
2416             decl->mesa_index = var->data.location;
2417             decl->array_id = num_output_arrays + 1;
2418             if (is_2d) {
2419                decl->array_size = type_size(var->type->fields.array);
2420                decl->array_type = var->type->fields.array->without_array()->base_type;
2421             } else {
2422                decl->array_size = type_size(var->type);
2423                decl->array_type = var->type->without_array()->base_type;
2424             }
2425             num_output_arrays++;
2426
2427             entry = new(mem_ctx) variable_storage(var,
2428                                                   PROGRAM_OUTPUT,
2429                                                   var->data.location,
2430                                                   decl->array_id);
2431          }
2432          else {
2433             entry = new(mem_ctx) variable_storage(var,
2434                                                   PROGRAM_OUTPUT,
2435                                                   var->data.location
2436                                                   + var->data.index);
2437          }
2438          this->variables.push_tail(entry);
2439          break;
2440       case ir_var_system_value:
2441          entry = new(mem_ctx) variable_storage(var,
2442                                                PROGRAM_SYSTEM_VALUE,
2443                                                var->data.location);
2444          break;
2445       case ir_var_auto:
2446       case ir_var_temporary:
2447          st_src_reg src = get_temp(var->type);
2448
2449          entry = new(mem_ctx) variable_storage(var, src.file, src.index);
2450          this->variables.push_tail(entry);
2451
2452          break;
2453       }
2454
2455       if (!entry) {
2456          printf("Failed to make storage for %s\n", var->name);
2457          exit(1);
2458       }
2459    }
2460
2461    this->result = st_src_reg(entry->file, entry->index, var->type);
2462    this->result.array_id = entry->array_id;
2463    if (this->shader->Stage == MESA_SHADER_VERTEX && var->data.mode == ir_var_shader_in && var->type->is_double())
2464       this->result.is_double_vertex_input = true;
2465    if (!native_integers)
2466       this->result.type = GLSL_TYPE_FLOAT;
2467 }
2468
2469 static void
2470 shrink_array_declarations(struct array_decl *arrays, unsigned count,
2471                           GLbitfield64 usage_mask,
2472                           GLbitfield64 double_usage_mask,
2473                           GLbitfield patch_usage_mask)
2474 {
2475    unsigned i, j;
2476
2477    /* Fix array declarations by removing unused array elements at both ends
2478     * of the arrays. For example, mat4[3] where only mat[1] is used.
2479     */
2480    for (i = 0; i < count; i++) {
2481       struct array_decl *decl = &arrays[i];
2482
2483       /* Shrink the beginning. */
2484       for (j = 0; j < decl->array_size; j++) {
2485          if (decl->mesa_index >= VARYING_SLOT_PATCH0) {
2486             if (patch_usage_mask &
2487                 BITFIELD64_BIT(decl->mesa_index - VARYING_SLOT_PATCH0 + j))
2488                break;
2489          }
2490          else {
2491             if (usage_mask & BITFIELD64_BIT(decl->mesa_index+j))
2492                break;
2493             if (double_usage_mask & BITFIELD64_BIT(decl->mesa_index+j-1))
2494                break;
2495          }
2496
2497          decl->mesa_index++;
2498          decl->array_size--;
2499          j--;
2500       }
2501
2502       /* Shrink the end. */
2503       for (j = decl->array_size-1; j >= 0; j--) {
2504          if (decl->mesa_index >= VARYING_SLOT_PATCH0) {
2505             if (patch_usage_mask &
2506                 BITFIELD64_BIT(decl->mesa_index - VARYING_SLOT_PATCH0 + j))
2507                break;
2508          }
2509          else {
2510             if (usage_mask & BITFIELD64_BIT(decl->mesa_index+j))
2511                break;
2512             if (double_usage_mask & BITFIELD64_BIT(decl->mesa_index+j-1))
2513                break;
2514          }
2515
2516          decl->array_size--;
2517       }
2518    }
2519 }
2520
2521 void
2522 glsl_to_tgsi_visitor::visit(ir_dereference_array *ir)
2523 {
2524    ir_constant *index;
2525    st_src_reg src;
2526    int element_size = type_size(ir->type);
2527    bool is_2D = false;
2528
2529    index = ir->array_index->constant_expression_value();
2530
2531    ir->array->accept(this);
2532    src = this->result;
2533
2534    if (ir->array->ir_type != ir_type_dereference_array) {
2535       switch (this->prog->Target) {
2536       case GL_TESS_CONTROL_PROGRAM_NV:
2537          is_2D = (src.file == PROGRAM_INPUT || src.file == PROGRAM_OUTPUT) &&
2538                  !ir->variable_referenced()->data.patch;
2539          break;
2540       case GL_TESS_EVALUATION_PROGRAM_NV:
2541          is_2D = src.file == PROGRAM_INPUT &&
2542                  !ir->variable_referenced()->data.patch;
2543          break;
2544       case GL_GEOMETRY_PROGRAM_NV:
2545          is_2D = src.file == PROGRAM_INPUT;
2546          break;
2547       }
2548    }
2549
2550    if (is_2D)
2551       element_size = 1;
2552
2553    if (index) {
2554
2555       if (this->prog->Target == GL_VERTEX_PROGRAM_ARB &&
2556           src.file == PROGRAM_INPUT)
2557          element_size = attrib_type_size(ir->type, true);
2558       if (is_2D) {
2559          src.index2D = index->value.i[0];
2560          src.has_index2 = true;
2561       } else
2562          src.index += index->value.i[0] * element_size;
2563    } else {
2564       /* Variable index array dereference.  It eats the "vec4" of the
2565        * base of the array and an index that offsets the TGSI register
2566        * index.
2567        */
2568       ir->array_index->accept(this);
2569
2570       st_src_reg index_reg;
2571
2572       if (element_size == 1) {
2573          index_reg = this->result;
2574       } else {
2575          index_reg = get_temp(native_integers ?
2576                               glsl_type::int_type : glsl_type::float_type);
2577
2578          emit_asm(ir, TGSI_OPCODE_MUL, st_dst_reg(index_reg),
2579               this->result, st_src_reg_for_type(index_reg.type, element_size));
2580       }
2581
2582       /* If there was already a relative address register involved, add the
2583        * new and the old together to get the new offset.
2584        */
2585       if (!is_2D && src.reladdr != NULL) {
2586          st_src_reg accum_reg = get_temp(native_integers ?
2587                                 glsl_type::int_type : glsl_type::float_type);
2588
2589          emit_asm(ir, TGSI_OPCODE_ADD, st_dst_reg(accum_reg),
2590               index_reg, *src.reladdr);
2591
2592          index_reg = accum_reg;
2593       }
2594
2595       if (is_2D) {
2596          src.reladdr2 = ralloc(mem_ctx, st_src_reg);
2597          memcpy(src.reladdr2, &index_reg, sizeof(index_reg));
2598          src.index2D = 0;
2599          src.has_index2 = true;
2600       } else {
2601          src.reladdr = ralloc(mem_ctx, st_src_reg);
2602          memcpy(src.reladdr, &index_reg, sizeof(index_reg));
2603       }
2604    }
2605
2606    /* If the type is smaller than a vec4, replicate the last channel out. */
2607    if (ir->type->is_scalar() || ir->type->is_vector())
2608       src.swizzle = swizzle_for_size(ir->type->vector_elements);
2609    else
2610       src.swizzle = SWIZZLE_NOOP;
2611
2612    /* Change the register type to the element type of the array. */
2613    src.type = ir->type->base_type;
2614
2615    this->result = src;
2616 }
2617
2618 void
2619 glsl_to_tgsi_visitor::visit(ir_dereference_record *ir)
2620 {
2621    unsigned int i;
2622    const glsl_type *struct_type = ir->record->type;
2623    int offset = 0;
2624
2625    ir->record->accept(this);
2626
2627    for (i = 0; i < struct_type->length; i++) {
2628       if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0)
2629          break;
2630       offset += type_size(struct_type->fields.structure[i].type);
2631    }
2632
2633    /* If the type is smaller than a vec4, replicate the last channel out. */
2634    if (ir->type->is_scalar() || ir->type->is_vector())
2635       this->result.swizzle = swizzle_for_size(ir->type->vector_elements);
2636    else
2637       this->result.swizzle = SWIZZLE_NOOP;
2638
2639    this->result.index += offset;
2640    this->result.type = ir->type->base_type;
2641 }
2642
2643 /**
2644  * We want to be careful in assignment setup to hit the actual storage
2645  * instead of potentially using a temporary like we might with the
2646  * ir_dereference handler.
2647  */
2648 static st_dst_reg
2649 get_assignment_lhs(ir_dereference *ir, glsl_to_tgsi_visitor *v)
2650 {
2651    /* The LHS must be a dereference.  If the LHS is a variable indexed array
2652     * access of a vector, it must be separated into a series conditional moves
2653     * before reaching this point (see ir_vec_index_to_cond_assign).
2654     */
2655    assert(ir->as_dereference());
2656    ir_dereference_array *deref_array = ir->as_dereference_array();
2657    if (deref_array) {
2658       assert(!deref_array->array->type->is_vector());
2659    }
2660
2661    /* Use the rvalue deref handler for the most part.  We'll ignore
2662     * swizzles in it and write swizzles using writemask, though.
2663     */
2664    ir->accept(v);
2665    return st_dst_reg(v->result);
2666 }
2667
2668 /**
2669  * Process the condition of a conditional assignment
2670  *
2671  * Examines the condition of a conditional assignment to generate the optimal
2672  * first operand of a \c CMP instruction.  If the condition is a relational
2673  * operator with 0 (e.g., \c ir_binop_less), the value being compared will be
2674  * used as the source for the \c CMP instruction.  Otherwise the comparison
2675  * is processed to a boolean result, and the boolean result is used as the
2676  * operand to the CMP instruction.
2677  */
2678 bool
2679 glsl_to_tgsi_visitor::process_move_condition(ir_rvalue *ir)
2680 {
2681    ir_rvalue *src_ir = ir;
2682    bool negate = true;
2683    bool switch_order = false;
2684
2685    ir_expression *const expr = ir->as_expression();
2686
2687    if (native_integers) {
2688       if ((expr != NULL) && (expr->get_num_operands() == 2)) {
2689          enum glsl_base_type type = expr->operands[0]->type->base_type;
2690          if (type == GLSL_TYPE_INT || type == GLSL_TYPE_UINT ||
2691              type == GLSL_TYPE_BOOL) {
2692             if (expr->operation == ir_binop_equal) {
2693                if (expr->operands[0]->is_zero()) {
2694                   src_ir = expr->operands[1];
2695                   switch_order = true;
2696                }
2697                else if (expr->operands[1]->is_zero()) {
2698                   src_ir = expr->operands[0];
2699                   switch_order = true;
2700                }
2701             }
2702             else if (expr->operation == ir_binop_nequal) {
2703                if (expr->operands[0]->is_zero()) {
2704                   src_ir = expr->operands[1];
2705                }
2706                else if (expr->operands[1]->is_zero()) {
2707                   src_ir = expr->operands[0];
2708                }
2709             }
2710          }
2711       }
2712
2713       src_ir->accept(this);
2714       return switch_order;
2715    }
2716
2717    if ((expr != NULL) && (expr->get_num_operands() == 2)) {
2718       bool zero_on_left = false;
2719
2720       if (expr->operands[0]->is_zero()) {
2721          src_ir = expr->operands[1];
2722          zero_on_left = true;
2723       } else if (expr->operands[1]->is_zero()) {
2724          src_ir = expr->operands[0];
2725          zero_on_left = false;
2726       }
2727
2728       /*      a is -  0  +            -  0  +
2729        * (a <  0)  T  F  F  ( a < 0)  T  F  F
2730        * (0 <  a)  F  F  T  (-a < 0)  F  F  T
2731        * (a <= 0)  T  T  F  (-a < 0)  F  F  T  (swap order of other operands)
2732        * (0 <= a)  F  T  T  ( a < 0)  T  F  F  (swap order of other operands)
2733        * (a >  0)  F  F  T  (-a < 0)  F  F  T
2734        * (0 >  a)  T  F  F  ( a < 0)  T  F  F
2735        * (a >= 0)  F  T  T  ( a < 0)  T  F  F  (swap order of other operands)
2736        * (0 >= a)  T  T  F  (-a < 0)  F  F  T  (swap order of other operands)
2737        *
2738        * Note that exchanging the order of 0 and 'a' in the comparison simply
2739        * means that the value of 'a' should be negated.
2740        */
2741       if (src_ir != ir) {
2742          switch (expr->operation) {
2743          case ir_binop_less:
2744             switch_order = false;
2745             negate = zero_on_left;
2746             break;
2747
2748          case ir_binop_greater:
2749             switch_order = false;
2750             negate = !zero_on_left;
2751             break;
2752
2753          case ir_binop_lequal:
2754             switch_order = true;
2755             negate = !zero_on_left;
2756             break;
2757
2758          case ir_binop_gequal:
2759             switch_order = true;
2760             negate = zero_on_left;
2761             break;
2762
2763          default:
2764             /* This isn't the right kind of comparison afterall, so make sure
2765              * the whole condition is visited.
2766              */
2767             src_ir = ir;
2768             break;
2769          }
2770       }
2771    }
2772
2773    src_ir->accept(this);
2774
2775    /* We use the TGSI_OPCODE_CMP (a < 0 ? b : c) for conditional moves, and the
2776     * condition we produced is 0.0 or 1.0.  By flipping the sign, we can
2777     * choose which value TGSI_OPCODE_CMP produces without an extra instruction
2778     * computing the condition.
2779     */
2780    if (negate)
2781       this->result.negate = ~this->result.negate;
2782
2783    return switch_order;
2784 }
2785
2786 void
2787 glsl_to_tgsi_visitor::emit_block_mov(ir_assignment *ir, const struct glsl_type *type,
2788                                      st_dst_reg *l, st_src_reg *r,
2789                                      st_src_reg *cond, bool cond_swap)
2790 {
2791    if (type->base_type == GLSL_TYPE_STRUCT) {
2792       for (unsigned int i = 0; i < type->length; i++) {
2793          emit_block_mov(ir, type->fields.structure[i].type, l, r,
2794                         cond, cond_swap);
2795       }
2796       return;
2797    }
2798
2799    if (type->is_array()) {
2800       for (unsigned int i = 0; i < type->length; i++) {
2801          emit_block_mov(ir, type->fields.array, l, r, cond, cond_swap);
2802       }
2803       return;
2804    }
2805
2806    if (type->is_matrix()) {
2807       const struct glsl_type *vec_type;
2808
2809       vec_type = glsl_type::get_instance(type->is_double() ? GLSL_TYPE_DOUBLE : GLSL_TYPE_FLOAT,
2810                                          type->vector_elements, 1);
2811
2812       for (int i = 0; i < type->matrix_columns; i++) {
2813          emit_block_mov(ir, vec_type, l, r, cond, cond_swap);
2814       }
2815       return;
2816    }
2817
2818    assert(type->is_scalar() || type->is_vector());
2819
2820    r->type = type->base_type;
2821    if (cond) {
2822       st_src_reg l_src = st_src_reg(*l);
2823       l_src.swizzle = swizzle_for_size(type->vector_elements);
2824
2825       if (native_integers) {
2826          emit_asm(ir, TGSI_OPCODE_UCMP, *l, *cond,
2827               cond_swap ? l_src : *r,
2828               cond_swap ? *r : l_src);
2829       } else {
2830          emit_asm(ir, TGSI_OPCODE_CMP, *l, *cond,
2831               cond_swap ? l_src : *r,
2832               cond_swap ? *r : l_src);
2833       }
2834    } else {
2835       emit_asm(ir, TGSI_OPCODE_MOV, *l, *r);
2836    }
2837    l->index++;
2838    r->index++;
2839    if (type->is_dual_slot_double()) {
2840       l->index++;
2841       if (r->is_double_vertex_input == false)
2842          r->index++;
2843    }
2844 }
2845
2846 void
2847 glsl_to_tgsi_visitor::visit(ir_assignment *ir)
2848 {
2849    st_dst_reg l;
2850    st_src_reg r;
2851
2852    ir->rhs->accept(this);
2853    r = this->result;
2854
2855    l = get_assignment_lhs(ir->lhs, this);
2856
2857    /* FINISHME: This should really set to the correct maximal writemask for each
2858     * FINISHME: component written (in the loops below).  This case can only
2859     * FINISHME: occur for matrices, arrays, and structures.
2860     */
2861    if (ir->write_mask == 0) {
2862       assert(!ir->lhs->type->is_scalar() && !ir->lhs->type->is_vector());
2863
2864       if (ir->lhs->type->is_array() || ir->lhs->type->without_array()->is_matrix()) {
2865          if (ir->lhs->type->without_array()->is_double()) {
2866             switch (ir->lhs->type->without_array()->vector_elements) {
2867             case 1:
2868                l.writemask = WRITEMASK_X;
2869                break;
2870             case 2:
2871                l.writemask = WRITEMASK_XY;
2872                break;
2873             case 3:
2874                l.writemask = WRITEMASK_XYZ;
2875                break;
2876             case 4:
2877                l.writemask = WRITEMASK_XYZW;
2878                break;
2879             }
2880          } else
2881             l.writemask = WRITEMASK_XYZW;
2882       }
2883    } else if (ir->lhs->type->is_scalar() &&
2884               !ir->lhs->type->is_double() &&
2885               ir->lhs->variable_referenced()->data.mode == ir_var_shader_out) {
2886       /* FINISHME: This hack makes writing to gl_FragDepth, which lives in the
2887        * FINISHME: W component of fragment shader output zero, work correctly.
2888        */
2889       l.writemask = WRITEMASK_XYZW;
2890    } else {
2891       int swizzles[4];
2892       int first_enabled_chan = 0;
2893       int rhs_chan = 0;
2894
2895       l.writemask = ir->write_mask;
2896
2897       for (int i = 0; i < 4; i++) {
2898          if (l.writemask & (1 << i)) {
2899             first_enabled_chan = GET_SWZ(r.swizzle, i);
2900             break;
2901          }
2902       }
2903
2904       /* Swizzle a small RHS vector into the channels being written.
2905        *
2906        * glsl ir treats write_mask as dictating how many channels are
2907        * present on the RHS while TGSI treats write_mask as just
2908        * showing which channels of the vec4 RHS get written.
2909        */
2910       for (int i = 0; i < 4; i++) {
2911          if (l.writemask & (1 << i))
2912             swizzles[i] = GET_SWZ(r.swizzle, rhs_chan++);
2913          else
2914             swizzles[i] = first_enabled_chan;
2915       }
2916       r.swizzle = MAKE_SWIZZLE4(swizzles[0], swizzles[1],
2917                                 swizzles[2], swizzles[3]);
2918    }
2919
2920    assert(l.file != PROGRAM_UNDEFINED);
2921    assert(r.file != PROGRAM_UNDEFINED);
2922
2923    if (ir->condition) {
2924       const bool switch_order = this->process_move_condition(ir->condition);
2925       st_src_reg condition = this->result;
2926
2927       emit_block_mov(ir, ir->lhs->type, &l, &r, &condition, switch_order);
2928    } else if (ir->rhs->as_expression() &&
2929               this->instructions.get_tail() &&
2930               ir->rhs == ((glsl_to_tgsi_instruction *)this->instructions.get_tail())->ir &&
2931               type_size(ir->lhs->type) == 1 &&
2932               l.writemask == ((glsl_to_tgsi_instruction *)this->instructions.get_tail())->dst[0].writemask) {
2933       /* To avoid emitting an extra MOV when assigning an expression to a
2934        * variable, emit the last instruction of the expression again, but
2935        * replace the destination register with the target of the assignment.
2936        * Dead code elimination will remove the original instruction.
2937        */
2938       glsl_to_tgsi_instruction *inst, *new_inst;
2939       inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail();
2940       new_inst = emit_asm(ir, inst->op, l, inst->src[0], inst->src[1], inst->src[2], inst->src[3]);
2941       new_inst->saturate = inst->saturate;
2942       inst->dead_mask = inst->dst[0].writemask;
2943    } else {
2944       emit_block_mov(ir, ir->rhs->type, &l, &r, NULL, false);
2945    }
2946 }
2947
2948
2949 void
2950 glsl_to_tgsi_visitor::visit(ir_constant *ir)
2951 {
2952    st_src_reg src;
2953    GLdouble stack_vals[4] = { 0 };
2954    gl_constant_value *values = (gl_constant_value *) stack_vals;
2955    GLenum gl_type = GL_NONE;
2956    unsigned int i;
2957    static int in_array = 0;
2958    gl_register_file file = in_array ? PROGRAM_CONSTANT : PROGRAM_IMMEDIATE;
2959
2960    /* Unfortunately, 4 floats is all we can get into
2961     * _mesa_add_typed_unnamed_constant.  So, make a temp to store an
2962     * aggregate constant and move each constant value into it.  If we
2963     * get lucky, copy propagation will eliminate the extra moves.
2964     */
2965    if (ir->type->base_type == GLSL_TYPE_STRUCT) {
2966       st_src_reg temp_base = get_temp(ir->type);
2967       st_dst_reg temp = st_dst_reg(temp_base);
2968
2969       foreach_in_list(ir_constant, field_value, &ir->components) {
2970          int size = type_size(field_value->type);
2971
2972          assert(size > 0);
2973
2974          field_value->accept(this);
2975          src = this->result;
2976
2977          for (i = 0; i < (unsigned int)size; i++) {
2978             emit_asm(ir, TGSI_OPCODE_MOV, temp, src);
2979
2980             src.index++;
2981             temp.index++;
2982          }
2983       }
2984       this->result = temp_base;
2985       return;
2986    }
2987
2988    if (ir->type->is_array()) {
2989       st_src_reg temp_base = get_temp(ir->type);
2990       st_dst_reg temp = st_dst_reg(temp_base);
2991       int size = type_size(ir->type->fields.array);
2992
2993       assert(size > 0);
2994       in_array++;
2995
2996       for (i = 0; i < ir->type->length; i++) {
2997          ir->array_elements[i]->accept(this);
2998          src = this->result;
2999          for (int j = 0; j < size; j++) {
3000             emit_asm(ir, TGSI_OPCODE_MOV, temp, src);
3001
3002             src.index++;
3003             temp.index++;
3004          }
3005       }
3006       this->result = temp_base;
3007       in_array--;
3008       return;
3009    }
3010
3011    if (ir->type->is_matrix()) {
3012       st_src_reg mat = get_temp(ir->type);
3013       st_dst_reg mat_column = st_dst_reg(mat);
3014
3015       for (i = 0; i < ir->type->matrix_columns; i++) {
3016          switch (ir->type->base_type) {
3017          case GLSL_TYPE_FLOAT:
3018             values = (gl_constant_value *) &ir->value.f[i * ir->type->vector_elements];
3019
3020             src = st_src_reg(file, -1, ir->type->base_type);
3021             src.index = add_constant(file,
3022                                      values,
3023                                      ir->type->vector_elements,
3024                                      GL_FLOAT,
3025                                      &src.swizzle);
3026             emit_asm(ir, TGSI_OPCODE_MOV, mat_column, src);
3027             break;
3028          case GLSL_TYPE_DOUBLE:
3029             values = (gl_constant_value *) &ir->value.d[i * ir->type->vector_elements];
3030             src = st_src_reg(file, -1, ir->type->base_type);
3031             src.index = add_constant(file,
3032                                      values,
3033                                      ir->type->vector_elements,
3034                                      GL_DOUBLE,
3035                                      &src.swizzle);
3036             if (ir->type->vector_elements >= 2) {
3037                mat_column.writemask = WRITEMASK_XY;
3038                src.swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_X, SWIZZLE_Y);
3039                emit_asm(ir, TGSI_OPCODE_MOV, mat_column, src);
3040             } else {
3041                mat_column.writemask = WRITEMASK_X;
3042                src.swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X);
3043                emit_asm(ir, TGSI_OPCODE_MOV, mat_column, src);
3044             }
3045             src.index++;
3046             if (ir->type->vector_elements > 2) {
3047                if (ir->type->vector_elements == 4) {
3048                   mat_column.writemask = WRITEMASK_ZW;
3049                   src.swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_X, SWIZZLE_Y);
3050                   emit_asm(ir, TGSI_OPCODE_MOV, mat_column, src);
3051                } else {
3052                   mat_column.writemask = WRITEMASK_Z;
3053                   src.swizzle = MAKE_SWIZZLE4(SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y);
3054                   emit_asm(ir, TGSI_OPCODE_MOV, mat_column, src);
3055                   mat_column.writemask = WRITEMASK_XYZW;
3056                   src.swizzle = SWIZZLE_XYZW;
3057                }
3058                mat_column.index++;
3059             }
3060             break;
3061          default:
3062             unreachable("Illegal matrix constant type.\n");
3063             break;
3064          }
3065          mat_column.index++;
3066       }
3067       this->result = mat;
3068       return;
3069    }
3070
3071    switch (ir->type->base_type) {
3072    case GLSL_TYPE_FLOAT:
3073       gl_type = GL_FLOAT;
3074       for (i = 0; i < ir->type->vector_elements; i++) {
3075          values[i].f = ir->value.f[i];
3076       }
3077       break;
3078    case GLSL_TYPE_DOUBLE:
3079       gl_type = GL_DOUBLE;
3080       for (i = 0; i < ir->type->vector_elements; i++) {
3081          values[i * 2].i = *(uint32_t *)&ir->value.d[i];
3082          values[i * 2 + 1].i = *(((uint32_t *)&ir->value.d[i]) + 1);
3083       }
3084       break;
3085    case GLSL_TYPE_UINT:
3086       gl_type = native_integers ? GL_UNSIGNED_INT : GL_FLOAT;
3087       for (i = 0; i < ir->type->vector_elements; i++) {
3088          if (native_integers)
3089             values[i].u = ir->value.u[i];
3090          else
3091             values[i].f = ir->value.u[i];
3092       }
3093       break;
3094    case GLSL_TYPE_INT:
3095       gl_type = native_integers ? GL_INT : GL_FLOAT;
3096       for (i = 0; i < ir->type->vector_elements; i++) {
3097          if (native_integers)
3098             values[i].i = ir->value.i[i];
3099          else
3100             values[i].f = ir->value.i[i];
3101       }
3102       break;
3103    case GLSL_TYPE_BOOL:
3104       gl_type = native_integers ? GL_BOOL : GL_FLOAT;
3105       for (i = 0; i < ir->type->vector_elements; i++) {
3106          values[i].u = ir->value.b[i] ? ctx->Const.UniformBooleanTrue : 0;
3107       }
3108       break;
3109    default:
3110       assert(!"Non-float/uint/int/bool constant");
3111    }
3112
3113    this->result = st_src_reg(file, -1, ir->type);
3114    this->result.index = add_constant(file,
3115                                      values,
3116                                      ir->type->vector_elements,
3117                                      gl_type,
3118                                      &this->result.swizzle);
3119 }
3120
3121 function_entry *
3122 glsl_to_tgsi_visitor::get_function_signature(ir_function_signature *sig)
3123 {
3124    foreach_in_list_use_after(function_entry, entry, &this->function_signatures) {
3125       if (entry->sig == sig)
3126          return entry;
3127    }
3128
3129    entry = ralloc(mem_ctx, function_entry);
3130    entry->sig = sig;
3131    entry->sig_id = this->next_signature_id++;
3132    entry->bgn_inst = NULL;
3133
3134    /* Allocate storage for all the parameters. */
3135    foreach_in_list(ir_variable, param, &sig->parameters) {
3136       variable_storage *storage;
3137
3138       storage = find_variable_storage(param);
3139       assert(!storage);
3140
3141       st_src_reg src = get_temp(param->type);
3142
3143       storage = new(mem_ctx) variable_storage(param, src.file, src.index);
3144       this->variables.push_tail(storage);
3145    }
3146
3147    if (!sig->return_type->is_void()) {
3148       entry->return_reg = get_temp(sig->return_type);
3149    } else {
3150       entry->return_reg = undef_src;
3151    }
3152
3153    this->function_signatures.push_tail(entry);
3154    return entry;
3155 }
3156
3157 void
3158 glsl_to_tgsi_visitor::visit_atomic_counter_intrinsic(ir_call *ir)
3159 {
3160    const char *callee = ir->callee->function_name();
3161    exec_node *param = ir->actual_parameters.get_head();
3162    ir_dereference *deref = static_cast<ir_dereference *>(param);
3163    ir_variable *location = deref->variable_referenced();
3164
3165    st_src_reg buffer(
3166          PROGRAM_BUFFER, location->data.binding, GLSL_TYPE_ATOMIC_UINT);
3167
3168    /* Calculate the surface offset */
3169    st_src_reg offset;
3170    unsigned array_size = 0, base = 0, index = 0;
3171
3172    get_deref_offsets(deref, &array_size, &base, &index, &offset);
3173
3174    if (offset.file != PROGRAM_UNDEFINED) {
3175       emit_asm(ir, TGSI_OPCODE_MUL, st_dst_reg(offset),
3176                offset, st_src_reg_for_int(ATOMIC_COUNTER_SIZE));
3177       emit_asm(ir, TGSI_OPCODE_ADD, st_dst_reg(offset),
3178                offset, st_src_reg_for_int(location->data.offset + index * ATOMIC_COUNTER_SIZE));
3179    } else {
3180       offset = st_src_reg_for_int(location->data.offset + index * ATOMIC_COUNTER_SIZE);
3181    }
3182
3183    ir->return_deref->accept(this);
3184    st_dst_reg dst(this->result);
3185    dst.writemask = WRITEMASK_X;
3186
3187    glsl_to_tgsi_instruction *inst;
3188
3189    if (!strcmp("__intrinsic_atomic_read", callee)) {
3190       inst = emit_asm(ir, TGSI_OPCODE_LOAD, dst, offset);
3191    } else if (!strcmp("__intrinsic_atomic_increment", callee)) {
3192       inst = emit_asm(ir, TGSI_OPCODE_ATOMUADD, dst, offset,
3193                       st_src_reg_for_int(1));
3194    } else if (!strcmp("__intrinsic_atomic_predecrement", callee)) {
3195       inst = emit_asm(ir, TGSI_OPCODE_ATOMUADD, dst, offset,
3196                       st_src_reg_for_int(-1));
3197       emit_asm(ir, TGSI_OPCODE_ADD, dst, this->result, st_src_reg_for_int(-1));
3198    } else {
3199       param = param->get_next();
3200       ir_rvalue *val = ((ir_instruction *)param)->as_rvalue();
3201       val->accept(this);
3202
3203       st_src_reg data = this->result, data2 = undef_src;
3204       unsigned opcode;
3205       if (!strcmp("__intrinsic_atomic_add", callee))
3206          opcode = TGSI_OPCODE_ATOMUADD;
3207       else if (!strcmp("__intrinsic_atomic_min", callee))
3208          opcode = TGSI_OPCODE_ATOMIMIN;
3209       else if (!strcmp("__intrinsic_atomic_max", callee))
3210          opcode = TGSI_OPCODE_ATOMIMAX;
3211       else if (!strcmp("__intrinsic_atomic_and", callee))
3212          opcode = TGSI_OPCODE_ATOMAND;
3213       else if (!strcmp("__intrinsic_atomic_or", callee))
3214          opcode = TGSI_OPCODE_ATOMOR;
3215       else if (!strcmp("__intrinsic_atomic_xor", callee))
3216          opcode = TGSI_OPCODE_ATOMXOR;
3217       else if (!strcmp("__intrinsic_atomic_exchange", callee))
3218          opcode = TGSI_OPCODE_ATOMXCHG;
3219       else if (!strcmp("__intrinsic_atomic_comp_swap", callee)) {
3220          opcode = TGSI_OPCODE_ATOMCAS;
3221          param = param->get_next();
3222          val = ((ir_instruction *)param)->as_rvalue();
3223          val->accept(this);
3224          data2 = this->result;
3225       } else if (!strcmp("__intrinsic_atomic_sub", callee)) {
3226          opcode = TGSI_OPCODE_ATOMUADD;
3227          st_src_reg res = get_temp(glsl_type::uvec4_type);
3228          st_dst_reg dstres = st_dst_reg(res);
3229          dstres.writemask = dst.writemask;
3230          emit_asm(ir, TGSI_OPCODE_INEG, dstres, data);
3231          data = res;
3232       } else {
3233          assert(!"Unexpected intrinsic");
3234          return;
3235       }
3236
3237       inst = emit_asm(ir, opcode, dst, offset, data, data2);
3238    }
3239
3240    inst->buffer = buffer;
3241 }
3242
3243 void
3244 glsl_to_tgsi_visitor::visit_ssbo_intrinsic(ir_call *ir)
3245 {
3246    const char *callee = ir->callee->function_name();
3247    exec_node *param = ir->actual_parameters.get_head();
3248
3249    ir_rvalue *block = ((ir_instruction *)param)->as_rvalue();
3250
3251    param = param->get_next();
3252    ir_rvalue *offset = ((ir_instruction *)param)->as_rvalue();
3253
3254    ir_constant *const_block = block->as_constant();
3255
3256    st_src_reg buffer(
3257          PROGRAM_BUFFER,
3258          ctx->Const.Program[shader->Stage].MaxAtomicBuffers +
3259          (const_block ? const_block->value.u[0] : 0),
3260          GLSL_TYPE_UINT);
3261
3262    if (!const_block) {
3263       block->accept(this);
3264       emit_arl(ir, sampler_reladdr, this->result);
3265       buffer.reladdr = ralloc(mem_ctx, st_src_reg);
3266       memcpy(buffer.reladdr, &sampler_reladdr, sizeof(sampler_reladdr));
3267    }
3268
3269    /* Calculate the surface offset */
3270    offset->accept(this);
3271    st_src_reg off = this->result;
3272
3273    st_dst_reg dst = undef_dst;
3274    if (ir->return_deref) {
3275       ir->return_deref->accept(this);
3276       dst = st_dst_reg(this->result);
3277       dst.writemask = (1 << ir->return_deref->type->vector_elements) - 1;
3278    }
3279
3280    glsl_to_tgsi_instruction *inst;
3281
3282    if (!strcmp("__intrinsic_load_ssbo", callee)) {
3283       inst = emit_asm(ir, TGSI_OPCODE_LOAD, dst, off);
3284       if (dst.type == GLSL_TYPE_BOOL)
3285          emit_asm(ir, TGSI_OPCODE_USNE, dst, st_src_reg(dst), st_src_reg_for_int(0));
3286    } else if (!strcmp("__intrinsic_store_ssbo", callee)) {
3287       param = param->get_next();
3288       ir_rvalue *val = ((ir_instruction *)param)->as_rvalue();
3289       val->accept(this);
3290
3291       param = param->get_next();
3292       ir_constant *write_mask = ((ir_instruction *)param)->as_constant();
3293       assert(write_mask);
3294       dst.writemask = write_mask->value.u[0];
3295
3296       dst.type = this->result.type;
3297       inst = emit_asm(ir, TGSI_OPCODE_STORE, dst, off, this->result);
3298    } else {
3299       param = param->get_next();
3300       ir_rvalue *val = ((ir_instruction *)param)->as_rvalue();
3301       val->accept(this);
3302
3303       st_src_reg data = this->result, data2 = undef_src;
3304       unsigned opcode;
3305       if (!strcmp("__intrinsic_atomic_add_ssbo", callee))
3306          opcode = TGSI_OPCODE_ATOMUADD;
3307       else if (!strcmp("__intrinsic_atomic_min_ssbo", callee))
3308          opcode = TGSI_OPCODE_ATOMIMIN;
3309       else if (!strcmp("__intrinsic_atomic_max_ssbo", callee))
3310          opcode = TGSI_OPCODE_ATOMIMAX;
3311       else if (!strcmp("__intrinsic_atomic_and_ssbo", callee))
3312          opcode = TGSI_OPCODE_ATOMAND;
3313       else if (!strcmp("__intrinsic_atomic_or_ssbo", callee))
3314          opcode = TGSI_OPCODE_ATOMOR;
3315       else if (!strcmp("__intrinsic_atomic_xor_ssbo", callee))
3316          opcode = TGSI_OPCODE_ATOMXOR;
3317       else if (!strcmp("__intrinsic_atomic_exchange_ssbo", callee))
3318          opcode = TGSI_OPCODE_ATOMXCHG;
3319       else if (!strcmp("__intrinsic_atomic_comp_swap_ssbo", callee)) {
3320          opcode = TGSI_OPCODE_ATOMCAS;
3321          param = param->get_next();
3322          val = ((ir_instruction *)param)->as_rvalue();
3323          val->accept(this);
3324          data2 = this->result;
3325       } else {
3326          assert(!"Unexpected intrinsic");
3327          return;
3328       }
3329
3330       inst = emit_asm(ir, opcode, dst, off, data, data2);
3331    }
3332
3333    param = param->get_next();
3334    ir_constant *access = NULL;
3335    if (!param->is_tail_sentinel()) {
3336       access = ((ir_instruction *)param)->as_constant();
3337       assert(access);
3338    }
3339
3340    /* The emit_asm() might have actually split the op into pieces, e.g. for
3341     * double stores. We have to go back and fix up all the generated ops.
3342     */
3343    unsigned op = inst->op;
3344    do {
3345       inst->buffer = buffer;
3346       if (access)
3347          inst->buffer_access = access->value.u[0];
3348       inst = (glsl_to_tgsi_instruction *)inst->get_prev();
3349       if (inst->op == TGSI_OPCODE_UADD)
3350          inst = (glsl_to_tgsi_instruction *)inst->get_prev();
3351    } while (inst && inst->buffer.file == PROGRAM_UNDEFINED && inst->op == op);
3352 }
3353
3354 void
3355 glsl_to_tgsi_visitor::visit_membar_intrinsic(ir_call *ir)
3356 {
3357    const char *callee = ir->callee->function_name();
3358
3359    if (!strcmp("__intrinsic_memory_barrier", callee))
3360       emit_asm(ir, TGSI_OPCODE_MEMBAR, undef_dst,
3361                st_src_reg_for_int(TGSI_MEMBAR_SHADER_BUFFER |
3362                                   TGSI_MEMBAR_ATOMIC_BUFFER |
3363                                   TGSI_MEMBAR_SHADER_IMAGE |
3364                                   TGSI_MEMBAR_SHARED));
3365    else if (!strcmp("__intrinsic_memory_barrier_atomic_counter", callee))
3366       emit_asm(ir, TGSI_OPCODE_MEMBAR, undef_dst,
3367                st_src_reg_for_int(TGSI_MEMBAR_ATOMIC_BUFFER));
3368    else if (!strcmp("__intrinsic_memory_barrier_buffer", callee))
3369       emit_asm(ir, TGSI_OPCODE_MEMBAR, undef_dst,
3370                st_src_reg_for_int(TGSI_MEMBAR_SHADER_BUFFER));
3371    else if (!strcmp("__intrinsic_memory_barrier_image", callee))
3372       emit_asm(ir, TGSI_OPCODE_MEMBAR, undef_dst,
3373                st_src_reg_for_int(TGSI_MEMBAR_SHADER_IMAGE));
3374    else if (!strcmp("__intrinsic_memory_barrier_shared", callee))
3375       emit_asm(ir, TGSI_OPCODE_MEMBAR, undef_dst,
3376                st_src_reg_for_int(TGSI_MEMBAR_SHARED));
3377    else if (!strcmp("__intrinsic_group_memory_barrier", callee))
3378       emit_asm(ir, TGSI_OPCODE_MEMBAR, undef_dst,
3379                st_src_reg_for_int(TGSI_MEMBAR_SHADER_BUFFER |
3380                                   TGSI_MEMBAR_ATOMIC_BUFFER |
3381                                   TGSI_MEMBAR_SHADER_IMAGE |
3382                                   TGSI_MEMBAR_SHARED |
3383                                   TGSI_MEMBAR_THREAD_GROUP));
3384    else
3385       assert(!"Unexpected memory barrier intrinsic");
3386 }
3387
3388 void
3389 glsl_to_tgsi_visitor::visit_shared_intrinsic(ir_call *ir)
3390 {
3391    const char *callee = ir->callee->function_name();
3392    exec_node *param = ir->actual_parameters.get_head();
3393
3394    ir_rvalue *offset = ((ir_instruction *)param)->as_rvalue();
3395
3396    st_src_reg buffer(PROGRAM_MEMORY, 0, GLSL_TYPE_UINT);
3397
3398    /* Calculate the surface offset */
3399    offset->accept(this);
3400    st_src_reg off = this->result;
3401
3402    st_dst_reg dst = undef_dst;
3403    if (ir->return_deref) {
3404       ir->return_deref->accept(this);
3405       dst = st_dst_reg(this->result);
3406       dst.writemask = (1 << ir->return_deref->type->vector_elements) - 1;
3407    }
3408
3409    glsl_to_tgsi_instruction *inst;
3410
3411    if (!strcmp("__intrinsic_load_shared", callee)) {
3412       inst = emit_asm(ir, TGSI_OPCODE_LOAD, dst, off);
3413       inst->buffer = buffer;
3414    } else if (!strcmp("__intrinsic_store_shared", callee)) {
3415       param = param->get_next();
3416       ir_rvalue *val = ((ir_instruction *)param)->as_rvalue();
3417       val->accept(this);
3418
3419       param = param->get_next();
3420       ir_constant *write_mask = ((ir_instruction *)param)->as_constant();
3421       assert(write_mask);
3422       dst.writemask = write_mask->value.u[0];
3423
3424       dst.type = this->result.type;
3425       inst = emit_asm(ir, TGSI_OPCODE_STORE, dst, off, this->result);
3426       inst->buffer = buffer;
3427    } else {
3428       param = param->get_next();
3429       ir_rvalue *val = ((ir_instruction *)param)->as_rvalue();
3430       val->accept(this);
3431
3432       st_src_reg data = this->result, data2 = undef_src;
3433       unsigned opcode;
3434       if (!strcmp("__intrinsic_atomic_add_shared", callee))
3435          opcode = TGSI_OPCODE_ATOMUADD;
3436       else if (!strcmp("__intrinsic_atomic_min_shared", callee))
3437          opcode = TGSI_OPCODE_ATOMIMIN;
3438       else if (!strcmp("__intrinsic_atomic_max_shared", callee))
3439          opcode = TGSI_OPCODE_ATOMIMAX;
3440       else if (!strcmp("__intrinsic_atomic_and_shared", callee))
3441          opcode = TGSI_OPCODE_ATOMAND;
3442       else if (!strcmp("__intrinsic_atomic_or_shared", callee))
3443          opcode = TGSI_OPCODE_ATOMOR;
3444       else if (!strcmp("__intrinsic_atomic_xor_shared", callee))
3445          opcode = TGSI_OPCODE_ATOMXOR;
3446       else if (!strcmp("__intrinsic_atomic_exchange_shared", callee))
3447          opcode = TGSI_OPCODE_ATOMXCHG;
3448       else if (!strcmp("__intrinsic_atomic_comp_swap_shared", callee)) {
3449          opcode = TGSI_OPCODE_ATOMCAS;
3450          param = param->get_next();
3451          val = ((ir_instruction *)param)->as_rvalue();
3452          val->accept(this);
3453          data2 = this->result;
3454       } else {
3455          assert(!"Unexpected intrinsic");
3456          return;
3457       }
3458
3459       inst = emit_asm(ir, opcode, dst, off, data, data2);
3460       inst->buffer = buffer;
3461    }
3462 }
3463
3464 void
3465 glsl_to_tgsi_visitor::visit_image_intrinsic(ir_call *ir)
3466 {
3467    const char *callee = ir->callee->function_name();
3468    exec_node *param = ir->actual_parameters.get_head();
3469
3470    ir_dereference *img = (ir_dereference *)param;
3471    const ir_variable *imgvar = img->variable_referenced();
3472    const glsl_type *type = imgvar->type->without_array();
3473    unsigned sampler_array_size = 1, sampler_base = 0;
3474
3475    st_src_reg reladdr;
3476    st_src_reg image(PROGRAM_IMAGE, 0, GLSL_TYPE_UINT);
3477
3478    get_deref_offsets(img, &sampler_array_size, &sampler_base,
3479                      (unsigned int *)&image.index, &reladdr);
3480    if (reladdr.file != PROGRAM_UNDEFINED) {
3481       emit_arl(ir, sampler_reladdr, reladdr);
3482       image.reladdr = ralloc(mem_ctx, st_src_reg);
3483       memcpy(image.reladdr, &sampler_reladdr, sizeof(reladdr));
3484    }
3485
3486    st_dst_reg dst = undef_dst;
3487    if (ir->return_deref) {
3488       ir->return_deref->accept(this);
3489       dst = st_dst_reg(this->result);
3490       dst.writemask = (1 << ir->return_deref->type->vector_elements) - 1;
3491    }
3492
3493    glsl_to_tgsi_instruction *inst;
3494
3495    if (!strcmp("__intrinsic_image_size", callee)) {
3496       dst.writemask = WRITEMASK_XYZ;
3497       inst = emit_asm(ir, TGSI_OPCODE_RESQ, dst);
3498    } else if (!strcmp("__intrinsic_image_samples", callee)) {
3499       st_src_reg res = get_temp(glsl_type::ivec4_type);
3500       st_dst_reg dstres = st_dst_reg(res);
3501       dstres.writemask = WRITEMASK_W;
3502       emit_asm(ir, TGSI_OPCODE_RESQ, dstres);
3503       res.swizzle = SWIZZLE_WWWW;
3504       inst = emit_asm(ir, TGSI_OPCODE_MOV, dst, res);
3505    } else {
3506       st_src_reg arg1 = undef_src, arg2 = undef_src;
3507       st_src_reg coord;
3508       st_dst_reg coord_dst;
3509       coord = get_temp(glsl_type::ivec4_type);
3510       coord_dst = st_dst_reg(coord);
3511       coord_dst.writemask = (1 << type->coordinate_components()) - 1;
3512       param = param->get_next();
3513       ((ir_dereference *)param)->accept(this);
3514       emit_asm(ir, TGSI_OPCODE_MOV, coord_dst, this->result);
3515       coord.swizzle = SWIZZLE_XXXX;
3516       switch (type->coordinate_components()) {
3517       case 4: assert(!"unexpected coord count");
3518       /* fallthrough */
3519       case 3: coord.swizzle |= SWIZZLE_Z << 6;
3520       /* fallthrough */
3521       case 2: coord.swizzle |= SWIZZLE_Y << 3;
3522       }
3523
3524       if (type->sampler_dimensionality == GLSL_SAMPLER_DIM_MS) {
3525          param = param->get_next();
3526          ((ir_dereference *)param)->accept(this);
3527          st_src_reg sample = this->result;
3528          sample.swizzle = SWIZZLE_XXXX;
3529          coord_dst.writemask = WRITEMASK_W;
3530          emit_asm(ir, TGSI_OPCODE_MOV, coord_dst, sample);
3531          coord.swizzle |= SWIZZLE_W << 9;
3532       }
3533
3534       param = param->get_next();
3535       if (!param->is_tail_sentinel()) {
3536          ((ir_dereference *)param)->accept(this);
3537          arg1 = this->result;
3538          param = param->get_next();
3539       }
3540
3541       if (!param->is_tail_sentinel()) {
3542          ((ir_dereference *)param)->accept(this);
3543          arg2 = this->result;
3544          param = param->get_next();
3545       }
3546
3547       assert(param->is_tail_sentinel());
3548
3549       unsigned opcode;
3550       if (!strcmp("__intrinsic_image_load", callee))
3551          opcode = TGSI_OPCODE_LOAD;
3552       else if (!strcmp("__intrinsic_image_store", callee))
3553          opcode = TGSI_OPCODE_STORE;
3554       else if (!strcmp("__intrinsic_image_atomic_add", callee))
3555          opcode = TGSI_OPCODE_ATOMUADD;
3556       else if (!strcmp("__intrinsic_image_atomic_min", callee))
3557          opcode = TGSI_OPCODE_ATOMIMIN;
3558       else if (!strcmp("__intrinsic_image_atomic_max", callee))
3559          opcode = TGSI_OPCODE_ATOMIMAX;
3560       else if (!strcmp("__intrinsic_image_atomic_and", callee))
3561          opcode = TGSI_OPCODE_ATOMAND;
3562       else if (!strcmp("__intrinsic_image_atomic_or", callee))
3563          opcode = TGSI_OPCODE_ATOMOR;
3564       else if (!strcmp("__intrinsic_image_atomic_xor", callee))
3565          opcode = TGSI_OPCODE_ATOMXOR;
3566       else if (!strcmp("__intrinsic_image_atomic_exchange", callee))
3567          opcode = TGSI_OPCODE_ATOMXCHG;
3568       else if (!strcmp("__intrinsic_image_atomic_comp_swap", callee))
3569          opcode = TGSI_OPCODE_ATOMCAS;
3570       else {
3571          assert(!"Unexpected intrinsic");
3572          return;
3573       }
3574
3575       inst = emit_asm(ir, opcode, dst, coord, arg1, arg2);
3576       if (opcode == TGSI_OPCODE_STORE)
3577          inst->dst[0].writemask = WRITEMASK_XYZW;
3578    }
3579
3580    inst->buffer = image;
3581    inst->sampler_array_size = sampler_array_size;
3582    inst->sampler_base = sampler_base;
3583
3584    switch (type->sampler_dimensionality) {
3585    case GLSL_SAMPLER_DIM_1D:
3586       inst->tex_target = (type->sampler_array)
3587          ? TEXTURE_1D_ARRAY_INDEX : TEXTURE_1D_INDEX;
3588       break;
3589    case GLSL_SAMPLER_DIM_2D:
3590       inst->tex_target = (type->sampler_array)
3591          ? TEXTURE_2D_ARRAY_INDEX : TEXTURE_2D_INDEX;
3592       break;
3593    case GLSL_SAMPLER_DIM_3D:
3594       inst->tex_target = TEXTURE_3D_INDEX;
3595       break;
3596    case GLSL_SAMPLER_DIM_CUBE:
3597       inst->tex_target = (type->sampler_array)
3598          ? TEXTURE_CUBE_ARRAY_INDEX : TEXTURE_CUBE_INDEX;
3599       break;
3600    case GLSL_SAMPLER_DIM_RECT:
3601       inst->tex_target = TEXTURE_RECT_INDEX;
3602       break;
3603    case GLSL_SAMPLER_DIM_BUF:
3604       inst->tex_target = TEXTURE_BUFFER_INDEX;
3605       break;
3606    case GLSL_SAMPLER_DIM_EXTERNAL:
3607       inst->tex_target = TEXTURE_EXTERNAL_INDEX;
3608       break;
3609    case GLSL_SAMPLER_DIM_MS:
3610       inst->tex_target = (type->sampler_array)
3611          ? TEXTURE_2D_MULTISAMPLE_ARRAY_INDEX : TEXTURE_2D_MULTISAMPLE_INDEX;
3612       break;
3613    default:
3614       assert(!"Should not get here.");
3615    }
3616
3617    inst->image_format = st_mesa_format_to_pipe_format(st_context(ctx),
3618          _mesa_get_shader_image_format(imgvar->data.image_format));
3619
3620    if (imgvar->data.image_coherent)
3621       inst->buffer_access |= TGSI_MEMORY_COHERENT;
3622    if (imgvar->data.image_restrict)
3623       inst->buffer_access |= TGSI_MEMORY_RESTRICT;
3624    if (imgvar->data.image_volatile)
3625       inst->buffer_access |= TGSI_MEMORY_VOLATILE;
3626 }
3627
3628 void
3629 glsl_to_tgsi_visitor::visit(ir_call *ir)
3630 {
3631    glsl_to_tgsi_instruction *call_inst;
3632    ir_function_signature *sig = ir->callee;
3633    const char *callee = sig->function_name();
3634    function_entry *entry;
3635    int i;
3636
3637    /* Filter out intrinsics */
3638    if (!strcmp("__intrinsic_atomic_read", callee) ||
3639        !strcmp("__intrinsic_atomic_increment", callee) ||
3640        !strcmp("__intrinsic_atomic_predecrement", callee) ||
3641        !strcmp("__intrinsic_atomic_add", callee) ||
3642        !strcmp("__intrinsic_atomic_sub", callee) ||
3643        !strcmp("__intrinsic_atomic_min", callee) ||
3644        !strcmp("__intrinsic_atomic_max", callee) ||
3645        !strcmp("__intrinsic_atomic_and", callee) ||
3646        !strcmp("__intrinsic_atomic_or", callee) ||
3647        !strcmp("__intrinsic_atomic_xor", callee) ||
3648        !strcmp("__intrinsic_atomic_exchange", callee) ||
3649        !strcmp("__intrinsic_atomic_comp_swap", callee)) {
3650       visit_atomic_counter_intrinsic(ir);
3651       return;
3652    }
3653
3654    if (!strcmp("__intrinsic_load_ssbo", callee) ||
3655        !strcmp("__intrinsic_store_ssbo", callee) ||
3656        !strcmp("__intrinsic_atomic_add_ssbo", callee) ||
3657        !strcmp("__intrinsic_atomic_min_ssbo", callee) ||
3658        !strcmp("__intrinsic_atomic_max_ssbo", callee) ||
3659        !strcmp("__intrinsic_atomic_and_ssbo", callee) ||
3660        !strcmp("__intrinsic_atomic_or_ssbo", callee) ||
3661        !strcmp("__intrinsic_atomic_xor_ssbo", callee) ||
3662        !strcmp("__intrinsic_atomic_exchange_ssbo", callee) ||
3663        !strcmp("__intrinsic_atomic_comp_swap_ssbo", callee)) {
3664       visit_ssbo_intrinsic(ir);
3665       return;
3666    }
3667
3668    if (!strcmp("__intrinsic_memory_barrier", callee) ||
3669        !strcmp("__intrinsic_memory_barrier_atomic_counter", callee) ||
3670        !strcmp("__intrinsic_memory_barrier_buffer", callee) ||
3671        !strcmp("__intrinsic_memory_barrier_image", callee) ||
3672        !strcmp("__intrinsic_memory_barrier_shared", callee) ||
3673        !strcmp("__intrinsic_group_memory_barrier", callee)) {
3674       visit_membar_intrinsic(ir);
3675       return;
3676    }
3677
3678    if (!strcmp("__intrinsic_load_shared", callee) ||
3679        !strcmp("__intrinsic_store_shared", callee) ||
3680        !strcmp("__intrinsic_atomic_add_shared", callee) ||
3681        !strcmp("__intrinsic_atomic_min_shared", callee) ||
3682        !strcmp("__intrinsic_atomic_max_shared", callee) ||
3683        !strcmp("__intrinsic_atomic_and_shared", callee) ||
3684        !strcmp("__intrinsic_atomic_or_shared", callee) ||
3685        !strcmp("__intrinsic_atomic_xor_shared", callee) ||
3686        !strcmp("__intrinsic_atomic_exchange_shared", callee) ||
3687        !strcmp("__intrinsic_atomic_comp_swap_shared", callee)) {
3688       visit_shared_intrinsic(ir);
3689       return;
3690    }
3691
3692    if (!strcmp("__intrinsic_image_load", callee) ||
3693        !strcmp("__intrinsic_image_store", callee) ||
3694        !strcmp("__intrinsic_image_atomic_add", callee) ||
3695        !strcmp("__intrinsic_image_atomic_min", callee) ||
3696        !strcmp("__intrinsic_image_atomic_max", callee) ||
3697        !strcmp("__intrinsic_image_atomic_and", callee) ||
3698        !strcmp("__intrinsic_image_atomic_or", callee) ||
3699        !strcmp("__intrinsic_image_atomic_xor", callee) ||
3700        !strcmp("__intrinsic_image_atomic_exchange", callee) ||
3701        !strcmp("__intrinsic_image_atomic_comp_swap", callee) ||
3702        !strcmp("__intrinsic_image_size", callee) ||
3703        !strcmp("__intrinsic_image_samples", callee)) {
3704       visit_image_intrinsic(ir);
3705       return;
3706    }
3707
3708    entry = get_function_signature(sig);
3709    /* Process in parameters. */
3710    foreach_two_lists(formal_node, &sig->parameters,
3711                      actual_node, &ir->actual_parameters) {
3712       ir_rvalue *param_rval = (ir_rvalue *) actual_node;
3713       ir_variable *param = (ir_variable *) formal_node;
3714
3715       if (param->data.mode == ir_var_function_in ||
3716           param->data.mode == ir_var_function_inout) {
3717          variable_storage *storage = find_variable_storage(param);
3718          assert(storage);
3719
3720          param_rval->accept(this);
3721          st_src_reg r = this->result;
3722
3723          st_dst_reg l;
3724          l.file = storage->file;
3725          l.index = storage->index;
3726          l.reladdr = NULL;
3727          l.writemask = WRITEMASK_XYZW;
3728
3729          for (i = 0; i < type_size(param->type); i++) {
3730             emit_asm(ir, TGSI_OPCODE_MOV, l, r);
3731             l.index++;
3732             r.index++;
3733          }
3734       }
3735    }
3736
3737    /* Emit call instruction */
3738    call_inst = emit_asm(ir, TGSI_OPCODE_CAL);
3739    call_inst->function = entry;
3740
3741    /* Process out parameters. */
3742    foreach_two_lists(formal_node, &sig->parameters,
3743                      actual_node, &ir->actual_parameters) {
3744       ir_rvalue *param_rval = (ir_rvalue *) actual_node;
3745       ir_variable *param = (ir_variable *) formal_node;
3746
3747       if (param->data.mode == ir_var_function_out ||
3748           param->data.mode == ir_var_function_inout) {
3749          variable_storage *storage = find_variable_storage(param);
3750          assert(storage);
3751
3752          st_src_reg r;
3753          r.file = storage->file;
3754          r.index = storage->index;
3755          r.reladdr = NULL;
3756          r.swizzle = SWIZZLE_NOOP;
3757          r.negate = 0;
3758
3759          param_rval->accept(this);
3760          st_dst_reg l = st_dst_reg(this->result);
3761
3762          for (i = 0; i < type_size(param->type); i++) {
3763             emit_asm(ir, TGSI_OPCODE_MOV, l, r);
3764             l.index++;
3765             r.index++;
3766          }
3767       }
3768    }
3769
3770    /* Process return value. */
3771    this->result = entry->return_reg;
3772 }
3773
3774 void
3775 glsl_to_tgsi_visitor::calc_deref_offsets(ir_dereference *head,
3776                                          ir_dereference *tail,
3777                                          unsigned *array_elements,
3778                                          unsigned *base,
3779                                          unsigned *index,
3780                                          st_src_reg *indirect,
3781                                          unsigned *location)
3782 {
3783    switch (tail->ir_type) {
3784    case ir_type_dereference_record: {
3785       ir_dereference_record *deref_record = tail->as_dereference_record();
3786       const glsl_type *struct_type = deref_record->record->type;
3787       int field_index = deref_record->record->type->field_index(deref_record->field);
3788
3789       calc_deref_offsets(head, deref_record->record->as_dereference(), array_elements, base, index, indirect, location);
3790
3791       assert(field_index >= 0);
3792       *location += struct_type->record_location_offset(field_index);
3793       break;
3794    }
3795
3796    case ir_type_dereference_array: {
3797       ir_dereference_array *deref_arr = tail->as_dereference_array();
3798       ir_constant *array_index = deref_arr->array_index->constant_expression_value();
3799
3800       if (!array_index) {
3801          st_src_reg temp_reg;
3802          st_dst_reg temp_dst;
3803
3804          temp_reg = get_temp(glsl_type::uint_type);
3805          temp_dst = st_dst_reg(temp_reg);
3806          temp_dst.writemask = 1;
3807
3808          deref_arr->array_index->accept(this);
3809          if (*array_elements != 1)
3810             emit_asm(NULL, TGSI_OPCODE_MUL, temp_dst, this->result, st_src_reg_for_int(*array_elements));
3811          else
3812             emit_asm(NULL, TGSI_OPCODE_MOV, temp_dst, this->result);
3813
3814          if (indirect->file == PROGRAM_UNDEFINED)
3815             *indirect = temp_reg;
3816          else {
3817             temp_dst = st_dst_reg(*indirect);
3818             temp_dst.writemask = 1;
3819             emit_asm(NULL, TGSI_OPCODE_ADD, temp_dst, *indirect, temp_reg);
3820          }
3821       } else
3822          *index += array_index->value.u[0] * *array_elements;
3823
3824       *array_elements *= deref_arr->array->type->length;
3825
3826       calc_deref_offsets(head, deref_arr->array->as_dereference(), array_elements, base, index, indirect, location);
3827       break;
3828    }
3829    default:
3830       break;
3831    }
3832 }
3833
3834 void
3835 glsl_to_tgsi_visitor::get_deref_offsets(ir_dereference *ir,
3836                                         unsigned *array_size,
3837                                         unsigned *base,
3838                                         unsigned *index,
3839                                         st_src_reg *reladdr)
3840 {
3841    GLuint shader = _mesa_program_enum_to_shader_stage(this->prog->Target);
3842    unsigned location = 0;
3843    ir_variable *var = ir->variable_referenced();
3844
3845    memset(reladdr, 0, sizeof(*reladdr));
3846    reladdr->file = PROGRAM_UNDEFINED;
3847
3848    *base = 0;
3849    *array_size = 1;
3850
3851    assert(var);
3852    location = var->data.location;
3853    calc_deref_offsets(ir, ir, array_size, base, index, reladdr, &location);
3854
3855    /*
3856     * If we end up with no indirect then adjust the base to the index,
3857     * and set the array size to 1.
3858     */
3859    if (reladdr->file == PROGRAM_UNDEFINED) {
3860       *base = *index;
3861       *array_size = 1;
3862    }
3863
3864    if (location != 0xffffffff) {
3865       *base += this->shader_program->UniformStorage[location].opaque[shader].index;
3866       *index += this->shader_program->UniformStorage[location].opaque[shader].index;
3867    }
3868 }
3869
3870 void
3871 glsl_to_tgsi_visitor::visit(ir_texture *ir)
3872 {
3873    st_src_reg result_src, coord, cube_sc, lod_info, projector, dx, dy;
3874    st_src_reg offset[MAX_GLSL_TEXTURE_OFFSET], sample_index, component;
3875    st_src_reg levels_src, reladdr;
3876    st_dst_reg result_dst, coord_dst, cube_sc_dst;
3877    glsl_to_tgsi_instruction *inst = NULL;
3878    unsigned opcode = TGSI_OPCODE_NOP;
3879    const glsl_type *sampler_type = ir->sampler->type;
3880    unsigned sampler_array_size = 1, sampler_index = 0, sampler_base = 0;
3881    bool is_cube_array = false;
3882    unsigned i;
3883
3884    /* if we are a cube array sampler */
3885    if ((sampler_type->sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE &&
3886         sampler_type->sampler_array)) {
3887       is_cube_array = true;
3888    }
3889
3890    if (ir->coordinate) {
3891       ir->coordinate->accept(this);
3892
3893       /* Put our coords in a temp.  We'll need to modify them for shadow,
3894        * projection, or LOD, so the only case we'd use it as is is if
3895        * we're doing plain old texturing.  The optimization passes on
3896        * glsl_to_tgsi_visitor should handle cleaning up our mess in that case.
3897        */
3898       coord = get_temp(glsl_type::vec4_type);
3899       coord_dst = st_dst_reg(coord);
3900       coord_dst.writemask = (1 << ir->coordinate->type->vector_elements) - 1;
3901       emit_asm(ir, TGSI_OPCODE_MOV, coord_dst, this->result);
3902    }
3903
3904    if (ir->projector) {
3905       ir->projector->accept(this);
3906       projector = this->result;
3907    }
3908
3909    /* Storage for our result.  Ideally for an assignment we'd be using
3910     * the actual storage for the result here, instead.
3911     */
3912    result_src = get_temp(ir->type);
3913    result_dst = st_dst_reg(result_src);
3914
3915    switch (ir->op) {
3916    case ir_tex:
3917       opcode = (is_cube_array && ir->shadow_comparitor) ? TGSI_OPCODE_TEX2 : TGSI_OPCODE_TEX;
3918       if (ir->offset) {
3919          ir->offset->accept(this);
3920          offset[0] = this->result;
3921       }
3922       break;
3923    case ir_txb:
3924       if (is_cube_array ||
3925           sampler_type == glsl_type::samplerCubeShadow_type) {
3926          opcode = TGSI_OPCODE_TXB2;
3927       }
3928       else {
3929          opcode = TGSI_OPCODE_TXB;
3930       }
3931       ir->lod_info.bias->accept(this);
3932       lod_info = this->result;
3933       if (ir->offset) {
3934          ir->offset->accept(this);
3935          offset[0] = this->result;
3936       }
3937       break;
3938    case ir_txl:
3939       opcode = is_cube_array ? TGSI_OPCODE_TXL2 : TGSI_OPCODE_TXL;
3940       ir->lod_info.lod->accept(this);
3941       lod_info = this->result;
3942       if (ir->offset) {
3943          ir->offset->accept(this);
3944          offset[0] = this->result;
3945       }
3946       break;
3947    case ir_txd:
3948       opcode = TGSI_OPCODE_TXD;
3949       ir->lod_info.grad.dPdx->accept(this);
3950       dx = this->result;
3951       ir->lod_info.grad.dPdy->accept(this);
3952       dy = this->result;
3953       if (ir->offset) {
3954          ir->offset->accept(this);
3955          offset[0] = this->result;
3956       }
3957       break;
3958    case ir_txs:
3959       opcode = TGSI_OPCODE_TXQ;
3960       ir->lod_info.lod->accept(this);
3961       lod_info = this->result;
3962       break;
3963    case ir_query_levels:
3964       opcode = TGSI_OPCODE_TXQ;
3965       lod_info = undef_src;
3966       levels_src = get_temp(ir->type);
3967       break;
3968    case ir_txf:
3969       opcode = TGSI_OPCODE_TXF;
3970       ir->lod_info.lod->accept(this);
3971       lod_info = this->result;
3972       if (ir->offset) {
3973          ir->offset->accept(this);
3974          offset[0] = this->result;
3975       }
3976       break;
3977    case ir_txf_ms:
3978       opcode = TGSI_OPCODE_TXF;
3979       ir->lod_info.sample_index->accept(this);
3980       sample_index = this->result;
3981       break;
3982    case ir_tg4:
3983       opcode = TGSI_OPCODE_TG4;
3984       ir->lod_info.component->accept(this);
3985       component = this->result;
3986       if (ir->offset) {
3987          ir->offset->accept(this);
3988          if (ir->offset->type->base_type == GLSL_TYPE_ARRAY) {
3989             const glsl_type *elt_type = ir->offset->type->fields.array;
3990             for (i = 0; i < ir->offset->type->length; i++) {
3991                offset[i] = this->result;
3992                offset[i].index += i * type_size(elt_type);
3993                offset[i].type = elt_type->base_type;
3994                offset[i].swizzle = swizzle_for_size(elt_type->vector_elements);
3995             }
3996          } else {
3997             offset[0] = this->result;
3998          }
3999       }
4000       break;
4001    case ir_lod:
4002       opcode = TGSI_OPCODE_LODQ;
4003       break;
4004    case ir_texture_samples:
4005       opcode = TGSI_OPCODE_TXQS;
4006       break;
4007    case ir_samples_identical:
4008       unreachable("Unexpected ir_samples_identical opcode");
4009    }
4010
4011    if (ir->projector) {
4012       if (opcode == TGSI_OPCODE_TEX) {
4013          /* Slot the projector in as the last component of the coord. */
4014          coord_dst.writemask = WRITEMASK_W;
4015          emit_asm(ir, TGSI_OPCODE_MOV, coord_dst, projector);
4016          coord_dst.writemask = WRITEMASK_XYZW;
4017          opcode = TGSI_OPCODE_TXP;
4018       } else {
4019          st_src_reg coord_w = coord;
4020          coord_w.swizzle = SWIZZLE_WWWW;
4021
4022          /* For the other TEX opcodes there's no projective version
4023           * since the last slot is taken up by LOD info.  Do the
4024           * projective divide now.
4025           */
4026          coord_dst.writemask = WRITEMASK_W;
4027          emit_asm(ir, TGSI_OPCODE_RCP, coord_dst, projector);
4028
4029          /* In the case where we have to project the coordinates "by hand,"
4030           * the shadow comparator value must also be projected.
4031           */
4032          st_src_reg tmp_src = coord;
4033          if (ir->shadow_comparitor) {
4034             /* Slot the shadow value in as the second to last component of the
4035              * coord.
4036              */
4037             ir->shadow_comparitor->accept(this);
4038
4039             tmp_src = get_temp(glsl_type::vec4_type);
4040             st_dst_reg tmp_dst = st_dst_reg(tmp_src);
4041
4042             /* Projective division not allowed for array samplers. */
4043             assert(!sampler_type->sampler_array);
4044
4045             tmp_dst.writemask = WRITEMASK_Z;
4046             emit_asm(ir, TGSI_OPCODE_MOV, tmp_dst, this->result);
4047
4048             tmp_dst.writemask = WRITEMASK_XY;
4049             emit_asm(ir, TGSI_OPCODE_MOV, tmp_dst, coord);
4050          }
4051
4052          coord_dst.writemask = WRITEMASK_XYZ;
4053          emit_asm(ir, TGSI_OPCODE_MUL, coord_dst, tmp_src, coord_w);
4054
4055          coord_dst.writemask = WRITEMASK_XYZW;
4056          coord.swizzle = SWIZZLE_XYZW;
4057       }
4058    }
4059
4060    /* If projection is done and the opcode is not TGSI_OPCODE_TXP, then the shadow
4061     * comparator was put in the correct place (and projected) by the code,
4062     * above, that handles by-hand projection.
4063     */
4064    if (ir->shadow_comparitor && (!ir->projector || opcode == TGSI_OPCODE_TXP)) {
4065       /* Slot the shadow value in as the second to last component of the
4066        * coord.
4067        */
4068       ir->shadow_comparitor->accept(this);
4069
4070       if (is_cube_array) {
4071          cube_sc = get_temp(glsl_type::float_type);
4072          cube_sc_dst = st_dst_reg(cube_sc);
4073          cube_sc_dst.writemask = WRITEMASK_X;
4074          emit_asm(ir, TGSI_OPCODE_MOV, cube_sc_dst, this->result);
4075          cube_sc_dst.writemask = WRITEMASK_X;
4076       }
4077       else {
4078          if ((sampler_type->sampler_dimensionality == GLSL_SAMPLER_DIM_2D &&
4079               sampler_type->sampler_array) ||
4080              sampler_type->sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE) {
4081             coord_dst.writemask = WRITEMASK_W;
4082          } else {
4083             coord_dst.writemask = WRITEMASK_Z;
4084          }
4085          emit_asm(ir, TGSI_OPCODE_MOV, coord_dst, this->result);
4086          coord_dst.writemask = WRITEMASK_XYZW;
4087       }
4088    }
4089
4090    if (ir->op == ir_txf_ms) {
4091       coord_dst.writemask = WRITEMASK_W;
4092       emit_asm(ir, TGSI_OPCODE_MOV, coord_dst, sample_index);
4093       coord_dst.writemask = WRITEMASK_XYZW;
4094    } else if (opcode == TGSI_OPCODE_TXL || opcode == TGSI_OPCODE_TXB ||
4095        opcode == TGSI_OPCODE_TXF) {
4096       /* TGSI stores LOD or LOD bias in the last channel of the coords. */
4097       coord_dst.writemask = WRITEMASK_W;
4098       emit_asm(ir, TGSI_OPCODE_MOV, coord_dst, lod_info);
4099       coord_dst.writemask = WRITEMASK_XYZW;
4100    }
4101
4102    get_deref_offsets(ir->sampler, &sampler_array_size, &sampler_base,
4103                      &sampler_index, &reladdr);
4104    if (reladdr.file != PROGRAM_UNDEFINED)
4105       emit_arl(ir, sampler_reladdr, reladdr);
4106
4107    if (opcode == TGSI_OPCODE_TXD)
4108       inst = emit_asm(ir, opcode, result_dst, coord, dx, dy);
4109    else if (opcode == TGSI_OPCODE_TXQ) {
4110       if (ir->op == ir_query_levels) {
4111          /* the level is stored in W */
4112          inst = emit_asm(ir, opcode, st_dst_reg(levels_src), lod_info);
4113          result_dst.writemask = WRITEMASK_X;
4114          levels_src.swizzle = SWIZZLE_WWWW;
4115          emit_asm(ir, TGSI_OPCODE_MOV, result_dst, levels_src);
4116       } else
4117          inst = emit_asm(ir, opcode, result_dst, lod_info);
4118    } else if (opcode == TGSI_OPCODE_TXQS) {
4119       inst = emit_asm(ir, opcode, result_dst);
4120    } else if (opcode == TGSI_OPCODE_TXF) {
4121       inst = emit_asm(ir, opcode, result_dst, coord);
4122    } else if (opcode == TGSI_OPCODE_TXL2 || opcode == TGSI_OPCODE_TXB2) {
4123       inst = emit_asm(ir, opcode, result_dst, coord, lod_info);
4124    } else if (opcode == TGSI_OPCODE_TEX2) {
4125       inst = emit_asm(ir, opcode, result_dst, coord, cube_sc);
4126    } else if (opcode == TGSI_OPCODE_TG4) {
4127       if (is_cube_array && ir->shadow_comparitor) {
4128          inst = emit_asm(ir, opcode, result_dst, coord, cube_sc);
4129       } else {
4130          inst = emit_asm(ir, opcode, result_dst, coord, component);
4131       }
4132    } else
4133       inst = emit_asm(ir, opcode, result_dst, coord);
4134
4135    if (ir->shadow_comparitor)
4136       inst->tex_shadow = GL_TRUE;
4137
4138    inst->sampler.index = sampler_index;
4139    inst->sampler_array_size = sampler_array_size;
4140    inst->sampler_base = sampler_base;
4141
4142    if (reladdr.file != PROGRAM_UNDEFINED) {
4143       inst->sampler.reladdr = ralloc(mem_ctx, st_src_reg);
4144       memcpy(inst->sampler.reladdr, &reladdr, sizeof(reladdr));
4145    }
4146
4147    if (ir->offset) {
4148       for (i = 0; i < MAX_GLSL_TEXTURE_OFFSET && offset[i].file != PROGRAM_UNDEFINED; i++)
4149          inst->tex_offsets[i] = offset[i];
4150       inst->tex_offset_num_offset = i;
4151    }
4152
4153    switch (sampler_type->sampler_dimensionality) {
4154    case GLSL_SAMPLER_DIM_1D:
4155       inst->tex_target = (sampler_type->sampler_array)
4156          ? TEXTURE_1D_ARRAY_INDEX : TEXTURE_1D_INDEX;
4157       break;
4158    case GLSL_SAMPLER_DIM_2D:
4159       inst->tex_target = (sampler_type->sampler_array)
4160          ? TEXTURE_2D_ARRAY_INDEX : TEXTURE_2D_INDEX;
4161       break;
4162    case GLSL_SAMPLER_DIM_3D:
4163       inst->tex_target = TEXTURE_3D_INDEX;
4164       break;
4165    case GLSL_SAMPLER_DIM_CUBE:
4166       inst->tex_target = (sampler_type->sampler_array)
4167          ? TEXTURE_CUBE_ARRAY_INDEX : TEXTURE_CUBE_INDEX;
4168       break;
4169    case GLSL_SAMPLER_DIM_RECT:
4170       inst->tex_target = TEXTURE_RECT_INDEX;
4171       break;
4172    case GLSL_SAMPLER_DIM_BUF:
4173       inst->tex_target = TEXTURE_BUFFER_INDEX;
4174       break;
4175    case GLSL_SAMPLER_DIM_EXTERNAL:
4176       inst->tex_target = TEXTURE_EXTERNAL_INDEX;
4177       break;
4178    case GLSL_SAMPLER_DIM_MS:
4179       inst->tex_target = (sampler_type->sampler_array)
4180          ? TEXTURE_2D_MULTISAMPLE_ARRAY_INDEX : TEXTURE_2D_MULTISAMPLE_INDEX;
4181       break;
4182    default:
4183       assert(!"Should not get here.");
4184    }
4185
4186    inst->tex_type = ir->type->base_type;
4187
4188    this->result = result_src;
4189 }
4190
4191 void
4192 glsl_to_tgsi_visitor::visit(ir_return *ir)
4193 {
4194    if (ir->get_value()) {
4195       st_dst_reg l;
4196       int i;
4197
4198       assert(current_function);
4199
4200       ir->get_value()->accept(this);
4201       st_src_reg r = this->result;
4202
4203       l = st_dst_reg(current_function->return_reg);
4204
4205       for (i = 0; i < type_size(current_function->sig->return_type); i++) {
4206          emit_asm(ir, TGSI_OPCODE_MOV, l, r);
4207          l.index++;
4208          r.index++;
4209       }
4210    }
4211
4212    emit_asm(ir, TGSI_OPCODE_RET);
4213 }
4214
4215 void
4216 glsl_to_tgsi_visitor::visit(ir_discard *ir)
4217 {
4218    if (ir->condition) {
4219       ir->condition->accept(this);
4220       st_src_reg condition = this->result;
4221
4222       /* Convert the bool condition to a float so we can negate. */
4223       if (native_integers) {
4224          st_src_reg temp = get_temp(ir->condition->type);
4225          emit_asm(ir, TGSI_OPCODE_AND, st_dst_reg(temp),
4226               condition, st_src_reg_for_float(1.0));
4227          condition = temp;
4228       }
4229
4230       condition.negate = ~condition.negate;
4231       emit_asm(ir, TGSI_OPCODE_KILL_IF, undef_dst, condition);
4232    } else {
4233       /* unconditional kil */
4234       emit_asm(ir, TGSI_OPCODE_KILL);
4235    }
4236 }
4237
4238 void
4239 glsl_to_tgsi_visitor::visit(ir_if *ir)
4240 {
4241    unsigned if_opcode;
4242    glsl_to_tgsi_instruction *if_inst;
4243
4244    ir->condition->accept(this);
4245    assert(this->result.file != PROGRAM_UNDEFINED);
4246
4247    if_opcode = native_integers ? TGSI_OPCODE_UIF : TGSI_OPCODE_IF;
4248
4249    if_inst = emit_asm(ir->condition, if_opcode, undef_dst, this->result);
4250
4251    this->instructions.push_tail(if_inst);
4252
4253    visit_exec_list(&ir->then_instructions, this);
4254
4255    if (!ir->else_instructions.is_empty()) {
4256       emit_asm(ir->condition, TGSI_OPCODE_ELSE);
4257       visit_exec_list(&ir->else_instructions, this);
4258    }
4259
4260    if_inst = emit_asm(ir->condition, TGSI_OPCODE_ENDIF);
4261 }
4262
4263
4264 void
4265 glsl_to_tgsi_visitor::visit(ir_emit_vertex *ir)
4266 {
4267    assert(this->prog->Target == GL_GEOMETRY_PROGRAM_NV);
4268
4269    ir->stream->accept(this);
4270    emit_asm(ir, TGSI_OPCODE_EMIT, undef_dst, this->result);
4271 }
4272
4273 void
4274 glsl_to_tgsi_visitor::visit(ir_end_primitive *ir)
4275 {
4276    assert(this->prog->Target == GL_GEOMETRY_PROGRAM_NV);
4277
4278    ir->stream->accept(this);
4279    emit_asm(ir, TGSI_OPCODE_ENDPRIM, undef_dst, this->result);
4280 }
4281
4282 void
4283 glsl_to_tgsi_visitor::visit(ir_barrier *ir)
4284 {
4285    assert(this->prog->Target == GL_TESS_CONTROL_PROGRAM_NV ||
4286           this->prog->Target == GL_COMPUTE_PROGRAM_NV);
4287
4288    emit_asm(ir, TGSI_OPCODE_BARRIER);
4289 }
4290
4291 glsl_to_tgsi_visitor::glsl_to_tgsi_visitor()
4292 {
4293    result.file = PROGRAM_UNDEFINED;
4294    next_temp = 1;
4295    array_sizes = NULL;
4296    max_num_arrays = 0;
4297    next_array = 0;
4298    num_input_arrays = 0;
4299    num_output_arrays = 0;
4300    next_signature_id = 1;
4301    num_immediates = 0;
4302    current_function = NULL;
4303    num_address_regs = 0;
4304    samplers_used = 0;
4305    buffers_used = 0;
4306    images_used = 0;
4307    indirect_addr_consts = false;
4308    wpos_transform_const = -1;
4309    glsl_version = 0;
4310    native_integers = false;
4311    mem_ctx = ralloc_context(NULL);
4312    ctx = NULL;
4313    prog = NULL;
4314    shader_program = NULL;
4315    shader = NULL;
4316    options = NULL;
4317    have_sqrt = false;
4318    have_fma = false;
4319    use_shared_memory = false;
4320 }
4321
4322 glsl_to_tgsi_visitor::~glsl_to_tgsi_visitor()
4323 {
4324    free(array_sizes);
4325    ralloc_free(mem_ctx);
4326 }
4327
4328 extern "C" void free_glsl_to_tgsi_visitor(glsl_to_tgsi_visitor *v)
4329 {
4330    delete v;
4331 }
4332
4333
4334 /**
4335  * Count resources used by the given gpu program (number of texture
4336  * samplers, etc).
4337  */
4338 static void
4339 count_resources(glsl_to_tgsi_visitor *v, gl_program *prog)
4340 {
4341    v->samplers_used = 0;
4342    v->buffers_used = 0;
4343    v->images_used = 0;
4344
4345    foreach_in_list(glsl_to_tgsi_instruction, inst, &v->instructions) {
4346       if (inst->info->is_tex) {
4347          for (int i = 0; i < inst->sampler_array_size; i++) {
4348             unsigned idx = inst->sampler_base + i;
4349             v->samplers_used |= 1 << idx;
4350
4351             debug_assert(idx < (int)ARRAY_SIZE(v->sampler_types));
4352             v->sampler_types[idx] = inst->tex_type;
4353             v->sampler_targets[idx] =
4354                st_translate_texture_target(inst->tex_target, inst->tex_shadow);
4355
4356             if (inst->tex_shadow) {
4357                prog->ShadowSamplers |= 1 << (inst->sampler.index + i);
4358             }
4359          }
4360       }
4361       if (inst->buffer.file != PROGRAM_UNDEFINED && (
4362                 is_resource_instruction(inst->op) ||
4363                 inst->op == TGSI_OPCODE_STORE)) {
4364          if (inst->buffer.file == PROGRAM_BUFFER) {
4365             v->buffers_used |= 1 << inst->buffer.index;
4366          } else if (inst->buffer.file == PROGRAM_MEMORY) {
4367             v->use_shared_memory = true;
4368          } else {
4369             assert(inst->buffer.file == PROGRAM_IMAGE);
4370             for (int i = 0; i < inst->sampler_array_size; i++) {
4371                unsigned idx = inst->sampler_base + i;
4372                v->images_used |= 1 << idx;
4373                v->image_targets[idx] =
4374                   st_translate_texture_target(inst->tex_target, false);
4375                v->image_formats[idx] = inst->image_format;
4376             }
4377          }
4378       }
4379    }
4380    prog->SamplersUsed = v->samplers_used;
4381
4382    if (v->shader_program != NULL)
4383       _mesa_update_shader_textures_used(v->shader_program, prog);
4384 }
4385
4386 /**
4387  * Returns the mask of channels (bitmask of WRITEMASK_X,Y,Z,W) which
4388  * are read from the given src in this instruction
4389  */
4390 static int
4391 get_src_arg_mask(st_dst_reg dst, st_src_reg src)
4392 {
4393    int read_mask = 0, comp;
4394
4395    /* Now, given the src swizzle and the written channels, find which
4396     * components are actually read
4397     */
4398    for (comp = 0; comp < 4; ++comp) {
4399       const unsigned coord = GET_SWZ(src.swizzle, comp);
4400       assert(coord < 4);
4401       if (dst.writemask & (1 << comp) && coord <= SWIZZLE_W)
4402          read_mask |= 1 << coord;
4403    }
4404
4405    return read_mask;
4406 }
4407
4408 /**
4409  * This pass replaces CMP T0, T1 T2 T0 with MOV T0, T2 when the CMP
4410  * instruction is the first instruction to write to register T0.  There are
4411  * several lowering passes done in GLSL IR (e.g. branches and
4412  * relative addressing) that create a large number of conditional assignments
4413  * that ir_to_mesa converts to CMP instructions like the one mentioned above.
4414  *
4415  * Here is why this conversion is safe:
4416  * CMP T0, T1 T2 T0 can be expanded to:
4417  * if (T1 < 0.0)
4418  *   MOV T0, T2;
4419  * else
4420  *   MOV T0, T0;
4421  *
4422  * If (T1 < 0.0) evaluates to true then our replacement MOV T0, T2 is the same
4423  * as the original program.  If (T1 < 0.0) evaluates to false, executing
4424  * MOV T0, T0 will store a garbage value in T0 since T0 is uninitialized.
4425  * Therefore, it doesn't matter that we are replacing MOV T0, T0 with MOV T0, T2
4426  * because any instruction that was going to read from T0 after this was going
4427  * to read a garbage value anyway.
4428  */
4429 void
4430 glsl_to_tgsi_visitor::simplify_cmp(void)
4431 {
4432    int tempWritesSize = 0;
4433    unsigned *tempWrites = NULL;
4434    unsigned outputWrites[VARYING_SLOT_TESS_MAX];
4435
4436    memset(outputWrites, 0, sizeof(outputWrites));
4437
4438    foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) {
4439       unsigned prevWriteMask = 0;
4440
4441       /* Give up if we encounter relative addressing or flow control. */
4442       if (inst->dst[0].reladdr || inst->dst[0].reladdr2 ||
4443           inst->dst[1].reladdr || inst->dst[1].reladdr2 ||
4444           tgsi_get_opcode_info(inst->op)->is_branch ||
4445           inst->op == TGSI_OPCODE_BGNSUB ||
4446           inst->op == TGSI_OPCODE_CONT ||
4447           inst->op == TGSI_OPCODE_END ||
4448           inst->op == TGSI_OPCODE_ENDSUB ||
4449           inst->op == TGSI_OPCODE_RET) {
4450          break;
4451       }
4452
4453       if (inst->dst[0].file == PROGRAM_OUTPUT) {
4454          assert(inst->dst[0].index < (signed)ARRAY_SIZE(outputWrites));
4455          prevWriteMask = outputWrites[inst->dst[0].index];
4456          outputWrites[inst->dst[0].index] |= inst->dst[0].writemask;
4457       } else if (inst->dst[0].file == PROGRAM_TEMPORARY) {
4458          if (inst->dst[0].index >= tempWritesSize) {
4459             const int inc = 4096;
4460
4461             tempWrites = (unsigned*)
4462                          realloc(tempWrites,
4463                                  (tempWritesSize + inc) * sizeof(unsigned));
4464             if (!tempWrites)
4465                return;
4466
4467             memset(tempWrites + tempWritesSize, 0, inc * sizeof(unsigned));
4468             tempWritesSize += inc;
4469          }
4470
4471          prevWriteMask = tempWrites[inst->dst[0].index];
4472          tempWrites[inst->dst[0].index] |= inst->dst[0].writemask;
4473       } else
4474          continue;
4475
4476       /* For a CMP to be considered a conditional write, the destination
4477        * register and source register two must be the same. */
4478       if (inst->op == TGSI_OPCODE_CMP
4479           && !(inst->dst[0].writemask & prevWriteMask)
4480           && inst->src[2].file == inst->dst[0].file
4481           && inst->src[2].index == inst->dst[0].index
4482           && inst->dst[0].writemask == get_src_arg_mask(inst->dst[0], inst->src[2])) {
4483
4484          inst->op = TGSI_OPCODE_MOV;
4485          inst->src[0] = inst->src[1];
4486       }
4487    }
4488
4489    free(tempWrites);
4490 }
4491
4492 /* Replaces all references to a temporary register index with another index. */
4493 void
4494 glsl_to_tgsi_visitor::rename_temp_registers(int num_renames, struct rename_reg_pair *renames)
4495 {
4496    foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) {
4497       unsigned j;
4498       int k;
4499       for (j = 0; j < num_inst_src_regs(inst); j++) {
4500          if (inst->src[j].file == PROGRAM_TEMPORARY)
4501             for (k = 0; k < num_renames; k++)
4502                if (inst->src[j].index == renames[k].old_reg)
4503                   inst->src[j].index = renames[k].new_reg;
4504       }
4505
4506       for (j = 0; j < inst->tex_offset_num_offset; j++) {
4507          if (inst->tex_offsets[j].file == PROGRAM_TEMPORARY)
4508             for (k = 0; k < num_renames; k++)
4509                if (inst->tex_offsets[j].index == renames[k].old_reg)
4510                   inst->tex_offsets[j].index = renames[k].new_reg;
4511       }
4512
4513       for (j = 0; j < num_inst_dst_regs(inst); j++) {
4514          if (inst->dst[j].file == PROGRAM_TEMPORARY)
4515              for (k = 0; k < num_renames; k++)
4516                 if (inst->dst[j].index == renames[k].old_reg)
4517                    inst->dst[j].index = renames[k].new_reg;
4518       }
4519    }
4520 }
4521
4522 void
4523 glsl_to_tgsi_visitor::get_first_temp_read(int *first_reads)
4524 {
4525    int depth = 0; /* loop depth */
4526    int loop_start = -1; /* index of the first active BGNLOOP (if any) */
4527    unsigned i = 0, j;
4528
4529    foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) {
4530       for (j = 0; j < num_inst_src_regs(inst); j++) {
4531          if (inst->src[j].file == PROGRAM_TEMPORARY) {
4532             if (first_reads[inst->src[j].index] == -1)
4533                 first_reads[inst->src[j].index] = (depth == 0) ? i : loop_start;
4534          }
4535       }
4536       for (j = 0; j < inst->tex_offset_num_offset; j++) {
4537          if (inst->tex_offsets[j].file == PROGRAM_TEMPORARY) {
4538             if (first_reads[inst->tex_offsets[j].index] == -1)
4539                first_reads[inst->tex_offsets[j].index] = (depth == 0) ? i : loop_start;
4540          }
4541       }
4542       if (inst->op == TGSI_OPCODE_BGNLOOP) {
4543          if(depth++ == 0)
4544             loop_start = i;
4545       } else if (inst->op == TGSI_OPCODE_ENDLOOP) {
4546          if (--depth == 0)
4547             loop_start = -1;
4548       }
4549       assert(depth >= 0);
4550       i++;
4551    }
4552 }
4553
4554 void
4555 glsl_to_tgsi_visitor::get_last_temp_read_first_temp_write(int *last_reads, int *first_writes)
4556 {
4557    int depth = 0; /* loop depth */
4558    int loop_start = -1; /* index of the first active BGNLOOP (if any) */
4559    unsigned i = 0, j;
4560    int k;
4561    foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) {
4562       for (j = 0; j < num_inst_src_regs(inst); j++) {
4563          if (inst->src[j].file == PROGRAM_TEMPORARY)
4564             last_reads[inst->src[j].index] = (depth == 0) ? i : -2;
4565       }
4566       for (j = 0; j < num_inst_dst_regs(inst); j++) {
4567          if (inst->dst[j].file == PROGRAM_TEMPORARY) {
4568             if (first_writes[inst->dst[j].index] == -1)
4569                first_writes[inst->dst[j].index] = (depth == 0) ? i : loop_start;
4570             last_reads[inst->dst[j].index] = (depth == 0) ? i : -2;
4571          }
4572       }
4573       for (j = 0; j < inst->tex_offset_num_offset; j++) {
4574          if (inst->tex_offsets[j].file == PROGRAM_TEMPORARY)
4575             last_reads[inst->tex_offsets[j].index] = (depth == 0) ? i : -2;
4576       }
4577       if (inst->op == TGSI_OPCODE_BGNLOOP) {
4578          if(depth++ == 0)
4579             loop_start = i;
4580       } else if (inst->op == TGSI_OPCODE_ENDLOOP) {
4581          if (--depth == 0) {
4582             loop_start = -1;
4583             for (k = 0; k < this->next_temp; k++) {
4584                if (last_reads[k] == -2) {
4585                   last_reads[k] = i;
4586                }
4587             }
4588          }
4589       }
4590       assert(depth >= 0);
4591       i++;
4592    }
4593 }
4594
4595 void
4596 glsl_to_tgsi_visitor::get_last_temp_write(int *last_writes)
4597 {
4598    int depth = 0; /* loop depth */
4599    int i = 0, k;
4600    unsigned j;
4601
4602    foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) {
4603       for (j = 0; j < num_inst_dst_regs(inst); j++) {
4604          if (inst->dst[j].file == PROGRAM_TEMPORARY)
4605             last_writes[inst->dst[j].index] = (depth == 0) ? i : -2;
4606       }
4607
4608       if (inst->op == TGSI_OPCODE_BGNLOOP)
4609          depth++;
4610       else if (inst->op == TGSI_OPCODE_ENDLOOP)
4611          if (--depth == 0) {
4612             for (k = 0; k < this->next_temp; k++) {
4613                if (last_writes[k] == -2) {
4614                   last_writes[k] = i;
4615                }
4616             }
4617          }
4618       assert(depth >= 0);
4619       i++;
4620    }
4621 }
4622
4623 /*
4624  * On a basic block basis, tracks available PROGRAM_TEMPORARY register
4625  * channels for copy propagation and updates following instructions to
4626  * use the original versions.
4627  *
4628  * The glsl_to_tgsi_visitor lazily produces code assuming that this pass
4629  * will occur.  As an example, a TXP production before this pass:
4630  *
4631  * 0: MOV TEMP[1], INPUT[4].xyyy;
4632  * 1: MOV TEMP[1].w, INPUT[4].wwww;
4633  * 2: TXP TEMP[2], TEMP[1], texture[0], 2D;
4634  *
4635  * and after:
4636  *
4637  * 0: MOV TEMP[1], INPUT[4].xyyy;
4638  * 1: MOV TEMP[1].w, INPUT[4].wwww;
4639  * 2: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D;
4640  *
4641  * which allows for dead code elimination on TEMP[1]'s writes.
4642  */
4643 void
4644 glsl_to_tgsi_visitor::copy_propagate(void)
4645 {
4646    glsl_to_tgsi_instruction **acp = rzalloc_array(mem_ctx,
4647                                                   glsl_to_tgsi_instruction *,
4648                                                   this->next_temp * 4);
4649    int *acp_level = rzalloc_array(mem_ctx, int, this->next_temp * 4);
4650    int level = 0;
4651
4652    foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) {
4653       assert(inst->dst[0].file != PROGRAM_TEMPORARY
4654              || inst->dst[0].index < this->next_temp);
4655
4656       /* First, do any copy propagation possible into the src regs. */
4657       for (int r = 0; r < 3; r++) {
4658          glsl_to_tgsi_instruction *first = NULL;
4659          bool good = true;
4660          int acp_base = inst->src[r].index * 4;
4661
4662          if (inst->src[r].file != PROGRAM_TEMPORARY ||
4663              inst->src[r].reladdr ||
4664              inst->src[r].reladdr2)
4665             continue;
4666
4667          /* See if we can find entries in the ACP consisting of MOVs
4668           * from the same src register for all the swizzled channels
4669           * of this src register reference.
4670           */
4671          for (int i = 0; i < 4; i++) {
4672             int src_chan = GET_SWZ(inst->src[r].swizzle, i);
4673             glsl_to_tgsi_instruction *copy_chan = acp[acp_base + src_chan];
4674
4675             if (!copy_chan) {
4676                good = false;
4677                break;
4678             }
4679
4680             assert(acp_level[acp_base + src_chan] <= level);
4681
4682             if (!first) {
4683                first = copy_chan;
4684             } else {
4685                if (first->src[0].file != copy_chan->src[0].file ||
4686                    first->src[0].index != copy_chan->src[0].index ||
4687                    first->src[0].double_reg2 != copy_chan->src[0].double_reg2 ||
4688                    first->src[0].index2D != copy_chan->src[0].index2D) {
4689                   good = false;
4690                   break;
4691                }
4692             }
4693          }
4694
4695          if (good) {
4696             /* We've now validated that we can copy-propagate to
4697              * replace this src register reference.  Do it.
4698              */
4699             inst->src[r].file = first->src[0].file;
4700             inst->src[r].index = first->src[0].index;
4701             inst->src[r].index2D = first->src[0].index2D;
4702             inst->src[r].has_index2 = first->src[0].has_index2;
4703             inst->src[r].double_reg2 = first->src[0].double_reg2;
4704             inst->src[r].array_id = first->src[0].array_id;
4705
4706             int swizzle = 0;
4707             for (int i = 0; i < 4; i++) {
4708                int src_chan = GET_SWZ(inst->src[r].swizzle, i);
4709                glsl_to_tgsi_instruction *copy_inst = acp[acp_base + src_chan];
4710                swizzle |= (GET_SWZ(copy_inst->src[0].swizzle, src_chan) << (3 * i));
4711             }
4712             inst->src[r].swizzle = swizzle;
4713          }
4714       }
4715
4716       switch (inst->op) {
4717       case TGSI_OPCODE_BGNLOOP:
4718       case TGSI_OPCODE_ENDLOOP:
4719          /* End of a basic block, clear the ACP entirely. */
4720          memset(acp, 0, sizeof(*acp) * this->next_temp * 4);
4721          break;
4722
4723       case TGSI_OPCODE_IF:
4724       case TGSI_OPCODE_UIF:
4725          ++level;
4726          break;
4727
4728       case TGSI_OPCODE_ENDIF:
4729       case TGSI_OPCODE_ELSE:
4730          /* Clear all channels written inside the block from the ACP, but
4731           * leaving those that were not touched.
4732           */
4733          for (int r = 0; r < this->next_temp; r++) {
4734             for (int c = 0; c < 4; c++) {
4735                if (!acp[4 * r + c])
4736                   continue;
4737
4738                if (acp_level[4 * r + c] >= level)
4739                   acp[4 * r + c] = NULL;
4740             }
4741          }
4742          if (inst->op == TGSI_OPCODE_ENDIF)
4743             --level;
4744          break;
4745
4746       default:
4747          /* Continuing the block, clear any written channels from
4748           * the ACP.
4749           */
4750          for (int d = 0; d < 2; d++) {
4751             if (inst->dst[d].file == PROGRAM_TEMPORARY && inst->dst[d].reladdr) {
4752                /* Any temporary might be written, so no copy propagation
4753                 * across this instruction.
4754                 */
4755                memset(acp, 0, sizeof(*acp) * this->next_temp * 4);
4756             } else if (inst->dst[d].file == PROGRAM_OUTPUT &&
4757                        inst->dst[d].reladdr) {
4758                /* Any output might be written, so no copy propagation
4759                 * from outputs across this instruction.
4760                 */
4761                for (int r = 0; r < this->next_temp; r++) {
4762                   for (int c = 0; c < 4; c++) {
4763                      if (!acp[4 * r + c])
4764                         continue;
4765
4766                      if (acp[4 * r + c]->src[0].file == PROGRAM_OUTPUT)
4767                         acp[4 * r + c] = NULL;
4768                   }
4769                }
4770             } else if (inst->dst[d].file == PROGRAM_TEMPORARY ||
4771                        inst->dst[d].file == PROGRAM_OUTPUT) {
4772                /* Clear where it's used as dst. */
4773                if (inst->dst[d].file == PROGRAM_TEMPORARY) {
4774                   for (int c = 0; c < 4; c++) {
4775                      if (inst->dst[d].writemask & (1 << c))
4776                         acp[4 * inst->dst[d].index + c] = NULL;
4777                   }
4778                }
4779
4780                /* Clear where it's used as src. */
4781                for (int r = 0; r < this->next_temp; r++) {
4782                   for (int c = 0; c < 4; c++) {
4783                      if (!acp[4 * r + c])
4784                         continue;
4785
4786                      int src_chan = GET_SWZ(acp[4 * r + c]->src[0].swizzle, c);
4787
4788                      if (acp[4 * r + c]->src[0].file == inst->dst[d].file &&
4789                          acp[4 * r + c]->src[0].index == inst->dst[d].index &&
4790                          inst->dst[d].writemask & (1 << src_chan)) {
4791                         acp[4 * r + c] = NULL;
4792                      }
4793                   }
4794                }
4795             }
4796          }
4797          break;
4798       }
4799
4800       /* If this is a copy, add it to the ACP. */
4801       if (inst->op == TGSI_OPCODE_MOV &&
4802           inst->dst[0].file == PROGRAM_TEMPORARY &&
4803           !(inst->dst[0].file == inst->src[0].file &&
4804              inst->dst[0].index == inst->src[0].index) &&
4805           !inst->dst[0].reladdr &&
4806           !inst->dst[0].reladdr2 &&
4807           !inst->saturate &&
4808           inst->src[0].file != PROGRAM_ARRAY &&
4809           !inst->src[0].reladdr &&
4810           !inst->src[0].reladdr2 &&
4811           !inst->src[0].negate) {
4812          for (int i = 0; i < 4; i++) {
4813             if (inst->dst[0].writemask & (1 << i)) {
4814                acp[4 * inst->dst[0].index + i] = inst;
4815                acp_level[4 * inst->dst[0].index + i] = level;
4816             }
4817          }
4818       }
4819    }
4820
4821    ralloc_free(acp_level);
4822    ralloc_free(acp);
4823 }
4824
4825 /*
4826  * On a basic block basis, tracks available PROGRAM_TEMPORARY registers for dead
4827  * code elimination.
4828  *
4829  * The glsl_to_tgsi_visitor lazily produces code assuming that this pass
4830  * will occur.  As an example, a TXP production after copy propagation but
4831  * before this pass:
4832  *
4833  * 0: MOV TEMP[1], INPUT[4].xyyy;
4834  * 1: MOV TEMP[1].w, INPUT[4].wwww;
4835  * 2: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D;
4836  *
4837  * and after this pass:
4838  *
4839  * 0: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D;
4840  */
4841 int
4842 glsl_to_tgsi_visitor::eliminate_dead_code(void)
4843 {
4844    glsl_to_tgsi_instruction **writes = rzalloc_array(mem_ctx,
4845                                                      glsl_to_tgsi_instruction *,
4846                                                      this->next_temp * 4);
4847    int *write_level = rzalloc_array(mem_ctx, int, this->next_temp * 4);
4848    int level = 0;
4849    int removed = 0;
4850
4851    foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) {
4852       assert(inst->dst[0].file != PROGRAM_TEMPORARY
4853              || inst->dst[0].index < this->next_temp);
4854
4855       switch (inst->op) {
4856       case TGSI_OPCODE_BGNLOOP:
4857       case TGSI_OPCODE_ENDLOOP:
4858       case TGSI_OPCODE_CONT:
4859       case TGSI_OPCODE_BRK:
4860          /* End of a basic block, clear the write array entirely.
4861           *
4862           * This keeps us from killing dead code when the writes are
4863           * on either side of a loop, even when the register isn't touched
4864           * inside the loop.  However, glsl_to_tgsi_visitor doesn't seem to emit
4865           * dead code of this type, so it shouldn't make a difference as long as
4866           * the dead code elimination pass in the GLSL compiler does its job.
4867           */
4868          memset(writes, 0, sizeof(*writes) * this->next_temp * 4);
4869          break;
4870
4871       case TGSI_OPCODE_ENDIF:
4872       case TGSI_OPCODE_ELSE:
4873          /* Promote the recorded level of all channels written inside the
4874           * preceding if or else block to the level above the if/else block.
4875           */
4876          for (int r = 0; r < this->next_temp; r++) {
4877             for (int c = 0; c < 4; c++) {
4878                if (!writes[4 * r + c])
4879                   continue;
4880
4881                if (write_level[4 * r + c] == level)
4882                   write_level[4 * r + c] = level-1;
4883             }
4884          }
4885          if(inst->op == TGSI_OPCODE_ENDIF)
4886             --level;
4887          break;
4888
4889       case TGSI_OPCODE_IF:
4890       case TGSI_OPCODE_UIF:
4891          ++level;
4892          /* fallthrough to default case to mark the condition as read */
4893       default:
4894          /* Continuing the block, clear any channels from the write array that
4895           * are read by this instruction.
4896           */
4897          for (unsigned i = 0; i < ARRAY_SIZE(inst->src); i++) {
4898             if (inst->src[i].file == PROGRAM_TEMPORARY && inst->src[i].reladdr){
4899                /* Any temporary might be read, so no dead code elimination
4900                 * across this instruction.
4901                 */
4902                memset(writes, 0, sizeof(*writes) * this->next_temp * 4);
4903             } else if (inst->src[i].file == PROGRAM_TEMPORARY) {
4904                /* Clear where it's used as src. */
4905                int src_chans = 1 << GET_SWZ(inst->src[i].swizzle, 0);
4906                src_chans |= 1 << GET_SWZ(inst->src[i].swizzle, 1);
4907                src_chans |= 1 << GET_SWZ(inst->src[i].swizzle, 2);
4908                src_chans |= 1 << GET_SWZ(inst->src[i].swizzle, 3);
4909
4910                for (int c = 0; c < 4; c++) {
4911                   if (src_chans & (1 << c))
4912                      writes[4 * inst->src[i].index + c] = NULL;
4913                }
4914             }
4915          }
4916          for (unsigned i = 0; i < inst->tex_offset_num_offset; i++) {
4917             if (inst->tex_offsets[i].file == PROGRAM_TEMPORARY && inst->tex_offsets[i].reladdr){
4918                /* Any temporary might be read, so no dead code elimination
4919                 * across this instruction.
4920                 */
4921                memset(writes, 0, sizeof(*writes) * this->next_temp * 4);
4922             } else if (inst->tex_offsets[i].file == PROGRAM_TEMPORARY) {
4923                /* Clear where it's used as src. */
4924                int src_chans = 1 << GET_SWZ(inst->tex_offsets[i].swizzle, 0);
4925                src_chans |= 1 << GET_SWZ(inst->tex_offsets[i].swizzle, 1);
4926                src_chans |= 1 << GET_SWZ(inst->tex_offsets[i].swizzle, 2);
4927                src_chans |= 1 << GET_SWZ(inst->tex_offsets[i].swizzle, 3);
4928
4929                for (int c = 0; c < 4; c++) {
4930                   if (src_chans & (1 << c))
4931                      writes[4 * inst->tex_offsets[i].index + c] = NULL;
4932                }
4933             }
4934          }
4935          break;
4936       }
4937
4938       /* If this instruction writes to a temporary, add it to the write array.
4939        * If there is already an instruction in the write array for one or more
4940        * of the channels, flag that channel write as dead.
4941        */
4942       for (unsigned i = 0; i < ARRAY_SIZE(inst->dst); i++) {
4943          if (inst->dst[i].file == PROGRAM_TEMPORARY &&
4944              !inst->dst[i].reladdr) {
4945             for (int c = 0; c < 4; c++) {
4946                if (inst->dst[i].writemask & (1 << c)) {
4947                   if (writes[4 * inst->dst[i].index + c]) {
4948                      if (write_level[4 * inst->dst[i].index + c] < level)
4949                         continue;
4950                      else
4951                         writes[4 * inst->dst[i].index + c]->dead_mask |= (1 << c);
4952                   }
4953                   writes[4 * inst->dst[i].index + c] = inst;
4954                   write_level[4 * inst->dst[i].index + c] = level;
4955                }
4956             }
4957          }
4958       }
4959    }
4960
4961    /* Anything still in the write array at this point is dead code. */
4962    for (int r = 0; r < this->next_temp; r++) {
4963       for (int c = 0; c < 4; c++) {
4964          glsl_to_tgsi_instruction *inst = writes[4 * r + c];
4965          if (inst)
4966             inst->dead_mask |= (1 << c);
4967       }
4968    }
4969
4970    /* Now actually remove the instructions that are completely dead and update
4971     * the writemask of other instructions with dead channels.
4972     */
4973    foreach_in_list_safe(glsl_to_tgsi_instruction, inst, &this->instructions) {
4974       if (!inst->dead_mask || !inst->dst[0].writemask)
4975          continue;
4976       /* No amount of dead masks should remove memory stores */
4977       if (inst->info->is_store)
4978          continue;
4979
4980       if ((inst->dst[0].writemask & ~inst->dead_mask) == 0) {
4981          inst->remove();
4982          delete inst;
4983          removed++;
4984       } else {
4985          if (inst->dst[0].type == GLSL_TYPE_DOUBLE) {
4986             if (inst->dead_mask == WRITEMASK_XY ||
4987                 inst->dead_mask == WRITEMASK_ZW)
4988                inst->dst[0].writemask &= ~(inst->dead_mask);
4989          } else
4990             inst->dst[0].writemask &= ~(inst->dead_mask);
4991       }
4992    }
4993
4994    ralloc_free(write_level);
4995    ralloc_free(writes);
4996
4997    return removed;
4998 }
4999
5000 /* merge DFRACEXP instructions into one. */
5001 void
5002 glsl_to_tgsi_visitor::merge_two_dsts(void)
5003 {
5004    foreach_in_list_safe(glsl_to_tgsi_instruction, inst, &this->instructions) {
5005       glsl_to_tgsi_instruction *inst2;
5006       bool merged;
5007       if (num_inst_dst_regs(inst) != 2)
5008          continue;
5009
5010       if (inst->dst[0].file != PROGRAM_UNDEFINED &&
5011           inst->dst[1].file != PROGRAM_UNDEFINED)
5012          continue;
5013
5014       inst2 = (glsl_to_tgsi_instruction *) inst->next;
5015       do {
5016
5017          if (inst->src[0].file == inst2->src[0].file &&
5018              inst->src[0].index == inst2->src[0].index &&
5019              inst->src[0].type == inst2->src[0].type &&
5020              inst->src[0].swizzle == inst2->src[0].swizzle)
5021             break;
5022          inst2 = (glsl_to_tgsi_instruction *) inst2->next;
5023       } while (inst2);
5024
5025       if (!inst2)
5026          continue;
5027       merged = false;
5028       if (inst->dst[0].file == PROGRAM_UNDEFINED) {
5029          merged = true;
5030          inst->dst[0] = inst2->dst[0];
5031       } else if (inst->dst[1].file == PROGRAM_UNDEFINED) {
5032          inst->dst[1] = inst2->dst[1];
5033          merged = true;
5034       }
5035
5036       if (merged) {
5037          inst2->remove();
5038          delete inst2;
5039       }
5040    }
5041 }
5042
5043 /* Merges temporary registers together where possible to reduce the number of
5044  * registers needed to run a program.
5045  *
5046  * Produces optimal code only after copy propagation and dead code elimination
5047  * have been run. */
5048 void
5049 glsl_to_tgsi_visitor::merge_registers(void)
5050 {
5051    int *last_reads = rzalloc_array(mem_ctx, int, this->next_temp);
5052    int *first_writes = rzalloc_array(mem_ctx, int, this->next_temp);
5053    struct rename_reg_pair *renames = rzalloc_array(mem_ctx, struct rename_reg_pair, this->next_temp);
5054    int i, j;
5055    int num_renames = 0;
5056
5057    /* Read the indices of the last read and first write to each temp register
5058     * into an array so that we don't have to traverse the instruction list as
5059     * much. */
5060    for (i = 0; i < this->next_temp; i++) {
5061       last_reads[i] = -1;
5062       first_writes[i] = -1;
5063    }
5064    get_last_temp_read_first_temp_write(last_reads, first_writes);
5065
5066    /* Start looking for registers with non-overlapping usages that can be
5067     * merged together. */
5068    for (i = 0; i < this->next_temp; i++) {
5069       /* Don't touch unused registers. */
5070       if (last_reads[i] < 0 || first_writes[i] < 0) continue;
5071
5072       for (j = 0; j < this->next_temp; j++) {
5073          /* Don't touch unused registers. */
5074          if (last_reads[j] < 0 || first_writes[j] < 0) continue;
5075
5076          /* We can merge the two registers if the first write to j is after or
5077           * in the same instruction as the last read from i.  Note that the
5078           * register at index i will always be used earlier or at the same time
5079           * as the register at index j. */
5080          if (first_writes[i] <= first_writes[j] &&
5081              last_reads[i] <= first_writes[j]) {
5082             renames[num_renames].old_reg = j;
5083             renames[num_renames].new_reg = i;
5084             num_renames++;
5085
5086             /* Update the first_writes and last_reads arrays with the new
5087              * values for the merged register index, and mark the newly unused
5088              * register index as such. */
5089             assert(last_reads[j] >= last_reads[i]);
5090             last_reads[i] = last_reads[j];
5091             first_writes[j] = -1;
5092             last_reads[j] = -1;
5093          }
5094       }
5095    }
5096
5097    rename_temp_registers(num_renames, renames);
5098    ralloc_free(renames);
5099    ralloc_free(last_reads);
5100    ralloc_free(first_writes);
5101 }
5102
5103 /* Reassign indices to temporary registers by reusing unused indices created
5104  * by optimization passes. */
5105 void
5106 glsl_to_tgsi_visitor::renumber_registers(void)
5107 {
5108    int i = 0;
5109    int new_index = 0;
5110    int *first_reads = rzalloc_array(mem_ctx, int, this->next_temp);
5111    struct rename_reg_pair *renames = rzalloc_array(mem_ctx, struct rename_reg_pair, this->next_temp);
5112    int num_renames = 0;
5113    for (i = 0; i < this->next_temp; i++) {
5114       first_reads[i] = -1;
5115    }
5116    get_first_temp_read(first_reads);
5117
5118    for (i = 0; i < this->next_temp; i++) {
5119       if (first_reads[i] < 0) continue;
5120       if (i != new_index) {
5121          renames[num_renames].old_reg = i;
5122          renames[num_renames].new_reg = new_index;
5123          num_renames++;
5124       }
5125       new_index++;
5126    }
5127
5128    rename_temp_registers(num_renames, renames);
5129    this->next_temp = new_index;
5130    ralloc_free(renames);
5131    ralloc_free(first_reads);
5132 }
5133
/* ------------------------- TGSI conversion stuff -------------------------- */

/**
 * One recorded branch whose label still has to be patched, see get_label().
 */
struct label {
   /* Branch target as passed to get_label(). */
   unsigned branch_target;
   /* Storage whose address get_label() hands out to the instruction emitter. */
   unsigned token;
};
5139
/**
 * Intermediate state used during shader translation.
 *
 * Holds the ureg program being built together with the lookup tables the
 * register translation helpers (dst_register(), src_register(), ...) use to
 * map glsl_to_tgsi register files and indices onto ureg registers.
 */
struct st_translate {
   struct ureg_program *ureg;

   /* Temporaries, indexed by temporary register index.  The array is grown
    * on demand and entries are declared lazily by dst_register().
    */
   unsigned temps_size;
   struct ureg_dst *temps;

   /* Temporary arrays, indexed by the upper 16 bits of a PROGRAM_ARRAY
    * register index; declared lazily by dst_register() using array_sizes[].
    */
   struct ureg_dst *arrays;
   unsigned num_temp_arrays;
   struct ureg_src *constants;
   int num_constants;
   struct ureg_src *immediates;
   int num_immediates;
   /* Indexed through outputMapping[] / inputMapping[] respectively. */
   struct ureg_dst outputs[PIPE_MAX_SHADER_OUTPUTS];
   struct ureg_src inputs[PIPE_MAX_SHADER_INPUTS];
   /* address[0]: relative addressing of the register index,
    * address[1]: relative addressing of the second (2D) index,
    * address[2]: relative addressing of samplers and buffers/images.
    */
   struct ureg_dst address[3];
   struct ureg_src samplers[PIPE_MAX_SAMPLERS];
   struct ureg_src buffers[PIPE_MAX_SHADER_BUFFERS];
   struct ureg_src images[PIPE_MAX_SHADER_IMAGES];
   struct ureg_src systemValues[SYSTEM_VALUE_MAX];
   struct ureg_src shared_memory;
   struct tgsi_texture_offset tex_offsets[MAX_GLSL_TEXTURE_OFFSET];
   unsigned *array_sizes;
   /* Array declarations, looked up with (array_id - 1). */
   struct array_decl *input_arrays;
   struct array_decl *output_arrays;

   const GLuint *inputMapping;
   const GLuint *outputMapping;

   /* For every instruction that contains a label (eg CALL), keep
    * details so that we can go back afterwards and emit the correct
    * tgsi instruction number for each label.
    */
   struct label *labels;
   unsigned labels_size;
   unsigned labels_count;

   /* Keep a record of the tgsi instruction number that each mesa
    * instruction starts at, will be used to fix up labels after
    * translation.
    */
   unsigned *insn;
   unsigned insn_size;
   unsigned insn_count;

   unsigned procType;  /**< TGSI_PROCESSOR_x (vertex, fragment, tess, ...) */

   /* Set to TRUE by get_label() / set_insn_start() when an allocation fails. */
   boolean error;
};
5191
/**
 * Map Mesa's SYSTEM_VALUE_x to TGSI_SEMANTIC_x.
 *
 * The table is indexed by SYSTEM_VALUE_x and uses positional initializers,
 * so the entries have to be listed in exactly the order of that enum.
 * NOTE(review): the enum itself is not visible from here - confirm that
 * order and entry count still match whenever a system value is added.
 */
const unsigned _mesa_sysval_to_semantic[SYSTEM_VALUE_MAX] = {
   /* Vertex shader
    */
   TGSI_SEMANTIC_VERTEXID,
   TGSI_SEMANTIC_INSTANCEID,
   TGSI_SEMANTIC_VERTEXID_NOBASE,
   TGSI_SEMANTIC_BASEVERTEX,
   TGSI_SEMANTIC_BASEINSTANCE,
   TGSI_SEMANTIC_DRAWID,

   /* Geometry shader
    */
   TGSI_SEMANTIC_INVOCATIONID,

   /* Fragment shader
    */
   TGSI_SEMANTIC_POSITION,
   TGSI_SEMANTIC_FACE,
   TGSI_SEMANTIC_SAMPLEID,
   TGSI_SEMANTIC_SAMPLEPOS,
   TGSI_SEMANTIC_SAMPLEMASK,
   TGSI_SEMANTIC_HELPER_INVOCATION,

   /* Tessellation shaders
    */
   TGSI_SEMANTIC_TESSCOORD,
   TGSI_SEMANTIC_VERTICESIN,
   TGSI_SEMANTIC_PRIMID,
   TGSI_SEMANTIC_TESSOUTER,
   TGSI_SEMANTIC_TESSINNER,

   /* Compute shaders
    */
   TGSI_SEMANTIC_THREAD_ID,
   TGSI_SEMANTIC_BLOCK_ID,
   TGSI_SEMANTIC_GRID_SIZE,
};
5230
5231 /**
5232  * Make note of a branch to a label in the TGSI code.
5233  * After we've emitted all instructions, we'll go over the list
5234  * of labels built here and patch the TGSI code with the actual
5235  * location of each label.
5236  */
5237 static unsigned *get_label(struct st_translate *t, unsigned branch_target)
5238 {
5239    unsigned i;
5240
5241    if (t->labels_count + 1 >= t->labels_size) {
5242       t->labels_size = 1 << (util_logbase2(t->labels_size) + 1);
5243       t->labels = (struct label *)realloc(t->labels,
5244                                           t->labels_size * sizeof(struct label));
5245       if (t->labels == NULL) {
5246          static unsigned dummy;
5247          t->error = TRUE;
5248          return &dummy;
5249       }
5250    }
5251
5252    i = t->labels_count++;
5253    t->labels[i].branch_target = branch_target;
5254    return &t->labels[i].token;
5255 }
5256
5257 /**
5258  * Called prior to emitting the TGSI code for each instruction.
5259  * Allocate additional space for instructions if needed.
5260  * Update the insn[] array so the next glsl_to_tgsi_instruction points to
5261  * the next TGSI instruction.
5262  */
5263 static void set_insn_start(struct st_translate *t, unsigned start)
5264 {
5265    if (t->insn_count + 1 >= t->insn_size) {
5266       t->insn_size = 1 << (util_logbase2(t->insn_size) + 1);
5267       t->insn = (unsigned *)realloc(t->insn, t->insn_size * sizeof(t->insn[0]));
5268       if (t->insn == NULL) {
5269          t->error = TRUE;
5270          return;
5271       }
5272    }
5273
5274    t->insn[t->insn_count++] = start;
5275 }
5276
5277 /**
5278  * Map a glsl_to_tgsi constant/immediate to a TGSI immediate.
5279  */
5280 static struct ureg_src
5281 emit_immediate(struct st_translate *t,
5282                gl_constant_value values[4],
5283                int type, int size)
5284 {
5285    struct ureg_program *ureg = t->ureg;
5286
5287    switch(type)
5288    {
5289    case GL_FLOAT:
5290       return ureg_DECL_immediate(ureg, &values[0].f, size);
5291    case GL_DOUBLE:
5292       return ureg_DECL_immediate_f64(ureg, (double *)&values[0].f, size);
5293    case GL_INT:
5294       return ureg_DECL_immediate_int(ureg, &values[0].i, size);
5295    case GL_UNSIGNED_INT:
5296    case GL_BOOL:
5297       return ureg_DECL_immediate_uint(ureg, &values[0].u, size);
5298    default:
5299       assert(!"should not get here - type must be float, int, uint, or bool");
5300       return ureg_src_undef();
5301    }
5302 }
5303
5304 /**
5305  * Map a glsl_to_tgsi dst register to a TGSI ureg_dst register.
5306  */
5307 static struct ureg_dst
5308 dst_register(struct st_translate *t, gl_register_file file, unsigned index,
5309              unsigned array_id)
5310 {
5311    unsigned array;
5312
5313    switch(file) {
5314    case PROGRAM_UNDEFINED:
5315       return ureg_dst_undef();
5316
5317    case PROGRAM_TEMPORARY:
5318       /* Allocate space for temporaries on demand. */
5319       if (index >= t->temps_size) {
5320          const int inc = 4096;
5321
5322          t->temps = (struct ureg_dst*)
5323                     realloc(t->temps,
5324                             (t->temps_size + inc) * sizeof(struct ureg_dst));
5325          if (!t->temps)
5326             return ureg_dst_undef();
5327
5328          memset(t->temps + t->temps_size, 0, inc * sizeof(struct ureg_dst));
5329          t->temps_size += inc;
5330       }
5331
5332       if (ureg_dst_is_undef(t->temps[index]))
5333          t->temps[index] = ureg_DECL_local_temporary(t->ureg);
5334
5335       return t->temps[index];
5336
5337    case PROGRAM_ARRAY:
5338       array = index >> 16;
5339
5340       assert(array < t->num_temp_arrays);
5341
5342       if (ureg_dst_is_undef(t->arrays[array]))
5343          t->arrays[array] = ureg_DECL_array_temporary(
5344             t->ureg, t->array_sizes[array], TRUE);
5345
5346       return ureg_dst_array_offset(t->arrays[array],
5347                                    (int)(index & 0xFFFF) - 0x8000);
5348
5349    case PROGRAM_OUTPUT:
5350       if (!array_id) {
5351          if (t->procType == TGSI_PROCESSOR_FRAGMENT)
5352             assert(index < FRAG_RESULT_MAX);
5353          else if (t->procType == TGSI_PROCESSOR_TESS_CTRL ||
5354                   t->procType == TGSI_PROCESSOR_TESS_EVAL)
5355             assert(index < VARYING_SLOT_TESS_MAX);
5356          else
5357             assert(index < VARYING_SLOT_MAX);
5358
5359          assert(t->outputMapping[index] < ARRAY_SIZE(t->outputs));
5360          assert(t->outputs[t->outputMapping[index]].File != TGSI_FILE_NULL);
5361          return t->outputs[t->outputMapping[index]];
5362       }
5363       else {
5364          struct array_decl *decl = &t->output_arrays[array_id-1];
5365          unsigned mesa_index = decl->mesa_index;
5366          int slot = t->outputMapping[mesa_index];
5367
5368          assert(slot != -1 && t->outputs[slot].File == TGSI_FILE_OUTPUT);
5369          assert(t->outputs[slot].ArrayID == array_id);
5370          return ureg_dst_array_offset(t->outputs[slot], index - mesa_index);
5371       }
5372
5373    case PROGRAM_ADDRESS:
5374       return t->address[index];
5375
5376    default:
5377       assert(!"unknown dst register file");
5378       return ureg_dst_undef();
5379    }
5380 }
5381
5382 /**
5383  * Map a glsl_to_tgsi src register to a TGSI ureg_src register.
5384  */
5385 static struct ureg_src
5386 src_register(struct st_translate *t, const st_src_reg *reg)
5387 {
5388    int index = reg->index;
5389    int double_reg2 = reg->double_reg2 ? 1 : 0;
5390
5391    switch(reg->file) {
5392    case PROGRAM_UNDEFINED:
5393       return ureg_imm4f(t->ureg, 0, 0, 0, 0);
5394
5395    case PROGRAM_TEMPORARY:
5396    case PROGRAM_ARRAY:
5397    case PROGRAM_OUTPUT:
5398       return ureg_src(dst_register(t, reg->file, reg->index, reg->array_id));
5399
5400    case PROGRAM_UNIFORM:
5401       assert(reg->index >= 0);
5402       return reg->index < t->num_constants ?
5403                t->constants[reg->index] : ureg_imm4f(t->ureg, 0, 0, 0, 0);
5404    case PROGRAM_STATE_VAR:
5405    case PROGRAM_CONSTANT:       /* ie, immediate */
5406       if (reg->has_index2)
5407          return ureg_src_register(TGSI_FILE_CONSTANT, reg->index);
5408       else
5409          return reg->index >= 0 && reg->index < t->num_constants ?
5410                   t->constants[reg->index] : ureg_imm4f(t->ureg, 0, 0, 0, 0);
5411
5412    case PROGRAM_IMMEDIATE:
5413       assert(reg->index >= 0 && reg->index < t->num_immediates);
5414       return t->immediates[reg->index];
5415
5416    case PROGRAM_INPUT:
5417       /* GLSL inputs are 64-bit containers, so we have to
5418        * map back to the original index and add the offset after
5419        * mapping. */
5420       index -= double_reg2;
5421       if (!reg->array_id) {
5422          assert(t->inputMapping[index] < ARRAY_SIZE(t->inputs));
5423          assert(t->inputs[t->inputMapping[index]].File != TGSI_FILE_NULL);
5424          return t->inputs[t->inputMapping[index] + double_reg2];
5425       }
5426       else {
5427          struct array_decl *decl = &t->input_arrays[reg->array_id-1];
5428          unsigned mesa_index = decl->mesa_index;
5429          int slot = t->inputMapping[mesa_index];
5430
5431          assert(slot != -1 && t->inputs[slot].File == TGSI_FILE_INPUT);
5432          assert(t->inputs[slot].ArrayID == reg->array_id);
5433          return ureg_src_array_offset(t->inputs[slot], index + double_reg2 - mesa_index);
5434       }
5435
5436    case PROGRAM_ADDRESS:
5437       return ureg_src(t->address[reg->index]);
5438
5439    case PROGRAM_SYSTEM_VALUE:
5440       assert(reg->index < (int) ARRAY_SIZE(t->systemValues));
5441       return t->systemValues[reg->index];
5442
5443    default:
5444       assert(!"unknown src register file");
5445       return ureg_src_undef();
5446    }
5447 }
5448
5449 /**
5450  * Create a TGSI ureg_dst register from an st_dst_reg.
5451  */
5452 static struct ureg_dst
5453 translate_dst(struct st_translate *t,
5454               const st_dst_reg *dst_reg,
5455               bool saturate)
5456 {
5457    struct ureg_dst dst = dst_register(t, dst_reg->file, dst_reg->index,
5458                                       dst_reg->array_id);
5459
5460    if (dst.File == TGSI_FILE_NULL)
5461       return dst;
5462
5463    dst = ureg_writemask(dst, dst_reg->writemask);
5464
5465    if (saturate)
5466       dst = ureg_saturate(dst);
5467
5468    if (dst_reg->reladdr != NULL) {
5469       assert(dst_reg->file != PROGRAM_TEMPORARY);
5470       dst = ureg_dst_indirect(dst, ureg_src(t->address[0]));
5471    }
5472
5473    if (dst_reg->has_index2) {
5474       if (dst_reg->reladdr2)
5475          dst = ureg_dst_dimension_indirect(dst, ureg_src(t->address[1]),
5476                                            dst_reg->index2D);
5477       else
5478          dst = ureg_dst_dimension(dst, dst_reg->index2D);
5479    }
5480
5481    return dst;
5482 }
5483
5484 /**
5485  * Create a TGSI ureg_src register from an st_src_reg.
5486  */
5487 static struct ureg_src
5488 translate_src(struct st_translate *t, const st_src_reg *src_reg)
5489 {
5490    struct ureg_src src = src_register(t, src_reg);
5491
5492    if (src_reg->has_index2) {
5493       /* 2D indexes occur with geometry shader inputs (attrib, vertex)
5494        * and UBO constant buffers (buffer, position).
5495        */
5496       if (src_reg->reladdr2)
5497          src = ureg_src_dimension_indirect(src, ureg_src(t->address[1]),
5498                                            src_reg->index2D);
5499       else
5500          src = ureg_src_dimension(src, src_reg->index2D);
5501    }
5502
5503    src = ureg_swizzle(src,
5504                       GET_SWZ(src_reg->swizzle, 0) & 0x3,
5505                       GET_SWZ(src_reg->swizzle, 1) & 0x3,
5506                       GET_SWZ(src_reg->swizzle, 2) & 0x3,
5507                       GET_SWZ(src_reg->swizzle, 3) & 0x3);
5508
5509    if ((src_reg->negate & 0xf) == NEGATE_XYZW)
5510       src = ureg_negate(src);
5511
5512    if (src_reg->reladdr != NULL) {
5513       assert(src_reg->file != PROGRAM_TEMPORARY);
5514       src = ureg_src_indirect(src, ureg_src(t->address[0]));
5515    }
5516
5517    return src;
5518 }
5519
5520 static struct tgsi_texture_offset
5521 translate_tex_offset(struct st_translate *t,
5522                      const st_src_reg *in_offset, int idx)
5523 {
5524    struct tgsi_texture_offset offset;
5525    struct ureg_src imm_src;
5526    struct ureg_dst dst;
5527    int array;
5528
5529    switch (in_offset->file) {
5530    case PROGRAM_IMMEDIATE:
5531       assert(in_offset->index >= 0 && in_offset->index < t->num_immediates);
5532       imm_src = t->immediates[in_offset->index];
5533
5534       offset.File = imm_src.File;
5535       offset.Index = imm_src.Index;
5536       offset.SwizzleX = imm_src.SwizzleX;
5537       offset.SwizzleY = imm_src.SwizzleY;
5538       offset.SwizzleZ = imm_src.SwizzleZ;
5539       offset.Padding = 0;
5540       break;
5541    case PROGRAM_TEMPORARY:
5542       imm_src = ureg_src(t->temps[in_offset->index]);
5543       offset.File = imm_src.File;
5544       offset.Index = imm_src.Index;
5545       offset.SwizzleX = GET_SWZ(in_offset->swizzle, 0);
5546       offset.SwizzleY = GET_SWZ(in_offset->swizzle, 1);
5547       offset.SwizzleZ = GET_SWZ(in_offset->swizzle, 2);
5548       offset.Padding = 0;
5549       break;
5550    case PROGRAM_ARRAY:
5551       array = in_offset->index >> 16;
5552
5553       assert(array >= 0);
5554       assert(array < (int)t->num_temp_arrays);
5555
5556       dst = t->arrays[array];
5557       offset.File = dst.File;
5558       offset.Index = dst.Index + (in_offset->index & 0xFFFF) - 0x8000;
5559       offset.SwizzleX = GET_SWZ(in_offset->swizzle, 0);
5560       offset.SwizzleY = GET_SWZ(in_offset->swizzle, 1);
5561       offset.SwizzleZ = GET_SWZ(in_offset->swizzle, 2);
5562       offset.Padding = 0;
5563       break;
5564    default:
5565       break;
5566    }
5567    return offset;
5568 }
5569
/**
 * Emit the TGSI code for one glsl_to_tgsi_instruction.
 *
 * All destination and source registers are translated first; the opcode
 * then selects which kind of ureg instruction is emitted (label, texture,
 * memory or plain instruction).
 *
 * \param t     translation state
 * \param inst  instruction to translate
 */
static void
compile_tgsi_instruction(struct st_translate *t,
                         const glsl_to_tgsi_instruction *inst)
{
   struct ureg_program *ureg = t->ureg;
   int i;
   struct ureg_dst dst[2];
   /* NOTE(review): texture and memory opcodes add one operand to src[], so
    * this assumes at most 3 regular sources for those opcodes - confirm.
    */
   struct ureg_src src[4];
   struct tgsi_texture_offset texoffsets[MAX_GLSL_TEXTURE_OFFSET];

   int num_dst;
   int num_src;
   unsigned tex_target = 0;

   num_dst = num_inst_dst_regs(inst);
   num_src = num_inst_src_regs(inst);

   for (i = 0; i < num_dst; i++)
      dst[i] = translate_dst(t,
                             &inst->dst[i],
                             inst->saturate);

   for (i = 0; i < num_src; i++)
      src[i] = translate_src(t, &inst->src[i]);

   switch(inst->op) {
   /* Instructions carrying a label: a label entry is recorded for each of
    * them; only CAL records a real branch target (the callee's sig_id).
    */
   case TGSI_OPCODE_BGNLOOP:
   case TGSI_OPCODE_CAL:
   case TGSI_OPCODE_ELSE:
   case TGSI_OPCODE_ENDLOOP:
   case TGSI_OPCODE_IF:
   case TGSI_OPCODE_UIF:
      assert(num_dst == 0);
      ureg_label_insn(ureg,
                      inst->op,
                      src, num_src,
                      get_label(t,
                                inst->op == TGSI_OPCODE_CAL ? inst->function->sig_id : 0));
      return;

   /* Texture instructions: the sampler is appended as an extra source,
    * indirectly addressed through address[2] when it has a reladdr.
    */
   case TGSI_OPCODE_TEX:
   case TGSI_OPCODE_TXB:
   case TGSI_OPCODE_TXD:
   case TGSI_OPCODE_TXL:
   case TGSI_OPCODE_TXP:
   case TGSI_OPCODE_TXQ:
   case TGSI_OPCODE_TXQS:
   case TGSI_OPCODE_TXF:
   case TGSI_OPCODE_TEX2:
   case TGSI_OPCODE_TXB2:
   case TGSI_OPCODE_TXL2:
   case TGSI_OPCODE_TG4:
   case TGSI_OPCODE_LODQ:
      src[num_src] = t->samplers[inst->sampler.index];
      assert(src[num_src].File != TGSI_FILE_NULL);
      if (inst->sampler.reladdr)
         src[num_src] =
            ureg_src_indirect(src[num_src], ureg_src(t->address[2]));
      num_src++;
      for (i = 0; i < (int)inst->tex_offset_num_offset; i++) {
         texoffsets[i] = translate_tex_offset(t, &inst->tex_offsets[i], i);
      }
      tex_target = st_translate_texture_target(inst->tex_target, inst->tex_shadow);

      ureg_tex_insn(ureg,
                    inst->op,
                    dst, num_dst,
                    tex_target,
                    texoffsets, inst->tex_offset_num_offset,
                    src, num_src);
      return;

   /* Memory reads and atomics: the accessed resource (shared memory,
    * buffer or image) becomes source 0.
    */
   case TGSI_OPCODE_RESQ:
   case TGSI_OPCODE_LOAD:
   case TGSI_OPCODE_ATOMUADD:
   case TGSI_OPCODE_ATOMXCHG:
   case TGSI_OPCODE_ATOMCAS:
   case TGSI_OPCODE_ATOMAND:
   case TGSI_OPCODE_ATOMOR:
   case TGSI_OPCODE_ATOMXOR:
   case TGSI_OPCODE_ATOMUMIN:
   case TGSI_OPCODE_ATOMUMAX:
   case TGSI_OPCODE_ATOMIMIN:
   case TGSI_OPCODE_ATOMIMAX:
      /* Shift the translated sources up by one to make room at src[0]. */
      for (i = num_src - 1; i >= 0; i--)
         src[i + 1] = src[i];
      num_src++;
      if (inst->buffer.file == PROGRAM_MEMORY) {
         src[0] = t->shared_memory;
      } else if (inst->buffer.file == PROGRAM_BUFFER) {
         src[0] = t->buffers[inst->buffer.index];
      } else {
         /* Anything else is treated as an image, which also needs a target. */
         src[0] = t->images[inst->buffer.index];
         tex_target = st_translate_texture_target(inst->tex_target, inst->tex_shadow);
      }
      if (inst->buffer.reladdr)
         src[0] = ureg_src_indirect(src[0], ureg_src(t->address[2]));
      assert(src[0].File != TGSI_FILE_NULL);
      ureg_memory_insn(ureg, inst->op, dst, num_dst, src, num_src,
                       inst->buffer_access,
                       tex_target, inst->image_format);
      break;

   /* Memory store: the resource replaces the translated destination 0 and
    * the instruction's writemask is applied to it again.
    */
   case TGSI_OPCODE_STORE:
      if (inst->buffer.file == PROGRAM_MEMORY) {
         dst[0] = ureg_dst(t->shared_memory);
      } else if (inst->buffer.file == PROGRAM_BUFFER) {
         dst[0] = ureg_dst(t->buffers[inst->buffer.index]);
      } else {
         dst[0] = ureg_dst(t->images[inst->buffer.index]);
         tex_target = st_translate_texture_target(inst->tex_target, inst->tex_shadow);
      }
      dst[0] = ureg_writemask(dst[0], inst->dst[0].writemask);
      if (inst->buffer.reladdr)
         dst[0] = ureg_dst_indirect(dst[0], ureg_src(t->address[2]));
      assert(dst[0].File != TGSI_FILE_NULL);
      ureg_memory_insn(ureg, inst->op, dst, num_dst, src, num_src,
                       inst->buffer_access,
                       tex_target, inst->image_format);
      break;

   case TGSI_OPCODE_SCS:
      /* SCS is always emitted with an XY writemask. */
      dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XY);
      ureg_insn(ureg, inst->op, dst, num_dst, src, num_src);
      break;

   default:
      /* Everything else is a plain instruction. */
      ureg_insn(ureg,
                inst->op,
                dst, num_dst,
                src, num_src);
      break;
   }
}
5704
5705 /**
5706  * Emit the TGSI instructions for inverting and adjusting WPOS.
5707  * This code is unavoidable because it also depends on whether
5708  * a FBO is bound (STATE_FB_WPOS_Y_TRANSFORM).
5709  */
5710 static void
5711 emit_wpos_adjustment(struct gl_context *ctx,
5712                      struct st_translate *t,
5713                      int wpos_transform_const,
5714                      boolean invert,
5715                      GLfloat adjX, GLfloat adjY[2])
5716 {
5717    struct ureg_program *ureg = t->ureg;
5718
5719    assert(wpos_transform_const >= 0);
5720
5721    /* Fragment program uses fragment position input.
5722     * Need to replace instances of INPUT[WPOS] with temp T
5723     * where T = INPUT[WPOS] is inverted by Y.
5724     */
5725    struct ureg_src wpostrans = ureg_DECL_constant(ureg, wpos_transform_const);
5726    struct ureg_dst wpos_temp = ureg_DECL_temporary( ureg );
5727    struct ureg_src *wpos =
5728       ctx->Const.GLSLFragCoordIsSysVal ?
5729          &t->systemValues[SYSTEM_VALUE_FRAG_COORD] :
5730          &t->inputs[t->inputMapping[VARYING_SLOT_POS]];
5731    struct ureg_src wpos_input = *wpos;
5732
5733    /* First, apply the coordinate shift: */
5734    if (adjX || adjY[0] || adjY[1]) {
5735       if (adjY[0] != adjY[1]) {
5736          /* Adjust the y coordinate by adjY[1] or adjY[0] respectively
5737           * depending on whether inversion is actually going to be applied
5738           * or not, which is determined by testing against the inversion
5739           * state variable used below, which will be either +1 or -1.
5740           */
5741          struct ureg_dst adj_temp = ureg_DECL_local_temporary(ureg);
5742
5743          ureg_CMP(ureg, adj_temp,
5744                   ureg_scalar(wpostrans, invert ? 2 : 0),
5745                   ureg_imm4f(ureg, adjX, adjY[0], 0.0f, 0.0f),
5746                   ureg_imm4f(ureg, adjX, adjY[1], 0.0f, 0.0f));
5747          ureg_ADD(ureg, wpos_temp, wpos_input, ureg_src(adj_temp));
5748       } else {
5749          ureg_ADD(ureg, wpos_temp, wpos_input,
5750                   ureg_imm4f(ureg, adjX, adjY[0], 0.0f, 0.0f));
5751       }
5752       wpos_input = ureg_src(wpos_temp);
5753    } else {
5754       /* MOV wpos_temp, input[wpos]
5755        */
5756       ureg_MOV( ureg, wpos_temp, wpos_input );
5757    }
5758
5759    /* Now the conditional y flip: STATE_FB_WPOS_Y_TRANSFORM.xy/zw will be
5760     * inversion/identity, or the other way around if we're drawing to an FBO.
5761     */
5762    if (invert) {
5763       /* MAD wpos_temp.y, wpos_input, wpostrans.xxxx, wpostrans.yyyy
5764        */
5765       ureg_MAD( ureg,
5766                 ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y ),
5767                 wpos_input,
5768                 ureg_scalar(wpostrans, 0),
5769                 ureg_scalar(wpostrans, 1));
5770    } else {
5771       /* MAD wpos_temp.y, wpos_input, wpostrans.zzzz, wpostrans.wwww
5772        */
5773       ureg_MAD( ureg,
5774                 ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y ),
5775                 wpos_input,
5776                 ureg_scalar(wpostrans, 2),
5777                 ureg_scalar(wpostrans, 3));
5778    }
5779
5780    /* Use wpos_temp as position input from here on:
5781     */
5782    *wpos = ureg_src(wpos_temp);
5783 }
5784
5785
5786 /**
5787  * Emit fragment position/ooordinate code.
5788  */
5789 static void
5790 emit_wpos(struct st_context *st,
5791           struct st_translate *t,
5792           const struct gl_program *program,
5793           struct ureg_program *ureg,
5794           int wpos_transform_const)
5795 {
5796    const struct gl_fragment_program *fp =
5797       (const struct gl_fragment_program *) program;
5798    struct pipe_screen *pscreen = st->pipe->screen;
5799    GLfloat adjX = 0.0f;
5800    GLfloat adjY[2] = { 0.0f, 0.0f };
5801    boolean invert = FALSE;
5802
5803    /* Query the pixel center conventions supported by the pipe driver and set
5804     * adjX, adjY to help out if it cannot handle the requested one internally.
5805     *
5806     * The bias of the y-coordinate depends on whether y-inversion takes place
5807     * (adjY[1]) or not (adjY[0]), which is in turn dependent on whether we are
5808     * drawing to an FBO (causes additional inversion), and whether the the pipe
5809     * driver origin and the requested origin differ (the latter condition is
5810     * stored in the 'invert' variable).
5811     *
5812     * For height = 100 (i = integer, h = half-integer, l = lower, u = upper):
5813     *
5814     * center shift only:
5815     * i -> h: +0.5
5816     * h -> i: -0.5
5817     *
5818     * inversion only:
5819     * l,i -> u,i: ( 0.0 + 1.0) * -1 + 100 = 99
5820     * l,h -> u,h: ( 0.5 + 0.0) * -1 + 100 = 99.5
5821     * u,i -> l,i: (99.0 + 1.0) * -1 + 100 = 0
5822     * u,h -> l,h: (99.5 + 0.0) * -1 + 100 = 0.5
5823     *
5824     * inversion and center shift:
5825     * l,i -> u,h: ( 0.0 + 0.5) * -1 + 100 = 99.5
5826     * l,h -> u,i: ( 0.5 + 0.5) * -1 + 100 = 99
5827     * u,i -> l,h: (99.0 + 0.5) * -1 + 100 = 0.5
5828     * u,h -> l,i: (99.5 + 0.5) * -1 + 100 = 0
5829     */
5830    if (fp->OriginUpperLeft) {
5831       /* Fragment shader wants origin in upper-left */
5832       if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT)) {
5833          /* the driver supports upper-left origin */
5834       }
5835       else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT)) {
5836          /* the driver supports lower-left origin, need to invert Y */
5837          ureg_property(ureg, TGSI_PROPERTY_FS_COORD_ORIGIN,
5838                        TGSI_FS_COORD_ORIGIN_LOWER_LEFT);
5839          invert = TRUE;
5840       }
5841       else
5842          assert(0);
5843    }
5844    else {
5845       /* Fragment shader wants origin in lower-left */
5846       if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT))
5847          /* the driver supports lower-left origin */
5848          ureg_property(ureg, TGSI_PROPERTY_FS_COORD_ORIGIN,
5849                        TGSI_FS_COORD_ORIGIN_LOWER_LEFT);
5850       else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT))
5851          /* the driver supports upper-left origin, need to invert Y */
5852          invert = TRUE;
5853       else
5854          assert(0);
5855    }
5856
5857    if (fp->PixelCenterInteger) {
5858       /* Fragment shader wants pixel center integer */
5859       if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER)) {
5860          /* the driver supports pixel center integer */
5861          adjY[1] = 1.0f;
5862          ureg_property(ureg, TGSI_PROPERTY_FS_COORD_PIXEL_CENTER,
5863                        TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
5864       }
5865       else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER)) {
5866          /* the driver supports pixel center half integer, need to bias X,Y */
5867          adjX = -0.5f;
5868          adjY[0] = -0.5f;
5869          adjY[1] = 0.5f;
5870       }
5871       else
5872          assert(0);
5873    }
5874    else {
5875       /* Fragment shader wants pixel center half integer */
5876       if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER)) {
5877          /* the driver supports pixel center half integer */
5878       }
5879       else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER)) {
5880          /* the driver supports pixel center integer, need to bias X,Y */
5881          adjX = adjY[0] = adjY[1] = 0.5f;
5882          ureg_property(ureg, TGSI_PROPERTY_FS_COORD_PIXEL_CENTER,
5883                        TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
5884       }
5885       else
5886          assert(0);
5887    }
5888
5889    /* we invert after adjustment so that we avoid the MOV to temporary,
5890     * and reuse the adjustment ADD instead */
5891    emit_wpos_adjustment(st->ctx, t, wpos_transform_const, invert, adjX, adjY);
5892 }
5893
5894 /**
5895  * OpenGL's fragment gl_FrontFace input is 1 for front-facing, 0 for back.
5896  * TGSI uses +1 for front, -1 for back.
5897  * This function converts the TGSI value to the GL value.  Simply clamping/
5898  * saturating the value to [0,1] does the job.
5899  */
5900 static void
5901 emit_face_var(struct gl_context *ctx, struct st_translate *t)
5902 {
5903    struct ureg_program *ureg = t->ureg;
5904    struct ureg_dst face_temp = ureg_DECL_temporary(ureg);
5905    struct ureg_src face_input = t->inputs[t->inputMapping[VARYING_SLOT_FACE]];
5906
5907    if (ctx->Const.NativeIntegers) {
5908       ureg_FSGE(ureg, face_temp, face_input, ureg_imm1f(ureg, 0));
5909    }
5910    else {
5911       /* MOV_SAT face_temp, input[face] */
5912       ureg_MOV(ureg, ureg_saturate(face_temp), face_input);
5913    }
5914
5915    /* Use face_temp as face input from here on: */
5916    t->inputs[t->inputMapping[VARYING_SLOT_FACE]] = ureg_src(face_temp);
5917 }
5918
5919 static bool
5920 find_array(unsigned attr, struct array_decl *arrays, unsigned count,
5921            unsigned *array_id, unsigned *array_size)
5922 {
5923    unsigned i;
5924
5925    for (i = 0; i < count; i++) {
5926       struct array_decl *decl = &arrays[i];
5927
5928       if (attr == decl->mesa_index) {
5929          *array_id = decl->array_id;
5930          *array_size = decl->array_size;
5931          assert(*array_size);
5932          return true;
5933       }
5934    }
5935    return false;
5936 }
5937
5938 /**
5939  * Translate intermediate IR (glsl_to_tgsi_instruction) to TGSI format.
5940  * \param program  the program to translate
5941  * \param numInputs  number of input registers used
5942  * \param inputMapping  maps Mesa fragment program inputs to TGSI generic
5943  *                      input indexes
5944  * \param inputSemanticName  the TGSI_SEMANTIC flag for each input
5945  * \param inputSemanticIndex  the semantic index (ex: which texcoord) for
5946  *                            each input
5947  * \param interpMode  the TGSI_INTERPOLATE_LINEAR/PERSP mode for each input
5948  * \param interpLocation the TGSI_INTERPOLATE_LOC_* location for each input
5949  * \param numOutputs  number of output registers used
5950  * \param outputMapping  maps Mesa fragment program outputs to TGSI
5951  *                       generic outputs
5952  * \param outputSemanticName  the TGSI_SEMANTIC flag for each output
5953  * \param outputSemanticIndex  the semantic index (ex: which texcoord) for
5954  *                             each output
5955  *
5956  * \return  PIPE_OK or PIPE_ERROR_OUT_OF_MEMORY
5957  */
5958 extern "C" enum pipe_error
5959 st_translate_program(
5960    struct gl_context *ctx,
5961    uint procType,
5962    struct ureg_program *ureg,
5963    glsl_to_tgsi_visitor *program,
5964    const struct gl_program *proginfo,
5965    GLuint numInputs,
5966    const GLuint inputMapping[],
5967    const GLuint inputSlotToAttr[],
5968    const ubyte inputSemanticName[],
5969    const ubyte inputSemanticIndex[],
5970    const GLuint interpMode[],
5971    const GLuint interpLocation[],
5972    GLuint numOutputs,
5973    const GLuint outputMapping[],
5974    const GLuint outputSlotToAttr[],
5975    const ubyte outputSemanticName[],
5976    const ubyte outputSemanticIndex[])
5977 {
5978    struct st_translate *t;
5979    unsigned i;
5980    struct gl_program_constants *frag_const =
5981       &ctx->Const.Program[MESA_SHADER_FRAGMENT];
5982    enum pipe_error ret = PIPE_OK;
5983
5984    assert(numInputs <= ARRAY_SIZE(t->inputs));
5985    assert(numOutputs <= ARRAY_SIZE(t->outputs));
5986
5987    assert(_mesa_sysval_to_semantic[SYSTEM_VALUE_FRONT_FACE] ==
5988           TGSI_SEMANTIC_FACE);
5989    assert(_mesa_sysval_to_semantic[SYSTEM_VALUE_VERTEX_ID] ==
5990           TGSI_SEMANTIC_VERTEXID);
5991    assert(_mesa_sysval_to_semantic[SYSTEM_VALUE_INSTANCE_ID] ==
5992           TGSI_SEMANTIC_INSTANCEID);
5993    assert(_mesa_sysval_to_semantic[SYSTEM_VALUE_SAMPLE_ID] ==
5994           TGSI_SEMANTIC_SAMPLEID);
5995    assert(_mesa_sysval_to_semantic[SYSTEM_VALUE_SAMPLE_POS] ==
5996           TGSI_SEMANTIC_SAMPLEPOS);
5997    assert(_mesa_sysval_to_semantic[SYSTEM_VALUE_SAMPLE_MASK_IN] ==
5998           TGSI_SEMANTIC_SAMPLEMASK);
5999    assert(_mesa_sysval_to_semantic[SYSTEM_VALUE_INVOCATION_ID] ==
6000           TGSI_SEMANTIC_INVOCATIONID);
6001    assert(_mesa_sysval_to_semantic[SYSTEM_VALUE_VERTEX_ID_ZERO_BASE] ==
6002           TGSI_SEMANTIC_VERTEXID_NOBASE);
6003    assert(_mesa_sysval_to_semantic[SYSTEM_VALUE_BASE_VERTEX] ==
6004           TGSI_SEMANTIC_BASEVERTEX);
6005    assert(_mesa_sysval_to_semantic[SYSTEM_VALUE_TESS_COORD] ==
6006           TGSI_SEMANTIC_TESSCOORD);
6007    assert(_mesa_sysval_to_semantic[SYSTEM_VALUE_HELPER_INVOCATION] ==
6008           TGSI_SEMANTIC_HELPER_INVOCATION);
6009    assert(_mesa_sysval_to_semantic[SYSTEM_VALUE_LOCAL_INVOCATION_ID] ==
6010           TGSI_SEMANTIC_THREAD_ID);
6011    assert(_mesa_sysval_to_semantic[SYSTEM_VALUE_WORK_GROUP_ID] ==
6012           TGSI_SEMANTIC_BLOCK_ID);
6013    assert(_mesa_sysval_to_semantic[SYSTEM_VALUE_NUM_WORK_GROUPS] ==
6014           TGSI_SEMANTIC_GRID_SIZE);
6015
6016    t = CALLOC_STRUCT(st_translate);
6017    if (!t) {
6018       ret = PIPE_ERROR_OUT_OF_MEMORY;
6019       goto out;
6020    }
6021
6022    t->procType = procType;
6023    t->inputMapping = inputMapping;
6024    t->outputMapping = outputMapping;
6025    t->ureg = ureg;
6026    t->num_temp_arrays = program->next_array;
6027    if (t->num_temp_arrays)
6028       t->arrays = (struct ureg_dst*)
6029                   calloc(1, sizeof(t->arrays[0]) * t->num_temp_arrays);
6030
6031    /*
6032     * Declare input attributes.
6033     */
6034    switch (procType) {
6035    case TGSI_PROCESSOR_FRAGMENT:
6036       for (i = 0; i < numInputs; i++) {
6037          unsigned array_id = 0;
6038          unsigned array_size;
6039
6040          if (find_array(inputSlotToAttr[i], program->input_arrays,
6041                         program->num_input_arrays, &array_id, &array_size)) {
6042             /* We've found an array. Declare it so. */
6043             t->inputs[i] = ureg_DECL_fs_input_cyl_centroid(ureg,
6044                               inputSemanticName[i], inputSemanticIndex[i],
6045                               interpMode[i], 0, interpLocation[i],
6046                               array_id, array_size);
6047             i += array_size - 1;
6048          }
6049          else {
6050             t->inputs[i] = ureg_DECL_fs_input_cyl_centroid(ureg,
6051                               inputSemanticName[i], inputSemanticIndex[i],
6052                               interpMode[i], 0, interpLocation[i], 0, 1);
6053          }
6054       }
6055       break;
6056    case TGSI_PROCESSOR_GEOMETRY:
6057    case TGSI_PROCESSOR_TESS_EVAL:
6058    case TGSI_PROCESSOR_TESS_CTRL:
6059       for (i = 0; i < numInputs; i++) {
6060          unsigned array_id = 0;
6061          unsigned array_size;
6062
6063          if (find_array(inputSlotToAttr[i], program->input_arrays,
6064                         program->num_input_arrays, &array_id, &array_size)) {
6065             /* We've found an array. Declare it so. */
6066             t->inputs[i] = ureg_DECL_input(ureg, inputSemanticName[i],
6067                                            inputSemanticIndex[i],
6068                                            array_id, array_size);
6069             i += array_size - 1;
6070          }
6071          else {
6072             t->inputs[i] = ureg_DECL_input(ureg, inputSemanticName[i],
6073                                            inputSemanticIndex[i], 0, 1);
6074          }
6075       }
6076       break;
6077    case TGSI_PROCESSOR_VERTEX:
6078       for (i = 0; i < numInputs; i++) {
6079          t->inputs[i] = ureg_DECL_vs_input(ureg, i);
6080       }
6081       break;
6082    case TGSI_PROCESSOR_COMPUTE:
6083       break;
6084    default:
6085       assert(0);
6086    }
6087
6088    /*
6089     * Declare output attributes.
6090     */
6091    switch (procType) {
6092    case TGSI_PROCESSOR_FRAGMENT:
6093    case TGSI_PROCESSOR_COMPUTE:
6094       break;
6095    case TGSI_PROCESSOR_GEOMETRY:
6096    case TGSI_PROCESSOR_TESS_EVAL:
6097    case TGSI_PROCESSOR_TESS_CTRL:
6098    case TGSI_PROCESSOR_VERTEX:
6099       for (i = 0; i < numOutputs; i++) {
6100          unsigned array_id = 0;
6101          unsigned array_size;
6102
6103          if (find_array(outputSlotToAttr[i], program->output_arrays,
6104                         program->num_output_arrays, &array_id, &array_size)) {
6105             /* We've found an array. Declare it so. */
6106             t->outputs[i] = ureg_DECL_output_array(ureg,
6107                                                    outputSemanticName[i],
6108                                                    outputSemanticIndex[i],
6109                                                    array_id, array_size);
6110             i += array_size - 1;
6111          }
6112          else {
6113             t->outputs[i] = ureg_DECL_output(ureg,
6114                                              outputSemanticName[i],
6115                                              outputSemanticIndex[i]);
6116          }
6117       }
6118       break;
6119    default:
6120       assert(0);
6121    }
6122
6123    if (procType == TGSI_PROCESSOR_FRAGMENT) {
6124       if (program->shader->EarlyFragmentTests)
6125          ureg_property(ureg, TGSI_PROPERTY_FS_EARLY_DEPTH_STENCIL, 1);
6126
6127       if (proginfo->InputsRead & VARYING_BIT_POS) {
6128           /* Must do this after setting up t->inputs. */
6129           emit_wpos(st_context(ctx), t, proginfo, ureg,
6130                     program->wpos_transform_const);
6131       }
6132
6133       if (proginfo->InputsRead & VARYING_BIT_FACE)
6134          emit_face_var(ctx, t);
6135
6136       for (i = 0; i < numOutputs; i++) {
6137          switch (outputSemanticName[i]) {
6138          case TGSI_SEMANTIC_POSITION:
6139             t->outputs[i] = ureg_DECL_output(ureg,
6140                                              TGSI_SEMANTIC_POSITION, /* Z/Depth */
6141                                              outputSemanticIndex[i]);
6142             t->outputs[i] = ureg_writemask(t->outputs[i], TGSI_WRITEMASK_Z);
6143             break;
6144          case TGSI_SEMANTIC_STENCIL:
6145             t->outputs[i] = ureg_DECL_output(ureg,
6146                                              TGSI_SEMANTIC_STENCIL, /* Stencil */
6147                                              outputSemanticIndex[i]);
6148             t->outputs[i] = ureg_writemask(t->outputs[i], TGSI_WRITEMASK_Y);
6149             break;
6150          case TGSI_SEMANTIC_COLOR:
6151             t->outputs[i] = ureg_DECL_output(ureg,
6152                                              TGSI_SEMANTIC_COLOR,
6153                                              outputSemanticIndex[i]);
6154             break;
6155          case TGSI_SEMANTIC_SAMPLEMASK:
6156             t->outputs[i] = ureg_DECL_output(ureg,
6157                                              TGSI_SEMANTIC_SAMPLEMASK,
6158                                              outputSemanticIndex[i]);
6159             /* TODO: If we ever support more than 32 samples, this will have
6160              * to become an array.
6161              */
6162             t->outputs[i] = ureg_writemask(t->outputs[i], TGSI_WRITEMASK_X);
6163             break;
6164          default:
6165             assert(!"fragment shader outputs must be POSITION/STENCIL/COLOR");
6166             ret = PIPE_ERROR_BAD_INPUT;
6167             goto out;
6168          }
6169       }
6170    }
6171    else if (procType == TGSI_PROCESSOR_VERTEX) {
6172       for (i = 0; i < numOutputs; i++) {
6173          if (outputSemanticName[i] == TGSI_SEMANTIC_FOG) {
6174             /* force register to contain a fog coordinate in the form (F, 0, 0, 1). */
6175             ureg_MOV(ureg,
6176                      ureg_writemask(t->outputs[i], TGSI_WRITEMASK_YZW),
6177                      ureg_imm4f(ureg, 0.0f, 0.0f, 0.0f, 1.0f));
6178             t->outputs[i] = ureg_writemask(t->outputs[i], TGSI_WRITEMASK_X);
6179          }
6180       }
6181    }
6182
6183    /* Declare address register.
6184     */
6185    if (program->num_address_regs > 0) {
6186       assert(program->num_address_regs <= 3);
6187       for (int i = 0; i < program->num_address_regs; i++)
6188          t->address[i] = ureg_DECL_address(ureg);
6189    }
6190
6191    /* Declare misc input registers
6192     */
6193    {
6194       GLbitfield sysInputs = proginfo->SystemValuesRead;
6195
6196       for (i = 0; sysInputs; i++) {
6197          if (sysInputs & (1 << i)) {
6198             unsigned semName = _mesa_sysval_to_semantic[i];
6199
6200             t->systemValues[i] = ureg_DECL_system_value(ureg, semName, 0);
6201
6202             if (semName == TGSI_SEMANTIC_INSTANCEID ||
6203                 semName == TGSI_SEMANTIC_VERTEXID) {
6204                /* From Gallium perspective, these system values are always
6205                 * integer, and require native integer support.  However, if
6206                 * native integer is supported on the vertex stage but not the
6207                 * pixel stage (e.g, i915g + draw), Mesa will generate IR that
6208                 * assumes these system values are floats. To resolve the
6209                 * inconsistency, we insert a U2F.
6210                 */
6211                struct st_context *st = st_context(ctx);
6212                struct pipe_screen *pscreen = st->pipe->screen;
6213                assert(procType == TGSI_PROCESSOR_VERTEX);
6214                assert(pscreen->get_shader_param(pscreen, PIPE_SHADER_VERTEX, PIPE_SHADER_CAP_INTEGERS));
6215                (void) pscreen;
6216                if (!ctx->Const.NativeIntegers) {
6217                   struct ureg_dst temp = ureg_DECL_local_temporary(t->ureg);
6218                   ureg_U2F( t->ureg, ureg_writemask(temp, TGSI_WRITEMASK_X), t->systemValues[i]);
6219                   t->systemValues[i] = ureg_scalar(ureg_src(temp), 0);
6220                }
6221             }
6222
6223             if (procType == TGSI_PROCESSOR_FRAGMENT &&
6224                 semName == TGSI_SEMANTIC_POSITION)
6225                emit_wpos(st_context(ctx), t, proginfo, ureg,
6226                          program->wpos_transform_const);
6227
6228             sysInputs &= ~(1 << i);
6229          }
6230       }
6231    }
6232
6233    t->array_sizes = program->array_sizes;
6234    t->input_arrays = program->input_arrays;
6235    t->output_arrays = program->output_arrays;
6236
6237    /* Emit constants and uniforms.  TGSI uses a single index space for these,
6238     * so we put all the translated regs in t->constants.
6239     */
6240    if (proginfo->Parameters) {
6241       t->constants = (struct ureg_src *)
6242          calloc(proginfo->Parameters->NumParameters, sizeof(t->constants[0]));
6243       if (t->constants == NULL) {
6244          ret = PIPE_ERROR_OUT_OF_MEMORY;
6245          goto out;
6246       }
6247       t->num_constants = proginfo->Parameters->NumParameters;
6248
6249       for (i = 0; i < proginfo->Parameters->NumParameters; i++) {
6250          switch (proginfo->Parameters->Parameters[i].Type) {
6251          case PROGRAM_STATE_VAR:
6252          case PROGRAM_UNIFORM:
6253             t->constants[i] = ureg_DECL_constant(ureg, i);
6254             break;
6255
6256          /* Emit immediates for PROGRAM_CONSTANT only when there's no indirect
6257           * addressing of the const buffer.
6258           * FIXME: Be smarter and recognize param arrays:
6259           * indirect addressing is only valid within the referenced
6260           * array.
6261           */
6262          case PROGRAM_CONSTANT:
6263             if (program->indirect_addr_consts)
6264                t->constants[i] = ureg_DECL_constant(ureg, i);
6265             else
6266                t->constants[i] = emit_immediate(t,
6267                                                 proginfo->Parameters->ParameterValues[i],
6268                                                 proginfo->Parameters->Parameters[i].DataType,
6269                                                 4);
6270             break;
6271          default:
6272             break;
6273          }
6274       }
6275    }
6276
6277    if (program->shader) {
6278       unsigned num_ubos = program->shader->NumUniformBlocks;
6279
6280       for (i = 0; i < num_ubos; i++) {
6281          unsigned size = program->shader->UniformBlocks[i]->UniformBufferSize;
6282          unsigned num_const_vecs = (size + 15) / 16;
6283          unsigned first, last;
6284          assert(num_const_vecs > 0);
6285          first = 0;
6286          last = num_const_vecs > 0 ? num_const_vecs - 1 : 0;
6287          ureg_DECL_constant2D(t->ureg, first, last, i + 1);
6288       }
6289    }
6290
6291    /* Emit immediate values.
6292     */
6293    t->immediates = (struct ureg_src *)
6294       calloc(program->num_immediates, sizeof(struct ureg_src));
6295    if (t->immediates == NULL) {
6296       ret = PIPE_ERROR_OUT_OF_MEMORY;
6297       goto out;
6298    }
6299    t->num_immediates = program->num_immediates;
6300
6301    i = 0;
6302    foreach_in_list(immediate_storage, imm, &program->immediates) {
6303       assert(i < program->num_immediates);
6304       t->immediates[i++] = emit_immediate(t, imm->values, imm->type, imm->size32);
6305    }
6306    assert(i == program->num_immediates);
6307
6308    /* texture samplers */
6309    for (i = 0; i < frag_const->MaxTextureImageUnits; i++) {
6310       if (program->samplers_used & (1 << i)) {
6311          unsigned type;
6312
6313          t->samplers[i] = ureg_DECL_sampler(ureg, i);
6314
6315          switch (program->sampler_types[i]) {
6316          case GLSL_TYPE_INT:
6317             type = TGSI_RETURN_TYPE_SINT;
6318             break;
6319          case GLSL_TYPE_UINT:
6320             type = TGSI_RETURN_TYPE_UINT;
6321             break;
6322          case GLSL_TYPE_FLOAT:
6323             type = TGSI_RETURN_TYPE_FLOAT;
6324             break;
6325          default:
6326             unreachable("not reached");
6327          }
6328
6329          ureg_DECL_sampler_view( ureg, i, program->sampler_targets[i],
6330                                  type, type, type, type );
6331       }
6332    }
6333
6334    for (i = 0; i < frag_const->MaxAtomicBuffers; i++) {
6335       if (program->buffers_used & (1 << i)) {
6336          t->buffers[i] = ureg_DECL_buffer(ureg, i, true);
6337       }
6338    }
6339
6340    for (; i < frag_const->MaxAtomicBuffers + frag_const->MaxShaderStorageBlocks;
6341         i++) {
6342       if (program->buffers_used & (1 << i)) {
6343          t->buffers[i] = ureg_DECL_buffer(ureg, i, false);
6344       }
6345    }
6346
6347    if (program->use_shared_memory)
6348       t->shared_memory = ureg_DECL_memory(ureg, TGSI_MEMORY_TYPE_SHARED);
6349
6350    for (i = 0; i < program->shader->NumImages; i++) {
6351       if (program->images_used & (1 << i)) {
6352          t->images[i] = ureg_DECL_image(ureg, i,
6353                                         program->image_targets[i],
6354                                         program->image_formats[i],
6355                                         true, false);
6356       }
6357    }
6358
6359    /* Emit each instruction in turn:
6360     */
6361    foreach_in_list(glsl_to_tgsi_instruction, inst, &program->instructions) {
6362       set_insn_start(t, ureg_get_instruction_number(ureg));
6363       compile_tgsi_instruction(t, inst);
6364    }
6365
6366    /* Fix up all emitted labels:
6367     */
6368    for (i = 0; i < t->labels_count; i++) {
6369       ureg_fixup_label(ureg, t->labels[i].token,
6370                        t->insn[t->labels[i].branch_target]);
6371    }
6372
6373    /* Set the next shader stage hint for VS and TES. */
6374    switch (procType) {
6375    case TGSI_PROCESSOR_VERTEX:
6376    case TGSI_PROCESSOR_TESS_EVAL:
6377       if (program->shader_program->SeparateShader)
6378          break;
6379
6380       for (i = program->shader->Stage+1; i <= MESA_SHADER_FRAGMENT; i++) {
6381          if (program->shader_program->_LinkedShaders[i]) {
6382             unsigned next;
6383
6384             switch (i) {
6385             case MESA_SHADER_TESS_CTRL:
6386                next = TGSI_PROCESSOR_TESS_CTRL;
6387                break;
6388             case MESA_SHADER_TESS_EVAL:
6389                next = TGSI_PROCESSOR_TESS_EVAL;
6390                break;
6391             case MESA_SHADER_GEOMETRY:
6392                next = TGSI_PROCESSOR_GEOMETRY;
6393                break;
6394             case MESA_SHADER_FRAGMENT:
6395                next = TGSI_PROCESSOR_FRAGMENT;
6396                break;
6397             default:
6398                assert(0);
6399                continue;
6400             }
6401
6402             ureg_set_next_shader_processor(ureg, next);
6403             break;
6404          }
6405       }
6406       break;
6407    }
6408
6409 out:
6410    if (t) {
6411       free(t->arrays);
6412       free(t->temps);
6413       free(t->insn);
6414       free(t->labels);
6415       free(t->constants);
6416       t->num_constants = 0;
6417       free(t->immediates);
6418       t->num_immediates = 0;
6419
6420       if (t->error) {
6421          debug_printf("%s: translate error flag set\n", __func__);
6422       }
6423
6424       FREE(t);
6425    }
6426
6427    return ret;
6428 }
6429 /* ----------------------------- End TGSI code ------------------------------ */
6430
6431
6432 /**
6433  * Convert a shader's GLSL IR into a Mesa gl_program, although without
6434  * generating Mesa IR.
6435  */
6436 static struct gl_program *
6437 get_mesa_program(struct gl_context *ctx,
6438                  struct gl_shader_program *shader_program,
6439                  struct gl_shader *shader)
6440 {
6441    glsl_to_tgsi_visitor* v;
6442    struct gl_program *prog;
6443    GLenum target = _mesa_shader_stage_to_program(shader->Stage);
6444    bool progress;
6445    struct gl_shader_compiler_options *options =
6446          &ctx->Const.ShaderCompilerOptions[_mesa_shader_enum_to_shader_stage(shader->Type)];
6447    struct pipe_screen *pscreen = ctx->st->pipe->screen;
6448    unsigned ptarget = st_shader_stage_to_ptarget(shader->Stage);
6449
6450    validate_ir_tree(shader->ir);
6451
6452    prog = ctx->Driver.NewProgram(ctx, target, shader_program->Name);
6453    if (!prog)
6454       return NULL;
6455    prog->Parameters = _mesa_new_parameter_list();
6456    v = new glsl_to_tgsi_visitor();
6457    v->ctx = ctx;
6458    v->prog = prog;
6459    v->shader_program = shader_program;
6460    v->shader = shader;
6461    v->options = options;
6462    v->glsl_version = ctx->Const.GLSLVersion;
6463    v->native_integers = ctx->Const.NativeIntegers;
6464
6465    v->have_sqrt = pscreen->get_shader_param(pscreen, ptarget,
6466                                             PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED);
6467    v->have_fma = pscreen->get_shader_param(pscreen, ptarget,
6468                                            PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED);
6469
6470    _mesa_copy_linked_program_data(shader->Stage, shader_program, prog);
6471    _mesa_generate_parameters_list_for_uniforms(shader_program, shader,
6472                                                prog->Parameters);
6473
6474    /* Remove reads from output registers. */
6475    lower_output_reads(shader->Stage, shader->ir);
6476
6477    /* Emit intermediate IR for main(). */
6478    visit_exec_list(shader->ir, v);
6479
6480    /* Now emit bodies for any functions that were used. */
6481    do {
6482       progress = GL_FALSE;
6483
6484       foreach_in_list(function_entry, entry, &v->function_signatures) {
6485          if (!entry->bgn_inst) {
6486             v->current_function = entry;
6487
6488             entry->bgn_inst = v->emit_asm(NULL, TGSI_OPCODE_BGNSUB);
6489             entry->bgn_inst->function = entry;
6490
6491             visit_exec_list(&entry->sig->body, v);
6492
6493             glsl_to_tgsi_instruction *last;
6494             last = (glsl_to_tgsi_instruction *)v->instructions.get_tail();
6495             if (last->op != TGSI_OPCODE_RET)
6496                v->emit_asm(NULL, TGSI_OPCODE_RET);
6497
6498             glsl_to_tgsi_instruction *end;
6499             end = v->emit_asm(NULL, TGSI_OPCODE_ENDSUB);
6500             end->function = entry;
6501
6502             progress = GL_TRUE;
6503          }
6504       }
6505    } while (progress);
6506
6507 #if 0
6508    /* Print out some information (for debugging purposes) used by the
6509     * optimization passes. */
6510    {
6511       int i;
6512       int *first_writes = rzalloc_array(v->mem_ctx, int, v->next_temp);
6513       int *first_reads = rzalloc_array(v->mem_ctx, int, v->next_temp);
6514       int *last_writes = rzalloc_array(v->mem_ctx, int, v->next_temp);
6515       int *last_reads = rzalloc_array(v->mem_ctx, int, v->next_temp);
6516
6517       for (i = 0; i < v->next_temp; i++) {
6518          first_writes[i] = -1;
6519          first_reads[i] = -1;
6520          last_writes[i] = -1;
6521          last_reads[i] = -1;
6522       }
6523       v->get_first_temp_read(first_reads);
6524       v->get_last_temp_read_first_temp_write(last_reads, first_writes);
6525       v->get_last_temp_write(last_writes);
6526       for (i = 0; i < v->next_temp; i++)
6527          printf("Temp %d: FR=%3d FW=%3d LR=%3d LW=%3d\n", i, first_reads[i],
6528                 first_writes[i],
6529                 last_reads[i],
6530                 last_writes[i]);
6531       ralloc_free(first_writes);
6532       ralloc_free(first_reads);
6533       ralloc_free(last_writes);
6534       ralloc_free(last_reads);
6535    }
6536 #endif
6537
6538    /* Perform optimizations on the instructions in the glsl_to_tgsi_visitor. */
6539    v->simplify_cmp();
6540
6541    if (shader->Type != GL_TESS_CONTROL_SHADER &&
6542        shader->Type != GL_TESS_EVALUATION_SHADER)
6543       v->copy_propagate();
6544
6545    while (v->eliminate_dead_code());
6546
6547    v->merge_two_dsts();
6548    v->merge_registers();
6549    v->renumber_registers();
6550
6551    /* Write the END instruction. */
6552    v->emit_asm(NULL, TGSI_OPCODE_END);
6553
6554    if (ctx->_Shader->Flags & GLSL_DUMP) {
6555       _mesa_log("\n");
6556       _mesa_log("GLSL IR for linked %s program %d:\n",
6557              _mesa_shader_stage_to_string(shader->Stage),
6558              shader_program->Name);
6559       _mesa_print_ir(_mesa_get_log_file(), shader->ir, NULL);
6560       _mesa_log("\n\n");
6561    }
6562
6563    prog->Instructions = NULL;
6564    prog->NumInstructions = 0;
6565
6566    do_set_program_inouts(shader->ir, prog, shader->Stage);
6567    shrink_array_declarations(v->input_arrays, v->num_input_arrays,
6568                              prog->InputsRead, prog->DoubleInputsRead, prog->PatchInputsRead);
6569    shrink_array_declarations(v->output_arrays, v->num_output_arrays,
6570                              prog->OutputsWritten, 0ULL, prog->PatchOutputsWritten);
6571    count_resources(v, prog);
6572
6573    /* The GLSL IR won't be needed anymore. */
6574    ralloc_free(shader->ir);
6575    shader->ir = NULL;
6576
6577    /* This must be done before the uniform storage is associated. */
6578    if (shader->Type == GL_FRAGMENT_SHADER &&
6579        (prog->InputsRead & VARYING_BIT_POS ||
6580         prog->SystemValuesRead & (1 << SYSTEM_VALUE_FRAG_COORD))) {
6581       static const gl_state_index wposTransformState[STATE_LENGTH] = {
6582          STATE_INTERNAL, STATE_FB_WPOS_Y_TRANSFORM
6583       };
6584
6585       v->wpos_transform_const = _mesa_add_state_reference(prog->Parameters,
6586                                                           wposTransformState);
6587    }
6588
6589    _mesa_reference_program(ctx, &shader->Program, prog);
6590
6591    /* Avoid reallocation of the program parameter list, because the uniform
6592     * storage is only associated with the original parameter list.
6593     * This should be enough for Bitmap and DrawPixels constants.
6594     */
6595    _mesa_reserve_parameter_storage(prog->Parameters, 8);
6596
6597    /* This has to be done last.  Any operation the can cause
6598     * prog->ParameterValues to get reallocated (e.g., anything that adds a
6599     * program constant) has to happen before creating this linkage.
6600     */
6601    _mesa_associate_uniform_storage(ctx, shader_program, prog->Parameters);
6602    if (!shader_program->LinkStatus) {
6603       free_glsl_to_tgsi_visitor(v);
6604       return NULL;
6605    }
6606
6607    struct st_vertex_program *stvp;
6608    struct st_fragment_program *stfp;
6609    struct st_geometry_program *stgp;
6610    struct st_tessctrl_program *sttcp;
6611    struct st_tesseval_program *sttep;
6612    struct st_compute_program *stcp;
6613
6614    switch (shader->Type) {
6615    case GL_VERTEX_SHADER:
6616       stvp = (struct st_vertex_program *)prog;
6617       stvp->glsl_to_tgsi = v;
6618       break;
6619    case GL_FRAGMENT_SHADER:
6620       stfp = (struct st_fragment_program *)prog;
6621       stfp->glsl_to_tgsi = v;
6622       break;
6623    case GL_GEOMETRY_SHADER:
6624       stgp = (struct st_geometry_program *)prog;
6625       stgp->glsl_to_tgsi = v;
6626       break;
6627    case GL_TESS_CONTROL_SHADER:
6628       sttcp = (struct st_tessctrl_program *)prog;
6629       sttcp->glsl_to_tgsi = v;
6630       break;
6631    case GL_TESS_EVALUATION_SHADER:
6632       sttep = (struct st_tesseval_program *)prog;
6633       sttep->glsl_to_tgsi = v;
6634       break;
6635    case GL_COMPUTE_SHADER:
6636       stcp = (struct st_compute_program *)prog;
6637       stcp->glsl_to_tgsi = v;
6638       break;
6639    default:
6640       assert(!"should not be reached");
6641       return NULL;
6642    }
6643
6644    return prog;
6645 }
6646
6647 extern "C" {
6648
6649 static void
6650 st_dump_program_for_shader_db(struct gl_context *ctx,
6651                               struct gl_shader_program *prog)
6652 {
6653    /* Dump only successfully compiled and linked shaders to the specified
6654     * file. This is for shader-db.
6655     *
6656     * These options allow some pre-processing of shaders while dumping,
6657     * because some apps have ill-formed shaders.
6658     */
6659    const char *dump_filename = os_get_option("ST_DUMP_SHADERS");
6660    const char *insert_directives = os_get_option("ST_DUMP_INSERT");
6661
6662    if (dump_filename && prog->Name != 0) {
6663       FILE *f = fopen(dump_filename, "a");
6664
6665       if (f) {
6666          for (unsigned i = 0; i < prog->NumShaders; i++) {
6667             const struct gl_shader *sh = prog->Shaders[i];
6668             const char *source;
6669             bool skip_version = false;
6670
6671             if (!sh)
6672                continue;
6673
6674             source = sh->Source;
6675
6676             /* This string mustn't be changed. shader-db uses it to find
6677              * where the shader begins.
6678              */
6679             fprintf(f, "GLSL %s shader %d source for linked program %d:\n",
6680                     _mesa_shader_stage_to_string(sh->Stage),
6681                     i, prog->Name);
6682
6683             /* Dump the forced version if set. */
6684             if (ctx->Const.ForceGLSLVersion) {
6685                fprintf(f, "#version %i\n", ctx->Const.ForceGLSLVersion);
6686                skip_version = true;
6687             }
6688
6689             /* Insert directives (optional). */
6690             if (insert_directives) {
6691                if (!ctx->Const.ForceGLSLVersion && prog->Version)
6692                   fprintf(f, "#version %i\n", prog->Version);
6693                fprintf(f, "%s\n", insert_directives);
6694                skip_version = true;
6695             }
6696
6697             if (skip_version && strncmp(source, "#version ", 9) == 0) {
6698                const char *next_line = strstr(source, "\n");
6699
6700                if (next_line)
6701                   source = next_line + 1;
6702                else
6703                   continue;
6704             }
6705
6706             fprintf(f, "%s", source);
6707             fprintf(f, "\n");
6708          }
6709          fclose(f);
6710       }
6711    }
6712 }
6713
6714 /**
6715  * Link a shader.
6716  * Called via ctx->Driver.LinkShader()
6717  * This actually involves converting GLSL IR into an intermediate TGSI-like IR
6718  * with code lowering and other optimizations.
6719  */
6720 GLboolean
6721 st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
6722 {
6723    struct pipe_screen *pscreen = ctx->st->pipe->screen;
6724    assert(prog->LinkStatus);
6725
6726    for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
6727       if (prog->_LinkedShaders[i] == NULL)
6728          continue;
6729
6730       bool progress;
6731       exec_list *ir = prog->_LinkedShaders[i]->ir;
6732       gl_shader_stage stage = _mesa_shader_enum_to_shader_stage(prog->_LinkedShaders[i]->Type);
6733       const struct gl_shader_compiler_options *options =
6734             &ctx->Const.ShaderCompilerOptions[stage];
6735       unsigned ptarget = st_shader_stage_to_ptarget(stage);
6736       bool have_dround = pscreen->get_shader_param(pscreen, ptarget,
6737                                                    PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED);
6738       bool have_dfrexp = pscreen->get_shader_param(pscreen, ptarget,
6739                                                    PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED);
6740
6741       /* If there are forms of indirect addressing that the driver
6742        * cannot handle, perform the lowering pass.
6743        */
6744       if (options->EmitNoIndirectInput || options->EmitNoIndirectOutput ||
6745           options->EmitNoIndirectTemp || options->EmitNoIndirectUniform) {
6746          lower_variable_index_to_cond_assign(prog->_LinkedShaders[i]->Stage, ir,
6747                                              options->EmitNoIndirectInput,
6748                                              options->EmitNoIndirectOutput,
6749                                              options->EmitNoIndirectTemp,
6750                                              options->EmitNoIndirectUniform);
6751       }
6752
6753       if (ctx->Extensions.ARB_shading_language_packing) {
6754          unsigned lower_inst = LOWER_PACK_SNORM_2x16 |
6755                                LOWER_UNPACK_SNORM_2x16 |
6756                                LOWER_PACK_UNORM_2x16 |
6757                                LOWER_UNPACK_UNORM_2x16 |
6758                                LOWER_PACK_SNORM_4x8 |
6759                                LOWER_UNPACK_SNORM_4x8 |
6760                                LOWER_UNPACK_UNORM_4x8 |
6761                                LOWER_PACK_UNORM_4x8;
6762
6763          if (ctx->Extensions.ARB_gpu_shader5)
6764             lower_inst |= LOWER_PACK_USE_BFI |
6765                           LOWER_PACK_USE_BFE;
6766          if (!ctx->st->has_half_float_packing)
6767             lower_inst |= LOWER_PACK_HALF_2x16 |
6768                           LOWER_UNPACK_HALF_2x16;
6769
6770          lower_packing_builtins(ir, lower_inst);
6771       }
6772
6773       if (!pscreen->get_param(pscreen, PIPE_CAP_TEXTURE_GATHER_OFFSETS))
6774          lower_offset_arrays(ir);
6775       do_mat_op_to_vec(ir);
6776       lower_instructions(ir,
6777                          MOD_TO_FLOOR |
6778                          DIV_TO_MUL_RCP |
6779                          EXP_TO_EXP2 |
6780                          LOG_TO_LOG2 |
6781                          LDEXP_TO_ARITH |
6782                          (have_dfrexp ? 0 : DFREXP_DLDEXP_TO_ARITH) |
6783                          CARRY_TO_ARITH |
6784                          BORROW_TO_ARITH |
6785                          (have_dround ? 0 : DOPS_TO_DFRAC) |
6786                          (options->EmitNoPow ? POW_TO_EXP2 : 0) |
6787                          (!ctx->Const.NativeIntegers ? INT_DIV_TO_MUL_RCP : 0) |
6788                          (options->EmitNoSat ? SAT_TO_CLAMP : 0));
6789
6790       do_vec_index_to_cond_assign(ir);
6791       lower_vector_insert(ir, true);
6792       lower_quadop_vector(ir, false);
6793       lower_noise(ir);
6794       if (options->MaxIfDepth == 0) {
6795          lower_discard(ir);
6796       }
6797
6798       do {
6799          progress = false;
6800
6801          progress = do_lower_jumps(ir, true, true, options->EmitNoMainReturn, options->EmitNoCont, options->EmitNoLoops) || progress;
6802
6803          progress = do_common_optimization(ir, true, true, options,
6804                                            ctx->Const.NativeIntegers)
6805            || progress;
6806
6807          progress = lower_if_to_cond_assign(ir, options->MaxIfDepth) || progress;
6808
6809       } while (progress);
6810
6811       validate_ir_tree(ir);
6812    }
6813
6814    build_program_resource_list(ctx, prog);
6815
6816    for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
6817       struct gl_program *linked_prog;
6818
6819       if (prog->_LinkedShaders[i] == NULL)
6820          continue;
6821
6822       linked_prog = get_mesa_program(ctx, prog, prog->_LinkedShaders[i]);
6823
6824       if (linked_prog) {
6825          _mesa_reference_program(ctx, &prog->_LinkedShaders[i]->Program,
6826                                  linked_prog);
6827          if (!ctx->Driver.ProgramStringNotify(ctx,
6828                                               _mesa_shader_stage_to_program(i),
6829                                               linked_prog)) {
6830             _mesa_reference_program(ctx, &prog->_LinkedShaders[i]->Program,
6831                                     NULL);
6832             _mesa_reference_program(ctx, &linked_prog, NULL);
6833             return GL_FALSE;
6834          }
6835       }
6836
6837       _mesa_reference_program(ctx, &linked_prog, NULL);
6838    }
6839
6840    st_dump_program_for_shader_db(ctx, prog);
6841    return GL_TRUE;
6842 }
6843
6844 void
6845 st_translate_stream_output_info(glsl_to_tgsi_visitor *glsl_to_tgsi,
6846                                 const GLuint outputMapping[],
6847                                 struct pipe_stream_output_info *so)
6848 {
6849    unsigned i;
6850    struct gl_transform_feedback_info *info =
6851       &glsl_to_tgsi->shader_program->LinkedTransformFeedback;
6852
6853    for (i = 0; i < info->NumOutputs; i++) {
6854       so->output[i].register_index =
6855          outputMapping[info->Outputs[i].OutputRegister];
6856       so->output[i].start_component = info->Outputs[i].ComponentOffset;
6857       so->output[i].num_components = info->Outputs[i].NumComponents;
6858       so->output[i].output_buffer = info->Outputs[i].OutputBuffer;
6859       so->output[i].dst_offset = info->Outputs[i].DstOffset;
6860       so->output[i].stream = info->Outputs[i].StreamId;
6861    }
6862
6863    for (i = 0; i < PIPE_MAX_SO_BUFFERS; i++) {
6864       so->stride[i] = info->Buffers[i].Stride;
6865    }
6866    so->num_outputs = info->NumOutputs;
6867 }
6868
6869 } /* extern "C" */