glsl_to_tgsi: fix more potential shader reference leaks
[mesa.git] / src / mesa / state_tracker / st_glsl_to_tgsi.cpp
1 /*
2 * Copyright (C) 2005-2007 Brian Paul All Rights Reserved.
3 * Copyright (C) 2008 VMware, Inc. All Rights Reserved.
4 * Copyright © 2010 Intel Corporation
5 * Copyright © 2011 Bryan Cain
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the next
15 * paragraph) shall be included in all copies or substantial portions of the
16 * Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
21 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
24 * DEALINGS IN THE SOFTWARE.
25 */
26
27 /**
28 * \file glsl_to_tgsi.cpp
29 *
30 * Translate GLSL IR to TGSI.
31 */
32
33 #include <stdio.h>
34 #include "main/compiler.h"
35 #include "ir.h"
36 #include "ir_visitor.h"
37 #include "ir_print_visitor.h"
38 #include "ir_expression_flattening.h"
39 #include "glsl_types.h"
40 #include "glsl_parser_extras.h"
41 #include "../glsl/program.h"
42 #include "ir_optimization.h"
43 #include "ast.h"
44
45 extern "C" {
46 #include "main/mtypes.h"
47 #include "main/shaderapi.h"
48 #include "main/shaderobj.h"
49 #include "main/uniforms.h"
50 #include "program/hash_table.h"
51 #include "program/prog_instruction.h"
52 #include "program/prog_optimize.h"
53 #include "program/prog_print.h"
54 #include "program/program.h"
55 #include "program/prog_uniform.h"
56 #include "program/prog_parameter.h"
57 #include "program/sampler.h"
58
59 #include "pipe/p_compiler.h"
60 #include "pipe/p_context.h"
61 #include "pipe/p_screen.h"
62 #include "pipe/p_shader_tokens.h"
63 #include "pipe/p_state.h"
64 #include "util/u_math.h"
65 #include "tgsi/tgsi_ureg.h"
66 #include "tgsi/tgsi_info.h"
67 #include "st_context.h"
68 #include "st_program.h"
69 #include "st_glsl_to_tgsi.h"
70 #include "st_mesa_to_tgsi.h"
71 }
72
73 #define PROGRAM_IMMEDIATE PROGRAM_FILE_MAX
74 #define PROGRAM_ANY_CONST ((1 << PROGRAM_LOCAL_PARAM) | \
75 (1 << PROGRAM_ENV_PARAM) | \
76 (1 << PROGRAM_STATE_VAR) | \
77 (1 << PROGRAM_NAMED_PARAM) | \
78 (1 << PROGRAM_CONSTANT) | \
79 (1 << PROGRAM_UNIFORM))
80
81 #define MAX_TEMPS 4096
82
83 /* will be 4 for GLSL 4.00 */
84 #define MAX_GLSL_TEXTURE_OFFSET 1
85
86 class st_src_reg;
87 class st_dst_reg;
88
89 static int swizzle_for_size(int size);
90
91 /**
92 * This struct is a corresponding struct to TGSI ureg_src.
93 */
94 class st_src_reg {
95 public:
96 st_src_reg(gl_register_file file, int index, const glsl_type *type)
97 {
98 this->file = file;
99 this->index = index;
100 if (type && (type->is_scalar() || type->is_vector() || type->is_matrix()))
101 this->swizzle = swizzle_for_size(type->vector_elements);
102 else
103 this->swizzle = SWIZZLE_XYZW;
104 this->negate = 0;
105 this->type = type ? type->base_type : GLSL_TYPE_ERROR;
106 this->reladdr = NULL;
107 }
108
109 st_src_reg(gl_register_file file, int index, int type)
110 {
111 this->type = type;
112 this->file = file;
113 this->index = index;
114 this->swizzle = SWIZZLE_XYZW;
115 this->negate = 0;
116 this->reladdr = NULL;
117 }
118
119 st_src_reg()
120 {
121 this->type = GLSL_TYPE_ERROR;
122 this->file = PROGRAM_UNDEFINED;
123 this->index = 0;
124 this->swizzle = 0;
125 this->negate = 0;
126 this->reladdr = NULL;
127 }
128
129 explicit st_src_reg(st_dst_reg reg);
130
131 gl_register_file file; /**< PROGRAM_* from Mesa */
132 int index; /**< temporary index, VERT_ATTRIB_*, FRAG_ATTRIB_*, etc. */
133 GLuint swizzle; /**< SWIZZLE_XYZWONEZERO swizzles from Mesa. */
134 int negate; /**< NEGATE_XYZW mask from mesa */
135 int type; /** GLSL_TYPE_* from GLSL IR (enum glsl_base_type) */
136 /** Register index should be offset by the integer in this reg. */
137 st_src_reg *reladdr;
138 };
139
140 class st_dst_reg {
141 public:
142 st_dst_reg(gl_register_file file, int writemask, int type)
143 {
144 this->file = file;
145 this->index = 0;
146 this->writemask = writemask;
147 this->cond_mask = COND_TR;
148 this->reladdr = NULL;
149 this->type = type;
150 }
151
152 st_dst_reg()
153 {
154 this->type = GLSL_TYPE_ERROR;
155 this->file = PROGRAM_UNDEFINED;
156 this->index = 0;
157 this->writemask = 0;
158 this->cond_mask = COND_TR;
159 this->reladdr = NULL;
160 }
161
162 explicit st_dst_reg(st_src_reg reg);
163
164 gl_register_file file; /**< PROGRAM_* from Mesa */
165 int index; /**< temporary index, VERT_ATTRIB_*, FRAG_ATTRIB_*, etc. */
166 int writemask; /**< Bitfield of WRITEMASK_[XYZW] */
167 GLuint cond_mask:4;
168 int type; /** GLSL_TYPE_* from GLSL IR (enum glsl_base_type) */
169 /** Register index should be offset by the integer in this reg. */
170 st_src_reg *reladdr;
171 };
172
173 st_src_reg::st_src_reg(st_dst_reg reg)
174 {
175 this->type = reg.type;
176 this->file = reg.file;
177 this->index = reg.index;
178 this->swizzle = SWIZZLE_XYZW;
179 this->negate = 0;
180 this->reladdr = reg.reladdr;
181 }
182
183 st_dst_reg::st_dst_reg(st_src_reg reg)
184 {
185 this->type = reg.type;
186 this->file = reg.file;
187 this->index = reg.index;
188 this->writemask = WRITEMASK_XYZW;
189 this->cond_mask = COND_TR;
190 this->reladdr = reg.reladdr;
191 }
192
193 class glsl_to_tgsi_instruction : public exec_node {
194 public:
195 /* Callers of this ralloc-based new need not call delete. It's
196 * easier to just ralloc_free 'ctx' (or any of its ancestors). */
197 static void* operator new(size_t size, void *ctx)
198 {
199 void *node;
200
201 node = rzalloc_size(ctx, size);
202 assert(node != NULL);
203
204 return node;
205 }
206
207 unsigned op;
208 st_dst_reg dst;
209 st_src_reg src[3];
210 /** Pointer to the ir source this tree came from for debugging */
211 ir_instruction *ir;
212 GLboolean cond_update;
213 bool saturate;
214 int sampler; /**< sampler index */
215 int tex_target; /**< One of TEXTURE_*_INDEX */
216 GLboolean tex_shadow;
217 struct tgsi_texture_offset tex_offsets[MAX_GLSL_TEXTURE_OFFSET];
218 unsigned tex_offset_num_offset;
219 int dead_mask; /**< Used in dead code elimination */
220
221 class function_entry *function; /* Set on TGSI_OPCODE_CAL or TGSI_OPCODE_BGNSUB */
222 };
223
224 class variable_storage : public exec_node {
225 public:
226 variable_storage(ir_variable *var, gl_register_file file, int index)
227 : file(file), index(index), var(var)
228 {
229 /* empty */
230 }
231
232 gl_register_file file;
233 int index;
234 ir_variable *var; /* variable that maps to this, if any */
235 };
236
237 class immediate_storage : public exec_node {
238 public:
239 immediate_storage(gl_constant_value *values, int size, int type)
240 {
241 memcpy(this->values, values, size * sizeof(gl_constant_value));
242 this->size = size;
243 this->type = type;
244 }
245
246 gl_constant_value values[4];
247 int size; /**< Number of components (1-4) */
248 int type; /**< GL_FLOAT, GL_INT, GL_BOOL, or GL_UNSIGNED_INT */
249 };
250
251 class function_entry : public exec_node {
252 public:
253 ir_function_signature *sig;
254
255 /**
256 * identifier of this function signature used by the program.
257 *
258 * At the point that TGSI instructions for function calls are
259 * generated, we don't know the address of the first instruction of
260 * the function body. So we make the BranchTarget that is called a
261 * small integer and rewrite them during set_branchtargets().
262 */
263 int sig_id;
264
265 /**
266 * Pointer to first instruction of the function body.
267 *
268 * Set during function body emits after main() is processed.
269 */
270 glsl_to_tgsi_instruction *bgn_inst;
271
272 /**
273 * Index of the first instruction of the function body in actual TGSI.
274 *
275 * Set after conversion from glsl_to_tgsi_instruction to TGSI.
276 */
277 int inst;
278
279 /** Storage for the return value. */
280 st_src_reg return_reg;
281 };
282
283 class glsl_to_tgsi_visitor : public ir_visitor {
284 public:
285 glsl_to_tgsi_visitor();
286 ~glsl_to_tgsi_visitor();
287
288 function_entry *current_function;
289
290 struct gl_context *ctx;
291 struct gl_program *prog;
292 struct gl_shader_program *shader_program;
293 struct gl_shader_compiler_options *options;
294
295 int next_temp;
296
297 int num_address_regs;
298 int samplers_used;
299 bool indirect_addr_temps;
300 bool indirect_addr_consts;
301
302 int glsl_version;
303 bool native_integers;
304
305 variable_storage *find_variable_storage(ir_variable *var);
306
307 int add_constant(gl_register_file file, gl_constant_value values[4],
308 int size, int datatype, GLuint *swizzle_out);
309
310 function_entry *get_function_signature(ir_function_signature *sig);
311
312 st_src_reg get_temp(const glsl_type *type);
313 void reladdr_to_temp(ir_instruction *ir, st_src_reg *reg, int *num_reladdr);
314
315 st_src_reg st_src_reg_for_float(float val);
316 st_src_reg st_src_reg_for_int(int val);
317 st_src_reg st_src_reg_for_type(int type, int val);
318
319 /**
320 * \name Visit methods
321 *
322 * As typical for the visitor pattern, there must be one \c visit method for
323 * each concrete subclass of \c ir_instruction. Virtual base classes within
324 * the hierarchy should not have \c visit methods.
325 */
326 /*@{*/
327 virtual void visit(ir_variable *);
328 virtual void visit(ir_loop *);
329 virtual void visit(ir_loop_jump *);
330 virtual void visit(ir_function_signature *);
331 virtual void visit(ir_function *);
332 virtual void visit(ir_expression *);
333 virtual void visit(ir_swizzle *);
334 virtual void visit(ir_dereference_variable *);
335 virtual void visit(ir_dereference_array *);
336 virtual void visit(ir_dereference_record *);
337 virtual void visit(ir_assignment *);
338 virtual void visit(ir_constant *);
339 virtual void visit(ir_call *);
340 virtual void visit(ir_return *);
341 virtual void visit(ir_discard *);
342 virtual void visit(ir_texture *);
343 virtual void visit(ir_if *);
344 /*@}*/
345
346 st_src_reg result;
347
348 /** List of variable_storage */
349 exec_list variables;
350
351 /** List of immediate_storage */
352 exec_list immediates;
353 int num_immediates;
354
355 /** List of function_entry */
356 exec_list function_signatures;
357 int next_signature_id;
358
359 /** List of glsl_to_tgsi_instruction */
360 exec_list instructions;
361
362 glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op);
363
364 glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op,
365 st_dst_reg dst, st_src_reg src0);
366
367 glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op,
368 st_dst_reg dst, st_src_reg src0, st_src_reg src1);
369
370 glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op,
371 st_dst_reg dst,
372 st_src_reg src0, st_src_reg src1, st_src_reg src2);
373
374 unsigned get_opcode(ir_instruction *ir, unsigned op,
375 st_dst_reg dst,
376 st_src_reg src0, st_src_reg src1);
377
378 /**
379 * Emit the correct dot-product instruction for the type of arguments
380 */
381 glsl_to_tgsi_instruction *emit_dp(ir_instruction *ir,
382 st_dst_reg dst,
383 st_src_reg src0,
384 st_src_reg src1,
385 unsigned elements);
386
387 void emit_scalar(ir_instruction *ir, unsigned op,
388 st_dst_reg dst, st_src_reg src0);
389
390 void emit_scalar(ir_instruction *ir, unsigned op,
391 st_dst_reg dst, st_src_reg src0, st_src_reg src1);
392
393 void emit_arl(ir_instruction *ir, st_dst_reg dst, st_src_reg src0);
394
395 void emit_scs(ir_instruction *ir, unsigned op,
396 st_dst_reg dst, const st_src_reg &src);
397
398 bool try_emit_mad(ir_expression *ir,
399 int mul_operand);
400 bool try_emit_mad_for_and_not(ir_expression *ir,
401 int mul_operand);
402 bool try_emit_sat(ir_expression *ir);
403
404 void emit_swz(ir_expression *ir);
405
406 bool process_move_condition(ir_rvalue *ir);
407
408 void remove_output_reads(gl_register_file type);
409 void simplify_cmp(void);
410
411 void rename_temp_register(int index, int new_index);
412 int get_first_temp_read(int index);
413 int get_first_temp_write(int index);
414 int get_last_temp_read(int index);
415 int get_last_temp_write(int index);
416
417 void copy_propagate(void);
418 void eliminate_dead_code(void);
419 int eliminate_dead_code_advanced(void);
420 void merge_registers(void);
421 void renumber_registers(void);
422
423 void *mem_ctx;
424 };
425
426 static st_src_reg undef_src = st_src_reg(PROGRAM_UNDEFINED, 0, GLSL_TYPE_ERROR);
427
428 static st_dst_reg undef_dst = st_dst_reg(PROGRAM_UNDEFINED, SWIZZLE_NOOP, GLSL_TYPE_ERROR);
429
430 static st_dst_reg address_reg = st_dst_reg(PROGRAM_ADDRESS, WRITEMASK_X, GLSL_TYPE_FLOAT);
431
432 static void
433 fail_link(struct gl_shader_program *prog, const char *fmt, ...) PRINTFLIKE(2, 3);
434
435 static void
436 fail_link(struct gl_shader_program *prog, const char *fmt, ...)
437 {
438 va_list args;
439 va_start(args, fmt);
440 ralloc_vasprintf_append(&prog->InfoLog, fmt, args);
441 va_end(args);
442
443 prog->LinkStatus = GL_FALSE;
444 }
445
446 static int
447 swizzle_for_size(int size)
448 {
449 int size_swizzles[4] = {
450 MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
451 MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y),
452 MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z),
453 MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W),
454 };
455
456 assert((size >= 1) && (size <= 4));
457 return size_swizzles[size - 1];
458 }
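/* For example (a sketch using the swizzle macros already used above):
 * swizzle_for_size(2) returns MAKE_SWIZZLE4(X, Y, Y, Y), so a vec2 source
 * replicates its last channel into the unused Z and W slots, while
 * swizzle_for_size(4) is simply SWIZZLE_XYZW and swizzle_for_size(1) is
 * equivalent to SWIZZLE_XXXX.
 */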
459
460 static bool
461 is_tex_instruction(unsigned opcode)
462 {
463 const tgsi_opcode_info* info = tgsi_get_opcode_info(opcode);
464 return info->is_tex;
465 }
466
467 static unsigned
468 num_inst_dst_regs(unsigned opcode)
469 {
470 const tgsi_opcode_info* info = tgsi_get_opcode_info(opcode);
471 return info->num_dst;
472 }
473
474 static unsigned
475 num_inst_src_regs(unsigned opcode)
476 {
477 const tgsi_opcode_info* info = tgsi_get_opcode_info(opcode);
478 return info->is_tex ? info->num_src - 1 : info->num_src;
479 }
480
481 glsl_to_tgsi_instruction *
482 glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op,
483 st_dst_reg dst,
484 st_src_reg src0, st_src_reg src1, st_src_reg src2)
485 {
486 glsl_to_tgsi_instruction *inst = new(mem_ctx) glsl_to_tgsi_instruction();
487 int num_reladdr = 0, i;
488
489 op = get_opcode(ir, op, dst, src0, src1);
490
491 /* If we have to do relative addressing, we want to load the ARL
492 * reg directly for one of the regs, and preload the other reladdr
493 * sources into temps.
494 */
495 num_reladdr += dst.reladdr != NULL;
496 num_reladdr += src0.reladdr != NULL;
497 num_reladdr += src1.reladdr != NULL;
498 num_reladdr += src2.reladdr != NULL;
499
500 reladdr_to_temp(ir, &src2, &num_reladdr);
501 reladdr_to_temp(ir, &src1, &num_reladdr);
502 reladdr_to_temp(ir, &src0, &num_reladdr);
503
504 if (dst.reladdr) {
505 emit_arl(ir, address_reg, *dst.reladdr);
506 num_reladdr--;
507 }
508 assert(num_reladdr == 0);
509
510 inst->op = op;
511 inst->dst = dst;
512 inst->src[0] = src0;
513 inst->src[1] = src1;
514 inst->src[2] = src2;
515 inst->ir = ir;
516 inst->dead_mask = 0;
517
518 inst->function = NULL;
519
520 if (op == TGSI_OPCODE_ARL)
521 this->num_address_regs = 1;
522
523 /* Update indirect addressing status used by TGSI */
524 if (dst.reladdr) {
525 switch(dst.file) {
526 case PROGRAM_TEMPORARY:
527 this->indirect_addr_temps = true;
528 break;
529 case PROGRAM_LOCAL_PARAM:
530 case PROGRAM_ENV_PARAM:
531 case PROGRAM_STATE_VAR:
532 case PROGRAM_NAMED_PARAM:
533 case PROGRAM_CONSTANT:
534 case PROGRAM_UNIFORM:
535 this->indirect_addr_consts = true;
536 break;
537 case PROGRAM_IMMEDIATE:
538 assert(!"immediates should not have indirect addressing");
539 break;
540 default:
541 break;
542 }
543 }
544 else {
545 for (i=0; i<3; i++) {
546 if(inst->src[i].reladdr) {
547 switch(inst->src[i].file) {
548 case PROGRAM_TEMPORARY:
549 this->indirect_addr_temps = true;
550 break;
551 case PROGRAM_LOCAL_PARAM:
552 case PROGRAM_ENV_PARAM:
553 case PROGRAM_STATE_VAR:
554 case PROGRAM_NAMED_PARAM:
555 case PROGRAM_CONSTANT:
556 case PROGRAM_UNIFORM:
557 this->indirect_addr_consts = true;
558 break;
559 case PROGRAM_IMMEDIATE:
560 assert(!"immediates should not have indirect addressing");
561 break;
562 default:
563 break;
564 }
565 }
566 }
567 }
568
569 this->instructions.push_tail(inst);
570
571 return inst;
572 }
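/* Sketch of the relative-addressing handling above: if both src0 and src1
 * carry a reladdr, reladdr_to_temp() resolves src1 first (ARL its index,
 * then MOV the addressed value into a fresh temporary) and src0 last, so
 * that the final ARL result is still live in the single address register
 * when the instruction itself is emitted.
 */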
573
574
575 glsl_to_tgsi_instruction *
576 glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op,
577 st_dst_reg dst, st_src_reg src0, st_src_reg src1)
578 {
579 return emit(ir, op, dst, src0, src1, undef_src);
580 }
581
582 glsl_to_tgsi_instruction *
583 glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op,
584 st_dst_reg dst, st_src_reg src0)
585 {
586 assert(dst.writemask != 0);
587 return emit(ir, op, dst, src0, undef_src, undef_src);
588 }
589
590 glsl_to_tgsi_instruction *
591 glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op)
592 {
593 return emit(ir, op, undef_dst, undef_src, undef_src, undef_src);
594 }
595
596 /**
597 * Determines whether to use an integer, unsigned integer, or float opcode
598 * based on the operands and input opcode, then emits the result.
599 *
600 * TODO: type checking for remaining TGSI opcodes
601 */
602 unsigned
603 glsl_to_tgsi_visitor::get_opcode(ir_instruction *ir, unsigned op,
604 st_dst_reg dst,
605 st_src_reg src0, st_src_reg src1)
606 {
607 int type = GLSL_TYPE_FLOAT;
608
609 if (src0.type == GLSL_TYPE_FLOAT || src1.type == GLSL_TYPE_FLOAT)
610 type = GLSL_TYPE_FLOAT;
611 else if (native_integers)
612 type = src0.type;
613
614 #define case4(c, f, i, u) \
615 case TGSI_OPCODE_##c: \
616 if (type == GLSL_TYPE_INT) op = TGSI_OPCODE_##i; \
617 else if (type == GLSL_TYPE_UINT) op = TGSI_OPCODE_##u; \
618 else op = TGSI_OPCODE_##f; \
619 break;
620 #define case3(f, i, u) case4(f, f, i, u)
621 #define case2fi(f, i) case4(f, f, i, i)
622 #define case2iu(i, u) case4(i, LAST, i, u)
623
624 switch(op) {
625 case2fi(ADD, UADD);
626 case2fi(MUL, UMUL);
627 case2fi(MAD, UMAD);
628 case3(DIV, IDIV, UDIV);
629 case3(MAX, IMAX, UMAX);
630 case3(MIN, IMIN, UMIN);
631 case2iu(MOD, UMOD);
632
633 case2fi(SEQ, USEQ);
634 case2fi(SNE, USNE);
635 case3(SGE, ISGE, USGE);
636 case3(SLT, ISLT, USLT);
637
638 case2iu(SHL, SHL);
639 case2iu(ISHR, USHR);
640 case2iu(NOT, NOT);
641 case2iu(AND, AND);
642 case2iu(OR, OR);
643 case2iu(XOR, XOR);
644
645 default: break;
646 }
647
648 assert(op != TGSI_OPCODE_LAST);
649 return op;
650 }
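/* Example of the selection above (assuming native_integers is set by the
 * driver/screen): with two GLSL_TYPE_INT sources, TGSI_OPCODE_ADD becomes
 * TGSI_OPCODE_UADD via case2fi(ADD, UADD) and TGSI_OPCODE_MIN becomes
 * TGSI_OPCODE_IMIN via case3(MIN, IMIN, UMIN); if either source is
 * GLSL_TYPE_FLOAT, the float opcode is kept unchanged.
 */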
651
652 glsl_to_tgsi_instruction *
653 glsl_to_tgsi_visitor::emit_dp(ir_instruction *ir,
654 st_dst_reg dst, st_src_reg src0, st_src_reg src1,
655 unsigned elements)
656 {
657 static const unsigned dot_opcodes[] = {
658 TGSI_OPCODE_DP2, TGSI_OPCODE_DP3, TGSI_OPCODE_DP4
659 };
660
661 return emit(ir, dot_opcodes[elements - 2], dst, src0, src1);
662 }
663
664 /**
665 * Emits TGSI scalar opcodes to produce unique answers across channels.
666 *
667 * Some TGSI opcodes are scalar-only, like ARB_fp/vp. The src X
668 * channel determines the result across all channels. So to do a vec4
669 * of this operation, we want to emit a scalar per source channel used
670 * to produce dest channels.
671 */
672 void
673 glsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, unsigned op,
674 st_dst_reg dst,
675 st_src_reg orig_src0, st_src_reg orig_src1)
676 {
677 int i, j;
678 int done_mask = ~dst.writemask;
679
680 /* TGSI RCP is a scalar operation splatting results to all channels,
681 * like ARB_fp/vp. So emit as many RCPs as necessary to cover our
682 * dst channels.
683 */
684 for (i = 0; i < 4; i++) {
685 GLuint this_mask = (1 << i);
686 glsl_to_tgsi_instruction *inst;
687 st_src_reg src0 = orig_src0;
688 st_src_reg src1 = orig_src1;
689
690 if (done_mask & this_mask)
691 continue;
692
693 GLuint src0_swiz = GET_SWZ(src0.swizzle, i);
694 GLuint src1_swiz = GET_SWZ(src1.swizzle, i);
695 for (j = i + 1; j < 4; j++) {
696 /* If there is another enabled component in the destination that is
697 * derived from the same inputs, generate its value on this pass as
698 * well.
699 */
700 if (!(done_mask & (1 << j)) &&
701 GET_SWZ(src0.swizzle, j) == src0_swiz &&
702 GET_SWZ(src1.swizzle, j) == src1_swiz) {
703 this_mask |= (1 << j);
704 }
705 }
706 src0.swizzle = MAKE_SWIZZLE4(src0_swiz, src0_swiz,
707 src0_swiz, src0_swiz);
708 src1.swizzle = MAKE_SWIZZLE4(src1_swiz, src1_swiz,
709 src1_swiz, src1_swiz);
710
711 inst = emit(ir, op, dst, src0, src1);
712 inst->dst.writemask = this_mask;
713 done_mask |= this_mask;
714 }
715 }
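/* Sketch of the channel coalescing above: with a dst writemask of XY,
 * a src0 swizzled .xxzw yields a single RCP (both dest channels read
 * src0.x), while .xyzw yields two RCPs, one feeding dst.x from src0.x
 * and one feeding dst.y from src0.y.
 */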
716
717 void
718 glsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, unsigned op,
719 st_dst_reg dst, st_src_reg src0)
720 {
721 st_src_reg undef = undef_src;
722
723 undef.swizzle = SWIZZLE_XXXX;
724
725 emit_scalar(ir, op, dst, src0, undef);
726 }
727
728 void
729 glsl_to_tgsi_visitor::emit_arl(ir_instruction *ir,
730 st_dst_reg dst, st_src_reg src0)
731 {
732 st_src_reg tmp = get_temp(glsl_type::float_type);
733
734 if (src0.type == GLSL_TYPE_INT)
735 emit(NULL, TGSI_OPCODE_I2F, st_dst_reg(tmp), src0);
736 else if (src0.type == GLSL_TYPE_UINT)
737 emit(NULL, TGSI_OPCODE_U2F, st_dst_reg(tmp), src0);
738 else
739 tmp = src0;
740
741 emit(NULL, TGSI_OPCODE_ARL, dst, tmp);
742 }
743
744 /**
745 * Emit a TGSI_OPCODE_SCS instruction
746 *
747 * The \c SCS opcode functions a bit differently than the other TGSI opcodes.
748 * Instead of splatting its result across all four components of the
749 * destination, it writes one value to the \c x component and another value to
750 * the \c y component.
751 *
752 * \param ir IR instruction being processed
753 * \param op Either \c TGSI_OPCODE_SIN or \c TGSI_OPCODE_COS depending
754 * on which value is desired.
755 * \param dst Destination register
756 * \param src Source register
757 */
758 void
759 glsl_to_tgsi_visitor::emit_scs(ir_instruction *ir, unsigned op,
760 st_dst_reg dst,
761 const st_src_reg &src)
762 {
763 /* Vertex programs cannot use the SCS opcode.
764 */
765 if (this->prog->Target == GL_VERTEX_PROGRAM_ARB) {
766 emit_scalar(ir, op, dst, src);
767 return;
768 }
769
770 const unsigned component = (op == TGSI_OPCODE_SIN) ? 0 : 1;
771 const unsigned scs_mask = (1U << component);
772 int done_mask = ~dst.writemask;
773 st_src_reg tmp;
774
775 assert(op == TGSI_OPCODE_SIN || op == TGSI_OPCODE_COS);
776
777 * If there are components in the destination that differ from the component
778 * that will be written by the SCS instruction, we'll need a temporary.
779 */
780 if (scs_mask != unsigned(dst.writemask)) {
781 tmp = get_temp(glsl_type::vec4_type);
782 }
783
784 for (unsigned i = 0; i < 4; i++) {
785 unsigned this_mask = (1U << i);
786 st_src_reg src0 = src;
787
788 if ((done_mask & this_mask) != 0)
789 continue;
790
791 /* The source swizzle specifies which component of the source generates
792 * sine / cosine for the current component in the destination. The SCS
793 * instruction requires that this value be swizzled to the X component.
794 * Replace the current swizzle with a swizzle that puts the source in
795 * the X component.
796 */
797 unsigned src0_swiz = GET_SWZ(src.swizzle, i);
798
799 src0.swizzle = MAKE_SWIZZLE4(src0_swiz, src0_swiz,
800 src0_swiz, src0_swiz);
801 for (unsigned j = i + 1; j < 4; j++) {
802 /* If there is another enabled component in the destination that is
803 * derived from the same inputs, generate its value on this pass as
804 * well.
805 */
806 if (!(done_mask & (1 << j)) &&
807 GET_SWZ(src0.swizzle, j) == src0_swiz) {
808 this_mask |= (1 << j);
809 }
810 }
811
812 if (this_mask != scs_mask) {
813 glsl_to_tgsi_instruction *inst;
814 st_dst_reg tmp_dst = st_dst_reg(tmp);
815
816 /* Emit the SCS instruction.
817 */
818 inst = emit(ir, TGSI_OPCODE_SCS, tmp_dst, src0);
819 inst->dst.writemask = scs_mask;
820
821 /* Move the result of the SCS instruction to the desired location in
822 * the destination.
823 */
824 tmp.swizzle = MAKE_SWIZZLE4(component, component,
825 component, component);
826 inst = emit(ir, TGSI_OPCODE_SCS, dst, tmp);
827 inst->dst.writemask = this_mask;
828 } else {
829 /* Emit the SCS instruction to write directly to the destination.
830 */
831 glsl_to_tgsi_instruction *inst = emit(ir, TGSI_OPCODE_SCS, dst, src0);
832 inst->dst.writemask = scs_mask;
833 }
834
835 done_mask |= this_mask;
836 }
837 }
838
839 int
840 glsl_to_tgsi_visitor::add_constant(gl_register_file file,
841 gl_constant_value values[4], int size, int datatype,
842 GLuint *swizzle_out)
843 {
844 if (file == PROGRAM_CONSTANT) {
845 return _mesa_add_typed_unnamed_constant(this->prog->Parameters, values,
846 size, datatype, swizzle_out);
847 } else {
848 int index = 0;
849 immediate_storage *entry;
850 assert(file == PROGRAM_IMMEDIATE);
851
852 /* Search immediate storage to see if we already have an identical
853 * immediate that we can use instead of adding a duplicate entry.
854 */
855 foreach_iter(exec_list_iterator, iter, this->immediates) {
856 entry = (immediate_storage *)iter.get();
857
858 if (entry->size == size &&
859 entry->type == datatype &&
860 !memcmp(entry->values, values, size * sizeof(gl_constant_value))) {
861 return index;
862 }
863 index++;
864 }
865
866 /* Add this immediate to the list. */
867 entry = new(mem_ctx) immediate_storage(values, size, datatype);
868 this->immediates.push_tail(entry);
869 this->num_immediates++;
870 return index;
871 }
872 }
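/* Sketch: for PROGRAM_IMMEDIATE, two calls with an identical value, size
 * and datatype return the same index; only the first call appends an
 * immediate_storage entry, so duplicate literals in the shader share one
 * immediate slot.
 */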
873
874 st_src_reg
875 glsl_to_tgsi_visitor::st_src_reg_for_float(float val)
876 {
877 st_src_reg src(PROGRAM_IMMEDIATE, -1, GLSL_TYPE_FLOAT);
878 union gl_constant_value uval;
879
880 uval.f = val;
881 src.index = add_constant(src.file, &uval, 1, GL_FLOAT, &src.swizzle);
882
883 return src;
884 }
885
886 st_src_reg
887 glsl_to_tgsi_visitor::st_src_reg_for_int(int val)
888 {
889 st_src_reg src(PROGRAM_IMMEDIATE, -1, GLSL_TYPE_INT);
890 union gl_constant_value uval;
891
892 assert(native_integers);
893
894 uval.i = val;
895 src.index = add_constant(src.file, &uval, 1, GL_INT, &src.swizzle);
896
897 return src;
898 }
899
900 st_src_reg
901 glsl_to_tgsi_visitor::st_src_reg_for_type(int type, int val)
902 {
903 if (native_integers)
904 return type == GLSL_TYPE_FLOAT ? st_src_reg_for_float(val) :
905 st_src_reg_for_int(val);
906 else
907 return st_src_reg_for_float(val);
908 }
909
910 static int
911 type_size(const struct glsl_type *type)
912 {
913 unsigned int i;
914 int size;
915
916 switch (type->base_type) {
917 case GLSL_TYPE_UINT:
918 case GLSL_TYPE_INT:
919 case GLSL_TYPE_FLOAT:
920 case GLSL_TYPE_BOOL:
921 if (type->is_matrix()) {
922 return type->matrix_columns;
923 } else {
924 /* Regardless of size of vector, it gets a vec4. This is bad
925 * packing for things like floats, but otherwise arrays become a
926 * mess. Hopefully a later pass over the code can pack scalars
927 * down if appropriate.
928 */
929 return 1;
930 }
931 case GLSL_TYPE_ARRAY:
932 assert(type->length > 0);
933 return type_size(type->fields.array) * type->length;
934 case GLSL_TYPE_STRUCT:
935 size = 0;
936 for (i = 0; i < type->length; i++) {
937 size += type_size(type->fields.structure[i].type);
938 }
939 return size;
940 case GLSL_TYPE_SAMPLER:
941 /* Samplers take up one slot in UNIFORMS[], but they're baked in
942 * at link time.
943 */
944 return 1;
945 default:
946 assert(0);
947 return 0;
948 }
949 }
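/* A few concrete values for the packing rule above (sketch): a float or
 * vec3 occupies 1 slot, a mat4 occupies 4 (one per column), float[10]
 * occupies 10, and struct { vec3 p; float t; } occupies 2, since each
 * member rounds up to a full vec4 slot.
 */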
950
951 /**
952 * In the initial pass of codegen, we assign temporary numbers to
953 * intermediate results. (not SSA -- variable assignments will reuse
954 * storage).
955 */
956 st_src_reg
957 glsl_to_tgsi_visitor::get_temp(const glsl_type *type)
958 {
959 st_src_reg src;
960
961 src.type = native_integers ? type->base_type : GLSL_TYPE_FLOAT;
962 src.file = PROGRAM_TEMPORARY;
963 src.index = next_temp;
964 src.reladdr = NULL;
965 next_temp += type_size(type);
966
967 if (type->is_array() || type->is_record()) {
968 src.swizzle = SWIZZLE_NOOP;
969 } else {
970 src.swizzle = swizzle_for_size(type->vector_elements);
971 }
972 src.negate = 0;
973
974 return src;
975 }
976
977 variable_storage *
978 glsl_to_tgsi_visitor::find_variable_storage(ir_variable *var)
979 {
980
981 variable_storage *entry;
982
983 foreach_iter(exec_list_iterator, iter, this->variables) {
984 entry = (variable_storage *)iter.get();
985
986 if (entry->var == var)
987 return entry;
988 }
989
990 return NULL;
991 }
992
993 void
994 glsl_to_tgsi_visitor::visit(ir_variable *ir)
995 {
996 if (strcmp(ir->name, "gl_FragCoord") == 0) {
997 struct gl_fragment_program *fp = (struct gl_fragment_program *)this->prog;
998
999 fp->OriginUpperLeft = ir->origin_upper_left;
1000 fp->PixelCenterInteger = ir->pixel_center_integer;
1001
1002 } else if (strcmp(ir->name, "gl_FragDepth") == 0) {
1003 struct gl_fragment_program *fp = (struct gl_fragment_program *)this->prog;
1004 switch (ir->depth_layout) {
1005 case ir_depth_layout_none:
1006 fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_NONE;
1007 break;
1008 case ir_depth_layout_any:
1009 fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_ANY;
1010 break;
1011 case ir_depth_layout_greater:
1012 fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_GREATER;
1013 break;
1014 case ir_depth_layout_less:
1015 fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_LESS;
1016 break;
1017 case ir_depth_layout_unchanged:
1018 fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_UNCHANGED;
1019 break;
1020 default:
1021 assert(0);
1022 break;
1023 }
1024 }
1025
1026 if (ir->mode == ir_var_uniform && strncmp(ir->name, "gl_", 3) == 0) {
1027 unsigned int i;
1028 const ir_state_slot *const slots = ir->state_slots;
1029 assert(ir->state_slots != NULL);
1030
1031 /* Check if this statevar's setup in the STATE file exactly
1032 * matches how we'll want to reference it as a
1033 * struct/array/whatever. If not, then we need to move it into
1034 * temporary storage and hope that it'll get copy-propagated
1035 * out.
1036 */
1037 for (i = 0; i < ir->num_state_slots; i++) {
1038 if (slots[i].swizzle != SWIZZLE_XYZW) {
1039 break;
1040 }
1041 }
1042
1043 variable_storage *storage;
1044 st_dst_reg dst;
1045 if (i == ir->num_state_slots) {
1046 /* We'll set the index later. */
1047 storage = new(mem_ctx) variable_storage(ir, PROGRAM_STATE_VAR, -1);
1048 this->variables.push_tail(storage);
1049
1050 dst = undef_dst;
1051 } else {
1052 /* The variable_storage constructor allocates slots based on the size
1053 * of the type. However, this had better match the number of state
1054 * elements that we're going to copy into the new temporary.
1055 */
1056 assert((int) ir->num_state_slots == type_size(ir->type));
1057
1058 storage = new(mem_ctx) variable_storage(ir, PROGRAM_TEMPORARY,
1059 this->next_temp);
1060 this->variables.push_tail(storage);
1061 this->next_temp += type_size(ir->type);
1062
1063 dst = st_dst_reg(st_src_reg(PROGRAM_TEMPORARY, storage->index,
1064 native_integers ? ir->type->base_type : GLSL_TYPE_FLOAT));
1065 }
1066
1067
1068 for (unsigned int i = 0; i < ir->num_state_slots; i++) {
1069 int index = _mesa_add_state_reference(this->prog->Parameters,
1070 (gl_state_index *)slots[i].tokens);
1071
1072 if (storage->file == PROGRAM_STATE_VAR) {
1073 if (storage->index == -1) {
1074 storage->index = index;
1075 } else {
1076 assert(index == storage->index + (int)i);
1077 }
1078 } else {
1079 st_src_reg src(PROGRAM_STATE_VAR, index,
1080 native_integers ? ir->type->base_type : GLSL_TYPE_FLOAT);
1081 src.swizzle = slots[i].swizzle;
1082 emit(ir, TGSI_OPCODE_MOV, dst, src);
1083 /* even a float takes up a whole vec4 reg in a struct/array. */
1084 dst.index++;
1085 }
1086 }
1087
1088 if (storage->file == PROGRAM_TEMPORARY &&
1089 dst.index != storage->index + (int) ir->num_state_slots) {
1090 fail_link(this->shader_program,
1091 "failed to load builtin uniform `%s' (%d/%d regs loaded)\n",
1092 ir->name, dst.index - storage->index,
1093 type_size(ir->type));
1094 }
1095 }
1096 }
1097
1098 void
1099 glsl_to_tgsi_visitor::visit(ir_loop *ir)
1100 {
1101 ir_dereference_variable *counter = NULL;
1102
1103 if (ir->counter != NULL)
1104 counter = new(ir) ir_dereference_variable(ir->counter);
1105
1106 if (ir->from != NULL) {
1107 assert(ir->counter != NULL);
1108
1109 ir_assignment *a = new(ir) ir_assignment(counter, ir->from, NULL);
1110
1111 a->accept(this);
1112 delete a;
1113 }
1114
1115 emit(NULL, TGSI_OPCODE_BGNLOOP);
1116
1117 if (ir->to) {
1118 ir_expression *e =
1119 new(ir) ir_expression(ir->cmp, glsl_type::bool_type,
1120 counter, ir->to);
1121 ir_if *if_stmt = new(ir) ir_if(e);
1122
1123 ir_loop_jump *brk = new(ir) ir_loop_jump(ir_loop_jump::jump_break);
1124
1125 if_stmt->then_instructions.push_tail(brk);
1126
1127 if_stmt->accept(this);
1128
1129 delete if_stmt;
1130 delete e;
1131 delete brk;
1132 }
1133
1134 visit_exec_list(&ir->body_instructions, this);
1135
1136 if (ir->increment) {
1137 ir_expression *e =
1138 new(ir) ir_expression(ir_binop_add, counter->type,
1139 counter, ir->increment);
1140
1141 ir_assignment *a = new(ir) ir_assignment(counter, e, NULL);
1142
1143 a->accept(this);
1144 delete a;
1145 delete e;
1146 }
1147
1148 emit(NULL, TGSI_OPCODE_ENDLOOP);
1149 }
1150
1151 void
1152 glsl_to_tgsi_visitor::visit(ir_loop_jump *ir)
1153 {
1154 switch (ir->mode) {
1155 case ir_loop_jump::jump_break:
1156 emit(NULL, TGSI_OPCODE_BRK);
1157 break;
1158 case ir_loop_jump::jump_continue:
1159 emit(NULL, TGSI_OPCODE_CONT);
1160 break;
1161 }
1162 }
1163
1164
1165 void
1166 glsl_to_tgsi_visitor::visit(ir_function_signature *ir)
1167 {
1168 assert(0);
1169 (void)ir;
1170 }
1171
1172 void
1173 glsl_to_tgsi_visitor::visit(ir_function *ir)
1174 {
1175 /* Ignore function bodies other than main() -- we shouldn't see calls to
1176 * them since they should all be inlined before we get to glsl_to_tgsi.
1177 */
1178 if (strcmp(ir->name, "main") == 0) {
1179 const ir_function_signature *sig;
1180 exec_list empty;
1181
1182 sig = ir->matching_signature(&empty);
1183
1184 assert(sig);
1185
1186 foreach_iter(exec_list_iterator, iter, sig->body) {
1187 ir_instruction *ir = (ir_instruction *)iter.get();
1188
1189 ir->accept(this);
1190 }
1191 }
1192 }
1193
1194 bool
1195 glsl_to_tgsi_visitor::try_emit_mad(ir_expression *ir, int mul_operand)
1196 {
1197 int nonmul_operand = 1 - mul_operand;
1198 st_src_reg a, b, c;
1199 st_dst_reg result_dst;
1200
1201 ir_expression *expr = ir->operands[mul_operand]->as_expression();
1202 if (!expr || expr->operation != ir_binop_mul)
1203 return false;
1204
1205 expr->operands[0]->accept(this);
1206 a = this->result;
1207 expr->operands[1]->accept(this);
1208 b = this->result;
1209 ir->operands[nonmul_operand]->accept(this);
1210 c = this->result;
1211
1212 this->result = get_temp(ir->type);
1213 result_dst = st_dst_reg(this->result);
1214 result_dst.writemask = (1 << ir->type->vector_elements) - 1;
1215 emit(ir, TGSI_OPCODE_MAD, result_dst, a, b, c);
1216
1217 return true;
1218 }
1219
1220 /**
1221 * Emit MAD(a, -b, a) instead of AND(a, NOT(b))
1222 *
1223 * The logic values are 1.0 for true and 0.0 for false. Logical-and is
1224 * implemented using multiplication, and logical-or is implemented using
1225 * addition. Logical-not can be implemented as (true - x), or (1.0 - x).
1226 * As a result, the logical expression (a & !b) can be rewritten as:
1227 *
1228 * - a * !b
1229 * - a * (1 - b)
1230 * - (a * 1) - (a * b)
1231 * - a + -(a * b)
1232 * - a + (a * -b)
1233 *
1234 * This final expression can be implemented as a single MAD(a, -b, a)
1235 * instruction.
1236 */
1237 bool
1238 glsl_to_tgsi_visitor::try_emit_mad_for_and_not(ir_expression *ir, int try_operand)
1239 {
1240 const int other_operand = 1 - try_operand;
1241 st_src_reg a, b;
1242
1243 ir_expression *expr = ir->operands[try_operand]->as_expression();
1244 if (!expr || expr->operation != ir_unop_logic_not)
1245 return false;
1246
1247 ir->operands[other_operand]->accept(this);
1248 a = this->result;
1249 expr->operands[0]->accept(this);
1250 b = this->result;
1251
1252 b.negate = ~b.negate;
1253
1254 this->result = get_temp(ir->type);
1255 emit(ir, TGSI_OPCODE_MAD, st_dst_reg(this->result), a, b, a);
1256
1257 return true;
1258 }
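/* Quick check of the rewrite above with the 0.0/1.0 logic encoding:
 *   a=1, b=1:  MAD(1, -1, 1) = 1*(-1) + 1 = 0   == (a && !b)
 *   a=1, b=0:  MAD(1,  0, 1) = 1
 *   a=0, b=x:  MAD(0, -x, 0) = 0
 */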
1259
1260 bool
1261 glsl_to_tgsi_visitor::try_emit_sat(ir_expression *ir)
1262 {
1263 /* Saturates were only introduced to vertex programs in
1264 * NV_vertex_program3, so don't give them to drivers in the VP.
1265 */
1266 if (this->prog->Target == GL_VERTEX_PROGRAM_ARB)
1267 return false;
1268
1269 ir_rvalue *sat_src = ir->as_rvalue_to_saturate();
1270 if (!sat_src)
1271 return false;
1272
1273 sat_src->accept(this);
1274 st_src_reg src = this->result;
1275
1276 /* If we generated an expression instruction into a temporary in
1277 * processing the saturate's operand, apply the saturate to that
1278 * instruction. Otherwise, generate a MOV to do the saturate.
1279 *
1280 * Note that we have to be careful to only do this optimization if
1281 * the instruction in question was what generated src->result. For
1282 * example, ir_dereference_array might generate a MUL instruction
1283 * to create the reladdr, and return us a src reg using that
1284 * reladdr. That MUL result is not the value we're trying to
1285 * saturate.
1286 */
1287 ir_expression *sat_src_expr = sat_src->as_expression();
1288 if (sat_src_expr && (sat_src_expr->operation == ir_binop_mul ||
1289 sat_src_expr->operation == ir_binop_add ||
1290 sat_src_expr->operation == ir_binop_dot)) {
1291 glsl_to_tgsi_instruction *new_inst;
1292 new_inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail();
1293 new_inst->saturate = true;
1294 } else {
1295 this->result = get_temp(ir->type);
1296 st_dst_reg result_dst = st_dst_reg(this->result);
1297 result_dst.writemask = (1 << ir->type->vector_elements) - 1;
1298 glsl_to_tgsi_instruction *inst;
1299 inst = emit(ir, TGSI_OPCODE_MOV, result_dst, src);
1300 inst->saturate = true;
1301 }
1302
1303 return true;
1304 }
1305
1306 void
1307 glsl_to_tgsi_visitor::reladdr_to_temp(ir_instruction *ir,
1308 st_src_reg *reg, int *num_reladdr)
1309 {
1310 if (!reg->reladdr)
1311 return;
1312
1313 emit_arl(ir, address_reg, *reg->reladdr);
1314
1315 if (*num_reladdr != 1) {
1316 st_src_reg temp = get_temp(glsl_type::vec4_type);
1317
1318 emit(ir, TGSI_OPCODE_MOV, st_dst_reg(temp), *reg);
1319 *reg = temp;
1320 }
1321
1322 (*num_reladdr)--;
1323 }
1324
1325 void
1326 glsl_to_tgsi_visitor::visit(ir_expression *ir)
1327 {
1328 unsigned int operand;
1329 st_src_reg op[Elements(ir->operands)];
1330 st_src_reg result_src;
1331 st_dst_reg result_dst;
1332
1333 /* Quick peephole: Emit MAD(a, b, c) instead of ADD(MUL(a, b), c)
1334 */
1335 if (ir->operation == ir_binop_add) {
1336 if (try_emit_mad(ir, 1))
1337 return;
1338 if (try_emit_mad(ir, 0))
1339 return;
1340 }
1341
1342 /* Quick peephole: Emit MAD(a, -b, a) instead of AND(a, NOT(b))
1343 */
1344 if (ir->operation == ir_binop_logic_and) {
1345 if (try_emit_mad_for_and_not(ir, 1))
1346 return;
1347 if (try_emit_mad_for_and_not(ir, 0))
1348 return;
1349 }
1350
1351 if (try_emit_sat(ir))
1352 return;
1353
1354 if (ir->operation == ir_quadop_vector)
1355 assert(!"ir_quadop_vector should have been lowered");
1356
1357 for (operand = 0; operand < ir->get_num_operands(); operand++) {
1358 this->result.file = PROGRAM_UNDEFINED;
1359 ir->operands[operand]->accept(this);
1360 if (this->result.file == PROGRAM_UNDEFINED) {
1361 ir_print_visitor v;
1362 printf("Failed to get tree for expression operand:\n");
1363 ir->operands[operand]->accept(&v);
1364 exit(1);
1365 }
1366 op[operand] = this->result;
1367
1368 /* Matrix expression operands should have been broken down to vector
1369 * operations already.
1370 */
1371 assert(!ir->operands[operand]->type->is_matrix());
1372 }
1373
1374 int vector_elements = ir->operands[0]->type->vector_elements;
1375 if (ir->operands[1]) {
1376 vector_elements = MAX2(vector_elements,
1377 ir->operands[1]->type->vector_elements);
1378 }
1379
1380 this->result.file = PROGRAM_UNDEFINED;
1381
1382 /* Storage for our result. Ideally for an assignment we'd be using
1383 * the actual storage for the result here, instead.
1384 */
1385 result_src = get_temp(ir->type);
1386 /* convenience for the emit functions below. */
1387 result_dst = st_dst_reg(result_src);
1388 /* Limit writes to the channels that will be used by result_src later.
1389 * This does limit this temp's use as a temporary for multi-instruction
1390 * sequences.
1391 */
1392 result_dst.writemask = (1 << ir->type->vector_elements) - 1;
1393
1394 switch (ir->operation) {
1395 case ir_unop_logic_not:
1396 if (result_dst.type != GLSL_TYPE_FLOAT)
1397 emit(ir, TGSI_OPCODE_SEQ, result_dst, op[0], st_src_reg_for_type(result_dst.type, 0));
1398 else {
1399 /* Previously 'SEQ dst, src, 0.0' was used for this. However, many
1400 * older GPUs implement SEQ using multiple instructions (i915 uses two
1401 * SGE instructions and a MUL instruction). Since our logic values are
1402 * 0.0 and 1.0, 1-x also implements !x.
1403 */
1404 op[0].negate = ~op[0].negate;
1405 emit(ir, TGSI_OPCODE_ADD, result_dst, op[0], st_src_reg_for_float(1.0));
1406 }
1407 break;
1408 case ir_unop_neg:
1409 assert(result_dst.type == GLSL_TYPE_FLOAT || result_dst.type == GLSL_TYPE_INT);
1410 if (result_dst.type == GLSL_TYPE_INT)
1411 emit(ir, TGSI_OPCODE_INEG, result_dst, op[0]);
1412 else {
1413 op[0].negate = ~op[0].negate;
1414 result_src = op[0];
1415 }
1416 break;
1417 case ir_unop_abs:
1418 assert(result_dst.type == GLSL_TYPE_FLOAT);
1419 emit(ir, TGSI_OPCODE_ABS, result_dst, op[0]);
1420 break;
1421 case ir_unop_sign:
1422 emit(ir, TGSI_OPCODE_SSG, result_dst, op[0]);
1423 break;
1424 case ir_unop_rcp:
1425 emit_scalar(ir, TGSI_OPCODE_RCP, result_dst, op[0]);
1426 break;
1427
1428 case ir_unop_exp2:
1429 emit_scalar(ir, TGSI_OPCODE_EX2, result_dst, op[0]);
1430 break;
1431 case ir_unop_exp:
1432 case ir_unop_log:
1433 assert(!"not reached: should be handled by ir_explog_to_explog2");
1434 break;
1435 case ir_unop_log2:
1436 emit_scalar(ir, TGSI_OPCODE_LG2, result_dst, op[0]);
1437 break;
1438 case ir_unop_sin:
1439 emit_scalar(ir, TGSI_OPCODE_SIN, result_dst, op[0]);
1440 break;
1441 case ir_unop_cos:
1442 emit_scalar(ir, TGSI_OPCODE_COS, result_dst, op[0]);
1443 break;
1444 case ir_unop_sin_reduced:
1445 emit_scs(ir, TGSI_OPCODE_SIN, result_dst, op[0]);
1446 break;
1447 case ir_unop_cos_reduced:
1448 emit_scs(ir, TGSI_OPCODE_COS, result_dst, op[0]);
1449 break;
1450
1451 case ir_unop_dFdx:
1452 emit(ir, TGSI_OPCODE_DDX, result_dst, op[0]);
1453 break;
1454 case ir_unop_dFdy:
1455 op[0].negate = ~op[0].negate;
1456 emit(ir, TGSI_OPCODE_DDY, result_dst, op[0]);
1457 break;
1458
1459 case ir_unop_noise: {
1460 /* At some point, a motivated person could add a better
1461 * implementation of noise. Currently not even the nvidia
1462 * binary drivers do anything more than this. In any case, the
1463 * place to do this is in the GL state tracker, not the poor
1464 * driver.
1465 */
1466 emit(ir, TGSI_OPCODE_MOV, result_dst, st_src_reg_for_float(0.5));
1467 break;
1468 }
1469
1470 case ir_binop_add:
1471 emit(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]);
1472 break;
1473 case ir_binop_sub:
1474 emit(ir, TGSI_OPCODE_SUB, result_dst, op[0], op[1]);
1475 break;
1476
1477 case ir_binop_mul:
1478 emit(ir, TGSI_OPCODE_MUL, result_dst, op[0], op[1]);
1479 break;
1480 case ir_binop_div:
1481 if (result_dst.type == GLSL_TYPE_FLOAT)
1482 assert(!"not reached: should be handled by ir_div_to_mul_rcp");
1483 else
1484 emit(ir, TGSI_OPCODE_DIV, result_dst, op[0], op[1]);
1485 break;
1486 case ir_binop_mod:
1487 if (result_dst.type == GLSL_TYPE_FLOAT)
1488 assert(!"ir_binop_mod should have been converted to b * fract(a/b)");
1489 else
1490 emit(ir, TGSI_OPCODE_MOD, result_dst, op[0], op[1]);
1491 break;
1492
1493 case ir_binop_less:
1494 emit(ir, TGSI_OPCODE_SLT, result_dst, op[0], op[1]);
1495 break;
1496 case ir_binop_greater:
1497 emit(ir, TGSI_OPCODE_SGT, result_dst, op[0], op[1]);
1498 break;
1499 case ir_binop_lequal:
1500 emit(ir, TGSI_OPCODE_SLE, result_dst, op[0], op[1]);
1501 break;
1502 case ir_binop_gequal:
1503 emit(ir, TGSI_OPCODE_SGE, result_dst, op[0], op[1]);
1504 break;
1505 case ir_binop_equal:
1506 emit(ir, TGSI_OPCODE_SEQ, result_dst, op[0], op[1]);
1507 break;
1508 case ir_binop_nequal:
1509 emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]);
1510 break;
1511 case ir_binop_all_equal:
1512 /* "==" operator producing a scalar boolean. */
1513 if (ir->operands[0]->type->is_vector() ||
1514 ir->operands[1]->type->is_vector()) {
1515 st_src_reg temp = get_temp(native_integers ?
1516 glsl_type::get_instance(ir->operands[0]->type->base_type, 4, 1) :
1517 glsl_type::vec4_type);
1518 assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT);
1519 emit(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]);
1520
1521 /* After the dot-product, the value will be an integer on the
1522 * range [0,4]. Zero becomes 1.0, and positive values become zero.
1523 */
1524 emit_dp(ir, result_dst, temp, temp, vector_elements);
1525
1526 if (result_dst.type == GLSL_TYPE_FLOAT) {
1527 /* Negating the result of the dot-product gives values on the range
1528 * [-4, 0]. Zero becomes 1.0, and negative values become zero.
1529 * This is achieved using SGE.
1530 */
1531 st_src_reg sge_src = result_src;
1532 sge_src.negate = ~sge_src.negate;
1533 emit(ir, TGSI_OPCODE_SGE, result_dst, sge_src, st_src_reg_for_float(0.0));
1534 } else {
1535 /* The TGSI negate flag doesn't work for integers, so use SEQ 0
1536 * instead.
1537 */
1538 emit(ir, TGSI_OPCODE_SEQ, result_dst, result_src, st_src_reg_for_int(0));
1539 }
1540 } else {
1541 emit(ir, TGSI_OPCODE_SEQ, result_dst, op[0], op[1]);
1542 }
1543 break;
1544 case ir_binop_any_nequal:
1545 /* "!=" operator producing a scalar boolean. */
1546 if (ir->operands[0]->type->is_vector() ||
1547 ir->operands[1]->type->is_vector()) {
1548 st_src_reg temp = get_temp(native_integers ?
1549 glsl_type::get_instance(ir->operands[0]->type->base_type, 4, 1) :
1550 glsl_type::vec4_type);
1551 assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT);
1552 emit(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]);
1553
1554 /* After the dot-product, the value will be an integer on the
1555 * range [0,4]. Zero stays zero, and positive values become 1.0.
1556 */
1557 glsl_to_tgsi_instruction *const dp =
1558 emit_dp(ir, result_dst, temp, temp, vector_elements);
1559 if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB &&
1560 result_dst.type == GLSL_TYPE_FLOAT) {
1561 /* The clamping to [0,1] can be done for free in the fragment
1562 * shader with a saturate.
1563 */
1564 dp->saturate = true;
1565 } else if (result_dst.type == GLSL_TYPE_FLOAT) {
1566 /* Negating the result of the dot-product gives values on the range
1567 * [-4, 0]. Zero stays zero, and negative values become 1.0. This is
1568 * achieved using SLT.
1569 */
1570 st_src_reg slt_src = result_src;
1571 slt_src.negate = ~slt_src.negate;
1572 emit(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0));
1573 } else {
1574 emit(ir, TGSI_OPCODE_SNE, result_dst, result_src, st_src_reg_for_int(0));
1575 }
1576 } else {
1577 emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]);
1578 }
1579 break;
1580
1581 case ir_unop_any: {
1582 assert(ir->operands[0]->type->is_vector());
1583
1584 /* After the dot-product, the value will be an integer on the
1585 * range [0,4]. Zero stays zero, and positive values become 1.0.
1586 */
1587 glsl_to_tgsi_instruction *const dp =
1588 emit_dp(ir, result_dst, op[0], op[0],
1589 ir->operands[0]->type->vector_elements);
1590 if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB &&
1591 result_dst.type == GLSL_TYPE_FLOAT) {
1592 /* The clamping to [0,1] can be done for free in the fragment
1593 * shader with a saturate.
1594 */
1595 dp->saturate = true;
1596 } else if (result_dst.type == GLSL_TYPE_FLOAT) {
1597 /* Negating the result of the dot-product gives values on the range
1598 * [-4, 0]. Zero stays zero, and negative values become 1.0. This
1599 * is achieved using SLT.
1600 */
1601 st_src_reg slt_src = result_src;
1602 slt_src.negate = ~slt_src.negate;
1603 emit(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0));
1604 }
1605 else {
1606 /* Use SNE 0 if integers are being used as boolean values. */
1607 emit(ir, TGSI_OPCODE_SNE, result_dst, result_src, st_src_reg_for_int(0));
1608 }
1609 break;
1610 }
1611
1612 case ir_binop_logic_xor:
1613 emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]);
1614 break;
1615
1616 case ir_binop_logic_or: {
1617 /* After the addition, the value will be an integer on the
1618 * range [0,2]. Zero stays zero, and positive values become 1.0.
1619 */
1620 glsl_to_tgsi_instruction *add =
1621 emit(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]);
1622 if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB &&
1623 result_dst.type == GLSL_TYPE_FLOAT) {
1624 /* The clamping to [0,1] can be done for free in the fragment
1625 * shader with a saturate if floats are being used as boolean values.
1626 */
1627 add->saturate = true;
1628 } else if (result_dst.type == GLSL_TYPE_FLOAT) {
1629 /* Negating the result of the addition gives values on the range
1630 * [-2, 0]. Zero stays zero, and negative values become 1.0. This
1631 * is achieved using SLT.
1632 */
1633 st_src_reg slt_src = result_src;
1634 slt_src.negate = ~slt_src.negate;
1635 emit(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0));
1636 } else {
1637 /* Use an SNE on the result of the addition. Zero stays zero,
1638 * 1 stays 1, and 2 becomes 1.
1639 */
1640 emit(ir, TGSI_OPCODE_SNE, result_dst, result_src, st_src_reg_for_int(0));
1641 }
1642 break;
1643 }
1644
1645 case ir_binop_logic_and:
1646 /* the bool args are stored as float 0.0 or 1.0, so "mul" gives us "and". */
1647 emit(ir, TGSI_OPCODE_MUL, result_dst, op[0], op[1]);
1648 break;
1649
1650 case ir_binop_dot:
1651 assert(ir->operands[0]->type->is_vector());
1652 assert(ir->operands[0]->type == ir->operands[1]->type);
1653 emit_dp(ir, result_dst, op[0], op[1],
1654 ir->operands[0]->type->vector_elements);
1655 break;
1656
1657 case ir_unop_sqrt:
1658 /* sqrt(x) = x * rsq(x). */
1659 emit_scalar(ir, TGSI_OPCODE_RSQ, result_dst, op[0]);
1660 emit(ir, TGSI_OPCODE_MUL, result_dst, result_src, op[0]);
1661 /* For incoming channels <= 0, set the result to 0. */
1662 op[0].negate = ~op[0].negate;
1663 emit(ir, TGSI_OPCODE_CMP, result_dst,
1664 op[0], result_src, st_src_reg_for_float(0.0));
1665 break;
1666 case ir_unop_rsq:
1667 emit_scalar(ir, TGSI_OPCODE_RSQ, result_dst, op[0]);
1668 break;
1669 case ir_unop_i2f:
1670 case ir_unop_b2f:
1671 if (native_integers) {
1672 emit(ir, TGSI_OPCODE_I2F, result_dst, op[0]);
1673 break;
1674 }
1675 case ir_unop_i2u:
1676 case ir_unop_u2i:
1677 /* Converting between signed and unsigned integers is a no-op. */
1678 case ir_unop_b2i:
1679 /* Booleans are stored as integers (or floats in GLSL 1.20 and lower). */
1680 result_src = op[0];
1681 break;
1682 case ir_unop_f2i:
1683 if (native_integers)
1684 emit(ir, TGSI_OPCODE_F2I, result_dst, op[0]);
1685 else
1686 emit(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]);
1687 break;
1688 case ir_unop_f2b:
1689 case ir_unop_i2b:
1690 emit(ir, TGSI_OPCODE_SNE, result_dst, op[0],
1691 st_src_reg_for_type(result_dst.type, 0));
1692 break;
1693 case ir_unop_trunc:
1694 emit(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]);
1695 break;
1696 case ir_unop_ceil:
1697 op[0].negate = ~op[0].negate;
1698 emit(ir, TGSI_OPCODE_FLR, result_dst, op[0]);
1699 result_src.negate = ~result_src.negate;
1700 break;
1701 case ir_unop_floor:
1702 emit(ir, TGSI_OPCODE_FLR, result_dst, op[0]);
1703 break;
1704 case ir_unop_fract:
1705 emit(ir, TGSI_OPCODE_FRC, result_dst, op[0]);
1706 break;
1707
1708 case ir_binop_min:
1709 emit(ir, TGSI_OPCODE_MIN, result_dst, op[0], op[1]);
1710 break;
1711 case ir_binop_max:
1712 emit(ir, TGSI_OPCODE_MAX, result_dst, op[0], op[1]);
1713 break;
1714 case ir_binop_pow:
1715 emit_scalar(ir, TGSI_OPCODE_POW, result_dst, op[0], op[1]);
1716 break;
1717
1718 case ir_unop_bit_not:
1719 if (glsl_version >= 130) {
1720 emit(ir, TGSI_OPCODE_NOT, result_dst, op[0]);
1721 break;
1722 }
1723 case ir_unop_u2f:
1724 if (native_integers) {
1725 emit(ir, TGSI_OPCODE_U2F, result_dst, op[0]);
1726 break;
1727 }
1728 case ir_binop_lshift:
1729 if (glsl_version >= 130) {
1730 emit(ir, TGSI_OPCODE_SHL, result_dst, op[0], op[1]);
1731 break;
1732 }
1733 case ir_binop_rshift:
1734 if (glsl_version >= 130) {
1735 emit(ir, TGSI_OPCODE_ISHR, result_dst, op[0], op[1]);
1736 break;
1737 }
1738 case ir_binop_bit_and:
1739 if (glsl_version >= 130) {
1740 emit(ir, TGSI_OPCODE_AND, result_dst, op[0], op[1]);
1741 break;
1742 }
1743 case ir_binop_bit_xor:
1744 if (glsl_version >= 130) {
1745 emit(ir, TGSI_OPCODE_XOR, result_dst, op[0], op[1]);
1746 break;
1747 }
1748 case ir_binop_bit_or:
1749 if (glsl_version >= 130) {
1750 emit(ir, TGSI_OPCODE_OR, result_dst, op[0], op[1]);
1751 break;
1752 }
1753 case ir_unop_round_even:
1754 assert(!"GLSL 1.30 features unsupported");
1755 break;
1756
1757 case ir_quadop_vector:
1758 /* This operation should have already been handled.
1759 */
1760 assert(!"Should not get here.");
1761 break;
1762 }
1763
1764 this->result = result_src;
1765 }
1766
1767
1768 void
1769 glsl_to_tgsi_visitor::visit(ir_swizzle *ir)
1770 {
1771 st_src_reg src;
1772 int i;
1773 int swizzle[4];
1774
1775 /* Note that this is only swizzles in expressions, not those on the left
1776 * hand side of an assignment, which do write masking. See ir_assignment
1777 * for that.
1778 */
1779
1780 ir->val->accept(this);
1781 src = this->result;
1782 assert(src.file != PROGRAM_UNDEFINED);
1783
1784 for (i = 0; i < 4; i++) {
1785 if (i < ir->type->vector_elements) {
1786 switch (i) {
1787 case 0:
1788 swizzle[i] = GET_SWZ(src.swizzle, ir->mask.x);
1789 break;
1790 case 1:
1791 swizzle[i] = GET_SWZ(src.swizzle, ir->mask.y);
1792 break;
1793 case 2:
1794 swizzle[i] = GET_SWZ(src.swizzle, ir->mask.z);
1795 break;
1796 case 3:
1797 swizzle[i] = GET_SWZ(src.swizzle, ir->mask.w);
1798 break;
1799 }
1800 } else {
1801 /* If the type is smaller than a vec4, replicate the last
1802 * channel out.
1803 */
1804 swizzle[i] = swizzle[ir->type->vector_elements - 1];
1805 }
1806 }
1807
1808 src.swizzle = MAKE_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]);
1809
1810 this->result = src;
1811 }
1812
1813 void
1814 glsl_to_tgsi_visitor::visit(ir_dereference_variable *ir)
1815 {
1816 variable_storage *entry = find_variable_storage(ir->var);
1817 ir_variable *var = ir->var;
1818
1819 if (!entry) {
1820 switch (var->mode) {
1821 case ir_var_uniform:
1822 entry = new(mem_ctx) variable_storage(var, PROGRAM_UNIFORM,
1823 var->location);
1824 this->variables.push_tail(entry);
1825 break;
1826 case ir_var_in:
1827 case ir_var_inout:
1828 /* The linker assigns locations for varyings and attributes,
1829 * including deprecated builtins (like gl_Color), user-assigned
1830 * generic attributes (glBindAttribLocation), and
1831 * user-defined varyings.
1832 *
1833 * FINISHME: We would hit this path for function arguments. Fix!
1834 */
1835 assert(var->location != -1);
1836 entry = new(mem_ctx) variable_storage(var,
1837 PROGRAM_INPUT,
1838 var->location);
1839 if (this->prog->Target == GL_VERTEX_PROGRAM_ARB &&
1840 var->location >= VERT_ATTRIB_GENERIC0) {
1841 _mesa_add_attribute(this->prog->Attributes,
1842 var->name,
1843 _mesa_sizeof_glsl_type(var->type->gl_type),
1844 var->type->gl_type,
1845 var->location - VERT_ATTRIB_GENERIC0);
1846 }
1847 break;
1848 case ir_var_out:
1849 assert(var->location != -1);
1850 entry = new(mem_ctx) variable_storage(var,
1851 PROGRAM_OUTPUT,
1852 var->location);
1853 break;
1854 case ir_var_system_value:
1855 entry = new(mem_ctx) variable_storage(var,
1856 PROGRAM_SYSTEM_VALUE,
1857 var->location);
1858 break;
1859 case ir_var_auto:
1860 case ir_var_temporary:
1861 entry = new(mem_ctx) variable_storage(var, PROGRAM_TEMPORARY,
1862 this->next_temp);
1863 this->variables.push_tail(entry);
1864
1865 next_temp += type_size(var->type);
1866 break;
1867 }
1868
1869 if (!entry) {
1870 printf("Failed to make storage for %s\n", var->name);
1871 exit(1);
1872 }
1873 }
1874
1875 this->result = st_src_reg(entry->file, entry->index, var->type);
1876 if (!native_integers)
1877 this->result.type = GLSL_TYPE_FLOAT;
1878 }
1879
1880 void
1881 glsl_to_tgsi_visitor::visit(ir_dereference_array *ir)
1882 {
1883 ir_constant *index;
1884 st_src_reg src;
1885 int element_size = type_size(ir->type);
1886
1887 index = ir->array_index->constant_expression_value();
1888
1889 ir->array->accept(this);
1890 src = this->result;
1891
1892 if (index) {
1893 src.index += index->value.i[0] * element_size;
1894 } else {
1895 /* Variable index array dereference. It eats the "vec4" of the
1896 * base of the array and an index that offsets the TGSI register
1897 * index.
1898 */
1899 ir->array_index->accept(this);
1900
1901 st_src_reg index_reg;
1902
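      /* Scale the computed index by the array element size (in vec4 slots) so
       * it can be used directly as a register offset; a stride of one needs no
       * multiply.
       */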
1903 if (element_size == 1) {
1904 index_reg = this->result;
1905 } else {
1906 index_reg = get_temp(glsl_type::float_type);
1907
1908 emit(ir, TGSI_OPCODE_MUL, st_dst_reg(index_reg),
1909 this->result, st_src_reg_for_float(element_size));
1910 }
1911
1912 /* If there was already a relative address register involved, add the
1913 * new and the old together to get the new offset.
1914 */
1915 if (src.reladdr != NULL) {
1916 st_src_reg accum_reg = get_temp(glsl_type::float_type);
1917
1918 emit(ir, TGSI_OPCODE_ADD, st_dst_reg(accum_reg),
1919 index_reg, *src.reladdr);
1920
1921 index_reg = accum_reg;
1922 }
1923
1924 src.reladdr = ralloc(mem_ctx, st_src_reg);
1925 memcpy(src.reladdr, &index_reg, sizeof(index_reg));
1926 }
1927
1928 /* If the type is smaller than a vec4, replicate the last channel out. */
1929 if (ir->type->is_scalar() || ir->type->is_vector())
1930 src.swizzle = swizzle_for_size(ir->type->vector_elements);
1931 else
1932 src.swizzle = SWIZZLE_NOOP;
1933
1934 this->result = src;
1935 }
1936
1937 void
1938 glsl_to_tgsi_visitor::visit(ir_dereference_record *ir)
1939 {
1940 unsigned int i;
1941 const glsl_type *struct_type = ir->record->type;
1942 int offset = 0;
1943
1944 ir->record->accept(this);
1945
1946 for (i = 0; i < struct_type->length; i++) {
1947 if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0)
1948 break;
1949 offset += type_size(struct_type->fields.structure[i].type);
1950 }
1951
1952 /* If the type is smaller than a vec4, replicate the last channel out. */
1953 if (ir->type->is_scalar() || ir->type->is_vector())
1954 this->result.swizzle = swizzle_for_size(ir->type->vector_elements);
1955 else
1956 this->result.swizzle = SWIZZLE_NOOP;
1957
1958 this->result.index += offset;
1959 }
1960
1961 /**
1962 * We want to be careful in assignment setup to hit the actual storage
1963 * instead of potentially using a temporary like we might with the
1964 * ir_dereference handler.
1965 */
1966 static st_dst_reg
1967 get_assignment_lhs(ir_dereference *ir, glsl_to_tgsi_visitor *v)
1968 {
1969 /* The LHS must be a dereference. If the LHS is a variable indexed array
1970 * access of a vector, it must be separated into a series of conditional moves
1971 * before reaching this point (see ir_vec_index_to_cond_assign).
1972 */
1973 assert(ir->as_dereference());
1974 ir_dereference_array *deref_array = ir->as_dereference_array();
1975 if (deref_array) {
1976 assert(!deref_array->array->type->is_vector());
1977 }
1978
1979 /* Use the rvalue deref handler for the most part. We'll ignore
1980 * swizzles in it and write swizzles using writemask, though.
1981 */
1982 ir->accept(v);
1983 return st_dst_reg(v->result);
1984 }
1985
1986 /**
1987 * Process the condition of a conditional assignment
1988 *
1989 * Examines the condition of a conditional assignment to generate the optimal
1990 * first operand of a \c CMP instruction. If the condition is a relational
1991 * operator with 0 (e.g., \c ir_binop_less), the value being compared will be
1992 * used as the source for the \c CMP instruction. Otherwise the comparison
1993 * is processed to a boolean result, and the boolean result is used as the
1994 * operand to the CMP instruction.
1995 */
1996 bool
1997 glsl_to_tgsi_visitor::process_move_condition(ir_rvalue *ir)
1998 {
1999 ir_rvalue *src_ir = ir;
2000 bool negate = true;
2001 bool switch_order = false;
2002
2003 ir_expression *const expr = ir->as_expression();
2004 if ((expr != NULL) && (expr->get_num_operands() == 2)) {
2005 bool zero_on_left = false;
2006
2007 if (expr->operands[0]->is_zero()) {
2008 src_ir = expr->operands[1];
2009 zero_on_left = true;
2010 } else if (expr->operands[1]->is_zero()) {
2011 src_ir = expr->operands[0];
2012 zero_on_left = false;
2013 }
2014
2015 /* a is - 0 + - 0 +
2016 * (a < 0) T F F ( a < 0) T F F
2017 * (0 < a) F F T (-a < 0) F F T
2018 * (a <= 0) T T F (-a < 0) F F T (swap order of other operands)
2019 * (0 <= a) F T T ( a < 0) T F F (swap order of other operands)
2020 * (a > 0) F F T (-a < 0) F F T
2021 * (0 > a) T F F ( a < 0) T F F
2022 * (a >= 0) F T T ( a < 0) T F F (swap order of other operands)
2023 * (0 >= a) T T F (-a < 0) F F T (swap order of other operands)
2024 *
2025 * Note that exchanging the order of 0 and 'a' in the comparison simply
2026 * means that the value of 'a' should be negated.
2027 */
2028 if (src_ir != ir) {
2029 switch (expr->operation) {
2030 case ir_binop_less:
2031 switch_order = false;
2032 negate = zero_on_left;
2033 break;
2034
2035 case ir_binop_greater:
2036 switch_order = false;
2037 negate = !zero_on_left;
2038 break;
2039
2040 case ir_binop_lequal:
2041 switch_order = true;
2042 negate = !zero_on_left;
2043 break;
2044
2045 case ir_binop_gequal:
2046 switch_order = true;
2047 negate = zero_on_left;
2048 break;
2049
2050 default:
2051 /* This isn't the right kind of comparison after all, so make sure
2052 * the whole condition is visited.
2053 */
2054 src_ir = ir;
2055 break;
2056 }
2057 }
2058 }
2059
2060 src_ir->accept(this);
2061
2062 /* We use the TGSI_OPCODE_CMP (a < 0 ? b : c) for conditional moves, and the
2063 * condition we produced is 0.0 or 1.0. By flipping the sign, we can
2064 * choose which value TGSI_OPCODE_CMP produces without an extra instruction
2065 * computing the condition.
2066 */
2067 if (negate)
2068 this->result.negate = ~this->result.negate;
2069
2070 return switch_order;
2071 }
2072
2073 void
2074 glsl_to_tgsi_visitor::visit(ir_assignment *ir)
2075 {
2076 st_dst_reg l;
2077 st_src_reg r;
2078 int i;
2079
2080 ir->rhs->accept(this);
2081 r = this->result;
2082
2083 l = get_assignment_lhs(ir->lhs, this);
2084
2085 /* FINISHME: This should really set to the correct maximal writemask for each
2086 * FINISHME: component written (in the loops below). This case can only
2087 * FINISHME: occur for matrices, arrays, and structures.
2088 */
2089 if (ir->write_mask == 0) {
2090 assert(!ir->lhs->type->is_scalar() && !ir->lhs->type->is_vector());
2091 l.writemask = WRITEMASK_XYZW;
2092 } else if (ir->lhs->type->is_scalar() &&
2093 ir->lhs->variable_referenced()->mode == ir_var_out) {
2094 /* FINISHME: This hack makes writing to gl_FragDepth, which lives in the
2095 * FINISHME: W component of fragment shader output register zero, work correctly.
2096 */
2097 l.writemask = WRITEMASK_XYZW;
2098 } else {
2099 int swizzles[4];
2100 int first_enabled_chan = 0;
2101 int rhs_chan = 0;
2102
2103 l.writemask = ir->write_mask;
2104
2105 for (int i = 0; i < 4; i++) {
2106 if (l.writemask & (1 << i)) {
2107 first_enabled_chan = GET_SWZ(r.swizzle, i);
2108 break;
2109 }
2110 }
2111
2112 /* Swizzle a small RHS vector into the channels being written.
2113 *
2114 * glsl ir treats write_mask as dictating how many channels are
2115 * present on the RHS while TGSI treats write_mask as just
2116 * showing which channels of the vec4 RHS get written.
2117 */
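      /* For example, with an identity RHS swizzle and writemask .yz, RHS .x is
       * routed to dst .y and RHS .y to dst .z; channels outside the writemask
       * just repeat an already-used component, since they are never written.
       */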
2118 for (int i = 0; i < 4; i++) {
2119 if (l.writemask & (1 << i))
2120 swizzles[i] = GET_SWZ(r.swizzle, rhs_chan++);
2121 else
2122 swizzles[i] = first_enabled_chan;
2123 }
2124 r.swizzle = MAKE_SWIZZLE4(swizzles[0], swizzles[1],
2125 swizzles[2], swizzles[3]);
2126 }
2127
2128 assert(l.file != PROGRAM_UNDEFINED);
2129 assert(r.file != PROGRAM_UNDEFINED);
2130
2131 if (ir->condition) {
2132 const bool switch_order = this->process_move_condition(ir->condition);
2133 st_src_reg condition = this->result;
2134
2135 for (i = 0; i < type_size(ir->lhs->type); i++) {
2136 st_src_reg l_src = st_src_reg(l);
2137 l_src.swizzle = swizzle_for_size(ir->lhs->type->vector_elements);
2138
2139 if (switch_order) {
2140 emit(ir, TGSI_OPCODE_CMP, l, condition, l_src, r);
2141 } else {
2142 emit(ir, TGSI_OPCODE_CMP, l, condition, r, l_src);
2143 }
2144
2145 l.index++;
2146 r.index++;
2147 }
2148 } else if (ir->rhs->as_expression() &&
2149 this->instructions.get_tail() &&
2150 ir->rhs == ((glsl_to_tgsi_instruction *)this->instructions.get_tail())->ir &&
2151 type_size(ir->lhs->type) == 1 &&
2152 l.writemask == ((glsl_to_tgsi_instruction *)this->instructions.get_tail())->dst.writemask) {
2153 /* To avoid emitting an extra MOV when assigning an expression to a
2154 * variable, emit the last instruction of the expression again, but
2155 * replace the destination register with the target of the assignment.
2156 * Dead code elimination will remove the original instruction.
2157 */
2158 glsl_to_tgsi_instruction *inst, *new_inst;
2159 inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail();
2160 new_inst = emit(ir, inst->op, l, inst->src[0], inst->src[1], inst->src[2]);
2161 new_inst->saturate = inst->saturate;
2162 } else {
2163 for (i = 0; i < type_size(ir->lhs->type); i++) {
2164 emit(ir, TGSI_OPCODE_MOV, l, r);
2165 l.index++;
2166 r.index++;
2167 }
2168 }
2169 }
2170
2171
2172 void
2173 glsl_to_tgsi_visitor::visit(ir_constant *ir)
2174 {
2175 st_src_reg src;
2176 GLfloat stack_vals[4] = { 0 };
2177 gl_constant_value *values = (gl_constant_value *) stack_vals;
2178 GLenum gl_type = GL_NONE;
2179 unsigned int i;
2180 static int in_array = 0;
2181 gl_register_file file = in_array ? PROGRAM_CONSTANT : PROGRAM_IMMEDIATE;
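   /* Array elements go to PROGRAM_CONSTANT rather than PROGRAM_IMMEDIATE,
    * presumably because array constants may be indexed with relative
    * addressing, which the immediate file does not support.
    */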
2182
2183 /* Unfortunately, 4 floats is all we can get into
2184 * _mesa_add_typed_unnamed_constant. So, make a temp to store an
2185 * aggregate constant and move each constant value into it. If we
2186 * get lucky, copy propagation will eliminate the extra moves.
2187 */
2188 if (ir->type->base_type == GLSL_TYPE_STRUCT) {
2189 st_src_reg temp_base = get_temp(ir->type);
2190 st_dst_reg temp = st_dst_reg(temp_base);
2191
2192 foreach_iter(exec_list_iterator, iter, ir->components) {
2193 ir_constant *field_value = (ir_constant *)iter.get();
2194 int size = type_size(field_value->type);
2195
2196 assert(size > 0);
2197
2198 field_value->accept(this);
2199 src = this->result;
2200
2201 for (i = 0; i < (unsigned int)size; i++) {
2202 emit(ir, TGSI_OPCODE_MOV, temp, src);
2203
2204 src.index++;
2205 temp.index++;
2206 }
2207 }
2208 this->result = temp_base;
2209 return;
2210 }
2211
2212 if (ir->type->is_array()) {
2213 st_src_reg temp_base = get_temp(ir->type);
2214 st_dst_reg temp = st_dst_reg(temp_base);
2215 int size = type_size(ir->type->fields.array);
2216
2217 assert(size > 0);
2218 in_array++;
2219
2220 for (i = 0; i < ir->type->length; i++) {
2221 ir->array_elements[i]->accept(this);
2222 src = this->result;
2223 for (int j = 0; j < size; j++) {
2224 emit(ir, TGSI_OPCODE_MOV, temp, src);
2225
2226 src.index++;
2227 temp.index++;
2228 }
2229 }
2230 this->result = temp_base;
2231 in_array--;
2232 return;
2233 }
2234
2235 if (ir->type->is_matrix()) {
2236 st_src_reg mat = get_temp(ir->type);
2237 st_dst_reg mat_column = st_dst_reg(mat);
2238
2239 for (i = 0; i < ir->type->matrix_columns; i++) {
2240 assert(ir->type->base_type == GLSL_TYPE_FLOAT);
2241 values = (gl_constant_value *) &ir->value.f[i * ir->type->vector_elements];
2242
2243 src = st_src_reg(file, -1, ir->type->base_type);
2244 src.index = add_constant(file,
2245 values,
2246 ir->type->vector_elements,
2247 GL_FLOAT,
2248 &src.swizzle);
2249 emit(ir, TGSI_OPCODE_MOV, mat_column, src);
2250
2251 mat_column.index++;
2252 }
2253
2254 this->result = mat;
2255 return;
2256 }
2257
2258 switch (ir->type->base_type) {
2259 case GLSL_TYPE_FLOAT:
2260 gl_type = GL_FLOAT;
2261 for (i = 0; i < ir->type->vector_elements; i++) {
2262 values[i].f = ir->value.f[i];
2263 }
2264 break;
2265 case GLSL_TYPE_UINT:
2266 gl_type = native_integers ? GL_UNSIGNED_INT : GL_FLOAT;
2267 for (i = 0; i < ir->type->vector_elements; i++) {
2268 if (native_integers)
2269 values[i].u = ir->value.u[i];
2270 else
2271 values[i].f = ir->value.u[i];
2272 }
2273 break;
2274 case GLSL_TYPE_INT:
2275 gl_type = native_integers ? GL_INT : GL_FLOAT;
2276 for (i = 0; i < ir->type->vector_elements; i++) {
2277 if (native_integers)
2278 values[i].i = ir->value.i[i];
2279 else
2280 values[i].f = ir->value.i[i];
2281 }
2282 break;
2283 case GLSL_TYPE_BOOL:
2284 gl_type = native_integers ? GL_BOOL : GL_FLOAT;
2285 for (i = 0; i < ir->type->vector_elements; i++) {
2286 if (native_integers)
2287 values[i].b = ir->value.b[i];
2288 else
2289 values[i].f = ir->value.b[i];
2290 }
2291 break;
2292 default:
2293 assert(!"Non-float/uint/int/bool constant");
2294 }
2295
2296 this->result = st_src_reg(file, -1, ir->type);
2297 this->result.index = add_constant(file,
2298 values,
2299 ir->type->vector_elements,
2300 gl_type,
2301 &this->result.swizzle);
2302 }
2303
2304 function_entry *
2305 glsl_to_tgsi_visitor::get_function_signature(ir_function_signature *sig)
2306 {
2307 function_entry *entry;
2308
2309 foreach_iter(exec_list_iterator, iter, this->function_signatures) {
2310 entry = (function_entry *)iter.get();
2311
2312 if (entry->sig == sig)
2313 return entry;
2314 }
2315
2316 entry = ralloc(mem_ctx, function_entry);
2317 entry->sig = sig;
2318 entry->sig_id = this->next_signature_id++;
2319 entry->bgn_inst = NULL;
2320
2321 /* Allocate storage for all the parameters. */
2322 foreach_iter(exec_list_iterator, iter, sig->parameters) {
2323 ir_variable *param = (ir_variable *)iter.get();
2324 variable_storage *storage;
2325
2326 storage = find_variable_storage(param);
2327 assert(!storage);
2328
2329 storage = new(mem_ctx) variable_storage(param, PROGRAM_TEMPORARY,
2330 this->next_temp);
2331 this->variables.push_tail(storage);
2332
2333 this->next_temp += type_size(param->type);
2334 }
2335
2336 if (!sig->return_type->is_void()) {
2337 entry->return_reg = get_temp(sig->return_type);
2338 } else {
2339 entry->return_reg = undef_src;
2340 }
2341
2342 this->function_signatures.push_tail(entry);
2343 return entry;
2344 }
2345
2346 void
2347 glsl_to_tgsi_visitor::visit(ir_call *ir)
2348 {
2349 glsl_to_tgsi_instruction *call_inst;
2350 ir_function_signature *sig = ir->get_callee();
2351 function_entry *entry = get_function_signature(sig);
2352 int i;
2353
2354 /* Process in parameters. */
2355 exec_list_iterator sig_iter = sig->parameters.iterator();
2356 foreach_iter(exec_list_iterator, iter, *ir) {
2357 ir_rvalue *param_rval = (ir_rvalue *)iter.get();
2358 ir_variable *param = (ir_variable *)sig_iter.get();
2359
2360 if (param->mode == ir_var_in ||
2361 param->mode == ir_var_inout) {
2362 variable_storage *storage = find_variable_storage(param);
2363 assert(storage);
2364
2365 param_rval->accept(this);
2366 st_src_reg r = this->result;
2367
2368 st_dst_reg l;
2369 l.file = storage->file;
2370 l.index = storage->index;
2371 l.reladdr = NULL;
2372 l.writemask = WRITEMASK_XYZW;
2373 l.cond_mask = COND_TR;
2374
2375 for (i = 0; i < type_size(param->type); i++) {
2376 emit(ir, TGSI_OPCODE_MOV, l, r);
2377 l.index++;
2378 r.index++;
2379 }
2380 }
2381
2382 sig_iter.next();
2383 }
2384 assert(!sig_iter.has_next());
2385
2386 /* Emit call instruction */
2387 call_inst = emit(ir, TGSI_OPCODE_CAL);
2388 call_inst->function = entry;
2389
2390 /* Process out parameters. */
2391 sig_iter = sig->parameters.iterator();
2392 foreach_iter(exec_list_iterator, iter, *ir) {
2393 ir_rvalue *param_rval = (ir_rvalue *)iter.get();
2394 ir_variable *param = (ir_variable *)sig_iter.get();
2395
2396 if (param->mode == ir_var_out ||
2397 param->mode == ir_var_inout) {
2398 variable_storage *storage = find_variable_storage(param);
2399 assert(storage);
2400
2401 st_src_reg r;
2402 r.file = storage->file;
2403 r.index = storage->index;
2404 r.reladdr = NULL;
2405 r.swizzle = SWIZZLE_NOOP;
2406 r.negate = 0;
2407
2408 param_rval->accept(this);
2409 st_dst_reg l = st_dst_reg(this->result);
2410
2411 for (i = 0; i < type_size(param->type); i++) {
2412 emit(ir, TGSI_OPCODE_MOV, l, r);
2413 l.index++;
2414 r.index++;
2415 }
2416 }
2417
2418 sig_iter.next();
2419 }
2420 assert(!sig_iter.has_next());
2421
2422 /* Process return value. */
2423 this->result = entry->return_reg;
2424 }
2425
2426 void
2427 glsl_to_tgsi_visitor::visit(ir_texture *ir)
2428 {
2429 st_src_reg result_src, coord, lod_info, projector, dx, dy, offset;
2430 st_dst_reg result_dst, coord_dst;
2431 glsl_to_tgsi_instruction *inst = NULL;
2432 unsigned opcode = TGSI_OPCODE_NOP;
2433
2434 if (ir->coordinate) {
2435 ir->coordinate->accept(this);
2436
2437 /* Put our coords in a temp. We'll need to modify them for shadow,
2438 * projection, or LOD, so the only case we'd use it as-is is if
2439 * we're doing plain old texturing. The optimization passes on
2440 * glsl_to_tgsi_visitor should handle cleaning up our mess in that case.
2441 */
2442 coord = get_temp(glsl_type::vec4_type);
2443 coord_dst = st_dst_reg(coord);
2444 emit(ir, TGSI_OPCODE_MOV, coord_dst, this->result);
2445 }
2446
2447 if (ir->projector) {
2448 ir->projector->accept(this);
2449 projector = this->result;
2450 }
2451
2452 /* Storage for our result. Ideally for an assignment we'd be using
2453 * the actual storage for the result here, instead.
2454 */
2455 result_src = get_temp(glsl_type::vec4_type);
2456 result_dst = st_dst_reg(result_src);
2457
2458 switch (ir->op) {
2459 case ir_tex:
2460 opcode = TGSI_OPCODE_TEX;
2461 break;
2462 case ir_txb:
2463 opcode = TGSI_OPCODE_TXB;
2464 ir->lod_info.bias->accept(this);
2465 lod_info = this->result;
2466 break;
2467 case ir_txl:
2468 opcode = TGSI_OPCODE_TXL;
2469 ir->lod_info.lod->accept(this);
2470 lod_info = this->result;
2471 break;
2472 case ir_txd:
2473 opcode = TGSI_OPCODE_TXD;
2474 ir->lod_info.grad.dPdx->accept(this);
2475 dx = this->result;
2476 ir->lod_info.grad.dPdy->accept(this);
2477 dy = this->result;
2478 break;
2479 case ir_txs:
2480 opcode = TGSI_OPCODE_TXQ;
2481 ir->lod_info.lod->accept(this);
2482 lod_info = this->result;
2483 break;
2484 case ir_txf:
2485 opcode = TGSI_OPCODE_TXF;
2486 ir->lod_info.lod->accept(this);
2487 lod_info = this->result;
2488 if (ir->offset) {
2489 ir->offset->accept(this);
2490 offset = this->result;
2491 }
2492 break;
2493 }
2494
2495 if (ir->projector) {
2496 if (opcode == TGSI_OPCODE_TEX) {
2497 /* Slot the projector in as the last component of the coord. */
2498 coord_dst.writemask = WRITEMASK_W;
2499 emit(ir, TGSI_OPCODE_MOV, coord_dst, projector);
2500 coord_dst.writemask = WRITEMASK_XYZW;
2501 opcode = TGSI_OPCODE_TXP;
2502 } else {
2503 st_src_reg coord_w = coord;
2504 coord_w.swizzle = SWIZZLE_WWWW;
2505
2506 /* For the other TEX opcodes there's no projective version
2507 * since the last slot is taken up by LOD info. Do the
2508 * projective divide now.
2509 */
2510 coord_dst.writemask = WRITEMASK_W;
2511 emit(ir, TGSI_OPCODE_RCP, coord_dst, projector);
2512
2513 /* In the case where we have to project the coordinates "by hand,"
2514 * the shadow comparator value must also be projected.
2515 */
2516 st_src_reg tmp_src = coord;
2517 if (ir->shadow_comparitor) {
2518 /* Slot the shadow value in as the second to last component of the
2519 * coord.
2520 */
2521 ir->shadow_comparitor->accept(this);
2522
2523 tmp_src = get_temp(glsl_type::vec4_type);
2524 st_dst_reg tmp_dst = st_dst_reg(tmp_src);
2525
2526 tmp_dst.writemask = WRITEMASK_Z;
2527 emit(ir, TGSI_OPCODE_MOV, tmp_dst, this->result);
2528
2529 tmp_dst.writemask = WRITEMASK_XY;
2530 emit(ir, TGSI_OPCODE_MOV, tmp_dst, coord);
2531 }
2532
2533 coord_dst.writemask = WRITEMASK_XYZ;
2534 emit(ir, TGSI_OPCODE_MUL, coord_dst, tmp_src, coord_w);
2535
2536 coord_dst.writemask = WRITEMASK_XYZW;
2537 coord.swizzle = SWIZZLE_XYZW;
2538 }
2539 }
2540
2541 /* If projection is done and the opcode is not TGSI_OPCODE_TXP, then the shadow
2542 * comparator was put in the correct place (and projected) by the
2543 * by-hand projection code above.
2544 */
2545 if (ir->shadow_comparitor && (!ir->projector || opcode == TGSI_OPCODE_TXP)) {
2546 /* Slot the shadow value in as the second to last component of the
2547 * coord.
2548 */
2549 ir->shadow_comparitor->accept(this);
2550 coord_dst.writemask = WRITEMASK_Z;
2551 emit(ir, TGSI_OPCODE_MOV, coord_dst, this->result);
2552 coord_dst.writemask = WRITEMASK_XYZW;
2553 }
2554
2555 if (opcode == TGSI_OPCODE_TXL || opcode == TGSI_OPCODE_TXB ||
2556 opcode == TGSI_OPCODE_TXF) {
2557 /* TGSI stores LOD or LOD bias in the last channel of the coords. */
2558 coord_dst.writemask = WRITEMASK_W;
2559 emit(ir, TGSI_OPCODE_MOV, coord_dst, lod_info);
2560 coord_dst.writemask = WRITEMASK_XYZW;
2561 }
2562
2563 if (opcode == TGSI_OPCODE_TXD)
2564 inst = emit(ir, opcode, result_dst, coord, dx, dy);
2565 else if (opcode == TGSI_OPCODE_TXQ)
2566 inst = emit(ir, opcode, result_dst, lod_info);
2567 else if (opcode == TGSI_OPCODE_TXF) {
2568 inst = emit(ir, opcode, result_dst, coord);
2569 } else
2570 inst = emit(ir, opcode, result_dst, coord);
2571
2572 if (ir->shadow_comparitor)
2573 inst->tex_shadow = GL_TRUE;
2574
2575 inst->sampler = _mesa_get_sampler_uniform_value(ir->sampler,
2576 this->shader_program,
2577 this->prog);
2578
2579 if (ir->offset) {
2580 inst->tex_offset_num_offset = 1;
2581 inst->tex_offsets[0].Index = offset.index;
2582 inst->tex_offsets[0].File = offset.file;
2583 inst->tex_offsets[0].SwizzleX = GET_SWZ(offset.swizzle, 0);
2584 inst->tex_offsets[0].SwizzleY = GET_SWZ(offset.swizzle, 1);
2585 inst->tex_offsets[0].SwizzleZ = GET_SWZ(offset.swizzle, 2);
2586 }
2587
2588 const glsl_type *sampler_type = ir->sampler->type;
2589
2590 switch (sampler_type->sampler_dimensionality) {
2591 case GLSL_SAMPLER_DIM_1D:
2592 inst->tex_target = (sampler_type->sampler_array)
2593 ? TEXTURE_1D_ARRAY_INDEX : TEXTURE_1D_INDEX;
2594 break;
2595 case GLSL_SAMPLER_DIM_2D:
2596 inst->tex_target = (sampler_type->sampler_array)
2597 ? TEXTURE_2D_ARRAY_INDEX : TEXTURE_2D_INDEX;
2598 break;
2599 case GLSL_SAMPLER_DIM_3D:
2600 inst->tex_target = TEXTURE_3D_INDEX;
2601 break;
2602 case GLSL_SAMPLER_DIM_CUBE:
2603 inst->tex_target = TEXTURE_CUBE_INDEX;
2604 break;
2605 case GLSL_SAMPLER_DIM_RECT:
2606 inst->tex_target = TEXTURE_RECT_INDEX;
2607 break;
2608 case GLSL_SAMPLER_DIM_BUF:
2609 assert(!"FINISHME: Implement ARB_texture_buffer_object");
2610 break;
2611 default:
2612 assert(!"Should not get here.");
2613 }
2614
2615 this->result = result_src;
2616 }
2617
2618 void
2619 glsl_to_tgsi_visitor::visit(ir_return *ir)
2620 {
2621 if (ir->get_value()) {
2622 st_dst_reg l;
2623 int i;
2624
2625 assert(current_function);
2626
2627 ir->get_value()->accept(this);
2628 st_src_reg r = this->result;
2629
2630 l = st_dst_reg(current_function->return_reg);
2631
2632 for (i = 0; i < type_size(current_function->sig->return_type); i++) {
2633 emit(ir, TGSI_OPCODE_MOV, l, r);
2634 l.index++;
2635 r.index++;
2636 }
2637 }
2638
2639 emit(ir, TGSI_OPCODE_RET);
2640 }
2641
2642 void
2643 glsl_to_tgsi_visitor::visit(ir_discard *ir)
2644 {
2645 struct gl_fragment_program *fp = (struct gl_fragment_program *)this->prog;
2646
2647 if (ir->condition) {
2648 ir->condition->accept(this);
2649 this->result.negate = ~this->result.negate;
2650 emit(ir, TGSI_OPCODE_KIL, undef_dst, this->result);
2651 } else {
2652 emit(ir, TGSI_OPCODE_KILP);
2653 }
2654
2655 fp->UsesKill = GL_TRUE;
2656 }
2657
2658 void
2659 glsl_to_tgsi_visitor::visit(ir_if *ir)
2660 {
2661 glsl_to_tgsi_instruction *cond_inst, *if_inst;
2662 glsl_to_tgsi_instruction *prev_inst;
2663
2664 prev_inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail();
2665
2666 ir->condition->accept(this);
2667 assert(this->result.file != PROGRAM_UNDEFINED);
2668
2669 if (this->options->EmitCondCodes) {
2670 cond_inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail();
2671
2672 /* See if we actually generated any instruction computing
2673 * the condition. If not, then cook up a move to a temp so we
2674 * have something to set cond_update on.
2675 */
2676 if (cond_inst == prev_inst) {
2677 st_src_reg temp = get_temp(glsl_type::bool_type);
2678 cond_inst = emit(ir->condition, TGSI_OPCODE_MOV, st_dst_reg(temp), result);
2679 }
2680 cond_inst->cond_update = GL_TRUE;
2681
2682 if_inst = emit(ir->condition, TGSI_OPCODE_IF);
2683 if_inst->dst.cond_mask = COND_NE;
2684 } else {
2685 if_inst = emit(ir->condition, TGSI_OPCODE_IF, undef_dst, this->result);
2686 }
2687
2688 this->instructions.push_tail(if_inst);
2689
2690 visit_exec_list(&ir->then_instructions, this);
2691
2692 if (!ir->else_instructions.is_empty()) {
2693 emit(ir->condition, TGSI_OPCODE_ELSE);
2694 visit_exec_list(&ir->else_instructions, this);
2695 }
2696
2697 if_inst = emit(ir->condition, TGSI_OPCODE_ENDIF);
2698 }
2699
2700 glsl_to_tgsi_visitor::glsl_to_tgsi_visitor()
2701 {
2702 result.file = PROGRAM_UNDEFINED;
2703 next_temp = 1;
2704 next_signature_id = 1;
2705 num_immediates = 0;
2706 current_function = NULL;
2707 num_address_regs = 0;
2708 indirect_addr_temps = false;
2709 indirect_addr_consts = false;
2710 mem_ctx = ralloc_context(NULL);
2711 }
2712
2713 glsl_to_tgsi_visitor::~glsl_to_tgsi_visitor()
2714 {
2715 ralloc_free(mem_ctx);
2716 }
2717
2718 extern "C" void free_glsl_to_tgsi_visitor(glsl_to_tgsi_visitor *v)
2719 {
2720 delete v;
2721 }
2722
2723
2724 /**
2725 * Count resources used by the given gpu program (number of texture
2726 * samplers, etc).
2727 */
2728 static void
2729 count_resources(glsl_to_tgsi_visitor *v, gl_program *prog)
2730 {
2731 v->samplers_used = 0;
2732
2733 foreach_iter(exec_list_iterator, iter, v->instructions) {
2734 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
2735
2736 if (is_tex_instruction(inst->op)) {
2737 v->samplers_used |= 1 << inst->sampler;
2738
2739 prog->SamplerTargets[inst->sampler] =
2740 (gl_texture_index)inst->tex_target;
2741 if (inst->tex_shadow) {
2742 prog->ShadowSamplers |= 1 << inst->sampler;
2743 }
2744 }
2745 }
2746
2747 prog->SamplersUsed = v->samplers_used;
2748 _mesa_update_shader_textures_used(prog);
2749 }
2750
2751
2752 /**
2753 * Check if the given vertex/fragment/shader program is within the
2754 * resource limits of the context (number of texture units, etc).
2755 * If any of those checks fail, record a linker error.
2756 *
2757 * XXX more checks are needed...
2758 */
2759 static void
2760 check_resources(const struct gl_context *ctx,
2761 struct gl_shader_program *shader_program,
2762 glsl_to_tgsi_visitor *prog,
2763 struct gl_program *proginfo)
2764 {
2765 switch (proginfo->Target) {
2766 case GL_VERTEX_PROGRAM_ARB:
2767 if (_mesa_bitcount(prog->samplers_used) >
2768 ctx->Const.MaxVertexTextureImageUnits) {
2769 fail_link(shader_program, "Too many vertex shader texture samplers");
2770 }
2771 if (proginfo->Parameters->NumParameters > MAX_UNIFORMS) {
2772 fail_link(shader_program, "Too many vertex shader constants");
2773 }
2774 break;
2775 case MESA_GEOMETRY_PROGRAM:
2776 if (_mesa_bitcount(prog->samplers_used) >
2777 ctx->Const.MaxGeometryTextureImageUnits) {
2778 fail_link(shader_program, "Too many geometry shader texture samplers");
2779 }
2780 if (proginfo->Parameters->NumParameters >
2781 MAX_GEOMETRY_UNIFORM_COMPONENTS / 4) {
2782 fail_link(shader_program, "Too many geometry shader constants");
2783 }
2784 break;
2785 case GL_FRAGMENT_PROGRAM_ARB:
2786 if (_mesa_bitcount(prog->samplers_used) >
2787 ctx->Const.MaxTextureImageUnits) {
2788 fail_link(shader_program, "Too many fragment shader texture samplers");
2789 }
2790 if (proginfo->Parameters->NumParameters > MAX_UNIFORMS) {
2791 fail_link(shader_program, "Too many fragment shader constants");
2792 }
2793 break;
2794 default:
2795 _mesa_problem(ctx, "unexpected program type in check_resources()");
2796 }
2797 }
2798
2799
2800
2801 struct uniform_sort {
2802 struct gl_uniform *u;
2803 int pos;
2804 };
2805
2806 /* The shader_program->Uniforms list is almost sorted in increasing
2807 * uniform->{Frag,Vert}Pos locations, but not quite when there are
2808 * uniforms shared between targets. We need to add parameters in
2809 * increasing order for the targets.
2810 */
2811 static int
2812 sort_uniforms(const void *a, const void *b)
2813 {
2814 struct uniform_sort *u1 = (struct uniform_sort *)a;
2815 struct uniform_sort *u2 = (struct uniform_sort *)b;
2816
2817 return u1->pos - u2->pos;
2818 }
2819
2820 /* Add the uniforms to the parameters. The linker chose locations
2821 * in our parameters lists (which weren't created yet), which the
2822 * uniforms code will use to poke values into our parameters list
2823 * when uniforms are updated.
2824 */
2825 static void
2826 add_uniforms_to_parameters_list(struct gl_shader_program *shader_program,
2827 struct gl_shader *shader,
2828 struct gl_program *prog)
2829 {
2830 unsigned int i;
2831 unsigned int next_sampler = 0, num_uniforms = 0;
2832 struct uniform_sort *sorted_uniforms;
2833
2834 sorted_uniforms = ralloc_array(NULL, struct uniform_sort,
2835 shader_program->Uniforms->NumUniforms);
2836
2837 for (i = 0; i < shader_program->Uniforms->NumUniforms; i++) {
2838 struct gl_uniform *uniform = shader_program->Uniforms->Uniforms + i;
2839 int parameter_index = -1;
2840
2841 switch (shader->Type) {
2842 case GL_VERTEX_SHADER:
2843 parameter_index = uniform->VertPos;
2844 break;
2845 case GL_FRAGMENT_SHADER:
2846 parameter_index = uniform->FragPos;
2847 break;
2848 case GL_GEOMETRY_SHADER:
2849 parameter_index = uniform->GeomPos;
2850 break;
2851 }
2852
2853 /* Only add uniforms used in our target. */
2854 if (parameter_index != -1) {
2855 sorted_uniforms[num_uniforms].pos = parameter_index;
2856 sorted_uniforms[num_uniforms].u = uniform;
2857 num_uniforms++;
2858 }
2859 }
2860
2861 qsort(sorted_uniforms, num_uniforms, sizeof(struct uniform_sort),
2862 sort_uniforms);
2863
2864 for (i = 0; i < num_uniforms; i++) {
2865 struct gl_uniform *uniform = sorted_uniforms[i].u;
2866 int parameter_index = sorted_uniforms[i].pos;
2867 const glsl_type *type = uniform->Type;
2868 unsigned int size;
2869
2870 if (type->is_vector() ||
2871 type->is_scalar()) {
2872 size = type->vector_elements;
2873 } else {
2874 size = type_size(type) * 4;
2875 }
2876
2877 gl_register_file file;
2878 if (type->is_sampler() ||
2879 (type->is_array() && type->fields.array->is_sampler())) {
2880 file = PROGRAM_SAMPLER;
2881 } else {
2882 file = PROGRAM_UNIFORM;
2883 }
2884
2885 GLint index = _mesa_lookup_parameter_index(prog->Parameters, -1,
2886 uniform->Name);
2887
2888 if (index < 0) {
2889 index = _mesa_add_parameter(prog->Parameters, file,
2890 uniform->Name, size, type->gl_type,
2891 NULL, NULL, 0x0);
2892
2893 /* Sampler uniform values are stored in prog->SamplerUnits,
2894 * and the entry in that array is selected by this index we
2895 * store in ParameterValues[].
2896 */
2897 if (file == PROGRAM_SAMPLER) {
2898 for (unsigned int j = 0; j < size / 4; j++)
2899 prog->Parameters->ParameterValues[index + j][0].f = next_sampler++;
2900 }
2901
2902 /* The location chosen in the Parameters list here (returned
2903 * from _mesa_add_parameter) has to match what the linker chose.
2904 */
2905 if (index != parameter_index) {
2906 fail_link(shader_program, "Allocation of uniform `%s' to target "
2907 "failed (%d vs %d)\n",
2908 uniform->Name, index, parameter_index);
2909 }
2910 }
2911 }
2912
2913 ralloc_free(sorted_uniforms);
2914 }
2915
2916 static void
2917 set_uniform_initializer(struct gl_context *ctx, void *mem_ctx,
2918 struct gl_shader_program *shader_program,
2919 const char *name, const glsl_type *type,
2920 ir_constant *val)
2921 {
2922 if (type->is_record()) {
2923 ir_constant *field_constant;
2924
2925 field_constant = (ir_constant *)val->components.get_head();
2926
2927 for (unsigned int i = 0; i < type->length; i++) {
2928 const glsl_type *field_type = type->fields.structure[i].type;
2929 const char *field_name = ralloc_asprintf(mem_ctx, "%s.%s", name,
2930 type->fields.structure[i].name);
2931 set_uniform_initializer(ctx, mem_ctx, shader_program, field_name,
2932 field_type, field_constant);
2933 field_constant = (ir_constant *)field_constant->next;
2934 }
2935 return;
2936 }
2937
2938 int loc = _mesa_get_uniform_location(ctx, shader_program, name);
2939
2940 if (loc == -1) {
2941 fail_link(shader_program,
2942 "Couldn't find uniform for initializer %s\n", name);
2943 return;
2944 }
2945
2946 for (unsigned int i = 0; i < (type->is_array() ? type->length : 1); i++) {
2947 ir_constant *element;
2948 const glsl_type *element_type;
2949 if (type->is_array()) {
2950 element = val->array_elements[i];
2951 element_type = type->fields.array;
2952 } else {
2953 element = val;
2954 element_type = type;
2955 }
2956
2957 void *values;
2958
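      /* Booleans are expanded into a temporary int array so that the
       * _mesa_uniform() call below receives plain int data with a matching
       * GL_INT-based type.
       */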
2959 if (element_type->base_type == GLSL_TYPE_BOOL) {
2960 int *conv = ralloc_array(mem_ctx, int, element_type->components());
2961 for (unsigned int j = 0; j < element_type->components(); j++) {
2962 conv[j] = element->value.b[j];
2963 }
2964 values = (void *)conv;
2965 element_type = glsl_type::get_instance(GLSL_TYPE_INT,
2966 element_type->vector_elements,
2967 1);
2968 } else {
2969 values = &element->value;
2970 }
2971
2972 if (element_type->is_matrix()) {
2973 _mesa_uniform_matrix(ctx, shader_program,
2974 element_type->matrix_columns,
2975 element_type->vector_elements,
2976 loc, 1, GL_FALSE, (GLfloat *)values);
2977 loc += element_type->matrix_columns;
2978 } else {
2979 _mesa_uniform(ctx, shader_program, loc, element_type->matrix_columns,
2980 values, element_type->gl_type);
2981 loc += type_size(element_type);
2982 }
2983 }
2984 }
2985
2986 /*
2987 * Scan/rewrite program to remove reads of custom (output) registers.
2988 * The passed type has to be either PROGRAM_OUTPUT or PROGRAM_VARYING
2989 * (for vertex shaders).
2990 * In GLSL shaders, varying vars can be read and written.
2991 * On some hardware, trying to read an output register causes trouble.
2992 * So, rewrite the program to use a temporary register in this case.
2993 *
2994 * Based on _mesa_remove_output_reads from programopt.c.
2995 */
2996 void
2997 glsl_to_tgsi_visitor::remove_output_reads(gl_register_file type)
2998 {
2999 GLuint i;
3000 GLint outputMap[VERT_RESULT_MAX];
3001 GLint outputTypes[VERT_RESULT_MAX];
3002 GLuint numVaryingReads = 0;
3003 GLboolean usedTemps[MAX_TEMPS];
3004 GLuint firstTemp = 0;
3005
3006 _mesa_find_used_registers(prog, PROGRAM_TEMPORARY,
3007 usedTemps, MAX_TEMPS);
3008
3009 assert(type == PROGRAM_VARYING || type == PROGRAM_OUTPUT);
3010 assert(prog->Target == GL_VERTEX_PROGRAM_ARB || type != PROGRAM_VARYING);
3011
3012 for (i = 0; i < VERT_RESULT_MAX; i++)
3013 outputMap[i] = -1;
3014
3015 /* look for instructions which read from varying vars */
3016 foreach_iter(exec_list_iterator, iter, this->instructions) {
3017 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
3018 const GLuint numSrc = num_inst_src_regs(inst->op);
3019 GLuint j;
3020 for (j = 0; j < numSrc; j++) {
3021 if (inst->src[j].file == type) {
3022 /* replace the read with a temp reg */
3023 const GLuint var = inst->src[j].index;
3024 if (outputMap[var] == -1) {
3025 numVaryingReads++;
3026 outputMap[var] = _mesa_find_free_register(usedTemps,
3027 MAX_TEMPS,
3028 firstTemp);
3029 outputTypes[var] = inst->src[j].type;
3030 firstTemp = outputMap[var] + 1;
3031 }
3032 inst->src[j].file = PROGRAM_TEMPORARY;
3033 inst->src[j].index = outputMap[var];
3034 }
3035 }
3036 }
3037
3038 if (numVaryingReads == 0)
3039 return; /* nothing to be done */
3040
3041 /* look for instructions which write to the varying vars identified above */
3042 foreach_iter(exec_list_iterator, iter, this->instructions) {
3043 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
3044 if (inst->dst.file == type && outputMap[inst->dst.index] >= 0) {
3045 /* change inst to write to the temp reg, instead of the varying */
3046 inst->dst.file = PROGRAM_TEMPORARY;
3047 inst->dst.index = outputMap[inst->dst.index];
3048 }
3049 }
3050
3051 /* insert new MOV instructions at the end */
3052 for (i = 0; i < VERT_RESULT_MAX; i++) {
3053 if (outputMap[i] >= 0) {
3054 /* MOV VAR[i], TEMP[tmp]; */
3055 st_src_reg src = st_src_reg(PROGRAM_TEMPORARY, outputMap[i], outputTypes[i]);
3056 st_dst_reg dst = st_dst_reg(type, WRITEMASK_XYZW, outputTypes[i]);
3057 dst.index = i;
3058 this->emit(NULL, TGSI_OPCODE_MOV, dst, src);
3059 }
3060 }
3061 }
3062
3063 /**
3064 * Returns the mask of channels (bitmask of WRITEMASK_X,Y,Z,W) which
3065 * are read from the given src in this instruction
3066 */
3067 static int
3068 get_src_arg_mask(st_dst_reg dst, st_src_reg src)
3069 {
3070 int read_mask = 0, comp;
3071
3072 /* Now, given the src swizzle and the written channels, find which
3073 * components are actually read
3074 */
3075 for (comp = 0; comp < 4; ++comp) {
3076 const unsigned coord = GET_SWZ(src.swizzle, comp);
3077 ASSERT(coord < 4);
3078 if (dst.writemask & (1 << comp) && coord <= SWIZZLE_W)
3079 read_mask |= 1 << coord;
3080 }
3081
3082 return read_mask;
3083 }
3084
3085 /**
3086 * This pass replaces CMP T0, T1 T2 T0 with MOV T0, T2 when the CMP
3087 * instruction is the first instruction to write to register T0. There are
3088 * several lowering passes done in GLSL IR (e.g. branches and
3089 * relative addressing) that create a large number of conditional assignments
3090 * that glsl_to_tgsi converts to CMP instructions like the one mentioned above.
3091 *
3092 * Here is why this conversion is safe:
3093 * CMP T0, T1 T2 T0 can be expanded to:
3094 * if (T1 < 0.0)
3095 * MOV T0, T2;
3096 * else
3097 * MOV T0, T0;
3098 *
3099 * If (T1 < 0.0) evaluates to true then our replacement MOV T0, T2 is the same
3100 * as the original program. If (T1 < 0.0) evaluates to false, executing
3101 * MOV T0, T0 will store a garbage value in T0 since T0 is uninitialized.
3102 * Therefore, it doesn't matter that we are replacing MOV T0, T0 with MOV T0, T2
3103 * because any instruction that was going to read from T0 after this was going
3104 * to read a garbage value anyway.
3105 */
3106 void
3107 glsl_to_tgsi_visitor::simplify_cmp(void)
3108 {
3109 unsigned tempWrites[MAX_TEMPS];
3110 unsigned outputWrites[MAX_PROGRAM_OUTPUTS];
3111
3112 memset(tempWrites, 0, sizeof(tempWrites));
3113 memset(outputWrites, 0, sizeof(outputWrites));
3114
3115 foreach_iter(exec_list_iterator, iter, this->instructions) {
3116 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
3117 unsigned prevWriteMask = 0;
3118
3119 /* Give up if we encounter relative addressing or flow control. */
3120 if (inst->dst.reladdr ||
3121 tgsi_get_opcode_info(inst->op)->is_branch ||
3122 inst->op == TGSI_OPCODE_BGNSUB ||
3123 inst->op == TGSI_OPCODE_CONT ||
3124 inst->op == TGSI_OPCODE_END ||
3125 inst->op == TGSI_OPCODE_ENDSUB ||
3126 inst->op == TGSI_OPCODE_RET) {
3127 return;
3128 }
3129
3130 if (inst->dst.file == PROGRAM_OUTPUT) {
3131 assert(inst->dst.index < MAX_PROGRAM_OUTPUTS);
3132 prevWriteMask = outputWrites[inst->dst.index];
3133 outputWrites[inst->dst.index] |= inst->dst.writemask;
3134 } else if (inst->dst.file == PROGRAM_TEMPORARY) {
3135 assert(inst->dst.index < MAX_TEMPS);
3136 prevWriteMask = tempWrites[inst->dst.index];
3137 tempWrites[inst->dst.index] |= inst->dst.writemask;
3138 }
3139
3140 /* For a CMP to be considered a conditional write, the destination
3141 * register and source register two must be the same. */
3142 if (inst->op == TGSI_OPCODE_CMP
3143 && !(inst->dst.writemask & prevWriteMask)
3144 && inst->src[2].file == inst->dst.file
3145 && inst->src[2].index == inst->dst.index
3146 && inst->dst.writemask == get_src_arg_mask(inst->dst, inst->src[2])) {
3147
3148 inst->op = TGSI_OPCODE_MOV;
3149 inst->src[0] = inst->src[1];
3150 }
3151 }
3152 }
3153
3154 /* Replaces all references to a temporary register index with another index. */
3155 void
3156 glsl_to_tgsi_visitor::rename_temp_register(int index, int new_index)
3157 {
3158 foreach_iter(exec_list_iterator, iter, this->instructions) {
3159 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
3160 unsigned j;
3161
3162 for (j=0; j < num_inst_src_regs(inst->op); j++) {
3163 if (inst->src[j].file == PROGRAM_TEMPORARY &&
3164 inst->src[j].index == index) {
3165 inst->src[j].index = new_index;
3166 }
3167 }
3168
3169 if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == index) {
3170 inst->dst.index = new_index;
3171 }
3172 }
3173 }
3174
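/**
 * Returns the index of the first instruction that reads the given temporary
 * register, or -1 if it is never read.  If the first read happens inside a
 * loop, the index of the outermost enclosing BGNLOOP is returned instead.
 */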
3175 int
3176 glsl_to_tgsi_visitor::get_first_temp_read(int index)
3177 {
3178 int depth = 0; /* loop depth */
3179 int loop_start = -1; /* index of the first active BGNLOOP (if any) */
3180 unsigned i = 0, j;
3181
3182 foreach_iter(exec_list_iterator, iter, this->instructions) {
3183 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
3184
3185 for (j=0; j < num_inst_src_regs(inst->op); j++) {
3186 if (inst->src[j].file == PROGRAM_TEMPORARY &&
3187 inst->src[j].index == index) {
3188 return (depth == 0) ? i : loop_start;
3189 }
3190 }
3191
3192 if (inst->op == TGSI_OPCODE_BGNLOOP) {
3193 if(depth++ == 0)
3194 loop_start = i;
3195 } else if (inst->op == TGSI_OPCODE_ENDLOOP) {
3196 if (--depth == 0)
3197 loop_start = -1;
3198 }
3199 assert(depth >= 0);
3200
3201 i++;
3202 }
3203
3204 return -1;
3205 }
3206
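/**
 * Returns the index of the first instruction that writes the given temporary
 * register, or -1 if it is never written.  If the first write happens inside
 * a loop, the index of the outermost enclosing BGNLOOP is returned instead.
 */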
3207 int
3208 glsl_to_tgsi_visitor::get_first_temp_write(int index)
3209 {
3210 int depth = 0; /* loop depth */
3211 int loop_start = -1; /* index of the first active BGNLOOP (if any) */
3212 int i = 0;
3213
3214 foreach_iter(exec_list_iterator, iter, this->instructions) {
3215 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
3216
3217 if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == index) {
3218 return (depth == 0) ? i : loop_start;
3219 }
3220
3221 if (inst->op == TGSI_OPCODE_BGNLOOP) {
3222 if(depth++ == 0)
3223 loop_start = i;
3224 } else if (inst->op == TGSI_OPCODE_ENDLOOP) {
3225 if (--depth == 0)
3226 loop_start = -1;
3227 }
3228 assert(depth >= 0);
3229
3230 i++;
3231 }
3232
3233 return -1;
3234 }
3235
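/**
 * Returns the index of the last instruction that reads the given temporary
 * register, or -1 if it is never read.  If the last read happens inside a
 * loop, the index of the ENDLOOP closing the outermost enclosing loop is
 * returned instead.
 */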
3236 int
3237 glsl_to_tgsi_visitor::get_last_temp_read(int index)
3238 {
3239 int depth = 0; /* loop depth */
3240 int last = -1; /* index of last instruction that reads the temporary */
3241 unsigned i = 0, j;
3242
3243 foreach_iter(exec_list_iterator, iter, this->instructions) {
3244 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
3245
3246 for (j=0; j < num_inst_src_regs(inst->op); j++) {
3247 if (inst->src[j].file == PROGRAM_TEMPORARY &&
3248 inst->src[j].index == index) {
3249 last = (depth == 0) ? i : -2;
3250 }
3251 }
3252
3253 if (inst->op == TGSI_OPCODE_BGNLOOP)
3254 depth++;
3255 else if (inst->op == TGSI_OPCODE_ENDLOOP)
3256 if (--depth == 0 && last == -2)
3257 last = i;
3258 assert(depth >= 0);
3259
3260 i++;
3261 }
3262
3263 assert(last >= -1);
3264 return last;
3265 }
3266
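/**
 * Returns the index of the last instruction that writes the given temporary
 * register, or -1 if it is never written.  If the last write happens inside
 * a loop, the index of the ENDLOOP closing the outermost enclosing loop is
 * returned instead.
 */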
3267 int
3268 glsl_to_tgsi_visitor::get_last_temp_write(int index)
3269 {
3270 int depth = 0; /* loop depth */
3271 int last = -1; /* index of last instruction that writes to the temporary */
3272 int i = 0;
3273
3274 foreach_iter(exec_list_iterator, iter, this->instructions) {
3275 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
3276
3277 if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == index)
3278 last = (depth == 0) ? i : -2;
3279
3280 if (inst->op == TGSI_OPCODE_BGNLOOP)
3281 depth++;
3282 else if (inst->op == TGSI_OPCODE_ENDLOOP)
3283 if (--depth == 0 && last == -2)
3284 last = i;
3285 assert(depth >= 0);
3286
3287 i++;
3288 }
3289
3290 assert(last >= -1);
3291 return last;
3292 }
3293
3294 /*
3295 * On a basic block basis, tracks available PROGRAM_TEMPORARY register
3296 * channels for copy propagation and updates following instructions to
3297 * use the original versions.
3298 *
3299 * The glsl_to_tgsi_visitor lazily produces code assuming that this pass
3300 * will occur. As an example, a TXP production before this pass:
3301 *
3302 * 0: MOV TEMP[1], INPUT[4].xyyy;
3303 * 1: MOV TEMP[1].w, INPUT[4].wwww;
3304 * 2: TXP TEMP[2], TEMP[1], texture[0], 2D;
3305 *
3306 * and after:
3307 *
3308 * 0: MOV TEMP[1], INPUT[4].xyyy;
3309 * 1: MOV TEMP[1].w, INPUT[4].wwww;
3310 * 2: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D;
3311 *
3312 * which allows for dead code elimination on TEMP[1]'s writes.
3313 */
3314 void
3315 glsl_to_tgsi_visitor::copy_propagate(void)
3316 {
3317 glsl_to_tgsi_instruction **acp = rzalloc_array(mem_ctx,
3318 glsl_to_tgsi_instruction *,
3319 this->next_temp * 4);
3320 int *acp_level = rzalloc_array(mem_ctx, int, this->next_temp * 4);
3321 int level = 0;
3322
3323 foreach_iter(exec_list_iterator, iter, this->instructions) {
3324 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
3325
3326 assert(inst->dst.file != PROGRAM_TEMPORARY
3327 || inst->dst.index < this->next_temp);
3328
3329 /* First, do any copy propagation possible into the src regs. */
3330 for (int r = 0; r < 3; r++) {
3331 glsl_to_tgsi_instruction *first = NULL;
3332 bool good = true;
3333 int acp_base = inst->src[r].index * 4;
3334
3335 if (inst->src[r].file != PROGRAM_TEMPORARY ||
3336 inst->src[r].reladdr)
3337 continue;
3338
3339 /* See if we can find entries in the ACP consisting of MOVs
3340 * from the same src register for all the swizzled channels
3341 * of this src register reference.
3342 */
3343 for (int i = 0; i < 4; i++) {
3344 int src_chan = GET_SWZ(inst->src[r].swizzle, i);
3345 glsl_to_tgsi_instruction *copy_chan = acp[acp_base + src_chan];
3346
3347 if (!copy_chan) {
3348 good = false;
3349 break;
3350 }
3351
3352 assert(acp_level[acp_base + src_chan] <= level);
3353
3354 if (!first) {
3355 first = copy_chan;
3356 } else {
3357 if (first->src[0].file != copy_chan->src[0].file ||
3358 first->src[0].index != copy_chan->src[0].index) {
3359 good = false;
3360 break;
3361 }
3362 }
3363 }
3364
3365 if (good) {
3366 /* We've now validated that we can copy-propagate to
3367 * replace this src register reference. Do it.
3368 */
3369 inst->src[r].file = first->src[0].file;
3370 inst->src[r].index = first->src[0].index;
3371
3372 int swizzle = 0;
3373 for (int i = 0; i < 4; i++) {
3374 int src_chan = GET_SWZ(inst->src[r].swizzle, i);
3375 glsl_to_tgsi_instruction *copy_inst = acp[acp_base + src_chan];
3376 swizzle |= (GET_SWZ(copy_inst->src[0].swizzle, src_chan) <<
3377 (3 * i));
3378 }
3379 inst->src[r].swizzle = swizzle;
3380 }
3381 }
3382
3383 switch (inst->op) {
3384 case TGSI_OPCODE_BGNLOOP:
3385 case TGSI_OPCODE_ENDLOOP:
3386 /* End of a basic block, clear the ACP entirely. */
3387 memset(acp, 0, sizeof(*acp) * this->next_temp * 4);
3388 break;
3389
3390 case TGSI_OPCODE_IF:
3391 ++level;
3392 break;
3393
3394 case TGSI_OPCODE_ENDIF:
3395 case TGSI_OPCODE_ELSE:
3396 /* Clear all channels written inside the block from the ACP, but
3397 * leave those that were not touched.
3398 */
3399 for (int r = 0; r < this->next_temp; r++) {
3400 for (int c = 0; c < 4; c++) {
3401 if (!acp[4 * r + c])
3402 continue;
3403
3404 if (acp_level[4 * r + c] >= level)
3405 acp[4 * r + c] = NULL;
3406 }
3407 }
3408 if (inst->op == TGSI_OPCODE_ENDIF)
3409 --level;
3410 break;
3411
3412 default:
3413 /* Continuing the block, clear any written channels from
3414 * the ACP.
3415 */
3416 if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.reladdr) {
3417 /* Any temporary might be written, so no copy propagation
3418 * across this instruction.
3419 */
3420 memset(acp, 0, sizeof(*acp) * this->next_temp * 4);
3421 } else if (inst->dst.file == PROGRAM_OUTPUT &&
3422 inst->dst.reladdr) {
3423 /* Any output might be written, so no copy propagation
3424 * from outputs across this instruction.
3425 */
3426 for (int r = 0; r < this->next_temp; r++) {
3427 for (int c = 0; c < 4; c++) {
3428 if (!acp[4 * r + c])
3429 continue;
3430
3431 if (acp[4 * r + c]->src[0].file == PROGRAM_OUTPUT)
3432 acp[4 * r + c] = NULL;
3433 }
3434 }
3435 } else if (inst->dst.file == PROGRAM_TEMPORARY ||
3436 inst->dst.file == PROGRAM_OUTPUT) {
3437 /* Clear where it's used as dst. */
3438 if (inst->dst.file == PROGRAM_TEMPORARY) {
3439 for (int c = 0; c < 4; c++) {
3440 if (inst->dst.writemask & (1 << c)) {
3441 acp[4 * inst->dst.index + c] = NULL;
3442 }
3443 }
3444 }
3445
3446 /* Clear where it's used as src. */
3447 for (int r = 0; r < this->next_temp; r++) {
3448 for (int c = 0; c < 4; c++) {
3449 if (!acp[4 * r + c])
3450 continue;
3451
3452 int src_chan = GET_SWZ(acp[4 * r + c]->src[0].swizzle, c);
3453
3454 if (acp[4 * r + c]->src[0].file == inst->dst.file &&
3455 acp[4 * r + c]->src[0].index == inst->dst.index &&
3456 inst->dst.writemask & (1 << src_chan))
3457 {
3458 acp[4 * r + c] = NULL;
3459 }
3460 }
3461 }
3462 }
3463 break;
3464 }
3465
3466 /* If this is a copy, add it to the ACP. */
3467 if (inst->op == TGSI_OPCODE_MOV &&
3468 inst->dst.file == PROGRAM_TEMPORARY &&
3469 !inst->dst.reladdr &&
3470 !inst->saturate &&
3471 !inst->src[0].reladdr &&
3472 !inst->src[0].negate) {
3473 for (int i = 0; i < 4; i++) {
3474 if (inst->dst.writemask & (1 << i)) {
3475 acp[4 * inst->dst.index + i] = inst;
3476 acp_level[4 * inst->dst.index + i] = level;
3477 }
3478 }
3479 }
3480 }
3481
3482 ralloc_free(acp_level);
3483 ralloc_free(acp);
3484 }
3485
3486 /*
3487 * Tracks available PROGRAM_TEMPORARY registers for dead code elimination.
3488 *
3489 * The glsl_to_tgsi_visitor lazily produces code assuming that this pass
3490 * will occur. As an example, a TXP production after copy propagation but
3491 * before this pass:
3492 *
3493 * 0: MOV TEMP[1], INPUT[4].xyyy;
3494 * 1: MOV TEMP[1].w, INPUT[4].wwww;
3495 * 2: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D;
3496 *
3497 * and after this pass:
3498 *
3499 * 0: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D;
3500 *
3501 * FIXME: assumes that all functions are inlined (no support for BGNSUB/ENDSUB)
3502 * FIXME: doesn't eliminate all dead code inside of loops; it steps around them
3503 */
3504 void
3505 glsl_to_tgsi_visitor::eliminate_dead_code(void)
3506 {
3507 int i;
3508
3509 for (i=0; i < this->next_temp; i++) {
3510 int last_read = get_last_temp_read(i);
3511 int j = 0;
3512
3513 foreach_iter(exec_list_iterator, iter, this->instructions) {
3514 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
3515
3516 if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == i &&
3517 j > last_read)
3518 {
3519 iter.remove();
3520 delete inst;
3521 }
3522
3523 j++;
3524 }
3525 }
3526 }
3527
3528 /*
3529 * On a basic block basis, tracks available PROGRAM_TEMPORARY registers for dead
3530 * code elimination. This is less primitive than eliminate_dead_code(), as it
3531 * is per-channel and can detect consecutive writes without a read between them
3532 * as dead code. However, there is some dead code that can be eliminated by
3533 * eliminate_dead_code() but not this function - for example, this function
3534 * cannot eliminate an instruction writing to a register that is never read and
3535 * is the only instruction writing to that register.
3536 *
3537 * The glsl_to_tgsi_visitor lazily produces code assuming that this pass
3538 * will occur.
3539 */
3540 int
3541 glsl_to_tgsi_visitor::eliminate_dead_code_advanced(void)
3542 {
3543 glsl_to_tgsi_instruction **writes = rzalloc_array(mem_ctx,
3544 glsl_to_tgsi_instruction *,
3545 this->next_temp * 4);
3546 int *write_level = rzalloc_array(mem_ctx, int, this->next_temp * 4);
3547 int level = 0;
3548 int removed = 0;
3549
3550 foreach_iter(exec_list_iterator, iter, this->instructions) {
3551 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
3552
3553 assert(inst->dst.file != PROGRAM_TEMPORARY
3554 || inst->dst.index < this->next_temp);
3555
3556 switch (inst->op) {
3557 case TGSI_OPCODE_BGNLOOP:
3558 case TGSI_OPCODE_ENDLOOP:
3559 /* End of a basic block, clear the write array entirely.
3560 * FIXME: This keeps us from killing dead code when the writes are
3561 * on either side of a loop, even when the register isn't touched
3562 * inside the loop.
3563 */
3564 memset(writes, 0, sizeof(*writes) * this->next_temp * 4);
3565 break;
3566
3567 case TGSI_OPCODE_ENDIF:
3568 --level;
3569 break;
3570
3571 case TGSI_OPCODE_ELSE:
3572 /* Clear all channels written inside the preceding if block from the
3573 * write array, but leave those that were not touched.
3574 *
3575 * FIXME: This destroys opportunities to remove dead code inside of
3576 * IF blocks that are followed by an ELSE block.
3577 */
3578 for (int r = 0; r < this->next_temp; r++) {
3579 for (int c = 0; c < 4; c++) {
3580 if (!writes[4 * r + c])
3581 continue;
3582
3583 if (write_level[4 * r + c] >= level)
3584 writes[4 * r + c] = NULL;
3585 }
3586 }
3587 break;
3588
3589 case TGSI_OPCODE_IF:
3590 ++level;
3591 /* fallthrough to default case to mark the condition as read */
3592
3593 default:
3594 /* Continuing the block, clear any channels from the write array that
3595 * are read by this instruction.
3596 */
3597 for (unsigned i = 0; i < Elements(inst->src); i++) {
3598 if (inst->src[i].file == PROGRAM_TEMPORARY && inst->src[i].reladdr){
3599 /* Any temporary might be read, so no dead code elimination
3600 * across this instruction.
3601 */
3602 memset(writes, 0, sizeof(*writes) * this->next_temp * 4);
3603 } else if (inst->src[i].file == PROGRAM_TEMPORARY) {
3604 /* Clear where it's used as src. */
3605 int src_chans = 1 << GET_SWZ(inst->src[i].swizzle, 0);
3606 src_chans |= 1 << GET_SWZ(inst->src[i].swizzle, 1);
3607 src_chans |= 1 << GET_SWZ(inst->src[i].swizzle, 2);
3608 src_chans |= 1 << GET_SWZ(inst->src[i].swizzle, 3);
3609
3610 for (int c = 0; c < 4; c++) {
3611 if (src_chans & (1 << c)) {
3612 writes[4 * inst->src[i].index + c] = NULL;
3613 }
3614 }
3615 }
3616 }
3617 break;
3618 }
3619
3620 /* If this instruction writes to a temporary, add it to the write array.
3621 * If there is already an instruction in the write array for one or more
3622 * of the channels, flag that channel write as dead.
3623 */
3624 if (inst->dst.file == PROGRAM_TEMPORARY &&
3625 !inst->dst.reladdr &&
3626 !inst->saturate) {
3627 for (int c = 0; c < 4; c++) {
3628 if (inst->dst.writemask & (1 << c)) {
3629 if (writes[4 * inst->dst.index + c]) {
3630 if (write_level[4 * inst->dst.index + c] < level)
3631 continue;
3632 else
3633 writes[4 * inst->dst.index + c]->dead_mask |= (1 << c);
3634 }
3635 writes[4 * inst->dst.index + c] = inst;
3636 write_level[4 * inst->dst.index + c] = level;
3637 }
3638 }
3639 }
3640 }
3641
3642 /* Anything still in the write array at this point is dead code. */
3643 for (int r = 0; r < this->next_temp; r++) {
3644 for (int c = 0; c < 4; c++) {
3645 glsl_to_tgsi_instruction *inst = writes[4 * r + c];
3646 if (inst)
3647 inst->dead_mask |= (1 << c);
3648 }
3649 }
3650
3651 /* Now actually remove the instructions that are completely dead and update
3652 * the writemask of other instructions with dead channels.
3653 */
3654 foreach_iter(exec_list_iterator, iter, this->instructions) {
3655 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
3656
3657 if (!inst->dead_mask || !inst->dst.writemask)
3658 continue;
3659 else if (inst->dead_mask == inst->dst.writemask) {
3660 iter.remove();
3661 delete inst;
3662 removed++;
3663 } else
3664 inst->dst.writemask &= ~(inst->dead_mask);
3665 }
3666
3667 ralloc_free(write_level);
3668 ralloc_free(writes);
3669
3670 return removed;
3671 }
3672
3673 /* Merges temporary registers together where possible to reduce the number of
3674 * registers needed to run a program.
3675 *
3676 * Produces optimal code only after copy propagation and dead code elimination
3677 * have been run. */
3678 void
3679 glsl_to_tgsi_visitor::merge_registers(void)
3680 {
3681 int *last_reads = rzalloc_array(mem_ctx, int, this->next_temp);
3682 int *first_writes = rzalloc_array(mem_ctx, int, this->next_temp);
3683 int i, j;
3684
3685 /* Read the indices of the last read and first write to each temp register
3686 * into an array so that we don't have to traverse the instruction list as
3687 * much. */
3688 for (i=0; i < this->next_temp; i++) {
3689 last_reads[i] = get_last_temp_read(i);
3690 first_writes[i] = get_first_temp_write(i);
3691 }
3692
3693 /* Start looking for registers with non-overlapping usages that can be
3694 * merged together. */
3695 for (i=0; i < this->next_temp; i++) {
3696 /* Don't touch unused registers. */
3697 if (last_reads[i] < 0 || first_writes[i] < 0) continue;
3698
3699 for (j=0; j < this->next_temp; j++) {
3700 /* Don't touch unused registers. */
3701 if (last_reads[j] < 0 || first_writes[j] < 0) continue;
3702
3703 /* We can merge the two registers if the first write to j is after or
3704 * in the same instruction as the last read from i. Note that the
3705 * register at index i will always be used earlier or at the same time
3706 * as the register at index j. */
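            /* For example, if temp i is written at instruction 2 and last read
             * at instruction 10, while temp j is first written at instruction
             * 12, the two registers can share a single index.
             */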
3707 if (first_writes[i] <= first_writes[j] &&
3708 last_reads[i] <= first_writes[j])
3709 {
3710 rename_temp_register(j, i); /* Replace all references to j with i.*/
3711
3712 /* Update the first_writes and last_reads arrays with the new
3713 * values for the merged register index, and mark the newly unused
3714 * register index as such. */
3715 last_reads[i] = last_reads[j];
3716 first_writes[j] = -1;
3717 last_reads[j] = -1;
3718 }
3719 }
3720 }
3721
3722 ralloc_free(last_reads);
3723 ralloc_free(first_writes);
3724 }
3725
3726 /* Reassign indices to temporary registers by reusing unused indices created
3727 * by optimization passes. */
3728 void
3729 glsl_to_tgsi_visitor::renumber_registers(void)
3730 {
3731 int i = 0;
3732 int new_index = 0;
3733
3734 for (i=0; i < this->next_temp; i++) {
3735 if (get_first_temp_read(i) < 0) continue;
3736 if (i != new_index)
3737 rename_temp_register(i, new_index);
3738 new_index++;
3739 }
3740
3741 this->next_temp = new_index;
3742 }
3743
3744 /**
3745  * Fill in a fragment program which implements the current pixel transfer ops.
3746 * Based on get_pixel_transfer_program in st_atom_pixeltransfer.c.
3747 */
3748 extern "C" void
3749 get_pixel_transfer_visitor(struct st_fragment_program *fp,
3750 glsl_to_tgsi_visitor *original,
3751 int scale_and_bias, int pixel_maps)
3752 {
3753 glsl_to_tgsi_visitor *v = new glsl_to_tgsi_visitor();
3754 struct st_context *st = st_context(original->ctx);
3755 struct gl_program *prog = &fp->Base.Base;
3756 struct gl_program_parameter_list *params = _mesa_new_parameter_list();
3757 st_src_reg coord, src0;
3758 st_dst_reg dst0;
3759 glsl_to_tgsi_instruction *inst;
3760
3761    /* Copy attributes from the original shader's glsl_to_tgsi_visitor. */
3762 v->ctx = original->ctx;
3763 v->prog = prog;
3764 v->glsl_version = original->glsl_version;
3765 v->native_integers = original->native_integers;
3766 v->options = original->options;
3767 v->next_temp = original->next_temp;
3768 v->num_address_regs = original->num_address_regs;
3769 v->samplers_used = prog->SamplersUsed = original->samplers_used;
3770 v->indirect_addr_temps = original->indirect_addr_temps;
3771 v->indirect_addr_consts = original->indirect_addr_consts;
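   /* The copied instructions reference immediates by index, so share the
    * original visitor's immediate list with the new visitor.
    */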
3772 memcpy(&v->immediates, &original->immediates, sizeof(v->immediates));
3773
3774 /*
3775 * Get initial pixel color from the texture.
3776 * TEX colorTemp, fragment.texcoord[0], texture[0], 2D;
3777 */
3778 coord = st_src_reg(PROGRAM_INPUT, FRAG_ATTRIB_TEX0, glsl_type::vec2_type);
3779 src0 = v->get_temp(glsl_type::vec4_type);
3780 dst0 = st_dst_reg(src0);
3781 inst = v->emit(NULL, TGSI_OPCODE_TEX, dst0, coord);
3782 inst->sampler = 0;
3783 inst->tex_target = TEXTURE_2D_INDEX;
3784
3785 prog->InputsRead |= (1 << FRAG_ATTRIB_TEX0);
3786 prog->SamplersUsed |= (1 << 0); /* mark sampler 0 as used */
3787 v->samplers_used |= (1 << 0);
3788
3789 if (scale_and_bias) {
3790 static const gl_state_index scale_state[STATE_LENGTH] =
3791 { STATE_INTERNAL, STATE_PT_SCALE,
3792 (gl_state_index) 0, (gl_state_index) 0, (gl_state_index) 0 };
3793 static const gl_state_index bias_state[STATE_LENGTH] =
3794 { STATE_INTERNAL, STATE_PT_BIAS,
3795 (gl_state_index) 0, (gl_state_index) 0, (gl_state_index) 0 };
3796 GLint scale_p, bias_p;
3797 st_src_reg scale, bias;
3798
3799 scale_p = _mesa_add_state_reference(params, scale_state);
3800 bias_p = _mesa_add_state_reference(params, bias_state);
3801
3802 /* MAD colorTemp, colorTemp, scale, bias; */
3803 scale = st_src_reg(PROGRAM_STATE_VAR, scale_p, GLSL_TYPE_FLOAT);
3804 bias = st_src_reg(PROGRAM_STATE_VAR, bias_p, GLSL_TYPE_FLOAT);
3805 inst = v->emit(NULL, TGSI_OPCODE_MAD, dst0, src0, scale, bias);
3806 }
3807
3808 if (pixel_maps) {
3809 st_src_reg temp = v->get_temp(glsl_type::vec4_type);
3810 st_dst_reg temp_dst = st_dst_reg(temp);
3811
3812 assert(st->pixel_xfer.pixelmap_texture);
3813
3814 /* With a little effort, we can do four pixel map look-ups with
3815 * two TEX instructions:
3816 */
3817
3818 /* TEX temp.rg, colorTemp.rgba, texture[1], 2D; */
3819 temp_dst.writemask = WRITEMASK_XY; /* write R,G */
3820 inst = v->emit(NULL, TGSI_OPCODE_TEX, temp_dst, src0);
3821 inst->sampler = 1;
3822 inst->tex_target = TEXTURE_2D_INDEX;
3823
3824 /* TEX temp.ba, colorTemp.baba, texture[1], 2D; */
3825 src0.swizzle = MAKE_SWIZZLE4(SWIZZLE_Z, SWIZZLE_W, SWIZZLE_Z, SWIZZLE_W);
3826 temp_dst.writemask = WRITEMASK_ZW; /* write B,A */
3827 inst = v->emit(NULL, TGSI_OPCODE_TEX, temp_dst, src0);
3828 inst->sampler = 1;
3829 inst->tex_target = TEXTURE_2D_INDEX;
3830
3831 prog->SamplersUsed |= (1 << 1); /* mark sampler 1 as used */
3832 v->samplers_used |= (1 << 1);
3833
3834 /* MOV colorTemp, temp; */
3835 inst = v->emit(NULL, TGSI_OPCODE_MOV, dst0, temp);
3836 }
3837
3838 /* Now copy the instructions from the original glsl_to_tgsi_visitor into the
3839 * new visitor. */
3840 foreach_iter(exec_list_iterator, iter, original->instructions) {
3841 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
3842 st_src_reg src_regs[3];
3843
3844 if (inst->dst.file == PROGRAM_OUTPUT)
3845 prog->OutputsWritten |= BITFIELD64_BIT(inst->dst.index);
3846
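      /* Redirect reads of gl_Color (FRAG_ATTRIB_COL0) in the original program
       * to the temporary that now holds the pixel-transfer result.
       */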
3847 for (int i=0; i<3; i++) {
3848 src_regs[i] = inst->src[i];
3849 if (src_regs[i].file == PROGRAM_INPUT &&
3850 src_regs[i].index == FRAG_ATTRIB_COL0)
3851 {
3852 src_regs[i].file = PROGRAM_TEMPORARY;
3853 src_regs[i].index = src0.index;
3854 }
3855 else if (src_regs[i].file == PROGRAM_INPUT)
3856 prog->InputsRead |= (1 << src_regs[i].index);
3857 }
3858
3859 v->emit(NULL, inst->op, inst->dst, src_regs[0], src_regs[1], src_regs[2]);
3860 }
3861
3862 /* Make modifications to fragment program info. */
3863 prog->Parameters = _mesa_combine_parameter_lists(params,
3864 original->prog->Parameters);
3865 prog->Attributes = _mesa_clone_parameter_list(original->prog->Attributes);
3866 prog->Varying = _mesa_clone_parameter_list(original->prog->Varying);
3867 _mesa_free_parameter_list(params);
3868 count_resources(v, prog);
3869 fp->glsl_to_tgsi = v;
3870 }
3871
3872 /**
3873 * Make fragment program for glBitmap:
3874 * Sample the texture and kill the fragment if the bit is 0.
3875 * This program will be combined with the user's fragment program.
3876 *
3877 * Based on make_bitmap_fragment_program in st_cb_bitmap.c.
3878 */
3879 extern "C" void
3880 get_bitmap_visitor(struct st_fragment_program *fp,
3881 glsl_to_tgsi_visitor *original, int samplerIndex)
3882 {
3883 glsl_to_tgsi_visitor *v = new glsl_to_tgsi_visitor();
3884 struct st_context *st = st_context(original->ctx);
3885 struct gl_program *prog = &fp->Base.Base;
3886 st_src_reg coord, src0;
3887 st_dst_reg dst0;
3888 glsl_to_tgsi_instruction *inst;
3889
3890    /* Copy attributes from the original shader's glsl_to_tgsi_visitor. */
3891 v->ctx = original->ctx;
3892 v->prog = prog;
3893 v->glsl_version = original->glsl_version;
3894 v->native_integers = original->native_integers;
3895 v->options = original->options;
3896 v->next_temp = original->next_temp;
3897 v->num_address_regs = original->num_address_regs;
3898 v->samplers_used = prog->SamplersUsed = original->samplers_used;
3899 v->indirect_addr_temps = original->indirect_addr_temps;
3900 v->indirect_addr_consts = original->indirect_addr_consts;
3901 memcpy(&v->immediates, &original->immediates, sizeof(v->immediates));
3902
3903 /* TEX tmp0, fragment.texcoord[0], texture[0], 2D; */
3904 coord = st_src_reg(PROGRAM_INPUT, FRAG_ATTRIB_TEX0, glsl_type::vec2_type);
3905 src0 = v->get_temp(glsl_type::vec4_type);
3906 dst0 = st_dst_reg(src0);
3907 inst = v->emit(NULL, TGSI_OPCODE_TEX, dst0, coord);
3908 inst->sampler = samplerIndex;
3909 inst->tex_target = TEXTURE_2D_INDEX;
3910
3911 prog->InputsRead |= (1 << FRAG_ATTRIB_TEX0);
3912 prog->SamplersUsed |= (1 << samplerIndex); /* mark sampler as used */
3913 v->samplers_used |= (1 << samplerIndex);
3914
3915    /* KIL if -tmp0 < 0  # texel=0 -> keep / texel!=0 -> discard */
3916 src0.negate = NEGATE_XYZW;
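   /* For L8 bitmap textures the texel value lives in the X (luminance)
    * channel, so replicate it across all components.
    */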
3917 if (st->bitmap.tex_format == PIPE_FORMAT_L8_UNORM)
3918 src0.swizzle = SWIZZLE_XXXX;
3919 inst = v->emit(NULL, TGSI_OPCODE_KIL, undef_dst, src0);
3920
3921 /* Now copy the instructions from the original glsl_to_tgsi_visitor into the
3922 * new visitor. */
3923 foreach_iter(exec_list_iterator, iter, original->instructions) {
3924 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
3925 st_src_reg src_regs[3];
3926
3927 if (inst->dst.file == PROGRAM_OUTPUT)
3928 prog->OutputsWritten |= BITFIELD64_BIT(inst->dst.index);
3929
3930 for (int i=0; i<3; i++) {
3931 src_regs[i] = inst->src[i];
3932 if (src_regs[i].file == PROGRAM_INPUT)
3933 prog->InputsRead |= (1 << src_regs[i].index);
3934 }
3935
3936 v->emit(NULL, inst->op, inst->dst, src_regs[0], src_regs[1], src_regs[2]);
3937 }
3938
3939 /* Make modifications to fragment program info. */
3940 prog->Parameters = _mesa_clone_parameter_list(original->prog->Parameters);
3941 prog->Attributes = _mesa_clone_parameter_list(original->prog->Attributes);
3942 prog->Varying = _mesa_clone_parameter_list(original->prog->Varying);
3943 count_resources(v, prog);
3944 fp->glsl_to_tgsi = v;
3945 }
3946
3947 /* ------------------------- TGSI conversion stuff -------------------------- */
3948 struct label {
3949 unsigned branch_target;
3950 unsigned token;
3951 };
3952
3953 /**
3954 * Intermediate state used during shader translation.
3955 */
3956 struct st_translate {
3957 struct ureg_program *ureg;
3958
3959 struct ureg_dst temps[MAX_TEMPS];
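   /* constants[] and immediates[] are allocated during translation and are
    * indexed by gl_program parameter index and immediate number, respectively.
    */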
3960 struct ureg_src *constants;
3961 struct ureg_src *immediates;
3962 struct ureg_dst outputs[PIPE_MAX_SHADER_OUTPUTS];
3963 struct ureg_src inputs[PIPE_MAX_SHADER_INPUTS];
3964 struct ureg_dst address[1];
3965 struct ureg_src samplers[PIPE_MAX_SAMPLERS];
3966 struct ureg_src systemValues[SYSTEM_VALUE_MAX];
3967
3968 /* Extra info for handling point size clamping in vertex shader */
3969 struct ureg_dst pointSizeResult; /**< Actual point size output register */
3970 struct ureg_src pointSizeConst; /**< Point size range constant register */
3971 GLint pointSizeOutIndex; /**< Temp point size output register */
3972 GLboolean prevInstWrotePointSize;
3973
3974 const GLuint *inputMapping;
3975 const GLuint *outputMapping;
3976
3977 /* For every instruction that contains a label (eg CALL), keep
3978 * details so that we can go back afterwards and emit the correct
3979 * tgsi instruction number for each label.
3980 */
3981 struct label *labels;
3982 unsigned labels_size;
3983 unsigned labels_count;
3984
3985 /* Keep a record of the tgsi instruction number that each mesa
3986 * instruction starts at, will be used to fix up labels after
3987 * translation.
3988 */
3989 unsigned *insn;
3990 unsigned insn_size;
3991 unsigned insn_count;
3992
3993 unsigned procType; /**< TGSI_PROCESSOR_VERTEX/FRAGMENT */
3994
3995 boolean error;
3996 };
3997
3998 /** Map Mesa's SYSTEM_VALUE_x to TGSI_SEMANTIC_x */
3999 static unsigned mesa_sysval_to_semantic[SYSTEM_VALUE_MAX] = {
4000 TGSI_SEMANTIC_FACE,
4001 TGSI_SEMANTIC_INSTANCEID
4002 };
4003
4004 /**
4005 * Make note of a branch to a label in the TGSI code.
4006 * After we've emitted all instructions, we'll go over the list
4007 * of labels built here and patch the TGSI code with the actual
4008 * location of each label.
4009 */
4010 static unsigned *get_label(struct st_translate *t, unsigned branch_target)
4011 {
4012 unsigned i;
4013
4014 if (t->labels_count + 1 >= t->labels_size) {
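      /* Grow the labels array to the next power-of-two size. */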
4015 t->labels_size = 1 << (util_logbase2(t->labels_size) + 1);
4016 t->labels = (struct label *)realloc(t->labels,
4017 t->labels_size * sizeof(struct label));
4018 if (t->labels == NULL) {
4019 static unsigned dummy;
4020 t->error = TRUE;
4021 return &dummy;
4022 }
4023 }
4024
4025 i = t->labels_count++;
4026 t->labels[i].branch_target = branch_target;
4027 return &t->labels[i].token;
4028 }
4029
4030 /**
4031 * Called prior to emitting the TGSI code for each instruction.
4032 * Allocate additional space for instructions if needed.
4033 * Update the insn[] array so the next glsl_to_tgsi_instruction points to
4034 * the next TGSI instruction.
4035 */
4036 static void set_insn_start(struct st_translate *t, unsigned start)
4037 {
4038 if (t->insn_count + 1 >= t->insn_size) {
4039 t->insn_size = 1 << (util_logbase2(t->insn_size) + 1);
4040 t->insn = (unsigned *)realloc(t->insn, t->insn_size * sizeof(t->insn[0]));
4041 if (t->insn == NULL) {
4042 t->error = TRUE;
4043 return;
4044 }
4045 }
4046
4047 t->insn[t->insn_count++] = start;
4048 }
4049
4050 /**
4051 * Map a glsl_to_tgsi constant/immediate to a TGSI immediate.
4052 */
4053 static struct ureg_src
4054 emit_immediate(struct st_translate *t,
4055 gl_constant_value values[4],
4056 int type, int size)
4057 {
4058 struct ureg_program *ureg = t->ureg;
4059
4060 switch(type)
4061 {
4062 case GL_FLOAT:
4063 return ureg_DECL_immediate(ureg, &values[0].f, size);
4064 case GL_INT:
4065 return ureg_DECL_immediate_int(ureg, &values[0].i, size);
4066 case GL_UNSIGNED_INT:
4067 case GL_BOOL:
4068 return ureg_DECL_immediate_uint(ureg, &values[0].u, size);
4069 default:
4070 assert(!"should not get here - type must be float, int, uint, or bool");
4071 return ureg_src_undef();
4072 }
4073 }
4074
4075 /**
4076 * Map a glsl_to_tgsi dst register to a TGSI ureg_dst register.
4077 */
4078 static struct ureg_dst
4079 dst_register(struct st_translate *t,
4080 gl_register_file file,
4081 GLuint index)
4082 {
4083 switch(file) {
4084 case PROGRAM_UNDEFINED:
4085 return ureg_dst_undef();
4086
4087 case PROGRAM_TEMPORARY:
4088 if (ureg_dst_is_undef(t->temps[index]))
4089 t->temps[index] = ureg_DECL_temporary(t->ureg);
4090
4091 return t->temps[index];
4092
4093 case PROGRAM_OUTPUT:
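      /* Keep track of writes to the point size output so that
       * st_translate_program() can clamp the value afterwards.
       */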
4094 if (t->procType == TGSI_PROCESSOR_VERTEX && index == VERT_RESULT_PSIZ)
4095 t->prevInstWrotePointSize = GL_TRUE;
4096
4097 if (t->procType == TGSI_PROCESSOR_VERTEX)
4098 assert(index < VERT_RESULT_MAX);
4099 else if (t->procType == TGSI_PROCESSOR_FRAGMENT)
4100 assert(index < FRAG_RESULT_MAX);
4101 else
4102 assert(index < GEOM_RESULT_MAX);
4103
4104 assert(t->outputMapping[index] < Elements(t->outputs));
4105
4106 return t->outputs[t->outputMapping[index]];
4107
4108 case PROGRAM_ADDRESS:
4109 return t->address[index];
4110
4111 default:
4112 assert(!"unknown dst register file");
4113 return ureg_dst_undef();
4114 }
4115 }
4116
4117 /**
4118 * Map a glsl_to_tgsi src register to a TGSI ureg_src register.
4119 */
4120 static struct ureg_src
4121 src_register(struct st_translate *t,
4122 gl_register_file file,
4123 GLuint index)
4124 {
4125 switch(file) {
4126 case PROGRAM_UNDEFINED:
4127 return ureg_src_undef();
4128
4129 case PROGRAM_TEMPORARY:
4130 assert(index >= 0);
4131 assert(index < Elements(t->temps));
4132 if (ureg_dst_is_undef(t->temps[index]))
4133 t->temps[index] = ureg_DECL_temporary(t->ureg);
4134 return ureg_src(t->temps[index]);
4135
4136 case PROGRAM_NAMED_PARAM:
4137 case PROGRAM_ENV_PARAM:
4138 case PROGRAM_LOCAL_PARAM:
4139 case PROGRAM_UNIFORM:
4140 assert(index >= 0);
4141 return t->constants[index];
4142 case PROGRAM_STATE_VAR:
4143 case PROGRAM_CONSTANT: /* ie, immediate */
4144       if ((int) index < 0)
4145 return ureg_DECL_constant(t->ureg, 0);
4146 else
4147 return t->constants[index];
4148
4149 case PROGRAM_IMMEDIATE:
4150 return t->immediates[index];
4151
4152 case PROGRAM_INPUT:
4153 assert(t->inputMapping[index] < Elements(t->inputs));
4154 return t->inputs[t->inputMapping[index]];
4155
4156 case PROGRAM_OUTPUT:
4157 assert(t->outputMapping[index] < Elements(t->outputs));
4158 return ureg_src(t->outputs[t->outputMapping[index]]); /* not needed? */
4159
4160 case PROGRAM_ADDRESS:
4161 return ureg_src(t->address[index]);
4162
4163 case PROGRAM_SYSTEM_VALUE:
4164 assert(index < Elements(t->systemValues));
4165 return t->systemValues[index];
4166
4167 default:
4168 assert(!"unknown src register file");
4169 return ureg_src_undef();
4170 }
4171 }
4172
4173 /**
4174 * Create a TGSI ureg_dst register from an st_dst_reg.
4175 */
4176 static struct ureg_dst
4177 translate_dst(struct st_translate *t,
4178 const st_dst_reg *dst_reg,
4179 bool saturate)
4180 {
4181 struct ureg_dst dst = dst_register(t,
4182 dst_reg->file,
4183 dst_reg->index);
4184
4185 dst = ureg_writemask(dst, dst_reg->writemask);
4186
4187 if (saturate)
4188 dst = ureg_saturate(dst);
4189
4190 if (dst_reg->reladdr != NULL)
4191 dst = ureg_dst_indirect(dst, ureg_src(t->address[0]));
4192
4193 return dst;
4194 }
4195
4196 /**
4197 * Create a TGSI ureg_src register from an st_src_reg.
4198 */
4199 static struct ureg_src
4200 translate_src(struct st_translate *t, const st_src_reg *src_reg)
4201 {
4202 struct ureg_src src = src_register(t, src_reg->file, src_reg->index);
4203
4204 src = ureg_swizzle(src,
4205 GET_SWZ(src_reg->swizzle, 0) & 0x3,
4206 GET_SWZ(src_reg->swizzle, 1) & 0x3,
4207 GET_SWZ(src_reg->swizzle, 2) & 0x3,
4208 GET_SWZ(src_reg->swizzle, 3) & 0x3);
4209
4210 if ((src_reg->negate & 0xf) == NEGATE_XYZW)
4211 src = ureg_negate(src);
4212
4213 if (src_reg->reladdr != NULL) {
4214 /* Normally ureg_src_indirect() would be used here, but a stupid compiler
4215 * bug in g++ makes ureg_src_indirect (an inline C function) erroneously
4216 * set the bit for src.Negate. So we have to do the operation manually
4217 * here to work around the compiler's problems. */
4218 /*src = ureg_src_indirect(src, ureg_src(t->address[0]));*/
4219 struct ureg_src addr = ureg_src(t->address[0]);
4220 src.Indirect = 1;
4221 src.IndirectFile = addr.File;
4222 src.IndirectIndex = addr.Index;
4223 src.IndirectSwizzle = addr.SwizzleX;
4224
4225 if (src_reg->file != PROGRAM_INPUT &&
4226 src_reg->file != PROGRAM_OUTPUT) {
4227 /* If src_reg->index was negative, it was set to zero in
4228 * src_register(). Reassign it now. But don't do this
4229 * for input/output regs since they get remapped while
4230 * const buffers don't.
4231 */
4232 src.Index = src_reg->index;
4233 }
4234 }
4235
4236 return src;
4237 }
4238
4239 static struct tgsi_texture_offset
4240 translate_tex_offset(struct st_translate *t,
4241 const struct tgsi_texture_offset *in_offset)
4242 {
4243 struct tgsi_texture_offset offset;
4244
4245 assert(in_offset->File == PROGRAM_IMMEDIATE);
4246
4247 offset.File = TGSI_FILE_IMMEDIATE;
4248 offset.Index = in_offset->Index;
4249 offset.SwizzleX = in_offset->SwizzleX;
4250 offset.SwizzleY = in_offset->SwizzleY;
4251 offset.SwizzleZ = in_offset->SwizzleZ;
4252
4253 return offset;
4254 }
4255
4256 static void
4257 compile_tgsi_instruction(struct st_translate *t,
4258 const glsl_to_tgsi_instruction *inst)
4259 {
4260 struct ureg_program *ureg = t->ureg;
4261 GLuint i;
4262 struct ureg_dst dst[1];
4263 struct ureg_src src[4];
4264 struct tgsi_texture_offset texoffsets[MAX_GLSL_TEXTURE_OFFSET];
4265
4266 unsigned num_dst;
4267 unsigned num_src;
4268
4269 num_dst = num_inst_dst_regs(inst->op);
4270 num_src = num_inst_src_regs(inst->op);
4271
4272 if (num_dst)
4273 dst[0] = translate_dst(t,
4274 &inst->dst,
4275 inst->saturate);
4276
4277 for (i = 0; i < num_src; i++)
4278 src[i] = translate_src(t, &inst->src[i]);
4279
4280 switch(inst->op) {
4281 case TGSI_OPCODE_BGNLOOP:
4282 case TGSI_OPCODE_CAL:
4283 case TGSI_OPCODE_ELSE:
4284 case TGSI_OPCODE_ENDLOOP:
4285 case TGSI_OPCODE_IF:
4286 assert(num_dst == 0);
4287 ureg_label_insn(ureg,
4288 inst->op,
4289 src, num_src,
4290 get_label(t,
4291 inst->op == TGSI_OPCODE_CAL ? inst->function->sig_id : 0));
4292 return;
4293
4294 case TGSI_OPCODE_TEX:
4295 case TGSI_OPCODE_TXB:
4296 case TGSI_OPCODE_TXD:
4297 case TGSI_OPCODE_TXL:
4298 case TGSI_OPCODE_TXP:
4299 case TGSI_OPCODE_TXQ:
4300 case TGSI_OPCODE_TXF:
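      /* The sampler is appended as an extra source operand; texel offsets are
       * translated and passed separately to ureg_tex_insn().
       */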
4301 src[num_src++] = t->samplers[inst->sampler];
4302 for (i = 0; i < inst->tex_offset_num_offset; i++) {
4303 texoffsets[i] = translate_tex_offset(t, &inst->tex_offsets[i]);
4304 }
4305 ureg_tex_insn(ureg,
4306 inst->op,
4307 dst, num_dst,
4308 translate_texture_target(inst->tex_target, inst->tex_shadow),
4309 texoffsets, inst->tex_offset_num_offset,
4310 src, num_src);
4311 return;
4312
4313 case TGSI_OPCODE_SCS:
4314 dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XY);
4315 ureg_insn(ureg, inst->op, dst, num_dst, src, num_src);
4316 break;
4317
4318 default:
4319 ureg_insn(ureg,
4320 inst->op,
4321 dst, num_dst,
4322 src, num_src);
4323 break;
4324 }
4325 }
4326
4327 /**
4328 * Emit the TGSI instructions to adjust the WPOS pixel center convention
4329 * Basically, add (adjX, adjY) to the fragment position.
4330 */
4331 static void
4332 emit_adjusted_wpos(struct st_translate *t,
4333 const struct gl_program *program,
4334 float adjX, float adjY)
4335 {
4336 struct ureg_program *ureg = t->ureg;
4337 struct ureg_dst wpos_temp = ureg_DECL_temporary(ureg);
4338 struct ureg_src wpos_input = t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]];
4339
4340 /* Note that we bias X and Y and pass Z and W through unchanged.
4341 * The shader might also use gl_FragCoord.w and .z.
4342 */
4343 ureg_ADD(ureg, wpos_temp, wpos_input,
4344 ureg_imm4f(ureg, adjX, adjY, 0.0f, 0.0f));
4345
4346 t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]] = ureg_src(wpos_temp);
4347 }
4348
4349
4350 /**
4351 * Emit the TGSI instructions for inverting the WPOS y coordinate.
4352  * This code is emitted unconditionally because the final Y transform also
4353  * depends on whether an FBO is bound (STATE_FB_WPOS_Y_TRANSFORM).
4354 */
4355 static void
4356 emit_wpos_inversion(struct st_translate *t,
4357 const struct gl_program *program,
4358 bool invert)
4359 {
4360 struct ureg_program *ureg = t->ureg;
4361
4362 /* Fragment program uses fragment position input.
4363 * Need to replace instances of INPUT[WPOS] with temp T
4364    * where T = INPUT[WPOS] with the Y coordinate inverted.
4365 */
4366 static const gl_state_index wposTransformState[STATE_LENGTH]
4367 = { STATE_INTERNAL, STATE_FB_WPOS_Y_TRANSFORM,
4368 (gl_state_index)0, (gl_state_index)0, (gl_state_index)0 };
4369
4370 /* XXX: note we are modifying the incoming shader here! Need to
4371 * do this before emitting the constant decls below, or this
4372 * will be missed:
4373 */
4374 unsigned wposTransConst = _mesa_add_state_reference(program->Parameters,
4375 wposTransformState);
4376
4377 struct ureg_src wpostrans = ureg_DECL_constant(ureg, wposTransConst);
4378 struct ureg_dst wpos_temp;
4379 struct ureg_src wpos_input = t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]];
4380
4381 /* MOV wpos_temp, input[wpos]
4382 */
4383 if (wpos_input.File == TGSI_FILE_TEMPORARY)
4384 wpos_temp = ureg_dst(wpos_input);
4385 else {
4386 wpos_temp = ureg_DECL_temporary(ureg);
4387 ureg_MOV(ureg, wpos_temp, wpos_input);
4388 }
4389
4390 if (invert) {
4391 /* MAD wpos_temp.y, wpos_input, wpostrans.xxxx, wpostrans.yyyy
4392 */
4393 ureg_MAD(ureg,
4394 ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y),
4395 wpos_input,
4396 ureg_scalar(wpostrans, 0),
4397 ureg_scalar(wpostrans, 1));
4398 } else {
4399 /* MAD wpos_temp.y, wpos_input, wpostrans.zzzz, wpostrans.wwww
4400 */
4401 ureg_MAD(ureg,
4402 ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y),
4403 wpos_input,
4404 ureg_scalar(wpostrans, 2),
4405 ureg_scalar(wpostrans, 3));
4406 }
4407
4408 /* Use wpos_temp as position input from here on:
4409 */
4410 t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]] = ureg_src(wpos_temp);
4411 }
4412
4413
4414 /**
4415  * Emit fragment position/coordinate code.
4416 */
4417 static void
4418 emit_wpos(struct st_context *st,
4419 struct st_translate *t,
4420 const struct gl_program *program,
4421 struct ureg_program *ureg)
4422 {
4423 const struct gl_fragment_program *fp =
4424 (const struct gl_fragment_program *) program;
4425 struct pipe_screen *pscreen = st->pipe->screen;
4426 boolean invert = FALSE;
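   /* Decide whether the Y coordinate must be inverted by comparing the origin
    * and pixel-center conventions the shader expects with what the driver
    * supports.
    */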
4427
4428 if (fp->OriginUpperLeft) {
4429 /* Fragment shader wants origin in upper-left */
4430 if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT)) {
4431 /* the driver supports upper-left origin */
4432 }
4433 else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT)) {
4434 /* the driver supports lower-left origin, need to invert Y */
4435 ureg_property_fs_coord_origin(ureg, TGSI_FS_COORD_ORIGIN_LOWER_LEFT);
4436 invert = TRUE;
4437 }
4438 else
4439 assert(0);
4440 }
4441 else {
4442 /* Fragment shader wants origin in lower-left */
4443 if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT))
4444 /* the driver supports lower-left origin */
4445 ureg_property_fs_coord_origin(ureg, TGSI_FS_COORD_ORIGIN_LOWER_LEFT);
4446 else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT))
4447 /* the driver supports upper-left origin, need to invert Y */
4448 invert = TRUE;
4449 else
4450 assert(0);
4451 }
4452
4453 if (fp->PixelCenterInteger) {
4454 /* Fragment shader wants pixel center integer */
4455 if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER))
4456 /* the driver supports pixel center integer */
4457 ureg_property_fs_coord_pixel_center(ureg, TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
4458 else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER))
4459 /* the driver supports pixel center half integer, need to bias X,Y */
4460 emit_adjusted_wpos(t, program, 0.5f, invert ? 0.5f : -0.5f);
4461 else
4462 assert(0);
4463 }
4464 else {
4465 /* Fragment shader wants pixel center half integer */
4466 if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER)) {
4467 /* the driver supports pixel center half integer */
4468 }
4469 else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER)) {
4470 /* the driver supports pixel center integer, need to bias X,Y */
4471 ureg_property_fs_coord_pixel_center(ureg, TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
4472 emit_adjusted_wpos(t, program, 0.5f, invert ? -0.5f : 0.5f);
4473 }
4474 else
4475 assert(0);
4476 }
4477
4478    /* We invert after the adjustment so that we can reuse the adjustment ADD
4479     * and avoid an extra MOV into a temporary. */
4480 emit_wpos_inversion(t, program, invert);
4481 }
4482
4483 /**
4484 * OpenGL's fragment gl_FrontFace input is 1 for front-facing, 0 for back.
4485 * TGSI uses +1 for front, -1 for back.
4486 * This function converts the TGSI value to the GL value. Simply clamping/
4487 * saturating the value to [0,1] does the job.
4488 */
4489 static void
4490 emit_face_var(struct st_translate *t)
4491 {
4492 struct ureg_program *ureg = t->ureg;
4493 struct ureg_dst face_temp = ureg_DECL_temporary(ureg);
4494 struct ureg_src face_input = t->inputs[t->inputMapping[FRAG_ATTRIB_FACE]];
4495
4496 /* MOV_SAT face_temp, input[face] */
4497 face_temp = ureg_saturate(face_temp);
4498 ureg_MOV(ureg, face_temp, face_input);
4499
4500 /* Use face_temp as face input from here on: */
4501 t->inputs[t->inputMapping[FRAG_ATTRIB_FACE]] = ureg_src(face_temp);
4502 }
4503
4504 static void
4505 emit_edgeflags(struct st_translate *t)
4506 {
4507 struct ureg_program *ureg = t->ureg;
4508 struct ureg_dst edge_dst = t->outputs[t->outputMapping[VERT_RESULT_EDGE]];
4509 struct ureg_src edge_src = t->inputs[t->inputMapping[VERT_ATTRIB_EDGEFLAG]];
4510
4511 ureg_MOV(ureg, edge_dst, edge_src);
4512 }
4513
4514 /**
4515 * Translate intermediate IR (glsl_to_tgsi_instruction) to TGSI format.
4516 * \param program the program to translate
4517 * \param numInputs number of input registers used
4518 * \param inputMapping maps Mesa fragment program inputs to TGSI generic
4519 * input indexes
4520 * \param inputSemanticName the TGSI_SEMANTIC flag for each input
4521 * \param inputSemanticIndex the semantic index (ex: which texcoord) for
4522 * each input
4523 * \param interpMode the TGSI_INTERPOLATE_LINEAR/PERSP mode for each input
4524 * \param numOutputs number of output registers used
4525 * \param outputMapping maps Mesa fragment program outputs to TGSI
4526 * generic outputs
4527 * \param outputSemanticName the TGSI_SEMANTIC flag for each output
4528 * \param outputSemanticIndex the semantic index (ex: which texcoord) for
4529 * each output
4530 *
4531 * \return PIPE_OK or PIPE_ERROR_OUT_OF_MEMORY
4532 */
4533 extern "C" enum pipe_error
4534 st_translate_program(
4535 struct gl_context *ctx,
4536 uint procType,
4537 struct ureg_program *ureg,
4538 glsl_to_tgsi_visitor *program,
4539 const struct gl_program *proginfo,
4540 GLuint numInputs,
4541 const GLuint inputMapping[],
4542 const ubyte inputSemanticName[],
4543 const ubyte inputSemanticIndex[],
4544 const GLuint interpMode[],
4545 GLuint numOutputs,
4546 const GLuint outputMapping[],
4547 const ubyte outputSemanticName[],
4548 const ubyte outputSemanticIndex[],
4549 boolean passthrough_edgeflags)
4550 {
4551 struct st_translate translate, *t;
4552 unsigned i;
4553 enum pipe_error ret = PIPE_OK;
4554
4555 assert(numInputs <= Elements(t->inputs));
4556 assert(numOutputs <= Elements(t->outputs));
4557
4558 t = &translate;
4559 memset(t, 0, sizeof *t);
4560
4561 t->procType = procType;
4562 t->inputMapping = inputMapping;
4563 t->outputMapping = outputMapping;
4564 t->ureg = ureg;
4565 t->pointSizeOutIndex = -1;
4566 t->prevInstWrotePointSize = GL_FALSE;
4567
4568 /*
4569 * Declare input attributes.
4570 */
4571 if (procType == TGSI_PROCESSOR_FRAGMENT) {
4572 for (i = 0; i < numInputs; i++) {
4573 t->inputs[i] = ureg_DECL_fs_input(ureg,
4574 inputSemanticName[i],
4575 inputSemanticIndex[i],
4576 interpMode[i]);
4577 }
4578
4579 if (proginfo->InputsRead & FRAG_BIT_WPOS) {
4580 /* Must do this after setting up t->inputs, and before
4581 * emitting constant references, below:
4582 */
4583 emit_wpos(st_context(ctx), t, proginfo, ureg);
4584 }
4585
4586 if (proginfo->InputsRead & FRAG_BIT_FACE)
4587 emit_face_var(t);
4588
4589 /*
4590 * Declare output attributes.
4591 */
4592 for (i = 0; i < numOutputs; i++) {
4593 switch (outputSemanticName[i]) {
4594 case TGSI_SEMANTIC_POSITION:
4595 t->outputs[i] = ureg_DECL_output(ureg,
4596 TGSI_SEMANTIC_POSITION, /* Z/Depth */
4597 outputSemanticIndex[i]);
4598 t->outputs[i] = ureg_writemask(t->outputs[i], TGSI_WRITEMASK_Z);
4599 break;
4600 case TGSI_SEMANTIC_STENCIL:
4601 t->outputs[i] = ureg_DECL_output(ureg,
4602 TGSI_SEMANTIC_STENCIL, /* Stencil */
4603 outputSemanticIndex[i]);
4604 t->outputs[i] = ureg_writemask(t->outputs[i], TGSI_WRITEMASK_Y);
4605 break;
4606 case TGSI_SEMANTIC_COLOR:
4607 t->outputs[i] = ureg_DECL_output(ureg,
4608 TGSI_SEMANTIC_COLOR,
4609 outputSemanticIndex[i]);
4610 break;
4611 default:
4612 assert(!"fragment shader outputs must be POSITION/STENCIL/COLOR");
4613 return PIPE_ERROR_BAD_INPUT;
4614 }
4615 }
4616 }
4617 else if (procType == TGSI_PROCESSOR_GEOMETRY) {
4618 for (i = 0; i < numInputs; i++) {
4619 t->inputs[i] = ureg_DECL_gs_input(ureg,
4620 i,
4621 inputSemanticName[i],
4622 inputSemanticIndex[i]);
4623 }
4624
4625 for (i = 0; i < numOutputs; i++) {
4626 t->outputs[i] = ureg_DECL_output(ureg,
4627 outputSemanticName[i],
4628 outputSemanticIndex[i]);
4629 }
4630 }
4631 else {
4632 assert(procType == TGSI_PROCESSOR_VERTEX);
4633
4634 for (i = 0; i < numInputs; i++) {
4635 t->inputs[i] = ureg_DECL_vs_input(ureg, i);
4636 }
4637
4638 for (i = 0; i < numOutputs; i++) {
4639 t->outputs[i] = ureg_DECL_output(ureg,
4640 outputSemanticName[i],
4641 outputSemanticIndex[i]);
4642 if ((outputSemanticName[i] == TGSI_SEMANTIC_PSIZE) && proginfo->Id) {
4643 /* Writing to the point size result register requires special
4644 * handling to implement clamping.
4645 */
4646 static const gl_state_index pointSizeClampState[STATE_LENGTH]
4647 = { STATE_INTERNAL, STATE_POINT_SIZE_IMPL_CLAMP, (gl_state_index)0, (gl_state_index)0, (gl_state_index)0 };
4648 /* XXX: note we are modifying the incoming shader here! Need to
4649 * do this before emitting the constant decls below, or this
4650 * will be missed.
4651 */
4652 unsigned pointSizeClampConst =
4653 _mesa_add_state_reference(proginfo->Parameters,
4654 pointSizeClampState);
4655 struct ureg_dst psizregtemp = ureg_DECL_temporary(ureg);
4656 t->pointSizeConst = ureg_DECL_constant(ureg, pointSizeClampConst);
4657 t->pointSizeResult = t->outputs[i];
4658 t->pointSizeOutIndex = i;
4659 t->outputs[i] = psizregtemp;
4660 }
4661 }
4662 if (passthrough_edgeflags)
4663 emit_edgeflags(t);
4664 }
4665
4666 /* Declare address register.
4667 */
4668 if (program->num_address_regs > 0) {
4669 assert(program->num_address_regs == 1);
4670 t->address[0] = ureg_DECL_address(ureg);
4671 }
4672
4673 /* Declare misc input registers
4674 */
4675 {
4676 GLbitfield sysInputs = proginfo->SystemValuesRead;
4677 unsigned numSys = 0;
4678 for (i = 0; sysInputs; i++) {
4679 if (sysInputs & (1 << i)) {
4680 unsigned semName = mesa_sysval_to_semantic[i];
4681 t->systemValues[i] = ureg_DECL_system_value(ureg, numSys, semName, 0);
4682 numSys++;
4683 sysInputs &= ~(1 << i);
4684 }
4685 }
4686 }
4687
4688 if (program->indirect_addr_temps) {
4689 /* If temps are accessed with indirect addressing, declare temporaries
4690 * in sequential order. Else, we declare them on demand elsewhere.
4691 * (Note: the number of temporaries is equal to program->next_temp)
4692 */
4693 for (i = 0; i < (unsigned)program->next_temp; i++) {
4694 /* XXX use TGSI_FILE_TEMPORARY_ARRAY when it's supported by ureg */
4695 t->temps[i] = ureg_DECL_temporary(t->ureg);
4696 }
4697 }
4698
4699 /* Emit constants and uniforms. TGSI uses a single index space for these,
4700 * so we put all the translated regs in t->constants.
4701 */
4702 if (proginfo->Parameters) {
4703 t->constants = (struct ureg_src *)CALLOC(proginfo->Parameters->NumParameters * sizeof(t->constants[0]));
4704 if (t->constants == NULL) {
4705 ret = PIPE_ERROR_OUT_OF_MEMORY;
4706 goto out;
4707 }
4708
4709 for (i = 0; i < proginfo->Parameters->NumParameters; i++) {
4710 switch (proginfo->Parameters->Parameters[i].Type) {
4711 case PROGRAM_ENV_PARAM:
4712 case PROGRAM_LOCAL_PARAM:
4713 case PROGRAM_STATE_VAR:
4714 case PROGRAM_NAMED_PARAM:
4715 case PROGRAM_UNIFORM:
4716 t->constants[i] = ureg_DECL_constant(ureg, i);
4717 break;
4718
4719 /* Emit immediates for PROGRAM_CONSTANT only when there's no indirect
4720 * addressing of the const buffer.
4721 * FIXME: Be smarter and recognize param arrays:
4722 * indirect addressing is only valid within the referenced
4723 * array.
4724 */
4725 case PROGRAM_CONSTANT:
4726 if (program->indirect_addr_consts)
4727 t->constants[i] = ureg_DECL_constant(ureg, i);
4728 else
4729 t->constants[i] = emit_immediate(t,
4730 proginfo->Parameters->ParameterValues[i],
4731 proginfo->Parameters->Parameters[i].DataType,
4732 4);
4733 break;
4734 default:
4735 break;
4736 }
4737 }
4738 }
4739
4740 /* Emit immediate values.
4741 */
4742 t->immediates = (struct ureg_src *)CALLOC(program->num_immediates * sizeof(struct ureg_src));
4743 if (t->immediates == NULL) {
4744 ret = PIPE_ERROR_OUT_OF_MEMORY;
4745 goto out;
4746 }
4747 i = 0;
4748 foreach_iter(exec_list_iterator, iter, program->immediates) {
4749 immediate_storage *imm = (immediate_storage *)iter.get();
4750 t->immediates[i++] = emit_immediate(t, imm->values, imm->type, imm->size);
4751 }
4752
4753 /* texture samplers */
4754 for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) {
4755 if (program->samplers_used & (1 << i)) {
4756 t->samplers[i] = ureg_DECL_sampler(ureg, i);
4757 }
4758 }
4759
4760 /* Emit each instruction in turn:
4761 */
4762 foreach_iter(exec_list_iterator, iter, program->instructions) {
4763 set_insn_start(t, ureg_get_instruction_number(ureg));
4764 compile_tgsi_instruction(t, (glsl_to_tgsi_instruction *)iter.get());
4765
4766 if (t->prevInstWrotePointSize && proginfo->Id) {
4767 /* The previous instruction wrote to the (fake) vertex point size
4768 * result register. Now we need to clamp that value to the min/max
4769 * point size range, putting the result into the real point size
4770 * register.
4771 * Note that we can't do this easily at the end of program due to
4772 * possible early return.
4773 */
4774 set_insn_start(t, ureg_get_instruction_number(ureg));
4775 ureg_MAX(t->ureg,
4776 ureg_writemask(t->outputs[t->pointSizeOutIndex], WRITEMASK_X),
4777 ureg_src(t->outputs[t->pointSizeOutIndex]),
4778 ureg_swizzle(t->pointSizeConst, 1,1,1,1));
4779 ureg_MIN(t->ureg, ureg_writemask(t->pointSizeResult, WRITEMASK_X),
4780 ureg_src(t->outputs[t->pointSizeOutIndex]),
4781 ureg_swizzle(t->pointSizeConst, 2,2,2,2));
4782 }
4783 t->prevInstWrotePointSize = GL_FALSE;
4784 }
4785
4786 /* Fix up all emitted labels:
4787 */
4788 for (i = 0; i < t->labels_count; i++) {
4789 ureg_fixup_label(ureg, t->labels[i].token,
4790 t->insn[t->labels[i].branch_target]);
4791 }
4792
4793 out:
4794 FREE(t->insn);
4795 FREE(t->labels);
4796 FREE(t->constants);
4797 FREE(t->immediates);
4798
4799 if (t->error) {
4800 debug_printf("%s: translate error flag set\n", __FUNCTION__);
4801 }
4802
4803 return ret;
4804 }
4805 /* ----------------------------- End TGSI code ------------------------------ */
4806
4807 /**
4808  * Convert a shader's GLSL IR into a Mesa gl_program, but without
4809 * generating Mesa IR.
4810 */
4811 static struct gl_program *
4812 get_mesa_program(struct gl_context *ctx,
4813 struct gl_shader_program *shader_program,
4814 struct gl_shader *shader)
4815 {
4816 glsl_to_tgsi_visitor* v = new glsl_to_tgsi_visitor();
4817 struct gl_program *prog;
4818 GLenum target;
4819 const char *target_string;
4820 bool progress;
4821 struct gl_shader_compiler_options *options =
4822 &ctx->ShaderCompilerOptions[_mesa_shader_type_to_index(shader->Type)];
4823
4824 switch (shader->Type) {
4825 case GL_VERTEX_SHADER:
4826 target = GL_VERTEX_PROGRAM_ARB;
4827 target_string = "vertex";
4828 break;
4829 case GL_FRAGMENT_SHADER:
4830 target = GL_FRAGMENT_PROGRAM_ARB;
4831 target_string = "fragment";
4832 break;
4833 case GL_GEOMETRY_SHADER:
4834 target = GL_GEOMETRY_PROGRAM_NV;
4835 target_string = "geometry";
4836 break;
4837 default:
4838 assert(!"should not be reached");
4839 return NULL;
4840 }
4841
4842 validate_ir_tree(shader->ir);
4843
4844 prog = ctx->Driver.NewProgram(ctx, target, shader_program->Name);
4845 if (!prog)
4846 return NULL;
4847 prog->Parameters = _mesa_new_parameter_list();
4848 prog->Varying = _mesa_new_parameter_list();
4849 prog->Attributes = _mesa_new_parameter_list();
4850 v->ctx = ctx;
4851 v->prog = prog;
4852 v->shader_program = shader_program;
4853 v->options = options;
4854 v->glsl_version = ctx->Const.GLSLVersion;
4855 v->native_integers = ctx->Const.NativeIntegers;
4856
4857 add_uniforms_to_parameters_list(shader_program, shader, prog);
4858
4859 /* Emit intermediate IR for main(). */
4860 visit_exec_list(shader->ir, v);
4861
4862 /* Now emit bodies for any functions that were used. */
4863 do {
4864 progress = GL_FALSE;
4865
4866 foreach_iter(exec_list_iterator, iter, v->function_signatures) {
4867 function_entry *entry = (function_entry *)iter.get();
4868
4869 if (!entry->bgn_inst) {
4870 v->current_function = entry;
4871
4872 entry->bgn_inst = v->emit(NULL, TGSI_OPCODE_BGNSUB);
4873 entry->bgn_inst->function = entry;
4874
4875 visit_exec_list(&entry->sig->body, v);
4876
4877 glsl_to_tgsi_instruction *last;
4878 last = (glsl_to_tgsi_instruction *)v->instructions.get_tail();
4879 if (last->op != TGSI_OPCODE_RET)
4880 v->emit(NULL, TGSI_OPCODE_RET);
4881
4882 glsl_to_tgsi_instruction *end;
4883 end = v->emit(NULL, TGSI_OPCODE_ENDSUB);
4884 end->function = entry;
4885
4886 progress = GL_TRUE;
4887 }
4888 }
4889 } while (progress);
4890
4891 #if 0
4892 /* Print out some information (for debugging purposes) used by the
4893 * optimization passes. */
4894 for (i=0; i < v->next_temp; i++) {
4895 int fr = v->get_first_temp_read(i);
4896 int fw = v->get_first_temp_write(i);
4897 int lr = v->get_last_temp_read(i);
4898 int lw = v->get_last_temp_write(i);
4899
4900 printf("Temp %d: FR=%3d FW=%3d LR=%3d LW=%3d\n", i, fr, fw, lr, lw);
4901 assert(fw <= fr);
4902 }
4903 #endif
4904
4905 /* Remove reads to output registers, and to varyings in vertex shaders. */
4906 v->remove_output_reads(PROGRAM_OUTPUT);
4907 if (target == GL_VERTEX_PROGRAM_ARB)
4908 v->remove_output_reads(PROGRAM_VARYING);
4909
4910 /* Perform optimizations on the instructions in the glsl_to_tgsi_visitor. */
4911 v->simplify_cmp();
4912 v->copy_propagate();
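   /* Run advanced dead code elimination repeatedly until it stops making
    * progress.
    */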
4913 while (v->eliminate_dead_code_advanced());
4914
4915 /* FIXME: These passes to optimize temporary registers don't work when there
4916 * is indirect addressing of the temporary register space. We need proper
4917 * array support so that we don't have to give up these passes in every
4918 * shader that uses arrays.
4919 */
4920 if (!v->indirect_addr_temps) {
4921 v->eliminate_dead_code();
4922 v->merge_registers();
4923 v->renumber_registers();
4924 }
4925
4926 /* Write the END instruction. */
4927 v->emit(NULL, TGSI_OPCODE_END);
4928
4929 if (ctx->Shader.Flags & GLSL_DUMP) {
4930 printf("\n");
4931 printf("GLSL IR for linked %s program %d:\n", target_string,
4932 shader_program->Name);
4933 _mesa_print_ir(shader->ir, NULL);
4934 printf("\n");
4935 printf("\n");
4936 }
4937
4938 prog->Instructions = NULL;
4939 prog->NumInstructions = 0;
4940
4941 do_set_program_inouts(shader->ir, prog);
4942 count_resources(v, prog);
4943
4944 check_resources(ctx, shader_program, v, prog);
4945
4946 _mesa_reference_program(ctx, &shader->Program, prog);
4947
4948 struct st_vertex_program *stvp;
4949 struct st_fragment_program *stfp;
4950 struct st_geometry_program *stgp;
4951
4952 switch (shader->Type) {
4953 case GL_VERTEX_SHADER:
4954 stvp = (struct st_vertex_program *)prog;
4955 stvp->glsl_to_tgsi = v;
4956 break;
4957 case GL_FRAGMENT_SHADER:
4958 stfp = (struct st_fragment_program *)prog;
4959 stfp->glsl_to_tgsi = v;
4960 break;
4961 case GL_GEOMETRY_SHADER:
4962 stgp = (struct st_geometry_program *)prog;
4963 stgp->glsl_to_tgsi = v;
4964 break;
4965 default:
4966 assert(!"should not be reached");
4967 return NULL;
4968 }
4969
4970 return prog;
4971 }
4972
4973 extern "C" {
4974
4975 struct gl_shader *
4976 st_new_shader(struct gl_context *ctx, GLuint name, GLuint type)
4977 {
4978 struct gl_shader *shader;
4979 assert(type == GL_FRAGMENT_SHADER || type == GL_VERTEX_SHADER ||
4980 type == GL_GEOMETRY_SHADER_ARB);
4981 shader = rzalloc(NULL, struct gl_shader);
4982 if (shader) {
4983 shader->Type = type;
4984 shader->Name = name;
4985 _mesa_init_shader(ctx, shader);
4986 }
4987 return shader;
4988 }
4989
4990 struct gl_shader_program *
4991 st_new_shader_program(struct gl_context *ctx, GLuint name)
4992 {
4993 struct gl_shader_program *shProg;
4994 shProg = rzalloc(NULL, struct gl_shader_program);
4995 if (shProg) {
4996 shProg->Name = name;
4997 _mesa_init_shader_program(ctx, shProg);
4998 }
4999 return shProg;
5000 }
5001
5002 /**
5003 * Link a shader.
5004 * Called via ctx->Driver.LinkShader()
5005 * This actually involves converting GLSL IR into an intermediate TGSI-like IR
5006 * with code lowering and other optimizations.
5007 */
5008 GLboolean
5009 st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
5010 {
5011 assert(prog->LinkStatus);
5012
5013 for (unsigned i = 0; i < MESA_SHADER_TYPES; i++) {
5014 if (prog->_LinkedShaders[i] == NULL)
5015 continue;
5016
5017 bool progress;
5018 exec_list *ir = prog->_LinkedShaders[i]->ir;
5019 const struct gl_shader_compiler_options *options =
5020 &ctx->ShaderCompilerOptions[_mesa_shader_type_to_index(prog->_LinkedShaders[i]->Type)];
5021
5022 do {
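         /* Keep running the lowering passes until none of them reports progress. */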
5023 progress = false;
5024
5025 /* Lowering */
5026 do_mat_op_to_vec(ir);
5027 lower_instructions(ir, (MOD_TO_FRACT | DIV_TO_MUL_RCP | EXP_TO_EXP2
5028 | LOG_TO_LOG2 | INT_DIV_TO_MUL_RCP
5029 | ((options->EmitNoPow) ? POW_TO_EXP2 : 0)));
5030
5031 progress = do_lower_jumps(ir, true, true, options->EmitNoMainReturn, options->EmitNoCont, options->EmitNoLoops) || progress;
5032
5033 progress = do_common_optimization(ir, true, options->MaxUnrollIterations) || progress;
5034
5035 progress = lower_quadop_vector(ir, false) || progress;
5036
5037 if (options->MaxIfDepth == 0)
5038 progress = lower_discard(ir) || progress;
5039
5040 progress = lower_if_to_cond_assign(ir, options->MaxIfDepth) || progress;
5041
5042 if (options->EmitNoNoise)
5043 progress = lower_noise(ir) || progress;
5044
5045 /* If there are forms of indirect addressing that the driver
5046 * cannot handle, perform the lowering pass.
5047 */
5048 if (options->EmitNoIndirectInput || options->EmitNoIndirectOutput
5049 || options->EmitNoIndirectTemp || options->EmitNoIndirectUniform)
5050 progress =
5051 lower_variable_index_to_cond_assign(ir,
5052 options->EmitNoIndirectInput,
5053 options->EmitNoIndirectOutput,
5054 options->EmitNoIndirectTemp,
5055 options->EmitNoIndirectUniform)
5056 || progress;
5057
5058 progress = do_vec_index_to_cond_assign(ir) || progress;
5059 } while (progress);
5060
5061 validate_ir_tree(ir);
5062 }
5063
5064 for (unsigned i = 0; i < MESA_SHADER_TYPES; i++) {
5065 struct gl_program *linked_prog;
5066
5067 if (prog->_LinkedShaders[i] == NULL)
5068 continue;
5069
5070 linked_prog = get_mesa_program(ctx, prog, prog->_LinkedShaders[i]);
5071
5072 if (linked_prog) {
5073 bool ok = true;
5074
5075 switch (prog->_LinkedShaders[i]->Type) {
5076 case GL_VERTEX_SHADER:
5077 _mesa_reference_vertprog(ctx, &prog->VertexProgram,
5078 (struct gl_vertex_program *)linked_prog);
5079 ok = ctx->Driver.ProgramStringNotify(ctx, GL_VERTEX_PROGRAM_ARB,
5080 linked_prog);
5081 if (!ok) {
5082 _mesa_reference_vertprog(ctx, &prog->VertexProgram, NULL);
5083 }
5084 break;
5085 case GL_FRAGMENT_SHADER:
5086 _mesa_reference_fragprog(ctx, &prog->FragmentProgram,
5087 (struct gl_fragment_program *)linked_prog);
5088 ok = ctx->Driver.ProgramStringNotify(ctx, GL_FRAGMENT_PROGRAM_ARB,
5089 linked_prog);
5090 if (!ok) {
5091 _mesa_reference_fragprog(ctx, &prog->FragmentProgram, NULL);
5092 }
5093 break;
5094 case GL_GEOMETRY_SHADER:
5095 _mesa_reference_geomprog(ctx, &prog->GeometryProgram,
5096 (struct gl_geometry_program *)linked_prog);
5097 ok = ctx->Driver.ProgramStringNotify(ctx, GL_GEOMETRY_PROGRAM_NV,
5098 linked_prog);
5099 if (!ok) {
5100 _mesa_reference_geomprog(ctx, &prog->GeometryProgram, NULL);
5101 }
5102 break;
5103 }
5104 if (!ok) {
5105 _mesa_reference_program(ctx, &prog->_LinkedShaders[i]->Program, NULL);
5106 _mesa_reference_program(ctx, &linked_prog, NULL);
5107 return GL_FALSE;
5108 }
5109 }
5110
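      /* Release our local reference to the program; any references needed by
       * the shader program or the driver were taken above.
       */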
5111 _mesa_reference_program(ctx, &linked_prog, NULL);
5112 }
5113
5114 return GL_TRUE;
5115 }
5116
5117 } /* extern "C" */