1 /*
2 * Copyright (C) 2005-2007 Brian Paul All Rights Reserved.
3 * Copyright (C) 2008 VMware, Inc. All Rights Reserved.
4 * Copyright © 2010 Intel Corporation
5 * Copyright © 2011 Bryan Cain
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the next
15 * paragraph) shall be included in all copies or substantial portions of the
16 * Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
21 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
24 * DEALINGS IN THE SOFTWARE.
25 */
26
27 /**
28 * \file glsl_to_tgsi.cpp
29 *
30 * Translate GLSL IR to TGSI.
31 */
32
33 #include <stdio.h>
34 #include "main/compiler.h"
35 #include "ir.h"
36 #include "ir_visitor.h"
37 #include "ir_print_visitor.h"
38 #include "ir_expression_flattening.h"
39 #include "glsl_types.h"
40 #include "glsl_parser_extras.h"
41 #include "../glsl/program.h"
42 #include "ir_optimization.h"
43 #include "ast.h"
44
45 extern "C" {
46 #include "main/mtypes.h"
47 #include "main/shaderapi.h"
48 #include "main/shaderobj.h"
49 #include "main/uniforms.h"
50 #include "program/hash_table.h"
51 #include "program/prog_instruction.h"
52 #include "program/prog_optimize.h"
53 #include "program/prog_print.h"
54 #include "program/program.h"
55 #include "program/prog_uniform.h"
56 #include "program/prog_parameter.h"
57 #include "program/sampler.h"
58
59 #include "pipe/p_compiler.h"
60 #include "pipe/p_context.h"
61 #include "pipe/p_screen.h"
62 #include "pipe/p_shader_tokens.h"
63 #include "pipe/p_state.h"
64 #include "util/u_math.h"
65 #include "tgsi/tgsi_ureg.h"
66 #include "tgsi/tgsi_info.h"
67 #include "st_context.h"
68 #include "st_program.h"
69 #include "st_glsl_to_tgsi.h"
70 #include "st_mesa_to_tgsi.h"
71 }
72
73 #define PROGRAM_ANY_CONST ((1 << PROGRAM_LOCAL_PARAM) | \
74 (1 << PROGRAM_ENV_PARAM) | \
75 (1 << PROGRAM_STATE_VAR) | \
76 (1 << PROGRAM_NAMED_PARAM) | \
77 (1 << PROGRAM_CONSTANT) | \
78 (1 << PROGRAM_UNIFORM))
79
80 class st_src_reg;
81 class st_dst_reg;
82
83 static int swizzle_for_size(int size);
84
85 /**
86 * This struct is a corresponding struct to TGSI ureg_src.
87 */
88 class st_src_reg {
89 public:
90 st_src_reg(gl_register_file file, int index, const glsl_type *type)
91 {
92 this->file = file;
93 this->index = index;
94 if (type && (type->is_scalar() || type->is_vector() || type->is_matrix()))
95 this->swizzle = swizzle_for_size(type->vector_elements);
96 else
97 this->swizzle = SWIZZLE_XYZW;
98 this->negate = 0;
99 this->type = type ? type->base_type : GLSL_TYPE_ERROR;
100 this->reladdr = NULL;
101 }
102
103 st_src_reg(gl_register_file file, int index, int type)
104 {
105 this->type = type;
106 this->file = file;
107 this->index = index;
108 this->swizzle = SWIZZLE_XYZW;
109 this->negate = 0;
110 this->reladdr = NULL;
111 }
112
113 st_src_reg()
114 {
115 this->type = GLSL_TYPE_ERROR;
116 this->file = PROGRAM_UNDEFINED;
117 this->index = 0;
118 this->swizzle = 0;
119 this->negate = 0;
120 this->reladdr = NULL;
121 }
122
123 explicit st_src_reg(st_dst_reg reg);
124
125 gl_register_file file; /**< PROGRAM_* from Mesa */
126 int index; /**< temporary index, VERT_ATTRIB_*, FRAG_ATTRIB_*, etc. */
127 GLuint swizzle; /**< SWIZZLE_XYZWONEZERO swizzles from Mesa. */
128 int negate; /**< NEGATE_XYZW mask from mesa */
129 int type; /** GLSL_TYPE_* from GLSL IR (enum glsl_base_type) */
130 /** Register index should be offset by the integer in this reg. */
131 st_src_reg *reladdr;
132 };
133
134 class st_dst_reg {
135 public:
136 st_dst_reg(gl_register_file file, int writemask, int type)
137 {
138 this->file = file;
139 this->index = 0;
140 this->writemask = writemask;
141 this->cond_mask = COND_TR;
142 this->reladdr = NULL;
143 this->type = type;
144 }
145
146 st_dst_reg()
147 {
148 this->type = GLSL_TYPE_ERROR;
149 this->file = PROGRAM_UNDEFINED;
150 this->index = 0;
151 this->writemask = 0;
152 this->cond_mask = COND_TR;
153 this->reladdr = NULL;
154 }
155
156 explicit st_dst_reg(st_src_reg reg);
157
158 gl_register_file file; /**< PROGRAM_* from Mesa */
159 int index; /**< temporary index, VERT_ATTRIB_*, FRAG_ATTRIB_*, etc. */
160 int writemask; /**< Bitfield of WRITEMASK_[XYZW] */
161 GLuint cond_mask:4;
162 int type; /** GLSL_TYPE_* from GLSL IR (enum glsl_base_type) */
163 /** Register index should be offset by the integer in this reg. */
164 st_src_reg *reladdr;
165 };
166
167 st_src_reg::st_src_reg(st_dst_reg reg)
168 {
169 this->type = reg.type;
170 this->file = reg.file;
171 this->index = reg.index;
172 this->swizzle = SWIZZLE_XYZW;
173 this->negate = 0;
174 this->reladdr = NULL;
175 }
176
177 st_dst_reg::st_dst_reg(st_src_reg reg)
178 {
179 this->type = reg.type;
180 this->file = reg.file;
181 this->index = reg.index;
182 this->writemask = WRITEMASK_XYZW;
183 this->cond_mask = COND_TR;
184 this->reladdr = reg.reladdr;
185 }
186
187 class glsl_to_tgsi_instruction : public exec_node {
188 public:
189 /* Callers of this ralloc-based new need not call delete. It's
190 * easier to just ralloc_free 'ctx' (or any of its ancestors). */
191 static void* operator new(size_t size, void *ctx)
192 {
193 void *node;
194
195 node = rzalloc_size(ctx, size);
196 assert(node != NULL);
197
198 return node;
199 }
200
201 unsigned op;
202 st_dst_reg dst;
203 st_src_reg src[3];
204 /** Pointer to the ir source this tree came from for debugging */
205 ir_instruction *ir;
206 GLboolean cond_update;
207 bool saturate;
208 int sampler; /**< sampler index */
209 int tex_target; /**< One of TEXTURE_*_INDEX */
210 GLboolean tex_shadow;
211
212 class function_entry *function; /* Set on TGSI_OPCODE_CAL or TGSI_OPCODE_BGNSUB */
213 };
214
215 class variable_storage : public exec_node {
216 public:
217 variable_storage(ir_variable *var, gl_register_file file, int index)
218 : file(file), index(index), var(var)
219 {
220 /* empty */
221 }
222
223 gl_register_file file;
224 int index;
225 ir_variable *var; /* variable that maps to this, if any */
226 };
227
228 class function_entry : public exec_node {
229 public:
230 ir_function_signature *sig;
231
232 /**
233 * identifier of this function signature used by the program.
234 *
235 * At the point that Mesa instructions for function calls are
236 * generated, we don't know the address of the first instruction of
237 * the function body. So we make the BranchTarget that is called a
238 * small integer and rewrite them during set_branchtargets().
239 */
240 int sig_id;
241
242 /**
243 * Pointer to first instruction of the function body.
244 *
245 * Set during function body emits after main() is processed.
246 */
247 glsl_to_tgsi_instruction *bgn_inst;
248
249 /**
250 * Index of the first instruction of the function body in actual
251 * Mesa IR.
252 *
253 * Set after convertion from glsl_to_tgsi_instruction to prog_instruction.
254 */
255 int inst;
256
257 /** Storage for the return value. */
258 st_src_reg return_reg;
259 };
260
261 class glsl_to_tgsi_visitor : public ir_visitor {
262 public:
263 glsl_to_tgsi_visitor();
264 ~glsl_to_tgsi_visitor();
265
266 function_entry *current_function;
267
268 struct gl_context *ctx;
269 struct gl_program *prog;
270 struct gl_shader_program *shader_program;
271 struct gl_shader_compiler_options *options;
272
273 int next_temp;
274
275 int num_address_regs;
276 int samplers_used;
277 bool indirect_addr_temps;
278 bool indirect_addr_consts;
279
280 int glsl_version;
281
282 variable_storage *find_variable_storage(ir_variable *var);
283
284 function_entry *get_function_signature(ir_function_signature *sig);
285
286 st_src_reg get_temp(const glsl_type *type);
287 void reladdr_to_temp(ir_instruction *ir, st_src_reg *reg, int *num_reladdr);
288
289 st_src_reg st_src_reg_for_float(float val);
290 st_src_reg st_src_reg_for_int(int val);
291 st_src_reg st_src_reg_for_type(int type, int val);
292
293 /**
294 * \name Visit methods
295 *
296 * As typical for the visitor pattern, there must be one \c visit method for
297 * each concrete subclass of \c ir_instruction. Virtual base classes within
298 * the hierarchy should not have \c visit methods.
299 */
300 /*@{*/
301 virtual void visit(ir_variable *);
302 virtual void visit(ir_loop *);
303 virtual void visit(ir_loop_jump *);
304 virtual void visit(ir_function_signature *);
305 virtual void visit(ir_function *);
306 virtual void visit(ir_expression *);
307 virtual void visit(ir_swizzle *);
308 virtual void visit(ir_dereference_variable *);
309 virtual void visit(ir_dereference_array *);
310 virtual void visit(ir_dereference_record *);
311 virtual void visit(ir_assignment *);
312 virtual void visit(ir_constant *);
313 virtual void visit(ir_call *);
314 virtual void visit(ir_return *);
315 virtual void visit(ir_discard *);
316 virtual void visit(ir_texture *);
317 virtual void visit(ir_if *);
318 /*@}*/
319
320 st_src_reg result;
321
322 /** List of variable_storage */
323 exec_list variables;
324
325 /** List of function_entry */
326 exec_list function_signatures;
327 int next_signature_id;
328
329 /** List of glsl_to_tgsi_instruction */
330 exec_list instructions;
331
332 glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op);
333
334 glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op,
335 st_dst_reg dst, st_src_reg src0);
336
337 glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op,
338 st_dst_reg dst, st_src_reg src0, st_src_reg src1);
339
340 glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op,
341 st_dst_reg dst,
342 st_src_reg src0, st_src_reg src1, st_src_reg src2);
343
344 unsigned get_opcode(ir_instruction *ir, unsigned op,
345 st_dst_reg dst,
346 st_src_reg src0, st_src_reg src1);
347
348 /**
349 * Emit the correct dot-product instruction for the type of arguments
350 */
351 void emit_dp(ir_instruction *ir,
352 st_dst_reg dst,
353 st_src_reg src0,
354 st_src_reg src1,
355 unsigned elements);
356
357 void emit_scalar(ir_instruction *ir, unsigned op,
358 st_dst_reg dst, st_src_reg src0);
359
360 void emit_scalar(ir_instruction *ir, unsigned op,
361 st_dst_reg dst, st_src_reg src0, st_src_reg src1);
362
363 void emit_arl(ir_instruction *ir, st_dst_reg dst, st_src_reg src0);
364
365 void emit_scs(ir_instruction *ir, unsigned op,
366 st_dst_reg dst, const st_src_reg &src);
367
368 GLboolean try_emit_mad(ir_expression *ir,
369 int mul_operand);
370 GLboolean try_emit_sat(ir_expression *ir);
371
372 void emit_swz(ir_expression *ir);
373
374 bool process_move_condition(ir_rvalue *ir);
375
376 void remove_output_reads(gl_register_file type);
377 void simplify_cmp(void);
378
379 void rename_temp_register(int index, int new_index);
380 int get_first_temp_read(int index);
381 int get_first_temp_write(int index);
382 int get_last_temp_read(int index);
383 int get_last_temp_write(int index);
384
385 void copy_propagate(void);
386 void eliminate_dead_code(void);
387 void merge_registers(void);
388 void renumber_registers(void);
389
390 void *mem_ctx;
391 };
392
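/* Shared sentinel operands for instruction slots that are unused, plus the
 * single address register that every relative-addressing access goes through.
 */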
393 static st_src_reg undef_src = st_src_reg(PROGRAM_UNDEFINED, 0, GLSL_TYPE_ERROR);
394
395 static st_dst_reg undef_dst = st_dst_reg(PROGRAM_UNDEFINED, SWIZZLE_NOOP, GLSL_TYPE_ERROR);
396
397 static st_dst_reg address_reg = st_dst_reg(PROGRAM_ADDRESS, WRITEMASK_X, GLSL_TYPE_FLOAT);
398
399 static void
400 fail_link(struct gl_shader_program *prog, const char *fmt, ...) PRINTFLIKE(2, 3);
401
402 static void
403 fail_link(struct gl_shader_program *prog, const char *fmt, ...)
404 {
405 va_list args;
406 va_start(args, fmt);
407 ralloc_vasprintf_append(&prog->InfoLog, fmt, args);
408 va_end(args);
409
410 prog->LinkStatus = GL_FALSE;
411 }
412
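/* Map a component count (1-4) to a swizzle that reads those components and
 * replicates the last one, e.g. a two-component value yields .xyyy.
 */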
413 static int
414 swizzle_for_size(int size)
415 {
416 int size_swizzles[4] = {
417 MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
418 MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y),
419 MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z),
420 MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W),
421 };
422
423 assert((size >= 1) && (size <= 4));
424 return size_swizzles[size - 1];
425 }
426
427 static bool
428 is_tex_instruction(unsigned opcode)
429 {
430 const tgsi_opcode_info* info = tgsi_get_opcode_info(opcode);
431 return info->is_tex;
432 }
433
434 static unsigned
435 num_inst_dst_regs(unsigned opcode)
436 {
437 const tgsi_opcode_info* info = tgsi_get_opcode_info(opcode);
438 return info->num_dst;
439 }
440
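/* Number of source operands for an opcode. TGSI counts the sampler of a
 * texture instruction as a source, so it is subtracted here to match the
 * Mesa-style operand count used by this translator.
 */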
441 static unsigned
442 num_inst_src_regs(unsigned opcode)
443 {
444 const tgsi_opcode_info* info = tgsi_get_opcode_info(opcode);
445 return info->is_tex ? info->num_src - 1 : info->num_src;
446 }
447
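/* Core emit helper: selects the type-specific opcode via get_opcode(), loads
 * relative-address operands through the address register (spilling extras to
 * temporaries), records which register files are addressed indirectly, and
 * appends the new instruction to the instruction list.
 */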
448 glsl_to_tgsi_instruction *
449 glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op,
450 st_dst_reg dst,
451 st_src_reg src0, st_src_reg src1, st_src_reg src2)
452 {
453 glsl_to_tgsi_instruction *inst = new(mem_ctx) glsl_to_tgsi_instruction();
454 int num_reladdr = 0, i;
455
456 op = get_opcode(ir, op, dst, src0, src1);
457
458 /* If we have to do relative addressing, we want to load the ARL
459 * reg directly for one of the regs, and preload the other reladdr
460 * sources into temps.
461 */
462 num_reladdr += dst.reladdr != NULL;
463 num_reladdr += src0.reladdr != NULL;
464 num_reladdr += src1.reladdr != NULL;
465 num_reladdr += src2.reladdr != NULL;
466
467 reladdr_to_temp(ir, &src2, &num_reladdr);
468 reladdr_to_temp(ir, &src1, &num_reladdr);
469 reladdr_to_temp(ir, &src0, &num_reladdr);
470
471 if (dst.reladdr) {
472 emit_arl(ir, address_reg, *dst.reladdr);
473 num_reladdr--;
474 }
475 assert(num_reladdr == 0);
476
477 inst->op = op;
478 inst->dst = dst;
479 inst->src[0] = src0;
480 inst->src[1] = src1;
481 inst->src[2] = src2;
482 inst->ir = ir;
483
484 inst->function = NULL;
485
486 if (op == TGSI_OPCODE_ARL)
487 this->num_address_regs = 1;
488
489 /* Update indirect addressing status used by TGSI */
490 if (dst.reladdr) {
491 switch(dst.file) {
492 case PROGRAM_TEMPORARY:
493 this->indirect_addr_temps = true;
494 break;
495 case PROGRAM_LOCAL_PARAM:
496 case PROGRAM_ENV_PARAM:
497 case PROGRAM_STATE_VAR:
498 case PROGRAM_NAMED_PARAM:
499 case PROGRAM_CONSTANT:
500 case PROGRAM_UNIFORM:
501 this->indirect_addr_consts = true;
502 break;
503 default:
504 break;
505 }
506 }
507 else {
508 for (i=0; i<3; i++) {
509 if(inst->src[i].reladdr) {
510 switch(inst->src[i].file) {
511 case PROGRAM_TEMPORARY:
512 this->indirect_addr_temps = true;
513 break;
514 case PROGRAM_LOCAL_PARAM:
515 case PROGRAM_ENV_PARAM:
516 case PROGRAM_STATE_VAR:
517 case PROGRAM_NAMED_PARAM:
518 case PROGRAM_CONSTANT:
519 case PROGRAM_UNIFORM:
520 this->indirect_addr_consts = true;
521 break;
522 default:
523 break;
524 }
525 }
526 }
527 }
528
529 this->instructions.push_tail(inst);
530
531 return inst;
532 }
533
534
535 glsl_to_tgsi_instruction *
536 glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op,
537 st_dst_reg dst, st_src_reg src0, st_src_reg src1)
538 {
539 return emit(ir, op, dst, src0, src1, undef_src);
540 }
541
542 glsl_to_tgsi_instruction *
543 glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op,
544 st_dst_reg dst, st_src_reg src0)
545 {
546 assert(dst.writemask != 0);
547 return emit(ir, op, dst, src0, undef_src, undef_src);
548 }
549
550 glsl_to_tgsi_instruction *
551 glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op)
552 {
553 return emit(ir, op, undef_dst, undef_src, undef_src, undef_src);
554 }
555
556 /**
557 * Determines whether to use an integer, unsigned integer, or float opcode
 558  * based on the operands and input opcode, then returns the opcode to use.
559 *
560 * TODO: type checking for remaining TGSI opcodes
561 */
562 unsigned
563 glsl_to_tgsi_visitor::get_opcode(ir_instruction *ir, unsigned op,
564 st_dst_reg dst,
565 st_src_reg src0, st_src_reg src1)
566 {
567 int type = GLSL_TYPE_FLOAT;
568
569 if (src0.type == GLSL_TYPE_FLOAT || src1.type == GLSL_TYPE_FLOAT)
570 type = GLSL_TYPE_FLOAT;
571 else if (glsl_version >= 130)
572 type = src0.type;
573
574 #define case4(c, f, i, u) \
575 case TGSI_OPCODE_##c: \
576 if (type == GLSL_TYPE_INT) op = TGSI_OPCODE_##i; \
577 else if (type == GLSL_TYPE_UINT) op = TGSI_OPCODE_##u; \
578 else op = TGSI_OPCODE_##f; \
579 break;
580 #define case3(f, i, u) case4(f, f, i, u)
581 #define case2fi(f, i) case4(f, f, i, i)
582 #define case2iu(i, u) case4(i, LAST, i, u)
583
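   /* For example, case3(DIV, IDIV, UDIV) turns TGSI_OPCODE_DIV into IDIV for
    * signed integer operands and UDIV for unsigned ones; the case2iu variants
    * map the float slot to TGSI_OPCODE_LAST so the assert below catches
    * opcodes that have no float form.
    */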
584 switch(op) {
585 case2fi(ADD, UADD);
586 case2fi(MUL, UMUL);
587 case2fi(MAD, UMAD);
588 case3(DIV, IDIV, UDIV);
589 case3(MAX, IMAX, UMAX);
590 case3(MIN, IMIN, UMIN);
591 case2iu(MOD, UMOD);
592
593 case2fi(SEQ, USEQ);
594 case2fi(SNE, USNE);
595 case3(SGE, ISGE, USGE);
596 case3(SLT, ISLT, USLT);
597
598 case2iu(SHL, SHL);
599 case2iu(ISHR, USHR);
600 case2iu(NOT, NOT);
601 case2iu(AND, AND);
602 case2iu(OR, OR);
603 case2iu(XOR, XOR);
604
605 default: break;
606 }
607
608 assert(op != TGSI_OPCODE_LAST);
609 return op;
610 }
611
612 void
613 glsl_to_tgsi_visitor::emit_dp(ir_instruction *ir,
614 st_dst_reg dst, st_src_reg src0, st_src_reg src1,
615 unsigned elements)
616 {
617 static const unsigned dot_opcodes[] = {
618 TGSI_OPCODE_DP2, TGSI_OPCODE_DP3, TGSI_OPCODE_DP4
619 };
620
621 emit(ir, dot_opcodes[elements - 2], dst, src0, src1);
622 }
623
624 /**
625 * Emits TGSI scalar opcodes to produce unique answers across channels.
626 *
627 * Some TGSI opcodes are scalar-only, like ARB_fp/vp. The src X
628 * channel determines the result across all channels. So to do a vec4
629 * of this operation, we want to emit a scalar per source channel used
630 * to produce dest channels.
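 * For example, an EX2 with a destination writemask of .xy and a source
 * swizzle of .xyzw becomes two instructions, one computing dst.x from src.x
 * and one computing dst.y from src.y; destination channels that read the
 * same source channel are folded into a single instruction.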
631 */
632 void
633 glsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, unsigned op,
634 st_dst_reg dst,
635 st_src_reg orig_src0, st_src_reg orig_src1)
636 {
637 int i, j;
638 int done_mask = ~dst.writemask;
639
640 /* TGSI RCP is a scalar operation splatting results to all channels,
641 * like ARB_fp/vp. So emit as many RCPs as necessary to cover our
642 * dst channels.
643 */
644 for (i = 0; i < 4; i++) {
645 GLuint this_mask = (1 << i);
646 glsl_to_tgsi_instruction *inst;
647 st_src_reg src0 = orig_src0;
648 st_src_reg src1 = orig_src1;
649
650 if (done_mask & this_mask)
651 continue;
652
653 GLuint src0_swiz = GET_SWZ(src0.swizzle, i);
654 GLuint src1_swiz = GET_SWZ(src1.swizzle, i);
655 for (j = i + 1; j < 4; j++) {
656 /* If there is another enabled component in the destination that is
657 * derived from the same inputs, generate its value on this pass as
658 * well.
659 */
660 if (!(done_mask & (1 << j)) &&
661 GET_SWZ(src0.swizzle, j) == src0_swiz &&
662 GET_SWZ(src1.swizzle, j) == src1_swiz) {
663 this_mask |= (1 << j);
664 }
665 }
666 src0.swizzle = MAKE_SWIZZLE4(src0_swiz, src0_swiz,
667 src0_swiz, src0_swiz);
668 src1.swizzle = MAKE_SWIZZLE4(src1_swiz, src1_swiz,
669 src1_swiz, src1_swiz);
670
671 inst = emit(ir, op, dst, src0, src1);
672 inst->dst.writemask = this_mask;
673 done_mask |= this_mask;
674 }
675 }
676
677 void
678 glsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, unsigned op,
679 st_dst_reg dst, st_src_reg src0)
680 {
681 st_src_reg undef = undef_src;
682
683 undef.swizzle = SWIZZLE_XXXX;
684
685 emit_scalar(ir, op, dst, src0, undef);
686 }
687
688 void
689 glsl_to_tgsi_visitor::emit_arl(ir_instruction *ir,
690 st_dst_reg dst, st_src_reg src0)
691 {
692 st_src_reg tmp = get_temp(glsl_type::float_type);
693
694 if (src0.type == GLSL_TYPE_INT)
695 emit(ir, TGSI_OPCODE_I2F, st_dst_reg(tmp), src0);
696 else if (src0.type == GLSL_TYPE_UINT)
697 emit(ir, TGSI_OPCODE_U2F, st_dst_reg(tmp), src0);
698 else
699 tmp = src0;
700
701 emit(ir, TGSI_OPCODE_ARL, dst, tmp);
702 }
703
704 /**
 705  * Emit a TGSI_OPCODE_SCS instruction
706 *
707 * The \c SCS opcode functions a bit differently than the other TGSI opcodes.
708 * Instead of splatting its result across all four components of the
709 * destination, it writes one value to the \c x component and another value to
710 * the \c y component.
711 *
712 * \param ir IR instruction being processed
713 * \param op Either \c TGSI_OPCODE_SIN or \c TGSI_OPCODE_COS depending
714 * on which value is desired.
715 * \param dst Destination register
716 * \param src Source register
717 */
718 void
719 glsl_to_tgsi_visitor::emit_scs(ir_instruction *ir, unsigned op,
720 st_dst_reg dst,
721 const st_src_reg &src)
722 {
723 /* Vertex programs cannot use the SCS opcode.
724 */
725 if (this->prog->Target == GL_VERTEX_PROGRAM_ARB) {
726 emit_scalar(ir, op, dst, src);
727 return;
728 }
729
730 const unsigned component = (op == TGSI_OPCODE_SIN) ? 0 : 1;
731 const unsigned scs_mask = (1U << component);
732 int done_mask = ~dst.writemask;
733 st_src_reg tmp;
734
735 assert(op == TGSI_OPCODE_SIN || op == TGSI_OPCODE_COS);
736
 737     /* If there are components in the destination that differ from the component
 738      * that will be written by the SCS instruction, we'll need a temporary.
739 */
740 if (scs_mask != unsigned(dst.writemask)) {
741 tmp = get_temp(glsl_type::vec4_type);
742 }
743
744 for (unsigned i = 0; i < 4; i++) {
745 unsigned this_mask = (1U << i);
746 st_src_reg src0 = src;
747
748 if ((done_mask & this_mask) != 0)
749 continue;
750
751 /* The source swizzle specified which component of the source generates
752 * sine / cosine for the current component in the destination. The SCS
 753        * instruction requires that this value be swizzled to the X component.
754 * Replace the current swizzle with a swizzle that puts the source in
755 * the X component.
756 */
757 unsigned src0_swiz = GET_SWZ(src.swizzle, i);
758
759 src0.swizzle = MAKE_SWIZZLE4(src0_swiz, src0_swiz,
760 src0_swiz, src0_swiz);
761 for (unsigned j = i + 1; j < 4; j++) {
762 /* If there is another enabled component in the destination that is
763 * derived from the same inputs, generate its value on this pass as
764 * well.
765 */
766 if (!(done_mask & (1 << j)) &&
767 GET_SWZ(src0.swizzle, j) == src0_swiz) {
768 this_mask |= (1 << j);
769 }
770 }
771
772 if (this_mask != scs_mask) {
773 glsl_to_tgsi_instruction *inst;
774 st_dst_reg tmp_dst = st_dst_reg(tmp);
775
776 /* Emit the SCS instruction.
777 */
778 inst = emit(ir, TGSI_OPCODE_SCS, tmp_dst, src0);
779 inst->dst.writemask = scs_mask;
780
781 /* Move the result of the SCS instruction to the desired location in
782 * the destination.
783 */
784 tmp.swizzle = MAKE_SWIZZLE4(component, component,
785 component, component);
 786          inst = emit(ir, TGSI_OPCODE_MOV, dst, tmp);
787 inst->dst.writemask = this_mask;
788 } else {
789 /* Emit the SCS instruction to write directly to the destination.
790 */
791 glsl_to_tgsi_instruction *inst = emit(ir, TGSI_OPCODE_SCS, dst, src0);
792 inst->dst.writemask = scs_mask;
793 }
794
795 done_mask |= this_mask;
796 }
797 }
798
799 struct st_src_reg
800 glsl_to_tgsi_visitor::st_src_reg_for_float(float val)
801 {
802 st_src_reg src(PROGRAM_CONSTANT, -1, GLSL_TYPE_FLOAT);
803 union gl_constant_value uval;
804
805 uval.f = val;
806 src.index = _mesa_add_typed_unnamed_constant(this->prog->Parameters,
807 &uval, 1, GL_FLOAT, &src.swizzle);
808
809 return src;
810 }
811
812 struct st_src_reg
813 glsl_to_tgsi_visitor::st_src_reg_for_int(int val)
814 {
815 st_src_reg src(PROGRAM_CONSTANT, -1, GLSL_TYPE_INT);
816 union gl_constant_value uval;
817
818 assert(glsl_version >= 130);
819
820 uval.i = val;
821 src.index = _mesa_add_typed_unnamed_constant(this->prog->Parameters,
822 &uval, 1, GL_INT, &src.swizzle);
823
824 return src;
825 }
826
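/* Immediate constant of the requested base type; with GLSL versions below
 * 1.30 every constant is represented as a float.
 */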
827 struct st_src_reg
828 glsl_to_tgsi_visitor::st_src_reg_for_type(int type, int val)
829 {
830 if (glsl_version >= 130)
831 return type == GLSL_TYPE_FLOAT ? st_src_reg_for_float(val) :
832 st_src_reg_for_int(val);
833 else
834 return st_src_reg_for_float(val);
835 }
836
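/* Number of vec4 slots a value of the given type occupies: one per matrix
 * column, the sum of the members for arrays and structs, and a single slot
 * for everything else (including samplers, which are resolved at link time).
 */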
837 static int
838 type_size(const struct glsl_type *type)
839 {
840 unsigned int i;
841 int size;
842
843 switch (type->base_type) {
844 case GLSL_TYPE_UINT:
845 case GLSL_TYPE_INT:
846 case GLSL_TYPE_FLOAT:
847 case GLSL_TYPE_BOOL:
848 if (type->is_matrix()) {
849 return type->matrix_columns;
850 } else {
851 /* Regardless of size of vector, it gets a vec4. This is bad
852 * packing for things like floats, but otherwise arrays become a
853 * mess. Hopefully a later pass over the code can pack scalars
854 * down if appropriate.
855 */
856 return 1;
857 }
858 case GLSL_TYPE_ARRAY:
859 assert(type->length > 0);
860 return type_size(type->fields.array) * type->length;
861 case GLSL_TYPE_STRUCT:
862 size = 0;
863 for (i = 0; i < type->length; i++) {
864 size += type_size(type->fields.structure[i].type);
865 }
866 return size;
867 case GLSL_TYPE_SAMPLER:
868 /* Samplers take up one slot in UNIFORMS[], but they're baked in
869 * at link time.
870 */
871 return 1;
872 default:
873 assert(0);
874 return 0;
875 }
876 }
877
878 /**
879 * In the initial pass of codegen, we assign temporary numbers to
880 * intermediate results. (not SSA -- variable assignments will reuse
881 * storage).
882 */
883 st_src_reg
884 glsl_to_tgsi_visitor::get_temp(const glsl_type *type)
885 {
886 st_src_reg src;
887 int swizzle[4];
888 int i;
889
890 src.type = glsl_version >= 130 ? type->base_type : GLSL_TYPE_FLOAT;
891 src.file = PROGRAM_TEMPORARY;
892 src.index = next_temp;
893 src.reladdr = NULL;
894 next_temp += type_size(type);
895
896 if (type->is_array() || type->is_record()) {
897 src.swizzle = SWIZZLE_NOOP;
898 } else {
899 for (i = 0; i < type->vector_elements; i++)
900 swizzle[i] = i;
901 for (; i < 4; i++)
902 swizzle[i] = type->vector_elements - 1;
903 src.swizzle = MAKE_SWIZZLE4(swizzle[0], swizzle[1],
904 swizzle[2], swizzle[3]);
905 }
906 src.negate = 0;
907
908 return src;
909 }
910
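/* Return the storage previously allocated for this ir_variable, or NULL if
 * the variable has not been visited yet.
 */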
911 variable_storage *
912 glsl_to_tgsi_visitor::find_variable_storage(ir_variable *var)
913 {
914
915 variable_storage *entry;
916
917 foreach_iter(exec_list_iterator, iter, this->variables) {
918 entry = (variable_storage *)iter.get();
919
920 if (entry->var == var)
921 return entry;
922 }
923
924 return NULL;
925 }
926
927 void
928 glsl_to_tgsi_visitor::visit(ir_variable *ir)
929 {
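   /* Built-in fragment shader variables carry layout state: record
    * gl_FragCoord's origin/pixel-center convention and gl_FragDepth's
    * depth layout on the gl_fragment_program.
    */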
930 if (strcmp(ir->name, "gl_FragCoord") == 0) {
931 struct gl_fragment_program *fp = (struct gl_fragment_program *)this->prog;
932
933 fp->OriginUpperLeft = ir->origin_upper_left;
934 fp->PixelCenterInteger = ir->pixel_center_integer;
935
936 } else if (strcmp(ir->name, "gl_FragDepth") == 0) {
937 struct gl_fragment_program *fp = (struct gl_fragment_program *)this->prog;
938 switch (ir->depth_layout) {
939 case ir_depth_layout_none:
940 fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_NONE;
941 break;
942 case ir_depth_layout_any:
943 fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_ANY;
944 break;
945 case ir_depth_layout_greater:
946 fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_GREATER;
947 break;
948 case ir_depth_layout_less:
949 fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_LESS;
950 break;
951 case ir_depth_layout_unchanged:
952 fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_UNCHANGED;
953 break;
954 default:
955 assert(0);
956 break;
957 }
958 }
959
960 if (ir->mode == ir_var_uniform && strncmp(ir->name, "gl_", 3) == 0) {
961 unsigned int i;
962 const ir_state_slot *const slots = ir->state_slots;
963 assert(ir->state_slots != NULL);
964
965 /* Check if this statevar's setup in the STATE file exactly
966 * matches how we'll want to reference it as a
967 * struct/array/whatever. If not, then we need to move it into
968 * temporary storage and hope that it'll get copy-propagated
969 * out.
970 */
971 for (i = 0; i < ir->num_state_slots; i++) {
972 if (slots[i].swizzle != SWIZZLE_XYZW) {
973 break;
974 }
975 }
976
977 struct variable_storage *storage;
978 st_dst_reg dst;
979 if (i == ir->num_state_slots) {
980 /* We'll set the index later. */
981 storage = new(mem_ctx) variable_storage(ir, PROGRAM_STATE_VAR, -1);
982 this->variables.push_tail(storage);
983
984 dst = undef_dst;
985 } else {
986 /* The variable_storage constructor allocates slots based on the size
987 * of the type. However, this had better match the number of state
988 * elements that we're going to copy into the new temporary.
989 */
990 assert((int) ir->num_state_slots == type_size(ir->type));
991
992 storage = new(mem_ctx) variable_storage(ir, PROGRAM_TEMPORARY,
993 this->next_temp);
994 this->variables.push_tail(storage);
995 this->next_temp += type_size(ir->type);
996
997 dst = st_dst_reg(st_src_reg(PROGRAM_TEMPORARY, storage->index,
998 glsl_version >= 130 ? ir->type->base_type : GLSL_TYPE_FLOAT));
999 }
1000
1001
1002 for (unsigned int i = 0; i < ir->num_state_slots; i++) {
1003 int index = _mesa_add_state_reference(this->prog->Parameters,
1004 (gl_state_index *)slots[i].tokens);
1005
1006 if (storage->file == PROGRAM_STATE_VAR) {
1007 if (storage->index == -1) {
1008 storage->index = index;
1009 } else {
1010 assert(index == storage->index + (int)i);
1011 }
1012 } else {
1013 st_src_reg src(PROGRAM_STATE_VAR, index,
1014 glsl_version >= 130 ? ir->type->base_type : GLSL_TYPE_FLOAT);
1015 src.swizzle = slots[i].swizzle;
1016 emit(ir, TGSI_OPCODE_MOV, dst, src);
1017 /* even a float takes up a whole vec4 reg in a struct/array. */
1018 dst.index++;
1019 }
1020 }
1021
1022 if (storage->file == PROGRAM_TEMPORARY &&
1023 dst.index != storage->index + (int) ir->num_state_slots) {
1024 fail_link(this->shader_program,
1025 "failed to load builtin uniform `%s' (%d/%d regs loaded)\n",
1026 ir->name, dst.index - storage->index,
1027 type_size(ir->type));
1028 }
1029 }
1030 }
1031
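/* Loops arrive here either as plain bodies or as normalized counted loops
 * (counter/from/to/increment filled in by loop analysis). The latter are
 * lowered to an explicit counter initialization, a BGNLOOP/ENDLOOP pair,
 * a compare-and-break at the top of the body, and an increment at the end.
 */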
1032 void
1033 glsl_to_tgsi_visitor::visit(ir_loop *ir)
1034 {
1035 ir_dereference_variable *counter = NULL;
1036
1037 if (ir->counter != NULL)
1038 counter = new(ir) ir_dereference_variable(ir->counter);
1039
1040 if (ir->from != NULL) {
1041 assert(ir->counter != NULL);
1042
1043 ir_assignment *a = new(ir) ir_assignment(counter, ir->from, NULL);
1044
1045 a->accept(this);
1046 delete a;
1047 }
1048
1049 emit(NULL, TGSI_OPCODE_BGNLOOP);
1050
1051 if (ir->to) {
1052 ir_expression *e =
1053 new(ir) ir_expression(ir->cmp, glsl_type::bool_type,
1054 counter, ir->to);
1055 ir_if *if_stmt = new(ir) ir_if(e);
1056
1057 ir_loop_jump *brk = new(ir) ir_loop_jump(ir_loop_jump::jump_break);
1058
1059 if_stmt->then_instructions.push_tail(brk);
1060
1061 if_stmt->accept(this);
1062
1063 delete if_stmt;
1064 delete e;
1065 delete brk;
1066 }
1067
1068 visit_exec_list(&ir->body_instructions, this);
1069
1070 if (ir->increment) {
1071 ir_expression *e =
1072 new(ir) ir_expression(ir_binop_add, counter->type,
1073 counter, ir->increment);
1074
1075 ir_assignment *a = new(ir) ir_assignment(counter, e, NULL);
1076
1077 a->accept(this);
1078 delete a;
1079 delete e;
1080 }
1081
1082 emit(NULL, TGSI_OPCODE_ENDLOOP);
1083 }
1084
1085 void
1086 glsl_to_tgsi_visitor::visit(ir_loop_jump *ir)
1087 {
1088 switch (ir->mode) {
1089 case ir_loop_jump::jump_break:
1090 emit(NULL, TGSI_OPCODE_BRK);
1091 break;
1092 case ir_loop_jump::jump_continue:
1093 emit(NULL, TGSI_OPCODE_CONT);
1094 break;
1095 }
1096 }
1097
1098
1099 void
1100 glsl_to_tgsi_visitor::visit(ir_function_signature *ir)
1101 {
1102 assert(0);
1103 (void)ir;
1104 }
1105
1106 void
1107 glsl_to_tgsi_visitor::visit(ir_function *ir)
1108 {
1109 /* Ignore function bodies other than main() -- we shouldn't see calls to
1110 * them since they should all be inlined before we get to glsl_to_tgsi.
1111 */
1112 if (strcmp(ir->name, "main") == 0) {
1113 const ir_function_signature *sig;
1114 exec_list empty;
1115
1116 sig = ir->matching_signature(&empty);
1117
1118 assert(sig);
1119
1120 foreach_iter(exec_list_iterator, iter, sig->body) {
1121 ir_instruction *ir = (ir_instruction *)iter.get();
1122
1123 ir->accept(this);
1124 }
1125 }
1126 }
1127
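/* Peephole used for ir_binop_add: if the operand selected by mul_operand is
 * a multiply, fuse the whole expression into a single MAD; otherwise return
 * false so the generic ADD path is used.
 */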
1128 GLboolean
1129 glsl_to_tgsi_visitor::try_emit_mad(ir_expression *ir, int mul_operand)
1130 {
1131 int nonmul_operand = 1 - mul_operand;
1132 st_src_reg a, b, c;
1133
1134 ir_expression *expr = ir->operands[mul_operand]->as_expression();
1135 if (!expr || expr->operation != ir_binop_mul)
1136 return false;
1137
1138 expr->operands[0]->accept(this);
1139 a = this->result;
1140 expr->operands[1]->accept(this);
1141 b = this->result;
1142 ir->operands[nonmul_operand]->accept(this);
1143 c = this->result;
1144
1145 this->result = get_temp(ir->type);
1146 emit(ir, TGSI_OPCODE_MAD, st_dst_reg(this->result), a, b, c);
1147
1148 return true;
1149 }
1150
1151 GLboolean
1152 glsl_to_tgsi_visitor::try_emit_sat(ir_expression *ir)
1153 {
1154 /* Saturates were only introduced to vertex programs in
1155 * NV_vertex_program3, so don't give them to drivers in the VP.
1156 */
1157 if (this->prog->Target == GL_VERTEX_PROGRAM_ARB)
1158 return false;
1159
1160 ir_rvalue *sat_src = ir->as_rvalue_to_saturate();
1161 if (!sat_src)
1162 return false;
1163
1164 sat_src->accept(this);
1165 st_src_reg src = this->result;
1166
1167 this->result = get_temp(ir->type);
1168 glsl_to_tgsi_instruction *inst;
1169 inst = emit(ir, TGSI_OPCODE_MOV, st_dst_reg(this->result), src);
1170 inst->saturate = true;
1171
1172 return true;
1173 }
1174
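/* Only one operand can use the address register directly. When several
 * sources need relative addressing, the extra ones are resolved into plain
 * temporaries right after their ARL so the final instruction carries at most
 * one reladdr operand.
 */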
1175 void
1176 glsl_to_tgsi_visitor::reladdr_to_temp(ir_instruction *ir,
1177 st_src_reg *reg, int *num_reladdr)
1178 {
1179 if (!reg->reladdr)
1180 return;
1181
1182 emit_arl(ir, address_reg, *reg->reladdr);
1183
1184 if (*num_reladdr != 1) {
1185 st_src_reg temp = get_temp(glsl_type::vec4_type);
1186
1187 emit(ir, TGSI_OPCODE_MOV, st_dst_reg(temp), *reg);
1188 *reg = temp;
1189 }
1190
1191 (*num_reladdr)--;
1192 }
1193
1194 void
1195 glsl_to_tgsi_visitor::visit(ir_expression *ir)
1196 {
1197 unsigned int operand;
1198 st_src_reg op[Elements(ir->operands)];
1199 st_src_reg result_src;
1200 st_dst_reg result_dst;
1201
1202 /* Quick peephole: Emit MAD(a, b, c) instead of ADD(MUL(a, b), c)
1203 */
1204 if (ir->operation == ir_binop_add) {
1205 if (try_emit_mad(ir, 1))
1206 return;
1207 if (try_emit_mad(ir, 0))
1208 return;
1209 }
1210 if (try_emit_sat(ir))
1211 return;
1212
1213 if (ir->operation == ir_quadop_vector)
1214 assert(!"ir_quadop_vector should have been lowered");
1215
1216 for (operand = 0; operand < ir->get_num_operands(); operand++) {
1217 this->result.file = PROGRAM_UNDEFINED;
1218 ir->operands[operand]->accept(this);
1219 if (this->result.file == PROGRAM_UNDEFINED) {
1220 ir_print_visitor v;
1221 printf("Failed to get tree for expression operand:\n");
1222 ir->operands[operand]->accept(&v);
1223 exit(1);
1224 }
1225 op[operand] = this->result;
1226
1227 /* Matrix expression operands should have been broken down to vector
1228 * operations already.
1229 */
1230 assert(!ir->operands[operand]->type->is_matrix());
1231 }
1232
1233 int vector_elements = ir->operands[0]->type->vector_elements;
1234 if (ir->operands[1]) {
1235 vector_elements = MAX2(vector_elements,
1236 ir->operands[1]->type->vector_elements);
1237 }
1238
1239 this->result.file = PROGRAM_UNDEFINED;
1240
1241 /* Storage for our result. Ideally for an assignment we'd be using
1242 * the actual storage for the result here, instead.
1243 */
1244 result_src = get_temp(ir->type);
1245 /* convenience for the emit functions below. */
1246 result_dst = st_dst_reg(result_src);
1247 /* Limit writes to the channels that will be used by result_src later.
1248 * This does limit this temp's use as a temporary for multi-instruction
1249 * sequences.
1250 */
1251 result_dst.writemask = (1 << ir->type->vector_elements) - 1;
1252
1253 switch (ir->operation) {
1254 case ir_unop_logic_not:
1255 emit(ir, TGSI_OPCODE_SEQ, result_dst, op[0], st_src_reg_for_type(result_dst.type, 0));
1256 break;
1257 case ir_unop_neg:
1258 assert(result_dst.type == GLSL_TYPE_FLOAT || result_dst.type == GLSL_TYPE_INT);
1259 if (result_dst.type == GLSL_TYPE_INT)
1260 emit(ir, TGSI_OPCODE_INEG, result_dst, op[0]);
1261 else {
1262 op[0].negate = ~op[0].negate;
1263 result_src = op[0];
1264 }
1265 break;
1266 case ir_unop_abs:
1267 assert(result_dst.type == GLSL_TYPE_FLOAT);
1268 emit(ir, TGSI_OPCODE_ABS, result_dst, op[0]);
1269 break;
1270 case ir_unop_sign:
1271 emit(ir, TGSI_OPCODE_SSG, result_dst, op[0]);
1272 break;
1273 case ir_unop_rcp:
1274 emit_scalar(ir, TGSI_OPCODE_RCP, result_dst, op[0]);
1275 break;
1276
1277 case ir_unop_exp2:
1278 emit_scalar(ir, TGSI_OPCODE_EX2, result_dst, op[0]);
1279 break;
1280 case ir_unop_exp:
1281 case ir_unop_log:
1282 assert(!"not reached: should be handled by ir_explog_to_explog2");
1283 break;
1284 case ir_unop_log2:
1285 emit_scalar(ir, TGSI_OPCODE_LG2, result_dst, op[0]);
1286 break;
1287 case ir_unop_sin:
1288 emit_scalar(ir, TGSI_OPCODE_SIN, result_dst, op[0]);
1289 break;
1290 case ir_unop_cos:
1291 emit_scalar(ir, TGSI_OPCODE_COS, result_dst, op[0]);
1292 break;
1293 case ir_unop_sin_reduced:
1294 emit_scs(ir, TGSI_OPCODE_SIN, result_dst, op[0]);
1295 break;
1296 case ir_unop_cos_reduced:
1297 emit_scs(ir, TGSI_OPCODE_COS, result_dst, op[0]);
1298 break;
1299
1300 case ir_unop_dFdx:
1301 emit(ir, TGSI_OPCODE_DDX, result_dst, op[0]);
1302 break;
1303 case ir_unop_dFdy:
1304 op[0].negate = ~op[0].negate;
1305 emit(ir, TGSI_OPCODE_DDY, result_dst, op[0]);
1306 break;
1307
1308 case ir_unop_noise: {
1309 /* At some point, a motivated person could add a better
1310 * implementation of noise. Currently not even the nvidia
1311 * binary drivers do anything more than this. In any case, the
1312 * place to do this is in the GL state tracker, not the poor
1313 * driver.
1314 */
1315 emit(ir, TGSI_OPCODE_MOV, result_dst, st_src_reg_for_float(0.5));
1316 break;
1317 }
1318
1319 case ir_binop_add:
1320 emit(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]);
1321 break;
1322 case ir_binop_sub:
1323 emit(ir, TGSI_OPCODE_SUB, result_dst, op[0], op[1]);
1324 break;
1325
1326 case ir_binop_mul:
1327 emit(ir, TGSI_OPCODE_MUL, result_dst, op[0], op[1]);
1328 break;
1329 case ir_binop_div:
1330 if (result_dst.type == GLSL_TYPE_FLOAT)
1331 assert(!"not reached: should be handled by ir_div_to_mul_rcp");
1332 else
1333 emit(ir, TGSI_OPCODE_DIV, result_dst, op[0], op[1]);
1334 break;
1335 case ir_binop_mod:
1336 if (result_dst.type == GLSL_TYPE_FLOAT)
1337 assert(!"ir_binop_mod should have been converted to b * fract(a/b)");
1338 else
1339 emit(ir, TGSI_OPCODE_MOD, result_dst, op[0], op[1]);
1340 break;
1341
1342 case ir_binop_less:
1343 emit(ir, TGSI_OPCODE_SLT, result_dst, op[0], op[1]);
1344 break;
1345 case ir_binop_greater:
1346 emit(ir, TGSI_OPCODE_SGT, result_dst, op[0], op[1]);
1347 break;
1348 case ir_binop_lequal:
1349 emit(ir, TGSI_OPCODE_SLE, result_dst, op[0], op[1]);
1350 break;
1351 case ir_binop_gequal:
1352 emit(ir, TGSI_OPCODE_SGE, result_dst, op[0], op[1]);
1353 break;
1354 case ir_binop_equal:
1355 emit(ir, TGSI_OPCODE_SEQ, result_dst, op[0], op[1]);
1356 break;
1357 case ir_binop_nequal:
1358 emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]);
1359 break;
1360 case ir_binop_all_equal:
1361 /* "==" operator producing a scalar boolean. */
1362 if (ir->operands[0]->type->is_vector() ||
1363 ir->operands[1]->type->is_vector()) {
1364 st_src_reg temp = get_temp(glsl_version >= 130 ?
1365 glsl_type::get_instance(ir->operands[0]->type->base_type, 4, 1) :
1366 glsl_type::vec4_type);
1367 assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT);
1368 emit(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]);
1369 emit_dp(ir, result_dst, temp, temp, vector_elements);
1370 emit(ir, TGSI_OPCODE_SEQ, result_dst, result_src, st_src_reg_for_float(0.0));
1371 } else {
1372 emit(ir, TGSI_OPCODE_SEQ, result_dst, op[0], op[1]);
1373 }
1374 break;
1375 case ir_binop_any_nequal:
1376 /* "!=" operator producing a scalar boolean. */
1377 if (ir->operands[0]->type->is_vector() ||
1378 ir->operands[1]->type->is_vector()) {
1379 st_src_reg temp = get_temp(glsl_version >= 130 ?
1380 glsl_type::get_instance(ir->operands[0]->type->base_type, 4, 1) :
1381 glsl_type::vec4_type);
1382 assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT);
1383 emit(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]);
1384 emit_dp(ir, result_dst, temp, temp, vector_elements);
1385 emit(ir, TGSI_OPCODE_SNE, result_dst, result_src, st_src_reg_for_float(0.0));
1386 } else {
1387 emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]);
1388 }
1389 break;
1390
1391 case ir_unop_any:
1392 assert(ir->operands[0]->type->is_vector());
1393 emit_dp(ir, result_dst, op[0], op[0],
1394 ir->operands[0]->type->vector_elements);
1395 emit(ir, TGSI_OPCODE_SNE, result_dst, result_src, st_src_reg_for_float(0.0));
1396 break;
1397
1398 case ir_binop_logic_xor:
1399 emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]);
1400 break;
1401
1402 case ir_binop_logic_or:
1403 /* This could be a saturated add and skip the SNE. */
1404 emit(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]);
1405 emit(ir, TGSI_OPCODE_SNE, result_dst, result_src, st_src_reg_for_float(0.0));
1406 break;
1407
1408 case ir_binop_logic_and:
1409 /* the bool args are stored as float 0.0 or 1.0, so "mul" gives us "and". */
1410 emit(ir, TGSI_OPCODE_MUL, result_dst, op[0], op[1]);
1411 break;
1412
1413 case ir_binop_dot:
1414 assert(ir->operands[0]->type->is_vector());
1415 assert(ir->operands[0]->type == ir->operands[1]->type);
1416 emit_dp(ir, result_dst, op[0], op[1],
1417 ir->operands[0]->type->vector_elements);
1418 break;
1419
1420 case ir_unop_sqrt:
1421 /* sqrt(x) = x * rsq(x). */
1422 emit_scalar(ir, TGSI_OPCODE_RSQ, result_dst, op[0]);
1423 emit(ir, TGSI_OPCODE_MUL, result_dst, result_src, op[0]);
1424 /* For incoming channels <= 0, set the result to 0. */
1425 op[0].negate = ~op[0].negate;
1426 emit(ir, TGSI_OPCODE_CMP, result_dst,
1427 op[0], result_src, st_src_reg_for_float(0.0));
1428 break;
1429 case ir_unop_rsq:
1430 emit_scalar(ir, TGSI_OPCODE_RSQ, result_dst, op[0]);
1431 break;
1432 case ir_unop_i2f:
1433 case ir_unop_b2f:
1434 if (glsl_version >= 130) {
1435 emit(ir, TGSI_OPCODE_I2F, result_dst, op[0]);
1436 break;
1437 }
1438 case ir_unop_b2i:
1439 /* Booleans are stored as integers (or floats in GLSL 1.20 and lower). */
1440 result_src = op[0];
1441 break;
1442 case ir_unop_f2i:
1443 if (glsl_version >= 130)
1444 emit(ir, TGSI_OPCODE_F2I, result_dst, op[0]);
1445 else
1446 emit(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]);
1447 break;
1448 case ir_unop_f2b:
1449 case ir_unop_i2b:
1450 emit(ir, TGSI_OPCODE_SNE, result_dst, op[0],
1451 st_src_reg_for_type(result_dst.type, 0));
1452 break;
1453 case ir_unop_trunc:
1454 emit(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]);
1455 break;
1456 case ir_unop_ceil:
1457 op[0].negate = ~op[0].negate;
1458 emit(ir, TGSI_OPCODE_FLR, result_dst, op[0]);
1459 result_src.negate = ~result_src.negate;
1460 break;
1461 case ir_unop_floor:
1462 emit(ir, TGSI_OPCODE_FLR, result_dst, op[0]);
1463 break;
1464 case ir_unop_fract:
1465 emit(ir, TGSI_OPCODE_FRC, result_dst, op[0]);
1466 break;
1467
1468 case ir_binop_min:
1469 emit(ir, TGSI_OPCODE_MIN, result_dst, op[0], op[1]);
1470 break;
1471 case ir_binop_max:
1472 emit(ir, TGSI_OPCODE_MAX, result_dst, op[0], op[1]);
1473 break;
1474 case ir_binop_pow:
1475 emit_scalar(ir, TGSI_OPCODE_POW, result_dst, op[0], op[1]);
1476 break;
1477
1478 case ir_unop_bit_not:
1479 if (glsl_version >= 130) {
1480 emit(ir, TGSI_OPCODE_NOT, result_dst, op[0]);
1481 break;
1482 }
1483 case ir_unop_u2f:
1484 if (glsl_version >= 130) {
1485 emit(ir, TGSI_OPCODE_U2F, result_dst, op[0]);
1486 break;
1487 }
1488 case ir_binop_lshift:
1489 if (glsl_version >= 130) {
1490          emit(ir, TGSI_OPCODE_SHL, result_dst, op[0], op[1]);
1491 break;
1492 }
1493 case ir_binop_rshift:
1494 if (glsl_version >= 130) {
1495          emit(ir, TGSI_OPCODE_ISHR, result_dst, op[0], op[1]);
1496 break;
1497 }
1498 case ir_binop_bit_and:
1499 if (glsl_version >= 130) {
1500          emit(ir, TGSI_OPCODE_AND, result_dst, op[0], op[1]);
1501 break;
1502 }
1503 case ir_binop_bit_xor:
1504 if (glsl_version >= 130) {
1505          emit(ir, TGSI_OPCODE_XOR, result_dst, op[0], op[1]);
1506 break;
1507 }
1508 case ir_binop_bit_or:
1509 if (glsl_version >= 130) {
1510          emit(ir, TGSI_OPCODE_OR, result_dst, op[0], op[1]);
1511 break;
1512 }
1513 case ir_unop_round_even:
1514 assert(!"GLSL 1.30 features unsupported");
1515 break;
1516
1517 case ir_quadop_vector:
1518 /* This operation should have already been handled.
1519 */
1520 assert(!"Should not get here.");
1521 break;
1522 }
1523
1524 this->result = result_src;
1525 }
1526
1527
1528 void
1529 glsl_to_tgsi_visitor::visit(ir_swizzle *ir)
1530 {
1531 st_src_reg src;
1532 int i;
1533 int swizzle[4];
1534
1535 /* Note that this is only swizzles in expressions, not those on the left
1536 * hand side of an assignment, which do write masking. See ir_assignment
1537 * for that.
1538 */
1539
1540 ir->val->accept(this);
1541 src = this->result;
1542 assert(src.file != PROGRAM_UNDEFINED);
1543
1544 for (i = 0; i < 4; i++) {
1545 if (i < ir->type->vector_elements) {
1546 switch (i) {
1547 case 0:
1548 swizzle[i] = GET_SWZ(src.swizzle, ir->mask.x);
1549 break;
1550 case 1:
1551 swizzle[i] = GET_SWZ(src.swizzle, ir->mask.y);
1552 break;
1553 case 2:
1554 swizzle[i] = GET_SWZ(src.swizzle, ir->mask.z);
1555 break;
1556 case 3:
1557 swizzle[i] = GET_SWZ(src.swizzle, ir->mask.w);
1558 break;
1559 }
1560 } else {
1561 /* If the type is smaller than a vec4, replicate the last
1562 * channel out.
1563 */
1564 swizzle[i] = swizzle[ir->type->vector_elements - 1];
1565 }
1566 }
1567
1568 src.swizzle = MAKE_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]);
1569
1570 this->result = src;
1571 }
1572
1573 void
1574 glsl_to_tgsi_visitor::visit(ir_dereference_variable *ir)
1575 {
1576 variable_storage *entry = find_variable_storage(ir->var);
1577 ir_variable *var = ir->var;
1578
1579 if (!entry) {
1580 switch (var->mode) {
1581 case ir_var_uniform:
1582 entry = new(mem_ctx) variable_storage(var, PROGRAM_UNIFORM,
1583 var->location);
1584 this->variables.push_tail(entry);
1585 break;
1586 case ir_var_in:
1587 case ir_var_inout:
1588 /* The linker assigns locations for varyings and attributes,
1589           * including deprecated builtins (like gl_Color), user-assigned
1590           * generic attributes (glBindAttribLocation), and
1591 * user-defined varyings.
1592 *
1593 * FINISHME: We would hit this path for function arguments. Fix!
1594 */
1595 assert(var->location != -1);
1596 entry = new(mem_ctx) variable_storage(var,
1597 PROGRAM_INPUT,
1598 var->location);
1599 if (this->prog->Target == GL_VERTEX_PROGRAM_ARB &&
1600 var->location >= VERT_ATTRIB_GENERIC0) {
1601 _mesa_add_attribute(this->prog->Attributes,
1602 var->name,
1603 _mesa_sizeof_glsl_type(var->type->gl_type),
1604 var->type->gl_type,
1605 var->location - VERT_ATTRIB_GENERIC0);
1606 }
1607 break;
1608 case ir_var_out:
1609 assert(var->location != -1);
1610 entry = new(mem_ctx) variable_storage(var,
1611 PROGRAM_OUTPUT,
1612 var->location);
1613 break;
1614 case ir_var_system_value:
1615 entry = new(mem_ctx) variable_storage(var,
1616 PROGRAM_SYSTEM_VALUE,
1617 var->location);
1618 break;
1619 case ir_var_auto:
1620 case ir_var_temporary:
1621 entry = new(mem_ctx) variable_storage(var, PROGRAM_TEMPORARY,
1622 this->next_temp);
1623 this->variables.push_tail(entry);
1624
1625 next_temp += type_size(var->type);
1626 break;
1627 }
1628
1629 if (!entry) {
1630 printf("Failed to make storage for %s\n", var->name);
1631 exit(1);
1632 }
1633 }
1634
1635 this->result = st_src_reg(entry->file, entry->index, var->type);
1636 if (glsl_version <= 120)
1637 this->result.type = GLSL_TYPE_FLOAT;
1638 }
1639
1640 void
1641 glsl_to_tgsi_visitor::visit(ir_dereference_array *ir)
1642 {
1643 ir_constant *index;
1644 st_src_reg src;
1645 int element_size = type_size(ir->type);
1646
1647 index = ir->array_index->constant_expression_value();
1648
1649 ir->array->accept(this);
1650 src = this->result;
1651
1652 if (index) {
1653 src.index += index->value.i[0] * element_size;
1654 } else {
1655 st_src_reg array_base = this->result;
1656 /* Variable index array dereference. It eats the "vec4" of the
1657 * base of the array and an index that offsets the Mesa register
1658 * index.
1659 */
1660 ir->array_index->accept(this);
1661
1662 st_src_reg index_reg;
1663
1664 if (element_size == 1) {
1665 index_reg = this->result;
1666 } else {
1667 index_reg = get_temp(glsl_type::float_type);
1668
1669 emit(ir, TGSI_OPCODE_MUL, st_dst_reg(index_reg),
1670 this->result, st_src_reg_for_float(element_size));
1671 }
1672
1673 src.reladdr = ralloc(mem_ctx, st_src_reg);
1674 memcpy(src.reladdr, &index_reg, sizeof(index_reg));
1675 }
1676
1677 /* If the type is smaller than a vec4, replicate the last channel out. */
1678 if (ir->type->is_scalar() || ir->type->is_vector())
1679 src.swizzle = swizzle_for_size(ir->type->vector_elements);
1680 else
1681 src.swizzle = SWIZZLE_NOOP;
1682
1683 this->result = src;
1684 }
1685
1686 void
1687 glsl_to_tgsi_visitor::visit(ir_dereference_record *ir)
1688 {
1689 unsigned int i;
1690 const glsl_type *struct_type = ir->record->type;
1691 int offset = 0;
1692
1693 ir->record->accept(this);
1694
1695 for (i = 0; i < struct_type->length; i++) {
1696 if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0)
1697 break;
1698 offset += type_size(struct_type->fields.structure[i].type);
1699 }
1700
1701 /* If the type is smaller than a vec4, replicate the last channel out. */
1702 if (ir->type->is_scalar() || ir->type->is_vector())
1703 this->result.swizzle = swizzle_for_size(ir->type->vector_elements);
1704 else
1705 this->result.swizzle = SWIZZLE_NOOP;
1706
1707 this->result.index += offset;
1708 }
1709
1710 /**
1711 * We want to be careful in assignment setup to hit the actual storage
1712 * instead of potentially using a temporary like we might with the
1713 * ir_dereference handler.
1714 */
1715 static st_dst_reg
1716 get_assignment_lhs(ir_dereference *ir, glsl_to_tgsi_visitor *v)
1717 {
1718 /* The LHS must be a dereference. If the LHS is a variable indexed array
1719     * access of a vector, it must be separated into a series of conditional moves
1720 * before reaching this point (see ir_vec_index_to_cond_assign).
1721 */
1722 assert(ir->as_dereference());
1723 ir_dereference_array *deref_array = ir->as_dereference_array();
1724 if (deref_array) {
1725 assert(!deref_array->array->type->is_vector());
1726 }
1727
1728 /* Use the rvalue deref handler for the most part. We'll ignore
1729 * swizzles in it and write swizzles using writemask, though.
1730 */
1731 ir->accept(v);
1732 return st_dst_reg(v->result);
1733 }
1734
1735 /**
1736 * Process the condition of a conditional assignment
1737 *
1738 * Examines the condition of a conditional assignment to generate the optimal
1739 * first operand of a \c CMP instruction. If the condition is a relational
1740 * operator with 0 (e.g., \c ir_binop_less), the value being compared will be
1741 * used as the source for the \c CMP instruction. Otherwise the comparison
1742 * is processed to a boolean result, and the boolean result is used as the
1743 * operand to the CMP instruction.
1744 */
1745 bool
1746 glsl_to_tgsi_visitor::process_move_condition(ir_rvalue *ir)
1747 {
1748 ir_rvalue *src_ir = ir;
1749 bool negate = true;
1750 bool switch_order = false;
1751
1752 ir_expression *const expr = ir->as_expression();
1753 if ((expr != NULL) && (expr->get_num_operands() == 2)) {
1754 bool zero_on_left = false;
1755
1756 if (expr->operands[0]->is_zero()) {
1757 src_ir = expr->operands[1];
1758 zero_on_left = true;
1759 } else if (expr->operands[1]->is_zero()) {
1760 src_ir = expr->operands[0];
1761 zero_on_left = false;
1762 }
1763
1764 /* a is - 0 + - 0 +
1765 * (a < 0) T F F ( a < 0) T F F
1766 * (0 < a) F F T (-a < 0) F F T
1767 * (a <= 0) T T F (-a < 0) F F T (swap order of other operands)
1768 * (0 <= a) F T T ( a < 0) T F F (swap order of other operands)
1769 * (a > 0) F F T (-a < 0) F F T
1770 * (0 > a) T F F ( a < 0) T F F
1771 * (a >= 0) F T T ( a < 0) T F F (swap order of other operands)
1772 * (0 >= a) T T F (-a < 0) F F T (swap order of other operands)
1773 *
1774 * Note that exchanging the order of 0 and 'a' in the comparison simply
1775 * means that the value of 'a' should be negated.
1776 */
1777 if (src_ir != ir) {
1778 switch (expr->operation) {
1779 case ir_binop_less:
1780 switch_order = false;
1781 negate = zero_on_left;
1782 break;
1783
1784 case ir_binop_greater:
1785 switch_order = false;
1786 negate = !zero_on_left;
1787 break;
1788
1789 case ir_binop_lequal:
1790 switch_order = true;
1791 negate = !zero_on_left;
1792 break;
1793
1794 case ir_binop_gequal:
1795 switch_order = true;
1796 negate = zero_on_left;
1797 break;
1798
1799 default:
1800             /* This isn't the right kind of comparison after all, so make sure
1801 * the whole condition is visited.
1802 */
1803 src_ir = ir;
1804 break;
1805 }
1806 }
1807 }
1808
1809 src_ir->accept(this);
1810
1811 /* We use the TGSI_OPCODE_CMP (a < 0 ? b : c) for conditional moves, and the
1812 * condition we produced is 0.0 or 1.0. By flipping the sign, we can
1813 * choose which value TGSI_OPCODE_CMP produces without an extra instruction
1814 * computing the condition.
1815 */
1816 if (negate)
1817 this->result.negate = ~this->result.negate;
1818
1819 return switch_order;
1820 }
1821
1822 void
1823 glsl_to_tgsi_visitor::visit(ir_assignment *ir)
1824 {
1825 st_dst_reg l;
1826 st_src_reg r;
1827 int i;
1828
1829 ir->rhs->accept(this);
1830 r = this->result;
1831
1832 l = get_assignment_lhs(ir->lhs, this);
1833
1834 /* FINISHME: This should really set to the correct maximal writemask for each
1835 * FINISHME: component written (in the loops below). This case can only
1836 * FINISHME: occur for matrices, arrays, and structures.
1837 */
1838 if (ir->write_mask == 0) {
1839 assert(!ir->lhs->type->is_scalar() && !ir->lhs->type->is_vector());
1840 l.writemask = WRITEMASK_XYZW;
1841 } else if (ir->lhs->type->is_scalar()) {
1842 /* FINISHME: This hack makes writing to gl_FragDepth, which lives in the
1843 * FINISHME: W component of fragment shader output zero, work correctly.
1844 */
1845 l.writemask = WRITEMASK_XYZW;
1846 } else {
1847 int swizzles[4];
1848 int first_enabled_chan = 0;
1849 int rhs_chan = 0;
1850
1851 assert(ir->lhs->type->is_vector());
1852 l.writemask = ir->write_mask;
1853
1854 for (int i = 0; i < 4; i++) {
1855 if (l.writemask & (1 << i)) {
1856 first_enabled_chan = GET_SWZ(r.swizzle, i);
1857 break;
1858 }
1859 }
1860
1861          /* Swizzle a small RHS vector into the channels being written.
1862 *
1863 * glsl ir treats write_mask as dictating how many channels are
1864 * present on the RHS while Mesa IR treats write_mask as just
1865 * showing which channels of the vec4 RHS get written.
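          * For example, storing a vec2 into the .xz channels of a vec4
          * rewrites the RHS swizzle to .xxyx, so the x and z slots read the
          * two RHS components.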
1866 */
1867 for (int i = 0; i < 4; i++) {
1868 if (l.writemask & (1 << i))
1869 swizzles[i] = GET_SWZ(r.swizzle, rhs_chan++);
1870 else
1871 swizzles[i] = first_enabled_chan;
1872 }
1873 r.swizzle = MAKE_SWIZZLE4(swizzles[0], swizzles[1],
1874 swizzles[2], swizzles[3]);
1875 }
1876
1877 assert(l.file != PROGRAM_UNDEFINED);
1878 assert(r.file != PROGRAM_UNDEFINED);
1879
1880 if (ir->condition) {
1881 const bool switch_order = this->process_move_condition(ir->condition);
1882 st_src_reg condition = this->result;
1883
1884 for (i = 0; i < type_size(ir->lhs->type); i++) {
1885 if (switch_order) {
1886 emit(ir, TGSI_OPCODE_CMP, l, condition, st_src_reg(l), r);
1887 } else {
1888 emit(ir, TGSI_OPCODE_CMP, l, condition, r, st_src_reg(l));
1889 }
1890
1891 l.index++;
1892 r.index++;
1893 }
1894 } else {
1895 for (i = 0; i < type_size(ir->lhs->type); i++) {
1896 emit(ir, TGSI_OPCODE_MOV, l, r);
1897 l.index++;
1898 r.index++;
1899 }
1900 }
1901 }
1902
1903
1904 void
1905 glsl_to_tgsi_visitor::visit(ir_constant *ir)
1906 {
1907 st_src_reg src;
1908 GLfloat stack_vals[4] = { 0 };
1909 gl_constant_value *values = (gl_constant_value *) stack_vals;
1910 GLenum gl_type = GL_NONE;
1911 unsigned int i;
1912
1913 /* Unfortunately, 4 floats is all we can get into
1914 * _mesa_add_unnamed_constant. So, make a temp to store an
1915 * aggregate constant and move each constant value into it. If we
1916 * get lucky, copy propagation will eliminate the extra moves.
1917 */
1918 if (ir->type->base_type == GLSL_TYPE_STRUCT) {
1919 st_src_reg temp_base = get_temp(ir->type);
1920 st_dst_reg temp = st_dst_reg(temp_base);
1921
1922 foreach_iter(exec_list_iterator, iter, ir->components) {
1923 ir_constant *field_value = (ir_constant *)iter.get();
1924 int size = type_size(field_value->type);
1925
1926 assert(size > 0);
1927
1928 field_value->accept(this);
1929 src = this->result;
1930
1931 for (i = 0; i < (unsigned int)size; i++) {
1932 emit(ir, TGSI_OPCODE_MOV, temp, src);
1933
1934 src.index++;
1935 temp.index++;
1936 }
1937 }
1938 this->result = temp_base;
1939 return;
1940 }
1941
1942 if (ir->type->is_array()) {
1943 st_src_reg temp_base = get_temp(ir->type);
1944 st_dst_reg temp = st_dst_reg(temp_base);
1945 int size = type_size(ir->type->fields.array);
1946
1947 assert(size > 0);
1948
1949 for (i = 0; i < ir->type->length; i++) {
1950 ir->array_elements[i]->accept(this);
1951 src = this->result;
1952 for (int j = 0; j < size; j++) {
1953 emit(ir, TGSI_OPCODE_MOV, temp, src);
1954
1955 src.index++;
1956 temp.index++;
1957 }
1958 }
1959 this->result = temp_base;
1960 return;
1961 }
1962
1963 if (ir->type->is_matrix()) {
1964 st_src_reg mat = get_temp(ir->type);
1965 st_dst_reg mat_column = st_dst_reg(mat);
1966
1967 for (i = 0; i < ir->type->matrix_columns; i++) {
1968 assert(ir->type->base_type == GLSL_TYPE_FLOAT);
1969 values = (gl_constant_value *) &ir->value.f[i * ir->type->vector_elements];
1970
1971 src = st_src_reg(PROGRAM_CONSTANT, -1, ir->type->base_type);
1972 src.index = _mesa_add_typed_unnamed_constant(this->prog->Parameters,
1973 values,
1974 ir->type->vector_elements,
1975 GL_FLOAT,
1976 &src.swizzle);
1977 emit(ir, TGSI_OPCODE_MOV, mat_column, src);
1978
1979 mat_column.index++;
1980 }
1981
1982 this->result = mat;
1983 return;
1984 }
1985
1986 src.file = PROGRAM_CONSTANT;
1987 switch (ir->type->base_type) {
1988 case GLSL_TYPE_FLOAT:
1989 gl_type = GL_FLOAT;
1990 for (i = 0; i < ir->type->vector_elements; i++) {
1991 values[i].f = ir->value.f[i];
1992 }
1993 break;
1994 case GLSL_TYPE_UINT:
1995 gl_type = glsl_version >= 130 ? GL_UNSIGNED_INT : GL_FLOAT;
1996 for (i = 0; i < ir->type->vector_elements; i++) {
1997 if (glsl_version >= 130)
1998 values[i].u = ir->value.u[i];
1999 else
2000 values[i].f = ir->value.u[i];
2001 }
2002 break;
2003 case GLSL_TYPE_INT:
2004 gl_type = glsl_version >= 130 ? GL_INT : GL_FLOAT;
2005 for (i = 0; i < ir->type->vector_elements; i++) {
2006 if (glsl_version >= 130)
2007 values[i].i = ir->value.i[i];
2008 else
2009 values[i].f = ir->value.i[i];
2010 }
2011 break;
2012 case GLSL_TYPE_BOOL:
2013 gl_type = glsl_version >= 130 ? GL_BOOL : GL_FLOAT;
2014 for (i = 0; i < ir->type->vector_elements; i++) {
2015 if (glsl_version >= 130)
2016 values[i].b = ir->value.b[i];
2017 else
2018 values[i].f = ir->value.b[i];
2019 }
2020 break;
2021 default:
2022 assert(!"Non-float/uint/int/bool constant");
2023 }
2024
2025 this->result = st_src_reg(PROGRAM_CONSTANT, -1, ir->type);
2026 this->result.index = _mesa_add_typed_unnamed_constant(this->prog->Parameters,
2027 values, ir->type->vector_elements, gl_type,
2028 &this->result.swizzle);
2029 }
2030
2031 function_entry *
2032 glsl_to_tgsi_visitor::get_function_signature(ir_function_signature *sig)
2033 {
2034 function_entry *entry;
2035
2036 foreach_iter(exec_list_iterator, iter, this->function_signatures) {
2037 entry = (function_entry *)iter.get();
2038
2039 if (entry->sig == sig)
2040 return entry;
2041 }
2042
2043 entry = ralloc(mem_ctx, function_entry);
2044 entry->sig = sig;
2045 entry->sig_id = this->next_signature_id++;
2046 entry->bgn_inst = NULL;
2047
2048 /* Allocate storage for all the parameters. */
2049 foreach_iter(exec_list_iterator, iter, sig->parameters) {
2050 ir_variable *param = (ir_variable *)iter.get();
2051 variable_storage *storage;
2052
2053 storage = find_variable_storage(param);
2054 assert(!storage);
2055
2056 storage = new(mem_ctx) variable_storage(param, PROGRAM_TEMPORARY,
2057 this->next_temp);
2058 this->variables.push_tail(storage);
2059
2060 this->next_temp += type_size(param->type);
2061 }
2062
2063 if (!sig->return_type->is_void()) {
2064 entry->return_reg = get_temp(sig->return_type);
2065 } else {
2066 entry->return_reg = undef_src;
2067 }
2068
2069 this->function_signatures.push_tail(entry);
2070 return entry;
2071 }
2072
2073 void
2074 glsl_to_tgsi_visitor::visit(ir_call *ir)
2075 {
2076 glsl_to_tgsi_instruction *call_inst;
2077 ir_function_signature *sig = ir->get_callee();
2078 function_entry *entry = get_function_signature(sig);
2079 int i;
2080
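   /* Function calls use a simple copy-in/copy-out convention: "in" and
    * "inout" arguments are MOVed into the callee's parameter storage,
    * TGSI_OPCODE_CAL transfers control, "out" and "inout" parameters are
    * MOVed back, and the return value is left in entry->return_reg.
    */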
2081 /* Process in parameters. */
2082 exec_list_iterator sig_iter = sig->parameters.iterator();
2083 foreach_iter(exec_list_iterator, iter, *ir) {
2084 ir_rvalue *param_rval = (ir_rvalue *)iter.get();
2085 ir_variable *param = (ir_variable *)sig_iter.get();
2086
2087 if (param->mode == ir_var_in ||
2088 param->mode == ir_var_inout) {
2089 variable_storage *storage = find_variable_storage(param);
2090 assert(storage);
2091
2092 param_rval->accept(this);
2093 st_src_reg r = this->result;
2094
2095 st_dst_reg l;
2096 l.file = storage->file;
2097 l.index = storage->index;
2098 l.reladdr = NULL;
2099 l.writemask = WRITEMASK_XYZW;
2100 l.cond_mask = COND_TR;
2101
2102 for (i = 0; i < type_size(param->type); i++) {
2103 emit(ir, TGSI_OPCODE_MOV, l, r);
2104 l.index++;
2105 r.index++;
2106 }
2107 }
2108
2109 sig_iter.next();
2110 }
2111 assert(!sig_iter.has_next());
2112
2113 /* Emit call instruction */
2114 call_inst = emit(ir, TGSI_OPCODE_CAL);
2115 call_inst->function = entry;
2116
2117 /* Process out parameters. */
2118 sig_iter = sig->parameters.iterator();
2119 foreach_iter(exec_list_iterator, iter, *ir) {
2120 ir_rvalue *param_rval = (ir_rvalue *)iter.get();
2121 ir_variable *param = (ir_variable *)sig_iter.get();
2122
2123 if (param->mode == ir_var_out ||
2124 param->mode == ir_var_inout) {
2125 variable_storage *storage = find_variable_storage(param);
2126 assert(storage);
2127
2128 st_src_reg r;
2129 r.file = storage->file;
2130 r.index = storage->index;
2131 r.reladdr = NULL;
2132 r.swizzle = SWIZZLE_NOOP;
2133 r.negate = 0;
2134
2135 param_rval->accept(this);
2136 st_dst_reg l = st_dst_reg(this->result);
2137
2138 for (i = 0; i < type_size(param->type); i++) {
2139 emit(ir, TGSI_OPCODE_MOV, l, r);
2140 l.index++;
2141 r.index++;
2142 }
2143 }
2144
2145 sig_iter.next();
2146 }
2147 assert(!sig_iter.has_next());
2148
2149 /* Process return value. */
2150 this->result = entry->return_reg;
2151 }
2152
2153 void
2154 glsl_to_tgsi_visitor::visit(ir_texture *ir)
2155 {
2156 st_src_reg result_src, coord, lod_info, projector, dx, dy;
2157 st_dst_reg result_dst, coord_dst;
2158 glsl_to_tgsi_instruction *inst = NULL;
2159 unsigned opcode = TGSI_OPCODE_NOP;
2160
2161 ir->coordinate->accept(this);
2162
2163 /* Put our coords in a temp. We'll need to modify them for shadow,
2164 * projection, or LOD, so the only case we'd use it as-is is if
2165 * we're doing plain old texturing. The copy propagation pass should
2166 * handle cleaning up our mess in that case.
2167 */
2168 coord = get_temp(glsl_type::vec4_type);
2169 coord_dst = st_dst_reg(coord);
2170 emit(ir, TGSI_OPCODE_MOV, coord_dst, this->result);
2171
2172 if (ir->projector) {
2173 ir->projector->accept(this);
2174 projector = this->result;
2175 }
2176
2177 /* Storage for our result. Ideally for an assignment we'd be using
2178 * the actual storage for the result here, instead.
2179 */
2180 result_src = get_temp(glsl_type::vec4_type);
2181 result_dst = st_dst_reg(result_src);
2182
2183 switch (ir->op) {
2184 case ir_tex:
2185 opcode = TGSI_OPCODE_TEX;
2186 break;
2187 case ir_txb:
2188 opcode = TGSI_OPCODE_TXB;
2189 ir->lod_info.bias->accept(this);
2190 lod_info = this->result;
2191 break;
2192 case ir_txl:
2193 opcode = TGSI_OPCODE_TXL;
2194 ir->lod_info.lod->accept(this);
2195 lod_info = this->result;
2196 break;
2197 case ir_txd:
2198 opcode = TGSI_OPCODE_TXD;
2199 ir->lod_info.grad.dPdx->accept(this);
2200 dx = this->result;
2201 ir->lod_info.grad.dPdy->accept(this);
2202 dy = this->result;
2203 break;
2204 case ir_txf: /* TODO: use TGSI_OPCODE_TXF here */
2205 assert(!"GLSL 1.30 features unsupported");
2206 break;
2207 }
2208
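   /* The code below packs the extra operands into the coordinate temp
    * following TGSI's conventions: the projector (for TXP) or the LOD /
    * LOD bias (for TXL / TXB) goes in the .w channel, and the shadow
    * reference value goes in .z.
    */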
2209 if (ir->projector) {
2210 if (opcode == TGSI_OPCODE_TEX) {
2211 /* Slot the projector in as the last component of the coord. */
2212 coord_dst.writemask = WRITEMASK_W;
2213 emit(ir, TGSI_OPCODE_MOV, coord_dst, projector);
2214 coord_dst.writemask = WRITEMASK_XYZW;
2215 opcode = TGSI_OPCODE_TXP;
2216 } else {
2217 st_src_reg coord_w = coord;
2218 coord_w.swizzle = SWIZZLE_WWWW;
2219
2220 /* For the other TEX opcodes there's no projective version
2221 * since the last slot is taken up by LOD info. Do the
2222 * projective divide now.
2223 */
2224 coord_dst.writemask = WRITEMASK_W;
2225 emit(ir, TGSI_OPCODE_RCP, coord_dst, projector);
2226
2227 /* In the case where we have to project the coordinates "by hand,"
2228 * the shadow comparator value must also be projected.
2229 */
2230 st_src_reg tmp_src = coord;
2231 if (ir->shadow_comparitor) {
2232 /* Slot the shadow value in as the second to last component of the
2233 * coord.
2234 */
2235 ir->shadow_comparitor->accept(this);
2236
2237 tmp_src = get_temp(glsl_type::vec4_type);
2238 st_dst_reg tmp_dst = st_dst_reg(tmp_src);
2239
2240 tmp_dst.writemask = WRITEMASK_Z;
2241 emit(ir, TGSI_OPCODE_MOV, tmp_dst, this->result);
2242
2243 tmp_dst.writemask = WRITEMASK_XY;
2244 emit(ir, TGSI_OPCODE_MOV, tmp_dst, coord);
2245 }
2246
2247 coord_dst.writemask = WRITEMASK_XYZ;
2248 emit(ir, TGSI_OPCODE_MUL, coord_dst, tmp_src, coord_w);
2249
2250 coord_dst.writemask = WRITEMASK_XYZW;
2251 coord.swizzle = SWIZZLE_XYZW;
2252 }
2253 }
2254
2255 /* If projection is done and the opcode is not TGSI_OPCODE_TXP, then the shadow
2256 * comparator was put in the correct place (and projected) by the code,
2257 * above, that handles by-hand projection.
2258 */
2259 if (ir->shadow_comparitor && (!ir->projector || opcode == TGSI_OPCODE_TXP)) {
2260 /* Slot the shadow value in as the second to last component of the
2261 * coord.
2262 */
2263 ir->shadow_comparitor->accept(this);
2264 coord_dst.writemask = WRITEMASK_Z;
2265 emit(ir, TGSI_OPCODE_MOV, coord_dst, this->result);
2266 coord_dst.writemask = WRITEMASK_XYZW;
2267 }
2268
2269 if (opcode == TGSI_OPCODE_TXL || opcode == TGSI_OPCODE_TXB) {
2270 /* TGSI stores LOD or LOD bias in the last channel of the coords. */
2271 coord_dst.writemask = WRITEMASK_W;
2272 emit(ir, TGSI_OPCODE_MOV, coord_dst, lod_info);
2273 coord_dst.writemask = WRITEMASK_XYZW;
2274 }
2275
2276 if (opcode == TGSI_OPCODE_TXD)
2277 inst = emit(ir, opcode, result_dst, coord, dx, dy);
2278 else
2279 inst = emit(ir, opcode, result_dst, coord);
2280
2281 if (ir->shadow_comparitor)
2282 inst->tex_shadow = GL_TRUE;
2283
2284 inst->sampler = _mesa_get_sampler_uniform_value(ir->sampler,
2285 this->shader_program,
2286 this->prog);
2287
2288 const glsl_type *sampler_type = ir->sampler->type;
2289
2290 switch (sampler_type->sampler_dimensionality) {
2291 case GLSL_SAMPLER_DIM_1D:
2292 inst->tex_target = (sampler_type->sampler_array)
2293 ? TEXTURE_1D_ARRAY_INDEX : TEXTURE_1D_INDEX;
2294 break;
2295 case GLSL_SAMPLER_DIM_2D:
2296 inst->tex_target = (sampler_type->sampler_array)
2297 ? TEXTURE_2D_ARRAY_INDEX : TEXTURE_2D_INDEX;
2298 break;
2299 case GLSL_SAMPLER_DIM_3D:
2300 inst->tex_target = TEXTURE_3D_INDEX;
2301 break;
2302 case GLSL_SAMPLER_DIM_CUBE:
2303 inst->tex_target = TEXTURE_CUBE_INDEX;
2304 break;
2305 case GLSL_SAMPLER_DIM_RECT:
2306 inst->tex_target = TEXTURE_RECT_INDEX;
2307 break;
2308 case GLSL_SAMPLER_DIM_BUF:
2309 assert(!"FINISHME: Implement ARB_texture_buffer_object");
2310 break;
2311 default:
2312 assert(!"Should not get here.");
2313 }
2314
2315 this->result = result_src;
2316 }
2317
2318 void
2319 glsl_to_tgsi_visitor::visit(ir_return *ir)
2320 {
2321 if (ir->get_value()) {
2322 st_dst_reg l;
2323 int i;
2324
2325 assert(current_function);
2326
2327 ir->get_value()->accept(this);
2328 st_src_reg r = this->result;
2329
2330 l = st_dst_reg(current_function->return_reg);
2331
2332 for (i = 0; i < type_size(current_function->sig->return_type); i++) {
2333 emit(ir, TGSI_OPCODE_MOV, l, r);
2334 l.index++;
2335 r.index++;
2336 }
2337 }
2338
2339 emit(ir, TGSI_OPCODE_RET);
2340 }
2341
2342 void
2343 glsl_to_tgsi_visitor::visit(ir_discard *ir)
2344 {
2345 struct gl_fragment_program *fp = (struct gl_fragment_program *)this->prog;
2346
2347 if (ir->condition) {
2348 ir->condition->accept(this);
2349 this->result.negate = ~this->result.negate;
2350 emit(ir, TGSI_OPCODE_KIL, undef_dst, this->result);
2351 } else {
2352 emit(ir, TGSI_OPCODE_KILP);
2353 }
2354
2355 fp->UsesKill = GL_TRUE;
2356 }
2357
2358 void
2359 glsl_to_tgsi_visitor::visit(ir_if *ir)
2360 {
2361 glsl_to_tgsi_instruction *cond_inst, *if_inst, *else_inst = NULL;
2362 glsl_to_tgsi_instruction *prev_inst;
2363
2364 prev_inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail();
2365
2366 ir->condition->accept(this);
2367 assert(this->result.file != PROGRAM_UNDEFINED);
2368
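   /* Two strategies: if the driver wants condition codes, mark the
    * instruction that computed the condition with cond_update and emit an
    * IF that tests COND_NE; otherwise pass the condition value directly as
    * a source operand of TGSI_OPCODE_IF.
    */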
2369 if (this->options->EmitCondCodes) {
2370 cond_inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail();
2371
2372 /* See if we actually generated any instructions to compute
2373 * the condition. If not, then cook up a move to a temp so we
2374 * have something to set cond_update on.
2375 */
2376 if (cond_inst == prev_inst) {
2377 st_src_reg temp = get_temp(glsl_type::bool_type);
2378 cond_inst = emit(ir->condition, TGSI_OPCODE_MOV, st_dst_reg(temp), result);
2379 }
2380 cond_inst->cond_update = GL_TRUE;
2381
2382 if_inst = emit(ir->condition, TGSI_OPCODE_IF);
2383 if_inst->dst.cond_mask = COND_NE;
2384 } else {
2385 if_inst = emit(ir->condition, TGSI_OPCODE_IF, undef_dst, this->result);
2386 }
2387
2388 this->instructions.push_tail(if_inst);
2389
2390 visit_exec_list(&ir->then_instructions, this);
2391
2392 if (!ir->else_instructions.is_empty()) {
2393 else_inst = emit(ir->condition, TGSI_OPCODE_ELSE);
2394 visit_exec_list(&ir->else_instructions, this);
2395 }
2396
2397 if_inst = emit(ir->condition, TGSI_OPCODE_ENDIF);
2398 }
2399
2400 glsl_to_tgsi_visitor::glsl_to_tgsi_visitor()
2401 {
2402 result.file = PROGRAM_UNDEFINED;
2403 next_temp = 1;
2404 next_signature_id = 1;
2405 current_function = NULL;
2406 num_address_regs = 0;
2407 indirect_addr_temps = false;
2408 indirect_addr_consts = false;
2409 mem_ctx = ralloc_context(NULL);
2410 }
2411
2412 glsl_to_tgsi_visitor::~glsl_to_tgsi_visitor()
2413 {
2414 ralloc_free(mem_ctx);
2415 }
2416
2417 extern "C" void free_glsl_to_tgsi_visitor(glsl_to_tgsi_visitor *v)
2418 {
2419 delete v;
2420 }
2421
2422
2423 /**
2424 * Count resources used by the given gpu program (number of texture
2425 * samplers, etc).
2426 */
2427 static void
2428 count_resources(glsl_to_tgsi_visitor *v, gl_program *prog)
2429 {
2430 v->samplers_used = 0;
2431
2432 foreach_iter(exec_list_iterator, iter, v->instructions) {
2433 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
2434
2435 if (is_tex_instruction(inst->op)) {
2436 v->samplers_used |= 1 << inst->sampler;
2437
2438 prog->SamplerTargets[inst->sampler] =
2439 (gl_texture_index)inst->tex_target;
2440 if (inst->tex_shadow) {
2441 prog->ShadowSamplers |= 1 << inst->sampler;
2442 }
2443 }
2444 }
2445
2446 prog->SamplersUsed = v->samplers_used;
2447 _mesa_update_shader_textures_used(prog);
2448 }
2449
2450
2451 /**
2452 * Check if the given vertex/fragment/shader program is within the
2453 * resource limits of the context (number of texture units, etc).
2454 * If any of those checks fail, record a linker error.
2455 *
2456 * XXX more checks are needed...
2457 */
2458 static void
2459 check_resources(const struct gl_context *ctx,
2460 struct gl_shader_program *shader_program,
2461 glsl_to_tgsi_visitor *prog,
2462 struct gl_program *proginfo)
2463 {
2464 switch (proginfo->Target) {
2465 case GL_VERTEX_PROGRAM_ARB:
2466 if (_mesa_bitcount(prog->samplers_used) >
2467 ctx->Const.MaxVertexTextureImageUnits) {
2468 fail_link(shader_program, "Too many vertex shader texture samplers");
2469 }
2470 if (proginfo->Parameters->NumParameters > MAX_UNIFORMS) {
2471 fail_link(shader_program, "Too many vertex shader constants");
2472 }
2473 break;
2474 case MESA_GEOMETRY_PROGRAM:
2475 if (_mesa_bitcount(prog->samplers_used) >
2476 ctx->Const.MaxGeometryTextureImageUnits) {
2477 fail_link(shader_program, "Too many geometry shader texture samplers");
2478 }
2479 if (proginfo->Parameters->NumParameters >
2480 MAX_GEOMETRY_UNIFORM_COMPONENTS / 4) {
2481 fail_link(shader_program, "Too many geometry shader constants");
2482 }
2483 break;
2484 case GL_FRAGMENT_PROGRAM_ARB:
2485 if (_mesa_bitcount(prog->samplers_used) >
2486 ctx->Const.MaxTextureImageUnits) {
2487 fail_link(shader_program, "Too many fragment shader texture samplers");
2488 }
2489 if (proginfo->Parameters->NumParameters > MAX_UNIFORMS) {
2490 fail_link(shader_program, "Too many fragment shader constants");
2491 }
2492 break;
2493 default:
2494 _mesa_problem(ctx, "unexpected program type in check_resources()");
2495 }
2496 }
2497
2498
2499
2500 struct uniform_sort {
2501 struct gl_uniform *u;
2502 int pos;
2503 };
2504
2505 /* The shader_program->Uniforms list is almost sorted in increasing
2506 * uniform->{Frag,Vert}Pos locations, but not quite when there are
2507 * uniforms shared between targets. We need to add parameters in
2508 * increasing order for the targets.
2509 */
2510 static int
2511 sort_uniforms(const void *a, const void *b)
2512 {
2513 struct uniform_sort *u1 = (struct uniform_sort *)a;
2514 struct uniform_sort *u2 = (struct uniform_sort *)b;
2515
2516 return u1->pos - u2->pos;
2517 }
2518
2519 /* Add the uniforms to the parameters. The linker chose locations
2520 * in our parameter lists (which hadn't been created yet), which the
2521 * uniforms code will use to poke values into our parameters list
2522 * when uniforms are updated.
2523 */
2524 static void
2525 add_uniforms_to_parameters_list(struct gl_shader_program *shader_program,
2526 struct gl_shader *shader,
2527 struct gl_program *prog)
2528 {
2529 unsigned int i;
2530 unsigned int next_sampler = 0, num_uniforms = 0;
2531 struct uniform_sort *sorted_uniforms;
2532
2533 sorted_uniforms = ralloc_array(NULL, struct uniform_sort,
2534 shader_program->Uniforms->NumUniforms);
2535
2536 for (i = 0; i < shader_program->Uniforms->NumUniforms; i++) {
2537 struct gl_uniform *uniform = shader_program->Uniforms->Uniforms + i;
2538 int parameter_index = -1;
2539
2540 switch (shader->Type) {
2541 case GL_VERTEX_SHADER:
2542 parameter_index = uniform->VertPos;
2543 break;
2544 case GL_FRAGMENT_SHADER:
2545 parameter_index = uniform->FragPos;
2546 break;
2547 case GL_GEOMETRY_SHADER:
2548 parameter_index = uniform->GeomPos;
2549 break;
2550 }
2551
2552 /* Only add uniforms used in our target. */
2553 if (parameter_index != -1) {
2554 sorted_uniforms[num_uniforms].pos = parameter_index;
2555 sorted_uniforms[num_uniforms].u = uniform;
2556 num_uniforms++;
2557 }
2558 }
2559
2560 qsort(sorted_uniforms, num_uniforms, sizeof(struct uniform_sort),
2561 sort_uniforms);
2562
2563 for (i = 0; i < num_uniforms; i++) {
2564 struct gl_uniform *uniform = sorted_uniforms[i].u;
2565 int parameter_index = sorted_uniforms[i].pos;
2566 const glsl_type *type = uniform->Type;
2567 unsigned int size;
2568
2569 if (type->is_vector() ||
2570 type->is_scalar()) {
2571 size = type->vector_elements;
2572 } else {
2573 size = type_size(type) * 4;
2574 }
2575
2576 gl_register_file file;
2577 if (type->is_sampler() ||
2578 (type->is_array() && type->fields.array->is_sampler())) {
2579 file = PROGRAM_SAMPLER;
2580 } else {
2581 file = PROGRAM_UNIFORM;
2582 }
2583
2584 GLint index = _mesa_lookup_parameter_index(prog->Parameters, -1,
2585 uniform->Name);
2586
2587 if (index < 0) {
2588 index = _mesa_add_parameter(prog->Parameters, file,
2589 uniform->Name, size, type->gl_type,
2590 NULL, NULL, 0x0);
2591
2592 /* Sampler uniform values are stored in prog->SamplerUnits,
2593 * and the entry in that array is selected by this index we
2594 * store in ParameterValues[].
2595 */
2596 if (file == PROGRAM_SAMPLER) {
2597 for (unsigned int j = 0; j < size / 4; j++)
2598 prog->Parameters->ParameterValues[index + j][0].f = next_sampler++;
2599 }
2600
2601 /* The location chosen in the Parameters list here (returned
2602 * from _mesa_add_parameter above) has to match what the linker chose.
2603 */
2604 if (index != parameter_index) {
2605 fail_link(shader_program, "Allocation of uniform `%s' to target "
2606 "failed (%d vs %d)\n",
2607 uniform->Name, index, parameter_index);
2608 }
2609 }
2610 }
2611
2612 ralloc_free(sorted_uniforms);
2613 }
2614
2615 static void
2616 set_uniform_initializer(struct gl_context *ctx, void *mem_ctx,
2617 struct gl_shader_program *shader_program,
2618 const char *name, const glsl_type *type,
2619 ir_constant *val)
2620 {
2621 if (type->is_record()) {
2622 ir_constant *field_constant;
2623
2624 field_constant = (ir_constant *)val->components.get_head();
2625
2626 for (unsigned int i = 0; i < type->length; i++) {
2627 const glsl_type *field_type = type->fields.structure[i].type;
2628 const char *field_name = ralloc_asprintf(mem_ctx, "%s.%s", name,
2629 type->fields.structure[i].name);
2630 set_uniform_initializer(ctx, mem_ctx, shader_program, field_name,
2631 field_type, field_constant);
2632 field_constant = (ir_constant *)field_constant->next;
2633 }
2634 return;
2635 }
2636
2637 int loc = _mesa_get_uniform_location(ctx, shader_program, name);
2638
2639 if (loc == -1) {
2640 fail_link(shader_program,
2641 "Couldn't find uniform for initializer %s\n", name);
2642 return;
2643 }
2644
2645 for (unsigned int i = 0; i < (type->is_array() ? type->length : 1); i++) {
2646 ir_constant *element;
2647 const glsl_type *element_type;
2648 if (type->is_array()) {
2649 element = val->array_elements[i];
2650 element_type = type->fields.array;
2651 } else {
2652 element = val;
2653 element_type = type;
2654 }
2655
2656 void *values;
2657
2658 if (element_type->base_type == GLSL_TYPE_BOOL) {
2659 int *conv = ralloc_array(mem_ctx, int, element_type->components());
2660 for (unsigned int j = 0; j < element_type->components(); j++) {
2661 conv[j] = element->value.b[j];
2662 }
2663 values = (void *)conv;
2664 element_type = glsl_type::get_instance(GLSL_TYPE_INT,
2665 element_type->vector_elements,
2666 1);
2667 } else {
2668 values = &element->value;
2669 }
2670
2671 if (element_type->is_matrix()) {
2672 _mesa_uniform_matrix(ctx, shader_program,
2673 element_type->matrix_columns,
2674 element_type->vector_elements,
2675 loc, 1, GL_FALSE, (GLfloat *)values);
2676 loc += element_type->matrix_columns;
2677 } else {
2678 _mesa_uniform(ctx, shader_program, loc, element_type->matrix_columns,
2679 values, element_type->gl_type);
2680 loc += type_size(element_type);
2681 }
2682 }
2683 }
2684
2685 static void
2686 set_uniform_initializers(struct gl_context *ctx,
2687 struct gl_shader_program *shader_program)
2688 {
2689 void *mem_ctx = NULL;
2690
2691 for (unsigned int i = 0; i < MESA_SHADER_TYPES; i++) {
2692 struct gl_shader *shader = shader_program->_LinkedShaders[i];
2693
2694 if (shader == NULL)
2695 continue;
2696
2697 foreach_iter(exec_list_iterator, iter, *shader->ir) {
2698 ir_instruction *ir = (ir_instruction *)iter.get();
2699 ir_variable *var = ir->as_variable();
2700
2701 if (!var || var->mode != ir_var_uniform || !var->constant_value)
2702 continue;
2703
2704 if (!mem_ctx)
2705 mem_ctx = ralloc_context(NULL);
2706
2707 set_uniform_initializer(ctx, mem_ctx, shader_program, var->name,
2708 var->type, var->constant_value);
2709 }
2710 }
2711
2712 ralloc_free(mem_ctx);
2713 }
2714
2715 /*
2716 * Scan/rewrite program to remove reads of custom (output) registers.
2717 * The passed type has to be either PROGRAM_OUTPUT or PROGRAM_VARYING
2718 * (for vertex shaders).
2719 * In GLSL shaders, varying vars can be read and written.
2720 * On some hardware, trying to read an output register causes trouble.
2721 * So, rewrite the program to use a temporary register in this case.
2722 *
2723 * Based on _mesa_remove_output_reads from programopt.c.
2724 */
2725 void
2726 glsl_to_tgsi_visitor::remove_output_reads(gl_register_file type)
2727 {
2728 GLuint i;
2729 GLint outputMap[VERT_RESULT_MAX];
2730 GLint outputTypes[VERT_RESULT_MAX];
2731 GLuint numVaryingReads = 0;
2732 GLboolean usedTemps[MAX_PROGRAM_TEMPS];
2733 GLuint firstTemp = 0;
2734
2735 _mesa_find_used_registers(prog, PROGRAM_TEMPORARY,
2736 usedTemps, MAX_PROGRAM_TEMPS);
2737
2738 assert(type == PROGRAM_VARYING || type == PROGRAM_OUTPUT);
2739 assert(prog->Target == GL_VERTEX_PROGRAM_ARB || type != PROGRAM_VARYING);
2740
2741 for (i = 0; i < VERT_RESULT_MAX; i++)
2742 outputMap[i] = -1;
2743
2744 /* look for instructions which read from varying vars */
2745 foreach_iter(exec_list_iterator, iter, this->instructions) {
2746 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
2747 const GLuint numSrc = num_inst_src_regs(inst->op);
2748 GLuint j;
2749 for (j = 0; j < numSrc; j++) {
2750 if (inst->src[j].file == type) {
2751 /* replace the read with a temp reg */
2752 const GLuint var = inst->src[j].index;
2753 if (outputMap[var] == -1) {
2754 numVaryingReads++;
2755 outputMap[var] = _mesa_find_free_register(usedTemps,
2756 MAX_PROGRAM_TEMPS,
2757 firstTemp);
2758 outputTypes[var] = inst->src[j].type;
2759 firstTemp = outputMap[var] + 1;
2760 }
2761 inst->src[j].file = PROGRAM_TEMPORARY;
2762 inst->src[j].index = outputMap[var];
2763 }
2764 }
2765 }
2766
2767 if (numVaryingReads == 0)
2768 return; /* nothing to be done */
2769
2770 /* look for instructions which write to the varying vars identified above */
2771 foreach_iter(exec_list_iterator, iter, this->instructions) {
2772 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
2773 if (inst->dst.file == type && outputMap[inst->dst.index] >= 0) {
2774 /* change inst to write to the temp reg, instead of the varying */
2775 inst->dst.file = PROGRAM_TEMPORARY;
2776 inst->dst.index = outputMap[inst->dst.index];
2777 }
2778 }
2779
2780 /* insert new MOV instructions at the end */
2781 for (i = 0; i < VERT_RESULT_MAX; i++) {
2782 if (outputMap[i] >= 0) {
2783 /* MOV VAR[i], TEMP[tmp]; */
2784 st_src_reg src = st_src_reg(PROGRAM_TEMPORARY, outputMap[i], outputTypes[i]);
2785 st_dst_reg dst = st_dst_reg(type, WRITEMASK_XYZW, outputTypes[i]);
2786 dst.index = i;
2787 this->emit(NULL, TGSI_OPCODE_MOV, dst, src);
2788 }
2789 }
2790 }
2791
2792 /**
2793 * Returns the mask of channels (bitmask of WRITEMASK_X,Y,Z,W) which
2794 * are read from the given src in this instruction
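 *
 * For example (a sketch): with dst.writemask = XY and src.swizzle = .wzyx,
 * the X and Y writes read the source's W and Z channels, so the returned
 * mask is WRITEMASK_ZW.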
2795 */
2796 static int
2797 get_src_arg_mask(st_dst_reg dst, st_src_reg src)
2798 {
2799 int read_mask = 0, comp;
2800
2801 /* Now, given the src swizzle and the written channels, find which
2802 * components are actually read
2803 */
2804 for (comp = 0; comp < 4; ++comp) {
2805 const unsigned coord = GET_SWZ(src.swizzle, comp);
2806 ASSERT(coord < 4);
2807 if (dst.writemask & (1 << comp) && coord <= SWIZZLE_W)
2808 read_mask |= 1 << coord;
2809 }
2810
2811 return read_mask;
2812 }
2813
2814 /**
2815 * This pass replaces CMP T0, T1 T2 T0 with MOV T0, T2 when the CMP
2816 * instruction is the first instruction to write to register T0. There are
2817 * several lowering passes done in GLSL IR (e.g. branches and
2818 * relative addressing) that create a large number of conditional assignments
2819 * that ir_to_mesa converts to CMP instructions like the one mentioned above.
2820 *
2821 * Here is why this conversion is safe:
2822 * CMP T0, T1 T2 T0 can be expanded to:
2823 * if (T1 < 0.0)
2824 * MOV T0, T2;
2825 * else
2826 * MOV T0, T0;
2827 *
2828 * If (T1 < 0.0) evaluates to true then our replacement MOV T0, T2 is the same
2829 * as the original program. If (T1 < 0.0) evaluates to false, executing
2830 * MOV T0, T0 will store a garbage value in T0 since T0 is uninitialized.
2831 * Therefore, it doesn't matter that we are replacing MOV T0, T0 with MOV T0, T2
2832 * because any instruction that was going to read from T0 after this was going
2833 * to read a garbage value anyway.
2834 */
2835 void
2836 glsl_to_tgsi_visitor::simplify_cmp(void)
2837 {
2838 unsigned tempWrites[MAX_PROGRAM_TEMPS];
2839 unsigned outputWrites[MAX_PROGRAM_OUTPUTS];
2840
2841 memset(tempWrites, 0, sizeof(tempWrites));
2842 memset(outputWrites, 0, sizeof(outputWrites));
2843
2844 foreach_iter(exec_list_iterator, iter, this->instructions) {
2845 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
2846 unsigned prevWriteMask = 0;
2847
2848 /* Give up if we encounter relative addressing or flow control. */
2849 if (inst->dst.reladdr ||
2850 tgsi_get_opcode_info(inst->op)->is_branch ||
2851 inst->op == TGSI_OPCODE_BGNSUB ||
2852 inst->op == TGSI_OPCODE_CONT ||
2853 inst->op == TGSI_OPCODE_END ||
2854 inst->op == TGSI_OPCODE_ENDSUB ||
2855 inst->op == TGSI_OPCODE_RET) {
2856 return;
2857 }
2858
2859 if (inst->dst.file == PROGRAM_OUTPUT) {
2860 assert(inst->dst.index < MAX_PROGRAM_OUTPUTS);
2861 prevWriteMask = outputWrites[inst->dst.index];
2862 outputWrites[inst->dst.index] |= inst->dst.writemask;
2863 } else if (inst->dst.file == PROGRAM_TEMPORARY) {
2864 assert(inst->dst.index < MAX_PROGRAM_TEMPS);
2865 prevWriteMask = tempWrites[inst->dst.index];
2866 tempWrites[inst->dst.index] |= inst->dst.writemask;
2867 }
2868
2869 /* For a CMP to be considered a conditional write, the destination
2870 * register and the third source register (src[2]) must be the same. */
2871 if (inst->op == TGSI_OPCODE_CMP
2872 && !(inst->dst.writemask & prevWriteMask)
2873 && inst->src[2].file == inst->dst.file
2874 && inst->src[2].index == inst->dst.index
2875 && inst->dst.writemask == get_src_arg_mask(inst->dst, inst->src[2])) {
2876
2877 inst->op = TGSI_OPCODE_MOV;
2878 inst->src[0] = inst->src[1];
2879 }
2880 }
2881 }
2882
2883 /* Replaces all references to a temporary register index with another index. */
2884 void
2885 glsl_to_tgsi_visitor::rename_temp_register(int index, int new_index)
2886 {
2887 foreach_iter(exec_list_iterator, iter, this->instructions) {
2888 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
2889 unsigned j;
2890
2891 for (j=0; j < num_inst_src_regs(inst->op); j++) {
2892 if (inst->src[j].file == PROGRAM_TEMPORARY &&
2893 inst->src[j].index == index) {
2894 inst->src[j].index = new_index;
2895 }
2896 }
2897
2898 if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == index) {
2899 inst->dst.index = new_index;
2900 }
2901 }
2902 }
2903
2904 int
2905 glsl_to_tgsi_visitor::get_first_temp_read(int index)
2906 {
2907 int depth = 0; /* loop depth */
2908 int loop_start = -1; /* index of the first active BGNLOOP (if any) */
2909 unsigned i = 0, j;
2910
2911 foreach_iter(exec_list_iterator, iter, this->instructions) {
2912 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
2913
2914 for (j=0; j < num_inst_src_regs(inst->op); j++) {
2915 if (inst->src[j].file == PROGRAM_TEMPORARY &&
2916 inst->src[j].index == index) {
2917 return (depth == 0) ? i : loop_start;
2918 }
2919 }
2920
2921 if (inst->op == TGSI_OPCODE_BGNLOOP) {
2922 if(depth++ == 0)
2923 loop_start = i;
2924 } else if (inst->op == TGSI_OPCODE_ENDLOOP) {
2925 if (--depth == 0)
2926 loop_start = -1;
2927 }
2928 assert(depth >= 0);
2929
2930 i++;
2931 }
2932
2933 return -1;
2934 }
2935
2936 int
2937 glsl_to_tgsi_visitor::get_first_temp_write(int index)
2938 {
2939 int depth = 0; /* loop depth */
2940 int loop_start = -1; /* index of the first active BGNLOOP (if any) */
2941 int i = 0;
2942
2943 foreach_iter(exec_list_iterator, iter, this->instructions) {
2944 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
2945
2946 if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == index) {
2947 return (depth == 0) ? i : loop_start;
2948 }
2949
2950 if (inst->op == TGSI_OPCODE_BGNLOOP) {
2951 if(depth++ == 0)
2952 loop_start = i;
2953 } else if (inst->op == TGSI_OPCODE_ENDLOOP) {
2954 if (--depth == 0)
2955 loop_start = -1;
2956 }
2957 assert(depth >= 0);
2958
2959 i++;
2960 }
2961
2962 return -1;
2963 }
2964
2965 int
2966 glsl_to_tgsi_visitor::get_last_temp_read(int index)
2967 {
2968 int depth = 0; /* loop depth */
2969 int last = -1; /* index of last instruction that reads the temporary */
2970 unsigned i = 0, j;
2971
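   /* last == -2 is used as a sentinel meaning "the most recent read was
    * inside a loop"; when the outermost ENDLOOP is reached, the index of
    * that ENDLOOP becomes the conservative answer.
    */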
2972 foreach_iter(exec_list_iterator, iter, this->instructions) {
2973 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
2974
2975 for (j=0; j < num_inst_src_regs(inst->op); j++) {
2976 if (inst->src[j].file == PROGRAM_TEMPORARY &&
2977 inst->src[j].index == index) {
2978 last = (depth == 0) ? i : -2;
2979 }
2980 }
2981
2982 if (inst->op == TGSI_OPCODE_BGNLOOP)
2983 depth++;
2984 else if (inst->op == TGSI_OPCODE_ENDLOOP)
2985 if (--depth == 0 && last == -2)
2986 last = i;
2987 assert(depth >= 0);
2988
2989 i++;
2990 }
2991
2992 assert(last >= -1);
2993 return last;
2994 }
2995
2996 int
2997 glsl_to_tgsi_visitor::get_last_temp_write(int index)
2998 {
2999 int depth = 0; /* loop depth */
3000 int last = -1; /* index of last instruction that writes to the temporary */
3001 int i = 0;
3002
3003 foreach_iter(exec_list_iterator, iter, this->instructions) {
3004 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
3005
3006 if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == index)
3007 last = (depth == 0) ? i : -2;
3008
3009 if (inst->op == TGSI_OPCODE_BGNLOOP)
3010 depth++;
3011 else if (inst->op == TGSI_OPCODE_ENDLOOP)
3012 if (--depth == 0 && last == -2)
3013 last = i;
3014 assert(depth >= 0);
3015
3016 i++;
3017 }
3018
3019 assert(last >= -1);
3020 return last;
3021 }
3022
3023 /*
3024 * On a basic block basis, tracks available PROGRAM_TEMPORARY register
3025 * channels for copy propagation and updates following instructions to
3026 * use the original versions.
3027 *
3028 * The glsl_to_tgsi_visitor lazily produces code assuming that this pass
3029 * will occur. As an example, a TXP production before this pass:
3030 *
3031 * 0: MOV TEMP[1], INPUT[4].xyyy;
3032 * 1: MOV TEMP[1].w, INPUT[4].wwww;
3033 * 2: TXP TEMP[2], TEMP[1], texture[0], 2D;
3034 *
3035 * and after:
3036 *
3037 * 0: MOV TEMP[1], INPUT[4].xyyy;
3038 * 1: MOV TEMP[1].w, INPUT[4].wwww;
3039 * 2: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D;
3040 *
3041 * which allows for dead code elimination on TEMP[1]'s writes.
3042 */
3043 void
3044 glsl_to_tgsi_visitor::copy_propagate(void)
3045 {
3046 glsl_to_tgsi_instruction **acp = rzalloc_array(mem_ctx,
3047 glsl_to_tgsi_instruction *,
3048 this->next_temp * 4);
3049 int *acp_level = rzalloc_array(mem_ctx, int, this->next_temp * 4);
3050 int level = 0;
3051
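   /* acp[4 * reg + chan] points at the MOV whose copy of that temporary
    * channel is still valid; acp_level[] records the flow-control nesting
    * depth at which the entry was added so entries made inside an IF can
    * be dropped again at ELSE/ENDIF.
    */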
3052 foreach_iter(exec_list_iterator, iter, this->instructions) {
3053 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
3054
3055 assert(inst->dst.file != PROGRAM_TEMPORARY
3056 || inst->dst.index < this->next_temp);
3057
3058 /* First, do any copy propagation possible into the src regs. */
3059 for (int r = 0; r < 3; r++) {
3060 glsl_to_tgsi_instruction *first = NULL;
3061 bool good = true;
3062 int acp_base = inst->src[r].index * 4;
3063
3064 if (inst->src[r].file != PROGRAM_TEMPORARY ||
3065 inst->src[r].reladdr)
3066 continue;
3067
3068 /* See if we can find entries in the ACP consisting of MOVs
3069 * from the same src register for all the swizzled channels
3070 * of this src register reference.
3071 */
3072 for (int i = 0; i < 4; i++) {
3073 int src_chan = GET_SWZ(inst->src[r].swizzle, i);
3074 glsl_to_tgsi_instruction *copy_chan = acp[acp_base + src_chan];
3075
3076 if (!copy_chan) {
3077 good = false;
3078 break;
3079 }
3080
3081 assert(acp_level[acp_base + src_chan] <= level);
3082
3083 if (!first) {
3084 first = copy_chan;
3085 } else {
3086 if (first->src[0].file != copy_chan->src[0].file ||
3087 first->src[0].index != copy_chan->src[0].index) {
3088 good = false;
3089 break;
3090 }
3091 }
3092 }
3093
3094 if (good) {
3095 /* We've now validated that we can copy-propagate to
3096 * replace this src register reference. Do it.
3097 */
3098 inst->src[r].file = first->src[0].file;
3099 inst->src[r].index = first->src[0].index;
3100
3101 int swizzle = 0;
3102 for (int i = 0; i < 4; i++) {
3103 int src_chan = GET_SWZ(inst->src[r].swizzle, i);
3104 glsl_to_tgsi_instruction *copy_inst = acp[acp_base + src_chan];
3105 swizzle |= (GET_SWZ(copy_inst->src[0].swizzle, src_chan) <<
3106 (3 * i));
3107 }
3108 inst->src[r].swizzle = swizzle;
3109 }
3110 }
3111
3112 switch (inst->op) {
3113 case TGSI_OPCODE_BGNLOOP:
3114 case TGSI_OPCODE_ENDLOOP:
3115 /* End of a basic block, clear the ACP entirely. */
3116 memset(acp, 0, sizeof(*acp) * this->next_temp * 4);
3117 break;
3118
3119 case TGSI_OPCODE_IF:
3120 ++level;
3121 break;
3122
3123 case TGSI_OPCODE_ENDIF:
3124 case TGSI_OPCODE_ELSE:
3125 /* Clear all channels written inside the block from the ACP, but
3126 * leave those that were not touched.
3127 */
3128 for (int r = 0; r < this->next_temp; r++) {
3129 for (int c = 0; c < 4; c++) {
3130 if (!acp[4 * r + c])
3131 continue;
3132
3133 if (acp_level[4 * r + c] >= level)
3134 acp[4 * r + c] = NULL;
3135 }
3136 }
3137 if (inst->op == TGSI_OPCODE_ENDIF)
3138 --level;
3139 break;
3140
3141 default:
3142 /* Continuing the block, clear any written channels from
3143 * the ACP.
3144 */
3145 if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.reladdr) {
3146 /* Any temporary might be written, so no copy propagation
3147 * across this instruction.
3148 */
3149 memset(acp, 0, sizeof(*acp) * this->next_temp * 4);
3150 } else if (inst->dst.file == PROGRAM_OUTPUT &&
3151 inst->dst.reladdr) {
3152 /* Any output might be written, so no copy propagation
3153 * from outputs across this instruction.
3154 */
3155 for (int r = 0; r < this->next_temp; r++) {
3156 for (int c = 0; c < 4; c++) {
3157 if (!acp[4 * r + c])
3158 continue;
3159
3160 if (acp[4 * r + c]->src[0].file == PROGRAM_OUTPUT)
3161 acp[4 * r + c] = NULL;
3162 }
3163 }
3164 } else if (inst->dst.file == PROGRAM_TEMPORARY ||
3165 inst->dst.file == PROGRAM_OUTPUT) {
3166 /* Clear where it's used as dst. */
3167 if (inst->dst.file == PROGRAM_TEMPORARY) {
3168 for (int c = 0; c < 4; c++) {
3169 if (inst->dst.writemask & (1 << c)) {
3170 acp[4 * inst->dst.index + c] = NULL;
3171 }
3172 }
3173 }
3174
3175 /* Clear where it's used as src. */
3176 for (int r = 0; r < this->next_temp; r++) {
3177 for (int c = 0; c < 4; c++) {
3178 if (!acp[4 * r + c])
3179 continue;
3180
3181 int src_chan = GET_SWZ(acp[4 * r + c]->src[0].swizzle, c);
3182
3183 if (acp[4 * r + c]->src[0].file == inst->dst.file &&
3184 acp[4 * r + c]->src[0].index == inst->dst.index &&
3185 inst->dst.writemask & (1 << src_chan))
3186 {
3187 acp[4 * r + c] = NULL;
3188 }
3189 }
3190 }
3191 }
3192 break;
3193 }
3194
3195 /* If this is a copy, add it to the ACP. */
3196 if (inst->op == TGSI_OPCODE_MOV &&
3197 inst->dst.file == PROGRAM_TEMPORARY &&
3198 !inst->dst.reladdr &&
3199 !inst->saturate &&
3200 !inst->src[0].reladdr &&
3201 !inst->src[0].negate) {
3202 for (int i = 0; i < 4; i++) {
3203 if (inst->dst.writemask & (1 << i)) {
3204 acp[4 * inst->dst.index + i] = inst;
3205 acp_level[4 * inst->dst.index + i] = level;
3206 }
3207 }
3208 }
3209 }
3210
3211 ralloc_free(acp_level);
3212 ralloc_free(acp);
3213 }
3214
3215 /*
3216 * Tracks available PROGRAM_TEMPORARY registers for dead code elimination.
3217 *
3218 * The glsl_to_tgsi_visitor lazily produces code assuming that this pass
3219 * will occur. As an example, a TXP production after copy propagation but
3220 * before this pass:
3221 *
3222 * 0: MOV TEMP[1], INPUT[4].xyyy;
3223 * 1: MOV TEMP[1].w, INPUT[4].wwww;
3224 * 2: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D;
3225 *
3226 * and after this pass:
3227 *
3228 * 0: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D;
3229 *
3230 * FIXME: assumes that all functions are inlined (no support for BGNSUB/ENDSUB)
3231 * FIXME: doesn't eliminate all dead code inside of loops; it steps around them
3232 */
3233 void
3234 glsl_to_tgsi_visitor::eliminate_dead_code(void)
3235 {
3236 int i;
3237
3238 for (i=0; i < this->next_temp; i++) {
3239 int last_read = get_last_temp_read(i);
3240 int j = 0;
3241
3242 foreach_iter(exec_list_iterator, iter, this->instructions) {
3243 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
3244
3245 if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == i &&
3246 j > last_read)
3247 {
3248 iter.remove();
3249 delete inst;
3250 }
3251
3252 j++;
3253 }
3254 }
3255 }
3256
3257 /* Merges temporary registers together where possible to reduce the number of
3258 * registers needed to run a program.
3259 *
3260 * Produces the best results only after copy propagation and dead code
3261 * elimination have been run. */
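/* For example (a sketch): if TEMP[3] is last read at instruction 10 and
 * TEMP[7] is first written at instruction 12 (and no earlier than TEMP[3]'s
 * first write), TEMP[7] can be renamed to TEMP[3].
 */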
3262 void
3263 glsl_to_tgsi_visitor::merge_registers(void)
3264 {
3265 int *last_reads = rzalloc_array(mem_ctx, int, this->next_temp);
3266 int *first_writes = rzalloc_array(mem_ctx, int, this->next_temp);
3267 int i, j;
3268
3269 /* Read the indices of the last read and first write to each temp register
3270 * into an array so that we don't have to traverse the instruction list as
3271 * much. */
3272 for (i=0; i < this->next_temp; i++) {
3273 last_reads[i] = get_last_temp_read(i);
3274 first_writes[i] = get_first_temp_write(i);
3275 }
3276
3277 /* Start looking for registers with non-overlapping usages that can be
3278 * merged together. */
3279 for (i=0; i < this->next_temp; i++) {
3280 /* Don't touch unused registers. */
3281 if (last_reads[i] < 0 || first_writes[i] < 0) continue;
3282
3283 for (j=0; j < this->next_temp; j++) {
3284 /* Don't touch unused registers. */
3285 if (last_reads[j] < 0 || first_writes[j] < 0) continue;
3286
3287 /* We can merge the two registers if the first write to j is after or
3288 * in the same instruction as the last read from i. Note that the
3289 * register at index i will always be used earlier or at the same time
3290 * as the register at index j. */
3291 if (first_writes[i] <= first_writes[j] &&
3292 last_reads[i] <= first_writes[j])
3293 {
3294 rename_temp_register(j, i); /* Replace all references to j with i.*/
3295
3296 /* Update the first_writes and last_reads arrays with the new
3297 * values for the merged register index, and mark the newly unused
3298 * register index as such. */
3299 last_reads[i] = last_reads[j];
3300 first_writes[j] = -1;
3301 last_reads[j] = -1;
3302 }
3303 }
3304 }
3305
3306 ralloc_free(last_reads);
3307 ralloc_free(first_writes);
3308 }
3309
3310 /* Reassign indices to temporary registers by reusing unused indices created
3311 * by optimization passes. */
3312 void
3313 glsl_to_tgsi_visitor::renumber_registers(void)
3314 {
3315 int i = 0;
3316 int new_index = 0;
3317
3318 for (i=0; i < this->next_temp; i++) {
3319 if (get_first_temp_read(i) < 0) continue;
3320 if (i != new_index)
3321 rename_temp_register(i, new_index);
3322 new_index++;
3323 }
3324
3325 this->next_temp = new_index;
3326 }
3327
3328 /* ------------------------- TGSI conversion stuff -------------------------- */
3329 struct label {
3330 unsigned branch_target;
3331 unsigned token;
3332 };
3333
3334 /**
3335 * Intermediate state used during shader translation.
3336 */
3337 struct st_translate {
3338 struct ureg_program *ureg;
3339
3340 struct ureg_dst temps[MAX_PROGRAM_TEMPS];
3341 struct ureg_src *constants;
3342 struct ureg_dst outputs[PIPE_MAX_SHADER_OUTPUTS];
3343 struct ureg_src inputs[PIPE_MAX_SHADER_INPUTS];
3344 struct ureg_dst address[1];
3345 struct ureg_src samplers[PIPE_MAX_SAMPLERS];
3346 struct ureg_src systemValues[SYSTEM_VALUE_MAX];
3347
3348 /* Extra info for handling point size clamping in vertex shader */
3349 struct ureg_dst pointSizeResult; /**< Actual point size output register */
3350 struct ureg_src pointSizeConst; /**< Point size range constant register */
3351 GLint pointSizeOutIndex; /**< Temp point size output register */
3352 GLboolean prevInstWrotePointSize;
3353
3354 const GLuint *inputMapping;
3355 const GLuint *outputMapping;
3356
3357 /* For every instruction that contains a label (eg CALL), keep
3358 * details so that we can go back afterwards and emit the correct
3359 * tgsi instruction number for each label.
3360 */
3361 struct label *labels;
3362 unsigned labels_size;
3363 unsigned labels_count;
3364
3365 /* Keep a record of the tgsi instruction number that each mesa
3366 * instruction starts at; this will be used to fix up labels after
3367 * translation.
3368 */
3369 unsigned *insn;
3370 unsigned insn_size;
3371 unsigned insn_count;
3372
3373 unsigned procType; /**< TGSI_PROCESSOR_VERTEX/FRAGMENT */
3374
3375 boolean error;
3376 };
3377
3378 /** Map Mesa's SYSTEM_VALUE_x to TGSI_SEMANTIC_x */
3379 static unsigned mesa_sysval_to_semantic[SYSTEM_VALUE_MAX] = {
3380 TGSI_SEMANTIC_FACE,
3381 TGSI_SEMANTIC_INSTANCEID
3382 };
3383
3384 /**
3385 * Make note of a branch to a label in the TGSI code.
3386 * After we've emitted all instructions, we'll go over the list
3387 * of labels built here and patch the TGSI code with the actual
3388 * location of each label.
3389 */
3390 static unsigned *get_label( struct st_translate *t,
3391 unsigned branch_target )
3392 {
3393 unsigned i;
3394
3395 if (t->labels_count + 1 >= t->labels_size) {
3396 t->labels_size = 1 << (util_logbase2(t->labels_size) + 1);
3397 t->labels = (struct label *)realloc(t->labels,
3398 t->labels_size * sizeof t->labels[0]);
3399 if (t->labels == NULL) {
3400 static unsigned dummy;
3401 t->error = TRUE;
3402 return &dummy;
3403 }
3404 }
3405
3406 i = t->labels_count++;
3407 t->labels[i].branch_target = branch_target;
3408 return &t->labels[i].token;
3409 }
3410
3411 /**
3412 * Called prior to emitting the TGSI code for each Mesa instruction.
3413 * Allocate additional space for instructions if needed.
3414 * Update the insn[] array so the next Mesa instruction points to
3415 * the next TGSI instruction.
3416 */
3417 static void set_insn_start( struct st_translate *t,
3418 unsigned start )
3419 {
3420 if (t->insn_count + 1 >= t->insn_size) {
3421 t->insn_size = 1 << (util_logbase2(t->insn_size) + 1);
3422 t->insn = (unsigned *)realloc(t->insn, t->insn_size * sizeof t->insn[0]);
3423 if (t->insn == NULL) {
3424 t->error = TRUE;
3425 return;
3426 }
3427 }
3428
3429 t->insn[t->insn_count++] = start;
3430 }
3431
3432 /**
3433 * Map a Mesa dst register to a TGSI ureg_dst register.
3434 */
3435 static struct ureg_dst
3436 dst_register( struct st_translate *t,
3437 gl_register_file file,
3438 GLuint index )
3439 {
3440 switch( file ) {
3441 case PROGRAM_UNDEFINED:
3442 return ureg_dst_undef();
3443
3444 case PROGRAM_TEMPORARY:
3445 if (ureg_dst_is_undef(t->temps[index]))
3446 t->temps[index] = ureg_DECL_temporary( t->ureg );
3447
3448 return t->temps[index];
3449
3450 case PROGRAM_OUTPUT:
3451 if (t->procType == TGSI_PROCESSOR_VERTEX && index == VERT_RESULT_PSIZ)
3452 t->prevInstWrotePointSize = GL_TRUE;
3453
3454 if (t->procType == TGSI_PROCESSOR_VERTEX)
3455 assert(index < VERT_RESULT_MAX);
3456 else if (t->procType == TGSI_PROCESSOR_FRAGMENT)
3457 assert(index < FRAG_RESULT_MAX);
3458 else
3459 assert(index < GEOM_RESULT_MAX);
3460
3461 assert(t->outputMapping[index] < Elements(t->outputs));
3462
3463 return t->outputs[t->outputMapping[index]];
3464
3465 case PROGRAM_ADDRESS:
3466 return t->address[index];
3467
3468 default:
3469 debug_assert( 0 );
3470 return ureg_dst_undef();
3471 }
3472 }
3473
3474 /**
3475 * Map a Mesa src register to a TGSI ureg_src register.
3476 */
3477 static struct ureg_src
3478 src_register( struct st_translate *t,
3479 gl_register_file file,
3480 GLuint index )
3481 {
3482 switch( file ) {
3483 case PROGRAM_UNDEFINED:
3484 return ureg_src_undef();
3485
3486 case PROGRAM_TEMPORARY:
3487 assert(index >= 0);
3488 assert(index < Elements(t->temps));
3489 if (ureg_dst_is_undef(t->temps[index]))
3490 t->temps[index] = ureg_DECL_temporary( t->ureg );
3491 return ureg_src(t->temps[index]);
3492
3493 case PROGRAM_NAMED_PARAM:
3494 case PROGRAM_ENV_PARAM:
3495 case PROGRAM_LOCAL_PARAM:
3496 case PROGRAM_UNIFORM:
3497 assert(index >= 0);
3498 return t->constants[index];
3499 case PROGRAM_STATE_VAR:
3500 case PROGRAM_CONSTANT: /* ie, immediate */
3501 if (index < 0)
3502 return ureg_DECL_constant( t->ureg, 0 );
3503 else
3504 return t->constants[index];
3505
3506 case PROGRAM_INPUT:
3507 assert(t->inputMapping[index] < Elements(t->inputs));
3508 return t->inputs[t->inputMapping[index]];
3509
3510 case PROGRAM_OUTPUT:
3511 assert(t->outputMapping[index] < Elements(t->outputs));
3512 return ureg_src(t->outputs[t->outputMapping[index]]); /* not needed? */
3513
3514 case PROGRAM_ADDRESS:
3515 return ureg_src(t->address[index]);
3516
3517 case PROGRAM_SYSTEM_VALUE:
3518 assert(index < Elements(t->systemValues));
3519 return t->systemValues[index];
3520
3521 default:
3522 debug_assert( 0 );
3523 return ureg_src_undef();
3524 }
3525 }
3526
3527 /**
3528 * Create a TGSI ureg_dst register from an st_dst_reg.
3529 */
3530 static struct ureg_dst
3531 translate_dst( struct st_translate *t,
3532 const st_dst_reg *dst_reg,
3533 boolean saturate )
3534 {
3535 struct ureg_dst dst = dst_register( t,
3536 dst_reg->file,
3537 dst_reg->index );
3538
3539 dst = ureg_writemask( dst,
3540 dst_reg->writemask );
3541
3542 if (saturate)
3543 dst = ureg_saturate( dst );
3544
3545 if (dst_reg->reladdr != NULL)
3546 dst = ureg_dst_indirect( dst, ureg_src(t->address[0]) );
3547
3548 return dst;
3549 }
3550
3551 /**
3552 * Create a TGSI ureg_src register from an st_src_reg.
3553 */
3554 static struct ureg_src
3555 translate_src( struct st_translate *t,
3556 const st_src_reg *src_reg )
3557 {
3558 struct ureg_src src = src_register( t, src_reg->file, src_reg->index );
3559
3560 src = ureg_swizzle( src,
3561 GET_SWZ( src_reg->swizzle, 0 ) & 0x3,
3562 GET_SWZ( src_reg->swizzle, 1 ) & 0x3,
3563 GET_SWZ( src_reg->swizzle, 2 ) & 0x3,
3564 GET_SWZ( src_reg->swizzle, 3 ) & 0x3);
3565
3566 if ((src_reg->negate & 0xf) == NEGATE_XYZW)
3567 src = ureg_negate(src);
3568
3569 if (src_reg->reladdr != NULL) {
3570 /* Normally ureg_src_indirect() would be used here, but a stupid compiler
3571 * bug in g++ makes ureg_src_indirect (an inline C function) erroneously
3572 * set the bit for src.Negate. So we have to do the operation manually
3573 * here to work around the compiler's problems. */
3574 /*src = ureg_src_indirect(src, ureg_src(t->address[0]));*/
3575 struct ureg_src addr = ureg_src(t->address[0]);
3576 src.Indirect = 1;
3577 src.IndirectFile = addr.File;
3578 src.IndirectIndex = addr.Index;
3579 src.IndirectSwizzle = addr.SwizzleX;
3580
3581 if (src_reg->file != PROGRAM_INPUT &&
3582 src_reg->file != PROGRAM_OUTPUT) {
3583 /* If src_reg->index was negative, it was set to zero in
3584 * src_register(). Reassign it now. But don't do this
3585 * for input/output regs since they get remapped while
3586 * const buffers don't.
3587 */
3588 src.Index = src_reg->index;
3589 }
3590 }
3591
3592 return src;
3593 }
3594
3595 static void
3596 compile_tgsi_instruction(struct st_translate *t,
3597 const struct glsl_to_tgsi_instruction *inst)
3598 {
3599 struct ureg_program *ureg = t->ureg;
3600 GLuint i;
3601 struct ureg_dst dst[1];
3602 struct ureg_src src[4];
3603 unsigned num_dst;
3604 unsigned num_src;
3605
3606 num_dst = num_inst_dst_regs( inst->op );
3607 num_src = num_inst_src_regs( inst->op );
3608
3609 if (num_dst)
3610 dst[0] = translate_dst( t,
3611 &inst->dst,
3612 inst->saturate);
3613
3614 for (i = 0; i < num_src; i++)
3615 src[i] = translate_src( t, &inst->src[i] );
3616
3617 switch( inst->op ) {
3618 case TGSI_OPCODE_BGNLOOP:
3619 case TGSI_OPCODE_CAL:
3620 case TGSI_OPCODE_ELSE:
3621 case TGSI_OPCODE_ENDLOOP:
3622 case TGSI_OPCODE_IF:
3623 debug_assert(num_dst == 0);
3624 ureg_label_insn( ureg,
3625 inst->op,
3626 src, num_src,
3627 get_label( t,
3628 inst->op == TGSI_OPCODE_CAL ? inst->function->sig_id : 0 ));
3629 return;
3630
3631 case TGSI_OPCODE_TEX:
3632 case TGSI_OPCODE_TXB:
3633 case TGSI_OPCODE_TXD:
3634 case TGSI_OPCODE_TXL:
3635 case TGSI_OPCODE_TXP:
3636 src[num_src++] = t->samplers[inst->sampler];
3637 ureg_tex_insn( ureg,
3638 inst->op,
3639 dst, num_dst,
3640 translate_texture_target( inst->tex_target,
3641 inst->tex_shadow ),
3642 src, num_src );
3643 return;
3644
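   /* The two cases below simply clamp the writemask: only the .xy result of
    * SCS and the .xyz result of XPD are of interest here, so we avoid
    * (presumably) asking drivers to write the remaining channels.
    */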
3645 case TGSI_OPCODE_SCS:
3646 dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XY );
3647 ureg_insn( ureg,
3648 inst->op,
3649 dst, num_dst,
3650 src, num_src );
3651 break;
3652
3653 case TGSI_OPCODE_XPD:
3654 dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XYZ );
3655 ureg_insn( ureg,
3656 inst->op,
3657 dst, num_dst,
3658 src, num_src );
3659 break;
3660
3661 default:
3662 ureg_insn( ureg,
3663 inst->op,
3664 dst, num_dst,
3665 src, num_src );
3666 break;
3667 }
3668 }
3669
3670 /**
3671 * Emit the TGSI instructions to adjust the WPOS pixel center convention
3672 * Basically, add (adjX, adjY) to the fragment position.
3673 */
3674 static void
3675 emit_adjusted_wpos( struct st_translate *t,
3676 const struct gl_program *program,
3677 GLfloat adjX, GLfloat adjY)
3678 {
3679 struct ureg_program *ureg = t->ureg;
3680 struct ureg_dst wpos_temp = ureg_DECL_temporary(ureg);
3681 struct ureg_src wpos_input = t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]];
3682
3683 /* Note that we bias X and Y and pass Z and W through unchanged.
3684 * The shader might also use gl_FragCoord.w and .z.
3685 */
3686 ureg_ADD(ureg, wpos_temp, wpos_input,
3687 ureg_imm4f(ureg, adjX, adjY, 0.0f, 0.0f));
3688
3689 t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]] = ureg_src(wpos_temp);
3690 }
3691
3692
3693 /**
3694 * Emit the TGSI instructions for inverting the WPOS y coordinate.
3695 * This code is unavoidable because the inversion also depends on whether
3696 * an FBO is bound (hence the STATE_FB_WPOS_Y_TRANSFORM state constant).
3697 */
3698 static void
3699 emit_wpos_inversion( struct st_translate *t,
3700 const struct gl_program *program,
3701 boolean invert)
3702 {
3703 struct ureg_program *ureg = t->ureg;
3704
3705 /* Fragment program uses fragment position input.
3706 * Need to replace instances of INPUT[WPOS] with temp T
3707 * where T = INPUT[WPOS] but with its Y component inverted.
3708 */
3709 static const gl_state_index wposTransformState[STATE_LENGTH]
3710 = { STATE_INTERNAL, STATE_FB_WPOS_Y_TRANSFORM,
3711 (gl_state_index)0, (gl_state_index)0, (gl_state_index)0 };
3712
3713 /* XXX: note we are modifying the incoming shader here! Need to
3714 * do this before emitting the constant decls below, or this
3715 * will be missed:
3716 */
3717 unsigned wposTransConst = _mesa_add_state_reference(program->Parameters,
3718 wposTransformState);
3719
3720 struct ureg_src wpostrans = ureg_DECL_constant( ureg, wposTransConst );
3721 struct ureg_dst wpos_temp;
3722 struct ureg_src wpos_input = t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]];
3723
3724 /* MOV wpos_temp, input[wpos]
3725 */
3726 if (wpos_input.File == TGSI_FILE_TEMPORARY)
3727 wpos_temp = ureg_dst(wpos_input);
3728 else {
3729 wpos_temp = ureg_DECL_temporary( ureg );
3730 ureg_MOV( ureg, wpos_temp, wpos_input );
3731 }
3732
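   /* The STATE_FB_WPOS_Y_TRANSFORM constant is assumed to hold two
    * (scale, bias) pairs: .xy for the inverted case and .zw for the
    * pass-through case, so each MAD below computes y' = y * scale + bias.
    */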
3733 if (invert) {
3734 /* MAD wpos_temp.y, wpos_input, wpostrans.xxxx, wpostrans.yyyy
3735 */
3736 ureg_MAD( ureg,
3737 ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y ),
3738 wpos_input,
3739 ureg_scalar(wpostrans, 0),
3740 ureg_scalar(wpostrans, 1));
3741 } else {
3742 /* MAD wpos_temp.y, wpos_input, wpostrans.zzzz, wpostrans.wwww
3743 */
3744 ureg_MAD( ureg,
3745 ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y ),
3746 wpos_input,
3747 ureg_scalar(wpostrans, 2),
3748 ureg_scalar(wpostrans, 3));
3749 }
3750
3751 /* Use wpos_temp as position input from here on:
3752 */
3753 t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]] = ureg_src(wpos_temp);
3754 }
3755
3756
3757 /**
3758 * Emit fragment position/coordinate code.
3759 */
3760 static void
3761 emit_wpos(struct st_context *st,
3762 struct st_translate *t,
3763 const struct gl_program *program,
3764 struct ureg_program *ureg)
3765 {
3766 const struct gl_fragment_program *fp =
3767 (const struct gl_fragment_program *) program;
3768 struct pipe_screen *pscreen = st->pipe->screen;
3769 boolean invert = FALSE;
3770
3771 if (fp->OriginUpperLeft) {
3772 /* Fragment shader wants origin in upper-left */
3773 if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT)) {
3774 /* the driver supports upper-left origin */
3775 }
3776 else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT)) {
3777 /* the driver supports lower-left origin, need to invert Y */
3778 ureg_property_fs_coord_origin(ureg, TGSI_FS_COORD_ORIGIN_LOWER_LEFT);
3779 invert = TRUE;
3780 }
3781 else
3782 assert(0);
3783 }
3784 else {
3785 /* Fragment shader wants origin in lower-left */
3786 if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT))
3787 /* the driver supports lower-left origin */
3788 ureg_property_fs_coord_origin(ureg, TGSI_FS_COORD_ORIGIN_LOWER_LEFT);
3789 else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT))
3790 /* the driver supports upper-left origin, need to invert Y */
3791 invert = TRUE;
3792 else
3793 assert(0);
3794 }
3795
3796 if (fp->PixelCenterInteger) {
3797 /* Fragment shader wants pixel center integer */
3798 if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER))
3799 /* the driver supports pixel center integer */
3800 ureg_property_fs_coord_pixel_center(ureg, TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
3801 else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER))
3802 /* the driver supports pixel center half integer, need to bias X,Y */
3803 emit_adjusted_wpos(t, program, 0.5f, invert ? 0.5f : -0.5f);
3804 else
3805 assert(0);
3806 }
3807 else {
3808 /* Fragment shader wants pixel center half integer */
3809 if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER)) {
3810 /* the driver supports pixel center half integer */
3811 }
3812 else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER)) {
3813 /* the driver supports pixel center integer, need to bias X,Y */
3814 ureg_property_fs_coord_pixel_center(ureg, TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
3815 emit_adjusted_wpos(t, program, 0.5f, invert ? -0.5f : 0.5f);
3816 }
3817 else
3818 assert(0);
3819 }
3820
3821 /* We invert after the adjustment so that we avoid the MOV to a temporary,
3822 * and reuse the adjustment ADD instead. */
3823 emit_wpos_inversion(t, program, invert);
3824 }
3825
3826 /**
3827 * OpenGL's fragment gl_FrontFace input is 1 for front-facing, 0 for back.
3828 * TGSI uses +1 for front, -1 for back.
3829 * This function converts the TGSI value to the GL value. Simply clamping/
3830 * saturating the value to [0,1] does the job.
3831 */
3832 static void
3833 emit_face_var(struct st_translate *t)
3834 {
3835 struct ureg_program *ureg = t->ureg;
3836 struct ureg_dst face_temp = ureg_DECL_temporary(ureg);
3837 struct ureg_src face_input = t->inputs[t->inputMapping[FRAG_ATTRIB_FACE]];
3838
3839 /* MOV_SAT face_temp, input[face] */
3840 face_temp = ureg_saturate(face_temp);
3841 ureg_MOV(ureg, face_temp, face_input);
3842
3843 /* Use face_temp as face input from here on: */
3844 t->inputs[t->inputMapping[FRAG_ATTRIB_FACE]] = ureg_src(face_temp);
3845 }
3846
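/**
 * Pass the vertex edge flag attribute through to the edge flag output
 * with a simple MOV.
 */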
3847 static void
3848 emit_edgeflags(struct st_translate *t)
3849 {
3850 struct ureg_program *ureg = t->ureg;
3851 struct ureg_dst edge_dst = t->outputs[t->outputMapping[VERT_RESULT_EDGE]];
3852 struct ureg_src edge_src = t->inputs[t->inputMapping[VERT_ATTRIB_EDGEFLAG]];
3853
3854 ureg_MOV(ureg, edge_dst, edge_src);
3855 }
3856
3857 /**
3858 * Translate intermediate IR (glsl_to_tgsi_instruction) to TGSI format.
3859 * \param program the program to translate
3860 * \param numInputs number of input registers used
3861 * \param inputMapping maps Mesa program inputs to TGSI generic
3862 * input indexes
3863 * \param inputSemanticName the TGSI_SEMANTIC flag for each input
3864 * \param inputSemanticIndex the semantic index (ex: which texcoord) for
3865 * each input
3866 * \param interpMode the TGSI_INTERPOLATE_LINEAR/PERSP mode for each input
3867 * \param numOutputs number of output registers used
3868 * \param outputMapping maps Mesa program outputs to TGSI
3869 * generic outputs
3870 * \param outputSemanticName the TGSI_SEMANTIC flag for each output
3871 * \param outputSemanticIndex the semantic index (ex: which texcoord) for
3872 * each output
3873 *
3874 * \return PIPE_OK on success, or a PIPE_ERROR code such as
3875 * PIPE_ERROR_OUT_OF_MEMORY or PIPE_ERROR_BAD_INPUT on failure
3875 */
3876 extern "C" enum pipe_error
3877 st_translate_program(
3878 struct gl_context *ctx,
3879 uint procType,
3880 struct ureg_program *ureg,
3881 glsl_to_tgsi_visitor *program,
3882 const struct gl_program *proginfo,
3883 GLuint numInputs,
3884 const GLuint inputMapping[],
3885 const ubyte inputSemanticName[],
3886 const ubyte inputSemanticIndex[],
3887 const GLuint interpMode[],
3888 GLuint numOutputs,
3889 const GLuint outputMapping[],
3890 const ubyte outputSemanticName[],
3891 const ubyte outputSemanticIndex[],
3892 boolean passthrough_edgeflags )
3893 {
3894 struct st_translate translate, *t;
3895 unsigned i;
3896 enum pipe_error ret = PIPE_OK;
3897
3898 assert(numInputs <= Elements(t->inputs));
3899 assert(numOutputs <= Elements(t->outputs));
3900
3901 t = &translate;
3902 memset(t, 0, sizeof *t);
3903
3904 t->procType = procType;
3905 t->inputMapping = inputMapping;
3906 t->outputMapping = outputMapping;
3907 t->ureg = ureg;
3908 t->pointSizeOutIndex = -1;
3909 t->prevInstWrotePointSize = GL_FALSE;
3910
3911 /*
3912 * Declare input attributes.
3913 */
3914 if (procType == TGSI_PROCESSOR_FRAGMENT) {
3915 for (i = 0; i < numInputs; i++) {
3916 t->inputs[i] = ureg_DECL_fs_input(ureg,
3917 inputSemanticName[i],
3918 inputSemanticIndex[i],
3919 interpMode[i]);
3920 }
3921
3922 if (proginfo->InputsRead & FRAG_BIT_WPOS) {
3923 /* Must do this after setting up t->inputs, and before
3924 * emitting constant references, below:
3925 */
3926 emit_wpos(st_context(ctx), t, proginfo, ureg);
3927 }
3928
3929 if (proginfo->InputsRead & FRAG_BIT_FACE)
3930 emit_face_var(t);
3931
3932 /*
3933 * Declare output attributes.
3934 */
3935 for (i = 0; i < numOutputs; i++) {
3936 switch (outputSemanticName[i]) {
3937 case TGSI_SEMANTIC_POSITION:
3938 t->outputs[i] = ureg_DECL_output( ureg,
3939 TGSI_SEMANTIC_POSITION, /* Z / Depth */
3940 outputSemanticIndex[i] );
3941
3942 t->outputs[i] = ureg_writemask( t->outputs[i],
3943 TGSI_WRITEMASK_Z );
3944 break;
3945 case TGSI_SEMANTIC_STENCIL:
3946 t->outputs[i] = ureg_DECL_output( ureg,
3947 TGSI_SEMANTIC_STENCIL, /* Stencil */
3948 outputSemanticIndex[i] );
3949 t->outputs[i] = ureg_writemask( t->outputs[i],
3950 TGSI_WRITEMASK_Y );
3951 break;
3952 case TGSI_SEMANTIC_COLOR:
3953 t->outputs[i] = ureg_DECL_output( ureg,
3954 TGSI_SEMANTIC_COLOR,
3955 outputSemanticIndex[i] );
3956 break;
3957 default:
3958 debug_assert(0);
3959 return PIPE_ERROR_BAD_INPUT;
3960 }
3961 }
3962 }
3963 else if (procType == TGSI_PROCESSOR_GEOMETRY) {
3964 for (i = 0; i < numInputs; i++) {
3965 t->inputs[i] = ureg_DECL_gs_input(ureg,
3966 i,
3967 inputSemanticName[i],
3968 inputSemanticIndex[i]);
3969 }
3970
3971 for (i = 0; i < numOutputs; i++) {
3972 t->outputs[i] = ureg_DECL_output( ureg,
3973 outputSemanticName[i],
3974 outputSemanticIndex[i] );
3975 }
3976 }
3977 else {
3978 assert(procType == TGSI_PROCESSOR_VERTEX);
3979
3980 for (i = 0; i < numInputs; i++) {
3981 t->inputs[i] = ureg_DECL_vs_input(ureg, i);
3982 }
3983
3984 for (i = 0; i < numOutputs; i++) {
3985 t->outputs[i] = ureg_DECL_output( ureg,
3986 outputSemanticName[i],
3987 outputSemanticIndex[i] );
3988 if ((outputSemanticName[i] == TGSI_SEMANTIC_PSIZE) && proginfo->Id) {
3989 /* Writing to the point size result register requires special
3990 * handling to implement clamping.
3991 */
3992 static const gl_state_index pointSizeClampState[STATE_LENGTH]
3993 = { STATE_INTERNAL, STATE_POINT_SIZE_IMPL_CLAMP, (gl_state_index)0, (gl_state_index)0, (gl_state_index)0 };
3994 /* XXX: note we are modifying the incoming shader here! Need to
3995 * do this before emitting the constant decls below, or this
3996 * will be missed.
3997 */
3998 unsigned pointSizeClampConst =
3999 _mesa_add_state_reference(proginfo->Parameters,
4000 pointSizeClampState);
4001 struct ureg_dst psizregtemp = ureg_DECL_temporary( ureg );
4002 t->pointSizeConst = ureg_DECL_constant( ureg, pointSizeClampConst );
4003 t->pointSizeResult = t->outputs[i];
4004 t->pointSizeOutIndex = i;
4005 t->outputs[i] = psizregtemp;
4006 }
4007 }
4008 if (passthrough_edgeflags)
4009 emit_edgeflags(t);
4010 }
4011
4012 /* Declare address register.
4013 */
4014 if (program->num_address_regs > 0) {
4015 debug_assert( program->num_address_regs == 1 );
4016 t->address[0] = ureg_DECL_address( ureg );
4017 }
4018
4019 /* Declare misc input registers
4020 */
4021 {
4022 GLbitfield sysInputs = proginfo->SystemValuesRead;
4023 unsigned numSys = 0;
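      /* Declare a TGSI system value for each Mesa system value the
       * program reads, using the mesa_sysval_to_semantic mapping.
       */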
4024 for (i = 0; sysInputs; i++) {
4025 if (sysInputs & (1 << i)) {
4026 unsigned semName = mesa_sysval_to_semantic[i];
4027 t->systemValues[i] = ureg_DECL_system_value(ureg, numSys, semName, 0);
4028 numSys++;
4029 sysInputs &= ~(1 << i);
4030 }
4031 }
4032 }
4033
4034 if (program->indirect_addr_temps) {
4035 /* If temps are accessed with indirect addressing, declare temporaries
4036 * in sequential order. Else, we declare them on demand elsewhere.
4037 * (Note: the number of temporaries is equal to program->next_temp)
4038 */
4039 for (i = 0; i < (unsigned)program->next_temp; i++) {
4040 /* XXX use TGSI_FILE_TEMPORARY_ARRAY when it's supported by ureg */
4041 t->temps[i] = ureg_DECL_temporary( t->ureg );
4042 }
4043 }
4044
4045 /* Emit constants and immediates. Mesa uses a single index space
4046 * for these, so we put all the translated regs in t->constants.
4047 * XXX: this entire if block depends on proginfo->Parameters from Mesa IR
4048 */
4049 if (proginfo->Parameters) {
4050 t->constants = (struct ureg_src *)CALLOC( proginfo->Parameters->NumParameters * sizeof t->constants[0] );
4051 if (t->constants == NULL) {
4052 ret = PIPE_ERROR_OUT_OF_MEMORY;
4053 goto out;
4054 }
4055
4056 for (i = 0; i < proginfo->Parameters->NumParameters; i++) {
4057 switch (proginfo->Parameters->Parameters[i].Type) {
4058 case PROGRAM_ENV_PARAM:
4059 case PROGRAM_LOCAL_PARAM:
4060 case PROGRAM_STATE_VAR:
4061 case PROGRAM_NAMED_PARAM:
4062 case PROGRAM_UNIFORM:
4063 t->constants[i] = ureg_DECL_constant( ureg, i );
4064 break;
4065
4066 /* Emit immediates only when there's no indirect addressing of
4067 * the const buffer.
4068 * FIXME: Be smarter and recognize param arrays:
4069 * indirect addressing is only valid within the referenced
4070 * array.
4071 */
4072 case PROGRAM_CONSTANT:
4073 if (program->indirect_addr_consts)
4074 t->constants[i] = ureg_DECL_constant( ureg, i );
4075 else
4076 switch(proginfo->Parameters->Parameters[i].DataType)
4077 {
4078 case GL_FLOAT:
4079 case GL_FLOAT_VEC2:
4080 case GL_FLOAT_VEC3:
4081 case GL_FLOAT_VEC4:
4082 t->constants[i] = ureg_DECL_immediate(ureg, (float *)proginfo->Parameters->ParameterValues[i], 4);
4083 break;
4084 case GL_INT:
4085 case GL_INT_VEC2:
4086 case GL_INT_VEC3:
4087 case GL_INT_VEC4:
4088 t->constants[i] = ureg_DECL_immediate_int(ureg, (int *)proginfo->Parameters->ParameterValues[i], 4);
4089 break;
4090 case GL_UNSIGNED_INT:
4091 case GL_UNSIGNED_INT_VEC2:
4092 case GL_UNSIGNED_INT_VEC3:
4093 case GL_UNSIGNED_INT_VEC4:
4094 case GL_BOOL:
4095 case GL_BOOL_VEC2:
4096 case GL_BOOL_VEC3:
4097 case GL_BOOL_VEC4:
4098 t->constants[i] = ureg_DECL_immediate_uint(ureg, (unsigned *)proginfo->Parameters->ParameterValues[i], 4);
4099 break;
4100 default:
4101 assert(!"should not get here");
4102 }
4103 break;
4104 default:
4105 break;
4106 }
4107 }
4108 }
4109
4110 /* texture samplers */
4111 for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) {
4112 if (program->samplers_used & (1 << i)) {
4113 t->samplers[i] = ureg_DECL_sampler( ureg, i );
4114 }
4115 }
4116
4117 /* Emit each instruction in turn:
4118 */
4119 foreach_iter(exec_list_iterator, iter, program->instructions) {
4120 set_insn_start( t, ureg_get_instruction_number( ureg ));
4121 compile_tgsi_instruction( t, (glsl_to_tgsi_instruction *)iter.get() );
4122
4123 if (t->prevInstWrotePointSize && proginfo->Id) {
4124 /* The previous instruction wrote to the (fake) vertex point size
4125 * result register. Now we need to clamp that value to the min/max
4126 * point size range, putting the result into the real point size
4127 * register.
4128 * Note that we can't easily do this at the end of the program due to
4129 * a possible early return.
4130 */
4131 set_insn_start( t, ureg_get_instruction_number( ureg ));
4132 ureg_MAX( t->ureg,
4133 ureg_writemask(t->outputs[t->pointSizeOutIndex], WRITEMASK_X),
4134 ureg_src(t->outputs[t->pointSizeOutIndex]),
4135 ureg_swizzle(t->pointSizeConst, 1,1,1,1));
4136 ureg_MIN( t->ureg, ureg_writemask(t->pointSizeResult, WRITEMASK_X),
4137 ureg_src(t->outputs[t->pointSizeOutIndex]),
4138 ureg_swizzle(t->pointSizeConst, 2,2,2,2));
4139 }
4140 t->prevInstWrotePointSize = GL_FALSE;
4141 }
4142
4143 /* Fix up all emitted labels:
4144 */
4145 for (i = 0; i < t->labels_count; i++) {
4146 ureg_fixup_label( ureg,
4147 t->labels[i].token,
4148 t->insn[t->labels[i].branch_target] );
4149 }
4150
4151 out:
4152 FREE(t->insn);
4153 FREE(t->labels);
4154 FREE(t->constants);
4155
4156 if (t->error) {
4157 debug_printf("%s: translate error flag set\n", __FUNCTION__);
4158 }
4159
4160 return ret;
4161 }
4162 /* ----------------------------- End TGSI code ------------------------------ */
4163
4164 /**
4165 * Convert a shader's GLSL IR into a Mesa gl_program, but without
4166 * generating Mesa IR.
4167 */
4168 static struct gl_program *
4169 get_mesa_program(struct gl_context *ctx,
4170 struct gl_shader_program *shader_program,
4171 struct gl_shader *shader)
4172 {
4173 glsl_to_tgsi_visitor* v = new glsl_to_tgsi_visitor();
4174 struct gl_program *prog;
4175 GLenum target;
4176 const char *target_string;
4177 GLboolean progress;
4178 struct gl_shader_compiler_options *options =
4179 &ctx->ShaderCompilerOptions[_mesa_shader_type_to_index(shader->Type)];
4180
4181 switch (shader->Type) {
4182 case GL_VERTEX_SHADER:
4183 target = GL_VERTEX_PROGRAM_ARB;
4184 target_string = "vertex";
4185 break;
4186 case GL_FRAGMENT_SHADER:
4187 target = GL_FRAGMENT_PROGRAM_ARB;
4188 target_string = "fragment";
4189 break;
4190 case GL_GEOMETRY_SHADER:
4191 target = GL_GEOMETRY_PROGRAM_NV;
4192 target_string = "geometry";
4193 break;
4194 default:
4195 assert(!"should not be reached");
4196 return NULL;
4197 }
4198
4199 validate_ir_tree(shader->ir);
4200
4201 prog = ctx->Driver.NewProgram(ctx, target, shader_program->Name);
4202 if (!prog)
4203 return NULL;
4204 prog->Parameters = _mesa_new_parameter_list();
4205 prog->Varying = _mesa_new_parameter_list();
4206 prog->Attributes = _mesa_new_parameter_list();
4207 v->ctx = ctx;
4208 v->prog = prog;
4209 v->shader_program = shader_program;
4210 v->options = options;
4211 v->glsl_version = ctx->Const.GLSLVersion;
4212
4213 add_uniforms_to_parameters_list(shader_program, shader, prog);
4214
4215 /* Emit intermediate IR for main(). */
4216 visit_exec_list(shader->ir, v);
4217
4218 /* Now emit bodies for any functions that were used. */
4219 do {
4220 progress = GL_FALSE;
4221
4222 foreach_iter(exec_list_iterator, iter, v->function_signatures) {
4223 function_entry *entry = (function_entry *)iter.get();
4224
4225 if (!entry->bgn_inst) {
4226 v->current_function = entry;
4227
4228 entry->bgn_inst = v->emit(NULL, TGSI_OPCODE_BGNSUB);
4229 entry->bgn_inst->function = entry;
4230
4231 visit_exec_list(&entry->sig->body, v);
4232
4233 glsl_to_tgsi_instruction *last;
4234 last = (glsl_to_tgsi_instruction *)v->instructions.get_tail();
4235 if (last->op != TGSI_OPCODE_RET)
4236 v->emit(NULL, TGSI_OPCODE_RET);
4237
4238 glsl_to_tgsi_instruction *end;
4239 end = v->emit(NULL, TGSI_OPCODE_ENDSUB);
4240 end->function = entry;
4241
4242 progress = GL_TRUE;
4243 }
4244 }
4245 } while (progress);
4246
4247 #if 0
4248 /* Print out some information (for debugging purposes) used by the
4249 * optimization passes. */
4250 for (i=0; i < v->next_temp; i++) {
4251 int fr = v->get_first_temp_read(i);
4252 int fw = v->get_first_temp_write(i);
4253 int lr = v->get_last_temp_read(i);
4254 int lw = v->get_last_temp_write(i);
4255
4256 printf("Temp %d: FR=%3d FW=%3d LR=%3d LW=%3d\n", i, fr, fw, lr, lw);
4257 assert(fw <= fr);
4258 }
4259 #endif
4260
4261 /* Remove reads from output registers, and from varyings in vertex shaders. */
4262 v->remove_output_reads(PROGRAM_OUTPUT);
4263 if (target == GL_VERTEX_PROGRAM_ARB)
4264 v->remove_output_reads(PROGRAM_VARYING);
4265
4266 /* Perform the simplify_cmp optimization, which is required by r300g. */
4267 v->simplify_cmp();
4268
4269 /* Perform optimizations on the instructions in the glsl_to_tgsi_visitor.
4270 * FIXME: These passes to optimize temporary registers don't work when there
4271 * is indirect addressing of the temporary register space. We need proper
4272 * array support so that we don't have to give up these passes in every
4273 * shader that uses arrays.
4274 */
4275 if (!v->indirect_addr_temps) {
4276 v->copy_propagate();
4277 v->eliminate_dead_code();
4278 v->merge_registers();
4279 v->renumber_registers();
4280 }
4281
4282 /* Write the END instruction. */
4283 v->emit(NULL, TGSI_OPCODE_END);
4284
4285 if (ctx->Shader.Flags & GLSL_DUMP) {
4286 printf("\n");
4287 printf("GLSL IR for linked %s program %d:\n", target_string,
4288 shader_program->Name);
4289 _mesa_print_ir(shader->ir, NULL);
4290 printf("\n");
4291 printf("\n");
4292 }
4293
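   /* No Mesa IR instructions are generated for this program; TGSI is
    * emitted later from the glsl_to_tgsi_visitor attached below.
    */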
4294 prog->Instructions = NULL;
4295 prog->NumInstructions = 0;
4296
4297 do_set_program_inouts(shader->ir, prog);
4298 count_resources(v, prog);
4299
4300 check_resources(ctx, shader_program, v, prog);
4301
4302 _mesa_reference_program(ctx, &shader->Program, prog);
4303
4304 struct st_vertex_program *stvp;
4305 struct st_fragment_program *stfp;
4306 struct st_geometry_program *stgp;
4307
4308 switch (shader->Type) {
4309 case GL_VERTEX_SHADER:
4310 stvp = (struct st_vertex_program *)prog;
4311 stvp->glsl_to_tgsi = v;
4312 break;
4313 case GL_FRAGMENT_SHADER:
4314 stfp = (struct st_fragment_program *)prog;
4315 stfp->glsl_to_tgsi = v;
4316 break;
4317 case GL_GEOMETRY_SHADER:
4318 stgp = (struct st_geometry_program *)prog;
4319 stgp->glsl_to_tgsi = v;
4320 break;
4321 default:
4322 assert(!"should not be reached");
4323 return NULL;
4324 }
4325
4326 return prog;
4327 }
4328
4329 extern "C" {
4330
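/**
 * Allocate a new gl_shader object of the given type.
 */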
4331 struct gl_shader *
4332 st_new_shader(struct gl_context *ctx, GLuint name, GLuint type)
4333 {
4334 struct gl_shader *shader;
4335 assert(type == GL_FRAGMENT_SHADER || type == GL_VERTEX_SHADER ||
4336 type == GL_GEOMETRY_SHADER_ARB);
4337 shader = rzalloc(NULL, struct gl_shader);
4338 if (shader) {
4339 shader->Type = type;
4340 shader->Name = name;
4341 _mesa_init_shader(ctx, shader);
4342 }
4343 return shader;
4344 }
4345
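/**
 * Allocate a new gl_shader_program object.
 */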
4346 struct gl_shader_program *
4347 st_new_shader_program(struct gl_context *ctx, GLuint name)
4348 {
4349 struct gl_shader_program *shProg;
4350 shProg = rzalloc(NULL, struct gl_shader_program);
4351 if (shProg) {
4352 shProg->Name = name;
4353 _mesa_init_shader_program(ctx, shProg);
4354 }
4355 return shProg;
4356 }
4357
4358 /**
4359 * Link a shader.
4360 * Called via ctx->Driver.LinkShader().
4361 * This actually involves converting GLSL IR into an intermediate TGSI-like IR
4362 * with code lowering and other optimizations.
4363 */
4364 GLboolean
4365 st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
4366 {
4367 assert(prog->LinkStatus);
4368
4369 for (unsigned i = 0; i < MESA_SHADER_TYPES; i++) {
4370 if (prog->_LinkedShaders[i] == NULL)
4371 continue;
4372
4373 bool progress;
4374 exec_list *ir = prog->_LinkedShaders[i]->ir;
4375 const struct gl_shader_compiler_options *options =
4376 &ctx->ShaderCompilerOptions[_mesa_shader_type_to_index(prog->_LinkedShaders[i]->Type)];
4377
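      /* Repeatedly apply the lowering passes and common optimizations
       * until they stop making progress.
       */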
4378 do {
4379 progress = false;
4380
4381 /* Lowering */
4382 do_mat_op_to_vec(ir);
4383 lower_instructions(ir, (MOD_TO_FRACT | DIV_TO_MUL_RCP | EXP_TO_EXP2
4384 | LOG_TO_LOG2
4385 | ((options->EmitNoPow) ? POW_TO_EXP2 : 0)));
4386
4387 progress = do_lower_jumps(ir, true, true, options->EmitNoMainReturn, options->EmitNoCont, options->EmitNoLoops) || progress;
4388
4389 progress = do_common_optimization(ir, true, options->MaxUnrollIterations) || progress;
4390
4391 progress = lower_quadop_vector(ir, true) || progress;
4392
4393 if (options->EmitNoIfs) {
4394 progress = lower_discard(ir) || progress;
4395 progress = lower_if_to_cond_assign(ir) || progress;
4396 }
4397
4398 if (options->EmitNoNoise)
4399 progress = lower_noise(ir) || progress;
4400
4401 /* If there are forms of indirect addressing that the driver
4402 * cannot handle, perform the lowering pass.
4403 */
4404 if (options->EmitNoIndirectInput || options->EmitNoIndirectOutput
4405 || options->EmitNoIndirectTemp || options->EmitNoIndirectUniform)
4406 progress =
4407 lower_variable_index_to_cond_assign(ir,
4408 options->EmitNoIndirectInput,
4409 options->EmitNoIndirectOutput,
4410 options->EmitNoIndirectTemp,
4411 options->EmitNoIndirectUniform)
4412 || progress;
4413
4414 progress = do_vec_index_to_cond_assign(ir) || progress;
4415 } while (progress);
4416
4417 validate_ir_tree(ir);
4418 }
4419
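   /* Generate a gl_program for each linked shader stage and notify the
    * driver of the new program.
    */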
4420 for (unsigned i = 0; i < MESA_SHADER_TYPES; i++) {
4421 struct gl_program *linked_prog;
4422
4423 if (prog->_LinkedShaders[i] == NULL)
4424 continue;
4425
4426 linked_prog = get_mesa_program(ctx, prog, prog->_LinkedShaders[i]);
4427
4428 if (linked_prog) {
4429 bool ok = true;
4430
4431 switch (prog->_LinkedShaders[i]->Type) {
4432 case GL_VERTEX_SHADER:
4433 _mesa_reference_vertprog(ctx, &prog->VertexProgram,
4434 (struct gl_vertex_program *)linked_prog);
4435 ok = ctx->Driver.ProgramStringNotify(ctx, GL_VERTEX_PROGRAM_ARB,
4436 linked_prog);
4437 break;
4438 case GL_FRAGMENT_SHADER:
4439 _mesa_reference_fragprog(ctx, &prog->FragmentProgram,
4440 (struct gl_fragment_program *)linked_prog);
4441 ok = ctx->Driver.ProgramStringNotify(ctx, GL_FRAGMENT_PROGRAM_ARB,
4442 linked_prog);
4443 break;
4444 case GL_GEOMETRY_SHADER:
4445 _mesa_reference_geomprog(ctx, &prog->GeometryProgram,
4446 (struct gl_geometry_program *)linked_prog);
4447 ok = ctx->Driver.ProgramStringNotify(ctx, GL_GEOMETRY_PROGRAM_NV,
4448 linked_prog);
4449 break;
4450 }
4451 if (!ok) {
4452 return GL_FALSE;
4453 }
4454 }
4455
4456 _mesa_reference_program(ctx, &linked_prog, NULL);
4457 }
4458
4459 return GL_TRUE;
4460 }
4461
4462
4463 /**
4464 * Link a GLSL shader program. Called via glLinkProgram().
4465 */
4466 void
4467 st_glsl_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
4468 {
4469 unsigned int i;
4470
4471 _mesa_clear_shader_program_data(ctx, prog);
4472
4473 prog->LinkStatus = GL_TRUE;
4474
4475 for (i = 0; i < prog->NumShaders; i++) {
4476 if (!prog->Shaders[i]->CompileStatus) {
4477 fail_link(prog, "linking with uncompiled shader");
4478 prog->LinkStatus = GL_FALSE;
4479 }
4480 }
4481
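   /* Start from a clean slate: create a fresh varying list and drop
    * references to any previously linked per-stage programs.
    */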
4482 prog->Varying = _mesa_new_parameter_list();
4483 _mesa_reference_vertprog(ctx, &prog->VertexProgram, NULL);
4484 _mesa_reference_fragprog(ctx, &prog->FragmentProgram, NULL);
4485 _mesa_reference_geomprog(ctx, &prog->GeometryProgram, NULL);
4486
4487 if (prog->LinkStatus) {
4488 link_shaders(ctx, prog);
4489 }
4490
4491 if (prog->LinkStatus) {
4492 if (!ctx->Driver.LinkShader(ctx, prog)) {
4493 prog->LinkStatus = GL_FALSE;
4494 }
4495 }
4496
4497 set_uniform_initializers(ctx, prog);
4498
4499 if (ctx->Shader.Flags & GLSL_DUMP) {
4500 if (!prog->LinkStatus) {
4501 printf("GLSL shader program %d failed to link\n", prog->Name);
4502 }
4503
4504 if (prog->InfoLog && prog->InfoLog[0] != 0) {
4505 printf("GLSL shader program %d info log:\n", prog->Name);
4506 printf("%s\n", prog->InfoLog);
4507 }
4508 }
4509 }
4510
4511 } /* extern "C" */