src/mesa/program/ir_to_mesa.cpp

   1 /*
   2  * Copyright (C) 2005-2007  Brian Paul   All Rights Reserved.
   3  * Copyright (C) 2008  VMware, Inc.   All Rights Reserved.
   4  * Copyright © 2010 Intel Corporation
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a
   7  * copy of this software and associated documentation files (the "Software"),
   8  * to deal in the Software without restriction, including without limitation
   9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  10  * and/or sell copies of the Software, and to permit persons to whom the
  11  * Software is furnished to do so, subject to the following conditions:
  12  *
  13  * The above copyright notice and this permission notice (including the next
  14  * paragraph) shall be included in all copies or substantial portions of the
  15  * Software.
  16  *
  17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  20  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  22  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  23  * DEALINGS IN THE SOFTWARE.
  24  */
  25
  26 /**
  27  * \file ir_to_mesa.cpp
  28  *
  29  * Translate GLSL IR to Mesa's gl_program representation.
  30  */
  31
  32 #include <stdio.h>
  33 #include "main/compiler.h"
  34 #include "ir.h"
  35 #include "ir_visitor.h"
  36 #include "ir_print_visitor.h"
  37 #include "ir_expression_flattening.h"
  38 #include "ir_uniform.h"
  39 #include "glsl_types.h"
  40 #include "glsl_parser_extras.h"
  41 #include "../glsl/program.h"
  42 #include "ir_optimization.h"
  43 #include "ast.h"
  44 #include "linker.h"
  45
  46 #include "main/mtypes.h"
  47 #include "main/shaderobj.h"
  48 #include "program/hash_table.h"
  49
  50 extern "C" {
  51 #include "main/shaderapi.h"
  52 #include "main/uniforms.h"
  53 #include "program/prog_instruction.h"
  54 #include "program/prog_optimize.h"
  55 #include "program/prog_print.h"
  56 #include "program/program.h"
  57 #include "program/prog_uniform.h"
  58 #include "program/prog_parameter.h"
  59 #include "program/sampler.h"
  60 }
  61
  62 class src_reg;
  63 class dst_reg;
  64
  65 static int swizzle_for_size(int size);
  66
  67 /**
  68  * This struct is a corresponding struct to Mesa prog_src_register, with
  69  * wider fields.
  70  */
  71 class src_reg {
  72 public:
  73    src_reg(gl_register_file file, int index, const glsl_type *type)
  74    {
  75       this->file = file;
  76       this->index = index;
  77       if (type && (type->is_scalar() || type->is_vector() || type->is_matrix()))
  78          this->swizzle = swizzle_for_size(type->vector_elements);
  79       else
  80          this->swizzle = SWIZZLE_XYZW;
  81       this->negate = 0;
  82       this->reladdr = NULL;
  83    }
  84
  85    src_reg()
  86    {
  87       this->file = PROGRAM_UNDEFINED;
  88       this->index = 0;
  89       this->swizzle = 0;
  90       this->negate = 0;
  91       this->reladdr = NULL;
  92    }
  93
  94    explicit src_reg(dst_reg reg);
  95
  96    gl_register_file file; /**< PROGRAM_* from Mesa */
  97    int index; /**< temporary index, VERT_ATTRIB_*, FRAG_ATTRIB_*, etc. */
  98    GLuint swizzle; /**< SWIZZLE_XYZWONEZERO swizzles from Mesa. */
  99    int negate; /**< NEGATE_XYZW mask from mesa */
 100    /** Register index should be offset by the integer in this reg. */
 101    src_reg *reladdr;
 102 };
 103
 104 class dst_reg {
 105 public:
 106    dst_reg(gl_register_file file, int writemask)
 107    {
 108       this->file = file;
 109       this->index = 0;
 110       this->writemask = writemask;
 111       this->cond_mask = COND_TR;
 112       this->reladdr = NULL;
 113    }
 114
 115    dst_reg()
 116    {
 117       this->file = PROGRAM_UNDEFINED;
 118       this->index = 0;
 119       this->writemask = 0;
 120       this->cond_mask = COND_TR;
 121       this->reladdr = NULL;
 122    }
 123
 124    explicit dst_reg(src_reg reg);
 125
 126    gl_register_file file; /**< PROGRAM_* from Mesa */
 127    int index; /**< temporary index, VERT_ATTRIB_*, FRAG_ATTRIB_*, etc. */
 128    int writemask; /**< Bitfield of WRITEMASK_[XYZW] */
 129    GLuint cond_mask:4;
 130    /** Register index should be offset by the integer in this reg. */
 131    src_reg *reladdr;
 132 };
 133
 134 src_reg::src_reg(dst_reg reg)
 135 {
 136    this->file = reg.file;
 137    this->index = reg.index;
 138    this->swizzle = SWIZZLE_XYZW;
 139    this->negate = 0;
 140    this->reladdr = reg.reladdr;
 141 }
 142
 143 dst_reg::dst_reg(src_reg reg)
 144 {
 145    this->file = reg.file;
 146    this->index = reg.index;
 147    this->writemask = WRITEMASK_XYZW;
 148    this->cond_mask = COND_TR;
 149    this->reladdr = reg.reladdr;
 150 }
 151
 152 class ir_to_mesa_instruction : public exec_node {
 153 public:
 154    /* Callers of this ralloc-based new need not call delete. It's
 155     * easier to just ralloc_free 'ctx' (or any of its ancestors). */
 156    static void* operator new(size_t size, void *ctx)
 157    {
 158       void *node;
 159
 160       node = rzalloc_size(ctx, size);
 161       assert(node != NULL);
 162
 163       return node;
 164    }
 165
 166    enum prog_opcode op;
 167    dst_reg dst;
 168    src_reg src[3];
 169    /** Pointer to the ir source this tree came from for debugging */
 170    ir_instruction *ir;
 171    GLboolean cond_update;
 172    bool saturate;
 173    int sampler; /**< sampler index */
 174    int tex_target; /**< One of TEXTURE_*_INDEX */
 175    GLboolean tex_shadow;
 176
 177    class function_entry *function; /* Set on OPCODE_CAL or OPCODE_BGNSUB */
 178 };
 179
 180 class variable_storage : public exec_node {
 181 public:
 182    variable_storage(ir_variable *var, gl_register_file file, int index)
 183       : file(file), index(index), var(var)
 184    {
 185       /* empty */
 186    }
 187
 188    gl_register_file file;
 189    int index;
 190    ir_variable *var; /* variable that maps to this, if any */
 191 };
 192
 193 class function_entry : public exec_node {
 194 public:
 195    ir_function_signature *sig;
 196
 197    /**
 198     * identifier of this function signature used by the program.
 199     *
 200     * At the point that Mesa instructions for function calls are
 201     * generated, we don't know the address of the first instruction of
 202     * the function body.  So we make the BranchTarget that is called a
 203     * small integer and rewrite them during set_branchtargets().
 204     */
 205    int sig_id;
 206
 207    /**
 208     * Pointer to first instruction of the function body.
 209     *
 210     * Set during function body emits after main() is processed.
 211     */
 212    ir_to_mesa_instruction *bgn_inst;
 213
 214    /**
 215     * Index of the first instruction of the function body in actual
 216     * Mesa IR.
 217     *
 218     * Set after convertion from ir_to_mesa_instruction to prog_instruction.
 219     */
 220    int inst;
 221
 222    /** Storage for the return value. */
 223    src_reg return_reg;
 224 };
 225
 226 class ir_to_mesa_visitor : public ir_visitor {
 227 public:
 228    ir_to_mesa_visitor();
 229    ~ir_to_mesa_visitor();
 230
 231    function_entry *current_function;
 232
 233    struct gl_context *ctx;
 234    struct gl_program *prog;
 235    struct gl_shader_program *shader_program;
 236    struct gl_shader_compiler_options *options;
 237
 238    int next_temp;
 239
 240    variable_storage *find_variable_storage(ir_variable *var);
 241
 242    function_entry *get_function_signature(ir_function_signature *sig);
 243
 244    src_reg get_temp(const glsl_type *type);
 245    void reladdr_to_temp(ir_instruction *ir, src_reg *reg, int *num_reladdr);
 246
 247    src_reg src_reg_for_float(float val);
 248
 249    /**
 250     * \name Visit methods
 251     *
 252     * As typical for the visitor pattern, there must be one \c visit method for
 253     * each concrete subclass of \c ir_instruction.  Virtual base classes within
 254     * the hierarchy should not have \c visit methods.
 255     */
 256    /*@{*/
 257    virtual void visit(ir_variable *);
 258    virtual void visit(ir_loop *);
 259    virtual void visit(ir_loop_jump *);
 260    virtual void visit(ir_function_signature *);
 261    virtual void visit(ir_function *);
 262    virtual void visit(ir_expression *);
 263    virtual void visit(ir_swizzle *);
 264    virtual void visit(ir_dereference_variable  *);
 265    virtual void visit(ir_dereference_array *);
 266    virtual void visit(ir_dereference_record *);
 267    virtual void visit(ir_assignment *);
 268    virtual void visit(ir_constant *);
 269    virtual void visit(ir_call *);
 270    virtual void visit(ir_return *);
 271    virtual void visit(ir_discard *);
 272    virtual void visit(ir_texture *);
 273    virtual void visit(ir_if *);
 274    /*@}*/
 275
 276    src_reg result;
 277
 278    /** List of variable_storage */
 279    exec_list variables;
 280
 281    /** List of function_entry */
 282    exec_list function_signatures;
 283    int next_signature_id;
 284
 285    /** List of ir_to_mesa_instruction */
 286    exec_list instructions;
 287
 288    ir_to_mesa_instruction *emit(ir_instruction *ir, enum prog_opcode op);
 289
 290    ir_to_mesa_instruction *emit(ir_instruction *ir, enum prog_opcode op,
 291                                 dst_reg dst, src_reg src0);
 292
 293    ir_to_mesa_instruction *emit(ir_instruction *ir, enum prog_opcode op,
 294                                 dst_reg dst, src_reg src0, src_reg src1);
 295
 296    ir_to_mesa_instruction *emit(ir_instruction *ir, enum prog_opcode op,
 297                                 dst_reg dst,
 298                                 src_reg src0, src_reg src1, src_reg src2);
 299
 300    /**
 301     * Emit the correct dot-product instruction for the type of arguments
 302     */
 303    ir_to_mesa_instruction * emit_dp(ir_instruction *ir,
 304                                     dst_reg dst,
 305                                     src_reg src0,
 306                                     src_reg src1,
 307                                     unsigned elements);
 308
 309    void emit_scalar(ir_instruction *ir, enum prog_opcode op,
 310                     dst_reg dst, src_reg src0);
 311
 312    void emit_scalar(ir_instruction *ir, enum prog_opcode op,
 313                     dst_reg dst, src_reg src0, src_reg src1);
 314
 315    void emit_scs(ir_instruction *ir, enum prog_opcode op,
 316                  dst_reg dst, const src_reg &src);
 317
 318    bool try_emit_mad(ir_expression *ir,
 319                           int mul_operand);
 320    bool try_emit_mad_for_and_not(ir_expression *ir,
 321                                  int mul_operand);
 322    bool try_emit_sat(ir_expression *ir);
 323
 324    void emit_swz(ir_expression *ir);
 325
 326    bool process_move_condition(ir_rvalue *ir);
 327
 328    void copy_propagate(void);
 329
 330    void *mem_ctx;
 331 };
 332
 333 src_reg undef_src = src_reg(PROGRAM_UNDEFINED, 0, NULL);
 334
 335 dst_reg undef_dst = dst_reg(PROGRAM_UNDEFINED, SWIZZLE_NOOP);
 336
 337 dst_reg address_reg = dst_reg(PROGRAM_ADDRESS, WRITEMASK_X);
 338
 339 static int
 340 swizzle_for_size(int size)
 341 {
 342    int size_swizzles[4] = {
 343       MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
 344       MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y),
 345       MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z),
 346       MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W),
 347    };
 348
 349    assert((size >= 1) && (size <= 4));
 350    return size_swizzles[size - 1];
 351 }
 352
 353 ir_to_mesa_instruction *
 354 ir_to_mesa_visitor::emit(ir_instruction *ir, enum prog_opcode op,
 355                          dst_reg dst,
 356                          src_reg src0, src_reg src1, src_reg src2)
 357 {
 358    ir_to_mesa_instruction *inst = new(mem_ctx) ir_to_mesa_instruction();
 359    int num_reladdr = 0;
 360
 361    /* If we have to do relative addressing, we want to load the ARL
 362     * reg directly for one of the regs, and preload the other reladdr
 363     * sources into temps.
 364     */
 365    num_reladdr += dst.reladdr != NULL;
 366    num_reladdr += src0.reladdr != NULL;
 367    num_reladdr += src1.reladdr != NULL;
 368    num_reladdr += src2.reladdr != NULL;
 369
 370    reladdr_to_temp(ir, &src2, &num_reladdr);
 371    reladdr_to_temp(ir, &src1, &num_reladdr);
 372    reladdr_to_temp(ir, &src0, &num_reladdr);
 373
 374    if (dst.reladdr) {
 375       emit(ir, OPCODE_ARL, address_reg, *dst.reladdr);
 376       num_reladdr--;
 377    }
 378    assert(num_reladdr == 0);
 379
 380    inst->op = op;
 381    inst->dst = dst;
 382    inst->src[0] = src0;
 383    inst->src[1] = src1;
 384    inst->src[2] = src2;
 385    inst->ir = ir;
 386
 387    inst->function = NULL;
 388
 389    this->instructions.push_tail(inst);
 390
 391    return inst;
 392 }
 393
 394
 395 ir_to_mesa_instruction *
 396 ir_to_mesa_visitor::emit(ir_instruction *ir, enum prog_opcode op,
 397                          dst_reg dst, src_reg src0, src_reg src1)
 398 {
 399    return emit(ir, op, dst, src0, src1, undef_src);
 400 }
 401
 402 ir_to_mesa_instruction *
 403 ir_to_mesa_visitor::emit(ir_instruction *ir, enum prog_opcode op,
 404                          dst_reg dst, src_reg src0)
 405 {
 406    assert(dst.writemask != 0);
 407    return emit(ir, op, dst, src0, undef_src, undef_src);
 408 }
 409
 410 ir_to_mesa_instruction *
 411 ir_to_mesa_visitor::emit(ir_instruction *ir, enum prog_opcode op)
 412 {
 413    return emit(ir, op, undef_dst, undef_src, undef_src, undef_src);
 414 }
 415
 416 ir_to_mesa_instruction *
 417 ir_to_mesa_visitor::emit_dp(ir_instruction *ir,
 418                             dst_reg dst, src_reg src0, src_reg src1,
 419                             unsigned elements)
 420 {
 421    static const gl_inst_opcode dot_opcodes[] = {
 422       OPCODE_DP2, OPCODE_DP3, OPCODE_DP4
 423    };
 424
 425    return emit(ir, dot_opcodes[elements - 2], dst, src0, src1);
 426 }
 427
 428 /**
 429  * Emits Mesa scalar opcodes to produce unique answers across channels.
 430  *
 431  * Some Mesa opcodes are scalar-only, like ARB_fp/vp.  The src X
 432  * channel determines the result across all channels.  So to do a vec4
 433  * of this operation, we want to emit a scalar per source channel used
 434  * to produce dest channels.
 435  */
 436 void
 437 ir_to_mesa_visitor::emit_scalar(ir_instruction *ir, enum prog_opcode op,
 438                                 dst_reg dst,
 439                                 src_reg orig_src0, src_reg orig_src1)
 440 {
 441    int i, j;
 442    int done_mask = ~dst.writemask;
 443
 444    /* Mesa RCP is a scalar operation splatting results to all channels,
 445     * like ARB_fp/vp.  So emit as many RCPs as necessary to cover our
 446     * dst channels.
 447     */
 448    for (i = 0; i < 4; i++) {
 449       GLuint this_mask = (1 << i);
 450       ir_to_mesa_instruction *inst;
 451       src_reg src0 = orig_src0;
 452       src_reg src1 = orig_src1;
 453
 454       if (done_mask & this_mask)
 455          continue;
 456
 457       GLuint src0_swiz = GET_SWZ(src0.swizzle, i);
 458       GLuint src1_swiz = GET_SWZ(src1.swizzle, i);
 459       for (j = i + 1; j < 4; j++) {
 460          /* If there is another enabled component in the destination that is
 461           * derived from the same inputs, generate its value on this pass as
 462           * well.
 463           */
 464          if (!(done_mask & (1 << j)) &&
 465              GET_SWZ(src0.swizzle, j) == src0_swiz &&
 466              GET_SWZ(src1.swizzle, j) == src1_swiz) {
 467             this_mask |= (1 << j);
 468          }
 469       }
 470       src0.swizzle = MAKE_SWIZZLE4(src0_swiz, src0_swiz,
 471                                    src0_swiz, src0_swiz);
 472       src1.swizzle = MAKE_SWIZZLE4(src1_swiz, src1_swiz,
 473                                   src1_swiz, src1_swiz);
 474
 475       inst = emit(ir, op, dst, src0, src1);
 476       inst->dst.writemask = this_mask;
 477       done_mask |= this_mask;
 478    }
 479 }
 480
 481 void
 482 ir_to_mesa_visitor::emit_scalar(ir_instruction *ir, enum prog_opcode op,
 483                                 dst_reg dst, src_reg src0)
 484 {
 485    src_reg undef = undef_src;
 486
 487    undef.swizzle = SWIZZLE_XXXX;
 488
 489    emit_scalar(ir, op, dst, src0, undef);
 490 }
 491
 492 /**
 493  * Emit an OPCODE_SCS instruction
 494  *
 495  * The \c SCS opcode functions a bit differently than the other Mesa (or
 496  * ARB_fragment_program) opcodes.  Instead of splatting its result across all
 497  * four components of the destination, it writes one value to the \c x
 498  * component and another value to the \c y component.
 499  *
 500  * \param ir        IR instruction being processed
 501  * \param op        Either \c OPCODE_SIN or \c OPCODE_COS depending on which
 502  *                  value is desired.
 503  * \param dst       Destination register
 504  * \param src       Source register
 505  */
 506 void
 507 ir_to_mesa_visitor::emit_scs(ir_instruction *ir, enum prog_opcode op,
 508                              dst_reg dst,
 509                              const src_reg &src)
 510 {
 511    /* Vertex programs cannot use the SCS opcode.
 512     */
 513    if (this->prog->Target == GL_VERTEX_PROGRAM_ARB) {
 514       emit_scalar(ir, op, dst, src);
 515       return;
 516    }
 517
 518    const unsigned component = (op == OPCODE_SIN) ? 0 : 1;
 519    const unsigned scs_mask = (1U << component);
 520    int done_mask = ~dst.writemask;
 521    src_reg tmp;
 522
 523    assert(op == OPCODE_SIN || op == OPCODE_COS);
 524
 525    /* If there are compnents in the destination that differ from the component
 526     * that will be written by the SCS instrution, we'll need a temporary.
 527     */
 528    if (scs_mask != unsigned(dst.writemask)) {
 529       tmp = get_temp(glsl_type::vec4_type);
 530    }
 531
 532    for (unsigned i = 0; i < 4; i++) {
 533       unsigned this_mask = (1U << i);
 534       src_reg src0 = src;
 535
 536       if ((done_mask & this_mask) != 0)
 537          continue;
 538
 539       /* The source swizzle specified which component of the source generates
 540        * sine / cosine for the current component in the destination.  The SCS
 541        * instruction requires that this value be swizzle to the X component.
 542        * Replace the current swizzle with a swizzle that puts the source in
 543        * the X component.
 544        */
 545       unsigned src0_swiz = GET_SWZ(src.swizzle, i);
 546
 547       src0.swizzle = MAKE_SWIZZLE4(src0_swiz, src0_swiz,
 548                                    src0_swiz, src0_swiz);
 549       for (unsigned j = i + 1; j < 4; j++) {
 550          /* If there is another enabled component in the destination that is
 551           * derived from the same inputs, generate its value on this pass as
 552           * well.
 553           */
 554          if (!(done_mask & (1 << j)) &&
 555              GET_SWZ(src0.swizzle, j) == src0_swiz) {
 556             this_mask |= (1 << j);
 557          }
 558       }
 559
 560       if (this_mask != scs_mask) {
 561          ir_to_mesa_instruction *inst;
 562          dst_reg tmp_dst = dst_reg(tmp);
 563
 564          /* Emit the SCS instruction.
 565           */
 566          inst = emit(ir, OPCODE_SCS, tmp_dst, src0);
 567          inst->dst.writemask = scs_mask;
 568
 569          /* Move the result of the SCS instruction to the desired location in
 570           * the destination.
 571           */
 572          tmp.swizzle = MAKE_SWIZZLE4(component, component,
 573                                      component, component);
 574          inst = emit(ir, OPCODE_SCS, dst, tmp);
 575          inst->dst.writemask = this_mask;
 576       } else {
 577          /* Emit the SCS instruction to write directly to the destination.
 578           */
 579          ir_to_mesa_instruction *inst = emit(ir, OPCODE_SCS, dst, src0);
 580          inst->dst.writemask = scs_mask;
 581       }
 582
 583       done_mask |= this_mask;
 584    }
 585 }
 586
 587 src_reg
 588 ir_to_mesa_visitor::src_reg_for_float(float val)
 589 {
 590    src_reg src(PROGRAM_CONSTANT, -1, NULL);
 591
 592    src.index = _mesa_add_unnamed_constant(this->prog->Parameters,
 593                                           (const gl_constant_value *)&val, 1, &src.swizzle);
 594
 595    return src;
 596 }
 597
 598 static int
 599 type_size(const struct glsl_type *type)
 600 {
 601    unsigned int i;
 602    int size;
 603
 604    switch (type->base_type) {
 605    case GLSL_TYPE_UINT:
 606    case GLSL_TYPE_INT:
 607    case GLSL_TYPE_FLOAT:
 608    case GLSL_TYPE_BOOL:
 609       if (type->is_matrix()) {
 610          return type->matrix_columns;
 611       } else {
 612          /* Regardless of size of vector, it gets a vec4. This is bad
 613           * packing for things like floats, but otherwise arrays become a
 614           * mess.  Hopefully a later pass over the code can pack scalars
 615           * down if appropriate.
 616           */
 617          return 1;
 618       }
 619    case GLSL_TYPE_ARRAY:
 620       assert(type->length > 0);
 621       return type_size(type->fields.array) * type->length;
 622    case GLSL_TYPE_STRUCT:
 623       size = 0;
 624       for (i = 0; i < type->length; i++) {
 625          size += type_size(type->fields.structure[i].type);
 626       }
 627       return size;
 628    case GLSL_TYPE_SAMPLER:
 629       /* Samplers take up one slot in UNIFORMS[], but they're baked in
 630        * at link time.
 631        */
 632       return 1;
 633    default:
 634       assert(0);
 635       return 0;
 636    }
 637 }
 638
 639 /**
 640  * In the initial pass of codegen, we assign temporary numbers to
 641  * intermediate results.  (not SSA -- variable assignments will reuse
 642  * storage).  Actual register allocation for the Mesa VM occurs in a
 643  * pass over the Mesa IR later.
 644  */
 645 src_reg
 646 ir_to_mesa_visitor::get_temp(const glsl_type *type)
 647 {
 648    src_reg src;
 649
 650    src.file = PROGRAM_TEMPORARY;
 651    src.index = next_temp;
 652    src.reladdr = NULL;
 653    next_temp += type_size(type);
 654
 655    if (type->is_array() || type->is_record()) {
 656       src.swizzle = SWIZZLE_NOOP;
 657    } else {
 658       src.swizzle = swizzle_for_size(type->vector_elements);
 659    }
 660    src.negate = 0;
 661
 662    return src;
 663 }
 664
 665 variable_storage *
 666 ir_to_mesa_visitor::find_variable_storage(ir_variable *var)
 667 {
 668
 669    variable_storage *entry;
 670
 671    foreach_iter(exec_list_iterator, iter, this->variables) {
 672       entry = (variable_storage *)iter.get();
 673
 674       if (entry->var == var)
 675          return entry;
 676    }
 677
 678    return NULL;
 679 }
 680
 681 void
 682 ir_to_mesa_visitor::visit(ir_variable *ir)
 683 {
 684    if (strcmp(ir->name, "gl_FragCoord") == 0) {
 685       struct gl_fragment_program *fp = (struct gl_fragment_program *)this->prog;
 686
 687       fp->OriginUpperLeft = ir->origin_upper_left;
 688       fp->PixelCenterInteger = ir->pixel_center_integer;
 689
 690    } else if (strcmp(ir->name, "gl_FragDepth") == 0) {
 691       struct gl_fragment_program *fp = (struct gl_fragment_program *)this->prog;
 692       switch (ir->depth_layout) {
 693       case ir_depth_layout_none:
 694          fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_NONE;
 695          break;
 696       case ir_depth_layout_any:
 697          fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_ANY;
 698          break;
 699       case ir_depth_layout_greater:
 700          fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_GREATER;
 701          break;
 702       case ir_depth_layout_less:
 703          fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_LESS;
 704          break;
 705       case ir_depth_layout_unchanged:
 706          fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_UNCHANGED;
 707          break;
 708       default:
 709          assert(0);
 710          break;
 711       }
 712    }
 713
 714    if (ir->mode == ir_var_uniform && strncmp(ir->name, "gl_", 3) == 0) {
 715       unsigned int i;
 716       const ir_state_slot *const slots = ir->state_slots;
 717       assert(ir->state_slots != NULL);
 718
 719       /* Check if this statevar's setup in the STATE file exactly
 720        * matches how we'll want to reference it as a
 721        * struct/array/whatever.  If not, then we need to move it into
 722        * temporary storage and hope that it'll get copy-propagated
 723        * out.
 724        */
 725       for (i = 0; i < ir->num_state_slots; i++) {
 726          if (slots[i].swizzle != SWIZZLE_XYZW) {
 727             break;
 728          }
 729       }
 730
 731       variable_storage *storage;
 732       dst_reg dst;
 733       if (i == ir->num_state_slots) {
 734          /* We'll set the index later. */
 735          storage = new(mem_ctx) variable_storage(ir, PROGRAM_STATE_VAR, -1);
 736          this->variables.push_tail(storage);
 737
 738          dst = undef_dst;
 739       } else {
 740          /* The variable_storage constructor allocates slots based on the size
 741           * of the type.  However, this had better match the number of state
 742           * elements that we're going to copy into the new temporary.
 743           */
 744          assert((int) ir->num_state_slots == type_size(ir->type));
 745
 746          storage = new(mem_ctx) variable_storage(ir, PROGRAM_TEMPORARY,
 747                                                  this->next_temp);
 748          this->variables.push_tail(storage);
 749          this->next_temp += type_size(ir->type);
 750
 751          dst = dst_reg(src_reg(PROGRAM_TEMPORARY, storage->index, NULL));
 752       }
 753
 754
 755       for (unsigned int i = 0; i < ir->num_state_slots; i++) {
 756          int index = _mesa_add_state_reference(this->prog->Parameters,
 757                                                (gl_state_index *)slots[i].tokens);
 758
 759          if (storage->file == PROGRAM_STATE_VAR) {
 760             if (storage->index == -1) {
 761                storage->index = index;
 762             } else {
 763                assert(index == storage->index + (int)i);
 764             }
 765          } else {
 766             src_reg src(PROGRAM_STATE_VAR, index, NULL);
 767             src.swizzle = slots[i].swizzle;
 768             emit(ir, OPCODE_MOV, dst, src);
 769             /* even a float takes up a whole vec4 reg in a struct/array. */
 770             dst.index++;
 771          }
 772       }
 773
 774       if (storage->file == PROGRAM_TEMPORARY &&
 775           dst.index != storage->index + (int) ir->num_state_slots) {
 776          linker_error(this->shader_program,
 777                       "failed to load builtin uniform `%s' "
 778                       "(%d/%d regs loaded)\n",
 779                       ir->name, dst.index - storage->index,
 780                       type_size(ir->type));
 781       }
 782    }
 783 }
 784
 785 void
 786 ir_to_mesa_visitor::visit(ir_loop *ir)
 787 {
 788    ir_dereference_variable *counter = NULL;
 789
 790    if (ir->counter != NULL)
 791       counter = new(mem_ctx) ir_dereference_variable(ir->counter);
 792
 793    if (ir->from != NULL) {
 794       assert(ir->counter != NULL);
 795
 796       ir_assignment *a =
 797         new(mem_ctx) ir_assignment(counter, ir->from, NULL);
 798
 799       a->accept(this);
 800    }
 801
 802    emit(NULL, OPCODE_BGNLOOP);
 803
 804    if (ir->to) {
 805       ir_expression *e =
 806          new(mem_ctx) ir_expression(ir->cmp, glsl_type::bool_type,
 807                                           counter, ir->to);
 808       ir_if *if_stmt =  new(mem_ctx) ir_if(e);
 809
 810       ir_loop_jump *brk =
 811         new(mem_ctx) ir_loop_jump(ir_loop_jump::jump_break);
 812
 813       if_stmt->then_instructions.push_tail(brk);
 814
 815       if_stmt->accept(this);
 816    }
 817
 818    visit_exec_list(&ir->body_instructions, this);
 819
 820    if (ir->increment) {
 821       ir_expression *e =
 822          new(mem_ctx) ir_expression(ir_binop_add, counter->type,
 823                                           counter, ir->increment);
 824
 825       ir_assignment *a =
 826         new(mem_ctx) ir_assignment(counter, e, NULL);
 827
 828       a->accept(this);
 829    }
 830
 831    emit(NULL, OPCODE_ENDLOOP);
 832 }
 833
 834 void
 835 ir_to_mesa_visitor::visit(ir_loop_jump *ir)
 836 {
 837    switch (ir->mode) {
 838    case ir_loop_jump::jump_break:
 839       emit(NULL, OPCODE_BRK);
 840       break;
 841    case ir_loop_jump::jump_continue:
 842       emit(NULL, OPCODE_CONT);
 843       break;
 844    }
 845 }
 846
 847
 848 void
 849 ir_to_mesa_visitor::visit(ir_function_signature *ir)
 850 {
 851    assert(0);
 852    (void)ir;
 853 }
 854
 855 void
 856 ir_to_mesa_visitor::visit(ir_function *ir)
 857 {
 858    /* Ignore function bodies other than main() -- we shouldn't see calls to
 859     * them since they should all be inlined before we get to ir_to_mesa.
 860     */
 861    if (strcmp(ir->name, "main") == 0) {
 862       const ir_function_signature *sig;
 863       exec_list empty;
 864
 865       sig = ir->matching_signature(&empty);
 866
 867       assert(sig);
 868
 869       foreach_iter(exec_list_iterator, iter, sig->body) {
 870          ir_instruction *ir = (ir_instruction *)iter.get();
 871
 872          ir->accept(this);
 873       }
 874    }
 875 }
 876
 877 bool
 878 ir_to_mesa_visitor::try_emit_mad(ir_expression *ir, int mul_operand)
 879 {
 880    int nonmul_operand = 1 - mul_operand;
 881    src_reg a, b, c;
 882
 883    ir_expression *expr = ir->operands[mul_operand]->as_expression();
 884    if (!expr || expr->operation != ir_binop_mul)
 885       return false;
 886
 887    expr->operands[0]->accept(this);
 888    a = this->result;
 889    expr->operands[1]->accept(this);
 890    b = this->result;
 891    ir->operands[nonmul_operand]->accept(this);
 892    c = this->result;
 893
 894    this->result = get_temp(ir->type);
 895    emit(ir, OPCODE_MAD, dst_reg(this->result), a, b, c);
 896
 897    return true;
 898 }
 899
 900 /**
 901  * Emit OPCODE_MAD(a, -b, a) instead of AND(a, NOT(b))
 902  *
 903  * The logic values are 1.0 for true and 0.0 for false.  Logical-and is
 904  * implemented using multiplication, and logical-or is implemented using
 905  * addition.  Logical-not can be implemented as (true - x), or (1.0 - x).
 906  * As result, the logical expression (a & !b) can be rewritten as:
 907  *
 908  *     - a * !b
 909  *     - a * (1 - b)
 910  *     - (a * 1) - (a * b)
 911  *     - a + -(a * b)
 912  *     - a + (a * -b)
 913  *
 914  * This final expression can be implemented as a single MAD(a, -b, a)
 915  * instruction.
 916  */
 917 bool
 918 ir_to_mesa_visitor::try_emit_mad_for_and_not(ir_expression *ir, int try_operand)
 919 {
 920    const int other_operand = 1 - try_operand;
 921    src_reg a, b;
 922
 923    ir_expression *expr = ir->operands[try_operand]->as_expression();
 924    if (!expr || expr->operation != ir_unop_logic_not)
 925       return false;
 926
 927    ir->operands[other_operand]->accept(this);
 928    a = this->result;
 929    expr->operands[0]->accept(this);
 930    b = this->result;
 931
 932    b.negate = ~b.negate;
 933
 934    this->result = get_temp(ir->type);
 935    emit(ir, OPCODE_MAD, dst_reg(this->result), a, b, a);
 936
 937    return true;
 938 }
 939
 940 bool
 941 ir_to_mesa_visitor::try_emit_sat(ir_expression *ir)
 942 {
 943    /* Saturates were only introduced to vertex programs in
 944     * NV_vertex_program3, so don't give them to drivers in the VP.
 945     */
 946    if (this->prog->Target == GL_VERTEX_PROGRAM_ARB)
 947       return false;
 948
 949    ir_rvalue *sat_src = ir->as_rvalue_to_saturate();
 950    if (!sat_src)
 951       return false;
 952
 953    sat_src->accept(this);
 954    src_reg src = this->result;
 955
 956    /* If we generated an expression instruction into a temporary in
 957     * processing the saturate's operand, apply the saturate to that
 958     * instruction.  Otherwise, generate a MOV to do the saturate.
 959     *
 960     * Note that we have to be careful to only do this optimization if
 961     * the instruction in question was what generated src->result.  For
 962     * example, ir_dereference_array might generate a MUL instruction
 963     * to create the reladdr, and return us a src reg using that
 964     * reladdr.  That MUL result is not the value we're trying to
 965     * saturate.
 966     */
 967    ir_expression *sat_src_expr = sat_src->as_expression();
 968    ir_to_mesa_instruction *new_inst;
 969    new_inst = (ir_to_mesa_instruction *)this->instructions.get_tail();
 970    if (sat_src_expr && (sat_src_expr->operation == ir_binop_mul ||
 971                         sat_src_expr->operation == ir_binop_add ||
 972                         sat_src_expr->operation == ir_binop_dot)) {
 973       new_inst->saturate = true;
 974    } else {
 975       this->result = get_temp(ir->type);
 976       ir_to_mesa_instruction *inst;
 977       inst = emit(ir, OPCODE_MOV, dst_reg(this->result), src);
 978       inst->saturate = true;
 979    }
 980
 981    return true;
 982 }
 983
 984 void
 985 ir_to_mesa_visitor::reladdr_to_temp(ir_instruction *ir,
 986                                     src_reg *reg, int *num_reladdr)
 987 {
 988    if (!reg->reladdr)
 989       return;
 990
 991    emit(ir, OPCODE_ARL, address_reg, *reg->reladdr);
 992
 993    if (*num_reladdr != 1) {
 994       src_reg temp = get_temp(glsl_type::vec4_type);
 995
 996       emit(ir, OPCODE_MOV, dst_reg(temp), *reg);
 997       *reg = temp;
 998    }
 999
1000    (*num_reladdr)--;
1001 }
1002
1003 void
1004 ir_to_mesa_visitor::emit_swz(ir_expression *ir)
1005 {
1006    /* Assume that the vector operator is in a form compatible with OPCODE_SWZ.
1007     * This means that each of the operands is either an immediate value of -1,
1008     * 0, or 1, or is a component from one source register (possibly with
1009     * negation).
1010     */
1011    uint8_t components[4] = { 0 };
1012    bool negate[4] = { false };
1013    ir_variable *var = NULL;
1014
1015    for (unsigned i = 0; i < ir->type->vector_elements; i++) {
1016       ir_rvalue *op = ir->operands[i];
1017
1018       assert(op->type->is_scalar());
1019
1020       while (op != NULL) {
1021          switch (op->ir_type) {
1022          case ir_type_constant: {
1023
1024             assert(op->type->is_scalar());
1025
1026             const ir_constant *const c = op->as_constant();
1027             if (c->is_one()) {
1028                components[i] = SWIZZLE_ONE;
1029             } else if (c->is_zero()) {
1030                components[i] = SWIZZLE_ZERO;
1031             } else if (c->is_negative_one()) {
1032                components[i] = SWIZZLE_ONE;
1033                negate[i] = true;
1034             } else {
1035                assert(!"SWZ constant must be 0.0 or 1.0.");
1036             }
1037
1038             op = NULL;
1039             break;
1040          }
1041
1042          case ir_type_dereference_variable: {
1043             ir_dereference_variable *const deref =
1044                (ir_dereference_variable *) op;
1045
1046             assert((var == NULL) || (deref->var == var));
1047             components[i] = SWIZZLE_X;
1048             var = deref->var;
1049             op = NULL;
1050             break;
1051          }
1052
1053          case ir_type_expression: {
1054             ir_expression *const expr = (ir_expression *) op;
1055
1056             assert(expr->operation == ir_unop_neg);
1057             negate[i] = true;
1058
1059             op = expr->operands[0];
1060             break;
1061          }
1062
1063          case ir_type_swizzle: {
1064             ir_swizzle *const swiz = (ir_swizzle *) op;
1065
1066             components[i] = swiz->mask.x;
1067             op = swiz->val;
1068             break;
1069          }
1070
1071          default:
1072             assert(!"Should not get here.");
1073             return;
1074          }
1075       }
1076    }
1077
1078    assert(var != NULL);
1079
1080    ir_dereference_variable *const deref =
1081       new(mem_ctx) ir_dereference_variable(var);
1082
1083    this->result.file = PROGRAM_UNDEFINED;
1084    deref->accept(this);
1085    if (this->result.file == PROGRAM_UNDEFINED) {
1086       ir_print_visitor v;
1087       printf("Failed to get tree for expression operand:\n");
1088       deref->accept(&v);
1089       exit(1);
1090    }
1091
1092    src_reg src;
1093
1094    src = this->result;
1095    src.swizzle = MAKE_SWIZZLE4(components[0],
1096                                components[1],
1097                                components[2],
1098                                components[3]);
1099    src.negate = ((unsigned(negate[0]) << 0)
1100                  | (unsigned(negate[1]) << 1)
1101                  | (unsigned(negate[2]) << 2)
1102                  | (unsigned(negate[3]) << 3));
1103
1104    /* Storage for our result.  Ideally for an assignment we'd be using the
1105     * actual storage for the result here, instead.
1106     */
1107    const src_reg result_src = get_temp(ir->type);
1108    dst_reg result_dst = dst_reg(result_src);
1109
1110    /* Limit writes to the channels that will be used by result_src later.
1111     * This does limit this temp's use as a temporary for multi-instruction
1112     * sequences.
1113     */
1114    result_dst.writemask = (1 << ir->type->vector_elements) - 1;
1115
1116    emit(ir, OPCODE_SWZ, result_dst, src);
1117    this->result = result_src;
1118 }
1119
/**
 * Translate a GLSL IR expression into Mesa IR instructions.
 *
 * A few peephole patterns are tried first (MAD for add-of-mul, MAD for
 * and-of-not, saturate); each of them leaves its value in this->result and
 * returns early.  ir_quadop_vector is handed to emit_swz().  For everything
 * else each operand is translated, a temporary is allocated for the result,
 * and the instruction(s) for the operation are emitted.
 *
 * On return this->result is the source register holding the expression's
 * value.  If an operand fails to produce a result, a message is printed and
 * the process exits.
 */
void
ir_to_mesa_visitor::visit(ir_expression *ir)
{
   unsigned int operand;
   src_reg op[Elements(ir->operands)];
   src_reg result_src;
   dst_reg result_dst;

   /* Quick peephole: Emit OPCODE_MAD(a, b, c) instead of ADD(MUL(a, b), c)
    */
   if (ir->operation == ir_binop_add) {
      if (try_emit_mad(ir, 1))
         return;
      if (try_emit_mad(ir, 0))
         return;
   }

   /* Quick peephole: Emit OPCODE_MAD(a, -b, a) instead of AND(a, NOT(b))
    */
   if (ir->operation == ir_binop_logic_and) {
      if (try_emit_mad_for_and_not(ir, 1))
         return;
      if (try_emit_mad_for_and_not(ir, 0))
         return;
   }

   if (try_emit_sat(ir))
      return;

   if (ir->operation == ir_quadop_vector) {
      this->emit_swz(ir);
      return;
   }

   /* Translate every operand.  this->result.file is reset beforehand so
    * that a visit which produced nothing can be detected afterwards.
    */
   for (operand = 0; operand < ir->get_num_operands(); operand++) {
      this->result.file = PROGRAM_UNDEFINED;
      ir->operands[operand]->accept(this);
      if (this->result.file == PROGRAM_UNDEFINED) {
         ir_print_visitor v;
         printf("Failed to get tree for expression operand:\n");
         ir->operands[operand]->accept(&v);
         exit(1);
      }
      op[operand] = this->result;

      /* Matrix expression operands should have been broken down to vector
       * operations already.
       */
      assert(!ir->operands[operand]->type->is_matrix());
   }

   /* Larger of the first two operands' vector sizes; used below as the
    * dot-product width for the vector all_equal / any_nequal cases.
    */
   int vector_elements = ir->operands[0]->type->vector_elements;
   if (ir->operands[1]) {
      vector_elements = MAX2(vector_elements,
                             ir->operands[1]->type->vector_elements);
   }

   this->result.file = PROGRAM_UNDEFINED;

   /* Storage for our result.  Ideally for an assignment we'd be using
    * the actual storage for the result here, instead.
    */
   result_src = get_temp(ir->type);
   /* convenience for the emit functions below. */
   result_dst = dst_reg(result_src);
   /* Limit writes to the channels that will be used by result_src later.
    * This does limit this temp's use as a temporary for multi-instruction
    * sequences.
    */
   result_dst.writemask = (1 << ir->type->vector_elements) - 1;

   switch (ir->operation) {
   case ir_unop_logic_not:
      /* Previously 'SEQ dst, src, 0.0' was used for this.  However, many
       * older GPUs implement SEQ using multiple instructions (i915 uses two
       * SGE instructions and a MUL instruction).  Since our logic values are
       * 0.0 and 1.0, 1-x also implements !x.
       */
      op[0].negate = ~op[0].negate;
      emit(ir, OPCODE_ADD, result_dst, op[0], src_reg_for_float(1.0));
      break;
   case ir_unop_neg:
      /* No instruction needed: flip the operand's negate flags and use the
       * operand itself as the result (the temporary goes unused).
       */
      op[0].negate = ~op[0].negate;
      result_src = op[0];
      break;
   case ir_unop_abs:
      emit(ir, OPCODE_ABS, result_dst, op[0]);
      break;
   case ir_unop_sign:
      emit(ir, OPCODE_SSG, result_dst, op[0]);
      break;
   case ir_unop_rcp:
      emit_scalar(ir, OPCODE_RCP, result_dst, op[0]);
      break;

   case ir_unop_exp2:
      emit_scalar(ir, OPCODE_EX2, result_dst, op[0]);
      break;
   case ir_unop_exp:
   case ir_unop_log:
      assert(!"not reached: should be handled by ir_explog_to_explog2");
      break;
   case ir_unop_log2:
      emit_scalar(ir, OPCODE_LG2, result_dst, op[0]);
      break;
   case ir_unop_sin:
      emit_scalar(ir, OPCODE_SIN, result_dst, op[0]);
      break;
   case ir_unop_cos:
      emit_scalar(ir, OPCODE_COS, result_dst, op[0]);
      break;
   case ir_unop_sin_reduced:
      emit_scs(ir, OPCODE_SIN, result_dst, op[0]);
      break;
   case ir_unop_cos_reduced:
      emit_scs(ir, OPCODE_COS, result_dst, op[0]);
      break;

   case ir_unop_dFdx:
      emit(ir, OPCODE_DDX, result_dst, op[0]);
      break;
   case ir_unop_dFdy:
      emit(ir, OPCODE_DDY, result_dst, op[0]);
      break;

   case ir_unop_noise: {
      /* Pick NOISE1..NOISE4 according to the operand's vector size; this
       * relies on the four opcodes being consecutive.
       */
      const enum prog_opcode opcode =
         prog_opcode(OPCODE_NOISE1
                     + (ir->operands[0]->type->vector_elements) - 1);
      assert((opcode >= OPCODE_NOISE1) && (opcode <= OPCODE_NOISE4));

      emit(ir, opcode, result_dst, op[0]);
      break;
   }

   case ir_binop_add:
      emit(ir, OPCODE_ADD, result_dst, op[0], op[1]);
      break;
   case ir_binop_sub:
      emit(ir, OPCODE_SUB, result_dst, op[0], op[1]);
      break;

   case ir_binop_mul:
      emit(ir, OPCODE_MUL, result_dst, op[0], op[1]);
      break;
   case ir_binop_div:
      assert(!"not reached: should be handled by ir_div_to_mul_rcp");
      break;
   case ir_binop_mod:
      /* Floating point should be lowered by MOD_TO_FRACT in the compiler. */
      assert(ir->type->is_integer());
      /* NOTE(review): a MUL does not compute a modulus.  Presumably this is
       * a placeholder in the same spirit as the integer ops further down;
       * confirm before relying on integer mod here.
       */
      emit(ir, OPCODE_MUL, result_dst, op[0], op[1]);
      break;

   case ir_binop_less:
      emit(ir, OPCODE_SLT, result_dst, op[0], op[1]);
      break;
   case ir_binop_greater:
      emit(ir, OPCODE_SGT, result_dst, op[0], op[1]);
      break;
   case ir_binop_lequal:
      emit(ir, OPCODE_SLE, result_dst, op[0], op[1]);
      break;
   case ir_binop_gequal:
      emit(ir, OPCODE_SGE, result_dst, op[0], op[1]);
      break;
   case ir_binop_equal:
      emit(ir, OPCODE_SEQ, result_dst, op[0], op[1]);
      break;
   case ir_binop_nequal:
      emit(ir, OPCODE_SNE, result_dst, op[0], op[1]);
      break;
   case ir_binop_all_equal:
      /* "==" operator producing a scalar boolean. */
      if (ir->operands[0]->type->is_vector() ||
          ir->operands[1]->type->is_vector()) {
         /* Per-channel "not equal" flags (0.0 or 1.0). */
         src_reg temp = get_temp(glsl_type::vec4_type);
         emit(ir, OPCODE_SNE, dst_reg(temp), op[0], op[1]);

         /* After the dot-product, the value will be an integer on the
          * range [0,4].  Zero becomes 1.0, and positive values become zero.
          */
         emit_dp(ir, result_dst, temp, temp, vector_elements);

         /* Negating the result of the dot-product gives values on the range
          * [-4, 0].  Zero becomes 1.0, and negative values become zero.  This
          * is achieved using SGE.
          */
         src_reg sge_src = result_src;
         sge_src.negate = ~sge_src.negate;
         emit(ir, OPCODE_SGE, result_dst, sge_src, src_reg_for_float(0.0));
      } else {
         emit(ir, OPCODE_SEQ, result_dst, op[0], op[1]);
      }
      break;
   case ir_binop_any_nequal:
      /* "!=" operator producing a scalar boolean. */
      if (ir->operands[0]->type->is_vector() ||
          ir->operands[1]->type->is_vector()) {
         /* Per-channel "not equal" flags (0.0 or 1.0). */
         src_reg temp = get_temp(glsl_type::vec4_type);
         emit(ir, OPCODE_SNE, dst_reg(temp), op[0], op[1]);

         /* After the dot-product, the value will be an integer on the
          * range [0,4].  Zero stays zero, and positive values become 1.0.
          */
         ir_to_mesa_instruction *const dp =
            emit_dp(ir, result_dst, temp, temp, vector_elements);
         if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
            /* The clamping to [0,1] can be done for free in the fragment
             * shader with a saturate.
             */
            dp->saturate = true;
         } else {
            /* Negating the result of the dot-product gives values on the range
             * [-4, 0].  Zero stays zero, and negative values become 1.0.  This
             * is achieved using SLT.
             */
            src_reg slt_src = result_src;
            slt_src.negate = ~slt_src.negate;
            emit(ir, OPCODE_SLT, result_dst, slt_src, src_reg_for_float(0.0));
         }
      } else {
         emit(ir, OPCODE_SNE, result_dst, op[0], op[1]);
      }
      break;

   case ir_unop_any: {
      assert(ir->operands[0]->type->is_vector());

      /* After the dot-product, the value will be an integer on the
       * range [0,4].  Zero stays zero, and positive values become 1.0.
       */
      ir_to_mesa_instruction *const dp =
         emit_dp(ir, result_dst, op[0], op[0],
                 ir->operands[0]->type->vector_elements);
      if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
         /* The clamping to [0,1] can be done for free in the fragment
          * shader with a saturate.
          */
         dp->saturate = true;
      } else {
         /* Negating the result of the dot-product gives values on the range
          * [-4, 0].  Zero stays zero, and negative values become 1.0.  This
          * is achieved using SLT.
          */
         src_reg slt_src = result_src;
         slt_src.negate = ~slt_src.negate;
         emit(ir, OPCODE_SLT, result_dst, slt_src, src_reg_for_float(0.0));
      }
      break;
   }

   case ir_binop_logic_xor:
      /* With 0.0/1.0 logic values, xor is simply "not equal". */
      emit(ir, OPCODE_SNE, result_dst, op[0], op[1]);
      break;

   case ir_binop_logic_or: {
      /* After the addition, the value will be an integer on the
       * range [0,2].  Zero stays zero, and positive values become 1.0.
       */
      ir_to_mesa_instruction *add =
         emit(ir, OPCODE_ADD, result_dst, op[0], op[1]);
      if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
         /* The clamping to [0,1] can be done for free in the fragment
          * shader with a saturate.
          */
         add->saturate = true;
      } else {
         /* Negating the result of the addition gives values on the range
          * [-2, 0].  Zero stays zero, and negative values become 1.0.  This
          * is achieved using SLT.
          */
         src_reg slt_src = result_src;
         slt_src.negate = ~slt_src.negate;
         emit(ir, OPCODE_SLT, result_dst, slt_src, src_reg_for_float(0.0));
      }
      break;
   }

   case ir_binop_logic_and:
      /* the bool args are stored as float 0.0 or 1.0, so "mul" gives us "and". */
      emit(ir, OPCODE_MUL, result_dst, op[0], op[1]);
      break;

   case ir_binop_dot:
      assert(ir->operands[0]->type->is_vector());
      assert(ir->operands[0]->type == ir->operands[1]->type);
      emit_dp(ir, result_dst, op[0], op[1],
              ir->operands[0]->type->vector_elements);
      break;

   case ir_unop_sqrt:
      /* sqrt(x) = x * rsq(x). */
      emit_scalar(ir, OPCODE_RSQ, result_dst, op[0]);
      emit(ir, OPCODE_MUL, result_dst, result_src, op[0]);
      /* For incoming channels <= 0, set the result to 0. */
      op[0].negate = ~op[0].negate;
      emit(ir, OPCODE_CMP, result_dst,
                          op[0], result_src, src_reg_for_float(0.0));
      break;
   case ir_unop_rsq:
      emit_scalar(ir, OPCODE_RSQ, result_dst, op[0]);
      break;
   case ir_unop_i2f:
   case ir_unop_u2f:
   case ir_unop_b2f:
   case ir_unop_b2i:
   case ir_unop_i2u:
   case ir_unop_u2i:
      /* Mesa IR lacks types, ints are stored as truncated floats. */
      result_src = op[0];
      break;
   case ir_unop_f2i:
      emit(ir, OPCODE_TRUNC, result_dst, op[0]);
      break;
   case ir_unop_f2b:
   case ir_unop_i2b:
      /* Any nonzero value becomes true (1.0). */
      emit(ir, OPCODE_SNE, result_dst,
                          op[0], src_reg_for_float(0.0));
      break;
   case ir_unop_trunc:
      emit(ir, OPCODE_TRUNC, result_dst, op[0]);
      break;
   case ir_unop_ceil:
      /* ceil(x) = -floor(-x): negate the operand, FLR it, then hand back the
       * temporary with its negate flags flipped.
       */
      op[0].negate = ~op[0].negate;
      emit(ir, OPCODE_FLR, result_dst, op[0]);
      result_src.negate = ~result_src.negate;
      break;
   case ir_unop_floor:
      emit(ir, OPCODE_FLR, result_dst, op[0]);
      break;
   case ir_unop_fract:
      emit(ir, OPCODE_FRC, result_dst, op[0]);
      break;

   case ir_binop_min:
      emit(ir, OPCODE_MIN, result_dst, op[0], op[1]);
      break;
   case ir_binop_max:
      emit(ir, OPCODE_MAX, result_dst, op[0], op[1]);
      break;
   case ir_binop_pow:
      emit_scalar(ir, OPCODE_POW, result_dst, op[0], op[1]);
      break;

      /* GLSL 1.30 integer ops are unsupported in Mesa IR, but since
       * hardware backends have no way to avoid Mesa IR generation
       * even if they don't use it, we need to emit "something" and
       * continue.
       */
   case ir_binop_lshift:
   case ir_binop_rshift:
   case ir_binop_bit_and:
   case ir_binop_bit_xor:
   case ir_binop_bit_or:
      emit(ir, OPCODE_ADD, result_dst, op[0], op[1]);
      break;

   case ir_unop_bit_not:
   case ir_unop_round_even:
      emit(ir, OPCODE_MOV, result_dst, op[0]);
      break;

   case ir_quadop_vector:
      /* This operation should have already been handled.
       */
      assert(!"Should not get here.");
      break;
   }

   this->result = result_src;
}
1492
1493
1494 void
1495 ir_to_mesa_visitor::visit(ir_swizzle *ir)
1496 {
1497    src_reg src;
1498    int i;
1499    int swizzle[4];
1500
1501    /* Note that this is only swizzles in expressions, not those on the left
1502     * hand side of an assignment, which do write masking.  See ir_assignment
1503     * for that.
1504     */
1505
1506    ir->val->accept(this);
1507    src = this->result;
1508    assert(src.file != PROGRAM_UNDEFINED);
1509
1510    for (i = 0; i < 4; i++) {
1511       if (i < ir->type->vector_elements) {
1512          switch (i) {
1513          case 0:
1514             swizzle[i] = GET_SWZ(src.swizzle, ir->mask.x);
1515             break;
1516          case 1:
1517             swizzle[i] = GET_SWZ(src.swizzle, ir->mask.y);
1518             break;
1519          case 2:
1520             swizzle[i] = GET_SWZ(src.swizzle, ir->mask.z);
1521             break;
1522          case 3:
1523             swizzle[i] = GET_SWZ(src.swizzle, ir->mask.w);
1524             break;
1525          }
1526       } else {
1527          /* If the type is smaller than a vec4, replicate the last
1528           * channel out.
1529           */
1530          swizzle[i] = swizzle[ir->type->vector_elements - 1];
1531       }
1532    }
1533
1534    src.swizzle = MAKE_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]);
1535
1536    this->result = src;
1537 }
1538
1539 void
1540 ir_to_mesa_visitor::visit(ir_dereference_variable *ir)
1541 {
1542    variable_storage *entry = find_variable_storage(ir->var);
1543    ir_variable *var = ir->var;
1544
1545    if (!entry) {
1546       switch (var->mode) {
1547       case ir_var_uniform:
1548          entry = new(mem_ctx) variable_storage(var, PROGRAM_UNIFORM,
1549                                                var->location);
1550          this->variables.push_tail(entry);
1551          break;
1552       case ir_var_in:
1553       case ir_var_inout:
1554          /* The linker assigns locations for varyings and attributes,
1555           * including deprecated builtins (like gl_Color),
1556           * user-assigned generic attributes (glBindVertexLocation),
1557           * and user-defined varyings.
1558           *
1559           * FINISHME: We would hit this path for function arguments.  Fix!
1560           */
1561          assert(var->location != -1);
1562          entry = new(mem_ctx) variable_storage(var,
1563                                                PROGRAM_INPUT,
1564                                                var->location);
1565          break;
1566       case ir_var_out:
1567          assert(var->location != -1);
1568          entry = new(mem_ctx) variable_storage(var,
1569                                                PROGRAM_OUTPUT,
1570                                                var->location);
1571          break;
1572       case ir_var_system_value:
1573          entry = new(mem_ctx) variable_storage(var,
1574                                                PROGRAM_SYSTEM_VALUE,
1575                                                var->location);
1576          break;
1577       case ir_var_auto:
1578       case ir_var_temporary:
1579          entry = new(mem_ctx) variable_storage(var, PROGRAM_TEMPORARY,
1580                                                this->next_temp);
1581          this->variables.push_tail(entry);
1582
1583          next_temp += type_size(var->type);
1584          break;
1585       }
1586
1587       if (!entry) {
1588          printf("Failed to make storage for %s\n", var->name);
1589          exit(1);
1590       }
1591    }
1592
1593    this->result = src_reg(entry->file, entry->index, var->type);
1594 }
1595
1596 void
1597 ir_to_mesa_visitor::visit(ir_dereference_array *ir)
1598 {
1599    ir_constant *index;
1600    src_reg src;
1601    int element_size = type_size(ir->type);
1602
1603    index = ir->array_index->constant_expression_value();
1604
1605    ir->array->accept(this);
1606    src = this->result;
1607
1608    if (index) {
1609       src.index += index->value.i[0] * element_size;
1610    } else {
1611       /* Variable index array dereference.  It eats the "vec4" of the
1612        * base of the array and an index that offsets the Mesa register
1613        * index.
1614        */
1615       ir->array_index->accept(this);
1616
1617       src_reg index_reg;
1618
1619       if (element_size == 1) {
1620          index_reg = this->result;
1621       } else {
1622          index_reg = get_temp(glsl_type::float_type);
1623
1624          emit(ir, OPCODE_MUL, dst_reg(index_reg),
1625               this->result, src_reg_for_float(element_size));
1626       }
1627
1628       /* If there was already a relative address register involved, add the
1629        * new and the old together to get the new offset.
1630        */
1631       if (src.reladdr != NULL)  {
1632          src_reg accum_reg = get_temp(glsl_type::float_type);
1633
1634          emit(ir, OPCODE_ADD, dst_reg(accum_reg),
1635               index_reg, *src.reladdr);
1636
1637          index_reg = accum_reg;
1638       }
1639
1640       src.reladdr = ralloc(mem_ctx, src_reg);
1641       memcpy(src.reladdr, &index_reg, sizeof(index_reg));
1642    }
1643
1644    /* If the type is smaller than a vec4, replicate the last channel out. */
1645    if (ir->type->is_scalar() || ir->type->is_vector())
1646       src.swizzle = swizzle_for_size(ir->type->vector_elements);
1647    else
1648       src.swizzle = SWIZZLE_NOOP;
1649
1650    this->result = src;
1651 }
1652
1653 void
1654 ir_to_mesa_visitor::visit(ir_dereference_record *ir)
1655 {
1656    unsigned int i;
1657    const glsl_type *struct_type = ir->record->type;
1658    int offset = 0;
1659
1660    ir->record->accept(this);
1661
1662    for (i = 0; i < struct_type->length; i++) {
1663       if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0)
1664          break;
1665       offset += type_size(struct_type->fields.structure[i].type);
1666    }
1667
1668    /* If the type is smaller than a vec4, replicate the last channel out. */
1669    if (ir->type->is_scalar() || ir->type->is_vector())
1670       this->result.swizzle = swizzle_for_size(ir->type->vector_elements);
1671    else
1672       this->result.swizzle = SWIZZLE_NOOP;
1673
1674    this->result.index += offset;
1675 }
1676
1677 /**
1678  * We want to be careful in assignment setup to hit the actual storage
1679  * instead of potentially using a temporary like we might with the
1680  * ir_dereference handler.
1681  */
1682 static dst_reg
1683 get_assignment_lhs(ir_dereference *ir, ir_to_mesa_visitor *v)
1684 {
1685    /* The LHS must be a dereference.  If the LHS is a variable indexed array
1686     * access of a vector, it must be separated into a series conditional moves
1687     * before reaching this point (see ir_vec_index_to_cond_assign).
1688     */
1689    assert(ir->as_dereference());
1690    ir_dereference_array *deref_array = ir->as_dereference_array();
1691    if (deref_array) {
1692       assert(!deref_array->array->type->is_vector());
1693    }
1694
1695    /* Use the rvalue deref handler for the most part.  We'll ignore
1696     * swizzles in it and write swizzles using writemask, though.
1697     */
1698    ir->accept(v);
1699    return dst_reg(v->result);
1700 }
1701
1702 /**
1703  * Process the condition of a conditional assignment
1704  *
1705  * Examines the condition of a conditional assignment to generate the optimal
1706  * first operand of a \c CMP instruction.  If the condition is a relational
1707  * operator with 0 (e.g., \c ir_binop_less), the value being compared will be
1708  * used as the source for the \c CMP instruction.  Otherwise the comparison
1709  * is processed to a boolean result, and the boolean result is used as the
1710  * operand to the CMP instruction.
1711  */
1712 bool
1713 ir_to_mesa_visitor::process_move_condition(ir_rvalue *ir)
1714 {
1715    ir_rvalue *src_ir = ir;
1716    bool negate = true;
1717    bool switch_order = false;
1718
1719    ir_expression *const expr = ir->as_expression();
1720    if ((expr != NULL) && (expr->get_num_operands() == 2)) {
1721       bool zero_on_left = false;
1722
1723       if (expr->operands[0]->is_zero()) {
1724          src_ir = expr->operands[1];
1725          zero_on_left = true;
1726       } else if (expr->operands[1]->is_zero()) {
1727          src_ir = expr->operands[0];
1728          zero_on_left = false;
1729       }
1730
1731       /*      a is -  0  +            -  0  +
1732        * (a <  0)  T  F  F  ( a < 0)  T  F  F
1733        * (0 <  a)  F  F  T  (-a < 0)  F  F  T
1734        * (a <= 0)  T  T  F  (-a < 0)  F  F  T  (swap order of other operands)
1735        * (0 <= a)  F  T  T  ( a < 0)  T  F  F  (swap order of other operands)
1736        * (a >  0)  F  F  T  (-a < 0)  F  F  T
1737        * (0 >  a)  T  F  F  ( a < 0)  T  F  F
1738        * (a >= 0)  F  T  T  ( a < 0)  T  F  F  (swap order of other operands)
1739        * (0 >= a)  T  T  F  (-a < 0)  F  F  T  (swap order of other operands)
1740        *
1741        * Note that exchanging the order of 0 and 'a' in the comparison simply
1742        * means that the value of 'a' should be negated.
1743        */
1744       if (src_ir != ir) {
1745          switch (expr->operation) {
1746          case ir_binop_less:
1747             switch_order = false;
1748             negate = zero_on_left;
1749             break;
1750
1751          case ir_binop_greater:
1752             switch_order = false;
1753             negate = !zero_on_left;
1754             break;
1755
1756          case ir_binop_lequal:
1757             switch_order = true;
1758             negate = !zero_on_left;
1759             break;
1760
1761          case ir_binop_gequal:
1762             switch_order = true;
1763             negate = zero_on_left;
1764             break;
1765
1766          default:
1767             /* This isn't the right kind of comparison afterall, so make sure
1768              * the whole condition is visited.
1769              */
1770             src_ir = ir;
1771             break;
1772          }
1773       }
1774    }
1775
1776    src_ir->accept(this);
1777
1778    /* We use the OPCODE_CMP (a < 0 ? b : c) for conditional moves, and the
1779     * condition we produced is 0.0 or 1.0.  By flipping the sign, we can
1780     * choose which value OPCODE_CMP produces without an extra instruction
1781     * computing the condition.
1782     */
1783    if (negate)
1784       this->result.negate = ~this->result.negate;
1785
1786    return switch_order;
1787 }
1788
1789 void
1790 ir_to_mesa_visitor::visit(ir_assignment *ir)
1791 {
1792    dst_reg l;
1793    src_reg r;
1794    int i;
1795
1796    ir->rhs->accept(this);
1797    r = this->result;
1798
1799    l = get_assignment_lhs(ir->lhs, this);
1800
1801    /* FINISHME: This should really set to the correct maximal writemask for each
1802     * FINISHME: component written (in the loops below).  This case can only
1803     * FINISHME: occur for matrices, arrays, and structures.
1804     */
1805    if (ir->write_mask == 0) {
1806       assert(!ir->lhs->type->is_scalar() && !ir->lhs->type->is_vector());
1807       l.writemask = WRITEMASK_XYZW;
1808    } else if (ir->lhs->type->is_scalar()) {
1809       /* FINISHME: This hack makes writing to gl_FragDepth, which lives in the
1810        * FINISHME: W component of fragment shader output zero, work correctly.
1811        */
1812       l.writemask = WRITEMASK_XYZW;
1813    } else {
1814       int swizzles[4];
1815       int first_enabled_chan = 0;
1816       int rhs_chan = 0;
1817
1818       assert(ir->lhs->type->is_vector());
1819       l.writemask = ir->write_mask;
1820
1821       for (int i = 0; i < 4; i++) {
1822          if (l.writemask & (1 << i)) {
1823             first_enabled_chan = GET_SWZ(r.swizzle, i);
1824             break;
1825          }
1826       }
1827
1828       /* Swizzle a small RHS vector into the channels being written.
1829        *
1830        * glsl ir treats write_mask as dictating how many channels are
1831        * present on the RHS while Mesa IR treats write_mask as just
1832        * showing which channels of the vec4 RHS get written.
1833        */
1834       for (int i = 0; i < 4; i++) {
1835          if (l.writemask & (1 << i))
1836             swizzles[i] = GET_SWZ(r.swizzle, rhs_chan++);
1837          else
1838             swizzles[i] = first_enabled_chan;
1839       }
1840       r.swizzle = MAKE_SWIZZLE4(swizzles[0], swizzles[1],
1841                                 swizzles[2], swizzles[3]);
1842    }
1843
1844    assert(l.file != PROGRAM_UNDEFINED);
1845    assert(r.file != PROGRAM_UNDEFINED);
1846
1847    if (ir->condition) {
1848       const bool switch_order = this->process_move_condition(ir->condition);
1849       src_reg condition = this->result;
1850
1851       for (i = 0; i < type_size(ir->lhs->type); i++) {
1852          if (switch_order) {
1853             emit(ir, OPCODE_CMP, l, condition, src_reg(l), r);
1854          } else {
1855             emit(ir, OPCODE_CMP, l, condition, r, src_reg(l));
1856          }
1857
1858          l.index++;
1859          r.index++;
1860       }
1861    } else {
1862       for (i = 0; i < type_size(ir->lhs->type); i++) {
1863          emit(ir, OPCODE_MOV, l, r);
1864          l.index++;
1865          r.index++;
1866       }
1867    }
1868 }
1869
1870
1871 void
1872 ir_to_mesa_visitor::visit(ir_constant *ir)
1873 {
1874    src_reg src;
1875    GLfloat stack_vals[4] = { 0 };
1876    GLfloat *values = stack_vals;
1877    unsigned int i;
1878
1879    /* Unfortunately, 4 floats is all we can get into
1880     * _mesa_add_unnamed_constant.  So, make a temp to store an
1881     * aggregate constant and move each constant value into it.  If we
1882     * get lucky, copy propagation will eliminate the extra moves.
1883     */
1884
1885    if (ir->type->base_type == GLSL_TYPE_STRUCT) {
1886       src_reg temp_base = get_temp(ir->type);
1887       dst_reg temp = dst_reg(temp_base);
1888
1889       foreach_iter(exec_list_iterator, iter, ir->components) {
1890          ir_constant *field_value = (ir_constant *)iter.get();
1891          int size = type_size(field_value->type);
1892
1893          assert(size > 0);
1894
1895          field_value->accept(this);
1896          src = this->result;
1897
1898          for (i = 0; i < (unsigned int)size; i++) {
1899             emit(ir, OPCODE_MOV, temp, src);
1900
1901             src.index++;
1902             temp.index++;
1903          }
1904       }
1905       this->result = temp_base;
1906       return;
1907    }
1908
1909    if (ir->type->is_array()) {
1910       src_reg temp_base = get_temp(ir->type);
1911       dst_reg temp = dst_reg(temp_base);
1912       int size = type_size(ir->type->fields.array);
1913
1914       assert(size > 0);
1915
1916       for (i = 0; i < ir->type->length; i++) {
1917          ir->array_elements[i]->accept(this);
1918          src = this->result;
1919          for (int j = 0; j < size; j++) {
1920             emit(ir, OPCODE_MOV, temp, src);
1921
1922             src.index++;
1923             temp.index++;
1924          }
1925       }
1926       this->result = temp_base;
1927       return;
1928    }
1929
1930    if (ir->type->is_matrix()) {
1931       src_reg mat = get_temp(ir->type);
1932       dst_reg mat_column = dst_reg(mat);
1933
1934       for (i = 0; i < ir->type->matrix_columns; i++) {
1935          assert(ir->type->base_type == GLSL_TYPE_FLOAT);
1936          values = &ir->value.f[i * ir->type->vector_elements];
1937
1938          src = src_reg(PROGRAM_CONSTANT, -1, NULL);
1939          src.index = _mesa_add_unnamed_constant(this->prog->Parameters,
1940                                                 (gl_constant_value *) values,
1941                                                 ir->type->vector_elements,
1942                                                 &src.swizzle);
1943          emit(ir, OPCODE_MOV, mat_column, src);
1944
1945          mat_column.index++;
1946       }
1947
1948       this->result = mat;
1949       return;
1950    }
1951
1952    src.file = PROGRAM_CONSTANT;
1953    switch (ir->type->base_type) {
1954    case GLSL_TYPE_FLOAT:
1955       values = &ir->value.f[0];
1956       break;
1957    case GLSL_TYPE_UINT:
1958       for (i = 0; i < ir->type->vector_elements; i++) {
1959          values[i] = ir->value.u[i];
1960       }
1961       break;
1962    case GLSL_TYPE_INT:
1963       for (i = 0; i < ir->type->vector_elements; i++) {
1964          values[i] = ir->value.i[i];
1965       }
1966       break;
1967    case GLSL_TYPE_BOOL:
1968       for (i = 0; i < ir->type->vector_elements; i++) {
1969          values[i] = ir->value.b[i];
1970       }
1971       break;
1972    default:
1973       assert(!"Non-float/uint/int/bool constant");
1974    }
1975
1976    this->result = src_reg(PROGRAM_CONSTANT, -1, ir->type);
1977    this->result.index = _mesa_add_unnamed_constant(this->prog->Parameters,
1978                                                    (gl_constant_value *) values,
1979                                                    ir->type->vector_elements,
1980                                                    &this->result.swizzle);
1981 }
1982
1983 function_entry *
1984 ir_to_mesa_visitor::get_function_signature(ir_function_signature *sig)
1985 {
1986    function_entry *entry;
1987
1988    foreach_iter(exec_list_iterator, iter, this->function_signatures) {
1989       entry = (function_entry *)iter.get();
1990
1991       if (entry->sig == sig)
1992          return entry;
1993    }
1994
1995    entry = ralloc(mem_ctx, function_entry);
1996    entry->sig = sig;
1997    entry->sig_id = this->next_signature_id++;
1998    entry->bgn_inst = NULL;
1999
2000    /* Allocate storage for all the parameters. */
2001    foreach_iter(exec_list_iterator, iter, sig->parameters) {
2002       ir_variable *param = (ir_variable *)iter.get();
2003       variable_storage *storage;
2004
2005       storage = find_variable_storage(param);
2006       assert(!storage);
2007
2008       storage = new(mem_ctx) variable_storage(param, PROGRAM_TEMPORARY,
2009                                               this->next_temp);
2010       this->variables.push_tail(storage);
2011
2012       this->next_temp += type_size(param->type);
2013    }
2014
2015    if (!sig->return_type->is_void()) {
2016       entry->return_reg = get_temp(sig->return_type);
2017    } else {
2018       entry->return_reg = undef_src;
2019    }
2020
2021    this->function_signatures.push_tail(entry);
2022    return entry;
2023 }
2024
2025 void
2026 ir_to_mesa_visitor::visit(ir_call *ir)
2027 {
2028    ir_to_mesa_instruction *call_inst;
2029    ir_function_signature *sig = ir->get_callee();
2030    function_entry *entry = get_function_signature(sig);
2031    int i;
2032
2033    /* Process in parameters. */
2034    exec_list_iterator sig_iter = sig->parameters.iterator();
2035    foreach_iter(exec_list_iterator, iter, *ir) {
2036       ir_rvalue *param_rval = (ir_rvalue *)iter.get();
2037       ir_variable *param = (ir_variable *)sig_iter.get();
2038
2039       if (param->mode == ir_var_in ||
2040           param->mode == ir_var_inout) {
2041          variable_storage *storage = find_variable_storage(param);
2042          assert(storage);
2043
2044          param_rval->accept(this);
2045          src_reg r = this->result;
2046
2047          dst_reg l;
2048          l.file = storage->file;
2049          l.index = storage->index;
2050          l.reladdr = NULL;
2051          l.writemask = WRITEMASK_XYZW;
2052          l.cond_mask = COND_TR;
2053
2054          for (i = 0; i < type_size(param->type); i++) {
2055             emit(ir, OPCODE_MOV, l, r);
2056             l.index++;
2057             r.index++;
2058          }
2059       }
2060
2061       sig_iter.next();
2062    }
2063    assert(!sig_iter.has_next());
2064
2065    /* Emit call instruction */
2066    call_inst = emit(ir, OPCODE_CAL);
2067    call_inst->function = entry;
2068
2069    /* Process out parameters. */
2070    sig_iter = sig->parameters.iterator();
2071    foreach_iter(exec_list_iterator, iter, *ir) {
2072       ir_rvalue *param_rval = (ir_rvalue *)iter.get();
2073       ir_variable *param = (ir_variable *)sig_iter.get();
2074
2075       if (param->mode == ir_var_out ||
2076           param->mode == ir_var_inout) {
2077          variable_storage *storage = find_variable_storage(param);
2078          assert(storage);
2079
2080          src_reg r;
2081          r.file = storage->file;
2082          r.index = storage->index;
2083          r.reladdr = NULL;
2084          r.swizzle = SWIZZLE_NOOP;
2085          r.negate = 0;
2086
2087          param_rval->accept(this);
2088          dst_reg l = dst_reg(this->result);
2089
2090          for (i = 0; i < type_size(param->type); i++) {
2091             emit(ir, OPCODE_MOV, l, r);
2092             l.index++;
2093             r.index++;
2094          }
2095       }
2096
2097       sig_iter.next();
2098    }
2099    assert(!sig_iter.has_next());
2100
2101    /* Process return value. */
2102    this->result = entry->return_reg;
2103 }
2104
/**
 * Translate a GLSL IR texture operation into a Mesa texture instruction.
 *
 * The coordinate (a constant 0.0 for ir_txs) is copied into a vec4
 * temporary, which is then patched in place with the projector, shadow
 * comparitor and LOD / bias values as required by the chosen opcode.  The
 * texture instruction is emitted last, and its shadow flag, sampler and
 * texture target are filled in from the IR.  The temporary receiving the
 * texel ends up in this->result.
 */
void
ir_to_mesa_visitor::visit(ir_texture *ir)
{
   src_reg result_src, coord, lod_info, projector, dx, dy;
   dst_reg result_dst, coord_dst;
   ir_to_mesa_instruction *inst = NULL;
   prog_opcode opcode = OPCODE_NOP;

   /* ir_txs gets a dummy 0.0 coordinate instead of visiting
    * ir->coordinate.
    */
   if (ir->op == ir_txs)
      this->result = src_reg_for_float(0.0);
   else
      ir->coordinate->accept(this);

   /* Put our coords in a temp.  We'll need to modify them for shadow,
    * projection, or LOD, so the only case we'd use it as is is if
    * we're doing plain old texturing.  Mesa IR optimization should
    * handle cleaning up our mess in that case.
    */
   coord = get_temp(glsl_type::vec4_type);
   coord_dst = dst_reg(coord);
   emit(ir, OPCODE_MOV, coord_dst, this->result);

   /* Evaluate the projector now; it is consumed further down. */
   if (ir->projector) {
      ir->projector->accept(this);
      projector = this->result;
   }

   /* Storage for our result.  Ideally for an assignment we'd be using
    * the actual storage for the result here, instead.
    */
   result_src = get_temp(glsl_type::vec4_type);
   result_dst = dst_reg(result_src);

   /* Pick the Mesa opcode and evaluate the extra operands it needs. */
   switch (ir->op) {
   case ir_tex:
   case ir_txs:
      opcode = OPCODE_TEX;
      break;
   case ir_txb:
      opcode = OPCODE_TXB;
      ir->lod_info.bias->accept(this);
      lod_info = this->result;
      break;
   case ir_txf:
      /* Pretend to be TXL so the sampler, coordinate, lod are available */
   case ir_txl:
      opcode = OPCODE_TXL;
      ir->lod_info.lod->accept(this);
      lod_info = this->result;
      break;
   case ir_txd:
      opcode = OPCODE_TXD;
      ir->lod_info.grad.dPdx->accept(this);
      dx = this->result;
      ir->lod_info.grad.dPdy->accept(this);
      dy = this->result;
      break;
   }

   const glsl_type *sampler_type = ir->sampler->type;

   if (ir->projector) {
      if (opcode == OPCODE_TEX) {
         /* Slot the projector in as the last component of the coord. */
         coord_dst.writemask = WRITEMASK_W;
         emit(ir, OPCODE_MOV, coord_dst, projector);
         coord_dst.writemask = WRITEMASK_XYZW;
         opcode = OPCODE_TXP;
      } else {
         /* View of the coord temp that replicates its W channel. */
         src_reg coord_w = coord;
         coord_w.swizzle = SWIZZLE_WWWW;

         /* For the other TEX opcodes there's no projective version
          * since the last slot is taken up by lod info.  Do the
          * projective divide now.
          */
         coord_dst.writemask = WRITEMASK_W;
         emit(ir, OPCODE_RCP, coord_dst, projector);

         /* In the case where we have to project the coordinates "by hand,"
          * the shadow comparitor value must also be projected.
          */
         src_reg tmp_src = coord;
         if (ir->shadow_comparitor) {
            /* Slot the shadow value in as the second to last component of the
             * coord.
             */
            ir->shadow_comparitor->accept(this);

            tmp_src = get_temp(glsl_type::vec4_type);
            dst_reg tmp_dst = dst_reg(tmp_src);

            /* Projective division not allowed for array samplers. */
            assert(!sampler_type->sampler_array);

            tmp_dst.writemask = WRITEMASK_Z;
            emit(ir, OPCODE_MOV, tmp_dst, this->result);

            tmp_dst.writemask = WRITEMASK_XY;
            emit(ir, OPCODE_MOV, tmp_dst, coord);
         }

         /* Multiply XYZ by the reciprocal stored in coord.w above. */
         coord_dst.writemask = WRITEMASK_XYZ;
         emit(ir, OPCODE_MUL, coord_dst, tmp_src, coord_w);

         coord_dst.writemask = WRITEMASK_XYZW;
         coord.swizzle = SWIZZLE_XYZW;
      }
   }

   /* If projection is done and the opcode is not OPCODE_TXP, then the shadow
    * comparitor was put in the correct place (and projected) by the code,
    * above, that handles by-hand projection.
    */
   if (ir->shadow_comparitor && (!ir->projector || opcode == OPCODE_TXP)) {
      /* Slot the shadow value in as the second to last component of the
       * coord.
       */
      ir->shadow_comparitor->accept(this);

      /* XXX This will need to be updated for cubemap array samplers. */
      if (sampler_type->sampler_dimensionality == GLSL_SAMPLER_DIM_2D &&
          sampler_type->sampler_array) {
         coord_dst.writemask = WRITEMASK_W;
      } else {
         coord_dst.writemask = WRITEMASK_Z;
      }

      emit(ir, OPCODE_MOV, coord_dst, this->result);
      coord_dst.writemask = WRITEMASK_XYZW;
   }

   if (opcode == OPCODE_TXL || opcode == OPCODE_TXB) {
      /* Mesa IR stores lod or lod bias in the last channel of the coords. */
      coord_dst.writemask = WRITEMASK_W;
      emit(ir, OPCODE_MOV, coord_dst, lod_info);
      coord_dst.writemask = WRITEMASK_XYZW;
   }

   /* TXD is the only opcode that takes the two derivative operands. */
   if (opcode == OPCODE_TXD)
      inst = emit(ir, opcode, result_dst, coord, dx, dy);
   else
      inst = emit(ir, opcode, result_dst, coord);

   if (ir->shadow_comparitor)
      inst->tex_shadow = GL_TRUE;

   inst->sampler = _mesa_get_sampler_uniform_value(ir->sampler,
                                                   this->shader_program,
                                                   this->prog);

   /* Map the GLSL sampler dimensionality (and array-ness) to a Mesa
    * texture target index.
    */
   switch (sampler_type->sampler_dimensionality) {
   case GLSL_SAMPLER_DIM_1D:
      inst->tex_target = (sampler_type->sampler_array)
         ? TEXTURE_1D_ARRAY_INDEX : TEXTURE_1D_INDEX;
      break;
   case GLSL_SAMPLER_DIM_2D:
      inst->tex_target = (sampler_type->sampler_array)
         ? TEXTURE_2D_ARRAY_INDEX : TEXTURE_2D_INDEX;
      break;
   case GLSL_SAMPLER_DIM_3D:
      inst->tex_target = TEXTURE_3D_INDEX;
      break;
   case GLSL_SAMPLER_DIM_CUBE:
      inst->tex_target = TEXTURE_CUBE_INDEX;
      break;
   case GLSL_SAMPLER_DIM_RECT:
      inst->tex_target = TEXTURE_RECT_INDEX;
      break;
   case GLSL_SAMPLER_DIM_BUF:
      assert(!"FINISHME: Implement ARB_texture_buffer_object");
      break;
   case GLSL_SAMPLER_DIM_EXTERNAL:
      inst->tex_target = TEXTURE_EXTERNAL_INDEX;
      break;
   default:
      assert(!"Should not get here.");
   }

   this->result = result_src;
}
2286
2287 void
2288 ir_to_mesa_visitor::visit(ir_return *ir)
2289 {
2290    if (ir->get_value()) {
2291       dst_reg l;
2292       int i;
2293
2294       assert(current_function);
2295
2296       ir->get_value()->accept(this);
2297       src_reg r = this->result;
2298
2299       l = dst_reg(current_function->return_reg);
2300
2301       for (i = 0; i < type_size(current_function->sig->return_type); i++) {
2302          emit(ir, OPCODE_MOV, l, r);
2303          l.index++;
2304          r.index++;
2305       }
2306    }
2307
2308    emit(ir, OPCODE_RET);
2309 }
2310
2311 void
2312 ir_to_mesa_visitor::visit(ir_discard *ir)
2313 {
2314    struct gl_fragment_program *fp = (struct gl_fragment_program *)this->prog;
2315
2316    if (ir->condition) {
2317       ir->condition->accept(this);
2318       this->result.negate = ~this->result.negate;
2319       emit(ir, OPCODE_KIL, undef_dst, this->result);
2320    } else {
2321       emit(ir, OPCODE_KIL_NV);
2322    }
2323
2324    fp->UsesKill = GL_TRUE;
2325 }
2326
2327 void
2328 ir_to_mesa_visitor::visit(ir_if *ir)
2329 {
2330    ir_to_mesa_instruction *cond_inst, *if_inst;
2331    ir_to_mesa_instruction *prev_inst;
2332
2333    prev_inst = (ir_to_mesa_instruction *)this->instructions.get_tail();
2334
2335    ir->condition->accept(this);
2336    assert(this->result.file != PROGRAM_UNDEFINED);
2337
2338    if (this->options->EmitCondCodes) {
2339       cond_inst = (ir_to_mesa_instruction *)this->instructions.get_tail();
2340
2341       /* See if we actually generated any instruction for generating
2342        * the condition.  If not, then cook up a move to a temp so we
2343        * have something to set cond_update on.
2344        */
2345       if (cond_inst == prev_inst) {
2346          src_reg temp = get_temp(glsl_type::bool_type);
2347          cond_inst = emit(ir->condition, OPCODE_MOV, dst_reg(temp), result);
2348       }
2349       cond_inst->cond_update = GL_TRUE;
2350
2351       if_inst = emit(ir->condition, OPCODE_IF);
2352       if_inst->dst.cond_mask = COND_NE;
2353    } else {
2354       if_inst = emit(ir->condition, OPCODE_IF, undef_dst, this->result);
2355    }
2356
2357    this->instructions.push_tail(if_inst);
2358
2359    visit_exec_list(&ir->then_instructions, this);
2360
2361    if (!ir->else_instructions.is_empty()) {
2362       emit(ir->condition, OPCODE_ELSE);
2363       visit_exec_list(&ir->else_instructions, this);
2364    }
2365
2366    if_inst = emit(ir->condition, OPCODE_ENDIF);
2367 }
2368
2369 ir_to_mesa_visitor::ir_to_mesa_visitor()
2370 {
2371    result.file = PROGRAM_UNDEFINED;
2372    next_temp = 1;
2373    next_signature_id = 1;
2374    current_function = NULL;
2375    mem_ctx = ralloc_context(NULL);
2376 }
2377
2378 ir_to_mesa_visitor::~ir_to_mesa_visitor()
2379 {
2380    ralloc_free(mem_ctx);
2381 }
2382
2383 static struct prog_src_register
2384 mesa_src_reg_from_ir_src_reg(src_reg reg)
2385 {
2386    struct prog_src_register mesa_reg;
2387
2388    mesa_reg.File = reg.file;
2389    assert(reg.index < (1 << INST_INDEX_BITS));
2390    mesa_reg.Index = reg.index;
2391    mesa_reg.Swizzle = reg.swizzle;
2392    mesa_reg.RelAddr = reg.reladdr != NULL;
2393    mesa_reg.Negate = reg.negate;
2394    mesa_reg.Abs = 0;
2395    mesa_reg.HasIndex2 = GL_FALSE;
2396    mesa_reg.RelAddr2 = 0;
2397    mesa_reg.Index2 = 0;
2398
2399    return mesa_reg;
2400 }
2401
2402 static void
2403 set_branchtargets(ir_to_mesa_visitor *v,
2404                   struct prog_instruction *mesa_instructions,
2405                   int num_instructions)
2406 {
2407    int if_count = 0, loop_count = 0;
2408    int *if_stack, *loop_stack;
2409    int if_stack_pos = 0, loop_stack_pos = 0;
2410    int i, j;
2411
2412    for (i = 0; i < num_instructions; i++) {
2413       switch (mesa_instructions[i].Opcode) {
2414       case OPCODE_IF:
2415          if_count++;
2416          break;
2417       case OPCODE_BGNLOOP:
2418          loop_count++;
2419          break;
2420       case OPCODE_BRK:
2421       case OPCODE_CONT:
2422          mesa_instructions[i].BranchTarget = -1;
2423          break;
2424       default:
2425          break;
2426       }
2427    }
2428
2429    if_stack = rzalloc_array(v->mem_ctx, int, if_count);
2430    loop_stack = rzalloc_array(v->mem_ctx, int, loop_count);
2431
2432    for (i = 0; i < num_instructions; i++) {
2433       switch (mesa_instructions[i].Opcode) {
2434       case OPCODE_IF:
2435          if_stack[if_stack_pos] = i;
2436          if_stack_pos++;
2437          break;
2438       case OPCODE_ELSE:
2439          mesa_instructions[if_stack[if_stack_pos - 1]].BranchTarget = i;
2440          if_stack[if_stack_pos - 1] = i;
2441          break;
2442       case OPCODE_ENDIF:
2443          mesa_instructions[if_stack[if_stack_pos - 1]].BranchTarget = i;
2444          if_stack_pos--;
2445          break;
2446       case OPCODE_BGNLOOP:
2447          loop_stack[loop_stack_pos] = i;
2448          loop_stack_pos++;
2449          break;
2450       case OPCODE_ENDLOOP:
2451          loop_stack_pos--;
2452          /* Rewrite any breaks/conts at this nesting level (haven't
2453           * already had a BranchTarget assigned) to point to the end
2454           * of the loop.
2455           */
2456          for (j = loop_stack[loop_stack_pos]; j < i; j++) {
2457             if (mesa_instructions[j].Opcode == OPCODE_BRK ||
2458                 mesa_instructions[j].Opcode == OPCODE_CONT) {
2459                if (mesa_instructions[j].BranchTarget == -1) {
2460                   mesa_instructions[j].BranchTarget = i;
2461                }
2462             }
2463          }
2464          /* The loop ends point at each other. */
2465          mesa_instructions[i].BranchTarget = loop_stack[loop_stack_pos];
2466          mesa_instructions[loop_stack[loop_stack_pos]].BranchTarget = i;
2467          break;
2468       case OPCODE_CAL:
2469          foreach_iter(exec_list_iterator, iter, v->function_signatures) {
2470             function_entry *entry = (function_entry *)iter.get();
2471
2472             if (entry->sig_id == mesa_instructions[i].BranchTarget) {
2473                mesa_instructions[i].BranchTarget = entry->inst;
2474                break;
2475             }
2476          }
2477          break;
2478       default:
2479          break;
2480       }
2481    }
2482 }
2483
2484 static void
2485 print_program(struct prog_instruction *mesa_instructions,
2486               ir_instruction **mesa_instruction_annotation,
2487               int num_instructions)
2488 {
2489    ir_instruction *last_ir = NULL;
2490    int i;
2491    int indent = 0;
2492
2493    for (i = 0; i < num_instructions; i++) {
2494       struct prog_instruction *mesa_inst = mesa_instructions + i;
2495       ir_instruction *ir = mesa_instruction_annotation[i];
2496
2497       fprintf(stdout, "%3d: ", i);
2498
2499       if (last_ir != ir && ir) {
2500          int j;
2501
2502          for (j = 0; j < indent; j++) {
2503             fprintf(stdout, " ");
2504          }
2505          ir->print();
2506          printf("\n");
2507          last_ir = ir;
2508
2509          fprintf(stdout, "     "); /* line number spacing. */
2510       }
2511
2512       indent = _mesa_fprint_instruction_opt(stdout, mesa_inst, indent,
2513                                             PROG_PRINT_DEBUG, NULL);
2514    }
2515 }
2516
2517
2518 /**
2519  * Count resources used by the given gpu program (number of texture
2520  * samplers, etc).
2521  */
2522 static void
2523 count_resources(struct gl_program *prog)
2524 {
2525    unsigned int i;
2526
2527    prog->SamplersUsed = 0;
2528
2529    for (i = 0; i < prog->NumInstructions; i++) {
2530       struct prog_instruction *inst = &prog->Instructions[i];
2531
2532       if (_mesa_is_tex_instruction(inst->Opcode)) {
2533          prog->SamplerTargets[inst->TexSrcUnit] =
2534             (gl_texture_index)inst->TexSrcTarget;
2535          prog->SamplersUsed |= 1 << inst->TexSrcUnit;
2536          if (inst->TexShadow) {
2537             prog->ShadowSamplers |= 1 << inst->TexSrcUnit;
2538          }
2539       }
2540    }
2541
2542    _mesa_update_shader_textures_used(prog);
2543 }
2544
2545
2546 /**
2547  * Check if the given vertex/fragment/shader program is within the
2548  * resource limits of the context (number of texture units, etc).
2549  * If any of those checks fail, record a linker error.
2550  *
2551  * XXX more checks are needed...
2552  */
2553 static bool
2554 check_resources(const struct gl_context *ctx,
2555                 struct gl_shader_program *shader_program,
2556                 struct gl_program *prog)
2557 {
2558    switch (prog->Target) {
2559    case GL_VERTEX_PROGRAM_ARB:
2560       if (_mesa_bitcount(prog->SamplersUsed) >
2561           ctx->Const.MaxVertexTextureImageUnits) {
2562          linker_error(shader_program,
2563                       "Too many vertex shader texture samplers");
2564       }
2565       if (prog->Parameters->NumParameters > MAX_UNIFORMS) {
2566          linker_error(shader_program, "Too many vertex shader constants");
2567       }
2568       break;
2569    case MESA_GEOMETRY_PROGRAM:
2570       if (_mesa_bitcount(prog->SamplersUsed) >
2571           ctx->Const.MaxGeometryTextureImageUnits) {
2572          linker_error(shader_program,
2573                       "Too many geometry shader texture samplers");
2574       }
2575       if (prog->Parameters->NumParameters >
2576           MAX_GEOMETRY_UNIFORM_COMPONENTS / 4) {
2577          linker_error(shader_program, "Too many geometry shader constants");
2578       }
2579       break;
2580    case GL_FRAGMENT_PROGRAM_ARB:
2581       if (_mesa_bitcount(prog->SamplersUsed) >
2582           ctx->Const.MaxTextureImageUnits) {
2583          linker_error(shader_program,
2584                       "Too many fragment shader texture samplers");
2585       }
2586       if (prog->Parameters->NumParameters > MAX_UNIFORMS) {
2587          linker_error(shader_program, "Too many fragment shader constants");
2588       }
2589       break;
2590    default:
2591       _mesa_problem(ctx, "unexpected program type in check_resources()");
2592    }
2593
2594    return shader_program->LinkStatus;
2595 }
2596
2597 class add_uniform_to_shader : public uniform_field_visitor {
2598 public:
2599    add_uniform_to_shader(struct gl_shader_program *shader_program,
2600                          struct gl_program_parameter_list *params)
2601       : shader_program(shader_program), params(params)
2602    {
2603       /* empty */
2604    }
2605
2606    void process(ir_variable *var)
2607    {
2608       this->idx = -1;
2609       this->uniform_field_visitor::process(var);
2610
2611       var->location = this->idx;
2612    }
2613
2614 private:
2615    virtual void visit_field(const glsl_type *type, const char *name);
2616
2617    struct gl_shader_program *shader_program;
2618    struct gl_program_parameter_list *params;
2619    int idx;
2620 };
2621
2622 void
2623 add_uniform_to_shader::visit_field(const glsl_type *type, const char *name)
2624 {
2625    unsigned int size;
2626
2627    if (type->is_vector() || type->is_scalar()) {
2628       size = type->vector_elements;
2629    } else {
2630       size = type_size(type) * 4;
2631    }
2632
2633    gl_register_file file;
2634    if (type->is_sampler() ||
2635        (type->is_array() && type->fields.array->is_sampler())) {
2636       file = PROGRAM_SAMPLER;
2637    } else {
2638       file = PROGRAM_UNIFORM;
2639    }
2640
2641    int index = _mesa_lookup_parameter_index(params, -1, name);
2642    if (index < 0) {
2643       index = _mesa_add_parameter(params, file, name, size, type->gl_type,
2644                                   NULL, NULL, 0x0);
2645
2646       /* Sampler uniform values are stored in prog->SamplerUnits,
2647        * and the entry in that array is selected by this index we
2648        * store in ParameterValues[].
2649        */
2650       if (file == PROGRAM_SAMPLER) {
2651          unsigned location;
2652          const bool found =
2653             this->shader_program->UniformHash->get(location,
2654                                                    params->Parameters[index].Name);
2655          assert(found);
2656
2657          if (!found)
2658             return;
2659
2660          struct gl_uniform_storage *storage =
2661             &this->shader_program->UniformStorage[location];
2662
2663          for (unsigned int j = 0; j < size / 4; j++)
2664             params->ParameterValues[index + j][0].f = storage->sampler + j;
2665       }
2666    }
2667
2668    /* The first part of the uniform that's processed determines the base
2669     * location of the whole uniform (for structures).
2670     */
2671    if (this->idx < 0)
2672       this->idx = index;
2673 }
2674
2675 /**
2676  * Generate the program parameters list for the user uniforms in a shader
2677  *
2678  * \param shader_program Linked shader program.  This is only used to
2679  *                       emit possible link errors to the info log.
2680  * \param sh             Shader whose uniforms are to be processed.
2681  * \param params         Parameter list to be filled in.
2682  */
2683 void
2684 _mesa_generate_parameters_list_for_uniforms(struct gl_shader_program
2685                                             *shader_program,
2686                                             struct gl_shader *sh,
2687                                             struct gl_program_parameter_list
2688                                             *params)
2689 {
2690    add_uniform_to_shader add(shader_program, params);
2691
2692    foreach_list(node, sh->ir) {
2693       ir_variable *var = ((ir_instruction *) node)->as_variable();
2694
2695       if ((var == NULL) || (var->mode != ir_var_uniform)
2696           || (strncmp(var->name, "gl_", 3) == 0))
2697          continue;
2698
2699       add.process(var);
2700    }
2701 }
2702
2703 void
2704 _mesa_associate_uniform_storage(struct gl_context *ctx,
2705                                 struct gl_shader_program *shader_program,
2706                                 struct gl_program_parameter_list *params)
2707 {
2708    /* After adding each uniform to the parameter list, connect the storage for
2709     * the parameter with the tracking structure used by the API for the
2710     * uniform.
2711     */
2712    unsigned last_location = unsigned(~0);
2713    for (unsigned i = 0; i < params->NumParameters; i++) {
2714       if (params->Parameters[i].Type != PROGRAM_UNIFORM)
2715          continue;
2716
2717       unsigned location;
2718       const bool found =
2719          shader_program->UniformHash->get(location, params->Parameters[i].Name);
2720       assert(found);
2721
2722       if (!found)
2723          continue;
2724
2725       if (location != last_location) {
2726          struct gl_uniform_storage *storage =
2727             &shader_program->UniformStorage[location];
2728          enum gl_uniform_driver_format format = uniform_native;
2729
2730          unsigned columns = 0;
2731          switch (storage->type->base_type) {
2732          case GLSL_TYPE_UINT:
2733             assert(ctx->Const.NativeIntegers);
2734             format = uniform_native;
2735             columns = 1;
2736             break;
2737          case GLSL_TYPE_INT:
2738             format =
2739                (ctx->Const.NativeIntegers) ? uniform_native : uniform_int_float;
2740             columns = 1;
2741             break;
2742          case GLSL_TYPE_FLOAT:
2743             format = uniform_native;
2744             columns = storage->type->matrix_columns;
2745             break;
2746          case GLSL_TYPE_BOOL:
2747             if (ctx->Const.NativeIntegers) {
2748                format = (ctx->Const.UniformBooleanTrue == 1)
2749                   ? uniform_bool_int_0_1 : uniform_bool_int_0_not0;
2750             } else {
2751                format = uniform_bool_float;
2752             }
2753             columns = 1;
2754             break;
2755          case GLSL_TYPE_SAMPLER:
2756             format = uniform_native;
2757             columns = 1;
2758             break;
2759          default:
2760             assert(!"Should not get here.");
2761             break;
2762          }
2763
2764          _mesa_uniform_attach_driver_storage(storage,
2765                                              4 * sizeof(float) * columns,
2766                                              4 * sizeof(float),
2767                                              format,
2768                                              &params->ParameterValues[i]);
2769          last_location = location;
2770       }
2771    }
2772 }
2773
2774 static void
2775 set_uniform_initializer(struct gl_context *ctx, void *mem_ctx,
2776                         struct gl_shader_program *shader_program,
2777                         const char *name, const glsl_type *type,
2778                         ir_constant *val)
2779 {
2780    if (type->is_record()) {
2781       ir_constant *field_constant;
2782
2783       field_constant = (ir_constant *)val->components.get_head();
2784
2785       for (unsigned int i = 0; i < type->length; i++) {
2786          const glsl_type *field_type = type->fields.structure[i].type;
2787          const char *field_name = ralloc_asprintf(mem_ctx, "%s.%s", name,
2788                                             type->fields.structure[i].name);
2789          set_uniform_initializer(ctx, mem_ctx, shader_program, field_name,
2790                                  field_type, field_constant);
2791          field_constant = (ir_constant *)field_constant->next;
2792       }
2793       return;
2794    }
2795
2796    int loc = _mesa_get_uniform_location(ctx, shader_program, name);
2797
2798    if (loc == -1) {
2799       linker_error(shader_program,
2800                    "Couldn't find uniform for initializer %s\n", name);
2801       return;
2802    }
2803
2804    for (unsigned int i = 0; i < (type->is_array() ? type->length : 1); i++) {
2805       ir_constant *element;
2806       const glsl_type *element_type;
2807       if (type->is_array()) {
2808          element = val->array_elements[i];
2809          element_type = type->fields.array;
2810       } else {
2811          element = val;
2812          element_type = type;
2813       }
2814
2815       void *values;
2816
2817       if (element_type->base_type == GLSL_TYPE_BOOL) {
2818          int *conv = ralloc_array(mem_ctx, int, element_type->components());
2819          for (unsigned int j = 0; j < element_type->components(); j++) {
2820             conv[j] = element->value.b[j];
2821          }
2822          values = (void *)conv;
2823          element_type = glsl_type::get_instance(GLSL_TYPE_INT,
2824                                                 element_type->vector_elements,
2825                                                 1);
2826       } else {
2827          values = &element->value;
2828       }
2829
2830       if (element_type->is_matrix()) {
2831          _mesa_uniform_matrix(ctx, shader_program,
2832                               element_type->matrix_columns,
2833                               element_type->vector_elements,
2834                               loc, 1, GL_FALSE, (GLfloat *)values);
2835       } else {
2836          _mesa_uniform(ctx, shader_program, loc, element_type->matrix_columns,
2837                        values, element_type->gl_type);
2838       }
2839
2840       loc++;
2841    }
2842 }
2843
2844 static void
2845 set_uniform_initializers(struct gl_context *ctx,
2846                          struct gl_shader_program *shader_program)
2847 {
2848    void *mem_ctx = NULL;
2849
2850    for (unsigned int i = 0; i < MESA_SHADER_TYPES; i++) {
2851       struct gl_shader *shader = shader_program->_LinkedShaders[i];
2852
2853       if (shader == NULL)
2854          continue;
2855
2856       foreach_iter(exec_list_iterator, iter, *shader->ir) {
2857          ir_instruction *ir = (ir_instruction *)iter.get();
2858          ir_variable *var = ir->as_variable();
2859
2860          if (!var || var->mode != ir_var_uniform || !var->constant_value)
2861             continue;
2862
2863          if (!mem_ctx)
2864             mem_ctx = ralloc_context(NULL);
2865
2866          set_uniform_initializer(ctx, mem_ctx, shader_program, var->name,
2867                                  var->type, var->constant_value);
2868       }
2869    }
2870
2871    ralloc_free(mem_ctx);
2872 }
2873
2874 /*
2875  * On a basic block basis, tracks available PROGRAM_TEMPORARY register
2876  * channels for copy propagation and updates following instructions to
2877  * use the original versions.
2878  *
2879  * The ir_to_mesa_visitor lazily produces code assuming that this pass
2880  * will occur.  As an example, a TXP production before this pass:
2881  *
2882  * 0: MOV TEMP[1], INPUT[4].xyyy;
2883  * 1: MOV TEMP[1].w, INPUT[4].wwww;
2884  * 2: TXP TEMP[2], TEMP[1], texture[0], 2D;
2885  *
2886  * and after:
2887  *
2888  * 0: MOV TEMP[1], INPUT[4].xyyy;
2889  * 1: MOV TEMP[1].w, INPUT[4].wwww;
2890  * 2: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D;
2891  *
2892  * which allows for dead code elimination on TEMP[1]'s writes.
2893  */
2894 void
2895 ir_to_mesa_visitor::copy_propagate(void)
2896 {
2897    ir_to_mesa_instruction **acp = rzalloc_array(mem_ctx,
2898                                                     ir_to_mesa_instruction *,
2899                                                     this->next_temp * 4);
2900    int *acp_level = rzalloc_array(mem_ctx, int, this->next_temp * 4);
2901    int level = 0;
2902
2903    foreach_iter(exec_list_iterator, iter, this->instructions) {
2904       ir_to_mesa_instruction *inst = (ir_to_mesa_instruction *)iter.get();
2905
2906       assert(inst->dst.file != PROGRAM_TEMPORARY
2907              || inst->dst.index < this->next_temp);
2908
2909       /* First, do any copy propagation possible into the src regs. */
2910       for (int r = 0; r < 3; r++) {
2911          ir_to_mesa_instruction *first = NULL;
2912          bool good = true;
2913          int acp_base = inst->src[r].index * 4;
2914
2915          if (inst->src[r].file != PROGRAM_TEMPORARY ||
2916              inst->src[r].reladdr)
2917             continue;
2918
2919          /* See if we can find entries in the ACP consisting of MOVs
2920           * from the same src register for all the swizzled channels
2921           * of this src register reference.
2922           */
2923          for (int i = 0; i < 4; i++) {
2924             int src_chan = GET_SWZ(inst->src[r].swizzle, i);
2925             ir_to_mesa_instruction *copy_chan = acp[acp_base + src_chan];
2926
2927             if (!copy_chan) {
2928                good = false;
2929                break;
2930             }
2931
2932             assert(acp_level[acp_base + src_chan] <= level);
2933
2934             if (!first) {
2935                first = copy_chan;
2936             } else {
2937                if (first->src[0].file != copy_chan->src[0].file ||
2938                    first->src[0].index != copy_chan->src[0].index) {
2939                   good = false;
2940                   break;
2941                }
2942             }
2943          }
2944
2945          if (good) {
2946             /* We've now validated that we can copy-propagate to
2947              * replace this src register reference.  Do it.
2948              */
2949             inst->src[r].file = first->src[0].file;
2950             inst->src[r].index = first->src[0].index;
2951
2952             int swizzle = 0;
2953             for (int i = 0; i < 4; i++) {
2954                int src_chan = GET_SWZ(inst->src[r].swizzle, i);
2955                ir_to_mesa_instruction *copy_inst = acp[acp_base + src_chan];
2956                swizzle |= (GET_SWZ(copy_inst->src[0].swizzle, src_chan) <<
2957                            (3 * i));
2958             }
2959             inst->src[r].swizzle = swizzle;
2960          }
2961       }
2962
2963       switch (inst->op) {
2964       case OPCODE_BGNLOOP:
2965       case OPCODE_ENDLOOP:
2966          /* End of a basic block, clear the ACP entirely. */
2967          memset(acp, 0, sizeof(*acp) * this->next_temp * 4);
2968          break;
2969
2970       case OPCODE_IF:
2971          ++level;
2972          break;
2973
2974       case OPCODE_ENDIF:
2975       case OPCODE_ELSE:
2976          /* Clear all channels written inside the block from the ACP, but
2977           * leaving those that were not touched.
2978           */
2979          for (int r = 0; r < this->next_temp; r++) {
2980             for (int c = 0; c < 4; c++) {
2981                if (!acp[4 * r + c])
2982                   continue;
2983
2984                if (acp_level[4 * r + c] >= level)
2985                   acp[4 * r + c] = NULL;
2986             }
2987          }
2988          if (inst->op == OPCODE_ENDIF)
2989             --level;
2990          break;
2991
2992       default:
2993          /* Continuing the block, clear any written channels from
2994           * the ACP.
2995           */
2996          if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.reladdr) {
2997             /* Any temporary might be written, so no copy propagation
2998              * across this instruction.
2999              */
3000             memset(acp, 0, sizeof(*acp) * this->next_temp * 4);
3001          } else if (inst->dst.file == PROGRAM_OUTPUT &&
3002                     inst->dst.reladdr) {
3003             /* Any output might be written, so no copy propagation
3004              * from outputs across this instruction.
3005              */
3006             for (int r = 0; r < this->next_temp; r++) {
3007                for (int c = 0; c < 4; c++) {
3008                   if (!acp[4 * r + c])
3009                      continue;
3010
3011                   if (acp[4 * r + c]->src[0].file == PROGRAM_OUTPUT)
3012                      acp[4 * r + c] = NULL;
3013                }
3014             }
3015          } else if (inst->dst.file == PROGRAM_TEMPORARY ||
3016                     inst->dst.file == PROGRAM_OUTPUT) {
3017             /* Clear where it's used as dst. */
3018             if (inst->dst.file == PROGRAM_TEMPORARY) {
3019                for (int c = 0; c < 4; c++) {
3020                   if (inst->dst.writemask & (1 << c)) {
3021                      acp[4 * inst->dst.index + c] = NULL;
3022                   }
3023                }
3024             }
3025
3026             /* Clear where it's used as src. */
3027             for (int r = 0; r < this->next_temp; r++) {
3028                for (int c = 0; c < 4; c++) {
3029                   if (!acp[4 * r + c])
3030                      continue;
3031
3032                   int src_chan = GET_SWZ(acp[4 * r + c]->src[0].swizzle, c);
3033
3034                   if (acp[4 * r + c]->src[0].file == inst->dst.file &&
3035                       acp[4 * r + c]->src[0].index == inst->dst.index &&
3036                       inst->dst.writemask & (1 << src_chan))
3037                   {
3038                      acp[4 * r + c] = NULL;
3039                   }
3040                }
3041             }
3042          }
3043          break;
3044       }
3045
3046       /* If this is a copy, add it to the ACP. */
3047       if (inst->op == OPCODE_MOV &&
3048           inst->dst.file == PROGRAM_TEMPORARY &&
3049           !inst->dst.reladdr &&
3050           !inst->saturate &&
3051           !inst->src[0].reladdr &&
3052           !inst->src[0].negate) {
3053          for (int i = 0; i < 4; i++) {
3054             if (inst->dst.writemask & (1 << i)) {
3055                acp[4 * inst->dst.index + i] = inst;
3056                acp_level[4 * inst->dst.index + i] = level;
3057             }
3058          }
3059       }
3060    }
3061
3062    ralloc_free(acp_level);
3063    ralloc_free(acp);
3064 }
3065
3066
3067 /**
3068  * Convert a shader's GLSL IR into a Mesa gl_program.
3069  */
3070 static struct gl_program *
3071 get_mesa_program(struct gl_context *ctx,
3072                  struct gl_shader_program *shader_program,
3073                  struct gl_shader *shader)
3074 {
3075    ir_to_mesa_visitor v;
3076    struct prog_instruction *mesa_instructions, *mesa_inst;
3077    ir_instruction **mesa_instruction_annotation;
3078    int i;
3079    struct gl_program *prog;
3080    GLenum target;
3081    const char *target_string;
3082    GLboolean progress;
3083    struct gl_shader_compiler_options *options =
3084          &ctx->ShaderCompilerOptions[_mesa_shader_type_to_index(shader->Type)];
3085
3086    switch (shader->Type) {
3087    case GL_VERTEX_SHADER:
3088       target = GL_VERTEX_PROGRAM_ARB;
3089       target_string = "vertex";
3090       break;
3091    case GL_FRAGMENT_SHADER:
3092       target = GL_FRAGMENT_PROGRAM_ARB;
3093       target_string = "fragment";
3094       break;
3095    case GL_GEOMETRY_SHADER:
3096       target = GL_GEOMETRY_PROGRAM_NV;
3097       target_string = "geometry";
3098       break;
3099    default:
3100       assert(!"should not be reached");
3101       return NULL;
3102    }
3103
3104    validate_ir_tree(shader->ir);
3105
3106    prog = ctx->Driver.NewProgram(ctx, target, shader_program->Name);
3107    if (!prog)
3108       return NULL;
3109    prog->Parameters = _mesa_new_parameter_list();
3110    v.ctx = ctx;
3111    v.prog = prog;
3112    v.shader_program = shader_program;
3113    v.options = options;
3114
3115    _mesa_generate_parameters_list_for_uniforms(shader_program, shader,
3116                                                prog->Parameters);
3117
3118    /* Emit Mesa IR for main(). */
3119    visit_exec_list(shader->ir, &v);
3120    v.emit(NULL, OPCODE_END);
3121
3122    /* Now emit bodies for any functions that were used. */
3123    do {
3124       progress = GL_FALSE;
3125
3126       foreach_iter(exec_list_iterator, iter, v.function_signatures) {
3127          function_entry *entry = (function_entry *)iter.get();
3128
3129          if (!entry->bgn_inst) {
3130             v.current_function = entry;
3131
3132             entry->bgn_inst = v.emit(NULL, OPCODE_BGNSUB);
3133             entry->bgn_inst->function = entry;
3134
3135             visit_exec_list(&entry->sig->body, &v);
3136
3137             ir_to_mesa_instruction *last;
3138             last = (ir_to_mesa_instruction *)v.instructions.get_tail();
3139             if (last->op != OPCODE_RET)
3140                v.emit(NULL, OPCODE_RET);
3141
3142             ir_to_mesa_instruction *end;
3143             end = v.emit(NULL, OPCODE_ENDSUB);
3144             end->function = entry;
3145
3146             progress = GL_TRUE;
3147          }
3148       }
3149    } while (progress);
3150
3151    prog->NumTemporaries = v.next_temp;
3152
3153    int num_instructions = 0;
3154    foreach_iter(exec_list_iterator, iter, v.instructions) {
3155       num_instructions++;
3156    }
3157
3158    mesa_instructions =
3159       (struct prog_instruction *)calloc(num_instructions,
3160                                         sizeof(*mesa_instructions));
3161    mesa_instruction_annotation = ralloc_array(v.mem_ctx, ir_instruction *,
3162                                               num_instructions);
3163
3164    v.copy_propagate();
3165
3166    /* Convert ir_mesa_instructions into prog_instructions.
3167     */
3168    mesa_inst = mesa_instructions;
3169    i = 0;
3170    foreach_iter(exec_list_iterator, iter, v.instructions) {
3171       const ir_to_mesa_instruction *inst = (ir_to_mesa_instruction *)iter.get();
3172
3173       mesa_inst->Opcode = inst->op;
3174       mesa_inst->CondUpdate = inst->cond_update;
3175       if (inst->saturate)
3176          mesa_inst->SaturateMode = SATURATE_ZERO_ONE;
3177       mesa_inst->DstReg.File = inst->dst.file;
3178       mesa_inst->DstReg.Index = inst->dst.index;
3179       mesa_inst->DstReg.CondMask = inst->dst.cond_mask;
3180       mesa_inst->DstReg.WriteMask = inst->dst.writemask;
3181       mesa_inst->DstReg.RelAddr = inst->dst.reladdr != NULL;
3182       mesa_inst->SrcReg[0] = mesa_src_reg_from_ir_src_reg(inst->src[0]);
3183       mesa_inst->SrcReg[1] = mesa_src_reg_from_ir_src_reg(inst->src[1]);
3184       mesa_inst->SrcReg[2] = mesa_src_reg_from_ir_src_reg(inst->src[2]);
3185       mesa_inst->TexSrcUnit = inst->sampler;
3186       mesa_inst->TexSrcTarget = inst->tex_target;
3187       mesa_inst->TexShadow = inst->tex_shadow;
3188       mesa_instruction_annotation[i] = inst->ir;
3189
3190       /* Set IndirectRegisterFiles. */
3191       if (mesa_inst->DstReg.RelAddr)
3192          prog->IndirectRegisterFiles |= 1 << mesa_inst->DstReg.File;
3193
3194       /* Update program's bitmask of indirectly accessed register files */
3195       for (unsigned src = 0; src < 3; src++)
3196          if (mesa_inst->SrcReg[src].RelAddr)
3197             prog->IndirectRegisterFiles |= 1 << mesa_inst->SrcReg[src].File;
3198
3199       switch (mesa_inst->Opcode) {
3200       case OPCODE_IF:
3201          if (options->MaxIfDepth == 0) {
3202             linker_warning(shader_program,
3203                            "Couldn't flatten if-statement.  "
3204                            "This will likely result in software "
3205                            "rasterization.\n");
3206          }
3207          break;
3208       case OPCODE_BGNLOOP:
3209          if (options->EmitNoLoops) {
3210             linker_warning(shader_program,
3211                            "Couldn't unroll loop.  "
3212                            "This will likely result in software "
3213                            "rasterization.\n");
3214          }
3215          break;
3216       case OPCODE_CONT:
3217          if (options->EmitNoCont) {
3218             linker_warning(shader_program,
3219                            "Couldn't lower continue-statement.  "
3220                            "This will likely result in software "
3221                            "rasterization.\n");
3222          }
3223          break;
3224       case OPCODE_BGNSUB:
3225          inst->function->inst = i;
3226          mesa_inst->Comment = strdup(inst->function->sig->function_name());
3227          break;
3228       case OPCODE_ENDSUB:
3229          mesa_inst->Comment = strdup(inst->function->sig->function_name());
3230          break;
3231       case OPCODE_CAL:
3232          mesa_inst->BranchTarget = inst->function->sig_id; /* rewritten later */
3233          break;
3234       case OPCODE_ARL:
3235          prog->NumAddressRegs = 1;
3236          break;
3237       default:
3238          break;
3239       }
3240
3241       mesa_inst++;
3242       i++;
3243
3244       if (!shader_program->LinkStatus)
3245          break;
3246    }
3247
3248    if (!shader_program->LinkStatus) {
3249       goto fail_exit;
3250    }
3251
3252    set_branchtargets(&v, mesa_instructions, num_instructions);
3253
3254    if (ctx->Shader.Flags & GLSL_DUMP) {
3255       printf("\n");
3256       printf("GLSL IR for linked %s program %d:\n", target_string,
3257              shader_program->Name);
3258       _mesa_print_ir(shader->ir, NULL);
3259       printf("\n");
3260       printf("\n");
3261       printf("Mesa IR for linked %s program %d:\n", target_string,
3262              shader_program->Name);
3263       print_program(mesa_instructions, mesa_instruction_annotation,
3264                     num_instructions);
3265    }
3266
3267    prog->Instructions = mesa_instructions;
3268    prog->NumInstructions = num_instructions;
3269
3270    /* Setting this to NULL prevents a possible double free in the fail_exit
3271     * path (far below).
3272     */
3273    mesa_instructions = NULL;
3274
3275    do_set_program_inouts(shader->ir, prog, shader->Type == GL_FRAGMENT_SHADER);
3276    count_resources(prog);
3277
3278    if (!check_resources(ctx, shader_program, prog))
3279       goto fail_exit;
3280
3281    _mesa_reference_program(ctx, &shader->Program, prog);
3282
3283    if ((ctx->Shader.Flags & GLSL_NO_OPT) == 0) {
3284       _mesa_optimize_program(ctx, prog);
3285    }
3286
3287    /* This has to be done last.  Any operation that can cause
3288     * prog->ParameterValues to get reallocated (e.g., anything that adds a
3289     * program constant) has to happen before creating this linkage.
3290     */
3291    _mesa_associate_uniform_storage(ctx, shader_program, prog->Parameters);
3292    if (!shader_program->LinkStatus) {
3293       goto fail_exit;
3294    }
3295
3296    return prog;
3297
3298 fail_exit:
3299    free(mesa_instructions);
3300    _mesa_reference_program(ctx, &shader->Program, NULL);
3301    return NULL;
3302 }
3303
3304 extern "C" {
3305
3306 /**
3307  * Link a shader.
3308  * Called via ctx->Driver.LinkShader()
3309  * This actually involves converting GLSL IR into Mesa gl_programs with
3310  * code lowering and other optimizations.
3311  */
3312 GLboolean
3313 _mesa_ir_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
3314 {
3315    assert(prog->LinkStatus);
3316
3317    for (unsigned i = 0; i < MESA_SHADER_TYPES; i++) {
3318       if (prog->_LinkedShaders[i] == NULL)
3319          continue;
3320
3321       bool progress;
3322       exec_list *ir = prog->_LinkedShaders[i]->ir;
3323       const struct gl_shader_compiler_options *options =
3324             &ctx->ShaderCompilerOptions[_mesa_shader_type_to_index(prog->_LinkedShaders[i]->Type)];
3325
3326       do {
3327          progress = false;
3328
3329          /* Lowering */
3330          do_mat_op_to_vec(ir);
3331          lower_instructions(ir, (MOD_TO_FRACT | DIV_TO_MUL_RCP | EXP_TO_EXP2
3332                                  | LOG_TO_LOG2 | INT_DIV_TO_MUL_RCP
3333                                  | ((options->EmitNoPow) ? POW_TO_EXP2 : 0)));
3334
3335          progress = do_lower_jumps(ir, true, true, options->EmitNoMainReturn, options->EmitNoCont, options->EmitNoLoops) || progress;
3336
3337          progress = do_common_optimization(ir, true, true,
3338                                            options->MaxUnrollIterations)
3339            || progress;
3340
3341          progress = lower_quadop_vector(ir, true) || progress;
3342
3343          if (options->MaxIfDepth == 0)
3344             progress = lower_discard(ir) || progress;
3345
3346          progress = lower_if_to_cond_assign(ir, options->MaxIfDepth) || progress;
3347
3348          if (options->EmitNoNoise)
3349             progress = lower_noise(ir) || progress;
3350
3351          /* If there are forms of indirect addressing that the driver
3352           * cannot handle, perform the lowering pass.
3353           */
3354          if (options->EmitNoIndirectInput || options->EmitNoIndirectOutput
3355              || options->EmitNoIndirectTemp || options->EmitNoIndirectUniform)
3356            progress =
3357              lower_variable_index_to_cond_assign(ir,
3358                                                  options->EmitNoIndirectInput,
3359                                                  options->EmitNoIndirectOutput,
3360                                                  options->EmitNoIndirectTemp,
3361                                                  options->EmitNoIndirectUniform)
3362              || progress;
3363
3364          progress = do_vec_index_to_cond_assign(ir) || progress;
3365       } while (progress);
3366
3367       validate_ir_tree(ir);
3368    }
3369
3370    for (unsigned i = 0; i < MESA_SHADER_TYPES; i++) {
3371       struct gl_program *linked_prog;
3372
3373       if (prog->_LinkedShaders[i] == NULL)
3374          continue;
3375
3376       linked_prog = get_mesa_program(ctx, prog, prog->_LinkedShaders[i]);
3377
3378       if (linked_prog) {
3379          static const GLenum targets[] = {
3380             GL_VERTEX_PROGRAM_ARB,
3381             GL_FRAGMENT_PROGRAM_ARB,
3382             GL_GEOMETRY_PROGRAM_NV
3383          };
3384
3385          if (i == MESA_SHADER_VERTEX) {
3386             ((struct gl_vertex_program *)linked_prog)->UsesClipDistance
3387                = prog->Vert.UsesClipDistance;
3388          }
3389
3390          _mesa_reference_program(ctx, &prog->_LinkedShaders[i]->Program,
3391                                  linked_prog);
3392          if (!ctx->Driver.ProgramStringNotify(ctx, targets[i], linked_prog)) {
3393             return GL_FALSE;
3394          }
3395       }
3396
3397       _mesa_reference_program(ctx, &linked_prog, NULL);
3398    }
3399
3400    return prog->LinkStatus;
3401 }
3402
3403
3404 /**
3405  * Compile a GLSL shader.  Called via glCompileShader().
3406  */
3407 void
3408 _mesa_glsl_compile_shader(struct gl_context *ctx, struct gl_shader *shader)
3409 {
3410    struct _mesa_glsl_parse_state *state =
3411       new(shader) _mesa_glsl_parse_state(ctx, shader->Type, shader);
3412
3413    const char *source = shader->Source;
3414    /* Check if the user called glCompileShader without first calling
3415     * glShaderSource.  This should fail to compile, but not raise a GL_ERROR.
3416     */
3417    if (source == NULL) {
3418       shader->CompileStatus = GL_FALSE;
3419       return;
3420    }
3421
3422    state->error = preprocess(state, &source, &state->info_log,
3423                              &ctx->Extensions, ctx->API);
3424
3425    if (ctx->Shader.Flags & GLSL_DUMP) {
3426       printf("GLSL source for %s shader %d:\n",
3427              _mesa_glsl_shader_target_name(state->target), shader->Name);
3428       printf("%s\n", shader->Source);
3429    }
3430
3431    if (!state->error) {
3432      _mesa_glsl_lexer_ctor(state, source);
3433      _mesa_glsl_parse(state);
3434      _mesa_glsl_lexer_dtor(state);
3435    }
3436
3437    ralloc_free(shader->ir);
3438    shader->ir = new(shader) exec_list;
3439    if (!state->error && !state->translation_unit.is_empty())
3440       _mesa_ast_to_hir(shader->ir, state);
3441
3442    if (!state->error && !shader->ir->is_empty()) {
3443       validate_ir_tree(shader->ir);
3444
3445       /* Do some optimization at compile time to reduce shader IR size
3446        * and reduce later work if the same shader is linked multiple times
3447        */
3448       while (do_common_optimization(shader->ir, false, false, 32))
3449          ;
3450
3451       validate_ir_tree(shader->ir);
3452    }
3453
3454    shader->symbols = state->symbols;
3455
3456    shader->CompileStatus = !state->error;
3457    shader->InfoLog = state->info_log;
3458    shader->Version = state->language_version;
3459    memcpy(shader->builtins_to_link, state->builtins_to_link,
3460           sizeof(shader->builtins_to_link[0]) * state->num_builtins_to_link);
3461    shader->num_builtins_to_link = state->num_builtins_to_link;
3462
3463    if (ctx->Shader.Flags & GLSL_LOG) {
3464       _mesa_write_shader_to_file(shader);
3465    }
3466
3467    if (ctx->Shader.Flags & GLSL_DUMP) {
3468       if (shader->CompileStatus) {
3469          printf("GLSL IR for shader %d:\n", shader->Name);
3470          _mesa_print_ir(shader->ir, NULL);
3471          printf("\n\n");
3472       } else {
3473          printf("GLSL shader %d failed to compile.\n", shader->Name);
3474       }
3475       if (shader->InfoLog && shader->InfoLog[0] != 0) {
3476          printf("GLSL shader %d info log:\n", shader->Name);
3477          printf("%s\n", shader->InfoLog);
3478       }
3479    }
3480
3481    /* Retain any live IR, but trash the rest. */
3482    reparent_ir(shader->ir, shader->ir);
3483
3484    ralloc_free(state);
3485 }
3486
3487
3488 /**
3489  * Link a GLSL shader program.  Called via glLinkProgram().
3490  */
3491 void
3492 _mesa_glsl_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
3493 {
3494    unsigned int i;
3495
3496    _mesa_clear_shader_program_data(ctx, prog);
3497
3498    prog->LinkStatus = GL_TRUE;
3499
3500    for (i = 0; i < prog->NumShaders; i++) {
3501       if (!prog->Shaders[i]->CompileStatus) {
3502          linker_error(prog, "linking with uncompiled shader");
3503          prog->LinkStatus = GL_FALSE;
3504       }
3505    }
3506
3507    if (prog->LinkStatus) {
3508       link_shaders(ctx, prog);
3509    }
3510
3511    if (prog->LinkStatus) {
3512       if (!ctx->Driver.LinkShader(ctx, prog)) {
3513          prog->LinkStatus = GL_FALSE;
3514       }
3515    }
3516
3517    set_uniform_initializers(ctx, prog);
3518
3519    if (ctx->Shader.Flags & GLSL_DUMP) {
3520       if (!prog->LinkStatus) {
3521          printf("GLSL shader program %d failed to link\n", prog->Name);
3522       }
3523
3524       if (prog->InfoLog && prog->InfoLog[0] != 0) {
3525          printf("GLSL shader program %d info log:\n", prog->Name);
3526          printf("%s\n", prog->InfoLog);
3527       }
3528    }
3529 }
3530
3531 } /* extern "C" */