src/mesa/program/ir_to_mesa.cpp

   1 /*
   2  * Copyright (C) 2005-2007  Brian Paul   All Rights Reserved.
   3  * Copyright (C) 2008  VMware, Inc.   All Rights Reserved.
   4  * Copyright © 2010 Intel Corporation
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a
   7  * copy of this software and associated documentation files (the "Software"),
   8  * to deal in the Software without restriction, including without limitation
   9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  10  * and/or sell copies of the Software, and to permit persons to whom the
  11  * Software is furnished to do so, subject to the following conditions:
  12  *
  13  * The above copyright notice and this permission notice (including the next
  14  * paragraph) shall be included in all copies or substantial portions of the
  15  * Software.
  16  *
  17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  20  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  22  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  23  * DEALINGS IN THE SOFTWARE.
  24  */
  25
  26 /**
  27  * \file ir_to_mesa.cpp
  28  *
  29  * Translate GLSL IR to Mesa's gl_program representation.
  30  */
  31
  32 #include <stdio.h>
  33 #include "main/compiler.h"
  34 #include "ir.h"
  35 #include "ir_visitor.h"
  36 #include "ir_print_visitor.h"
  37 #include "ir_expression_flattening.h"
  38 #include "glsl_types.h"
  39 #include "glsl_parser_extras.h"
  40 #include "../glsl/program.h"
  41 #include "ir_optimization.h"
  42 #include "ast.h"
  43
  44 extern "C" {
  45 #include "main/mtypes.h"
  46 #include "main/shaderapi.h"
  47 #include "main/shaderobj.h"
  48 #include "main/uniforms.h"
  49 #include "program/hash_table.h"
  50 #include "program/prog_instruction.h"
  51 #include "program/prog_optimize.h"
  52 #include "program/prog_print.h"
  53 #include "program/program.h"
  54 #include "program/prog_uniform.h"
  55 #include "program/prog_parameter.h"
  56 #include "program/sampler.h"
  57 }
  58
/* Forward declarations: src_reg and dst_reg each declare a converting
 * constructor that takes the other class by value.
 */
class src_reg;
class dst_reg;

/* Defined later in this file; needed early by the src_reg constructor. */
static int swizzle_for_size(int size);
  63
  64 /**
  65  * This struct is a corresponding struct to Mesa prog_src_register, with
  66  * wider fields.
  67  */
  68 class src_reg {
  69 public:
  70    src_reg(gl_register_file file, int index, const glsl_type *type)
  71    {
  72       this->file = file;
  73       this->index = index;
  74       if (type && (type->is_scalar() || type->is_vector() || type->is_matrix()))
  75          this->swizzle = swizzle_for_size(type->vector_elements);
  76       else
  77          this->swizzle = SWIZZLE_XYZW;
  78       this->negate = 0;
  79       this->reladdr = NULL;
  80    }
  81
  82    src_reg()
  83    {
  84       this->file = PROGRAM_UNDEFINED;
  85       this->index = 0;
  86       this->swizzle = 0;
  87       this->negate = 0;
  88       this->reladdr = NULL;
  89    }
  90
  91    explicit src_reg(dst_reg reg);
  92
  93    gl_register_file file; /**< PROGRAM_* from Mesa */
  94    int index; /**< temporary index, VERT_ATTRIB_*, FRAG_ATTRIB_*, etc. */
  95    GLuint swizzle; /**< SWIZZLE_XYZWONEZERO swizzles from Mesa. */
  96    int negate; /**< NEGATE_XYZW mask from mesa */
  97    /** Register index should be offset by the integer in this reg. */
  98    src_reg *reladdr;
  99 };
 100
 101 class dst_reg {
 102 public:
 103    dst_reg(gl_register_file file, int writemask)
 104    {
 105       this->file = file;
 106       this->index = 0;
 107       this->writemask = writemask;
 108       this->cond_mask = COND_TR;
 109       this->reladdr = NULL;
 110    }
 111
 112    dst_reg()
 113    {
 114       this->file = PROGRAM_UNDEFINED;
 115       this->index = 0;
 116       this->writemask = 0;
 117       this->cond_mask = COND_TR;
 118       this->reladdr = NULL;
 119    }
 120
 121    explicit dst_reg(src_reg reg);
 122
 123    gl_register_file file; /**< PROGRAM_* from Mesa */
 124    int index; /**< temporary index, VERT_ATTRIB_*, FRAG_ATTRIB_*, etc. */
 125    int writemask; /**< Bitfield of WRITEMASK_[XYZW] */
 126    GLuint cond_mask:4;
 127    /** Register index should be offset by the integer in this reg. */
 128    src_reg *reladdr;
 129 };
 130
 131 src_reg::src_reg(dst_reg reg)
 132 {
 133    this->file = reg.file;
 134    this->index = reg.index;
 135    this->swizzle = SWIZZLE_XYZW;
 136    this->negate = 0;
 137    this->reladdr = reg.reladdr;
 138 }
 139
 140 dst_reg::dst_reg(src_reg reg)
 141 {
 142    this->file = reg.file;
 143    this->index = reg.index;
 144    this->writemask = WRITEMASK_XYZW;
 145    this->cond_mask = COND_TR;
 146    this->reladdr = reg.reladdr;
 147 }
 148
 149 class ir_to_mesa_instruction : public exec_node {
 150 public:
 151    /* Callers of this ralloc-based new need not call delete. It's
 152     * easier to just ralloc_free 'ctx' (or any of its ancestors). */
 153    static void* operator new(size_t size, void *ctx)
 154    {
 155       void *node;
 156
 157       node = rzalloc_size(ctx, size);
 158       assert(node != NULL);
 159
 160       return node;
 161    }
 162
 163    enum prog_opcode op;
 164    dst_reg dst;
 165    src_reg src[3];
 166    /** Pointer to the ir source this tree came from for debugging */
 167    ir_instruction *ir;
 168    GLboolean cond_update;
 169    bool saturate;
 170    int sampler; /**< sampler index */
 171    int tex_target; /**< One of TEXTURE_*_INDEX */
 172    GLboolean tex_shadow;
 173
 174    class function_entry *function; /* Set on OPCODE_CAL or OPCODE_BGNSUB */
 175 };
 176
 177 class variable_storage : public exec_node {
 178 public:
 179    variable_storage(ir_variable *var, gl_register_file file, int index)
 180       : file(file), index(index), var(var)
 181    {
 182       /* empty */
 183    }
 184
 185    gl_register_file file;
 186    int index;
 187    ir_variable *var; /* variable that maps to this, if any */
 188 };
 189
/**
 * Bookkeeping record for one function signature.
 *
 * Instances are exec_nodes; ir_to_mesa_visitor::function_signatures is the
 * list that holds them.
 */
class function_entry : public exec_node {
public:
   /** The IR signature this entry describes. */
   ir_function_signature *sig;

   /**
    * Identifier of this function signature used by the program.
    *
    * At the point that Mesa instructions for function calls are
    * generated, we don't know the address of the first instruction of
    * the function body.  So we make the BranchTarget that is called a
    * small integer and rewrite them during set_branchtargets().
    */
   int sig_id;

   /**
    * Pointer to first instruction of the function body.
    *
    * Set during function body emits after main() is processed.
    */
   ir_to_mesa_instruction *bgn_inst;

   /**
    * Index of the first instruction of the function body in actual
    * Mesa IR.
    *
    * Set after conversion from ir_to_mesa_instruction to prog_instruction.
    */
   int inst;

   /** Storage for the return value. */
   src_reg return_reg;
};
 222
/**
 * IR visitor that translates GLSL IR into a list of
 * ir_to_mesa_instruction nodes (see \c instructions).
 */
class ir_to_mesa_visitor : public ir_visitor {
public:
   ir_to_mesa_visitor();
   ~ir_to_mesa_visitor();

   function_entry *current_function;

   struct gl_context *ctx;
   /**
    * Program being generated.  Its Parameters list receives constants and
    * state references, and its Target is consulted for vertex-program
    * restrictions.
    */
   struct gl_program *prog;
   /** Shader program that linker_error() reports against. */
   struct gl_shader_program *shader_program;
   struct gl_shader_compiler_options *options;

   /** Index of the next unallocated PROGRAM_TEMPORARY register. */
   int next_temp;

   /** Returns the storage entry recorded for \c var, or NULL if none. */
   variable_storage *find_variable_storage(ir_variable *var);

   function_entry *get_function_signature(ir_function_signature *sig);

   /** Reserves temporaries for \c type and returns the first as a source. */
   src_reg get_temp(const glsl_type *type);
   /**
    * Emits the ARL for a relatively-addressed source and, unless it is the
    * last pending relative address, copies the source into a temporary.
    */
   void reladdr_to_temp(ir_instruction *ir, src_reg *reg, int *num_reladdr);

   /** Adds \c val to the program's constants and returns a source for it. */
   src_reg src_reg_for_float(float val);

   /**
    * \name Visit methods
    *
    * As typical for the visitor pattern, there must be one \c visit method for
    * each concrete subclass of \c ir_instruction.  Virtual base classes within
    * the hierarchy should not have \c visit methods.
    */
   /*@{*/
   virtual void visit(ir_variable *);
   virtual void visit(ir_loop *);
   virtual void visit(ir_loop_jump *);
   virtual void visit(ir_function_signature *);
   virtual void visit(ir_function *);
   virtual void visit(ir_expression *);
   virtual void visit(ir_swizzle *);
   virtual void visit(ir_dereference_variable  *);
   virtual void visit(ir_dereference_array *);
   virtual void visit(ir_dereference_record *);
   virtual void visit(ir_assignment *);
   virtual void visit(ir_constant *);
   virtual void visit(ir_call *);
   virtual void visit(ir_return *);
   virtual void visit(ir_discard *);
   virtual void visit(ir_texture *);
   virtual void visit(ir_if *);
   /*@}*/

   /**
    * Register holding the value produced by the most recently visited
    * rvalue; callers read it right after calling accept().
    */
   src_reg result;

   /** List of variable_storage */
   exec_list variables;

   /** List of function_entry */
   exec_list function_signatures;
   int next_signature_id;

   /** List of ir_to_mesa_instruction */
   exec_list instructions;

   /**
    * \name Instruction emission
    *
    * Each overload appends one instruction to \c instructions and returns
    * it; operands that are not supplied are filled with the undefined
    * placeholder registers.
    */
   /*@{*/
   ir_to_mesa_instruction *emit(ir_instruction *ir, enum prog_opcode op);

   ir_to_mesa_instruction *emit(ir_instruction *ir, enum prog_opcode op,
                                dst_reg dst, src_reg src0);

   ir_to_mesa_instruction *emit(ir_instruction *ir, enum prog_opcode op,
                                dst_reg dst, src_reg src0, src_reg src1);

   ir_to_mesa_instruction *emit(ir_instruction *ir, enum prog_opcode op,
                                dst_reg dst,
                                src_reg src0, src_reg src1, src_reg src2);
   /*@}*/

   /**
    * Emit the correct dot-product instruction (DP2, DP3 or DP4) for
    * \c elements components.
    */
   ir_to_mesa_instruction * emit_dp(ir_instruction *ir,
                                    dst_reg dst,
                                    src_reg src0,
                                    src_reg src1,
                                    unsigned elements);

   /**
    * Emit a scalar-only opcode once per distinct source channel so that
    * every enabled destination channel gets its own result.
    */
   void emit_scalar(ir_instruction *ir, enum prog_opcode op,
                    dst_reg dst, src_reg src0);

   void emit_scalar(ir_instruction *ir, enum prog_opcode op,
                    dst_reg dst, src_reg src0, src_reg src1);

   /**
    * Emit sine or cosine through OPCODE_SCS; falls back to emit_scalar()
    * for vertex programs.
    */
   void emit_scs(ir_instruction *ir, enum prog_opcode op,
                 dst_reg dst, const src_reg &src);

   /**
    * Emits OPCODE_MAD when operand \c mul_operand of \c ir is a multiply
    * expression; returns false without emitting anything otherwise.
    */
   GLboolean try_emit_mad(ir_expression *ir,
                          int mul_operand);
   /**
    * Emits a saturated result when \c ir is a saturate pattern and the
    * target is not a vertex program; returns false otherwise.
    */
   GLboolean try_emit_sat(ir_expression *ir);

   void emit_swz(ir_expression *ir);

   bool process_move_condition(ir_rvalue *ir);

   void copy_propagate(void);

   /** ralloc context handed to placement new for nodes created here. */
   void *mem_ctx;
};
 327
/** Placeholder source (PROGRAM_UNDEFINED) passed for unused source slots. */
src_reg undef_src = src_reg(PROGRAM_UNDEFINED, 0, NULL);

/**
 * Placeholder destination used by the emit() overload that takes no
 * operands.
 *
 * NOTE(review): the second dst_reg constructor argument is a writemask, yet
 * a swizzle constant (SWIZZLE_NOOP) is passed here -- confirm whether a
 * WRITEMASK_* value or 0 was intended.
 */
dst_reg undef_dst = dst_reg(PROGRAM_UNDEFINED, SWIZZLE_NOOP);

/** Destination of the OPCODE_ARL loads: channel X of PROGRAM_ADDRESS. */
dst_reg address_reg = dst_reg(PROGRAM_ADDRESS, WRITEMASK_X);
 333
 334 static int
 335 swizzle_for_size(int size)
 336 {
 337    int size_swizzles[4] = {
 338       MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
 339       MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y),
 340       MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z),
 341       MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W),
 342    };
 343
 344    assert((size >= 1) && (size <= 4));
 345    return size_swizzles[size - 1];
 346 }
 347
 348 ir_to_mesa_instruction *
 349 ir_to_mesa_visitor::emit(ir_instruction *ir, enum prog_opcode op,
 350                          dst_reg dst,
 351                          src_reg src0, src_reg src1, src_reg src2)
 352 {
 353    ir_to_mesa_instruction *inst = new(mem_ctx) ir_to_mesa_instruction();
 354    int num_reladdr = 0;
 355
 356    /* If we have to do relative addressing, we want to load the ARL
 357     * reg directly for one of the regs, and preload the other reladdr
 358     * sources into temps.
 359     */
 360    num_reladdr += dst.reladdr != NULL;
 361    num_reladdr += src0.reladdr != NULL;
 362    num_reladdr += src1.reladdr != NULL;
 363    num_reladdr += src2.reladdr != NULL;
 364
 365    reladdr_to_temp(ir, &src2, &num_reladdr);
 366    reladdr_to_temp(ir, &src1, &num_reladdr);
 367    reladdr_to_temp(ir, &src0, &num_reladdr);
 368
 369    if (dst.reladdr) {
 370       emit(ir, OPCODE_ARL, address_reg, *dst.reladdr);
 371       num_reladdr--;
 372    }
 373    assert(num_reladdr == 0);
 374
 375    inst->op = op;
 376    inst->dst = dst;
 377    inst->src[0] = src0;
 378    inst->src[1] = src1;
 379    inst->src[2] = src2;
 380    inst->ir = ir;
 381
 382    inst->function = NULL;
 383
 384    this->instructions.push_tail(inst);
 385
 386    return inst;
 387 }
 388
 389
 390 ir_to_mesa_instruction *
 391 ir_to_mesa_visitor::emit(ir_instruction *ir, enum prog_opcode op,
 392                          dst_reg dst, src_reg src0, src_reg src1)
 393 {
 394    return emit(ir, op, dst, src0, src1, undef_src);
 395 }
 396
 397 ir_to_mesa_instruction *
 398 ir_to_mesa_visitor::emit(ir_instruction *ir, enum prog_opcode op,
 399                          dst_reg dst, src_reg src0)
 400 {
 401    assert(dst.writemask != 0);
 402    return emit(ir, op, dst, src0, undef_src, undef_src);
 403 }
 404
 405 ir_to_mesa_instruction *
 406 ir_to_mesa_visitor::emit(ir_instruction *ir, enum prog_opcode op)
 407 {
 408    return emit(ir, op, undef_dst, undef_src, undef_src, undef_src);
 409 }
 410
 411 ir_to_mesa_instruction *
 412 ir_to_mesa_visitor::emit_dp(ir_instruction *ir,
 413                             dst_reg dst, src_reg src0, src_reg src1,
 414                             unsigned elements)
 415 {
 416    static const gl_inst_opcode dot_opcodes[] = {
 417       OPCODE_DP2, OPCODE_DP3, OPCODE_DP4
 418    };
 419
 420    return emit(ir, dot_opcodes[elements - 2], dst, src0, src1);
 421 }
 422
 423 /**
 424  * Emits Mesa scalar opcodes to produce unique answers across channels.
 425  *
 426  * Some Mesa opcodes are scalar-only, like ARB_fp/vp.  The src X
 427  * channel determines the result across all channels.  So to do a vec4
 428  * of this operation, we want to emit a scalar per source channel used
 429  * to produce dest channels.
 430  */
 431 void
 432 ir_to_mesa_visitor::emit_scalar(ir_instruction *ir, enum prog_opcode op,
 433                                 dst_reg dst,
 434                                 src_reg orig_src0, src_reg orig_src1)
 435 {
 436    int i, j;
 437    int done_mask = ~dst.writemask;
 438
 439    /* Mesa RCP is a scalar operation splatting results to all channels,
 440     * like ARB_fp/vp.  So emit as many RCPs as necessary to cover our
 441     * dst channels.
 442     */
 443    for (i = 0; i < 4; i++) {
 444       GLuint this_mask = (1 << i);
 445       ir_to_mesa_instruction *inst;
 446       src_reg src0 = orig_src0;
 447       src_reg src1 = orig_src1;
 448
 449       if (done_mask & this_mask)
 450          continue;
 451
 452       GLuint src0_swiz = GET_SWZ(src0.swizzle, i);
 453       GLuint src1_swiz = GET_SWZ(src1.swizzle, i);
 454       for (j = i + 1; j < 4; j++) {
 455          /* If there is another enabled component in the destination that is
 456           * derived from the same inputs, generate its value on this pass as
 457           * well.
 458           */
 459          if (!(done_mask & (1 << j)) &&
 460              GET_SWZ(src0.swizzle, j) == src0_swiz &&
 461              GET_SWZ(src1.swizzle, j) == src1_swiz) {
 462             this_mask |= (1 << j);
 463          }
 464       }
 465       src0.swizzle = MAKE_SWIZZLE4(src0_swiz, src0_swiz,
 466                                    src0_swiz, src0_swiz);
 467       src1.swizzle = MAKE_SWIZZLE4(src1_swiz, src1_swiz,
 468                                   src1_swiz, src1_swiz);
 469
 470       inst = emit(ir, op, dst, src0, src1);
 471       inst->dst.writemask = this_mask;
 472       done_mask |= this_mask;
 473    }
 474 }
 475
 476 void
 477 ir_to_mesa_visitor::emit_scalar(ir_instruction *ir, enum prog_opcode op,
 478                                 dst_reg dst, src_reg src0)
 479 {
 480    src_reg undef = undef_src;
 481
 482    undef.swizzle = SWIZZLE_XXXX;
 483
 484    emit_scalar(ir, op, dst, src0, undef);
 485 }
 486
 487 /**
 488  * Emit an OPCODE_SCS instruction
 489  *
 490  * The \c SCS opcode functions a bit differently than the other Mesa (or
 491  * ARB_fragment_program) opcodes.  Instead of splatting its result across all
 492  * four components of the destination, it writes one value to the \c x
 493  * component and another value to the \c y component.
 494  *
 495  * \param ir        IR instruction being processed
 496  * \param op        Either \c OPCODE_SIN or \c OPCODE_COS depending on which
 497  *                  value is desired.
 498  * \param dst       Destination register
 499  * \param src       Source register
 500  */
 501 void
 502 ir_to_mesa_visitor::emit_scs(ir_instruction *ir, enum prog_opcode op,
 503                              dst_reg dst,
 504                              const src_reg &src)
 505 {
 506    /* Vertex programs cannot use the SCS opcode.
 507     */
 508    if (this->prog->Target == GL_VERTEX_PROGRAM_ARB) {
 509       emit_scalar(ir, op, dst, src);
 510       return;
 511    }
 512
 513    const unsigned component = (op == OPCODE_SIN) ? 0 : 1;
 514    const unsigned scs_mask = (1U << component);
 515    int done_mask = ~dst.writemask;
 516    src_reg tmp;
 517
 518    assert(op == OPCODE_SIN || op == OPCODE_COS);
 519
 520    /* If there are compnents in the destination that differ from the component
 521     * that will be written by the SCS instrution, we'll need a temporary.
 522     */
 523    if (scs_mask != unsigned(dst.writemask)) {
 524       tmp = get_temp(glsl_type::vec4_type);
 525    }
 526
 527    for (unsigned i = 0; i < 4; i++) {
 528       unsigned this_mask = (1U << i);
 529       src_reg src0 = src;
 530
 531       if ((done_mask & this_mask) != 0)
 532          continue;
 533
 534       /* The source swizzle specified which component of the source generates
 535        * sine / cosine for the current component in the destination.  The SCS
 536        * instruction requires that this value be swizzle to the X component.
 537        * Replace the current swizzle with a swizzle that puts the source in
 538        * the X component.
 539        */
 540       unsigned src0_swiz = GET_SWZ(src.swizzle, i);
 541
 542       src0.swizzle = MAKE_SWIZZLE4(src0_swiz, src0_swiz,
 543                                    src0_swiz, src0_swiz);
 544       for (unsigned j = i + 1; j < 4; j++) {
 545          /* If there is another enabled component in the destination that is
 546           * derived from the same inputs, generate its value on this pass as
 547           * well.
 548           */
 549          if (!(done_mask & (1 << j)) &&
 550              GET_SWZ(src0.swizzle, j) == src0_swiz) {
 551             this_mask |= (1 << j);
 552          }
 553       }
 554
 555       if (this_mask != scs_mask) {
 556          ir_to_mesa_instruction *inst;
 557          dst_reg tmp_dst = dst_reg(tmp);
 558
 559          /* Emit the SCS instruction.
 560           */
 561          inst = emit(ir, OPCODE_SCS, tmp_dst, src0);
 562          inst->dst.writemask = scs_mask;
 563
 564          /* Move the result of the SCS instruction to the desired location in
 565           * the destination.
 566           */
 567          tmp.swizzle = MAKE_SWIZZLE4(component, component,
 568                                      component, component);
 569          inst = emit(ir, OPCODE_SCS, dst, tmp);
 570          inst->dst.writemask = this_mask;
 571       } else {
 572          /* Emit the SCS instruction to write directly to the destination.
 573           */
 574          ir_to_mesa_instruction *inst = emit(ir, OPCODE_SCS, dst, src0);
 575          inst->dst.writemask = scs_mask;
 576       }
 577
 578       done_mask |= this_mask;
 579    }
 580 }
 581
 582 struct src_reg
 583 ir_to_mesa_visitor::src_reg_for_float(float val)
 584 {
 585    src_reg src(PROGRAM_CONSTANT, -1, NULL);
 586
 587    src.index = _mesa_add_unnamed_constant(this->prog->Parameters,
 588                                           (const gl_constant_value *)&val, 1, &src.swizzle);
 589
 590    return src;
 591 }
 592
 593 static int
 594 type_size(const struct glsl_type *type)
 595 {
 596    unsigned int i;
 597    int size;
 598
 599    switch (type->base_type) {
 600    case GLSL_TYPE_UINT:
 601    case GLSL_TYPE_INT:
 602    case GLSL_TYPE_FLOAT:
 603    case GLSL_TYPE_BOOL:
 604       if (type->is_matrix()) {
 605          return type->matrix_columns;
 606       } else {
 607          /* Regardless of size of vector, it gets a vec4. This is bad
 608           * packing for things like floats, but otherwise arrays become a
 609           * mess.  Hopefully a later pass over the code can pack scalars
 610           * down if appropriate.
 611           */
 612          return 1;
 613       }
 614    case GLSL_TYPE_ARRAY:
 615       assert(type->length > 0);
 616       return type_size(type->fields.array) * type->length;
 617    case GLSL_TYPE_STRUCT:
 618       size = 0;
 619       for (i = 0; i < type->length; i++) {
 620          size += type_size(type->fields.structure[i].type);
 621       }
 622       return size;
 623    case GLSL_TYPE_SAMPLER:
 624       /* Samplers take up one slot in UNIFORMS[], but they're baked in
 625        * at link time.
 626        */
 627       return 1;
 628    default:
 629       assert(0);
 630       return 0;
 631    }
 632 }
 633
 634 /**
 635  * In the initial pass of codegen, we assign temporary numbers to
 636  * intermediate results.  (not SSA -- variable assignments will reuse
 637  * storage).  Actual register allocation for the Mesa VM occurs in a
 638  * pass over the Mesa IR later.
 639  */
 640 src_reg
 641 ir_to_mesa_visitor::get_temp(const glsl_type *type)
 642 {
 643    src_reg src;
 644
 645    src.file = PROGRAM_TEMPORARY;
 646    src.index = next_temp;
 647    src.reladdr = NULL;
 648    next_temp += type_size(type);
 649
 650    if (type->is_array() || type->is_record()) {
 651       src.swizzle = SWIZZLE_NOOP;
 652    } else {
 653       src.swizzle = swizzle_for_size(type->vector_elements);
 654    }
 655    src.negate = 0;
 656
 657    return src;
 658 }
 659
 660 variable_storage *
 661 ir_to_mesa_visitor::find_variable_storage(ir_variable *var)
 662 {
 663
 664    variable_storage *entry;
 665
 666    foreach_iter(exec_list_iterator, iter, this->variables) {
 667       entry = (variable_storage *)iter.get();
 668
 669       if (entry->var == var)
 670          return entry;
 671    }
 672
 673    return NULL;
 674 }
 675
/**
 * Handles a variable declaration.
 *
 * Two things happen here:
 *  - gl_FragCoord and gl_FragDepth copy their layout qualifiers into the
 *    program, which is treated as a gl_fragment_program.
 *  - Built-in uniforms (uniform variables whose name starts with "gl_") get
 *    their state slots referenced in the program's parameter list, either
 *    used in place or copied into temporaries.
 *
 * Other variables are not given storage by this method.
 */
void
ir_to_mesa_visitor::visit(ir_variable *ir)
{
   if (strcmp(ir->name, "gl_FragCoord") == 0) {
      struct gl_fragment_program *fp = (struct gl_fragment_program *)this->prog;

      fp->OriginUpperLeft = ir->origin_upper_left;
      fp->PixelCenterInteger = ir->pixel_center_integer;

   } else if (strcmp(ir->name, "gl_FragDepth") == 0) {
      struct gl_fragment_program *fp = (struct gl_fragment_program *)this->prog;
      /* Translate the IR depth layout into the matching program enum. */
      switch (ir->depth_layout) {
      case ir_depth_layout_none:
         fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_NONE;
         break;
      case ir_depth_layout_any:
         fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_ANY;
         break;
      case ir_depth_layout_greater:
         fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_GREATER;
         break;
      case ir_depth_layout_less:
         fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_LESS;
         break;
      case ir_depth_layout_unchanged:
         fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_UNCHANGED;
         break;
      default:
         assert(0);
         break;
      }
   }

   if (ir->mode == ir_var_uniform && strncmp(ir->name, "gl_", 3) == 0) {
      unsigned int i;
      const ir_state_slot *const slots = ir->state_slots;
      assert(ir->state_slots != NULL);

      /* Check if this statevar's setup in the STATE file exactly
       * matches how we'll want to reference it as a
       * struct/array/whatever.  If not, then we need to move it into
       * temporary storage and hope that it'll get copy-propagated
       * out.
       */
      for (i = 0; i < ir->num_state_slots; i++) {
         if (slots[i].swizzle != SWIZZLE_XYZW) {
            break;
         }
      }

      struct variable_storage *storage;
      dst_reg dst;
      /* i reached the end only if every slot uses the plain XYZW swizzle. */
      if (i == ir->num_state_slots) {
         /* We'll set the index later. */
         storage = new(mem_ctx) variable_storage(ir, PROGRAM_STATE_VAR, -1);
         this->variables.push_tail(storage);

         dst = undef_dst;
      } else {
         /* The variable_storage constructor allocates slots based on the size
          * of the type.  However, this had better match the number of state
          * elements that we're going to copy into the new temporary.
          */
         assert((int) ir->num_state_slots == type_size(ir->type));

         storage = new(mem_ctx) variable_storage(ir, PROGRAM_TEMPORARY,
                                                 this->next_temp);
         this->variables.push_tail(storage);
         this->next_temp += type_size(ir->type);

         dst = dst_reg(src_reg(PROGRAM_TEMPORARY, storage->index, NULL));
      }


      /* Note: this loop index shadows the outer 'i' declared above. */
      for (unsigned int i = 0; i < ir->num_state_slots; i++) {
         int index = _mesa_add_state_reference(this->prog->Parameters,
                                               (gl_state_index *)slots[i].tokens);

         if (storage->file == PROGRAM_STATE_VAR) {
            /* In-place case: the first slot fixes the base index and the
             * remaining slots are expected to follow it contiguously.
             */
            if (storage->index == -1) {
               storage->index = index;
            } else {
               assert(index == storage->index + (int)i);
            }
         } else {
            /* Temporary case: copy the slot, applying its swizzle. */
            src_reg src(PROGRAM_STATE_VAR, index, NULL);
            src.swizzle = slots[i].swizzle;
            emit(ir, OPCODE_MOV, dst, src);
            /* even a float takes up a whole vec4 reg in a struct/array. */
            dst.index++;
         }
      }

      /* Report a link error if the number of registers copied does not
       * match the number of state slots.
       */
      if (storage->file == PROGRAM_TEMPORARY &&
          dst.index != storage->index + (int) ir->num_state_slots) {
         linker_error(this->shader_program,
                      "failed to load builtin uniform `%s' "
                      "(%d/%d regs loaded)\n",
                      ir->name, dst.index - storage->index,
                      type_size(ir->type));
      }
   }
}
 779
 780 void
 781 ir_to_mesa_visitor::visit(ir_loop *ir)
 782 {
 783    ir_dereference_variable *counter = NULL;
 784
 785    if (ir->counter != NULL)
 786       counter = new(mem_ctx) ir_dereference_variable(ir->counter);
 787
 788    if (ir->from != NULL) {
 789       assert(ir->counter != NULL);
 790
 791       ir_assignment *a =
 792         new(mem_ctx) ir_assignment(counter, ir->from, NULL);
 793
 794       a->accept(this);
 795    }
 796
 797    emit(NULL, OPCODE_BGNLOOP);
 798
 799    if (ir->to) {
 800       ir_expression *e =
 801          new(mem_ctx) ir_expression(ir->cmp, glsl_type::bool_type,
 802                                           counter, ir->to);
 803       ir_if *if_stmt =  new(mem_ctx) ir_if(e);
 804
 805       ir_loop_jump *brk =
 806         new(mem_ctx) ir_loop_jump(ir_loop_jump::jump_break);
 807
 808       if_stmt->then_instructions.push_tail(brk);
 809
 810       if_stmt->accept(this);
 811    }
 812
 813    visit_exec_list(&ir->body_instructions, this);
 814
 815    if (ir->increment) {
 816       ir_expression *e =
 817          new(mem_ctx) ir_expression(ir_binop_add, counter->type,
 818                                           counter, ir->increment);
 819
 820       ir_assignment *a =
 821         new(mem_ctx) ir_assignment(counter, e, NULL);
 822
 823       a->accept(this);
 824    }
 825
 826    emit(NULL, OPCODE_ENDLOOP);
 827 }
 828
 829 void
 830 ir_to_mesa_visitor::visit(ir_loop_jump *ir)
 831 {
 832    switch (ir->mode) {
 833    case ir_loop_jump::jump_break:
 834       emit(NULL, OPCODE_BRK);
 835       break;
 836    case ir_loop_jump::jump_continue:
 837       emit(NULL, OPCODE_CONT);
 838       break;
 839    }
 840 }
 841
 842
 843 void
 844 ir_to_mesa_visitor::visit(ir_function_signature *ir)
 845 {
 846    assert(0);
 847    (void)ir;
 848 }
 849
 850 void
 851 ir_to_mesa_visitor::visit(ir_function *ir)
 852 {
 853    /* Ignore function bodies other than main() -- we shouldn't see calls to
 854     * them since they should all be inlined before we get to ir_to_mesa.
 855     */
 856    if (strcmp(ir->name, "main") == 0) {
 857       const ir_function_signature *sig;
 858       exec_list empty;
 859
 860       sig = ir->matching_signature(&empty);
 861
 862       assert(sig);
 863
 864       foreach_iter(exec_list_iterator, iter, sig->body) {
 865          ir_instruction *ir = (ir_instruction *)iter.get();
 866
 867          ir->accept(this);
 868       }
 869    }
 870 }
 871
 872 GLboolean
 873 ir_to_mesa_visitor::try_emit_mad(ir_expression *ir, int mul_operand)
 874 {
 875    int nonmul_operand = 1 - mul_operand;
 876    src_reg a, b, c;
 877
 878    ir_expression *expr = ir->operands[mul_operand]->as_expression();
 879    if (!expr || expr->operation != ir_binop_mul)
 880       return false;
 881
 882    expr->operands[0]->accept(this);
 883    a = this->result;
 884    expr->operands[1]->accept(this);
 885    b = this->result;
 886    ir->operands[nonmul_operand]->accept(this);
 887    c = this->result;
 888
 889    this->result = get_temp(ir->type);
 890    emit(ir, OPCODE_MAD, dst_reg(this->result), a, b, c);
 891
 892    return true;
 893 }
 894
 895 GLboolean
 896 ir_to_mesa_visitor::try_emit_sat(ir_expression *ir)
 897 {
 898    /* Saturates were only introduced to vertex programs in
 899     * NV_vertex_program3, so don't give them to drivers in the VP.
 900     */
 901    if (this->prog->Target == GL_VERTEX_PROGRAM_ARB)
 902       return false;
 903
 904    ir_rvalue *sat_src = ir->as_rvalue_to_saturate();
 905    if (!sat_src)
 906       return false;
 907
 908    sat_src->accept(this);
 909    src_reg src = this->result;
 910
 911    /* If we generated an expression instruction into a temporary in
 912     * processing the saturate's operand, apply the saturate to that
 913     * instruction.  Otherwise, generate a MOV to do the saturate.
 914     *
 915     * Note that we have to be careful to only do this optimization if
 916     * the instruction in question was what generated src->result.  For
 917     * example, ir_dereference_array might generate a MUL instruction
 918     * to create the reladdr, and return us a src reg using that
 919     * reladdr.  That MUL result is not the value we're trying to
 920     * saturate.
 921     */
 922    ir_expression *sat_src_expr = sat_src->as_expression();
 923    ir_to_mesa_instruction *new_inst;
 924    new_inst = (ir_to_mesa_instruction *)this->instructions.get_tail();
 925    if (sat_src_expr && (sat_src_expr->operation == ir_binop_mul ||
 926                         sat_src_expr->operation == ir_binop_add ||
 927                         sat_src_expr->operation == ir_binop_dot)) {
 928       new_inst->saturate = true;
 929    } else {
 930       this->result = get_temp(ir->type);
 931       ir_to_mesa_instruction *inst;
 932       inst = emit(ir, OPCODE_MOV, dst_reg(this->result), src);
 933       inst->saturate = true;
 934    }
 935
 936    return true;
 937 }
 938
 939 void
 940 ir_to_mesa_visitor::reladdr_to_temp(ir_instruction *ir,
 941                                     src_reg *reg, int *num_reladdr)
 942 {
 943    if (!reg->reladdr)
 944       return;
 945
 946    emit(ir, OPCODE_ARL, address_reg, *reg->reladdr);
 947
 948    if (*num_reladdr != 1) {
 949       src_reg temp = get_temp(glsl_type::vec4_type);
 950
 951       emit(ir, OPCODE_MOV, dst_reg(temp), *reg);
 952       *reg = temp;
 953    }
 954
 955    (*num_reladdr)--;
 956 }
 957
 958 void
 959 ir_to_mesa_visitor::emit_swz(ir_expression *ir)
 960 {
 961    /* Assume that the vector operator is in a form compatible with OPCODE_SWZ.
 962     * This means that each of the operands is either an immediate value of -1,
 963     * 0, or 1, or is a component from one source register (possibly with
 964     * negation).
 965     */
 966    uint8_t components[4] = { 0 };
 967    bool negate[4] = { false };
 968    ir_variable *var = NULL;
 969
 970    for (unsigned i = 0; i < ir->type->vector_elements; i++) {
 971       ir_rvalue *op = ir->operands[i];
 972
 973       assert(op->type->is_scalar());
 974
 975       while (op != NULL) {
 976          switch (op->ir_type) {
 977          case ir_type_constant: {
 978
 979             assert(op->type->is_scalar());
 980
 981             const ir_constant *const c = op->as_constant();
 982             if (c->is_one()) {
 983                components[i] = SWIZZLE_ONE;
 984             } else if (c->is_zero()) {
 985                components[i] = SWIZZLE_ZERO;
 986             } else if (c->is_negative_one()) {
 987                components[i] = SWIZZLE_ONE;
 988                negate[i] = true;
 989             } else {
 990                assert(!"SWZ constant must be 0.0 or 1.0.");
 991             }
 992
 993             op = NULL;
 994             break;
 995          }
 996
 997          case ir_type_dereference_variable: {
 998             ir_dereference_variable *const deref =
 999                (ir_dereference_variable *) op;
1000
1001             assert((var == NULL) || (deref->var == var));
1002             components[i] = SWIZZLE_X;
1003             var = deref->var;
1004             op = NULL;
1005             break;
1006          }
1007
1008          case ir_type_expression: {
1009             ir_expression *const expr = (ir_expression *) op;
1010
1011             assert(expr->operation == ir_unop_neg);
1012             negate[i] = true;
1013
1014             op = expr->operands[0];
1015             break;
1016          }
1017
1018          case ir_type_swizzle: {
1019             ir_swizzle *const swiz = (ir_swizzle *) op;
1020
1021             components[i] = swiz->mask.x;
1022             op = swiz->val;
1023             break;
1024          }
1025
1026          default:
1027             assert(!"Should not get here.");
1028             return;
1029          }
1030       }
1031    }
1032
1033    assert(var != NULL);
1034
1035    ir_dereference_variable *const deref =
1036       new(mem_ctx) ir_dereference_variable(var);
1037
1038    this->result.file = PROGRAM_UNDEFINED;
1039    deref->accept(this);
1040    if (this->result.file == PROGRAM_UNDEFINED) {
1041       ir_print_visitor v;
1042       printf("Failed to get tree for expression operand:\n");
1043       deref->accept(&v);
1044       exit(1);
1045    }
1046
1047    src_reg src;
1048
1049    src = this->result;
1050    src.swizzle = MAKE_SWIZZLE4(components[0],
1051                                components[1],
1052                                components[2],
1053                                components[3]);
1054    src.negate = ((unsigned(negate[0]) << 0)
1055                  | (unsigned(negate[1]) << 1)
1056                  | (unsigned(negate[2]) << 2)
1057                  | (unsigned(negate[3]) << 3));
1058
1059    /* Storage for our result.  Ideally for an assignment we'd be using the
1060     * actual storage for the result here, instead.
1061     */
1062    const src_reg result_src = get_temp(ir->type);
1063    dst_reg result_dst = dst_reg(result_src);
1064
1065    /* Limit writes to the channels that will be used by result_src later.
1066     * This does limit this temp's use as a temporary for multi-instruction
1067     * sequences.
1068     */
1069    result_dst.writemask = (1 << ir->type->vector_elements) - 1;
1070
1071    emit(ir, OPCODE_SWZ, result_dst, src);
1072    this->result = result_src;
1073 }
1074
/**
 * Translate an expression node into Mesa IR instructions.
 *
 * Three shortcuts are tried first: a MAD peephole for additions, saturate
 * folding (try_emit_sat), and a single SWZ for ir_quadop_vector.  If none
 * applies, every operand is visited and its register collected in op[], a
 * temporary sized for the expression's type is allocated, and the switch
 * below emits the instruction(s) for the operation.
 *
 * The register holding the value is left in this->result.  For a few
 * operations (negation and the conversions that are no-ops here) that is
 * the operand's own register rather than the new temporary.
 *
 * If an operand fails to produce a register, a diagnostic is printed and
 * the process exits.
 */
void
ir_to_mesa_visitor::visit(ir_expression *ir)
{
   unsigned int operand;
   src_reg op[Elements(ir->operands)];
   src_reg result_src;
   dst_reg result_dst;

   /* Quick peephole: Emit OPCODE_MAD(a, b, c) instead of ADD(MUL(a, b), c)
    */
   if (ir->operation == ir_binop_add) {
      /* Try each operand of the add as the multiply. */
      if (try_emit_mad(ir, 1))
         return;
      if (try_emit_mad(ir, 0))
         return;
   }
   if (try_emit_sat(ir))
      return;

   if (ir->operation == ir_quadop_vector) {
      this->emit_swz(ir);
      return;
   }

   for (operand = 0; operand < ir->get_num_operands(); operand++) {
      /* Mark the result as undefined first so that a visit which produces
       * nothing can be detected afterwards.
       */
      this->result.file = PROGRAM_UNDEFINED;
      ir->operands[operand]->accept(this);
      if (this->result.file == PROGRAM_UNDEFINED) {
         ir_print_visitor v;
         printf("Failed to get tree for expression operand:\n");
         ir->operands[operand]->accept(&v);
         exit(1);
      }
      op[operand] = this->result;

      /* Matrix expression operands should have been broken down to vector
       * operations already.
       */
      assert(!ir->operands[operand]->type->is_matrix());
   }

   /* Widest operand of the first two; used as the dot-product size by the
    * vector comparison cases below.
    */
   int vector_elements = ir->operands[0]->type->vector_elements;
   if (ir->operands[1]) {
      vector_elements = MAX2(vector_elements,
                             ir->operands[1]->type->vector_elements);
   }

   this->result.file = PROGRAM_UNDEFINED;

   /* Storage for our result.  Ideally for an assignment we'd be using
    * the actual storage for the result here, instead.
    */
   result_src = get_temp(ir->type);
   /* convenience for the emit functions below. */
   result_dst = dst_reg(result_src);
   /* Limit writes to the channels that will be used by result_src later.
    * This does limit this temp's use as a temporary for multi-instruction
    * sequences.
    */
   result_dst.writemask = (1 << ir->type->vector_elements) - 1;

   switch (ir->operation) {
   case ir_unop_logic_not:
      /* Previously 'SEQ dst, src, 0.0' was used for this.  However, many
       * older GPUs implement SEQ using multiple instructions (i915 uses two
       * SGE instructions and a MUL instruction).  Since our logic values are
       * 0.0 and 1.0, 1-x also implements !x.
       */
      op[0].negate = ~op[0].negate;
      emit(ir, OPCODE_ADD, result_dst, op[0], src_reg_for_float(1.0));
      break;
   case ir_unop_neg:
      /* No instruction needed: flip the source's negate flags and hand the
       * operand's register back as the result.
       */
      op[0].negate = ~op[0].negate;
      result_src = op[0];
      break;
   case ir_unop_abs:
      emit(ir, OPCODE_ABS, result_dst, op[0]);
      break;
   case ir_unop_sign:
      emit(ir, OPCODE_SSG, result_dst, op[0]);
      break;
   case ir_unop_rcp:
      emit_scalar(ir, OPCODE_RCP, result_dst, op[0]);
      break;

   case ir_unop_exp2:
      emit_scalar(ir, OPCODE_EX2, result_dst, op[0]);
      break;
   case ir_unop_exp:
   case ir_unop_log:
      assert(!"not reached: should be handled by ir_explog_to_explog2");
      break;
   case ir_unop_log2:
      emit_scalar(ir, OPCODE_LG2, result_dst, op[0]);
      break;
   case ir_unop_sin:
      emit_scalar(ir, OPCODE_SIN, result_dst, op[0]);
      break;
   case ir_unop_cos:
      emit_scalar(ir, OPCODE_COS, result_dst, op[0]);
      break;
   case ir_unop_sin_reduced:
      emit_scs(ir, OPCODE_SIN, result_dst, op[0]);
      break;
   case ir_unop_cos_reduced:
      emit_scs(ir, OPCODE_COS, result_dst, op[0]);
      break;

   case ir_unop_dFdx:
      emit(ir, OPCODE_DDX, result_dst, op[0]);
      break;
   case ir_unop_dFdy:
      emit(ir, OPCODE_DDY, result_dst, op[0]);
      break;

   case ir_unop_noise: {
      /* Pick NOISE1..NOISE4 from the operand's component count. */
      const enum prog_opcode opcode =
         prog_opcode(OPCODE_NOISE1
                     + (ir->operands[0]->type->vector_elements) - 1);
      assert((opcode >= OPCODE_NOISE1) && (opcode <= OPCODE_NOISE4));

      emit(ir, opcode, result_dst, op[0]);
      break;
   }

   case ir_binop_add:
      emit(ir, OPCODE_ADD, result_dst, op[0], op[1]);
      break;
   case ir_binop_sub:
      emit(ir, OPCODE_SUB, result_dst, op[0], op[1]);
      break;

   case ir_binop_mul:
      emit(ir, OPCODE_MUL, result_dst, op[0], op[1]);
      break;
   case ir_binop_div:
      assert(!"not reached: should be handled by ir_div_to_mul_rcp");
      /* No break here: control falls through into ir_binop_mod.  When
       * asserts are compiled out both cases emit nothing and just break.
       */
   case ir_binop_mod:
      assert(!"ir_binop_mod should have been converted to b * fract(a/b)");
      break;

   case ir_binop_less:
      emit(ir, OPCODE_SLT, result_dst, op[0], op[1]);
      break;
   case ir_binop_greater:
      emit(ir, OPCODE_SGT, result_dst, op[0], op[1]);
      break;
   case ir_binop_lequal:
      emit(ir, OPCODE_SLE, result_dst, op[0], op[1]);
      break;
   case ir_binop_gequal:
      emit(ir, OPCODE_SGE, result_dst, op[0], op[1]);
      break;
   case ir_binop_equal:
      emit(ir, OPCODE_SEQ, result_dst, op[0], op[1]);
      break;
   case ir_binop_nequal:
      emit(ir, OPCODE_SNE, result_dst, op[0], op[1]);
      break;
   case ir_binop_all_equal:
      /* "==" operator producing a scalar boolean. */
      if (ir->operands[0]->type->is_vector() ||
          ir->operands[1]->type->is_vector()) {
         /* Per-channel "differs" flags, summed by dotting the flags with
          * themselves; the vectors are equal when that sum is zero.
          */
         src_reg temp = get_temp(glsl_type::vec4_type);
         emit(ir, OPCODE_SNE, dst_reg(temp), op[0], op[1]);
         emit_dp(ir, result_dst, temp, temp, vector_elements);
         emit(ir, OPCODE_SEQ, result_dst, result_src, src_reg_for_float(0.0));
      } else {
         emit(ir, OPCODE_SEQ, result_dst, op[0], op[1]);
      }
      break;
   case ir_binop_any_nequal:
      /* "!=" operator producing a scalar boolean. */
      if (ir->operands[0]->type->is_vector() ||
          ir->operands[1]->type->is_vector()) {
         src_reg temp = get_temp(glsl_type::vec4_type);
         emit(ir, OPCODE_SNE, dst_reg(temp), op[0], op[1]);

         /* After the dot-product, the value will be an integer on the
          * range [0,4].  Zero stays zero, and positive values become 1.0.
          */
         ir_to_mesa_instruction *const dp =
            emit_dp(ir, result_dst, temp, temp, vector_elements);
         if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
            /* The clamping to [0,1] can be done for free in the fragment
             * shader with a saturate.
             */
            dp->saturate = true;
         } else {
            /* Negating the result of the dot-product gives values on the range
             * [-4, 0].  Zero stays zero, and negative values become 1.0.  This
             * is achieved using SLT.
             */
            src_reg slt_src = result_src;
            slt_src.negate = ~slt_src.negate;
            emit(ir, OPCODE_SLT, result_dst, slt_src, src_reg_for_float(0.0));
         }
      } else {
         emit(ir, OPCODE_SNE, result_dst, op[0], op[1]);
      }
      break;

   case ir_unop_any: {
      assert(ir->operands[0]->type->is_vector());

      /* After the dot-product, the value will be an integer on the
       * range [0,4].  Zero stays zero, and positive values become 1.0.
       */
      ir_to_mesa_instruction *const dp =
         emit_dp(ir, result_dst, op[0], op[0],
                 ir->operands[0]->type->vector_elements);
      if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
         /* The clamping to [0,1] can be done for free in the fragment
          * shader with a saturate.
          */
         dp->saturate = true;
      } else {
         /* Negating the result of the dot-product gives values on the range
          * [-4, 0].  Zero stays zero, and negative values become 1.0.  This
          * is achieved using SLT.
          */
         src_reg slt_src = result_src;
         slt_src.negate = ~slt_src.negate;
         emit(ir, OPCODE_SLT, result_dst, slt_src, src_reg_for_float(0.0));
      }
      break;
   }

   case ir_binop_logic_xor:
      /* With 0.0/1.0 logic values, "not equal" is exclusive or. */
      emit(ir, OPCODE_SNE, result_dst, op[0], op[1]);
      break;

   case ir_binop_logic_or: {
      /* After the addition, the value will be an integer on the
       * range [0,2].  Zero stays zero, and positive values become 1.0.
       */
      ir_to_mesa_instruction *add =
         emit(ir, OPCODE_ADD, result_dst, op[0], op[1]);
      if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
         /* The clamping to [0,1] can be done for free in the fragment
          * shader with a saturate.
          */
         add->saturate = true;
      } else {
         /* Negating the result of the addition gives values on the range
          * [-2, 0].  Zero stays zero, and negative values become 1.0.  This
          * is achieved using SLT.
          */
         src_reg slt_src = result_src;
         slt_src.negate = ~slt_src.negate;
         emit(ir, OPCODE_SLT, result_dst, slt_src, src_reg_for_float(0.0));
      }
      break;
   }

   case ir_binop_logic_and:
      /* the bool args are stored as float 0.0 or 1.0, so "mul" gives us "and". */
      emit(ir, OPCODE_MUL, result_dst, op[0], op[1]);
      break;

   case ir_binop_dot:
      assert(ir->operands[0]->type->is_vector());
      assert(ir->operands[0]->type == ir->operands[1]->type);
      emit_dp(ir, result_dst, op[0], op[1],
              ir->operands[0]->type->vector_elements);
      break;

   case ir_unop_sqrt:
      /* sqrt(x) = x * rsq(x). */
      emit_scalar(ir, OPCODE_RSQ, result_dst, op[0]);
      emit(ir, OPCODE_MUL, result_dst, result_src, op[0]);
      /* For incoming channels <= 0, set the result to 0. */
      op[0].negate = ~op[0].negate;
      emit(ir, OPCODE_CMP, result_dst,
                          op[0], result_src, src_reg_for_float(0.0));
      break;
   case ir_unop_rsq:
      emit_scalar(ir, OPCODE_RSQ, result_dst, op[0]);
      break;
   case ir_unop_i2f:
   case ir_unop_u2f:
   case ir_unop_b2f:
   case ir_unop_b2i:
   case ir_unop_i2u:
   case ir_unop_u2i:
      /* Mesa IR lacks types, ints are stored as truncated floats. */
      result_src = op[0];
      break;
   case ir_unop_f2i:
      emit(ir, OPCODE_TRUNC, result_dst, op[0]);
      break;
   case ir_unop_f2b:
   case ir_unop_i2b:
      /* Any non-zero value converts to true (1.0). */
      emit(ir, OPCODE_SNE, result_dst,
                          op[0], src_reg_for_float(0.0));
      break;
   case ir_unop_trunc:
      emit(ir, OPCODE_TRUNC, result_dst, op[0]);
      break;
   case ir_unop_ceil:
      /* ceil(x) = -floor(-x): negate the source going in and the result
       * register coming out.
       */
      op[0].negate = ~op[0].negate;
      emit(ir, OPCODE_FLR, result_dst, op[0]);
      result_src.negate = ~result_src.negate;
      break;
   case ir_unop_floor:
      emit(ir, OPCODE_FLR, result_dst, op[0]);
      break;
   case ir_unop_fract:
      emit(ir, OPCODE_FRC, result_dst, op[0]);
      break;

   case ir_binop_min:
      emit(ir, OPCODE_MIN, result_dst, op[0], op[1]);
      break;
   case ir_binop_max:
      emit(ir, OPCODE_MAX, result_dst, op[0], op[1]);
      break;
   case ir_binop_pow:
      emit_scalar(ir, OPCODE_POW, result_dst, op[0], op[1]);
      break;

   case ir_unop_bit_not:
   case ir_binop_lshift:
   case ir_binop_rshift:
   case ir_binop_bit_and:
   case ir_binop_bit_xor:
   case ir_binop_bit_or:
   case ir_unop_round_even:
      assert(!"GLSL 1.30 features unsupported");
      break;

   case ir_quadop_vector:
      /* This operation should have already been handled.
       */
      assert(!"Should not get here.");
      break;
   }

   this->result = result_src;
}
1415
1416
1417 void
1418 ir_to_mesa_visitor::visit(ir_swizzle *ir)
1419 {
1420    src_reg src;
1421    int i;
1422    int swizzle[4];
1423
1424    /* Note that this is only swizzles in expressions, not those on the left
1425     * hand side of an assignment, which do write masking.  See ir_assignment
1426     * for that.
1427     */
1428
1429    ir->val->accept(this);
1430    src = this->result;
1431    assert(src.file != PROGRAM_UNDEFINED);
1432
1433    for (i = 0; i < 4; i++) {
1434       if (i < ir->type->vector_elements) {
1435          switch (i) {
1436          case 0:
1437             swizzle[i] = GET_SWZ(src.swizzle, ir->mask.x);
1438             break;
1439          case 1:
1440             swizzle[i] = GET_SWZ(src.swizzle, ir->mask.y);
1441             break;
1442          case 2:
1443             swizzle[i] = GET_SWZ(src.swizzle, ir->mask.z);
1444             break;
1445          case 3:
1446             swizzle[i] = GET_SWZ(src.swizzle, ir->mask.w);
1447             break;
1448          }
1449       } else {
1450          /* If the type is smaller than a vec4, replicate the last
1451           * channel out.
1452           */
1453          swizzle[i] = swizzle[ir->type->vector_elements - 1];
1454       }
1455    }
1456
1457    src.swizzle = MAKE_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]);
1458
1459    this->result = src;
1460 }
1461
1462 void
1463 ir_to_mesa_visitor::visit(ir_dereference_variable *ir)
1464 {
1465    variable_storage *entry = find_variable_storage(ir->var);
1466    ir_variable *var = ir->var;
1467
1468    if (!entry) {
1469       switch (var->mode) {
1470       case ir_var_uniform:
1471          entry = new(mem_ctx) variable_storage(var, PROGRAM_UNIFORM,
1472                                                var->location);
1473          this->variables.push_tail(entry);
1474          break;
1475       case ir_var_in:
1476       case ir_var_inout:
1477          /* The linker assigns locations for varyings and attributes,
1478           * including deprecated builtins (like gl_Color),
1479           * user-assigned generic attributes (glBindVertexLocation),
1480           * and user-defined varyings.
1481           *
1482           * FINISHME: We would hit this path for function arguments.  Fix!
1483           */
1484          assert(var->location != -1);
1485          entry = new(mem_ctx) variable_storage(var,
1486                                                PROGRAM_INPUT,
1487                                                var->location);
1488          if (this->prog->Target == GL_VERTEX_PROGRAM_ARB &&
1489              var->location >= VERT_ATTRIB_GENERIC0) {
1490             _mesa_add_attribute(this->prog->Attributes,
1491                                 var->name,
1492                                 _mesa_sizeof_glsl_type(var->type->gl_type),
1493                                 var->type->gl_type,
1494                                 var->location - VERT_ATTRIB_GENERIC0);
1495          }
1496          break;
1497       case ir_var_out:
1498          assert(var->location != -1);
1499          entry = new(mem_ctx) variable_storage(var,
1500                                                PROGRAM_OUTPUT,
1501                                                var->location);
1502          break;
1503       case ir_var_system_value:
1504          entry = new(mem_ctx) variable_storage(var,
1505                                                PROGRAM_SYSTEM_VALUE,
1506                                                var->location);
1507          break;
1508       case ir_var_auto:
1509       case ir_var_temporary:
1510          entry = new(mem_ctx) variable_storage(var, PROGRAM_TEMPORARY,
1511                                                this->next_temp);
1512          this->variables.push_tail(entry);
1513
1514          next_temp += type_size(var->type);
1515          break;
1516       }
1517
1518       if (!entry) {
1519          printf("Failed to make storage for %s\n", var->name);
1520          exit(1);
1521       }
1522    }
1523
1524    this->result = src_reg(entry->file, entry->index, var->type);
1525 }
1526
1527 void
1528 ir_to_mesa_visitor::visit(ir_dereference_array *ir)
1529 {
1530    ir_constant *index;
1531    src_reg src;
1532    int element_size = type_size(ir->type);
1533
1534    index = ir->array_index->constant_expression_value();
1535
1536    ir->array->accept(this);
1537    src = this->result;
1538
1539    if (index) {
1540       src.index += index->value.i[0] * element_size;
1541    } else {
1542       /* Variable index array dereference.  It eats the "vec4" of the
1543        * base of the array and an index that offsets the Mesa register
1544        * index.
1545        */
1546       ir->array_index->accept(this);
1547
1548       src_reg index_reg;
1549
1550       if (element_size == 1) {
1551          index_reg = this->result;
1552       } else {
1553          index_reg = get_temp(glsl_type::float_type);
1554
1555          emit(ir, OPCODE_MUL, dst_reg(index_reg),
1556               this->result, src_reg_for_float(element_size));
1557       }
1558
1559       /* If there was already a relative address register involved, add the
1560        * new and the old together to get the new offset.
1561        */
1562       if (src.reladdr != NULL)  {
1563          src_reg accum_reg = get_temp(glsl_type::float_type);
1564
1565          emit(ir, OPCODE_ADD, dst_reg(accum_reg),
1566               index_reg, *src.reladdr);
1567
1568          index_reg = accum_reg;
1569       }
1570
1571       src.reladdr = ralloc(mem_ctx, src_reg);
1572       memcpy(src.reladdr, &index_reg, sizeof(index_reg));
1573    }
1574
1575    /* If the type is smaller than a vec4, replicate the last channel out. */
1576    if (ir->type->is_scalar() || ir->type->is_vector())
1577       src.swizzle = swizzle_for_size(ir->type->vector_elements);
1578    else
1579       src.swizzle = SWIZZLE_NOOP;
1580
1581    this->result = src;
1582 }
1583
1584 void
1585 ir_to_mesa_visitor::visit(ir_dereference_record *ir)
1586 {
1587    unsigned int i;
1588    const glsl_type *struct_type = ir->record->type;
1589    int offset = 0;
1590
1591    ir->record->accept(this);
1592
1593    for (i = 0; i < struct_type->length; i++) {
1594       if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0)
1595          break;
1596       offset += type_size(struct_type->fields.structure[i].type);
1597    }
1598
1599    /* If the type is smaller than a vec4, replicate the last channel out. */
1600    if (ir->type->is_scalar() || ir->type->is_vector())
1601       this->result.swizzle = swizzle_for_size(ir->type->vector_elements);
1602    else
1603       this->result.swizzle = SWIZZLE_NOOP;
1604
1605    this->result.index += offset;
1606 }
1607
1608 /**
1609  * We want to be careful in assignment setup to hit the actual storage
1610  * instead of potentially using a temporary like we might with the
1611  * ir_dereference handler.
1612  */
1613 static dst_reg
1614 get_assignment_lhs(ir_dereference *ir, ir_to_mesa_visitor *v)
1615 {
1616    /* The LHS must be a dereference.  If the LHS is a variable indexed array
1617     * access of a vector, it must be separated into a series conditional moves
1618     * before reaching this point (see ir_vec_index_to_cond_assign).
1619     */
1620    assert(ir->as_dereference());
1621    ir_dereference_array *deref_array = ir->as_dereference_array();
1622    if (deref_array) {
1623       assert(!deref_array->array->type->is_vector());
1624    }
1625
1626    /* Use the rvalue deref handler for the most part.  We'll ignore
1627     * swizzles in it and write swizzles using writemask, though.
1628     */
1629    ir->accept(v);
1630    return dst_reg(v->result);
1631 }
1632
1633 /**
1634  * Process the condition of a conditional assignment
1635  *
1636  * Examines the condition of a conditional assignment to generate the optimal
1637  * first operand of a \c CMP instruction.  If the condition is a relational
1638  * operator with 0 (e.g., \c ir_binop_less), the value being compared will be
1639  * used as the source for the \c CMP instruction.  Otherwise the comparison
1640  * is processed to a boolean result, and the boolean result is used as the
1641  * operand to the CMP instruction.
1642  */
1643 bool
1644 ir_to_mesa_visitor::process_move_condition(ir_rvalue *ir)
1645 {
1646    ir_rvalue *src_ir = ir;
1647    bool negate = true;
1648    bool switch_order = false;
1649
1650    ir_expression *const expr = ir->as_expression();
1651    if ((expr != NULL) && (expr->get_num_operands() == 2)) {
1652       bool zero_on_left = false;
1653
1654       if (expr->operands[0]->is_zero()) {
1655          src_ir = expr->operands[1];
1656          zero_on_left = true;
1657       } else if (expr->operands[1]->is_zero()) {
1658          src_ir = expr->operands[0];
1659          zero_on_left = false;
1660       }
1661
1662       /*      a is -  0  +            -  0  +
1663        * (a <  0)  T  F  F  ( a < 0)  T  F  F
1664        * (0 <  a)  F  F  T  (-a < 0)  F  F  T
1665        * (a <= 0)  T  T  F  (-a < 0)  F  F  T  (swap order of other operands)
1666        * (0 <= a)  F  T  T  ( a < 0)  T  F  F  (swap order of other operands)
1667        * (a >  0)  F  F  T  (-a < 0)  F  F  T
1668        * (0 >  a)  T  F  F  ( a < 0)  T  F  F
1669        * (a >= 0)  F  T  T  ( a < 0)  T  F  F  (swap order of other operands)
1670        * (0 >= a)  T  T  F  (-a < 0)  F  F  T  (swap order of other operands)
1671        *
1672        * Note that exchanging the order of 0 and 'a' in the comparison simply
1673        * means that the value of 'a' should be negated.
1674        */
1675       if (src_ir != ir) {
1676          switch (expr->operation) {
1677          case ir_binop_less:
1678             switch_order = false;
1679             negate = zero_on_left;
1680             break;
1681
1682          case ir_binop_greater:
1683             switch_order = false;
1684             negate = !zero_on_left;
1685             break;
1686
1687          case ir_binop_lequal:
1688             switch_order = true;
1689             negate = !zero_on_left;
1690             break;
1691
1692          case ir_binop_gequal:
1693             switch_order = true;
1694             negate = zero_on_left;
1695             break;
1696
1697          default:
1698             /* This isn't the right kind of comparison afterall, so make sure
1699              * the whole condition is visited.
1700              */
1701             src_ir = ir;
1702             break;
1703          }
1704       }
1705    }
1706
1707    src_ir->accept(this);
1708
1709    /* We use the OPCODE_CMP (a < 0 ? b : c) for conditional moves, and the
1710     * condition we produced is 0.0 or 1.0.  By flipping the sign, we can
1711     * choose which value OPCODE_CMP produces without an extra instruction
1712     * computing the condition.
1713     */
1714    if (negate)
1715       this->result.negate = ~this->result.negate;
1716
1717    return switch_order;
1718 }
1719
1720 void
1721 ir_to_mesa_visitor::visit(ir_assignment *ir)
1722 {
1723    dst_reg l;
1724    src_reg r;
1725    int i;
1726
1727    ir->rhs->accept(this);
1728    r = this->result;
1729
1730    l = get_assignment_lhs(ir->lhs, this);
1731
1732    /* FINISHME: This should really set to the correct maximal writemask for each
1733     * FINISHME: component written (in the loops below).  This case can only
1734     * FINISHME: occur for matrices, arrays, and structures.
1735     */
1736    if (ir->write_mask == 0) {
1737       assert(!ir->lhs->type->is_scalar() && !ir->lhs->type->is_vector());
1738       l.writemask = WRITEMASK_XYZW;
1739    } else if (ir->lhs->type->is_scalar()) {
1740       /* FINISHME: This hack makes writing to gl_FragDepth, which lives in the
1741        * FINISHME: W component of fragment shader output zero, work correctly.
1742        */
1743       l.writemask = WRITEMASK_XYZW;
1744    } else {
1745       int swizzles[4];
1746       int first_enabled_chan = 0;
1747       int rhs_chan = 0;
1748
1749       assert(ir->lhs->type->is_vector());
1750       l.writemask = ir->write_mask;
1751
1752       for (int i = 0; i < 4; i++) {
1753          if (l.writemask & (1 << i)) {
1754             first_enabled_chan = GET_SWZ(r.swizzle, i);
1755             break;
1756          }
1757       }
1758
1759       /* Swizzle a small RHS vector into the channels being written.
1760        *
1761        * glsl ir treats write_mask as dictating how many channels are
1762        * present on the RHS while Mesa IR treats write_mask as just
1763        * showing which channels of the vec4 RHS get written.
1764        */
1765       for (int i = 0; i < 4; i++) {
1766          if (l.writemask & (1 << i))
1767             swizzles[i] = GET_SWZ(r.swizzle, rhs_chan++);
1768          else
1769             swizzles[i] = first_enabled_chan;
1770       }
1771       r.swizzle = MAKE_SWIZZLE4(swizzles[0], swizzles[1],
1772                                 swizzles[2], swizzles[3]);
1773    }
1774
1775    assert(l.file != PROGRAM_UNDEFINED);
1776    assert(r.file != PROGRAM_UNDEFINED);
1777
1778    if (ir->condition) {
1779       const bool switch_order = this->process_move_condition(ir->condition);
1780       src_reg condition = this->result;
1781
1782       for (i = 0; i < type_size(ir->lhs->type); i++) {
1783          if (switch_order) {
1784             emit(ir, OPCODE_CMP, l, condition, src_reg(l), r);
1785          } else {
1786             emit(ir, OPCODE_CMP, l, condition, r, src_reg(l));
1787          }
1788
1789          l.index++;
1790          r.index++;
1791       }
1792    } else {
1793       for (i = 0; i < type_size(ir->lhs->type); i++) {
1794          emit(ir, OPCODE_MOV, l, r);
1795          l.index++;
1796          r.index++;
1797       }
1798    }
1799 }
1800
1801
1802 void
1803 ir_to_mesa_visitor::visit(ir_constant *ir)
1804 {
1805    src_reg src;
1806    GLfloat stack_vals[4] = { 0 };
1807    GLfloat *values = stack_vals;
1808    unsigned int i;
1809
1810    /* Unfortunately, 4 floats is all we can get into
1811     * _mesa_add_unnamed_constant.  So, make a temp to store an
1812     * aggregate constant and move each constant value into it.  If we
1813     * get lucky, copy propagation will eliminate the extra moves.
1814     */
1815
1816    if (ir->type->base_type == GLSL_TYPE_STRUCT) {
1817       src_reg temp_base = get_temp(ir->type);
1818       dst_reg temp = dst_reg(temp_base);
1819
1820       foreach_iter(exec_list_iterator, iter, ir->components) {
1821          ir_constant *field_value = (ir_constant *)iter.get();
1822          int size = type_size(field_value->type);
1823
1824          assert(size > 0);
1825
1826          field_value->accept(this);
1827          src = this->result;
1828
1829          for (i = 0; i < (unsigned int)size; i++) {
1830             emit(ir, OPCODE_MOV, temp, src);
1831
1832             src.index++;
1833             temp.index++;
1834          }
1835       }
1836       this->result = temp_base;
1837       return;
1838    }
1839
1840    if (ir->type->is_array()) {
1841       src_reg temp_base = get_temp(ir->type);
1842       dst_reg temp = dst_reg(temp_base);
1843       int size = type_size(ir->type->fields.array);
1844
1845       assert(size > 0);
1846
1847       for (i = 0; i < ir->type->length; i++) {
1848          ir->array_elements[i]->accept(this);
1849          src = this->result;
1850          for (int j = 0; j < size; j++) {
1851             emit(ir, OPCODE_MOV, temp, src);
1852
1853             src.index++;
1854             temp.index++;
1855          }
1856       }
1857       this->result = temp_base;
1858       return;
1859    }
1860
1861    if (ir->type->is_matrix()) {
1862       src_reg mat = get_temp(ir->type);
1863       dst_reg mat_column = dst_reg(mat);
1864
1865       for (i = 0; i < ir->type->matrix_columns; i++) {
1866          assert(ir->type->base_type == GLSL_TYPE_FLOAT);
1867          values = &ir->value.f[i * ir->type->vector_elements];
1868
1869          src = src_reg(PROGRAM_CONSTANT, -1, NULL);
1870          src.index = _mesa_add_unnamed_constant(this->prog->Parameters,
1871                                                 (gl_constant_value *) values,
1872                                                 ir->type->vector_elements,
1873                                                 &src.swizzle);
1874          emit(ir, OPCODE_MOV, mat_column, src);
1875
1876          mat_column.index++;
1877       }
1878
1879       this->result = mat;
1880       return;
1881    }
1882
1883    src.file = PROGRAM_CONSTANT;
1884    switch (ir->type->base_type) {
1885    case GLSL_TYPE_FLOAT:
1886       values = &ir->value.f[0];
1887       break;
1888    case GLSL_TYPE_UINT:
1889       for (i = 0; i < ir->type->vector_elements; i++) {
1890          values[i] = ir->value.u[i];
1891       }
1892       break;
1893    case GLSL_TYPE_INT:
1894       for (i = 0; i < ir->type->vector_elements; i++) {
1895          values[i] = ir->value.i[i];
1896       }
1897       break;
1898    case GLSL_TYPE_BOOL:
1899       for (i = 0; i < ir->type->vector_elements; i++) {
1900          values[i] = ir->value.b[i];
1901       }
1902       break;
1903    default:
1904       assert(!"Non-float/uint/int/bool constant");
1905    }
1906
1907    this->result = src_reg(PROGRAM_CONSTANT, -1, ir->type);
1908    this->result.index = _mesa_add_unnamed_constant(this->prog->Parameters,
1909                                                    (gl_constant_value *) values,
1910                                                    ir->type->vector_elements,
1911                                                    &this->result.swizzle);
1912 }
1913
1914 function_entry *
1915 ir_to_mesa_visitor::get_function_signature(ir_function_signature *sig)
1916 {
1917    function_entry *entry;
1918
1919    foreach_iter(exec_list_iterator, iter, this->function_signatures) {
1920       entry = (function_entry *)iter.get();
1921
1922       if (entry->sig == sig)
1923          return entry;
1924    }
1925
1926    entry = ralloc(mem_ctx, function_entry);
1927    entry->sig = sig;
1928    entry->sig_id = this->next_signature_id++;
1929    entry->bgn_inst = NULL;
1930
1931    /* Allocate storage for all the parameters. */
1932    foreach_iter(exec_list_iterator, iter, sig->parameters) {
1933       ir_variable *param = (ir_variable *)iter.get();
1934       variable_storage *storage;
1935
1936       storage = find_variable_storage(param);
1937       assert(!storage);
1938
1939       storage = new(mem_ctx) variable_storage(param, PROGRAM_TEMPORARY,
1940                                               this->next_temp);
1941       this->variables.push_tail(storage);
1942
1943       this->next_temp += type_size(param->type);
1944    }
1945
1946    if (!sig->return_type->is_void()) {
1947       entry->return_reg = get_temp(sig->return_type);
1948    } else {
1949       entry->return_reg = undef_src;
1950    }
1951
1952    this->function_signatures.push_tail(entry);
1953    return entry;
1954 }
1955
1956 void
1957 ir_to_mesa_visitor::visit(ir_call *ir)
1958 {
1959    ir_to_mesa_instruction *call_inst;
1960    ir_function_signature *sig = ir->get_callee();
1961    function_entry *entry = get_function_signature(sig);
1962    int i;
1963
1964    /* Process in parameters. */
1965    exec_list_iterator sig_iter = sig->parameters.iterator();
1966    foreach_iter(exec_list_iterator, iter, *ir) {
1967       ir_rvalue *param_rval = (ir_rvalue *)iter.get();
1968       ir_variable *param = (ir_variable *)sig_iter.get();
1969
1970       if (param->mode == ir_var_in ||
1971           param->mode == ir_var_inout) {
1972          variable_storage *storage = find_variable_storage(param);
1973          assert(storage);
1974
1975          param_rval->accept(this);
1976          src_reg r = this->result;
1977
1978          dst_reg l;
1979          l.file = storage->file;
1980          l.index = storage->index;
1981          l.reladdr = NULL;
1982          l.writemask = WRITEMASK_XYZW;
1983          l.cond_mask = COND_TR;
1984
1985          for (i = 0; i < type_size(param->type); i++) {
1986             emit(ir, OPCODE_MOV, l, r);
1987             l.index++;
1988             r.index++;
1989          }
1990       }
1991
1992       sig_iter.next();
1993    }
1994    assert(!sig_iter.has_next());
1995
1996    /* Emit call instruction */
1997    call_inst = emit(ir, OPCODE_CAL);
1998    call_inst->function = entry;
1999
2000    /* Process out parameters. */
2001    sig_iter = sig->parameters.iterator();
2002    foreach_iter(exec_list_iterator, iter, *ir) {
2003       ir_rvalue *param_rval = (ir_rvalue *)iter.get();
2004       ir_variable *param = (ir_variable *)sig_iter.get();
2005
2006       if (param->mode == ir_var_out ||
2007           param->mode == ir_var_inout) {
2008          variable_storage *storage = find_variable_storage(param);
2009          assert(storage);
2010
2011          src_reg r;
2012          r.file = storage->file;
2013          r.index = storage->index;
2014          r.reladdr = NULL;
2015          r.swizzle = SWIZZLE_NOOP;
2016          r.negate = 0;
2017
2018          param_rval->accept(this);
2019          dst_reg l = dst_reg(this->result);
2020
2021          for (i = 0; i < type_size(param->type); i++) {
2022             emit(ir, OPCODE_MOV, l, r);
2023             l.index++;
2024             r.index++;
2025          }
2026       }
2027
2028       sig_iter.next();
2029    }
2030    assert(!sig_iter.has_next());
2031
2032    /* Process return value. */
2033    this->result = entry->return_reg;
2034 }
2035
2036 void
2037 ir_to_mesa_visitor::visit(ir_texture *ir)
2038 {
2039    src_reg result_src, coord, lod_info, projector, dx, dy;
2040    dst_reg result_dst, coord_dst;
2041    ir_to_mesa_instruction *inst = NULL;
2042    prog_opcode opcode = OPCODE_NOP;
2043
2044    ir->coordinate->accept(this);
2045
2046    /* Put our coords in a temp.  We'll need to modify them for shadow,
2047     * projection, or LOD, so the only case we'd use it as is is if
2048     * we're doing plain old texturing.  Mesa IR optimization should
2049     * handle cleaning up our mess in that case.
2050     */
2051    coord = get_temp(glsl_type::vec4_type);
2052    coord_dst = dst_reg(coord);
2053    emit(ir, OPCODE_MOV, coord_dst, this->result);
2054
2055    if (ir->projector) {
2056       ir->projector->accept(this);
2057       projector = this->result;
2058    }
2059
2060    /* Storage for our result.  Ideally for an assignment we'd be using
2061     * the actual storage for the result here, instead.
2062     */
2063    result_src = get_temp(glsl_type::vec4_type);
2064    result_dst = dst_reg(result_src);
2065
2066    switch (ir->op) {
2067    case ir_tex:
2068       opcode = OPCODE_TEX;
2069       break;
2070    case ir_txb:
2071       opcode = OPCODE_TXB;
2072       ir->lod_info.bias->accept(this);
2073       lod_info = this->result;
2074       break;
2075    case ir_txl:
2076       opcode = OPCODE_TXL;
2077       ir->lod_info.lod->accept(this);
2078       lod_info = this->result;
2079       break;
2080    case ir_txd:
2081       opcode = OPCODE_TXD;
2082       ir->lod_info.grad.dPdx->accept(this);
2083       dx = this->result;
2084       ir->lod_info.grad.dPdy->accept(this);
2085       dy = this->result;
2086       break;
2087    case ir_txf:
2088       assert(!"GLSL 1.30 features unsupported");
2089       break;
2090    }
2091
2092    if (ir->projector) {
2093       if (opcode == OPCODE_TEX) {
2094          /* Slot the projector in as the last component of the coord. */
2095          coord_dst.writemask = WRITEMASK_W;
2096          emit(ir, OPCODE_MOV, coord_dst, projector);
2097          coord_dst.writemask = WRITEMASK_XYZW;
2098          opcode = OPCODE_TXP;
2099       } else {
2100          src_reg coord_w = coord;
2101          coord_w.swizzle = SWIZZLE_WWWW;
2102
2103          /* For the other TEX opcodes there's no projective version
2104           * since the last slot is taken up by lod info.  Do the
2105           * projective divide now.
2106           */
2107          coord_dst.writemask = WRITEMASK_W;
2108          emit(ir, OPCODE_RCP, coord_dst, projector);
2109
2110          /* In the case where we have to project the coordinates "by hand,"
2111           * the shadow comparitor value must also be projected.
2112           */
2113          src_reg tmp_src = coord;
2114          if (ir->shadow_comparitor) {
2115             /* Slot the shadow value in as the second to last component of the
2116              * coord.
2117              */
2118             ir->shadow_comparitor->accept(this);
2119
2120             tmp_src = get_temp(glsl_type::vec4_type);
2121             dst_reg tmp_dst = dst_reg(tmp_src);
2122
2123             tmp_dst.writemask = WRITEMASK_Z;
2124             emit(ir, OPCODE_MOV, tmp_dst, this->result);
2125
2126             tmp_dst.writemask = WRITEMASK_XY;
2127             emit(ir, OPCODE_MOV, tmp_dst, coord);
2128          }
2129
2130          coord_dst.writemask = WRITEMASK_XYZ;
2131          emit(ir, OPCODE_MUL, coord_dst, tmp_src, coord_w);
2132
2133          coord_dst.writemask = WRITEMASK_XYZW;
2134          coord.swizzle = SWIZZLE_XYZW;
2135       }
2136    }
2137
2138    /* If projection is done and the opcode is not OPCODE_TXP, then the shadow
2139     * comparitor was put in the correct place (and projected) by the code,
2140     * above, that handles by-hand projection.
2141     */
2142    if (ir->shadow_comparitor && (!ir->projector || opcode == OPCODE_TXP)) {
2143       /* Slot the shadow value in as the second to last component of the
2144        * coord.
2145        */
2146       ir->shadow_comparitor->accept(this);
2147       coord_dst.writemask = WRITEMASK_Z;
2148       emit(ir, OPCODE_MOV, coord_dst, this->result);
2149       coord_dst.writemask = WRITEMASK_XYZW;
2150    }
2151
2152    if (opcode == OPCODE_TXL || opcode == OPCODE_TXB) {
2153       /* Mesa IR stores lod or lod bias in the last channel of the coords. */
2154       coord_dst.writemask = WRITEMASK_W;
2155       emit(ir, OPCODE_MOV, coord_dst, lod_info);
2156       coord_dst.writemask = WRITEMASK_XYZW;
2157    }
2158
2159    if (opcode == OPCODE_TXD)
2160       inst = emit(ir, opcode, result_dst, coord, dx, dy);
2161    else
2162       inst = emit(ir, opcode, result_dst, coord);
2163
2164    if (ir->shadow_comparitor)
2165       inst->tex_shadow = GL_TRUE;
2166
2167    inst->sampler = _mesa_get_sampler_uniform_value(ir->sampler,
2168                                                    this->shader_program,
2169                                                    this->prog);
2170
2171    const glsl_type *sampler_type = ir->sampler->type;
2172
2173    switch (sampler_type->sampler_dimensionality) {
2174    case GLSL_SAMPLER_DIM_1D:
2175       inst->tex_target = (sampler_type->sampler_array)
2176          ? TEXTURE_1D_ARRAY_INDEX : TEXTURE_1D_INDEX;
2177       break;
2178    case GLSL_SAMPLER_DIM_2D:
2179       inst->tex_target = (sampler_type->sampler_array)
2180          ? TEXTURE_2D_ARRAY_INDEX : TEXTURE_2D_INDEX;
2181       break;
2182    case GLSL_SAMPLER_DIM_3D:
2183       inst->tex_target = TEXTURE_3D_INDEX;
2184       break;
2185    case GLSL_SAMPLER_DIM_CUBE:
2186       inst->tex_target = TEXTURE_CUBE_INDEX;
2187       break;
2188    case GLSL_SAMPLER_DIM_RECT:
2189       inst->tex_target = TEXTURE_RECT_INDEX;
2190       break;
2191    case GLSL_SAMPLER_DIM_BUF:
2192       assert(!"FINISHME: Implement ARB_texture_buffer_object");
2193       break;
2194    default:
2195       assert(!"Should not get here.");
2196    }
2197
2198    this->result = result_src;
2199 }
2200
2201 void
2202 ir_to_mesa_visitor::visit(ir_return *ir)
2203 {
2204    if (ir->get_value()) {
2205       dst_reg l;
2206       int i;
2207
2208       assert(current_function);
2209
2210       ir->get_value()->accept(this);
2211       src_reg r = this->result;
2212
2213       l = dst_reg(current_function->return_reg);
2214
2215       for (i = 0; i < type_size(current_function->sig->return_type); i++) {
2216          emit(ir, OPCODE_MOV, l, r);
2217          l.index++;
2218          r.index++;
2219       }
2220    }
2221
2222    emit(ir, OPCODE_RET);
2223 }
2224
2225 void
2226 ir_to_mesa_visitor::visit(ir_discard *ir)
2227 {
2228    struct gl_fragment_program *fp = (struct gl_fragment_program *)this->prog;
2229
2230    if (ir->condition) {
2231       ir->condition->accept(this);
2232       this->result.negate = ~this->result.negate;
2233       emit(ir, OPCODE_KIL, undef_dst, this->result);
2234    } else {
2235       emit(ir, OPCODE_KIL_NV);
2236    }
2237
2238    fp->UsesKill = GL_TRUE;
2239 }
2240
2241 void
2242 ir_to_mesa_visitor::visit(ir_if *ir)
2243 {
2244    ir_to_mesa_instruction *cond_inst, *if_inst;
2245    ir_to_mesa_instruction *prev_inst;
2246
2247    prev_inst = (ir_to_mesa_instruction *)this->instructions.get_tail();
2248
2249    ir->condition->accept(this);
2250    assert(this->result.file != PROGRAM_UNDEFINED);
2251
2252    if (this->options->EmitCondCodes) {
2253       cond_inst = (ir_to_mesa_instruction *)this->instructions.get_tail();
2254
2255       /* See if we actually generated any instruction for generating
2256        * the condition.  If not, then cook up a move to a temp so we
2257        * have something to set cond_update on.
2258        */
2259       if (cond_inst == prev_inst) {
2260          src_reg temp = get_temp(glsl_type::bool_type);
2261          cond_inst = emit(ir->condition, OPCODE_MOV, dst_reg(temp), result);
2262       }
2263       cond_inst->cond_update = GL_TRUE;
2264
2265       if_inst = emit(ir->condition, OPCODE_IF);
2266       if_inst->dst.cond_mask = COND_NE;
2267    } else {
2268       if_inst = emit(ir->condition, OPCODE_IF, undef_dst, this->result);
2269    }
2270
2271    this->instructions.push_tail(if_inst);
2272
2273    visit_exec_list(&ir->then_instructions, this);
2274
2275    if (!ir->else_instructions.is_empty()) {
2276       emit(ir->condition, OPCODE_ELSE);
2277       visit_exec_list(&ir->else_instructions, this);
2278    }
2279
2280    if_inst = emit(ir->condition, OPCODE_ENDIF);
2281 }
2282
2283 ir_to_mesa_visitor::ir_to_mesa_visitor()
2284 {
2285    result.file = PROGRAM_UNDEFINED;
2286    next_temp = 1;
2287    next_signature_id = 1;
2288    current_function = NULL;
2289    mem_ctx = ralloc_context(NULL);
2290 }
2291
2292 ir_to_mesa_visitor::~ir_to_mesa_visitor()
2293 {
2294    ralloc_free(mem_ctx);
2295 }
2296
2297 static struct prog_src_register
2298 mesa_src_reg_from_ir_src_reg(src_reg reg)
2299 {
2300    struct prog_src_register mesa_reg;
2301
2302    mesa_reg.File = reg.file;
2303    assert(reg.index < (1 << INST_INDEX_BITS));
2304    mesa_reg.Index = reg.index;
2305    mesa_reg.Swizzle = reg.swizzle;
2306    mesa_reg.RelAddr = reg.reladdr != NULL;
2307    mesa_reg.Negate = reg.negate;
2308    mesa_reg.Abs = 0;
2309    mesa_reg.HasIndex2 = GL_FALSE;
2310    mesa_reg.RelAddr2 = 0;
2311    mesa_reg.Index2 = 0;
2312
2313    return mesa_reg;
2314 }
2315
2316 static void
2317 set_branchtargets(ir_to_mesa_visitor *v,
2318                   struct prog_instruction *mesa_instructions,
2319                   int num_instructions)
2320 {
2321    int if_count = 0, loop_count = 0;
2322    int *if_stack, *loop_stack;
2323    int if_stack_pos = 0, loop_stack_pos = 0;
2324    int i, j;
2325
2326    for (i = 0; i < num_instructions; i++) {
2327       switch (mesa_instructions[i].Opcode) {
2328       case OPCODE_IF:
2329          if_count++;
2330          break;
2331       case OPCODE_BGNLOOP:
2332          loop_count++;
2333          break;
2334       case OPCODE_BRK:
2335       case OPCODE_CONT:
2336          mesa_instructions[i].BranchTarget = -1;
2337          break;
2338       default:
2339          break;
2340       }
2341    }
2342
2343    if_stack = rzalloc_array(v->mem_ctx, int, if_count);
2344    loop_stack = rzalloc_array(v->mem_ctx, int, loop_count);
2345
2346    for (i = 0; i < num_instructions; i++) {
2347       switch (mesa_instructions[i].Opcode) {
2348       case OPCODE_IF:
2349          if_stack[if_stack_pos] = i;
2350          if_stack_pos++;
2351          break;
2352       case OPCODE_ELSE:
2353          mesa_instructions[if_stack[if_stack_pos - 1]].BranchTarget = i;
2354          if_stack[if_stack_pos - 1] = i;
2355          break;
2356       case OPCODE_ENDIF:
2357          mesa_instructions[if_stack[if_stack_pos - 1]].BranchTarget = i;
2358          if_stack_pos--;
2359          break;
2360       case OPCODE_BGNLOOP:
2361          loop_stack[loop_stack_pos] = i;
2362          loop_stack_pos++;
2363          break;
2364       case OPCODE_ENDLOOP:
2365          loop_stack_pos--;
2366          /* Rewrite any breaks/conts at this nesting level (haven't
2367           * already had a BranchTarget assigned) to point to the end
2368           * of the loop.
2369           */
2370          for (j = loop_stack[loop_stack_pos]; j < i; j++) {
2371             if (mesa_instructions[j].Opcode == OPCODE_BRK ||
2372                 mesa_instructions[j].Opcode == OPCODE_CONT) {
2373                if (mesa_instructions[j].BranchTarget == -1) {
2374                   mesa_instructions[j].BranchTarget = i;
2375                }
2376             }
2377          }
2378          /* The loop ends point at each other. */
2379          mesa_instructions[i].BranchTarget = loop_stack[loop_stack_pos];
2380          mesa_instructions[loop_stack[loop_stack_pos]].BranchTarget = i;
2381          break;
2382       case OPCODE_CAL:
2383          foreach_iter(exec_list_iterator, iter, v->function_signatures) {
2384             function_entry *entry = (function_entry *)iter.get();
2385
2386             if (entry->sig_id == mesa_instructions[i].BranchTarget) {
2387                mesa_instructions[i].BranchTarget = entry->inst;
2388                break;
2389             }
2390          }
2391          break;
2392       default:
2393          break;
2394       }
2395    }
2396 }
2397
2398 static void
2399 print_program(struct prog_instruction *mesa_instructions,
2400               ir_instruction **mesa_instruction_annotation,
2401               int num_instructions)
2402 {
2403    ir_instruction *last_ir = NULL;
2404    int i;
2405    int indent = 0;
2406
2407    for (i = 0; i < num_instructions; i++) {
2408       struct prog_instruction *mesa_inst = mesa_instructions + i;
2409       ir_instruction *ir = mesa_instruction_annotation[i];
2410
2411       fprintf(stdout, "%3d: ", i);
2412
2413       if (last_ir != ir && ir) {
2414          int j;
2415
2416          for (j = 0; j < indent; j++) {
2417             fprintf(stdout, " ");
2418          }
2419          ir->print();
2420          printf("\n");
2421          last_ir = ir;
2422
2423          fprintf(stdout, "     "); /* line number spacing. */
2424       }
2425
2426       indent = _mesa_fprint_instruction_opt(stdout, mesa_inst, indent,
2427                                             PROG_PRINT_DEBUG, NULL);
2428    }
2429 }
2430
2431
2432 /**
2433  * Count resources used by the given gpu program (number of texture
2434  * samplers, etc).
2435  */
2436 static void
2437 count_resources(struct gl_program *prog)
2438 {
2439    unsigned int i;
2440
2441    prog->SamplersUsed = 0;
2442
2443    for (i = 0; i < prog->NumInstructions; i++) {
2444       struct prog_instruction *inst = &prog->Instructions[i];
2445
2446       if (_mesa_is_tex_instruction(inst->Opcode)) {
2447          prog->SamplerTargets[inst->TexSrcUnit] =
2448             (gl_texture_index)inst->TexSrcTarget;
2449          prog->SamplersUsed |= 1 << inst->TexSrcUnit;
2450          if (inst->TexShadow) {
2451             prog->ShadowSamplers |= 1 << inst->TexSrcUnit;
2452          }
2453       }
2454    }
2455
2456    _mesa_update_shader_textures_used(prog);
2457 }
2458
2459
2460 /**
2461  * Check if the given vertex/fragment/shader program is within the
2462  * resource limits of the context (number of texture units, etc).
2463  * If any of those checks fail, record a linker error.
2464  *
2465  * XXX more checks are needed...
2466  */
2467 static void
2468 check_resources(const struct gl_context *ctx,
2469                 struct gl_shader_program *shader_program,
2470                 struct gl_program *prog)
2471 {
2472    switch (prog->Target) {
2473    case GL_VERTEX_PROGRAM_ARB:
2474       if (_mesa_bitcount(prog->SamplersUsed) >
2475           ctx->Const.MaxVertexTextureImageUnits) {
2476          linker_error(shader_program,
2477                       "Too many vertex shader texture samplers");
2478       }
2479       if (prog->Parameters->NumParameters > MAX_UNIFORMS) {
2480          linker_error(shader_program, "Too many vertex shader constants");
2481       }
2482       break;
2483    case MESA_GEOMETRY_PROGRAM:
2484       if (_mesa_bitcount(prog->SamplersUsed) >
2485           ctx->Const.MaxGeometryTextureImageUnits) {
2486          linker_error(shader_program,
2487                       "Too many geometry shader texture samplers");
2488       }
2489       if (prog->Parameters->NumParameters >
2490           MAX_GEOMETRY_UNIFORM_COMPONENTS / 4) {
2491          linker_error(shader_program, "Too many geometry shader constants");
2492       }
2493       break;
2494    case GL_FRAGMENT_PROGRAM_ARB:
2495       if (_mesa_bitcount(prog->SamplersUsed) >
2496           ctx->Const.MaxTextureImageUnits) {
2497          linker_error(shader_program,
2498                       "Too many fragment shader texture samplers");
2499       }
2500       if (prog->Parameters->NumParameters > MAX_UNIFORMS) {
2501          linker_error(shader_program, "Too many fragment shader constants");
2502       }
2503       break;
2504    default:
2505       _mesa_problem(ctx, "unexpected program type in check_resources()");
2506    }
2507 }
2508
2509
2510
/* Pairs a uniform with the parameter-list position chosen for it in the
 * current target, so the pairs can be ordered by position.
 */
struct uniform_sort {
   struct gl_uniform *u;
   int pos;
};

/* The shader_program->Uniforms list is almost sorted in increasing
 * uniform->{Frag,Vert}Pos locations, but not quite when there are
 * uniforms shared between targets.  We need to add parameters in
 * increasing order for the targets.
 *
 * qsort() comparator over struct uniform_sort: returns a negative value,
 * zero, or a positive value when a's position is lower than, equal to, or
 * higher than b's.  The result is built from comparisons rather than a
 * subtraction so it cannot overflow, whatever the two positions are.
 */
static int
sort_uniforms(const void *a, const void *b)
{
   const struct uniform_sort *lhs = (const struct uniform_sort *)a;
   const struct uniform_sort *rhs = (const struct uniform_sort *)b;

   return (lhs->pos > rhs->pos) - (lhs->pos < rhs->pos);
}
2529
2530 /* Add the uniforms to the parameters.  The linker chose locations
2531  * in our parameters lists (which weren't created yet), which the
2532  * uniforms code will use to poke values into our parameters list
2533  * when uniforms are updated.
2534  */
2535 static void
2536 add_uniforms_to_parameters_list(struct gl_shader_program *shader_program,
2537                                 struct gl_shader *shader,
2538                                 struct gl_program *prog)
2539 {
2540    unsigned int i;
2541    unsigned int next_sampler = 0, num_uniforms = 0;
2542    struct uniform_sort *sorted_uniforms;
2543
2544    sorted_uniforms = ralloc_array(NULL, struct uniform_sort,
2545                                   shader_program->Uniforms->NumUniforms);
2546
2547    for (i = 0; i < shader_program->Uniforms->NumUniforms; i++) {
2548       struct gl_uniform *uniform = shader_program->Uniforms->Uniforms + i;
2549       int parameter_index = -1;
2550
2551       switch (shader->Type) {
2552       case GL_VERTEX_SHADER:
2553          parameter_index = uniform->VertPos;
2554          break;
2555       case GL_FRAGMENT_SHADER:
2556          parameter_index = uniform->FragPos;
2557          break;
2558       case GL_GEOMETRY_SHADER:
2559          parameter_index = uniform->GeomPos;
2560          break;
2561       }
2562
2563       /* Only add uniforms used in our target. */
2564       if (parameter_index != -1) {
2565          sorted_uniforms[num_uniforms].pos = parameter_index;
2566          sorted_uniforms[num_uniforms].u = uniform;
2567          num_uniforms++;
2568       }
2569    }
2570
2571    qsort(sorted_uniforms, num_uniforms, sizeof(struct uniform_sort),
2572          sort_uniforms);
2573
2574    for (i = 0; i < num_uniforms; i++) {
2575       struct gl_uniform *uniform = sorted_uniforms[i].u;
2576       int parameter_index = sorted_uniforms[i].pos;
2577       const glsl_type *type = uniform->Type;
2578       unsigned int size;
2579
2580       if (type->is_vector() ||
2581           type->is_scalar()) {
2582          size = type->vector_elements;
2583       } else {
2584          size = type_size(type) * 4;
2585       }
2586
2587       gl_register_file file;
2588       if (type->is_sampler() ||
2589           (type->is_array() && type->fields.array->is_sampler())) {
2590          file = PROGRAM_SAMPLER;
2591       } else {
2592          file = PROGRAM_UNIFORM;
2593       }
2594
2595       GLint index = _mesa_lookup_parameter_index(prog->Parameters, -1,
2596                                                  uniform->Name);
2597
2598       if (index < 0) {
2599          index = _mesa_add_parameter(prog->Parameters, file,
2600                                      uniform->Name, size, type->gl_type,
2601                                      NULL, NULL, 0x0);
2602
2603          /* Sampler uniform values are stored in prog->SamplerUnits,
2604           * and the entry in that array is selected by this index we
2605           * store in ParameterValues[].
2606           */
2607          if (file == PROGRAM_SAMPLER) {
2608             for (unsigned int j = 0; j < size / 4; j++)
2609                prog->Parameters->ParameterValues[index + j][0].f = next_sampler++;
2610          }
2611
2612          /* The location chosen in the Parameters list here (returned
2613           * from _mesa_add_uniform) has to match what the linker chose.
2614           */
2615          if (index != parameter_index) {
2616             linker_error(shader_program,
2617                          "Allocation of uniform `%s' to target failed "
2618                          "(%d vs %d)\n",
2619                          uniform->Name, index, parameter_index);
2620          }
2621       }
2622    }
2623
2624    ralloc_free(sorted_uniforms);
2625 }
2626
2627 static void
2628 set_uniform_initializer(struct gl_context *ctx, void *mem_ctx,
2629                         struct gl_shader_program *shader_program,
2630                         const char *name, const glsl_type *type,
2631                         ir_constant *val)
2632 {
2633    if (type->is_record()) {
2634       ir_constant *field_constant;
2635
2636       field_constant = (ir_constant *)val->components.get_head();
2637
2638       for (unsigned int i = 0; i < type->length; i++) {
2639          const glsl_type *field_type = type->fields.structure[i].type;
2640          const char *field_name = ralloc_asprintf(mem_ctx, "%s.%s", name,
2641                                             type->fields.structure[i].name);
2642          set_uniform_initializer(ctx, mem_ctx, shader_program, field_name,
2643                                  field_type, field_constant);
2644          field_constant = (ir_constant *)field_constant->next;
2645       }
2646       return;
2647    }
2648
2649    int loc = _mesa_get_uniform_location(ctx, shader_program, name);
2650
2651    if (loc == -1) {
2652       linker_error(shader_program,
2653                    "Couldn't find uniform for initializer %s\n", name);
2654       return;
2655    }
2656
2657    for (unsigned int i = 0; i < (type->is_array() ? type->length : 1); i++) {
2658       ir_constant *element;
2659       const glsl_type *element_type;
2660       if (type->is_array()) {
2661          element = val->array_elements[i];
2662          element_type = type->fields.array;
2663       } else {
2664          element = val;
2665          element_type = type;
2666       }
2667
2668       void *values;
2669
2670       if (element_type->base_type == GLSL_TYPE_BOOL) {
2671          int *conv = ralloc_array(mem_ctx, int, element_type->components());
2672          for (unsigned int j = 0; j < element_type->components(); j++) {
2673             conv[j] = element->value.b[j];
2674          }
2675          values = (void *)conv;
2676          element_type = glsl_type::get_instance(GLSL_TYPE_INT,
2677                                                 element_type->vector_elements,
2678                                                 1);
2679       } else {
2680          values = &element->value;
2681       }
2682
2683       if (element_type->is_matrix()) {
2684          _mesa_uniform_matrix(ctx, shader_program,
2685                               element_type->matrix_columns,
2686                               element_type->vector_elements,
2687                               loc, 1, GL_FALSE, (GLfloat *)values);
2688          loc += element_type->matrix_columns;
2689       } else {
2690          _mesa_uniform(ctx, shader_program, loc, element_type->matrix_columns,
2691                        values, element_type->gl_type);
2692          loc += type_size(element_type);
2693       }
2694    }
2695 }
2696
2697 static void
2698 set_uniform_initializers(struct gl_context *ctx,
2699                          struct gl_shader_program *shader_program)
2700 {
2701    void *mem_ctx = NULL;
2702
2703    for (unsigned int i = 0; i < MESA_SHADER_TYPES; i++) {
2704       struct gl_shader *shader = shader_program->_LinkedShaders[i];
2705
2706       if (shader == NULL)
2707          continue;
2708
2709       foreach_iter(exec_list_iterator, iter, *shader->ir) {
2710          ir_instruction *ir = (ir_instruction *)iter.get();
2711          ir_variable *var = ir->as_variable();
2712
2713          if (!var || var->mode != ir_var_uniform || !var->constant_value)
2714             continue;
2715
2716          if (!mem_ctx)
2717             mem_ctx = ralloc_context(NULL);
2718
2719          set_uniform_initializer(ctx, mem_ctx, shader_program, var->name,
2720                                  var->type, var->constant_value);
2721       }
2722    }
2723
2724    ralloc_free(mem_ctx);
2725 }
2726
2727 /*
2728  * On a basic block basis, tracks available PROGRAM_TEMPORARY register
2729  * channels for copy propagation and updates following instructions to
2730  * use the original versions.
2731  *
2732  * The ir_to_mesa_visitor lazily produces code assuming that this pass
2733  * will occur.  As an example, a TXP production before this pass:
2734  *
2735  * 0: MOV TEMP[1], INPUT[4].xyyy;
2736  * 1: MOV TEMP[1].w, INPUT[4].wwww;
2737  * 2: TXP TEMP[2], TEMP[1], texture[0], 2D;
2738  *
2739  * and after:
2740  *
2741  * 0: MOV TEMP[1], INPUT[4].xyyy;
2742  * 1: MOV TEMP[1].w, INPUT[4].wwww;
2743  * 2: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D;
2744  *
2745  * which allows for dead code elimination on TEMP[1]'s writes.
2746  */
2747 void
2748 ir_to_mesa_visitor::copy_propagate(void)
2749 {
2750    ir_to_mesa_instruction **acp = rzalloc_array(mem_ctx,
2751                                                     ir_to_mesa_instruction *,
2752                                                     this->next_temp * 4);
2753    int *acp_level = rzalloc_array(mem_ctx, int, this->next_temp * 4);
2754    int level = 0;
2755
2756    foreach_iter(exec_list_iterator, iter, this->instructions) {
2757       ir_to_mesa_instruction *inst = (ir_to_mesa_instruction *)iter.get();
2758
2759       assert(inst->dst.file != PROGRAM_TEMPORARY
2760              || inst->dst.index < this->next_temp);
2761
2762       /* First, do any copy propagation possible into the src regs. */
2763       for (int r = 0; r < 3; r++) {
2764          ir_to_mesa_instruction *first = NULL;
2765          bool good = true;
2766          int acp_base = inst->src[r].index * 4;
2767
2768          if (inst->src[r].file != PROGRAM_TEMPORARY ||
2769              inst->src[r].reladdr)
2770             continue;
2771
2772          /* See if we can find entries in the ACP consisting of MOVs
2773           * from the same src register for all the swizzled channels
2774           * of this src register reference.
2775           */
2776          for (int i = 0; i < 4; i++) {
2777             int src_chan = GET_SWZ(inst->src[r].swizzle, i);
2778             ir_to_mesa_instruction *copy_chan = acp[acp_base + src_chan];
2779
2780             if (!copy_chan) {
2781                good = false;
2782                break;
2783             }
2784
2785             assert(acp_level[acp_base + src_chan] <= level);
2786
2787             if (!first) {
2788                first = copy_chan;
2789             } else {
2790                if (first->src[0].file != copy_chan->src[0].file ||
2791                    first->src[0].index != copy_chan->src[0].index) {
2792                   good = false;
2793                   break;
2794                }
2795             }
2796          }
2797
2798          if (good) {
2799             /* We've now validated that we can copy-propagate to
2800              * replace this src register reference.  Do it.
2801              */
2802             inst->src[r].file = first->src[0].file;
2803             inst->src[r].index = first->src[0].index;
2804
2805             int swizzle = 0;
2806             for (int i = 0; i < 4; i++) {
2807                int src_chan = GET_SWZ(inst->src[r].swizzle, i);
2808                ir_to_mesa_instruction *copy_inst = acp[acp_base + src_chan];
2809                swizzle |= (GET_SWZ(copy_inst->src[0].swizzle, src_chan) <<
2810                            (3 * i));
2811             }
2812             inst->src[r].swizzle = swizzle;
2813          }
2814       }
2815
2816       switch (inst->op) {
2817       case OPCODE_BGNLOOP:
2818       case OPCODE_ENDLOOP:
2819          /* End of a basic block, clear the ACP entirely. */
2820          memset(acp, 0, sizeof(*acp) * this->next_temp * 4);
2821          break;
2822
2823       case OPCODE_IF:
2824          ++level;
2825          break;
2826
2827       case OPCODE_ENDIF:
2828       case OPCODE_ELSE:
2829          /* Clear all channels written inside the block from the ACP, but
2830           * leaving those that were not touched.
2831           */
2832          for (int r = 0; r < this->next_temp; r++) {
2833             for (int c = 0; c < 4; c++) {
2834                if (!acp[4 * r + c])
2835                   continue;
2836
2837                if (acp_level[4 * r + c] >= level)
2838                   acp[4 * r + c] = NULL;
2839             }
2840          }
2841          if (inst->op == OPCODE_ENDIF)
2842             --level;
2843          break;
2844
2845       default:
2846          /* Continuing the block, clear any written channels from
2847           * the ACP.
2848           */
2849          if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.reladdr) {
2850             /* Any temporary might be written, so no copy propagation
2851              * across this instruction.
2852              */
2853             memset(acp, 0, sizeof(*acp) * this->next_temp * 4);
2854          } else if (inst->dst.file == PROGRAM_OUTPUT &&
2855                     inst->dst.reladdr) {
2856             /* Any output might be written, so no copy propagation
2857              * from outputs across this instruction.
2858              */
2859             for (int r = 0; r < this->next_temp; r++) {
2860                for (int c = 0; c < 4; c++) {
2861                   if (!acp[4 * r + c])
2862                      continue;
2863
2864                   if (acp[4 * r + c]->src[0].file == PROGRAM_OUTPUT)
2865                      acp[4 * r + c] = NULL;
2866                }
2867             }
2868          } else if (inst->dst.file == PROGRAM_TEMPORARY ||
2869                     inst->dst.file == PROGRAM_OUTPUT) {
2870             /* Clear where it's used as dst. */
2871             if (inst->dst.file == PROGRAM_TEMPORARY) {
2872                for (int c = 0; c < 4; c++) {
2873                   if (inst->dst.writemask & (1 << c)) {
2874                      acp[4 * inst->dst.index + c] = NULL;
2875                   }
2876                }
2877             }
2878
2879             /* Clear where it's used as src. */
2880             for (int r = 0; r < this->next_temp; r++) {
2881                for (int c = 0; c < 4; c++) {
2882                   if (!acp[4 * r + c])
2883                      continue;
2884
2885                   int src_chan = GET_SWZ(acp[4 * r + c]->src[0].swizzle, c);
2886
2887                   if (acp[4 * r + c]->src[0].file == inst->dst.file &&
2888                       acp[4 * r + c]->src[0].index == inst->dst.index &&
2889                       inst->dst.writemask & (1 << src_chan))
2890                   {
2891                      acp[4 * r + c] = NULL;
2892                   }
2893                }
2894             }
2895          }
2896          break;
2897       }
2898
2899       /* If this is a copy, add it to the ACP. */
2900       if (inst->op == OPCODE_MOV &&
2901           inst->dst.file == PROGRAM_TEMPORARY &&
2902           !inst->dst.reladdr &&
2903           !inst->saturate &&
2904           !inst->src[0].reladdr &&
2905           !inst->src[0].negate) {
2906          for (int i = 0; i < 4; i++) {
2907             if (inst->dst.writemask & (1 << i)) {
2908                acp[4 * inst->dst.index + i] = inst;
2909                acp_level[4 * inst->dst.index + i] = level;
2910             }
2911          }
2912       }
2913    }
2914
2915    ralloc_free(acp_level);
2916    ralloc_free(acp);
2917 }
2918
2919
2920 /**
2921  * Convert a shader's GLSL IR into a Mesa gl_program.
2922  */
2923 static struct gl_program *
2924 get_mesa_program(struct gl_context *ctx,
2925                  struct gl_shader_program *shader_program,
2926                  struct gl_shader *shader)
2927 {
2928    ir_to_mesa_visitor v;
2929    struct prog_instruction *mesa_instructions, *mesa_inst;
2930    ir_instruction **mesa_instruction_annotation;
2931    int i;
2932    struct gl_program *prog;
2933    GLenum target;
2934    const char *target_string;
2935    GLboolean progress;
2936    struct gl_shader_compiler_options *options =
2937          &ctx->ShaderCompilerOptions[_mesa_shader_type_to_index(shader->Type)];
2938
2939    switch (shader->Type) {
2940    case GL_VERTEX_SHADER:
2941       target = GL_VERTEX_PROGRAM_ARB;
2942       target_string = "vertex";
2943       break;
2944    case GL_FRAGMENT_SHADER:
2945       target = GL_FRAGMENT_PROGRAM_ARB;
2946       target_string = "fragment";
2947       break;
2948    case GL_GEOMETRY_SHADER:
2949       target = GL_GEOMETRY_PROGRAM_NV;
2950       target_string = "geometry";
2951       break;
2952    default:
2953       assert(!"should not be reached");
2954       return NULL;
2955    }
2956
2957    validate_ir_tree(shader->ir);
2958
2959    prog = ctx->Driver.NewProgram(ctx, target, shader_program->Name);
2960    if (!prog)
2961       return NULL;
2962    prog->Parameters = _mesa_new_parameter_list();
2963    prog->Varying = _mesa_new_parameter_list();
2964    prog->Attributes = _mesa_new_parameter_list();
2965    v.ctx = ctx;
2966    v.prog = prog;
2967    v.shader_program = shader_program;
2968    v.options = options;
2969
2970    add_uniforms_to_parameters_list(shader_program, shader, prog);
2971
2972    /* Emit Mesa IR for main(). */
2973    visit_exec_list(shader->ir, &v);
2974    v.emit(NULL, OPCODE_END);
2975
2976    /* Now emit bodies for any functions that were used. */
2977    do {
2978       progress = GL_FALSE;
2979
2980       foreach_iter(exec_list_iterator, iter, v.function_signatures) {
2981          function_entry *entry = (function_entry *)iter.get();
2982
2983          if (!entry->bgn_inst) {
2984             v.current_function = entry;
2985
2986             entry->bgn_inst = v.emit(NULL, OPCODE_BGNSUB);
2987             entry->bgn_inst->function = entry;
2988
2989             visit_exec_list(&entry->sig->body, &v);
2990
2991             ir_to_mesa_instruction *last;
2992             last = (ir_to_mesa_instruction *)v.instructions.get_tail();
2993             if (last->op != OPCODE_RET)
2994                v.emit(NULL, OPCODE_RET);
2995
2996             ir_to_mesa_instruction *end;
2997             end = v.emit(NULL, OPCODE_ENDSUB);
2998             end->function = entry;
2999
3000             progress = GL_TRUE;
3001          }
3002       }
3003    } while (progress);
3004
3005    prog->NumTemporaries = v.next_temp;
3006
3007    int num_instructions = 0;
3008    foreach_iter(exec_list_iterator, iter, v.instructions) {
3009       num_instructions++;
3010    }
3011
3012    mesa_instructions =
3013       (struct prog_instruction *)calloc(num_instructions,
3014                                         sizeof(*mesa_instructions));
3015    mesa_instruction_annotation = ralloc_array(v.mem_ctx, ir_instruction *,
3016                                               num_instructions);
3017
3018    v.copy_propagate();
3019
3020    /* Convert ir_mesa_instructions into prog_instructions.
3021     */
3022    mesa_inst = mesa_instructions;
3023    i = 0;
3024    foreach_iter(exec_list_iterator, iter, v.instructions) {
3025       const ir_to_mesa_instruction *inst = (ir_to_mesa_instruction *)iter.get();
3026
3027       mesa_inst->Opcode = inst->op;
3028       mesa_inst->CondUpdate = inst->cond_update;
3029       if (inst->saturate)
3030          mesa_inst->SaturateMode = SATURATE_ZERO_ONE;
3031       mesa_inst->DstReg.File = inst->dst.file;
3032       mesa_inst->DstReg.Index = inst->dst.index;
3033       mesa_inst->DstReg.CondMask = inst->dst.cond_mask;
3034       mesa_inst->DstReg.WriteMask = inst->dst.writemask;
3035       mesa_inst->DstReg.RelAddr = inst->dst.reladdr != NULL;
3036       mesa_inst->SrcReg[0] = mesa_src_reg_from_ir_src_reg(inst->src[0]);
3037       mesa_inst->SrcReg[1] = mesa_src_reg_from_ir_src_reg(inst->src[1]);
3038       mesa_inst->SrcReg[2] = mesa_src_reg_from_ir_src_reg(inst->src[2]);
3039       mesa_inst->TexSrcUnit = inst->sampler;
3040       mesa_inst->TexSrcTarget = inst->tex_target;
3041       mesa_inst->TexShadow = inst->tex_shadow;
3042       mesa_instruction_annotation[i] = inst->ir;
3043
3044       /* Set IndirectRegisterFiles. */
3045       if (mesa_inst->DstReg.RelAddr)
3046          prog->IndirectRegisterFiles |= 1 << mesa_inst->DstReg.File;
3047
3048       /* Update program's bitmask of indirectly accessed register files */
3049       for (unsigned src = 0; src < 3; src++)
3050          if (mesa_inst->SrcReg[src].RelAddr)
3051             prog->IndirectRegisterFiles |= 1 << mesa_inst->SrcReg[src].File;
3052
3053       switch (mesa_inst->Opcode) {
3054       case OPCODE_IF:
3055          if (options->EmitNoIfs) {
3056             linker_warning(shader_program,
3057                            "Couldn't flatten if-statement.  "
3058                            "This will likely result in software "
3059                            "rasterization.\n");
3060          }
3061          break;
3062       case OPCODE_BGNLOOP:
3063          if (options->EmitNoLoops) {
3064             linker_warning(shader_program,
3065                            "Couldn't unroll loop.  "
3066                            "This will likely result in software "
3067                            "rasterization.\n");
3068          }
3069          break;
3070       case OPCODE_CONT:
3071          if (options->EmitNoCont) {
3072             linker_warning(shader_program,
3073                            "Couldn't lower continue-statement.  "
3074                            "This will likely result in software "
3075                            "rasterization.\n");
3076          }
3077          break;
3078       case OPCODE_BGNSUB:
3079          inst->function->inst = i;
3080          mesa_inst->Comment = strdup(inst->function->sig->function_name());
3081          break;
3082       case OPCODE_ENDSUB:
3083          mesa_inst->Comment = strdup(inst->function->sig->function_name());
3084          break;
3085       case OPCODE_CAL:
3086          mesa_inst->BranchTarget = inst->function->sig_id; /* rewritten later */
3087          break;
3088       case OPCODE_ARL:
3089          prog->NumAddressRegs = 1;
3090          break;
3091       default:
3092          break;
3093       }
3094
3095       mesa_inst++;
3096       i++;
3097
3098       if (!shader_program->LinkStatus)
3099          break;
3100    }
3101
3102    if (!shader_program->LinkStatus) {
3103       free(mesa_instructions);
3104       _mesa_reference_program(ctx, &shader->Program, NULL);
3105       return NULL;
3106    }
3107
3108    set_branchtargets(&v, mesa_instructions, num_instructions);
3109
3110    if (ctx->Shader.Flags & GLSL_DUMP) {
3111       printf("\n");
3112       printf("GLSL IR for linked %s program %d:\n", target_string,
3113              shader_program->Name);
3114       _mesa_print_ir(shader->ir, NULL);
3115       printf("\n");
3116       printf("\n");
3117       printf("Mesa IR for linked %s program %d:\n", target_string,
3118              shader_program->Name);
3119       print_program(mesa_instructions, mesa_instruction_annotation,
3120                     num_instructions);
3121    }
3122
3123    prog->Instructions = mesa_instructions;
3124    prog->NumInstructions = num_instructions;
3125
3126    do_set_program_inouts(shader->ir, prog);
3127    count_resources(prog);
3128
3129    check_resources(ctx, shader_program, prog);
3130
3131    _mesa_reference_program(ctx, &shader->Program, prog);
3132
3133    if ((ctx->Shader.Flags & GLSL_NO_OPT) == 0) {
3134       _mesa_optimize_program(ctx, prog);
3135    }
3136
3137    return prog;
3138 }
3139
3140 extern "C" {
3141
3142 /**
3143  * Link a shader.
3144  * Called via ctx->Driver.LinkShader()
3145  * This actually involves converting GLSL IR into Mesa gl_programs with
3146  * code lowering and other optimizations.
3147  */
3148 GLboolean
3149 _mesa_ir_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
3150 {
3151    assert(prog->LinkStatus);
3152
3153    for (unsigned i = 0; i < MESA_SHADER_TYPES; i++) {
3154       if (prog->_LinkedShaders[i] == NULL)
3155          continue;
3156
3157       bool progress;
3158       exec_list *ir = prog->_LinkedShaders[i]->ir;
3159       const struct gl_shader_compiler_options *options =
3160             &ctx->ShaderCompilerOptions[_mesa_shader_type_to_index(prog->_LinkedShaders[i]->Type)];
3161
3162       do {
3163          progress = false;
3164
3165          /* Lowering */
3166          do_mat_op_to_vec(ir);
3167          lower_instructions(ir, (MOD_TO_FRACT | DIV_TO_MUL_RCP | EXP_TO_EXP2
3168                                  | LOG_TO_LOG2
3169                                  | ((options->EmitNoPow) ? POW_TO_EXP2 : 0)));
3170
3171          progress = do_lower_jumps(ir, true, true, options->EmitNoMainReturn, options->EmitNoCont, options->EmitNoLoops) || progress;
3172
3173          progress = do_common_optimization(ir, true, options->MaxUnrollIterations) || progress;
3174
3175          progress = lower_quadop_vector(ir, true) || progress;
3176
3177          if (options->EmitNoIfs) {
3178             progress = lower_discard(ir) || progress;
3179             progress = lower_if_to_cond_assign(ir) || progress;
3180          }
3181
3182          if (options->EmitNoNoise)
3183             progress = lower_noise(ir) || progress;
3184
3185          /* If there are forms of indirect addressing that the driver
3186           * cannot handle, perform the lowering pass.
3187           */
3188          if (options->EmitNoIndirectInput || options->EmitNoIndirectOutput
3189              || options->EmitNoIndirectTemp || options->EmitNoIndirectUniform)
3190            progress =
3191              lower_variable_index_to_cond_assign(ir,
3192                                                  options->EmitNoIndirectInput,
3193                                                  options->EmitNoIndirectOutput,
3194                                                  options->EmitNoIndirectTemp,
3195                                                  options->EmitNoIndirectUniform)
3196              || progress;
3197
3198          progress = do_vec_index_to_cond_assign(ir) || progress;
3199       } while (progress);
3200
3201       validate_ir_tree(ir);
3202    }
3203
3204    for (unsigned i = 0; i < MESA_SHADER_TYPES; i++) {
3205       struct gl_program *linked_prog;
3206
3207       if (prog->_LinkedShaders[i] == NULL)
3208          continue;
3209
3210       linked_prog = get_mesa_program(ctx, prog, prog->_LinkedShaders[i]);
3211
3212       if (linked_prog) {
3213          bool ok = true;
3214
3215          switch (prog->_LinkedShaders[i]->Type) {
3216          case GL_VERTEX_SHADER:
3217             _mesa_reference_vertprog(ctx, &prog->VertexProgram,
3218                                      (struct gl_vertex_program *)linked_prog);
3219             ok = ctx->Driver.ProgramStringNotify(ctx, GL_VERTEX_PROGRAM_ARB,
3220                                                  linked_prog);
3221             break;
3222          case GL_FRAGMENT_SHADER:
3223             _mesa_reference_fragprog(ctx, &prog->FragmentProgram,
3224                                      (struct gl_fragment_program *)linked_prog);
3225             ok = ctx->Driver.ProgramStringNotify(ctx, GL_FRAGMENT_PROGRAM_ARB,
3226                                                  linked_prog);
3227             break;
3228          case GL_GEOMETRY_SHADER:
3229             _mesa_reference_geomprog(ctx, &prog->GeometryProgram,
3230                                      (struct gl_geometry_program *)linked_prog);
3231             ok = ctx->Driver.ProgramStringNotify(ctx, GL_GEOMETRY_PROGRAM_NV,
3232                                                  linked_prog);
3233             break;
3234          }
3235          if (!ok) {
3236             return GL_FALSE;
3237          }
3238       }
3239
3240       _mesa_reference_program(ctx, &linked_prog, NULL);
3241    }
3242
3243    return GL_TRUE;
3244 }
3245
3246
3247 /**
3248  * Compile a GLSL shader.  Called via glCompileShader().
3249  */
3250 void
3251 _mesa_glsl_compile_shader(struct gl_context *ctx, struct gl_shader *shader)
3252 {
3253    struct _mesa_glsl_parse_state *state =
3254       new(shader) _mesa_glsl_parse_state(ctx, shader->Type, shader);
3255
3256    const char *source = shader->Source;
3257    /* Check if the user called glCompileShader without first calling
3258     * glShaderSource.  This should fail to compile, but not raise a GL_ERROR.
3259     */
3260    if (source == NULL) {
3261       shader->CompileStatus = GL_FALSE;
3262       return;
3263    }
3264
3265    state->error = preprocess(state, &source, &state->info_log,
3266                              &ctx->Extensions, ctx->API);
3267
3268    if (ctx->Shader.Flags & GLSL_DUMP) {
3269       printf("GLSL source for %s shader %d:\n",
3270              _mesa_glsl_shader_target_name(state->target), shader->Name);
3271       printf("%s\n", shader->Source);
3272    }
3273
3274    if (!state->error) {
3275      _mesa_glsl_lexer_ctor(state, source);
3276      _mesa_glsl_parse(state);
3277      _mesa_glsl_lexer_dtor(state);
3278    }
3279
3280    ralloc_free(shader->ir);
3281    shader->ir = new(shader) exec_list;
3282    if (!state->error && !state->translation_unit.is_empty())
3283       _mesa_ast_to_hir(shader->ir, state);
3284
3285    if (!state->error && !shader->ir->is_empty()) {
3286       validate_ir_tree(shader->ir);
3287
3288       /* Do some optimization at compile time to reduce shader IR size
3289        * and reduce later work if the same shader is linked multiple times
3290        */
3291       while (do_common_optimization(shader->ir, false, 32))
3292          ;
3293
3294       validate_ir_tree(shader->ir);
3295    }
3296
3297    shader->symbols = state->symbols;
3298
3299    shader->CompileStatus = !state->error;
3300    shader->InfoLog = state->info_log;
3301    shader->Version = state->language_version;
3302    memcpy(shader->builtins_to_link, state->builtins_to_link,
3303           sizeof(shader->builtins_to_link[0]) * state->num_builtins_to_link);
3304    shader->num_builtins_to_link = state->num_builtins_to_link;
3305
3306    if (ctx->Shader.Flags & GLSL_LOG) {
3307       _mesa_write_shader_to_file(shader);
3308    }
3309
3310    if (ctx->Shader.Flags & GLSL_DUMP) {
3311       if (shader->CompileStatus) {
3312          printf("GLSL IR for shader %d:\n", shader->Name);
3313          _mesa_print_ir(shader->ir, NULL);
3314          printf("\n\n");
3315       } else {
3316          printf("GLSL shader %d failed to compile.\n", shader->Name);
3317       }
3318       if (shader->InfoLog && shader->InfoLog[0] != 0) {
3319          printf("GLSL shader %d info log:\n", shader->Name);
3320          printf("%s\n", shader->InfoLog);
3321       }
3322    }
3323
3324    /* Retain any live IR, but trash the rest. */
3325    reparent_ir(shader->ir, shader->ir);
3326
3327    ralloc_free(state);
3328 }
3329
3330
3331 /**
3332  * Link a GLSL shader program.  Called via glLinkProgram().
3333  */
3334 void
3335 _mesa_glsl_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
3336 {
3337    unsigned int i;
3338
3339    _mesa_clear_shader_program_data(ctx, prog);
3340
3341    prog->LinkStatus = GL_TRUE;
3342
3343    for (i = 0; i < prog->NumShaders; i++) {
3344       if (!prog->Shaders[i]->CompileStatus) {
3345          linker_error(prog, "linking with uncompiled shader");
3346          prog->LinkStatus = GL_FALSE;
3347       }
3348    }
3349
3350    prog->Varying = _mesa_new_parameter_list();
3351    _mesa_reference_vertprog(ctx, &prog->VertexProgram, NULL);
3352    _mesa_reference_fragprog(ctx, &prog->FragmentProgram, NULL);
3353    _mesa_reference_geomprog(ctx, &prog->GeometryProgram, NULL);
3354
3355    if (prog->LinkStatus) {
3356       link_shaders(ctx, prog);
3357    }
3358
3359    if (prog->LinkStatus) {
3360       if (!ctx->Driver.LinkShader(ctx, prog)) {
3361          prog->LinkStatus = GL_FALSE;
3362       }
3363    }
3364
3365    set_uniform_initializers(ctx, prog);
3366
3367    if (ctx->Shader.Flags & GLSL_DUMP) {
3368       if (!prog->LinkStatus) {
3369          printf("GLSL shader program %d failed to link\n", prog->Name);
3370       }
3371
3372       if (prog->InfoLog && prog->InfoLog[0] != 0) {
3373          printf("GLSL shader program %d info log:\n", prog->Name);
3374          printf("%s\n", prog->InfoLog);
3375       }
3376    }
3377 }
3378
3379 } /* extern "C" */