src/mesa/program/ir_to_mesa.cpp

   1 /*
   2  * Copyright (C) 2005-2007  Brian Paul   All Rights Reserved.
   3  * Copyright (C) 2008  VMware, Inc.   All Rights Reserved.
   4  * Copyright © 2010 Intel Corporation
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a
   7  * copy of this software and associated documentation files (the "Software"),
   8  * to deal in the Software without restriction, including without limitation
   9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  10  * and/or sell copies of the Software, and to permit persons to whom the
  11  * Software is furnished to do so, subject to the following conditions:
  12  *
  13  * The above copyright notice and this permission notice (including the next
  14  * paragraph) shall be included in all copies or substantial portions of the
  15  * Software.
  16  *
  17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  20  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  22  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  23  * DEALINGS IN THE SOFTWARE.
  24  */
  25
  26 /**
  27  * \file ir_to_mesa.cpp
  28  *
  29  * Translate GLSL IR to Mesa's gl_program representation.
  30  */
  31
  32 #include <stdio.h>
  33 #include "main/compiler.h"
  34 #include "main/mtypes.h"
  35 #include "main/shaderapi.h"
  36 #include "main/shaderobj.h"
  37 #include "main/uniforms.h"
  38 #include "compiler/glsl/ast.h"
  39 #include "compiler/glsl/ir.h"
  40 #include "compiler/glsl/ir_expression_flattening.h"
  41 #include "compiler/glsl/ir_visitor.h"
  42 #include "compiler/glsl/ir_optimization.h"
  43 #include "compiler/glsl/ir_uniform.h"
  44 #include "compiler/glsl/glsl_parser_extras.h"
  45 #include "compiler/glsl_types.h"
  46 #include "compiler/glsl/linker.h"
  47 #include "compiler/glsl/program.h"
  48 #include "program/hash_table.h"
  49 #include "program/prog_instruction.h"
  50 #include "program/prog_optimize.h"
  51 #include "program/prog_print.h"
  52 #include "program/program.h"
  53 #include "program/prog_parameter.h"
  54
  55
  56 static int swizzle_for_size(int size);
  57
  58 namespace {
  59
  60 class src_reg;
  61 class dst_reg;
  62
  63 /**
  64  * This struct is a corresponding struct to Mesa prog_src_register, with
  65  * wider fields.
  66  */
  67 class src_reg {
  68 public:
  69    src_reg(gl_register_file file, int index, const glsl_type *type)
  70    {
  71       this->file = file;
  72       this->index = index;
  73       if (type && (type->is_scalar() || type->is_vector() || type->is_matrix()))
  74          this->swizzle = swizzle_for_size(type->vector_elements);
  75       else
  76          this->swizzle = SWIZZLE_XYZW;
  77       this->negate = 0;
  78       this->reladdr = NULL;
  79    }
  80
  81    src_reg()
  82    {
  83       this->file = PROGRAM_UNDEFINED;
  84       this->index = 0;
  85       this->swizzle = 0;
  86       this->negate = 0;
  87       this->reladdr = NULL;
  88    }
  89
  90    explicit src_reg(dst_reg reg);
  91
  92    gl_register_file file; /**< PROGRAM_* from Mesa */
  93    int index; /**< temporary index, VERT_ATTRIB_*, VARYING_SLOT_*, etc. */
  94    GLuint swizzle; /**< SWIZZLE_XYZWONEZERO swizzles from Mesa. */
  95    int negate; /**< NEGATE_XYZW mask from mesa */
  96    /** Register index should be offset by the integer in this reg. */
  97    src_reg *reladdr;
  98 };
  99
 100 class dst_reg {
 101 public:
 102    dst_reg(gl_register_file file, int writemask)
 103    {
 104       this->file = file;
 105       this->index = 0;
 106       this->writemask = writemask;
 107       this->cond_mask = COND_TR;
 108       this->reladdr = NULL;
 109    }
 110
 111    dst_reg()
 112    {
 113       this->file = PROGRAM_UNDEFINED;
 114       this->index = 0;
 115       this->writemask = 0;
 116       this->cond_mask = COND_TR;
 117       this->reladdr = NULL;
 118    }
 119
 120    explicit dst_reg(src_reg reg);
 121
 122    gl_register_file file; /**< PROGRAM_* from Mesa */
 123    int index; /**< temporary index, VERT_ATTRIB_*, VARYING_SLOT_*, etc. */
 124    int writemask; /**< Bitfield of WRITEMASK_[XYZW] */
 125    GLuint cond_mask:4;
 126    /** Register index should be offset by the integer in this reg. */
 127    src_reg *reladdr;
 128 };
 129
 130 } /* anonymous namespace */
 131
 132 src_reg::src_reg(dst_reg reg)
 133 {
 134    this->file = reg.file;
 135    this->index = reg.index;
 136    this->swizzle = SWIZZLE_XYZW;
 137    this->negate = 0;
 138    this->reladdr = reg.reladdr;
 139 }
 140
 141 dst_reg::dst_reg(src_reg reg)
 142 {
 143    this->file = reg.file;
 144    this->index = reg.index;
 145    this->writemask = WRITEMASK_XYZW;
 146    this->cond_mask = COND_TR;
 147    this->reladdr = reg.reladdr;
 148 }
 149
 150 namespace {
 151
/**
 * One instruction in the visitor's intermediate instruction list.
 *
 * Instances are created by ir_to_mesa_visitor::emit() and appended to
 * ir_to_mesa_visitor::instructions through the exec_node base class.
 */
class ir_to_mesa_instruction : public exec_node {
public:
   DECLARE_RALLOC_CXX_OPERATORS(ir_to_mesa_instruction)

   enum prog_opcode op; /**< opcode passed to emit() */
   dst_reg dst; /**< destination operand */
   src_reg src[3]; /**< source operands; emit() fills unused ones with undef_src */
   /** Pointer to the ir source this tree came from for debugging */
   ir_instruction *ir;
   /* NOTE(review): the fields below are not written by emit() itself;
    * presumably callers fill them in on the returned instruction -- confirm.
    */
   GLboolean cond_update;
   bool saturate;
   int sampler; /**< sampler index */
   int tex_target; /**< One of TEXTURE_*_INDEX */
   GLboolean tex_shadow;
};
 167
/**
 * Records which register (file + index) holds a given ir_variable.
 *
 * Entries are kept in ir_to_mesa_visitor::variables and looked up by
 * variable pointer in find_variable_storage().
 */
class variable_storage : public exec_node {
public:
   /** Associate \p var with register \p index of \p file. */
   variable_storage(ir_variable *var, gl_register_file file, int index)
      : file(file), index(index), var(var)
   {
      /* empty */
   }

   gl_register_file file; /* register file holding the variable */
   int index; /* first register index within that file */
   ir_variable *var; /* variable that maps to this, if any */
};
 180
/**
 * Bookkeeping for one function signature known to the visitor.
 *
 * Entries are kept in ir_to_mesa_visitor::function_signatures.
 */
class function_entry : public exec_node {
public:
   /** The GLSL IR signature this entry describes. */
   ir_function_signature *sig;

   /**
    * identifier of this function signature used by the program.
    *
    * At the point that Mesa instructions for function calls are
    * generated, we don't know the address of the first instruction of
    * the function body.  So we make the BranchTarget that is called a
    * small integer and rewrite them during set_branchtargets().
    */
   int sig_id;

   /**
    * Pointer to first instruction of the function body.
    *
    * Set during function body emits after main() is processed.
    */
   ir_to_mesa_instruction *bgn_inst;

   /**
    * Index of the first instruction of the function body in actual
    * Mesa IR.
    *
    * Set after conversion from ir_to_mesa_instruction to prog_instruction.
    */
   int inst;

   /** Storage for the return value. */
   src_reg return_reg;
};
 213
/**
 * IR visitor that walks GLSL IR and appends ir_to_mesa_instruction entries
 * to \c instructions.
 *
 * Each rvalue visit leaves the register holding its value in \c result;
 * callers reset result.file to PROGRAM_UNDEFINED beforehand to detect visits
 * that produced nothing.
 */
class ir_to_mesa_visitor : public ir_visitor {
public:
   ir_to_mesa_visitor();
   ~ir_to_mesa_visitor();

   function_entry *current_function;

   struct gl_context *ctx;
   struct gl_program *prog;
   struct gl_shader_program *shader_program;
   struct gl_shader_compiler_options *options;

   /** Index of the next unused PROGRAM_TEMPORARY register. */
   int next_temp;

   /** Look up the storage recorded for \p var; NULL when there is none. */
   variable_storage *find_variable_storage(const ir_variable *var);

   /** Reserve temporary register(s) sized for \p type. */
   src_reg get_temp(const glsl_type *type);
   /** Resolve a relative-addressed source before it is used by emit(). */
   void reladdr_to_temp(ir_instruction *ir, src_reg *reg, int *num_reladdr);

   /** Return a PROGRAM_CONSTANT source holding \p val. */
   src_reg src_reg_for_float(float val);

   /**
    * \name Visit methods
    *
    * As typical for the visitor pattern, there must be one \c visit method for
    * each concrete subclass of \c ir_instruction.  Virtual base classes within
    * the hierarchy should not have \c visit methods.
    */
   /*@{*/
   virtual void visit(ir_variable *);
   virtual void visit(ir_loop *);
   virtual void visit(ir_loop_jump *);
   virtual void visit(ir_function_signature *);
   virtual void visit(ir_function *);
   virtual void visit(ir_expression *);
   virtual void visit(ir_swizzle *);
   virtual void visit(ir_dereference_variable  *);
   virtual void visit(ir_dereference_array *);
   virtual void visit(ir_dereference_record *);
   virtual void visit(ir_assignment *);
   virtual void visit(ir_constant *);
   virtual void visit(ir_call *);
   virtual void visit(ir_return *);
   virtual void visit(ir_discard *);
   virtual void visit(ir_texture *);
   virtual void visit(ir_if *);
   virtual void visit(ir_emit_vertex *);
   virtual void visit(ir_end_primitive *);
   virtual void visit(ir_barrier *);
   /*@}*/

   /** Register holding the value of the most recently visited rvalue. */
   src_reg result;

   /** List of variable_storage */
   exec_list variables;

   /** List of function_entry */
   exec_list function_signatures;
   int next_signature_id;

   /** List of ir_to_mesa_instruction */
   exec_list instructions;

   /* emit() overloads: append one instruction with zero to three sources. */
   ir_to_mesa_instruction *emit(ir_instruction *ir, enum prog_opcode op);

   ir_to_mesa_instruction *emit(ir_instruction *ir, enum prog_opcode op,
                                dst_reg dst, src_reg src0);

   ir_to_mesa_instruction *emit(ir_instruction *ir, enum prog_opcode op,
                                dst_reg dst, src_reg src0, src_reg src1);

   ir_to_mesa_instruction *emit(ir_instruction *ir, enum prog_opcode op,
                                dst_reg dst,
                                src_reg src0, src_reg src1, src_reg src2);

   /**
    * Emit the correct dot-product instruction for the type of arguments
    */
   ir_to_mesa_instruction * emit_dp(ir_instruction *ir,
                                    dst_reg dst,
                                    src_reg src0,
                                    src_reg src1,
                                    unsigned elements);

   /* emit_scalar() overloads: emit a scalar-only opcode once per distinct
    * source channel needed by the destination writemask.
    */
   void emit_scalar(ir_instruction *ir, enum prog_opcode op,
                    dst_reg dst, src_reg src0);

   void emit_scalar(ir_instruction *ir, enum prog_opcode op,
                    dst_reg dst, src_reg src0, src_reg src1);

   /* Peephole helpers; each returns true when it emitted the expression. */
   bool try_emit_mad(ir_expression *ir,
                          int mul_operand);
   bool try_emit_mad_for_and_not(ir_expression *ir,
                                 int mul_operand);

   /** Emit an ir_quadop_vector expression as a single OPCODE_SWZ. */
   void emit_swz(ir_expression *ir);

   bool process_move_condition(ir_rvalue *ir);

   void copy_propagate(void);

   /** Allocation context passed to placement new for objects created here. */
   void *mem_ctx;
};
 317
 318 } /* anonymous namespace */
 319
/** Placeholder passed to emit() for source operands an opcode does not use. */
static src_reg undef_src = src_reg(PROGRAM_UNDEFINED, 0, NULL);

/**
 * Placeholder destination for instructions that write no register.
 *
 * NOTE(review): the second constructor argument is a writemask, yet a
 * swizzle constant (SWIZZLE_NOOP) is passed here -- confirm this is intended.
 */
static dst_reg undef_dst = dst_reg(PROGRAM_UNDEFINED, SWIZZLE_NOOP);

/** Destination written by the OPCODE_ARL instructions that emit() generates. */
static dst_reg address_reg = dst_reg(PROGRAM_ADDRESS, WRITEMASK_X);
 325
 326 static int
 327 swizzle_for_size(int size)
 328 {
 329    static const int size_swizzles[4] = {
 330       MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
 331       MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y),
 332       MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z),
 333       MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W),
 334    };
 335
 336    assert((size >= 1) && (size <= 4));
 337    return size_swizzles[size - 1];
 338 }
 339
 340 ir_to_mesa_instruction *
 341 ir_to_mesa_visitor::emit(ir_instruction *ir, enum prog_opcode op,
 342                          dst_reg dst,
 343                          src_reg src0, src_reg src1, src_reg src2)
 344 {
 345    ir_to_mesa_instruction *inst = new(mem_ctx) ir_to_mesa_instruction();
 346    int num_reladdr = 0;
 347
 348    /* If we have to do relative addressing, we want to load the ARL
 349     * reg directly for one of the regs, and preload the other reladdr
 350     * sources into temps.
 351     */
 352    num_reladdr += dst.reladdr != NULL;
 353    num_reladdr += src0.reladdr != NULL;
 354    num_reladdr += src1.reladdr != NULL;
 355    num_reladdr += src2.reladdr != NULL;
 356
 357    reladdr_to_temp(ir, &src2, &num_reladdr);
 358    reladdr_to_temp(ir, &src1, &num_reladdr);
 359    reladdr_to_temp(ir, &src0, &num_reladdr);
 360
 361    if (dst.reladdr) {
 362       emit(ir, OPCODE_ARL, address_reg, *dst.reladdr);
 363       num_reladdr--;
 364    }
 365    assert(num_reladdr == 0);
 366
 367    inst->op = op;
 368    inst->dst = dst;
 369    inst->src[0] = src0;
 370    inst->src[1] = src1;
 371    inst->src[2] = src2;
 372    inst->ir = ir;
 373
 374    this->instructions.push_tail(inst);
 375
 376    return inst;
 377 }
 378
 379
 380 ir_to_mesa_instruction *
 381 ir_to_mesa_visitor::emit(ir_instruction *ir, enum prog_opcode op,
 382                          dst_reg dst, src_reg src0, src_reg src1)
 383 {
 384    return emit(ir, op, dst, src0, src1, undef_src);
 385 }
 386
 387 ir_to_mesa_instruction *
 388 ir_to_mesa_visitor::emit(ir_instruction *ir, enum prog_opcode op,
 389                          dst_reg dst, src_reg src0)
 390 {
 391    assert(dst.writemask != 0);
 392    return emit(ir, op, dst, src0, undef_src, undef_src);
 393 }
 394
 395 ir_to_mesa_instruction *
 396 ir_to_mesa_visitor::emit(ir_instruction *ir, enum prog_opcode op)
 397 {
 398    return emit(ir, op, undef_dst, undef_src, undef_src, undef_src);
 399 }
 400
 401 ir_to_mesa_instruction *
 402 ir_to_mesa_visitor::emit_dp(ir_instruction *ir,
 403                             dst_reg dst, src_reg src0, src_reg src1,
 404                             unsigned elements)
 405 {
 406    static const enum prog_opcode dot_opcodes[] = {
 407       OPCODE_DP2, OPCODE_DP3, OPCODE_DP4
 408    };
 409
 410    return emit(ir, dot_opcodes[elements - 2], dst, src0, src1);
 411 }
 412
 413 /**
 414  * Emits Mesa scalar opcodes to produce unique answers across channels.
 415  *
 416  * Some Mesa opcodes are scalar-only, like ARB_fp/vp.  The src X
 417  * channel determines the result across all channels.  So to do a vec4
 418  * of this operation, we want to emit a scalar per source channel used
 419  * to produce dest channels.
 420  */
 421 void
 422 ir_to_mesa_visitor::emit_scalar(ir_instruction *ir, enum prog_opcode op,
 423                                 dst_reg dst,
 424                                 src_reg orig_src0, src_reg orig_src1)
 425 {
 426    int i, j;
 427    int done_mask = ~dst.writemask;
 428
 429    /* Mesa RCP is a scalar operation splatting results to all channels,
 430     * like ARB_fp/vp.  So emit as many RCPs as necessary to cover our
 431     * dst channels.
 432     */
 433    for (i = 0; i < 4; i++) {
 434       GLuint this_mask = (1 << i);
 435       ir_to_mesa_instruction *inst;
 436       src_reg src0 = orig_src0;
 437       src_reg src1 = orig_src1;
 438
 439       if (done_mask & this_mask)
 440          continue;
 441
 442       GLuint src0_swiz = GET_SWZ(src0.swizzle, i);
 443       GLuint src1_swiz = GET_SWZ(src1.swizzle, i);
 444       for (j = i + 1; j < 4; j++) {
 445          /* If there is another enabled component in the destination that is
 446           * derived from the same inputs, generate its value on this pass as
 447           * well.
 448           */
 449          if (!(done_mask & (1 << j)) &&
 450              GET_SWZ(src0.swizzle, j) == src0_swiz &&
 451              GET_SWZ(src1.swizzle, j) == src1_swiz) {
 452             this_mask |= (1 << j);
 453          }
 454       }
 455       src0.swizzle = MAKE_SWIZZLE4(src0_swiz, src0_swiz,
 456                                    src0_swiz, src0_swiz);
 457       src1.swizzle = MAKE_SWIZZLE4(src1_swiz, src1_swiz,
 458                                   src1_swiz, src1_swiz);
 459
 460       inst = emit(ir, op, dst, src0, src1);
 461       inst->dst.writemask = this_mask;
 462       done_mask |= this_mask;
 463    }
 464 }
 465
 466 void
 467 ir_to_mesa_visitor::emit_scalar(ir_instruction *ir, enum prog_opcode op,
 468                                 dst_reg dst, src_reg src0)
 469 {
 470    src_reg undef = undef_src;
 471
 472    undef.swizzle = SWIZZLE_XXXX;
 473
 474    emit_scalar(ir, op, dst, src0, undef);
 475 }
 476
 477 src_reg
 478 ir_to_mesa_visitor::src_reg_for_float(float val)
 479 {
 480    src_reg src(PROGRAM_CONSTANT, -1, NULL);
 481
 482    src.index = _mesa_add_unnamed_constant(this->prog->Parameters,
 483                                           (const gl_constant_value *)&val, 1, &src.swizzle);
 484
 485    return src;
 486 }
 487
 488 static int
 489 type_size(const struct glsl_type *type)
 490 {
 491    unsigned int i;
 492    int size;
 493
 494    switch (type->base_type) {
 495    case GLSL_TYPE_UINT:
 496    case GLSL_TYPE_INT:
 497    case GLSL_TYPE_FLOAT:
 498    case GLSL_TYPE_BOOL:
 499       if (type->is_matrix()) {
 500          return type->matrix_columns;
 501       } else {
 502          /* Regardless of size of vector, it gets a vec4. This is bad
 503           * packing for things like floats, but otherwise arrays become a
 504           * mess.  Hopefully a later pass over the code can pack scalars
 505           * down if appropriate.
 506           */
 507          return 1;
 508       }
 509       break;
 510    case GLSL_TYPE_DOUBLE:
 511       if (type->is_matrix()) {
 512          if (type->vector_elements > 2)
 513             return type->matrix_columns * 2;
 514          else
 515             return type->matrix_columns;
 516       } else {
 517          if (type->vector_elements > 2)
 518             return 2;
 519          else
 520             return 1;
 521       }
 522       break;
 523    case GLSL_TYPE_ARRAY:
 524       assert(type->length > 0);
 525       return type_size(type->fields.array) * type->length;
 526    case GLSL_TYPE_STRUCT:
 527       size = 0;
 528       for (i = 0; i < type->length; i++) {
 529          size += type_size(type->fields.structure[i].type);
 530       }
 531       return size;
 532    case GLSL_TYPE_SAMPLER:
 533    case GLSL_TYPE_IMAGE:
 534    case GLSL_TYPE_SUBROUTINE:
 535       /* Samplers take up one slot in UNIFORMS[], but they're baked in
 536        * at link time.
 537        */
 538       return 1;
 539    case GLSL_TYPE_ATOMIC_UINT:
 540    case GLSL_TYPE_VOID:
 541    case GLSL_TYPE_ERROR:
 542    case GLSL_TYPE_INTERFACE:
 543       assert(!"Invalid type in type_size");
 544       break;
 545    }
 546
 547    return 0;
 548 }
 549
 550 /**
 551  * In the initial pass of codegen, we assign temporary numbers to
 552  * intermediate results.  (not SSA -- variable assignments will reuse
 553  * storage).  Actual register allocation for the Mesa VM occurs in a
 554  * pass over the Mesa IR later.
 555  */
 556 src_reg
 557 ir_to_mesa_visitor::get_temp(const glsl_type *type)
 558 {
 559    src_reg src;
 560
 561    src.file = PROGRAM_TEMPORARY;
 562    src.index = next_temp;
 563    src.reladdr = NULL;
 564    next_temp += type_size(type);
 565
 566    if (type->is_array() || type->is_record()) {
 567       src.swizzle = SWIZZLE_NOOP;
 568    } else {
 569       src.swizzle = swizzle_for_size(type->vector_elements);
 570    }
 571    src.negate = 0;
 572
 573    return src;
 574 }
 575
 576 variable_storage *
 577 ir_to_mesa_visitor::find_variable_storage(const ir_variable *var)
 578 {
 579    foreach_in_list(variable_storage, entry, &this->variables) {
 580       if (entry->var == var)
 581          return entry;
 582    }
 583
 584    return NULL;
 585 }
 586
 587 void
 588 ir_to_mesa_visitor::visit(ir_variable *ir)
 589 {
 590    if (strcmp(ir->name, "gl_FragCoord") == 0) {
 591       struct gl_fragment_program *fp = (struct gl_fragment_program *)this->prog;
 592
 593       fp->OriginUpperLeft = ir->data.origin_upper_left;
 594       fp->PixelCenterInteger = ir->data.pixel_center_integer;
 595    }
 596
 597    if (ir->data.mode == ir_var_uniform && strncmp(ir->name, "gl_", 3) == 0) {
 598       unsigned int i;
 599       const ir_state_slot *const slots = ir->get_state_slots();
 600       assert(slots != NULL);
 601
 602       /* Check if this statevar's setup in the STATE file exactly
 603        * matches how we'll want to reference it as a
 604        * struct/array/whatever.  If not, then we need to move it into
 605        * temporary storage and hope that it'll get copy-propagated
 606        * out.
 607        */
 608       for (i = 0; i < ir->get_num_state_slots(); i++) {
 609          if (slots[i].swizzle != SWIZZLE_XYZW) {
 610             break;
 611          }
 612       }
 613
 614       variable_storage *storage;
 615       dst_reg dst;
 616       if (i == ir->get_num_state_slots()) {
 617          /* We'll set the index later. */
 618          storage = new(mem_ctx) variable_storage(ir, PROGRAM_STATE_VAR, -1);
 619          this->variables.push_tail(storage);
 620
 621          dst = undef_dst;
 622       } else {
 623          /* The variable_storage constructor allocates slots based on the size
 624           * of the type.  However, this had better match the number of state
 625           * elements that we're going to copy into the new temporary.
 626           */
 627          assert((int) ir->get_num_state_slots() == type_size(ir->type));
 628
 629          storage = new(mem_ctx) variable_storage(ir, PROGRAM_TEMPORARY,
 630                                                  this->next_temp);
 631          this->variables.push_tail(storage);
 632          this->next_temp += type_size(ir->type);
 633
 634          dst = dst_reg(src_reg(PROGRAM_TEMPORARY, storage->index, NULL));
 635       }
 636
 637
 638       for (unsigned int i = 0; i < ir->get_num_state_slots(); i++) {
 639          int index = _mesa_add_state_reference(this->prog->Parameters,
 640                                                (gl_state_index *)slots[i].tokens);
 641
 642          if (storage->file == PROGRAM_STATE_VAR) {
 643             if (storage->index == -1) {
 644                storage->index = index;
 645             } else {
 646                assert(index == storage->index + (int)i);
 647             }
 648          } else {
 649             src_reg src(PROGRAM_STATE_VAR, index, NULL);
 650             src.swizzle = slots[i].swizzle;
 651             emit(ir, OPCODE_MOV, dst, src);
 652             /* even a float takes up a whole vec4 reg in a struct/array. */
 653             dst.index++;
 654          }
 655       }
 656
 657       if (storage->file == PROGRAM_TEMPORARY &&
 658           dst.index != storage->index + (int) ir->get_num_state_slots()) {
 659          linker_error(this->shader_program,
 660                       "failed to load builtin uniform `%s' "
 661                       "(%d/%d regs loaded)\n",
 662                       ir->name, dst.index - storage->index,
 663                       type_size(ir->type));
 664       }
 665    }
 666 }
 667
 668 void
 669 ir_to_mesa_visitor::visit(ir_loop *ir)
 670 {
 671    emit(NULL, OPCODE_BGNLOOP);
 672
 673    visit_exec_list(&ir->body_instructions, this);
 674
 675    emit(NULL, OPCODE_ENDLOOP);
 676 }
 677
 678 void
 679 ir_to_mesa_visitor::visit(ir_loop_jump *ir)
 680 {
 681    switch (ir->mode) {
 682    case ir_loop_jump::jump_break:
 683       emit(NULL, OPCODE_BRK);
 684       break;
 685    case ir_loop_jump::jump_continue:
 686       emit(NULL, OPCODE_CONT);
 687       break;
 688    }
 689 }
 690
 691
 692 void
 693 ir_to_mesa_visitor::visit(ir_function_signature *ir)
 694 {
 695    assert(0);
 696    (void)ir;
 697 }
 698
 699 void
 700 ir_to_mesa_visitor::visit(ir_function *ir)
 701 {
 702    /* Ignore function bodies other than main() -- we shouldn't see calls to
 703     * them since they should all be inlined before we get to ir_to_mesa.
 704     */
 705    if (strcmp(ir->name, "main") == 0) {
 706       const ir_function_signature *sig;
 707       exec_list empty;
 708
 709       sig = ir->matching_signature(NULL, &empty, false);
 710
 711       assert(sig);
 712
 713       foreach_in_list(ir_instruction, ir, &sig->body) {
 714          ir->accept(this);
 715       }
 716    }
 717 }
 718
 719 bool
 720 ir_to_mesa_visitor::try_emit_mad(ir_expression *ir, int mul_operand)
 721 {
 722    int nonmul_operand = 1 - mul_operand;
 723    src_reg a, b, c;
 724
 725    ir_expression *expr = ir->operands[mul_operand]->as_expression();
 726    if (!expr || expr->operation != ir_binop_mul)
 727       return false;
 728
 729    expr->operands[0]->accept(this);
 730    a = this->result;
 731    expr->operands[1]->accept(this);
 732    b = this->result;
 733    ir->operands[nonmul_operand]->accept(this);
 734    c = this->result;
 735
 736    this->result = get_temp(ir->type);
 737    emit(ir, OPCODE_MAD, dst_reg(this->result), a, b, c);
 738
 739    return true;
 740 }
 741
 742 /**
 743  * Emit OPCODE_MAD(a, -b, a) instead of AND(a, NOT(b))
 744  *
 745  * The logic values are 1.0 for true and 0.0 for false.  Logical-and is
 746  * implemented using multiplication, and logical-or is implemented using
 747  * addition.  Logical-not can be implemented as (true - x), or (1.0 - x).
 748  * As result, the logical expression (a & !b) can be rewritten as:
 749  *
 750  *     - a * !b
 751  *     - a * (1 - b)
 752  *     - (a * 1) - (a * b)
 753  *     - a + -(a * b)
 754  *     - a + (a * -b)
 755  *
 756  * This final expression can be implemented as a single MAD(a, -b, a)
 757  * instruction.
 758  */
 759 bool
 760 ir_to_mesa_visitor::try_emit_mad_for_and_not(ir_expression *ir, int try_operand)
 761 {
 762    const int other_operand = 1 - try_operand;
 763    src_reg a, b;
 764
 765    ir_expression *expr = ir->operands[try_operand]->as_expression();
 766    if (!expr || expr->operation != ir_unop_logic_not)
 767       return false;
 768
 769    ir->operands[other_operand]->accept(this);
 770    a = this->result;
 771    expr->operands[0]->accept(this);
 772    b = this->result;
 773
 774    b.negate = ~b.negate;
 775
 776    this->result = get_temp(ir->type);
 777    emit(ir, OPCODE_MAD, dst_reg(this->result), a, b, a);
 778
 779    return true;
 780 }
 781
 782 void
 783 ir_to_mesa_visitor::reladdr_to_temp(ir_instruction *ir,
 784                                     src_reg *reg, int *num_reladdr)
 785 {
 786    if (!reg->reladdr)
 787       return;
 788
 789    emit(ir, OPCODE_ARL, address_reg, *reg->reladdr);
 790
 791    if (*num_reladdr != 1) {
 792       src_reg temp = get_temp(glsl_type::vec4_type);
 793
 794       emit(ir, OPCODE_MOV, dst_reg(temp), *reg);
 795       *reg = temp;
 796    }
 797
 798    (*num_reladdr)--;
 799 }
 800
 801 void
 802 ir_to_mesa_visitor::emit_swz(ir_expression *ir)
 803 {
 804    /* Assume that the vector operator is in a form compatible with OPCODE_SWZ.
 805     * This means that each of the operands is either an immediate value of -1,
 806     * 0, or 1, or is a component from one source register (possibly with
 807     * negation).
 808     */
 809    uint8_t components[4] = { 0 };
 810    bool negate[4] = { false };
 811    ir_variable *var = NULL;
 812
 813    for (unsigned i = 0; i < ir->type->vector_elements; i++) {
 814       ir_rvalue *op = ir->operands[i];
 815
 816       assert(op->type->is_scalar());
 817
 818       while (op != NULL) {
 819          switch (op->ir_type) {
 820          case ir_type_constant: {
 821
 822             assert(op->type->is_scalar());
 823
 824             const ir_constant *const c = op->as_constant();
 825             if (c->is_one()) {
 826                components[i] = SWIZZLE_ONE;
 827             } else if (c->is_zero()) {
 828                components[i] = SWIZZLE_ZERO;
 829             } else if (c->is_negative_one()) {
 830                components[i] = SWIZZLE_ONE;
 831                negate[i] = true;
 832             } else {
 833                assert(!"SWZ constant must be 0.0 or 1.0.");
 834             }
 835
 836             op = NULL;
 837             break;
 838          }
 839
 840          case ir_type_dereference_variable: {
 841             ir_dereference_variable *const deref =
 842                (ir_dereference_variable *) op;
 843
 844             assert((var == NULL) || (deref->var == var));
 845             components[i] = SWIZZLE_X;
 846             var = deref->var;
 847             op = NULL;
 848             break;
 849          }
 850
 851          case ir_type_expression: {
 852             ir_expression *const expr = (ir_expression *) op;
 853
 854             assert(expr->operation == ir_unop_neg);
 855             negate[i] = true;
 856
 857             op = expr->operands[0];
 858             break;
 859          }
 860
 861          case ir_type_swizzle: {
 862             ir_swizzle *const swiz = (ir_swizzle *) op;
 863
 864             components[i] = swiz->mask.x;
 865             op = swiz->val;
 866             break;
 867          }
 868
 869          default:
 870             assert(!"Should not get here.");
 871             return;
 872          }
 873       }
 874    }
 875
 876    assert(var != NULL);
 877
 878    ir_dereference_variable *const deref =
 879       new(mem_ctx) ir_dereference_variable(var);
 880
 881    this->result.file = PROGRAM_UNDEFINED;
 882    deref->accept(this);
 883    if (this->result.file == PROGRAM_UNDEFINED) {
 884       printf("Failed to get tree for expression operand:\n");
 885       deref->print();
 886       printf("\n");
 887       exit(1);
 888    }
 889
 890    src_reg src;
 891
 892    src = this->result;
 893    src.swizzle = MAKE_SWIZZLE4(components[0],
 894                                components[1],
 895                                components[2],
 896                                components[3]);
 897    src.negate = ((unsigned(negate[0]) << 0)
 898                  | (unsigned(negate[1]) << 1)
 899                  | (unsigned(negate[2]) << 2)
 900                  | (unsigned(negate[3]) << 3));
 901
 902    /* Storage for our result.  Ideally for an assignment we'd be using the
 903     * actual storage for the result here, instead.
 904     */
 905    const src_reg result_src = get_temp(ir->type);
 906    dst_reg result_dst = dst_reg(result_src);
 907
 908    /* Limit writes to the channels that will be used by result_src later.
 909     * This does limit this temp's use as a temporary for multi-instruction
 910     * sequences.
 911     */
 912    result_dst.writemask = (1 << ir->type->vector_elements) - 1;
 913
 914    emit(ir, OPCODE_SWZ, result_dst, src);
 915    this->result = result_src;
 916 }
 917
/**
 * Translate a GLSL IR expression into Mesa IR instructions.
 *
 * The MAD peepholes and the ir_quadop_vector swizzle path are tried first.
 * Otherwise every operand is visited (its value is stored in op[]), a
 * temporary sized by ir->type is allocated for the result, and the switch
 * below emits the instruction(s) for ir->operation.
 *
 * On return this->result holds the source register for the expression's
 * value.  That is the freshly allocated temporary unless a case redirected
 * result_src to (a modified copy of) one of the operands.
 */
void
ir_to_mesa_visitor::visit(ir_expression *ir)
{
   unsigned int operand;
   src_reg op[ARRAY_SIZE(ir->operands)];
   src_reg result_src;
   dst_reg result_dst;

   /* Quick peephole: Emit OPCODE_MAD(a, b, c) instead of ADD(MUL(a, b), c)
    */
   if (ir->operation == ir_binop_add) {
      if (try_emit_mad(ir, 1))
         return;
      if (try_emit_mad(ir, 0))
         return;
   }

   /* Quick peephole: Emit OPCODE_MAD(-a, -b, a) instead of AND(a, NOT(b))
    */
   if (ir->operation == ir_binop_logic_and) {
      if (try_emit_mad_for_and_not(ir, 1))
         return;
      if (try_emit_mad_for_and_not(ir, 0))
         return;
   }

   /* Vector construction is handled entirely by emit_swz(). */
   if (ir->operation == ir_quadop_vector) {
      this->emit_swz(ir);
      return;
   }

   /* Visit each operand in turn.  this->result.file is reset to
    * PROGRAM_UNDEFINED beforehand so that an operand visitor which produced
    * no value can be detected; that situation is treated as fatal.
    */
   for (operand = 0; operand < ir->get_num_operands(); operand++) {
      this->result.file = PROGRAM_UNDEFINED;
      ir->operands[operand]->accept(this);
      if (this->result.file == PROGRAM_UNDEFINED) {
         printf("Failed to get tree for expression operand:\n");
         ir->operands[operand]->print();
         printf("\n");
         exit(1);
      }
      op[operand] = this->result;

      /* Matrix expression operands should have been broken down to vector
       * operations already.
       */
      assert(!ir->operands[operand]->type->is_matrix());
   }

   /* Width of the widest of the first two operands.  Used as the
    * dot-product width by the all_equal / any_nequal cases below.
    */
   int vector_elements = ir->operands[0]->type->vector_elements;
   if (ir->operands[1]) {
      vector_elements = MAX2(vector_elements,
                             ir->operands[1]->type->vector_elements);
   }

   this->result.file = PROGRAM_UNDEFINED;

   /* Storage for our result.  Ideally for an assignment we'd be using
    * the actual storage for the result here, instead.
    */
   result_src = get_temp(ir->type);
   /* convenience for the emit functions below. */
   result_dst = dst_reg(result_src);
   /* Limit writes to the channels that will be used by result_src later.
    * This does limit this temp's use as a temporary for multi-instruction
    * sequences.
    */
   result_dst.writemask = (1 << ir->type->vector_elements) - 1;

   switch (ir->operation) {
   case ir_unop_logic_not:
      /* Previously 'SEQ dst, src, 0.0' was used for this.  However, many
       * older GPUs implement SEQ using multiple instructions (i915 uses two
       * SGE instructions and a MUL instruction).  Since our logic values are
       * 0.0 and 1.0, 1-x also implements !x.
       */
      op[0].negate = ~op[0].negate;
      emit(ir, OPCODE_ADD, result_dst, op[0], src_reg_for_float(1.0));
      break;
   case ir_unop_neg:
      /* No instruction is emitted: the operand is returned directly with
       * its negate flags flipped.
       */
      op[0].negate = ~op[0].negate;
      result_src = op[0];
      break;
   case ir_unop_abs:
      emit(ir, OPCODE_ABS, result_dst, op[0]);
      break;
   case ir_unop_sign:
      emit(ir, OPCODE_SSG, result_dst, op[0]);
      break;
   case ir_unop_rcp:
      emit_scalar(ir, OPCODE_RCP, result_dst, op[0]);
      break;

   case ir_unop_exp2:
      emit_scalar(ir, OPCODE_EX2, result_dst, op[0]);
      break;
   case ir_unop_exp:
   case ir_unop_log:
      assert(!"not reached: should be handled by ir_explog_to_explog2");
      break;
   case ir_unop_log2:
      emit_scalar(ir, OPCODE_LG2, result_dst, op[0]);
      break;
   case ir_unop_sin:
      emit_scalar(ir, OPCODE_SIN, result_dst, op[0]);
      break;
   case ir_unop_cos:
      emit_scalar(ir, OPCODE_COS, result_dst, op[0]);
      break;

   case ir_unop_dFdx:
      emit(ir, OPCODE_DDX, result_dst, op[0]);
      break;
   case ir_unop_dFdy:
      emit(ir, OPCODE_DDY, result_dst, op[0]);
      break;

   case ir_unop_saturate: {
      /* A MOV with the saturate flag set on the instruction. */
      ir_to_mesa_instruction *inst = emit(ir, OPCODE_MOV,
                                          result_dst, op[0]);
      inst->saturate = true;
      break;
   }
   case ir_unop_noise: {
      /* Select OPCODE_NOISE1 + (operand width - 1); the assert checks that
       * the computed opcode stays within NOISE1..NOISE4.
       */
      const enum prog_opcode opcode =
         prog_opcode(OPCODE_NOISE1
                     + (ir->operands[0]->type->vector_elements) - 1);
      assert((opcode >= OPCODE_NOISE1) && (opcode <= OPCODE_NOISE4));

      emit(ir, opcode, result_dst, op[0]);
      break;
   }

   case ir_binop_add:
      emit(ir, OPCODE_ADD, result_dst, op[0], op[1]);
      break;
   case ir_binop_sub:
      emit(ir, OPCODE_SUB, result_dst, op[0], op[1]);
      break;

   case ir_binop_mul:
      emit(ir, OPCODE_MUL, result_dst, op[0], op[1]);
      break;
   case ir_binop_div:
      assert(!"not reached: should be handled by ir_div_to_mul_rcp");
      break;
   case ir_binop_mod:
      /* Floating point should be lowered by MOD_TO_FLOOR in the compiler. */
      assert(ir->type->is_integer());
      /* NOTE(review): a MUL is emitted here, which is not a modulo.
       * Presumably this is a placeholder in the same spirit as the integer
       * bit ops further down -- confirm.
       */
      emit(ir, OPCODE_MUL, result_dst, op[0], op[1]);
      break;

   case ir_binop_less:
      emit(ir, OPCODE_SLT, result_dst, op[0], op[1]);
      break;
   case ir_binop_greater:
      emit(ir, OPCODE_SGT, result_dst, op[0], op[1]);
      break;
   case ir_binop_lequal:
      emit(ir, OPCODE_SLE, result_dst, op[0], op[1]);
      break;
   case ir_binop_gequal:
      emit(ir, OPCODE_SGE, result_dst, op[0], op[1]);
      break;
   case ir_binop_equal:
      emit(ir, OPCODE_SEQ, result_dst, op[0], op[1]);
      break;
   case ir_binop_nequal:
      emit(ir, OPCODE_SNE, result_dst, op[0], op[1]);
      break;
   case ir_binop_all_equal:
      /* "==" operator producing a scalar boolean. */
      if (ir->operands[0]->type->is_vector() ||
          ir->operands[1]->type->is_vector()) {
         /* Per-channel "differs" flags go into a scratch vec4. */
         src_reg temp = get_temp(glsl_type::vec4_type);
         emit(ir, OPCODE_SNE, dst_reg(temp), op[0], op[1]);

         /* After the dot-product, the value will be an integer on the
          * range [0,4].  Zero becomes 1.0, and positive values become zero.
          */
         emit_dp(ir, result_dst, temp, temp, vector_elements);

         /* Negating the result of the dot-product gives values on the range
          * [-4, 0].  Zero becomes 1.0, and negative values become zero.  This
          * is achieved using SGE.
          */
         src_reg sge_src = result_src;
         sge_src.negate = ~sge_src.negate;
         emit(ir, OPCODE_SGE, result_dst, sge_src, src_reg_for_float(0.0));
      } else {
         emit(ir, OPCODE_SEQ, result_dst, op[0], op[1]);
      }
      break;
   case ir_binop_any_nequal:
      /* "!=" operator producing a scalar boolean. */
      if (ir->operands[0]->type->is_vector() ||
          ir->operands[1]->type->is_vector()) {
         src_reg temp = get_temp(glsl_type::vec4_type);
         if (ir->operands[0]->type->is_boolean() &&
             ir->operands[1]->as_constant() &&
             ir->operands[1]->as_constant()->is_zero()) {
            /* Comparing a boolean vector against constant zero: the
             * operand itself is used as the per-channel flags and the SNE
             * is skipped (the temp allocated above goes unused).
             */
            temp = op[0];
         } else {
            emit(ir, OPCODE_SNE, dst_reg(temp), op[0], op[1]);
         }

         /* After the dot-product, the value will be an integer on the
          * range [0,4].  Zero stays zero, and positive values become 1.0.
          */
         ir_to_mesa_instruction *const dp =
            emit_dp(ir, result_dst, temp, temp, vector_elements);
         if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
            /* The clamping to [0,1] can be done for free in the fragment
             * shader with a saturate.
             */
            dp->saturate = true;
         } else {
            /* Negating the result of the dot-product gives values on the range
             * [-4, 0].  Zero stays zero, and negative values become 1.0.  This
             * is achieved using SLT.
             */
            src_reg slt_src = result_src;
            slt_src.negate = ~slt_src.negate;
            emit(ir, OPCODE_SLT, result_dst, slt_src, src_reg_for_float(0.0));
         }
      } else {
         emit(ir, OPCODE_SNE, result_dst, op[0], op[1]);
      }
      break;

   case ir_binop_logic_xor:
      /* With logic values of 0.0 and 1.0, "not equal" implements xor. */
      emit(ir, OPCODE_SNE, result_dst, op[0], op[1]);
      break;

   case ir_binop_logic_or: {
      /* After the addition, the value will be an integer on the
       * range [0,2].  Zero stays zero, and positive values become 1.0.
       */
      ir_to_mesa_instruction *add =
         emit(ir, OPCODE_ADD, result_dst, op[0], op[1]);
      if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
         /* The clamping to [0,1] can be done for free in the fragment
          * shader with a saturate.
          */
         add->saturate = true;
      } else {
         /* Negating the result of the addition gives values on the range
          * [-2, 0].  Zero stays zero, and negative values become 1.0.  This
          * is achieved using SLT.
          */
         src_reg slt_src = result_src;
         slt_src.negate = ~slt_src.negate;
         emit(ir, OPCODE_SLT, result_dst, slt_src, src_reg_for_float(0.0));
      }
      break;
   }

   case ir_binop_logic_and:
      /* the bool args are stored as float 0.0 or 1.0, so "mul" gives us "and". */
      emit(ir, OPCODE_MUL, result_dst, op[0], op[1]);
      break;

   case ir_binop_dot:
      assert(ir->operands[0]->type->is_vector());
      assert(ir->operands[0]->type == ir->operands[1]->type);
      emit_dp(ir, result_dst, op[0], op[1],
              ir->operands[0]->type->vector_elements);
      break;

   case ir_unop_sqrt:
      /* sqrt(x) = x * rsq(x). */
      emit_scalar(ir, OPCODE_RSQ, result_dst, op[0]);
      emit(ir, OPCODE_MUL, result_dst, result_src, op[0]);
      /* For incoming channels <= 0, set the result to 0. */
      op[0].negate = ~op[0].negate;
      emit(ir, OPCODE_CMP, result_dst,
                          op[0], result_src, src_reg_for_float(0.0));
      break;
   case ir_unop_rsq:
      emit_scalar(ir, OPCODE_RSQ, result_dst, op[0]);
      break;
   case ir_unop_i2f:
   case ir_unop_u2f:
   case ir_unop_b2f:
   case ir_unop_b2i:
   case ir_unop_i2u:
   case ir_unop_u2i:
      /* Mesa IR lacks types, ints are stored as truncated floats. */
      result_src = op[0];
      break;
   case ir_unop_f2i:
   case ir_unop_f2u:
      emit(ir, OPCODE_TRUNC, result_dst, op[0]);
      break;
   case ir_unop_f2b:
   case ir_unop_i2b:
      /* Any non-zero value converts to true (1.0). */
      emit(ir, OPCODE_SNE, result_dst,
                          op[0], src_reg_for_float(0.0));
      break;
   case ir_unop_bitcast_f2i: // Ignore these 4, they can't happen here anyway
   case ir_unop_bitcast_f2u:
   case ir_unop_bitcast_i2f:
   case ir_unop_bitcast_u2f:
      break;
   case ir_unop_trunc:
      emit(ir, OPCODE_TRUNC, result_dst, op[0]);
      break;
   case ir_unop_ceil:
      /* ceil(x) is computed as -floor(-x): floor the negated operand, then
       * return the temporary with its negate flags flipped.
       */
      op[0].negate = ~op[0].negate;
      emit(ir, OPCODE_FLR, result_dst, op[0]);
      result_src.negate = ~result_src.negate;
      break;
   case ir_unop_floor:
      emit(ir, OPCODE_FLR, result_dst, op[0]);
      break;
   case ir_unop_fract:
      emit(ir, OPCODE_FRC, result_dst, op[0]);
      break;
   case ir_unop_pack_snorm_2x16:
   case ir_unop_pack_snorm_4x8:
   case ir_unop_pack_unorm_2x16:
   case ir_unop_pack_unorm_4x8:
   case ir_unop_pack_half_2x16:
   case ir_unop_pack_double_2x32:
   case ir_unop_unpack_snorm_2x16:
   case ir_unop_unpack_snorm_4x8:
   case ir_unop_unpack_unorm_2x16:
   case ir_unop_unpack_unorm_4x8:
   case ir_unop_unpack_half_2x16:
   case ir_unop_unpack_double_2x32:
   case ir_unop_bitfield_reverse:
   case ir_unop_bit_count:
   case ir_unop_find_msb:
   case ir_unop_find_lsb:
   case ir_unop_d2f:
   case ir_unop_f2d:
   case ir_unop_d2i:
   case ir_unop_i2d:
   case ir_unop_d2u:
   case ir_unop_u2d:
   case ir_unop_d2b:
   case ir_unop_frexp_sig:
   case ir_unop_frexp_exp:
      assert(!"not supported");
      break;
   case ir_binop_min:
      emit(ir, OPCODE_MIN, result_dst, op[0], op[1]);
      break;
   case ir_binop_max:
      emit(ir, OPCODE_MAX, result_dst, op[0], op[1]);
      break;
   case ir_binop_pow:
      emit_scalar(ir, OPCODE_POW, result_dst, op[0], op[1]);
      break;

      /* GLSL 1.30 integer ops are unsupported in Mesa IR, but since
       * hardware backends have no way to avoid Mesa IR generation
       * even if they don't use it, we need to emit "something" and
       * continue.
       */
   case ir_binop_lshift:
   case ir_binop_rshift:
   case ir_binop_bit_and:
   case ir_binop_bit_xor:
   case ir_binop_bit_or:
      emit(ir, OPCODE_ADD, result_dst, op[0], op[1]);
      break;

   case ir_unop_bit_not:
   case ir_unop_round_even:
      /* NOTE(review): a plain MOV, which implements neither operation.
       * Presumably the same "emit something" placeholder as the integer
       * ops above -- confirm.
       */
      emit(ir, OPCODE_MOV, result_dst, op[0]);
      break;

   case ir_binop_ubo_load:
      assert(!"not supported");
      break;

   case ir_triop_lrp:
      /* ir_triop_lrp operands are (x, y, a) while
       * OPCODE_LRP operands are (a, y, x) to match ARB_fragment_program.
       */
      emit(ir, OPCODE_LRP, result_dst, op[2], op[1], op[0]);
      break;

   case ir_binop_vector_extract:
   case ir_triop_fma:
   case ir_triop_bitfield_extract:
   case ir_triop_vector_insert:
   case ir_quadop_bitfield_insert:
   case ir_binop_ldexp:
   case ir_triop_csel:
   case ir_binop_carry:
   case ir_binop_borrow:
   case ir_binop_imul_high:
   case ir_unop_interpolate_at_centroid:
   case ir_binop_interpolate_at_offset:
   case ir_binop_interpolate_at_sample:
   case ir_unop_dFdx_coarse:
   case ir_unop_dFdx_fine:
   case ir_unop_dFdy_coarse:
   case ir_unop_dFdy_fine:
   case ir_unop_subroutine_to_int:
   case ir_unop_get_buffer_size:
      assert(!"not supported");
      break;

   case ir_unop_ssbo_unsized_array_length:
   case ir_quadop_vector:
      /* This operation should have already been handled.
       */
      assert(!"Should not get here.");
      break;
   }

   this->result = result_src;
}
1333
1334
1335 void
1336 ir_to_mesa_visitor::visit(ir_swizzle *ir)
1337 {
1338    src_reg src;
1339    int i;
1340    int swizzle[4];
1341
1342    /* Note that this is only swizzles in expressions, not those on the left
1343     * hand side of an assignment, which do write masking.  See ir_assignment
1344     * for that.
1345     */
1346
1347    ir->val->accept(this);
1348    src = this->result;
1349    assert(src.file != PROGRAM_UNDEFINED);
1350    assert(ir->type->vector_elements > 0);
1351
1352    for (i = 0; i < 4; i++) {
1353       if (i < ir->type->vector_elements) {
1354          switch (i) {
1355          case 0:
1356             swizzle[i] = GET_SWZ(src.swizzle, ir->mask.x);
1357             break;
1358          case 1:
1359             swizzle[i] = GET_SWZ(src.swizzle, ir->mask.y);
1360             break;
1361          case 2:
1362             swizzle[i] = GET_SWZ(src.swizzle, ir->mask.z);
1363             break;
1364          case 3:
1365             swizzle[i] = GET_SWZ(src.swizzle, ir->mask.w);
1366             break;
1367          }
1368       } else {
1369          /* If the type is smaller than a vec4, replicate the last
1370           * channel out.
1371           */
1372          swizzle[i] = swizzle[ir->type->vector_elements - 1];
1373       }
1374    }
1375
1376    src.swizzle = MAKE_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]);
1377
1378    this->result = src;
1379 }
1380
1381 void
1382 ir_to_mesa_visitor::visit(ir_dereference_variable *ir)
1383 {
1384    variable_storage *entry = find_variable_storage(ir->var);
1385    ir_variable *var = ir->var;
1386
1387    if (!entry) {
1388       switch (var->data.mode) {
1389       case ir_var_uniform:
1390          entry = new(mem_ctx) variable_storage(var, PROGRAM_UNIFORM,
1391                                                var->data.param_index);
1392          this->variables.push_tail(entry);
1393          break;
1394       case ir_var_shader_in:
1395          /* The linker assigns locations for varyings and attributes,
1396           * including deprecated builtins (like gl_Color),
1397           * user-assigned generic attributes (glBindVertexLocation),
1398           * and user-defined varyings.
1399           */
1400          assert(var->data.location != -1);
1401          entry = new(mem_ctx) variable_storage(var,
1402                                                PROGRAM_INPUT,
1403                                                var->data.location);
1404          break;
1405       case ir_var_shader_out:
1406          assert(var->data.location != -1);
1407          entry = new(mem_ctx) variable_storage(var,
1408                                                PROGRAM_OUTPUT,
1409                                                var->data.location);
1410          break;
1411       case ir_var_system_value:
1412          entry = new(mem_ctx) variable_storage(var,
1413                                                PROGRAM_SYSTEM_VALUE,
1414                                                var->data.location);
1415          break;
1416       case ir_var_auto:
1417       case ir_var_temporary:
1418          entry = new(mem_ctx) variable_storage(var, PROGRAM_TEMPORARY,
1419                                                this->next_temp);
1420          this->variables.push_tail(entry);
1421
1422          next_temp += type_size(var->type);
1423          break;
1424       }
1425
1426       if (!entry) {
1427          printf("Failed to make storage for %s\n", var->name);
1428          exit(1);
1429       }
1430    }
1431
1432    this->result = src_reg(entry->file, entry->index, var->type);
1433 }
1434
1435 void
1436 ir_to_mesa_visitor::visit(ir_dereference_array *ir)
1437 {
1438    ir_constant *index;
1439    src_reg src;
1440    int element_size = type_size(ir->type);
1441
1442    index = ir->array_index->constant_expression_value();
1443
1444    ir->array->accept(this);
1445    src = this->result;
1446
1447    if (index) {
1448       src.index += index->value.i[0] * element_size;
1449    } else {
1450       /* Variable index array dereference.  It eats the "vec4" of the
1451        * base of the array and an index that offsets the Mesa register
1452        * index.
1453        */
1454       ir->array_index->accept(this);
1455
1456       src_reg index_reg;
1457
1458       if (element_size == 1) {
1459          index_reg = this->result;
1460       } else {
1461          index_reg = get_temp(glsl_type::float_type);
1462
1463          emit(ir, OPCODE_MUL, dst_reg(index_reg),
1464               this->result, src_reg_for_float(element_size));
1465       }
1466
1467       /* If there was already a relative address register involved, add the
1468        * new and the old together to get the new offset.
1469        */
1470       if (src.reladdr != NULL)  {
1471          src_reg accum_reg = get_temp(glsl_type::float_type);
1472
1473          emit(ir, OPCODE_ADD, dst_reg(accum_reg),
1474               index_reg, *src.reladdr);
1475
1476          index_reg = accum_reg;
1477       }
1478
1479       src.reladdr = ralloc(mem_ctx, src_reg);
1480       memcpy(src.reladdr, &index_reg, sizeof(index_reg));
1481    }
1482
1483    /* If the type is smaller than a vec4, replicate the last channel out. */
1484    if (ir->type->is_scalar() || ir->type->is_vector())
1485       src.swizzle = swizzle_for_size(ir->type->vector_elements);
1486    else
1487       src.swizzle = SWIZZLE_NOOP;
1488
1489    this->result = src;
1490 }
1491
1492 void
1493 ir_to_mesa_visitor::visit(ir_dereference_record *ir)
1494 {
1495    unsigned int i;
1496    const glsl_type *struct_type = ir->record->type;
1497    int offset = 0;
1498
1499    ir->record->accept(this);
1500
1501    for (i = 0; i < struct_type->length; i++) {
1502       if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0)
1503          break;
1504       offset += type_size(struct_type->fields.structure[i].type);
1505    }
1506
1507    /* If the type is smaller than a vec4, replicate the last channel out. */
1508    if (ir->type->is_scalar() || ir->type->is_vector())
1509       this->result.swizzle = swizzle_for_size(ir->type->vector_elements);
1510    else
1511       this->result.swizzle = SWIZZLE_NOOP;
1512
1513    this->result.index += offset;
1514 }
1515
1516 /**
1517  * We want to be careful in assignment setup to hit the actual storage
1518  * instead of potentially using a temporary like we might with the
1519  * ir_dereference handler.
1520  */
1521 static dst_reg
1522 get_assignment_lhs(ir_dereference *ir, ir_to_mesa_visitor *v)
1523 {
1524    /* The LHS must be a dereference.  If the LHS is a variable indexed array
1525     * access of a vector, it must be separated into a series conditional moves
1526     * before reaching this point (see ir_vec_index_to_cond_assign).
1527     */
1528    assert(ir->as_dereference());
1529    ir_dereference_array *deref_array = ir->as_dereference_array();
1530    if (deref_array) {
1531       assert(!deref_array->array->type->is_vector());
1532    }
1533
1534    /* Use the rvalue deref handler for the most part.  We'll ignore
1535     * swizzles in it and write swizzles using writemask, though.
1536     */
1537    ir->accept(v);
1538    return dst_reg(v->result);
1539 }
1540
1541 /* Calculate the sampler index and also calculate the base uniform location
1542  * for struct members.
1543  */
1544 static void
1545 calc_sampler_offsets(struct gl_shader_program *prog, ir_dereference *deref,
1546                      unsigned *offset, unsigned *array_elements,
1547                      unsigned *location)
1548 {
1549    if (deref->ir_type == ir_type_dereference_variable)
1550       return;
1551
1552    switch (deref->ir_type) {
1553    case ir_type_dereference_array: {
1554       ir_dereference_array *deref_arr = deref->as_dereference_array();
1555       ir_constant *array_index =
1556          deref_arr->array_index->constant_expression_value();
1557
1558       if (!array_index) {
1559          /* GLSL 1.10 and 1.20 allowed variable sampler array indices,
1560           * while GLSL 1.30 requires that the array indices be
1561           * constant integer expressions.  We don't expect any driver
1562           * to actually work with a really variable array index, so
1563           * all that would work would be an unrolled loop counter that ends
1564           * up being constant above.
1565           */
1566          ralloc_strcat(&prog->InfoLog,
1567                        "warning: Variable sampler array index unsupported.\n"
1568                        "This feature of the language was removed in GLSL 1.20 "
1569                        "and is unlikely to be supported for 1.10 in Mesa.\n");
1570       } else {
1571          *offset += array_index->value.u[0] * *array_elements;
1572       }
1573
1574       *array_elements *= deref_arr->array->type->length;
1575
1576       calc_sampler_offsets(prog, deref_arr->array->as_dereference(),
1577                            offset, array_elements, location);
1578       break;
1579    }
1580
1581    case ir_type_dereference_record: {
1582       ir_dereference_record *deref_record = deref->as_dereference_record();
1583       unsigned field_index =
1584          deref_record->record->type->field_index(deref_record->field);
1585       *location +=
1586          deref_record->record->type->record_location_offset(field_index);
1587       calc_sampler_offsets(prog, deref_record->record->as_dereference(),
1588                            offset, array_elements, location);
1589       break;
1590    }
1591
1592    default:
1593       unreachable("Invalid deref type");
1594       break;
1595    }
1596 }
1597
1598 static int
1599 get_sampler_uniform_value(class ir_dereference *sampler,
1600                           struct gl_shader_program *shader_program,
1601                           const struct gl_program *prog)
1602 {
1603    GLuint shader = _mesa_program_enum_to_shader_stage(prog->Target);
1604    ir_variable *var = sampler->variable_referenced();
1605    unsigned location = var->data.location;
1606    unsigned array_elements = 1;
1607    unsigned offset = 0;
1608
1609    calc_sampler_offsets(shader_program, sampler, &offset, &array_elements,
1610                         &location);
1611
1612    assert(shader_program->UniformStorage[location].opaque[shader].active);
1613    return shader_program->UniformStorage[location].opaque[shader].index +
1614           offset;
1615 }
1616
1617 /**
1618  * Process the condition of a conditional assignment
1619  *
1620  * Examines the condition of a conditional assignment to generate the optimal
1621  * first operand of a \c CMP instruction.  If the condition is a relational
1622  * operator with 0 (e.g., \c ir_binop_less), the value being compared will be
1623  * used as the source for the \c CMP instruction.  Otherwise the comparison
1624  * is processed to a boolean result, and the boolean result is used as the
1625  * operand to the CMP instruction.
1626  */
1627 bool
1628 ir_to_mesa_visitor::process_move_condition(ir_rvalue *ir)
1629 {
1630    ir_rvalue *src_ir = ir;
1631    bool negate = true;
1632    bool switch_order = false;
1633
1634    ir_expression *const expr = ir->as_expression();
1635    if ((expr != NULL) && (expr->get_num_operands() == 2)) {
1636       bool zero_on_left = false;
1637
1638       if (expr->operands[0]->is_zero()) {
1639          src_ir = expr->operands[1];
1640          zero_on_left = true;
1641       } else if (expr->operands[1]->is_zero()) {
1642          src_ir = expr->operands[0];
1643          zero_on_left = false;
1644       }
1645
1646       /*      a is -  0  +            -  0  +
1647        * (a <  0)  T  F  F  ( a < 0)  T  F  F
1648        * (0 <  a)  F  F  T  (-a < 0)  F  F  T
1649        * (a <= 0)  T  T  F  (-a < 0)  F  F  T  (swap order of other operands)
1650        * (0 <= a)  F  T  T  ( a < 0)  T  F  F  (swap order of other operands)
1651        * (a >  0)  F  F  T  (-a < 0)  F  F  T
1652        * (0 >  a)  T  F  F  ( a < 0)  T  F  F
1653        * (a >= 0)  F  T  T  ( a < 0)  T  F  F  (swap order of other operands)
1654        * (0 >= a)  T  T  F  (-a < 0)  F  F  T  (swap order of other operands)
1655        *
1656        * Note that exchanging the order of 0 and 'a' in the comparison simply
1657        * means that the value of 'a' should be negated.
1658        */
1659       if (src_ir != ir) {
1660          switch (expr->operation) {
1661          case ir_binop_less:
1662             switch_order = false;
1663             negate = zero_on_left;
1664             break;
1665
1666          case ir_binop_greater:
1667             switch_order = false;
1668             negate = !zero_on_left;
1669             break;
1670
1671          case ir_binop_lequal:
1672             switch_order = true;
1673             negate = !zero_on_left;
1674             break;
1675
1676          case ir_binop_gequal:
1677             switch_order = true;
1678             negate = zero_on_left;
1679             break;
1680
1681          default:
1682             /* This isn't the right kind of comparison afterall, so make sure
1683              * the whole condition is visited.
1684              */
1685             src_ir = ir;
1686             break;
1687          }
1688       }
1689    }
1690
1691    src_ir->accept(this);
1692
1693    /* We use the OPCODE_CMP (a < 0 ? b : c) for conditional moves, and the
1694     * condition we produced is 0.0 or 1.0.  By flipping the sign, we can
1695     * choose which value OPCODE_CMP produces without an extra instruction
1696     * computing the condition.
1697     */
1698    if (negate)
1699       this->result.negate = ~this->result.negate;
1700
1701    return switch_order;
1702 }
1703
1704 void
1705 ir_to_mesa_visitor::visit(ir_assignment *ir)
1706 {
1707    dst_reg l;
1708    src_reg r;
1709    int i;
1710
1711    ir->rhs->accept(this);
1712    r = this->result;
1713
1714    l = get_assignment_lhs(ir->lhs, this);
1715
1716    /* FINISHME: This should really set to the correct maximal writemask for each
1717     * FINISHME: component written (in the loops below).  This case can only
1718     * FINISHME: occur for matrices, arrays, and structures.
1719     */
1720    if (ir->write_mask == 0) {
1721       assert(!ir->lhs->type->is_scalar() && !ir->lhs->type->is_vector());
1722       l.writemask = WRITEMASK_XYZW;
1723    } else if (ir->lhs->type->is_scalar()) {
1724       /* FINISHME: This hack makes writing to gl_FragDepth, which lives in the
1725        * FINISHME: W component of fragment shader output zero, work correctly.
1726        */
1727       l.writemask = WRITEMASK_XYZW;
1728    } else {
1729       int swizzles[4];
1730       int first_enabled_chan = 0;
1731       int rhs_chan = 0;
1732
1733       assert(ir->lhs->type->is_vector());
1734       l.writemask = ir->write_mask;
1735
1736       for (int i = 0; i < 4; i++) {
1737          if (l.writemask & (1 << i)) {
1738             first_enabled_chan = GET_SWZ(r.swizzle, i);
1739             break;
1740          }
1741       }
1742
1743       /* Swizzle a small RHS vector into the channels being written.
1744        *
1745        * glsl ir treats write_mask as dictating how many channels are
1746        * present on the RHS while Mesa IR treats write_mask as just
1747        * showing which channels of the vec4 RHS get written.
1748        */
1749       for (int i = 0; i < 4; i++) {
1750          if (l.writemask & (1 << i))
1751             swizzles[i] = GET_SWZ(r.swizzle, rhs_chan++);
1752          else
1753             swizzles[i] = first_enabled_chan;
1754       }
1755       r.swizzle = MAKE_SWIZZLE4(swizzles[0], swizzles[1],
1756                                 swizzles[2], swizzles[3]);
1757    }
1758
1759    assert(l.file != PROGRAM_UNDEFINED);
1760    assert(r.file != PROGRAM_UNDEFINED);
1761
1762    if (ir->condition) {
1763       const bool switch_order = this->process_move_condition(ir->condition);
1764       src_reg condition = this->result;
1765
1766       for (i = 0; i < type_size(ir->lhs->type); i++) {
1767          if (switch_order) {
1768             emit(ir, OPCODE_CMP, l, condition, src_reg(l), r);
1769          } else {
1770             emit(ir, OPCODE_CMP, l, condition, r, src_reg(l));
1771          }
1772
1773          l.index++;
1774          r.index++;
1775       }
1776    } else {
1777       for (i = 0; i < type_size(ir->lhs->type); i++) {
1778          emit(ir, OPCODE_MOV, l, r);
1779          l.index++;
1780          r.index++;
1781       }
1782    }
1783 }
1784
1785
1786 void
1787 ir_to_mesa_visitor::visit(ir_constant *ir)
1788 {
1789    src_reg src;
1790    GLfloat stack_vals[4] = { 0 };
1791    GLfloat *values = stack_vals;
1792    unsigned int i;
1793
1794    /* Unfortunately, 4 floats is all we can get into
1795     * _mesa_add_unnamed_constant.  So, make a temp to store an
1796     * aggregate constant and move each constant value into it.  If we
1797     * get lucky, copy propagation will eliminate the extra moves.
1798     */
1799
1800    if (ir->type->base_type == GLSL_TYPE_STRUCT) {
1801       src_reg temp_base = get_temp(ir->type);
1802       dst_reg temp = dst_reg(temp_base);
1803
1804       foreach_in_list(ir_constant, field_value, &ir->components) {
1805          int size = type_size(field_value->type);
1806
1807          assert(size > 0);
1808
1809          field_value->accept(this);
1810          src = this->result;
1811
1812          for (i = 0; i < (unsigned int)size; i++) {
1813             emit(ir, OPCODE_MOV, temp, src);
1814
1815             src.index++;
1816             temp.index++;
1817          }
1818       }
1819       this->result = temp_base;
1820       return;
1821    }
1822
1823    if (ir->type->is_array()) {
1824       src_reg temp_base = get_temp(ir->type);
1825       dst_reg temp = dst_reg(temp_base);
1826       int size = type_size(ir->type->fields.array);
1827
1828       assert(size > 0);
1829
1830       for (i = 0; i < ir->type->length; i++) {
1831          ir->array_elements[i]->accept(this);
1832          src = this->result;
1833          for (int j = 0; j < size; j++) {
1834             emit(ir, OPCODE_MOV, temp, src);
1835
1836             src.index++;
1837             temp.index++;
1838          }
1839       }
1840       this->result = temp_base;
1841       return;
1842    }
1843
1844    if (ir->type->is_matrix()) {
1845       src_reg mat = get_temp(ir->type);
1846       dst_reg mat_column = dst_reg(mat);
1847
1848       for (i = 0; i < ir->type->matrix_columns; i++) {
1849          assert(ir->type->base_type == GLSL_TYPE_FLOAT);
1850          values = &ir->value.f[i * ir->type->vector_elements];
1851
1852          src = src_reg(PROGRAM_CONSTANT, -1, NULL);
1853          src.index = _mesa_add_unnamed_constant(this->prog->Parameters,
1854                                                 (gl_constant_value *) values,
1855                                                 ir->type->vector_elements,
1856                                                 &src.swizzle);
1857          emit(ir, OPCODE_MOV, mat_column, src);
1858
1859          mat_column.index++;
1860       }
1861
1862       this->result = mat;
1863       return;
1864    }
1865
1866    src.file = PROGRAM_CONSTANT;
1867    switch (ir->type->base_type) {
1868    case GLSL_TYPE_FLOAT:
1869       values = &ir->value.f[0];
1870       break;
1871    case GLSL_TYPE_UINT:
1872       for (i = 0; i < ir->type->vector_elements; i++) {
1873          values[i] = ir->value.u[i];
1874       }
1875       break;
1876    case GLSL_TYPE_INT:
1877       for (i = 0; i < ir->type->vector_elements; i++) {
1878          values[i] = ir->value.i[i];
1879       }
1880       break;
1881    case GLSL_TYPE_BOOL:
1882       for (i = 0; i < ir->type->vector_elements; i++) {
1883          values[i] = ir->value.b[i];
1884       }
1885       break;
1886    default:
1887       assert(!"Non-float/uint/int/bool constant");
1888    }
1889
1890    this->result = src_reg(PROGRAM_CONSTANT, -1, ir->type);
1891    this->result.index = _mesa_add_unnamed_constant(this->prog->Parameters,
1892                                                    (gl_constant_value *) values,
1893                                                    ir->type->vector_elements,
1894                                                    &this->result.swizzle);
1895 }
1896
1897 void
1898 ir_to_mesa_visitor::visit(ir_call *)
1899 {
1900    assert(!"ir_to_mesa: All function calls should have been inlined by now.");
1901 }
1902
1903 void
1904 ir_to_mesa_visitor::visit(ir_texture *ir)
1905 {
1906    src_reg result_src, coord, lod_info, projector, dx, dy;
1907    dst_reg result_dst, coord_dst;
1908    ir_to_mesa_instruction *inst = NULL;
1909    prog_opcode opcode = OPCODE_NOP;
1910
1911    if (ir->op == ir_txs)
1912       this->result = src_reg_for_float(0.0);
1913    else
1914       ir->coordinate->accept(this);
1915
1916    /* Put our coords in a temp.  We'll need to modify them for shadow,
1917     * projection, or LOD, so the only case we'd use it as is is if
1918     * we're doing plain old texturing.  Mesa IR optimization should
1919     * handle cleaning up our mess in that case.
1920     */
1921    coord = get_temp(glsl_type::vec4_type);
1922    coord_dst = dst_reg(coord);
1923    emit(ir, OPCODE_MOV, coord_dst, this->result);
1924
1925    if (ir->projector) {
1926       ir->projector->accept(this);
1927       projector = this->result;
1928    }
1929
1930    /* Storage for our result.  Ideally for an assignment we'd be using
1931     * the actual storage for the result here, instead.
1932     */
1933    result_src = get_temp(glsl_type::vec4_type);
1934    result_dst = dst_reg(result_src);
1935
1936    switch (ir->op) {
1937    case ir_tex:
1938    case ir_txs:
1939       opcode = OPCODE_TEX;
1940       break;
1941    case ir_txb:
1942       opcode = OPCODE_TXB;
1943       ir->lod_info.bias->accept(this);
1944       lod_info = this->result;
1945       break;
1946    case ir_txf:
1947       /* Pretend to be TXL so the sampler, coordinate, lod are available */
1948    case ir_txl:
1949       opcode = OPCODE_TXL;
1950       ir->lod_info.lod->accept(this);
1951       lod_info = this->result;
1952       break;
1953    case ir_txd:
1954       opcode = OPCODE_TXD;
1955       ir->lod_info.grad.dPdx->accept(this);
1956       dx = this->result;
1957       ir->lod_info.grad.dPdy->accept(this);
1958       dy = this->result;
1959       break;
1960    case ir_txf_ms:
1961       assert(!"Unexpected ir_txf_ms opcode");
1962       break;
1963    case ir_lod:
1964       assert(!"Unexpected ir_lod opcode");
1965       break;
1966    case ir_tg4:
1967       assert(!"Unexpected ir_tg4 opcode");
1968       break;
1969    case ir_query_levels:
1970       assert(!"Unexpected ir_query_levels opcode");
1971       break;
1972    case ir_samples_identical:
1973       unreachable("Unexpected ir_samples_identical opcode");
1974    case ir_texture_samples:
1975       unreachable("Unexpected ir_texture_samples opcode");
1976    }
1977
1978    const glsl_type *sampler_type = ir->sampler->type;
1979
1980    if (ir->projector) {
1981       if (opcode == OPCODE_TEX) {
1982          /* Slot the projector in as the last component of the coord. */
1983          coord_dst.writemask = WRITEMASK_W;
1984          emit(ir, OPCODE_MOV, coord_dst, projector);
1985          coord_dst.writemask = WRITEMASK_XYZW;
1986          opcode = OPCODE_TXP;
1987       } else {
1988          src_reg coord_w = coord;
1989          coord_w.swizzle = SWIZZLE_WWWW;
1990
1991          /* For the other TEX opcodes there's no projective version
1992           * since the last slot is taken up by lod info.  Do the
1993           * projective divide now.
1994           */
1995          coord_dst.writemask = WRITEMASK_W;
1996          emit(ir, OPCODE_RCP, coord_dst, projector);
1997
1998          /* In the case where we have to project the coordinates "by hand,"
1999           * the shadow comparitor value must also be projected.
2000           */
2001          src_reg tmp_src = coord;
2002          if (ir->shadow_comparitor) {
2003             /* Slot the shadow value in as the second to last component of the
2004              * coord.
2005              */
2006             ir->shadow_comparitor->accept(this);
2007
2008             tmp_src = get_temp(glsl_type::vec4_type);
2009             dst_reg tmp_dst = dst_reg(tmp_src);
2010
2011             /* Projective division not allowed for array samplers. */
2012             assert(!sampler_type->sampler_array);
2013
2014             tmp_dst.writemask = WRITEMASK_Z;
2015             emit(ir, OPCODE_MOV, tmp_dst, this->result);
2016
2017             tmp_dst.writemask = WRITEMASK_XY;
2018             emit(ir, OPCODE_MOV, tmp_dst, coord);
2019          }
2020
2021          coord_dst.writemask = WRITEMASK_XYZ;
2022          emit(ir, OPCODE_MUL, coord_dst, tmp_src, coord_w);
2023
2024          coord_dst.writemask = WRITEMASK_XYZW;
2025          coord.swizzle = SWIZZLE_XYZW;
2026       }
2027    }
2028
2029    /* If projection is done and the opcode is not OPCODE_TXP, then the shadow
2030     * comparitor was put in the correct place (and projected) by the code,
2031     * above, that handles by-hand projection.
2032     */
2033    if (ir->shadow_comparitor && (!ir->projector || opcode == OPCODE_TXP)) {
2034       /* Slot the shadow value in as the second to last component of the
2035        * coord.
2036        */
2037       ir->shadow_comparitor->accept(this);
2038
2039       /* XXX This will need to be updated for cubemap array samplers. */
2040       if (sampler_type->sampler_dimensionality == GLSL_SAMPLER_DIM_2D &&
2041           sampler_type->sampler_array) {
2042          coord_dst.writemask = WRITEMASK_W;
2043       } else {
2044          coord_dst.writemask = WRITEMASK_Z;
2045       }
2046
2047       emit(ir, OPCODE_MOV, coord_dst, this->result);
2048       coord_dst.writemask = WRITEMASK_XYZW;
2049    }
2050
2051    if (opcode == OPCODE_TXL || opcode == OPCODE_TXB) {
2052       /* Mesa IR stores lod or lod bias in the last channel of the coords. */
2053       coord_dst.writemask = WRITEMASK_W;
2054       emit(ir, OPCODE_MOV, coord_dst, lod_info);
2055       coord_dst.writemask = WRITEMASK_XYZW;
2056    }
2057
2058    if (opcode == OPCODE_TXD)
2059       inst = emit(ir, opcode, result_dst, coord, dx, dy);
2060    else
2061       inst = emit(ir, opcode, result_dst, coord);
2062
2063    if (ir->shadow_comparitor)
2064       inst->tex_shadow = GL_TRUE;
2065
2066    inst->sampler = get_sampler_uniform_value(ir->sampler, shader_program,
2067                                              prog);
2068
2069    switch (sampler_type->sampler_dimensionality) {
2070    case GLSL_SAMPLER_DIM_1D:
2071       inst->tex_target = (sampler_type->sampler_array)
2072          ? TEXTURE_1D_ARRAY_INDEX : TEXTURE_1D_INDEX;
2073       break;
2074    case GLSL_SAMPLER_DIM_2D:
2075       inst->tex_target = (sampler_type->sampler_array)
2076          ? TEXTURE_2D_ARRAY_INDEX : TEXTURE_2D_INDEX;
2077       break;
2078    case GLSL_SAMPLER_DIM_3D:
2079       inst->tex_target = TEXTURE_3D_INDEX;
2080       break;
2081    case GLSL_SAMPLER_DIM_CUBE:
2082       inst->tex_target = TEXTURE_CUBE_INDEX;
2083       break;
2084    case GLSL_SAMPLER_DIM_RECT:
2085       inst->tex_target = TEXTURE_RECT_INDEX;
2086       break;
2087    case GLSL_SAMPLER_DIM_BUF:
2088       assert(!"FINISHME: Implement ARB_texture_buffer_object");
2089       break;
2090    case GLSL_SAMPLER_DIM_EXTERNAL:
2091       inst->tex_target = TEXTURE_EXTERNAL_INDEX;
2092       break;
2093    default:
2094       assert(!"Should not get here.");
2095    }
2096
2097    this->result = result_src;
2098 }
2099
2100 void
2101 ir_to_mesa_visitor::visit(ir_return *ir)
2102 {
2103    /* Non-void functions should have been inlined.  We may still emit RETs
2104     * from main() unless the EmitNoMainReturn option is set.
2105     */
2106    assert(!ir->get_value());
2107    emit(ir, OPCODE_RET);
2108 }
2109
2110 void
2111 ir_to_mesa_visitor::visit(ir_discard *ir)
2112 {
2113    if (ir->condition) {
2114       ir->condition->accept(this);
2115       this->result.negate = ~this->result.negate;
2116       emit(ir, OPCODE_KIL, undef_dst, this->result);
2117    } else {
2118       emit(ir, OPCODE_KIL_NV);
2119    }
2120 }
2121
2122 void
2123 ir_to_mesa_visitor::visit(ir_if *ir)
2124 {
2125    ir_to_mesa_instruction *cond_inst, *if_inst;
2126    ir_to_mesa_instruction *prev_inst;
2127
2128    prev_inst = (ir_to_mesa_instruction *)this->instructions.get_tail();
2129
2130    ir->condition->accept(this);
2131    assert(this->result.file != PROGRAM_UNDEFINED);
2132
2133    if (this->options->EmitCondCodes) {
2134       cond_inst = (ir_to_mesa_instruction *)this->instructions.get_tail();
2135
2136       /* See if we actually generated any instruction for generating
2137        * the condition.  If not, then cook up a move to a temp so we
2138        * have something to set cond_update on.
2139        */
2140       if (cond_inst == prev_inst) {
2141          src_reg temp = get_temp(glsl_type::bool_type);
2142          cond_inst = emit(ir->condition, OPCODE_MOV, dst_reg(temp), result);
2143       }
2144       cond_inst->cond_update = GL_TRUE;
2145
2146       if_inst = emit(ir->condition, OPCODE_IF);
2147       if_inst->dst.cond_mask = COND_NE;
2148    } else {
2149       if_inst = emit(ir->condition, OPCODE_IF, undef_dst, this->result);
2150    }
2151
2152    this->instructions.push_tail(if_inst);
2153
2154    visit_exec_list(&ir->then_instructions, this);
2155
2156    if (!ir->else_instructions.is_empty()) {
2157       emit(ir->condition, OPCODE_ELSE);
2158       visit_exec_list(&ir->else_instructions, this);
2159    }
2160
2161    emit(ir->condition, OPCODE_ENDIF);
2162 }
2163
2164 void
2165 ir_to_mesa_visitor::visit(ir_emit_vertex *)
2166 {
2167    assert(!"Geometry shaders not supported.");
2168 }
2169
2170 void
2171 ir_to_mesa_visitor::visit(ir_end_primitive *)
2172 {
2173    assert(!"Geometry shaders not supported.");
2174 }
2175
2176 void
2177 ir_to_mesa_visitor::visit(ir_barrier *)
2178 {
2179    unreachable("GLSL barrier() not supported.");
2180 }
2181
2182 ir_to_mesa_visitor::ir_to_mesa_visitor()
2183 {
2184    result.file = PROGRAM_UNDEFINED;
2185    next_temp = 1;
2186    next_signature_id = 1;
2187    current_function = NULL;
2188    mem_ctx = ralloc_context(NULL);
2189 }
2190
2191 ir_to_mesa_visitor::~ir_to_mesa_visitor()
2192 {
2193    ralloc_free(mem_ctx);
2194 }
2195
2196 static struct prog_src_register
2197 mesa_src_reg_from_ir_src_reg(src_reg reg)
2198 {
2199    struct prog_src_register mesa_reg;
2200
2201    mesa_reg.File = reg.file;
2202    assert(reg.index < (1 << INST_INDEX_BITS));
2203    mesa_reg.Index = reg.index;
2204    mesa_reg.Swizzle = reg.swizzle;
2205    mesa_reg.RelAddr = reg.reladdr != NULL;
2206    mesa_reg.Negate = reg.negate;
2207    mesa_reg.Abs = 0;
2208    mesa_reg.HasIndex2 = GL_FALSE;
2209    mesa_reg.RelAddr2 = 0;
2210    mesa_reg.Index2 = 0;
2211
2212    return mesa_reg;
2213 }
2214
2215 static void
2216 set_branchtargets(ir_to_mesa_visitor *v,
2217                   struct prog_instruction *mesa_instructions,
2218                   int num_instructions)
2219 {
2220    int if_count = 0, loop_count = 0;
2221    int *if_stack, *loop_stack;
2222    int if_stack_pos = 0, loop_stack_pos = 0;
2223    int i, j;
2224
2225    for (i = 0; i < num_instructions; i++) {
2226       switch (mesa_instructions[i].Opcode) {
2227       case OPCODE_IF:
2228          if_count++;
2229          break;
2230       case OPCODE_BGNLOOP:
2231          loop_count++;
2232          break;
2233       case OPCODE_BRK:
2234       case OPCODE_CONT:
2235          mesa_instructions[i].BranchTarget = -1;
2236          break;
2237       default:
2238          break;
2239       }
2240    }
2241
2242    if_stack = rzalloc_array(v->mem_ctx, int, if_count);
2243    loop_stack = rzalloc_array(v->mem_ctx, int, loop_count);
2244
2245    for (i = 0; i < num_instructions; i++) {
2246       switch (mesa_instructions[i].Opcode) {
2247       case OPCODE_IF:
2248          if_stack[if_stack_pos] = i;
2249          if_stack_pos++;
2250          break;
2251       case OPCODE_ELSE:
2252          mesa_instructions[if_stack[if_stack_pos - 1]].BranchTarget = i;
2253          if_stack[if_stack_pos - 1] = i;
2254          break;
2255       case OPCODE_ENDIF:
2256          mesa_instructions[if_stack[if_stack_pos - 1]].BranchTarget = i;
2257          if_stack_pos--;
2258          break;
2259       case OPCODE_BGNLOOP:
2260          loop_stack[loop_stack_pos] = i;
2261          loop_stack_pos++;
2262          break;
2263       case OPCODE_ENDLOOP:
2264          loop_stack_pos--;
2265          /* Rewrite any breaks/conts at this nesting level (haven't
2266           * already had a BranchTarget assigned) to point to the end
2267           * of the loop.
2268           */
2269          for (j = loop_stack[loop_stack_pos]; j < i; j++) {
2270             if (mesa_instructions[j].Opcode == OPCODE_BRK ||
2271                 mesa_instructions[j].Opcode == OPCODE_CONT) {
2272                if (mesa_instructions[j].BranchTarget == -1) {
2273                   mesa_instructions[j].BranchTarget = i;
2274                }
2275             }
2276          }
2277          /* The loop ends point at each other. */
2278          mesa_instructions[i].BranchTarget = loop_stack[loop_stack_pos];
2279          mesa_instructions[loop_stack[loop_stack_pos]].BranchTarget = i;
2280          break;
2281       case OPCODE_CAL:
2282          foreach_in_list(function_entry, entry, &v->function_signatures) {
2283             if (entry->sig_id == mesa_instructions[i].BranchTarget) {
2284                mesa_instructions[i].BranchTarget = entry->inst;
2285                break;
2286             }
2287          }
2288          break;
2289       default:
2290          break;
2291       }
2292    }
2293 }
2294
2295 static void
2296 print_program(struct prog_instruction *mesa_instructions,
2297               ir_instruction **mesa_instruction_annotation,
2298               int num_instructions)
2299 {
2300    ir_instruction *last_ir = NULL;
2301    int i;
2302    int indent = 0;
2303
2304    for (i = 0; i < num_instructions; i++) {
2305       struct prog_instruction *mesa_inst = mesa_instructions + i;
2306       ir_instruction *ir = mesa_instruction_annotation[i];
2307
2308       fprintf(stdout, "%3d: ", i);
2309
2310       if (last_ir != ir && ir) {
2311          int j;
2312
2313          for (j = 0; j < indent; j++) {
2314             fprintf(stdout, " ");
2315          }
2316          ir->print();
2317          printf("\n");
2318          last_ir = ir;
2319
2320          fprintf(stdout, "     "); /* line number spacing. */
2321       }
2322
2323       indent = _mesa_fprint_instruction_opt(stdout, mesa_inst, indent,
2324                                             PROG_PRINT_DEBUG, NULL);
2325    }
2326 }
2327
2328 namespace {
2329
2330 class add_uniform_to_shader : public program_resource_visitor {
2331 public:
2332    add_uniform_to_shader(struct gl_shader_program *shader_program,
2333                          struct gl_program_parameter_list *params,
2334                          gl_shader_stage shader_type)
2335       : shader_program(shader_program), params(params), idx(-1),
2336         shader_type(shader_type)
2337    {
2338       /* empty */
2339    }
2340
2341    void process(ir_variable *var)
2342    {
2343       this->idx = -1;
2344       this->program_resource_visitor::process(var);
2345       var->data.param_index = this->idx;
2346    }
2347
2348 private:
2349    virtual void visit_field(const glsl_type *type, const char *name,
2350                             bool row_major);
2351
2352    struct gl_shader_program *shader_program;
2353    struct gl_program_parameter_list *params;
2354    int idx;
2355    gl_shader_stage shader_type;
2356 };
2357
2358 } /* anonymous namespace */
2359
2360 void
2361 add_uniform_to_shader::visit_field(const glsl_type *type, const char *name,
2362                                    bool row_major)
2363 {
2364    unsigned int size;
2365
2366    (void) row_major;
2367
2368    /* atomics don't get real storage */
2369    if (type->contains_atomic())
2370       return;
2371
2372    if (type->is_vector() || type->is_scalar()) {
2373       size = type->vector_elements;
2374       if (type->is_double())
2375          size *= 2;
2376    } else {
2377       size = type_size(type) * 4;
2378    }
2379
2380    gl_register_file file;
2381    if (type->without_array()->is_sampler()) {
2382       file = PROGRAM_SAMPLER;
2383    } else {
2384       file = PROGRAM_UNIFORM;
2385    }
2386
2387    int index = _mesa_lookup_parameter_index(params, -1, name);
2388    if (index < 0) {
2389       index = _mesa_add_parameter(params, file, name, size, type->gl_type,
2390                                   NULL, NULL);
2391
2392       /* Sampler uniform values are stored in prog->SamplerUnits,
2393        * and the entry in that array is selected by this index we
2394        * store in ParameterValues[].
2395        */
2396       if (file == PROGRAM_SAMPLER) {
2397          unsigned location;
2398          const bool found =
2399             this->shader_program->UniformHash->get(location,
2400                                                    params->Parameters[index].Name);
2401          assert(found);
2402
2403          if (!found)
2404             return;
2405
2406          struct gl_uniform_storage *storage =
2407             &this->shader_program->UniformStorage[location];
2408
2409          assert(storage->type->is_sampler() &&
2410                 storage->opaque[shader_type].active);
2411
2412          for (unsigned int j = 0; j < size / 4; j++)
2413             params->ParameterValues[index + j][0].f =
2414                storage->opaque[shader_type].index + j;
2415       }
2416    }
2417
2418    /* The first part of the uniform that's processed determines the base
2419     * location of the whole uniform (for structures).
2420     */
2421    if (this->idx < 0)
2422       this->idx = index;
2423 }
2424
2425 /**
2426  * Generate the program parameters list for the user uniforms in a shader
2427  *
2428  * \param shader_program Linked shader program.  This is only used to
2429  *                       emit possible link errors to the info log.
2430  * \param sh             Shader whose uniforms are to be processed.
2431  * \param params         Parameter list to be filled in.
2432  */
2433 void
2434 _mesa_generate_parameters_list_for_uniforms(struct gl_shader_program
2435                                             *shader_program,
2436                                             struct gl_shader *sh,
2437                                             struct gl_program_parameter_list
2438                                             *params)
2439 {
2440    add_uniform_to_shader add(shader_program, params, sh->Stage);
2441
2442    foreach_in_list(ir_instruction, node, sh->ir) {
2443       ir_variable *var = node->as_variable();
2444
2445       if ((var == NULL) || (var->data.mode != ir_var_uniform)
2446           || var->is_in_buffer_block() || (strncmp(var->name, "gl_", 3) == 0))
2447          continue;
2448
2449       add.process(var);
2450    }
2451 }
2452
2453 void
2454 _mesa_associate_uniform_storage(struct gl_context *ctx,
2455                                 struct gl_shader_program *shader_program,
2456                                 struct gl_program_parameter_list *params)
2457 {
2458    /* After adding each uniform to the parameter list, connect the storage for
2459     * the parameter with the tracking structure used by the API for the
2460     * uniform.
2461     */
2462    unsigned last_location = unsigned(~0);
2463    for (unsigned i = 0; i < params->NumParameters; i++) {
2464       if (params->Parameters[i].Type != PROGRAM_UNIFORM)
2465          continue;
2466
2467       unsigned location;
2468       const bool found =
2469          shader_program->UniformHash->get(location, params->Parameters[i].Name);
2470       assert(found);
2471
2472       if (!found)
2473          continue;
2474
2475       struct gl_uniform_storage *storage =
2476          &shader_program->UniformStorage[location];
2477
2478       /* Do not associate any uniform storage to built-in uniforms */
2479       if (storage->builtin)
2480          continue;
2481
2482       if (location != last_location) {
2483          enum gl_uniform_driver_format format = uniform_native;
2484
2485          unsigned columns = 0;
2486          int dmul = 4 * sizeof(float);
2487          switch (storage->type->base_type) {
2488          case GLSL_TYPE_UINT:
2489             assert(ctx->Const.NativeIntegers);
2490             format = uniform_native;
2491             columns = 1;
2492             break;
2493          case GLSL_TYPE_INT:
2494             format =
2495                (ctx->Const.NativeIntegers) ? uniform_native : uniform_int_float;
2496             columns = 1;
2497             break;
2498
2499          case GLSL_TYPE_DOUBLE:
2500             if (storage->type->vector_elements > 2)
2501                dmul *= 2;
2502             /* fallthrough */
2503          case GLSL_TYPE_FLOAT:
2504             format = uniform_native;
2505             columns = storage->type->matrix_columns;
2506             break;
2507          case GLSL_TYPE_BOOL:
2508             format = uniform_native;
2509             columns = 1;
2510             break;
2511          case GLSL_TYPE_SAMPLER:
2512          case GLSL_TYPE_IMAGE:
2513          case GLSL_TYPE_SUBROUTINE:
2514             format = uniform_native;
2515             columns = 1;
2516             break;
2517          case GLSL_TYPE_ATOMIC_UINT:
2518          case GLSL_TYPE_ARRAY:
2519          case GLSL_TYPE_VOID:
2520          case GLSL_TYPE_STRUCT:
2521          case GLSL_TYPE_ERROR:
2522          case GLSL_TYPE_INTERFACE:
2523             assert(!"Should not get here.");
2524             break;
2525          }
2526
2527          _mesa_uniform_attach_driver_storage(storage,
2528                                              dmul * columns,
2529                                              dmul,
2530                                              format,
2531                                              &params->ParameterValues[i]);
2532
2533          /* After attaching the driver's storage to the uniform, propagate any
2534           * data from the linker's backing store.  This will cause values from
2535           * initializers in the source code to be copied over.
2536           */
2537          _mesa_propagate_uniforms_to_driver_storage(storage,
2538                                                     0,
2539                                                     MAX2(1, storage->array_elements));
2540
2541          last_location = location;
2542       }
2543    }
2544 }
2545
2546 /*
2547  * On a basic block basis, tracks available PROGRAM_TEMPORARY register
2548  * channels for copy propagation and updates following instructions to
2549  * use the original versions.
2550  *
2551  * The ir_to_mesa_visitor lazily produces code assuming that this pass
2552  * will occur.  As an example, a TXP production before this pass:
2553  *
2554  * 0: MOV TEMP[1], INPUT[4].xyyy;
2555  * 1: MOV TEMP[1].w, INPUT[4].wwww;
2556  * 2: TXP TEMP[2], TEMP[1], texture[0], 2D;
2557  *
2558  * and after:
2559  *
2560  * 0: MOV TEMP[1], INPUT[4].xyyy;
2561  * 1: MOV TEMP[1].w, INPUT[4].wwww;
2562  * 2: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D;
2563  *
2564  * which allows for dead code elimination on TEMP[1]'s writes.
2565  */
2566 void
2567 ir_to_mesa_visitor::copy_propagate(void)
2568 {
2569    ir_to_mesa_instruction **acp = rzalloc_array(mem_ctx,
2570                                                     ir_to_mesa_instruction *,
2571                                                     this->next_temp * 4);
2572    int *acp_level = rzalloc_array(mem_ctx, int, this->next_temp * 4);
2573    int level = 0;
2574
2575    foreach_in_list(ir_to_mesa_instruction, inst, &this->instructions) {
2576       assert(inst->dst.file != PROGRAM_TEMPORARY
2577              || inst->dst.index < this->next_temp);
2578
2579       /* First, do any copy propagation possible into the src regs. */
2580       for (int r = 0; r < 3; r++) {
2581          ir_to_mesa_instruction *first = NULL;
2582          bool good = true;
2583          int acp_base = inst->src[r].index * 4;
2584
2585          if (inst->src[r].file != PROGRAM_TEMPORARY ||
2586              inst->src[r].reladdr)
2587             continue;
2588
2589          /* See if we can find entries in the ACP consisting of MOVs
2590           * from the same src register for all the swizzled channels
2591           * of this src register reference.
2592           */
2593          for (int i = 0; i < 4; i++) {
2594             int src_chan = GET_SWZ(inst->src[r].swizzle, i);
2595             ir_to_mesa_instruction *copy_chan = acp[acp_base + src_chan];
2596
2597             if (!copy_chan) {
2598                good = false;
2599                break;
2600             }
2601
2602             assert(acp_level[acp_base + src_chan] <= level);
2603
2604             if (!first) {
2605                first = copy_chan;
2606             } else {
2607                if (first->src[0].file != copy_chan->src[0].file ||
2608                    first->src[0].index != copy_chan->src[0].index) {
2609                   good = false;
2610                   break;
2611                }
2612             }
2613          }
2614
2615          if (good) {
2616             /* We've now validated that we can copy-propagate to
2617              * replace this src register reference.  Do it.
2618              */
2619             inst->src[r].file = first->src[0].file;
2620             inst->src[r].index = first->src[0].index;
2621
2622             int swizzle = 0;
2623             for (int i = 0; i < 4; i++) {
2624                int src_chan = GET_SWZ(inst->src[r].swizzle, i);
2625                ir_to_mesa_instruction *copy_inst = acp[acp_base + src_chan];
2626                swizzle |= (GET_SWZ(copy_inst->src[0].swizzle, src_chan) <<
2627                            (3 * i));
2628             }
2629             inst->src[r].swizzle = swizzle;
2630          }
2631       }
2632
2633       switch (inst->op) {
2634       case OPCODE_BGNLOOP:
2635       case OPCODE_ENDLOOP:
2636          /* End of a basic block, clear the ACP entirely. */
2637          memset(acp, 0, sizeof(*acp) * this->next_temp * 4);
2638          break;
2639
2640       case OPCODE_IF:
2641          ++level;
2642          break;
2643
2644       case OPCODE_ENDIF:
2645       case OPCODE_ELSE:
2646          /* Clear all channels written inside the block from the ACP, but
2647           * leaving those that were not touched.
2648           */
2649          for (int r = 0; r < this->next_temp; r++) {
2650             for (int c = 0; c < 4; c++) {
2651                if (!acp[4 * r + c])
2652                   continue;
2653
2654                if (acp_level[4 * r + c] >= level)
2655                   acp[4 * r + c] = NULL;
2656             }
2657          }
2658          if (inst->op == OPCODE_ENDIF)
2659             --level;
2660          break;
2661
2662       default:
2663          /* Continuing the block, clear any written channels from
2664           * the ACP.
2665           */
2666          if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.reladdr) {
2667             /* Any temporary might be written, so no copy propagation
2668              * across this instruction.
2669              */
2670             memset(acp, 0, sizeof(*acp) * this->next_temp * 4);
2671          } else if (inst->dst.file == PROGRAM_OUTPUT &&
2672                     inst->dst.reladdr) {
2673             /* Any output might be written, so no copy propagation
2674              * from outputs across this instruction.
2675              */
2676             for (int r = 0; r < this->next_temp; r++) {
2677                for (int c = 0; c < 4; c++) {
2678                   if (!acp[4 * r + c])
2679                      continue;
2680
2681                   if (acp[4 * r + c]->src[0].file == PROGRAM_OUTPUT)
2682                      acp[4 * r + c] = NULL;
2683                }
2684             }
2685          } else if (inst->dst.file == PROGRAM_TEMPORARY ||
2686                     inst->dst.file == PROGRAM_OUTPUT) {
2687             /* Clear where it's used as dst. */
2688             if (inst->dst.file == PROGRAM_TEMPORARY) {
2689                for (int c = 0; c < 4; c++) {
2690                   if (inst->dst.writemask & (1 << c)) {
2691                      acp[4 * inst->dst.index + c] = NULL;
2692                   }
2693                }
2694             }
2695
2696             /* Clear where it's used as src. */
2697             for (int r = 0; r < this->next_temp; r++) {
2698                for (int c = 0; c < 4; c++) {
2699                   if (!acp[4 * r + c])
2700                      continue;
2701
2702                   int src_chan = GET_SWZ(acp[4 * r + c]->src[0].swizzle, c);
2703
2704                   if (acp[4 * r + c]->src[0].file == inst->dst.file &&
2705                       acp[4 * r + c]->src[0].index == inst->dst.index &&
2706                       inst->dst.writemask & (1 << src_chan))
2707                   {
2708                      acp[4 * r + c] = NULL;
2709                   }
2710                }
2711             }
2712          }
2713          break;
2714       }
2715
2716       /* If this is a copy, add it to the ACP. */
2717       if (inst->op == OPCODE_MOV &&
2718           inst->dst.file == PROGRAM_TEMPORARY &&
2719           !(inst->dst.file == inst->src[0].file &&
2720             inst->dst.index == inst->src[0].index) &&
2721           !inst->dst.reladdr &&
2722           !inst->saturate &&
2723           !inst->src[0].reladdr &&
2724           !inst->src[0].negate) {
2725          for (int i = 0; i < 4; i++) {
2726             if (inst->dst.writemask & (1 << i)) {
2727                acp[4 * inst->dst.index + i] = inst;
2728                acp_level[4 * inst->dst.index + i] = level;
2729             }
2730          }
2731       }
2732    }
2733
2734    ralloc_free(acp_level);
2735    ralloc_free(acp);
2736 }
2737
2738
2739 /**
2740  * Convert a shader's GLSL IR into a Mesa gl_program.
2741  */
2742 static struct gl_program *
2743 get_mesa_program(struct gl_context *ctx,
2744                  struct gl_shader_program *shader_program,
2745                  struct gl_shader *shader)
2746 {
2747    ir_to_mesa_visitor v;
2748    struct prog_instruction *mesa_instructions, *mesa_inst;
2749    ir_instruction **mesa_instruction_annotation;
2750    int i;
2751    struct gl_program *prog;
2752    GLenum target = _mesa_shader_stage_to_program(shader->Stage);
2753    const char *target_string = _mesa_shader_stage_to_string(shader->Stage);
2754    struct gl_shader_compiler_options *options =
2755          &ctx->Const.ShaderCompilerOptions[shader->Stage];
2756
2757    validate_ir_tree(shader->ir);
2758
2759    prog = ctx->Driver.NewProgram(ctx, target, shader_program->Name);
2760    if (!prog)
2761       return NULL;
2762    prog->Parameters = _mesa_new_parameter_list();
2763    v.ctx = ctx;
2764    v.prog = prog;
2765    v.shader_program = shader_program;
2766    v.options = options;
2767
2768    _mesa_generate_parameters_list_for_uniforms(shader_program, shader,
2769                                                prog->Parameters);
2770
2771    /* Emit Mesa IR for main(). */
2772    visit_exec_list(shader->ir, &v);
2773    v.emit(NULL, OPCODE_END);
2774
2775    prog->NumTemporaries = v.next_temp;
2776
2777    unsigned num_instructions = v.instructions.length();
2778
2779    mesa_instructions =
2780       (struct prog_instruction *)calloc(num_instructions,
2781                                         sizeof(*mesa_instructions));
2782    mesa_instruction_annotation = ralloc_array(v.mem_ctx, ir_instruction *,
2783                                               num_instructions);
2784
2785    v.copy_propagate();
2786
2787    /* Convert ir_mesa_instructions into prog_instructions.
2788     */
2789    mesa_inst = mesa_instructions;
2790    i = 0;
2791    foreach_in_list(const ir_to_mesa_instruction, inst, &v.instructions) {
2792       mesa_inst->Opcode = inst->op;
2793       mesa_inst->CondUpdate = inst->cond_update;
2794       if (inst->saturate)
2795          mesa_inst->Saturate = GL_TRUE;
2796       mesa_inst->DstReg.File = inst->dst.file;
2797       mesa_inst->DstReg.Index = inst->dst.index;
2798       mesa_inst->DstReg.CondMask = inst->dst.cond_mask;
2799       mesa_inst->DstReg.WriteMask = inst->dst.writemask;
2800       mesa_inst->DstReg.RelAddr = inst->dst.reladdr != NULL;
2801       mesa_inst->SrcReg[0] = mesa_src_reg_from_ir_src_reg(inst->src[0]);
2802       mesa_inst->SrcReg[1] = mesa_src_reg_from_ir_src_reg(inst->src[1]);
2803       mesa_inst->SrcReg[2] = mesa_src_reg_from_ir_src_reg(inst->src[2]);
2804       mesa_inst->TexSrcUnit = inst->sampler;
2805       mesa_inst->TexSrcTarget = inst->tex_target;
2806       mesa_inst->TexShadow = inst->tex_shadow;
2807       mesa_instruction_annotation[i] = inst->ir;
2808
2809       /* Set IndirectRegisterFiles. */
2810       if (mesa_inst->DstReg.RelAddr)
2811          prog->IndirectRegisterFiles |= 1 << mesa_inst->DstReg.File;
2812
2813       /* Update program's bitmask of indirectly accessed register files */
2814       for (unsigned src = 0; src < 3; src++)
2815          if (mesa_inst->SrcReg[src].RelAddr)
2816             prog->IndirectRegisterFiles |= 1 << mesa_inst->SrcReg[src].File;
2817
2818       switch (mesa_inst->Opcode) {
2819       case OPCODE_IF:
2820          if (options->MaxIfDepth == 0) {
2821             linker_warning(shader_program,
2822                            "Couldn't flatten if-statement.  "
2823                            "This will likely result in software "
2824                            "rasterization.\n");
2825          }
2826          break;
2827       case OPCODE_BGNLOOP:
2828          if (options->EmitNoLoops) {
2829             linker_warning(shader_program,
2830                            "Couldn't unroll loop.  "
2831                            "This will likely result in software "
2832                            "rasterization.\n");
2833          }
2834          break;
2835       case OPCODE_CONT:
2836          if (options->EmitNoCont) {
2837             linker_warning(shader_program,
2838                            "Couldn't lower continue-statement.  "
2839                            "This will likely result in software "
2840                            "rasterization.\n");
2841          }
2842          break;
2843       case OPCODE_ARL:
2844          prog->NumAddressRegs = 1;
2845          break;
2846       default:
2847          break;
2848       }
2849
2850       mesa_inst++;
2851       i++;
2852
2853       if (!shader_program->LinkStatus)
2854          break;
2855    }
2856
2857    if (!shader_program->LinkStatus) {
2858       goto fail_exit;
2859    }
2860
2861    set_branchtargets(&v, mesa_instructions, num_instructions);
2862
2863    if (ctx->_Shader->Flags & GLSL_DUMP) {
2864       fprintf(stderr, "\n");
2865       fprintf(stderr, "GLSL IR for linked %s program %d:\n", target_string,
2866               shader_program->Name);
2867       _mesa_print_ir(stderr, shader->ir, NULL);
2868       fprintf(stderr, "\n");
2869       fprintf(stderr, "\n");
2870       fprintf(stderr, "Mesa IR for linked %s program %d:\n", target_string,
2871               shader_program->Name);
2872       print_program(mesa_instructions, mesa_instruction_annotation,
2873                     num_instructions);
2874       fflush(stderr);
2875    }
2876
2877    prog->Instructions = mesa_instructions;
2878    prog->NumInstructions = num_instructions;
2879
2880    /* Setting this to NULL prevents a possible double free in the fail_exit
2881     * path (far below).
2882     */
2883    mesa_instructions = NULL;
2884
2885    do_set_program_inouts(shader->ir, prog, shader->Stage);
2886
2887    prog->SamplersUsed = shader->active_samplers;
2888    prog->ShadowSamplers = shader->shadow_samplers;
2889    _mesa_update_shader_textures_used(shader_program, prog);
2890
2891    /* Set the gl_FragDepth layout. */
2892    if (target == GL_FRAGMENT_PROGRAM_ARB) {
2893       struct gl_fragment_program *fp = (struct gl_fragment_program *)prog;
2894       fp->FragDepthLayout = shader_program->FragDepthLayout;
2895    }
2896
2897    _mesa_reference_program(ctx, &shader->Program, prog);
2898
2899    if ((ctx->_Shader->Flags & GLSL_NO_OPT) == 0) {
2900       _mesa_optimize_program(ctx, prog);
2901    }
2902
2903    /* This has to be done last.  Any operation that can cause
2904     * prog->ParameterValues to get reallocated (e.g., anything that adds a
2905     * program constant) has to happen before creating this linkage.
2906     */
2907    _mesa_associate_uniform_storage(ctx, shader_program, prog->Parameters);
2908    if (!shader_program->LinkStatus) {
2909       goto fail_exit;
2910    }
2911
2912    return prog;
2913
2914 fail_exit:
2915    free(mesa_instructions);
2916    _mesa_reference_program(ctx, &shader->Program, NULL);
2917    return NULL;
2918 }
2919
2920 extern "C" {
2921
2922 /**
2923  * Link a shader.
2924  * Called via ctx->Driver.LinkShader()
2925  * This actually involves converting GLSL IR into Mesa gl_programs with
2926  * code lowering and other optimizations.
2927  */
2928 GLboolean
2929 _mesa_ir_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
2930 {
2931    assert(prog->LinkStatus);
2932
2933    for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
2934       if (prog->_LinkedShaders[i] == NULL)
2935          continue;
2936
2937       bool progress;
2938       exec_list *ir = prog->_LinkedShaders[i]->ir;
2939       const struct gl_shader_compiler_options *options =
2940             &ctx->Const.ShaderCompilerOptions[prog->_LinkedShaders[i]->Stage];
2941
2942       do {
2943          progress = false;
2944
2945          /* Lowering */
2946          do_mat_op_to_vec(ir);
2947          lower_instructions(ir, (MOD_TO_FLOOR | DIV_TO_MUL_RCP | EXP_TO_EXP2
2948                                  | LOG_TO_LOG2 | INT_DIV_TO_MUL_RCP
2949                                  | ((options->EmitNoPow) ? POW_TO_EXP2 : 0)));
2950
2951          progress = do_lower_jumps(ir, true, true, options->EmitNoMainReturn, options->EmitNoCont, options->EmitNoLoops) || progress;
2952
2953          progress = do_common_optimization(ir, true, true,
2954                                            options, ctx->Const.NativeIntegers)
2955            || progress;
2956
2957          progress = lower_quadop_vector(ir, true) || progress;
2958
2959          if (options->MaxIfDepth == 0)
2960             progress = lower_discard(ir) || progress;
2961
2962          progress = lower_if_to_cond_assign(ir, options->MaxIfDepth) || progress;
2963
2964          if (options->EmitNoNoise)
2965             progress = lower_noise(ir) || progress;
2966
2967          /* If there are forms of indirect addressing that the driver
2968           * cannot handle, perform the lowering pass.
2969           */
2970          if (options->EmitNoIndirectInput || options->EmitNoIndirectOutput
2971              || options->EmitNoIndirectTemp || options->EmitNoIndirectUniform)
2972            progress =
2973              lower_variable_index_to_cond_assign(prog->_LinkedShaders[i]->Stage, ir,
2974                                                  options->EmitNoIndirectInput,
2975                                                  options->EmitNoIndirectOutput,
2976                                                  options->EmitNoIndirectTemp,
2977                                                  options->EmitNoIndirectUniform)
2978              || progress;
2979
2980          progress = do_vec_index_to_cond_assign(ir) || progress;
2981          progress = lower_vector_insert(ir, true) || progress;
2982       } while (progress);
2983
2984       validate_ir_tree(ir);
2985    }
2986
2987    for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
2988       struct gl_program *linked_prog;
2989
2990       if (prog->_LinkedShaders[i] == NULL)
2991          continue;
2992
2993       linked_prog = get_mesa_program(ctx, prog, prog->_LinkedShaders[i]);
2994
2995       if (linked_prog) {
2996          _mesa_copy_linked_program_data((gl_shader_stage) i, prog, linked_prog);
2997
2998          _mesa_reference_program(ctx, &prog->_LinkedShaders[i]->Program,
2999                                  linked_prog);
3000          if (!ctx->Driver.ProgramStringNotify(ctx,
3001                                               _mesa_shader_stage_to_program(i),
3002                                               linked_prog)) {
3003             return GL_FALSE;
3004          }
3005       }
3006
3007       _mesa_reference_program(ctx, &linked_prog, NULL);
3008    }
3009
3010    return prog->LinkStatus;
3011 }
3012
3013 /**
3014  * Link a GLSL shader program.  Called via glLinkProgram().
3015  */
3016 void
3017 _mesa_glsl_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
3018 {
3019    unsigned int i;
3020
3021    _mesa_clear_shader_program_data(prog);
3022
3023    prog->LinkStatus = GL_TRUE;
3024
3025    for (i = 0; i < prog->NumShaders; i++) {
3026       if (!prog->Shaders[i]->CompileStatus) {
3027          linker_error(prog, "linking with uncompiled shader");
3028       }
3029    }
3030
3031    if (prog->LinkStatus) {
3032       link_shaders(ctx, prog);
3033    }
3034
3035    if (prog->LinkStatus) {
3036       if (!ctx->Driver.LinkShader(ctx, prog)) {
3037          prog->LinkStatus = GL_FALSE;
3038       } else {
3039          build_program_resource_list(prog);
3040       }
3041    }
3042
3043    if (ctx->_Shader->Flags & GLSL_DUMP) {
3044       if (!prog->LinkStatus) {
3045          fprintf(stderr, "GLSL shader program %d failed to link\n", prog->Name);
3046       }
3047
3048       if (prog->InfoLog && prog->InfoLog[0] != 0) {
3049          fprintf(stderr, "GLSL shader program %d info log:\n", prog->Name);
3050          fprintf(stderr, "%s\n", prog->InfoLog);
3051       }
3052    }
3053 }
3054
3055 } /* extern "C" */