src/mesa/program/ir_to_mesa.cpp

   1 /*
   2  * Copyright (C) 2005-2007  Brian Paul   All Rights Reserved.
   3  * Copyright (C) 2008  VMware, Inc.   All Rights Reserved.
   4  * Copyright © 2010 Intel Corporation
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a
   7  * copy of this software and associated documentation files (the "Software"),
   8  * to deal in the Software without restriction, including without limitation
   9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  10  * and/or sell copies of the Software, and to permit persons to whom the
  11  * Software is furnished to do so, subject to the following conditions:
  12  *
  13  * The above copyright notice and this permission notice (including the next
  14  * paragraph) shall be included in all copies or substantial portions of the
  15  * Software.
  16  *
  17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  20  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  22  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  23  * DEALINGS IN THE SOFTWARE.
  24  */
  25
  26 /**
  27  * \file ir_to_mesa.cpp
  28  *
  29  * Translate GLSL IR to Mesa's gl_program representation.
  30  */
  31
  32 #include <stdio.h>
  33 #include "main/compiler.h"
  34 #include "main/macros.h"
  35 #include "main/mtypes.h"
  36 #include "main/shaderapi.h"
  37 #include "main/shaderobj.h"
  38 #include "main/uniforms.h"
  39 #include "compiler/glsl/ast.h"
  40 #include "compiler/glsl/ir.h"
  41 #include "compiler/glsl/ir_expression_flattening.h"
  42 #include "compiler/glsl/ir_visitor.h"
  43 #include "compiler/glsl/ir_optimization.h"
  44 #include "compiler/glsl/ir_uniform.h"
  45 #include "compiler/glsl/glsl_parser_extras.h"
  46 #include "compiler/glsl_types.h"
  47 #include "compiler/glsl/linker.h"
  48 #include "compiler/glsl/program.h"
  49 #include "program/hash_table.h"
  50 #include "program/prog_instruction.h"
  51 #include "program/prog_optimize.h"
  52 #include "program/prog_print.h"
  53 #include "program/program.h"
  54 #include "program/prog_parameter.h"
  55
  56
  57 static int swizzle_for_size(int size);
  58
  59 namespace {
  60
  61 class src_reg;
  62 class dst_reg;
  63
  64 /**
  65  * This struct is a corresponding struct to Mesa prog_src_register, with
  66  * wider fields.
  67  */
  68 class src_reg {
  69 public:
  70    src_reg(gl_register_file file, int index, const glsl_type *type)
  71    {
  72       this->file = file;
  73       this->index = index;
  74       if (type && (type->is_scalar() || type->is_vector() || type->is_matrix()))
  75          this->swizzle = swizzle_for_size(type->vector_elements);
  76       else
  77          this->swizzle = SWIZZLE_XYZW;
  78       this->negate = 0;
  79       this->reladdr = NULL;
  80    }
  81
  82    src_reg()
  83    {
  84       this->file = PROGRAM_UNDEFINED;
  85       this->index = 0;
  86       this->swizzle = 0;
  87       this->negate = 0;
  88       this->reladdr = NULL;
  89    }
  90
  91    explicit src_reg(dst_reg reg);
  92
  93    gl_register_file file; /**< PROGRAM_* from Mesa */
  94    int index; /**< temporary index, VERT_ATTRIB_*, VARYING_SLOT_*, etc. */
  95    GLuint swizzle; /**< SWIZZLE_XYZWONEZERO swizzles from Mesa. */
  96    int negate; /**< NEGATE_XYZW mask from mesa */
  97    /** Register index should be offset by the integer in this reg. */
  98    src_reg *reladdr;
  99 };
 100
 101 class dst_reg {
 102 public:
 103    dst_reg(gl_register_file file, int writemask)
 104    {
 105       this->file = file;
 106       this->index = 0;
 107       this->writemask = writemask;
 108       this->reladdr = NULL;
 109    }
 110
 111    dst_reg()
 112    {
 113       this->file = PROGRAM_UNDEFINED;
 114       this->index = 0;
 115       this->writemask = 0;
 116       this->reladdr = NULL;
 117    }
 118
 119    explicit dst_reg(src_reg reg);
 120
 121    gl_register_file file; /**< PROGRAM_* from Mesa */
 122    int index; /**< temporary index, VERT_ATTRIB_*, VARYING_SLOT_*, etc. */
 123    int writemask; /**< Bitfield of WRITEMASK_[XYZW] */
 124    /** Register index should be offset by the integer in this reg. */
 125    src_reg *reladdr;
 126 };
 127
 128 } /* anonymous namespace */
 129
 130 src_reg::src_reg(dst_reg reg)
 131 {
 132    this->file = reg.file;
 133    this->index = reg.index;
 134    this->swizzle = SWIZZLE_XYZW;
 135    this->negate = 0;
 136    this->reladdr = reg.reladdr;
 137 }
 138
 139 dst_reg::dst_reg(src_reg reg)
 140 {
 141    this->file = reg.file;
 142    this->index = reg.index;
 143    this->writemask = WRITEMASK_XYZW;
 144    this->reladdr = reg.reladdr;
 145 }
 146
 147 namespace {
 148
/**
 * One instruction in the visitor's intermediate instruction list.
 *
 * Instances are created by ir_to_mesa_visitor::emit() and appended to the
 * visitor's \c instructions list.
 */
class ir_to_mesa_instruction : public exec_node {
public:
   DECLARE_RALLOC_CXX_OPERATORS(ir_to_mesa_instruction)

   enum prog_opcode op; /**< Mesa opcode to execute */
   dst_reg dst; /**< destination operand */
   /** Source operands; emit() fills unused slots with undef_src. */
   src_reg src[3];
   /** Pointer to the ir source this tree came from for debugging */
   ir_instruction *ir;
   /* NOTE(review): presumably requests clamping of the result; the code
    * that sets or consumes this flag is not in this part of the file.
    */
   bool saturate;
   int sampler; /**< sampler index */
   int tex_target; /**< One of TEXTURE_*_INDEX */
   /* NOTE(review): presumably marks a shadow-comparison texture lookup;
    * confirm against the texture visitor.
    */
   GLboolean tex_shadow;
};
 163
/**
 * Records which register file and index an ir_variable has been assigned.
 *
 * Entries live on ir_to_mesa_visitor::variables and are looked up by
 * variable pointer in find_variable_storage().
 */
class variable_storage : public exec_node {
public:
   /** Bind \p var to register \p index of \p file. */
   variable_storage(ir_variable *var, gl_register_file file, int index)
      : file(file), index(index), var(var)
   {
      /* empty */
   }

   gl_register_file file; /* register file holding the variable */
   int index; /* first register index within that file */
   ir_variable *var; /* variable that maps to this, if any */
};
 176
/**
 * Bookkeeping for one function signature known to the visitor.
 *
 * Entries live on ir_to_mesa_visitor::function_signatures.
 */
class function_entry : public exec_node {
public:
   /** The GLSL IR signature this entry describes. */
   ir_function_signature *sig;

   /**
    * identifier of this function signature used by the program.
    *
    * At the point that Mesa instructions for function calls are
    * generated, we don't know the address of the first instruction of
    * the function body.  So we make the BranchTarget that is called a
    * small integer and rewrite them during set_branchtargets().
    */
   int sig_id;

   /**
    * Pointer to first instruction of the function body.
    *
    * Set during function body emits after main() is processed.
    */
   ir_to_mesa_instruction *bgn_inst;

   /**
    * Index of the first instruction of the function body in actual
    * Mesa IR.
    *
    * Set after conversion from ir_to_mesa_instruction to prog_instruction.
    */
   int inst;

   /** Storage for the return value. */
   src_reg return_reg;
};
 209
/**
 * IR visitor that walks GLSL IR and builds a list of
 * ir_to_mesa_instruction objects.
 *
 * Rvalue visits leave the register holding their value in \c result;
 * instructions are appended to \c instructions through the emit() family.
 */
class ir_to_mesa_visitor : public ir_visitor {
public:
   ir_to_mesa_visitor();
   ~ir_to_mesa_visitor();

   function_entry *current_function;

   struct gl_context *ctx;
   /** Program being generated; its Parameters list receives constants
    * and state references.
    */
   struct gl_program *prog;
   /** Shader program used when reporting linker errors. */
   struct gl_shader_program *shader_program;
   struct gl_shader_compiler_options *options;

   /** Index of the next unused PROGRAM_TEMPORARY register. */
   int next_temp;

   /** Return the storage entry recorded for \p var, or NULL if none. */
   variable_storage *find_variable_storage(const ir_variable *var);

   /** Allocate fresh temporary storage large enough for \p type. */
   src_reg get_temp(const glsl_type *type);
   /** Load the address register for \p reg if it is relatively addressed,
    * copying the value to a temporary unless it is the last such operand.
    */
   void reladdr_to_temp(ir_instruction *ir, src_reg *reg, int *num_reladdr);

   /** Return a PROGRAM_CONSTANT source holding \p val. */
   src_reg src_reg_for_float(float val);

   /**
    * \name Visit methods
    *
    * As typical for the visitor pattern, there must be one \c visit method for
    * each concrete subclass of \c ir_instruction.  Virtual base classes within
    * the hierarchy should not have \c visit methods.
    */
   /*@{*/
   virtual void visit(ir_variable *);
   virtual void visit(ir_loop *);
   virtual void visit(ir_loop_jump *);
   virtual void visit(ir_function_signature *);
   virtual void visit(ir_function *);
   virtual void visit(ir_expression *);
   virtual void visit(ir_swizzle *);
   virtual void visit(ir_dereference_variable  *);
   virtual void visit(ir_dereference_array *);
   virtual void visit(ir_dereference_record *);
   virtual void visit(ir_assignment *);
   virtual void visit(ir_constant *);
   virtual void visit(ir_call *);
   virtual void visit(ir_return *);
   virtual void visit(ir_discard *);
   virtual void visit(ir_texture *);
   virtual void visit(ir_if *);
   virtual void visit(ir_emit_vertex *);
   virtual void visit(ir_end_primitive *);
   virtual void visit(ir_barrier *);
   /*@}*/

   /** Register holding the value produced by the most recent rvalue visit. */
   src_reg result;

   /** List of variable_storage */
   exec_list variables;

   /** List of function_entry */
   exec_list function_signatures;
   int next_signature_id;

   /** List of ir_to_mesa_instruction */
   exec_list instructions;

   /* emit() overloads: append one instruction to \c instructions.  The
    * shorter forms pass undef_src / undef_dst for the missing operands.
    */
   ir_to_mesa_instruction *emit(ir_instruction *ir, enum prog_opcode op);

   ir_to_mesa_instruction *emit(ir_instruction *ir, enum prog_opcode op,
                                dst_reg dst, src_reg src0);

   ir_to_mesa_instruction *emit(ir_instruction *ir, enum prog_opcode op,
                                dst_reg dst, src_reg src0, src_reg src1);

   ir_to_mesa_instruction *emit(ir_instruction *ir, enum prog_opcode op,
                                dst_reg dst,
                                src_reg src0, src_reg src1, src_reg src2);

   /**
    * Emit the correct dot-product instruction for the type of arguments
    */
   ir_to_mesa_instruction * emit_dp(ir_instruction *ir,
                                    dst_reg dst,
                                    src_reg src0,
                                    src_reg src1,
                                    unsigned elements);

   /* emit_scalar() overloads: emit a scalar-only opcode once per distinct
    * source channel combination needed by the enabled dst channels.
    */
   void emit_scalar(ir_instruction *ir, enum prog_opcode op,
                    dst_reg dst, src_reg src0);

   void emit_scalar(ir_instruction *ir, enum prog_opcode op,
                    dst_reg dst, src_reg src0, src_reg src1);

   /** Try to emit ADD(MUL(a, b), c) as one MAD; returns false if operand
    * \p mul_operand is not a multiply.
    */
   bool try_emit_mad(ir_expression *ir,
                          int mul_operand);
   /** Try to emit AND(a, NOT(b)) as one MAD; returns false if the chosen
    * operand is not a logical not.
    */
   bool try_emit_mad_for_and_not(ir_expression *ir,
                                 int mul_operand);

   /** Emit a vector-constructor expression as a single OPCODE_SWZ. */
   void emit_swz(ir_expression *ir);

   bool process_move_condition(ir_rvalue *ir);

   void copy_propagate(void);

   /** Allocation context handed to new(mem_ctx) for objects the visitor
    * creates.
    */
   void *mem_ctx;
};
 313
 314 } /* anonymous namespace */
 315
/** Placeholder source passed for operand slots an instruction does not use. */
static src_reg undef_src = src_reg(PROGRAM_UNDEFINED, 0, NULL);

/**
 * Placeholder destination for instructions that write no register.
 *
 * NOTE(review): the second dst_reg constructor argument is a writemask, but
 * a swizzle constant (SWIZZLE_NOOP) is passed here.  Confirm whether any
 * consumer depends on the resulting writemask bits before changing it.
 */
static dst_reg undef_dst = dst_reg(PROGRAM_UNDEFINED, SWIZZLE_NOOP);

/** Destination written by OPCODE_ARL when setting up relative addressing. */
static dst_reg address_reg = dst_reg(PROGRAM_ADDRESS, WRITEMASK_X);
 321
 322 static int
 323 swizzle_for_size(int size)
 324 {
 325    static const int size_swizzles[4] = {
 326       MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
 327       MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y),
 328       MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z),
 329       MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W),
 330    };
 331
 332    assert((size >= 1) && (size <= 4));
 333    return size_swizzles[size - 1];
 334 }
 335
 336 ir_to_mesa_instruction *
 337 ir_to_mesa_visitor::emit(ir_instruction *ir, enum prog_opcode op,
 338                          dst_reg dst,
 339                          src_reg src0, src_reg src1, src_reg src2)
 340 {
 341    ir_to_mesa_instruction *inst = new(mem_ctx) ir_to_mesa_instruction();
 342    int num_reladdr = 0;
 343
 344    /* If we have to do relative addressing, we want to load the ARL
 345     * reg directly for one of the regs, and preload the other reladdr
 346     * sources into temps.
 347     */
 348    num_reladdr += dst.reladdr != NULL;
 349    num_reladdr += src0.reladdr != NULL;
 350    num_reladdr += src1.reladdr != NULL;
 351    num_reladdr += src2.reladdr != NULL;
 352
 353    reladdr_to_temp(ir, &src2, &num_reladdr);
 354    reladdr_to_temp(ir, &src1, &num_reladdr);
 355    reladdr_to_temp(ir, &src0, &num_reladdr);
 356
 357    if (dst.reladdr) {
 358       emit(ir, OPCODE_ARL, address_reg, *dst.reladdr);
 359       num_reladdr--;
 360    }
 361    assert(num_reladdr == 0);
 362
 363    inst->op = op;
 364    inst->dst = dst;
 365    inst->src[0] = src0;
 366    inst->src[1] = src1;
 367    inst->src[2] = src2;
 368    inst->ir = ir;
 369
 370    this->instructions.push_tail(inst);
 371
 372    return inst;
 373 }
 374
 375
 376 ir_to_mesa_instruction *
 377 ir_to_mesa_visitor::emit(ir_instruction *ir, enum prog_opcode op,
 378                          dst_reg dst, src_reg src0, src_reg src1)
 379 {
 380    return emit(ir, op, dst, src0, src1, undef_src);
 381 }
 382
 383 ir_to_mesa_instruction *
 384 ir_to_mesa_visitor::emit(ir_instruction *ir, enum prog_opcode op,
 385                          dst_reg dst, src_reg src0)
 386 {
 387    assert(dst.writemask != 0);
 388    return emit(ir, op, dst, src0, undef_src, undef_src);
 389 }
 390
 391 ir_to_mesa_instruction *
 392 ir_to_mesa_visitor::emit(ir_instruction *ir, enum prog_opcode op)
 393 {
 394    return emit(ir, op, undef_dst, undef_src, undef_src, undef_src);
 395 }
 396
 397 ir_to_mesa_instruction *
 398 ir_to_mesa_visitor::emit_dp(ir_instruction *ir,
 399                             dst_reg dst, src_reg src0, src_reg src1,
 400                             unsigned elements)
 401 {
 402    static const enum prog_opcode dot_opcodes[] = {
 403       OPCODE_DP2, OPCODE_DP3, OPCODE_DP4
 404    };
 405
 406    return emit(ir, dot_opcodes[elements - 2], dst, src0, src1);
 407 }
 408
 409 /**
 410  * Emits Mesa scalar opcodes to produce unique answers across channels.
 411  *
 412  * Some Mesa opcodes are scalar-only, like ARB_fp/vp.  The src X
 413  * channel determines the result across all channels.  So to do a vec4
 414  * of this operation, we want to emit a scalar per source channel used
 415  * to produce dest channels.
 416  */
 417 void
 418 ir_to_mesa_visitor::emit_scalar(ir_instruction *ir, enum prog_opcode op,
 419                                 dst_reg dst,
 420                                 src_reg orig_src0, src_reg orig_src1)
 421 {
 422    int i, j;
 423    int done_mask = ~dst.writemask;
 424
 425    /* Mesa RCP is a scalar operation splatting results to all channels,
 426     * like ARB_fp/vp.  So emit as many RCPs as necessary to cover our
 427     * dst channels.
 428     */
 429    for (i = 0; i < 4; i++) {
 430       GLuint this_mask = (1 << i);
 431       ir_to_mesa_instruction *inst;
 432       src_reg src0 = orig_src0;
 433       src_reg src1 = orig_src1;
 434
 435       if (done_mask & this_mask)
 436          continue;
 437
 438       GLuint src0_swiz = GET_SWZ(src0.swizzle, i);
 439       GLuint src1_swiz = GET_SWZ(src1.swizzle, i);
 440       for (j = i + 1; j < 4; j++) {
 441          /* If there is another enabled component in the destination that is
 442           * derived from the same inputs, generate its value on this pass as
 443           * well.
 444           */
 445          if (!(done_mask & (1 << j)) &&
 446              GET_SWZ(src0.swizzle, j) == src0_swiz &&
 447              GET_SWZ(src1.swizzle, j) == src1_swiz) {
 448             this_mask |= (1 << j);
 449          }
 450       }
 451       src0.swizzle = MAKE_SWIZZLE4(src0_swiz, src0_swiz,
 452                                    src0_swiz, src0_swiz);
 453       src1.swizzle = MAKE_SWIZZLE4(src1_swiz, src1_swiz,
 454                                   src1_swiz, src1_swiz);
 455
 456       inst = emit(ir, op, dst, src0, src1);
 457       inst->dst.writemask = this_mask;
 458       done_mask |= this_mask;
 459    }
 460 }
 461
 462 void
 463 ir_to_mesa_visitor::emit_scalar(ir_instruction *ir, enum prog_opcode op,
 464                                 dst_reg dst, src_reg src0)
 465 {
 466    src_reg undef = undef_src;
 467
 468    undef.swizzle = SWIZZLE_XXXX;
 469
 470    emit_scalar(ir, op, dst, src0, undef);
 471 }
 472
 473 src_reg
 474 ir_to_mesa_visitor::src_reg_for_float(float val)
 475 {
 476    src_reg src(PROGRAM_CONSTANT, -1, NULL);
 477
 478    src.index = _mesa_add_unnamed_constant(this->prog->Parameters,
 479                                           (const gl_constant_value *)&val, 1, &src.swizzle);
 480
 481    return src;
 482 }
 483
 484 static int
 485 type_size(const struct glsl_type *type)
 486 {
 487    unsigned int i;
 488    int size;
 489
 490    switch (type->base_type) {
 491    case GLSL_TYPE_UINT:
 492    case GLSL_TYPE_INT:
 493    case GLSL_TYPE_FLOAT:
 494    case GLSL_TYPE_BOOL:
 495       if (type->is_matrix()) {
 496          return type->matrix_columns;
 497       } else {
 498          /* Regardless of size of vector, it gets a vec4. This is bad
 499           * packing for things like floats, but otherwise arrays become a
 500           * mess.  Hopefully a later pass over the code can pack scalars
 501           * down if appropriate.
 502           */
 503          return 1;
 504       }
 505       break;
 506    case GLSL_TYPE_DOUBLE:
 507       if (type->is_matrix()) {
 508          if (type->vector_elements > 2)
 509             return type->matrix_columns * 2;
 510          else
 511             return type->matrix_columns;
 512       } else {
 513          if (type->vector_elements > 2)
 514             return 2;
 515          else
 516             return 1;
 517       }
 518       break;
 519    case GLSL_TYPE_ARRAY:
 520       assert(type->length > 0);
 521       return type_size(type->fields.array) * type->length;
 522    case GLSL_TYPE_STRUCT:
 523       size = 0;
 524       for (i = 0; i < type->length; i++) {
 525          size += type_size(type->fields.structure[i].type);
 526       }
 527       return size;
 528    case GLSL_TYPE_SAMPLER:
 529    case GLSL_TYPE_IMAGE:
 530    case GLSL_TYPE_SUBROUTINE:
 531       /* Samplers take up one slot in UNIFORMS[], but they're baked in
 532        * at link time.
 533        */
 534       return 1;
 535    case GLSL_TYPE_ATOMIC_UINT:
 536    case GLSL_TYPE_VOID:
 537    case GLSL_TYPE_ERROR:
 538    case GLSL_TYPE_INTERFACE:
 539    case GLSL_TYPE_FUNCTION:
 540       assert(!"Invalid type in type_size");
 541       break;
 542    }
 543
 544    return 0;
 545 }
 546
 547 /**
 548  * In the initial pass of codegen, we assign temporary numbers to
 549  * intermediate results.  (not SSA -- variable assignments will reuse
 550  * storage).  Actual register allocation for the Mesa VM occurs in a
 551  * pass over the Mesa IR later.
 552  */
 553 src_reg
 554 ir_to_mesa_visitor::get_temp(const glsl_type *type)
 555 {
 556    src_reg src;
 557
 558    src.file = PROGRAM_TEMPORARY;
 559    src.index = next_temp;
 560    src.reladdr = NULL;
 561    next_temp += type_size(type);
 562
 563    if (type->is_array() || type->is_record()) {
 564       src.swizzle = SWIZZLE_NOOP;
 565    } else {
 566       src.swizzle = swizzle_for_size(type->vector_elements);
 567    }
 568    src.negate = 0;
 569
 570    return src;
 571 }
 572
 573 variable_storage *
 574 ir_to_mesa_visitor::find_variable_storage(const ir_variable *var)
 575 {
 576    foreach_in_list(variable_storage, entry, &this->variables) {
 577       if (entry->var == var)
 578          return entry;
 579    }
 580
 581    return NULL;
 582 }
 583
/**
 * Handle a variable declaration.
 *
 * Two cases are acted on: gl_FragCoord, whose layout qualifiers are copied
 * to the fragment program, and built-in ("gl_"-prefixed) uniforms, which
 * are bound to state references either directly or through a temporary.
 * Any other variable is left untouched here.
 */
void
ir_to_mesa_visitor::visit(ir_variable *ir)
{
   if (strcmp(ir->name, "gl_FragCoord") == 0) {
      /* NOTE(review): assumes this->prog is a fragment program whenever
       * gl_FragCoord is declared -- the cast is unchecked.
       */
      struct gl_fragment_program *fp = (struct gl_fragment_program *)this->prog;

      fp->OriginUpperLeft = ir->data.origin_upper_left;
      fp->PixelCenterInteger = ir->data.pixel_center_integer;
   }

   if (ir->data.mode == ir_var_uniform && strncmp(ir->name, "gl_", 3) == 0) {
      unsigned int i;
      const ir_state_slot *const slots = ir->get_state_slots();
      assert(slots != NULL);

      /* Check if this statevar's setup in the STATE file exactly
       * matches how we'll want to reference it as a
       * struct/array/whatever.  If not, then we need to move it into
       * temporary storage and hope that it'll get copy-propagated
       * out.
       */
      for (i = 0; i < ir->get_num_state_slots(); i++) {
         if (slots[i].swizzle != SWIZZLE_XYZW) {
            break;
         }
      }

      variable_storage *storage;
      dst_reg dst;
      if (i == ir->get_num_state_slots()) {
         /* Every slot uses the identity swizzle, so the variable can live
          * directly in the state file.  We'll set the index later.
          */
         storage = new(mem_ctx) variable_storage(ir, PROGRAM_STATE_VAR, -1);
         this->variables.push_tail(storage);

         dst = undef_dst;
      } else {
         /* Temporary slots are reserved below based on the size of the
          * type.  However, this had better match the number of state
          * elements that we're going to copy into the new temporary.
          */
         assert((int) ir->get_num_state_slots() == type_size(ir->type));

         storage = new(mem_ctx) variable_storage(ir, PROGRAM_TEMPORARY,
                                                 this->next_temp);
         this->variables.push_tail(storage);
         this->next_temp += type_size(ir->type);

         dst = dst_reg(src_reg(PROGRAM_TEMPORARY, storage->index, NULL));
      }


      /* Add a state reference for every slot.  Note that this loop index
       * deliberately shadows the outer `i', which is no longer needed.
       */
      for (unsigned int i = 0; i < ir->get_num_state_slots(); i++) {
         int index = _mesa_add_state_reference(this->prog->Parameters,
                                               (gl_state_index *)slots[i].tokens);

         if (storage->file == PROGRAM_STATE_VAR) {
            /* Direct mapping: remember the first index and require the
             * remaining references to follow it consecutively.
             */
            if (storage->index == -1) {
               storage->index = index;
            } else {
               assert(index == storage->index + (int)i);
            }
         } else {
            /* Copy the swizzled state value into the next temporary. */
            src_reg src(PROGRAM_STATE_VAR, index, NULL);
            src.swizzle = slots[i].swizzle;
            emit(ir, OPCODE_MOV, dst, src);
            /* even a float takes up a whole vec4 reg in a struct/array. */
            dst.index++;
         }
      }

      /* Report an error if the number of registers copied does not match
       * the number of state slots.
       */
      if (storage->file == PROGRAM_TEMPORARY &&
          dst.index != storage->index + (int) ir->get_num_state_slots()) {
         linker_error(this->shader_program,
                      "failed to load builtin uniform `%s' "
                      "(%d/%d regs loaded)\n",
                      ir->name, dst.index - storage->index,
                      type_size(ir->type));
      }
   }
}
 664
 665 void
 666 ir_to_mesa_visitor::visit(ir_loop *ir)
 667 {
 668    emit(NULL, OPCODE_BGNLOOP);
 669
 670    visit_exec_list(&ir->body_instructions, this);
 671
 672    emit(NULL, OPCODE_ENDLOOP);
 673 }
 674
 675 void
 676 ir_to_mesa_visitor::visit(ir_loop_jump *ir)
 677 {
 678    switch (ir->mode) {
 679    case ir_loop_jump::jump_break:
 680       emit(NULL, OPCODE_BRK);
 681       break;
 682    case ir_loop_jump::jump_continue:
 683       emit(NULL, OPCODE_CONT);
 684       break;
 685    }
 686 }
 687
 688
 689 void
 690 ir_to_mesa_visitor::visit(ir_function_signature *ir)
 691 {
 692    assert(0);
 693    (void)ir;
 694 }
 695
 696 void
 697 ir_to_mesa_visitor::visit(ir_function *ir)
 698 {
 699    /* Ignore function bodies other than main() -- we shouldn't see calls to
 700     * them since they should all be inlined before we get to ir_to_mesa.
 701     */
 702    if (strcmp(ir->name, "main") == 0) {
 703       const ir_function_signature *sig;
 704       exec_list empty;
 705
 706       sig = ir->matching_signature(NULL, &empty, false);
 707
 708       assert(sig);
 709
 710       foreach_in_list(ir_instruction, ir, &sig->body) {
 711          ir->accept(this);
 712       }
 713    }
 714 }
 715
 716 bool
 717 ir_to_mesa_visitor::try_emit_mad(ir_expression *ir, int mul_operand)
 718 {
 719    int nonmul_operand = 1 - mul_operand;
 720    src_reg a, b, c;
 721
 722    ir_expression *expr = ir->operands[mul_operand]->as_expression();
 723    if (!expr || expr->operation != ir_binop_mul)
 724       return false;
 725
 726    expr->operands[0]->accept(this);
 727    a = this->result;
 728    expr->operands[1]->accept(this);
 729    b = this->result;
 730    ir->operands[nonmul_operand]->accept(this);
 731    c = this->result;
 732
 733    this->result = get_temp(ir->type);
 734    emit(ir, OPCODE_MAD, dst_reg(this->result), a, b, c);
 735
 736    return true;
 737 }
 738
 739 /**
 740  * Emit OPCODE_MAD(a, -b, a) instead of AND(a, NOT(b))
 741  *
 742  * The logic values are 1.0 for true and 0.0 for false.  Logical-and is
 743  * implemented using multiplication, and logical-or is implemented using
 744  * addition.  Logical-not can be implemented as (true - x), or (1.0 - x).
 745  * As result, the logical expression (a & !b) can be rewritten as:
 746  *
 747  *     - a * !b
 748  *     - a * (1 - b)
 749  *     - (a * 1) - (a * b)
 750  *     - a + -(a * b)
 751  *     - a + (a * -b)
 752  *
 753  * This final expression can be implemented as a single MAD(a, -b, a)
 754  * instruction.
 755  */
 756 bool
 757 ir_to_mesa_visitor::try_emit_mad_for_and_not(ir_expression *ir, int try_operand)
 758 {
 759    const int other_operand = 1 - try_operand;
 760    src_reg a, b;
 761
 762    ir_expression *expr = ir->operands[try_operand]->as_expression();
 763    if (!expr || expr->operation != ir_unop_logic_not)
 764       return false;
 765
 766    ir->operands[other_operand]->accept(this);
 767    a = this->result;
 768    expr->operands[0]->accept(this);
 769    b = this->result;
 770
 771    b.negate = ~b.negate;
 772
 773    this->result = get_temp(ir->type);
 774    emit(ir, OPCODE_MAD, dst_reg(this->result), a, b, a);
 775
 776    return true;
 777 }
 778
 779 void
 780 ir_to_mesa_visitor::reladdr_to_temp(ir_instruction *ir,
 781                                     src_reg *reg, int *num_reladdr)
 782 {
 783    if (!reg->reladdr)
 784       return;
 785
 786    emit(ir, OPCODE_ARL, address_reg, *reg->reladdr);
 787
 788    if (*num_reladdr != 1) {
 789       src_reg temp = get_temp(glsl_type::vec4_type);
 790
 791       emit(ir, OPCODE_MOV, dst_reg(temp), *reg);
 792       *reg = temp;
 793    }
 794
 795    (*num_reladdr)--;
 796 }
 797
 798 void
 799 ir_to_mesa_visitor::emit_swz(ir_expression *ir)
 800 {
 801    /* Assume that the vector operator is in a form compatible with OPCODE_SWZ.
 802     * This means that each of the operands is either an immediate value of -1,
 803     * 0, or 1, or is a component from one source register (possibly with
 804     * negation).
 805     */
 806    uint8_t components[4] = { 0 };
 807    bool negate[4] = { false };
 808    ir_variable *var = NULL;
 809
 810    for (unsigned i = 0; i < ir->type->vector_elements; i++) {
 811       ir_rvalue *op = ir->operands[i];
 812
 813       assert(op->type->is_scalar());
 814
 815       while (op != NULL) {
 816          switch (op->ir_type) {
 817          case ir_type_constant: {
 818
 819             assert(op->type->is_scalar());
 820
 821             const ir_constant *const c = op->as_constant();
 822             if (c->is_one()) {
 823                components[i] = SWIZZLE_ONE;
 824             } else if (c->is_zero()) {
 825                components[i] = SWIZZLE_ZERO;
 826             } else if (c->is_negative_one()) {
 827                components[i] = SWIZZLE_ONE;
 828                negate[i] = true;
 829             } else {
 830                assert(!"SWZ constant must be 0.0 or 1.0.");
 831             }
 832
 833             op = NULL;
 834             break;
 835          }
 836
 837          case ir_type_dereference_variable: {
 838             ir_dereference_variable *const deref =
 839                (ir_dereference_variable *) op;
 840
 841             assert((var == NULL) || (deref->var == var));
 842             components[i] = SWIZZLE_X;
 843             var = deref->var;
 844             op = NULL;
 845             break;
 846          }
 847
 848          case ir_type_expression: {
 849             ir_expression *const expr = (ir_expression *) op;
 850
 851             assert(expr->operation == ir_unop_neg);
 852             negate[i] = true;
 853
 854             op = expr->operands[0];
 855             break;
 856          }
 857
 858          case ir_type_swizzle: {
 859             ir_swizzle *const swiz = (ir_swizzle *) op;
 860
 861             components[i] = swiz->mask.x;
 862             op = swiz->val;
 863             break;
 864          }
 865
 866          default:
 867             assert(!"Should not get here.");
 868             return;
 869          }
 870       }
 871    }
 872
 873    assert(var != NULL);
 874
 875    ir_dereference_variable *const deref =
 876       new(mem_ctx) ir_dereference_variable(var);
 877
 878    this->result.file = PROGRAM_UNDEFINED;
 879    deref->accept(this);
 880    if (this->result.file == PROGRAM_UNDEFINED) {
 881       printf("Failed to get tree for expression operand:\n");
 882       deref->print();
 883       printf("\n");
 884       exit(1);
 885    }
 886
 887    src_reg src;
 888
 889    src = this->result;
 890    src.swizzle = MAKE_SWIZZLE4(components[0],
 891                                components[1],
 892                                components[2],
 893                                components[3]);
 894    src.negate = ((unsigned(negate[0]) << 0)
 895                  | (unsigned(negate[1]) << 1)
 896                  | (unsigned(negate[2]) << 2)
 897                  | (unsigned(negate[3]) << 3));
 898
 899    /* Storage for our result.  Ideally for an assignment we'd be using the
 900     * actual storage for the result here, instead.
 901     */
 902    const src_reg result_src = get_temp(ir->type);
 903    dst_reg result_dst = dst_reg(result_src);
 904
 905    /* Limit writes to the channels that will be used by result_src later.
 906     * This does limit this temp's use as a temporary for multi-instruction
 907     * sequences.
 908     */
 909    result_dst.writemask = (1 << ir->type->vector_elements) - 1;
 910
 911    emit(ir, OPCODE_SWZ, result_dst, src);
 912    this->result = result_src;
 913 }
 914
 915 void
 916 ir_to_mesa_visitor::visit(ir_expression *ir)
 917 {
 918    unsigned int operand;
 919    src_reg op[ARRAY_SIZE(ir->operands)];
 920    src_reg result_src;
 921    dst_reg result_dst;
 922
 923    /* Quick peephole: Emit OPCODE_MAD(a, b, c) instead of ADD(MUL(a, b), c)
 924     */
 925    if (ir->operation == ir_binop_add) {
 926       if (try_emit_mad(ir, 1))
 927          return;
 928       if (try_emit_mad(ir, 0))
 929          return;
 930    }
 931
 932    /* Quick peephole: Emit OPCODE_MAD(-a, -b, a) instead of AND(a, NOT(b))
 933     */
 934    if (ir->operation == ir_binop_logic_and) {
 935       if (try_emit_mad_for_and_not(ir, 1))
 936          return;
 937       if (try_emit_mad_for_and_not(ir, 0))
 938          return;
 939    }
 940
 941    if (ir->operation == ir_quadop_vector) {
 942       this->emit_swz(ir);
 943       return;
 944    }
 945
 946    for (operand = 0; operand < ir->get_num_operands(); operand++) {
 947       this->result.file = PROGRAM_UNDEFINED;
 948       ir->operands[operand]->accept(this);
 949       if (this->result.file == PROGRAM_UNDEFINED) {
 950          printf("Failed to get tree for expression operand:\n");
 951          ir->operands[operand]->print();
 952          printf("\n");
 953          exit(1);
 954       }
 955       op[operand] = this->result;
 956
 957       /* Matrix expression operands should have been broken down to vector
 958        * operations already.
 959        */
 960       assert(!ir->operands[operand]->type->is_matrix());
 961    }
 962
 963    int vector_elements = ir->operands[0]->type->vector_elements;
 964    if (ir->operands[1]) {
 965       vector_elements = MAX2(vector_elements,
 966                              ir->operands[1]->type->vector_elements);
 967    }
 968
 969    this->result.file = PROGRAM_UNDEFINED;
 970
 971    /* Storage for our result.  Ideally for an assignment we'd be using
 972     * the actual storage for the result here, instead.
 973     */
 974    result_src = get_temp(ir->type);
 975    /* convenience for the emit functions below. */
 976    result_dst = dst_reg(result_src);
 977    /* Limit writes to the channels that will be used by result_src later.
 978     * This does limit this temp's use as a temporary for multi-instruction
 979     * sequences.
 980     */
 981    result_dst.writemask = (1 << ir->type->vector_elements) - 1;
 982
 983    switch (ir->operation) {
 984    case ir_unop_logic_not:
 985       /* Previously 'SEQ dst, src, 0.0' was used for this.  However, many
 986        * older GPUs implement SEQ using multiple instructions (i915 uses two
 987        * SGE instructions and a MUL instruction).  Since our logic values are
 988        * 0.0 and 1.0, 1-x also implements !x.
 989        */
 990       op[0].negate = ~op[0].negate;
 991       emit(ir, OPCODE_ADD, result_dst, op[0], src_reg_for_float(1.0));
 992       break;
 993    case ir_unop_neg:
 994       op[0].negate = ~op[0].negate;
 995       result_src = op[0];
 996       break;
 997    case ir_unop_abs:
 998       emit(ir, OPCODE_ABS, result_dst, op[0]);
 999       break;
1000    case ir_unop_sign:
1001       emit(ir, OPCODE_SSG, result_dst, op[0]);
1002       break;
1003    case ir_unop_rcp:
1004       emit_scalar(ir, OPCODE_RCP, result_dst, op[0]);
1005       break;
1006
1007    case ir_unop_exp2:
1008       emit_scalar(ir, OPCODE_EX2, result_dst, op[0]);
1009       break;
1010    case ir_unop_exp:
1011    case ir_unop_log:
1012       assert(!"not reached: should be handled by ir_explog_to_explog2");
1013       break;
1014    case ir_unop_log2:
1015       emit_scalar(ir, OPCODE_LG2, result_dst, op[0]);
1016       break;
1017    case ir_unop_sin:
1018       emit_scalar(ir, OPCODE_SIN, result_dst, op[0]);
1019       break;
1020    case ir_unop_cos:
1021       emit_scalar(ir, OPCODE_COS, result_dst, op[0]);
1022       break;
1023
1024    case ir_unop_dFdx:
1025       emit(ir, OPCODE_DDX, result_dst, op[0]);
1026       break;
1027    case ir_unop_dFdy:
1028       emit(ir, OPCODE_DDY, result_dst, op[0]);
1029       break;
1030
1031    case ir_unop_saturate: {
1032       ir_to_mesa_instruction *inst = emit(ir, OPCODE_MOV,
1033                                           result_dst, op[0]);
1034       inst->saturate = true;
1035       break;
1036    }
1037    case ir_unop_noise: {
1038       const enum prog_opcode opcode =
1039          prog_opcode(OPCODE_NOISE1
1040                      + (ir->operands[0]->type->vector_elements) - 1);
1041       assert((opcode >= OPCODE_NOISE1) && (opcode <= OPCODE_NOISE4));
1042
1043       emit(ir, opcode, result_dst, op[0]);
1044       break;
1045    }
1046
1047    case ir_binop_add:
1048       emit(ir, OPCODE_ADD, result_dst, op[0], op[1]);
1049       break;
1050    case ir_binop_sub:
1051       emit(ir, OPCODE_SUB, result_dst, op[0], op[1]);
1052       break;
1053
1054    case ir_binop_mul:
1055       emit(ir, OPCODE_MUL, result_dst, op[0], op[1]);
1056       break;
1057    case ir_binop_div:
1058       assert(!"not reached: should be handled by ir_div_to_mul_rcp");
1059       break;
1060    case ir_binop_mod:
1061       /* Floating point should be lowered by MOD_TO_FLOOR in the compiler. */
1062       assert(ir->type->is_integer());
1063       emit(ir, OPCODE_MUL, result_dst, op[0], op[1]);
1064       break;
1065
1066    case ir_binop_less:
1067       emit(ir, OPCODE_SLT, result_dst, op[0], op[1]);
1068       break;
1069    case ir_binop_greater:
1070       emit(ir, OPCODE_SGT, result_dst, op[0], op[1]);
1071       break;
1072    case ir_binop_lequal:
1073       emit(ir, OPCODE_SLE, result_dst, op[0], op[1]);
1074       break;
1075    case ir_binop_gequal:
1076       emit(ir, OPCODE_SGE, result_dst, op[0], op[1]);
1077       break;
1078    case ir_binop_equal:
1079       emit(ir, OPCODE_SEQ, result_dst, op[0], op[1]);
1080       break;
1081    case ir_binop_nequal:
1082       emit(ir, OPCODE_SNE, result_dst, op[0], op[1]);
1083       break;
1084    case ir_binop_all_equal:
1085       /* "==" operator producing a scalar boolean. */
1086       if (ir->operands[0]->type->is_vector() ||
1087           ir->operands[1]->type->is_vector()) {
1088          src_reg temp = get_temp(glsl_type::vec4_type);
1089          emit(ir, OPCODE_SNE, dst_reg(temp), op[0], op[1]);
1090
1091          /* After the dot-product, the value will be an integer on the
1092           * range [0,4].  Zero becomes 1.0, and positive values become zero.
1093           */
1094          emit_dp(ir, result_dst, temp, temp, vector_elements);
1095
1096          /* Negating the result of the dot-product gives values on the range
1097           * [-4, 0].  Zero becomes 1.0, and negative values become zero.  This
1098           * achieved using SGE.
1099           */
1100          src_reg sge_src = result_src;
1101          sge_src.negate = ~sge_src.negate;
1102          emit(ir, OPCODE_SGE, result_dst, sge_src, src_reg_for_float(0.0));
1103       } else {
1104          emit(ir, OPCODE_SEQ, result_dst, op[0], op[1]);
1105       }
1106       break;
1107    case ir_binop_any_nequal:
1108       /* "!=" operator producing a scalar boolean. */
1109       if (ir->operands[0]->type->is_vector() ||
1110           ir->operands[1]->type->is_vector()) {
1111          src_reg temp = get_temp(glsl_type::vec4_type);
1112          if (ir->operands[0]->type->is_boolean() &&
1113              ir->operands[1]->as_constant() &&
1114              ir->operands[1]->as_constant()->is_zero()) {
1115             temp = op[0];
1116          } else {
1117             emit(ir, OPCODE_SNE, dst_reg(temp), op[0], op[1]);
1118          }
1119
1120          /* After the dot-product, the value will be an integer on the
1121           * range [0,4].  Zero stays zero, and positive values become 1.0.
1122           */
1123          ir_to_mesa_instruction *const dp =
1124             emit_dp(ir, result_dst, temp, temp, vector_elements);
1125          if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
1126             /* The clamping to [0,1] can be done for free in the fragment
1127              * shader with a saturate.
1128              */
1129             dp->saturate = true;
1130          } else {
1131             /* Negating the result of the dot-product gives values on the range
1132              * [-4, 0].  Zero stays zero, and negative values become 1.0.  This
1133              * achieved using SLT.
1134              */
1135             src_reg slt_src = result_src;
1136             slt_src.negate = ~slt_src.negate;
1137             emit(ir, OPCODE_SLT, result_dst, slt_src, src_reg_for_float(0.0));
1138          }
1139       } else {
1140          emit(ir, OPCODE_SNE, result_dst, op[0], op[1]);
1141       }
1142       break;
1143
1144    case ir_binop_logic_xor:
1145       emit(ir, OPCODE_SNE, result_dst, op[0], op[1]);
1146       break;
1147
1148    case ir_binop_logic_or: {
1149       /* After the addition, the value will be an integer on the
1150        * range [0,2].  Zero stays zero, and positive values become 1.0.
1151        */
1152       ir_to_mesa_instruction *add =
1153          emit(ir, OPCODE_ADD, result_dst, op[0], op[1]);
1154       if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
1155          /* The clamping to [0,1] can be done for free in the fragment
1156           * shader with a saturate.
1157           */
1158          add->saturate = true;
1159       } else {
1160          /* Negating the result of the addition gives values on the range
1161           * [-2, 0].  Zero stays zero, and negative values become 1.0.  This
1162           * is achieved using SLT.
1163           */
1164          src_reg slt_src = result_src;
1165          slt_src.negate = ~slt_src.negate;
1166          emit(ir, OPCODE_SLT, result_dst, slt_src, src_reg_for_float(0.0));
1167       }
1168       break;
1169    }
1170
1171    case ir_binop_logic_and:
1172       /* the bool args are stored as float 0.0 or 1.0, so "mul" gives us "and". */
1173       emit(ir, OPCODE_MUL, result_dst, op[0], op[1]);
1174       break;
1175
1176    case ir_binop_dot:
1177       assert(ir->operands[0]->type->is_vector());
1178       assert(ir->operands[0]->type == ir->operands[1]->type);
1179       emit_dp(ir, result_dst, op[0], op[1],
1180               ir->operands[0]->type->vector_elements);
1181       break;
1182
1183    case ir_unop_sqrt:
1184       /* sqrt(x) = x * rsq(x). */
1185       emit_scalar(ir, OPCODE_RSQ, result_dst, op[0]);
1186       emit(ir, OPCODE_MUL, result_dst, result_src, op[0]);
1187       /* For incoming channels <= 0, set the result to 0. */
1188       op[0].negate = ~op[0].negate;
1189       emit(ir, OPCODE_CMP, result_dst,
1190                           op[0], result_src, src_reg_for_float(0.0));
1191       break;
1192    case ir_unop_rsq:
1193       emit_scalar(ir, OPCODE_RSQ, result_dst, op[0]);
1194       break;
1195    case ir_unop_i2f:
1196    case ir_unop_u2f:
1197    case ir_unop_b2f:
1198    case ir_unop_b2i:
1199    case ir_unop_i2u:
1200    case ir_unop_u2i:
1201       /* Mesa IR lacks types, ints are stored as truncated floats. */
1202       result_src = op[0];
1203       break;
1204    case ir_unop_f2i:
1205    case ir_unop_f2u:
1206       emit(ir, OPCODE_TRUNC, result_dst, op[0]);
1207       break;
1208    case ir_unop_f2b:
1209    case ir_unop_i2b:
1210       emit(ir, OPCODE_SNE, result_dst,
1211                           op[0], src_reg_for_float(0.0));
1212       break;
1213    case ir_unop_bitcast_f2i: // Ignore these 4, they can't happen here anyway
1214    case ir_unop_bitcast_f2u:
1215    case ir_unop_bitcast_i2f:
1216    case ir_unop_bitcast_u2f:
1217       break;
1218    case ir_unop_trunc:
1219       emit(ir, OPCODE_TRUNC, result_dst, op[0]);
1220       break;
1221    case ir_unop_ceil:
1222       op[0].negate = ~op[0].negate;
1223       emit(ir, OPCODE_FLR, result_dst, op[0]);
1224       result_src.negate = ~result_src.negate;
1225       break;
1226    case ir_unop_floor:
1227       emit(ir, OPCODE_FLR, result_dst, op[0]);
1228       break;
1229    case ir_unop_fract:
1230       emit(ir, OPCODE_FRC, result_dst, op[0]);
1231       break;
1232    case ir_unop_pack_snorm_2x16:
1233    case ir_unop_pack_snorm_4x8:
1234    case ir_unop_pack_unorm_2x16:
1235    case ir_unop_pack_unorm_4x8:
1236    case ir_unop_pack_half_2x16:
1237    case ir_unop_pack_double_2x32:
1238    case ir_unop_unpack_snorm_2x16:
1239    case ir_unop_unpack_snorm_4x8:
1240    case ir_unop_unpack_unorm_2x16:
1241    case ir_unop_unpack_unorm_4x8:
1242    case ir_unop_unpack_half_2x16:
1243    case ir_unop_unpack_double_2x32:
1244    case ir_unop_bitfield_reverse:
1245    case ir_unop_bit_count:
1246    case ir_unop_find_msb:
1247    case ir_unop_find_lsb:
1248    case ir_unop_d2f:
1249    case ir_unop_f2d:
1250    case ir_unop_d2i:
1251    case ir_unop_i2d:
1252    case ir_unop_d2u:
1253    case ir_unop_u2d:
1254    case ir_unop_d2b:
1255    case ir_unop_frexp_sig:
1256    case ir_unop_frexp_exp:
1257       assert(!"not supported");
1258       break;
1259    case ir_binop_min:
1260       emit(ir, OPCODE_MIN, result_dst, op[0], op[1]);
1261       break;
1262    case ir_binop_max:
1263       emit(ir, OPCODE_MAX, result_dst, op[0], op[1]);
1264       break;
1265    case ir_binop_pow:
1266       emit_scalar(ir, OPCODE_POW, result_dst, op[0], op[1]);
1267       break;
1268
1269       /* GLSL 1.30 integer ops are unsupported in Mesa IR, but since
1270        * hardware backends have no way to avoid Mesa IR generation
1271        * even if they don't use it, we need to emit "something" and
1272        * continue.
1273        */
1274    case ir_binop_lshift:
1275    case ir_binop_rshift:
1276    case ir_binop_bit_and:
1277    case ir_binop_bit_xor:
1278    case ir_binop_bit_or:
1279       emit(ir, OPCODE_ADD, result_dst, op[0], op[1]);
1280       break;
1281
1282    case ir_unop_bit_not:
1283    case ir_unop_round_even:
1284       emit(ir, OPCODE_MOV, result_dst, op[0]);
1285       break;
1286
1287    case ir_binop_ubo_load:
1288       assert(!"not supported");
1289       break;
1290
1291    case ir_triop_lrp:
1292       /* ir_triop_lrp operands are (x, y, a) while
1293        * OPCODE_LRP operands are (a, y, x) to match ARB_fragment_program.
1294        */
1295       emit(ir, OPCODE_LRP, result_dst, op[2], op[1], op[0]);
1296       break;
1297
1298    case ir_binop_vector_extract:
1299    case ir_triop_fma:
1300    case ir_triop_bitfield_extract:
1301    case ir_triop_vector_insert:
1302    case ir_quadop_bitfield_insert:
1303    case ir_binop_ldexp:
1304    case ir_triop_csel:
1305    case ir_binop_carry:
1306    case ir_binop_borrow:
1307    case ir_binop_imul_high:
1308    case ir_unop_interpolate_at_centroid:
1309    case ir_binop_interpolate_at_offset:
1310    case ir_binop_interpolate_at_sample:
1311    case ir_unop_dFdx_coarse:
1312    case ir_unop_dFdx_fine:
1313    case ir_unop_dFdy_coarse:
1314    case ir_unop_dFdy_fine:
1315    case ir_unop_subroutine_to_int:
1316    case ir_unop_get_buffer_size:
1317       assert(!"not supported");
1318       break;
1319
1320    case ir_unop_ssbo_unsized_array_length:
1321    case ir_quadop_vector:
1322       /* This operation should have already been handled.
1323        */
1324       assert(!"Should not get here.");
1325       break;
1326    }
1327
1328    this->result = result_src;
1329 }
1330
1331
1332 void
1333 ir_to_mesa_visitor::visit(ir_swizzle *ir)
1334 {
1335    src_reg src;
1336    int i;
1337    int swizzle[4];
1338
1339    /* Note that this is only swizzles in expressions, not those on the left
1340     * hand side of an assignment, which do write masking.  See ir_assignment
1341     * for that.
1342     */
1343
1344    ir->val->accept(this);
1345    src = this->result;
1346    assert(src.file != PROGRAM_UNDEFINED);
1347    assert(ir->type->vector_elements > 0);
1348
1349    for (i = 0; i < 4; i++) {
1350       if (i < ir->type->vector_elements) {
1351          switch (i) {
1352          case 0:
1353             swizzle[i] = GET_SWZ(src.swizzle, ir->mask.x);
1354             break;
1355          case 1:
1356             swizzle[i] = GET_SWZ(src.swizzle, ir->mask.y);
1357             break;
1358          case 2:
1359             swizzle[i] = GET_SWZ(src.swizzle, ir->mask.z);
1360             break;
1361          case 3:
1362             swizzle[i] = GET_SWZ(src.swizzle, ir->mask.w);
1363             break;
1364          }
1365       } else {
1366          /* If the type is smaller than a vec4, replicate the last
1367           * channel out.
1368           */
1369          swizzle[i] = swizzle[ir->type->vector_elements - 1];
1370       }
1371    }
1372
1373    src.swizzle = MAKE_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]);
1374
1375    this->result = src;
1376 }
1377
1378 void
1379 ir_to_mesa_visitor::visit(ir_dereference_variable *ir)
1380 {
1381    variable_storage *entry = find_variable_storage(ir->var);
1382    ir_variable *var = ir->var;
1383
1384    if (!entry) {
1385       switch (var->data.mode) {
1386       case ir_var_uniform:
1387          entry = new(mem_ctx) variable_storage(var, PROGRAM_UNIFORM,
1388                                                var->data.param_index);
1389          this->variables.push_tail(entry);
1390          break;
1391       case ir_var_shader_in:
1392          /* The linker assigns locations for varyings and attributes,
1393           * including deprecated builtins (like gl_Color),
1394           * user-assigned generic attributes (glBindVertexLocation),
1395           * and user-defined varyings.
1396           */
1397          assert(var->data.location != -1);
1398          entry = new(mem_ctx) variable_storage(var,
1399                                                PROGRAM_INPUT,
1400                                                var->data.location);
1401          break;
1402       case ir_var_shader_out:
1403          assert(var->data.location != -1);
1404          entry = new(mem_ctx) variable_storage(var,
1405                                                PROGRAM_OUTPUT,
1406                                                var->data.location);
1407          break;
1408       case ir_var_system_value:
1409          entry = new(mem_ctx) variable_storage(var,
1410                                                PROGRAM_SYSTEM_VALUE,
1411                                                var->data.location);
1412          break;
1413       case ir_var_auto:
1414       case ir_var_temporary:
1415          entry = new(mem_ctx) variable_storage(var, PROGRAM_TEMPORARY,
1416                                                this->next_temp);
1417          this->variables.push_tail(entry);
1418
1419          next_temp += type_size(var->type);
1420          break;
1421       }
1422
1423       if (!entry) {
1424          printf("Failed to make storage for %s\n", var->name);
1425          exit(1);
1426       }
1427    }
1428
1429    this->result = src_reg(entry->file, entry->index, var->type);
1430 }
1431
1432 void
1433 ir_to_mesa_visitor::visit(ir_dereference_array *ir)
1434 {
1435    ir_constant *index;
1436    src_reg src;
1437    int element_size = type_size(ir->type);
1438
1439    index = ir->array_index->constant_expression_value();
1440
1441    ir->array->accept(this);
1442    src = this->result;
1443
1444    if (index) {
1445       src.index += index->value.i[0] * element_size;
1446    } else {
1447       /* Variable index array dereference.  It eats the "vec4" of the
1448        * base of the array and an index that offsets the Mesa register
1449        * index.
1450        */
1451       ir->array_index->accept(this);
1452
1453       src_reg index_reg;
1454
1455       if (element_size == 1) {
1456          index_reg = this->result;
1457       } else {
1458          index_reg = get_temp(glsl_type::float_type);
1459
1460          emit(ir, OPCODE_MUL, dst_reg(index_reg),
1461               this->result, src_reg_for_float(element_size));
1462       }
1463
1464       /* If there was already a relative address register involved, add the
1465        * new and the old together to get the new offset.
1466        */
1467       if (src.reladdr != NULL)  {
1468          src_reg accum_reg = get_temp(glsl_type::float_type);
1469
1470          emit(ir, OPCODE_ADD, dst_reg(accum_reg),
1471               index_reg, *src.reladdr);
1472
1473          index_reg = accum_reg;
1474       }
1475
1476       src.reladdr = ralloc(mem_ctx, src_reg);
1477       memcpy(src.reladdr, &index_reg, sizeof(index_reg));
1478    }
1479
1480    /* If the type is smaller than a vec4, replicate the last channel out. */
1481    if (ir->type->is_scalar() || ir->type->is_vector())
1482       src.swizzle = swizzle_for_size(ir->type->vector_elements);
1483    else
1484       src.swizzle = SWIZZLE_NOOP;
1485
1486    this->result = src;
1487 }
1488
1489 void
1490 ir_to_mesa_visitor::visit(ir_dereference_record *ir)
1491 {
1492    unsigned int i;
1493    const glsl_type *struct_type = ir->record->type;
1494    int offset = 0;
1495
1496    ir->record->accept(this);
1497
1498    for (i = 0; i < struct_type->length; i++) {
1499       if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0)
1500          break;
1501       offset += type_size(struct_type->fields.structure[i].type);
1502    }
1503
1504    /* If the type is smaller than a vec4, replicate the last channel out. */
1505    if (ir->type->is_scalar() || ir->type->is_vector())
1506       this->result.swizzle = swizzle_for_size(ir->type->vector_elements);
1507    else
1508       this->result.swizzle = SWIZZLE_NOOP;
1509
1510    this->result.index += offset;
1511 }
1512
1513 /**
1514  * We want to be careful in assignment setup to hit the actual storage
1515  * instead of potentially using a temporary like we might with the
1516  * ir_dereference handler.
1517  */
1518 static dst_reg
1519 get_assignment_lhs(ir_dereference *ir, ir_to_mesa_visitor *v)
1520 {
1521    /* The LHS must be a dereference.  If the LHS is a variable indexed array
1522     * access of a vector, it must be separated into a series conditional moves
1523     * before reaching this point (see ir_vec_index_to_cond_assign).
1524     */
1525    assert(ir->as_dereference());
1526    ir_dereference_array *deref_array = ir->as_dereference_array();
1527    if (deref_array) {
1528       assert(!deref_array->array->type->is_vector());
1529    }
1530
1531    /* Use the rvalue deref handler for the most part.  We'll ignore
1532     * swizzles in it and write swizzles using writemask, though.
1533     */
1534    ir->accept(v);
1535    return dst_reg(v->result);
1536 }
1537
1538 /* Calculate the sampler index and also calculate the base uniform location
1539  * for struct members.
1540  */
1541 static void
1542 calc_sampler_offsets(struct gl_shader_program *prog, ir_dereference *deref,
1543                      unsigned *offset, unsigned *array_elements,
1544                      unsigned *location)
1545 {
1546    if (deref->ir_type == ir_type_dereference_variable)
1547       return;
1548
1549    switch (deref->ir_type) {
1550    case ir_type_dereference_array: {
1551       ir_dereference_array *deref_arr = deref->as_dereference_array();
1552       ir_constant *array_index =
1553          deref_arr->array_index->constant_expression_value();
1554
1555       if (!array_index) {
1556          /* GLSL 1.10 and 1.20 allowed variable sampler array indices,
1557           * while GLSL 1.30 requires that the array indices be
1558           * constant integer expressions.  We don't expect any driver
1559           * to actually work with a really variable array index, so
1560           * all that would work would be an unrolled loop counter that ends
1561           * up being constant above.
1562           */
1563          ralloc_strcat(&prog->InfoLog,
1564                        "warning: Variable sampler array index unsupported.\n"
1565                        "This feature of the language was removed in GLSL 1.20 "
1566                        "and is unlikely to be supported for 1.10 in Mesa.\n");
1567       } else {
1568          *offset += array_index->value.u[0] * *array_elements;
1569       }
1570
1571       *array_elements *= deref_arr->array->type->length;
1572
1573       calc_sampler_offsets(prog, deref_arr->array->as_dereference(),
1574                            offset, array_elements, location);
1575       break;
1576    }
1577
1578    case ir_type_dereference_record: {
1579       ir_dereference_record *deref_record = deref->as_dereference_record();
1580       unsigned field_index =
1581          deref_record->record->type->field_index(deref_record->field);
1582       *location +=
1583          deref_record->record->type->record_location_offset(field_index);
1584       calc_sampler_offsets(prog, deref_record->record->as_dereference(),
1585                            offset, array_elements, location);
1586       break;
1587    }
1588
1589    default:
1590       unreachable("Invalid deref type");
1591       break;
1592    }
1593 }
1594
1595 static int
1596 get_sampler_uniform_value(class ir_dereference *sampler,
1597                           struct gl_shader_program *shader_program,
1598                           const struct gl_program *prog)
1599 {
1600    GLuint shader = _mesa_program_enum_to_shader_stage(prog->Target);
1601    ir_variable *var = sampler->variable_referenced();
1602    unsigned location = var->data.location;
1603    unsigned array_elements = 1;
1604    unsigned offset = 0;
1605
1606    calc_sampler_offsets(shader_program, sampler, &offset, &array_elements,
1607                         &location);
1608
1609    assert(shader_program->UniformStorage[location].opaque[shader].active);
1610    return shader_program->UniformStorage[location].opaque[shader].index +
1611           offset;
1612 }
1613
1614 /**
1615  * Process the condition of a conditional assignment
1616  *
1617  * Examines the condition of a conditional assignment to generate the optimal
1618  * first operand of a \c CMP instruction.  If the condition is a relational
1619  * operator with 0 (e.g., \c ir_binop_less), the value being compared will be
1620  * used as the source for the \c CMP instruction.  Otherwise the comparison
1621  * is processed to a boolean result, and the boolean result is used as the
1622  * operand to the CMP instruction.
1623  */
1624 bool
1625 ir_to_mesa_visitor::process_move_condition(ir_rvalue *ir)
1626 {
1627    ir_rvalue *src_ir = ir;
1628    bool negate = true;
1629    bool switch_order = false;
1630
1631    ir_expression *const expr = ir->as_expression();
1632    if ((expr != NULL) && (expr->get_num_operands() == 2)) {
1633       bool zero_on_left = false;
1634
1635       if (expr->operands[0]->is_zero()) {
1636          src_ir = expr->operands[1];
1637          zero_on_left = true;
1638       } else if (expr->operands[1]->is_zero()) {
1639          src_ir = expr->operands[0];
1640          zero_on_left = false;
1641       }
1642
1643       /*      a is -  0  +            -  0  +
1644        * (a <  0)  T  F  F  ( a < 0)  T  F  F
1645        * (0 <  a)  F  F  T  (-a < 0)  F  F  T
1646        * (a <= 0)  T  T  F  (-a < 0)  F  F  T  (swap order of other operands)
1647        * (0 <= a)  F  T  T  ( a < 0)  T  F  F  (swap order of other operands)
1648        * (a >  0)  F  F  T  (-a < 0)  F  F  T
1649        * (0 >  a)  T  F  F  ( a < 0)  T  F  F
1650        * (a >= 0)  F  T  T  ( a < 0)  T  F  F  (swap order of other operands)
1651        * (0 >= a)  T  T  F  (-a < 0)  F  F  T  (swap order of other operands)
1652        *
1653        * Note that exchanging the order of 0 and 'a' in the comparison simply
1654        * means that the value of 'a' should be negated.
1655        */
1656       if (src_ir != ir) {
1657          switch (expr->operation) {
1658          case ir_binop_less:
1659             switch_order = false;
1660             negate = zero_on_left;
1661             break;
1662
1663          case ir_binop_greater:
1664             switch_order = false;
1665             negate = !zero_on_left;
1666             break;
1667
1668          case ir_binop_lequal:
1669             switch_order = true;
1670             negate = !zero_on_left;
1671             break;
1672
1673          case ir_binop_gequal:
1674             switch_order = true;
1675             negate = zero_on_left;
1676             break;
1677
1678          default:
1679             /* This isn't the right kind of comparison afterall, so make sure
1680              * the whole condition is visited.
1681              */
1682             src_ir = ir;
1683             break;
1684          }
1685       }
1686    }
1687
1688    src_ir->accept(this);
1689
1690    /* We use the OPCODE_CMP (a < 0 ? b : c) for conditional moves, and the
1691     * condition we produced is 0.0 or 1.0.  By flipping the sign, we can
1692     * choose which value OPCODE_CMP produces without an extra instruction
1693     * computing the condition.
1694     */
1695    if (negate)
1696       this->result.negate = ~this->result.negate;
1697
1698    return switch_order;
1699 }
1700
1701 void
1702 ir_to_mesa_visitor::visit(ir_assignment *ir)
1703 {
1704    dst_reg l;
1705    src_reg r;
1706    int i;
1707
1708    ir->rhs->accept(this);
1709    r = this->result;
1710
1711    l = get_assignment_lhs(ir->lhs, this);
1712
1713    /* FINISHME: This should really set to the correct maximal writemask for each
1714     * FINISHME: component written (in the loops below).  This case can only
1715     * FINISHME: occur for matrices, arrays, and structures.
1716     */
1717    if (ir->write_mask == 0) {
1718       assert(!ir->lhs->type->is_scalar() && !ir->lhs->type->is_vector());
1719       l.writemask = WRITEMASK_XYZW;
1720    } else if (ir->lhs->type->is_scalar()) {
1721       /* FINISHME: This hack makes writing to gl_FragDepth, which lives in the
1722        * FINISHME: W component of fragment shader output zero, work correctly.
1723        */
1724       l.writemask = WRITEMASK_XYZW;
1725    } else {
1726       int swizzles[4];
1727       int first_enabled_chan = 0;
1728       int rhs_chan = 0;
1729
1730       assert(ir->lhs->type->is_vector());
1731       l.writemask = ir->write_mask;
1732
1733       for (int i = 0; i < 4; i++) {
1734          if (l.writemask & (1 << i)) {
1735             first_enabled_chan = GET_SWZ(r.swizzle, i);
1736             break;
1737          }
1738       }
1739
1740       /* Swizzle a small RHS vector into the channels being written.
1741        *
1742        * glsl ir treats write_mask as dictating how many channels are
1743        * present on the RHS while Mesa IR treats write_mask as just
1744        * showing which channels of the vec4 RHS get written.
1745        */
1746       for (int i = 0; i < 4; i++) {
1747          if (l.writemask & (1 << i))
1748             swizzles[i] = GET_SWZ(r.swizzle, rhs_chan++);
1749          else
1750             swizzles[i] = first_enabled_chan;
1751       }
1752       r.swizzle = MAKE_SWIZZLE4(swizzles[0], swizzles[1],
1753                                 swizzles[2], swizzles[3]);
1754    }
1755
1756    assert(l.file != PROGRAM_UNDEFINED);
1757    assert(r.file != PROGRAM_UNDEFINED);
1758
1759    if (ir->condition) {
1760       const bool switch_order = this->process_move_condition(ir->condition);
1761       src_reg condition = this->result;
1762
1763       for (i = 0; i < type_size(ir->lhs->type); i++) {
1764          if (switch_order) {
1765             emit(ir, OPCODE_CMP, l, condition, src_reg(l), r);
1766          } else {
1767             emit(ir, OPCODE_CMP, l, condition, r, src_reg(l));
1768          }
1769
1770          l.index++;
1771          r.index++;
1772       }
1773    } else {
1774       for (i = 0; i < type_size(ir->lhs->type); i++) {
1775          emit(ir, OPCODE_MOV, l, r);
1776          l.index++;
1777          r.index++;
1778       }
1779    }
1780 }
1781
1782
1783 void
1784 ir_to_mesa_visitor::visit(ir_constant *ir)
1785 {
1786    src_reg src;
1787    GLfloat stack_vals[4] = { 0 };
1788    GLfloat *values = stack_vals;
1789    unsigned int i;
1790
1791    /* Unfortunately, 4 floats is all we can get into
1792     * _mesa_add_unnamed_constant.  So, make a temp to store an
1793     * aggregate constant and move each constant value into it.  If we
1794     * get lucky, copy propagation will eliminate the extra moves.
1795     */
1796
1797    if (ir->type->base_type == GLSL_TYPE_STRUCT) {
1798       src_reg temp_base = get_temp(ir->type);
1799       dst_reg temp = dst_reg(temp_base);
1800
1801       foreach_in_list(ir_constant, field_value, &ir->components) {
1802          int size = type_size(field_value->type);
1803
1804          assert(size > 0);
1805
1806          field_value->accept(this);
1807          src = this->result;
1808
1809          for (i = 0; i < (unsigned int)size; i++) {
1810             emit(ir, OPCODE_MOV, temp, src);
1811
1812             src.index++;
1813             temp.index++;
1814          }
1815       }
1816       this->result = temp_base;
1817       return;
1818    }
1819
1820    if (ir->type->is_array()) {
1821       src_reg temp_base = get_temp(ir->type);
1822       dst_reg temp = dst_reg(temp_base);
1823       int size = type_size(ir->type->fields.array);
1824
1825       assert(size > 0);
1826
1827       for (i = 0; i < ir->type->length; i++) {
1828          ir->array_elements[i]->accept(this);
1829          src = this->result;
1830          for (int j = 0; j < size; j++) {
1831             emit(ir, OPCODE_MOV, temp, src);
1832
1833             src.index++;
1834             temp.index++;
1835          }
1836       }
1837       this->result = temp_base;
1838       return;
1839    }
1840
1841    if (ir->type->is_matrix()) {
1842       src_reg mat = get_temp(ir->type);
1843       dst_reg mat_column = dst_reg(mat);
1844
1845       for (i = 0; i < ir->type->matrix_columns; i++) {
1846          assert(ir->type->base_type == GLSL_TYPE_FLOAT);
1847          values = &ir->value.f[i * ir->type->vector_elements];
1848
1849          src = src_reg(PROGRAM_CONSTANT, -1, NULL);
1850          src.index = _mesa_add_unnamed_constant(this->prog->Parameters,
1851                                                 (gl_constant_value *) values,
1852                                                 ir->type->vector_elements,
1853                                                 &src.swizzle);
1854          emit(ir, OPCODE_MOV, mat_column, src);
1855
1856          mat_column.index++;
1857       }
1858
1859       this->result = mat;
1860       return;
1861    }
1862
1863    src.file = PROGRAM_CONSTANT;
1864    switch (ir->type->base_type) {
1865    case GLSL_TYPE_FLOAT:
1866       values = &ir->value.f[0];
1867       break;
1868    case GLSL_TYPE_UINT:
1869       for (i = 0; i < ir->type->vector_elements; i++) {
1870          values[i] = ir->value.u[i];
1871       }
1872       break;
1873    case GLSL_TYPE_INT:
1874       for (i = 0; i < ir->type->vector_elements; i++) {
1875          values[i] = ir->value.i[i];
1876       }
1877       break;
1878    case GLSL_TYPE_BOOL:
1879       for (i = 0; i < ir->type->vector_elements; i++) {
1880          values[i] = ir->value.b[i];
1881       }
1882       break;
1883    default:
1884       assert(!"Non-float/uint/int/bool constant");
1885    }
1886
1887    this->result = src_reg(PROGRAM_CONSTANT, -1, ir->type);
1888    this->result.index = _mesa_add_unnamed_constant(this->prog->Parameters,
1889                                                    (gl_constant_value *) values,
1890                                                    ir->type->vector_elements,
1891                                                    &this->result.swizzle);
1892 }
1893
/**
 * Function calls are not translated by this visitor.
 *
 * The visitor only asserts: by the time IR reaches it, every call is
 * expected to have been inlined.  Nothing is emitted.
 */
void
ir_to_mesa_visitor::visit(ir_call *)
{
   assert(!"ir_to_mesa: All function calls should have been inlined by now.");
}
1899
/**
 * Translate an ir_texture into a Mesa texture instruction.
 *
 * The coordinate is copied into a vec4 temporary, which is then patched
 * with the projector, the shadow comparator and the LOD / LOD bias as the
 * chosen opcode requires.  The texture instruction writes a second vec4
 * temporary, and this->result is left referring to that temporary.
 *
 * The order of the accept(), get_temp() and emit() calls below determines
 * both temporary numbering and instruction order.
 */
void
ir_to_mesa_visitor::visit(ir_texture *ir)
{
   src_reg result_src, coord, lod_info, projector, dx, dy;
   dst_reg result_dst, coord_dst;
   ir_to_mesa_instruction *inst = NULL;
   prog_opcode opcode = OPCODE_NOP;

   /* For ir_txs no coordinate is visited; a constant 0.0 stands in for it. */
   if (ir->op == ir_txs)
      this->result = src_reg_for_float(0.0);
   else
      ir->coordinate->accept(this);

   /* Put our coords in a temp.  We'll need to modify them for shadow,
    * projection, or LOD, so the only case we'd use it as is is if
    * we're doing plain old texturing.  Mesa IR optimization should
    * handle cleaning up our mess in that case.
    */
   coord = get_temp(glsl_type::vec4_type);
   coord_dst = dst_reg(coord);
   emit(ir, OPCODE_MOV, coord_dst, this->result);

   /* Evaluate the projector now; it is slotted into the coordinate (or
    * divided out by hand) further down, once the opcode is known.
    */
   if (ir->projector) {
      ir->projector->accept(this);
      projector = this->result;
   }

   /* Storage for our result.  Ideally for an assignment we'd be using
    * the actual storage for the result here, instead.
    */
   result_src = get_temp(glsl_type::vec4_type);
   result_dst = dst_reg(result_src);

   /* Pick the Mesa opcode and evaluate any LOD / bias / gradient operands.
    * For the "unexpected" operations, opcode stays OPCODE_NOP when asserts
    * are compiled out.
    */
   switch (ir->op) {
   case ir_tex:
   case ir_txs:
      opcode = OPCODE_TEX;
      break;
   case ir_txb:
      opcode = OPCODE_TXB;
      ir->lod_info.bias->accept(this);
      lod_info = this->result;
      break;
   case ir_txf:
      /* Pretend to be TXL so the sampler, coordinate, lod are available */
   case ir_txl:
      opcode = OPCODE_TXL;
      ir->lod_info.lod->accept(this);
      lod_info = this->result;
      break;
   case ir_txd:
      opcode = OPCODE_TXD;
      ir->lod_info.grad.dPdx->accept(this);
      dx = this->result;
      ir->lod_info.grad.dPdy->accept(this);
      dy = this->result;
      break;
   case ir_txf_ms:
      assert(!"Unexpected ir_txf_ms opcode");
      break;
   case ir_lod:
      assert(!"Unexpected ir_lod opcode");
      break;
   case ir_tg4:
      assert(!"Unexpected ir_tg4 opcode");
      break;
   case ir_query_levels:
      assert(!"Unexpected ir_query_levels opcode");
      break;
   case ir_samples_identical:
      unreachable("Unexpected ir_samples_identical opcode");
   case ir_texture_samples:
      unreachable("Unexpected ir_texture_samples opcode");
   }

   const glsl_type *sampler_type = ir->sampler->type;

   if (ir->projector) {
      if (opcode == OPCODE_TEX) {
         /* Slot the projector in as the last component of the coord. */
         coord_dst.writemask = WRITEMASK_W;
         emit(ir, OPCODE_MOV, coord_dst, projector);
         coord_dst.writemask = WRITEMASK_XYZW;
         opcode = OPCODE_TXP;
      } else {
         /* Reads coord.w, which receives 1/projector from the RCP below. */
         src_reg coord_w = coord;
         coord_w.swizzle = SWIZZLE_WWWW;

         /* For the other TEX opcodes there's no projective version
          * since the last slot is taken up by lod info.  Do the
          * projective divide now.
          */
         coord_dst.writemask = WRITEMASK_W;
         emit(ir, OPCODE_RCP, coord_dst, projector);

         /* In the case where we have to project the coordinates "by hand,"
          * the shadow comparator value must also be projected.
          */
         src_reg tmp_src = coord;
         if (ir->shadow_comparitor) {
            /* Slot the shadow value in as the second to last component of the
             * coord.
             */
            ir->shadow_comparitor->accept(this);

            tmp_src = get_temp(glsl_type::vec4_type);
            dst_reg tmp_dst = dst_reg(tmp_src);

            /* Projective division not allowed for array samplers. */
            assert(!sampler_type->sampler_array);

            tmp_dst.writemask = WRITEMASK_Z;
            emit(ir, OPCODE_MOV, tmp_dst, this->result);

            tmp_dst.writemask = WRITEMASK_XY;
            emit(ir, OPCODE_MOV, tmp_dst, coord);
         }

         /* coord.xyz = tmp_src.xyz * (1 / projector) */
         coord_dst.writemask = WRITEMASK_XYZ;
         emit(ir, OPCODE_MUL, coord_dst, tmp_src, coord_w);

         coord_dst.writemask = WRITEMASK_XYZW;
         coord.swizzle = SWIZZLE_XYZW;
      }
   }

   /* If projection is done and the opcode is not OPCODE_TXP, then the shadow
    * comparator was put in the correct place (and projected) by the code,
    * above, that handles by-hand projection.
    */
   if (ir->shadow_comparitor && (!ir->projector || opcode == OPCODE_TXP)) {
      /* Slot the shadow value in as the second to last component of the
       * coord.
       */
      ir->shadow_comparitor->accept(this);

      /* XXX This will need to be updated for cubemap array samplers. */
      if (sampler_type->sampler_dimensionality == GLSL_SAMPLER_DIM_2D &&
          sampler_type->sampler_array) {
         coord_dst.writemask = WRITEMASK_W;
      } else {
         coord_dst.writemask = WRITEMASK_Z;
      }

      emit(ir, OPCODE_MOV, coord_dst, this->result);
      coord_dst.writemask = WRITEMASK_XYZW;
   }

   if (opcode == OPCODE_TXL || opcode == OPCODE_TXB) {
      /* Mesa IR stores lod or lod bias in the last channel of the coords. */
      coord_dst.writemask = WRITEMASK_W;
      emit(ir, OPCODE_MOV, coord_dst, lod_info);
      coord_dst.writemask = WRITEMASK_XYZW;
   }

   /* Only TXD carries the two gradient operands. */
   if (opcode == OPCODE_TXD)
      inst = emit(ir, opcode, result_dst, coord, dx, dy);
   else
      inst = emit(ir, opcode, result_dst, coord);

   if (ir->shadow_comparitor)
      inst->tex_shadow = GL_TRUE;

   inst->sampler = get_sampler_uniform_value(ir->sampler, shader_program,
                                             prog);

   /* Map the GLSL sampler dimensionality (and array-ness for 1D/2D) to the
    * Mesa texture target index.
    */
   switch (sampler_type->sampler_dimensionality) {
   case GLSL_SAMPLER_DIM_1D:
      inst->tex_target = (sampler_type->sampler_array)
         ? TEXTURE_1D_ARRAY_INDEX : TEXTURE_1D_INDEX;
      break;
   case GLSL_SAMPLER_DIM_2D:
      inst->tex_target = (sampler_type->sampler_array)
         ? TEXTURE_2D_ARRAY_INDEX : TEXTURE_2D_INDEX;
      break;
   case GLSL_SAMPLER_DIM_3D:
      inst->tex_target = TEXTURE_3D_INDEX;
      break;
   case GLSL_SAMPLER_DIM_CUBE:
      inst->tex_target = TEXTURE_CUBE_INDEX;
      break;
   case GLSL_SAMPLER_DIM_RECT:
      inst->tex_target = TEXTURE_RECT_INDEX;
      break;
   case GLSL_SAMPLER_DIM_BUF:
      assert(!"FINISHME: Implement ARB_texture_buffer_object");
      break;
   case GLSL_SAMPLER_DIM_EXTERNAL:
      inst->tex_target = TEXTURE_EXTERNAL_INDEX;
      break;
   default:
      assert(!"Should not get here.");
   }

   this->result = result_src;
}
2096
/**
 * Emit OPCODE_RET for a return statement.
 *
 * Only value-less returns are expected here; a return carrying a value
 * trips the assertion.
 */
void
ir_to_mesa_visitor::visit(ir_return *ir)
{
   /* Non-void functions should have been inlined.  We may still emit RETs
    * from main() unless the EmitNoMainReturn option is set.
    */
   assert(!ir->get_value());
   emit(ir, OPCODE_RET);
}
2106
2107 void
2108 ir_to_mesa_visitor::visit(ir_discard *ir)
2109 {
2110    if (!ir->condition)
2111       ir->condition = new(mem_ctx) ir_constant(true);
2112
2113    ir->condition->accept(this);
2114    this->result.negate = ~this->result.negate;
2115    emit(ir, OPCODE_KIL, undef_dst, this->result);
2116 }
2117
2118 void
2119 ir_to_mesa_visitor::visit(ir_if *ir)
2120 {
2121    ir_to_mesa_instruction *if_inst;
2122
2123    ir->condition->accept(this);
2124    assert(this->result.file != PROGRAM_UNDEFINED);
2125
2126    if_inst = emit(ir->condition, OPCODE_IF, undef_dst, this->result);
2127
2128    this->instructions.push_tail(if_inst);
2129
2130    visit_exec_list(&ir->then_instructions, this);
2131
2132    if (!ir->else_instructions.is_empty()) {
2133       emit(ir->condition, OPCODE_ELSE);
2134       visit_exec_list(&ir->else_instructions, this);
2135    }
2136
2137    emit(ir->condition, OPCODE_ENDIF);
2138 }
2139
/**
 * EmitVertex() is a geometry shader operation, which this translator does
 * not handle; reaching this visitor only trips an assertion.
 */
void
ir_to_mesa_visitor::visit(ir_emit_vertex *)
{
   assert(!"Geometry shaders not supported.");
}
2145
/**
 * EndPrimitive() is a geometry shader operation, which this translator does
 * not handle; reaching this visitor only trips an assertion.
 */
void
ir_to_mesa_visitor::visit(ir_end_primitive *)
{
   assert(!"Geometry shaders not supported.");
}
2151
/**
 * barrier() is not translated by this visitor; the visitor is marked
 * unreachable.
 */
void
ir_to_mesa_visitor::visit(ir_barrier *)
{
   unreachable("GLSL barrier() not supported.");
}
2157
2158 ir_to_mesa_visitor::ir_to_mesa_visitor()
2159 {
2160    result.file = PROGRAM_UNDEFINED;
2161    next_temp = 1;
2162    next_signature_id = 1;
2163    current_function = NULL;
2164    mem_ctx = ralloc_context(NULL);
2165 }
2166
/**
 * Release the visitor's ralloc context, and with it everything that was
 * allocated out of mem_ctx while translating.
 */
ir_to_mesa_visitor::~ir_to_mesa_visitor()
{
   ralloc_free(mem_ctx);
}
2171
2172 static struct prog_src_register
2173 mesa_src_reg_from_ir_src_reg(src_reg reg)
2174 {
2175    struct prog_src_register mesa_reg;
2176
2177    mesa_reg.File = reg.file;
2178    assert(reg.index < (1 << INST_INDEX_BITS));
2179    mesa_reg.Index = reg.index;
2180    mesa_reg.Swizzle = reg.swizzle;
2181    mesa_reg.RelAddr = reg.reladdr != NULL;
2182    mesa_reg.Negate = reg.negate;
2183
2184    return mesa_reg;
2185 }
2186
2187 static void
2188 set_branchtargets(ir_to_mesa_visitor *v,
2189                   struct prog_instruction *mesa_instructions,
2190                   int num_instructions)
2191 {
2192    int if_count = 0, loop_count = 0;
2193    int *if_stack, *loop_stack;
2194    int if_stack_pos = 0, loop_stack_pos = 0;
2195    int i, j;
2196
2197    for (i = 0; i < num_instructions; i++) {
2198       switch (mesa_instructions[i].Opcode) {
2199       case OPCODE_IF:
2200          if_count++;
2201          break;
2202       case OPCODE_BGNLOOP:
2203          loop_count++;
2204          break;
2205       case OPCODE_BRK:
2206       case OPCODE_CONT:
2207          mesa_instructions[i].BranchTarget = -1;
2208          break;
2209       default:
2210          break;
2211       }
2212    }
2213
2214    if_stack = rzalloc_array(v->mem_ctx, int, if_count);
2215    loop_stack = rzalloc_array(v->mem_ctx, int, loop_count);
2216
2217    for (i = 0; i < num_instructions; i++) {
2218       switch (mesa_instructions[i].Opcode) {
2219       case OPCODE_IF:
2220          if_stack[if_stack_pos] = i;
2221          if_stack_pos++;
2222          break;
2223       case OPCODE_ELSE:
2224          mesa_instructions[if_stack[if_stack_pos - 1]].BranchTarget = i;
2225          if_stack[if_stack_pos - 1] = i;
2226          break;
2227       case OPCODE_ENDIF:
2228          mesa_instructions[if_stack[if_stack_pos - 1]].BranchTarget = i;
2229          if_stack_pos--;
2230          break;
2231       case OPCODE_BGNLOOP:
2232          loop_stack[loop_stack_pos] = i;
2233          loop_stack_pos++;
2234          break;
2235       case OPCODE_ENDLOOP:
2236          loop_stack_pos--;
2237          /* Rewrite any breaks/conts at this nesting level (haven't
2238           * already had a BranchTarget assigned) to point to the end
2239           * of the loop.
2240           */
2241          for (j = loop_stack[loop_stack_pos]; j < i; j++) {
2242             if (mesa_instructions[j].Opcode == OPCODE_BRK ||
2243                 mesa_instructions[j].Opcode == OPCODE_CONT) {
2244                if (mesa_instructions[j].BranchTarget == -1) {
2245                   mesa_instructions[j].BranchTarget = i;
2246                }
2247             }
2248          }
2249          /* The loop ends point at each other. */
2250          mesa_instructions[i].BranchTarget = loop_stack[loop_stack_pos];
2251          mesa_instructions[loop_stack[loop_stack_pos]].BranchTarget = i;
2252          break;
2253       case OPCODE_CAL:
2254          foreach_in_list(function_entry, entry, &v->function_signatures) {
2255             if (entry->sig_id == mesa_instructions[i].BranchTarget) {
2256                mesa_instructions[i].BranchTarget = entry->inst;
2257                break;
2258             }
2259          }
2260          break;
2261       default:
2262          break;
2263       }
2264    }
2265 }
2266
2267 static void
2268 print_program(struct prog_instruction *mesa_instructions,
2269               ir_instruction **mesa_instruction_annotation,
2270               int num_instructions)
2271 {
2272    ir_instruction *last_ir = NULL;
2273    int i;
2274    int indent = 0;
2275
2276    for (i = 0; i < num_instructions; i++) {
2277       struct prog_instruction *mesa_inst = mesa_instructions + i;
2278       ir_instruction *ir = mesa_instruction_annotation[i];
2279
2280       fprintf(stdout, "%3d: ", i);
2281
2282       if (last_ir != ir && ir) {
2283          int j;
2284
2285          for (j = 0; j < indent; j++) {
2286             fprintf(stdout, " ");
2287          }
2288          ir->print();
2289          printf("\n");
2290          last_ir = ir;
2291
2292          fprintf(stdout, "     "); /* line number spacing. */
2293       }
2294
2295       indent = _mesa_fprint_instruction_opt(stdout, mesa_inst, indent,
2296                                             PROG_PRINT_DEBUG, NULL);
2297    }
2298 }
2299
2300 namespace {
2301
2302 class add_uniform_to_shader : public program_resource_visitor {
2303 public:
2304    add_uniform_to_shader(struct gl_shader_program *shader_program,
2305                          struct gl_program_parameter_list *params,
2306                          gl_shader_stage shader_type)
2307       : shader_program(shader_program), params(params), idx(-1),
2308         shader_type(shader_type)
2309    {
2310       /* empty */
2311    }
2312
2313    void process(ir_variable *var)
2314    {
2315       this->idx = -1;
2316       this->program_resource_visitor::process(var);
2317       var->data.param_index = this->idx;
2318    }
2319
2320 private:
2321    virtual void visit_field(const glsl_type *type, const char *name,
2322                             bool row_major);
2323
2324    struct gl_shader_program *shader_program;
2325    struct gl_program_parameter_list *params;
2326    int idx;
2327    gl_shader_stage shader_type;
2328 };
2329
2330 } /* anonymous namespace */
2331
2332 void
2333 add_uniform_to_shader::visit_field(const glsl_type *type, const char *name,
2334                                    bool row_major)
2335 {
2336    unsigned int size;
2337
2338    (void) row_major;
2339
2340    /* atomics don't get real storage */
2341    if (type->contains_atomic())
2342       return;
2343
2344    if (type->is_vector() || type->is_scalar()) {
2345       size = type->vector_elements;
2346       if (type->is_double())
2347          size *= 2;
2348    } else {
2349       size = type_size(type) * 4;
2350    }
2351
2352    gl_register_file file;
2353    if (type->without_array()->is_sampler()) {
2354       file = PROGRAM_SAMPLER;
2355    } else {
2356       file = PROGRAM_UNIFORM;
2357    }
2358
2359    int index = _mesa_lookup_parameter_index(params, name);
2360    if (index < 0) {
2361       index = _mesa_add_parameter(params, file, name, size, type->gl_type,
2362                                   NULL, NULL);
2363
2364       /* Sampler uniform values are stored in prog->SamplerUnits,
2365        * and the entry in that array is selected by this index we
2366        * store in ParameterValues[].
2367        */
2368       if (file == PROGRAM_SAMPLER) {
2369          unsigned location;
2370          const bool found =
2371             this->shader_program->UniformHash->get(location,
2372                                                    params->Parameters[index].Name);
2373          assert(found);
2374
2375          if (!found)
2376             return;
2377
2378          struct gl_uniform_storage *storage =
2379             &this->shader_program->UniformStorage[location];
2380
2381          assert(storage->type->is_sampler() &&
2382                 storage->opaque[shader_type].active);
2383
2384          for (unsigned int j = 0; j < size / 4; j++)
2385             params->ParameterValues[index + j][0].f =
2386                storage->opaque[shader_type].index + j;
2387       }
2388    }
2389
2390    /* The first part of the uniform that's processed determines the base
2391     * location of the whole uniform (for structures).
2392     */
2393    if (this->idx < 0)
2394       this->idx = index;
2395 }
2396
2397 /**
2398  * Generate the program parameters list for the user uniforms in a shader
2399  *
2400  * \param shader_program Linked shader program.  This is only used to
2401  *                       emit possible link errors to the info log.
2402  * \param sh             Shader whose uniforms are to be processed.
2403  * \param params         Parameter list to be filled in.
2404  */
2405 void
2406 _mesa_generate_parameters_list_for_uniforms(struct gl_shader_program
2407                                             *shader_program,
2408                                             struct gl_shader *sh,
2409                                             struct gl_program_parameter_list
2410                                             *params)
2411 {
2412    add_uniform_to_shader add(shader_program, params, sh->Stage);
2413
2414    foreach_in_list(ir_instruction, node, sh->ir) {
2415       ir_variable *var = node->as_variable();
2416
2417       if ((var == NULL) || (var->data.mode != ir_var_uniform)
2418           || var->is_in_buffer_block() || (strncmp(var->name, "gl_", 3) == 0))
2419          continue;
2420
2421       add.process(var);
2422    }
2423 }
2424
2425 void
2426 _mesa_associate_uniform_storage(struct gl_context *ctx,
2427                                 struct gl_shader_program *shader_program,
2428                                 struct gl_program_parameter_list *params)
2429 {
2430    /* After adding each uniform to the parameter list, connect the storage for
2431     * the parameter with the tracking structure used by the API for the
2432     * uniform.
2433     */
2434    unsigned last_location = unsigned(~0);
2435    for (unsigned i = 0; i < params->NumParameters; i++) {
2436       if (params->Parameters[i].Type != PROGRAM_UNIFORM)
2437          continue;
2438
2439       unsigned location;
2440       const bool found =
2441          shader_program->UniformHash->get(location, params->Parameters[i].Name);
2442       assert(found);
2443
2444       if (!found)
2445          continue;
2446
2447       struct gl_uniform_storage *storage =
2448          &shader_program->UniformStorage[location];
2449
2450       /* Do not associate any uniform storage to built-in uniforms */
2451       if (storage->builtin)
2452          continue;
2453
2454       if (location != last_location) {
2455          enum gl_uniform_driver_format format = uniform_native;
2456
2457          unsigned columns = 0;
2458          int dmul = 4 * sizeof(float);
2459          switch (storage->type->base_type) {
2460          case GLSL_TYPE_UINT:
2461             assert(ctx->Const.NativeIntegers);
2462             format = uniform_native;
2463             columns = 1;
2464             break;
2465          case GLSL_TYPE_INT:
2466             format =
2467                (ctx->Const.NativeIntegers) ? uniform_native : uniform_int_float;
2468             columns = 1;
2469             break;
2470
2471          case GLSL_TYPE_DOUBLE:
2472             if (storage->type->vector_elements > 2)
2473                dmul *= 2;
2474             /* fallthrough */
2475          case GLSL_TYPE_FLOAT:
2476             format = uniform_native;
2477             columns = storage->type->matrix_columns;
2478             break;
2479          case GLSL_TYPE_BOOL:
2480             format = uniform_native;
2481             columns = 1;
2482             break;
2483          case GLSL_TYPE_SAMPLER:
2484          case GLSL_TYPE_IMAGE:
2485          case GLSL_TYPE_SUBROUTINE:
2486             format = uniform_native;
2487             columns = 1;
2488             break;
2489          case GLSL_TYPE_ATOMIC_UINT:
2490          case GLSL_TYPE_ARRAY:
2491          case GLSL_TYPE_VOID:
2492          case GLSL_TYPE_STRUCT:
2493          case GLSL_TYPE_ERROR:
2494          case GLSL_TYPE_INTERFACE:
2495          case GLSL_TYPE_FUNCTION:
2496             assert(!"Should not get here.");
2497             break;
2498          }
2499
2500          _mesa_uniform_attach_driver_storage(storage,
2501                                              dmul * columns,
2502                                              dmul,
2503                                              format,
2504                                              &params->ParameterValues[i]);
2505
2506          /* After attaching the driver's storage to the uniform, propagate any
2507           * data from the linker's backing store.  This will cause values from
2508           * initializers in the source code to be copied over.
2509           */
2510          _mesa_propagate_uniforms_to_driver_storage(storage,
2511                                                     0,
2512                                                     MAX2(1, storage->array_elements));
2513
2514          last_location = location;
2515       }
2516    }
2517 }
2518
/*
 * On a basic block basis, tracks available PROGRAM_TEMPORARY register
 * channels for copy propagation and updates following instructions to
 * use the original versions.
 *
 * The ir_to_mesa_visitor lazily produces code assuming that this pass
 * will occur.  As an example, a TXP production before this pass:
 *
 * 0: MOV TEMP[1], INPUT[4].xyyy;
 * 1: MOV TEMP[1].w, INPUT[4].wwww;
 * 2: TXP TEMP[2], TEMP[1], texture[0], 2D;
 *
 * and after:
 *
 * 0: MOV TEMP[1], INPUT[4].xyyy;
 * 1: MOV TEMP[1].w, INPUT[4].wwww;
 * 2: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D;
 *
 * which allows for dead code elimination on TEMP[1]'s writes.
 *
 * Bookkeeping: acp ("available copies") has one slot per channel of every
 * temporary, indexed as acp[4 * reg + chan].  A slot holds the MOV that
 * last wrote that channel, or NULL when no copy is available.  acp_level
 * records the IF nesting level at which each slot was filled.
 */
void
ir_to_mesa_visitor::copy_propagate(void)
{
   ir_to_mesa_instruction **acp = rzalloc_array(mem_ctx,
                                                    ir_to_mesa_instruction *,
                                                    this->next_temp * 4);
   int *acp_level = rzalloc_array(mem_ctx, int, this->next_temp * 4);
   int level = 0;

   foreach_in_list(ir_to_mesa_instruction, inst, &this->instructions) {
      assert(inst->dst.file != PROGRAM_TEMPORARY
             || inst->dst.index < this->next_temp);

      /* First, do any copy propagation possible into the src regs. */
      for (int r = 0; r < 3; r++) {
         ir_to_mesa_instruction *first = NULL;
         bool good = true;
         /* Only used below once the source is known to be a temporary. */
         int acp_base = inst->src[r].index * 4;

         if (inst->src[r].file != PROGRAM_TEMPORARY ||
             inst->src[r].reladdr)
            continue;

         /* See if we can find entries in the ACP consisting of MOVs
          * from the same src register for all the swizzled channels
          * of this src register reference.
          */
         for (int i = 0; i < 4; i++) {
            int src_chan = GET_SWZ(inst->src[r].swizzle, i);
            ir_to_mesa_instruction *copy_chan = acp[acp_base + src_chan];

            if (!copy_chan) {
               good = false;
               break;
            }

            assert(acp_level[acp_base + src_chan] <= level);

            if (!first) {
               first = copy_chan;
            } else {
               if (first->src[0].file != copy_chan->src[0].file ||
                   first->src[0].index != copy_chan->src[0].index) {
                  good = false;
                  break;
               }
            }
         }

         if (good) {
            /* We've now validated that we can copy-propagate to
             * replace this src register reference.  Do it.
             */
            inst->src[r].file = first->src[0].file;
            inst->src[r].index = first->src[0].index;

            /* Compose the swizzles: for each channel, look through this
             * reference's (still original) swizzle into the source swizzle
             * of the MOV that wrote the selected channel.
             */
            int swizzle = 0;
            for (int i = 0; i < 4; i++) {
               int src_chan = GET_SWZ(inst->src[r].swizzle, i);
               ir_to_mesa_instruction *copy_inst = acp[acp_base + src_chan];
               swizzle |= (GET_SWZ(copy_inst->src[0].swizzle, src_chan) <<
                           (3 * i));
            }
            inst->src[r].swizzle = swizzle;
         }
      }

      /* Next, invalidate whatever this instruction makes stale. */
      switch (inst->op) {
      case OPCODE_BGNLOOP:
      case OPCODE_ENDLOOP:
         /* End of a basic block, clear the ACP entirely. */
         memset(acp, 0, sizeof(*acp) * this->next_temp * 4);
         break;

      case OPCODE_IF:
         ++level;
         break;

      case OPCODE_ENDIF:
      case OPCODE_ELSE:
         /* Clear all channels written inside the block from the ACP, but
          * leaving those that were not touched.
          */
         for (int r = 0; r < this->next_temp; r++) {
            for (int c = 0; c < 4; c++) {
               if (!acp[4 * r + c])
                  continue;

               if (acp_level[4 * r + c] >= level)
                  acp[4 * r + c] = NULL;
            }
         }
         /* ELSE stays at the same nesting level; only ENDIF leaves it. */
         if (inst->op == OPCODE_ENDIF)
            --level;
         break;

      default:
         /* Continuing the block, clear any written channels from
          * the ACP.
          */
         if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.reladdr) {
            /* Any temporary might be written, so no copy propagation
             * across this instruction.
             */
            memset(acp, 0, sizeof(*acp) * this->next_temp * 4);
         } else if (inst->dst.file == PROGRAM_OUTPUT &&
                    inst->dst.reladdr) {
            /* Any output might be written, so no copy propagation
             * from outputs across this instruction.
             */
            for (int r = 0; r < this->next_temp; r++) {
               for (int c = 0; c < 4; c++) {
                  if (!acp[4 * r + c])
                     continue;

                  if (acp[4 * r + c]->src[0].file == PROGRAM_OUTPUT)
                     acp[4 * r + c] = NULL;
               }
            }
         } else if (inst->dst.file == PROGRAM_TEMPORARY ||
                    inst->dst.file == PROGRAM_OUTPUT) {
            /* Clear where it's used as dst. */
            if (inst->dst.file == PROGRAM_TEMPORARY) {
               for (int c = 0; c < 4; c++) {
                  if (inst->dst.writemask & (1 << c)) {
                     acp[4 * inst->dst.index + c] = NULL;
                  }
               }
            }

            /* Clear where it's used as src. */
            for (int r = 0; r < this->next_temp; r++) {
               for (int c = 0; c < 4; c++) {
                  if (!acp[4 * r + c])
                     continue;

                  /* Channel of the MOV's source that feeds channel c. */
                  int src_chan = GET_SWZ(acp[4 * r + c]->src[0].swizzle, c);

                  if (acp[4 * r + c]->src[0].file == inst->dst.file &&
                      acp[4 * r + c]->src[0].index == inst->dst.index &&
                      inst->dst.writemask & (1 << src_chan))
                  {
                     acp[4 * r + c] = NULL;
                  }
               }
            }
         }
         break;
      }

      /* If this is a copy, add it to the ACP.  Only plain MOVs into a
       * directly addressed temporary qualify: no self-copy, no saturate,
       * and no relative addressing or negation on the source.
       */
      if (inst->op == OPCODE_MOV &&
          inst->dst.file == PROGRAM_TEMPORARY &&
          !(inst->dst.file == inst->src[0].file &&
            inst->dst.index == inst->src[0].index) &&
          !inst->dst.reladdr &&
          !inst->saturate &&
          !inst->src[0].reladdr &&
          !inst->src[0].negate) {
         for (int i = 0; i < 4; i++) {
            if (inst->dst.writemask & (1 << i)) {
               acp[4 * inst->dst.index + i] = inst;
               acp_level[4 * inst->dst.index + i] = level;
            }
         }
      }
   }

   ralloc_free(acp_level);
   ralloc_free(acp);
}
2710
2711
2712 /**
2713  * Convert a shader's GLSL IR into a Mesa gl_program.
2714  */
2715 static struct gl_program *
2716 get_mesa_program(struct gl_context *ctx,
2717                  struct gl_shader_program *shader_program,
2718                  struct gl_shader *shader)
2719 {
2720    ir_to_mesa_visitor v;
2721    struct prog_instruction *mesa_instructions, *mesa_inst;
2722    ir_instruction **mesa_instruction_annotation;
2723    int i;
2724    struct gl_program *prog;
2725    GLenum target = _mesa_shader_stage_to_program(shader->Stage);
2726    const char *target_string = _mesa_shader_stage_to_string(shader->Stage);
2727    struct gl_shader_compiler_options *options =
2728          &ctx->Const.ShaderCompilerOptions[shader->Stage];
2729
2730    validate_ir_tree(shader->ir);
2731
2732    prog = ctx->Driver.NewProgram(ctx, target, shader_program->Name);
2733    if (!prog)
2734       return NULL;
2735    prog->Parameters = _mesa_new_parameter_list();
2736    v.ctx = ctx;
2737    v.prog = prog;
2738    v.shader_program = shader_program;
2739    v.options = options;
2740
2741    _mesa_generate_parameters_list_for_uniforms(shader_program, shader,
2742                                                prog->Parameters);
2743
2744    /* Emit Mesa IR for main(). */
2745    visit_exec_list(shader->ir, &v);
2746    v.emit(NULL, OPCODE_END);
2747
2748    prog->NumTemporaries = v.next_temp;
2749
2750    unsigned num_instructions = v.instructions.length();
2751
2752    mesa_instructions =
2753       (struct prog_instruction *)calloc(num_instructions,
2754                                         sizeof(*mesa_instructions));
2755    mesa_instruction_annotation = ralloc_array(v.mem_ctx, ir_instruction *,
2756                                               num_instructions);
2757
2758    v.copy_propagate();
2759
2760    /* Convert ir_mesa_instructions into prog_instructions.
2761     */
2762    mesa_inst = mesa_instructions;
2763    i = 0;
2764    foreach_in_list(const ir_to_mesa_instruction, inst, &v.instructions) {
2765       mesa_inst->Opcode = inst->op;
2766       if (inst->saturate)
2767          mesa_inst->Saturate = GL_TRUE;
2768       mesa_inst->DstReg.File = inst->dst.file;
2769       mesa_inst->DstReg.Index = inst->dst.index;
2770       mesa_inst->DstReg.WriteMask = inst->dst.writemask;
2771       mesa_inst->DstReg.RelAddr = inst->dst.reladdr != NULL;
2772       mesa_inst->SrcReg[0] = mesa_src_reg_from_ir_src_reg(inst->src[0]);
2773       mesa_inst->SrcReg[1] = mesa_src_reg_from_ir_src_reg(inst->src[1]);
2774       mesa_inst->SrcReg[2] = mesa_src_reg_from_ir_src_reg(inst->src[2]);
2775       mesa_inst->TexSrcUnit = inst->sampler;
2776       mesa_inst->TexSrcTarget = inst->tex_target;
2777       mesa_inst->TexShadow = inst->tex_shadow;
2778       mesa_instruction_annotation[i] = inst->ir;
2779
2780       /* Set IndirectRegisterFiles. */
2781       if (mesa_inst->DstReg.RelAddr)
2782          prog->IndirectRegisterFiles |= 1 << mesa_inst->DstReg.File;
2783
2784       /* Update program's bitmask of indirectly accessed register files */
2785       for (unsigned src = 0; src < 3; src++)
2786          if (mesa_inst->SrcReg[src].RelAddr)
2787             prog->IndirectRegisterFiles |= 1 << mesa_inst->SrcReg[src].File;
2788
2789       switch (mesa_inst->Opcode) {
2790       case OPCODE_IF:
2791          if (options->MaxIfDepth == 0) {
2792             linker_warning(shader_program,
2793                            "Couldn't flatten if-statement.  "
2794                            "This will likely result in software "
2795                            "rasterization.\n");
2796          }
2797          break;
2798       case OPCODE_BGNLOOP:
2799          if (options->EmitNoLoops) {
2800             linker_warning(shader_program,
2801                            "Couldn't unroll loop.  "
2802                            "This will likely result in software "
2803                            "rasterization.\n");
2804          }
2805          break;
2806       case OPCODE_CONT:
2807          if (options->EmitNoCont) {
2808             linker_warning(shader_program,
2809                            "Couldn't lower continue-statement.  "
2810                            "This will likely result in software "
2811                            "rasterization.\n");
2812          }
2813          break;
2814       case OPCODE_ARL:
2815          prog->NumAddressRegs = 1;
2816          break;
2817       default:
2818          break;
2819       }
2820
2821       mesa_inst++;
2822       i++;
2823
2824       if (!shader_program->LinkStatus)
2825          break;
2826    }
2827
2828    if (!shader_program->LinkStatus) {
2829       goto fail_exit;
2830    }
2831
2832    set_branchtargets(&v, mesa_instructions, num_instructions);
2833
2834    if (ctx->_Shader->Flags & GLSL_DUMP) {
2835       fprintf(stderr, "\n");
2836       fprintf(stderr, "GLSL IR for linked %s program %d:\n", target_string,
2837               shader_program->Name);
2838       _mesa_print_ir(stderr, shader->ir, NULL);
2839       fprintf(stderr, "\n");
2840       fprintf(stderr, "\n");
2841       fprintf(stderr, "Mesa IR for linked %s program %d:\n", target_string,
2842               shader_program->Name);
2843       print_program(mesa_instructions, mesa_instruction_annotation,
2844                     num_instructions);
2845       fflush(stderr);
2846    }
2847
2848    prog->Instructions = mesa_instructions;
2849    prog->NumInstructions = num_instructions;
2850
2851    /* Setting this to NULL prevents a possible double free in the fail_exit
2852     * path (far below).
2853     */
2854    mesa_instructions = NULL;
2855
2856    do_set_program_inouts(shader->ir, prog, shader->Stage);
2857
2858    prog->SamplersUsed = shader->active_samplers;
2859    prog->ShadowSamplers = shader->shadow_samplers;
2860    _mesa_update_shader_textures_used(shader_program, prog);
2861
2862    /* Set the gl_FragDepth layout. */
2863    if (target == GL_FRAGMENT_PROGRAM_ARB) {
2864       struct gl_fragment_program *fp = (struct gl_fragment_program *)prog;
2865       fp->FragDepthLayout = shader_program->FragDepthLayout;
2866    }
2867
2868    _mesa_reference_program(ctx, &shader->Program, prog);
2869
2870    if ((ctx->_Shader->Flags & GLSL_NO_OPT) == 0) {
2871       _mesa_optimize_program(ctx, prog);
2872    }
2873
2874    /* This has to be done last.  Any operation that can cause
2875     * prog->ParameterValues to get reallocated (e.g., anything that adds a
2876     * program constant) has to happen before creating this linkage.
2877     */
2878    _mesa_associate_uniform_storage(ctx, shader_program, prog->Parameters);
2879    if (!shader_program->LinkStatus) {
2880       goto fail_exit;
2881    }
2882
2883    return prog;
2884
2885 fail_exit:
2886    free(mesa_instructions);
2887    _mesa_reference_program(ctx, &shader->Program, NULL);
2888    return NULL;
2889 }
2890
2891 extern "C" {
2892
2893 /**
2894  * Link a shader.
2895  * Called via ctx->Driver.LinkShader()
2896  * This actually involves converting GLSL IR into Mesa gl_programs with
2897  * code lowering and other optimizations.
2898  */
2899 GLboolean
2900 _mesa_ir_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
2901 {
2902    assert(prog->LinkStatus);
2903
2904    for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
2905       if (prog->_LinkedShaders[i] == NULL)
2906          continue;
2907
2908       bool progress;
2909       exec_list *ir = prog->_LinkedShaders[i]->ir;
2910       const struct gl_shader_compiler_options *options =
2911             &ctx->Const.ShaderCompilerOptions[prog->_LinkedShaders[i]->Stage];
2912
2913       do {
2914          progress = false;
2915
2916          /* Lowering */
2917          do_mat_op_to_vec(ir);
2918          lower_instructions(ir, (MOD_TO_FLOOR | DIV_TO_MUL_RCP | EXP_TO_EXP2
2919                                  | LOG_TO_LOG2 | INT_DIV_TO_MUL_RCP
2920                                  | ((options->EmitNoPow) ? POW_TO_EXP2 : 0)));
2921
2922          progress = do_lower_jumps(ir, true, true, options->EmitNoMainReturn, options->EmitNoCont, options->EmitNoLoops) || progress;
2923
2924          progress = do_common_optimization(ir, true, true,
2925                                            options, ctx->Const.NativeIntegers)
2926            || progress;
2927
2928          progress = lower_quadop_vector(ir, true) || progress;
2929
2930          if (options->MaxIfDepth == 0)
2931             progress = lower_discard(ir) || progress;
2932
2933          progress = lower_if_to_cond_assign(ir, options->MaxIfDepth) || progress;
2934
2935          if (options->EmitNoNoise)
2936             progress = lower_noise(ir) || progress;
2937
2938          /* If there are forms of indirect addressing that the driver
2939           * cannot handle, perform the lowering pass.
2940           */
2941          if (options->EmitNoIndirectInput || options->EmitNoIndirectOutput
2942              || options->EmitNoIndirectTemp || options->EmitNoIndirectUniform)
2943            progress =
2944              lower_variable_index_to_cond_assign(prog->_LinkedShaders[i]->Stage, ir,
2945                                                  options->EmitNoIndirectInput,
2946                                                  options->EmitNoIndirectOutput,
2947                                                  options->EmitNoIndirectTemp,
2948                                                  options->EmitNoIndirectUniform)
2949              || progress;
2950
2951          progress = do_vec_index_to_cond_assign(ir) || progress;
2952          progress = lower_vector_insert(ir, true) || progress;
2953       } while (progress);
2954
2955       validate_ir_tree(ir);
2956    }
2957
2958    for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
2959       struct gl_program *linked_prog;
2960
2961       if (prog->_LinkedShaders[i] == NULL)
2962          continue;
2963
2964       linked_prog = get_mesa_program(ctx, prog, prog->_LinkedShaders[i]);
2965
2966       if (linked_prog) {
2967          _mesa_copy_linked_program_data((gl_shader_stage) i, prog, linked_prog);
2968
2969          if (!ctx->Driver.ProgramStringNotify(ctx,
2970                                               _mesa_shader_stage_to_program(i),
2971                                               linked_prog)) {
2972             return GL_FALSE;
2973          }
2974       }
2975
2976       _mesa_reference_program(ctx, &linked_prog, NULL);
2977    }
2978
2979    build_program_resource_list(prog);
2980    return prog->LinkStatus;
2981 }
2982
2983 /**
2984  * Link a GLSL shader program.  Called via glLinkProgram().
2985  */
2986 void
2987 _mesa_glsl_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
2988 {
2989    unsigned int i;
2990
2991    _mesa_clear_shader_program_data(prog);
2992
2993    prog->LinkStatus = GL_TRUE;
2994
2995    for (i = 0; i < prog->NumShaders; i++) {
2996       if (!prog->Shaders[i]->CompileStatus) {
2997          linker_error(prog, "linking with uncompiled shader");
2998       }
2999    }
3000
3001    if (prog->LinkStatus) {
3002       link_shaders(ctx, prog);
3003    }
3004
3005    if (prog->LinkStatus) {
3006       if (!ctx->Driver.LinkShader(ctx, prog)) {
3007          prog->LinkStatus = GL_FALSE;
3008       }
3009    }
3010
3011    if (ctx->_Shader->Flags & GLSL_DUMP) {
3012       if (!prog->LinkStatus) {
3013          fprintf(stderr, "GLSL shader program %d failed to link\n", prog->Name);
3014       }
3015
3016       if (prog->InfoLog && prog->InfoLog[0] != 0) {
3017          fprintf(stderr, "GLSL shader program %d info log:\n", prog->Name);
3018          fprintf(stderr, "%s\n", prog->InfoLog);
3019       }
3020    }
3021 }
3022
3023 } /* extern "C" */