src/mesa/program/ir_to_mesa.cpp

   1 /*
   2  * Copyright (C) 2005-2007  Brian Paul   All Rights Reserved.
   3  * Copyright (C) 2008  VMware, Inc.   All Rights Reserved.
   4  * Copyright © 2010 Intel Corporation
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a
   7  * copy of this software and associated documentation files (the "Software"),
   8  * to deal in the Software without restriction, including without limitation
   9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  10  * and/or sell copies of the Software, and to permit persons to whom the
  11  * Software is furnished to do so, subject to the following conditions:
  12  *
  13  * The above copyright notice and this permission notice (including the next
  14  * paragraph) shall be included in all copies or substantial portions of the
  15  * Software.
  16  *
  17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  20  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  22  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  23  * DEALINGS IN THE SOFTWARE.
  24  */
  25
  26 /**
  27  * \file ir_to_mesa.cpp
  28  *
  29  * Translates the IR to ARB_fragment_program text if possible,
  30  * printing the result
  31  */
  32
  33 #include <stdio.h>
  34 #include "main/compiler.h"
  35 #include "ir.h"
  36 #include "ir_visitor.h"
  37 #include "ir_print_visitor.h"
  38 #include "ir_expression_flattening.h"
  39 #include "glsl_types.h"
  40 #include "glsl_parser_extras.h"
  41 #include "../glsl/program.h"
  42 #include "ir_optimization.h"
  43 #include "ast.h"
  44
  45 extern "C" {
  46 #include "main/mtypes.h"
  47 #include "main/shaderapi.h"
  48 #include "main/shaderobj.h"
  49 #include "main/uniforms.h"
  50 #include "program/hash_table.h"
  51 #include "program/prog_instruction.h"
  52 #include "program/prog_optimize.h"
  53 #include "program/prog_print.h"
  54 #include "program/program.h"
  55 #include "program/prog_uniform.h"
  56 #include "program/prog_parameter.h"
  57 }
  58
  59 static int swizzle_for_size(int size);
  60
  61 /**
  62  * This struct is a corresponding struct to Mesa prog_src_register, with
  63  * wider fields.
  64  */
  65 typedef struct ir_to_mesa_src_reg {
  66    ir_to_mesa_src_reg(int file, int index, const glsl_type *type)
  67    {
  68       this->file = file;
  69       this->index = index;
  70       if (type && (type->is_scalar() || type->is_vector() || type->is_matrix()))
  71          this->swizzle = swizzle_for_size(type->vector_elements);
  72       else
  73          this->swizzle = SWIZZLE_XYZW;
  74       this->negate = 0;
  75       this->reladdr = NULL;
  76    }
  77
  78    ir_to_mesa_src_reg()
  79    {
  80       this->file = PROGRAM_UNDEFINED;
  81    }
  82
  83    int file; /**< PROGRAM_* from Mesa */
  84    int index; /**< temporary index, VERT_ATTRIB_*, FRAG_ATTRIB_*, etc. */
  85    GLuint swizzle; /**< SWIZZLE_XYZWONEZERO swizzles from Mesa. */
  86    int negate; /**< NEGATE_XYZW mask from mesa */
  87    /** Register index should be offset by the integer in this reg. */
  88    ir_to_mesa_src_reg *reladdr;
  89 } ir_to_mesa_src_reg;
  90
/**
 * Destination-register description used by ir_to_mesa_instruction.
 *
 * This is a plain aggregate with no constructors: file-scope instances in
 * this file are brace-initialized positionally, so the order of the fields
 * below must not change.
 */
typedef struct ir_to_mesa_dst_reg {
   int file; /**< PROGRAM_* from Mesa */
   int index; /**< temporary index, VERT_ATTRIB_*, FRAG_ATTRIB_*, etc. */
   int writemask; /**< Bitfield of WRITEMASK_[XYZW] */
   /** Condition mask (4 bits); the code in this file stores COND_TR here. */
   GLuint cond_mask:4;
   /** Register index should be offset by the integer in this reg. */
   ir_to_mesa_src_reg *reladdr;
} ir_to_mesa_dst_reg;
  99
 100 extern ir_to_mesa_src_reg ir_to_mesa_undef;
 101
/**
 * One instruction in the visitor's intermediate list.
 *
 * Instances are created by ir_to_mesa_visitor::ir_to_mesa_emit_op3(), which
 * fills in \c op, \c dst_reg, \c src_reg, \c ir and \c function and appends
 * the node to the visitor's instruction list.  The remaining fields are not
 * written by that function.
 */
class ir_to_mesa_instruction : public exec_node {
public:
   enum prog_opcode op;
   ir_to_mesa_dst_reg dst_reg;
   /** Source operands; the op0/op1/op2 wrappers pass ir_to_mesa_undef for unused slots. */
   ir_to_mesa_src_reg src_reg[3];
   /** Pointer to the ir source this tree came from for debugging */
   ir_instruction *ir;
   GLboolean cond_update;
   int sampler; /**< sampler index */
   int tex_target; /**< One of TEXTURE_*_INDEX */
   GLboolean tex_shadow;

   class function_entry *function; /* Set on OPCODE_CAL or OPCODE_BGNSUB */
};
 116
 117 class variable_storage : public exec_node {
 118 public:
 119    variable_storage(ir_variable *var, int file, int index)
 120       : file(file), index(index), var(var)
 121    {
 122       /* empty */
 123    }
 124
 125    int file;
 126    int index;
 127    ir_variable *var; /* variable that maps to this, if any */
 128 };
 129
/**
 * Per-signature bookkeeping for a function, kept on the visitor's
 * function_signatures list.
 */
class function_entry : public exec_node {
public:
   /** The IR signature this entry describes. */
   ir_function_signature *sig;

   /**
    * identifier of this function signature used by the program.
    *
    * At the point that Mesa instructions for function calls are
    * generated, we don't know the address of the first instruction of
    * the function body.  So we make the BranchTarget that is called a
    * small integer and rewrite them during set_branchtargets().
    */
   int sig_id;

   /**
    * Pointer to first instruction of the function body.
    *
    * Set during function body emits after main() is processed.
    */
   ir_to_mesa_instruction *bgn_inst;

   /**
    * Index of the first instruction of the function body in actual
    * Mesa IR.
    *
    * Set after conversion from ir_to_mesa_instruction to prog_instruction.
    */
   int inst;

   /** Storage for the return value. */
   ir_to_mesa_src_reg return_reg;
};
 162
/**
 * IR visitor that walks GLSL IR and appends ir_to_mesa_instruction nodes to
 * its \c instructions list.
 *
 * Expression-like visit methods leave the register holding their value in
 * \c result; callers read it immediately after calling accept().
 */
class ir_to_mesa_visitor : public ir_visitor {
public:
   ir_to_mesa_visitor();
   ~ir_to_mesa_visitor();

   function_entry *current_function;

   GLcontext *ctx;
   /** Program being built; src_reg_for_float() adds constants to its Parameters. */
   struct gl_program *prog;

   /** Index of the next free temporary; get_temp() advances it by type_size(). */
   int next_temp;

   /** Returns the variable_storage entry whose \c var is \p var, or NULL. */
   variable_storage *find_variable_storage(ir_variable *var);

   function_entry *get_function_signature(ir_function_signature *sig);

   /** Allocates PROGRAM_TEMPORARY storage sized for \p type. */
   ir_to_mesa_src_reg get_temp(const glsl_type *type);
   /**
    * Helper for ir_to_mesa_emit_op3(): loads the address register for a
    * relatively addressed \p reg and, unless it is the last outstanding
    * relative operand, copies the value into a temporary.
    */
   void reladdr_to_temp(ir_instruction *ir,
                        ir_to_mesa_src_reg *reg, int *num_reladdr);

   /** Returns a PROGRAM_CONSTANT register holding \p val. */
   struct ir_to_mesa_src_reg src_reg_for_float(float val);

   /**
    * \name Visit methods
    *
    * As typical for the visitor pattern, there must be one \c visit method for
    * each concrete subclass of \c ir_instruction.  Virtual base classes within
    * the hierarchy should not have \c visit methods.
    */
   /*@{*/
   virtual void visit(ir_variable *);
   virtual void visit(ir_loop *);
   virtual void visit(ir_loop_jump *);
   virtual void visit(ir_function_signature *);
   virtual void visit(ir_function *);
   virtual void visit(ir_expression *);
   virtual void visit(ir_swizzle *);
   virtual void visit(ir_dereference_variable  *);
   virtual void visit(ir_dereference_array *);
   virtual void visit(ir_dereference_record *);
   virtual void visit(ir_assignment *);
   virtual void visit(ir_constant *);
   virtual void visit(ir_call *);
   virtual void visit(ir_return *);
   virtual void visit(ir_discard *);
   virtual void visit(ir_texture *);
   virtual void visit(ir_if *);
   /*@}*/

   /** Register holding the value of the most recently visited rvalue. */
   struct ir_to_mesa_src_reg result;

   /** List of variable_storage */
   exec_list variables;

   /** List of function_entry */
   exec_list function_signatures;
   int next_signature_id;

   /** List of ir_to_mesa_instruction */
   exec_list instructions;

   /**
    * \name Instruction emitters
    *
    * Each appends one instruction for \c op to \c instructions and returns
    * it.  The op0/op1/op2 forms forward to op3, passing ir_to_mesa_undef
    * for the operands they do not take.
    */
   /*@{*/
   ir_to_mesa_instruction *ir_to_mesa_emit_op0(ir_instruction *ir,
                                               enum prog_opcode op);

   ir_to_mesa_instruction *ir_to_mesa_emit_op1(ir_instruction *ir,
                                               enum prog_opcode op,
                                               ir_to_mesa_dst_reg dst,
                                               ir_to_mesa_src_reg src0);

   ir_to_mesa_instruction *ir_to_mesa_emit_op2(ir_instruction *ir,
                                               enum prog_opcode op,
                                               ir_to_mesa_dst_reg dst,
                                               ir_to_mesa_src_reg src0,
                                               ir_to_mesa_src_reg src1);

   ir_to_mesa_instruction *ir_to_mesa_emit_op3(ir_instruction *ir,
                                               enum prog_opcode op,
                                               ir_to_mesa_dst_reg dst,
                                               ir_to_mesa_src_reg src0,
                                               ir_to_mesa_src_reg src1,
                                               ir_to_mesa_src_reg src2);
   /*@}*/

   /**
    * Emits a scalar-only opcode once per distinct source channel so that
    * every written dst channel gets its own result.
    */
   void ir_to_mesa_emit_scalar_op1(ir_instruction *ir,
                                   enum prog_opcode op,
                                   ir_to_mesa_dst_reg dst,
                                   ir_to_mesa_src_reg src0);

   /** Two-source form of ir_to_mesa_emit_scalar_op1(). */
   void ir_to_mesa_emit_scalar_op2(ir_instruction *ir,
                                   enum prog_opcode op,
                                   ir_to_mesa_dst_reg dst,
                                   ir_to_mesa_src_reg src0,
                                   ir_to_mesa_src_reg src1);

   /**
    * If operand \p mul_operand of the add \p ir is a multiply, emits a
    * single OPCODE_MAD and returns true; otherwise emits nothing and
    * returns false.
    */
   GLboolean try_emit_mad(ir_expression *ir,
                          int mul_operand);

   int add_uniform(const char *name,
                   const glsl_type *type,
                   ir_constant *constant);
   void add_aggregate_uniform(ir_instruction *ir,
                              const char *name,
                              const struct glsl_type *type,
                              ir_constant *constant,
                              struct ir_to_mesa_dst_reg temp);

   /** Sampler variable -> location map; created on first set_sampler_location(). */
   struct hash_table *sampler_map;

   void set_sampler_location(ir_variable *sampler, int location);
   int get_sampler_location(ir_variable *sampler);

   /** Allocation context passed to placement new for emitted instructions. */
   void *mem_ctx;
};
 275
/** Placeholder source operand (PROGRAM_UNDEFINED) for unused src slots. */
ir_to_mesa_src_reg ir_to_mesa_undef = ir_to_mesa_src_reg(PROGRAM_UNDEFINED, 0, NULL);

/**
 * Placeholder destination for instructions that write no register.
 *
 * NOTE(review): the third initializer lands in the \c writemask field but is
 * a swizzle constant (SWIZZLE_NOOP), not a WRITEMASK_* value -- confirm this
 * is intended before relying on the stored mask.
 */
ir_to_mesa_dst_reg ir_to_mesa_undef_dst = {
   PROGRAM_UNDEFINED, 0, SWIZZLE_NOOP, COND_TR, NULL,
};

/** Destination used for ARL: the X channel of address register 0. */
ir_to_mesa_dst_reg ir_to_mesa_address_reg = {
   PROGRAM_ADDRESS, 0, WRITEMASK_X, COND_TR, NULL
};
 285
 286 static int swizzle_for_size(int size)
 287 {
 288    int size_swizzles[4] = {
 289       MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
 290       MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y),
 291       MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z),
 292       MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W),
 293    };
 294
 295    return size_swizzles[size - 1];
 296 }
 297
 298 ir_to_mesa_instruction *
 299 ir_to_mesa_visitor::ir_to_mesa_emit_op3(ir_instruction *ir,
 300                                         enum prog_opcode op,
 301                                         ir_to_mesa_dst_reg dst,
 302                                         ir_to_mesa_src_reg src0,
 303                                         ir_to_mesa_src_reg src1,
 304                                         ir_to_mesa_src_reg src2)
 305 {
 306    ir_to_mesa_instruction *inst = new(mem_ctx) ir_to_mesa_instruction();
 307    int num_reladdr = 0;
 308
 309    /* If we have to do relative addressing, we want to load the ARL
 310     * reg directly for one of the regs, and preload the other reladdr
 311     * sources into temps.
 312     */
 313    num_reladdr += dst.reladdr != NULL;
 314    num_reladdr += src0.reladdr != NULL;
 315    num_reladdr += src1.reladdr != NULL;
 316    num_reladdr += src2.reladdr != NULL;
 317
 318    reladdr_to_temp(ir, &src2, &num_reladdr);
 319    reladdr_to_temp(ir, &src1, &num_reladdr);
 320    reladdr_to_temp(ir, &src0, &num_reladdr);
 321
 322    if (dst.reladdr) {
 323       ir_to_mesa_emit_op1(ir, OPCODE_ARL, ir_to_mesa_address_reg,
 324                           *dst.reladdr);
 325
 326       num_reladdr--;
 327    }
 328    assert(num_reladdr == 0);
 329
 330    inst->op = op;
 331    inst->dst_reg = dst;
 332    inst->src_reg[0] = src0;
 333    inst->src_reg[1] = src1;
 334    inst->src_reg[2] = src2;
 335    inst->ir = ir;
 336
 337    inst->function = NULL;
 338
 339    this->instructions.push_tail(inst);
 340
 341    return inst;
 342 }
 343
 344
 345 ir_to_mesa_instruction *
 346 ir_to_mesa_visitor::ir_to_mesa_emit_op2(ir_instruction *ir,
 347                                         enum prog_opcode op,
 348                                         ir_to_mesa_dst_reg dst,
 349                                         ir_to_mesa_src_reg src0,
 350                                         ir_to_mesa_src_reg src1)
 351 {
 352    return ir_to_mesa_emit_op3(ir, op, dst, src0, src1, ir_to_mesa_undef);
 353 }
 354
 355 ir_to_mesa_instruction *
 356 ir_to_mesa_visitor::ir_to_mesa_emit_op1(ir_instruction *ir,
 357                                         enum prog_opcode op,
 358                                         ir_to_mesa_dst_reg dst,
 359                                         ir_to_mesa_src_reg src0)
 360 {
 361    assert(dst.writemask != 0);
 362    return ir_to_mesa_emit_op3(ir, op, dst,
 363                               src0, ir_to_mesa_undef, ir_to_mesa_undef);
 364 }
 365
 366 ir_to_mesa_instruction *
 367 ir_to_mesa_visitor::ir_to_mesa_emit_op0(ir_instruction *ir,
 368                                         enum prog_opcode op)
 369 {
 370    return ir_to_mesa_emit_op3(ir, op, ir_to_mesa_undef_dst,
 371                               ir_to_mesa_undef,
 372                               ir_to_mesa_undef,
 373                               ir_to_mesa_undef);
 374 }
 375
 376 void
 377 ir_to_mesa_visitor::set_sampler_location(ir_variable *sampler, int location)
 378 {
 379    if (this->sampler_map == NULL) {
 380       this->sampler_map = hash_table_ctor(0, hash_table_pointer_hash,
 381                                           hash_table_pointer_compare);
 382    }
 383
 384    hash_table_insert(this->sampler_map, (void *)(uintptr_t)location, sampler);
 385 }
 386
 387 int
 388 ir_to_mesa_visitor::get_sampler_location(ir_variable *sampler)
 389 {
 390    void *result = hash_table_find(this->sampler_map, sampler);
 391
 392    return (int)(uintptr_t)result;
 393 }
 394
 395 inline ir_to_mesa_dst_reg
 396 ir_to_mesa_dst_reg_from_src(ir_to_mesa_src_reg reg)
 397 {
 398    ir_to_mesa_dst_reg dst_reg;
 399
 400    dst_reg.file = reg.file;
 401    dst_reg.index = reg.index;
 402    dst_reg.writemask = WRITEMASK_XYZW;
 403    dst_reg.cond_mask = COND_TR;
 404    dst_reg.reladdr = reg.reladdr;
 405
 406    return dst_reg;
 407 }
 408
 409 inline ir_to_mesa_src_reg
 410 ir_to_mesa_src_reg_from_dst(ir_to_mesa_dst_reg reg)
 411 {
 412    return ir_to_mesa_src_reg(reg.file, reg.index, NULL);
 413 }
 414
 415 /**
 416  * Emits Mesa scalar opcodes to produce unique answers across channels.
 417  *
 418  * Some Mesa opcodes are scalar-only, like ARB_fp/vp.  The src X
 419  * channel determines the result across all channels.  So to do a vec4
 420  * of this operation, we want to emit a scalar per source channel used
 421  * to produce dest channels.
 422  */
 423 void
 424 ir_to_mesa_visitor::ir_to_mesa_emit_scalar_op2(ir_instruction *ir,
 425                                                enum prog_opcode op,
 426                                                ir_to_mesa_dst_reg dst,
 427                                                ir_to_mesa_src_reg orig_src0,
 428                                                ir_to_mesa_src_reg orig_src1)
 429 {
 430    int i, j;
 431    int done_mask = ~dst.writemask;
 432
 433    /* Mesa RCP is a scalar operation splatting results to all channels,
 434     * like ARB_fp/vp.  So emit as many RCPs as necessary to cover our
 435     * dst channels.
 436     */
 437    for (i = 0; i < 4; i++) {
 438       GLuint this_mask = (1 << i);
 439       ir_to_mesa_instruction *inst;
 440       ir_to_mesa_src_reg src0 = orig_src0;
 441       ir_to_mesa_src_reg src1 = orig_src1;
 442
 443       if (done_mask & this_mask)
 444          continue;
 445
 446       GLuint src0_swiz = GET_SWZ(src0.swizzle, i);
 447       GLuint src1_swiz = GET_SWZ(src1.swizzle, i);
 448       for (j = i + 1; j < 4; j++) {
 449          if (!(done_mask & (1 << j)) &&
 450              GET_SWZ(src0.swizzle, j) == src0_swiz &&
 451              GET_SWZ(src1.swizzle, j) == src1_swiz) {
 452             this_mask |= (1 << j);
 453          }
 454       }
 455       src0.swizzle = MAKE_SWIZZLE4(src0_swiz, src0_swiz,
 456                                    src0_swiz, src0_swiz);
 457       src1.swizzle = MAKE_SWIZZLE4(src1_swiz, src1_swiz,
 458                                   src1_swiz, src1_swiz);
 459
 460       inst = ir_to_mesa_emit_op2(ir, op,
 461                                  dst,
 462                                  src0,
 463                                  src1);
 464       inst->dst_reg.writemask = this_mask;
 465       done_mask |= this_mask;
 466    }
 467 }
 468
 469 void
 470 ir_to_mesa_visitor::ir_to_mesa_emit_scalar_op1(ir_instruction *ir,
 471                                                enum prog_opcode op,
 472                                                ir_to_mesa_dst_reg dst,
 473                                                ir_to_mesa_src_reg src0)
 474 {
 475    ir_to_mesa_src_reg undef = ir_to_mesa_undef;
 476
 477    undef.swizzle = SWIZZLE_XXXX;
 478
 479    ir_to_mesa_emit_scalar_op2(ir, op, dst, src0, undef);
 480 }
 481
 482 struct ir_to_mesa_src_reg
 483 ir_to_mesa_visitor::src_reg_for_float(float val)
 484 {
 485    ir_to_mesa_src_reg src_reg(PROGRAM_CONSTANT, -1, NULL);
 486
 487    src_reg.index = _mesa_add_unnamed_constant(this->prog->Parameters,
 488                                               &val, 1, &src_reg.swizzle);
 489
 490    return src_reg;
 491 }
 492
 493 static int
 494 type_size(const struct glsl_type *type)
 495 {
 496    unsigned int i;
 497    int size;
 498
 499    switch (type->base_type) {
 500    case GLSL_TYPE_UINT:
 501    case GLSL_TYPE_INT:
 502    case GLSL_TYPE_FLOAT:
 503    case GLSL_TYPE_BOOL:
 504       if (type->is_matrix()) {
 505          return type->matrix_columns;
 506       } else {
 507          /* Regardless of size of vector, it gets a vec4. This is bad
 508           * packing for things like floats, but otherwise arrays become a
 509           * mess.  Hopefully a later pass over the code can pack scalars
 510           * down if appropriate.
 511           */
 512          return 1;
 513       }
 514    case GLSL_TYPE_ARRAY:
 515       return type_size(type->fields.array) * type->length;
 516    case GLSL_TYPE_STRUCT:
 517       size = 0;
 518       for (i = 0; i < type->length; i++) {
 519          size += type_size(type->fields.structure[i].type);
 520       }
 521       return size;
 522    case GLSL_TYPE_SAMPLER:
 523       /* Samplers take up no register space, since they're baked in at
 524        * link time.
 525        */
 526       return 0;
 527    default:
 528       assert(0);
 529    }
 530 }
 531
 532 /**
 533  * In the initial pass of codegen, we assign temporary numbers to
 534  * intermediate results.  (not SSA -- variable assignments will reuse
 535  * storage).  Actual register allocation for the Mesa VM occurs in a
 536  * pass over the Mesa IR later.
 537  */
 538 ir_to_mesa_src_reg
 539 ir_to_mesa_visitor::get_temp(const glsl_type *type)
 540 {
 541    ir_to_mesa_src_reg src_reg;
 542    int swizzle[4];
 543    int i;
 544
 545    src_reg.file = PROGRAM_TEMPORARY;
 546    src_reg.index = next_temp;
 547    src_reg.reladdr = NULL;
 548    next_temp += type_size(type);
 549
 550    if (type->is_array() || type->is_record()) {
 551       src_reg.swizzle = SWIZZLE_NOOP;
 552    } else {
 553       for (i = 0; i < type->vector_elements; i++)
 554          swizzle[i] = i;
 555       for (; i < 4; i++)
 556          swizzle[i] = type->vector_elements - 1;
 557       src_reg.swizzle = MAKE_SWIZZLE4(swizzle[0], swizzle[1],
 558                                       swizzle[2], swizzle[3]);
 559    }
 560    src_reg.negate = 0;
 561
 562    return src_reg;
 563 }
 564
 565 variable_storage *
 566 ir_to_mesa_visitor::find_variable_storage(ir_variable *var)
 567 {
 568
 569    variable_storage *entry;
 570
 571    foreach_iter(exec_list_iterator, iter, this->variables) {
 572       entry = (variable_storage *)iter.get();
 573
 574       if (entry->var == var)
 575          return entry;
 576    }
 577
 578    return NULL;
 579 }
 580
 581 void
 582 ir_to_mesa_visitor::visit(ir_variable *ir)
 583 {
 584    if (strcmp(ir->name, "gl_FragCoord") == 0) {
 585       struct gl_fragment_program *fp = (struct gl_fragment_program *)this->prog;
 586
 587       fp->OriginUpperLeft = ir->origin_upper_left;
 588       fp->PixelCenterInteger = ir->pixel_center_integer;
 589    }
 590 }
 591
 592 void
 593 ir_to_mesa_visitor::visit(ir_loop *ir)
 594 {
 595    assert(!ir->from);
 596    assert(!ir->to);
 597    assert(!ir->increment);
 598    assert(!ir->counter);
 599
 600    ir_to_mesa_emit_op0(NULL, OPCODE_BGNLOOP);
 601    visit_exec_list(&ir->body_instructions, this);
 602    ir_to_mesa_emit_op0(NULL, OPCODE_ENDLOOP);
 603 }
 604
 605 void
 606 ir_to_mesa_visitor::visit(ir_loop_jump *ir)
 607 {
 608    switch (ir->mode) {
 609    case ir_loop_jump::jump_break:
 610       ir_to_mesa_emit_op0(NULL, OPCODE_BRK);
 611       break;
 612    case ir_loop_jump::jump_continue:
 613       ir_to_mesa_emit_op0(NULL, OPCODE_CONT);
 614       break;
 615    }
 616 }
 617
 618
 619 void
 620 ir_to_mesa_visitor::visit(ir_function_signature *ir)
 621 {
 622    assert(0);
 623    (void)ir;
 624 }
 625
 626 void
 627 ir_to_mesa_visitor::visit(ir_function *ir)
 628 {
 629    /* Ignore function bodies other than main() -- we shouldn't see calls to
 630     * them since they should all be inlined before we get to ir_to_mesa.
 631     */
 632    if (strcmp(ir->name, "main") == 0) {
 633       const ir_function_signature *sig;
 634       exec_list empty;
 635
 636       sig = ir->matching_signature(&empty);
 637
 638       assert(sig);
 639
 640       foreach_iter(exec_list_iterator, iter, sig->body) {
 641          ir_instruction *ir = (ir_instruction *)iter.get();
 642
 643          ir->accept(this);
 644       }
 645    }
 646 }
 647
 648 GLboolean
 649 ir_to_mesa_visitor::try_emit_mad(ir_expression *ir, int mul_operand)
 650 {
 651    int nonmul_operand = 1 - mul_operand;
 652    ir_to_mesa_src_reg a, b, c;
 653
 654    ir_expression *expr = ir->operands[mul_operand]->as_expression();
 655    if (!expr || expr->operation != ir_binop_mul)
 656       return false;
 657
 658    expr->operands[0]->accept(this);
 659    a = this->result;
 660    expr->operands[1]->accept(this);
 661    b = this->result;
 662    ir->operands[nonmul_operand]->accept(this);
 663    c = this->result;
 664
 665    this->result = get_temp(ir->type);
 666    ir_to_mesa_emit_op3(ir, OPCODE_MAD,
 667                        ir_to_mesa_dst_reg_from_src(this->result), a, b, c);
 668
 669    return true;
 670 }
 671
 672 void
 673 ir_to_mesa_visitor::reladdr_to_temp(ir_instruction *ir,
 674                                     ir_to_mesa_src_reg *reg, int *num_reladdr)
 675 {
 676    if (!reg->reladdr)
 677       return;
 678
 679    ir_to_mesa_emit_op1(ir, OPCODE_ARL, ir_to_mesa_address_reg, *reg->reladdr);
 680
 681    if (*num_reladdr != 1) {
 682       ir_to_mesa_src_reg temp = get_temp(glsl_type::vec4_type);
 683
 684       ir_to_mesa_emit_op1(ir, OPCODE_MOV,
 685                           ir_to_mesa_dst_reg_from_src(temp), *reg);
 686       *reg = temp;
 687    }
 688
 689    (*num_reladdr)--;
 690 }
 691
 692 void
 693 ir_to_mesa_visitor::visit(ir_expression *ir)
 694 {
 695    unsigned int operand;
 696    struct ir_to_mesa_src_reg op[2];
 697    struct ir_to_mesa_src_reg result_src;
 698    struct ir_to_mesa_dst_reg result_dst;
 699    const glsl_type *vec4_type = glsl_type::get_instance(GLSL_TYPE_FLOAT, 4, 1);
 700    const glsl_type *vec3_type = glsl_type::get_instance(GLSL_TYPE_FLOAT, 3, 1);
 701    const glsl_type *vec2_type = glsl_type::get_instance(GLSL_TYPE_FLOAT, 2, 1);
 702
 703    /* Quick peephole: Emit OPCODE_MAD(a, b, c) instead of ADD(MUL(a, b), c)
 704     */
 705    if (ir->operation == ir_binop_add) {
 706       if (try_emit_mad(ir, 1))
 707          return;
 708       if (try_emit_mad(ir, 0))
 709          return;
 710    }
 711
 712    for (operand = 0; operand < ir->get_num_operands(); operand++) {
 713       this->result.file = PROGRAM_UNDEFINED;
 714       ir->operands[operand]->accept(this);
 715       if (this->result.file == PROGRAM_UNDEFINED) {
 716          ir_print_visitor v;
 717          printf("Failed to get tree for expression operand:\n");
 718          ir->operands[operand]->accept(&v);
 719          exit(1);
 720       }
 721       op[operand] = this->result;
 722
 723       /* Matrix expression operands should have been broken down to vector
 724        * operations already.
 725        */
 726       assert(!ir->operands[operand]->type->is_matrix());
 727    }
 728
 729    this->result.file = PROGRAM_UNDEFINED;
 730
 731    /* Storage for our result.  Ideally for an assignment we'd be using
 732     * the actual storage for the result here, instead.
 733     */
 734    result_src = get_temp(ir->type);
 735    /* convenience for the emit functions below. */
 736    result_dst = ir_to_mesa_dst_reg_from_src(result_src);
 737    /* Limit writes to the channels that will be used by result_src later.
 738     * This does limit this temp's use as a temporary for multi-instruction
 739     * sequences.
 740     */
 741    result_dst.writemask = (1 << ir->type->vector_elements) - 1;
 742
 743    switch (ir->operation) {
 744    case ir_unop_logic_not:
 745       ir_to_mesa_emit_op2(ir, OPCODE_SEQ, result_dst,
 746                           op[0], src_reg_for_float(0.0));
 747       break;
 748    case ir_unop_neg:
 749       op[0].negate = ~op[0].negate;
 750       result_src = op[0];
 751       break;
 752    case ir_unop_abs:
 753       ir_to_mesa_emit_op1(ir, OPCODE_ABS, result_dst, op[0]);
 754       break;
 755    case ir_unop_sign:
 756       ir_to_mesa_emit_op1(ir, OPCODE_SSG, result_dst, op[0]);
 757       break;
 758    case ir_unop_rcp:
 759       ir_to_mesa_emit_scalar_op1(ir, OPCODE_RCP, result_dst, op[0]);
 760       break;
 761
 762    case ir_unop_exp2:
 763       ir_to_mesa_emit_scalar_op1(ir, OPCODE_EX2, result_dst, op[0]);
 764       break;
 765    case ir_unop_exp:
 766    case ir_unop_log:
 767       assert(!"not reached: should be handled by ir_explog_to_explog2");
 768       break;
 769    case ir_unop_log2:
 770       ir_to_mesa_emit_scalar_op1(ir, OPCODE_LG2, result_dst, op[0]);
 771       break;
 772    case ir_unop_sin:
 773       ir_to_mesa_emit_scalar_op1(ir, OPCODE_SIN, result_dst, op[0]);
 774       break;
 775    case ir_unop_cos:
 776       ir_to_mesa_emit_scalar_op1(ir, OPCODE_COS, result_dst, op[0]);
 777       break;
 778
 779    case ir_unop_dFdx:
 780       ir_to_mesa_emit_op1(ir, OPCODE_DDX, result_dst, op[0]);
 781       break;
 782    case ir_unop_dFdy:
 783       ir_to_mesa_emit_op1(ir, OPCODE_DDY, result_dst, op[0]);
 784       break;
 785
 786    case ir_binop_add:
 787       ir_to_mesa_emit_op2(ir, OPCODE_ADD, result_dst, op[0], op[1]);
 788       break;
 789    case ir_binop_sub:
 790       ir_to_mesa_emit_op2(ir, OPCODE_SUB, result_dst, op[0], op[1]);
 791       break;
 792
 793    case ir_binop_mul:
 794       ir_to_mesa_emit_op2(ir, OPCODE_MUL, result_dst, op[0], op[1]);
 795       break;
 796    case ir_binop_div:
 797       assert(!"not reached: should be handled by ir_div_to_mul_rcp");
 798    case ir_binop_mod:
 799       assert(!"ir_binop_mod should have been converted to b * fract(a/b)");
 800       break;
 801
 802    case ir_binop_less:
 803       ir_to_mesa_emit_op2(ir, OPCODE_SLT, result_dst, op[0], op[1]);
 804       break;
 805    case ir_binop_greater:
 806       ir_to_mesa_emit_op2(ir, OPCODE_SGT, result_dst, op[0], op[1]);
 807       break;
 808    case ir_binop_lequal:
 809       ir_to_mesa_emit_op2(ir, OPCODE_SLE, result_dst, op[0], op[1]);
 810       break;
 811    case ir_binop_gequal:
 812       ir_to_mesa_emit_op2(ir, OPCODE_SGE, result_dst, op[0], op[1]);
 813       break;
 814    case ir_binop_equal:
 815       ir_to_mesa_emit_op2(ir, OPCODE_SEQ, result_dst, op[0], op[1]);
 816       break;
 817    case ir_binop_logic_xor:
 818    case ir_binop_nequal:
 819       ir_to_mesa_emit_op2(ir, OPCODE_SNE, result_dst, op[0], op[1]);
 820       break;
 821
 822    case ir_binop_logic_or:
 823       /* This could be a saturated add and skip the SNE. */
 824       ir_to_mesa_emit_op2(ir, OPCODE_ADD,
 825                           result_dst,
 826                           op[0], op[1]);
 827
 828       ir_to_mesa_emit_op2(ir, OPCODE_SNE,
 829                           result_dst,
 830                           result_src, src_reg_for_float(0.0));
 831       break;
 832
 833    case ir_binop_logic_and:
 834       /* the bool args are stored as float 0.0 or 1.0, so "mul" gives us "and". */
 835       ir_to_mesa_emit_op2(ir, OPCODE_MUL,
 836                           result_dst,
 837                           op[0], op[1]);
 838       break;
 839
 840    case ir_binop_dot:
 841       if (ir->operands[0]->type == vec4_type) {
 842          assert(ir->operands[1]->type == vec4_type);
 843          ir_to_mesa_emit_op2(ir, OPCODE_DP4,
 844                              result_dst,
 845                              op[0], op[1]);
 846       } else if (ir->operands[0]->type == vec3_type) {
 847          assert(ir->operands[1]->type == vec3_type);
 848          ir_to_mesa_emit_op2(ir, OPCODE_DP3,
 849                              result_dst,
 850                              op[0], op[1]);
 851       } else if (ir->operands[0]->type == vec2_type) {
 852          assert(ir->operands[1]->type == vec2_type);
 853          ir_to_mesa_emit_op2(ir, OPCODE_DP2,
 854                              result_dst,
 855                              op[0], op[1]);
 856       }
 857       break;
 858
 859    case ir_binop_cross:
 860       ir_to_mesa_emit_op2(ir, OPCODE_XPD, result_dst, op[0], op[1]);
 861       break;
 862
 863    case ir_unop_sqrt:
 864       ir_to_mesa_emit_scalar_op1(ir, OPCODE_RSQ, result_dst, op[0]);
 865       ir_to_mesa_emit_scalar_op1(ir, OPCODE_RCP, result_dst, result_src);
 866       /* For incoming channels < 0, set the result to 0. */
 867       ir_to_mesa_emit_op3(ir, OPCODE_CMP, result_dst,
 868                           op[0], src_reg_for_float(0.0), result_src);
 869       break;
 870    case ir_unop_rsq:
 871       ir_to_mesa_emit_scalar_op1(ir, OPCODE_RSQ, result_dst, op[0]);
 872       break;
 873    case ir_unop_i2f:
 874    case ir_unop_b2f:
 875    case ir_unop_b2i:
 876       /* Mesa IR lacks types, ints are stored as truncated floats. */
 877       result_src = op[0];
 878       break;
 879    case ir_unop_f2i:
 880       ir_to_mesa_emit_op1(ir, OPCODE_TRUNC, result_dst, op[0]);
 881       break;
 882    case ir_unop_f2b:
 883    case ir_unop_i2b:
 884       ir_to_mesa_emit_op2(ir, OPCODE_SNE, result_dst,
 885                           result_src, src_reg_for_float(0.0));
 886       break;
 887    case ir_unop_trunc:
 888       ir_to_mesa_emit_op1(ir, OPCODE_TRUNC, result_dst, op[0]);
 889       break;
 890    case ir_unop_ceil:
 891       op[0].negate = ~op[0].negate;
 892       ir_to_mesa_emit_op1(ir, OPCODE_FLR, result_dst, op[0]);
 893       result_src.negate = ~result_src.negate;
 894       break;
 895    case ir_unop_floor:
 896       ir_to_mesa_emit_op1(ir, OPCODE_FLR, result_dst, op[0]);
 897       break;
 898    case ir_unop_fract:
 899       ir_to_mesa_emit_op1(ir, OPCODE_FRC, result_dst, op[0]);
 900       break;
 901
 902    case ir_binop_min:
 903       ir_to_mesa_emit_op2(ir, OPCODE_MIN, result_dst, op[0], op[1]);
 904       break;
 905    case ir_binop_max:
 906       ir_to_mesa_emit_op2(ir, OPCODE_MAX, result_dst, op[0], op[1]);
 907       break;
 908    case ir_binop_pow:
 909       ir_to_mesa_emit_scalar_op2(ir, OPCODE_POW, result_dst, op[0], op[1]);
 910       break;
 911
 912    case ir_unop_bit_not:
 913    case ir_unop_u2f:
 914    case ir_binop_lshift:
 915    case ir_binop_rshift:
 916    case ir_binop_bit_and:
 917    case ir_binop_bit_xor:
 918    case ir_binop_bit_or:
 919       assert(!"GLSL 1.30 features unsupported");
 920       break;
 921    }
 922
 923    this->result = result_src;
 924 }
 925
 926
 927 void
 928 ir_to_mesa_visitor::visit(ir_swizzle *ir)
 929 {
 930    ir_to_mesa_src_reg src_reg;
 931    int i;
 932    int swizzle[4];
 933
 934    /* Note that this is only swizzles in expressions, not those on the left
 935     * hand side of an assignment, which do write masking.  See ir_assignment
 936     * for that.
 937     */
 938
 939    ir->val->accept(this);
 940    src_reg = this->result;
 941    assert(src_reg.file != PROGRAM_UNDEFINED);
 942
 943    for (i = 0; i < 4; i++) {
 944       if (i < ir->type->vector_elements) {
 945          switch (i) {
 946          case 0:
 947             swizzle[i] = GET_SWZ(src_reg.swizzle, ir->mask.x);
 948             break;
 949          case 1:
 950             swizzle[i] = GET_SWZ(src_reg.swizzle, ir->mask.y);
 951             break;
 952          case 2:
 953             swizzle[i] = GET_SWZ(src_reg.swizzle, ir->mask.z);
 954             break;
 955          case 3:
 956             swizzle[i] = GET_SWZ(src_reg.swizzle, ir->mask.w);
 957             break;
 958          }
 959       } else {
 960          /* If the type is smaller than a vec4, replicate the last
 961           * channel out.
 962           */
 963          swizzle[i] = swizzle[ir->type->vector_elements - 1];
 964       }
 965    }
 966
 967    src_reg.swizzle = MAKE_SWIZZLE4(swizzle[0],
 968                                    swizzle[1],
 969                                    swizzle[2],
 970                                    swizzle[3]);
 971
 972    this->result = src_reg;
 973 }
 974
 975 static const struct {
 976    const char *name;
 977    const char *field;
 978    int tokens[STATE_LENGTH];
 979    int swizzle;
 980    bool array_indexed;
 981 } statevars[] = {
 982    {"gl_DepthRange", "near",
 983     {STATE_DEPTH_RANGE, 0, 0}, SWIZZLE_XXXX},
 984    {"gl_DepthRange", "far",
 985     {STATE_DEPTH_RANGE, 0, 0}, SWIZZLE_YYYY},
 986    {"gl_DepthRange", "diff",
 987     {STATE_DEPTH_RANGE, 0, 0}, SWIZZLE_ZZZZ},
 988
 989    {"gl_ClipPlane", NULL,
 990     {STATE_CLIPPLANE, 0, 0}, SWIZZLE_XYZW, true}
 991 ,
 992    {"gl_Point", "size",
 993     {STATE_POINT_SIZE}, SWIZZLE_XXXX},
 994    {"gl_Point", "sizeMin",
 995     {STATE_POINT_SIZE}, SWIZZLE_YYYY},
 996    {"gl_Point", "sizeMax",
 997     {STATE_POINT_SIZE}, SWIZZLE_ZZZZ},
 998    {"gl_Point", "fadeThresholdSize",
 999     {STATE_POINT_SIZE}, SWIZZLE_WWWW},
1000    {"gl_Point", "distanceConstantAttenuation",
1001     {STATE_POINT_ATTENUATION}, SWIZZLE_XXXX},
1002    {"gl_Point", "distanceLinearAttenuation",
1003     {STATE_POINT_ATTENUATION}, SWIZZLE_YYYY},
1004    {"gl_Point", "distanceQuadraticAttenuation",
1005     {STATE_POINT_ATTENUATION}, SWIZZLE_ZZZZ},
1006
1007    {"gl_FrontMaterial", "emission",
1008     {STATE_MATERIAL, 0, STATE_EMISSION}, SWIZZLE_XYZW},
1009    {"gl_FrontMaterial", "ambient",
1010     {STATE_MATERIAL, 0, STATE_AMBIENT}, SWIZZLE_XYZW},
1011    {"gl_FrontMaterial", "diffuse",
1012     {STATE_MATERIAL, 0, STATE_DIFFUSE}, SWIZZLE_XYZW},
1013    {"gl_FrontMaterial", "specular",
1014     {STATE_MATERIAL, 0, STATE_SPECULAR}, SWIZZLE_XYZW},
1015    {"gl_FrontMaterial", "shininess",
1016     {STATE_MATERIAL, 0, STATE_SHININESS}, SWIZZLE_XXXX},
1017
1018    {"gl_BackMaterial", "emission",
1019     {STATE_MATERIAL, 1, STATE_EMISSION}, SWIZZLE_XYZW},
1020    {"gl_BackMaterial", "ambient",
1021     {STATE_MATERIAL, 1, STATE_AMBIENT}, SWIZZLE_XYZW},
1022    {"gl_BackMaterial", "diffuse",
1023     {STATE_MATERIAL, 1, STATE_DIFFUSE}, SWIZZLE_XYZW},
1024    {"gl_BackMaterial", "specular",
1025     {STATE_MATERIAL, 1, STATE_SPECULAR}, SWIZZLE_XYZW},
1026    {"gl_BackMaterial", "shininess",
1027     {STATE_MATERIAL, 1, STATE_SHININESS}, SWIZZLE_XXXX},
1028
1029    {"gl_LightSource", "ambient",
1030     {STATE_LIGHT, 0, STATE_AMBIENT}, SWIZZLE_XYZW, true},
1031    {"gl_LightSource", "diffuse",
1032     {STATE_LIGHT, 0, STATE_DIFFUSE}, SWIZZLE_XYZW, true},
1033    {"gl_LightSource", "specular",
1034     {STATE_LIGHT, 0, STATE_SPECULAR}, SWIZZLE_XYZW, true},
1035    {"gl_LightSource", "position",
1036     {STATE_LIGHT, 0, STATE_POSITION}, SWIZZLE_XYZW, true},
1037    {"gl_LightSource", "halfVector",
1038     {STATE_LIGHT, 0, STATE_HALF_VECTOR}, SWIZZLE_XYZW, true},
1039    {"gl_LightSource", "spotDirection",
1040     {STATE_LIGHT, 0, STATE_SPOT_DIRECTION}, SWIZZLE_XYZW, true},
1041    {"gl_LightSource", "spotCosCutoff",
1042     {STATE_LIGHT, 0, STATE_SPOT_DIRECTION}, SWIZZLE_WWWW, true},
1043    {"gl_LightSource", "spotCutoff",
1044     {STATE_LIGHT, 0, STATE_SPOT_CUTOFF}, SWIZZLE_XXXX, true},
1045    {"gl_LightSource", "spotExponent",
1046     {STATE_LIGHT, 0, STATE_ATTENUATION}, SWIZZLE_WWWW, true},
1047    {"gl_LightSource", "constantAttenuation",
1048     {STATE_LIGHT, 0, STATE_ATTENUATION}, SWIZZLE_XXXX, true},
1049    {"gl_LightSource", "linearAttenuation",
1050     {STATE_LIGHT, 0, STATE_ATTENUATION}, SWIZZLE_YYYY, true},
1051    {"gl_LightSource", "quadraticAttenuation",
1052     {STATE_LIGHT, 0, STATE_ATTENUATION}, SWIZZLE_ZZZZ, true},
1053
1054    {"gl_LightModel", NULL,
1055     {STATE_LIGHTMODEL_AMBIENT, 0}, SWIZZLE_XYZW},
1056
1057    {"gl_FrontLightModelProduct", NULL,
1058     {STATE_LIGHTMODEL_SCENECOLOR, 0}, SWIZZLE_XYZW},
1059    {"gl_BackLightModelProduct", NULL,
1060     {STATE_LIGHTMODEL_SCENECOLOR, 1}, SWIZZLE_XYZW},
1061
1062    {"gl_FrontLightProduct", "ambient",
1063     {STATE_LIGHTPROD, 0, 0, STATE_AMBIENT}, SWIZZLE_XYZW, true},
1064    {"gl_FrontLightProduct", "diffuse",
1065     {STATE_LIGHTPROD, 0, 0, STATE_DIFFUSE}, SWIZZLE_XYZW, true},
1066    {"gl_FrontLightProduct", "specular",
1067     {STATE_LIGHTPROD, 0, 0, STATE_SPECULAR}, SWIZZLE_XYZW, true},
1068
1069    {"gl_BackLightProduct", "ambient",
1070     {STATE_LIGHTPROD, 0, 1, STATE_AMBIENT}, SWIZZLE_XYZW, true},
1071    {"gl_BackLightProduct", "diffuse",
1072     {STATE_LIGHTPROD, 0, 1, STATE_DIFFUSE}, SWIZZLE_XYZW, true},
1073    {"gl_BackLightProduct", "specular",
1074     {STATE_LIGHTPROD, 0, 1, STATE_SPECULAR}, SWIZZLE_XYZW, true},
1075
1076    {"gl_TextureEnvColor", "ambient",
1077     {STATE_TEXENV_COLOR, 0}, SWIZZLE_XYZW, true},
1078
1079    {"gl_EyePlaneS", NULL,
1080     {STATE_TEXGEN, 0, STATE_TEXGEN_EYE_S}, SWIZZLE_XYZW, true},
1081    {"gl_EyePlaneT", NULL,
1082     {STATE_TEXGEN, 0, STATE_TEXGEN_EYE_T}, SWIZZLE_XYZW, true},
1083    {"gl_EyePlaneR", NULL,
1084     {STATE_TEXGEN, 0, STATE_TEXGEN_EYE_R}, SWIZZLE_XYZW, true},
1085    {"gl_EyePlaneQ", NULL,
1086     {STATE_TEXGEN, 0, STATE_TEXGEN_EYE_Q}, SWIZZLE_XYZW, true},
1087
1088    {"gl_ObjectPlaneS", NULL,
1089     {STATE_TEXGEN, 0, STATE_TEXGEN_OBJECT_S}, SWIZZLE_XYZW, true},
1090    {"gl_ObjectPlaneT", NULL,
1091     {STATE_TEXGEN, 0, STATE_TEXGEN_OBJECT_T}, SWIZZLE_XYZW, true},
1092    {"gl_ObjectPlaneR", NULL,
1093     {STATE_TEXGEN, 0, STATE_TEXGEN_OBJECT_R}, SWIZZLE_XYZW, true},
1094    {"gl_ObjectPlaneQ", NULL,
1095     {STATE_TEXGEN, 0, STATE_TEXGEN_OBJECT_Q}, SWIZZLE_XYZW, true},
1096
1097    {"gl_Fog", "color",
1098     {STATE_FOG_COLOR}, SWIZZLE_XYZW},
1099    {"gl_Fog", "density",
1100     {STATE_FOG_PARAMS}, SWIZZLE_XXXX},
1101    {"gl_Fog", "start",
1102     {STATE_FOG_PARAMS}, SWIZZLE_YYYY},
1103    {"gl_Fog", "end",
1104     {STATE_FOG_PARAMS}, SWIZZLE_ZZZZ},
1105    {"gl_Fog", "scale",
1106     {STATE_FOG_PARAMS}, SWIZZLE_WWWW},
1107 };
1108
1109 static ir_to_mesa_src_reg
1110 get_builtin_uniform_reg(struct gl_program *prog,
1111                         const char *name, int array_index, const char *field)
1112 {
1113    unsigned int i;
1114    ir_to_mesa_src_reg src_reg;
1115    int tokens[STATE_LENGTH];
1116
1117    for (i = 0; i < Elements(statevars); i++) {
1118       if (strcmp(statevars[i].name, name) != 0)
1119          continue;
1120       if (!field && statevars[i].field) {
1121          assert(!"FINISHME: whole-structure state var dereference");
1122       }
1123       if (field && strcmp(statevars[i].field, field) != 0)
1124          continue;
1125       break;
1126    }
1127
1128    if (i ==  Elements(statevars)) {
1129       printf("builtin uniform %s%s%s not found\n",
1130              name,
1131              field ? "." : "",
1132              field ? field : "");
1133       abort();
1134    }
1135
1136    memcpy(&tokens, statevars[i].tokens, sizeof(tokens));
1137    if (statevars[i].array_indexed)
1138       tokens[1] = array_index;
1139
1140    src_reg.file = PROGRAM_STATE_VAR;
1141    src_reg.index = _mesa_add_state_reference(prog->Parameters,
1142                                              (gl_state_index *)tokens);
1143    src_reg.swizzle = statevars[i].swizzle;
1144    src_reg.negate = 0;
1145    src_reg.reladdr = false;
1146
1147    return src_reg;
1148 }
1149
1150 static int
1151 add_matrix_ref(struct gl_program *prog, int *tokens)
1152 {
1153    int base_pos = -1;
1154    int i;
1155
1156    /* Add a ref for each column.  It looks like the reason we do
1157     * it this way is that _mesa_add_state_reference doesn't work
1158     * for things that aren't vec4s, so the tokens[2]/tokens[3]
1159     * range has to be equal.
1160     */
1161    for (i = 0; i < 4; i++) {
1162       tokens[2] = i;
1163       tokens[3] = i;
1164       int pos = _mesa_add_state_reference(prog->Parameters,
1165                                           (gl_state_index *)tokens);
1166       if (base_pos == -1)
1167          base_pos = pos;
1168       else
1169          assert(base_pos + i == pos);
1170    }
1171
1172    return base_pos;
1173 }
1174
1175 static variable_storage *
1176 get_builtin_matrix_ref(void *mem_ctx, struct gl_program *prog, ir_variable *var,
1177                        ir_rvalue *array_index)
1178 {
1179    /*
1180     * NOTE: The ARB_vertex_program extension specified that matrices get
1181     * loaded in registers in row-major order.  With GLSL, we want column-
1182     * major order.  So, we need to transpose all matrices here...
1183     */
1184    static const struct {
1185       const char *name;
1186       int matrix;
1187       int modifier;
1188    } matrices[] = {
1189       { "gl_ModelViewMatrix", STATE_MODELVIEW_MATRIX, STATE_MATRIX_TRANSPOSE },
1190       { "gl_ModelViewMatrixInverse", STATE_MODELVIEW_MATRIX, STATE_MATRIX_INVTRANS },
1191       { "gl_ModelViewMatrixTranspose", STATE_MODELVIEW_MATRIX, 0 },
1192       { "gl_ModelViewMatrixInverseTranspose", STATE_MODELVIEW_MATRIX, STATE_MATRIX_INVERSE },
1193
1194       { "gl_ProjectionMatrix", STATE_PROJECTION_MATRIX, STATE_MATRIX_TRANSPOSE },
1195       { "gl_ProjectionMatrixInverse", STATE_PROJECTION_MATRIX, STATE_MATRIX_INVTRANS },
1196       { "gl_ProjectionMatrixTranspose", STATE_PROJECTION_MATRIX, 0 },
1197       { "gl_ProjectionMatrixInverseTranspose", STATE_PROJECTION_MATRIX, STATE_MATRIX_INVERSE },
1198
1199       { "gl_ModelViewProjectionMatrix", STATE_MVP_MATRIX, STATE_MATRIX_TRANSPOSE },
1200       { "gl_ModelViewProjectionMatrixInverse", STATE_MVP_MATRIX, STATE_MATRIX_INVTRANS },
1201       { "gl_ModelViewProjectionMatrixTranspose", STATE_MVP_MATRIX, 0 },
1202       { "gl_ModelViewProjectionMatrixInverseTranspose", STATE_MVP_MATRIX, STATE_MATRIX_INVERSE },
1203
1204       { "gl_TextureMatrix", STATE_TEXTURE_MATRIX, STATE_MATRIX_TRANSPOSE },
1205       { "gl_TextureMatrixInverse", STATE_TEXTURE_MATRIX, STATE_MATRIX_INVTRANS },
1206       { "gl_TextureMatrixTranspose", STATE_TEXTURE_MATRIX, 0 },
1207       { "gl_TextureMatrixInverseTranspose", STATE_TEXTURE_MATRIX, STATE_MATRIX_INVERSE },
1208
1209       { "gl_NormalMatrix", STATE_MODELVIEW_MATRIX, STATE_MATRIX_INVERSE },
1210
1211    };
1212    unsigned int i;
1213    variable_storage *entry;
1214
1215    /* C++ gets angry when we try to use an int as a gl_state_index, so we use
1216     * ints for gl_state_index.  Make sure they're compatible.
1217     */
1218    assert(sizeof(gl_state_index) == sizeof(int));
1219
1220    for (i = 0; i < Elements(matrices); i++) {
1221       if (strcmp(var->name, matrices[i].name) == 0) {
1222          int tokens[STATE_LENGTH];
1223          int base_pos = -1;
1224
1225          tokens[0] = matrices[i].matrix;
1226          tokens[4] = matrices[i].modifier;
1227          if (matrices[i].matrix == STATE_TEXTURE_MATRIX) {
1228             ir_constant *index = array_index->constant_expression_value();
1229             if (index) {
1230                tokens[1] = index->value.i[0];
1231                base_pos = add_matrix_ref(prog, tokens);
1232             } else {
1233                for (i = 0; i < var->type->length; i++) {
1234                   tokens[1] = i;
1235                   int pos = add_matrix_ref(prog, tokens);
1236                   if (base_pos == -1)
1237                      base_pos = pos;
1238                   else
1239                      assert(base_pos + (int)i * 4 == pos);
1240                }
1241             }
1242          } else {
1243             tokens[1] = 0; /* unused array index */
1244             base_pos = add_matrix_ref(prog, tokens);
1245          }
1246          tokens[4] = matrices[i].modifier;
1247
1248          entry = new(mem_ctx) variable_storage(var,
1249                                                PROGRAM_STATE_VAR,
1250                                                base_pos);
1251
1252          return entry;
1253       }
1254    }
1255
1256    return NULL;
1257 }
1258
1259 int
1260 ir_to_mesa_visitor::add_uniform(const char *name,
1261                                 const glsl_type *type,
1262                                 ir_constant *constant)
1263 {
1264    int len;
1265
1266    if (type->is_vector() ||
1267        type->is_scalar()) {
1268       len = type->vector_elements;
1269    } else {
1270       len = type_size(type) * 4;
1271    }
1272
1273    float *values = NULL;
1274    if (constant && type->is_array()) {
1275       values = (float *)malloc(type->length * 4 * sizeof(float));
1276
1277       assert(type->fields.array->is_scalar() ||
1278              type->fields.array->is_vector() ||
1279              !"FINISHME: uniform array initializers for non-vector");
1280
1281       for (unsigned int i = 0; i < type->length; i++) {
1282          ir_constant *element = constant->array_elements[i];
1283          unsigned int c;
1284
1285          for (c = 0; c < type->fields.array->vector_elements; c++) {
1286             switch (type->fields.array->base_type) {
1287             case GLSL_TYPE_FLOAT:
1288                values[4 * i + c] = element->value.f[c];
1289                break;
1290             case GLSL_TYPE_INT:
1291                values[4 * i + c] = element->value.i[c];
1292                break;
1293             case GLSL_TYPE_UINT:
1294                values[4 * i + c] = element->value.u[c];
1295                break;
1296             case GLSL_TYPE_BOOL:
1297                values[4 * i + c] = element->value.b[c];
1298                break;
1299             default:
1300                assert(!"not reached");
1301             }
1302          }
1303       }
1304    } else if (constant) {
1305       values = (float *)malloc(16 * sizeof(float));
1306       for (unsigned int i = 0; i < type->components(); i++) {
1307          switch (type->base_type) {
1308          case GLSL_TYPE_FLOAT:
1309             values[i] = constant->value.f[i];
1310             break;
1311          case GLSL_TYPE_INT:
1312             values[i] = constant->value.i[i];
1313             break;
1314          case GLSL_TYPE_UINT:
1315             values[i] = constant->value.u[i];
1316             break;
1317          case GLSL_TYPE_BOOL:
1318             values[i] = constant->value.b[i];
1319             break;
1320          default:
1321             assert(!"not reached");
1322          }
1323       }
1324    }
1325
1326    int loc = _mesa_add_uniform(this->prog->Parameters,
1327                                name,
1328                                len,
1329                                type->gl_type,
1330                                values);
1331    free(values);
1332
1333    return loc;
1334 }
1335
1336 /* Recursively add all the members of the aggregate uniform as uniform names
1337  * to Mesa, moving those uniforms to our structured temporary.
1338  */
1339 void
1340 ir_to_mesa_visitor::add_aggregate_uniform(ir_instruction *ir,
1341                                           const char *name,
1342                                           const struct glsl_type *type,
1343                                           ir_constant *constant,
1344                                           struct ir_to_mesa_dst_reg temp)
1345 {
1346    int loc;
1347
1348    if (type->is_record()) {
1349       void *mem_ctx = talloc_new(NULL);
1350       ir_constant *field_constant = NULL;
1351
1352       if (constant)
1353          field_constant = (ir_constant *)constant->components.get_head();
1354
1355       for (unsigned int i = 0; i < type->length; i++) {
1356          const glsl_type *field_type = type->fields.structure[i].type;
1357
1358          add_aggregate_uniform(ir,
1359                                talloc_asprintf(mem_ctx, "%s.%s", name,
1360                                                type->fields.structure[i].name),
1361                                field_type, field_constant, temp);
1362          temp.index += type_size(field_type);
1363
1364          if (constant)
1365             field_constant = (ir_constant *)field_constant->next;
1366       }
1367
1368       talloc_free(mem_ctx);
1369
1370       return;
1371    }
1372
1373    assert(type->is_vector() || type->is_scalar() || !"FINISHME: other types");
1374
1375    loc = add_uniform(name, type, constant);
1376
1377    ir_to_mesa_src_reg uniform(PROGRAM_UNIFORM, loc, type);
1378
1379    for (int i = 0; i < type_size(type); i++) {
1380       ir_to_mesa_emit_op1(ir, OPCODE_MOV, temp, uniform);
1381       temp.index++;
1382       uniform.index++;
1383    }
1384 }
1385
1386
1387 void
1388 ir_to_mesa_visitor::visit(ir_dereference_variable *ir)
1389 {
1390    variable_storage *entry = find_variable_storage(ir->var);
1391    unsigned int loc;
1392
1393    if (!entry) {
1394       switch (ir->var->mode) {
1395       case ir_var_uniform:
1396          entry = get_builtin_matrix_ref(this->mem_ctx, this->prog, ir->var,
1397                                         NULL);
1398          if (entry)
1399             break;
1400
1401          /* FINISHME: Fix up uniform name for arrays and things */
1402          if (ir->var->type->base_type == GLSL_TYPE_SAMPLER ||
1403              (ir->var->type->base_type == GLSL_TYPE_ARRAY &&
1404               ir->var->type->fields.array->base_type == GLSL_TYPE_SAMPLER)) {
1405             int array_length;
1406
1407             if (ir->var->type->base_type == GLSL_TYPE_ARRAY)
1408                array_length = ir->var->type->length;
1409             else
1410                array_length = 1;
1411             int sampler = _mesa_add_sampler(this->prog->Parameters,
1412                                             ir->var->name,
1413                                             ir->var->type->gl_type,
1414                                             array_length);
1415             set_sampler_location(ir->var, sampler);
1416
1417             entry = new(mem_ctx) variable_storage(ir->var, PROGRAM_SAMPLER,
1418                                                   sampler);
1419             this->variables.push_tail(entry);
1420             break;
1421          }
1422
1423          assert(ir->var->type->gl_type != 0 &&
1424                 ir->var->type->gl_type != GL_INVALID_ENUM);
1425
1426          /* Oh, the joy of aggregate types in Mesa.  Like constants,
1427           * we can only really do vec4s.  So, make a temp, chop the
1428           * aggregate up into vec4s, and move those vec4s to the temp.
1429           */
1430          if (ir->var->type->is_record()) {
1431             ir_to_mesa_src_reg temp = get_temp(ir->var->type);
1432
1433             entry = new(mem_ctx) variable_storage(ir->var,
1434                                                   temp.file,
1435                                                   temp.index);
1436             this->variables.push_tail(entry);
1437
1438             add_aggregate_uniform(ir->var, ir->var->name, ir->var->type,
1439                                   ir->var->constant_value,
1440                                   ir_to_mesa_dst_reg_from_src(temp));
1441             break;
1442          }
1443
1444          loc = add_uniform(ir->var->name,
1445                            ir->var->type,
1446                            ir->var->constant_value);
1447
1448          /* Always mark the uniform used at this point.  If it isn't
1449           * used, dead code elimination should have nuked the decl already.
1450           */
1451          this->prog->Parameters->Parameters[loc].Used = GL_TRUE;
1452
1453          entry = new(mem_ctx) variable_storage(ir->var, PROGRAM_UNIFORM, loc);
1454          this->variables.push_tail(entry);
1455          break;
1456       case ir_var_in:
1457       case ir_var_out:
1458       case ir_var_inout:
1459          /* The linker assigns locations for varyings and attributes,
1460           * including deprecated builtins (like gl_Color), user-assign
1461           * generic attributes (glBindVertexLocation), and
1462           * user-defined varyings.
1463           *
1464           * FINISHME: We would hit this path for function arguments.  Fix!
1465           */
1466          assert(ir->var->location != -1);
1467          if (ir->var->mode == ir_var_in ||
1468              ir->var->mode == ir_var_inout) {
1469             entry = new(mem_ctx) variable_storage(ir->var,
1470                                                   PROGRAM_INPUT,
1471                                                   ir->var->location);
1472
1473             if (this->prog->Target == GL_VERTEX_PROGRAM_ARB &&
1474                 ir->var->location >= VERT_ATTRIB_GENERIC0) {
1475                _mesa_add_attribute(prog->Attributes,
1476                                    ir->var->name,
1477                                    _mesa_sizeof_glsl_type(ir->var->type->gl_type),
1478                                    ir->var->type->gl_type,
1479                                    ir->var->location - VERT_ATTRIB_GENERIC0);
1480             }
1481          } else {
1482             entry = new(mem_ctx) variable_storage(ir->var,
1483                                                   PROGRAM_OUTPUT,
1484                                                   ir->var->location);
1485          }
1486
1487          break;
1488       case ir_var_auto:
1489       case ir_var_temporary:
1490          entry = new(mem_ctx) variable_storage(ir->var, PROGRAM_TEMPORARY,
1491                                                this->next_temp);
1492          this->variables.push_tail(entry);
1493
1494          next_temp += type_size(ir->var->type);
1495          break;
1496       }
1497
1498       if (!entry) {
1499          printf("Failed to make storage for %s\n", ir->var->name);
1500          exit(1);
1501       }
1502    }
1503
1504    this->result = ir_to_mesa_src_reg(entry->file, entry->index, ir->var->type);
1505 }
1506
1507 void
1508 ir_to_mesa_visitor::visit(ir_dereference_array *ir)
1509 {
1510    ir_variable *var = ir->variable_referenced();
1511    ir_constant *index;
1512    ir_to_mesa_src_reg src_reg;
1513    ir_dereference_variable *deref_var = ir->array->as_dereference_variable();
1514    int element_size = type_size(ir->type);
1515
1516    index = ir->array_index->constant_expression_value();
1517
1518    if (deref_var && strncmp(deref_var->var->name,
1519                             "gl_TextureMatrix",
1520                             strlen("gl_TextureMatrix")) == 0) {
1521       struct variable_storage *entry;
1522
1523       entry = get_builtin_matrix_ref(this->mem_ctx, this->prog, deref_var->var,
1524                                      ir->array_index);
1525       assert(entry);
1526
1527       ir_to_mesa_src_reg src_reg(entry->file, entry->index, ir->type);
1528
1529       if (index) {
1530          src_reg.reladdr = NULL;
1531       } else {
1532          ir_to_mesa_src_reg index_reg = get_temp(glsl_type::float_type);
1533
1534          ir->array_index->accept(this);
1535          ir_to_mesa_emit_op2(ir, OPCODE_MUL,
1536                              ir_to_mesa_dst_reg_from_src(index_reg),
1537                              this->result, src_reg_for_float(element_size));
1538
1539          src_reg.reladdr = talloc(mem_ctx, ir_to_mesa_src_reg);
1540          memcpy(src_reg.reladdr, &index_reg, sizeof(index_reg));
1541       }
1542
1543       this->result = src_reg;
1544       return;
1545    }
1546
1547    if (strncmp(var->name, "gl_", 3) == 0 && var->mode == ir_var_uniform &&
1548        !var->type->is_matrix()) {
1549       ir_dereference_record *record = NULL;
1550       if (ir->array->ir_type == ir_type_dereference_record)
1551          record = (ir_dereference_record *)ir->array;
1552
1553       assert(index || !"FINISHME: variable-indexed builtin uniform access");
1554
1555       this->result = get_builtin_uniform_reg(prog,
1556                                              var->name,
1557                                              index->value.i[0],
1558                                              record ? record->field : NULL);
1559    }
1560
1561    ir->array->accept(this);
1562    src_reg = this->result;
1563
1564    if (index) {
1565       src_reg.index += index->value.i[0] * element_size;
1566    } else {
1567       ir_to_mesa_src_reg array_base = this->result;
1568       /* Variable index array dereference.  It eats the "vec4" of the
1569        * base of the array and an index that offsets the Mesa register
1570        * index.
1571        */
1572       ir->array_index->accept(this);
1573
1574       ir_to_mesa_src_reg index_reg;
1575
1576       if (element_size == 1) {
1577          index_reg = this->result;
1578       } else {
1579          index_reg = get_temp(glsl_type::float_type);
1580
1581          ir_to_mesa_emit_op2(ir, OPCODE_MUL,
1582                              ir_to_mesa_dst_reg_from_src(index_reg),
1583                              this->result, src_reg_for_float(element_size));
1584       }
1585
1586       src_reg.reladdr = talloc(mem_ctx, ir_to_mesa_src_reg);
1587       memcpy(src_reg.reladdr, &index_reg, sizeof(index_reg));
1588    }
1589
1590    /* If the type is smaller than a vec4, replicate the last channel out. */
1591    if (ir->type->is_scalar() || ir->type->is_vector())
1592       src_reg.swizzle = swizzle_for_size(ir->type->vector_elements);
1593    else
1594       src_reg.swizzle = SWIZZLE_NOOP;
1595
1596    this->result = src_reg;
1597 }
1598
1599 void
1600 ir_to_mesa_visitor::visit(ir_dereference_record *ir)
1601 {
1602    unsigned int i;
1603    const glsl_type *struct_type = ir->record->type;
1604    int offset = 0;
1605    ir_variable *var = ir->record->variable_referenced();
1606
1607    if (strncmp(var->name, "gl_", 3) == 0 && var->mode == ir_var_uniform) {
1608       assert(var);
1609
1610       this->result = get_builtin_uniform_reg(prog,
1611                                              var->name,
1612                                              0,
1613                                              ir->field);
1614       return;
1615    }
1616
1617    ir->record->accept(this);
1618
1619    for (i = 0; i < struct_type->length; i++) {
1620       if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0)
1621          break;
1622       offset += type_size(struct_type->fields.structure[i].type);
1623    }
1624    this->result.swizzle = swizzle_for_size(ir->type->vector_elements);
1625    this->result.index += offset;
1626 }
1627
1628 /**
1629  * We want to be careful in assignment setup to hit the actual storage
1630  * instead of potentially using a temporary like we might with the
1631  * ir_dereference handler.
1632  */
1633 static struct ir_to_mesa_dst_reg
1634 get_assignment_lhs(ir_dereference *ir, ir_to_mesa_visitor *v,
1635                    ir_to_mesa_src_reg *r)
1636 {
1637    /* The LHS must be a dereference.  If the LHS is a variable indexed array
1638     * access of a vector, it must be separated into a series conditional moves
1639     * before reaching this point (see ir_vec_index_to_cond_assign).
1640     */
1641    assert(ir->as_dereference());
1642    ir_dereference_array *deref_array = ir->as_dereference_array();
1643    if (deref_array) {
1644       assert(!deref_array->array->type->is_vector());
1645    }
1646
1647    /* Use the rvalue deref handler for the most part.  We'll ignore
1648     * swizzles in it and write swizzles using writemask, though.
1649     */
1650    ir->accept(v);
1651    return ir_to_mesa_dst_reg_from_src(v->result);
1652 }
1653
1654 void
1655 ir_to_mesa_visitor::visit(ir_assignment *ir)
1656 {
1657    struct ir_to_mesa_dst_reg l;
1658    struct ir_to_mesa_src_reg r;
1659    int i;
1660
1661    ir->rhs->accept(this);
1662    r = this->result;
1663
1664    l = get_assignment_lhs(ir->lhs, this, &r);
1665
1666    /* FINISHME: This should really set to the correct maximal writemask for each
1667     * FINISHME: component written (in the loops below).  This case can only
1668     * FINISHME: occur for matrices, arrays, and structures.
1669     */
1670    if (ir->write_mask == 0) {
1671       assert(!ir->lhs->type->is_scalar() && !ir->lhs->type->is_vector());
1672       l.writemask = WRITEMASK_XYZW;
1673    } else if (ir->lhs->type->is_scalar()) {
1674       /* FINISHME: This hack makes writing to gl_FragData, which lives in the
1675        * FINISHME: W component of fragment shader output zero, work correctly.
1676        */
1677       l.writemask = WRITEMASK_XYZW;
1678    } else {
1679       assert(ir->lhs->type->is_vector());
1680       l.writemask = ir->write_mask;
1681    }
1682
1683    assert(l.file != PROGRAM_UNDEFINED);
1684    assert(r.file != PROGRAM_UNDEFINED);
1685
1686    if (ir->condition) {
1687       ir_to_mesa_src_reg condition;
1688
1689       ir->condition->accept(this);
1690       condition = this->result;
1691
1692       /* We use the OPCODE_CMP (a < 0 ? b : c) for conditional moves,
1693        * and the condition we produced is 0.0 or 1.0.  By flipping the
1694        * sign, we can choose which value OPCODE_CMP produces without
1695        * an extra computing the condition.
1696        */
1697       condition.negate = ~condition.negate;
1698       for (i = 0; i < type_size(ir->lhs->type); i++) {
1699          ir_to_mesa_emit_op3(ir, OPCODE_CMP, l,
1700                              condition, r, ir_to_mesa_src_reg_from_dst(l));
1701          l.index++;
1702          r.index++;
1703       }
1704    } else {
1705       for (i = 0; i < type_size(ir->lhs->type); i++) {
1706          ir_to_mesa_emit_op1(ir, OPCODE_MOV, l, r);
1707          l.index++;
1708          r.index++;
1709       }
1710    }
1711 }
1712
1713
1714 void
1715 ir_to_mesa_visitor::visit(ir_constant *ir)
1716 {
1717    ir_to_mesa_src_reg src_reg;
1718    GLfloat stack_vals[4];
1719    GLfloat *values = stack_vals;
1720    unsigned int i;
1721
1722    /* Unfortunately, 4 floats is all we can get into
1723     * _mesa_add_unnamed_constant.  So, make a temp to store an
1724     * aggregate constant and move each constant value into it.  If we
1725     * get lucky, copy propagation will eliminate the extra moves.
1726     */
1727
1728    if (ir->type->base_type == GLSL_TYPE_STRUCT) {
1729       ir_to_mesa_src_reg temp_base = get_temp(ir->type);
1730       ir_to_mesa_dst_reg temp = ir_to_mesa_dst_reg_from_src(temp_base);
1731
1732       foreach_iter(exec_list_iterator, iter, ir->components) {
1733          ir_constant *field_value = (ir_constant *)iter.get();
1734          int size = type_size(field_value->type);
1735
1736          assert(size > 0);
1737
1738          field_value->accept(this);
1739          src_reg = this->result;
1740
1741          for (i = 0; i < (unsigned int)size; i++) {
1742             ir_to_mesa_emit_op1(ir, OPCODE_MOV, temp, src_reg);
1743
1744             src_reg.index++;
1745             temp.index++;
1746          }
1747       }
1748       this->result = temp_base;
1749       return;
1750    }
1751
1752    if (ir->type->is_array()) {
1753       ir_to_mesa_src_reg temp_base = get_temp(ir->type);
1754       ir_to_mesa_dst_reg temp = ir_to_mesa_dst_reg_from_src(temp_base);
1755       int size = type_size(ir->type->fields.array);
1756
1757       assert(size > 0);
1758
1759       for (i = 0; i < ir->type->length; i++) {
1760          ir->array_elements[i]->accept(this);
1761          src_reg = this->result;
1762          for (int j = 0; j < size; j++) {
1763             ir_to_mesa_emit_op1(ir, OPCODE_MOV, temp, src_reg);
1764
1765             src_reg.index++;
1766             temp.index++;
1767          }
1768       }
1769       this->result = temp_base;
1770       return;
1771    }
1772
1773    if (ir->type->is_matrix()) {
1774       ir_to_mesa_src_reg mat = get_temp(ir->type);
1775       ir_to_mesa_dst_reg mat_column = ir_to_mesa_dst_reg_from_src(mat);
1776
1777       for (i = 0; i < ir->type->matrix_columns; i++) {
1778          assert(ir->type->base_type == GLSL_TYPE_FLOAT);
1779          values = &ir->value.f[i * ir->type->vector_elements];
1780
1781          src_reg = ir_to_mesa_src_reg(PROGRAM_CONSTANT, -1, NULL);
1782          src_reg.index = _mesa_add_unnamed_constant(this->prog->Parameters,
1783                                                 values,
1784                                                 ir->type->vector_elements,
1785                                                 &src_reg.swizzle);
1786          ir_to_mesa_emit_op1(ir, OPCODE_MOV, mat_column, src_reg);
1787
1788          mat_column.index++;
1789       }
1790
1791       this->result = mat;
1792    }
1793
1794    src_reg.file = PROGRAM_CONSTANT;
1795    switch (ir->type->base_type) {
1796    case GLSL_TYPE_FLOAT:
1797       values = &ir->value.f[0];
1798       break;
1799    case GLSL_TYPE_UINT:
1800       for (i = 0; i < ir->type->vector_elements; i++) {
1801          values[i] = ir->value.u[i];
1802       }
1803       break;
1804    case GLSL_TYPE_INT:
1805       for (i = 0; i < ir->type->vector_elements; i++) {
1806          values[i] = ir->value.i[i];
1807       }
1808       break;
1809    case GLSL_TYPE_BOOL:
1810       for (i = 0; i < ir->type->vector_elements; i++) {
1811          values[i] = ir->value.b[i];
1812       }
1813       break;
1814    default:
1815       assert(!"Non-float/uint/int/bool constant");
1816    }
1817
1818    this->result = ir_to_mesa_src_reg(PROGRAM_CONSTANT, -1, ir->type);
1819    this->result.index = _mesa_add_unnamed_constant(this->prog->Parameters,
1820                                                    values,
1821                                                    ir->type->vector_elements,
1822                                                    &this->result.swizzle);
1823 }
1824
1825 function_entry *
1826 ir_to_mesa_visitor::get_function_signature(ir_function_signature *sig)
1827 {
1828    function_entry *entry;
1829
1830    foreach_iter(exec_list_iterator, iter, this->function_signatures) {
1831       entry = (function_entry *)iter.get();
1832
1833       if (entry->sig == sig)
1834          return entry;
1835    }
1836
1837    entry = talloc(mem_ctx, function_entry);
1838    entry->sig = sig;
1839    entry->sig_id = this->next_signature_id++;
1840    entry->bgn_inst = NULL;
1841
1842    /* Allocate storage for all the parameters. */
1843    foreach_iter(exec_list_iterator, iter, sig->parameters) {
1844       ir_variable *param = (ir_variable *)iter.get();
1845       variable_storage *storage;
1846
1847       storage = find_variable_storage(param);
1848       assert(!storage);
1849
1850       storage = new(mem_ctx) variable_storage(param, PROGRAM_TEMPORARY,
1851                                               this->next_temp);
1852       this->variables.push_tail(storage);
1853
1854       this->next_temp += type_size(param->type);
1855    }
1856
1857    if (!sig->return_type->is_void()) {
1858       entry->return_reg = get_temp(sig->return_type);
1859    } else {
1860       entry->return_reg = ir_to_mesa_undef;
1861    }
1862
1863    this->function_signatures.push_tail(entry);
1864    return entry;
1865 }
1866
1867 void
1868 ir_to_mesa_visitor::visit(ir_call *ir)
1869 {
1870    ir_to_mesa_instruction *call_inst;
1871    ir_function_signature *sig = ir->get_callee();
1872    function_entry *entry = get_function_signature(sig);
1873    int i;
1874
1875    /* Process in parameters. */
1876    exec_list_iterator sig_iter = sig->parameters.iterator();
1877    foreach_iter(exec_list_iterator, iter, *ir) {
1878       ir_rvalue *param_rval = (ir_rvalue *)iter.get();
1879       ir_variable *param = (ir_variable *)sig_iter.get();
1880
1881       if (param->mode == ir_var_in ||
1882           param->mode == ir_var_inout) {
1883          variable_storage *storage = find_variable_storage(param);
1884          assert(storage);
1885
1886          param_rval->accept(this);
1887          ir_to_mesa_src_reg r = this->result;
1888
1889          ir_to_mesa_dst_reg l;
1890          l.file = storage->file;
1891          l.index = storage->index;
1892          l.reladdr = NULL;
1893          l.writemask = WRITEMASK_XYZW;
1894          l.cond_mask = COND_TR;
1895
1896          for (i = 0; i < type_size(param->type); i++) {
1897             ir_to_mesa_emit_op1(ir, OPCODE_MOV, l, r);
1898             l.index++;
1899             r.index++;
1900          }
1901       }
1902
1903       sig_iter.next();
1904    }
1905    assert(!sig_iter.has_next());
1906
1907    /* Emit call instruction */
1908    call_inst = ir_to_mesa_emit_op1(ir, OPCODE_CAL,
1909                                    ir_to_mesa_undef_dst, ir_to_mesa_undef);
1910    call_inst->function = entry;
1911
1912    /* Process out parameters. */
1913    sig_iter = sig->parameters.iterator();
1914    foreach_iter(exec_list_iterator, iter, *ir) {
1915       ir_rvalue *param_rval = (ir_rvalue *)iter.get();
1916       ir_variable *param = (ir_variable *)sig_iter.get();
1917
1918       if (param->mode == ir_var_out ||
1919           param->mode == ir_var_inout) {
1920          variable_storage *storage = find_variable_storage(param);
1921          assert(storage);
1922
1923          ir_to_mesa_src_reg r;
1924          r.file = storage->file;
1925          r.index = storage->index;
1926          r.reladdr = NULL;
1927          r.swizzle = SWIZZLE_NOOP;
1928          r.negate = 0;
1929
1930          param_rval->accept(this);
1931          ir_to_mesa_dst_reg l = ir_to_mesa_dst_reg_from_src(this->result);
1932
1933          for (i = 0; i < type_size(param->type); i++) {
1934             ir_to_mesa_emit_op1(ir, OPCODE_MOV, l, r);
1935             l.index++;
1936             r.index++;
1937          }
1938       }
1939
1940       sig_iter.next();
1941    }
1942    assert(!sig_iter.has_next());
1943
1944    /* Process return value. */
1945    this->result = entry->return_reg;
1946 }
1947
1948
1949 void
1950 ir_to_mesa_visitor::visit(ir_texture *ir)
1951 {
1952    ir_to_mesa_src_reg result_src, coord, lod_info, projector;
1953    ir_to_mesa_dst_reg result_dst, coord_dst;
1954    ir_to_mesa_instruction *inst = NULL;
1955    prog_opcode opcode = OPCODE_NOP;
1956
1957    ir->coordinate->accept(this);
1958
1959    /* Put our coords in a temp.  We'll need to modify them for shadow,
1960     * projection, or LOD, so the only case we'd use it as is is if
1961     * we're doing plain old texturing.  Mesa IR optimization should
1962     * handle cleaning up our mess in that case.
1963     */
1964    coord = get_temp(glsl_type::vec4_type);
1965    coord_dst = ir_to_mesa_dst_reg_from_src(coord);
1966    ir_to_mesa_emit_op1(ir, OPCODE_MOV, coord_dst,
1967                        this->result);
1968
1969    if (ir->projector) {
1970       ir->projector->accept(this);
1971       projector = this->result;
1972    }
1973
1974    /* Storage for our result.  Ideally for an assignment we'd be using
1975     * the actual storage for the result here, instead.
1976     */
1977    result_src = get_temp(glsl_type::vec4_type);
1978    result_dst = ir_to_mesa_dst_reg_from_src(result_src);
1979
1980    switch (ir->op) {
1981    case ir_tex:
1982       opcode = OPCODE_TEX;
1983       break;
1984    case ir_txb:
1985       opcode = OPCODE_TXB;
1986       ir->lod_info.bias->accept(this);
1987       lod_info = this->result;
1988       break;
1989    case ir_txl:
1990       opcode = OPCODE_TXL;
1991       ir->lod_info.lod->accept(this);
1992       lod_info = this->result;
1993       break;
1994    case ir_txd:
1995    case ir_txf:
1996       assert(!"GLSL 1.30 features unsupported");
1997       break;
1998    }
1999
2000    if (ir->projector) {
2001       if (opcode == OPCODE_TEX) {
2002          /* Slot the projector in as the last component of the coord. */
2003          coord_dst.writemask = WRITEMASK_W;
2004          ir_to_mesa_emit_op1(ir, OPCODE_MOV, coord_dst, projector);
2005          coord_dst.writemask = WRITEMASK_XYZW;
2006          opcode = OPCODE_TXP;
2007       } else {
2008          ir_to_mesa_src_reg coord_w = coord;
2009          coord_w.swizzle = SWIZZLE_WWWW;
2010
2011          /* For the other TEX opcodes there's no projective version
2012           * since the last slot is taken up by lod info.  Do the
2013           * projective divide now.
2014           */
2015          coord_dst.writemask = WRITEMASK_W;
2016          ir_to_mesa_emit_op1(ir, OPCODE_RCP, coord_dst, projector);
2017
2018          coord_dst.writemask = WRITEMASK_XYZ;
2019          ir_to_mesa_emit_op2(ir, OPCODE_MUL, coord_dst, coord, coord_w);
2020
2021          coord_dst.writemask = WRITEMASK_XYZW;
2022          coord.swizzle = SWIZZLE_XYZW;
2023       }
2024    }
2025
2026    if (ir->shadow_comparitor) {
2027       /* Slot the shadow value in as the second to last component of the
2028        * coord.
2029        */
2030       ir->shadow_comparitor->accept(this);
2031       coord_dst.writemask = WRITEMASK_Z;
2032       ir_to_mesa_emit_op1(ir, OPCODE_MOV, coord_dst, this->result);
2033       coord_dst.writemask = WRITEMASK_XYZW;
2034    }
2035
2036    if (opcode == OPCODE_TXL || opcode == OPCODE_TXB) {
2037       /* Mesa IR stores lod or lod bias in the last channel of the coords. */
2038       coord_dst.writemask = WRITEMASK_W;
2039       ir_to_mesa_emit_op1(ir, OPCODE_MOV, coord_dst, lod_info);
2040       coord_dst.writemask = WRITEMASK_XYZW;
2041    }
2042
2043    inst = ir_to_mesa_emit_op1(ir, opcode, result_dst, coord);
2044
2045    if (ir->shadow_comparitor)
2046       inst->tex_shadow = GL_TRUE;
2047
2048    ir_variable *sampler = ir->sampler->variable_referenced();
2049
2050    /* generate the mapping, remove when we generate storage at
2051     * declaration time
2052     */
2053    ir->sampler->accept(this);
2054
2055    inst->sampler = get_sampler_location(sampler);
2056
2057    ir_dereference_array *sampler_array = ir->sampler->as_dereference_array();
2058    if (sampler_array) {
2059       ir_constant *array_index =
2060          sampler_array->array_index->constant_expression_value();
2061
2062       /* GLSL 1.10 and 1.20 allowed variable sampler array indices,
2063        * while GLSL 1.30 requires that the array indices be constant
2064        * integer expressions.  We don't expect any driver to actually
2065        * work with a really variable array index, and in 1.20 all that
2066        * would work would be an unrolled loop counter, so assert that
2067        * we ended up with a constant at least..
2068        */
2069       assert(array_index);
2070       inst->sampler += array_index->value.i[0];
2071    }
2072
2073    const glsl_type *sampler_type = sampler->type;
2074    while (sampler_type->base_type == GLSL_TYPE_ARRAY)
2075       sampler_type = sampler_type->fields.array;
2076
2077    switch (sampler_type->sampler_dimensionality) {
2078    case GLSL_SAMPLER_DIM_1D:
2079       inst->tex_target = (sampler_type->sampler_array)
2080          ? TEXTURE_1D_ARRAY_INDEX : TEXTURE_1D_INDEX;
2081       break;
2082    case GLSL_SAMPLER_DIM_2D:
2083       inst->tex_target = (sampler_type->sampler_array)
2084          ? TEXTURE_2D_ARRAY_INDEX : TEXTURE_2D_INDEX;
2085       break;
2086    case GLSL_SAMPLER_DIM_3D:
2087       inst->tex_target = TEXTURE_3D_INDEX;
2088       break;
2089    case GLSL_SAMPLER_DIM_CUBE:
2090       inst->tex_target = TEXTURE_CUBE_INDEX;
2091       break;
2092    default:
2093       assert(!"FINISHME: other texture targets");
2094    }
2095
2096    this->result = result_src;
2097 }
2098
2099 void
2100 ir_to_mesa_visitor::visit(ir_return *ir)
2101 {
2102    assert(current_function);
2103
2104    if (ir->get_value()) {
2105       ir_to_mesa_dst_reg l;
2106       int i;
2107
2108       ir->get_value()->accept(this);
2109       ir_to_mesa_src_reg r = this->result;
2110
2111       l = ir_to_mesa_dst_reg_from_src(current_function->return_reg);
2112
2113       for (i = 0; i < type_size(current_function->sig->return_type); i++) {
2114          ir_to_mesa_emit_op1(ir, OPCODE_MOV, l, r);
2115          l.index++;
2116          r.index++;
2117       }
2118    }
2119
2120    ir_to_mesa_emit_op0(ir, OPCODE_RET);
2121 }
2122
2123 void
2124 ir_to_mesa_visitor::visit(ir_discard *ir)
2125 {
2126    assert(ir->condition == NULL); /* FINISHME */
2127
2128    ir_to_mesa_emit_op0(ir, OPCODE_KIL_NV);
2129 }
2130
2131 void
2132 ir_to_mesa_visitor::visit(ir_if *ir)
2133 {
2134    ir_to_mesa_instruction *cond_inst, *if_inst, *else_inst = NULL;
2135    ir_to_mesa_instruction *prev_inst;
2136
2137    prev_inst = (ir_to_mesa_instruction *)this->instructions.get_tail();
2138
2139    ir->condition->accept(this);
2140    assert(this->result.file != PROGRAM_UNDEFINED);
2141
2142    if (ctx->Shader.EmitCondCodes) {
2143       cond_inst = (ir_to_mesa_instruction *)this->instructions.get_tail();
2144
2145       /* See if we actually generated any instruction for generating
2146        * the condition.  If not, then cook up a move to a temp so we
2147        * have something to set cond_update on.
2148        */
2149       if (cond_inst == prev_inst) {
2150          ir_to_mesa_src_reg temp = get_temp(glsl_type::bool_type);
2151          cond_inst = ir_to_mesa_emit_op1(ir->condition, OPCODE_MOV,
2152                                          ir_to_mesa_dst_reg_from_src(temp),
2153                                          result);
2154       }
2155       cond_inst->cond_update = GL_TRUE;
2156
2157       if_inst = ir_to_mesa_emit_op0(ir->condition, OPCODE_IF);
2158       if_inst->dst_reg.cond_mask = COND_NE;
2159    } else {
2160       if_inst = ir_to_mesa_emit_op1(ir->condition,
2161                                     OPCODE_IF, ir_to_mesa_undef_dst,
2162                                     this->result);
2163    }
2164
2165    this->instructions.push_tail(if_inst);
2166
2167    visit_exec_list(&ir->then_instructions, this);
2168
2169    if (!ir->else_instructions.is_empty()) {
2170       else_inst = ir_to_mesa_emit_op0(ir->condition, OPCODE_ELSE);
2171       visit_exec_list(&ir->else_instructions, this);
2172    }
2173
2174    if_inst = ir_to_mesa_emit_op1(ir->condition, OPCODE_ENDIF,
2175                                  ir_to_mesa_undef_dst, ir_to_mesa_undef);
2176 }
2177
2178 ir_to_mesa_visitor::ir_to_mesa_visitor()
2179 {
2180    result.file = PROGRAM_UNDEFINED;
2181    next_temp = 1;
2182    next_signature_id = 1;
2183    sampler_map = NULL;
2184    current_function = NULL;
2185 }
2186
2187 ir_to_mesa_visitor::~ir_to_mesa_visitor()
2188 {
2189    if (this->sampler_map)
2190       hash_table_dtor(this->sampler_map);
2191 }
2192
2193 static struct prog_src_register
2194 mesa_src_reg_from_ir_src_reg(ir_to_mesa_src_reg reg)
2195 {
2196    struct prog_src_register mesa_reg;
2197
2198    mesa_reg.File = reg.file;
2199    assert(reg.index < (1 << INST_INDEX_BITS) - 1);
2200    mesa_reg.Index = reg.index;
2201    mesa_reg.Swizzle = reg.swizzle;
2202    mesa_reg.RelAddr = reg.reladdr != NULL;
2203    mesa_reg.Negate = reg.negate;
2204    mesa_reg.Abs = 0;
2205    mesa_reg.HasIndex2 = GL_FALSE;
2206
2207    return mesa_reg;
2208 }
2209
2210 static void
2211 set_branchtargets(ir_to_mesa_visitor *v,
2212                   struct prog_instruction *mesa_instructions,
2213                   int num_instructions)
2214 {
2215    int if_count = 0, loop_count = 0;
2216    int *if_stack, *loop_stack;
2217    int if_stack_pos = 0, loop_stack_pos = 0;
2218    int i, j;
2219
2220    for (i = 0; i < num_instructions; i++) {
2221       switch (mesa_instructions[i].Opcode) {
2222       case OPCODE_IF:
2223          if_count++;
2224          break;
2225       case OPCODE_BGNLOOP:
2226          loop_count++;
2227          break;
2228       case OPCODE_BRK:
2229       case OPCODE_CONT:
2230          mesa_instructions[i].BranchTarget = -1;
2231          break;
2232       default:
2233          break;
2234       }
2235    }
2236
2237    if_stack = (int *)calloc(if_count, sizeof(*if_stack));
2238    loop_stack = (int *)calloc(loop_count, sizeof(*loop_stack));
2239
2240    for (i = 0; i < num_instructions; i++) {
2241       switch (mesa_instructions[i].Opcode) {
2242       case OPCODE_IF:
2243          if_stack[if_stack_pos] = i;
2244          if_stack_pos++;
2245          break;
2246       case OPCODE_ELSE:
2247          mesa_instructions[if_stack[if_stack_pos - 1]].BranchTarget = i;
2248          if_stack[if_stack_pos - 1] = i;
2249          break;
2250       case OPCODE_ENDIF:
2251          mesa_instructions[if_stack[if_stack_pos - 1]].BranchTarget = i;
2252          if_stack_pos--;
2253          break;
2254       case OPCODE_BGNLOOP:
2255          loop_stack[loop_stack_pos] = i;
2256          loop_stack_pos++;
2257          break;
2258       case OPCODE_ENDLOOP:
2259          loop_stack_pos--;
2260          /* Rewrite any breaks/conts at this nesting level (haven't
2261           * already had a BranchTarget assigned) to point to the end
2262           * of the loop.
2263           */
2264          for (j = loop_stack[loop_stack_pos]; j < i; j++) {
2265             if (mesa_instructions[j].Opcode == OPCODE_BRK ||
2266                 mesa_instructions[j].Opcode == OPCODE_CONT) {
2267                if (mesa_instructions[j].BranchTarget == -1) {
2268                   mesa_instructions[j].BranchTarget = i;
2269                }
2270             }
2271          }
2272          /* The loop ends point at each other. */
2273          mesa_instructions[i].BranchTarget = loop_stack[loop_stack_pos];
2274          mesa_instructions[loop_stack[loop_stack_pos]].BranchTarget = i;
2275          break;
2276       case OPCODE_CAL:
2277          foreach_iter(exec_list_iterator, iter, v->function_signatures) {
2278             function_entry *entry = (function_entry *)iter.get();
2279
2280             if (entry->sig_id == mesa_instructions[i].BranchTarget) {
2281                mesa_instructions[i].BranchTarget = entry->inst;
2282                break;
2283             }
2284          }
2285          break;
2286       default:
2287          break;
2288       }
2289    }
2290
2291    free(if_stack);
2292 }
2293
2294 static void
2295 print_program(struct prog_instruction *mesa_instructions,
2296               ir_instruction **mesa_instruction_annotation,
2297               int num_instructions)
2298 {
2299    ir_instruction *last_ir = NULL;
2300    int i;
2301    int indent = 0;
2302
2303    for (i = 0; i < num_instructions; i++) {
2304       struct prog_instruction *mesa_inst = mesa_instructions + i;
2305       ir_instruction *ir = mesa_instruction_annotation[i];
2306
2307       fprintf(stdout, "%3d: ", i);
2308
2309       if (last_ir != ir && ir) {
2310          int j;
2311
2312          for (j = 0; j < indent; j++) {
2313             fprintf(stdout, " ");
2314          }
2315          ir->print();
2316          printf("\n");
2317          last_ir = ir;
2318
2319          fprintf(stdout, "     "); /* line number spacing. */
2320       }
2321
2322       indent = _mesa_fprint_instruction_opt(stdout, mesa_inst, indent,
2323                                             PROG_PRINT_DEBUG, NULL);
2324    }
2325 }
2326
2327 static void
2328 count_resources(struct gl_program *prog)
2329 {
2330    unsigned int i;
2331
2332    prog->SamplersUsed = 0;
2333
2334    for (i = 0; i < prog->NumInstructions; i++) {
2335       struct prog_instruction *inst = &prog->Instructions[i];
2336
2337       /* Instead of just using the uniform's value to map to a
2338        * sampler, Mesa first allocates a separate number for the
2339        * sampler (_mesa_add_sampler), then we reindex it down to a
2340        * small integer (sampler_map[], SamplersUsed), then that gets
2341        * mapped to the uniform's value, and we get an actual sampler.
2342        */
2343       if (_mesa_is_tex_instruction(inst->Opcode)) {
2344          prog->SamplerTargets[inst->TexSrcUnit] =
2345             (gl_texture_index)inst->TexSrcTarget;
2346          prog->SamplersUsed |= 1 << inst->TexSrcUnit;
2347          if (inst->TexShadow) {
2348             prog->ShadowSamplers |= 1 << inst->TexSrcUnit;
2349          }
2350       }
2351    }
2352
2353    _mesa_update_shader_textures_used(prog);
2354 }
2355
2356 /* Each stage has some uniforms in its Parameters list.  The Uniforms
2357  * list for the linked shader program has a pointer to these uniforms
2358  * in each of the stage's Parameters list, so that their values can be
2359  * updated when a uniform is set.
2360  */
2361 static void
2362 link_uniforms_to_shared_uniform_list(struct gl_uniform_list *uniforms,
2363                                      struct gl_program *prog)
2364 {
2365    unsigned int i;
2366
2367    for (i = 0; i < prog->Parameters->NumParameters; i++) {
2368       const struct gl_program_parameter *p = prog->Parameters->Parameters + i;
2369
2370       if (p->Type == PROGRAM_UNIFORM || p->Type == PROGRAM_SAMPLER) {
2371          struct gl_uniform *uniform =
2372             _mesa_append_uniform(uniforms, p->Name, prog->Target, i);
2373          if (uniform)
2374             uniform->Initialized = p->Initialized;
2375       }
2376    }
2377 }
2378
2379 struct gl_program *
2380 get_mesa_program(GLcontext *ctx, struct gl_shader_program *shader_program,
2381                  struct gl_shader *shader)
2382 {
2383    void *mem_ctx = shader_program;
2384    ir_to_mesa_visitor v;
2385    struct prog_instruction *mesa_instructions, *mesa_inst;
2386    ir_instruction **mesa_instruction_annotation;
2387    int i;
2388    struct gl_program *prog;
2389    GLenum target;
2390    const char *target_string;
2391    GLboolean progress;
2392
2393    switch (shader->Type) {
2394    case GL_VERTEX_SHADER:
2395       target = GL_VERTEX_PROGRAM_ARB;
2396       target_string = "vertex";
2397       break;
2398    case GL_FRAGMENT_SHADER:
2399       target = GL_FRAGMENT_PROGRAM_ARB;
2400       target_string = "fragment";
2401       break;
2402    default:
2403       assert(!"should not be reached");
2404       break;
2405    }
2406
2407    validate_ir_tree(shader->ir);
2408
2409    prog = ctx->Driver.NewProgram(ctx, target, shader_program->Name);
2410    if (!prog)
2411       return NULL;
2412    prog->Parameters = _mesa_new_parameter_list();
2413    prog->Varying = _mesa_new_parameter_list();
2414    prog->Attributes = _mesa_new_parameter_list();
2415    v.ctx = ctx;
2416    v.prog = prog;
2417
2418    v.mem_ctx = talloc_new(NULL);
2419
2420    /* Emit Mesa IR for main(). */
2421    visit_exec_list(shader->ir, &v);
2422    v.ir_to_mesa_emit_op0(NULL, OPCODE_END);
2423
2424    /* Now emit bodies for any functions that were used. */
2425    do {
2426       progress = GL_FALSE;
2427
2428       foreach_iter(exec_list_iterator, iter, v.function_signatures) {
2429          function_entry *entry = (function_entry *)iter.get();
2430
2431          if (!entry->bgn_inst) {
2432             v.current_function = entry;
2433
2434             entry->bgn_inst = v.ir_to_mesa_emit_op0(NULL, OPCODE_BGNSUB);
2435             entry->bgn_inst->function = entry;
2436
2437             visit_exec_list(&entry->sig->body, &v);
2438
2439             ir_to_mesa_instruction *last;
2440             last = (ir_to_mesa_instruction *)v.instructions.get_tail();
2441             if (last->op != OPCODE_RET)
2442                v.ir_to_mesa_emit_op0(NULL, OPCODE_RET);
2443
2444             ir_to_mesa_instruction *end;
2445             end = v.ir_to_mesa_emit_op0(NULL, OPCODE_ENDSUB);
2446             end->function = entry;
2447
2448             progress = GL_TRUE;
2449          }
2450       }
2451    } while (progress);
2452
2453    prog->NumTemporaries = v.next_temp;
2454
2455    int num_instructions = 0;
2456    foreach_iter(exec_list_iterator, iter, v.instructions) {
2457       num_instructions++;
2458    }
2459
2460    mesa_instructions =
2461       (struct prog_instruction *)calloc(num_instructions,
2462                                         sizeof(*mesa_instructions));
2463    mesa_instruction_annotation = talloc_array(mem_ctx, ir_instruction *,
2464                                               num_instructions);
2465
2466    mesa_inst = mesa_instructions;
2467    i = 0;
2468    foreach_iter(exec_list_iterator, iter, v.instructions) {
2469       ir_to_mesa_instruction *inst = (ir_to_mesa_instruction *)iter.get();
2470
2471       mesa_inst->Opcode = inst->op;
2472       mesa_inst->CondUpdate = inst->cond_update;
2473       mesa_inst->DstReg.File = inst->dst_reg.file;
2474       mesa_inst->DstReg.Index = inst->dst_reg.index;
2475       mesa_inst->DstReg.CondMask = inst->dst_reg.cond_mask;
2476       mesa_inst->DstReg.WriteMask = inst->dst_reg.writemask;
2477       mesa_inst->DstReg.RelAddr = inst->dst_reg.reladdr != NULL;
2478       mesa_inst->SrcReg[0] = mesa_src_reg_from_ir_src_reg(inst->src_reg[0]);
2479       mesa_inst->SrcReg[1] = mesa_src_reg_from_ir_src_reg(inst->src_reg[1]);
2480       mesa_inst->SrcReg[2] = mesa_src_reg_from_ir_src_reg(inst->src_reg[2]);
2481       mesa_inst->TexSrcUnit = inst->sampler;
2482       mesa_inst->TexSrcTarget = inst->tex_target;
2483       mesa_inst->TexShadow = inst->tex_shadow;
2484       mesa_instruction_annotation[i] = inst->ir;
2485
2486       if (ctx->Shader.EmitNoIfs && mesa_inst->Opcode == OPCODE_IF) {
2487          shader_program->InfoLog =
2488             talloc_asprintf_append(shader_program->InfoLog,
2489                                    "Couldn't flatten if statement\n");
2490          shader_program->LinkStatus = false;
2491       }
2492
2493       switch (mesa_inst->Opcode) {
2494       case OPCODE_BGNSUB:
2495          inst->function->inst = i;
2496          mesa_inst->Comment = strdup(inst->function->sig->function_name());
2497          break;
2498       case OPCODE_ENDSUB:
2499          mesa_inst->Comment = strdup(inst->function->sig->function_name());
2500          break;
2501       case OPCODE_CAL:
2502          mesa_inst->BranchTarget = inst->function->sig_id; /* rewritten later */
2503          break;
2504       case OPCODE_ARL:
2505          prog->NumAddressRegs = 1;
2506          break;
2507       default:
2508          break;
2509       }
2510
2511       mesa_inst++;
2512       i++;
2513    }
2514
2515    set_branchtargets(&v, mesa_instructions, num_instructions);
2516
2517    if (ctx->Shader.Flags & GLSL_DUMP) {
2518       printf("\n");
2519       printf("GLSL IR for linked %s program %d:\n", target_string,
2520              shader_program->Name);
2521       _mesa_print_ir(shader->ir, NULL);
2522       printf("\n");
2523       printf("\n");
2524       printf("Mesa IR for linked %s program %d:\n", target_string,
2525              shader_program->Name);
2526       print_program(mesa_instructions, mesa_instruction_annotation,
2527                     num_instructions);
2528    }
2529
2530    prog->Instructions = mesa_instructions;
2531    prog->NumInstructions = num_instructions;
2532
2533    do_set_program_inouts(shader->ir, prog);
2534    count_resources(prog);
2535
2536    _mesa_reference_program(ctx, &shader->Program, prog);
2537
2538    if ((ctx->Shader.Flags & GLSL_NO_OPT) == 0) {
2539       _mesa_optimize_program(ctx, prog);
2540    }
2541
2542    return prog;
2543 }
2544
2545 extern "C" {
2546
2547 void
2548 _mesa_glsl_compile_shader(GLcontext *ctx, struct gl_shader *shader)
2549 {
2550    struct _mesa_glsl_parse_state *state =
2551       new(shader) _mesa_glsl_parse_state(ctx, shader->Type, shader);
2552
2553    const char *source = shader->Source;
2554    state->error = preprocess(state, &source, &state->info_log,
2555                              &ctx->Extensions);
2556
2557    if (!state->error) {
2558      _mesa_glsl_lexer_ctor(state, source);
2559      _mesa_glsl_parse(state);
2560      _mesa_glsl_lexer_dtor(state);
2561    }
2562
2563    shader->ir = new(shader) exec_list;
2564    if (!state->error && !state->translation_unit.is_empty())
2565       _mesa_ast_to_hir(shader->ir, state);
2566
2567    if (!state->error && !shader->ir->is_empty()) {
2568       validate_ir_tree(shader->ir);
2569
2570       /* Do some optimization at compile time to reduce shader IR size
2571        * and reduce later work if the same shader is linked multiple times
2572        */
2573       while (do_common_optimization(shader->ir, false))
2574          ;
2575
2576       validate_ir_tree(shader->ir);
2577    }
2578
2579    shader->symbols = state->symbols;
2580
2581    shader->CompileStatus = !state->error;
2582    shader->InfoLog = state->info_log;
2583    shader->Version = state->language_version;
2584    memcpy(shader->builtins_to_link, state->builtins_to_link,
2585           sizeof(shader->builtins_to_link[0]) * state->num_builtins_to_link);
2586    shader->num_builtins_to_link = state->num_builtins_to_link;
2587
2588    if (ctx->Shader.Flags & GLSL_LOG) {
2589       _mesa_write_shader_to_file(shader);
2590    }
2591
2592    if (ctx->Shader.Flags & GLSL_DUMP) {
2593       printf("GLSL source for shader %d:\n", shader->Name);
2594       printf("%s\n", shader->Source);
2595
2596       if (shader->CompileStatus) {
2597          printf("GLSL IR for shader %d:\n", shader->Name);
2598          _mesa_print_ir(shader->ir, NULL);
2599          printf("\n\n");
2600       }
2601    }
2602
2603    /* Retain any live IR, but trash the rest. */
2604    reparent_ir(shader->ir, shader);
2605
2606    talloc_free(state);
2607  }
2608
2609 void
2610 _mesa_glsl_link_shader(GLcontext *ctx, struct gl_shader_program *prog)
2611 {
2612    unsigned int i;
2613
2614    _mesa_clear_shader_program_data(ctx, prog);
2615
2616    prog->LinkStatus = GL_TRUE;
2617
2618    for (i = 0; i < prog->NumShaders; i++) {
2619       if (!prog->Shaders[i]->CompileStatus) {
2620          prog->InfoLog =
2621             talloc_asprintf_append(prog->InfoLog,
2622                                    "linking with uncompiled shader");
2623          prog->LinkStatus = GL_FALSE;
2624       }
2625    }
2626
2627    prog->Varying = _mesa_new_parameter_list();
2628    _mesa_reference_vertprog(ctx, &prog->VertexProgram, NULL);
2629    _mesa_reference_fragprog(ctx, &prog->FragmentProgram, NULL);
2630
2631    if (prog->LinkStatus) {
2632       link_shaders(prog);
2633
2634       /* We don't use the linker's uniforms list, and cook up our own at
2635        * generate time.
2636        */
2637       free(prog->Uniforms);
2638       prog->Uniforms = _mesa_new_uniform_list();
2639    }
2640
2641    if (prog->LinkStatus) {
2642       for (unsigned i = 0; i < prog->_NumLinkedShaders; i++) {
2643          bool progress;
2644          exec_list *ir = prog->_LinkedShaders[i]->ir;
2645
2646          do {
2647             progress = false;
2648
2649             /* Lowering */
2650             do_mat_op_to_vec(ir);
2651             do_mod_to_fract(ir);
2652             do_div_to_mul_rcp(ir);
2653             do_explog_to_explog2(ir);
2654
2655             progress = do_common_optimization(ir, true) || progress;
2656
2657             if (ctx->Shader.EmitNoIfs)
2658                progress = do_if_to_cond_assign(ir) || progress;
2659
2660             progress = do_vec_index_to_cond_assign(ir) || progress;
2661          } while (progress);
2662       }
2663    }
2664
2665    if (prog->LinkStatus) {
2666       for (i = 0; i < prog->_NumLinkedShaders; i++) {
2667          struct gl_program *linked_prog;
2668          bool ok = true;
2669
2670          linked_prog = get_mesa_program(ctx, prog,
2671                                         prog->_LinkedShaders[i]);
2672
2673          link_uniforms_to_shared_uniform_list(prog->Uniforms, linked_prog);
2674
2675          switch (prog->_LinkedShaders[i]->Type) {
2676          case GL_VERTEX_SHADER:
2677             _mesa_reference_vertprog(ctx, &prog->VertexProgram,
2678                                      (struct gl_vertex_program *)linked_prog);
2679             ok = ctx->Driver.ProgramStringNotify(ctx, GL_VERTEX_PROGRAM_ARB,
2680                                                  linked_prog);
2681             break;
2682          case GL_FRAGMENT_SHADER:
2683             _mesa_reference_fragprog(ctx, &prog->FragmentProgram,
2684                                      (struct gl_fragment_program *)linked_prog);
2685             ok = ctx->Driver.ProgramStringNotify(ctx, GL_FRAGMENT_PROGRAM_ARB,
2686                                                  linked_prog);
2687             break;
2688          }
2689          if (!ok) {
2690             prog->LinkStatus = GL_FALSE;
2691          }
2692       }
2693    }
2694 }
2695
2696 } /* extern "C" */