src/mesa/shader/ir_to_mesa.cpp

   1 /*
   2  * Copyright (C) 2005-2007  Brian Paul   All Rights Reserved.
   3  * Copyright (C) 2008  VMware, Inc.   All Rights Reserved.
   4  * Copyright © 2010 Intel Corporation
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a
   7  * copy of this software and associated documentation files (the "Software"),
   8  * to deal in the Software without restriction, including without limitation
   9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  10  * and/or sell copies of the Software, and to permit persons to whom the
  11  * Software is furnished to do so, subject to the following conditions:
  12  *
  13  * The above copyright notice and this permission notice (including the next
  14  * paragraph) shall be included in all copies or substantial portions of the
  15  * Software.
  16  *
  17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  20  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  22  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  23  * DEALINGS IN THE SOFTWARE.
  24  */
  25
  26 /**
  27  * \file ir_to_mesa.cpp
  28  *
  29  * Translates the IR to ARB_fragment_program text if possible,
  30  * printing the result
  31  */
  32
  33 #include <stdio.h>
  34 #include "ir.h"
  35 #include "ir_visitor.h"
  36 #include "ir_print_visitor.h"
  37 #include "ir_expression_flattening.h"
  38 #include "glsl_types.h"
  39 #include "glsl_parser_extras.h"
  40 #include "../glsl/program.h"
  41 #include "ir_optimization.h"
  42 #include "ast.h"
  43
  44 extern "C" {
  45 #include "main/mtypes.h"
  46 #include "shader/prog_instruction.h"
  47 #include "shader/prog_optimize.h"
  48 #include "shader/prog_print.h"
  49 #include "shader/program.h"
  50 #include "shader/prog_uniform.h"
  51 #include "shader/prog_parameter.h"
  52 #include "shader/shader_api.h"
  53 }
  54
  55 /**
  56  * This struct is a corresponding struct to Mesa prog_src_register, with
  57  * wider fields.
  58  */
  59 typedef struct ir_to_mesa_src_reg {
  60    int file; /**< PROGRAM_* from Mesa */
  61    int index; /**< temporary index, VERT_ATTRIB_*, FRAG_ATTRIB_*, etc. */
  62    GLuint swizzle; /**< SWIZZLE_XYZWONEZERO swizzles from Mesa. */
  63    int negate; /**< NEGATE_XYZW mask from mesa */
  64    bool reladdr; /**< Register index should be offset by address reg. */
  65 } ir_to_mesa_src_reg;
  66
  67 typedef struct ir_to_mesa_dst_reg {
  68    int file; /**< PROGRAM_* from Mesa */
  69    int index; /**< temporary index, VERT_ATTRIB_*, FRAG_ATTRIB_*, etc. */
  70    int writemask; /**< Bitfield of WRITEMASK_[XYZW] */
  71    GLuint cond_mask:4;
  72 } ir_to_mesa_dst_reg;
  73
  74 extern ir_to_mesa_src_reg ir_to_mesa_undef;
  75
  76 class ir_to_mesa_instruction : public exec_node {
  77 public:
  78    enum prog_opcode op;
  79    ir_to_mesa_dst_reg dst_reg;
  80    ir_to_mesa_src_reg src_reg[3];
  81    /** Pointer to the ir source this tree came from for debugging */
  82    ir_instruction *ir;
  83    GLboolean cond_update;
  84    int sampler; /**< sampler index */
  85    int tex_target; /**< One of TEXTURE_*_INDEX */
  86    GLboolean tex_shadow;
  87 };
  88
  89 class temp_entry : public exec_node {
  90 public:
  91    temp_entry(ir_variable *var, int file, int index)
  92       : file(file), index(index), var(var)
  93    {
  94       /* empty */
  95    }
  96
  97    int file;
  98    int index;
  99    ir_variable *var; /* variable that maps to this, if any */
 100 };
 101
 102 class ir_to_mesa_visitor : public ir_visitor {
 103 public:
 104    ir_to_mesa_visitor();
 105
 106    GLcontext *ctx;
 107    struct gl_program *prog;
 108
 109    int next_temp;
 110
 111    temp_entry *find_variable_storage(ir_variable *var);
 112
 113    ir_to_mesa_src_reg get_temp(const glsl_type *type);
 114
 115    struct ir_to_mesa_src_reg src_reg_for_float(float val);
 116
 117    /**
 118     * \name Visit methods
 119     *
 120     * As typical for the visitor pattern, there must be one \c visit method for
 121     * each concrete subclass of \c ir_instruction.  Virtual base classes within
 122     * the hierarchy should not have \c visit methods.
 123     */
 124    /*@{*/
 125    virtual void visit(ir_variable *);
 126    virtual void visit(ir_loop *);
 127    virtual void visit(ir_loop_jump *);
 128    virtual void visit(ir_function_signature *);
 129    virtual void visit(ir_function *);
 130    virtual void visit(ir_expression *);
 131    virtual void visit(ir_swizzle *);
 132    virtual void visit(ir_dereference_variable  *);
 133    virtual void visit(ir_dereference_array *);
 134    virtual void visit(ir_dereference_record *);
 135    virtual void visit(ir_assignment *);
 136    virtual void visit(ir_constant *);
 137    virtual void visit(ir_call *);
 138    virtual void visit(ir_return *);
 139    virtual void visit(ir_discard *);
 140    virtual void visit(ir_texture *);
 141    virtual void visit(ir_if *);
 142    /*@}*/
 143
 144    struct ir_to_mesa_src_reg result;
 145
 146    /** List of temp_entry */
 147    exec_list variable_storage;
 148
 149    /** List of ir_to_mesa_instruction */
 150    exec_list instructions;
 151
 152    ir_to_mesa_instruction *ir_to_mesa_emit_op1(ir_instruction *ir,
 153                                                enum prog_opcode op,
 154                                                ir_to_mesa_dst_reg dst,
 155                                                ir_to_mesa_src_reg src0);
 156
 157    ir_to_mesa_instruction *ir_to_mesa_emit_op2(ir_instruction *ir,
 158                                                enum prog_opcode op,
 159                                                ir_to_mesa_dst_reg dst,
 160                                                ir_to_mesa_src_reg src0,
 161                                                ir_to_mesa_src_reg src1);
 162
 163    ir_to_mesa_instruction *ir_to_mesa_emit_op3(ir_instruction *ir,
 164                                                enum prog_opcode op,
 165                                                ir_to_mesa_dst_reg dst,
 166                                                ir_to_mesa_src_reg src0,
 167                                                ir_to_mesa_src_reg src1,
 168                                                ir_to_mesa_src_reg src2);
 169
 170    void ir_to_mesa_emit_scalar_op1(ir_instruction *ir,
 171                                    enum prog_opcode op,
 172                                    ir_to_mesa_dst_reg dst,
 173                                    ir_to_mesa_src_reg src0);
 174
 175    void ir_to_mesa_emit_scalar_op2(ir_instruction *ir,
 176                                    enum prog_opcode op,
 177                                    ir_to_mesa_dst_reg dst,
 178                                    ir_to_mesa_src_reg src0,
 179                                    ir_to_mesa_src_reg src1);
 180
 181    int *sampler_map;
 182    int sampler_map_size;
 183
 184    void map_sampler(int location, int sampler);
 185    int get_sampler_number(int location);
 186
 187    void *mem_ctx;
 188 };
 189
 190 ir_to_mesa_src_reg ir_to_mesa_undef = {
 191    PROGRAM_UNDEFINED, 0, SWIZZLE_NOOP, NEGATE_NONE, false,
 192 };
 193
 194 ir_to_mesa_dst_reg ir_to_mesa_undef_dst = {
 195    PROGRAM_UNDEFINED, 0, SWIZZLE_NOOP, COND_TR
 196 };
 197
 198 ir_to_mesa_dst_reg ir_to_mesa_address_reg = {
 199    PROGRAM_ADDRESS, 0, WRITEMASK_X, COND_TR
 200 };
 201
 202 static int swizzle_for_size(int size)
 203 {
 204    int size_swizzles[4] = {
 205       MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
 206       MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y),
 207       MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z),
 208       MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W),
 209    };
 210
 211    return size_swizzles[size - 1];
 212 }
 213
 214 ir_to_mesa_instruction *
 215 ir_to_mesa_visitor::ir_to_mesa_emit_op3(ir_instruction *ir,
 216                                         enum prog_opcode op,
 217                                         ir_to_mesa_dst_reg dst,
 218                                         ir_to_mesa_src_reg src0,
 219                                         ir_to_mesa_src_reg src1,
 220                                         ir_to_mesa_src_reg src2)
 221 {
 222    ir_to_mesa_instruction *inst = new(mem_ctx) ir_to_mesa_instruction();
 223
 224    inst->op = op;
 225    inst->dst_reg = dst;
 226    inst->src_reg[0] = src0;
 227    inst->src_reg[1] = src1;
 228    inst->src_reg[2] = src2;
 229    inst->ir = ir;
 230
 231    this->instructions.push_tail(inst);
 232
 233    return inst;
 234 }
 235
 236
 237 ir_to_mesa_instruction *
 238 ir_to_mesa_visitor::ir_to_mesa_emit_op2(ir_instruction *ir,
 239                                         enum prog_opcode op,
 240                                         ir_to_mesa_dst_reg dst,
 241                                         ir_to_mesa_src_reg src0,
 242                                         ir_to_mesa_src_reg src1)
 243 {
 244    return ir_to_mesa_emit_op3(ir, op, dst, src0, src1, ir_to_mesa_undef);
 245 }
 246
 247 ir_to_mesa_instruction *
 248 ir_to_mesa_visitor::ir_to_mesa_emit_op1(ir_instruction *ir,
 249                                         enum prog_opcode op,
 250                                         ir_to_mesa_dst_reg dst,
 251                                         ir_to_mesa_src_reg src0)
 252 {
 253    return ir_to_mesa_emit_op3(ir, op, dst,
 254                               src0, ir_to_mesa_undef, ir_to_mesa_undef);
 255 }
 256
 257 void
 258 ir_to_mesa_visitor::map_sampler(int location, int sampler)
 259 {
 260    if (this->sampler_map_size <= location) {
 261       this->sampler_map = talloc_realloc(this->mem_ctx, this->sampler_map,
 262                                          int, location + 1);
 263       this->sampler_map_size = location + 1;
 264    }
 265
 266    this->sampler_map[location] = sampler;
 267 }
 268
 269 int
 270 ir_to_mesa_visitor::get_sampler_number(int location)
 271 {
 272    assert(location < this->sampler_map_size);
 273    return this->sampler_map[location];
 274 }
 275
 276 inline ir_to_mesa_dst_reg
 277 ir_to_mesa_dst_reg_from_src(ir_to_mesa_src_reg reg)
 278 {
 279    ir_to_mesa_dst_reg dst_reg;
 280
 281    dst_reg.file = reg.file;
 282    dst_reg.index = reg.index;
 283    dst_reg.writemask = WRITEMASK_XYZW;
 284    dst_reg.cond_mask = COND_TR;
 285
 286    return dst_reg;
 287 }
 288
 289 inline ir_to_mesa_src_reg
 290 ir_to_mesa_src_reg_from_dst(ir_to_mesa_dst_reg reg)
 291 {
 292    ir_to_mesa_src_reg src_reg;
 293
 294    src_reg.file = reg.file;
 295    src_reg.index = reg.index;
 296    src_reg.swizzle = SWIZZLE_XYZW;
 297    src_reg.negate = 0;
 298    src_reg.reladdr = 0;
 299
 300    return src_reg;
 301 }
 302
 303 /**
 304  * Emits Mesa scalar opcodes to produce unique answers across channels.
 305  *
 306  * Some Mesa opcodes are scalar-only, like ARB_fp/vp.  The src X
 307  * channel determines the result across all channels.  So to do a vec4
 308  * of this operation, we want to emit a scalar per source channel used
 309  * to produce dest channels.
 310  */
 311 void
 312 ir_to_mesa_visitor::ir_to_mesa_emit_scalar_op2(ir_instruction *ir,
 313                                                enum prog_opcode op,
 314                                                ir_to_mesa_dst_reg dst,
 315                                                ir_to_mesa_src_reg orig_src0,
 316                                                ir_to_mesa_src_reg orig_src1)
 317 {
 318    int i, j;
 319    int done_mask = ~dst.writemask;
 320
 321    /* Mesa RCP is a scalar operation splatting results to all channels,
 322     * like ARB_fp/vp.  So emit as many RCPs as necessary to cover our
 323     * dst channels.
 324     */
 325    for (i = 0; i < 4; i++) {
 326       GLuint this_mask = (1 << i);
 327       ir_to_mesa_instruction *inst;
 328       ir_to_mesa_src_reg src0 = orig_src0;
 329       ir_to_mesa_src_reg src1 = orig_src1;
 330
 331       if (done_mask & this_mask)
 332          continue;
 333
 334       GLuint src0_swiz = GET_SWZ(src0.swizzle, i);
 335       GLuint src1_swiz = GET_SWZ(src1.swizzle, i);
 336       for (j = i + 1; j < 4; j++) {
 337          if (!(done_mask & (1 << j)) &&
 338              GET_SWZ(src0.swizzle, j) == src0_swiz &&
 339              GET_SWZ(src1.swizzle, j) == src1_swiz) {
 340             this_mask |= (1 << j);
 341          }
 342       }
 343       src0.swizzle = MAKE_SWIZZLE4(src0_swiz, src0_swiz,
 344                                    src0_swiz, src0_swiz);
 345       src1.swizzle = MAKE_SWIZZLE4(src1_swiz, src1_swiz,
 346                                   src1_swiz, src1_swiz);
 347
 348       inst = ir_to_mesa_emit_op2(ir, op,
 349                                  dst,
 350                                  src0,
 351                                  src1);
 352       inst->dst_reg.writemask = this_mask;
 353       done_mask |= this_mask;
 354    }
 355 }
 356
 357 void
 358 ir_to_mesa_visitor::ir_to_mesa_emit_scalar_op1(ir_instruction *ir,
 359                                                enum prog_opcode op,
 360                                                ir_to_mesa_dst_reg dst,
 361                                                ir_to_mesa_src_reg src0)
 362 {
 363    ir_to_mesa_src_reg undef = ir_to_mesa_undef;
 364
 365    undef.swizzle = SWIZZLE_XXXX;
 366
 367    ir_to_mesa_emit_scalar_op2(ir, op, dst, src0, undef);
 368 }
 369
 370 struct ir_to_mesa_src_reg
 371 ir_to_mesa_visitor::src_reg_for_float(float val)
 372 {
 373    ir_to_mesa_src_reg src_reg;
 374
 375    src_reg.file = PROGRAM_CONSTANT;
 376    src_reg.index = _mesa_add_unnamed_constant(this->prog->Parameters,
 377                                               &val, 1, &src_reg.swizzle);
 378    src_reg.reladdr = GL_FALSE;
 379    src_reg.negate = 0;
 380
 381    return src_reg;
 382 }
 383
 384 static int
 385 type_size(const struct glsl_type *type)
 386 {
 387    unsigned int i;
 388    int size;
 389
 390    switch (type->base_type) {
 391    case GLSL_TYPE_UINT:
 392    case GLSL_TYPE_INT:
 393    case GLSL_TYPE_FLOAT:
 394    case GLSL_TYPE_BOOL:
 395       if (type->is_matrix()) {
 396          return type->matrix_columns;
 397       } else {
 398          /* Regardless of size of vector, it gets a vec4. This is bad
 399           * packing for things like floats, but otherwise arrays become a
 400           * mess.  Hopefully a later pass over the code can pack scalars
 401           * down if appropriate.
 402           */
 403          return 1;
 404       }
 405    case GLSL_TYPE_ARRAY:
 406       return type_size(type->fields.array) * type->length;
 407    case GLSL_TYPE_STRUCT:
 408       size = 0;
 409       for (i = 0; i < type->length; i++) {
 410          size += type_size(type->fields.structure[i].type);
 411       }
 412       return size;
 413    default:
 414       assert(0);
 415    }
 416 }
 417
 418 /**
 419  * In the initial pass of codegen, we assign temporary numbers to
 420  * intermediate results.  (not SSA -- variable assignments will reuse
 421  * storage).  Actual register allocation for the Mesa VM occurs in a
 422  * pass over the Mesa IR later.
 423  */
 424 ir_to_mesa_src_reg
 425 ir_to_mesa_visitor::get_temp(const glsl_type *type)
 426 {
 427    ir_to_mesa_src_reg src_reg;
 428    int swizzle[4];
 429    int i;
 430
 431    assert(!type->is_array());
 432
 433    src_reg.file = PROGRAM_TEMPORARY;
 434    src_reg.index = next_temp;
 435    src_reg.reladdr = false;
 436    next_temp += type_size(type);
 437
 438    for (i = 0; i < type->vector_elements; i++)
 439       swizzle[i] = i;
 440    for (; i < 4; i++)
 441       swizzle[i] = type->vector_elements - 1;
 442    src_reg.swizzle = MAKE_SWIZZLE4(swizzle[0], swizzle[1],
 443                                    swizzle[2], swizzle[3]);
 444    src_reg.negate = 0;
 445
 446    return src_reg;
 447 }
 448
 449 temp_entry *
 450 ir_to_mesa_visitor::find_variable_storage(ir_variable *var)
 451 {
 452
 453    temp_entry *entry;
 454
 455    foreach_iter(exec_list_iterator, iter, this->variable_storage) {
 456       entry = (temp_entry *)iter.get();
 457
 458       if (entry->var == var)
 459          return entry;
 460    }
 461
 462    return NULL;
 463 }
 464
 465 void
 466 ir_to_mesa_visitor::visit(ir_variable *ir)
 467 {
 468    (void)ir;
 469 }
 470
 471 void
 472 ir_to_mesa_visitor::visit(ir_loop *ir)
 473 {
 474    assert(!ir->from);
 475    assert(!ir->to);
 476    assert(!ir->increment);
 477    assert(!ir->counter);
 478
 479    ir_to_mesa_emit_op1(NULL, OPCODE_BGNLOOP,
 480                        ir_to_mesa_undef_dst, ir_to_mesa_undef);
 481
 482    visit_exec_list(&ir->body_instructions, this);
 483
 484    ir_to_mesa_emit_op1(NULL, OPCODE_ENDLOOP,
 485                        ir_to_mesa_undef_dst, ir_to_mesa_undef);
 486 }
 487
 488 void
 489 ir_to_mesa_visitor::visit(ir_loop_jump *ir)
 490 {
 491    switch (ir->mode) {
 492    case ir_loop_jump::jump_break:
 493       ir_to_mesa_emit_op1(NULL, OPCODE_BRK,
 494                           ir_to_mesa_undef_dst, ir_to_mesa_undef);
 495       break;
 496    case ir_loop_jump::jump_continue:
 497       ir_to_mesa_emit_op1(NULL, OPCODE_CONT,
 498                           ir_to_mesa_undef_dst, ir_to_mesa_undef);
 499       break;
 500    }
 501 }
 502
 503
 504 void
 505 ir_to_mesa_visitor::visit(ir_function_signature *ir)
 506 {
 507    assert(0);
 508    (void)ir;
 509 }
 510
 511 void
 512 ir_to_mesa_visitor::visit(ir_function *ir)
 513 {
 514    /* Ignore function bodies other than main() -- we shouldn't see calls to
 515     * them since they should all be inlined before we get to ir_to_mesa.
 516     */
 517    if (strcmp(ir->name, "main") == 0) {
 518       const ir_function_signature *sig;
 519       exec_list empty;
 520
 521       sig = ir->matching_signature(&empty);
 522
 523       assert(sig);
 524
 525       foreach_iter(exec_list_iterator, iter, sig->body) {
 526          ir_instruction *ir = (ir_instruction *)iter.get();
 527
 528          ir->accept(this);
 529       }
 530    }
 531 }
 532
 533 void
 534 ir_to_mesa_visitor::visit(ir_expression *ir)
 535 {
 536    unsigned int operand;
 537    struct ir_to_mesa_src_reg op[2];
 538    struct ir_to_mesa_src_reg result_src;
 539    struct ir_to_mesa_dst_reg result_dst;
 540    const glsl_type *vec4_type = glsl_type::get_instance(GLSL_TYPE_FLOAT, 4, 1);
 541    const glsl_type *vec3_type = glsl_type::get_instance(GLSL_TYPE_FLOAT, 3, 1);
 542    const glsl_type *vec2_type = glsl_type::get_instance(GLSL_TYPE_FLOAT, 2, 1);
 543
 544    for (operand = 0; operand < ir->get_num_operands(); operand++) {
 545       this->result.file = PROGRAM_UNDEFINED;
 546       ir->operands[operand]->accept(this);
 547       if (this->result.file == PROGRAM_UNDEFINED) {
 548          ir_print_visitor v;
 549          printf("Failed to get tree for expression operand:\n");
 550          ir->operands[operand]->accept(&v);
 551          exit(1);
 552       }
 553       op[operand] = this->result;
 554
 555       /* Only expression implemented for matrices yet */
 556       assert(!ir->operands[operand]->type->is_matrix() ||
 557              ir->operation == ir_binop_mul);
 558    }
 559
 560    this->result.file = PROGRAM_UNDEFINED;
 561
 562    /* Storage for our result.  Ideally for an assignment we'd be using
 563     * the actual storage for the result here, instead.
 564     */
 565    result_src = get_temp(ir->type);
 566    /* convenience for the emit functions below. */
 567    result_dst = ir_to_mesa_dst_reg_from_src(result_src);
 568    /* Limit writes to the channels that will be used by result_src later.
 569     * This does limit this temp's use as a temporary for multi-instruction
 570     * sequences.
 571     */
 572    result_dst.writemask = (1 << ir->type->vector_elements) - 1;
 573
 574    switch (ir->operation) {
 575    case ir_unop_logic_not:
 576       ir_to_mesa_emit_op2(ir, OPCODE_SEQ, result_dst,
 577                           op[0], src_reg_for_float(0.0));
 578       break;
 579    case ir_unop_neg:
 580       op[0].negate = ~op[0].negate;
 581       result_src = op[0];
 582       break;
 583    case ir_unop_abs:
 584       ir_to_mesa_emit_op1(ir, OPCODE_ABS, result_dst, op[0]);
 585       break;
 586    case ir_unop_sign:
 587       ir_to_mesa_emit_op1(ir, OPCODE_SSG, result_dst, op[0]);
 588       break;
 589    case ir_unop_rcp:
 590       ir_to_mesa_emit_scalar_op1(ir, OPCODE_RCP, result_dst, op[0]);
 591       break;
 592
 593    case ir_unop_exp:
 594       ir_to_mesa_emit_scalar_op2(ir, OPCODE_POW, result_dst,
 595                                  src_reg_for_float(M_E), op[0]);
 596       break;
 597    case ir_unop_exp2:
 598       ir_to_mesa_emit_scalar_op1(ir, OPCODE_EX2, result_dst, op[0]);
 599       break;
 600    case ir_unop_log:
 601       ir_to_mesa_emit_scalar_op1(ir, OPCODE_LOG, result_dst, op[0]);
 602       break;
 603    case ir_unop_log2:
 604       ir_to_mesa_emit_scalar_op1(ir, OPCODE_LG2, result_dst, op[0]);
 605       break;
 606    case ir_unop_sin:
 607       ir_to_mesa_emit_scalar_op1(ir, OPCODE_SIN, result_dst, op[0]);
 608       break;
 609    case ir_unop_cos:
 610       ir_to_mesa_emit_scalar_op1(ir, OPCODE_COS, result_dst, op[0]);
 611       break;
 612
 613    case ir_unop_dFdx:
 614       ir_to_mesa_emit_op1(ir, OPCODE_DDX, result_dst, op[0]);
 615       break;
 616    case ir_unop_dFdy:
 617       ir_to_mesa_emit_op1(ir, OPCODE_DDY, result_dst, op[0]);
 618       break;
 619
 620    case ir_binop_add:
 621       ir_to_mesa_emit_op2(ir, OPCODE_ADD, result_dst, op[0], op[1]);
 622       break;
 623    case ir_binop_sub:
 624       ir_to_mesa_emit_op2(ir, OPCODE_SUB, result_dst, op[0], op[1]);
 625       break;
 626
 627    case ir_binop_mul:
 628       if (ir->operands[0]->type->is_matrix()) {
 629          if (ir->operands[1]->type->is_scalar()) {
 630             ir_to_mesa_dst_reg dst_column = result_dst;
 631             ir_to_mesa_src_reg src_column = op[0];
 632             for (int i = 0; i < ir->operands[0]->type->matrix_columns; i++) {
 633                ir_to_mesa_emit_op2(ir, OPCODE_MUL,
 634                                    dst_column, src_column, op[1]);
 635                dst_column.index++;
 636                src_column.index++;
 637             }
 638          } else {
 639             /* matrix * vec or matrix * matrix */
 640             int op1_col;
 641             ir_to_mesa_dst_reg dst_column = result_dst;
 642             ir_to_mesa_src_reg dst_column_src;
 643             ir_to_mesa_src_reg src_chan = op[1];
 644
 645             dst_column_src = ir_to_mesa_src_reg_from_dst(result_dst);
 646             for (op1_col = 0; op1_col < ir->operands[1]->type->matrix_columns;
 647                  op1_col++) {
 648                ir_to_mesa_src_reg src_column = op[0];
 649
 650                for (int i = 0; i < ir->operands[0]->type->matrix_columns; i++) {
 651                   src_chan.swizzle = MAKE_SWIZZLE4(i, i, i, i);
 652                   if (i == 0) {
 653                      ir_to_mesa_emit_op2(ir, OPCODE_MUL,
 654                                          dst_column, src_column, src_chan);
 655                   } else {
 656                      ir_to_mesa_emit_op3(ir, OPCODE_MAD,
 657                                          dst_column, src_column, src_chan,
 658                                          dst_column_src);
 659                   }
 660                   src_column.index++;
 661                }
 662                src_chan.index++;
 663                dst_column.index++;
 664                dst_column_src.index++;
 665             }
 666          }
 667       } else if (ir->operands[1]->type->is_matrix()) {
 668          if (ir->operands[0]->type->is_scalar()) {
 669             ir_to_mesa_dst_reg dst_column = result_dst;
 670             ir_to_mesa_src_reg src_column = op[1];
 671             for (int i = 0; i < ir->operands[1]->type->matrix_columns; i++) {
 672                ir_to_mesa_emit_op2(ir, OPCODE_MUL,
 673                                    dst_column, src_column, op[0]);
 674                dst_column.index++;
 675                src_column.index++;
 676             }
 677          } else {
 678             ir_to_mesa_src_reg src_column = op[1];
 679             ir_to_mesa_dst_reg dst_chan = result_dst;
 680
 681             /* FINISHME here and above: non-square matrices */
 682             assert(ir->operands[1]->type->vector_elements ==
 683                    ir->operands[1]->type->matrix_columns);
 684
 685             for (int i = 0; i < ir->operands[0]->type->vector_elements; i++) {
 686                dst_chan.writemask = (1 << i);
 687                switch (ir->operands[0]->type->vector_elements) {
 688                case 2:
 689                   ir_to_mesa_emit_op2(ir, OPCODE_DP2, dst_chan, op[0], src_column);
 690                   break;
 691                case 3:
 692                   ir_to_mesa_emit_op2(ir, OPCODE_DP3, dst_chan, op[0], src_column);
 693                   break;
 694                case 4:
 695                   ir_to_mesa_emit_op2(ir, OPCODE_DP4, dst_chan, op[0], src_column);
 696                   break;
 697                default:
 698                   assert(0);
 699                }
 700                src_column.index++;
 701             }
 702          }
 703       } else {
 704          assert(!ir->operands[0]->type->is_matrix());
 705          assert(!ir->operands[1]->type->is_matrix());
 706          ir_to_mesa_emit_op2(ir, OPCODE_MUL, result_dst, op[0], op[1]);
 707       }
 708       break;
 709    case ir_binop_div:
 710       assert(!"not reached: should be handled by ir_div_to_mul_rcp");
 711    case ir_binop_mod:
 712       assert(!"ir_binop_mod should have been converted to b * fract(a/b)");
 713       break;
 714
 715    case ir_binop_less:
 716       ir_to_mesa_emit_op2(ir, OPCODE_SLT, result_dst, op[0], op[1]);
 717       break;
 718    case ir_binop_greater:
 719       ir_to_mesa_emit_op2(ir, OPCODE_SGT, result_dst, op[0], op[1]);
 720       break;
 721    case ir_binop_lequal:
 722       ir_to_mesa_emit_op2(ir, OPCODE_SLE, result_dst, op[0], op[1]);
 723       break;
 724    case ir_binop_gequal:
 725       ir_to_mesa_emit_op2(ir, OPCODE_SGE, result_dst, op[0], op[1]);
 726       break;
 727    case ir_binop_equal:
 728       ir_to_mesa_emit_op2(ir, OPCODE_SEQ, result_dst, op[0], op[1]);
 729       break;
 730    case ir_binop_logic_xor:
 731    case ir_binop_nequal:
 732       ir_to_mesa_emit_op2(ir, OPCODE_SNE, result_dst, op[0], op[1]);
 733       break;
 734
 735    case ir_binop_logic_or:
 736       /* This could be a saturated add and skip the SNE. */
 737       ir_to_mesa_emit_op2(ir, OPCODE_ADD,
 738                           result_dst,
 739                           op[0], op[1]);
 740
 741       ir_to_mesa_emit_op2(ir, OPCODE_SNE,
 742                           result_dst,
 743                           result_src, src_reg_for_float(0.0));
 744       break;
 745
 746    case ir_binop_logic_and:
 747       /* the bool args are stored as float 0.0 or 1.0, so "mul" gives us "and". */
 748       ir_to_mesa_emit_op2(ir, OPCODE_MUL,
 749                           result_dst,
 750                           op[0], op[1]);
 751       break;
 752
 753    case ir_binop_dot:
 754       if (ir->operands[0]->type == vec4_type) {
 755          assert(ir->operands[1]->type == vec4_type);
 756          ir_to_mesa_emit_op2(ir, OPCODE_DP4,
 757                              result_dst,
 758                              op[0], op[1]);
 759       } else if (ir->operands[0]->type == vec3_type) {
 760          assert(ir->operands[1]->type == vec3_type);
 761          ir_to_mesa_emit_op2(ir, OPCODE_DP3,
 762                              result_dst,
 763                              op[0], op[1]);
 764       } else if (ir->operands[0]->type == vec2_type) {
 765          assert(ir->operands[1]->type == vec2_type);
 766          ir_to_mesa_emit_op2(ir, OPCODE_DP2,
 767                              result_dst,
 768                              op[0], op[1]);
 769       }
 770       break;
 771    case ir_unop_sqrt:
 772       ir_to_mesa_emit_scalar_op1(ir, OPCODE_RSQ, result_dst, op[0]);
 773       ir_to_mesa_emit_scalar_op1(ir, OPCODE_RCP, result_dst, result_src);
 774       /* For incoming channels < 0, set the result to 0. */
 775       ir_to_mesa_emit_op3(ir, OPCODE_CMP, result_dst,
 776                           op[0], src_reg_for_float(0.0), result_src);
 777       break;
 778    case ir_unop_rsq:
 779       ir_to_mesa_emit_scalar_op1(ir, OPCODE_RSQ, result_dst, op[0]);
 780       break;
 781    case ir_unop_i2f:
 782    case ir_unop_b2f:
 783    case ir_unop_b2i:
 784       /* Mesa IR lacks types, ints are stored as truncated floats. */
 785       result_src = op[0];
 786       break;
 787    case ir_unop_f2i:
 788       ir_to_mesa_emit_op1(ir, OPCODE_TRUNC, result_dst, op[0]);
 789       break;
 790    case ir_unop_f2b:
 791    case ir_unop_i2b:
 792       ir_to_mesa_emit_op2(ir, OPCODE_SNE, result_dst,
 793                           result_src, src_reg_for_float(0.0));
 794       break;
 795    case ir_unop_trunc:
 796       ir_to_mesa_emit_op1(ir, OPCODE_TRUNC, result_dst, op[0]);
 797       break;
 798    case ir_unop_ceil:
 799       op[0].negate = ~op[0].negate;
 800       ir_to_mesa_emit_op1(ir, OPCODE_FLR, result_dst, op[0]);
 801       result_src.negate = ~result_src.negate;
 802       break;
 803    case ir_unop_floor:
 804       ir_to_mesa_emit_op1(ir, OPCODE_FLR, result_dst, op[0]);
 805       break;
 806    case ir_unop_fract:
 807       ir_to_mesa_emit_op1(ir, OPCODE_FRC, result_dst, op[0]);
 808       break;
 809
 810    case ir_binop_min:
 811       ir_to_mesa_emit_op2(ir, OPCODE_MIN, result_dst, op[0], op[1]);
 812       break;
 813    case ir_binop_max:
 814       ir_to_mesa_emit_op2(ir, OPCODE_MAX, result_dst, op[0], op[1]);
 815       break;
 816    case ir_binop_pow:
 817       ir_to_mesa_emit_scalar_op2(ir, OPCODE_POW, result_dst, op[0], op[1]);
 818       break;
 819
 820    case ir_unop_bit_not:
 821    case ir_unop_u2f:
 822    case ir_binop_lshift:
 823    case ir_binop_rshift:
 824    case ir_binop_bit_and:
 825    case ir_binop_bit_xor:
 826    case ir_binop_bit_or:
 827       assert(!"GLSL 1.30 features unsupported");
 828       break;
 829    }
 830
 831    this->result = result_src;
 832 }
 833
 834
 835 void
 836 ir_to_mesa_visitor::visit(ir_swizzle *ir)
 837 {
 838    ir_to_mesa_src_reg src_reg;
 839    int i;
 840    int swizzle[4];
 841
 842    /* Note that this is only swizzles in expressions, not those on the left
 843     * hand side of an assignment, which do write masking.  See ir_assignment
 844     * for that.
 845     */
 846
 847    ir->val->accept(this);
 848    src_reg = this->result;
 849    assert(src_reg.file != PROGRAM_UNDEFINED);
 850
 851    for (i = 0; i < 4; i++) {
 852       if (i < ir->type->vector_elements) {
 853          switch (i) {
 854          case 0:
 855             swizzle[i] = GET_SWZ(src_reg.swizzle, ir->mask.x);
 856             break;
 857          case 1:
 858             swizzle[i] = GET_SWZ(src_reg.swizzle, ir->mask.y);
 859             break;
 860          case 2:
 861             swizzle[i] = GET_SWZ(src_reg.swizzle, ir->mask.z);
 862             break;
 863          case 3:
 864             swizzle[i] = GET_SWZ(src_reg.swizzle, ir->mask.w);
 865             break;
 866          }
 867       } else {
 868          /* If the type is smaller than a vec4, replicate the last
 869           * channel out.
 870           */
 871          swizzle[i] = swizzle[ir->type->vector_elements - 1];
 872       }
 873    }
 874
 875    src_reg.swizzle = MAKE_SWIZZLE4(swizzle[0],
 876                                    swizzle[1],
 877                                    swizzle[2],
 878                                    swizzle[3]);
 879
 880    this->result = src_reg;
 881 }
 882
 883 static int
 884 add_matrix_ref(struct gl_program *prog, int *tokens)
 885 {
 886    int base_pos = -1;
 887    int i;
 888
 889    /* Add a ref for each column.  It looks like the reason we do
 890     * it this way is that _mesa_add_state_reference doesn't work
 891     * for things that aren't vec4s, so the tokens[2]/tokens[3]
 892     * range has to be equal.
 893     */
 894    for (i = 0; i < 4; i++) {
 895       tokens[2] = i;
 896       tokens[3] = i;
 897       int pos = _mesa_add_state_reference(prog->Parameters,
 898                                           (gl_state_index *)tokens);
 899       if (base_pos == -1)
 900          base_pos = pos;
 901       else
 902          assert(base_pos + i == pos);
 903    }
 904
 905    return base_pos;
 906 }
 907
 908 static temp_entry *
 909 get_builtin_matrix_ref(void *mem_ctx, struct gl_program *prog, ir_variable *var,
 910                        ir_rvalue *array_index)
 911 {
 912    /*
 913     * NOTE: The ARB_vertex_program extension specified that matrices get
 914     * loaded in registers in row-major order.  With GLSL, we want column-
 915     * major order.  So, we need to transpose all matrices here...
 916     */
 917    static const struct {
 918       const char *name;
 919       int matrix;
 920       int modifier;
 921    } matrices[] = {
 922       { "gl_ModelViewMatrix", STATE_MODELVIEW_MATRIX, STATE_MATRIX_TRANSPOSE },
 923       { "gl_ModelViewMatrixInverse", STATE_MODELVIEW_MATRIX, STATE_MATRIX_INVTRANS },
 924       { "gl_ModelViewMatrixTranspose", STATE_MODELVIEW_MATRIX, 0 },
 925       { "gl_ModelViewMatrixInverseTranspose", STATE_MODELVIEW_MATRIX, STATE_MATRIX_INVERSE },
 926
 927       { "gl_ProjectionMatrix", STATE_PROJECTION_MATRIX, STATE_MATRIX_TRANSPOSE },
 928       { "gl_ProjectionMatrixInverse", STATE_PROJECTION_MATRIX, STATE_MATRIX_INVTRANS },
 929       { "gl_ProjectionMatrixTranspose", STATE_PROJECTION_MATRIX, 0 },
 930       { "gl_ProjectionMatrixInverseTranspose", STATE_PROJECTION_MATRIX, STATE_MATRIX_INVERSE },
 931
 932       { "gl_ModelViewProjectionMatrix", STATE_MVP_MATRIX, STATE_MATRIX_TRANSPOSE },
 933       { "gl_ModelViewProjectionMatrixInverse", STATE_MVP_MATRIX, STATE_MATRIX_INVTRANS },
 934       { "gl_ModelViewProjectionMatrixTranspose", STATE_MVP_MATRIX, 0 },
 935       { "gl_ModelViewProjectionMatrixInverseTranspose", STATE_MVP_MATRIX, STATE_MATRIX_INVERSE },
 936
 937       { "gl_TextureMatrix", STATE_TEXTURE_MATRIX, STATE_MATRIX_TRANSPOSE },
 938       { "gl_TextureMatrixInverse", STATE_TEXTURE_MATRIX, STATE_MATRIX_INVTRANS },
 939       { "gl_TextureMatrixTranspose", STATE_TEXTURE_MATRIX, 0 },
 940       { "gl_TextureMatrixInverseTranspose", STATE_TEXTURE_MATRIX, STATE_MATRIX_INVERSE },
 941
 942       { "gl_NormalMatrix", STATE_MODELVIEW_MATRIX, STATE_MATRIX_INVERSE },
 943
 944    };
 945    unsigned int i;
 946    temp_entry *entry;
 947
 948    /* C++ gets angry when we try to use an int as a gl_state_index, so we use
 949     * ints for gl_state_index.  Make sure they're compatible.
 950     */
 951    assert(sizeof(gl_state_index) == sizeof(int));
 952
 953    for (i = 0; i < Elements(matrices); i++) {
 954       if (strcmp(var->name, matrices[i].name) == 0) {
 955          int tokens[STATE_LENGTH];
 956          int base_pos = -1;
 957
 958          tokens[0] = matrices[i].matrix;
 959          tokens[4] = matrices[i].modifier;
 960          if (matrices[i].matrix == STATE_TEXTURE_MATRIX) {
 961             ir_constant *index = array_index->constant_expression_value();
 962             if (index) {
 963                tokens[1] = index->value.i[0];
 964                base_pos = add_matrix_ref(prog, tokens);
 965             } else {
 966                for (i = 0; i < var->type->length; i++) {
 967                   tokens[1] = i;
 968                   int pos = add_matrix_ref(prog, tokens);
 969                   if (base_pos == -1)
 970                      base_pos = pos;
 971                   else
 972                      assert(base_pos + (int)i * 4 == pos);
 973                }
 974             }
 975          } else {
 976             tokens[1] = 0; /* unused array index */
 977             base_pos = add_matrix_ref(prog, tokens);
 978          }
 979          tokens[4] = matrices[i].modifier;
 980
 981          entry = new(mem_ctx) temp_entry(var,
 982                                          PROGRAM_STATE_VAR,
 983                                          base_pos);
 984
 985          return entry;
 986       }
 987    }
 988
 989    return NULL;
 990 }
 991
 992 void
 993 ir_to_mesa_visitor::visit(ir_dereference_variable *ir)
 994 {
 995    ir_to_mesa_src_reg src_reg;
 996    temp_entry *entry = find_variable_storage(ir->var);
 997    unsigned int loc;
 998
 999    if (!entry) {
1000       switch (ir->var->mode) {
1001       case ir_var_uniform:
1002          entry = get_builtin_matrix_ref(this->mem_ctx, this->prog, ir->var,
1003                                         NULL);
1004          if (entry)
1005             break;
1006
1007          /* FINISHME: Fix up uniform name for arrays and things */
1008          if (ir->var->type->base_type == GLSL_TYPE_SAMPLER) {
1009             /* FINISHME: we whack the location of the var here, which
1010              * is probably not expected.  But we need to communicate
1011              * mesa's sampler number to the tex instruction.
1012              */
1013             int sampler = _mesa_add_sampler(this->prog->Parameters,
1014                                             ir->var->name,
1015                                             ir->var->type->gl_type);
1016             map_sampler(ir->var->location, sampler);
1017
1018             entry = new(mem_ctx) temp_entry(ir->var, PROGRAM_SAMPLER, sampler);
1019             this->variable_storage.push_tail(entry);
1020             break;
1021          }
1022
1023          assert(ir->var->type->gl_type != 0 &&
1024                 ir->var->type->gl_type != GL_INVALID_ENUM);
1025          loc = _mesa_add_uniform(this->prog->Parameters,
1026                                  ir->var->name,
1027                                  type_size(ir->var->type) * 4,
1028                                  ir->var->type->gl_type,
1029                                  NULL);
1030
1031          /* Always mark the uniform used at this point.  If it isn't
1032           * used, dead code elimination should have nuked the decl already.
1033           */
1034          this->prog->Parameters->Parameters[loc].Used = GL_TRUE;
1035
1036          entry = new(mem_ctx) temp_entry(ir->var, PROGRAM_UNIFORM, loc);
1037          this->variable_storage.push_tail(entry);
1038          break;
1039       case ir_var_in:
1040       case ir_var_out:
1041       case ir_var_inout:
1042          /* The linker assigns locations for varyings and attributes,
1043           * including deprecated builtins (like gl_Color), user-assign
1044           * generic attributes (glBindVertexLocation), and
1045           * user-defined varyings.
1046           *
1047           * FINISHME: We would hit this path for function arguments.  Fix!
1048           */
1049          assert(ir->var->location != -1);
1050          if (ir->var->mode == ir_var_in ||
1051              ir->var->mode == ir_var_inout) {
1052             entry = new(mem_ctx) temp_entry(ir->var,
1053                                             PROGRAM_INPUT,
1054                                             ir->var->location);
1055
1056             if (this->prog->Target == GL_VERTEX_PROGRAM_ARB &&
1057                 ir->var->location >= VERT_ATTRIB_GENERIC0) {
1058                _mesa_add_attribute(prog->Attributes,
1059                                    ir->var->name,
1060                                    type_size(ir->var->type) * 4,
1061                                    ir->var->type->gl_type,
1062                                    ir->var->location - VERT_ATTRIB_GENERIC0);
1063             }
1064          } else {
1065             entry = new(mem_ctx) temp_entry(ir->var,
1066                                             PROGRAM_OUTPUT,
1067                                             ir->var->location);
1068          }
1069
1070          break;
1071       case ir_var_auto:
1072          entry = new(mem_ctx) temp_entry(ir->var, PROGRAM_TEMPORARY,
1073                                          this->next_temp);
1074          this->variable_storage.push_tail(entry);
1075
1076          next_temp += type_size(ir->var->type);
1077          break;
1078       }
1079
1080       if (!entry) {
1081          printf("Failed to make storage for %s\n", ir->var->name);
1082          exit(1);
1083       }
1084    }
1085
1086    src_reg.file = entry->file;
1087    src_reg.index = entry->index;
1088    /* If the type is smaller than a vec4, replicate the last channel out. */
1089    src_reg.swizzle = swizzle_for_size(ir->var->type->vector_elements);
1090    src_reg.reladdr = false;
1091    src_reg.negate = 0;
1092
1093    this->result = src_reg;
1094 }
1095
1096 void
1097 ir_to_mesa_visitor::visit(ir_dereference_array *ir)
1098 {
1099    ir_constant *index;
1100    ir_to_mesa_src_reg src_reg;
1101    ir_dereference_variable *deref_var = ir->array->as_dereference_variable();
1102
1103    index = ir->array_index->constant_expression_value();
1104
1105    if (deref_var && strncmp(deref_var->var->name,
1106                             "gl_TextureMatrix",
1107                             strlen("gl_TextureMatrix")) == 0) {
1108       ir_to_mesa_src_reg src_reg;
1109       struct temp_entry *entry;
1110
1111       entry = get_builtin_matrix_ref(this->mem_ctx, this->prog, deref_var->var,
1112                                      ir->array_index);
1113       assert(entry);
1114
1115       src_reg.file = entry->file;
1116       src_reg.index = entry->index;
1117       src_reg.swizzle = swizzle_for_size(ir->type->vector_elements);
1118       src_reg.negate = 0;
1119
1120       if (index) {
1121          src_reg.reladdr = GL_FALSE;
1122       } else {
1123          ir_to_mesa_src_reg index_reg = get_temp(glsl_type::float_type);
1124
1125          ir->array_index->accept(this);
1126          ir_to_mesa_emit_op2(ir, OPCODE_MUL,
1127                              ir_to_mesa_dst_reg_from_src(index_reg),
1128                              this->result, src_reg_for_float(4.0));
1129
1130          src_reg.reladdr = true;
1131          ir_to_mesa_emit_op1(ir, OPCODE_ARL, ir_to_mesa_address_reg,
1132                              index_reg);
1133       }
1134
1135       this->result = src_reg;
1136       return;
1137    }
1138
1139    /* By the time we make it to this stage, matrices should be broken down
1140     * to vectors.
1141     */
1142    assert(!ir->type->is_matrix());
1143
1144    ir->array->accept(this);
1145    src_reg = this->result;
1146
1147    if (src_reg.file == PROGRAM_INPUT ||
1148        src_reg.file == PROGRAM_OUTPUT) {
1149       assert(index); /* FINISHME: Handle variable indexing of builtins. */
1150
1151       src_reg.index += index->value.i[0];
1152    } else {
1153       if (index) {
1154          src_reg.index += index->value.i[0];
1155       } else {
1156          ir_to_mesa_src_reg array_base = this->result;
1157          /* Variable index array dereference.  It eats the "vec4" of the
1158           * base of the array and an index that offsets the Mesa register
1159           * index.
1160           */
1161          ir->array_index->accept(this);
1162
1163          /* FINISHME: This doesn't work when we're trying to do the LHS
1164           * of an assignment.
1165           */
1166          src_reg.reladdr = true;
1167          ir_to_mesa_emit_op1(ir, OPCODE_ARL, ir_to_mesa_address_reg,
1168                              this->result);
1169
1170          this->result = get_temp(ir->type);
1171          ir_to_mesa_emit_op1(ir, OPCODE_MOV,
1172                              ir_to_mesa_dst_reg_from_src(this->result),
1173                              src_reg);
1174       }
1175    }
1176
1177    /* If the type is smaller than a vec4, replicate the last channel out. */
1178    src_reg.swizzle = swizzle_for_size(ir->type->vector_elements);
1179
1180    this->result = src_reg;
1181 }
1182
1183 void
1184 ir_to_mesa_visitor::visit(ir_dereference_record *ir)
1185 {
1186    unsigned int i;
1187    const glsl_type *struct_type = ir->record->type;
1188    int offset = 0;
1189
1190    ir->record->accept(this);
1191
1192    for (i = 0; i < struct_type->length; i++) {
1193       if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0)
1194          break;
1195       offset += type_size(struct_type->fields.structure[i].type);
1196    }
1197    this->result.index += offset;
1198 }
1199
1200 /**
1201  * We want to be careful in assignment setup to hit the actual storage
1202  * instead of potentially using a temporary like we might with the
1203  * ir_dereference handler.
1204  *
1205  * Thanks to ir_swizzle_swizzle, and ir_vec_index_to_swizzle, we
1206  * should only see potentially one variable array index of a vector,
1207  * and one swizzle, before getting to actual vec4 storage.  So handle
1208  * those, then go use ir_dereference to handle the rest.
1209  */
1210 static struct ir_to_mesa_dst_reg
1211 get_assignment_lhs(ir_instruction *ir, ir_to_mesa_visitor *v)
1212 {
1213    struct ir_to_mesa_dst_reg dst_reg;
1214    ir_dereference *deref;
1215    ir_swizzle *swiz;
1216
1217    /* Use the rvalue deref handler for the most part.  We'll ignore
1218     * swizzles in it and write swizzles using writemask, though.
1219     */
1220    ir->accept(v);
1221    dst_reg = ir_to_mesa_dst_reg_from_src(v->result);
1222
1223    if ((deref = ir->as_dereference())) {
1224       ir_dereference_array *deref_array = ir->as_dereference_array();
1225       assert(!deref_array || deref_array->array->type->is_array());
1226
1227       ir->accept(v);
1228    } else if ((swiz = ir->as_swizzle())) {
1229       dst_reg.writemask = 0;
1230       if (swiz->mask.num_components >= 1)
1231          dst_reg.writemask |= (1 << swiz->mask.x);
1232       if (swiz->mask.num_components >= 2)
1233          dst_reg.writemask |= (1 << swiz->mask.y);
1234       if (swiz->mask.num_components >= 3)
1235          dst_reg.writemask |= (1 << swiz->mask.z);
1236       if (swiz->mask.num_components >= 4)
1237          dst_reg.writemask |= (1 << swiz->mask.w);
1238    }
1239
1240    return dst_reg;
1241 }
1242
1243 static GLuint
1244 reswizzle_for_writemask(GLuint writemask, GLuint swizzle)
1245 {
1246    int new_swizzle[4], pos = 0;
1247    int i;
1248
1249    /* reswizzle the rhs so the components are in place for the
1250     * components we'll assign to the lhs.
1251     */
1252    for (i = 0; i < 4; i++) {
1253       if (writemask & (1 << i)) {
1254          new_swizzle[i] = GET_SWZ(swizzle, pos++);
1255       } else {
1256          new_swizzle[i] = GET_SWZ(swizzle, 0);
1257       }
1258    }
1259
1260    return MAKE_SWIZZLE4(new_swizzle[0],
1261                         new_swizzle[1],
1262                         new_swizzle[2],
1263                         new_swizzle[3]);
1264 }
1265
1266 void
1267 ir_to_mesa_visitor::visit(ir_assignment *ir)
1268 {
1269    struct ir_to_mesa_dst_reg l;
1270    struct ir_to_mesa_src_reg r;
1271    int i;
1272
1273    assert(!ir->lhs->type->is_array());
1274    assert(ir->lhs->type->base_type != GLSL_TYPE_STRUCT);
1275
1276    l = get_assignment_lhs(ir->lhs, this);
1277
1278    ir->rhs->accept(this);
1279    r = this->result;
1280
1281    r.swizzle = reswizzle_for_writemask(l.writemask, r.swizzle);
1282
1283    assert(l.file != PROGRAM_UNDEFINED);
1284    assert(r.file != PROGRAM_UNDEFINED);
1285
1286    if (ir->condition) {
1287       ir_to_mesa_src_reg condition;
1288
1289       ir->condition->accept(this);
1290       condition = this->result;
1291
1292       /* We use the OPCODE_CMP (a < 0 ? b : c) for conditional moves,
1293        * and the condition we produced is 0.0 or 1.0.  By flipping the
1294        * sign, we can choose which value OPCODE_CMP produces without
1295        * an extra computing the condition.
1296        */
1297       condition.negate = ~condition.negate;
1298       for (i = 0; i < type_size(ir->lhs->type); i++) {
1299          ir_to_mesa_emit_op3(ir, OPCODE_CMP, l,
1300                              condition, r, ir_to_mesa_src_reg_from_dst(l));
1301          l.index++;
1302          r.index++;
1303       }
1304    } else {
1305       for (i = 0; i < type_size(ir->lhs->type); i++) {
1306          ir_to_mesa_emit_op1(ir, OPCODE_MOV, l, r);
1307          l.index++;
1308          r.index++;
1309       }
1310    }
1311 }
1312
1313
1314 void
1315 ir_to_mesa_visitor::visit(ir_constant *ir)
1316 {
1317    ir_to_mesa_src_reg src_reg;
1318    GLfloat stack_vals[4];
1319    GLfloat *values = stack_vals;
1320    unsigned int i;
1321
1322    if (ir->type->is_matrix() || ir->type->is_array()) {
1323       assert(!"FINISHME: array/matrix constants");
1324    }
1325
1326    src_reg.file = PROGRAM_CONSTANT;
1327    switch (ir->type->base_type) {
1328    case GLSL_TYPE_FLOAT:
1329       values = &ir->value.f[0];
1330       break;
1331    case GLSL_TYPE_UINT:
1332       for (i = 0; i < ir->type->vector_elements; i++) {
1333          values[i] = ir->value.u[i];
1334       }
1335       break;
1336    case GLSL_TYPE_INT:
1337       for (i = 0; i < ir->type->vector_elements; i++) {
1338          values[i] = ir->value.i[i];
1339       }
1340       break;
1341    case GLSL_TYPE_BOOL:
1342       for (i = 0; i < ir->type->vector_elements; i++) {
1343          values[i] = ir->value.b[i];
1344       }
1345       break;
1346    default:
1347       assert(!"Non-float/uint/int/bool constant");
1348    }
1349
1350    src_reg.index = _mesa_add_unnamed_constant(this->prog->Parameters,
1351                                               values, ir->type->vector_elements,
1352                                               &src_reg.swizzle);
1353    src_reg.reladdr = false;
1354    src_reg.negate = 0;
1355
1356    this->result = src_reg;
1357 }
1358
1359
1360 void
1361 ir_to_mesa_visitor::visit(ir_call *ir)
1362 {
1363    printf("Can't support call to %s\n", ir->callee_name());
1364    exit(1);
1365 }
1366
1367
1368 void
1369 ir_to_mesa_visitor::visit(ir_texture *ir)
1370 {
1371    ir_to_mesa_src_reg result_src, coord, lod_info, projector;
1372    ir_to_mesa_dst_reg result_dst, coord_dst;
1373    ir_to_mesa_instruction *inst = NULL;
1374    prog_opcode opcode = OPCODE_NOP;
1375
1376    ir->coordinate->accept(this);
1377
1378    /* Put our coords in a temp.  We'll need to modify them for shadow,
1379     * projection, or LOD, so the only case we'd use it as is is if
1380     * we're doing plain old texturing.  Mesa IR optimization should
1381     * handle cleaning up our mess in that case.
1382     */
1383    coord = get_temp(glsl_type::vec4_type);
1384    coord_dst = ir_to_mesa_dst_reg_from_src(coord);
1385    ir_to_mesa_emit_op1(ir, OPCODE_MOV, coord_dst,
1386                        this->result);
1387
1388    if (ir->projector) {
1389       ir->projector->accept(this);
1390       projector = this->result;
1391    }
1392
1393    /* Storage for our result.  Ideally for an assignment we'd be using
1394     * the actual storage for the result here, instead.
1395     */
1396    result_src = get_temp(glsl_type::vec4_type);
1397    result_dst = ir_to_mesa_dst_reg_from_src(result_src);
1398
1399    switch (ir->op) {
1400    case ir_tex:
1401       opcode = OPCODE_TEX;
1402       break;
1403    case ir_txb:
1404       opcode = OPCODE_TXB;
1405       ir->lod_info.bias->accept(this);
1406       lod_info = this->result;
1407       break;
1408    case ir_txl:
1409       opcode = OPCODE_TXL;
1410       ir->lod_info.lod->accept(this);
1411       lod_info = this->result;
1412       break;
1413    case ir_txd:
1414    case ir_txf:
1415       assert(!"GLSL 1.30 features unsupported");
1416       break;
1417    }
1418
1419    if (ir->projector) {
1420       if (opcode == OPCODE_TEX) {
1421          /* Slot the projector in as the last component of the coord. */
1422          coord_dst.writemask = WRITEMASK_W;
1423          ir_to_mesa_emit_op1(ir, OPCODE_MOV, coord_dst, projector);
1424          coord_dst.writemask = WRITEMASK_XYZW;
1425          opcode = OPCODE_TXP;
1426       } else {
1427          ir_to_mesa_src_reg coord_w = coord;
1428          coord_w.swizzle = SWIZZLE_WWWW;
1429
1430          /* For the other TEX opcodes there's no projective version
1431           * since the last slot is taken up by lod info.  Do the
1432           * projective divide now.
1433           */
1434          coord_dst.writemask = WRITEMASK_W;
1435          ir_to_mesa_emit_op1(ir, OPCODE_RCP, coord_dst, projector);
1436
1437          coord_dst.writemask = WRITEMASK_XYZ;
1438          ir_to_mesa_emit_op2(ir, OPCODE_MUL, coord_dst, coord, coord_w);
1439
1440          coord_dst.writemask = WRITEMASK_XYZW;
1441          coord.swizzle = SWIZZLE_XYZW;
1442       }
1443    }
1444
1445    if (ir->shadow_comparitor) {
1446       /* Slot the shadow value in as the second to last component of the
1447        * coord.
1448        */
1449       ir->shadow_comparitor->accept(this);
1450       coord_dst.writemask = WRITEMASK_Z;
1451       ir_to_mesa_emit_op1(ir, OPCODE_MOV, coord_dst, this->result);
1452       coord_dst.writemask = WRITEMASK_XYZW;
1453    }
1454
1455    if (opcode == OPCODE_TXL || opcode == OPCODE_TXB) {
1456       /* Mesa IR stores lod or lod bias in the last channel of the coords. */
1457       coord_dst.writemask = WRITEMASK_W;
1458       ir_to_mesa_emit_op1(ir, OPCODE_MOV, coord_dst, lod_info);
1459       coord_dst.writemask = WRITEMASK_XYZW;
1460    }
1461
1462    inst = ir_to_mesa_emit_op1(ir, opcode, result_dst, coord);
1463
1464    if (ir->shadow_comparitor)
1465       inst->tex_shadow = GL_TRUE;
1466
1467    ir_dereference_variable *sampler = ir->sampler->as_dereference_variable();
1468    assert(sampler); /* FINISHME: sampler arrays */
1469    /* generate the mapping, remove when we generate storage at
1470     * declaration time
1471     */
1472    sampler->accept(this);
1473
1474    inst->sampler = get_sampler_number(sampler->var->location);
1475
1476    switch (sampler->type->sampler_dimensionality) {
1477    case GLSL_SAMPLER_DIM_1D:
1478       inst->tex_target = TEXTURE_1D_INDEX;
1479       break;
1480    case GLSL_SAMPLER_DIM_2D:
1481       inst->tex_target = TEXTURE_2D_INDEX;
1482       break;
1483    case GLSL_SAMPLER_DIM_3D:
1484       inst->tex_target = TEXTURE_3D_INDEX;
1485       break;
1486    case GLSL_SAMPLER_DIM_CUBE:
1487       inst->tex_target = TEXTURE_CUBE_INDEX;
1488       break;
1489    default:
1490       assert(!"FINISHME: other texture targets");
1491    }
1492
1493    this->result = result_src;
1494 }
1495
1496 void
1497 ir_to_mesa_visitor::visit(ir_return *ir)
1498 {
1499    assert(0);
1500
1501    ir->get_value()->accept(this);
1502 }
1503
1504 void
1505 ir_to_mesa_visitor::visit(ir_discard *ir)
1506 {
1507    assert(ir->condition == NULL); /* FINISHME */
1508
1509    ir_to_mesa_emit_op1(ir, OPCODE_KIL_NV,
1510                        ir_to_mesa_undef_dst, ir_to_mesa_undef);
1511 }
1512
1513 void
1514 ir_to_mesa_visitor::visit(ir_if *ir)
1515 {
1516    ir_to_mesa_instruction *cond_inst, *if_inst, *else_inst = NULL;
1517    ir_to_mesa_instruction *prev_inst;
1518
1519    prev_inst = (ir_to_mesa_instruction *)this->instructions.get_tail();
1520
1521    ir->condition->accept(this);
1522    assert(this->result.file != PROGRAM_UNDEFINED);
1523
1524    if (ctx->Shader.EmitCondCodes) {
1525       cond_inst = (ir_to_mesa_instruction *)this->instructions.get_tail();
1526
1527       /* See if we actually generated any instruction for generating
1528        * the condition.  If not, then cook up a move to a temp so we
1529        * have something to set cond_update on.
1530        */
1531       if (cond_inst == prev_inst) {
1532          ir_to_mesa_src_reg temp = get_temp(glsl_type::bool_type);
1533          cond_inst = ir_to_mesa_emit_op1(ir->condition, OPCODE_MOV,
1534                                          ir_to_mesa_dst_reg_from_src(temp),
1535                                          result);
1536       }
1537       cond_inst->cond_update = GL_TRUE;
1538
1539       if_inst = ir_to_mesa_emit_op1(ir->condition,
1540                                     OPCODE_IF, ir_to_mesa_undef_dst,
1541                                     ir_to_mesa_undef);
1542       if_inst->dst_reg.cond_mask = COND_NE;
1543    } else {
1544       if_inst = ir_to_mesa_emit_op1(ir->condition,
1545                                     OPCODE_IF, ir_to_mesa_undef_dst,
1546                                     this->result);
1547    }
1548
1549    this->instructions.push_tail(if_inst);
1550
1551    visit_exec_list(&ir->then_instructions, this);
1552
1553    if (!ir->else_instructions.is_empty()) {
1554       else_inst = ir_to_mesa_emit_op1(ir->condition, OPCODE_ELSE,
1555                                       ir_to_mesa_undef_dst,
1556                                       ir_to_mesa_undef);
1557       visit_exec_list(&ir->else_instructions, this);
1558    }
1559
1560    if_inst = ir_to_mesa_emit_op1(ir->condition, OPCODE_ENDIF,
1561                                  ir_to_mesa_undef_dst, ir_to_mesa_undef);
1562 }
1563
1564 ir_to_mesa_visitor::ir_to_mesa_visitor()
1565 {
1566    result.file = PROGRAM_UNDEFINED;
1567    next_temp = 1;
1568    sampler_map = NULL;
1569    sampler_map_size = 0;
1570 }
1571
1572 static struct prog_src_register
1573 mesa_src_reg_from_ir_src_reg(ir_to_mesa_src_reg reg)
1574 {
1575    struct prog_src_register mesa_reg;
1576
1577    mesa_reg.File = reg.file;
1578    assert(reg.index < (1 << INST_INDEX_BITS) - 1);
1579    mesa_reg.Index = reg.index;
1580    mesa_reg.Swizzle = reg.swizzle;
1581    mesa_reg.RelAddr = reg.reladdr;
1582    mesa_reg.Negate = reg.negate;
1583    mesa_reg.Abs = 0;
1584
1585    return mesa_reg;
1586 }
1587
1588 static void
1589 set_branchtargets(struct prog_instruction *mesa_instructions,
1590                   int num_instructions)
1591 {
1592    int if_count = 0, loop_count = 0;
1593    int *if_stack, *loop_stack;
1594    int if_stack_pos = 0, loop_stack_pos = 0;
1595    int i, j;
1596
1597    for (i = 0; i < num_instructions; i++) {
1598       switch (mesa_instructions[i].Opcode) {
1599       case OPCODE_IF:
1600          if_count++;
1601          break;
1602       case OPCODE_BGNLOOP:
1603          loop_count++;
1604          break;
1605       case OPCODE_BRK:
1606       case OPCODE_CONT:
1607          mesa_instructions[i].BranchTarget = -1;
1608          break;
1609       default:
1610          break;
1611       }
1612    }
1613
1614    if_stack = (int *)calloc(if_count, sizeof(*if_stack));
1615    loop_stack = (int *)calloc(loop_count, sizeof(*loop_stack));
1616
1617    for (i = 0; i < num_instructions; i++) {
1618       switch (mesa_instructions[i].Opcode) {
1619       case OPCODE_IF:
1620          if_stack[if_stack_pos] = i;
1621          if_stack_pos++;
1622          break;
1623       case OPCODE_ELSE:
1624          mesa_instructions[if_stack[if_stack_pos - 1]].BranchTarget = i;
1625          if_stack[if_stack_pos - 1] = i;
1626          break;
1627       case OPCODE_ENDIF:
1628          mesa_instructions[if_stack[if_stack_pos - 1]].BranchTarget = i;
1629          if_stack_pos--;
1630          break;
1631       case OPCODE_BGNLOOP:
1632          loop_stack[loop_stack_pos] = i;
1633          loop_stack_pos++;
1634          break;
1635       case OPCODE_ENDLOOP:
1636          loop_stack_pos--;
1637          /* Rewrite any breaks/conts at this nesting level (haven't
1638           * already had a BranchTarget assigned) to point to the end
1639           * of the loop.
1640           */
1641          for (j = loop_stack[loop_stack_pos]; j < i; j++) {
1642             if (mesa_instructions[j].Opcode == OPCODE_BRK ||
1643                 mesa_instructions[j].Opcode == OPCODE_CONT) {
1644                if (mesa_instructions[j].BranchTarget == -1) {
1645                   mesa_instructions[j].BranchTarget = i;
1646                }
1647             }
1648          }
1649          /* The loop ends point at each other. */
1650          mesa_instructions[i].BranchTarget = loop_stack[loop_stack_pos];
1651          mesa_instructions[loop_stack[loop_stack_pos]].BranchTarget = i;
1652       default:
1653          break;
1654       }
1655    }
1656
1657    free(if_stack);
1658 }
1659
1660 static void
1661 print_program(struct prog_instruction *mesa_instructions,
1662               ir_instruction **mesa_instruction_annotation,
1663               int num_instructions)
1664 {
1665    ir_instruction *last_ir = NULL;
1666    int i;
1667
1668    for (i = 0; i < num_instructions; i++) {
1669       struct prog_instruction *mesa_inst = mesa_instructions + i;
1670       ir_instruction *ir = mesa_instruction_annotation[i];
1671
1672       if (last_ir != ir && ir) {
1673          ir_print_visitor print;
1674          ir->accept(&print);
1675          printf("\n");
1676          last_ir = ir;
1677       }
1678
1679       _mesa_print_instruction(mesa_inst);
1680    }
1681 }
1682
1683 static void
1684 count_resources(struct gl_program *prog)
1685 {
1686    unsigned int i;
1687
1688    prog->InputsRead = 0;
1689    prog->OutputsWritten = 0;
1690    prog->SamplersUsed = 0;
1691
1692    for (i = 0; i < prog->NumInstructions; i++) {
1693       struct prog_instruction *inst = &prog->Instructions[i];
1694       unsigned int reg;
1695
1696       switch (inst->DstReg.File) {
1697       case PROGRAM_OUTPUT:
1698          prog->OutputsWritten |= BITFIELD64_BIT(inst->DstReg.Index);
1699          break;
1700       case PROGRAM_INPUT:
1701          prog->InputsRead |= BITFIELD64_BIT(inst->DstReg.Index);
1702          break;
1703       default:
1704          break;
1705       }
1706
1707       for (reg = 0; reg < _mesa_num_inst_src_regs(inst->Opcode); reg++) {
1708          switch (inst->SrcReg[reg].File) {
1709          case PROGRAM_OUTPUT:
1710             prog->OutputsWritten |= BITFIELD64_BIT(inst->SrcReg[reg].Index);
1711             break;
1712          case PROGRAM_INPUT:
1713             prog->InputsRead |= BITFIELD64_BIT(inst->SrcReg[reg].Index);
1714             break;
1715          default:
1716             break;
1717          }
1718       }
1719
1720       /* Instead of just using the uniform's value to map to a
1721        * sampler, Mesa first allocates a separate number for the
1722        * sampler (_mesa_add_sampler), then we reindex it down to a
1723        * small integer (sampler_map[], SamplersUsed), then that gets
1724        * mapped to the uniform's value, and we get an actual sampler.
1725        */
1726       if (_mesa_is_tex_instruction(inst->Opcode)) {
1727          prog->SamplerTargets[inst->TexSrcUnit] =
1728             (gl_texture_index)inst->TexSrcTarget;
1729          prog->SamplersUsed |= 1 << inst->TexSrcUnit;
1730          if (inst->TexShadow) {
1731             prog->ShadowSamplers |= 1 << inst->TexSrcUnit;
1732          }
1733       }
1734    }
1735
1736    _mesa_update_shader_textures_used(prog);
1737 }
1738
1739 /* Each stage has some uniforms in its Parameters list.  The Uniforms
1740  * list for the linked shader program has a pointer to these uniforms
1741  * in each of the stage's Parameters list, so that their values can be
1742  * updated when a uniform is set.
1743  */
1744 static void
1745 link_uniforms_to_shared_uniform_list(struct gl_uniform_list *uniforms,
1746                                      struct gl_program *prog)
1747 {
1748    unsigned int i;
1749
1750    for (i = 0; i < prog->Parameters->NumParameters; i++) {
1751       const struct gl_program_parameter *p = prog->Parameters->Parameters + i;
1752
1753       if (p->Type == PROGRAM_UNIFORM || p->Type == PROGRAM_SAMPLER) {
1754          struct gl_uniform *uniform =
1755             _mesa_append_uniform(uniforms, p->Name, prog->Target, i);
1756          if (uniform)
1757             uniform->Initialized = p->Initialized;
1758       }
1759    }
1760 }
1761
1762 struct gl_program *
1763 get_mesa_program(GLcontext *ctx, void *mem_ctx, struct gl_shader *shader)
1764 {
1765    ir_to_mesa_visitor v;
1766    struct prog_instruction *mesa_instructions, *mesa_inst;
1767    ir_instruction **mesa_instruction_annotation;
1768    int i;
1769    struct gl_program *prog;
1770    GLenum target;
1771
1772    switch (shader->Type) {
1773    case GL_VERTEX_SHADER:   target = GL_VERTEX_PROGRAM_ARB; break;
1774    case GL_FRAGMENT_SHADER: target = GL_FRAGMENT_PROGRAM_ARB; break;
1775    default: assert(!"should not be reached"); break;
1776    }
1777
1778    prog = ctx->Driver.NewProgram(ctx, target, 1);
1779    if (!prog)
1780       return NULL;
1781    prog->Parameters = _mesa_new_parameter_list();
1782    prog->Varying = _mesa_new_parameter_list();
1783    prog->Attributes = _mesa_new_parameter_list();
1784    v.ctx = ctx;
1785    v.prog = prog;
1786
1787    v.mem_ctx = talloc_new(NULL);
1788    visit_exec_list(shader->ir, &v);
1789    v.ir_to_mesa_emit_op1(NULL, OPCODE_END,
1790                          ir_to_mesa_undef_dst, ir_to_mesa_undef);
1791
1792    prog->NumTemporaries = v.next_temp;
1793
1794    int num_instructions = 0;
1795    foreach_iter(exec_list_iterator, iter, v.instructions) {
1796       num_instructions++;
1797    }
1798
1799    mesa_instructions =
1800       (struct prog_instruction *)calloc(num_instructions,
1801                                         sizeof(*mesa_instructions));
1802    mesa_instruction_annotation = talloc_array(mem_ctx, ir_instruction *,
1803                                               num_instructions);
1804
1805    mesa_inst = mesa_instructions;
1806    i = 0;
1807    foreach_iter(exec_list_iterator, iter, v.instructions) {
1808       ir_to_mesa_instruction *inst = (ir_to_mesa_instruction *)iter.get();
1809
1810       mesa_inst->Opcode = inst->op;
1811       mesa_inst->CondUpdate = inst->cond_update;
1812       mesa_inst->DstReg.File = inst->dst_reg.file;
1813       mesa_inst->DstReg.Index = inst->dst_reg.index;
1814       mesa_inst->DstReg.CondMask = inst->dst_reg.cond_mask;
1815       mesa_inst->DstReg.WriteMask = inst->dst_reg.writemask;
1816       mesa_inst->SrcReg[0] = mesa_src_reg_from_ir_src_reg(inst->src_reg[0]);
1817       mesa_inst->SrcReg[1] = mesa_src_reg_from_ir_src_reg(inst->src_reg[1]);
1818       mesa_inst->SrcReg[2] = mesa_src_reg_from_ir_src_reg(inst->src_reg[2]);
1819       mesa_inst->TexSrcUnit = inst->sampler;
1820       mesa_inst->TexSrcTarget = inst->tex_target;
1821       mesa_inst->TexShadow = inst->tex_shadow;
1822       mesa_instruction_annotation[i] = inst->ir;
1823
1824       mesa_inst++;
1825       i++;
1826    }
1827
1828    set_branchtargets(mesa_instructions, num_instructions);
1829    if (0) {
1830       print_program(mesa_instructions, mesa_instruction_annotation,
1831                     num_instructions);
1832    }
1833
1834    prog->Instructions = mesa_instructions;
1835    prog->NumInstructions = num_instructions;
1836
1837    _mesa_reference_program(ctx, &shader->Program, prog);
1838
1839    if ((ctx->Shader.Flags & GLSL_NO_OPT) == 0) {
1840       _mesa_optimize_program(ctx, prog);
1841    }
1842
1843    return prog;
1844 }
1845
1846 extern "C" {
1847
1848 static void
1849 steal_memory(ir_instruction *ir, void *new_ctx)
1850 {
1851    talloc_steal(new_ctx, ir);
1852 }
1853
1854 void
1855 _mesa_glsl_compile_shader(GLcontext *ctx, struct gl_shader *shader)
1856 {
1857    struct _mesa_glsl_parse_state *state;
1858
1859    state = talloc_zero(shader, struct _mesa_glsl_parse_state);
1860    switch (shader->Type) {
1861    case GL_VERTEX_SHADER:   state->target = vertex_shader; break;
1862    case GL_FRAGMENT_SHADER: state->target = fragment_shader; break;
1863    case GL_GEOMETRY_SHADER: state->target = geometry_shader; break;
1864    }
1865
1866    state->scanner = NULL;
1867    state->translation_unit.make_empty();
1868    state->symbols = new(shader) glsl_symbol_table;
1869    state->info_log = talloc_strdup(shader, "");
1870    state->error = false;
1871    state->temp_index = 0;
1872    state->loop_or_switch_nesting = NULL;
1873    state->ARB_texture_rectangle_enable = true;
1874
1875    state->extensions = &ctx->Extensions;
1876    state->Const.MaxDrawBuffers = ctx->Const.MaxDrawBuffers;
1877    state->Const.MaxTextureCoords = ctx->Const.MaxTextureCoordUnits;
1878
1879    const char *source = shader->Source;
1880    state->error = preprocess(state, &source, &state->info_log,
1881                              &ctx->Extensions);
1882
1883    if (!state->error) {
1884      _mesa_glsl_lexer_ctor(state, source);
1885      _mesa_glsl_parse(state);
1886      _mesa_glsl_lexer_dtor(state);
1887    }
1888
1889    shader->ir = new(shader) exec_list;
1890    if (!state->error && !state->translation_unit.is_empty())
1891       _mesa_ast_to_hir(shader->ir, state);
1892
1893    /* Lowering */
1894    do_mod_to_fract(shader->ir);
1895    do_div_to_mul_rcp(shader->ir);
1896
1897    /* Optimization passes */
1898    if (!state->error && !shader->ir->is_empty()) {
1899       bool progress;
1900       do {
1901          progress = false;
1902
1903          progress = do_function_inlining(shader->ir) || progress;
1904          progress = do_if_simplification(shader->ir) || progress;
1905          progress = do_copy_propagation(shader->ir) || progress;
1906          progress = do_dead_code_local(shader->ir) || progress;
1907          progress = do_dead_code_unlinked(state, shader->ir) || progress;
1908          progress = do_constant_variable_unlinked(shader->ir) || progress;
1909          progress = do_constant_folding(shader->ir) || progress;
1910          progress = do_if_return(shader->ir) || progress;
1911
1912          progress = do_vec_index_to_swizzle(shader->ir) || progress;
1913          /* Do this one after the previous to let the easier pass handle
1914           * constant vector indexing.
1915           */
1916          progress = do_vec_index_to_cond_assign(shader->ir) || progress;
1917
1918          progress = do_swizzle_swizzle(shader->ir) || progress;
1919       } while (progress);
1920    }
1921
1922    shader->symbols = state->symbols;
1923
1924    shader->CompileStatus = !state->error;
1925    shader->InfoLog = state->info_log;
1926
1927    /* Retain any live IR, but trash the rest. */
1928    foreach_list(node, shader->ir) {
1929       visit_tree((ir_instruction *) node, steal_memory, shader);
1930    }
1931
1932    talloc_free(state);
1933  }
1934
1935 void
1936 _mesa_glsl_link_shader(GLcontext *ctx, struct gl_shader_program *prog)
1937 {
1938    unsigned int i;
1939
1940    _mesa_clear_shader_program_data(ctx, prog);
1941
1942    prog->LinkStatus = GL_TRUE;
1943
1944    for (i = 0; i < prog->NumShaders; i++) {
1945       if (!prog->Shaders[i]->CompileStatus) {
1946          prog->InfoLog =
1947             talloc_asprintf_append(prog->InfoLog,
1948                                    "linking with uncompiled shader");
1949          prog->LinkStatus = GL_FALSE;
1950       }
1951    }
1952
1953    prog->Varying = _mesa_new_parameter_list();
1954    _mesa_reference_vertprog(ctx, &prog->VertexProgram, NULL);
1955    _mesa_reference_fragprog(ctx, &prog->FragmentProgram, NULL);
1956
1957    if (prog->LinkStatus) {
1958       link_shaders(prog);
1959
1960       /* We don't use the linker's uniforms list, and cook up our own at
1961        * generate time.
1962        */
1963       free(prog->Uniforms);
1964       prog->Uniforms = _mesa_new_uniform_list();
1965    }
1966
1967    prog->LinkStatus = prog->LinkStatus;
1968
1969    /* FINISHME: This should use the linker-generated code */
1970    if (prog->LinkStatus) {
1971       for (i = 0; i < prog->NumShaders; i++) {
1972          struct gl_program *linked_prog;
1973
1974          linked_prog = get_mesa_program(ctx, prog,
1975                                         prog->Shaders[i]);
1976          count_resources(linked_prog);
1977
1978          link_uniforms_to_shared_uniform_list(prog->Uniforms, linked_prog);
1979
1980          switch (prog->Shaders[i]->Type) {
1981          case GL_VERTEX_SHADER:
1982             _mesa_reference_vertprog(ctx, &prog->VertexProgram,
1983                                      (struct gl_vertex_program *)linked_prog);
1984             ctx->Driver.ProgramStringNotify(ctx, GL_VERTEX_PROGRAM_ARB,
1985                                             linked_prog);
1986             break;
1987          case GL_FRAGMENT_SHADER:
1988             _mesa_reference_fragprog(ctx, &prog->FragmentProgram,
1989                                      (struct gl_fragment_program *)linked_prog);
1990             ctx->Driver.ProgramStringNotify(ctx, GL_FRAGMENT_PROGRAM_ARB,
1991                                             linked_prog);
1992             break;
1993          }
1994       }
1995    }
1996 }
1997
1998 } /* extern "C" */