src/mesa/shader/ir_to_mesa.cpp

   1 /*
   2  * Copyright (C) 2005-2007  Brian Paul   All Rights Reserved.
   3  * Copyright (C) 2008  VMware, Inc.   All Rights Reserved.
   4  * Copyright © 2010 Intel Corporation
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a
   7  * copy of this software and associated documentation files (the "Software"),
   8  * to deal in the Software without restriction, including without limitation
   9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  10  * and/or sell copies of the Software, and to permit persons to whom the
  11  * Software is furnished to do so, subject to the following conditions:
  12  *
  13  * The above copyright notice and this permission notice (including the next
  14  * paragraph) shall be included in all copies or substantial portions of the
  15  * Software.
  16  *
  17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  20  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  22  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  23  * DEALINGS IN THE SOFTWARE.
  24  */
  25
  26 /**
  27  * \file ir_to_mesa.cpp
  28  *
  29  * Translates the IR to ARB_fragment_program text if possible,
  30  * printing the result
  31  */
  32
  33 #include <stdio.h>
  34 #include "ir.h"
  35 #include "ir_visitor.h"
  36 #include "ir_print_visitor.h"
  37 #include "ir_expression_flattening.h"
  38 #include "glsl_types.h"
  39 #include "glsl_parser_extras.h"
  40 #include "../glsl/program.h"
  41 #include "ir_optimization.h"
  42 #include "ast.h"
  43
  44 extern "C" {
  45 #include "main/mtypes.h"
  46 #include "shader/prog_instruction.h"
  47 #include "shader/prog_print.h"
  48 #include "shader/program.h"
  49 #include "shader/prog_uniform.h"
  50 #include "shader/prog_parameter.h"
  51 #include "shader/shader_api.h"
  52 }
  53
  54 /**
  55  * This struct is a corresponding struct to Mesa prog_src_register, with
  56  * wider fields.
  57  */
  58 typedef struct ir_to_mesa_src_reg {
  59    int file; /**< PROGRAM_* from Mesa */
  60    int index; /**< temporary index, VERT_ATTRIB_*, FRAG_ATTRIB_*, etc. */
  61    GLuint swizzle; /**< SWIZZLE_XYZWONEZERO swizzles from Mesa. */
  62    int negate; /**< NEGATE_XYZW mask from mesa */
  63    bool reladdr; /**< Register index should be offset by address reg. */
  64 } ir_to_mesa_src_reg;
  65
  66 typedef struct ir_to_mesa_dst_reg {
  67    int file; /**< PROGRAM_* from Mesa */
  68    int index; /**< temporary index, VERT_ATTRIB_*, FRAG_ATTRIB_*, etc. */
  69    int writemask; /**< Bitfield of WRITEMASK_[XYZW] */
  70 } ir_to_mesa_dst_reg;
  71
  72 extern ir_to_mesa_src_reg ir_to_mesa_undef;
  73
  74 class ir_to_mesa_instruction : public exec_node {
  75 public:
  76    enum prog_opcode op;
  77    ir_to_mesa_dst_reg dst_reg;
  78    ir_to_mesa_src_reg src_reg[3];
  79    /** Pointer to the ir source this tree came from for debugging */
  80    ir_instruction *ir;
  81 };
  82
  83 class temp_entry : public exec_node {
  84 public:
  85    temp_entry(ir_variable *var, int file, int index)
  86       : file(file), index(index), var(var)
  87    {
  88       /* empty */
  89    }
  90
  91    int file;
  92    int index;
  93    ir_variable *var; /* variable that maps to this, if any */
  94 };
  95
  96 class ir_to_mesa_visitor : public ir_visitor {
  97 public:
  98    ir_to_mesa_visitor();
  99
 100    GLcontext *ctx;
 101    struct gl_program *prog;
 102
 103    int next_temp;
 104
 105    temp_entry *find_variable_storage(ir_variable *var);
 106
 107    ir_to_mesa_src_reg get_temp(const glsl_type *type);
 108
 109    struct ir_to_mesa_src_reg src_reg_for_float(float val);
 110
 111    /**
 112     * \name Visit methods
 113     *
 114     * As typical for the visitor pattern, there must be one \c visit method for
 115     * each concrete subclass of \c ir_instruction.  Virtual base classes within
 116     * the hierarchy should not have \c visit methods.
 117     */
 118    /*@{*/
 119    virtual void visit(ir_variable *);
 120    virtual void visit(ir_loop *);
 121    virtual void visit(ir_loop_jump *);
 122    virtual void visit(ir_function_signature *);
 123    virtual void visit(ir_function *);
 124    virtual void visit(ir_expression *);
 125    virtual void visit(ir_swizzle *);
 126    virtual void visit(ir_dereference_variable  *);
 127    virtual void visit(ir_dereference_array *);
 128    virtual void visit(ir_dereference_record *);
 129    virtual void visit(ir_assignment *);
 130    virtual void visit(ir_constant *);
 131    virtual void visit(ir_call *);
 132    virtual void visit(ir_return *);
 133    virtual void visit(ir_texture *);
 134    virtual void visit(ir_if *);
 135    /*@}*/
 136
 137    struct ir_to_mesa_src_reg result;
 138
 139    /** List of temp_entry */
 140    exec_list variable_storage;
 141
 142    /** List of ir_to_mesa_instruction */
 143    exec_list instructions;
 144
 145    ir_to_mesa_instruction *ir_to_mesa_emit_op1(ir_instruction *ir,
 146                                                enum prog_opcode op,
 147                                                ir_to_mesa_dst_reg dst,
 148                                                ir_to_mesa_src_reg src0);
 149
 150    ir_to_mesa_instruction *ir_to_mesa_emit_op2(ir_instruction *ir,
 151                                                enum prog_opcode op,
 152                                                ir_to_mesa_dst_reg dst,
 153                                                ir_to_mesa_src_reg src0,
 154                                                ir_to_mesa_src_reg src1);
 155
 156    ir_to_mesa_instruction *ir_to_mesa_emit_op3(ir_instruction *ir,
 157                                                enum prog_opcode op,
 158                                                ir_to_mesa_dst_reg dst,
 159                                                ir_to_mesa_src_reg src0,
 160                                                ir_to_mesa_src_reg src1,
 161                                                ir_to_mesa_src_reg src2);
 162
 163    void ir_to_mesa_emit_scalar_op1(ir_instruction *ir,
 164                                    enum prog_opcode op,
 165                                    ir_to_mesa_dst_reg dst,
 166                                    ir_to_mesa_src_reg src0);
 167
 168    void *mem_ctx;
 169 };
 170
 171 ir_to_mesa_src_reg ir_to_mesa_undef = {
 172    PROGRAM_UNDEFINED, 0, SWIZZLE_NOOP, NEGATE_NONE, false,
 173 };
 174
 175 ir_to_mesa_dst_reg ir_to_mesa_undef_dst = {
 176    PROGRAM_UNDEFINED, 0, SWIZZLE_NOOP
 177 };
 178
 179 ir_to_mesa_dst_reg ir_to_mesa_address_reg = {
 180    PROGRAM_ADDRESS, 0, WRITEMASK_X
 181 };
 182
 183 static int swizzle_for_size(int size)
 184 {
 185    int size_swizzles[4] = {
 186       MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
 187       MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y),
 188       MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z),
 189       MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W),
 190    };
 191
 192    return size_swizzles[size - 1];
 193 }
 194
 195 /* This list should match up with builtin_variables.h */
 196 static const struct {
 197    const char *name;
 198    int file;
 199    int index;
 200 } builtin_var_to_mesa_reg[] = {
 201    /* core_vs */
 202    {"gl_Position", PROGRAM_OUTPUT, VERT_RESULT_HPOS},
 203    {"gl_PointSize", PROGRAM_OUTPUT, VERT_RESULT_PSIZ},
 204
 205    /* core_fs */
 206    {"gl_FragCoord", PROGRAM_INPUT, FRAG_ATTRIB_WPOS},
 207    {"gl_FrontFacing", PROGRAM_INPUT, FRAG_ATTRIB_FACE},
 208    {"gl_FragColor", PROGRAM_OUTPUT, FRAG_ATTRIB_COL0},
 209    {"gl_FragDepth", PROGRAM_UNDEFINED, FRAG_ATTRIB_WPOS}, /* FINISHME: WPOS.z */
 210
 211    /* 110_deprecated_fs */
 212    {"gl_Color", PROGRAM_INPUT, FRAG_ATTRIB_COL0},
 213    {"gl_SecondaryColor", PROGRAM_INPUT, FRAG_ATTRIB_COL1},
 214    {"gl_FogFragCoord", PROGRAM_INPUT, FRAG_ATTRIB_FOGC},
 215    {"gl_TexCoord", PROGRAM_INPUT, FRAG_ATTRIB_TEX0}, /* array */
 216
 217    /* 110_deprecated_vs */
 218    {"gl_Vertex", PROGRAM_INPUT, VERT_ATTRIB_POS},
 219    {"gl_Normal", PROGRAM_INPUT, VERT_ATTRIB_NORMAL},
 220    {"gl_Color", PROGRAM_INPUT, VERT_ATTRIB_COLOR0},
 221    {"gl_SecondaryColor", PROGRAM_INPUT, VERT_ATTRIB_COLOR1},
 222    {"gl_MultiTexCoord0", PROGRAM_INPUT, VERT_ATTRIB_TEX0},
 223    {"gl_MultiTexCoord1", PROGRAM_INPUT, VERT_ATTRIB_TEX1},
 224    {"gl_MultiTexCoord2", PROGRAM_INPUT, VERT_ATTRIB_TEX2},
 225    {"gl_MultiTexCoord3", PROGRAM_INPUT, VERT_ATTRIB_TEX3},
 226    {"gl_MultiTexCoord4", PROGRAM_INPUT, VERT_ATTRIB_TEX4},
 227    {"gl_MultiTexCoord5", PROGRAM_INPUT, VERT_ATTRIB_TEX5},
 228    {"gl_MultiTexCoord6", PROGRAM_INPUT, VERT_ATTRIB_TEX6},
 229    {"gl_MultiTexCoord7", PROGRAM_INPUT, VERT_ATTRIB_TEX7},
 230    {"gl_TexCoord", PROGRAM_OUTPUT, VERT_RESULT_TEX0}, /* array */
 231    {"gl_FogCoord", PROGRAM_INPUT, VERT_RESULT_FOGC},
 232    /*{"gl_ClipVertex", PROGRAM_OUTPUT, VERT_ATTRIB_FOGC},*/ /* FINISHME */
 233    {"gl_FrontColor", PROGRAM_OUTPUT, VERT_RESULT_COL0},
 234    {"gl_BackColor", PROGRAM_OUTPUT, VERT_RESULT_BFC0},
 235    {"gl_FrontSecondaryColor", PROGRAM_OUTPUT, VERT_RESULT_COL1},
 236    {"gl_BackSecondaryColor", PROGRAM_OUTPUT, VERT_RESULT_BFC1},
 237    {"gl_FogFragCoord", PROGRAM_OUTPUT, VERT_RESULT_FOGC},
 238
 239    /* 130_vs */
 240    /*{"gl_VertexID", PROGRAM_INPUT, VERT_ATTRIB_FOGC},*/ /* FINISHME */
 241
 242    {"gl_FragData", PROGRAM_OUTPUT, FRAG_RESULT_DATA0}, /* array */
 243 };
 244
 245 ir_to_mesa_instruction *
 246 ir_to_mesa_visitor::ir_to_mesa_emit_op3(ir_instruction *ir,
 247                                         enum prog_opcode op,
 248                                         ir_to_mesa_dst_reg dst,
 249                                         ir_to_mesa_src_reg src0,
 250                                         ir_to_mesa_src_reg src1,
 251                                         ir_to_mesa_src_reg src2)
 252 {
 253    ir_to_mesa_instruction *inst = new(mem_ctx) ir_to_mesa_instruction();
 254
 255    inst->op = op;
 256    inst->dst_reg = dst;
 257    inst->src_reg[0] = src0;
 258    inst->src_reg[1] = src1;
 259    inst->src_reg[2] = src2;
 260    inst->ir = ir;
 261
 262    this->instructions.push_tail(inst);
 263
 264    return inst;
 265 }
 266
 267
 268 ir_to_mesa_instruction *
 269 ir_to_mesa_visitor::ir_to_mesa_emit_op2(ir_instruction *ir,
 270                                         enum prog_opcode op,
 271                                         ir_to_mesa_dst_reg dst,
 272                                         ir_to_mesa_src_reg src0,
 273                                         ir_to_mesa_src_reg src1)
 274 {
 275    return ir_to_mesa_emit_op3(ir, op, dst, src0, src1, ir_to_mesa_undef);
 276 }
 277
 278 ir_to_mesa_instruction *
 279 ir_to_mesa_visitor::ir_to_mesa_emit_op1(ir_instruction *ir,
 280                                         enum prog_opcode op,
 281                                         ir_to_mesa_dst_reg dst,
 282                                         ir_to_mesa_src_reg src0)
 283 {
 284    return ir_to_mesa_emit_op3(ir, op, dst,
 285                               src0, ir_to_mesa_undef, ir_to_mesa_undef);
 286 }
 287
 288 inline ir_to_mesa_dst_reg
 289 ir_to_mesa_dst_reg_from_src(ir_to_mesa_src_reg reg)
 290 {
 291    ir_to_mesa_dst_reg dst_reg;
 292
 293    dst_reg.file = reg.file;
 294    dst_reg.index = reg.index;
 295    dst_reg.writemask = WRITEMASK_XYZW;
 296
 297    return dst_reg;
 298 }
 299
 300 /**
 301  * Emits Mesa scalar opcodes to produce unique answers across channels.
 302  *
 303  * Some Mesa opcodes are scalar-only, like ARB_fp/vp.  The src X
 304  * channel determines the result across all channels.  So to do a vec4
 305  * of this operation, we want to emit a scalar per source channel used
 306  * to produce dest channels.
 307  */
 308 void
 309 ir_to_mesa_visitor::ir_to_mesa_emit_scalar_op1(ir_instruction *ir,
 310                                                enum prog_opcode op,
 311                                                ir_to_mesa_dst_reg dst,
 312                                                ir_to_mesa_src_reg src0)
 313 {
 314    int i, j;
 315    int done_mask = ~dst.writemask;
 316
 317    /* Mesa RCP is a scalar operation splatting results to all channels,
 318     * like ARB_fp/vp.  So emit as many RCPs as necessary to cover our
 319     * dst channels.
 320     */
 321    for (i = 0; i < 4; i++) {
 322       GLuint this_mask = (1 << i);
 323       ir_to_mesa_instruction *inst;
 324       ir_to_mesa_src_reg src = src0;
 325
 326       if (done_mask & this_mask)
 327          continue;
 328
 329       GLuint src_swiz = GET_SWZ(src.swizzle, i);
 330       for (j = i + 1; j < 4; j++) {
 331          if (!(done_mask & (1 << j)) && GET_SWZ(src.swizzle, j) == src_swiz) {
 332             this_mask |= (1 << j);
 333          }
 334       }
 335       src.swizzle = MAKE_SWIZZLE4(src_swiz, src_swiz,
 336                                   src_swiz, src_swiz);
 337
 338       inst = ir_to_mesa_emit_op1(ir, op,
 339                                  dst,
 340                                  src);
 341       inst->dst_reg.writemask = this_mask;
 342       done_mask |= this_mask;
 343    }
 344 }
 345
 346 struct ir_to_mesa_src_reg
 347 ir_to_mesa_visitor::src_reg_for_float(float val)
 348 {
 349    ir_to_mesa_src_reg src_reg;
 350
 351    src_reg.file = PROGRAM_CONSTANT;
 352    src_reg.index = _mesa_add_unnamed_constant(this->prog->Parameters,
 353                                               &val, 1, &src_reg.swizzle);
 354
 355    return src_reg;
 356 }
 357
 358 /**
 359  * In the initial pass of codegen, we assign temporary numbers to
 360  * intermediate results.  (not SSA -- variable assignments will reuse
 361  * storage).  Actual register allocation for the Mesa VM occurs in a
 362  * pass over the Mesa IR later.
 363  */
 364 ir_to_mesa_src_reg
 365 ir_to_mesa_visitor::get_temp(const glsl_type *type)
 366 {
 367    ir_to_mesa_src_reg src_reg;
 368    int swizzle[4];
 369    int i;
 370
 371    assert(!type->is_array());
 372
 373    src_reg.file = PROGRAM_TEMPORARY;
 374    src_reg.index = type->matrix_columns;
 375    src_reg.reladdr = false;
 376
 377    for (i = 0; i < type->vector_elements; i++)
 378       swizzle[i] = i;
 379    for (; i < 4; i++)
 380       swizzle[i] = type->vector_elements - 1;
 381    src_reg.swizzle = MAKE_SWIZZLE4(swizzle[0], swizzle[1],
 382                                    swizzle[2], swizzle[3]);
 383
 384    return src_reg;
 385 }
 386
 387 static int
 388 type_size(const struct glsl_type *type)
 389 {
 390    unsigned int i;
 391    int size;
 392
 393    switch (type->base_type) {
 394    case GLSL_TYPE_UINT:
 395    case GLSL_TYPE_INT:
 396    case GLSL_TYPE_FLOAT:
 397    case GLSL_TYPE_BOOL:
 398       if (type->is_matrix()) {
 399          return 4; /* FINISHME: Not all matrices are 4x4. */
 400       } else {
 401          /* Regardless of size of vector, it gets a vec4. This is bad
 402           * packing for things like floats, but otherwise arrays become a
 403           * mess.  Hopefully a later pass over the code can pack scalars
 404           * down if appropriate.
 405           */
 406          return 1;
 407       }
 408    case GLSL_TYPE_ARRAY:
 409       return type_size(type->fields.array) * type->length;
 410    case GLSL_TYPE_STRUCT:
 411       size = 0;
 412       for (i = 0; i < type->length; i++) {
 413          size += type_size(type->fields.structure[i].type);
 414       }
 415       return size;
 416    default:
 417       assert(0);
 418    }
 419 }
 420
 421 temp_entry *
 422 ir_to_mesa_visitor::find_variable_storage(ir_variable *var)
 423 {
 424
 425    temp_entry *entry;
 426
 427    foreach_iter(exec_list_iterator, iter, this->variable_storage) {
 428       entry = (temp_entry *)iter.get();
 429
 430       if (entry->var == var)
 431          return entry;
 432    }
 433
 434    return NULL;
 435 }
 436
 437 void
 438 ir_to_mesa_visitor::visit(ir_variable *ir)
 439 {
 440    (void)ir;
 441 }
 442
 443 void
 444 ir_to_mesa_visitor::visit(ir_loop *ir)
 445 {
 446    assert(!ir->from);
 447    assert(!ir->to);
 448    assert(!ir->increment);
 449    assert(!ir->counter);
 450
 451    ir_to_mesa_emit_op1(NULL, OPCODE_BGNLOOP,
 452                        ir_to_mesa_undef_dst, ir_to_mesa_undef);
 453
 454    visit_exec_list(&ir->body_instructions, this);
 455
 456    ir_to_mesa_emit_op1(NULL, OPCODE_ENDLOOP,
 457                        ir_to_mesa_undef_dst, ir_to_mesa_undef);
 458 }
 459
 460 void
 461 ir_to_mesa_visitor::visit(ir_loop_jump *ir)
 462 {
 463    switch (ir->mode) {
 464    case ir_loop_jump::jump_break:
 465       ir_to_mesa_emit_op1(NULL, OPCODE_BRK,
 466                           ir_to_mesa_undef_dst, ir_to_mesa_undef);
 467       break;
 468    case ir_loop_jump::jump_continue:
 469       ir_to_mesa_emit_op1(NULL, OPCODE_CONT,
 470                           ir_to_mesa_undef_dst, ir_to_mesa_undef);
 471       break;
 472    }
 473 }
 474
 475
 476 void
 477 ir_to_mesa_visitor::visit(ir_function_signature *ir)
 478 {
 479    assert(0);
 480    (void)ir;
 481 }
 482
 483 void
 484 ir_to_mesa_visitor::visit(ir_function *ir)
 485 {
 486    /* Ignore function bodies other than main() -- we shouldn't see calls to
 487     * them since they should all be inlined before we get to ir_to_mesa.
 488     */
 489    if (strcmp(ir->name, "main") == 0) {
 490       const ir_function_signature *sig;
 491       exec_list empty;
 492
 493       sig = ir->matching_signature(&empty);
 494
 495       assert(sig);
 496
 497       foreach_iter(exec_list_iterator, iter, sig->body) {
 498          ir_instruction *ir = (ir_instruction *)iter.get();
 499
 500          ir->accept(this);
 501       }
 502    }
 503 }
 504
 505 void
 506 ir_to_mesa_visitor::visit(ir_expression *ir)
 507 {
 508    unsigned int operand;
 509    struct ir_to_mesa_src_reg op[2];
 510    struct ir_to_mesa_src_reg result_src;
 511    struct ir_to_mesa_dst_reg result_dst;
 512    const glsl_type *vec4_type = glsl_type::get_instance(GLSL_TYPE_FLOAT, 4, 1);
 513    const glsl_type *vec3_type = glsl_type::get_instance(GLSL_TYPE_FLOAT, 3, 1);
 514    const glsl_type *vec2_type = glsl_type::get_instance(GLSL_TYPE_FLOAT, 2, 1);
 515
 516    for (operand = 0; operand < ir->get_num_operands(); operand++) {
 517       this->result.file = PROGRAM_UNDEFINED;
 518       ir->operands[operand]->accept(this);
 519       if (this->result.file == PROGRAM_UNDEFINED) {
 520          ir_print_visitor v;
 521          printf("Failed to get tree for expression operand:\n");
 522          ir->operands[operand]->accept(&v);
 523          exit(1);
 524       }
 525       op[operand] = this->result;
 526
 527       /* Only expression implemented for matrices yet */
 528       assert(!ir->operands[operand]->type->is_matrix() ||
 529              ir->operation == ir_binop_mul);
 530    }
 531
 532    this->result.file = PROGRAM_UNDEFINED;
 533
 534    /* Storage for our result.  Ideally for an assignment we'd be using
 535     * the actual storage for the result here, instead.
 536     */
 537    result_src = get_temp(ir->type);
 538    /* convenience for the emit functions below. */
 539    result_dst = ir_to_mesa_dst_reg_from_src(result_src);
 540    /* Limit writes to the channels that will be used by result_src later.
 541     * This does limit this temp's use as a temporary for multi-instruction
 542     * sequences.
 543     */
 544    result_dst.writemask = (1 << ir->type->vector_elements) - 1;
 545
 546    switch (ir->operation) {
 547    case ir_unop_logic_not:
 548       ir_to_mesa_emit_op2(ir, OPCODE_SEQ, result_dst,
 549                           op[0], src_reg_for_float(0.0));
 550       break;
 551    case ir_unop_neg:
 552       op[0].negate = ~op[0].negate;
 553       result_src = op[0];
 554       break;
 555    case ir_unop_exp:
 556       ir_to_mesa_emit_scalar_op1(ir, OPCODE_EXP, result_dst, op[0]);
 557       break;
 558    case ir_unop_exp2:
 559       ir_to_mesa_emit_scalar_op1(ir, OPCODE_EX2, result_dst, op[0]);
 560       break;
 561    case ir_unop_log:
 562       ir_to_mesa_emit_scalar_op1(ir, OPCODE_LOG, result_dst, op[0]);
 563       break;
 564    case ir_unop_log2:
 565       ir_to_mesa_emit_scalar_op1(ir, OPCODE_LG2, result_dst, op[0]);
 566       break;
 567    case ir_unop_sin:
 568       ir_to_mesa_emit_scalar_op1(ir, OPCODE_SIN, result_dst, op[0]);
 569       break;
 570    case ir_unop_cos:
 571       ir_to_mesa_emit_scalar_op1(ir, OPCODE_COS, result_dst, op[0]);
 572       break;
 573    case ir_binop_add:
 574       ir_to_mesa_emit_op2(ir, OPCODE_ADD, result_dst, op[0], op[1]);
 575       break;
 576    case ir_binop_sub:
 577       ir_to_mesa_emit_op2(ir, OPCODE_SUB, result_dst, op[0], op[1]);
 578       break;
 579    case ir_binop_mul:
 580       if (ir->operands[0]->type->is_matrix() &&
 581           !ir->operands[1]->type->is_matrix()) {
 582          if (ir->operands[1]->type->is_scalar()) {
 583             ir_to_mesa_dst_reg dst_column = result_dst;
 584             ir_to_mesa_src_reg src_column = op[0];
 585             for (int i = 0; i < ir->operands[0]->type->matrix_columns; i++) {
 586                ir_to_mesa_emit_op2(ir, OPCODE_MUL,
 587                                    dst_column, src_column, op[1]);
 588                dst_column.index++;
 589                src_column.index++;
 590             }
 591          } else {
 592             ir_to_mesa_src_reg src_column = op[0];
 593             ir_to_mesa_src_reg src_chan = op[1];
 594             assert(!ir->operands[1]->type->is_matrix() ||
 595                     !"FINISHME: matrix * matrix");
 596              for (int i = 0; i < ir->operands[0]->type->matrix_columns; i++) {
 597                 src_chan.swizzle = MAKE_SWIZZLE4(i, i, i, i);
 598                 if (i == 0) {
 599                    ir_to_mesa_emit_op2(ir, OPCODE_MUL,
 600                                        result_dst, src_column, src_chan);
 601                 } else {
 602                    ir_to_mesa_emit_op3(ir, OPCODE_MAD,
 603                                        result_dst, src_column, src_chan,
 604                                        result_src);
 605                 }
 606                 src_column.index++;
 607             }
 608          }
 609       } else {
 610          assert(!ir->operands[0]->type->is_matrix());
 611          assert(!ir->operands[1]->type->is_matrix());
 612          ir_to_mesa_emit_op2(ir, OPCODE_MUL, result_dst, op[0], op[1]);
 613       }
 614       break;
 615    case ir_binop_div:
 616       ir_to_mesa_emit_scalar_op1(ir, OPCODE_RCP, result_dst, op[1]);
 617       ir_to_mesa_emit_op2(ir, OPCODE_MUL, result_dst, op[0], result_src);
 618       break;
 619
 620    case ir_binop_less:
 621       ir_to_mesa_emit_op2(ir, OPCODE_SLT, result_dst, op[0], op[1]);
 622       break;
 623    case ir_binop_greater:
 624       ir_to_mesa_emit_op2(ir, OPCODE_SGT, result_dst, op[0], op[1]);
 625       break;
 626    case ir_binop_lequal:
 627       ir_to_mesa_emit_op2(ir, OPCODE_SLE, result_dst, op[0], op[1]);
 628       break;
 629    case ir_binop_gequal:
 630       ir_to_mesa_emit_op2(ir, OPCODE_SGE, result_dst, op[0], op[1]);
 631       break;
 632    case ir_binop_equal:
 633       ir_to_mesa_emit_op2(ir, OPCODE_SEQ, result_dst, op[0], op[1]);
 634       break;
 635    case ir_binop_logic_xor:
 636    case ir_binop_nequal:
 637       ir_to_mesa_emit_op2(ir, OPCODE_SNE, result_dst, op[0], op[1]);
 638       break;
 639
 640    case ir_binop_logic_or:
 641       /* This could be a saturated add and skip the SNE. */
 642       ir_to_mesa_emit_op2(ir, OPCODE_ADD,
 643                           result_dst,
 644                           op[0], op[1]);
 645
 646       ir_to_mesa_emit_op2(ir, OPCODE_SNE,
 647                           result_dst,
 648                           result_src, src_reg_for_float(0.0));
 649       break;
 650
 651    case ir_binop_logic_and:
 652       /* the bool args are stored as float 0.0 or 1.0, so "mul" gives us "and". */
 653       ir_to_mesa_emit_op2(ir, OPCODE_MUL,
 654                           result_dst,
 655                           op[0], op[1]);
 656       break;
 657
 658    case ir_binop_dot:
 659       if (ir->operands[0]->type == vec4_type) {
 660          assert(ir->operands[1]->type == vec4_type);
 661          ir_to_mesa_emit_op2(ir, OPCODE_DP4,
 662                              result_dst,
 663                              op[0], op[1]);
 664       } else if (ir->operands[0]->type == vec3_type) {
 665          assert(ir->operands[1]->type == vec3_type);
 666          ir_to_mesa_emit_op2(ir, OPCODE_DP3,
 667                              result_dst,
 668                              op[0], op[1]);
 669       } else if (ir->operands[0]->type == vec2_type) {
 670          assert(ir->operands[1]->type == vec2_type);
 671          ir_to_mesa_emit_op2(ir, OPCODE_DP2,
 672                              result_dst,
 673                              op[0], op[1]);
 674       }
 675       break;
 676    case ir_unop_sqrt:
 677       ir_to_mesa_emit_scalar_op1(ir, OPCODE_RSQ, result_dst, op[0]);
 678       ir_to_mesa_emit_op1(ir, OPCODE_RCP, result_dst, result_src);
 679       break;
 680    case ir_unop_rsq:
 681       ir_to_mesa_emit_scalar_op1(ir, OPCODE_RSQ, result_dst, op[0]);
 682       break;
 683    case ir_unop_i2f:
 684       /* Mesa IR lacks types, ints are stored as truncated floats. */
 685       result_src = op[0];
 686       break;
 687    case ir_unop_f2i:
 688       ir_to_mesa_emit_op1(ir, OPCODE_TRUNC, result_dst, op[0]);
 689       break;
 690    case ir_unop_f2b:
 691       ir_to_mesa_emit_op2(ir, OPCODE_SNE, result_dst,
 692                           result_src, src_reg_for_float(0.0));
 693       break;
 694    case ir_unop_trunc:
 695       ir_to_mesa_emit_op1(ir, OPCODE_TRUNC, result_dst, op[0]);
 696       break;
 697    case ir_unop_ceil:
 698       op[0].negate = ~op[0].negate;
 699       ir_to_mesa_emit_op1(ir, OPCODE_FLR, result_dst, op[0]);
 700       result_src.negate = ~result_src.negate;
 701       break;
 702    case ir_unop_floor:
 703       ir_to_mesa_emit_op1(ir, OPCODE_FLR, result_dst, op[0]);
 704       break;
 705    case ir_binop_min:
 706       ir_to_mesa_emit_op2(ir, OPCODE_MIN, result_dst, op[0], op[1]);
 707       break;
 708    case ir_binop_max:
 709       ir_to_mesa_emit_op2(ir, OPCODE_MAX, result_dst, op[0], op[1]);
 710       break;
 711    default:
 712       ir_print_visitor v;
 713       printf("Failed to get tree for expression:\n");
 714       ir->accept(&v);
 715       exit(1);
 716       break;
 717    }
 718
 719    this->result = result_src;
 720 }
 721
 722
 723 void
 724 ir_to_mesa_visitor::visit(ir_swizzle *ir)
 725 {
 726    ir_to_mesa_src_reg src_reg;
 727    int i;
 728    int swizzle[4];
 729
 730    /* Note that this is only swizzles in expressions, not those on the left
 731     * hand side of an assignment, which do write masking.  See ir_assignment
 732     * for that.
 733     */
 734
 735    ir->val->accept(this);
 736    src_reg = this->result;
 737    assert(src_reg.file != PROGRAM_UNDEFINED);
 738
 739    for (i = 0; i < 4; i++) {
 740       if (i < ir->type->vector_elements) {
 741          switch (i) {
 742          case 0:
 743             swizzle[i] = ir->mask.x;
 744             break;
 745          case 1:
 746             swizzle[i] = ir->mask.y;
 747             break;
 748          case 2:
 749             swizzle[i] = ir->mask.z;
 750             break;
 751          case 3:
 752             swizzle[i] = ir->mask.w;
 753             break;
 754          }
 755       } else {
 756          /* If the type is smaller than a vec4, replicate the last
 757           * channel out.
 758           */
 759          swizzle[i] = ir->type->vector_elements - 1;
 760       }
 761    }
 762
 763    src_reg.swizzle = MAKE_SWIZZLE4(swizzle[0],
 764                                    swizzle[1],
 765                                    swizzle[2],
 766                                    swizzle[3]);
 767
 768    this->result = src_reg;
 769 }
 770
 771 static temp_entry *
 772 get_builtin_matrix_ref(void *mem_ctx, struct gl_program *prog, ir_variable *var)
 773 {
 774    /*
 775     * NOTE: The ARB_vertex_program extension specified that matrices get
 776     * loaded in registers in row-major order.  With GLSL, we want column-
 777     * major order.  So, we need to transpose all matrices here...
 778     */
 779    static const struct {
 780       const char *name;
 781       int matrix;
 782       int modifier;
 783    } matrices[] = {
 784       { "gl_ModelViewMatrix", STATE_MODELVIEW_MATRIX, STATE_MATRIX_TRANSPOSE },
 785       { "gl_ModelViewMatrixInverse", STATE_MODELVIEW_MATRIX, STATE_MATRIX_INVTRANS },
 786       { "gl_ModelViewMatrixTranspose", STATE_MODELVIEW_MATRIX, 0 },
 787       { "gl_ModelViewMatrixInverseTranspose", STATE_MODELVIEW_MATRIX, STATE_MATRIX_INVERSE },
 788
 789       { "gl_ProjectionMatrix", STATE_PROJECTION_MATRIX, STATE_MATRIX_TRANSPOSE },
 790       { "gl_ProjectionMatrixInverse", STATE_PROJECTION_MATRIX, STATE_MATRIX_INVTRANS },
 791       { "gl_ProjectionMatrixTranspose", STATE_PROJECTION_MATRIX, 0 },
 792       { "gl_ProjectionMatrixInverseTranspose", STATE_PROJECTION_MATRIX, STATE_MATRIX_INVERSE },
 793
 794       { "gl_ModelViewProjectionMatrix", STATE_MVP_MATRIX, STATE_MATRIX_TRANSPOSE },
 795       { "gl_ModelViewProjectionMatrixInverse", STATE_MVP_MATRIX, STATE_MATRIX_INVTRANS },
 796       { "gl_ModelViewProjectionMatrixTranspose", STATE_MVP_MATRIX, 0 },
 797       { "gl_ModelViewProjectionMatrixInverseTranspose", STATE_MVP_MATRIX, STATE_MATRIX_INVERSE },
 798
 799       { "gl_TextureMatrix", STATE_TEXTURE_MATRIX, STATE_MATRIX_TRANSPOSE },
 800       { "gl_TextureMatrixInverse", STATE_TEXTURE_MATRIX, STATE_MATRIX_INVTRANS },
 801       { "gl_TextureMatrixTranspose", STATE_TEXTURE_MATRIX, 0 },
 802       { "gl_TextureMatrixInverseTranspose", STATE_TEXTURE_MATRIX, STATE_MATRIX_INVERSE },
 803
 804       { "gl_NormalMatrix", STATE_MODELVIEW_MATRIX, STATE_MATRIX_INVERSE },
 805
 806    };
 807    unsigned int i;
 808    temp_entry *entry;
 809
 810    /* C++ gets angry when we try to use an int as a gl_state_index, so we use
 811     * ints for gl_state_index.  Make sure they're compatible.
 812     */
 813    assert(sizeof(gl_state_index) == sizeof(int));
 814
 815    for (i = 0; i < Elements(matrices); i++) {
 816       if (strcmp(var->name, matrices[i].name) == 0) {
 817          int j;
 818          int last_pos = -1, base_pos = -1;
 819          int tokens[STATE_LENGTH];
 820
 821          tokens[0] = matrices[i].matrix;
 822          tokens[1] = 0; /* array index! */
 823          tokens[4] = matrices[i].modifier;
 824
 825          /* Add a ref for each column.  It looks like the reason we do
 826           * it this way is that _mesa_add_state_reference doesn't work
 827           * for things that aren't vec4s, so the tokens[2]/tokens[3]
 828           * range has to be equal.
 829           */
 830          for (j = 0; j < 4; j++) {
 831             tokens[2] = j;
 832             tokens[3] = j;
 833             int pos = _mesa_add_state_reference(prog->Parameters,
 834                                                 (gl_state_index *)tokens);
 835             assert(last_pos == -1 || last_pos == base_pos + j);
 836             if (base_pos == -1)
 837                base_pos = pos;
 838          }
 839
 840          entry = new(mem_ctx) temp_entry(var,
 841                                          PROGRAM_STATE_VAR,
 842                                          base_pos);
 843
 844          return entry;
 845       }
 846    }
 847
 848    return NULL;
 849 }
 850
 851 void
 852 ir_to_mesa_visitor::visit(ir_dereference_variable *ir)
 853 {
 854    ir_to_mesa_src_reg src_reg;
 855    temp_entry *entry = find_variable_storage(ir->var);
 856    unsigned int i, loc;
 857    bool var_in;
 858
 859    if (!entry) {
 860       switch (ir->var->mode) {
 861       case ir_var_uniform:
 862          entry = get_builtin_matrix_ref(this->mem_ctx, this->prog, ir->var);
 863          if (entry)
 864             break;
 865
 866          /* FINISHME: Fix up uniform name for arrays and things */
 867          assert(ir->var->type->gl_type != 0 &&
 868                 ir->var->type->gl_type != GL_INVALID_ENUM);
 869          loc = _mesa_add_uniform(this->prog->Parameters,
 870                                  ir->var->name,
 871                                  type_size(ir->var->type) * 4,
 872                                  ir->var->type->gl_type,
 873                                  NULL);
 874          /* Always mark the uniform used at this point.  If it isn't
 875           * used, dead code elimination should have nuked the decl already.
 876           */
 877          this->prog->Parameters->Parameters[loc].Used = GL_TRUE;
 878
 879          entry = new(mem_ctx) temp_entry(ir->var, PROGRAM_UNIFORM, loc);
 880          this->variable_storage.push_tail(entry);
 881          break;
 882       case ir_var_in:
 883       case ir_var_out:
 884       case ir_var_inout:
 885          var_in = (ir->var->mode == ir_var_in ||
 886                    ir->var->mode == ir_var_inout);
 887
 888          for (i = 0; i < ARRAY_SIZE(builtin_var_to_mesa_reg); i++) {
 889             bool in = builtin_var_to_mesa_reg[i].file == PROGRAM_INPUT;
 890
 891             if (strcmp(ir->var->name, builtin_var_to_mesa_reg[i].name) == 0 &&
 892                 !(var_in ^ in))
 893                break;
 894          }
 895          if (i == ARRAY_SIZE(builtin_var_to_mesa_reg)) {
 896             printf("Failed to find builtin for %s variable %s\n",
 897                    var_in ? "in" : "out",
 898                    ir->var->name);
 899             abort();
 900          }
 901          entry = new(mem_ctx) temp_entry(ir->var,
 902                                          builtin_var_to_mesa_reg[i].file,
 903                                          builtin_var_to_mesa_reg[i].index);
 904          break;
 905       case ir_var_auto:
 906          entry = new(mem_ctx) temp_entry(ir->var, PROGRAM_TEMPORARY,
 907                                          this->next_temp);
 908          this->variable_storage.push_tail(entry);
 909
 910          next_temp += type_size(ir->var->type);
 911          break;
 912       }
 913
 914       if (!entry) {
 915          printf("Failed to make storage for %s\n", ir->var->name);
 916          exit(1);
 917       }
 918    }
 919
 920    src_reg.file = entry->file;
 921    src_reg.index = entry->index;
 922    /* If the type is smaller than a vec4, replicate the last channel out. */
 923    src_reg.swizzle = swizzle_for_size(ir->var->type->vector_elements);
 924    src_reg.reladdr = false;
 925    src_reg.negate = 0;
 926
 927    this->result = src_reg;
 928 }
 929
 930 void
 931 ir_to_mesa_visitor::visit(ir_dereference_array *ir)
 932 {
 933    ir_constant *index;
 934    ir_to_mesa_src_reg src_reg;
 935
 936    index = ir->array_index->constant_expression_value();
 937
 938    /* By the time we make it to this stage, matrices should be broken down
 939     * to vectors.
 940     */
 941    assert(!ir->type->is_matrix());
 942
 943    ir->array->accept(this);
 944    src_reg = this->result;
 945
 946    if (src_reg.file == PROGRAM_INPUT ||
 947        src_reg.file == PROGRAM_OUTPUT) {
 948       assert(index); /* FINISHME: Handle variable indexing of builtins. */
 949
 950       src_reg.index += index->value.i[0];
 951    } else {
 952       if (index) {
 953          src_reg.index += index->value.i[0];
 954       } else {
 955          ir_to_mesa_src_reg array_base = this->result;
 956          /* Variable index array dereference.  It eats the "vec4" of the
 957           * base of the array and an index that offsets the Mesa register
 958           * index.
 959           */
 960          ir->array_index->accept(this);
 961
 962          /* FINISHME: This doesn't work when we're trying to do the LHS
 963           * of an assignment.
 964           */
 965          src_reg.reladdr = true;
 966          ir_to_mesa_emit_op1(ir, OPCODE_ARL, ir_to_mesa_address_reg,
 967                              this->result);
 968
 969          this->result = get_temp(ir->type);
 970          ir_to_mesa_emit_op1(ir, OPCODE_MOV,
 971                              ir_to_mesa_dst_reg_from_src(this->result),
 972                              src_reg);
 973       }
 974    }
 975
 976    /* If the type is smaller than a vec4, replicate the last channel out. */
 977    src_reg.swizzle = swizzle_for_size(ir->type->vector_elements);
 978
 979    this->result = src_reg;
 980 }
 981
 982 void
 983 ir_to_mesa_visitor::visit(ir_dereference_record *ir)
 984 {
 985    unsigned int i;
 986    const glsl_type *struct_type = ir->record->type;
 987    int offset = 0;
 988
 989    ir->record->accept(this);
 990
 991    for (i = 0; i < struct_type->length; i++) {
 992       if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0)
 993          break;
 994       offset += type_size(struct_type->fields.structure[i].type);
 995    }
 996    this->result.index += offset;
 997 }
 998
 999 /**
1000  * We want to be careful in assignment setup to hit the actual storage
1001  * instead of potentially using a temporary like we might with the
1002  * ir_dereference handler.
1003  *
1004  * Thanks to ir_swizzle_swizzle, and ir_vec_index_to_swizzle, we
1005  * should only see potentially one variable array index of a vector,
1006  * and one swizzle, before getting to actual vec4 storage.  So handle
1007  * those, then go use ir_dereference to handle the rest.
1008  */
1009 static struct ir_to_mesa_dst_reg
1010 get_assignment_lhs(ir_instruction *ir, ir_to_mesa_visitor *v)
1011 {
1012    struct ir_to_mesa_dst_reg dst_reg;
1013    ir_dereference *deref;
1014    ir_swizzle *swiz;
1015
1016    /* Use the rvalue deref handler for the most part.  We'll ignore
1017     * swizzles in it and write swizzles using writemask, though.
1018     */
1019    ir->accept(v);
1020    dst_reg = ir_to_mesa_dst_reg_from_src(v->result);
1021
1022    if ((deref = ir->as_dereference())) {
1023       ir_dereference_array *deref_array = ir->as_dereference_array();
1024       assert(!deref_array || deref_array->array->type->is_array());
1025
1026       ir->accept(v);
1027    } else if ((swiz = ir->as_swizzle())) {
1028       dst_reg.writemask = 0;
1029       if (swiz->mask.num_components >= 1)
1030          dst_reg.writemask |= (1 << swiz->mask.x);
1031       if (swiz->mask.num_components >= 2)
1032          dst_reg.writemask |= (1 << swiz->mask.y);
1033       if (swiz->mask.num_components >= 3)
1034          dst_reg.writemask |= (1 << swiz->mask.z);
1035       if (swiz->mask.num_components >= 4)
1036          dst_reg.writemask |= (1 << swiz->mask.w);
1037    }
1038
1039    return dst_reg;
1040 }
1041
1042 void
1043 ir_to_mesa_visitor::visit(ir_assignment *ir)
1044 {
1045    struct ir_to_mesa_dst_reg l;
1046    struct ir_to_mesa_src_reg r;
1047
1048    assert(!ir->lhs->type->is_matrix());
1049    assert(!ir->lhs->type->is_array());
1050    assert(ir->lhs->type->base_type != GLSL_TYPE_STRUCT);
1051
1052    l = get_assignment_lhs(ir->lhs, this);
1053
1054    ir->rhs->accept(this);
1055    r = this->result;
1056    assert(l.file != PROGRAM_UNDEFINED);
1057    assert(r.file != PROGRAM_UNDEFINED);
1058
1059    if (ir->condition) {
1060          ir_constant *condition_constant;
1061
1062          condition_constant = ir->condition->constant_expression_value();
1063
1064          assert(condition_constant && condition_constant->value.b[0]);
1065    }
1066
1067    ir_to_mesa_emit_op1(ir, OPCODE_MOV, l, r);
1068 }
1069
1070
1071 void
1072 ir_to_mesa_visitor::visit(ir_constant *ir)
1073 {
1074    ir_to_mesa_src_reg src_reg;
1075    GLfloat stack_vals[4];
1076    GLfloat *values = stack_vals;
1077    unsigned int i;
1078
1079    if (ir->type->is_matrix() || ir->type->is_array()) {
1080       assert(!"FINISHME: array/matrix constants");
1081    }
1082
1083    src_reg.file = PROGRAM_CONSTANT;
1084    switch (ir->type->base_type) {
1085    case GLSL_TYPE_FLOAT:
1086       values = &ir->value.f[0];
1087       break;
1088    case GLSL_TYPE_UINT:
1089       for (i = 0; i < ir->type->vector_elements; i++) {
1090          values[i] = ir->value.u[i];
1091       }
1092       break;
1093    case GLSL_TYPE_INT:
1094       for (i = 0; i < ir->type->vector_elements; i++) {
1095          values[i] = ir->value.i[i];
1096       }
1097       break;
1098    case GLSL_TYPE_BOOL:
1099       for (i = 0; i < ir->type->vector_elements; i++) {
1100          values[i] = ir->value.b[i];
1101       }
1102       break;
1103    default:
1104       assert(!"Non-float/uint/int/bool constant");
1105    }
1106
1107    src_reg.index = _mesa_add_unnamed_constant(this->prog->Parameters,
1108                                               values, ir->type->vector_elements,
1109                                               &src_reg.swizzle);
1110    src_reg.reladdr = false;
1111    src_reg.negate = 0;
1112
1113    this->result = src_reg;
1114 }
1115
1116
/**
 * Function calls cannot be translated by this visitor: the callee's name
 * is reported on stdout and the process exits with status 1.
 *
 * NOTE(review): presumably calls are expected to have been removed by
 * do_function_inlining() before translation -- confirm.
 */
void
ir_to_mesa_visitor::visit(ir_call *ir)
{
   printf("Can't support call to %s\n", ir->callee_name());
   exit(1);
}
1123
1124
/**
 * Texture operations are not implemented: the assert fires whenever
 * assertions are enabled.  With assertions compiled out, only the
 * coordinate expression is visited; this function emits no texture
 * instruction itself.
 */
void
ir_to_mesa_visitor::visit(ir_texture *ir)
{
   assert(0);

   ir->coordinate->accept(this);
}
1132
/**
 * Return statements are not implemented: the assert fires whenever
 * assertions are enabled.  With assertions compiled out, only the return
 * value expression is visited.
 *
 * NOTE(review): get_value() is dereferenced without a NULL check --
 * confirm whether a return without a value can reach this point.
 */
void
ir_to_mesa_visitor::visit(ir_return *ir)
{
   assert(0);

   ir->get_value()->accept(this);
}
1140
1141
1142 void
1143 ir_to_mesa_visitor::visit(ir_if *ir)
1144 {
1145    ir_to_mesa_instruction *if_inst, *else_inst = NULL;
1146
1147    ir->condition->accept(this);
1148    assert(this->result.file != PROGRAM_UNDEFINED);
1149
1150    if_inst = ir_to_mesa_emit_op1(ir->condition,
1151                                  OPCODE_IF, ir_to_mesa_undef_dst,
1152                                  this->result);
1153
1154    this->instructions.push_tail(if_inst);
1155
1156    visit_exec_list(&ir->then_instructions, this);
1157
1158    if (!ir->else_instructions.is_empty()) {
1159       else_inst = ir_to_mesa_emit_op1(ir->condition, OPCODE_ELSE,
1160                                       ir_to_mesa_undef_dst,
1161                                       ir_to_mesa_undef);
1162       visit_exec_list(&ir->then_instructions, this);
1163    }
1164
1165    if_inst = ir_to_mesa_emit_op1(ir->condition, OPCODE_ENDIF,
1166                                  ir_to_mesa_undef_dst, ir_to_mesa_undef);
1167 }
1168
1169 ir_to_mesa_visitor::ir_to_mesa_visitor()
1170 {
1171    result.file = PROGRAM_UNDEFINED;
1172    next_temp = 1;
1173 }
1174
1175 static struct prog_src_register
1176 mesa_src_reg_from_ir_src_reg(ir_to_mesa_src_reg reg)
1177 {
1178    struct prog_src_register mesa_reg;
1179
1180    mesa_reg.File = reg.file;
1181    assert(reg.index < (1 << INST_INDEX_BITS) - 1);
1182    mesa_reg.Index = reg.index;
1183    mesa_reg.Swizzle = reg.swizzle;
1184    mesa_reg.RelAddr = reg.reladdr;
1185
1186    return mesa_reg;
1187 }
1188
1189 static void
1190 set_branchtargets(struct prog_instruction *mesa_instructions,
1191                   int num_instructions)
1192 {
1193    int if_count = 0, loop_count;
1194    int *if_stack, *loop_stack;
1195    int if_stack_pos = 0, loop_stack_pos = 0;
1196    int i, j;
1197
1198    for (i = 0; i < num_instructions; i++) {
1199       switch (mesa_instructions[i].Opcode) {
1200       case OPCODE_IF:
1201          if_count++;
1202          break;
1203       case OPCODE_BGNLOOP:
1204          loop_count++;
1205          break;
1206       case OPCODE_BRK:
1207       case OPCODE_CONT:
1208          mesa_instructions[i].BranchTarget = -1;
1209          break;
1210       default:
1211          break;
1212       }
1213    }
1214
1215    if_stack = (int *)calloc(if_count, sizeof(*if_stack));
1216    loop_stack = (int *)calloc(loop_count, sizeof(*loop_stack));
1217
1218    for (i = 0; i < num_instructions; i++) {
1219       switch (mesa_instructions[i].Opcode) {
1220       case OPCODE_IF:
1221          if_stack[if_stack_pos] = i;
1222          if_stack_pos++;
1223          break;
1224       case OPCODE_ELSE:
1225          mesa_instructions[if_stack[if_stack_pos - 1]].BranchTarget = i;
1226          if_stack[if_stack_pos - 1] = i;
1227          break;
1228       case OPCODE_ENDIF:
1229          mesa_instructions[if_stack[if_stack_pos - 1]].BranchTarget = i;
1230          if_stack_pos--;
1231          break;
1232       case OPCODE_BGNLOOP:
1233          loop_stack[loop_stack_pos] = i;
1234          loop_stack_pos++;
1235          break;
1236       case OPCODE_ENDLOOP:
1237          loop_stack_pos--;
1238          /* Rewrite any breaks/conts at this nesting level (haven't
1239           * already had a BranchTarget assigned) to point to the end
1240           * of the loop.
1241           */
1242          for (j = loop_stack[loop_stack_pos]; j < i; j++) {
1243             if (mesa_instructions[j].Opcode == OPCODE_BRK ||
1244                 mesa_instructions[j].Opcode == OPCODE_CONT) {
1245                if (mesa_instructions[j].BranchTarget == -1) {
1246                   mesa_instructions[j].BranchTarget = i;
1247                }
1248             }
1249          }
1250          /* The loop ends point at each other. */
1251          mesa_instructions[i].BranchTarget = loop_stack[loop_stack_pos];
1252          mesa_instructions[loop_stack[loop_stack_pos]].BranchTarget = i;
1253       default:
1254          break;
1255       }
1256    }
1257
1258    free(if_stack);
1259 }
1260
1261 static void
1262 print_program(struct prog_instruction *mesa_instructions,
1263               ir_instruction **mesa_instruction_annotation,
1264               int num_instructions)
1265 {
1266    ir_instruction *last_ir = NULL;
1267    int i;
1268
1269    for (i = 0; i < num_instructions; i++) {
1270       struct prog_instruction *mesa_inst = mesa_instructions + i;
1271       ir_instruction *ir = mesa_instruction_annotation[i];
1272
1273       if (last_ir != ir && ir) {
1274          ir_print_visitor print;
1275          ir->accept(&print);
1276          printf("\n");
1277          last_ir = ir;
1278       }
1279
1280       _mesa_print_instruction(mesa_inst);
1281    }
1282 }
1283
1284 static void
1285 count_resources(struct gl_program *prog)
1286 {
1287    prog->InputsRead = 0;
1288    prog->OutputsWritten = 0;
1289    unsigned int i;
1290
1291    for (i = 0; i < prog->NumInstructions; i++) {
1292       struct prog_instruction *inst = &prog->Instructions[i];
1293       unsigned int reg;
1294
1295       switch (inst->DstReg.File) {
1296       case PROGRAM_OUTPUT:
1297          prog->OutputsWritten |= BITFIELD64_BIT(inst->DstReg.Index);
1298          break;
1299       case PROGRAM_INPUT:
1300          prog->InputsRead |= BITFIELD64_BIT(inst->DstReg.Index);
1301          break;
1302       default:
1303          break;
1304       }
1305
1306       for (reg = 0; reg < _mesa_num_inst_src_regs(inst->Opcode); reg++) {
1307          switch (inst->SrcReg[reg].File) {
1308          case PROGRAM_OUTPUT:
1309             prog->OutputsWritten |= BITFIELD64_BIT(inst->SrcReg[reg].Index);
1310             break;
1311          case PROGRAM_INPUT:
1312             prog->InputsRead |= BITFIELD64_BIT(inst->SrcReg[reg].Index);
1313             break;
1314          default:
1315             break;
1316          }
1317       }
1318    }
1319 }
1320
1321 /* Each stage has some uniforms in its Parameters list.  The Uniforms
1322  * list for the linked shader program has a pointer to these uniforms
1323  * in each of the stage's Parameters list, so that their values can be
1324  * updated when a uniform is set.
1325  */
1326 static void
1327 link_uniforms_to_shared_uniform_list(struct gl_uniform_list *uniforms,
1328                                      struct gl_program *prog)
1329 {
1330    unsigned int i;
1331
1332    for (i = 0; i < prog->Parameters->NumParameters; i++) {
1333       const struct gl_program_parameter *p = prog->Parameters->Parameters + i;
1334
1335       if (p->Type == PROGRAM_UNIFORM || p->Type == PROGRAM_SAMPLER) {
1336          struct gl_uniform *uniform =
1337             _mesa_append_uniform(uniforms, p->Name, prog->Target, i);
1338          if (uniform)
1339             uniform->Initialized = p->Initialized;
1340       }
1341    }
1342 }
1343
1344 struct gl_program *
1345 get_mesa_program(GLcontext *ctx, void *mem_ctx, struct glsl_shader *shader)
1346 {
1347    ir_to_mesa_visitor v;
1348    struct prog_instruction *mesa_instructions, *mesa_inst;
1349    ir_instruction **mesa_instruction_annotation;
1350    int i;
1351    exec_list *instructions = &shader->ir;
1352    struct gl_program *prog;
1353    GLenum target;
1354
1355    switch (shader->Type) {
1356    case GL_VERTEX_SHADER:   target = GL_VERTEX_PROGRAM_ARB; break;
1357    case GL_FRAGMENT_SHADER: target = GL_FRAGMENT_PROGRAM_ARB; break;
1358    default: assert(!"should not be reached"); break;
1359    }
1360
1361    prog = ctx->Driver.NewProgram(ctx, target, 1);
1362    if (!prog)
1363       return NULL;
1364    prog->Parameters = _mesa_new_parameter_list();
1365    prog->Varying = _mesa_new_parameter_list();
1366    prog->Attributes = _mesa_new_parameter_list();
1367    v.ctx = ctx;
1368    v.prog = prog;
1369
1370    v.mem_ctx = talloc_new(NULL);
1371    visit_exec_list(instructions, &v);
1372    v.ir_to_mesa_emit_op1(NULL, OPCODE_END,
1373                          ir_to_mesa_undef_dst, ir_to_mesa_undef);
1374
1375    prog->NumTemporaries = v.next_temp;
1376
1377    int num_instructions = 0;
1378    foreach_iter(exec_list_iterator, iter, v.instructions) {
1379       num_instructions++;
1380    }
1381
1382    mesa_instructions =
1383       (struct prog_instruction *)calloc(num_instructions,
1384                                         sizeof(*mesa_instructions));
1385    mesa_instruction_annotation = talloc_array(mem_ctx, ir_instruction *,
1386                                               num_instructions);
1387
1388    mesa_inst = mesa_instructions;
1389    i = 0;
1390    foreach_iter(exec_list_iterator, iter, v.instructions) {
1391       ir_to_mesa_instruction *inst = (ir_to_mesa_instruction *)iter.get();
1392
1393       mesa_inst->Opcode = inst->op;
1394       mesa_inst->DstReg.File = inst->dst_reg.file;
1395       mesa_inst->DstReg.Index = inst->dst_reg.index;
1396       mesa_inst->DstReg.CondMask = COND_TR;
1397       mesa_inst->DstReg.WriteMask = inst->dst_reg.writemask;
1398       mesa_inst->SrcReg[0] = mesa_src_reg_from_ir_src_reg(inst->src_reg[0]);
1399       mesa_inst->SrcReg[1] = mesa_src_reg_from_ir_src_reg(inst->src_reg[1]);
1400       mesa_inst->SrcReg[2] = mesa_src_reg_from_ir_src_reg(inst->src_reg[2]);
1401       mesa_instruction_annotation[i] = inst->ir;
1402
1403       mesa_inst++;
1404       i++;
1405    }
1406
1407    set_branchtargets(mesa_instructions, num_instructions);
1408    if (0) {
1409       print_program(mesa_instructions, mesa_instruction_annotation,
1410                     num_instructions);
1411    }
1412
1413    prog->Instructions = mesa_instructions;
1414    prog->NumInstructions = num_instructions;
1415
1416    _mesa_reference_program(ctx, &shader->mesa_shader->Program, prog);
1417
1418    return prog;
1419 }
1420
1421 /* Takes a Mesa gl shader structure and compiles it, returning our Mesa-like
1422  * structure with the IR and such attached.
1423  */
1424 static struct glsl_shader *
1425 _mesa_get_glsl_shader(GLcontext *ctx, void *mem_ctx, struct gl_shader *sh)
1426 {
1427    struct glsl_shader *shader = talloc_zero(mem_ctx, struct glsl_shader);
1428    struct _mesa_glsl_parse_state *state;
1429
1430    shader->Type = sh->Type;
1431    shader->Name = sh->Name;
1432    shader->RefCount = 1;
1433    shader->Source = sh->Source;
1434    shader->SourceLen = strlen(sh->Source);
1435    shader->mesa_shader = sh;
1436
1437    state = talloc_zero(shader, struct _mesa_glsl_parse_state);
1438    switch (shader->Type) {
1439    case GL_VERTEX_SHADER:   state->target = vertex_shader; break;
1440    case GL_FRAGMENT_SHADER: state->target = fragment_shader; break;
1441    case GL_GEOMETRY_SHADER: state->target = geometry_shader; break;
1442    }
1443
1444    state->scanner = NULL;
1445    state->translation_unit.make_empty();
1446    state->symbols = new(mem_ctx) glsl_symbol_table;
1447    state->info_log = talloc_strdup(shader, "");
1448    state->error = false;
1449    state->temp_index = 0;
1450    state->loop_or_switch_nesting = NULL;
1451    state->ARB_texture_rectangle_enable = true;
1452
1453    _mesa_glsl_lexer_ctor(state, shader->Source);
1454    _mesa_glsl_parse(state);
1455    _mesa_glsl_lexer_dtor(state);
1456
1457    shader->ir.make_empty();
1458    if (!state->error && !state->translation_unit.is_empty())
1459       _mesa_ast_to_hir(&shader->ir, state);
1460
1461    /* Optimization passes */
1462    if (!state->error && !shader->ir.is_empty()) {
1463       bool progress;
1464       do {
1465          progress = false;
1466
1467          progress = do_function_inlining(&shader->ir) || progress;
1468          progress = do_if_simplification(&shader->ir) || progress;
1469          progress = do_copy_propagation(&shader->ir) || progress;
1470          progress = do_dead_code_local(&shader->ir) || progress;
1471          progress = do_dead_code_unlinked(state, &shader->ir) || progress;
1472          progress = do_constant_variable_unlinked(&shader->ir) || progress;
1473          progress = do_constant_folding(&shader->ir) || progress;
1474          progress = do_vec_index_to_swizzle(&shader->ir) || progress;
1475          progress = do_swizzle_swizzle(&shader->ir) || progress;
1476       } while (progress);
1477    }
1478
1479    shader->symbols = state->symbols;
1480
1481    shader->CompileStatus = !state->error;
1482    shader->InfoLog = state->info_log;
1483
1484    talloc_free(state);
1485
1486    return shader;
1487 }
1488
1489 extern "C" {
1490
1491 void
1492 _mesa_glsl_compile_shader(GLcontext *ctx, struct gl_shader *sh)
1493 {
1494    struct glsl_shader *shader;
1495    TALLOC_CTX *mem_ctx = talloc_new(NULL);
1496
1497    shader = _mesa_get_glsl_shader(ctx, mem_ctx, sh);
1498
1499    sh->CompileStatus = shader->CompileStatus;
1500    sh->InfoLog = strdup(shader->InfoLog);
1501    talloc_free(mem_ctx);
1502  }
1503
1504 void
1505 _mesa_glsl_link_shader(GLcontext *ctx, struct gl_shader_program *prog)
1506 {
1507    struct glsl_program *whole_program;
1508    unsigned int i;
1509
1510    _mesa_clear_shader_program_data(ctx, prog);
1511
1512    whole_program = talloc_zero(NULL, struct glsl_program);
1513    whole_program->LinkStatus = GL_TRUE;
1514    whole_program->NumShaders = prog->NumShaders;
1515    whole_program->Shaders = talloc_array(whole_program, struct glsl_shader *,
1516                                          prog->NumShaders);
1517
1518    for (i = 0; i < prog->NumShaders; i++) {
1519       whole_program->Shaders[i] = _mesa_get_glsl_shader(ctx, whole_program,
1520                                                         prog->Shaders[i]);
1521       if (!whole_program->Shaders[i]->CompileStatus) {
1522          whole_program->InfoLog =
1523             talloc_asprintf_append(whole_program->InfoLog,
1524                                    "linking with uncompiled shader");
1525          whole_program->LinkStatus = GL_FALSE;
1526       }
1527    }
1528
1529    prog->Uniforms = _mesa_new_uniform_list();
1530    prog->Varying = _mesa_new_parameter_list();
1531    _mesa_reference_vertprog(ctx, &prog->VertexProgram, NULL);
1532    _mesa_reference_fragprog(ctx, &prog->FragmentProgram, NULL);
1533
1534    if (whole_program->LinkStatus)
1535       link_shaders(whole_program);
1536
1537    prog->LinkStatus = whole_program->LinkStatus;
1538
1539    /* FINISHME: This should use the linker-generated code */
1540    if (prog->LinkStatus) {
1541       for (i = 0; i < prog->NumShaders; i++) {
1542          struct gl_program *linked_prog;
1543
1544          linked_prog = get_mesa_program(ctx, whole_program,
1545                                         whole_program->Shaders[i]);
1546          count_resources(linked_prog);
1547
1548          link_uniforms_to_shared_uniform_list(prog->Uniforms, linked_prog);
1549
1550          switch (whole_program->Shaders[i]->Type) {
1551          case GL_VERTEX_SHADER:
1552             _mesa_reference_vertprog(ctx, &prog->VertexProgram,
1553                                      (struct gl_vertex_program *)linked_prog);
1554             break;
1555          case GL_FRAGMENT_SHADER:
1556             _mesa_reference_fragprog(ctx, &prog->FragmentProgram,
1557                                      (struct gl_fragment_program *)linked_prog);
1558             break;
1559          }
1560       }
1561    }
1562
1563    talloc_free(whole_program);
1564 }
1565
1566 } /* extern "C" */