src/mesa/shader/ir_to_mesa.cpp

   1 /*
   2  * Copyright (C) 2005-2007  Brian Paul   All Rights Reserved.
   3  * Copyright (C) 2008  VMware, Inc.   All Rights Reserved.
   4  * Copyright © 2010 Intel Corporation
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a
   7  * copy of this software and associated documentation files (the "Software"),
   8  * to deal in the Software without restriction, including without limitation
   9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  10  * and/or sell copies of the Software, and to permit persons to whom the
  11  * Software is furnished to do so, subject to the following conditions:
  12  *
  13  * The above copyright notice and this permission notice (including the next
  14  * paragraph) shall be included in all copies or substantial portions of the
  15  * Software.
  16  *
  17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  20  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  22  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  23  * DEALINGS IN THE SOFTWARE.
  24  */
  25
  26 /**
  27  * \file ir_to_mesa.cpp
  28  *
  29  * Translates the IR to ARB_fragment_program text if possible,
  30  * printing the result
  31  */
  32
  33 #include <stdio.h>
  34 #include "ir.h"
  35 #include "ir_visitor.h"
  36 #include "ir_print_visitor.h"
  37 #include "ir_expression_flattening.h"
  38 #include "glsl_types.h"
  39 #include "glsl_parser_extras.h"
  40 #include "../glsl/program.h"
  41 #include "ir_optimization.h"
  42 #include "ast.h"
  43
  44 extern "C" {
  45 #include "main/mtypes.h"
  46 #include "shader/prog_instruction.h"
  47 #include "shader/prog_print.h"
  48 #include "shader/program.h"
  49 #include "shader/prog_uniform.h"
  50 #include "shader/prog_parameter.h"
  51 #include "shader/shader_api.h"
  52 }
  53
  54 /**
  55  * This struct is a corresponding struct to Mesa prog_src_register, with
  56  * wider fields.
  57  */
  58 typedef struct ir_to_mesa_src_reg {
  59    int file; /**< PROGRAM_* from Mesa */
  60    int index; /**< temporary index, VERT_ATTRIB_*, FRAG_ATTRIB_*, etc. */
  61    GLuint swizzle; /**< SWIZZLE_XYZWONEZERO swizzles from Mesa. */
  62    int negate; /**< NEGATE_XYZW mask from mesa */
  63    bool reladdr; /**< Register index should be offset by address reg. */
  64 } ir_to_mesa_src_reg;
  65
  66 typedef struct ir_to_mesa_dst_reg {
  67    int file; /**< PROGRAM_* from Mesa */
  68    int index; /**< temporary index, VERT_ATTRIB_*, FRAG_ATTRIB_*, etc. */
  69    int writemask; /**< Bitfield of WRITEMASK_[XYZW] */
  70 } ir_to_mesa_dst_reg;
  71
  72 extern ir_to_mesa_src_reg ir_to_mesa_undef;
  73
  74 class ir_to_mesa_instruction : public exec_node {
  75 public:
  76    enum prog_opcode op;
  77    ir_to_mesa_dst_reg dst_reg;
  78    ir_to_mesa_src_reg src_reg[3];
  79    /** Pointer to the ir source this tree came from for debugging */
  80    ir_instruction *ir;
  81 };
  82
  83 class temp_entry : public exec_node {
  84 public:
  85    temp_entry(ir_variable *var, int file, int index)
  86       : file(file), index(index), var(var)
  87    {
  88       /* empty */
  89    }
  90
  91    int file;
  92    int index;
  93    ir_variable *var; /* variable that maps to this, if any */
  94 };
  95
  96 class ir_to_mesa_visitor : public ir_visitor {
  97 public:
  98    ir_to_mesa_visitor();
  99
 100    GLcontext *ctx;
 101    struct gl_program *prog;
 102
 103    int next_temp;
 104
 105    temp_entry *find_variable_storage(ir_variable *var);
 106
 107    ir_to_mesa_src_reg get_temp(const glsl_type *type);
 108
 109    struct ir_to_mesa_src_reg src_reg_for_float(float val);
 110
 111    /**
 112     * \name Visit methods
 113     *
 114     * As typical for the visitor pattern, there must be one \c visit method for
 115     * each concrete subclass of \c ir_instruction.  Virtual base classes within
 116     * the hierarchy should not have \c visit methods.
 117     */
 118    /*@{*/
 119    virtual void visit(ir_variable *);
 120    virtual void visit(ir_loop *);
 121    virtual void visit(ir_loop_jump *);
 122    virtual void visit(ir_function_signature *);
 123    virtual void visit(ir_function *);
 124    virtual void visit(ir_expression *);
 125    virtual void visit(ir_swizzle *);
 126    virtual void visit(ir_dereference_variable  *);
 127    virtual void visit(ir_dereference_array *);
 128    virtual void visit(ir_dereference_record *);
 129    virtual void visit(ir_assignment *);
 130    virtual void visit(ir_constant *);
 131    virtual void visit(ir_call *);
 132    virtual void visit(ir_return *);
 133    virtual void visit(ir_texture *);
 134    virtual void visit(ir_if *);
 135    /*@}*/
 136
 137    struct ir_to_mesa_src_reg result;
 138
 139    /** List of temp_entry */
 140    exec_list variable_storage;
 141
 142    /** List of ir_to_mesa_instruction */
 143    exec_list instructions;
 144
 145    ir_to_mesa_instruction *ir_to_mesa_emit_op1(ir_instruction *ir,
 146                                                enum prog_opcode op,
 147                                                ir_to_mesa_dst_reg dst,
 148                                                ir_to_mesa_src_reg src0);
 149
 150    ir_to_mesa_instruction *ir_to_mesa_emit_op2(ir_instruction *ir,
 151                                                enum prog_opcode op,
 152                                                ir_to_mesa_dst_reg dst,
 153                                                ir_to_mesa_src_reg src0,
 154                                                ir_to_mesa_src_reg src1);
 155
 156    ir_to_mesa_instruction *ir_to_mesa_emit_op3(ir_instruction *ir,
 157                                                enum prog_opcode op,
 158                                                ir_to_mesa_dst_reg dst,
 159                                                ir_to_mesa_src_reg src0,
 160                                                ir_to_mesa_src_reg src1,
 161                                                ir_to_mesa_src_reg src2);
 162
 163    void ir_to_mesa_emit_scalar_op1(ir_instruction *ir,
 164                                    enum prog_opcode op,
 165                                    ir_to_mesa_dst_reg dst,
 166                                    ir_to_mesa_src_reg src0);
 167
 168    void *mem_ctx;
 169 };
 170
 171 ir_to_mesa_src_reg ir_to_mesa_undef = {
 172    PROGRAM_UNDEFINED, 0, SWIZZLE_NOOP, NEGATE_NONE, false,
 173 };
 174
 175 ir_to_mesa_dst_reg ir_to_mesa_undef_dst = {
 176    PROGRAM_UNDEFINED, 0, SWIZZLE_NOOP
 177 };
 178
 179 ir_to_mesa_dst_reg ir_to_mesa_address_reg = {
 180    PROGRAM_ADDRESS, 0, WRITEMASK_X
 181 };
 182
 183 static int swizzle_for_size(int size)
 184 {
 185    int size_swizzles[4] = {
 186       MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
 187       MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y),
 188       MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z),
 189       MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W),
 190    };
 191
 192    return size_swizzles[size - 1];
 193 }
 194
 195 /* This list should match up with builtin_variables.h */
 196 static const struct {
 197    const char *name;
 198    int file;
 199    int index;
 200 } builtin_var_to_mesa_reg[] = {
 201    /* core_vs */
 202    {"gl_Position", PROGRAM_OUTPUT, VERT_RESULT_HPOS},
 203    {"gl_PointSize", PROGRAM_OUTPUT, VERT_RESULT_PSIZ},
 204
 205    /* core_fs */
 206    {"gl_FragCoord", PROGRAM_INPUT, FRAG_ATTRIB_WPOS},
 207    {"gl_FrontFacing", PROGRAM_INPUT, FRAG_ATTRIB_FACE},
 208    {"gl_FragColor", PROGRAM_OUTPUT, FRAG_ATTRIB_COL0},
 209    {"gl_FragDepth", PROGRAM_UNDEFINED, FRAG_ATTRIB_WPOS}, /* FINISHME: WPOS.z */
 210
 211    /* 110_deprecated_fs */
 212    {"gl_Color", PROGRAM_INPUT, FRAG_ATTRIB_COL0},
 213    {"gl_SecondaryColor", PROGRAM_INPUT, FRAG_ATTRIB_COL1},
 214    {"gl_FogFragCoord", PROGRAM_INPUT, FRAG_ATTRIB_FOGC},
 215    {"gl_TexCoord", PROGRAM_INPUT, FRAG_ATTRIB_TEX0}, /* array */
 216
 217    /* 110_deprecated_vs */
 218    {"gl_Vertex", PROGRAM_INPUT, VERT_ATTRIB_POS},
 219    {"gl_Normal", PROGRAM_INPUT, VERT_ATTRIB_NORMAL},
 220    {"gl_Color", PROGRAM_INPUT, VERT_ATTRIB_COLOR0},
 221    {"gl_SecondaryColor", PROGRAM_INPUT, VERT_ATTRIB_COLOR1},
 222    {"gl_MultiTexCoord0", PROGRAM_INPUT, VERT_ATTRIB_TEX0},
 223    {"gl_MultiTexCoord1", PROGRAM_INPUT, VERT_ATTRIB_TEX1},
 224    {"gl_MultiTexCoord2", PROGRAM_INPUT, VERT_ATTRIB_TEX2},
 225    {"gl_MultiTexCoord3", PROGRAM_INPUT, VERT_ATTRIB_TEX3},
 226    {"gl_MultiTexCoord4", PROGRAM_INPUT, VERT_ATTRIB_TEX4},
 227    {"gl_MultiTexCoord5", PROGRAM_INPUT, VERT_ATTRIB_TEX5},
 228    {"gl_MultiTexCoord6", PROGRAM_INPUT, VERT_ATTRIB_TEX6},
 229    {"gl_MultiTexCoord7", PROGRAM_INPUT, VERT_ATTRIB_TEX7},
 230    {"gl_TexCoord", PROGRAM_OUTPUT, VERT_RESULT_TEX0}, /* array */
 231    {"gl_FogCoord", PROGRAM_INPUT, VERT_RESULT_FOGC},
 232    /*{"gl_ClipVertex", PROGRAM_OUTPUT, VERT_ATTRIB_FOGC},*/ /* FINISHME */
 233    {"gl_FrontColor", PROGRAM_OUTPUT, VERT_RESULT_COL0},
 234    {"gl_BackColor", PROGRAM_OUTPUT, VERT_RESULT_BFC0},
 235    {"gl_FrontSecondaryColor", PROGRAM_OUTPUT, VERT_RESULT_COL1},
 236    {"gl_BackSecondaryColor", PROGRAM_OUTPUT, VERT_RESULT_BFC1},
 237    {"gl_FogFragCoord", PROGRAM_OUTPUT, VERT_RESULT_FOGC},
 238
 239    /* 130_vs */
 240    /*{"gl_VertexID", PROGRAM_INPUT, VERT_ATTRIB_FOGC},*/ /* FINISHME */
 241
 242    {"gl_FragData", PROGRAM_OUTPUT, FRAG_RESULT_DATA0}, /* array */
 243 };
 244
 245 ir_to_mesa_instruction *
 246 ir_to_mesa_visitor::ir_to_mesa_emit_op3(ir_instruction *ir,
 247                                         enum prog_opcode op,
 248                                         ir_to_mesa_dst_reg dst,
 249                                         ir_to_mesa_src_reg src0,
 250                                         ir_to_mesa_src_reg src1,
 251                                         ir_to_mesa_src_reg src2)
 252 {
 253    ir_to_mesa_instruction *inst = new(mem_ctx) ir_to_mesa_instruction();
 254
 255    inst->op = op;
 256    inst->dst_reg = dst;
 257    inst->src_reg[0] = src0;
 258    inst->src_reg[1] = src1;
 259    inst->src_reg[2] = src2;
 260    inst->ir = ir;
 261
 262    this->instructions.push_tail(inst);
 263
 264    return inst;
 265 }
 266
 267
 268 ir_to_mesa_instruction *
 269 ir_to_mesa_visitor::ir_to_mesa_emit_op2(ir_instruction *ir,
 270                                         enum prog_opcode op,
 271                                         ir_to_mesa_dst_reg dst,
 272                                         ir_to_mesa_src_reg src0,
 273                                         ir_to_mesa_src_reg src1)
 274 {
 275    return ir_to_mesa_emit_op3(ir, op, dst, src0, src1, ir_to_mesa_undef);
 276 }
 277
 278 ir_to_mesa_instruction *
 279 ir_to_mesa_visitor::ir_to_mesa_emit_op1(ir_instruction *ir,
 280                                         enum prog_opcode op,
 281                                         ir_to_mesa_dst_reg dst,
 282                                         ir_to_mesa_src_reg src0)
 283 {
 284    return ir_to_mesa_emit_op3(ir, op, dst,
 285                               src0, ir_to_mesa_undef, ir_to_mesa_undef);
 286 }
 287
 288 inline ir_to_mesa_dst_reg
 289 ir_to_mesa_dst_reg_from_src(ir_to_mesa_src_reg reg)
 290 {
 291    ir_to_mesa_dst_reg dst_reg;
 292
 293    dst_reg.file = reg.file;
 294    dst_reg.index = reg.index;
 295    dst_reg.writemask = WRITEMASK_XYZW;
 296
 297    return dst_reg;
 298 }
 299
 300 /**
 301  * Emits Mesa scalar opcodes to produce unique answers across channels.
 302  *
 303  * Some Mesa opcodes are scalar-only, like ARB_fp/vp.  The src X
 304  * channel determines the result across all channels.  So to do a vec4
 305  * of this operation, we want to emit a scalar per source channel used
 306  * to produce dest channels.
 307  */
 308 void
 309 ir_to_mesa_visitor::ir_to_mesa_emit_scalar_op1(ir_instruction *ir,
 310                                                enum prog_opcode op,
 311                                                ir_to_mesa_dst_reg dst,
 312                                                ir_to_mesa_src_reg src0)
 313 {
 314    int i, j;
 315    int done_mask = ~dst.writemask;
 316
 317    /* Mesa RCP is a scalar operation splatting results to all channels,
 318     * like ARB_fp/vp.  So emit as many RCPs as necessary to cover our
 319     * dst channels.
 320     */
 321    for (i = 0; i < 4; i++) {
 322       GLuint this_mask = (1 << i);
 323       ir_to_mesa_instruction *inst;
 324       ir_to_mesa_src_reg src = src0;
 325
 326       if (done_mask & this_mask)
 327          continue;
 328
 329       GLuint src_swiz = GET_SWZ(src.swizzle, i);
 330       for (j = i + 1; j < 4; j++) {
 331          if (!(done_mask & (1 << j)) && GET_SWZ(src.swizzle, j) == src_swiz) {
 332             this_mask |= (1 << j);
 333          }
 334       }
 335       src.swizzle = MAKE_SWIZZLE4(src_swiz, src_swiz,
 336                                   src_swiz, src_swiz);
 337
 338       inst = ir_to_mesa_emit_op1(ir, op,
 339                                  dst,
 340                                  src);
 341       inst->dst_reg.writemask = this_mask;
 342       done_mask |= this_mask;
 343    }
 344 }
 345
 346 struct ir_to_mesa_src_reg
 347 ir_to_mesa_visitor::src_reg_for_float(float val)
 348 {
 349    ir_to_mesa_src_reg src_reg;
 350
 351    src_reg.file = PROGRAM_CONSTANT;
 352    src_reg.index = _mesa_add_unnamed_constant(this->prog->Parameters,
 353                                               &val, 1, &src_reg.swizzle);
 354
 355    return src_reg;
 356 }
 357
 358 /**
 359  * In the initial pass of codegen, we assign temporary numbers to
 360  * intermediate results.  (not SSA -- variable assignments will reuse
 361  * storage).  Actual register allocation for the Mesa VM occurs in a
 362  * pass over the Mesa IR later.
 363  */
 364 ir_to_mesa_src_reg
 365 ir_to_mesa_visitor::get_temp(const glsl_type *type)
 366 {
 367    ir_to_mesa_src_reg src_reg;
 368    int swizzle[4];
 369    int i;
 370
 371    assert(!type->is_array());
 372
 373    src_reg.file = PROGRAM_TEMPORARY;
 374    src_reg.index = type->matrix_columns;
 375    src_reg.reladdr = false;
 376
 377    for (i = 0; i < type->vector_elements; i++)
 378       swizzle[i] = i;
 379    for (; i < 4; i++)
 380       swizzle[i] = type->vector_elements - 1;
 381    src_reg.swizzle = MAKE_SWIZZLE4(swizzle[0], swizzle[1],
 382                                    swizzle[2], swizzle[3]);
 383
 384    return src_reg;
 385 }
 386
 387 static int
 388 type_size(const struct glsl_type *type)
 389 {
 390    unsigned int i;
 391    int size;
 392
 393    switch (type->base_type) {
 394    case GLSL_TYPE_UINT:
 395    case GLSL_TYPE_INT:
 396    case GLSL_TYPE_FLOAT:
 397    case GLSL_TYPE_BOOL:
 398       if (type->is_matrix()) {
 399          return 4; /* FINISHME: Not all matrices are 4x4. */
 400       } else {
 401          /* Regardless of size of vector, it gets a vec4. This is bad
 402           * packing for things like floats, but otherwise arrays become a
 403           * mess.  Hopefully a later pass over the code can pack scalars
 404           * down if appropriate.
 405           */
 406          return 1;
 407       }
 408    case GLSL_TYPE_ARRAY:
 409       return type_size(type->fields.array) * type->length;
 410    case GLSL_TYPE_STRUCT:
 411       size = 0;
 412       for (i = 0; i < type->length; i++) {
 413          size += type_size(type->fields.structure[i].type);
 414       }
 415       return size;
 416    default:
 417       assert(0);
 418    }
 419 }
 420
 421 temp_entry *
 422 ir_to_mesa_visitor::find_variable_storage(ir_variable *var)
 423 {
 424
 425    temp_entry *entry;
 426
 427    foreach_iter(exec_list_iterator, iter, this->variable_storage) {
 428       entry = (temp_entry *)iter.get();
 429
 430       if (entry->var == var)
 431          return entry;
 432    }
 433
 434    return NULL;
 435 }
 436
 437 void
 438 ir_to_mesa_visitor::visit(ir_variable *ir)
 439 {
 440    (void)ir;
 441 }
 442
 443 void
 444 ir_to_mesa_visitor::visit(ir_loop *ir)
 445 {
 446    assert(!ir->from);
 447    assert(!ir->to);
 448    assert(!ir->increment);
 449    assert(!ir->counter);
 450
 451    ir_to_mesa_emit_op1(NULL, OPCODE_BGNLOOP,
 452                        ir_to_mesa_undef_dst, ir_to_mesa_undef);
 453
 454    visit_exec_list(&ir->body_instructions, this);
 455
 456    ir_to_mesa_emit_op1(NULL, OPCODE_ENDLOOP,
 457                        ir_to_mesa_undef_dst, ir_to_mesa_undef);
 458 }
 459
 460 void
 461 ir_to_mesa_visitor::visit(ir_loop_jump *ir)
 462 {
 463    switch (ir->mode) {
 464    case ir_loop_jump::jump_break:
 465       ir_to_mesa_emit_op1(NULL, OPCODE_BRK,
 466                           ir_to_mesa_undef_dst, ir_to_mesa_undef);
 467       break;
 468    case ir_loop_jump::jump_continue:
 469       ir_to_mesa_emit_op1(NULL, OPCODE_CONT,
 470                           ir_to_mesa_undef_dst, ir_to_mesa_undef);
 471       break;
 472    }
 473 }
 474
 475
 476 void
 477 ir_to_mesa_visitor::visit(ir_function_signature *ir)
 478 {
 479    assert(0);
 480    (void)ir;
 481 }
 482
 483 void
 484 ir_to_mesa_visitor::visit(ir_function *ir)
 485 {
 486    /* Ignore function bodies other than main() -- we shouldn't see calls to
 487     * them since they should all be inlined before we get to ir_to_mesa.
 488     */
 489    if (strcmp(ir->name, "main") == 0) {
 490       const ir_function_signature *sig;
 491       exec_list empty;
 492
 493       sig = ir->matching_signature(&empty);
 494
 495       assert(sig);
 496
 497       foreach_iter(exec_list_iterator, iter, sig->body) {
 498          ir_instruction *ir = (ir_instruction *)iter.get();
 499
 500          ir->accept(this);
 501       }
 502    }
 503 }
 504
 505 void
 506 ir_to_mesa_visitor::visit(ir_expression *ir)
 507 {
 508    unsigned int operand;
 509    struct ir_to_mesa_src_reg op[2];
 510    struct ir_to_mesa_src_reg result_src;
 511    struct ir_to_mesa_dst_reg result_dst;
 512    const glsl_type *vec4_type = glsl_type::get_instance(GLSL_TYPE_FLOAT, 4, 1);
 513    const glsl_type *vec3_type = glsl_type::get_instance(GLSL_TYPE_FLOAT, 3, 1);
 514    const glsl_type *vec2_type = glsl_type::get_instance(GLSL_TYPE_FLOAT, 2, 1);
 515
 516    for (operand = 0; operand < ir->get_num_operands(); operand++) {
 517       this->result.file = PROGRAM_UNDEFINED;
 518       ir->operands[operand]->accept(this);
 519       if (this->result.file == PROGRAM_UNDEFINED) {
 520          ir_print_visitor v;
 521          printf("Failed to get tree for expression operand:\n");
 522          ir->operands[operand]->accept(&v);
 523          exit(1);
 524       }
 525       op[operand] = this->result;
 526
 527       /* Only expression implemented for matrices yet */
 528       assert(!ir->operands[operand]->type->is_matrix() ||
 529              ir->operation == ir_binop_mul);
 530    }
 531
 532    this->result.file = PROGRAM_UNDEFINED;
 533
 534    /* Storage for our result.  Ideally for an assignment we'd be using
 535     * the actual storage for the result here, instead.
 536     */
 537    result_src = get_temp(ir->type);
 538    /* convenience for the emit functions below. */
 539    result_dst = ir_to_mesa_dst_reg_from_src(result_src);
 540    /* Limit writes to the channels that will be used by result_src later.
 541     * This does limit this temp's use as a temporary for multi-instruction
 542     * sequences.
 543     */
 544    result_dst.writemask = (1 << ir->type->vector_elements) - 1;
 545
 546    switch (ir->operation) {
 547    case ir_unop_logic_not:
 548       ir_to_mesa_emit_op2(ir, OPCODE_SEQ, result_dst,
 549                           op[0], src_reg_for_float(0.0));
 550       break;
 551    case ir_unop_neg:
 552       op[0].negate = ~op[0].negate;
 553       result_src = op[0];
 554       break;
 555    case ir_unop_exp:
 556       ir_to_mesa_emit_scalar_op1(ir, OPCODE_EXP, result_dst, op[0]);
 557       break;
 558    case ir_unop_exp2:
 559       ir_to_mesa_emit_scalar_op1(ir, OPCODE_EX2, result_dst, op[0]);
 560       break;
 561    case ir_unop_log:
 562       ir_to_mesa_emit_scalar_op1(ir, OPCODE_LOG, result_dst, op[0]);
 563       break;
 564    case ir_unop_log2:
 565       ir_to_mesa_emit_scalar_op1(ir, OPCODE_LG2, result_dst, op[0]);
 566       break;
 567    case ir_unop_sin:
 568       ir_to_mesa_emit_scalar_op1(ir, OPCODE_SIN, result_dst, op[0]);
 569       break;
 570    case ir_unop_cos:
 571       ir_to_mesa_emit_scalar_op1(ir, OPCODE_COS, result_dst, op[0]);
 572       break;
 573    case ir_binop_add:
 574       ir_to_mesa_emit_op2(ir, OPCODE_ADD, result_dst, op[0], op[1]);
 575       break;
 576    case ir_binop_sub:
 577       ir_to_mesa_emit_op2(ir, OPCODE_SUB, result_dst, op[0], op[1]);
 578       break;
 579    case ir_binop_mul:
 580       if (ir->operands[0]->type->is_matrix() &&
 581           !ir->operands[1]->type->is_matrix()) {
 582          if (ir->operands[1]->type->is_scalar()) {
 583             ir_to_mesa_dst_reg dst_column = result_dst;
 584             ir_to_mesa_src_reg src_column = op[0];
 585             for (int i = 0; i < ir->operands[0]->type->matrix_columns; i++) {
 586                ir_to_mesa_emit_op2(ir, OPCODE_MUL,
 587                                    dst_column, src_column, op[1]);
 588                dst_column.index++;
 589                src_column.index++;
 590             }
 591          } else {
 592             ir_to_mesa_dst_reg dst_chan = result_dst;
 593             ir_to_mesa_src_reg src_column = op[0];
 594             ir_to_mesa_src_reg src_chan = op[1];
 595             for (int i = 0; i < ir->operands[0]->type->matrix_columns; i++) {
 596                dst_chan.writemask = (1 << i);
 597                src_chan.swizzle = MAKE_SWIZZLE4(i, i, i, i);
 598                ir_to_mesa_emit_op2(ir, OPCODE_MUL,
 599                                    dst_chan, src_column, src_chan);
 600                src_column.index++;
 601             }
 602          }
 603       } else {
 604          assert(!ir->operands[0]->type->is_matrix());
 605          assert(!ir->operands[1]->type->is_matrix());
 606          ir_to_mesa_emit_op2(ir, OPCODE_MUL, result_dst, op[0], op[1]);
 607       }
 608       break;
 609    case ir_binop_div:
 610       ir_to_mesa_emit_scalar_op1(ir, OPCODE_RCP, result_dst, op[1]);
 611       ir_to_mesa_emit_op2(ir, OPCODE_MUL, result_dst, op[0], result_src);
 612       break;
 613
 614    case ir_binop_less:
 615       ir_to_mesa_emit_op2(ir, OPCODE_SLT, result_dst, op[0], op[1]);
 616       break;
 617    case ir_binop_greater:
 618       ir_to_mesa_emit_op2(ir, OPCODE_SGT, result_dst, op[0], op[1]);
 619       break;
 620    case ir_binop_lequal:
 621       ir_to_mesa_emit_op2(ir, OPCODE_SLE, result_dst, op[0], op[1]);
 622       break;
 623    case ir_binop_gequal:
 624       ir_to_mesa_emit_op2(ir, OPCODE_SGE, result_dst, op[0], op[1]);
 625       break;
 626    case ir_binop_equal:
 627       ir_to_mesa_emit_op2(ir, OPCODE_SEQ, result_dst, op[0], op[1]);
 628       break;
 629    case ir_binop_logic_xor:
 630    case ir_binop_nequal:
 631       ir_to_mesa_emit_op2(ir, OPCODE_SNE, result_dst, op[0], op[1]);
 632       break;
 633
 634    case ir_binop_logic_or:
 635       /* This could be a saturated add and skip the SNE. */
 636       ir_to_mesa_emit_op2(ir, OPCODE_ADD,
 637                           result_dst,
 638                           op[0], op[1]);
 639
 640       ir_to_mesa_emit_op2(ir, OPCODE_SNE,
 641                           result_dst,
 642                           result_src, src_reg_for_float(0.0));
 643       break;
 644
 645    case ir_binop_logic_and:
 646       /* the bool args are stored as float 0.0 or 1.0, so "mul" gives us "and". */
 647       ir_to_mesa_emit_op2(ir, OPCODE_MUL,
 648                           result_dst,
 649                           op[0], op[1]);
 650       break;
 651
 652    case ir_binop_dot:
 653       if (ir->operands[0]->type == vec4_type) {
 654          assert(ir->operands[1]->type == vec4_type);
 655          ir_to_mesa_emit_op2(ir, OPCODE_DP4,
 656                              result_dst,
 657                              op[0], op[1]);
 658       } else if (ir->operands[0]->type == vec3_type) {
 659          assert(ir->operands[1]->type == vec3_type);
 660          ir_to_mesa_emit_op2(ir, OPCODE_DP3,
 661                              result_dst,
 662                              op[0], op[1]);
 663       } else if (ir->operands[0]->type == vec2_type) {
 664          assert(ir->operands[1]->type == vec2_type);
 665          ir_to_mesa_emit_op2(ir, OPCODE_DP2,
 666                              result_dst,
 667                              op[0], op[1]);
 668       }
 669       break;
 670    case ir_unop_sqrt:
 671       ir_to_mesa_emit_scalar_op1(ir, OPCODE_RSQ, result_dst, op[0]);
 672       ir_to_mesa_emit_op1(ir, OPCODE_RCP, result_dst, result_src);
 673       break;
 674    case ir_unop_rsq:
 675       ir_to_mesa_emit_scalar_op1(ir, OPCODE_RSQ, result_dst, op[0]);
 676       break;
 677    case ir_unop_i2f:
 678       /* Mesa IR lacks types, ints are stored as truncated floats. */
 679       result_src = op[0];
 680       break;
 681    case ir_unop_f2i:
 682       ir_to_mesa_emit_op1(ir, OPCODE_TRUNC, result_dst, op[0]);
 683       break;
 684    case ir_unop_f2b:
 685       ir_to_mesa_emit_op2(ir, OPCODE_SNE, result_dst,
 686                           result_src, src_reg_for_float(0.0));
 687       break;
 688    case ir_unop_trunc:
 689       ir_to_mesa_emit_op1(ir, OPCODE_TRUNC, result_dst, op[0]);
 690       break;
 691    case ir_unop_ceil:
 692       op[0].negate = ~op[0].negate;
 693       ir_to_mesa_emit_op1(ir, OPCODE_FLR, result_dst, op[0]);
 694       result_src.negate = ~result_src.negate;
 695       break;
 696    case ir_unop_floor:
 697       ir_to_mesa_emit_op1(ir, OPCODE_FLR, result_dst, op[0]);
 698       break;
 699    case ir_binop_min:
 700       ir_to_mesa_emit_op2(ir, OPCODE_MIN, result_dst, op[0], op[1]);
 701       break;
 702    case ir_binop_max:
 703       ir_to_mesa_emit_op2(ir, OPCODE_MAX, result_dst, op[0], op[1]);
 704       break;
 705    default:
 706       ir_print_visitor v;
 707       printf("Failed to get tree for expression:\n");
 708       ir->accept(&v);
 709       exit(1);
 710       break;
 711    }
 712
 713    this->result = result_src;
 714 }
 715
 716
 717 void
 718 ir_to_mesa_visitor::visit(ir_swizzle *ir)
 719 {
 720    ir_to_mesa_src_reg src_reg;
 721    int i;
 722    int swizzle[4];
 723
 724    /* Note that this is only swizzles in expressions, not those on the left
 725     * hand side of an assignment, which do write masking.  See ir_assignment
 726     * for that.
 727     */
 728
 729    ir->val->accept(this);
 730    src_reg = this->result;
 731    assert(src_reg.file != PROGRAM_UNDEFINED);
 732
 733    for (i = 0; i < 4; i++) {
 734       if (i < ir->type->vector_elements) {
 735          switch (i) {
 736          case 0:
 737             swizzle[i] = ir->mask.x;
 738             break;
 739          case 1:
 740             swizzle[i] = ir->mask.y;
 741             break;
 742          case 2:
 743             swizzle[i] = ir->mask.z;
 744             break;
 745          case 3:
 746             swizzle[i] = ir->mask.w;
 747             break;
 748          }
 749       } else {
 750          /* If the type is smaller than a vec4, replicate the last
 751           * channel out.
 752           */
 753          swizzle[i] = ir->type->vector_elements - 1;
 754       }
 755    }
 756
 757    src_reg.swizzle = MAKE_SWIZZLE4(swizzle[0],
 758                                    swizzle[1],
 759                                    swizzle[2],
 760                                    swizzle[3]);
 761
 762    this->result = src_reg;
 763 }
 764
 765 static temp_entry *
 766 get_builtin_matrix_ref(void *mem_ctx, struct gl_program *prog, ir_variable *var)
 767 {
 768    /*
 769     * NOTE: The ARB_vertex_program extension specified that matrices get
 770     * loaded in registers in row-major order.  With GLSL, we want column-
 771     * major order.  So, we need to transpose all matrices here...
 772     */
 773    static const struct {
 774       const char *name;
 775       int matrix;
 776       int modifier;
 777    } matrices[] = {
 778       { "gl_ModelViewMatrix", STATE_MODELVIEW_MATRIX, STATE_MATRIX_TRANSPOSE },
 779       { "gl_ModelViewMatrixInverse", STATE_MODELVIEW_MATRIX, STATE_MATRIX_INVTRANS },
 780       { "gl_ModelViewMatrixTranspose", STATE_MODELVIEW_MATRIX, 0 },
 781       { "gl_ModelViewMatrixInverseTranspose", STATE_MODELVIEW_MATRIX, STATE_MATRIX_INVERSE },
 782
 783       { "gl_ProjectionMatrix", STATE_PROJECTION_MATRIX, STATE_MATRIX_TRANSPOSE },
 784       { "gl_ProjectionMatrixInverse", STATE_PROJECTION_MATRIX, STATE_MATRIX_INVTRANS },
 785       { "gl_ProjectionMatrixTranspose", STATE_PROJECTION_MATRIX, 0 },
 786       { "gl_ProjectionMatrixInverseTranspose", STATE_PROJECTION_MATRIX, STATE_MATRIX_INVERSE },
 787
 788       { "gl_ModelViewProjectionMatrix", STATE_MVP_MATRIX, STATE_MATRIX_TRANSPOSE },
 789       { "gl_ModelViewProjectionMatrixInverse", STATE_MVP_MATRIX, STATE_MATRIX_INVTRANS },
 790       { "gl_ModelViewProjectionMatrixTranspose", STATE_MVP_MATRIX, 0 },
 791       { "gl_ModelViewProjectionMatrixInverseTranspose", STATE_MVP_MATRIX, STATE_MATRIX_INVERSE },
 792
 793       { "gl_TextureMatrix", STATE_TEXTURE_MATRIX, STATE_MATRIX_TRANSPOSE },
 794       { "gl_TextureMatrixInverse", STATE_TEXTURE_MATRIX, STATE_MATRIX_INVTRANS },
 795       { "gl_TextureMatrixTranspose", STATE_TEXTURE_MATRIX, 0 },
 796       { "gl_TextureMatrixInverseTranspose", STATE_TEXTURE_MATRIX, STATE_MATRIX_INVERSE },
 797
 798       { "gl_NormalMatrix", STATE_MODELVIEW_MATRIX, STATE_MATRIX_INVERSE },
 799
 800    };
 801    unsigned int i;
 802    temp_entry *entry;
 803
 804    /* C++ gets angry when we try to use an int as a gl_state_index, so we use
 805     * ints for gl_state_index.  Make sure they're compatible.
 806     */
 807    assert(sizeof(gl_state_index) == sizeof(int));
 808
 809    for (i = 0; i < Elements(matrices); i++) {
 810       if (strcmp(var->name, matrices[i].name) == 0) {
 811          int j;
 812          int last_pos = -1, base_pos = -1;
 813          int tokens[STATE_LENGTH];
 814
 815          tokens[0] = matrices[i].matrix;
 816          tokens[1] = 0; /* array index! */
 817          tokens[4] = matrices[i].modifier;
 818
 819          /* Add a ref for each column.  It looks like the reason we do
 820           * it this way is that _mesa_add_state_reference doesn't work
 821           * for things that aren't vec4s, so the tokens[2]/tokens[3]
 822           * range has to be equal.
 823           */
 824          for (j = 0; j < 4; j++) {
 825             tokens[2] = j;
 826             tokens[3] = j;
 827             int pos = _mesa_add_state_reference(prog->Parameters,
 828                                                 (gl_state_index *)tokens);
 829             assert(last_pos == -1 || last_pos == base_pos + j);
 830             if (base_pos == -1)
 831                base_pos = pos;
 832          }
 833
 834          entry = new(mem_ctx) temp_entry(var,
 835                                          PROGRAM_STATE_VAR,
 836                                          base_pos);
 837
 838          return entry;
 839       }
 840    }
 841
 842    return NULL;
 843 }
 844
 845 void
 846 ir_to_mesa_visitor::visit(ir_dereference_variable *ir)
 847 {
 848    ir_to_mesa_src_reg src_reg;
 849    temp_entry *entry = find_variable_storage(ir->var);
 850    unsigned int i, loc;
 851    bool var_in;
 852
 853    if (!entry) {
 854       switch (ir->var->mode) {
 855       case ir_var_uniform:
 856          entry = get_builtin_matrix_ref(this->mem_ctx, this->prog, ir->var);
 857          if (entry)
 858             break;
 859
 860          /* FINISHME: Fix up uniform name for arrays and things */
 861          assert(ir->var->type->gl_type != 0 &&
 862                 ir->var->type->gl_type != GL_INVALID_ENUM);
 863          loc = _mesa_add_uniform(this->prog->Parameters,
 864                                  ir->var->name,
 865                                  type_size(ir->var->type) * 4,
 866                                  ir->var->type->gl_type,
 867                                  NULL);
 868          /* Always mark the uniform used at this point.  If it isn't
 869           * used, dead code elimination should have nuked the decl already.
 870           */
 871          this->prog->Parameters->Parameters[loc].Used = GL_TRUE;
 872
 873          entry = new(mem_ctx) temp_entry(ir->var, PROGRAM_UNIFORM, loc);
 874          this->variable_storage.push_tail(entry);
 875          break;
 876       case ir_var_in:
 877       case ir_var_out:
 878       case ir_var_inout:
 879          var_in = (ir->var->mode == ir_var_in ||
 880                    ir->var->mode == ir_var_inout);
 881
 882          for (i = 0; i < ARRAY_SIZE(builtin_var_to_mesa_reg); i++) {
 883             bool in = builtin_var_to_mesa_reg[i].file == PROGRAM_INPUT;
 884
 885             if (strcmp(ir->var->name, builtin_var_to_mesa_reg[i].name) == 0 &&
 886                 !(var_in ^ in))
 887                break;
 888          }
 889          if (i == ARRAY_SIZE(builtin_var_to_mesa_reg)) {
 890             printf("Failed to find builtin for %s variable %s\n",
 891                    var_in ? "in" : "out",
 892                    ir->var->name);
 893             abort();
 894          }
 895          entry = new(mem_ctx) temp_entry(ir->var,
 896                                          builtin_var_to_mesa_reg[i].file,
 897                                          builtin_var_to_mesa_reg[i].index);
 898          break;
 899       case ir_var_auto:
 900          entry = new(mem_ctx) temp_entry(ir->var, PROGRAM_TEMPORARY,
 901                                          this->next_temp);
 902          this->variable_storage.push_tail(entry);
 903
 904          next_temp += type_size(ir->var->type);
 905          break;
 906       }
 907
 908       if (!entry) {
 909          printf("Failed to make storage for %s\n", ir->var->name);
 910          exit(1);
 911       }
 912    }
 913
 914    src_reg.file = entry->file;
 915    src_reg.index = entry->index;
 916    /* If the type is smaller than a vec4, replicate the last channel out. */
 917    src_reg.swizzle = swizzle_for_size(ir->var->type->vector_elements);
 918    src_reg.reladdr = false;
 919    src_reg.negate = 0;
 920
 921    this->result = src_reg;
 922 }
 923
 924 void
 925 ir_to_mesa_visitor::visit(ir_dereference_array *ir)
 926 {
 927    ir_constant *index;
 928    ir_to_mesa_src_reg src_reg;
 929
 930    index = ir->array_index->constant_expression_value();
 931
 932    /* By the time we make it to this stage, matrices should be broken down
 933     * to vectors.
 934     */
 935    assert(!ir->type->is_matrix());
 936
 937    ir->array->accept(this);
 938    src_reg = this->result;
 939
 940    if (src_reg.file == PROGRAM_INPUT ||
 941        src_reg.file == PROGRAM_OUTPUT) {
 942       assert(index); /* FINISHME: Handle variable indexing of builtins. */
 943
 944       src_reg.index += index->value.i[0];
 945    } else {
 946       if (index) {
 947          src_reg.index += index->value.i[0];
 948       } else {
 949          ir_to_mesa_src_reg array_base = this->result;
 950          /* Variable index array dereference.  It eats the "vec4" of the
 951           * base of the array and an index that offsets the Mesa register
 952           * index.
 953           */
 954          ir->array_index->accept(this);
 955
 956          /* FINISHME: This doesn't work when we're trying to do the LHS
 957           * of an assignment.
 958           */
 959          src_reg.reladdr = true;
 960          ir_to_mesa_emit_op1(ir, OPCODE_ARL, ir_to_mesa_address_reg,
 961                              this->result);
 962
 963          this->result = get_temp(ir->type);
 964          ir_to_mesa_emit_op1(ir, OPCODE_MOV,
 965                              ir_to_mesa_dst_reg_from_src(this->result),
 966                              src_reg);
 967       }
 968    }
 969
 970    /* If the type is smaller than a vec4, replicate the last channel out. */
 971    src_reg.swizzle = swizzle_for_size(ir->type->vector_elements);
 972
 973    this->result = src_reg;
 974 }
 975
 976 void
 977 ir_to_mesa_visitor::visit(ir_dereference_record *ir)
 978 {
 979    unsigned int i;
 980    const glsl_type *struct_type = ir->record->type;
 981    int offset = 0;
 982
 983    ir->record->accept(this);
 984
 985    for (i = 0; i < struct_type->length; i++) {
 986       if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0)
 987          break;
 988       offset += type_size(struct_type->fields.structure[i].type);
 989    }
 990    this->result.index += offset;
 991 }
 992
 993 /**
 994  * We want to be careful in assignment setup to hit the actual storage
 995  * instead of potentially using a temporary like we might with the
 996  * ir_dereference handler.
 997  *
 998  * Thanks to ir_swizzle_swizzle, and ir_vec_index_to_swizzle, we
 999  * should only see potentially one variable array index of a vector,
1000  * and one swizzle, before getting to actual vec4 storage.  So handle
1001  * those, then go use ir_dereference to handle the rest.
1002  */
1003 static struct ir_to_mesa_dst_reg
1004 get_assignment_lhs(ir_instruction *ir, ir_to_mesa_visitor *v)
1005 {
1006    struct ir_to_mesa_dst_reg dst_reg;
1007    ir_dereference *deref;
1008    ir_swizzle *swiz;
1009
1010    /* Use the rvalue deref handler for the most part.  We'll ignore
1011     * swizzles in it and write swizzles using writemask, though.
1012     */
1013    ir->accept(v);
1014    dst_reg = ir_to_mesa_dst_reg_from_src(v->result);
1015
1016    if ((deref = ir->as_dereference())) {
1017       ir_dereference_array *deref_array = ir->as_dereference_array();
1018       assert(!deref_array || deref_array->array->type->is_array());
1019
1020       ir->accept(v);
1021    } else if ((swiz = ir->as_swizzle())) {
1022       dst_reg.writemask = 0;
1023       if (swiz->mask.num_components >= 1)
1024          dst_reg.writemask |= (1 << swiz->mask.x);
1025       if (swiz->mask.num_components >= 2)
1026          dst_reg.writemask |= (1 << swiz->mask.y);
1027       if (swiz->mask.num_components >= 3)
1028          dst_reg.writemask |= (1 << swiz->mask.z);
1029       if (swiz->mask.num_components >= 4)
1030          dst_reg.writemask |= (1 << swiz->mask.w);
1031    }
1032
1033    return dst_reg;
1034 }
1035
1036 void
1037 ir_to_mesa_visitor::visit(ir_assignment *ir)
1038 {
1039    struct ir_to_mesa_dst_reg l;
1040    struct ir_to_mesa_src_reg r;
1041
1042    assert(!ir->lhs->type->is_matrix());
1043    assert(!ir->lhs->type->is_array());
1044    assert(ir->lhs->type->base_type != GLSL_TYPE_STRUCT);
1045
1046    l = get_assignment_lhs(ir->lhs, this);
1047
1048    ir->rhs->accept(this);
1049    r = this->result;
1050    assert(l.file != PROGRAM_UNDEFINED);
1051    assert(r.file != PROGRAM_UNDEFINED);
1052
1053    if (ir->condition) {
1054          ir_constant *condition_constant;
1055
1056          condition_constant = ir->condition->constant_expression_value();
1057
1058          assert(condition_constant && condition_constant->value.b[0]);
1059    }
1060
1061    ir_to_mesa_emit_op1(ir, OPCODE_MOV, l, r);
1062 }
1063
1064
1065 void
1066 ir_to_mesa_visitor::visit(ir_constant *ir)
1067 {
1068    ir_to_mesa_src_reg src_reg;
1069    GLfloat stack_vals[4];
1070    GLfloat *values = stack_vals;
1071    unsigned int i;
1072
1073    if (ir->type->is_matrix() || ir->type->is_array()) {
1074       assert(!"FINISHME: array/matrix constants");
1075    }
1076
1077    src_reg.file = PROGRAM_CONSTANT;
1078    switch (ir->type->base_type) {
1079    case GLSL_TYPE_FLOAT:
1080       values = &ir->value.f[0];
1081       break;
1082    case GLSL_TYPE_UINT:
1083       for (i = 0; i < ir->type->vector_elements; i++) {
1084          values[i] = ir->value.u[i];
1085       }
1086       break;
1087    case GLSL_TYPE_INT:
1088       for (i = 0; i < ir->type->vector_elements; i++) {
1089          values[i] = ir->value.i[i];
1090       }
1091       break;
1092    case GLSL_TYPE_BOOL:
1093       for (i = 0; i < ir->type->vector_elements; i++) {
1094          values[i] = ir->value.b[i];
1095       }
1096       break;
1097    default:
1098       assert(!"Non-float/uint/int/bool constant");
1099    }
1100
1101    src_reg.index = _mesa_add_unnamed_constant(this->prog->Parameters,
1102                                               values, ir->type->vector_elements,
1103                                               &src_reg.swizzle);
1104    src_reg.reladdr = false;
1105    src_reg.negate = 0;
1106
1107    this->result = src_reg;
1108 }
1109
1110
1111 void
1112 ir_to_mesa_visitor::visit(ir_call *ir)
1113 {
1114    printf("Can't support call to %s\n", ir->callee_name());
1115    exit(1);
1116 }
1117
1118
1119 void
1120 ir_to_mesa_visitor::visit(ir_texture *ir)
1121 {
1122    assert(0);
1123
1124    ir->coordinate->accept(this);
1125 }
1126
1127 void
1128 ir_to_mesa_visitor::visit(ir_return *ir)
1129 {
1130    assert(0);
1131
1132    ir->get_value()->accept(this);
1133 }
1134
1135
1136 void
1137 ir_to_mesa_visitor::visit(ir_if *ir)
1138 {
1139    ir_to_mesa_instruction *if_inst, *else_inst = NULL;
1140
1141    ir->condition->accept(this);
1142    assert(this->result.file != PROGRAM_UNDEFINED);
1143
1144    if_inst = ir_to_mesa_emit_op1(ir->condition,
1145                                  OPCODE_IF, ir_to_mesa_undef_dst,
1146                                  this->result);
1147
1148    this->instructions.push_tail(if_inst);
1149
1150    visit_exec_list(&ir->then_instructions, this);
1151
1152    if (!ir->else_instructions.is_empty()) {
1153       else_inst = ir_to_mesa_emit_op1(ir->condition, OPCODE_ELSE,
1154                                       ir_to_mesa_undef_dst,
1155                                       ir_to_mesa_undef);
1156       visit_exec_list(&ir->then_instructions, this);
1157    }
1158
1159    if_inst = ir_to_mesa_emit_op1(ir->condition, OPCODE_ENDIF,
1160                                  ir_to_mesa_undef_dst, ir_to_mesa_undef);
1161 }
1162
1163 ir_to_mesa_visitor::ir_to_mesa_visitor()
1164 {
1165    result.file = PROGRAM_UNDEFINED;
1166    next_temp = 1;
1167 }
1168
1169 static struct prog_src_register
1170 mesa_src_reg_from_ir_src_reg(ir_to_mesa_src_reg reg)
1171 {
1172    struct prog_src_register mesa_reg;
1173
1174    mesa_reg.File = reg.file;
1175    assert(reg.index < (1 << INST_INDEX_BITS) - 1);
1176    mesa_reg.Index = reg.index;
1177    mesa_reg.Swizzle = reg.swizzle;
1178    mesa_reg.RelAddr = reg.reladdr;
1179
1180    return mesa_reg;
1181 }
1182
1183 static void
1184 set_branchtargets(struct prog_instruction *mesa_instructions,
1185                   int num_instructions)
1186 {
1187    int if_count = 0, loop_count;
1188    int *if_stack, *loop_stack;
1189    int if_stack_pos = 0, loop_stack_pos = 0;
1190    int i, j;
1191
1192    for (i = 0; i < num_instructions; i++) {
1193       switch (mesa_instructions[i].Opcode) {
1194       case OPCODE_IF:
1195          if_count++;
1196          break;
1197       case OPCODE_BGNLOOP:
1198          loop_count++;
1199          break;
1200       case OPCODE_BRK:
1201       case OPCODE_CONT:
1202          mesa_instructions[i].BranchTarget = -1;
1203          break;
1204       default:
1205          break;
1206       }
1207    }
1208
1209    if_stack = (int *)calloc(if_count, sizeof(*if_stack));
1210    loop_stack = (int *)calloc(loop_count, sizeof(*loop_stack));
1211
1212    for (i = 0; i < num_instructions; i++) {
1213       switch (mesa_instructions[i].Opcode) {
1214       case OPCODE_IF:
1215          if_stack[if_stack_pos] = i;
1216          if_stack_pos++;
1217          break;
1218       case OPCODE_ELSE:
1219          mesa_instructions[if_stack[if_stack_pos - 1]].BranchTarget = i;
1220          if_stack[if_stack_pos - 1] = i;
1221          break;
1222       case OPCODE_ENDIF:
1223          mesa_instructions[if_stack[if_stack_pos - 1]].BranchTarget = i;
1224          if_stack_pos--;
1225          break;
1226       case OPCODE_BGNLOOP:
1227          loop_stack[loop_stack_pos] = i;
1228          loop_stack_pos++;
1229          break;
1230       case OPCODE_ENDLOOP:
1231          loop_stack_pos--;
1232          /* Rewrite any breaks/conts at this nesting level (haven't
1233           * already had a BranchTarget assigned) to point to the end
1234           * of the loop.
1235           */
1236          for (j = loop_stack[loop_stack_pos]; j < i; j++) {
1237             if (mesa_instructions[j].Opcode == OPCODE_BRK ||
1238                 mesa_instructions[j].Opcode == OPCODE_CONT) {
1239                if (mesa_instructions[j].BranchTarget == -1) {
1240                   mesa_instructions[j].BranchTarget = i;
1241                }
1242             }
1243          }
1244          /* The loop ends point at each other. */
1245          mesa_instructions[i].BranchTarget = loop_stack[loop_stack_pos];
1246          mesa_instructions[loop_stack[loop_stack_pos]].BranchTarget = i;
1247       default:
1248          break;
1249       }
1250    }
1251
1252    free(if_stack);
1253 }
1254
1255 static void
1256 print_program(struct prog_instruction *mesa_instructions,
1257               ir_instruction **mesa_instruction_annotation,
1258               int num_instructions)
1259 {
1260    ir_instruction *last_ir = NULL;
1261    int i;
1262
1263    for (i = 0; i < num_instructions; i++) {
1264       struct prog_instruction *mesa_inst = mesa_instructions + i;
1265       ir_instruction *ir = mesa_instruction_annotation[i];
1266
1267       if (last_ir != ir && ir) {
1268          ir_print_visitor print;
1269          ir->accept(&print);
1270          printf("\n");
1271          last_ir = ir;
1272       }
1273
1274       _mesa_print_instruction(mesa_inst);
1275    }
1276 }
1277
1278 static void
1279 count_resources(struct gl_program *prog)
1280 {
1281    prog->InputsRead = 0;
1282    prog->OutputsWritten = 0;
1283    unsigned int i;
1284
1285    for (i = 0; i < prog->NumInstructions; i++) {
1286       struct prog_instruction *inst = &prog->Instructions[i];
1287       unsigned int reg;
1288
1289       switch (inst->DstReg.File) {
1290       case PROGRAM_OUTPUT:
1291          prog->OutputsWritten |= BITFIELD64_BIT(inst->DstReg.Index);
1292          break;
1293       case PROGRAM_INPUT:
1294          prog->InputsRead |= BITFIELD64_BIT(inst->DstReg.Index);
1295          break;
1296       default:
1297          break;
1298       }
1299
1300       for (reg = 0; reg < _mesa_num_inst_src_regs(inst->Opcode); reg++) {
1301          switch (inst->SrcReg[reg].File) {
1302          case PROGRAM_OUTPUT:
1303             prog->OutputsWritten |= BITFIELD64_BIT(inst->SrcReg[reg].Index);
1304             break;
1305          case PROGRAM_INPUT:
1306             prog->InputsRead |= BITFIELD64_BIT(inst->SrcReg[reg].Index);
1307             break;
1308          default:
1309             break;
1310          }
1311       }
1312    }
1313 }
1314
1315 /* Each stage has some uniforms in its Parameters list.  The Uniforms
1316  * list for the linked shader program has a pointer to these uniforms
1317  * in each of the stage's Parameters list, so that their values can be
1318  * updated when a uniform is set.
1319  */
1320 static void
1321 link_uniforms_to_shared_uniform_list(struct gl_uniform_list *uniforms,
1322                                      struct gl_program *prog)
1323 {
1324    unsigned int i;
1325
1326    for (i = 0; i < prog->Parameters->NumParameters; i++) {
1327       const struct gl_program_parameter *p = prog->Parameters->Parameters + i;
1328
1329       if (p->Type == PROGRAM_UNIFORM || p->Type == PROGRAM_SAMPLER) {
1330          struct gl_uniform *uniform =
1331             _mesa_append_uniform(uniforms, p->Name, prog->Target, i);
1332          if (uniform)
1333             uniform->Initialized = p->Initialized;
1334       }
1335    }
1336 }
1337
1338 struct gl_program *
1339 get_mesa_program(GLcontext *ctx, void *mem_ctx, struct glsl_shader *shader)
1340 {
1341    ir_to_mesa_visitor v;
1342    struct prog_instruction *mesa_instructions, *mesa_inst;
1343    ir_instruction **mesa_instruction_annotation;
1344    int i;
1345    exec_list *instructions = &shader->ir;
1346    struct gl_program *prog;
1347    GLenum target;
1348
1349    switch (shader->Type) {
1350    case GL_VERTEX_SHADER:   target = GL_VERTEX_PROGRAM_ARB; break;
1351    case GL_FRAGMENT_SHADER: target = GL_FRAGMENT_PROGRAM_ARB; break;
1352    default: assert(!"should not be reached"); break;
1353    }
1354
1355    prog = ctx->Driver.NewProgram(ctx, target, 1);
1356    if (!prog)
1357       return NULL;
1358    prog->Parameters = _mesa_new_parameter_list();
1359    prog->Varying = _mesa_new_parameter_list();
1360    prog->Attributes = _mesa_new_parameter_list();
1361    v.ctx = ctx;
1362    v.prog = prog;
1363
1364    v.mem_ctx = talloc_new(NULL);
1365    visit_exec_list(instructions, &v);
1366    v.ir_to_mesa_emit_op1(NULL, OPCODE_END,
1367                          ir_to_mesa_undef_dst, ir_to_mesa_undef);
1368
1369    prog->NumTemporaries = v.next_temp;
1370
1371    int num_instructions = 0;
1372    foreach_iter(exec_list_iterator, iter, v.instructions) {
1373       num_instructions++;
1374    }
1375
1376    mesa_instructions =
1377       (struct prog_instruction *)calloc(num_instructions,
1378                                         sizeof(*mesa_instructions));
1379    mesa_instruction_annotation = talloc_array(mem_ctx, ir_instruction *,
1380                                               num_instructions);
1381
1382    mesa_inst = mesa_instructions;
1383    i = 0;
1384    foreach_iter(exec_list_iterator, iter, v.instructions) {
1385       ir_to_mesa_instruction *inst = (ir_to_mesa_instruction *)iter.get();
1386
1387       mesa_inst->Opcode = inst->op;
1388       mesa_inst->DstReg.File = inst->dst_reg.file;
1389       mesa_inst->DstReg.Index = inst->dst_reg.index;
1390       mesa_inst->DstReg.CondMask = COND_TR;
1391       mesa_inst->DstReg.WriteMask = inst->dst_reg.writemask;
1392       mesa_inst->SrcReg[0] = mesa_src_reg_from_ir_src_reg(inst->src_reg[0]);
1393       mesa_inst->SrcReg[1] = mesa_src_reg_from_ir_src_reg(inst->src_reg[1]);
1394       mesa_inst->SrcReg[2] = mesa_src_reg_from_ir_src_reg(inst->src_reg[2]);
1395       mesa_instruction_annotation[i] = inst->ir;
1396
1397       mesa_inst++;
1398       i++;
1399    }
1400
1401    set_branchtargets(mesa_instructions, num_instructions);
1402    if (0) {
1403       print_program(mesa_instructions, mesa_instruction_annotation,
1404                     num_instructions);
1405    }
1406
1407    prog->Instructions = mesa_instructions;
1408    prog->NumInstructions = num_instructions;
1409
1410    _mesa_reference_program(ctx, &shader->mesa_shader->Program, prog);
1411
1412    return prog;
1413 }
1414
1415 /* Takes a Mesa gl shader structure and compiles it, returning our Mesa-like
1416  * structure with the IR and such attached.
1417  */
1418 static struct glsl_shader *
1419 _mesa_get_glsl_shader(GLcontext *ctx, void *mem_ctx, struct gl_shader *sh)
1420 {
1421    struct glsl_shader *shader = talloc_zero(mem_ctx, struct glsl_shader);
1422    struct _mesa_glsl_parse_state *state;
1423
1424    shader->Type = sh->Type;
1425    shader->Name = sh->Name;
1426    shader->RefCount = 1;
1427    shader->Source = sh->Source;
1428    shader->SourceLen = strlen(sh->Source);
1429    shader->mesa_shader = sh;
1430
1431    state = talloc_zero(shader, struct _mesa_glsl_parse_state);
1432    switch (shader->Type) {
1433    case GL_VERTEX_SHADER:   state->target = vertex_shader; break;
1434    case GL_FRAGMENT_SHADER: state->target = fragment_shader; break;
1435    case GL_GEOMETRY_SHADER: state->target = geometry_shader; break;
1436    }
1437
1438    state->scanner = NULL;
1439    state->translation_unit.make_empty();
1440    state->symbols = new(mem_ctx) glsl_symbol_table;
1441    state->info_log = talloc_strdup(shader, "");
1442    state->error = false;
1443    state->temp_index = 0;
1444    state->loop_or_switch_nesting = NULL;
1445    state->ARB_texture_rectangle_enable = true;
1446
1447    _mesa_glsl_lexer_ctor(state, shader->Source);
1448    _mesa_glsl_parse(state);
1449    _mesa_glsl_lexer_dtor(state);
1450
1451    shader->ir.make_empty();
1452    if (!state->error && !state->translation_unit.is_empty())
1453       _mesa_ast_to_hir(&shader->ir, state);
1454
1455    /* Optimization passes */
1456    if (!state->error && !shader->ir.is_empty()) {
1457       bool progress;
1458       do {
1459          progress = false;
1460
1461          progress = do_function_inlining(&shader->ir) || progress;
1462          progress = do_if_simplification(&shader->ir) || progress;
1463          progress = do_copy_propagation(&shader->ir) || progress;
1464          progress = do_dead_code_local(&shader->ir) || progress;
1465          progress = do_dead_code_unlinked(state, &shader->ir) || progress;
1466          progress = do_constant_variable_unlinked(&shader->ir) || progress;
1467          progress = do_constant_folding(&shader->ir) || progress;
1468          progress = do_vec_index_to_swizzle(&shader->ir) || progress;
1469          progress = do_swizzle_swizzle(&shader->ir) || progress;
1470       } while (progress);
1471    }
1472
1473    shader->symbols = state->symbols;
1474
1475    shader->CompileStatus = !state->error;
1476    shader->InfoLog = state->info_log;
1477
1478    talloc_free(state);
1479
1480    return shader;
1481 }
1482
1483 extern "C" {
1484
1485 void
1486 _mesa_glsl_compile_shader(GLcontext *ctx, struct gl_shader *sh)
1487 {
1488    struct glsl_shader *shader;
1489    TALLOC_CTX *mem_ctx = talloc_new(NULL);
1490
1491    shader = _mesa_get_glsl_shader(ctx, mem_ctx, sh);
1492
1493    sh->CompileStatus = shader->CompileStatus;
1494    sh->InfoLog = strdup(shader->InfoLog);
1495    talloc_free(mem_ctx);
1496  }
1497
1498 void
1499 _mesa_glsl_link_shader(GLcontext *ctx, struct gl_shader_program *prog)
1500 {
1501    struct glsl_program *whole_program;
1502    unsigned int i;
1503
1504    _mesa_clear_shader_program_data(ctx, prog);
1505
1506    whole_program = talloc_zero(NULL, struct glsl_program);
1507    whole_program->LinkStatus = GL_TRUE;
1508    whole_program->NumShaders = prog->NumShaders;
1509    whole_program->Shaders = talloc_array(whole_program, struct glsl_shader *,
1510                                          prog->NumShaders);
1511
1512    for (i = 0; i < prog->NumShaders; i++) {
1513       whole_program->Shaders[i] = _mesa_get_glsl_shader(ctx, whole_program,
1514                                                         prog->Shaders[i]);
1515       if (!whole_program->Shaders[i]->CompileStatus) {
1516          whole_program->InfoLog =
1517             talloc_asprintf_append(whole_program->InfoLog,
1518                                    "linking with uncompiled shader");
1519          whole_program->LinkStatus = GL_FALSE;
1520       }
1521    }
1522
1523    prog->Uniforms = _mesa_new_uniform_list();
1524    prog->Varying = _mesa_new_parameter_list();
1525    _mesa_reference_vertprog(ctx, &prog->VertexProgram, NULL);
1526    _mesa_reference_fragprog(ctx, &prog->FragmentProgram, NULL);
1527
1528    if (whole_program->LinkStatus)
1529       link_shaders(whole_program);
1530
1531    prog->LinkStatus = whole_program->LinkStatus;
1532
1533    /* FINISHME: This should use the linker-generated code */
1534    if (prog->LinkStatus) {
1535       for (i = 0; i < prog->NumShaders; i++) {
1536          struct gl_program *linked_prog;
1537
1538          linked_prog = get_mesa_program(ctx, whole_program,
1539                                         whole_program->Shaders[i]);
1540          count_resources(linked_prog);
1541
1542          link_uniforms_to_shared_uniform_list(prog->Uniforms, linked_prog);
1543
1544          switch (whole_program->Shaders[i]->Type) {
1545          case GL_VERTEX_SHADER:
1546             _mesa_reference_vertprog(ctx, &prog->VertexProgram,
1547                                      (struct gl_vertex_program *)linked_prog);
1548             break;
1549          case GL_FRAGMENT_SHADER:
1550             _mesa_reference_fragprog(ctx, &prog->FragmentProgram,
1551                                      (struct gl_fragment_program *)linked_prog);
1552             break;
1553          }
1554       }
1555    }
1556
1557    talloc_free(whole_program);
1558 }
1559
1560 } /* extern "C" */