/*
 * Copyright © 2010 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *
 */

extern "C" {

#include <sys/types.h>

#include "main/macros.h"
#include "main/shaderobj.h"
#include "main/uniforms.h"
#include "program/prog_parameter.h"
#include "program/prog_print.h"
#include "program/prog_optimize.h"
#include "program/register_allocate.h"
#include "program/sampler.h"
#include "program/hash_table.h"
#include "brw_context.h"
#include "brw_eu.h"
#include "brw_wm.h"
#include "talloc.h"
}
#include "brw_fs.h"
#include "../glsl/glsl_types.h"
#include "../glsl/ir_optimization.h"
#include "../glsl/ir_print_visitor.h"

static struct brw_reg brw_reg_from_fs_reg(class fs_reg *reg);

struct gl_shader *
brw_new_shader(struct gl_context *ctx, GLuint name, GLuint type)
{
   struct brw_shader *shader;

   shader = talloc_zero(NULL, struct brw_shader);
   if (shader) {
      shader->base.Type = type;
      shader->base.Name = name;
      _mesa_init_shader(ctx, &shader->base);
   }

   return &shader->base;
}

struct gl_shader_program *
brw_new_shader_program(struct gl_context *ctx, GLuint name)
{
   struct brw_shader_program *prog;
   prog = talloc_zero(NULL, struct brw_shader_program);
   if (prog) {
      prog->base.Name = name;
      _mesa_init_shader_program(ctx, &prog->base);
   }
   return &prog->base;
}

GLboolean
brw_compile_shader(struct gl_context *ctx, struct gl_shader *shader)
{
   if (!_mesa_ir_compile_shader(ctx, shader))
      return GL_FALSE;

   return GL_TRUE;
}

GLboolean
brw_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
{
   struct intel_context *intel = intel_context(ctx);

   struct brw_shader *shader =
      (struct brw_shader *)prog->_LinkedShaders[MESA_SHADER_FRAGMENT];
   if (shader != NULL) {
      void *mem_ctx = talloc_new(NULL);
      bool progress;

      if (shader->ir)
         talloc_free(shader->ir);
      shader->ir = new(shader) exec_list;
      clone_ir_list(mem_ctx, shader->ir, shader->base.ir);

      do_mat_op_to_vec(shader->ir);
      do_mod_to_fract(shader->ir);
      do_div_to_mul_rcp(shader->ir);
      do_sub_to_add_neg(shader->ir);
      do_explog_to_explog2(shader->ir);
      do_lower_texture_projection(shader->ir);
      brw_do_cubemap_normalize(shader->ir);

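      /* Lower and optimize the IR to a fixed point: each pass reports
       * whether it changed anything, and we keep iterating as long as any
       * pass makes progress, since one pass's rewrite (e.g. channel
       * expression splitting) can expose new opportunities for another.
       */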
      do {
         progress = false;

         brw_do_channel_expressions(shader->ir);
         brw_do_vector_splitting(shader->ir);

         progress = do_lower_jumps(shader->ir, true, true,
                                   true, /* main return */
                                   false, /* continue */
                                   false /* loops */
                                   ) || progress;

         progress = do_common_optimization(shader->ir, true, 32) || progress;

         progress = lower_noise(shader->ir) || progress;
         progress =
            lower_variable_index_to_cond_assign(shader->ir,
                                                GL_TRUE, /* input */
                                                GL_TRUE, /* output */
                                                GL_TRUE, /* temp */
                                                GL_TRUE /* uniform */
                                                ) || progress;
         if (intel->gen == 6) {
            progress = do_if_to_cond_assign(shader->ir) || progress;
         }
      } while (progress);

      validate_ir_tree(shader->ir);

      reparent_ir(shader->ir, shader->ir);
      talloc_free(mem_ctx);
   }

   if (!_mesa_ir_link_shader(ctx, prog))
      return GL_FALSE;

   return GL_TRUE;
}

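/* Returns the number of scalar components (float-sized register slots) a
 * GLSL type occupies in this backend's virtual register layout: e.g. a
 * vec4 is 4, a float[8] is 8, and a struct is the sum of its members.
 */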
static int
type_size(const struct glsl_type *type)
{
   unsigned int size, i;

   switch (type->base_type) {
   case GLSL_TYPE_UINT:
   case GLSL_TYPE_INT:
   case GLSL_TYPE_FLOAT:
   case GLSL_TYPE_BOOL:
      return type->components();
   case GLSL_TYPE_ARRAY:
      return type_size(type->fields.array) * type->length;
   case GLSL_TYPE_STRUCT:
      size = 0;
      for (i = 0; i < type->length; i++) {
         size += type_size(type->fields.structure[i].type);
      }
      return size;
   case GLSL_TYPE_SAMPLER:
      /* Samplers take up no register space, since they're baked in at
       * link time.
       */
      return 0;
   default:
      assert(!"not reached");
      return 0;
   }
}

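/* Allocates a new virtual GRF of the given size in component slots and
 * returns its index.  The size array grows geometrically (16, 32, 64, ...)
 * so repeated allocation is amortized O(1), and slot 0 is kept unused so
 * that a register number of 0 never refers to a real virtual GRF.
 */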
int
fs_visitor::virtual_grf_alloc(int size)
{
   if (virtual_grf_array_size <= virtual_grf_next) {
      if (virtual_grf_array_size == 0)
         virtual_grf_array_size = 16;
      else
         virtual_grf_array_size *= 2;
      virtual_grf_sizes = talloc_realloc(mem_ctx, virtual_grf_sizes,
                                         int, virtual_grf_array_size);

      /* This slot is always unused. */
      virtual_grf_sizes[0] = 0;
   }
   virtual_grf_sizes[virtual_grf_next] = size;
   return virtual_grf_next++;
}

/** Fixed HW reg constructor. */
fs_reg::fs_reg(enum register_file file, int hw_reg)
{
   init();
   this->file = file;
   this->hw_reg = hw_reg;
   this->type = BRW_REGISTER_TYPE_F;
}

/** Fixed HW reg constructor. */
fs_reg::fs_reg(enum register_file file, int hw_reg, uint32_t type)
{
   init();
   this->file = file;
   this->hw_reg = hw_reg;
   this->type = type;
}

int
brw_type_for_base_type(const struct glsl_type *type)
{
   switch (type->base_type) {
   case GLSL_TYPE_FLOAT:
      return BRW_REGISTER_TYPE_F;
   case GLSL_TYPE_INT:
   case GLSL_TYPE_BOOL:
      return BRW_REGISTER_TYPE_D;
   case GLSL_TYPE_UINT:
      return BRW_REGISTER_TYPE_UD;
   case GLSL_TYPE_ARRAY:
   case GLSL_TYPE_STRUCT:
      /* These should be overridden with the type of the member when
       * dereferenced into.  BRW_REGISTER_TYPE_UD seems like a likely
       * way to trip up if we don't.
       */
      return BRW_REGISTER_TYPE_UD;
   default:
      assert(!"not reached");
      return BRW_REGISTER_TYPE_F;
   }
}

/** Automatic reg constructor. */
fs_reg::fs_reg(class fs_visitor *v, const struct glsl_type *type)
{
   init();

   this->file = GRF;
   this->reg = v->virtual_grf_alloc(type_size(type));
   this->reg_offset = 0;
   this->type = brw_type_for_base_type(type);
}

fs_reg *
fs_visitor::variable_storage(ir_variable *var)
{
   return (fs_reg *)hash_table_find(this->variable_ht, var);
}

/* Our support for uniforms is piggy-backed on the struct
 * gl_fragment_program, because that's where the values actually
 * get stored, rather than in some global gl_shader_program uniform
 * store.
 */
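/* As a worked example of the recursion below: a mat3 uniform is handled
 * as three vec3 columns, so it appends 3 x 3 = 9 value pointers to
 * prog_data.param and returns an offset of 3 (one location per column).
 */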
int
fs_visitor::setup_uniform_values(int loc, const glsl_type *type)
{
   unsigned int offset = 0;
   float *vec_values;

   if (type->is_matrix()) {
      const glsl_type *column = glsl_type::get_instance(GLSL_TYPE_FLOAT,
                                                        type->vector_elements,
                                                        1);

      for (unsigned int i = 0; i < type->matrix_columns; i++) {
         offset += setup_uniform_values(loc + offset, column);
      }

      return offset;
   }

   switch (type->base_type) {
   case GLSL_TYPE_FLOAT:
   case GLSL_TYPE_UINT:
   case GLSL_TYPE_INT:
   case GLSL_TYPE_BOOL:
      vec_values = fp->Base.Parameters->ParameterValues[loc];
      for (unsigned int i = 0; i < type->vector_elements; i++) {
         c->prog_data.param[c->prog_data.nr_params++] = &vec_values[i];
      }
      return 1;

   case GLSL_TYPE_STRUCT:
      for (unsigned int i = 0; i < type->length; i++) {
         offset += setup_uniform_values(loc + offset,
                                        type->fields.structure[i].type);
      }
      return offset;

   case GLSL_TYPE_ARRAY:
      for (unsigned int i = 0; i < type->length; i++) {
         offset += setup_uniform_values(loc + offset, type->fields.array);
      }
      return offset;

   case GLSL_TYPE_SAMPLER:
      /* The sampler takes up a slot, but we don't use any values from it. */
      return 1;

   default:
      assert(!"not reached");
      return 0;
   }
}


/* Our support for builtin uniforms is even scarier than non-builtin.
 * It sits on top of the PROG_STATE_VAR parameters that are
 * automatically updated from GL context state.
 */
void
fs_visitor::setup_builtin_uniform_values(ir_variable *ir)
{
   const struct gl_builtin_uniform_desc *statevar = NULL;

   for (unsigned int i = 0; _mesa_builtin_uniform_desc[i].name; i++) {
      if (strcmp(ir->name, _mesa_builtin_uniform_desc[i].name) == 0) {
         statevar = &_mesa_builtin_uniform_desc[i];
         break;
      }
   }

   if (!statevar) {
      this->fail = true;
      printf("Failed to find builtin uniform `%s'\n", ir->name);
      return;
   }

   int array_count;
   if (ir->type->is_array()) {
      array_count = ir->type->length;
   } else {
      array_count = 1;
   }

   for (int a = 0; a < array_count; a++) {
      for (unsigned int i = 0; i < statevar->num_elements; i++) {
         struct gl_builtin_uniform_element *element = &statevar->elements[i];
         int tokens[STATE_LENGTH];

         memcpy(tokens, element->tokens, sizeof(element->tokens));
         if (ir->type->is_array()) {
            tokens[1] = a;
         }

         /* This state reference has already been setup by ir_to_mesa,
          * but we'll get the same index back here.
          */
         int index = _mesa_add_state_reference(this->fp->Base.Parameters,
                                               (gl_state_index *)tokens);
         float *vec_values = this->fp->Base.Parameters->ParameterValues[index];

         /* Add each of the unique swizzles of the element as a
          * parameter.  This'll end up matching the expected layout of
          * the array/matrix/structure we're trying to fill in.
          */
         int last_swiz = -1;
         for (unsigned int i = 0; i < 4; i++) {
            int swiz = GET_SWZ(element->swizzle, i);
            if (swiz == last_swiz)
               break;
            last_swiz = swiz;

            c->prog_data.param[c->prog_data.nr_params++] = &vec_values[swiz];
         }
      }
   }
}

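/* Builds the gl_FragCoord value: X and Y come from the computed pixel
 * centers (with a 0.5 offset unless the shader asked for integer pixel
 * centers, and a flip against drawable_height for a lower-left origin),
 * Z is linearly interpolated from the WPOS attribute, and W is the
 * wpos_w value already computed during interpolation setup.
 */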
fs_reg *
fs_visitor::emit_fragcoord_interpolation(ir_variable *ir)
{
   fs_reg *reg = new(this->mem_ctx) fs_reg(this, ir->type);
   fs_reg wpos = *reg;
   fs_reg neg_y = this->pixel_y;
   neg_y.negate = true;

   /* gl_FragCoord.x */
   if (ir->pixel_center_integer) {
      emit(fs_inst(BRW_OPCODE_MOV, wpos, this->pixel_x));
   } else {
      emit(fs_inst(BRW_OPCODE_ADD, wpos, this->pixel_x, fs_reg(0.5f)));
   }
   wpos.reg_offset++;

   /* gl_FragCoord.y */
   if (ir->origin_upper_left && ir->pixel_center_integer) {
      emit(fs_inst(BRW_OPCODE_MOV, wpos, this->pixel_y));
   } else {
      fs_reg pixel_y = this->pixel_y;
      float offset = (ir->pixel_center_integer ? 0.0 : 0.5);

      if (!ir->origin_upper_left) {
         pixel_y.negate = true;
         offset += c->key.drawable_height - 1.0;
      }

      emit(fs_inst(BRW_OPCODE_ADD, wpos, pixel_y, fs_reg(offset)));
   }
   wpos.reg_offset++;

   /* gl_FragCoord.z */
   emit(fs_inst(FS_OPCODE_LINTERP, wpos, this->delta_x, this->delta_y,
                interp_reg(FRAG_ATTRIB_WPOS, 2)));
   wpos.reg_offset++;

   /* gl_FragCoord.w: Already set up by the interpolation setup code. */
   emit(fs_inst(BRW_OPCODE_MOV, wpos, this->wpos_w));

   return reg;
}

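/* Emits per-component linear interpolation (LINTERP) for a varying input.
 * On gen4/5 each interpolated component is then multiplied back by the
 * pixel W to undo the perspective divide baked into the setup data; gen6
 * doesn't need that fixup here.
 */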
fs_reg *
fs_visitor::emit_general_interpolation(ir_variable *ir)
{
   fs_reg *reg = new(this->mem_ctx) fs_reg(this, ir->type);
   /* Interpolation is always in floating point regs. */
   reg->type = BRW_REGISTER_TYPE_F;
   fs_reg attr = *reg;

   unsigned int array_elements;
   const glsl_type *type;

   if (ir->type->is_array()) {
      array_elements = ir->type->length;
      if (array_elements == 0) {
         this->fail = true;
      }
      type = ir->type->fields.array;
   } else {
      array_elements = 1;
      type = ir->type;
   }

   int location = ir->location;
   for (unsigned int i = 0; i < array_elements; i++) {
      for (unsigned int j = 0; j < type->matrix_columns; j++) {
         if (urb_setup[location] == -1) {
            /* If there's no incoming setup data for this slot, don't
             * emit interpolation for it.
             */
            attr.reg_offset += type->vector_elements;
            location++;
            continue;
         }

         for (unsigned int c = 0; c < type->vector_elements; c++) {
            struct brw_reg interp = interp_reg(location, c);
            emit(fs_inst(FS_OPCODE_LINTERP,
                         attr,
                         this->delta_x,
                         this->delta_y,
                         fs_reg(interp)));
            attr.reg_offset++;
         }

         if (intel->gen < 6) {
            attr.reg_offset -= type->vector_elements;
            for (unsigned int c = 0; c < type->vector_elements; c++) {
               emit(fs_inst(BRW_OPCODE_MUL,
                            attr,
                            attr,
                            this->pixel_w));
               attr.reg_offset++;
            }
         }
         location++;
      }
   }

   return reg;
}

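/* Computes gl_FrontFacing from the polygon facing bit in the thread
 * payload: on gen6 the back-facing flag sits in bit 15 of g0.0, so the
 * ASR/NOT/AND sequence below extracts and inverts it into a 0/1 boolean;
 * on earlier parts bit 31 of g1.6 is compared against directly.
 */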
fs_reg *
fs_visitor::emit_frontfacing_interpolation(ir_variable *ir)
{
   fs_reg *reg = new(this->mem_ctx) fs_reg(this, ir->type);

   /* The frontfacing comes in as a bit in the thread payload. */
   if (intel->gen >= 6) {
      emit(fs_inst(BRW_OPCODE_ASR,
                   *reg,
                   fs_reg(retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_D)),
                   fs_reg(15)));
      emit(fs_inst(BRW_OPCODE_NOT,
                   *reg,
                   *reg));
      emit(fs_inst(BRW_OPCODE_AND,
                   *reg,
                   *reg,
                   fs_reg(1)));
   } else {
      struct brw_reg r1_6ud = retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD);
      /* bit 31 is "primitive is back face", so checking < (1 << 31) gives
       * us front face
       */
      fs_inst *inst = emit(fs_inst(BRW_OPCODE_CMP,
                                   *reg,
                                   fs_reg(r1_6ud),
                                   fs_reg(1u << 31)));
      inst->conditional_mod = BRW_CONDITIONAL_L;
      emit(fs_inst(BRW_OPCODE_AND, *reg, *reg, fs_reg(1u)));
   }

   return reg;
}

fs_inst *
fs_visitor::emit_math(fs_opcodes opcode, fs_reg dst, fs_reg src)
{
   switch (opcode) {
   case FS_OPCODE_RCP:
   case FS_OPCODE_RSQ:
   case FS_OPCODE_SQRT:
   case FS_OPCODE_EXP2:
   case FS_OPCODE_LOG2:
   case FS_OPCODE_SIN:
   case FS_OPCODE_COS:
      break;
   default:
      assert(!"not reached: bad math opcode");
      return NULL;
   }

   /* Can't do hstride == 0 args to gen6 math, so expand it out.  We
    * might be able to do better by doing execsize = 1 math and then
    * expanding that result out, but we would need to be careful with
    * masking.
    */
   if (intel->gen >= 6 && src.file == UNIFORM) {
      fs_reg expanded = fs_reg(this, glsl_type::float_type);
      emit(fs_inst(BRW_OPCODE_MOV, expanded, src));
      src = expanded;
   }

   fs_inst *inst = emit(fs_inst(opcode, dst, src));

   if (intel->gen < 6) {
      inst->base_mrf = 2;
      inst->mlen = 1;
   }

   return inst;
}

fs_inst *
fs_visitor::emit_math(fs_opcodes opcode, fs_reg dst, fs_reg src0, fs_reg src1)
{
   int base_mrf = 2;
   fs_inst *inst;

   assert(opcode == FS_OPCODE_POW);

   if (intel->gen >= 6) {
      /* Can't do hstride == 0 args to gen6 math, so expand it out. */
      if (src0.file == UNIFORM) {
         fs_reg expanded = fs_reg(this, glsl_type::float_type);
         emit(fs_inst(BRW_OPCODE_MOV, expanded, src0));
         src0 = expanded;
      }

      if (src1.file == UNIFORM) {
         fs_reg expanded = fs_reg(this, glsl_type::float_type);
         emit(fs_inst(BRW_OPCODE_MOV, expanded, src1));
         src1 = expanded;
      }

      inst = emit(fs_inst(opcode, dst, src0, src1));
   } else {
      emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + 1), src1));
      inst = emit(fs_inst(opcode, dst, src0, reg_null_f));

      inst->base_mrf = base_mrf;
      inst->mlen = 2;
   }
   return inst;
}

void
fs_visitor::visit(ir_variable *ir)
{
   fs_reg *reg = NULL;

   if (variable_storage(ir))
      return;

   if (strcmp(ir->name, "gl_FragColor") == 0) {
      this->frag_color = ir;
   } else if (strcmp(ir->name, "gl_FragData") == 0) {
      this->frag_data = ir;
   } else if (strcmp(ir->name, "gl_FragDepth") == 0) {
      this->frag_depth = ir;
   }

   if (ir->mode == ir_var_in) {
      if (!strcmp(ir->name, "gl_FragCoord")) {
         reg = emit_fragcoord_interpolation(ir);
      } else if (!strcmp(ir->name, "gl_FrontFacing")) {
         reg = emit_frontfacing_interpolation(ir);
      } else {
         reg = emit_general_interpolation(ir);
      }
      assert(reg);
      hash_table_insert(this->variable_ht, reg, ir);
      return;
   }

   if (ir->mode == ir_var_uniform) {
      int param_index = c->prog_data.nr_params;

      if (!strncmp(ir->name, "gl_", 3)) {
         setup_builtin_uniform_values(ir);
      } else {
         setup_uniform_values(ir->location, ir->type);
      }

      reg = new(this->mem_ctx) fs_reg(UNIFORM, param_index);
   }

   if (!reg)
      reg = new(this->mem_ctx) fs_reg(this, ir->type);

   hash_table_insert(this->variable_ht, reg, ir);
}

void
fs_visitor::visit(ir_dereference_variable *ir)
{
   fs_reg *reg = variable_storage(ir->var);
   this->result = *reg;
}

void
fs_visitor::visit(ir_dereference_record *ir)
{
   const glsl_type *struct_type = ir->record->type;

   ir->record->accept(this);

   unsigned int offset = 0;
   for (unsigned int i = 0; i < struct_type->length; i++) {
      if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0)
         break;
      offset += type_size(struct_type->fields.structure[i].type);
   }
   this->result.reg_offset += offset;
   this->result.type = brw_type_for_base_type(ir->type);
}

void
fs_visitor::visit(ir_dereference_array *ir)
{
   ir_constant *index;
   int element_size;

   ir->array->accept(this);
   index = ir->array_index->as_constant();

   element_size = type_size(ir->type);
   this->result.type = brw_type_for_base_type(ir->type);

   if (index) {
      assert(this->result.file == UNIFORM ||
             (this->result.file == GRF &&
              this->result.reg != 0));
      this->result.reg_offset += index->value.i[0] * element_size;
   } else {
      assert(!"FINISHME: non-constant array element");
   }
}

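/* A note on the comparison lowering below: CMP with a conditional mod
 * writes all-ones (~0) per channel when the condition holds, so each
 * comparison is followed by an AND with 1 to normalize the result to the
 * 0/1 encoding this backend uses for booleans.
 */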
void
fs_visitor::visit(ir_expression *ir)
{
   unsigned int operand;
   fs_reg op[2], temp;
   fs_inst *inst;

   for (operand = 0; operand < ir->get_num_operands(); operand++) {
      ir->operands[operand]->accept(this);
      if (this->result.file == BAD_FILE) {
         ir_print_visitor v;
         printf("Failed to get tree for expression operand:\n");
         ir->operands[operand]->accept(&v);
         this->fail = true;
      }
      op[operand] = this->result;

      /* Matrix expression operands should have been broken down to vector
       * operations already.
       */
      assert(!ir->operands[operand]->type->is_matrix());
      /* And then those vector operands should have been broken down to scalar.
       */
      assert(!ir->operands[operand]->type->is_vector());
   }

   /* Storage for our result.  If our result goes into an assignment, it will
    * just get copy-propagated out, so no worries.
    */
   this->result = fs_reg(this, ir->type);

   switch (ir->operation) {
   case ir_unop_logic_not:
      /* Note that BRW_OPCODE_NOT is not appropriate here, since it
       * produces the one's complement of the whole register, not just
       * bit 0.
       */
      emit(fs_inst(BRW_OPCODE_XOR, this->result, op[0], fs_reg(1)));
      break;
   case ir_unop_neg:
      op[0].negate = !op[0].negate;
      this->result = op[0];
      break;
   case ir_unop_abs:
      op[0].abs = true;
      this->result = op[0];
      break;
   case ir_unop_sign:
      temp = fs_reg(this, ir->type);

      emit(fs_inst(BRW_OPCODE_MOV, this->result, fs_reg(0.0f)));

      inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_f, op[0], fs_reg(0.0f)));
      inst->conditional_mod = BRW_CONDITIONAL_G;
      inst = emit(fs_inst(BRW_OPCODE_MOV, this->result, fs_reg(1.0f)));
      inst->predicated = true;

      inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_f, op[0], fs_reg(0.0f)));
      inst->conditional_mod = BRW_CONDITIONAL_L;
      inst = emit(fs_inst(BRW_OPCODE_MOV, this->result, fs_reg(-1.0f)));
      inst->predicated = true;

      break;
   case ir_unop_rcp:
      emit_math(FS_OPCODE_RCP, this->result, op[0]);
      break;

   case ir_unop_exp2:
      emit_math(FS_OPCODE_EXP2, this->result, op[0]);
      break;
   case ir_unop_log2:
      emit_math(FS_OPCODE_LOG2, this->result, op[0]);
      break;
   case ir_unop_exp:
   case ir_unop_log:
      assert(!"not reached: should be handled by ir_explog_to_explog2");
      break;
   case ir_unop_sin:
      emit_math(FS_OPCODE_SIN, this->result, op[0]);
      break;
   case ir_unop_cos:
      emit_math(FS_OPCODE_COS, this->result, op[0]);
      break;

   case ir_unop_dFdx:
      emit(fs_inst(FS_OPCODE_DDX, this->result, op[0]));
      break;
   case ir_unop_dFdy:
      emit(fs_inst(FS_OPCODE_DDY, this->result, op[0]));
      break;

   case ir_binop_add:
      emit(fs_inst(BRW_OPCODE_ADD, this->result, op[0], op[1]));
      break;
   case ir_binop_sub:
      assert(!"not reached: should be handled by ir_sub_to_add_neg");
      break;

   case ir_binop_mul:
      emit(fs_inst(BRW_OPCODE_MUL, this->result, op[0], op[1]));
      break;
   case ir_binop_div:
      assert(!"not reached: should be handled by ir_div_to_mul_rcp");
      break;
   case ir_binop_mod:
      assert(!"ir_binop_mod should have been converted to b * fract(a/b)");
      break;

   case ir_binop_less:
      inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
      inst->conditional_mod = BRW_CONDITIONAL_L;
      emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
      break;
   case ir_binop_greater:
      inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
      inst->conditional_mod = BRW_CONDITIONAL_G;
      emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
      break;
   case ir_binop_lequal:
      inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
      inst->conditional_mod = BRW_CONDITIONAL_LE;
      emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
      break;
   case ir_binop_gequal:
      inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
      inst->conditional_mod = BRW_CONDITIONAL_GE;
      emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
      break;
   case ir_binop_equal:
   case ir_binop_all_equal: /* same as equal for scalars */
      inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
      inst->conditional_mod = BRW_CONDITIONAL_Z;
      emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
      break;
   case ir_binop_nequal:
   case ir_binop_any_nequal: /* same as nequal for scalars */
      inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
      inst->conditional_mod = BRW_CONDITIONAL_NZ;
      emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
      break;

   case ir_binop_logic_xor:
      emit(fs_inst(BRW_OPCODE_XOR, this->result, op[0], op[1]));
      break;

   case ir_binop_logic_or:
      emit(fs_inst(BRW_OPCODE_OR, this->result, op[0], op[1]));
      break;

   case ir_binop_logic_and:
      emit(fs_inst(BRW_OPCODE_AND, this->result, op[0], op[1]));
      break;

   case ir_binop_dot:
   case ir_binop_cross:
   case ir_unop_any:
      assert(!"not reached: should be handled by brw_fs_channel_expressions");
      break;

   case ir_unop_noise:
      assert(!"not reached: should be handled by lower_noise");
      break;

   case ir_unop_sqrt:
      emit_math(FS_OPCODE_SQRT, this->result, op[0]);
      break;

   case ir_unop_rsq:
      emit_math(FS_OPCODE_RSQ, this->result, op[0]);
      break;

   case ir_unop_i2f:
   case ir_unop_b2f:
   case ir_unop_b2i:
   case ir_unop_f2i:
      emit(fs_inst(BRW_OPCODE_MOV, this->result, op[0]));
      break;
   case ir_unop_f2b:
   case ir_unop_i2b:
      inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], fs_reg(0.0f)));
      inst->conditional_mod = BRW_CONDITIONAL_NZ;
      inst = emit(fs_inst(BRW_OPCODE_AND, this->result,
                          this->result, fs_reg(1)));
      break;

   case ir_unop_trunc:
      emit(fs_inst(BRW_OPCODE_RNDZ, this->result, op[0]));
      break;
   case ir_unop_ceil:
      op[0].negate = !op[0].negate;
      inst = emit(fs_inst(BRW_OPCODE_RNDD, this->result, op[0]));
      this->result.negate = true;
      break;
   case ir_unop_floor:
      inst = emit(fs_inst(BRW_OPCODE_RNDD, this->result, op[0]));
      break;
   case ir_unop_fract:
      inst = emit(fs_inst(BRW_OPCODE_FRC, this->result, op[0]));
      break;
   case ir_unop_round_even:
      emit(fs_inst(BRW_OPCODE_RNDE, this->result, op[0]));
      break;

   case ir_binop_min:
      inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
      inst->conditional_mod = BRW_CONDITIONAL_L;

      inst = emit(fs_inst(BRW_OPCODE_SEL, this->result, op[0], op[1]));
      inst->predicated = true;
      break;
   case ir_binop_max:
      inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
      inst->conditional_mod = BRW_CONDITIONAL_G;

      inst = emit(fs_inst(BRW_OPCODE_SEL, this->result, op[0], op[1]));
      inst->predicated = true;
      break;

   case ir_binop_pow:
      emit_math(FS_OPCODE_POW, this->result, op[0], op[1]);
      break;

   case ir_unop_bit_not:
   case ir_unop_u2f:
   case ir_binop_lshift:
   case ir_binop_rshift:
   case ir_binop_bit_and:
   case ir_binop_bit_xor:
   case ir_binop_bit_or:
      assert(!"GLSL 1.30 features unsupported");
      break;
   }
}

void
fs_visitor::emit_assignment_writes(fs_reg &l, fs_reg &r,
                                   const glsl_type *type, bool predicated)
{
   switch (type->base_type) {
   case GLSL_TYPE_FLOAT:
   case GLSL_TYPE_UINT:
   case GLSL_TYPE_INT:
   case GLSL_TYPE_BOOL:
      for (unsigned int i = 0; i < type->components(); i++) {
         l.type = brw_type_for_base_type(type);
         r.type = brw_type_for_base_type(type);

         fs_inst *inst = emit(fs_inst(BRW_OPCODE_MOV, l, r));
         inst->predicated = predicated;

         l.reg_offset++;
         r.reg_offset++;
      }
      break;
   case GLSL_TYPE_ARRAY:
      for (unsigned int i = 0; i < type->length; i++) {
         emit_assignment_writes(l, r, type->fields.array, predicated);
      }
      break;

   case GLSL_TYPE_STRUCT:
      for (unsigned int i = 0; i < type->length; i++) {
         emit_assignment_writes(l, r, type->fields.structure[i].type,
                                predicated);
      }
      break;

   case GLSL_TYPE_SAMPLER:
      break;

   default:
      assert(!"not reached");
      break;
   }
}

void
fs_visitor::visit(ir_assignment *ir)
{
   struct fs_reg l, r;
   fs_inst *inst;

   /* FINISHME: arrays on the lhs */
   ir->lhs->accept(this);
   l = this->result;

   ir->rhs->accept(this);
   r = this->result;

   assert(l.file != BAD_FILE);
   assert(r.file != BAD_FILE);

   if (ir->condition) {
      emit_bool_to_cond_code(ir->condition);
   }

   if (ir->lhs->type->is_scalar() ||
       ir->lhs->type->is_vector()) {
      for (int i = 0; i < ir->lhs->type->vector_elements; i++) {
         if (ir->write_mask & (1 << i)) {
            inst = emit(fs_inst(BRW_OPCODE_MOV, l, r));
            if (ir->condition)
               inst->predicated = true;
            r.reg_offset++;
         }
         l.reg_offset++;
      }
   } else {
      emit_assignment_writes(l, r, ir->lhs->type, ir->condition != NULL);
   }
}

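/* Emits a gen4 sampler message.  Three payload shapes are handled below:
 * shadow compares pack u/v/r, then lod/bias, then the reference value;
 * plain ir_tex packs just u/v/r; and non-shadow bias/lod messages only
 * exist in SIMD16 form on gen4, so the coordinates go in every other
 * slot and the interleaved result is copied back out at the end.
 */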
fs_inst *
fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate)
{
   int mlen;
   int base_mrf = 1;
   bool simd16 = false;
   fs_reg orig_dst;

   /* g0 header. */
   mlen = 1;

   if (ir->shadow_comparitor) {
      for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
         emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i),
                      coordinate));
         coordinate.reg_offset++;
      }
      /* gen4's SIMD8 sampler always has the slots for u,v,r present. */
      mlen += 3;

      if (ir->op == ir_tex) {
         /* There's no plain shadow compare message, so we use shadow
          * compare with a bias of 0.0.
          */
         emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen),
                      fs_reg(0.0f)));
         mlen++;
      } else if (ir->op == ir_txb) {
         ir->lod_info.bias->accept(this);
         emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen),
                      this->result));
         mlen++;
      } else {
         assert(ir->op == ir_txl);
         ir->lod_info.lod->accept(this);
         emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen),
                      this->result));
         mlen++;
      }

      ir->shadow_comparitor->accept(this);
      emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result));
      mlen++;
   } else if (ir->op == ir_tex) {
      for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
         emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i),
                      coordinate));
         coordinate.reg_offset++;
      }
      /* gen4's SIMD8 sampler always has the slots for u,v,r present. */
      mlen += 3;
   } else {
      /* Oh joy.  gen4 doesn't have SIMD8 non-shadow-compare bias/lod
       * instructions.  We'll need to do SIMD16 here.
       */
      assert(ir->op == ir_txb || ir->op == ir_txl);

      for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
         emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i * 2),
                      coordinate));
         coordinate.reg_offset++;
      }

      /* lod/bias appears after u/v/r. */
      mlen += 6;

      if (ir->op == ir_txb) {
         ir->lod_info.bias->accept(this);
         emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen),
                      this->result));
         mlen++;
      } else {
         ir->lod_info.lod->accept(this);
         emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen),
                      this->result));
         mlen++;
      }

      /* The unused upper half. */
      mlen++;

      /* Now, since we're doing simd16, the return is 2 interleaved
       * vec4s where the odd-indexed ones are junk.  We'll need to move
       * this weirdness around to the expected layout.
       */
      simd16 = true;
      orig_dst = dst;
      dst = fs_reg(this, glsl_type::get_array_instance(glsl_type::vec4_type,
                                                       2));
      dst.type = BRW_REGISTER_TYPE_F;
   }

   fs_inst *inst = NULL;
   switch (ir->op) {
   case ir_tex:
      inst = emit(fs_inst(FS_OPCODE_TEX, dst));
      break;
   case ir_txb:
      inst = emit(fs_inst(FS_OPCODE_TXB, dst));
      break;
   case ir_txl:
      inst = emit(fs_inst(FS_OPCODE_TXL, dst));
      break;
   case ir_txd:
   case ir_txf:
      assert(!"GLSL 1.30 features unsupported");
      break;
   }
   inst->base_mrf = base_mrf;
   inst->mlen = mlen;

   if (simd16) {
      for (int i = 0; i < 4; i++) {
         emit(fs_inst(BRW_OPCODE_MOV, orig_dst, dst));
         orig_dst.reg_offset++;
         dst.reg_offset += 2;
      }
   }

   return inst;
}

fs_inst *
fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate)
{
   /* gen5's SIMD8 sampler has slots for u, v, r, array index, then
    * optional parameters like shadow comparitor or LOD bias.  If
    * optional parameters aren't present, those base slots are
    * optional and don't need to be included in the message.
    *
    * Even when later parameters force those base slots to be counted in
    * the message length, we don't fill in the unneeded ones, which may
    * look surprising in the disassembly.
    */
   int mlen = 1; /* g0 header always present. */
   int base_mrf = 1;

   for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
      emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i),
                   coordinate));
      coordinate.reg_offset++;
   }
   mlen += ir->coordinate->type->vector_elements;

   if (ir->shadow_comparitor) {
      mlen = MAX2(mlen, 5);

      ir->shadow_comparitor->accept(this);
      emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result));
      mlen++;
   }

   fs_inst *inst = NULL;
   switch (ir->op) {
   case ir_tex:
      inst = emit(fs_inst(FS_OPCODE_TEX, dst));
      break;
   case ir_txb:
      ir->lod_info.bias->accept(this);
      mlen = MAX2(mlen, 5);
      emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result));
      mlen++;

      inst = emit(fs_inst(FS_OPCODE_TXB, dst));
      break;
   case ir_txl:
      ir->lod_info.lod->accept(this);
      mlen = MAX2(mlen, 5);
      emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result));
      mlen++;

      inst = emit(fs_inst(FS_OPCODE_TXL, dst));
      break;
   case ir_txd:
   case ir_txf:
      assert(!"GLSL 1.30 features unsupported");
      break;
   }
   inst->base_mrf = base_mrf;
   inst->mlen = mlen;

   return inst;
}

void
fs_visitor::visit(ir_texture *ir)
{
   int sampler;
   fs_inst *inst = NULL;

   ir->coordinate->accept(this);
   fs_reg coordinate = this->result;

   /* Should be lowered by do_lower_texture_projection */
   assert(!ir->projector);

   sampler = _mesa_get_sampler_uniform_value(ir->sampler,
                                             ctx->Shader.CurrentProgram,
                                             &brw->fragment_program->Base);
   sampler = c->fp->program.Base.SamplerUnits[sampler];

   /* The 965 requires the EU to do the normalization of GL rectangle
    * texture coordinates.  We use the program parameter state
    * tracking to get the scaling factor.
    */
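   /* STATE_TEXRECT_SCALE tracks (1/width, 1/height) for the bound texture,
    * so e.g. a 512x256 rectangle texture yields scale factors of 1/512 and
    * 1/256, mapping the shader's unnormalized coordinates onto the
    * normalized [0,1] range the sampler expects.
    */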
   if (ir->sampler->type->sampler_dimensionality == GLSL_SAMPLER_DIM_RECT) {
      struct gl_program_parameter_list *params = c->fp->program.Base.Parameters;
      int tokens[STATE_LENGTH] = {
         STATE_INTERNAL,
         STATE_TEXRECT_SCALE,
         sampler,
         0,
         0
      };

      fs_reg scale_x = fs_reg(UNIFORM, c->prog_data.nr_params);
      fs_reg scale_y = fs_reg(UNIFORM, c->prog_data.nr_params + 1);
      GLuint index = _mesa_add_state_reference(params,
                                               (gl_state_index *)tokens);
      float *vec_values = this->fp->Base.Parameters->ParameterValues[index];

      c->prog_data.param[c->prog_data.nr_params++] = &vec_values[0];
      c->prog_data.param[c->prog_data.nr_params++] = &vec_values[1];

      fs_reg dst = fs_reg(this, ir->coordinate->type);
      fs_reg src = coordinate;
      coordinate = dst;

      emit(fs_inst(BRW_OPCODE_MUL, dst, src, scale_x));
      dst.reg_offset++;
      src.reg_offset++;
      emit(fs_inst(BRW_OPCODE_MUL, dst, src, scale_y));
   }

   /* Writemasking doesn't eliminate channels on SIMD8 texture
    * samples, so don't worry about them.
    */
   fs_reg dst = fs_reg(this, glsl_type::vec4_type);

   if (intel->gen < 5) {
      inst = emit_texture_gen4(ir, dst, coordinate);
   } else {
      inst = emit_texture_gen5(ir, dst, coordinate);
   }

   inst->sampler = sampler;

   this->result = dst;

   if (ir->shadow_comparitor)
      inst->shadow_compare = true;

   if (c->key.tex_swizzles[inst->sampler] != SWIZZLE_NOOP) {
      fs_reg swizzle_dst = fs_reg(this, glsl_type::vec4_type);

      for (int i = 0; i < 4; i++) {
         int swiz = GET_SWZ(c->key.tex_swizzles[inst->sampler], i);
         fs_reg l = swizzle_dst;
         l.reg_offset += i;

         if (swiz == SWIZZLE_ZERO) {
            emit(fs_inst(BRW_OPCODE_MOV, l, fs_reg(0.0f)));
         } else if (swiz == SWIZZLE_ONE) {
            emit(fs_inst(BRW_OPCODE_MOV, l, fs_reg(1.0f)));
         } else {
            fs_reg r = dst;
            r.reg_offset += GET_SWZ(c->key.tex_swizzles[inst->sampler], i);
            emit(fs_inst(BRW_OPCODE_MOV, l, r));
         }
      }
      this->result = swizzle_dst;
   }
}

void
fs_visitor::visit(ir_swizzle *ir)
{
   ir->val->accept(this);
   fs_reg val = this->result;

   if (ir->type->vector_elements == 1) {
      this->result.reg_offset += ir->mask.x;
      return;
   }

   fs_reg result = fs_reg(this, ir->type);
   this->result = result;

   for (unsigned int i = 0; i < ir->type->vector_elements; i++) {
      fs_reg channel = val;
      int swiz = 0;

      switch (i) {
      case 0:
         swiz = ir->mask.x;
         break;
      case 1:
         swiz = ir->mask.y;
         break;
      case 2:
         swiz = ir->mask.z;
         break;
      case 3:
         swiz = ir->mask.w;
         break;
      }

      channel.reg_offset += swiz;
      emit(fs_inst(BRW_OPCODE_MOV, result, channel));
      result.reg_offset++;
   }
}

void
fs_visitor::visit(ir_discard *ir)
{
   fs_reg temp = fs_reg(this, glsl_type::uint_type);

   assert(ir->condition == NULL); /* FINISHME */

   emit(fs_inst(FS_OPCODE_DISCARD_NOT, temp, reg_null_d));
   emit(fs_inst(FS_OPCODE_DISCARD_AND, reg_null_d, temp));
   kill_emitted = true;
}

void
fs_visitor::visit(ir_constant *ir)
{
   fs_reg reg(this, ir->type);
   this->result = reg;

   for (unsigned int i = 0; i < ir->type->vector_elements; i++) {
      switch (ir->type->base_type) {
      case GLSL_TYPE_FLOAT:
         emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg(ir->value.f[i])));
         break;
      case GLSL_TYPE_UINT:
         emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg(ir->value.u[i])));
         break;
      case GLSL_TYPE_INT:
         emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg(ir->value.i[i])));
         break;
      case GLSL_TYPE_BOOL:
         emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg((int)ir->value.b[i])));
         break;
      default:
         assert(!"Non-float/uint/int/bool constant");
      }
      reg.reg_offset++;
   }
}

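/* Lowers a boolean condition into the flag register: where possible the
 * comparison itself is emitted with a conditional mod and a null
 * destination, so no temporary is written and a following predicated
 * instruction can consume the flag directly.
 */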
void
fs_visitor::emit_bool_to_cond_code(ir_rvalue *ir)
{
   ir_expression *expr = ir->as_expression();

   if (expr) {
      fs_reg op[2];
      fs_inst *inst;

      for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
         assert(expr->operands[i]->type->is_scalar());

         expr->operands[i]->accept(this);
         op[i] = this->result;
      }

      switch (expr->operation) {
      case ir_unop_logic_not:
         inst = emit(fs_inst(BRW_OPCODE_AND, reg_null_d, op[0], fs_reg(1)));
         inst->conditional_mod = BRW_CONDITIONAL_Z;
         break;

      case ir_binop_logic_xor:
         inst = emit(fs_inst(BRW_OPCODE_XOR, reg_null_d, op[0], op[1]));
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         break;

      case ir_binop_logic_or:
         inst = emit(fs_inst(BRW_OPCODE_OR, reg_null_d, op[0], op[1]));
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         break;

      case ir_binop_logic_and:
         inst = emit(fs_inst(BRW_OPCODE_AND, reg_null_d, op[0], op[1]));
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         break;

      case ir_unop_f2b:
         if (intel->gen >= 6) {
            inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_d,
                                op[0], fs_reg(0.0f)));
         } else {
            inst = emit(fs_inst(BRW_OPCODE_MOV, reg_null_d, op[0]));
         }
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         break;

      case ir_unop_i2b:
         if (intel->gen >= 6) {
            inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_d, op[0], fs_reg(0)));
         } else {
            inst = emit(fs_inst(BRW_OPCODE_MOV, reg_null_d, op[0]));
         }
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         break;

      case ir_binop_greater:
         inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_d, op[0], op[1]));
         inst->conditional_mod = BRW_CONDITIONAL_G;
         break;
      case ir_binop_gequal:
         inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_d, op[0], op[1]));
         inst->conditional_mod = BRW_CONDITIONAL_GE;
         break;
      case ir_binop_less:
         inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_d, op[0], op[1]));
         inst->conditional_mod = BRW_CONDITIONAL_L;
         break;
      case ir_binop_lequal:
         inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_d, op[0], op[1]));
         inst->conditional_mod = BRW_CONDITIONAL_LE;
         break;
      case ir_binop_equal:
      case ir_binop_all_equal:
         inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_d, op[0], op[1]));
         inst->conditional_mod = BRW_CONDITIONAL_Z;
         break;
      case ir_binop_nequal:
      case ir_binop_any_nequal:
         inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_d, op[0], op[1]));
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         break;
      default:
         assert(!"not reached");
         this->fail = true;
         break;
      }
      return;
   }

   ir->accept(this);

   if (intel->gen >= 6) {
      fs_inst *inst = emit(fs_inst(BRW_OPCODE_AND, reg_null_d,
                                   this->result, fs_reg(1)));
      inst->conditional_mod = BRW_CONDITIONAL_NZ;
   } else {
      fs_inst *inst = emit(fs_inst(BRW_OPCODE_MOV, reg_null_d, this->result));
      inst->conditional_mod = BRW_CONDITIONAL_NZ;
   }
}

/**
 * Emit a gen6 IF statement with the comparison folded into the IF
 * instruction.
 */
void
fs_visitor::emit_if_gen6(ir_if *ir)
{
   ir_expression *expr = ir->condition->as_expression();

   if (expr) {
      fs_reg op[2];
      fs_inst *inst;
      fs_reg temp;

      for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
         assert(expr->operands[i]->type->is_scalar());

         expr->operands[i]->accept(this);
         op[i] = this->result;
      }

      switch (expr->operation) {
      case ir_unop_logic_not:
         inst = emit(fs_inst(BRW_OPCODE_IF, temp, op[0], fs_reg(1)));
         inst->conditional_mod = BRW_CONDITIONAL_Z;
         return;

      case ir_binop_logic_xor:
         inst = emit(fs_inst(BRW_OPCODE_IF, reg_null_d, op[0], op[1]));
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         return;

      case ir_binop_logic_or:
         temp = fs_reg(this, glsl_type::bool_type);
         emit(fs_inst(BRW_OPCODE_OR, temp, op[0], op[1]));
         inst = emit(fs_inst(BRW_OPCODE_IF, reg_null_d, temp, fs_reg(0)));
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         return;

      case ir_binop_logic_and:
         temp = fs_reg(this, glsl_type::bool_type);
         emit(fs_inst(BRW_OPCODE_AND, temp, op[0], op[1]));
         inst = emit(fs_inst(BRW_OPCODE_IF, reg_null_d, temp, fs_reg(0)));
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         return;

      case ir_unop_f2b:
         inst = emit(fs_inst(BRW_OPCODE_IF, reg_null_f, op[0], fs_reg(0)));
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         return;

      case ir_unop_i2b:
         inst = emit(fs_inst(BRW_OPCODE_IF, reg_null_d, op[0], fs_reg(0)));
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         return;

      case ir_binop_greater:
         inst = emit(fs_inst(BRW_OPCODE_IF, reg_null_d, op[0], op[1]));
         inst->conditional_mod = BRW_CONDITIONAL_G;
         return;
      case ir_binop_gequal:
         inst = emit(fs_inst(BRW_OPCODE_IF, reg_null_d, op[0], op[1]));
         inst->conditional_mod = BRW_CONDITIONAL_GE;
         return;
      case ir_binop_less:
         inst = emit(fs_inst(BRW_OPCODE_IF, reg_null_d, op[0], op[1]));
         inst->conditional_mod = BRW_CONDITIONAL_L;
         return;
      case ir_binop_lequal:
         inst = emit(fs_inst(BRW_OPCODE_IF, reg_null_d, op[0], op[1]));
         inst->conditional_mod = BRW_CONDITIONAL_LE;
         return;
      case ir_binop_equal:
      case ir_binop_all_equal:
         inst = emit(fs_inst(BRW_OPCODE_IF, reg_null_d, op[0], op[1]));
         inst->conditional_mod = BRW_CONDITIONAL_Z;
         return;
      case ir_binop_nequal:
      case ir_binop_any_nequal:
         inst = emit(fs_inst(BRW_OPCODE_IF, reg_null_d, op[0], op[1]));
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         return;
      default:
         assert(!"not reached");
         inst = emit(fs_inst(BRW_OPCODE_IF, reg_null_d, op[0], fs_reg(0)));
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         this->fail = true;
         return;
      }
      return;
   }

   ir->condition->accept(this);

   fs_inst *inst = emit(fs_inst(BRW_OPCODE_IF, reg_null_d, this->result, fs_reg(0)));
   inst->conditional_mod = BRW_CONDITIONAL_NZ;
}

void
fs_visitor::visit(ir_if *ir)
{
   fs_inst *inst;

   /* Don't point the annotation at the if statement, because then it plus
    * the then and else blocks get printed.
    */
   this->base_ir = ir->condition;

   if (intel->gen >= 6) {
      emit_if_gen6(ir);
   } else {
      emit_bool_to_cond_code(ir->condition);

      inst = emit(fs_inst(BRW_OPCODE_IF));
      inst->predicated = true;
   }

   foreach_iter(exec_list_iterator, iter, ir->then_instructions) {
      ir_instruction *ir = (ir_instruction *)iter.get();
      this->base_ir = ir;

      ir->accept(this);
   }

   if (!ir->else_instructions.is_empty()) {
      emit(fs_inst(BRW_OPCODE_ELSE));

      foreach_iter(exec_list_iterator, iter, ir->else_instructions) {
         ir_instruction *ir = (ir_instruction *)iter.get();
         this->base_ir = ir;

         ir->accept(this);
      }
   }

   emit(fs_inst(BRW_OPCODE_ENDIF));
}

void
fs_visitor::visit(ir_loop *ir)
{
   fs_reg counter = reg_undef;

   if (ir->counter) {
      this->base_ir = ir->counter;
      ir->counter->accept(this);
      counter = *(variable_storage(ir->counter));

      if (ir->from) {
         this->base_ir = ir->from;
         ir->from->accept(this);

         emit(fs_inst(BRW_OPCODE_MOV, counter, this->result));
      }
   }

   emit(fs_inst(BRW_OPCODE_DO));

   if (ir->to) {
      this->base_ir = ir->to;
      ir->to->accept(this);

      fs_inst *inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_d,
                                   counter, this->result));
      switch (ir->cmp) {
      case ir_binop_equal:
         inst->conditional_mod = BRW_CONDITIONAL_Z;
         break;
      case ir_binop_nequal:
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         break;
      case ir_binop_gequal:
         inst->conditional_mod = BRW_CONDITIONAL_GE;
         break;
      case ir_binop_lequal:
         inst->conditional_mod = BRW_CONDITIONAL_LE;
         break;
      case ir_binop_greater:
         inst->conditional_mod = BRW_CONDITIONAL_G;
         break;
      case ir_binop_less:
         inst->conditional_mod = BRW_CONDITIONAL_L;
         break;
      default:
         assert(!"not reached: unknown loop condition");
         this->fail = true;
         break;
      }

      inst = emit(fs_inst(BRW_OPCODE_BREAK));
      inst->predicated = true;
   }

   foreach_iter(exec_list_iterator, iter, ir->body_instructions) {
      ir_instruction *ir = (ir_instruction *)iter.get();

      this->base_ir = ir;
      ir->accept(this);
   }

   if (ir->increment) {
      this->base_ir = ir->increment;
      ir->increment->accept(this);
      emit(fs_inst(BRW_OPCODE_ADD, counter, counter, this->result));
   }

   emit(fs_inst(BRW_OPCODE_WHILE));
}

void
fs_visitor::visit(ir_loop_jump *ir)
{
   switch (ir->mode) {
   case ir_loop_jump::jump_break:
      emit(fs_inst(BRW_OPCODE_BREAK));
      break;
   case ir_loop_jump::jump_continue:
      emit(fs_inst(BRW_OPCODE_CONTINUE));
      break;
   }
}

void
fs_visitor::visit(ir_call *ir)
{
   assert(!"FINISHME");
}

void
fs_visitor::visit(ir_return *ir)
{
   assert(!"FINISHME");
}

void
fs_visitor::visit(ir_function *ir)
{
   /* Ignore function bodies other than main() -- we shouldn't see calls to
    * them since they should all be inlined before we get to ir_to_mesa.
    */
   if (strcmp(ir->name, "main") == 0) {
      const ir_function_signature *sig;
      exec_list empty;

      sig = ir->matching_signature(&empty);

      assert(sig);

      foreach_iter(exec_list_iterator, iter, sig->body) {
         ir_instruction *ir = (ir_instruction *)iter.get();
         this->base_ir = ir;

         ir->accept(this);
      }
   }
}

void
fs_visitor::visit(ir_function_signature *ir)
{
   assert(!"not reached");
   (void)ir;
}

fs_inst *
fs_visitor::emit(fs_inst inst)
{
   fs_inst *list_inst = new(mem_ctx) fs_inst;
   *list_inst = inst;

   list_inst->annotation = this->current_annotation;
   list_inst->ir = this->base_ir;

   this->instructions.push_tail(list_inst);

   return list_inst;
}

/** Emits a dummy fragment shader consisting of magenta for bringup purposes. */
void
fs_visitor::emit_dummy_fs()
{
   /* Everyone's favorite color. */
   emit(fs_inst(BRW_OPCODE_MOV,
                fs_reg(MRF, 2),
                fs_reg(1.0f)));
   emit(fs_inst(BRW_OPCODE_MOV,
                fs_reg(MRF, 3),
                fs_reg(0.0f)));
   emit(fs_inst(BRW_OPCODE_MOV,
                fs_reg(MRF, 4),
                fs_reg(1.0f)));
   emit(fs_inst(BRW_OPCODE_MOV,
                fs_reg(MRF, 5),
                fs_reg(0.0f)));

   fs_inst *write;
   write = emit(fs_inst(FS_OPCODE_FB_WRITE,
                        fs_reg(0),
                        fs_reg(0)));
   write->base_mrf = 0;
}

/* The register location here is relative to the start of the URB
 * data.  It will get adjusted to be a real location before
 * generate_code() time.
 */
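/* Each attribute slot occupies two registers of setup data, with two
 * channels packed per register: e.g. channel 2 of a location with
 * urb_setup[location] == 3 resolves to grf 3*2 + 2/2 = 7 at subregister
 * offset (2 & 1) * 4 = 0.
 */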
struct brw_reg
fs_visitor::interp_reg(int location, int channel)
{
   int regnr = urb_setup[location] * 2 + channel / 2;
   int stride = (channel & 1) * 4;

   assert(urb_setup[location] != -1);

   return brw_vec1_grf(regnr, stride);
}

/** Emits the interpolation for the varying inputs. */
void
fs_visitor::emit_interpolation_setup_gen4()
{
   struct brw_reg g1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW);

   this->current_annotation = "compute pixel centers";
   this->pixel_x = fs_reg(this, glsl_type::uint_type);
   this->pixel_y = fs_reg(this, glsl_type::uint_type);
   this->pixel_x.type = BRW_REGISTER_TYPE_UW;
   this->pixel_y.type = BRW_REGISTER_TYPE_UW;
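   /* g1 holds the per-subspan pixel origins; brw_imm_v packs eight 4-bit
    * immediates, so 0x10101010 adds {0,1,0,1,...} to X and 0x11001100
    * adds {0,0,1,1,...} to Y, producing each pixel's coordinates within
    * the 2x2 subspans.
    */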
   emit(fs_inst(BRW_OPCODE_ADD,
                this->pixel_x,
                fs_reg(stride(suboffset(g1_uw, 4), 2, 4, 0)),
                fs_reg(brw_imm_v(0x10101010))));
   emit(fs_inst(BRW_OPCODE_ADD,
                this->pixel_y,
                fs_reg(stride(suboffset(g1_uw, 5), 2, 4, 0)),
                fs_reg(brw_imm_v(0x11001100))));

   this->current_annotation = "compute pixel deltas from v0";
   if (brw->has_pln) {
      this->delta_x = fs_reg(this, glsl_type::vec2_type);
      this->delta_y = this->delta_x;
      this->delta_y.reg_offset++;
   } else {
      this->delta_x = fs_reg(this, glsl_type::float_type);
      this->delta_y = fs_reg(this, glsl_type::float_type);
   }
   emit(fs_inst(BRW_OPCODE_ADD,
                this->delta_x,
                this->pixel_x,
                fs_reg(negate(brw_vec1_grf(1, 0)))));
   emit(fs_inst(BRW_OPCODE_ADD,
                this->delta_y,
                this->pixel_y,
                fs_reg(negate(brw_vec1_grf(1, 1)))));

   this->current_annotation = "compute pos.w and 1/pos.w";
   /* Compute wpos.w.  It's always in our setup, since it's needed to
    * interpolate the other attributes.
    */
   this->wpos_w = fs_reg(this, glsl_type::float_type);
   emit(fs_inst(FS_OPCODE_LINTERP, wpos_w, this->delta_x, this->delta_y,
                interp_reg(FRAG_ATTRIB_WPOS, 3)));
   /* Compute the pixel 1/W value from wpos.w. */
   this->pixel_w = fs_reg(this, glsl_type::float_type);
   emit_math(FS_OPCODE_RCP, this->pixel_w, wpos_w);
   this->current_annotation = NULL;
}

/** Emits the interpolation for the varying inputs. */
void
fs_visitor::emit_interpolation_setup_gen6()
{
   struct brw_reg g1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW);

   /* If the pixel centers end up used, the setup is the same as for gen4. */
   this->current_annotation = "compute pixel centers";
   fs_reg int_pixel_x = fs_reg(this, glsl_type::uint_type);
   fs_reg int_pixel_y = fs_reg(this, glsl_type::uint_type);
   int_pixel_x.type = BRW_REGISTER_TYPE_UW;
   int_pixel_y.type = BRW_REGISTER_TYPE_UW;
   emit(fs_inst(BRW_OPCODE_ADD,
                int_pixel_x,
                fs_reg(stride(suboffset(g1_uw, 4), 2, 4, 0)),
                fs_reg(brw_imm_v(0x10101010))));
   emit(fs_inst(BRW_OPCODE_ADD,
                int_pixel_y,
                fs_reg(stride(suboffset(g1_uw, 5), 2, 4, 0)),
                fs_reg(brw_imm_v(0x11001100))));

   /* As of gen6, we can no longer mix float and int sources.  We have
    * to turn the integer pixel centers into floats for their actual
    * use.
    */
   this->pixel_x = fs_reg(this, glsl_type::float_type);
   this->pixel_y = fs_reg(this, glsl_type::float_type);
   emit(fs_inst(BRW_OPCODE_MOV, this->pixel_x, int_pixel_x));
   emit(fs_inst(BRW_OPCODE_MOV, this->pixel_y, int_pixel_y));

   this->current_annotation = "compute 1/pos.w";
   this->wpos_w = fs_reg(brw_vec8_grf(c->key.source_w_reg, 0));
   this->pixel_w = fs_reg(this, glsl_type::float_type);
   emit_math(FS_OPCODE_RCP, this->pixel_w, wpos_w);

   this->delta_x = fs_reg(brw_vec8_grf(2, 0));
   this->delta_y = fs_reg(brw_vec8_grf(3, 0));

   this->current_annotation = NULL;
}

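/* Assembles the framebuffer write message: an optional two-register
 * header (skippable on gen6 when there's a single render target and no
 * discard), optional antialiasing/stencil and depth payloads, and four
 * color registers per render target, with EOT set on the final write to
 * end the thread.
 */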
void
fs_visitor::emit_fb_writes()
{
   this->current_annotation = "FB write header";
   GLboolean header_present = GL_TRUE;
   int nr = 0;

   if (intel->gen >= 6 &&
       !this->kill_emitted &&
       c->key.nr_color_regions == 1) {
      header_present = false;
   }

   if (header_present) {
      /* m0, m1 header */
      nr += 2;
   }

   if (c->key.aa_dest_stencil_reg) {
      emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++),
                   fs_reg(brw_vec8_grf(c->key.aa_dest_stencil_reg, 0))));
   }

   /* Reserve space for color. It'll be filled in per MRT below. */
   int color_mrf = nr;
   nr += 4;

   if (c->key.source_depth_to_render_target) {
      if (c->key.computes_depth) {
         /* Hand over gl_FragDepth. */
         assert(this->frag_depth);
         fs_reg depth = *(variable_storage(this->frag_depth));

         emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++), depth));
      } else {
         /* Pass through the payload depth. */
         emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++),
                      fs_reg(brw_vec8_grf(c->key.source_depth_reg, 0))));
      }
   }

   if (c->key.dest_depth_reg) {
      emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++),
                   fs_reg(brw_vec8_grf(c->key.dest_depth_reg, 0))));
   }

   fs_reg color = reg_undef;
   if (this->frag_color)
      color = *(variable_storage(this->frag_color));
   else if (this->frag_data)
      color = *(variable_storage(this->frag_data));

   for (int target = 0; target < c->key.nr_color_regions; target++) {
      this->current_annotation = talloc_asprintf(this->mem_ctx,
                                                 "FB write target %d",
                                                 target);
      if (this->frag_color || this->frag_data) {
         for (int i = 0; i < 4; i++) {
            emit(fs_inst(BRW_OPCODE_MOV,
                         fs_reg(MRF, color_mrf + i),
                         color));
            color.reg_offset++;
         }
      }

      if (this->frag_color)
         color.reg_offset -= 4;

      fs_inst *inst = emit(fs_inst(FS_OPCODE_FB_WRITE,
                                   reg_undef, reg_undef));
      inst->target = target;
      inst->base_mrf = 0;
      inst->mlen = nr;
      if (target == c->key.nr_color_regions - 1)
         inst->eot = true;
      inst->header_present = header_present;
   }

   if (c->key.nr_color_regions == 0) {
      fs_inst *inst = emit(fs_inst(FS_OPCODE_FB_WRITE,
                                   reg_undef, reg_undef));
      inst->base_mrf = 0;
      inst->mlen = nr;
      inst->eot = true;
      inst->header_present = header_present;
   }

   this->current_annotation = NULL;
}

void
fs_visitor::generate_fb_write(fs_inst *inst)
{
   GLboolean eot = inst->eot;
   struct brw_reg implied_header;

   /* Header is 2 regs, g0 and g1 are the contents.  g0 will be implied
    * move, here's g1.
    */
   brw_push_insn_state(p);
   brw_set_mask_control(p, BRW_MASK_DISABLE);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);

   if (inst->header_present) {
      if (intel->gen >= 6) {
         brw_MOV(p,
                 brw_message_reg(inst->base_mrf),
                 brw_vec8_grf(0, 0));
         implied_header = brw_null_reg();
      } else {
         implied_header = retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW);
      }

      brw_MOV(p,
              brw_message_reg(inst->base_mrf + 1),
              brw_vec8_grf(1, 0));
   } else {
      implied_header = brw_null_reg();
   }

   brw_pop_insn_state(p);

   brw_fb_WRITE(p,
                8, /* dispatch_width */
                retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW),
                inst->base_mrf,
                implied_header,
                inst->target,
                inst->mlen,
                0,
                eot);
}

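/* The PLN instruction computes the full plane equation in one shot, but
 * it requires delta_x and delta_y to live in consecutive registers (and
 * an even-aligned pair before gen6); otherwise the interpolation is done
 * as a LINE/MAC pair.
 */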
1988 void
1989 fs_visitor::generate_linterp(fs_inst *inst,
1990 struct brw_reg dst, struct brw_reg *src)
1991 {
1992 struct brw_reg delta_x = src[0];
1993 struct brw_reg delta_y = src[1];
1994 struct brw_reg interp = src[2];
1995
1996 if (brw->has_pln &&
1997 delta_y.nr == delta_x.nr + 1 &&
1998 (intel->gen >= 6 || (delta_x.nr & 1) == 0)) {
1999 brw_PLN(p, dst, interp, delta_x);
2000 } else {
2001 brw_LINE(p, brw_null_reg(), interp, delta_x);
2002 brw_MAC(p, dst, suboffset(interp, 1), delta_y);
2003 }
2004 }
2005
2006 void
2007 fs_visitor::generate_math(fs_inst *inst,
2008 struct brw_reg dst, struct brw_reg *src)
2009 {
2010 int op;
2011
2012 switch (inst->opcode) {
2013 case FS_OPCODE_RCP:
2014 op = BRW_MATH_FUNCTION_INV;
2015 break;
2016 case FS_OPCODE_RSQ:
2017 op = BRW_MATH_FUNCTION_RSQ;
2018 break;
2019 case FS_OPCODE_SQRT:
2020 op = BRW_MATH_FUNCTION_SQRT;
2021 break;
2022 case FS_OPCODE_EXP2:
2023 op = BRW_MATH_FUNCTION_EXP;
2024 break;
2025 case FS_OPCODE_LOG2:
2026 op = BRW_MATH_FUNCTION_LOG;
2027 break;
2028 case FS_OPCODE_POW:
2029 op = BRW_MATH_FUNCTION_POW;
2030 break;
2031 case FS_OPCODE_SIN:
2032 op = BRW_MATH_FUNCTION_SIN;
2033 break;
2034 case FS_OPCODE_COS:
2035 op = BRW_MATH_FUNCTION_COS;
2036 break;
2037 default:
2038 assert(!"not reached: unknown math function");
2039 op = 0;
2040 break;
2041 }
2042
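/* From gen6 on, math executes as a regular instruction with register
 * sources (so mlen is 0); earlier generations send a message to the
 * shared math unit, hence the base_mrf/mlen handling below.
 */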
2043 if (intel->gen >= 6) {
2044 assert(inst->mlen == 0);
2045
2046 if (inst->opcode == FS_OPCODE_POW) {
2047 brw_math2(p, dst, op, src[0], src[1]);
2048 } else {
2049 brw_math(p, dst,
2050 op,
2051 inst->saturate ? BRW_MATH_SATURATE_SATURATE :
2052 BRW_MATH_SATURATE_NONE,
2053 0, src[0],
2054 BRW_MATH_DATA_VECTOR,
2055 BRW_MATH_PRECISION_FULL);
2056 }
2057 } else {
2058 assert(inst->mlen >= 1);
2059
2060 brw_math(p, dst,
2061 op,
2062 inst->saturate ? BRW_MATH_SATURATE_SATURATE :
2063 BRW_MATH_SATURATE_NONE,
2064 inst->base_mrf, src[0],
2065 BRW_MATH_DATA_VECTOR,
2066 BRW_MATH_PRECISION_FULL);
2067 }
2068 }
2069
2070 void
2071 fs_visitor::generate_tex(fs_inst *inst, struct brw_reg dst)
2072 {
2073 int msg_type = -1;
2074 int rlen = 4;
2075 uint32_t simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD8;
2076
2077 if (intel->gen >= 5) {
2078 switch (inst->opcode) {
2079 case FS_OPCODE_TEX:
2080 if (inst->shadow_compare) {
2081 msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_COMPARE_GEN5;
2082 } else {
2083 msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_GEN5;
2084 }
2085 break;
2086 case FS_OPCODE_TXB:
2087 if (inst->shadow_compare) {
2088 msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE_GEN5;
2089 } else {
2090 msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_GEN5;
2091 }
2092 break;
case FS_OPCODE_TXL:
/* generate_code() also dispatches FS_OPCODE_TXL here. A sketch,
 * assuming the gen5 sample_l message types from brw_defines.h.
 */
if (inst->shadow_compare) {
msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE_GEN5;
} else {
msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_LOD_GEN5;
}
break;
2093 }
2094 } else {
2095 switch (inst->opcode) {
2096 case FS_OPCODE_TEX:
2097 /* Note that G45 and older determine shadow compare and dispatch width
2098 * from message length for most messages.
2099 */
2100 msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE;
2101 if (inst->shadow_compare) {
2102 assert(inst->mlen == 6);
2103 } else {
2104 assert(inst->mlen <= 4);
2105 }
2106 break;
2107 case FS_OPCODE_TXB:
2108 if (inst->shadow_compare) {
2109 assert(inst->mlen == 6);
2110 msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE;
2111 } else {
2112 assert(inst->mlen == 9);
2113 msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS;
2114 simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
2115 }
2116 break;
2117 }
2118 }
2119 assert(msg_type != -1);
2120
2121 if (simd_mode == BRW_SAMPLER_SIMD_MODE_SIMD16) {
2122 rlen = 8;
2123 dst = vec16(dst);
2124 }
2125
2126 brw_SAMPLE(p,
2127 retype(dst, BRW_REGISTER_TYPE_UW),
2128 inst->base_mrf,
2129 retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW),
2130 SURF_INDEX_TEXTURE(inst->sampler),
2131 inst->sampler,
2132 WRITEMASK_XYZW,
2133 msg_type,
2134 rlen,
2135 inst->mlen,
2136 0,
2137 1,
2138 simd_mode);
2139 }
2140
2141
2142 /* For FS_OPCODE_DDX and FS_OPCODE_DDY, per channel of output the input
2143 * looks like:
2144 *
2145 * arg0: ss0.tl ss0.tr ss0.bl ss0.br ss1.tl ss1.tr ss1.bl ss1.br
2146 *
2147 * and we're trying to produce:
2148 *
2149 * DDX DDY
2150 * dst: (ss0.tr - ss0.tl) (ss0.tl - ss0.bl)
2151 * (ss0.tr - ss0.tl) (ss0.tr - ss0.br)
2152 * (ss0.br - ss0.bl) (ss0.tl - ss0.bl)
2153 * (ss0.br - ss0.bl) (ss0.tr - ss0.br)
2154 * (ss1.tr - ss1.tl) (ss1.tl - ss1.bl)
2155 * (ss1.tr - ss1.tl) (ss1.tr - ss1.br)
2156 * (ss1.br - ss1.bl) (ss1.tl - ss1.bl)
2157 * (ss1.br - ss1.bl) (ss1.tr - ss1.br)
2158 *
2159 * and add another set of two more subspans if in 16-pixel dispatch mode.
2160 *
2161 * For DDX, it ends up being easy: width = 2, horiz=0 gets us the same result
2162 * for each pair, and vertstride = 2 jumps us 2 elements after processing a
2163 * pair. But for DDY, it's harder, as we want to produce the pairs swizzled
2164 * between each other. We could probably do it like ddx and swizzle the right
2165 * order later, but bail for now and just produce
2166 * ((ss0.tl - ss0.bl)x4 (ss1.tl - ss1.bl)x4)
2167 */
2168 void
2169 fs_visitor::generate_ddx(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
2170 {
2171 struct brw_reg src0 = brw_reg(src.file, src.nr, 1,
2172 BRW_REGISTER_TYPE_F,
2173 BRW_VERTICAL_STRIDE_2,
2174 BRW_WIDTH_2,
2175 BRW_HORIZONTAL_STRIDE_0,
2176 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
2177 struct brw_reg src1 = brw_reg(src.file, src.nr, 0,
2178 BRW_REGISTER_TYPE_F,
2179 BRW_VERTICAL_STRIDE_2,
2180 BRW_WIDTH_2,
2181 BRW_HORIZONTAL_STRIDE_0,
2182 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
2183 brw_ADD(p, dst, src0, negate(src1));
2184 }
2185
2186 void
2187 fs_visitor::generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
2188 {
2189 struct brw_reg src0 = brw_reg(src.file, src.nr, 0,
2190 BRW_REGISTER_TYPE_F,
2191 BRW_VERTICAL_STRIDE_4,
2192 BRW_WIDTH_4,
2193 BRW_HORIZONTAL_STRIDE_0,
2194 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
2195 struct brw_reg src1 = brw_reg(src.file, src.nr, 2,
2196 BRW_REGISTER_TYPE_F,
2197 BRW_VERTICAL_STRIDE_4,
2198 BRW_WIDTH_4,
2199 BRW_HORIZONTAL_STRIDE_0,
2200 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
2201 brw_ADD(p, dst, src0, negate(src1));
2202 }
2203
2204 void
2205 fs_visitor::generate_discard_not(fs_inst *inst, struct brw_reg mask)
2206 {
2207 brw_push_insn_state(p);
2208 brw_set_mask_control(p, BRW_MASK_DISABLE);
2209 brw_NOT(p, mask, brw_mask_reg(1)); /* IMASK */
2210 brw_pop_insn_state(p);
2211 }
2212
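/* Discard support: DISCARD_NOT builds an inverted condition mask in a
 * temporary, and DISCARD_AND then ANDs it into the pixel-enable bits
 * in the g0 header, so later FB writes skip the discarded pixels.
 */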
2213 void
2214 fs_visitor::generate_discard_and(fs_inst *inst, struct brw_reg mask)
2215 {
2216 struct brw_reg g0 = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
2217 mask = brw_uw1_reg(mask.file, mask.nr, 0);
2218
2219 brw_push_insn_state(p);
2220 brw_set_mask_control(p, BRW_MASK_DISABLE);
2221 brw_AND(p, g0, mask, g0);
2222 brw_pop_insn_state(p);
2223 }
2224
2225 void
2226 fs_visitor::generate_spill(fs_inst *inst, struct brw_reg src)
2227 {
2228 assert(inst->mlen != 0);
2229
2230 brw_MOV(p,
2231 retype(brw_message_reg(inst->base_mrf + 1), BRW_REGISTER_TYPE_UD),
2232 retype(src, BRW_REGISTER_TYPE_UD));
2233 brw_oword_block_write(p, brw_message_reg(inst->base_mrf), 1, inst->offset);
2234 }
2235
2236 void
2237 fs_visitor::generate_unspill(fs_inst *inst, struct brw_reg dst)
2238 {
2239 assert(inst->mlen != 0);
2240
2241 /* Clear any post destination dependencies that would be ignored by
2242 * the block read. See the B-Spec for pre-gen5 send instruction.
2243 *
2244 * This could use a better solution, since texture sampling and
2245 * math reads could potentially run into it as well -- anywhere
2246 * that we have a SEND with a destination that is a register that
2247 * was written but not read within the last N instructions (what's
2248 * N? unsure). This is rare because of dead code elimination, but
2249 * not impossible.
2250 */
2251 if (intel->gen == 4 && !intel->is_g4x)
2252 brw_MOV(p, brw_null_reg(), dst);
2253
2254 brw_oword_block_read(p, dst, brw_message_reg(inst->base_mrf), 1,
2255 inst->offset);
2256
2257 if (intel->gen == 4 && !intel->is_g4x) {
2258 /* gen4 errata: destination from a send can't be used as a
2259 * destination until it's been read. Just read it so we don't
2260 * have to worry.
2261 */
2262 brw_MOV(p, brw_null_reg(), dst);
2263 }
2264 }
2265
2266 void
2267 fs_visitor::assign_curb_setup()
2268 {
2269 c->prog_data.first_curbe_grf = c->key.nr_payload_regs;
2270 c->prog_data.curb_read_length = ALIGN(c->prog_data.nr_params, 8) / 8;
2271
2272 /* Map the offsets in the UNIFORM file to fixed HW regs. */
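/* Constants are packed eight floats to a register starting at
 * first_curbe_grf, so e.g. constant_nr 11 lands in channel 3 of the
 * second CURBE register.
 */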
2273 foreach_iter(exec_list_iterator, iter, this->instructions) {
2274 fs_inst *inst = (fs_inst *)iter.get();
2275
2276 for (unsigned int i = 0; i < 3; i++) {
2277 if (inst->src[i].file == UNIFORM) {
2278 int constant_nr = inst->src[i].hw_reg + inst->src[i].reg_offset;
2279 struct brw_reg brw_reg = brw_vec1_grf(c->prog_data.first_curbe_grf +
2280 constant_nr / 8,
2281 constant_nr % 8);
2282
2283 inst->src[i].file = FIXED_HW_REG;
2284 inst->src[i].fixed_hw_reg = brw_reg;
2285 }
2286 }
2287 }
2288 }
2289
2290 void
2291 fs_visitor::calculate_urb_setup()
2292 {
2293 for (unsigned int i = 0; i < FRAG_ATTRIB_MAX; i++) {
2294 urb_setup[i] = -1;
2295 }
2296
2297 int urb_next = 0;
2298 /* Figure out where each of the incoming setup attributes lands. */
2299 if (intel->gen >= 6) {
2300 for (unsigned int i = 0; i < FRAG_ATTRIB_MAX; i++) {
2301 if (brw->fragment_program->Base.InputsRead & BITFIELD64_BIT(i)) {
2302 urb_setup[i] = urb_next++;
2303 }
2304 }
2305 } else {
2306 /* FINISHME: The sf doesn't map VS->FS inputs for us very well. */
2307 for (unsigned int i = 0; i < VERT_RESULT_MAX; i++) {
2308 if (c->key.vp_outputs_written & BITFIELD64_BIT(i)) {
2309 int fp_index;
2310
2311 if (i >= VERT_RESULT_VAR0)
2312 fp_index = i - (VERT_RESULT_VAR0 - FRAG_ATTRIB_VAR0);
2313 else if (i <= VERT_RESULT_TEX7)
2314 fp_index = i;
2315 else
2316 fp_index = -1;
2317
2318 if (fp_index >= 0)
2319 urb_setup[fp_index] = urb_next++;
2320 }
2321 }
2322 }
2323
2324 /* Each attribute is 4 setup channels, each of which is half a reg. */
2325 c->prog_data.urb_read_length = urb_next * 2;
2326 }
2327
2328 void
2329 fs_visitor::assign_urb_setup()
2330 {
2331 int urb_start = c->prog_data.first_curbe_grf + c->prog_data.curb_read_length;
2332
2333 /* Offset all the urb_setup[] index by the actual position of the
2334 * setup regs, now that the location of the constants has been chosen.
2335 */
2336 foreach_iter(exec_list_iterator, iter, this->instructions) {
2337 fs_inst *inst = (fs_inst *)iter.get();
2338
2339 if (inst->opcode != FS_OPCODE_LINTERP)
2340 continue;
2341
2342 assert(inst->src[2].file == FIXED_HW_REG);
2343
2344 inst->src[2].fixed_hw_reg.nr += urb_start;
2345 }
2346
2347 this->first_non_payload_grf = urb_start + c->prog_data.urb_read_length;
2348 }
2349
2350 /**
2351 * Split large virtual GRFs into separate components if we can.
2352 *
2353 * This mostly duplicates what brw_fs_vector_splitting does,
2354 * but that's really conservative because it's afraid of doing
2355 * splitting that doesn't result in real progress after the rest of
2356 * the optimization phases, which would cause infinite looping in
2357 * optimization. We can do it once here, safely. This also has the
2358 * opportunity to split interpolated values, or maybe even uniforms,
2359 * which we don't have at the IR level.
2360 *
2361 * We want to split, because virtual GRFs are what we register
2362 * allocate and spill (due to contiguousness requirements for some
2363 * instructions), and they're what we naturally generate in the
2364 * codegen process, but most virtual GRFs don't actually need to be
2365 * contiguous sets of GRFs. If we split, we'll end up with reduced
2366 * live intervals and better dead code elimination and coalescing.
2367 */
2368 void
2369 fs_visitor::split_virtual_grfs()
2370 {
2371 int num_vars = this->virtual_grf_next;
2372 bool split_grf[num_vars];
2373 int new_virtual_grf[num_vars];
2374
2375 /* Try to split anything bigger than one register. */
2376 for (int i = 0; i < num_vars; i++) {
2377 if (this->virtual_grf_sizes[i] != 1)
2378 split_grf[i] = true;
2379 else
2380 split_grf[i] = false;
2381 }
2382
2383 if (brw->has_pln) {
2384 /* PLN opcodes rely on the delta_xy being contiguous. */
2385 split_grf[this->delta_x.reg] = false;
2386 }
2387
2388 foreach_iter(exec_list_iterator, iter, this->instructions) {
2389 fs_inst *inst = (fs_inst *)iter.get();
2390
2391 /* Texturing produces 4 contiguous registers, so no splitting. */
2392 if ((inst->opcode == FS_OPCODE_TEX ||
2393 inst->opcode == FS_OPCODE_TXB ||
2394 inst->opcode == FS_OPCODE_TXL) &&
2395 inst->dst.file == GRF) {
2396 split_grf[inst->dst.reg] = false;
2397 }
2398 }
2399
2400 /* Allocate new space for split regs. Note that the virtual
2401 * numbers will be contiguous.
2402 */
2403 for (int i = 0; i < num_vars; i++) {
2404 if (split_grf[i]) {
2405 new_virtual_grf[i] = virtual_grf_alloc(1);
2406 for (int j = 2; j < this->virtual_grf_sizes[i]; j++) {
2407 int reg = virtual_grf_alloc(1);
2408 assert(reg == new_virtual_grf[i] + j - 1);
2409 }
2410 this->virtual_grf_sizes[i] = 1;
2411 }
2412 }
2413
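/* Rewrite each reference at a nonzero offset of a split GRF to point
 * at the corresponding new single-register GRF; offset 0 stays in the
 * original register, whose size is now 1.
 */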
2414 foreach_iter(exec_list_iterator, iter, this->instructions) {
2415 fs_inst *inst = (fs_inst *)iter.get();
2416
2417 if (inst->dst.file == GRF &&
2418 split_grf[inst->dst.reg] &&
2419 inst->dst.reg_offset != 0) {
2420 inst->dst.reg = (new_virtual_grf[inst->dst.reg] +
2421 inst->dst.reg_offset - 1);
2422 inst->dst.reg_offset = 0;
2423 }
2424 for (int i = 0; i < 3; i++) {
2425 if (inst->src[i].file == GRF &&
2426 split_grf[inst->src[i].reg] &&
2427 inst->src[i].reg_offset != 0) {
2428 inst->src[i].reg = (new_virtual_grf[inst->src[i].reg] +
2429 inst->src[i].reg_offset - 1);
2430 inst->src[i].reg_offset = 0;
2431 }
2432 }
2433 }
2434 }
2435
2436 void
2437 fs_visitor::calculate_live_intervals()
2438 {
2439 int num_vars = this->virtual_grf_next;
2440 int *def = talloc_array(mem_ctx, int, num_vars);
2441 int *use = talloc_array(mem_ctx, int, num_vars);
2442 int loop_depth = 0;
2443 int loop_start = 0;
2444
2445 for (int i = 0; i < num_vars; i++) {
2446 def[i] = 1 << 30;
2447 use[i] = -1;
2448 }
2449
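/* One pass over the instructions: def[] takes the earliest IP that
 * writes each GRF and use[] the latest IP that reads it. Accesses
 * inside a loop are credited to the loop start, and the WHILE handling
 * below then extends such uses to the end of the loop.
 */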
2450 int ip = 0;
2451 foreach_iter(exec_list_iterator, iter, this->instructions) {
2452 fs_inst *inst = (fs_inst *)iter.get();
2453
2454 if (inst->opcode == BRW_OPCODE_DO) {
2455 if (loop_depth++ == 0)
2456 loop_start = ip;
2457 } else if (inst->opcode == BRW_OPCODE_WHILE) {
2458 loop_depth--;
2459
2460 if (loop_depth == 0) {
2461 /* FINISHME:
2462 *
2463 * Patch up any vars that were used within the loop to be
2464 * considered live until the end of the loop. This is
2465 * conservative, as there will often be variables defined and
2466 * used inside the loop but dead at the end of the loop body.
2467 */
2468 for (int i = 0; i < num_vars; i++) {
2469 if (use[i] == loop_start) {
2470 use[i] = ip;
2471 }
2472 }
2473 }
2474 } else {
2475 int eip = ip;
2476
2477 if (loop_depth)
2478 eip = loop_start;
2479
2480 for (unsigned int i = 0; i < 3; i++) {
2481 if (inst->src[i].file == GRF && inst->src[i].reg != 0) {
2482 use[inst->src[i].reg] = MAX2(use[inst->src[i].reg], eip);
2483 }
2484 }
2485 if (inst->dst.file == GRF && inst->dst.reg != 0) {
2486 def[inst->dst.reg] = MIN2(def[inst->dst.reg], eip);
2487 }
2488 }
2489
2490 ip++;
2491 }
2492
2493 talloc_free(this->virtual_grf_def);
2494 talloc_free(this->virtual_grf_use);
2495 this->virtual_grf_def = def;
2496 this->virtual_grf_use = use;
2497 }
2498
2499 /**
2500 * Attempts to move immediate constants into the immediate
2501 * constant slot of following instructions.
2502 *
2503 * Immediate constants are a bit tricky -- they have to be in the last
2504 * operand slot, and you can't do abs or negate on them.
2505 */
2506
2507 bool
2508 fs_visitor::propagate_constants()
2509 {
2510 bool progress = false;
2511
2512 foreach_iter(exec_list_iterator, iter, this->instructions) {
2513 fs_inst *inst = (fs_inst *)iter.get();
2514
2515 if (inst->opcode != BRW_OPCODE_MOV ||
2516 inst->predicated ||
2517 inst->dst.file != GRF || inst->src[0].file != IMM ||
2518 inst->dst.type != inst->src[0].type)
2519 continue;
2520
2521 /* Don't bother with cases where we should have had the
2522 * operation on the constant folded in GLSL already.
2523 */
2524 if (inst->saturate)
2525 continue;
2526
2527 /* Found a move of a constant to a GRF. Find anything else using the GRF
2528 * before it's overwritten, and replace those uses with the constant if we can.
2529 */
2530 exec_list_iterator scan_iter = iter;
2531 scan_iter.next();
2532 for (; scan_iter.has_next(); scan_iter.next()) {
2533 fs_inst *scan_inst = (fs_inst *)scan_iter.get();
2534
2535 if (scan_inst->opcode == BRW_OPCODE_DO ||
2536 scan_inst->opcode == BRW_OPCODE_WHILE ||
2537 scan_inst->opcode == BRW_OPCODE_ELSE ||
2538 scan_inst->opcode == BRW_OPCODE_ENDIF) {
2539 break;
2540 }
2541
2542 for (int i = 2; i >= 0; i--) {
2543 if (scan_inst->src[i].file != GRF ||
2544 scan_inst->src[i].reg != inst->dst.reg ||
2545 scan_inst->src[i].reg_offset != inst->dst.reg_offset)
2546 continue;
2547
2548 /* Don't bother with cases where we should have had the
2549 * operation on the constant folded in GLSL already.
2550 */
2551 if (scan_inst->src[i].negate || scan_inst->src[i].abs)
2552 continue;
2553
2554 switch (scan_inst->opcode) {
2555 case BRW_OPCODE_MOV:
2556 scan_inst->src[i] = inst->src[0];
2557 progress = true;
2558 break;
2559
2560 case BRW_OPCODE_MUL:
2561 case BRW_OPCODE_ADD:
2562 if (i == 1) {
2563 scan_inst->src[i] = inst->src[0];
2564 progress = true;
2565 } else if (i == 0 && scan_inst->src[1].file != IMM) {
2566 /* Fit this constant in by commuting the operands. */
2567 scan_inst->src[0] = scan_inst->src[1];
2568 scan_inst->src[1] = inst->src[0];
progress = true;
2569 }
2570 break;
2571 case BRW_OPCODE_CMP:
2572 if (i == 1) {
2573 scan_inst->src[i] = inst->src[0];
2574 progress = true;
2575 }
break;
2576 }
2577 }
2578
2579 if (scan_inst->dst.file == GRF &&
2580 scan_inst->dst.reg == inst->dst.reg &&
2581 (scan_inst->dst.reg_offset == inst->dst.reg_offset ||
2582 scan_inst->opcode == FS_OPCODE_TEX)) {
2583 break;
2584 }
2585 }
2586 }
2587
2588 return progress;
2589 }

2590 /**
2591 * Must be called after calculate_live_intervals() to remove unused
2592 * writes to registers -- register allocation will fail otherwise
2593 * because something defined but never used won't be considered to
2594 * interfere with other regs.
2595 */
2596 bool
2597 fs_visitor::dead_code_eliminate()
2598 {
2599 bool progress = false;
2600 int num_vars = this->virtual_grf_next;
2601 bool dead[num_vars];
2602
2603 for (int i = 0; i < num_vars; i++) {
2604 dead[i] = this->virtual_grf_def[i] >= this->virtual_grf_use[i];
2605
2606 if (dead[i]) {
2607 /* Mark off its interval so it won't interfere with anything. */
2608 this->virtual_grf_def[i] = -1;
2609 this->virtual_grf_use[i] = -1;
2610 }
2611 }
2612
2613 foreach_iter(exec_list_iterator, iter, this->instructions) {
2614 fs_inst *inst = (fs_inst *)iter.get();
2615
2616 if (inst->dst.file == GRF && dead[inst->dst.reg]) {
2617 inst->remove();
2618 progress = true;
2619 }
2620 }
2621
2622 return progress;
2623 }
2624
2625 bool
2626 fs_visitor::register_coalesce()
2627 {
2628 bool progress = false;
2629
2630 foreach_iter(exec_list_iterator, iter, this->instructions) {
2631 fs_inst *inst = (fs_inst *)iter.get();
2632
2633 if (inst->opcode != BRW_OPCODE_MOV ||
2634 inst->predicated ||
2635 inst->saturate ||
2636 inst->dst.file != GRF || inst->src[0].file != GRF ||
2637 inst->dst.type != inst->src[0].type)
2638 continue;
2639
2640 /* Found a move of a GRF to a GRF. Let's see if we can coalesce
2641 * them: check for no writes to either one until the exit of the
2642 * program.
2643 */
2644 bool interfered = false;
2645 exec_list_iterator scan_iter = iter;
2646 scan_iter.next();
2647 for (; scan_iter.has_next(); scan_iter.next()) {
2648 fs_inst *scan_inst = (fs_inst *)scan_iter.get();
2649
2650 if (scan_inst->opcode == BRW_OPCODE_DO ||
2651 scan_inst->opcode == BRW_OPCODE_WHILE ||
2652 scan_inst->opcode == BRW_OPCODE_ENDIF) {
2653 interfered = true;
2654 iter = scan_iter;
2655 break;
2656 }
2657
2658 if (scan_inst->dst.file == GRF) {
2659 if (scan_inst->dst.reg == inst->dst.reg &&
2660 (scan_inst->dst.reg_offset == inst->dst.reg_offset ||
2661 scan_inst->opcode == FS_OPCODE_TEX)) {
2662 interfered = true;
2663 break;
2664 }
2665 if (scan_inst->dst.reg == inst->src[0].reg &&
2666 (scan_inst->dst.reg_offset == inst->src[0].reg_offset ||
2667 scan_inst->opcode == FS_OPCODE_TEX)) {
2668 interfered = true;
2669 break;
2670 }
2671 }
2672 }
2673 if (interfered) {
2674 continue;
2675 }
2676
2677 /* Update live interval so we don't have to recalculate. */
2678 this->virtual_grf_use[inst->src[0].reg] = MAX2(virtual_grf_use[inst->src[0].reg],
2679 virtual_grf_use[inst->dst.reg]);
2680
2681 /* Rewrite the later usage to point at the source of the move to
2682 * be removed.
2683 */
2684 for (exec_list_iterator scan_iter = iter; scan_iter.has_next();
2685 scan_iter.next()) {
2686 fs_inst *scan_inst = (fs_inst *)scan_iter.get();
2687
2688 for (int i = 0; i < 3; i++) {
2689 if (scan_inst->src[i].file == GRF &&
2690 scan_inst->src[i].reg == inst->dst.reg &&
2691 scan_inst->src[i].reg_offset == inst->dst.reg_offset) {
2692 scan_inst->src[i].reg = inst->src[0].reg;
2693 scan_inst->src[i].reg_offset = inst->src[0].reg_offset;
2694 scan_inst->src[i].abs |= inst->src[0].abs;
2695 scan_inst->src[i].negate ^= inst->src[0].negate;
2696 }
2697 }
2698 }
2699
2700 inst->remove();
2701 progress = true;
2702 }
2703
2704 return progress;
2705 }
2706
2707
2708 bool
2709 fs_visitor::compute_to_mrf()
2710 {
2711 bool progress = false;
2712 int next_ip = 0;
2713
2714 foreach_iter(exec_list_iterator, iter, this->instructions) {
2715 fs_inst *inst = (fs_inst *)iter.get();
2716
2717 int ip = next_ip;
2718 next_ip++;
2719
2720 if (inst->opcode != BRW_OPCODE_MOV ||
2721 inst->predicated ||
2722 inst->dst.file != MRF || inst->src[0].file != GRF ||
2723 inst->dst.type != inst->src[0].type ||
2724 inst->src[0].abs || inst->src[0].negate)
2725 continue;
2726
2727 /* Can't compute-to-MRF this GRF if someone else was going to
2728 * read it later.
2729 */
2730 if (this->virtual_grf_use[inst->src[0].reg] > ip)
2731 continue;
2732
2733 /* Found a move of a GRF to a MRF. Let's see if we can go
2734 * rewrite the thing that made this GRF to write into the MRF.
2735 */
2736 bool found = false;
2737 fs_inst *scan_inst;
2738 for (scan_inst = (fs_inst *)inst->prev;
2739 scan_inst->prev != NULL;
2740 scan_inst = (fs_inst *)scan_inst->prev) {
2741 /* We don't handle flow control here. Most computation of
2742 * values that end up in MRFs are shortly before the MRF
2743 * write anyway.
2744 */
2745 if (scan_inst->opcode == BRW_OPCODE_DO ||
2746 scan_inst->opcode == BRW_OPCODE_WHILE ||
2747 scan_inst->opcode == BRW_OPCODE_ENDIF) {
2748 break;
2749 }
2750
2751 /* You can't read from an MRF, so if someone else reads our
2752 * MRF's source GRF that we wanted to rewrite, that stops us.
2753 */
2754 bool interfered = false;
2755 for (int i = 0; i < 3; i++) {
2756 if (scan_inst->src[i].file == GRF &&
2757 scan_inst->src[i].reg == inst->src[0].reg &&
2758 scan_inst->src[i].reg_offset == inst->src[0].reg_offset) {
2759 interfered = true;
2760 }
2761 }
2762 if (interfered)
2763 break;
2764
2765 if (scan_inst->dst.file == MRF &&
2766 scan_inst->dst.hw_reg == inst->dst.hw_reg) {
2767 /* Somebody else wrote our MRF here, so we can't
2768 * compute-to-MRF before that.
2769 */
2770 break;
2771 }
2772
2773 if (scan_inst->mlen > 0) {
2774 /* Found a SEND instruction, whose implied write may
2775 * overwrite the MRF that we were hoping to compute-to-MRF
2776 * somewhere above it. Nothing we emit implied-writes more
2777 * than 2 MRFs past base_mrf, though.
2778 */
2780 int implied_write_len = MIN2(scan_inst->mlen, 2);
2781 if (inst->dst.hw_reg >= scan_inst->base_mrf &&
2782 inst->dst.hw_reg < scan_inst->base_mrf + implied_write_len) {
2783 break;
2784 }
2785 }
2786
2787 if (scan_inst->dst.file == GRF &&
2788 scan_inst->dst.reg == inst->src[0].reg) {
2789 /* Found the last thing to write our reg we want to turn
2790 * into a compute-to-MRF.
2791 */
2792
2793 if (scan_inst->opcode == FS_OPCODE_TEX) {
2794 /* Texturing writes several contiguous regs, so we can't
2795 * compute-to-mrf that.
2796 */
2797 break;
2798 }
2799
2800 /* If it's predicated, it (probably) didn't populate all
2801 * the channels.
2802 */
2803 if (scan_inst->predicated)
2804 break;
2805
2806 /* SEND instructions can't have MRF as a destination. */
2807 if (scan_inst->mlen)
2808 break;
2809
2810 if (intel->gen >= 6) {
2811 /* gen6 math instructions must have the destination be
2812 * GRF, so no compute-to-MRF for them.
2813 */
2814 if (scan_inst->opcode == FS_OPCODE_RCP ||
2815 scan_inst->opcode == FS_OPCODE_RSQ ||
2816 scan_inst->opcode == FS_OPCODE_SQRT ||
2817 scan_inst->opcode == FS_OPCODE_EXP2 ||
2818 scan_inst->opcode == FS_OPCODE_LOG2 ||
2819 scan_inst->opcode == FS_OPCODE_SIN ||
2820 scan_inst->opcode == FS_OPCODE_COS ||
2821 scan_inst->opcode == FS_OPCODE_POW) {
2822 break;
2823 }
2824 }
2825
2826 if (scan_inst->dst.reg_offset == inst->src[0].reg_offset) {
2827 /* Found the creator of our MRF's source value. */
2828 found = true;
2829 break;
2830 }
2831 }
2832 }
2833 if (found) {
2834 scan_inst->dst.file = MRF;
2835 scan_inst->dst.hw_reg = inst->dst.hw_reg;
2836 scan_inst->saturate |= inst->saturate;
2837 inst->remove();
2838 progress = true;
2839 }
2840 }
2841
2842 return progress;
2843 }
2844
2845 bool
2846 fs_visitor::virtual_grf_interferes(int a, int b)
2847 {
2848 int start = MAX2(this->virtual_grf_def[a], this->virtual_grf_def[b]);
2849 int end = MIN2(this->virtual_grf_use[a], this->virtual_grf_use[b]);
2850
2851 /* For dead code, just check if the def interferes with the other range. */
2852 if (this->virtual_grf_use[a] == -1) {
2853 return (this->virtual_grf_def[a] >= this->virtual_grf_def[b] &&
2854 this->virtual_grf_def[a] < this->virtual_grf_use[b]);
2855 }
2856 if (this->virtual_grf_use[b] == -1) {
2857 return (this->virtual_grf_def[b] >= this->virtual_grf_def[a] &&
2858 this->virtual_grf_def[b] < this->virtual_grf_use[a]);
2859 }
2860
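/* Otherwise the two ranges interfere iff they overlap, i.e. the later
 * def begins before the earlier use ends.
 */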
2861 return start < end;
2862 }
2863
2864 static struct brw_reg brw_reg_from_fs_reg(fs_reg *reg)
2865 {
2866 struct brw_reg brw_reg;
2867
2868 switch (reg->file) {
2869 case GRF:
2870 case ARF:
2871 case MRF:
2872 brw_reg = brw_vec8_reg(reg->file,
2873 reg->hw_reg, 0);
2874 brw_reg = retype(brw_reg, reg->type);
2875 break;
2876 case IMM:
2877 switch (reg->type) {
2878 case BRW_REGISTER_TYPE_F:
2879 brw_reg = brw_imm_f(reg->imm.f);
2880 break;
2881 case BRW_REGISTER_TYPE_D:
2882 brw_reg = brw_imm_d(reg->imm.i);
2883 break;
2884 case BRW_REGISTER_TYPE_UD:
2885 brw_reg = brw_imm_ud(reg->imm.u);
2886 break;
2887 default:
2888 assert(!"not reached");
2889 break;
2890 }
2891 break;
2892 case FIXED_HW_REG:
2893 brw_reg = reg->fixed_hw_reg;
2894 break;
2895 case BAD_FILE:
2896 /* Probably unused. */
2897 brw_reg = brw_null_reg();
2898 break;
2899 case UNIFORM:
2900 assert(!"not reached");
2901 brw_reg = brw_null_reg();
2902 break;
2903 }
2904 if (reg->abs)
2905 brw_reg = brw_abs(brw_reg);
2906 if (reg->negate)
2907 brw_reg = negate(brw_reg);
2908
2909 return brw_reg;
2910 }
2911
2912 void
2913 fs_visitor::generate_code()
2914 {
2915 unsigned int annotation_len = 0;
2916 int last_native_inst = 0;
2917 struct brw_instruction *if_stack[16], *loop_stack[16];
2918 int if_stack_depth = 0, loop_stack_depth = 0;
2919 int if_depth_in_loop[16];
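/* if_depth_in_loop counts the IFs opened inside the current loop
 * level, so BREAK/CONT know how many ENDIFs their jumps cross when
 * the matching WHILE patches their jump counts.
 */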
2920
2921 if_depth_in_loop[loop_stack_depth] = 0;
2922
2923 memset(&if_stack, 0, sizeof(if_stack));
2924 foreach_iter(exec_list_iterator, iter, this->instructions) {
2925 fs_inst *inst = (fs_inst *)iter.get();
2926 struct brw_reg src[3], dst;
2927
2928 for (unsigned int i = 0; i < 3; i++) {
2929 src[i] = brw_reg_from_fs_reg(&inst->src[i]);
2930 }
2931 dst = brw_reg_from_fs_reg(&inst->dst);
2932
2933 brw_set_conditionalmod(p, inst->conditional_mod);
2934 brw_set_predicate_control(p, inst->predicated);
2935
2936 switch (inst->opcode) {
2937 case BRW_OPCODE_MOV:
2938 brw_MOV(p, dst, src[0]);
2939 break;
2940 case BRW_OPCODE_ADD:
2941 brw_ADD(p, dst, src[0], src[1]);
2942 break;
2943 case BRW_OPCODE_MUL:
2944 brw_MUL(p, dst, src[0], src[1]);
2945 break;
2946
2947 case BRW_OPCODE_FRC:
2948 brw_FRC(p, dst, src[0]);
2949 break;
2950 case BRW_OPCODE_RNDD:
2951 brw_RNDD(p, dst, src[0]);
2952 break;
2953 case BRW_OPCODE_RNDE:
2954 brw_RNDE(p, dst, src[0]);
2955 break;
2956 case BRW_OPCODE_RNDZ:
2957 brw_RNDZ(p, dst, src[0]);
2958 break;
2959
2960 case BRW_OPCODE_AND:
2961 brw_AND(p, dst, src[0], src[1]);
2962 break;
2963 case BRW_OPCODE_OR:
2964 brw_OR(p, dst, src[0], src[1]);
2965 break;
2966 case BRW_OPCODE_XOR:
2967 brw_XOR(p, dst, src[0], src[1]);
2968 break;
2969 case BRW_OPCODE_NOT:
2970 brw_NOT(p, dst, src[0]);
2971 break;
2972 case BRW_OPCODE_ASR:
2973 brw_ASR(p, dst, src[0], src[1]);
2974 break;
2975 case BRW_OPCODE_SHR:
2976 brw_SHR(p, dst, src[0], src[1]);
2977 break;
2978 case BRW_OPCODE_SHL:
2979 brw_SHL(p, dst, src[0], src[1]);
2980 break;
2981
2982 case BRW_OPCODE_CMP:
2983 brw_CMP(p, dst, inst->conditional_mod, src[0], src[1]);
2984 break;
2985 case BRW_OPCODE_SEL:
2986 brw_SEL(p, dst, src[0], src[1]);
2987 break;
2988
2989 case BRW_OPCODE_IF:
2990 assert(if_stack_depth < 16);
2991 if (inst->src[0].file != BAD_FILE) {
2992 assert(intel->gen >= 6);
2993 if_stack[if_stack_depth] = brw_IF_gen6(p, inst->conditional_mod, src[0], src[1]);
2994 } else {
2995 if_stack[if_stack_depth] = brw_IF(p, BRW_EXECUTE_8);
2996 }
2997 if_depth_in_loop[loop_stack_depth]++;
2998 if_stack_depth++;
2999 break;
3000
3001 case BRW_OPCODE_ELSE:
3002 if_stack[if_stack_depth - 1] =
3003 brw_ELSE(p, if_stack[if_stack_depth - 1]);
3004 break;
3005 case BRW_OPCODE_ENDIF:
3006 if_stack_depth--;
3007 brw_ENDIF(p, if_stack[if_stack_depth]);
3008 if_depth_in_loop[loop_stack_depth]--;
3009 break;
3010
3011 case BRW_OPCODE_DO:
assert(loop_stack_depth < 16);
3012 loop_stack[loop_stack_depth++] = brw_DO(p, BRW_EXECUTE_8);
3013 if_depth_in_loop[loop_stack_depth] = 0;
3014 break;
3015
3016 case BRW_OPCODE_BREAK:
3017 brw_BREAK(p, if_depth_in_loop[loop_stack_depth]);
3018 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
3019 break;
3020 case BRW_OPCODE_CONTINUE:
3021 brw_CONT(p, if_depth_in_loop[loop_stack_depth]);
3022 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
3023 break;
3024
3025 case BRW_OPCODE_WHILE: {
3026 struct brw_instruction *inst0, *inst1;
3027 GLuint br = 1;
3028
3029 if (intel->gen >= 5)
3030 br = 2;
3031
3032 assert(loop_stack_depth > 0);
3033 loop_stack_depth--;
3034 inst0 = inst1 = brw_WHILE(p, loop_stack[loop_stack_depth]);
3035 /* patch all the BREAK/CONT instructions from last BGNLOOP */
3036 while (inst0 > loop_stack[loop_stack_depth]) {
3037 inst0--;
3038 if (inst0->header.opcode == BRW_OPCODE_BREAK &&
3039 inst0->bits3.if_else.jump_count == 0) {
3040 inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1);
3041 }
3042 else if (inst0->header.opcode == BRW_OPCODE_CONTINUE &&
3043 inst0->bits3.if_else.jump_count == 0) {
3044 inst0->bits3.if_else.jump_count = br * (inst1 - inst0);
3045 }
3046 }
3047 }
3048 break;
3049
3050 case FS_OPCODE_RCP:
3051 case FS_OPCODE_RSQ:
3052 case FS_OPCODE_SQRT:
3053 case FS_OPCODE_EXP2:
3054 case FS_OPCODE_LOG2:
3055 case FS_OPCODE_POW:
3056 case FS_OPCODE_SIN:
3057 case FS_OPCODE_COS:
3058 generate_math(inst, dst, src);
3059 break;
3060 case FS_OPCODE_LINTERP:
3061 generate_linterp(inst, dst, src);
3062 break;
3063 case FS_OPCODE_TEX:
3064 case FS_OPCODE_TXB:
3065 case FS_OPCODE_TXL:
3066 generate_tex(inst, dst);
3067 break;
3068 case FS_OPCODE_DISCARD_NOT:
3069 generate_discard_not(inst, dst);
3070 break;
3071 case FS_OPCODE_DISCARD_AND:
3072 generate_discard_and(inst, src[0]);
3073 break;
3074 case FS_OPCODE_DDX:
3075 generate_ddx(inst, dst, src[0]);
3076 break;
3077 case FS_OPCODE_DDY:
3078 generate_ddy(inst, dst, src[0]);
3079 break;
3080
3081 case FS_OPCODE_SPILL:
3082 generate_spill(inst, src[0]);
3083 break;
3084
3085 case FS_OPCODE_UNSPILL:
3086 generate_unspill(inst, dst);
3087 break;
3088
3089 case FS_OPCODE_FB_WRITE:
3090 generate_fb_write(inst);
3091 break;
3092 default:
3093 if (inst->opcode < (int)ARRAY_SIZE(brw_opcodes)) {
3094 _mesa_problem(ctx, "Unsupported opcode `%s' in FS",
3095 brw_opcodes[inst->opcode].name);
3096 } else {
3097 _mesa_problem(ctx, "Unsupported opcode %d in FS", inst->opcode);
3098 }
3099 this->fail = true;
3100 }
3101
3102 if (annotation_len < p->nr_insn) {
3103 /* Grow the annotation arrays until there is room for every
3104 * native instruction emitted so far.
3105 */
if (annotation_len < 16)
annotation_len = 16;
while (annotation_len < p->nr_insn)
annotation_len *= 2;
3106
3107 this->annotation_string = talloc_realloc(this->mem_ctx,
3108 annotation_string,
3109 const char *,
3110 annotation_len);
3111 this->annotation_ir = talloc_realloc(this->mem_ctx,
3112 annotation_ir,
3113 ir_instruction *,
3114 annotation_len);
3115 }
3116
3117 for (unsigned int i = last_native_inst; i < p->nr_insn; i++) {
3118 this->annotation_string[i] = inst->annotation;
3119 this->annotation_ir[i] = inst->ir;
3120 }
3121 last_native_inst = p->nr_insn;
3122 }
3123 }
3124
3125 GLboolean
3126 brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c)
3127 {
3128 struct brw_compile *p = &c->func;
3129 struct intel_context *intel = &brw->intel;
3130 struct gl_context *ctx = &intel->ctx;
3131 struct gl_shader_program *prog = ctx->Shader.CurrentProgram;
3132
3133 if (!prog)
3134 return GL_FALSE;
3135
3136 struct brw_shader *shader =
3137 (brw_shader *) prog->_LinkedShaders[MESA_SHADER_FRAGMENT];
3138 if (!shader)
3139 return GL_FALSE;
3140
3141 /* We always use 8-wide mode, at least for now. For one, flow
3142 * control only works in 8-wide. Also, when we're fragment shader
3143 * bound, we're almost always under register pressure as well, so
3144 * 8-wide would save us from the performance cliff of spilling
3145 * regs.
3146 */
3147 c->dispatch_width = 8;
3148
3149 if (INTEL_DEBUG & DEBUG_WM) {
3150 printf("GLSL IR for native fragment shader %d:\n", prog->Name);
3151 _mesa_print_ir(shader->ir, NULL);
3152 printf("\n");
3153 }
3154
3155 /* Now the main event: Visit the shader IR and generate our FS IR for it.
3156 */
3157 fs_visitor v(c, shader);
3158
3159 if (0) {
3160 v.emit_dummy_fs();
3161 } else {
3162 v.calculate_urb_setup();
3163 if (intel->gen < 6)
3164 v.emit_interpolation_setup_gen4();
3165 else
3166 v.emit_interpolation_setup_gen6();
3167
3168 /* Generate FS IR for main(). (the visitor only descends into
3169 * functions called "main").
3170 */
3171 foreach_iter(exec_list_iterator, iter, *shader->ir) {
3172 ir_instruction *ir = (ir_instruction *)iter.get();
3173 v.base_ir = ir;
3174 ir->accept(&v);
3175 }
3176
3177 v.emit_fb_writes();
3178
3179 v.split_virtual_grfs();
3180
3181 v.assign_curb_setup();
3182 v.assign_urb_setup();
3183
3184 bool progress;
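/* Run the peephole optimizations to a fixed point: each pass can
 * expose opportunities for the others, and the live intervals they
 * consult must be recomputed after any change.
 */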
3185 do {
3186 progress = false;
3187 v.calculate_live_intervals();
3188 progress = v.propagate_constants() || progress;
3189 progress = v.register_coalesce() || progress;
3190 progress = v.compute_to_mrf() || progress;
3191 progress = v.dead_code_eliminate() || progress;
3192 } while (progress);
3193
3194 if (0) {
3195 /* Debug of register spilling: Go spill everything. */
3196 int virtual_grf_count = v.virtual_grf_next;
3197 for (int i = 1; i < virtual_grf_count; i++) {
3198 v.spill_reg(i);
3199 }
3200 v.calculate_live_intervals();
3201 }
3202
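/* assign_regs() returns false when allocation fails, having chosen
 * something to spill; recompute liveness and retry until it succeeds
 * or gives up and sets fail.
 */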
3203 if (0)
3204 v.assign_regs_trivial();
3205 else {
3206 while (!v.assign_regs()) {
3207 if (v.fail)
3208 break;
3209
3210 v.calculate_live_intervals();
3211 }
3212 }
3213 }
3214
3215 if (!v.fail)
3216 v.generate_code();
3217
3218 assert(!v.fail); /* FINISHME: Cleanly fail, tested at link time, etc. */
3219
3220 if (v.fail)
3221 return GL_FALSE;
3222
3223 if (INTEL_DEBUG & DEBUG_WM) {
3224 const char *last_annotation_string = NULL;
3225 ir_instruction *last_annotation_ir = NULL;
3226
3227 printf("Native code for fragment shader %d:\n", prog->Name);
3228 for (unsigned int i = 0; i < p->nr_insn; i++) {
3229 if (last_annotation_ir != v.annotation_ir[i]) {
3230 last_annotation_ir = v.annotation_ir[i];
3231 if (last_annotation_ir) {
3232 printf(" ");
3233 last_annotation_ir->print();
3234 printf("\n");
3235 }
3236 }
3237 if (last_annotation_string != v.annotation_string[i]) {
3238 last_annotation_string = v.annotation_string[i];
3239 if (last_annotation_string)
3240 printf(" %s\n", last_annotation_string);
3241 }
3242 if (0) {
3243 printf("0x%08x 0x%08x 0x%08x 0x%08x ",
3244 ((uint32_t *)&p->store[i])[3],
3245 ((uint32_t *)&p->store[i])[2],
3246 ((uint32_t *)&p->store[i])[1],
3247 ((uint32_t *)&p->store[i])[0]);
3248 }
3249 brw_disasm(stdout, &p->store[i], intel->gen);
3250 }
3251 printf("\n");
3252 }
3253
3254 c->prog_data.total_grf = v.grf_used;
3255
3256 return GL_TRUE;
3257 }