src/mesa/drivers/dri/i965/brw_fs.cpp

   1 /*
   2  * Copyright © 2010 Intel Corporation
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the next
  12  * paragraph) shall be included in all copies or substantial portions of the
  13  * Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21  * IN THE SOFTWARE.
  22  *
  23  * Authors:
  24  *    Eric Anholt <eric@anholt.net>
  25  *
  26  */
  27
  28 extern "C" {
  29
  30 #include <sys/types.h>
  31
  32 #include "main/macros.h"
  33 #include "main/shaderobj.h"
  34 #include "main/uniforms.h"
  35 #include "program/prog_parameter.h"
  36 #include "program/prog_print.h"
  37 #include "program/prog_optimize.h"
  38 #include "program/register_allocate.h"
  39 #include "program/sampler.h"
  40 #include "program/hash_table.h"
  41 #include "brw_context.h"
  42 #include "brw_eu.h"
  43 #include "brw_wm.h"
  44 #include "talloc.h"
  45 }
  46 #include "brw_fs.h"
  47 #include "../glsl/glsl_types.h"
  48 #include "../glsl/ir_optimization.h"
  49 #include "../glsl/ir_print_visitor.h"
  50
  51 static struct brw_reg brw_reg_from_fs_reg(class fs_reg *reg);
  52
  53 struct gl_shader *
  54 brw_new_shader(struct gl_context *ctx, GLuint name, GLuint type)
  55 {
  56    struct brw_shader *shader;
  57
  58    shader = talloc_zero(NULL, struct brw_shader);
  59    if (shader) {
  60       shader->base.Type = type;
  61       shader->base.Name = name;
  62       _mesa_init_shader(ctx, &shader->base);
  63    }
  64
  65    return &shader->base;
  66 }
  67
  68 struct gl_shader_program *
  69 brw_new_shader_program(struct gl_context *ctx, GLuint name)
  70 {
  71    struct brw_shader_program *prog;
  72    prog = talloc_zero(NULL, struct brw_shader_program);
  73    if (prog) {
  74       prog->base.Name = name;
  75       _mesa_init_shader_program(ctx, &prog->base);
  76    }
  77    return &prog->base;
  78 }
  79
  80 GLboolean
  81 brw_compile_shader(struct gl_context *ctx, struct gl_shader *shader)
  82 {
  83    if (!_mesa_ir_compile_shader(ctx, shader))
  84       return GL_FALSE;
  85
  86    return GL_TRUE;
  87 }
  88
  89 GLboolean
  90 brw_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
  91 {
  92    struct brw_shader *shader =
  93       (struct brw_shader *)prog->_LinkedShaders[MESA_SHADER_FRAGMENT];
  94    if (shader != NULL) {
  95       void *mem_ctx = talloc_new(NULL);
  96       bool progress;
  97
  98       if (shader->ir)
  99          talloc_free(shader->ir);
 100       shader->ir = new(shader) exec_list;
 101       clone_ir_list(mem_ctx, shader->ir, shader->base.ir);
 102
 103       do_mat_op_to_vec(shader->ir);
 104       do_mod_to_fract(shader->ir);
 105       do_div_to_mul_rcp(shader->ir);
 106       do_sub_to_add_neg(shader->ir);
 107       do_explog_to_explog2(shader->ir);
 108       do_lower_texture_projection(shader->ir);
 109       brw_do_cubemap_normalize(shader->ir);
 110
 111       do {
 112          progress = false;
 113
 114          brw_do_channel_expressions(shader->ir);
 115          brw_do_vector_splitting(shader->ir);
 116
 117          progress = do_lower_jumps(shader->ir, true, true,
 118                                    true, /* main return */
 119                                    false, /* continue */
 120                                    false /* loops */
 121                                    ) || progress;
 122
 123          progress = do_common_optimization(shader->ir, true, 32) || progress;
 124
 125          progress = lower_noise(shader->ir) || progress;
 126          progress =
 127             lower_variable_index_to_cond_assign(shader->ir,
 128                                                 GL_TRUE, /* input */
 129                                                 GL_TRUE, /* output */
 130                                                 GL_TRUE, /* temp */
 131                                                 GL_TRUE /* uniform */
 132                                                 ) || progress;
 133       } while (progress);
 134
 135       validate_ir_tree(shader->ir);
 136
 137       reparent_ir(shader->ir, shader->ir);
 138       talloc_free(mem_ctx);
 139    }
 140
 141    if (!_mesa_ir_link_shader(ctx, prog))
 142       return GL_FALSE;
 143
 144    return GL_TRUE;
 145 }
 146
 147 static int
 148 type_size(const struct glsl_type *type)
 149 {
 150    unsigned int size, i;
 151
 152    switch (type->base_type) {
 153    case GLSL_TYPE_UINT:
 154    case GLSL_TYPE_INT:
 155    case GLSL_TYPE_FLOAT:
 156    case GLSL_TYPE_BOOL:
 157       return type->components();
 158    case GLSL_TYPE_ARRAY:
 159       return type_size(type->fields.array) * type->length;
 160    case GLSL_TYPE_STRUCT:
 161       size = 0;
 162       for (i = 0; i < type->length; i++) {
 163          size += type_size(type->fields.structure[i].type);
 164       }
 165       return size;
 166    case GLSL_TYPE_SAMPLER:
 167       /* Samplers take up no register space, since they're baked in at
 168        * link time.
 169        */
 170       return 0;
 171    default:
 172       assert(!"not reached");
 173       return 0;
 174    }
 175 }
 176
 177 int
 178 fs_visitor::virtual_grf_alloc(int size)
 179 {
 180    if (virtual_grf_array_size <= virtual_grf_next) {
 181       if (virtual_grf_array_size == 0)
 182          virtual_grf_array_size = 16;
 183       else
 184          virtual_grf_array_size *= 2;
 185       virtual_grf_sizes = talloc_realloc(mem_ctx, virtual_grf_sizes,
 186                                          int, virtual_grf_array_size);
 187
 188       /* This slot is always unused. */
 189       virtual_grf_sizes[0] = 0;
 190    }
 191    virtual_grf_sizes[virtual_grf_next] = size;
 192    return virtual_grf_next++;
 193 }
 194
 195 /** Fixed HW reg constructor. */
 196 fs_reg::fs_reg(enum register_file file, int hw_reg)
 197 {
 198    init();
 199    this->file = file;
 200    this->hw_reg = hw_reg;
 201    this->type = BRW_REGISTER_TYPE_F;
 202 }
 203
 204 /** Fixed HW reg constructor. */
 205 fs_reg::fs_reg(enum register_file file, int hw_reg, uint32_t type)
 206 {
 207    init();
 208    this->file = file;
 209    this->hw_reg = hw_reg;
 210    this->type = type;
 211 }
 212
 213 int
 214 brw_type_for_base_type(const struct glsl_type *type)
 215 {
 216    switch (type->base_type) {
 217    case GLSL_TYPE_FLOAT:
 218       return BRW_REGISTER_TYPE_F;
 219    case GLSL_TYPE_INT:
 220    case GLSL_TYPE_BOOL:
 221       return BRW_REGISTER_TYPE_D;
 222    case GLSL_TYPE_UINT:
 223       return BRW_REGISTER_TYPE_UD;
 224    case GLSL_TYPE_ARRAY:
 225    case GLSL_TYPE_STRUCT:
 226    case GLSL_TYPE_SAMPLER:
 227       /* These should be overridden with the type of the member when
 228        * dereferenced into.  BRW_REGISTER_TYPE_UD seems like a likely
 229        * way to trip up if we don't.
 230        */
 231       return BRW_REGISTER_TYPE_UD;
 232    default:
 233       assert(!"not reached");
 234       return BRW_REGISTER_TYPE_F;
 235    }
 236 }
 237
 238 /** Automatic reg constructor. */
 239 fs_reg::fs_reg(class fs_visitor *v, const struct glsl_type *type)
 240 {
 241    init();
 242
 243    this->file = GRF;
 244    this->reg = v->virtual_grf_alloc(type_size(type));
 245    this->reg_offset = 0;
 246    this->type = brw_type_for_base_type(type);
 247 }
 248
 249 fs_reg *
 250 fs_visitor::variable_storage(ir_variable *var)
 251 {
 252    return (fs_reg *)hash_table_find(this->variable_ht, var);
 253 }
 254
 255 /* Our support for uniforms is piggy-backed on the struct
 256  * gl_fragment_program, because that's where the values actually
 257  * get stored, rather than in some global gl_shader_program uniform
 258  * store.
 259  */
 260 int
 261 fs_visitor::setup_uniform_values(int loc, const glsl_type *type)
 262 {
 263    unsigned int offset = 0;
 264    float *vec_values;
 265
 266    if (type->is_matrix()) {
 267       const glsl_type *column = glsl_type::get_instance(GLSL_TYPE_FLOAT,
 268                                                         type->vector_elements,
 269                                                         1);
 270
 271       for (unsigned int i = 0; i < type->matrix_columns; i++) {
 272          offset += setup_uniform_values(loc + offset, column);
 273       }
 274
 275       return offset;
 276    }
 277
 278    switch (type->base_type) {
 279    case GLSL_TYPE_FLOAT:
 280    case GLSL_TYPE_UINT:
 281    case GLSL_TYPE_INT:
 282    case GLSL_TYPE_BOOL:
 283       vec_values = fp->Base.Parameters->ParameterValues[loc];
 284       for (unsigned int i = 0; i < type->vector_elements; i++) {
 285          unsigned int param = c->prog_data.nr_params++;
 286
 287          assert(param < ARRAY_SIZE(c->prog_data.param));
 288
 289          switch (type->base_type) {
 290          case GLSL_TYPE_FLOAT:
 291             c->prog_data.param_convert[param] = PARAM_NO_CONVERT;
 292             break;
 293          case GLSL_TYPE_UINT:
 294             c->prog_data.param_convert[param] = PARAM_CONVERT_F2U;
 295             break;
 296          case GLSL_TYPE_INT:
 297             c->prog_data.param_convert[param] = PARAM_CONVERT_F2I;
 298             break;
 299          case GLSL_TYPE_BOOL:
 300             c->prog_data.param_convert[param] = PARAM_CONVERT_F2B;
 301             break;
 302          }
 303
 304          c->prog_data.param[param] = &vec_values[i];
 305       }
 306       return 1;
 307
 308    case GLSL_TYPE_STRUCT:
 309       for (unsigned int i = 0; i < type->length; i++) {
 310          offset += setup_uniform_values(loc + offset,
 311                                         type->fields.structure[i].type);
 312       }
 313       return offset;
 314
 315    case GLSL_TYPE_ARRAY:
 316       for (unsigned int i = 0; i < type->length; i++) {
 317          offset += setup_uniform_values(loc + offset, type->fields.array);
 318       }
 319       return offset;
 320
 321    case GLSL_TYPE_SAMPLER:
 322       /* The sampler takes up a slot, but we don't use any values from it. */
 323       return 1;
 324
 325    default:
 326       assert(!"not reached");
 327       return 0;
 328    }
 329 }
 330
 331
 332 /* Our support for builtin uniforms is even scarier than non-builtin.
 333  * It sits on top of the PROG_STATE_VAR parameters that are
 334  * automatically updated from GL context state.
 335  */
 336 void
 337 fs_visitor::setup_builtin_uniform_values(ir_variable *ir)
 338 {
 339    const struct gl_builtin_uniform_desc *statevar = NULL;
 340
 341    for (unsigned int i = 0; _mesa_builtin_uniform_desc[i].name; i++) {
 342       statevar = &_mesa_builtin_uniform_desc[i];
 343       if (strcmp(ir->name, _mesa_builtin_uniform_desc[i].name) == 0)
 344          break;
 345    }
 346
 347    if (!statevar->name) {
 348       this->fail = true;
 349       printf("Failed to find builtin uniform `%s'\n", ir->name);
 350       return;
 351    }
 352
 353    int array_count;
 354    if (ir->type->is_array()) {
 355       array_count = ir->type->length;
 356    } else {
 357       array_count = 1;
 358    }
 359
 360    for (int a = 0; a < array_count; a++) {
 361       for (unsigned int i = 0; i < statevar->num_elements; i++) {
 362          struct gl_builtin_uniform_element *element = &statevar->elements[i];
 363          int tokens[STATE_LENGTH];
 364
 365          memcpy(tokens, element->tokens, sizeof(element->tokens));
 366          if (ir->type->is_array()) {
 367             tokens[1] = a;
 368          }
 369
 370          /* This state reference has already been setup by ir_to_mesa,
 371           * but we'll get the same index back here.
 372           */
 373          int index = _mesa_add_state_reference(this->fp->Base.Parameters,
 374                                                (gl_state_index *)tokens);
 375          float *vec_values = this->fp->Base.Parameters->ParameterValues[index];
 376
 377          /* Add each of the unique swizzles of the element as a
 378           * parameter.  This'll end up matching the expected layout of
 379           * the array/matrix/structure we're trying to fill in.
 380           */
 381          int last_swiz = -1;
 382          for (unsigned int i = 0; i < 4; i++) {
 383             int swiz = GET_SWZ(element->swizzle, i);
 384             if (swiz == last_swiz)
 385                break;
 386             last_swiz = swiz;
 387
 388             c->prog_data.param_convert[c->prog_data.nr_params] =
 389                PARAM_NO_CONVERT;
 390             c->prog_data.param[c->prog_data.nr_params++] = &vec_values[swiz];
 391          }
 392       }
 393    }
 394 }
 395
 396 fs_reg *
 397 fs_visitor::emit_fragcoord_interpolation(ir_variable *ir)
 398 {
 399    fs_reg *reg = new(this->mem_ctx) fs_reg(this, ir->type);
 400    fs_reg wpos = *reg;
 401    fs_reg neg_y = this->pixel_y;
 402    neg_y.negate = true;
 403
 404    /* gl_FragCoord.x */
 405    if (ir->pixel_center_integer) {
 406       emit(fs_inst(BRW_OPCODE_MOV, wpos, this->pixel_x));
 407    } else {
 408       emit(fs_inst(BRW_OPCODE_ADD, wpos, this->pixel_x, fs_reg(0.5f)));
 409    }
 410    wpos.reg_offset++;
 411
 412    /* gl_FragCoord.y */
 413    if (ir->origin_upper_left && ir->pixel_center_integer) {
 414       emit(fs_inst(BRW_OPCODE_MOV, wpos, this->pixel_y));
 415    } else {
 416       fs_reg pixel_y = this->pixel_y;
 417       float offset = (ir->pixel_center_integer ? 0.0 : 0.5);
 418
 419       if (!ir->origin_upper_left) {
 420          pixel_y.negate = true;
 421          offset += c->key.drawable_height - 1.0;
 422       }
 423
 424       emit(fs_inst(BRW_OPCODE_ADD, wpos, pixel_y, fs_reg(offset)));
 425    }
 426    wpos.reg_offset++;
 427
 428    /* gl_FragCoord.z */
 429    emit(fs_inst(FS_OPCODE_LINTERP, wpos, this->delta_x, this->delta_y,
 430                 interp_reg(FRAG_ATTRIB_WPOS, 2)));
 431    wpos.reg_offset++;
 432
 433    /* gl_FragCoord.w: Already set up in emit_interpolation */
 434    emit(fs_inst(BRW_OPCODE_MOV, wpos, this->wpos_w));
 435
 436    return reg;
 437 }
 438
 439 fs_reg *
 440 fs_visitor::emit_general_interpolation(ir_variable *ir)
 441 {
 442    fs_reg *reg = new(this->mem_ctx) fs_reg(this, ir->type);
 443    /* Interpolation is always in floating point regs. */
 444    reg->type = BRW_REGISTER_TYPE_F;
 445    fs_reg attr = *reg;
 446
 447    unsigned int array_elements;
 448    const glsl_type *type;
 449
 450    if (ir->type->is_array()) {
 451       array_elements = ir->type->length;
 452       if (array_elements == 0) {
 453          this->fail = true;
 454       }
 455       type = ir->type->fields.array;
 456    } else {
 457       array_elements = 1;
 458       type = ir->type;
 459    }
 460
 461    int location = ir->location;
 462    for (unsigned int i = 0; i < array_elements; i++) {
 463       for (unsigned int j = 0; j < type->matrix_columns; j++) {
 464          if (urb_setup[location] == -1) {
 465             /* If there's no incoming setup data for this slot, don't
 466              * emit interpolation for it.
 467              */
 468             attr.reg_offset += type->vector_elements;
 469             location++;
 470             continue;
 471          }
 472
 473          for (unsigned int c = 0; c < type->vector_elements; c++) {
 474             struct brw_reg interp = interp_reg(location, c);
 475             emit(fs_inst(FS_OPCODE_LINTERP,
 476                          attr,
 477                          this->delta_x,
 478                          this->delta_y,
 479                          fs_reg(interp)));
 480             attr.reg_offset++;
 481          }
 482
 483          if (intel->gen < 6) {
 484             attr.reg_offset -= type->vector_elements;
 485             for (unsigned int c = 0; c < type->vector_elements; c++) {
 486                emit(fs_inst(BRW_OPCODE_MUL,
 487                             attr,
 488                             attr,
 489                             this->pixel_w));
 490                attr.reg_offset++;
 491             }
 492          }
 493          location++;
 494       }
 495    }
 496
 497    return reg;
 498 }
 499
 500 fs_reg *
 501 fs_visitor::emit_frontfacing_interpolation(ir_variable *ir)
 502 {
 503    fs_reg *reg = new(this->mem_ctx) fs_reg(this, ir->type);
 504
 505    /* The frontfacing comes in as a bit in the thread payload. */
 506    if (intel->gen >= 6) {
 507       emit(fs_inst(BRW_OPCODE_ASR,
 508                    *reg,
 509                    fs_reg(retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_D)),
 510                    fs_reg(15)));
 511       emit(fs_inst(BRW_OPCODE_NOT,
 512                    *reg,
 513                    *reg));
 514       emit(fs_inst(BRW_OPCODE_AND,
 515                    *reg,
 516                    *reg,
 517                    fs_reg(1)));
 518    } else {
 519       struct brw_reg r1_6ud = retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD);
 520       /* bit 31 is "primitive is back face", so checking < (1 << 31) gives
 521        * us front face
 522        */
 523       fs_inst *inst = emit(fs_inst(BRW_OPCODE_CMP,
 524                                    *reg,
 525                                    fs_reg(r1_6ud),
 526                                    fs_reg(1u << 31)));
 527       inst->conditional_mod = BRW_CONDITIONAL_L;
 528       emit(fs_inst(BRW_OPCODE_AND, *reg, *reg, fs_reg(1u)));
 529    }
 530
 531    return reg;
 532 }
 533
 534 fs_inst *
 535 fs_visitor::emit_math(fs_opcodes opcode, fs_reg dst, fs_reg src)
 536 {
 537    switch (opcode) {
 538    case FS_OPCODE_RCP:
 539    case FS_OPCODE_RSQ:
 540    case FS_OPCODE_SQRT:
 541    case FS_OPCODE_EXP2:
 542    case FS_OPCODE_LOG2:
 543    case FS_OPCODE_SIN:
 544    case FS_OPCODE_COS:
 545       break;
 546    default:
 547       assert(!"not reached: bad math opcode");
 548       return NULL;
 549    }
 550
 551    /* Can't do hstride == 0 args to gen6 math, so expand it out.  We
 552     * might be able to do better by doing execsize = 1 math and then
 553     * expanding that result out, but we would need to be careful with
 554     * masking.
 555     */
 556    if (intel->gen >= 6 && src.file == UNIFORM) {
 557       fs_reg expanded = fs_reg(this, glsl_type::float_type);
 558       emit(fs_inst(BRW_OPCODE_MOV, expanded, src));
 559       src = expanded;
 560    }
 561
 562    fs_inst *inst = emit(fs_inst(opcode, dst, src));
 563
 564    if (intel->gen < 6) {
 565       inst->base_mrf = 2;
 566       inst->mlen = 1;
 567    }
 568
 569    return inst;
 570 }
 571
 572 fs_inst *
 573 fs_visitor::emit_math(fs_opcodes opcode, fs_reg dst, fs_reg src0, fs_reg src1)
 574 {
 575    int base_mrf = 2;
 576    fs_inst *inst;
 577
 578    assert(opcode == FS_OPCODE_POW);
 579
 580    if (intel->gen >= 6) {
 581       /* Can't do hstride == 0 args to gen6 math, so expand it out. */
 582       if (src0.file == UNIFORM) {
 583          fs_reg expanded = fs_reg(this, glsl_type::float_type);
 584          emit(fs_inst(BRW_OPCODE_MOV, expanded, src0));
 585          src0 = expanded;
 586       }
 587
 588       if (src1.file == UNIFORM) {
 589          fs_reg expanded = fs_reg(this, glsl_type::float_type);
 590          emit(fs_inst(BRW_OPCODE_MOV, expanded, src1));
 591          src1 = expanded;
 592       }
 593
 594       inst = emit(fs_inst(opcode, dst, src0, src1));
 595    } else {
 596       emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + 1), src1));
 597       inst = emit(fs_inst(opcode, dst, src0, reg_null_f));
 598
 599       inst->base_mrf = base_mrf;
 600       inst->mlen = 2;
 601    }
 602    return inst;
 603 }
 604
 605 void
 606 fs_visitor::visit(ir_variable *ir)
 607 {
 608    fs_reg *reg = NULL;
 609
 610    if (variable_storage(ir))
 611       return;
 612
 613    if (strcmp(ir->name, "gl_FragColor") == 0) {
 614       this->frag_color = ir;
 615    } else if (strcmp(ir->name, "gl_FragData") == 0) {
 616       this->frag_data = ir;
 617    } else if (strcmp(ir->name, "gl_FragDepth") == 0) {
 618       this->frag_depth = ir;
 619    }
 620
 621    if (ir->mode == ir_var_in) {
 622       if (!strcmp(ir->name, "gl_FragCoord")) {
 623          reg = emit_fragcoord_interpolation(ir);
 624       } else if (!strcmp(ir->name, "gl_FrontFacing")) {
 625          reg = emit_frontfacing_interpolation(ir);
 626       } else {
 627          reg = emit_general_interpolation(ir);
 628       }
 629       assert(reg);
 630       hash_table_insert(this->variable_ht, reg, ir);
 631       return;
 632    }
 633
 634    if (ir->mode == ir_var_uniform) {
 635       int param_index = c->prog_data.nr_params;
 636
 637       if (!strncmp(ir->name, "gl_", 3)) {
 638          setup_builtin_uniform_values(ir);
 639       } else {
 640          setup_uniform_values(ir->location, ir->type);
 641       }
 642
 643       reg = new(this->mem_ctx) fs_reg(UNIFORM, param_index);
 644       reg->type = brw_type_for_base_type(ir->type);
 645    }
 646
 647    if (!reg)
 648       reg = new(this->mem_ctx) fs_reg(this, ir->type);
 649
 650    hash_table_insert(this->variable_ht, reg, ir);
 651 }
 652
 653 void
 654 fs_visitor::visit(ir_dereference_variable *ir)
 655 {
 656    fs_reg *reg = variable_storage(ir->var);
 657    this->result = *reg;
 658 }
 659
 660 void
 661 fs_visitor::visit(ir_dereference_record *ir)
 662 {
 663    const glsl_type *struct_type = ir->record->type;
 664
 665    ir->record->accept(this);
 666
 667    unsigned int offset = 0;
 668    for (unsigned int i = 0; i < struct_type->length; i++) {
 669       if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0)
 670          break;
 671       offset += type_size(struct_type->fields.structure[i].type);
 672    }
 673    this->result.reg_offset += offset;
 674    this->result.type = brw_type_for_base_type(ir->type);
 675 }
 676
 677 void
 678 fs_visitor::visit(ir_dereference_array *ir)
 679 {
 680    ir_constant *index;
 681    int element_size;
 682
 683    ir->array->accept(this);
 684    index = ir->array_index->as_constant();
 685
 686    element_size = type_size(ir->type);
 687    this->result.type = brw_type_for_base_type(ir->type);
 688
 689    if (index) {
 690       assert(this->result.file == UNIFORM ||
 691              (this->result.file == GRF &&
 692               this->result.reg != 0));
 693       this->result.reg_offset += index->value.i[0] * element_size;
 694    } else {
 695       assert(!"FINISHME: non-constant array element");
 696    }
 697 }
 698
 699 void
 700 fs_visitor::visit(ir_expression *ir)
 701 {
 702    unsigned int operand;
 703    fs_reg op[2], temp;
 704    fs_inst *inst;
 705
 706    for (operand = 0; operand < ir->get_num_operands(); operand++) {
 707       ir->operands[operand]->accept(this);
 708       if (this->result.file == BAD_FILE) {
 709          ir_print_visitor v;
 710          printf("Failed to get tree for expression operand:\n");
 711          ir->operands[operand]->accept(&v);
 712          this->fail = true;
 713       }
 714       op[operand] = this->result;
 715
 716       /* Matrix expression operands should have been broken down to vector
 717        * operations already.
 718        */
 719       assert(!ir->operands[operand]->type->is_matrix());
 720       /* And then those vector operands should have been broken down to scalar.
 721        */
 722       assert(!ir->operands[operand]->type->is_vector());
 723    }
 724
 725    /* Storage for our result.  If our result goes into an assignment, it will
 726     * just get copy-propagated out, so no worries.
 727     */
 728    this->result = fs_reg(this, ir->type);
 729
 730    switch (ir->operation) {
 731    case ir_unop_logic_not:
 732       /* Note that BRW_OPCODE_NOT is not appropriate here, since it is
 733        * ones complement of the whole register, not just bit 0.
 734        */
 735       emit(fs_inst(BRW_OPCODE_XOR, this->result, op[0], fs_reg(1)));
 736       break;
 737    case ir_unop_neg:
 738       op[0].negate = !op[0].negate;
 739       this->result = op[0];
 740       break;
 741    case ir_unop_abs:
 742       op[0].abs = true;
 743       this->result = op[0];
 744       break;
 745    case ir_unop_sign:
 746       temp = fs_reg(this, ir->type);
 747
 748       emit(fs_inst(BRW_OPCODE_MOV, this->result, fs_reg(0.0f)));
 749
 750       inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_f, op[0], fs_reg(0.0f)));
 751       inst->conditional_mod = BRW_CONDITIONAL_G;
 752       inst = emit(fs_inst(BRW_OPCODE_MOV, this->result, fs_reg(1.0f)));
 753       inst->predicated = true;
 754
 755       inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_f, op[0], fs_reg(0.0f)));
 756       inst->conditional_mod = BRW_CONDITIONAL_L;
 757       inst = emit(fs_inst(BRW_OPCODE_MOV, this->result, fs_reg(-1.0f)));
 758       inst->predicated = true;
 759
 760       break;
 761    case ir_unop_rcp:
 762       emit_math(FS_OPCODE_RCP, this->result, op[0]);
 763       break;
 764
 765    case ir_unop_exp2:
 766       emit_math(FS_OPCODE_EXP2, this->result, op[0]);
 767       break;
 768    case ir_unop_log2:
 769       emit_math(FS_OPCODE_LOG2, this->result, op[0]);
 770       break;
 771    case ir_unop_exp:
 772    case ir_unop_log:
 773       assert(!"not reached: should be handled by ir_explog_to_explog2");
 774       break;
 775    case ir_unop_sin:
 776       emit_math(FS_OPCODE_SIN, this->result, op[0]);
 777       break;
 778    case ir_unop_cos:
 779       emit_math(FS_OPCODE_COS, this->result, op[0]);
 780       break;
 781
 782    case ir_unop_dFdx:
 783       emit(fs_inst(FS_OPCODE_DDX, this->result, op[0]));
 784       break;
 785    case ir_unop_dFdy:
 786       emit(fs_inst(FS_OPCODE_DDY, this->result, op[0]));
 787       break;
 788
 789    case ir_binop_add:
 790       emit(fs_inst(BRW_OPCODE_ADD, this->result, op[0], op[1]));
 791       break;
 792    case ir_binop_sub:
 793       assert(!"not reached: should be handled by ir_sub_to_add_neg");
 794       break;
 795
 796    case ir_binop_mul:
 797       emit(fs_inst(BRW_OPCODE_MUL, this->result, op[0], op[1]));
 798       break;
 799    case ir_binop_div:
 800       assert(!"not reached: should be handled by ir_div_to_mul_rcp");
 801       break;
 802    case ir_binop_mod:
 803       assert(!"ir_binop_mod should have been converted to b * fract(a/b)");
 804       break;
 805
 806    case ir_binop_less:
 807       inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
 808       inst->conditional_mod = BRW_CONDITIONAL_L;
 809       emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
 810       break;
 811    case ir_binop_greater:
 812       inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
 813       inst->conditional_mod = BRW_CONDITIONAL_G;
 814       emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
 815       break;
 816    case ir_binop_lequal:
 817       inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
 818       inst->conditional_mod = BRW_CONDITIONAL_LE;
 819       emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
 820       break;
 821    case ir_binop_gequal:
 822       inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
 823       inst->conditional_mod = BRW_CONDITIONAL_GE;
 824       emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
 825       break;
 826    case ir_binop_equal:
 827    case ir_binop_all_equal: /* same as nequal for scalars */
 828       inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
 829       inst->conditional_mod = BRW_CONDITIONAL_Z;
 830       emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
 831       break;
 832    case ir_binop_nequal:
 833    case ir_binop_any_nequal: /* same as nequal for scalars */
 834       inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
 835       inst->conditional_mod = BRW_CONDITIONAL_NZ;
 836       emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
 837       break;
 838
 839    case ir_binop_logic_xor:
 840       emit(fs_inst(BRW_OPCODE_XOR, this->result, op[0], op[1]));
 841       break;
 842
 843    case ir_binop_logic_or:
 844       emit(fs_inst(BRW_OPCODE_OR, this->result, op[0], op[1]));
 845       break;
 846
 847    case ir_binop_logic_and:
 848       emit(fs_inst(BRW_OPCODE_AND, this->result, op[0], op[1]));
 849       break;
 850
 851    case ir_binop_dot:
 852    case ir_binop_cross:
 853    case ir_unop_any:
 854       assert(!"not reached: should be handled by brw_fs_channel_expressions");
 855       break;
 856
 857    case ir_unop_noise:
 858       assert(!"not reached: should be handled by lower_noise");
 859       break;
 860
 861    case ir_unop_sqrt:
 862       emit_math(FS_OPCODE_SQRT, this->result, op[0]);
 863       break;
 864
 865    case ir_unop_rsq:
 866       emit_math(FS_OPCODE_RSQ, this->result, op[0]);
 867       break;
 868
 869    case ir_unop_i2f:
 870    case ir_unop_b2f:
 871    case ir_unop_b2i:
 872    case ir_unop_f2i:
 873       emit(fs_inst(BRW_OPCODE_MOV, this->result, op[0]));
 874       break;
 875    case ir_unop_f2b:
 876    case ir_unop_i2b:
 877       inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], fs_reg(0.0f)));
 878       inst->conditional_mod = BRW_CONDITIONAL_NZ;
 879       inst = emit(fs_inst(BRW_OPCODE_AND, this->result,
 880                           this->result, fs_reg(1)));
 881       break;
 882
 883    case ir_unop_trunc:
 884       emit(fs_inst(BRW_OPCODE_RNDZ, this->result, op[0]));
 885       break;
 886    case ir_unop_ceil:
 887       op[0].negate = !op[0].negate;
 888       inst = emit(fs_inst(BRW_OPCODE_RNDD, this->result, op[0]));
 889       this->result.negate = true;
 890       break;
 891    case ir_unop_floor:
 892       inst = emit(fs_inst(BRW_OPCODE_RNDD, this->result, op[0]));
 893       break;
 894    case ir_unop_fract:
 895       inst = emit(fs_inst(BRW_OPCODE_FRC, this->result, op[0]));
 896       break;
 897    case ir_unop_round_even:
 898       emit(fs_inst(BRW_OPCODE_RNDE, this->result, op[0]));
 899       break;
 900
 901    case ir_binop_min:
 902       inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
 903       inst->conditional_mod = BRW_CONDITIONAL_L;
 904
 905       inst = emit(fs_inst(BRW_OPCODE_SEL, this->result, op[0], op[1]));
 906       inst->predicated = true;
 907       break;
 908    case ir_binop_max:
 909       inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
 910       inst->conditional_mod = BRW_CONDITIONAL_G;
 911
 912       inst = emit(fs_inst(BRW_OPCODE_SEL, this->result, op[0], op[1]));
 913       inst->predicated = true;
 914       break;
 915
 916    case ir_binop_pow:
 917       emit_math(FS_OPCODE_POW, this->result, op[0], op[1]);
 918       break;
 919
 920    case ir_unop_bit_not:
 921       inst = emit(fs_inst(BRW_OPCODE_NOT, this->result, op[0]));
 922       break;
 923    case ir_binop_bit_and:
 924       inst = emit(fs_inst(BRW_OPCODE_AND, this->result, op[0], op[1]));
 925       break;
 926    case ir_binop_bit_xor:
 927       inst = emit(fs_inst(BRW_OPCODE_XOR, this->result, op[0], op[1]));
 928       break;
 929    case ir_binop_bit_or:
 930       inst = emit(fs_inst(BRW_OPCODE_OR, this->result, op[0], op[1]));
 931       break;
 932
 933    case ir_unop_u2f:
 934    case ir_binop_lshift:
 935    case ir_binop_rshift:
 936       assert(!"GLSL 1.30 features unsupported");
 937       break;
 938    }
 939 }
 940
 941 void
 942 fs_visitor::emit_assignment_writes(fs_reg &l, fs_reg &r,
 943                                    const glsl_type *type, bool predicated)
 944 {
 945    switch (type->base_type) {
 946    case GLSL_TYPE_FLOAT:
 947    case GLSL_TYPE_UINT:
 948    case GLSL_TYPE_INT:
 949    case GLSL_TYPE_BOOL:
 950       for (unsigned int i = 0; i < type->components(); i++) {
 951          l.type = brw_type_for_base_type(type);
 952          r.type = brw_type_for_base_type(type);
 953
 954          fs_inst *inst = emit(fs_inst(BRW_OPCODE_MOV, l, r));
 955          inst->predicated = predicated;
 956
 957          l.reg_offset++;
 958          r.reg_offset++;
 959       }
 960       break;
 961    case GLSL_TYPE_ARRAY:
 962       for (unsigned int i = 0; i < type->length; i++) {
 963          emit_assignment_writes(l, r, type->fields.array, predicated);
 964       }
 965       break;
 966
 967    case GLSL_TYPE_STRUCT:
 968       for (unsigned int i = 0; i < type->length; i++) {
 969          emit_assignment_writes(l, r, type->fields.structure[i].type,
 970                                 predicated);
 971       }
 972       break;
 973
 974    case GLSL_TYPE_SAMPLER:
 975       break;
 976
 977    default:
 978       assert(!"not reached");
 979       break;
 980    }
 981 }
 982
 983 void
 984 fs_visitor::visit(ir_assignment *ir)
 985 {
 986    struct fs_reg l, r;
 987    fs_inst *inst;
 988
 989    /* FINISHME: arrays on the lhs */
 990    ir->lhs->accept(this);
 991    l = this->result;
 992
 993    ir->rhs->accept(this);
 994    r = this->result;
 995
 996    assert(l.file != BAD_FILE);
 997    assert(r.file != BAD_FILE);
 998
 999    if (ir->condition) {
1000       emit_bool_to_cond_code(ir->condition);
1001    }
1002
1003    if (ir->lhs->type->is_scalar() ||
1004        ir->lhs->type->is_vector()) {
1005       for (int i = 0; i < ir->lhs->type->vector_elements; i++) {
1006          if (ir->write_mask & (1 << i)) {
1007             inst = emit(fs_inst(BRW_OPCODE_MOV, l, r));
1008             if (ir->condition)
1009                inst->predicated = true;
1010             r.reg_offset++;
1011          }
1012          l.reg_offset++;
1013       }
1014    } else {
1015       emit_assignment_writes(l, r, ir->lhs->type, ir->condition != NULL);
1016    }
1017 }
1018
1019 fs_inst *
1020 fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate)
1021 {
1022    int mlen;
1023    int base_mrf = 1;
1024    bool simd16 = false;
1025    fs_reg orig_dst;
1026
1027    /* g0 header. */
1028    mlen = 1;
1029
1030    if (ir->shadow_comparitor) {
1031       for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
1032          emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i),
1033                       coordinate));
1034          coordinate.reg_offset++;
1035       }
1036       /* gen4's SIMD8 sampler always has the slots for u,v,r present. */
1037       mlen += 3;
1038
1039       if (ir->op == ir_tex) {
1040          /* There's no plain shadow compare message, so we use shadow
1041           * compare with a bias of 0.0.
1042           */
1043          emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen),
1044                       fs_reg(0.0f)));
1045          mlen++;
1046       } else if (ir->op == ir_txb) {
1047          ir->lod_info.bias->accept(this);
1048          emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen),
1049                       this->result));
1050          mlen++;
1051       } else {
1052          assert(ir->op == ir_txl);
1053          ir->lod_info.lod->accept(this);
1054          emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen),
1055                       this->result));
1056          mlen++;
1057       }
1058
1059       ir->shadow_comparitor->accept(this);
1060       emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result));
1061       mlen++;
1062    } else if (ir->op == ir_tex) {
1063       for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
1064          emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i),
1065                       coordinate));
1066          coordinate.reg_offset++;
1067       }
1068       /* gen4's SIMD8 sampler always has the slots for u,v,r present. */
1069       mlen += 3;
1070    } else {
1071       /* Oh joy.  gen4 doesn't have SIMD8 non-shadow-compare bias/lod
1072        * instructions.  We'll need to do SIMD16 here.
1073        */
1074       assert(ir->op == ir_txb || ir->op == ir_txl);
1075
1076       for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
1077          emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i * 2),
1078                       coordinate));
1079          coordinate.reg_offset++;
1080       }
1081
1082       /* lod/bias appears after u/v/r. */
1083       mlen += 6;
1084
1085       if (ir->op == ir_txb) {
1086          ir->lod_info.bias->accept(this);
1087          emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen),
1088                       this->result));
1089          mlen++;
1090       } else {
1091          ir->lod_info.lod->accept(this);
1092          emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen),
1093                       this->result));
1094          mlen++;
1095       }
1096
1097       /* The unused upper half. */
1098       mlen++;
1099
1100       /* Now, since we're doing simd16, the return is 2 interleaved
1101        * vec4s where the odd-indexed ones are junk. We'll need to move
1102        * this weirdness around to the expected layout.
1103        */
1104       simd16 = true;
1105       orig_dst = dst;
1106       dst = fs_reg(this, glsl_type::get_array_instance(glsl_type::vec4_type,
1107                                                        2));
1108       dst.type = BRW_REGISTER_TYPE_F;
1109    }
1110
1111    fs_inst *inst = NULL;
1112    switch (ir->op) {
1113    case ir_tex:
1114       inst = emit(fs_inst(FS_OPCODE_TEX, dst));
1115       break;
1116    case ir_txb:
1117       inst = emit(fs_inst(FS_OPCODE_TXB, dst));
1118       break;
1119    case ir_txl:
1120       inst = emit(fs_inst(FS_OPCODE_TXL, dst));
1121       break;
1122    case ir_txd:
1123    case ir_txf:
1124       assert(!"GLSL 1.30 features unsupported");
1125       break;
1126    }
1127    inst->base_mrf = base_mrf;
1128    inst->mlen = mlen;
1129
1130    if (simd16) {
1131       for (int i = 0; i < 4; i++) {
1132          emit(fs_inst(BRW_OPCODE_MOV, orig_dst, dst));
1133          orig_dst.reg_offset++;
1134          dst.reg_offset += 2;
1135       }
1136    }
1137
1138    return inst;
1139 }
1140
1141 fs_inst *
1142 fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate)
1143 {
1144    /* gen5's SIMD8 sampler has slots for u, v, r, array index, then
1145     * optional parameters like shadow comparitor or LOD bias.  If
1146     * optional parameters aren't present, those base slots are
1147     * optional and don't need to be included in the message.
1148     *
1149     * We don't fill in the unnecessary slots regardless, which may
1150     * look surprising in the disassembly.
1151     */
1152    int mlen = 1; /* g0 header always present. */
1153    int base_mrf = 1;
1154
1155    for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
1156       emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i),
1157                    coordinate));
1158       coordinate.reg_offset++;
1159    }
1160    mlen += ir->coordinate->type->vector_elements;
1161
1162    if (ir->shadow_comparitor) {
1163       mlen = MAX2(mlen, 5);
1164
1165       ir->shadow_comparitor->accept(this);
1166       emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result));
1167       mlen++;
1168    }
1169
1170    fs_inst *inst = NULL;
1171    switch (ir->op) {
1172    case ir_tex:
1173       inst = emit(fs_inst(FS_OPCODE_TEX, dst));
1174       break;
1175    case ir_txb:
1176       ir->lod_info.bias->accept(this);
1177       mlen = MAX2(mlen, 5);
1178       emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result));
1179       mlen++;
1180
1181       inst = emit(fs_inst(FS_OPCODE_TXB, dst));
1182       break;
1183    case ir_txl:
1184       ir->lod_info.lod->accept(this);
1185       mlen = MAX2(mlen, 5);
1186       emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result));
1187       mlen++;
1188
1189       inst = emit(fs_inst(FS_OPCODE_TXL, dst));
1190       break;
1191    case ir_txd:
1192    case ir_txf:
1193       assert(!"GLSL 1.30 features unsupported");
1194       break;
1195    }
1196    inst->base_mrf = base_mrf;
1197    inst->mlen = mlen;
1198
1199    return inst;
1200 }
1201
1202 void
1203 fs_visitor::visit(ir_texture *ir)
1204 {
1205    int sampler;
1206    fs_inst *inst = NULL;
1207
1208    ir->coordinate->accept(this);
1209    fs_reg coordinate = this->result;
1210
1211    /* Should be lowered by do_lower_texture_projection */
1212    assert(!ir->projector);
1213
1214    sampler = _mesa_get_sampler_uniform_value(ir->sampler,
1215                                              ctx->Shader.CurrentFragmentProgram,
1216                                              &brw->fragment_program->Base);
1217    sampler = c->fp->program.Base.SamplerUnits[sampler];
1218
1219    /* The 965 requires the EU to do the normalization of GL rectangle
1220     * texture coordinates.  We use the program parameter state
1221     * tracking to get the scaling factor.
1222     */
1223    if (ir->sampler->type->sampler_dimensionality == GLSL_SAMPLER_DIM_RECT) {
1224       struct gl_program_parameter_list *params = c->fp->program.Base.Parameters;
1225       int tokens[STATE_LENGTH] = {
1226          STATE_INTERNAL,
1227          STATE_TEXRECT_SCALE,
1228          sampler,
1229          0,
1230          0
1231       };
1232
1233       c->prog_data.param_convert[c->prog_data.nr_params] =
1234          PARAM_NO_CONVERT;
1235       c->prog_data.param_convert[c->prog_data.nr_params + 1] =
1236          PARAM_NO_CONVERT;
1237
1238       fs_reg scale_x = fs_reg(UNIFORM, c->prog_data.nr_params);
1239       fs_reg scale_y = fs_reg(UNIFORM, c->prog_data.nr_params + 1);
1240       GLuint index = _mesa_add_state_reference(params,
1241                                                (gl_state_index *)tokens);
1242       float *vec_values = this->fp->Base.Parameters->ParameterValues[index];
1243
1244       c->prog_data.param[c->prog_data.nr_params++] = &vec_values[0];
1245       c->prog_data.param[c->prog_data.nr_params++] = &vec_values[1];
1246
1247       fs_reg dst = fs_reg(this, ir->coordinate->type);
1248       fs_reg src = coordinate;
1249       coordinate = dst;
1250
1251       emit(fs_inst(BRW_OPCODE_MUL, dst, src, scale_x));
1252       dst.reg_offset++;
1253       src.reg_offset++;
1254       emit(fs_inst(BRW_OPCODE_MUL, dst, src, scale_y));
1255    }
1256
1257    /* Writemasking doesn't eliminate channels on SIMD8 texture
1258     * samples, so don't worry about them.
1259     */
1260    fs_reg dst = fs_reg(this, glsl_type::vec4_type);
1261
1262    if (intel->gen < 5) {
1263       inst = emit_texture_gen4(ir, dst, coordinate);
1264    } else {
1265       inst = emit_texture_gen5(ir, dst, coordinate);
1266    }
1267
1268    inst->sampler = sampler;
1269
1270    this->result = dst;
1271
1272    if (ir->shadow_comparitor)
1273       inst->shadow_compare = true;
1274
1275    if (c->key.tex_swizzles[inst->sampler] != SWIZZLE_NOOP) {
1276       fs_reg swizzle_dst = fs_reg(this, glsl_type::vec4_type);
1277
1278       for (int i = 0; i < 4; i++) {
1279          int swiz = GET_SWZ(c->key.tex_swizzles[inst->sampler], i);
1280          fs_reg l = swizzle_dst;
1281          l.reg_offset += i;
1282
1283          if (swiz == SWIZZLE_ZERO) {
1284             emit(fs_inst(BRW_OPCODE_MOV, l, fs_reg(0.0f)));
1285          } else if (swiz == SWIZZLE_ONE) {
1286             emit(fs_inst(BRW_OPCODE_MOV, l, fs_reg(1.0f)));
1287          } else {
1288             fs_reg r = dst;
1289             r.reg_offset += GET_SWZ(c->key.tex_swizzles[inst->sampler], i);
1290             emit(fs_inst(BRW_OPCODE_MOV, l, r));
1291          }
1292       }
1293       this->result = swizzle_dst;
1294    }
1295 }
1296
1297 void
1298 fs_visitor::visit(ir_swizzle *ir)
1299 {
1300    ir->val->accept(this);
1301    fs_reg val = this->result;
1302
1303    if (ir->type->vector_elements == 1) {
1304       this->result.reg_offset += ir->mask.x;
1305       return;
1306    }
1307
1308    fs_reg result = fs_reg(this, ir->type);
1309    this->result = result;
1310
1311    for (unsigned int i = 0; i < ir->type->vector_elements; i++) {
1312       fs_reg channel = val;
1313       int swiz = 0;
1314
1315       switch (i) {
1316       case 0:
1317          swiz = ir->mask.x;
1318          break;
1319       case 1:
1320          swiz = ir->mask.y;
1321          break;
1322       case 2:
1323          swiz = ir->mask.z;
1324          break;
1325       case 3:
1326          swiz = ir->mask.w;
1327          break;
1328       }
1329
1330       channel.reg_offset += swiz;
1331       emit(fs_inst(BRW_OPCODE_MOV, result, channel));
1332       result.reg_offset++;
1333    }
1334 }
1335
1336 void
1337 fs_visitor::visit(ir_discard *ir)
1338 {
1339    fs_reg temp = fs_reg(this, glsl_type::uint_type);
1340
1341    assert(ir->condition == NULL); /* FINISHME */
1342
1343    emit(fs_inst(FS_OPCODE_DISCARD_NOT, temp, reg_null_d));
1344    emit(fs_inst(FS_OPCODE_DISCARD_AND, reg_null_d, temp));
1345    kill_emitted = true;
1346 }
1347
1348 void
1349 fs_visitor::visit(ir_constant *ir)
1350 {
1351    fs_reg reg(this, ir->type);
1352    this->result = reg;
1353
1354    for (unsigned int i = 0; i < ir->type->vector_elements; i++) {
1355       switch (ir->type->base_type) {
1356       case GLSL_TYPE_FLOAT:
1357          emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg(ir->value.f[i])));
1358          break;
1359       case GLSL_TYPE_UINT:
1360          emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg(ir->value.u[i])));
1361          break;
1362       case GLSL_TYPE_INT:
1363          emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg(ir->value.i[i])));
1364          break;
1365       case GLSL_TYPE_BOOL:
1366          emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg((int)ir->value.b[i])));
1367          break;
1368       default:
1369          assert(!"Non-float/uint/int/bool constant");
1370       }
1371       reg.reg_offset++;
1372    }
1373 }
1374
1375 void
1376 fs_visitor::emit_bool_to_cond_code(ir_rvalue *ir)
1377 {
1378    ir_expression *expr = ir->as_expression();
1379
1380    if (expr) {
1381       fs_reg op[2];
1382       fs_inst *inst;
1383
1384       for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
1385          assert(expr->operands[i]->type->is_scalar());
1386
1387          expr->operands[i]->accept(this);
1388          op[i] = this->result;
1389       }
1390
1391       switch (expr->operation) {
1392       case ir_unop_logic_not:
1393          inst = emit(fs_inst(BRW_OPCODE_AND, reg_null_d, op[0], fs_reg(1)));
1394          inst->conditional_mod = BRW_CONDITIONAL_Z;
1395          break;
1396
1397       case ir_binop_logic_xor:
1398          inst = emit(fs_inst(BRW_OPCODE_XOR, reg_null_d, op[0], op[1]));
1399          inst->conditional_mod = BRW_CONDITIONAL_NZ;
1400          break;
1401
1402       case ir_binop_logic_or:
1403          inst = emit(fs_inst(BRW_OPCODE_OR, reg_null_d, op[0], op[1]));
1404          inst->conditional_mod = BRW_CONDITIONAL_NZ;
1405          break;
1406
1407       case ir_binop_logic_and:
1408          inst = emit(fs_inst(BRW_OPCODE_AND, reg_null_d, op[0], op[1]));
1409          inst->conditional_mod = BRW_CONDITIONAL_NZ;
1410          break;
1411
1412       case ir_unop_f2b:
1413          if (intel->gen >= 6) {
1414             inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_d,
1415                                 op[0], fs_reg(0.0f)));
1416          } else {
1417             inst = emit(fs_inst(BRW_OPCODE_MOV, reg_null_d, op[0]));
1418          }
1419          inst->conditional_mod = BRW_CONDITIONAL_NZ;
1420          break;
1421
1422       case ir_unop_i2b:
1423          if (intel->gen >= 6) {
1424             inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_d, op[0], fs_reg(0)));
1425          } else {
1426             inst = emit(fs_inst(BRW_OPCODE_MOV, reg_null_d, op[0]));
1427          }
1428          inst->conditional_mod = BRW_CONDITIONAL_NZ;
1429          break;
1430
1431       case ir_binop_greater:
1432          inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_d, op[0], op[1]));
1433          inst->conditional_mod = BRW_CONDITIONAL_G;
1434          break;
1435       case ir_binop_gequal:
1436          inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_d, op[0], op[1]));
1437          inst->conditional_mod = BRW_CONDITIONAL_GE;
1438          break;
1439       case ir_binop_less:
1440          inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_d, op[0], op[1]));
1441          inst->conditional_mod = BRW_CONDITIONAL_L;
1442          break;
1443       case ir_binop_lequal:
1444          inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_d, op[0], op[1]));
1445          inst->conditional_mod = BRW_CONDITIONAL_LE;
1446          break;
1447       case ir_binop_equal:
1448       case ir_binop_all_equal:
1449          inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_d, op[0], op[1]));
1450          inst->conditional_mod = BRW_CONDITIONAL_Z;
1451          break;
1452       case ir_binop_nequal:
1453       case ir_binop_any_nequal:
1454          inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_d, op[0], op[1]));
1455          inst->conditional_mod = BRW_CONDITIONAL_NZ;
1456          break;
1457       default:
1458          assert(!"not reached");
1459          this->fail = true;
1460          break;
1461       }
1462       return;
1463    }
1464
1465    ir->accept(this);
1466
1467    if (intel->gen >= 6) {
1468       fs_inst *inst = emit(fs_inst(BRW_OPCODE_AND, reg_null_d,
1469                                    this->result, fs_reg(1)));
1470       inst->conditional_mod = BRW_CONDITIONAL_NZ;
1471    } else {
1472       fs_inst *inst = emit(fs_inst(BRW_OPCODE_MOV, reg_null_d, this->result));
1473       inst->conditional_mod = BRW_CONDITIONAL_NZ;
1474    }
1475 }
1476
1477 /**
1478  * Emit a gen6 IF statement with the comparison folded into the IF
1479  * instruction.
1480  */
1481 void
1482 fs_visitor::emit_if_gen6(ir_if *ir)
1483 {
1484    ir_expression *expr = ir->condition->as_expression();
1485
1486    if (expr) {
1487       fs_reg op[2];
1488       fs_inst *inst;
1489       fs_reg temp;
1490
1491       for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
1492          assert(expr->operands[i]->type->is_scalar());
1493
1494          expr->operands[i]->accept(this);
1495          op[i] = this->result;
1496       }
1497
1498       switch (expr->operation) {
1499       case ir_unop_logic_not:
1500          inst = emit(fs_inst(BRW_OPCODE_IF, temp, op[0], fs_reg(1)));
1501          inst->conditional_mod = BRW_CONDITIONAL_Z;
1502          return;
1503
1504       case ir_binop_logic_xor:
1505          inst = emit(fs_inst(BRW_OPCODE_IF, reg_null_d, op[0], op[1]));
1506          inst->conditional_mod = BRW_CONDITIONAL_NZ;
1507          return;
1508
1509       case ir_binop_logic_or:
1510          temp = fs_reg(this, glsl_type::bool_type);
1511          emit(fs_inst(BRW_OPCODE_OR, temp, op[0], op[1]));
1512          inst = emit(fs_inst(BRW_OPCODE_IF, reg_null_d, temp, fs_reg(0)));
1513          inst->conditional_mod = BRW_CONDITIONAL_NZ;
1514          return;
1515
1516       case ir_binop_logic_and:
1517          temp = fs_reg(this, glsl_type::bool_type);
1518          emit(fs_inst(BRW_OPCODE_AND, temp, op[0], op[1]));
1519          inst = emit(fs_inst(BRW_OPCODE_IF, reg_null_d, temp, fs_reg(0)));
1520          inst->conditional_mod = BRW_CONDITIONAL_NZ;
1521          return;
1522
1523       case ir_unop_f2b:
1524          inst = emit(fs_inst(BRW_OPCODE_IF, reg_null_f, op[0], fs_reg(0)));
1525          inst->conditional_mod = BRW_CONDITIONAL_NZ;
1526          return;
1527
1528       case ir_unop_i2b:
1529          inst = emit(fs_inst(BRW_OPCODE_IF, reg_null_d, op[0], fs_reg(0)));
1530          inst->conditional_mod = BRW_CONDITIONAL_NZ;
1531          return;
1532
1533       case ir_binop_greater:
1534          inst = emit(fs_inst(BRW_OPCODE_IF, reg_null_d, op[0], op[1]));
1535          inst->conditional_mod = BRW_CONDITIONAL_G;
1536          return;
1537       case ir_binop_gequal:
1538          inst = emit(fs_inst(BRW_OPCODE_IF, reg_null_d, op[0], op[1]));
1539          inst->conditional_mod = BRW_CONDITIONAL_GE;
1540          return;
1541       case ir_binop_less:
1542          inst = emit(fs_inst(BRW_OPCODE_IF, reg_null_d, op[0], op[1]));
1543          inst->conditional_mod = BRW_CONDITIONAL_L;
1544          return;
1545       case ir_binop_lequal:
1546          inst = emit(fs_inst(BRW_OPCODE_IF, reg_null_d, op[0], op[1]));
1547          inst->conditional_mod = BRW_CONDITIONAL_LE;
1548          return;
1549       case ir_binop_equal:
1550       case ir_binop_all_equal:
1551          inst = emit(fs_inst(BRW_OPCODE_IF, reg_null_d, op[0], op[1]));
1552          inst->conditional_mod = BRW_CONDITIONAL_Z;
1553          return;
1554       case ir_binop_nequal:
1555       case ir_binop_any_nequal:
1556          inst = emit(fs_inst(BRW_OPCODE_IF, reg_null_d, op[0], op[1]));
1557          inst->conditional_mod = BRW_CONDITIONAL_NZ;
1558          return;
1559       default:
1560          assert(!"not reached");
1561          inst = emit(fs_inst(BRW_OPCODE_IF, reg_null_d, op[0], fs_reg(0)));
1562          inst->conditional_mod = BRW_CONDITIONAL_NZ;
1563          this->fail = true;
1564          return;
1565       }
1566       return;
1567    }
1568
1569    ir->condition->accept(this);
1570
1571    fs_inst *inst = emit(fs_inst(BRW_OPCODE_IF, reg_null_d, this->result, fs_reg(0)));
1572    inst->conditional_mod = BRW_CONDITIONAL_NZ;
1573 }
1574
1575 void
1576 fs_visitor::visit(ir_if *ir)
1577 {
1578    fs_inst *inst;
1579
1580    /* Don't point the annotation at the if statement, because then it plus
1581     * the then and else blocks get printed.
1582     */
1583    this->base_ir = ir->condition;
1584
1585    if (intel->gen >= 6) {
1586       emit_if_gen6(ir);
1587    } else {
1588       emit_bool_to_cond_code(ir->condition);
1589
1590       inst = emit(fs_inst(BRW_OPCODE_IF));
1591       inst->predicated = true;
1592    }
1593
1594    foreach_iter(exec_list_iterator, iter, ir->then_instructions) {
1595       ir_instruction *ir = (ir_instruction *)iter.get();
1596       this->base_ir = ir;
1597
1598       ir->accept(this);
1599    }
1600
1601    if (!ir->else_instructions.is_empty()) {
1602       emit(fs_inst(BRW_OPCODE_ELSE));
1603
1604       foreach_iter(exec_list_iterator, iter, ir->else_instructions) {
1605          ir_instruction *ir = (ir_instruction *)iter.get();
1606          this->base_ir = ir;
1607
1608          ir->accept(this);
1609       }
1610    }
1611
1612    emit(fs_inst(BRW_OPCODE_ENDIF));
1613 }
1614
1615 void
1616 fs_visitor::visit(ir_loop *ir)
1617 {
1618    fs_reg counter = reg_undef;
1619
1620    if (ir->counter) {
1621       this->base_ir = ir->counter;
1622       ir->counter->accept(this);
1623       counter = *(variable_storage(ir->counter));
1624
1625       if (ir->from) {
1626          this->base_ir = ir->from;
1627          ir->from->accept(this);
1628
1629          emit(fs_inst(BRW_OPCODE_MOV, counter, this->result));
1630       }
1631    }
1632
1633    emit(fs_inst(BRW_OPCODE_DO));
1634
1635    if (ir->to) {
1636       this->base_ir = ir->to;
1637       ir->to->accept(this);
1638
1639       fs_inst *inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_d,
1640                                    counter, this->result));
1641       switch (ir->cmp) {
1642       case ir_binop_equal:
1643          inst->conditional_mod = BRW_CONDITIONAL_Z;
1644          break;
1645       case ir_binop_nequal:
1646          inst->conditional_mod = BRW_CONDITIONAL_NZ;
1647          break;
1648       case ir_binop_gequal:
1649          inst->conditional_mod = BRW_CONDITIONAL_GE;
1650          break;
1651       case ir_binop_lequal:
1652          inst->conditional_mod = BRW_CONDITIONAL_LE;
1653          break;
1654       case ir_binop_greater:
1655          inst->conditional_mod = BRW_CONDITIONAL_G;
1656          break;
1657       case ir_binop_less:
1658          inst->conditional_mod = BRW_CONDITIONAL_L;
1659          break;
1660       default:
1661          assert(!"not reached: unknown loop condition");
1662          this->fail = true;
1663          break;
1664       }
1665
1666       inst = emit(fs_inst(BRW_OPCODE_BREAK));
1667       inst->predicated = true;
1668    }
1669
1670    foreach_iter(exec_list_iterator, iter, ir->body_instructions) {
1671       ir_instruction *ir = (ir_instruction *)iter.get();
1672
1673       this->base_ir = ir;
1674       ir->accept(this);
1675    }
1676
1677    if (ir->increment) {
1678       this->base_ir = ir->increment;
1679       ir->increment->accept(this);
1680       emit(fs_inst(BRW_OPCODE_ADD, counter, counter, this->result));
1681    }
1682
1683    emit(fs_inst(BRW_OPCODE_WHILE));
1684 }
1685
1686 void
1687 fs_visitor::visit(ir_loop_jump *ir)
1688 {
1689    switch (ir->mode) {
1690    case ir_loop_jump::jump_break:
1691       emit(fs_inst(BRW_OPCODE_BREAK));
1692       break;
1693    case ir_loop_jump::jump_continue:
1694       emit(fs_inst(BRW_OPCODE_CONTINUE));
1695       break;
1696    }
1697 }
1698
1699 void
1700 fs_visitor::visit(ir_call *ir)
1701 {
1702    assert(!"FINISHME");
1703 }
1704
1705 void
1706 fs_visitor::visit(ir_return *ir)
1707 {
1708    assert(!"FINISHME");
1709 }
1710
1711 void
1712 fs_visitor::visit(ir_function *ir)
1713 {
1714    /* Ignore function bodies other than main() -- we shouldn't see calls to
1715     * them since they should all be inlined before we get to ir_to_mesa.
1716     */
1717    if (strcmp(ir->name, "main") == 0) {
1718       const ir_function_signature *sig;
1719       exec_list empty;
1720
1721       sig = ir->matching_signature(&empty);
1722
1723       assert(sig);
1724
1725       foreach_iter(exec_list_iterator, iter, sig->body) {
1726          ir_instruction *ir = (ir_instruction *)iter.get();
1727          this->base_ir = ir;
1728
1729          ir->accept(this);
1730       }
1731    }
1732 }
1733
1734 void
1735 fs_visitor::visit(ir_function_signature *ir)
1736 {
1737    assert(!"not reached");
1738    (void)ir;
1739 }
1740
1741 fs_inst *
1742 fs_visitor::emit(fs_inst inst)
1743 {
1744    fs_inst *list_inst = new(mem_ctx) fs_inst;
1745    *list_inst = inst;
1746
1747    list_inst->annotation = this->current_annotation;
1748    list_inst->ir = this->base_ir;
1749
1750    this->instructions.push_tail(list_inst);
1751
1752    return list_inst;
1753 }
1754
1755 /** Emits a dummy fragment shader consisting of magenta for bringup purposes. */
1756 void
1757 fs_visitor::emit_dummy_fs()
1758 {
1759    /* Everyone's favorite color. */
1760    emit(fs_inst(BRW_OPCODE_MOV,
1761                 fs_reg(MRF, 2),
1762                 fs_reg(1.0f)));
1763    emit(fs_inst(BRW_OPCODE_MOV,
1764                 fs_reg(MRF, 3),
1765                 fs_reg(0.0f)));
1766    emit(fs_inst(BRW_OPCODE_MOV,
1767                 fs_reg(MRF, 4),
1768                 fs_reg(1.0f)));
1769    emit(fs_inst(BRW_OPCODE_MOV,
1770                 fs_reg(MRF, 5),
1771                 fs_reg(0.0f)));
1772
1773    fs_inst *write;
1774    write = emit(fs_inst(FS_OPCODE_FB_WRITE,
1775                         fs_reg(0),
1776                         fs_reg(0)));
1777    write->base_mrf = 0;
1778 }
1779
1780 /* The register location here is relative to the start of the URB
1781  * data.  It will get adjusted to be a real location before
1782  * generate_code() time.
1783  */
1784 struct brw_reg
1785 fs_visitor::interp_reg(int location, int channel)
1786 {
1787    int regnr = urb_setup[location] * 2 + channel / 2;
1788    int stride = (channel & 1) * 4;
1789
1790    assert(urb_setup[location] != -1);
1791
1792    return brw_vec1_grf(regnr, stride);
1793 }
1794
1795 /** Emits the interpolation for the varying inputs. */
1796 void
1797 fs_visitor::emit_interpolation_setup_gen4()
1798 {
1799    struct brw_reg g1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW);
1800
1801    this->current_annotation = "compute pixel centers";
1802    this->pixel_x = fs_reg(this, glsl_type::uint_type);
1803    this->pixel_y = fs_reg(this, glsl_type::uint_type);
1804    this->pixel_x.type = BRW_REGISTER_TYPE_UW;
1805    this->pixel_y.type = BRW_REGISTER_TYPE_UW;
1806    emit(fs_inst(BRW_OPCODE_ADD,
1807                 this->pixel_x,
1808                 fs_reg(stride(suboffset(g1_uw, 4), 2, 4, 0)),
1809                 fs_reg(brw_imm_v(0x10101010))));
1810    emit(fs_inst(BRW_OPCODE_ADD,
1811                 this->pixel_y,
1812                 fs_reg(stride(suboffset(g1_uw, 5), 2, 4, 0)),
1813                 fs_reg(brw_imm_v(0x11001100))));
1814
1815    this->current_annotation = "compute pixel deltas from v0";
1816    if (brw->has_pln) {
1817       this->delta_x = fs_reg(this, glsl_type::vec2_type);
1818       this->delta_y = this->delta_x;
1819       this->delta_y.reg_offset++;
1820    } else {
1821       this->delta_x = fs_reg(this, glsl_type::float_type);
1822       this->delta_y = fs_reg(this, glsl_type::float_type);
1823    }
1824    emit(fs_inst(BRW_OPCODE_ADD,
1825                 this->delta_x,
1826                 this->pixel_x,
1827                 fs_reg(negate(brw_vec1_grf(1, 0)))));
1828    emit(fs_inst(BRW_OPCODE_ADD,
1829                 this->delta_y,
1830                 this->pixel_y,
1831                 fs_reg(negate(brw_vec1_grf(1, 1)))));
1832
1833    this->current_annotation = "compute pos.w and 1/pos.w";
1834    /* Compute wpos.w.  It's always in our setup, since it's needed to
1835     * interpolate the other attributes.
1836     */
1837    this->wpos_w = fs_reg(this, glsl_type::float_type);
1838    emit(fs_inst(FS_OPCODE_LINTERP, wpos_w, this->delta_x, this->delta_y,
1839                 interp_reg(FRAG_ATTRIB_WPOS, 3)));
1840    /* Compute the pixel 1/W value from wpos.w. */
1841    this->pixel_w = fs_reg(this, glsl_type::float_type);
1842    emit_math(FS_OPCODE_RCP, this->pixel_w, wpos_w);
1843    this->current_annotation = NULL;
1844 }
1845
1846 /** Emits the interpolation for the varying inputs. */
1847 void
1848 fs_visitor::emit_interpolation_setup_gen6()
1849 {
1850    struct brw_reg g1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW);
1851
1852    /* If the pixel centers end up used, the setup is the same as for gen4. */
1853    this->current_annotation = "compute pixel centers";
1854    fs_reg int_pixel_x = fs_reg(this, glsl_type::uint_type);
1855    fs_reg int_pixel_y = fs_reg(this, glsl_type::uint_type);
1856    int_pixel_x.type = BRW_REGISTER_TYPE_UW;
1857    int_pixel_y.type = BRW_REGISTER_TYPE_UW;
1858    emit(fs_inst(BRW_OPCODE_ADD,
1859                 int_pixel_x,
1860                 fs_reg(stride(suboffset(g1_uw, 4), 2, 4, 0)),
1861                 fs_reg(brw_imm_v(0x10101010))));
1862    emit(fs_inst(BRW_OPCODE_ADD,
1863                 int_pixel_y,
1864                 fs_reg(stride(suboffset(g1_uw, 5), 2, 4, 0)),
1865                 fs_reg(brw_imm_v(0x11001100))));
1866
1867    /* As of gen6, we can no longer mix float and int sources.  We have
1868     * to turn the integer pixel centers into floats for their actual
1869     * use.
1870     */
1871    this->pixel_x = fs_reg(this, glsl_type::float_type);
1872    this->pixel_y = fs_reg(this, glsl_type::float_type);
1873    emit(fs_inst(BRW_OPCODE_MOV, this->pixel_x, int_pixel_x));
1874    emit(fs_inst(BRW_OPCODE_MOV, this->pixel_y, int_pixel_y));
1875
1876    this->current_annotation = "compute 1/pos.w";
1877    this->wpos_w = fs_reg(brw_vec8_grf(c->key.source_w_reg, 0));
1878    this->pixel_w = fs_reg(this, glsl_type::float_type);
1879    emit_math(FS_OPCODE_RCP, this->pixel_w, wpos_w);
1880
1881    this->delta_x = fs_reg(brw_vec8_grf(2, 0));
1882    this->delta_y = fs_reg(brw_vec8_grf(3, 0));
1883
1884    this->current_annotation = NULL;
1885 }
1886
1887 void
1888 fs_visitor::emit_fb_writes()
1889 {
1890    this->current_annotation = "FB write header";
1891    GLboolean header_present = GL_TRUE;
1892    int nr = 0;
1893
1894    if (intel->gen >= 6 &&
1895        !this->kill_emitted &&
1896        c->key.nr_color_regions == 1) {
1897       header_present = false;
1898    }
1899
1900    if (header_present) {
1901       /* m0, m1 header */
1902       nr += 2;
1903    }
1904
1905    if (c->key.aa_dest_stencil_reg) {
1906       emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++),
1907                    fs_reg(brw_vec8_grf(c->key.aa_dest_stencil_reg, 0))));
1908    }
1909
1910    /* Reserve space for color. It'll be filled in per MRT below. */
1911    int color_mrf = nr;
1912    nr += 4;
1913
1914    if (c->key.source_depth_to_render_target) {
1915       if (c->key.computes_depth) {
1916          /* Hand over gl_FragDepth. */
1917          assert(this->frag_depth);
1918          fs_reg depth = *(variable_storage(this->frag_depth));
1919
1920          emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++), depth));
1921       } else {
1922          /* Pass through the payload depth. */
1923          emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++),
1924                       fs_reg(brw_vec8_grf(c->key.source_depth_reg, 0))));
1925       }
1926    }
1927
1928    if (c->key.dest_depth_reg) {
1929       emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++),
1930                    fs_reg(brw_vec8_grf(c->key.dest_depth_reg, 0))));
1931    }
1932
1933    fs_reg color = reg_undef;
1934    if (this->frag_color)
1935       color = *(variable_storage(this->frag_color));
1936    else if (this->frag_data)
1937       color = *(variable_storage(this->frag_data));
1938
1939    for (int target = 0; target < c->key.nr_color_regions; target++) {
1940       this->current_annotation = talloc_asprintf(this->mem_ctx,
1941                                                  "FB write target %d",
1942                                                  target);
1943       if (this->frag_color || this->frag_data) {
1944          for (int i = 0; i < 4; i++) {
1945             emit(fs_inst(BRW_OPCODE_MOV,
1946                          fs_reg(MRF, color_mrf + i),
1947                          color));
1948             color.reg_offset++;
1949          }
1950       }
1951
1952       if (this->frag_color)
1953          color.reg_offset -= 4;
1954
1955       fs_inst *inst = emit(fs_inst(FS_OPCODE_FB_WRITE,
1956                                    reg_undef, reg_undef));
1957       inst->target = target;
1958       inst->base_mrf = 0;
1959       inst->mlen = nr;
1960       if (target == c->key.nr_color_regions - 1)
1961          inst->eot = true;
1962       inst->header_present = header_present;
1963    }
1964
1965    if (c->key.nr_color_regions == 0) {
1966       fs_inst *inst = emit(fs_inst(FS_OPCODE_FB_WRITE,
1967                                    reg_undef, reg_undef));
1968       inst->base_mrf = 0;
1969       inst->mlen = nr;
1970       inst->eot = true;
1971       inst->header_present = header_present;
1972    }
1973
1974    this->current_annotation = NULL;
1975 }
1976
1977 void
1978 fs_visitor::generate_fb_write(fs_inst *inst)
1979 {
1980    GLboolean eot = inst->eot;
1981    struct brw_reg implied_header;
1982
1983    /* Header is 2 regs, g0 and g1 are the contents. g0 will be implied
1984     * move, here's g1.
1985     */
1986    brw_push_insn_state(p);
1987    brw_set_mask_control(p, BRW_MASK_DISABLE);
1988    brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1989
1990    if (inst->header_present) {
1991       if (intel->gen >= 6) {
1992          brw_MOV(p,
1993                  brw_message_reg(inst->base_mrf),
1994                  brw_vec8_grf(0, 0));
1995
1996          if (inst->target > 0) {
1997             /* Set the render target index for choosing BLEND_STATE. */
1998             brw_MOV(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, 0, 2),
1999                               BRW_REGISTER_TYPE_UD),
2000                     brw_imm_ud(inst->target));
2001          }
2002
2003          /* Clear viewport index, render target array index. */
2004          brw_AND(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, 0, 0),
2005                            BRW_REGISTER_TYPE_UD),
2006                  retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UD),
2007                  brw_imm_ud(0xf7ff));
2008
2009          implied_header = brw_null_reg();
2010       } else {
2011          implied_header = retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW);
2012       }
2013
2014       brw_MOV(p,
2015               brw_message_reg(inst->base_mrf + 1),
2016               brw_vec8_grf(1, 0));
2017    } else {
2018       implied_header = brw_null_reg();
2019    }
2020
2021    brw_pop_insn_state(p);
2022
2023    brw_fb_WRITE(p,
2024                 8, /* dispatch_width */
2025                 retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW),
2026                 inst->base_mrf,
2027                 implied_header,
2028                 inst->target,
2029                 inst->mlen,
2030                 0,
2031                 eot);
2032 }
2033
2034 void
2035 fs_visitor::generate_linterp(fs_inst *inst,
2036                              struct brw_reg dst, struct brw_reg *src)
2037 {
2038    struct brw_reg delta_x = src[0];
2039    struct brw_reg delta_y = src[1];
2040    struct brw_reg interp = src[2];
2041
2042    if (brw->has_pln &&
2043        delta_y.nr == delta_x.nr + 1 &&
2044        (intel->gen >= 6 || (delta_x.nr & 1) == 0)) {
2045       brw_PLN(p, dst, interp, delta_x);
2046    } else {
2047       brw_LINE(p, brw_null_reg(), interp, delta_x);
2048       brw_MAC(p, dst, suboffset(interp, 1), delta_y);
2049    }
2050 }
2051
2052 void
2053 fs_visitor::generate_math(fs_inst *inst,
2054                           struct brw_reg dst, struct brw_reg *src)
2055 {
2056    int op;
2057
2058    switch (inst->opcode) {
2059    case FS_OPCODE_RCP:
2060       op = BRW_MATH_FUNCTION_INV;
2061       break;
2062    case FS_OPCODE_RSQ:
2063       op = BRW_MATH_FUNCTION_RSQ;
2064       break;
2065    case FS_OPCODE_SQRT:
2066       op = BRW_MATH_FUNCTION_SQRT;
2067       break;
2068    case FS_OPCODE_EXP2:
2069       op = BRW_MATH_FUNCTION_EXP;
2070       break;
2071    case FS_OPCODE_LOG2:
2072       op = BRW_MATH_FUNCTION_LOG;
2073       break;
2074    case FS_OPCODE_POW:
2075       op = BRW_MATH_FUNCTION_POW;
2076       break;
2077    case FS_OPCODE_SIN:
2078       op = BRW_MATH_FUNCTION_SIN;
2079       break;
2080    case FS_OPCODE_COS:
2081       op = BRW_MATH_FUNCTION_COS;
2082       break;
2083    default:
2084       assert(!"not reached: unknown math function");
2085       op = 0;
2086       break;
2087    }
2088
2089    if (intel->gen >= 6) {
2090       assert(inst->mlen == 0);
2091
2092       if (inst->opcode == FS_OPCODE_POW) {
2093          brw_math2(p, dst, op, src[0], src[1]);
2094       } else {
2095          brw_math(p, dst,
2096                   op,
2097                   inst->saturate ? BRW_MATH_SATURATE_SATURATE :
2098                   BRW_MATH_SATURATE_NONE,
2099                   0, src[0],
2100                   BRW_MATH_DATA_VECTOR,
2101                   BRW_MATH_PRECISION_FULL);
2102       }
2103    } else {
2104       assert(inst->mlen >= 1);
2105
2106       brw_math(p, dst,
2107                op,
2108                inst->saturate ? BRW_MATH_SATURATE_SATURATE :
2109                BRW_MATH_SATURATE_NONE,
2110                inst->base_mrf, src[0],
2111                BRW_MATH_DATA_VECTOR,
2112                BRW_MATH_PRECISION_FULL);
2113    }
2114 }
2115
2116 void
2117 fs_visitor::generate_tex(fs_inst *inst, struct brw_reg dst)
2118 {
2119    int msg_type = -1;
2120    int rlen = 4;
2121    uint32_t simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD8;
2122
2123    if (intel->gen >= 5) {
2124       switch (inst->opcode) {
2125       case FS_OPCODE_TEX:
2126          if (inst->shadow_compare) {
2127             msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_COMPARE_GEN5;
2128          } else {
2129             msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_GEN5;
2130          }
2131          break;
2132       case FS_OPCODE_TXB:
2133          if (inst->shadow_compare) {
2134             msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE_GEN5;
2135          } else {
2136             msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_GEN5;
2137          }
2138          break;
2139       }
2140    } else {
2141       switch (inst->opcode) {
2142       case FS_OPCODE_TEX:
2143          /* Note that G45 and older determines shadow compare and dispatch width
2144           * from message length for most messages.
2145           */
2146          msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE;
2147          if (inst->shadow_compare) {
2148             assert(inst->mlen == 6);
2149          } else {
2150             assert(inst->mlen <= 4);
2151          }
2152          break;
2153       case FS_OPCODE_TXB:
2154          if (inst->shadow_compare) {
2155             assert(inst->mlen == 6);
2156             msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE;
2157          } else {
2158             assert(inst->mlen == 9);
2159             msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS;
2160             simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
2161          }
2162          break;
2163       }
2164    }
2165    assert(msg_type != -1);
2166
2167    if (simd_mode == BRW_SAMPLER_SIMD_MODE_SIMD16) {
2168       rlen = 8;
2169       dst = vec16(dst);
2170    }
2171
2172    brw_SAMPLE(p,
2173               retype(dst, BRW_REGISTER_TYPE_UW),
2174               inst->base_mrf,
2175               retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW),
2176               SURF_INDEX_TEXTURE(inst->sampler),
2177               inst->sampler,
2178               WRITEMASK_XYZW,
2179               msg_type,
2180               rlen,
2181               inst->mlen,
2182               0,
2183               1,
2184               simd_mode);
2185 }
2186
2187
2188 /* For OPCODE_DDX and OPCODE_DDY, per channel of output we've got input
2189  * looking like:
2190  *
2191  * arg0: ss0.tl ss0.tr ss0.bl ss0.br ss1.tl ss1.tr ss1.bl ss1.br
2192  *
2193  * and we're trying to produce:
2194  *
2195  *           DDX                     DDY
2196  * dst: (ss0.tr - ss0.tl)     (ss0.tl - ss0.bl)
2197  *      (ss0.tr - ss0.tl)     (ss0.tr - ss0.br)
2198  *      (ss0.br - ss0.bl)     (ss0.tl - ss0.bl)
2199  *      (ss0.br - ss0.bl)     (ss0.tr - ss0.br)
2200  *      (ss1.tr - ss1.tl)     (ss1.tl - ss1.bl)
2201  *      (ss1.tr - ss1.tl)     (ss1.tr - ss1.br)
2202  *      (ss1.br - ss1.bl)     (ss1.tl - ss1.bl)
2203  *      (ss1.br - ss1.bl)     (ss1.tr - ss1.br)
2204  *
2205  * and add another set of two more subspans if in 16-pixel dispatch mode.
2206  *
2207  * For DDX, it ends up being easy: width = 2, horiz=0 gets us the same result
2208  * for each pair, and vertstride = 2 jumps us 2 elements after processing a
2209  * pair. But for DDY, it's harder, as we want to produce the pairs swizzled
2210  * between each other.  We could probably do it like ddx and swizzle the right
2211  * order later, but bail for now and just produce
2212  * ((ss0.tl - ss0.bl)x4 (ss1.tl - ss1.bl)x4)
2213  */
2214 void
2215 fs_visitor::generate_ddx(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
2216 {
2217    struct brw_reg src0 = brw_reg(src.file, src.nr, 1,
2218                                  BRW_REGISTER_TYPE_F,
2219                                  BRW_VERTICAL_STRIDE_2,
2220                                  BRW_WIDTH_2,
2221                                  BRW_HORIZONTAL_STRIDE_0,
2222                                  BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
2223    struct brw_reg src1 = brw_reg(src.file, src.nr, 0,
2224                                  BRW_REGISTER_TYPE_F,
2225                                  BRW_VERTICAL_STRIDE_2,
2226                                  BRW_WIDTH_2,
2227                                  BRW_HORIZONTAL_STRIDE_0,
2228                                  BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
2229    brw_ADD(p, dst, src0, negate(src1));
2230 }
2231
2232 void
2233 fs_visitor::generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
2234 {
2235    struct brw_reg src0 = brw_reg(src.file, src.nr, 0,
2236                                  BRW_REGISTER_TYPE_F,
2237                                  BRW_VERTICAL_STRIDE_4,
2238                                  BRW_WIDTH_4,
2239                                  BRW_HORIZONTAL_STRIDE_0,
2240                                  BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
2241    struct brw_reg src1 = brw_reg(src.file, src.nr, 2,
2242                                  BRW_REGISTER_TYPE_F,
2243                                  BRW_VERTICAL_STRIDE_4,
2244                                  BRW_WIDTH_4,
2245                                  BRW_HORIZONTAL_STRIDE_0,
2246                                  BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
2247    brw_ADD(p, dst, src0, negate(src1));
2248 }
2249
2250 void
2251 fs_visitor::generate_discard_not(fs_inst *inst, struct brw_reg mask)
2252 {
2253    if (intel->gen >= 6) {
2254       /* Gen6 no longer has the mask reg for us to just read the
2255        * active channels from.  However, cmp updates just the channels
2256        * of the flag reg that are enabled, so we can get at the
2257        * channel enables that way.  In this step, make a reg of ones
2258        * we'll compare to.
2259        */
2260       brw_MOV(p, mask, brw_imm_ud(1));
2261    } else {
2262       brw_push_insn_state(p);
2263       brw_set_mask_control(p, BRW_MASK_DISABLE);
2264       brw_NOT(p, mask, brw_mask_reg(1)); /* IMASK */
2265       brw_pop_insn_state(p);
2266    }
2267 }
2268
2269 void
2270 fs_visitor::generate_discard_and(fs_inst *inst, struct brw_reg mask)
2271 {
2272    if (intel->gen >= 6) {
2273       struct brw_reg f0 = brw_flag_reg();
2274       struct brw_reg g1 = retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UW);
2275
2276       brw_push_insn_state(p);
2277       brw_set_mask_control(p, BRW_MASK_DISABLE);
2278       brw_MOV(p, f0, brw_imm_uw(0xffff)); /* inactive channels undiscarded */
2279       brw_pop_insn_state(p);
2280
2281       brw_CMP(p, retype(brw_null_reg(), BRW_REGISTER_TYPE_UD),
2282               BRW_CONDITIONAL_Z, mask, brw_imm_ud(0)); /* active channels fail test */
2283       /* Undo CMP's whacking of predication*/
2284       brw_set_predicate_control(p, BRW_PREDICATE_NONE);
2285
2286       brw_push_insn_state(p);
2287       brw_set_mask_control(p, BRW_MASK_DISABLE);
2288       brw_AND(p, g1, f0, g1);
2289       brw_pop_insn_state(p);
2290    } else {
2291       struct brw_reg g0 = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
2292
2293       mask = brw_uw1_reg(mask.file, mask.nr, 0);
2294
2295       brw_push_insn_state(p);
2296       brw_set_mask_control(p, BRW_MASK_DISABLE);
2297       brw_AND(p, g0, mask, g0);
2298       brw_pop_insn_state(p);
2299    }
2300 }
2301
2302 void
2303 fs_visitor::generate_spill(fs_inst *inst, struct brw_reg src)
2304 {
2305    assert(inst->mlen != 0);
2306
2307    brw_MOV(p,
2308            retype(brw_message_reg(inst->base_mrf + 1), BRW_REGISTER_TYPE_UD),
2309            retype(src, BRW_REGISTER_TYPE_UD));
2310    brw_oword_block_write_scratch(p, brw_message_reg(inst->base_mrf), 1,
2311                                  inst->offset);
2312 }
2313
2314 void
2315 fs_visitor::generate_unspill(fs_inst *inst, struct brw_reg dst)
2316 {
2317    assert(inst->mlen != 0);
2318
2319    /* Clear any post destination dependencies that would be ignored by
2320     * the block read.  See the B-Spec for pre-gen5 send instruction.
2321     *
2322     * This could use a better solution, since texture sampling and
2323     * math reads could potentially run into it as well -- anywhere
2324     * that we have a SEND with a destination that is a register that
2325     * was written but not read within the last N instructions (what's
2326     * N?  unsure).  This is rare because of dead code elimination, but
2327     * not impossible.
2328     */
2329    if (intel->gen == 4 && !intel->is_g4x)
2330       brw_MOV(p, brw_null_reg(), dst);
2331
2332    brw_oword_block_read_scratch(p, dst, brw_message_reg(inst->base_mrf), 1,
2333                                 inst->offset);
2334
2335    if (intel->gen == 4 && !intel->is_g4x) {
2336       /* gen4 errata: destination from a send can't be used as a
2337        * destination until it's been read.  Just read it so we don't
2338        * have to worry.
2339        */
2340       brw_MOV(p, brw_null_reg(), dst);
2341    }
2342 }
2343
2344
2345 void
2346 fs_visitor::generate_pull_constant_load(fs_inst *inst, struct brw_reg dst)
2347 {
2348    assert(inst->mlen != 0);
2349
2350    /* Clear any post destination dependencies that would be ignored by
2351     * the block read.  See the B-Spec for pre-gen5 send instruction.
2352     *
2353     * This could use a better solution, since texture sampling and
2354     * math reads could potentially run into it as well -- anywhere
2355     * that we have a SEND with a destination that is a register that
2356     * was written but not read within the last N instructions (what's
2357     * N?  unsure).  This is rare because of dead code elimination, but
2358     * not impossible.
2359     */
2360    if (intel->gen == 4 && !intel->is_g4x)
2361       brw_MOV(p, brw_null_reg(), dst);
2362
2363    brw_oword_block_read(p, dst, brw_message_reg(inst->base_mrf),
2364                         inst->offset, SURF_INDEX_FRAG_CONST_BUFFER);
2365
2366    if (intel->gen == 4 && !intel->is_g4x) {
2367       /* gen4 errata: destination from a send can't be used as a
2368        * destination until it's been read.  Just read it so we don't
2369        * have to worry.
2370        */
2371       brw_MOV(p, brw_null_reg(), dst);
2372    }
2373 }
2374
2375 void
2376 fs_visitor::assign_curb_setup()
2377 {
2378    c->prog_data.first_curbe_grf = c->key.nr_payload_regs;
2379    c->prog_data.curb_read_length = ALIGN(c->prog_data.nr_params, 8) / 8;
2380
2381    /* Map the offsets in the UNIFORM file to fixed HW regs. */
2382    foreach_iter(exec_list_iterator, iter, this->instructions) {
2383       fs_inst *inst = (fs_inst *)iter.get();
2384
2385       for (unsigned int i = 0; i < 3; i++) {
2386          if (inst->src[i].file == UNIFORM) {
2387             int constant_nr = inst->src[i].hw_reg + inst->src[i].reg_offset;
2388             struct brw_reg brw_reg = brw_vec1_grf(c->prog_data.first_curbe_grf +
2389                                                   constant_nr / 8,
2390                                                   constant_nr % 8);
2391
2392             inst->src[i].file = FIXED_HW_REG;
2393             inst->src[i].fixed_hw_reg = retype(brw_reg, inst->src[i].type);
2394          }
2395       }
2396    }
2397 }
2398
2399 void
2400 fs_visitor::calculate_urb_setup()
2401 {
2402    for (unsigned int i = 0; i < FRAG_ATTRIB_MAX; i++) {
2403       urb_setup[i] = -1;
2404    }
2405
2406    int urb_next = 0;
2407    /* Figure out where each of the incoming setup attributes lands. */
2408    if (intel->gen >= 6) {
2409       for (unsigned int i = 0; i < FRAG_ATTRIB_MAX; i++) {
2410          if (brw->fragment_program->Base.InputsRead & BITFIELD64_BIT(i)) {
2411             urb_setup[i] = urb_next++;
2412          }
2413       }
2414    } else {
2415       /* FINISHME: The sf doesn't map VS->FS inputs for us very well. */
2416       for (unsigned int i = 0; i < VERT_RESULT_MAX; i++) {
2417          if (c->key.vp_outputs_written & BITFIELD64_BIT(i)) {
2418             int fp_index;
2419
2420             if (i >= VERT_RESULT_VAR0)
2421                fp_index = i - (VERT_RESULT_VAR0 - FRAG_ATTRIB_VAR0);
2422             else if (i <= VERT_RESULT_TEX7)
2423                fp_index = i;
2424             else
2425                fp_index = -1;
2426
2427             if (fp_index >= 0)
2428                urb_setup[fp_index] = urb_next++;
2429          }
2430       }
2431    }
2432
2433    /* Each attribute is 4 setup channels, each of which is half a reg. */
2434    c->prog_data.urb_read_length = urb_next * 2;
2435 }
2436
2437 void
2438 fs_visitor::assign_urb_setup()
2439 {
2440    int urb_start = c->prog_data.first_curbe_grf + c->prog_data.curb_read_length;
2441
2442    /* Offset all the urb_setup[] index by the actual position of the
2443     * setup regs, now that the location of the constants has been chosen.
2444     */
2445    foreach_iter(exec_list_iterator, iter, this->instructions) {
2446       fs_inst *inst = (fs_inst *)iter.get();
2447
2448       if (inst->opcode != FS_OPCODE_LINTERP)
2449          continue;
2450
2451       assert(inst->src[2].file == FIXED_HW_REG);
2452
2453       inst->src[2].fixed_hw_reg.nr += urb_start;
2454    }
2455
2456    this->first_non_payload_grf = urb_start + c->prog_data.urb_read_length;
2457 }
2458
2459 /**
2460  * Split large virtual GRFs into separate components if we can.
2461  *
2462  * This is mostly duplicated with what brw_fs_vector_splitting does,
2463  * but that's really conservative because it's afraid of doing
2464  * splitting that doesn't result in real progress after the rest of
2465  * the optimization phases, which would cause infinite looping in
2466  * optimization.  We can do it once here, safely.  This also has the
2467  * opportunity to split interpolated values, or maybe even uniforms,
2468  * which we don't have at the IR level.
2469  *
2470  * We want to split, because virtual GRFs are what we register
2471  * allocate and spill (due to contiguousness requirements for some
2472  * instructions), and they're what we naturally generate in the
2473  * codegen process, but most virtual GRFs don't actually need to be
2474  * contiguous sets of GRFs.  If we split, we'll end up with reduced
2475  * live intervals and better dead code elimination and coalescing.
2476  */
2477 void
2478 fs_visitor::split_virtual_grfs()
2479 {
2480    int num_vars = this->virtual_grf_next;
2481    bool split_grf[num_vars];
2482    int new_virtual_grf[num_vars];
2483
2484    /* Try to split anything > 0 sized. */
2485    for (int i = 0; i < num_vars; i++) {
2486       if (this->virtual_grf_sizes[i] != 1)
2487          split_grf[i] = true;
2488       else
2489          split_grf[i] = false;
2490    }
2491
2492    if (brw->has_pln) {
2493       /* PLN opcodes rely on the delta_xy being contiguous. */
2494       split_grf[this->delta_x.reg] = false;
2495    }
2496
2497    foreach_iter(exec_list_iterator, iter, this->instructions) {
2498       fs_inst *inst = (fs_inst *)iter.get();
2499
2500       /* Texturing produces 4 contiguous registers, so no splitting. */
2501       if ((inst->opcode == FS_OPCODE_TEX ||
2502            inst->opcode == FS_OPCODE_TXB ||
2503            inst->opcode == FS_OPCODE_TXL) &&
2504           inst->dst.file == GRF) {
2505          split_grf[inst->dst.reg] = false;
2506       }
2507    }
2508
2509    /* Allocate new space for split regs.  Note that the virtual
2510     * numbers will be contiguous.
2511     */
2512    for (int i = 0; i < num_vars; i++) {
2513       if (split_grf[i]) {
2514          new_virtual_grf[i] = virtual_grf_alloc(1);
2515          for (int j = 2; j < this->virtual_grf_sizes[i]; j++) {
2516             int reg = virtual_grf_alloc(1);
2517             assert(reg == new_virtual_grf[i] + j - 1);
2518          }
2519          this->virtual_grf_sizes[i] = 1;
2520       }
2521    }
2522
2523    foreach_iter(exec_list_iterator, iter, this->instructions) {
2524       fs_inst *inst = (fs_inst *)iter.get();
2525
2526       if (inst->dst.file == GRF &&
2527           split_grf[inst->dst.reg] &&
2528           inst->dst.reg_offset != 0) {
2529          inst->dst.reg = (new_virtual_grf[inst->dst.reg] +
2530                           inst->dst.reg_offset - 1);
2531          inst->dst.reg_offset = 0;
2532       }
2533       for (int i = 0; i < 3; i++) {
2534          if (inst->src[i].file == GRF &&
2535              split_grf[inst->src[i].reg] &&
2536              inst->src[i].reg_offset != 0) {
2537             inst->src[i].reg = (new_virtual_grf[inst->src[i].reg] +
2538                                 inst->src[i].reg_offset - 1);
2539             inst->src[i].reg_offset = 0;
2540          }
2541       }
2542    }
2543 }
2544
2545 /**
2546  * Choose accesses from the UNIFORM file to demote to using the pull
2547  * constant buffer.
2548  *
2549  * We allow a fragment shader to have more than the specified minimum
2550  * maximum number of fragment shader uniform components (64).  If
2551  * there are too many of these, they'd fill up all of register space.
2552  * So, this will push some of them out to the pull constant buffer and
2553  * update the program to load them.
2554  */
2555 void
2556 fs_visitor::setup_pull_constants()
2557 {
2558    /* Only allow 16 registers (128 uniform components) as push constants. */
2559    unsigned int max_uniform_components = 16 * 8;
2560    if (c->prog_data.nr_params <= max_uniform_components)
2561       return;
2562
2563    /* Just demote the end of the list.  We could probably do better
2564     * here, demoting things that are rarely used in the program first.
2565     */
2566    int pull_uniform_base = max_uniform_components;
2567    int pull_uniform_count = c->prog_data.nr_params - pull_uniform_base;
2568
2569    foreach_iter(exec_list_iterator, iter, this->instructions) {
2570       fs_inst *inst = (fs_inst *)iter.get();
2571
2572       for (int i = 0; i < 3; i++) {
2573          if (inst->src[i].file != UNIFORM)
2574             continue;
2575
2576          int uniform_nr = inst->src[i].hw_reg + inst->src[i].reg_offset;
2577          if (uniform_nr < pull_uniform_base)
2578             continue;
2579
2580          fs_reg dst = fs_reg(this, glsl_type::float_type);
2581          fs_inst *pull = new(mem_ctx) fs_inst(FS_OPCODE_PULL_CONSTANT_LOAD,
2582                                               dst);
2583          pull->offset = ((uniform_nr - pull_uniform_base) * 4) & ~15;
2584          pull->ir = inst->ir;
2585          pull->annotation = inst->annotation;
2586          pull->base_mrf = 14;
2587          pull->mlen = 1;
2588
2589          inst->insert_before(pull);
2590
2591          inst->src[i].file = GRF;
2592          inst->src[i].reg = dst.reg;
2593          inst->src[i].reg_offset = 0;
2594          inst->src[i].smear = (uniform_nr - pull_uniform_base) & 3;
2595       }
2596    }
2597
2598    for (int i = 0; i < pull_uniform_count; i++) {
2599       c->prog_data.pull_param[i] = c->prog_data.param[pull_uniform_base + i];
2600       c->prog_data.pull_param_convert[i] =
2601          c->prog_data.param_convert[pull_uniform_base + i];
2602    }
2603    c->prog_data.nr_params -= pull_uniform_count;
2604    c->prog_data.nr_pull_params = pull_uniform_count;
2605 }
2606
2607 void
2608 fs_visitor::calculate_live_intervals()
2609 {
2610    int num_vars = this->virtual_grf_next;
2611    int *def = talloc_array(mem_ctx, int, num_vars);
2612    int *use = talloc_array(mem_ctx, int, num_vars);
2613    int loop_depth = 0;
2614    int loop_start = 0;
2615    int bb_header_ip = 0;
2616
2617    for (int i = 0; i < num_vars; i++) {
2618       def[i] = 1 << 30;
2619       use[i] = -1;
2620    }
2621
2622    int ip = 0;
2623    foreach_iter(exec_list_iterator, iter, this->instructions) {
2624       fs_inst *inst = (fs_inst *)iter.get();
2625
2626       if (inst->opcode == BRW_OPCODE_DO) {
2627          if (loop_depth++ == 0)
2628             loop_start = ip;
2629       } else if (inst->opcode == BRW_OPCODE_WHILE) {
2630          loop_depth--;
2631
2632          if (loop_depth == 0) {
2633             /* Patches up the use of vars marked for being live across
2634              * the whole loop.
2635              */
2636             for (int i = 0; i < num_vars; i++) {
2637                if (use[i] == loop_start) {
2638                   use[i] = ip;
2639                }
2640             }
2641          }
2642       } else {
2643          for (unsigned int i = 0; i < 3; i++) {
2644             if (inst->src[i].file == GRF && inst->src[i].reg != 0) {
2645                int reg = inst->src[i].reg;
2646
2647                if (!loop_depth || (this->virtual_grf_sizes[reg] == 1 &&
2648                                    def[reg] >= bb_header_ip)) {
2649                   use[reg] = ip;
2650                } else {
2651                   def[reg] = MIN2(loop_start, def[reg]);
2652                   use[reg] = loop_start;
2653
2654                   /* Nobody else is going to go smash our start to
2655                    * later in the loop now, because def[reg] now
2656                    * points before the bb header.
2657                    */
2658                }
2659             }
2660          }
2661          if (inst->dst.file == GRF && inst->dst.reg != 0) {
2662             int reg = inst->dst.reg;
2663
2664             if (!loop_depth || (this->virtual_grf_sizes[reg] == 1 &&
2665                                 !inst->predicated)) {
2666                def[reg] = MIN2(def[reg], ip);
2667             } else {
2668                def[reg] = MIN2(def[reg], loop_start);
2669             }
2670          }
2671       }
2672
2673       ip++;
2674
2675       /* Set the basic block header IP.  This is used for determining
2676        * if a complete def of single-register virtual GRF in a loop
2677        * dominates a use in the same basic block.  It's a quick way to
2678        * reduce the live interval range of most register used in a
2679        * loop.
2680        */
2681       if (inst->opcode == BRW_OPCODE_IF ||
2682           inst->opcode == BRW_OPCODE_ELSE ||
2683           inst->opcode == BRW_OPCODE_ENDIF ||
2684           inst->opcode == BRW_OPCODE_DO ||
2685           inst->opcode == BRW_OPCODE_WHILE ||
2686           inst->opcode == BRW_OPCODE_BREAK ||
2687           inst->opcode == BRW_OPCODE_CONTINUE) {
2688          bb_header_ip = ip;
2689       }
2690    }
2691
2692    talloc_free(this->virtual_grf_def);
2693    talloc_free(this->virtual_grf_use);
2694    this->virtual_grf_def = def;
2695    this->virtual_grf_use = use;
2696 }
2697
2698 /**
2699  * Attempts to move immediate constants into the immediate
2700  * constant slot of following instructions.
2701  *
2702  * Immediate constants are a bit tricky -- they have to be in the last
2703  * operand slot, you can't do abs/negate on them,
2704  */
2705
2706 bool
2707 fs_visitor::propagate_constants()
2708 {
2709    bool progress = false;
2710
2711    foreach_iter(exec_list_iterator, iter, this->instructions) {
2712       fs_inst *inst = (fs_inst *)iter.get();
2713
2714       if (inst->opcode != BRW_OPCODE_MOV ||
2715           inst->predicated ||
2716           inst->dst.file != GRF || inst->src[0].file != IMM ||
2717           inst->dst.type != inst->src[0].type)
2718          continue;
2719
2720       /* Don't bother with cases where we should have had the
2721        * operation on the constant folded in GLSL already.
2722        */
2723       if (inst->saturate)
2724          continue;
2725
2726       /* Found a move of a constant to a GRF.  Find anything else using the GRF
2727        * before it's written, and replace it with the constant if we can.
2728        */
2729       exec_list_iterator scan_iter = iter;
2730       scan_iter.next();
2731       for (; scan_iter.has_next(); scan_iter.next()) {
2732          fs_inst *scan_inst = (fs_inst *)scan_iter.get();
2733
2734          if (scan_inst->opcode == BRW_OPCODE_DO ||
2735              scan_inst->opcode == BRW_OPCODE_WHILE ||
2736              scan_inst->opcode == BRW_OPCODE_ELSE ||
2737              scan_inst->opcode == BRW_OPCODE_ENDIF) {
2738             break;
2739          }
2740
2741          for (int i = 2; i >= 0; i--) {
2742             if (scan_inst->src[i].file != GRF ||
2743                 scan_inst->src[i].reg != inst->dst.reg ||
2744                 scan_inst->src[i].reg_offset != inst->dst.reg_offset)
2745                continue;
2746
2747             /* Don't bother with cases where we should have had the
2748              * operation on the constant folded in GLSL already.
2749              */
2750             if (scan_inst->src[i].negate || scan_inst->src[i].abs)
2751                continue;
2752
2753             switch (scan_inst->opcode) {
2754             case BRW_OPCODE_MOV:
2755                scan_inst->src[i] = inst->src[0];
2756                progress = true;
2757                break;
2758
2759             case BRW_OPCODE_MUL:
2760             case BRW_OPCODE_ADD:
2761                if (i == 1) {
2762                   scan_inst->src[i] = inst->src[0];
2763                   progress = true;
2764                } else if (i == 0 && scan_inst->src[1].file != IMM) {
2765                   /* Fit this constant in by commuting the operands */
2766                   scan_inst->src[0] = scan_inst->src[1];
2767                   scan_inst->src[1] = inst->src[0];
2768                }
2769                break;
2770             case BRW_OPCODE_CMP:
2771                if (i == 1) {
2772                   scan_inst->src[i] = inst->src[0];
2773                   progress = true;
2774                }
2775             }
2776          }
2777
2778          if (scan_inst->dst.file == GRF &&
2779              scan_inst->dst.reg == inst->dst.reg &&
2780              (scan_inst->dst.reg_offset == inst->dst.reg_offset ||
2781               scan_inst->opcode == FS_OPCODE_TEX)) {
2782             break;
2783          }
2784       }
2785    }
2786
2787    return progress;
2788 }
2789 /**
2790  * Must be called after calculate_live_intervales() to remove unused
2791  * writes to registers -- register allocation will fail otherwise
2792  * because something deffed but not used won't be considered to
2793  * interfere with other regs.
2794  */
2795 bool
2796 fs_visitor::dead_code_eliminate()
2797 {
2798    bool progress = false;
2799    int num_vars = this->virtual_grf_next;
2800    bool dead[num_vars];
2801
2802    for (int i = 0; i < num_vars; i++) {
2803       dead[i] = this->virtual_grf_def[i] >= this->virtual_grf_use[i];
2804
2805       if (dead[i]) {
2806          /* Mark off its interval so it won't interfere with anything. */
2807          this->virtual_grf_def[i] = -1;
2808          this->virtual_grf_use[i] = -1;
2809       }
2810    }
2811
2812    foreach_iter(exec_list_iterator, iter, this->instructions) {
2813       fs_inst *inst = (fs_inst *)iter.get();
2814
2815       if (inst->dst.file == GRF && dead[inst->dst.reg]) {
2816          inst->remove();
2817          progress = true;
2818       }
2819    }
2820
2821    return progress;
2822 }
2823
2824 bool
2825 fs_visitor::register_coalesce()
2826 {
2827    bool progress = false;
2828
2829    foreach_iter(exec_list_iterator, iter, this->instructions) {
2830       fs_inst *inst = (fs_inst *)iter.get();
2831
2832       if (inst->opcode != BRW_OPCODE_MOV ||
2833           inst->predicated ||
2834           inst->saturate ||
2835           inst->dst.file != GRF || inst->src[0].file != GRF ||
2836           inst->dst.type != inst->src[0].type)
2837          continue;
2838
2839       /* Found a move of a GRF to a GRF.  Let's see if we can coalesce
2840        * them: check for no writes to either one until the exit of the
2841        * program.
2842        */
2843       bool interfered = false;
2844       exec_list_iterator scan_iter = iter;
2845       scan_iter.next();
2846       for (; scan_iter.has_next(); scan_iter.next()) {
2847          fs_inst *scan_inst = (fs_inst *)scan_iter.get();
2848
2849          if (scan_inst->opcode == BRW_OPCODE_DO ||
2850              scan_inst->opcode == BRW_OPCODE_WHILE ||
2851              scan_inst->opcode == BRW_OPCODE_ENDIF) {
2852             interfered = true;
2853             iter = scan_iter;
2854             break;
2855          }
2856
2857          if (scan_inst->dst.file == GRF) {
2858             if (scan_inst->dst.reg == inst->dst.reg &&
2859                 (scan_inst->dst.reg_offset == inst->dst.reg_offset ||
2860                  scan_inst->opcode == FS_OPCODE_TEX)) {
2861                interfered = true;
2862                break;
2863             }
2864             if (scan_inst->dst.reg == inst->src[0].reg &&
2865                 (scan_inst->dst.reg_offset == inst->src[0].reg_offset ||
2866                  scan_inst->opcode == FS_OPCODE_TEX)) {
2867                interfered = true;
2868                break;
2869             }
2870          }
2871       }
2872       if (interfered) {
2873          continue;
2874       }
2875
2876       /* Update live interval so we don't have to recalculate. */
2877       this->virtual_grf_use[inst->src[0].reg] = MAX2(virtual_grf_use[inst->src[0].reg],
2878                                                      virtual_grf_use[inst->dst.reg]);
2879
2880       /* Rewrite the later usage to point at the source of the move to
2881        * be removed.
2882        */
2883       for (exec_list_iterator scan_iter = iter; scan_iter.has_next();
2884            scan_iter.next()) {
2885          fs_inst *scan_inst = (fs_inst *)scan_iter.get();
2886
2887          for (int i = 0; i < 3; i++) {
2888             if (scan_inst->src[i].file == GRF &&
2889                 scan_inst->src[i].reg == inst->dst.reg &&
2890                 scan_inst->src[i].reg_offset == inst->dst.reg_offset) {
2891                scan_inst->src[i].reg = inst->src[0].reg;
2892                scan_inst->src[i].reg_offset = inst->src[0].reg_offset;
2893                scan_inst->src[i].abs |= inst->src[0].abs;
2894                scan_inst->src[i].negate ^= inst->src[0].negate;
2895                scan_inst->src[i].smear = inst->src[0].smear;
2896             }
2897          }
2898       }
2899
2900       inst->remove();
2901       progress = true;
2902    }
2903
2904    return progress;
2905 }
2906
2907
2908 bool
2909 fs_visitor::compute_to_mrf()
2910 {
2911    bool progress = false;
2912    int next_ip = 0;
2913
2914    foreach_iter(exec_list_iterator, iter, this->instructions) {
2915       fs_inst *inst = (fs_inst *)iter.get();
2916
2917       int ip = next_ip;
2918       next_ip++;
2919
2920       if (inst->opcode != BRW_OPCODE_MOV ||
2921           inst->predicated ||
2922           inst->dst.file != MRF || inst->src[0].file != GRF ||
2923           inst->dst.type != inst->src[0].type ||
2924           inst->src[0].abs || inst->src[0].negate || inst->src[0].smear != -1)
2925          continue;
2926
2927       /* Can't compute-to-MRF this GRF if someone else was going to
2928        * read it later.
2929        */
2930       if (this->virtual_grf_use[inst->src[0].reg] > ip)
2931          continue;
2932
2933       /* Found a move of a GRF to a MRF.  Let's see if we can go
2934        * rewrite the thing that made this GRF to write into the MRF.
2935        */
2936       bool found = false;
2937       fs_inst *scan_inst;
2938       for (scan_inst = (fs_inst *)inst->prev;
2939            scan_inst->prev != NULL;
2940            scan_inst = (fs_inst *)scan_inst->prev) {
2941          /* We don't handle flow control here.  Most computation of
2942           * values that end up in MRFs are shortly before the MRF
2943           * write anyway.
2944           */
2945          if (scan_inst->opcode == BRW_OPCODE_DO ||
2946              scan_inst->opcode == BRW_OPCODE_WHILE ||
2947              scan_inst->opcode == BRW_OPCODE_ENDIF) {
2948             break;
2949          }
2950
2951          /* You can't read from an MRF, so if someone else reads our
2952           * MRF's source GRF that we wanted to rewrite, that stops us.
2953           */
2954          bool interfered = false;
2955          for (int i = 0; i < 3; i++) {
2956             if (scan_inst->src[i].file == GRF &&
2957                 scan_inst->src[i].reg == inst->src[0].reg &&
2958                 scan_inst->src[i].reg_offset == inst->src[0].reg_offset) {
2959                interfered = true;
2960             }
2961          }
2962          if (interfered)
2963             break;
2964
2965          if (scan_inst->dst.file == MRF &&
2966              scan_inst->dst.hw_reg == inst->dst.hw_reg) {
2967             /* Somebody else wrote our MRF here, so we can't can't
2968              * compute-to-MRF before that.
2969              */
2970             break;
2971          }
2972
2973          if (scan_inst->mlen > 0) {
2974             /* Found a SEND instruction, which will do some amount of
2975              * implied write that may overwrite our MRF that we were
2976              * hoping to compute-to-MRF somewhere above it.  Nothing
2977              * we have implied-writes more than 2 MRFs from base_mrf,
2978              * though.
2979              */
2980             int implied_write_len = MIN2(scan_inst->mlen, 2);
2981             if (inst->dst.hw_reg >= scan_inst->base_mrf &&
2982                 inst->dst.hw_reg < scan_inst->base_mrf + implied_write_len) {
2983                break;
2984             }
2985          }
2986
2987          if (scan_inst->dst.file == GRF &&
2988              scan_inst->dst.reg == inst->src[0].reg) {
2989             /* Found the last thing to write our reg we want to turn
2990              * into a compute-to-MRF.
2991              */
2992
2993             if (scan_inst->opcode == FS_OPCODE_TEX) {
2994                /* texturing writes several continuous regs, so we can't
2995                 * compute-to-mrf that.
2996                 */
2997                break;
2998             }
2999
3000             /* If it's predicated, it (probably) didn't populate all
3001              * the channels.
3002              */
3003             if (scan_inst->predicated)
3004                break;
3005
3006             /* SEND instructions can't have MRF as a destination. */
3007             if (scan_inst->mlen)
3008                break;
3009
3010             if (intel->gen >= 6) {
3011                /* gen6 math instructions must have the destination be
3012                 * GRF, so no compute-to-MRF for them.
3013                 */
3014                if (scan_inst->opcode == FS_OPCODE_RCP ||
3015                    scan_inst->opcode == FS_OPCODE_RSQ ||
3016                    scan_inst->opcode == FS_OPCODE_SQRT ||
3017                    scan_inst->opcode == FS_OPCODE_EXP2 ||
3018                    scan_inst->opcode == FS_OPCODE_LOG2 ||
3019                    scan_inst->opcode == FS_OPCODE_SIN ||
3020                    scan_inst->opcode == FS_OPCODE_COS ||
3021                    scan_inst->opcode == FS_OPCODE_POW) {
3022                   break;
3023                }
3024             }
3025
3026             if (scan_inst->dst.reg_offset == inst->src[0].reg_offset) {
3027                /* Found the creator of our MRF's source value. */
3028                found = true;
3029                break;
3030             }
3031          }
3032       }
3033       if (found) {
3034          scan_inst->dst.file = MRF;
3035          scan_inst->dst.hw_reg = inst->dst.hw_reg;
3036          scan_inst->saturate |= inst->saturate;
3037          inst->remove();
3038          progress = true;
3039       }
3040    }
3041
3042    return progress;
3043 }
3044
3045 bool
3046 fs_visitor::virtual_grf_interferes(int a, int b)
3047 {
3048    int start = MAX2(this->virtual_grf_def[a], this->virtual_grf_def[b]);
3049    int end = MIN2(this->virtual_grf_use[a], this->virtual_grf_use[b]);
3050
3051    /* For dead code, just check if the def interferes with the other range. */
3052    if (this->virtual_grf_use[a] == -1) {
3053       return (this->virtual_grf_def[a] >= this->virtual_grf_def[b] &&
3054               this->virtual_grf_def[a] < this->virtual_grf_use[b]);
3055    }
3056    if (this->virtual_grf_use[b] == -1) {
3057       return (this->virtual_grf_def[b] >= this->virtual_grf_def[a] &&
3058               this->virtual_grf_def[b] < this->virtual_grf_use[a]);
3059    }
3060
3061    return start < end;
3062 }
3063
3064 static struct brw_reg brw_reg_from_fs_reg(fs_reg *reg)
3065 {
3066    struct brw_reg brw_reg;
3067
3068    switch (reg->file) {
3069    case GRF:
3070    case ARF:
3071    case MRF:
3072       if (reg->smear == -1) {
3073          brw_reg = brw_vec8_reg(reg->file,
3074                                 reg->hw_reg, 0);
3075       } else {
3076          brw_reg = brw_vec1_reg(reg->file,
3077                                 reg->hw_reg, reg->smear);
3078       }
3079       brw_reg = retype(brw_reg, reg->type);
3080       break;
3081    case IMM:
3082       switch (reg->type) {
3083       case BRW_REGISTER_TYPE_F:
3084          brw_reg = brw_imm_f(reg->imm.f);
3085          break;
3086       case BRW_REGISTER_TYPE_D:
3087          brw_reg = brw_imm_d(reg->imm.i);
3088          break;
3089       case BRW_REGISTER_TYPE_UD:
3090          brw_reg = brw_imm_ud(reg->imm.u);
3091          break;
3092       default:
3093          assert(!"not reached");
3094          break;
3095       }
3096       break;
3097    case FIXED_HW_REG:
3098       brw_reg = reg->fixed_hw_reg;
3099       break;
3100    case BAD_FILE:
3101       /* Probably unused. */
3102       brw_reg = brw_null_reg();
3103       break;
3104    case UNIFORM:
3105       assert(!"not reached");
3106       brw_reg = brw_null_reg();
3107       break;
3108    }
3109    if (reg->abs)
3110       brw_reg = brw_abs(brw_reg);
3111    if (reg->negate)
3112       brw_reg = negate(brw_reg);
3113
3114    return brw_reg;
3115 }
3116
3117 void
3118 fs_visitor::generate_code()
3119 {
3120    int last_native_inst = 0;
3121    struct brw_instruction *if_stack[16], *loop_stack[16];
3122    int if_stack_depth = 0, loop_stack_depth = 0;
3123    int if_depth_in_loop[16];
3124    const char *last_annotation_string = NULL;
3125    ir_instruction *last_annotation_ir = NULL;
3126
3127    if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
3128       printf("Native code for fragment shader %d:\n",
3129              ctx->Shader.CurrentFragmentProgram->Name);
3130    }
3131
3132    if_depth_in_loop[loop_stack_depth] = 0;
3133
3134    memset(&if_stack, 0, sizeof(if_stack));
3135    foreach_iter(exec_list_iterator, iter, this->instructions) {
3136       fs_inst *inst = (fs_inst *)iter.get();
3137       struct brw_reg src[3], dst;
3138
3139       if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
3140          if (last_annotation_ir != inst->ir) {
3141             last_annotation_ir = inst->ir;
3142             if (last_annotation_ir) {
3143                printf("   ");
3144                last_annotation_ir->print();
3145                printf("\n");
3146             }
3147          }
3148          if (last_annotation_string != inst->annotation) {
3149             last_annotation_string = inst->annotation;
3150             if (last_annotation_string)
3151                printf("   %s\n", last_annotation_string);
3152          }
3153       }
3154
3155       for (unsigned int i = 0; i < 3; i++) {
3156          src[i] = brw_reg_from_fs_reg(&inst->src[i]);
3157       }
3158       dst = brw_reg_from_fs_reg(&inst->dst);
3159
3160       brw_set_conditionalmod(p, inst->conditional_mod);
3161       brw_set_predicate_control(p, inst->predicated);
3162
3163       switch (inst->opcode) {
3164       case BRW_OPCODE_MOV:
3165          brw_MOV(p, dst, src[0]);
3166          break;
3167       case BRW_OPCODE_ADD:
3168          brw_ADD(p, dst, src[0], src[1]);
3169          break;
3170       case BRW_OPCODE_MUL:
3171          brw_MUL(p, dst, src[0], src[1]);
3172          break;
3173
3174       case BRW_OPCODE_FRC:
3175          brw_FRC(p, dst, src[0]);
3176          break;
3177       case BRW_OPCODE_RNDD:
3178          brw_RNDD(p, dst, src[0]);
3179          break;
3180       case BRW_OPCODE_RNDE:
3181          brw_RNDE(p, dst, src[0]);
3182          break;
3183       case BRW_OPCODE_RNDZ:
3184          brw_RNDZ(p, dst, src[0]);
3185          break;
3186
3187       case BRW_OPCODE_AND:
3188          brw_AND(p, dst, src[0], src[1]);
3189          break;
3190       case BRW_OPCODE_OR:
3191          brw_OR(p, dst, src[0], src[1]);
3192          break;
3193       case BRW_OPCODE_XOR:
3194          brw_XOR(p, dst, src[0], src[1]);
3195          break;
3196       case BRW_OPCODE_NOT:
3197          brw_NOT(p, dst, src[0]);
3198          break;
3199       case BRW_OPCODE_ASR:
3200          brw_ASR(p, dst, src[0], src[1]);
3201          break;
3202       case BRW_OPCODE_SHR:
3203          brw_SHR(p, dst, src[0], src[1]);
3204          break;
3205       case BRW_OPCODE_SHL:
3206          brw_SHL(p, dst, src[0], src[1]);
3207          break;
3208
3209       case BRW_OPCODE_CMP:
3210          brw_CMP(p, dst, inst->conditional_mod, src[0], src[1]);
3211          break;
3212       case BRW_OPCODE_SEL:
3213          brw_SEL(p, dst, src[0], src[1]);
3214          break;
3215
3216       case BRW_OPCODE_IF:
3217          assert(if_stack_depth < 16);
3218          if (inst->src[0].file != BAD_FILE) {
3219             assert(intel->gen >= 6);
3220             if_stack[if_stack_depth] = brw_IF_gen6(p, inst->conditional_mod, src[0], src[1]);
3221          } else {
3222             if_stack[if_stack_depth] = brw_IF(p, BRW_EXECUTE_8);
3223          }
3224          if_depth_in_loop[loop_stack_depth]++;
3225          if_stack_depth++;
3226          break;
3227
3228       case BRW_OPCODE_ELSE:
3229          if_stack[if_stack_depth - 1] =
3230             brw_ELSE(p, if_stack[if_stack_depth - 1]);
3231          break;
3232       case BRW_OPCODE_ENDIF:
3233          if_stack_depth--;
3234          brw_ENDIF(p , if_stack[if_stack_depth]);
3235          if_depth_in_loop[loop_stack_depth]--;
3236          break;
3237
3238       case BRW_OPCODE_DO:
3239          loop_stack[loop_stack_depth++] = brw_DO(p, BRW_EXECUTE_8);
3240          if_depth_in_loop[loop_stack_depth] = 0;
3241          break;
3242
3243       case BRW_OPCODE_BREAK:
3244          brw_BREAK(p, if_depth_in_loop[loop_stack_depth]);
3245          brw_set_predicate_control(p, BRW_PREDICATE_NONE);
3246          break;
3247       case BRW_OPCODE_CONTINUE:
3248          brw_CONT(p, if_depth_in_loop[loop_stack_depth]);
3249          brw_set_predicate_control(p, BRW_PREDICATE_NONE);
3250          break;
3251
3252       case BRW_OPCODE_WHILE: {
3253          struct brw_instruction *inst0, *inst1;
3254          GLuint br = 1;
3255
3256          if (intel->gen >= 5)
3257             br = 2;
3258
3259          assert(loop_stack_depth > 0);
3260          loop_stack_depth--;
3261          inst0 = inst1 = brw_WHILE(p, loop_stack[loop_stack_depth]);
3262          /* patch all the BREAK/CONT instructions from last BGNLOOP */
3263          while (inst0 > loop_stack[loop_stack_depth]) {
3264             inst0--;
3265             if (inst0->header.opcode == BRW_OPCODE_BREAK &&
3266                 inst0->bits3.if_else.jump_count == 0) {
3267                inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1);
3268             }
3269             else if (inst0->header.opcode == BRW_OPCODE_CONTINUE &&
3270                      inst0->bits3.if_else.jump_count == 0) {
3271                inst0->bits3.if_else.jump_count = br * (inst1 - inst0);
3272             }
3273          }
3274       }
3275          break;
3276
3277       case FS_OPCODE_RCP:
3278       case FS_OPCODE_RSQ:
3279       case FS_OPCODE_SQRT:
3280       case FS_OPCODE_EXP2:
3281       case FS_OPCODE_LOG2:
3282       case FS_OPCODE_POW:
3283       case FS_OPCODE_SIN:
3284       case FS_OPCODE_COS:
3285          generate_math(inst, dst, src);
3286          break;
3287       case FS_OPCODE_LINTERP:
3288          generate_linterp(inst, dst, src);
3289          break;
3290       case FS_OPCODE_TEX:
3291       case FS_OPCODE_TXB:
3292       case FS_OPCODE_TXL:
3293          generate_tex(inst, dst);
3294          break;
3295       case FS_OPCODE_DISCARD_NOT:
3296          generate_discard_not(inst, dst);
3297          break;
3298       case FS_OPCODE_DISCARD_AND:
3299          generate_discard_and(inst, src[0]);
3300          break;
3301       case FS_OPCODE_DDX:
3302          generate_ddx(inst, dst, src[0]);
3303          break;
3304       case FS_OPCODE_DDY:
3305          generate_ddy(inst, dst, src[0]);
3306          break;
3307
3308       case FS_OPCODE_SPILL:
3309          generate_spill(inst, src[0]);
3310          break;
3311
3312       case FS_OPCODE_UNSPILL:
3313          generate_unspill(inst, dst);
3314          break;
3315
3316       case FS_OPCODE_PULL_CONSTANT_LOAD:
3317          generate_pull_constant_load(inst, dst);
3318          break;
3319
3320       case FS_OPCODE_FB_WRITE:
3321          generate_fb_write(inst);
3322          break;
3323       default:
3324          if (inst->opcode < (int)ARRAY_SIZE(brw_opcodes)) {
3325             _mesa_problem(ctx, "Unsupported opcode `%s' in FS",
3326                           brw_opcodes[inst->opcode].name);
3327          } else {
3328             _mesa_problem(ctx, "Unsupported opcode %d in FS", inst->opcode);
3329          }
3330          this->fail = true;
3331       }
3332
3333       if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
3334          for (unsigned int i = last_native_inst; i < p->nr_insn; i++) {
3335             if (0) {
3336                printf("0x%08x 0x%08x 0x%08x 0x%08x ",
3337                       ((uint32_t *)&p->store[i])[3],
3338                       ((uint32_t *)&p->store[i])[2],
3339                       ((uint32_t *)&p->store[i])[1],
3340                       ((uint32_t *)&p->store[i])[0]);
3341             }
3342             brw_disasm(stdout, &p->store[i], intel->gen);
3343             printf("\n");
3344          }
3345       }
3346
3347       last_native_inst = p->nr_insn;
3348    }
3349 }
3350
3351 GLboolean
3352 brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c)
3353 {
3354    struct intel_context *intel = &brw->intel;
3355    struct gl_context *ctx = &intel->ctx;
3356    struct gl_shader_program *prog = ctx->Shader.CurrentFragmentProgram;
3357
3358    if (!prog)
3359       return GL_FALSE;
3360
3361    struct brw_shader *shader =
3362      (brw_shader *) prog->_LinkedShaders[MESA_SHADER_FRAGMENT];
3363    if (!shader)
3364       return GL_FALSE;
3365
3366    /* We always use 8-wide mode, at least for now.  For one, flow
3367     * control only works in 8-wide.  Also, when we're fragment shader
3368     * bound, we're almost always under register pressure as well, so
3369     * 8-wide would save us from the performance cliff of spilling
3370     * regs.
3371     */
3372    c->dispatch_width = 8;
3373
3374    if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
3375       printf("GLSL IR for native fragment shader %d:\n", prog->Name);
3376       _mesa_print_ir(shader->ir, NULL);
3377       printf("\n");
3378    }
3379
3380    /* Now the main event: Visit the shader IR and generate our FS IR for it.
3381     */
3382    fs_visitor v(c, shader);
3383
3384    if (0) {
3385       v.emit_dummy_fs();
3386    } else {
3387       v.calculate_urb_setup();
3388       if (intel->gen < 6)
3389          v.emit_interpolation_setup_gen4();
3390       else
3391          v.emit_interpolation_setup_gen6();
3392
3393       /* Generate FS IR for main().  (the visitor only descends into
3394        * functions called "main").
3395        */
3396       foreach_iter(exec_list_iterator, iter, *shader->ir) {
3397          ir_instruction *ir = (ir_instruction *)iter.get();
3398          v.base_ir = ir;
3399          ir->accept(&v);
3400       }
3401
3402       v.emit_fb_writes();
3403
3404       v.split_virtual_grfs();
3405       v.setup_pull_constants();
3406
3407       v.assign_curb_setup();
3408       v.assign_urb_setup();
3409
3410       bool progress;
3411       do {
3412          progress = false;
3413          v.calculate_live_intervals();
3414          progress = v.propagate_constants() || progress;
3415          progress = v.register_coalesce() || progress;
3416          progress = v.compute_to_mrf() || progress;
3417          progress = v.dead_code_eliminate() || progress;
3418       } while (progress);
3419
3420       if (0) {
3421          /* Debug of register spilling: Go spill everything. */
3422          int virtual_grf_count = v.virtual_grf_next;
3423          for (int i = 1; i < virtual_grf_count; i++) {
3424             v.spill_reg(i);
3425          }
3426          v.calculate_live_intervals();
3427       }
3428
3429       if (0)
3430          v.assign_regs_trivial();
3431       else {
3432          while (!v.assign_regs()) {
3433             if (v.fail)
3434                break;
3435
3436             v.calculate_live_intervals();
3437          }
3438       }
3439    }
3440
3441    if (!v.fail)
3442       v.generate_code();
3443
3444    assert(!v.fail); /* FINISHME: Cleanly fail, tested at link time, etc. */
3445
3446    if (v.fail)
3447       return GL_FALSE;
3448
3449    c->prog_data.total_grf = v.grf_used;
3450
3451    return GL_TRUE;
3452 }