5e2b3e5a5feac250192666241ac54335371836c6
[mesa.git] / src / mesa / drivers / dri / i965 / brw_vec4_visitor.cpp
1 /*
2 * Copyright © 2011 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "brw_vec4.h"
25 extern "C" {
26 #include "main/macros.h"
27 #include "program/prog_parameter.h"
28 }
29
30 namespace brw {
31
32 src_reg::src_reg(dst_reg reg)
33 {
34 init();
35
36 this->file = reg.file;
37 this->reg = reg.reg;
38 this->reg_offset = reg.reg_offset;
39 this->type = reg.type;
40
41 int swizzles[4];
42 int next_chan = 0;
43 int last = 0;
44
45 for (int i = 0; i < 4; i++) {
46 if (!(reg.writemask & (1 << i)))
47 continue;
48
49 swizzles[next_chan++] = last = i;
50 }
51
52 for (; next_chan < 4; next_chan++) {
53 swizzles[next_chan] = last;
54 }
55
56 this->swizzle = BRW_SWIZZLE4(swizzles[0], swizzles[1],
57 swizzles[2], swizzles[3]);
58 }
59
60 dst_reg::dst_reg(src_reg reg)
61 {
62 init();
63
64 this->file = reg.file;
65 this->reg = reg.reg;
66 this->reg_offset = reg.reg_offset;
67 this->type = reg.type;
68 this->writemask = WRITEMASK_XYZW;
69 }
70
71 vec4_instruction *
72 vec4_visitor::emit(enum opcode opcode, dst_reg dst,
73 src_reg src0, src_reg src1, src_reg src2)
74 {
75 vec4_instruction *inst = new(mem_ctx) vec4_instruction();
76
77 inst->opcode = opcode;
78 inst->dst = dst;
79 inst->src[0] = src0;
80 inst->src[1] = src1;
81 inst->src[2] = src2;
82 inst->ir = this->base_ir;
83 inst->annotation = this->current_annotation;
84
85 this->instructions.push_tail(inst);
86
87 return inst;
88 }
89
90
91 vec4_instruction *
92 vec4_visitor::emit(enum opcode opcode, dst_reg dst, src_reg src0, src_reg src1)
93 {
94 return emit(opcode, dst, src0, src1, src_reg());
95 }
96
97 vec4_instruction *
98 vec4_visitor::emit(enum opcode opcode, dst_reg dst, src_reg src0)
99 {
100 assert(dst.writemask != 0);
101 return emit(opcode, dst, src0, src_reg(), src_reg());
102 }
103
104 vec4_instruction *
105 vec4_visitor::emit(enum opcode opcode)
106 {
107 return emit(opcode, dst_reg(), src_reg(), src_reg(), src_reg());
108 }
109
110 void
111 vec4_visitor::emit_dp(dst_reg dst, src_reg src0, src_reg src1, unsigned elements)
112 {
113 static enum opcode dot_opcodes[] = {
114 BRW_OPCODE_DP2, BRW_OPCODE_DP3, BRW_OPCODE_DP4
115 };
116
117 emit(dot_opcodes[elements - 2], dst, src0, src1);
118 }
119
120 void
121 vec4_visitor::emit_math1_gen6(enum opcode opcode, dst_reg dst, src_reg src)
122 {
123 /* The gen6 math instruction ignores the source modifiers --
124 * swizzle, abs, negate, and at least some parts of the register
125 * region description.
126 */
127 src_reg temp_src = src_reg(this, glsl_type::vec4_type);
128 emit(BRW_OPCODE_MOV, dst_reg(temp_src), src);
129
130 emit(opcode, dst, temp_src);
131 }
132
133 void
134 vec4_visitor::emit_math1_gen4(enum opcode opcode, dst_reg dst, src_reg src)
135 {
136 vec4_instruction *inst = emit(opcode, dst, src);
137 inst->base_mrf = 1;
138 inst->mlen = 1;
139 }
140
141 void
142 vec4_visitor::emit_math(opcode opcode, dst_reg dst, src_reg src)
143 {
144 switch (opcode) {
145 case SHADER_OPCODE_RCP:
146 case SHADER_OPCODE_RSQ:
147 case SHADER_OPCODE_SQRT:
148 case SHADER_OPCODE_EXP2:
149 case SHADER_OPCODE_LOG2:
150 case SHADER_OPCODE_SIN:
151 case SHADER_OPCODE_COS:
152 break;
153 default:
154 assert(!"not reached: bad math opcode");
155 return;
156 }
157
158 if (intel->gen >= 6) {
159 return emit_math1_gen6(opcode, dst, src);
160 } else {
161 return emit_math1_gen4(opcode, dst, src);
162 }
163 }
164
165 void
166 vec4_visitor::emit_math2_gen6(enum opcode opcode,
167 dst_reg dst, src_reg src0, src_reg src1)
168 {
169 src_reg expanded;
170
171 /* The gen6 math instruction ignores the source modifiers --
172 * swizzle, abs, negate, and at least some parts of the register
173 * region description. Move the sources to temporaries to make it
174 * generally work.
175 */
176
177 expanded = src_reg(this, glsl_type::vec4_type);
178 emit(BRW_OPCODE_MOV, dst, src0);
179 src0 = expanded;
180
181 expanded = src_reg(this, glsl_type::vec4_type);
182 emit(BRW_OPCODE_MOV, dst, src1);
183 src1 = expanded;
184
185 emit(opcode, dst, src0, src1);
186 }
187
188 void
189 vec4_visitor::emit_math2_gen4(enum opcode opcode,
190 dst_reg dst, src_reg src0, src_reg src1)
191 {
192 vec4_instruction *inst = emit(opcode, dst, src0, src1);
193 inst->base_mrf = 1;
194 inst->mlen = 2;
195 }
196
197 void
198 vec4_visitor::emit_math(enum opcode opcode,
199 dst_reg dst, src_reg src0, src_reg src1)
200 {
201 assert(opcode == SHADER_OPCODE_POW);
202
203 if (intel->gen >= 6) {
204 return emit_math2_gen6(opcode, dst, src0, src1);
205 } else {
206 return emit_math2_gen4(opcode, dst, src0, src1);
207 }
208 }
209
void
vec4_visitor::visit_instructions(const exec_list *list)
{
   /* Walk every IR instruction in the list, recording each one in
    * base_ir so the instructions emitted for it carry it as a debug
    * annotation.
    */
   foreach_iter(exec_list_iterator, iter, *list) {
      ir_instruction *ir = (ir_instruction *)iter.get();

      base_ir = ir;
      ir->accept(this);
   }
}
220
221
222 static int
223 type_size(const struct glsl_type *type)
224 {
225 unsigned int i;
226 int size;
227
228 switch (type->base_type) {
229 case GLSL_TYPE_UINT:
230 case GLSL_TYPE_INT:
231 case GLSL_TYPE_FLOAT:
232 case GLSL_TYPE_BOOL:
233 if (type->is_matrix()) {
234 return type->matrix_columns;
235 } else {
236 /* Regardless of size of vector, it gets a vec4. This is bad
237 * packing for things like floats, but otherwise arrays become a
238 * mess. Hopefully a later pass over the code can pack scalars
239 * down if appropriate.
240 */
241 return 1;
242 }
243 case GLSL_TYPE_ARRAY:
244 assert(type->length > 0);
245 return type_size(type->fields.array) * type->length;
246 case GLSL_TYPE_STRUCT:
247 size = 0;
248 for (i = 0; i < type->length; i++) {
249 size += type_size(type->fields.structure[i].type);
250 }
251 return size;
252 case GLSL_TYPE_SAMPLER:
253 /* Samplers take up one slot in UNIFORMS[], but they're baked in
254 * at link time.
255 */
256 return 1;
257 default:
258 assert(0);
259 return 0;
260 }
261 }
262
263 int
264 vec4_visitor::virtual_grf_alloc(int size)
265 {
266 if (virtual_grf_array_size <= virtual_grf_count) {
267 if (virtual_grf_array_size == 0)
268 virtual_grf_array_size = 16;
269 else
270 virtual_grf_array_size *= 2;
271 virtual_grf_sizes = reralloc(mem_ctx, virtual_grf_sizes, int,
272 virtual_grf_array_size);
273 }
274 virtual_grf_sizes[virtual_grf_count] = size;
275 return virtual_grf_count++;
276 }
277
278 src_reg::src_reg(class vec4_visitor *v, const struct glsl_type *type)
279 {
280 init();
281
282 this->file = GRF;
283 this->reg = v->virtual_grf_alloc(type_size(type));
284
285 if (type->is_array() || type->is_record()) {
286 this->swizzle = BRW_SWIZZLE_NOOP;
287 } else {
288 this->swizzle = swizzle_for_size(type->vector_elements);
289 }
290
291 this->type = brw_type_for_base_type(type);
292 }
293
294 dst_reg::dst_reg(class vec4_visitor *v, const struct glsl_type *type)
295 {
296 init();
297
298 this->file = GRF;
299 this->reg = v->virtual_grf_alloc(type_size(type));
300
301 if (type->is_array() || type->is_record()) {
302 this->writemask = WRITEMASK_XYZW;
303 } else {
304 this->writemask = (1 << type->vector_elements) - 1;
305 }
306
307 this->type = brw_type_for_base_type(type);
308 }
309
310 /* Our support for uniforms is piggy-backed on the struct
311 * gl_fragment_program, because that's where the values actually
312 * get stored, rather than in some global gl_shader_program uniform
313 * store.
314 */
/* Recursively registers the uniform starting at parameter slot "loc"
 * with the compile's prog_data param[]/param_convert[] tables, and
 * returns the number of parameter slots consumed.
 */
int
vec4_visitor::setup_uniform_values(int loc, const glsl_type *type)
{
   unsigned int offset = 0;
   float *values = &this->vp->Base.Parameters->ParameterValues[loc][0].f;

   /* Matrices are handled column by column, recursing with a column
    * vector type for each one.
    */
   if (type->is_matrix()) {
      const glsl_type *column = glsl_type::get_instance(GLSL_TYPE_FLOAT,
							type->vector_elements,
							1);

      for (unsigned int i = 0; i < type->matrix_columns; i++) {
	 offset += setup_uniform_values(loc + offset, column);
      }

      return offset;
   }

   switch (type->base_type) {
   case GLSL_TYPE_FLOAT:
   case GLSL_TYPE_UINT:
   case GLSL_TYPE_INT:
   case GLSL_TYPE_BOOL:
      /* Each used channel points at the Mesa parameter value and gets
       * a conversion matching its base type (parameters are stored as
       * floats).
       */
      for (unsigned int i = 0; i < type->vector_elements; i++) {
	 int slot = this->uniforms * 4 + i;
	 switch (type->base_type) {
	 case GLSL_TYPE_FLOAT:
	    c->prog_data.param_convert[slot] = PARAM_NO_CONVERT;
	    break;
	 case GLSL_TYPE_UINT:
	    c->prog_data.param_convert[slot] = PARAM_CONVERT_F2U;
	    break;
	 case GLSL_TYPE_INT:
	    c->prog_data.param_convert[slot] = PARAM_CONVERT_F2I;
	    break;
	 case GLSL_TYPE_BOOL:
	    c->prog_data.param_convert[slot] = PARAM_CONVERT_F2B;
	    break;
	 default:
	    assert(!"not reached");
	    c->prog_data.param_convert[slot] = PARAM_NO_CONVERT;
	    break;
	 }
	 c->prog_data.param[slot] = &values[i];
      }

      /* Zero-fill the unused channels of the vec4 slot. */
      for (unsigned int i = type->vector_elements; i < 4; i++) {
	 c->prog_data.param_convert[this->uniforms * 4 + i] =
	    PARAM_CONVERT_ZERO;
	 c->prog_data.param[this->uniforms * 4 + i] = NULL;
      }

      this->uniform_size[this->uniforms] = type->vector_elements;
      this->uniforms++;

      return 1;

   case GLSL_TYPE_STRUCT:
      for (unsigned int i = 0; i < type->length; i++) {
	 offset += setup_uniform_values(loc + offset,
					type->fields.structure[i].type);
      }
      return offset;

   case GLSL_TYPE_ARRAY:
      for (unsigned int i = 0; i < type->length; i++) {
	 offset += setup_uniform_values(loc + offset, type->fields.array);
      }
      return offset;

   case GLSL_TYPE_SAMPLER:
      /* The sampler takes up a slot, but we don't use any values from it. */
      return 1;

   default:
      assert(!"not reached");
      return 0;
   }
}
394
395 /* Our support for builtin uniforms is even scarier than non-builtin.
396 * It sits on top of the PROG_STATE_VAR parameters that are
397 * automatically updated from GL context state.
398 */
void
vec4_visitor::setup_builtin_uniform_values(ir_variable *ir)
{
   const ir_state_slot *const slots = ir->state_slots;
   assert(ir->state_slots != NULL);

   for (unsigned int i = 0; i < ir->num_state_slots; i++) {
      /* This state reference has already been setup by ir_to_mesa,
       * but we'll get the same index back here.  We can reference
       * ParameterValues directly, since unlike brw_fs.cpp, we never
       * add new state references during compile.
       */
      int index = _mesa_add_state_reference(this->vp->Base.Parameters,
					    (gl_state_index *)slots[i].tokens);
      float *values = &this->vp->Base.Parameters->ParameterValues[index][0].f;

      this->uniform_size[this->uniforms] = 0;
      /* Add each of the unique swizzled channels of the element.
       * This will end up matching the size of the glsl_type of this field.
       */
      int last_swiz = -1;
      for (unsigned int j = 0; j < 4; j++) {
	 int swiz = GET_SWZ(slots[i].swizzle, j);
	 /* A repeated swizzle channel marks the end of the unique
	  * components for this slot.
	  */
	 if (swiz == last_swiz)
	    break;
	 last_swiz = swiz;

	 c->prog_data.param[this->uniforms * 4 + j] = &values[swiz];
	 c->prog_data.param_convert[this->uniforms * 4 + j] = PARAM_NO_CONVERT;
	 this->uniform_size[this->uniforms]++;
      }
      this->uniforms++;
   }
}
433
434 dst_reg *
435 vec4_visitor::variable_storage(ir_variable *var)
436 {
437 return (dst_reg *)hash_table_find(this->variable_ht, var);
438 }
439
/* Emits an instruction whose conditional_mod evaluates the given
 * boolean rvalue, so that a following predicated instruction can test
 * the condition flags.
 */
void
vec4_visitor::emit_bool_to_cond_code(ir_rvalue *ir)
{
   ir_expression *expr = ir->as_expression();

   if (expr) {
      src_reg op[2];
      vec4_instruction *inst;

      /* At most two operands, each scalar, since this computes a
       * single condition.
       */
      assert(expr->get_num_operands() <= 2);
      for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
	 assert(expr->operands[i]->type->is_scalar());

	 expr->operands[i]->accept(this);
	 op[i] = this->result;
      }

      switch (expr->operation) {
      case ir_unop_logic_not:
	 /* Booleans here are 0/1 values, so NOT is "bit 0 is zero". */
	 inst = emit(BRW_OPCODE_AND, dst_null_d(), op[0], src_reg(1));
	 inst->conditional_mod = BRW_CONDITIONAL_Z;
	 break;

      case ir_binop_logic_xor:
	 inst = emit(BRW_OPCODE_XOR, dst_null_d(), op[0], op[1]);
	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
	 break;

      case ir_binop_logic_or:
	 inst = emit(BRW_OPCODE_OR, dst_null_d(), op[0], op[1]);
	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
	 break;

      case ir_binop_logic_and:
	 inst = emit(BRW_OPCODE_AND, dst_null_d(), op[0], op[1]);
	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
	 break;

      case ir_unop_f2b:
	 /* Gen6+ can CMP against 0.0 directly; older parts set the
	  * flags from a MOV through a float-typed null register.
	  */
	 if (intel->gen >= 6) {
	    inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], src_reg(0.0f));
	 } else {
	    inst = emit(BRW_OPCODE_MOV, dst_null_f(), op[0]);
	 }
	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
	 break;

      case ir_unop_i2b:
	 if (intel->gen >= 6) {
	    inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], src_reg(0));
	 } else {
	    inst = emit(BRW_OPCODE_MOV, dst_null_d(), op[0]);
	 }
	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
	 break;

      case ir_binop_greater:
      case ir_binop_gequal:
      case ir_binop_less:
      case ir_binop_lequal:
      case ir_binop_equal:
      case ir_binop_all_equal:
      case ir_binop_nequal:
      case ir_binop_any_nequal:
	 /* Comparisons map directly to a CMP with the matching
	  * conditional modifier.
	  */
	 inst = emit(BRW_OPCODE_CMP, dst_null_cmp(), op[0], op[1]);
	 inst->conditional_mod =
	    brw_conditional_for_comparison(expr->operation);
	 break;

      default:
	 assert(!"not reached");
	 break;
      }
      return;
   }

   /* Not a recognized expression: evaluate the rvalue as a 0/1
    * boolean and test bit 0 (gen6+) or the whole value (gen4/5).
    */
   ir->accept(this);

   if (intel->gen >= 6) {
      vec4_instruction *inst = emit(BRW_OPCODE_AND, dst_null_d(),
			       this->result, src_reg(1));
      inst->conditional_mod = BRW_CONDITIONAL_NZ;
   } else {
      vec4_instruction *inst = emit(BRW_OPCODE_MOV, dst_null_d(), this->result);
      inst->conditional_mod = BRW_CONDITIONAL_NZ;
   }
}
527
528 /**
529 * Emit a gen6 IF statement with the comparison folded into the IF
530 * instruction.
531 */
void
vec4_visitor::emit_if_gen6(ir_if *ir)
{
   ir_expression *expr = ir->condition->as_expression();

   if (expr) {
      src_reg op[2];
      vec4_instruction *inst;
      dst_reg temp;

      /* Operands must be scalar, except for the vector comparisons
       * (any_nequal/all_equal), which reduce to one condition below.
       */
      assert(expr->get_num_operands() <= 2);
      for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
	 assert(expr->operands[i]->type->is_scalar() ||
		expr->operation == ir_binop_any_nequal ||
		expr->operation == ir_binop_all_equal);

	 expr->operands[i]->accept(this);
	 op[i] = this->result;
      }

      switch (expr->operation) {
      case ir_unop_logic_not:
	 inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], src_reg(0));
	 inst->conditional_mod = BRW_CONDITIONAL_Z;
	 return;

      case ir_binop_logic_xor:
	 inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], op[1]);
	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
	 return;

      case ir_binop_logic_or:
	 /* OR/AND need a real temporary holding the combined value
	  * before the IF can compare it against zero.
	  */
	 temp = dst_reg(this, glsl_type::bool_type);
	 emit(BRW_OPCODE_OR, temp, op[0], op[1]);
	 inst = emit(BRW_OPCODE_IF, dst_null_d(), src_reg(temp), src_reg(0));
	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
	 return;

      case ir_binop_logic_and:
	 temp = dst_reg(this, glsl_type::bool_type);
	 emit(BRW_OPCODE_AND, temp, op[0], op[1]);
	 inst = emit(BRW_OPCODE_IF, dst_null_d(), src_reg(temp), src_reg(0));
	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
	 return;

      case ir_unop_f2b:
	 inst = emit(BRW_OPCODE_IF, dst_null_f(), op[0], src_reg(0));
	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
	 return;

      case ir_unop_i2b:
	 inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], src_reg(0));
	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
	 return;

      case ir_binop_greater:
      case ir_binop_gequal:
      case ir_binop_less:
      case ir_binop_lequal:
      case ir_binop_equal:
      case ir_binop_nequal:
	 /* Scalar comparison folded straight into the IF. */
	 inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], op[1]);
	 inst->conditional_mod =
	    brw_conditional_for_comparison(expr->operation);
	 return;

      case ir_binop_all_equal:
	 /* Compare per channel, then take the IF only when all four
	  * channels matched.
	  */
	 inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], op[1]);
	 inst->conditional_mod = BRW_CONDITIONAL_Z;

	 inst = emit(BRW_OPCODE_IF);
	 inst->predicate = BRW_PREDICATE_ALIGN16_ALL4H;
	 return;

      case ir_binop_any_nequal:
	 /* Compare per channel, then take the IF when any channel
	  * differed.
	  */
	 inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], op[1]);
	 inst->conditional_mod = BRW_CONDITIONAL_NZ;

	 inst = emit(BRW_OPCODE_IF);
	 inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
	 return;

      default:
	 assert(!"not reached");
	 inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], src_reg(0));
	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
	 return;
      }
      return;
   }

   /* Fallback: evaluate the condition as a value and emit an IF
    * comparing it against zero.
    */
   ir->condition->accept(this);

   vec4_instruction *inst = emit(BRW_OPCODE_IF, dst_null_d(),
			    this->result, src_reg(0));
   inst->conditional_mod = BRW_CONDITIONAL_NZ;
}
629
/* Allocates storage for a variable the first time it is seen, keyed
 * by mode: attributes, outputs, temporaries, or uniforms.
 */
void
vec4_visitor::visit(ir_variable *ir)
{
   dst_reg *reg = NULL;

   /* Only allocate storage once per variable. */
   if (variable_storage(ir))
      return;

   switch (ir->mode) {
   case ir_var_in:
      reg = new(mem_ctx) dst_reg(ATTR, ir->location);
      break;

   case ir_var_out:
      reg = new(mem_ctx) dst_reg(this, ir->type);

      /* Record every register of the output in output_reg[] so the
       * later URB-write setup can find each vec4 of it by location.
       */
      for (int i = 0; i < type_size(ir->type); i++) {
	 output_reg[ir->location + i] = *reg;
	 output_reg[ir->location + i].reg_offset = i;
	 output_reg[ir->location + i].type = BRW_REGISTER_TYPE_F;
      }
      break;

   case ir_var_auto:
   case ir_var_temporary:
      reg = new(mem_ctx) dst_reg(this, ir->type);
      break;

   case ir_var_uniform:
      reg = new(this->mem_ctx) dst_reg(UNIFORM, this->uniforms);

      /* Built-in uniforms are all named "gl_*"; everything else goes
       * through the regular parameter-value path.
       */
      if (!strncmp(ir->name, "gl_", 3)) {
	 setup_builtin_uniform_values(ir);
      } else {
	 setup_uniform_values(ir->location, ir->type);
      }
      break;

   default:
      assert(!"not reached");
   }

   reg->type = brw_type_for_base_type(ir->type);
   hash_table_insert(this->variable_ht, reg, ir);
}
675
void
vec4_visitor::visit(ir_loop *ir)
{
   ir_dereference_variable *counter = NULL;

   /* NOTE(review): loop support is not finished -- fail() flags the
    * compile, and the codegen below is the intended shape: counter
    * init, DO, bounds-check break, body, increment, WHILE.
    */
   fail("not yet\n");

   /* We don't want debugging output to print the whole body of the
    * loop as the annotation.
    */
   this->base_ir = NULL;

   if (ir->counter != NULL)
      counter = new(ir) ir_dereference_variable(ir->counter);

   /* Initialize the counter from the loop's "from" expression. */
   if (ir->from != NULL) {
      assert(ir->counter != NULL);

      ir_assignment *a = new(ir) ir_assignment(counter, ir->from, NULL);

      a->accept(this);
      delete a;
   }

   emit(BRW_OPCODE_DO);

   /* Emit "if (!(counter cmp to)) break;" at the top of the loop. */
   if (ir->to) {
      ir_expression *e =
	 new(ir) ir_expression(ir->cmp, glsl_type::bool_type,
			       counter, ir->to);
      ir_if *if_stmt =  new(ir) ir_if(e);

      ir_loop_jump *brk = new(ir) ir_loop_jump(ir_loop_jump::jump_break);

      if_stmt->then_instructions.push_tail(brk);

      if_stmt->accept(this);

      delete if_stmt;
      delete e;
      delete brk;
   }

   visit_instructions(&ir->body_instructions);

   /* Advance the counter by the loop increment. */
   if (ir->increment) {
      ir_expression *e =
	 new(ir) ir_expression(ir_binop_add, counter->type,
			       counter, ir->increment);

      ir_assignment *a = new(ir) ir_assignment(counter, e, NULL);

      a->accept(this);
      delete a;
      delete e;
   }

   emit(BRW_OPCODE_WHILE);
}
735
736 void
737 vec4_visitor::visit(ir_loop_jump *ir)
738 {
739 switch (ir->mode) {
740 case ir_loop_jump::jump_break:
741 emit(BRW_OPCODE_BREAK);
742 break;
743 case ir_loop_jump::jump_continue:
744 emit(BRW_OPCODE_CONTINUE);
745 break;
746 }
747 }
748
749
750 void
751 vec4_visitor::visit(ir_function_signature *ir)
752 {
753 assert(0);
754 (void)ir;
755 }
756
757 void
758 vec4_visitor::visit(ir_function *ir)
759 {
760 /* Ignore function bodies other than main() -- we shouldn't see calls to
761 * them since they should all be inlined.
762 */
763 if (strcmp(ir->name, "main") == 0) {
764 const ir_function_signature *sig;
765 exec_list empty;
766
767 sig = ir->matching_signature(&empty);
768
769 assert(sig);
770
771 visit_instructions(&sig->body);
772 }
773 }
774
775 GLboolean
776 vec4_visitor::try_emit_sat(ir_expression *ir)
777 {
778 ir_rvalue *sat_src = ir->as_rvalue_to_saturate();
779 if (!sat_src)
780 return false;
781
782 sat_src->accept(this);
783 src_reg src = this->result;
784
785 this->result = src_reg(this, ir->type);
786 vec4_instruction *inst;
787 inst = emit(BRW_OPCODE_MOV, dst_reg(this->result), src);
788 inst->saturate = true;
789
790 return true;
791 }
792
/* Emits a comparison producing a 0/1 boolean value in dst. */
void
vec4_visitor::emit_bool_comparison(unsigned int op,
				 dst_reg dst, src_reg src0, src_reg src1)
{
   /* original gen4 does destination conversion before comparison. */
   if (intel->gen < 5)
      dst.type = src0.type;

   vec4_instruction *inst = emit(BRW_OPCODE_CMP, dst, src0, src1);
   inst->conditional_mod = brw_conditional_for_comparison(op);

   /* Mask the all-ones comparison result down to a 0/1 boolean. */
   dst.type = BRW_REGISTER_TYPE_D;
   emit(BRW_OPCODE_AND, dst, src_reg(dst), src_reg(0x1));
}
807
/* Generates code for a GLSL IR expression, leaving the value's
 * register in this->result.  Operands are evaluated first; most
 * operations then map to one or a few vec4 instructions writing a
 * fresh temporary.
 */
void
vec4_visitor::visit(ir_expression *ir)
{
   unsigned int operand;
   src_reg op[Elements(ir->operands)];
   src_reg result_src;
   dst_reg result_dst;
   vec4_instruction *inst;

   /* A saturate() wrapped around the expression folds into a MOV's
    * saturate modifier instead of the general path.
    */
   if (try_emit_sat(ir))
      return;

   for (operand = 0; operand < ir->get_num_operands(); operand++) {
      this->result.file = BAD_FILE;
      ir->operands[operand]->accept(this);
      if (this->result.file == BAD_FILE) {
	 printf("Failed to get tree for expression operand:\n");
	 ir->operands[operand]->print();
	 exit(1);
      }
      op[operand] = this->result;

      /* Matrix expression operands should have been broken down to vector
       * operations already.
       */
      assert(!ir->operands[operand]->type->is_matrix());
   }

   /* NOTE(review): vector_elements is computed but not used by any
    * case below in this revision.
    */
   int vector_elements = ir->operands[0]->type->vector_elements;
   if (ir->operands[1]) {
      vector_elements = MAX2(vector_elements,
			     ir->operands[1]->type->vector_elements);
   }

   this->result.file = BAD_FILE;

   /* Storage for our result.  Ideally for an assignment we'd be using
    * the actual storage for the result here, instead.
    */
   result_src = src_reg(this, ir->type);
   /* convenience for the emit functions below. */
   result_dst = dst_reg(result_src);
   /* If nothing special happens, this is the result. */
   this->result = result_src;
   /* Limit writes to the channels that will be used by result_src later.
    * This does limit this temp's use as a temporary for multi-instruction
    * sequences.
    */
   result_dst.writemask = (1 << ir->type->vector_elements) - 1;

   switch (ir->operation) {
   case ir_unop_logic_not:
      /* Note that BRW_OPCODE_NOT is not appropriate here, since it is
       * ones complement of the whole register, not just bit 0.
       */
      emit(BRW_OPCODE_XOR, result_dst, op[0], src_reg(1));
      break;
   case ir_unop_neg:
      /* Negate/abs are free: just flip source modifiers, no code. */
      op[0].negate = !op[0].negate;
      this->result = op[0];
      break;
   case ir_unop_abs:
      op[0].abs = true;
      op[0].negate = false;
      this->result = op[0];
      break;

   case ir_unop_sign:
      /* sign(x): start at 0.0, then predicate-select 1.0 where x > 0
       * and -1.0 where x < 0.
       */
      emit(BRW_OPCODE_MOV, result_dst, src_reg(0.0f));

      inst = emit(BRW_OPCODE_CMP, dst_null_f(), op[0], src_reg(0.0f));
      inst->conditional_mod = BRW_CONDITIONAL_G;
      inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1.0f));
      inst->predicate = BRW_PREDICATE_NORMAL;

      inst = emit(BRW_OPCODE_CMP, dst_null_f(), op[0], src_reg(0.0f));
      inst->conditional_mod = BRW_CONDITIONAL_L;
      inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(-1.0f));
      inst->predicate = BRW_PREDICATE_NORMAL;

      break;

   case ir_unop_rcp:
      emit_math(SHADER_OPCODE_RCP, result_dst, op[0]);
      break;

   case ir_unop_exp2:
      emit_math(SHADER_OPCODE_EXP2, result_dst, op[0]);
      break;
   case ir_unop_log2:
      emit_math(SHADER_OPCODE_LOG2, result_dst, op[0]);
      break;
   case ir_unop_exp:
   case ir_unop_log:
      assert(!"not reached: should be handled by ir_explog_to_explog2");
      break;
   case ir_unop_sin:
   case ir_unop_sin_reduced:
      emit_math(SHADER_OPCODE_SIN, result_dst, op[0]);
      break;
   case ir_unop_cos:
   case ir_unop_cos_reduced:
      emit_math(SHADER_OPCODE_COS, result_dst, op[0]);
      break;

   case ir_unop_dFdx:
   case ir_unop_dFdy:
      assert(!"derivatives not valid in vertex shader");
      break;

   case ir_unop_noise:
      assert(!"not reached: should be handled by lower_noise");
      break;

   case ir_binop_add:
      emit(BRW_OPCODE_ADD, result_dst, op[0], op[1]);
      break;
   case ir_binop_sub:
      assert(!"not reached: should be handled by ir_sub_to_add_neg");
      break;

   case ir_binop_mul:
      emit(BRW_OPCODE_MUL, result_dst, op[0], op[1]);
      break;
   case ir_binop_div:
      assert(!"not reached: should be handled by ir_div_to_mul_rcp");
      /* fallthrough -- debug builds abort above */
   case ir_binop_mod:
      assert(!"ir_binop_mod should have been converted to b * fract(a/b)");
      break;

   case ir_binop_less:
   case ir_binop_greater:
   case ir_binop_lequal:
   case ir_binop_gequal:
   case ir_binop_equal:
   case ir_binop_nequal: {
      dst_reg temp = result_dst;
      /* original gen4 does implicit conversion before comparison. */
      if (intel->gen < 5)
	 temp.type = op[0].type;

      /* CMP writes all-ones on pass; AND masks it down to 0/1. */
      inst = emit(BRW_OPCODE_CMP, temp, op[0], op[1]);
      inst->conditional_mod = brw_conditional_for_comparison(ir->operation);
      emit(BRW_OPCODE_AND, result_dst, this->result, src_reg(0x1));
      break;
   }

   case ir_binop_all_equal:
      /* "==" operator producing a scalar boolean. */
      if (ir->operands[0]->type->is_vector() ||
	  ir->operands[1]->type->is_vector()) {
	 inst = emit(BRW_OPCODE_CMP, dst_null_cmp(), op[0], op[1]);
	 inst->conditional_mod = BRW_CONDITIONAL_Z;

	 /* Write 0, then predicated on all channels matching, write 1. */
	 emit(BRW_OPCODE_MOV, result_dst, src_reg(0));
	 inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1));
	 inst->predicate = BRW_PREDICATE_ALIGN16_ALL4H;
      } else {
	 dst_reg temp = result_dst;
	 /* original gen4 does implicit conversion before comparison. */
	 if (intel->gen < 5)
	    temp.type = op[0].type;

	 inst = emit(BRW_OPCODE_CMP, temp, op[0], op[1]);
	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
	 emit(BRW_OPCODE_AND, result_dst, result_src, src_reg(0x1));
      }
      break;
   case ir_binop_any_nequal:
      /* "!=" operator producing a scalar boolean. */
      if (ir->operands[0]->type->is_vector() ||
	  ir->operands[1]->type->is_vector()) {
	 inst = emit(BRW_OPCODE_CMP, dst_null_cmp(), op[0], op[1]);
	 inst->conditional_mod = BRW_CONDITIONAL_NZ;

	 /* Write 0, then predicated on any channel differing, write 1. */
	 emit(BRW_OPCODE_MOV, result_dst, src_reg(0));
	 inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1));
	 inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
      } else {
	 dst_reg temp = result_dst;
	 /* original gen4 does implicit conversion before comparison. */
	 if (intel->gen < 5)
	    temp.type = op[0].type;

	 inst = emit(BRW_OPCODE_CMP, temp, op[0], op[1]);
	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
	 emit(BRW_OPCODE_AND, result_dst, result_src, src_reg(0x1));
      }
      break;

   case ir_unop_any:
      emit(BRW_OPCODE_CMP, dst_null_d(), op[0], src_reg(0));
      emit(BRW_OPCODE_MOV, result_dst, src_reg(0));

      inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1));
      inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
      break;

   case ir_binop_logic_xor:
      emit(BRW_OPCODE_XOR, result_dst, op[0], op[1]);
      break;

   case ir_binop_logic_or:
      emit(BRW_OPCODE_OR, result_dst, op[0], op[1]);
      break;

   case ir_binop_logic_and:
      emit(BRW_OPCODE_AND, result_dst, op[0], op[1]);
      break;

   case ir_binop_dot:
      assert(ir->operands[0]->type->is_vector());
      assert(ir->operands[0]->type == ir->operands[1]->type);
      emit_dp(result_dst, op[0], op[1], ir->operands[0]->type->vector_elements);
      break;

   case ir_unop_sqrt:
      emit_math(SHADER_OPCODE_SQRT, result_dst, op[0]);
      break;
   case ir_unop_rsq:
      emit_math(SHADER_OPCODE_RSQ, result_dst, op[0]);
      break;
   case ir_unop_i2f:
   case ir_unop_i2u:
   case ir_unop_u2i:
   case ir_unop_u2f:
   case ir_unop_b2f:
   case ir_unop_b2i:
   case ir_unop_f2i:
      /* Type conversions are handled by the MOV's differing
       * source/destination register types.
       */
      emit(BRW_OPCODE_MOV, result_dst, op[0]);
      break;
   case ir_unop_f2b:
   case ir_unop_i2b: {
      dst_reg temp = result_dst;
      /* original gen4 does implicit conversion before comparison. */
      if (intel->gen < 5)
	 temp.type = op[0].type;

      inst = emit(BRW_OPCODE_CMP, temp, op[0], src_reg(0.0f));
      inst->conditional_mod = BRW_CONDITIONAL_NZ;
      inst = emit(BRW_OPCODE_AND, result_dst, result_src, src_reg(1));
      break;
   }

   case ir_unop_trunc:
      emit(BRW_OPCODE_RNDZ, result_dst, op[0]);
      break;
   case ir_unop_ceil:
      /* ceil(x) == -floor(-x): negate the source, round down, and
       * mark the result for negation on read.
       */
      op[0].negate = !op[0].negate;
      inst = emit(BRW_OPCODE_RNDD, result_dst, op[0]);
      this->result.negate = true;
      break;
   case ir_unop_floor:
      inst = emit(BRW_OPCODE_RNDD, result_dst, op[0]);
      break;
   case ir_unop_fract:
      inst = emit(BRW_OPCODE_FRC, result_dst, op[0]);
      break;
   case ir_unop_round_even:
      emit(BRW_OPCODE_RNDE, result_dst, op[0]);
      break;

   case ir_binop_min:
      /* min/max: compare, then predicate-select between the operands. */
      inst = emit(BRW_OPCODE_CMP, result_dst, op[0], op[1]);
      inst->conditional_mod = BRW_CONDITIONAL_L;

      inst = emit(BRW_OPCODE_SEL, result_dst, op[0], op[1]);
      inst->predicate = BRW_PREDICATE_NORMAL;
      break;
   case ir_binop_max:
      inst = emit(BRW_OPCODE_CMP, result_dst, op[0], op[1]);
      inst->conditional_mod = BRW_CONDITIONAL_G;

      inst = emit(BRW_OPCODE_SEL, result_dst, op[0], op[1]);
      inst->predicate = BRW_PREDICATE_NORMAL;
      break;

   case ir_binop_pow:
      emit_math(SHADER_OPCODE_POW, result_dst, op[0], op[1]);
      break;

   case ir_unop_bit_not:
      inst = emit(BRW_OPCODE_NOT, result_dst, op[0]);
      break;
   case ir_binop_bit_and:
      inst = emit(BRW_OPCODE_AND, result_dst, op[0], op[1]);
      break;
   case ir_binop_bit_xor:
      inst = emit(BRW_OPCODE_XOR, result_dst, op[0], op[1]);
      break;
   case ir_binop_bit_or:
      inst = emit(BRW_OPCODE_OR, result_dst, op[0], op[1]);
      break;

   case ir_binop_lshift:
   case ir_binop_rshift:
      assert(!"GLSL 1.30 features unsupported");
      break;

   case ir_quadop_vector:
      assert(!"not reached: should be handled by lower_quadop_vector");
      break;
   }
}
1112
1113
1114 void
1115 vec4_visitor::visit(ir_swizzle *ir)
1116 {
1117 src_reg src;
1118 int i = 0;
1119 int swizzle[4];
1120
1121 /* Note that this is only swizzles in expressions, not those on the left
1122 * hand side of an assignment, which do write masking. See ir_assignment
1123 * for that.
1124 */
1125
1126 ir->val->accept(this);
1127 src = this->result;
1128 assert(src.file != BAD_FILE);
1129
1130 for (i = 0; i < ir->type->vector_elements; i++) {
1131 switch (i) {
1132 case 0:
1133 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.x);
1134 break;
1135 case 1:
1136 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.y);
1137 break;
1138 case 2:
1139 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.z);
1140 break;
1141 case 3:
1142 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.w);
1143 break;
1144 }
1145 }
1146 for (; i < 4; i++) {
1147 /* Replicate the last channel out. */
1148 swizzle[i] = swizzle[ir->type->vector_elements - 1];
1149 }
1150
1151 src.swizzle = BRW_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]);
1152
1153 this->result = src;
1154 }
1155
1156 void
1157 vec4_visitor::visit(ir_dereference_variable *ir)
1158 {
1159 const struct glsl_type *type = ir->type;
1160 dst_reg *reg = variable_storage(ir->var);
1161
1162 if (!reg) {
1163 fail("Failed to find variable storage for %s\n", ir->var->name);
1164 this->result = src_reg(brw_null_reg());
1165 return;
1166 }
1167
1168 this->result = src_reg(*reg);
1169
1170 if (type->is_scalar() || type->is_vector() || type->is_matrix())
1171 this->result.swizzle = swizzle_for_size(type->vector_elements);
1172 }
1173
void
vec4_visitor::visit(ir_dereference_array *ir)
{
   ir_constant *constant_index;
   src_reg src;
   /* Size (in vec4 register slots) of one array element. */
   int element_size = type_size(ir->type);

   /* Try to fold the index to a compile-time constant so the element can
    * be reached with a plain register-offset adjustment.
    */
   constant_index = ir->array_index->constant_expression_value();

   ir->array->accept(this);
   src = this->result;

   if (constant_index) {
      src.reg_offset += constant_index->value.i[0] * element_size;
   } else {
      /* Variable indexing is not implemented yet; the disabled code below
       * sketches a reladdr-based approach.  Until then a non-constant
       * index silently falls through with no offset applied.
       */
#if 0 /* Variable array index */
      /* Variable index array dereference. It eats the "vec4" of the
       * base of the array and an index that offsets the Mesa register
       * index.
       */
      ir->array_index->accept(this);

      src_reg index_reg;

      if (element_size == 1) {
	 index_reg = this->result;
      } else {
	 index_reg = src_reg(this, glsl_type::float_type);

	 emit(BRW_OPCODE_MUL, dst_reg(index_reg),
	      this->result, src_reg_for_float(element_size));
      }

      src.reladdr = ralloc(mem_ctx, src_reg);
      memcpy(src.reladdr, &index_reg, sizeof(index_reg));
#endif
   }

   /* If the type is smaller than a vec4, replicate the last channel out. */
   if (ir->type->is_scalar() || ir->type->is_vector())
      src.swizzle = swizzle_for_size(ir->type->vector_elements);
   else
      src.swizzle = BRW_SWIZZLE_NOOP;
   src.type = brw_type_for_base_type(ir->type);

   this->result = src;
}
1221
1222 void
1223 vec4_visitor::visit(ir_dereference_record *ir)
1224 {
1225 unsigned int i;
1226 const glsl_type *struct_type = ir->record->type;
1227 int offset = 0;
1228
1229 ir->record->accept(this);
1230
1231 for (i = 0; i < struct_type->length; i++) {
1232 if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0)
1233 break;
1234 offset += type_size(struct_type->fields.structure[i].type);
1235 }
1236
1237 /* If the type is smaller than a vec4, replicate the last channel out. */
1238 if (ir->type->is_scalar() || ir->type->is_vector())
1239 this->result.swizzle = swizzle_for_size(ir->type->vector_elements);
1240 else
1241 this->result.swizzle = BRW_SWIZZLE_NOOP;
1242 this->result.type = brw_type_for_base_type(ir->type);
1243
1244 this->result.reg_offset += offset;
1245 }
1246
1247 /**
1248 * We want to be careful in assignment setup to hit the actual storage
1249 * instead of potentially using a temporary like we might with the
1250 * ir_dereference handler.
1251 */
1252 static dst_reg
1253 get_assignment_lhs(ir_dereference *ir, vec4_visitor *v)
1254 {
1255 /* The LHS must be a dereference. If the LHS is a variable indexed array
1256 * access of a vector, it must be separated into a series conditional moves
1257 * before reaching this point (see ir_vec_index_to_cond_assign).
1258 */
1259 assert(ir->as_dereference());
1260 ir_dereference_array *deref_array = ir->as_dereference_array();
1261 if (deref_array) {
1262 assert(!deref_array->array->type->is_vector());
1263 }
1264
1265 /* Use the rvalue deref handler for the most part. We'll ignore
1266 * swizzles in it and write swizzles using writemask, though.
1267 */
1268 ir->accept(v);
1269 return dst_reg(v->result);
1270 }
1271
1272 void
1273 vec4_visitor::emit_block_move(dst_reg *dst, src_reg *src,
1274 const struct glsl_type *type, bool predicated)
1275 {
1276 if (type->base_type == GLSL_TYPE_STRUCT) {
1277 for (unsigned int i = 0; i < type->length; i++) {
1278 emit_block_move(dst, src, type->fields.structure[i].type, predicated);
1279 }
1280 return;
1281 }
1282
1283 if (type->is_array()) {
1284 for (unsigned int i = 0; i < type->length; i++) {
1285 emit_block_move(dst, src, type->fields.array, predicated);
1286 }
1287 return;
1288 }
1289
1290 if (type->is_matrix()) {
1291 const struct glsl_type *vec_type;
1292
1293 vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT,
1294 type->vector_elements, 1);
1295
1296 for (int i = 0; i < type->matrix_columns; i++) {
1297 emit_block_move(dst, src, vec_type, predicated);
1298 }
1299 return;
1300 }
1301
1302 assert(type->is_scalar() || type->is_vector());
1303
1304 dst->type = brw_type_for_base_type(type);
1305 src->type = dst->type;
1306
1307 dst->writemask = (1 << type->vector_elements) - 1;
1308
1309 /* Do we need to worry about swizzling a swizzle? */
1310 assert(src->swizzle = BRW_SWIZZLE_NOOP);
1311 src->swizzle = swizzle_for_size(type->vector_elements);
1312
1313 vec4_instruction *inst = emit(BRW_OPCODE_MOV, *dst, *src);
1314 if (predicated)
1315 inst->predicate = BRW_PREDICATE_NORMAL;
1316
1317 dst->reg_offset++;
1318 src->reg_offset++;
1319 }
1320
void
vec4_visitor::visit(ir_assignment *ir)
{
   dst_reg dst = get_assignment_lhs(ir->lhs, this);

   /* Aggregate LHS (struct/array/matrix): copy block by block; the write
    * mask machinery below only applies to scalars and vectors.
    */
   if (!ir->lhs->type->is_scalar() &&
       !ir->lhs->type->is_vector()) {
      ir->rhs->accept(this);
      src_reg src = this->result;

      if (ir->condition) {
	 emit_bool_to_cond_code(ir->condition);
      }

      emit_block_move(&dst, &src, ir->rhs->type, ir->condition != NULL);
      return;
   }

   /* Now we're down to just a scalar/vector with writemasks. */
   int i;
   /* NOTE(review): the two "for (int i ...)" loops below shadow this i;
    * only the final copy loop uses the outer declaration.
    */

   ir->rhs->accept(this);
   src_reg src = this->result;

   int swizzles[4];
   int first_enabled_chan = 0;
   int src_chan = 0;

   assert(ir->lhs->type->is_vector() ||
	  ir->lhs->type->is_scalar());
   dst.writemask = ir->write_mask;

   /* Find the source channel that feeds the first written channel; it is
    * replicated into all the unwritten swizzle slots below.
    */
   for (int i = 0; i < 4; i++) {
      if (dst.writemask & (1 << i)) {
	 first_enabled_chan = BRW_GET_SWZ(src.swizzle, i);
	 break;
      }
   }

   /* Swizzle a small RHS vector into the channels being written.
    *
    * glsl ir treats write_mask as dictating how many channels are
    * present on the RHS while in our instructions we need to make
    * those channels appear in the slots of the vec4 they're written to.
    */
   for (int i = 0; i < 4; i++) {
      if (dst.writemask & (1 << i))
	 swizzles[i] = BRW_GET_SWZ(src.swizzle, src_chan++);
      else
	 swizzles[i] = first_enabled_chan;
   }
   src.swizzle = BRW_SWIZZLE4(swizzles[0], swizzles[1],
			      swizzles[2], swizzles[3]);

   if (ir->condition) {
      emit_bool_to_cond_code(ir->condition);
   }

   /* One MOV per vec4 slot of the LHS type, predicated if conditional. */
   for (i = 0; i < type_size(ir->lhs->type); i++) {
      vec4_instruction *inst = emit(BRW_OPCODE_MOV, dst, src);

      if (ir->condition)
	 inst->predicate = BRW_PREDICATE_NORMAL;

      dst.reg_offset++;
      src.reg_offset++;
   }
}
1389
1390
1391 void
1392 vec4_visitor::visit(ir_constant *ir)
1393 {
1394 if (ir->type->base_type == GLSL_TYPE_STRUCT) {
1395 src_reg temp_base = src_reg(this, ir->type);
1396 dst_reg temp = dst_reg(temp_base);
1397
1398 foreach_iter(exec_list_iterator, iter, ir->components) {
1399 ir_constant *field_value = (ir_constant *)iter.get();
1400 int size = type_size(field_value->type);
1401
1402 assert(size > 0);
1403
1404 field_value->accept(this);
1405 src_reg src = this->result;
1406
1407 for (int i = 0; i < (unsigned int)size; i++) {
1408 emit(BRW_OPCODE_MOV, temp, src);
1409
1410 src.reg_offset++;
1411 temp.reg_offset++;
1412 }
1413 }
1414 this->result = temp_base;
1415 return;
1416 }
1417
1418 if (ir->type->is_array()) {
1419 src_reg temp_base = src_reg(this, ir->type);
1420 dst_reg temp = dst_reg(temp_base);
1421 int size = type_size(ir->type->fields.array);
1422
1423 assert(size > 0);
1424
1425 for (unsigned int i = 0; i < ir->type->length; i++) {
1426 ir->array_elements[i]->accept(this);
1427 src_reg src = this->result;
1428 for (int j = 0; j < size; j++) {
1429 emit(BRW_OPCODE_MOV, temp, src);
1430
1431 src.reg_offset++;
1432 temp.reg_offset++;
1433 }
1434 }
1435 this->result = temp_base;
1436 return;
1437 }
1438
1439 if (ir->type->is_matrix()) {
1440 this->result = src_reg(this, ir->type);
1441 dst_reg dst = dst_reg(this->result);
1442
1443 assert(ir->type->base_type == GLSL_TYPE_FLOAT);
1444
1445 for (int i = 0; i < ir->type->matrix_columns; i++) {
1446 for (int j = 0; j < ir->type->vector_elements; j++) {
1447 dst.writemask = 1 << j;
1448 emit(BRW_OPCODE_MOV, dst,
1449 src_reg(ir->value.f[i * ir->type->vector_elements + j]));
1450 }
1451 dst.reg_offset++;
1452 }
1453 return;
1454 }
1455
1456 this->result = src_reg(this, ir->type);
1457 dst_reg dst = dst_reg(this->result);
1458
1459 for (int i = 0; i < ir->type->vector_elements; i++) {
1460 dst.writemask = 1 << i;
1461
1462 switch (ir->type->base_type) {
1463 case GLSL_TYPE_FLOAT:
1464 emit(BRW_OPCODE_MOV, dst, src_reg(ir->value.f[i]));
1465 break;
1466 case GLSL_TYPE_INT:
1467 emit(BRW_OPCODE_MOV, dst, src_reg(ir->value.i[i]));
1468 break;
1469 case GLSL_TYPE_UINT:
1470 emit(BRW_OPCODE_MOV, dst, src_reg(ir->value.u[i]));
1471 break;
1472 case GLSL_TYPE_BOOL:
1473 emit(BRW_OPCODE_MOV, dst, src_reg(ir->value.b[i]));
1474 break;
1475 default:
1476 assert(!"Non-float/uint/int/bool constant");
1477 break;
1478 }
1479 }
1480 }
1481
void
vec4_visitor::visit(ir_call *ir)
{
   /* Calls are expected to be eliminated before this backend runs —
    * presumably by function inlining in the GLSL IR passes; TODO confirm.
    */
   assert(!"not reached");
}
1487
void
vec4_visitor::visit(ir_texture *ir)
{
   /* Vertex-shader texturing is not implemented in this visitor yet. */
   assert(!"not reached");
}
1493
void
vec4_visitor::visit(ir_return *ir)
{
   /* Returns are expected to be lowered away before codegen — presumably
    * by a lower-returns IR pass; TODO confirm which pass guarantees this.
    */
   assert(!"not reached");
}
1499
void
vec4_visitor::visit(ir_discard *ir)
{
   /* discard is a fragment-shader-only construct; it can never appear in
    * the vertex shader IR this visitor consumes.
    */
   assert(!"not reached");
}
1505
void
vec4_visitor::visit(ir_if *ir)
{
   /* Don't point the annotation at the if statement, because then it plus
    * the then and else blocks get printed.
    */
   this->base_ir = ir->condition;

   if (intel->gen == 6) {
      /* gen6 has its own IF emission path — see emit_if_gen6. */
      emit_if_gen6(ir);
   } else {
      /* Evaluate the condition into the flag register, then emit an IF
       * predicated on it.
       */
      emit_bool_to_cond_code(ir->condition);
      vec4_instruction *inst = emit(BRW_OPCODE_IF);
      inst->predicate = BRW_PREDICATE_NORMAL;
   }

   visit_instructions(&ir->then_instructions);

   /* ELSE is only emitted when the else block is non-empty. */
   if (!ir->else_instructions.is_empty()) {
      this->base_ir = ir->condition;
      emit(BRW_OPCODE_ELSE);

      visit_instructions(&ir->else_instructions);
   }

   this->base_ir = ir->condition;
   emit(BRW_OPCODE_ENDIF);
}
1534
1535 int
1536 vec4_visitor::emit_vue_header_gen4(int header_mrf)
1537 {
1538 /* Get the position */
1539 src_reg pos = src_reg(output_reg[VERT_RESULT_HPOS]);
1540
1541 /* Build ndc coords, which are (x/w, y/w, z/w, 1/w) */
1542 dst_reg ndc = dst_reg(this, glsl_type::vec4_type);
1543
1544 current_annotation = "NDC";
1545 dst_reg ndc_w = ndc;
1546 ndc_w.writemask = WRITEMASK_W;
1547 src_reg pos_w = pos;
1548 pos_w.swizzle = BRW_SWIZZLE4(SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W);
1549 emit_math(SHADER_OPCODE_RCP, ndc_w, pos_w);
1550
1551 dst_reg ndc_xyz = ndc;
1552 ndc_xyz.writemask = WRITEMASK_XYZ;
1553
1554 emit(BRW_OPCODE_MUL, ndc_xyz, pos, src_reg(ndc_w));
1555
1556 if ((c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) ||
1557 c->key.nr_userclip || brw->has_negative_rhw_bug) {
1558 dst_reg header1 = dst_reg(this, glsl_type::uvec4_type);
1559 GLuint i;
1560
1561 emit(BRW_OPCODE_MOV, header1, 0u);
1562
1563 if (c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) {
1564 assert(!"finishme: psiz");
1565 src_reg psiz;
1566
1567 header1.writemask = WRITEMASK_W;
1568 emit(BRW_OPCODE_MUL, header1, psiz, 1u << 11);
1569 emit(BRW_OPCODE_AND, header1, src_reg(header1), 0x7ff << 8);
1570 }
1571
1572 for (i = 0; i < c->key.nr_userclip; i++) {
1573 vec4_instruction *inst;
1574
1575 inst = emit(BRW_OPCODE_DP4, dst_reg(brw_null_reg()),
1576 pos, src_reg(c->userplane[i]));
1577 inst->conditional_mod = BRW_CONDITIONAL_L;
1578
1579 emit(BRW_OPCODE_OR, header1, src_reg(header1), 1u << i);
1580 inst->predicate = BRW_PREDICATE_NORMAL;
1581 }
1582
1583 /* i965 clipping workaround:
1584 * 1) Test for -ve rhw
1585 * 2) If set,
1586 * set ndc = (0,0,0,0)
1587 * set ucp[6] = 1
1588 *
1589 * Later, clipping will detect ucp[6] and ensure the primitive is
1590 * clipped against all fixed planes.
1591 */
1592 if (brw->has_negative_rhw_bug) {
1593 #if 0
1594 /* FINISHME */
1595 brw_CMP(p,
1596 vec8(brw_null_reg()),
1597 BRW_CONDITIONAL_L,
1598 brw_swizzle1(ndc, 3),
1599 brw_imm_f(0));
1600
1601 brw_OR(p, brw_writemask(header1, WRITEMASK_W), header1, brw_imm_ud(1<<6));
1602 brw_MOV(p, ndc, brw_imm_f(0));
1603 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
1604 #endif
1605 }
1606
1607 header1.writemask = WRITEMASK_XYZW;
1608 emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), src_reg(header1));
1609 } else {
1610 emit(BRW_OPCODE_MOV, retype(brw_message_reg(header_mrf++),
1611 BRW_REGISTER_TYPE_UD), 0u);
1612 }
1613
1614 if (intel->gen == 5) {
1615 /* There are 20 DWs (D0-D19) in VUE header on Ironlake:
1616 * dword 0-3 (m1) of the header is indices, point width, clip flags.
1617 * dword 4-7 (m2) is the ndc position (set above)
1618 * dword 8-11 (m3) of the vertex header is the 4D space position
1619 * dword 12-19 (m4,m5) of the vertex header is the user clip distance.
1620 * m6 is a pad so that the vertex element data is aligned
1621 * m7 is the first vertex data we fill.
1622 */
1623 current_annotation = "NDC";
1624 emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), src_reg(ndc));
1625
1626 current_annotation = "gl_Position";
1627 emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), pos);
1628
1629 /* user clip distance. */
1630 header_mrf += 2;
1631
1632 /* Pad so that vertex element data is aligned. */
1633 header_mrf++;
1634 } else {
1635 /* There are 8 dwords in VUE header pre-Ironlake:
1636 * dword 0-3 (m1) is indices, point width, clip flags.
1637 * dword 4-7 (m2) is ndc position (set above)
1638 *
1639 * dword 8-11 (m3) is the first vertex data.
1640 */
1641 current_annotation = "NDC";
1642 emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), src_reg(ndc));
1643
1644 current_annotation = "gl_Position";
1645 emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), pos);
1646 }
1647
1648 return header_mrf;
1649 }
1650
1651 int
1652 vec4_visitor::emit_vue_header_gen6(int header_mrf)
1653 {
1654 struct brw_reg reg;
1655
1656 /* There are 8 or 16 DWs (D0-D15) in VUE header on Sandybridge:
1657 * dword 0-3 (m2) of the header is indices, point width, clip flags.
1658 * dword 4-7 (m3) is the 4D space position
1659 * dword 8-15 (m4,m5) of the vertex header is the user clip distance if
1660 * enabled.
1661 *
1662 * m4 or 6 is the first vertex element data we fill.
1663 */
1664
1665 current_annotation = "indices, point width, clip flags";
1666 reg = brw_message_reg(header_mrf++);
1667 emit(BRW_OPCODE_MOV, retype(reg, BRW_REGISTER_TYPE_D), src_reg(0));
1668 if (c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) {
1669 emit(BRW_OPCODE_MOV, brw_writemask(reg, WRITEMASK_W),
1670 src_reg(output_reg[VERT_RESULT_PSIZ]));
1671 }
1672
1673 current_annotation = "gl_Position";
1674 emit(BRW_OPCODE_MOV,
1675 brw_message_reg(header_mrf++), src_reg(output_reg[VERT_RESULT_HPOS]));
1676
1677 current_annotation = "user clip distances";
1678 if (c->key.nr_userclip) {
1679 for (int i = 0; i < c->key.nr_userclip; i++) {
1680 struct brw_reg m;
1681 if (i < 4)
1682 m = brw_message_reg(header_mrf);
1683 else
1684 m = brw_message_reg(header_mrf + 1);
1685
1686 emit(BRW_OPCODE_DP4,
1687 dst_reg(brw_writemask(m, 1 << (i & 3))),
1688 src_reg(c->userplane[i]));
1689 }
1690 header_mrf += 2;
1691 }
1692
1693 current_annotation = NULL;
1694
1695 return header_mrf;
1696 }
1697
1698 static int
1699 align_interleaved_urb_mlen(struct brw_context *brw, int mlen)
1700 {
1701 struct intel_context *intel = &brw->intel;
1702
1703 if (intel->gen >= 6) {
1704 /* URB data written (does not include the message header reg) must
1705 * be a multiple of 256 bits, or 2 VS registers. See vol5c.5,
1706 * section 5.4.3.2.2: URB_INTERLEAVED.
1707 *
1708 * URB entries are allocated on a multiple of 1024 bits, so an
1709 * extra 128 bits written here to make the end align to 256 is
1710 * no problem.
1711 */
1712 if ((mlen % 2) != 1)
1713 mlen++;
1714 }
1715
1716 return mlen;
1717 }
1718
1719 /**
1720 * Generates the VUE payload plus the 1 or 2 URB write instructions to
1721 * complete the VS thread.
1722 *
1723 * The VUE layout is documented in Volume 2a.
1724 */
1725 void
1726 vec4_visitor::emit_urb_writes()
1727 {
1728 int base_mrf = 1;
1729 int mrf = base_mrf;
1730 int urb_entry_size;
1731
1732 /* FINISHME: edgeflag */
1733
1734 /* First mrf is the g0-based message header containing URB handles and such,
1735 * which is implied in VS_OPCODE_URB_WRITE.
1736 */
1737 mrf++;
1738
1739 if (intel->gen >= 6) {
1740 mrf = emit_vue_header_gen6(mrf);
1741 } else {
1742 mrf = emit_vue_header_gen4(mrf);
1743 }
1744
1745 int attr;
1746 for (attr = 0; attr < VERT_RESULT_MAX; attr++) {
1747 if (!(c->prog_data.outputs_written & BITFIELD64_BIT(attr)))
1748 continue;
1749
1750 /* This is set up in the VUE header. */
1751 if (attr == VERT_RESULT_HPOS)
1752 continue;
1753
1754 /* This is loaded into the VUE header, and thus doesn't occupy
1755 * an attribute slot.
1756 */
1757 if (attr == VERT_RESULT_PSIZ)
1758 continue;
1759
1760 emit(BRW_OPCODE_MOV, brw_message_reg(mrf++), src_reg(output_reg[attr]));
1761
1762 /* If this is MRF 15, we can't fit anything more into this URB
1763 * WRITE. Note that base_mrf of 1 means that MRF 15 is an
1764 * even-numbered amount of URB write data, which will meet
1765 * gen6's requirements for length alignment.
1766 */
1767 if (mrf == 15)
1768 break;
1769 }
1770
1771 vec4_instruction *inst = emit(VS_OPCODE_URB_WRITE);
1772 inst->base_mrf = base_mrf;
1773 inst->mlen = align_interleaved_urb_mlen(brw, mrf - base_mrf);
1774 inst->eot = true;
1775
1776 urb_entry_size = mrf - base_mrf;
1777
1778 for (; attr < VERT_RESULT_MAX; attr++) {
1779 if (!(c->prog_data.outputs_written & BITFIELD64_BIT(attr)))
1780 continue;
1781 fail("Second URB write not supported.\n");
1782 break;
1783 }
1784
1785 if (intel->gen == 6)
1786 c->prog_data.urb_entry_size = ALIGN(urb_entry_size, 8) / 8;
1787 else
1788 c->prog_data.urb_entry_size = ALIGN(urb_entry_size, 4) / 4;
1789 }
1790
1791 vec4_visitor::vec4_visitor(struct brw_vs_compile *c,
1792 struct gl_shader_program *prog,
1793 struct brw_shader *shader)
1794 {
1795 this->c = c;
1796 this->p = &c->func;
1797 this->brw = p->brw;
1798 this->intel = &brw->intel;
1799 this->ctx = &intel->ctx;
1800 this->prog = prog;
1801 this->shader = shader;
1802
1803 this->mem_ctx = ralloc_context(NULL);
1804 this->failed = false;
1805
1806 this->base_ir = NULL;
1807 this->current_annotation = NULL;
1808
1809 this->c = c;
1810 this->vp = brw->vertex_program; /* FINISHME: change for precompile */
1811 this->prog_data = &c->prog_data;
1812
1813 this->variable_ht = hash_table_ctor(0,
1814 hash_table_pointer_hash,
1815 hash_table_pointer_compare);
1816
1817 this->virtual_grf_sizes = NULL;
1818 this->virtual_grf_count = 0;
1819 this->virtual_grf_array_size = 0;
1820
1821 this->uniforms = 0;
1822
1823 this->variable_ht = hash_table_ctor(0,
1824 hash_table_pointer_hash,
1825 hash_table_pointer_compare);
1826 }
1827
vec4_visitor::~vec4_visitor()
{
   /* NOTE(review): mem_ctx (allocated in the constructor) is not released
    * here — presumably because fail_msg lives in it and outlives the
    * visitor; TODO confirm who frees it.
    */
   hash_table_dtor(this->variable_ht);
}
1832
1833
1834 void
1835 vec4_visitor::fail(const char *format, ...)
1836 {
1837 va_list va;
1838 char *msg;
1839
1840 if (failed)
1841 return;
1842
1843 failed = true;
1844
1845 va_start(va, format);
1846 msg = ralloc_vasprintf(mem_ctx, format, va);
1847 va_end(va);
1848 msg = ralloc_asprintf(mem_ctx, "VS compile failed: %s\n", msg);
1849
1850 this->fail_msg = msg;
1851
1852 if (INTEL_DEBUG & DEBUG_VS) {
1853 fprintf(stderr, "%s", msg);
1854 }
1855 }
1856
1857 } /* namespace brw */