i965: Add support for ir_unop_f2u to i965 backend.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_vec4_visitor.cpp
1 /*
2 * Copyright © 2011 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "brw_vec4.h"
25 extern "C" {
26 #include "main/macros.h"
27 #include "program/prog_parameter.h"
28 #include "program/sampler.h"
29 }
30
31 namespace brw {
32
33 src_reg::src_reg(dst_reg reg)
34 {
35 init();
36
37 this->file = reg.file;
38 this->reg = reg.reg;
39 this->reg_offset = reg.reg_offset;
40 this->type = reg.type;
41 this->reladdr = reg.reladdr;
42 this->fixed_hw_reg = reg.fixed_hw_reg;
43
44 int swizzles[4];
45 int next_chan = 0;
46 int last = 0;
47
48 for (int i = 0; i < 4; i++) {
49 if (!(reg.writemask & (1 << i)))
50 continue;
51
52 swizzles[next_chan++] = last = i;
53 }
54
55 for (; next_chan < 4; next_chan++) {
56 swizzles[next_chan] = last;
57 }
58
59 this->swizzle = BRW_SWIZZLE4(swizzles[0], swizzles[1],
60 swizzles[2], swizzles[3]);
61 }
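/* For illustration, tracing the loop above: a destination writemask of .xz
 * produces swizzles[] = {X, Z, Z, Z}, i.e. the enabled channels in order
 * with the last enabled channel replicated into the unused slots.
 */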
62
63 dst_reg::dst_reg(src_reg reg)
64 {
65 init();
66
67 this->file = reg.file;
68 this->reg = reg.reg;
69 this->reg_offset = reg.reg_offset;
70 this->type = reg.type;
71 this->writemask = WRITEMASK_XYZW;
72 this->reladdr = reg.reladdr;
73 this->fixed_hw_reg = reg.fixed_hw_reg;
74 }
75
76 vec4_instruction::vec4_instruction(vec4_visitor *v,
77 enum opcode opcode, dst_reg dst,
78 src_reg src0, src_reg src1, src_reg src2)
79 {
80 this->opcode = opcode;
81 this->dst = dst;
82 this->src[0] = src0;
83 this->src[1] = src1;
84 this->src[2] = src2;
85 this->ir = v->base_ir;
86 this->annotation = v->current_annotation;
87 }
88
89 vec4_instruction *
90 vec4_visitor::emit(vec4_instruction *inst)
91 {
92 this->instructions.push_tail(inst);
93
94 return inst;
95 }
96
97 vec4_instruction *
98 vec4_visitor::emit_before(vec4_instruction *inst, vec4_instruction *new_inst)
99 {
100 new_inst->ir = inst->ir;
101 new_inst->annotation = inst->annotation;
102
103 inst->insert_before(new_inst);
104
105 return inst;
106 }
107
108 vec4_instruction *
109 vec4_visitor::emit(enum opcode opcode, dst_reg dst,
110 src_reg src0, src_reg src1, src_reg src2)
111 {
112 return emit(new(mem_ctx) vec4_instruction(this, opcode, dst,
113 src0, src1, src2));
114 }
115
116
117 vec4_instruction *
118 vec4_visitor::emit(enum opcode opcode, dst_reg dst, src_reg src0, src_reg src1)
119 {
120 return emit(new(mem_ctx) vec4_instruction(this, opcode, dst, src0, src1));
121 }
122
123 vec4_instruction *
124 vec4_visitor::emit(enum opcode opcode, dst_reg dst, src_reg src0)
125 {
126 return emit(new(mem_ctx) vec4_instruction(this, opcode, dst, src0));
127 }
128
129 vec4_instruction *
130 vec4_visitor::emit(enum opcode opcode)
131 {
132 return emit(new(mem_ctx) vec4_instruction(this, opcode, dst_reg()));
133 }
134
135 #define ALU1(op) \
136 vec4_instruction * \
137 vec4_visitor::op(dst_reg dst, src_reg src0) \
138 { \
139 return new(mem_ctx) vec4_instruction(this, BRW_OPCODE_##op, dst, \
140 src0); \
141 }
142
143 #define ALU2(op) \
144 vec4_instruction * \
145 vec4_visitor::op(dst_reg dst, src_reg src0, src_reg src1) \
146 { \
147 return new(mem_ctx) vec4_instruction(this, BRW_OPCODE_##op, dst, \
148 src0, src1); \
149 }
150
151 ALU1(NOT)
152 ALU1(MOV)
153 ALU1(FRC)
154 ALU1(RNDD)
155 ALU1(RNDE)
156 ALU1(RNDZ)
157 ALU2(ADD)
158 ALU2(MUL)
159 ALU2(MACH)
160 ALU2(AND)
161 ALU2(OR)
162 ALU2(XOR)
163 ALU2(DP3)
164 ALU2(DP4)
165
166 /** Gen4 predicated IF. */
167 vec4_instruction *
168 vec4_visitor::IF(uint32_t predicate)
169 {
170 vec4_instruction *inst;
171
172 inst = new(mem_ctx) vec4_instruction(this, BRW_OPCODE_IF);
173 inst->predicate = predicate;
174
175 return inst;
176 }
177
178 /** Gen6+ IF with embedded comparison. */
179 vec4_instruction *
180 vec4_visitor::IF(src_reg src0, src_reg src1, uint32_t condition)
181 {
182 assert(intel->gen >= 6);
183
184 vec4_instruction *inst;
185
186 resolve_ud_negate(&src0);
187 resolve_ud_negate(&src1);
188
189 inst = new(mem_ctx) vec4_instruction(this, BRW_OPCODE_IF, dst_null_d(),
190 src0, src1);
191 inst->conditional_mod = condition;
192
193 return inst;
194 }
195
196 /**
197 * CMP: Sets the low bit of the destination channels with the result
198 * of the comparison, while the upper bits are undefined, and updates
199 * the flag register with the packed 16 bits of the result.
200 */
201 vec4_instruction *
202 vec4_visitor::CMP(dst_reg dst, src_reg src0, src_reg src1, uint32_t condition)
203 {
204 vec4_instruction *inst;
205
206 /* original gen4 does type conversion to the destination type
207 * before comparison, producing garbage results for floating
208 * point comparisons.
209 */
210 if (intel->gen == 4) {
211 dst.type = src0.type;
212 if (dst.file == HW_REG)
213 dst.fixed_hw_reg.type = dst.type;
214 }
215
216 resolve_ud_negate(&src0);
217 resolve_ud_negate(&src1);
218
219 inst = new(mem_ctx) vec4_instruction(this, BRW_OPCODE_CMP, dst, src0, src1);
220 inst->conditional_mod = condition;
221
222 return inst;
223 }
224
225 vec4_instruction *
226 vec4_visitor::SCRATCH_READ(dst_reg dst, src_reg index)
227 {
228 vec4_instruction *inst;
229
230 inst = new(mem_ctx) vec4_instruction(this, VS_OPCODE_SCRATCH_READ,
231 dst, index);
232 inst->base_mrf = 14;
233 inst->mlen = 1;
234
235 return inst;
236 }
237
238 vec4_instruction *
239 vec4_visitor::SCRATCH_WRITE(dst_reg dst, src_reg src, src_reg index)
240 {
241 vec4_instruction *inst;
242
243 inst = new(mem_ctx) vec4_instruction(this, VS_OPCODE_SCRATCH_WRITE,
244 dst, src, index);
245 inst->base_mrf = 13;
246 inst->mlen = 2;
247
248 return inst;
249 }
250
251 void
252 vec4_visitor::emit_dp(dst_reg dst, src_reg src0, src_reg src1, unsigned elements)
253 {
254 static enum opcode dot_opcodes[] = {
255 BRW_OPCODE_DP2, BRW_OPCODE_DP3, BRW_OPCODE_DP4
256 };
257
258 emit(dot_opcodes[elements - 2], dst, src0, src1);
259 }
260
261 void
262 vec4_visitor::emit_math1_gen6(enum opcode opcode, dst_reg dst, src_reg src)
263 {
264 /* The gen6 math instruction ignores the source modifiers --
265 * swizzle, abs, negate, and at least some parts of the register
266 * region description.
267 *
268 * While it would seem that this MOV could be avoided at this point
269 * in the case that the swizzle is matched up with the destination
270 * writemask, note that uniform packing and register allocation
271 * could rearrange our swizzle, so let's leave this matter up to
272 * copy propagation later.
273 */
274 src_reg temp_src = src_reg(this, glsl_type::vec4_type);
275 emit(MOV(dst_reg(temp_src), src));
276
277 if (dst.writemask != WRITEMASK_XYZW) {
278 /* The gen6 math instruction must be align1, so we can't do
279 * writemasks.
280 */
281 dst_reg temp_dst = dst_reg(this, glsl_type::vec4_type);
282
283 emit(opcode, temp_dst, temp_src);
284
285 emit(MOV(dst, src_reg(temp_dst)));
286 } else {
287 emit(opcode, dst, temp_src);
288 }
289 }
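/* For illustration: a gen6 RSQ whose destination writes only .x becomes
 * three instructions here -- MOV the source into a temporary, run the math
 * opcode on a full temporary destination, then MOV that result into dst.x --
 * because the align1 math instruction cannot honor the writemask itself.
 */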
290
291 void
292 vec4_visitor::emit_math1_gen4(enum opcode opcode, dst_reg dst, src_reg src)
293 {
294 vec4_instruction *inst = emit(opcode, dst, src);
295 inst->base_mrf = 1;
296 inst->mlen = 1;
297 }
298
299 void
300 vec4_visitor::emit_math(opcode opcode, dst_reg dst, src_reg src)
301 {
302 switch (opcode) {
303 case SHADER_OPCODE_RCP:
304 case SHADER_OPCODE_RSQ:
305 case SHADER_OPCODE_SQRT:
306 case SHADER_OPCODE_EXP2:
307 case SHADER_OPCODE_LOG2:
308 case SHADER_OPCODE_SIN:
309 case SHADER_OPCODE_COS:
310 break;
311 default:
312 assert(!"not reached: bad math opcode");
313 return;
314 }
315
316 if (intel->gen >= 7) {
317 emit(opcode, dst, src);
318 } else if (intel->gen == 6) {
319 return emit_math1_gen6(opcode, dst, src);
320 } else {
321 return emit_math1_gen4(opcode, dst, src);
322 }
323 }
324
325 void
326 vec4_visitor::emit_math2_gen6(enum opcode opcode,
327 dst_reg dst, src_reg src0, src_reg src1)
328 {
329 src_reg expanded;
330
331 /* The gen6 math instruction ignores the source modifiers --
332 * swizzle, abs, negate, and at least some parts of the register
333 * region description. Move the sources to temporaries to make it
334 * generally work.
335 */
336
337 expanded = src_reg(this, glsl_type::vec4_type);
338 expanded.type = src0.type;
339 emit(MOV(dst_reg(expanded), src0));
340 src0 = expanded;
341
342 expanded = src_reg(this, glsl_type::vec4_type);
343 expanded.type = src1.type;
344 emit(MOV(dst_reg(expanded), src1));
345 src1 = expanded;
346
347 if (dst.writemask != WRITEMASK_XYZW) {
348 /* The gen6 math instruction must be align1, so we can't do
349 * writemasks.
350 */
351 dst_reg temp_dst = dst_reg(this, glsl_type::vec4_type);
352 temp_dst.type = dst.type;
353
354 emit(opcode, temp_dst, src0, src1);
355
356 emit(MOV(dst, src_reg(temp_dst)));
357 } else {
358 emit(opcode, dst, src0, src1);
359 }
360 }
361
362 void
363 vec4_visitor::emit_math2_gen4(enum opcode opcode,
364 dst_reg dst, src_reg src0, src_reg src1)
365 {
366 vec4_instruction *inst = emit(opcode, dst, src0, src1);
367 inst->base_mrf = 1;
368 inst->mlen = 2;
369 }
370
371 void
372 vec4_visitor::emit_math(enum opcode opcode,
373 dst_reg dst, src_reg src0, src_reg src1)
374 {
375 switch (opcode) {
376 case SHADER_OPCODE_POW:
377 case SHADER_OPCODE_INT_QUOTIENT:
378 case SHADER_OPCODE_INT_REMAINDER:
379 break;
380 default:
381 assert(!"not reached: unsupported binary math opcode");
382 return;
383 }
384
385 if (intel->gen >= 7) {
386 emit(opcode, dst, src0, src1);
387 } else if (intel->gen == 6) {
388 return emit_math2_gen6(opcode, dst, src0, src1);
389 } else {
390 return emit_math2_gen4(opcode, dst, src0, src1);
391 }
392 }
393
394 void
395 vec4_visitor::visit_instructions(const exec_list *list)
396 {
397 foreach_list(node, list) {
398 ir_instruction *ir = (ir_instruction *)node;
399
400 base_ir = ir;
401 ir->accept(this);
402 }
403 }
404
405
406 static int
407 type_size(const struct glsl_type *type)
408 {
409 unsigned int i;
410 int size;
411
412 switch (type->base_type) {
413 case GLSL_TYPE_UINT:
414 case GLSL_TYPE_INT:
415 case GLSL_TYPE_FLOAT:
416 case GLSL_TYPE_BOOL:
417 if (type->is_matrix()) {
418 return type->matrix_columns;
419 } else {
420 /* Regardless of size of vector, it gets a vec4. This is bad
421 * packing for things like floats, but otherwise arrays become a
422 * mess. Hopefully a later pass over the code can pack scalars
423 * down if appropriate.
424 */
425 return 1;
426 }
427 case GLSL_TYPE_ARRAY:
428 assert(type->length > 0);
429 return type_size(type->fields.array) * type->length;
430 case GLSL_TYPE_STRUCT:
431 size = 0;
432 for (i = 0; i < type->length; i++) {
433 size += type_size(type->fields.structure[i].type);
434 }
435 return size;
436 case GLSL_TYPE_SAMPLER:
437 /* Samplers take up one slot in UNIFORMS[], but they're baked in
438 * at link time.
439 */
440 return 1;
441 default:
442 assert(0);
443 return 0;
444 }
445 }
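/* For illustration, some sizes this returns: a float or a vec3 takes one
 * vec4 slot (1), a mat3 takes one slot per column (3), a float[4] array
 * takes 4, and a struct { vec3 a; float b; } takes 2.
 */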
446
447 int
448 vec4_visitor::virtual_grf_alloc(int size)
449 {
450 if (virtual_grf_array_size <= virtual_grf_count) {
451 if (virtual_grf_array_size == 0)
452 virtual_grf_array_size = 16;
453 else
454 virtual_grf_array_size *= 2;
455 virtual_grf_sizes = reralloc(mem_ctx, virtual_grf_sizes, int,
456 virtual_grf_array_size);
457 virtual_grf_reg_map = reralloc(mem_ctx, virtual_grf_reg_map, int,
458 virtual_grf_array_size);
459 }
460 virtual_grf_reg_map[virtual_grf_count] = virtual_grf_reg_count;
461 virtual_grf_reg_count += size;
462 virtual_grf_sizes[virtual_grf_count] = size;
463 return virtual_grf_count++;
464 }
465
466 src_reg::src_reg(class vec4_visitor *v, const struct glsl_type *type)
467 {
468 init();
469
470 this->file = GRF;
471 this->reg = v->virtual_grf_alloc(type_size(type));
472
473 if (type->is_array() || type->is_record()) {
474 this->swizzle = BRW_SWIZZLE_NOOP;
475 } else {
476 this->swizzle = swizzle_for_size(type->vector_elements);
477 }
478
479 this->type = brw_type_for_base_type(type);
480 }
481
482 dst_reg::dst_reg(class vec4_visitor *v, const struct glsl_type *type)
483 {
484 init();
485
486 this->file = GRF;
487 this->reg = v->virtual_grf_alloc(type_size(type));
488
489 if (type->is_array() || type->is_record()) {
490 this->writemask = WRITEMASK_XYZW;
491 } else {
492 this->writemask = (1 << type->vector_elements) - 1;
493 }
494
495 this->type = brw_type_for_base_type(type);
496 }
497
498 /* Our support for uniforms is piggy-backed on the struct
499 * gl_fragment_program, because that's where the values actually
500 * get stored, rather than in some global gl_shader_program uniform
501 * store.
502 */
503 int
504 vec4_visitor::setup_uniform_values(int loc, const glsl_type *type)
505 {
506 unsigned int offset = 0;
507 float *values = &this->vp->Base.Parameters->ParameterValues[loc][0].f;
508
509 if (type->is_matrix()) {
510 const glsl_type *column = type->column_type();
511
512 for (unsigned int i = 0; i < type->matrix_columns; i++) {
513 offset += setup_uniform_values(loc + offset, column);
514 }
515
516 return offset;
517 }
518
519 switch (type->base_type) {
520 case GLSL_TYPE_FLOAT:
521 case GLSL_TYPE_UINT:
522 case GLSL_TYPE_INT:
523 case GLSL_TYPE_BOOL:
524 for (unsigned int i = 0; i < type->vector_elements; i++) {
525 c->prog_data.param[this->uniforms * 4 + i] = &values[i];
526 }
527
528 /* Set up pad elements to get things aligned to a vec4 boundary. */
529 for (unsigned int i = type->vector_elements; i < 4; i++) {
530 static float zero = 0;
531
532 c->prog_data.param[this->uniforms * 4 + i] = &zero;
533 }
534
535 /* Track the size of this uniform vector, for future packing of
536 * uniforms.
537 */
538 this->uniform_vector_size[this->uniforms] = type->vector_elements;
539 this->uniforms++;
540
541 return 1;
542
543 case GLSL_TYPE_STRUCT:
544 for (unsigned int i = 0; i < type->length; i++) {
545 offset += setup_uniform_values(loc + offset,
546 type->fields.structure[i].type);
547 }
548 return offset;
549
550 case GLSL_TYPE_ARRAY:
551 for (unsigned int i = 0; i < type->length; i++) {
552 offset += setup_uniform_values(loc + offset, type->fields.array);
553 }
554 return offset;
555
556 case GLSL_TYPE_SAMPLER:
557 /* The sampler takes up a slot, but we don't use any values from it. */
558 return 1;
559
560 default:
561 assert(!"not reached");
562 return 0;
563 }
564 }
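/* For illustration: a mat4 uniform recurses over its four vec4 columns and
 * returns 4, consuming four consecutive uniform vectors; a lone float still
 * occupies a full vec4 slot, with its three unused channels pointing at the
 * static zero pad above.
 */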
565
566 void
567 vec4_visitor::setup_uniform_clipplane_values()
568 {
569 gl_clip_plane *clip_planes = brw_select_clip_planes(ctx);
570
571 /* Pre-Gen6, we compact clip planes. For example, if the user
572 * enables just clip planes 0, 1, and 3, we will enable clip planes
573 * 0, 1, and 2 in the hardware, and we'll move clip plane 3 to clip
574 * plane 2. This simplifies the implementation of the Gen6 clip
575 * thread.
576 *
577 * In Gen6 and later, we don't compact clip planes, because this
578 * simplifies the implementation of gl_ClipDistance.
579 */
580 int compacted_clipplane_index = 0;
581 for (int i = 0; i < c->key.nr_userclip_plane_consts; ++i) {
582 if (intel->gen < 6 &&
583 !(c->key.userclip_planes_enabled_gen_4_5 & (1 << i))) {
584 continue;
585 }
586 this->uniform_vector_size[this->uniforms] = 4;
587 this->userplane[compacted_clipplane_index] = dst_reg(UNIFORM, this->uniforms);
588 this->userplane[compacted_clipplane_index].type = BRW_REGISTER_TYPE_F;
589 for (int j = 0; j < 4; ++j) {
590 c->prog_data.param[this->uniforms * 4 + j] = &clip_planes[i][j];
591 }
592 ++compacted_clipplane_index;
593 ++this->uniforms;
594 }
595 }
596
597 /* Our support for builtin uniforms is even scarier than non-builtin.
598 * It sits on top of the PROG_STATE_VAR parameters that are
599 * automatically updated from GL context state.
600 */
601 void
602 vec4_visitor::setup_builtin_uniform_values(ir_variable *ir)
603 {
604 const ir_state_slot *const slots = ir->state_slots;
605 assert(ir->state_slots != NULL);
606
607 for (unsigned int i = 0; i < ir->num_state_slots; i++) {
608 /* This state reference has already been setup by ir_to_mesa,
609 * but we'll get the same index back here. We can reference
610 * ParameterValues directly, since unlike brw_fs.cpp, we never
611 * add new state references during compile.
612 */
613 int index = _mesa_add_state_reference(this->vp->Base.Parameters,
614 (gl_state_index *)slots[i].tokens);
615 float *values = &this->vp->Base.Parameters->ParameterValues[index][0].f;
616
617 this->uniform_vector_size[this->uniforms] = 0;
618 /* Add each of the unique swizzled channels of the element.
619 * This will end up matching the size of the glsl_type of this field.
620 */
621 int last_swiz = -1;
622 for (unsigned int j = 0; j < 4; j++) {
623 int swiz = GET_SWZ(slots[i].swizzle, j);
624 if (swiz == last_swiz)
625 break;
626 last_swiz = swiz;
627 c->prog_data.param[this->uniforms * 4 + j] = &values[swiz];
628 this->uniform_vector_size[this->uniforms]++;
629 }
630 this->uniforms++;
631 }
632 }
633
634 dst_reg *
635 vec4_visitor::variable_storage(ir_variable *var)
636 {
637 return (dst_reg *)hash_table_find(this->variable_ht, var);
638 }
639
640 void
641 vec4_visitor::emit_bool_to_cond_code(ir_rvalue *ir, uint32_t *predicate)
642 {
643 ir_expression *expr = ir->as_expression();
644
645 *predicate = BRW_PREDICATE_NORMAL;
646
647 if (expr) {
648 src_reg op[2];
649 vec4_instruction *inst;
650
651 assert(expr->get_num_operands() <= 2);
652 for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
653 expr->operands[i]->accept(this);
654 op[i] = this->result;
655
656 resolve_ud_negate(&op[i]);
657 }
658
659 switch (expr->operation) {
660 case ir_unop_logic_not:
661 inst = emit(AND(dst_null_d(), op[0], src_reg(1)));
662 inst->conditional_mod = BRW_CONDITIONAL_Z;
663 break;
664
665 case ir_binop_logic_xor:
666 inst = emit(XOR(dst_null_d(), op[0], op[1]));
667 inst->conditional_mod = BRW_CONDITIONAL_NZ;
668 break;
669
670 case ir_binop_logic_or:
671 inst = emit(OR(dst_null_d(), op[0], op[1]));
672 inst->conditional_mod = BRW_CONDITIONAL_NZ;
673 break;
674
675 case ir_binop_logic_and:
676 inst = emit(AND(dst_null_d(), op[0], op[1]));
677 inst->conditional_mod = BRW_CONDITIONAL_NZ;
678 break;
679
680 case ir_unop_f2b:
681 if (intel->gen >= 6) {
682 emit(CMP(dst_null_d(), op[0], src_reg(0.0f), BRW_CONDITIONAL_NZ));
683 } else {
684 inst = emit(MOV(dst_null_f(), op[0]));
685 inst->conditional_mod = BRW_CONDITIONAL_NZ;
686 }
687 break;
688
689 case ir_unop_i2b:
690 if (intel->gen >= 6) {
691 emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ));
692 } else {
693 inst = emit(MOV(dst_null_d(), op[0]));
694 inst->conditional_mod = BRW_CONDITIONAL_NZ;
695 }
696 break;
697
698 case ir_binop_all_equal:
699 inst = emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_Z));
700 *predicate = BRW_PREDICATE_ALIGN16_ALL4H;
701 break;
702
703 case ir_binop_any_nequal:
704 inst = emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_NZ));
705 *predicate = BRW_PREDICATE_ALIGN16_ANY4H;
706 break;
707
708 case ir_unop_any:
709 inst = emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ));
710 *predicate = BRW_PREDICATE_ALIGN16_ANY4H;
711 break;
712
713 case ir_binop_greater:
714 case ir_binop_gequal:
715 case ir_binop_less:
716 case ir_binop_lequal:
717 case ir_binop_equal:
718 case ir_binop_nequal:
719 emit(CMP(dst_null_d(), op[0], op[1],
720 brw_conditional_for_comparison(expr->operation)));
721 break;
722
723 default:
724 assert(!"not reached");
725 break;
726 }
727 return;
728 }
729
730 ir->accept(this);
731
732 resolve_ud_negate(&this->result);
733
734 if (intel->gen >= 6) {
735 vec4_instruction *inst = emit(AND(dst_null_d(),
736 this->result, src_reg(1)));
737 inst->conditional_mod = BRW_CONDITIONAL_NZ;
738 } else {
739 vec4_instruction *inst = emit(MOV(dst_null_d(), this->result));
740 inst->conditional_mod = BRW_CONDITIONAL_NZ;
741 }
742 }
743
744 /**
745 * Emit a gen6 IF statement with the comparison folded into the IF
746 * instruction.
747 */
748 void
749 vec4_visitor::emit_if_gen6(ir_if *ir)
750 {
751 ir_expression *expr = ir->condition->as_expression();
752
753 if (expr) {
754 src_reg op[2];
755 dst_reg temp;
756
757 assert(expr->get_num_operands() <= 2);
758 for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
759 expr->operands[i]->accept(this);
760 op[i] = this->result;
761 }
762
763 switch (expr->operation) {
764 case ir_unop_logic_not:
765 emit(IF(op[0], src_reg(0), BRW_CONDITIONAL_Z));
766 return;
767
768 case ir_binop_logic_xor:
769 emit(IF(op[0], op[1], BRW_CONDITIONAL_NZ));
770 return;
771
772 case ir_binop_logic_or:
773 temp = dst_reg(this, glsl_type::bool_type);
774 emit(OR(temp, op[0], op[1]));
775 emit(IF(src_reg(temp), src_reg(0), BRW_CONDITIONAL_NZ));
776 return;
777
778 case ir_binop_logic_and:
779 temp = dst_reg(this, glsl_type::bool_type);
780 emit(AND(temp, op[0], op[1]));
781 emit(IF(src_reg(temp), src_reg(0), BRW_CONDITIONAL_NZ));
782 return;
783
784 case ir_unop_f2b:
785 emit(IF(op[0], src_reg(0), BRW_CONDITIONAL_NZ));
786 return;
787
788 case ir_unop_i2b:
789 emit(IF(op[0], src_reg(0), BRW_CONDITIONAL_NZ));
790 return;
791
792 case ir_binop_greater:
793 case ir_binop_gequal:
794 case ir_binop_less:
795 case ir_binop_lequal:
796 case ir_binop_equal:
797 case ir_binop_nequal:
798 emit(IF(op[0], op[1],
799 brw_conditional_for_comparison(expr->operation)));
800 return;
801
802 case ir_binop_all_equal:
803 emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_Z));
804 emit(IF(BRW_PREDICATE_ALIGN16_ALL4H));
805 return;
806
807 case ir_binop_any_nequal:
808 emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_NZ));
809 emit(IF(BRW_PREDICATE_ALIGN16_ANY4H));
810 return;
811
812 case ir_unop_any:
813 emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ));
814 emit(IF(BRW_PREDICATE_ALIGN16_ANY4H));
815 return;
816
817 default:
818 assert(!"not reached");
819 emit(IF(op[0], src_reg(0), BRW_CONDITIONAL_NZ));
820 return;
821 }
822 return;
823 }
824
825 ir->condition->accept(this);
826
827 emit(IF(this->result, src_reg(0), BRW_CONDITIONAL_NZ));
828 }
829
830 void
831 vec4_visitor::visit(ir_variable *ir)
832 {
833 dst_reg *reg = NULL;
834
835 if (variable_storage(ir))
836 return;
837
838 switch (ir->mode) {
839 case ir_var_in:
840 reg = new(mem_ctx) dst_reg(ATTR, ir->location);
841
842 /* Do GL_FIXED rescaling for GLES2.0. Our GL_FIXED attributes
843 * come in as floating point conversions of the integer values.
844 */
845 for (int i = ir->location; i < ir->location + type_size(ir->type); i++) {
846 if (!c->key.gl_fixed_input_size[i])
847 continue;
848
849 dst_reg dst = *reg;
850 dst.type = brw_type_for_base_type(ir->type);
851 dst.writemask = (1 << c->key.gl_fixed_input_size[i]) - 1;
852 emit(MUL(dst, src_reg(dst), src_reg(1.0f / 65536.0f)));
853 }
854 break;
855
856 case ir_var_out:
857 reg = new(mem_ctx) dst_reg(this, ir->type);
858
859 for (int i = 0; i < type_size(ir->type); i++) {
860 output_reg[ir->location + i] = *reg;
861 output_reg[ir->location + i].reg_offset = i;
862 output_reg[ir->location + i].type =
863 brw_type_for_base_type(ir->type->get_scalar_type());
864 output_reg_annotation[ir->location + i] = ir->name;
865 }
866 break;
867
868 case ir_var_auto:
869 case ir_var_temporary:
870 reg = new(mem_ctx) dst_reg(this, ir->type);
871 break;
872
873 case ir_var_uniform:
874 reg = new(this->mem_ctx) dst_reg(UNIFORM, this->uniforms);
875
876 /* Track how big the whole uniform variable is, in case we need to put a
877 * copy of its data into pull constants for array access.
878 */
879 this->uniform_size[this->uniforms] = type_size(ir->type);
880
881 if (!strncmp(ir->name, "gl_", 3)) {
882 setup_builtin_uniform_values(ir);
883 } else {
884 setup_uniform_values(ir->location, ir->type);
885 }
886 break;
887
888 case ir_var_system_value:
889 /* VertexID is stored by the VF as the last vertex element, but
890 * we don't represent it with a flag in inputs_read, so we call
891 * it VERT_ATTRIB_MAX, which setup_attributes() picks up on.
892 */
893 reg = new(mem_ctx) dst_reg(ATTR, VERT_ATTRIB_MAX);
894 prog_data->uses_vertexid = true;
895
896 switch (ir->location) {
897 case SYSTEM_VALUE_VERTEX_ID:
898 reg->writemask = WRITEMASK_X;
899 break;
900 case SYSTEM_VALUE_INSTANCE_ID:
901 reg->writemask = WRITEMASK_Y;
902 break;
903 default:
904 assert(!"not reached");
905 break;
906 }
907 break;
908
909 default:
910 assert(!"not reached");
911 }
912
913 reg->type = brw_type_for_base_type(ir->type);
914 hash_table_insert(this->variable_ht, reg, ir);
915 }
916
917 void
918 vec4_visitor::visit(ir_loop *ir)
919 {
920 dst_reg counter;
921
922 /* We don't want debugging output to print the whole body of the
923 * loop as the annotation.
924 */
925 this->base_ir = NULL;
926
927 if (ir->counter != NULL) {
928 this->base_ir = ir->counter;
929 ir->counter->accept(this);
930 counter = *(variable_storage(ir->counter));
931
932 if (ir->from != NULL) {
933 this->base_ir = ir->from;
934 ir->from->accept(this);
935
936 emit(MOV(counter, this->result));
937 }
938 }
939
940 emit(BRW_OPCODE_DO);
941
942 if (ir->to) {
943 this->base_ir = ir->to;
944 ir->to->accept(this);
945
946 emit(CMP(dst_null_d(), src_reg(counter), this->result,
947 brw_conditional_for_comparison(ir->cmp)));
948
949 vec4_instruction *inst = emit(BRW_OPCODE_BREAK);
950 inst->predicate = BRW_PREDICATE_NORMAL;
951 }
952
953 visit_instructions(&ir->body_instructions);
954
955
956 if (ir->increment) {
957 this->base_ir = ir->increment;
958 ir->increment->accept(this);
959 emit(ADD(counter, src_reg(counter), this->result));
960 }
961
962 emit(BRW_OPCODE_WHILE);
963 }
964
965 void
966 vec4_visitor::visit(ir_loop_jump *ir)
967 {
968 switch (ir->mode) {
969 case ir_loop_jump::jump_break:
970 emit(BRW_OPCODE_BREAK);
971 break;
972 case ir_loop_jump::jump_continue:
973 emit(BRW_OPCODE_CONTINUE);
974 break;
975 }
976 }
977
978
979 void
980 vec4_visitor::visit(ir_function_signature *ir)
981 {
982 assert(0);
983 (void)ir;
984 }
985
986 void
987 vec4_visitor::visit(ir_function *ir)
988 {
989 /* Ignore function bodies other than main() -- we shouldn't see calls to
990 * them since they should all be inlined.
991 */
992 if (strcmp(ir->name, "main") == 0) {
993 const ir_function_signature *sig;
994 exec_list empty;
995
996 sig = ir->matching_signature(&empty);
997
998 assert(sig);
999
1000 visit_instructions(&sig->body);
1001 }
1002 }
1003
1004 bool
1005 vec4_visitor::try_emit_sat(ir_expression *ir)
1006 {
1007 ir_rvalue *sat_src = ir->as_rvalue_to_saturate();
1008 if (!sat_src)
1009 return false;
1010
1011 sat_src->accept(this);
1012 src_reg src = this->result;
1013
1014 this->result = src_reg(this, ir->type);
1015 vec4_instruction *inst;
1016 inst = emit(MOV(dst_reg(this->result), src));
1017 inst->saturate = true;
1018
1019 return true;
1020 }
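/* For illustration: when the expression is a saturate that
 * as_rvalue_to_saturate() recognizes (a clamp of the value to [0, 1]), the
 * inner rvalue is evaluated once and copied through a single saturating MOV
 * instead of emitting separate clamp instructions.
 */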
1021
1022 void
1023 vec4_visitor::emit_bool_comparison(unsigned int op,
1024 dst_reg dst, src_reg src0, src_reg src1)
1025 {
1026 /* original gen4 does destination conversion before comparison. */
1027 if (intel->gen < 5)
1028 dst.type = src0.type;
1029
1030 emit(CMP(dst, src0, src1, brw_conditional_for_comparison(op)));
1031
1032 dst.type = BRW_REGISTER_TYPE_D;
1033 emit(AND(dst, src_reg(dst), src_reg(0x1)));
1034 }
1035
1036 void
1037 vec4_visitor::visit(ir_expression *ir)
1038 {
1039 unsigned int operand;
1040 src_reg op[Elements(ir->operands)];
1041 src_reg result_src;
1042 dst_reg result_dst;
1043 vec4_instruction *inst;
1044
1045 if (try_emit_sat(ir))
1046 return;
1047
1048 for (operand = 0; operand < ir->get_num_operands(); operand++) {
1049 this->result.file = BAD_FILE;
1050 ir->operands[operand]->accept(this);
1051 if (this->result.file == BAD_FILE) {
1052 printf("Failed to get tree for expression operand:\n");
1053 ir->operands[operand]->print();
1054 exit(1);
1055 }
1056 op[operand] = this->result;
1057
1058 /* Matrix expression operands should have been broken down to vector
1059 * operations already.
1060 */
1061 assert(!ir->operands[operand]->type->is_matrix());
1062 }
1063
1064 int vector_elements = ir->operands[0]->type->vector_elements;
1065 if (ir->operands[1]) {
1066 vector_elements = MAX2(vector_elements,
1067 ir->operands[1]->type->vector_elements);
1068 }
1069
1070 this->result.file = BAD_FILE;
1071
1072 /* Storage for our result. Ideally for an assignment we'd be using
1073 * the actual storage for the result here, instead.
1074 */
1075 result_src = src_reg(this, ir->type);
1076 /* convenience for the emit functions below. */
1077 result_dst = dst_reg(result_src);
1078 /* If nothing special happens, this is the result. */
1079 this->result = result_src;
1080 /* Limit writes to the channels that will be used by result_src later.
1081 * This does limit this temp's use as a temporary for multi-instruction
1082 * sequences.
1083 */
1084 result_dst.writemask = (1 << ir->type->vector_elements) - 1;
1085
1086 switch (ir->operation) {
1087 case ir_unop_logic_not:
1088 /* Note that BRW_OPCODE_NOT is not appropriate here, since it is
1089 * ones complement of the whole register, not just bit 0.
1090 */
1091 emit(XOR(result_dst, op[0], src_reg(1)));
1092 break;
1093 case ir_unop_neg:
1094 op[0].negate = !op[0].negate;
1095 this->result = op[0];
1096 break;
1097 case ir_unop_abs:
1098 op[0].abs = true;
1099 op[0].negate = false;
1100 this->result = op[0];
1101 break;
1102
1103 case ir_unop_sign:
1104 emit(MOV(result_dst, src_reg(0.0f)));
1105
1106 emit(CMP(dst_null_d(), op[0], src_reg(0.0f), BRW_CONDITIONAL_G));
1107 inst = emit(MOV(result_dst, src_reg(1.0f)));
1108 inst->predicate = BRW_PREDICATE_NORMAL;
1109
1110 emit(CMP(dst_null_d(), op[0], src_reg(0.0f), BRW_CONDITIONAL_L));
1111 inst = emit(MOV(result_dst, src_reg(-1.0f)));
1112 inst->predicate = BRW_PREDICATE_NORMAL;
1113
1114 break;
1115
1116 case ir_unop_rcp:
1117 emit_math(SHADER_OPCODE_RCP, result_dst, op[0]);
1118 break;
1119
1120 case ir_unop_exp2:
1121 emit_math(SHADER_OPCODE_EXP2, result_dst, op[0]);
1122 break;
1123 case ir_unop_log2:
1124 emit_math(SHADER_OPCODE_LOG2, result_dst, op[0]);
1125 break;
1126 case ir_unop_exp:
1127 case ir_unop_log:
1128 assert(!"not reached: should be handled by ir_explog_to_explog2");
1129 break;
1130 case ir_unop_sin:
1131 case ir_unop_sin_reduced:
1132 emit_math(SHADER_OPCODE_SIN, result_dst, op[0]);
1133 break;
1134 case ir_unop_cos:
1135 case ir_unop_cos_reduced:
1136 emit_math(SHADER_OPCODE_COS, result_dst, op[0]);
1137 break;
1138
1139 case ir_unop_dFdx:
1140 case ir_unop_dFdy:
1141 assert(!"derivatives not valid in vertex shader");
1142 break;
1143
1144 case ir_unop_noise:
1145 assert(!"not reached: should be handled by lower_noise");
1146 break;
1147
1148 case ir_binop_add:
1149 emit(ADD(result_dst, op[0], op[1]));
1150 break;
1151 case ir_binop_sub:
1152 assert(!"not reached: should be handled by ir_sub_to_add_neg");
1153 break;
1154
1155 case ir_binop_mul:
1156 if (ir->type->is_integer()) {
1157 /* For integer multiplication, the MUL uses the low 16 bits
1158 * of one of the operands (src0 on gen6, src1 on gen7). The
1159 * MACH accumulates in the contribution of the upper 16 bits
1160 * of that operand.
1161 *
1162 * FINISHME: Emit just the MUL if we know an operand is small
1163 * enough.
1164 */
1165 struct brw_reg acc = retype(brw_acc_reg(), BRW_REGISTER_TYPE_D);
1166
1167 emit(MUL(acc, op[0], op[1]));
1168 emit(MACH(dst_null_d(), op[0], op[1]));
1169 emit(MOV(result_dst, src_reg(acc)));
1170 } else {
1171 emit(MUL(result_dst, op[0], op[1]));
1172 }
1173 break;
1174 case ir_binop_div:
1175 /* Floating point should be lowered by DIV_TO_MUL_RCP in the compiler. */
1176 assert(ir->type->is_integer());
1177 emit_math(SHADER_OPCODE_INT_QUOTIENT, result_dst, op[0], op[1]);
1178 break;
1179 case ir_binop_mod:
1180 /* Floating point should be lowered by MOD_TO_FRACT in the compiler. */
1181 assert(ir->type->is_integer());
1182 emit_math(SHADER_OPCODE_INT_REMAINDER, result_dst, op[0], op[1]);
1183 break;
1184
1185 case ir_binop_less:
1186 case ir_binop_greater:
1187 case ir_binop_lequal:
1188 case ir_binop_gequal:
1189 case ir_binop_equal:
1190 case ir_binop_nequal: {
1191 emit(CMP(result_dst, op[0], op[1],
1192 brw_conditional_for_comparison(ir->operation)));
1193 emit(AND(result_dst, result_src, src_reg(0x1)));
1194 break;
1195 }
1196
1197 case ir_binop_all_equal:
1198 /* "==" operator producing a scalar boolean. */
1199 if (ir->operands[0]->type->is_vector() ||
1200 ir->operands[1]->type->is_vector()) {
1201 emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_Z));
1202 emit(MOV(result_dst, src_reg(0)));
1203 inst = emit(MOV(result_dst, src_reg(1)));
1204 inst->predicate = BRW_PREDICATE_ALIGN16_ALL4H;
1205 } else {
1206 emit(CMP(result_dst, op[0], op[1], BRW_CONDITIONAL_Z));
1207 emit(AND(result_dst, result_src, src_reg(0x1)));
1208 }
1209 break;
1210 case ir_binop_any_nequal:
1211 /* "!=" operator producing a scalar boolean. */
1212 if (ir->operands[0]->type->is_vector() ||
1213 ir->operands[1]->type->is_vector()) {
1214 emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_NZ));
1215
1216 emit(MOV(result_dst, src_reg(0)));
1217 inst = emit(MOV(result_dst, src_reg(1)));
1218 inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
1219 } else {
1220 emit(CMP(result_dst, op[0], op[1], BRW_CONDITIONAL_NZ));
1221 emit(AND(result_dst, result_src, src_reg(0x1)));
1222 }
1223 break;
1224
1225 case ir_unop_any:
1226 emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ));
1227 emit(MOV(result_dst, src_reg(0)));
1228
1229 inst = emit(MOV(result_dst, src_reg(1)));
1230 inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
1231 break;
1232
1233 case ir_binop_logic_xor:
1234 emit(XOR(result_dst, op[0], op[1]));
1235 break;
1236
1237 case ir_binop_logic_or:
1238 emit(OR(result_dst, op[0], op[1]));
1239 break;
1240
1241 case ir_binop_logic_and:
1242 emit(AND(result_dst, op[0], op[1]));
1243 break;
1244
1245 case ir_binop_dot:
1246 assert(ir->operands[0]->type->is_vector());
1247 assert(ir->operands[0]->type == ir->operands[1]->type);
1248 emit_dp(result_dst, op[0], op[1], ir->operands[0]->type->vector_elements);
1249 break;
1250
1251 case ir_unop_sqrt:
1252 emit_math(SHADER_OPCODE_SQRT, result_dst, op[0]);
1253 break;
1254 case ir_unop_rsq:
1255 emit_math(SHADER_OPCODE_RSQ, result_dst, op[0]);
1256 break;
1257
1258 case ir_unop_bitcast_i2f:
1259 case ir_unop_bitcast_u2f:
1260 this->result = op[0];
1261 this->result.type = BRW_REGISTER_TYPE_F;
1262 break;
1263
1264 case ir_unop_bitcast_f2i:
1265 this->result = op[0];
1266 this->result.type = BRW_REGISTER_TYPE_D;
1267 break;
1268
1269 case ir_unop_bitcast_f2u:
1270 this->result = op[0];
1271 this->result.type = BRW_REGISTER_TYPE_UD;
1272 break;
1273
1274 case ir_unop_i2f:
1275 case ir_unop_i2u:
1276 case ir_unop_u2i:
1277 case ir_unop_u2f:
1278 case ir_unop_b2f:
1279 case ir_unop_b2i:
1280 case ir_unop_f2i:
1281 case ir_unop_f2u:
1282 emit(MOV(result_dst, op[0]));
1283 break;
1284 case ir_unop_f2b:
1285 case ir_unop_i2b: {
1286 emit(CMP(result_dst, op[0], src_reg(0.0f), BRW_CONDITIONAL_NZ));
1287 emit(AND(result_dst, result_src, src_reg(1)));
1288 break;
1289 }
1290
1291 case ir_unop_trunc:
1292 emit(RNDZ(result_dst, op[0]));
1293 break;
1294 case ir_unop_ceil:
1295 op[0].negate = !op[0].negate;
1296 inst = emit(RNDD(result_dst, op[0]));
1297 this->result.negate = true;
1298 break;
1299 case ir_unop_floor:
1300 inst = emit(RNDD(result_dst, op[0]));
1301 break;
1302 case ir_unop_fract:
1303 inst = emit(FRC(result_dst, op[0]));
1304 break;
1305 case ir_unop_round_even:
1306 emit(RNDE(result_dst, op[0]));
1307 break;
1308
1309 case ir_binop_min:
1310 if (intel->gen >= 6) {
1311 inst = emit(BRW_OPCODE_SEL, result_dst, op[0], op[1]);
1312 inst->conditional_mod = BRW_CONDITIONAL_L;
1313 } else {
1314 emit(CMP(result_dst, op[0], op[1], BRW_CONDITIONAL_L));
1315
1316 inst = emit(BRW_OPCODE_SEL, result_dst, op[0], op[1]);
1317 inst->predicate = BRW_PREDICATE_NORMAL;
1318 }
1319 break;
1320 case ir_binop_max:
1321 if (intel->gen >= 6) {
1322 inst = emit(BRW_OPCODE_SEL, result_dst, op[0], op[1]);
1323 inst->conditional_mod = BRW_CONDITIONAL_G;
1324 } else {
1325 emit(CMP(result_dst, op[0], op[1], BRW_CONDITIONAL_G));
1326
1327 inst = emit(BRW_OPCODE_SEL, result_dst, op[0], op[1]);
1328 inst->predicate = BRW_PREDICATE_NORMAL;
1329 }
1330 break;
1331
1332 case ir_binop_pow:
1333 emit_math(SHADER_OPCODE_POW, result_dst, op[0], op[1]);
1334 break;
1335
1336 case ir_unop_bit_not:
1337 inst = emit(NOT(result_dst, op[0]));
1338 break;
1339 case ir_binop_bit_and:
1340 inst = emit(AND(result_dst, op[0], op[1]));
1341 break;
1342 case ir_binop_bit_xor:
1343 inst = emit(XOR(result_dst, op[0], op[1]));
1344 break;
1345 case ir_binop_bit_or:
1346 inst = emit(OR(result_dst, op[0], op[1]));
1347 break;
1348
1349 case ir_binop_lshift:
1350 inst = emit(BRW_OPCODE_SHL, result_dst, op[0], op[1]);
1351 break;
1352
1353 case ir_binop_rshift:
1354 if (ir->type->base_type == GLSL_TYPE_INT)
1355 inst = emit(BRW_OPCODE_ASR, result_dst, op[0], op[1]);
1356 else
1357 inst = emit(BRW_OPCODE_SHR, result_dst, op[0], op[1]);
1358 break;
1359
1360 case ir_quadop_vector:
1361 assert(!"not reached: should be handled by lower_quadop_vector");
1362 break;
1363 }
1364 }
1365
1366
1367 void
1368 vec4_visitor::visit(ir_swizzle *ir)
1369 {
1370 src_reg src;
1371 int i = 0;
1372 int swizzle[4];
1373
1374 /* Note that this is only swizzles in expressions, not those on the left
1375 * hand side of an assignment, which do write masking. See ir_assignment
1376 * for that.
1377 */
1378
1379 ir->val->accept(this);
1380 src = this->result;
1381 assert(src.file != BAD_FILE);
1382
1383 for (i = 0; i < ir->type->vector_elements; i++) {
1384 switch (i) {
1385 case 0:
1386 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.x);
1387 break;
1388 case 1:
1389 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.y);
1390 break;
1391 case 2:
1392 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.z);
1393 break;
1394 case 3:
1395 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.w);
1396 break;
1397 }
1398 }
1399 for (; i < 4; i++) {
1400 /* Replicate the last channel out. */
1401 swizzle[i] = swizzle[ir->type->vector_elements - 1];
1402 }
1403
1404 src.swizzle = BRW_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]);
1405
1406 this->result = src;
1407 }
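/* For illustration, composing swizzles as above: taking .zy of a value whose
 * current swizzle is .wzyx yields .yzzz -- the two requested channels, with
 * the last one replicated into the remaining slots.
 */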
1408
1409 void
1410 vec4_visitor::visit(ir_dereference_variable *ir)
1411 {
1412 const struct glsl_type *type = ir->type;
1413 dst_reg *reg = variable_storage(ir->var);
1414
1415 if (!reg) {
1416 fail("Failed to find variable storage for %s\n", ir->var->name);
1417 this->result = src_reg(brw_null_reg());
1418 return;
1419 }
1420
1421 this->result = src_reg(*reg);
1422
1423 /* System values get their swizzle from the dst_reg writemask */
1424 if (ir->var->mode == ir_var_system_value)
1425 return;
1426
1427 if (type->is_scalar() || type->is_vector() || type->is_matrix())
1428 this->result.swizzle = swizzle_for_size(type->vector_elements);
1429 }
1430
1431 void
1432 vec4_visitor::visit(ir_dereference_array *ir)
1433 {
1434 ir_constant *constant_index;
1435 src_reg src;
1436 int element_size = type_size(ir->type);
1437
1438 constant_index = ir->array_index->constant_expression_value();
1439
1440 ir->array->accept(this);
1441 src = this->result;
1442
1443 if (constant_index) {
1444 src.reg_offset += constant_index->value.i[0] * element_size;
1445 } else {
1446 /* Variable index array dereference. It eats the "vec4" of the
1447 * base of the array and an index that offsets the Mesa register
1448 * index.
1449 */
1450 ir->array_index->accept(this);
1451
1452 src_reg index_reg;
1453
1454 if (element_size == 1) {
1455 index_reg = this->result;
1456 } else {
1457 index_reg = src_reg(this, glsl_type::int_type);
1458
1459 emit(MUL(dst_reg(index_reg), this->result, src_reg(element_size)));
1460 }
1461
1462 if (src.reladdr) {
1463 src_reg temp = src_reg(this, glsl_type::int_type);
1464
1465 emit(ADD(dst_reg(temp), *src.reladdr, index_reg));
1466
1467 index_reg = temp;
1468 }
1469
1470 src.reladdr = ralloc(mem_ctx, src_reg);
1471 memcpy(src.reladdr, &index_reg, sizeof(index_reg));
1472 }
1473
1474 /* If the type is smaller than a vec4, replicate the last channel out. */
1475 if (ir->type->is_scalar() || ir->type->is_vector() || ir->type->is_matrix())
1476 src.swizzle = swizzle_for_size(ir->type->vector_elements);
1477 else
1478 src.swizzle = BRW_SWIZZLE_NOOP;
1479 src.type = brw_type_for_base_type(ir->type);
1480
1481 this->result = src;
1482 }
1483
1484 void
1485 vec4_visitor::visit(ir_dereference_record *ir)
1486 {
1487 unsigned int i;
1488 const glsl_type *struct_type = ir->record->type;
1489 int offset = 0;
1490
1491 ir->record->accept(this);
1492
1493 for (i = 0; i < struct_type->length; i++) {
1494 if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0)
1495 break;
1496 offset += type_size(struct_type->fields.structure[i].type);
1497 }
1498
1499 /* If the type is smaller than a vec4, replicate the last channel out. */
1500 if (ir->type->is_scalar() || ir->type->is_vector() || ir->type->is_matrix())
1501 this->result.swizzle = swizzle_for_size(ir->type->vector_elements);
1502 else
1503 this->result.swizzle = BRW_SWIZZLE_NOOP;
1504 this->result.type = brw_type_for_base_type(ir->type);
1505
1506 this->result.reg_offset += offset;
1507 }
1508
1509 /**
1510 * We want to be careful in assignment setup to hit the actual storage
1511 * instead of potentially using a temporary like we might with the
1512 * ir_dereference handler.
1513 */
1514 static dst_reg
1515 get_assignment_lhs(ir_dereference *ir, vec4_visitor *v)
1516 {
1517 /* The LHS must be a dereference. If the LHS is a variable indexed array
1518 * access of a vector, it must be separated into a series of conditional moves
1519 * before reaching this point (see ir_vec_index_to_cond_assign).
1520 */
1521 assert(ir->as_dereference());
1522 ir_dereference_array *deref_array = ir->as_dereference_array();
1523 if (deref_array) {
1524 assert(!deref_array->array->type->is_vector());
1525 }
1526
1527 /* Use the rvalue deref handler for the most part. We'll ignore
1528 * swizzles in it and write swizzles using writemask, though.
1529 */
1530 ir->accept(v);
1531 return dst_reg(v->result);
1532 }
1533
1534 void
1535 vec4_visitor::emit_block_move(dst_reg *dst, src_reg *src,
1536 const struct glsl_type *type, uint32_t predicate)
1537 {
1538 if (type->base_type == GLSL_TYPE_STRUCT) {
1539 for (unsigned int i = 0; i < type->length; i++) {
1540 emit_block_move(dst, src, type->fields.structure[i].type, predicate);
1541 }
1542 return;
1543 }
1544
1545 if (type->is_array()) {
1546 for (unsigned int i = 0; i < type->length; i++) {
1547 emit_block_move(dst, src, type->fields.array, predicate);
1548 }
1549 return;
1550 }
1551
1552 if (type->is_matrix()) {
1553 const struct glsl_type *vec_type;
1554
1555 vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT,
1556 type->vector_elements, 1);
1557
1558 for (int i = 0; i < type->matrix_columns; i++) {
1559 emit_block_move(dst, src, vec_type, predicate);
1560 }
1561 return;
1562 }
1563
1564 assert(type->is_scalar() || type->is_vector());
1565
1566 dst->type = brw_type_for_base_type(type);
1567 src->type = dst->type;
1568
1569 dst->writemask = (1 << type->vector_elements) - 1;
1570
1571 src->swizzle = swizzle_for_size(type->vector_elements);
1572
1573 vec4_instruction *inst = emit(MOV(*dst, *src));
1574 inst->predicate = predicate;
1575
1576 dst->reg_offset++;
1577 src->reg_offset++;
1578 }
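/* For illustration: block-moving a mat3 recurses into three column moves,
 * each a (possibly predicated) MOV with a .xyz writemask, advancing the
 * dst and src reg_offsets by one register per column.
 */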
1579
1580
1581 /* If the RHS processing resulted in an instruction generating a
1582 * temporary value, and it would be easy to rewrite the instruction to
1583 * generate its result right into the LHS instead, do so. This ends
1584 * up reliably removing instructions where it can be tricky to do so
1585 * later without real UD chain information.
1586 */
1587 bool
1588 vec4_visitor::try_rewrite_rhs_to_dst(ir_assignment *ir,
1589 dst_reg dst,
1590 src_reg src,
1591 vec4_instruction *pre_rhs_inst,
1592 vec4_instruction *last_rhs_inst)
1593 {
1594 /* This could be supported, but it would take more smarts. */
1595 if (ir->condition)
1596 return false;
1597
1598 if (pre_rhs_inst == last_rhs_inst)
1599 return false; /* No instructions generated to work with. */
1600
1601 /* Make sure the last instruction generated our source reg. */
1602 if (src.file != GRF ||
1603 src.file != last_rhs_inst->dst.file ||
1604 src.reg != last_rhs_inst->dst.reg ||
1605 src.reg_offset != last_rhs_inst->dst.reg_offset ||
1606 src.reladdr ||
1607 src.abs ||
1608 src.negate ||
1609 last_rhs_inst->predicate != BRW_PREDICATE_NONE)
1610 return false;
1611
1612 /* Check that the last instruction fully initialized the channels
1613 * we want to use, in the order we want to use them. We could
1614 * potentially reswizzle the operands of many instructions so that
1615 * we could handle out of order channels, but don't yet.
1616 */
1617
1618 for (unsigned i = 0; i < 4; i++) {
1619 if (dst.writemask & (1 << i)) {
1620 if (!(last_rhs_inst->dst.writemask & (1 << i)))
1621 return false;
1622
1623 if (BRW_GET_SWZ(src.swizzle, i) != i)
1624 return false;
1625 }
1626 }
1627
1628 /* Success! Rewrite the instruction. */
1629 last_rhs_inst->dst.file = dst.file;
1630 last_rhs_inst->dst.reg = dst.reg;
1631 last_rhs_inst->dst.reg_offset = dst.reg_offset;
1632 last_rhs_inst->dst.reladdr = dst.reladdr;
1633 last_rhs_inst->dst.writemask &= dst.writemask;
1634
1635 return true;
1636 }
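/* For illustration: for an unconditional "v = a + b;" where the ADD just
 * wrote a fresh temporary GRF in channel order, the ADD's destination is
 * patched to point at v's storage and visit(ir_assignment) skips the MOV
 * it would otherwise emit.
 */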
1637
1638 void
1639 vec4_visitor::visit(ir_assignment *ir)
1640 {
1641 dst_reg dst = get_assignment_lhs(ir->lhs, this);
1642 uint32_t predicate = BRW_PREDICATE_NONE;
1643
1644 if (!ir->lhs->type->is_scalar() &&
1645 !ir->lhs->type->is_vector()) {
1646 ir->rhs->accept(this);
1647 src_reg src = this->result;
1648
1649 if (ir->condition) {
1650 emit_bool_to_cond_code(ir->condition, &predicate);
1651 }
1652
1653 /* emit_block_move doesn't account for swizzles in the source register.
1654 * This should be ok, since the source register is a structure or an
1655 * array, and those can't be swizzled. But double-check to be sure.
1656 */
1657 assert(src.swizzle ==
1658 (ir->rhs->type->is_matrix()
1659 ? swizzle_for_size(ir->rhs->type->vector_elements)
1660 : BRW_SWIZZLE_NOOP));
1661
1662 emit_block_move(&dst, &src, ir->rhs->type, predicate);
1663 return;
1664 }
1665
1666 /* Now we're down to just a scalar/vector with writemasks. */
1667 int i;
1668
1669 vec4_instruction *pre_rhs_inst, *last_rhs_inst;
1670 pre_rhs_inst = (vec4_instruction *)this->instructions.get_tail();
1671
1672 ir->rhs->accept(this);
1673
1674 last_rhs_inst = (vec4_instruction *)this->instructions.get_tail();
1675
1676 src_reg src = this->result;
1677
1678 int swizzles[4];
1679 int first_enabled_chan = 0;
1680 int src_chan = 0;
1681
1682 assert(ir->lhs->type->is_vector() ||
1683 ir->lhs->type->is_scalar());
1684 dst.writemask = ir->write_mask;
1685
1686 for (int i = 0; i < 4; i++) {
1687 if (dst.writemask & (1 << i)) {
1688 first_enabled_chan = BRW_GET_SWZ(src.swizzle, i);
1689 break;
1690 }
1691 }
1692
1693 /* Swizzle a small RHS vector into the channels being written.
1694 *
1695 * glsl ir treats write_mask as dictating how many channels are
1696 * present on the RHS while in our instructions we need to make
1697 * those channels appear in the slots of the vec4 they're written to.
1698 */
1699 for (int i = 0; i < 4; i++) {
1700 if (dst.writemask & (1 << i))
1701 swizzles[i] = BRW_GET_SWZ(src.swizzle, src_chan++);
1702 else
1703 swizzles[i] = first_enabled_chan;
1704 }
1705 src.swizzle = BRW_SWIZZLE4(swizzles[0], swizzles[1],
1706 swizzles[2], swizzles[3]);
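/* For illustration: writing a two-channel RHS into "v.yz" produces the
 * swizzle .yxyy here -- destination channel y reads the RHS x, channel z
 * reads the RHS y, and the unwritten channels just repeat the first
 * enabled channel's selection (a don't-care value).
 */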
1707
1708 if (try_rewrite_rhs_to_dst(ir, dst, src, pre_rhs_inst, last_rhs_inst)) {
1709 return;
1710 }
1711
1712 if (ir->condition) {
1713 emit_bool_to_cond_code(ir->condition, &predicate);
1714 }
1715
1716 for (i = 0; i < type_size(ir->lhs->type); i++) {
1717 vec4_instruction *inst = emit(MOV(dst, src));
1718 inst->predicate = predicate;
1719
1720 dst.reg_offset++;
1721 src.reg_offset++;
1722 }
1723 }
1724
1725 void
1726 vec4_visitor::emit_constant_values(dst_reg *dst, ir_constant *ir)
1727 {
1728 if (ir->type->base_type == GLSL_TYPE_STRUCT) {
1729 foreach_list(node, &ir->components) {
1730 ir_constant *field_value = (ir_constant *)node;
1731
1732 emit_constant_values(dst, field_value);
1733 }
1734 return;
1735 }
1736
1737 if (ir->type->is_array()) {
1738 for (unsigned int i = 0; i < ir->type->length; i++) {
1739 emit_constant_values(dst, ir->array_elements[i]);
1740 }
1741 return;
1742 }
1743
1744 if (ir->type->is_matrix()) {
1745 for (int i = 0; i < ir->type->matrix_columns; i++) {
1746 float *vec = &ir->value.f[i * ir->type->vector_elements];
1747
1748 for (int j = 0; j < ir->type->vector_elements; j++) {
1749 dst->writemask = 1 << j;
1750 dst->type = BRW_REGISTER_TYPE_F;
1751
1752 emit(MOV(*dst, src_reg(vec[j])));
1753 }
1754 dst->reg_offset++;
1755 }
1756 return;
1757 }
1758
1759 int remaining_writemask = (1 << ir->type->vector_elements) - 1;
1760
1761 for (int i = 0; i < ir->type->vector_elements; i++) {
1762 if (!(remaining_writemask & (1 << i)))
1763 continue;
1764
1765 dst->writemask = 1 << i;
1766 dst->type = brw_type_for_base_type(ir->type);
1767
1768 /* Find other components that match the one we're about to
1769 * write. Emits fewer instructions for things like vec4(0.5,
1770 * 1.5, 1.5, 1.5).
1771 */
1772 for (int j = i + 1; j < ir->type->vector_elements; j++) {
1773 if (ir->type->base_type == GLSL_TYPE_BOOL) {
1774 if (ir->value.b[i] == ir->value.b[j])
1775 dst->writemask |= (1 << j);
1776 } else {
1777 /* u, i, and f storage all line up, so no need for a
1778 * switch case for comparing each type.
1779 */
1780 if (ir->value.u[i] == ir->value.u[j])
1781 dst->writemask |= (1 << j);
1782 }
1783 }
1784
1785 switch (ir->type->base_type) {
1786 case GLSL_TYPE_FLOAT:
1787 emit(MOV(*dst, src_reg(ir->value.f[i])));
1788 break;
1789 case GLSL_TYPE_INT:
1790 emit(MOV(*dst, src_reg(ir->value.i[i])));
1791 break;
1792 case GLSL_TYPE_UINT:
1793 emit(MOV(*dst, src_reg(ir->value.u[i])));
1794 break;
1795 case GLSL_TYPE_BOOL:
1796 emit(MOV(*dst, src_reg(ir->value.b[i])));
1797 break;
1798 default:
1799 assert(!"Non-float/uint/int/bool constant");
1800 break;
1801 }
1802
1803 remaining_writemask &= ~dst->writemask;
1804 }
1805 dst->reg_offset++;
1806 }
1807
1808 void
1809 vec4_visitor::visit(ir_constant *ir)
1810 {
1811 dst_reg dst = dst_reg(this, ir->type);
1812 this->result = src_reg(dst);
1813
1814 emit_constant_values(&dst, ir);
1815 }
1816
1817 void
1818 vec4_visitor::visit(ir_call *ir)
1819 {
1820 assert(!"not reached");
1821 }
1822
1823 void
1824 vec4_visitor::visit(ir_texture *ir)
1825 {
1826 int sampler = _mesa_get_sampler_uniform_value(ir->sampler, prog, &vp->Base);
1827 sampler = vp->Base.SamplerUnits[sampler];
1828
1829 /* Should be lowered by do_lower_texture_projection */
1830 assert(!ir->projector);
1831
1832 vec4_instruction *inst = NULL;
1833 switch (ir->op) {
1834 case ir_tex:
1835 case ir_txl:
1836 inst = new(mem_ctx) vec4_instruction(this, SHADER_OPCODE_TXL);
1837 break;
1838 case ir_txd:
1839 inst = new(mem_ctx) vec4_instruction(this, SHADER_OPCODE_TXD);
1840 break;
1841 case ir_txf:
1842 inst = new(mem_ctx) vec4_instruction(this, SHADER_OPCODE_TXF);
1843 break;
1844 case ir_txs:
1845 inst = new(mem_ctx) vec4_instruction(this, SHADER_OPCODE_TXS);
1846 break;
1847 case ir_txb:
1848 assert(!"TXB is not valid for vertex shaders.");
1849 }
1850
1851 /* Texel offsets go in the message header; Gen4 also requires headers. */
1852 inst->header_present = ir->offset || intel->gen < 5;
1853 inst->base_mrf = 2;
1854 inst->mlen = inst->header_present + 1; /* always at least one */
1855 inst->sampler = sampler;
1856 inst->dst = dst_reg(this, ir->type);
1857 inst->shadow_compare = ir->shadow_comparitor != NULL;
1858
1859 if (ir->offset != NULL && ir->op != ir_txf)
1860 inst->texture_offset = brw_texture_offset(ir->offset->as_constant());
1861
1862 /* MRF for the first parameter */
1863 int param_base = inst->base_mrf + inst->header_present;
1864
1865 if (ir->op == ir_txs) {
1866 ir->lod_info.lod->accept(this);
1867 int writemask = intel->gen == 4 ? WRITEMASK_W : WRITEMASK_X;
1868 emit(MOV(dst_reg(MRF, param_base, ir->lod_info.lod->type, writemask),
1869 this->result));
1870 } else {
1871 int i, coord_mask = 0, zero_mask = 0;
1872 /* Load the coordinate */
1873 /* FINISHME: gl_clamp_mask and saturate */
1874 for (i = 0; i < ir->coordinate->type->vector_elements; i++)
1875 coord_mask |= (1 << i);
1876 for (; i < 4; i++)
1877 zero_mask |= (1 << i);
1878
1879 ir->coordinate->accept(this);
1880 if (ir->offset && ir->op == ir_txf) {
1881 /* It appears that the ld instruction used for txf does its
1882 * address bounds check before adding in the offset. To work
1883 * around this, just add the integer offset to the integer
1884 * texel coordinate, and don't put the offset in the header.
1885 */
1886 ir_constant *offset = ir->offset->as_constant();
1887 assert(offset);
1888
1889 for (int j = 0; j < ir->coordinate->type->vector_elements; j++) {
1890 src_reg src = this->result;
1891 src.swizzle = BRW_SWIZZLE4(BRW_GET_SWZ(src.swizzle, j),
1892 BRW_GET_SWZ(src.swizzle, j),
1893 BRW_GET_SWZ(src.swizzle, j),
1894 BRW_GET_SWZ(src.swizzle, j));
1895 emit(ADD(dst_reg(MRF, param_base, ir->coordinate->type, 1 << j),
1896 src, offset->value.i[j]));
1897 }
1898 } else {
1899 emit(MOV(dst_reg(MRF, param_base, ir->coordinate->type, coord_mask),
1900 this->result));
1901 }
1902 emit(MOV(dst_reg(MRF, param_base, ir->coordinate->type, zero_mask),
1903 src_reg(0)));
1904 /* Load the shadow comparitor */
1905 if (ir->shadow_comparitor) {
1906 ir->shadow_comparitor->accept(this);
1907 emit(MOV(dst_reg(MRF, param_base + 1, ir->shadow_comparitor->type,
1908 WRITEMASK_X),
1909 this->result));
1910 inst->mlen++;
1911 }
1912
1913 /* Load the LOD info */
1914 if (ir->op == ir_txl) {
1915 int mrf, writemask;
1916 if (intel->gen >= 5) {
1917 mrf = param_base + 1;
1918 if (ir->shadow_comparitor) {
1919 writemask = WRITEMASK_Y;
1920 /* mlen already incremented */
1921 } else {
1922 writemask = WRITEMASK_X;
1923 inst->mlen++;
1924 }
1925 } else /* intel->gen == 4 */ {
1926 mrf = param_base;
1927 writemask = WRITEMASK_Z;
1928 }
1929 ir->lod_info.lod->accept(this);
1930 emit(MOV(dst_reg(MRF, mrf, ir->lod_info.lod->type, writemask),
1931 this->result));
1932 } else if (ir->op == ir_txf) {
1933 ir->lod_info.lod->accept(this);
1934 emit(MOV(dst_reg(MRF, param_base, ir->lod_info.lod->type, WRITEMASK_W),
1935 this->result));
1936 } else if (ir->op == ir_txd) {
1937 const glsl_type *type = ir->lod_info.grad.dPdx->type;
1938
1939 ir->lod_info.grad.dPdx->accept(this);
1940 src_reg dPdx = this->result;
1941 ir->lod_info.grad.dPdy->accept(this);
1942 src_reg dPdy = this->result;
1943
1944 if (intel->gen >= 5) {
1945 dPdx.swizzle = BRW_SWIZZLE4(SWIZZLE_X,SWIZZLE_X,SWIZZLE_Y,SWIZZLE_Y);
1946 dPdy.swizzle = BRW_SWIZZLE4(SWIZZLE_X,SWIZZLE_X,SWIZZLE_Y,SWIZZLE_Y);
1947 emit(MOV(dst_reg(MRF, param_base + 1, type, WRITEMASK_XZ), dPdx));
1948 emit(MOV(dst_reg(MRF, param_base + 1, type, WRITEMASK_YW), dPdy));
1949 inst->mlen++;
1950
1951 if (ir->type->vector_elements == 3) {
1952 dPdx.swizzle = BRW_SWIZZLE_ZZZZ;
1953 dPdy.swizzle = BRW_SWIZZLE_ZZZZ;
1954 emit(MOV(dst_reg(MRF, param_base + 2, type, WRITEMASK_X), dPdx));
1955 emit(MOV(dst_reg(MRF, param_base + 2, type, WRITEMASK_Y), dPdy));
1956 inst->mlen++;
1957 }
1958 } else /* intel->gen == 4 */ {
1959 emit(MOV(dst_reg(MRF, param_base + 1, type, WRITEMASK_XYZ), dPdx));
1960 emit(MOV(dst_reg(MRF, param_base + 2, type, WRITEMASK_XYZ), dPdy));
1961 inst->mlen += 2;
1962 }
1963 }
1964 }
1965
1966 emit(inst);
1967
1968 swizzle_result(ir, src_reg(inst->dst), sampler);
1969 }
1970
1971 void
1972 vec4_visitor::swizzle_result(ir_texture *ir, src_reg orig_val, int sampler)
1973 {
1974 this->result = orig_val;
1975
1976 int s = c->key.tex.swizzles[sampler];
1977
1978 if (ir->op == ir_txs || ir->type == glsl_type::float_type
1979 || s == SWIZZLE_NOOP)
1980 return;
1981
1982 int zero_mask = 0, one_mask = 0, copy_mask = 0;
1983 int swizzle[4];
1984
1985 for (int i = 0; i < 4; i++) {
1986 switch (GET_SWZ(s, i)) {
1987 case SWIZZLE_ZERO:
1988 zero_mask |= (1 << i);
1989 break;
1990 case SWIZZLE_ONE:
1991 one_mask |= (1 << i);
1992 break;
1993 default:
1994 copy_mask |= (1 << i);
1995 swizzle[i] = GET_SWZ(s, i);
1996 break;
1997 }
1998 }
1999
2000 this->result = src_reg(this, ir->type);
2001 dst_reg swizzled_result(this->result);
2002
2003 if (copy_mask) {
2004 orig_val.swizzle = BRW_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]);
2005 swizzled_result.writemask = copy_mask;
2006 emit(MOV(swizzled_result, orig_val));
2007 }
2008
2009 if (zero_mask) {
2010 swizzled_result.writemask = zero_mask;
2011 emit(MOV(swizzled_result, src_reg(0.0f)));
2012 }
2013
2014 if (one_mask) {
2015 swizzled_result.writemask = one_mask;
2016 emit(MOV(swizzled_result, src_reg(1.0f)));
2017 }
2018 }
2019
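/* Function calls are inlined away before the backend runs, so no
 * ir_return should survive to this point.
 */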
2020 void
2021 vec4_visitor::visit(ir_return *ir)
2022 {
2023 assert(!"not reached");
2024 }
2025
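/* discard is only valid in fragment shaders, so the vec4 (VS) backend
 * should never see it.
 */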
2026 void
2027 vec4_visitor::visit(ir_discard *ir)
2028 {
2029 assert(!"not reached");
2030 }
2031
2032 void
2033 vec4_visitor::visit(ir_if *ir)
2034 {
2035 /* Don't point the annotation at the if statement, because then it plus
2036 * the then and else blocks get printed.
2037 */
2038 this->base_ir = ir->condition;
2039
2040 if (intel->gen == 6) {
2041 emit_if_gen6(ir);
2042 } else {
2043 uint32_t predicate;
2044 emit_bool_to_cond_code(ir->condition, &predicate);
2045 emit(IF(predicate));
2046 }
2047
2048 visit_instructions(&ir->then_instructions);
2049
2050 if (!ir->else_instructions.is_empty()) {
2051 this->base_ir = ir->condition;
2052 emit(BRW_OPCODE_ELSE);
2053
2054 visit_instructions(&ir->else_instructions);
2055 }
2056
2057 this->base_ir = ir->condition;
2058 emit(BRW_OPCODE_ENDIF);
2059 }
2060
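/**
 * Compute the NDC output (x/w, y/w, z/w, 1/w) from gl_Position.
 *
 * Only called for gen < 6 (see emit_urb_writes()), where the VUE still
 * carries an NDC slot for the fixed-function pipeline.
 */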
2061 void
2062 vec4_visitor::emit_ndc_computation()
2063 {
2064 /* Get the position */
2065 src_reg pos = src_reg(output_reg[VERT_RESULT_HPOS]);
2066
2067 /* Build ndc coords, which are (x/w, y/w, z/w, 1/w) */
2068 dst_reg ndc = dst_reg(this, glsl_type::vec4_type);
2069 output_reg[BRW_VERT_RESULT_NDC] = ndc;
2070
2071 current_annotation = "NDC";
2072 dst_reg ndc_w = ndc;
2073 ndc_w.writemask = WRITEMASK_W;
2074 src_reg pos_w = pos;
2075 pos_w.swizzle = BRW_SWIZZLE4(SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W);
2076 emit_math(SHADER_OPCODE_RCP, ndc_w, pos_w);
2077
2078 dst_reg ndc_xyz = ndc;
2079 ndc_xyz.writemask = WRITEMASK_XYZ;
2080
2081 emit(MUL(ndc_xyz, pos, src_reg(ndc_w)));
2082 }
2083
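/**
 * Fill the VUE header slot that carries the point size and clip flags.
 *
 * On gen4/5 the point width and per-plane user clip flags are packed
 * into a single UD dword; on gen6+ the point size simply goes in the
 * .w channel of the slot.
 */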
2084 void
2085 vec4_visitor::emit_psiz_and_flags(struct brw_reg reg)
2086 {
2087 if (intel->gen < 6 &&
2088 ((c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) ||
2089 c->key.userclip_active || brw->has_negative_rhw_bug)) {
2090 dst_reg header1 = dst_reg(this, glsl_type::uvec4_type);
2091 dst_reg header1_w = header1;
2092 header1_w.writemask = WRITEMASK_W;
2093 GLuint i;
2094
2095 emit(MOV(header1, 0u));
2096
2097 if (c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) {
2098 src_reg psiz = src_reg(output_reg[VERT_RESULT_PSIZ]);
2099
2100 current_annotation = "Point size";
2101 emit(MUL(header1_w, psiz, src_reg((float)(1 << 11))));
2102 emit(AND(header1_w, src_reg(header1_w), 0x7ff << 8));
2103 }
2104
2105 current_annotation = "Clipping flags";
2106 for (i = 0; i < c->key.nr_userclip_plane_consts; i++) {
2107 vec4_instruction *inst;
2108
2109 inst = emit(DP4(dst_null_f(), src_reg(output_reg[VERT_RESULT_HPOS]),
2110 src_reg(this->userplane[i])));
2111 inst->conditional_mod = BRW_CONDITIONAL_L;
2112
2113 inst = emit(OR(header1_w, src_reg(header1_w), 1u << i));
2114 inst->predicate = BRW_PREDICATE_NORMAL;
2115 }
2116
2117 /* i965 clipping workaround:
2118 * 1) Test for -ve rhw
2119 * 2) If set,
2120 * set ndc = (0,0,0,0)
2121 * set ucp[6] = 1
2122 *
2123 * Later, clipping will detect ucp[6] and ensure the primitive is
2124 * clipped against all fixed planes.
2125 */
2126 if (brw->has_negative_rhw_bug) {
2127 #if 0
2128 /* FINISHME */
2129 brw_CMP(p,
2130 vec8(brw_null_reg()),
2131 BRW_CONDITIONAL_L,
2132 brw_swizzle1(output_reg[BRW_VERT_RESULT_NDC], 3),
2133 brw_imm_f(0));
2134
2135 brw_OR(p, brw_writemask(header1, WRITEMASK_W), header1, brw_imm_ud(1<<6));
2136 brw_MOV(p, output_reg[BRW_VERT_RESULT_NDC], brw_imm_f(0));
2137 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
2138 #endif
2139 }
2140
2141 emit(MOV(retype(reg, BRW_REGISTER_TYPE_UD), src_reg(header1)));
2142 } else if (intel->gen < 6) {
2143 emit(MOV(retype(reg, BRW_REGISTER_TYPE_UD), 0u));
2144 } else {
2145 emit(MOV(retype(reg, BRW_REGISTER_TYPE_D), src_reg(0)));
2146 if (c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) {
2147 emit(MOV(brw_writemask(reg, WRITEMASK_W),
2148 src_reg(output_reg[VERT_RESULT_PSIZ])));
2149 }
2150 }
2151 }
2152
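/**
 * Write up to four user clip distances starting at plane @offset.
 *
 * Each distance is the DP4 of the clip vertex (gl_ClipVertex if the
 * shader wrote it, otherwise gl_Position) with the corresponding user
 * clip plane.  Does nothing on gen < 6.
 */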
2153 void
2154 vec4_visitor::emit_clip_distances(struct brw_reg reg, int offset)
2155 {
2156 if (intel->gen < 6) {
2157 /* Clip distance slots are set aside in gen5, but they are not used. It
2158 * is not clear whether we actually need to set aside space for them,
2159 * but the performance cost is negligible.
2160 */
2161 return;
2162 }
2163
2164 /* From the GLSL 1.30 spec, section 7.1 (Vertex Shader Special Variables):
2165 *
2166 * "If a linked set of shaders forming the vertex stage contains no
2167 * static write to gl_ClipVertex or gl_ClipDistance, but the
2168 * application has requested clipping against user clip planes through
2169 * the API, then the coordinate written to gl_Position is used for
2170 * comparison against the user clip planes."
2171 *
2172 * This function is only called if the shader didn't write to
2173 * gl_ClipDistance. Accordingly, we use gl_ClipVertex to perform clipping
2174 * if the user wrote to it; otherwise we use gl_Position.
2175 */
2176 gl_vert_result clip_vertex = VERT_RESULT_CLIP_VERTEX;
2177 if (!(c->prog_data.outputs_written
2178 & BITFIELD64_BIT(VERT_RESULT_CLIP_VERTEX))) {
2179 clip_vertex = VERT_RESULT_HPOS;
2180 }
2181
2182 for (int i = 0; i + offset < c->key.nr_userclip_plane_consts && i < 4;
2183 ++i) {
2184 emit(DP4(dst_reg(brw_writemask(reg, 1 << i)),
2185 src_reg(output_reg[clip_vertex]),
2186 src_reg(this->userplane[i + offset])));
2187 }
2188 }
2189
2190 void
2191 vec4_visitor::emit_generic_urb_slot(dst_reg reg, int vert_result)
2192 {
2193 assert (vert_result < VERT_RESULT_MAX);
2194 reg.type = output_reg[vert_result].type;
2195 current_annotation = output_reg_annotation[vert_result];
2196 /* Copy the register, saturating if necessary */
2197 vec4_instruction *inst = emit(MOV(reg,
2198 src_reg(output_reg[vert_result])));
2199 if ((vert_result == VERT_RESULT_COL0 ||
2200 vert_result == VERT_RESULT_COL1 ||
2201 vert_result == VERT_RESULT_BFC0 ||
2202 vert_result == VERT_RESULT_BFC1) &&
2203 c->key.clamp_vertex_color) {
2204 inst->saturate = true;
2205 }
2206 }
2207
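/**
 * Write a single VUE slot's worth of data into MRF @mrf.
 *
 * PSIZ/flags, NDC, gl_Position, and the clip distance slots get special
 * handling; everything else is a plain copy of the staged output reg.
 */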
2208 void
2209 vec4_visitor::emit_urb_slot(int mrf, int vert_result)
2210 {
2211 struct brw_reg hw_reg = brw_message_reg(mrf);
2212 dst_reg reg = dst_reg(MRF, mrf);
2213 reg.type = BRW_REGISTER_TYPE_F;
2214
2215 switch (vert_result) {
2216 case VERT_RESULT_PSIZ:
2217 /* PSIZ is always in slot 0, and is coupled with other flags. */
2218 current_annotation = "indices, point width, clip flags";
2219 emit_psiz_and_flags(hw_reg);
2220 break;
2221 case BRW_VERT_RESULT_NDC:
2222 current_annotation = "NDC";
2223 emit(MOV(reg, src_reg(output_reg[BRW_VERT_RESULT_NDC])));
2224 break;
2225 case BRW_VERT_RESULT_HPOS_DUPLICATE:
2226 case VERT_RESULT_HPOS:
2227 current_annotation = "gl_Position";
2228 emit(MOV(reg, src_reg(output_reg[VERT_RESULT_HPOS])));
2229 break;
2230 case VERT_RESULT_CLIP_DIST0:
2231 case VERT_RESULT_CLIP_DIST1:
2232 if (this->c->key.uses_clip_distance) {
2233 emit_generic_urb_slot(reg, vert_result);
2234 } else {
2235 current_annotation = "user clip distances";
2236 emit_clip_distances(hw_reg, (vert_result - VERT_RESULT_CLIP_DIST0) * 4);
2237 }
2238 break;
2239 case BRW_VERT_RESULT_PAD:
2240 /* No need to write to this slot */
2241 break;
2242 default:
2243 emit_generic_urb_slot(reg, vert_result);
2244 break;
2245 }
2246 }
2247
2248 static int
2249 align_interleaved_urb_mlen(struct brw_context *brw, int mlen)
2250 {
2251 struct intel_context *intel = &brw->intel;
2252
2253 if (intel->gen >= 6) {
2254 /* URB data written (does not include the message header reg) must
2255 * be a multiple of 256 bits, or 2 VS registers. See vol5c.5,
2256 * section 5.4.3.2.2: URB_INTERLEAVED.
2257 *
2258 * URB entries are allocated on a multiple of 1024 bits, so an
2259 * extra 128 bits written here to make the end align to 256 is
2260 * no problem.
2261 */
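      /* mlen includes the one message header register, so the data
       * payload is a multiple of two registers exactly when mlen is odd.
       */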
2262 if ((mlen % 2) != 1)
2263 mlen++;
2264 }
2265
2266 return mlen;
2267 }
2268
2269 /**
2270 * Generates the VUE payload plus the 1 or 2 URB write instructions to
2271 * complete the VS thread.
2272 *
2273 * The VUE layout is documented in Volume 2a.
2274 */
2275 void
2276 vec4_visitor::emit_urb_writes()
2277 {
2278 /* MRF 0 is reserved for the debugger, so start with message header
2279 * in MRF 1.
2280 */
2281 int base_mrf = 1;
2282 int mrf = base_mrf;
2283 /* In the process of generating our URB write message contents, we
2284 * may need to unspill a register or load from an array. Those
2285 * reads would use MRFs 14-15.
2286 */
2287 int max_usable_mrf = 13;
2288
2289 /* The following assertion verifies that max_usable_mrf causes an
2290 * even-numbered amount of URB write data, which will meet gen6's
2291 * requirements for length alignment.
2292 */
2293 assert ((max_usable_mrf - base_mrf) % 2 == 0);
2294
2295 /* FINISHME: edgeflag */
2296
2297 /* First mrf is the g0-based message header containing URB handles and such,
2298 * which is implied in VS_OPCODE_URB_WRITE.
2299 */
2300 mrf++;
2301
2302 if (intel->gen < 6) {
2303 emit_ndc_computation();
2304 }
2305
2306 /* Set up the VUE data for the first URB write */
2307 int slot;
2308 for (slot = 0; slot < c->prog_data.vue_map.num_slots; ++slot) {
2309 emit_urb_slot(mrf++, c->prog_data.vue_map.slot_to_vert_result[slot]);
2310
2311 /* If this was max_usable_mrf, we can't fit anything more into this URB
2312 * WRITE.
2313 */
2314 if (mrf > max_usable_mrf) {
2315 slot++;
2316 break;
2317 }
2318 }
2319
2320 current_annotation = "URB write";
2321 vec4_instruction *inst = emit(VS_OPCODE_URB_WRITE);
2322 inst->base_mrf = base_mrf;
2323 inst->mlen = align_interleaved_urb_mlen(brw, mrf - base_mrf);
2324 inst->eot = (slot >= c->prog_data.vue_map.num_slots);
2325
2326 /* Optional second URB write */
2327 if (!inst->eot) {
2328 mrf = base_mrf + 1;
2329
2330 for (; slot < c->prog_data.vue_map.num_slots; ++slot) {
2331 assert(mrf < max_usable_mrf);
2332
2333 emit_urb_slot(mrf++, c->prog_data.vue_map.slot_to_vert_result[slot]);
2334 }
2335
2336 current_annotation = "URB write";
2337 inst = emit(VS_OPCODE_URB_WRITE);
2338 inst->base_mrf = base_mrf;
2339 inst->mlen = align_interleaved_urb_mlen(brw, mrf - base_mrf);
2340 inst->eot = true;
2341       /* URB destination offset.  In the previous write we used MRFs
2342        * 1-13; excluding the one header MRF, that leaves 12 data regs.
2343        * URB offset is in URB row increments, and each of our MRFs is
2344        * half of one of those, since we're doing interleaved writes.
2345        */
2346 inst->offset = (max_usable_mrf - base_mrf) / 2;
2347 }
2348 }
2349
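/**
 * Compute the offset source for a scratch read/write of @reg_offset,
 * either as an immediate or, when relative addressing is used, as a
 * freshly computed register based on *@reladdr.
 */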
2350 src_reg
2351 vec4_visitor::get_scratch_offset(vec4_instruction *inst,
2352 src_reg *reladdr, int reg_offset)
2353 {
2354 /* Because we store the values to scratch interleaved like our
2355 * vertex data, we need to scale the vec4 index by 2.
2356 */
2357 int message_header_scale = 2;
2358
2359 /* Pre-gen6, the message header uses byte offsets instead of vec4
2360 * (16-byte) offset units.
2361 */
2362 if (intel->gen < 6)
2363 message_header_scale *= 16;
2364
2365 if (reladdr) {
2366 src_reg index = src_reg(this, glsl_type::int_type);
2367
2368 emit_before(inst, ADD(dst_reg(index), *reladdr, src_reg(reg_offset)));
2369 emit_before(inst, MUL(dst_reg(index),
2370 index, src_reg(message_header_scale)));
2371
2372 return index;
2373 } else {
2374 return src_reg(reg_offset * message_header_scale);
2375 }
2376 }
2377
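/**
 * Compute the offset source for a pull constant load of @reg_offset.
 *
 * Like get_scratch_offset(), but without the 2x interleaving scale,
 * since the constant buffer holds one vec4 per slot.
 */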
2378 src_reg
2379 vec4_visitor::get_pull_constant_offset(vec4_instruction *inst,
2380 src_reg *reladdr, int reg_offset)
2381 {
2382 if (reladdr) {
2383 src_reg index = src_reg(this, glsl_type::int_type);
2384
2385 emit_before(inst, ADD(dst_reg(index), *reladdr, src_reg(reg_offset)));
2386
2387 /* Pre-gen6, the message header uses byte offsets instead of vec4
2388 * (16-byte) offset units.
2389 */
2390 if (intel->gen < 6) {
2391 emit_before(inst, MUL(dst_reg(index), index, src_reg(16)));
2392 }
2393
2394 return index;
2395 } else {
2396 int message_header_scale = intel->gen < 6 ? 16 : 1;
2397 return src_reg(reg_offset * message_header_scale);
2398 }
2399 }
2400
2401 /**
2402 * Emits an instruction before @inst to load the value named by @orig_src
2403 * from scratch space at @base_offset to @temp.
2404 */
2405 void
2406 vec4_visitor::emit_scratch_read(vec4_instruction *inst,
2407 dst_reg temp, src_reg orig_src,
2408 int base_offset)
2409 {
2410 int reg_offset = base_offset + orig_src.reg_offset;
2411 src_reg index = get_scratch_offset(inst, orig_src.reladdr, reg_offset);
2412
2413 emit_before(inst, SCRATCH_READ(temp, index));
2414 }
2415
2416 /**
2417 * Emits an instruction after @inst to store the value to be written
2418 * to @orig_dst to scratch space at @base_offset, from @temp.
2419 */
2420 void
2421 vec4_visitor::emit_scratch_write(vec4_instruction *inst,
2422 src_reg temp, dst_reg orig_dst,
2423 int base_offset)
2424 {
2425 int reg_offset = base_offset + orig_dst.reg_offset;
2426 src_reg index = get_scratch_offset(inst, orig_dst.reladdr, reg_offset);
2427
2428 dst_reg dst = dst_reg(brw_writemask(brw_vec8_grf(0, 0),
2429 orig_dst.writemask));
2430 vec4_instruction *write = SCRATCH_WRITE(dst, temp, index);
2431 write->predicate = inst->predicate;
2432 write->ir = inst->ir;
2433 write->annotation = inst->annotation;
2434 inst->insert_after(write);
2435 }
2436
2437 /**
2438 * We can't generally support array access in GRF space, because a
2439 * single instruction's destination can only span 2 contiguous
2440 * registers. So, we send all GRF arrays that get variable index
2441 * access to scratch space.
2442 */
2443 void
2444 vec4_visitor::move_grf_array_access_to_scratch()
2445 {
2446 int scratch_loc[this->virtual_grf_count];
2447
2448 for (int i = 0; i < this->virtual_grf_count; i++) {
2449 scratch_loc[i] = -1;
2450 }
2451
2452 /* First, calculate the set of virtual GRFs that need to be punted
2453 * to scratch due to having any array access on them, and where in
2454 * scratch.
2455 */
2456 foreach_list(node, &this->instructions) {
2457 vec4_instruction *inst = (vec4_instruction *)node;
2458
2459 if (inst->dst.file == GRF && inst->dst.reladdr &&
2460 scratch_loc[inst->dst.reg] == -1) {
2461 scratch_loc[inst->dst.reg] = c->last_scratch;
2462 c->last_scratch += this->virtual_grf_sizes[inst->dst.reg] * 8 * 4;
2463 }
2464
2465 for (int i = 0 ; i < 3; i++) {
2466 src_reg *src = &inst->src[i];
2467
2468 if (src->file == GRF && src->reladdr &&
2469 scratch_loc[src->reg] == -1) {
2470 scratch_loc[src->reg] = c->last_scratch;
2471 c->last_scratch += this->virtual_grf_sizes[src->reg] * 8 * 4;
2472 }
2473 }
2474 }
2475
2476 /* Now, for anything that will be accessed through scratch, rewrite
2477 * it to load/store. Note that this is a _safe list walk, because
2478 * we may generate a new scratch_write instruction after the one
2479 * we're processing.
2480 */
2481 foreach_list_safe(node, &this->instructions) {
2482 vec4_instruction *inst = (vec4_instruction *)node;
2483
2484       /* Set up the annotation tracking for newly generated instructions. */
2485 base_ir = inst->ir;
2486 current_annotation = inst->annotation;
2487
2488 if (inst->dst.file == GRF && scratch_loc[inst->dst.reg] != -1) {
2489 src_reg temp = src_reg(this, glsl_type::vec4_type);
2490
2491 emit_scratch_write(inst, temp, inst->dst, scratch_loc[inst->dst.reg]);
2492
2493 inst->dst.file = temp.file;
2494 inst->dst.reg = temp.reg;
2495 inst->dst.reg_offset = temp.reg_offset;
2496 inst->dst.reladdr = NULL;
2497 }
2498
2499 for (int i = 0 ; i < 3; i++) {
2500 if (inst->src[i].file != GRF || scratch_loc[inst->src[i].reg] == -1)
2501 continue;
2502
2503 dst_reg temp = dst_reg(this, glsl_type::vec4_type);
2504
2505 emit_scratch_read(inst, temp, inst->src[i],
2506 scratch_loc[inst->src[i].reg]);
2507
2508 inst->src[i].file = temp.file;
2509 inst->src[i].reg = temp.reg;
2510 inst->src[i].reg_offset = temp.reg_offset;
2511 inst->src[i].reladdr = NULL;
2512 }
2513 }
2514 }
2515
2516 /**
2517 * Emits an instruction before @inst to load the value named by @orig_src
2518 * from the pull constant buffer (surface) at @base_offset to @temp.
2519 */
2520 void
2521 vec4_visitor::emit_pull_constant_load(vec4_instruction *inst,
2522 dst_reg temp, src_reg orig_src,
2523 int base_offset)
2524 {
2525 int reg_offset = base_offset + orig_src.reg_offset;
2526 src_reg index = get_pull_constant_offset(inst, orig_src.reladdr, reg_offset);
2527 vec4_instruction *load;
2528
2529 load = new(mem_ctx) vec4_instruction(this, VS_OPCODE_PULL_CONSTANT_LOAD,
2530 temp, index);
2531 load->base_mrf = 14;
2532 load->mlen = 1;
2533 emit_before(inst, load);
2534 }
2535
2536 /**
2537 * Implements array access of uniforms by inserting a
2538 * PULL_CONSTANT_LOAD instruction.
2539 *
2540  * Unlike temporary GRF array access (which we don't support due to
2541 * the difficulty of doing relative addressing on instruction
2542 * destinations), we could potentially do array access of uniforms
2543 * that were loaded in GRF space as push constants. In real-world
2544 * usage we've seen, though, the arrays being used are always larger
2545 * than we could load as push constants, so just always move all
2546 * uniform array access out to a pull constant buffer.
2547 */
2548 void
2549 vec4_visitor::move_uniform_array_access_to_pull_constants()
2550 {
2551 int pull_constant_loc[this->uniforms];
2552
2553 for (int i = 0; i < this->uniforms; i++) {
2554 pull_constant_loc[i] = -1;
2555 }
2556
2557 /* Walk through and find array access of uniforms. Put a copy of that
2558 * uniform in the pull constant buffer.
2559 *
2560 * Note that we don't move constant-indexed accesses to arrays. No
2561 * testing has been done of the performance impact of this choice.
2562 */
2563 foreach_list_safe(node, &this->instructions) {
2564 vec4_instruction *inst = (vec4_instruction *)node;
2565
2566 for (int i = 0 ; i < 3; i++) {
2567 if (inst->src[i].file != UNIFORM || !inst->src[i].reladdr)
2568 continue;
2569
2570 int uniform = inst->src[i].reg;
2571
2572 /* If this array isn't already present in the pull constant buffer,
2573 * add it.
2574 */
2575 if (pull_constant_loc[uniform] == -1) {
2576 const float **values = &prog_data->param[uniform * 4];
2577
2578 pull_constant_loc[uniform] = prog_data->nr_pull_params / 4;
2579
2580 for (int j = 0; j < uniform_size[uniform] * 4; j++) {
2581 prog_data->pull_param[prog_data->nr_pull_params++] = values[j];
2582 }
2583 }
2584
2585          /* Set up the annotation tracking for newly generated instructions. */
2586 base_ir = inst->ir;
2587 current_annotation = inst->annotation;
2588
2589 dst_reg temp = dst_reg(this, glsl_type::vec4_type);
2590
2591 emit_pull_constant_load(inst, temp, inst->src[i],
2592 pull_constant_loc[uniform]);
2593
2594 inst->src[i].file = temp.file;
2595 inst->src[i].reg = temp.reg;
2596 inst->src[i].reg_offset = temp.reg_offset;
2597 inst->src[i].reladdr = NULL;
2598 }
2599 }
2600
2601 /* Now there are no accesses of the UNIFORM file with a reladdr, so
2602 * no need to track them as larger-than-vec4 objects. This will be
2603 * relied on in cutting out unused uniform vectors from push
2604 * constants.
2605 */
2606 split_uniform_registers();
2607 }
2608
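/**
 * If @reg is a UD-typed source with the negate modifier set, bake the
 * negation into a fresh temporary with a MOV and rewrite @reg to use
 * it, since leaving the modifier on an unsigned operand can give
 * surprising results in later operations.
 */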
2609 void
2610 vec4_visitor::resolve_ud_negate(src_reg *reg)
2611 {
2612 if (reg->type != BRW_REGISTER_TYPE_UD ||
2613 !reg->negate)
2614 return;
2615
2616 src_reg temp = src_reg(this, glsl_type::uvec4_type);
2617 emit(BRW_OPCODE_MOV, dst_reg(temp), *reg);
2618 *reg = temp;
2619 }
2620
2621 vec4_visitor::vec4_visitor(struct brw_vs_compile *c,
2622 struct gl_shader_program *prog,
2623 struct brw_shader *shader)
2624 {
2625 this->c = c;
2626 this->p = &c->func;
2627 this->brw = p->brw;
2628 this->intel = &brw->intel;
2629 this->ctx = &intel->ctx;
2630 this->prog = prog;
2631 this->shader = shader;
2632
2633 this->mem_ctx = ralloc_context(NULL);
2634 this->failed = false;
2635
2636 this->base_ir = NULL;
2637 this->current_annotation = NULL;
2638
2640 this->vp = (struct gl_vertex_program *)
2641 prog->_LinkedShaders[MESA_SHADER_VERTEX]->Program;
2642 this->prog_data = &c->prog_data;
2643
2644 this->variable_ht = hash_table_ctor(0,
2645 hash_table_pointer_hash,
2646 hash_table_pointer_compare);
2647
2648 this->virtual_grf_def = NULL;
2649 this->virtual_grf_use = NULL;
2650 this->virtual_grf_sizes = NULL;
2651 this->virtual_grf_count = 0;
2652 this->virtual_grf_reg_map = NULL;
2653 this->virtual_grf_reg_count = 0;
2654 this->virtual_grf_array_size = 0;
2655 this->live_intervals_valid = false;
2656
2657 this->max_grf = intel->gen >= 7 ? GEN7_MRF_HACK_START : BRW_MAX_GRF;
2658
2659 this->uniforms = 0;
2660 }
2661
2662 vec4_visitor::~vec4_visitor()
2663 {
2664 ralloc_free(this->mem_ctx);
2665 hash_table_dtor(this->variable_ht);
2666 }
2667
2668
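/**
 * Mark the VS compile as failed, keeping the first failure message for
 * the caller.  When DEBUG_VS is set, the message is also printed to
 * stderr.
 */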
2669 void
2670 vec4_visitor::fail(const char *format, ...)
2671 {
2672 va_list va;
2673 char *msg;
2674
2675 if (failed)
2676 return;
2677
2678 failed = true;
2679
2680 va_start(va, format);
2681 msg = ralloc_vasprintf(mem_ctx, format, va);
2682 va_end(va);
2683 msg = ralloc_asprintf(mem_ctx, "VS compile failed: %s\n", msg);
2684
2685 this->fail_msg = msg;
2686
2687 if (INTEL_DEBUG & DEBUG_VS) {
2688 fprintf(stderr, "%s", msg);
2689 }
2690 }
2691
2692 } /* namespace brw */