glsl: Add a "ubo_load" expression type for fetches from UBOs.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_vec4_visitor.cpp
1 /*
2 * Copyright © 2011 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "brw_vec4.h"
25 extern "C" {
26 #include "main/macros.h"
27 #include "program/prog_parameter.h"
28 #include "program/sampler.h"
29 }
30
31 namespace brw {
32
33 vec4_instruction::vec4_instruction(vec4_visitor *v,
34 enum opcode opcode, dst_reg dst,
35 src_reg src0, src_reg src1, src_reg src2)
36 {
37 this->opcode = opcode;
38 this->dst = dst;
39 this->src[0] = src0;
40 this->src[1] = src1;
41 this->src[2] = src2;
42 this->ir = v->base_ir;
43 this->annotation = v->current_annotation;
44 }
45
46 vec4_instruction *
47 vec4_visitor::emit(vec4_instruction *inst)
48 {
49 this->instructions.push_tail(inst);
50
51 return inst;
52 }
53
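/**
 * Insert new_inst into the instruction stream immediately before inst,
 * copying inst's source IR pointer and annotation so debug output stays
 * attached to the right statement.
 */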
54 vec4_instruction *
55 vec4_visitor::emit_before(vec4_instruction *inst, vec4_instruction *new_inst)
56 {
57 new_inst->ir = inst->ir;
58 new_inst->annotation = inst->annotation;
59
60 inst->insert_before(new_inst);
61
62 return inst;
63 }
64
65 vec4_instruction *
66 vec4_visitor::emit(enum opcode opcode, dst_reg dst,
67 src_reg src0, src_reg src1, src_reg src2)
68 {
69 return emit(new(mem_ctx) vec4_instruction(this, opcode, dst,
70 src0, src1, src2));
71 }
72
73
74 vec4_instruction *
75 vec4_visitor::emit(enum opcode opcode, dst_reg dst, src_reg src0, src_reg src1)
76 {
77 return emit(new(mem_ctx) vec4_instruction(this, opcode, dst, src0, src1));
78 }
79
80 vec4_instruction *
81 vec4_visitor::emit(enum opcode opcode, dst_reg dst, src_reg src0)
82 {
83 return emit(new(mem_ctx) vec4_instruction(this, opcode, dst, src0));
84 }
85
86 vec4_instruction *
87 vec4_visitor::emit(enum opcode opcode)
88 {
89 return emit(new(mem_ctx) vec4_instruction(this, opcode, dst_reg()));
90 }
91
92 #define ALU1(op) \
93 vec4_instruction * \
94 vec4_visitor::op(dst_reg dst, src_reg src0) \
95 { \
96 return new(mem_ctx) vec4_instruction(this, BRW_OPCODE_##op, dst, \
97 src0); \
98 }
99
100 #define ALU2(op) \
101 vec4_instruction * \
102 vec4_visitor::op(dst_reg dst, src_reg src0, src_reg src1) \
103 { \
104 return new(mem_ctx) vec4_instruction(this, BRW_OPCODE_##op, dst, \
105 src0, src1); \
106 }
107
108 ALU1(NOT)
109 ALU1(MOV)
110 ALU1(FRC)
111 ALU1(RNDD)
112 ALU1(RNDE)
113 ALU1(RNDZ)
114 ALU2(ADD)
115 ALU2(MUL)
116 ALU2(MACH)
117 ALU2(AND)
118 ALU2(OR)
119 ALU2(XOR)
120 ALU2(DP3)
121 ALU2(DP4)
122
123 /** Gen4 predicated IF. */
124 vec4_instruction *
125 vec4_visitor::IF(uint32_t predicate)
126 {
127 vec4_instruction *inst;
128
129 inst = new(mem_ctx) vec4_instruction(this, BRW_OPCODE_IF);
130 inst->predicate = predicate;
131
132 return inst;
133 }
134
135 /** Gen6+ IF with embedded comparison. */
136 vec4_instruction *
137 vec4_visitor::IF(src_reg src0, src_reg src1, uint32_t condition)
138 {
139 assert(intel->gen >= 6);
140
141 vec4_instruction *inst;
142
143 resolve_ud_negate(&src0);
144 resolve_ud_negate(&src1);
145
146 inst = new(mem_ctx) vec4_instruction(this, BRW_OPCODE_IF, dst_null_d(),
147 src0, src1);
148 inst->conditional_mod = condition;
149
150 return inst;
151 }
152
153 /**
154 * CMP: Sets the low bit of the destination channels with the result
155 * of the comparison, while the upper bits are undefined, and updates
156 * the flag register with the packed 16 bits of the result.
157 */
158 vec4_instruction *
159 vec4_visitor::CMP(dst_reg dst, src_reg src0, src_reg src1, uint32_t condition)
160 {
161 vec4_instruction *inst;
162
163 /* original gen4 does type conversion to the destination type
164 * before comparison, producing garbage results for floating
165 * point comparisons.
166 */
167 if (intel->gen == 4) {
168 dst.type = src0.type;
169 if (dst.file == HW_REG)
170 dst.fixed_hw_reg.type = dst.type;
171 }
172
173 resolve_ud_negate(&src0);
174 resolve_ud_negate(&src1);
175
176 inst = new(mem_ctx) vec4_instruction(this, BRW_OPCODE_CMP, dst, src0, src1);
177 inst->conditional_mod = condition;
178
179 return inst;
180 }
181
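/**
 * Scratch reads and writes are send messages; these helpers reserve the
 * MRF payload registers and message lengths they will use.
 */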
182 vec4_instruction *
183 vec4_visitor::SCRATCH_READ(dst_reg dst, src_reg index)
184 {
185 vec4_instruction *inst;
186
187 inst = new(mem_ctx) vec4_instruction(this, VS_OPCODE_SCRATCH_READ,
188 dst, index);
189 inst->base_mrf = 14;
190 inst->mlen = 1;
191
192 return inst;
193 }
194
195 vec4_instruction *
196 vec4_visitor::SCRATCH_WRITE(dst_reg dst, src_reg src, src_reg index)
197 {
198 vec4_instruction *inst;
199
200 inst = new(mem_ctx) vec4_instruction(this, VS_OPCODE_SCRATCH_WRITE,
201 dst, src, index);
202 inst->base_mrf = 13;
203 inst->mlen = 2;
204
205 return inst;
206 }
207
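/** Emit a DP2, DP3 or DP4 covering the given number of source components. */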
208 void
209 vec4_visitor::emit_dp(dst_reg dst, src_reg src0, src_reg src1, unsigned elements)
210 {
211 static enum opcode dot_opcodes[] = {
212 BRW_OPCODE_DP2, BRW_OPCODE_DP3, BRW_OPCODE_DP4
213 };
214
215 emit(dot_opcodes[elements - 2], dst, src0, src1);
216 }
217
218 void
219 vec4_visitor::emit_math1_gen6(enum opcode opcode, dst_reg dst, src_reg src)
220 {
221 /* The gen6 math instruction ignores the source modifiers --
222 * swizzle, abs, negate, and at least some parts of the register
223 * region description.
224 *
225 * While it would seem that this MOV could be avoided at this point
226 * in the case that the swizzle is matched up with the destination
227 * writemask, note that uniform packing and register allocation
228 * could rearrange our swizzle, so let's leave this matter up to
229 * copy propagation later.
230 */
231 src_reg temp_src = src_reg(this, glsl_type::vec4_type);
232 emit(MOV(dst_reg(temp_src), src));
233
234 if (dst.writemask != WRITEMASK_XYZW) {
235 /* The gen6 math instruction must be align1, so we can't do
236 * writemasks.
237 */
238 dst_reg temp_dst = dst_reg(this, glsl_type::vec4_type);
239
240 emit(opcode, temp_dst, temp_src);
241
242 emit(MOV(dst, src_reg(temp_dst)));
243 } else {
244 emit(opcode, dst, temp_src);
245 }
246 }
247
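/**
 * On Gen4/5 the math functions are messages to the shared math unit, so
 * reserve the single-register MRF payload the send will use.
 */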
248 void
249 vec4_visitor::emit_math1_gen4(enum opcode opcode, dst_reg dst, src_reg src)
250 {
251 vec4_instruction *inst = emit(opcode, dst, src);
252 inst->base_mrf = 1;
253 inst->mlen = 1;
254 }
255
256 void
257 vec4_visitor::emit_math(opcode opcode, dst_reg dst, src_reg src)
258 {
259 switch (opcode) {
260 case SHADER_OPCODE_RCP:
261 case SHADER_OPCODE_RSQ:
262 case SHADER_OPCODE_SQRT:
263 case SHADER_OPCODE_EXP2:
264 case SHADER_OPCODE_LOG2:
265 case SHADER_OPCODE_SIN:
266 case SHADER_OPCODE_COS:
267 break;
268 default:
269 assert(!"not reached: bad math opcode");
270 return;
271 }
272
273 if (intel->gen >= 7) {
274 emit(opcode, dst, src);
275 } else if (intel->gen == 6) {
276 return emit_math1_gen6(opcode, dst, src);
277 } else {
278 return emit_math1_gen4(opcode, dst, src);
279 }
280 }
281
282 void
283 vec4_visitor::emit_math2_gen6(enum opcode opcode,
284 dst_reg dst, src_reg src0, src_reg src1)
285 {
286 src_reg expanded;
287
288 /* The gen6 math instruction ignores the source modifiers --
289 * swizzle, abs, negate, and at least some parts of the register
290 * region description. Move the sources to temporaries to make it
291 * generally work.
292 */
293
294 expanded = src_reg(this, glsl_type::vec4_type);
295 expanded.type = src0.type;
296 emit(MOV(dst_reg(expanded), src0));
297 src0 = expanded;
298
299 expanded = src_reg(this, glsl_type::vec4_type);
300 expanded.type = src1.type;
301 emit(MOV(dst_reg(expanded), src1));
302 src1 = expanded;
303
304 if (dst.writemask != WRITEMASK_XYZW) {
305 /* The gen6 math instruction must be align1, so we can't do
306 * writemasks.
307 */
308 dst_reg temp_dst = dst_reg(this, glsl_type::vec4_type);
309 temp_dst.type = dst.type;
310
311 emit(opcode, temp_dst, src0, src1);
312
313 emit(MOV(dst, src_reg(temp_dst)));
314 } else {
315 emit(opcode, dst, src0, src1);
316 }
317 }
318
319 void
320 vec4_visitor::emit_math2_gen4(enum opcode opcode,
321 dst_reg dst, src_reg src0, src_reg src1)
322 {
323 vec4_instruction *inst = emit(opcode, dst, src0, src1);
324 inst->base_mrf = 1;
325 inst->mlen = 2;
326 }
327
328 void
329 vec4_visitor::emit_math(enum opcode opcode,
330 dst_reg dst, src_reg src0, src_reg src1)
331 {
332 switch (opcode) {
333 case SHADER_OPCODE_POW:
334 case SHADER_OPCODE_INT_QUOTIENT:
335 case SHADER_OPCODE_INT_REMAINDER:
336 break;
337 default:
338 assert(!"not reached: unsupported binary math opcode");
339 return;
340 }
341
342 if (intel->gen >= 7) {
343 emit(opcode, dst, src0, src1);
344 } else if (intel->gen == 6) {
345 return emit_math2_gen6(opcode, dst, src0, src1);
346 } else {
347 return emit_math2_gen4(opcode, dst, src0, src1);
348 }
349 }
350
351 void
352 vec4_visitor::visit_instructions(const exec_list *list)
353 {
354 foreach_list(node, list) {
355 ir_instruction *ir = (ir_instruction *)node;
356
357 base_ir = ir;
358 ir->accept(this);
359 }
360 }
361
362
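/**
 * Returns the size of the given GLSL type in vec4 slots, which is the unit
 * of register allocation in the vec4 backend.
 */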
363 static int
364 type_size(const struct glsl_type *type)
365 {
366 unsigned int i;
367 int size;
368
369 switch (type->base_type) {
370 case GLSL_TYPE_UINT:
371 case GLSL_TYPE_INT:
372 case GLSL_TYPE_FLOAT:
373 case GLSL_TYPE_BOOL:
374 if (type->is_matrix()) {
375 return type->matrix_columns;
376 } else {
377 /* Regardless of size of vector, it gets a vec4. This is bad
378 * packing for things like floats, but otherwise arrays become a
379 * mess. Hopefully a later pass over the code can pack scalars
380 * down if appropriate.
381 */
382 return 1;
383 }
384 case GLSL_TYPE_ARRAY:
385 assert(type->length > 0);
386 return type_size(type->fields.array) * type->length;
387 case GLSL_TYPE_STRUCT:
388 size = 0;
389 for (i = 0; i < type->length; i++) {
390 size += type_size(type->fields.structure[i].type);
391 }
392 return size;
393 case GLSL_TYPE_SAMPLER:
394 /* Samplers take up one slot in UNIFORMS[], but they're baked in
395 * at link time.
396 */
397 return 1;
398 default:
399 assert(0);
400 return 0;
401 }
402 }
403
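/**
 * Allocate a virtual GRF of the given size in vec4 registers, growing the
 * bookkeeping arrays as needed, and return its index.
 */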
404 int
405 vec4_visitor::virtual_grf_alloc(int size)
406 {
407 if (virtual_grf_array_size <= virtual_grf_count) {
408 if (virtual_grf_array_size == 0)
409 virtual_grf_array_size = 16;
410 else
411 virtual_grf_array_size *= 2;
412 virtual_grf_sizes = reralloc(mem_ctx, virtual_grf_sizes, int,
413 virtual_grf_array_size);
414 virtual_grf_reg_map = reralloc(mem_ctx, virtual_grf_reg_map, int,
415 virtual_grf_array_size);
416 }
417 virtual_grf_reg_map[virtual_grf_count] = virtual_grf_reg_count;
418 virtual_grf_reg_count += size;
419 virtual_grf_sizes[virtual_grf_count] = size;
420 return virtual_grf_count++;
421 }
422
423 src_reg::src_reg(class vec4_visitor *v, const struct glsl_type *type)
424 {
425 init();
426
427 this->file = GRF;
428 this->reg = v->virtual_grf_alloc(type_size(type));
429
430 if (type->is_array() || type->is_record()) {
431 this->swizzle = BRW_SWIZZLE_NOOP;
432 } else {
433 this->swizzle = swizzle_for_size(type->vector_elements);
434 }
435
436 this->type = brw_type_for_base_type(type);
437 }
438
439 dst_reg::dst_reg(class vec4_visitor *v, const struct glsl_type *type)
440 {
441 init();
442
443 this->file = GRF;
444 this->reg = v->virtual_grf_alloc(type_size(type));
445
446 if (type->is_array() || type->is_record()) {
447 this->writemask = WRITEMASK_XYZW;
448 } else {
449 this->writemask = (1 << type->vector_elements) - 1;
450 }
451
452 this->type = brw_type_for_base_type(type);
453 }
454
455 /* Our support for uniforms is piggy-backed on the struct
456 * gl_vertex_program, because that's where the values actually
457 * get stored, rather than in some global gl_shader_program uniform
458 * store.
459 */
460 int
461 vec4_visitor::setup_uniform_values(int loc, const glsl_type *type)
462 {
463 unsigned int offset = 0;
464 float *values = &this->vp->Base.Parameters->ParameterValues[loc][0].f;
465
466 if (type->is_matrix()) {
467 const glsl_type *column = type->column_type();
468
469 for (unsigned int i = 0; i < type->matrix_columns; i++) {
470 offset += setup_uniform_values(loc + offset, column);
471 }
472
473 return offset;
474 }
475
476 switch (type->base_type) {
477 case GLSL_TYPE_FLOAT:
478 case GLSL_TYPE_UINT:
479 case GLSL_TYPE_INT:
480 case GLSL_TYPE_BOOL:
481 for (unsigned int i = 0; i < type->vector_elements; i++) {
482 c->prog_data.param[this->uniforms * 4 + i] = &values[i];
483 }
484
485 /* Set up pad elements to get things aligned to a vec4 boundary. */
486 for (unsigned int i = type->vector_elements; i < 4; i++) {
487 static float zero = 0;
488
489 c->prog_data.param[this->uniforms * 4 + i] = &zero;
490 }
491
492 /* Track the size of this uniform vector, for future packing of
493 * uniforms.
494 */
495 this->uniform_vector_size[this->uniforms] = type->vector_elements;
496 this->uniforms++;
497
498 return 1;
499
500 case GLSL_TYPE_STRUCT:
501 for (unsigned int i = 0; i < type->length; i++) {
502 offset += setup_uniform_values(loc + offset,
503 type->fields.structure[i].type);
504 }
505 return offset;
506
507 case GLSL_TYPE_ARRAY:
508 for (unsigned int i = 0; i < type->length; i++) {
509 offset += setup_uniform_values(loc + offset, type->fields.array);
510 }
511 return offset;
512
513 case GLSL_TYPE_SAMPLER:
514 /* The sampler takes up a slot, but we don't use any values from it. */
515 return 1;
516
517 default:
518 assert(!"not reached");
519 return 0;
520 }
521 }
522
523 void
524 vec4_visitor::setup_uniform_clipplane_values()
525 {
526 gl_clip_plane *clip_planes = brw_select_clip_planes(ctx);
527
528 /* Pre-Gen6, we compact clip planes. For example, if the user
529 * enables just clip planes 0, 1, and 3, we will enable clip planes
530 * 0, 1, and 2 in the hardware, and we'll move clip plane 3 to clip
531 * plane 2. This simplifies the implementation of the Gen6 clip
532 * thread.
533 *
534 * In Gen6 and later, we don't compact clip planes, because this
535 * simplifies the implementation of gl_ClipDistance.
536 */
537 int compacted_clipplane_index = 0;
538 for (int i = 0; i < c->key.nr_userclip_plane_consts; ++i) {
539 if (intel->gen < 6 &&
540 !(c->key.userclip_planes_enabled_gen_4_5 & (1 << i))) {
541 continue;
542 }
543 this->uniform_vector_size[this->uniforms] = 4;
544 this->userplane[compacted_clipplane_index] = dst_reg(UNIFORM, this->uniforms);
545 this->userplane[compacted_clipplane_index].type = BRW_REGISTER_TYPE_F;
546 for (int j = 0; j < 4; ++j) {
547 c->prog_data.param[this->uniforms * 4 + j] = &clip_planes[i][j];
548 }
549 ++compacted_clipplane_index;
550 ++this->uniforms;
551 }
552 }
553
554 /* Our support for builtin uniforms is even scarier than non-builtin.
555 * It sits on top of the PROG_STATE_VAR parameters that are
556 * automatically updated from GL context state.
557 */
558 void
559 vec4_visitor::setup_builtin_uniform_values(ir_variable *ir)
560 {
561 const ir_state_slot *const slots = ir->state_slots;
562 assert(ir->state_slots != NULL);
563
564 for (unsigned int i = 0; i < ir->num_state_slots; i++) {
565 /* This state reference has already been set up by ir_to_mesa,
566 * but we'll get the same index back here. We can reference
567 * ParameterValues directly, since unlike brw_fs.cpp, we never
568 * add new state references during compile.
569 */
570 int index = _mesa_add_state_reference(this->vp->Base.Parameters,
571 (gl_state_index *)slots[i].tokens);
572 float *values = &this->vp->Base.Parameters->ParameterValues[index][0].f;
573
574 this->uniform_vector_size[this->uniforms] = 0;
575 /* Add each of the unique swizzled channels of the element.
576 * This will end up matching the size of the glsl_type of this field.
577 */
578 int last_swiz = -1;
579 for (unsigned int j = 0; j < 4; j++) {
580 int swiz = GET_SWZ(slots[i].swizzle, j);
581 last_swiz = swiz;
582
583 c->prog_data.param[this->uniforms * 4 + j] = &values[swiz];
584 if (swiz <= last_swiz)
585 this->uniform_vector_size[this->uniforms]++;
586 }
587 this->uniforms++;
588 }
589 }
590
591 dst_reg *
592 vec4_visitor::variable_storage(ir_variable *var)
593 {
594 return (dst_reg *)hash_table_find(this->variable_ht, var);
595 }
596
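/**
 * Emit instructions that evaluate a boolean rvalue into the flag register,
 * and return in *predicate which predication mode (normal, or an
 * ALL4H/ANY4H reduction) the consumer of the flag should use.
 */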
597 void
598 vec4_visitor::emit_bool_to_cond_code(ir_rvalue *ir, uint32_t *predicate)
599 {
600 ir_expression *expr = ir->as_expression();
601
602 *predicate = BRW_PREDICATE_NORMAL;
603
604 if (expr) {
605 src_reg op[2];
606 vec4_instruction *inst;
607
608 assert(expr->get_num_operands() <= 2);
609 for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
610 expr->operands[i]->accept(this);
611 op[i] = this->result;
612
613 resolve_ud_negate(&op[i]);
614 }
615
616 switch (expr->operation) {
617 case ir_unop_logic_not:
618 inst = emit(AND(dst_null_d(), op[0], src_reg(1)));
619 inst->conditional_mod = BRW_CONDITIONAL_Z;
620 break;
621
622 case ir_binop_logic_xor:
623 inst = emit(XOR(dst_null_d(), op[0], op[1]));
624 inst->conditional_mod = BRW_CONDITIONAL_NZ;
625 break;
626
627 case ir_binop_logic_or:
628 inst = emit(OR(dst_null_d(), op[0], op[1]));
629 inst->conditional_mod = BRW_CONDITIONAL_NZ;
630 break;
631
632 case ir_binop_logic_and:
633 inst = emit(AND(dst_null_d(), op[0], op[1]));
634 inst->conditional_mod = BRW_CONDITIONAL_NZ;
635 break;
636
637 case ir_unop_f2b:
638 if (intel->gen >= 6) {
639 emit(CMP(dst_null_d(), op[0], src_reg(0.0f), BRW_CONDITIONAL_NZ));
640 } else {
641 inst = emit(MOV(dst_null_f(), op[0]));
642 inst->conditional_mod = BRW_CONDITIONAL_NZ;
643 }
644 break;
645
646 case ir_unop_i2b:
647 if (intel->gen >= 6) {
648 emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ));
649 } else {
650 inst = emit(MOV(dst_null_d(), op[0]));
651 inst->conditional_mod = BRW_CONDITIONAL_NZ;
652 }
653 break;
654
655 case ir_binop_all_equal:
656 inst = emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_Z));
657 *predicate = BRW_PREDICATE_ALIGN16_ALL4H;
658 break;
659
660 case ir_binop_any_nequal:
661 inst = emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_NZ));
662 *predicate = BRW_PREDICATE_ALIGN16_ANY4H;
663 break;
664
665 case ir_unop_any:
666 inst = emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ));
667 *predicate = BRW_PREDICATE_ALIGN16_ANY4H;
668 break;
669
670 case ir_binop_greater:
671 case ir_binop_gequal:
672 case ir_binop_less:
673 case ir_binop_lequal:
674 case ir_binop_equal:
675 case ir_binop_nequal:
676 emit(CMP(dst_null_d(), op[0], op[1],
677 brw_conditional_for_comparison(expr->operation)));
678 break;
679
680 default:
681 assert(!"not reached");
682 break;
683 }
684 return;
685 }
686
687 ir->accept(this);
688
689 resolve_ud_negate(&this->result);
690
691 if (intel->gen >= 6) {
692 vec4_instruction *inst = emit(AND(dst_null_d(),
693 this->result, src_reg(1)));
694 inst->conditional_mod = BRW_CONDITIONAL_NZ;
695 } else {
696 vec4_instruction *inst = emit(MOV(dst_null_d(), this->result));
697 inst->conditional_mod = BRW_CONDITIONAL_NZ;
698 }
699 }
700
701 /**
702 * Emit a gen6 IF statement with the comparison folded into the IF
703 * instruction.
704 */
705 void
706 vec4_visitor::emit_if_gen6(ir_if *ir)
707 {
708 ir_expression *expr = ir->condition->as_expression();
709
710 if (expr) {
711 src_reg op[2];
712 dst_reg temp;
713
714 assert(expr->get_num_operands() <= 2);
715 for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
716 expr->operands[i]->accept(this);
717 op[i] = this->result;
718 }
719
720 switch (expr->operation) {
721 case ir_unop_logic_not:
722 emit(IF(op[0], src_reg(0), BRW_CONDITIONAL_Z));
723 return;
724
725 case ir_binop_logic_xor:
726 emit(IF(op[0], op[1], BRW_CONDITIONAL_NZ));
727 return;
728
729 case ir_binop_logic_or:
730 temp = dst_reg(this, glsl_type::bool_type);
731 emit(OR(temp, op[0], op[1]));
732 emit(IF(src_reg(temp), src_reg(0), BRW_CONDITIONAL_NZ));
733 return;
734
735 case ir_binop_logic_and:
736 temp = dst_reg(this, glsl_type::bool_type);
737 emit(AND(temp, op[0], op[1]));
738 emit(IF(src_reg(temp), src_reg(0), BRW_CONDITIONAL_NZ));
739 return;
740
741 case ir_unop_f2b:
742 emit(IF(op[0], src_reg(0), BRW_CONDITIONAL_NZ));
743 return;
744
745 case ir_unop_i2b:
746 emit(IF(op[0], src_reg(0), BRW_CONDITIONAL_NZ));
747 return;
748
749 case ir_binop_greater:
750 case ir_binop_gequal:
751 case ir_binop_less:
752 case ir_binop_lequal:
753 case ir_binop_equal:
754 case ir_binop_nequal:
755 emit(IF(op[0], op[1],
756 brw_conditional_for_comparison(expr->operation)));
757 return;
758
759 case ir_binop_all_equal:
760 emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_Z));
761 emit(IF(BRW_PREDICATE_ALIGN16_ALL4H));
762 return;
763
764 case ir_binop_any_nequal:
765 emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_NZ));
766 emit(IF(BRW_PREDICATE_ALIGN16_ANY4H));
767 return;
768
769 case ir_unop_any:
770 emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ));
771 emit(IF(BRW_PREDICATE_ALIGN16_ANY4H));
772 return;
773
774 default:
775 assert(!"not reached");
776 emit(IF(op[0], src_reg(0), BRW_CONDITIONAL_NZ));
777 return;
778 }
779 return;
780 }
781
782 ir->condition->accept(this);
783
784 emit(IF(this->result, src_reg(0), BRW_CONDITIONAL_NZ));
785 }
786
787 void
788 vec4_visitor::visit(ir_variable *ir)
789 {
790 dst_reg *reg = NULL;
791
792 if (variable_storage(ir))
793 return;
794
795 switch (ir->mode) {
796 case ir_var_in:
797 reg = new(mem_ctx) dst_reg(ATTR, ir->location);
798
799 /* Do GL_FIXED rescaling for GLES2.0. Our GL_FIXED attributes
800 * come in as floating point conversions of the integer values.
801 */
802 for (int i = ir->location; i < ir->location + type_size(ir->type); i++) {
803 if (!c->key.gl_fixed_input_size[i])
804 continue;
805
806 dst_reg dst = *reg;
807 dst.type = brw_type_for_base_type(ir->type);
808 dst.writemask = (1 << c->key.gl_fixed_input_size[i]) - 1;
809 emit(MUL(dst, src_reg(dst), src_reg(1.0f / 65536.0f)));
810 }
811 break;
812
813 case ir_var_out:
814 reg = new(mem_ctx) dst_reg(this, ir->type);
815
816 for (int i = 0; i < type_size(ir->type); i++) {
817 output_reg[ir->location + i] = *reg;
818 output_reg[ir->location + i].reg_offset = i;
819 output_reg[ir->location + i].type =
820 brw_type_for_base_type(ir->type->get_scalar_type());
821 output_reg_annotation[ir->location + i] = ir->name;
822 }
823 break;
824
825 case ir_var_auto:
826 case ir_var_temporary:
827 reg = new(mem_ctx) dst_reg(this, ir->type);
828 break;
829
830 case ir_var_uniform:
831 reg = new(this->mem_ctx) dst_reg(UNIFORM, this->uniforms);
832
833 /* Track how big the whole uniform variable is, in case we need to put a
834 * copy of its data into pull constants for array access.
835 */
836 this->uniform_size[this->uniforms] = type_size(ir->type);
837
838 if (!strncmp(ir->name, "gl_", 3)) {
839 setup_builtin_uniform_values(ir);
840 } else {
841 setup_uniform_values(ir->location, ir->type);
842 }
843 break;
844
845 case ir_var_system_value:
846 /* VertexID is stored by the VF as the last vertex element, but
847 * we don't represent it with a flag in inputs_read, so we call
848 * it VERT_ATTRIB_MAX, which setup_attributes() picks up on.
849 */
850 reg = new(mem_ctx) dst_reg(ATTR, VERT_ATTRIB_MAX);
851 prog_data->uses_vertexid = true;
852
853 switch (ir->location) {
854 case SYSTEM_VALUE_VERTEX_ID:
855 reg->writemask = WRITEMASK_X;
856 break;
857 case SYSTEM_VALUE_INSTANCE_ID:
858 reg->writemask = WRITEMASK_Y;
859 break;
860 default:
861 assert(!"not reached");
862 break;
863 }
864 break;
865
866 default:
867 assert(!"not reached");
868 }
869
870 reg->type = brw_type_for_base_type(ir->type);
871 hash_table_insert(this->variable_ht, reg, ir);
872 }
873
874 void
875 vec4_visitor::visit(ir_loop *ir)
876 {
877 dst_reg counter;
878
879 /* We don't want debugging output to print the whole body of the
880 * loop as the annotation.
881 */
882 this->base_ir = NULL;
883
884 if (ir->counter != NULL) {
885 this->base_ir = ir->counter;
886 ir->counter->accept(this);
887 counter = *(variable_storage(ir->counter));
888
889 if (ir->from != NULL) {
890 this->base_ir = ir->from;
891 ir->from->accept(this);
892
893 emit(MOV(counter, this->result));
894 }
895 }
896
897 emit(BRW_OPCODE_DO);
898
899 if (ir->to) {
900 this->base_ir = ir->to;
901 ir->to->accept(this);
902
903 emit(CMP(dst_null_d(), src_reg(counter), this->result,
904 brw_conditional_for_comparison(ir->cmp)));
905
906 vec4_instruction *inst = emit(BRW_OPCODE_BREAK);
907 inst->predicate = BRW_PREDICATE_NORMAL;
908 }
909
910 visit_instructions(&ir->body_instructions);
911
912
913 if (ir->increment) {
914 this->base_ir = ir->increment;
915 ir->increment->accept(this);
916 emit(ADD(counter, src_reg(counter), this->result));
917 }
918
919 emit(BRW_OPCODE_WHILE);
920 }
921
922 void
923 vec4_visitor::visit(ir_loop_jump *ir)
924 {
925 switch (ir->mode) {
926 case ir_loop_jump::jump_break:
927 emit(BRW_OPCODE_BREAK);
928 break;
929 case ir_loop_jump::jump_continue:
930 emit(BRW_OPCODE_CONTINUE);
931 break;
932 }
933 }
934
935
936 void
937 vec4_visitor::visit(ir_function_signature *ir)
938 {
939 assert(0);
940 (void)ir;
941 }
942
943 void
944 vec4_visitor::visit(ir_function *ir)
945 {
946 /* Ignore function bodies other than main() -- we shouldn't see calls to
947 * them since they should all be inlined.
948 */
949 if (strcmp(ir->name, "main") == 0) {
950 const ir_function_signature *sig;
951 exec_list empty;
952
953 sig = ir->matching_signature(&empty);
954
955 assert(sig);
956
957 visit_instructions(&sig->body);
958 }
959 }
960
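/**
 * If the expression is just a saturate of another rvalue, emit that value
 * through a saturating MOV and return true so the caller can skip the
 * normal expression handling.
 */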
961 bool
962 vec4_visitor::try_emit_sat(ir_expression *ir)
963 {
964 ir_rvalue *sat_src = ir->as_rvalue_to_saturate();
965 if (!sat_src)
966 return false;
967
968 sat_src->accept(this);
969 src_reg src = this->result;
970
971 this->result = src_reg(this, ir->type);
972 vec4_instruction *inst;
973 inst = emit(MOV(dst_reg(this->result), src));
974 inst->saturate = true;
975
976 return true;
977 }
978
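/** Emit a comparison producing a 0/1 boolean value in dst. */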
979 void
980 vec4_visitor::emit_bool_comparison(unsigned int op,
981 dst_reg dst, src_reg src0, src_reg src1)
982 {
983 /* original gen4 does destination conversion before comparison. */
984 if (intel->gen < 5)
985 dst.type = src0.type;
986
987 emit(CMP(dst, src0, src1, brw_conditional_for_comparison(op)));
988
989 dst.type = BRW_REGISTER_TYPE_D;
990 emit(AND(dst, src_reg(dst), src_reg(0x1)));
991 }
992
993 void
994 vec4_visitor::visit(ir_expression *ir)
995 {
996 unsigned int operand;
997 src_reg op[Elements(ir->operands)];
998 src_reg result_src;
999 dst_reg result_dst;
1000 vec4_instruction *inst;
1001
1002 if (try_emit_sat(ir))
1003 return;
1004
1005 for (operand = 0; operand < ir->get_num_operands(); operand++) {
1006 this->result.file = BAD_FILE;
1007 ir->operands[operand]->accept(this);
1008 if (this->result.file == BAD_FILE) {
1009 printf("Failed to get tree for expression operand:\n");
1010 ir->operands[operand]->print();
1011 exit(1);
1012 }
1013 op[operand] = this->result;
1014
1015 /* Matrix expression operands should have been broken down to vector
1016 * operations already.
1017 */
1018 assert(!ir->operands[operand]->type->is_matrix());
1019 }
1020
1021 int vector_elements = ir->operands[0]->type->vector_elements;
1022 if (ir->operands[1]) {
1023 vector_elements = MAX2(vector_elements,
1024 ir->operands[1]->type->vector_elements);
1025 }
1026
1027 this->result.file = BAD_FILE;
1028
1029 /* Storage for our result. Ideally for an assignment we'd be using
1030 * the actual storage for the result here, instead.
1031 */
1032 result_src = src_reg(this, ir->type);
1033 /* convenience for the emit functions below. */
1034 result_dst = dst_reg(result_src);
1035 /* If nothing special happens, this is the result. */
1036 this->result = result_src;
1037 /* Limit writes to the channels that will be used by result_src later.
1038 * This does limit this temp's use as a temporary for multi-instruction
1039 * sequences.
1040 */
1041 result_dst.writemask = (1 << ir->type->vector_elements) - 1;
1042
1043 switch (ir->operation) {
1044 case ir_unop_logic_not:
1045 /* Note that BRW_OPCODE_NOT is not appropriate here, since it is
1046 * ones complement of the whole register, not just bit 0.
1047 */
1048 emit(XOR(result_dst, op[0], src_reg(1)));
1049 break;
1050 case ir_unop_neg:
1051 op[0].negate = !op[0].negate;
1052 this->result = op[0];
1053 break;
1054 case ir_unop_abs:
1055 op[0].abs = true;
1056 op[0].negate = false;
1057 this->result = op[0];
1058 break;
1059
1060 case ir_unop_sign:
1061 emit(MOV(result_dst, src_reg(0.0f)));
1062
1063 emit(CMP(dst_null_d(), op[0], src_reg(0.0f), BRW_CONDITIONAL_G));
1064 inst = emit(MOV(result_dst, src_reg(1.0f)));
1065 inst->predicate = BRW_PREDICATE_NORMAL;
1066
1067 emit(CMP(dst_null_d(), op[0], src_reg(0.0f), BRW_CONDITIONAL_L));
1068 inst = emit(MOV(result_dst, src_reg(-1.0f)));
1069 inst->predicate = BRW_PREDICATE_NORMAL;
1070
1071 break;
1072
1073 case ir_unop_rcp:
1074 emit_math(SHADER_OPCODE_RCP, result_dst, op[0]);
1075 break;
1076
1077 case ir_unop_exp2:
1078 emit_math(SHADER_OPCODE_EXP2, result_dst, op[0]);
1079 break;
1080 case ir_unop_log2:
1081 emit_math(SHADER_OPCODE_LOG2, result_dst, op[0]);
1082 break;
1083 case ir_unop_exp:
1084 case ir_unop_log:
1085 assert(!"not reached: should be handled by ir_explog_to_explog2");
1086 break;
1087 case ir_unop_sin:
1088 case ir_unop_sin_reduced:
1089 emit_math(SHADER_OPCODE_SIN, result_dst, op[0]);
1090 break;
1091 case ir_unop_cos:
1092 case ir_unop_cos_reduced:
1093 emit_math(SHADER_OPCODE_COS, result_dst, op[0]);
1094 break;
1095
1096 case ir_unop_dFdx:
1097 case ir_unop_dFdy:
1098 assert(!"derivatives not valid in vertex shader");
1099 break;
1100
1101 case ir_unop_noise:
1102 assert(!"not reached: should be handled by lower_noise");
1103 break;
1104
1105 case ir_binop_add:
1106 emit(ADD(result_dst, op[0], op[1]));
1107 break;
1108 case ir_binop_sub:
1109 assert(!"not reached: should be handled by ir_sub_to_add_neg");
1110 break;
1111
1112 case ir_binop_mul:
1113 if (ir->type->is_integer()) {
1114 /* For integer multiplication, the MUL uses the low 16 bits
1115 * of one of the operands (src0 on gen6, src1 on gen7). The
1116 * MACH accumulates in the contribution of the upper 16 bits
1117 * of that operand.
1118 *
1119 * FINISHME: Emit just the MUL if we know an operand is small
1120 * enough.
1121 */
1122 struct brw_reg acc = retype(brw_acc_reg(), BRW_REGISTER_TYPE_D);
1123
1124 emit(MUL(acc, op[0], op[1]));
1125 emit(MACH(dst_null_d(), op[0], op[1]));
1126 emit(MOV(result_dst, src_reg(acc)));
1127 } else {
1128 emit(MUL(result_dst, op[0], op[1]));
1129 }
1130 break;
1131 case ir_binop_div:
1132 /* Floating point should be lowered by DIV_TO_MUL_RCP in the compiler. */
1133 assert(ir->type->is_integer());
1134 emit_math(SHADER_OPCODE_INT_QUOTIENT, result_dst, op[0], op[1]);
1135 break;
1136 case ir_binop_mod:
1137 /* Floating point should be lowered by MOD_TO_FRACT in the compiler. */
1138 assert(ir->type->is_integer());
1139 emit_math(SHADER_OPCODE_INT_REMAINDER, result_dst, op[0], op[1]);
1140 break;
1141
1142 case ir_binop_less:
1143 case ir_binop_greater:
1144 case ir_binop_lequal:
1145 case ir_binop_gequal:
1146 case ir_binop_equal:
1147 case ir_binop_nequal: {
1148 emit(CMP(result_dst, op[0], op[1],
1149 brw_conditional_for_comparison(ir->operation)));
1150 emit(AND(result_dst, result_src, src_reg(0x1)));
1151 break;
1152 }
1153
1154 case ir_binop_all_equal:
1155 /* "==" operator producing a scalar boolean. */
1156 if (ir->operands[0]->type->is_vector() ||
1157 ir->operands[1]->type->is_vector()) {
1158 emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_Z));
1159 emit(MOV(result_dst, src_reg(0)));
1160 inst = emit(MOV(result_dst, src_reg(1)));
1161 inst->predicate = BRW_PREDICATE_ALIGN16_ALL4H;
1162 } else {
1163 emit(CMP(result_dst, op[0], op[1], BRW_CONDITIONAL_Z));
1164 emit(AND(result_dst, result_src, src_reg(0x1)));
1165 }
1166 break;
1167 case ir_binop_any_nequal:
1168 /* "!=" operator producing a scalar boolean. */
1169 if (ir->operands[0]->type->is_vector() ||
1170 ir->operands[1]->type->is_vector()) {
1171 emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_NZ));
1172
1173 emit(MOV(result_dst, src_reg(0)));
1174 inst = emit(MOV(result_dst, src_reg(1)));
1175 inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
1176 } else {
1177 emit(CMP(result_dst, op[0], op[1], BRW_CONDITIONAL_NZ));
1178 emit(AND(result_dst, result_src, src_reg(0x1)));
1179 }
1180 break;
1181
1182 case ir_unop_any:
1183 emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ));
1184 emit(MOV(result_dst, src_reg(0)));
1185
1186 inst = emit(MOV(result_dst, src_reg(1)));
1187 inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
1188 break;
1189
1190 case ir_binop_logic_xor:
1191 emit(XOR(result_dst, op[0], op[1]));
1192 break;
1193
1194 case ir_binop_logic_or:
1195 emit(OR(result_dst, op[0], op[1]));
1196 break;
1197
1198 case ir_binop_logic_and:
1199 emit(AND(result_dst, op[0], op[1]));
1200 break;
1201
1202 case ir_binop_dot:
1203 assert(ir->operands[0]->type->is_vector());
1204 assert(ir->operands[0]->type == ir->operands[1]->type);
1205 emit_dp(result_dst, op[0], op[1], ir->operands[0]->type->vector_elements);
1206 break;
1207
1208 case ir_unop_sqrt:
1209 emit_math(SHADER_OPCODE_SQRT, result_dst, op[0]);
1210 break;
1211 case ir_unop_rsq:
1212 emit_math(SHADER_OPCODE_RSQ, result_dst, op[0]);
1213 break;
1214
1215 case ir_unop_bitcast_i2f:
1216 case ir_unop_bitcast_u2f:
1217 this->result = op[0];
1218 this->result.type = BRW_REGISTER_TYPE_F;
1219 break;
1220
1221 case ir_unop_bitcast_f2i:
1222 this->result = op[0];
1223 this->result.type = BRW_REGISTER_TYPE_D;
1224 break;
1225
1226 case ir_unop_bitcast_f2u:
1227 this->result = op[0];
1228 this->result.type = BRW_REGISTER_TYPE_UD;
1229 break;
1230
1231 case ir_unop_i2f:
1232 case ir_unop_i2u:
1233 case ir_unop_u2i:
1234 case ir_unop_u2f:
1235 case ir_unop_b2f:
1236 case ir_unop_b2i:
1237 case ir_unop_f2i:
1238 case ir_unop_f2u:
1239 emit(MOV(result_dst, op[0]));
1240 break;
1241 case ir_unop_f2b:
1242 case ir_unop_i2b: {
1243 emit(CMP(result_dst, op[0], src_reg(0.0f), BRW_CONDITIONAL_NZ));
1244 emit(AND(result_dst, result_src, src_reg(1)));
1245 break;
1246 }
1247
1248 case ir_unop_trunc:
1249 emit(RNDZ(result_dst, op[0]));
1250 break;
1251 case ir_unop_ceil:
1252 op[0].negate = !op[0].negate;
1253 inst = emit(RNDD(result_dst, op[0]));
1254 this->result.negate = true;
1255 break;
1256 case ir_unop_floor:
1257 inst = emit(RNDD(result_dst, op[0]));
1258 break;
1259 case ir_unop_fract:
1260 inst = emit(FRC(result_dst, op[0]));
1261 break;
1262 case ir_unop_round_even:
1263 emit(RNDE(result_dst, op[0]));
1264 break;
1265
1266 case ir_binop_min:
1267 if (intel->gen >= 6) {
1268 inst = emit(BRW_OPCODE_SEL, result_dst, op[0], op[1]);
1269 inst->conditional_mod = BRW_CONDITIONAL_L;
1270 } else {
1271 emit(CMP(result_dst, op[0], op[1], BRW_CONDITIONAL_L));
1272
1273 inst = emit(BRW_OPCODE_SEL, result_dst, op[0], op[1]);
1274 inst->predicate = BRW_PREDICATE_NORMAL;
1275 }
1276 break;
1277 case ir_binop_max:
1278 if (intel->gen >= 6) {
1279 inst = emit(BRW_OPCODE_SEL, result_dst, op[0], op[1]);
1280 inst->conditional_mod = BRW_CONDITIONAL_G;
1281 } else {
1282 emit(CMP(result_dst, op[0], op[1], BRW_CONDITIONAL_G));
1283
1284 inst = emit(BRW_OPCODE_SEL, result_dst, op[0], op[1]);
1285 inst->predicate = BRW_PREDICATE_NORMAL;
1286 }
1287 break;
1288
1289 case ir_binop_pow:
1290 emit_math(SHADER_OPCODE_POW, result_dst, op[0], op[1]);
1291 break;
1292
1293 case ir_unop_bit_not:
1294 inst = emit(NOT(result_dst, op[0]));
1295 break;
1296 case ir_binop_bit_and:
1297 inst = emit(AND(result_dst, op[0], op[1]));
1298 break;
1299 case ir_binop_bit_xor:
1300 inst = emit(XOR(result_dst, op[0], op[1]));
1301 break;
1302 case ir_binop_bit_or:
1303 inst = emit(OR(result_dst, op[0], op[1]));
1304 break;
1305
1306 case ir_binop_lshift:
1307 inst = emit(BRW_OPCODE_SHL, result_dst, op[0], op[1]);
1308 break;
1309
1310 case ir_binop_rshift:
1311 if (ir->type->base_type == GLSL_TYPE_INT)
1312 inst = emit(BRW_OPCODE_ASR, result_dst, op[0], op[1]);
1313 else
1314 inst = emit(BRW_OPCODE_SHR, result_dst, op[0], op[1]);
1315 break;
1316
1317 case ir_binop_ubo_load:
1318 assert(!"not yet supported");
1319 break;
1320
1321 case ir_quadop_vector:
1322 assert(!"not reached: should be handled by lower_quadop_vector");
1323 break;
1324 }
1325 }
1326
1327
1328 void
1329 vec4_visitor::visit(ir_swizzle *ir)
1330 {
1331 src_reg src;
1332 int i = 0;
1333 int swizzle[4];
1334
1335 /* Note that this is only swizzles in expressions, not those on the left
1336 * hand side of an assignment, which do write masking. See ir_assignment
1337 * for that.
1338 */
1339
1340 ir->val->accept(this);
1341 src = this->result;
1342 assert(src.file != BAD_FILE);
1343
1344 for (i = 0; i < ir->type->vector_elements; i++) {
1345 switch (i) {
1346 case 0:
1347 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.x);
1348 break;
1349 case 1:
1350 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.y);
1351 break;
1352 case 2:
1353 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.z);
1354 break;
1355 case 3:
1356 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.w);
1357 break;
1358 }
1359 }
1360 for (; i < 4; i++) {
1361 /* Replicate the last channel out. */
1362 swizzle[i] = swizzle[ir->type->vector_elements - 1];
1363 }
1364
1365 src.swizzle = BRW_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]);
1366
1367 this->result = src;
1368 }
1369
1370 void
1371 vec4_visitor::visit(ir_dereference_variable *ir)
1372 {
1373 const struct glsl_type *type = ir->type;
1374 dst_reg *reg = variable_storage(ir->var);
1375
1376 if (!reg) {
1377 fail("Failed to find variable storage for %s\n", ir->var->name);
1378 this->result = src_reg(brw_null_reg());
1379 return;
1380 }
1381
1382 this->result = src_reg(*reg);
1383
1384 /* System values get their swizzle from the dst_reg writemask */
1385 if (ir->var->mode == ir_var_system_value)
1386 return;
1387
1388 if (type->is_scalar() || type->is_vector() || type->is_matrix())
1389 this->result.swizzle = swizzle_for_size(type->vector_elements);
1390 }
1391
1392 void
1393 vec4_visitor::visit(ir_dereference_array *ir)
1394 {
1395 ir_constant *constant_index;
1396 src_reg src;
1397 int element_size = type_size(ir->type);
1398
1399 constant_index = ir->array_index->constant_expression_value();
1400
1401 ir->array->accept(this);
1402 src = this->result;
1403
1404 if (constant_index) {
1405 src.reg_offset += constant_index->value.i[0] * element_size;
1406 } else {
1407 /* Variable index array dereference. It eats the "vec4" of the
1408 * base of the array and an index that offsets the Mesa register
1409 * index.
1410 */
1411 ir->array_index->accept(this);
1412
1413 src_reg index_reg;
1414
1415 if (element_size == 1) {
1416 index_reg = this->result;
1417 } else {
1418 index_reg = src_reg(this, glsl_type::int_type);
1419
1420 emit(MUL(dst_reg(index_reg), this->result, src_reg(element_size)));
1421 }
1422
1423 if (src.reladdr) {
1424 src_reg temp = src_reg(this, glsl_type::int_type);
1425
1426 emit(ADD(dst_reg(temp), *src.reladdr, index_reg));
1427
1428 index_reg = temp;
1429 }
1430
1431 src.reladdr = ralloc(mem_ctx, src_reg);
1432 memcpy(src.reladdr, &index_reg, sizeof(index_reg));
1433 }
1434
1435 /* If the type is smaller than a vec4, replicate the last channel out. */
1436 if (ir->type->is_scalar() || ir->type->is_vector() || ir->type->is_matrix())
1437 src.swizzle = swizzle_for_size(ir->type->vector_elements);
1438 else
1439 src.swizzle = BRW_SWIZZLE_NOOP;
1440 src.type = brw_type_for_base_type(ir->type);
1441
1442 this->result = src;
1443 }
1444
1445 void
1446 vec4_visitor::visit(ir_dereference_record *ir)
1447 {
1448 unsigned int i;
1449 const glsl_type *struct_type = ir->record->type;
1450 int offset = 0;
1451
1452 ir->record->accept(this);
1453
1454 for (i = 0; i < struct_type->length; i++) {
1455 if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0)
1456 break;
1457 offset += type_size(struct_type->fields.structure[i].type);
1458 }
1459
1460 /* If the type is smaller than a vec4, replicate the last channel out. */
1461 if (ir->type->is_scalar() || ir->type->is_vector() || ir->type->is_matrix())
1462 this->result.swizzle = swizzle_for_size(ir->type->vector_elements);
1463 else
1464 this->result.swizzle = BRW_SWIZZLE_NOOP;
1465 this->result.type = brw_type_for_base_type(ir->type);
1466
1467 this->result.reg_offset += offset;
1468 }
1469
1470 /**
1471 * We want to be careful in assignment setup to hit the actual storage
1472 * instead of potentially using a temporary like we might with the
1473 * ir_dereference handler.
1474 */
1475 static dst_reg
1476 get_assignment_lhs(ir_dereference *ir, vec4_visitor *v)
1477 {
1478 /* The LHS must be a dereference. If the LHS is a variable indexed array
1479 * access of a vector, it must be separated into a series conditional moves
1480 * before reaching this point (see ir_vec_index_to_cond_assign).
1481 */
1482 assert(ir->as_dereference());
1483 ir_dereference_array *deref_array = ir->as_dereference_array();
1484 if (deref_array) {
1485 assert(!deref_array->array->type->is_vector());
1486 }
1487
1488 /* Use the rvalue deref handler for the most part. We'll ignore
1489 * swizzles in it and write swizzles using writemask, though.
1490 */
1491 ir->accept(v);
1492 return dst_reg(v->result);
1493 }
1494
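/**
 * Copy a structure, array, or matrix value by recursing down to its
 * scalar/vector members and emitting one (possibly predicated) MOV per
 * vec4, advancing the reg_offset of both dst and src as it goes.
 */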
1495 void
1496 vec4_visitor::emit_block_move(dst_reg *dst, src_reg *src,
1497 const struct glsl_type *type, uint32_t predicate)
1498 {
1499 if (type->base_type == GLSL_TYPE_STRUCT) {
1500 for (unsigned int i = 0; i < type->length; i++) {
1501 emit_block_move(dst, src, type->fields.structure[i].type, predicate);
1502 }
1503 return;
1504 }
1505
1506 if (type->is_array()) {
1507 for (unsigned int i = 0; i < type->length; i++) {
1508 emit_block_move(dst, src, type->fields.array, predicate);
1509 }
1510 return;
1511 }
1512
1513 if (type->is_matrix()) {
1514 const struct glsl_type *vec_type;
1515
1516 vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT,
1517 type->vector_elements, 1);
1518
1519 for (int i = 0; i < type->matrix_columns; i++) {
1520 emit_block_move(dst, src, vec_type, predicate);
1521 }
1522 return;
1523 }
1524
1525 assert(type->is_scalar() || type->is_vector());
1526
1527 dst->type = brw_type_for_base_type(type);
1528 src->type = dst->type;
1529
1530 dst->writemask = (1 << type->vector_elements) - 1;
1531
1532 src->swizzle = swizzle_for_size(type->vector_elements);
1533
1534 vec4_instruction *inst = emit(MOV(*dst, *src));
1535 inst->predicate = predicate;
1536
1537 dst->reg_offset++;
1538 src->reg_offset++;
1539 }
1540
1541
1542 /* If the RHS processing resulted in an instruction generating a
1543 * temporary value, and it would be easy to rewrite the instruction to
1544 * generate its result right into the LHS instead, do so. This ends
1545 * up reliably removing instructions where it can be tricky to do so
1546 * later without real UD chain information.
1547 */
1548 bool
1549 vec4_visitor::try_rewrite_rhs_to_dst(ir_assignment *ir,
1550 dst_reg dst,
1551 src_reg src,
1552 vec4_instruction *pre_rhs_inst,
1553 vec4_instruction *last_rhs_inst)
1554 {
1555 /* This could be supported, but it would take more smarts. */
1556 if (ir->condition)
1557 return false;
1558
1559 if (pre_rhs_inst == last_rhs_inst)
1560 return false; /* No instructions generated to work with. */
1561
1562 /* Make sure the last instruction generated our source reg. */
1563 if (src.file != GRF ||
1564 src.file != last_rhs_inst->dst.file ||
1565 src.reg != last_rhs_inst->dst.reg ||
1566 src.reg_offset != last_rhs_inst->dst.reg_offset ||
1567 src.reladdr ||
1568 src.abs ||
1569 src.negate ||
1570 last_rhs_inst->predicate != BRW_PREDICATE_NONE)
1571 return false;
1572
1573 /* Check that that last instruction fully initialized the channels
1574 * we want to use, in the order we want to use them. We could
1575 * potentially reswizzle the operands of many instructions so that
1576 * we could handle out of order channels, but don't yet.
1577 */
1578
1579 for (unsigned i = 0; i < 4; i++) {
1580 if (dst.writemask & (1 << i)) {
1581 if (!(last_rhs_inst->dst.writemask & (1 << i)))
1582 return false;
1583
1584 if (BRW_GET_SWZ(src.swizzle, i) != i)
1585 return false;
1586 }
1587 }
1588
1589 /* Success! Rewrite the instruction. */
1590 last_rhs_inst->dst.file = dst.file;
1591 last_rhs_inst->dst.reg = dst.reg;
1592 last_rhs_inst->dst.reg_offset = dst.reg_offset;
1593 last_rhs_inst->dst.reladdr = dst.reladdr;
1594 last_rhs_inst->dst.writemask &= dst.writemask;
1595
1596 return true;
1597 }
1598
1599 void
1600 vec4_visitor::visit(ir_assignment *ir)
1601 {
1602 dst_reg dst = get_assignment_lhs(ir->lhs, this);
1603 uint32_t predicate = BRW_PREDICATE_NONE;
1604
1605 if (!ir->lhs->type->is_scalar() &&
1606 !ir->lhs->type->is_vector()) {
1607 ir->rhs->accept(this);
1608 src_reg src = this->result;
1609
1610 if (ir->condition) {
1611 emit_bool_to_cond_code(ir->condition, &predicate);
1612 }
1613
1614 /* emit_block_move doesn't account for swizzles in the source register.
1615 * This should be ok, since the source register is a structure or an
1616 * array, and those can't be swizzled. But double-check to be sure.
1617 */
1618 assert(src.swizzle ==
1619 (ir->rhs->type->is_matrix()
1620 ? swizzle_for_size(ir->rhs->type->vector_elements)
1621 : BRW_SWIZZLE_NOOP));
1622
1623 emit_block_move(&dst, &src, ir->rhs->type, predicate);
1624 return;
1625 }
1626
1627 /* Now we're down to just a scalar/vector with writemasks. */
1628 int i;
1629
1630 vec4_instruction *pre_rhs_inst, *last_rhs_inst;
1631 pre_rhs_inst = (vec4_instruction *)this->instructions.get_tail();
1632
1633 ir->rhs->accept(this);
1634
1635 last_rhs_inst = (vec4_instruction *)this->instructions.get_tail();
1636
1637 src_reg src = this->result;
1638
1639 int swizzles[4];
1640 int first_enabled_chan = 0;
1641 int src_chan = 0;
1642
1643 assert(ir->lhs->type->is_vector() ||
1644 ir->lhs->type->is_scalar());
1645 dst.writemask = ir->write_mask;
1646
1647 for (int i = 0; i < 4; i++) {
1648 if (dst.writemask & (1 << i)) {
1649 first_enabled_chan = BRW_GET_SWZ(src.swizzle, i);
1650 break;
1651 }
1652 }
1653
1654 /* Swizzle a small RHS vector into the channels being written.
1655 *
1656 * glsl ir treats write_mask as dictating how many channels are
1657 * present on the RHS while in our instructions we need to make
1658 * those channels appear in the slots of the vec4 they're written to.
1659 */
1660 for (int i = 0; i < 4; i++) {
1661 if (dst.writemask & (1 << i))
1662 swizzles[i] = BRW_GET_SWZ(src.swizzle, src_chan++);
1663 else
1664 swizzles[i] = first_enabled_chan;
1665 }
1666 src.swizzle = BRW_SWIZZLE4(swizzles[0], swizzles[1],
1667 swizzles[2], swizzles[3]);
1668
1669 if (try_rewrite_rhs_to_dst(ir, dst, src, pre_rhs_inst, last_rhs_inst)) {
1670 return;
1671 }
1672
1673 if (ir->condition) {
1674 emit_bool_to_cond_code(ir->condition, &predicate);
1675 }
1676
1677 for (i = 0; i < type_size(ir->lhs->type); i++) {
1678 vec4_instruction *inst = emit(MOV(dst, src));
1679 inst->predicate = predicate;
1680
1681 dst.reg_offset++;
1682 src.reg_offset++;
1683 }
1684 }
1685
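/**
 * Write an ir_constant's values into dst one vec4 at a time, using the
 * writemask to coalesce identical components into a single MOV.
 */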
1686 void
1687 vec4_visitor::emit_constant_values(dst_reg *dst, ir_constant *ir)
1688 {
1689 if (ir->type->base_type == GLSL_TYPE_STRUCT) {
1690 foreach_list(node, &ir->components) {
1691 ir_constant *field_value = (ir_constant *)node;
1692
1693 emit_constant_values(dst, field_value);
1694 }
1695 return;
1696 }
1697
1698 if (ir->type->is_array()) {
1699 for (unsigned int i = 0; i < ir->type->length; i++) {
1700 emit_constant_values(dst, ir->array_elements[i]);
1701 }
1702 return;
1703 }
1704
1705 if (ir->type->is_matrix()) {
1706 for (int i = 0; i < ir->type->matrix_columns; i++) {
1707 float *vec = &ir->value.f[i * ir->type->vector_elements];
1708
1709 for (int j = 0; j < ir->type->vector_elements; j++) {
1710 dst->writemask = 1 << j;
1711 dst->type = BRW_REGISTER_TYPE_F;
1712
1713 emit(MOV(*dst, src_reg(vec[j])));
1714 }
1715 dst->reg_offset++;
1716 }
1717 return;
1718 }
1719
1720 int remaining_writemask = (1 << ir->type->vector_elements) - 1;
1721
1722 for (int i = 0; i < ir->type->vector_elements; i++) {
1723 if (!(remaining_writemask & (1 << i)))
1724 continue;
1725
1726 dst->writemask = 1 << i;
1727 dst->type = brw_type_for_base_type(ir->type);
1728
1729 /* Find other components that match the one we're about to
1730 * write. Emits fewer instructions for things like vec4(0.5,
1731 * 1.5, 1.5, 1.5).
1732 */
1733 for (int j = i + 1; j < ir->type->vector_elements; j++) {
1734 if (ir->type->base_type == GLSL_TYPE_BOOL) {
1735 if (ir->value.b[i] == ir->value.b[j])
1736 dst->writemask |= (1 << j);
1737 } else {
1738 /* u, i, and f storage all line up, so no need for a
1739 * switch case for comparing each type.
1740 */
1741 if (ir->value.u[i] == ir->value.u[j])
1742 dst->writemask |= (1 << j);
1743 }
1744 }
1745
1746 switch (ir->type->base_type) {
1747 case GLSL_TYPE_FLOAT:
1748 emit(MOV(*dst, src_reg(ir->value.f[i])));
1749 break;
1750 case GLSL_TYPE_INT:
1751 emit(MOV(*dst, src_reg(ir->value.i[i])));
1752 break;
1753 case GLSL_TYPE_UINT:
1754 emit(MOV(*dst, src_reg(ir->value.u[i])));
1755 break;
1756 case GLSL_TYPE_BOOL:
1757 emit(MOV(*dst, src_reg(ir->value.b[i])));
1758 break;
1759 default:
1760 assert(!"Non-float/uint/int/bool constant");
1761 break;
1762 }
1763
1764 remaining_writemask &= ~dst->writemask;
1765 }
1766 dst->reg_offset++;
1767 }
1768
1769 void
1770 vec4_visitor::visit(ir_constant *ir)
1771 {
1772 dst_reg dst = dst_reg(this, ir->type);
1773 this->result = src_reg(dst);
1774
1775 emit_constant_values(&dst, ir);
1776 }
1777
1778 void
1779 vec4_visitor::visit(ir_call *ir)
1780 {
1781 assert(!"not reached");
1782 }
1783
1784 void
1785 vec4_visitor::visit(ir_texture *ir)
1786 {
1787 int sampler = _mesa_get_sampler_uniform_value(ir->sampler, prog, &vp->Base);
1788 sampler = vp->Base.SamplerUnits[sampler];
1789
1790 /* Should be lowered by do_lower_texture_projection */
1791 assert(!ir->projector);
1792
1793 /* Generate code to compute all the subexpression trees. This has to be
1794 * done before loading any values into MRFs for the sampler message since
1795 * generating these values may involve SEND messages that need the MRFs.
1796 */
1797 src_reg coordinate;
1798 if (ir->coordinate) {
1799 ir->coordinate->accept(this);
1800 coordinate = this->result;
1801 }
1802
1803 src_reg shadow_comparitor;
1804 if (ir->shadow_comparitor) {
1805 ir->shadow_comparitor->accept(this);
1806 shadow_comparitor = this->result;
1807 }
1808
1809 src_reg lod, dPdx, dPdy;
1810 switch (ir->op) {
1811 case ir_txf:
1812 case ir_txl:
1813 case ir_txs:
1814 ir->lod_info.lod->accept(this);
1815 lod = this->result;
1816 break;
1817 case ir_txd:
1818 ir->lod_info.grad.dPdx->accept(this);
1819 dPdx = this->result;
1820
1821 ir->lod_info.grad.dPdy->accept(this);
1822 dPdy = this->result;
1823 break;
1824 case ir_tex:
1825 case ir_txb:
1826 break;
1827 }
1828
1829 vec4_instruction *inst = NULL;
1830 switch (ir->op) {
1831 case ir_tex:
1832 case ir_txl:
1833 inst = new(mem_ctx) vec4_instruction(this, SHADER_OPCODE_TXL);
1834 break;
1835 case ir_txd:
1836 inst = new(mem_ctx) vec4_instruction(this, SHADER_OPCODE_TXD);
1837 break;
1838 case ir_txf:
1839 inst = new(mem_ctx) vec4_instruction(this, SHADER_OPCODE_TXF);
1840 break;
1841 case ir_txs:
1842 inst = new(mem_ctx) vec4_instruction(this, SHADER_OPCODE_TXS);
1843 break;
1844 case ir_txb:
1845 assert(!"TXB is not valid for vertex shaders.");
1846 }
1847
1848 /* Texel offsets go in the message header; Gen4 also requires headers. */
1849 inst->header_present = ir->offset || intel->gen < 5;
1850 inst->base_mrf = 2;
1851 inst->mlen = inst->header_present + 1; /* always at least one */
1852 inst->sampler = sampler;
1853 inst->dst = dst_reg(this, ir->type);
1854 inst->shadow_compare = ir->shadow_comparitor != NULL;
1855
1856 if (ir->offset != NULL && ir->op != ir_txf)
1857 inst->texture_offset = brw_texture_offset(ir->offset->as_constant());
1858
1859 /* MRF for the first parameter */
1860 int param_base = inst->base_mrf + inst->header_present;
1861
1862 if (ir->op == ir_txs) {
1863 int writemask = intel->gen == 4 ? WRITEMASK_W : WRITEMASK_X;
1864 emit(MOV(dst_reg(MRF, param_base, ir->lod_info.lod->type, writemask),
1865 lod));
1866 } else {
1867 int i, coord_mask = 0, zero_mask = 0;
1868 /* Load the coordinate */
1869 /* FINISHME: gl_clamp_mask and saturate */
1870 for (i = 0; i < ir->coordinate->type->vector_elements; i++)
1871 coord_mask |= (1 << i);
1872 for (; i < 4; i++)
1873 zero_mask |= (1 << i);
1874
1875 if (ir->offset && ir->op == ir_txf) {
1876 /* It appears that the ld instruction used for txf does its
1877 * address bounds check before adding in the offset. To work
1878 * around this, just add the integer offset to the integer
1879 * texel coordinate, and don't put the offset in the header.
1880 */
1881 ir_constant *offset = ir->offset->as_constant();
1882 assert(offset);
1883
1884 for (int j = 0; j < ir->coordinate->type->vector_elements; j++) {
1885 src_reg src = coordinate;
1886 src.swizzle = BRW_SWIZZLE4(BRW_GET_SWZ(src.swizzle, j),
1887 BRW_GET_SWZ(src.swizzle, j),
1888 BRW_GET_SWZ(src.swizzle, j),
1889 BRW_GET_SWZ(src.swizzle, j));
1890 emit(ADD(dst_reg(MRF, param_base, ir->coordinate->type, 1 << j),
1891 src, offset->value.i[j]));
1892 }
1893 } else {
1894 emit(MOV(dst_reg(MRF, param_base, ir->coordinate->type, coord_mask),
1895 coordinate));
1896 }
1897 emit(MOV(dst_reg(MRF, param_base, ir->coordinate->type, zero_mask),
1898 src_reg(0)));
1899 /* Load the shadow comparitor */
1900 if (ir->shadow_comparitor) {
1901 emit(MOV(dst_reg(MRF, param_base + 1, ir->shadow_comparitor->type,
1902 WRITEMASK_X),
1903 shadow_comparitor));
1904 inst->mlen++;
1905 }
1906
1907 /* Load the LOD info */
1908 if (ir->op == ir_txl) {
1909 int mrf, writemask;
1910 if (intel->gen >= 5) {
1911 mrf = param_base + 1;
1912 if (ir->shadow_comparitor) {
1913 writemask = WRITEMASK_Y;
1914 /* mlen already incremented */
1915 } else {
1916 writemask = WRITEMASK_X;
1917 inst->mlen++;
1918 }
1919 } else /* intel->gen == 4 */ {
1920 mrf = param_base;
1921 writemask = WRITEMASK_Z;
1922 }
1923 emit(MOV(dst_reg(MRF, mrf, ir->lod_info.lod->type, writemask), lod));
1924 } else if (ir->op == ir_txf) {
1925 emit(MOV(dst_reg(MRF, param_base, ir->lod_info.lod->type, WRITEMASK_W),
1926 lod));
1927 } else if (ir->op == ir_txd) {
1928 const glsl_type *type = ir->lod_info.grad.dPdx->type;
1929
1930 if (intel->gen >= 5) {
1931 dPdx.swizzle = BRW_SWIZZLE4(SWIZZLE_X,SWIZZLE_X,SWIZZLE_Y,SWIZZLE_Y);
1932 dPdy.swizzle = BRW_SWIZZLE4(SWIZZLE_X,SWIZZLE_X,SWIZZLE_Y,SWIZZLE_Y);
1933 emit(MOV(dst_reg(MRF, param_base + 1, type, WRITEMASK_XZ), dPdx));
1934 emit(MOV(dst_reg(MRF, param_base + 1, type, WRITEMASK_YW), dPdy));
1935 inst->mlen++;
1936
1937 if (ir->type->vector_elements == 3) {
1938 dPdx.swizzle = BRW_SWIZZLE_ZZZZ;
1939 dPdy.swizzle = BRW_SWIZZLE_ZZZZ;
1940 emit(MOV(dst_reg(MRF, param_base + 2, type, WRITEMASK_X), dPdx));
1941 emit(MOV(dst_reg(MRF, param_base + 2, type, WRITEMASK_Y), dPdy));
1942 inst->mlen++;
1943 }
1944 } else /* intel->gen == 4 */ {
1945 emit(MOV(dst_reg(MRF, param_base + 1, type, WRITEMASK_XYZ), dPdx));
1946 emit(MOV(dst_reg(MRF, param_base + 2, type, WRITEMASK_XYZ), dPdy));
1947 inst->mlen += 2;
1948 }
1949 }
1950 }
1951
1952 emit(inst);
1953
1954 swizzle_result(ir, src_reg(inst->dst), sampler);
1955 }
1956
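/**
 * Apply the texture swizzle from the program key to a sampler result,
 * emitting MOVs for the copied channels and immediate 0.0/1.0 for
 * SWIZZLE_ZERO/SWIZZLE_ONE channels.
 */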
1957 void
1958 vec4_visitor::swizzle_result(ir_texture *ir, src_reg orig_val, int sampler)
1959 {
1960 this->result = orig_val;
1961
1962 int s = c->key.tex.swizzles[sampler];
1963
1964 if (ir->op == ir_txs || ir->type == glsl_type::float_type
1965 || s == SWIZZLE_NOOP)
1966 return;
1967
1968 int zero_mask = 0, one_mask = 0, copy_mask = 0;
1969 int swizzle[4];
1970
1971 for (int i = 0; i < 4; i++) {
1972 switch (GET_SWZ(s, i)) {
1973 case SWIZZLE_ZERO:
1974 zero_mask |= (1 << i);
1975 break;
1976 case SWIZZLE_ONE:
1977 one_mask |= (1 << i);
1978 break;
1979 default:
1980 copy_mask |= (1 << i);
1981 swizzle[i] = GET_SWZ(s, i);
1982 break;
1983 }
1984 }
1985
1986 this->result = src_reg(this, ir->type);
1987 dst_reg swizzled_result(this->result);
1988
1989 if (copy_mask) {
1990 orig_val.swizzle = BRW_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]);
1991 swizzled_result.writemask = copy_mask;
1992 emit(MOV(swizzled_result, orig_val));
1993 }
1994
1995 if (zero_mask) {
1996 swizzled_result.writemask = zero_mask;
1997 emit(MOV(swizzled_result, src_reg(0.0f)));
1998 }
1999
2000 if (one_mask) {
2001 swizzled_result.writemask = one_mask;
2002 emit(MOV(swizzled_result, src_reg(1.0f)));
2003 }
2004 }
2005
2006 void
2007 vec4_visitor::visit(ir_return *ir)
2008 {
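/* By this point earlier GLSL IR passes should have inlined all functions
 * and lowered any returns, so no ir_return should survive to the backend.
 */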
2009 assert(!"not reached");
2010 }
2011
2012 void
2013 vec4_visitor::visit(ir_discard *ir)
2014 {
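/* discard is only valid in fragment shaders, so it can never show up in
 * the vertex shader IR visited here.
 */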
2015 assert(!"not reached");
2016 }
2017
2018 void
2019 vec4_visitor::visit(ir_if *ir)
2020 {
2021 /* Don't point the annotation at the if statement itself, because then
2022 * the annotation would be printed for the if plus the whole then and else blocks.
2023 */
2024 this->base_ir = ir->condition;
2025
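/* gen6's IF instruction can evaluate the condition itself (see
 * emit_if_gen6); on other generations we first reduce the condition to a
 * predicate and emit IF using that predicate.
 */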
2026 if (intel->gen == 6) {
2027 emit_if_gen6(ir);
2028 } else {
2029 uint32_t predicate;
2030 emit_bool_to_cond_code(ir->condition, &predicate);
2031 emit(IF(predicate));
2032 }
2033
2034 visit_instructions(&ir->then_instructions);
2035
2036 if (!ir->else_instructions.is_empty()) {
2037 this->base_ir = ir->condition;
2038 emit(BRW_OPCODE_ELSE);
2039
2040 visit_instructions(&ir->else_instructions);
2041 }
2042
2043 this->base_ir = ir->condition;
2044 emit(BRW_OPCODE_ENDIF);
2045 }
2046
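/**
 * Compute the NDC output (x/w, y/w, z/w, 1/w) from gl_Position.
 *
 * This is only done pre-gen6 (see emit_urb_writes), where a
 * BRW_VERT_RESULT_NDC slot is written into the VUE.
 */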
2047 void
2048 vec4_visitor::emit_ndc_computation()
2049 {
2050 /* Get the position */
2051 src_reg pos = src_reg(output_reg[VERT_RESULT_HPOS]);
2052
2053 /* Build ndc coords, which are (x/w, y/w, z/w, 1/w) */
2054 dst_reg ndc = dst_reg(this, glsl_type::vec4_type);
2055 output_reg[BRW_VERT_RESULT_NDC] = ndc;
2056
2057 current_annotation = "NDC";
2058 dst_reg ndc_w = ndc;
2059 ndc_w.writemask = WRITEMASK_W;
2060 src_reg pos_w = pos;
2061 pos_w.swizzle = BRW_SWIZZLE4(SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W);
2062 emit_math(SHADER_OPCODE_RCP, ndc_w, pos_w);
2063
2064 dst_reg ndc_xyz = ndc;
2065 ndc_xyz.writemask = WRITEMASK_XYZ;
2066
2067 emit(MUL(ndc_xyz, pos, src_reg(ndc_w)));
2068 }
2069
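/**
 * Fill the PSIZ/flags VUE header dword.
 *
 * Pre-gen6 this packs gl_PointSize into the header's point-width field and
 * sets one clip-flag bit for each user clip plane whose DP4 against
 * gl_Position is negative; on gen6+ the slot is zeroed and only
 * gl_PointSize is written (when the shader writes it).
 */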
2070 void
2071 vec4_visitor::emit_psiz_and_flags(struct brw_reg reg)
2072 {
2073 if (intel->gen < 6 &&
2074 ((c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) ||
2075 c->key.userclip_active || brw->has_negative_rhw_bug)) {
2076 dst_reg header1 = dst_reg(this, glsl_type::uvec4_type);
2077 dst_reg header1_w = header1;
2078 header1_w.writemask = WRITEMASK_W;
2079 GLuint i;
2080
2081 emit(MOV(header1, 0u));
2082
2083 if (c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) {
2084 src_reg psiz = src_reg(output_reg[VERT_RESULT_PSIZ]);
2085
2086 current_annotation = "Point size";
2087 emit(MUL(header1_w, psiz, src_reg((float)(1 << 11))));
2088 emit(AND(header1_w, src_reg(header1_w), 0x7ff << 8));
2089 }
2090
2091 current_annotation = "Clipping flags";
2092 for (i = 0; i < c->key.nr_userclip_plane_consts; i++) {
2093 vec4_instruction *inst;
2094
2095 inst = emit(DP4(dst_null_f(), src_reg(output_reg[VERT_RESULT_HPOS]),
2096 src_reg(this->userplane[i])));
2097 inst->conditional_mod = BRW_CONDITIONAL_L;
2098
2099 inst = emit(OR(header1_w, src_reg(header1_w), 1u << i));
2100 inst->predicate = BRW_PREDICATE_NORMAL;
2101 }
2102
2103 /* i965 clipping workaround:
2104 * 1) Test for -ve rhw
2105 * 2) If set,
2106 * set ndc = (0,0,0,0)
2107 * set ucp[6] = 1
2108 *
2109 * Later, clipping will detect ucp[6] and ensure the primitive is
2110 * clipped against all fixed planes.
2111 */
2112 if (brw->has_negative_rhw_bug) {
2113 #if 0
2114 /* FINISHME */
2115 brw_CMP(p,
2116 vec8(brw_null_reg()),
2117 BRW_CONDITIONAL_L,
2118 brw_swizzle1(output_reg[BRW_VERT_RESULT_NDC], 3),
2119 brw_imm_f(0));
2120
2121 brw_OR(p, brw_writemask(header1, WRITEMASK_W), header1, brw_imm_ud(1<<6));
2122 brw_MOV(p, output_reg[BRW_VERT_RESULT_NDC], brw_imm_f(0));
2123 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
2124 #endif
2125 }
2126
2127 emit(MOV(retype(reg, BRW_REGISTER_TYPE_UD), src_reg(header1)));
2128 } else if (intel->gen < 6) {
2129 emit(MOV(retype(reg, BRW_REGISTER_TYPE_UD), 0u));
2130 } else {
2131 emit(MOV(retype(reg, BRW_REGISTER_TYPE_D), src_reg(0)));
2132 if (c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) {
2133 emit(MOV(brw_writemask(reg, WRITEMASK_W),
2134 src_reg(output_reg[VERT_RESULT_PSIZ])));
2135 }
2136 }
2137 }
2138
2139 void
2140 vec4_visitor::emit_clip_distances(struct brw_reg reg, int offset)
2141 {
2142 if (intel->gen < 6) {
2143 /* Clip distance slots are set aside in gen5, but they are not used. It
2144 * is not clear whether we actually need to set aside space for them,
2145 * but the performance cost is negligible.
2146 */
2147 return;
2148 }
2149
2150 /* From the GLSL 1.30 spec, section 7.1 (Vertex Shader Special Variables):
2151 *
2152 * "If a linked set of shaders forming the vertex stage contains no
2153 * static write to gl_ClipVertex or gl_ClipDistance, but the
2154 * application has requested clipping against user clip planes through
2155 * the API, then the coordinate written to gl_Position is used for
2156 * comparison against the user clip planes."
2157 *
2158 * This function is only called if the shader didn't write to
2159 * gl_ClipDistance. Accordingly, we use gl_ClipVertex to perform clipping
2160 * if the user wrote to it; otherwise we use gl_Position.
2161 */
2162 gl_vert_result clip_vertex = VERT_RESULT_CLIP_VERTEX;
2163 if (!(c->prog_data.outputs_written
2164 & BITFIELD64_BIT(VERT_RESULT_CLIP_VERTEX))) {
2165 clip_vertex = VERT_RESULT_HPOS;
2166 }
2167
2168 for (int i = 0; i + offset < c->key.nr_userclip_plane_consts && i < 4;
2169 ++i) {
2170 emit(DP4(dst_reg(brw_writemask(reg, 1 << i)),
2171 src_reg(output_reg[clip_vertex]),
2172 src_reg(this->userplane[i + offset])));
2173 }
2174 }
2175
2176 void
2177 vec4_visitor::emit_generic_urb_slot(dst_reg reg, int vert_result)
2178 {
2179 assert (vert_result < VERT_RESULT_MAX);
2180 reg.type = output_reg[vert_result].type;
2181 current_annotation = output_reg_annotation[vert_result];
2182 /* Copy the register, saturating if necessary */
2183 vec4_instruction *inst = emit(MOV(reg,
2184 src_reg(output_reg[vert_result])));
2185 if ((vert_result == VERT_RESULT_COL0 ||
2186 vert_result == VERT_RESULT_COL1 ||
2187 vert_result == VERT_RESULT_BFC0 ||
2188 vert_result == VERT_RESULT_BFC1) &&
2189 c->key.clamp_vertex_color) {
2190 inst->saturate = true;
2191 }
2192 }
2193
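/**
 * Write the contents of one VUE slot into the given MRF, dispatching to
 * the specialized paths for the header, NDC, position and clip-distance
 * slots and falling back to emit_generic_urb_slot() otherwise.
 */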
2194 void
2195 vec4_visitor::emit_urb_slot(int mrf, int vert_result)
2196 {
2197 struct brw_reg hw_reg = brw_message_reg(mrf);
2198 dst_reg reg = dst_reg(MRF, mrf);
2199 reg.type = BRW_REGISTER_TYPE_F;
2200
2201 switch (vert_result) {
2202 case VERT_RESULT_PSIZ:
2203 /* PSIZ is always in slot 0, and is coupled with other flags. */
2204 current_annotation = "indices, point width, clip flags";
2205 emit_psiz_and_flags(hw_reg);
2206 break;
2207 case BRW_VERT_RESULT_NDC:
2208 current_annotation = "NDC";
2209 emit(MOV(reg, src_reg(output_reg[BRW_VERT_RESULT_NDC])));
2210 break;
2211 case BRW_VERT_RESULT_HPOS_DUPLICATE:
2212 case VERT_RESULT_HPOS:
2213 current_annotation = "gl_Position";
2214 emit(MOV(reg, src_reg(output_reg[VERT_RESULT_HPOS])));
2215 break;
2216 case VERT_RESULT_CLIP_DIST0:
2217 case VERT_RESULT_CLIP_DIST1:
2218 if (this->c->key.uses_clip_distance) {
2219 emit_generic_urb_slot(reg, vert_result);
2220 } else {
2221 current_annotation = "user clip distances";
2222 emit_clip_distances(hw_reg, (vert_result - VERT_RESULT_CLIP_DIST0) * 4);
2223 }
2224 break;
2225 case BRW_VERT_RESULT_PAD:
2226 /* No need to write to this slot */
2227 break;
2228 default:
2229 emit_generic_urb_slot(reg, vert_result);
2230 break;
2231 }
2232 }
2233
2234 static int
2235 align_interleaved_urb_mlen(struct brw_context *brw, int mlen)
2236 {
2237 struct intel_context *intel = &brw->intel;
2238
2239 if (intel->gen >= 6) {
2240 /* URB data written (does not include the message header reg) must
2241 * be a multiple of 256 bits, or 2 VS registers. See vol5c.5,
2242 * section 5.4.3.2.2: URB_INTERLEAVED.
2243 *
2244 * URB entries are allocated on a multiple of 1024 bits, so an
2245 * extra 128 bits written here to make the end align to 256 is
2246 * no problem.
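*
* Note that the mlen passed in includes the one message header register,
* so keeping mlen odd here keeps the data payload (mlen - 1) even.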
2247 */
2248 if ((mlen % 2) != 1)
2249 mlen++;
2250 }
2251
2252 return mlen;
2253 }
2254
2255 /**
2256 * Generates the VUE payload plus the 1 or 2 URB write instructions to
2257 * complete the VS thread.
2258 *
2259 * The VUE layout is documented in Volume 2a.
2260 */
2261 void
2262 vec4_visitor::emit_urb_writes()
2263 {
2264 /* MRF 0 is reserved for the debugger, so start with message header
2265 * in MRF 1.
2266 */
2267 int base_mrf = 1;
2268 int mrf = base_mrf;
2269 /* In the process of generating our URB write message contents, we
2270 * may need to unspill a register or load from an array. Those
2271 * reads would use MRFs 14-15.
2272 */
2273 int max_usable_mrf = 13;
2274
2275 /* The following assertion verifies that max_usable_mrf leaves an even
2276 * number of URB write data registers, which meets gen6's length
2277 * alignment requirement.
2278 */
2279 assert ((max_usable_mrf - base_mrf) % 2 == 0);
2280
2281 /* FINISHME: edgeflag */
2282
2283 /* First mrf is the g0-based message header containing URB handles and such,
2284 * which is implied in VS_OPCODE_URB_WRITE.
2285 */
2286 mrf++;
2287
2288 if (intel->gen < 6) {
2289 emit_ndc_computation();
2290 }
2291
2292 /* Set up the VUE data for the first URB write */
2293 int slot;
2294 for (slot = 0; slot < c->prog_data.vue_map.num_slots; ++slot) {
2295 emit_urb_slot(mrf++, c->prog_data.vue_map.slot_to_vert_result[slot]);
2296
2297 /* If this was max_usable_mrf, we can't fit anything more into this URB
2298 * WRITE.
2299 */
2300 if (mrf > max_usable_mrf) {
2301 slot++;
2302 break;
2303 }
2304 }
2305
2306 current_annotation = "URB write";
2307 vec4_instruction *inst = emit(VS_OPCODE_URB_WRITE);
2308 inst->base_mrf = base_mrf;
2309 inst->mlen = align_interleaved_urb_mlen(brw, mrf - base_mrf);
2310 inst->eot = (slot >= c->prog_data.vue_map.num_slots);
2311
2312 /* Optional second URB write */
2313 if (!inst->eot) {
2314 mrf = base_mrf + 1;
2315
2316 for (; slot < c->prog_data.vue_map.num_slots; ++slot) {
2317 assert(mrf < max_usable_mrf);
2318
2319 emit_urb_slot(mrf++, c->prog_data.vue_map.slot_to_vert_result[slot]);
2320 }
2321
2322 current_annotation = "URB write";
2323 inst = emit(VS_OPCODE_URB_WRITE);
2324 inst->base_mrf = base_mrf;
2325 inst->mlen = align_interleaved_urb_mlen(brw, mrf - base_mrf);
2326 inst->eot = true;
2327 /* URB destination offset. The previous write used MRFs 1-13; excluding
2328 * the one header MRF, that's 12 data regs. The URB offset is in URB
2329 * row increments, and each of our MRFs is half of one of those, since
2330 * we're doing interleaved writes.
2331 */
2332 inst->offset = (max_usable_mrf - base_mrf) / 2;
2333 }
2334 }
2335
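/**
 * Build the offset source for a scratch read/write message from a vec4
 * index, applying any relative addressing and scaling the index into the
 * units the message header expects.
 */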
2336 src_reg
2337 vec4_visitor::get_scratch_offset(vec4_instruction *inst,
2338 src_reg *reladdr, int reg_offset)
2339 {
2340 /* Because we store the values to scratch interleaved like our
2341 * vertex data, we need to scale the vec4 index by 2.
2342 */
2343 int message_header_scale = 2;
2344
2345 /* Pre-gen6, the message header uses byte offsets instead of vec4
2346 * (16-byte) offset units.
2347 */
2348 if (intel->gen < 6)
2349 message_header_scale *= 16;
2350
2351 if (reladdr) {
2352 src_reg index = src_reg(this, glsl_type::int_type);
2353
2354 emit_before(inst, ADD(dst_reg(index), *reladdr, src_reg(reg_offset)));
2355 emit_before(inst, MUL(dst_reg(index),
2356 index, src_reg(message_header_scale)));
2357
2358 return index;
2359 } else {
2360 return src_reg(reg_offset * message_header_scale);
2361 }
2362 }
2363
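/**
 * Build the offset source for a pull constant load, applying any relative
 * addressing; pre-gen6 the offset is converted to bytes, while later
 * generations use vec4 (16-byte) units directly.
 */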
2364 src_reg
2365 vec4_visitor::get_pull_constant_offset(vec4_instruction *inst,
2366 src_reg *reladdr, int reg_offset)
2367 {
2368 if (reladdr) {
2369 src_reg index = src_reg(this, glsl_type::int_type);
2370
2371 emit_before(inst, ADD(dst_reg(index), *reladdr, src_reg(reg_offset)));
2372
2373 /* Pre-gen6, the message header uses byte offsets instead of vec4
2374 * (16-byte) offset units.
2375 */
2376 if (intel->gen < 6) {
2377 emit_before(inst, MUL(dst_reg(index), index, src_reg(16)));
2378 }
2379
2380 return index;
2381 } else {
2382 int message_header_scale = intel->gen < 6 ? 16 : 1;
2383 return src_reg(reg_offset * message_header_scale);
2384 }
2385 }
2386
2387 /**
2388 * Emits an instruction before @inst to load the value named by @orig_src
2389 * from scratch space at @base_offset to @temp.
2390 */
2391 void
2392 vec4_visitor::emit_scratch_read(vec4_instruction *inst,
2393 dst_reg temp, src_reg orig_src,
2394 int base_offset)
2395 {
2396 int reg_offset = base_offset + orig_src.reg_offset;
2397 src_reg index = get_scratch_offset(inst, orig_src.reladdr, reg_offset);
2398
2399 emit_before(inst, SCRATCH_READ(temp, index));
2400 }
2401
2402 /**
2403 * Emits an instruction after @inst to store the value to be written
2404 * to @orig_dst to scratch space at @base_offset, from @temp.
2405 */
2406 void
2407 vec4_visitor::emit_scratch_write(vec4_instruction *inst,
2408 src_reg temp, dst_reg orig_dst,
2409 int base_offset)
2410 {
2411 int reg_offset = base_offset + orig_dst.reg_offset;
2412 src_reg index = get_scratch_offset(inst, orig_dst.reladdr, reg_offset);
2413
2414 dst_reg dst = dst_reg(brw_writemask(brw_vec8_grf(0, 0),
2415 orig_dst.writemask));
2416 vec4_instruction *write = SCRATCH_WRITE(dst, temp, index);
2417 write->predicate = inst->predicate;
2418 write->ir = inst->ir;
2419 write->annotation = inst->annotation;
2420 inst->insert_after(write);
2421 }
2422
2423 /**
2424 * We can't generally support array access in GRF space, because a
2425 * single instruction's destination can only span 2 contiguous
2426 * registers. So, we send all GRF arrays that get variable index
2427 * access to scratch space.
2428 */
2429 void
2430 vec4_visitor::move_grf_array_access_to_scratch()
2431 {
2432 int scratch_loc[this->virtual_grf_count];
2433
2434 for (int i = 0; i < this->virtual_grf_count; i++) {
2435 scratch_loc[i] = -1;
2436 }
2437
2438 /* First, calculate the set of virtual GRFs that need to be punted
2439 * to scratch due to having any array access on them, and where in
2440 * scratch.
2441 */
2442 foreach_list(node, &this->instructions) {
2443 vec4_instruction *inst = (vec4_instruction *)node;
2444
2445 if (inst->dst.file == GRF && inst->dst.reladdr &&
2446 scratch_loc[inst->dst.reg] == -1) {
2447 scratch_loc[inst->dst.reg] = c->last_scratch;
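/* Each vec4 of the array takes 32 bytes of scratch, since the data is
 * stored interleaved for two vertices (2 * 4 floats * 4 bytes).
 */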
2448 c->last_scratch += this->virtual_grf_sizes[inst->dst.reg] * 8 * 4;
2449 }
2450
2451 for (int i = 0 ; i < 3; i++) {
2452 src_reg *src = &inst->src[i];
2453
2454 if (src->file == GRF && src->reladdr &&
2455 scratch_loc[src->reg] == -1) {
2456 scratch_loc[src->reg] = c->last_scratch;
2457 c->last_scratch += this->virtual_grf_sizes[src->reg] * 8 * 4;
2458 }
2459 }
2460 }
2461
2462 /* Now, for anything that will be accessed through scratch, rewrite
2463 * it to load/store. Note that this is a _safe list walk, because
2464 * we may generate a new scratch_write instruction after the one
2465 * we're processing.
2466 */
2467 foreach_list_safe(node, &this->instructions) {
2468 vec4_instruction *inst = (vec4_instruction *)node;
2469
2470 /* Set up the annotation tracking for newly generated instructions. */
2471 base_ir = inst->ir;
2472 current_annotation = inst->annotation;
2473
2474 if (inst->dst.file == GRF && scratch_loc[inst->dst.reg] != -1) {
2475 src_reg temp = src_reg(this, glsl_type::vec4_type);
2476
2477 emit_scratch_write(inst, temp, inst->dst, scratch_loc[inst->dst.reg]);
2478
2479 inst->dst.file = temp.file;
2480 inst->dst.reg = temp.reg;
2481 inst->dst.reg_offset = temp.reg_offset;
2482 inst->dst.reladdr = NULL;
2483 }
2484
2485 for (int i = 0 ; i < 3; i++) {
2486 if (inst->src[i].file != GRF || scratch_loc[inst->src[i].reg] == -1)
2487 continue;
2488
2489 dst_reg temp = dst_reg(this, glsl_type::vec4_type);
2490
2491 emit_scratch_read(inst, temp, inst->src[i],
2492 scratch_loc[inst->src[i].reg]);
2493
2494 inst->src[i].file = temp.file;
2495 inst->src[i].reg = temp.reg;
2496 inst->src[i].reg_offset = temp.reg_offset;
2497 inst->src[i].reladdr = NULL;
2498 }
2499 }
2500 }
2501
2502 /**
2503 * Emits an instruction before @inst to load the value named by @orig_src
2504 * from the pull constant buffer (surface) at @base_offset to @temp.
2505 */
2506 void
2507 vec4_visitor::emit_pull_constant_load(vec4_instruction *inst,
2508 dst_reg temp, src_reg orig_src,
2509 int base_offset)
2510 {
2511 int reg_offset = base_offset + orig_src.reg_offset;
2512 src_reg index = get_pull_constant_offset(inst, orig_src.reladdr, reg_offset);
2513 vec4_instruction *load;
2514
2515 load = new(mem_ctx) vec4_instruction(this, VS_OPCODE_PULL_CONSTANT_LOAD,
2516 temp, index);
2517 load->base_mrf = 14;
2518 load->mlen = 1;
2519 emit_before(inst, load);
2520 }
2521
2522 /**
2523 * Implements array access of uniforms by inserting a
2524 * PULL_CONSTANT_LOAD instruction.
2525 *
2526 * Unlike temporary GRF array access (where we don't support it due to
2527 * the difficulty of doing relative addressing on instruction
2528 * destinations), we could potentially do array access of uniforms
2529 * that were loaded in GRF space as push constants. In real-world
2530 * usage we've seen, though, the arrays being used are always larger
2531 * than we could load as push constants, so just always move all
2532 * uniform array access out to a pull constant buffer.
2533 */
2534 void
2535 vec4_visitor::move_uniform_array_access_to_pull_constants()
2536 {
2537 int pull_constant_loc[this->uniforms];
2538
2539 for (int i = 0; i < this->uniforms; i++) {
2540 pull_constant_loc[i] = -1;
2541 }
2542
2543 /* Walk through and find array access of uniforms. Put a copy of that
2544 * uniform in the pull constant buffer.
2545 *
2546 * Note that we don't move constant-indexed accesses to arrays; the
2547 * performance impact of that choice has not been measured.
2548 */
2549 foreach_list_safe(node, &this->instructions) {
2550 vec4_instruction *inst = (vec4_instruction *)node;
2551
2552 for (int i = 0 ; i < 3; i++) {
2553 if (inst->src[i].file != UNIFORM || !inst->src[i].reladdr)
2554 continue;
2555
2556 int uniform = inst->src[i].reg;
2557
2558 /* If this array isn't already present in the pull constant buffer,
2559 * add it.
2560 */
2561 if (pull_constant_loc[uniform] == -1) {
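/* pull_param is indexed in single floats while pull-constant locations
 * are in vec4 units, hence the divide and multiply by 4 below.
 */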
2562 const float **values = &prog_data->param[uniform * 4];
2563
2564 pull_constant_loc[uniform] = prog_data->nr_pull_params / 4;
2565
2566 for (int j = 0; j < uniform_size[uniform] * 4; j++) {
2567 prog_data->pull_param[prog_data->nr_pull_params++] = values[j];
2568 }
2569 }
2570
2571 /* Set up the annotation tracking for newly generated instructions. */
2572 base_ir = inst->ir;
2573 current_annotation = inst->annotation;
2574
2575 dst_reg temp = dst_reg(this, glsl_type::vec4_type);
2576
2577 emit_pull_constant_load(inst, temp, inst->src[i],
2578 pull_constant_loc[uniform]);
2579
2580 inst->src[i].file = temp.file;
2581 inst->src[i].reg = temp.reg;
2582 inst->src[i].reg_offset = temp.reg_offset;
2583 inst->src[i].reladdr = NULL;
2584 }
2585 }
2586
2587 /* Now there are no accesses of the UNIFORM file with a reladdr, so
2588 * no need to track them as larger-than-vec4 objects. This will be
2589 * relied on in cutting out unused uniform vectors from push
2590 * constants.
2591 */
2592 split_uniform_registers();
2593 }
2594
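/**
 * If an unsigned-dword source has its negate flag set, emit it through a
 * MOV into a temporary and use that instead, so the negate modifier is
 * never left on the UD operand itself.
 */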
2595 void
2596 vec4_visitor::resolve_ud_negate(src_reg *reg)
2597 {
2598 if (reg->type != BRW_REGISTER_TYPE_UD ||
2599 !reg->negate)
2600 return;
2601
2602 src_reg temp = src_reg(this, glsl_type::uvec4_type);
2603 emit(BRW_OPCODE_MOV, dst_reg(temp), *reg);
2604 *reg = temp;
2605 }
2606
2607 vec4_visitor::vec4_visitor(struct brw_vs_compile *c,
2608 struct gl_shader_program *prog,
2609 struct brw_shader *shader)
2610 {
2611 this->c = c;
2612 this->p = &c->func;
2613 this->brw = p->brw;
2614 this->intel = &brw->intel;
2615 this->ctx = &intel->ctx;
2616 this->prog = prog;
2617 this->shader = shader;
2618
2619 this->mem_ctx = ralloc_context(NULL);
2620 this->failed = false;
2621
2622 this->base_ir = NULL;
2623 this->current_annotation = NULL;
2624
2626 this->vp = (struct gl_vertex_program *)
2627 prog->_LinkedShaders[MESA_SHADER_VERTEX]->Program;
2628 this->prog_data = &c->prog_data;
2629
2630 this->variable_ht = hash_table_ctor(0,
2631 hash_table_pointer_hash,
2632 hash_table_pointer_compare);
2633
2634 this->virtual_grf_def = NULL;
2635 this->virtual_grf_use = NULL;
2636 this->virtual_grf_sizes = NULL;
2637 this->virtual_grf_count = 0;
2638 this->virtual_grf_reg_map = NULL;
2639 this->virtual_grf_reg_count = 0;
2640 this->virtual_grf_array_size = 0;
2641 this->live_intervals_valid = false;
2642
2643 this->max_grf = intel->gen >= 7 ? GEN7_MRF_HACK_START : BRW_MAX_GRF;
2644
2645 this->uniforms = 0;
2646 }
2647
2648 vec4_visitor::~vec4_visitor()
2649 {
2650 ralloc_free(this->mem_ctx);
2651 hash_table_dtor(this->variable_ht);
2652 }
2653
2654
2655 void
2656 vec4_visitor::fail(const char *format, ...)
2657 {
2658 va_list va;
2659 char *msg;
2660
2661 if (failed)
2662 return;
2663
2664 failed = true;
2665
2666 va_start(va, format);
2667 msg = ralloc_vasprintf(mem_ctx, format, va);
2668 va_end(va);
2669 msg = ralloc_asprintf(mem_ctx, "VS compile failed: %s\n", msg);
2670
2671 this->fail_msg = msg;
2672
2673 if (INTEL_DEBUG & DEBUG_VS) {
2674 fprintf(stderr, "%s", msg);
2675 }
2676 }
2677
2678 } /* namespace brw */