i965/vs: Use the embedded-comparison SEL on gen6+, like the FS does.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_vec4_visitor.cpp
1 /*
2 * Copyright © 2011 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "brw_vec4.h"
25 extern "C" {
26 #include "main/macros.h"
27 #include "program/prog_parameter.h"
28 #include "program/sampler.h"
29 }
30
31 namespace brw {
32
33 src_reg::src_reg(dst_reg reg)
34 {
35 init();
36
37 this->file = reg.file;
38 this->reg = reg.reg;
39 this->reg_offset = reg.reg_offset;
40 this->type = reg.type;
41 this->reladdr = reg.reladdr;
42 this->fixed_hw_reg = reg.fixed_hw_reg;
43
44 int swizzles[4];
45 int next_chan = 0;
46 int last = 0;
47
48 for (int i = 0; i < 4; i++) {
49 if (!(reg.writemask & (1 << i)))
50 continue;
51
52 swizzles[next_chan++] = last = i;
53 }
54
55 for (; next_chan < 4; next_chan++) {
56 swizzles[next_chan] = last;
57 }
58
59 this->swizzle = BRW_SWIZZLE4(swizzles[0], swizzles[1],
60 swizzles[2], swizzles[3]);
61 }
62
63 dst_reg::dst_reg(src_reg reg)
64 {
65 init();
66
67 this->file = reg.file;
68 this->reg = reg.reg;
69 this->reg_offset = reg.reg_offset;
70 this->type = reg.type;
71 this->writemask = WRITEMASK_XYZW;
72 this->reladdr = reg.reladdr;
73 this->fixed_hw_reg = reg.fixed_hw_reg;
74 }
75
76 vec4_instruction::vec4_instruction(vec4_visitor *v,
77 enum opcode opcode, dst_reg dst,
78 src_reg src0, src_reg src1, src_reg src2)
79 {
80 this->opcode = opcode;
81 this->dst = dst;
82 this->src[0] = src0;
83 this->src[1] = src1;
84 this->src[2] = src2;
85 this->ir = v->base_ir;
86 this->annotation = v->current_annotation;
87 }
88
89 vec4_instruction *
90 vec4_visitor::emit(vec4_instruction *inst)
91 {
92 this->instructions.push_tail(inst);
93
94 return inst;
95 }
96
97 vec4_instruction *
98 vec4_visitor::emit_before(vec4_instruction *inst, vec4_instruction *new_inst)
99 {
100 new_inst->ir = inst->ir;
101 new_inst->annotation = inst->annotation;
102
103 inst->insert_before(new_inst);
104
105 return inst;
106 }
107
108 vec4_instruction *
109 vec4_visitor::emit(enum opcode opcode, dst_reg dst,
110 src_reg src0, src_reg src1, src_reg src2)
111 {
112 return emit(new(mem_ctx) vec4_instruction(this, opcode, dst,
113 src0, src1, src2));
114 }
115
116
117 vec4_instruction *
118 vec4_visitor::emit(enum opcode opcode, dst_reg dst, src_reg src0, src_reg src1)
119 {
120 return emit(new(mem_ctx) vec4_instruction(this, opcode, dst, src0, src1));
121 }
122
123 vec4_instruction *
124 vec4_visitor::emit(enum opcode opcode, dst_reg dst, src_reg src0)
125 {
126 return emit(new(mem_ctx) vec4_instruction(this, opcode, dst, src0));
127 }
128
129 vec4_instruction *
130 vec4_visitor::emit(enum opcode opcode)
131 {
132 return emit(new(mem_ctx) vec4_instruction(this, opcode, dst_reg()));
133 }
134
135 #define ALU1(op) \
136 vec4_instruction * \
137 vec4_visitor::op(dst_reg dst, src_reg src0) \
138 { \
139 return new(mem_ctx) vec4_instruction(this, BRW_OPCODE_##op, dst, \
140 src0); \
141 }
142
143 #define ALU2(op) \
144 vec4_instruction * \
145 vec4_visitor::op(dst_reg dst, src_reg src0, src_reg src1) \
146 { \
147 return new(mem_ctx) vec4_instruction(this, BRW_OPCODE_##op, dst, \
148 src0, src1); \
149 }
150
151 ALU1(NOT)
152 ALU1(MOV)
153 ALU1(FRC)
154 ALU1(RNDD)
155 ALU1(RNDE)
156 ALU1(RNDZ)
157 ALU2(ADD)
158 ALU2(MUL)
159 ALU2(MACH)
160 ALU2(AND)
161 ALU2(OR)
162 ALU2(XOR)
163 ALU2(DP3)
164 ALU2(DP4)
165
166 /** Gen4 predicated IF. */
167 vec4_instruction *
168 vec4_visitor::IF(uint32_t predicate)
169 {
170 vec4_instruction *inst;
171
172 inst = new(mem_ctx) vec4_instruction(this, BRW_OPCODE_IF);
173 inst->predicate = predicate;
174
175 return inst;
176 }
177
178 /** Gen6+ IF with embedded comparison. */
179 vec4_instruction *
180 vec4_visitor::IF(src_reg src0, src_reg src1, uint32_t condition)
181 {
182 assert(intel->gen >= 6);
183
184 vec4_instruction *inst;
185
186 resolve_ud_negate(&src0);
187 resolve_ud_negate(&src1);
188
189 inst = new(mem_ctx) vec4_instruction(this, BRW_OPCODE_IF, dst_null_d(),
190 src0, src1);
191 inst->conditional_mod = condition;
192
193 return inst;
194 }
195
196 /**
197 * CMP: Sets the low bit of the destination channels with the result
198 * of the comparison, while the upper bits are undefined, and updates
199 * the flag register with the packed 16 bits of the result.
200 */
201 vec4_instruction *
202 vec4_visitor::CMP(dst_reg dst, src_reg src0, src_reg src1, uint32_t condition)
203 {
204 vec4_instruction *inst;
205
206 /* original gen4 does type conversion to the destination type
207 * before comparison, producing garbage results for floating
208 * point comparisons.
209 */
210 if (intel->gen == 4) {
211 dst.type = src0.type;
212 if (dst.file == HW_REG)
213 dst.fixed_hw_reg.type = dst.type;
214 }
215
216 resolve_ud_negate(&src0);
217 resolve_ud_negate(&src1);
218
219 inst = new(mem_ctx) vec4_instruction(this, BRW_OPCODE_CMP, dst, src0, src1);
220 inst->conditional_mod = condition;
221
222 return inst;
223 }
224
225 vec4_instruction *
226 vec4_visitor::SCRATCH_READ(dst_reg dst, src_reg index)
227 {
228 vec4_instruction *inst;
229
230 inst = new(mem_ctx) vec4_instruction(this, VS_OPCODE_SCRATCH_READ,
231 dst, index);
232 inst->base_mrf = 14;
233 inst->mlen = 1;
234
235 return inst;
236 }
237
238 vec4_instruction *
239 vec4_visitor::SCRATCH_WRITE(dst_reg dst, src_reg src, src_reg index)
240 {
241 vec4_instruction *inst;
242
243 inst = new(mem_ctx) vec4_instruction(this, VS_OPCODE_SCRATCH_WRITE,
244 dst, src, index);
245 inst->base_mrf = 13;
246 inst->mlen = 2;
247
248 return inst;
249 }
250
251 void
252 vec4_visitor::emit_dp(dst_reg dst, src_reg src0, src_reg src1, unsigned elements)
253 {
254 static enum opcode dot_opcodes[] = {
255 BRW_OPCODE_DP2, BRW_OPCODE_DP3, BRW_OPCODE_DP4
256 };
257
258 emit(dot_opcodes[elements - 2], dst, src0, src1);
259 }
260
261 void
262 vec4_visitor::emit_math1_gen6(enum opcode opcode, dst_reg dst, src_reg src)
263 {
264 /* The gen6 math instruction ignores the source modifiers --
265 * swizzle, abs, negate, and at least some parts of the register
266 * region description.
267 *
268 * While it would seem that this MOV could be avoided at this point
269 * in the case that the swizzle is matched up with the destination
270 * writemask, note that uniform packing and register allocation
271 * could rearrange our swizzle, so let's leave this matter up to
272 * copy propagation later.
273 */
274 src_reg temp_src = src_reg(this, glsl_type::vec4_type);
275 emit(MOV(dst_reg(temp_src), src));
276
277 if (dst.writemask != WRITEMASK_XYZW) {
278 /* The gen6 math instruction must be align1, so we can't do
279 * writemasks.
280 */
281 dst_reg temp_dst = dst_reg(this, glsl_type::vec4_type);
282
283 emit(opcode, temp_dst, temp_src);
284
285 emit(MOV(dst, src_reg(temp_dst)));
286 } else {
287 emit(opcode, dst, temp_src);
288 }
289 }
290
291 void
292 vec4_visitor::emit_math1_gen4(enum opcode opcode, dst_reg dst, src_reg src)
293 {
294 vec4_instruction *inst = emit(opcode, dst, src);
295 inst->base_mrf = 1;
296 inst->mlen = 1;
297 }
298
299 void
300 vec4_visitor::emit_math(opcode opcode, dst_reg dst, src_reg src)
301 {
302 switch (opcode) {
303 case SHADER_OPCODE_RCP:
304 case SHADER_OPCODE_RSQ:
305 case SHADER_OPCODE_SQRT:
306 case SHADER_OPCODE_EXP2:
307 case SHADER_OPCODE_LOG2:
308 case SHADER_OPCODE_SIN:
309 case SHADER_OPCODE_COS:
310 break;
311 default:
312 assert(!"not reached: bad math opcode");
313 return;
314 }
315
316 if (intel->gen >= 6) {
317 return emit_math1_gen6(opcode, dst, src);
318 } else {
319 return emit_math1_gen4(opcode, dst, src);
320 }
321 }
322
323 void
324 vec4_visitor::emit_math2_gen6(enum opcode opcode,
325 dst_reg dst, src_reg src0, src_reg src1)
326 {
327 src_reg expanded;
328
329 /* The gen6 math instruction ignores the source modifiers --
330 * swizzle, abs, negate, and at least some parts of the register
331 * region description. Move the sources to temporaries to make it
332 * generally work.
333 */
334
335 expanded = src_reg(this, glsl_type::vec4_type);
336 expanded.type = src0.type;
337 emit(MOV(dst_reg(expanded), src0));
338 src0 = expanded;
339
340 expanded = src_reg(this, glsl_type::vec4_type);
341 expanded.type = src1.type;
342 emit(MOV(dst_reg(expanded), src1));
343 src1 = expanded;
344
345 if (dst.writemask != WRITEMASK_XYZW) {
346 /* The gen6 math instruction must be align1, so we can't do
347 * writemasks.
348 */
349 dst_reg temp_dst = dst_reg(this, glsl_type::vec4_type);
350 temp_dst.type = dst.type;
351
352 emit(opcode, temp_dst, src0, src1);
353
354 emit(MOV(dst, src_reg(temp_dst)));
355 } else {
356 emit(opcode, dst, src0, src1);
357 }
358 }
359
360 void
361 vec4_visitor::emit_math2_gen4(enum opcode opcode,
362 dst_reg dst, src_reg src0, src_reg src1)
363 {
364 vec4_instruction *inst = emit(opcode, dst, src0, src1);
365 inst->base_mrf = 1;
366 inst->mlen = 2;
367 }
368
369 void
370 vec4_visitor::emit_math(enum opcode opcode,
371 dst_reg dst, src_reg src0, src_reg src1)
372 {
373 switch (opcode) {
374 case SHADER_OPCODE_POW:
375 case SHADER_OPCODE_INT_QUOTIENT:
376 case SHADER_OPCODE_INT_REMAINDER:
377 break;
378 default:
379 assert(!"not reached: unsupported binary math opcode");
380 return;
381 }
382
383 if (intel->gen >= 6) {
384 return emit_math2_gen6(opcode, dst, src0, src1);
385 } else {
386 return emit_math2_gen4(opcode, dst, src0, src1);
387 }
388 }
389
390 void
391 vec4_visitor::visit_instructions(const exec_list *list)
392 {
393 foreach_list(node, list) {
394 ir_instruction *ir = (ir_instruction *)node;
395
396 base_ir = ir;
397 ir->accept(this);
398 }
399 }
400
401
402 static int
403 type_size(const struct glsl_type *type)
404 {
405 unsigned int i;
406 int size;
407
408 switch (type->base_type) {
409 case GLSL_TYPE_UINT:
410 case GLSL_TYPE_INT:
411 case GLSL_TYPE_FLOAT:
412 case GLSL_TYPE_BOOL:
413 if (type->is_matrix()) {
414 return type->matrix_columns;
415 } else {
416 /* Regardless of size of vector, it gets a vec4. This is bad
417 * packing for things like floats, but otherwise arrays become a
418 * mess. Hopefully a later pass over the code can pack scalars
419 * down if appropriate.
420 */
421 return 1;
422 }
423 case GLSL_TYPE_ARRAY:
424 assert(type->length > 0);
425 return type_size(type->fields.array) * type->length;
426 case GLSL_TYPE_STRUCT:
427 size = 0;
428 for (i = 0; i < type->length; i++) {
429 size += type_size(type->fields.structure[i].type);
430 }
431 return size;
432 case GLSL_TYPE_SAMPLER:
433 /* Samplers take up one slot in UNIFORMS[], but they're baked in
434 * at link time.
435 */
436 return 1;
437 default:
438 assert(0);
439 return 0;
440 }
441 }
442
443 int
444 vec4_visitor::virtual_grf_alloc(int size)
445 {
446 if (virtual_grf_array_size <= virtual_grf_count) {
447 if (virtual_grf_array_size == 0)
448 virtual_grf_array_size = 16;
449 else
450 virtual_grf_array_size *= 2;
451 virtual_grf_sizes = reralloc(mem_ctx, virtual_grf_sizes, int,
452 virtual_grf_array_size);
453 virtual_grf_reg_map = reralloc(mem_ctx, virtual_grf_reg_map, int,
454 virtual_grf_array_size);
455 }
456 virtual_grf_reg_map[virtual_grf_count] = virtual_grf_reg_count;
457 virtual_grf_reg_count += size;
458 virtual_grf_sizes[virtual_grf_count] = size;
459 return virtual_grf_count++;
460 }
461
462 src_reg::src_reg(class vec4_visitor *v, const struct glsl_type *type)
463 {
464 init();
465
466 this->file = GRF;
467 this->reg = v->virtual_grf_alloc(type_size(type));
468
469 if (type->is_array() || type->is_record()) {
470 this->swizzle = BRW_SWIZZLE_NOOP;
471 } else {
472 this->swizzle = swizzle_for_size(type->vector_elements);
473 }
474
475 this->type = brw_type_for_base_type(type);
476 }
477
478 dst_reg::dst_reg(class vec4_visitor *v, const struct glsl_type *type)
479 {
480 init();
481
482 this->file = GRF;
483 this->reg = v->virtual_grf_alloc(type_size(type));
484
485 if (type->is_array() || type->is_record()) {
486 this->writemask = WRITEMASK_XYZW;
487 } else {
488 this->writemask = (1 << type->vector_elements) - 1;
489 }
490
491 this->type = brw_type_for_base_type(type);
492 }
493
494 /* Our support for uniforms is piggy-backed on the struct
495 * gl_vertex_program, because that's where the values actually
496 * get stored, rather than in some global gl_shader_program uniform
497 * store.
498 */
499 int
500 vec4_visitor::setup_uniform_values(int loc, const glsl_type *type)
501 {
502 unsigned int offset = 0;
503 float *values = &this->vp->Base.Parameters->ParameterValues[loc][0].f;
504
505 if (type->is_matrix()) {
506 const glsl_type *column = type->column_type();
507
508 for (unsigned int i = 0; i < type->matrix_columns; i++) {
509 offset += setup_uniform_values(loc + offset, column);
510 }
511
512 return offset;
513 }
514
515 switch (type->base_type) {
516 case GLSL_TYPE_FLOAT:
517 case GLSL_TYPE_UINT:
518 case GLSL_TYPE_INT:
519 case GLSL_TYPE_BOOL:
520 for (unsigned int i = 0; i < type->vector_elements; i++) {
521 c->prog_data.param[this->uniforms * 4 + i] = &values[i];
522 }
523
524 /* Set up pad elements to get things aligned to a vec4 boundary. */
525 for (unsigned int i = type->vector_elements; i < 4; i++) {
526 static float zero = 0;
527
528 c->prog_data.param[this->uniforms * 4 + i] = &zero;
529 }
530
531 /* Track the size of this uniform vector, for future packing of
532 * uniforms.
533 */
534 this->uniform_vector_size[this->uniforms] = type->vector_elements;
535 this->uniforms++;
536
537 return 1;
538
539 case GLSL_TYPE_STRUCT:
540 for (unsigned int i = 0; i < type->length; i++) {
541 offset += setup_uniform_values(loc + offset,
542 type->fields.structure[i].type);
543 }
544 return offset;
545
546 case GLSL_TYPE_ARRAY:
547 for (unsigned int i = 0; i < type->length; i++) {
548 offset += setup_uniform_values(loc + offset, type->fields.array);
549 }
550 return offset;
551
552 case GLSL_TYPE_SAMPLER:
553 /* The sampler takes up a slot, but we don't use any values from it. */
554 return 1;
555
556 default:
557 assert(!"not reached");
558 return 0;
559 }
560 }
561
562 void
563 vec4_visitor::setup_uniform_clipplane_values()
564 {
565 gl_clip_plane *clip_planes = brw_select_clip_planes(ctx);
566
567 /* Pre-Gen6, we compact clip planes. For example, if the user
568 * enables just clip planes 0, 1, and 3, we will enable clip planes
569 * 0, 1, and 2 in the hardware, and we'll move clip plane 3 to clip
570 * plane 2. This simplifies the implementation of the Gen6 clip
571 * thread.
572 *
573 * In Gen6 and later, we don't compact clip planes, because this
574 * simplifies the implementation of gl_ClipDistance.
575 */
576 int compacted_clipplane_index = 0;
577 for (int i = 0; i < c->key.nr_userclip_plane_consts; ++i) {
578 if (intel->gen < 6 &&
579 !(c->key.userclip_planes_enabled_gen_4_5 & (1 << i))) {
580 continue;
581 }
582 this->uniform_vector_size[this->uniforms] = 4;
583 this->userplane[compacted_clipplane_index] = dst_reg(UNIFORM, this->uniforms);
584 this->userplane[compacted_clipplane_index].type = BRW_REGISTER_TYPE_F;
585 for (int j = 0; j < 4; ++j) {
586 c->prog_data.param[this->uniforms * 4 + j] = &clip_planes[i][j];
587 }
588 ++compacted_clipplane_index;
589 ++this->uniforms;
590 }
591 }
592
593 /* Our support for builtin uniforms is even scarier than non-builtin.
594 * It sits on top of the PROG_STATE_VAR parameters that are
595 * automatically updated from GL context state.
596 */
597 void
598 vec4_visitor::setup_builtin_uniform_values(ir_variable *ir)
599 {
600 const ir_state_slot *const slots = ir->state_slots;
601 assert(ir->state_slots != NULL);
602
603 for (unsigned int i = 0; i < ir->num_state_slots; i++) {
604 /* This state reference has already been setup by ir_to_mesa,
605 * but we'll get the same index back here. We can reference
606 * ParameterValues directly, since unlike brw_fs.cpp, we never
607 * add new state references during compile.
608 */
609 int index = _mesa_add_state_reference(this->vp->Base.Parameters,
610 (gl_state_index *)slots[i].tokens);
611 float *values = &this->vp->Base.Parameters->ParameterValues[index][0].f;
612
613 this->uniform_vector_size[this->uniforms] = 0;
614 /* Add each of the unique swizzled channels of the element.
615 * This will end up matching the size of the glsl_type of this field.
616 */
617 int last_swiz = -1;
618 for (unsigned int j = 0; j < 4; j++) {
619 int swiz = GET_SWZ(slots[i].swizzle, j);
620 last_swiz = swiz;
621
622 c->prog_data.param[this->uniforms * 4 + j] = &values[swiz];
623 if (swiz <= last_swiz)
624 this->uniform_vector_size[this->uniforms]++;
625 }
626 this->uniforms++;
627 }
628 }
629
630 dst_reg *
631 vec4_visitor::variable_storage(ir_variable *var)
632 {
633 return (dst_reg *)hash_table_find(this->variable_ht, var);
634 }
635
636 void
637 vec4_visitor::emit_bool_to_cond_code(ir_rvalue *ir, uint32_t *predicate)
638 {
639 ir_expression *expr = ir->as_expression();
640
641 *predicate = BRW_PREDICATE_NORMAL;
642
643 if (expr) {
644 src_reg op[2];
645 vec4_instruction *inst;
646
647 assert(expr->get_num_operands() <= 2);
648 for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
649 expr->operands[i]->accept(this);
650 op[i] = this->result;
651
652 resolve_ud_negate(&op[i]);
653 }
654
655 switch (expr->operation) {
656 case ir_unop_logic_not:
657 inst = emit(AND(dst_null_d(), op[0], src_reg(1)));
658 inst->conditional_mod = BRW_CONDITIONAL_Z;
659 break;
660
661 case ir_binop_logic_xor:
662 inst = emit(XOR(dst_null_d(), op[0], op[1]));
663 inst->conditional_mod = BRW_CONDITIONAL_NZ;
664 break;
665
666 case ir_binop_logic_or:
667 inst = emit(OR(dst_null_d(), op[0], op[1]));
668 inst->conditional_mod = BRW_CONDITIONAL_NZ;
669 break;
670
671 case ir_binop_logic_and:
672 inst = emit(AND(dst_null_d(), op[0], op[1]));
673 inst->conditional_mod = BRW_CONDITIONAL_NZ;
674 break;
675
676 case ir_unop_f2b:
677 if (intel->gen >= 6) {
678 emit(CMP(dst_null_d(), op[0], src_reg(0.0f), BRW_CONDITIONAL_NZ));
679 } else {
680 inst = emit(MOV(dst_null_f(), op[0]));
681 inst->conditional_mod = BRW_CONDITIONAL_NZ;
682 }
683 break;
684
685 case ir_unop_i2b:
686 if (intel->gen >= 6) {
687 emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ));
688 } else {
689 inst = emit(MOV(dst_null_d(), op[0]));
690 inst->conditional_mod = BRW_CONDITIONAL_NZ;
691 }
692 break;
693
694 case ir_binop_all_equal:
695 inst = emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_Z));
696 *predicate = BRW_PREDICATE_ALIGN16_ALL4H;
697 break;
698
699 case ir_binop_any_nequal:
700 inst = emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_NZ));
701 *predicate = BRW_PREDICATE_ALIGN16_ANY4H;
702 break;
703
704 case ir_unop_any:
705 inst = emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ));
706 *predicate = BRW_PREDICATE_ALIGN16_ANY4H;
707 break;
708
709 case ir_binop_greater:
710 case ir_binop_gequal:
711 case ir_binop_less:
712 case ir_binop_lequal:
713 case ir_binop_equal:
714 case ir_binop_nequal:
715 emit(CMP(dst_null_d(), op[0], op[1],
716 brw_conditional_for_comparison(expr->operation)));
717 break;
718
719 default:
720 assert(!"not reached");
721 break;
722 }
723 return;
724 }
725
726 ir->accept(this);
727
728 resolve_ud_negate(&this->result);
729
730 if (intel->gen >= 6) {
731 vec4_instruction *inst = emit(AND(dst_null_d(),
732 this->result, src_reg(1)));
733 inst->conditional_mod = BRW_CONDITIONAL_NZ;
734 } else {
735 vec4_instruction *inst = emit(MOV(dst_null_d(), this->result));
736 inst->conditional_mod = BRW_CONDITIONAL_NZ;
737 }
738 }
739
740 /**
741 * Emit a gen6 IF statement with the comparison folded into the IF
742 * instruction.
743 */
744 void
745 vec4_visitor::emit_if_gen6(ir_if *ir)
746 {
747 ir_expression *expr = ir->condition->as_expression();
748
749 if (expr) {
750 src_reg op[2];
751 dst_reg temp;
752
753 assert(expr->get_num_operands() <= 2);
754 for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
755 expr->operands[i]->accept(this);
756 op[i] = this->result;
757 }
758
759 switch (expr->operation) {
760 case ir_unop_logic_not:
761 emit(IF(op[0], src_reg(0), BRW_CONDITIONAL_Z));
762 return;
763
764 case ir_binop_logic_xor:
765 emit(IF(op[0], op[1], BRW_CONDITIONAL_NZ));
766 return;
767
768 case ir_binop_logic_or:
769 temp = dst_reg(this, glsl_type::bool_type);
770 emit(OR(temp, op[0], op[1]));
771 emit(IF(src_reg(temp), src_reg(0), BRW_CONDITIONAL_NZ));
772 return;
773
774 case ir_binop_logic_and:
775 temp = dst_reg(this, glsl_type::bool_type);
776 emit(AND(temp, op[0], op[1]));
777 emit(IF(src_reg(temp), src_reg(0), BRW_CONDITIONAL_NZ));
778 return;
779
780 case ir_unop_f2b:
781 emit(IF(op[0], src_reg(0), BRW_CONDITIONAL_NZ));
782 return;
783
784 case ir_unop_i2b:
785 emit(IF(op[0], src_reg(0), BRW_CONDITIONAL_NZ));
786 return;
787
788 case ir_binop_greater:
789 case ir_binop_gequal:
790 case ir_binop_less:
791 case ir_binop_lequal:
792 case ir_binop_equal:
793 case ir_binop_nequal:
794 emit(IF(op[0], op[1],
795 brw_conditional_for_comparison(expr->operation)));
796 return;
797
798 case ir_binop_all_equal:
799 emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_Z));
800 emit(IF(BRW_PREDICATE_ALIGN16_ALL4H));
801 return;
802
803 case ir_binop_any_nequal:
804 emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_NZ));
805 emit(IF(BRW_PREDICATE_ALIGN16_ANY4H));
806 return;
807
808 case ir_unop_any:
809 emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ));
810 emit(IF(BRW_PREDICATE_ALIGN16_ANY4H));
811 return;
812
813 default:
814 assert(!"not reached");
815 emit(IF(op[0], src_reg(0), BRW_CONDITIONAL_NZ));
816 return;
817 }
818 return;
819 }
820
821 ir->condition->accept(this);
822
823 emit(IF(this->result, src_reg(0), BRW_CONDITIONAL_NZ));
824 }
825
826 void
827 vec4_visitor::visit(ir_variable *ir)
828 {
829 dst_reg *reg = NULL;
830
831 if (variable_storage(ir))
832 return;
833
834 switch (ir->mode) {
835 case ir_var_in:
836 reg = new(mem_ctx) dst_reg(ATTR, ir->location);
837
838 /* Do GL_FIXED rescaling for GLES2.0. Our GL_FIXED attributes
839 * come in as floating point conversions of the integer values.
840 */
841 for (int i = ir->location; i < ir->location + type_size(ir->type); i++) {
842 if (!c->key.gl_fixed_input_size[i])
843 continue;
844
845 dst_reg dst = *reg;
846 dst.type = brw_type_for_base_type(ir->type);
847 dst.writemask = (1 << c->key.gl_fixed_input_size[i]) - 1;
848 emit(MUL(dst, src_reg(dst), src_reg(1.0f / 65536.0f)));
849 }
850 break;
851
852 case ir_var_out:
853 reg = new(mem_ctx) dst_reg(this, ir->type);
854
855 for (int i = 0; i < type_size(ir->type); i++) {
856 output_reg[ir->location + i] = *reg;
857 output_reg[ir->location + i].reg_offset = i;
858 output_reg[ir->location + i].type =
859 brw_type_for_base_type(ir->type->get_scalar_type());
860 output_reg_annotation[ir->location + i] = ir->name;
861 }
862 break;
863
864 case ir_var_auto:
865 case ir_var_temporary:
866 reg = new(mem_ctx) dst_reg(this, ir->type);
867 break;
868
869 case ir_var_uniform:
870 reg = new(this->mem_ctx) dst_reg(UNIFORM, this->uniforms);
871
872 /* Track how big the whole uniform variable is, in case we need to put a
873 * copy of its data into pull constants for array access.
874 */
875 this->uniform_size[this->uniforms] = type_size(ir->type);
876
877 if (!strncmp(ir->name, "gl_", 3)) {
878 setup_builtin_uniform_values(ir);
879 } else {
880 setup_uniform_values(ir->location, ir->type);
881 }
882 break;
883
884 case ir_var_system_value:
885 /* VertexID is stored by the VF as the last vertex element, but
886 * we don't represent it with a flag in inputs_read, so we call
887 * it VERT_ATTRIB_MAX, which setup_attributes() picks up on.
888 */
889 reg = new(mem_ctx) dst_reg(ATTR, VERT_ATTRIB_MAX);
890 prog_data->uses_vertexid = true;
891
892 switch (ir->location) {
893 case SYSTEM_VALUE_VERTEX_ID:
894 reg->writemask = WRITEMASK_X;
895 break;
896 case SYSTEM_VALUE_INSTANCE_ID:
897 reg->writemask = WRITEMASK_Y;
898 break;
899 default:
900 assert(!"not reached");
901 break;
902 }
903 break;
904
905 default:
906 assert(!"not reached");
907 }
908
909 reg->type = brw_type_for_base_type(ir->type);
910 hash_table_insert(this->variable_ht, reg, ir);
911 }
912
913 void
914 vec4_visitor::visit(ir_loop *ir)
915 {
916 dst_reg counter;
917
918 /* We don't want debugging output to print the whole body of the
919 * loop as the annotation.
920 */
921 this->base_ir = NULL;
922
923 if (ir->counter != NULL) {
924 this->base_ir = ir->counter;
925 ir->counter->accept(this);
926 counter = *(variable_storage(ir->counter));
927
928 if (ir->from != NULL) {
929 this->base_ir = ir->from;
930 ir->from->accept(this);
931
932 emit(MOV(counter, this->result));
933 }
934 }
935
936 emit(BRW_OPCODE_DO);
937
938 if (ir->to) {
939 this->base_ir = ir->to;
940 ir->to->accept(this);
941
942 emit(CMP(dst_null_d(), src_reg(counter), this->result,
943 brw_conditional_for_comparison(ir->cmp)));
944
945 vec4_instruction *inst = emit(BRW_OPCODE_BREAK);
946 inst->predicate = BRW_PREDICATE_NORMAL;
947 }
948
949 visit_instructions(&ir->body_instructions);
950
951
952 if (ir->increment) {
953 this->base_ir = ir->increment;
954 ir->increment->accept(this);
955 emit(ADD(counter, src_reg(counter), this->result));
956 }
957
958 emit(BRW_OPCODE_WHILE);
959 }
960
961 void
962 vec4_visitor::visit(ir_loop_jump *ir)
963 {
964 switch (ir->mode) {
965 case ir_loop_jump::jump_break:
966 emit(BRW_OPCODE_BREAK);
967 break;
968 case ir_loop_jump::jump_continue:
969 emit(BRW_OPCODE_CONTINUE);
970 break;
971 }
972 }
973
974
975 void
976 vec4_visitor::visit(ir_function_signature *ir)
977 {
978 assert(0);
979 (void)ir;
980 }
981
982 void
983 vec4_visitor::visit(ir_function *ir)
984 {
985 /* Ignore function bodies other than main() -- we shouldn't see calls to
986 * them since they should all be inlined.
987 */
988 if (strcmp(ir->name, "main") == 0) {
989 const ir_function_signature *sig;
990 exec_list empty;
991
992 sig = ir->matching_signature(&empty);
993
994 assert(sig);
995
996 visit_instructions(&sig->body);
997 }
998 }
999
1000 bool
1001 vec4_visitor::try_emit_sat(ir_expression *ir)
1002 {
1003 ir_rvalue *sat_src = ir->as_rvalue_to_saturate();
1004 if (!sat_src)
1005 return false;
1006
1007 sat_src->accept(this);
1008 src_reg src = this->result;
1009
1010 this->result = src_reg(this, ir->type);
1011 vec4_instruction *inst;
1012 inst = emit(MOV(dst_reg(this->result), src));
1013 inst->saturate = true;
1014
1015 return true;
1016 }
1017
1018 void
1019 vec4_visitor::emit_bool_comparison(unsigned int op,
1020 dst_reg dst, src_reg src0, src_reg src1)
1021 {
1022 /* original gen4 does destination conversion before comparison. */
1023 if (intel->gen < 5)
1024 dst.type = src0.type;
1025
1026 emit(CMP(dst, src0, src1, brw_conditional_for_comparison(op)));
1027
1028 dst.type = BRW_REGISTER_TYPE_D;
1029 emit(AND(dst, src_reg(dst), src_reg(0x1)));
1030 }
1031
1032 void
1033 vec4_visitor::visit(ir_expression *ir)
1034 {
1035 unsigned int operand;
1036 src_reg op[Elements(ir->operands)];
1037 src_reg result_src;
1038 dst_reg result_dst;
1039 vec4_instruction *inst;
1040
1041 if (try_emit_sat(ir))
1042 return;
1043
1044 for (operand = 0; operand < ir->get_num_operands(); operand++) {
1045 this->result.file = BAD_FILE;
1046 ir->operands[operand]->accept(this);
1047 if (this->result.file == BAD_FILE) {
1048 printf("Failed to get tree for expression operand:\n");
1049 ir->operands[operand]->print();
1050 exit(1);
1051 }
1052 op[operand] = this->result;
1053
1054 /* Matrix expression operands should have been broken down to vector
1055 * operations already.
1056 */
1057 assert(!ir->operands[operand]->type->is_matrix());
1058 }
1059
1060 int vector_elements = ir->operands[0]->type->vector_elements;
1061 if (ir->operands[1]) {
1062 vector_elements = MAX2(vector_elements,
1063 ir->operands[1]->type->vector_elements);
1064 }
1065
1066 this->result.file = BAD_FILE;
1067
1068 /* Storage for our result. Ideally for an assignment we'd be using
1069 * the actual storage for the result here, instead.
1070 */
1071 result_src = src_reg(this, ir->type);
1072 /* convenience for the emit functions below. */
1073 result_dst = dst_reg(result_src);
1074 /* If nothing special happens, this is the result. */
1075 this->result = result_src;
1076 /* Limit writes to the channels that will be used by result_src later.
1077 * This does limit this temp's use as a temporary for multi-instruction
1078 * sequences.
1079 */
1080 result_dst.writemask = (1 << ir->type->vector_elements) - 1;
1081
1082 switch (ir->operation) {
1083 case ir_unop_logic_not:
1084 /* Note that BRW_OPCODE_NOT is not appropriate here, since it is
1085 * ones complement of the whole register, not just bit 0.
1086 */
1087 emit(XOR(result_dst, op[0], src_reg(1)));
1088 break;
1089 case ir_unop_neg:
1090 op[0].negate = !op[0].negate;
1091 this->result = op[0];
1092 break;
1093 case ir_unop_abs:
1094 op[0].abs = true;
1095 op[0].negate = false;
1096 this->result = op[0];
1097 break;
1098
1099 case ir_unop_sign:
1100 emit(MOV(result_dst, src_reg(0.0f)));
1101
1102 emit(CMP(dst_null_d(), op[0], src_reg(0.0f), BRW_CONDITIONAL_G));
1103 inst = emit(MOV(result_dst, src_reg(1.0f)));
1104 inst->predicate = BRW_PREDICATE_NORMAL;
1105
1106 emit(CMP(dst_null_d(), op[0], src_reg(0.0f), BRW_CONDITIONAL_L));
1107 inst = emit(MOV(result_dst, src_reg(-1.0f)));
1108 inst->predicate = BRW_PREDICATE_NORMAL;
1109
1110 break;
1111
1112 case ir_unop_rcp:
1113 emit_math(SHADER_OPCODE_RCP, result_dst, op[0]);
1114 break;
1115
1116 case ir_unop_exp2:
1117 emit_math(SHADER_OPCODE_EXP2, result_dst, op[0]);
1118 break;
1119 case ir_unop_log2:
1120 emit_math(SHADER_OPCODE_LOG2, result_dst, op[0]);
1121 break;
1122 case ir_unop_exp:
1123 case ir_unop_log:
1124 assert(!"not reached: should be handled by ir_explog_to_explog2");
1125 break;
1126 case ir_unop_sin:
1127 case ir_unop_sin_reduced:
1128 emit_math(SHADER_OPCODE_SIN, result_dst, op[0]);
1129 break;
1130 case ir_unop_cos:
1131 case ir_unop_cos_reduced:
1132 emit_math(SHADER_OPCODE_COS, result_dst, op[0]);
1133 break;
1134
1135 case ir_unop_dFdx:
1136 case ir_unop_dFdy:
1137 assert(!"derivatives not valid in vertex shader");
1138 break;
1139
1140 case ir_unop_noise:
1141 assert(!"not reached: should be handled by lower_noise");
1142 break;
1143
1144 case ir_binop_add:
1145 emit(ADD(result_dst, op[0], op[1]));
1146 break;
1147 case ir_binop_sub:
1148 assert(!"not reached: should be handled by ir_sub_to_add_neg");
1149 break;
1150
1151 case ir_binop_mul:
1152 if (ir->type->is_integer()) {
1153 /* For integer multiplication, the MUL uses the low 16 bits
1154 * of one of the operands (src0 on gen6, src1 on gen7). The
1155 * MACH accumulates in the contribution of the upper 16 bits
1156 * of that operand.
1157 *
1158 * FINISHME: Emit just the MUL if we know an operand is small
1159 * enough.
1160 */
1161 struct brw_reg acc = retype(brw_acc_reg(), BRW_REGISTER_TYPE_D);
1162
1163 emit(MUL(acc, op[0], op[1]));
1164 emit(MACH(dst_null_d(), op[0], op[1]));
1165 emit(MOV(result_dst, src_reg(acc)));
1166 } else {
1167 emit(MUL(result_dst, op[0], op[1]));
1168 }
1169 break;
1170 case ir_binop_div:
1171 /* Floating point should be lowered by DIV_TO_MUL_RCP in the compiler. */
1172 assert(ir->type->is_integer());
1173 emit_math(SHADER_OPCODE_INT_QUOTIENT, result_dst, op[0], op[1]);
1174 break;
1175 case ir_binop_mod:
1176 /* Floating point should be lowered by MOD_TO_FRACT in the compiler. */
1177 assert(ir->type->is_integer());
1178 emit_math(SHADER_OPCODE_INT_REMAINDER, result_dst, op[0], op[1]);
1179 break;
1180
1181 case ir_binop_less:
1182 case ir_binop_greater:
1183 case ir_binop_lequal:
1184 case ir_binop_gequal:
1185 case ir_binop_equal:
1186 case ir_binop_nequal: {
1187 emit(CMP(result_dst, op[0], op[1],
1188 brw_conditional_for_comparison(ir->operation)));
1189 emit(AND(result_dst, result_src, src_reg(0x1)));
1190 break;
1191 }
1192
1193 case ir_binop_all_equal:
1194 /* "==" operator producing a scalar boolean. */
1195 if (ir->operands[0]->type->is_vector() ||
1196 ir->operands[1]->type->is_vector()) {
1197 emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_Z));
1198 emit(MOV(result_dst, src_reg(0)));
1199 inst = emit(MOV(result_dst, src_reg(1)));
1200 inst->predicate = BRW_PREDICATE_ALIGN16_ALL4H;
1201 } else {
1202 emit(CMP(result_dst, op[0], op[1], BRW_CONDITIONAL_Z));
1203 emit(AND(result_dst, result_src, src_reg(0x1)));
1204 }
1205 break;
1206 case ir_binop_any_nequal:
1207 /* "!=" operator producing a scalar boolean. */
1208 if (ir->operands[0]->type->is_vector() ||
1209 ir->operands[1]->type->is_vector()) {
1210 emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_NZ));
1211
1212 emit(MOV(result_dst, src_reg(0)));
1213 inst = emit(MOV(result_dst, src_reg(1)));
1214 inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
1215 } else {
1216 emit(CMP(result_dst, op[0], op[1], BRW_CONDITIONAL_NZ));
1217 emit(AND(result_dst, result_src, src_reg(0x1)));
1218 }
1219 break;
1220
1221 case ir_unop_any:
1222 emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ));
1223 emit(MOV(result_dst, src_reg(0)));
1224
1225 inst = emit(MOV(result_dst, src_reg(1)));
1226 inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
1227 break;
1228
1229 case ir_binop_logic_xor:
1230 emit(XOR(result_dst, op[0], op[1]));
1231 break;
1232
1233 case ir_binop_logic_or:
1234 emit(OR(result_dst, op[0], op[1]));
1235 break;
1236
1237 case ir_binop_logic_and:
1238 emit(AND(result_dst, op[0], op[1]));
1239 break;
1240
1241 case ir_binop_dot:
1242 assert(ir->operands[0]->type->is_vector());
1243 assert(ir->operands[0]->type == ir->operands[1]->type);
1244 emit_dp(result_dst, op[0], op[1], ir->operands[0]->type->vector_elements);
1245 break;
1246
1247 case ir_unop_sqrt:
1248 emit_math(SHADER_OPCODE_SQRT, result_dst, op[0]);
1249 break;
1250 case ir_unop_rsq:
1251 emit_math(SHADER_OPCODE_RSQ, result_dst, op[0]);
1252 break;
1253 case ir_unop_i2f:
1254 case ir_unop_i2u:
1255 case ir_unop_u2i:
1256 case ir_unop_u2f:
1257 case ir_unop_b2f:
1258 case ir_unop_b2i:
1259 case ir_unop_f2i:
1260 emit(MOV(result_dst, op[0]));
1261 break;
1262 case ir_unop_f2b:
1263 case ir_unop_i2b: {
1264 emit(CMP(result_dst, op[0], src_reg(0.0f), BRW_CONDITIONAL_NZ));
1265 emit(AND(result_dst, result_src, src_reg(1)));
1266 break;
1267 }
1268
1269 case ir_unop_trunc:
1270 emit(RNDZ(result_dst, op[0]));
1271 break;
1272 case ir_unop_ceil:
1273 op[0].negate = !op[0].negate;
1274 inst = emit(RNDD(result_dst, op[0]));
1275 this->result.negate = true;
1276 break;
1277 case ir_unop_floor:
1278 inst = emit(RNDD(result_dst, op[0]));
1279 break;
1280 case ir_unop_fract:
1281 inst = emit(FRC(result_dst, op[0]));
1282 break;
1283 case ir_unop_round_even:
1284 emit(RNDE(result_dst, op[0]));
1285 break;
1286
1287 case ir_binop_min:
1288 if (intel->gen >= 6) {
1289 inst = emit(BRW_OPCODE_SEL, result_dst, op[0], op[1]);
1290 inst->conditional_mod = BRW_CONDITIONAL_L;
1291 } else {
1292 emit(CMP(result_dst, op[0], op[1], BRW_CONDITIONAL_L));
1293
1294 inst = emit(BRW_OPCODE_SEL, result_dst, op[0], op[1]);
1295 inst->predicate = BRW_PREDICATE_NORMAL;
1296 }
1297 break;
1298 case ir_binop_max:
1299 if (intel->gen >= 6) {
1300 inst = emit(BRW_OPCODE_SEL, result_dst, op[0], op[1]);
1301 inst->conditional_mod = BRW_CONDITIONAL_G;
1302 } else {
1303 emit(CMP(result_dst, op[0], op[1], BRW_CONDITIONAL_G));
1304
1305 inst = emit(BRW_OPCODE_SEL, result_dst, op[0], op[1]);
1306 inst->predicate = BRW_PREDICATE_NORMAL;
1307 }
1308 break;
1309
1310 case ir_binop_pow:
1311 emit_math(SHADER_OPCODE_POW, result_dst, op[0], op[1]);
1312 break;
1313
1314 case ir_unop_bit_not:
1315 inst = emit(NOT(result_dst, op[0]));
1316 break;
1317 case ir_binop_bit_and:
1318 inst = emit(AND(result_dst, op[0], op[1]));
1319 break;
1320 case ir_binop_bit_xor:
1321 inst = emit(XOR(result_dst, op[0], op[1]));
1322 break;
1323 case ir_binop_bit_or:
1324 inst = emit(OR(result_dst, op[0], op[1]));
1325 break;
1326
1327 case ir_binop_lshift:
1328 inst = emit(BRW_OPCODE_SHL, result_dst, op[0], op[1]);
1329 break;
1330
1331 case ir_binop_rshift:
1332 if (ir->type->base_type == GLSL_TYPE_INT)
1333 inst = emit(BRW_OPCODE_ASR, result_dst, op[0], op[1]);
1334 else
1335 inst = emit(BRW_OPCODE_SHR, result_dst, op[0], op[1]);
1336 break;
1337
1338 case ir_quadop_vector:
1339 assert(!"not reached: should be handled by lower_quadop_vector");
1340 break;
1341 }
1342 }
1343
1344
1345 void
1346 vec4_visitor::visit(ir_swizzle *ir)
1347 {
1348 src_reg src;
1349 int i = 0;
1350 int swizzle[4];
1351
1352 /* Note that this is only swizzles in expressions, not those on the left
1353 * hand side of an assignment, which do write masking. See ir_assignment
1354 * for that.
1355 */
1356
1357 ir->val->accept(this);
1358 src = this->result;
1359 assert(src.file != BAD_FILE);
1360
1361 for (i = 0; i < ir->type->vector_elements; i++) {
1362 switch (i) {
1363 case 0:
1364 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.x);
1365 break;
1366 case 1:
1367 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.y);
1368 break;
1369 case 2:
1370 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.z);
1371 break;
1372 case 3:
1373 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.w);
1374 break;
1375 }
1376 }
1377 for (; i < 4; i++) {
1378 /* Replicate the last channel out. */
1379 swizzle[i] = swizzle[ir->type->vector_elements - 1];
1380 }
1381
1382 src.swizzle = BRW_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]);
1383
1384 this->result = src;
1385 }
1386
1387 void
1388 vec4_visitor::visit(ir_dereference_variable *ir)
1389 {
1390 const struct glsl_type *type = ir->type;
1391 dst_reg *reg = variable_storage(ir->var);
1392
1393 if (!reg) {
1394 fail("Failed to find variable storage for %s\n", ir->var->name);
1395 this->result = src_reg(brw_null_reg());
1396 return;
1397 }
1398
1399 this->result = src_reg(*reg);
1400
1401 if (type->is_scalar() || type->is_vector() || type->is_matrix())
1402 this->result.swizzle = swizzle_for_size(type->vector_elements);
1403 }
1404
1405 void
1406 vec4_visitor::visit(ir_dereference_array *ir)
1407 {
1408 ir_constant *constant_index;
1409 src_reg src;
1410 int element_size = type_size(ir->type);
1411
1412 constant_index = ir->array_index->constant_expression_value();
1413
1414 ir->array->accept(this);
1415 src = this->result;
1416
1417 if (constant_index) {
1418 src.reg_offset += constant_index->value.i[0] * element_size;
1419 } else {
1420 /* Variable index array dereference. It eats the "vec4" of the
1421 * base of the array and an index that offsets the Mesa register
1422 * index.
1423 */
1424 ir->array_index->accept(this);
1425
1426 src_reg index_reg;
1427
1428 if (element_size == 1) {
1429 index_reg = this->result;
1430 } else {
1431 index_reg = src_reg(this, glsl_type::int_type);
1432
1433 emit(MUL(dst_reg(index_reg), this->result, src_reg(element_size)));
1434 }
1435
1436 if (src.reladdr) {
1437 src_reg temp = src_reg(this, glsl_type::int_type);
1438
1439 emit(ADD(dst_reg(temp), *src.reladdr, index_reg));
1440
1441 index_reg = temp;
1442 }
1443
1444 src.reladdr = ralloc(mem_ctx, src_reg);
1445 memcpy(src.reladdr, &index_reg, sizeof(index_reg));
1446 }
1447
1448 /* If the type is smaller than a vec4, replicate the last channel out. */
1449 if (ir->type->is_scalar() || ir->type->is_vector())
1450 src.swizzle = swizzle_for_size(ir->type->vector_elements);
1451 else
1452 src.swizzle = BRW_SWIZZLE_NOOP;
1453 src.type = brw_type_for_base_type(ir->type);
1454
1455 this->result = src;
1456 }
1457
1458 void
1459 vec4_visitor::visit(ir_dereference_record *ir)
1460 {
1461 unsigned int i;
1462 const glsl_type *struct_type = ir->record->type;
1463 int offset = 0;
1464
1465 ir->record->accept(this);
1466
1467 for (i = 0; i < struct_type->length; i++) {
1468 if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0)
1469 break;
1470 offset += type_size(struct_type->fields.structure[i].type);
1471 }
1472
1473 /* If the type is smaller than a vec4, replicate the last channel out. */
1474 if (ir->type->is_scalar() || ir->type->is_vector())
1475 this->result.swizzle = swizzle_for_size(ir->type->vector_elements);
1476 else
1477 this->result.swizzle = BRW_SWIZZLE_NOOP;
1478 this->result.type = brw_type_for_base_type(ir->type);
1479
1480 this->result.reg_offset += offset;
1481 }
1482
1483 /**
1484 * We want to be careful in assignment setup to hit the actual storage
1485 * instead of potentially using a temporary like we might with the
1486 * ir_dereference handler.
1487 */
1488 static dst_reg
1489 get_assignment_lhs(ir_dereference *ir, vec4_visitor *v)
1490 {
1491 /* The LHS must be a dereference. If the LHS is a variable indexed array
1492 * access of a vector, it must be separated into a series of conditional moves
1493 * before reaching this point (see ir_vec_index_to_cond_assign).
1494 */
1495 assert(ir->as_dereference());
1496 ir_dereference_array *deref_array = ir->as_dereference_array();
1497 if (deref_array) {
1498 assert(!deref_array->array->type->is_vector());
1499 }
1500
1501 /* Use the rvalue deref handler for the most part. We'll ignore
1502 * swizzles in it and write swizzles using writemask, though.
1503 */
1504 ir->accept(v);
1505 return dst_reg(v->result);
1506 }
1507
1508 void
1509 vec4_visitor::emit_block_move(dst_reg *dst, src_reg *src,
1510 const struct glsl_type *type, uint32_t predicate)
1511 {
1512 if (type->base_type == GLSL_TYPE_STRUCT) {
1513 for (unsigned int i = 0; i < type->length; i++) {
1514 emit_block_move(dst, src, type->fields.structure[i].type, predicate);
1515 }
1516 return;
1517 }
1518
1519 if (type->is_array()) {
1520 for (unsigned int i = 0; i < type->length; i++) {
1521 emit_block_move(dst, src, type->fields.array, predicate);
1522 }
1523 return;
1524 }
1525
1526 if (type->is_matrix()) {
1527 const struct glsl_type *vec_type;
1528
1529 vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT,
1530 type->vector_elements, 1);
1531
1532 for (int i = 0; i < type->matrix_columns; i++) {
1533 emit_block_move(dst, src, vec_type, predicate);
1534 }
1535 return;
1536 }
1537
1538 assert(type->is_scalar() || type->is_vector());
1539
1540 dst->type = brw_type_for_base_type(type);
1541 src->type = dst->type;
1542
1543 dst->writemask = (1 << type->vector_elements) - 1;
1544
1545 /* Do we need to worry about swizzling a swizzle? */
1546 assert(src->swizzle == BRW_SWIZZLE_NOOP
1547 || src->swizzle == swizzle_for_size(type->vector_elements));
1548 src->swizzle = swizzle_for_size(type->vector_elements);
1549
1550 vec4_instruction *inst = emit(MOV(*dst, *src));
1551 inst->predicate = predicate;
1552
1553 dst->reg_offset++;
1554 src->reg_offset++;
1555 }
1556
1557
1558 /* If the RHS processing resulted in an instruction generating a
1559 * temporary value, and it would be easy to rewrite the instruction to
1560 * generate its result right into the LHS instead, do so. This ends
1561 * up reliably removing instructions where it can be tricky to do so
1562 * later without real UD chain information.
1563 */
1564 bool
1565 vec4_visitor::try_rewrite_rhs_to_dst(ir_assignment *ir,
1566 dst_reg dst,
1567 src_reg src,
1568 vec4_instruction *pre_rhs_inst,
1569 vec4_instruction *last_rhs_inst)
1570 {
1571 /* This could be supported, but it would take more smarts. */
1572 if (ir->condition)
1573 return false;
1574
1575 if (pre_rhs_inst == last_rhs_inst)
1576 return false; /* No instructions generated to work with. */
1577
1578 /* Make sure the last instruction generated our source reg. */
1579 if (src.file != GRF ||
1580 src.file != last_rhs_inst->dst.file ||
1581 src.reg != last_rhs_inst->dst.reg ||
1582 src.reg_offset != last_rhs_inst->dst.reg_offset ||
1583 src.reladdr ||
1584 src.abs ||
1585 src.negate ||
1586 last_rhs_inst->predicate != BRW_PREDICATE_NONE)
1587 return false;
1588
1589 /* Check that that last instruction fully initialized the channels
1590 * we want to use, in the order we want to use them. We could
1591 * potentially reswizzle the operands of many instructions so that
1592 * we could handle out of order channels, but don't yet.
1593 */
1594
1595 for (unsigned i = 0; i < 4; i++) {
1596 if (dst.writemask & (1 << i)) {
1597 if (!(last_rhs_inst->dst.writemask & (1 << i)))
1598 return false;
1599
1600 if (BRW_GET_SWZ(src.swizzle, i) != i)
1601 return false;
1602 }
1603 }
1604
1605 /* Success! Rewrite the instruction. */
1606 last_rhs_inst->dst.file = dst.file;
1607 last_rhs_inst->dst.reg = dst.reg;
1608 last_rhs_inst->dst.reg_offset = dst.reg_offset;
1609 last_rhs_inst->dst.reladdr = dst.reladdr;
1610 last_rhs_inst->dst.writemask &= dst.writemask;
1611
1612 return true;
1613 }
1614
1615 void
1616 vec4_visitor::visit(ir_assignment *ir)
1617 {
1618 dst_reg dst = get_assignment_lhs(ir->lhs, this);
1619 uint32_t predicate = BRW_PREDICATE_NONE;
1620
1621 if (!ir->lhs->type->is_scalar() &&
1622 !ir->lhs->type->is_vector()) {
1623 ir->rhs->accept(this);
1624 src_reg src = this->result;
1625
1626 if (ir->condition) {
1627 emit_bool_to_cond_code(ir->condition, &predicate);
1628 }
1629
1630 emit_block_move(&dst, &src, ir->rhs->type, predicate);
1631 return;
1632 }
1633
1634 /* Now we're down to just a scalar/vector with writemasks. */
1635 int i;
1636
1637 vec4_instruction *pre_rhs_inst, *last_rhs_inst;
1638 pre_rhs_inst = (vec4_instruction *)this->instructions.get_tail();
1639
1640 ir->rhs->accept(this);
1641
1642 last_rhs_inst = (vec4_instruction *)this->instructions.get_tail();
1643
1644 src_reg src = this->result;
1645
1646 int swizzles[4];
1647 int first_enabled_chan = 0;
1648 int src_chan = 0;
1649
1650 assert(ir->lhs->type->is_vector() ||
1651 ir->lhs->type->is_scalar());
1652 dst.writemask = ir->write_mask;
1653
1654 for (int i = 0; i < 4; i++) {
1655 if (dst.writemask & (1 << i)) {
1656 first_enabled_chan = BRW_GET_SWZ(src.swizzle, i);
1657 break;
1658 }
1659 }
1660
1661 /* Swizzle a small RHS vector into the channels being written.
1662 *
1663 * glsl ir treats write_mask as dictating how many channels are
1664 * present on the RHS while in our instructions we need to make
1665 * those channels appear in the slots of the vec4 they're written to.
1666 */
1667 for (int i = 0; i < 4; i++) {
1668 if (dst.writemask & (1 << i))
1669 swizzles[i] = BRW_GET_SWZ(src.swizzle, src_chan++);
1670 else
1671 swizzles[i] = first_enabled_chan;
1672 }
1673 src.swizzle = BRW_SWIZZLE4(swizzles[0], swizzles[1],
1674 swizzles[2], swizzles[3]);
1675
1676 if (try_rewrite_rhs_to_dst(ir, dst, src, pre_rhs_inst, last_rhs_inst)) {
1677 return;
1678 }
1679
1680 if (ir->condition) {
1681 emit_bool_to_cond_code(ir->condition, &predicate);
1682 }
1683
1684 for (i = 0; i < type_size(ir->lhs->type); i++) {
1685 vec4_instruction *inst = emit(MOV(dst, src));
1686 inst->predicate = predicate;
1687
1688 dst.reg_offset++;
1689 src.reg_offset++;
1690 }
1691 }
1692
1693 void
1694 vec4_visitor::emit_constant_values(dst_reg *dst, ir_constant *ir)
1695 {
1696 if (ir->type->base_type == GLSL_TYPE_STRUCT) {
1697 foreach_list(node, &ir->components) {
1698 ir_constant *field_value = (ir_constant *)node;
1699
1700 emit_constant_values(dst, field_value);
1701 }
1702 return;
1703 }
1704
1705 if (ir->type->is_array()) {
1706 for (unsigned int i = 0; i < ir->type->length; i++) {
1707 emit_constant_values(dst, ir->array_elements[i]);
1708 }
1709 return;
1710 }
1711
1712 if (ir->type->is_matrix()) {
1713 for (int i = 0; i < ir->type->matrix_columns; i++) {
1714 float *vec = &ir->value.f[i * ir->type->vector_elements];
1715
1716 for (int j = 0; j < ir->type->vector_elements; j++) {
1717 dst->writemask = 1 << j;
1718 dst->type = BRW_REGISTER_TYPE_F;
1719
1720 emit(MOV(*dst, src_reg(vec[j])));
1721 }
1722 dst->reg_offset++;
1723 }
1724 return;
1725 }
1726
1727 int remaining_writemask = (1 << ir->type->vector_elements) - 1;
1728
1729 for (int i = 0; i < ir->type->vector_elements; i++) {
1730 if (!(remaining_writemask & (1 << i)))
1731 continue;
1732
1733 dst->writemask = 1 << i;
1734 dst->type = brw_type_for_base_type(ir->type);
1735
1736 /* Find other components that match the one we're about to
1737 * write. Emits fewer instructions for things like vec4(0.5,
1738 * 1.5, 1.5, 1.5).
1739 */
1740 for (int j = i + 1; j < ir->type->vector_elements; j++) {
1741 if (ir->type->base_type == GLSL_TYPE_BOOL) {
1742 if (ir->value.b[i] == ir->value.b[j])
1743 dst->writemask |= (1 << j);
1744 } else {
1745 /* u, i, and f storage all line up, so no need for a
1746 * switch case for comparing each type.
1747 */
1748 if (ir->value.u[i] == ir->value.u[j])
1749 dst->writemask |= (1 << j);
1750 }
1751 }
1752
1753 switch (ir->type->base_type) {
1754 case GLSL_TYPE_FLOAT:
1755 emit(MOV(*dst, src_reg(ir->value.f[i])));
1756 break;
1757 case GLSL_TYPE_INT:
1758 emit(MOV(*dst, src_reg(ir->value.i[i])));
1759 break;
1760 case GLSL_TYPE_UINT:
1761 emit(MOV(*dst, src_reg(ir->value.u[i])));
1762 break;
1763 case GLSL_TYPE_BOOL:
1764 emit(MOV(*dst, src_reg(ir->value.b[i])));
1765 break;
1766 default:
1767 assert(!"Non-float/uint/int/bool constant");
1768 break;
1769 }
1770
1771 remaining_writemask &= ~dst->writemask;
1772 }
1773 dst->reg_offset++;
1774 }
1775
1776 void
1777 vec4_visitor::visit(ir_constant *ir)
1778 {
1779 dst_reg dst = dst_reg(this, ir->type);
1780 this->result = src_reg(dst);
1781
1782 emit_constant_values(&dst, ir);
1783 }
1784
1785 void
1786 vec4_visitor::visit(ir_call *ir)
1787 {
1788 assert(!"not reached");
1789 }
1790
1791 void
1792 vec4_visitor::visit(ir_texture *ir)
1793 {
1794 int sampler = _mesa_get_sampler_uniform_value(ir->sampler, prog, &vp->Base);
1795 sampler = vp->Base.SamplerUnits[sampler];
1796
1797 /* Should be lowered by do_lower_texture_projection */
1798 assert(!ir->projector);
1799
1800 vec4_instruction *inst = NULL;
1801 switch (ir->op) {
1802 case ir_tex:
1803 case ir_txl:
1804 inst = new(mem_ctx) vec4_instruction(this, SHADER_OPCODE_TXL);
1805 break;
1806 case ir_txd:
1807 inst = new(mem_ctx) vec4_instruction(this, SHADER_OPCODE_TXD);
1808 break;
1809 case ir_txf:
1810 inst = new(mem_ctx) vec4_instruction(this, SHADER_OPCODE_TXF);
1811 break;
1812 case ir_txs:
1813 inst = new(mem_ctx) vec4_instruction(this, SHADER_OPCODE_TXS);
1814 break;
1815 case ir_txb:
1816 assert(!"TXB is not valid for vertex shaders.");
1817 }
1818
1819 /* Texel offsets go in the message header; Gen4 also requires headers. */
1820 inst->header_present = ir->offset || intel->gen < 5;
1821 inst->base_mrf = 2;
1822 inst->mlen = inst->header_present + 1; /* always at least one */
1823 inst->sampler = sampler;
1824 inst->dst = dst_reg(this, ir->type);
1825 inst->shadow_compare = ir->shadow_comparitor != NULL;
1826
1827 if (ir->offset != NULL)
1828 inst->texture_offset = brw_texture_offset(ir->offset->as_constant());
1829
1830 /* MRF for the first parameter */
1831 int param_base = inst->base_mrf + inst->header_present;
1832
1833 if (ir->op == ir_txs) {
1834 ir->lod_info.lod->accept(this);
1835 int writemask = intel->gen == 4 ? WRITEMASK_W : WRITEMASK_X;
1836 emit(MOV(dst_reg(MRF, param_base, ir->lod_info.lod->type, writemask),
1837 this->result));
1838 } else {
1839 int i, coord_mask = 0, zero_mask = 0;
1840 /* Load the coordinate */
1841 /* FINISHME: gl_clamp_mask and saturate */
1842 for (i = 0; i < ir->coordinate->type->vector_elements; i++)
1843 coord_mask |= (1 << i);
1844 for (; i < 4; i++)
1845 zero_mask |= (1 << i);
1846
1847 ir->coordinate->accept(this);
1848 emit(MOV(dst_reg(MRF, param_base, ir->coordinate->type, coord_mask),
1849 this->result));
1850 emit(MOV(dst_reg(MRF, param_base, ir->coordinate->type, zero_mask),
1851 src_reg(0)));
1852 /* Load the shadow comparitor */
1853 if (ir->shadow_comparitor) {
1854 ir->shadow_comparitor->accept(this);
1855 emit(MOV(dst_reg(MRF, param_base + 1, ir->shadow_comparitor->type,
1856 WRITEMASK_X),
1857 this->result));
1858 inst->mlen++;
1859 }
1860
1861 /* Load the LOD info */
1862 if (ir->op == ir_txl) {
1863 int mrf, writemask;
1864 if (intel->gen >= 5) {
1865 mrf = param_base + 1;
1866 if (ir->shadow_comparitor) {
1867 writemask = WRITEMASK_Y;
1868 /* mlen already incremented */
1869 } else {
1870 writemask = WRITEMASK_X;
1871 inst->mlen++;
1872 }
1873 } else /* intel->gen == 4 */ {
1874 mrf = param_base;
1875 writemask = WRITEMASK_Z;
1876 }
1877 ir->lod_info.lod->accept(this);
1878 emit(MOV(dst_reg(MRF, mrf, ir->lod_info.lod->type, writemask),
1879 this->result));
1880 } else if (ir->op == ir_txf) {
1881 ir->lod_info.lod->accept(this);
1882 emit(MOV(dst_reg(MRF, param_base, ir->lod_info.lod->type, WRITEMASK_W),
1883 this->result));
1884 } else if (ir->op == ir_txd) {
1885 const glsl_type *type = ir->lod_info.grad.dPdx->type;
1886
1887 ir->lod_info.grad.dPdx->accept(this);
1888 src_reg dPdx = this->result;
1889 ir->lod_info.grad.dPdy->accept(this);
1890 src_reg dPdy = this->result;
1891
1892 if (intel->gen >= 5) {
1893 dPdx.swizzle = BRW_SWIZZLE4(SWIZZLE_X,SWIZZLE_X,SWIZZLE_Y,SWIZZLE_Y);
1894 dPdy.swizzle = BRW_SWIZZLE4(SWIZZLE_X,SWIZZLE_X,SWIZZLE_Y,SWIZZLE_Y);
1895 emit(MOV(dst_reg(MRF, param_base + 1, type, WRITEMASK_XZ), dPdx));
1896 emit(MOV(dst_reg(MRF, param_base + 1, type, WRITEMASK_YW), dPdy));
1897 inst->mlen++;
1898
1899 if (ir->type->vector_elements == 3) {
1900 dPdx.swizzle = BRW_SWIZZLE_ZZZZ;
1901 dPdy.swizzle = BRW_SWIZZLE_ZZZZ;
1902 emit(MOV(dst_reg(MRF, param_base + 2, type, WRITEMASK_X), dPdx));
1903 emit(MOV(dst_reg(MRF, param_base + 2, type, WRITEMASK_Y), dPdy));
1904 inst->mlen++;
1905 }
1906 } else /* intel->gen == 4 */ {
1907 emit(MOV(dst_reg(MRF, param_base + 1, type, WRITEMASK_XYZ), dPdx));
1908 emit(MOV(dst_reg(MRF, param_base + 2, type, WRITEMASK_XYZ), dPdy));
1909 inst->mlen += 2;
1910 }
1911 }
1912 }
1913
1914 emit(inst);
1915
1916 swizzle_result(ir, src_reg(inst->dst), sampler);
1917 }
1918
1919 void
1920 vec4_visitor::swizzle_result(ir_texture *ir, src_reg orig_val, int sampler)
1921 {
1922 this->result = orig_val;
1923
1924 int s = c->key.tex.swizzles[sampler];
1925
1926 if (ir->op == ir_txs || ir->type == glsl_type::float_type
1927 || s == SWIZZLE_NOOP)
1928 return;
1929
1930 int zero_mask = 0, one_mask = 0, copy_mask = 0;
1931 int swizzle[4];
1932
1933 for (int i = 0; i < 4; i++) {
1934 switch (GET_SWZ(s, i)) {
1935 case SWIZZLE_ZERO:
1936 zero_mask |= (1 << i);
1937 break;
1938 case SWIZZLE_ONE:
1939 one_mask |= (1 << i);
1940 break;
1941 default:
1942 copy_mask |= (1 << i);
1943 swizzle[i] = GET_SWZ(s, i);
1944 break;
1945 }
1946 }
1947
1948 this->result = src_reg(this, ir->type);
1949 dst_reg swizzled_result(this->result);
1950
1951 if (copy_mask) {
1952 orig_val.swizzle = BRW_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]);
1953 swizzled_result.writemask = copy_mask;
1954 emit(MOV(swizzled_result, orig_val));
1955 }
1956
1957 if (zero_mask) {
1958 swizzled_result.writemask = zero_mask;
1959 emit(MOV(swizzled_result, src_reg(0.0f)));
1960 }
1961
1962 if (one_mask) {
1963 swizzled_result.writemask = one_mask;
1964 emit(MOV(swizzled_result, src_reg(1.0f)));
1965 }
1966 }
1967
1968 void
1969 vec4_visitor::visit(ir_return *ir)
1970 {
1971 assert(!"not reached");
1972 }
1973
1974 void
1975 vec4_visitor::visit(ir_discard *ir)
1976 {
1977 assert(!"not reached");
1978 }
1979
1980 void
1981 vec4_visitor::visit(ir_if *ir)
1982 {
1983 /* Don't point the annotation at the if statement, because then it plus
1984 * the then and else blocks get printed.
1985 */
1986 this->base_ir = ir->condition;
1987
1988 if (intel->gen == 6) {
1989 emit_if_gen6(ir);
1990 } else {
1991 uint32_t predicate;
1992 emit_bool_to_cond_code(ir->condition, &predicate);
1993 emit(IF(predicate));
1994 }
1995
1996 visit_instructions(&ir->then_instructions);
1997
1998 if (!ir->else_instructions.is_empty()) {
1999 this->base_ir = ir->condition;
2000 emit(BRW_OPCODE_ELSE);
2001
2002 visit_instructions(&ir->else_instructions);
2003 }
2004
2005 this->base_ir = ir->condition;
2006 emit(BRW_OPCODE_ENDIF);
2007 }
2008
2009 void
2010 vec4_visitor::emit_ndc_computation()
2011 {
2012 /* Get the position */
2013 src_reg pos = src_reg(output_reg[VERT_RESULT_HPOS]);
2014
2015 /* Build ndc coords, which are (x/w, y/w, z/w, 1/w) */
2016 dst_reg ndc = dst_reg(this, glsl_type::vec4_type);
2017 output_reg[BRW_VERT_RESULT_NDC] = ndc;
2018
2019 current_annotation = "NDC";
2020 dst_reg ndc_w = ndc;
2021 ndc_w.writemask = WRITEMASK_W;
2022 src_reg pos_w = pos;
2023 pos_w.swizzle = BRW_SWIZZLE4(SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W);
2024 emit_math(SHADER_OPCODE_RCP, ndc_w, pos_w);
2025
2026 dst_reg ndc_xyz = ndc;
2027 ndc_xyz.writemask = WRITEMASK_XYZ;
2028
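/* Note: converting ndc_w (whose writemask is only W) back to a src_reg gives
 * it a .wwww swizzle, so this MUL scales pos.xyz by 1/w.
 */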
2029 emit(MUL(ndc_xyz, pos, src_reg(ndc_w)));
2030 }
2031
2032 void
2033 vec4_visitor::emit_psiz_and_flags(struct brw_reg reg)
2034 {
2035 if (intel->gen < 6 &&
2036 ((c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) ||
2037 c->key.userclip_active || brw->has_negative_rhw_bug)) {
2038 dst_reg header1 = dst_reg(this, glsl_type::uvec4_type);
2039 dst_reg header1_w = header1;
2040 header1_w.writemask = WRITEMASK_W;
2041 GLuint i;
2042
2043 emit(MOV(header1, 0u));
2044
2045 if (c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) {
2046 src_reg psiz = src_reg(output_reg[VERT_RESULT_PSIZ]);
2047
2048 current_annotation = "Point size";
2049 emit(MUL(header1_w, psiz, src_reg((float)(1 << 11))));
2050 emit(AND(header1_w, src_reg(header1_w), 0x7ff << 8));
2051 }
2052
2053 current_annotation = "Clipping flags";
2054 for (i = 0; i < c->key.nr_userclip_plane_consts; i++) {
2055 vec4_instruction *inst;
2056
2057 inst = emit(DP4(dst_null_f(), src_reg(output_reg[VERT_RESULT_HPOS]),
2058 src_reg(this->userplane[i])));
2059 inst->conditional_mod = BRW_CONDITIONAL_L;
2060
2061 inst = emit(OR(header1_w, src_reg(header1_w), 1u << i));
2062 inst->predicate = BRW_PREDICATE_NORMAL;
2063 }
2064
2065 /* i965 clipping workaround:
2066 * 1) Test for -ve rhw
2067 * 2) If set,
2068 * set ndc = (0,0,0,0)
2069 * set ucp[6] = 1
2070 *
2071 * Later, clipping will detect ucp[6] and ensure the primitive is
2072 * clipped against all fixed planes.
2073 */
2074 if (brw->has_negative_rhw_bug) {
2075 #if 0
2076 /* FINISHME */
2077 brw_CMP(p,
2078 vec8(brw_null_reg()),
2079 BRW_CONDITIONAL_L,
2080 brw_swizzle1(output_reg[BRW_VERT_RESULT_NDC], 3),
2081 brw_imm_f(0));
2082
2083 brw_OR(p, brw_writemask(header1, WRITEMASK_W), header1, brw_imm_ud(1<<6));
2084 brw_MOV(p, output_reg[BRW_VERT_RESULT_NDC], brw_imm_f(0));
2085 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
2086 #endif
2087 }
2088
2089 emit(MOV(retype(reg, BRW_REGISTER_TYPE_UD), src_reg(header1)));
2090 } else if (intel->gen < 6) {
2091 emit(MOV(retype(reg, BRW_REGISTER_TYPE_UD), 0u));
2092 } else {
2093 emit(MOV(retype(reg, BRW_REGISTER_TYPE_D), src_reg(0)));
2094 if (c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) {
2095 emit(MOV(brw_writemask(reg, WRITEMASK_W),
2096 src_reg(output_reg[VERT_RESULT_PSIZ])));
2097 }
2098 }
2099 }
2100
2101 void
2102 vec4_visitor::emit_clip_distances(struct brw_reg reg, int offset)
2103 {
2104 if (intel->gen < 6) {
2105 /* Clip distance slots are set aside in gen5, but they are not used. It
2106 * is not clear whether we actually need to set aside space for them,
2107 * but the performance cost is negligible.
2108 */
2109 return;
2110 }
2111
2112 /* From the GLSL 1.30 spec, section 7.1 (Vertex Shader Special Variables):
2113 *
2114 * "If a linked set of shaders forming the vertex stage contains no
2115 * static write to gl_ClipVertex or gl_ClipDistance, but the
2116 * application has requested clipping against user clip planes through
2117 * the API, then the coordinate written to gl_Position is used for
2118 * comparison against the user clip planes."
2119 *
2120 * This function is only called if the shader didn't write to
2121 * gl_ClipDistance. Accordingly, we use gl_ClipVertex to perform clipping
2122 * if the user wrote to it; otherwise we use gl_Position.
2123 */
2124 gl_vert_result clip_vertex = VERT_RESULT_CLIP_VERTEX;
2125 if (!(c->prog_data.outputs_written
2126 & BITFIELD64_BIT(VERT_RESULT_CLIP_VERTEX))) {
2127 clip_vertex = VERT_RESULT_HPOS;
2128 }
2129
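/* Each clip-distance slot carries four distances; `offset' selects which
 * group of user planes this slot covers (planes 0-3 for CLIP_DIST0,
 * planes 4-7 for CLIP_DIST1, per the caller in emit_urb_slot()).
 */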
2130 for (int i = 0; i + offset < c->key.nr_userclip_plane_consts && i < 4;
2131 ++i) {
2132 emit(DP4(dst_reg(brw_writemask(reg, 1 << i)),
2133 src_reg(output_reg[clip_vertex]),
2134 src_reg(this->userplane[i + offset])));
2135 }
2136 }
2137
2138 void
2139 vec4_visitor::emit_generic_urb_slot(dst_reg reg, int vert_result)
2140 {
2141 assert (vert_result < VERT_RESULT_MAX);
2142 reg.type = output_reg[vert_result].type;
2143 current_annotation = output_reg_annotation[vert_result];
2144 /* Copy the register, saturating if necessary */
2145 vec4_instruction *inst = emit(MOV(reg,
2146 src_reg(output_reg[vert_result])));
2147 if ((vert_result == VERT_RESULT_COL0 ||
2148 vert_result == VERT_RESULT_COL1 ||
2149 vert_result == VERT_RESULT_BFC0 ||
2150 vert_result == VERT_RESULT_BFC1) &&
2151 c->key.clamp_vertex_color) {
2152 inst->saturate = true;
2153 }
2154 }
2155
2156 void
2157 vec4_visitor::emit_urb_slot(int mrf, int vert_result)
2158 {
2159 struct brw_reg hw_reg = brw_message_reg(mrf);
2160 dst_reg reg = dst_reg(MRF, mrf);
2161 reg.type = BRW_REGISTER_TYPE_F;
2162
2163 switch (vert_result) {
2164 case VERT_RESULT_PSIZ:
2165 /* PSIZ is always in slot 0, and is coupled with other flags. */
2166 current_annotation = "indices, point width, clip flags";
2167 emit_psiz_and_flags(hw_reg);
2168 break;
2169 case BRW_VERT_RESULT_NDC:
2170 current_annotation = "NDC";
2171 emit(MOV(reg, src_reg(output_reg[BRW_VERT_RESULT_NDC])));
2172 break;
2173 case BRW_VERT_RESULT_HPOS_DUPLICATE:
2174 case VERT_RESULT_HPOS:
2175 current_annotation = "gl_Position";
2176 emit(MOV(reg, src_reg(output_reg[VERT_RESULT_HPOS])));
2177 break;
2178 case VERT_RESULT_CLIP_DIST0:
2179 case VERT_RESULT_CLIP_DIST1:
2180 if (this->c->key.uses_clip_distance) {
2181 emit_generic_urb_slot(reg, vert_result);
2182 } else {
2183 current_annotation = "user clip distances";
2184 emit_clip_distances(hw_reg, (vert_result - VERT_RESULT_CLIP_DIST0) * 4);
2185 }
2186 break;
2187 case BRW_VERT_RESULT_PAD:
2188 /* No need to write to this slot */
2189 break;
2190 default:
2191 emit_generic_urb_slot(reg, vert_result);
2192 break;
2193 }
2194 }
2195
2196 static int
2197 align_interleaved_urb_mlen(struct brw_context *brw, int mlen)
2198 {
2199 struct intel_context *intel = &brw->intel;
2200
2201 if (intel->gen >= 6) {
2202 /* URB data written (does not include the message header reg) must
2203 * be a multiple of 256 bits, or 2 VS registers. See vol5c.5,
2204 * section 5.4.3.2.2: URB_INTERLEAVED.
2205 *
2206 * URB entries are allocated on a multiple of 1024 bits, so an
2207 * extra 128 bits written here to make the end align to 256 is
2208 * no problem.
2209 */
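/* A worked example: mlen as passed in from emit_urb_writes() counts the
 * header MRF as well, so forcing mlen to be odd makes the data portion
 * (mlen - 1) an even number of registers.  E.g. mlen = 6 (header plus 5
 * data regs) is bumped to 7 (header plus 6 data regs = 3 x 256 bits).
 */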
2210 if ((mlen % 2) != 1)
2211 mlen++;
2212 }
2213
2214 return mlen;
2215 }
2216
2217 /**
2218 * Generates the VUE payload plus the 1 or 2 URB write instructions to
2219 * complete the VS thread.
2220 *
2221 * The VUE layout is documented in Volume 2a.
2222 */
2223 void
2224 vec4_visitor::emit_urb_writes()
2225 {
2226 /* MRF 0 is reserved for the debugger, so start with message header
2227 * in MRF 1.
2228 */
2229 int base_mrf = 1;
2230 int mrf = base_mrf;
2231 /* In the process of generating our URB write message contents, we
2232 * may need to unspill a register or load from an array. Those
2233 * reads would use MRFs 14-15.
2234 */
2235 int max_usable_mrf = 13;
2236
2237 /* The following assertion verifies that max_usable_mrf causes an
2238 * even-numbered amount of URB write data, which will meet gen6's
2239 * requirements for length alignment.
2240 */
2241 assert ((max_usable_mrf - base_mrf) % 2 == 0);
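/* With base_mrf = 1 and max_usable_mrf = 13 there is room for 12 data MRFs
 * per URB write, i.e. 6 interleaved URB rows, which satisfies the alignment
 * requirement checked above.
 */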
2242
2243 /* FINISHME: edgeflag */
2244
2245 brw_compute_vue_map(&c->vue_map, intel, c->key.userclip_active,
2246 c->prog_data.outputs_written);
2247
2248 /* First mrf is the g0-based message header containing URB handles and such,
2249 * which is implied in VS_OPCODE_URB_WRITE.
2250 */
2251 mrf++;
2252
2253 if (intel->gen < 6) {
2254 emit_ndc_computation();
2255 }
2256
2257 /* Set up the VUE data for the first URB write */
2258 int slot;
2259 for (slot = 0; slot < c->vue_map.num_slots; ++slot) {
2260 emit_urb_slot(mrf++, c->vue_map.slot_to_vert_result[slot]);
2261
2262 /* If this was max_usable_mrf, we can't fit anything more into this URB
2263 * WRITE.
2264 */
2265 if (mrf > max_usable_mrf) {
2266 slot++;
2267 break;
2268 }
2269 }
2270
2271 current_annotation = "URB write";
2272 vec4_instruction *inst = emit(VS_OPCODE_URB_WRITE);
2273 inst->base_mrf = base_mrf;
2274 inst->mlen = align_interleaved_urb_mlen(brw, mrf - base_mrf);
2275 inst->eot = (slot >= c->vue_map.num_slots);
2276
2277 /* Optional second URB write */
2278 if (!inst->eot) {
2279 mrf = base_mrf + 1;
2280
2281 for (; slot < c->vue_map.num_slots; ++slot) {
2282 assert(mrf < max_usable_mrf);
2283
2284 emit_urb_slot(mrf++, c->vue_map.slot_to_vert_result[slot]);
2285 }
2286
2287 current_annotation = "URB write";
2288 inst = emit(VS_OPCODE_URB_WRITE);
2289 inst->base_mrf = base_mrf;
2290 inst->mlen = align_interleaved_urb_mlen(brw, mrf - base_mrf);
2291 inst->eot = true;
2292 /* URB destination offset.  In the previous write we used MRFs 1-13;
2293 * the one header MRF doesn't count toward URB data, leaving 12 data
2294 * regs.  URB offset is in URB row increments, and each of our MRFs is
2295 * half of one of those, since we're doing interleaved writes.
2296 */
2297 inst->offset = (max_usable_mrf - base_mrf) / 2;
2298 }
2299 }
2300
2301 src_reg
2302 vec4_visitor::get_scratch_offset(vec4_instruction *inst,
2303 src_reg *reladdr, int reg_offset)
2304 {
2305 /* Because we store the values to scratch interleaved like our
2306 * vertex data, we need to scale the vec4 index by 2.
2307 */
2308 int message_header_scale = 2;
2309
2310 /* Pre-gen6, the message header uses byte offsets instead of vec4
2311 * (16-byte) offset units.
2312 */
2313 if (intel->gen < 6)
2314 message_header_scale *= 16;
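/* For example, a constant reg_offset of 3 becomes 3 * 2 = 6 on gen6+
 * (vec4-sized units, doubled for the interleaved layout), or
 * 3 * 2 * 16 = 96 on gen4/5 (byte units).
 */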
2315
2316 if (reladdr) {
2317 src_reg index = src_reg(this, glsl_type::int_type);
2318
2319 emit_before(inst, ADD(dst_reg(index), *reladdr, src_reg(reg_offset)));
2320 emit_before(inst, MUL(dst_reg(index),
2321 index, src_reg(message_header_scale)));
2322
2323 return index;
2324 } else {
2325 return src_reg(reg_offset * message_header_scale);
2326 }
2327 }
2328
2329 src_reg
2330 vec4_visitor::get_pull_constant_offset(vec4_instruction *inst,
2331 src_reg *reladdr, int reg_offset)
2332 {
2333 if (reladdr) {
2334 src_reg index = src_reg(this, glsl_type::int_type);
2335
2336 emit_before(inst, ADD(dst_reg(index), *reladdr, src_reg(reg_offset)));
2337
2338 /* Pre-gen6, the message header uses byte offsets instead of vec4
2339 * (16-byte) offset units.
2340 */
2341 if (intel->gen < 6) {
2342 emit_before(inst, MUL(dst_reg(index), index, src_reg(16)));
2343 }
2344
2345 return index;
2346 } else {
2347 int message_header_scale = intel->gen < 6 ? 16 : 1;
2348 return src_reg(reg_offset * message_header_scale);
2349 }
2350 }
2351
2352 /**
2353 * Emits an instruction before @inst to load the value named by @orig_src
2354 * from scratch space at @base_offset to @temp.
2355 */
2356 void
2357 vec4_visitor::emit_scratch_read(vec4_instruction *inst,
2358 dst_reg temp, src_reg orig_src,
2359 int base_offset)
2360 {
2361 int reg_offset = base_offset + orig_src.reg_offset;
2362 src_reg index = get_scratch_offset(inst, orig_src.reladdr, reg_offset);
2363
2364 emit_before(inst, SCRATCH_READ(temp, index));
2365 }
2366
2367 /**
2368 * Emits an instruction after @inst to store the value to be written
2369 * to @orig_dst to scratch space at @base_offset, from @temp.
2370 */
2371 void
2372 vec4_visitor::emit_scratch_write(vec4_instruction *inst,
2373 src_reg temp, dst_reg orig_dst,
2374 int base_offset)
2375 {
2376 int reg_offset = base_offset + orig_dst.reg_offset;
2377 src_reg index = get_scratch_offset(inst, orig_dst.reladdr, reg_offset);
2378
2379 dst_reg dst = dst_reg(brw_writemask(brw_vec8_grf(0, 0),
2380 orig_dst.writemask));
2381 vec4_instruction *write = SCRATCH_WRITE(dst, temp, index);
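/* The scratch write inherits the original instruction's predicate: if the
 * write being rewritten only happens under a predicate, the store to
 * scratch must be guarded the same way.
 */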
2382 write->predicate = inst->predicate;
2383 write->ir = inst->ir;
2384 write->annotation = inst->annotation;
2385 inst->insert_after(write);
2386 }
2387
2388 /**
2389 * We can't generally support array access in GRF space, because a
2390 * single instruction's destination can only span 2 contiguous
2391 * registers. So, we send all GRF arrays that get variable index
2392 * access to scratch space.
2393 */
2394 void
2395 vec4_visitor::move_grf_array_access_to_scratch()
2396 {
2397 int scratch_loc[this->virtual_grf_count];
2398
2399 for (int i = 0; i < this->virtual_grf_count; i++) {
2400 scratch_loc[i] = -1;
2401 }
2402
2403 /* First, calculate the set of virtual GRFs that need to be punted
2404 * to scratch due to having any array access on them, and where in
2405 * scratch they will live.
2406 */
2407 foreach_list(node, &this->instructions) {
2408 vec4_instruction *inst = (vec4_instruction *)node;
2409
2410 if (inst->dst.file == GRF && inst->dst.reladdr &&
2411 scratch_loc[inst->dst.reg] == -1) {
2412 scratch_loc[inst->dst.reg] = c->last_scratch;
2413 c->last_scratch += this->virtual_grf_sizes[inst->dst.reg] * 8 * 4;
2414 }
2415
2416 for (int i = 0 ; i < 3; i++) {
2417 src_reg *src = &inst->src[i];
2418
2419 if (src->file == GRF && src->reladdr &&
2420 scratch_loc[src->reg] == -1) {
2421 scratch_loc[src->reg] = c->last_scratch;
2422 c->last_scratch += this->virtual_grf_sizes[src->reg] * 8 * 4;
2423 }
2424 }
2425 }
2426
2427 /* Now, for anything that will be accessed through scratch, rewrite
2428 * it to load/store. Note that this is a _safe list walk, because
2429 * we may generate a new scratch_write instruction after the one
2430 * we're processing.
2431 */
2432 foreach_list_safe(node, &this->instructions) {
2433 vec4_instruction *inst = (vec4_instruction *)node;
2434
2435 /* Set up the annotation tracking for newly generated instructions. */
2436 base_ir = inst->ir;
2437 current_annotation = inst->annotation;
2438
2439 if (inst->dst.file == GRF && scratch_loc[inst->dst.reg] != -1) {
2440 src_reg temp = src_reg(this, glsl_type::vec4_type);
2441
2442 emit_scratch_write(inst, temp, inst->dst, scratch_loc[inst->dst.reg]);
2443
2444 inst->dst.file = temp.file;
2445 inst->dst.reg = temp.reg;
2446 inst->dst.reg_offset = temp.reg_offset;
2447 inst->dst.reladdr = NULL;
2448 }
2449
2450 for (int i = 0 ; i < 3; i++) {
2451 if (inst->src[i].file != GRF || scratch_loc[inst->src[i].reg] == -1)
2452 continue;
2453
2454 dst_reg temp = dst_reg(this, glsl_type::vec4_type);
2455
2456 emit_scratch_read(inst, temp, inst->src[i],
2457 scratch_loc[inst->src[i].reg]);
2458
2459 inst->src[i].file = temp.file;
2460 inst->src[i].reg = temp.reg;
2461 inst->src[i].reg_offset = temp.reg_offset;
2462 inst->src[i].reladdr = NULL;
2463 }
2464 }
2465 }
2466
2467 /**
2468 * Emits an instruction before @inst to load the value named by @orig_src
2469 * from the pull constant buffer (surface) at @base_offset to @temp.
2470 */
2471 void
2472 vec4_visitor::emit_pull_constant_load(vec4_instruction *inst,
2473 dst_reg temp, src_reg orig_src,
2474 int base_offset)
2475 {
2476 int reg_offset = base_offset + orig_src.reg_offset;
2477 src_reg index = get_pull_constant_offset(inst, orig_src.reladdr, reg_offset);
2478 vec4_instruction *load;
2479
2480 load = new(mem_ctx) vec4_instruction(this, VS_OPCODE_PULL_CONSTANT_LOAD,
2481 temp, index);
2482 load->base_mrf = 14;
2483 load->mlen = 1;
2484 emit_before(inst, load);
2485 }
2486
2487 /**
2488 * Implements array access of uniforms by inserting a
2489 * PULL_CONSTANT_LOAD instruction.
2490 *
2491 * Unlike temporary GRF array access (where we don't support it due to
2492 * the difficulty of doing relative addressing on instruction
2493 * destinations), we could potentially do array access of uniforms
2494 * that were loaded in GRF space as push constants. In real-world
2495 * usage we've seen, though, the arrays being used are always larger
2496 * than we could load as push constants, so just always move all
2497 * uniform array access out to a pull constant buffer.
2498 */
2499 void
2500 vec4_visitor::move_uniform_array_access_to_pull_constants()
2501 {
2502 int pull_constant_loc[this->uniforms];
2503
2504 for (int i = 0; i < this->uniforms; i++) {
2505 pull_constant_loc[i] = -1;
2506 }
2507
2508 /* Walk through and find array access of uniforms. Put a copy of that
2509 * uniform in the pull constant buffer.
2510 *
2511 * Note that we don't move constant-indexed accesses to arrays. No
2512 * testing has been done of the performance impact of this choice.
2513 */
2514 foreach_list_safe(node, &this->instructions) {
2515 vec4_instruction *inst = (vec4_instruction *)node;
2516
2517 for (int i = 0 ; i < 3; i++) {
2518 if (inst->src[i].file != UNIFORM || !inst->src[i].reladdr)
2519 continue;
2520
2521 int uniform = inst->src[i].reg;
2522
2523 /* If this array isn't already present in the pull constant buffer,
2524 * add it.
2525 */
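/* Rough example: a uniform array occupying 3 vec4 slots appends
 * 3 * 4 = 12 float pointers to pull_param, and pull_constant_loc records
 * the vec4 index at which the array starts.
 */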
2526 if (pull_constant_loc[uniform] == -1) {
2527 const float **values = &prog_data->param[uniform * 4];
2528
2529 pull_constant_loc[uniform] = prog_data->nr_pull_params / 4;
2530
2531 for (int j = 0; j < uniform_size[uniform] * 4; j++) {
2532 prog_data->pull_param[prog_data->nr_pull_params++] = values[j];
2533 }
2534 }
2535
2536 /* Set up the annotation tracking for newly generated instructions. */
2537 base_ir = inst->ir;
2538 current_annotation = inst->annotation;
2539
2540 dst_reg temp = dst_reg(this, glsl_type::vec4_type);
2541
2542 emit_pull_constant_load(inst, temp, inst->src[i],
2543 pull_constant_loc[uniform]);
2544
2545 inst->src[i].file = temp.file;
2546 inst->src[i].reg = temp.reg;
2547 inst->src[i].reg_offset = temp.reg_offset;
2548 inst->src[i].reladdr = NULL;
2549 }
2550 }
2551
2552 /* Now there are no accesses of the UNIFORM file with a reladdr, so
2553 * no need to track them as larger-than-vec4 objects. This will be
2554 * relied on in cutting out unused uniform vectors from push
2555 * constants.
2556 */
2557 split_uniform_registers();
2558 }
2559
2560 void
2561 vec4_visitor::resolve_ud_negate(src_reg *reg)
2562 {
2563 if (reg->type != BRW_REGISTER_TYPE_UD ||
2564 !reg->negate)
2565 return;
2566
2567 src_reg temp = src_reg(this, glsl_type::uvec4_type);
2568 emit(BRW_OPCODE_MOV, dst_reg(temp), *reg);
2569 *reg = temp;
2570 }
2571
2572 vec4_visitor::vec4_visitor(struct brw_vs_compile *c,
2573 struct gl_shader_program *prog,
2574 struct brw_shader *shader)
2575 {
2576 this->c = c;
2577 this->p = &c->func;
2578 this->brw = p->brw;
2579 this->intel = &brw->intel;
2580 this->ctx = &intel->ctx;
2581 this->prog = prog;
2582 this->shader = shader;
2583
2584 this->mem_ctx = ralloc_context(NULL);
2585 this->failed = false;
2586
2587 this->base_ir = NULL;
2588 this->current_annotation = NULL;
2589
2591 this->vp = (struct gl_vertex_program *)
2592 prog->_LinkedShaders[MESA_SHADER_VERTEX]->Program;
2593 this->prog_data = &c->prog_data;
2594
2595 this->variable_ht = hash_table_ctor(0,
2596 hash_table_pointer_hash,
2597 hash_table_pointer_compare);
2598
2599 this->virtual_grf_def = NULL;
2600 this->virtual_grf_use = NULL;
2601 this->virtual_grf_sizes = NULL;
2602 this->virtual_grf_count = 0;
2603 this->virtual_grf_reg_map = NULL;
2604 this->virtual_grf_reg_count = 0;
2605 this->virtual_grf_array_size = 0;
2606 this->live_intervals_valid = false;
2607
2608 this->uniforms = 0;
2609 }
2610
2611 vec4_visitor::~vec4_visitor()
2612 {
2613 ralloc_free(this->mem_ctx);
2614 hash_table_dtor(this->variable_ht);
2615 }
2616
2617
2618 void
2619 vec4_visitor::fail(const char *format, ...)
2620 {
2621 va_list va;
2622 char *msg;
2623
2624 if (failed)
2625 return;
2626
2627 failed = true;
2628
2629 va_start(va, format);
2630 msg = ralloc_vasprintf(mem_ctx, format, va);
2631 va_end(va);
2632 msg = ralloc_asprintf(mem_ctx, "VS compile failed: %s\n", msg);
2633
2634 this->fail_msg = msg;
2635
2636 if (INTEL_DEBUG & DEBUG_VS) {
2637 fprintf(stderr, "%s", msg);
2638 }
2639 }
2640
2641 } /* namespace brw */