i965/vs: Fix leak of an empty hash_table structure per compile.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_vec4_visitor.cpp
1 /*
2 * Copyright © 2011 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "brw_vec4.h"
25 extern "C" {
26 #include "main/macros.h"
27 #include "program/prog_parameter.h"
28 #include "program/sampler.h"
29 }
30
31 namespace brw {
32
33 src_reg::src_reg(dst_reg reg)
34 {
35 init();
36
37 this->file = reg.file;
38 this->reg = reg.reg;
39 this->reg_offset = reg.reg_offset;
40 this->type = reg.type;
41 this->reladdr = reg.reladdr;
42 this->fixed_hw_reg = reg.fixed_hw_reg;
43
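/* Convert the destination's writemask into a source swizzle: pack the
 * enabled channels to the front and replicate the last enabled channel
 * into any remaining slots.  For example (illustrative), a writemask of
 * .xz yields the swizzle .xzzz, and .w yields .wwww.
 */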
44 int swizzles[4];
45 int next_chan = 0;
46 int last = 0;
47
48 for (int i = 0; i < 4; i++) {
49 if (!(reg.writemask & (1 << i)))
50 continue;
51
52 swizzles[next_chan++] = last = i;
53 }
54
55 for (; next_chan < 4; next_chan++) {
56 swizzles[next_chan] = last;
57 }
58
59 this->swizzle = BRW_SWIZZLE4(swizzles[0], swizzles[1],
60 swizzles[2], swizzles[3]);
61 }
62
63 dst_reg::dst_reg(src_reg reg)
64 {
65 init();
66
67 this->file = reg.file;
68 this->reg = reg.reg;
69 this->reg_offset = reg.reg_offset;
70 this->type = reg.type;
71 this->writemask = WRITEMASK_XYZW;
72 this->reladdr = reg.reladdr;
73 this->fixed_hw_reg = reg.fixed_hw_reg;
74 }
75
76 vec4_instruction::vec4_instruction(vec4_visitor *v,
77 enum opcode opcode, dst_reg dst,
78 src_reg src0, src_reg src1, src_reg src2)
79 {
80 this->opcode = opcode;
81 this->dst = dst;
82 this->src[0] = src0;
83 this->src[1] = src1;
84 this->src[2] = src2;
85 this->ir = v->base_ir;
86 this->annotation = v->current_annotation;
87 }
88
89 vec4_instruction *
90 vec4_visitor::emit(vec4_instruction *inst)
91 {
92 this->instructions.push_tail(inst);
93
94 return inst;
95 }
96
97 vec4_instruction *
98 vec4_visitor::emit_before(vec4_instruction *inst, vec4_instruction *new_inst)
99 {
100 new_inst->ir = inst->ir;
101 new_inst->annotation = inst->annotation;
102
103 inst->insert_before(new_inst);
104
105 return inst;
106 }
107
108 vec4_instruction *
109 vec4_visitor::emit(enum opcode opcode, dst_reg dst,
110 src_reg src0, src_reg src1, src_reg src2)
111 {
112 return emit(new(mem_ctx) vec4_instruction(this, opcode, dst,
113 src0, src1, src2));
114 }
115
116
117 vec4_instruction *
118 vec4_visitor::emit(enum opcode opcode, dst_reg dst, src_reg src0, src_reg src1)
119 {
120 return emit(new(mem_ctx) vec4_instruction(this, opcode, dst, src0, src1));
121 }
122
123 vec4_instruction *
124 vec4_visitor::emit(enum opcode opcode, dst_reg dst, src_reg src0)
125 {
126 return emit(new(mem_ctx) vec4_instruction(this, opcode, dst, src0));
127 }
128
129 vec4_instruction *
130 vec4_visitor::emit(enum opcode opcode)
131 {
132 return emit(new(mem_ctx) vec4_instruction(this, opcode, dst_reg()));
133 }
134
135 #define ALU1(op) \
136 vec4_instruction * \
137 vec4_visitor::op(dst_reg dst, src_reg src0) \
138 { \
139 return new(mem_ctx) vec4_instruction(this, BRW_OPCODE_##op, dst, \
140 src0); \
141 }
142
143 #define ALU2(op) \
144 vec4_instruction * \
145 vec4_visitor::op(dst_reg dst, src_reg src0, src_reg src1) \
146 { \
147 return new(mem_ctx) vec4_instruction(this, BRW_OPCODE_##op, dst, \
148 src0, src1); \
149 }
150
151 ALU1(NOT)
152 ALU1(MOV)
153 ALU1(FRC)
154 ALU1(RNDD)
155 ALU1(RNDE)
156 ALU1(RNDZ)
157 ALU2(ADD)
158 ALU2(MUL)
159 ALU2(MACH)
160 ALU2(AND)
161 ALU2(OR)
162 ALU2(XOR)
163 ALU2(DP3)
164 ALU2(DP4)
165
166 /** Gen4 predicated IF. */
167 vec4_instruction *
168 vec4_visitor::IF(uint32_t predicate)
169 {
170 vec4_instruction *inst;
171
172 inst = new(mem_ctx) vec4_instruction(this, BRW_OPCODE_IF);
173 inst->predicate = predicate;
174
175 return inst;
176 }
177
178 /** Gen6+ IF with embedded comparison. */
179 vec4_instruction *
180 vec4_visitor::IF(src_reg src0, src_reg src1, uint32_t condition)
181 {
182 assert(intel->gen >= 6);
183
184 vec4_instruction *inst;
185
186 resolve_ud_negate(&src0);
187 resolve_ud_negate(&src1);
188
189 inst = new(mem_ctx) vec4_instruction(this, BRW_OPCODE_IF, dst_null_d(),
190 src0, src1);
191 inst->conditional_mod = condition;
192
193 return inst;
194 }
195
196 /**
197 * CMP: Sets the low bit of the destination channels with the result
198 * of the comparison, while the upper bits are undefined, and updates
199 * the flag register with the packed 16 bits of the result.
200 */
201 vec4_instruction *
202 vec4_visitor::CMP(dst_reg dst, src_reg src0, src_reg src1, uint32_t condition)
203 {
204 vec4_instruction *inst;
205
206 /* original gen4 does type conversion to the destination type
207 * before comparison, producing garbage results for floating
208 * point comparisons.
209 */
210 if (intel->gen == 4) {
211 dst.type = src0.type;
212 if (dst.file == HW_REG)
213 dst.fixed_hw_reg.type = dst.type;
214 }
215
216 resolve_ud_negate(&src0);
217 resolve_ud_negate(&src1);
218
219 inst = new(mem_ctx) vec4_instruction(this, BRW_OPCODE_CMP, dst, src0, src1);
220 inst->conditional_mod = condition;
221
222 return inst;
223 }
224
225 vec4_instruction *
226 vec4_visitor::SCRATCH_READ(dst_reg dst, src_reg index)
227 {
228 vec4_instruction *inst;
229
230 inst = new(mem_ctx) vec4_instruction(this, VS_OPCODE_SCRATCH_READ,
231 dst, index);
232 inst->base_mrf = 14;
233 inst->mlen = 1;
234
235 return inst;
236 }
237
238 vec4_instruction *
239 vec4_visitor::SCRATCH_WRITE(dst_reg dst, src_reg src, src_reg index)
240 {
241 vec4_instruction *inst;
242
243 inst = new(mem_ctx) vec4_instruction(this, VS_OPCODE_SCRATCH_WRITE,
244 dst, src, index);
245 inst->base_mrf = 13;
246 inst->mlen = 2;
247
248 return inst;
249 }
250
251 void
252 vec4_visitor::emit_dp(dst_reg dst, src_reg src0, src_reg src1, unsigned elements)
253 {
254 static enum opcode dot_opcodes[] = {
255 BRW_OPCODE_DP2, BRW_OPCODE_DP3, BRW_OPCODE_DP4
256 };
257
258 emit(dot_opcodes[elements - 2], dst, src0, src1);
259 }
260
261 void
262 vec4_visitor::emit_math1_gen6(enum opcode opcode, dst_reg dst, src_reg src)
263 {
264 /* The gen6 math instruction ignores the source modifiers --
265 * swizzle, abs, negate, and at least some parts of the register
266 * region description.
267 *
268 * While it would seem that this MOV could be avoided at this point
269 * in the case that the swizzle is matched up with the destination
270 * writemask, note that uniform packing and register allocation
271 * could rearrange our swizzle, so let's leave this matter up to
272 * copy propagation later.
273 */
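/* Illustrative shape of the emitted code (assuming dst writes only .x):
 *    MOV tmp_src, src        -- strips swizzle/abs/negate for the math op
 *    math tmp_dst, tmp_src   -- full-width align1 math
 *    MOV dst.x, tmp_dst      -- applies the real writemask
 */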
274 src_reg temp_src = src_reg(this, glsl_type::vec4_type);
275 emit(MOV(dst_reg(temp_src), src));
276
277 if (dst.writemask != WRITEMASK_XYZW) {
278 /* The gen6 math instruction must be align1, so we can't do
279 * writemasks.
280 */
281 dst_reg temp_dst = dst_reg(this, glsl_type::vec4_type);
282
283 emit(opcode, temp_dst, temp_src);
284
285 emit(MOV(dst, src_reg(temp_dst)));
286 } else {
287 emit(opcode, dst, temp_src);
288 }
289 }
290
291 void
292 vec4_visitor::emit_math1_gen4(enum opcode opcode, dst_reg dst, src_reg src)
293 {
294 vec4_instruction *inst = emit(opcode, dst, src);
295 inst->base_mrf = 1;
296 inst->mlen = 1;
297 }
298
299 void
300 vec4_visitor::emit_math(opcode opcode, dst_reg dst, src_reg src)
301 {
302 switch (opcode) {
303 case SHADER_OPCODE_RCP:
304 case SHADER_OPCODE_RSQ:
305 case SHADER_OPCODE_SQRT:
306 case SHADER_OPCODE_EXP2:
307 case SHADER_OPCODE_LOG2:
308 case SHADER_OPCODE_SIN:
309 case SHADER_OPCODE_COS:
310 break;
311 default:
312 assert(!"not reached: bad math opcode");
313 return;
314 }
315
316 if (intel->gen >= 6) {
317 return emit_math1_gen6(opcode, dst, src);
318 } else {
319 return emit_math1_gen4(opcode, dst, src);
320 }
321 }
322
323 void
324 vec4_visitor::emit_math2_gen6(enum opcode opcode,
325 dst_reg dst, src_reg src0, src_reg src1)
326 {
327 src_reg expanded;
328
329 /* The gen6 math instruction ignores the source modifiers --
330 * swizzle, abs, negate, and at least some parts of the register
331 * region description. Move the sources to temporaries to make it
332 * generally work.
333 */
334
335 expanded = src_reg(this, glsl_type::vec4_type);
336 expanded.type = src0.type;
337 emit(MOV(dst_reg(expanded), src0));
338 src0 = expanded;
339
340 expanded = src_reg(this, glsl_type::vec4_type);
341 expanded.type = src1.type;
342 emit(MOV(dst_reg(expanded), src1));
343 src1 = expanded;
344
345 if (dst.writemask != WRITEMASK_XYZW) {
346 /* The gen6 math instruction must be align1, so we can't do
347 * writemasks.
348 */
349 dst_reg temp_dst = dst_reg(this, glsl_type::vec4_type);
350 temp_dst.type = dst.type;
351
352 emit(opcode, temp_dst, src0, src1);
353
354 emit(MOV(dst, src_reg(temp_dst)));
355 } else {
356 emit(opcode, dst, src0, src1);
357 }
358 }
359
360 void
361 vec4_visitor::emit_math2_gen4(enum opcode opcode,
362 dst_reg dst, src_reg src0, src_reg src1)
363 {
364 vec4_instruction *inst = emit(opcode, dst, src0, src1);
365 inst->base_mrf = 1;
366 inst->mlen = 2;
367 }
368
369 void
370 vec4_visitor::emit_math(enum opcode opcode,
371 dst_reg dst, src_reg src0, src_reg src1)
372 {
373 switch (opcode) {
374 case SHADER_OPCODE_POW:
375 case SHADER_OPCODE_INT_QUOTIENT:
376 case SHADER_OPCODE_INT_REMAINDER:
377 break;
378 default:
379 assert(!"not reached: unsupported binary math opcode");
380 return;
381 }
382
383 if (intel->gen >= 6) {
384 return emit_math2_gen6(opcode, dst, src0, src1);
385 } else {
386 return emit_math2_gen4(opcode, dst, src0, src1);
387 }
388 }
389
390 void
391 vec4_visitor::visit_instructions(const exec_list *list)
392 {
393 foreach_list(node, list) {
394 ir_instruction *ir = (ir_instruction *)node;
395
396 base_ir = ir;
397 ir->accept(this);
398 }
399 }
400
401
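/* Returns how many vec4 slots a value of the given type occupies.
 * Worked examples (illustrative): float and vec4 each take one slot,
 * mat4 takes four, vec2[8] takes eight, and struct { vec3 a; float b; }
 * takes two.
 */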
402 static int
403 type_size(const struct glsl_type *type)
404 {
405 unsigned int i;
406 int size;
407
408 switch (type->base_type) {
409 case GLSL_TYPE_UINT:
410 case GLSL_TYPE_INT:
411 case GLSL_TYPE_FLOAT:
412 case GLSL_TYPE_BOOL:
413 if (type->is_matrix()) {
414 return type->matrix_columns;
415 } else {
416 /* Regardless of size of vector, it gets a vec4. This is bad
417 * packing for things like floats, but otherwise arrays become a
418 * mess. Hopefully a later pass over the code can pack scalars
419 * down if appropriate.
420 */
421 return 1;
422 }
423 case GLSL_TYPE_ARRAY:
424 assert(type->length > 0);
425 return type_size(type->fields.array) * type->length;
426 case GLSL_TYPE_STRUCT:
427 size = 0;
428 for (i = 0; i < type->length; i++) {
429 size += type_size(type->fields.structure[i].type);
430 }
431 return size;
432 case GLSL_TYPE_SAMPLER:
433 /* Samplers take up one slot in UNIFORMS[], but they're baked in
434 * at link time.
435 */
436 return 1;
437 default:
438 assert(0);
439 return 0;
440 }
441 }
442
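/* Allocates a new virtual GRF of the given size (in vec4 slots), growing
 * the size and reg-map tracking arrays geometrically as needed, and
 * returns the new register's index.
 */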
443 int
444 vec4_visitor::virtual_grf_alloc(int size)
445 {
446 if (virtual_grf_array_size <= virtual_grf_count) {
447 if (virtual_grf_array_size == 0)
448 virtual_grf_array_size = 16;
449 else
450 virtual_grf_array_size *= 2;
451 virtual_grf_sizes = reralloc(mem_ctx, virtual_grf_sizes, int,
452 virtual_grf_array_size);
453 virtual_grf_reg_map = reralloc(mem_ctx, virtual_grf_reg_map, int,
454 virtual_grf_array_size);
455 }
456 virtual_grf_reg_map[virtual_grf_count] = virtual_grf_reg_count;
457 virtual_grf_reg_count += size;
458 virtual_grf_sizes[virtual_grf_count] = size;
459 return virtual_grf_count++;
460 }
461
462 src_reg::src_reg(class vec4_visitor *v, const struct glsl_type *type)
463 {
464 init();
465
466 this->file = GRF;
467 this->reg = v->virtual_grf_alloc(type_size(type));
468
469 if (type->is_array() || type->is_record()) {
470 this->swizzle = BRW_SWIZZLE_NOOP;
471 } else {
472 this->swizzle = swizzle_for_size(type->vector_elements);
473 }
474
475 this->type = brw_type_for_base_type(type);
476 }
477
478 dst_reg::dst_reg(class vec4_visitor *v, const struct glsl_type *type)
479 {
480 init();
481
482 this->file = GRF;
483 this->reg = v->virtual_grf_alloc(type_size(type));
484
485 if (type->is_array() || type->is_record()) {
486 this->writemask = WRITEMASK_XYZW;
487 } else {
488 this->writemask = (1 << type->vector_elements) - 1;
489 }
490
491 this->type = brw_type_for_base_type(type);
492 }
493
494 /* Our support for uniforms is piggy-backed on the struct
495 * gl_fragment_program, because that's where the values actually
496 * get stored, rather than in some global gl_shader_program uniform
497 * store.
498 */
499 int
500 vec4_visitor::setup_uniform_values(int loc, const glsl_type *type)
501 {
502 unsigned int offset = 0;
503 float *values = &this->vp->Base.Parameters->ParameterValues[loc][0].f;
504
505 if (type->is_matrix()) {
506 const glsl_type *column = type->column_type();
507
508 for (unsigned int i = 0; i < type->matrix_columns; i++) {
509 offset += setup_uniform_values(loc + offset, column);
510 }
511
512 return offset;
513 }
514
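/* Illustrative layout for the scalar/vector case below: a vec3 uniform
 * fills one vec4 slot, with the first three param pointers aimed at its
 * components and the fourth at a shared zero pad; a mat3 goes through the
 * matrix case above and occupies three such slots, one per column.
 */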
515 switch (type->base_type) {
516 case GLSL_TYPE_FLOAT:
517 case GLSL_TYPE_UINT:
518 case GLSL_TYPE_INT:
519 case GLSL_TYPE_BOOL:
520 for (unsigned int i = 0; i < type->vector_elements; i++) {
521 c->prog_data.param[this->uniforms * 4 + i] = &values[i];
522 }
523
524 /* Set up pad elements to get things aligned to a vec4 boundary. */
525 for (unsigned int i = type->vector_elements; i < 4; i++) {
526 static float zero = 0;
527
528 c->prog_data.param[this->uniforms * 4 + i] = &zero;
529 }
530
531 /* Track the size of this uniform vector, for future packing of
532 * uniforms.
533 */
534 this->uniform_vector_size[this->uniforms] = type->vector_elements;
535 this->uniforms++;
536
537 return 1;
538
539 case GLSL_TYPE_STRUCT:
540 for (unsigned int i = 0; i < type->length; i++) {
541 offset += setup_uniform_values(loc + offset,
542 type->fields.structure[i].type);
543 }
544 return offset;
545
546 case GLSL_TYPE_ARRAY:
547 for (unsigned int i = 0; i < type->length; i++) {
548 offset += setup_uniform_values(loc + offset, type->fields.array);
549 }
550 return offset;
551
552 case GLSL_TYPE_SAMPLER:
553 /* The sampler takes up a slot, but we don't use any values from it. */
554 return 1;
555
556 default:
557 assert(!"not reached");
558 return 0;
559 }
560 }
561
562 void
563 vec4_visitor::setup_uniform_clipplane_values()
564 {
565 gl_clip_plane *clip_planes = brw_select_clip_planes(ctx);
566
567 /* Pre-Gen6, we compact clip planes. For example, if the user
568 * enables just clip planes 0, 1, and 3, we will enable clip planes
569 * 0, 1, and 2 in the hardware, and we'll move clip plane 3 to clip
570 * plane 2. This simplifies the implementation of the Gen6 clip
571 * thread.
572 *
573 * In Gen6 and later, we don't compact clip planes, because this
574 * simplifies the implementation of gl_ClipDistance.
575 */
576 int compacted_clipplane_index = 0;
577 for (int i = 0; i < c->key.nr_userclip_plane_consts; ++i) {
578 if (intel->gen < 6 &&
579 !(c->key.userclip_planes_enabled_gen_4_5 & (1 << i))) {
580 continue;
581 }
582 this->uniform_vector_size[this->uniforms] = 4;
583 this->userplane[compacted_clipplane_index] = dst_reg(UNIFORM, this->uniforms);
584 this->userplane[compacted_clipplane_index].type = BRW_REGISTER_TYPE_F;
585 for (int j = 0; j < 4; ++j) {
586 c->prog_data.param[this->uniforms * 4 + j] = &clip_planes[i][j];
587 }
588 ++compacted_clipplane_index;
589 ++this->uniforms;
590 }
591 }
592
593 /* Our support for builtin uniforms is even scarier than non-builtin.
594 * It sits on top of the PROG_STATE_VAR parameters that are
595 * automatically updated from GL context state.
596 */
597 void
598 vec4_visitor::setup_builtin_uniform_values(ir_variable *ir)
599 {
600 const ir_state_slot *const slots = ir->state_slots;
601 assert(ir->state_slots != NULL);
602
603 for (unsigned int i = 0; i < ir->num_state_slots; i++) {
604 /* This state reference has already been setup by ir_to_mesa,
605 * but we'll get the same index back here. We can reference
606 * ParameterValues directly, since unlike brw_fs.cpp, we never
607 * add new state references during compile.
608 */
609 int index = _mesa_add_state_reference(this->vp->Base.Parameters,
610 (gl_state_index *)slots[i].tokens);
611 float *values = &this->vp->Base.Parameters->ParameterValues[index][0].f;
612
613 this->uniform_vector_size[this->uniforms] = 0;
614 /* Add each of the unique swizzled channels of the element.
615 * This will end up matching the size of the glsl_type of this field.
616 */
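/* For example (illustrative), a scalar state value exposed as .xxxx
 * contributes a single channel and a vector size of 1, while a vec4
 * state value with an .xyzw swizzle contributes all four.
 */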
617 int last_swiz = -1;
618 for (unsigned int j = 0; j < 4; j++) {
619 int swiz = GET_SWZ(slots[i].swizzle, j);
620 if (swiz == last_swiz)
621 break;
622 last_swiz = swiz;
623 c->prog_data.param[this->uniforms * 4 + j] = &values[swiz];
624 this->uniform_vector_size[this->uniforms]++;
625 }
626 this->uniforms++;
627 }
628 }
629
630 dst_reg *
631 vec4_visitor::variable_storage(ir_variable *var)
632 {
633 return (dst_reg *)hash_table_find(this->variable_ht, var);
634 }
635
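/* Emit instructions that leave the flag register holding the truth value
 * of the given boolean rvalue, and report (via *predicate) which predicate
 * the caller should use: BRW_PREDICATE_NORMAL, or the ALIGN16 ALL4H/ANY4H
 * variants for the vector comparisons handled below.
 */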
636 void
637 vec4_visitor::emit_bool_to_cond_code(ir_rvalue *ir, uint32_t *predicate)
638 {
639 ir_expression *expr = ir->as_expression();
640
641 *predicate = BRW_PREDICATE_NORMAL;
642
643 if (expr) {
644 src_reg op[2];
645 vec4_instruction *inst;
646
647 assert(expr->get_num_operands() <= 2);
648 for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
649 expr->operands[i]->accept(this);
650 op[i] = this->result;
651
652 resolve_ud_negate(&op[i]);
653 }
654
655 switch (expr->operation) {
656 case ir_unop_logic_not:
657 inst = emit(AND(dst_null_d(), op[0], src_reg(1)));
658 inst->conditional_mod = BRW_CONDITIONAL_Z;
659 break;
660
661 case ir_binop_logic_xor:
662 inst = emit(XOR(dst_null_d(), op[0], op[1]));
663 inst->conditional_mod = BRW_CONDITIONAL_NZ;
664 break;
665
666 case ir_binop_logic_or:
667 inst = emit(OR(dst_null_d(), op[0], op[1]));
668 inst->conditional_mod = BRW_CONDITIONAL_NZ;
669 break;
670
671 case ir_binop_logic_and:
672 inst = emit(AND(dst_null_d(), op[0], op[1]));
673 inst->conditional_mod = BRW_CONDITIONAL_NZ;
674 break;
675
676 case ir_unop_f2b:
677 if (intel->gen >= 6) {
678 emit(CMP(dst_null_d(), op[0], src_reg(0.0f), BRW_CONDITIONAL_NZ));
679 } else {
680 inst = emit(MOV(dst_null_f(), op[0]));
681 inst->conditional_mod = BRW_CONDITIONAL_NZ;
682 }
683 break;
684
685 case ir_unop_i2b:
686 if (intel->gen >= 6) {
687 emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ));
688 } else {
689 inst = emit(MOV(dst_null_d(), op[0]));
690 inst->conditional_mod = BRW_CONDITIONAL_NZ;
691 }
692 break;
693
694 case ir_binop_all_equal:
695 inst = emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_Z));
696 *predicate = BRW_PREDICATE_ALIGN16_ALL4H;
697 break;
698
699 case ir_binop_any_nequal:
700 inst = emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_NZ));
701 *predicate = BRW_PREDICATE_ALIGN16_ANY4H;
702 break;
703
704 case ir_unop_any:
705 inst = emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ));
706 *predicate = BRW_PREDICATE_ALIGN16_ANY4H;
707 break;
708
709 case ir_binop_greater:
710 case ir_binop_gequal:
711 case ir_binop_less:
712 case ir_binop_lequal:
713 case ir_binop_equal:
714 case ir_binop_nequal:
715 emit(CMP(dst_null_d(), op[0], op[1],
716 brw_conditional_for_comparison(expr->operation)));
717 break;
718
719 default:
720 assert(!"not reached");
721 break;
722 }
723 return;
724 }
725
726 ir->accept(this);
727
728 resolve_ud_negate(&this->result);
729
730 if (intel->gen >= 6) {
731 vec4_instruction *inst = emit(AND(dst_null_d(),
732 this->result, src_reg(1)));
733 inst->conditional_mod = BRW_CONDITIONAL_NZ;
734 } else {
735 vec4_instruction *inst = emit(MOV(dst_null_d(), this->result));
736 inst->conditional_mod = BRW_CONDITIONAL_NZ;
737 }
738 }
739
740 /**
741 * Emit a gen6 IF statement with the comparison folded into the IF
742 * instruction.
743 */
744 void
745 vec4_visitor::emit_if_gen6(ir_if *ir)
746 {
747 ir_expression *expr = ir->condition->as_expression();
748
749 if (expr) {
750 src_reg op[2];
751 dst_reg temp;
752
753 assert(expr->get_num_operands() <= 2);
754 for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
755 expr->operands[i]->accept(this);
756 op[i] = this->result;
757 }
758
759 switch (expr->operation) {
760 case ir_unop_logic_not:
761 emit(IF(op[0], src_reg(0), BRW_CONDITIONAL_Z));
762 return;
763
764 case ir_binop_logic_xor:
765 emit(IF(op[0], op[1], BRW_CONDITIONAL_NZ));
766 return;
767
768 case ir_binop_logic_or:
769 temp = dst_reg(this, glsl_type::bool_type);
770 emit(OR(temp, op[0], op[1]));
771 emit(IF(src_reg(temp), src_reg(0), BRW_CONDITIONAL_NZ));
772 return;
773
774 case ir_binop_logic_and:
775 temp = dst_reg(this, glsl_type::bool_type);
776 emit(AND(temp, op[0], op[1]));
777 emit(IF(src_reg(temp), src_reg(0), BRW_CONDITIONAL_NZ));
778 return;
779
780 case ir_unop_f2b:
781 emit(IF(op[0], src_reg(0), BRW_CONDITIONAL_NZ));
782 return;
783
784 case ir_unop_i2b:
785 emit(IF(op[0], src_reg(0), BRW_CONDITIONAL_NZ));
786 return;
787
788 case ir_binop_greater:
789 case ir_binop_gequal:
790 case ir_binop_less:
791 case ir_binop_lequal:
792 case ir_binop_equal:
793 case ir_binop_nequal:
794 emit(IF(op[0], op[1],
795 brw_conditional_for_comparison(expr->operation)));
796 return;
797
798 case ir_binop_all_equal:
799 emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_Z));
800 emit(IF(BRW_PREDICATE_ALIGN16_ALL4H));
801 return;
802
803 case ir_binop_any_nequal:
804 emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_NZ));
805 emit(IF(BRW_PREDICATE_ALIGN16_ANY4H));
806 return;
807
808 case ir_unop_any:
809 emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ));
810 emit(IF(BRW_PREDICATE_ALIGN16_ANY4H));
811 return;
812
813 default:
814 assert(!"not reached");
815 emit(IF(op[0], src_reg(0), BRW_CONDITIONAL_NZ));
816 return;
817 }
818 return;
819 }
820
821 ir->condition->accept(this);
822
823 emit(IF(this->result, src_reg(0), BRW_CONDITIONAL_NZ));
824 }
825
826 void
827 vec4_visitor::visit(ir_variable *ir)
828 {
829 dst_reg *reg = NULL;
830
831 if (variable_storage(ir))
832 return;
833
834 switch (ir->mode) {
835 case ir_var_in:
836 reg = new(mem_ctx) dst_reg(ATTR, ir->location);
837
838 /* Do GL_FIXED rescaling for GLES2.0. Our GL_FIXED attributes
839 * come in as floating point conversions of the integer values.
840 */
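/* For example (illustrative), a GL_FIXED attribute value of 1.0 arrives
 * as 65536.0f, and the MUL below scales it back to 1.0f.
 */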
841 for (int i = ir->location; i < ir->location + type_size(ir->type); i++) {
842 if (!c->key.gl_fixed_input_size[i])
843 continue;
844
845 dst_reg dst = *reg;
846 dst.type = brw_type_for_base_type(ir->type);
847 dst.writemask = (1 << c->key.gl_fixed_input_size[i]) - 1;
848 emit(MUL(dst, src_reg(dst), src_reg(1.0f / 65536.0f)));
849 }
850 break;
851
852 case ir_var_out:
853 reg = new(mem_ctx) dst_reg(this, ir->type);
854
855 for (int i = 0; i < type_size(ir->type); i++) {
856 output_reg[ir->location + i] = *reg;
857 output_reg[ir->location + i].reg_offset = i;
858 output_reg[ir->location + i].type =
859 brw_type_for_base_type(ir->type->get_scalar_type());
860 output_reg_annotation[ir->location + i] = ir->name;
861 }
862 break;
863
864 case ir_var_auto:
865 case ir_var_temporary:
866 reg = new(mem_ctx) dst_reg(this, ir->type);
867 break;
868
869 case ir_var_uniform:
870 reg = new(this->mem_ctx) dst_reg(UNIFORM, this->uniforms);
871
872 /* Track how big the whole uniform variable is, in case we need to put a
873 * copy of its data into pull constants for array access.
874 */
875 this->uniform_size[this->uniforms] = type_size(ir->type);
876
877 if (!strncmp(ir->name, "gl_", 3)) {
878 setup_builtin_uniform_values(ir);
879 } else {
880 setup_uniform_values(ir->location, ir->type);
881 }
882 break;
883
884 case ir_var_system_value:
885 /* VertexID is stored by the VF as the last vertex element, but
886 * we don't represent it with a flag in inputs_read, so we call
887 * it VERT_ATTRIB_MAX, which setup_attributes() picks up on.
888 */
889 reg = new(mem_ctx) dst_reg(ATTR, VERT_ATTRIB_MAX);
890 prog_data->uses_vertexid = true;
891
892 switch (ir->location) {
893 case SYSTEM_VALUE_VERTEX_ID:
894 reg->writemask = WRITEMASK_X;
895 break;
896 case SYSTEM_VALUE_INSTANCE_ID:
897 reg->writemask = WRITEMASK_Y;
898 break;
899 default:
900 assert(!"not reached");
901 break;
902 }
903 break;
904
905 default:
906 assert(!"not reached");
907 }
908
909 reg->type = brw_type_for_base_type(ir->type);
910 hash_table_insert(this->variable_ht, reg, ir);
911 }
912
913 void
914 vec4_visitor::visit(ir_loop *ir)
915 {
916 dst_reg counter;
917
918 /* We don't want debugging output to print the whole body of the
919 * loop as the annotation.
920 */
921 this->base_ir = NULL;
922
923 if (ir->counter != NULL) {
924 this->base_ir = ir->counter;
925 ir->counter->accept(this);
926 counter = *(variable_storage(ir->counter));
927
928 if (ir->from != NULL) {
929 this->base_ir = ir->from;
930 ir->from->accept(this);
931
932 emit(MOV(counter, this->result));
933 }
934 }
935
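/* Rough shape of the emitted loop (illustrative):
 *    [MOV counter, from]
 *    DO
 *       [CMP counter, to;  predicated BREAK]
 *       <body>
 *       [ADD counter, counter, increment]
 *    WHILE
 */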
936 emit(BRW_OPCODE_DO);
937
938 if (ir->to) {
939 this->base_ir = ir->to;
940 ir->to->accept(this);
941
942 emit(CMP(dst_null_d(), src_reg(counter), this->result,
943 brw_conditional_for_comparison(ir->cmp)));
944
945 vec4_instruction *inst = emit(BRW_OPCODE_BREAK);
946 inst->predicate = BRW_PREDICATE_NORMAL;
947 }
948
949 visit_instructions(&ir->body_instructions);
950
951
952 if (ir->increment) {
953 this->base_ir = ir->increment;
954 ir->increment->accept(this);
955 emit(ADD(counter, src_reg(counter), this->result));
956 }
957
958 emit(BRW_OPCODE_WHILE);
959 }
960
961 void
962 vec4_visitor::visit(ir_loop_jump *ir)
963 {
964 switch (ir->mode) {
965 case ir_loop_jump::jump_break:
966 emit(BRW_OPCODE_BREAK);
967 break;
968 case ir_loop_jump::jump_continue:
969 emit(BRW_OPCODE_CONTINUE);
970 break;
971 }
972 }
973
974
975 void
976 vec4_visitor::visit(ir_function_signature *ir)
977 {
978 assert(0);
979 (void)ir;
980 }
981
982 void
983 vec4_visitor::visit(ir_function *ir)
984 {
985 /* Ignore function bodies other than main() -- we shouldn't see calls to
986 * them since they should all be inlined.
987 */
988 if (strcmp(ir->name, "main") == 0) {
989 const ir_function_signature *sig;
990 exec_list empty;
991
992 sig = ir->matching_signature(&empty);
993
994 assert(sig);
995
996 visit_instructions(&sig->body);
997 }
998 }
999
1000 bool
1001 vec4_visitor::try_emit_sat(ir_expression *ir)
1002 {
1003 ir_rvalue *sat_src = ir->as_rvalue_to_saturate();
1004 if (!sat_src)
1005 return false;
1006
1007 sat_src->accept(this);
1008 src_reg src = this->result;
1009
1010 this->result = src_reg(this, ir->type);
1011 vec4_instruction *inst;
1012 inst = emit(MOV(dst_reg(this->result), src));
1013 inst->saturate = true;
1014
1015 return true;
1016 }
1017
1018 void
1019 vec4_visitor::emit_bool_comparison(unsigned int op,
1020 dst_reg dst, src_reg src0, src_reg src1)
1021 {
1022 /* original gen4 does destination conversion before comparison. */
1023 if (intel->gen < 5)
1024 dst.type = src0.type;
1025
1026 emit(CMP(dst, src0, src1, brw_conditional_for_comparison(op)));
1027
1028 dst.type = BRW_REGISTER_TYPE_D;
1029 emit(AND(dst, src_reg(dst), src_reg(0x1)));
1030 }
1031
1032 void
1033 vec4_visitor::visit(ir_expression *ir)
1034 {
1035 unsigned int operand;
1036 src_reg op[Elements(ir->operands)];
1037 src_reg result_src;
1038 dst_reg result_dst;
1039 vec4_instruction *inst;
1040
1041 if (try_emit_sat(ir))
1042 return;
1043
1044 for (operand = 0; operand < ir->get_num_operands(); operand++) {
1045 this->result.file = BAD_FILE;
1046 ir->operands[operand]->accept(this);
1047 if (this->result.file == BAD_FILE) {
1048 printf("Failed to get tree for expression operand:\n");
1049 ir->operands[operand]->print();
1050 exit(1);
1051 }
1052 op[operand] = this->result;
1053
1054 /* Matrix expression operands should have been broken down to vector
1055 * operations already.
1056 */
1057 assert(!ir->operands[operand]->type->is_matrix());
1058 }
1059
1060 int vector_elements = ir->operands[0]->type->vector_elements;
1061 if (ir->operands[1]) {
1062 vector_elements = MAX2(vector_elements,
1063 ir->operands[1]->type->vector_elements);
1064 }
1065
1066 this->result.file = BAD_FILE;
1067
1068 /* Storage for our result. Ideally for an assignment we'd be using
1069 * the actual storage for the result here, instead.
1070 */
1071 result_src = src_reg(this, ir->type);
1072 /* convenience for the emit functions below. */
1073 result_dst = dst_reg(result_src);
1074 /* If nothing special happens, this is the result. */
1075 this->result = result_src;
1076 /* Limit writes to the channels that will be used by result_src later.
1077 * This does limit this temp's use as a temporary for multi-instruction
1078 * sequences.
1079 */
1080 result_dst.writemask = (1 << ir->type->vector_elements) - 1;
1081
1082 switch (ir->operation) {
1083 case ir_unop_logic_not:
1084 /* Note that BRW_OPCODE_NOT is not appropriate here, since it is
1085 * one's complement of the whole register, not just bit 0.
1086 */
1087 emit(XOR(result_dst, op[0], src_reg(1)));
1088 break;
1089 case ir_unop_neg:
1090 op[0].negate = !op[0].negate;
1091 this->result = op[0];
1092 break;
1093 case ir_unop_abs:
1094 op[0].abs = true;
1095 op[0].negate = false;
1096 this->result = op[0];
1097 break;
1098
1099 case ir_unop_sign:
1100 emit(MOV(result_dst, src_reg(0.0f)));
1101
1102 emit(CMP(dst_null_d(), op[0], src_reg(0.0f), BRW_CONDITIONAL_G));
1103 inst = emit(MOV(result_dst, src_reg(1.0f)));
1104 inst->predicate = BRW_PREDICATE_NORMAL;
1105
1106 emit(CMP(dst_null_d(), op[0], src_reg(0.0f), BRW_CONDITIONAL_L));
1107 inst = emit(MOV(result_dst, src_reg(-1.0f)));
1108 inst->predicate = BRW_PREDICATE_NORMAL;
1109
1110 break;
1111
1112 case ir_unop_rcp:
1113 emit_math(SHADER_OPCODE_RCP, result_dst, op[0]);
1114 break;
1115
1116 case ir_unop_exp2:
1117 emit_math(SHADER_OPCODE_EXP2, result_dst, op[0]);
1118 break;
1119 case ir_unop_log2:
1120 emit_math(SHADER_OPCODE_LOG2, result_dst, op[0]);
1121 break;
1122 case ir_unop_exp:
1123 case ir_unop_log:
1124 assert(!"not reached: should be handled by ir_explog_to_explog2");
1125 break;
1126 case ir_unop_sin:
1127 case ir_unop_sin_reduced:
1128 emit_math(SHADER_OPCODE_SIN, result_dst, op[0]);
1129 break;
1130 case ir_unop_cos:
1131 case ir_unop_cos_reduced:
1132 emit_math(SHADER_OPCODE_COS, result_dst, op[0]);
1133 break;
1134
1135 case ir_unop_dFdx:
1136 case ir_unop_dFdy:
1137 assert(!"derivatives not valid in vertex shader");
1138 break;
1139
1140 case ir_unop_noise:
1141 assert(!"not reached: should be handled by lower_noise");
1142 break;
1143
1144 case ir_binop_add:
1145 emit(ADD(result_dst, op[0], op[1]));
1146 break;
1147 case ir_binop_sub:
1148 assert(!"not reached: should be handled by ir_sub_to_add_neg");
1149 break;
1150
1151 case ir_binop_mul:
1152 if (ir->type->is_integer()) {
1153 /* For integer multiplication, the MUL uses the low 16 bits
1154 * of one of the operands (src0 on gen6, src1 on gen7). The
1155 * MACH accumulates in the contribution of the upper 16 bits
1156 * of that operand.
1157 *
1158 * FINISHME: Emit just the MUL if we know an operand is small
1159 * enough.
1160 */
1161 struct brw_reg acc = retype(brw_acc_reg(), BRW_REGISTER_TYPE_D);
1162
1163 emit(MUL(acc, op[0], op[1]));
1164 emit(MACH(dst_null_d(), op[0], op[1]));
1165 emit(MOV(result_dst, src_reg(acc)));
1166 } else {
1167 emit(MUL(result_dst, op[0], op[1]));
1168 }
1169 break;
1170 case ir_binop_div:
1171 /* Floating point should be lowered by DIV_TO_MUL_RCP in the compiler. */
1172 assert(ir->type->is_integer());
1173 emit_math(SHADER_OPCODE_INT_QUOTIENT, result_dst, op[0], op[1]);
1174 break;
1175 case ir_binop_mod:
1176 /* Floating point should be lowered by MOD_TO_FRACT in the compiler. */
1177 assert(ir->type->is_integer());
1178 emit_math(SHADER_OPCODE_INT_REMAINDER, result_dst, op[0], op[1]);
1179 break;
1180
1181 case ir_binop_less:
1182 case ir_binop_greater:
1183 case ir_binop_lequal:
1184 case ir_binop_gequal:
1185 case ir_binop_equal:
1186 case ir_binop_nequal: {
1187 emit(CMP(result_dst, op[0], op[1],
1188 brw_conditional_for_comparison(ir->operation)));
1189 emit(AND(result_dst, result_src, src_reg(0x1)));
1190 break;
1191 }
1192
1193 case ir_binop_all_equal:
1194 /* "==" operator producing a scalar boolean. */
1195 if (ir->operands[0]->type->is_vector() ||
1196 ir->operands[1]->type->is_vector()) {
1197 emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_Z));
1198 emit(MOV(result_dst, src_reg(0)));
1199 inst = emit(MOV(result_dst, src_reg(1)));
1200 inst->predicate = BRW_PREDICATE_ALIGN16_ALL4H;
1201 } else {
1202 emit(CMP(result_dst, op[0], op[1], BRW_CONDITIONAL_Z));
1203 emit(AND(result_dst, result_src, src_reg(0x1)));
1204 }
1205 break;
1206 case ir_binop_any_nequal:
1207 /* "!=" operator producing a scalar boolean. */
1208 if (ir->operands[0]->type->is_vector() ||
1209 ir->operands[1]->type->is_vector()) {
1210 emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_NZ));
1211
1212 emit(MOV(result_dst, src_reg(0)));
1213 inst = emit(MOV(result_dst, src_reg(1)));
1214 inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
1215 } else {
1216 emit(CMP(result_dst, op[0], op[1], BRW_CONDITIONAL_NZ));
1217 emit(AND(result_dst, result_src, src_reg(0x1)));
1218 }
1219 break;
1220
1221 case ir_unop_any:
1222 emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ));
1223 emit(MOV(result_dst, src_reg(0)));
1224
1225 inst = emit(MOV(result_dst, src_reg(1)));
1226 inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
1227 break;
1228
1229 case ir_binop_logic_xor:
1230 emit(XOR(result_dst, op[0], op[1]));
1231 break;
1232
1233 case ir_binop_logic_or:
1234 emit(OR(result_dst, op[0], op[1]));
1235 break;
1236
1237 case ir_binop_logic_and:
1238 emit(AND(result_dst, op[0], op[1]));
1239 break;
1240
1241 case ir_binop_dot:
1242 assert(ir->operands[0]->type->is_vector());
1243 assert(ir->operands[0]->type == ir->operands[1]->type);
1244 emit_dp(result_dst, op[0], op[1], ir->operands[0]->type->vector_elements);
1245 break;
1246
1247 case ir_unop_sqrt:
1248 emit_math(SHADER_OPCODE_SQRT, result_dst, op[0]);
1249 break;
1250 case ir_unop_rsq:
1251 emit_math(SHADER_OPCODE_RSQ, result_dst, op[0]);
1252 break;
1253 case ir_unop_i2f:
1254 case ir_unop_i2u:
1255 case ir_unop_u2i:
1256 case ir_unop_u2f:
1257 case ir_unop_b2f:
1258 case ir_unop_b2i:
1259 case ir_unop_f2i:
1260 emit(MOV(result_dst, op[0]));
1261 break;
1262 case ir_unop_f2b:
1263 case ir_unop_i2b: {
1264 emit(CMP(result_dst, op[0], src_reg(0.0f), BRW_CONDITIONAL_NZ));
1265 emit(AND(result_dst, result_src, src_reg(1)));
1266 break;
1267 }
1268
1269 case ir_unop_trunc:
1270 emit(RNDZ(result_dst, op[0]));
1271 break;
1272 case ir_unop_ceil:
1273 op[0].negate = !op[0].negate;
1274 inst = emit(RNDD(result_dst, op[0]));
1275 this->result.negate = true;
1276 break;
1277 case ir_unop_floor:
1278 inst = emit(RNDD(result_dst, op[0]));
1279 break;
1280 case ir_unop_fract:
1281 inst = emit(FRC(result_dst, op[0]));
1282 break;
1283 case ir_unop_round_even:
1284 emit(RNDE(result_dst, op[0]));
1285 break;
1286
1287 case ir_binop_min:
1288 emit(CMP(result_dst, op[0], op[1], BRW_CONDITIONAL_L));
1289
1290 inst = emit(BRW_OPCODE_SEL, result_dst, op[0], op[1]);
1291 inst->predicate = BRW_PREDICATE_NORMAL;
1292 break;
1293 case ir_binop_max:
1294 emit(CMP(result_dst, op[0], op[1], BRW_CONDITIONAL_G));
1295
1296 inst = emit(BRW_OPCODE_SEL, result_dst, op[0], op[1]);
1297 inst->predicate = BRW_PREDICATE_NORMAL;
1298 break;
1299
1300 case ir_binop_pow:
1301 emit_math(SHADER_OPCODE_POW, result_dst, op[0], op[1]);
1302 break;
1303
1304 case ir_unop_bit_not:
1305 inst = emit(NOT(result_dst, op[0]));
1306 break;
1307 case ir_binop_bit_and:
1308 inst = emit(AND(result_dst, op[0], op[1]));
1309 break;
1310 case ir_binop_bit_xor:
1311 inst = emit(XOR(result_dst, op[0], op[1]));
1312 break;
1313 case ir_binop_bit_or:
1314 inst = emit(OR(result_dst, op[0], op[1]));
1315 break;
1316
1317 case ir_binop_lshift:
1318 inst = emit(BRW_OPCODE_SHL, result_dst, op[0], op[1]);
1319 break;
1320
1321 case ir_binop_rshift:
1322 if (ir->type->base_type == GLSL_TYPE_INT)
1323 inst = emit(BRW_OPCODE_ASR, result_dst, op[0], op[1]);
1324 else
1325 inst = emit(BRW_OPCODE_SHR, result_dst, op[0], op[1]);
1326 break;
1327
1328 case ir_quadop_vector:
1329 assert(!"not reached: should be handled by lower_quadop_vector");
1330 break;
1331 }
1332 }
1333
1334
1335 void
1336 vec4_visitor::visit(ir_swizzle *ir)
1337 {
1338 src_reg src;
1339 int i = 0;
1340 int swizzle[4];
1341
1342 /* Note that this is only swizzles in expressions, not those on the left
1343 * hand side of an assignment, which do write masking. See ir_assignment
1344 * for that.
1345 */
1346
1347 ir->val->accept(this);
1348 src = this->result;
1349 assert(src.file != BAD_FILE);
1350
1351 for (i = 0; i < ir->type->vector_elements; i++) {
1352 switch (i) {
1353 case 0:
1354 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.x);
1355 break;
1356 case 1:
1357 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.y);
1358 break;
1359 case 2:
1360 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.z);
1361 break;
1362 case 3:
1363 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.w);
1364 break;
1365 }
1366 }
1367 for (; i < 4; i++) {
1368 /* Replicate the last channel out. */
1369 swizzle[i] = swizzle[ir->type->vector_elements - 1];
1370 }
1371
1372 src.swizzle = BRW_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]);
1373
1374 this->result = src;
1375 }
1376
1377 void
1378 vec4_visitor::visit(ir_dereference_variable *ir)
1379 {
1380 const struct glsl_type *type = ir->type;
1381 dst_reg *reg = variable_storage(ir->var);
1382
1383 if (!reg) {
1384 fail("Failed to find variable storage for %s\n", ir->var->name);
1385 this->result = src_reg(brw_null_reg());
1386 return;
1387 }
1388
1389 this->result = src_reg(*reg);
1390
1391 if (type->is_scalar() || type->is_vector() || type->is_matrix())
1392 this->result.swizzle = swizzle_for_size(type->vector_elements);
1393 }
1394
1395 void
1396 vec4_visitor::visit(ir_dereference_array *ir)
1397 {
1398 ir_constant *constant_index;
1399 src_reg src;
1400 int element_size = type_size(ir->type);
1401
1402 constant_index = ir->array_index->constant_expression_value();
1403
1404 ir->array->accept(this);
1405 src = this->result;
1406
1407 if (constant_index) {
1408 src.reg_offset += constant_index->value.i[0] * element_size;
1409 } else {
1410 /* Variable index array dereference. It eats the "vec4" of the
1411 * base of the array and an index that offsets the Mesa register
1412 * index.
1413 */
1414 ir->array_index->accept(this);
1415
1416 src_reg index_reg;
1417
1418 if (element_size == 1) {
1419 index_reg = this->result;
1420 } else {
1421 index_reg = src_reg(this, glsl_type::int_type);
1422
1423 emit(MUL(dst_reg(index_reg), this->result, src_reg(element_size)));
1424 }
1425
1426 if (src.reladdr) {
1427 src_reg temp = src_reg(this, glsl_type::int_type);
1428
1429 emit(ADD(dst_reg(temp), *src.reladdr, index_reg));
1430
1431 index_reg = temp;
1432 }
1433
1434 src.reladdr = ralloc(mem_ctx, src_reg);
1435 memcpy(src.reladdr, &index_reg, sizeof(index_reg));
1436 }
1437
1438 /* If the type is smaller than a vec4, replicate the last channel out. */
1439 if (ir->type->is_scalar() || ir->type->is_vector())
1440 src.swizzle = swizzle_for_size(ir->type->vector_elements);
1441 else
1442 src.swizzle = BRW_SWIZZLE_NOOP;
1443 src.type = brw_type_for_base_type(ir->type);
1444
1445 this->result = src;
1446 }
1447
1448 void
1449 vec4_visitor::visit(ir_dereference_record *ir)
1450 {
1451 unsigned int i;
1452 const glsl_type *struct_type = ir->record->type;
1453 int offset = 0;
1454
1455 ir->record->accept(this);
1456
1457 for (i = 0; i < struct_type->length; i++) {
1458 if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0)
1459 break;
1460 offset += type_size(struct_type->fields.structure[i].type);
1461 }
1462
1463 /* If the type is smaller than a vec4, replicate the last channel out. */
1464 if (ir->type->is_scalar() || ir->type->is_vector())
1465 this->result.swizzle = swizzle_for_size(ir->type->vector_elements);
1466 else
1467 this->result.swizzle = BRW_SWIZZLE_NOOP;
1468 this->result.type = brw_type_for_base_type(ir->type);
1469
1470 this->result.reg_offset += offset;
1471 }
1472
1473 /**
1474 * We want to be careful in assignment setup to hit the actual storage
1475 * instead of potentially using a temporary like we might with the
1476 * ir_dereference handler.
1477 */
1478 static dst_reg
1479 get_assignment_lhs(ir_dereference *ir, vec4_visitor *v)
1480 {
1481 /* The LHS must be a dereference. If the LHS is a variable indexed array
1482 * access of a vector, it must be separated into a series of conditional moves
1483 * before reaching this point (see ir_vec_index_to_cond_assign).
1484 */
1485 assert(ir->as_dereference());
1486 ir_dereference_array *deref_array = ir->as_dereference_array();
1487 if (deref_array) {
1488 assert(!deref_array->array->type->is_vector());
1489 }
1490
1491 /* Use the rvalue deref handler for the most part. We'll ignore
1492 * swizzles in it and write swizzles using writemask, though.
1493 */
1494 ir->accept(v);
1495 return dst_reg(v->result);
1496 }
1497
1498 void
1499 vec4_visitor::emit_block_move(dst_reg *dst, src_reg *src,
1500 const struct glsl_type *type, uint32_t predicate)
1501 {
1502 if (type->base_type == GLSL_TYPE_STRUCT) {
1503 for (unsigned int i = 0; i < type->length; i++) {
1504 emit_block_move(dst, src, type->fields.structure[i].type, predicate);
1505 }
1506 return;
1507 }
1508
1509 if (type->is_array()) {
1510 for (unsigned int i = 0; i < type->length; i++) {
1511 emit_block_move(dst, src, type->fields.array, predicate);
1512 }
1513 return;
1514 }
1515
1516 if (type->is_matrix()) {
1517 const struct glsl_type *vec_type;
1518
1519 vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT,
1520 type->vector_elements, 1);
1521
1522 for (int i = 0; i < type->matrix_columns; i++) {
1523 emit_block_move(dst, src, vec_type, predicate);
1524 }
1525 return;
1526 }
1527
1528 assert(type->is_scalar() || type->is_vector());
1529
1530 dst->type = brw_type_for_base_type(type);
1531 src->type = dst->type;
1532
1533 dst->writemask = (1 << type->vector_elements) - 1;
1534
1535 /* Do we need to worry about swizzling a swizzle? */
1536 assert(src->swizzle == BRW_SWIZZLE_NOOP
1537 || src->swizzle == swizzle_for_size(type->vector_elements));
1538 src->swizzle = swizzle_for_size(type->vector_elements);
1539
1540 vec4_instruction *inst = emit(MOV(*dst, *src));
1541 inst->predicate = predicate;
1542
1543 dst->reg_offset++;
1544 src->reg_offset++;
1545 }
1546
1547
1548 /* If the RHS processing resulted in an instruction generating a
1549 * temporary value, and it would be easy to rewrite the instruction to
1550 * generate its result right into the LHS instead, do so. This ends
1551 * up reliably removing instructions where it can be tricky to do so
1552 * later without real UD chain information.
1553 */
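/* For example (illustrative), for "a = b + c;" the ADD that wrote a
 * temporary GRF is retargeted to write a's storage directly, and the
 * trailing MOV is never emitted.
 */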
1554 bool
1555 vec4_visitor::try_rewrite_rhs_to_dst(ir_assignment *ir,
1556 dst_reg dst,
1557 src_reg src,
1558 vec4_instruction *pre_rhs_inst,
1559 vec4_instruction *last_rhs_inst)
1560 {
1561 /* This could be supported, but it would take more smarts. */
1562 if (ir->condition)
1563 return false;
1564
1565 if (pre_rhs_inst == last_rhs_inst)
1566 return false; /* No instructions generated to work with. */
1567
1568 /* Make sure the last instruction generated our source reg. */
1569 if (src.file != GRF ||
1570 src.file != last_rhs_inst->dst.file ||
1571 src.reg != last_rhs_inst->dst.reg ||
1572 src.reg_offset != last_rhs_inst->dst.reg_offset ||
1573 src.reladdr ||
1574 src.abs ||
1575 src.negate ||
1576 last_rhs_inst->predicate != BRW_PREDICATE_NONE)
1577 return false;
1578
1579 /* Check that the last instruction fully initialized the channels
1580 * we want to use, in the order we want to use them. We could
1581 * potentially reswizzle the operands of many instructions so that
1582 * we could handle out of order channels, but don't yet.
1583 */
1584
1585 for (unsigned i = 0; i < 4; i++) {
1586 if (dst.writemask & (1 << i)) {
1587 if (!(last_rhs_inst->dst.writemask & (1 << i)))
1588 return false;
1589
1590 if (BRW_GET_SWZ(src.swizzle, i) != i)
1591 return false;
1592 }
1593 }
1594
1595 /* Success! Rewrite the instruction. */
1596 last_rhs_inst->dst.file = dst.file;
1597 last_rhs_inst->dst.reg = dst.reg;
1598 last_rhs_inst->dst.reg_offset = dst.reg_offset;
1599 last_rhs_inst->dst.reladdr = dst.reladdr;
1600 last_rhs_inst->dst.writemask &= dst.writemask;
1601
1602 return true;
1603 }
1604
1605 void
1606 vec4_visitor::visit(ir_assignment *ir)
1607 {
1608 dst_reg dst = get_assignment_lhs(ir->lhs, this);
1609 uint32_t predicate = BRW_PREDICATE_NONE;
1610
1611 if (!ir->lhs->type->is_scalar() &&
1612 !ir->lhs->type->is_vector()) {
1613 ir->rhs->accept(this);
1614 src_reg src = this->result;
1615
1616 if (ir->condition) {
1617 emit_bool_to_cond_code(ir->condition, &predicate);
1618 }
1619
1620 emit_block_move(&dst, &src, ir->rhs->type, predicate);
1621 return;
1622 }
1623
1624 /* Now we're down to just a scalar/vector with writemasks. */
1625 int i;
1626
1627 vec4_instruction *pre_rhs_inst, *last_rhs_inst;
1628 pre_rhs_inst = (vec4_instruction *)this->instructions.get_tail();
1629
1630 ir->rhs->accept(this);
1631
1632 last_rhs_inst = (vec4_instruction *)this->instructions.get_tail();
1633
1634 src_reg src = this->result;
1635
1636 int swizzles[4];
1637 int first_enabled_chan = 0;
1638 int src_chan = 0;
1639
1640 assert(ir->lhs->type->is_vector() ||
1641 ir->lhs->type->is_scalar());
1642 dst.writemask = ir->write_mask;
1643
1644 for (int i = 0; i < 4; i++) {
1645 if (dst.writemask & (1 << i)) {
1646 first_enabled_chan = BRW_GET_SWZ(src.swizzle, i);
1647 break;
1648 }
1649 }
1650
1651 /* Swizzle a small RHS vector into the channels being written.
1652 *
1653 * glsl ir treats write_mask as dictating how many channels are
1654 * present on the RHS while in our instructions we need to make
1655 * those channels appear in the slots of the vec4 they're written to.
1656 */
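/* For example (illustrative), assigning a vec2 RHS into dst.yz turns the
 * RHS's natural .xyyy swizzle into .yxyy, so RHS.x lands in the Y slot and
 * RHS.y in the Z slot; unwritten slots just repeat the first enabled
 * channel's swizzle.
 */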
1657 for (int i = 0; i < 4; i++) {
1658 if (dst.writemask & (1 << i))
1659 swizzles[i] = BRW_GET_SWZ(src.swizzle, src_chan++);
1660 else
1661 swizzles[i] = first_enabled_chan;
1662 }
1663 src.swizzle = BRW_SWIZZLE4(swizzles[0], swizzles[1],
1664 swizzles[2], swizzles[3]);
1665
1666 if (try_rewrite_rhs_to_dst(ir, dst, src, pre_rhs_inst, last_rhs_inst)) {
1667 return;
1668 }
1669
1670 if (ir->condition) {
1671 emit_bool_to_cond_code(ir->condition, &predicate);
1672 }
1673
1674 for (i = 0; i < type_size(ir->lhs->type); i++) {
1675 vec4_instruction *inst = emit(MOV(dst, src));
1676 inst->predicate = predicate;
1677
1678 dst.reg_offset++;
1679 src.reg_offset++;
1680 }
1681 }
1682
1683 void
1684 vec4_visitor::emit_constant_values(dst_reg *dst, ir_constant *ir)
1685 {
1686 if (ir->type->base_type == GLSL_TYPE_STRUCT) {
1687 foreach_list(node, &ir->components) {
1688 ir_constant *field_value = (ir_constant *)node;
1689
1690 emit_constant_values(dst, field_value);
1691 }
1692 return;
1693 }
1694
1695 if (ir->type->is_array()) {
1696 for (unsigned int i = 0; i < ir->type->length; i++) {
1697 emit_constant_values(dst, ir->array_elements[i]);
1698 }
1699 return;
1700 }
1701
1702 if (ir->type->is_matrix()) {
1703 for (int i = 0; i < ir->type->matrix_columns; i++) {
1704 float *vec = &ir->value.f[i * ir->type->vector_elements];
1705
1706 for (int j = 0; j < ir->type->vector_elements; j++) {
1707 dst->writemask = 1 << j;
1708 dst->type = BRW_REGISTER_TYPE_F;
1709
1710 emit(MOV(*dst, src_reg(vec[j])));
1711 }
1712 dst->reg_offset++;
1713 }
1714 return;
1715 }
1716
1717 int remaining_writemask = (1 << ir->type->vector_elements) - 1;
1718
1719 for (int i = 0; i < ir->type->vector_elements; i++) {
1720 if (!(remaining_writemask & (1 << i)))
1721 continue;
1722
1723 dst->writemask = 1 << i;
1724 dst->type = brw_type_for_base_type(ir->type);
1725
1726 /* Find other components that match the one we're about to
1727 * write. Emits fewer instructions for things like vec4(0.5,
1728 * 1.5, 1.5, 1.5).
1729 */
1730 for (int j = i + 1; j < ir->type->vector_elements; j++) {
1731 if (ir->type->base_type == GLSL_TYPE_BOOL) {
1732 if (ir->value.b[i] == ir->value.b[j])
1733 dst->writemask |= (1 << j);
1734 } else {
1735 /* u, i, and f storage all line up, so no need for a
1736 * switch case for comparing each type.
1737 */
1738 if (ir->value.u[i] == ir->value.u[j])
1739 dst->writemask |= (1 << j);
1740 }
1741 }
1742
1743 switch (ir->type->base_type) {
1744 case GLSL_TYPE_FLOAT:
1745 emit(MOV(*dst, src_reg(ir->value.f[i])));
1746 break;
1747 case GLSL_TYPE_INT:
1748 emit(MOV(*dst, src_reg(ir->value.i[i])));
1749 break;
1750 case GLSL_TYPE_UINT:
1751 emit(MOV(*dst, src_reg(ir->value.u[i])));
1752 break;
1753 case GLSL_TYPE_BOOL:
1754 emit(MOV(*dst, src_reg(ir->value.b[i])));
1755 break;
1756 default:
1757 assert(!"Non-float/uint/int/bool constant");
1758 break;
1759 }
1760
1761 remaining_writemask &= ~dst->writemask;
1762 }
1763 dst->reg_offset++;
1764 }
1765
1766 void
1767 vec4_visitor::visit(ir_constant *ir)
1768 {
1769 dst_reg dst = dst_reg(this, ir->type);
1770 this->result = src_reg(dst);
1771
1772 emit_constant_values(&dst, ir);
1773 }
1774
1775 void
1776 vec4_visitor::visit(ir_call *ir)
1777 {
1778 assert(!"not reached");
1779 }
1780
1781 void
1782 vec4_visitor::visit(ir_texture *ir)
1783 {
1784 int sampler = _mesa_get_sampler_uniform_value(ir->sampler, prog, &vp->Base);
1785 sampler = vp->Base.SamplerUnits[sampler];
1786
1787 /* Should be lowered by do_lower_texture_projection */
1788 assert(!ir->projector);
1789
1790 vec4_instruction *inst = NULL;
1791 switch (ir->op) {
1792 case ir_tex:
1793 case ir_txl:
1794 inst = new(mem_ctx) vec4_instruction(this, SHADER_OPCODE_TXL);
1795 break;
1796 case ir_txd:
1797 inst = new(mem_ctx) vec4_instruction(this, SHADER_OPCODE_TXD);
1798 break;
1799 case ir_txf:
1800 inst = new(mem_ctx) vec4_instruction(this, SHADER_OPCODE_TXF);
1801 break;
1802 case ir_txs:
1803 inst = new(mem_ctx) vec4_instruction(this, SHADER_OPCODE_TXS);
1804 break;
1805 case ir_txb:
1806 assert(!"TXB is not valid for vertex shaders.");
1807 }
1808
1809 /* Texel offsets go in the message header; Gen4 also requires headers. */
1810 inst->header_present = ir->offset || intel->gen < 5;
1811 inst->base_mrf = 2;
1812 inst->mlen = inst->header_present + 1; /* always at least one */
1813 inst->sampler = sampler;
1814 inst->dst = dst_reg(this, ir->type);
1815 inst->shadow_compare = ir->shadow_comparitor != NULL;
1816
1817 if (ir->offset != NULL)
1818 inst->texture_offset = brw_texture_offset(ir->offset->as_constant());
1819
1820 /* MRF for the first parameter */
1821 int param_base = inst->base_mrf + inst->header_present;
1822
1823 if (ir->op == ir_txs) {
1824 ir->lod_info.lod->accept(this);
1825 int writemask = intel->gen == 4 ? WRITEMASK_W : WRITEMASK_X;
1826 emit(MOV(dst_reg(MRF, param_base, ir->lod_info.lod->type, writemask),
1827 this->result));
1828 } else {
1829 int i, coord_mask = 0, zero_mask = 0;
1830 /* Load the coordinate */
1831 /* FINISHME: gl_clamp_mask and saturate */
1832 for (i = 0; i < ir->coordinate->type->vector_elements; i++)
1833 coord_mask |= (1 << i);
1834 for (; i < 4; i++)
1835 zero_mask |= (1 << i);
1836
1837 ir->coordinate->accept(this);
1838 emit(MOV(dst_reg(MRF, param_base, ir->coordinate->type, coord_mask),
1839 this->result));
1840 emit(MOV(dst_reg(MRF, param_base, ir->coordinate->type, zero_mask),
1841 src_reg(0)));
1842 /* Load the shadow comparitor */
1843 if (ir->shadow_comparitor) {
1844 ir->shadow_comparitor->accept(this);
1845 emit(MOV(dst_reg(MRF, param_base + 1, ir->shadow_comparitor->type,
1846 WRITEMASK_X),
1847 this->result));
1848 inst->mlen++;
1849 }
1850
1851 /* Load the LOD info */
1852 if (ir->op == ir_txl) {
1853 int mrf, writemask;
1854 if (intel->gen >= 5) {
1855 mrf = param_base + 1;
1856 if (ir->shadow_comparitor) {
1857 writemask = WRITEMASK_Y;
1858 /* mlen already incremented */
1859 } else {
1860 writemask = WRITEMASK_X;
1861 inst->mlen++;
1862 }
1863 } else /* intel->gen == 4 */ {
1864 mrf = param_base;
1865 writemask = WRITEMASK_Z;
1866 }
1867 ir->lod_info.lod->accept(this);
1868 emit(MOV(dst_reg(MRF, mrf, ir->lod_info.lod->type, writemask),
1869 this->result));
1870 } else if (ir->op == ir_txf) {
1871 ir->lod_info.lod->accept(this);
1872 emit(MOV(dst_reg(MRF, param_base, ir->lod_info.lod->type, WRITEMASK_W),
1873 this->result));
1874 } else if (ir->op == ir_txd) {
1875 const glsl_type *type = ir->lod_info.grad.dPdx->type;
1876
1877 ir->lod_info.grad.dPdx->accept(this);
1878 src_reg dPdx = this->result;
1879 ir->lod_info.grad.dPdy->accept(this);
1880 src_reg dPdy = this->result;
1881
1882 if (intel->gen >= 5) {
1883 dPdx.swizzle = BRW_SWIZZLE4(SWIZZLE_X,SWIZZLE_X,SWIZZLE_Y,SWIZZLE_Y);
1884 dPdy.swizzle = BRW_SWIZZLE4(SWIZZLE_X,SWIZZLE_X,SWIZZLE_Y,SWIZZLE_Y);
1885 emit(MOV(dst_reg(MRF, param_base + 1, type, WRITEMASK_XZ), dPdx));
1886 emit(MOV(dst_reg(MRF, param_base + 1, type, WRITEMASK_YW), dPdy));
1887 inst->mlen++;
1888
1889 if (ir->type->vector_elements == 3) {
1890 dPdx.swizzle = BRW_SWIZZLE_ZZZZ;
1891 dPdy.swizzle = BRW_SWIZZLE_ZZZZ;
1892 emit(MOV(dst_reg(MRF, param_base + 2, type, WRITEMASK_X), dPdx));
1893 emit(MOV(dst_reg(MRF, param_base + 2, type, WRITEMASK_Y), dPdy));
1894 inst->mlen++;
1895 }
1896 } else /* intel->gen == 4 */ {
1897 emit(MOV(dst_reg(MRF, param_base + 1, type, WRITEMASK_XYZ), dPdx));
1898 emit(MOV(dst_reg(MRF, param_base + 2, type, WRITEMASK_XYZ), dPdy));
1899 inst->mlen += 2;
1900 }
1901 }
1902 }
1903
1904 emit(inst);
1905
1906 swizzle_result(ir, src_reg(inst->dst), sampler);
1907 }
1908
1909 void
1910 vec4_visitor::swizzle_result(ir_texture *ir, src_reg orig_val, int sampler)
1911 {
1912 this->result = orig_val;
1913
1914 int s = c->key.tex.swizzles[sampler];
1915
1916 if (ir->op == ir_txs || ir->type == glsl_type::float_type
1917 || s == SWIZZLE_NOOP)
1918 return;
1919
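/* Split the GL texture swizzle into channels copied from the sampler
 * result, channels forced to zero, and channels forced to one.  For a
 * GL_LUMINANCE-style swizzle of (R, R, R, ONE), for example, .xyz is
 * copied from the result's X channel and .w is written as 1.0f.
 */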
1920 int zero_mask = 0, one_mask = 0, copy_mask = 0;
1921 int swizzle[4];
1922
1923 for (int i = 0; i < 4; i++) {
1924 switch (GET_SWZ(s, i)) {
1925 case SWIZZLE_ZERO:
1926 zero_mask |= (1 << i);
1927 break;
1928 case SWIZZLE_ONE:
1929 one_mask |= (1 << i);
1930 break;
1931 default:
1932 copy_mask |= (1 << i);
1933 swizzle[i] = GET_SWZ(s, i);
1934 break;
1935 }
1936 }
1937
1938 this->result = src_reg(this, ir->type);
1939 dst_reg swizzled_result(this->result);
1940
1941 if (copy_mask) {
1942 orig_val.swizzle = BRW_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]);
1943 swizzled_result.writemask = copy_mask;
1944 emit(MOV(swizzled_result, orig_val));
1945 }
1946
1947 if (zero_mask) {
1948 swizzled_result.writemask = zero_mask;
1949 emit(MOV(swizzled_result, src_reg(0.0f)));
1950 }
1951
1952 if (one_mask) {
1953 swizzled_result.writemask = one_mask;
1954 emit(MOV(swizzled_result, src_reg(1.0f)));
1955 }
1956 }
1957
1958 void
1959 vec4_visitor::visit(ir_return *ir)
1960 {
1961 assert(!"not reached");
1962 }
1963
1964 void
1965 vec4_visitor::visit(ir_discard *ir)
1966 {
1967 assert(!"not reached");
1968 }
1969
1970 void
1971 vec4_visitor::visit(ir_if *ir)
1972 {
1973 /* Don't point the annotation at the if statement, because then it plus
1974 * the then and else blocks get printed.
1975 */
1976 this->base_ir = ir->condition;
1977
1978 if (intel->gen == 6) {
1979 emit_if_gen6(ir);
1980 } else {
1981 uint32_t predicate;
1982 emit_bool_to_cond_code(ir->condition, &predicate);
1983 emit(IF(predicate));
1984 }
1985
1986 visit_instructions(&ir->then_instructions);
1987
1988 if (!ir->else_instructions.is_empty()) {
1989 this->base_ir = ir->condition;
1990 emit(BRW_OPCODE_ELSE);
1991
1992 visit_instructions(&ir->else_instructions);
1993 }
1994
1995 this->base_ir = ir->condition;
1996 emit(BRW_OPCODE_ENDIF);
1997 }
1998
1999 void
2000 vec4_visitor::emit_ndc_computation()
2001 {
2002 /* Get the position */
2003 src_reg pos = src_reg(output_reg[VERT_RESULT_HPOS]);
2004
2005 /* Build ndc coords, which are (x/w, y/w, z/w, 1/w) */
2006 dst_reg ndc = dst_reg(this, glsl_type::vec4_type);
2007 output_reg[BRW_VERT_RESULT_NDC] = ndc;
2008
2009 current_annotation = "NDC";
2010 dst_reg ndc_w = ndc;
2011 ndc_w.writemask = WRITEMASK_W;
2012 src_reg pos_w = pos;
2013 pos_w.swizzle = BRW_SWIZZLE4(SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W);
2014 emit_math(SHADER_OPCODE_RCP, ndc_w, pos_w);
2015
2016 dst_reg ndc_xyz = ndc;
2017 ndc_xyz.writemask = WRITEMASK_XYZ;
2018
2019 emit(MUL(ndc_xyz, pos, src_reg(ndc_w)));
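   /* ndc.w already holds 1/pos.w from the RCP above and is read back with a
    * .wwww swizzle, so this computes ndc.xyz = pos.xyz / pos.w.
    */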
2020 }
2021
2022 void
2023 vec4_visitor::emit_psiz_and_flags(struct brw_reg reg)
2024 {
2025 if (intel->gen < 6 &&
2026 ((c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) ||
2027 c->key.userclip_active || brw->has_negative_rhw_bug)) {
2028 dst_reg header1 = dst_reg(this, glsl_type::uvec4_type);
2029 dst_reg header1_w = header1;
2030 header1_w.writemask = WRITEMASK_W;
2031 GLuint i;
2032
2033 emit(MOV(header1, 0u));
2034
2035 if (c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) {
2036 src_reg psiz = src_reg(output_reg[VERT_RESULT_PSIZ]);
2037
2038 current_annotation = "Point size";
2039 emit(MUL(header1_w, psiz, src_reg((float)(1 << 11))));
2040 emit(AND(header1_w, src_reg(header1_w), 0x7ff << 8));
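         /* The MUL/AND above appear to pack the point size as an unsigned
          * fixed-point value into bits 8..18 of header DWord 0 (hence the
          * scale by 2^11 and the 0x7ff << 8 mask); see the VUE header
          * description in the PRM for the exact field layout.
          */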
2041 }
2042
2043 current_annotation = "Clipping flags";
2044 for (i = 0; i < c->key.nr_userclip_plane_consts; i++) {
2045 vec4_instruction *inst;
2046
2047 inst = emit(DP4(dst_null_f(), src_reg(output_reg[VERT_RESULT_HPOS]),
2048 src_reg(this->userplane[i])));
2049 inst->conditional_mod = BRW_CONDITIONAL_L;
2050
2051 inst = emit(OR(header1_w, src_reg(header1_w), 1u << i));
2052 inst->predicate = BRW_PREDICATE_NORMAL;
2053 }
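      /* Each iteration above computes dot(gl_Position, userplane[i]) with a
       * less-than conditional mod, then uses the resulting predicate to OR
       * bit i into the clip-flag field only for vertices outside that plane.
       */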
2054
2055 /* i965 clipping workaround:
2056       * 1) Test for negative rhw
2057 * 2) If set,
2058 * set ndc = (0,0,0,0)
2059 * set ucp[6] = 1
2060 *
2061 * Later, clipping will detect ucp[6] and ensure the primitive is
2062 * clipped against all fixed planes.
2063 */
2064 if (brw->has_negative_rhw_bug) {
2065 #if 0
2066 /* FINISHME */
2067 brw_CMP(p,
2068 vec8(brw_null_reg()),
2069 BRW_CONDITIONAL_L,
2070 brw_swizzle1(output_reg[BRW_VERT_RESULT_NDC], 3),
2071 brw_imm_f(0));
2072
2073 brw_OR(p, brw_writemask(header1, WRITEMASK_W), header1, brw_imm_ud(1<<6));
2074 brw_MOV(p, output_reg[BRW_VERT_RESULT_NDC], brw_imm_f(0));
2075 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
2076 #endif
2077 }
2078
2079 emit(MOV(retype(reg, BRW_REGISTER_TYPE_UD), src_reg(header1)));
2080 } else if (intel->gen < 6) {
2081 emit(MOV(retype(reg, BRW_REGISTER_TYPE_UD), 0u));
2082 } else {
2083 emit(MOV(retype(reg, BRW_REGISTER_TYPE_D), src_reg(0)));
2084 if (c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) {
2085 emit(MOV(brw_writemask(reg, WRITEMASK_W),
2086 src_reg(output_reg[VERT_RESULT_PSIZ])));
2087 }
2088 }
2089 }
2090
2091 void
2092 vec4_visitor::emit_clip_distances(struct brw_reg reg, int offset)
2093 {
2094 if (intel->gen < 6) {
2095 /* Clip distance slots are set aside in gen5, but they are not used. It
2096 * is not clear whether we actually need to set aside space for them,
2097 * but the performance cost is negligible.
2098 */
2099 return;
2100 }
2101
2102 /* From the GLSL 1.30 spec, section 7.1 (Vertex Shader Special Variables):
2103 *
2104 * "If a linked set of shaders forming the vertex stage contains no
2105 * static write to gl_ClipVertex or gl_ClipDistance, but the
2106 * application has requested clipping against user clip planes through
2107 * the API, then the coordinate written to gl_Position is used for
2108 * comparison against the user clip planes."
2109 *
2110 * This function is only called if the shader didn't write to
2111 * gl_ClipDistance. Accordingly, we use gl_ClipVertex to perform clipping
2112 * if the user wrote to it; otherwise we use gl_Position.
2113 */
2114 gl_vert_result clip_vertex = VERT_RESULT_CLIP_VERTEX;
2115 if (!(c->prog_data.outputs_written
2116 & BITFIELD64_BIT(VERT_RESULT_CLIP_VERTEX))) {
2117 clip_vertex = VERT_RESULT_HPOS;
2118 }
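   /* Each clip-distance slot packs four plane distances: component i of the
    * slot gets dot(clip_vertex, userplane[i + offset]), where offset is 0
    * for CLIP_DIST0 and 4 for CLIP_DIST1 (see emit_urb_slot()).
    */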
2119
2120 for (int i = 0; i + offset < c->key.nr_userclip_plane_consts && i < 4;
2121 ++i) {
2122 emit(DP4(dst_reg(brw_writemask(reg, 1 << i)),
2123 src_reg(output_reg[clip_vertex]),
2124 src_reg(this->userplane[i + offset])));
2125 }
2126 }
2127
2128 void
2129 vec4_visitor::emit_generic_urb_slot(dst_reg reg, int vert_result)
2130 {
2131 assert (vert_result < VERT_RESULT_MAX);
2132 reg.type = output_reg[vert_result].type;
2133 current_annotation = output_reg_annotation[vert_result];
2134 /* Copy the register, saturating if necessary */
2135 vec4_instruction *inst = emit(MOV(reg,
2136 src_reg(output_reg[vert_result])));
2137 if ((vert_result == VERT_RESULT_COL0 ||
2138 vert_result == VERT_RESULT_COL1 ||
2139 vert_result == VERT_RESULT_BFC0 ||
2140 vert_result == VERT_RESULT_BFC1) &&
2141 c->key.clamp_vertex_color) {
2142 inst->saturate = true;
2143 }
2144 }
2145
2146 void
2147 vec4_visitor::emit_urb_slot(int mrf, int vert_result)
2148 {
2149 struct brw_reg hw_reg = brw_message_reg(mrf);
2150 dst_reg reg = dst_reg(MRF, mrf);
2151 reg.type = BRW_REGISTER_TYPE_F;
2152
2153 switch (vert_result) {
2154 case VERT_RESULT_PSIZ:
2155 /* PSIZ is always in slot 0, and is coupled with other flags. */
2156 current_annotation = "indices, point width, clip flags";
2157 emit_psiz_and_flags(hw_reg);
2158 break;
2159 case BRW_VERT_RESULT_NDC:
2160 current_annotation = "NDC";
2161 emit(MOV(reg, src_reg(output_reg[BRW_VERT_RESULT_NDC])));
2162 break;
2163 case BRW_VERT_RESULT_HPOS_DUPLICATE:
2164 case VERT_RESULT_HPOS:
2165 current_annotation = "gl_Position";
2166 emit(MOV(reg, src_reg(output_reg[VERT_RESULT_HPOS])));
2167 break;
2168 case VERT_RESULT_CLIP_DIST0:
2169 case VERT_RESULT_CLIP_DIST1:
2170 if (this->c->key.uses_clip_distance) {
2171 emit_generic_urb_slot(reg, vert_result);
2172 } else {
2173 current_annotation = "user clip distances";
2174 emit_clip_distances(hw_reg, (vert_result - VERT_RESULT_CLIP_DIST0) * 4);
2175 }
2176 break;
2177 case BRW_VERT_RESULT_PAD:
2178 /* No need to write to this slot */
2179 break;
2180 default:
2181 emit_generic_urb_slot(reg, vert_result);
2182 break;
2183 }
2184 }
2185
2186 static int
2187 align_interleaved_urb_mlen(struct brw_context *brw, int mlen)
2188 {
2189 struct intel_context *intel = &brw->intel;
2190
2191 if (intel->gen >= 6) {
2192 /* URB data written (does not include the message header reg) must
2193 * be a multiple of 256 bits, or 2 VS registers. See vol5c.5,
2194 * section 5.4.3.2.2: URB_INTERLEAVED.
2195 *
2196 * URB entries are allocated on a multiple of 1024 bits, so an
2197 * extra 128 bits written here to make the end align to 256 is
2198 * no problem.
2199 */
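      /* mlen includes the single header register, so the data portion is
       * mlen - 1; e.g. a 6-register message (1 header + 5 data) is padded
       * to 7 so that an even 6 data registers are written.
       */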
2200 if ((mlen % 2) != 1)
2201 mlen++;
2202 }
2203
2204 return mlen;
2205 }
2206
2207 /**
2208 * Generates the VUE payload plus the 1 or 2 URB write instructions to
2209 * complete the VS thread.
2210 *
2211 * The VUE layout is documented in Volume 2a.
2212 */
2213 void
2214 vec4_visitor::emit_urb_writes()
2215 {
2216 /* MRF 0 is reserved for the debugger, so start with message header
2217 * in MRF 1.
2218 */
2219 int base_mrf = 1;
2220 int mrf = base_mrf;
2221 /* In the process of generating our URB write message contents, we
2222 * may need to unspill a register or load from an array. Those
2223 * reads would use MRFs 14-15.
2224 */
2225 int max_usable_mrf = 13;
2226
2227    /* The following assertion verifies that max_usable_mrf results in an
2228     * even number of URB data registers being written, which meets gen6's
2229     * message-length alignment requirement.
2230     */
2231 assert ((max_usable_mrf - base_mrf) % 2 == 0);
2232
2233 /* FINISHME: edgeflag */
2234
2235 brw_compute_vue_map(&c->vue_map, intel, c->key.userclip_active,
2236 c->prog_data.outputs_written);
2237
2238 /* First mrf is the g0-based message header containing URB handles and such,
2239 * which is implied in VS_OPCODE_URB_WRITE.
2240 */
2241 mrf++;
2242
2243 if (intel->gen < 6) {
2244 emit_ndc_computation();
2245 }
2246
2247 /* Set up the VUE data for the first URB write */
2248 int slot;
2249 for (slot = 0; slot < c->vue_map.num_slots; ++slot) {
2250 emit_urb_slot(mrf++, c->vue_map.slot_to_vert_result[slot]);
2251
2252 /* If this was max_usable_mrf, we can't fit anything more into this URB
2253 * WRITE.
2254 */
2255 if (mrf > max_usable_mrf) {
2256 slot++;
2257 break;
2258 }
2259 }
2260
2261 current_annotation = "URB write";
2262 vec4_instruction *inst = emit(VS_OPCODE_URB_WRITE);
2263 inst->base_mrf = base_mrf;
2264 inst->mlen = align_interleaved_urb_mlen(brw, mrf - base_mrf);
2265 inst->eot = (slot >= c->vue_map.num_slots);
2266
2267 /* Optional second URB write */
2268 if (!inst->eot) {
2269 mrf = base_mrf + 1;
2270
2271 for (; slot < c->vue_map.num_slots; ++slot) {
2272 assert(mrf < max_usable_mrf);
2273
2274 emit_urb_slot(mrf++, c->vue_map.slot_to_vert_result[slot]);
2275 }
2276
2277 current_annotation = "URB write";
2278 inst = emit(VS_OPCODE_URB_WRITE);
2279 inst->base_mrf = base_mrf;
2280 inst->mlen = align_interleaved_urb_mlen(brw, mrf - base_mrf);
2281 inst->eot = true;
2282       /* URB destination offset.  The previous write used MRFs 1-13;
2283        * minus the one header MRF, that is 12 data regs.  URB offset is
2284        * in URB row increments, and each of our MRFs is half of one of
2285        * those, since we're doing interleaved writes.
2286        */
2287 inst->offset = (max_usable_mrf - base_mrf) / 2;
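      /* With base_mrf = 1 and max_usable_mrf = 13 that is 12 data MRFs,
       * i.e. 6 interleaved URB rows, so this second write starts at URB
       * offset 6.
       */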
2288 }
2289
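   /* urb_entry_size is recorded in allocation units: presumably 1024-bit
    * (8-vec4) rows on gen6 and 512-bit (4-vec4) rows on older generations,
    * which is what the ALIGN()/divide below compute.
    */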
2290 if (intel->gen == 6)
2291 c->prog_data.urb_entry_size = ALIGN(c->vue_map.num_slots, 8) / 8;
2292 else
2293 c->prog_data.urb_entry_size = ALIGN(c->vue_map.num_slots, 4) / 4;
2294 }
2295
2296 src_reg
2297 vec4_visitor::get_scratch_offset(vec4_instruction *inst,
2298 src_reg *reladdr, int reg_offset)
2299 {
2300 /* Because we store the values to scratch interleaved like our
2301 * vertex data, we need to scale the vec4 index by 2.
2302 */
2303 int message_header_scale = 2;
2304
2305 /* Pre-gen6, the message header uses byte offsets instead of vec4
2306 * (16-byte) offset units.
2307 */
2308 if (intel->gen < 6)
2309 message_header_scale *= 16;
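   /* For example, reg_offset 3 becomes scratch offset 6 (in vec4 units) on
    * gen6+, or 96 bytes on earlier generations.
    */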
2310
2311 if (reladdr) {
2312 src_reg index = src_reg(this, glsl_type::int_type);
2313
2314 emit_before(inst, ADD(dst_reg(index), *reladdr, src_reg(reg_offset)));
2315 emit_before(inst, MUL(dst_reg(index),
2316 index, src_reg(message_header_scale)));
2317
2318 return index;
2319 } else {
2320 return src_reg(reg_offset * message_header_scale);
2321 }
2322 }
2323
2324 src_reg
2325 vec4_visitor::get_pull_constant_offset(vec4_instruction *inst,
2326 src_reg *reladdr, int reg_offset)
2327 {
2328 if (reladdr) {
2329 src_reg index = src_reg(this, glsl_type::int_type);
2330
2331 emit_before(inst, ADD(dst_reg(index), *reladdr, src_reg(reg_offset)));
2332
2333 /* Pre-gen6, the message header uses byte offsets instead of vec4
2334 * (16-byte) offset units.
2335 */
2336 if (intel->gen < 6) {
2337 emit_before(inst, MUL(dst_reg(index), index, src_reg(16)));
2338 }
2339
2340 return index;
2341 } else {
2342 int message_header_scale = intel->gen < 6 ? 16 : 1;
2343 return src_reg(reg_offset * message_header_scale);
2344 }
2345 }
2346
2347 /**
2348 * Emits an instruction before @inst to load the value named by @orig_src
2349 * from scratch space at @base_offset to @temp.
2350 */
2351 void
2352 vec4_visitor::emit_scratch_read(vec4_instruction *inst,
2353 dst_reg temp, src_reg orig_src,
2354 int base_offset)
2355 {
2356 int reg_offset = base_offset + orig_src.reg_offset;
2357 src_reg index = get_scratch_offset(inst, orig_src.reladdr, reg_offset);
2358
2359 emit_before(inst, SCRATCH_READ(temp, index));
2360 }
2361
2362 /**
2363 * Emits an instruction after @inst to store the value to be written
2364 * to @orig_dst to scratch space at @base_offset, from @temp.
2365 */
2366 void
2367 vec4_visitor::emit_scratch_write(vec4_instruction *inst,
2368 src_reg temp, dst_reg orig_dst,
2369 int base_offset)
2370 {
2371 int reg_offset = base_offset + orig_dst.reg_offset;
2372 src_reg index = get_scratch_offset(inst, orig_dst.reladdr, reg_offset);
2373
2374 dst_reg dst = dst_reg(brw_writemask(brw_vec8_grf(0, 0),
2375 orig_dst.writemask));
2376 vec4_instruction *write = SCRATCH_WRITE(dst, temp, index);
2377 write->predicate = inst->predicate;
2378 write->ir = inst->ir;
2379 write->annotation = inst->annotation;
2380 inst->insert_after(write);
2381 }
2382
2383 /**
2384 * We can't generally support array access in GRF space, because a
2385 * single instruction's destination can only span 2 contiguous
2386 * registers. So, we send all GRF arrays that get variable index
2387 * access to scratch space.
2388 */
2389 void
2390 vec4_visitor::move_grf_array_access_to_scratch()
2391 {
2392 int scratch_loc[this->virtual_grf_count];
2393
2394 for (int i = 0; i < this->virtual_grf_count; i++) {
2395 scratch_loc[i] = -1;
2396 }
2397
2398 /* First, calculate the set of virtual GRFs that need to be punted
2399 * to scratch due to having any array access on them, and where in
2400 * scratch.
2401 */
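   /* Each vec4 register of a spilled array takes 8 * 4 = 32 bytes of
    * scratch, matching the interleaved (x2) layout that get_scratch_offset()
    * assumes.
    */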
2402 foreach_list(node, &this->instructions) {
2403 vec4_instruction *inst = (vec4_instruction *)node;
2404
2405 if (inst->dst.file == GRF && inst->dst.reladdr &&
2406 scratch_loc[inst->dst.reg] == -1) {
2407 scratch_loc[inst->dst.reg] = c->last_scratch;
2408 c->last_scratch += this->virtual_grf_sizes[inst->dst.reg] * 8 * 4;
2409 }
2410
2411       for (int i = 0; i < 3; i++) {
2412 src_reg *src = &inst->src[i];
2413
2414 if (src->file == GRF && src->reladdr &&
2415 scratch_loc[src->reg] == -1) {
2416 scratch_loc[src->reg] = c->last_scratch;
2417 c->last_scratch += this->virtual_grf_sizes[src->reg] * 8 * 4;
2418 }
2419 }
2420 }
2421
2422 /* Now, for anything that will be accessed through scratch, rewrite
2423 * it to load/store. Note that this is a _safe list walk, because
2424 * we may generate a new scratch_write instruction after the one
2425 * we're processing.
2426 */
2427 foreach_list_safe(node, &this->instructions) {
2428 vec4_instruction *inst = (vec4_instruction *)node;
2429
2430       /* Set up the annotation tracking for newly generated instructions. */
2431 base_ir = inst->ir;
2432 current_annotation = inst->annotation;
2433
2434 if (inst->dst.file == GRF && scratch_loc[inst->dst.reg] != -1) {
2435 src_reg temp = src_reg(this, glsl_type::vec4_type);
2436
2437 emit_scratch_write(inst, temp, inst->dst, scratch_loc[inst->dst.reg]);
2438
2439 inst->dst.file = temp.file;
2440 inst->dst.reg = temp.reg;
2441 inst->dst.reg_offset = temp.reg_offset;
2442 inst->dst.reladdr = NULL;
2443 }
2444
2445       for (int i = 0; i < 3; i++) {
2446 if (inst->src[i].file != GRF || scratch_loc[inst->src[i].reg] == -1)
2447 continue;
2448
2449 dst_reg temp = dst_reg(this, glsl_type::vec4_type);
2450
2451 emit_scratch_read(inst, temp, inst->src[i],
2452 scratch_loc[inst->src[i].reg]);
2453
2454 inst->src[i].file = temp.file;
2455 inst->src[i].reg = temp.reg;
2456 inst->src[i].reg_offset = temp.reg_offset;
2457 inst->src[i].reladdr = NULL;
2458 }
2459 }
2460 }
2461
2462 /**
2463 * Emits an instruction before @inst to load the value named by @orig_src
2464 * from the pull constant buffer (surface) at @base_offset to @temp.
2465 */
2466 void
2467 vec4_visitor::emit_pull_constant_load(vec4_instruction *inst,
2468 dst_reg temp, src_reg orig_src,
2469 int base_offset)
2470 {
2471 int reg_offset = base_offset + orig_src.reg_offset;
2472 src_reg index = get_pull_constant_offset(inst, orig_src.reladdr, reg_offset);
2473 vec4_instruction *load;
2474
2475 load = new(mem_ctx) vec4_instruction(this, VS_OPCODE_PULL_CONSTANT_LOAD,
2476 temp, index);
2477 load->base_mrf = 14;
2478 load->mlen = 1;
2479 emit_before(inst, load);
2480 }
2481
2482 /**
2483 * Implements array access of uniforms by inserting a
2484 * PULL_CONSTANT_LOAD instruction.
2485 *
2486 * Unlike temporary GRF array access (where we don't support it due to
2487 * the difficulty of doing relative addressing on instruction
2488 * destinations), we could potentially do array access of uniforms
2489 * that were loaded in GRF space as push constants. In real-world
2490 * usage we've seen, though, the arrays being used are always larger
2491 * than we could load as push constants, so just always move all
2492 * uniform array access out to a pull constant buffer.
2493 */
2494 void
2495 vec4_visitor::move_uniform_array_access_to_pull_constants()
2496 {
2497 int pull_constant_loc[this->uniforms];
2498
2499 for (int i = 0; i < this->uniforms; i++) {
2500 pull_constant_loc[i] = -1;
2501 }
2502
2503 /* Walk through and find array access of uniforms. Put a copy of that
2504 * uniform in the pull constant buffer.
2505 *
2506 * Note that we don't move constant-indexed accesses to arrays. No
2507 * testing has been done of the performance impact of this choice.
2508 */
2509 foreach_list_safe(node, &this->instructions) {
2510 vec4_instruction *inst = (vec4_instruction *)node;
2511
2512       for (int i = 0; i < 3; i++) {
2513 if (inst->src[i].file != UNIFORM || !inst->src[i].reladdr)
2514 continue;
2515
2516 int uniform = inst->src[i].reg;
2517
2518 /* If this array isn't already present in the pull constant buffer,
2519 * add it.
2520 */
2521 if (pull_constant_loc[uniform] == -1) {
2522 const float **values = &prog_data->param[uniform * 4];
2523
2524 pull_constant_loc[uniform] = prog_data->nr_pull_params / 4;
2525
2526 for (int j = 0; j < uniform_size[uniform] * 4; j++) {
2527 prog_data->pull_param[prog_data->nr_pull_params++] = values[j];
2528 }
2529 }
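            /* pull_constant_loc[uniform] now holds the array's starting vec4
             * offset in the pull constant buffer; each vec4 of the array
             * occupies four consecutive pull_param entries.
             */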
2530
2531          /* Set up the annotation tracking for newly generated instructions. */
2532 base_ir = inst->ir;
2533 current_annotation = inst->annotation;
2534
2535 dst_reg temp = dst_reg(this, glsl_type::vec4_type);
2536
2537 emit_pull_constant_load(inst, temp, inst->src[i],
2538 pull_constant_loc[uniform]);
2539
2540 inst->src[i].file = temp.file;
2541 inst->src[i].reg = temp.reg;
2542 inst->src[i].reg_offset = temp.reg_offset;
2543 inst->src[i].reladdr = NULL;
2544 }
2545 }
2546
2547 /* Now there are no accesses of the UNIFORM file with a reladdr, so
2548 * no need to track them as larger-than-vec4 objects. This will be
2549 * relied on in cutting out unused uniform vectors from push
2550 * constants.
2551 */
2552 split_uniform_registers();
2553 }
2554
2555 void
2556 vec4_visitor::resolve_ud_negate(src_reg *reg)
2557 {
2558 if (reg->type != BRW_REGISTER_TYPE_UD ||
2559 !reg->negate)
2560 return;
2561
2562 src_reg temp = src_reg(this, glsl_type::uvec4_type);
2563 emit(BRW_OPCODE_MOV, dst_reg(temp), *reg);
2564 *reg = temp;
2565 }
2566
2567 vec4_visitor::vec4_visitor(struct brw_vs_compile *c,
2568 struct gl_shader_program *prog,
2569 struct brw_shader *shader)
2570 {
2571 this->c = c;
2572 this->p = &c->func;
2573 this->brw = p->brw;
2574 this->intel = &brw->intel;
2575 this->ctx = &intel->ctx;
2576 this->prog = prog;
2577 this->shader = shader;
2578
2579 this->mem_ctx = ralloc_context(NULL);
2580 this->failed = false;
2581
2582 this->base_ir = NULL;
2583 this->current_annotation = NULL;
2584
2586 this->vp = (struct gl_vertex_program *)
2587 prog->_LinkedShaders[MESA_SHADER_VERTEX]->Program;
2588 this->prog_data = &c->prog_data;
2589
2590 this->variable_ht = hash_table_ctor(0,
2591 hash_table_pointer_hash,
2592 hash_table_pointer_compare);
2593
2594 this->virtual_grf_def = NULL;
2595 this->virtual_grf_use = NULL;
2596 this->virtual_grf_sizes = NULL;
2597 this->virtual_grf_count = 0;
2598 this->virtual_grf_reg_map = NULL;
2599 this->virtual_grf_reg_count = 0;
2600 this->virtual_grf_array_size = 0;
2601 this->live_intervals_valid = false;
2602
2603 this->uniforms = 0;
2604 }
2605
2606 vec4_visitor::~vec4_visitor()
2607 {
2608 ralloc_free(this->mem_ctx);
2609 hash_table_dtor(this->variable_ht);
2610 }
2611
2612
2613 void
2614 vec4_visitor::fail(const char *format, ...)
2615 {
2616 va_list va;
2617 char *msg;
2618
2619 if (failed)
2620 return;
2621
2622 failed = true;
2623
2624 va_start(va, format);
2625 msg = ralloc_vasprintf(mem_ctx, format, va);
2626 va_end(va);
2627 msg = ralloc_asprintf(mem_ctx, "VS compile failed: %s\n", msg);
2628
2629 this->fail_msg = msg;
2630
2631 if (INTEL_DEBUG & DEBUG_VS) {
2632 fprintf(stderr, "%s", msg);
2633 }
2634 }
2635
2636 } /* namespace brw */