i965: Add cases for ir_binop_vector_extract that assert.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_vec4_visitor.cpp
1 /*
2 * Copyright © 2011 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "brw_vec4.h"
25 #include "glsl/ir_uniform.h"
26 extern "C" {
27 #include "main/context.h"
28 #include "main/macros.h"
29 #include "program/prog_parameter.h"
30 #include "program/sampler.h"
31 }
32
33 namespace brw {
34
35 vec4_instruction::vec4_instruction(vec4_visitor *v,
36 enum opcode opcode, dst_reg dst,
37 src_reg src0, src_reg src1, src_reg src2)
38 {
39 this->opcode = opcode;
40 this->dst = dst;
41 this->src[0] = src0;
42 this->src[1] = src1;
43 this->src[2] = src2;
44 this->ir = v->base_ir;
45 this->annotation = v->current_annotation;
46 }
47
48 vec4_instruction *
49 vec4_visitor::emit(vec4_instruction *inst)
50 {
51 this->instructions.push_tail(inst);
52
53 return inst;
54 }
55
56 vec4_instruction *
57 vec4_visitor::emit_before(vec4_instruction *inst, vec4_instruction *new_inst)
58 {
59 new_inst->ir = inst->ir;
60 new_inst->annotation = inst->annotation;
61
62 inst->insert_before(new_inst);
63
64 return inst;
65 }
66
67 vec4_instruction *
68 vec4_visitor::emit(enum opcode opcode, dst_reg dst,
69 src_reg src0, src_reg src1, src_reg src2)
70 {
71 return emit(new(mem_ctx) vec4_instruction(this, opcode, dst,
72 src0, src1, src2));
73 }
74
75
76 vec4_instruction *
77 vec4_visitor::emit(enum opcode opcode, dst_reg dst, src_reg src0, src_reg src1)
78 {
79 return emit(new(mem_ctx) vec4_instruction(this, opcode, dst, src0, src1));
80 }
81
82 vec4_instruction *
83 vec4_visitor::emit(enum opcode opcode, dst_reg dst, src_reg src0)
84 {
85 return emit(new(mem_ctx) vec4_instruction(this, opcode, dst, src0));
86 }
87
88 vec4_instruction *
89 vec4_visitor::emit(enum opcode opcode)
90 {
91 return emit(new(mem_ctx) vec4_instruction(this, opcode, dst_reg()));
92 }
93
94 #define ALU1(op) \
95 vec4_instruction * \
96 vec4_visitor::op(dst_reg dst, src_reg src0) \
97 { \
98 return new(mem_ctx) vec4_instruction(this, BRW_OPCODE_##op, dst, \
99 src0); \
100 }
101
102 #define ALU2(op) \
103 vec4_instruction * \
104 vec4_visitor::op(dst_reg dst, src_reg src0, src_reg src1) \
105 { \
106 return new(mem_ctx) vec4_instruction(this, BRW_OPCODE_##op, dst, \
107 src0, src1); \
108 }
109
110 #define ALU3(op) \
111 vec4_instruction * \
112 vec4_visitor::op(dst_reg dst, src_reg src0, src_reg src1, src_reg src2)\
113 { \
114 return new(mem_ctx) vec4_instruction(this, BRW_OPCODE_##op, dst, \
115 src0, src1, src2); \
116 }
117
118 ALU1(NOT)
119 ALU1(MOV)
120 ALU1(FRC)
121 ALU1(RNDD)
122 ALU1(RNDE)
123 ALU1(RNDZ)
124 ALU1(F32TO16)
125 ALU1(F16TO32)
126 ALU2(ADD)
127 ALU2(MUL)
128 ALU2(MACH)
129 ALU2(AND)
130 ALU2(OR)
131 ALU2(XOR)
132 ALU2(DP3)
133 ALU2(DP4)
134 ALU2(DPH)
135 ALU2(SHL)
136 ALU2(SHR)
137 ALU2(ASR)
138 ALU3(LRP)
139 ALU1(BFREV)
140 ALU3(BFE)
141 ALU2(BFI1)
142 ALU3(BFI2)
143 ALU1(FBH)
144 ALU1(FBL)
145 ALU1(CBIT)
146
147 /** Gen4 predicated IF. */
148 vec4_instruction *
149 vec4_visitor::IF(uint32_t predicate)
150 {
151 vec4_instruction *inst;
152
153 inst = new(mem_ctx) vec4_instruction(this, BRW_OPCODE_IF);
154 inst->predicate = predicate;
155
156 return inst;
157 }
158
159 /** Gen6+ IF with embedded comparison. */
160 vec4_instruction *
161 vec4_visitor::IF(src_reg src0, src_reg src1, uint32_t condition)
162 {
163 assert(intel->gen >= 6);
164
165 vec4_instruction *inst;
166
167 resolve_ud_negate(&src0);
168 resolve_ud_negate(&src1);
169
170 inst = new(mem_ctx) vec4_instruction(this, BRW_OPCODE_IF, dst_null_d(),
171 src0, src1);
172 inst->conditional_mod = condition;
173
174 return inst;
175 }
176
177 /**
178 * CMP: Sets the low bit of the destination channels with the result
179 * of the comparison, while the upper bits are undefined, and updates
180 * the flag register with the packed 16 bits of the result.
181 */
182 vec4_instruction *
183 vec4_visitor::CMP(dst_reg dst, src_reg src0, src_reg src1, uint32_t condition)
184 {
185 vec4_instruction *inst;
186
187 /* original gen4 does type conversion to the destination type
188 * before comparison, producing garbage results for floating
189 * point comparisons.
190 */
191 if (intel->gen == 4) {
192 dst.type = src0.type;
193 if (dst.file == HW_REG)
194 dst.fixed_hw_reg.type = dst.type;
195 }
196
197 resolve_ud_negate(&src0);
198 resolve_ud_negate(&src1);
199
200 inst = new(mem_ctx) vec4_instruction(this, BRW_OPCODE_CMP, dst, src0, src1);
201 inst->conditional_mod = condition;
202
203 return inst;
204 }
205
206 vec4_instruction *
207 vec4_visitor::SCRATCH_READ(dst_reg dst, src_reg index)
208 {
209 vec4_instruction *inst;
210
211 inst = new(mem_ctx) vec4_instruction(this, VS_OPCODE_SCRATCH_READ,
212 dst, index);
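   /* Scratch reads go out as a send message; the two-register payload is
    * assembled starting at MRF 14.
    */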
213 inst->base_mrf = 14;
214 inst->mlen = 2;
215
216 return inst;
217 }
218
219 vec4_instruction *
220 vec4_visitor::SCRATCH_WRITE(dst_reg dst, src_reg src, src_reg index)
221 {
222 vec4_instruction *inst;
223
224 inst = new(mem_ctx) vec4_instruction(this, VS_OPCODE_SCRATCH_WRITE,
225 dst, src, index);
226 inst->base_mrf = 13;
227 inst->mlen = 3;
228
229 return inst;
230 }
231
232 void
233 vec4_visitor::emit_dp(dst_reg dst, src_reg src0, src_reg src1, unsigned elements)
234 {
235 static enum opcode dot_opcodes[] = {
236 BRW_OPCODE_DP2, BRW_OPCODE_DP3, BRW_OPCODE_DP4
237 };
238
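   /* Callers pass 2, 3, or 4 elements; index the table above to pick
    * DP2, DP3, or DP4 accordingly.
    */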
239 emit(dot_opcodes[elements - 2], dst, src0, src1);
240 }
241
242 src_reg
243 vec4_visitor::fix_3src_operand(src_reg src)
244 {
245 /* Using vec4 uniforms in SIMD4x2 programs is difficult. You'd like to be
246 * able to use vertical stride of zero to replicate the vec4 uniform, like
247 *
248 * g3<0;4,1>:f - [0, 4][1, 5][2, 6][3, 7]
249 *
250 * But you can't, since vertical stride is always four in three-source
251 * instructions. Instead, insert a MOV instruction to do the replication so
252 * that the three-source instruction can consume it.
253 */
254
255 /* The MOV is only needed if the source is a uniform or immediate. */
256 if (src.file != UNIFORM && src.file != IMM)
257 return src;
258
259 dst_reg expanded = dst_reg(this, glsl_type::vec4_type);
260 expanded.type = src.type;
261 emit(MOV(expanded, src));
262 return src_reg(expanded);
263 }
264
265 src_reg
266 vec4_visitor::fix_math_operand(src_reg src)
267 {
268 /* The gen6 math instruction ignores the source modifiers --
269 * swizzle, abs, negate, and at least some parts of the register
270 * region description.
271 *
272 * Rather than trying to enumerate all these cases, *always* expand the
273 * operand to a temp GRF for gen6.
274 *
275 * For gen7, keep the operand as-is, except if immediate, which gen7 still
276 * can't use.
277 */
278
279 if (intel->gen == 7 && src.file != IMM)
280 return src;
281
282 dst_reg expanded = dst_reg(this, glsl_type::vec4_type);
283 expanded.type = src.type;
284 emit(MOV(expanded, src));
285 return src_reg(expanded);
286 }
287
288 void
289 vec4_visitor::emit_math1_gen6(enum opcode opcode, dst_reg dst, src_reg src)
290 {
291 src = fix_math_operand(src);
292
293 if (dst.writemask != WRITEMASK_XYZW) {
294 /* The gen6 math instruction must be align1, so we can't do
295 * writemasks.
296 */
297 dst_reg temp_dst = dst_reg(this, glsl_type::vec4_type);
298
299 emit(opcode, temp_dst, src);
300
301 emit(MOV(dst, src_reg(temp_dst)));
302 } else {
303 emit(opcode, dst, src);
304 }
305 }
306
307 void
308 vec4_visitor::emit_math1_gen4(enum opcode opcode, dst_reg dst, src_reg src)
309 {
310 vec4_instruction *inst = emit(opcode, dst, src);
311 inst->base_mrf = 1;
312 inst->mlen = 1;
313 }
314
315 void
316 vec4_visitor::emit_math(opcode opcode, dst_reg dst, src_reg src)
317 {
318 switch (opcode) {
319 case SHADER_OPCODE_RCP:
320 case SHADER_OPCODE_RSQ:
321 case SHADER_OPCODE_SQRT:
322 case SHADER_OPCODE_EXP2:
323 case SHADER_OPCODE_LOG2:
324 case SHADER_OPCODE_SIN:
325 case SHADER_OPCODE_COS:
326 break;
327 default:
328 assert(!"not reached: bad math opcode");
329 return;
330 }
331
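   /* Gen6+ has a dedicated math instruction; Gen4/5 issue math as a message,
    * which is why the gen4 helper sets up an MRF payload.
    */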
332 if (intel->gen >= 6) {
333 return emit_math1_gen6(opcode, dst, src);
334 } else {
335 return emit_math1_gen4(opcode, dst, src);
336 }
337 }
338
339 void
340 vec4_visitor::emit_math2_gen6(enum opcode opcode,
341 dst_reg dst, src_reg src0, src_reg src1)
342 {
343 src0 = fix_math_operand(src0);
344 src1 = fix_math_operand(src1);
345
346 if (dst.writemask != WRITEMASK_XYZW) {
347 /* The gen6 math instruction must be align1, so we can't do
348 * writemasks.
349 */
350 dst_reg temp_dst = dst_reg(this, glsl_type::vec4_type);
351 temp_dst.type = dst.type;
352
353 emit(opcode, temp_dst, src0, src1);
354
355 emit(MOV(dst, src_reg(temp_dst)));
356 } else {
357 emit(opcode, dst, src0, src1);
358 }
359 }
360
361 void
362 vec4_visitor::emit_math2_gen4(enum opcode opcode,
363 dst_reg dst, src_reg src0, src_reg src1)
364 {
365 vec4_instruction *inst = emit(opcode, dst, src0, src1);
366 inst->base_mrf = 1;
367 inst->mlen = 2;
368 }
369
370 void
371 vec4_visitor::emit_math(enum opcode opcode,
372 dst_reg dst, src_reg src0, src_reg src1)
373 {
374 switch (opcode) {
375 case SHADER_OPCODE_POW:
376 case SHADER_OPCODE_INT_QUOTIENT:
377 case SHADER_OPCODE_INT_REMAINDER:
378 break;
379 default:
380 assert(!"not reached: unsupported binary math opcode");
381 return;
382 }
383
384 if (intel->gen >= 6) {
385 return emit_math2_gen6(opcode, dst, src0, src1);
386 } else {
387 return emit_math2_gen4(opcode, dst, src0, src1);
388 }
389 }
390
391 void
392 vec4_visitor::emit_pack_half_2x16(dst_reg dst, src_reg src0)
393 {
394 if (intel->gen < 7)
395 assert(!"ir_unop_pack_half_2x16 should be lowered");
396
397 assert(dst.type == BRW_REGISTER_TYPE_UD);
398 assert(src0.type == BRW_REGISTER_TYPE_F);
399
400 /* From the Ivybridge PRM, Vol4, Part3, Section 6.27 f32to16:
401 *
402 * Because this instruction does not have a 16-bit floating-point type,
403 * the destination data type must be Word (W).
404 *
405 * The destination must be DWord-aligned and specify a horizontal stride
406 * (HorzStride) of 2. The 16-bit result is stored in the lower word of
407 * each destination channel and the upper word is not modified.
408 *
409 * The above restriction implies that the f32to16 instruction must use
410 * align1 mode, because only in align1 mode is it possible to specify
411 * horizontal stride. We choose here to defy the hardware docs and emit
412 * align16 instructions.
413 *
414 * (I [chadv] did attempt to emit align1 instructions for VS f32to16
415 * instructions. I was partially successful in that the code passed all
416 * tests. However, the code was dubiously correct and fragile, and the
417 * tests were not harsh enough to probe that frailty. Not trusting the
418 * code, I chose instead to remain in align16 mode in defiance of the hw
419 * docs).
420 *
421 * I've [chadv] experimentally confirmed that, on gen7 hardware and the
422 * simulator, emitting a f32to16 in align16 mode with UD as destination
423 * data type is safe. The behavior differs from that specified in the PRM
424 * in that the upper word of each destination channel is cleared to 0.
425 */
426
427 dst_reg tmp_dst(this, glsl_type::uvec2_type);
428 src_reg tmp_src(tmp_dst);
429
430 #if 0
431 /* Verify the undocumented behavior on which the following instructions
432 * rely. If f32to16 fails to clear the upper word of the X and Y channels,
433 * then the result of the bit-or instruction below will be incorrect.
434 *
435 * You should inspect the disasm output in order to verify that the MOV is
436 * not optimized away.
437 */
438 emit(MOV(tmp_dst, src_reg(0x12345678u)));
439 #endif
440
441 /* Give tmp the form below, where "." means untouched.
442 *
443 * w z y x w z y x
444 * |.|.|0x0000hhhh|0x0000llll|.|.|0x0000hhhh|0x0000llll|
445 *
446 * The upper word of each write-channel must be 0 for the
447 * following bit-shift and bit-or instructions to work. Note that this
448 * relies on the undocumented hardware behavior mentioned above.
449 */
450 tmp_dst.writemask = WRITEMASK_XY;
451 emit(F32TO16(tmp_dst, src0));
452
453 /* Give the write-channels of dst the form:
454 * 0xhhhh0000
455 */
456 tmp_src.swizzle = SWIZZLE_Y;
457 emit(SHL(dst, tmp_src, src_reg(16u)));
458
459 /* Finally, give the write-channels of dst the form of packHalf2x16's
460 * output:
461 * 0xhhhhllll
462 */
463 tmp_src.swizzle = SWIZZLE_X;
464 emit(OR(dst, src_reg(dst), tmp_src));
465 }
466
467 void
468 vec4_visitor::emit_unpack_half_2x16(dst_reg dst, src_reg src0)
469 {
470 if (intel->gen < 7)
471 assert(!"ir_unop_unpack_half_2x16 should be lowered");
472
473 assert(dst.type == BRW_REGISTER_TYPE_F);
474 assert(src0.type == BRW_REGISTER_TYPE_UD);
475
476 /* From the Ivybridge PRM, Vol4, Part3, Section 6.26 f16to32:
477 *
478 * Because this instruction does not have a 16-bit floating-point type,
479 * the source data type must be Word (W). The destination type must be
480 * F (Float).
481 *
482 * To use W as the source data type, we must adjust horizontal strides,
483 * which is only possible in align1 mode. All my [chadv] attempts at
484 * emitting align1 instructions for unpackHalf2x16 failed to pass the
485 * Piglit tests, so I gave up.
486 *
487 * I've verified that, on gen7 hardware and the simulator, it is safe to
488 * emit f16to32 in align16 mode with UD as source data type.
489 */
490
491 dst_reg tmp_dst(this, glsl_type::uvec2_type);
492 src_reg tmp_src(tmp_dst);
493
494 tmp_dst.writemask = WRITEMASK_X;
495 emit(AND(tmp_dst, src0, src_reg(0xffffu)));
496
497 tmp_dst.writemask = WRITEMASK_Y;
498 emit(SHR(tmp_dst, src0, src_reg(16u)));
499
500 dst.writemask = WRITEMASK_XY;
501 emit(F16TO32(dst, tmp_src));
502 }
503
504 void
505 vec4_visitor::visit_instructions(const exec_list *list)
506 {
507 foreach_list(node, list) {
508 ir_instruction *ir = (ir_instruction *)node;
509
510 base_ir = ir;
511 ir->accept(this);
512 }
513 }
514
515
516 static int
517 type_size(const struct glsl_type *type)
518 {
519 unsigned int i;
520 int size;
521
522 switch (type->base_type) {
523 case GLSL_TYPE_UINT:
524 case GLSL_TYPE_INT:
525 case GLSL_TYPE_FLOAT:
526 case GLSL_TYPE_BOOL:
527 if (type->is_matrix()) {
528 return type->matrix_columns;
529 } else {
530 /* Regardless of the size of the vector, it gets a vec4. This is bad
531 * packing for things like floats, but otherwise arrays become a
532 * mess. Hopefully a later pass over the code can pack scalars
533 * down if appropriate.
534 */
535 return 1;
536 }
537 case GLSL_TYPE_ARRAY:
538 assert(type->length > 0);
539 return type_size(type->fields.array) * type->length;
540 case GLSL_TYPE_STRUCT:
541 size = 0;
542 for (i = 0; i < type->length; i++) {
543 size += type_size(type->fields.structure[i].type);
544 }
545 return size;
546 case GLSL_TYPE_SAMPLER:
547 /* Samplers take up one slot in UNIFORMS[], but they're baked in
548 * at link time.
549 */
550 return 1;
551 case GLSL_TYPE_VOID:
552 case GLSL_TYPE_ERROR:
553 case GLSL_TYPE_INTERFACE:
554 assert(0);
555 break;
556 }
557
558 return 0;
559 }
560
561 int
562 vec4_visitor::virtual_grf_alloc(int size)
563 {
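   /* Grow both bookkeeping arrays geometrically (16, 32, 64, ...) whenever
    * we run out of room.
    */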
564 if (virtual_grf_array_size <= virtual_grf_count) {
565 if (virtual_grf_array_size == 0)
566 virtual_grf_array_size = 16;
567 else
568 virtual_grf_array_size *= 2;
569 virtual_grf_sizes = reralloc(mem_ctx, virtual_grf_sizes, int,
570 virtual_grf_array_size);
571 virtual_grf_reg_map = reralloc(mem_ctx, virtual_grf_reg_map, int,
572 virtual_grf_array_size);
573 }
574 virtual_grf_reg_map[virtual_grf_count] = virtual_grf_reg_count;
575 virtual_grf_reg_count += size;
576 virtual_grf_sizes[virtual_grf_count] = size;
577 return virtual_grf_count++;
578 }
579
580 src_reg::src_reg(class vec4_visitor *v, const struct glsl_type *type)
581 {
582 init();
583
584 this->file = GRF;
585 this->reg = v->virtual_grf_alloc(type_size(type));
586
587 if (type->is_array() || type->is_record()) {
588 this->swizzle = BRW_SWIZZLE_NOOP;
589 } else {
590 this->swizzle = swizzle_for_size(type->vector_elements);
591 }
592
593 this->type = brw_type_for_base_type(type);
594 }
595
596 dst_reg::dst_reg(class vec4_visitor *v, const struct glsl_type *type)
597 {
598 init();
599
600 this->file = GRF;
601 this->reg = v->virtual_grf_alloc(type_size(type));
602
603 if (type->is_array() || type->is_record()) {
604 this->writemask = WRITEMASK_XYZW;
605 } else {
606 this->writemask = (1 << type->vector_elements) - 1;
607 }
608
609 this->type = brw_type_for_base_type(type);
610 }
611
612 /* Our support for uniforms is piggy-backed on the struct
613 * gl_fragment_program, because that's where the values actually
614 * get stored, rather than in some global gl_shader_program uniform
615 * store.
616 */
617 void
618 vec4_visitor::setup_uniform_values(ir_variable *ir)
619 {
620 int namelen = strlen(ir->name);
621
622 /* The data for our (non-builtin) uniforms is stored in a series of
623 * gl_uniform_driver_storage structs for each subcomponent that
624 * glGetUniformLocation() could name. We know it's been set up in the same
625 * order we'd walk the type, so walk the list of storage and find anything
626 * with our name, or the prefix of a component that starts with our name.
627 */
628 for (unsigned u = 0; u < shader_prog->NumUserUniformStorage; u++) {
629 struct gl_uniform_storage *storage = &shader_prog->UniformStorage[u];
630
631 if (strncmp(ir->name, storage->name, namelen) != 0 ||
632 (storage->name[namelen] != 0 &&
633 storage->name[namelen] != '.' &&
634 storage->name[namelen] != '[')) {
635 continue;
636 }
637
638 gl_constant_value *components = storage->storage;
639 unsigned vector_count = (MAX2(storage->array_elements, 1) *
640 storage->type->matrix_columns);
641
642 for (unsigned s = 0; s < vector_count; s++) {
643 uniform_vector_size[uniforms] = storage->type->vector_elements;
644
645 int i;
646 for (i = 0; i < uniform_vector_size[uniforms]; i++) {
647 prog_data->param[uniforms * 4 + i] = &components->f;
648 components++;
649 }
650 for (; i < 4; i++) {
651 static float zero = 0;
652 prog_data->param[uniforms * 4 + i] = &zero;
653 }
654
655 uniforms++;
656 }
657 }
658 }
659
660 void
661 vec4_visitor::setup_uniform_clipplane_values()
662 {
663 gl_clip_plane *clip_planes = brw_select_clip_planes(ctx);
664
665 if (intel->gen < 6) {
666 /* Pre-Gen6, we compact clip planes. For example, if the user
667 * enables just clip planes 0, 1, and 3, we will enable clip planes
668 * 0, 1, and 2 in the hardware, and we'll move clip plane 3 to clip
669 * plane 2. This simplifies the implementation of the Gen6 clip
670 * thread.
671 */
672 int compacted_clipplane_index = 0;
673 for (int i = 0; i < MAX_CLIP_PLANES; ++i) {
674 if (!(key->userclip_planes_enabled_gen_4_5 & (1 << i)))
675 continue;
676
677 this->uniform_vector_size[this->uniforms] = 4;
678 this->userplane[compacted_clipplane_index] = dst_reg(UNIFORM, this->uniforms);
679 this->userplane[compacted_clipplane_index].type = BRW_REGISTER_TYPE_F;
680 for (int j = 0; j < 4; ++j) {
681 prog_data->param[this->uniforms * 4 + j] = &clip_planes[i][j];
682 }
683 ++compacted_clipplane_index;
684 ++this->uniforms;
685 }
686 } else {
687 /* In Gen6 and later, we don't compact clip planes, because this
688 * simplifies the implementation of gl_ClipDistance.
689 */
690 for (int i = 0; i < key->nr_userclip_plane_consts; ++i) {
691 this->uniform_vector_size[this->uniforms] = 4;
692 this->userplane[i] = dst_reg(UNIFORM, this->uniforms);
693 this->userplane[i].type = BRW_REGISTER_TYPE_F;
694 for (int j = 0; j < 4; ++j) {
695 prog_data->param[this->uniforms * 4 + j] = &clip_planes[i][j];
696 }
697 ++this->uniforms;
698 }
699 }
700 }
701
702 /* Our support for builtin uniforms is even scarier than non-builtin.
703 * It sits on top of the PROG_STATE_VAR parameters that are
704 * automatically updated from GL context state.
705 */
706 void
707 vec4_visitor::setup_builtin_uniform_values(ir_variable *ir)
708 {
709 const ir_state_slot *const slots = ir->state_slots;
710 assert(ir->state_slots != NULL);
711
712 for (unsigned int i = 0; i < ir->num_state_slots; i++) {
713 /* This state reference has already been setup by ir_to_mesa,
714 * but we'll get the same index back here. We can reference
715 * ParameterValues directly, since unlike brw_fs.cpp, we never
716 * add new state references during compile.
717 */
718 int index = _mesa_add_state_reference(this->prog->Parameters,
719 (gl_state_index *)slots[i].tokens);
720 float *values = &this->prog->Parameters->ParameterValues[index][0].f;
721
722 this->uniform_vector_size[this->uniforms] = 0;
723 /* Add each of the unique swizzled channels of the element.
724 * This will end up matching the size of the glsl_type of this field.
725 */
726 int last_swiz = -1;
727 for (unsigned int j = 0; j < 4; j++) {
728 int swiz = GET_SWZ(slots[i].swizzle, j);
729 last_swiz = swiz;
730
731 prog_data->param[this->uniforms * 4 + j] = &values[swiz];
732 if (swiz <= last_swiz)
733 this->uniform_vector_size[this->uniforms]++;
734 }
735 this->uniforms++;
736 }
737 }
738
739 dst_reg *
740 vec4_visitor::variable_storage(ir_variable *var)
741 {
742 return (dst_reg *)hash_table_find(this->variable_ht, var);
743 }
744
745 void
746 vec4_visitor::emit_bool_to_cond_code(ir_rvalue *ir, uint32_t *predicate)
747 {
748 ir_expression *expr = ir->as_expression();
749
750 *predicate = BRW_PREDICATE_NORMAL;
751
752 if (expr) {
753 src_reg op[2];
754 vec4_instruction *inst;
755
756 assert(expr->get_num_operands() <= 2);
757 for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
758 expr->operands[i]->accept(this);
759 op[i] = this->result;
760
761 resolve_ud_negate(&op[i]);
762 }
763
764 switch (expr->operation) {
765 case ir_unop_logic_not:
766 inst = emit(AND(dst_null_d(), op[0], src_reg(1)));
767 inst->conditional_mod = BRW_CONDITIONAL_Z;
768 break;
769
770 case ir_binop_logic_xor:
771 inst = emit(XOR(dst_null_d(), op[0], op[1]));
772 inst->conditional_mod = BRW_CONDITIONAL_NZ;
773 break;
774
775 case ir_binop_logic_or:
776 inst = emit(OR(dst_null_d(), op[0], op[1]));
777 inst->conditional_mod = BRW_CONDITIONAL_NZ;
778 break;
779
780 case ir_binop_logic_and:
781 inst = emit(AND(dst_null_d(), op[0], op[1]));
782 inst->conditional_mod = BRW_CONDITIONAL_NZ;
783 break;
784
785 case ir_unop_f2b:
786 if (intel->gen >= 6) {
787 emit(CMP(dst_null_d(), op[0], src_reg(0.0f), BRW_CONDITIONAL_NZ));
788 } else {
789 inst = emit(MOV(dst_null_f(), op[0]));
790 inst->conditional_mod = BRW_CONDITIONAL_NZ;
791 }
792 break;
793
794 case ir_unop_i2b:
795 if (intel->gen >= 6) {
796 emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ));
797 } else {
798 inst = emit(MOV(dst_null_d(), op[0]));
799 inst->conditional_mod = BRW_CONDITIONAL_NZ;
800 }
801 break;
802
803 case ir_binop_all_equal:
804 inst = emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_Z));
805 *predicate = BRW_PREDICATE_ALIGN16_ALL4H;
806 break;
807
808 case ir_binop_any_nequal:
809 inst = emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_NZ));
810 *predicate = BRW_PREDICATE_ALIGN16_ANY4H;
811 break;
812
813 case ir_unop_any:
814 inst = emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ));
815 *predicate = BRW_PREDICATE_ALIGN16_ANY4H;
816 break;
817
818 case ir_binop_greater:
819 case ir_binop_gequal:
820 case ir_binop_less:
821 case ir_binop_lequal:
822 case ir_binop_equal:
823 case ir_binop_nequal:
824 emit(CMP(dst_null_d(), op[0], op[1],
825 brw_conditional_for_comparison(expr->operation)));
826 break;
827
828 default:
829 assert(!"not reached");
830 break;
831 }
832 return;
833 }
834
835 ir->accept(this);
836
837 resolve_ud_negate(&this->result);
838
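   /* Not one of the expression forms handled above: evaluate it to a boolean
    * (stored as 0 or 1 in this backend) and set the flag from the result --
    * gen6+ tests bit 0 via the AND, gen4/5 test the whole value for nonzero.
    */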
839 if (intel->gen >= 6) {
840 vec4_instruction *inst = emit(AND(dst_null_d(),
841 this->result, src_reg(1)));
842 inst->conditional_mod = BRW_CONDITIONAL_NZ;
843 } else {
844 vec4_instruction *inst = emit(MOV(dst_null_d(), this->result));
845 inst->conditional_mod = BRW_CONDITIONAL_NZ;
846 }
847 }
848
849 /**
850 * Emit a gen6 IF statement with the comparison folded into the IF
851 * instruction.
852 */
853 void
854 vec4_visitor::emit_if_gen6(ir_if *ir)
855 {
856 ir_expression *expr = ir->condition->as_expression();
857
858 if (expr) {
859 src_reg op[2];
860 dst_reg temp;
861
862 assert(expr->get_num_operands() <= 2);
863 for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
864 expr->operands[i]->accept(this);
865 op[i] = this->result;
866 }
867
868 switch (expr->operation) {
869 case ir_unop_logic_not:
870 emit(IF(op[0], src_reg(0), BRW_CONDITIONAL_Z));
871 return;
872
873 case ir_binop_logic_xor:
874 emit(IF(op[0], op[1], BRW_CONDITIONAL_NZ));
875 return;
876
877 case ir_binop_logic_or:
878 temp = dst_reg(this, glsl_type::bool_type);
879 emit(OR(temp, op[0], op[1]));
880 emit(IF(src_reg(temp), src_reg(0), BRW_CONDITIONAL_NZ));
881 return;
882
883 case ir_binop_logic_and:
884 temp = dst_reg(this, glsl_type::bool_type);
885 emit(AND(temp, op[0], op[1]));
886 emit(IF(src_reg(temp), src_reg(0), BRW_CONDITIONAL_NZ));
887 return;
888
889 case ir_unop_f2b:
890 emit(IF(op[0], src_reg(0), BRW_CONDITIONAL_NZ));
891 return;
892
893 case ir_unop_i2b:
894 emit(IF(op[0], src_reg(0), BRW_CONDITIONAL_NZ));
895 return;
896
897 case ir_binop_greater:
898 case ir_binop_gequal:
899 case ir_binop_less:
900 case ir_binop_lequal:
901 case ir_binop_equal:
902 case ir_binop_nequal:
903 emit(IF(op[0], op[1],
904 brw_conditional_for_comparison(expr->operation)));
905 return;
906
907 case ir_binop_all_equal:
908 emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_Z));
909 emit(IF(BRW_PREDICATE_ALIGN16_ALL4H));
910 return;
911
912 case ir_binop_any_nequal:
913 emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_NZ));
914 emit(IF(BRW_PREDICATE_ALIGN16_ANY4H));
915 return;
916
917 case ir_unop_any:
918 emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ));
919 emit(IF(BRW_PREDICATE_ALIGN16_ANY4H));
920 return;
921
922 default:
923 assert(!"not reached");
924 emit(IF(op[0], src_reg(0), BRW_CONDITIONAL_NZ));
925 return;
926 }
927 return;
928 }
929
930 ir->condition->accept(this);
931
932 emit(IF(this->result, src_reg(0), BRW_CONDITIONAL_NZ));
933 }
934
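/** Return a copy of \p r with its writemask replaced by \p mask. */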
935 static dst_reg
936 with_writemask(dst_reg const & r, int mask)
937 {
938 dst_reg result = r;
939 result.writemask = mask;
940 return result;
941 }
942
943 void
944 vec4_vs_visitor::emit_prolog()
945 {
946 dst_reg sign_recovery_shift;
947 dst_reg normalize_factor;
948 dst_reg es3_normalize_factor;
949
950 for (int i = 0; i < VERT_ATTRIB_MAX; i++) {
951 if (vs_prog_data->inputs_read & BITFIELD64_BIT(i)) {
952 uint8_t wa_flags = vs_compile->key.gl_attrib_wa_flags[i];
953 dst_reg reg(ATTR, i);
954 dst_reg reg_d = reg;
955 reg_d.type = BRW_REGISTER_TYPE_D;
956 dst_reg reg_ud = reg;
957 reg_ud.type = BRW_REGISTER_TYPE_UD;
958
959 /* Do GL_FIXED rescaling for GLES2.0. Our GL_FIXED attributes
960 * come in as floating point conversions of the integer values.
961 */
962 if (wa_flags & BRW_ATTRIB_WA_COMPONENT_MASK) {
963 dst_reg dst = reg;
964 dst.type = brw_type_for_base_type(glsl_type::vec4_type);
965 dst.writemask = (1 << (wa_flags & BRW_ATTRIB_WA_COMPONENT_MASK)) - 1;
966 emit(MUL(dst, src_reg(dst), src_reg(1.0f / 65536.0f)));
967 }
968
969 /* Do sign recovery for 2101010 formats if required. */
970 if (wa_flags & BRW_ATTRIB_WA_SIGN) {
971 if (sign_recovery_shift.file == BAD_FILE) {
972 /* shift constant: <22,22,22,30> */
973 sign_recovery_shift = dst_reg(this, glsl_type::uvec4_type);
974 emit(MOV(with_writemask(sign_recovery_shift, WRITEMASK_XYZ), src_reg(22u)));
975 emit(MOV(with_writemask(sign_recovery_shift, WRITEMASK_W), src_reg(30u)));
976 }
977
978 emit(SHL(reg_ud, src_reg(reg_ud), src_reg(sign_recovery_shift)));
979 emit(ASR(reg_d, src_reg(reg_d), src_reg(sign_recovery_shift)));
980 }
981
982 /* Apply BGRA swizzle if required. */
983 if (wa_flags & BRW_ATTRIB_WA_BGRA) {
984 src_reg temp = src_reg(reg);
985 temp.swizzle = BRW_SWIZZLE4(2,1,0,3);
986 emit(MOV(reg, temp));
987 }
988
989 if (wa_flags & BRW_ATTRIB_WA_NORMALIZE) {
990 /* ES 3.0 has different rules for converting signed normalized
991 * fixed-point numbers than desktop GL.
992 */
993 if (_mesa_is_gles3(ctx) && (wa_flags & BRW_ATTRIB_WA_SIGN)) {
994 /* According to equation 2.2 of the ES 3.0 specification,
995 * signed normalization conversion is done by:
996 *
997 * f = c / (2^(b-1)-1)
998 */
999 if (es3_normalize_factor.file == BAD_FILE) {
1000 /* mul constant: 1 / (2^(b-1) - 1) */
1001 es3_normalize_factor = dst_reg(this, glsl_type::vec4_type);
1002 emit(MOV(with_writemask(es3_normalize_factor, WRITEMASK_XYZ),
1003 src_reg(1.0f / ((1<<9) - 1))));
1004 emit(MOV(with_writemask(es3_normalize_factor, WRITEMASK_W),
1005 src_reg(1.0f / ((1<<1) - 1))));
1006 }
1007
1008 dst_reg dst = reg;
1009 dst.type = brw_type_for_base_type(glsl_type::vec4_type);
1010 emit(MOV(dst, src_reg(reg_d)));
1011 emit(MUL(dst, src_reg(dst), src_reg(es3_normalize_factor)));
1012 emit_minmax(BRW_CONDITIONAL_G, dst, src_reg(dst), src_reg(-1.0f));
1013 } else {
1014 /* The following equations are from the OpenGL 3.2 specification:
1015 *
1016 * 2.1 unsigned normalization
1017 * f = c/(2^n-1)
1018 *
1019 * 2.2 signed normalization
1020 * f = (2c+1)/(2^n-1)
1021 *
1022 * Both of these share a common divisor, which is represented by
1023 * "normalize_factor" in the code below.
1024 */
1025 if (normalize_factor.file == BAD_FILE) {
1026 /* 1 / (2^b - 1) for b=<10,10,10,2> */
1027 normalize_factor = dst_reg(this, glsl_type::vec4_type);
1028 emit(MOV(with_writemask(normalize_factor, WRITEMASK_XYZ),
1029 src_reg(1.0f / ((1<<10) - 1))));
1030 emit(MOV(with_writemask(normalize_factor, WRITEMASK_W),
1031 src_reg(1.0f / ((1<<2) - 1))));
1032 }
1033
1034 dst_reg dst = reg;
1035 dst.type = brw_type_for_base_type(glsl_type::vec4_type);
1036 emit(MOV(dst, src_reg((wa_flags & BRW_ATTRIB_WA_SIGN) ? reg_d : reg_ud)));
1037
1038 /* For signed normalization, we want the numerator to be 2c+1. */
1039 if (wa_flags & BRW_ATTRIB_WA_SIGN) {
1040 emit(MUL(dst, src_reg(dst), src_reg(2.0f)));
1041 emit(ADD(dst, src_reg(dst), src_reg(1.0f)));
1042 }
1043
1044 emit(MUL(dst, src_reg(dst), src_reg(normalize_factor)));
1045 }
1046 }
1047
1048 if (wa_flags & BRW_ATTRIB_WA_SCALE) {
1049 dst_reg dst = reg;
1050 dst.type = brw_type_for_base_type(glsl_type::vec4_type);
1051 emit(MOV(dst, src_reg((wa_flags & BRW_ATTRIB_WA_SIGN) ? reg_d : reg_ud)));
1052 }
1053 }
1054 }
1055 }
1056
1057
1058 dst_reg *
1059 vec4_vs_visitor::make_reg_for_system_value(ir_variable *ir)
1060 {
1061 /* VertexID is stored by the VF as the last vertex element, but
1062 * we don't represent it with a flag in inputs_read, so we call
1063 * it VERT_ATTRIB_MAX, which setup_attributes() picks up on.
1064 */
1065 dst_reg *reg = new(mem_ctx) dst_reg(ATTR, VERT_ATTRIB_MAX);
1066 vs_prog_data->uses_vertexid = true;
1067
1068 switch (ir->location) {
1069 case SYSTEM_VALUE_VERTEX_ID:
1070 reg->writemask = WRITEMASK_X;
1071 break;
1072 case SYSTEM_VALUE_INSTANCE_ID:
1073 reg->writemask = WRITEMASK_Y;
1074 break;
1075 default:
1076 assert(!"not reached");
1077 break;
1078 }
1079
1080 return reg;
1081 }
1082
1083
1084 void
1085 vec4_visitor::visit(ir_variable *ir)
1086 {
1087 dst_reg *reg = NULL;
1088
1089 if (variable_storage(ir))
1090 return;
1091
1092 switch (ir->mode) {
1093 case ir_var_shader_in:
1094 reg = new(mem_ctx) dst_reg(ATTR, ir->location);
1095 break;
1096
1097 case ir_var_shader_out:
1098 reg = new(mem_ctx) dst_reg(this, ir->type);
1099
1100 for (int i = 0; i < type_size(ir->type); i++) {
1101 output_reg[ir->location + i] = *reg;
1102 output_reg[ir->location + i].reg_offset = i;
1103 output_reg[ir->location + i].type =
1104 brw_type_for_base_type(ir->type->get_scalar_type());
1105 output_reg_annotation[ir->location + i] = ir->name;
1106 }
1107 break;
1108
1109 case ir_var_auto:
1110 case ir_var_temporary:
1111 reg = new(mem_ctx) dst_reg(this, ir->type);
1112 break;
1113
1114 case ir_var_uniform:
1115 reg = new(this->mem_ctx) dst_reg(UNIFORM, this->uniforms);
1116
1117 /* Thanks to the lower_ubo_reference pass, we will see only
1118 * ir_binop_ubo_load expressions and not ir_dereference_variable for UBO
1119 * variables, so no need for them to be in variable_ht.
1120 */
1121 if (ir->is_in_uniform_block())
1122 return;
1123
1124 /* Track how big the whole uniform variable is, in case we need to put a
1125 * copy of its data into pull constants for array access.
1126 */
1127 this->uniform_size[this->uniforms] = type_size(ir->type);
1128
1129 if (!strncmp(ir->name, "gl_", 3)) {
1130 setup_builtin_uniform_values(ir);
1131 } else {
1132 setup_uniform_values(ir);
1133 }
1134 break;
1135
1136 case ir_var_system_value:
1137 reg = make_reg_for_system_value(ir);
1138 break;
1139
1140 default:
1141 assert(!"not reached");
1142 }
1143
1144 reg->type = brw_type_for_base_type(ir->type);
1145 hash_table_insert(this->variable_ht, reg, ir);
1146 }
1147
1148 void
1149 vec4_visitor::visit(ir_loop *ir)
1150 {
1151 dst_reg counter;
1152
1153 /* We don't want debugging output to print the whole body of the
1154 * loop as the annotation.
1155 */
1156 this->base_ir = NULL;
1157
1158 if (ir->counter != NULL) {
1159 this->base_ir = ir->counter;
1160 ir->counter->accept(this);
1161 counter = *(variable_storage(ir->counter));
1162
1163 if (ir->from != NULL) {
1164 this->base_ir = ir->from;
1165 ir->from->accept(this);
1166
1167 emit(MOV(counter, this->result));
1168 }
1169 }
1170
1171 emit(BRW_OPCODE_DO);
1172
1173 if (ir->to) {
1174 this->base_ir = ir->to;
1175 ir->to->accept(this);
1176
1177 emit(CMP(dst_null_d(), src_reg(counter), this->result,
1178 brw_conditional_for_comparison(ir->cmp)));
1179
1180 vec4_instruction *inst = emit(BRW_OPCODE_BREAK);
1181 inst->predicate = BRW_PREDICATE_NORMAL;
1182 }
1183
1184 visit_instructions(&ir->body_instructions);
1185
1186
1187 if (ir->increment) {
1188 this->base_ir = ir->increment;
1189 ir->increment->accept(this);
1190 emit(ADD(counter, src_reg(counter), this->result));
1191 }
1192
1193 emit(BRW_OPCODE_WHILE);
1194 }
1195
1196 void
1197 vec4_visitor::visit(ir_loop_jump *ir)
1198 {
1199 switch (ir->mode) {
1200 case ir_loop_jump::jump_break:
1201 emit(BRW_OPCODE_BREAK);
1202 break;
1203 case ir_loop_jump::jump_continue:
1204 emit(BRW_OPCODE_CONTINUE);
1205 break;
1206 }
1207 }
1208
1209
1210 void
1211 vec4_visitor::visit(ir_function_signature *ir)
1212 {
1213 assert(0);
1214 (void)ir;
1215 }
1216
1217 void
1218 vec4_visitor::visit(ir_function *ir)
1219 {
1220 /* Ignore function bodies other than main() -- we shouldn't see calls to
1221 * them since they should all be inlined.
1222 */
1223 if (strcmp(ir->name, "main") == 0) {
1224 const ir_function_signature *sig;
1225 exec_list empty;
1226
1227 sig = ir->matching_signature(&empty);
1228
1229 assert(sig);
1230
1231 visit_instructions(&sig->body);
1232 }
1233 }
1234
1235 bool
1236 vec4_visitor::try_emit_sat(ir_expression *ir)
1237 {
1238 ir_rvalue *sat_src = ir->as_rvalue_to_saturate();
1239 if (!sat_src)
1240 return false;
1241
1242 sat_src->accept(this);
1243 src_reg src = this->result;
1244
1245 this->result = src_reg(this, ir->type);
1246 vec4_instruction *inst;
1247 inst = emit(MOV(dst_reg(this->result), src));
1248 inst->saturate = true;
1249
1250 return true;
1251 }
1252
1253 void
1254 vec4_visitor::emit_bool_comparison(unsigned int op,
1255 dst_reg dst, src_reg src0, src_reg src1)
1256 {
1257 /* original gen4 does destination conversion before comparison. */
1258 if (intel->gen < 5)
1259 dst.type = src0.type;
1260
1261 emit(CMP(dst, src0, src1, brw_conditional_for_comparison(op)));
1262
1263 dst.type = BRW_REGISTER_TYPE_D;
1264 emit(AND(dst, src_reg(dst), src_reg(0x1)));
1265 }
1266
1267 void
1268 vec4_visitor::emit_minmax(uint32_t conditionalmod, dst_reg dst,
1269 src_reg src0, src_reg src1)
1270 {
1271 vec4_instruction *inst;
1272
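   /* On gen6+, SEL with a conditional mod computes the min/max directly;
    * earlier generations need an explicit CMP followed by a predicated SEL.
    */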
1273 if (intel->gen >= 6) {
1274 inst = emit(BRW_OPCODE_SEL, dst, src0, src1);
1275 inst->conditional_mod = conditionalmod;
1276 } else {
1277 emit(CMP(dst, src0, src1, conditionalmod));
1278
1279 inst = emit(BRW_OPCODE_SEL, dst, src0, src1);
1280 inst->predicate = BRW_PREDICATE_NORMAL;
1281 }
1282 }
1283
1284 void
1285 vec4_visitor::visit(ir_expression *ir)
1286 {
1287 unsigned int operand;
1288 src_reg op[Elements(ir->operands)];
1289 src_reg result_src;
1290 dst_reg result_dst;
1291 vec4_instruction *inst;
1292
1293 if (try_emit_sat(ir))
1294 return;
1295
1296 for (operand = 0; operand < ir->get_num_operands(); operand++) {
1297 this->result.file = BAD_FILE;
1298 ir->operands[operand]->accept(this);
1299 if (this->result.file == BAD_FILE) {
1300 printf("Failed to get tree for expression operand:\n");
1301 ir->operands[operand]->print();
1302 exit(1);
1303 }
1304 op[operand] = this->result;
1305
1306 /* Matrix expression operands should have been broken down to vector
1307 * operations already.
1308 */
1309 assert(!ir->operands[operand]->type->is_matrix());
1310 }
1311
1312 int vector_elements = ir->operands[0]->type->vector_elements;
1313 if (ir->operands[1]) {
1314 vector_elements = MAX2(vector_elements,
1315 ir->operands[1]->type->vector_elements);
1316 }
1317
1318 this->result.file = BAD_FILE;
1319
1320 /* Storage for our result. Ideally for an assignment we'd be using
1321 * the actual storage for the result here, instead.
1322 */
1323 result_src = src_reg(this, ir->type);
1324 /* convenience for the emit functions below. */
1325 result_dst = dst_reg(result_src);
1326 /* If nothing special happens, this is the result. */
1327 this->result = result_src;
1328 /* Limit writes to the channels that will be used by result_src later.
1329 * This does limit this temp's use as a temporary for multi-instruction
1330 * sequences.
1331 */
1332 result_dst.writemask = (1 << ir->type->vector_elements) - 1;
1333
1334 switch (ir->operation) {
1335 case ir_unop_logic_not:
1336 /* Note that BRW_OPCODE_NOT is not appropriate here, since it computes
1337 * the ones' complement of the whole register, not just bit 0.
1338 */
1339 emit(XOR(result_dst, op[0], src_reg(1)));
1340 break;
1341 case ir_unop_neg:
1342 op[0].negate = !op[0].negate;
1343 this->result = op[0];
1344 break;
1345 case ir_unop_abs:
1346 op[0].abs = true;
1347 op[0].negate = false;
1348 this->result = op[0];
1349 break;
1350
1351 case ir_unop_sign:
1352 emit(MOV(result_dst, src_reg(0.0f)));
1353
1354 emit(CMP(dst_null_d(), op[0], src_reg(0.0f), BRW_CONDITIONAL_G));
1355 inst = emit(MOV(result_dst, src_reg(1.0f)));
1356 inst->predicate = BRW_PREDICATE_NORMAL;
1357
1358 emit(CMP(dst_null_d(), op[0], src_reg(0.0f), BRW_CONDITIONAL_L));
1359 inst = emit(MOV(result_dst, src_reg(-1.0f)));
1360 inst->predicate = BRW_PREDICATE_NORMAL;
1361
1362 break;
1363
1364 case ir_unop_rcp:
1365 emit_math(SHADER_OPCODE_RCP, result_dst, op[0]);
1366 break;
1367
1368 case ir_unop_exp2:
1369 emit_math(SHADER_OPCODE_EXP2, result_dst, op[0]);
1370 break;
1371 case ir_unop_log2:
1372 emit_math(SHADER_OPCODE_LOG2, result_dst, op[0]);
1373 break;
1374 case ir_unop_exp:
1375 case ir_unop_log:
1376 assert(!"not reached: should be handled by ir_explog_to_explog2");
1377 break;
1378 case ir_unop_sin:
1379 case ir_unop_sin_reduced:
1380 emit_math(SHADER_OPCODE_SIN, result_dst, op[0]);
1381 break;
1382 case ir_unop_cos:
1383 case ir_unop_cos_reduced:
1384 emit_math(SHADER_OPCODE_COS, result_dst, op[0]);
1385 break;
1386
1387 case ir_unop_dFdx:
1388 case ir_unop_dFdy:
1389 assert(!"derivatives not valid in vertex shader");
1390 break;
1391
1392 case ir_unop_bitfield_reverse:
1393 emit(BFREV(result_dst, op[0]));
1394 break;
1395 case ir_unop_bit_count:
1396 emit(CBIT(result_dst, op[0]));
1397 break;
1398 case ir_unop_find_msb: {
1399 src_reg temp = src_reg(this, glsl_type::uint_type);
1400
1401 inst = emit(FBH(dst_reg(temp), op[0]));
1402 inst->dst.writemask = WRITEMASK_XYZW;
1403
1404 /* FBH counts from the MSB side, while GLSL's findMSB() wants the count
1405 * from the LSB side. If FBH didn't return an error (0xFFFFFFFF), then
1406 * subtract the result from 31 to convert the MSB count into an LSB count.
1407 */
1408
1409 /* FBH only supports UD type for dst, so use a MOV to convert UD to D. */
1410 temp.swizzle = BRW_SWIZZLE_NOOP;
1411 emit(MOV(result_dst, temp));
1412
1413 src_reg src_tmp = src_reg(result_dst);
1414 emit(CMP(dst_null_d(), src_tmp, src_reg(-1), BRW_CONDITIONAL_NZ));
1415
1416 src_tmp.negate = true;
1417 inst = emit(ADD(result_dst, src_tmp, src_reg(31)));
1418 inst->predicate = BRW_PREDICATE_NORMAL;
1419 break;
1420 }
1421 case ir_unop_find_lsb:
1422 emit(FBL(result_dst, op[0]));
1423 break;
1424
1425 case ir_unop_noise:
1426 assert(!"not reached: should be handled by lower_noise");
1427 break;
1428
1429 case ir_binop_add:
1430 emit(ADD(result_dst, op[0], op[1]));
1431 break;
1432 case ir_binop_sub:
1433 assert(!"not reached: should be handled by ir_sub_to_add_neg");
1434 break;
1435
1436 case ir_binop_mul:
1437 if (ir->type->is_integer()) {
1438 /* For integer multiplication, the MUL uses the low 16 bits
1439 * of one of the operands (src0 on gen6, src1 on gen7). The
1440 * MACH accumulates in the contribution of the upper 16 bits
1441 * of that operand.
1442 *
1443 * FINISHME: Emit just the MUL if we know an operand is small
1444 * enough.
1445 */
1446 struct brw_reg acc = retype(brw_acc_reg(), BRW_REGISTER_TYPE_D);
1447
1448 emit(MUL(acc, op[0], op[1]));
1449 emit(MACH(dst_null_d(), op[0], op[1]));
1450 emit(MOV(result_dst, src_reg(acc)));
1451 } else {
1452 emit(MUL(result_dst, op[0], op[1]));
1453 }
1454 break;
1455 case ir_binop_div:
1456 /* Floating point should be lowered by DIV_TO_MUL_RCP in the compiler. */
1457 assert(ir->type->is_integer());
1458 emit_math(SHADER_OPCODE_INT_QUOTIENT, result_dst, op[0], op[1]);
1459 break;
1460 case ir_binop_mod:
1461 /* Floating point should be lowered by MOD_TO_FRACT in the compiler. */
1462 assert(ir->type->is_integer());
1463 emit_math(SHADER_OPCODE_INT_REMAINDER, result_dst, op[0], op[1]);
1464 break;
1465
1466 case ir_binop_less:
1467 case ir_binop_greater:
1468 case ir_binop_lequal:
1469 case ir_binop_gequal:
1470 case ir_binop_equal:
1471 case ir_binop_nequal: {
1472 emit(CMP(result_dst, op[0], op[1],
1473 brw_conditional_for_comparison(ir->operation)));
1474 emit(AND(result_dst, result_src, src_reg(0x1)));
1475 break;
1476 }
1477
1478 case ir_binop_all_equal:
1479 /* "==" operator producing a scalar boolean. */
1480 if (ir->operands[0]->type->is_vector() ||
1481 ir->operands[1]->type->is_vector()) {
1482 emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_Z));
1483 emit(MOV(result_dst, src_reg(0)));
1484 inst = emit(MOV(result_dst, src_reg(1)));
1485 inst->predicate = BRW_PREDICATE_ALIGN16_ALL4H;
1486 } else {
1487 emit(CMP(result_dst, op[0], op[1], BRW_CONDITIONAL_Z));
1488 emit(AND(result_dst, result_src, src_reg(0x1)));
1489 }
1490 break;
1491 case ir_binop_any_nequal:
1492 /* "!=" operator producing a scalar boolean. */
1493 if (ir->operands[0]->type->is_vector() ||
1494 ir->operands[1]->type->is_vector()) {
1495 emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_NZ));
1496
1497 emit(MOV(result_dst, src_reg(0)));
1498 inst = emit(MOV(result_dst, src_reg(1)));
1499 inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
1500 } else {
1501 emit(CMP(result_dst, op[0], op[1], BRW_CONDITIONAL_NZ));
1502 emit(AND(result_dst, result_src, src_reg(0x1)));
1503 }
1504 break;
1505
1506 case ir_unop_any:
1507 emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ));
1508 emit(MOV(result_dst, src_reg(0)));
1509
1510 inst = emit(MOV(result_dst, src_reg(1)));
1511 inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
1512 break;
1513
1514 case ir_binop_logic_xor:
1515 emit(XOR(result_dst, op[0], op[1]));
1516 break;
1517
1518 case ir_binop_logic_or:
1519 emit(OR(result_dst, op[0], op[1]));
1520 break;
1521
1522 case ir_binop_logic_and:
1523 emit(AND(result_dst, op[0], op[1]));
1524 break;
1525
1526 case ir_binop_dot:
1527 assert(ir->operands[0]->type->is_vector());
1528 assert(ir->operands[0]->type == ir->operands[1]->type);
1529 emit_dp(result_dst, op[0], op[1], ir->operands[0]->type->vector_elements);
1530 break;
1531
1532 case ir_unop_sqrt:
1533 emit_math(SHADER_OPCODE_SQRT, result_dst, op[0]);
1534 break;
1535 case ir_unop_rsq:
1536 emit_math(SHADER_OPCODE_RSQ, result_dst, op[0]);
1537 break;
1538
1539 case ir_unop_bitcast_i2f:
1540 case ir_unop_bitcast_u2f:
1541 this->result = op[0];
1542 this->result.type = BRW_REGISTER_TYPE_F;
1543 break;
1544
1545 case ir_unop_bitcast_f2i:
1546 this->result = op[0];
1547 this->result.type = BRW_REGISTER_TYPE_D;
1548 break;
1549
1550 case ir_unop_bitcast_f2u:
1551 this->result = op[0];
1552 this->result.type = BRW_REGISTER_TYPE_UD;
1553 break;
1554
1555 case ir_unop_i2f:
1556 case ir_unop_i2u:
1557 case ir_unop_u2i:
1558 case ir_unop_u2f:
1559 case ir_unop_b2f:
1560 case ir_unop_b2i:
1561 case ir_unop_f2i:
1562 case ir_unop_f2u:
1563 emit(MOV(result_dst, op[0]));
1564 break;
1565 case ir_unop_f2b:
1566 case ir_unop_i2b: {
1567 emit(CMP(result_dst, op[0], src_reg(0.0f), BRW_CONDITIONAL_NZ));
1568 emit(AND(result_dst, result_src, src_reg(1)));
1569 break;
1570 }
1571
1572 case ir_unop_trunc:
1573 emit(RNDZ(result_dst, op[0]));
1574 break;
1575 case ir_unop_ceil:
1576 op[0].negate = !op[0].negate;
1577 inst = emit(RNDD(result_dst, op[0]));
1578 this->result.negate = true;
1579 break;
1580 case ir_unop_floor:
1581 inst = emit(RNDD(result_dst, op[0]));
1582 break;
1583 case ir_unop_fract:
1584 inst = emit(FRC(result_dst, op[0]));
1585 break;
1586 case ir_unop_round_even:
1587 emit(RNDE(result_dst, op[0]));
1588 break;
1589
1590 case ir_binop_min:
1591 emit_minmax(BRW_CONDITIONAL_L, result_dst, op[0], op[1]);
1592 break;
1593 case ir_binop_max:
1594 emit_minmax(BRW_CONDITIONAL_G, result_dst, op[0], op[1]);
1595 break;
1596
1597 case ir_binop_pow:
1598 emit_math(SHADER_OPCODE_POW, result_dst, op[0], op[1]);
1599 break;
1600
1601 case ir_unop_bit_not:
1602 inst = emit(NOT(result_dst, op[0]));
1603 break;
1604 case ir_binop_bit_and:
1605 inst = emit(AND(result_dst, op[0], op[1]));
1606 break;
1607 case ir_binop_bit_xor:
1608 inst = emit(XOR(result_dst, op[0], op[1]));
1609 break;
1610 case ir_binop_bit_or:
1611 inst = emit(OR(result_dst, op[0], op[1]));
1612 break;
1613
1614 case ir_binop_lshift:
1615 inst = emit(SHL(result_dst, op[0], op[1]));
1616 break;
1617
1618 case ir_binop_rshift:
1619 if (ir->type->base_type == GLSL_TYPE_INT)
1620 inst = emit(ASR(result_dst, op[0], op[1]));
1621 else
1622 inst = emit(SHR(result_dst, op[0], op[1]));
1623 break;
1624
1625 case ir_binop_bfm:
1626 emit(BFI1(result_dst, op[0], op[1]));
1627 break;
1628
1629 case ir_binop_ubo_load: {
1630 ir_constant *uniform_block = ir->operands[0]->as_constant();
1631 ir_constant *const_offset_ir = ir->operands[1]->as_constant();
1632 unsigned const_offset = const_offset_ir ? const_offset_ir->value.u[0] : 0;
1633 src_reg offset = op[1];
1634
1635 /* Now, load the vector from that offset. */
1636 assert(ir->type->is_vector() || ir->type->is_scalar());
1637
1638 src_reg packed_consts = src_reg(this, glsl_type::vec4_type);
1639 packed_consts.type = result.type;
1640 src_reg surf_index =
1641 src_reg(SURF_INDEX_VS_UBO(uniform_block->value.u[0]));
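      /* The pull constant load addresses the buffer in 16-byte (vec4) units:
       * divide a constant byte offset by 16, or shift a dynamic one right by 4.
       */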
1642 if (const_offset_ir) {
1643 offset = src_reg(const_offset / 16);
1644 } else {
1645 emit(SHR(dst_reg(offset), offset, src_reg(4)));
1646 }
1647
1648 vec4_instruction *pull =
1649 emit(new(mem_ctx) vec4_instruction(this,
1650 VS_OPCODE_PULL_CONSTANT_LOAD,
1651 dst_reg(packed_consts),
1652 surf_index,
1653 offset));
1654 pull->base_mrf = 14;
1655 pull->mlen = 1;
1656
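      /* Offset each channel of the swizzle by the starting dword within the
       * fetched 16-byte block so unaligned scalars/vectors read the right
       * components.
       */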
1657 packed_consts.swizzle = swizzle_for_size(ir->type->vector_elements);
1658 packed_consts.swizzle += BRW_SWIZZLE4(const_offset % 16 / 4,
1659 const_offset % 16 / 4,
1660 const_offset % 16 / 4,
1661 const_offset % 16 / 4);
1662
1663 /* UBO bools are any nonzero int. We store bools as either 0 or 1. */
1664 if (ir->type->base_type == GLSL_TYPE_BOOL) {
1665 emit(CMP(result_dst, packed_consts, src_reg(0u),
1666 BRW_CONDITIONAL_NZ));
1667 emit(AND(result_dst, result, src_reg(0x1)));
1668 } else {
1669 emit(MOV(result_dst, packed_consts));
1670 }
1671 break;
1672 }
1673
1674 case ir_binop_vector_extract:
1675 assert(!"should have been lowered by vec_index_to_cond_assign");
1676 break;
1677
1678 case ir_triop_lrp:
1679 op[0] = fix_3src_operand(op[0]);
1680 op[1] = fix_3src_operand(op[1]);
1681 op[2] = fix_3src_operand(op[2]);
1682 /* Note that the instruction's argument order is reversed from GLSL
1683 * and the IR.
1684 */
1685 emit(LRP(result_dst, op[2], op[1], op[0]));
1686 break;
1687
1688 case ir_triop_bfi:
1689 op[0] = fix_3src_operand(op[0]);
1690 op[1] = fix_3src_operand(op[1]);
1691 op[2] = fix_3src_operand(op[2]);
1692 emit(BFI2(result_dst, op[0], op[1], op[2]));
1693 break;
1694
1695 case ir_triop_bitfield_extract:
1696 op[0] = fix_3src_operand(op[0]);
1697 op[1] = fix_3src_operand(op[1]);
1698 op[2] = fix_3src_operand(op[2]);
1699 /* Note that the instruction's argument order is reversed from GLSL
1700 * and the IR.
1701 */
1702 emit(BFE(result_dst, op[2], op[1], op[0]));
1703 break;
1704
1705 case ir_quadop_bitfield_insert:
1706 assert(!"not reached: should be handled by "
1707 "bitfield_insert_to_bfm_bfi\n");
1708 break;
1709
1710 case ir_quadop_vector:
1711 assert(!"not reached: should be handled by lower_quadop_vector");
1712 break;
1713
1714 case ir_unop_pack_half_2x16:
1715 emit_pack_half_2x16(result_dst, op[0]);
1716 break;
1717 case ir_unop_unpack_half_2x16:
1718 emit_unpack_half_2x16(result_dst, op[0]);
1719 break;
1720 case ir_unop_pack_snorm_2x16:
1721 case ir_unop_pack_snorm_4x8:
1722 case ir_unop_pack_unorm_2x16:
1723 case ir_unop_pack_unorm_4x8:
1724 case ir_unop_unpack_snorm_2x16:
1725 case ir_unop_unpack_snorm_4x8:
1726 case ir_unop_unpack_unorm_2x16:
1727 case ir_unop_unpack_unorm_4x8:
1728 assert(!"not reached: should be handled by lower_packing_builtins");
1729 break;
1730 case ir_unop_unpack_half_2x16_split_x:
1731 case ir_unop_unpack_half_2x16_split_y:
1732 case ir_binop_pack_half_2x16_split:
1733 assert(!"not reached: should not occur in vertex shader");
1734 break;
1735 }
1736 }
1737
1738
1739 void
1740 vec4_visitor::visit(ir_swizzle *ir)
1741 {
1742 src_reg src;
1743 int i = 0;
1744 int swizzle[4];
1745
1746 /* Note that this handles only swizzles in expressions, not those on the left
1747 * hand side of an assignment, which are handled as write masking. See ir_assignment
1748 * for that.
1749 */
1750
1751 ir->val->accept(this);
1752 src = this->result;
1753 assert(src.file != BAD_FILE);
1754
1755 for (i = 0; i < ir->type->vector_elements; i++) {
1756 switch (i) {
1757 case 0:
1758 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.x);
1759 break;
1760 case 1:
1761 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.y);
1762 break;
1763 case 2:
1764 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.z);
1765 break;
1766 case 3:
1767 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.w);
1768 break;
1769 }
1770 }
1771 for (; i < 4; i++) {
1772 /* Replicate the last channel out. */
1773 swizzle[i] = swizzle[ir->type->vector_elements - 1];
1774 }
1775
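   /* Compose this swizzle with whatever swizzle the source already carried. */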
1776 src.swizzle = BRW_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]);
1777
1778 this->result = src;
1779 }
1780
1781 void
1782 vec4_visitor::visit(ir_dereference_variable *ir)
1783 {
1784 const struct glsl_type *type = ir->type;
1785 dst_reg *reg = variable_storage(ir->var);
1786
1787 if (!reg) {
1788 fail("Failed to find variable storage for %s\n", ir->var->name);
1789 this->result = src_reg(brw_null_reg());
1790 return;
1791 }
1792
1793 this->result = src_reg(*reg);
1794
1795 /* System values get their swizzle from the dst_reg writemask */
1796 if (ir->var->mode == ir_var_system_value)
1797 return;
1798
1799 if (type->is_scalar() || type->is_vector() || type->is_matrix())
1800 this->result.swizzle = swizzle_for_size(type->vector_elements);
1801 }
1802
1803
1804 int
1805 vec4_visitor::compute_array_stride(ir_dereference_array *ir)
1806 {
1807 /* Under normal circumstances array elements are stored consecutively, so
1808 * the stride is equal to the size of the array element.
1809 */
1810 return type_size(ir->type);
1811 }
1812
1813
1814 void
1815 vec4_visitor::visit(ir_dereference_array *ir)
1816 {
1817 ir_constant *constant_index;
1818 src_reg src;
1819 int array_stride = compute_array_stride(ir);
1820
1821 constant_index = ir->array_index->constant_expression_value();
1822
1823 ir->array->accept(this);
1824 src = this->result;
1825
1826 if (constant_index) {
1827 src.reg_offset += constant_index->value.i[0] * array_stride;
1828 } else {
1829 /* Variable index array dereference. It eats the "vec4" of the
1830 * base of the array and an index that offsets the Mesa register
1831 * index.
1832 */
1833 ir->array_index->accept(this);
1834
1835 src_reg index_reg;
1836
1837 if (array_stride == 1) {
1838 index_reg = this->result;
1839 } else {
1840 index_reg = src_reg(this, glsl_type::int_type);
1841
1842 emit(MUL(dst_reg(index_reg), this->result, src_reg(array_stride)));
1843 }
1844
1845 if (src.reladdr) {
1846 src_reg temp = src_reg(this, glsl_type::int_type);
1847
1848 emit(ADD(dst_reg(temp), *src.reladdr, index_reg));
1849
1850 index_reg = temp;
1851 }
1852
1853 src.reladdr = ralloc(mem_ctx, src_reg);
1854 memcpy(src.reladdr, &index_reg, sizeof(index_reg));
1855 }
1856
1857 /* If the type is smaller than a vec4, replicate the last channel out. */
1858 if (ir->type->is_scalar() || ir->type->is_vector() || ir->type->is_matrix())
1859 src.swizzle = swizzle_for_size(ir->type->vector_elements);
1860 else
1861 src.swizzle = BRW_SWIZZLE_NOOP;
1862 src.type = brw_type_for_base_type(ir->type);
1863
1864 this->result = src;
1865 }
1866
1867 void
1868 vec4_visitor::visit(ir_dereference_record *ir)
1869 {
1870 unsigned int i;
1871 const glsl_type *struct_type = ir->record->type;
1872 int offset = 0;
1873
1874 ir->record->accept(this);
1875
1876 for (i = 0; i < struct_type->length; i++) {
1877 if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0)
1878 break;
1879 offset += type_size(struct_type->fields.structure[i].type);
1880 }
1881
1882 /* If the type is smaller than a vec4, replicate the last channel out. */
1883 if (ir->type->is_scalar() || ir->type->is_vector() || ir->type->is_matrix())
1884 this->result.swizzle = swizzle_for_size(ir->type->vector_elements);
1885 else
1886 this->result.swizzle = BRW_SWIZZLE_NOOP;
1887 this->result.type = brw_type_for_base_type(ir->type);
1888
1889 this->result.reg_offset += offset;
1890 }
1891
1892 /**
1893 * We want to be careful in assignment setup to hit the actual storage
1894 * instead of potentially using a temporary like we might with the
1895 * ir_dereference handler.
1896 */
1897 static dst_reg
1898 get_assignment_lhs(ir_dereference *ir, vec4_visitor *v)
1899 {
1900 /* The LHS must be a dereference. If the LHS is a variable indexed array
1901 * access of a vector, it must be separated into a series of conditional moves
1902 * before reaching this point (see ir_vec_index_to_cond_assign).
1903 */
1904 assert(ir->as_dereference());
1905 ir_dereference_array *deref_array = ir->as_dereference_array();
1906 if (deref_array) {
1907 assert(!deref_array->array->type->is_vector());
1908 }
1909
1910 /* Use the rvalue deref handler for the most part. We'll ignore
1911 * swizzles in it and write swizzles using writemask, though.
1912 */
1913 ir->accept(v);
1914 return dst_reg(v->result);
1915 }
1916
1917 void
1918 vec4_visitor::emit_block_move(dst_reg *dst, src_reg *src,
1919 const struct glsl_type *type, uint32_t predicate)
1920 {
1921 if (type->base_type == GLSL_TYPE_STRUCT) {
1922 for (unsigned int i = 0; i < type->length; i++) {
1923 emit_block_move(dst, src, type->fields.structure[i].type, predicate);
1924 }
1925 return;
1926 }
1927
1928 if (type->is_array()) {
1929 for (unsigned int i = 0; i < type->length; i++) {
1930 emit_block_move(dst, src, type->fields.array, predicate);
1931 }
1932 return;
1933 }
1934
1935 if (type->is_matrix()) {
1936 const struct glsl_type *vec_type;
1937
1938 vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT,
1939 type->vector_elements, 1);
1940
1941 for (int i = 0; i < type->matrix_columns; i++) {
1942 emit_block_move(dst, src, vec_type, predicate);
1943 }
1944 return;
1945 }
1946
1947 assert(type->is_scalar() || type->is_vector());
1948
1949 dst->type = brw_type_for_base_type(type);
1950 src->type = dst->type;
1951
1952 dst->writemask = (1 << type->vector_elements) - 1;
1953
1954 src->swizzle = swizzle_for_size(type->vector_elements);
1955
1956 vec4_instruction *inst = emit(MOV(*dst, *src));
1957 inst->predicate = predicate;
1958
1959 dst->reg_offset++;
1960 src->reg_offset++;
1961 }
1962
1963
1964 /* If the RHS processing resulted in an instruction generating a
1965 * temporary value, and it would be easy to rewrite the instruction to
1966 * generate its result right into the LHS instead, do so. This ends
1967 * up reliably removing instructions where it can be tricky to do so
1968 * later without real UD chain information.
1969 */
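/* Illustrative example: for a GLSL assignment such as "v = a + b;",
 * visiting the RHS emits roughly
 *
 *    ADD tmp.xyzw, a.xyzw, b.xyzw
 *
 * and the assignment would normally append "MOV v, tmp".  When the
 * checks in this function succeed, the ADD's destination is rewritten
 * to v directly and the trailing MOV is never emitted.
 */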
1970 bool
1971 vec4_visitor::try_rewrite_rhs_to_dst(ir_assignment *ir,
1972 dst_reg dst,
1973 src_reg src,
1974 vec4_instruction *pre_rhs_inst,
1975 vec4_instruction *last_rhs_inst)
1976 {
1977 /* This could be supported, but it would take more smarts. */
1978 if (ir->condition)
1979 return false;
1980
1981 if (pre_rhs_inst == last_rhs_inst)
1982 return false; /* No instructions generated to work with. */
1983
1984 /* Make sure the last instruction generated our source reg. */
1985 if (src.file != GRF ||
1986 src.file != last_rhs_inst->dst.file ||
1987 src.reg != last_rhs_inst->dst.reg ||
1988 src.reg_offset != last_rhs_inst->dst.reg_offset ||
1989 src.reladdr ||
1990 src.abs ||
1991 src.negate ||
1992 last_rhs_inst->predicate != BRW_PREDICATE_NONE)
1993 return false;
1994
1995 /* Check that the last instruction fully initialized the channels
1996 * we want to use, in the order we want to use them. We could
1997 * potentially reswizzle the operands of many instructions so that
1998 * we could handle out of order channels, but don't yet.
1999 */
2000
2001 for (unsigned i = 0; i < 4; i++) {
2002 if (dst.writemask & (1 << i)) {
2003 if (!(last_rhs_inst->dst.writemask & (1 << i)))
2004 return false;
2005
2006 if (BRW_GET_SWZ(src.swizzle, i) != i)
2007 return false;
2008 }
2009 }
2010
2011 /* Success! Rewrite the instruction. */
2012 last_rhs_inst->dst.file = dst.file;
2013 last_rhs_inst->dst.reg = dst.reg;
2014 last_rhs_inst->dst.reg_offset = dst.reg_offset;
2015 last_rhs_inst->dst.reladdr = dst.reladdr;
2016 last_rhs_inst->dst.writemask &= dst.writemask;
2017
2018 return true;
2019 }
2020
2021 void
2022 vec4_visitor::visit(ir_assignment *ir)
2023 {
2024 dst_reg dst = get_assignment_lhs(ir->lhs, this);
2025 uint32_t predicate = BRW_PREDICATE_NONE;
2026
2027 if (!ir->lhs->type->is_scalar() &&
2028 !ir->lhs->type->is_vector()) {
2029 ir->rhs->accept(this);
2030 src_reg src = this->result;
2031
2032 if (ir->condition) {
2033 emit_bool_to_cond_code(ir->condition, &predicate);
2034 }
2035
2036 /* emit_block_move doesn't account for swizzles in the source register.
2037 * This should be ok, since the source register is a structure or an
2038 * array, and those can't be swizzled. But double-check to be sure.
2039 */
2040 assert(src.swizzle ==
2041 (ir->rhs->type->is_matrix()
2042 ? swizzle_for_size(ir->rhs->type->vector_elements)
2043 : BRW_SWIZZLE_NOOP));
2044
2045 emit_block_move(&dst, &src, ir->rhs->type, predicate);
2046 return;
2047 }
2048
2049 /* Now we're down to just a scalar/vector with writemasks. */
2050 int i;
2051
2052 vec4_instruction *pre_rhs_inst, *last_rhs_inst;
2053 pre_rhs_inst = (vec4_instruction *)this->instructions.get_tail();
2054
2055 ir->rhs->accept(this);
2056
2057 last_rhs_inst = (vec4_instruction *)this->instructions.get_tail();
2058
2059 src_reg src = this->result;
2060
2061 int swizzles[4];
2062 int first_enabled_chan = 0;
2063 int src_chan = 0;
2064
2065 assert(ir->lhs->type->is_vector() ||
2066 ir->lhs->type->is_scalar());
2067 dst.writemask = ir->write_mask;
2068
2069 for (int i = 0; i < 4; i++) {
2070 if (dst.writemask & (1 << i)) {
2071 first_enabled_chan = BRW_GET_SWZ(src.swizzle, i);
2072 break;
2073 }
2074 }
2075
2076 /* Swizzle a small RHS vector into the channels being written.
2077 *
2078 * glsl ir treats write_mask as dictating how many channels are
2079 * present on the RHS, while in our instructions we need to make
2080 * those channels appear in the slots of the vec4 they're written to.
2081 */
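/* Worked example (illustrative): writing a vec2 RHS into v.yw gives
 * write_mask = YW and an incoming swizzle of .xyyy from
 * swizzle_for_size(2).  The loop below then builds
 * swizzles[] = { Y, X, Y, Y }, placing the RHS's X and Y components in
 * the Y and W slots that are actually written; the other two entries
 * are don't-cares.
 */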
2082 for (int i = 0; i < 4; i++) {
2083 if (dst.writemask & (1 << i))
2084 swizzles[i] = BRW_GET_SWZ(src.swizzle, src_chan++);
2085 else
2086 swizzles[i] = first_enabled_chan;
2087 }
2088 src.swizzle = BRW_SWIZZLE4(swizzles[0], swizzles[1],
2089 swizzles[2], swizzles[3]);
2090
2091 if (try_rewrite_rhs_to_dst(ir, dst, src, pre_rhs_inst, last_rhs_inst)) {
2092 return;
2093 }
2094
2095 if (ir->condition) {
2096 emit_bool_to_cond_code(ir->condition, &predicate);
2097 }
2098
2099 for (i = 0; i < type_size(ir->lhs->type); i++) {
2100 vec4_instruction *inst = emit(MOV(dst, src));
2101 inst->predicate = predicate;
2102
2103 dst.reg_offset++;
2104 src.reg_offset++;
2105 }
2106 }
2107
2108 void
2109 vec4_visitor::emit_constant_values(dst_reg *dst, ir_constant *ir)
2110 {
2111 if (ir->type->base_type == GLSL_TYPE_STRUCT) {
2112 foreach_list(node, &ir->components) {
2113 ir_constant *field_value = (ir_constant *)node;
2114
2115 emit_constant_values(dst, field_value);
2116 }
2117 return;
2118 }
2119
2120 if (ir->type->is_array()) {
2121 for (unsigned int i = 0; i < ir->type->length; i++) {
2122 emit_constant_values(dst, ir->array_elements[i]);
2123 }
2124 return;
2125 }
2126
2127 if (ir->type->is_matrix()) {
2128 for (int i = 0; i < ir->type->matrix_columns; i++) {
2129 float *vec = &ir->value.f[i * ir->type->vector_elements];
2130
2131 for (int j = 0; j < ir->type->vector_elements; j++) {
2132 dst->writemask = 1 << j;
2133 dst->type = BRW_REGISTER_TYPE_F;
2134
2135 emit(MOV(*dst, src_reg(vec[j])));
2136 }
2137 dst->reg_offset++;
2138 }
2139 return;
2140 }
2141
2142 int remaining_writemask = (1 << ir->type->vector_elements) - 1;
2143
2144 for (int i = 0; i < ir->type->vector_elements; i++) {
2145 if (!(remaining_writemask & (1 << i)))
2146 continue;
2147
2148 dst->writemask = 1 << i;
2149 dst->type = brw_type_for_base_type(ir->type);
2150
2151 /* Find other components that match the one we're about to
2152 * write. Emits fewer instructions for things like vec4(0.5,
2153 * 1.5, 1.5, 1.5).
2154 */
2155 for (int j = i + 1; j < ir->type->vector_elements; j++) {
2156 if (ir->type->base_type == GLSL_TYPE_BOOL) {
2157 if (ir->value.b[i] == ir->value.b[j])
2158 dst->writemask |= (1 << j);
2159 } else {
2160 /* u, i, and f storage all line up, so no need for a
2161 * switch case for comparing each type.
2162 */
2163 if (ir->value.u[i] == ir->value.u[j])
2164 dst->writemask |= (1 << j);
2165 }
2166 }
2167
2168 switch (ir->type->base_type) {
2169 case GLSL_TYPE_FLOAT:
2170 emit(MOV(*dst, src_reg(ir->value.f[i])));
2171 break;
2172 case GLSL_TYPE_INT:
2173 emit(MOV(*dst, src_reg(ir->value.i[i])));
2174 break;
2175 case GLSL_TYPE_UINT:
2176 emit(MOV(*dst, src_reg(ir->value.u[i])));
2177 break;
2178 case GLSL_TYPE_BOOL:
2179 emit(MOV(*dst, src_reg(ir->value.b[i])));
2180 break;
2181 default:
2182 assert(!"Non-float/uint/int/bool constant");
2183 break;
2184 }
2185
2186 remaining_writemask &= ~dst->writemask;
2187 }
2188 dst->reg_offset++;
2189 }
2190
2191 void
2192 vec4_visitor::visit(ir_constant *ir)
2193 {
2194 dst_reg dst = dst_reg(this, ir->type);
2195 this->result = src_reg(dst);
2196
2197 emit_constant_values(&dst, ir);
2198 }
2199
2200 void
2201 vec4_visitor::visit(ir_call *ir)
2202 {
2203 assert(!"not reached");
2204 }
2205
2206 void
2207 vec4_visitor::visit(ir_texture *ir)
2208 {
2209 int sampler =
2210 _mesa_get_sampler_uniform_value(ir->sampler, shader_prog, prog);
2211
2212 /* Should be lowered by do_lower_texture_projection */
2213 assert(!ir->projector);
2214
2215 /* Generate code to compute all the subexpression trees. This has to be
2216 * done before loading any values into MRFs for the sampler message since
2217 * generating these values may involve SEND messages that need the MRFs.
2218 */
2219 src_reg coordinate;
2220 if (ir->coordinate) {
2221 ir->coordinate->accept(this);
2222 coordinate = this->result;
2223 }
2224
2225 src_reg shadow_comparitor;
2226 if (ir->shadow_comparitor) {
2227 ir->shadow_comparitor->accept(this);
2228 shadow_comparitor = this->result;
2229 }
2230
2231 const glsl_type *lod_type = NULL, *sample_index_type = NULL;
2232 src_reg lod, dPdx, dPdy, sample_index;
2233 switch (ir->op) {
2234 case ir_tex:
2235 lod = src_reg(0.0f);
2236 lod_type = glsl_type::float_type;
2237 break;
2238 case ir_txf:
2239 case ir_txl:
2240 case ir_txs:
2241 ir->lod_info.lod->accept(this);
2242 lod = this->result;
2243 lod_type = ir->lod_info.lod->type;
2244 break;
2245 case ir_txf_ms:
2246 ir->lod_info.sample_index->accept(this);
2247 sample_index = this->result;
2248 sample_index_type = ir->lod_info.sample_index->type;
2249 break;
2250 case ir_txd:
2251 ir->lod_info.grad.dPdx->accept(this);
2252 dPdx = this->result;
2253
2254 ir->lod_info.grad.dPdy->accept(this);
2255 dPdy = this->result;
2256
2257 lod_type = ir->lod_info.grad.dPdx->type;
2258 break;
2259 case ir_txb:
2260 case ir_lod:
2261 break;
2262 }
2263
2264 vec4_instruction *inst = NULL;
2265 switch (ir->op) {
2266 case ir_tex:
2267 case ir_txl:
2268 inst = new(mem_ctx) vec4_instruction(this, SHADER_OPCODE_TXL);
2269 break;
2270 case ir_txd:
2271 inst = new(mem_ctx) vec4_instruction(this, SHADER_OPCODE_TXD);
2272 break;
2273 case ir_txf:
2274 inst = new(mem_ctx) vec4_instruction(this, SHADER_OPCODE_TXF);
2275 break;
2276 case ir_txf_ms:
2277 inst = new(mem_ctx) vec4_instruction(this, SHADER_OPCODE_TXF_MS);
2278 break;
2279 case ir_txs:
2280 inst = new(mem_ctx) vec4_instruction(this, SHADER_OPCODE_TXS);
2281 break;
2282 case ir_txb:
2283 assert(!"TXB is not valid for vertex shaders.");
2284 break;
2285 case ir_lod:
2286 assert(!"LOD is not valid for vertex shaders.");
2287 break;
2288 }
2289
2290 bool use_texture_offset = ir->offset != NULL && ir->op != ir_txf;
2291
2292 /* Texel offsets go in the message header; Gen4 also requires headers. */
2293 inst->header_present = use_texture_offset || intel->gen < 5;
2294 inst->base_mrf = 2;
2295 inst->mlen = inst->header_present + 1; /* always at least one */
2296 inst->sampler = sampler;
2297 inst->dst = dst_reg(this, ir->type);
2298 inst->dst.writemask = WRITEMASK_XYZW;
2299 inst->shadow_compare = ir->shadow_comparitor != NULL;
2300
2301 if (use_texture_offset)
2302 inst->texture_offset = brw_texture_offset(ir->offset->as_constant());
2303
2304 /* MRF for the first parameter */
2305 int param_base = inst->base_mrf + inst->header_present;
2306
2307 if (ir->op == ir_txs) {
2308 int writemask = intel->gen == 4 ? WRITEMASK_W : WRITEMASK_X;
2309 emit(MOV(dst_reg(MRF, param_base, lod_type, writemask), lod));
2310 } else {
2311 int i, coord_mask = 0, zero_mask = 0;
2312 /* Load the coordinate */
2313 /* FINISHME: gl_clamp_mask and saturate */
2314 for (i = 0; i < ir->coordinate->type->vector_elements; i++)
2315 coord_mask |= (1 << i);
2316 for (; i < 4; i++)
2317 zero_mask |= (1 << i);
2318
2319 if (ir->offset && ir->op == ir_txf) {
2320 /* It appears that the ld instruction used for txf does its
2321 * address bounds check before adding in the offset. To work
2322 * around this, just add the integer offset to the integer
2323 * texel coordinate, and don't put the offset in the header.
2324 */
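/* Illustrative example: for texelFetchOffset(s, coord, lod, ivec2(1, 2)),
 * the loop below emits one ADD per coordinate component
 * (coord.x + 1, coord.y + 2) into the coordinate MRF rather than
 * placing the offset in the message header.
 */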
2325 ir_constant *offset = ir->offset->as_constant();
2326 assert(offset);
2327
2328 for (int j = 0; j < ir->coordinate->type->vector_elements; j++) {
2329 src_reg src = coordinate;
2330 src.swizzle = BRW_SWIZZLE4(BRW_GET_SWZ(src.swizzle, j),
2331 BRW_GET_SWZ(src.swizzle, j),
2332 BRW_GET_SWZ(src.swizzle, j),
2333 BRW_GET_SWZ(src.swizzle, j));
2334 emit(ADD(dst_reg(MRF, param_base, ir->coordinate->type, 1 << j),
2335 src, offset->value.i[j]));
2336 }
2337 } else {
2338 emit(MOV(dst_reg(MRF, param_base, ir->coordinate->type, coord_mask),
2339 coordinate));
2340 }
2341 emit(MOV(dst_reg(MRF, param_base, ir->coordinate->type, zero_mask),
2342 src_reg(0)));
2343 /* Load the shadow comparitor */
2344 if (ir->shadow_comparitor && ir->op != ir_txd) {
2345 emit(MOV(dst_reg(MRF, param_base + 1, ir->shadow_comparitor->type,
2346 WRITEMASK_X),
2347 shadow_comparitor));
2348 inst->mlen++;
2349 }
2350
2351 /* Load the LOD info */
2352 if (ir->op == ir_tex || ir->op == ir_txl) {
2353 int mrf, writemask;
2354 if (intel->gen >= 5) {
2355 mrf = param_base + 1;
2356 if (ir->shadow_comparitor) {
2357 writemask = WRITEMASK_Y;
2358 /* mlen already incremented */
2359 } else {
2360 writemask = WRITEMASK_X;
2361 inst->mlen++;
2362 }
2363 } else /* intel->gen == 4 */ {
2364 mrf = param_base;
2365 writemask = WRITEMASK_Z;
2366 }
2367 emit(MOV(dst_reg(MRF, mrf, lod_type, writemask), lod));
2368 } else if (ir->op == ir_txf) {
2369 emit(MOV(dst_reg(MRF, param_base, lod_type, WRITEMASK_W), lod));
2370 } else if (ir->op == ir_txf_ms) {
2371 emit(MOV(dst_reg(MRF, param_base + 1, sample_index_type, WRITEMASK_X),
2372 sample_index));
2373 inst->mlen++;
2374
2375 /* On Gen7, there is an additional MCS parameter here after SI,
2376 * but we don't bother to emit it since it's always zero. If
2377 * we start supporting texturing from CMS surfaces, this will have
2378 * to change.
2379 */
2380 } else if (ir->op == ir_txd) {
2381 const glsl_type *type = lod_type;
2382
2383 if (intel->gen >= 5) {
2384 dPdx.swizzle = BRW_SWIZZLE4(SWIZZLE_X,SWIZZLE_X,SWIZZLE_Y,SWIZZLE_Y);
2385 dPdy.swizzle = BRW_SWIZZLE4(SWIZZLE_X,SWIZZLE_X,SWIZZLE_Y,SWIZZLE_Y);
2386 emit(MOV(dst_reg(MRF, param_base + 1, type, WRITEMASK_XZ), dPdx));
2387 emit(MOV(dst_reg(MRF, param_base + 1, type, WRITEMASK_YW), dPdy));
2388 inst->mlen++;
2389
2390 if (ir->type->vector_elements == 3 || ir->shadow_comparitor) {
2391 dPdx.swizzle = BRW_SWIZZLE_ZZZZ;
2392 dPdy.swizzle = BRW_SWIZZLE_ZZZZ;
2393 emit(MOV(dst_reg(MRF, param_base + 2, type, WRITEMASK_X), dPdx));
2394 emit(MOV(dst_reg(MRF, param_base + 2, type, WRITEMASK_Y), dPdy));
2395 inst->mlen++;
2396
2397 if (ir->shadow_comparitor) {
2398 emit(MOV(dst_reg(MRF, param_base + 2,
2399 ir->shadow_comparitor->type, WRITEMASK_Z),
2400 shadow_comparitor));
2401 }
2402 }
2403 } else /* intel->gen == 4 */ {
2404 emit(MOV(dst_reg(MRF, param_base + 1, type, WRITEMASK_XYZ), dPdx));
2405 emit(MOV(dst_reg(MRF, param_base + 2, type, WRITEMASK_XYZ), dPdy));
2406 inst->mlen += 2;
2407 }
2408 }
2409 }
2410
2411 emit(inst);
2412
2413 /* fixup num layers (z) for cube arrays: hardware returns faces * layers;
2414 * spec requires layers.
2415 */
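/* Worked example (illustrative): a samplerCubeArray allocated with 4
 * layers reports 6 * 4 = 24 in the Z channel of the txs result; the
 * INT_QUOTIENT by 6 below makes textureSize() return the 4 layers the
 * spec asks for.
 */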
2416 if (ir->op == ir_txs) {
2417 glsl_type const *type = ir->sampler->type;
2418 if (type->sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE &&
2419 type->sampler_array) {
2420 emit_math(SHADER_OPCODE_INT_QUOTIENT,
2421 with_writemask(inst->dst, WRITEMASK_Z),
2422 src_reg(inst->dst), src_reg(6));
2423 }
2424 }
2425
2426 swizzle_result(ir, src_reg(inst->dst), sampler);
2427 }
2428
2429 void
2430 vec4_visitor::swizzle_result(ir_texture *ir, src_reg orig_val, int sampler)
2431 {
2432 int s = key->tex.swizzles[sampler];
2433
2434 this->result = src_reg(this, ir->type);
2435 dst_reg swizzled_result(this->result);
2436
2437 if (ir->op == ir_txs || ir->type == glsl_type::float_type
2438 || s == SWIZZLE_NOOP) {
2439 emit(MOV(swizzled_result, orig_val));
2440 return;
2441 }
2442
2443 int zero_mask = 0, one_mask = 0, copy_mask = 0;
2444 int swizzle[4];
2445
2446 for (int i = 0; i < 4; i++) {
2447 switch (GET_SWZ(s, i)) {
2448 case SWIZZLE_ZERO:
2449 zero_mask |= (1 << i);
2450 break;
2451 case SWIZZLE_ONE:
2452 one_mask |= (1 << i);
2453 break;
2454 default:
2455 copy_mask |= (1 << i);
2456 swizzle[i] = GET_SWZ(s, i);
2457 break;
2458 }
2459 }
2460
2461 if (copy_mask) {
2462 orig_val.swizzle = BRW_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]);
2463 swizzled_result.writemask = copy_mask;
2464 emit(MOV(swizzled_result, orig_val));
2465 }
2466
2467 if (zero_mask) {
2468 swizzled_result.writemask = zero_mask;
2469 emit(MOV(swizzled_result, src_reg(0.0f)));
2470 }
2471
2472 if (one_mask) {
2473 swizzled_result.writemask = one_mask;
2474 emit(MOV(swizzled_result, src_reg(1.0f)));
2475 }
2476 }
2477
2478 void
2479 vec4_visitor::visit(ir_return *ir)
2480 {
2481 assert(!"not reached");
2482 }
2483
2484 void
2485 vec4_visitor::visit(ir_discard *ir)
2486 {
2487 assert(!"not reached");
2488 }
2489
2490 void
2491 vec4_visitor::visit(ir_if *ir)
2492 {
2493 /* Don't point the annotation at the if statement, because then it plus
2494 * the then and else blocks get printed.
2495 */
2496 this->base_ir = ir->condition;
2497
2498 if (intel->gen == 6) {
2499 emit_if_gen6(ir);
2500 } else {
2501 uint32_t predicate;
2502 emit_bool_to_cond_code(ir->condition, &predicate);
2503 emit(IF(predicate));
2504 }
2505
2506 visit_instructions(&ir->then_instructions);
2507
2508 if (!ir->else_instructions.is_empty()) {
2509 this->base_ir = ir->condition;
2510 emit(BRW_OPCODE_ELSE);
2511
2512 visit_instructions(&ir->else_instructions);
2513 }
2514
2515 this->base_ir = ir->condition;
2516 emit(BRW_OPCODE_ENDIF);
2517 }
2518
2519 void
2520 vec4_visitor::emit_ndc_computation()
2521 {
2522 /* Get the position */
2523 src_reg pos = src_reg(output_reg[VARYING_SLOT_POS]);
2524
2525 /* Build ndc coords, which are (x/w, y/w, z/w, 1/w) */
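/* Illustrative example: a clip-space position of (2, 4, 6, 2) becomes
 * an NDC value of (1, 2, 3, 0.5) via the RCP and MUL emitted below.
 */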
2526 dst_reg ndc = dst_reg(this, glsl_type::vec4_type);
2527 output_reg[BRW_VARYING_SLOT_NDC] = ndc;
2528
2529 current_annotation = "NDC";
2530 dst_reg ndc_w = ndc;
2531 ndc_w.writemask = WRITEMASK_W;
2532 src_reg pos_w = pos;
2533 pos_w.swizzle = BRW_SWIZZLE4(SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W);
2534 emit_math(SHADER_OPCODE_RCP, ndc_w, pos_w);
2535
2536 dst_reg ndc_xyz = ndc;
2537 ndc_xyz.writemask = WRITEMASK_XYZ;
2538
2539 emit(MUL(ndc_xyz, pos, src_reg(ndc_w)));
2540 }
2541
2542 void
2543 vec4_visitor::emit_psiz_and_flags(struct brw_reg reg)
2544 {
2545 if (intel->gen < 6 &&
2546 ((prog_data->vue_map.slots_valid & VARYING_BIT_PSIZ) ||
2547 key->userclip_active || brw->has_negative_rhw_bug)) {
2548 dst_reg header1 = dst_reg(this, glsl_type::uvec4_type);
2549 dst_reg header1_w = header1;
2550 header1_w.writemask = WRITEMASK_W;
2551 GLuint i;
2552
2553 emit(MOV(header1, 0u));
2554
2555 if (prog_data->vue_map.slots_valid & VARYING_BIT_PSIZ) {
2556 src_reg psiz = src_reg(output_reg[VARYING_SLOT_PSIZ]);
2557
2558 current_annotation = "Point size";
2559 emit(MUL(header1_w, psiz, src_reg((float)(1 << 11))));
2560 emit(AND(header1_w, src_reg(header1_w), 0x7ff << 8));
2561 }
2562
2563 current_annotation = "Clipping flags";
2564 for (i = 0; i < key->nr_userclip_plane_consts; i++) {
2565 vec4_instruction *inst;
2566
2567 inst = emit(DP4(dst_null_f(), src_reg(output_reg[VARYING_SLOT_POS]),
2568 src_reg(this->userplane[i])));
2569 inst->conditional_mod = BRW_CONDITIONAL_L;
2570
2571 inst = emit(OR(header1_w, src_reg(header1_w), 1u << i));
2572 inst->predicate = BRW_PREDICATE_NORMAL;
2573 }
2574
2575 /* i965 clipping workaround:
2576 * 1) Test for -ve rhw
2577 * 2) If set,
2578 * set ndc = (0,0,0,0)
2579 * set ucp[6] = 1
2580 *
2581 * Later, clipping will detect ucp[6] and ensure the primitive is
2582 * clipped against all fixed planes.
2583 */
2584 if (brw->has_negative_rhw_bug) {
2585 src_reg ndc_w = src_reg(output_reg[BRW_VARYING_SLOT_NDC]);
2586 ndc_w.swizzle = BRW_SWIZZLE_WWWW;
2587 emit(CMP(dst_null_f(), ndc_w, src_reg(0.0f), BRW_CONDITIONAL_L));
2588 vec4_instruction *inst;
2589 inst = emit(OR(header1_w, src_reg(header1_w), src_reg(1u << 6)));
2590 inst->predicate = BRW_PREDICATE_NORMAL;
2591 inst = emit(MOV(output_reg[BRW_VARYING_SLOT_NDC], src_reg(0.0f)));
2592 inst->predicate = BRW_PREDICATE_NORMAL;
2593 }
2594
2595 emit(MOV(retype(reg, BRW_REGISTER_TYPE_UD), src_reg(header1)));
2596 } else if (intel->gen < 6) {
2597 emit(MOV(retype(reg, BRW_REGISTER_TYPE_UD), 0u));
2598 } else {
2599 emit(MOV(retype(reg, BRW_REGISTER_TYPE_D), src_reg(0)));
2600 if (prog_data->vue_map.slots_valid & VARYING_BIT_PSIZ) {
2601 emit(MOV(brw_writemask(reg, WRITEMASK_W),
2602 src_reg(output_reg[VARYING_SLOT_PSIZ])));
2603 }
2604 if (prog_data->vue_map.slots_valid & VARYING_BIT_LAYER) {
2605 emit(MOV(retype(brw_writemask(reg, WRITEMASK_Y), BRW_REGISTER_TYPE_D),
2606 src_reg(output_reg[VARYING_SLOT_LAYER])));
2607 }
2608 }
2609 }
2610
2611 void
2612 vec4_visitor::emit_clip_distances(struct brw_reg reg, int offset)
2613 {
2614 if (intel->gen < 6) {
2615 /* Clip distance slots are set aside in gen5, but they are not used. It
2616 * is not clear whether we actually need to set aside space for them,
2617 * but the performance cost is negligible.
2618 */
2619 return;
2620 }
2621
2622 /* From the GLSL 1.30 spec, section 7.1 (Vertex Shader Special Variables):
2623 *
2624 * "If a linked set of shaders forming the vertex stage contains no
2625 * static write to gl_ClipVertex or gl_ClipDistance, but the
2626 * application has requested clipping against user clip planes through
2627 * the API, then the coordinate written to gl_Position is used for
2628 * comparison against the user clip planes."
2629 *
2630 * This function is only called if the shader didn't write to
2631 * gl_ClipDistance. Accordingly, we use gl_ClipVertex to perform clipping
2632 * if the user wrote to it; otherwise we use gl_Position.
2633 */
2634 gl_varying_slot clip_vertex = VARYING_SLOT_CLIP_VERTEX;
2635 if (!(prog_data->vue_map.slots_valid & VARYING_BIT_CLIP_VERTEX)) {
2636 clip_vertex = VARYING_SLOT_POS;
2637 }
2638
2639 for (int i = 0; i + offset < key->nr_userclip_plane_consts && i < 4;
2640 ++i) {
2641 emit(DP4(dst_reg(brw_writemask(reg, 1 << i)),
2642 src_reg(output_reg[clip_vertex]),
2643 src_reg(this->userplane[i + offset])));
2644 }
2645 }
2646
2647 void
2648 vec4_visitor::emit_generic_urb_slot(dst_reg reg, int varying)
2649 {
2650 assert (varying < VARYING_SLOT_MAX);
2651 reg.type = output_reg[varying].type;
2652 current_annotation = output_reg_annotation[varying];
2653 /* Copy the register, saturating if necessary */
2654 vec4_instruction *inst = emit(MOV(reg,
2655 src_reg(output_reg[varying])));
2656 if ((varying == VARYING_SLOT_COL0 ||
2657 varying == VARYING_SLOT_COL1 ||
2658 varying == VARYING_SLOT_BFC0 ||
2659 varying == VARYING_SLOT_BFC1) &&
2660 key->clamp_vertex_color) {
2661 inst->saturate = true;
2662 }
2663 }
2664
2665 void
2666 vec4_visitor::emit_urb_slot(int mrf, int varying)
2667 {
2668 struct brw_reg hw_reg = brw_message_reg(mrf);
2669 dst_reg reg = dst_reg(MRF, mrf);
2670 reg.type = BRW_REGISTER_TYPE_F;
2671
2672 switch (varying) {
2673 case VARYING_SLOT_PSIZ:
2674 /* PSIZ is always in slot 0, and is coupled with other flags. */
2675 current_annotation = "indices, point width, clip flags";
2676 emit_psiz_and_flags(hw_reg);
2677 break;
2678 case BRW_VARYING_SLOT_NDC:
2679 current_annotation = "NDC";
2680 emit(MOV(reg, src_reg(output_reg[BRW_VARYING_SLOT_NDC])));
2681 break;
2682 case BRW_VARYING_SLOT_POS_DUPLICATE:
2683 case VARYING_SLOT_POS:
2684 current_annotation = "gl_Position";
2685 emit(MOV(reg, src_reg(output_reg[VARYING_SLOT_POS])));
2686 break;
2687 case VARYING_SLOT_CLIP_DIST0:
2688 case VARYING_SLOT_CLIP_DIST1:
2689 if (this->key->uses_clip_distance) {
2690 emit_generic_urb_slot(reg, varying);
2691 } else {
2692 current_annotation = "user clip distances";
2693 emit_clip_distances(hw_reg, (varying - VARYING_SLOT_CLIP_DIST0) * 4);
2694 }
2695 break;
2696 case VARYING_SLOT_EDGE:
2697 /* This is present when doing unfilled polygons. We're supposed to copy
2698 * the edge flag from the user-provided vertex array
2699 * (glEdgeFlagPointer), or otherwise we'll copy from the current value
2700 * of that attribute (starts as 1.0f). This is then used in clipping to
2701 * determine which edges should be drawn as wireframe.
2702 */
2703 current_annotation = "edge flag";
2704 emit(MOV(reg, src_reg(dst_reg(ATTR, VERT_ATTRIB_EDGEFLAG,
2705 glsl_type::float_type, WRITEMASK_XYZW))));
2706 break;
2707 case BRW_VARYING_SLOT_PAD:
2708 /* No need to write to this slot */
2709 break;
2710 default:
2711 emit_generic_urb_slot(reg, varying);
2712 break;
2713 }
2714 }
2715
2716 static int
2717 align_interleaved_urb_mlen(struct brw_context *brw, int mlen)
2718 {
2719 struct intel_context *intel = &brw->intel;
2720
2721 if (intel->gen >= 6) {
2722 /* URB data written (does not include the message header reg) must
2723 * be a multiple of 256 bits, or 2 VS registers. See vol5c.5,
2724 * section 5.4.3.2.2: URB_INTERLEAVED.
2725 *
2726 * URB entries are allocated on a multiple of 1024 bits, so an
2727 * extra 128 bits written here to make the end align to 256 is
2728 * no problem.
2729 */
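/* Worked example (illustrative): mlen = 8 is one header register plus
 * 7 data registers; 7 is not a multiple of 2, so the check below bumps
 * mlen to 9 (8 data registers).  An already-odd mlen such as 9 is left
 * unchanged.
 */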
2730 if ((mlen % 2) != 1)
2731 mlen++;
2732 }
2733
2734 return mlen;
2735 }
2736
2737 void
2738 vec4_vs_visitor::emit_urb_write_header(int mrf)
2739 {
2740 /* No need to do anything for VS; an implied write to this MRF will be
2741 * performed by VS_OPCODE_URB_WRITE.
2742 */
2743 (void) mrf;
2744 }
2745
2746 vec4_instruction *
2747 vec4_vs_visitor::emit_urb_write_opcode(bool complete)
2748 {
2749 /* For VS, the URB writes end the thread. */
2750 if (complete) {
2751 if (INTEL_DEBUG & DEBUG_SHADER_TIME)
2752 emit_shader_time_end();
2753 }
2754
2755 vec4_instruction *inst = emit(VS_OPCODE_URB_WRITE);
2756 inst->eot = complete;
2757
2758 return inst;
2759 }
2760
2761 /**
2762 * Generates the VUE payload plus the necessary URB write instructions to
2763 * output it.
2764 *
2765 * The VUE layout is documented in Volume 2a.
2766 */
2767 void
2768 vec4_visitor::emit_vertex()
2769 {
2770 /* MRF 0 is reserved for the debugger, so start with message header
2771 * in MRF 1.
2772 */
2773 int base_mrf = 1;
2774 int mrf = base_mrf;
2775 /* In the process of generating our URB write message contents, we
2776 * may need to unspill a register or load from an array. Those
2777 * reads would use MRFs 14-15.
2778 */
2779 int max_usable_mrf = 13;
2780
2781 /* The following assertion verifies that max_usable_mrf causes an
2782 * even-numbered amount of URB write data, which will meet gen6's
2783 * requirements for length alignment.
2784 */
2785 assert ((max_usable_mrf - base_mrf) % 2 == 0);
2786
2787 /* First mrf is the g0-based message header containing URB handles and
2788 * such.
2789 */
2790 emit_urb_write_header(mrf++);
2791
2792 if (intel->gen < 6) {
2793 emit_ndc_computation();
2794 }
2795
2796 /* Set up the VUE data for the first URB write */
2797 int slot;
2798 for (slot = 0; slot < prog_data->vue_map.num_slots; ++slot) {
2799 emit_urb_slot(mrf++, prog_data->vue_map.slot_to_varying[slot]);
2800
2801 /* If this was max_usable_mrf, we can't fit anything more into this URB
2802 * WRITE.
2803 */
2804 if (mrf > max_usable_mrf) {
2805 slot++;
2806 break;
2807 }
2808 }
2809
2810 bool complete = slot >= prog_data->vue_map.num_slots;
2811 current_annotation = "URB write";
2812 vec4_instruction *inst = emit_urb_write_opcode(complete);
2813 inst->base_mrf = base_mrf;
2814 inst->mlen = align_interleaved_urb_mlen(brw, mrf - base_mrf);
2815
2816 /* Optional second URB write */
2817 if (!complete) {
2818 mrf = base_mrf + 1;
2819
2820 for (; slot < prog_data->vue_map.num_slots; ++slot) {
2821 assert(mrf < max_usable_mrf);
2822
2823 emit_urb_slot(mrf++, prog_data->vue_map.slot_to_varying[slot]);
2824 }
2825
2826 current_annotation = "URB write";
2827 inst = emit_urb_write_opcode(true /* complete */);
2828 inst->base_mrf = base_mrf;
2829 inst->mlen = align_interleaved_urb_mlen(brw, mrf - base_mrf);
2830 /* URB destination offset. In the previous write, we filled data MRFs
2831 * 2-13 (MRF 1 being the header), so 12 regs. URB offset is in
2832 * URB row increments, and each of our MRFs is half of one of
2833 * those, since we're doing interleaved writes.
2834 */
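/* Illustrative arithmetic: with base_mrf = 1 and max_usable_mrf = 13,
 * the first write covered 12 data MRFs, i.e. (13 - 1) / 2 = 6
 * interleaved URB rows, so the second write starts at offset 6.
 */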
2835 inst->offset = (max_usable_mrf - base_mrf) / 2;
2836 }
2837 }
2838
2839 void
2840 vec4_vs_visitor::emit_thread_end()
2841 {
2842 /* For VS, we always end the thread by emitting a single vertex.
2843 * emit_urb_write_opcode() will take care of setting the eot flag on the
2844 * SEND instruction.
2845 */
2846 emit_vertex();
2847 }
2848
2849 src_reg
2850 vec4_visitor::get_scratch_offset(vec4_instruction *inst,
2851 src_reg *reladdr, int reg_offset)
2852 {
2853 /* Because we store the values to scratch interleaved like our
2854 * vertex data, we need to scale the vec4 index by 2.
2855 */
2856 int message_header_scale = 2;
2857
2858 /* Pre-gen6, the message header uses byte offsets instead of vec4
2859 * (16-byte) offset units.
2860 */
2861 if (intel->gen < 6)
2862 message_header_scale *= 16;
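/* Illustrative example: reg_offset = 3 with no reladdr yields an
 * immediate offset of 3 * 2 = 6 on gen6+ (interleaved vec4 units), or
 * 3 * 2 * 16 = 96 on gen4/5 (bytes).
 */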
2863
2864 if (reladdr) {
2865 src_reg index = src_reg(this, glsl_type::int_type);
2866
2867 emit_before(inst, ADD(dst_reg(index), *reladdr, src_reg(reg_offset)));
2868 emit_before(inst, MUL(dst_reg(index),
2869 index, src_reg(message_header_scale)));
2870
2871 return index;
2872 } else {
2873 return src_reg(reg_offset * message_header_scale);
2874 }
2875 }
2876
2877 src_reg
2878 vec4_visitor::get_pull_constant_offset(vec4_instruction *inst,
2879 src_reg *reladdr, int reg_offset)
2880 {
2881 if (reladdr) {
2882 src_reg index = src_reg(this, glsl_type::int_type);
2883
2884 emit_before(inst, ADD(dst_reg(index), *reladdr, src_reg(reg_offset)));
2885
2886 /* Pre-gen6, the message header uses byte offsets instead of vec4
2887 * (16-byte) offset units.
2888 */
2889 if (intel->gen < 6) {
2890 emit_before(inst, MUL(dst_reg(index), index, src_reg(16)));
2891 }
2892
2893 return index;
2894 } else {
2895 int message_header_scale = intel->gen < 6 ? 16 : 1;
2896 return src_reg(reg_offset * message_header_scale);
2897 }
2898 }
2899
2900 /**
2901 * Emits an instruction before @inst to load the value named by @orig_src
2902 * from scratch space at @base_offset to @temp.
2903 *
2904 * @base_offset is measured in 32-byte units (the size of a register).
2905 */
2906 void
2907 vec4_visitor::emit_scratch_read(vec4_instruction *inst,
2908 dst_reg temp, src_reg orig_src,
2909 int base_offset)
2910 {
2911 int reg_offset = base_offset + orig_src.reg_offset;
2912 src_reg index = get_scratch_offset(inst, orig_src.reladdr, reg_offset);
2913
2914 emit_before(inst, SCRATCH_READ(temp, index));
2915 }
2916
2917 /**
2918 * Emits an instruction after @inst to store the value to be written
2919 * to @orig_dst to scratch space at @base_offset, from @temp.
2920 *
2921 * @base_offset is measured in 32-byte units (the size of a register).
2922 */
2923 void
2924 vec4_visitor::emit_scratch_write(vec4_instruction *inst, int base_offset)
2925 {
2926 int reg_offset = base_offset + inst->dst.reg_offset;
2927 src_reg index = get_scratch_offset(inst, inst->dst.reladdr, reg_offset);
2928
2929 /* Create a temporary register to store *inst's result in.
2930 *
2931 * We have to be careful in MOVing from our temporary result register in
2932 * the scratch write. If we swizzle from channels of the temporary that
2933 * weren't initialized, it will confuse live interval analysis, which will
2934 * make spilling fail to make progress.
2935 */
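/* Worked example (illustrative): if inst->dst.writemask is XZ,
 * first_writemask_chan is X and the loop below produces the swizzle
 * .xxzx, so the scratch write never reads the uninitialized Y and W
 * channels of the temporary.
 */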
2936 src_reg temp = src_reg(this, glsl_type::vec4_type);
2937 temp.type = inst->dst.type;
2938 int first_writemask_chan = ffs(inst->dst.writemask) - 1;
2939 int swizzles[4];
2940 for (int i = 0; i < 4; i++)
2941 if (inst->dst.writemask & (1 << i))
2942 swizzles[i] = i;
2943 else
2944 swizzles[i] = first_writemask_chan;
2945 temp.swizzle = BRW_SWIZZLE4(swizzles[0], swizzles[1],
2946 swizzles[2], swizzles[3]);
2947
2948 dst_reg dst = dst_reg(brw_writemask(brw_vec8_grf(0, 0),
2949 inst->dst.writemask));
2950 vec4_instruction *write = SCRATCH_WRITE(dst, temp, index);
2951 write->predicate = inst->predicate;
2952 write->ir = inst->ir;
2953 write->annotation = inst->annotation;
2954 inst->insert_after(write);
2955
2956 inst->dst.file = temp.file;
2957 inst->dst.reg = temp.reg;
2958 inst->dst.reg_offset = temp.reg_offset;
2959 inst->dst.reladdr = NULL;
2960 }
2961
2962 /**
2963 * We can't generally support array access in GRF space, because a
2964 * single instruction's destination can only span 2 contiguous
2965 * registers. So, we send all GRF arrays that get variable index
2966 * access to scratch space.
2967 */
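/* Illustrative example: GLSL along the lines of
 *
 *    float a[4];
 *    ...
 *    value = a[i];   // i not known at compile time
 *
 * leaves a reladdr on the virtual GRF holding "a", so this pass gives
 * that GRF a scratch location and turns each access into a
 * SCRATCH_READ or SCRATCH_WRITE through a fresh temporary.
 */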
2968 void
2969 vec4_visitor::move_grf_array_access_to_scratch()
2970 {
2971 int scratch_loc[this->virtual_grf_count];
2972
2973 for (int i = 0; i < this->virtual_grf_count; i++) {
2974 scratch_loc[i] = -1;
2975 }
2976
2977 /* First, calculate the set of virtual GRFs that need to be punted
2978 * to scratch due to having any array access on them, and where in
2979 * scratch.
2980 */
2981 foreach_list(node, &this->instructions) {
2982 vec4_instruction *inst = (vec4_instruction *)node;
2983
2984 if (inst->dst.file == GRF && inst->dst.reladdr &&
2985 scratch_loc[inst->dst.reg] == -1) {
2986 scratch_loc[inst->dst.reg] = c->last_scratch;
2987 c->last_scratch += this->virtual_grf_sizes[inst->dst.reg];
2988 }
2989
2990 for (int i = 0 ; i < 3; i++) {
2991 src_reg *src = &inst->src[i];
2992
2993 if (src->file == GRF && src->reladdr &&
2994 scratch_loc[src->reg] == -1) {
2995 scratch_loc[src->reg] = c->last_scratch;
2996 c->last_scratch += this->virtual_grf_sizes[src->reg];
2997 }
2998 }
2999 }
3000
3001 /* Now, for anything that will be accessed through scratch, rewrite
3002 * it to load/store. Note that this is a _safe list walk, because
3003 * we may generate a new scratch_write instruction after the one
3004 * we're processing.
3005 */
3006 foreach_list_safe(node, &this->instructions) {
3007 vec4_instruction *inst = (vec4_instruction *)node;
3008
3009 /* Set up the annotation tracking for new generated instructions. */
3010 base_ir = inst->ir;
3011 current_annotation = inst->annotation;
3012
3013 if (inst->dst.file == GRF && scratch_loc[inst->dst.reg] != -1) {
3014 emit_scratch_write(inst, scratch_loc[inst->dst.reg]);
3015 }
3016
3017 for (int i = 0 ; i < 3; i++) {
3018 if (inst->src[i].file != GRF || scratch_loc[inst->src[i].reg] == -1)
3019 continue;
3020
3021 dst_reg temp = dst_reg(this, glsl_type::vec4_type);
3022
3023 emit_scratch_read(inst, temp, inst->src[i],
3024 scratch_loc[inst->src[i].reg]);
3025
3026 inst->src[i].file = temp.file;
3027 inst->src[i].reg = temp.reg;
3028 inst->src[i].reg_offset = temp.reg_offset;
3029 inst->src[i].reladdr = NULL;
3030 }
3031 }
3032 }
3033
3034 /**
3035 * Emits an instruction before @inst to load the value named by @orig_src
3036 * from the pull constant buffer (surface) at @base_offset to @temp.
3037 */
3038 void
3039 vec4_visitor::emit_pull_constant_load(vec4_instruction *inst,
3040 dst_reg temp, src_reg orig_src,
3041 int base_offset)
3042 {
3043 int reg_offset = base_offset + orig_src.reg_offset;
3044 src_reg index = src_reg((unsigned)SURF_INDEX_VERT_CONST_BUFFER);
3045 src_reg offset = get_pull_constant_offset(inst, orig_src.reladdr, reg_offset);
3046 vec4_instruction *load;
3047
3048 if (intel->gen >= 7) {
3049 dst_reg grf_offset = dst_reg(this, glsl_type::int_type);
3050 grf_offset.type = offset.type;
3051 emit_before(inst, MOV(grf_offset, offset));
3052
3053 load = new(mem_ctx) vec4_instruction(this,
3054 VS_OPCODE_PULL_CONSTANT_LOAD_GEN7,
3055 temp, index, src_reg(grf_offset));
3056 } else {
3057 load = new(mem_ctx) vec4_instruction(this, VS_OPCODE_PULL_CONSTANT_LOAD,
3058 temp, index, offset);
3059 load->base_mrf = 14;
3060 load->mlen = 1;
3061 }
3062 emit_before(inst, load);
3063 }
3064
3065 /**
3066 * Implements array access of uniforms by inserting a
3067 * PULL_CONSTANT_LOAD instruction.
3068 *
3069 * Unlike temporary GRF array access (which we don't support due to
3070 * the difficulty of doing relative addressing on instruction
3071 * destinations), we could potentially do array access of uniforms
3072 * that were loaded in GRF space as push constants. In real-world
3073 * usage we've seen, though, the arrays being used are always larger
3074 * than we could load as push constants, so just always move all
3075 * uniform array access out to a pull constant buffer.
3076 */
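/* Illustrative example: a shader containing
 *
 *    uniform vec4 colors[32];
 *    ...
 *    c = colors[i];   // i not known at compile time
 *
 * gets the array's storage appended to prog_data->pull_param and the
 * access rewritten into a pull constant load into a temporary GRF.
 */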
3077 void
3078 vec4_visitor::move_uniform_array_access_to_pull_constants()
3079 {
3080 int pull_constant_loc[this->uniforms];
3081
3082 for (int i = 0; i < this->uniforms; i++) {
3083 pull_constant_loc[i] = -1;
3084 }
3085
3086 /* Walk through and find array access of uniforms. Put a copy of that
3087 * uniform in the pull constant buffer.
3088 *
3089 * Note that we don't move constant-indexed accesses to arrays. No
3090 * testing has been done of the performance impact of this choice.
3091 */
3092 foreach_list_safe(node, &this->instructions) {
3093 vec4_instruction *inst = (vec4_instruction *)node;
3094
3095 for (int i = 0 ; i < 3; i++) {
3096 if (inst->src[i].file != UNIFORM || !inst->src[i].reladdr)
3097 continue;
3098
3099 int uniform = inst->src[i].reg;
3100
3101 /* If this array isn't already present in the pull constant buffer,
3102 * add it.
3103 */
3104 if (pull_constant_loc[uniform] == -1) {
3105 const float **values = &prog_data->param[uniform * 4];
3106
3107 pull_constant_loc[uniform] = prog_data->nr_pull_params / 4;
3108
3109 for (int j = 0; j < uniform_size[uniform] * 4; j++) {
3110 prog_data->pull_param[prog_data->nr_pull_params++]
3111 = values[j];
3112 }
3113 }
3114
3115 /* Set up the annotation tracking for new generated instructions. */
3116 base_ir = inst->ir;
3117 current_annotation = inst->annotation;
3118
3119 dst_reg temp = dst_reg(this, glsl_type::vec4_type);
3120
3121 emit_pull_constant_load(inst, temp, inst->src[i],
3122 pull_constant_loc[uniform]);
3123
3124 inst->src[i].file = temp.file;
3125 inst->src[i].reg = temp.reg;
3126 inst->src[i].reg_offset = temp.reg_offset;
3127 inst->src[i].reladdr = NULL;
3128 }
3129 }
3130
3131 /* Now there are no accesses of the UNIFORM file with a reladdr, so
3132 * no need to track them as larger-than-vec4 objects. This will be
3133 * relied on in cutting out unused uniform vectors from push
3134 * constants.
3135 */
3136 split_uniform_registers();
3137 }
3138
3139 void
3140 vec4_visitor::resolve_ud_negate(src_reg *reg)
3141 {
3142 if (reg->type != BRW_REGISTER_TYPE_UD ||
3143 !reg->negate)
3144 return;
3145
3146 src_reg temp = src_reg(this, glsl_type::uvec4_type);
3147 emit(BRW_OPCODE_MOV, dst_reg(temp), *reg);
3148 *reg = temp;
3149 }
3150
3151 vec4_visitor::vec4_visitor(struct brw_context *brw,
3152 struct brw_vec4_compile *c,
3153 struct gl_program *prog,
3154 const struct brw_vec4_prog_key *key,
3155 struct brw_vec4_prog_data *prog_data,
3156 struct gl_shader_program *shader_prog,
3157 struct brw_shader *shader,
3158 void *mem_ctx,
3159 bool debug_flag)
3160 : debug_flag(debug_flag)
3161 {
3162 this->brw = brw;
3163 this->intel = &brw->intel;
3164 this->ctx = &intel->ctx;
3165 this->shader_prog = shader_prog;
3166 this->shader = shader;
3167
3168 this->mem_ctx = mem_ctx;
3169 this->failed = false;
3170
3171 this->base_ir = NULL;
3172 this->current_annotation = NULL;
3173 memset(this->output_reg_annotation, 0, sizeof(this->output_reg_annotation));
3174
3175 this->c = c;
3176 this->prog = prog;
3177 this->key = key;
3178 this->prog_data = prog_data;
3179
3180 this->variable_ht = hash_table_ctor(0,
3181 hash_table_pointer_hash,
3182 hash_table_pointer_compare);
3183
3184 this->virtual_grf_start = NULL;
3185 this->virtual_grf_end = NULL;
3186 this->virtual_grf_sizes = NULL;
3187 this->virtual_grf_count = 0;
3188 this->virtual_grf_reg_map = NULL;
3189 this->virtual_grf_reg_count = 0;
3190 this->virtual_grf_array_size = 0;
3191 this->live_intervals_valid = false;
3192
3193 this->max_grf = intel->gen >= 7 ? GEN7_MRF_HACK_START : BRW_MAX_GRF;
3194
3195 this->uniforms = 0;
3196 }
3197
3198 vec4_visitor::~vec4_visitor()
3199 {
3200 hash_table_dtor(this->variable_ht);
3201 }
3202
3203
3204 vec4_vs_visitor::vec4_vs_visitor(struct brw_context *brw,
3205 struct brw_vs_compile *vs_compile,
3206 struct brw_vs_prog_data *vs_prog_data,
3207 struct gl_shader_program *prog,
3208 struct brw_shader *shader,
3209 void *mem_ctx)
3210 : vec4_visitor(brw, &vs_compile->base, &vs_compile->vp->program.Base,
3211 &vs_compile->key.base, &vs_prog_data->base, prog, shader,
3212 mem_ctx, INTEL_DEBUG & DEBUG_VS),
3213 vs_compile(vs_compile),
3214 vs_prog_data(vs_prog_data)
3215 {
3216 }
3217
3218
3219 void
3220 vec4_visitor::fail(const char *format, ...)
3221 {
3222 va_list va;
3223 char *msg;
3224
3225 if (failed)
3226 return;
3227
3228 failed = true;
3229
3230 va_start(va, format);
3231 msg = ralloc_vasprintf(mem_ctx, format, va);
3232 va_end(va);
3233 msg = ralloc_asprintf(mem_ctx, "VS compile failed: %s\n", msg);
3234
3235 this->fail_msg = msg;
3236
3237 if (debug_flag) {
3238 fprintf(stderr, "%s", msg);
3239 }
3240 }
3241
3242 } /* namespace brw */