src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp

   1 /*
   2  * Copyright © 2011 Intel Corporation
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the next
  12  * paragraph) shall be included in all copies or substantial portions of the
  13  * Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21  * IN THE SOFTWARE.
  22  */
  23
  24 #include "brw_vec4.h"
  25 #include "glsl/ir_uniform.h"
  26 extern "C" {
  27 #include "main/context.h"
  28 #include "main/macros.h"
  29 #include "program/prog_parameter.h"
  30 #include "program/sampler.h"
  31 }
  32
  33 namespace brw {
  34
  35 vec4_instruction::vec4_instruction(vec4_visitor *v,
  36                                    enum opcode opcode, dst_reg dst,
  37                                    src_reg src0, src_reg src1, src_reg src2)
  38 {
  39    this->opcode = opcode;
  40    this->dst = dst;
  41    this->src[0] = src0;
  42    this->src[1] = src1;
  43    this->src[2] = src2;
  44    this->ir = v->base_ir;
  45    this->annotation = v->current_annotation;
  46 }
  47
  48 vec4_instruction *
  49 vec4_visitor::emit(vec4_instruction *inst)
  50 {
  51    this->instructions.push_tail(inst);
  52
  53    return inst;
  54 }
  55
  56 vec4_instruction *
  57 vec4_visitor::emit_before(vec4_instruction *inst, vec4_instruction *new_inst)
  58 {
  59    new_inst->ir = inst->ir;
  60    new_inst->annotation = inst->annotation;
  61
  62    inst->insert_before(new_inst);
  63
  64    return inst;
  65 }
  66
  67 vec4_instruction *
  68 vec4_visitor::emit(enum opcode opcode, dst_reg dst,
  69                    src_reg src0, src_reg src1, src_reg src2)
  70 {
  71    return emit(new(mem_ctx) vec4_instruction(this, opcode, dst,
  72                                              src0, src1, src2));
  73 }
  74
  75
  76 vec4_instruction *
  77 vec4_visitor::emit(enum opcode opcode, dst_reg dst, src_reg src0, src_reg src1)
  78 {
  79    return emit(new(mem_ctx) vec4_instruction(this, opcode, dst, src0, src1));
  80 }
  81
  82 vec4_instruction *
  83 vec4_visitor::emit(enum opcode opcode, dst_reg dst, src_reg src0)
  84 {
  85    return emit(new(mem_ctx) vec4_instruction(this, opcode, dst, src0));
  86 }
  87
  88 vec4_instruction *
  89 vec4_visitor::emit(enum opcode opcode)
  90 {
  91    return emit(new(mem_ctx) vec4_instruction(this, opcode, dst_reg()));
  92 }
  93
  94 #define ALU1(op)                                                        \
  95    vec4_instruction *                                                   \
  96    vec4_visitor::op(dst_reg dst, src_reg src0)                          \
  97    {                                                                    \
  98       return new(mem_ctx) vec4_instruction(this, BRW_OPCODE_##op, dst,  \
  99                                            src0);                       \
 100    }
 101
 102 #define ALU2(op)                                                        \
 103    vec4_instruction *                                                   \
 104    vec4_visitor::op(dst_reg dst, src_reg src0, src_reg src1)            \
 105    {                                                                    \
 106       return new(mem_ctx) vec4_instruction(this, BRW_OPCODE_##op, dst,  \
 107                                            src0, src1);                 \
 108    }
 109
 110 #define ALU3(op)                                                        \
 111    vec4_instruction *                                                   \
 112    vec4_visitor::op(dst_reg dst, src_reg src0, src_reg src1, src_reg src2)\
 113    {                                                                    \
 114       return new(mem_ctx) vec4_instruction(this, BRW_OPCODE_##op, dst,  \
 115                                            src0, src1, src2);           \
 116    }
 117
 118 ALU1(NOT)
 119 ALU1(MOV)
 120 ALU1(FRC)
 121 ALU1(RNDD)
 122 ALU1(RNDE)
 123 ALU1(RNDZ)
 124 ALU1(F32TO16)
 125 ALU1(F16TO32)
 126 ALU2(ADD)
 127 ALU2(MUL)
 128 ALU2(MACH)
 129 ALU2(AND)
 130 ALU2(OR)
 131 ALU2(XOR)
 132 ALU2(DP3)
 133 ALU2(DP4)
 134 ALU2(DPH)
 135 ALU2(SHL)
 136 ALU2(SHR)
 137 ALU2(ASR)
 138 ALU3(LRP)
 139 ALU1(BFREV)
 140 ALU3(BFE)
 141 ALU2(BFI1)
 142 ALU3(BFI2)
 143 ALU1(FBH)
 144 ALU1(FBL)
 145 ALU1(CBIT)
 146
 147 /** Gen4 predicated IF. */
 148 vec4_instruction *
 149 vec4_visitor::IF(uint32_t predicate)
 150 {
 151    vec4_instruction *inst;
 152
 153    inst = new(mem_ctx) vec4_instruction(this, BRW_OPCODE_IF);
 154    inst->predicate = predicate;
 155
 156    return inst;
 157 }
 158
 159 /** Gen6+ IF with embedded comparison. */
 160 vec4_instruction *
 161 vec4_visitor::IF(src_reg src0, src_reg src1, uint32_t condition)
 162 {
 163    assert(brw->gen >= 6);
 164
 165    vec4_instruction *inst;
 166
 167    resolve_ud_negate(&src0);
 168    resolve_ud_negate(&src1);
 169
 170    inst = new(mem_ctx) vec4_instruction(this, BRW_OPCODE_IF, dst_null_d(),
 171                                         src0, src1);
 172    inst->conditional_mod = condition;
 173
 174    return inst;
 175 }
 176
 177 /**
 178  * CMP: Sets the low bit of the destination channels with the result
 179  * of the comparison, while the upper bits are undefined, and updates
 180  * the flag register with the packed 16 bits of the result.
 181  */
 182 vec4_instruction *
 183 vec4_visitor::CMP(dst_reg dst, src_reg src0, src_reg src1, uint32_t condition)
 184 {
 185    vec4_instruction *inst;
 186
 187    /* original gen4 does type conversion to the destination type
 188     * before before comparison, producing garbage results for floating
 189     * point comparisons.
 190     */
 191    if (brw->gen == 4) {
 192       dst.type = src0.type;
 193       if (dst.file == HW_REG)
 194          dst.fixed_hw_reg.type = dst.type;
 195    }
 196
 197    resolve_ud_negate(&src0);
 198    resolve_ud_negate(&src1);
 199
 200    inst = new(mem_ctx) vec4_instruction(this, BRW_OPCODE_CMP, dst, src0, src1);
 201    inst->conditional_mod = condition;
 202
 203    return inst;
 204 }
 205
 206 vec4_instruction *
 207 vec4_visitor::SCRATCH_READ(dst_reg dst, src_reg index)
 208 {
 209    vec4_instruction *inst;
 210
 211    inst = new(mem_ctx) vec4_instruction(this, VS_OPCODE_SCRATCH_READ,
 212                                         dst, index);
 213    inst->base_mrf = 14;
 214    inst->mlen = 2;
 215
 216    return inst;
 217 }
 218
 219 vec4_instruction *
 220 vec4_visitor::SCRATCH_WRITE(dst_reg dst, src_reg src, src_reg index)
 221 {
 222    vec4_instruction *inst;
 223
 224    inst = new(mem_ctx) vec4_instruction(this, VS_OPCODE_SCRATCH_WRITE,
 225                                         dst, src, index);
 226    inst->base_mrf = 13;
 227    inst->mlen = 3;
 228
 229    return inst;
 230 }
 231
 232 void
 233 vec4_visitor::emit_dp(dst_reg dst, src_reg src0, src_reg src1, unsigned elements)
 234 {
 235    static enum opcode dot_opcodes[] = {
 236       BRW_OPCODE_DP2, BRW_OPCODE_DP3, BRW_OPCODE_DP4
 237    };
 238
 239    emit(dot_opcodes[elements - 2], dst, src0, src1);
 240 }
 241
 242 src_reg
 243 vec4_visitor::fix_3src_operand(src_reg src)
 244 {
 245    /* Using vec4 uniforms in SIMD4x2 programs is difficult. You'd like to be
 246     * able to use vertical stride of zero to replicate the vec4 uniform, like
 247     *
 248     *    g3<0;4,1>:f - [0, 4][1, 5][2, 6][3, 7]
 249     *
 250     * But you can't, since vertical stride is always four in three-source
 251     * instructions. Instead, insert a MOV instruction to do the replication so
 252     * that the three-source instruction can consume it.
 253     */
 254
 255    /* The MOV is only needed if the source is a uniform or immediate. */
 256    if (src.file != UNIFORM && src.file != IMM)
 257       return src;
 258
 259    dst_reg expanded = dst_reg(this, glsl_type::vec4_type);
 260    expanded.type = src.type;
 261    emit(MOV(expanded, src));
 262    return src_reg(expanded);
 263 }
 264
 265 src_reg
 266 vec4_visitor::fix_math_operand(src_reg src)
 267 {
 268    /* The gen6 math instruction ignores the source modifiers --
 269     * swizzle, abs, negate, and at least some parts of the register
 270     * region description.
 271     *
 272     * Rather than trying to enumerate all these cases, *always* expand the
 273     * operand to a temp GRF for gen6.
 274     *
 275     * For gen7, keep the operand as-is, except if immediate, which gen7 still
 276     * can't use.
 277     */
 278
 279    if (brw->gen == 7 && src.file != IMM)
 280       return src;
 281
 282    dst_reg expanded = dst_reg(this, glsl_type::vec4_type);
 283    expanded.type = src.type;
 284    emit(MOV(expanded, src));
 285    return src_reg(expanded);
 286 }
 287
 288 void
 289 vec4_visitor::emit_math1_gen6(enum opcode opcode, dst_reg dst, src_reg src)
 290 {
 291    src = fix_math_operand(src);
 292
 293    if (dst.writemask != WRITEMASK_XYZW) {
 294       /* The gen6 math instruction must be align1, so we can't do
 295        * writemasks.
 296        */
 297       dst_reg temp_dst = dst_reg(this, glsl_type::vec4_type);
 298
 299       emit(opcode, temp_dst, src);
 300
 301       emit(MOV(dst, src_reg(temp_dst)));
 302    } else {
 303       emit(opcode, dst, src);
 304    }
 305 }
 306
 307 void
 308 vec4_visitor::emit_math1_gen4(enum opcode opcode, dst_reg dst, src_reg src)
 309 {
 310    vec4_instruction *inst = emit(opcode, dst, src);
 311    inst->base_mrf = 1;
 312    inst->mlen = 1;
 313 }
 314
 315 void
 316 vec4_visitor::emit_math(opcode opcode, dst_reg dst, src_reg src)
 317 {
 318    switch (opcode) {
 319    case SHADER_OPCODE_RCP:
 320    case SHADER_OPCODE_RSQ:
 321    case SHADER_OPCODE_SQRT:
 322    case SHADER_OPCODE_EXP2:
 323    case SHADER_OPCODE_LOG2:
 324    case SHADER_OPCODE_SIN:
 325    case SHADER_OPCODE_COS:
 326       break;
 327    default:
 328       assert(!"not reached: bad math opcode");
 329       return;
 330    }
 331
 332    if (brw->gen >= 6) {
 333       return emit_math1_gen6(opcode, dst, src);
 334    } else {
 335       return emit_math1_gen4(opcode, dst, src);
 336    }
 337 }
 338
 339 void
 340 vec4_visitor::emit_math2_gen6(enum opcode opcode,
 341                               dst_reg dst, src_reg src0, src_reg src1)
 342 {
 343    src0 = fix_math_operand(src0);
 344    src1 = fix_math_operand(src1);
 345
 346    if (dst.writemask != WRITEMASK_XYZW) {
 347       /* The gen6 math instruction must be align1, so we can't do
 348        * writemasks.
 349        */
 350       dst_reg temp_dst = dst_reg(this, glsl_type::vec4_type);
 351       temp_dst.type = dst.type;
 352
 353       emit(opcode, temp_dst, src0, src1);
 354
 355       emit(MOV(dst, src_reg(temp_dst)));
 356    } else {
 357       emit(opcode, dst, src0, src1);
 358    }
 359 }
 360
 361 void
 362 vec4_visitor::emit_math2_gen4(enum opcode opcode,
 363                               dst_reg dst, src_reg src0, src_reg src1)
 364 {
 365    vec4_instruction *inst = emit(opcode, dst, src0, src1);
 366    inst->base_mrf = 1;
 367    inst->mlen = 2;
 368 }
 369
 370 void
 371 vec4_visitor::emit_math(enum opcode opcode,
 372                         dst_reg dst, src_reg src0, src_reg src1)
 373 {
 374    switch (opcode) {
 375    case SHADER_OPCODE_POW:
 376    case SHADER_OPCODE_INT_QUOTIENT:
 377    case SHADER_OPCODE_INT_REMAINDER:
 378       break;
 379    default:
 380       assert(!"not reached: unsupported binary math opcode");
 381       return;
 382    }
 383
 384    if (brw->gen >= 6) {
 385       return emit_math2_gen6(opcode, dst, src0, src1);
 386    } else {
 387       return emit_math2_gen4(opcode, dst, src0, src1);
 388    }
 389 }
 390
 391 void
 392 vec4_visitor::emit_pack_half_2x16(dst_reg dst, src_reg src0)
 393 {
 394    if (brw->gen < 7)
 395       assert(!"ir_unop_pack_half_2x16 should be lowered");
 396
 397    assert(dst.type == BRW_REGISTER_TYPE_UD);
 398    assert(src0.type == BRW_REGISTER_TYPE_F);
 399
 400    /* From the Ivybridge PRM, Vol4, Part3, Section 6.27 f32to16:
 401     *
 402     *   Because this instruction does not have a 16-bit floating-point type,
 403     *   the destination data type must be Word (W).
 404     *
 405     *   The destination must be DWord-aligned and specify a horizontal stride
 406     *   (HorzStride) of 2. The 16-bit result is stored in the lower word of
 407     *   each destination channel and the upper word is not modified.
 408     *
 409     * The above restriction implies that the f32to16 instruction must use
 410     * align1 mode, because only in align1 mode is it possible to specify
 411     * horizontal stride.  We choose here to defy the hardware docs and emit
 412     * align16 instructions.
 413     *
 414     * (I [chadv] did attempt to emit align1 instructions for VS f32to16
 415     * instructions. I was partially successful in that the code passed all
 416     * tests.  However, the code was dubiously correct and fragile, and the
 417     * tests were not harsh enough to probe that frailty. Not trusting the
 418     * code, I chose instead to remain in align16 mode in defiance of the hw
 419     * docs).
 420     *
 421     * I've [chadv] experimentally confirmed that, on gen7 hardware and the
 422     * simulator, emitting a f32to16 in align16 mode with UD as destination
 423     * data type is safe. The behavior differs from that specified in the PRM
 424     * in that the upper word of each destination channel is cleared to 0.
 425     */
 426
 427    dst_reg tmp_dst(this, glsl_type::uvec2_type);
 428    src_reg tmp_src(tmp_dst);
 429
 430 #if 0
 431    /* Verify the undocumented behavior on which the following instructions
 432     * rely.  If f32to16 fails to clear the upper word of the X and Y channels,
 433     * then the result of the bit-or instruction below will be incorrect.
 434     *
 435     * You should inspect the disasm output in order to verify that the MOV is
 436     * not optimized away.
 437     */
 438    emit(MOV(tmp_dst, src_reg(0x12345678u)));
 439 #endif
 440
 441    /* Give tmp the form below, where "." means untouched.
 442     *
 443     *     w z          y          x w z          y          x
 444     *   |.|.|0x0000hhhh|0x0000llll|.|.|0x0000hhhh|0x0000llll|
 445     *
 446     * That the upper word of each write-channel be 0 is required for the
 447     * following bit-shift and bit-or instructions to work. Note that this
 448     * relies on the undocumented hardware behavior mentioned above.
 449     */
 450    tmp_dst.writemask = WRITEMASK_XY;
 451    emit(F32TO16(tmp_dst, src0));
 452
 453    /* Give the write-channels of dst the form:
 454     *   0xhhhh0000
 455     */
 456    tmp_src.swizzle = SWIZZLE_Y;
 457    emit(SHL(dst, tmp_src, src_reg(16u)));
 458
 459    /* Finally, give the write-channels of dst the form of packHalf2x16's
 460     * output:
 461     *   0xhhhhllll
 462     */
 463    tmp_src.swizzle = SWIZZLE_X;
 464    emit(OR(dst, src_reg(dst), tmp_src));
 465 }
 466
 467 void
 468 vec4_visitor::emit_unpack_half_2x16(dst_reg dst, src_reg src0)
 469 {
 470    if (brw->gen < 7)
 471       assert(!"ir_unop_unpack_half_2x16 should be lowered");
 472
 473    assert(dst.type == BRW_REGISTER_TYPE_F);
 474    assert(src0.type == BRW_REGISTER_TYPE_UD);
 475
 476    /* From the Ivybridge PRM, Vol4, Part3, Section 6.26 f16to32:
 477     *
 478     *   Because this instruction does not have a 16-bit floating-point type,
 479     *   the source data type must be Word (W). The destination type must be
 480     *   F (Float).
 481     *
 482     * To use W as the source data type, we must adjust horizontal strides,
 483     * which is only possible in align1 mode. All my [chadv] attempts at
 484     * emitting align1 instructions for unpackHalf2x16 failed to pass the
 485     * Piglit tests, so I gave up.
 486     *
 487     * I've verified that, on gen7 hardware and the simulator, it is safe to
 488     * emit f16to32 in align16 mode with UD as source data type.
 489     */
 490
 491    dst_reg tmp_dst(this, glsl_type::uvec2_type);
 492    src_reg tmp_src(tmp_dst);
 493
 494    tmp_dst.writemask = WRITEMASK_X;
 495    emit(AND(tmp_dst, src0, src_reg(0xffffu)));
 496
 497    tmp_dst.writemask = WRITEMASK_Y;
 498    emit(SHR(tmp_dst, src0, src_reg(16u)));
 499
 500    dst.writemask = WRITEMASK_XY;
 501    emit(F16TO32(dst, tmp_src));
 502 }
 503
 504 void
 505 vec4_visitor::visit_instructions(const exec_list *list)
 506 {
 507    foreach_list(node, list) {
 508       ir_instruction *ir = (ir_instruction *)node;
 509
 510       base_ir = ir;
 511       ir->accept(this);
 512    }
 513 }
 514
 515
 516 static int
 517 type_size(const struct glsl_type *type)
 518 {
 519    unsigned int i;
 520    int size;
 521
 522    switch (type->base_type) {
 523    case GLSL_TYPE_UINT:
 524    case GLSL_TYPE_INT:
 525    case GLSL_TYPE_FLOAT:
 526    case GLSL_TYPE_BOOL:
 527       if (type->is_matrix()) {
 528          return type->matrix_columns;
 529       } else {
 530          /* Regardless of size of vector, it gets a vec4. This is bad
 531           * packing for things like floats, but otherwise arrays become a
 532           * mess.  Hopefully a later pass over the code can pack scalars
 533           * down if appropriate.
 534           */
 535          return 1;
 536       }
 537    case GLSL_TYPE_ARRAY:
 538       assert(type->length > 0);
 539       return type_size(type->fields.array) * type->length;
 540    case GLSL_TYPE_STRUCT:
 541       size = 0;
 542       for (i = 0; i < type->length; i++) {
 543          size += type_size(type->fields.structure[i].type);
 544       }
 545       return size;
 546    case GLSL_TYPE_SAMPLER:
 547       /* Samplers take up one slot in UNIFORMS[], but they're baked in
 548        * at link time.
 549        */
 550       return 1;
 551    case GLSL_TYPE_VOID:
 552    case GLSL_TYPE_ERROR:
 553    case GLSL_TYPE_INTERFACE:
 554       assert(0);
 555       break;
 556    }
 557
 558    return 0;
 559 }
 560
 561 int
 562 vec4_visitor::virtual_grf_alloc(int size)
 563 {
 564    if (virtual_grf_array_size <= virtual_grf_count) {
 565       if (virtual_grf_array_size == 0)
 566          virtual_grf_array_size = 16;
 567       else
 568          virtual_grf_array_size *= 2;
 569       virtual_grf_sizes = reralloc(mem_ctx, virtual_grf_sizes, int,
 570                                    virtual_grf_array_size);
 571       virtual_grf_reg_map = reralloc(mem_ctx, virtual_grf_reg_map, int,
 572                                      virtual_grf_array_size);
 573    }
 574    virtual_grf_reg_map[virtual_grf_count] = virtual_grf_reg_count;
 575    virtual_grf_reg_count += size;
 576    virtual_grf_sizes[virtual_grf_count] = size;
 577    return virtual_grf_count++;
 578 }
 579
 580 src_reg::src_reg(class vec4_visitor *v, const struct glsl_type *type)
 581 {
 582    init();
 583
 584    this->file = GRF;
 585    this->reg = v->virtual_grf_alloc(type_size(type));
 586
 587    if (type->is_array() || type->is_record()) {
 588       this->swizzle = BRW_SWIZZLE_NOOP;
 589    } else {
 590       this->swizzle = swizzle_for_size(type->vector_elements);
 591    }
 592
 593    this->type = brw_type_for_base_type(type);
 594 }
 595
 596 dst_reg::dst_reg(class vec4_visitor *v, const struct glsl_type *type)
 597 {
 598    init();
 599
 600    this->file = GRF;
 601    this->reg = v->virtual_grf_alloc(type_size(type));
 602
 603    if (type->is_array() || type->is_record()) {
 604       this->writemask = WRITEMASK_XYZW;
 605    } else {
 606       this->writemask = (1 << type->vector_elements) - 1;
 607    }
 608
 609    this->type = brw_type_for_base_type(type);
 610 }
 611
 612 /* Our support for uniforms is piggy-backed on the struct
 613  * gl_fragment_program, because that's where the values actually
 614  * get stored, rather than in some global gl_shader_program uniform
 615  * store.
 616  */
 617 void
 618 vec4_visitor::setup_uniform_values(ir_variable *ir)
 619 {
 620    int namelen = strlen(ir->name);
 621
 622    /* The data for our (non-builtin) uniforms is stored in a series of
 623     * gl_uniform_driver_storage structs for each subcomponent that
 624     * glGetUniformLocation() could name.  We know it's been set up in the same
 625     * order we'd walk the type, so walk the list of storage and find anything
 626     * with our name, or the prefix of a component that starts with our name.
 627     */
 628    for (unsigned u = 0; u < shader_prog->NumUserUniformStorage; u++) {
 629       struct gl_uniform_storage *storage = &shader_prog->UniformStorage[u];
 630
 631       if (strncmp(ir->name, storage->name, namelen) != 0 ||
 632           (storage->name[namelen] != 0 &&
 633            storage->name[namelen] != '.' &&
 634            storage->name[namelen] != '[')) {
 635          continue;
 636       }
 637
 638       gl_constant_value *components = storage->storage;
 639       unsigned vector_count = (MAX2(storage->array_elements, 1) *
 640                                storage->type->matrix_columns);
 641
 642       for (unsigned s = 0; s < vector_count; s++) {
 643          uniform_vector_size[uniforms] = storage->type->vector_elements;
 644
 645          int i;
 646          for (i = 0; i < uniform_vector_size[uniforms]; i++) {
 647             prog_data->param[uniforms * 4 + i] = &components->f;
 648             components++;
 649          }
 650          for (; i < 4; i++) {
 651             static float zero = 0;
 652             prog_data->param[uniforms * 4 + i] = &zero;
 653          }
 654
 655          uniforms++;
 656       }
 657    }
 658 }
 659
 660 void
 661 vec4_visitor::setup_uniform_clipplane_values()
 662 {
 663    gl_clip_plane *clip_planes = brw_select_clip_planes(ctx);
 664
 665    for (int i = 0; i < key->nr_userclip_plane_consts; ++i) {
 666       this->uniform_vector_size[this->uniforms] = 4;
 667       this->userplane[i] = dst_reg(UNIFORM, this->uniforms);
 668       this->userplane[i].type = BRW_REGISTER_TYPE_F;
 669       for (int j = 0; j < 4; ++j) {
 670          prog_data->param[this->uniforms * 4 + j] = &clip_planes[i][j];
 671       }
 672       ++this->uniforms;
 673    }
 674 }
 675
 676 /* Our support for builtin uniforms is even scarier than non-builtin.
 677  * It sits on top of the PROG_STATE_VAR parameters that are
 678  * automatically updated from GL context state.
 679  */
 680 void
 681 vec4_visitor::setup_builtin_uniform_values(ir_variable *ir)
 682 {
 683    const ir_state_slot *const slots = ir->state_slots;
 684    assert(ir->state_slots != NULL);
 685
 686    for (unsigned int i = 0; i < ir->num_state_slots; i++) {
 687       /* This state reference has already been setup by ir_to_mesa,
 688        * but we'll get the same index back here.  We can reference
 689        * ParameterValues directly, since unlike brw_fs.cpp, we never
 690        * add new state references during compile.
 691        */
 692       int index = _mesa_add_state_reference(this->prog->Parameters,
 693                                             (gl_state_index *)slots[i].tokens);
 694       float *values = &this->prog->Parameters->ParameterValues[index][0].f;
 695
 696       this->uniform_vector_size[this->uniforms] = 0;
 697       /* Add each of the unique swizzled channels of the element.
 698        * This will end up matching the size of the glsl_type of this field.
 699        */
 700       int last_swiz = -1;
 701       for (unsigned int j = 0; j < 4; j++) {
 702          int swiz = GET_SWZ(slots[i].swizzle, j);
 703          last_swiz = swiz;
 704
 705          prog_data->param[this->uniforms * 4 + j] = &values[swiz];
 706          if (swiz <= last_swiz)
 707             this->uniform_vector_size[this->uniforms]++;
 708       }
 709       this->uniforms++;
 710    }
 711 }
 712
 713 dst_reg *
 714 vec4_visitor::variable_storage(ir_variable *var)
 715 {
 716    return (dst_reg *)hash_table_find(this->variable_ht, var);
 717 }
 718
 719 void
 720 vec4_visitor::emit_bool_to_cond_code(ir_rvalue *ir, uint32_t *predicate)
 721 {
 722    ir_expression *expr = ir->as_expression();
 723
 724    *predicate = BRW_PREDICATE_NORMAL;
 725
 726    if (expr) {
 727       src_reg op[2];
 728       vec4_instruction *inst;
 729
 730       assert(expr->get_num_operands() <= 2);
 731       for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
 732          expr->operands[i]->accept(this);
 733          op[i] = this->result;
 734
 735          resolve_ud_negate(&op[i]);
 736       }
 737
 738       switch (expr->operation) {
 739       case ir_unop_logic_not:
 740          inst = emit(AND(dst_null_d(), op[0], src_reg(1)));
 741          inst->conditional_mod = BRW_CONDITIONAL_Z;
 742          break;
 743
 744       case ir_binop_logic_xor:
 745          inst = emit(XOR(dst_null_d(), op[0], op[1]));
 746          inst->conditional_mod = BRW_CONDITIONAL_NZ;
 747          break;
 748
 749       case ir_binop_logic_or:
 750          inst = emit(OR(dst_null_d(), op[0], op[1]));
 751          inst->conditional_mod = BRW_CONDITIONAL_NZ;
 752          break;
 753
 754       case ir_binop_logic_and:
 755          inst = emit(AND(dst_null_d(), op[0], op[1]));
 756          inst->conditional_mod = BRW_CONDITIONAL_NZ;
 757          break;
 758
 759       case ir_unop_f2b:
 760          if (brw->gen >= 6) {
 761             emit(CMP(dst_null_d(), op[0], src_reg(0.0f), BRW_CONDITIONAL_NZ));
 762          } else {
 763             inst = emit(MOV(dst_null_f(), op[0]));
 764             inst->conditional_mod = BRW_CONDITIONAL_NZ;
 765          }
 766          break;
 767
 768       case ir_unop_i2b:
 769          if (brw->gen >= 6) {
 770             emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ));
 771          } else {
 772             inst = emit(MOV(dst_null_d(), op[0]));
 773             inst->conditional_mod = BRW_CONDITIONAL_NZ;
 774          }
 775          break;
 776
 777       case ir_binop_all_equal:
 778          inst = emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_Z));
 779          *predicate = BRW_PREDICATE_ALIGN16_ALL4H;
 780          break;
 781
 782       case ir_binop_any_nequal:
 783          inst = emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_NZ));
 784          *predicate = BRW_PREDICATE_ALIGN16_ANY4H;
 785          break;
 786
 787       case ir_unop_any:
 788          inst = emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ));
 789          *predicate = BRW_PREDICATE_ALIGN16_ANY4H;
 790          break;
 791
 792       case ir_binop_greater:
 793       case ir_binop_gequal:
 794       case ir_binop_less:
 795       case ir_binop_lequal:
 796       case ir_binop_equal:
 797       case ir_binop_nequal:
 798          emit(CMP(dst_null_d(), op[0], op[1],
 799                   brw_conditional_for_comparison(expr->operation)));
 800          break;
 801
 802       default:
 803          assert(!"not reached");
 804          break;
 805       }
 806       return;
 807    }
 808
 809    ir->accept(this);
 810
 811    resolve_ud_negate(&this->result);
 812
 813    if (brw->gen >= 6) {
 814       vec4_instruction *inst = emit(AND(dst_null_d(),
 815                                         this->result, src_reg(1)));
 816       inst->conditional_mod = BRW_CONDITIONAL_NZ;
 817    } else {
 818       vec4_instruction *inst = emit(MOV(dst_null_d(), this->result));
 819       inst->conditional_mod = BRW_CONDITIONAL_NZ;
 820    }
 821 }
 822
 823 /**
 824  * Emit a gen6 IF statement with the comparison folded into the IF
 825  * instruction.
 826  */
 827 void
 828 vec4_visitor::emit_if_gen6(ir_if *ir)
 829 {
 830    ir_expression *expr = ir->condition->as_expression();
 831
 832    if (expr) {
 833       src_reg op[2];
 834       dst_reg temp;
 835
 836       assert(expr->get_num_operands() <= 2);
 837       for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
 838          expr->operands[i]->accept(this);
 839          op[i] = this->result;
 840       }
 841
 842       switch (expr->operation) {
 843       case ir_unop_logic_not:
 844          emit(IF(op[0], src_reg(0), BRW_CONDITIONAL_Z));
 845          return;
 846
 847       case ir_binop_logic_xor:
 848          emit(IF(op[0], op[1], BRW_CONDITIONAL_NZ));
 849          return;
 850
 851       case ir_binop_logic_or:
 852          temp = dst_reg(this, glsl_type::bool_type);
 853          emit(OR(temp, op[0], op[1]));
 854          emit(IF(src_reg(temp), src_reg(0), BRW_CONDITIONAL_NZ));
 855          return;
 856
 857       case ir_binop_logic_and:
 858          temp = dst_reg(this, glsl_type::bool_type);
 859          emit(AND(temp, op[0], op[1]));
 860          emit(IF(src_reg(temp), src_reg(0), BRW_CONDITIONAL_NZ));
 861          return;
 862
 863       case ir_unop_f2b:
 864          emit(IF(op[0], src_reg(0), BRW_CONDITIONAL_NZ));
 865          return;
 866
 867       case ir_unop_i2b:
 868          emit(IF(op[0], src_reg(0), BRW_CONDITIONAL_NZ));
 869          return;
 870
 871       case ir_binop_greater:
 872       case ir_binop_gequal:
 873       case ir_binop_less:
 874       case ir_binop_lequal:
 875       case ir_binop_equal:
 876       case ir_binop_nequal:
 877          emit(IF(op[0], op[1],
 878                  brw_conditional_for_comparison(expr->operation)));
 879          return;
 880
 881       case ir_binop_all_equal:
 882          emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_Z));
 883          emit(IF(BRW_PREDICATE_ALIGN16_ALL4H));
 884          return;
 885
 886       case ir_binop_any_nequal:
 887          emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_NZ));
 888          emit(IF(BRW_PREDICATE_ALIGN16_ANY4H));
 889          return;
 890
 891       case ir_unop_any:
 892          emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ));
 893          emit(IF(BRW_PREDICATE_ALIGN16_ANY4H));
 894          return;
 895
 896       default:
 897          assert(!"not reached");
 898          emit(IF(op[0], src_reg(0), BRW_CONDITIONAL_NZ));
 899          return;
 900       }
 901       return;
 902    }
 903
 904    ir->condition->accept(this);
 905
 906    emit(IF(this->result, src_reg(0), BRW_CONDITIONAL_NZ));
 907 }
 908
 909 static dst_reg
 910 with_writemask(dst_reg const & r, int mask)
 911 {
 912    dst_reg result = r;
 913    result.writemask = mask;
 914    return result;
 915 }
 916
 917 void
 918 vec4_vs_visitor::emit_prolog()
 919 {
 920    dst_reg sign_recovery_shift;
 921    dst_reg normalize_factor;
 922    dst_reg es3_normalize_factor;
 923
 924    for (int i = 0; i < VERT_ATTRIB_MAX; i++) {
 925       if (vs_prog_data->inputs_read & BITFIELD64_BIT(i)) {
 926          uint8_t wa_flags = vs_compile->key.gl_attrib_wa_flags[i];
 927          dst_reg reg(ATTR, i);
 928          dst_reg reg_d = reg;
 929          reg_d.type = BRW_REGISTER_TYPE_D;
 930          dst_reg reg_ud = reg;
 931          reg_ud.type = BRW_REGISTER_TYPE_UD;
 932
 933          /* Do GL_FIXED rescaling for GLES2.0.  Our GL_FIXED attributes
 934           * come in as floating point conversions of the integer values.
 935           */
 936          if (wa_flags & BRW_ATTRIB_WA_COMPONENT_MASK) {
 937             dst_reg dst = reg;
 938             dst.type = brw_type_for_base_type(glsl_type::vec4_type);
 939             dst.writemask = (1 << (wa_flags & BRW_ATTRIB_WA_COMPONENT_MASK)) - 1;
 940             emit(MUL(dst, src_reg(dst), src_reg(1.0f / 65536.0f)));
 941          }
 942
 943          /* Do sign recovery for 2101010 formats if required. */
 944          if (wa_flags & BRW_ATTRIB_WA_SIGN) {
 945             if (sign_recovery_shift.file == BAD_FILE) {
 946                /* shift constant: <22,22,22,30> */
 947                sign_recovery_shift = dst_reg(this, glsl_type::uvec4_type);
 948                emit(MOV(with_writemask(sign_recovery_shift, WRITEMASK_XYZ), src_reg(22u)));
 949                emit(MOV(with_writemask(sign_recovery_shift, WRITEMASK_W), src_reg(30u)));
 950             }
 951
 952             emit(SHL(reg_ud, src_reg(reg_ud), src_reg(sign_recovery_shift)));
 953             emit(ASR(reg_d, src_reg(reg_d), src_reg(sign_recovery_shift)));
 954          }
 955
 956          /* Apply BGRA swizzle if required. */
 957          if (wa_flags & BRW_ATTRIB_WA_BGRA) {
 958             src_reg temp = src_reg(reg);
 959             temp.swizzle = BRW_SWIZZLE4(2,1,0,3);
 960             emit(MOV(reg, temp));
 961          }
 962
 963          if (wa_flags & BRW_ATTRIB_WA_NORMALIZE) {
 964             /* ES 3.0 has different rules for converting signed normalized
 965              * fixed-point numbers than desktop GL.
 966              */
 967             if (_mesa_is_gles3(ctx) && (wa_flags & BRW_ATTRIB_WA_SIGN)) {
 968                /* According to equation 2.2 of the ES 3.0 specification,
 969                 * signed normalization conversion is done by:
 970                 *
 971                 * f = c / (2^(b-1)-1)
 972                 */
 973                if (es3_normalize_factor.file == BAD_FILE) {
 974                   /* mul constant: 1 / (2^(b-1) - 1) */
 975                   es3_normalize_factor = dst_reg(this, glsl_type::vec4_type);
 976                   emit(MOV(with_writemask(es3_normalize_factor, WRITEMASK_XYZ),
 977                            src_reg(1.0f / ((1<<9) - 1))));
 978                   emit(MOV(with_writemask(es3_normalize_factor, WRITEMASK_W),
 979                            src_reg(1.0f / ((1<<1) - 1))));
 980                }
 981
 982                dst_reg dst = reg;
 983                dst.type = brw_type_for_base_type(glsl_type::vec4_type);
 984                emit(MOV(dst, src_reg(reg_d)));
 985                emit(MUL(dst, src_reg(dst), src_reg(es3_normalize_factor)));
 986                emit_minmax(BRW_CONDITIONAL_G, dst, src_reg(dst), src_reg(-1.0f));
 987             } else {
 988                /* The following equations are from the OpenGL 3.2 specification:
 989                 *
 990                 * 2.1 unsigned normalization
 991                 * f = c/(2^n-1)
 992                 *
 993                 * 2.2 signed normalization
 994                 * f = (2c+1)/(2^n-1)
 995                 *
 996                 * Both of these share a common divisor, which is represented by
 997                 * "normalize_factor" in the code below.
 998                 */
 999                if (normalize_factor.file == BAD_FILE) {
1000                   /* 1 / (2^b - 1) for b=<10,10,10,2> */
1001                   normalize_factor = dst_reg(this, glsl_type::vec4_type);
1002                   emit(MOV(with_writemask(normalize_factor, WRITEMASK_XYZ),
1003                            src_reg(1.0f / ((1<<10) - 1))));
1004                   emit(MOV(with_writemask(normalize_factor, WRITEMASK_W),
1005                            src_reg(1.0f / ((1<<2) - 1))));
1006                }
1007
1008                dst_reg dst = reg;
1009                dst.type = brw_type_for_base_type(glsl_type::vec4_type);
1010                emit(MOV(dst, src_reg((wa_flags & BRW_ATTRIB_WA_SIGN) ? reg_d : reg_ud)));
1011
1012                /* For signed normalization, we want the numerator to be 2c+1. */
1013                if (wa_flags & BRW_ATTRIB_WA_SIGN) {
1014                   emit(MUL(dst, src_reg(dst), src_reg(2.0f)));
1015                   emit(ADD(dst, src_reg(dst), src_reg(1.0f)));
1016                }
1017
1018                emit(MUL(dst, src_reg(dst), src_reg(normalize_factor)));
1019             }
1020          }
1021
1022          if (wa_flags & BRW_ATTRIB_WA_SCALE) {
1023             dst_reg dst = reg;
1024             dst.type = brw_type_for_base_type(glsl_type::vec4_type);
1025             emit(MOV(dst, src_reg((wa_flags & BRW_ATTRIB_WA_SIGN) ? reg_d : reg_ud)));
1026          }
1027       }
1028    }
1029 }
1030
1031
1032 dst_reg *
1033 vec4_vs_visitor::make_reg_for_system_value(ir_variable *ir)
1034 {
1035    /* VertexID is stored by the VF as the last vertex element, but
1036     * we don't represent it with a flag in inputs_read, so we call
1037     * it VERT_ATTRIB_MAX, which setup_attributes() picks up on.
1038     */
1039    dst_reg *reg = new(mem_ctx) dst_reg(ATTR, VERT_ATTRIB_MAX);
1040    vs_prog_data->uses_vertexid = true;
1041
1042    switch (ir->location) {
1043    case SYSTEM_VALUE_VERTEX_ID:
1044       reg->writemask = WRITEMASK_X;
1045       break;
1046    case SYSTEM_VALUE_INSTANCE_ID:
1047       reg->writemask = WRITEMASK_Y;
1048       break;
1049    default:
1050       assert(!"not reached");
1051       break;
1052    }
1053
1054    return reg;
1055 }
1056
1057
1058 void
1059 vec4_visitor::visit(ir_variable *ir)
1060 {
1061    dst_reg *reg = NULL;
1062
1063    if (variable_storage(ir))
1064       return;
1065
1066    switch (ir->mode) {
1067    case ir_var_shader_in:
1068       reg = new(mem_ctx) dst_reg(ATTR, ir->location);
1069       break;
1070
1071    case ir_var_shader_out:
1072       reg = new(mem_ctx) dst_reg(this, ir->type);
1073
1074       for (int i = 0; i < type_size(ir->type); i++) {
1075          output_reg[ir->location + i] = *reg;
1076          output_reg[ir->location + i].reg_offset = i;
1077          output_reg[ir->location + i].type =
1078             brw_type_for_base_type(ir->type->get_scalar_type());
1079          output_reg_annotation[ir->location + i] = ir->name;
1080       }
1081       break;
1082
1083    case ir_var_auto:
1084    case ir_var_temporary:
1085       reg = new(mem_ctx) dst_reg(this, ir->type);
1086       break;
1087
1088    case ir_var_uniform:
1089       reg = new(this->mem_ctx) dst_reg(UNIFORM, this->uniforms);
1090
1091       /* Thanks to the lower_ubo_reference pass, we will see only
1092        * ir_binop_ubo_load expressions and not ir_dereference_variable for UBO
1093        * variables, so no need for them to be in variable_ht.
1094        */
1095       if (ir->is_in_uniform_block())
1096          return;
1097
1098       /* Track how big the whole uniform variable is, in case we need to put a
1099        * copy of its data into pull constants for array access.
1100        */
1101       this->uniform_size[this->uniforms] = type_size(ir->type);
1102
1103       if (!strncmp(ir->name, "gl_", 3)) {
1104          setup_builtin_uniform_values(ir);
1105       } else {
1106          setup_uniform_values(ir);
1107       }
1108       break;
1109
1110    case ir_var_system_value:
1111       reg = make_reg_for_system_value(ir);
1112       break;
1113
1114    default:
1115       assert(!"not reached");
1116    }
1117
1118    reg->type = brw_type_for_base_type(ir->type);
1119    hash_table_insert(this->variable_ht, reg, ir);
1120 }
1121
1122 void
1123 vec4_visitor::visit(ir_loop *ir)
1124 {
1125    dst_reg counter;
1126
1127    /* We don't want debugging output to print the whole body of the
1128     * loop as the annotation.
1129     */
1130    this->base_ir = NULL;
1131
1132    if (ir->counter != NULL) {
1133       this->base_ir = ir->counter;
1134       ir->counter->accept(this);
1135       counter = *(variable_storage(ir->counter));
1136
1137       if (ir->from != NULL) {
1138          this->base_ir = ir->from;
1139          ir->from->accept(this);
1140
1141          emit(MOV(counter, this->result));
1142       }
1143    }
1144
1145    emit(BRW_OPCODE_DO);
1146
1147    if (ir->to) {
1148       this->base_ir = ir->to;
1149       ir->to->accept(this);
1150
1151       emit(CMP(dst_null_d(), src_reg(counter), this->result,
1152                brw_conditional_for_comparison(ir->cmp)));
1153
1154       vec4_instruction *inst = emit(BRW_OPCODE_BREAK);
1155       inst->predicate = BRW_PREDICATE_NORMAL;
1156    }
1157
1158    visit_instructions(&ir->body_instructions);
1159
1160
1161    if (ir->increment) {
1162       this->base_ir = ir->increment;
1163       ir->increment->accept(this);
1164       emit(ADD(counter, src_reg(counter), this->result));
1165    }
1166
1167    emit(BRW_OPCODE_WHILE);
1168 }
1169
1170 void
1171 vec4_visitor::visit(ir_loop_jump *ir)
1172 {
1173    switch (ir->mode) {
1174    case ir_loop_jump::jump_break:
1175       emit(BRW_OPCODE_BREAK);
1176       break;
1177    case ir_loop_jump::jump_continue:
1178       emit(BRW_OPCODE_CONTINUE);
1179       break;
1180    }
1181 }
1182
1183
1184 void
1185 vec4_visitor::visit(ir_function_signature *ir)
1186 {
1187    assert(0);
1188    (void)ir;
1189 }
1190
1191 void
1192 vec4_visitor::visit(ir_function *ir)
1193 {
1194    /* Ignore function bodies other than main() -- we shouldn't see calls to
1195     * them since they should all be inlined.
1196     */
1197    if (strcmp(ir->name, "main") == 0) {
1198       const ir_function_signature *sig;
1199       exec_list empty;
1200
1201       sig = ir->matching_signature(&empty);
1202
1203       assert(sig);
1204
1205       visit_instructions(&sig->body);
1206    }
1207 }
1208
1209 bool
1210 vec4_visitor::try_emit_sat(ir_expression *ir)
1211 {
1212    ir_rvalue *sat_src = ir->as_rvalue_to_saturate();
1213    if (!sat_src)
1214       return false;
1215
1216    sat_src->accept(this);
1217    src_reg src = this->result;
1218
1219    this->result = src_reg(this, ir->type);
1220    vec4_instruction *inst;
1221    inst = emit(MOV(dst_reg(this->result), src));
1222    inst->saturate = true;
1223
1224    return true;
1225 }
1226
1227 bool
1228 vec4_visitor::try_emit_mad(ir_expression *ir, int mul_arg)
1229 {
1230    /* 3-src instructions were introduced in gen6. */
1231    if (brw->gen < 6)
1232       return false;
1233
1234    /* MAD can only handle floating-point data. */
1235    if (ir->type->base_type != GLSL_TYPE_FLOAT)
1236       return false;
1237
1238    ir_rvalue *nonmul = ir->operands[1 - mul_arg];
1239    ir_expression *mul = ir->operands[mul_arg]->as_expression();
1240
1241    if (!mul || mul->operation != ir_binop_mul)
1242       return false;
1243
1244    nonmul->accept(this);
1245    src_reg src0 = fix_3src_operand(this->result);
1246
1247    mul->operands[0]->accept(this);
1248    src_reg src1 = fix_3src_operand(this->result);
1249
1250    mul->operands[1]->accept(this);
1251    src_reg src2 = fix_3src_operand(this->result);
1252
1253    this->result = src_reg(this, ir->type);
1254    emit(BRW_OPCODE_MAD, dst_reg(this->result), src0, src1, src2);
1255
1256    return true;
1257 }
1258
1259 void
1260 vec4_visitor::emit_bool_comparison(unsigned int op,
1261                                  dst_reg dst, src_reg src0, src_reg src1)
1262 {
1263    /* original gen4 does destination conversion before comparison. */
1264    if (brw->gen < 5)
1265       dst.type = src0.type;
1266
1267    emit(CMP(dst, src0, src1, brw_conditional_for_comparison(op)));
1268
1269    dst.type = BRW_REGISTER_TYPE_D;
1270    emit(AND(dst, src_reg(dst), src_reg(0x1)));
1271 }
1272
1273 void
1274 vec4_visitor::emit_minmax(uint32_t conditionalmod, dst_reg dst,
1275                           src_reg src0, src_reg src1)
1276 {
1277    vec4_instruction *inst;
1278
1279    if (brw->gen >= 6) {
1280       inst = emit(BRW_OPCODE_SEL, dst, src0, src1);
1281       inst->conditional_mod = conditionalmod;
1282    } else {
1283       emit(CMP(dst, src0, src1, conditionalmod));
1284
1285       inst = emit(BRW_OPCODE_SEL, dst, src0, src1);
1286       inst->predicate = BRW_PREDICATE_NORMAL;
1287    }
1288 }
1289
1290 static bool
1291 is_16bit_constant(ir_rvalue *rvalue)
1292 {
1293    ir_constant *constant = rvalue->as_constant();
1294    if (!constant)
1295       return false;
1296
1297    if (constant->type != glsl_type::int_type &&
1298        constant->type != glsl_type::uint_type)
1299       return false;
1300
1301    return constant->value.u[0] < (1 << 16);
1302 }
1303
1304 void
1305 vec4_visitor::visit(ir_expression *ir)
1306 {
1307    unsigned int operand;
1308    src_reg op[Elements(ir->operands)];
1309    src_reg result_src;
1310    dst_reg result_dst;
1311    vec4_instruction *inst;
1312
1313    if (try_emit_sat(ir))
1314       return;
1315
1316    if (ir->operation == ir_binop_add) {
1317       if (try_emit_mad(ir, 0) || try_emit_mad(ir, 1))
1318          return;
1319    }
1320
1321    for (operand = 0; operand < ir->get_num_operands(); operand++) {
1322       this->result.file = BAD_FILE;
1323       ir->operands[operand]->accept(this);
1324       if (this->result.file == BAD_FILE) {
1325          printf("Failed to get tree for expression operand:\n");
1326          ir->operands[operand]->print();
1327          exit(1);
1328       }
1329       op[operand] = this->result;
1330
1331       /* Matrix expression operands should have been broken down to vector
1332        * operations already.
1333        */
1334       assert(!ir->operands[operand]->type->is_matrix());
1335    }
1336
1337    int vector_elements = ir->operands[0]->type->vector_elements;
1338    if (ir->operands[1]) {
1339       vector_elements = MAX2(vector_elements,
1340                              ir->operands[1]->type->vector_elements);
1341    }
1342
1343    this->result.file = BAD_FILE;
1344
1345    /* Storage for our result.  Ideally for an assignment we'd be using
1346     * the actual storage for the result here, instead.
1347     */
1348    result_src = src_reg(this, ir->type);
1349    /* convenience for the emit functions below. */
1350    result_dst = dst_reg(result_src);
1351    /* If nothing special happens, this is the result. */
1352    this->result = result_src;
1353    /* Limit writes to the channels that will be used by result_src later.
1354     * This does limit this temp's use as a temporary for multi-instruction
1355     * sequences.
1356     */
1357    result_dst.writemask = (1 << ir->type->vector_elements) - 1;
1358
1359    switch (ir->operation) {
1360    case ir_unop_logic_not:
1361       /* Note that BRW_OPCODE_NOT is not appropriate here, since it is
1362        * ones complement of the whole register, not just bit 0.
1363        */
1364       emit(XOR(result_dst, op[0], src_reg(1)));
1365       break;
1366    case ir_unop_neg:
1367       op[0].negate = !op[0].negate;
1368       emit(MOV(result_dst, op[0]));
1369       break;
1370    case ir_unop_abs:
1371       op[0].abs = true;
1372       op[0].negate = false;
1373       emit(MOV(result_dst, op[0]));
1374       break;
1375
1376    case ir_unop_sign:
1377       emit(MOV(result_dst, src_reg(0.0f)));
1378
1379       emit(CMP(dst_null_d(), op[0], src_reg(0.0f), BRW_CONDITIONAL_G));
1380       inst = emit(MOV(result_dst, src_reg(1.0f)));
1381       inst->predicate = BRW_PREDICATE_NORMAL;
1382
1383       emit(CMP(dst_null_d(), op[0], src_reg(0.0f), BRW_CONDITIONAL_L));
1384       inst = emit(MOV(result_dst, src_reg(-1.0f)));
1385       inst->predicate = BRW_PREDICATE_NORMAL;
1386
1387       break;
1388
1389    case ir_unop_rcp:
1390       emit_math(SHADER_OPCODE_RCP, result_dst, op[0]);
1391       break;
1392
1393    case ir_unop_exp2:
1394       emit_math(SHADER_OPCODE_EXP2, result_dst, op[0]);
1395       break;
1396    case ir_unop_log2:
1397       emit_math(SHADER_OPCODE_LOG2, result_dst, op[0]);
1398       break;
1399    case ir_unop_exp:
1400    case ir_unop_log:
1401       assert(!"not reached: should be handled by ir_explog_to_explog2");
1402       break;
1403    case ir_unop_sin:
1404    case ir_unop_sin_reduced:
1405       emit_math(SHADER_OPCODE_SIN, result_dst, op[0]);
1406       break;
1407    case ir_unop_cos:
1408    case ir_unop_cos_reduced:
1409       emit_math(SHADER_OPCODE_COS, result_dst, op[0]);
1410       break;
1411
1412    case ir_unop_dFdx:
1413    case ir_unop_dFdy:
1414       assert(!"derivatives not valid in vertex shader");
1415       break;
1416
1417    case ir_unop_bitfield_reverse:
1418       emit(BFREV(result_dst, op[0]));
1419       break;
1420    case ir_unop_bit_count:
1421       emit(CBIT(result_dst, op[0]));
1422       break;
1423    case ir_unop_find_msb: {
1424       src_reg temp = src_reg(this, glsl_type::uint_type);
1425
1426       inst = emit(FBH(dst_reg(temp), op[0]));
1427       inst->dst.writemask = WRITEMASK_XYZW;
1428
1429       /* FBH counts from the MSB side, while GLSL's findMSB() wants the count
1430        * from the LSB side. If FBH didn't return an error (0xFFFFFFFF), then
1431        * subtract the result from 31 to convert the MSB count into an LSB count.
1432        */
1433
1434       /* FBH only supports UD type for dst, so use a MOV to convert UD to D. */
1435       temp.swizzle = BRW_SWIZZLE_NOOP;
1436       emit(MOV(result_dst, temp));
1437
1438       src_reg src_tmp = src_reg(result_dst);
1439       emit(CMP(dst_null_d(), src_tmp, src_reg(-1), BRW_CONDITIONAL_NZ));
1440
1441       src_tmp.negate = true;
1442       inst = emit(ADD(result_dst, src_tmp, src_reg(31)));
1443       inst->predicate = BRW_PREDICATE_NORMAL;
1444       break;
1445    }
1446    case ir_unop_find_lsb:
1447       emit(FBL(result_dst, op[0]));
1448       break;
1449
1450    case ir_unop_noise:
1451       assert(!"not reached: should be handled by lower_noise");
1452       break;
1453
1454    case ir_binop_add:
1455       emit(ADD(result_dst, op[0], op[1]));
1456       break;
1457    case ir_binop_sub:
1458       assert(!"not reached: should be handled by ir_sub_to_add_neg");
1459       break;
1460
1461    case ir_binop_mul:
1462       if (ir->type->is_integer()) {
1463          /* For integer multiplication, the MUL uses the low 16 bits of one of
1464           * the operands (src0 through SNB, src1 on IVB and later).  The MACH
1465           * accumulates in the contribution of the upper 16 bits of that
1466           * operand.  If we can determine that one of the args is in the low
1467           * 16 bits, though, we can just emit a single MUL.
1468           */
1469          if (is_16bit_constant(ir->operands[0])) {
1470             if (brw->gen < 7)
1471                emit(MUL(result_dst, op[0], op[1]));
1472             else
1473                emit(MUL(result_dst, op[1], op[0]));
1474          } else if (is_16bit_constant(ir->operands[1])) {
1475             if (brw->gen < 7)
1476                emit(MUL(result_dst, op[1], op[0]));
1477             else
1478                emit(MUL(result_dst, op[0], op[1]));
1479          } else {
1480             struct brw_reg acc = retype(brw_acc_reg(), BRW_REGISTER_TYPE_D);
1481
1482             emit(MUL(acc, op[0], op[1]));
1483             emit(MACH(dst_null_d(), op[0], op[1]));
1484             emit(MOV(result_dst, src_reg(acc)));
1485          }
1486       } else {
1487          emit(MUL(result_dst, op[0], op[1]));
1488       }
1489       break;
1490    case ir_binop_div:
1491       /* Floating point should be lowered by DIV_TO_MUL_RCP in the compiler. */
1492       assert(ir->type->is_integer());
1493       emit_math(SHADER_OPCODE_INT_QUOTIENT, result_dst, op[0], op[1]);
1494       break;
1495    case ir_binop_mod:
1496       /* Floating point should be lowered by MOD_TO_FRACT in the compiler. */
1497       assert(ir->type->is_integer());
1498       emit_math(SHADER_OPCODE_INT_REMAINDER, result_dst, op[0], op[1]);
1499       break;
1500
1501    case ir_binop_less:
1502    case ir_binop_greater:
1503    case ir_binop_lequal:
1504    case ir_binop_gequal:
1505    case ir_binop_equal:
1506    case ir_binop_nequal: {
1507       emit(CMP(result_dst, op[0], op[1],
1508                brw_conditional_for_comparison(ir->operation)));
1509       emit(AND(result_dst, result_src, src_reg(0x1)));
1510       break;
1511    }
1512
1513    case ir_binop_all_equal:
1514       /* "==" operator producing a scalar boolean. */
1515       if (ir->operands[0]->type->is_vector() ||
1516           ir->operands[1]->type->is_vector()) {
1517          emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_Z));
1518          emit(MOV(result_dst, src_reg(0)));
1519          inst = emit(MOV(result_dst, src_reg(1)));
1520          inst->predicate = BRW_PREDICATE_ALIGN16_ALL4H;
1521       } else {
1522          emit(CMP(result_dst, op[0], op[1], BRW_CONDITIONAL_Z));
1523          emit(AND(result_dst, result_src, src_reg(0x1)));
1524       }
1525       break;
1526    case ir_binop_any_nequal:
1527       /* "!=" operator producing a scalar boolean. */
1528       if (ir->operands[0]->type->is_vector() ||
1529           ir->operands[1]->type->is_vector()) {
1530          emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_NZ));
1531
1532          emit(MOV(result_dst, src_reg(0)));
1533          inst = emit(MOV(result_dst, src_reg(1)));
1534          inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
1535       } else {
1536          emit(CMP(result_dst, op[0], op[1], BRW_CONDITIONAL_NZ));
1537          emit(AND(result_dst, result_src, src_reg(0x1)));
1538       }
1539       break;
1540
1541    case ir_unop_any:
1542       emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ));
1543       emit(MOV(result_dst, src_reg(0)));
1544
1545       inst = emit(MOV(result_dst, src_reg(1)));
1546       inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
1547       break;
1548
1549    case ir_binop_logic_xor:
1550       emit(XOR(result_dst, op[0], op[1]));
1551       break;
1552
1553    case ir_binop_logic_or:
1554       emit(OR(result_dst, op[0], op[1]));
1555       break;
1556
1557    case ir_binop_logic_and:
1558       emit(AND(result_dst, op[0], op[1]));
1559       break;
1560
1561    case ir_binop_dot:
1562       assert(ir->operands[0]->type->is_vector());
1563       assert(ir->operands[0]->type == ir->operands[1]->type);
1564       emit_dp(result_dst, op[0], op[1], ir->operands[0]->type->vector_elements);
1565       break;
1566
1567    case ir_unop_sqrt:
1568       emit_math(SHADER_OPCODE_SQRT, result_dst, op[0]);
1569       break;
1570    case ir_unop_rsq:
1571       emit_math(SHADER_OPCODE_RSQ, result_dst, op[0]);
1572       break;
1573
1574    case ir_unop_bitcast_i2f:
1575    case ir_unop_bitcast_u2f:
1576       this->result = op[0];
1577       this->result.type = BRW_REGISTER_TYPE_F;
1578       break;
1579
1580    case ir_unop_bitcast_f2i:
1581       this->result = op[0];
1582       this->result.type = BRW_REGISTER_TYPE_D;
1583       break;
1584
1585    case ir_unop_bitcast_f2u:
1586       this->result = op[0];
1587       this->result.type = BRW_REGISTER_TYPE_UD;
1588       break;
1589
1590    case ir_unop_i2f:
1591    case ir_unop_i2u:
1592    case ir_unop_u2i:
1593    case ir_unop_u2f:
1594    case ir_unop_b2f:
1595    case ir_unop_b2i:
1596    case ir_unop_f2i:
1597    case ir_unop_f2u:
1598       emit(MOV(result_dst, op[0]));
1599       break;
1600    case ir_unop_f2b:
1601    case ir_unop_i2b: {
1602       emit(CMP(result_dst, op[0], src_reg(0.0f), BRW_CONDITIONAL_NZ));
1603       emit(AND(result_dst, result_src, src_reg(1)));
1604       break;
1605    }
1606
1607    case ir_unop_trunc:
1608       emit(RNDZ(result_dst, op[0]));
1609       break;
1610    case ir_unop_ceil:
1611       op[0].negate = !op[0].negate;
1612       inst = emit(RNDD(result_dst, op[0]));
1613       this->result.negate = true;
1614       break;
1615    case ir_unop_floor:
1616       inst = emit(RNDD(result_dst, op[0]));
1617       break;
1618    case ir_unop_fract:
1619       inst = emit(FRC(result_dst, op[0]));
1620       break;
1621    case ir_unop_round_even:
1622       emit(RNDE(result_dst, op[0]));
1623       break;
1624
1625    case ir_binop_min:
1626       emit_minmax(BRW_CONDITIONAL_L, result_dst, op[0], op[1]);
1627       break;
1628    case ir_binop_max:
1629       emit_minmax(BRW_CONDITIONAL_G, result_dst, op[0], op[1]);
1630       break;
1631
1632    case ir_binop_pow:
1633       emit_math(SHADER_OPCODE_POW, result_dst, op[0], op[1]);
1634       break;
1635
1636    case ir_unop_bit_not:
1637       inst = emit(NOT(result_dst, op[0]));
1638       break;
1639    case ir_binop_bit_and:
1640       inst = emit(AND(result_dst, op[0], op[1]));
1641       break;
1642    case ir_binop_bit_xor:
1643       inst = emit(XOR(result_dst, op[0], op[1]));
1644       break;
1645    case ir_binop_bit_or:
1646       inst = emit(OR(result_dst, op[0], op[1]));
1647       break;
1648
1649    case ir_binop_lshift:
1650       inst = emit(SHL(result_dst, op[0], op[1]));
1651       break;
1652
1653    case ir_binop_rshift:
1654       if (ir->type->base_type == GLSL_TYPE_INT)
1655          inst = emit(ASR(result_dst, op[0], op[1]));
1656       else
1657          inst = emit(SHR(result_dst, op[0], op[1]));
1658       break;
1659
1660    case ir_binop_bfm:
1661       emit(BFI1(result_dst, op[0], op[1]));
1662       break;
1663
1664    case ir_binop_ubo_load: {
1665       ir_constant *uniform_block = ir->operands[0]->as_constant();
1666       ir_constant *const_offset_ir = ir->operands[1]->as_constant();
1667       unsigned const_offset = const_offset_ir ? const_offset_ir->value.u[0] : 0;
1668       src_reg offset = op[1];
1669
1670       /* Now, load the vector from that offset. */
1671       assert(ir->type->is_vector() || ir->type->is_scalar());
1672
1673       src_reg packed_consts = src_reg(this, glsl_type::vec4_type);
1674       packed_consts.type = result.type;
1675       src_reg surf_index =
1676          src_reg(SURF_INDEX_VS_UBO(uniform_block->value.u[0]));
1677       if (const_offset_ir) {
1678          offset = src_reg(const_offset / 16);
1679       } else {
1680          emit(SHR(dst_reg(offset), offset, src_reg(4)));
1681       }
1682
1683       vec4_instruction *pull =
1684          emit(new(mem_ctx) vec4_instruction(this,
1685                                             VS_OPCODE_PULL_CONSTANT_LOAD,
1686                                             dst_reg(packed_consts),
1687                                             surf_index,
1688                                             offset));
1689       pull->base_mrf = 14;
1690       pull->mlen = 1;
1691
1692       packed_consts.swizzle = swizzle_for_size(ir->type->vector_elements);
1693       packed_consts.swizzle += BRW_SWIZZLE4(const_offset % 16 / 4,
1694                                             const_offset % 16 / 4,
1695                                             const_offset % 16 / 4,
1696                                             const_offset % 16 / 4);
1697
1698       /* UBO bools are any nonzero int.  We store bools as either 0 or 1. */
1699       if (ir->type->base_type == GLSL_TYPE_BOOL) {
1700          emit(CMP(result_dst, packed_consts, src_reg(0u),
1701                   BRW_CONDITIONAL_NZ));
1702          emit(AND(result_dst, result, src_reg(0x1)));
1703       } else {
1704          emit(MOV(result_dst, packed_consts));
1705       }
1706       break;
1707    }
1708
1709    case ir_binop_vector_extract:
1710       assert(!"should have been lowered by vec_index_to_cond_assign");
1711       break;
1712
1713    case ir_triop_lrp:
1714       op[0] = fix_3src_operand(op[0]);
1715       op[1] = fix_3src_operand(op[1]);
1716       op[2] = fix_3src_operand(op[2]);
1717       /* Note that the instruction's argument order is reversed from GLSL
1718        * and the IR.
1719        */
1720       emit(LRP(result_dst, op[2], op[1], op[0]));
1721       break;
1722
1723    case ir_triop_bfi:
1724       op[0] = fix_3src_operand(op[0]);
1725       op[1] = fix_3src_operand(op[1]);
1726       op[2] = fix_3src_operand(op[2]);
1727       emit(BFI2(result_dst, op[0], op[1], op[2]));
1728       break;
1729
1730    case ir_triop_bitfield_extract:
1731       op[0] = fix_3src_operand(op[0]);
1732       op[1] = fix_3src_operand(op[1]);
1733       op[2] = fix_3src_operand(op[2]);
1734       /* Note that the instruction's argument order is reversed from GLSL
1735        * and the IR.
1736        */
1737       emit(BFE(result_dst, op[2], op[1], op[0]));
1738       break;
1739
1740    case ir_triop_vector_insert:
1741       assert(!"should have been lowered by lower_vector_insert");
1742       break;
1743
1744    case ir_quadop_bitfield_insert:
1745       assert(!"not reached: should be handled by "
1746               "bitfield_insert_to_bfm_bfi\n");
1747       break;
1748
1749    case ir_quadop_vector:
1750       assert(!"not reached: should be handled by lower_quadop_vector");
1751       break;
1752
1753    case ir_unop_pack_half_2x16:
1754       emit_pack_half_2x16(result_dst, op[0]);
1755       break;
1756    case ir_unop_unpack_half_2x16:
1757       emit_unpack_half_2x16(result_dst, op[0]);
1758       break;
1759    case ir_unop_pack_snorm_2x16:
1760    case ir_unop_pack_snorm_4x8:
1761    case ir_unop_pack_unorm_2x16:
1762    case ir_unop_pack_unorm_4x8:
1763    case ir_unop_unpack_snorm_2x16:
1764    case ir_unop_unpack_snorm_4x8:
1765    case ir_unop_unpack_unorm_2x16:
1766    case ir_unop_unpack_unorm_4x8:
1767       assert(!"not reached: should be handled by lower_packing_builtins");
1768       break;
1769    case ir_unop_unpack_half_2x16_split_x:
1770    case ir_unop_unpack_half_2x16_split_y:
1771    case ir_binop_pack_half_2x16_split:
1772       assert(!"not reached: should not occur in vertex shader");
1773       break;
1774    }
1775 }
1776
1777
1778 void
1779 vec4_visitor::visit(ir_swizzle *ir)
1780 {
1781    src_reg src;
1782    int i = 0;
1783    int swizzle[4];
1784
1785    /* Note that this is only swizzles in expressions, not those on the left
1786     * hand side of an assignment, which do write masking.  See ir_assignment
1787     * for that.
1788     */
1789
1790    ir->val->accept(this);
1791    src = this->result;
1792    assert(src.file != BAD_FILE);
1793
1794    for (i = 0; i < ir->type->vector_elements; i++) {
1795       switch (i) {
1796       case 0:
1797          swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.x);
1798          break;
1799       case 1:
1800          swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.y);
1801          break;
1802       case 2:
1803          swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.z);
1804          break;
1805       case 3:
1806          swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.w);
1807             break;
1808       }
1809    }
1810    for (; i < 4; i++) {
1811       /* Replicate the last channel out. */
1812       swizzle[i] = swizzle[ir->type->vector_elements - 1];
1813    }
1814
1815    src.swizzle = BRW_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]);
1816
1817    this->result = src;
1818 }
1819
1820 void
1821 vec4_visitor::visit(ir_dereference_variable *ir)
1822 {
1823    const struct glsl_type *type = ir->type;
1824    dst_reg *reg = variable_storage(ir->var);
1825
1826    if (!reg) {
1827       fail("Failed to find variable storage for %s\n", ir->var->name);
1828       this->result = src_reg(brw_null_reg());
1829       return;
1830    }
1831
1832    this->result = src_reg(*reg);
1833
1834    /* System values get their swizzle from the dst_reg writemask */
1835    if (ir->var->mode == ir_var_system_value)
1836       return;
1837
1838    if (type->is_scalar() || type->is_vector() || type->is_matrix())
1839       this->result.swizzle = swizzle_for_size(type->vector_elements);
1840 }
1841
1842
1843 int
1844 vec4_visitor::compute_array_stride(ir_dereference_array *ir)
1845 {
1846    /* Under normal circumstances array elements are stored consecutively, so
1847     * the stride is equal to the size of the array element.
1848     */
1849    return type_size(ir->type);
1850 }
1851
1852
1853 void
1854 vec4_visitor::visit(ir_dereference_array *ir)
1855 {
1856    ir_constant *constant_index;
1857    src_reg src;
1858    int array_stride = compute_array_stride(ir);
1859
1860    constant_index = ir->array_index->constant_expression_value();
1861
1862    ir->array->accept(this);
1863    src = this->result;
1864
1865    if (constant_index) {
1866       src.reg_offset += constant_index->value.i[0] * array_stride;
1867    } else {
1868       /* Variable index array dereference.  It eats the "vec4" of the
1869        * base of the array and an index that offsets the Mesa register
1870        * index.
1871        */
1872       ir->array_index->accept(this);
1873
1874       src_reg index_reg;
1875
1876       if (array_stride == 1) {
1877          index_reg = this->result;
1878       } else {
1879          index_reg = src_reg(this, glsl_type::int_type);
1880
1881          emit(MUL(dst_reg(index_reg), this->result, src_reg(array_stride)));
1882       }
1883
1884       if (src.reladdr) {
1885          src_reg temp = src_reg(this, glsl_type::int_type);
1886
1887          emit(ADD(dst_reg(temp), *src.reladdr, index_reg));
1888
1889          index_reg = temp;
1890       }
1891
1892       src.reladdr = ralloc(mem_ctx, src_reg);
1893       memcpy(src.reladdr, &index_reg, sizeof(index_reg));
1894    }
1895
1896    /* If the type is smaller than a vec4, replicate the last channel out. */
1897    if (ir->type->is_scalar() || ir->type->is_vector() || ir->type->is_matrix())
1898       src.swizzle = swizzle_for_size(ir->type->vector_elements);
1899    else
1900       src.swizzle = BRW_SWIZZLE_NOOP;
1901    src.type = brw_type_for_base_type(ir->type);
1902
1903    this->result = src;
1904 }
1905
1906 void
1907 vec4_visitor::visit(ir_dereference_record *ir)
1908 {
1909    unsigned int i;
1910    const glsl_type *struct_type = ir->record->type;
1911    int offset = 0;
1912
1913    ir->record->accept(this);
1914
1915    for (i = 0; i < struct_type->length; i++) {
1916       if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0)
1917          break;
1918       offset += type_size(struct_type->fields.structure[i].type);
1919    }
1920
1921    /* If the type is smaller than a vec4, replicate the last channel out. */
1922    if (ir->type->is_scalar() || ir->type->is_vector() || ir->type->is_matrix())
1923       this->result.swizzle = swizzle_for_size(ir->type->vector_elements);
1924    else
1925       this->result.swizzle = BRW_SWIZZLE_NOOP;
1926    this->result.type = brw_type_for_base_type(ir->type);
1927
1928    this->result.reg_offset += offset;
1929 }
1930
1931 /**
1932  * We want to be careful in assignment setup to hit the actual storage
1933  * instead of potentially using a temporary like we might with the
1934  * ir_dereference handler.
1935  */
1936 static dst_reg
1937 get_assignment_lhs(ir_dereference *ir, vec4_visitor *v)
1938 {
1939    /* The LHS must be a dereference.  If the LHS is a variable indexed array
1940     * access of a vector, it must be separated into a series conditional moves
1941     * before reaching this point (see ir_vec_index_to_cond_assign).
1942     */
1943    assert(ir->as_dereference());
1944    ir_dereference_array *deref_array = ir->as_dereference_array();
1945    if (deref_array) {
1946       assert(!deref_array->array->type->is_vector());
1947    }
1948
1949    /* Use the rvalue deref handler for the most part.  We'll ignore
1950     * swizzles in it and write swizzles using writemask, though.
1951     */
1952    ir->accept(v);
1953    return dst_reg(v->result);
1954 }
1955
1956 void
1957 vec4_visitor::emit_block_move(dst_reg *dst, src_reg *src,
1958                               const struct glsl_type *type, uint32_t predicate)
1959 {
1960    if (type->base_type == GLSL_TYPE_STRUCT) {
1961       for (unsigned int i = 0; i < type->length; i++) {
1962          emit_block_move(dst, src, type->fields.structure[i].type, predicate);
1963       }
1964       return;
1965    }
1966
1967    if (type->is_array()) {
1968       for (unsigned int i = 0; i < type->length; i++) {
1969          emit_block_move(dst, src, type->fields.array, predicate);
1970       }
1971       return;
1972    }
1973
1974    if (type->is_matrix()) {
1975       const struct glsl_type *vec_type;
1976
1977       vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT,
1978                                          type->vector_elements, 1);
1979
1980       for (int i = 0; i < type->matrix_columns; i++) {
1981          emit_block_move(dst, src, vec_type, predicate);
1982       }
1983       return;
1984    }
1985
1986    assert(type->is_scalar() || type->is_vector());
1987
1988    dst->type = brw_type_for_base_type(type);
1989    src->type = dst->type;
1990
1991    dst->writemask = (1 << type->vector_elements) - 1;
1992
1993    src->swizzle = swizzle_for_size(type->vector_elements);
1994
1995    vec4_instruction *inst = emit(MOV(*dst, *src));
1996    inst->predicate = predicate;
1997
1998    dst->reg_offset++;
1999    src->reg_offset++;
2000 }
2001
2002
2003 /* If the RHS processing resulted in an instruction generating a
2004  * temporary value, and it would be easy to rewrite the instruction to
2005  * generate its result right into the LHS instead, do so.  This ends
2006  * up reliably removing instructions where it can be tricky to do so
2007  * later without real UD chain information.
2008  */
2009 bool
2010 vec4_visitor::try_rewrite_rhs_to_dst(ir_assignment *ir,
2011                                      dst_reg dst,
2012                                      src_reg src,
2013                                      vec4_instruction *pre_rhs_inst,
2014                                      vec4_instruction *last_rhs_inst)
2015 {
2016    /* This could be supported, but it would take more smarts. */
2017    if (ir->condition)
2018       return false;
2019
2020    if (pre_rhs_inst == last_rhs_inst)
2021       return false; /* No instructions generated to work with. */
2022
2023    /* Make sure the last instruction generated our source reg. */
2024    if (src.file != GRF ||
2025        src.file != last_rhs_inst->dst.file ||
2026        src.reg != last_rhs_inst->dst.reg ||
2027        src.reg_offset != last_rhs_inst->dst.reg_offset ||
2028        src.reladdr ||
2029        src.abs ||
2030        src.negate ||
2031        last_rhs_inst->predicate != BRW_PREDICATE_NONE)
2032       return false;
2033
2034    /* Check that that last instruction fully initialized the channels
2035     * we want to use, in the order we want to use them.  We could
2036     * potentially reswizzle the operands of many instructions so that
2037     * we could handle out of order channels, but don't yet.
2038     */
2039
2040    for (unsigned i = 0; i < 4; i++) {
2041       if (dst.writemask & (1 << i)) {
2042          if (!(last_rhs_inst->dst.writemask & (1 << i)))
2043             return false;
2044
2045          if (BRW_GET_SWZ(src.swizzle, i) != i)
2046             return false;
2047       }
2048    }
2049
2050    /* Success!  Rewrite the instruction. */
2051    last_rhs_inst->dst.file = dst.file;
2052    last_rhs_inst->dst.reg = dst.reg;
2053    last_rhs_inst->dst.reg_offset = dst.reg_offset;
2054    last_rhs_inst->dst.reladdr = dst.reladdr;
2055    last_rhs_inst->dst.writemask &= dst.writemask;
2056
2057    return true;
2058 }
2059
2060 void
2061 vec4_visitor::visit(ir_assignment *ir)
2062 {
2063    dst_reg dst = get_assignment_lhs(ir->lhs, this);
2064    uint32_t predicate = BRW_PREDICATE_NONE;
2065
2066    if (!ir->lhs->type->is_scalar() &&
2067        !ir->lhs->type->is_vector()) {
2068       ir->rhs->accept(this);
2069       src_reg src = this->result;
2070
2071       if (ir->condition) {
2072          emit_bool_to_cond_code(ir->condition, &predicate);
2073       }
2074
2075       /* emit_block_move doesn't account for swizzles in the source register.
2076        * This should be ok, since the source register is a structure or an
2077        * array, and those can't be swizzled.  But double-check to be sure.
2078        */
2079       assert(src.swizzle ==
2080              (ir->rhs->type->is_matrix()
2081               ? swizzle_for_size(ir->rhs->type->vector_elements)
2082               : BRW_SWIZZLE_NOOP));
2083
2084       emit_block_move(&dst, &src, ir->rhs->type, predicate);
2085       return;
2086    }
2087
2088    /* Now we're down to just a scalar/vector with writemasks. */
2089    int i;
2090
2091    vec4_instruction *pre_rhs_inst, *last_rhs_inst;
2092    pre_rhs_inst = (vec4_instruction *)this->instructions.get_tail();
2093
2094    ir->rhs->accept(this);
2095
2096    last_rhs_inst = (vec4_instruction *)this->instructions.get_tail();
2097
2098    src_reg src = this->result;
2099
2100    int swizzles[4];
2101    int first_enabled_chan = 0;
2102    int src_chan = 0;
2103
2104    assert(ir->lhs->type->is_vector() ||
2105           ir->lhs->type->is_scalar());
2106    dst.writemask = ir->write_mask;
2107
2108    for (int i = 0; i < 4; i++) {
2109       if (dst.writemask & (1 << i)) {
2110          first_enabled_chan = BRW_GET_SWZ(src.swizzle, i);
2111          break;
2112       }
2113    }
2114
2115    /* Swizzle a small RHS vector into the channels being written.
2116     *
2117     * glsl ir treats write_mask as dictating how many channels are
2118     * present on the RHS while in our instructions we need to make
2119     * those channels appear in the slots of the vec4 they're written to.
2120     */
2121    for (int i = 0; i < 4; i++) {
2122       if (dst.writemask & (1 << i))
2123          swizzles[i] = BRW_GET_SWZ(src.swizzle, src_chan++);
2124       else
2125          swizzles[i] = first_enabled_chan;
2126    }
2127    src.swizzle = BRW_SWIZZLE4(swizzles[0], swizzles[1],
2128                               swizzles[2], swizzles[3]);
2129
2130    if (try_rewrite_rhs_to_dst(ir, dst, src, pre_rhs_inst, last_rhs_inst)) {
2131       return;
2132    }
2133
2134    if (ir->condition) {
2135       emit_bool_to_cond_code(ir->condition, &predicate);
2136    }
2137
2138    for (i = 0; i < type_size(ir->lhs->type); i++) {
2139       vec4_instruction *inst = emit(MOV(dst, src));
2140       inst->predicate = predicate;
2141
2142       dst.reg_offset++;
2143       src.reg_offset++;
2144    }
2145 }
2146
2147 void
2148 vec4_visitor::emit_constant_values(dst_reg *dst, ir_constant *ir)
2149 {
2150    if (ir->type->base_type == GLSL_TYPE_STRUCT) {
2151       foreach_list(node, &ir->components) {
2152          ir_constant *field_value = (ir_constant *)node;
2153
2154          emit_constant_values(dst, field_value);
2155       }
2156       return;
2157    }
2158
2159    if (ir->type->is_array()) {
2160       for (unsigned int i = 0; i < ir->type->length; i++) {
2161          emit_constant_values(dst, ir->array_elements[i]);
2162       }
2163       return;
2164    }
2165
2166    if (ir->type->is_matrix()) {
2167       for (int i = 0; i < ir->type->matrix_columns; i++) {
2168          float *vec = &ir->value.f[i * ir->type->vector_elements];
2169
2170          for (int j = 0; j < ir->type->vector_elements; j++) {
2171             dst->writemask = 1 << j;
2172             dst->type = BRW_REGISTER_TYPE_F;
2173
2174             emit(MOV(*dst, src_reg(vec[j])));
2175          }
2176          dst->reg_offset++;
2177       }
2178       return;
2179    }
2180
2181    int remaining_writemask = (1 << ir->type->vector_elements) - 1;
2182
2183    for (int i = 0; i < ir->type->vector_elements; i++) {
2184       if (!(remaining_writemask & (1 << i)))
2185          continue;
2186
2187       dst->writemask = 1 << i;
2188       dst->type = brw_type_for_base_type(ir->type);
2189
2190       /* Find other components that match the one we're about to
2191        * write.  Emits fewer instructions for things like vec4(0.5,
2192        * 1.5, 1.5, 1.5).
2193        */
2194       for (int j = i + 1; j < ir->type->vector_elements; j++) {
2195          if (ir->type->base_type == GLSL_TYPE_BOOL) {
2196             if (ir->value.b[i] == ir->value.b[j])
2197                dst->writemask |= (1 << j);
2198          } else {
2199             /* u, i, and f storage all line up, so no need for a
2200              * switch case for comparing each type.
2201              */
2202             if (ir->value.u[i] == ir->value.u[j])
2203                dst->writemask |= (1 << j);
2204          }
2205       }
2206
2207       switch (ir->type->base_type) {
2208       case GLSL_TYPE_FLOAT:
2209          emit(MOV(*dst, src_reg(ir->value.f[i])));
2210          break;
2211       case GLSL_TYPE_INT:
2212          emit(MOV(*dst, src_reg(ir->value.i[i])));
2213          break;
2214       case GLSL_TYPE_UINT:
2215          emit(MOV(*dst, src_reg(ir->value.u[i])));
2216          break;
2217       case GLSL_TYPE_BOOL:
2218          emit(MOV(*dst, src_reg(ir->value.b[i])));
2219          break;
2220       default:
2221          assert(!"Non-float/uint/int/bool constant");
2222          break;
2223       }
2224
2225       remaining_writemask &= ~dst->writemask;
2226    }
2227    dst->reg_offset++;
2228 }
2229
2230 void
2231 vec4_visitor::visit(ir_constant *ir)
2232 {
2233    dst_reg dst = dst_reg(this, ir->type);
2234    this->result = src_reg(dst);
2235
2236    emit_constant_values(&dst, ir);
2237 }
2238
2239 void
2240 vec4_visitor::visit(ir_call *ir)
2241 {
2242    assert(!"not reached");
2243 }
2244
2245 void
2246 vec4_visitor::visit(ir_texture *ir)
2247 {
2248    int sampler =
2249       _mesa_get_sampler_uniform_value(ir->sampler, shader_prog, prog);
2250
2251    /* Should be lowered by do_lower_texture_projection */
2252    assert(!ir->projector);
2253
2254    /* Generate code to compute all the subexpression trees.  This has to be
2255     * done before loading any values into MRFs for the sampler message since
2256     * generating these values may involve SEND messages that need the MRFs.
2257     */
2258    src_reg coordinate;
2259    if (ir->coordinate) {
2260       ir->coordinate->accept(this);
2261       coordinate = this->result;
2262    }
2263
2264    src_reg shadow_comparitor;
2265    if (ir->shadow_comparitor) {
2266       ir->shadow_comparitor->accept(this);
2267       shadow_comparitor = this->result;
2268    }
2269
2270    const glsl_type *lod_type = NULL, *sample_index_type = NULL;
2271    src_reg lod, dPdx, dPdy, sample_index;
2272    switch (ir->op) {
2273    case ir_tex:
2274       lod = src_reg(0.0f);
2275       lod_type = glsl_type::float_type;
2276       break;
2277    case ir_txf:
2278    case ir_txl:
2279    case ir_txs:
2280       ir->lod_info.lod->accept(this);
2281       lod = this->result;
2282       lod_type = ir->lod_info.lod->type;
2283       break;
2284    case ir_txf_ms:
2285       ir->lod_info.sample_index->accept(this);
2286       sample_index = this->result;
2287       sample_index_type = ir->lod_info.sample_index->type;
2288       break;
2289    case ir_txd:
2290       ir->lod_info.grad.dPdx->accept(this);
2291       dPdx = this->result;
2292
2293       ir->lod_info.grad.dPdy->accept(this);
2294       dPdy = this->result;
2295
2296       lod_type = ir->lod_info.grad.dPdx->type;
2297       break;
2298    case ir_txb:
2299    case ir_lod:
2300       break;
2301    }
2302
2303    vec4_instruction *inst = NULL;
2304    switch (ir->op) {
2305    case ir_tex:
2306    case ir_txl:
2307       inst = new(mem_ctx) vec4_instruction(this, SHADER_OPCODE_TXL);
2308       break;
2309    case ir_txd:
2310       inst = new(mem_ctx) vec4_instruction(this, SHADER_OPCODE_TXD);
2311       break;
2312    case ir_txf:
2313       inst = new(mem_ctx) vec4_instruction(this, SHADER_OPCODE_TXF);
2314       break;
2315    case ir_txf_ms:
2316       inst = new(mem_ctx) vec4_instruction(this, SHADER_OPCODE_TXF_MS);
2317       break;
2318    case ir_txs:
2319       inst = new(mem_ctx) vec4_instruction(this, SHADER_OPCODE_TXS);
2320       break;
2321    case ir_txb:
2322       assert(!"TXB is not valid for vertex shaders.");
2323       break;
2324    case ir_lod:
2325       assert(!"LOD is not valid for vertex shaders.");
2326       break;
2327    }
2328
2329    bool use_texture_offset = ir->offset != NULL && ir->op != ir_txf;
2330
2331    /* Texel offsets go in the message header; Gen4 also requires headers. */
2332    inst->header_present = use_texture_offset || brw->gen < 5;
2333    inst->base_mrf = 2;
2334    inst->mlen = inst->header_present + 1; /* always at least one */
2335    inst->sampler = sampler;
2336    inst->dst = dst_reg(this, ir->type);
2337    inst->dst.writemask = WRITEMASK_XYZW;
2338    inst->shadow_compare = ir->shadow_comparitor != NULL;
2339
2340    if (use_texture_offset)
2341       inst->texture_offset = brw_texture_offset(ir->offset->as_constant());
2342
2343    /* MRF for the first parameter */
2344    int param_base = inst->base_mrf + inst->header_present;
2345
2346    if (ir->op == ir_txs) {
2347       int writemask = brw->gen == 4 ? WRITEMASK_W : WRITEMASK_X;
2348       emit(MOV(dst_reg(MRF, param_base, lod_type, writemask), lod));
2349    } else {
2350       int i, coord_mask = 0, zero_mask = 0;
2351       /* Load the coordinate */
2352       /* FINISHME: gl_clamp_mask and saturate */
2353       for (i = 0; i < ir->coordinate->type->vector_elements; i++)
2354          coord_mask |= (1 << i);
2355       for (; i < 4; i++)
2356          zero_mask |= (1 << i);
2357
2358       if (ir->offset && ir->op == ir_txf) {
2359          /* It appears that the ld instruction used for txf does its
2360           * address bounds check before adding in the offset.  To work
2361           * around this, just add the integer offset to the integer
2362           * texel coordinate, and don't put the offset in the header.
2363           */
2364          ir_constant *offset = ir->offset->as_constant();
2365          assert(offset);
2366
2367          for (int j = 0; j < ir->coordinate->type->vector_elements; j++) {
2368             src_reg src = coordinate;
2369             src.swizzle = BRW_SWIZZLE4(BRW_GET_SWZ(src.swizzle, j),
2370                                        BRW_GET_SWZ(src.swizzle, j),
2371                                        BRW_GET_SWZ(src.swizzle, j),
2372                                        BRW_GET_SWZ(src.swizzle, j));
2373             emit(ADD(dst_reg(MRF, param_base, ir->coordinate->type, 1 << j),
2374                      src, offset->value.i[j]));
2375          }
2376       } else {
2377          emit(MOV(dst_reg(MRF, param_base, ir->coordinate->type, coord_mask),
2378                   coordinate));
2379       }
2380       emit(MOV(dst_reg(MRF, param_base, ir->coordinate->type, zero_mask),
2381                src_reg(0)));
2382       /* Load the shadow comparitor */
2383       if (ir->shadow_comparitor && ir->op != ir_txd) {
2384          emit(MOV(dst_reg(MRF, param_base + 1, ir->shadow_comparitor->type,
2385                           WRITEMASK_X),
2386                   shadow_comparitor));
2387          inst->mlen++;
2388       }
2389
2390       /* Load the LOD info */
2391       if (ir->op == ir_tex || ir->op == ir_txl) {
2392          int mrf, writemask;
2393          if (brw->gen >= 5) {
2394             mrf = param_base + 1;
2395             if (ir->shadow_comparitor) {
2396                writemask = WRITEMASK_Y;
2397                /* mlen already incremented */
2398             } else {
2399                writemask = WRITEMASK_X;
2400                inst->mlen++;
2401             }
2402          } else /* brw->gen == 4 */ {
2403             mrf = param_base;
2404             writemask = WRITEMASK_W;
2405          }
2406          emit(MOV(dst_reg(MRF, mrf, lod_type, writemask), lod));
2407       } else if (ir->op == ir_txf) {
2408          emit(MOV(dst_reg(MRF, param_base, lod_type, WRITEMASK_W), lod));
2409       } else if (ir->op == ir_txf_ms) {
2410          emit(MOV(dst_reg(MRF, param_base + 1, sample_index_type, WRITEMASK_X),
2411                   sample_index));
2412          inst->mlen++;
2413
2414          /* on Gen7, there is an additional MCS parameter here after SI,
2415           * but we don't bother to emit it since it's always zero. If
2416           * we start supporting texturing from CMS surfaces, this will have
2417           * to change
2418           */
2419       } else if (ir->op == ir_txd) {
2420          const glsl_type *type = lod_type;
2421
2422          if (brw->gen >= 5) {
2423             dPdx.swizzle = BRW_SWIZZLE4(SWIZZLE_X,SWIZZLE_X,SWIZZLE_Y,SWIZZLE_Y);
2424             dPdy.swizzle = BRW_SWIZZLE4(SWIZZLE_X,SWIZZLE_X,SWIZZLE_Y,SWIZZLE_Y);
2425             emit(MOV(dst_reg(MRF, param_base + 1, type, WRITEMASK_XZ), dPdx));
2426             emit(MOV(dst_reg(MRF, param_base + 1, type, WRITEMASK_YW), dPdy));
2427             inst->mlen++;
2428
2429             if (ir->type->vector_elements == 3 || ir->shadow_comparitor) {
2430                dPdx.swizzle = BRW_SWIZZLE_ZZZZ;
2431                dPdy.swizzle = BRW_SWIZZLE_ZZZZ;
2432                emit(MOV(dst_reg(MRF, param_base + 2, type, WRITEMASK_X), dPdx));
2433                emit(MOV(dst_reg(MRF, param_base + 2, type, WRITEMASK_Y), dPdy));
2434                inst->mlen++;
2435
2436                if (ir->shadow_comparitor) {
2437                   emit(MOV(dst_reg(MRF, param_base + 2,
2438                                    ir->shadow_comparitor->type, WRITEMASK_Z),
2439                            shadow_comparitor));
2440                }
2441             }
2442          } else /* brw->gen == 4 */ {
2443             emit(MOV(dst_reg(MRF, param_base + 1, type, WRITEMASK_XYZ), dPdx));
2444             emit(MOV(dst_reg(MRF, param_base + 2, type, WRITEMASK_XYZ), dPdy));
2445             inst->mlen += 2;
2446          }
2447       }
2448    }
2449
2450    emit(inst);
2451
2452    /* fixup num layers (z) for cube arrays: hardware returns faces * layers;
2453     * spec requires layers.
2454     */
2455    if (ir->op == ir_txs) {
2456       glsl_type const *type = ir->sampler->type;
2457       if (type->sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE &&
2458           type->sampler_array) {
2459          emit_math(SHADER_OPCODE_INT_QUOTIENT,
2460                    with_writemask(inst->dst, WRITEMASK_Z),
2461                    src_reg(inst->dst), src_reg(6));
2462       }
2463    }
2464
2465    swizzle_result(ir, src_reg(inst->dst), sampler);
2466 }
2467
2468 void
2469 vec4_visitor::swizzle_result(ir_texture *ir, src_reg orig_val, int sampler)
2470 {
2471    int s = key->tex.swizzles[sampler];
2472
2473    this->result = src_reg(this, ir->type);
2474    dst_reg swizzled_result(this->result);
2475
2476    if (ir->op == ir_txs || ir->type == glsl_type::float_type
2477                         || s == SWIZZLE_NOOP) {
2478       emit(MOV(swizzled_result, orig_val));
2479       return;
2480    }
2481
2482    int zero_mask = 0, one_mask = 0, copy_mask = 0;
2483    int swizzle[4] = {0};
2484
2485    for (int i = 0; i < 4; i++) {
2486       switch (GET_SWZ(s, i)) {
2487       case SWIZZLE_ZERO:
2488          zero_mask |= (1 << i);
2489          break;
2490       case SWIZZLE_ONE:
2491          one_mask |= (1 << i);
2492          break;
2493       default:
2494          copy_mask |= (1 << i);
2495          swizzle[i] = GET_SWZ(s, i);
2496          break;
2497       }
2498    }
2499
2500    if (copy_mask) {
2501       orig_val.swizzle = BRW_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]);
2502       swizzled_result.writemask = copy_mask;
2503       emit(MOV(swizzled_result, orig_val));
2504    }
2505
2506    if (zero_mask) {
2507       swizzled_result.writemask = zero_mask;
2508       emit(MOV(swizzled_result, src_reg(0.0f)));
2509    }
2510
2511    if (one_mask) {
2512       swizzled_result.writemask = one_mask;
2513       emit(MOV(swizzled_result, src_reg(1.0f)));
2514    }
2515 }
2516
2517 void
2518 vec4_visitor::visit(ir_return *ir)
2519 {
2520    assert(!"not reached");
2521 }
2522
2523 void
2524 vec4_visitor::visit(ir_discard *ir)
2525 {
2526    assert(!"not reached");
2527 }
2528
2529 void
2530 vec4_visitor::visit(ir_if *ir)
2531 {
2532    /* Don't point the annotation at the if statement, because then it plus
2533     * the then and else blocks get printed.
2534     */
2535    this->base_ir = ir->condition;
2536
2537    if (brw->gen == 6) {
2538       emit_if_gen6(ir);
2539    } else {
2540       uint32_t predicate;
2541       emit_bool_to_cond_code(ir->condition, &predicate);
2542       emit(IF(predicate));
2543    }
2544
2545    visit_instructions(&ir->then_instructions);
2546
2547    if (!ir->else_instructions.is_empty()) {
2548       this->base_ir = ir->condition;
2549       emit(BRW_OPCODE_ELSE);
2550
2551       visit_instructions(&ir->else_instructions);
2552    }
2553
2554    this->base_ir = ir->condition;
2555    emit(BRW_OPCODE_ENDIF);
2556 }
2557
2558 void
2559 vec4_visitor::visit(ir_emit_vertex *)
2560 {
2561    assert(!"not reached");
2562 }
2563
2564 void
2565 vec4_visitor::visit(ir_end_primitive *)
2566 {
2567    assert(!"not reached");
2568 }
2569
2570 void
2571 vec4_visitor::emit_ndc_computation()
2572 {
2573    /* Get the position */
2574    src_reg pos = src_reg(output_reg[VARYING_SLOT_POS]);
2575
2576    /* Build ndc coords, which are (x/w, y/w, z/w, 1/w) */
2577    dst_reg ndc = dst_reg(this, glsl_type::vec4_type);
2578    output_reg[BRW_VARYING_SLOT_NDC] = ndc;
2579
2580    current_annotation = "NDC";
2581    dst_reg ndc_w = ndc;
2582    ndc_w.writemask = WRITEMASK_W;
2583    src_reg pos_w = pos;
2584    pos_w.swizzle = BRW_SWIZZLE4(SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W);
2585    emit_math(SHADER_OPCODE_RCP, ndc_w, pos_w);
2586
2587    dst_reg ndc_xyz = ndc;
2588    ndc_xyz.writemask = WRITEMASK_XYZ;
2589
2590    emit(MUL(ndc_xyz, pos, src_reg(ndc_w)));
2591 }
2592
2593 void
2594 vec4_visitor::emit_psiz_and_flags(struct brw_reg reg)
2595 {
2596    if (brw->gen < 6 &&
2597        ((prog_data->vue_map.slots_valid & VARYING_BIT_PSIZ) ||
2598         key->userclip_active || brw->has_negative_rhw_bug)) {
2599       dst_reg header1 = dst_reg(this, glsl_type::uvec4_type);
2600       dst_reg header1_w = header1;
2601       header1_w.writemask = WRITEMASK_W;
2602
2603       emit(MOV(header1, 0u));
2604
2605       if (prog_data->vue_map.slots_valid & VARYING_BIT_PSIZ) {
2606          src_reg psiz = src_reg(output_reg[VARYING_SLOT_PSIZ]);
2607
2608          current_annotation = "Point size";
2609          emit(MUL(header1_w, psiz, src_reg((float)(1 << 11))));
2610          emit(AND(header1_w, src_reg(header1_w), 0x7ff << 8));
2611       }
2612
2613       if (key->userclip_active) {
2614          current_annotation = "Clipping flags";
2615          dst_reg flags0 = dst_reg(this, glsl_type::uint_type);
2616          dst_reg flags1 = dst_reg(this, glsl_type::uint_type);
2617
2618          emit(CMP(dst_null_f(), src_reg(output_reg[VARYING_SLOT_CLIP_DIST0]), src_reg(0.0f), BRW_CONDITIONAL_L));
2619          emit(VS_OPCODE_UNPACK_FLAGS_SIMD4X2, flags0, src_reg(0));
2620          emit(OR(header1_w, src_reg(header1_w), src_reg(flags0)));
2621
2622          emit(CMP(dst_null_f(), src_reg(output_reg[VARYING_SLOT_CLIP_DIST1]), src_reg(0.0f), BRW_CONDITIONAL_L));
2623          emit(VS_OPCODE_UNPACK_FLAGS_SIMD4X2, flags1, src_reg(0));
2624          emit(SHL(flags1, src_reg(flags1), src_reg(4)));
2625          emit(OR(header1_w, src_reg(header1_w), src_reg(flags1)));
2626       }
2627
2628       /* i965 clipping workaround:
2629        * 1) Test for -ve rhw
2630        * 2) If set,
2631        *      set ndc = (0,0,0,0)
2632        *      set ucp[6] = 1
2633        *
2634        * Later, clipping will detect ucp[6] and ensure the primitive is
2635        * clipped against all fixed planes.
2636        */
2637       if (brw->has_negative_rhw_bug) {
2638          src_reg ndc_w = src_reg(output_reg[BRW_VARYING_SLOT_NDC]);
2639          ndc_w.swizzle = BRW_SWIZZLE_WWWW;
2640          emit(CMP(dst_null_f(), ndc_w, src_reg(0.0f), BRW_CONDITIONAL_L));
2641          vec4_instruction *inst;
2642          inst = emit(OR(header1_w, src_reg(header1_w), src_reg(1u << 6)));
2643          inst->predicate = BRW_PREDICATE_NORMAL;
2644          inst = emit(MOV(output_reg[BRW_VARYING_SLOT_NDC], src_reg(0.0f)));
2645          inst->predicate = BRW_PREDICATE_NORMAL;
2646       }
2647
2648       emit(MOV(retype(reg, BRW_REGISTER_TYPE_UD), src_reg(header1)));
2649    } else if (brw->gen < 6) {
2650       emit(MOV(retype(reg, BRW_REGISTER_TYPE_UD), 0u));
2651    } else {
2652       emit(MOV(retype(reg, BRW_REGISTER_TYPE_D), src_reg(0)));
2653       if (prog_data->vue_map.slots_valid & VARYING_BIT_PSIZ) {
2654          emit(MOV(brw_writemask(reg, WRITEMASK_W),
2655                   src_reg(output_reg[VARYING_SLOT_PSIZ])));
2656       }
2657       if (prog_data->vue_map.slots_valid & VARYING_BIT_LAYER) {
2658          emit(MOV(retype(brw_writemask(reg, WRITEMASK_Y), BRW_REGISTER_TYPE_D),
2659                   src_reg(output_reg[VARYING_SLOT_LAYER])));
2660       }
2661    }
2662 }
2663
2664 void
2665 vec4_visitor::emit_clip_distances(dst_reg reg, int offset)
2666 {
2667    /* From the GLSL 1.30 spec, section 7.1 (Vertex Shader Special Variables):
2668     *
2669     *     "If a linked set of shaders forming the vertex stage contains no
2670     *     static write to gl_ClipVertex or gl_ClipDistance, but the
2671     *     application has requested clipping against user clip planes through
2672     *     the API, then the coordinate written to gl_Position is used for
2673     *     comparison against the user clip planes."
2674     *
2675     * This function is only called if the shader didn't write to
2676     * gl_ClipDistance.  Accordingly, we use gl_ClipVertex to perform clipping
2677     * if the user wrote to it; otherwise we use gl_Position.
2678     */
2679    gl_varying_slot clip_vertex = VARYING_SLOT_CLIP_VERTEX;
2680    if (!(prog_data->vue_map.slots_valid & VARYING_BIT_CLIP_VERTEX)) {
2681       clip_vertex = VARYING_SLOT_POS;
2682    }
2683
2684    for (int i = 0; i + offset < key->nr_userclip_plane_consts && i < 4;
2685         ++i) {
2686       reg.writemask = 1 << i;
2687       emit(DP4(reg,
2688                src_reg(output_reg[clip_vertex]),
2689                src_reg(this->userplane[i + offset])));
2690    }
2691 }
2692
2693 void
2694 vec4_visitor::emit_generic_urb_slot(dst_reg reg, int varying)
2695 {
2696    assert (varying < VARYING_SLOT_MAX);
2697    reg.type = output_reg[varying].type;
2698    current_annotation = output_reg_annotation[varying];
2699    /* Copy the register, saturating if necessary */
2700    vec4_instruction *inst = emit(MOV(reg,
2701                                      src_reg(output_reg[varying])));
2702    if ((varying == VARYING_SLOT_COL0 ||
2703         varying == VARYING_SLOT_COL1 ||
2704         varying == VARYING_SLOT_BFC0 ||
2705         varying == VARYING_SLOT_BFC1) &&
2706        key->clamp_vertex_color) {
2707       inst->saturate = true;
2708    }
2709 }
2710
2711 void
2712 vec4_visitor::emit_urb_slot(int mrf, int varying)
2713 {
2714    struct brw_reg hw_reg = brw_message_reg(mrf);
2715    dst_reg reg = dst_reg(MRF, mrf);
2716    reg.type = BRW_REGISTER_TYPE_F;
2717
2718    switch (varying) {
2719    case VARYING_SLOT_PSIZ:
2720       /* PSIZ is always in slot 0, and is coupled with other flags. */
2721       current_annotation = "indices, point width, clip flags";
2722       emit_psiz_and_flags(hw_reg);
2723       break;
2724    case BRW_VARYING_SLOT_NDC:
2725       current_annotation = "NDC";
2726       emit(MOV(reg, src_reg(output_reg[BRW_VARYING_SLOT_NDC])));
2727       break;
2728    case VARYING_SLOT_POS:
2729       current_annotation = "gl_Position";
2730       emit(MOV(reg, src_reg(output_reg[VARYING_SLOT_POS])));
2731       break;
2732    case VARYING_SLOT_EDGE:
2733       /* This is present when doing unfilled polygons.  We're supposed to copy
2734        * the edge flag from the user-provided vertex array
2735        * (glEdgeFlagPointer), or otherwise we'll copy from the current value
2736        * of that attribute (starts as 1.0f).  This is then used in clipping to
2737        * determine which edges should be drawn as wireframe.
2738        */
2739       current_annotation = "edge flag";
2740       emit(MOV(reg, src_reg(dst_reg(ATTR, VERT_ATTRIB_EDGEFLAG,
2741                                     glsl_type::float_type, WRITEMASK_XYZW))));
2742       break;
2743    case BRW_VARYING_SLOT_PAD:
2744       /* No need to write to this slot */
2745       break;
2746    default:
2747       emit_generic_urb_slot(reg, varying);
2748       break;
2749    }
2750 }
2751
2752 static int
2753 align_interleaved_urb_mlen(struct brw_context *brw, int mlen)
2754 {
2755    if (brw->gen >= 6) {
2756       /* URB data written (does not include the message header reg) must
2757        * be a multiple of 256 bits, or 2 VS registers.  See vol5c.5,
2758        * section 5.4.3.2.2: URB_INTERLEAVED.
2759        *
2760        * URB entries are allocated on a multiple of 1024 bits, so an
2761        * extra 128 bits written here to make the end align to 256 is
2762        * no problem.
2763        */
2764       if ((mlen % 2) != 1)
2765          mlen++;
2766    }
2767
2768    return mlen;
2769 }
2770
2771 void
2772 vec4_vs_visitor::emit_urb_write_header(int mrf)
2773 {
2774    /* No need to do anything for VS; an implied write to this MRF will be
2775     * performed by VS_OPCODE_URB_WRITE.
2776     */
2777    (void) mrf;
2778 }
2779
2780 vec4_instruction *
2781 vec4_vs_visitor::emit_urb_write_opcode(bool complete)
2782 {
2783    /* For VS, the URB writes end the thread. */
2784    if (complete) {
2785       if (INTEL_DEBUG & DEBUG_SHADER_TIME)
2786          emit_shader_time_end();
2787    }
2788
2789    vec4_instruction *inst = emit(VS_OPCODE_URB_WRITE);
2790    inst->eot = complete;
2791
2792    return inst;
2793 }
2794
2795 /**
2796  * Generates the VUE payload plus the necessary URB write instructions to
2797  * output it.
2798  *
2799  * The VUE layout is documented in Volume 2a.
2800  */
2801 void
2802 vec4_visitor::emit_vertex()
2803 {
2804    /* MRF 0 is reserved for the debugger, so start with message header
2805     * in MRF 1.
2806     */
2807    int base_mrf = 1;
2808    int mrf = base_mrf;
2809    /* In the process of generating our URB write message contents, we
2810     * may need to unspill a register or load from an array.  Those
2811     * reads would use MRFs 14-15.
2812     */
2813    int max_usable_mrf = 13;
2814
2815    /* The following assertion verifies that max_usable_mrf causes an
2816     * even-numbered amount of URB write data, which will meet gen6's
2817     * requirements for length alignment.
2818     */
2819    assert ((max_usable_mrf - base_mrf) % 2 == 0);
2820
2821    /* First mrf is the g0-based message header containing URB handles and
2822     * such.
2823     */
2824    emit_urb_write_header(mrf++);
2825
2826    if (brw->gen < 6) {
2827       emit_ndc_computation();
2828    }
2829
2830    /* Lower legacy ff and ClipVertex clipping to clip distances */
2831    if (key->userclip_active && !key->uses_clip_distance) {
2832       current_annotation = "user clip distances";
2833
2834       output_reg[VARYING_SLOT_CLIP_DIST0] = dst_reg(this, glsl_type::vec4_type);
2835       output_reg[VARYING_SLOT_CLIP_DIST1] = dst_reg(this, glsl_type::vec4_type);
2836
2837       emit_clip_distances(output_reg[VARYING_SLOT_CLIP_DIST0], 0);
2838       emit_clip_distances(output_reg[VARYING_SLOT_CLIP_DIST1], 4);
2839    }
2840
2841    /* Set up the VUE data for the first URB write */
2842    int slot;
2843    for (slot = 0; slot < prog_data->vue_map.num_slots; ++slot) {
2844       emit_urb_slot(mrf++, prog_data->vue_map.slot_to_varying[slot]);
2845
2846       /* If this was max_usable_mrf, we can't fit anything more into this URB
2847        * WRITE.
2848        */
2849       if (mrf > max_usable_mrf) {
2850          slot++;
2851          break;
2852       }
2853    }
2854
2855    bool complete = slot >= prog_data->vue_map.num_slots;
2856    current_annotation = "URB write";
2857    vec4_instruction *inst = emit_urb_write_opcode(complete);
2858    inst->base_mrf = base_mrf;
2859    inst->mlen = align_interleaved_urb_mlen(brw, mrf - base_mrf);
2860
2861    /* Optional second URB write */
2862    if (!complete) {
2863       mrf = base_mrf + 1;
2864
2865       for (; slot < prog_data->vue_map.num_slots; ++slot) {
2866          assert(mrf < max_usable_mrf);
2867
2868          emit_urb_slot(mrf++, prog_data->vue_map.slot_to_varying[slot]);
2869       }
2870
2871       current_annotation = "URB write";
2872       inst = emit_urb_write_opcode(true /* complete */);
2873       inst->base_mrf = base_mrf;
2874       inst->mlen = align_interleaved_urb_mlen(brw, mrf - base_mrf);
2875       /* URB destination offset.  In the previous write, we got MRFs
2876        * 2-13 minus the one header MRF, so 12 regs.  URB offset is in
2877        * URB row increments, and each of our MRFs is half of one of
2878        * those, since we're doing interleaved writes.
2879        */
2880       inst->offset = (max_usable_mrf - base_mrf) / 2;
2881    }
2882 }
2883
2884 void
2885 vec4_vs_visitor::emit_thread_end()
2886 {
2887    /* For VS, we always end the thread by emitting a single vertex.
2888     * emit_urb_write_opcode() will take care of setting the eot flag on the
2889     * SEND instruction.
2890     */
2891    emit_vertex();
2892 }
2893
2894 src_reg
2895 vec4_visitor::get_scratch_offset(vec4_instruction *inst,
2896                                  src_reg *reladdr, int reg_offset)
2897 {
2898    /* Because we store the values to scratch interleaved like our
2899     * vertex data, we need to scale the vec4 index by 2.
2900     */
2901    int message_header_scale = 2;
2902
2903    /* Pre-gen6, the message header uses byte offsets instead of vec4
2904     * (16-byte) offset units.
2905     */
2906    if (brw->gen < 6)
2907       message_header_scale *= 16;
2908
2909    if (reladdr) {
2910       src_reg index = src_reg(this, glsl_type::int_type);
2911
2912       emit_before(inst, ADD(dst_reg(index), *reladdr, src_reg(reg_offset)));
2913       emit_before(inst, MUL(dst_reg(index),
2914                             index, src_reg(message_header_scale)));
2915
2916       return index;
2917    } else {
2918       return src_reg(reg_offset * message_header_scale);
2919    }
2920 }
2921
2922 src_reg
2923 vec4_visitor::get_pull_constant_offset(vec4_instruction *inst,
2924                                        src_reg *reladdr, int reg_offset)
2925 {
2926    if (reladdr) {
2927       src_reg index = src_reg(this, glsl_type::int_type);
2928
2929       emit_before(inst, ADD(dst_reg(index), *reladdr, src_reg(reg_offset)));
2930
2931       /* Pre-gen6, the message header uses byte offsets instead of vec4
2932        * (16-byte) offset units.
2933        */
2934       if (brw->gen < 6) {
2935          emit_before(inst, MUL(dst_reg(index), index, src_reg(16)));
2936       }
2937
2938       return index;
2939    } else {
2940       int message_header_scale = brw->gen < 6 ? 16 : 1;
2941       return src_reg(reg_offset * message_header_scale);
2942    }
2943 }
2944
2945 /**
2946  * Emits an instruction before @inst to load the value named by @orig_src
2947  * from scratch space at @base_offset to @temp.
2948  *
2949  * @base_offset is measured in 32-byte units (the size of a register).
2950  */
2951 void
2952 vec4_visitor::emit_scratch_read(vec4_instruction *inst,
2953                                 dst_reg temp, src_reg orig_src,
2954                                 int base_offset)
2955 {
2956    int reg_offset = base_offset + orig_src.reg_offset;
2957    src_reg index = get_scratch_offset(inst, orig_src.reladdr, reg_offset);
2958
2959    emit_before(inst, SCRATCH_READ(temp, index));
2960 }
2961
2962 /**
2963  * Emits an instruction after @inst to store the value to be written
2964  * to @orig_dst to scratch space at @base_offset, from @temp.
2965  *
2966  * @base_offset is measured in 32-byte units (the size of a register).
2967  */
2968 void
2969 vec4_visitor::emit_scratch_write(vec4_instruction *inst, int base_offset)
2970 {
2971    int reg_offset = base_offset + inst->dst.reg_offset;
2972    src_reg index = get_scratch_offset(inst, inst->dst.reladdr, reg_offset);
2973
2974    /* Create a temporary register to store *inst's result in.
2975     *
2976     * We have to be careful in MOVing from our temporary result register in
2977     * the scratch write.  If we swizzle from channels of the temporary that
2978     * weren't initialized, it will confuse live interval analysis, which will
2979     * make spilling fail to make progress.
2980     */
2981    src_reg temp = src_reg(this, glsl_type::vec4_type);
2982    temp.type = inst->dst.type;
2983    int first_writemask_chan = ffs(inst->dst.writemask) - 1;
2984    int swizzles[4];
2985    for (int i = 0; i < 4; i++)
2986       if (inst->dst.writemask & (1 << i))
2987          swizzles[i] = i;
2988       else
2989          swizzles[i] = first_writemask_chan;
2990    temp.swizzle = BRW_SWIZZLE4(swizzles[0], swizzles[1],
2991                                swizzles[2], swizzles[3]);
2992
2993    dst_reg dst = dst_reg(brw_writemask(brw_vec8_grf(0, 0),
2994                                        inst->dst.writemask));
2995    vec4_instruction *write = SCRATCH_WRITE(dst, temp, index);
2996    write->predicate = inst->predicate;
2997    write->ir = inst->ir;
2998    write->annotation = inst->annotation;
2999    inst->insert_after(write);
3000
3001    inst->dst.file = temp.file;
3002    inst->dst.reg = temp.reg;
3003    inst->dst.reg_offset = temp.reg_offset;
3004    inst->dst.reladdr = NULL;
3005 }
3006
3007 /**
3008  * We can't generally support array access in GRF space, because a
3009  * single instruction's destination can only span 2 contiguous
3010  * registers.  So, we send all GRF arrays that get variable index
3011  * access to scratch space.
3012  */
3013 void
3014 vec4_visitor::move_grf_array_access_to_scratch()
3015 {
3016    int scratch_loc[this->virtual_grf_count];
3017
3018    for (int i = 0; i < this->virtual_grf_count; i++) {
3019       scratch_loc[i] = -1;
3020    }
3021
3022    /* First, calculate the set of virtual GRFs that need to be punted
3023     * to scratch due to having any array access on them, and where in
3024     * scratch.
3025     */
3026    foreach_list(node, &this->instructions) {
3027       vec4_instruction *inst = (vec4_instruction *)node;
3028
3029       if (inst->dst.file == GRF && inst->dst.reladdr &&
3030           scratch_loc[inst->dst.reg] == -1) {
3031          scratch_loc[inst->dst.reg] = c->last_scratch;
3032          c->last_scratch += this->virtual_grf_sizes[inst->dst.reg];
3033       }
3034
3035       for (int i = 0 ; i < 3; i++) {
3036          src_reg *src = &inst->src[i];
3037
3038          if (src->file == GRF && src->reladdr &&
3039              scratch_loc[src->reg] == -1) {
3040             scratch_loc[src->reg] = c->last_scratch;
3041             c->last_scratch += this->virtual_grf_sizes[src->reg];
3042          }
3043       }
3044    }
3045
3046    /* Now, for anything that will be accessed through scratch, rewrite
3047     * it to load/store.  Note that this is a _safe list walk, because
3048     * we may generate a new scratch_write instruction after the one
3049     * we're processing.
3050     */
3051    foreach_list_safe(node, &this->instructions) {
3052       vec4_instruction *inst = (vec4_instruction *)node;
3053
3054       /* Set up the annotation tracking for new generated instructions. */
3055       base_ir = inst->ir;
3056       current_annotation = inst->annotation;
3057
3058       if (inst->dst.file == GRF && scratch_loc[inst->dst.reg] != -1) {
3059          emit_scratch_write(inst, scratch_loc[inst->dst.reg]);
3060       }
3061
3062       for (int i = 0 ; i < 3; i++) {
3063          if (inst->src[i].file != GRF || scratch_loc[inst->src[i].reg] == -1)
3064             continue;
3065
3066          dst_reg temp = dst_reg(this, glsl_type::vec4_type);
3067
3068          emit_scratch_read(inst, temp, inst->src[i],
3069                            scratch_loc[inst->src[i].reg]);
3070
3071          inst->src[i].file = temp.file;
3072          inst->src[i].reg = temp.reg;
3073          inst->src[i].reg_offset = temp.reg_offset;
3074          inst->src[i].reladdr = NULL;
3075       }
3076    }
3077 }
3078
3079 /**
3080  * Emits an instruction before @inst to load the value named by @orig_src
3081  * from the pull constant buffer (surface) at @base_offset to @temp.
3082  */
3083 void
3084 vec4_visitor::emit_pull_constant_load(vec4_instruction *inst,
3085                                       dst_reg temp, src_reg orig_src,
3086                                       int base_offset)
3087 {
3088    int reg_offset = base_offset + orig_src.reg_offset;
3089    src_reg index = src_reg((unsigned)SURF_INDEX_VERT_CONST_BUFFER);
3090    src_reg offset = get_pull_constant_offset(inst, orig_src.reladdr, reg_offset);
3091    vec4_instruction *load;
3092
3093    if (brw->gen >= 7) {
3094       dst_reg grf_offset = dst_reg(this, glsl_type::int_type);
3095       grf_offset.type = offset.type;
3096       emit_before(inst, MOV(grf_offset, offset));
3097
3098       load = new(mem_ctx) vec4_instruction(this,
3099                                            VS_OPCODE_PULL_CONSTANT_LOAD_GEN7,
3100                                            temp, index, src_reg(grf_offset));
3101    } else {
3102       load = new(mem_ctx) vec4_instruction(this, VS_OPCODE_PULL_CONSTANT_LOAD,
3103                                            temp, index, offset);
3104       load->base_mrf = 14;
3105       load->mlen = 1;
3106    }
3107    emit_before(inst, load);
3108 }
3109
3110 /**
3111  * Implements array access of uniforms by inserting a
3112  * PULL_CONSTANT_LOAD instruction.
3113  *
3114  * Unlike temporary GRF array access (where we don't support it due to
3115  * the difficulty of doing relative addressing on instruction
3116  * destinations), we could potentially do array access of uniforms
3117  * that were loaded in GRF space as push constants.  In real-world
3118  * usage we've seen, though, the arrays being used are always larger
3119  * than we could load as push constants, so just always move all
3120  * uniform array access out to a pull constant buffer.
3121  */
3122 void
3123 vec4_visitor::move_uniform_array_access_to_pull_constants()
3124 {
3125    int pull_constant_loc[this->uniforms];
3126
3127    for (int i = 0; i < this->uniforms; i++) {
3128       pull_constant_loc[i] = -1;
3129    }
3130
3131    /* Walk through and find array access of uniforms.  Put a copy of that
3132     * uniform in the pull constant buffer.
3133     *
3134     * Note that we don't move constant-indexed accesses to arrays.  No
3135     * testing has been done of the performance impact of this choice.
3136     */
3137    foreach_list_safe(node, &this->instructions) {
3138       vec4_instruction *inst = (vec4_instruction *)node;
3139
3140       for (int i = 0 ; i < 3; i++) {
3141          if (inst->src[i].file != UNIFORM || !inst->src[i].reladdr)
3142             continue;
3143
3144          int uniform = inst->src[i].reg;
3145
3146          /* If this array isn't already present in the pull constant buffer,
3147           * add it.
3148           */
3149          if (pull_constant_loc[uniform] == -1) {
3150             const float **values = &prog_data->param[uniform * 4];
3151
3152             pull_constant_loc[uniform] = prog_data->nr_pull_params / 4;
3153
3154             for (int j = 0; j < uniform_size[uniform] * 4; j++) {
3155                prog_data->pull_param[prog_data->nr_pull_params++]
3156                   = values[j];
3157             }
3158          }
3159
3160          /* Set up the annotation tracking for new generated instructions. */
3161          base_ir = inst->ir;
3162          current_annotation = inst->annotation;
3163
3164          dst_reg temp = dst_reg(this, glsl_type::vec4_type);
3165
3166          emit_pull_constant_load(inst, temp, inst->src[i],
3167                                  pull_constant_loc[uniform]);
3168
3169          inst->src[i].file = temp.file;
3170          inst->src[i].reg = temp.reg;
3171          inst->src[i].reg_offset = temp.reg_offset;
3172          inst->src[i].reladdr = NULL;
3173       }
3174    }
3175
3176    /* Now there are no accesses of the UNIFORM file with a reladdr, so
3177     * no need to track them as larger-than-vec4 objects.  This will be
3178     * relied on in cutting out unused uniform vectors from push
3179     * constants.
3180     */
3181    split_uniform_registers();
3182 }
3183
3184 void
3185 vec4_visitor::resolve_ud_negate(src_reg *reg)
3186 {
3187    if (reg->type != BRW_REGISTER_TYPE_UD ||
3188        !reg->negate)
3189       return;
3190
3191    src_reg temp = src_reg(this, glsl_type::uvec4_type);
3192    emit(BRW_OPCODE_MOV, dst_reg(temp), *reg);
3193    *reg = temp;
3194 }
3195
3196 vec4_visitor::vec4_visitor(struct brw_context *brw,
3197                            struct brw_vec4_compile *c,
3198                            struct gl_program *prog,
3199                            const struct brw_vec4_prog_key *key,
3200                            struct brw_vec4_prog_data *prog_data,
3201                            struct gl_shader_program *shader_prog,
3202                            struct brw_shader *shader,
3203                            void *mem_ctx,
3204                            bool debug_flag)
3205    : debug_flag(debug_flag)
3206 {
3207    this->brw = brw;
3208    this->ctx = &brw->ctx;
3209    this->shader_prog = shader_prog;
3210    this->shader = shader;
3211
3212    this->mem_ctx = mem_ctx;
3213    this->failed = false;
3214
3215    this->base_ir = NULL;
3216    this->current_annotation = NULL;
3217    memset(this->output_reg_annotation, 0, sizeof(this->output_reg_annotation));
3218
3219    this->c = c;
3220    this->prog = prog;
3221    this->key = key;
3222    this->prog_data = prog_data;
3223
3224    this->variable_ht = hash_table_ctor(0,
3225                                        hash_table_pointer_hash,
3226                                        hash_table_pointer_compare);
3227
3228    this->virtual_grf_start = NULL;
3229    this->virtual_grf_end = NULL;
3230    this->virtual_grf_sizes = NULL;
3231    this->virtual_grf_count = 0;
3232    this->virtual_grf_reg_map = NULL;
3233    this->virtual_grf_reg_count = 0;
3234    this->virtual_grf_array_size = 0;
3235    this->live_intervals_valid = false;
3236
3237    this->max_grf = brw->gen >= 7 ? GEN7_MRF_HACK_START : BRW_MAX_GRF;
3238
3239    this->uniforms = 0;
3240 }
3241
3242 vec4_visitor::~vec4_visitor()
3243 {
3244    hash_table_dtor(this->variable_ht);
3245 }
3246
3247
3248 vec4_vs_visitor::vec4_vs_visitor(struct brw_context *brw,
3249                                  struct brw_vs_compile *vs_compile,
3250                                  struct brw_vs_prog_data *vs_prog_data,
3251                                  struct gl_shader_program *prog,
3252                                  struct brw_shader *shader,
3253                                  void *mem_ctx)
3254    : vec4_visitor(brw, &vs_compile->base, &vs_compile->vp->program.Base,
3255                   &vs_compile->key.base, &vs_prog_data->base, prog, shader,
3256                   mem_ctx, INTEL_DEBUG & DEBUG_VS),
3257      vs_compile(vs_compile),
3258      vs_prog_data(vs_prog_data)
3259 {
3260 }
3261
3262
3263 void
3264 vec4_visitor::fail(const char *format, ...)
3265 {
3266    va_list va;
3267    char *msg;
3268
3269    if (failed)
3270       return;
3271
3272    failed = true;
3273
3274    va_start(va, format);
3275    msg = ralloc_vasprintf(mem_ctx, format, va);
3276    va_end(va);
3277    msg = ralloc_asprintf(mem_ctx, "VS compile failed: %s\n", msg);
3278
3279    this->fail_msg = msg;
3280
3281    if (debug_flag) {
3282       fprintf(stderr, "%s",  msg);
3283    }
3284 }
3285
3286 } /* namespace brw */