src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp

   1 /*
   2  * Copyright © 2011 Intel Corporation
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the next
  12  * paragraph) shall be included in all copies or substantial portions of the
  13  * Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21  * IN THE SOFTWARE.
  22  */
  23
  24 #include "brw_vec4.h"
  25 extern "C" {
  26 #include "main/macros.h"
  27 #include "program/prog_parameter.h"
  28 }
  29
  30 namespace brw {
  31
  32 src_reg::src_reg(dst_reg reg)
  33 {
  34    init();
  35
  36    this->file = reg.file;
  37    this->reg = reg.reg;
  38    this->reg_offset = reg.reg_offset;
  39    this->type = reg.type;
  40    this->reladdr = reg.reladdr;
  41    this->fixed_hw_reg = reg.fixed_hw_reg;
  42
  43    int swizzles[4];
  44    int next_chan = 0;
  45    int last = 0;
  46
  47    for (int i = 0; i < 4; i++) {
  48       if (!(reg.writemask & (1 << i)))
  49          continue;
  50
  51       swizzles[next_chan++] = last = i;
  52    }
  53
  54    for (; next_chan < 4; next_chan++) {
  55       swizzles[next_chan] = last;
  56    }
  57
  58    this->swizzle = BRW_SWIZZLE4(swizzles[0], swizzles[1],
  59                                 swizzles[2], swizzles[3]);
  60 }
  61
  62 dst_reg::dst_reg(src_reg reg)
  63 {
  64    init();
  65
  66    this->file = reg.file;
  67    this->reg = reg.reg;
  68    this->reg_offset = reg.reg_offset;
  69    this->type = reg.type;
  70    this->writemask = WRITEMASK_XYZW;
  71    this->reladdr = reg.reladdr;
  72    this->fixed_hw_reg = reg.fixed_hw_reg;
  73 }
  74
  75 vec4_instruction::vec4_instruction(vec4_visitor *v,
  76                                    enum opcode opcode, dst_reg dst,
  77                                    src_reg src0, src_reg src1, src_reg src2)
  78 {
  79    this->opcode = opcode;
  80    this->dst = dst;
  81    this->src[0] = src0;
  82    this->src[1] = src1;
  83    this->src[2] = src2;
  84    this->ir = v->base_ir;
  85    this->annotation = v->current_annotation;
  86 }
  87
  88 vec4_instruction *
  89 vec4_visitor::emit(vec4_instruction *inst)
  90 {
  91    this->instructions.push_tail(inst);
  92
  93    return inst;
  94 }
  95
  96 vec4_instruction *
  97 vec4_visitor::emit_before(vec4_instruction *inst, vec4_instruction *new_inst)
  98 {
  99    new_inst->ir = inst->ir;
 100    new_inst->annotation = inst->annotation;
 101
 102    inst->insert_before(new_inst);
 103
 104    return inst;
 105 }
 106
 107 vec4_instruction *
 108 vec4_visitor::emit(enum opcode opcode, dst_reg dst,
 109                    src_reg src0, src_reg src1, src_reg src2)
 110 {
 111    return emit(new(mem_ctx) vec4_instruction(this, opcode, dst,
 112                                              src0, src1, src2));
 113 }
 114
 115
 116 vec4_instruction *
 117 vec4_visitor::emit(enum opcode opcode, dst_reg dst, src_reg src0, src_reg src1)
 118 {
 119    return emit(new(mem_ctx) vec4_instruction(this, opcode, dst, src0, src1));
 120 }
 121
 122 vec4_instruction *
 123 vec4_visitor::emit(enum opcode opcode, dst_reg dst, src_reg src0)
 124 {
 125    return emit(new(mem_ctx) vec4_instruction(this, opcode, dst, src0));
 126 }
 127
 128 vec4_instruction *
 129 vec4_visitor::emit(enum opcode opcode)
 130 {
 131    return emit(new(mem_ctx) vec4_instruction(this, opcode, dst_reg()));
 132 }
 133
 134 #define ALU1(op)                                                        \
 135    vec4_instruction *                                                   \
 136    vec4_visitor::op(dst_reg dst, src_reg src0)                          \
 137    {                                                                    \
 138       return new(mem_ctx) vec4_instruction(this, BRW_OPCODE_##op, dst,  \
 139                                            src0);                       \
 140    }
 141
 142 #define ALU2(op)                                                        \
 143    vec4_instruction *                                                   \
 144    vec4_visitor::op(dst_reg dst, src_reg src0, src_reg src1)            \
 145    {                                                                    \
 146       return new(mem_ctx) vec4_instruction(this, BRW_OPCODE_##op, dst,  \
 147                                            src0, src1);                 \
 148    }
 149
 150 ALU1(NOT)
 151 ALU1(MOV)
 152 ALU1(FRC)
 153 ALU1(RNDD)
 154 ALU1(RNDE)
 155 ALU1(RNDZ)
 156 ALU2(ADD)
 157 ALU2(MUL)
 158 ALU2(MACH)
 159 ALU2(AND)
 160 ALU2(OR)
 161 ALU2(XOR)
 162 ALU2(DP3)
 163 ALU2(DP4)
 164
 165 /** Gen4 predicated IF. */
 166 vec4_instruction *
 167 vec4_visitor::IF(uint32_t predicate)
 168 {
 169    vec4_instruction *inst;
 170
 171    inst = new(mem_ctx) vec4_instruction(this, BRW_OPCODE_IF);
 172    inst->predicate = predicate;
 173
 174    return inst;
 175 }
 176
 177 /** Gen6+ IF with embedded comparison. */
 178 vec4_instruction *
 179 vec4_visitor::IF(src_reg src0, src_reg src1, uint32_t condition)
 180 {
 181    assert(intel->gen >= 6);
 182
 183    vec4_instruction *inst;
 184
 185    inst = new(mem_ctx) vec4_instruction(this, BRW_OPCODE_IF, dst_null_d(),
 186                                         src0, src1);
 187    inst->conditional_mod = condition;
 188
 189    return inst;
 190 }
 191
 192 /**
 193  * CMP: Sets the low bit of the destination channels with the result
 194  * of the comparison, while the upper bits are undefined, and updates
 195  * the flag register with the packed 16 bits of the result.
 196  */
 197 vec4_instruction *
 198 vec4_visitor::CMP(dst_reg dst, src_reg src0, src_reg src1, uint32_t condition)
 199 {
 200    vec4_instruction *inst;
 201
 202    /* original gen4 does type conversion to the destination type
 203     * before before comparison, producing garbage results for floating
 204     * point comparisons.
 205     */
 206    if (intel->gen == 4) {
 207       dst.type = src0.type;
 208       if (dst.file == HW_REG)
 209          dst.fixed_hw_reg.type = dst.type;
 210    }
 211
 212    inst = new(mem_ctx) vec4_instruction(this, BRW_OPCODE_CMP, dst, src0, src1);
 213    inst->conditional_mod = condition;
 214
 215    return inst;
 216 }
 217
 218 vec4_instruction *
 219 vec4_visitor::SCRATCH_READ(dst_reg dst, src_reg index)
 220 {
 221    vec4_instruction *inst;
 222
 223    inst = new(mem_ctx) vec4_instruction(this, VS_OPCODE_SCRATCH_READ,
 224                                         dst, index);
 225    inst->base_mrf = 14;
 226    inst->mlen = 1;
 227
 228    return inst;
 229 }
 230
 231 vec4_instruction *
 232 vec4_visitor::SCRATCH_WRITE(dst_reg dst, src_reg src, src_reg index)
 233 {
 234    vec4_instruction *inst;
 235
 236    inst = new(mem_ctx) vec4_instruction(this, VS_OPCODE_SCRATCH_WRITE,
 237                                         dst, src, index);
 238    inst->base_mrf = 13;
 239    inst->mlen = 2;
 240
 241    return inst;
 242 }
 243
 244 void
 245 vec4_visitor::emit_dp(dst_reg dst, src_reg src0, src_reg src1, unsigned elements)
 246 {
 247    static enum opcode dot_opcodes[] = {
 248       BRW_OPCODE_DP2, BRW_OPCODE_DP3, BRW_OPCODE_DP4
 249    };
 250
 251    emit(dot_opcodes[elements - 2], dst, src0, src1);
 252 }
 253
 254 void
 255 vec4_visitor::emit_math1_gen6(enum opcode opcode, dst_reg dst, src_reg src)
 256 {
 257    /* The gen6 math instruction ignores the source modifiers --
 258     * swizzle, abs, negate, and at least some parts of the register
 259     * region description.
 260     *
 261     * While it would seem that this MOV could be avoided at this point
 262     * in the case that the swizzle is matched up with the destination
 263     * writemask, note that uniform packing and register allocation
 264     * could rearrange our swizzle, so let's leave this matter up to
 265     * copy propagation later.
 266     */
 267    src_reg temp_src = src_reg(this, glsl_type::vec4_type);
 268    emit(MOV(dst_reg(temp_src), src));
 269
 270    if (dst.writemask != WRITEMASK_XYZW) {
 271       /* The gen6 math instruction must be align1, so we can't do
 272        * writemasks.
 273        */
 274       dst_reg temp_dst = dst_reg(this, glsl_type::vec4_type);
 275
 276       emit(opcode, temp_dst, temp_src);
 277
 278       emit(MOV(dst, src_reg(temp_dst)));
 279    } else {
 280       emit(opcode, dst, temp_src);
 281    }
 282 }
 283
 284 void
 285 vec4_visitor::emit_math1_gen4(enum opcode opcode, dst_reg dst, src_reg src)
 286 {
 287    vec4_instruction *inst = emit(opcode, dst, src);
 288    inst->base_mrf = 1;
 289    inst->mlen = 1;
 290 }
 291
 292 void
 293 vec4_visitor::emit_math(opcode opcode, dst_reg dst, src_reg src)
 294 {
 295    switch (opcode) {
 296    case SHADER_OPCODE_RCP:
 297    case SHADER_OPCODE_RSQ:
 298    case SHADER_OPCODE_SQRT:
 299    case SHADER_OPCODE_EXP2:
 300    case SHADER_OPCODE_LOG2:
 301    case SHADER_OPCODE_SIN:
 302    case SHADER_OPCODE_COS:
 303       break;
 304    default:
 305       assert(!"not reached: bad math opcode");
 306       return;
 307    }
 308
 309    if (intel->gen >= 6) {
 310       return emit_math1_gen6(opcode, dst, src);
 311    } else {
 312       return emit_math1_gen4(opcode, dst, src);
 313    }
 314 }
 315
 316 void
 317 vec4_visitor::emit_math2_gen6(enum opcode opcode,
 318                               dst_reg dst, src_reg src0, src_reg src1)
 319 {
 320    src_reg expanded;
 321
 322    /* The gen6 math instruction ignores the source modifiers --
 323     * swizzle, abs, negate, and at least some parts of the register
 324     * region description.  Move the sources to temporaries to make it
 325     * generally work.
 326     */
 327
 328    expanded = src_reg(this, glsl_type::vec4_type);
 329    emit(MOV(dst_reg(expanded), src0));
 330    src0 = expanded;
 331
 332    expanded = src_reg(this, glsl_type::vec4_type);
 333    emit(MOV(dst_reg(expanded), src1));
 334    src1 = expanded;
 335
 336    if (dst.writemask != WRITEMASK_XYZW) {
 337       /* The gen6 math instruction must be align1, so we can't do
 338        * writemasks.
 339        */
 340       dst_reg temp_dst = dst_reg(this, glsl_type::vec4_type);
 341
 342       emit(opcode, temp_dst, src0, src1);
 343
 344       emit(MOV(dst, src_reg(temp_dst)));
 345    } else {
 346       emit(opcode, dst, src0, src1);
 347    }
 348 }
 349
 350 void
 351 vec4_visitor::emit_math2_gen4(enum opcode opcode,
 352                               dst_reg dst, src_reg src0, src_reg src1)
 353 {
 354    vec4_instruction *inst = emit(opcode, dst, src0, src1);
 355    inst->base_mrf = 1;
 356    inst->mlen = 2;
 357 }
 358
 359 void
 360 vec4_visitor::emit_math(enum opcode opcode,
 361                         dst_reg dst, src_reg src0, src_reg src1)
 362 {
 363    assert(opcode == SHADER_OPCODE_POW);
 364
 365    if (intel->gen >= 6) {
 366       return emit_math2_gen6(opcode, dst, src0, src1);
 367    } else {
 368       return emit_math2_gen4(opcode, dst, src0, src1);
 369    }
 370 }
 371
 372 void
 373 vec4_visitor::visit_instructions(const exec_list *list)
 374 {
 375    foreach_list(node, list) {
 376       ir_instruction *ir = (ir_instruction *)node;
 377
 378       base_ir = ir;
 379       ir->accept(this);
 380    }
 381 }
 382
 383
 384 static int
 385 type_size(const struct glsl_type *type)
 386 {
 387    unsigned int i;
 388    int size;
 389
 390    switch (type->base_type) {
 391    case GLSL_TYPE_UINT:
 392    case GLSL_TYPE_INT:
 393    case GLSL_TYPE_FLOAT:
 394    case GLSL_TYPE_BOOL:
 395       if (type->is_matrix()) {
 396          return type->matrix_columns;
 397       } else {
 398          /* Regardless of size of vector, it gets a vec4. This is bad
 399           * packing for things like floats, but otherwise arrays become a
 400           * mess.  Hopefully a later pass over the code can pack scalars
 401           * down if appropriate.
 402           */
 403          return 1;
 404       }
 405    case GLSL_TYPE_ARRAY:
 406       assert(type->length > 0);
 407       return type_size(type->fields.array) * type->length;
 408    case GLSL_TYPE_STRUCT:
 409       size = 0;
 410       for (i = 0; i < type->length; i++) {
 411          size += type_size(type->fields.structure[i].type);
 412       }
 413       return size;
 414    case GLSL_TYPE_SAMPLER:
 415       /* Samplers take up one slot in UNIFORMS[], but they're baked in
 416        * at link time.
 417        */
 418       return 1;
 419    default:
 420       assert(0);
 421       return 0;
 422    }
 423 }
 424
 425 int
 426 vec4_visitor::virtual_grf_alloc(int size)
 427 {
 428    if (virtual_grf_array_size <= virtual_grf_count) {
 429       if (virtual_grf_array_size == 0)
 430          virtual_grf_array_size = 16;
 431       else
 432          virtual_grf_array_size *= 2;
 433       virtual_grf_sizes = reralloc(mem_ctx, virtual_grf_sizes, int,
 434                                    virtual_grf_array_size);
 435       virtual_grf_reg_map = reralloc(mem_ctx, virtual_grf_reg_map, int,
 436                                      virtual_grf_array_size);
 437    }
 438    virtual_grf_reg_map[virtual_grf_count] = virtual_grf_reg_count;
 439    virtual_grf_reg_count += size;
 440    virtual_grf_sizes[virtual_grf_count] = size;
 441    return virtual_grf_count++;
 442 }
 443
 444 src_reg::src_reg(class vec4_visitor *v, const struct glsl_type *type)
 445 {
 446    init();
 447
 448    this->file = GRF;
 449    this->reg = v->virtual_grf_alloc(type_size(type));
 450
 451    if (type->is_array() || type->is_record()) {
 452       this->swizzle = BRW_SWIZZLE_NOOP;
 453    } else {
 454       this->swizzle = swizzle_for_size(type->vector_elements);
 455    }
 456
 457    this->type = brw_type_for_base_type(type);
 458 }
 459
 460 dst_reg::dst_reg(class vec4_visitor *v, const struct glsl_type *type)
 461 {
 462    init();
 463
 464    this->file = GRF;
 465    this->reg = v->virtual_grf_alloc(type_size(type));
 466
 467    if (type->is_array() || type->is_record()) {
 468       this->writemask = WRITEMASK_XYZW;
 469    } else {
 470       this->writemask = (1 << type->vector_elements) - 1;
 471    }
 472
 473    this->type = brw_type_for_base_type(type);
 474 }
 475
 476 /* Our support for uniforms is piggy-backed on the struct
 477  * gl_fragment_program, because that's where the values actually
 478  * get stored, rather than in some global gl_shader_program uniform
 479  * store.
 480  */
 481 int
 482 vec4_visitor::setup_uniform_values(int loc, const glsl_type *type)
 483 {
 484    unsigned int offset = 0;
 485    float *values = &this->vp->Base.Parameters->ParameterValues[loc][0].f;
 486
 487    if (type->is_matrix()) {
 488       const glsl_type *column = glsl_type::get_instance(GLSL_TYPE_FLOAT,
 489                                                         type->vector_elements,
 490                                                         1);
 491
 492       for (unsigned int i = 0; i < type->matrix_columns; i++) {
 493          offset += setup_uniform_values(loc + offset, column);
 494       }
 495
 496       return offset;
 497    }
 498
 499    switch (type->base_type) {
 500    case GLSL_TYPE_FLOAT:
 501    case GLSL_TYPE_UINT:
 502    case GLSL_TYPE_INT:
 503    case GLSL_TYPE_BOOL:
 504       for (unsigned int i = 0; i < type->vector_elements; i++) {
 505          c->prog_data.param[this->uniforms * 4 + i] = &values[i];
 506       }
 507
 508       /* Set up pad elements to get things aligned to a vec4 boundary. */
 509       for (unsigned int i = type->vector_elements; i < 4; i++) {
 510          static float zero = 0;
 511
 512          c->prog_data.param[this->uniforms * 4 + i] = &zero;
 513       }
 514
 515       /* Track the size of this uniform vector, for future packing of
 516        * uniforms.
 517        */
 518       this->uniform_vector_size[this->uniforms] = type->vector_elements;
 519       this->uniforms++;
 520
 521       return 1;
 522
 523    case GLSL_TYPE_STRUCT:
 524       for (unsigned int i = 0; i < type->length; i++) {
 525          offset += setup_uniform_values(loc + offset,
 526                                         type->fields.structure[i].type);
 527       }
 528       return offset;
 529
 530    case GLSL_TYPE_ARRAY:
 531       for (unsigned int i = 0; i < type->length; i++) {
 532          offset += setup_uniform_values(loc + offset, type->fields.array);
 533       }
 534       return offset;
 535
 536    case GLSL_TYPE_SAMPLER:
 537       /* The sampler takes up a slot, but we don't use any values from it. */
 538       return 1;
 539
 540    default:
 541       assert(!"not reached");
 542       return 0;
 543    }
 544 }
 545
 546 /* Our support for builtin uniforms is even scarier than non-builtin.
 547  * It sits on top of the PROG_STATE_VAR parameters that are
 548  * automatically updated from GL context state.
 549  */
 550 void
 551 vec4_visitor::setup_builtin_uniform_values(ir_variable *ir)
 552 {
 553    const ir_state_slot *const slots = ir->state_slots;
 554    assert(ir->state_slots != NULL);
 555
 556    for (unsigned int i = 0; i < ir->num_state_slots; i++) {
 557       /* This state reference has already been setup by ir_to_mesa,
 558        * but we'll get the same index back here.  We can reference
 559        * ParameterValues directly, since unlike brw_fs.cpp, we never
 560        * add new state references during compile.
 561        */
 562       int index = _mesa_add_state_reference(this->vp->Base.Parameters,
 563                                             (gl_state_index *)slots[i].tokens);
 564       float *values = &this->vp->Base.Parameters->ParameterValues[index][0].f;
 565
 566       this->uniform_vector_size[this->uniforms] = 0;
 567       /* Add each of the unique swizzled channels of the element.
 568        * This will end up matching the size of the glsl_type of this field.
 569        */
 570       int last_swiz = -1;
 571       for (unsigned int j = 0; j < 4; j++) {
 572          int swiz = GET_SWZ(slots[i].swizzle, j);
 573          last_swiz = swiz;
 574
 575          c->prog_data.param[this->uniforms * 4 + j] = &values[swiz];
 576          if (swiz <= last_swiz)
 577             this->uniform_vector_size[this->uniforms]++;
 578       }
 579       this->uniforms++;
 580    }
 581 }
 582
 583 dst_reg *
 584 vec4_visitor::variable_storage(ir_variable *var)
 585 {
 586    return (dst_reg *)hash_table_find(this->variable_ht, var);
 587 }
 588
 589 void
 590 vec4_visitor::emit_bool_to_cond_code(ir_rvalue *ir, uint32_t *predicate)
 591 {
 592    ir_expression *expr = ir->as_expression();
 593
 594    *predicate = BRW_PREDICATE_NORMAL;
 595
 596    if (expr) {
 597       src_reg op[2];
 598       vec4_instruction *inst;
 599
 600       assert(expr->get_num_operands() <= 2);
 601       for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
 602          expr->operands[i]->accept(this);
 603          op[i] = this->result;
 604       }
 605
 606       switch (expr->operation) {
 607       case ir_unop_logic_not:
 608          inst = emit(AND(dst_null_d(), op[0], src_reg(1)));
 609          inst->conditional_mod = BRW_CONDITIONAL_Z;
 610          break;
 611
 612       case ir_binop_logic_xor:
 613          inst = emit(XOR(dst_null_d(), op[0], op[1]));
 614          inst->conditional_mod = BRW_CONDITIONAL_NZ;
 615          break;
 616
 617       case ir_binop_logic_or:
 618          inst = emit(OR(dst_null_d(), op[0], op[1]));
 619          inst->conditional_mod = BRW_CONDITIONAL_NZ;
 620          break;
 621
 622       case ir_binop_logic_and:
 623          inst = emit(AND(dst_null_d(), op[0], op[1]));
 624          inst->conditional_mod = BRW_CONDITIONAL_NZ;
 625          break;
 626
 627       case ir_unop_f2b:
 628          if (intel->gen >= 6) {
 629             emit(CMP(dst_null_d(), op[0], src_reg(0.0f), BRW_CONDITIONAL_NZ));
 630          } else {
 631             inst = emit(MOV(dst_null_f(), op[0]));
 632             inst->conditional_mod = BRW_CONDITIONAL_NZ;
 633          }
 634          break;
 635
 636       case ir_unop_i2b:
 637          if (intel->gen >= 6) {
 638             emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ));
 639          } else {
 640             inst = emit(MOV(dst_null_d(), op[0]));
 641             inst->conditional_mod = BRW_CONDITIONAL_NZ;
 642          }
 643          break;
 644
 645       case ir_binop_all_equal:
 646          inst = emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_Z));
 647          *predicate = BRW_PREDICATE_ALIGN16_ALL4H;
 648          break;
 649
 650       case ir_binop_any_nequal:
 651          inst = emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_NZ));
 652          *predicate = BRW_PREDICATE_ALIGN16_ANY4H;
 653          break;
 654
 655       case ir_unop_any:
 656          inst = emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ));
 657          *predicate = BRW_PREDICATE_ALIGN16_ANY4H;
 658          break;
 659
 660       case ir_binop_greater:
 661       case ir_binop_gequal:
 662       case ir_binop_less:
 663       case ir_binop_lequal:
 664       case ir_binop_equal:
 665       case ir_binop_nequal:
 666          emit(CMP(dst_null_d(), op[0], op[1],
 667                   brw_conditional_for_comparison(expr->operation)));
 668          break;
 669
 670       default:
 671          assert(!"not reached");
 672          break;
 673       }
 674       return;
 675    }
 676
 677    ir->accept(this);
 678
 679    if (intel->gen >= 6) {
 680       vec4_instruction *inst = emit(AND(dst_null_d(),
 681                                         this->result, src_reg(1)));
 682       inst->conditional_mod = BRW_CONDITIONAL_NZ;
 683    } else {
 684       vec4_instruction *inst = emit(MOV(dst_null_d(), this->result));
 685       inst->conditional_mod = BRW_CONDITIONAL_NZ;
 686    }
 687 }
 688
 689 /**
 690  * Emit a gen6 IF statement with the comparison folded into the IF
 691  * instruction.
 692  */
 693 void
 694 vec4_visitor::emit_if_gen6(ir_if *ir)
 695 {
 696    ir_expression *expr = ir->condition->as_expression();
 697
 698    if (expr) {
 699       src_reg op[2];
 700       dst_reg temp;
 701
 702       assert(expr->get_num_operands() <= 2);
 703       for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
 704          expr->operands[i]->accept(this);
 705          op[i] = this->result;
 706       }
 707
 708       switch (expr->operation) {
 709       case ir_unop_logic_not:
 710          emit(IF(op[0], src_reg(0), BRW_CONDITIONAL_Z));
 711          return;
 712
 713       case ir_binop_logic_xor:
 714          emit(IF(op[0], op[1], BRW_CONDITIONAL_NZ));
 715          return;
 716
 717       case ir_binop_logic_or:
 718          temp = dst_reg(this, glsl_type::bool_type);
 719          emit(OR(temp, op[0], op[1]));
 720          emit(IF(src_reg(temp), src_reg(0), BRW_CONDITIONAL_NZ));
 721          return;
 722
 723       case ir_binop_logic_and:
 724          temp = dst_reg(this, glsl_type::bool_type);
 725          emit(AND(temp, op[0], op[1]));
 726          emit(IF(src_reg(temp), src_reg(0), BRW_CONDITIONAL_NZ));
 727          return;
 728
 729       case ir_unop_f2b:
 730          emit(IF(op[0], src_reg(0), BRW_CONDITIONAL_NZ));
 731          return;
 732
 733       case ir_unop_i2b:
 734          emit(IF(op[0], src_reg(0), BRW_CONDITIONAL_NZ));
 735          return;
 736
 737       case ir_binop_greater:
 738       case ir_binop_gequal:
 739       case ir_binop_less:
 740       case ir_binop_lequal:
 741       case ir_binop_equal:
 742       case ir_binop_nequal:
 743          emit(IF(op[0], op[1],
 744                  brw_conditional_for_comparison(expr->operation)));
 745          return;
 746
 747       case ir_binop_all_equal:
 748          emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_Z));
 749          emit(IF(BRW_PREDICATE_ALIGN16_ALL4H));
 750          return;
 751
 752       case ir_binop_any_nequal:
 753          emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_NZ));
 754          emit(IF(BRW_PREDICATE_ALIGN16_ANY4H));
 755          return;
 756
 757       case ir_unop_any:
 758          emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ));
 759          emit(IF(BRW_PREDICATE_ALIGN16_ANY4H));
 760          return;
 761
 762       default:
 763          assert(!"not reached");
 764          emit(IF(op[0], src_reg(0), BRW_CONDITIONAL_NZ));
 765          return;
 766       }
 767       return;
 768    }
 769
 770    ir->condition->accept(this);
 771
 772    emit(IF(this->result, src_reg(0), BRW_CONDITIONAL_NZ));
 773 }
 774
 775 void
 776 vec4_visitor::visit(ir_variable *ir)
 777 {
 778    dst_reg *reg = NULL;
 779
 780    if (variable_storage(ir))
 781       return;
 782
 783    switch (ir->mode) {
 784    case ir_var_in:
 785       reg = new(mem_ctx) dst_reg(ATTR, ir->location);
 786
 787       /* Do GL_FIXED rescaling for GLES2.0.  Our GL_FIXED attributes
 788        * come in as floating point conversions of the integer values.
 789        */
 790       for (int i = ir->location; i < ir->location + type_size(ir->type); i++) {
 791          if (!c->key.gl_fixed_input_size[i])
 792             continue;
 793
 794          dst_reg dst = *reg;
 795          dst.writemask = (1 << c->key.gl_fixed_input_size[i]) - 1;
 796          emit(MUL(dst, src_reg(dst), src_reg(1.0f / 65536.0f)));
 797       }
 798       break;
 799
 800    case ir_var_out:
 801       reg = new(mem_ctx) dst_reg(this, ir->type);
 802
 803       for (int i = 0; i < type_size(ir->type); i++) {
 804          output_reg[ir->location + i] = *reg;
 805          output_reg[ir->location + i].reg_offset = i;
 806          output_reg[ir->location + i].type = BRW_REGISTER_TYPE_F;
 807          output_reg_annotation[ir->location + i] = ir->name;
 808       }
 809       break;
 810
 811    case ir_var_auto:
 812    case ir_var_temporary:
 813       reg = new(mem_ctx) dst_reg(this, ir->type);
 814       break;
 815
 816    case ir_var_uniform:
 817       reg = new(this->mem_ctx) dst_reg(UNIFORM, this->uniforms);
 818
 819       /* Track how big the whole uniform variable is, in case we need to put a
 820        * copy of its data into pull constants for array access.
 821        */
 822       this->uniform_size[this->uniforms] = type_size(ir->type);
 823
 824       if (!strncmp(ir->name, "gl_", 3)) {
 825          setup_builtin_uniform_values(ir);
 826       } else {
 827          setup_uniform_values(ir->location, ir->type);
 828       }
 829       break;
 830
 831    default:
 832       assert(!"not reached");
 833    }
 834
 835    reg->type = brw_type_for_base_type(ir->type);
 836    hash_table_insert(this->variable_ht, reg, ir);
 837 }
 838
 839 void
 840 vec4_visitor::visit(ir_loop *ir)
 841 {
 842    dst_reg counter;
 843
 844    /* We don't want debugging output to print the whole body of the
 845     * loop as the annotation.
 846     */
 847    this->base_ir = NULL;
 848
 849    if (ir->counter != NULL) {
 850       this->base_ir = ir->counter;
 851       ir->counter->accept(this);
 852       counter = *(variable_storage(ir->counter));
 853
 854       if (ir->from != NULL) {
 855          this->base_ir = ir->from;
 856          ir->from->accept(this);
 857
 858          emit(MOV(counter, this->result));
 859       }
 860    }
 861
 862    emit(BRW_OPCODE_DO);
 863
 864    if (ir->to) {
 865       this->base_ir = ir->to;
 866       ir->to->accept(this);
 867
 868       emit(CMP(dst_null_d(), src_reg(counter), this->result,
 869                brw_conditional_for_comparison(ir->cmp)));
 870
 871       vec4_instruction *inst = emit(BRW_OPCODE_BREAK);
 872       inst->predicate = BRW_PREDICATE_NORMAL;
 873    }
 874
 875    visit_instructions(&ir->body_instructions);
 876
 877
 878    if (ir->increment) {
 879       this->base_ir = ir->increment;
 880       ir->increment->accept(this);
 881       emit(ADD(counter, src_reg(counter), this->result));
 882    }
 883
 884    emit(BRW_OPCODE_WHILE);
 885 }
 886
 887 void
 888 vec4_visitor::visit(ir_loop_jump *ir)
 889 {
 890    switch (ir->mode) {
 891    case ir_loop_jump::jump_break:
 892       emit(BRW_OPCODE_BREAK);
 893       break;
 894    case ir_loop_jump::jump_continue:
 895       emit(BRW_OPCODE_CONTINUE);
 896       break;
 897    }
 898 }
 899
 900
 901 void
 902 vec4_visitor::visit(ir_function_signature *ir)
 903 {
 904    assert(0);
 905    (void)ir;
 906 }
 907
 908 void
 909 vec4_visitor::visit(ir_function *ir)
 910 {
 911    /* Ignore function bodies other than main() -- we shouldn't see calls to
 912     * them since they should all be inlined.
 913     */
 914    if (strcmp(ir->name, "main") == 0) {
 915       const ir_function_signature *sig;
 916       exec_list empty;
 917
 918       sig = ir->matching_signature(&empty);
 919
 920       assert(sig);
 921
 922       visit_instructions(&sig->body);
 923    }
 924 }
 925
 926 GLboolean
 927 vec4_visitor::try_emit_sat(ir_expression *ir)
 928 {
 929    ir_rvalue *sat_src = ir->as_rvalue_to_saturate();
 930    if (!sat_src)
 931       return false;
 932
 933    sat_src->accept(this);
 934    src_reg src = this->result;
 935
 936    this->result = src_reg(this, ir->type);
 937    vec4_instruction *inst;
 938    inst = emit(MOV(dst_reg(this->result), src));
 939    inst->saturate = true;
 940
 941    return true;
 942 }
 943
 944 void
 945 vec4_visitor::emit_bool_comparison(unsigned int op,
 946                                  dst_reg dst, src_reg src0, src_reg src1)
 947 {
 948    /* original gen4 does destination conversion before comparison. */
 949    if (intel->gen < 5)
 950       dst.type = src0.type;
 951
 952    emit(CMP(dst, src0, src1, brw_conditional_for_comparison(op)));
 953
 954    dst.type = BRW_REGISTER_TYPE_D;
 955    emit(AND(dst, src_reg(dst), src_reg(0x1)));
 956 }
 957
 958 void
 959 vec4_visitor::visit(ir_expression *ir)
 960 {
 961    unsigned int operand;
 962    src_reg op[Elements(ir->operands)];
 963    src_reg result_src;
 964    dst_reg result_dst;
 965    vec4_instruction *inst;
 966
 967    if (try_emit_sat(ir))
 968       return;
 969
 970    for (operand = 0; operand < ir->get_num_operands(); operand++) {
 971       this->result.file = BAD_FILE;
 972       ir->operands[operand]->accept(this);
 973       if (this->result.file == BAD_FILE) {
 974          printf("Failed to get tree for expression operand:\n");
 975          ir->operands[operand]->print();
 976          exit(1);
 977       }
 978       op[operand] = this->result;
 979
 980       /* Matrix expression operands should have been broken down to vector
 981        * operations already.
 982        */
 983       assert(!ir->operands[operand]->type->is_matrix());
 984    }
 985
 986    int vector_elements = ir->operands[0]->type->vector_elements;
 987    if (ir->operands[1]) {
 988       vector_elements = MAX2(vector_elements,
 989                              ir->operands[1]->type->vector_elements);
 990    }
 991
 992    this->result.file = BAD_FILE;
 993
 994    /* Storage for our result.  Ideally for an assignment we'd be using
 995     * the actual storage for the result here, instead.
 996     */
 997    result_src = src_reg(this, ir->type);
 998    /* convenience for the emit functions below. */
 999    result_dst = dst_reg(result_src);
1000    /* If nothing special happens, this is the result. */
1001    this->result = result_src;
1002    /* Limit writes to the channels that will be used by result_src later.
1003     * This does limit this temp's use as a temporary for multi-instruction
1004     * sequences.
1005     */
1006    result_dst.writemask = (1 << ir->type->vector_elements) - 1;
1007
1008    switch (ir->operation) {
1009    case ir_unop_logic_not:
1010       /* Note that BRW_OPCODE_NOT is not appropriate here, since it is
1011        * ones complement of the whole register, not just bit 0.
1012        */
1013       emit(XOR(result_dst, op[0], src_reg(1)));
1014       break;
1015    case ir_unop_neg:
1016       op[0].negate = !op[0].negate;
1017       this->result = op[0];
1018       break;
1019    case ir_unop_abs:
1020       op[0].abs = true;
1021       op[0].negate = false;
1022       this->result = op[0];
1023       break;
1024
1025    case ir_unop_sign:
1026       emit(MOV(result_dst, src_reg(0.0f)));
1027
1028       emit(CMP(dst_null_d(), op[0], src_reg(0.0f), BRW_CONDITIONAL_G));
1029       inst = emit(MOV(result_dst, src_reg(1.0f)));
1030       inst->predicate = BRW_PREDICATE_NORMAL;
1031
1032       emit(CMP(dst_null_d(), op[0], src_reg(0.0f), BRW_CONDITIONAL_L));
1033       inst = emit(MOV(result_dst, src_reg(-1.0f)));
1034       inst->predicate = BRW_PREDICATE_NORMAL;
1035
1036       break;
1037
1038    case ir_unop_rcp:
1039       emit_math(SHADER_OPCODE_RCP, result_dst, op[0]);
1040       break;
1041
1042    case ir_unop_exp2:
1043       emit_math(SHADER_OPCODE_EXP2, result_dst, op[0]);
1044       break;
1045    case ir_unop_log2:
1046       emit_math(SHADER_OPCODE_LOG2, result_dst, op[0]);
1047       break;
1048    case ir_unop_exp:
1049    case ir_unop_log:
1050       assert(!"not reached: should be handled by ir_explog_to_explog2");
1051       break;
1052    case ir_unop_sin:
1053    case ir_unop_sin_reduced:
1054       emit_math(SHADER_OPCODE_SIN, result_dst, op[0]);
1055       break;
1056    case ir_unop_cos:
1057    case ir_unop_cos_reduced:
1058       emit_math(SHADER_OPCODE_COS, result_dst, op[0]);
1059       break;
1060
1061    case ir_unop_dFdx:
1062    case ir_unop_dFdy:
1063       assert(!"derivatives not valid in vertex shader");
1064       break;
1065
1066    case ir_unop_noise:
1067       assert(!"not reached: should be handled by lower_noise");
1068       break;
1069
1070    case ir_binop_add:
1071       emit(ADD(result_dst, op[0], op[1]));
1072       break;
1073    case ir_binop_sub:
1074       assert(!"not reached: should be handled by ir_sub_to_add_neg");
1075       break;
1076
1077    case ir_binop_mul:
1078       if (ir->type->is_integer()) {
1079          /* For integer multiplication, the MUL uses the low 16 bits
1080           * of one of the operands (src0 on gen6, src1 on gen7).  The
1081           * MACH accumulates in the contribution of the upper 16 bits
1082           * of that operand.
1083           *
1084           * FINISHME: Emit just the MUL if we know an operand is small
1085           * enough.
1086           */
1087          struct brw_reg acc = retype(brw_acc_reg(), BRW_REGISTER_TYPE_D);
1088
1089          emit(MUL(acc, op[0], op[1]));
1090          emit(MACH(dst_null_d(), op[0], op[1]));
1091          emit(MOV(result_dst, src_reg(acc)));
1092       } else {
1093          emit(MUL(result_dst, op[0], op[1]));
1094       }
1095       break;
1096    case ir_binop_div:
1097       assert(!"not reached: should be handled by ir_div_to_mul_rcp");
1098    case ir_binop_mod:
1099       assert(!"ir_binop_mod should have been converted to b * fract(a/b)");
1100       break;
1101
1102    case ir_binop_less:
1103    case ir_binop_greater:
1104    case ir_binop_lequal:
1105    case ir_binop_gequal:
1106    case ir_binop_equal:
1107    case ir_binop_nequal: {
1108       emit(CMP(result_dst, op[0], op[1],
1109                brw_conditional_for_comparison(ir->operation)));
1110       emit(AND(result_dst, result_src, src_reg(0x1)));
1111       break;
1112    }
1113
1114    case ir_binop_all_equal:
1115       /* "==" operator producing a scalar boolean. */
1116       if (ir->operands[0]->type->is_vector() ||
1117           ir->operands[1]->type->is_vector()) {
1118          emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_Z));
1119          emit(MOV(result_dst, src_reg(0)));
1120          inst = emit(MOV(result_dst, src_reg(1)));
1121          inst->predicate = BRW_PREDICATE_ALIGN16_ALL4H;
1122       } else {
1123          emit(CMP(result_dst, op[0], op[1], BRW_CONDITIONAL_Z));
1124          emit(AND(result_dst, result_src, src_reg(0x1)));
1125       }
1126       break;
1127    case ir_binop_any_nequal:
1128       /* "!=" operator producing a scalar boolean. */
1129       if (ir->operands[0]->type->is_vector() ||
1130           ir->operands[1]->type->is_vector()) {
1131          emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_NZ));
1132
1133          emit(MOV(result_dst, src_reg(0)));
1134          inst = emit(MOV(result_dst, src_reg(1)));
1135          inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
1136       } else {
1137          emit(CMP(result_dst, op[0], op[1], BRW_CONDITIONAL_NZ));
1138          emit(AND(result_dst, result_src, src_reg(0x1)));
1139       }
1140       break;
1141
1142    case ir_unop_any:
1143       emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ));
1144       emit(MOV(result_dst, src_reg(0)));
1145
1146       inst = emit(MOV(result_dst, src_reg(1)));
1147       inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
1148       break;
1149
1150    case ir_binop_logic_xor:
1151       emit(XOR(result_dst, op[0], op[1]));
1152       break;
1153
1154    case ir_binop_logic_or:
1155       emit(OR(result_dst, op[0], op[1]));
1156       break;
1157
1158    case ir_binop_logic_and:
1159       emit(AND(result_dst, op[0], op[1]));
1160       break;
1161
1162    case ir_binop_dot:
1163       assert(ir->operands[0]->type->is_vector());
1164       assert(ir->operands[0]->type == ir->operands[1]->type);
1165       emit_dp(result_dst, op[0], op[1], ir->operands[0]->type->vector_elements);
1166       break;
1167
1168    case ir_unop_sqrt:
1169       emit_math(SHADER_OPCODE_SQRT, result_dst, op[0]);
1170       break;
1171    case ir_unop_rsq:
1172       emit_math(SHADER_OPCODE_RSQ, result_dst, op[0]);
1173       break;
1174    case ir_unop_i2f:
1175    case ir_unop_i2u:
1176    case ir_unop_u2i:
1177    case ir_unop_u2f:
1178    case ir_unop_b2f:
1179    case ir_unop_b2i:
1180    case ir_unop_f2i:
1181       emit(MOV(result_dst, op[0]));
1182       break;
1183    case ir_unop_f2b:
1184    case ir_unop_i2b: {
1185       emit(CMP(result_dst, op[0], src_reg(0.0f), BRW_CONDITIONAL_NZ));
1186       emit(AND(result_dst, result_src, src_reg(1)));
1187       break;
1188    }
1189
1190    case ir_unop_trunc:
1191       emit(RNDZ(result_dst, op[0]));
1192       break;
1193    case ir_unop_ceil:
1194       op[0].negate = !op[0].negate;
1195       inst = emit(RNDD(result_dst, op[0]));
1196       this->result.negate = true;
1197       break;
1198    case ir_unop_floor:
1199       inst = emit(RNDD(result_dst, op[0]));
1200       break;
1201    case ir_unop_fract:
1202       inst = emit(FRC(result_dst, op[0]));
1203       break;
1204    case ir_unop_round_even:
1205       emit(RNDE(result_dst, op[0]));
1206       break;
1207
1208    case ir_binop_min:
1209       emit(CMP(result_dst, op[0], op[1], BRW_CONDITIONAL_L));
1210
1211       inst = emit(BRW_OPCODE_SEL, result_dst, op[0], op[1]);
1212       inst->predicate = BRW_PREDICATE_NORMAL;
1213       break;
1214    case ir_binop_max:
1215       emit(CMP(result_dst, op[0], op[1], BRW_CONDITIONAL_G));
1216
1217       inst = emit(BRW_OPCODE_SEL, result_dst, op[0], op[1]);
1218       inst->predicate = BRW_PREDICATE_NORMAL;
1219       break;
1220
1221    case ir_binop_pow:
1222       emit_math(SHADER_OPCODE_POW, result_dst, op[0], op[1]);
1223       break;
1224
1225    case ir_unop_bit_not:
1226       inst = emit(NOT(result_dst, op[0]));
1227       break;
1228    case ir_binop_bit_and:
1229       inst = emit(AND(result_dst, op[0], op[1]));
1230       break;
1231    case ir_binop_bit_xor:
1232       inst = emit(XOR(result_dst, op[0], op[1]));
1233       break;
1234    case ir_binop_bit_or:
1235       inst = emit(OR(result_dst, op[0], op[1]));
1236       break;
1237
1238    case ir_binop_lshift:
1239    case ir_binop_rshift:
1240       assert(!"GLSL 1.30 features unsupported");
1241       break;
1242
1243    case ir_quadop_vector:
1244       assert(!"not reached: should be handled by lower_quadop_vector");
1245       break;
1246    }
1247 }
1248
1249
1250 void
1251 vec4_visitor::visit(ir_swizzle *ir)
1252 {
1253    src_reg src;
1254    int i = 0;
1255    int swizzle[4];
1256
1257    /* Note that this is only swizzles in expressions, not those on the left
1258     * hand side of an assignment, which do write masking.  See ir_assignment
1259     * for that.
1260     */
1261
1262    ir->val->accept(this);
1263    src = this->result;
1264    assert(src.file != BAD_FILE);
1265
1266    for (i = 0; i < ir->type->vector_elements; i++) {
1267       switch (i) {
1268       case 0:
1269          swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.x);
1270          break;
1271       case 1:
1272          swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.y);
1273          break;
1274       case 2:
1275          swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.z);
1276          break;
1277       case 3:
1278          swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.w);
1279             break;
1280       }
1281    }
1282    for (; i < 4; i++) {
1283       /* Replicate the last channel out. */
1284       swizzle[i] = swizzle[ir->type->vector_elements - 1];
1285    }
1286
1287    src.swizzle = BRW_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]);
1288
1289    this->result = src;
1290 }
1291
1292 void
1293 vec4_visitor::visit(ir_dereference_variable *ir)
1294 {
1295    const struct glsl_type *type = ir->type;
1296    dst_reg *reg = variable_storage(ir->var);
1297
1298    if (!reg) {
1299       fail("Failed to find variable storage for %s\n", ir->var->name);
1300       this->result = src_reg(brw_null_reg());
1301       return;
1302    }
1303
1304    this->result = src_reg(*reg);
1305
1306    if (type->is_scalar() || type->is_vector() || type->is_matrix())
1307       this->result.swizzle = swizzle_for_size(type->vector_elements);
1308 }
1309
1310 void
1311 vec4_visitor::visit(ir_dereference_array *ir)
1312 {
1313    ir_constant *constant_index;
1314    src_reg src;
1315    int element_size = type_size(ir->type);
1316
1317    constant_index = ir->array_index->constant_expression_value();
1318
1319    ir->array->accept(this);
1320    src = this->result;
1321
1322    if (constant_index) {
1323       src.reg_offset += constant_index->value.i[0] * element_size;
1324    } else {
1325       /* Variable index array dereference.  It eats the "vec4" of the
1326        * base of the array and an index that offsets the Mesa register
1327        * index.
1328        */
1329       ir->array_index->accept(this);
1330
1331       src_reg index_reg;
1332
1333       if (element_size == 1) {
1334          index_reg = this->result;
1335       } else {
1336          index_reg = src_reg(this, glsl_type::int_type);
1337
1338          emit(MUL(dst_reg(index_reg), this->result, src_reg(element_size)));
1339       }
1340
1341       if (src.reladdr) {
1342          src_reg temp = src_reg(this, glsl_type::int_type);
1343
1344          emit(ADD(dst_reg(temp), *src.reladdr, index_reg));
1345
1346          index_reg = temp;
1347       }
1348
1349       src.reladdr = ralloc(mem_ctx, src_reg);
1350       memcpy(src.reladdr, &index_reg, sizeof(index_reg));
1351    }
1352
1353    /* If the type is smaller than a vec4, replicate the last channel out. */
1354    if (ir->type->is_scalar() || ir->type->is_vector())
1355       src.swizzle = swizzle_for_size(ir->type->vector_elements);
1356    else
1357       src.swizzle = BRW_SWIZZLE_NOOP;
1358    src.type = brw_type_for_base_type(ir->type);
1359
1360    this->result = src;
1361 }
1362
1363 void
1364 vec4_visitor::visit(ir_dereference_record *ir)
1365 {
1366    unsigned int i;
1367    const glsl_type *struct_type = ir->record->type;
1368    int offset = 0;
1369
1370    ir->record->accept(this);
1371
1372    for (i = 0; i < struct_type->length; i++) {
1373       if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0)
1374          break;
1375       offset += type_size(struct_type->fields.structure[i].type);
1376    }
1377
1378    /* If the type is smaller than a vec4, replicate the last channel out. */
1379    if (ir->type->is_scalar() || ir->type->is_vector())
1380       this->result.swizzle = swizzle_for_size(ir->type->vector_elements);
1381    else
1382       this->result.swizzle = BRW_SWIZZLE_NOOP;
1383    this->result.type = brw_type_for_base_type(ir->type);
1384
1385    this->result.reg_offset += offset;
1386 }
1387
1388 /**
1389  * We want to be careful in assignment setup to hit the actual storage
1390  * instead of potentially using a temporary like we might with the
1391  * ir_dereference handler.
1392  */
1393 static dst_reg
1394 get_assignment_lhs(ir_dereference *ir, vec4_visitor *v)
1395 {
1396    /* The LHS must be a dereference.  If the LHS is a variable indexed array
1397     * access of a vector, it must be separated into a series conditional moves
1398     * before reaching this point (see ir_vec_index_to_cond_assign).
1399     */
1400    assert(ir->as_dereference());
1401    ir_dereference_array *deref_array = ir->as_dereference_array();
1402    if (deref_array) {
1403       assert(!deref_array->array->type->is_vector());
1404    }
1405
1406    /* Use the rvalue deref handler for the most part.  We'll ignore
1407     * swizzles in it and write swizzles using writemask, though.
1408     */
1409    ir->accept(v);
1410    return dst_reg(v->result);
1411 }
1412
1413 void
1414 vec4_visitor::emit_block_move(dst_reg *dst, src_reg *src,
1415                               const struct glsl_type *type, uint32_t predicate)
1416 {
1417    if (type->base_type == GLSL_TYPE_STRUCT) {
1418       for (unsigned int i = 0; i < type->length; i++) {
1419          emit_block_move(dst, src, type->fields.structure[i].type, predicate);
1420       }
1421       return;
1422    }
1423
1424    if (type->is_array()) {
1425       for (unsigned int i = 0; i < type->length; i++) {
1426          emit_block_move(dst, src, type->fields.array, predicate);
1427       }
1428       return;
1429    }
1430
1431    if (type->is_matrix()) {
1432       const struct glsl_type *vec_type;
1433
1434       vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT,
1435                                          type->vector_elements, 1);
1436
1437       for (int i = 0; i < type->matrix_columns; i++) {
1438          emit_block_move(dst, src, vec_type, predicate);
1439       }
1440       return;
1441    }
1442
1443    assert(type->is_scalar() || type->is_vector());
1444
1445    dst->type = brw_type_for_base_type(type);
1446    src->type = dst->type;
1447
1448    dst->writemask = (1 << type->vector_elements) - 1;
1449
1450    /* Do we need to worry about swizzling a swizzle? */
1451    assert(src->swizzle = BRW_SWIZZLE_NOOP);
1452    src->swizzle = swizzle_for_size(type->vector_elements);
1453
1454    vec4_instruction *inst = emit(MOV(*dst, *src));
1455    inst->predicate = predicate;
1456
1457    dst->reg_offset++;
1458    src->reg_offset++;
1459 }
1460
1461
1462 /* If the RHS processing resulted in an instruction generating a
1463  * temporary value, and it would be easy to rewrite the instruction to
1464  * generate its result right into the LHS instead, do so.  This ends
1465  * up reliably removing instructions where it can be tricky to do so
1466  * later without real UD chain information.
1467  */
1468 bool
1469 vec4_visitor::try_rewrite_rhs_to_dst(ir_assignment *ir,
1470                                      dst_reg dst,
1471                                      src_reg src,
1472                                      vec4_instruction *pre_rhs_inst,
1473                                      vec4_instruction *last_rhs_inst)
1474 {
1475    /* This could be supported, but it would take more smarts. */
1476    if (ir->condition)
1477       return false;
1478
1479    if (pre_rhs_inst == last_rhs_inst)
1480       return false; /* No instructions generated to work with. */
1481
1482    /* Make sure the last instruction generated our source reg. */
1483    if (src.file != GRF ||
1484        src.file != last_rhs_inst->dst.file ||
1485        src.reg != last_rhs_inst->dst.reg ||
1486        src.reg_offset != last_rhs_inst->dst.reg_offset ||
1487        src.reladdr ||
1488        src.abs ||
1489        src.negate ||
1490        last_rhs_inst->predicate != BRW_PREDICATE_NONE)
1491       return false;
1492
1493    /* Check that that last instruction fully initialized the channels
1494     * we want to use, in the order we want to use them.  We could
1495     * potentially reswizzle the operands of many instructions so that
1496     * we could handle out of order channels, but don't yet.
1497     */
1498    for (int i = 0; i < 4; i++) {
1499       if (dst.writemask & (1 << i)) {
1500          if (!(last_rhs_inst->dst.writemask & (1 << i)))
1501             return false;
1502
1503          if (BRW_GET_SWZ(src.swizzle, i) != i)
1504             return false;
1505       }
1506    }
1507
1508    /* Success!  Rewrite the instruction. */
1509    last_rhs_inst->dst.file = dst.file;
1510    last_rhs_inst->dst.reg = dst.reg;
1511    last_rhs_inst->dst.reg_offset = dst.reg_offset;
1512    last_rhs_inst->dst.reladdr = dst.reladdr;
1513    last_rhs_inst->dst.writemask &= dst.writemask;
1514
1515    return true;
1516 }
1517
1518 void
1519 vec4_visitor::visit(ir_assignment *ir)
1520 {
1521    dst_reg dst = get_assignment_lhs(ir->lhs, this);
1522    uint32_t predicate = BRW_PREDICATE_NONE;
1523
1524    if (!ir->lhs->type->is_scalar() &&
1525        !ir->lhs->type->is_vector()) {
1526       ir->rhs->accept(this);
1527       src_reg src = this->result;
1528
1529       if (ir->condition) {
1530          emit_bool_to_cond_code(ir->condition, &predicate);
1531       }
1532
1533       emit_block_move(&dst, &src, ir->rhs->type, predicate);
1534       return;
1535    }
1536
1537    /* Now we're down to just a scalar/vector with writemasks. */
1538    int i;
1539
1540    vec4_instruction *pre_rhs_inst, *last_rhs_inst;
1541    pre_rhs_inst = (vec4_instruction *)this->instructions.get_tail();
1542
1543    ir->rhs->accept(this);
1544
1545    last_rhs_inst = (vec4_instruction *)this->instructions.get_tail();
1546
1547    src_reg src = this->result;
1548
1549    int swizzles[4];
1550    int first_enabled_chan = 0;
1551    int src_chan = 0;
1552
1553    assert(ir->lhs->type->is_vector() ||
1554           ir->lhs->type->is_scalar());
1555    dst.writemask = ir->write_mask;
1556
1557    for (int i = 0; i < 4; i++) {
1558       if (dst.writemask & (1 << i)) {
1559          first_enabled_chan = BRW_GET_SWZ(src.swizzle, i);
1560          break;
1561       }
1562    }
1563
1564    /* Swizzle a small RHS vector into the channels being written.
1565     *
1566     * glsl ir treats write_mask as dictating how many channels are
1567     * present on the RHS while in our instructions we need to make
1568     * those channels appear in the slots of the vec4 they're written to.
1569     */
1570    for (int i = 0; i < 4; i++) {
1571       if (dst.writemask & (1 << i))
1572          swizzles[i] = BRW_GET_SWZ(src.swizzle, src_chan++);
1573       else
1574          swizzles[i] = first_enabled_chan;
1575    }
1576    src.swizzle = BRW_SWIZZLE4(swizzles[0], swizzles[1],
1577                               swizzles[2], swizzles[3]);
1578
1579    if (try_rewrite_rhs_to_dst(ir, dst, src, pre_rhs_inst, last_rhs_inst)) {
1580       return;
1581    }
1582
1583    if (ir->condition) {
1584       emit_bool_to_cond_code(ir->condition, &predicate);
1585    }
1586
1587    for (i = 0; i < type_size(ir->lhs->type); i++) {
1588       vec4_instruction *inst = emit(MOV(dst, src));
1589       inst->predicate = predicate;
1590
1591       dst.reg_offset++;
1592       src.reg_offset++;
1593    }
1594 }
1595
1596 void
1597 vec4_visitor::emit_constant_values(dst_reg *dst, ir_constant *ir)
1598 {
1599    if (ir->type->base_type == GLSL_TYPE_STRUCT) {
1600       foreach_list(node, &ir->components) {
1601          ir_constant *field_value = (ir_constant *)node;
1602
1603          emit_constant_values(dst, field_value);
1604       }
1605       return;
1606    }
1607
1608    if (ir->type->is_array()) {
1609       for (unsigned int i = 0; i < ir->type->length; i++) {
1610          emit_constant_values(dst, ir->array_elements[i]);
1611       }
1612       return;
1613    }
1614
1615    if (ir->type->is_matrix()) {
1616       for (int i = 0; i < ir->type->matrix_columns; i++) {
1617          for (int j = 0; j < ir->type->vector_elements; j++) {
1618             dst->writemask = 1 << j;
1619             dst->type = BRW_REGISTER_TYPE_F;
1620
1621             emit(MOV(*dst,
1622                      src_reg(ir->value.f[i * ir->type->vector_elements + j])));
1623          }
1624          dst->reg_offset++;
1625       }
1626       return;
1627    }
1628
1629    for (int i = 0; i < ir->type->vector_elements; i++) {
1630       dst->writemask = 1 << i;
1631       dst->type = brw_type_for_base_type(ir->type);
1632
1633       switch (ir->type->base_type) {
1634       case GLSL_TYPE_FLOAT:
1635          emit(MOV(*dst, src_reg(ir->value.f[i])));
1636          break;
1637       case GLSL_TYPE_INT:
1638          emit(MOV(*dst, src_reg(ir->value.i[i])));
1639          break;
1640       case GLSL_TYPE_UINT:
1641          emit(MOV(*dst, src_reg(ir->value.u[i])));
1642          break;
1643       case GLSL_TYPE_BOOL:
1644          emit(MOV(*dst, src_reg(ir->value.b[i])));
1645          break;
1646       default:
1647          assert(!"Non-float/uint/int/bool constant");
1648          break;
1649       }
1650    }
1651    dst->reg_offset++;
1652 }
1653
1654 void
1655 vec4_visitor::visit(ir_constant *ir)
1656 {
1657    dst_reg dst = dst_reg(this, ir->type);
1658    this->result = src_reg(dst);
1659
1660    emit_constant_values(&dst, ir);
1661 }
1662
1663 void
1664 vec4_visitor::visit(ir_call *ir)
1665 {
1666    assert(!"not reached");
1667 }
1668
1669 void
1670 vec4_visitor::visit(ir_texture *ir)
1671 {
1672    /* FINISHME: Implement vertex texturing.
1673     *
1674     * With 0 vertex samplers available, the linker will reject
1675     * programs that do vertex texturing, but after our visitor has
1676     * run.
1677     */
1678 }
1679
1680 void
1681 vec4_visitor::visit(ir_return *ir)
1682 {
1683    assert(!"not reached");
1684 }
1685
1686 void
1687 vec4_visitor::visit(ir_discard *ir)
1688 {
1689    assert(!"not reached");
1690 }
1691
1692 void
1693 vec4_visitor::visit(ir_if *ir)
1694 {
1695    /* Don't point the annotation at the if statement, because then it plus
1696     * the then and else blocks get printed.
1697     */
1698    this->base_ir = ir->condition;
1699
1700    if (intel->gen == 6) {
1701       emit_if_gen6(ir);
1702    } else {
1703       uint32_t predicate;
1704       emit_bool_to_cond_code(ir->condition, &predicate);
1705       emit(IF(predicate));
1706    }
1707
1708    visit_instructions(&ir->then_instructions);
1709
1710    if (!ir->else_instructions.is_empty()) {
1711       this->base_ir = ir->condition;
1712       emit(BRW_OPCODE_ELSE);
1713
1714       visit_instructions(&ir->else_instructions);
1715    }
1716
1717    this->base_ir = ir->condition;
1718    emit(BRW_OPCODE_ENDIF);
1719 }
1720
1721 void
1722 vec4_visitor::emit_ndc_computation()
1723 {
1724    /* Get the position */
1725    src_reg pos = src_reg(output_reg[VERT_RESULT_HPOS]);
1726
1727    /* Build ndc coords, which are (x/w, y/w, z/w, 1/w) */
1728    dst_reg ndc = dst_reg(this, glsl_type::vec4_type);
1729    output_reg[BRW_VERT_RESULT_NDC] = ndc;
1730
1731    current_annotation = "NDC";
1732    dst_reg ndc_w = ndc;
1733    ndc_w.writemask = WRITEMASK_W;
1734    src_reg pos_w = pos;
1735    pos_w.swizzle = BRW_SWIZZLE4(SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W);
1736    emit_math(SHADER_OPCODE_RCP, ndc_w, pos_w);
1737
1738    dst_reg ndc_xyz = ndc;
1739    ndc_xyz.writemask = WRITEMASK_XYZ;
1740
1741    emit(MUL(ndc_xyz, pos, src_reg(ndc_w)));
1742 }
1743
1744 void
1745 vec4_visitor::emit_psiz_and_flags(struct brw_reg reg)
1746 {
1747    if (intel->gen < 6 &&
1748        ((c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) ||
1749         c->key.nr_userclip || brw->has_negative_rhw_bug)) {
1750       dst_reg header1 = dst_reg(this, glsl_type::uvec4_type);
1751       GLuint i;
1752
1753       emit(MOV(header1, 0u));
1754
1755       if (c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) {
1756          src_reg psiz = src_reg(output_reg[VERT_RESULT_PSIZ]);
1757
1758          current_annotation = "Point size";
1759          header1.writemask = WRITEMASK_W;
1760          emit(MUL(header1, psiz, src_reg((float)(1 << 11))));
1761          emit(AND(header1, src_reg(header1), 0x7ff << 8));
1762       }
1763
1764       current_annotation = "Clipping flags";
1765       for (i = 0; i < c->key.nr_userclip; i++) {
1766          vec4_instruction *inst;
1767
1768          inst = emit(DP4(dst_null_f(), src_reg(output_reg[VERT_RESULT_HPOS]),
1769                          src_reg(c->userplane[i])));
1770          inst->conditional_mod = BRW_CONDITIONAL_L;
1771
1772          emit(OR(header1, src_reg(header1), 1u << i));
1773          inst->predicate = BRW_PREDICATE_NORMAL;
1774       }
1775
1776       /* i965 clipping workaround:
1777        * 1) Test for -ve rhw
1778        * 2) If set,
1779        *      set ndc = (0,0,0,0)
1780        *      set ucp[6] = 1
1781        *
1782        * Later, clipping will detect ucp[6] and ensure the primitive is
1783        * clipped against all fixed planes.
1784        */
1785       if (brw->has_negative_rhw_bug) {
1786 #if 0
1787          /* FINISHME */
1788          brw_CMP(p,
1789                  vec8(brw_null_reg()),
1790                  BRW_CONDITIONAL_L,
1791                  brw_swizzle1(output_reg[BRW_VERT_RESULT_NDC], 3),
1792                  brw_imm_f(0));
1793
1794          brw_OR(p, brw_writemask(header1, WRITEMASK_W), header1, brw_imm_ud(1<<6));
1795          brw_MOV(p, output_reg[BRW_VERT_RESULT_NDC], brw_imm_f(0));
1796          brw_set_predicate_control(p, BRW_PREDICATE_NONE);
1797 #endif
1798       }
1799
1800       header1.writemask = WRITEMASK_XYZW;
1801       emit(MOV(retype(reg, BRW_REGISTER_TYPE_UD), src_reg(header1)));
1802    } else if (intel->gen < 6) {
1803       emit(MOV(retype(reg, BRW_REGISTER_TYPE_UD), 0u));
1804    } else {
1805       emit(MOV(retype(reg, BRW_REGISTER_TYPE_D), src_reg(0)));
1806       if (c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) {
1807          emit(MOV(brw_writemask(reg, WRITEMASK_W),
1808                   src_reg(output_reg[VERT_RESULT_PSIZ])));
1809       }
1810    }
1811 }
1812
1813 void
1814 vec4_visitor::emit_clip_distances(struct brw_reg reg, int offset)
1815 {
1816    if (intel->gen < 6) {
1817       /* Clip distance slots are set aside in gen5, but they are not used.  It
1818        * is not clear whether we actually need to set aside space for them,
1819        * but the performance cost is negligible.
1820        */
1821       return;
1822    }
1823
1824    for (int i = 0; i + offset < c->key.nr_userclip && i < 4; ++i) {
1825       emit(DP4(dst_reg(brw_writemask(reg, 1 << i)),
1826                src_reg(output_reg[VERT_RESULT_HPOS]),
1827                src_reg(c->userplane[i + offset])));
1828    }
1829 }
1830
1831 void
1832 vec4_visitor::emit_urb_slot(int mrf, int vert_result)
1833 {
1834    struct brw_reg hw_reg = brw_message_reg(mrf);
1835    dst_reg reg = dst_reg(MRF, mrf);
1836    reg.type = BRW_REGISTER_TYPE_F;
1837
1838    switch (vert_result) {
1839    case VERT_RESULT_PSIZ:
1840       /* PSIZ is always in slot 0, and is coupled with other flags. */
1841       current_annotation = "indices, point width, clip flags";
1842       emit_psiz_and_flags(hw_reg);
1843       break;
1844    case BRW_VERT_RESULT_NDC:
1845       current_annotation = "NDC";
1846       emit(MOV(reg, src_reg(output_reg[BRW_VERT_RESULT_NDC])));
1847       break;
1848    case BRW_VERT_RESULT_HPOS_DUPLICATE:
1849    case VERT_RESULT_HPOS:
1850       current_annotation = "gl_Position";
1851       emit(MOV(reg, src_reg(output_reg[VERT_RESULT_HPOS])));
1852       break;
1853    case BRW_VERT_RESULT_CLIP0:
1854       current_annotation = "user clip distances";
1855       emit_clip_distances(hw_reg, 0);
1856       break;
1857    case BRW_VERT_RESULT_CLIP1:
1858       current_annotation = "user clip distances";
1859       emit_clip_distances(hw_reg, 4);
1860       break;
1861    case BRW_VERT_RESULT_PAD:
1862       /* No need to write to this slot */
1863       break;
1864    default: {
1865       assert (vert_result < VERT_RESULT_MAX);
1866       current_annotation = output_reg_annotation[vert_result];
1867       /* Copy the register, saturating if necessary */
1868       vec4_instruction *inst = emit(MOV(reg,
1869                                         src_reg(output_reg[vert_result])));
1870       if ((vert_result == VERT_RESULT_COL0 ||
1871            vert_result == VERT_RESULT_COL1 ||
1872            vert_result == VERT_RESULT_BFC0 ||
1873            vert_result == VERT_RESULT_BFC1) &&
1874           c->key.clamp_vertex_color) {
1875          inst->saturate = true;
1876       }
1877    }
1878       break;
1879    }
1880 }
1881
1882 static int
1883 align_interleaved_urb_mlen(struct brw_context *brw, int mlen)
1884 {
1885    struct intel_context *intel = &brw->intel;
1886
1887    if (intel->gen >= 6) {
1888       /* URB data written (does not include the message header reg) must
1889        * be a multiple of 256 bits, or 2 VS registers.  See vol5c.5,
1890        * section 5.4.3.2.2: URB_INTERLEAVED.
1891        *
1892        * URB entries are allocated on a multiple of 1024 bits, so an
1893        * extra 128 bits written here to make the end align to 256 is
1894        * no problem.
1895        */
1896       if ((mlen % 2) != 1)
1897          mlen++;
1898    }
1899
1900    return mlen;
1901 }
1902
1903 /**
1904  * Generates the VUE payload plus the 1 or 2 URB write instructions to
1905  * complete the VS thread.
1906  *
1907  * The VUE layout is documented in Volume 2a.
1908  */
1909 void
1910 vec4_visitor::emit_urb_writes()
1911 {
1912    /* MRF 0 is reserved for the debugger, so start with message header
1913     * in MRF 1.
1914     */
1915    int base_mrf = 1;
1916    int mrf = base_mrf;
1917    /* In the process of generating our URB write message contents, we
1918     * may need to unspill a register or load from an array.  Those
1919     * reads would use MRFs 14-15.
1920     */
1921    int max_usable_mrf = 13;
1922
1923    /* The following assertion verifies that max_usable_mrf causes an
1924     * even-numbered amount of URB write data, which will meet gen6's
1925     * requirements for length alignment.
1926     */
1927    assert ((max_usable_mrf - base_mrf) % 2 == 0);
1928
1929    /* FINISHME: edgeflag */
1930
1931    brw_compute_vue_map(&c->vue_map, intel, c->key.nr_userclip,
1932                        c->prog_data.outputs_written);
1933
1934    /* First mrf is the g0-based message header containing URB handles and such,
1935     * which is implied in VS_OPCODE_URB_WRITE.
1936     */
1937    mrf++;
1938
1939    if (intel->gen < 6) {
1940       emit_ndc_computation();
1941    }
1942
1943    /* Set up the VUE data for the first URB write */
1944    int slot;
1945    for (slot = 0; slot < c->vue_map.num_slots; ++slot) {
1946       emit_urb_slot(mrf++, c->vue_map.slot_to_vert_result[slot]);
1947
1948       /* If this was max_usable_mrf, we can't fit anything more into this URB
1949        * WRITE.
1950        */
1951       if (mrf > max_usable_mrf) {
1952          slot++;
1953          break;
1954       }
1955    }
1956
1957    current_annotation = "URB write";
1958    vec4_instruction *inst = emit(VS_OPCODE_URB_WRITE);
1959    inst->base_mrf = base_mrf;
1960    inst->mlen = align_interleaved_urb_mlen(brw, mrf - base_mrf);
1961    inst->eot = (slot >= c->vue_map.num_slots);
1962
1963    /* Optional second URB write */
1964    if (!inst->eot) {
1965       mrf = base_mrf + 1;
1966
1967       for (; slot < c->vue_map.num_slots; ++slot) {
1968          assert(mrf < max_usable_mrf);
1969
1970          emit_urb_slot(mrf++, c->vue_map.slot_to_vert_result[slot]);
1971       }
1972
1973       current_annotation = "URB write";
1974       inst = emit(VS_OPCODE_URB_WRITE);
1975       inst->base_mrf = base_mrf;
1976       inst->mlen = align_interleaved_urb_mlen(brw, mrf - base_mrf);
1977       inst->eot = true;
1978       /* URB destination offset.  In the previous write, we got MRFs
1979        * 2-13 minus the one header MRF, so 12 regs.  URB offset is in
1980        * URB row increments, and each of our MRFs is half of one of
1981        * those, since we're doing interleaved writes.
1982        */
1983       inst->offset = (max_usable_mrf - base_mrf) / 2;
1984    }
1985
1986    if (intel->gen == 6)
1987       c->prog_data.urb_entry_size = ALIGN(c->vue_map.num_slots, 8) / 8;
1988    else
1989       c->prog_data.urb_entry_size = ALIGN(c->vue_map.num_slots, 4) / 4;
1990 }
1991
1992 src_reg
1993 vec4_visitor::get_scratch_offset(vec4_instruction *inst,
1994                                  src_reg *reladdr, int reg_offset)
1995 {
1996    /* Because we store the values to scratch interleaved like our
1997     * vertex data, we need to scale the vec4 index by 2.
1998     */
1999    int message_header_scale = 2;
2000
2001    /* Pre-gen6, the message header uses byte offsets instead of vec4
2002     * (16-byte) offset units.
2003     */
2004    if (intel->gen < 6)
2005       message_header_scale *= 16;
2006
2007    if (reladdr) {
2008       src_reg index = src_reg(this, glsl_type::int_type);
2009
2010       emit_before(inst, ADD(dst_reg(index), *reladdr, src_reg(reg_offset)));
2011       emit_before(inst, MUL(dst_reg(index),
2012                             index, src_reg(message_header_scale)));
2013
2014       return index;
2015    } else {
2016       return src_reg(reg_offset * message_header_scale);
2017    }
2018 }
2019
2020 src_reg
2021 vec4_visitor::get_pull_constant_offset(vec4_instruction *inst,
2022                                        src_reg *reladdr, int reg_offset)
2023 {
2024    if (reladdr) {
2025       src_reg index = src_reg(this, glsl_type::int_type);
2026
2027       emit_before(inst, ADD(dst_reg(index), *reladdr, src_reg(reg_offset)));
2028
2029       /* Pre-gen6, the message header uses byte offsets instead of vec4
2030        * (16-byte) offset units.
2031        */
2032       if (intel->gen < 6) {
2033          emit_before(inst, MUL(dst_reg(index), index, src_reg(16)));
2034       }
2035
2036       return index;
2037    } else {
2038       int message_header_scale = intel->gen < 6 ? 16 : 1;
2039       return src_reg(reg_offset * message_header_scale);
2040    }
2041 }
2042
2043 /**
2044  * Emits an instruction before @inst to load the value named by @orig_src
2045  * from scratch space at @base_offset to @temp.
2046  */
2047 void
2048 vec4_visitor::emit_scratch_read(vec4_instruction *inst,
2049                                 dst_reg temp, src_reg orig_src,
2050                                 int base_offset)
2051 {
2052    int reg_offset = base_offset + orig_src.reg_offset;
2053    src_reg index = get_scratch_offset(inst, orig_src.reladdr, reg_offset);
2054
2055    emit_before(inst, SCRATCH_READ(temp, index));
2056 }
2057
2058 /**
2059  * Emits an instruction after @inst to store the value to be written
2060  * to @orig_dst to scratch space at @base_offset, from @temp.
2061  */
2062 void
2063 vec4_visitor::emit_scratch_write(vec4_instruction *inst,
2064                                  src_reg temp, dst_reg orig_dst,
2065                                  int base_offset)
2066 {
2067    int reg_offset = base_offset + orig_dst.reg_offset;
2068    src_reg index = get_scratch_offset(inst, orig_dst.reladdr, reg_offset);
2069
2070    dst_reg dst = dst_reg(brw_writemask(brw_vec8_grf(0, 0),
2071                                        orig_dst.writemask));
2072    vec4_instruction *write = SCRATCH_WRITE(dst, temp, index);
2073    write->predicate = inst->predicate;
2074    write->ir = inst->ir;
2075    write->annotation = inst->annotation;
2076    inst->insert_after(write);
2077 }
2078
2079 /**
2080  * We can't generally support array access in GRF space, because a
2081  * single instruction's destination can only span 2 contiguous
2082  * registers.  So, we send all GRF arrays that get variable index
2083  * access to scratch space.
2084  */
2085 void
2086 vec4_visitor::move_grf_array_access_to_scratch()
2087 {
2088    int scratch_loc[this->virtual_grf_count];
2089
2090    for (int i = 0; i < this->virtual_grf_count; i++) {
2091       scratch_loc[i] = -1;
2092    }
2093
2094    /* First, calculate the set of virtual GRFs that need to be punted
2095     * to scratch due to having any array access on them, and where in
2096     * scratch.
2097     */
2098    foreach_list(node, &this->instructions) {
2099       vec4_instruction *inst = (vec4_instruction *)node;
2100
2101       if (inst->dst.file == GRF && inst->dst.reladdr &&
2102           scratch_loc[inst->dst.reg] == -1) {
2103          scratch_loc[inst->dst.reg] = c->last_scratch;
2104          c->last_scratch += this->virtual_grf_sizes[inst->dst.reg] * 8 * 4;
2105       }
2106
2107       for (int i = 0 ; i < 3; i++) {
2108          src_reg *src = &inst->src[i];
2109
2110          if (src->file == GRF && src->reladdr &&
2111              scratch_loc[src->reg] == -1) {
2112             scratch_loc[src->reg] = c->last_scratch;
2113             c->last_scratch += this->virtual_grf_sizes[src->reg] * 8 * 4;
2114          }
2115       }
2116    }
2117
2118    /* Now, for anything that will be accessed through scratch, rewrite
2119     * it to load/store.  Note that this is a _safe list walk, because
2120     * we may generate a new scratch_write instruction after the one
2121     * we're processing.
2122     */
2123    foreach_list_safe(node, &this->instructions) {
2124       vec4_instruction *inst = (vec4_instruction *)node;
2125
2126       /* Set up the annotation tracking for new generated instructions. */
2127       base_ir = inst->ir;
2128       current_annotation = inst->annotation;
2129
2130       if (inst->dst.file == GRF && scratch_loc[inst->dst.reg] != -1) {
2131          src_reg temp = src_reg(this, glsl_type::vec4_type);
2132
2133          emit_scratch_write(inst, temp, inst->dst, scratch_loc[inst->dst.reg]);
2134
2135          inst->dst.file = temp.file;
2136          inst->dst.reg = temp.reg;
2137          inst->dst.reg_offset = temp.reg_offset;
2138          inst->dst.reladdr = NULL;
2139       }
2140
2141       for (int i = 0 ; i < 3; i++) {
2142          if (inst->src[i].file != GRF || scratch_loc[inst->src[i].reg] == -1)
2143             continue;
2144
2145          dst_reg temp = dst_reg(this, glsl_type::vec4_type);
2146
2147          emit_scratch_read(inst, temp, inst->src[i],
2148                            scratch_loc[inst->src[i].reg]);
2149
2150          inst->src[i].file = temp.file;
2151          inst->src[i].reg = temp.reg;
2152          inst->src[i].reg_offset = temp.reg_offset;
2153          inst->src[i].reladdr = NULL;
2154       }
2155    }
2156 }
2157
2158 /**
2159  * Emits an instruction before @inst to load the value named by @orig_src
2160  * from the pull constant buffer (surface) at @base_offset to @temp.
2161  */
2162 void
2163 vec4_visitor::emit_pull_constant_load(vec4_instruction *inst,
2164                                       dst_reg temp, src_reg orig_src,
2165                                       int base_offset)
2166 {
2167    int reg_offset = base_offset + orig_src.reg_offset;
2168    src_reg index = get_pull_constant_offset(inst, orig_src.reladdr, reg_offset);
2169    vec4_instruction *load;
2170
2171    load = new(mem_ctx) vec4_instruction(this, VS_OPCODE_PULL_CONSTANT_LOAD,
2172                                         temp, index);
2173    load->base_mrf = 14;
2174    load->mlen = 1;
2175    emit_before(inst, load);
2176 }
2177
2178 /**
2179  * Implements array access of uniforms by inserting a
2180  * PULL_CONSTANT_LOAD instruction.
2181  *
2182  * Unlike temporary GRF array access (where we don't support it due to
2183  * the difficulty of doing relative addressing on instruction
2184  * destinations), we could potentially do array access of uniforms
2185  * that were loaded in GRF space as push constants.  In real-world
2186  * usage we've seen, though, the arrays being used are always larger
2187  * than we could load as push constants, so just always move all
2188  * uniform array access out to a pull constant buffer.
2189  */
2190 void
2191 vec4_visitor::move_uniform_array_access_to_pull_constants()
2192 {
2193    int pull_constant_loc[this->uniforms];
2194
2195    for (int i = 0; i < this->uniforms; i++) {
2196       pull_constant_loc[i] = -1;
2197    }
2198
2199    /* Walk through and find array access of uniforms.  Put a copy of that
2200     * uniform in the pull constant buffer.
2201     *
2202     * Note that we don't move constant-indexed accesses to arrays.  No
2203     * testing has been done of the performance impact of this choice.
2204     */
2205    foreach_list_safe(node, &this->instructions) {
2206       vec4_instruction *inst = (vec4_instruction *)node;
2207
2208       for (int i = 0 ; i < 3; i++) {
2209          if (inst->src[i].file != UNIFORM || !inst->src[i].reladdr)
2210             continue;
2211
2212          int uniform = inst->src[i].reg;
2213
2214          /* If this array isn't already present in the pull constant buffer,
2215           * add it.
2216           */
2217          if (pull_constant_loc[uniform] == -1) {
2218             const float **values = &prog_data->param[uniform * 4];
2219
2220             pull_constant_loc[uniform] = prog_data->nr_pull_params / 4;
2221
2222             for (int j = 0; j < uniform_size[uniform] * 4; j++) {
2223                prog_data->pull_param[prog_data->nr_pull_params++] = values[j];
2224             }
2225          }
2226
2227          /* Set up the annotation tracking for new generated instructions. */
2228          base_ir = inst->ir;
2229          current_annotation = inst->annotation;
2230
2231          dst_reg temp = dst_reg(this, glsl_type::vec4_type);
2232
2233          emit_pull_constant_load(inst, temp, inst->src[i],
2234                                  pull_constant_loc[uniform]);
2235
2236          inst->src[i].file = temp.file;
2237          inst->src[i].reg = temp.reg;
2238          inst->src[i].reg_offset = temp.reg_offset;
2239          inst->src[i].reladdr = NULL;
2240       }
2241    }
2242
2243    /* Now there are no accesses of the UNIFORM file with a reladdr, so
2244     * no need to track them as larger-than-vec4 objects.  This will be
2245     * relied on in cutting out unused uniform vectors from push
2246     * constants.
2247     */
2248    split_uniform_registers();
2249 }
2250
2251 vec4_visitor::vec4_visitor(struct brw_vs_compile *c,
2252                            struct gl_shader_program *prog,
2253                            struct brw_shader *shader)
2254 {
2255    this->c = c;
2256    this->p = &c->func;
2257    this->brw = p->brw;
2258    this->intel = &brw->intel;
2259    this->ctx = &intel->ctx;
2260    this->prog = prog;
2261    this->shader = shader;
2262
2263    this->mem_ctx = ralloc_context(NULL);
2264    this->failed = false;
2265
2266    this->base_ir = NULL;
2267    this->current_annotation = NULL;
2268
2269    this->c = c;
2270    this->vp = prog->VertexProgram;
2271    this->prog_data = &c->prog_data;
2272
2273    this->variable_ht = hash_table_ctor(0,
2274                                        hash_table_pointer_hash,
2275                                        hash_table_pointer_compare);
2276
2277    this->virtual_grf_def = NULL;
2278    this->virtual_grf_use = NULL;
2279    this->virtual_grf_sizes = NULL;
2280    this->virtual_grf_count = 0;
2281    this->virtual_grf_reg_map = NULL;
2282    this->virtual_grf_reg_count = 0;
2283    this->virtual_grf_array_size = 0;
2284    this->live_intervals_valid = false;
2285
2286    this->uniforms = 0;
2287
2288    this->variable_ht = hash_table_ctor(0,
2289                                        hash_table_pointer_hash,
2290                                        hash_table_pointer_compare);
2291 }
2292
2293 vec4_visitor::~vec4_visitor()
2294 {
2295    ralloc_free(this->mem_ctx);
2296    hash_table_dtor(this->variable_ht);
2297 }
2298
2299
2300 void
2301 vec4_visitor::fail(const char *format, ...)
2302 {
2303    va_list va;
2304    char *msg;
2305
2306    if (failed)
2307       return;
2308
2309    failed = true;
2310
2311    va_start(va, format);
2312    msg = ralloc_vasprintf(mem_ctx, format, va);
2313    va_end(va);
2314    msg = ralloc_asprintf(mem_ctx, "VS compile failed: %s\n", msg);
2315
2316    this->fail_msg = msg;
2317
2318    if (INTEL_DEBUG & DEBUG_VS) {
2319       fprintf(stderr, "%s",  msg);
2320    }
2321 }
2322
2323 } /* namespace brw */