/*
 * Copyright © 2011 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "brw_vec4.h"
extern "C" {
#include "main/macros.h"
#include "program/prog_parameter.h"
}

namespace brw {
src_reg::src_reg(dst_reg reg)
{
   init();

   this->file = reg.file;
   this->reg = reg.reg;
   this->reg_offset = reg.reg_offset;
   this->type = reg.type;
   this->reladdr = reg.reladdr;
   this->fixed_hw_reg = reg.fixed_hw_reg;

   int swizzles[4];
   int next_chan = 0;
   int last = 0;

   for (int i = 0; i < 4; i++) {
      if (!(reg.writemask & (1 << i)))
         continue;

      swizzles[next_chan++] = last = i;
   }

   for (; next_chan < 4; next_chan++) {
      swizzles[next_chan] = last;
   }

   this->swizzle = BRW_SWIZZLE4(swizzles[0], swizzles[1],
                                swizzles[2], swizzles[3]);
}
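/* Worked example (illustrative, not from the original source): a dst_reg
 * with writemask WRITEMASK_XZ yields swizzles[] = {0, 2, 2, 2}, i.e.
 * BRW_SWIZZLE4(X, Z, Z, Z), so a src_reg built from it reads the written
 * channels in order and replicates the last one into the unused slots.
 */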
dst_reg::dst_reg(src_reg reg)
{
   init();

   this->file = reg.file;
   this->reg = reg.reg;
   this->reg_offset = reg.reg_offset;
   this->type = reg.type;
   this->writemask = WRITEMASK_XYZW;
   this->reladdr = reg.reladdr;
   this->fixed_hw_reg = reg.fixed_hw_reg;
}
vec4_instruction::vec4_instruction(vec4_visitor *v,
                                   enum opcode opcode, dst_reg dst,
                                   src_reg src0, src_reg src1, src_reg src2)
{
   this->opcode = opcode;
   this->dst = dst;
   this->src[0] = src0;
   this->src[1] = src1;
   this->src[2] = src2;
   this->ir = v->base_ir;
   this->annotation = v->current_annotation;
}
vec4_instruction *
vec4_visitor::emit(vec4_instruction *inst)
{
   this->instructions.push_tail(inst);

   return inst;
}
vec4_instruction *
vec4_visitor::emit(enum opcode opcode, dst_reg dst,
                   src_reg src0, src_reg src1, src_reg src2)
{
   return emit(new(mem_ctx) vec4_instruction(this, opcode, dst,
                                             src0, src1, src2));
}
vec4_instruction *
vec4_visitor::emit(enum opcode opcode, dst_reg dst, src_reg src0, src_reg src1)
{
   return emit(new(mem_ctx) vec4_instruction(this, opcode, dst, src0, src1));
}
vec4_instruction *
vec4_visitor::emit(enum opcode opcode, dst_reg dst, src_reg src0)
{
   return emit(new(mem_ctx) vec4_instruction(this, opcode, dst, src0));
}
vec4_instruction *
vec4_visitor::emit(enum opcode opcode)
{
   return emit(new(mem_ctx) vec4_instruction(this, opcode, dst_reg()));
}
void
vec4_visitor::emit_dp(dst_reg dst, src_reg src0, src_reg src1, unsigned elements)
{
   static enum opcode dot_opcodes[] = {
      BRW_OPCODE_DP2, BRW_OPCODE_DP3, BRW_OPCODE_DP4
   };

   emit(dot_opcodes[elements - 2], dst, src0, src1);
}
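/* Usage sketch (illustrative): emit_dp(dst, a, b, 3) selects
 * dot_opcodes[1] == BRW_OPCODE_DP3.  Only elements in [2, 4] index the
 * table; anything else would read out of bounds.
 */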
void
vec4_visitor::emit_math1_gen6(enum opcode opcode, dst_reg dst, src_reg src)
{
   /* The gen6 math instruction ignores the source modifiers --
    * swizzle, abs, negate, and at least some parts of the register
    * region description.
    */
   src_reg temp_src = src_reg(this, glsl_type::vec4_type);

   emit(BRW_OPCODE_MOV, dst_reg(temp_src), src);

   if (dst.writemask != WRITEMASK_XYZW) {
      /* The gen6 math instruction must be align1, so we can't do
       * writemasks.
       */
      dst_reg temp_dst = dst_reg(this, glsl_type::vec4_type);

      emit(opcode, temp_dst, temp_src);

      emit(BRW_OPCODE_MOV, dst, src_reg(temp_dst));
   } else {
      emit(opcode, dst, temp_src);
   }
}
void
vec4_visitor::emit_math1_gen4(enum opcode opcode, dst_reg dst, src_reg src)
{
   vec4_instruction *inst = emit(opcode, dst, src);
   inst->base_mrf = 1;
   inst->mlen = 1;
}
void
vec4_visitor::emit_math(opcode opcode, dst_reg dst, src_reg src)
{
   switch (opcode) {
   case SHADER_OPCODE_RCP:
   case SHADER_OPCODE_RSQ:
   case SHADER_OPCODE_SQRT:
   case SHADER_OPCODE_EXP2:
   case SHADER_OPCODE_LOG2:
   case SHADER_OPCODE_SIN:
   case SHADER_OPCODE_COS:
      break;
   default:
      assert(!"not reached: bad math opcode");
      return;
   }

   if (intel->gen >= 6) {
      return emit_math1_gen6(opcode, dst, src);
   } else {
      return emit_math1_gen4(opcode, dst, src);
   }
}
void
vec4_visitor::emit_math2_gen6(enum opcode opcode,
                              dst_reg dst, src_reg src0, src_reg src1)
{
   src_reg expanded;

   /* The gen6 math instruction ignores the source modifiers --
    * swizzle, abs, negate, and at least some parts of the register
    * region description.  Move the sources to temporaries to make it
    * generally work.
    */

   expanded = src_reg(this, glsl_type::vec4_type);
   emit(BRW_OPCODE_MOV, dst_reg(expanded), src0);
   src0 = expanded;

   expanded = src_reg(this, glsl_type::vec4_type);
   emit(BRW_OPCODE_MOV, dst_reg(expanded), src1);
   src1 = expanded;

   if (dst.writemask != WRITEMASK_XYZW) {
      /* The gen6 math instruction must be align1, so we can't do
       * writemasks.
       */
      dst_reg temp_dst = dst_reg(this, glsl_type::vec4_type);

      emit(opcode, temp_dst, src0, src1);

      emit(BRW_OPCODE_MOV, dst, src_reg(temp_dst));
   } else {
      emit(opcode, dst, src0, src1);
   }
}
void
vec4_visitor::emit_math2_gen4(enum opcode opcode,
                              dst_reg dst, src_reg src0, src_reg src1)
{
   vec4_instruction *inst = emit(opcode, dst, src0, src1);
   inst->base_mrf = 1;
   inst->mlen = 2;
}
void
vec4_visitor::emit_math(enum opcode opcode,
                        dst_reg dst, src_reg src0, src_reg src1)
{
   assert(opcode == SHADER_OPCODE_POW);

   if (intel->gen >= 6) {
      return emit_math2_gen6(opcode, dst, src0, src1);
   } else {
      return emit_math2_gen4(opcode, dst, src0, src1);
   }
}
void
vec4_visitor::visit_instructions(const exec_list *list)
{
   foreach_list(node, list) {
      ir_instruction *ir = (ir_instruction *)node;

      base_ir = ir;
      ir->accept(this);
   }
}
static int
type_size(const struct glsl_type *type)
{
   unsigned int size, i;

   switch (type->base_type) {
   case GLSL_TYPE_UINT:
   case GLSL_TYPE_INT:
   case GLSL_TYPE_FLOAT:
   case GLSL_TYPE_BOOL:
      if (type->is_matrix()) {
         return type->matrix_columns;
      } else {
         /* Regardless of size of vector, it gets a vec4.  This is bad
          * packing for things like floats, but otherwise arrays become a
          * mess.  Hopefully a later pass over the code can pack scalars
          * down if appropriate.
          */
         return 1;
      }
   case GLSL_TYPE_ARRAY:
      assert(type->length > 0);
      return type_size(type->fields.array) * type->length;
   case GLSL_TYPE_STRUCT:
      size = 0;
      for (i = 0; i < type->length; i++) {
         size += type_size(type->fields.structure[i].type);
      }
      return size;
   case GLSL_TYPE_SAMPLER:
      /* Samplers take up one slot in UNIFORMS[], but they're baked in
       * at link time.
       */
      return 1;
   default:
      assert(0);
      return 0;
   }
}
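/* Examples of the resulting vec4-slot counts (illustrative):
 *   type_size(vec3)      == 1   -- any scalar or vector gets a full vec4
 *   type_size(mat3)      == 3   -- one slot per column
 *   type_size(float[10]) == 10  -- element size 1 times array length
 */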
int
vec4_visitor::virtual_grf_alloc(int size)
{
   if (virtual_grf_array_size <= virtual_grf_count) {
      if (virtual_grf_array_size == 0)
         virtual_grf_array_size = 16;
      else
         virtual_grf_array_size *= 2;
      virtual_grf_sizes = reralloc(mem_ctx, virtual_grf_sizes, int,
                                   virtual_grf_array_size);
   }
   virtual_grf_sizes[virtual_grf_count] = size;
   return virtual_grf_count++;
}
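/* The size array grows by doubling (16, 32, 64, ...), so repeated
 * virtual_grf_alloc() calls are amortized O(1) in reralloc traffic.
 */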
src_reg::src_reg(class vec4_visitor *v, const struct glsl_type *type)
{
   init();

   this->file = GRF;
   this->reg = v->virtual_grf_alloc(type_size(type));

   if (type->is_array() || type->is_record()) {
      this->swizzle = BRW_SWIZZLE_NOOP;
   } else {
      this->swizzle = swizzle_for_size(type->vector_elements);
   }

   this->type = brw_type_for_base_type(type);
}
dst_reg::dst_reg(class vec4_visitor *v, const struct glsl_type *type)
{
   init();

   this->file = GRF;
   this->reg = v->virtual_grf_alloc(type_size(type));

   if (type->is_array() || type->is_record()) {
      this->writemask = WRITEMASK_XYZW;
   } else {
      this->writemask = (1 << type->vector_elements) - 1;
   }

   this->type = brw_type_for_base_type(type);
}
/* Our support for uniforms is piggy-backed on the struct
 * gl_fragment_program, because that's where the values actually
 * get stored, rather than in some global gl_shader_program uniform
 * store.
 */
int
vec4_visitor::setup_uniform_values(int loc, const glsl_type *type)
{
   unsigned int offset = 0;
   float *values = &this->vp->Base.Parameters->ParameterValues[loc][0].f;

   if (type->is_matrix()) {
      const glsl_type *column = glsl_type::get_instance(GLSL_TYPE_FLOAT,
                                                        type->vector_elements,
                                                        1);

      for (unsigned int i = 0; i < type->matrix_columns; i++) {
         offset += setup_uniform_values(loc + offset, column);
      }

      return offset;
   }

   switch (type->base_type) {
   case GLSL_TYPE_FLOAT:
   case GLSL_TYPE_UINT:
   case GLSL_TYPE_INT:
   case GLSL_TYPE_BOOL:
      for (unsigned int i = 0; i < type->vector_elements; i++) {
         c->prog_data.param[this->uniforms * 4 + i] = &values[i];
      }

      /* Set up pad elements to get things aligned to a vec4 boundary. */
      for (unsigned int i = type->vector_elements; i < 4; i++) {
         static float zero = 0;

         c->prog_data.param[this->uniforms * 4 + i] = &zero;
      }

      /* Track the size of this uniform vector, for future packing of
       * uniforms.
       */
      this->uniform_vector_size[this->uniforms] = type->vector_elements;
      this->uniforms++;

      return 1;

   case GLSL_TYPE_STRUCT:
      for (unsigned int i = 0; i < type->length; i++) {
         offset += setup_uniform_values(loc + offset,
                                        type->fields.structure[i].type);
      }
      return offset;

   case GLSL_TYPE_ARRAY:
      for (unsigned int i = 0; i < type->length; i++) {
         offset += setup_uniform_values(loc + offset, type->fields.array);
      }
      return offset;

   case GLSL_TYPE_SAMPLER:
      /* The sampler takes up a slot, but we don't use any values from it. */
      return 1;

   default:
      assert(!"not reached");
      return 0;
   }
}
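/* Example of the layout this produces (illustrative): a mat4 uniform
 * recurses into four vec4 columns, consuming four consecutive uniform
 * slots, while a lone float consumes one slot with channels 1-3 pointing
 * at the static zero pad.
 */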
/* Our support for builtin uniforms is even scarier than non-builtin.
 * It sits on top of the PROG_STATE_VAR parameters that are
 * automatically updated from GL context state.
 */
void
vec4_visitor::setup_builtin_uniform_values(ir_variable *ir)
{
   const ir_state_slot *const slots = ir->state_slots;
   assert(ir->state_slots != NULL);

   for (unsigned int i = 0; i < ir->num_state_slots; i++) {
      /* This state reference has already been setup by ir_to_mesa,
       * but we'll get the same index back here.  We can reference
       * ParameterValues directly, since unlike brw_fs.cpp, we never
       * add new state references during compile.
       */
      int index = _mesa_add_state_reference(this->vp->Base.Parameters,
                                            (gl_state_index *)slots[i].tokens);
      float *values = &this->vp->Base.Parameters->ParameterValues[index][0].f;

      this->uniform_vector_size[this->uniforms] = 0;
      /* Add each of the unique swizzled channels of the element.
       * This will end up matching the size of the glsl_type of this field.
       */
      int last_swiz = -1;
      for (unsigned int j = 0; j < 4; j++) {
         int swiz = GET_SWZ(slots[i].swizzle, j);
         last_swiz = swiz;

         c->prog_data.param[this->uniforms * 4 + j] = &values[swiz];
         if (swiz <= last_swiz)
            this->uniform_vector_size[this->uniforms]++;
      }
      this->uniforms++;
   }
}
dst_reg *
vec4_visitor::variable_storage(ir_variable *var)
{
   return (dst_reg *)hash_table_find(this->variable_ht, var);
}
void
vec4_visitor::emit_bool_to_cond_code(ir_rvalue *ir)
{
   ir_expression *expr = ir->as_expression();

   if (expr) {
      src_reg op[2];
      vec4_instruction *inst;

      assert(expr->get_num_operands() <= 2);
      for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
         assert(expr->operands[i]->type->is_scalar());

         expr->operands[i]->accept(this);
         op[i] = this->result;
      }

      switch (expr->operation) {
      case ir_unop_logic_not:
         inst = emit(BRW_OPCODE_AND, dst_null_d(), op[0], src_reg(1));
         inst->conditional_mod = BRW_CONDITIONAL_Z;
         break;

      case ir_binop_logic_xor:
         inst = emit(BRW_OPCODE_XOR, dst_null_d(), op[0], op[1]);
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         break;

      case ir_binop_logic_or:
         inst = emit(BRW_OPCODE_OR, dst_null_d(), op[0], op[1]);
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         break;

      case ir_binop_logic_and:
         inst = emit(BRW_OPCODE_AND, dst_null_d(), op[0], op[1]);
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         break;

      case ir_unop_f2b:
         if (intel->gen >= 6) {
            inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], src_reg(0.0f));
         } else {
            inst = emit(BRW_OPCODE_MOV, dst_null_f(), op[0]);
         }
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         break;

      case ir_unop_i2b:
         if (intel->gen >= 6) {
            inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], src_reg(0));
         } else {
            inst = emit(BRW_OPCODE_MOV, dst_null_d(), op[0]);
         }
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         break;

      case ir_binop_greater:
      case ir_binop_gequal:
      case ir_binop_less:
      case ir_binop_lequal:
      case ir_binop_equal:
      case ir_binop_all_equal:
      case ir_binop_nequal:
      case ir_binop_any_nequal:
         inst = emit(BRW_OPCODE_CMP, dst_null_cmp(), op[0], op[1]);
         inst->conditional_mod =
            brw_conditional_for_comparison(expr->operation);
         break;

      default:
         assert(!"not reached");
         break;
      }
      return;
   }

   ir->accept(this);

   if (intel->gen >= 6) {
      vec4_instruction *inst = emit(BRW_OPCODE_AND, dst_null_d(),
                                    this->result, src_reg(1));
      inst->conditional_mod = BRW_CONDITIONAL_NZ;
   } else {
      vec4_instruction *inst = emit(BRW_OPCODE_MOV, dst_null_d(), this->result);
      inst->conditional_mod = BRW_CONDITIONAL_NZ;
   }
}
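/* Example of the resulting code (illustrative): for "if (a && b)" the
 * expression path emits AND.nz null, a, b so the flag register holds the
 * condition, and the caller then issues a predicated IF that consumes it.
 */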
/**
 * Emit a gen6 IF statement with the comparison folded into the IF
 * instruction.
 */
void
vec4_visitor::emit_if_gen6(ir_if *ir)
{
   ir_expression *expr = ir->condition->as_expression();

   if (expr) {
      src_reg op[2];
      vec4_instruction *inst;
      dst_reg temp;

      assert(expr->get_num_operands() <= 2);
      for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
         expr->operands[i]->accept(this);
         op[i] = this->result;
      }

      switch (expr->operation) {
      case ir_unop_logic_not:
         inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], src_reg(0));
         inst->conditional_mod = BRW_CONDITIONAL_Z;
         return;

      case ir_binop_logic_xor:
         inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], op[1]);
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         return;

      case ir_binop_logic_or:
         temp = dst_reg(this, glsl_type::bool_type);
         emit(BRW_OPCODE_OR, temp, op[0], op[1]);
         inst = emit(BRW_OPCODE_IF, dst_null_d(), src_reg(temp), src_reg(0));
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         return;

      case ir_binop_logic_and:
         temp = dst_reg(this, glsl_type::bool_type);
         emit(BRW_OPCODE_AND, temp, op[0], op[1]);
         inst = emit(BRW_OPCODE_IF, dst_null_d(), src_reg(temp), src_reg(0));
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         return;

      case ir_unop_f2b:
         inst = emit(BRW_OPCODE_IF, dst_null_f(), op[0], src_reg(0));
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         return;

      case ir_unop_i2b:
         inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], src_reg(0));
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         return;

      case ir_binop_greater:
      case ir_binop_gequal:
      case ir_binop_less:
      case ir_binop_lequal:
      case ir_binop_equal:
      case ir_binop_nequal:
         inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], op[1]);
         inst->conditional_mod =
            brw_conditional_for_comparison(expr->operation);
         return;

      case ir_binop_all_equal:
         inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], op[1]);
         inst->conditional_mod = BRW_CONDITIONAL_Z;

         inst = emit(BRW_OPCODE_IF);
         inst->predicate = BRW_PREDICATE_ALIGN16_ALL4H;
         return;

      case ir_binop_any_nequal:
         inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], op[1]);
         inst->conditional_mod = BRW_CONDITIONAL_NZ;

         inst = emit(BRW_OPCODE_IF);
         inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
         return;

      case ir_unop_any:
         inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], src_reg(0));
         inst->conditional_mod = BRW_CONDITIONAL_NZ;

         inst = emit(BRW_OPCODE_IF);
         inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
         return;

      default:
         assert(!"not reached");
         inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], src_reg(0));
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         return;
      }
   }

   ir->condition->accept(this);

   vec4_instruction *inst = emit(BRW_OPCODE_IF, dst_null_d(),
                                 this->result, src_reg(0));
   inst->conditional_mod = BRW_CONDITIONAL_NZ;
}
void
vec4_visitor::visit(ir_variable *ir)
{
   dst_reg *reg = NULL;

   if (variable_storage(ir))
      return;

   switch (ir->mode) {
   case ir_var_in:
      reg = new(mem_ctx) dst_reg(ATTR, ir->location);
      break;

   case ir_var_out:
      reg = new(mem_ctx) dst_reg(this, ir->type);

      for (int i = 0; i < type_size(ir->type); i++) {
         output_reg[ir->location + i] = *reg;
         output_reg[ir->location + i].reg_offset = i;
         output_reg[ir->location + i].type = BRW_REGISTER_TYPE_F;
      }
      break;

   case ir_var_auto:
   case ir_var_temporary:
      reg = new(mem_ctx) dst_reg(this, ir->type);
      break;

   case ir_var_uniform:
      reg = new(this->mem_ctx) dst_reg(UNIFORM, this->uniforms);

      /* Track how big the whole uniform variable is, in case we need to put a
       * copy of its data into pull constants for array access.
       */
      this->uniform_size[this->uniforms] = type_size(ir->type);

      if (!strncmp(ir->name, "gl_", 3)) {
         setup_builtin_uniform_values(ir);
      } else {
         setup_uniform_values(ir->location, ir->type);
      }
      break;

   default:
      assert(!"not reached");
      break;
   }

   reg->type = brw_type_for_base_type(ir->type);
   hash_table_insert(this->variable_ht, reg, ir);
}
void
vec4_visitor::visit(ir_loop *ir)
{
   dst_reg counter;

   /* We don't want debugging output to print the whole body of the
    * loop as the annotation.
    */
   this->base_ir = NULL;

   if (ir->counter != NULL) {
      this->base_ir = ir->counter;
      ir->counter->accept(this);
      counter = *(variable_storage(ir->counter));

      if (ir->from != NULL) {
         this->base_ir = ir->from;
         ir->from->accept(this);

         emit(BRW_OPCODE_MOV, counter, this->result);
      }
   }

   emit(BRW_OPCODE_DO);

   if (ir->to) {
      this->base_ir = ir->to;
      ir->to->accept(this);

      vec4_instruction *inst = emit(BRW_OPCODE_CMP, dst_null_d(),
                                    src_reg(counter), this->result);
      inst->conditional_mod = brw_conditional_for_comparison(ir->cmp);

      inst = emit(BRW_OPCODE_BREAK);
      inst->predicate = BRW_PREDICATE_NORMAL;
   }

   visit_instructions(&ir->body_instructions);

   if (ir->increment) {
      this->base_ir = ir->increment;
      ir->increment->accept(this);
      emit(BRW_OPCODE_ADD, counter, src_reg(counter), this->result);
   }

   emit(BRW_OPCODE_WHILE);
}
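/* Example of the lowering (illustrative): a normalized counted loop
 * "for (i = from; i cmp to; i += increment) body;" becomes roughly
 *    MOV counter, from
 *    DO
 *       CMP.cmp null, counter, to
 *       (+f0) BREAK
 *       ... body ...
 *       ADD counter, counter, increment
 *    WHILE
 */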
void
vec4_visitor::visit(ir_loop_jump *ir)
{
   switch (ir->mode) {
   case ir_loop_jump::jump_break:
      emit(BRW_OPCODE_BREAK);
      break;
   case ir_loop_jump::jump_continue:
      emit(BRW_OPCODE_CONTINUE);
      break;
   }
}
void
vec4_visitor::visit(ir_function_signature *ir)
{
   assert(0);
   (void)ir;
}
void
vec4_visitor::visit(ir_function *ir)
{
   /* Ignore function bodies other than main() -- we shouldn't see calls to
    * them since they should all be inlined.
    */
   if (strcmp(ir->name, "main") == 0) {
      const ir_function_signature *sig;
      exec_list empty;

      sig = ir->matching_signature(&empty);

      assert(sig);

      visit_instructions(&sig->body);
   }
}
bool
vec4_visitor::try_emit_sat(ir_expression *ir)
{
   ir_rvalue *sat_src = ir->as_rvalue_to_saturate();
   if (!sat_src)
      return false;

   sat_src->accept(this);
   src_reg src = this->result;

   this->result = src_reg(this, ir->type);
   vec4_instruction *inst;
   inst = emit(BRW_OPCODE_MOV, dst_reg(this->result), src);
   inst->saturate = true;

   return true;
}
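/* Example (illustrative): when an expression is recognized by
 * as_rvalue_to_saturate() -- e.g. a clamp(x, 0.0, 1.0) lowered to a
 * saturate wrapper -- this folds it into a single MOV.sat instead of
 * emitting separate CMP/SEL instructions.
 */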
void
vec4_visitor::emit_bool_comparison(unsigned int op,
                                   dst_reg dst, src_reg src0, src_reg src1)
{
   /* original gen4 does destination conversion before comparison. */
   if (intel->gen < 5)
      dst.type = src0.type;

   vec4_instruction *inst = emit(BRW_OPCODE_CMP, dst, src0, src1);
   inst->conditional_mod = brw_conditional_for_comparison(op);

   dst.type = BRW_REGISTER_TYPE_D;
   emit(BRW_OPCODE_AND, dst, src_reg(dst), src_reg(0x1));
}
void
vec4_visitor::visit(ir_expression *ir)
{
   unsigned int operand;
   src_reg op[Elements(ir->operands)];
   src_reg result_src;
   dst_reg result_dst;
   vec4_instruction *inst;

   if (try_emit_sat(ir))
      return;

   for (operand = 0; operand < ir->get_num_operands(); operand++) {
      this->result.file = BAD_FILE;
      ir->operands[operand]->accept(this);
      if (this->result.file == BAD_FILE) {
         printf("Failed to get tree for expression operand:\n");
         ir->operands[operand]->print();
         exit(1);
      }
      op[operand] = this->result;

      /* Matrix expression operands should have been broken down to vector
       * operations already.
       */
      assert(!ir->operands[operand]->type->is_matrix());
   }

   int vector_elements = ir->operands[0]->type->vector_elements;
   if (ir->operands[1]) {
      vector_elements = MAX2(vector_elements,
                             ir->operands[1]->type->vector_elements);
   }

   this->result.file = BAD_FILE;

   /* Storage for our result.  Ideally for an assignment we'd be using
    * the actual storage for the result here, instead.
    */
   result_src = src_reg(this, ir->type);
   /* convenience for the emit functions below. */
   result_dst = dst_reg(result_src);
   /* If nothing special happens, this is the result. */
   this->result = result_src;
   /* Limit writes to the channels that will be used by result_src later.
    * This does limit this temp's use as a temporary for multi-instruction
    * sequences.
    */
   result_dst.writemask = (1 << ir->type->vector_elements) - 1;

   switch (ir->operation) {
   case ir_unop_logic_not:
      /* Note that BRW_OPCODE_NOT is not appropriate here, since it is
       * ones complement of the whole register, not just bit 0.
       */
      emit(BRW_OPCODE_XOR, result_dst, op[0], src_reg(1));
      break;
   case ir_unop_neg:
      op[0].negate = !op[0].negate;
      this->result = op[0];
      break;
   case ir_unop_abs:
      op[0].abs = true;
      op[0].negate = false;
      this->result = op[0];
      break;

   case ir_unop_sign:
      emit(BRW_OPCODE_MOV, result_dst, src_reg(0.0f));

      inst = emit(BRW_OPCODE_CMP, dst_null_f(), op[0], src_reg(0.0f));
      inst->conditional_mod = BRW_CONDITIONAL_G;
      inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1.0f));
      inst->predicate = BRW_PREDICATE_NORMAL;

      inst = emit(BRW_OPCODE_CMP, dst_null_f(), op[0], src_reg(0.0f));
      inst->conditional_mod = BRW_CONDITIONAL_L;
      inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(-1.0f));
      inst->predicate = BRW_PREDICATE_NORMAL;

      break;

   case ir_unop_rcp:
      emit_math(SHADER_OPCODE_RCP, result_dst, op[0]);
      break;

   case ir_unop_exp2:
      emit_math(SHADER_OPCODE_EXP2, result_dst, op[0]);
      break;
   case ir_unop_log2:
      emit_math(SHADER_OPCODE_LOG2, result_dst, op[0]);
      break;
   case ir_unop_exp:
   case ir_unop_log:
      assert(!"not reached: should be handled by ir_explog_to_explog2");
      break;

   case ir_unop_sin:
   case ir_unop_sin_reduced:
      emit_math(SHADER_OPCODE_SIN, result_dst, op[0]);
      break;
   case ir_unop_cos:
   case ir_unop_cos_reduced:
      emit_math(SHADER_OPCODE_COS, result_dst, op[0]);
      break;

   case ir_unop_dFdx:
   case ir_unop_dFdy:
      assert(!"derivatives not valid in vertex shader");
      break;

   case ir_unop_noise:
      assert(!"not reached: should be handled by lower_noise");
      break;

   case ir_binop_add:
      emit(BRW_OPCODE_ADD, result_dst, op[0], op[1]);
      break;
   case ir_binop_sub:
      assert(!"not reached: should be handled by ir_sub_to_add_neg");
      break;

   case ir_binop_mul:
      if (ir->type->is_integer()) {
         /* For integer multiplication, the MUL uses the low 16 bits
          * of one of the operands (src0 on gen6, src1 on gen7).  The
          * MACH accumulates in the contribution of the upper 16 bits
          * of that operand.
          *
          * FINISHME: Emit just the MUL if we know an operand is small
          * enough.
          */
         struct brw_reg acc = retype(brw_acc_reg(), BRW_REGISTER_TYPE_D);

         emit(BRW_OPCODE_MUL, acc, op[0], op[1]);
         emit(BRW_OPCODE_MACH, dst_null_d(), op[0], op[1]);
         emit(BRW_OPCODE_MOV, result_dst, src_reg(acc));
      } else {
         emit(BRW_OPCODE_MUL, result_dst, op[0], op[1]);
      }
      break;
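   /* Worked example of the integer path above (illustrative): for
    * ivec4 c = a * b, the sequence is
    *    MUL  acc, a, b      -- partial product from the low 16 bits
    *    MACH null, a, b     -- accumulates the high 16 bits' contribution
    *    MOV  c, acc         -- read the full 32-bit product back
    */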
   case ir_binop_div:
      assert(!"not reached: should be handled by ir_div_to_mul_rcp");
   case ir_binop_mod:
      assert(!"ir_binop_mod should have been converted to b * fract(a/b)");
      break;

   case ir_binop_less:
   case ir_binop_greater:
   case ir_binop_lequal:
   case ir_binop_gequal:
   case ir_binop_equal:
   case ir_binop_nequal: {
      dst_reg temp = result_dst;
      /* original gen4 does implicit conversion before comparison. */
      if (intel->gen < 5)
         temp.type = op[0].type;

      inst = emit(BRW_OPCODE_CMP, temp, op[0], op[1]);
      inst->conditional_mod = brw_conditional_for_comparison(ir->operation);
      emit(BRW_OPCODE_AND, result_dst, this->result, src_reg(0x1));
      break;
   }

   case ir_binop_all_equal:
      /* "==" operator producing a scalar boolean. */
      if (ir->operands[0]->type->is_vector() ||
          ir->operands[1]->type->is_vector()) {
         inst = emit(BRW_OPCODE_CMP, dst_null_cmp(), op[0], op[1]);
         inst->conditional_mod = BRW_CONDITIONAL_Z;

         emit(BRW_OPCODE_MOV, result_dst, src_reg(0));
         inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1));
         inst->predicate = BRW_PREDICATE_ALIGN16_ALL4H;
      } else {
         dst_reg temp = result_dst;
         /* original gen4 does implicit conversion before comparison. */
         if (intel->gen < 5)
            temp.type = op[0].type;

         inst = emit(BRW_OPCODE_CMP, temp, op[0], op[1]);
         inst->conditional_mod = BRW_CONDITIONAL_Z;
         emit(BRW_OPCODE_AND, result_dst, result_src, src_reg(0x1));
      }
      break;
   case ir_binop_any_nequal:
      /* "!=" operator producing a scalar boolean. */
      if (ir->operands[0]->type->is_vector() ||
          ir->operands[1]->type->is_vector()) {
         inst = emit(BRW_OPCODE_CMP, dst_null_cmp(), op[0], op[1]);
         inst->conditional_mod = BRW_CONDITIONAL_NZ;

         emit(BRW_OPCODE_MOV, result_dst, src_reg(0));
         inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1));
         inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
      } else {
         dst_reg temp = result_dst;
         /* original gen4 does implicit conversion before comparison. */
         if (intel->gen < 5)
            temp.type = op[0].type;

         inst = emit(BRW_OPCODE_CMP, temp, op[0], op[1]);
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         emit(BRW_OPCODE_AND, result_dst, result_src, src_reg(0x1));
      }
      break;

   case ir_unop_any:
      inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], src_reg(0));
      inst->conditional_mod = BRW_CONDITIONAL_NZ;

      emit(BRW_OPCODE_MOV, result_dst, src_reg(0));

      inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1));
      inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
      break;

   case ir_binop_logic_xor:
      emit(BRW_OPCODE_XOR, result_dst, op[0], op[1]);
      break;

   case ir_binop_logic_or:
      emit(BRW_OPCODE_OR, result_dst, op[0], op[1]);
      break;

   case ir_binop_logic_and:
      emit(BRW_OPCODE_AND, result_dst, op[0], op[1]);
      break;

   case ir_binop_dot:
      assert(ir->operands[0]->type->is_vector());
      assert(ir->operands[0]->type == ir->operands[1]->type);
      emit_dp(result_dst, op[0], op[1], ir->operands[0]->type->vector_elements);
      break;

   case ir_unop_sqrt:
      emit_math(SHADER_OPCODE_SQRT, result_dst, op[0]);
      break;
   case ir_unop_rsq:
      emit_math(SHADER_OPCODE_RSQ, result_dst, op[0]);
      break;

   case ir_unop_i2f:
   case ir_unop_i2u:
   case ir_unop_u2i:
   case ir_unop_u2f:
   case ir_unop_b2f:
   case ir_unop_b2i:
   case ir_unop_f2i:
      emit(BRW_OPCODE_MOV, result_dst, op[0]);
      break;

   case ir_unop_f2b:
   case ir_unop_i2b: {
      dst_reg temp = result_dst;
      /* original gen4 does implicit conversion before comparison. */
      if (intel->gen < 5)
         temp.type = op[0].type;

      inst = emit(BRW_OPCODE_CMP, temp, op[0], src_reg(0.0f));
      inst->conditional_mod = BRW_CONDITIONAL_NZ;
      inst = emit(BRW_OPCODE_AND, result_dst, result_src, src_reg(1));
      break;
   }

   case ir_unop_trunc:
      emit(BRW_OPCODE_RNDZ, result_dst, op[0]);
      break;
   case ir_unop_ceil:
      op[0].negate = !op[0].negate;
      inst = emit(BRW_OPCODE_RNDD, result_dst, op[0]);
      this->result.negate = true;
      break;
   case ir_unop_floor:
      inst = emit(BRW_OPCODE_RNDD, result_dst, op[0]);
      break;
   case ir_unop_fract:
      inst = emit(BRW_OPCODE_FRC, result_dst, op[0]);
      break;
   case ir_unop_round_even:
      emit(BRW_OPCODE_RNDE, result_dst, op[0]);
      break;

   case ir_binop_min:
      inst = emit(BRW_OPCODE_CMP, result_dst, op[0], op[1]);
      inst->conditional_mod = BRW_CONDITIONAL_L;

      inst = emit(BRW_OPCODE_SEL, result_dst, op[0], op[1]);
      inst->predicate = BRW_PREDICATE_NORMAL;
      break;
   case ir_binop_max:
      inst = emit(BRW_OPCODE_CMP, result_dst, op[0], op[1]);
      inst->conditional_mod = BRW_CONDITIONAL_G;

      inst = emit(BRW_OPCODE_SEL, result_dst, op[0], op[1]);
      inst->predicate = BRW_PREDICATE_NORMAL;
      break;

   case ir_binop_pow:
      emit_math(SHADER_OPCODE_POW, result_dst, op[0], op[1]);
      break;

   case ir_unop_bit_not:
      inst = emit(BRW_OPCODE_NOT, result_dst, op[0]);
      break;
   case ir_binop_bit_and:
      inst = emit(BRW_OPCODE_AND, result_dst, op[0], op[1]);
      break;
   case ir_binop_bit_xor:
      inst = emit(BRW_OPCODE_XOR, result_dst, op[0], op[1]);
      break;
   case ir_binop_bit_or:
      inst = emit(BRW_OPCODE_OR, result_dst, op[0], op[1]);
      break;

   case ir_binop_lshift:
   case ir_binop_rshift:
      assert(!"GLSL 1.30 features unsupported");
      break;

   case ir_quadop_vector:
      assert(!"not reached: should be handled by lower_quadop_vector");
      break;
   }
}
void
vec4_visitor::visit(ir_swizzle *ir)
{
   src_reg src;
   int i = 0;
   int swizzle[4];

   /* Note that this is only swizzles in expressions, not those on the left
    * hand side of an assignment, which do write masking.  See ir_assignment
    * for that.
    */

   ir->val->accept(this);
   src = this->result;
   assert(src.file != BAD_FILE);

   for (i = 0; i < ir->type->vector_elements; i++) {
      switch (i) {
      case 0:
         swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.x);
         break;
      case 1:
         swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.y);
         break;
      case 2:
         swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.z);
         break;
      case 3:
         swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.w);
         break;
      }
   }
   for (; i < 4; i++) {
      /* Replicate the last channel out. */
      swizzle[i] = swizzle[ir->type->vector_elements - 1];
   }

   src.swizzle = BRW_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]);

   this->result = src;
}
void
vec4_visitor::visit(ir_dereference_variable *ir)
{
   const struct glsl_type *type = ir->type;
   dst_reg *reg = variable_storage(ir->var);

   if (!reg) {
      fail("Failed to find variable storage for %s\n", ir->var->name);
      this->result = src_reg(brw_null_reg());
      return;
   }

   this->result = src_reg(*reg);

   if (type->is_scalar() || type->is_vector() || type->is_matrix())
      this->result.swizzle = swizzle_for_size(type->vector_elements);
}
void
vec4_visitor::visit(ir_dereference_array *ir)
{
   ir_constant *constant_index;
   src_reg src;
   int element_size = type_size(ir->type);

   constant_index = ir->array_index->constant_expression_value();

   ir->array->accept(this);
   src = this->result;

   if (constant_index) {
      src.reg_offset += constant_index->value.i[0] * element_size;
   } else {
      /* Variable index array dereference.  It eats the "vec4" of the
       * base of the array and an index that offsets the Mesa register
       * index.
       */
      ir->array_index->accept(this);

      src_reg index_reg;

      if (element_size == 1) {
         index_reg = this->result;
      } else {
         index_reg = src_reg(this, glsl_type::int_type);

         emit(BRW_OPCODE_MUL, dst_reg(index_reg),
              this->result, src_reg(element_size));
      }

      if (src.reladdr) {
         src_reg temp = src_reg(this, glsl_type::int_type);

         emit(BRW_OPCODE_ADD, dst_reg(temp), *src.reladdr, index_reg);

         index_reg = temp;
      }

      src.reladdr = ralloc(mem_ctx, src_reg);
      memcpy(src.reladdr, &index_reg, sizeof(index_reg));
   }

   /* If the type is smaller than a vec4, replicate the last channel out. */
   if (ir->type->is_scalar() || ir->type->is_vector())
      src.swizzle = swizzle_for_size(ir->type->vector_elements);
   else
      src.swizzle = BRW_SWIZZLE_NOOP;
   src.type = brw_type_for_base_type(ir->type);

   this->result = src;
}
void
vec4_visitor::visit(ir_dereference_record *ir)
{
   unsigned int i;
   const glsl_type *struct_type = ir->record->type;
   int offset = 0;

   ir->record->accept(this);

   for (i = 0; i < struct_type->length; i++) {
      if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0)
         break;
      offset += type_size(struct_type->fields.structure[i].type);
   }

   /* If the type is smaller than a vec4, replicate the last channel out. */
   if (ir->type->is_scalar() || ir->type->is_vector())
      this->result.swizzle = swizzle_for_size(ir->type->vector_elements);
   else
      this->result.swizzle = BRW_SWIZZLE_NOOP;
   this->result.type = brw_type_for_base_type(ir->type);

   this->result.reg_offset += offset;
}
/**
 * We want to be careful in assignment setup to hit the actual storage
 * instead of potentially using a temporary like we might with the
 * ir_dereference handler.
 */
static dst_reg
get_assignment_lhs(ir_dereference *ir, vec4_visitor *v)
{
   /* The LHS must be a dereference.  If the LHS is a variable indexed array
    * access of a vector, it must be separated into a series of conditional
    * moves before reaching this point (see ir_vec_index_to_cond_assign).
    */
   assert(ir->as_dereference());
   ir_dereference_array *deref_array = ir->as_dereference_array();
   if (deref_array) {
      assert(!deref_array->array->type->is_vector());
   }

   /* Use the rvalue deref handler for the most part.  We'll ignore
    * swizzles in it and write swizzles using writemask, though.
    */
   ir->accept(v);
   return dst_reg(v->result);
}
void
vec4_visitor::emit_block_move(dst_reg *dst, src_reg *src,
                              const struct glsl_type *type, bool predicated)
{
   if (type->base_type == GLSL_TYPE_STRUCT) {
      for (unsigned int i = 0; i < type->length; i++) {
         emit_block_move(dst, src, type->fields.structure[i].type, predicated);
      }
      return;
   }

   if (type->is_array()) {
      for (unsigned int i = 0; i < type->length; i++) {
         emit_block_move(dst, src, type->fields.array, predicated);
      }
      return;
   }

   if (type->is_matrix()) {
      const struct glsl_type *vec_type;

      vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT,
                                         type->vector_elements, 1);

      for (int i = 0; i < type->matrix_columns; i++) {
         emit_block_move(dst, src, vec_type, predicated);
      }
      return;
   }

   assert(type->is_scalar() || type->is_vector());

   dst->type = brw_type_for_base_type(type);
   src->type = dst->type;

   dst->writemask = (1 << type->vector_elements) - 1;

   /* Do we need to worry about swizzling a swizzle? */
   assert(src->swizzle == BRW_SWIZZLE_NOOP);
   src->swizzle = swizzle_for_size(type->vector_elements);

   vec4_instruction *inst = emit(BRW_OPCODE_MOV, *dst, *src);
   if (predicated)
      inst->predicate = BRW_PREDICATE_NORMAL;

   dst->reg_offset++;
   src->reg_offset++;
}
/**
 * If the RHS processing resulted in an instruction generating a
 * temporary value, and it would be easy to rewrite the instruction to
 * generate its result right into the LHS instead, do so.  This ends
 * up reliably removing instructions where it can be tricky to do so
 * later without real UD chain information.
 */
bool
vec4_visitor::try_rewrite_rhs_to_dst(ir_assignment *ir,
                                     dst_reg dst,
                                     src_reg src,
                                     vec4_instruction *pre_rhs_inst,
                                     vec4_instruction *last_rhs_inst)
{
   /* This could be supported, but it would take more smarts. */
   if (ir->condition)
      return false;

   if (pre_rhs_inst == last_rhs_inst)
      return false; /* No instructions generated to work with. */

   /* Make sure the last instruction generated our source reg. */
   if (src.file != GRF ||
       src.file != last_rhs_inst->dst.file ||
       src.reg != last_rhs_inst->dst.reg ||
       src.reg_offset != last_rhs_inst->dst.reg_offset ||
       src.reladdr ||
       src.abs ||
       src.negate ||
       last_rhs_inst->predicate != BRW_PREDICATE_NONE)
      return false;

   /* Check that the last instruction fully initialized the channels
    * we want to use, in the order we want to use them.  We could
    * potentially reswizzle the operands of many instructions so that
    * we could handle out of order channels, but don't yet.
    */
   for (int i = 0; i < 4; i++) {
      if (dst.writemask & (1 << i)) {
         if (!(last_rhs_inst->dst.writemask & (1 << i)))
            return false;

         if (BRW_GET_SWZ(src.swizzle, i) != i)
            return false;
      }
   }

   /* Success!  Rewrite the instruction. */
   last_rhs_inst->dst.file = dst.file;
   last_rhs_inst->dst.reg = dst.reg;
   last_rhs_inst->dst.reg_offset = dst.reg_offset;
   last_rhs_inst->dst.reladdr = dst.reladdr;
   last_rhs_inst->dst.writemask &= dst.writemask;

   return true;
}
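/* Example of the rewrite (illustrative): for "x = a + b" the RHS emits
 * ADD temp, a, b; since temp is only consumed by the assignment's MOV,
 * the pass retargets it to ADD x, a, b and the copy disappears.
 */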
void
vec4_visitor::visit(ir_assignment *ir)
{
   dst_reg dst = get_assignment_lhs(ir->lhs, this);

   if (!ir->lhs->type->is_scalar() &&
       !ir->lhs->type->is_vector()) {
      ir->rhs->accept(this);
      src_reg src = this->result;

      if (ir->condition) {
         emit_bool_to_cond_code(ir->condition);
      }

      emit_block_move(&dst, &src, ir->rhs->type, ir->condition != NULL);
      return;
   }

   /* Now we're down to just a scalar/vector with writemasks. */
   int i;

   vec4_instruction *pre_rhs_inst, *last_rhs_inst;
   pre_rhs_inst = (vec4_instruction *)this->instructions.get_tail();

   ir->rhs->accept(this);

   last_rhs_inst = (vec4_instruction *)this->instructions.get_tail();

   src_reg src = this->result;

   int swizzles[4];
   int src_chan = 0;
   int first_enabled_chan = 0;

   assert(ir->lhs->type->is_vector() ||
          ir->lhs->type->is_scalar());
   dst.writemask = ir->write_mask;

   for (int i = 0; i < 4; i++) {
      if (dst.writemask & (1 << i)) {
         first_enabled_chan = BRW_GET_SWZ(src.swizzle, i);
         break;
      }
   }

   /* Swizzle a small RHS vector into the channels being written.
    *
    * glsl ir treats write_mask as dictating how many channels are
    * present on the RHS while in our instructions we need to make
    * those channels appear in the slots of the vec4 they're written to.
    */
   for (int i = 0; i < 4; i++) {
      if (dst.writemask & (1 << i))
         swizzles[i] = BRW_GET_SWZ(src.swizzle, src_chan++);
      else
         swizzles[i] = first_enabled_chan;
   }
   src.swizzle = BRW_SWIZZLE4(swizzles[0], swizzles[1],
                              swizzles[2], swizzles[3]);

   if (try_rewrite_rhs_to_dst(ir, dst, src, pre_rhs_inst, last_rhs_inst)) {
      return;
   }

   if (ir->condition) {
      emit_bool_to_cond_code(ir->condition);
   }

   for (i = 0; i < type_size(ir->lhs->type); i++) {
      vec4_instruction *inst = emit(BRW_OPCODE_MOV, dst, src);

      if (ir->condition)
         inst->predicate = BRW_PREDICATE_NORMAL;

      dst.reg_offset++;
      src.reg_offset++;
   }
}
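/* Example of the RHS re-swizzling above (illustrative): "v.yz = u.xy"
 * has write_mask YZ and a two-channel RHS, so slots y and z of the source
 * swizzle become X and Y while the disabled slots are filled from the
 * first enabled channel, lining the RHS up with the destination slots.
 */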
void
vec4_visitor::emit_constant_values(dst_reg *dst, ir_constant *ir)
{
   if (ir->type->base_type == GLSL_TYPE_STRUCT) {
      foreach_list(node, &ir->components) {
         ir_constant *field_value = (ir_constant *)node;

         emit_constant_values(dst, field_value);
      }
      return;
   }

   if (ir->type->is_array()) {
      for (unsigned int i = 0; i < ir->type->length; i++) {
         emit_constant_values(dst, ir->array_elements[i]);
      }
      return;
   }

   if (ir->type->is_matrix()) {
      for (int i = 0; i < ir->type->matrix_columns; i++) {
         for (int j = 0; j < ir->type->vector_elements; j++) {
            dst->writemask = 1 << j;
            dst->type = BRW_REGISTER_TYPE_F;

            emit(BRW_OPCODE_MOV, *dst,
                 src_reg(ir->value.f[i * ir->type->vector_elements + j]));
         }
         dst->reg_offset++;
      }
      return;
   }

   for (int i = 0; i < ir->type->vector_elements; i++) {
      dst->writemask = 1 << i;
      dst->type = brw_type_for_base_type(ir->type);

      switch (ir->type->base_type) {
      case GLSL_TYPE_FLOAT:
         emit(BRW_OPCODE_MOV, *dst, src_reg(ir->value.f[i]));
         break;
      case GLSL_TYPE_INT:
         emit(BRW_OPCODE_MOV, *dst, src_reg(ir->value.i[i]));
         break;
      case GLSL_TYPE_UINT:
         emit(BRW_OPCODE_MOV, *dst, src_reg(ir->value.u[i]));
         break;
      case GLSL_TYPE_BOOL:
         emit(BRW_OPCODE_MOV, *dst, src_reg(ir->value.b[i]));
         break;
      default:
         assert(!"Non-float/uint/int/bool constant");
         break;
      }
   }
   dst->reg_offset++;
}
void
vec4_visitor::visit(ir_constant *ir)
{
   dst_reg dst = dst_reg(this, ir->type);
   this->result = src_reg(dst);

   emit_constant_values(&dst, ir);
}
void
vec4_visitor::visit(ir_call *ir)
{
   assert(!"not reached");
}
void
vec4_visitor::visit(ir_texture *ir)
{
   /* FINISHME: Implement vertex texturing.
    *
    * With 0 vertex samplers available, the linker will reject
    * programs that do vertex texturing, but after our visitor has
    * run.
    */
}
void
vec4_visitor::visit(ir_return *ir)
{
   assert(!"not reached");
}

void
vec4_visitor::visit(ir_discard *ir)
{
   assert(!"not reached");
}
void
vec4_visitor::visit(ir_if *ir)
{
   /* Don't point the annotation at the if statement, because then it plus
    * the then and else blocks get printed.
    */
   this->base_ir = ir->condition;

   if (intel->gen == 6) {
      emit_if_gen6(ir);
   } else {
      emit_bool_to_cond_code(ir->condition);
      vec4_instruction *inst = emit(BRW_OPCODE_IF);
      inst->predicate = BRW_PREDICATE_NORMAL;
   }

   visit_instructions(&ir->then_instructions);

   if (!ir->else_instructions.is_empty()) {
      this->base_ir = ir->condition;
      emit(BRW_OPCODE_ELSE);

      visit_instructions(&ir->else_instructions);
   }

   this->base_ir = ir->condition;
   emit(BRW_OPCODE_ENDIF);
}
int
vec4_visitor::emit_vue_header_gen4(int header_mrf)
{
   /* Get the position */
   src_reg pos = src_reg(output_reg[VERT_RESULT_HPOS]);

   /* Build ndc coords, which are (x/w, y/w, z/w, 1/w) */
   dst_reg ndc = dst_reg(this, glsl_type::vec4_type);

   current_annotation = "NDC";
   dst_reg ndc_w = ndc;
   ndc_w.writemask = WRITEMASK_W;
   src_reg pos_w = pos;
   pos_w.swizzle = BRW_SWIZZLE4(SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W);
   emit_math(SHADER_OPCODE_RCP, ndc_w, pos_w);

   dst_reg ndc_xyz = ndc;
   ndc_xyz.writemask = WRITEMASK_XYZ;

   emit(BRW_OPCODE_MUL, ndc_xyz, pos, src_reg(ndc_w));

   if ((c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) ||
       c->key.nr_userclip || brw->has_negative_rhw_bug) {
      dst_reg header1 = dst_reg(this, glsl_type::uvec4_type);
      GLuint i;

      emit(BRW_OPCODE_MOV, header1, 0u);

      if (c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) {
         assert(!"finishme: psiz");
         src_reg psiz;

         header1.writemask = WRITEMASK_W;
         emit(BRW_OPCODE_MUL, header1, psiz, 1u << 11);
         emit(BRW_OPCODE_AND, header1, src_reg(header1), 0x7ff << 8);
      }

      for (i = 0; i < c->key.nr_userclip; i++) {
         vec4_instruction *inst;

         inst = emit(BRW_OPCODE_DP4, dst_reg(brw_null_reg()),
                     pos, src_reg(c->userplane[i]));
         inst->conditional_mod = BRW_CONDITIONAL_L;

         /* Predicate the OR on the DP4's flag result so the clip bit is
          * only set for vertices behind the plane.
          */
         inst = emit(BRW_OPCODE_OR, header1, src_reg(header1), 1u << i);
         inst->predicate = BRW_PREDICATE_NORMAL;
      }

      /* i965 clipping workaround:
       * 1) Test for -ve rhw
       * 2) If set,
       *      set ndc = (0,0,0,0)
       *      set ucp[6] = 1
       *
       * Later, clipping will detect ucp[6] and ensure the primitive is
       * clipped against all fixed planes.
       */
      if (brw->has_negative_rhw_bug) {
#if 0
         /* FINISHME */
         brw_CMP(p,
                 vec8(brw_null_reg()),
                 BRW_CONDITIONAL_L,
                 brw_swizzle1(ndc, 3),
                 brw_imm_f(0));

         brw_OR(p, brw_writemask(header1, WRITEMASK_W), header1, brw_imm_ud(1<<6));
         brw_MOV(p, ndc, brw_imm_f(0));
         brw_set_predicate_control(p, BRW_PREDICATE_NONE);
#endif
      }

      header1.writemask = WRITEMASK_XYZW;
      emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), src_reg(header1));
   } else {
      emit(BRW_OPCODE_MOV, retype(brw_message_reg(header_mrf++),
                                  BRW_REGISTER_TYPE_UD), 0u);
   }

   if (intel->gen == 5) {
      /* There are 20 DWs (D0-D19) in VUE header on Ironlake:
       * dword 0-3 (m1) of the header is indices, point width, clip flags.
       * dword 4-7 (m2) is the ndc position (set above)
       * dword 8-11 (m3) of the vertex header is the 4D space position
       * dword 12-19 (m4,m5) of the vertex header is the user clip distance.
       * m6 is a pad so that the vertex element data is aligned
       * m7 is the first vertex data we fill.
       */
      current_annotation = "NDC";
      emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), src_reg(ndc));

      current_annotation = "gl_Position";
      emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), pos);

      /* user clip distance. */
      header_mrf += 2;

      /* Pad so that vertex element data is aligned. */
      header_mrf++;
   } else {
      /* There are 8 dwords in VUE header pre-Ironlake:
       * dword 0-3 (m1) is indices, point width, clip flags.
       * dword 4-7 (m2) is ndc position (set above)
       *
       * dword 8-11 (m3) is the first vertex data.
       */
      current_annotation = "NDC";
      emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), src_reg(ndc));

      current_annotation = "gl_Position";
      emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), pos);
   }

   return header_mrf;
}
int
vec4_visitor::emit_vue_header_gen6(int header_mrf)
{
   struct brw_reg reg;

   /* There are 8 or 16 DWs (D0-D15) in VUE header on Sandybridge:
    * dword 0-3 (m2) of the header is indices, point width, clip flags.
    * dword 4-7 (m3) is the 4D space position
    * dword 8-15 (m4,m5) of the vertex header is the user clip distance if
    * enabled.
    *
    * m4 or 6 is the first vertex element data we fill.
    */

   current_annotation = "indices, point width, clip flags";
   reg = brw_message_reg(header_mrf++);
   emit(BRW_OPCODE_MOV, retype(reg, BRW_REGISTER_TYPE_D), src_reg(0));
   if (c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) {
      emit(BRW_OPCODE_MOV, brw_writemask(reg, WRITEMASK_W),
           src_reg(output_reg[VERT_RESULT_PSIZ]));
   }

   current_annotation = "gl_Position";
   emit(BRW_OPCODE_MOV,
        brw_message_reg(header_mrf++), src_reg(output_reg[VERT_RESULT_HPOS]));

   current_annotation = "user clip distances";
   if (c->key.nr_userclip) {
      for (int i = 0; i < c->key.nr_userclip; i++) {
         struct brw_reg m;
         if (i < 4)
            m = brw_message_reg(header_mrf);
         else
            m = brw_message_reg(header_mrf + 1);

         emit(BRW_OPCODE_DP4,
              dst_reg(brw_writemask(m, 1 << (i & 3))),
              src_reg(output_reg[VERT_RESULT_HPOS]),
              src_reg(c->userplane[i]));
      }
      header_mrf += 2;
   }

   current_annotation = NULL;

   return header_mrf;
}
static int
align_interleaved_urb_mlen(struct brw_context *brw, int mlen)
{
   struct intel_context *intel = &brw->intel;

   if (intel->gen >= 6) {
      /* URB data written (does not include the message header reg) must
       * be a multiple of 256 bits, or 2 VS registers.  See vol5c.5,
       * section 5.4.3.2.2: URB_INTERLEAVED.
       *
       * URB entries are allocated on a multiple of 1024 bits, so an
       * extra 128 bits written here to make the end align to 256 is
       * no problem.
       */
      if ((mlen % 2) != 1)
         mlen++;
   }

   return mlen;
}
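/* Worked example (illustrative): mlen includes the one header register,
 * so mlen == 4 means 3 data registers (odd, unaligned) and is bumped to
 * 5, while mlen == 5 (4 data registers, a multiple of 2) is kept as is.
 */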
/**
 * Generates the VUE payload plus the 1 or 2 URB write instructions to
 * complete the VS thread.
 *
 * The VUE layout is documented in Volume 2a.
 */
void
vec4_visitor::emit_urb_writes()
{
   /* MRF 0 is reserved for the debugger, so start with message header
    * in MRF 1.
    */
   int base_mrf = 1;
   int mrf = base_mrf;
   int urb_entry_size;
   uint64_t outputs_remaining = c->prog_data.outputs_written;
   /* In the process of generating our URB write message contents, we
    * may need to unspill a register or load from an array.  Those
    * reads would use MRFs 14-15.
    */
   int max_usable_mrf = 13;

   /* FINISHME: edgeflag */

   /* First mrf is the g0-based message header containing URB handles and such,
    * which is implied in VS_OPCODE_URB_WRITE.
    */
   mrf++;

   if (intel->gen >= 6) {
      mrf = emit_vue_header_gen6(mrf);
   } else {
      mrf = emit_vue_header_gen4(mrf);
   }

   /* Set up the VUE data for the first URB write */
   int attr;
   for (attr = 0; attr < VERT_RESULT_MAX; attr++) {
      if (!(c->prog_data.outputs_written & BITFIELD64_BIT(attr)))
         continue;

      outputs_remaining &= ~BITFIELD64_BIT(attr);

      /* This is set up in the VUE header. */
      if (attr == VERT_RESULT_HPOS)
         continue;

      /* This is loaded into the VUE header, and thus doesn't occupy
       * an attribute slot.
       */
      if (attr == VERT_RESULT_PSIZ)
         continue;

      vec4_instruction *inst = emit(BRW_OPCODE_MOV, brw_message_reg(mrf++),
                                    src_reg(output_reg[attr]));

      if ((attr == VERT_RESULT_COL0 ||
           attr == VERT_RESULT_COL1 ||
           attr == VERT_RESULT_BFC0 ||
           attr == VERT_RESULT_BFC1) &&
          c->key.clamp_vertex_color) {
         inst->saturate = true;
      }

      /* If this was MRF 15, we can't fit anything more into this URB
       * WRITE.  Note that base_mrf of 1 means that MRF 15 is an
       * even-numbered amount of URB write data, which will meet
       * gen6's requirements for length alignment.
       */
      if (mrf > max_usable_mrf) {
         attr++;
         break;
      }
   }

   vec4_instruction *inst = emit(VS_OPCODE_URB_WRITE);
   inst->base_mrf = base_mrf;
   inst->mlen = align_interleaved_urb_mlen(brw, mrf - base_mrf);
   inst->eot = !outputs_remaining;

   urb_entry_size = mrf - base_mrf;

   /* Optional second URB write */
   if (outputs_remaining) {
      mrf = base_mrf + 1;

      for (; attr < VERT_RESULT_MAX; attr++) {
         if (!(c->prog_data.outputs_written & BITFIELD64_BIT(attr)))
            continue;

         assert(mrf < max_usable_mrf);

         emit(BRW_OPCODE_MOV, brw_message_reg(mrf++), src_reg(output_reg[attr]));
      }

      inst = emit(VS_OPCODE_URB_WRITE);
      inst->base_mrf = base_mrf;
      inst->mlen = align_interleaved_urb_mlen(brw, mrf - base_mrf);
      inst->eot = true;
      /* URB destination offset.  In the previous write, we got MRFs
       * 2-13 minus the one header MRF, so 12 regs.  URB offset is in
       * URB row increments, and each of our MRFs is half of one of
       * those, since we're doing interleaved writes.
       */
      inst->offset = (max_usable_mrf - base_mrf) / 2;

      urb_entry_size += mrf - base_mrf;
   }

   if (intel->gen == 6)
      c->prog_data.urb_entry_size = ALIGN(urb_entry_size, 8) / 8;
   else
      c->prog_data.urb_entry_size = ALIGN(urb_entry_size, 4) / 4;
}
src_reg
vec4_visitor::get_scratch_offset(vec4_instruction *inst,
                                 src_reg *reladdr, int reg_offset)
{
   /* Because we store the values to scratch interleaved like our
    * vertex data, we need to scale the vec4 index by 2.
    */
   int message_header_scale = 2;

   /* Pre-gen6, the message header uses byte offsets instead of vec4
    * (16-byte) offset units.
    */
   if (intel->gen < 6)
      message_header_scale *= 16;

   if (reladdr) {
      src_reg index = src_reg(this, glsl_type::int_type);

      vec4_instruction *add = emit(BRW_OPCODE_ADD,
                                   dst_reg(index),
                                   *reladdr,
                                   src_reg(reg_offset));
      /* Move our new instruction from the tail to its correct place. */
      add->remove();
      inst->insert_before(add);

      vec4_instruction *mul = emit(BRW_OPCODE_MUL, dst_reg(index),
                                   index, src_reg(message_header_scale));
      mul->remove();
      inst->insert_before(mul);

      return index;
   } else {
      return src_reg(reg_offset * message_header_scale);
   }
}
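/* Scaling example (illustrative): reg_offset 3 becomes src_reg(6) on
 * gen6+ (two interleaved vec4 rows per logical register) and src_reg(96)
 * on older gens, where the header wants byte offsets (6 * 16 bytes).
 */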
src_reg
vec4_visitor::get_pull_constant_offset(vec4_instruction *inst,
                                       src_reg *reladdr, int reg_offset)
{
   if (reladdr) {
      src_reg index = src_reg(this, glsl_type::int_type);

      vec4_instruction *add = new(mem_ctx) vec4_instruction(this, BRW_OPCODE_ADD,
                                                            dst_reg(index),
                                                            *reladdr,
                                                            src_reg(reg_offset));
      add->ir = inst->ir;
      add->annotation = inst->annotation;
      inst->insert_before(add);

      /* Pre-gen6, the message header uses byte offsets instead of vec4
       * (16-byte) offset units.
       */
      if (intel->gen < 6) {
         vec4_instruction *mul = new(mem_ctx) vec4_instruction(this,
                                                               BRW_OPCODE_MUL,
                                                               dst_reg(index),
                                                               index,
                                                               src_reg(16));
         mul->ir = inst->ir;
         mul->annotation = inst->annotation;
         inst->insert_before(mul);
      }

      return index;
   } else {
      int message_header_scale = intel->gen < 6 ? 16 : 1;
      return src_reg(reg_offset * message_header_scale);
   }
}
/**
 * Emits an instruction before @inst to load the value named by @orig_src
 * from scratch space at @base_offset to @temp.
 */
void
vec4_visitor::emit_scratch_read(vec4_instruction *inst,
                                dst_reg temp, src_reg orig_src,
                                int base_offset)
{
   int reg_offset = base_offset + orig_src.reg_offset;
   src_reg index = get_scratch_offset(inst, orig_src.reladdr, reg_offset);

   vec4_instruction *scratch_read_inst = emit(VS_OPCODE_SCRATCH_READ,
                                              temp, index);

   scratch_read_inst->base_mrf = 14;
   scratch_read_inst->mlen = 1;
   /* Move our instruction from the tail to its correct place. */
   scratch_read_inst->remove();
   inst->insert_before(scratch_read_inst);
}
/**
 * Emits an instruction after @inst to store the value to be written
 * to @orig_dst to scratch space at @base_offset, from @temp.
 */
void
vec4_visitor::emit_scratch_write(vec4_instruction *inst,
                                 src_reg temp, dst_reg orig_dst,
                                 int base_offset)
{
   int reg_offset = base_offset + orig_dst.reg_offset;
   src_reg index = get_scratch_offset(inst, orig_dst.reladdr, reg_offset);

   dst_reg dst = dst_reg(brw_writemask(brw_vec8_grf(0, 0),
                                       orig_dst.writemask));
   vec4_instruction *scratch_write_inst = emit(VS_OPCODE_SCRATCH_WRITE,
                                               dst, temp, index);
   scratch_write_inst->base_mrf = 13;
   scratch_write_inst->mlen = 2;
   scratch_write_inst->predicate = inst->predicate;
   /* Move our instruction from the tail to its correct place. */
   scratch_write_inst->remove();
   inst->insert_after(scratch_write_inst);
}
/**
 * We can't generally support array access in GRF space, because a
 * single instruction's destination can only span 2 contiguous
 * registers.  So, we send all GRF arrays that get variable index
 * access to scratch space.
 */
void
vec4_visitor::move_grf_array_access_to_scratch()
{
   int scratch_loc[this->virtual_grf_count];

   for (int i = 0; i < this->virtual_grf_count; i++) {
      scratch_loc[i] = -1;
   }

   /* First, calculate the set of virtual GRFs that need to be punted
    * to scratch due to having any array access on them, and where in
    * scratch.
    */
   foreach_list(node, &this->instructions) {
      vec4_instruction *inst = (vec4_instruction *)node;

      if (inst->dst.file == GRF && inst->dst.reladdr &&
          scratch_loc[inst->dst.reg] == -1) {
         scratch_loc[inst->dst.reg] = c->last_scratch;
         c->last_scratch += this->virtual_grf_sizes[inst->dst.reg] * 8 * 4;
      }

      for (int i = 0 ; i < 3; i++) {
         src_reg *src = &inst->src[i];

         if (src->file == GRF && src->reladdr &&
             scratch_loc[src->reg] == -1) {
            scratch_loc[src->reg] = c->last_scratch;
            c->last_scratch += this->virtual_grf_sizes[src->reg] * 8 * 4;
         }
      }
   }

   /* Now, for anything that will be accessed through scratch, rewrite
    * it to load/store.  Note that this is a _safe list walk, because
    * we may generate a new scratch_write instruction after the one
    * we're processing.
    */
   foreach_list_safe(node, &this->instructions) {
      vec4_instruction *inst = (vec4_instruction *)node;

      /* Set up the annotation tracking for new generated instructions. */
      base_ir = inst->ir;
      current_annotation = inst->annotation;

      if (inst->dst.file == GRF && scratch_loc[inst->dst.reg] != -1) {
         src_reg temp = src_reg(this, glsl_type::vec4_type);

         emit_scratch_write(inst, temp, inst->dst, scratch_loc[inst->dst.reg]);

         inst->dst.file = temp.file;
         inst->dst.reg = temp.reg;
         inst->dst.reg_offset = temp.reg_offset;
         inst->dst.reladdr = NULL;
      }

      for (int i = 0 ; i < 3; i++) {
         if (inst->src[i].file != GRF || scratch_loc[inst->src[i].reg] == -1)
            continue;

         dst_reg temp = dst_reg(this, glsl_type::vec4_type);

         emit_scratch_read(inst, temp, inst->src[i],
                           scratch_loc[inst->src[i].reg]);

         inst->src[i].file = temp.file;
         inst->src[i].reg = temp.reg;
         inst->src[i].reg_offset = temp.reg_offset;
         inst->src[i].reladdr = NULL;
      }
   }
}
/**
 * Emits an instruction before @inst to load the value named by @orig_src
 * from the pull constant buffer (surface) at @base_offset to @temp.
 */
void
vec4_visitor::emit_pull_constant_load(vec4_instruction *inst,
                                      dst_reg temp, src_reg orig_src,
                                      int base_offset)
{
   int reg_offset = base_offset + orig_src.reg_offset;
   src_reg index = get_pull_constant_offset(inst, orig_src.reladdr, reg_offset);
   vec4_instruction *load;

   load = new(mem_ctx) vec4_instruction(this, VS_OPCODE_PULL_CONSTANT_LOAD,
                                        temp, index);
   load->annotation = inst->annotation;
   load->ir = inst->ir;
   load->base_mrf = 14;
   load->mlen = 1;
   inst->insert_before(load);
}
/**
 * Implements array access of uniforms by inserting a
 * PULL_CONSTANT_LOAD instruction.
 *
 * Unlike temporary GRF array access (where we don't support it due to
 * the difficulty of doing relative addressing on instruction
 * destinations), we could potentially do array access of uniforms
 * that were loaded in GRF space as push constants.  In real-world
 * usage we've seen, though, the arrays being used are always larger
 * than we could load as push constants, so just always move all
 * uniform array access out to a pull constant buffer.
 */
void
vec4_visitor::move_uniform_array_access_to_pull_constants()
{
   int pull_constant_loc[this->uniforms];

   for (int i = 0; i < this->uniforms; i++) {
      pull_constant_loc[i] = -1;
   }

   /* Walk through and find array access of uniforms.  Put a copy of that
    * uniform in the pull constant buffer.
    *
    * Note that we don't move constant-indexed accesses to arrays.  No
    * testing has been done of the performance impact of this choice.
    */
   foreach_list_safe(node, &this->instructions) {
      vec4_instruction *inst = (vec4_instruction *)node;

      for (int i = 0 ; i < 3; i++) {
         if (inst->src[i].file != UNIFORM || !inst->src[i].reladdr)
            continue;

         int uniform = inst->src[i].reg;

         /* If this array isn't already present in the pull constant buffer,
          * add it.
          */
         if (pull_constant_loc[uniform] == -1) {
            const float **values = &prog_data->param[uniform * 4];

            pull_constant_loc[uniform] = prog_data->nr_pull_params;

            for (int j = 0; j < uniform_size[uniform] * 4; j++) {
               prog_data->pull_param[prog_data->nr_pull_params++] = values[j];
            }
         }

         /* Set up the annotation tracking for new generated instructions. */
         base_ir = inst->ir;
         current_annotation = inst->annotation;

         dst_reg temp = dst_reg(this, glsl_type::vec4_type);

         emit_pull_constant_load(inst, temp, inst->src[i],
                                 pull_constant_loc[uniform]);

         inst->src[i].file = temp.file;
         inst->src[i].reg = temp.reg;
         inst->src[i].reg_offset = temp.reg_offset;
         inst->src[i].reladdr = NULL;
      }
   }
}
vec4_visitor::vec4_visitor(struct brw_vs_compile *c,
                           struct gl_shader_program *prog,
                           struct brw_shader *shader)
{
   this->c = c;
   this->p = &c->func;
   this->brw = p->brw;
   this->intel = &brw->intel;
   this->ctx = &intel->ctx;
   this->prog = prog;
   this->shader = shader;

   this->mem_ctx = ralloc_context(NULL);
   this->failed = false;

   this->base_ir = NULL;
   this->current_annotation = NULL;

   this->vp = prog->VertexProgram;
   this->prog_data = &c->prog_data;

   this->variable_ht = hash_table_ctor(0,
                                       hash_table_pointer_hash,
                                       hash_table_pointer_compare);

   this->virtual_grf_def = NULL;
   this->virtual_grf_use = NULL;
   this->virtual_grf_sizes = NULL;
   this->virtual_grf_count = 0;
   this->virtual_grf_array_size = 0;
   this->live_intervals_valid = false;

   this->uniforms = 0;
}
vec4_visitor::~vec4_visitor()
{
   ralloc_free(this->mem_ctx);
   hash_table_dtor(this->variable_ht);
}
void
vec4_visitor::fail(const char *format, ...)
{
   va_list va;
   char *msg;

   if (failed)
      return;

   failed = true;

   va_start(va, format);
   msg = ralloc_vasprintf(mem_ctx, format, va);
   va_end(va);
   msg = ralloc_asprintf(mem_ctx, "VS compile failed: %s\n", msg);

   this->fail_msg = msg;

   if (INTEL_DEBUG & DEBUG_VS) {
      fprintf(stderr, "%s", msg);
   }
}
2277 } /* namespace brw */