/*
 * Copyright © 2011 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "brw_vec4.h"
extern "C" {
#include "main/macros.h"
#include "program/prog_parameter.h"
}

namespace brw {
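/* Construct a src_reg from a dst_reg: the scalar fields carry over
 * directly, and the dst's writemask is converted below into an
 * equivalent read swizzle by packing the enabled channels together
 * and replicating the last enabled channel into the remaining slots.
 */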
src_reg::src_reg(dst_reg reg)
{
   init();

   this->file = reg.file;
   this->reg = reg.reg;
   this->reg_offset = reg.reg_offset;
   this->type = reg.type;
   this->reladdr = reg.reladdr;
   this->fixed_hw_reg = reg.fixed_hw_reg;

   int swizzles[4];
   int next_chan = 0;
   int last = 0;

   for (int i = 0; i < 4; i++) {
      if (!(reg.writemask & (1 << i)))
         continue;

      swizzles[next_chan++] = last = i;
   }

   for (; next_chan < 4; next_chan++) {
      swizzles[next_chan] = last;
   }

   this->swizzle = BRW_SWIZZLE4(swizzles[0], swizzles[1],
                                swizzles[2], swizzles[3]);
}
dst_reg::dst_reg(src_reg reg)
{
   init();

   this->file = reg.file;
   this->reg = reg.reg;
   this->reg_offset = reg.reg_offset;
   this->type = reg.type;
   this->writemask = WRITEMASK_XYZW;
   this->reladdr = reg.reladdr;
   this->fixed_hw_reg = reg.fixed_hw_reg;
}
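/* emit() is the central instruction builder: it allocates a
 * vec4_instruction, tags it with the IR node and annotation currently
 * being processed (for debug dumps), and appends it to the instruction
 * list.  The shorter overloads below pad the missing operands with
 * null src_reg()/dst_reg() values.
 */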
vec4_instruction *
vec4_visitor::emit(enum opcode opcode, dst_reg dst,
                   src_reg src0, src_reg src1, src_reg src2)
{
   vec4_instruction *inst = new(mem_ctx) vec4_instruction();

   inst->opcode = opcode;
   inst->dst = dst;
   inst->src[0] = src0;
   inst->src[1] = src1;
   inst->src[2] = src2;
   inst->ir = this->base_ir;
   inst->annotation = this->current_annotation;

   this->instructions.push_tail(inst);

   return inst;
}
vec4_instruction *
vec4_visitor::emit(enum opcode opcode, dst_reg dst, src_reg src0, src_reg src1)
{
   return emit(opcode, dst, src0, src1, src_reg());
}
vec4_instruction *
vec4_visitor::emit(enum opcode opcode, dst_reg dst, src_reg src0)
{
   assert(dst.writemask != 0);
   return emit(opcode, dst, src0, src_reg(), src_reg());
}
vec4_instruction *
vec4_visitor::emit(enum opcode opcode)
{
   return emit(opcode, dst_reg(), src_reg(), src_reg(), src_reg());
}
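/* Emit a dot product of the given width; elements must be 2, 3, or 4,
 * selecting DP2, DP3, or DP4 respectively.
 */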
void
vec4_visitor::emit_dp(dst_reg dst, src_reg src0, src_reg src1, unsigned elements)
{
   static enum opcode dot_opcodes[] = {
      BRW_OPCODE_DP2, BRW_OPCODE_DP3, BRW_OPCODE_DP4
   };

   emit(dot_opcodes[elements - 2], dst, src0, src1);
}
void
vec4_visitor::emit_math1_gen6(enum opcode opcode, dst_reg dst, src_reg src)
{
   /* The gen6 math instruction ignores the source modifiers --
    * swizzle, abs, negate, and at least some parts of the register
    * region description.  Move the source to a temporary to make it work.
    */
   src_reg temp_src = src_reg(this, glsl_type::vec4_type);
   emit(BRW_OPCODE_MOV, dst_reg(temp_src), src);

   if (dst.writemask != WRITEMASK_XYZW) {
      /* The gen6 math instruction must be align1, so we can't do
       * writemasks.
       */
      dst_reg temp_dst = dst_reg(this, glsl_type::vec4_type);

      emit(opcode, temp_dst, temp_src);

      emit(BRW_OPCODE_MOV, dst, src_reg(temp_dst));
   } else {
      emit(opcode, dst, temp_src);
   }
}
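/* On gen4/5, math is not a regular ALU instruction but a message sent
 * to the shared math unit, so the operand is delivered through MRFs;
 * base_mrf and mlen describe that one-register message.
 */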
void
vec4_visitor::emit_math1_gen4(enum opcode opcode, dst_reg dst, src_reg src)
{
   vec4_instruction *inst = emit(opcode, dst, src);
   inst->base_mrf = 1;
   inst->mlen = 1;
}
void
vec4_visitor::emit_math(opcode opcode, dst_reg dst, src_reg src)
{
   switch (opcode) {
   case SHADER_OPCODE_RCP:
   case SHADER_OPCODE_RSQ:
   case SHADER_OPCODE_SQRT:
   case SHADER_OPCODE_EXP2:
   case SHADER_OPCODE_LOG2:
   case SHADER_OPCODE_SIN:
   case SHADER_OPCODE_COS:
      break;
   default:
      assert(!"not reached: bad math opcode");
      return;
   }

   if (intel->gen >= 6) {
      return emit_math1_gen6(opcode, dst, src);
   } else {
      return emit_math1_gen4(opcode, dst, src);
   }
}
void
vec4_visitor::emit_math2_gen6(enum opcode opcode,
                              dst_reg dst, src_reg src0, src_reg src1)
{
   src_reg expanded;

   /* The gen6 math instruction ignores the source modifiers --
    * swizzle, abs, negate, and at least some parts of the register
    * region description.  Move the sources to temporaries to make it
    * generally work.
    */

   expanded = src_reg(this, glsl_type::vec4_type);
   emit(BRW_OPCODE_MOV, dst_reg(expanded), src0);
   src0 = expanded;

   expanded = src_reg(this, glsl_type::vec4_type);
   emit(BRW_OPCODE_MOV, dst_reg(expanded), src1);
   src1 = expanded;

   if (dst.writemask != WRITEMASK_XYZW) {
      /* The gen6 math instruction must be align1, so we can't do
       * writemasks.
       */
      dst_reg temp_dst = dst_reg(this, glsl_type::vec4_type);

      emit(opcode, temp_dst, src0, src1);

      emit(BRW_OPCODE_MOV, dst, src_reg(temp_dst));
   } else {
      emit(opcode, dst, src0, src1);
   }
}
void
vec4_visitor::emit_math2_gen4(enum opcode opcode,
                              dst_reg dst, src_reg src0, src_reg src1)
{
   vec4_instruction *inst = emit(opcode, dst, src0, src1);
   inst->base_mrf = 1;
   inst->mlen = 2;
}
void
vec4_visitor::emit_math(enum opcode opcode,
                        dst_reg dst, src_reg src0, src_reg src1)
{
   assert(opcode == SHADER_OPCODE_POW);

   if (intel->gen >= 6) {
      return emit_math2_gen6(opcode, dst, src0, src1);
   } else {
      return emit_math2_gen4(opcode, dst, src0, src1);
   }
}
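/* Walk a GLSL IR instruction list, pointing base_ir at each top-level
 * node so the instructions generated for it can be annotated with
 * their origin.
 */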
void
vec4_visitor::visit_instructions(const exec_list *list)
{
   foreach_list(node, list) {
      ir_instruction *ir = (ir_instruction *)node;

      base_ir = ir;
      ir->accept(this);
   }
}
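/* Returns the size of a type in vec4 slots, the unit in which virtual
 * GRFs, uniforms, and attribute/varying slots are allocated here.
 */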
static int
type_size(const struct glsl_type *type)
{
   unsigned int size, i;

   switch (type->base_type) {
   case GLSL_TYPE_UINT:
   case GLSL_TYPE_INT:
   case GLSL_TYPE_FLOAT:
   case GLSL_TYPE_BOOL:
      if (type->is_matrix()) {
         return type->matrix_columns;
      } else {
         /* Regardless of size of vector, it gets a vec4.  This is bad
          * packing for things like floats, but otherwise arrays become a
          * mess.  Hopefully a later pass over the code can pack scalars
          * down if appropriate.
          */
         return 1;
      }
   case GLSL_TYPE_ARRAY:
      assert(type->length > 0);
      return type_size(type->fields.array) * type->length;
   case GLSL_TYPE_STRUCT:
      size = 0;
      for (i = 0; i < type->length; i++) {
         size += type_size(type->fields.structure[i].type);
      }
      return size;
   case GLSL_TYPE_SAMPLER:
      /* Samplers take up one slot in UNIFORMS[], but they're baked in
       * at link time.
       */
      return 1;
   default:
      assert(0);
      return 0;
   }
}
int
vec4_visitor::virtual_grf_alloc(int size)
{
   if (virtual_grf_array_size <= virtual_grf_count) {
      if (virtual_grf_array_size == 0)
         virtual_grf_array_size = 16;
      else
         virtual_grf_array_size *= 2;
      virtual_grf_sizes = reralloc(mem_ctx, virtual_grf_sizes, int,
                                   virtual_grf_array_size);
   }
   virtual_grf_sizes[virtual_grf_count] = size;
   return virtual_grf_count++;
}
src_reg::src_reg(class vec4_visitor *v, const struct glsl_type *type)
{
   init();

   this->file = GRF;
   this->reg = v->virtual_grf_alloc(type_size(type));

   if (type->is_array() || type->is_record()) {
      this->swizzle = BRW_SWIZZLE_NOOP;
   } else {
      this->swizzle = swizzle_for_size(type->vector_elements);
   }

   this->type = brw_type_for_base_type(type);
}
dst_reg::dst_reg(class vec4_visitor *v, const struct glsl_type *type)
{
   init();

   this->file = GRF;
   this->reg = v->virtual_grf_alloc(type_size(type));

   if (type->is_array() || type->is_record()) {
      this->writemask = WRITEMASK_XYZW;
   } else {
      this->writemask = (1 << type->vector_elements) - 1;
   }

   this->type = brw_type_for_base_type(type);
}
/* Our support for uniforms is piggy-backed on the struct
 * gl_fragment_program, because that's where the values actually
 * get stored, rather than in some global gl_shader_program uniform
 * store.
 */
int
vec4_visitor::setup_uniform_values(int loc, const glsl_type *type)
{
   unsigned int offset = 0;
   float *values = &this->vp->Base.Parameters->ParameterValues[loc][0].f;

   if (type->is_matrix()) {
      const glsl_type *column = glsl_type::get_instance(GLSL_TYPE_FLOAT,
                                                        type->vector_elements,
                                                        1);

      for (unsigned int i = 0; i < type->matrix_columns; i++) {
         offset += setup_uniform_values(loc + offset, column);
      }

      return offset;
   }

   switch (type->base_type) {
   case GLSL_TYPE_FLOAT:
   case GLSL_TYPE_UINT:
   case GLSL_TYPE_INT:
   case GLSL_TYPE_BOOL:
      for (unsigned int i = 0; i < type->vector_elements; i++) {
         int slot = this->uniforms * 4 + i;
         switch (type->base_type) {
         case GLSL_TYPE_FLOAT:
            c->prog_data.param_convert[slot] = PARAM_NO_CONVERT;
            break;
         case GLSL_TYPE_UINT:
            c->prog_data.param_convert[slot] = PARAM_CONVERT_F2U;
            break;
         case GLSL_TYPE_INT:
            c->prog_data.param_convert[slot] = PARAM_CONVERT_F2I;
            break;
         case GLSL_TYPE_BOOL:
            c->prog_data.param_convert[slot] = PARAM_CONVERT_F2B;
            break;
         default:
            assert(!"not reached");
            c->prog_data.param_convert[slot] = PARAM_NO_CONVERT;
            break;
         }
         c->prog_data.param[slot] = &values[i];
      }

      for (unsigned int i = type->vector_elements; i < 4; i++) {
         c->prog_data.param_convert[this->uniforms * 4 + i] =
            PARAM_CONVERT_ZERO;
         c->prog_data.param[this->uniforms * 4 + i] = NULL;
      }

      this->uniform_size[this->uniforms] = type->vector_elements;
      this->uniforms++;

      return 1;

   case GLSL_TYPE_STRUCT:
      for (unsigned int i = 0; i < type->length; i++) {
         offset += setup_uniform_values(loc + offset,
                                        type->fields.structure[i].type);
      }
      return offset;

   case GLSL_TYPE_ARRAY:
      for (unsigned int i = 0; i < type->length; i++) {
         offset += setup_uniform_values(loc + offset, type->fields.array);
      }
      return offset;

   case GLSL_TYPE_SAMPLER:
      /* The sampler takes up a slot, but we don't use any values from it. */
      return 1;

   default:
      assert(!"not reached");
      return 0;
   }
}
/* Our support for builtin uniforms is even scarier than non-builtin.
 * It sits on top of the PROG_STATE_VAR parameters that are
 * automatically updated from GL context state.
 */
void
vec4_visitor::setup_builtin_uniform_values(ir_variable *ir)
{
   const ir_state_slot *const slots = ir->state_slots;
   assert(ir->state_slots != NULL);

   for (unsigned int i = 0; i < ir->num_state_slots; i++) {
      /* This state reference has already been setup by ir_to_mesa,
       * but we'll get the same index back here.  We can reference
       * ParameterValues directly, since unlike brw_fs.cpp, we never
       * add new state references during compile.
       */
      int index = _mesa_add_state_reference(this->vp->Base.Parameters,
                                            (gl_state_index *)slots[i].tokens);
      float *values = &this->vp->Base.Parameters->ParameterValues[index][0].f;

      this->uniform_size[this->uniforms] = 0;
      /* Add each of the unique swizzled channels of the element.
       * This will end up matching the size of the glsl_type of this field.
       */
      int last_swiz = -1;
      for (unsigned int j = 0; j < 4; j++) {
         int swiz = GET_SWZ(slots[i].swizzle, j);
         if (swiz == last_swiz)
            break;
         last_swiz = swiz;

         c->prog_data.param[this->uniforms * 4 + j] = &values[swiz];
         c->prog_data.param_convert[this->uniforms * 4 + j] = PARAM_NO_CONVERT;
         if (swiz <= last_swiz)
            this->uniform_size[this->uniforms]++;
      }
      this->uniforms++;
   }
}
dst_reg *
vec4_visitor::variable_storage(ir_variable *var)
{
   return (dst_reg *)hash_table_find(this->variable_ht, var);
}
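/* Evaluate a boolean condition directly into the flag register by
 * giving the generating instruction a conditional_mod, so that a
 * following IF or predicated MOV can use the result without another
 * comparison.
 */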
void
vec4_visitor::emit_bool_to_cond_code(ir_rvalue *ir)
{
   ir_expression *expr = ir->as_expression();

   if (expr) {
      src_reg op[2];
      vec4_instruction *inst;

      assert(expr->get_num_operands() <= 2);
      for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
         assert(expr->operands[i]->type->is_scalar());

         expr->operands[i]->accept(this);
         op[i] = this->result;
      }

      switch (expr->operation) {
      case ir_unop_logic_not:
         inst = emit(BRW_OPCODE_AND, dst_null_d(), op[0], src_reg(1));
         inst->conditional_mod = BRW_CONDITIONAL_Z;
         break;

      case ir_binop_logic_xor:
         inst = emit(BRW_OPCODE_XOR, dst_null_d(), op[0], op[1]);
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         break;

      case ir_binop_logic_or:
         inst = emit(BRW_OPCODE_OR, dst_null_d(), op[0], op[1]);
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         break;

      case ir_binop_logic_and:
         inst = emit(BRW_OPCODE_AND, dst_null_d(), op[0], op[1]);
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         break;

      case ir_unop_f2b:
         if (intel->gen >= 6) {
            inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], src_reg(0.0f));
         } else {
            inst = emit(BRW_OPCODE_MOV, dst_null_f(), op[0]);
         }
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         break;

      case ir_unop_i2b:
         if (intel->gen >= 6) {
            inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], src_reg(0));
         } else {
            inst = emit(BRW_OPCODE_MOV, dst_null_d(), op[0]);
         }
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         break;

      case ir_binop_greater:
      case ir_binop_gequal:
      case ir_binop_less:
      case ir_binop_lequal:
      case ir_binop_equal:
      case ir_binop_all_equal:
      case ir_binop_nequal:
      case ir_binop_any_nequal:
         inst = emit(BRW_OPCODE_CMP, dst_null_cmp(), op[0], op[1]);
         inst->conditional_mod =
            brw_conditional_for_comparison(expr->operation);
         break;

      default:
         assert(!"not reached");
         break;
      }
      return;
   }

   ir->accept(this);

   if (intel->gen >= 6) {
      vec4_instruction *inst = emit(BRW_OPCODE_AND, dst_null_d(),
                                    this->result, src_reg(1));
      inst->conditional_mod = BRW_CONDITIONAL_NZ;
   } else {
      vec4_instruction *inst = emit(BRW_OPCODE_MOV, dst_null_d(), this->result);
      inst->conditional_mod = BRW_CONDITIONAL_NZ;
   }
}
/**
 * Emit a gen6 IF statement with the comparison folded into the IF
 * instruction.
 */
void
vec4_visitor::emit_if_gen6(ir_if *ir)
{
   ir_expression *expr = ir->condition->as_expression();

   if (expr) {
      src_reg op[2];
      dst_reg temp;
      vec4_instruction *inst;

      assert(expr->get_num_operands() <= 2);
      for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
         expr->operands[i]->accept(this);
         op[i] = this->result;
      }

      switch (expr->operation) {
      case ir_unop_logic_not:
         inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], src_reg(0));
         inst->conditional_mod = BRW_CONDITIONAL_Z;
         return;

      case ir_binop_logic_xor:
         inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], op[1]);
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         return;

      case ir_binop_logic_or:
         temp = dst_reg(this, glsl_type::bool_type);
         emit(BRW_OPCODE_OR, temp, op[0], op[1]);
         inst = emit(BRW_OPCODE_IF, dst_null_d(), src_reg(temp), src_reg(0));
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         return;

      case ir_binop_logic_and:
         temp = dst_reg(this, glsl_type::bool_type);
         emit(BRW_OPCODE_AND, temp, op[0], op[1]);
         inst = emit(BRW_OPCODE_IF, dst_null_d(), src_reg(temp), src_reg(0));
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         return;

      case ir_unop_f2b:
         inst = emit(BRW_OPCODE_IF, dst_null_f(), op[0], src_reg(0));
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         return;

      case ir_unop_i2b:
         inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], src_reg(0));
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         return;

      case ir_binop_greater:
      case ir_binop_gequal:
      case ir_binop_less:
      case ir_binop_lequal:
      case ir_binop_equal:
      case ir_binop_nequal:
         inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], op[1]);
         inst->conditional_mod =
            brw_conditional_for_comparison(expr->operation);
         return;

      case ir_binop_all_equal:
         inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], op[1]);
         inst->conditional_mod = BRW_CONDITIONAL_Z;

         inst = emit(BRW_OPCODE_IF);
         inst->predicate = BRW_PREDICATE_ALIGN16_ALL4H;
         return;

      case ir_binop_any_nequal:
         inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], op[1]);
         inst->conditional_mod = BRW_CONDITIONAL_NZ;

         inst = emit(BRW_OPCODE_IF);
         inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
         return;

      case ir_unop_any:
         inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], src_reg(0));
         inst->conditional_mod = BRW_CONDITIONAL_NZ;

         inst = emit(BRW_OPCODE_IF);
         inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
         return;

      default:
         assert(!"not reached");
         inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], src_reg(0));
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         return;
      }
   }

   ir->condition->accept(this);

   vec4_instruction *inst = emit(BRW_OPCODE_IF, dst_null_d(),
                                 this->result, src_reg(0));
   inst->conditional_mod = BRW_CONDITIONAL_NZ;
}
void
vec4_visitor::visit(ir_variable *ir)
{
   dst_reg *reg = NULL;

   if (variable_storage(ir))
      return;

   switch (ir->mode) {
   case ir_var_in:
      reg = new(mem_ctx) dst_reg(ATTR, ir->location);
      break;

   case ir_var_out:
      reg = new(mem_ctx) dst_reg(this, ir->type);

      for (int i = 0; i < type_size(ir->type); i++) {
         output_reg[ir->location + i] = *reg;
         output_reg[ir->location + i].reg_offset = i;
         output_reg[ir->location + i].type = BRW_REGISTER_TYPE_F;
      }
      break;

   case ir_var_auto:
   case ir_var_temporary:
      reg = new(mem_ctx) dst_reg(this, ir->type);
      break;

   case ir_var_uniform:
      reg = new(this->mem_ctx) dst_reg(UNIFORM, this->uniforms);

      if (!strncmp(ir->name, "gl_", 3)) {
         setup_builtin_uniform_values(ir);
      } else {
         setup_uniform_values(ir->location, ir->type);
      }
      break;

   default:
      assert(!"not reached");
      break;
   }

   reg->type = brw_type_for_base_type(ir->type);
   hash_table_insert(this->variable_ht, reg, ir);
}
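/* GLSL IR loops arrive normalized, with optional counter/from/to/increment
 * fields; they are lowered here to a DO/WHILE region with a predicated
 * BREAK emitted from the exit-condition comparison.
 */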
void
vec4_visitor::visit(ir_loop *ir)
{
   dst_reg counter;

   /* We don't want debugging output to print the whole body of the
    * loop as the annotation.
    */
   this->base_ir = NULL;

   if (ir->counter != NULL) {
      this->base_ir = ir->counter;
      ir->counter->accept(this);
      counter = *(variable_storage(ir->counter));

      if (ir->from != NULL) {
         this->base_ir = ir->from;
         ir->from->accept(this);

         emit(BRW_OPCODE_MOV, counter, this->result);
      }
   }

   emit(BRW_OPCODE_DO);

   if (ir->to) {
      this->base_ir = ir->to;
      ir->to->accept(this);

      vec4_instruction *inst = emit(BRW_OPCODE_CMP, dst_null_d(),
                                    src_reg(counter), this->result);
      inst->conditional_mod = brw_conditional_for_comparison(ir->cmp);

      inst = emit(BRW_OPCODE_BREAK);
      inst->predicate = BRW_PREDICATE_NORMAL;
   }

   visit_instructions(&ir->body_instructions);

   if (ir->increment) {
      this->base_ir = ir->increment;
      ir->increment->accept(this);
      emit(BRW_OPCODE_ADD, counter, src_reg(counter), this->result);
   }

   emit(BRW_OPCODE_WHILE);
}
void
vec4_visitor::visit(ir_loop_jump *ir)
{
   switch (ir->mode) {
   case ir_loop_jump::jump_break:
      emit(BRW_OPCODE_BREAK);
      break;
   case ir_loop_jump::jump_continue:
      emit(BRW_OPCODE_CONTINUE);
      break;
   }
}
void
vec4_visitor::visit(ir_function_signature *ir)
{
   assert(0);
   (void)ir;
}
void
vec4_visitor::visit(ir_function *ir)
{
   /* Ignore function bodies other than main() -- we shouldn't see calls to
    * them since they should all be inlined.
    */
   if (strcmp(ir->name, "main") == 0) {
      const ir_function_signature *sig;
      exec_list empty;

      sig = ir->matching_signature(&empty);

      assert(sig);

      visit_instructions(&sig->body);
   }
}
bool
vec4_visitor::try_emit_sat(ir_expression *ir)
{
   ir_rvalue *sat_src = ir->as_rvalue_to_saturate();
   if (!sat_src)
      return false;

   sat_src->accept(this);
   src_reg src = this->result;

   this->result = src_reg(this, ir->type);
   vec4_instruction *inst;
   inst = emit(BRW_OPCODE_MOV, dst_reg(this->result), src);
   inst->saturate = true;

   return true;
}
void
vec4_visitor::emit_bool_comparison(unsigned int op,
                                   dst_reg dst, src_reg src0, src_reg src1)
{
   /* original gen4 does destination conversion before comparison. */
   if (intel->gen < 5)
      dst.type = src0.type;

   vec4_instruction *inst = emit(BRW_OPCODE_CMP, dst, src0, src1);
   inst->conditional_mod = brw_conditional_for_comparison(op);

   dst.type = BRW_REGISTER_TYPE_D;
   emit(BRW_OPCODE_AND, dst, src_reg(dst), src_reg(0x1));
}
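/* Expression visitor: each operand is evaluated into a src_reg, a fresh
 * temporary is allocated for the result, and the switch below emits the
 * instruction sequence for the particular IR operation.
 */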
void
vec4_visitor::visit(ir_expression *ir)
{
   unsigned int operand;
   src_reg op[Elements(ir->operands)];
   src_reg result_src;
   dst_reg result_dst;
   vec4_instruction *inst;

   if (try_emit_sat(ir))
      return;

   for (operand = 0; operand < ir->get_num_operands(); operand++) {
      this->result.file = BAD_FILE;
      ir->operands[operand]->accept(this);
      if (this->result.file == BAD_FILE) {
         printf("Failed to get tree for expression operand:\n");
         ir->operands[operand]->print();
         exit(1);
      }
      op[operand] = this->result;

      /* Matrix expression operands should have been broken down to vector
       * operations already.
       */
      assert(!ir->operands[operand]->type->is_matrix());
   }

   int vector_elements = ir->operands[0]->type->vector_elements;
   if (ir->operands[1]) {
      vector_elements = MAX2(vector_elements,
                             ir->operands[1]->type->vector_elements);
   }

   this->result.file = BAD_FILE;

   /* Storage for our result.  Ideally for an assignment we'd be using
    * the actual storage for the result here, instead.
    */
   result_src = src_reg(this, ir->type);
   /* convenience for the emit functions below. */
   result_dst = dst_reg(result_src);
   /* If nothing special happens, this is the result. */
   this->result = result_src;
   /* Limit writes to the channels that will be used by result_src later.
    * This does limit this temp's use as a temporary for multi-instruction
    * sequences.
    */
   result_dst.writemask = (1 << ir->type->vector_elements) - 1;
   switch (ir->operation) {
   case ir_unop_logic_not:
      /* Note that BRW_OPCODE_NOT is not appropriate here, since it is
       * ones complement of the whole register, not just bit 0.
       */
      emit(BRW_OPCODE_XOR, result_dst, op[0], src_reg(1));
      break;
   case ir_unop_neg:
      op[0].negate = !op[0].negate;
      this->result = op[0];
      break;
   case ir_unop_abs:
      op[0].abs = true;
      op[0].negate = false;
      this->result = op[0];
      break;

   case ir_unop_sign:
      emit(BRW_OPCODE_MOV, result_dst, src_reg(0.0f));

      inst = emit(BRW_OPCODE_CMP, dst_null_f(), op[0], src_reg(0.0f));
      inst->conditional_mod = BRW_CONDITIONAL_G;
      inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1.0f));
      inst->predicate = BRW_PREDICATE_NORMAL;

      inst = emit(BRW_OPCODE_CMP, dst_null_f(), op[0], src_reg(0.0f));
      inst->conditional_mod = BRW_CONDITIONAL_L;
      inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(-1.0f));
      inst->predicate = BRW_PREDICATE_NORMAL;

      break;
   case ir_unop_rcp:
      emit_math(SHADER_OPCODE_RCP, result_dst, op[0]);
      break;

   case ir_unop_exp2:
      emit_math(SHADER_OPCODE_EXP2, result_dst, op[0]);
      break;
   case ir_unop_log2:
      emit_math(SHADER_OPCODE_LOG2, result_dst, op[0]);
      break;
   case ir_unop_exp:
   case ir_unop_log:
      assert(!"not reached: should be handled by ir_explog_to_explog2");
      break;
   case ir_unop_sin:
   case ir_unop_sin_reduced:
      emit_math(SHADER_OPCODE_SIN, result_dst, op[0]);
      break;
   case ir_unop_cos:
   case ir_unop_cos_reduced:
      emit_math(SHADER_OPCODE_COS, result_dst, op[0]);
      break;

   case ir_unop_dFdx:
   case ir_unop_dFdy:
      assert(!"derivatives not valid in vertex shader");
      break;

   case ir_unop_noise:
      assert(!"not reached: should be handled by lower_noise");
      break;

   case ir_binop_add:
      emit(BRW_OPCODE_ADD, result_dst, op[0], op[1]);
      break;
   case ir_binop_sub:
      assert(!"not reached: should be handled by ir_sub_to_add_neg");
      break;
   case ir_binop_mul:
      if (ir->type->is_integer()) {
         /* For integer multiplication, the MUL uses the low 16 bits
          * of one of the operands (src0 on gen6, src1 on gen7).  The
          * MACH accumulates in the contribution of the upper 16 bits
          * of that operand.
          *
          * FINISHME: Emit just the MUL if we know an operand is small
          * enough.
          */
         struct brw_reg acc = retype(brw_acc_reg(), BRW_REGISTER_TYPE_D);

         emit(BRW_OPCODE_MUL, acc, op[0], op[1]);
         emit(BRW_OPCODE_MACH, dst_null_d(), op[0], op[1]);
         emit(BRW_OPCODE_MOV, result_dst, src_reg(acc));
      } else {
         emit(BRW_OPCODE_MUL, result_dst, op[0], op[1]);
      }
      break;
   case ir_binop_div:
      assert(!"not reached: should be handled by ir_div_to_mul_rcp");
      break;
   case ir_binop_mod:
      assert(!"ir_binop_mod should have been converted to b * fract(a/b)");
      break;
   case ir_binop_less:
   case ir_binop_greater:
   case ir_binop_lequal:
   case ir_binop_gequal:
   case ir_binop_equal:
   case ir_binop_nequal: {
      dst_reg temp = result_dst;
      /* original gen4 does implicit conversion before comparison. */
      if (intel->gen < 5)
         temp.type = op[0].type;

      inst = emit(BRW_OPCODE_CMP, temp, op[0], op[1]);
      inst->conditional_mod = brw_conditional_for_comparison(ir->operation);
      emit(BRW_OPCODE_AND, result_dst, this->result, src_reg(0x1));
      break;
   }
   case ir_binop_all_equal:
      /* "==" operator producing a scalar boolean. */
      if (ir->operands[0]->type->is_vector() ||
          ir->operands[1]->type->is_vector()) {
         inst = emit(BRW_OPCODE_CMP, dst_null_cmp(), op[0], op[1]);
         inst->conditional_mod = BRW_CONDITIONAL_Z;

         emit(BRW_OPCODE_MOV, result_dst, src_reg(0));
         inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1));
         inst->predicate = BRW_PREDICATE_ALIGN16_ALL4H;
      } else {
         dst_reg temp = result_dst;
         /* original gen4 does implicit conversion before comparison. */
         if (intel->gen < 5)
            temp.type = op[0].type;

         inst = emit(BRW_OPCODE_CMP, temp, op[0], op[1]);
         inst->conditional_mod = BRW_CONDITIONAL_Z;
         emit(BRW_OPCODE_AND, result_dst, result_src, src_reg(0x1));
      }
      break;
   case ir_binop_any_nequal:
      /* "!=" operator producing a scalar boolean. */
      if (ir->operands[0]->type->is_vector() ||
          ir->operands[1]->type->is_vector()) {
         inst = emit(BRW_OPCODE_CMP, dst_null_cmp(), op[0], op[1]);
         inst->conditional_mod = BRW_CONDITIONAL_NZ;

         emit(BRW_OPCODE_MOV, result_dst, src_reg(0));
         inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1));
         inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
      } else {
         dst_reg temp = result_dst;
         /* original gen4 does implicit conversion before comparison. */
         if (intel->gen < 5)
            temp.type = op[0].type;

         inst = emit(BRW_OPCODE_CMP, temp, op[0], op[1]);
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         emit(BRW_OPCODE_AND, result_dst, result_src, src_reg(0x1));
      }
      break;

   case ir_unop_any:
      inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], src_reg(0));
      inst->conditional_mod = BRW_CONDITIONAL_NZ;

      emit(BRW_OPCODE_MOV, result_dst, src_reg(0));

      inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1));
      inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
      break;
   case ir_binop_logic_xor:
      emit(BRW_OPCODE_XOR, result_dst, op[0], op[1]);
      break;

   case ir_binop_logic_or:
      emit(BRW_OPCODE_OR, result_dst, op[0], op[1]);
      break;

   case ir_binop_logic_and:
      emit(BRW_OPCODE_AND, result_dst, op[0], op[1]);
      break;

   case ir_binop_dot:
      assert(ir->operands[0]->type->is_vector());
      assert(ir->operands[0]->type == ir->operands[1]->type);
      emit_dp(result_dst, op[0], op[1], ir->operands[0]->type->vector_elements);
      break;

   case ir_unop_sqrt:
      emit_math(SHADER_OPCODE_SQRT, result_dst, op[0]);
      break;
   case ir_unop_rsq:
      emit_math(SHADER_OPCODE_RSQ, result_dst, op[0]);
      break;
   case ir_unop_i2f:
   case ir_unop_i2u:
   case ir_unop_u2i:
   case ir_unop_u2f:
   case ir_unop_b2f:
   case ir_unop_b2i:
   case ir_unop_f2i:
      emit(BRW_OPCODE_MOV, result_dst, op[0]);
      break;
   case ir_unop_f2b:
   case ir_unop_i2b: {
      dst_reg temp = result_dst;
      /* original gen4 does implicit conversion before comparison. */
      if (intel->gen < 5)
         temp.type = op[0].type;

      inst = emit(BRW_OPCODE_CMP, temp, op[0], src_reg(0.0f));
      inst->conditional_mod = BRW_CONDITIONAL_NZ;
      inst = emit(BRW_OPCODE_AND, result_dst, result_src, src_reg(1));
      break;
   }

   case ir_unop_trunc:
      emit(BRW_OPCODE_RNDZ, result_dst, op[0]);
      break;
   case ir_unop_ceil:
      op[0].negate = !op[0].negate;
      inst = emit(BRW_OPCODE_RNDD, result_dst, op[0]);
      this->result.negate = true;
      break;
   case ir_unop_floor:
      inst = emit(BRW_OPCODE_RNDD, result_dst, op[0]);
      break;
   case ir_unop_fract:
      inst = emit(BRW_OPCODE_FRC, result_dst, op[0]);
      break;
   case ir_unop_round_even:
      emit(BRW_OPCODE_RNDE, result_dst, op[0]);
      break;
   case ir_binop_min:
      inst = emit(BRW_OPCODE_CMP, result_dst, op[0], op[1]);
      inst->conditional_mod = BRW_CONDITIONAL_L;

      inst = emit(BRW_OPCODE_SEL, result_dst, op[0], op[1]);
      inst->predicate = BRW_PREDICATE_NORMAL;
      break;
   case ir_binop_max:
      inst = emit(BRW_OPCODE_CMP, result_dst, op[0], op[1]);
      inst->conditional_mod = BRW_CONDITIONAL_G;

      inst = emit(BRW_OPCODE_SEL, result_dst, op[0], op[1]);
      inst->predicate = BRW_PREDICATE_NORMAL;
      break;

   case ir_binop_pow:
      emit_math(SHADER_OPCODE_POW, result_dst, op[0], op[1]);
      break;

   case ir_unop_bit_not:
      inst = emit(BRW_OPCODE_NOT, result_dst, op[0]);
      break;
   case ir_binop_bit_and:
      inst = emit(BRW_OPCODE_AND, result_dst, op[0], op[1]);
      break;
   case ir_binop_bit_xor:
      inst = emit(BRW_OPCODE_XOR, result_dst, op[0], op[1]);
      break;
   case ir_binop_bit_or:
      inst = emit(BRW_OPCODE_OR, result_dst, op[0], op[1]);
      break;

   case ir_binop_lshift:
   case ir_binop_rshift:
      assert(!"GLSL 1.30 features unsupported");
      break;

   case ir_quadop_vector:
      assert(!"not reached: should be handled by lower_quadop_vector");
      break;
   }
}
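/* Swizzles compose rather than chain: each requested channel is looked
 * up through the source's existing swizzle with BRW_GET_SWZ, so a
 * swizzle of a swizzle still collapses to a single hardware swizzle.
 */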
void
vec4_visitor::visit(ir_swizzle *ir)
{
   src_reg src;
   int i = 0;
   int swizzle[4];

   /* Note that this is only swizzles in expressions, not those on the left
    * hand side of an assignment, which do write masking.  See ir_assignment
    * for that.
    */

   ir->val->accept(this);
   src = this->result;
   assert(src.file != BAD_FILE);

   for (i = 0; i < ir->type->vector_elements; i++) {
      switch (i) {
      case 0:
         swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.x);
         break;
      case 1:
         swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.y);
         break;
      case 2:
         swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.z);
         break;
      case 3:
         swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.w);
         break;
      }
   }
   for (; i < 4; i++) {
      /* Replicate the last channel out. */
      swizzle[i] = swizzle[ir->type->vector_elements - 1];
   }

   src.swizzle = BRW_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]);

   this->result = src;
}
void
vec4_visitor::visit(ir_dereference_variable *ir)
{
   const struct glsl_type *type = ir->type;
   dst_reg *reg = variable_storage(ir->var);

   if (!reg) {
      fail("Failed to find variable storage for %s\n", ir->var->name);
      this->result = src_reg(brw_null_reg());
      return;
   }

   this->result = src_reg(*reg);

   if (type->is_scalar() || type->is_vector() || type->is_matrix())
      this->result.swizzle = swizzle_for_size(type->vector_elements);
}
void
vec4_visitor::visit(ir_dereference_array *ir)
{
   ir_constant *constant_index;
   src_reg src;
   int element_size = type_size(ir->type);

   constant_index = ir->array_index->constant_expression_value();

   ir->array->accept(this);
   src = this->result;

   if (constant_index) {
      src.reg_offset += constant_index->value.i[0] * element_size;
   } else {
      /* Variable index array dereference.  It eats the "vec4" of the
       * base of the array and an index that offsets the Mesa register
       * index.
       */
      ir->array_index->accept(this);

      src_reg index_reg;

      if (element_size == 1) {
         index_reg = this->result;
      } else {
         index_reg = src_reg(this, glsl_type::int_type);

         emit(BRW_OPCODE_MUL, dst_reg(index_reg),
              this->result, src_reg(element_size));
      }

      if (src.reladdr) {
         src_reg temp = src_reg(this, glsl_type::int_type);

         emit(BRW_OPCODE_ADD, dst_reg(temp), *src.reladdr, index_reg);

         index_reg = temp;
      }

      src.reladdr = ralloc(mem_ctx, src_reg);
      memcpy(src.reladdr, &index_reg, sizeof(index_reg));
   }

   /* If the type is smaller than a vec4, replicate the last channel out. */
   if (ir->type->is_scalar() || ir->type->is_vector())
      src.swizzle = swizzle_for_size(ir->type->vector_elements);
   else
      src.swizzle = BRW_SWIZZLE_NOOP;
   src.type = brw_type_for_base_type(ir->type);

   this->result = src;
}
void
vec4_visitor::visit(ir_dereference_record *ir)
{
   unsigned int i;
   const glsl_type *struct_type = ir->record->type;
   int offset = 0;

   ir->record->accept(this);

   for (i = 0; i < struct_type->length; i++) {
      if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0)
         break;
      offset += type_size(struct_type->fields.structure[i].type);
   }

   /* If the type is smaller than a vec4, replicate the last channel out. */
   if (ir->type->is_scalar() || ir->type->is_vector())
      this->result.swizzle = swizzle_for_size(ir->type->vector_elements);
   else
      this->result.swizzle = BRW_SWIZZLE_NOOP;
   this->result.type = brw_type_for_base_type(ir->type);

   this->result.reg_offset += offset;
}
/**
 * We want to be careful in assignment setup to hit the actual storage
 * instead of potentially using a temporary like we might with the
 * ir_dereference handler.
 */
static dst_reg
get_assignment_lhs(ir_dereference *ir, vec4_visitor *v)
{
   /* The LHS must be a dereference.  If the LHS is a variable indexed array
    * access of a vector, it must be separated into a series of conditional
    * moves before reaching this point (see ir_vec_index_to_cond_assign).
    */
   assert(ir->as_dereference());
   ir_dereference_array *deref_array = ir->as_dereference_array();
   if (deref_array) {
      assert(!deref_array->array->type->is_vector());
   }

   /* Use the rvalue deref handler for the most part.  We'll ignore
    * swizzles in it and write swizzles using writemask, though.
    */
   ir->accept(v);
   return dst_reg(v->result);
}
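/* Copy a whole aggregate value, one vec4-sized MOV at a time: structs,
 * arrays, and matrices recurse down to scalar/vector pieces, with the
 * dst/src reg_offsets advancing as each piece is moved.
 */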
void
vec4_visitor::emit_block_move(dst_reg *dst, src_reg *src,
                              const struct glsl_type *type, bool predicated)
{
   if (type->base_type == GLSL_TYPE_STRUCT) {
      for (unsigned int i = 0; i < type->length; i++) {
         emit_block_move(dst, src, type->fields.structure[i].type, predicated);
      }
      return;
   }

   if (type->is_array()) {
      for (unsigned int i = 0; i < type->length; i++) {
         emit_block_move(dst, src, type->fields.array, predicated);
      }
      return;
   }

   if (type->is_matrix()) {
      const struct glsl_type *vec_type;

      vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT,
                                         type->vector_elements, 1);

      for (int i = 0; i < type->matrix_columns; i++) {
         emit_block_move(dst, src, vec_type, predicated);
      }
      return;
   }

   assert(type->is_scalar() || type->is_vector());

   dst->type = brw_type_for_base_type(type);
   src->type = dst->type;

   dst->writemask = (1 << type->vector_elements) - 1;

   /* Do we need to worry about swizzling a swizzle? */
   assert(src->swizzle == BRW_SWIZZLE_NOOP);
   src->swizzle = swizzle_for_size(type->vector_elements);

   vec4_instruction *inst = emit(BRW_OPCODE_MOV, *dst, *src);
   if (predicated)
      inst->predicate = BRW_PREDICATE_NORMAL;

   dst->reg_offset++;
   src->reg_offset++;
}
/* If the RHS processing resulted in an instruction generating a
 * temporary value, and it would be easy to rewrite the instruction to
 * generate its result right into the LHS instead, do so.  This ends
 * up reliably removing instructions where it can be tricky to do so
 * later without real UD chain information.
 */
bool
vec4_visitor::try_rewrite_rhs_to_dst(ir_assignment *ir,
                                     dst_reg dst,
                                     src_reg src,
                                     vec4_instruction *pre_rhs_inst,
                                     vec4_instruction *last_rhs_inst)
{
   /* This could be supported, but it would take more smarts. */
   if (ir->condition)
      return false;

   if (pre_rhs_inst == last_rhs_inst)
      return false; /* No instructions generated to work with. */

   /* Make sure the last instruction generated our source reg. */
   if (src.file != GRF ||
       src.file != last_rhs_inst->dst.file ||
       src.reg != last_rhs_inst->dst.reg ||
       src.reg_offset != last_rhs_inst->dst.reg_offset ||
       src.reladdr ||
       src.abs ||
       src.negate ||
       last_rhs_inst->predicate != BRW_PREDICATE_NONE)
      return false;

   /* Check that the last instruction fully initialized the channels
    * we want to use, in the order we want to use them.  We could
    * potentially reswizzle the operands of many instructions so that
    * we could handle out of order channels, but don't yet.
    */
   for (int i = 0; i < 4; i++) {
      if (dst.writemask & (1 << i)) {
         if (!(last_rhs_inst->dst.writemask & (1 << i)))
            return false;

         if (BRW_GET_SWZ(src.swizzle, i) != i)
            return false;
      }
   }

   /* Success!  Rewrite the instruction. */
   last_rhs_inst->dst.file = dst.file;
   last_rhs_inst->dst.reg = dst.reg;
   last_rhs_inst->dst.reg_offset = dst.reg_offset;
   last_rhs_inst->dst.reladdr = dst.reladdr;
   last_rhs_inst->dst.writemask &= dst.writemask;

   return true;
}
void
vec4_visitor::visit(ir_assignment *ir)
{
   dst_reg dst = get_assignment_lhs(ir->lhs, this);

   if (!ir->lhs->type->is_scalar() &&
       !ir->lhs->type->is_vector()) {
      ir->rhs->accept(this);
      src_reg src = this->result;

      if (ir->condition) {
         emit_bool_to_cond_code(ir->condition);
      }

      emit_block_move(&dst, &src, ir->rhs->type, ir->condition != NULL);
      return;
   }

   /* Now we're down to just a scalar/vector with writemasks. */
   int i;

   vec4_instruction *pre_rhs_inst, *last_rhs_inst;
   pre_rhs_inst = (vec4_instruction *)this->instructions.get_tail();

   ir->rhs->accept(this);

   last_rhs_inst = (vec4_instruction *)this->instructions.get_tail();

   src_reg src = this->result;

   int swizzles[4];
   int first_enabled_chan = 0;
   int src_chan = 0;

   assert(ir->lhs->type->is_vector() ||
          ir->lhs->type->is_scalar());
   dst.writemask = ir->write_mask;

   for (int i = 0; i < 4; i++) {
      if (dst.writemask & (1 << i)) {
         first_enabled_chan = BRW_GET_SWZ(src.swizzle, i);
         break;
      }
   }

   /* Swizzle a small RHS vector into the channels being written.
    *
    * glsl ir treats write_mask as dictating how many channels are
    * present on the RHS while in our instructions we need to make
    * those channels appear in the slots of the vec4 they're written to.
    */
   for (int i = 0; i < 4; i++) {
      if (dst.writemask & (1 << i))
         swizzles[i] = BRW_GET_SWZ(src.swizzle, src_chan++);
      else
         swizzles[i] = first_enabled_chan;
   }
   src.swizzle = BRW_SWIZZLE4(swizzles[0], swizzles[1],
                              swizzles[2], swizzles[3]);

   if (try_rewrite_rhs_to_dst(ir, dst, src, pre_rhs_inst, last_rhs_inst)) {
      return;
   }

   if (ir->condition) {
      emit_bool_to_cond_code(ir->condition);
   }

   for (i = 0; i < type_size(ir->lhs->type); i++) {
      vec4_instruction *inst = emit(BRW_OPCODE_MOV, dst, src);

      if (ir->condition)
         inst->predicate = BRW_PREDICATE_NORMAL;

      dst.reg_offset++;
      src.reg_offset++;
   }
}
void
vec4_visitor::emit_constant_values(dst_reg *dst, ir_constant *ir)
{
   if (ir->type->base_type == GLSL_TYPE_STRUCT) {
      foreach_list(node, &ir->components) {
         ir_constant *field_value = (ir_constant *)node;

         emit_constant_values(dst, field_value);
      }
      return;
   }

   if (ir->type->is_array()) {
      for (unsigned int i = 0; i < ir->type->length; i++) {
         emit_constant_values(dst, ir->array_elements[i]);
      }
      return;
   }

   if (ir->type->is_matrix()) {
      for (int i = 0; i < ir->type->matrix_columns; i++) {
         for (int j = 0; j < ir->type->vector_elements; j++) {
            dst->writemask = 1 << j;
            dst->type = BRW_REGISTER_TYPE_F;

            emit(BRW_OPCODE_MOV, *dst,
                 src_reg(ir->value.f[i * ir->type->vector_elements + j]));
         }
         dst->reg_offset++;
      }
      return;
   }

   for (int i = 0; i < ir->type->vector_elements; i++) {
      dst->writemask = 1 << i;
      dst->type = brw_type_for_base_type(ir->type);

      switch (ir->type->base_type) {
      case GLSL_TYPE_FLOAT:
         emit(BRW_OPCODE_MOV, *dst, src_reg(ir->value.f[i]));
         break;
      case GLSL_TYPE_INT:
         emit(BRW_OPCODE_MOV, *dst, src_reg(ir->value.i[i]));
         break;
      case GLSL_TYPE_UINT:
         emit(BRW_OPCODE_MOV, *dst, src_reg(ir->value.u[i]));
         break;
      case GLSL_TYPE_BOOL:
         emit(BRW_OPCODE_MOV, *dst, src_reg(ir->value.b[i]));
         break;
      default:
         assert(!"Non-float/uint/int/bool constant");
         break;
      }
   }
   dst->reg_offset++;
}
void
vec4_visitor::visit(ir_constant *ir)
{
   dst_reg dst = dst_reg(this, ir->type);
   this->result = src_reg(dst);

   emit_constant_values(&dst, ir);
}
void
vec4_visitor::visit(ir_call *ir)
{
   assert(!"not reached");
}

void
vec4_visitor::visit(ir_texture *ir)
{
   /* FINISHME: Implement vertex texturing.
    *
    * With 0 vertex samplers available, the linker will reject
    * programs that do vertex texturing, but after our visitor has
    * run.
    */
}

void
vec4_visitor::visit(ir_return *ir)
{
   assert(!"not reached");
}

void
vec4_visitor::visit(ir_discard *ir)
{
   assert(!"not reached");
}
void
vec4_visitor::visit(ir_if *ir)
{
   /* Don't point the annotation at the if statement, because then it plus
    * the then and else blocks get printed.
    */
   this->base_ir = ir->condition;

   if (intel->gen == 6) {
      emit_if_gen6(ir);
   } else {
      emit_bool_to_cond_code(ir->condition);
      vec4_instruction *inst = emit(BRW_OPCODE_IF);
      inst->predicate = BRW_PREDICATE_NORMAL;
   }

   visit_instructions(&ir->then_instructions);

   if (!ir->else_instructions.is_empty()) {
      this->base_ir = ir->condition;
      emit(BRW_OPCODE_ELSE);

      visit_instructions(&ir->else_instructions);
   }

   this->base_ir = ir->condition;
   emit(BRW_OPCODE_ENDIF);
}
int
vec4_visitor::emit_vue_header_gen4(int header_mrf)
{
   /* Get the position */
   src_reg pos = src_reg(output_reg[VERT_RESULT_HPOS]);

   /* Build ndc coords, which are (x/w, y/w, z/w, 1/w) */
   dst_reg ndc = dst_reg(this, glsl_type::vec4_type);

   current_annotation = "NDC";
   dst_reg ndc_w = ndc;
   ndc_w.writemask = WRITEMASK_W;
   src_reg pos_w = pos;
   pos_w.swizzle = BRW_SWIZZLE4(SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W);
   emit_math(SHADER_OPCODE_RCP, ndc_w, pos_w);

   dst_reg ndc_xyz = ndc;
   ndc_xyz.writemask = WRITEMASK_XYZ;

   emit(BRW_OPCODE_MUL, ndc_xyz, pos, src_reg(ndc_w));

   if ((c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) ||
       c->key.nr_userclip || brw->has_negative_rhw_bug) {
      dst_reg header1 = dst_reg(this, glsl_type::uvec4_type);
      GLuint i;

      emit(BRW_OPCODE_MOV, header1, 0u);

      if (c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) {
         assert(!"finishme: psiz");
         src_reg psiz;

         header1.writemask = WRITEMASK_W;
         emit(BRW_OPCODE_MUL, header1, psiz, 1u << 11);
         emit(BRW_OPCODE_AND, header1, src_reg(header1), 0x7ff << 8);
      }

      for (i = 0; i < c->key.nr_userclip; i++) {
         vec4_instruction *inst;

         inst = emit(BRW_OPCODE_DP4, dst_reg(brw_null_reg()),
                     pos, src_reg(c->userplane[i]));
         inst->conditional_mod = BRW_CONDITIONAL_L;

         inst = emit(BRW_OPCODE_OR, header1, src_reg(header1), 1u << i);
         inst->predicate = BRW_PREDICATE_NORMAL;
      }

      /* i965 clipping workaround:
       * 1) Test for -ve rhw
       * 2) If set,
       *      set ndc = (0,0,0,0)
       *      set ucp[6] = 1
       *
       * Later, clipping will detect ucp[6] and ensure the primitive is
       * clipped against all fixed planes.
       */
      if (brw->has_negative_rhw_bug) {
#if 0
         /* FINISHME */
         brw_CMP(p,
                 vec8(brw_null_reg()),
                 BRW_CONDITIONAL_L,
                 brw_swizzle1(ndc, 3),
                 brw_imm_f(0));
         brw_OR(p, brw_writemask(header1, WRITEMASK_W), header1, brw_imm_ud(1<<6));
         brw_MOV(p, ndc, brw_imm_f(0));
         brw_set_predicate_control(p, BRW_PREDICATE_NONE);
#endif
      }

      header1.writemask = WRITEMASK_XYZW;
      emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), src_reg(header1));
   } else {
      emit(BRW_OPCODE_MOV, retype(brw_message_reg(header_mrf++),
                                  BRW_REGISTER_TYPE_UD), 0u);
   }

   if (intel->gen == 5) {
      /* There are 20 DWs (D0-D19) in VUE header on Ironlake:
       * dword 0-3 (m1) of the header is indices, point width, clip flags.
       * dword 4-7 (m2) is the ndc position (set above)
       * dword 8-11 (m3) of the vertex header is the 4D space position
       * dword 12-19 (m4,m5) of the vertex header is the user clip distance.
       * m6 is a pad so that the vertex element data is aligned
       * m7 is the first vertex data we fill.
       */
      current_annotation = "NDC";
      emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), src_reg(ndc));

      current_annotation = "gl_Position";
      emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), pos);

      /* user clip distance. */
      header_mrf += 2;

      /* Pad so that vertex element data is aligned. */
      header_mrf++;
   } else {
      /* There are 8 dwords in VUE header pre-Ironlake:
       * dword 0-3 (m1) is indices, point width, clip flags.
       * dword 4-7 (m2) is ndc position (set above)
       *
       * dword 8-11 (m3) is the first vertex data.
       */
      current_annotation = "NDC";
      emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), src_reg(ndc));

      current_annotation = "gl_Position";
      emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), pos);
   }

   return header_mrf;
}
int
vec4_visitor::emit_vue_header_gen6(int header_mrf)
{
   struct brw_reg reg;

   /* There are 8 or 16 DWs (D0-D15) in VUE header on Sandybridge:
    * dword 0-3 (m2) of the header is indices, point width, clip flags.
    * dword 4-7 (m3) is the 4D space position
    * dword 8-15 (m4,m5) of the vertex header is the user clip distance if
    * enabled.
    *
    * m4 or 6 is the first vertex element data we fill.
    */

   current_annotation = "indices, point width, clip flags";
   reg = brw_message_reg(header_mrf++);
   emit(BRW_OPCODE_MOV, retype(reg, BRW_REGISTER_TYPE_D), src_reg(0));
   if (c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) {
      emit(BRW_OPCODE_MOV, brw_writemask(reg, WRITEMASK_W),
           src_reg(output_reg[VERT_RESULT_PSIZ]));
   }

   current_annotation = "gl_Position";
   emit(BRW_OPCODE_MOV,
        brw_message_reg(header_mrf++), src_reg(output_reg[VERT_RESULT_HPOS]));

   current_annotation = "user clip distances";
   if (c->key.nr_userclip) {
      for (int i = 0; i < c->key.nr_userclip; i++) {
         struct brw_reg m;
         if (i < 4)
            m = brw_message_reg(header_mrf);
         else
            m = brw_message_reg(header_mrf + 1);

         emit(BRW_OPCODE_DP4,
              dst_reg(brw_writemask(m, 1 << (i & 3))),
              src_reg(output_reg[VERT_RESULT_HPOS]),
              src_reg(c->userplane[i]));
      }
      header_mrf += 2;
   }

   current_annotation = NULL;

   return header_mrf;
}
static int
align_interleaved_urb_mlen(struct brw_context *brw, int mlen)
{
   struct intel_context *intel = &brw->intel;

   if (intel->gen >= 6) {
      /* URB data written (does not include the message header reg) must
       * be a multiple of 256 bits, or 2 VS registers.  See vol5c.5,
       * section 5.4.3.2.2: URB_INTERLEAVED.
       *
       * URB entries are allocated on a multiple of 1024 bits, so an
       * extra 128 bits written here to make the end align to 256 is
       * no problem.
       */
      if ((mlen % 2) != 1)
         mlen++;
   }

   return mlen;
}
/**
 * Generates the VUE payload plus the 1 or 2 URB write instructions to
 * complete the VS thread.
 *
 * The VUE layout is documented in Volume 2a.
 */
void
vec4_visitor::emit_urb_writes()
{
   /* MRF 0 is reserved for the debugger, so start with message header
    * in MRF 1.
    */
   int base_mrf = 1;
   int mrf = base_mrf;
   int urb_entry_size;
   uint64_t outputs_remaining = c->prog_data.outputs_written;
   /* In the process of generating our URB write message contents, we
    * may need to unspill a register or load from an array.  Those
    * reads would use MRFs 14-15.
    */
   int max_usable_mrf = 13;

   /* FINISHME: edgeflag */

   /* First mrf is the g0-based message header containing URB handles and such,
    * which is implied in VS_OPCODE_URB_WRITE.
    */
   mrf++;

   if (intel->gen >= 6) {
      mrf = emit_vue_header_gen6(mrf);
   } else {
      mrf = emit_vue_header_gen4(mrf);
   }

   /* Set up the VUE data for the first URB write */
   int attr;
   for (attr = 0; attr < VERT_RESULT_MAX; attr++) {
      if (!(c->prog_data.outputs_written & BITFIELD64_BIT(attr)))
         continue;

      outputs_remaining &= ~BITFIELD64_BIT(attr);

      /* This is set up in the VUE header. */
      if (attr == VERT_RESULT_HPOS)
         continue;

      /* This is loaded into the VUE header, and thus doesn't occupy
       * an attribute slot.
       */
      if (attr == VERT_RESULT_PSIZ)
         continue;

      vec4_instruction *inst = emit(BRW_OPCODE_MOV, brw_message_reg(mrf++),
                                    src_reg(output_reg[attr]));

      if ((attr == VERT_RESULT_COL0 ||
           attr == VERT_RESULT_COL1 ||
           attr == VERT_RESULT_BFC0 ||
           attr == VERT_RESULT_BFC1) &&
          c->key.clamp_vertex_color) {
         inst->saturate = true;
      }

      /* If this was MRF 15, we can't fit anything more into this URB
       * WRITE.  Note that base_mrf of 1 means that MRF 15 is an
       * even-numbered amount of URB write data, which will meet
       * gen6's requirements for length alignment.
       */
      if (mrf > max_usable_mrf) {
         attr++;
         break;
      }
   }

   vec4_instruction *inst = emit(VS_OPCODE_URB_WRITE);
   inst->base_mrf = base_mrf;
   inst->mlen = align_interleaved_urb_mlen(brw, mrf - base_mrf);
   inst->eot = !outputs_remaining;

   urb_entry_size = mrf - base_mrf;

   /* Optional second URB write */
   if (outputs_remaining) {
      mrf = base_mrf + 1;

      for (; attr < VERT_RESULT_MAX; attr++) {
         if (!(c->prog_data.outputs_written & BITFIELD64_BIT(attr)))
            continue;

         assert(mrf < max_usable_mrf);

         emit(BRW_OPCODE_MOV, brw_message_reg(mrf++), src_reg(output_reg[attr]));
      }

      inst = emit(VS_OPCODE_URB_WRITE);
      inst->base_mrf = base_mrf;
      inst->mlen = align_interleaved_urb_mlen(brw, mrf - base_mrf);
      inst->eot = true;
      /* URB destination offset.  In the previous write, we got MRFs
       * 2-13 minus the one header MRF, so 12 regs.  URB offset is in
       * URB row increments, and each of our MRFs is half of one of
       * those, since we're doing interleaved writes.
       */
      inst->offset = (max_usable_mrf - base_mrf) / 2;

      urb_entry_size += mrf - base_mrf;
   }

   if (intel->gen == 6)
      c->prog_data.urb_entry_size = ALIGN(urb_entry_size, 8) / 8;
   else
      c->prog_data.urb_entry_size = ALIGN(urb_entry_size, 4) / 4;
}
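/* Compute the scratch-space offset for a register access.  Scratch data
 * is stored interleaved like vertex data, so vec4 indices are scaled by
 * 2; pre-gen6 message headers use byte offsets, an additional factor of
 * 16.  For example, reg_offset 3 becomes offset 6 on gen6+, or 96
 * pre-gen6.
 */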
src_reg
vec4_visitor::get_scratch_offset(vec4_instruction *inst,
                                 src_reg *reladdr, int reg_offset)
{
   /* Because we store the values to scratch interleaved like our
    * vertex data, we need to scale the vec4 index by 2.
    */
   int message_header_scale = 2;

   /* Pre-gen6, the message header uses byte offsets instead of vec4
    * (16-byte) offset units.
    */
   if (intel->gen < 6)
      message_header_scale *= 16;

   if (reladdr) {
      src_reg index = src_reg(this, glsl_type::int_type);

      vec4_instruction *add = emit(BRW_OPCODE_ADD,
                                   dst_reg(index),
                                   *reladdr,
                                   src_reg(reg_offset));
      /* Move our new instruction from the tail to its correct place. */
      add->remove();
      inst->insert_before(add);

      vec4_instruction *mul = emit(BRW_OPCODE_MUL, dst_reg(index),
                                   index, src_reg(message_header_scale));
      mul->remove();
      inst->insert_before(mul);

      return index;
   } else {
      return src_reg(reg_offset * message_header_scale);
   }
}
/**
 * Emits an instruction before @inst to load the value named by @orig_src
 * from scratch space at @base_offset to @temp.
 */
void
vec4_visitor::emit_scratch_read(vec4_instruction *inst,
                                dst_reg temp, src_reg orig_src,
                                int base_offset)
{
   int reg_offset = base_offset + orig_src.reg_offset;
   src_reg index = get_scratch_offset(inst, orig_src.reladdr, reg_offset);

   vec4_instruction *scratch_read_inst = emit(VS_OPCODE_SCRATCH_READ,
                                              temp, index);

   scratch_read_inst->base_mrf = 14;
   scratch_read_inst->mlen = 1;
   /* Move our instruction from the tail to its correct place. */
   scratch_read_inst->remove();
   inst->insert_before(scratch_read_inst);
}
/**
 * Emits an instruction after @inst to store the value to be written
 * to @orig_dst to scratch space at @base_offset, from @temp.
 */
void
vec4_visitor::emit_scratch_write(vec4_instruction *inst,
                                 src_reg temp, dst_reg orig_dst,
                                 int base_offset)
{
   int reg_offset = base_offset + orig_dst.reg_offset;
   src_reg index = get_scratch_offset(inst, orig_dst.reladdr, reg_offset);

   dst_reg dst = dst_reg(brw_writemask(brw_vec8_grf(0, 0),
                                       orig_dst.writemask));
   vec4_instruction *scratch_write_inst = emit(VS_OPCODE_SCRATCH_WRITE,
                                               dst, temp, index);
   scratch_write_inst->base_mrf = 13;
   scratch_write_inst->mlen = 2;
   scratch_write_inst->predicate = inst->predicate;
   /* Move our instruction from the tail to its correct place. */
   scratch_write_inst->remove();
   inst->insert_after(scratch_write_inst);
}
/**
 * We can't generally support array access in GRF space, because a
 * single instruction's destination can only span 2 contiguous
 * registers.  So, we send all GRF arrays that get variable index
 * access to scratch space.
 */
void
vec4_visitor::move_grf_array_access_to_scratch()
{
   int scratch_loc[this->virtual_grf_count];

   for (int i = 0; i < this->virtual_grf_count; i++) {
      scratch_loc[i] = -1;
   }

   /* First, calculate the set of virtual GRFs that need to be punted
    * to scratch due to having any array access on them, and where in
    * scratch.
    */
   foreach_list(node, &this->instructions) {
      vec4_instruction *inst = (vec4_instruction *)node;

      if (inst->dst.file == GRF && inst->dst.reladdr &&
          scratch_loc[inst->dst.reg] == -1) {
         scratch_loc[inst->dst.reg] = c->last_scratch;
         c->last_scratch += this->virtual_grf_sizes[inst->dst.reg] * 8 * 4;
      }

      for (int i = 0 ; i < 3; i++) {
         src_reg *src = &inst->src[i];

         if (src->file == GRF && src->reladdr &&
             scratch_loc[src->reg] == -1) {
            scratch_loc[src->reg] = c->last_scratch;
            c->last_scratch += this->virtual_grf_sizes[src->reg] * 8 * 4;
         }
      }
   }

   /* Now, for anything that will be accessed through scratch, rewrite
    * it to load/store.  Note that this is a _safe list walk, because
    * we may generate a new scratch_write instruction after the one
    * we're processing.
    */
   foreach_list_safe(node, &this->instructions) {
      vec4_instruction *inst = (vec4_instruction *)node;

      /* Set up the annotation tracking for new generated instructions. */
      base_ir = inst->ir;
      current_annotation = inst->annotation;

      if (inst->dst.file == GRF && scratch_loc[inst->dst.reg] != -1) {
         src_reg temp = src_reg(this, glsl_type::vec4_type);

         emit_scratch_write(inst, temp, inst->dst, scratch_loc[inst->dst.reg]);

         inst->dst.file = temp.file;
         inst->dst.reg = temp.reg;
         inst->dst.reg_offset = temp.reg_offset;
         inst->dst.reladdr = NULL;
      }

      for (int i = 0 ; i < 3; i++) {
         if (inst->src[i].file != GRF || scratch_loc[inst->src[i].reg] == -1)
            continue;

         dst_reg temp = dst_reg(this, glsl_type::vec4_type);

         emit_scratch_read(inst, temp, inst->src[i],
                           scratch_loc[inst->src[i].reg]);

         inst->src[i].file = temp.file;
         inst->src[i].reg = temp.reg;
         inst->src[i].reg_offset = temp.reg_offset;
         inst->src[i].reladdr = NULL;
      }
   }
}
vec4_visitor::vec4_visitor(struct brw_vs_compile *c,
                           struct gl_shader_program *prog,
                           struct brw_shader *shader)
{
   this->c = c;
   this->p = &c->func;
   this->brw = p->brw;
   this->intel = &brw->intel;
   this->ctx = &intel->ctx;
   this->prog = prog;
   this->shader = shader;

   this->mem_ctx = ralloc_context(NULL);
   this->failed = false;

   this->base_ir = NULL;
   this->current_annotation = NULL;

   this->vp = prog->VertexProgram;
   this->prog_data = &c->prog_data;

   this->variable_ht = hash_table_ctor(0,
                                       hash_table_pointer_hash,
                                       hash_table_pointer_compare);

   this->virtual_grf_def = NULL;
   this->virtual_grf_use = NULL;
   this->virtual_grf_sizes = NULL;
   this->virtual_grf_count = 0;
   this->virtual_grf_array_size = 0;
   this->live_intervals_valid = false;

   this->uniforms = 0;
}
vec4_visitor::~vec4_visitor()
{
   ralloc_free(this->mem_ctx);
   hash_table_dtor(this->variable_ht);
}
void
vec4_visitor::fail(const char *format, ...)
{
   va_list va;
   char *msg;

   if (failed)
      return;

   failed = true;

   va_start(va, format);
   msg = ralloc_vasprintf(mem_ctx, format, va);
   va_end(va);
   msg = ralloc_asprintf(mem_ctx, "VS compile failed: %s\n", msg);

   this->fail_msg = msg;

   if (INTEL_DEBUG & DEBUG_VS) {
      fprintf(stderr, "%s", msg);
   }
}
} /* namespace brw */