/*
 * Copyright © 2011 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
#include "brw_vec4.h"
extern "C" {
#include "main/macros.h"
#include "program/prog_parameter.h"
}

namespace brw {
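/* Convert a dst_reg (writemask-based) into a src_reg (swizzle-based).
 * The enabled writemask channels become the leading swizzle components,
 * and the last enabled channel is replicated into the disabled slots;
 * e.g. a dst with writemask XZ reads back with swizzle XZZZ.
 */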
src_reg::src_reg(dst_reg reg)
{
   init();

   this->file = reg.file;
   this->reg = reg.reg;
   this->reg_offset = reg.reg_offset;
   this->type = reg.type;
   this->reladdr = reg.reladdr;

   int swizzles[4];
   int next_chan = 0;
   int last = 0;

   for (int i = 0; i < 4; i++) {
      if (!(reg.writemask & (1 << i)))
         continue;

      swizzles[next_chan++] = last = i;
   }

   for (; next_chan < 4; next_chan++) {
      swizzles[next_chan] = last;
   }

   this->swizzle = BRW_SWIZZLE4(swizzles[0], swizzles[1],
                                swizzles[2], swizzles[3]);
}
dst_reg::dst_reg(src_reg reg)
{
   init();

   this->file = reg.file;
   this->reg = reg.reg;
   this->reg_offset = reg.reg_offset;
   this->type = reg.type;
   this->writemask = WRITEMASK_XYZW;
   this->reladdr = reg.reladdr;
}
vec4_instruction *
vec4_visitor::emit(enum opcode opcode, dst_reg dst,
                   src_reg src0, src_reg src1, src_reg src2)
{
   vec4_instruction *inst = new(mem_ctx) vec4_instruction();

   inst->opcode = opcode;
   inst->dst = dst;
   inst->src[0] = src0;
   inst->src[1] = src1;
   inst->src[2] = src2;
   inst->ir = this->base_ir;
   inst->annotation = this->current_annotation;

   this->instructions.push_tail(inst);

   return inst;
}
vec4_instruction *
vec4_visitor::emit(enum opcode opcode, dst_reg dst, src_reg src0, src_reg src1)
{
   return emit(opcode, dst, src0, src1, src_reg());
}
vec4_instruction *
vec4_visitor::emit(enum opcode opcode, dst_reg dst, src_reg src0)
{
   assert(dst.writemask != 0);
   return emit(opcode, dst, src0, src_reg(), src_reg());
}
vec4_instruction *
vec4_visitor::emit(enum opcode opcode)
{
   return emit(opcode, dst_reg(), src_reg(), src_reg(), src_reg());
}
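/* emit_dp() maps a dot product of 2, 3 or 4 elements onto the matching
 * DPn opcode; dot_opcodes[] is indexed by (elements - 2), so elements
 * must be in the range [2, 4].
 */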
void
vec4_visitor::emit_dp(dst_reg dst, src_reg src0, src_reg src1, unsigned elements)
{
   static enum opcode dot_opcodes[] = {
      BRW_OPCODE_DP2, BRW_OPCODE_DP3, BRW_OPCODE_DP4
   };

   emit(dot_opcodes[elements - 2], dst, src0, src1);
}
void
vec4_visitor::emit_math1_gen6(enum opcode opcode, dst_reg dst, src_reg src)
{
   /* The gen6 math instruction ignores the source modifiers --
    * swizzle, abs, negate, and at least some parts of the register
    * region description.
    */
   src_reg temp_src = src_reg(this, glsl_type::vec4_type);
   emit(BRW_OPCODE_MOV, dst_reg(temp_src), src);

   if (dst.writemask != WRITEMASK_XYZW) {
      /* The gen6 math instruction must be align1, so we can't do
       * writemasks.
       */
      dst_reg temp_dst = dst_reg(this, glsl_type::vec4_type);

      emit(opcode, temp_dst, temp_src);

      emit(BRW_OPCODE_MOV, dst, src_reg(temp_dst));
   } else {
      emit(opcode, dst, temp_src);
   }
}
void
vec4_visitor::emit_math1_gen4(enum opcode opcode, dst_reg dst, src_reg src)
{
   vec4_instruction *inst = emit(opcode, dst, src);
   inst->base_mrf = 1;
   inst->mlen = 1;
}
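/* Math on gen4/5 is implemented as a send to the shared math unit, which
 * is why the _gen4 helpers above set up a message register (base_mrf and
 * mlen), while gen6+ has a native math instruction whose align1 and
 * source-modifier restrictions are worked around in the _gen6 helpers.
 */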
void
vec4_visitor::emit_math(opcode opcode, dst_reg dst, src_reg src)
{
   switch (opcode) {
   case SHADER_OPCODE_RCP:
   case SHADER_OPCODE_RSQ:
   case SHADER_OPCODE_SQRT:
   case SHADER_OPCODE_EXP2:
   case SHADER_OPCODE_LOG2:
   case SHADER_OPCODE_SIN:
   case SHADER_OPCODE_COS:
      break;
   default:
      assert(!"not reached: bad math opcode");
      return;
   }

   if (intel->gen >= 6) {
      return emit_math1_gen6(opcode, dst, src);
   } else {
      return emit_math1_gen4(opcode, dst, src);
   }
}
void
vec4_visitor::emit_math2_gen6(enum opcode opcode,
                              dst_reg dst, src_reg src0, src_reg src1)
{
   src_reg expanded;

   /* The gen6 math instruction ignores the source modifiers --
    * swizzle, abs, negate, and at least some parts of the register
    * region description.  Move the sources to temporaries to make it
    * generally work.
    */

   expanded = src_reg(this, glsl_type::vec4_type);
   emit(BRW_OPCODE_MOV, dst_reg(expanded), src0);
   src0 = expanded;

   expanded = src_reg(this, glsl_type::vec4_type);
   emit(BRW_OPCODE_MOV, dst_reg(expanded), src1);
   src1 = expanded;

   if (dst.writemask != WRITEMASK_XYZW) {
      /* The gen6 math instruction must be align1, so we can't do
       * writemasks.
       */
      dst_reg temp_dst = dst_reg(this, glsl_type::vec4_type);

      emit(opcode, temp_dst, src0, src1);

      emit(BRW_OPCODE_MOV, dst, src_reg(temp_dst));
   } else {
      emit(opcode, dst, src0, src1);
   }
}
void
vec4_visitor::emit_math2_gen4(enum opcode opcode,
                              dst_reg dst, src_reg src0, src_reg src1)
{
   vec4_instruction *inst = emit(opcode, dst, src0, src1);
   inst->base_mrf = 1;
   inst->mlen = 2;
}
void
vec4_visitor::emit_math(enum opcode opcode,
                        dst_reg dst, src_reg src0, src_reg src1)
{
   assert(opcode == SHADER_OPCODE_POW);

   if (intel->gen >= 6) {
      return emit_math2_gen6(opcode, dst, src0, src1);
   } else {
      return emit_math2_gen4(opcode, dst, src0, src1);
   }
}
void
vec4_visitor::visit_instructions(const exec_list *list)
{
   foreach_iter(exec_list_iterator, iter, *list) {
      ir_instruction *ir = (ir_instruction *)iter.get();

      base_ir = ir;
      ir->accept(this);
   }
}
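/* Returns the storage size of a GLSL type in vec4 (4-float) slots.
 * For example, a float or vec3 takes one slot, a mat4 takes 4 (one per
 * column), and a float[10] array takes 10, since each element is padded
 * out to a full vec4.
 */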
static int
type_size(const struct glsl_type *type)
{
   unsigned int i;
   int size;

   switch (type->base_type) {
   case GLSL_TYPE_UINT:
   case GLSL_TYPE_INT:
   case GLSL_TYPE_FLOAT:
   case GLSL_TYPE_BOOL:
      if (type->is_matrix()) {
         return type->matrix_columns;
      } else {
         /* Regardless of size of vector, it gets a vec4. This is bad
          * packing for things like floats, but otherwise arrays become a
          * mess.  Hopefully a later pass over the code can pack scalars
          * down if appropriate.
          */
         return 1;
      }
   case GLSL_TYPE_ARRAY:
      assert(type->length > 0);
      return type_size(type->fields.array) * type->length;
   case GLSL_TYPE_STRUCT:
      size = 0;
      for (i = 0; i < type->length; i++) {
         size += type_size(type->fields.structure[i].type);
      }
      return size;
   case GLSL_TYPE_SAMPLER:
      /* Samplers take up one slot in UNIFORMS[], but they're baked in
       * at link time.
       */
      return 1;
   default:
      assert(0);
      return 0;
   }
}
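/* Allocate a new virtual GRF of the given size (in vec4 registers) and
 * return its index.  The size array grows by doubling, so repeated
 * allocations stay amortized O(1).
 */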
int
vec4_visitor::virtual_grf_alloc(int size)
{
   if (virtual_grf_array_size <= virtual_grf_count) {
      if (virtual_grf_array_size == 0)
         virtual_grf_array_size = 16;
      else
         virtual_grf_array_size *= 2;
      virtual_grf_sizes = reralloc(mem_ctx, virtual_grf_sizes, int,
                                   virtual_grf_array_size);
   }
   virtual_grf_sizes[virtual_grf_count] = size;
   return virtual_grf_count++;
}
src_reg::src_reg(class vec4_visitor *v, const struct glsl_type *type)
{
   init();

   this->file = GRF;
   this->reg = v->virtual_grf_alloc(type_size(type));

   if (type->is_array() || type->is_record()) {
      this->swizzle = BRW_SWIZZLE_NOOP;
   } else {
      this->swizzle = swizzle_for_size(type->vector_elements);
   }

   this->type = brw_type_for_base_type(type);
}
dst_reg::dst_reg(class vec4_visitor *v, const struct glsl_type *type)
{
   init();

   this->file = GRF;
   this->reg = v->virtual_grf_alloc(type_size(type));

   if (type->is_array() || type->is_record()) {
      this->writemask = WRITEMASK_XYZW;
   } else {
      this->writemask = (1 << type->vector_elements) - 1;
   }

   this->type = brw_type_for_base_type(type);
}
/* Our support for uniforms is piggy-backed on the struct
 * gl_fragment_program, because that's where the values actually
 * get stored, rather than in some global gl_shader_program uniform
 * store.
 */
int
vec4_visitor::setup_uniform_values(int loc, const glsl_type *type)
{
   unsigned int offset = 0;
   float *values = &this->vp->Base.Parameters->ParameterValues[loc][0].f;

   if (type->is_matrix()) {
      const glsl_type *column = glsl_type::get_instance(GLSL_TYPE_FLOAT,
                                                        type->vector_elements,
                                                        1);

      for (unsigned int i = 0; i < type->matrix_columns; i++) {
         offset += setup_uniform_values(loc + offset, column);
      }

      return offset;
   }

   switch (type->base_type) {
   case GLSL_TYPE_FLOAT:
   case GLSL_TYPE_UINT:
   case GLSL_TYPE_INT:
   case GLSL_TYPE_BOOL:
      for (unsigned int i = 0; i < type->vector_elements; i++) {
         int slot = this->uniforms * 4 + i;
         switch (type->base_type) {
         case GLSL_TYPE_FLOAT:
            c->prog_data.param_convert[slot] = PARAM_NO_CONVERT;
            break;
         case GLSL_TYPE_UINT:
            c->prog_data.param_convert[slot] = PARAM_CONVERT_F2U;
            break;
         case GLSL_TYPE_INT:
            c->prog_data.param_convert[slot] = PARAM_CONVERT_F2I;
            break;
         case GLSL_TYPE_BOOL:
            c->prog_data.param_convert[slot] = PARAM_CONVERT_F2B;
            break;
         default:
            assert(!"not reached");
            c->prog_data.param_convert[slot] = PARAM_NO_CONVERT;
            break;
         }
         c->prog_data.param[slot] = &values[i];
      }

      for (unsigned int i = type->vector_elements; i < 4; i++) {
         c->prog_data.param_convert[this->uniforms * 4 + i] =
            PARAM_CONVERT_ZERO;
         c->prog_data.param[this->uniforms * 4 + i] = NULL;
      }

      this->uniform_size[this->uniforms] = type->vector_elements;
      this->uniforms++;

      return 1;

   case GLSL_TYPE_STRUCT:
      for (unsigned int i = 0; i < type->length; i++) {
         offset += setup_uniform_values(loc + offset,
                                        type->fields.structure[i].type);
      }
      return offset;

   case GLSL_TYPE_ARRAY:
      for (unsigned int i = 0; i < type->length; i++) {
         offset += setup_uniform_values(loc + offset, type->fields.array);
      }
      return offset;

   case GLSL_TYPE_SAMPLER:
      /* The sampler takes up a slot, but we don't use any values from it. */
      return 1;

   default:
      assert(!"not reached");
      return 0;
   }
}
/* Our support for builtin uniforms is even scarier than non-builtin.
 * It sits on top of the PROG_STATE_VAR parameters that are
 * automatically updated from GL context state.
 */
void
vec4_visitor::setup_builtin_uniform_values(ir_variable *ir)
{
   const ir_state_slot *const slots = ir->state_slots;
   assert(ir->state_slots != NULL);

   for (unsigned int i = 0; i < ir->num_state_slots; i++) {
      /* This state reference has already been setup by ir_to_mesa,
       * but we'll get the same index back here.  We can reference
       * ParameterValues directly, since unlike brw_fs.cpp, we never
       * add new state references during compile.
       */
      int index = _mesa_add_state_reference(this->vp->Base.Parameters,
                                            (gl_state_index *)slots[i].tokens);
      float *values = &this->vp->Base.Parameters->ParameterValues[index][0].f;

      this->uniform_size[this->uniforms] = 0;
      /* Add each of the unique swizzled channels of the element.
       * This will end up matching the size of the glsl_type of this field.
       */
      int last_swiz = -1;
      for (unsigned int j = 0; j < 4; j++) {
         int swiz = GET_SWZ(slots[i].swizzle, j);
         if (swiz <= last_swiz)
            break;
         last_swiz = swiz;

         c->prog_data.param[this->uniforms * 4 + j] = &values[swiz];
         c->prog_data.param_convert[this->uniforms * 4 + j] = PARAM_NO_CONVERT;
         this->uniform_size[this->uniforms]++;
      }
      this->uniforms++;
   }
}
dst_reg *
vec4_visitor::variable_storage(ir_variable *var)
{
   return (dst_reg *)hash_table_find(this->variable_ht, var);
}
void
vec4_visitor::emit_bool_to_cond_code(ir_rvalue *ir)
{
   ir_expression *expr = ir->as_expression();

   if (expr) {
      src_reg op[2];
      vec4_instruction *inst;

      assert(expr->get_num_operands() <= 2);
      for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
         assert(expr->operands[i]->type->is_scalar());

         expr->operands[i]->accept(this);
         op[i] = this->result;
      }

      switch (expr->operation) {
      case ir_unop_logic_not:
         inst = emit(BRW_OPCODE_AND, dst_null_d(), op[0], src_reg(1));
         inst->conditional_mod = BRW_CONDITIONAL_Z;
         break;

      case ir_binop_logic_xor:
         inst = emit(BRW_OPCODE_XOR, dst_null_d(), op[0], op[1]);
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         break;

      case ir_binop_logic_or:
         inst = emit(BRW_OPCODE_OR, dst_null_d(), op[0], op[1]);
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         break;

      case ir_binop_logic_and:
         inst = emit(BRW_OPCODE_AND, dst_null_d(), op[0], op[1]);
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         break;

      case ir_unop_f2b:
         if (intel->gen >= 6) {
            inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], src_reg(0.0f));
         } else {
            inst = emit(BRW_OPCODE_MOV, dst_null_f(), op[0]);
         }
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         break;

      case ir_unop_i2b:
         if (intel->gen >= 6) {
            inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], src_reg(0));
         } else {
            inst = emit(BRW_OPCODE_MOV, dst_null_d(), op[0]);
         }
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         break;

      case ir_binop_greater:
      case ir_binop_gequal:
      case ir_binop_less:
      case ir_binop_lequal:
      case ir_binop_equal:
      case ir_binop_all_equal:
      case ir_binop_nequal:
      case ir_binop_any_nequal:
         inst = emit(BRW_OPCODE_CMP, dst_null_cmp(), op[0], op[1]);
         inst->conditional_mod =
            brw_conditional_for_comparison(expr->operation);
         break;

      default:
         assert(!"not reached");
         break;
      }
      return;
   }

   ir->accept(this);

   if (intel->gen >= 6) {
      vec4_instruction *inst = emit(BRW_OPCODE_AND, dst_null_d(),
                                    this->result, src_reg(1));
      inst->conditional_mod = BRW_CONDITIONAL_NZ;
   } else {
      vec4_instruction *inst = emit(BRW_OPCODE_MOV, dst_null_d(), this->result);
      inst->conditional_mod = BRW_CONDITIONAL_NZ;
   }
}
/**
 * Emit a gen6 IF statement with the comparison folded into the IF
 * instruction.
 */
void
vec4_visitor::emit_if_gen6(ir_if *ir)
{
   ir_expression *expr = ir->condition->as_expression();

   if (expr) {
      src_reg op[2];
      vec4_instruction *inst;
      dst_reg temp;

      assert(expr->get_num_operands() <= 2);
      for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
         expr->operands[i]->accept(this);
         op[i] = this->result;
      }

      switch (expr->operation) {
      case ir_unop_logic_not:
         inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], src_reg(0));
         inst->conditional_mod = BRW_CONDITIONAL_Z;
         break;

      case ir_binop_logic_xor:
         inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], op[1]);
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         break;

      case ir_binop_logic_or:
         temp = dst_reg(this, glsl_type::bool_type);
         emit(BRW_OPCODE_OR, temp, op[0], op[1]);
         inst = emit(BRW_OPCODE_IF, dst_null_d(), src_reg(temp), src_reg(0));
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         break;

      case ir_binop_logic_and:
         temp = dst_reg(this, glsl_type::bool_type);
         emit(BRW_OPCODE_AND, temp, op[0], op[1]);
         inst = emit(BRW_OPCODE_IF, dst_null_d(), src_reg(temp), src_reg(0));
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         break;

      case ir_unop_f2b:
         inst = emit(BRW_OPCODE_IF, dst_null_f(), op[0], src_reg(0));
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         break;

      case ir_unop_i2b:
         inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], src_reg(0));
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         break;

      case ir_binop_greater:
      case ir_binop_gequal:
      case ir_binop_less:
      case ir_binop_lequal:
      case ir_binop_equal:
      case ir_binop_nequal:
         inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], op[1]);
         inst->conditional_mod =
            brw_conditional_for_comparison(expr->operation);
         break;

      case ir_binop_all_equal:
         inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], op[1]);
         inst->conditional_mod = BRW_CONDITIONAL_Z;

         inst = emit(BRW_OPCODE_IF);
         inst->predicate = BRW_PREDICATE_ALIGN16_ALL4H;
         break;

      case ir_binop_any_nequal:
         inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], op[1]);
         inst->conditional_mod = BRW_CONDITIONAL_NZ;

         inst = emit(BRW_OPCODE_IF);
         inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
         break;

      case ir_unop_any:
         inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], src_reg(0));
         inst->conditional_mod = BRW_CONDITIONAL_NZ;

         inst = emit(BRW_OPCODE_IF);
         inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
         break;

      default:
         assert(!"not reached");
         inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], src_reg(0));
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         break;
      }
      return;
   }

   ir->condition->accept(this);

   vec4_instruction *inst = emit(BRW_OPCODE_IF, dst_null_d(),
                                 this->result, src_reg(0));
   inst->conditional_mod = BRW_CONDITIONAL_NZ;
}
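/* Allocate storage for a variable the first time it is visited: inputs
 * map to fixed ATTR registers, outputs additionally record per-vec4
 * entries in output_reg[] for the later URB writes, temporaries get
 * fresh virtual GRFs, and uniforms are laid out through the
 * setup_*_uniform_values helpers above.
 */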
void
vec4_visitor::visit(ir_variable *ir)
{
   dst_reg *reg = NULL;

   if (variable_storage(ir))
      return;

   switch (ir->mode) {
   case ir_var_in:
      reg = new(mem_ctx) dst_reg(ATTR, ir->location);
      break;

   case ir_var_out:
      reg = new(mem_ctx) dst_reg(this, ir->type);

      for (int i = 0; i < type_size(ir->type); i++) {
         output_reg[ir->location + i] = *reg;
         output_reg[ir->location + i].reg_offset = i;
         output_reg[ir->location + i].type = BRW_REGISTER_TYPE_F;
      }
      break;

   case ir_var_auto:
   case ir_var_temporary:
      reg = new(mem_ctx) dst_reg(this, ir->type);
      break;

   case ir_var_uniform:
      reg = new(this->mem_ctx) dst_reg(UNIFORM, this->uniforms);

      if (!strncmp(ir->name, "gl_", 3)) {
         setup_builtin_uniform_values(ir);
      } else {
         setup_uniform_values(ir->location, ir->type);
      }
      break;

   default:
      assert(!"not reached");
   }

   reg->type = brw_type_for_base_type(ir->type);
   hash_table_insert(this->variable_ht, reg, ir);
}
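/* Lower an ir_loop to the instruction stream.  The generated shape is
 * roughly:
 *
 *    MOV counter, from          (if a counter is present)
 *    DO
 *      CMP null, counter, to    (predicated BREAK on ir->cmp)
 *      ... body ...
 *      ADD counter, counter, increment
 *    WHILE
 */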
void
vec4_visitor::visit(ir_loop *ir)
{
   dst_reg counter;

   /* We don't want debugging output to print the whole body of the
    * loop as the annotation.
    */
   this->base_ir = NULL;

   if (ir->counter != NULL) {
      this->base_ir = ir->counter;
      ir->counter->accept(this);
      counter = *(variable_storage(ir->counter));

      if (ir->from != NULL) {
         this->base_ir = ir->from;
         ir->from->accept(this);

         emit(BRW_OPCODE_MOV, counter, this->result);
      }
   }

   emit(BRW_OPCODE_DO);

   if (ir->to) {
      this->base_ir = ir->to;
      ir->to->accept(this);

      vec4_instruction *inst = emit(BRW_OPCODE_CMP, dst_null_d(),
                                    src_reg(counter), this->result);
      inst->conditional_mod = brw_conditional_for_comparison(ir->cmp);

      inst = emit(BRW_OPCODE_BREAK);
      inst->predicate = BRW_PREDICATE_NORMAL;
   }

   visit_instructions(&ir->body_instructions);

   if (ir->increment) {
      this->base_ir = ir->increment;
      ir->increment->accept(this);
      emit(BRW_OPCODE_ADD, counter, src_reg(counter), this->result);
   }

   emit(BRW_OPCODE_WHILE);
}
void
vec4_visitor::visit(ir_loop_jump *ir)
{
   switch (ir->mode) {
   case ir_loop_jump::jump_break:
      emit(BRW_OPCODE_BREAK);
      break;
   case ir_loop_jump::jump_continue:
      emit(BRW_OPCODE_CONTINUE);
      break;
   }
}
void
vec4_visitor::visit(ir_function_signature *ir)
{
   assert(0);
   (void)ir;
}
void
vec4_visitor::visit(ir_function *ir)
{
   /* Ignore function bodies other than main() -- we shouldn't see calls to
    * them since they should all be inlined.
    */
   if (strcmp(ir->name, "main") == 0) {
      const ir_function_signature *sig;
      exec_list empty;

      sig = ir->matching_signature(&empty);

      assert(sig);

      visit_instructions(&sig->body);
   }
}
bool
vec4_visitor::try_emit_sat(ir_expression *ir)
{
   ir_rvalue *sat_src = ir->as_rvalue_to_saturate();
   if (!sat_src)
      return false;

   sat_src->accept(this);
   src_reg src = this->result;

   this->result = src_reg(this, ir->type);
   vec4_instruction *inst;
   inst = emit(BRW_OPCODE_MOV, dst_reg(this->result), src);
   inst->saturate = true;

   return true;
}
void
vec4_visitor::emit_bool_comparison(unsigned int op,
                                   dst_reg dst, src_reg src0, src_reg src1)
{
   /* original gen4 does destination conversion before comparison. */
   if (intel->gen < 5)
      dst.type = src0.type;

   vec4_instruction *inst = emit(BRW_OPCODE_CMP, dst, src0, src1);
   inst->conditional_mod = brw_conditional_for_comparison(op);

   dst.type = BRW_REGISTER_TYPE_D;
   emit(BRW_OPCODE_AND, dst, src_reg(dst), src_reg(0x1));
}
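/* Expression codegen: evaluate every operand into a src_reg, allocate a
 * vec4 temporary for the result, then switch on the operation.  Many IR
 * operations are expected to have been lowered away before reaching this
 * point, which is what the "not reached" asserts below enforce.
 */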
void
vec4_visitor::visit(ir_expression *ir)
{
   unsigned int operand;
   src_reg op[Elements(ir->operands)];
   src_reg result_src;
   dst_reg result_dst;
   vec4_instruction *inst;

   if (try_emit_sat(ir))
      return;

   for (operand = 0; operand < ir->get_num_operands(); operand++) {
      this->result.file = BAD_FILE;
      ir->operands[operand]->accept(this);
      if (this->result.file == BAD_FILE) {
         printf("Failed to get tree for expression operand:\n");
         ir->operands[operand]->print();
         exit(1);
      }
      op[operand] = this->result;

      /* Matrix expression operands should have been broken down to vector
       * operations already.
       */
      assert(!ir->operands[operand]->type->is_matrix());
   }

   int vector_elements = ir->operands[0]->type->vector_elements;
   if (ir->operands[1]) {
      vector_elements = MAX2(vector_elements,
                             ir->operands[1]->type->vector_elements);
   }

   this->result.file = BAD_FILE;

   /* Storage for our result.  Ideally for an assignment we'd be using
    * the actual storage for the result here, instead.
    */
   result_src = src_reg(this, ir->type);
   /* convenience for the emit functions below. */
   result_dst = dst_reg(result_src);
   /* If nothing special happens, this is the result. */
   this->result = result_src;
   /* Limit writes to the channels that will be used by result_src later.
    * This does limit this temp's use as a temporary for multi-instruction
    * sequences.
    */
   result_dst.writemask = (1 << ir->type->vector_elements) - 1;

   switch (ir->operation) {
   case ir_unop_logic_not:
      /* Note that BRW_OPCODE_NOT is not appropriate here, since it is
       * ones complement of the whole register, not just bit 0.
       */
      emit(BRW_OPCODE_XOR, result_dst, op[0], src_reg(1));
      break;
   case ir_unop_neg:
      op[0].negate = !op[0].negate;
      this->result = op[0];
      break;
   case ir_unop_abs:
      op[0].abs = true;
      op[0].negate = false;
      this->result = op[0];
      break;

   case ir_unop_sign:
      emit(BRW_OPCODE_MOV, result_dst, src_reg(0.0f));

      inst = emit(BRW_OPCODE_CMP, dst_null_f(), op[0], src_reg(0.0f));
      inst->conditional_mod = BRW_CONDITIONAL_G;
      inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1.0f));
      inst->predicate = BRW_PREDICATE_NORMAL;

      inst = emit(BRW_OPCODE_CMP, dst_null_f(), op[0], src_reg(0.0f));
      inst->conditional_mod = BRW_CONDITIONAL_L;
      inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(-1.0f));
      inst->predicate = BRW_PREDICATE_NORMAL;

      break;

   case ir_unop_rcp:
      emit_math(SHADER_OPCODE_RCP, result_dst, op[0]);
      break;

   case ir_unop_exp2:
      emit_math(SHADER_OPCODE_EXP2, result_dst, op[0]);
      break;
   case ir_unop_log2:
      emit_math(SHADER_OPCODE_LOG2, result_dst, op[0]);
      break;
   case ir_unop_exp:
   case ir_unop_log:
      assert(!"not reached: should be handled by ir_explog_to_explog2");
      break;
   case ir_unop_sin:
   case ir_unop_sin_reduced:
      emit_math(SHADER_OPCODE_SIN, result_dst, op[0]);
      break;
   case ir_unop_cos:
   case ir_unop_cos_reduced:
      emit_math(SHADER_OPCODE_COS, result_dst, op[0]);
      break;

   case ir_unop_dFdx:
   case ir_unop_dFdy:
      assert(!"derivatives not valid in vertex shader");
      break;

   case ir_unop_noise:
      assert(!"not reached: should be handled by lower_noise");
      break;

   case ir_binop_add:
      emit(BRW_OPCODE_ADD, result_dst, op[0], op[1]);
      break;
   case ir_binop_sub:
      assert(!"not reached: should be handled by ir_sub_to_add_neg");
      break;

   case ir_binop_mul:
      emit(BRW_OPCODE_MUL, result_dst, op[0], op[1]);
      break;
   case ir_binop_div:
      assert(!"not reached: should be handled by ir_div_to_mul_rcp");
   case ir_binop_mod:
      assert(!"ir_binop_mod should have been converted to b * fract(a/b)");
      break;

   case ir_binop_less:
   case ir_binop_greater:
   case ir_binop_lequal:
   case ir_binop_gequal:
   case ir_binop_equal:
   case ir_binop_nequal: {
      dst_reg temp = result_dst;
      /* original gen4 does implicit conversion before comparison. */
      if (intel->gen < 5)
         temp.type = op[0].type;

      inst = emit(BRW_OPCODE_CMP, temp, op[0], op[1]);
      inst->conditional_mod = brw_conditional_for_comparison(ir->operation);
      emit(BRW_OPCODE_AND, result_dst, this->result, src_reg(0x1));
      break;
   }

   case ir_binop_all_equal:
      /* "==" operator producing a scalar boolean. */
      if (ir->operands[0]->type->is_vector() ||
          ir->operands[1]->type->is_vector()) {
         inst = emit(BRW_OPCODE_CMP, dst_null_cmp(), op[0], op[1]);
         inst->conditional_mod = BRW_CONDITIONAL_Z;

         emit(BRW_OPCODE_MOV, result_dst, src_reg(0));
         inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1));
         inst->predicate = BRW_PREDICATE_ALIGN16_ALL4H;
      } else {
         dst_reg temp = result_dst;
         /* original gen4 does implicit conversion before comparison. */
         if (intel->gen < 5)
            temp.type = op[0].type;

         inst = emit(BRW_OPCODE_CMP, temp, op[0], op[1]);
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         emit(BRW_OPCODE_AND, result_dst, result_src, src_reg(0x1));
      }
      break;
   case ir_binop_any_nequal:
      /* "!=" operator producing a scalar boolean. */
      if (ir->operands[0]->type->is_vector() ||
          ir->operands[1]->type->is_vector()) {
         inst = emit(BRW_OPCODE_CMP, dst_null_cmp(), op[0], op[1]);
         inst->conditional_mod = BRW_CONDITIONAL_NZ;

         emit(BRW_OPCODE_MOV, result_dst, src_reg(0));
         inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1));
         inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
      } else {
         dst_reg temp = result_dst;
         /* original gen4 does implicit conversion before comparison. */
         if (intel->gen < 5)
            temp.type = op[0].type;

         inst = emit(BRW_OPCODE_CMP, temp, op[0], op[1]);
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         emit(BRW_OPCODE_AND, result_dst, result_src, src_reg(0x1));
      }
      break;

   case ir_unop_any:
      inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], src_reg(0));
      inst->conditional_mod = BRW_CONDITIONAL_NZ;

      emit(BRW_OPCODE_MOV, result_dst, src_reg(0));

      inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1));
      inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
      break;

   case ir_binop_logic_xor:
      emit(BRW_OPCODE_XOR, result_dst, op[0], op[1]);
      break;

   case ir_binop_logic_or:
      emit(BRW_OPCODE_OR, result_dst, op[0], op[1]);
      break;

   case ir_binop_logic_and:
      emit(BRW_OPCODE_AND, result_dst, op[0], op[1]);
      break;

   case ir_binop_dot:
      assert(ir->operands[0]->type->is_vector());
      assert(ir->operands[0]->type == ir->operands[1]->type);
      emit_dp(result_dst, op[0], op[1], ir->operands[0]->type->vector_elements);
      break;

   case ir_unop_sqrt:
      emit_math(SHADER_OPCODE_SQRT, result_dst, op[0]);
      break;
   case ir_unop_rsq:
      emit_math(SHADER_OPCODE_RSQ, result_dst, op[0]);
      break;
   case ir_unop_i2f:
   case ir_unop_i2u:
   case ir_unop_u2i:
   case ir_unop_u2f:
   case ir_unop_b2f:
   case ir_unop_b2i:
   case ir_unop_f2i:
      emit(BRW_OPCODE_MOV, result_dst, op[0]);
      break;
   case ir_unop_f2b:
   case ir_unop_i2b: {
      dst_reg temp = result_dst;
      /* original gen4 does implicit conversion before comparison. */
      if (intel->gen < 5)
         temp.type = op[0].type;

      inst = emit(BRW_OPCODE_CMP, temp, op[0], src_reg(0.0f));
      inst->conditional_mod = BRW_CONDITIONAL_NZ;
      inst = emit(BRW_OPCODE_AND, result_dst, result_src, src_reg(1));
      break;
   }

   case ir_unop_trunc:
      emit(BRW_OPCODE_RNDZ, result_dst, op[0]);
      break;
   case ir_unop_ceil:
      op[0].negate = !op[0].negate;
      inst = emit(BRW_OPCODE_RNDD, result_dst, op[0]);
      this->result.negate = true;
      break;
   case ir_unop_floor:
      inst = emit(BRW_OPCODE_RNDD, result_dst, op[0]);
      break;
   case ir_unop_fract:
      inst = emit(BRW_OPCODE_FRC, result_dst, op[0]);
      break;
   case ir_unop_round_even:
      emit(BRW_OPCODE_RNDE, result_dst, op[0]);
      break;

   case ir_binop_min:
      inst = emit(BRW_OPCODE_CMP, result_dst, op[0], op[1]);
      inst->conditional_mod = BRW_CONDITIONAL_L;

      inst = emit(BRW_OPCODE_SEL, result_dst, op[0], op[1]);
      inst->predicate = BRW_PREDICATE_NORMAL;
      break;
   case ir_binop_max:
      inst = emit(BRW_OPCODE_CMP, result_dst, op[0], op[1]);
      inst->conditional_mod = BRW_CONDITIONAL_G;

      inst = emit(BRW_OPCODE_SEL, result_dst, op[0], op[1]);
      inst->predicate = BRW_PREDICATE_NORMAL;
      break;

   case ir_binop_pow:
      emit_math(SHADER_OPCODE_POW, result_dst, op[0], op[1]);
      break;

   case ir_unop_bit_not:
      inst = emit(BRW_OPCODE_NOT, result_dst, op[0]);
      break;
   case ir_binop_bit_and:
      inst = emit(BRW_OPCODE_AND, result_dst, op[0], op[1]);
      break;
   case ir_binop_bit_xor:
      inst = emit(BRW_OPCODE_XOR, result_dst, op[0], op[1]);
      break;
   case ir_binop_bit_or:
      inst = emit(BRW_OPCODE_OR, result_dst, op[0], op[1]);
      break;

   case ir_binop_lshift:
   case ir_binop_rshift:
      assert(!"GLSL 1.30 features unsupported");
      break;

   case ir_quadop_vector:
      assert(!"not reached: should be handled by lower_quadop_vector");
      break;
   }
}
void
vec4_visitor::visit(ir_swizzle *ir)
{
   src_reg src;
   int i = 0;
   int swizzle[4];

   /* Note that this is only swizzles in expressions, not those on the left
    * hand side of an assignment, which do write masking.  See ir_assignment
    * for that.
    */

   ir->val->accept(this);
   src = this->result;
   assert(src.file != BAD_FILE);

   for (i = 0; i < ir->type->vector_elements; i++) {
      switch (i) {
      case 0:
         swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.x);
         break;
      case 1:
         swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.y);
         break;
      case 2:
         swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.z);
         break;
      case 3:
         swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.w);
         break;
      }
   }
   for (; i < 4; i++) {
      /* Replicate the last channel out. */
      swizzle[i] = swizzle[ir->type->vector_elements - 1];
   }

   src.swizzle = BRW_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]);

   this->result = src;
}
void
vec4_visitor::visit(ir_dereference_variable *ir)
{
   const struct glsl_type *type = ir->type;
   dst_reg *reg = variable_storage(ir->var);

   if (!reg) {
      fail("Failed to find variable storage for %s\n", ir->var->name);
      this->result = src_reg(brw_null_reg());
      return;
   }

   this->result = src_reg(*reg);

   if (type->is_scalar() || type->is_vector() || type->is_matrix())
      this->result.swizzle = swizzle_for_size(type->vector_elements);
}
void
vec4_visitor::visit(ir_dereference_array *ir)
{
   ir_constant *constant_index;
   src_reg src;
   int element_size = type_size(ir->type);

   constant_index = ir->array_index->constant_expression_value();

   ir->array->accept(this);
   src = this->result;

   if (constant_index) {
      src.reg_offset += constant_index->value.i[0] * element_size;
   } else {
      /* Variable index array dereference.  It eats the "vec4" of the
       * base of the array and an index that offsets the Mesa register
       * index.
       */
      ir->array_index->accept(this);

      src_reg index_reg;

      if (element_size == 1) {
         index_reg = this->result;
      } else {
         index_reg = src_reg(this, glsl_type::int_type);

         emit(BRW_OPCODE_MUL, dst_reg(index_reg),
              this->result, src_reg(element_size));
      }

      if (src.reladdr) {
         src_reg temp = src_reg(this, glsl_type::int_type);

         emit(BRW_OPCODE_ADD, dst_reg(temp), *src.reladdr, index_reg);

         index_reg = temp;
      }

      src.reladdr = ralloc(mem_ctx, src_reg);
      memcpy(src.reladdr, &index_reg, sizeof(index_reg));
   }

   /* If the type is smaller than a vec4, replicate the last channel out. */
   if (ir->type->is_scalar() || ir->type->is_vector())
      src.swizzle = swizzle_for_size(ir->type->vector_elements);
   else
      src.swizzle = BRW_SWIZZLE_NOOP;
   src.type = brw_type_for_base_type(ir->type);

   this->result = src;
}
void
vec4_visitor::visit(ir_dereference_record *ir)
{
   unsigned int i;
   const glsl_type *struct_type = ir->record->type;
   int offset = 0;

   ir->record->accept(this);

   for (i = 0; i < struct_type->length; i++) {
      if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0)
         break;
      offset += type_size(struct_type->fields.structure[i].type);
   }

   /* If the type is smaller than a vec4, replicate the last channel out. */
   if (ir->type->is_scalar() || ir->type->is_vector())
      this->result.swizzle = swizzle_for_size(ir->type->vector_elements);
   else
      this->result.swizzle = BRW_SWIZZLE_NOOP;
   this->result.type = brw_type_for_base_type(ir->type);

   this->result.reg_offset += offset;
}
/**
 * We want to be careful in assignment setup to hit the actual storage
 * instead of potentially using a temporary like we might with the
 * ir_dereference handler.
 */
static dst_reg
get_assignment_lhs(ir_dereference *ir, vec4_visitor *v)
{
   /* The LHS must be a dereference.  If the LHS is a variable indexed array
    * access of a vector, it must be separated into a series of conditional
    * moves before reaching this point (see ir_vec_index_to_cond_assign).
    */
   assert(ir->as_dereference());
   ir_dereference_array *deref_array = ir->as_dereference_array();
   if (deref_array) {
      assert(!deref_array->array->type->is_vector());
   }

   /* Use the rvalue deref handler for the most part.  We'll ignore
    * swizzles in it and write swizzles using writemask, though.
    */
   ir->accept(v);
   return dst_reg(v->result);
}
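/* Copy a whole (possibly aggregate) value by recursing down structs,
 * arrays and matrix columns until scalar/vector leaves are reached,
 * emitting one MOV per vec4 and bumping dst/src reg_offset as it goes.
 */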
void
vec4_visitor::emit_block_move(dst_reg *dst, src_reg *src,
                              const struct glsl_type *type, bool predicated)
{
   if (type->base_type == GLSL_TYPE_STRUCT) {
      for (unsigned int i = 0; i < type->length; i++) {
         emit_block_move(dst, src, type->fields.structure[i].type, predicated);
      }
      return;
   }

   if (type->is_array()) {
      for (unsigned int i = 0; i < type->length; i++) {
         emit_block_move(dst, src, type->fields.array, predicated);
      }
      return;
   }

   if (type->is_matrix()) {
      const struct glsl_type *vec_type;

      vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT,
                                         type->vector_elements, 1);

      for (int i = 0; i < type->matrix_columns; i++) {
         emit_block_move(dst, src, vec_type, predicated);
      }
      return;
   }

   assert(type->is_scalar() || type->is_vector());

   dst->type = brw_type_for_base_type(type);
   src->type = dst->type;

   dst->writemask = (1 << type->vector_elements) - 1;

   /* Do we need to worry about swizzling a swizzle? */
   assert(src->swizzle == BRW_SWIZZLE_NOOP);
   src->swizzle = swizzle_for_size(type->vector_elements);

   vec4_instruction *inst = emit(BRW_OPCODE_MOV, *dst, *src);
   if (predicated)
      inst->predicate = BRW_PREDICATE_NORMAL;

   dst->reg_offset++;
   src->reg_offset++;
}
void
vec4_visitor::visit(ir_assignment *ir)
{
   dst_reg dst = get_assignment_lhs(ir->lhs, this);

   if (!ir->lhs->type->is_scalar() &&
       !ir->lhs->type->is_vector()) {
      ir->rhs->accept(this);
      src_reg src = this->result;

      if (ir->condition) {
         emit_bool_to_cond_code(ir->condition);
      }

      emit_block_move(&dst, &src, ir->rhs->type, ir->condition != NULL);
      return;
   }

   /* Now we're down to just a scalar/vector with writemasks. */
   int i;

   ir->rhs->accept(this);
   src_reg src = this->result;

   int swizzles[4];
   int first_enabled_chan = 0;
   int src_chan = 0;

   assert(ir->lhs->type->is_vector() ||
          ir->lhs->type->is_scalar());
   dst.writemask = ir->write_mask;

   for (int i = 0; i < 4; i++) {
      if (dst.writemask & (1 << i)) {
         first_enabled_chan = BRW_GET_SWZ(src.swizzle, i);
         break;
      }
   }

   /* Swizzle a small RHS vector into the channels being written.
    *
    * glsl ir treats write_mask as dictating how many channels are
    * present on the RHS while in our instructions we need to make
    * those channels appear in the slots of the vec4 they're written to.
    */
   for (int i = 0; i < 4; i++) {
      if (dst.writemask & (1 << i))
         swizzles[i] = BRW_GET_SWZ(src.swizzle, src_chan++);
      else
         swizzles[i] = first_enabled_chan;
   }

   src.swizzle = BRW_SWIZZLE4(swizzles[0], swizzles[1],
                              swizzles[2], swizzles[3]);

   if (ir->condition) {
      emit_bool_to_cond_code(ir->condition);
   }

   for (i = 0; i < type_size(ir->lhs->type); i++) {
      vec4_instruction *inst = emit(BRW_OPCODE_MOV, dst, src);

      if (ir->condition)
         inst->predicate = BRW_PREDICATE_NORMAL;

      dst.reg_offset++;
      src.reg_offset++;
   }
}
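/* Write an ir_constant into consecutive vec4 slots, one MOV per scalar
 * channel, recursing through structs, arrays and matrix columns the
 * same way emit_block_move() does.
 */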
void
vec4_visitor::emit_constant_values(dst_reg *dst, ir_constant *ir)
{
   if (ir->type->base_type == GLSL_TYPE_STRUCT) {
      foreach_list(node, &ir->components) {
         ir_constant *field_value = (ir_constant *)node;

         emit_constant_values(dst, field_value);
      }
      return;
   }

   if (ir->type->is_array()) {
      for (unsigned int i = 0; i < ir->type->length; i++) {
         emit_constant_values(dst, ir->array_elements[i]);
      }
      return;
   }

   if (ir->type->is_matrix()) {
      for (int i = 0; i < ir->type->matrix_columns; i++) {
         for (int j = 0; j < ir->type->vector_elements; j++) {
            dst->writemask = 1 << j;
            dst->type = BRW_REGISTER_TYPE_F;

            emit(BRW_OPCODE_MOV, *dst,
                 src_reg(ir->value.f[i * ir->type->vector_elements + j]));
         }
         dst->reg_offset++;
      }
      return;
   }

   for (int i = 0; i < ir->type->vector_elements; i++) {
      dst->writemask = 1 << i;
      dst->type = brw_type_for_base_type(ir->type);

      switch (ir->type->base_type) {
      case GLSL_TYPE_FLOAT:
         emit(BRW_OPCODE_MOV, *dst, src_reg(ir->value.f[i]));
         break;
      case GLSL_TYPE_INT:
         emit(BRW_OPCODE_MOV, *dst, src_reg(ir->value.i[i]));
         break;
      case GLSL_TYPE_UINT:
         emit(BRW_OPCODE_MOV, *dst, src_reg(ir->value.u[i]));
         break;
      case GLSL_TYPE_BOOL:
         emit(BRW_OPCODE_MOV, *dst, src_reg(ir->value.b[i]));
         break;
      default:
         assert(!"Non-float/uint/int/bool constant");
         break;
      }
   }
   dst->reg_offset++;
}
void
vec4_visitor::visit(ir_constant *ir)
{
   dst_reg dst = dst_reg(this, ir->type);
   this->result = src_reg(dst);

   emit_constant_values(&dst, ir);
}
void
vec4_visitor::visit(ir_call *ir)
{
   assert(!"not reached");
}

void
vec4_visitor::visit(ir_texture *ir)
{
   assert(!"not reached");
}

void
vec4_visitor::visit(ir_return *ir)
{
   assert(!"not reached");
}

void
vec4_visitor::visit(ir_discard *ir)
{
   assert(!"not reached");
}
void
vec4_visitor::visit(ir_if *ir)
{
   /* Don't point the annotation at the if statement, because then it plus
    * the then and else blocks get printed.
    */
   this->base_ir = ir->condition;

   if (intel->gen == 6) {
      emit_if_gen6(ir);
   } else {
      emit_bool_to_cond_code(ir->condition);
      vec4_instruction *inst = emit(BRW_OPCODE_IF);
      inst->predicate = BRW_PREDICATE_NORMAL;
   }

   visit_instructions(&ir->then_instructions);

   if (!ir->else_instructions.is_empty()) {
      this->base_ir = ir->condition;
      emit(BRW_OPCODE_ELSE);

      visit_instructions(&ir->else_instructions);
   }

   this->base_ir = ir->condition;
   emit(BRW_OPCODE_ENDIF);
}
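/* Build the pre-gen6 VUE header in message registers starting at
 * header_mrf: clip/point-size flags, the computed NDC position, and
 * gl_Position.  Returns the next free MRF index.
 */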
int
vec4_visitor::emit_vue_header_gen4(int header_mrf)
{
   /* Get the position */
   src_reg pos = src_reg(output_reg[VERT_RESULT_HPOS]);

   /* Build ndc coords, which are (x/w, y/w, z/w, 1/w) */
   dst_reg ndc = dst_reg(this, glsl_type::vec4_type);

   current_annotation = "NDC";
   dst_reg ndc_w = ndc;
   ndc_w.writemask = WRITEMASK_W;
   src_reg pos_w = pos;
   pos_w.swizzle = BRW_SWIZZLE4(SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W);
   emit_math(SHADER_OPCODE_RCP, ndc_w, pos_w);

   dst_reg ndc_xyz = ndc;
   ndc_xyz.writemask = WRITEMASK_XYZ;

   emit(BRW_OPCODE_MUL, ndc_xyz, pos, src_reg(ndc_w));

   if ((c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) ||
       c->key.nr_userclip || brw->has_negative_rhw_bug) {
      dst_reg header1 = dst_reg(this, glsl_type::uvec4_type);
      GLuint i;

      emit(BRW_OPCODE_MOV, header1, 0u);

      if (c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) {
         assert(!"finishme: psiz");
         src_reg psiz;

         header1.writemask = WRITEMASK_W;
         emit(BRW_OPCODE_MUL, header1, psiz, 1u << 11);
         emit(BRW_OPCODE_AND, header1, src_reg(header1), 0x7ff << 8);
      }

      for (i = 0; i < c->key.nr_userclip; i++) {
         vec4_instruction *inst;

         inst = emit(BRW_OPCODE_DP4, dst_reg(brw_null_reg()),
                     pos, src_reg(c->userplane[i]));
         inst->conditional_mod = BRW_CONDITIONAL_L;

         emit(BRW_OPCODE_OR, header1, src_reg(header1), 1u << i);
         inst->predicate = BRW_PREDICATE_NORMAL;
      }

      /* i965 clipping workaround:
       * 1) Test for -ve rhw
       * 2) If set,
       *      set ndc = (0,0,0,0)
       *      set ucp[6] = 1
       *
       * Later, clipping will detect ucp[6] and ensure the primitive is
       * clipped against all fixed planes.
       */
      if (brw->has_negative_rhw_bug) {
#if 0
         /* FINISHME */
         brw_CMP(p,
                 vec8(brw_null_reg()),
                 BRW_CONDITIONAL_L,
                 brw_swizzle1(ndc, 3),
                 brw_imm_f(0));

         brw_OR(p, brw_writemask(header1, WRITEMASK_W), header1, brw_imm_ud(1<<6));
         brw_MOV(p, ndc, brw_imm_f(0));
         brw_set_predicate_control(p, BRW_PREDICATE_NONE);
#endif
      }

      header1.writemask = WRITEMASK_XYZW;
      emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), src_reg(header1));
   } else {
      emit(BRW_OPCODE_MOV, retype(brw_message_reg(header_mrf++),
                                  BRW_REGISTER_TYPE_UD), 0u);
   }

   if (intel->gen == 5) {
      /* There are 20 DWs (D0-D19) in VUE header on Ironlake:
       * dword 0-3 (m1) of the header is indices, point width, clip flags.
       * dword 4-7 (m2) is the ndc position (set above)
       * dword 8-11 (m3) of the vertex header is the 4D space position
       * dword 12-19 (m4,m5) of the vertex header is the user clip distance.
       * m6 is a pad so that the vertex element data is aligned
       * m7 is the first vertex data we fill.
       */
      current_annotation = "NDC";
      emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), src_reg(ndc));

      current_annotation = "gl_Position";
      emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), pos);

      /* user clip distance. */
      header_mrf += 2;

      /* Pad so that vertex element data is aligned. */
      header_mrf++;
   } else {
      /* There are 8 dwords in VUE header pre-Ironlake:
       * dword 0-3 (m1) is indices, point width, clip flags.
       * dword 4-7 (m2) is ndc position (set above)
       *
       * dword 8-11 (m3) is the first vertex data.
       */
      current_annotation = "NDC";
      emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), src_reg(ndc));

      current_annotation = "gl_Position";
      emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), pos);
   }

   return header_mrf;
}
int
vec4_visitor::emit_vue_header_gen6(int header_mrf)
{
   struct brw_reg reg;

   /* There are 8 or 16 DWs (D0-D15) in VUE header on Sandybridge:
    * dword 0-3 (m2) of the header is indices, point width, clip flags.
    * dword 4-7 (m3) is the 4D space position
    * dword 8-15 (m4,m5) of the vertex header is the user clip distance if
    * enabled.
    *
    * m4 or 6 is the first vertex element data we fill.
    */

   current_annotation = "indices, point width, clip flags";
   reg = brw_message_reg(header_mrf++);
   emit(BRW_OPCODE_MOV, retype(reg, BRW_REGISTER_TYPE_D), src_reg(0));
   if (c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) {
      emit(BRW_OPCODE_MOV, brw_writemask(reg, WRITEMASK_W),
           src_reg(output_reg[VERT_RESULT_PSIZ]));
   }

   current_annotation = "gl_Position";
   emit(BRW_OPCODE_MOV,
        brw_message_reg(header_mrf++), src_reg(output_reg[VERT_RESULT_HPOS]));

   current_annotation = "user clip distances";
   if (c->key.nr_userclip) {
      for (int i = 0; i < c->key.nr_userclip; i++) {
         struct brw_reg m;
         if (i < 4)
            m = brw_message_reg(header_mrf);
         else
            m = brw_message_reg(header_mrf + 1);

         emit(BRW_OPCODE_DP4,
              dst_reg(brw_writemask(m, 1 << (i & 3))),
              src_reg(output_reg[VERT_RESULT_HPOS]),
              src_reg(c->userplane[i]));
      }
      header_mrf += 2;
   }

   current_annotation = NULL;

   return header_mrf;
}
static int
align_interleaved_urb_mlen(struct brw_context *brw, int mlen)
{
   struct intel_context *intel = &brw->intel;

   if (intel->gen >= 6) {
      /* URB data written (does not include the message header reg) must
       * be a multiple of 256 bits, or 2 VS registers.  See vol5c.5,
       * section 5.4.3.2.2: URB_INTERLEAVED.
       *
       * URB entries are allocated on a multiple of 1024 bits, so an
       * extra 128 bits written here to make the end align to 256 is
       * no problem.
       */
      if ((mlen % 2) != 1)
         mlen++;
   }

   return mlen;
}
/**
 * Generates the VUE payload plus the 1 or 2 URB write instructions to
 * complete the VS thread.
 *
 * The VUE layout is documented in Volume 2a.
 */
void
vec4_visitor::emit_urb_writes()
{
   /* MRF 0 is reserved for the debugger, so start with message header
    * in MRF 1.
    */
   int base_mrf = 1;
   int mrf = base_mrf;
   int urb_entry_size;
   uint64_t outputs_remaining = c->prog_data.outputs_written;
   /* In the process of generating our URB write message contents, we
    * may need to unspill a register or load from an array.  Those
    * reads would use MRFs 14-15.
    */
   int max_usable_mrf = 13;

   /* FINISHME: edgeflag */

   /* First mrf is the g0-based message header containing URB handles and such,
    * which is implied in VS_OPCODE_URB_WRITE.
    */
   mrf++;

   if (intel->gen >= 6) {
      mrf = emit_vue_header_gen6(mrf);
   } else {
      mrf = emit_vue_header_gen4(mrf);
   }

   /* Set up the VUE data for the first URB write */
   int attr;
   for (attr = 0; attr < VERT_RESULT_MAX; attr++) {
      if (!(c->prog_data.outputs_written & BITFIELD64_BIT(attr)))
         continue;

      outputs_remaining &= ~BITFIELD64_BIT(attr);

      /* This is set up in the VUE header. */
      if (attr == VERT_RESULT_HPOS)
         continue;

      /* This is loaded into the VUE header, and thus doesn't occupy
       * an attribute slot.
       */
      if (attr == VERT_RESULT_PSIZ)
         continue;

      vec4_instruction *inst = emit(BRW_OPCODE_MOV, brw_message_reg(mrf++),
                                    src_reg(output_reg[attr]));

      if ((attr == VERT_RESULT_COL0 ||
           attr == VERT_RESULT_COL1 ||
           attr == VERT_RESULT_BFC0 ||
           attr == VERT_RESULT_BFC1) &&
          c->key.clamp_vertex_color) {
         inst->saturate = true;
      }

      /* If this was MRF 15, we can't fit anything more into this URB
       * WRITE.  Note that base_mrf of 1 means that MRF 15 is an
       * even-numbered amount of URB write data, which will meet
       * gen6's requirements for length alignment.
       */
      if (mrf > max_usable_mrf) {
         attr++;
         break;
      }
   }

   vec4_instruction *inst = emit(VS_OPCODE_URB_WRITE);
   inst->base_mrf = base_mrf;
   inst->mlen = align_interleaved_urb_mlen(brw, mrf - base_mrf);
   inst->eot = !outputs_remaining;

   urb_entry_size = mrf - base_mrf;

   /* Optional second URB write */
   if (outputs_remaining) {
      mrf = base_mrf + 1;

      for (; attr < VERT_RESULT_MAX; attr++) {
         if (!(c->prog_data.outputs_written & BITFIELD64_BIT(attr)))
            continue;

         assert(mrf < max_usable_mrf);

         emit(BRW_OPCODE_MOV, brw_message_reg(mrf++), src_reg(output_reg[attr]));
      }

      inst = emit(VS_OPCODE_URB_WRITE);
      inst->base_mrf = base_mrf;
      inst->mlen = align_interleaved_urb_mlen(brw, mrf - base_mrf);
      inst->eot = true;
      /* URB destination offset.  In the previous write, we got MRFs
       * 2-13 minus the one header MRF, so 12 regs.  URB offset is in
       * URB row increments, and each of our MRFs is half of one of
       * those, since we're doing interleaved writes.
       */
      inst->offset = (max_usable_mrf - base_mrf) / 2;

      urb_entry_size += mrf - base_mrf;
   }

   if (intel->gen == 6)
      c->prog_data.urb_entry_size = ALIGN(urb_entry_size, 8) / 8;
   else
      c->prog_data.urb_entry_size = ALIGN(urb_entry_size, 4) / 4;
}
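/* Compute the per-vec4 offset used by scratch (spill) messages.  The
 * value is scaled because scratch data is stored interleaved like our
 * vertex data; a constant offset becomes an immediate, while a reladdr
 * offset emits ADD/MUL instructions placed just before @inst.
 */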
src_reg
vec4_visitor::get_scratch_offset(vec4_instruction *inst,
                                 src_reg *reladdr, int reg_offset)
{
   /* Because we store the values to scratch interleaved like our
    * vertex data, we need to scale the vec4 index by 2.
    */
   int message_header_scale = 2;

   /* Pre-gen6, the message header uses byte offsets instead of vec4
    * (16-byte) offset units.
    */
   if (intel->gen < 6)
      message_header_scale *= 16;

   if (reladdr) {
      src_reg index = src_reg(this, glsl_type::int_type);

      vec4_instruction *add = emit(BRW_OPCODE_ADD,
                                   dst_reg(index),
                                   *reladdr,
                                   src_reg(reg_offset));
      /* Move our new instruction from the tail to its correct place. */
      add->remove();
      inst->insert_before(add);

      vec4_instruction *mul = emit(BRW_OPCODE_MUL, dst_reg(index),
                                   index, src_reg(message_header_scale));
      mul->remove();
      inst->insert_before(mul);

      return index;
   } else {
      return src_reg(reg_offset * message_header_scale);
   }
}
/**
 * Emits an instruction before @inst to load the value named by @orig_src
 * from scratch space at @base_offset to @temp.
 */
void
vec4_visitor::emit_scratch_read(vec4_instruction *inst,
                                dst_reg temp, src_reg orig_src,
                                int base_offset)
{
   int reg_offset = base_offset + orig_src.reg_offset;
   src_reg index = get_scratch_offset(inst, orig_src.reladdr, reg_offset);

   vec4_instruction *scratch_read_inst = emit(VS_OPCODE_SCRATCH_READ,
                                              temp, index);

   scratch_read_inst->base_mrf = 14;
   scratch_read_inst->mlen = 1;
   /* Move our instruction from the tail to its correct place. */
   scratch_read_inst->remove();
   inst->insert_before(scratch_read_inst);
}
/**
 * Emits an instruction after @inst to store the value to be written
 * to @orig_dst to scratch space at @base_offset, from @temp.
 */
void
vec4_visitor::emit_scratch_write(vec4_instruction *inst,
                                 src_reg temp, dst_reg orig_dst,
                                 int base_offset)
{
   int reg_offset = base_offset + orig_dst.reg_offset;
   src_reg index = get_scratch_offset(inst, orig_dst.reladdr, reg_offset);

   dst_reg dst = dst_reg(brw_writemask(brw_vec8_grf(0, 0),
                                       orig_dst.writemask));
   vec4_instruction *scratch_write_inst = emit(VS_OPCODE_SCRATCH_WRITE,
                                               dst, temp, index);
   scratch_write_inst->base_mrf = 13;
   scratch_write_inst->mlen = 2;
   scratch_write_inst->predicate = inst->predicate;
   /* Move our instruction from the tail to its correct place. */
   scratch_write_inst->remove();
   inst->insert_after(scratch_write_inst);
}
/**
 * We can't generally support array access in GRF space, because a
 * single instruction's destination can only span 2 contiguous
 * registers.  So, we send all GRF arrays that get variable index
 * access to scratch space.
 */
void
vec4_visitor::move_grf_array_access_to_scratch()
{
   int scratch_loc[this->virtual_grf_count];

   for (int i = 0; i < this->virtual_grf_count; i++) {
      scratch_loc[i] = -1;
   }

   /* First, calculate the set of virtual GRFs that need to be punted
    * to scratch due to having any array access on them, and where in
    * scratch.
    */
   foreach_list(node, &this->instructions) {
      vec4_instruction *inst = (vec4_instruction *)node;

      if (inst->dst.file == GRF && inst->dst.reladdr &&
          scratch_loc[inst->dst.reg] == -1) {
         scratch_loc[inst->dst.reg] = c->last_scratch;
         c->last_scratch += this->virtual_grf_sizes[inst->dst.reg] * 8 * 4;
      }

      for (int i = 0 ; i < 3; i++) {
         src_reg *src = &inst->src[i];

         if (src->file == GRF && src->reladdr &&
             scratch_loc[src->reg] == -1) {
            scratch_loc[src->reg] = c->last_scratch;
            c->last_scratch += this->virtual_grf_sizes[src->reg] * 8 * 4;
         }
      }
   }

   /* Now, for anything that will be accessed through scratch, rewrite
    * it to load/store.  Note that this is a _safe list walk, because
    * we may generate a new scratch_write instruction after the one
    * we're processing.
    */
   foreach_list_safe(node, &this->instructions) {
      vec4_instruction *inst = (vec4_instruction *)node;

      /* Set up the annotation tracking for new generated instructions. */
      base_ir = inst->ir;
      current_annotation = inst->annotation;

      if (inst->dst.file == GRF && scratch_loc[inst->dst.reg] != -1) {
         src_reg temp = src_reg(this, glsl_type::vec4_type);

         emit_scratch_write(inst, temp, inst->dst, scratch_loc[inst->dst.reg]);

         inst->dst.file = temp.file;
         inst->dst.reg = temp.reg;
         inst->dst.reg_offset = temp.reg_offset;
         inst->dst.reladdr = NULL;
      }

      for (int i = 0 ; i < 3; i++) {
         if (inst->src[i].file != GRF || scratch_loc[inst->src[i].reg] == -1)
            continue;

         dst_reg temp = dst_reg(this, glsl_type::vec4_type);

         emit_scratch_read(inst, temp, inst->src[i],
                           scratch_loc[inst->src[i].reg]);

         inst->src[i].file = temp.file;
         inst->src[i].reg = temp.reg;
         inst->src[i].reg_offset = temp.reg_offset;
         inst->src[i].reladdr = NULL;
      }
   }
}
vec4_visitor::vec4_visitor(struct brw_vs_compile *c,
                           struct gl_shader_program *prog,
                           struct brw_shader *shader)
{
   this->c = c;
   this->p = &c->func;
   this->brw = p->brw;
   this->intel = &brw->intel;
   this->ctx = &intel->ctx;
   this->prog = prog;
   this->shader = shader;

   this->mem_ctx = ralloc_context(NULL);
   this->failed = false;

   this->base_ir = NULL;
   this->current_annotation = NULL;

   this->vp = brw->vertex_program; /* FINISHME: change for precompile */
   this->prog_data = &c->prog_data;

   this->virtual_grf_sizes = NULL;
   this->virtual_grf_count = 0;
   this->virtual_grf_array_size = 0;

   this->uniforms = 0;

   this->variable_ht = hash_table_ctor(0,
                                       hash_table_pointer_hash,
                                       hash_table_pointer_compare);
}
vec4_visitor::~vec4_visitor()
{
   hash_table_dtor(this->variable_ht);
}
void
vec4_visitor::fail(const char *format, ...)
{
   va_list va;
   char *msg;

   if (failed)
      return;

   failed = true;

   va_start(va, format);
   msg = ralloc_vasprintf(mem_ctx, format, va);
   va_end(va);
   msg = ralloc_asprintf(mem_ctx, "VS compile failed: %s\n", msg);

   this->fail_msg = msg;

   if (INTEL_DEBUG & DEBUG_VS) {
      fprintf(stderr, "%s", msg);
   }
}

} /* namespace brw */