src/mesa/drivers/dri/i965/brw_fs_visitor.cpp

   1 /*
   2  * Copyright © 2010 Intel Corporation
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the next
  12  * paragraph) shall be included in all copies or substantial portions of the
  13  * Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21  * IN THE SOFTWARE.
  22  */
  23
  24 /** @file brw_fs_visitor.cpp
  25  *
  26  * This file supports generating the FS LIR from the GLSL IR.  The LIR
  27  * makes it easier to do backend-specific optimizations than doing so
  28  * in the GLSL IR or in the native code.
  29  */
  30 extern "C" {
  31
  32 #include <sys/types.h>
  33
  34 #include "main/macros.h"
  35 #include "main/shaderobj.h"
  36 #include "program/prog_parameter.h"
  37 #include "program/prog_print.h"
  38 #include "program/prog_optimize.h"
  39 #include "program/register_allocate.h"
  40 #include "program/sampler.h"
  41 #include "program/hash_table.h"
  42 #include "brw_context.h"
  43 #include "brw_eu.h"
  44 #include "brw_wm.h"
  45 }
  46 #include "brw_fs.h"
  47 #include "main/uniforms.h"
  48 #include "glsl/glsl_types.h"
  49 #include "glsl/ir_optimization.h"
  50
  51 void
  52 fs_visitor::visit(ir_variable *ir)
  53 {
  54    fs_reg *reg = NULL;
  55
  56    if (variable_storage(ir))
  57       return;
  58
  59    if (ir->data.mode == ir_var_shader_in) {
  60       if (!strcmp(ir->name, "gl_FragCoord")) {
  61          reg = emit_fragcoord_interpolation(ir);
  62       } else if (!strcmp(ir->name, "gl_FrontFacing")) {
  63          reg = emit_frontfacing_interpolation(ir);
  64       } else {
  65          reg = emit_general_interpolation(ir);
  66       }
  67       assert(reg);
  68       hash_table_insert(this->variable_ht, reg, ir);
  69       return;
  70    } else if (ir->data.mode == ir_var_shader_out) {
  71       reg = new(this->mem_ctx) fs_reg(this, ir->type);
  72
  73       if (ir->data.index > 0) {
  74          assert(ir->data.location == FRAG_RESULT_DATA0);
  75          assert(ir->data.index == 1);
  76          this->dual_src_output = *reg;
  77          this->do_dual_src = true;
  78       } else if (ir->data.location == FRAG_RESULT_COLOR) {
  79          /* Writing gl_FragColor outputs to all color regions. */
  80          for (unsigned int i = 0; i < MAX2(key->nr_color_regions, 1); i++) {
  81             this->outputs[i] = *reg;
  82             this->output_components[i] = 4;
  83          }
  84       } else if (ir->data.location == FRAG_RESULT_DEPTH) {
  85          this->frag_depth = *reg;
  86       } else if (ir->data.location == FRAG_RESULT_SAMPLE_MASK) {
  87          this->sample_mask = *reg;
  88       } else {
  89          /* gl_FragData or a user-defined FS output */
  90          assert(ir->data.location >= FRAG_RESULT_DATA0 &&
  91                 ir->data.location < FRAG_RESULT_DATA0 + BRW_MAX_DRAW_BUFFERS);
  92
  93          int vector_elements =
  94             ir->type->is_array() ? ir->type->fields.array->vector_elements
  95                                  : ir->type->vector_elements;
  96
  97          /* General color output. */
  98          for (unsigned int i = 0; i < MAX2(1, ir->type->length); i++) {
  99             int output = ir->data.location - FRAG_RESULT_DATA0 + i;
 100             this->outputs[output] = *reg;
 101             this->outputs[output].reg_offset += vector_elements * i;
 102             this->output_components[output] = vector_elements;
 103          }
 104       }
 105    } else if (ir->data.mode == ir_var_uniform) {
 106       int param_index = uniforms;
 107
 108       /* Thanks to the lower_ubo_reference pass, we will see only
 109        * ir_binop_ubo_load expressions and not ir_dereference_variable for UBO
 110        * variables, so no need for them to be in variable_ht.
 111        *
 112        * Atomic counters take no uniform storage, no need to do
 113        * anything here.
 114        */
 115       if (ir->is_in_uniform_block() || ir->type->contains_atomic())
 116          return;
 117
 118       if (dispatch_width == 16) {
 119          if (!variable_storage(ir)) {
 120             fail("Failed to find uniform '%s' in SIMD16\n", ir->name);
 121          }
 122          return;
 123       }
 124
 125       param_size[param_index] = type_size(ir->type);
 126       if (!strncmp(ir->name, "gl_", 3)) {
 127          setup_builtin_uniform_values(ir);
 128       } else {
 129          setup_uniform_values(ir);
 130       }
 131
 132       reg = new(this->mem_ctx) fs_reg(UNIFORM, param_index);
 133       reg->type = brw_type_for_base_type(ir->type);
 134
 135    } else if (ir->data.mode == ir_var_system_value) {
 136       if (ir->data.location == SYSTEM_VALUE_SAMPLE_POS) {
 137          reg = emit_samplepos_setup(ir);
 138       } else if (ir->data.location == SYSTEM_VALUE_SAMPLE_ID) {
 139          reg = emit_sampleid_setup(ir);
 140       } else if (ir->data.location == SYSTEM_VALUE_SAMPLE_MASK_IN) {
 141          assert(brw->gen >= 7);
 142          reg = new(mem_ctx)
 143             fs_reg(retype(brw_vec8_grf(payload.sample_mask_in_reg, 0),
 144                           BRW_REGISTER_TYPE_D));
 145       }
 146    }
 147
 148    if (!reg)
 149       reg = new(this->mem_ctx) fs_reg(this, ir->type);
 150
 151    hash_table_insert(this->variable_ht, reg, ir);
 152 }
 153
 154 void
 155 fs_visitor::visit(ir_dereference_variable *ir)
 156 {
 157    fs_reg *reg = variable_storage(ir->var);
 158
 159    if (!reg) {
 160       fail("Failed to find variable storage for %s\n", ir->var->name);
 161       this->result = fs_reg(reg_null_d);
 162       return;
 163    }
 164    this->result = *reg;
 165 }
 166
 167 void
 168 fs_visitor::visit(ir_dereference_record *ir)
 169 {
 170    const glsl_type *struct_type = ir->record->type;
 171
 172    ir->record->accept(this);
 173
 174    unsigned int offset = 0;
 175    for (unsigned int i = 0; i < struct_type->length; i++) {
 176       if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0)
 177          break;
 178       offset += type_size(struct_type->fields.structure[i].type);
 179    }
 180    this->result.reg_offset += offset;
 181    this->result.type = brw_type_for_base_type(ir->type);
 182 }
 183
 184 void
 185 fs_visitor::visit(ir_dereference_array *ir)
 186 {
 187    ir_constant *constant_index;
 188    fs_reg src;
 189    int element_size = type_size(ir->type);
 190
 191    constant_index = ir->array_index->as_constant();
 192
 193    ir->array->accept(this);
 194    src = this->result;
 195    src.type = brw_type_for_base_type(ir->type);
 196
 197    if (constant_index) {
 198       assert(src.file == UNIFORM || src.file == GRF || src.file == HW_REG);
 199       src.reg_offset += constant_index->value.i[0] * element_size;
 200    } else {
 201       /* Variable index array dereference.  We attach the variable index
 202        * component to the reg as a pointer to a register containing the
 203        * offset.  Currently only uniform arrays are supported in this patch,
 204        * and that reladdr pointer is resolved by
 205        * move_uniform_array_access_to_pull_constants().  All other array types
 206        * are lowered by lower_variable_index_to_cond_assign().
 207        */
 208       ir->array_index->accept(this);
 209
 210       fs_reg index_reg;
 211       index_reg = fs_reg(this, glsl_type::int_type);
 212       emit(BRW_OPCODE_MUL, index_reg, this->result, fs_reg(element_size));
 213
 214       if (src.reladdr) {
 215          emit(BRW_OPCODE_ADD, index_reg, *src.reladdr, index_reg);
 216       }
 217
 218       src.reladdr = ralloc(mem_ctx, fs_reg);
 219       memcpy(src.reladdr, &index_reg, sizeof(index_reg));
 220    }
 221    this->result = src;
 222 }
 223
 224 void
 225 fs_visitor::emit_lrp(const fs_reg &dst, const fs_reg &x, const fs_reg &y,
 226                      const fs_reg &a)
 227 {
 228    if (brw->gen < 6 ||
 229        !x.is_valid_3src() ||
 230        !y.is_valid_3src() ||
 231        !a.is_valid_3src()) {
 232       /* We can't use the LRP instruction.  Emit x*(1-a) + y*a. */
 233       fs_reg y_times_a           = fs_reg(this, glsl_type::float_type);
 234       fs_reg one_minus_a         = fs_reg(this, glsl_type::float_type);
 235       fs_reg x_times_one_minus_a = fs_reg(this, glsl_type::float_type);
 236
 237       emit(MUL(y_times_a, y, a));
 238
 239       fs_reg negative_a = a;
 240       negative_a.negate = !a.negate;
 241       emit(ADD(one_minus_a, negative_a, fs_reg(1.0f)));
 242       emit(MUL(x_times_one_minus_a, x, one_minus_a));
 243
 244       emit(ADD(dst, x_times_one_minus_a, y_times_a));
 245    } else {
 246       /* The LRP instruction actually does op1 * op0 + op2 * (1 - op0), so
 247        * we need to reorder the operands.
 248        */
 249       emit(LRP(dst, a, y, x));
 250    }
 251 }
 252
 253 void
 254 fs_visitor::emit_minmax(enum brw_conditional_mod conditionalmod, const fs_reg &dst,
 255                         const fs_reg &src0, const fs_reg &src1)
 256 {
 257    fs_inst *inst;
 258
 259    if (brw->gen >= 6) {
 260       inst = emit(BRW_OPCODE_SEL, dst, src0, src1);
 261       inst->conditional_mod = conditionalmod;
 262    } else {
 263       emit(CMP(reg_null_d, src0, src1, conditionalmod));
 264
 265       inst = emit(BRW_OPCODE_SEL, dst, src0, src1);
 266       inst->predicate = BRW_PREDICATE_NORMAL;
 267    }
 268 }
 269
 270 /* Instruction selection: Produce a MOV.sat instead of
 271  * MIN(MAX(val, 0), 1) when possible.
 272  */
 273 bool
 274 fs_visitor::try_emit_saturate(ir_expression *ir)
 275 {
 276    ir_rvalue *sat_val = ir->as_rvalue_to_saturate();
 277
 278    if (!sat_val)
 279       return false;
 280
 281    fs_inst *pre_inst = (fs_inst *) this->instructions.get_tail();
 282
 283    sat_val->accept(this);
 284    fs_reg src = this->result;
 285
 286    fs_inst *last_inst = (fs_inst *) this->instructions.get_tail();
 287
 288    /* If the last instruction from our accept() didn't generate our
 289     * src, generate a saturated MOV
 290     */
 291    fs_inst *modify = get_instruction_generating_reg(pre_inst, last_inst, src);
 292    if (!modify || modify->regs_written != 1) {
 293       this->result = fs_reg(this, ir->type);
 294       fs_inst *inst = emit(MOV(this->result, src));
 295       inst->saturate = true;
 296    } else {
 297       modify->saturate = true;
 298       this->result = src;
 299    }
 300
 301
 302    return true;
 303 }
 304
 305 bool
 306 fs_visitor::try_emit_mad(ir_expression *ir)
 307 {
 308    /* 3-src instructions were introduced in gen6. */
 309    if (brw->gen < 6)
 310       return false;
 311
 312    /* MAD can only handle floating-point data. */
 313    if (ir->type != glsl_type::float_type)
 314       return false;
 315
 316    ir_rvalue *nonmul = ir->operands[1];
 317    ir_expression *mul = ir->operands[0]->as_expression();
 318
 319    if (!mul || mul->operation != ir_binop_mul) {
 320       nonmul = ir->operands[0];
 321       mul = ir->operands[1]->as_expression();
 322
 323       if (!mul || mul->operation != ir_binop_mul)
 324          return false;
 325    }
 326
 327    if (nonmul->as_constant() ||
 328        mul->operands[0]->as_constant() ||
 329        mul->operands[1]->as_constant())
 330       return false;
 331
 332    nonmul->accept(this);
 333    fs_reg src0 = this->result;
 334
 335    mul->operands[0]->accept(this);
 336    fs_reg src1 = this->result;
 337
 338    mul->operands[1]->accept(this);
 339    fs_reg src2 = this->result;
 340
 341    this->result = fs_reg(this, ir->type);
 342    emit(BRW_OPCODE_MAD, this->result, src0, src1, src2);
 343
 344    return true;
 345 }
 346
 347 static int
 348 pack_pixel_offset(float x)
 349 {
 350    /* Clamp upper end of the range to +7/16. See explanation in non-constant
 351     * offset case below. */
 352    int n = MIN2((int)(x * 16), 7);
 353    return n & 0xf;
 354 }
 355
 356 void
 357 fs_visitor::emit_interpolate_expression(ir_expression *ir)
 358 {
 359    /* in SIMD16 mode, the pixel interpolator returns coords interleaved
 360     * 8 channels at a time, same as the barycentric coords presented in
 361     * the FS payload. this requires a bit of extra work to support.
 362     */
 363    no16("interpolate_at_* not yet supported in SIMD16 mode.");
 364
 365    ir_dereference * deref = ir->operands[0]->as_dereference();
 366    ir_swizzle * swiz = NULL;
 367    if (!deref) {
 368       /* the api does not allow a swizzle here, but the varying packing code
 369        * may have pushed one into here.
 370        */
 371       swiz = ir->operands[0]->as_swizzle();
 372       assert(swiz);
 373       deref = swiz->val->as_dereference();
 374    }
 375    assert(deref);
 376    ir_variable * var = deref->variable_referenced();
 377    assert(var);
 378
 379    /* 1. collect interpolation factors */
 380
 381    fs_reg dst_x = fs_reg(this, glsl_type::get_instance(ir->type->base_type, 2, 1));
 382    fs_reg dst_y = dst_x;
 383    dst_y.reg_offset++;
 384
 385    /* for most messages, we need one reg of ignored data; the hardware requires mlen==1
 386     * even when there is no payload. in the per-slot offset case, we'll replace this with
 387     * the proper source data. */
 388    fs_reg src = fs_reg(this, glsl_type::float_type);
 389    int mlen = 1;     /* one reg unless overriden */
 390    int reg_width = dispatch_width / 8;
 391    fs_inst *inst;
 392
 393    switch (ir->operation) {
 394    case ir_unop_interpolate_at_centroid:
 395       inst = emit(FS_OPCODE_INTERPOLATE_AT_CENTROID, dst_x, src, fs_reg(0u));
 396       break;
 397
 398    case ir_binop_interpolate_at_sample: {
 399       ir_constant *sample_num = ir->operands[1]->as_constant();
 400       assert(sample_num || !"nonconstant sample number should have been lowered.");
 401
 402       unsigned msg_data = sample_num->value.i[0] << 4;
 403       inst = emit(FS_OPCODE_INTERPOLATE_AT_SAMPLE, dst_x, src, fs_reg(msg_data));
 404       break;
 405    }
 406
 407    case ir_binop_interpolate_at_offset: {
 408       ir_constant *const_offset = ir->operands[1]->as_constant();
 409       if (const_offset) {
 410          unsigned msg_data = pack_pixel_offset(const_offset->value.f[0]) |
 411                             (pack_pixel_offset(const_offset->value.f[1]) << 4);
 412          inst = emit(FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET, dst_x, src,
 413                      fs_reg(msg_data));
 414       } else {
 415          /* pack the operands: hw wants offsets as 4 bit signed ints */
 416          ir->operands[1]->accept(this);
 417          src = fs_reg(this, glsl_type::ivec2_type);
 418          fs_reg src2 = src;
 419          for (int i = 0; i < 2; i++) {
 420             fs_reg temp = fs_reg(this, glsl_type::float_type);
 421             emit(MUL(temp, this->result, fs_reg(16.0f)));
 422             emit(MOV(src2, temp));  /* float to int */
 423
 424             /* Clamp the upper end of the range to +7/16. ARB_gpu_shader5 requires
 425              * that we support a maximum offset of +0.5, which isn't representable
 426              * in a S0.4 value -- if we didn't clamp it, we'd end up with -8/16,
 427              * which is the opposite of what the shader author wanted.
 428              *
 429              * This is legal due to ARB_gpu_shader5's quantization rules:
 430              *
 431              * "Not all values of <offset> may be supported; x and y offsets may
 432              * be rounded to fixed-point values with the number of fraction bits
 433              * given by the implementation-dependent constant
 434              * FRAGMENT_INTERPOLATION_OFFSET_BITS"
 435              */
 436
 437             fs_inst *inst = emit(BRW_OPCODE_SEL, src2, src2, fs_reg(7));
 438             inst->conditional_mod = BRW_CONDITIONAL_L; /* min(src2, 7) */
 439
 440             src2.reg_offset++;
 441             this->result.reg_offset++;
 442          }
 443
 444          mlen = 2 * reg_width;
 445          inst = emit(FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET, dst_x, src,
 446                      fs_reg(0u));
 447       }
 448       break;
 449    }
 450
 451    default:
 452       unreachable("not reached");
 453    }
 454
 455    inst->mlen = mlen;
 456    inst->regs_written = 2 * reg_width; /* 2 floats per slot returned */
 457    inst->pi_noperspective = var->determine_interpolation_mode(key->flat_shade) ==
 458          INTERP_QUALIFIER_NOPERSPECTIVE;
 459
 460    /* 2. emit linterp */
 461
 462    fs_reg res(this, ir->type);
 463    this->result = res;
 464
 465    for (int i = 0; i < ir->type->vector_elements; i++) {
 466       int ch = swiz ? ((*(int *)&swiz->mask) >> 2*i) & 3 : i;
 467       emit(FS_OPCODE_LINTERP, res,
 468            dst_x, dst_y,
 469            fs_reg(interp_reg(var->data.location, ch)));
 470       res.reg_offset++;
 471    }
 472 }
 473
 474 void
 475 fs_visitor::visit(ir_expression *ir)
 476 {
 477    unsigned int operand;
 478    fs_reg op[3], temp;
 479    fs_inst *inst;
 480
 481    assert(ir->get_num_operands() <= 3);
 482
 483    if (try_emit_saturate(ir))
 484       return;
 485
 486    /* Deal with the real oddball stuff first */
 487    switch (ir->operation) {
 488    case ir_binop_add:
 489       if (try_emit_mad(ir))
 490          return;
 491       break;
 492
 493    case ir_unop_interpolate_at_centroid:
 494    case ir_binop_interpolate_at_offset:
 495    case ir_binop_interpolate_at_sample:
 496       emit_interpolate_expression(ir);
 497       return;
 498
 499    default:
 500       break;
 501    }
 502
 503    for (operand = 0; operand < ir->get_num_operands(); operand++) {
 504       ir->operands[operand]->accept(this);
 505       if (this->result.file == BAD_FILE) {
 506          fail("Failed to get tree for expression operand:\n");
 507          ir->operands[operand]->fprint(stderr);
 508          fprintf(stderr, "\n");
 509       }
 510       assert(this->result.is_valid_3src());
 511       op[operand] = this->result;
 512
 513       /* Matrix expression operands should have been broken down to vector
 514        * operations already.
 515        */
 516       assert(!ir->operands[operand]->type->is_matrix());
 517       /* And then those vector operands should have been broken down to scalar.
 518        */
 519       assert(!ir->operands[operand]->type->is_vector());
 520    }
 521
 522    /* Storage for our result.  If our result goes into an assignment, it will
 523     * just get copy-propagated out, so no worries.
 524     */
 525    this->result = fs_reg(this, ir->type);
 526
 527    switch (ir->operation) {
 528    case ir_unop_logic_not:
 529       /* Note that BRW_OPCODE_NOT is not appropriate here, since it is
 530        * ones complement of the whole register, not just bit 0.
 531        */
 532       emit(XOR(this->result, op[0], fs_reg(1)));
 533       break;
 534    case ir_unop_neg:
 535       op[0].negate = !op[0].negate;
 536       emit(MOV(this->result, op[0]));
 537       break;
 538    case ir_unop_abs:
 539       op[0].abs = true;
 540       op[0].negate = false;
 541       emit(MOV(this->result, op[0]));
 542       break;
 543    case ir_unop_sign:
 544       if (ir->type->is_float()) {
 545          /* AND(val, 0x80000000) gives the sign bit.
 546           *
 547           * Predicated OR ORs 1.0 (0x3f800000) with the sign bit if val is not
 548           * zero.
 549           */
 550          emit(CMP(reg_null_f, op[0], fs_reg(0.0f), BRW_CONDITIONAL_NZ));
 551
 552          op[0].type = BRW_REGISTER_TYPE_UD;
 553          this->result.type = BRW_REGISTER_TYPE_UD;
 554          emit(AND(this->result, op[0], fs_reg(0x80000000u)));
 555
 556          inst = emit(OR(this->result, this->result, fs_reg(0x3f800000u)));
 557          inst->predicate = BRW_PREDICATE_NORMAL;
 558
 559          this->result.type = BRW_REGISTER_TYPE_F;
 560       } else {
 561          /*  ASR(val, 31) -> negative val generates 0xffffffff (signed -1).
 562           *               -> non-negative val generates 0x00000000.
 563           *  Predicated OR sets 1 if val is positive.
 564           */
 565          emit(CMP(reg_null_d, op[0], fs_reg(0), BRW_CONDITIONAL_G));
 566
 567          emit(ASR(this->result, op[0], fs_reg(31)));
 568
 569          inst = emit(OR(this->result, this->result, fs_reg(1)));
 570          inst->predicate = BRW_PREDICATE_NORMAL;
 571       }
 572       break;
 573    case ir_unop_rcp:
 574       emit_math(SHADER_OPCODE_RCP, this->result, op[0]);
 575       break;
 576
 577    case ir_unop_exp2:
 578       emit_math(SHADER_OPCODE_EXP2, this->result, op[0]);
 579       break;
 580    case ir_unop_log2:
 581       emit_math(SHADER_OPCODE_LOG2, this->result, op[0]);
 582       break;
 583    case ir_unop_exp:
 584    case ir_unop_log:
 585       unreachable("not reached: should be handled by ir_explog_to_explog2");
 586    case ir_unop_sin:
 587    case ir_unop_sin_reduced:
 588       emit_math(SHADER_OPCODE_SIN, this->result, op[0]);
 589       break;
 590    case ir_unop_cos:
 591    case ir_unop_cos_reduced:
 592       emit_math(SHADER_OPCODE_COS, this->result, op[0]);
 593       break;
 594
 595    case ir_unop_dFdx:
 596       emit(FS_OPCODE_DDX, this->result, op[0]);
 597       break;
 598    case ir_unop_dFdy:
 599       emit(FS_OPCODE_DDY, this->result, op[0]);
 600       break;
 601
 602    case ir_binop_add:
 603       emit(ADD(this->result, op[0], op[1]));
 604       break;
 605    case ir_binop_sub:
 606       unreachable("not reached: should be handled by ir_sub_to_add_neg");
 607
 608    case ir_binop_mul:
 609       if (brw->gen < 8 && ir->type->is_integer()) {
 610          /* For integer multiplication, the MUL uses the low 16 bits
 611           * of one of the operands (src0 on gen6, src1 on gen7).  The
 612           * MACH accumulates in the contribution of the upper 16 bits
 613           * of that operand.
 614           */
 615          if (ir->operands[0]->is_uint16_constant()) {
 616             if (brw->gen < 7)
 617                emit(MUL(this->result, op[0], op[1]));
 618             else
 619                emit(MUL(this->result, op[1], op[0]));
 620          } else if (ir->operands[1]->is_uint16_constant()) {
 621             if (brw->gen < 7)
 622                emit(MUL(this->result, op[1], op[0]));
 623             else
 624                emit(MUL(this->result, op[0], op[1]));
 625          } else {
 626             if (brw->gen >= 7)
 627                no16("SIMD16 explicit accumulator operands unsupported\n");
 628
 629             struct brw_reg acc = retype(brw_acc_reg(), this->result.type);
 630
 631             emit(MUL(acc, op[0], op[1]));
 632             emit(MACH(reg_null_d, op[0], op[1]));
 633             emit(MOV(this->result, fs_reg(acc)));
 634          }
 635       } else {
 636          emit(MUL(this->result, op[0], op[1]));
 637       }
 638       break;
 639    case ir_binop_imul_high: {
 640       if (brw->gen >= 7)
 641          no16("SIMD16 explicit accumulator operands unsupported\n");
 642
 643       struct brw_reg acc = retype(brw_acc_reg(), this->result.type);
 644
 645       emit(MUL(acc, op[0], op[1]));
 646       emit(MACH(this->result, op[0], op[1]));
 647       break;
 648    }
 649    case ir_binop_div:
 650       /* Floating point should be lowered by DIV_TO_MUL_RCP in the compiler. */
 651       assert(ir->type->is_integer());
 652       emit_math(SHADER_OPCODE_INT_QUOTIENT, this->result, op[0], op[1]);
 653       break;
 654    case ir_binop_carry: {
 655       if (brw->gen >= 7)
 656          no16("SIMD16 explicit accumulator operands unsupported\n");
 657
 658       struct brw_reg acc = retype(brw_acc_reg(), BRW_REGISTER_TYPE_UD);
 659
 660       emit(ADDC(reg_null_ud, op[0], op[1]));
 661       emit(MOV(this->result, fs_reg(acc)));
 662       break;
 663    }
 664    case ir_binop_borrow: {
 665       if (brw->gen >= 7)
 666          no16("SIMD16 explicit accumulator operands unsupported\n");
 667
 668       struct brw_reg acc = retype(brw_acc_reg(), BRW_REGISTER_TYPE_UD);
 669
 670       emit(SUBB(reg_null_ud, op[0], op[1]));
 671       emit(MOV(this->result, fs_reg(acc)));
 672       break;
 673    }
 674    case ir_binop_mod:
 675       /* Floating point should be lowered by MOD_TO_FRACT in the compiler. */
 676       assert(ir->type->is_integer());
 677       emit_math(SHADER_OPCODE_INT_REMAINDER, this->result, op[0], op[1]);
 678       break;
 679
 680    case ir_binop_less:
 681    case ir_binop_greater:
 682    case ir_binop_lequal:
 683    case ir_binop_gequal:
 684    case ir_binop_equal:
 685    case ir_binop_all_equal:
 686    case ir_binop_nequal:
 687    case ir_binop_any_nequal:
 688       resolve_bool_comparison(ir->operands[0], &op[0]);
 689       resolve_bool_comparison(ir->operands[1], &op[1]);
 690
 691       emit(CMP(this->result, op[0], op[1],
 692                brw_conditional_for_comparison(ir->operation)));
 693       break;
 694
 695    case ir_binop_logic_xor:
 696       emit(XOR(this->result, op[0], op[1]));
 697       break;
 698
 699    case ir_binop_logic_or:
 700       emit(OR(this->result, op[0], op[1]));
 701       break;
 702
 703    case ir_binop_logic_and:
 704       emit(AND(this->result, op[0], op[1]));
 705       break;
 706
 707    case ir_binop_dot:
 708    case ir_unop_any:
 709       unreachable("not reached: should be handled by brw_fs_channel_expressions");
 710
 711    case ir_unop_noise:
 712       unreachable("not reached: should be handled by lower_noise");
 713
 714    case ir_quadop_vector:
 715       unreachable("not reached: should be handled by lower_quadop_vector");
 716
 717    case ir_binop_vector_extract:
 718       unreachable("not reached: should be handled by lower_vec_index_to_cond_assign()");
 719
 720    case ir_triop_vector_insert:
 721       unreachable("not reached: should be handled by lower_vector_insert()");
 722
 723    case ir_binop_ldexp:
 724       unreachable("not reached: should be handled by ldexp_to_arith()");
 725
 726    case ir_unop_sqrt:
 727       emit_math(SHADER_OPCODE_SQRT, this->result, op[0]);
 728       break;
 729
 730    case ir_unop_rsq:
 731       emit_math(SHADER_OPCODE_RSQ, this->result, op[0]);
 732       break;
 733
 734    case ir_unop_bitcast_i2f:
 735    case ir_unop_bitcast_u2f:
 736       op[0].type = BRW_REGISTER_TYPE_F;
 737       this->result = op[0];
 738       break;
 739    case ir_unop_i2u:
 740    case ir_unop_bitcast_f2u:
 741       op[0].type = BRW_REGISTER_TYPE_UD;
 742       this->result = op[0];
 743       break;
 744    case ir_unop_u2i:
 745    case ir_unop_bitcast_f2i:
 746       op[0].type = BRW_REGISTER_TYPE_D;
 747       this->result = op[0];
 748       break;
 749    case ir_unop_i2f:
 750    case ir_unop_u2f:
 751    case ir_unop_f2i:
 752    case ir_unop_f2u:
 753       emit(MOV(this->result, op[0]));
 754       break;
 755
 756    case ir_unop_b2i:
 757       emit(AND(this->result, op[0], fs_reg(1)));
 758       break;
 759    case ir_unop_b2f:
 760       temp = fs_reg(this, glsl_type::int_type);
 761       emit(AND(temp, op[0], fs_reg(1)));
 762       emit(MOV(this->result, temp));
 763       break;
 764
 765    case ir_unop_f2b:
 766       emit(CMP(this->result, op[0], fs_reg(0.0f), BRW_CONDITIONAL_NZ));
 767       break;
 768    case ir_unop_i2b:
 769       emit(CMP(this->result, op[0], fs_reg(0), BRW_CONDITIONAL_NZ));
 770       break;
 771
 772    case ir_unop_trunc:
 773       emit(RNDZ(this->result, op[0]));
 774       break;
 775    case ir_unop_ceil:
 776       op[0].negate = !op[0].negate;
 777       emit(RNDD(this->result, op[0]));
 778       this->result.negate = true;
 779       break;
 780    case ir_unop_floor:
 781       emit(RNDD(this->result, op[0]));
 782       break;
 783    case ir_unop_fract:
 784       emit(FRC(this->result, op[0]));
 785       break;
 786    case ir_unop_round_even:
 787       emit(RNDE(this->result, op[0]));
 788       break;
 789
 790    case ir_binop_min:
 791    case ir_binop_max:
 792       resolve_ud_negate(&op[0]);
 793       resolve_ud_negate(&op[1]);
 794       emit_minmax(ir->operation == ir_binop_min ?
 795                   BRW_CONDITIONAL_L : BRW_CONDITIONAL_GE,
 796                   this->result, op[0], op[1]);
 797       break;
 798    case ir_unop_pack_snorm_2x16:
 799    case ir_unop_pack_snorm_4x8:
 800    case ir_unop_pack_unorm_2x16:
 801    case ir_unop_pack_unorm_4x8:
 802    case ir_unop_unpack_snorm_2x16:
 803    case ir_unop_unpack_snorm_4x8:
 804    case ir_unop_unpack_unorm_2x16:
 805    case ir_unop_unpack_unorm_4x8:
 806    case ir_unop_unpack_half_2x16:
 807    case ir_unop_pack_half_2x16:
 808       unreachable("not reached: should be handled by lower_packing_builtins");
 809    case ir_unop_unpack_half_2x16_split_x:
 810       emit(FS_OPCODE_UNPACK_HALF_2x16_SPLIT_X, this->result, op[0]);
 811       break;
 812    case ir_unop_unpack_half_2x16_split_y:
 813       emit(FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y, this->result, op[0]);
 814       break;
 815    case ir_binop_pow:
 816       emit_math(SHADER_OPCODE_POW, this->result, op[0], op[1]);
 817       break;
 818
 819    case ir_unop_bitfield_reverse:
 820       emit(BFREV(this->result, op[0]));
 821       break;
 822    case ir_unop_bit_count:
 823       emit(CBIT(this->result, op[0]));
 824       break;
 825    case ir_unop_find_msb:
 826       temp = fs_reg(this, glsl_type::uint_type);
 827       emit(FBH(temp, op[0]));
 828
 829       /* FBH counts from the MSB side, while GLSL's findMSB() wants the count
 830        * from the LSB side. If FBH didn't return an error (0xFFFFFFFF), then
 831        * subtract the result from 31 to convert the MSB count into an LSB count.
 832        */
 833
 834       /* FBH only supports UD type for dst, so use a MOV to convert UD to D. */
 835       emit(MOV(this->result, temp));
 836       emit(CMP(reg_null_d, this->result, fs_reg(-1), BRW_CONDITIONAL_NZ));
 837
 838       temp.negate = true;
 839       inst = emit(ADD(this->result, temp, fs_reg(31)));
 840       inst->predicate = BRW_PREDICATE_NORMAL;
 841       break;
 842    case ir_unop_find_lsb:
 843       emit(FBL(this->result, op[0]));
 844       break;
 845    case ir_triop_bitfield_extract:
 846       /* Note that the instruction's argument order is reversed from GLSL
 847        * and the IR.
 848        */
 849       emit(BFE(this->result, op[2], op[1], op[0]));
 850       break;
 851    case ir_binop_bfm:
 852       emit(BFI1(this->result, op[0], op[1]));
 853       break;
 854    case ir_triop_bfi:
 855       emit(BFI2(this->result, op[0], op[1], op[2]));
 856       break;
 857    case ir_quadop_bitfield_insert:
 858       unreachable("not reached: should be handled by "
 859               "lower_instructions::bitfield_insert_to_bfm_bfi");
 860
 861    case ir_unop_bit_not:
 862       emit(NOT(this->result, op[0]));
 863       break;
 864    case ir_binop_bit_and:
 865       emit(AND(this->result, op[0], op[1]));
 866       break;
 867    case ir_binop_bit_xor:
 868       emit(XOR(this->result, op[0], op[1]));
 869       break;
 870    case ir_binop_bit_or:
 871       emit(OR(this->result, op[0], op[1]));
 872       break;
 873
 874    case ir_binop_lshift:
 875       emit(SHL(this->result, op[0], op[1]));
 876       break;
 877
 878    case ir_binop_rshift:
 879       if (ir->type->base_type == GLSL_TYPE_INT)
 880          emit(ASR(this->result, op[0], op[1]));
 881       else
 882          emit(SHR(this->result, op[0], op[1]));
 883       break;
 884    case ir_binop_pack_half_2x16_split:
 885       emit(FS_OPCODE_PACK_HALF_2x16_SPLIT, this->result, op[0], op[1]);
 886       break;
 887    case ir_binop_ubo_load: {
 888       /* This IR node takes a constant uniform block and a constant or
 889        * variable byte offset within the block and loads a vector from that.
 890        */
 891       ir_constant *uniform_block = ir->operands[0]->as_constant();
 892       ir_constant *const_offset = ir->operands[1]->as_constant();
 893       fs_reg surf_index = fs_reg(prog_data->base.binding_table.ubo_start +
 894                                  uniform_block->value.u[0]);
 895       if (const_offset) {
 896          fs_reg packed_consts = fs_reg(this, glsl_type::float_type);
 897          packed_consts.type = result.type;
 898
 899          fs_reg const_offset_reg = fs_reg(const_offset->value.u[0] & ~15);
 900          emit(new(mem_ctx) fs_inst(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD,
 901                                    packed_consts, surf_index, const_offset_reg));
 902
 903          for (int i = 0; i < ir->type->vector_elements; i++) {
 904             packed_consts.set_smear(const_offset->value.u[0] % 16 / 4 + i);
 905
 906             /* The std140 packing rules don't allow vectors to cross 16-byte
 907              * boundaries, and a reg is 32 bytes.
 908              */
 909             assert(packed_consts.subreg_offset < 32);
 910
 911             /* UBO bools are any nonzero value.  We consider bools to be
 912              * values with the low bit set to 1.  Convert them using CMP.
 913              */
 914             if (ir->type->base_type == GLSL_TYPE_BOOL) {
 915                emit(CMP(result, packed_consts, fs_reg(0u), BRW_CONDITIONAL_NZ));
 916             } else {
 917                emit(MOV(result, packed_consts));
 918             }
 919
 920             result.reg_offset++;
 921          }
 922       } else {
 923          /* Turn the byte offset into a dword offset. */
 924          fs_reg base_offset = fs_reg(this, glsl_type::int_type);
 925          emit(SHR(base_offset, op[1], fs_reg(2)));
 926
 927          for (int i = 0; i < ir->type->vector_elements; i++) {
 928             emit(VARYING_PULL_CONSTANT_LOAD(result, surf_index,
 929                                             base_offset, i));
 930
 931             if (ir->type->base_type == GLSL_TYPE_BOOL)
 932                emit(CMP(result, result, fs_reg(0), BRW_CONDITIONAL_NZ));
 933
 934             result.reg_offset++;
 935          }
 936       }
 937
 938       result.reg_offset = 0;
 939       break;
 940    }
 941
 942    case ir_triop_fma:
 943       /* Note that the instruction's argument order is reversed from GLSL
 944        * and the IR.
 945        */
 946       emit(MAD(this->result, op[2], op[1], op[0]));
 947       break;
 948
 949    case ir_triop_lrp:
 950       emit_lrp(this->result, op[0], op[1], op[2]);
 951       break;
 952
 953    case ir_triop_csel:
 954       emit(CMP(reg_null_d, op[0], fs_reg(0), BRW_CONDITIONAL_NZ));
 955       inst = emit(BRW_OPCODE_SEL, this->result, op[1], op[2]);
 956       inst->predicate = BRW_PREDICATE_NORMAL;
 957       break;
 958
 959    case ir_unop_interpolate_at_centroid:
 960    case ir_binop_interpolate_at_offset:
 961    case ir_binop_interpolate_at_sample:
 962       unreachable("already handled above");
 963       break;
 964    }
 965 }
 966
 967 void
 968 fs_visitor::emit_assignment_writes(fs_reg &l, fs_reg &r,
 969                                    const glsl_type *type, bool predicated)
 970 {
 971    switch (type->base_type) {
 972    case GLSL_TYPE_FLOAT:
 973    case GLSL_TYPE_UINT:
 974    case GLSL_TYPE_INT:
 975    case GLSL_TYPE_BOOL:
 976       for (unsigned int i = 0; i < type->components(); i++) {
 977          l.type = brw_type_for_base_type(type);
 978          r.type = brw_type_for_base_type(type);
 979
 980          if (predicated || !l.equals(r)) {
 981             fs_inst *inst = emit(MOV(l, r));
 982             inst->predicate = predicated ? BRW_PREDICATE_NORMAL : BRW_PREDICATE_NONE;
 983          }
 984
 985          l.reg_offset++;
 986          r.reg_offset++;
 987       }
 988       break;
 989    case GLSL_TYPE_ARRAY:
 990       for (unsigned int i = 0; i < type->length; i++) {
 991          emit_assignment_writes(l, r, type->fields.array, predicated);
 992       }
 993       break;
 994
 995    case GLSL_TYPE_STRUCT:
 996       for (unsigned int i = 0; i < type->length; i++) {
 997          emit_assignment_writes(l, r, type->fields.structure[i].type,
 998                                 predicated);
 999       }
1000       break;
1001
1002    case GLSL_TYPE_SAMPLER:
1003    case GLSL_TYPE_IMAGE:
1004    case GLSL_TYPE_ATOMIC_UINT:
1005       break;
1006
1007    case GLSL_TYPE_VOID:
1008    case GLSL_TYPE_ERROR:
1009    case GLSL_TYPE_INTERFACE:
1010       unreachable("not reached");
1011    }
1012 }
1013
1014 /* If the RHS processing resulted in an instruction generating a
1015  * temporary value, and it would be easy to rewrite the instruction to
1016  * generate its result right into the LHS instead, do so.  This ends
1017  * up reliably removing instructions where it can be tricky to do so
1018  * later without real UD chain information.
1019  */
1020 bool
1021 fs_visitor::try_rewrite_rhs_to_dst(ir_assignment *ir,
1022                                    fs_reg dst,
1023                                    fs_reg src,
1024                                    fs_inst *pre_rhs_inst,
1025                                    fs_inst *last_rhs_inst)
1026 {
1027    /* Only attempt if we're doing a direct assignment. */
1028    if (ir->condition ||
1029        !(ir->lhs->type->is_scalar() ||
1030         (ir->lhs->type->is_vector() &&
1031          ir->write_mask == (1 << ir->lhs->type->vector_elements) - 1)))
1032       return false;
1033
1034    /* Make sure the last instruction generated our source reg. */
1035    fs_inst *modify = get_instruction_generating_reg(pre_rhs_inst,
1036                                                     last_rhs_inst,
1037                                                     src);
1038    if (!modify)
1039       return false;
1040
1041    /* If last_rhs_inst wrote a different number of components than our LHS,
1042     * we can't safely rewrite it.
1043     */
1044    if (virtual_grf_sizes[dst.reg] != modify->regs_written)
1045       return false;
1046
1047    /* Success!  Rewrite the instruction. */
1048    modify->dst = dst;
1049
1050    return true;
1051 }
1052
1053 void
1054 fs_visitor::visit(ir_assignment *ir)
1055 {
1056    fs_reg l, r;
1057    fs_inst *inst;
1058
1059    /* FINISHME: arrays on the lhs */
1060    ir->lhs->accept(this);
1061    l = this->result;
1062
1063    fs_inst *pre_rhs_inst = (fs_inst *) this->instructions.get_tail();
1064
1065    ir->rhs->accept(this);
1066    r = this->result;
1067
1068    fs_inst *last_rhs_inst = (fs_inst *) this->instructions.get_tail();
1069
1070    assert(l.file != BAD_FILE);
1071    assert(r.file != BAD_FILE);
1072
1073    if (try_rewrite_rhs_to_dst(ir, l, r, pre_rhs_inst, last_rhs_inst))
1074       return;
1075
1076    if (ir->condition) {
1077       emit_bool_to_cond_code(ir->condition);
1078    }
1079
1080    if (ir->lhs->type->is_scalar() ||
1081        ir->lhs->type->is_vector()) {
1082       for (int i = 0; i < ir->lhs->type->vector_elements; i++) {
1083          if (ir->write_mask & (1 << i)) {
1084             inst = emit(MOV(l, r));
1085             if (ir->condition)
1086                inst->predicate = BRW_PREDICATE_NORMAL;
1087             r.reg_offset++;
1088          }
1089          l.reg_offset++;
1090       }
1091    } else {
1092       emit_assignment_writes(l, r, ir->lhs->type, ir->condition != NULL);
1093    }
1094 }
1095
1096 fs_inst *
1097 fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate,
1098                               fs_reg shadow_c, fs_reg lod, fs_reg dPdy,
1099                               uint32_t sampler)
1100 {
1101    int mlen;
1102    int base_mrf = 1;
1103    bool simd16 = false;
1104    fs_reg orig_dst;
1105
1106    /* g0 header. */
1107    mlen = 1;
1108
1109    if (ir->shadow_comparitor) {
1110       for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
1111          emit(MOV(fs_reg(MRF, base_mrf + mlen + i), coordinate));
1112          coordinate.reg_offset++;
1113       }
1114
1115       /* gen4's SIMD8 sampler always has the slots for u,v,r present.
1116        * the unused slots must be zeroed.
1117        */
1118       for (int i = ir->coordinate->type->vector_elements; i < 3; i++) {
1119          emit(MOV(fs_reg(MRF, base_mrf + mlen + i), fs_reg(0.0f)));
1120       }
1121       mlen += 3;
1122
1123       if (ir->op == ir_tex) {
1124          /* There's no plain shadow compare message, so we use shadow
1125           * compare with a bias of 0.0.
1126           */
1127          emit(MOV(fs_reg(MRF, base_mrf + mlen), fs_reg(0.0f)));
1128          mlen++;
1129       } else if (ir->op == ir_txb || ir->op == ir_txl) {
1130          emit(MOV(fs_reg(MRF, base_mrf + mlen), lod));
1131          mlen++;
1132       } else {
1133          unreachable("Should not get here.");
1134       }
1135
1136       emit(MOV(fs_reg(MRF, base_mrf + mlen), shadow_c));
1137       mlen++;
1138    } else if (ir->op == ir_tex) {
1139       for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
1140          emit(MOV(fs_reg(MRF, base_mrf + mlen + i), coordinate));
1141          coordinate.reg_offset++;
1142       }
1143       /* zero the others. */
1144       for (int i = ir->coordinate->type->vector_elements; i<3; i++) {
1145          emit(MOV(fs_reg(MRF, base_mrf + mlen + i), fs_reg(0.0f)));
1146       }
1147       /* gen4's SIMD8 sampler always has the slots for u,v,r present. */
1148       mlen += 3;
1149    } else if (ir->op == ir_txd) {
1150       fs_reg &dPdx = lod;
1151
1152       for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
1153          emit(MOV(fs_reg(MRF, base_mrf + mlen + i), coordinate));
1154          coordinate.reg_offset++;
1155       }
1156       /* the slots for u and v are always present, but r is optional */
1157       mlen += MAX2(ir->coordinate->type->vector_elements, 2);
1158
1159       /*  P   = u, v, r
1160        * dPdx = dudx, dvdx, drdx
1161        * dPdy = dudy, dvdy, drdy
1162        *
1163        * 1-arg: Does not exist.
1164        *
1165        * 2-arg: dudx   dvdx   dudy   dvdy
1166        *        dPdx.x dPdx.y dPdy.x dPdy.y
1167        *        m4     m5     m6     m7
1168        *
1169        * 3-arg: dudx   dvdx   drdx   dudy   dvdy   drdy
1170        *        dPdx.x dPdx.y dPdx.z dPdy.x dPdy.y dPdy.z
1171        *        m5     m6     m7     m8     m9     m10
1172        */
1173       for (int i = 0; i < ir->lod_info.grad.dPdx->type->vector_elements; i++) {
1174          emit(MOV(fs_reg(MRF, base_mrf + mlen), dPdx));
1175          dPdx.reg_offset++;
1176       }
1177       mlen += MAX2(ir->lod_info.grad.dPdx->type->vector_elements, 2);
1178
1179       for (int i = 0; i < ir->lod_info.grad.dPdy->type->vector_elements; i++) {
1180          emit(MOV(fs_reg(MRF, base_mrf + mlen), dPdy));
1181          dPdy.reg_offset++;
1182       }
1183       mlen += MAX2(ir->lod_info.grad.dPdy->type->vector_elements, 2);
1184    } else if (ir->op == ir_txs) {
1185       /* There's no SIMD8 resinfo message on Gen4.  Use SIMD16 instead. */
1186       simd16 = true;
1187       emit(MOV(fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_UD), lod));
1188       mlen += 2;
1189    } else {
1190       /* Oh joy.  gen4 doesn't have SIMD8 non-shadow-compare bias/lod
1191        * instructions.  We'll need to do SIMD16 here.
1192        */
1193       simd16 = true;
1194       assert(ir->op == ir_txb || ir->op == ir_txl || ir->op == ir_txf);
1195
1196       for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
1197          emit(MOV(fs_reg(MRF, base_mrf + mlen + i * 2, coordinate.type),
1198                   coordinate));
1199          coordinate.reg_offset++;
1200       }
1201
1202       /* Initialize the rest of u/v/r with 0.0.  Empirically, this seems to
1203        * be necessary for TXF (ld), but seems wise to do for all messages.
1204        */
1205       for (int i = ir->coordinate->type->vector_elements; i < 3; i++) {
1206          emit(MOV(fs_reg(MRF, base_mrf + mlen + i * 2), fs_reg(0.0f)));
1207       }
1208
1209       /* lod/bias appears after u/v/r. */
1210       mlen += 6;
1211
1212       emit(MOV(fs_reg(MRF, base_mrf + mlen, lod.type), lod));
1213       mlen++;
1214
1215       /* The unused upper half. */
1216       mlen++;
1217    }
1218
1219    if (simd16) {
1220       /* Now, since we're doing simd16, the return is 2 interleaved
1221        * vec4s where the odd-indexed ones are junk. We'll need to move
1222        * this weirdness around to the expected layout.
1223        */
1224       orig_dst = dst;
1225       dst = fs_reg(GRF, virtual_grf_alloc(8),
1226                    (brw->is_g4x ?
1227                     brw_type_for_base_type(ir->type) :
1228                     BRW_REGISTER_TYPE_F));
1229    }
1230
1231    enum opcode opcode;
1232
1233    switch (ir->op) {
1234    case ir_tex: opcode = SHADER_OPCODE_TEX; break;
1235    case ir_txb: opcode = FS_OPCODE_TXB; break;
1236    case ir_txl: opcode = SHADER_OPCODE_TXL; break;
1237    case ir_txd: opcode = SHADER_OPCODE_TXD; break;
1238    case ir_txs: opcode = SHADER_OPCODE_TXS; break;
1239    case ir_txf: opcode = SHADER_OPCODE_TXF; break;
1240    default:
1241       unreachable("not reached");
1242    }
1243
1244    fs_inst *inst = emit(opcode, dst, reg_undef, fs_reg(sampler));
1245    inst->base_mrf = base_mrf;
1246    inst->mlen = mlen;
1247    inst->header_present = true;
1248    inst->regs_written = simd16 ? 8 : 4;
1249
1250    if (simd16) {
1251       for (int i = 0; i < 4; i++) {
1252          emit(MOV(orig_dst, dst));
1253          orig_dst.reg_offset++;
1254          dst.reg_offset += 2;
1255       }
1256    }
1257
1258    return inst;
1259 }
1260
1261 /* gen5's sampler has slots for u, v, r, array index, then optional
1262  * parameters like shadow comparitor or LOD bias.  If optional
1263  * parameters aren't present, those base slots are optional and don't
1264  * need to be included in the message.
1265  *
1266  * We don't fill in the unnecessary slots regardless, which may look
1267  * surprising in the disassembly.
1268  */
1269 fs_inst *
1270 fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate,
1271                               fs_reg shadow_c, fs_reg lod, fs_reg lod2,
1272                               fs_reg sample_index, uint32_t sampler)
1273 {
1274    int mlen = 0;
1275    int base_mrf = 2;
1276    int reg_width = dispatch_width / 8;
1277    bool header_present = false;
1278    const int vector_elements =
1279       ir->coordinate ? ir->coordinate->type->vector_elements : 0;
1280
1281    if (ir->offset) {
1282       /* The offsets set up by the ir_texture visitor are in the
1283        * m1 header, so we can't go headerless.
1284        */
1285       header_present = true;
1286       mlen++;
1287       base_mrf--;
1288    }
1289
1290    for (int i = 0; i < vector_elements; i++) {
1291       emit(MOV(fs_reg(MRF, base_mrf + mlen + i * reg_width, coordinate.type),
1292                coordinate));
1293       coordinate.reg_offset++;
1294    }
1295    mlen += vector_elements * reg_width;
1296
1297    if (ir->shadow_comparitor) {
1298       mlen = MAX2(mlen, header_present + 4 * reg_width);
1299
1300       emit(MOV(fs_reg(MRF, base_mrf + mlen), shadow_c));
1301       mlen += reg_width;
1302    }
1303
1304    enum opcode opcode;
1305    switch (ir->op) {
1306    case ir_tex:
1307       opcode = SHADER_OPCODE_TEX;
1308       break;
1309    case ir_txb:
1310       mlen = MAX2(mlen, header_present + 4 * reg_width);
1311       emit(MOV(fs_reg(MRF, base_mrf + mlen), lod));
1312       mlen += reg_width;
1313
1314       opcode = FS_OPCODE_TXB;
1315       break;
1316    case ir_txl:
1317       mlen = MAX2(mlen, header_present + 4 * reg_width);
1318       emit(MOV(fs_reg(MRF, base_mrf + mlen), lod));
1319       mlen += reg_width;
1320
1321       opcode = SHADER_OPCODE_TXL;
1322       break;
1323    case ir_txd: {
1324       mlen = MAX2(mlen, header_present + 4 * reg_width); /* skip over 'ai' */
1325
1326       /**
1327        *  P   =  u,    v,    r
1328        * dPdx = dudx, dvdx, drdx
1329        * dPdy = dudy, dvdy, drdy
1330        *
1331        * Load up these values:
1332        * - dudx   dudy   dvdx   dvdy   drdx   drdy
1333        * - dPdx.x dPdy.x dPdx.y dPdy.y dPdx.z dPdy.z
1334        */
1335       for (int i = 0; i < ir->lod_info.grad.dPdx->type->vector_elements; i++) {
1336          emit(MOV(fs_reg(MRF, base_mrf + mlen), lod));
1337          lod.reg_offset++;
1338          mlen += reg_width;
1339
1340          emit(MOV(fs_reg(MRF, base_mrf + mlen), lod2));
1341          lod2.reg_offset++;
1342          mlen += reg_width;
1343       }
1344
1345       opcode = SHADER_OPCODE_TXD;
1346       break;
1347    }
1348    case ir_txs:
1349       emit(MOV(fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_UD), lod));
1350       mlen += reg_width;
1351
1352       opcode = SHADER_OPCODE_TXS;
1353       break;
1354    case ir_query_levels:
1355       emit(MOV(fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_UD), fs_reg(0u)));
1356       mlen += reg_width;
1357
1358       opcode = SHADER_OPCODE_TXS;
1359       break;
1360    case ir_txf:
1361       mlen = header_present + 4 * reg_width;
1362       emit(MOV(fs_reg(MRF, base_mrf + mlen - reg_width, BRW_REGISTER_TYPE_UD), lod));
1363
1364       opcode = SHADER_OPCODE_TXF;
1365       break;
1366    case ir_txf_ms:
1367       mlen = header_present + 4 * reg_width;
1368
1369       /* lod */
1370       emit(MOV(fs_reg(MRF, base_mrf + mlen - reg_width, BRW_REGISTER_TYPE_UD), fs_reg(0)));
1371       /* sample index */
1372       emit(MOV(fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_UD), sample_index));
1373       mlen += reg_width;
1374
1375       opcode = SHADER_OPCODE_TXF_CMS;
1376       break;
1377    case ir_lod:
1378       opcode = SHADER_OPCODE_LOD;
1379       break;
1380    case ir_tg4:
1381       opcode = SHADER_OPCODE_TG4;
1382       break;
1383    default:
1384       unreachable("not reached");
1385    }
1386
1387    fs_inst *inst = emit(opcode, dst, reg_undef, fs_reg(sampler));
1388    inst->base_mrf = base_mrf;
1389    inst->mlen = mlen;
1390    inst->header_present = header_present;
1391    inst->regs_written = 4;
1392
1393    if (mlen > MAX_SAMPLER_MESSAGE_SIZE) {
1394       fail("Message length >" STRINGIFY(MAX_SAMPLER_MESSAGE_SIZE)
1395            " disallowed by hardware\n");
1396    }
1397
1398    return inst;
1399 }
1400
1401 fs_inst *
1402 fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate,
1403                               fs_reg shadow_c, fs_reg lod, fs_reg lod2,
1404                               fs_reg sample_index, fs_reg mcs, uint32_t sampler)
1405 {
1406    int reg_width = dispatch_width / 8;
1407    bool header_present = false;
1408
1409    fs_reg *sources = ralloc_array(mem_ctx, fs_reg, MAX_SAMPLER_MESSAGE_SIZE);
1410    for (int i = 0; i < MAX_SAMPLER_MESSAGE_SIZE; i++) {
1411       sources[i] = fs_reg(this, glsl_type::float_type);
1412    }
1413    int length = 0;
1414
1415    if (ir->op == ir_tg4 || (ir->offset && ir->op != ir_txf) || sampler >= 16) {
1416       /* For general texture offsets (no txf workaround), we need a header to
1417        * put them in.  Note that for SIMD16 we're making space for two actual
1418        * hardware registers here, so the emit will have to fix up for this.
1419        *
1420        * * ir4_tg4 needs to place its channel select in the header,
1421        * for interaction with ARB_texture_swizzle
1422        *
1423        * The sampler index is only 4-bits, so for larger sampler numbers we
1424        * need to offset the Sampler State Pointer in the header.
1425        */
1426       header_present = true;
1427       sources[length] = reg_undef;
1428       length++;
1429    }
1430
1431    if (ir->shadow_comparitor) {
1432       emit(MOV(sources[length], shadow_c));
1433       length++;
1434    }
1435
1436    bool has_nonconstant_offset = ir->offset && !ir->offset->as_constant();
1437    bool coordinate_done = false;
1438
1439    /* Set up the LOD info */
1440    switch (ir->op) {
1441    case ir_tex:
1442    case ir_lod:
1443       break;
1444    case ir_txb:
1445       emit(MOV(sources[length], lod));
1446       length++;
1447       break;
1448    case ir_txl:
1449       emit(MOV(sources[length], lod));
1450       length++;
1451       break;
1452    case ir_txd: {
1453       no16("Gen7 does not support sample_d/sample_d_c in SIMD16 mode.");
1454
1455       /* Load dPdx and the coordinate together:
1456        * [hdr], [ref], x, dPdx.x, dPdy.x, y, dPdx.y, dPdy.y, z, dPdx.z, dPdy.z
1457        */
1458       for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
1459          emit(MOV(sources[length], coordinate));
1460          coordinate.reg_offset++;
1461          length++;
1462
1463          /* For cube map array, the coordinate is (u,v,r,ai) but there are
1464           * only derivatives for (u, v, r).
1465           */
1466          if (i < ir->lod_info.grad.dPdx->type->vector_elements) {
1467             emit(MOV(sources[length], lod));
1468             lod.reg_offset++;
1469             length++;
1470
1471             emit(MOV(sources[length], lod2));
1472             lod2.reg_offset++;
1473             length++;
1474          }
1475       }
1476
1477       coordinate_done = true;
1478       break;
1479    }
1480    case ir_txs:
1481       emit(MOV(retype(sources[length], BRW_REGISTER_TYPE_UD), lod));
1482       length++;
1483       break;
1484    case ir_query_levels:
1485       emit(MOV(retype(sources[length], BRW_REGISTER_TYPE_UD), fs_reg(0u)));
1486       length++;
1487       break;
1488    case ir_txf:
1489       /* Unfortunately, the parameters for LD are intermixed: u, lod, v, r. */
1490       emit(MOV(retype(sources[length], BRW_REGISTER_TYPE_D), coordinate));
1491       coordinate.reg_offset++;
1492       length++;
1493
1494       emit(MOV(retype(sources[length], BRW_REGISTER_TYPE_D), lod));
1495       length++;
1496
1497       for (int i = 1; i < ir->coordinate->type->vector_elements; i++) {
1498          emit(MOV(retype(sources[length], BRW_REGISTER_TYPE_D), coordinate));
1499          coordinate.reg_offset++;
1500          length++;
1501       }
1502
1503       coordinate_done = true;
1504       break;
1505    case ir_txf_ms:
1506       emit(MOV(retype(sources[length], BRW_REGISTER_TYPE_UD), sample_index));
1507       length++;
1508
1509       /* data from the multisample control surface */
1510       emit(MOV(retype(sources[length], BRW_REGISTER_TYPE_UD), mcs));
1511       length++;
1512
1513       /* there is no offsetting for this message; just copy in the integer
1514        * texture coordinates
1515        */
1516       for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
1517          emit(MOV(retype(sources[length], BRW_REGISTER_TYPE_D), coordinate));
1518          coordinate.reg_offset++;
1519          length++;
1520       }
1521
1522       coordinate_done = true;
1523       break;
1524    case ir_tg4:
1525       if (has_nonconstant_offset) {
1526          if (ir->shadow_comparitor)
1527             no16("Gen7 does not support gather4_po_c in SIMD16 mode.");
1528
1529          /* More crazy intermixing */
1530          ir->offset->accept(this);
1531          fs_reg offset_value = this->result;
1532
1533          for (int i = 0; i < 2; i++) { /* u, v */
1534             emit(MOV(sources[length], coordinate));
1535             coordinate.reg_offset++;
1536             length++;
1537          }
1538
1539          for (int i = 0; i < 2; i++) { /* offu, offv */
1540             emit(MOV(retype(sources[length], BRW_REGISTER_TYPE_D), offset_value));
1541             offset_value.reg_offset++;
1542             length++;
1543          }
1544
1545          if (ir->coordinate->type->vector_elements == 3) { /* r if present */
1546             emit(MOV(sources[length], coordinate));
1547             coordinate.reg_offset++;
1548             length++;
1549          }
1550
1551          coordinate_done = true;
1552       }
1553       break;
1554    }
1555
1556    /* Set up the coordinate (except for cases where it was done above) */
1557    if (ir->coordinate && !coordinate_done) {
1558       for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
1559          emit(MOV(sources[length], coordinate));
1560          coordinate.reg_offset++;
1561          length++;
1562       }
1563    }
1564
1565    fs_reg src_payload = fs_reg(GRF, virtual_grf_alloc(length),
1566                                BRW_REGISTER_TYPE_F);
1567    emit(LOAD_PAYLOAD(src_payload, sources, length));
1568
1569    /* Generate the SEND */
1570    enum opcode opcode;
1571    switch (ir->op) {
1572    case ir_tex: opcode = SHADER_OPCODE_TEX; break;
1573    case ir_txb: opcode = FS_OPCODE_TXB; break;
1574    case ir_txl: opcode = SHADER_OPCODE_TXL; break;
1575    case ir_txd: opcode = SHADER_OPCODE_TXD; break;
1576    case ir_txf: opcode = SHADER_OPCODE_TXF; break;
1577    case ir_txf_ms: opcode = SHADER_OPCODE_TXF_CMS; break;
1578    case ir_txs: opcode = SHADER_OPCODE_TXS; break;
1579    case ir_query_levels: opcode = SHADER_OPCODE_TXS; break;
1580    case ir_lod: opcode = SHADER_OPCODE_LOD; break;
1581    case ir_tg4:
1582       if (has_nonconstant_offset)
1583          opcode = SHADER_OPCODE_TG4_OFFSET;
1584       else
1585          opcode = SHADER_OPCODE_TG4;
1586       break;
1587    default:
1588       unreachable("not reached");
1589    }
1590    fs_inst *inst = emit(opcode, dst, src_payload, fs_reg(sampler));
1591    inst->base_mrf = -1;
1592    if (reg_width == 2)
1593       inst->mlen = length * reg_width - header_present;
1594    else
1595       inst->mlen = length * reg_width;
1596    inst->header_present = header_present;
1597    inst->regs_written = 4;
1598
1599    if (inst->mlen > MAX_SAMPLER_MESSAGE_SIZE) {
1600       fail("Message length >" STRINGIFY(MAX_SAMPLER_MESSAGE_SIZE)
1601            " disallowed by hardware\n");
1602    }
1603
1604    return inst;
1605 }
1606
1607 fs_reg
1608 fs_visitor::rescale_texcoord(ir_texture *ir, fs_reg coordinate,
1609                              bool is_rect, uint32_t sampler, int texunit)
1610 {
1611    fs_inst *inst = NULL;
1612    bool needs_gl_clamp = true;
1613    fs_reg scale_x, scale_y;
1614
1615    /* The 965 requires the EU to do the normalization of GL rectangle
1616     * texture coordinates.  We use the program parameter state
1617     * tracking to get the scaling factor.
1618     */
1619    if (is_rect &&
1620        (brw->gen < 6 ||
1621         (brw->gen >= 6 && (key->tex.gl_clamp_mask[0] & (1 << sampler) ||
1622                            key->tex.gl_clamp_mask[1] & (1 << sampler))))) {
1623       struct gl_program_parameter_list *params = prog->Parameters;
1624       int tokens[STATE_LENGTH] = {
1625          STATE_INTERNAL,
1626          STATE_TEXRECT_SCALE,
1627          texunit,
1628          0,
1629          0
1630       };
1631
1632       no16("rectangle scale uniform setup not supported on SIMD16\n");
1633       if (dispatch_width == 16) {
1634          return coordinate;
1635       }
1636
1637       GLuint index = _mesa_add_state_reference(params,
1638                                                (gl_state_index *)tokens);
1639       /* Try to find existing copies of the texrect scale uniforms. */
1640       for (unsigned i = 0; i < uniforms; i++) {
1641          if (stage_prog_data->param[i] ==
1642              &prog->Parameters->ParameterValues[index][0].f) {
1643             scale_x = fs_reg(UNIFORM, i);
1644             scale_y = fs_reg(UNIFORM, i + 1);
1645             break;
1646          }
1647       }
1648
1649       /* If we didn't already set them up, do so now. */
1650       if (scale_x.file == BAD_FILE) {
1651          scale_x = fs_reg(UNIFORM, uniforms);
1652          scale_y = fs_reg(UNIFORM, uniforms + 1);
1653
1654          stage_prog_data->param[uniforms++] =
1655             &prog->Parameters->ParameterValues[index][0].f;
1656          stage_prog_data->param[uniforms++] =
1657             &prog->Parameters->ParameterValues[index][1].f;
1658       }
1659    }
1660
1661    /* The 965 requires the EU to do the normalization of GL rectangle
1662     * texture coordinates.  We use the program parameter state
1663     * tracking to get the scaling factor.
1664     */
1665    if (brw->gen < 6 && is_rect) {
1666       fs_reg dst = fs_reg(this, ir->coordinate->type);
1667       fs_reg src = coordinate;
1668       coordinate = dst;
1669
1670       emit(MUL(dst, src, scale_x));
1671       dst.reg_offset++;
1672       src.reg_offset++;
1673       emit(MUL(dst, src, scale_y));
1674    } else if (is_rect) {
1675       /* On gen6+, the sampler handles the rectangle coordinates
1676        * natively, without needing rescaling.  But that means we have
1677        * to do GL_CLAMP clamping at the [0, width], [0, height] scale,
1678        * not [0, 1] like the default case below.
1679        */
1680       needs_gl_clamp = false;
1681
1682       for (int i = 0; i < 2; i++) {
1683          if (key->tex.gl_clamp_mask[i] & (1 << sampler)) {
1684             fs_reg chan = coordinate;
1685             chan.reg_offset += i;
1686
1687             inst = emit(BRW_OPCODE_SEL, chan, chan, fs_reg(0.0f));
1688             inst->conditional_mod = BRW_CONDITIONAL_G;
1689
1690             /* Our parameter comes in as 1.0/width or 1.0/height,
1691              * because that's what people normally want for doing
1692              * texture rectangle handling.  We need width or height
1693              * for clamping, but we don't care enough to make a new
1694              * parameter type, so just invert back.
1695              */
1696             fs_reg limit = fs_reg(this, glsl_type::float_type);
1697             emit(MOV(limit, i == 0 ? scale_x : scale_y));
1698             emit(SHADER_OPCODE_RCP, limit, limit);
1699
1700             inst = emit(BRW_OPCODE_SEL, chan, chan, limit);
1701             inst->conditional_mod = BRW_CONDITIONAL_L;
1702          }
1703       }
1704    }
1705
1706    if (ir->coordinate && needs_gl_clamp) {
1707       for (unsigned int i = 0;
1708            i < MIN2(ir->coordinate->type->vector_elements, 3); i++) {
1709          if (key->tex.gl_clamp_mask[i] & (1 << sampler)) {
1710             fs_reg chan = coordinate;
1711             chan.reg_offset += i;
1712
1713             fs_inst *inst = emit(MOV(chan, chan));
1714             inst->saturate = true;
1715          }
1716       }
1717    }
1718    return coordinate;
1719 }
1720
1721 /* Sample from the MCS surface attached to this multisample texture. */
1722 fs_reg
1723 fs_visitor::emit_mcs_fetch(ir_texture *ir, fs_reg coordinate, uint32_t sampler)
1724 {
1725    int reg_width = dispatch_width / 8;
1726    int length = ir->coordinate->type->vector_elements;
1727    fs_reg payload = fs_reg(GRF, virtual_grf_alloc(length),
1728                            BRW_REGISTER_TYPE_F);
1729    fs_reg dest = fs_reg(this, glsl_type::uvec4_type);
1730    fs_reg *sources = ralloc_array(mem_ctx, fs_reg, length);
1731
1732    /* parameters are: u, v, r; missing parameters are treated as zero */
1733    for (int i = 0; i < length; i++) {
1734       sources[i] = fs_reg(this, glsl_type::float_type);
1735       emit(MOV(retype(sources[i], BRW_REGISTER_TYPE_D), coordinate));
1736       coordinate.reg_offset++;
1737    }
1738
1739    emit(LOAD_PAYLOAD(payload, sources, length));
1740
1741    fs_inst *inst = emit(SHADER_OPCODE_TXF_MCS, dest, payload, fs_reg(sampler));
1742    inst->base_mrf = -1;
1743    inst->mlen = length * reg_width;
1744    inst->header_present = false;
1745    inst->regs_written = 4; /* we only care about one reg of response,
1746                             * but the sampler always writes 4/8
1747                             */
1748
1749    return dest;
1750 }
1751
1752 void
1753 fs_visitor::visit(ir_texture *ir)
1754 {
1755    fs_inst *inst = NULL;
1756
1757    uint32_t sampler =
1758       _mesa_get_sampler_uniform_value(ir->sampler, shader_prog, prog);
1759    /* FINISHME: We're failing to recompile our programs when the sampler is
1760     * updated.  This only matters for the texture rectangle scale parameters
1761     * (pre-gen6, or gen6+ with GL_CLAMP).
1762     */
1763    int texunit = prog->SamplerUnits[sampler];
1764
1765    if (ir->op == ir_tg4) {
1766       /* When tg4 is used with the degenerate ZERO/ONE swizzles, don't bother
1767        * emitting anything other than setting up the constant result.
1768        */
1769       ir_constant *chan = ir->lod_info.component->as_constant();
1770       int swiz = GET_SWZ(key->tex.swizzles[sampler], chan->value.i[0]);
1771       if (swiz == SWIZZLE_ZERO || swiz == SWIZZLE_ONE) {
1772
1773          fs_reg res = fs_reg(this, glsl_type::vec4_type);
1774          this->result = res;
1775
1776          for (int i=0; i<4; i++) {
1777             emit(MOV(res, fs_reg(swiz == SWIZZLE_ZERO ? 0.0f : 1.0f)));
1778             res.reg_offset++;
1779          }
1780          return;
1781       }
1782    }
1783
1784    /* Should be lowered by do_lower_texture_projection */
1785    assert(!ir->projector);
1786
1787    /* Should be lowered */
1788    assert(!ir->offset || !ir->offset->type->is_array());
1789
1790    /* Generate code to compute all the subexpression trees.  This has to be
1791     * done before loading any values into MRFs for the sampler message since
1792     * generating these values may involve SEND messages that need the MRFs.
1793     */
1794    fs_reg coordinate;
1795    if (ir->coordinate) {
1796       ir->coordinate->accept(this);
1797
1798       coordinate = rescale_texcoord(ir, this->result,
1799                                     ir->sampler->type->sampler_dimensionality ==
1800                                     GLSL_SAMPLER_DIM_RECT,
1801                                     sampler, texunit);
1802    }
1803
1804    fs_reg shadow_comparitor;
1805    if (ir->shadow_comparitor) {
1806       ir->shadow_comparitor->accept(this);
1807       shadow_comparitor = this->result;
1808    }
1809
1810    fs_reg lod, lod2, sample_index, mcs;
1811    switch (ir->op) {
1812    case ir_tex:
1813    case ir_lod:
1814    case ir_tg4:
1815    case ir_query_levels:
1816       break;
1817    case ir_txb:
1818       ir->lod_info.bias->accept(this);
1819       lod = this->result;
1820       break;
1821    case ir_txd:
1822       ir->lod_info.grad.dPdx->accept(this);
1823       lod = this->result;
1824
1825       ir->lod_info.grad.dPdy->accept(this);
1826       lod2 = this->result;
1827       break;
1828    case ir_txf:
1829    case ir_txl:
1830    case ir_txs:
1831       ir->lod_info.lod->accept(this);
1832       lod = this->result;
1833       break;
1834    case ir_txf_ms:
1835       ir->lod_info.sample_index->accept(this);
1836       sample_index = this->result;
1837
1838       if (brw->gen >= 7 && key->tex.compressed_multisample_layout_mask & (1<<sampler))
1839          mcs = emit_mcs_fetch(ir, coordinate, sampler);
1840       else
1841          mcs = fs_reg(0u);
1842       break;
1843    default:
1844       unreachable("Unrecognized texture opcode");
1845    };
1846
1847    /* Writemasking doesn't eliminate channels on SIMD8 texture
1848     * samples, so don't worry about them.
1849     */
1850    fs_reg dst = fs_reg(this, glsl_type::get_instance(ir->type->base_type, 4, 1));
1851
1852    if (brw->gen >= 7) {
1853       inst = emit_texture_gen7(ir, dst, coordinate, shadow_comparitor,
1854                                lod, lod2, sample_index, mcs, sampler);
1855    } else if (brw->gen >= 5) {
1856       inst = emit_texture_gen5(ir, dst, coordinate, shadow_comparitor,
1857                                lod, lod2, sample_index, sampler);
1858    } else {
1859       inst = emit_texture_gen4(ir, dst, coordinate, shadow_comparitor,
1860                                lod, lod2, sampler);
1861    }
1862
1863    if (ir->offset != NULL && ir->op != ir_txf)
1864       inst->texture_offset = brw_texture_offset(ctx, ir->offset->as_constant());
1865
1866    if (ir->op == ir_tg4)
1867       inst->texture_offset |= gather_channel(ir, sampler) << 16; // M0.2:16-17
1868
1869    if (ir->shadow_comparitor)
1870       inst->shadow_compare = true;
1871
1872    /* fixup #layers for cube map arrays */
1873    if (ir->op == ir_txs) {
1874       glsl_type const *type = ir->sampler->type;
1875       if (type->sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE &&
1876           type->sampler_array) {
1877          fs_reg depth = dst;
1878          depth.reg_offset = 2;
1879          fs_reg fixed_depth = fs_reg(this, glsl_type::int_type);
1880          emit_math(SHADER_OPCODE_INT_QUOTIENT, fixed_depth, depth, fs_reg(6));
1881
1882          fs_reg *fixed_payload = ralloc_array(mem_ctx, fs_reg, inst->regs_written);
1883          fs_reg d = dst;
1884          for (int i = 0; i < inst->regs_written; i++) {
1885             if (i == 2) {
1886                fixed_payload[i] = fixed_depth;
1887             } else {
1888                d.reg_offset = i;
1889                fixed_payload[i] = d;
1890             }
1891          }
1892          emit(LOAD_PAYLOAD(dst, fixed_payload, inst->regs_written));
1893       }
1894    }
1895
1896    if (brw->gen == 6 && ir->op == ir_tg4) {
1897       emit_gen6_gather_wa(key->tex.gen6_gather_wa[sampler], dst);
1898    }
1899
1900    swizzle_result(ir, dst, sampler);
1901 }
1902
1903 /**
1904  * Apply workarounds for Gen6 gather with UINT/SINT
1905  */
1906 void
1907 fs_visitor::emit_gen6_gather_wa(uint8_t wa, fs_reg dst)
1908 {
1909    if (!wa)
1910       return;
1911
1912    int width = (wa & WA_8BIT) ? 8 : 16;
1913
1914    for (int i = 0; i < 4; i++) {
1915       fs_reg dst_f = retype(dst, BRW_REGISTER_TYPE_F);
1916       /* Convert from UNORM to UINT */
1917       emit(MUL(dst_f, dst_f, fs_reg((float)((1 << width) - 1))));
1918       emit(MOV(dst, dst_f));
1919
1920       if (wa & WA_SIGN) {
1921          /* Reinterpret the UINT value as a signed INT value by
1922           * shifting the sign bit into place, then shifting back
1923           * preserving sign.
1924           */
1925          emit(SHL(dst, dst, fs_reg(32 - width)));
1926          emit(ASR(dst, dst, fs_reg(32 - width)));
1927       }
1928
1929       dst.reg_offset++;
1930    }
1931 }
1932
1933 /**
1934  * Set up the gather channel based on the swizzle, for gather4.
1935  */
1936 uint32_t
1937 fs_visitor::gather_channel(ir_texture *ir, uint32_t sampler)
1938 {
1939    ir_constant *chan = ir->lod_info.component->as_constant();
1940    int swiz = GET_SWZ(key->tex.swizzles[sampler], chan->value.i[0]);
1941    switch (swiz) {
1942       case SWIZZLE_X: return 0;
1943       case SWIZZLE_Y:
1944          /* gather4 sampler is broken for green channel on RG32F --
1945           * we must ask for blue instead.
1946           */
1947          if (key->tex.gather_channel_quirk_mask & (1<<sampler))
1948             return 2;
1949          return 1;
1950       case SWIZZLE_Z: return 2;
1951       case SWIZZLE_W: return 3;
1952       default:
1953          unreachable("Not reached"); /* zero, one swizzles handled already */
1954    }
1955 }
1956
1957 /**
1958  * Swizzle the result of a texture result.  This is necessary for
1959  * EXT_texture_swizzle as well as DEPTH_TEXTURE_MODE for shadow comparisons.
1960  */
1961 void
1962 fs_visitor::swizzle_result(ir_texture *ir, fs_reg orig_val, uint32_t sampler)
1963 {
1964    if (ir->op == ir_query_levels) {
1965       /* # levels is in .w */
1966       orig_val.reg_offset += 3;
1967       this->result = orig_val;
1968       return;
1969    }
1970
1971    this->result = orig_val;
1972
1973    /* txs,lod don't actually sample the texture, so swizzling the result
1974     * makes no sense.
1975     */
1976    if (ir->op == ir_txs || ir->op == ir_lod || ir->op == ir_tg4)
1977       return;
1978
1979    if (ir->type == glsl_type::float_type) {
1980       /* Ignore DEPTH_TEXTURE_MODE swizzling. */
1981       assert(ir->sampler->type->sampler_shadow);
1982    } else if (key->tex.swizzles[sampler] != SWIZZLE_NOOP) {
1983       fs_reg swizzled_result = fs_reg(this, glsl_type::vec4_type);
1984
1985       for (int i = 0; i < 4; i++) {
1986          int swiz = GET_SWZ(key->tex.swizzles[sampler], i);
1987          fs_reg l = swizzled_result;
1988          l.reg_offset += i;
1989
1990          if (swiz == SWIZZLE_ZERO) {
1991             emit(MOV(l, fs_reg(0.0f)));
1992          } else if (swiz == SWIZZLE_ONE) {
1993             emit(MOV(l, fs_reg(1.0f)));
1994          } else {
1995             fs_reg r = orig_val;
1996             r.reg_offset += GET_SWZ(key->tex.swizzles[sampler], i);
1997             emit(MOV(l, r));
1998          }
1999       }
2000       this->result = swizzled_result;
2001    }
2002 }
2003
2004 void
2005 fs_visitor::visit(ir_swizzle *ir)
2006 {
2007    ir->val->accept(this);
2008    fs_reg val = this->result;
2009
2010    if (ir->type->vector_elements == 1) {
2011       this->result.reg_offset += ir->mask.x;
2012       return;
2013    }
2014
2015    fs_reg result = fs_reg(this, ir->type);
2016    this->result = result;
2017
2018    for (unsigned int i = 0; i < ir->type->vector_elements; i++) {
2019       fs_reg channel = val;
2020       int swiz = 0;
2021
2022       switch (i) {
2023       case 0:
2024          swiz = ir->mask.x;
2025          break;
2026       case 1:
2027          swiz = ir->mask.y;
2028          break;
2029       case 2:
2030          swiz = ir->mask.z;
2031          break;
2032       case 3:
2033          swiz = ir->mask.w;
2034          break;
2035       }
2036
2037       channel.reg_offset += swiz;
2038       emit(MOV(result, channel));
2039       result.reg_offset++;
2040    }
2041 }
2042
2043 void
2044 fs_visitor::visit(ir_discard *ir)
2045 {
2046    assert(ir->condition == NULL); /* FINISHME */
2047
2048    /* We track our discarded pixels in f0.1.  By predicating on it, we can
2049     * update just the flag bits that aren't yet discarded.  By emitting a
2050     * CMP of g0 != g0, all our currently executing channels will get turned
2051     * off.
2052     */
2053    fs_reg some_reg = fs_reg(retype(brw_vec8_grf(0, 0),
2054                                    BRW_REGISTER_TYPE_UW));
2055    fs_inst *cmp = emit(CMP(reg_null_f, some_reg, some_reg,
2056                            BRW_CONDITIONAL_NZ));
2057    cmp->predicate = BRW_PREDICATE_NORMAL;
2058    cmp->flag_subreg = 1;
2059
2060    if (brw->gen >= 6) {
2061       /* For performance, after a discard, jump to the end of the shader.
2062        * Only jump if all relevant channels have been discarded.
2063        */
2064       fs_inst *discard_jump = emit(FS_OPCODE_DISCARD_JUMP);
2065       discard_jump->flag_subreg = 1;
2066
2067       discard_jump->predicate = (dispatch_width == 8)
2068                                 ? BRW_PREDICATE_ALIGN1_ANY8H
2069                                 : BRW_PREDICATE_ALIGN1_ANY16H;
2070       discard_jump->predicate_inverse = true;
2071    }
2072 }
2073
2074 void
2075 fs_visitor::visit(ir_constant *ir)
2076 {
2077    /* Set this->result to reg at the bottom of the function because some code
2078     * paths will cause this visitor to be applied to other fields.  This will
2079     * cause the value stored in this->result to be modified.
2080     *
2081     * Make reg constant so that it doesn't get accidentally modified along the
2082     * way.  Yes, I actually had this problem. :(
2083     */
2084    const fs_reg reg(this, ir->type);
2085    fs_reg dst_reg = reg;
2086
2087    if (ir->type->is_array()) {
2088       const unsigned size = type_size(ir->type->fields.array);
2089
2090       for (unsigned i = 0; i < ir->type->length; i++) {
2091          ir->array_elements[i]->accept(this);
2092          fs_reg src_reg = this->result;
2093
2094          dst_reg.type = src_reg.type;
2095          for (unsigned j = 0; j < size; j++) {
2096             emit(MOV(dst_reg, src_reg));
2097             src_reg.reg_offset++;
2098             dst_reg.reg_offset++;
2099          }
2100       }
2101    } else if (ir->type->is_record()) {
2102       foreach_in_list(ir_constant, field, &ir->components) {
2103          const unsigned size = type_size(field->type);
2104
2105          field->accept(this);
2106          fs_reg src_reg = this->result;
2107
2108          dst_reg.type = src_reg.type;
2109          for (unsigned j = 0; j < size; j++) {
2110             emit(MOV(dst_reg, src_reg));
2111             src_reg.reg_offset++;
2112             dst_reg.reg_offset++;
2113          }
2114       }
2115    } else {
2116       const unsigned size = type_size(ir->type);
2117
2118       for (unsigned i = 0; i < size; i++) {
2119          switch (ir->type->base_type) {
2120          case GLSL_TYPE_FLOAT:
2121             emit(MOV(dst_reg, fs_reg(ir->value.f[i])));
2122             break;
2123          case GLSL_TYPE_UINT:
2124             emit(MOV(dst_reg, fs_reg(ir->value.u[i])));
2125             break;
2126          case GLSL_TYPE_INT:
2127             emit(MOV(dst_reg, fs_reg(ir->value.i[i])));
2128             break;
2129          case GLSL_TYPE_BOOL:
2130             emit(MOV(dst_reg, fs_reg((int)ir->value.b[i])));
2131             break;
2132          default:
2133             unreachable("Non-float/uint/int/bool constant");
2134          }
2135          dst_reg.reg_offset++;
2136       }
2137    }
2138
2139    this->result = reg;
2140 }
2141
2142 void
2143 fs_visitor::emit_bool_to_cond_code(ir_rvalue *ir)
2144 {
2145    ir_expression *expr = ir->as_expression();
2146
2147    if (expr &&
2148        expr->operation != ir_binop_logic_and &&
2149        expr->operation != ir_binop_logic_or &&
2150        expr->operation != ir_binop_logic_xor) {
2151       fs_reg op[2];
2152       fs_inst *inst;
2153
2154       assert(expr->get_num_operands() <= 2);
2155       for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
2156          assert(expr->operands[i]->type->is_scalar());
2157
2158          expr->operands[i]->accept(this);
2159          op[i] = this->result;
2160
2161          resolve_ud_negate(&op[i]);
2162       }
2163
2164       switch (expr->operation) {
2165       case ir_unop_logic_not:
2166          inst = emit(AND(reg_null_d, op[0], fs_reg(1)));
2167          inst->conditional_mod = BRW_CONDITIONAL_Z;
2168          break;
2169
2170       case ir_unop_f2b:
2171          if (brw->gen >= 6) {
2172             emit(CMP(reg_null_d, op[0], fs_reg(0.0f), BRW_CONDITIONAL_NZ));
2173          } else {
2174             inst = emit(MOV(reg_null_f, op[0]));
2175             inst->conditional_mod = BRW_CONDITIONAL_NZ;
2176          }
2177          break;
2178
2179       case ir_unop_i2b:
2180          if (brw->gen >= 6) {
2181             emit(CMP(reg_null_d, op[0], fs_reg(0), BRW_CONDITIONAL_NZ));
2182          } else {
2183             inst = emit(MOV(reg_null_d, op[0]));
2184             inst->conditional_mod = BRW_CONDITIONAL_NZ;
2185          }
2186          break;
2187
2188       case ir_binop_greater:
2189       case ir_binop_gequal:
2190       case ir_binop_less:
2191       case ir_binop_lequal:
2192       case ir_binop_equal:
2193       case ir_binop_all_equal:
2194       case ir_binop_nequal:
2195       case ir_binop_any_nequal:
2196          resolve_bool_comparison(expr->operands[0], &op[0]);
2197          resolve_bool_comparison(expr->operands[1], &op[1]);
2198
2199          emit(CMP(reg_null_d, op[0], op[1],
2200                   brw_conditional_for_comparison(expr->operation)));
2201          break;
2202
2203       default:
2204          unreachable("not reached");
2205       }
2206       return;
2207    }
2208
2209    ir->accept(this);
2210
2211    fs_inst *inst = emit(AND(reg_null_d, this->result, fs_reg(1)));
2212    inst->conditional_mod = BRW_CONDITIONAL_NZ;
2213 }
2214
2215 /**
2216  * Emit a gen6 IF statement with the comparison folded into the IF
2217  * instruction.
2218  */
2219 void
2220 fs_visitor::emit_if_gen6(ir_if *ir)
2221 {
2222    ir_expression *expr = ir->condition->as_expression();
2223
2224    if (expr) {
2225       fs_reg op[2];
2226       fs_inst *inst;
2227       fs_reg temp;
2228
2229       assert(expr->get_num_operands() <= 2);
2230       for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
2231          assert(expr->operands[i]->type->is_scalar());
2232
2233          expr->operands[i]->accept(this);
2234          op[i] = this->result;
2235       }
2236
2237       switch (expr->operation) {
2238       case ir_unop_logic_not:
2239       case ir_binop_logic_xor:
2240       case ir_binop_logic_or:
2241       case ir_binop_logic_and:
2242          /* For operations on bool arguments, only the low bit of the bool is
2243           * valid, and the others are undefined.  Fall back to the condition
2244           * code path.
2245           */
2246          break;
2247
2248       case ir_unop_f2b:
2249          inst = emit(BRW_OPCODE_IF, reg_null_f, op[0], fs_reg(0));
2250          inst->conditional_mod = BRW_CONDITIONAL_NZ;
2251          return;
2252
2253       case ir_unop_i2b:
2254          emit(IF(op[0], fs_reg(0), BRW_CONDITIONAL_NZ));
2255          return;
2256
2257       case ir_binop_greater:
2258       case ir_binop_gequal:
2259       case ir_binop_less:
2260       case ir_binop_lequal:
2261       case ir_binop_equal:
2262       case ir_binop_all_equal:
2263       case ir_binop_nequal:
2264       case ir_binop_any_nequal:
2265          resolve_bool_comparison(expr->operands[0], &op[0]);
2266          resolve_bool_comparison(expr->operands[1], &op[1]);
2267
2268          emit(IF(op[0], op[1],
2269                  brw_conditional_for_comparison(expr->operation)));
2270          return;
2271       default:
2272          unreachable("not reached");
2273       }
2274    }
2275
2276    emit_bool_to_cond_code(ir->condition);
2277    fs_inst *inst = emit(BRW_OPCODE_IF);
2278    inst->predicate = BRW_PREDICATE_NORMAL;
2279 }
2280
2281 /**
2282  * Try to replace IF/MOV/ELSE/MOV/ENDIF with SEL.
2283  *
2284  * Many GLSL shaders contain the following pattern:
2285  *
2286  *    x = condition ? foo : bar
2287  *
2288  * The compiler emits an ir_if tree for this, since each subexpression might be
2289  * a complex tree that could have side-effects or short-circuit logic.
2290  *
2291  * However, the common case is to simply select one of two constants or
2292  * variable values---which is exactly what SEL is for.  In this case, the
2293  * assembly looks like:
2294  *
2295  *    (+f0) IF
2296  *    MOV dst src0
2297  *    ELSE
2298  *    MOV dst src1
2299  *    ENDIF
2300  *
2301  * which can be easily translated into:
2302  *
2303  *    (+f0) SEL dst src0 src1
2304  *
2305  * If src0 is an immediate value, we promote it to a temporary GRF.
2306  */
2307 void
2308 fs_visitor::try_replace_with_sel()
2309 {
2310    fs_inst *endif_inst = (fs_inst *) instructions.get_tail();
2311    assert(endif_inst->opcode == BRW_OPCODE_ENDIF);
2312
2313    /* Pattern match in reverse: IF, MOV, ELSE, MOV, ENDIF. */
2314    int opcodes[] = {
2315       BRW_OPCODE_IF, BRW_OPCODE_MOV, BRW_OPCODE_ELSE, BRW_OPCODE_MOV,
2316    };
2317
2318    fs_inst *match = (fs_inst *) endif_inst->prev;
2319    for (int i = 0; i < 4; i++) {
2320       if (match->is_head_sentinel() || match->opcode != opcodes[4-i-1])
2321          return;
2322       match = (fs_inst *) match->prev;
2323    }
2324
2325    /* The opcodes match; it looks like the right sequence of instructions. */
2326    fs_inst *else_mov = (fs_inst *) endif_inst->prev;
2327    fs_inst *then_mov = (fs_inst *) else_mov->prev->prev;
2328    fs_inst *if_inst = (fs_inst *) then_mov->prev;
2329
2330    /* Check that the MOVs are the right form. */
2331    if (then_mov->dst.equals(else_mov->dst) &&
2332        !then_mov->is_partial_write() &&
2333        !else_mov->is_partial_write()) {
2334
2335       /* Remove the matched instructions; we'll emit a SEL to replace them. */
2336       while (!if_inst->next->is_tail_sentinel())
2337          if_inst->next->remove();
2338       if_inst->remove();
2339
2340       /* Only the last source register can be a constant, so if the MOV in
2341        * the "then" clause uses a constant, we need to put it in a temporary.
2342        */
2343       fs_reg src0(then_mov->src[0]);
2344       if (src0.file == IMM) {
2345          src0 = fs_reg(this, glsl_type::float_type);
2346          src0.type = then_mov->src[0].type;
2347          emit(MOV(src0, then_mov->src[0]));
2348       }
2349
2350       fs_inst *sel;
2351       if (if_inst->conditional_mod) {
2352          /* Sandybridge-specific IF with embedded comparison */
2353          emit(CMP(reg_null_d, if_inst->src[0], if_inst->src[1],
2354                   if_inst->conditional_mod));
2355          sel = emit(BRW_OPCODE_SEL, then_mov->dst, src0, else_mov->src[0]);
2356          sel->predicate = BRW_PREDICATE_NORMAL;
2357       } else {
2358          /* Separate CMP and IF instructions */
2359          sel = emit(BRW_OPCODE_SEL, then_mov->dst, src0, else_mov->src[0]);
2360          sel->predicate = if_inst->predicate;
2361          sel->predicate_inverse = if_inst->predicate_inverse;
2362       }
2363    }
2364 }
2365
2366 void
2367 fs_visitor::visit(ir_if *ir)
2368 {
2369    if (brw->gen < 6) {
2370       no16("Can't support (non-uniform) control flow on SIMD16\n");
2371    }
2372
2373    /* Don't point the annotation at the if statement, because then it plus
2374     * the then and else blocks get printed.
2375     */
2376    this->base_ir = ir->condition;
2377
2378    if (brw->gen == 6) {
2379       emit_if_gen6(ir);
2380    } else {
2381       emit_bool_to_cond_code(ir->condition);
2382
2383       emit(IF(BRW_PREDICATE_NORMAL));
2384    }
2385
2386    foreach_in_list(ir_instruction, ir_, &ir->then_instructions) {
2387       this->base_ir = ir_;
2388       ir_->accept(this);
2389    }
2390
2391    if (!ir->else_instructions.is_empty()) {
2392       emit(BRW_OPCODE_ELSE);
2393
2394       foreach_in_list(ir_instruction, ir_, &ir->else_instructions) {
2395          this->base_ir = ir_;
2396          ir_->accept(this);
2397       }
2398    }
2399
2400    emit(BRW_OPCODE_ENDIF);
2401
2402    try_replace_with_sel();
2403 }
2404
2405 void
2406 fs_visitor::visit(ir_loop *ir)
2407 {
2408    if (brw->gen < 6) {
2409       no16("Can't support (non-uniform) control flow on SIMD16\n");
2410    }
2411
2412    this->base_ir = NULL;
2413    emit(BRW_OPCODE_DO);
2414
2415    foreach_in_list(ir_instruction, ir_, &ir->body_instructions) {
2416       this->base_ir = ir_;
2417       ir_->accept(this);
2418    }
2419
2420    this->base_ir = NULL;
2421    emit(BRW_OPCODE_WHILE);
2422 }
2423
2424 void
2425 fs_visitor::visit(ir_loop_jump *ir)
2426 {
2427    switch (ir->mode) {
2428    case ir_loop_jump::jump_break:
2429       emit(BRW_OPCODE_BREAK);
2430       break;
2431    case ir_loop_jump::jump_continue:
2432       emit(BRW_OPCODE_CONTINUE);
2433       break;
2434    }
2435 }
2436
2437 void
2438 fs_visitor::visit_atomic_counter_intrinsic(ir_call *ir)
2439 {
2440    ir_dereference *deref = static_cast<ir_dereference *>(
2441       ir->actual_parameters.get_head());
2442    ir_variable *location = deref->variable_referenced();
2443    unsigned surf_index = (prog_data->base.binding_table.abo_start +
2444                           location->data.atomic.buffer_index);
2445
2446    /* Calculate the surface offset */
2447    fs_reg offset(this, glsl_type::uint_type);
2448    ir_dereference_array *deref_array = deref->as_dereference_array();
2449
2450    if (deref_array) {
2451       deref_array->array_index->accept(this);
2452
2453       fs_reg tmp(this, glsl_type::uint_type);
2454       emit(MUL(tmp, this->result, ATOMIC_COUNTER_SIZE));
2455       emit(ADD(offset, tmp, location->data.atomic.offset));
2456    } else {
2457       offset = location->data.atomic.offset;
2458    }
2459
2460    /* Emit the appropriate machine instruction */
2461    const char *callee = ir->callee->function_name();
2462    ir->return_deref->accept(this);
2463    fs_reg dst = this->result;
2464
2465    if (!strcmp("__intrinsic_atomic_read", callee)) {
2466       emit_untyped_surface_read(surf_index, dst, offset);
2467
2468    } else if (!strcmp("__intrinsic_atomic_increment", callee)) {
2469       emit_untyped_atomic(BRW_AOP_INC, surf_index, dst, offset,
2470                           fs_reg(), fs_reg());
2471
2472    } else if (!strcmp("__intrinsic_atomic_predecrement", callee)) {
2473       emit_untyped_atomic(BRW_AOP_PREDEC, surf_index, dst, offset,
2474                           fs_reg(), fs_reg());
2475    }
2476 }
2477
2478 void
2479 fs_visitor::visit(ir_call *ir)
2480 {
2481    const char *callee = ir->callee->function_name();
2482
2483    if (!strcmp("__intrinsic_atomic_read", callee) ||
2484        !strcmp("__intrinsic_atomic_increment", callee) ||
2485        !strcmp("__intrinsic_atomic_predecrement", callee)) {
2486       visit_atomic_counter_intrinsic(ir);
2487    } else {
2488       unreachable("Unsupported intrinsic.");
2489    }
2490 }
2491
2492 void
2493 fs_visitor::visit(ir_return *)
2494 {
2495    unreachable("FINISHME");
2496 }
2497
2498 void
2499 fs_visitor::visit(ir_function *ir)
2500 {
2501    /* Ignore function bodies other than main() -- we shouldn't see calls to
2502     * them since they should all be inlined before we get to ir_to_mesa.
2503     */
2504    if (strcmp(ir->name, "main") == 0) {
2505       const ir_function_signature *sig;
2506       exec_list empty;
2507
2508       sig = ir->matching_signature(NULL, &empty, false);
2509
2510       assert(sig);
2511
2512       foreach_in_list(ir_instruction, ir_, &sig->body) {
2513          this->base_ir = ir_;
2514          ir_->accept(this);
2515       }
2516    }
2517 }
2518
2519 void
2520 fs_visitor::visit(ir_function_signature *)
2521 {
2522    unreachable("not reached");
2523 }
2524
2525 void
2526 fs_visitor::visit(ir_emit_vertex *)
2527 {
2528    unreachable("not reached");
2529 }
2530
2531 void
2532 fs_visitor::visit(ir_end_primitive *)
2533 {
2534    unreachable("not reached");
2535 }
2536
2537 void
2538 fs_visitor::emit_untyped_atomic(unsigned atomic_op, unsigned surf_index,
2539                                 fs_reg dst, fs_reg offset, fs_reg src0,
2540                                 fs_reg src1)
2541 {
2542    const unsigned operand_len = dispatch_width / 8;
2543    unsigned mlen = 0;
2544
2545    /* Initialize the sample mask in the message header. */
2546    emit(MOV(brw_uvec_mrf(8, mlen, 0), fs_reg(0u)))
2547       ->force_writemask_all = true;
2548
2549    if (fp->UsesKill) {
2550       emit(MOV(brw_uvec_mrf(1, mlen, 7), brw_flag_reg(0, 1)))
2551          ->force_writemask_all = true;
2552    } else {
2553       emit(MOV(brw_uvec_mrf(1, mlen, 7),
2554                retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UD)))
2555          ->force_writemask_all = true;
2556    }
2557
2558    mlen++;
2559
2560    /* Set the atomic operation offset. */
2561    emit(MOV(brw_uvec_mrf(dispatch_width, mlen, 0), offset));
2562    mlen += operand_len;
2563
2564    /* Set the atomic operation arguments. */
2565    if (src0.file != BAD_FILE) {
2566       emit(MOV(brw_uvec_mrf(dispatch_width, mlen, 0), src0));
2567       mlen += operand_len;
2568    }
2569
2570    if (src1.file != BAD_FILE) {
2571       emit(MOV(brw_uvec_mrf(dispatch_width, mlen, 0), src1));
2572       mlen += operand_len;
2573    }
2574
2575    /* Emit the instruction. */
2576    fs_inst *inst = new(mem_ctx) fs_inst(SHADER_OPCODE_UNTYPED_ATOMIC, dst,
2577                                         atomic_op, surf_index);
2578    inst->base_mrf = 0;
2579    inst->mlen = mlen;
2580    inst->header_present = true;
2581    emit(inst);
2582 }
2583
2584 void
2585 fs_visitor::emit_untyped_surface_read(unsigned surf_index, fs_reg dst,
2586                                       fs_reg offset)
2587 {
2588    const unsigned operand_len = dispatch_width / 8;
2589    unsigned mlen = 0;
2590
2591    /* Initialize the sample mask in the message header. */
2592    emit(MOV(brw_uvec_mrf(8, mlen, 0), fs_reg(0u)))
2593       ->force_writemask_all = true;
2594
2595    if (fp->UsesKill) {
2596       emit(MOV(brw_uvec_mrf(1, mlen, 7), brw_flag_reg(0, 1)))
2597          ->force_writemask_all = true;
2598    } else {
2599       emit(MOV(brw_uvec_mrf(1, mlen, 7),
2600                retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UD)))
2601          ->force_writemask_all = true;
2602    }
2603
2604    mlen++;
2605
2606    /* Set the surface read offset. */
2607    emit(MOV(brw_uvec_mrf(dispatch_width, mlen, 0), offset));
2608    mlen += operand_len;
2609
2610    /* Emit the instruction. */
2611    fs_inst *inst = new(mem_ctx)
2612       fs_inst(SHADER_OPCODE_UNTYPED_SURFACE_READ, dst, surf_index);
2613    inst->base_mrf = 0;
2614    inst->mlen = mlen;
2615    inst->header_present = true;
2616    emit(inst);
2617 }
2618
2619 fs_inst *
2620 fs_visitor::emit(fs_inst *inst)
2621 {
2622    if (force_uncompressed_stack > 0)
2623       inst->force_uncompressed = true;
2624
2625    inst->annotation = this->current_annotation;
2626    inst->ir = this->base_ir;
2627
2628    this->instructions.push_tail(inst);
2629
2630    return inst;
2631 }
2632
2633 void
2634 fs_visitor::emit(exec_list list)
2635 {
2636    foreach_in_list_safe(fs_inst, inst, &list) {
2637       inst->remove();
2638       emit(inst);
2639    }
2640 }
2641
2642 /** Emits a dummy fragment shader consisting of magenta for bringup purposes. */
2643 void
2644 fs_visitor::emit_dummy_fs()
2645 {
2646    int reg_width = dispatch_width / 8;
2647
2648    /* Everyone's favorite color. */
2649    emit(MOV(fs_reg(MRF, 2 + 0 * reg_width), fs_reg(1.0f)));
2650    emit(MOV(fs_reg(MRF, 2 + 1 * reg_width), fs_reg(0.0f)));
2651    emit(MOV(fs_reg(MRF, 2 + 2 * reg_width), fs_reg(1.0f)));
2652    emit(MOV(fs_reg(MRF, 2 + 3 * reg_width), fs_reg(0.0f)));
2653
2654    fs_inst *write;
2655    write = emit(FS_OPCODE_FB_WRITE, fs_reg(0), fs_reg(0));
2656    write->base_mrf = 2;
2657    write->mlen = 4 * reg_width;
2658    write->eot = true;
2659 }
2660
2661 /* The register location here is relative to the start of the URB
2662  * data.  It will get adjusted to be a real location before
2663  * generate_code() time.
2664  */
2665 struct brw_reg
2666 fs_visitor::interp_reg(int location, int channel)
2667 {
2668    int regnr = prog_data->urb_setup[location] * 2 + channel / 2;
2669    int stride = (channel & 1) * 4;
2670
2671    assert(prog_data->urb_setup[location] != -1);
2672
2673    return brw_vec1_grf(regnr, stride);
2674 }
2675
2676 /** Emits the interpolation for the varying inputs. */
2677 void
2678 fs_visitor::emit_interpolation_setup_gen4()
2679 {
2680    this->current_annotation = "compute pixel centers";
2681    this->pixel_x = fs_reg(this, glsl_type::uint_type);
2682    this->pixel_y = fs_reg(this, glsl_type::uint_type);
2683    this->pixel_x.type = BRW_REGISTER_TYPE_UW;
2684    this->pixel_y.type = BRW_REGISTER_TYPE_UW;
2685
2686    emit(FS_OPCODE_PIXEL_X, this->pixel_x);
2687    emit(FS_OPCODE_PIXEL_Y, this->pixel_y);
2688
2689    this->current_annotation = "compute pixel deltas from v0";
2690    if (brw->has_pln) {
2691       this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC] =
2692          fs_reg(this, glsl_type::vec2_type);
2693       this->delta_y[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC] =
2694          this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC];
2695       this->delta_y[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC].reg_offset++;
2696    } else {
2697       this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC] =
2698          fs_reg(this, glsl_type::float_type);
2699       this->delta_y[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC] =
2700          fs_reg(this, glsl_type::float_type);
2701    }
2702    emit(ADD(this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC],
2703             this->pixel_x, fs_reg(negate(brw_vec1_grf(1, 0)))));
2704    emit(ADD(this->delta_y[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC],
2705             this->pixel_y, fs_reg(negate(brw_vec1_grf(1, 1)))));
2706
2707    this->current_annotation = "compute pos.w and 1/pos.w";
2708    /* Compute wpos.w.  It's always in our setup, since it's needed to
2709     * interpolate the other attributes.
2710     */
2711    this->wpos_w = fs_reg(this, glsl_type::float_type);
2712    emit(FS_OPCODE_LINTERP, wpos_w,
2713         this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC],
2714         this->delta_y[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC],
2715         interp_reg(VARYING_SLOT_POS, 3));
2716    /* Compute the pixel 1/W value from wpos.w. */
2717    this->pixel_w = fs_reg(this, glsl_type::float_type);
2718    emit_math(SHADER_OPCODE_RCP, this->pixel_w, wpos_w);
2719    this->current_annotation = NULL;
2720 }
2721
2722 /** Emits the interpolation for the varying inputs. */
2723 void
2724 fs_visitor::emit_interpolation_setup_gen6()
2725 {
2726    struct brw_reg g1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW);
2727
2728    /* If the pixel centers end up used, the setup is the same as for gen4. */
2729    this->current_annotation = "compute pixel centers";
2730    fs_reg int_pixel_x = fs_reg(this, glsl_type::uint_type);
2731    fs_reg int_pixel_y = fs_reg(this, glsl_type::uint_type);
2732    int_pixel_x.type = BRW_REGISTER_TYPE_UW;
2733    int_pixel_y.type = BRW_REGISTER_TYPE_UW;
2734    emit(ADD(int_pixel_x,
2735             fs_reg(stride(suboffset(g1_uw, 4), 2, 4, 0)),
2736             fs_reg(brw_imm_v(0x10101010))));
2737    emit(ADD(int_pixel_y,
2738             fs_reg(stride(suboffset(g1_uw, 5), 2, 4, 0)),
2739             fs_reg(brw_imm_v(0x11001100))));
2740
2741    /* As of gen6, we can no longer mix float and int sources.  We have
2742     * to turn the integer pixel centers into floats for their actual
2743     * use.
2744     */
2745    this->pixel_x = fs_reg(this, glsl_type::float_type);
2746    this->pixel_y = fs_reg(this, glsl_type::float_type);
2747    emit(MOV(this->pixel_x, int_pixel_x));
2748    emit(MOV(this->pixel_y, int_pixel_y));
2749
2750    this->current_annotation = "compute pos.w";
2751    this->pixel_w = fs_reg(brw_vec8_grf(payload.source_w_reg, 0));
2752    this->wpos_w = fs_reg(this, glsl_type::float_type);
2753    emit_math(SHADER_OPCODE_RCP, this->wpos_w, this->pixel_w);
2754
2755    for (int i = 0; i < BRW_WM_BARYCENTRIC_INTERP_MODE_COUNT; ++i) {
2756       uint8_t reg = payload.barycentric_coord_reg[i];
2757       this->delta_x[i] = fs_reg(brw_vec8_grf(reg, 0));
2758       this->delta_y[i] = fs_reg(brw_vec8_grf(reg + 1, 0));
2759    }
2760
2761    this->current_annotation = NULL;
2762 }
2763
2764 void
2765 fs_visitor::emit_color_write(int target, int index, int first_color_mrf)
2766 {
2767    int reg_width = dispatch_width / 8;
2768    fs_inst *inst;
2769    fs_reg color = outputs[target];
2770    fs_reg mrf;
2771
2772    /* If there's no color data to be written, skip it. */
2773    if (color.file == BAD_FILE)
2774       return;
2775
2776    color.reg_offset += index;
2777
2778    if (dispatch_width == 8 || brw->gen >= 6) {
2779       /* SIMD8 write looks like:
2780        * m + 0: r0
2781        * m + 1: r1
2782        * m + 2: g0
2783        * m + 3: g1
2784        *
2785        * gen6 SIMD16 DP write looks like:
2786        * m + 0: r0
2787        * m + 1: r1
2788        * m + 2: g0
2789        * m + 3: g1
2790        * m + 4: b0
2791        * m + 5: b1
2792        * m + 6: a0
2793        * m + 7: a1
2794        */
2795       inst = emit(MOV(fs_reg(MRF, first_color_mrf + index * reg_width,
2796                              color.type),
2797                       color));
2798       inst->saturate = key->clamp_fragment_color;
2799    } else {
2800       /* pre-gen6 SIMD16 single source DP write looks like:
2801        * m + 0: r0
2802        * m + 1: g0
2803        * m + 2: b0
2804        * m + 3: a0
2805        * m + 4: r1
2806        * m + 5: g1
2807        * m + 6: b1
2808        * m + 7: a1
2809        */
2810       if (brw->has_compr4) {
2811          /* By setting the high bit of the MRF register number, we
2812           * indicate that we want COMPR4 mode - instead of doing the
2813           * usual destination + 1 for the second half we get
2814           * destination + 4.
2815           */
2816          inst = emit(MOV(fs_reg(MRF, BRW_MRF_COMPR4 + first_color_mrf + index,
2817                                 color.type),
2818                          color));
2819          inst->saturate = key->clamp_fragment_color;
2820       } else {
2821          push_force_uncompressed();
2822          inst = emit(MOV(fs_reg(MRF, first_color_mrf + index, color.type),
2823                          color));
2824          inst->saturate = key->clamp_fragment_color;
2825          pop_force_uncompressed();
2826
2827          inst = emit(MOV(fs_reg(MRF, first_color_mrf + index + 4, color.type),
2828                          half(color, 1)));
2829          inst->force_sechalf = true;
2830          inst->saturate = key->clamp_fragment_color;
2831       }
2832    }
2833 }
2834
2835 static enum brw_conditional_mod
2836 cond_for_alpha_func(GLenum func)
2837 {
2838    switch(func) {
2839       case GL_GREATER:
2840          return BRW_CONDITIONAL_G;
2841       case GL_GEQUAL:
2842          return BRW_CONDITIONAL_GE;
2843       case GL_LESS:
2844          return BRW_CONDITIONAL_L;
2845       case GL_LEQUAL:
2846          return BRW_CONDITIONAL_LE;
2847       case GL_EQUAL:
2848          return BRW_CONDITIONAL_EQ;
2849       case GL_NOTEQUAL:
2850          return BRW_CONDITIONAL_NEQ;
2851       default:
2852          unreachable("Not reached");
2853    }
2854 }
2855
2856 /**
2857  * Alpha test support for when we compile it into the shader instead
2858  * of using the normal fixed-function alpha test.
2859  */
2860 void
2861 fs_visitor::emit_alpha_test()
2862 {
2863    this->current_annotation = "Alpha test";
2864
2865    fs_inst *cmp;
2866    if (key->alpha_test_func == GL_ALWAYS)
2867       return;
2868
2869    if (key->alpha_test_func == GL_NEVER) {
2870       /* f0.1 = 0 */
2871       fs_reg some_reg = fs_reg(retype(brw_vec8_grf(0, 0),
2872                                       BRW_REGISTER_TYPE_UW));
2873       cmp = emit(CMP(reg_null_f, some_reg, some_reg,
2874                      BRW_CONDITIONAL_NEQ));
2875    } else {
2876       /* RT0 alpha */
2877       fs_reg color = outputs[0];
2878       color.reg_offset += 3;
2879
2880       /* f0.1 &= func(color, ref) */
2881       cmp = emit(CMP(reg_null_f, color, fs_reg(key->alpha_test_ref),
2882                      cond_for_alpha_func(key->alpha_test_func)));
2883    }
2884    cmp->predicate = BRW_PREDICATE_NORMAL;
2885    cmp->flag_subreg = 1;
2886 }
2887
2888 void
2889 fs_visitor::emit_fb_writes()
2890 {
2891    this->current_annotation = "FB write header";
2892    bool header_present = true;
2893    /* We can potentially have a message length of up to 15, so we have to set
2894     * base_mrf to either 0 or 1 in order to fit in m0..m15.
2895     */
2896    int base_mrf = 1;
2897    int nr = base_mrf;
2898    int reg_width = dispatch_width / 8;
2899    bool src0_alpha_to_render_target = false;
2900
2901    if (do_dual_src) {
2902       no16("GL_ARB_blend_func_extended not yet supported in SIMD16.");
2903       if (dispatch_width == 16)
2904          do_dual_src = false;
2905    }
2906
2907    /* From the Sandy Bridge PRM, volume 4, page 198:
2908     *
2909     *     "Dispatched Pixel Enables. One bit per pixel indicating
2910     *      which pixels were originally enabled when the thread was
2911     *      dispatched. This field is only required for the end-of-
2912     *      thread message and on all dual-source messages."
2913     */
2914    if (brw->gen >= 6 &&
2915        (brw->is_haswell || brw->gen >= 8 || !this->fp->UsesKill) &&
2916        !do_dual_src &&
2917        key->nr_color_regions == 1) {
2918       header_present = false;
2919    }
2920
2921    if (header_present) {
2922       src0_alpha_to_render_target = brw->gen >= 6 &&
2923                                     !do_dual_src &&
2924                                     key->replicate_alpha;
2925       /* m2, m3 header */
2926       nr += 2;
2927    }
2928
2929    if (payload.aa_dest_stencil_reg) {
2930       push_force_uncompressed();
2931       emit(MOV(fs_reg(MRF, nr++),
2932                fs_reg(brw_vec8_grf(payload.aa_dest_stencil_reg, 0))));
2933       pop_force_uncompressed();
2934    }
2935
2936    prog_data->uses_omask =
2937       fp->Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK);
2938    if (prog_data->uses_omask) {
2939       this->current_annotation = "FB write oMask";
2940       assert(this->sample_mask.file != BAD_FILE);
2941       /* Hand over gl_SampleMask. Only lower 16 bits are relevant. */
2942       emit(FS_OPCODE_SET_OMASK, fs_reg(MRF, nr, BRW_REGISTER_TYPE_UW), this->sample_mask);
2943       nr += 1;
2944    }
2945
2946    /* Reserve space for color. It'll be filled in per MRT below. */
2947    int color_mrf = nr;
2948    nr += 4 * reg_width;
2949    if (do_dual_src)
2950       nr += 4;
2951    if (src0_alpha_to_render_target)
2952       nr += reg_width;
2953
2954    if (source_depth_to_render_target) {
2955       if (brw->gen == 6) {
2956          /* For outputting oDepth on gen6, SIMD8 writes have to be
2957           * used.  This would require SIMD8 moves of each half to
2958           * message regs, kind of like pre-gen5 SIMD16 FB writes.
2959           * Just bail on doing so for now.
2960           */
2961          no16("Missing support for simd16 depth writes on gen6\n");
2962       }
2963
2964       if (prog->OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) {
2965          /* Hand over gl_FragDepth. */
2966          assert(this->frag_depth.file != BAD_FILE);
2967          emit(MOV(fs_reg(MRF, nr), this->frag_depth));
2968       } else {
2969          /* Pass through the payload depth. */
2970          emit(MOV(fs_reg(MRF, nr),
2971                   fs_reg(brw_vec8_grf(payload.source_depth_reg, 0))));
2972       }
2973       nr += reg_width;
2974    }
2975
2976    if (payload.dest_depth_reg) {
2977       emit(MOV(fs_reg(MRF, nr),
2978                fs_reg(brw_vec8_grf(payload.dest_depth_reg, 0))));
2979       nr += reg_width;
2980    }
2981
2982    if (do_dual_src) {
2983       fs_reg src0 = this->outputs[0];
2984       fs_reg src1 = this->dual_src_output;
2985
2986       this->current_annotation = ralloc_asprintf(this->mem_ctx,
2987                                                  "FB write src0");
2988       for (int i = 0; i < 4; i++) {
2989          fs_inst *inst = emit(MOV(fs_reg(MRF, color_mrf + i, src0.type), src0));
2990          src0.reg_offset++;
2991          inst->saturate = key->clamp_fragment_color;
2992       }
2993
2994       this->current_annotation = ralloc_asprintf(this->mem_ctx,
2995                                                  "FB write src1");
2996       for (int i = 0; i < 4; i++) {
2997          fs_inst *inst = emit(MOV(fs_reg(MRF, color_mrf + 4 + i, src1.type),
2998                                   src1));
2999          src1.reg_offset++;
3000          inst->saturate = key->clamp_fragment_color;
3001       }
3002
3003       if (INTEL_DEBUG & DEBUG_SHADER_TIME)
3004          emit_shader_time_end();
3005
3006       fs_inst *inst = emit(FS_OPCODE_FB_WRITE);
3007       inst->target = 0;
3008       inst->base_mrf = base_mrf;
3009       inst->mlen = nr - base_mrf;
3010       inst->eot = true;
3011       inst->header_present = header_present;
3012       if ((brw->gen >= 8 || brw->is_haswell) && fp->UsesKill) {
3013          inst->predicate = BRW_PREDICATE_NORMAL;
3014          inst->flag_subreg = 1;
3015       }
3016
3017       prog_data->dual_src_blend = true;
3018       this->current_annotation = NULL;
3019       return;
3020    }
3021
3022    for (int target = 0; target < key->nr_color_regions; target++) {
3023       this->current_annotation = ralloc_asprintf(this->mem_ctx,
3024                                                  "FB write target %d",
3025                                                  target);
3026       /* If src0_alpha_to_render_target is true, include source zero alpha
3027        * data in RenderTargetWrite message for targets > 0.
3028        */
3029       int write_color_mrf = color_mrf;
3030       if (src0_alpha_to_render_target && target != 0) {
3031          fs_inst *inst;
3032          fs_reg color = outputs[0];
3033          color.reg_offset += 3;
3034
3035          inst = emit(MOV(fs_reg(MRF, write_color_mrf, color.type),
3036                          color));
3037          inst->saturate = key->clamp_fragment_color;
3038          write_color_mrf = color_mrf + reg_width;
3039       }
3040
3041       for (unsigned i = 0; i < this->output_components[target]; i++)
3042          emit_color_write(target, i, write_color_mrf);
3043
3044       bool eot = false;
3045       if (target == key->nr_color_regions - 1) {
3046          eot = true;
3047
3048          if (INTEL_DEBUG & DEBUG_SHADER_TIME)
3049             emit_shader_time_end();
3050       }
3051
3052       fs_inst *inst = emit(FS_OPCODE_FB_WRITE);
3053       inst->target = target;
3054       inst->base_mrf = base_mrf;
3055       if (src0_alpha_to_render_target && target == 0)
3056          inst->mlen = nr - base_mrf - reg_width;
3057       else
3058          inst->mlen = nr - base_mrf;
3059       inst->eot = eot;
3060       inst->header_present = header_present;
3061       if ((brw->gen >= 8 || brw->is_haswell) && fp->UsesKill) {
3062          inst->predicate = BRW_PREDICATE_NORMAL;
3063          inst->flag_subreg = 1;
3064       }
3065    }
3066
3067    if (key->nr_color_regions == 0) {
3068       /* Even if there's no color buffers enabled, we still need to send
3069        * alpha out the pipeline to our null renderbuffer to support
3070        * alpha-testing, alpha-to-coverage, and so on.
3071        */
3072       emit_color_write(0, 3, color_mrf);
3073
3074       if (INTEL_DEBUG & DEBUG_SHADER_TIME)
3075          emit_shader_time_end();
3076
3077       fs_inst *inst = emit(FS_OPCODE_FB_WRITE);
3078       inst->base_mrf = base_mrf;
3079       inst->mlen = nr - base_mrf;
3080       inst->eot = true;
3081       inst->header_present = header_present;
3082       if ((brw->gen >= 8 || brw->is_haswell) && fp->UsesKill) {
3083          inst->predicate = BRW_PREDICATE_NORMAL;
3084          inst->flag_subreg = 1;
3085       }
3086    }
3087
3088    this->current_annotation = NULL;
3089 }
3090
3091 void
3092 fs_visitor::resolve_ud_negate(fs_reg *reg)
3093 {
3094    if (reg->type != BRW_REGISTER_TYPE_UD ||
3095        !reg->negate)
3096       return;
3097
3098    fs_reg temp = fs_reg(this, glsl_type::uint_type);
3099    emit(MOV(temp, *reg));
3100    *reg = temp;
3101 }
3102
3103 void
3104 fs_visitor::resolve_bool_comparison(ir_rvalue *rvalue, fs_reg *reg)
3105 {
3106    if (rvalue->type != glsl_type::bool_type)
3107       return;
3108
3109    fs_reg temp = fs_reg(this, glsl_type::bool_type);
3110    emit(AND(temp, *reg, fs_reg(1)));
3111    *reg = temp;
3112 }
3113
3114 fs_visitor::fs_visitor(struct brw_context *brw,
3115                        void *mem_ctx,
3116                        const struct brw_wm_prog_key *key,
3117                        struct brw_wm_prog_data *prog_data,
3118                        struct gl_shader_program *shader_prog,
3119                        struct gl_fragment_program *fp,
3120                        unsigned dispatch_width)
3121    : backend_visitor(brw, shader_prog, &fp->Base, &prog_data->base,
3122                      MESA_SHADER_FRAGMENT),
3123      key(key), prog_data(prog_data),
3124      dispatch_width(dispatch_width)
3125 {
3126    this->fp = fp;
3127    this->mem_ctx = mem_ctx;
3128    this->failed = false;
3129    this->simd16_unsupported = false;
3130    this->no16_msg = NULL;
3131    this->variable_ht = hash_table_ctor(0,
3132                                        hash_table_pointer_hash,
3133                                        hash_table_pointer_compare);
3134
3135    memset(&this->payload, 0, sizeof(this->payload));
3136    memset(this->outputs, 0, sizeof(this->outputs));
3137    memset(this->output_components, 0, sizeof(this->output_components));
3138    this->source_depth_to_render_target = false;
3139    this->runtime_check_aads_emit = false;
3140    this->first_non_payload_grf = 0;
3141    this->max_grf = brw->gen >= 7 ? GEN7_MRF_HACK_START : BRW_MAX_GRF;
3142
3143    this->current_annotation = NULL;
3144    this->base_ir = NULL;
3145
3146    this->virtual_grf_sizes = NULL;
3147    this->virtual_grf_count = 0;
3148    this->virtual_grf_array_size = 0;
3149    this->virtual_grf_start = NULL;
3150    this->virtual_grf_end = NULL;
3151    this->live_intervals = NULL;
3152    this->regs_live_at_ip = NULL;
3153
3154    this->uniforms = 0;
3155    this->last_scratch = 0;
3156    this->pull_constant_loc = NULL;
3157    this->push_constant_loc = NULL;
3158
3159    this->force_uncompressed_stack = 0;
3160
3161    this->spilled_any_registers = false;
3162    this->do_dual_src = false;
3163
3164    if (dispatch_width == 8)
3165       this->param_size = rzalloc_array(mem_ctx, int, stage_prog_data->nr_params);
3166 }
3167
3168 fs_visitor::~fs_visitor()
3169 {
3170    hash_table_dtor(this->variable_ht);
3171 }