src/mesa/drivers/dri/i965/brw_fs_nir.cpp

   1 /*
   2  * Copyright © 2010 Intel Corporation
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the next
  12  * paragraph) shall be included in all copies or substantial portions of the
  13  * Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21  * IN THE SOFTWARE.
  22  */
  23
  24 #include "glsl/ir.h"
  25 #include "glsl/ir_optimization.h"
  26 #include "glsl/nir/glsl_to_nir.h"
  27 #include "brw_fs.h"
  28 #include "brw_nir.h"
  29
  30 static void
  31 nir_optimize(nir_shader *nir)
  32 {
  33    bool progress;
  34    do {
  35       progress = false;
  36       nir_lower_vars_to_ssa(nir);
  37       nir_validate_shader(nir);
  38       nir_lower_alu_to_scalar(nir);
  39       nir_validate_shader(nir);
  40       progress |= nir_copy_prop(nir);
  41       nir_validate_shader(nir);
  42       nir_lower_phis_to_scalar(nir);
  43       nir_validate_shader(nir);
  44       progress |= nir_copy_prop(nir);
  45       nir_validate_shader(nir);
  46       progress |= nir_opt_dce(nir);
  47       nir_validate_shader(nir);
  48       progress |= nir_opt_cse(nir);
  49       nir_validate_shader(nir);
  50       progress |= nir_opt_peephole_select(nir);
  51       nir_validate_shader(nir);
  52       progress |= nir_opt_algebraic(nir);
  53       nir_validate_shader(nir);
  54       progress |= nir_opt_constant_folding(nir);
  55       nir_validate_shader(nir);
  56       progress |= nir_opt_remove_phis(nir);
  57       nir_validate_shader(nir);
  58    } while (progress);
  59 }
  60
  61 static bool
  62 count_nir_instrs_in_block(nir_block *block, void *state)
  63 {
  64    int *count = (int *) state;
  65    nir_foreach_instr(block, instr) {
  66       *count = *count + 1;
  67    }
  68    return true;
  69 }
  70
  71 static int
  72 count_nir_instrs(nir_shader *nir)
  73 {
  74    int count = 0;
  75    nir_foreach_overload(nir, overload) {
  76       if (!overload->impl)
  77          continue;
  78       nir_foreach_block(overload->impl, count_nir_instrs_in_block, &count);
  79    }
  80    return count;
  81 }
  82
  83 void
  84 fs_visitor::emit_nir_code()
  85 {
  86    const nir_shader_compiler_options *options =
  87       ctx->Const.ShaderCompilerOptions[stage].NirOptions;
  88
  89    /* first, lower the GLSL IR shader to NIR */
  90    lower_output_reads(shader->base.ir);
  91    nir_shader *nir = glsl_to_nir(&shader->base, options);
  92    nir_validate_shader(nir);
  93
  94    nir_lower_global_vars_to_local(nir);
  95    nir_validate_shader(nir);
  96
  97    nir_split_var_copies(nir);
  98    nir_validate_shader(nir);
  99
 100    nir_optimize(nir);
 101
 102    /* Lower a bunch of stuff */
 103    nir_lower_var_copies(nir);
 104    nir_validate_shader(nir);
 105
 106    /* Get rid of split copies */
 107    nir_optimize(nir);
 108
 109    nir_assign_var_locations_scalar_direct_first(nir, &nir->uniforms,
 110                                                 &num_direct_uniforms,
 111                                                 &nir->num_uniforms);
 112    nir_assign_var_locations_scalar(&nir->inputs, &nir->num_inputs);
 113    nir_assign_var_locations_scalar(&nir->outputs, &nir->num_outputs);
 114
 115    nir_lower_io(nir);
 116    nir_validate_shader(nir);
 117
 118    nir_remove_dead_variables(nir);
 119    nir_validate_shader(nir);
 120
 121    nir_lower_samplers(nir, shader_prog, shader->base.Program);
 122    nir_validate_shader(nir);
 123
 124    nir_lower_system_values(nir);
 125    nir_validate_shader(nir);
 126
 127    nir_lower_atomics(nir);
 128    nir_validate_shader(nir);
 129
 130    nir_optimize(nir);
 131
 132    nir_lower_locals_to_regs(nir);
 133    nir_validate_shader(nir);
 134
 135    nir_lower_to_source_mods(nir);
 136    nir_validate_shader(nir);
 137    nir_copy_prop(nir);
 138    nir_validate_shader(nir);
 139
 140    if (unlikely(debug_enabled)) {
 141       fprintf(stderr, "NIR (SSA form) for %s shader:\n", stage_name);
 142       nir_print_shader(nir, stderr);
 143    }
 144
 145    if (dispatch_width == 8) {
 146       static GLuint msg_id = 0;
 147       _mesa_gl_debug(&brw->ctx, &msg_id,
 148                      MESA_DEBUG_SOURCE_SHADER_COMPILER,
 149                      MESA_DEBUG_TYPE_OTHER,
 150                      MESA_DEBUG_SEVERITY_NOTIFICATION,
 151                      "%s NIR shader: %d inst\n",
 152                      stage_abbrev,
 153                      count_nir_instrs(nir));
 154    }
 155
 156    nir_convert_from_ssa(nir);
 157    nir_validate_shader(nir);
 158
 159    /* This is the last pass we run before we start emitting stuff.  It
 160     * determines when we need to insert boolean resolves on Gen <= 5.  We
 161     * run it last because it stashes data in instr->pass_flags and we don't
 162     * want that to be squashed by other NIR passes.
 163     */
 164    if (brw->gen <= 5)
 165       brw_nir_analyze_boolean_resolves(nir);
 166
 167    /* emit the arrays used for inputs and outputs - load/store intrinsics will
 168     * be converted to reads/writes of these arrays
 169     */
 170
 171    if (nir->num_inputs > 0) {
 172       nir_inputs = vgrf(nir->num_inputs);
 173       nir_setup_inputs(nir);
 174    }
 175
 176    if (nir->num_outputs > 0) {
 177       nir_outputs = vgrf(nir->num_outputs);
 178       nir_setup_outputs(nir);
 179    }
 180
 181    if (nir->num_uniforms > 0) {
 182       nir_setup_uniforms(nir);
 183    }
 184
 185    nir_emit_system_values(nir);
 186
 187    nir_globals = ralloc_array(mem_ctx, fs_reg, nir->reg_alloc);
 188    foreach_list_typed(nir_register, reg, node, &nir->registers) {
 189       unsigned array_elems =
 190          reg->num_array_elems == 0 ? 1 : reg->num_array_elems;
 191       unsigned size = array_elems * reg->num_components;
 192       nir_globals[reg->index] = vgrf(size);
 193    }
 194
 195    /* get the main function and emit it */
 196    nir_foreach_overload(nir, overload) {
 197       assert(strcmp(overload->function->name, "main") == 0);
 198       assert(overload->impl);
 199       nir_emit_impl(overload->impl);
 200    }
 201
 202    if (unlikely(debug_enabled)) {
 203       fprintf(stderr, "NIR (final form) for %s shader:\n", stage_name);
 204       nir_print_shader(nir, stderr);
 205    }
 206
 207    ralloc_free(nir);
 208 }
 209
 210 void
 211 fs_visitor::nir_setup_inputs(nir_shader *shader)
 212 {
 213    foreach_list_typed(nir_variable, var, node, &shader->inputs) {
 214       enum brw_reg_type type = brw_type_for_base_type(var->type);
 215       fs_reg input = offset(nir_inputs, var->data.driver_location);
 216
 217       fs_reg reg;
 218       switch (stage) {
 219       case MESA_SHADER_VERTEX: {
 220          /* Our ATTR file is indexed by VERT_ATTRIB_*, which is the value
 221           * stored in nir_variable::location.
 222           *
 223           * However, NIR's load_input intrinsics use a different index - an
 224           * offset into a single contiguous array containing all inputs.
 225           * This index corresponds to the nir_variable::driver_location field.
 226           *
 227           * So, we need to copy from fs_reg(ATTR, var->location) to
 228           * offset(nir_inputs, var->data.driver_location).
 229           */
 230          unsigned components = var->type->without_array()->components();
 231          unsigned array_length = var->type->is_array() ? var->type->length : 1;
 232          for (unsigned i = 0; i < array_length; i++) {
 233             for (unsigned j = 0; j < components; j++) {
 234                emit(MOV(retype(offset(input, components * i + j), type),
 235                         offset(fs_reg(ATTR, var->data.location + i, type), j)));
 236             }
 237          }
 238          break;
 239       }
 240       case MESA_SHADER_GEOMETRY:
 241       case MESA_SHADER_COMPUTE:
 242          unreachable("fs_visitor not used for these stages yet.");
 243          break;
 244       case MESA_SHADER_FRAGMENT:
 245          if (var->data.location == VARYING_SLOT_POS) {
 246             reg = *emit_fragcoord_interpolation(var->data.pixel_center_integer,
 247                                                 var->data.origin_upper_left);
 248             emit_percomp(MOV(input, reg), 0xF);
 249          } else {
 250             emit_general_interpolation(input, var->name, var->type,
 251                                        (glsl_interp_qualifier) var->data.interpolation,
 252                                        var->data.location, var->data.centroid,
 253                                        var->data.sample);
 254          }
 255          break;
 256       }
 257    }
 258 }
 259
 260 void
 261 fs_visitor::nir_setup_outputs(nir_shader *shader)
 262 {
 263    brw_wm_prog_key *key = (brw_wm_prog_key*) this->key;
 264
 265    foreach_list_typed(nir_variable, var, node, &shader->outputs) {
 266       fs_reg reg = offset(nir_outputs, var->data.driver_location);
 267
 268       int vector_elements =
 269          var->type->is_array() ? var->type->fields.array->vector_elements
 270                                : var->type->vector_elements;
 271
 272       if (stage == MESA_SHADER_VERTEX) {
 273          for (int i = 0; i < ALIGN(type_size(var->type), 4) / 4; i++) {
 274             int output = var->data.location + i;
 275             this->outputs[output] = offset(reg, 4 * i);
 276             this->output_components[output] = vector_elements;
 277          }
 278       } else if (var->data.index > 0) {
 279          assert(var->data.location == FRAG_RESULT_DATA0);
 280          assert(var->data.index == 1);
 281          this->dual_src_output = reg;
 282          this->do_dual_src = true;
 283       } else if (var->data.location == FRAG_RESULT_COLOR) {
 284          /* Writing gl_FragColor outputs to all color regions. */
 285          for (unsigned int i = 0; i < MAX2(key->nr_color_regions, 1); i++) {
 286             this->outputs[i] = reg;
 287             this->output_components[i] = 4;
 288          }
 289       } else if (var->data.location == FRAG_RESULT_DEPTH) {
 290          this->frag_depth = reg;
 291       } else if (var->data.location == FRAG_RESULT_SAMPLE_MASK) {
 292          this->sample_mask = reg;
 293       } else {
 294          /* gl_FragData or a user-defined FS output */
 295          assert(var->data.location >= FRAG_RESULT_DATA0 &&
 296                 var->data.location < FRAG_RESULT_DATA0 + BRW_MAX_DRAW_BUFFERS);
 297
 298          /* General color output. */
 299          for (unsigned int i = 0; i < MAX2(1, var->type->length); i++) {
 300             int output = var->data.location - FRAG_RESULT_DATA0 + i;
 301             this->outputs[output] = offset(reg, vector_elements * i);
 302             this->output_components[output] = vector_elements;
 303          }
 304       }
 305    }
 306 }
 307
 308 void
 309 fs_visitor::nir_setup_uniforms(nir_shader *shader)
 310 {
 311    uniforms = shader->num_uniforms;
 312
 313    /* We split the uniform register file in half.  The first half is
 314     * entirely direct uniforms.  The second half is indirect.
 315     */
 316    param_size[0] = num_direct_uniforms;
 317    if (shader->num_uniforms > num_direct_uniforms)
 318       param_size[num_direct_uniforms] = shader->num_uniforms - num_direct_uniforms;
 319
 320    if (dispatch_width != 8)
 321       return;
 322
 323    foreach_list_typed(nir_variable, var, node, &shader->uniforms) {
 324       /* UBO's and atomics don't take up space in the uniform file */
 325
 326       if (var->interface_type != NULL || var->type->contains_atomic())
 327          continue;
 328
 329       if (strncmp(var->name, "gl_", 3) == 0)
 330          nir_setup_builtin_uniform(var);
 331       else
 332          nir_setup_uniform(var);
 333    }
 334 }
 335
 336 void
 337 fs_visitor::nir_setup_uniform(nir_variable *var)
 338 {
 339    int namelen = strlen(var->name);
 340
 341    /* The data for our (non-builtin) uniforms is stored in a series of
 342       * gl_uniform_driver_storage structs for each subcomponent that
 343       * glGetUniformLocation() could name.  We know it's been set up in the
 344       * same order we'd walk the type, so walk the list of storage and find
 345       * anything with our name, or the prefix of a component that starts with
 346       * our name.
 347       */
 348    unsigned index = var->data.driver_location;
 349    for (unsigned u = 0; u < shader_prog->NumUserUniformStorage; u++) {
 350       struct gl_uniform_storage *storage = &shader_prog->UniformStorage[u];
 351
 352       if (strncmp(var->name, storage->name, namelen) != 0 ||
 353          (storage->name[namelen] != 0 &&
 354          storage->name[namelen] != '.' &&
 355          storage->name[namelen] != '[')) {
 356          continue;
 357       }
 358
 359       unsigned slots = storage->type->component_slots();
 360       if (storage->array_elements)
 361          slots *= storage->array_elements;
 362
 363       for (unsigned i = 0; i < slots; i++) {
 364          stage_prog_data->param[index++] = &storage->storage[i];
 365       }
 366    }
 367
 368    /* Make sure we actually initialized the right amount of stuff here. */
 369    assert(var->data.driver_location + var->type->component_slots() == index);
 370 }
 371
 372 void
 373 fs_visitor::nir_setup_builtin_uniform(nir_variable *var)
 374 {
 375    const nir_state_slot *const slots = var->state_slots;
 376    assert(var->state_slots != NULL);
 377
 378    unsigned uniform_index = var->data.driver_location;
 379    for (unsigned int i = 0; i < var->num_state_slots; i++) {
 380       /* This state reference has already been setup by ir_to_mesa, but we'll
 381        * get the same index back here.
 382        */
 383       int index = _mesa_add_state_reference(this->prog->Parameters,
 384                                             (gl_state_index *)slots[i].tokens);
 385
 386       /* Add each of the unique swizzles of the element as a parameter.
 387        * This'll end up matching the expected layout of the
 388        * array/matrix/structure we're trying to fill in.
 389        */
 390       int last_swiz = -1;
 391       for (unsigned int j = 0; j < 4; j++) {
 392          int swiz = GET_SWZ(slots[i].swizzle, j);
 393          if (swiz == last_swiz)
 394             break;
 395          last_swiz = swiz;
 396
 397          stage_prog_data->param[uniform_index++] =
 398             &prog->Parameters->ParameterValues[index][swiz];
 399       }
 400    }
 401 }
 402
 403 static bool
 404 emit_system_values_block(nir_block *block, void *void_visitor)
 405 {
 406    fs_visitor *v = (fs_visitor *)void_visitor;
 407    fs_reg *reg;
 408
 409    nir_foreach_instr(block, instr) {
 410       if (instr->type != nir_instr_type_intrinsic)
 411          continue;
 412
 413       nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
 414       switch (intrin->intrinsic) {
 415       case nir_intrinsic_load_vertex_id:
 416          unreachable("should be lowered by lower_vertex_id().");
 417
 418       case nir_intrinsic_load_vertex_id_zero_base:
 419          assert(v->stage == MESA_SHADER_VERTEX);
 420          reg = &v->nir_system_values[SYSTEM_VALUE_VERTEX_ID_ZERO_BASE];
 421          if (reg->file == BAD_FILE)
 422             *reg = *v->emit_vs_system_value(SYSTEM_VALUE_VERTEX_ID_ZERO_BASE);
 423          break;
 424
 425       case nir_intrinsic_load_base_vertex:
 426          assert(v->stage == MESA_SHADER_VERTEX);
 427          reg = &v->nir_system_values[SYSTEM_VALUE_BASE_VERTEX];
 428          if (reg->file == BAD_FILE)
 429             *reg = *v->emit_vs_system_value(SYSTEM_VALUE_BASE_VERTEX);
 430          break;
 431
 432       case nir_intrinsic_load_instance_id:
 433          assert(v->stage == MESA_SHADER_VERTEX);
 434          reg = &v->nir_system_values[SYSTEM_VALUE_INSTANCE_ID];
 435          if (reg->file == BAD_FILE)
 436             *reg = *v->emit_vs_system_value(SYSTEM_VALUE_INSTANCE_ID);
 437          break;
 438
 439       case nir_intrinsic_load_sample_pos:
 440          assert(v->stage == MESA_SHADER_FRAGMENT);
 441          reg = &v->nir_system_values[SYSTEM_VALUE_SAMPLE_POS];
 442          if (reg->file == BAD_FILE)
 443             *reg = *v->emit_samplepos_setup();
 444          break;
 445
 446       case nir_intrinsic_load_sample_id:
 447          assert(v->stage == MESA_SHADER_FRAGMENT);
 448          reg = &v->nir_system_values[SYSTEM_VALUE_SAMPLE_ID];
 449          if (reg->file == BAD_FILE)
 450             *reg = *v->emit_sampleid_setup();
 451          break;
 452
 453       case nir_intrinsic_load_sample_mask_in:
 454          assert(v->stage == MESA_SHADER_FRAGMENT);
 455          assert(v->brw->gen >= 7);
 456          reg = &v->nir_system_values[SYSTEM_VALUE_SAMPLE_MASK_IN];
 457          if (reg->file == BAD_FILE)
 458             *reg = fs_reg(retype(brw_vec8_grf(v->payload.sample_mask_in_reg, 0),
 459                                  BRW_REGISTER_TYPE_D));
 460          break;
 461
 462       default:
 463          break;
 464       }
 465    }
 466
 467    return true;
 468 }
 469
 470 void
 471 fs_visitor::nir_emit_system_values(nir_shader *shader)
 472 {
 473    nir_system_values = ralloc_array(mem_ctx, fs_reg, SYSTEM_VALUE_MAX);
 474    nir_foreach_overload(shader, overload) {
 475       assert(strcmp(overload->function->name, "main") == 0);
 476       assert(overload->impl);
 477       nir_foreach_block(overload->impl, emit_system_values_block, this);
 478    }
 479 }
 480
 481 void
 482 fs_visitor::nir_emit_impl(nir_function_impl *impl)
 483 {
 484    nir_locals = reralloc(mem_ctx, nir_locals, fs_reg, impl->reg_alloc);
 485    foreach_list_typed(nir_register, reg, node, &impl->registers) {
 486       unsigned array_elems =
 487          reg->num_array_elems == 0 ? 1 : reg->num_array_elems;
 488       unsigned size = array_elems * reg->num_components;
 489       nir_locals[reg->index] = vgrf(size);
 490    }
 491
 492    nir_emit_cf_list(&impl->body);
 493 }
 494
 495 void
 496 fs_visitor::nir_emit_cf_list(exec_list *list)
 497 {
 498    exec_list_validate(list);
 499    foreach_list_typed(nir_cf_node, node, node, list) {
 500       switch (node->type) {
 501       case nir_cf_node_if:
 502          nir_emit_if(nir_cf_node_as_if(node));
 503          break;
 504
 505       case nir_cf_node_loop:
 506          nir_emit_loop(nir_cf_node_as_loop(node));
 507          break;
 508
 509       case nir_cf_node_block:
 510          nir_emit_block(nir_cf_node_as_block(node));
 511          break;
 512
 513       default:
 514          unreachable("Invalid CFG node block");
 515       }
 516    }
 517 }
 518
 519 void
 520 fs_visitor::nir_emit_if(nir_if *if_stmt)
 521 {
 522    /* first, put the condition into f0 */
 523    fs_inst *inst = emit(MOV(reg_null_d,
 524                             retype(get_nir_src(if_stmt->condition),
 525                                    BRW_REGISTER_TYPE_D)));
 526    inst->conditional_mod = BRW_CONDITIONAL_NZ;
 527
 528    emit(IF(BRW_PREDICATE_NORMAL));
 529
 530    nir_emit_cf_list(&if_stmt->then_list);
 531
 532    /* note: if the else is empty, dead CF elimination will remove it */
 533    emit(BRW_OPCODE_ELSE);
 534
 535    nir_emit_cf_list(&if_stmt->else_list);
 536
 537    emit(BRW_OPCODE_ENDIF);
 538
 539    if (!try_replace_with_sel() && brw->gen < 6) {
 540       no16("Can't support (non-uniform) control flow on SIMD16\n");
 541    }
 542 }
 543
 544 void
 545 fs_visitor::nir_emit_loop(nir_loop *loop)
 546 {
 547    if (brw->gen < 6) {
 548       no16("Can't support (non-uniform) control flow on SIMD16\n");
 549    }
 550
 551    emit(BRW_OPCODE_DO);
 552
 553    nir_emit_cf_list(&loop->body);
 554
 555    emit(BRW_OPCODE_WHILE);
 556 }
 557
 558 void
 559 fs_visitor::nir_emit_block(nir_block *block)
 560 {
 561    nir_foreach_instr(block, instr) {
 562       nir_emit_instr(instr);
 563    }
 564 }
 565
 566 void
 567 fs_visitor::nir_emit_instr(nir_instr *instr)
 568 {
 569    switch (instr->type) {
 570    case nir_instr_type_alu:
 571       nir_emit_alu(nir_instr_as_alu(instr));
 572       break;
 573
 574    case nir_instr_type_intrinsic:
 575       nir_emit_intrinsic(nir_instr_as_intrinsic(instr));
 576       break;
 577
 578    case nir_instr_type_tex:
 579       nir_emit_texture(nir_instr_as_tex(instr));
 580       break;
 581
 582    case nir_instr_type_load_const:
 583       /* We can hit these, but we do nothing now and use them as
 584        * immediates later.
 585        */
 586       break;
 587
 588    case nir_instr_type_jump:
 589       nir_emit_jump(nir_instr_as_jump(instr));
 590       break;
 591
 592    default:
 593       unreachable("unknown instruction type");
 594    }
 595 }
 596
 597 static brw_reg_type
 598 brw_type_for_nir_type(nir_alu_type type)
 599 {
 600    switch (type) {
 601    case nir_type_unsigned:
 602       return BRW_REGISTER_TYPE_UD;
 603    case nir_type_bool:
 604    case nir_type_int:
 605       return BRW_REGISTER_TYPE_D;
 606    case nir_type_float:
 607       return BRW_REGISTER_TYPE_F;
 608    default:
 609       unreachable("unknown type");
 610    }
 611
 612    return BRW_REGISTER_TYPE_F;
 613 }
 614
 615 bool
 616 fs_visitor::optimize_frontfacing_ternary(nir_alu_instr *instr,
 617                                          const fs_reg &result)
 618 {
 619    if (instr->src[0].src.is_ssa ||
 620        !instr->src[0].src.reg.reg ||
 621        !instr->src[0].src.reg.reg->parent_instr)
 622       return false;
 623
 624    if (instr->src[0].src.reg.reg->parent_instr->type !=
 625        nir_instr_type_intrinsic)
 626       return false;
 627
 628    nir_intrinsic_instr *src0 =
 629       nir_instr_as_intrinsic(instr->src[0].src.reg.reg->parent_instr);
 630
 631    if (src0->intrinsic != nir_intrinsic_load_front_face)
 632       return false;
 633
 634    nir_const_value *value1 = nir_src_as_const_value(instr->src[1].src);
 635    if (!value1 || fabsf(value1->f[0]) != 1.0f)
 636       return false;
 637
 638    nir_const_value *value2 = nir_src_as_const_value(instr->src[2].src);
 639    if (!value2 || fabsf(value2->f[0]) != 1.0f)
 640       return false;
 641
 642    fs_reg tmp = vgrf(glsl_type::int_type);
 643
 644    if (brw->gen >= 6) {
 645       /* Bit 15 of g0.0 is 0 if the polygon is front facing. */
 646       fs_reg g0 = fs_reg(retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_W));
 647
 648       /* For (gl_FrontFacing ? 1.0 : -1.0), emit:
 649        *
 650        *    or(8)  tmp.1<2>W  g0.0<0,1,0>W  0x00003f80W
 651        *    and(8) dst<1>D    tmp<8,8,1>D   0xbf800000D
 652        *
 653        * and negate g0.0<0,1,0>W for (gl_FrontFacing ? -1.0 : 1.0).
 654        *
 655        * This negation looks like it's safe in practice, because bits 0:4 will
 656        * surely be TRIANGLES
 657        */
 658
 659       if (value1->f[0] == -1.0f) {
 660          g0.negate = true;
 661       }
 662
 663       tmp.type = BRW_REGISTER_TYPE_W;
 664       tmp.subreg_offset = 2;
 665       tmp.stride = 2;
 666
 667       fs_inst *or_inst = emit(OR(tmp, g0, fs_reg(0x3f80)));
 668       or_inst->src[1].type = BRW_REGISTER_TYPE_UW;
 669
 670       tmp.type = BRW_REGISTER_TYPE_D;
 671       tmp.subreg_offset = 0;
 672       tmp.stride = 1;
 673    } else {
 674       /* Bit 31 of g1.6 is 0 if the polygon is front facing. */
 675       fs_reg g1_6 = fs_reg(retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_D));
 676
 677       /* For (gl_FrontFacing ? 1.0 : -1.0), emit:
 678        *
 679        *    or(8)  tmp<1>D  g1.6<0,1,0>D  0x3f800000D
 680        *    and(8) dst<1>D  tmp<8,8,1>D   0xbf800000D
 681        *
 682        * and negate g1.6<0,1,0>D for (gl_FrontFacing ? -1.0 : 1.0).
 683        *
 684        * This negation looks like it's safe in practice, because bits 0:4 will
 685        * surely be TRIANGLES
 686        */
 687
 688       if (value1->f[0] == -1.0f) {
 689          g1_6.negate = true;
 690       }
 691
 692       emit(OR(tmp, g1_6, fs_reg(0x3f800000)));
 693    }
 694    emit(AND(retype(result, BRW_REGISTER_TYPE_D), tmp, fs_reg(0xbf800000)));
 695
 696    return true;
 697 }
 698
 699 void
 700 fs_visitor::nir_emit_alu(nir_alu_instr *instr)
 701 {
 702    struct brw_wm_prog_key *fs_key = (struct brw_wm_prog_key *) this->key;
 703    fs_inst *inst;
 704
 705    fs_reg result = get_nir_dest(instr->dest.dest);
 706    result.type = brw_type_for_nir_type(nir_op_infos[instr->op].output_type);
 707
 708    fs_reg op[4];
 709    for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
 710       op[i] = get_nir_src(instr->src[i].src);
 711       op[i].type = brw_type_for_nir_type(nir_op_infos[instr->op].input_types[i]);
 712       op[i].abs = instr->src[i].abs;
 713       op[i].negate = instr->src[i].negate;
 714    }
 715
 716    /* We get a bunch of mov's out of the from_ssa pass and they may still
 717     * be vectorized.  We'll handle them as a special-case.  We'll also
 718     * handle vecN here because it's basically the same thing.
 719     */
 720    switch (instr->op) {
 721    case nir_op_imov:
 722    case nir_op_fmov:
 723    case nir_op_vec2:
 724    case nir_op_vec3:
 725    case nir_op_vec4: {
 726       fs_reg temp = result;
 727       bool need_extra_copy = false;
 728       for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
 729          if (!instr->src[i].src.is_ssa &&
 730              instr->dest.dest.reg.reg == instr->src[i].src.reg.reg) {
 731             need_extra_copy = true;
 732             temp = retype(vgrf(4), result.type);
 733             break;
 734          }
 735       }
 736
 737       for (unsigned i = 0; i < 4; i++) {
 738          if (!(instr->dest.write_mask & (1 << i)))
 739             continue;
 740
 741          if (instr->op == nir_op_imov || instr->op == nir_op_fmov) {
 742             inst = emit(MOV(offset(temp, i),
 743                         offset(op[0], instr->src[0].swizzle[i])));
 744          } else {
 745             inst = emit(MOV(offset(temp, i),
 746                         offset(op[i], instr->src[i].swizzle[0])));
 747          }
 748          inst->saturate = instr->dest.saturate;
 749       }
 750
 751       /* In this case the source and destination registers were the same,
 752        * so we need to insert an extra set of moves in order to deal with
 753        * any swizzling.
 754        */
 755       if (need_extra_copy) {
 756          for (unsigned i = 0; i < 4; i++) {
 757             if (!(instr->dest.write_mask & (1 << i)))
 758                continue;
 759
 760             emit(MOV(offset(result, i), offset(temp, i)));
 761          }
 762       }
 763       return;
 764    }
 765    default:
 766       break;
 767    }
 768
 769    /* At this point, we have dealt with any instruction that operates on
 770     * more than a single channel.  Therefore, we can just adjust the source
 771     * and destination registers for that channel and emit the instruction.
 772     */
 773    unsigned channel = 0;
 774    if (nir_op_infos[instr->op].output_size == 0) {
 775       /* Since NIR is doing the scalarizing for us, we should only ever see
 776        * vectorized operations with a single channel.
 777        */
 778       assert(_mesa_bitcount(instr->dest.write_mask) == 1);
 779       channel = ffs(instr->dest.write_mask) - 1;
 780
 781       result = offset(result, channel);
 782    }
 783
 784    for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
 785       assert(nir_op_infos[instr->op].input_sizes[i] < 2);
 786       op[i] = offset(op[i], instr->src[i].swizzle[channel]);
 787    }
 788
 789    switch (instr->op) {
 790    case nir_op_i2f:
 791    case nir_op_u2f:
 792       inst = emit(MOV(result, op[0]));
 793       inst->saturate = instr->dest.saturate;
 794       break;
 795
 796    case nir_op_f2i:
 797    case nir_op_f2u:
 798       emit(MOV(result, op[0]));
 799       break;
 800
 801    case nir_op_fsign: {
 802       /* AND(val, 0x80000000) gives the sign bit.
 803          *
 804          * Predicated OR ORs 1.0 (0x3f800000) with the sign bit if val is not
 805          * zero.
 806          */
 807       emit(CMP(reg_null_f, op[0], fs_reg(0.0f), BRW_CONDITIONAL_NZ));
 808
 809       fs_reg result_int = retype(result, BRW_REGISTER_TYPE_UD);
 810       op[0].type = BRW_REGISTER_TYPE_UD;
 811       result.type = BRW_REGISTER_TYPE_UD;
 812       emit(AND(result_int, op[0], fs_reg(0x80000000u)));
 813
 814       inst = emit(OR(result_int, result_int, fs_reg(0x3f800000u)));
 815       inst->predicate = BRW_PREDICATE_NORMAL;
 816       if (instr->dest.saturate) {
 817          inst = emit(MOV(result, result));
 818          inst->saturate = true;
 819       }
 820       break;
 821    }
 822
 823    case nir_op_isign:
 824       /*  ASR(val, 31) -> negative val generates 0xffffffff (signed -1).
 825        *               -> non-negative val generates 0x00000000.
 826        *  Predicated OR sets 1 if val is positive.
 827        */
 828       emit(CMP(reg_null_d, op[0], fs_reg(0), BRW_CONDITIONAL_G));
 829       emit(ASR(result, op[0], fs_reg(31)));
 830       inst = emit(OR(result, result, fs_reg(1)));
 831       inst->predicate = BRW_PREDICATE_NORMAL;
 832       break;
 833
 834    case nir_op_frcp:
 835       inst = emit_math(SHADER_OPCODE_RCP, result, op[0]);
 836       inst->saturate = instr->dest.saturate;
 837       break;
 838
 839    case nir_op_fexp2:
 840       inst = emit_math(SHADER_OPCODE_EXP2, result, op[0]);
 841       inst->saturate = instr->dest.saturate;
 842       break;
 843
 844    case nir_op_flog2:
 845       inst = emit_math(SHADER_OPCODE_LOG2, result, op[0]);
 846       inst->saturate = instr->dest.saturate;
 847       break;
 848
 849    case nir_op_fexp:
 850    case nir_op_flog:
 851       unreachable("not reached: should be handled by ir_explog_to_explog2");
 852
 853    case nir_op_fsin:
 854    case nir_op_fsin_reduced:
 855       inst = emit_math(SHADER_OPCODE_SIN, result, op[0]);
 856       inst->saturate = instr->dest.saturate;
 857       break;
 858
 859    case nir_op_fcos:
 860    case nir_op_fcos_reduced:
 861       inst = emit_math(SHADER_OPCODE_COS, result, op[0]);
 862       inst->saturate = instr->dest.saturate;
 863       break;
 864
 865    case nir_op_fddx:
 866       if (fs_key->high_quality_derivatives) {
 867          inst = emit(FS_OPCODE_DDX_FINE, result, op[0]);
 868       } else {
 869          inst = emit(FS_OPCODE_DDX_COARSE, result, op[0]);
 870       }
 871       inst->saturate = instr->dest.saturate;
 872       break;
 873    case nir_op_fddx_fine:
 874       inst = emit(FS_OPCODE_DDX_FINE, result, op[0]);
 875       inst->saturate = instr->dest.saturate;
 876       break;
 877    case nir_op_fddx_coarse:
 878       inst = emit(FS_OPCODE_DDX_COARSE, result, op[0]);
 879       inst->saturate = instr->dest.saturate;
 880       break;
 881    case nir_op_fddy:
 882       if (fs_key->high_quality_derivatives) {
 883          inst = emit(FS_OPCODE_DDY_FINE, result, op[0],
 884                      fs_reg(fs_key->render_to_fbo));
 885       } else {
 886          inst = emit(FS_OPCODE_DDY_COARSE, result, op[0],
 887                      fs_reg(fs_key->render_to_fbo));
 888       }
 889       inst->saturate = instr->dest.saturate;
 890       break;
 891    case nir_op_fddy_fine:
 892       inst = emit(FS_OPCODE_DDY_FINE, result, op[0],
 893                   fs_reg(fs_key->render_to_fbo));
 894       inst->saturate = instr->dest.saturate;
 895       break;
 896    case nir_op_fddy_coarse:
 897       inst = emit(FS_OPCODE_DDY_COARSE, result, op[0],
 898                   fs_reg(fs_key->render_to_fbo));
 899       inst->saturate = instr->dest.saturate;
 900       break;
 901
 902    case nir_op_fadd:
 903    case nir_op_iadd:
 904       inst = emit(ADD(result, op[0], op[1]));
 905       inst->saturate = instr->dest.saturate;
 906       break;
 907
 908    case nir_op_fmul:
 909       inst = emit(MUL(result, op[0], op[1]));
 910       inst->saturate = instr->dest.saturate;
 911       break;
 912
 913    case nir_op_imul: {
 914       if (brw->gen >= 8) {
 915          emit(MUL(result, op[0], op[1]));
 916          break;
 917       } else {
 918          nir_const_value *value0 = nir_src_as_const_value(instr->src[0].src);
 919          nir_const_value *value1 = nir_src_as_const_value(instr->src[1].src);
 920
 921          if (value0 && value0->u[0] < (1 << 16)) {
 922             if (brw->gen < 7) {
 923                emit(MUL(result, op[0], op[1]));
 924             } else {
 925                emit(MUL(result, op[1], op[0]));
 926             }
 927             break;
 928          } else if (value1 && value1->u[0] < (1 << 16)) {
 929             if (brw->gen < 7) {
 930                emit(MUL(result, op[1], op[0]));
 931             } else {
 932                emit(MUL(result, op[0], op[1]));
 933             }
 934             break;
 935          }
 936       }
 937
 938       if (brw->gen >= 7)
 939          no16("SIMD16 explicit accumulator operands unsupported\n");
 940
 941       struct brw_reg acc = retype(brw_acc_reg(dispatch_width), result.type);
 942
 943       emit(MUL(acc, op[0], op[1]));
 944       emit(MACH(reg_null_d, op[0], op[1]));
 945       emit(MOV(result, fs_reg(acc)));
 946       break;
 947    }
 948
 949    case nir_op_imul_high:
 950    case nir_op_umul_high: {
 951       if (brw->gen >= 7)
 952          no16("SIMD16 explicit accumulator operands unsupported\n");
 953
 954       struct brw_reg acc = retype(brw_acc_reg(dispatch_width), result.type);
 955
 956       emit(MUL(acc, op[0], op[1]));
 957       emit(MACH(result, op[0], op[1]));
 958       break;
 959    }
 960
 961    case nir_op_idiv:
 962    case nir_op_udiv:
 963       emit_math(SHADER_OPCODE_INT_QUOTIENT, result, op[0], op[1]);
 964       break;
 965
 966    case nir_op_uadd_carry: {
 967       if (brw->gen >= 7)
 968          no16("SIMD16 explicit accumulator operands unsupported\n");
 969
 970       struct brw_reg acc = retype(brw_acc_reg(dispatch_width),
 971                                   BRW_REGISTER_TYPE_UD);
 972
 973       emit(ADDC(reg_null_ud, op[0], op[1]));
 974       emit(MOV(result, fs_reg(acc)));
 975       break;
 976    }
 977
 978    case nir_op_usub_borrow: {
 979       if (brw->gen >= 7)
 980          no16("SIMD16 explicit accumulator operands unsupported\n");
 981
 982       struct brw_reg acc = retype(brw_acc_reg(dispatch_width),
 983                                   BRW_REGISTER_TYPE_UD);
 984
 985       emit(SUBB(reg_null_ud, op[0], op[1]));
 986       emit(MOV(result, fs_reg(acc)));
 987       break;
 988    }
 989
 990    case nir_op_umod:
 991       emit_math(SHADER_OPCODE_INT_REMAINDER, result, op[0], op[1]);
 992       break;
 993
 994    case nir_op_flt:
 995    case nir_op_ilt:
 996    case nir_op_ult:
 997       emit(CMP(result, op[0], op[1], BRW_CONDITIONAL_L));
 998       break;
 999
1000    case nir_op_fge:
1001    case nir_op_ige:
1002    case nir_op_uge:
1003       emit(CMP(result, op[0], op[1], BRW_CONDITIONAL_GE));
1004       break;
1005
1006    case nir_op_feq:
1007    case nir_op_ieq:
1008       emit(CMP(result, op[0], op[1], BRW_CONDITIONAL_Z));
1009       break;
1010
1011    case nir_op_fne:
1012    case nir_op_ine:
1013       emit(CMP(result, op[0], op[1], BRW_CONDITIONAL_NZ));
1014       break;
1015
1016    case nir_op_inot:
1017       if (brw->gen >= 8) {
1018          resolve_source_modifiers(&op[0]);
1019       }
1020       emit(NOT(result, op[0]));
1021       break;
1022    case nir_op_ixor:
1023       if (brw->gen >= 8) {
1024          resolve_source_modifiers(&op[0]);
1025          resolve_source_modifiers(&op[1]);
1026       }
1027       emit(XOR(result, op[0], op[1]));
1028       break;
1029    case nir_op_ior:
1030       if (brw->gen >= 8) {
1031          resolve_source_modifiers(&op[0]);
1032          resolve_source_modifiers(&op[1]);
1033       }
1034       emit(OR(result, op[0], op[1]));
1035       break;
1036    case nir_op_iand:
1037       if (brw->gen >= 8) {
1038          resolve_source_modifiers(&op[0]);
1039          resolve_source_modifiers(&op[1]);
1040       }
1041       emit(AND(result, op[0], op[1]));
1042       break;
1043
1044    case nir_op_fdot2:
1045    case nir_op_fdot3:
1046    case nir_op_fdot4:
1047    case nir_op_bany2:
1048    case nir_op_bany3:
1049    case nir_op_bany4:
1050    case nir_op_ball2:
1051    case nir_op_ball3:
1052    case nir_op_ball4:
1053    case nir_op_ball_fequal2:
1054    case nir_op_ball_iequal2:
1055    case nir_op_ball_fequal3:
1056    case nir_op_ball_iequal3:
1057    case nir_op_ball_fequal4:
1058    case nir_op_ball_iequal4:
1059    case nir_op_bany_fnequal2:
1060    case nir_op_bany_inequal2:
1061    case nir_op_bany_fnequal3:
1062    case nir_op_bany_inequal3:
1063    case nir_op_bany_fnequal4:
1064    case nir_op_bany_inequal4:
1065       unreachable("Lowered by nir_lower_alu_reductions");
1066
1067    case nir_op_fnoise1_1:
1068    case nir_op_fnoise1_2:
1069    case nir_op_fnoise1_3:
1070    case nir_op_fnoise1_4:
1071    case nir_op_fnoise2_1:
1072    case nir_op_fnoise2_2:
1073    case nir_op_fnoise2_3:
1074    case nir_op_fnoise2_4:
1075    case nir_op_fnoise3_1:
1076    case nir_op_fnoise3_2:
1077    case nir_op_fnoise3_3:
1078    case nir_op_fnoise3_4:
1079    case nir_op_fnoise4_1:
1080    case nir_op_fnoise4_2:
1081    case nir_op_fnoise4_3:
1082    case nir_op_fnoise4_4:
1083       unreachable("not reached: should be handled by lower_noise");
1084
1085    case nir_op_ldexp:
1086       unreachable("not reached: should be handled by ldexp_to_arith()");
1087
1088    case nir_op_fsqrt:
1089       inst = emit_math(SHADER_OPCODE_SQRT, result, op[0]);
1090       inst->saturate = instr->dest.saturate;
1091       break;
1092
1093    case nir_op_frsq:
1094       inst = emit_math(SHADER_OPCODE_RSQ, result, op[0]);
1095       inst->saturate = instr->dest.saturate;
1096       break;
1097
1098    case nir_op_b2i:
1099       emit(AND(result, op[0], fs_reg(1)));
1100       break;
1101    case nir_op_b2f:
1102       emit(AND(retype(result, BRW_REGISTER_TYPE_UD), op[0], fs_reg(0x3f800000u)));
1103       break;
1104
1105    case nir_op_f2b:
1106       emit(CMP(result, op[0], fs_reg(0.0f), BRW_CONDITIONAL_NZ));
1107       break;
1108    case nir_op_i2b:
1109       emit(CMP(result, op[0], fs_reg(0), BRW_CONDITIONAL_NZ));
1110       break;
1111
1112    case nir_op_ftrunc:
1113       inst = emit(RNDZ(result, op[0]));
1114       inst->saturate = instr->dest.saturate;
1115       break;
1116
1117    case nir_op_fceil: {
1118       op[0].negate = !op[0].negate;
1119       fs_reg temp = vgrf(glsl_type::float_type);
1120       emit(RNDD(temp, op[0]));
1121       temp.negate = true;
1122       inst = emit(MOV(result, temp));
1123       inst->saturate = instr->dest.saturate;
1124       break;
1125    }
1126    case nir_op_ffloor:
1127       inst = emit(RNDD(result, op[0]));
1128       inst->saturate = instr->dest.saturate;
1129       break;
1130    case nir_op_ffract:
1131       inst = emit(FRC(result, op[0]));
1132       inst->saturate = instr->dest.saturate;
1133       break;
1134    case nir_op_fround_even:
1135       inst = emit(RNDE(result, op[0]));
1136       inst->saturate = instr->dest.saturate;
1137       break;
1138
1139    case nir_op_fmin:
1140    case nir_op_imin:
1141    case nir_op_umin:
1142       if (brw->gen >= 6) {
1143          inst = emit(BRW_OPCODE_SEL, result, op[0], op[1]);
1144          inst->conditional_mod = BRW_CONDITIONAL_L;
1145       } else {
1146          emit(CMP(reg_null_d, op[0], op[1], BRW_CONDITIONAL_L));
1147          inst = emit(SEL(result, op[0], op[1]));
1148          inst->predicate = BRW_PREDICATE_NORMAL;
1149       }
1150       inst->saturate = instr->dest.saturate;
1151       break;
1152
1153    case nir_op_fmax:
1154    case nir_op_imax:
1155    case nir_op_umax:
1156       if (brw->gen >= 6) {
1157          inst = emit(BRW_OPCODE_SEL, result, op[0], op[1]);
1158          inst->conditional_mod = BRW_CONDITIONAL_GE;
1159       } else {
1160          emit(CMP(reg_null_d, op[0], op[1], BRW_CONDITIONAL_GE));
1161          inst = emit(SEL(result, op[0], op[1]));
1162          inst->predicate = BRW_PREDICATE_NORMAL;
1163       }
1164       inst->saturate = instr->dest.saturate;
1165       break;
1166
1167    case nir_op_pack_snorm_2x16:
1168    case nir_op_pack_snorm_4x8:
1169    case nir_op_pack_unorm_2x16:
1170    case nir_op_pack_unorm_4x8:
1171    case nir_op_unpack_snorm_2x16:
1172    case nir_op_unpack_snorm_4x8:
1173    case nir_op_unpack_unorm_2x16:
1174    case nir_op_unpack_unorm_4x8:
1175    case nir_op_unpack_half_2x16:
1176    case nir_op_pack_half_2x16:
1177       unreachable("not reached: should be handled by lower_packing_builtins");
1178
1179    case nir_op_unpack_half_2x16_split_x:
1180       inst = emit(FS_OPCODE_UNPACK_HALF_2x16_SPLIT_X, result, op[0]);
1181       inst->saturate = instr->dest.saturate;
1182       break;
1183    case nir_op_unpack_half_2x16_split_y:
1184       inst = emit(FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y, result, op[0]);
1185       inst->saturate = instr->dest.saturate;
1186       break;
1187
1188    case nir_op_fpow:
1189       inst = emit_math(SHADER_OPCODE_POW, result, op[0], op[1]);
1190       inst->saturate = instr->dest.saturate;
1191       break;
1192
1193    case nir_op_bitfield_reverse:
1194       emit(BFREV(result, op[0]));
1195       break;
1196
1197    case nir_op_bit_count:
1198       emit(CBIT(result, op[0]));
1199       break;
1200
1201    case nir_op_ufind_msb:
1202    case nir_op_ifind_msb: {
1203       emit(FBH(retype(result, BRW_REGISTER_TYPE_UD), op[0]));
1204
1205       /* FBH counts from the MSB side, while GLSL's findMSB() wants the count
1206        * from the LSB side. If FBH didn't return an error (0xFFFFFFFF), then
1207        * subtract the result from 31 to convert the MSB count into an LSB count.
1208        */
1209
1210       emit(CMP(reg_null_d, result, fs_reg(-1), BRW_CONDITIONAL_NZ));
1211       fs_reg neg_result(result);
1212       neg_result.negate = true;
1213       inst = emit(ADD(result, neg_result, fs_reg(31)));
1214       inst->predicate = BRW_PREDICATE_NORMAL;
1215       break;
1216    }
1217
1218    case nir_op_find_lsb:
1219       emit(FBL(result, op[0]));
1220       break;
1221
1222    case nir_op_ubitfield_extract:
1223    case nir_op_ibitfield_extract:
1224       emit(BFE(result, op[2], op[1], op[0]));
1225       break;
1226    case nir_op_bfm:
1227       emit(BFI1(result, op[0], op[1]));
1228       break;
1229    case nir_op_bfi:
1230       emit(BFI2(result, op[0], op[1], op[2]));
1231       break;
1232
1233    case nir_op_bitfield_insert:
1234       unreachable("not reached: should be handled by "
1235                   "lower_instructions::bitfield_insert_to_bfm_bfi");
1236
1237    case nir_op_ishl:
1238       emit(SHL(result, op[0], op[1]));
1239       break;
1240    case nir_op_ishr:
1241       emit(ASR(result, op[0], op[1]));
1242       break;
1243    case nir_op_ushr:
1244       emit(SHR(result, op[0], op[1]));
1245       break;
1246
1247    case nir_op_pack_half_2x16_split:
1248       emit(FS_OPCODE_PACK_HALF_2x16_SPLIT, result, op[0], op[1]);
1249       break;
1250
1251    case nir_op_ffma:
1252       inst = emit(MAD(result, op[2], op[1], op[0]));
1253       inst->saturate = instr->dest.saturate;
1254       break;
1255
1256    case nir_op_flrp:
1257       inst = emit_lrp(result, op[0], op[1], op[2]);
1258       inst->saturate = instr->dest.saturate;
1259       break;
1260
1261    case nir_op_bcsel:
1262       if (optimize_frontfacing_ternary(instr, result))
1263          return;
1264
1265       emit(CMP(reg_null_d, op[0], fs_reg(0), BRW_CONDITIONAL_NZ));
1266       inst = emit(SEL(result, op[1], op[2]));
1267       inst->predicate = BRW_PREDICATE_NORMAL;
1268       break;
1269
1270    default:
1271       unreachable("unhandled instruction");
1272    }
1273
1274    /* If we need to do a boolean resolve, replace the result with -(x & 1)
1275     * to sign extend the low bit to 0/~0
1276     */
1277    if (brw->gen <= 5 &&
1278        (instr->instr.pass_flags & BRW_NIR_BOOLEAN_MASK) == BRW_NIR_BOOLEAN_NEEDS_RESOLVE) {
1279       fs_reg masked = vgrf(glsl_type::int_type);
1280       emit(AND(masked, result, fs_reg(1)));
1281       masked.negate = true;
1282       emit(MOV(retype(result, BRW_REGISTER_TYPE_D), masked));
1283    }
1284 }
1285
1286 fs_reg
1287 fs_visitor::get_nir_src(nir_src src)
1288 {
1289    if (src.is_ssa) {
1290       assert(src.ssa->parent_instr->type == nir_instr_type_load_const);
1291       nir_load_const_instr *load = nir_instr_as_load_const(src.ssa->parent_instr);
1292       fs_reg reg = vgrf(src.ssa->num_components);
1293       reg.type = BRW_REGISTER_TYPE_D;
1294
1295       for (unsigned i = 0; i < src.ssa->num_components; ++i)
1296          emit(MOV(offset(reg, i), fs_reg(load->value.i[i])));
1297
1298       return reg;
1299    } else {
1300       fs_reg reg;
1301       if (src.reg.reg->is_global)
1302          reg = nir_globals[src.reg.reg->index];
1303       else
1304          reg = nir_locals[src.reg.reg->index];
1305
1306       /* to avoid floating-point denorm flushing problems, set the type by
1307        * default to D - instructions that need floating point semantics will set
1308        * this to F if they need to
1309        */
1310       reg = retype(offset(reg, src.reg.base_offset), BRW_REGISTER_TYPE_D);
1311       if (src.reg.indirect) {
1312          reg.reladdr = new(mem_ctx) fs_reg();
1313          *reg.reladdr = retype(get_nir_src(*src.reg.indirect),
1314                                BRW_REGISTER_TYPE_D);
1315       }
1316
1317       return reg;
1318    }
1319 }
1320
1321 fs_reg
1322 fs_visitor::get_nir_dest(nir_dest dest)
1323 {
1324    fs_reg reg;
1325    if (dest.reg.reg->is_global)
1326       reg = nir_globals[dest.reg.reg->index];
1327    else
1328       reg = nir_locals[dest.reg.reg->index];
1329
1330    reg = offset(reg, dest.reg.base_offset);
1331    if (dest.reg.indirect) {
1332       reg.reladdr = new(mem_ctx) fs_reg();
1333       *reg.reladdr = retype(get_nir_src(*dest.reg.indirect),
1334                             BRW_REGISTER_TYPE_D);
1335    }
1336
1337    return reg;
1338 }
1339
1340 void
1341 fs_visitor::emit_percomp(fs_inst *inst, unsigned wr_mask)
1342 {
1343    for (unsigned i = 0; i < 4; i++) {
1344       if (!((wr_mask >> i) & 1))
1345          continue;
1346
1347       fs_inst *new_inst = new(mem_ctx) fs_inst(*inst);
1348       new_inst->dst = offset(new_inst->dst, i);
1349       for (unsigned j = 0; j < new_inst->sources; j++)
1350          if (inst->src[j].file == GRF)
1351             new_inst->src[j] = offset(new_inst->src[j], i);
1352
1353       emit(new_inst);
1354    }
1355 }
1356
1357 void
1358 fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
1359 {
1360    fs_reg dest;
1361    if (nir_intrinsic_infos[instr->intrinsic].has_dest)
1362       dest = get_nir_dest(instr->dest);
1363
1364    bool has_indirect = false;
1365
1366    switch (instr->intrinsic) {
1367    case nir_intrinsic_discard:
1368    case nir_intrinsic_discard_if: {
1369       /* We track our discarded pixels in f0.1.  By predicating on it, we can
1370        * update just the flag bits that aren't yet discarded.  If there's no
1371        * condition, we emit a CMP of g0 != g0, so all currently executing
1372        * channels will get turned off.
1373        */
1374       fs_inst *cmp;
1375       if (instr->intrinsic == nir_intrinsic_discard_if) {
1376          cmp = emit(CMP(reg_null_f, get_nir_src(instr->src[0]),
1377                         fs_reg(0), BRW_CONDITIONAL_Z));
1378       } else {
1379          fs_reg some_reg = fs_reg(retype(brw_vec8_grf(0, 0),
1380                                        BRW_REGISTER_TYPE_UW));
1381          cmp = emit(CMP(reg_null_f, some_reg, some_reg, BRW_CONDITIONAL_NZ));
1382       }
1383       cmp->predicate = BRW_PREDICATE_NORMAL;
1384       cmp->flag_subreg = 1;
1385
1386       if (brw->gen >= 6) {
1387          emit_discard_jump();
1388       }
1389       break;
1390    }
1391
1392    case nir_intrinsic_atomic_counter_inc:
1393    case nir_intrinsic_atomic_counter_dec:
1394    case nir_intrinsic_atomic_counter_read: {
1395       unsigned surf_index = prog_data->binding_table.abo_start +
1396                             (unsigned) instr->const_index[0];
1397       fs_reg offset = fs_reg(get_nir_src(instr->src[0]));
1398
1399       switch (instr->intrinsic) {
1400          case nir_intrinsic_atomic_counter_inc:
1401             emit_untyped_atomic(BRW_AOP_INC, surf_index, dest, offset,
1402                                 fs_reg(), fs_reg());
1403             break;
1404          case nir_intrinsic_atomic_counter_dec:
1405             emit_untyped_atomic(BRW_AOP_PREDEC, surf_index, dest, offset,
1406                                 fs_reg(), fs_reg());
1407             break;
1408          case nir_intrinsic_atomic_counter_read:
1409             emit_untyped_surface_read(surf_index, dest, offset);
1410             break;
1411          default:
1412             unreachable("Unreachable");
1413       }
1414       break;
1415    }
1416
1417    case nir_intrinsic_load_front_face:
1418       emit(MOV(retype(dest, BRW_REGISTER_TYPE_D),
1419                *emit_frontfacing_interpolation()));
1420       break;
1421
1422    case nir_intrinsic_load_vertex_id:
1423       unreachable("should be lowered by lower_vertex_id()");
1424
1425    case nir_intrinsic_load_vertex_id_zero_base: {
1426       fs_reg vertex_id = nir_system_values[SYSTEM_VALUE_VERTEX_ID_ZERO_BASE];
1427       assert(vertex_id.file != BAD_FILE);
1428       dest.type = vertex_id.type;
1429       emit(MOV(dest, vertex_id));
1430       break;
1431    }
1432
1433    case nir_intrinsic_load_base_vertex: {
1434       fs_reg base_vertex = nir_system_values[SYSTEM_VALUE_BASE_VERTEX];
1435       assert(base_vertex.file != BAD_FILE);
1436       dest.type = base_vertex.type;
1437       emit(MOV(dest, base_vertex));
1438       break;
1439    }
1440
1441    case nir_intrinsic_load_instance_id: {
1442       fs_reg instance_id = nir_system_values[SYSTEM_VALUE_INSTANCE_ID];
1443       assert(instance_id.file != BAD_FILE);
1444       dest.type = instance_id.type;
1445       emit(MOV(dest, instance_id));
1446       break;
1447    }
1448
1449    case nir_intrinsic_load_sample_mask_in: {
1450       fs_reg sample_mask_in = nir_system_values[SYSTEM_VALUE_SAMPLE_MASK_IN];
1451       assert(sample_mask_in.file != BAD_FILE);
1452       dest.type = sample_mask_in.type;
1453       emit(MOV(dest, sample_mask_in));
1454       break;
1455    }
1456
1457    case nir_intrinsic_load_sample_pos: {
1458       fs_reg sample_pos = nir_system_values[SYSTEM_VALUE_SAMPLE_POS];
1459       assert(sample_pos.file != BAD_FILE);
1460       dest.type = sample_pos.type;
1461       emit(MOV(dest, sample_pos));
1462       emit(MOV(offset(dest, 1), offset(sample_pos, 1)));
1463       break;
1464    }
1465
1466    case nir_intrinsic_load_sample_id: {
1467       fs_reg sample_id = nir_system_values[SYSTEM_VALUE_SAMPLE_ID];
1468       assert(sample_id.file != BAD_FILE);
1469       dest.type = sample_id.type;
1470       emit(MOV(dest, sample_id));
1471       break;
1472    }
1473
1474    case nir_intrinsic_load_uniform_indirect:
1475       has_indirect = true;
1476    case nir_intrinsic_load_uniform: {
1477       unsigned index = instr->const_index[0];
1478
1479       fs_reg uniform_reg;
1480       if (index < num_direct_uniforms) {
1481          uniform_reg = fs_reg(UNIFORM, 0);
1482       } else {
1483          uniform_reg = fs_reg(UNIFORM, num_direct_uniforms);
1484          index -= num_direct_uniforms;
1485       }
1486
1487       for (int i = 0; i < instr->const_index[1]; i++) {
1488          for (unsigned j = 0; j < instr->num_components; j++) {
1489             fs_reg src = offset(retype(uniform_reg, dest.type), index);
1490             if (has_indirect)
1491                src.reladdr = new(mem_ctx) fs_reg(get_nir_src(instr->src[0]));
1492             index++;
1493
1494             emit(MOV(dest, src));
1495             dest = offset(dest, 1);
1496          }
1497       }
1498       break;
1499    }
1500
1501    case nir_intrinsic_load_ubo_indirect:
1502       has_indirect = true;
1503       /* fallthrough */
1504    case nir_intrinsic_load_ubo: {
1505       nir_const_value *const_index = nir_src_as_const_value(instr->src[0]);
1506       fs_reg surf_index;
1507
1508       if (const_index) {
1509          surf_index = fs_reg(stage_prog_data->binding_table.ubo_start +
1510                              const_index->u[0]);
1511       } else {
1512          /* The block index is not a constant. Evaluate the index expression
1513           * per-channel and add the base UBO index; the generator will select
1514           * a value from any live channel.
1515           */
1516          surf_index = vgrf(glsl_type::uint_type);
1517          emit(ADD(surf_index, get_nir_src(instr->src[0]),
1518                   fs_reg(stage_prog_data->binding_table.ubo_start)))
1519             ->force_writemask_all = true;
1520
1521          /* Assume this may touch any UBO. It would be nice to provide
1522           * a tighter bound, but the array information is already lowered away.
1523           */
1524          brw_mark_surface_used(prog_data,
1525                                stage_prog_data->binding_table.ubo_start +
1526                                shader_prog->NumUniformBlocks - 1);
1527       }
1528
1529       if (has_indirect) {
1530          /* Turn the byte offset into a dword offset. */
1531          fs_reg base_offset = vgrf(glsl_type::int_type);
1532          emit(SHR(base_offset, retype(get_nir_src(instr->src[1]),
1533                                  BRW_REGISTER_TYPE_D),
1534                   fs_reg(2)));
1535
1536          unsigned vec4_offset = instr->const_index[0] / 4;
1537          for (int i = 0; i < instr->num_components; i++)
1538             emit(VARYING_PULL_CONSTANT_LOAD(offset(dest, i), surf_index,
1539                                             base_offset, vec4_offset + i));
1540       } else {
1541          fs_reg packed_consts = vgrf(glsl_type::float_type);
1542          packed_consts.type = dest.type;
1543
1544          fs_reg const_offset_reg((unsigned) instr->const_index[0] & ~15);
1545          emit(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD, packed_consts,
1546               surf_index, const_offset_reg);
1547
1548          for (unsigned i = 0; i < instr->num_components; i++) {
1549             packed_consts.set_smear(instr->const_index[0] % 16 / 4 + i);
1550
1551             /* The std140 packing rules don't allow vectors to cross 16-byte
1552              * boundaries, and a reg is 32 bytes.
1553              */
1554             assert(packed_consts.subreg_offset < 32);
1555
1556             emit(MOV(dest, packed_consts));
1557             dest = offset(dest, 1);
1558          }
1559       }
1560       break;
1561    }
1562
1563    case nir_intrinsic_load_input_indirect:
1564       has_indirect = true;
1565       /* fallthrough */
1566    case nir_intrinsic_load_input: {
1567       unsigned index = 0;
1568       for (int i = 0; i < instr->const_index[1]; i++) {
1569          for (unsigned j = 0; j < instr->num_components; j++) {
1570             fs_reg src = offset(retype(nir_inputs, dest.type),
1571                                 instr->const_index[0] + index);
1572             if (has_indirect)
1573                src.reladdr = new(mem_ctx) fs_reg(get_nir_src(instr->src[0]));
1574             index++;
1575
1576             emit(MOV(dest, src));
1577             dest = offset(dest, 1);
1578          }
1579       }
1580       break;
1581    }
1582
1583    /* Handle ARB_gpu_shader5 interpolation intrinsics
1584     *
1585     * It's worth a quick word of explanation as to why we handle the full
1586     * variable-based interpolation intrinsic rather than a lowered version
1587     * with like we do for other inputs.  We have to do that because the way
1588     * we set up inputs doesn't allow us to use the already setup inputs for
1589     * interpolation.  At the beginning of the shader, we go through all of
1590     * the input variables and do the initial interpolation and put it in
1591     * the nir_inputs array based on its location as determined in
1592     * nir_lower_io.  If the input isn't used, dead code cleans up and
1593     * everything works fine.  However, when we get to the ARB_gpu_shader5
1594     * interpolation intrinsics, we need to reinterpolate the input
1595     * differently.  If we used an intrinsic that just had an index it would
1596     * only give us the offset into the nir_inputs array.  However, this is
1597     * useless because that value is post-interpolation and we need
1598     * pre-interpolation.  In order to get the actual location of the bits
1599     * we get from the vertex fetching hardware, we need the variable.
1600     */
1601    case nir_intrinsic_interp_var_at_centroid:
1602    case nir_intrinsic_interp_var_at_sample:
1603    case nir_intrinsic_interp_var_at_offset: {
1604       /* in SIMD16 mode, the pixel interpolator returns coords interleaved
1605        * 8 channels at a time, same as the barycentric coords presented in
1606        * the FS payload. this requires a bit of extra work to support.
1607        */
1608       no16("interpolate_at_* not yet supported in SIMD16 mode.");
1609
1610       fs_reg dst_x = vgrf(2);
1611       fs_reg dst_y = offset(dst_x, 1);
1612
1613       /* For most messages, we need one reg of ignored data; the hardware
1614        * requires mlen==1 even when there is no payload. in the per-slot
1615        * offset case, we'll replace this with the proper source data.
1616        */
1617       fs_reg src = vgrf(glsl_type::float_type);
1618       int mlen = 1;     /* one reg unless overriden */
1619       fs_inst *inst;
1620
1621       switch (instr->intrinsic) {
1622       case nir_intrinsic_interp_var_at_centroid:
1623          inst = emit(FS_OPCODE_INTERPOLATE_AT_CENTROID, dst_x, src, fs_reg(0u));
1624          break;
1625
1626       case nir_intrinsic_interp_var_at_sample: {
1627          /* XXX: We should probably handle non-constant sample id's */
1628          nir_const_value *const_sample = nir_src_as_const_value(instr->src[0]);
1629          assert(const_sample);
1630          unsigned msg_data = const_sample ? const_sample->i[0] << 4 : 0;
1631          inst = emit(FS_OPCODE_INTERPOLATE_AT_SAMPLE, dst_x, src,
1632                      fs_reg(msg_data));
1633          break;
1634       }
1635
1636       case nir_intrinsic_interp_var_at_offset: {
1637          nir_const_value *const_offset = nir_src_as_const_value(instr->src[0]);
1638
1639          if (const_offset) {
1640             unsigned off_x = MIN2((int)(const_offset->f[0] * 16), 7) & 0xf;
1641             unsigned off_y = MIN2((int)(const_offset->f[1] * 16), 7) & 0xf;
1642
1643             inst = emit(FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET, dst_x, src,
1644                         fs_reg(off_x | (off_y << 4)));
1645          } else {
1646             src = vgrf(glsl_type::ivec2_type);
1647             fs_reg offset_src = retype(get_nir_src(instr->src[0]),
1648                                        BRW_REGISTER_TYPE_F);
1649             for (int i = 0; i < 2; i++) {
1650                fs_reg temp = vgrf(glsl_type::float_type);
1651                emit(MUL(temp, offset(offset_src, i), fs_reg(16.0f)));
1652                fs_reg itemp = vgrf(glsl_type::int_type);
1653                emit(MOV(itemp, temp));  /* float to int */
1654
1655                /* Clamp the upper end of the range to +7/16.
1656                 * ARB_gpu_shader5 requires that we support a maximum offset
1657                 * of +0.5, which isn't representable in a S0.4 value -- if
1658                 * we didn't clamp it, we'd end up with -8/16, which is the
1659                 * opposite of what the shader author wanted.
1660                 *
1661                 * This is legal due to ARB_gpu_shader5's quantization
1662                 * rules:
1663                 *
1664                 * "Not all values of <offset> may be supported; x and y
1665                 * offsets may be rounded to fixed-point values with the
1666                 * number of fraction bits given by the
1667                 * implementation-dependent constant
1668                 * FRAGMENT_INTERPOLATION_OFFSET_BITS"
1669                 */
1670
1671                emit(BRW_OPCODE_SEL, offset(src, i), itemp, fs_reg(7))
1672                    ->conditional_mod = BRW_CONDITIONAL_L; /* min(src2, 7) */
1673             }
1674
1675             mlen = 2;
1676             inst = emit(FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET, dst_x, src,
1677                         fs_reg(0u));
1678          }
1679          break;
1680       }
1681
1682       default:
1683          unreachable("Invalid intrinsic");
1684       }
1685
1686       inst->mlen = mlen;
1687       inst->regs_written = 2; /* 2 floats per slot returned */
1688       inst->pi_noperspective = instr->variables[0]->var->data.interpolation ==
1689                                INTERP_QUALIFIER_NOPERSPECTIVE;
1690
1691       for (unsigned j = 0; j < instr->num_components; j++) {
1692          fs_reg src = interp_reg(instr->variables[0]->var->data.location, j);
1693          src.type = dest.type;
1694
1695          emit(FS_OPCODE_LINTERP, dest, dst_x, dst_y, src);
1696          dest = offset(dest, 1);
1697       }
1698       break;
1699    }
1700
1701    case nir_intrinsic_store_output_indirect:
1702       has_indirect = true;
1703    case nir_intrinsic_store_output: {
1704       fs_reg src = get_nir_src(instr->src[0]);
1705       unsigned index = 0;
1706       for (int i = 0; i < instr->const_index[1]; i++) {
1707          for (unsigned j = 0; j < instr->num_components; j++) {
1708             fs_reg new_dest = offset(retype(nir_outputs, src.type),
1709                                      instr->const_index[0] + index);
1710             if (has_indirect)
1711                src.reladdr = new(mem_ctx) fs_reg(get_nir_src(instr->src[1]));
1712             index++;
1713             emit(MOV(new_dest, src));
1714             src = offset(src, 1);
1715          }
1716       }
1717       break;
1718    }
1719
1720    default:
1721       unreachable("unknown intrinsic");
1722    }
1723 }
1724
1725 void
1726 fs_visitor::nir_emit_texture(nir_tex_instr *instr)
1727 {
1728    unsigned sampler = instr->sampler_index;
1729    fs_reg sampler_reg(sampler);
1730
1731    /* FINISHME: We're failing to recompile our programs when the sampler is
1732     * updated.  This only matters for the texture rectangle scale parameters
1733     * (pre-gen6, or gen6+ with GL_CLAMP).
1734     */
1735    int texunit = prog->SamplerUnits[sampler];
1736
1737    int gather_component = instr->component;
1738
1739    bool is_rect = instr->sampler_dim == GLSL_SAMPLER_DIM_RECT;
1740
1741    bool is_cube_array = instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE &&
1742                         instr->is_array;
1743
1744    int lod_components = 0, offset_components = 0;
1745
1746    fs_reg coordinate, shadow_comparitor, lod, lod2, sample_index, mcs, offset;
1747
1748    for (unsigned i = 0; i < instr->num_srcs; i++) {
1749       fs_reg src = get_nir_src(instr->src[i].src);
1750       switch (instr->src[i].src_type) {
1751       case nir_tex_src_bias:
1752          lod = retype(src, BRW_REGISTER_TYPE_F);
1753          break;
1754       case nir_tex_src_comparitor:
1755          shadow_comparitor = retype(src, BRW_REGISTER_TYPE_F);
1756          break;
1757       case nir_tex_src_coord:
1758          switch (instr->op) {
1759          case nir_texop_txf:
1760          case nir_texop_txf_ms:
1761             coordinate = retype(src, BRW_REGISTER_TYPE_D);
1762             break;
1763          default:
1764             coordinate = retype(src, BRW_REGISTER_TYPE_F);
1765             break;
1766          }
1767          break;
1768       case nir_tex_src_ddx:
1769          lod = retype(src, BRW_REGISTER_TYPE_F);
1770          lod_components = nir_tex_instr_src_size(instr, i);
1771          break;
1772       case nir_tex_src_ddy:
1773          lod2 = retype(src, BRW_REGISTER_TYPE_F);
1774          break;
1775       case nir_tex_src_lod:
1776          switch (instr->op) {
1777          case nir_texop_txs:
1778             lod = retype(src, BRW_REGISTER_TYPE_UD);
1779             break;
1780          case nir_texop_txf:
1781             lod = retype(src, BRW_REGISTER_TYPE_D);
1782             break;
1783          default:
1784             lod = retype(src, BRW_REGISTER_TYPE_F);
1785             break;
1786          }
1787          break;
1788       case nir_tex_src_ms_index:
1789          sample_index = retype(src, BRW_REGISTER_TYPE_UD);
1790          break;
1791       case nir_tex_src_offset:
1792          offset = retype(src, BRW_REGISTER_TYPE_D);
1793          if (instr->is_array)
1794             offset_components = instr->coord_components - 1;
1795          else
1796             offset_components = instr->coord_components;
1797          break;
1798       case nir_tex_src_projector:
1799          unreachable("should be lowered");
1800
1801       case nir_tex_src_sampler_offset: {
1802          /* Figure out the highest possible sampler index and mark it as used */
1803          uint32_t max_used = sampler + instr->sampler_array_size - 1;
1804          if (instr->op == nir_texop_tg4 && brw->gen < 8) {
1805             max_used += stage_prog_data->binding_table.gather_texture_start;
1806          } else {
1807             max_used += stage_prog_data->binding_table.texture_start;
1808          }
1809          brw_mark_surface_used(prog_data, max_used);
1810
1811          /* Emit code to evaluate the actual indexing expression */
1812          sampler_reg = vgrf(glsl_type::uint_type);
1813          emit(ADD(sampler_reg, src, fs_reg(sampler)))
1814              ->force_writemask_all = true;
1815          break;
1816       }
1817
1818       default:
1819          unreachable("unknown texture source");
1820       }
1821    }
1822
1823    if (instr->op == nir_texop_txf_ms) {
1824       if (brw->gen >= 7 &&
1825           key_tex->compressed_multisample_layout_mask & (1 << sampler)) {
1826          mcs = emit_mcs_fetch(coordinate, instr->coord_components, sampler_reg);
1827       } else {
1828          mcs = fs_reg(0u);
1829       }
1830    }
1831
1832    for (unsigned i = 0; i < 3; i++) {
1833       if (instr->const_offset[i] != 0) {
1834          assert(offset_components == 0);
1835          offset = fs_reg(brw_texture_offset(ctx, instr->const_offset, 3));
1836          break;
1837       }
1838    }
1839
1840    enum glsl_base_type dest_base_type;
1841    switch (instr->dest_type) {
1842    case nir_type_float:
1843       dest_base_type = GLSL_TYPE_FLOAT;
1844       break;
1845    case nir_type_int:
1846       dest_base_type = GLSL_TYPE_INT;
1847       break;
1848    case nir_type_unsigned:
1849       dest_base_type = GLSL_TYPE_UINT;
1850       break;
1851    default:
1852       unreachable("bad type");
1853    }
1854
1855    const glsl_type *dest_type =
1856       glsl_type::get_instance(dest_base_type, nir_tex_instr_dest_size(instr),
1857                               1);
1858
1859    ir_texture_opcode op;
1860    switch (instr->op) {
1861    case nir_texop_lod: op = ir_lod; break;
1862    case nir_texop_query_levels: op = ir_query_levels; break;
1863    case nir_texop_tex: op = ir_tex; break;
1864    case nir_texop_tg4: op = ir_tg4; break;
1865    case nir_texop_txb: op = ir_txb; break;
1866    case nir_texop_txd: op = ir_txd; break;
1867    case nir_texop_txf: op = ir_txf; break;
1868    case nir_texop_txf_ms: op = ir_txf_ms; break;
1869    case nir_texop_txl: op = ir_txl; break;
1870    case nir_texop_txs: op = ir_txs; break;
1871    default:
1872       unreachable("unknown texture opcode");
1873    }
1874
1875    emit_texture(op, dest_type, coordinate, instr->coord_components,
1876                 shadow_comparitor, lod, lod2, lod_components, sample_index,
1877                 offset, mcs, gather_component,
1878                 is_cube_array, is_rect, sampler, sampler_reg, texunit);
1879
1880    fs_reg dest = get_nir_dest(instr->dest);
1881    dest.type = this->result.type;
1882    unsigned num_components = nir_tex_instr_dest_size(instr);
1883    emit_percomp(MOV(dest, this->result), (1 << num_components) - 1);
1884 }
1885
1886 void
1887 fs_visitor::nir_emit_jump(nir_jump_instr *instr)
1888 {
1889    switch (instr->type) {
1890    case nir_jump_break:
1891       emit(BRW_OPCODE_BREAK);
1892       break;
1893    case nir_jump_continue:
1894       emit(BRW_OPCODE_CONTINUE);
1895       break;
1896    case nir_jump_return:
1897    default:
1898       unreachable("unknown jump");
1899    }
1900 }