i965/fs_nir: Turn on the peephole select optimization
[mesa.git] / src / mesa / drivers / dri / i965 / brw_fs_nir.cpp
1 /*
2 * Copyright © 2010 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "glsl/nir/glsl_to_nir.h"
25 #include "brw_fs.h"
26
27 void
28 fs_visitor::emit_nir_code()
29 {
30 /* first, lower the GLSL IR shader to NIR */
31 nir_shader *nir = glsl_to_nir(shader->base.ir, NULL, true);
32 nir_validate_shader(nir);
33
34 /* lower some of the GLSL-isms into NIR-isms - after this point, we no
35 * longer have to deal with variables inside the shader
36 */
37
38 nir_lower_variables_scalar(nir, true, true, true, true);
39 nir_validate_shader(nir);
40
41 nir_lower_samplers(nir, shader_prog, shader->base.Program);
42 nir_validate_shader(nir);
43
44 nir_lower_system_values(nir);
45 nir_validate_shader(nir);
46
47 nir_lower_atomics(nir);
48 nir_validate_shader(nir);
49
50 nir_remove_dead_variables(nir);
51 nir_opt_global_to_local(nir);
52 nir_validate_shader(nir);
53
54 if (1)
55 nir_print_shader(nir, stderr);
56
57 if (1) {
58 nir_convert_to_ssa(nir);
59 nir_validate_shader(nir);
60 nir_copy_prop(nir);
61 nir_validate_shader(nir);
62 nir_opt_dce(nir);
63 nir_validate_shader(nir);
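/* Peephole select flattens simple if/else constructs whose branches are
 * cheap and side-effect-free into bcsel instructions, e.g. turning
 * "if (c) x = a; else x = b;" into "x = bcsel(c, a, b)" so no real
 * control flow has to be emitted for it.
 */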
64 nir_opt_peephole_select(nir);
65 nir_validate_shader(nir);
66 nir_print_shader(nir, stderr);
67
68 nir_convert_from_ssa(nir);
69 nir_print_shader(nir, stderr);
70 nir_validate_shader(nir);
71 nir_lower_vec_to_movs(nir);
72 nir_validate_shader(nir);
73 }
74
75 /* emit the arrays used for inputs and outputs - load/store intrinsics will
76 * be converted to reads/writes of these arrays
77 */
78
79 if (nir->num_inputs > 0) {
80 nir_inputs = fs_reg(GRF, virtual_grf_alloc(nir->num_inputs));
81 nir_setup_inputs(nir);
82 }
83
84 if (nir->num_outputs > 0) {
85 nir_outputs = fs_reg(GRF, virtual_grf_alloc(nir->num_outputs));
86 nir_setup_outputs(nir);
87 }
88
89 if (nir->num_uniforms > 0) {
90 nir_uniforms = fs_reg(UNIFORM, 0);
91 nir_setup_uniforms(nir);
92 }
93
94 nir_setup_registers(&nir->registers);
95
96 /* get the main function and emit it */
97 nir_foreach_overload(nir, overload) {
98 assert(strcmp(overload->function->name, "main") == 0);
99 assert(overload->impl);
100 nir_emit_impl(overload->impl);
101 }
102
103 ralloc_free(nir);
104 }
105
106 void
107 fs_visitor::nir_setup_inputs(nir_shader *shader)
108 {
109 fs_reg varying = nir_inputs;
110
111 struct hash_entry *entry;
112 hash_table_foreach(shader->inputs, entry) {
113 nir_variable *var = (nir_variable *) entry->data;
114 varying.reg_offset = var->data.driver_location;
115
116 fs_reg reg;
117 if (!strcmp(var->name, "gl_FragCoord")) {
118 reg = *emit_fragcoord_interpolation(var->data.pixel_center_integer,
119 var->data.origin_upper_left);
120 emit_percomp(MOV(varying, reg), 0xF);
121 } else if (!strcmp(var->name, "gl_FrontFacing")) {
122 reg = *emit_frontfacing_interpolation();
123 emit(MOV(retype(varying, BRW_REGISTER_TYPE_UD), reg));
124 } else {
125 emit_general_interpolation(varying, var->name, var->type,
126 (glsl_interp_qualifier) var->data.interpolation,
127 var->data.location, var->data.centroid,
128 var->data.sample);
129 }
130 }
131 }
132
133 void
134 fs_visitor::nir_setup_outputs(nir_shader *shader)
135 {
136 brw_wm_prog_key *key = (brw_wm_prog_key*) this->key;
137 fs_reg reg = nir_outputs;
138
139 struct hash_entry *entry;
140 hash_table_foreach(shader->outputs, entry) {
141 nir_variable *var = (nir_variable *) entry->data;
142 reg.reg_offset = var->data.driver_location;
143
144 if (var->data.index > 0) {
145 assert(var->data.location == FRAG_RESULT_DATA0);
146 assert(var->data.index == 1);
147 this->dual_src_output = reg;
148 this->do_dual_src = true;
149 } else if (var->data.location == FRAG_RESULT_COLOR) {
150 /* Writing gl_FragColor outputs to all color regions. */
151 for (unsigned int i = 0; i < MAX2(key->nr_color_regions, 1); i++) {
152 this->outputs[i] = reg;
153 this->output_components[i] = 4;
154 }
155 } else if (var->data.location == FRAG_RESULT_DEPTH) {
156 this->frag_depth = reg;
157 } else if (var->data.location == FRAG_RESULT_SAMPLE_MASK) {
158 this->sample_mask = reg;
159 } else {
160 /* gl_FragData or a user-defined FS output */
161 assert(var->data.location >= FRAG_RESULT_DATA0 &&
162 var->data.location < FRAG_RESULT_DATA0 + BRW_MAX_DRAW_BUFFERS);
163
164 int vector_elements =
165 var->type->is_array() ? var->type->fields.array->vector_elements
166 : var->type->vector_elements;
167
168 /* General color output. */
169 for (unsigned int i = 0; i < MAX2(1, var->type->length); i++) {
170 int output = var->data.location - FRAG_RESULT_DATA0 + i;
171 this->outputs[output] = reg;
172 this->outputs[output].reg_offset += vector_elements * i;
173 this->output_components[output] = vector_elements;
174 }
175 }
176 }
177 }
178
179 void
180 fs_visitor::nir_setup_uniforms(nir_shader *shader)
181 {
182 uniforms = shader->num_uniforms;
183 param_size[0] = shader->num_uniforms;
184
185 if (dispatch_width != 8)
186 return;
187
188 struct hash_entry *entry;
189 hash_table_foreach(shader->uniforms, entry) {
190 nir_variable *var = (nir_variable *) entry->data;
191
192 /* UBOs and atomics don't take up space in the uniform file */
193
194 if (var->interface_type != NULL || var->type->contains_atomic())
195 continue;
196
197 if (strncmp(var->name, "gl_", 3) == 0)
198 nir_setup_builtin_uniform(var);
199 else
200 nir_setup_uniform(var);
201 }
202 }
203
204 void
205 fs_visitor::nir_setup_uniform(nir_variable *var)
206 {
207 int namelen = strlen(var->name);
208
209 /* The data for our (non-builtin) uniforms is stored in a series of
210 * gl_uniform_driver_storage structs for each subcomponent that
211 * glGetUniformLocation() could name. We know it's been set up in the
212 * same order we'd walk the type, so walk the list of storage and find
213 * anything with our name, or the prefix of a component that starts with
214 * our name.
215 */
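/* For example (illustrative name, not from this shader): a
 * "uniform vec4 color[2]" produces storage entries named "color[0]" and
 * "color[1]"; both match the "color" prefix followed by '[', so both
 * contribute their component slots below.
 */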
216 unsigned index = var->data.driver_location;
217 for (unsigned u = 0; u < shader_prog->NumUserUniformStorage; u++) {
218 struct gl_uniform_storage *storage = &shader_prog->UniformStorage[u];
219
220 if (strncmp(var->name, storage->name, namelen) != 0 ||
221 (storage->name[namelen] != 0 &&
222 storage->name[namelen] != '.' &&
223 storage->name[namelen] != '[')) {
224 continue;
225 }
226
227 unsigned slots = storage->type->component_slots();
228 if (storage->array_elements)
229 slots *= storage->array_elements;
230
231 for (unsigned i = 0; i < slots; i++) {
232 stage_prog_data->param[index++] = &storage->storage[i];
233 }
234 }
235
236 /* Make sure we actually initialized the right amount of stuff here. */
237 assert(var->data.driver_location + var->type->component_slots() == index);
238 }
239
240 void
241 fs_visitor::nir_setup_builtin_uniform(nir_variable *var)
242 {
243 const nir_state_slot *const slots = var->state_slots;
244 assert(var->state_slots != NULL);
245
246 unsigned uniform_index = var->data.driver_location;
247 for (unsigned int i = 0; i < var->num_state_slots; i++) {
248 /* This state reference has already been set up by ir_to_mesa, but we'll
249 * get the same index back here.
250 */
251 int index = _mesa_add_state_reference(this->prog->Parameters,
252 (gl_state_index *)slots[i].tokens);
253
254 /* Add each of the unique swizzles of the element as a parameter.
255 * This'll end up matching the expected layout of the
256 * array/matrix/structure we're trying to fill in.
257 */
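/* A vec4-valued state var uses swizzle XYZW and adds four parameters;
 * a float-valued one is stored as XXXX, so the swizzle repeats and the
 * loop below stops after the first component.
 */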
258 int last_swiz = -1;
259 for (unsigned int j = 0; j < 4; j++) {
260 int swiz = GET_SWZ(slots[i].swizzle, j);
261 if (swiz == last_swiz)
262 break;
263 last_swiz = swiz;
264
265 stage_prog_data->param[uniform_index++] =
266 &prog->Parameters->ParameterValues[index][swiz];
267 }
268 }
269 }
270
271 void
272 fs_visitor::nir_setup_registers(exec_list *list)
273 {
274 foreach_list_typed(nir_register, nir_reg, node, list) {
275 unsigned array_elems =
276 nir_reg->num_array_elems == 0 ? 1 : nir_reg->num_array_elems;
277 unsigned size = array_elems * nir_reg->num_components;
278 fs_reg *reg = new(mem_ctx) fs_reg(GRF, virtual_grf_alloc(size));
279 _mesa_hash_table_insert(this->nir_reg_ht, nir_reg, reg);
280 }
281 }
282
283 void
284 fs_visitor::nir_emit_impl(nir_function_impl *impl)
285 {
286 nir_setup_registers(&impl->registers);
287 nir_emit_cf_list(&impl->body);
288 }
289
290 void
291 fs_visitor::nir_emit_cf_list(exec_list *list)
292 {
293 foreach_list_typed(nir_cf_node, node, node, list) {
294 switch (node->type) {
295 case nir_cf_node_if:
296 nir_emit_if(nir_cf_node_as_if(node));
297 break;
298
299 case nir_cf_node_loop:
300 nir_emit_loop(nir_cf_node_as_loop(node));
301 break;
302
303 case nir_cf_node_block:
304 nir_emit_block(nir_cf_node_as_block(node));
305 break;
306
307 default:
308 unreachable("Invalid CFG node type");
309 }
310 }
311 }
312
313 void
314 fs_visitor::nir_emit_if(nir_if *if_stmt)
315 {
316 if (brw->gen < 6) {
317 no16("Can't support (non-uniform) control flow on SIMD16\n");
318 }
319
320 /* first, put the condition into f0 */
321 fs_inst *inst = emit(MOV(reg_null_d,
322 retype(get_nir_src(if_stmt->condition),
323 BRW_REGISTER_TYPE_UD)));
324 inst->conditional_mod = BRW_CONDITIONAL_NZ;
325
326 emit(IF(BRW_PREDICATE_NORMAL));
327
328 nir_emit_cf_list(&if_stmt->then_list);
329
330 /* note: if the else is empty, dead CF elimination will remove it */
331 emit(BRW_OPCODE_ELSE);
332
333 nir_emit_cf_list(&if_stmt->else_list);
334
335 emit(BRW_OPCODE_ENDIF);
336
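/* If both branches reduced to a single MOV into the same destination,
 * this folds the IF/ELSE/ENDIF we just emitted into one predicated SEL
 * (see try_replace_with_sel for the exact conditions it checks).
 */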
337 try_replace_with_sel();
338 }
339
340 void
341 fs_visitor::nir_emit_loop(nir_loop *loop)
342 {
343 if (brw->gen < 6) {
344 no16("Can't support (non-uniform) control flow on SIMD16\n");
345 }
346
347 emit(BRW_OPCODE_DO);
348
349 nir_emit_cf_list(&loop->body);
350
351 emit(BRW_OPCODE_WHILE);
352 }
353
354 void
355 fs_visitor::nir_emit_block(nir_block *block)
356 {
357 nir_foreach_instr(block, instr) {
358 nir_emit_instr(instr);
359 }
360 }
361
362 void
363 fs_visitor::nir_emit_instr(nir_instr *instr)
364 {
365 switch (instr->type) {
366 case nir_instr_type_alu:
367 nir_emit_alu(nir_instr_as_alu(instr));
368 break;
369
370 case nir_instr_type_intrinsic:
371 nir_emit_intrinsic(nir_instr_as_intrinsic(instr));
372 break;
373
374 case nir_instr_type_texture:
375 nir_emit_texture(nir_instr_as_texture(instr));
376 break;
377
378 case nir_instr_type_load_const:
379 nir_emit_load_const(nir_instr_as_load_const(instr));
380 break;
381
382 case nir_instr_type_jump:
383 nir_emit_jump(nir_instr_as_jump(instr));
384 break;
385
386 default:
387 unreachable("unknown instruction type");
388 }
389 }
390
391 static brw_reg_type
392 brw_type_for_nir_type(nir_alu_type type)
393 {
394 switch (type) {
395 case nir_type_bool:
396 case nir_type_unsigned:
397 return BRW_REGISTER_TYPE_UD;
398 case nir_type_int:
399 return BRW_REGISTER_TYPE_D;
400 case nir_type_float:
401 return BRW_REGISTER_TYPE_F;
402 default:
403 unreachable("unknown type");
404 }
405
406 return BRW_REGISTER_TYPE_F;
407 }
408
409 void
410 fs_visitor::nir_emit_alu(nir_alu_instr *instr)
411 {
412 struct brw_wm_prog_key *fs_key = (struct brw_wm_prog_key *) this->key;
413
414 fs_reg op[3];
415 fs_reg dest = get_nir_dest(instr->dest.dest);
416 dest.type = brw_type_for_nir_type(nir_op_infos[instr->op].output_type);
417
418 fs_reg result;
419 if (instr->has_predicate) {
420 result = fs_reg(GRF, virtual_grf_alloc(4));
421 result.type = dest.type;
422 } else {
423 result = dest;
424 }
425
426
427 for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++)
428 op[i] = get_nir_alu_src(instr, i);
429
430 switch (instr->op) {
431 case nir_op_fmov:
432 case nir_op_i2f:
433 case nir_op_u2f: {
434 fs_inst *inst = MOV(result, op[0]);
435 inst->saturate = instr->dest.saturate;
436 emit_percomp(inst, instr->dest.write_mask);
437 }
438 break;
439
440 case nir_op_imov:
441 case nir_op_f2i:
442 case nir_op_f2u:
443 emit_percomp(MOV(result, op[0]), instr->dest.write_mask);
444 break;
445
446 case nir_op_fsign: {
447 /* AND(val, 0x80000000) gives the sign bit.
448 *
449 * Predicated OR ORs 1.0 (0x3f800000) with the sign bit if val is not
450 * zero.
451 */
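/* Worked example (IEEE-754 single precision):
 *   val = -2.5 (0xc0200000): AND -> 0x80000000, OR -> 0xbf800000 = -1.0
 *   val =  4.0 (0x40800000): AND -> 0x00000000, OR -> 0x3f800000 =  1.0
 *   val =  0.0: the CMP leaves the predicate clear, the OR is skipped,
 *               and the result stays 0.0.
 */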
452 emit_percomp(CMP(reg_null_f, op[0], fs_reg(0.0f), BRW_CONDITIONAL_NZ),
453 instr->dest.write_mask);
454
455 fs_reg result_int = retype(result, BRW_REGISTER_TYPE_UD);
456 op[0].type = BRW_REGISTER_TYPE_UD;
457 result.type = BRW_REGISTER_TYPE_UD;
458 emit_percomp(AND(result_int, op[0], fs_reg(0x80000000u)),
459 instr->dest.write_mask);
460
461 fs_inst *inst = OR(result_int, result_int, fs_reg(0x3f800000u));
462 inst->predicate = BRW_PREDICATE_NORMAL;
463 emit_percomp(inst, instr->dest.write_mask);
464 if (instr->dest.saturate) {
465 fs_inst *inst = MOV(result, result);
466 inst->saturate = true;
467 emit_percomp(inst, instr->dest.write_mask);
468 }
469 break;
470 }
471
472 case nir_op_isign: {
473 /* ASR(val, 31) -> negative val generates 0xffffffff (signed -1).
474 * -> non-negative val generates 0x00000000.
475 * Predicated OR sets 1 if val is positive.
476 */
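/* For example:
 *   val = -5: ASR -> 0xffffffff (-1); CMP val > 0 fails, OR is skipped.
 *   val =  7: ASR -> 0x00000000; CMP passes, predicated OR yields 1.
 *   val =  0: ASR -> 0x00000000; CMP fails, result stays 0.
 */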
477 emit_percomp(CMP(reg_null_d, op[0], fs_reg(0), BRW_CONDITIONAL_G),
478 instr->dest.write_mask);
479
480 emit_percomp(ASR(result, op[0], fs_reg(31)), instr->dest.write_mask);
481
482 fs_inst *inst = OR(result, result, fs_reg(1));
483 inst->predicate = BRW_PREDICATE_NORMAL;
484 emit_percomp(inst, instr->dest.write_mask);
485 break;
486 }
487
488 case nir_op_frcp:
489 emit_math_percomp(SHADER_OPCODE_RCP, result, op[0],
490 instr->dest.write_mask, instr->dest.saturate);
491 break;
492
493 case nir_op_fexp2:
494 emit_math_percomp(SHADER_OPCODE_EXP2, result, op[0],
495 instr->dest.write_mask, instr->dest.saturate);
496 break;
497
498 case nir_op_flog2:
499 emit_math_percomp(SHADER_OPCODE_LOG2, result, op[0],
500 instr->dest.write_mask, instr->dest.saturate);
501 break;
502
503 case nir_op_fexp:
504 case nir_op_flog:
505 unreachable("not reached: should be handled by ir_explog_to_explog2");
506
507 case nir_op_fsin:
508 case nir_op_fsin_reduced:
509 emit_math_percomp(SHADER_OPCODE_SIN, result, op[0],
510 instr->dest.write_mask, instr->dest.saturate);
511 break;
512
513 case nir_op_fcos:
514 case nir_op_fcos_reduced:
515 emit_math_percomp(SHADER_OPCODE_COS, result, op[0],
516 instr->dest.write_mask, instr->dest.saturate);
517 break;
518
519 case nir_op_fddx:
520 if (fs_key->high_quality_derivatives)
521 emit_percomp(FS_OPCODE_DDX_FINE, result, op[0],
522 instr->dest.write_mask, instr->dest.saturate);
523 else
524 emit_percomp(FS_OPCODE_DDX_COARSE, result, op[0],
525 instr->dest.write_mask, instr->dest.saturate);
526 break;
527 case nir_op_fddx_fine:
528 emit_percomp(FS_OPCODE_DDX_FINE, result, op[0],
529 instr->dest.write_mask, instr->dest.saturate);
530 break;
531 case nir_op_fddx_coarse:
532 emit_percomp(FS_OPCODE_DDX_COARSE, result, op[0],
533 instr->dest.write_mask, instr->dest.saturate);
534 break;
535 case nir_op_fddy:
536 if (fs_key->high_quality_derivatives)
537 emit_percomp(FS_OPCODE_DDY_FINE, result, op[0],
538 fs_reg(fs_key->render_to_fbo),
539 instr->dest.write_mask, instr->dest.saturate);
540 else
541 emit_percomp(FS_OPCODE_DDY_COARSE, result, op[0],
542 fs_reg(fs_key->render_to_fbo),
543 instr->dest.write_mask, instr->dest.saturate);
544 break;
545 case nir_op_fddy_fine:
546 emit_percomp(FS_OPCODE_DDY_FINE, result, op[0],
547 fs_reg(fs_key->render_to_fbo),
548 instr->dest.write_mask, instr->dest.saturate);
549 break;
550 case nir_op_fddy_coarse:
551 emit_percomp(FS_OPCODE_DDY_COARSE, result, op[0],
552 fs_reg(fs_key->render_to_fbo),
553 instr->dest.write_mask, instr->dest.saturate);
554 break;
555
556 case nir_op_fadd:
557 case nir_op_iadd: {
558 fs_inst *inst = ADD(result, op[0], op[1]);
559 inst->saturate = instr->dest.saturate;
560 emit_percomp(inst, instr->dest.write_mask);
561 break;
562 }
563
564 case nir_op_fmul: {
565 fs_inst *inst = MUL(result, op[0], op[1]);
566 inst->saturate = instr->dest.saturate;
567 emit_percomp(inst, instr->dest.write_mask);
568 break;
569 }
570
571 case nir_op_imul: {
572 /* TODO put in the 16-bit constant optimization once we have SSA */
573
574 if (brw->gen >= 7)
575 no16("SIMD16 explicit accumulator operands unsupported\n");
576
577 struct brw_reg acc = retype(brw_acc_reg(dispatch_width), result.type);
578
579 emit_percomp(MUL(acc, op[0], op[1]), instr->dest.write_mask);
580 emit_percomp(MACH(reg_null_d, op[0], op[1]), instr->dest.write_mask);
581 emit_percomp(MOV(result, fs_reg(acc)), instr->dest.write_mask);
582 break;
583 }
584
585 case nir_op_imul_high:
586 case nir_op_umul_high: {
587 if (brw->gen >= 7)
588 no16("SIMD16 explicit accumulator operands unsupported\n");
589
590 struct brw_reg acc = retype(brw_acc_reg(dispatch_width), result.type);
591
592 emit_percomp(MUL(acc, op[0], op[1]), instr->dest.write_mask);
593 emit_percomp(MACH(result, op[0], op[1]), instr->dest.write_mask);
594 break;
595 }
596
597 case nir_op_idiv:
598 case nir_op_udiv:
599 emit_math_percomp(SHADER_OPCODE_INT_QUOTIENT, result, op[0], op[1],
600 instr->dest.write_mask);
601 break;
602
603 case nir_op_uadd_carry: {
604 if (brw->gen >= 7)
605 no16("SIMD16 explicit accumulator operands unsupported\n");
606
607 struct brw_reg acc = retype(brw_acc_reg(dispatch_width),
608 BRW_REGISTER_TYPE_UD);
609
610 emit_percomp(ADDC(reg_null_ud, op[0], op[1]), instr->dest.write_mask);
611 emit_percomp(MOV(result, fs_reg(acc)), instr->dest.write_mask);
612 break;
613 }
614
615 case nir_op_usub_borrow: {
616 if (brw->gen >= 7)
617 no16("SIMD16 explicit accumulator operands unsupported\n");
618
619 struct brw_reg acc = retype(brw_acc_reg(dispatch_width),
620 BRW_REGISTER_TYPE_UD);
621
622 emit_percomp(SUBB(reg_null_ud, op[0], op[1]), instr->dest.write_mask);
623 emit_percomp(MOV(result, fs_reg(acc)), instr->dest.write_mask);
624 break;
625 }
626
627 case nir_op_umod:
628 emit_math_percomp(SHADER_OPCODE_INT_REMAINDER, result, op[0],
629 op[1], instr->dest.write_mask);
630 break;
631
632 case nir_op_flt:
633 case nir_op_ilt:
634 case nir_op_ult:
635 emit_percomp(CMP(result, op[0], op[1], BRW_CONDITIONAL_L),
636 instr->dest.write_mask);
637 break;
638
639 case nir_op_fge:
640 case nir_op_ige:
641 case nir_op_uge:
642 emit_percomp(CMP(result, op[0], op[1], BRW_CONDITIONAL_GE),
643 instr->dest.write_mask);
644 break;
645
646 case nir_op_feq:
647 case nir_op_ieq:
648 emit_percomp(CMP(result, op[0], op[1], BRW_CONDITIONAL_Z),
649 instr->dest.write_mask);
650 break;
651
652 case nir_op_fne:
653 case nir_op_ine:
654 emit_percomp(CMP(result, op[0], op[1], BRW_CONDITIONAL_NZ),
655 instr->dest.write_mask);
656 break;
657
658 case nir_op_ball_fequal2:
659 case nir_op_ball_iequal2:
660 case nir_op_ball_fequal3:
661 case nir_op_ball_iequal3:
662 case nir_op_ball_fequal4:
663 case nir_op_ball_iequal4: {
664 unsigned num_components = nir_op_infos[instr->op].input_sizes[0];
665 fs_reg temp = fs_reg(GRF, virtual_grf_alloc(num_components));
666 emit_percomp(CMP(temp, op[0], op[1], BRW_CONDITIONAL_Z),
667 (1 << num_components) - 1);
668 emit_reduction(BRW_OPCODE_AND, result, temp, num_components);
669 break;
670 }
671
672 case nir_op_bany_fnequal2:
673 case nir_op_bany_inequal2:
674 case nir_op_bany_fnequal3:
675 case nir_op_bany_inequal3:
676 case nir_op_bany_fnequal4:
677 case nir_op_bany_inequal4: {
678 unsigned num_components = nir_op_infos[instr->op].input_sizes[0];
679 fs_reg temp = fs_reg(GRF, virtual_grf_alloc(num_components));
680 temp.type = BRW_REGISTER_TYPE_UD;
681 emit_percomp(CMP(temp, op[0], op[1], BRW_CONDITIONAL_NZ),
682 (1 << num_components) - 1);
683 emit_reduction(BRW_OPCODE_OR, result, temp, num_components);
684 break;
685 }
686
687 case nir_op_inot:
688 emit_percomp(NOT(result, op[0]), instr->dest.write_mask);
689 break;
690 case nir_op_ixor:
691 emit_percomp(XOR(result, op[0], op[1]), instr->dest.write_mask);
692 break;
693 case nir_op_ior:
694 emit_percomp(OR(result, op[0], op[1]), instr->dest.write_mask);
695 break;
696 case nir_op_iand:
697 emit_percomp(AND(result, op[0], op[1]), instr->dest.write_mask);
698 break;
699
700 case nir_op_fdot2:
701 case nir_op_fdot3:
702 case nir_op_fdot4: {
703 unsigned num_components = nir_op_infos[instr->op].input_sizes[0];
704 fs_reg temp = fs_reg(GRF, virtual_grf_alloc(num_components));
705 emit_percomp(MUL(temp, op[0], op[1]), (1 << num_components) - 1);
706 emit_reduction(BRW_OPCODE_ADD, result, temp, num_components);
707 if (instr->dest.saturate) {
708 fs_inst *inst = emit(MOV(result, result));
709 inst->saturate = true;
710 }
711 break;
712 }
713
714 case nir_op_bany2:
715 case nir_op_bany3:
716 case nir_op_bany4: {
717 unsigned num_components = nir_op_infos[instr->op].input_sizes[0];
718 emit_reduction(BRW_OPCODE_OR, result, op[0], num_components);
719 break;
720 }
721
722 case nir_op_ball2:
723 case nir_op_ball3:
724 case nir_op_ball4: {
725 unsigned num_components = nir_op_infos[instr->op].input_sizes[0];
726 emit_reduction(BRW_OPCODE_AND, result, op[0], num_components);
727 break;
728 }
729
730 case nir_op_fnoise1_1:
731 case nir_op_fnoise1_2:
732 case nir_op_fnoise1_3:
733 case nir_op_fnoise1_4:
734 case nir_op_fnoise2_1:
735 case nir_op_fnoise2_2:
736 case nir_op_fnoise2_3:
737 case nir_op_fnoise2_4:
738 case nir_op_fnoise3_1:
739 case nir_op_fnoise3_2:
740 case nir_op_fnoise3_3:
741 case nir_op_fnoise3_4:
742 case nir_op_fnoise4_1:
743 case nir_op_fnoise4_2:
744 case nir_op_fnoise4_3:
745 case nir_op_fnoise4_4:
746 unreachable("not reached: should be handled by lower_noise");
747
748 case nir_op_vec2:
749 case nir_op_vec3:
750 case nir_op_vec4:
751 unreachable("not reached: should be handled by lower_quadop_vector");
752
753 case nir_op_ldexp:
754 unreachable("not reached: should be handled by ldexp_to_arith()");
755
756 case nir_op_fsqrt:
757 emit_math_percomp(SHADER_OPCODE_SQRT, result, op[0],
758 instr->dest.write_mask, instr->dest.saturate);
759 break;
760
761 case nir_op_frsq:
762 emit_math_percomp(SHADER_OPCODE_RSQ, result, op[0],
763 instr->dest.write_mask, instr->dest.saturate);
764 break;
765
766 case nir_op_b2i:
767 emit_percomp(AND(result, op[0], fs_reg(1)), instr->dest.write_mask);
768 break;
769 case nir_op_b2f: {
770 emit_percomp(AND(retype(result, BRW_REGISTER_TYPE_UD), op[0],
771 fs_reg(0x3f800000u)),
772 instr->dest.write_mask);
773 break;
774 }
775
776 case nir_op_f2b:
777 emit_percomp(CMP(result, op[0], fs_reg(0.0f), BRW_CONDITIONAL_NZ),
778 instr->dest.write_mask);
779 break;
780 case nir_op_i2b:
781 emit_percomp(CMP(result, op[0], fs_reg(0), BRW_CONDITIONAL_NZ),
782 instr->dest.write_mask);
783 break;
784
785 case nir_op_ftrunc: {
786 fs_inst *inst = RNDZ(result, op[0]);
787 inst->saturate = instr->dest.saturate;
788 emit_percomp(inst, instr->dest.write_mask);
789 break;
790 }
791 case nir_op_fceil: {
792 op[0].negate = !op[0].negate;
793 fs_reg temp = fs_reg(this, glsl_type::vec4_type);
794 emit_percomp(RNDD(temp, op[0]), instr->dest.write_mask);
795 temp.negate = true;
796 fs_inst *inst = MOV(result, temp);
797 inst->saturate = instr->dest.saturate;
798 emit_percomp(inst, instr->dest.write_mask);
799 break;
800 }
801 case nir_op_ffloor: {
802 fs_inst *inst = RNDD(result, op[0]);
803 inst->saturate = instr->dest.saturate;
804 emit_percomp(inst, instr->dest.write_mask);
805 break;
806 }
807 case nir_op_ffract: {
808 fs_inst *inst = FRC(result, op[0]);
809 inst->saturate = instr->dest.saturate;
810 emit_percomp(inst, instr->dest.write_mask);
811 break;
812 }
813 case nir_op_fround_even: {
814 fs_inst *inst = RNDE(result, op[0]);
815 inst->saturate = instr->dest.saturate;
816 emit_percomp(inst, instr->dest.write_mask);
817 break;
818 }
819
820 case nir_op_fmin:
821 case nir_op_imin:
822 case nir_op_umin:
823 if (brw->gen >= 6) {
824 emit_percomp(BRW_OPCODE_SEL, result, op[0], op[1],
825 instr->dest.write_mask, instr->dest.saturate,
826 BRW_PREDICATE_NONE, BRW_CONDITIONAL_L);
827 } else {
828 emit_percomp(CMP(reg_null_d, op[0], op[1], BRW_CONDITIONAL_L),
829 instr->dest.write_mask);
830
831 emit_percomp(BRW_OPCODE_SEL, result, op[0], op[1],
832 instr->dest.write_mask, instr->dest.saturate,
833 BRW_PREDICATE_NORMAL);
834 }
835 break;
836
837 case nir_op_fmax:
838 case nir_op_imax:
839 case nir_op_umax:
840 if (brw->gen >= 6) {
841 emit_percomp(BRW_OPCODE_SEL, result, op[0], op[1],
842 instr->dest.write_mask, instr->dest.saturate,
843 BRW_PREDICATE_NONE, BRW_CONDITIONAL_GE);
844 } else {
845 emit_percomp(CMP(reg_null_d, op[0], op[1], BRW_CONDITIONAL_GE),
846 instr->dest.write_mask);
847
848 emit_percomp(BRW_OPCODE_SEL, result, op[0], op[1],
849 instr->dest.write_mask, instr->dest.saturate,
850 BRW_PREDICATE_NORMAL);
851 }
852 break;
853
854 case nir_op_pack_snorm_2x16:
855 case nir_op_pack_snorm_4x8:
856 case nir_op_pack_unorm_2x16:
857 case nir_op_pack_unorm_4x8:
858 case nir_op_unpack_snorm_2x16:
859 case nir_op_unpack_snorm_4x8:
860 case nir_op_unpack_unorm_2x16:
861 case nir_op_unpack_unorm_4x8:
862 case nir_op_unpack_half_2x16:
863 case nir_op_pack_half_2x16:
864 unreachable("not reached: should be handled by lower_packing_builtins");
865
866 case nir_op_unpack_half_2x16_split_x:
867 emit_percomp(FS_OPCODE_UNPACK_HALF_2x16_SPLIT_X, result, op[0],
868 instr->dest.write_mask, instr->dest.saturate);
869 break;
870 case nir_op_unpack_half_2x16_split_y:
871 emit_percomp(FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y, result, op[0],
872 instr->dest.write_mask, instr->dest.saturate);
873 break;
874
875 case nir_op_fpow:
876 emit_percomp(SHADER_OPCODE_POW, result, op[0], op[1],
877 instr->dest.write_mask, instr->dest.saturate);
878 break;
879
880 case nir_op_bitfield_reverse:
881 emit_percomp(BFREV(result, op[0]), instr->dest.write_mask);
882 break;
883
884 case nir_op_bit_count:
885 emit_percomp(CBIT(result, op[0]), instr->dest.write_mask);
886 break;
887
888 case nir_op_ufind_msb:
889 case nir_op_ifind_msb: {
890 emit_percomp(FBH(retype(result, BRW_REGISTER_TYPE_UD), op[0]),
891 instr->dest.write_mask);
892
893 /* FBH counts from the MSB side, while GLSL's findMSB() wants the count
894 * from the LSB side. If FBH didn't return an error (0xFFFFFFFF), then
895 * subtract the result from 31 to convert the MSB count into an LSB count.
896 */
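/* E.g. for 0x00000010 (bit 4 set) FBH returns 27, the number of leading
 * zeros, and 31 - 27 = 4 is what findMSB() is expected to return.
 */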
897
898 emit_percomp(CMP(reg_null_d, result, fs_reg(-1), BRW_CONDITIONAL_NZ),
899 instr->dest.write_mask);
900 fs_reg neg_result(result);
901 neg_result.negate = true;
902 fs_inst *inst = ADD(result, neg_result, fs_reg(31));
903 inst->predicate = BRW_PREDICATE_NORMAL;
904 emit_percomp(inst, instr->dest.write_mask);
905 break;
906 }
907
908 case nir_op_find_lsb:
909 emit_percomp(FBL(result, op[0]), instr->dest.write_mask);
910 break;
911
912 case nir_op_ubitfield_extract:
913 case nir_op_ibitfield_extract:
914 emit_percomp(BFE(result, op[2], op[1], op[0]), instr->dest.write_mask);
915 break;
916 case nir_op_bfm:
917 emit_percomp(BFI1(result, op[0], op[1]), instr->dest.write_mask);
918 break;
919 case nir_op_bfi:
920 emit_percomp(BFI2(result, op[0], op[1], op[2]), instr->dest.write_mask);
921 break;
922
923 case nir_op_bitfield_insert:
924 unreachable("not reached: should be handled by "
925 "lower_instructions::bitfield_insert_to_bfm_bfi");
926
927 case nir_op_ishl:
928 emit_percomp(SHL(result, op[0], op[1]), instr->dest.write_mask);
929 break;
930 case nir_op_ishr:
931 emit_percomp(ASR(result, op[0], op[1]), instr->dest.write_mask);
932 break;
933 case nir_op_ushr:
934 emit_percomp(SHR(result, op[0], op[1]), instr->dest.write_mask);
935 break;
936
937 case nir_op_pack_half_2x16_split:
938 emit_percomp(FS_OPCODE_PACK_HALF_2x16_SPLIT, result, op[0], op[1],
939 instr->dest.write_mask);
940 break;
941
942 case nir_op_ffma:
943 emit_percomp(MAD(result, op[2], op[1], op[0]), instr->dest.write_mask);
944 break;
945
946 case nir_op_flrp:
947 /* TODO emulate for gen < 6 */
948 emit_percomp(LRP(result, op[2], op[1], op[0]), instr->dest.write_mask);
949 break;
950
951 case nir_op_bcsel:
952 emit(CMP(reg_null_d, op[0], fs_reg(0), BRW_CONDITIONAL_NZ));
953 emit_percomp(BRW_OPCODE_SEL, result, op[1], op[2],
954 instr->dest.write_mask, false, BRW_PREDICATE_NORMAL);
955 break;
956
957 default:
958 unreachable("unhandled instruction");
959 }
960
961 /* emit a predicated move if there was predication */
962 if (instr->has_predicate) {
963 fs_inst *inst = emit(MOV(reg_null_d,
964 retype(get_nir_src(instr->predicate),
965 BRW_REGISTER_TYPE_UD)));
966 inst->conditional_mod = BRW_CONDITIONAL_NZ;
967 inst = MOV(dest, result);
968 inst->predicate = BRW_PREDICATE_NORMAL;
969 emit_percomp(inst, instr->dest.write_mask);
970 }
971 }
972
973 fs_reg
974 fs_visitor::get_nir_src(nir_src src)
975 {
976 struct hash_entry *entry =
977 _mesa_hash_table_search(this->nir_reg_ht, src.reg.reg);
978 fs_reg reg = *((fs_reg *) entry->data);
979 /* to avoid floating-point denorm flushing problems, set the type by
980 * default to D - instructions that need floating point semantics will set
981 * this to F if they need to
982 */
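/* E.g. the bit pattern 0x00000001 is a float denorm: copied through an
 * F-typed register it may be flushed to 0.0, but as D it is preserved
 * bit-for-bit.
 */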
983 reg.type = BRW_REGISTER_TYPE_D;
984 reg.reg_offset = src.reg.base_offset;
985 if (src.reg.indirect) {
986 reg.reladdr = new(mem_ctx) fs_reg();
987 *reg.reladdr = retype(get_nir_src(*src.reg.indirect),
988 BRW_REGISTER_TYPE_D);
989 }
990
991 return reg;
992 }
993
994 fs_reg
995 fs_visitor::get_nir_alu_src(nir_alu_instr *instr, unsigned src)
996 {
997 fs_reg reg = get_nir_src(instr->src[src].src);
998
999 reg.type = brw_type_for_nir_type(nir_op_infos[instr->op].input_types[src]);
1000 reg.abs = instr->src[src].abs;
1001 reg.negate = instr->src[src].negate;
1002
1003 bool needs_swizzle = false;
1004 unsigned num_components = 0;
1005 for (unsigned i = 0; i < 4; i++) {
1006 if (!nir_alu_instr_channel_used(instr, src, i))
1007 continue;
1008
1009 if (instr->src[src].swizzle[i] != i)
1010 needs_swizzle = true;
1011
1012 num_components = i + 1;
1013 }
1014
1015 if (needs_swizzle) {
1016 /* resolve the swizzle through MOVs */
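/* e.g. a .yx swizzle becomes MOV new.x, src.y; MOV new.y, src.x -
 * fs_regs have no native swizzle, so each used component is copied
 * into place in a fresh GRF.
 */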
1017 fs_reg new_reg = fs_reg(GRF, virtual_grf_alloc(num_components), reg.type);
1018
1019 for (unsigned i = 0; i < 4; i++) {
1020 if (!nir_alu_instr_channel_used(instr, src, i))
1021 continue;
1022
1023 emit(MOV(offset(new_reg, i),
1024 offset(reg, instr->src[src].swizzle[i])));
1025 }
1026
1027 return new_reg;
1028 }
1029
1030 return reg;
1031 }
1032
1033 fs_reg
1034 fs_visitor::get_nir_dest(nir_dest dest)
1035 {
1036 struct hash_entry *entry =
1037 _mesa_hash_table_search(this->nir_reg_ht, dest.reg.reg);
1038 fs_reg reg = *((fs_reg *) entry->data);
1039 reg.reg_offset = dest.reg.base_offset;
1040 if (dest.reg.indirect) {
1041 reg.reladdr = new(mem_ctx) fs_reg();
1042 *reg.reladdr = retype(get_nir_src(*dest.reg.indirect),
1043 BRW_REGISTER_TYPE_D);
1044 }
1045
1046 return reg;
1047 }
1048
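/* Emit one copy of inst for each channel enabled in wr_mask, bumping
 * the destination (and any GRF source) reg_offset by the channel index;
 * e.g. a wr_mask of 0x5 emits copies for the x and z components only.
 */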
1049 void
1050 fs_visitor::emit_percomp(fs_inst *inst, unsigned wr_mask)
1051 {
1052 for (unsigned i = 0; i < 4; i++) {
1053 if (!((wr_mask >> i) & 1))
1054 continue;
1055
1056 fs_inst *new_inst = new(mem_ctx) fs_inst(*inst);
1057 new_inst->dst.reg_offset += i;
1058 for (unsigned j = 0; j < new_inst->sources; j++)
1059 if (inst->src[j].file == GRF)
1060 new_inst->src[j].reg_offset += i;
1061
1062 emit(new_inst);
1063 }
1064 }
1065
1066 void
1067 fs_visitor::emit_percomp(enum opcode op, fs_reg dest, fs_reg src0,
1068 unsigned wr_mask, bool saturate,
1069 enum brw_predicate predicate,
1070 enum brw_conditional_mod mod)
1071 {
1072 for (unsigned i = 0; i < 4; i++) {
1073 if (!((wr_mask >> i) & 1))
1074 continue;
1075
1076 fs_inst *new_inst = new(mem_ctx) fs_inst(op, dest, src0);
1077 new_inst->dst.reg_offset += i;
1078 for (unsigned j = 0; j < new_inst->sources; j++)
1079 if (new_inst->src[j].file == GRF)
1080 new_inst->src[j].reg_offset += i;
1081
1082 new_inst->predicate = predicate;
1083 new_inst->conditional_mod = mod;
1084 new_inst->saturate = saturate;
1085 emit(new_inst);
1086 }
1087 }
1088
1089 void
1090 fs_visitor::emit_percomp(enum opcode op, fs_reg dest, fs_reg src0, fs_reg src1,
1091 unsigned wr_mask, bool saturate,
1092 enum brw_predicate predicate,
1093 enum brw_conditional_mod mod)
1094 {
1095 for (unsigned i = 0; i < 4; i++) {
1096 if (!((wr_mask >> i) & 1))
1097 continue;
1098
1099 fs_inst *new_inst = new(mem_ctx) fs_inst(op, dest, src0, src1);
1100 new_inst->dst.reg_offset += i;
1101 for (unsigned j = 0; j < new_inst->sources; j++)
1102 if (new_inst->src[j].file == GRF)
1103 new_inst->src[j].reg_offset += i;
1104
1105 new_inst->predicate = predicate;
1106 new_inst->conditional_mod = mod;
1107 new_inst->saturate = saturate;
1108 emit(new_inst);
1109 }
1110 }
1111
1112 void
1113 fs_visitor::emit_math_percomp(enum opcode op, fs_reg dest, fs_reg src0,
1114 unsigned wr_mask, bool saturate)
1115 {
1116 for (unsigned i = 0; i < 4; i++) {
1117 if (!((wr_mask >> i) & 1))
1118 continue;
1119
1120 fs_reg new_dest = dest;
1121 new_dest.reg_offset += i;
1122 fs_reg new_src0 = src0;
1123 if (src0.file == GRF)
1124 new_src0.reg_offset += i;
1125
1126 fs_inst *new_inst = emit_math(op, new_dest, new_src0);
1127 new_inst->saturate = saturate;
1128 }
1129 }
1130
1131 void
1132 fs_visitor::emit_math_percomp(enum opcode op, fs_reg dest, fs_reg src0,
1133 fs_reg src1, unsigned wr_mask,
1134 bool saturate)
1135 {
1136 for (unsigned i = 0; i < 4; i++) {
1137 if (!((wr_mask >> i) & 1))
1138 continue;
1139
1140 fs_reg new_dest = dest;
1141 new_dest.reg_offset += i;
1142 fs_reg new_src0 = src0;
1143 if (src0.file == GRF)
1144 new_src0.reg_offset += i;
1145 fs_reg new_src1 = src1;
1146 if (src1.file == GRF)
1147 new_src1.reg_offset += i;
1148
1149 fs_inst *new_inst = emit_math(op, new_dest, new_src0, new_src1);
1150 new_inst->saturate = saturate;
1151 }
1152 }
1153
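/* Fold the first num_components components of src into dest pairwise,
 * e.g. for four components: temp1 = s0 op s1, temp2 = s2 op s3,
 * dest = temp1 op temp2.
 */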
1154 void
1155 fs_visitor::emit_reduction(enum opcode op, fs_reg dest, fs_reg src,
1156 unsigned num_components)
1157 {
1158 fs_reg src0 = src;
1159 fs_reg src1 = src;
1160 src1.reg_offset++;
1161
1162 if (num_components == 2) {
1163 emit(op, dest, src0, src1);
1164 return;
1165 }
1166
1167 fs_reg temp1 = fs_reg(GRF, virtual_grf_alloc(1));
1168 temp1.type = src.type;
1169 emit(op, temp1, src0, src1);
1170
1171 fs_reg src2 = src;
1172 src2.reg_offset += 2;
1173
1174 if (num_components == 3) {
1175 emit(op, dest, temp1, src2);
1176 return;
1177 }
1178
1179 assert(num_components == 4);
1180
1181 fs_reg src3 = src;
1182 src3.reg_offset += 3;
1183 fs_reg temp2 = fs_reg(GRF, virtual_grf_alloc(1));
1184 temp2.type = src.type;
1185
1186 emit(op, temp2, src2, src3);
1187 emit(op, dest, temp1, temp2);
1188 }
1189
1190 void
1191 fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
1192 {
1193 fs_reg dest;
1194 if (nir_intrinsic_infos[instr->intrinsic].has_dest)
1195 dest = get_nir_dest(instr->dest);
1196 if (instr->has_predicate) {
1197 fs_inst *inst = emit(MOV(reg_null_d,
1198 retype(get_nir_src(instr->predicate),
1199 BRW_REGISTER_TYPE_UD)));
1200 inst->conditional_mod = BRW_CONDITIONAL_NZ;
1201 }
1202
1203 switch (instr->intrinsic) {
1204 case nir_intrinsic_discard: {
1205 /* We track our discarded pixels in f0.1. By predicating on it, we can
1206 * update just the flag bits that aren't yet discarded. By emitting a
1207 * CMP of g0 != g0, all our currently executing channels will get turned
1208 * off.
1209 */
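/* Since g0 always equals itself, the comparison below is false for
 * every channel it runs on; predicating it on f0.1 restricts the flag
 * update to the channels that are still live.
 */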
1210 fs_reg some_reg = fs_reg(retype(brw_vec8_grf(0, 0),
1211 BRW_REGISTER_TYPE_UW));
1212 fs_inst *cmp = emit(CMP(reg_null_f, some_reg, some_reg,
1213 BRW_CONDITIONAL_NZ));
1214 cmp->predicate = BRW_PREDICATE_NORMAL;
1215 cmp->flag_subreg = 1;
1216
1217 if (brw->gen >= 6) {
1218 /* For performance, after a discard, jump to the end of the shader.
1219 * Only jump if all relevant channels have been discarded.
1220 */
1221 fs_inst *discard_jump = emit(FS_OPCODE_DISCARD_JUMP);
1222 discard_jump->flag_subreg = 1;
1223
1224 discard_jump->predicate = (dispatch_width == 8)
1225 ? BRW_PREDICATE_ALIGN1_ANY8H
1226 : BRW_PREDICATE_ALIGN1_ANY16H;
1227 discard_jump->predicate_inverse = true;
1228 }
1229
1230 break;
1231 }
1232
1233 case nir_intrinsic_atomic_counter_inc:
1234 case nir_intrinsic_atomic_counter_dec:
1235 case nir_intrinsic_atomic_counter_read: {
1236 unsigned surf_index = prog_data->binding_table.abo_start +
1237 (unsigned) instr->const_index[0];
1238 fs_reg offset = fs_reg(get_nir_src(instr->src[0]));
1239
1240 switch (instr->intrinsic) {
1241 case nir_intrinsic_atomic_counter_inc:
1242 emit_untyped_atomic(BRW_AOP_INC, surf_index, dest, offset,
1243 fs_reg(), fs_reg());
1244 break;
1245 case nir_intrinsic_atomic_counter_dec:
1246 emit_untyped_atomic(BRW_AOP_PREDEC, surf_index, dest, offset,
1247 fs_reg(), fs_reg());
1248 break;
1249 case nir_intrinsic_atomic_counter_read:
1250 emit_untyped_surface_read(surf_index, dest, offset);
1251 break;
1252 default:
1253 unreachable("Unreachable");
1254 }
1255 break;
1256 }
1257
1258 case nir_intrinsic_load_front_face:
1259 unreachable("TODO: load_front_face is not yet implemented");
1260
1261 case nir_intrinsic_load_sample_mask_in: {
1262 assert(brw->gen >= 7);
1263 fs_reg reg = fs_reg(retype(brw_vec8_grf(payload.sample_mask_in_reg, 0),
1264 BRW_REGISTER_TYPE_D));
1265 dest.type = reg.type;
1266 fs_inst *inst = MOV(dest, reg);
1267 if (instr->has_predicate)
1268 inst->predicate = BRW_PREDICATE_NORMAL;
1269 emit(inst);
1270 break;
1271 }
1272
1273 case nir_intrinsic_load_sample_pos: {
1274 fs_reg *reg = emit_samplepos_setup();
1275 dest.type = reg->type;
1276 emit(MOV(dest, *reg));
1277 emit(MOV(offset(dest, 1), offset(*reg, 1)));
1278 break;
1279 }
1280
1281 case nir_intrinsic_load_sample_id: {
1282 fs_reg *reg = emit_sampleid_setup();
1283 dest.type = reg->type;
1284 emit(MOV(dest, *reg));
1285 break;
1286 }
1287
1288 case nir_intrinsic_load_uniform_vec1:
1289 case nir_intrinsic_load_uniform_vec2:
1290 case nir_intrinsic_load_uniform_vec3:
1291 case nir_intrinsic_load_uniform_vec4: {
1292 unsigned index = 0;
1293 for (int i = 0; i < instr->const_index[1]; i++) {
1294 for (unsigned j = 0;
1295 j < nir_intrinsic_infos[instr->intrinsic].dest_components; j++) {
1296 fs_reg src = nir_uniforms;
1297 src.reg_offset = instr->const_index[0] + index;
1298 src.type = dest.type;
1299 index++;
1300
1301 fs_inst *inst = MOV(dest, src);
1302 if (instr->has_predicate)
1303 inst->predicate = BRW_PREDICATE_NORMAL;
1304 emit(inst);
1305 dest.reg_offset++;
1306 }
1307 }
1308 break;
1309 }
1310
1311 case nir_intrinsic_load_uniform_vec1_indirect:
1312 case nir_intrinsic_load_uniform_vec2_indirect:
1313 case nir_intrinsic_load_uniform_vec3_indirect:
1314 case nir_intrinsic_load_uniform_vec4_indirect: {
1315 unsigned index = 0;
1316 for (int i = 0; i < instr->const_index[1]; i++) {
1317 for (unsigned j = 0;
1318 j < nir_intrinsic_infos[instr->intrinsic].dest_components; j++) {
1319 fs_reg src = nir_uniforms;
1320 src.reg_offset = instr->const_index[0] + index;
1321 src.reladdr = new(mem_ctx) fs_reg(get_nir_src(instr->src[0]));
1322 src.reladdr->type = BRW_REGISTER_TYPE_D;
1323 src.type = dest.type;
1324 index++;
1325
1326 fs_inst *inst = MOV(dest, src);
1327 if (instr->has_predicate)
1328 inst->predicate = BRW_PREDICATE_NORMAL;
1329 emit(inst);
1330 dest.reg_offset++;
1331 }
1332 }
1333 break;
1334 }
1335
1336 case nir_intrinsic_load_ubo_vec1:
1337 case nir_intrinsic_load_ubo_vec2:
1338 case nir_intrinsic_load_ubo_vec3:
1339 case nir_intrinsic_load_ubo_vec4: {
1340 fs_reg surf_index = fs_reg(prog_data->binding_table.ubo_start +
1341 (unsigned) instr->const_index[0]);
1342 fs_reg packed_consts = fs_reg(this, glsl_type::float_type);
1343 packed_consts.type = dest.type;
1344
1345 fs_reg const_offset_reg = fs_reg((unsigned) instr->const_index[1] & ~15);
1346 emit(new(mem_ctx) fs_inst(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD,
1347 packed_consts, surf_index, const_offset_reg));
1348
1349 for (unsigned i = 0;
1350 i < nir_intrinsic_infos[instr->intrinsic].dest_components; i++) {
1351 packed_consts.set_smear(instr->const_index[1] % 16 / 4 + i);
1352
1353 /* The std140 packing rules don't allow vectors to cross 16-byte
1354 * boundaries, and a reg is 32 bytes.
1355 */
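/* E.g. a byte offset of 20 loads the 16-byte block at offset 16 and
 * smears dword (20 % 16) / 4 == 1 of the fetched reg (offset value
 * illustrative).
 */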
1356 assert(packed_consts.subreg_offset < 32);
1357
1358 fs_inst *inst = MOV(dest, packed_consts);
1359 if (instr->has_predicate)
1360 inst->predicate = BRW_PREDICATE_NORMAL;
1361 emit(inst);
1362
1363 dest.reg_offset++;
1364 }
1365 break;
1366 }
1367
1368 case nir_intrinsic_load_ubo_vec1_indirect:
1369 case nir_intrinsic_load_ubo_vec2_indirect:
1370 case nir_intrinsic_load_ubo_vec3_indirect:
1371 case nir_intrinsic_load_ubo_vec4_indirect: {
1372 fs_reg surf_index = fs_reg(prog_data->binding_table.ubo_start +
1373 instr->const_index[0]);
1374 /* Turn the byte offset into a dword offset. */
1375 unsigned base_offset = instr->const_index[1] / 4;
1376 fs_reg offset = fs_reg(this, glsl_type::int_type);
1377 emit(SHR(offset, retype(get_nir_src(instr->src[0]), BRW_REGISTER_TYPE_D),
1378 fs_reg(2)));
1379
1380 for (unsigned i = 0;
1381 i < nir_intrinsic_infos[instr->intrinsic].dest_components; i++) {
1382 exec_list list = VARYING_PULL_CONSTANT_LOAD(dest, surf_index,
1383 offset, base_offset + i);
1384 fs_inst *last_inst = (fs_inst *) list.get_tail();
1385 if (instr->has_predicate)
1386 last_inst->predicate = BRW_PREDICATE_NORMAL;
1387 emit(list);
1388
1389 dest.reg_offset++;
1390 }
1391 break;
1392 }
1393
1394 case nir_intrinsic_load_input_vec1:
1395 case nir_intrinsic_load_input_vec2:
1396 case nir_intrinsic_load_input_vec3:
1397 case nir_intrinsic_load_input_vec4: {
1398 unsigned index = 0;
1399 for (int i = 0; i < instr->const_index[1]; i++) {
1400 for (unsigned j = 0;
1401 j < nir_intrinsic_infos[instr->intrinsic].dest_components; j++) {
1402 fs_reg src = nir_inputs;
1403 src.reg_offset = instr->const_index[0] + index;
1404 src.type = dest.type;
1405 index++;
1406
1407 fs_inst *inst = MOV(dest, src);
1408 if (instr->has_predicate)
1409 inst->predicate = BRW_PREDICATE_NORMAL;
1410 emit(inst);
1411 dest.reg_offset++;
1412 }
1413 }
1414 break;
1415 }
1416
1417 case nir_intrinsic_load_input_vec1_indirect:
1418 case nir_intrinsic_load_input_vec2_indirect:
1419 case nir_intrinsic_load_input_vec3_indirect:
1420 case nir_intrinsic_load_input_vec4_indirect: {
1421 unsigned index = 0;
1422 for (int i = 0; i < instr->const_index[1]; i++) {
1423 for (unsigned j = 0;
1424 j < nir_intrinsic_infos[instr->intrinsic].dest_components; j++) {
1425 fs_reg src = nir_inputs;
1426 src.reg_offset = instr->const_index[0] + index;
1427 src.reladdr = new(mem_ctx) fs_reg(get_nir_src(instr->src[0]));
1428 src.reladdr->type = BRW_REGISTER_TYPE_D;
1429 src.type = dest.type;
1430 index++;
1431
1432 fs_inst *inst = MOV(dest, src);
1433 if (instr->has_predicate)
1434 inst->predicate = BRW_PREDICATE_NORMAL;
1435 emit(inst);
1436 dest.reg_offset++;
1437 }
1438 }
1439 break;
1440 }
1441
1442 case nir_intrinsic_store_output_vec1:
1443 case nir_intrinsic_store_output_vec2:
1444 case nir_intrinsic_store_output_vec3:
1445 case nir_intrinsic_store_output_vec4: {
1446 fs_reg src = get_nir_src(instr->src[0]);
1447 unsigned index = 0;
1448 for (int i = 0; i < instr->const_index[1]; i++) {
1449 for (unsigned j = 0;
1450 j < nir_intrinsic_infos[instr->intrinsic].src_components[0]; j++) {
1451 fs_reg new_dest = nir_outputs;
1452 new_dest.reg_offset = instr->const_index[0] + index;
1453 new_dest.type = src.type;
1454 index++;
1455 fs_inst *inst = MOV(new_dest, src);
1456 if (instr->has_predicate)
1457 inst->predicate = BRW_PREDICATE_NORMAL;
1458 emit(inst);
1459 src.reg_offset++;
1460 }
1461 }
1462 break;
1463 }
1464
1465 case nir_intrinsic_store_output_vec1_indirect:
1466 case nir_intrinsic_store_output_vec2_indirect:
1467 case nir_intrinsic_store_output_vec3_indirect:
1468 case nir_intrinsic_store_output_vec4_indirect: {
1469 fs_reg src = get_nir_src(instr->src[0]);
1470 fs_reg indirect = get_nir_src(instr->src[1]);
1471 unsigned index = 0;
1472 for (int i = 0; i < instr->const_index[1]; i++) {
1473 for (unsigned j = 0;
1474 j < nir_intrinsic_infos[instr->intrinsic].src_components[0]; j++) {
1475 fs_reg new_dest = nir_outputs;
1476 new_dest.reg_offset = instr->const_index[0] + index;
1477 new_dest.reladdr = new(mem_ctx) fs_reg(indirect);
1478 new_dest.type = src.type;
1479 index++;
1480 fs_inst *inst = MOV(new_dest, src);
1481 if (instr->has_predicate)
1482 inst->predicate = BRW_PREDICATE_NORMAL;
1483 emit(inst);
1484 src.reg_offset++;
1485 }
1486 }
1487 break;
1488 }
1489
1490 default:
1491 unreachable("unknown intrinsic");
1492 }
1493 }
1494
1495 void
1496 fs_visitor::nir_emit_texture(nir_tex_instr *instr)
1497 {
1498 brw_wm_prog_key *key = (brw_wm_prog_key*) this->key;
1499 unsigned sampler = instr->sampler_index;
1500
1501 /* FINISHME: We're failing to recompile our programs when the sampler is
1502 * updated. This only matters for the texture rectangle scale parameters
1503 * (pre-gen6, or gen6+ with GL_CLAMP).
1504 */
1505 int texunit = prog->SamplerUnits[sampler];
1506
1507 int gather_component = instr->component;
1508
1509 bool is_rect = instr->sampler_dim == GLSL_SAMPLER_DIM_RECT;
1510
1511 bool is_cube_array = instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE &&
1512 instr->is_array;
1513
1514 int lod_components = 0, offset_components = 0;
1515
1516 fs_reg coordinate, shadow_comparitor, lod, lod2, sample_index, mcs, offset;
1517
1518 for (unsigned i = 0; i < instr->num_srcs; i++) {
1519 fs_reg src = get_nir_src(instr->src[i]);
1520 switch (instr->src_type[i]) {
1521 case nir_tex_src_bias:
1522 lod = retype(src, BRW_REGISTER_TYPE_F);
1523 break;
1524 case nir_tex_src_comparitor:
1525 shadow_comparitor = retype(src, BRW_REGISTER_TYPE_F);
1526 break;
1527 case nir_tex_src_coord:
1528 switch (instr->op) {
1529 case nir_texop_txf:
1530 case nir_texop_txf_ms:
1531 coordinate = retype(src, BRW_REGISTER_TYPE_D);
1532 break;
1533 default:
1534 coordinate = retype(src, BRW_REGISTER_TYPE_F);
1535 break;
1536 }
1537 break;
1538 case nir_tex_src_ddx:
1539 lod = retype(src, BRW_REGISTER_TYPE_F);
1540 lod_components = nir_tex_instr_src_size(instr, i);
1541 break;
1542 case nir_tex_src_ddy:
1543 lod2 = retype(src, BRW_REGISTER_TYPE_F);
1544 break;
1545 case nir_tex_src_lod:
1546 switch (instr->op) {
1547 case nir_texop_txs:
1548 lod = retype(src, BRW_REGISTER_TYPE_UD);
1549 break;
1550 case nir_texop_txf:
1551 lod = retype(src, BRW_REGISTER_TYPE_D);
1552 break;
1553 default:
1554 lod = retype(src, BRW_REGISTER_TYPE_F);
1555 break;
1556 }
1557 break;
1558 case nir_tex_src_ms_index:
1559 sample_index = retype(src, BRW_REGISTER_TYPE_UD);
1560 break;
1561 case nir_tex_src_offset:
1562 offset = retype(src, BRW_REGISTER_TYPE_D);
1563 if (instr->is_array)
1564 offset_components = instr->coord_components - 1;
1565 else
1566 offset_components = instr->coord_components;
1567 break;
1568 case nir_tex_src_projector:
1569 unreachable("should be lowered");
1570 case nir_tex_src_sampler_index:
1571 unreachable("not yet supported");
1572 default:
1573 unreachable("unknown texture source");
1574 }
1575 }
1576
1577 if (instr->op == nir_texop_txf_ms) {
1578 if (brw->gen >= 7 && key->tex.compressed_multisample_layout_mask & (1<<sampler))
1579 mcs = emit_mcs_fetch(coordinate, instr->coord_components, fs_reg(sampler));
1580 else
1581 mcs = fs_reg(0u);
1582 }
1583
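/* A nonzero constant offset overrides any dynamic offset source; all
 * three components get packed into a single immediate by
 * brw_texture_offset for the sampler message header.
 */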
1584 for (unsigned i = 0; i < 3; i++) {
1585 if (instr->const_offset[i] != 0) {
1586 assert(offset_components == 0);
1587 offset = fs_reg(brw_texture_offset(ctx, instr->const_offset, 3));
1588 break;
1589 }
1590 }
1591
1592 enum glsl_base_type dest_base_type;
1593 switch (instr->dest_type) {
1594 case nir_type_float:
1595 dest_base_type = GLSL_TYPE_FLOAT;
1596 break;
1597 case nir_type_int:
1598 dest_base_type = GLSL_TYPE_INT;
1599 break;
1600 case nir_type_unsigned:
1601 dest_base_type = GLSL_TYPE_UINT;
1602 break;
1603 default:
1604 unreachable("bad type");
1605 }
1606
1607 const glsl_type *dest_type =
1608 glsl_type::get_instance(dest_base_type, nir_tex_instr_dest_size(instr),
1609 1);
1610
1611 ir_texture_opcode op;
1612 switch (instr->op) {
1613 case nir_texop_lod: op = ir_lod; break;
1614 case nir_texop_query_levels: op = ir_query_levels; break;
1615 case nir_texop_tex: op = ir_tex; break;
1616 case nir_texop_tg4: op = ir_tg4; break;
1617 case nir_texop_txb: op = ir_txb; break;
1618 case nir_texop_txd: op = ir_txd; break;
1619 case nir_texop_txf: op = ir_txf; break;
1620 case nir_texop_txf_ms: op = ir_txf_ms; break;
1621 case nir_texop_txl: op = ir_txl; break;
1622 case nir_texop_txs: op = ir_txs; break;
1623 default:
1624 unreachable("unknown texture opcode");
1625 }
1626
1627 emit_texture(op, dest_type, coordinate, instr->coord_components,
1628 shadow_comparitor, lod, lod2, lod_components, sample_index,
1629 offset, offset_components, mcs, gather_component,
1630 is_cube_array, is_rect, sampler, fs_reg(sampler), texunit);
1631
1632 fs_reg dest = get_nir_dest(instr->dest);
1633 dest.type = this->result.type;
1634 unsigned num_components = nir_tex_instr_dest_size(instr);
1635 emit_percomp(MOV(dest, this->result), (1 << num_components) - 1);
1636 }
1637
1638 void
1639 fs_visitor::nir_emit_load_const(nir_load_const_instr *instr)
1640 {
1641 fs_reg dest = get_nir_dest(instr->dest);
1642 dest.type = BRW_REGISTER_TYPE_UD;
1643 if (instr->array_elems == 0) {
1644 for (unsigned i = 0; i < instr->num_components; i++) {
1645 emit(MOV(dest, fs_reg(instr->value.u[i])));
1646 dest.reg_offset++;
1647 }
1648 } else {
1649 for (unsigned i = 0; i < instr->array_elems; i++) {
1650 for (unsigned j = 0; j < instr->num_components; j++) {
1651 emit(MOV(dest, fs_reg(instr->array[i].u[j])));
1652 dest.reg_offset++;
1653 }
1654 }
1655 }
1656 }
1657
1658 void
1659 fs_visitor::nir_emit_jump(nir_jump_instr *instr)
1660 {
1661 switch (instr->type) {
1662 case nir_jump_break:
1663 emit(BRW_OPCODE_BREAK);
1664 break;
1665 case nir_jump_continue:
1666 emit(BRW_OPCODE_CONTINUE);
1667 break;
1668 case nir_jump_return:
1669 default:
1670 unreachable("unknown jump");
1671 }
1672 }