Fix up varying pull constants
[mesa.git] / src / mesa / drivers / dri / i965 / brw_fs_nir.cpp
/*
 * Copyright © 2010 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "glsl/nir/glsl_to_nir.h"
#include "brw_fs.h"

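/* Resolve the effective interpolation mode for a fragment shader input: an
 * explicit qualifier on the variable wins; otherwise gl_Color/gl_SecondaryColor
 * follow the flat-shade state, and everything else interpolates smoothly.
 */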
static glsl_interp_qualifier
determine_interpolation_mode(nir_variable *var, bool flat_shade)
{
   if (var->data.interpolation != INTERP_QUALIFIER_NONE)
      return (glsl_interp_qualifier) var->data.interpolation;
   int location = var->data.location;
   bool is_gl_Color =
      location == VARYING_SLOT_COL0 || location == VARYING_SLOT_COL1;
   if (flat_shade && is_gl_Color)
      return INTERP_QUALIFIER_FLAT;
   else
      return INTERP_QUALIFIER_SMOOTH;
}

void
fs_visitor::emit_nir_code()
{
   /* first, lower the GLSL IR shader to NIR */
   nir_shader *nir = glsl_to_nir(shader->base.ir, NULL, true);
   nir_validate_shader(nir);

   /* lower some of the GLSL-isms into NIR-isms - after this point, we no
    * longer have to deal with variables inside the shader
    */

   nir_lower_variables_scalar(nir, true, true, true, true);
   nir_validate_shader(nir);

   nir_lower_samplers(nir, shader_prog, shader->base.Program);
   nir_validate_shader(nir);

   nir_lower_system_values(nir);
   nir_validate_shader(nir);

   nir_lower_atomics(nir);
   nir_validate_shader(nir);

   nir_remove_dead_variables(nir);
   nir_opt_global_to_local(nir);
   nir_validate_shader(nir);

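   /* Debug: dump the fully lowered NIR to stderr. */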
   if (1)
      nir_print_shader(nir, stderr);

   /* emit the arrays used for inputs and outputs - load/store intrinsics will
    * be converted to reads/writes of these arrays
    */

   if (nir->num_inputs > 0) {
      nir_inputs = fs_reg(GRF, virtual_grf_alloc(nir->num_inputs));
      nir_setup_inputs(nir);
   }

   if (nir->num_outputs > 0) {
      nir_outputs = fs_reg(GRF, virtual_grf_alloc(nir->num_outputs));
      nir_setup_outputs(nir);
   }

   if (nir->num_uniforms > 0) {
      nir_uniforms = fs_reg(UNIFORM, 0);
      nir_setup_uniforms(nir);
   }

   nir_setup_registers(&nir->registers);

   /* get the main function and emit it */
   nir_foreach_overload(nir, overload) {
      assert(strcmp(overload->function->name, "main") == 0);
      assert(overload->impl);
      nir_emit_impl(overload->impl);
   }

   ralloc_free(nir);
}

void
fs_visitor::nir_setup_inputs(nir_shader *shader)
{
   fs_reg varying = nir_inputs;

   struct hash_entry *entry;
   hash_table_foreach(shader->inputs, entry) {
      nir_variable *var = (nir_variable *) entry->data;
      varying.reg_offset = var->data.driver_location;

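      /* gl_FragCoord and gl_FrontFacing aren't interpolated varyings; they
       * are synthesized from payload data, so handle them specially.
       */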
      fs_reg reg;
      if (!strcmp(var->name, "gl_FragCoord")) {
         reg = *emit_fragcoord_interpolation(var->data.pixel_center_integer,
                                             var->data.origin_upper_left);
         emit_percomp(MOV(varying, reg), 0xF);
      } else if (!strcmp(var->name, "gl_FrontFacing")) {
         reg = *emit_frontfacing_interpolation();
         emit(MOV(retype(varying, BRW_REGISTER_TYPE_UD), reg));
      } else {
         nir_emit_interpolation(var, &varying);
      }
   }
}

void
fs_visitor::nir_emit_interpolation(nir_variable *var, fs_reg *varying)
{
   brw_wm_prog_data *prog_data = (brw_wm_prog_data*) this->prog_data;
   brw_wm_prog_key *key = (brw_wm_prog_key*) this->key;
   fs_reg reg = *varying;
   reg.type = brw_type_for_base_type(var->type->get_scalar_type());

   unsigned int array_elements;
   const glsl_type *type;

   if (var->type->is_array()) {
      array_elements = var->type->length;
      if (array_elements == 0) {
         fail("dereferenced array '%s' has length 0\n", var->name);
      }
      type = var->type->fields.array;
   } else {
      array_elements = 1;
      type = var->type;
   }

   glsl_interp_qualifier interpolation_mode =
      determine_interpolation_mode(var, key->flat_shade);

   int location = var->data.location;
   for (unsigned int i = 0; i < array_elements; i++) {
      for (unsigned int j = 0; j < type->matrix_columns; j++) {
         if (prog_data->urb_setup[location] == -1) {
            /* If there's no incoming setup data for this slot, don't
             * emit interpolation for it.
             */
            reg.reg_offset += type->vector_elements;
            location++;
            continue;
         }

         if (interpolation_mode == INTERP_QUALIFIER_FLAT) {
            /* Constant interpolation (flat shading) case. The SF has
             * handed us defined values in only the constant offset
             * field of the setup reg.
             */
            for (unsigned int k = 0; k < type->vector_elements; k++) {
               struct brw_reg interp = interp_reg(location, k);
               interp = suboffset(interp, 3);
               interp.type = reg.type;
               emit(FS_OPCODE_CINTERP, reg, fs_reg(interp));
               reg.reg_offset++;
            }
         } else {
            /* Smooth/noperspective interpolation case. */
            for (unsigned int k = 0; k < type->vector_elements; k++) {
               struct brw_reg interp = interp_reg(location, k);
               if (brw->needs_unlit_centroid_workaround && var->data.centroid) {
                  /* Get the pixel/sample mask into f0 so that we know
                   * which pixels are lit. Then, for each channel that is
                   * unlit, replace the centroid data with non-centroid
                   * data.
                   */
                  emit(FS_OPCODE_MOV_DISPATCH_TO_FLAGS);

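                  /* The two predicated LINTERPs below write the same
                   * destination; when the hardware has PLN, the no_dd_clear/
                   * no_dd_check bits tell the EU not to track the destination
                   * dependency between the two halves of the pair.
                   */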
                  fs_inst *inst;
                  inst = emit_linterp(reg, fs_reg(interp), interpolation_mode,
                                      false, false);
                  inst->predicate = BRW_PREDICATE_NORMAL;
                  inst->predicate_inverse = true;
                  if (brw->has_pln)
                     inst->no_dd_clear = true;

                  inst = emit_linterp(reg, fs_reg(interp), interpolation_mode,
                                      var->data.centroid && !key->persample_shading,
                                      var->data.sample || key->persample_shading);
                  inst->predicate = BRW_PREDICATE_NORMAL;
                  inst->predicate_inverse = false;
                  if (brw->has_pln)
                     inst->no_dd_check = true;

               } else {
                  emit_linterp(reg, fs_reg(interp), interpolation_mode,
                               var->data.centroid && !key->persample_shading,
                               var->data.sample || key->persample_shading);
               }
               if (brw->gen < 6 && interpolation_mode == INTERP_QUALIFIER_SMOOTH) {
                  emit(BRW_OPCODE_MUL, reg, reg, this->pixel_w);
               }
               reg.reg_offset++;
            }

         }
         location++;
      }
   }
}

void
fs_visitor::nir_setup_outputs(nir_shader *shader)
{
   brw_wm_prog_key *key = (brw_wm_prog_key*) this->key;
   fs_reg reg = nir_outputs;

   struct hash_entry *entry;
   hash_table_foreach(shader->outputs, entry) {
      nir_variable *var = (nir_variable *) entry->data;
      reg.reg_offset = var->data.driver_location;

      if (var->data.index > 0) {
         assert(var->data.location == FRAG_RESULT_DATA0);
         assert(var->data.index == 1);
         this->dual_src_output = reg;
         this->do_dual_src = true;
      } else if (var->data.location == FRAG_RESULT_COLOR) {
         /* Writing gl_FragColor outputs to all color regions. */
         for (unsigned int i = 0; i < MAX2(key->nr_color_regions, 1); i++) {
            this->outputs[i] = reg;
            this->output_components[i] = 4;
         }
      } else if (var->data.location == FRAG_RESULT_DEPTH) {
         this->frag_depth = reg;
      } else if (var->data.location == FRAG_RESULT_SAMPLE_MASK) {
         this->sample_mask = reg;
      } else {
         /* gl_FragData or a user-defined FS output */
         assert(var->data.location >= FRAG_RESULT_DATA0 &&
                var->data.location < FRAG_RESULT_DATA0 + BRW_MAX_DRAW_BUFFERS);

         int vector_elements =
            var->type->is_array() ? var->type->fields.array->vector_elements
                                  : var->type->vector_elements;

         /* General color output. */
         for (unsigned int i = 0; i < MAX2(1, var->type->length); i++) {
            int output = var->data.location - FRAG_RESULT_DATA0 + i;
            this->outputs[output] = reg;
            this->outputs[output].reg_offset += vector_elements * i;
            this->output_components[output] = vector_elements;
         }
      }
   }
}

void
fs_visitor::nir_setup_uniforms(nir_shader *shader)
{
   uniforms = shader->num_uniforms;
   param_size[0] = shader->num_uniforms;

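   /* The param[] layout is filled in only once, by the SIMD8 compile; a
    * SIMD16 re-run reuses the same uniform layout, so don't set it up twice.
    */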
   if (dispatch_width != 8)
      return;

   struct hash_entry *entry;
   hash_table_foreach(shader->uniforms, entry) {
      nir_variable *var = (nir_variable *) entry->data;

      /* UBO's and atomics don't take up space in the uniform file */

      if (var->interface_type != NULL || var->type->contains_atomic())
         continue;

      if (strncmp(var->name, "gl_", 3) == 0)
         nir_setup_builtin_uniform(var);
      else
         nir_setup_uniform(var);
   }
}

void
fs_visitor::nir_setup_uniform(nir_variable *var)
{
   int namelen = strlen(var->name);

   /* The data for our (non-builtin) uniforms is stored in a series of
    * gl_uniform_driver_storage structs for each subcomponent that
    * glGetUniformLocation() could name. We know it's been set up in the
    * same order we'd walk the type, so walk the list of storage and find
    * anything with our name, or the prefix of a component that starts with
    * our name.
    */
   unsigned index = var->data.driver_location;
   for (unsigned u = 0; u < shader_prog->NumUserUniformStorage; u++) {
      struct gl_uniform_storage *storage = &shader_prog->UniformStorage[u];

      if (strncmp(var->name, storage->name, namelen) != 0 ||
          (storage->name[namelen] != 0 &&
           storage->name[namelen] != '.' &&
           storage->name[namelen] != '[')) {
         continue;
      }

      unsigned slots = storage->type->component_slots();
      if (storage->array_elements)
         slots *= storage->array_elements;

      for (unsigned i = 0; i < slots; i++) {
         stage_prog_data->param[index++] = &storage->storage[i];
      }
   }

   /* Make sure we actually initialized the right amount of stuff here. */
   assert(var->data.driver_location + var->type->component_slots() == index);
}

void
fs_visitor::nir_setup_builtin_uniform(nir_variable *var)
{
   const nir_state_slot *const slots = var->state_slots;
   assert(var->state_slots != NULL);

   unsigned uniform_index = var->data.driver_location;
   for (unsigned int i = 0; i < var->num_state_slots; i++) {
      /* This state reference has already been setup by ir_to_mesa, but we'll
       * get the same index back here.
       */
      int index = _mesa_add_state_reference(this->prog->Parameters,
                                            (gl_state_index *)slots[i].tokens);

      /* Add each of the unique swizzles of the element as a parameter.
       * This'll end up matching the expected layout of the
       * array/matrix/structure we're trying to fill in.
       */
      int last_swiz = -1;
      for (unsigned int j = 0; j < 4; j++) {
         int swiz = GET_SWZ(slots[i].swizzle, j);
         if (swiz == last_swiz)
            break;
         last_swiz = swiz;

         stage_prog_data->param[uniform_index++] =
            &prog->Parameters->ParameterValues[index][swiz];
      }
   }
}

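/* Allocate one VGRF per NIR register, sized for its array elements times its
 * component count, and remember the mapping so get_nir_src()/get_nir_dest()
 * can look it up later.
 */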
void
fs_visitor::nir_setup_registers(exec_list *list)
{
   foreach_list_typed(nir_register, nir_reg, node, list) {
      unsigned array_elems =
         nir_reg->num_array_elems == 0 ? 1 : nir_reg->num_array_elems;
      unsigned size = array_elems * nir_reg->num_components;
      fs_reg *reg = new(mem_ctx) fs_reg(GRF, virtual_grf_alloc(size));
      _mesa_hash_table_insert(this->nir_reg_ht, nir_reg, reg);
   }
}

void
fs_visitor::nir_emit_impl(nir_function_impl *impl)
{
   nir_setup_registers(&impl->registers);
   nir_emit_cf_list(&impl->body);
}

void
fs_visitor::nir_emit_cf_list(exec_list *list)
{
   foreach_list_typed(nir_cf_node, node, node, list) {
      switch (node->type) {
      case nir_cf_node_if:
         nir_emit_if(nir_cf_node_as_if(node));
         break;

      case nir_cf_node_loop:
         nir_emit_loop(nir_cf_node_as_loop(node));
         break;

      case nir_cf_node_block:
         nir_emit_block(nir_cf_node_as_block(node));
         break;

      default:
         unreachable("Invalid CFG node block");
      }
   }
}

void
fs_visitor::nir_emit_if(nir_if *if_stmt)
{
   if (brw->gen < 6) {
      no16("Can't support (non-uniform) control flow on SIMD16\n");
   }

   /* first, put the condition into f0 */
   fs_inst *inst = emit(MOV(reg_null_d,
                            retype(get_nir_src(if_stmt->condition),
                                   BRW_REGISTER_TYPE_UD)));
   inst->conditional_mod = BRW_CONDITIONAL_NZ;

   emit(IF(BRW_PREDICATE_NORMAL));

   nir_emit_cf_list(&if_stmt->then_list);

   /* note: if the else is empty, dead CF elimination will remove it */
   emit(BRW_OPCODE_ELSE);

   nir_emit_cf_list(&if_stmt->else_list);

   emit(BRW_OPCODE_ENDIF);

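   /* If both branches boiled down to single MOVs of the same destination,
    * collapse the whole IF/ELSE/ENDIF into predicated SELs.
    */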
   try_replace_with_sel();
}

void
fs_visitor::nir_emit_loop(nir_loop *loop)
{
   if (brw->gen < 6) {
      no16("Can't support (non-uniform) control flow on SIMD16\n");
   }

   emit(BRW_OPCODE_DO);

   nir_emit_cf_list(&loop->body);

   emit(BRW_OPCODE_WHILE);
}

void
fs_visitor::nir_emit_block(nir_block *block)
{
   nir_foreach_instr(block, instr) {
      nir_emit_instr(instr);
   }
}

void
fs_visitor::nir_emit_instr(nir_instr *instr)
{
   switch (instr->type) {
   case nir_instr_type_alu:
      nir_emit_alu(nir_instr_as_alu(instr));
      break;

   case nir_instr_type_intrinsic:
      nir_emit_intrinsic(nir_instr_as_intrinsic(instr));
      break;

   case nir_instr_type_texture:
      nir_emit_texture(nir_instr_as_texture(instr));
      break;

   case nir_instr_type_load_const:
      nir_emit_load_const(nir_instr_as_load_const(instr));
      break;

   case nir_instr_type_jump:
      nir_emit_jump(nir_instr_as_jump(instr));
      break;

   default:
      unreachable("unknown instruction type");
   }
}

static brw_reg_type
brw_type_for_nir_type(nir_alu_type type)
{
   switch (type) {
   case nir_type_bool:
   case nir_type_unsigned:
      return BRW_REGISTER_TYPE_UD;
   case nir_type_int:
      return BRW_REGISTER_TYPE_D;
   case nir_type_float:
      return BRW_REGISTER_TYPE_F;
   default:
      unreachable("unknown type");
   }

   return BRW_REGISTER_TYPE_F;
}

void
fs_visitor::nir_emit_alu(nir_alu_instr *instr)
{
   struct brw_wm_prog_key *fs_key = (struct brw_wm_prog_key *) this->key;

   fs_reg op[3];
   fs_reg dest = retype(get_nir_dest(instr->dest.dest),
                        brw_type_for_nir_type(nir_op_infos[instr->op].output_type));

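   /* If the instruction is predicated, compute into a temporary and
    * predicate only the final copy into the real destination, so inactive
    * channels keep their previous values.
    */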
   fs_reg result;
   if (instr->has_predicate) {
      result = fs_reg(GRF, virtual_grf_alloc(4));
      result.type = dest.type;
   } else {
      result = dest;
   }

   for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
      op[i] = retype(get_nir_alu_src(instr, i),
                     brw_type_for_nir_type(nir_op_infos[instr->op].input_types[i]));
   }

   switch (instr->op) {
   case nir_op_fmov:
   case nir_op_i2f:
   case nir_op_u2f: {
      fs_inst *inst = MOV(result, op[0]);
      inst->saturate = instr->dest.saturate;
      emit_percomp(inst, instr->dest.write_mask);
   }
      break;

   case nir_op_imov:
   case nir_op_f2i:
   case nir_op_f2u:
      emit_percomp(MOV(result, op[0]), instr->dest.write_mask);
      break;

   case nir_op_fsign: {
      /* AND(val, 0x80000000) gives the sign bit.
       *
       * Predicated OR ORs 1.0 (0x3f800000) with the sign bit if val is not
       * zero.
       */
      emit_percomp(CMP(reg_null_f, op[0], fs_reg(0.0f), BRW_CONDITIONAL_NZ),
                   instr->dest.write_mask);

      fs_reg result_int = retype(result, BRW_REGISTER_TYPE_UD);
      op[0].type = BRW_REGISTER_TYPE_UD;
      result.type = BRW_REGISTER_TYPE_UD;
      emit_percomp(AND(result_int, op[0], fs_reg(0x80000000u)),
                   instr->dest.write_mask);

      fs_inst *inst = OR(result_int, result_int, fs_reg(0x3f800000u));
      inst->predicate = BRW_PREDICATE_NORMAL;
      emit_percomp(inst, instr->dest.write_mask);
      if (instr->dest.saturate) {
         fs_inst *inst = MOV(result, result);
         inst->saturate = true;
         emit_percomp(inst, instr->dest.write_mask);
      }
      break;
   }

   case nir_op_isign: {
      /* ASR(val, 31) -> negative val generates 0xffffffff (signed -1).
       *              -> non-negative val generates 0x00000000.
       * Predicated OR sets 1 if val is positive.
       */
      emit_percomp(CMP(reg_null_d, op[0], fs_reg(0), BRW_CONDITIONAL_G),
                   instr->dest.write_mask);

      emit_percomp(ASR(result, op[0], fs_reg(31)), instr->dest.write_mask);

      fs_inst *inst = OR(result, result, fs_reg(1));
      inst->predicate = BRW_PREDICATE_NORMAL;
      emit_percomp(inst, instr->dest.write_mask);
      break;
   }

   case nir_op_frcp:
      emit_math_percomp(SHADER_OPCODE_RCP, result, op[0],
                        instr->dest.write_mask, instr->dest.saturate);
      break;

   case nir_op_fexp2:
      emit_math_percomp(SHADER_OPCODE_EXP2, result, op[0],
                        instr->dest.write_mask, instr->dest.saturate);
      break;

   case nir_op_flog2:
      emit_math_percomp(SHADER_OPCODE_LOG2, result, op[0],
                        instr->dest.write_mask, instr->dest.saturate);
      break;

   case nir_op_fexp:
   case nir_op_flog:
      unreachable("not reached: should be handled by ir_explog_to_explog2");

   case nir_op_fsin:
   case nir_op_fsin_reduced:
      emit_math_percomp(SHADER_OPCODE_SIN, result, op[0],
                        instr->dest.write_mask, instr->dest.saturate);
      break;

   case nir_op_fcos:
   case nir_op_fcos_reduced:
      emit_math_percomp(SHADER_OPCODE_COS, result, op[0],
                        instr->dest.write_mask, instr->dest.saturate);
      break;

   case nir_op_fddx:
      if (fs_key->high_quality_derivatives)
         emit_percomp(FS_OPCODE_DDX_FINE, result, op[0],
                      instr->dest.write_mask, instr->dest.saturate);
      else
         emit_percomp(FS_OPCODE_DDX_COARSE, result, op[0],
                      instr->dest.write_mask, instr->dest.saturate);
      break;
   case nir_op_fddy:
      if (fs_key->high_quality_derivatives)
         emit_percomp(FS_OPCODE_DDY_FINE, result, op[0],
                      fs_reg(fs_key->render_to_fbo),
                      instr->dest.write_mask, instr->dest.saturate);
      else
         emit_percomp(FS_OPCODE_DDY_COARSE, result, op[0],
                      fs_reg(fs_key->render_to_fbo),
                      instr->dest.write_mask, instr->dest.saturate);
      break;

   case nir_op_fadd:
   case nir_op_iadd: {
      fs_inst *inst = ADD(result, op[0], op[1]);
      inst->saturate = instr->dest.saturate;
      emit_percomp(inst, instr->dest.write_mask);
      break;
   }

   case nir_op_fmul: {
      fs_inst *inst = MUL(result, op[0], op[1]);
      inst->saturate = instr->dest.saturate;
      emit_percomp(inst, instr->dest.write_mask);
      break;
   }

   case nir_op_imul: {
      /* TODO put in the 16-bit constant optimization once we have SSA */

      if (brw->gen >= 7)
         no16("SIMD16 explicit accumulator operands unsupported\n");

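      /* A 32x32 integer multiply needs the MUL/MACH accumulator sequence:
       * MACH's destination gets the high 32 bits of the product (discarded
       * here), while the low 32 bits are left in the accumulator and copied
       * out with the final MOV.
       */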
      struct brw_reg acc = retype(brw_acc_reg(dispatch_width), result.type);

      emit_percomp(MUL(acc, op[0], op[1]), instr->dest.write_mask);
      emit_percomp(MACH(reg_null_d, op[0], op[1]), instr->dest.write_mask);
      emit_percomp(MOV(result, fs_reg(acc)), instr->dest.write_mask);
      break;
   }

   case nir_op_imul_high:
   case nir_op_umul_high: {
      if (brw->gen >= 7)
         no16("SIMD16 explicit accumulator operands unsupported\n");

      struct brw_reg acc = retype(brw_acc_reg(dispatch_width), result.type);

      emit_percomp(MUL(acc, op[0], op[1]), instr->dest.write_mask);
      emit_percomp(MACH(result, op[0], op[1]), instr->dest.write_mask);
      break;
   }

   case nir_op_idiv:
   case nir_op_udiv:
      emit_math_percomp(SHADER_OPCODE_INT_QUOTIENT, result, op[0], op[1],
                        instr->dest.write_mask);
      break;

   case nir_op_uadd_carry: {
      if (brw->gen >= 7)
         no16("SIMD16 explicit accumulator operands unsupported\n");

      struct brw_reg acc = retype(brw_acc_reg(dispatch_width),
                                  BRW_REGISTER_TYPE_UD);

      emit_percomp(ADDC(reg_null_ud, op[0], op[1]), instr->dest.write_mask);
      emit_percomp(MOV(result, fs_reg(acc)), instr->dest.write_mask);
      break;
   }

   case nir_op_usub_borrow: {
      if (brw->gen >= 7)
         no16("SIMD16 explicit accumulator operands unsupported\n");

      struct brw_reg acc = retype(brw_acc_reg(dispatch_width),
                                  BRW_REGISTER_TYPE_UD);

      emit_percomp(SUBB(reg_null_ud, op[0], op[1]), instr->dest.write_mask);
      emit_percomp(MOV(result, fs_reg(acc)), instr->dest.write_mask);
      break;
   }

   case nir_op_umod:
      emit_math_percomp(SHADER_OPCODE_INT_REMAINDER, result, op[0],
                        op[1], instr->dest.write_mask);
      break;

   case nir_op_flt:
   case nir_op_ilt:
   case nir_op_ult:
      emit_percomp(CMP(result, op[0], op[1], BRW_CONDITIONAL_L),
                   instr->dest.write_mask);
      break;

   case nir_op_fge:
   case nir_op_ige:
   case nir_op_uge:
      emit_percomp(CMP(result, op[0], op[1], BRW_CONDITIONAL_GE),
                   instr->dest.write_mask);
      break;

   case nir_op_feq:
   case nir_op_ieq:
      emit_percomp(CMP(result, op[0], op[1], BRW_CONDITIONAL_Z),
                   instr->dest.write_mask);
      break;

   case nir_op_fne:
   case nir_op_ine:
      emit_percomp(CMP(result, op[0], op[1], BRW_CONDITIONAL_NZ),
                   instr->dest.write_mask);
      break;

   case nir_op_ball_fequal2:
   case nir_op_ball_iequal2:
   case nir_op_ball_fequal3:
   case nir_op_ball_iequal3:
   case nir_op_ball_fequal4:
   case nir_op_ball_iequal4: {
      unsigned num_components = nir_op_infos[instr->op].input_sizes[0];
      fs_reg temp = fs_reg(GRF, virtual_grf_alloc(num_components));
      emit_percomp(CMP(temp, op[0], op[1], BRW_CONDITIONAL_Z),
                   (1 << num_components) - 1);
      emit_reduction(BRW_OPCODE_AND, result, temp, num_components);
      break;
   }

   case nir_op_bany_fnequal2:
   case nir_op_bany_inequal2:
   case nir_op_bany_fnequal3:
   case nir_op_bany_inequal3:
   case nir_op_bany_fnequal4:
   case nir_op_bany_inequal4: {
      unsigned num_components = nir_op_infos[instr->op].input_sizes[0];
      fs_reg temp = fs_reg(GRF, virtual_grf_alloc(num_components));
      temp.type = BRW_REGISTER_TYPE_UD;
      emit_percomp(CMP(temp, op[0], op[1], BRW_CONDITIONAL_NZ),
                   (1 << num_components) - 1);
      emit_reduction(BRW_OPCODE_OR, result, temp, num_components);
      break;
   }

   case nir_op_inot:
      emit_percomp(NOT(result, op[0]), instr->dest.write_mask);
      break;
   case nir_op_ixor:
      emit_percomp(XOR(result, op[0], op[1]), instr->dest.write_mask);
      break;
   case nir_op_ior:
      emit_percomp(OR(result, op[0], op[1]), instr->dest.write_mask);
      break;
   case nir_op_iand:
      emit_percomp(AND(result, op[0], op[1]), instr->dest.write_mask);
      break;

   case nir_op_fdot2:
   case nir_op_fdot3:
   case nir_op_fdot4: {
      unsigned num_components = nir_op_infos[instr->op].input_sizes[0];
      fs_reg temp = fs_reg(GRF, virtual_grf_alloc(num_components));
      emit_percomp(MUL(temp, op[0], op[1]), (1 << num_components) - 1);
      emit_reduction(BRW_OPCODE_ADD, result, temp, num_components);
      if (instr->dest.saturate) {
         fs_inst *inst = emit(MOV(result, result));
         inst->saturate = true;
      }
      break;
   }

   case nir_op_bany2:
   case nir_op_bany3:
   case nir_op_bany4: {
      unsigned num_components = nir_op_infos[instr->op].input_sizes[0];
      emit_reduction(BRW_OPCODE_OR, result, op[0], num_components);
      break;
   }

   case nir_op_ball2:
   case nir_op_ball3:
   case nir_op_ball4: {
      unsigned num_components = nir_op_infos[instr->op].input_sizes[0];
      emit_reduction(BRW_OPCODE_AND, result, op[0], num_components);
      break;
   }

   case nir_op_fnoise1_1:
   case nir_op_fnoise1_2:
   case nir_op_fnoise1_3:
   case nir_op_fnoise1_4:
   case nir_op_fnoise2_1:
   case nir_op_fnoise2_2:
   case nir_op_fnoise2_3:
   case nir_op_fnoise2_4:
   case nir_op_fnoise3_1:
   case nir_op_fnoise3_2:
   case nir_op_fnoise3_3:
   case nir_op_fnoise3_4:
   case nir_op_fnoise4_1:
   case nir_op_fnoise4_2:
   case nir_op_fnoise4_3:
   case nir_op_fnoise4_4:
      unreachable("not reached: should be handled by lower_noise");

   case nir_op_vec2:
   case nir_op_vec3:
   case nir_op_vec4:
      unreachable("not reached: should be handled by lower_quadop_vector");

   case nir_op_ldexp:
      unreachable("not reached: should be handled by ldexp_to_arith()");

   case nir_op_fsqrt:
      emit_math_percomp(SHADER_OPCODE_SQRT, result, op[0],
                        instr->dest.write_mask, instr->dest.saturate);
      break;

   case nir_op_frsq:
      emit_math_percomp(SHADER_OPCODE_RSQ, result, op[0],
                        instr->dest.write_mask, instr->dest.saturate);
      break;

   case nir_op_b2i:
      emit_percomp(AND(result, op[0], fs_reg(1)), instr->dest.write_mask);
      break;
   case nir_op_b2f: {
      emit_percomp(AND(retype(result, BRW_REGISTER_TYPE_UD), op[0],
                       fs_reg(0x3f800000u)),
                   instr->dest.write_mask);
      break;
   }

   case nir_op_f2b:
      emit_percomp(CMP(result, op[0], fs_reg(0.0f), BRW_CONDITIONAL_NZ),
                   instr->dest.write_mask);
      break;
   case nir_op_i2b:
      emit_percomp(CMP(result, op[0], fs_reg(0), BRW_CONDITIONAL_NZ),
                   instr->dest.write_mask);
      break;

   case nir_op_ftrunc: {
      fs_inst *inst = RNDZ(result, op[0]);
      inst->saturate = instr->dest.saturate;
      emit_percomp(inst, instr->dest.write_mask);
      break;
   }
   case nir_op_fceil: {
      op[0].negate = !op[0].negate;
      fs_reg temp = fs_reg(this, glsl_type::vec4_type);
      emit_percomp(RNDD(temp, op[0]), instr->dest.write_mask);
      temp.negate = true;
      fs_inst *inst = MOV(result, temp);
      inst->saturate = instr->dest.saturate;
      emit_percomp(inst, instr->dest.write_mask);
      break;
   }
   case nir_op_ffloor: {
      fs_inst *inst = RNDD(result, op[0]);
      inst->saturate = instr->dest.saturate;
      emit_percomp(inst, instr->dest.write_mask);
      break;
   }
   case nir_op_ffract: {
      fs_inst *inst = FRC(result, op[0]);
      inst->saturate = instr->dest.saturate;
      emit_percomp(inst, instr->dest.write_mask);
      break;
   }
   case nir_op_fround_even: {
      fs_inst *inst = RNDE(result, op[0]);
      inst->saturate = instr->dest.saturate;
      emit_percomp(inst, instr->dest.write_mask);
      break;
   }

   case nir_op_fmin:
   case nir_op_imin:
   case nir_op_umin:
      if (brw->gen >= 6) {
         emit_percomp(BRW_OPCODE_SEL, result, op[0], op[1],
                      instr->dest.write_mask, instr->dest.saturate,
                      BRW_PREDICATE_NONE, BRW_CONDITIONAL_L);
      } else {
         emit_percomp(CMP(reg_null_d, op[0], op[1], BRW_CONDITIONAL_L),
                      instr->dest.write_mask);

         emit_percomp(BRW_OPCODE_SEL, result, op[0], op[1],
                      instr->dest.write_mask, instr->dest.saturate,
                      BRW_PREDICATE_NORMAL);
      }
      break;

   case nir_op_fmax:
   case nir_op_imax:
   case nir_op_umax:
      if (brw->gen >= 6) {
         emit_percomp(BRW_OPCODE_SEL, result, op[0], op[1],
                      instr->dest.write_mask, instr->dest.saturate,
                      BRW_PREDICATE_NONE, BRW_CONDITIONAL_GE);
      } else {
         emit_percomp(CMP(reg_null_d, op[0], op[1], BRW_CONDITIONAL_GE),
                      instr->dest.write_mask);

         emit_percomp(BRW_OPCODE_SEL, result, op[0], op[1],
                      instr->dest.write_mask, instr->dest.saturate,
                      BRW_PREDICATE_NORMAL);
      }
      break;

   case nir_op_pack_snorm_2x16:
   case nir_op_pack_snorm_4x8:
   case nir_op_pack_unorm_2x16:
   case nir_op_pack_unorm_4x8:
   case nir_op_unpack_snorm_2x16:
   case nir_op_unpack_snorm_4x8:
   case nir_op_unpack_unorm_2x16:
   case nir_op_unpack_unorm_4x8:
   case nir_op_unpack_half_2x16:
   case nir_op_pack_half_2x16:
      unreachable("not reached: should be handled by lower_packing_builtins");

   case nir_op_unpack_half_2x16_split_x:
      emit_percomp(FS_OPCODE_UNPACK_HALF_2x16_SPLIT_X, result, op[0],
                   instr->dest.write_mask, instr->dest.saturate);
      break;
   case nir_op_unpack_half_2x16_split_y:
      emit_percomp(FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y, result, op[0],
                   instr->dest.write_mask, instr->dest.saturate);
      break;

   case nir_op_fpow:
      emit_percomp(SHADER_OPCODE_POW, result, op[0], op[1],
                   instr->dest.write_mask, instr->dest.saturate);
      break;

   case nir_op_bitfield_reverse:
      emit_percomp(BFREV(result, op[0]), instr->dest.write_mask);
      break;

   case nir_op_bit_count:
      emit_percomp(CBIT(result, op[0]), instr->dest.write_mask);
      break;

   case nir_op_find_msb: {
      fs_reg temp = fs_reg(this, glsl_type::uvec4_type);
      emit_percomp(FBH(temp, op[0]), instr->dest.write_mask);

      /* FBH counts from the MSB side, while GLSL's findMSB() wants the count
       * from the LSB side. If FBH didn't return an error (0xFFFFFFFF), then
       * subtract the result from 31 to convert the MSB count into an LSB count.
       */

      emit_percomp(CMP(reg_null_d, temp, fs_reg(~0), BRW_CONDITIONAL_NZ),
                   instr->dest.write_mask);
      temp.negate = true;
      fs_inst *inst = ADD(result, temp, fs_reg(31));
      inst->predicate = BRW_PREDICATE_NORMAL;
      emit_percomp(inst, instr->dest.write_mask);
      break;
   }

   case nir_op_find_lsb:
      emit_percomp(FBL(result, op[0]), instr->dest.write_mask);
      break;

   case nir_op_ubitfield_extract:
   case nir_op_ibitfield_extract:
      emit_percomp(BFE(result, op[2], op[1], op[0]), instr->dest.write_mask);
      break;
   case nir_op_bfm:
      emit_percomp(BFI1(result, op[0], op[1]), instr->dest.write_mask);
      break;
   case nir_op_bfi:
      emit_percomp(BFI2(result, op[0], op[1], op[2]), instr->dest.write_mask);
      break;

   case nir_op_bitfield_insert:
      unreachable("not reached: should be handled by "
                  "lower_instructions::bitfield_insert_to_bfm_bfi");

   case nir_op_ishl:
      emit_percomp(SHL(result, op[0], op[1]), instr->dest.write_mask);
      break;
   case nir_op_ishr:
      emit_percomp(ASR(result, op[0], op[1]), instr->dest.write_mask);
      break;
   case nir_op_ushr:
      emit_percomp(SHR(result, op[0], op[1]), instr->dest.write_mask);
      break;

   case nir_op_pack_half_2x16_split:
      emit_percomp(FS_OPCODE_PACK_HALF_2x16_SPLIT, result, op[0], op[1],
                   instr->dest.write_mask);
      break;

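   /* MAD computes src0 + src1 * src2, and LRP blends src2 toward src1 by
    * src0, so NIR's (a, b, c) operands arrive reversed as op[2], op[1],
    * op[0].
    */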
   case nir_op_ffma:
      emit_percomp(MAD(result, op[2], op[1], op[0]), instr->dest.write_mask);
      break;

   case nir_op_flrp:
      /* TODO emulate for gen < 6 */
      emit_percomp(LRP(result, op[2], op[1], op[0]), instr->dest.write_mask);
      break;

   case nir_op_bcsel:
      emit(CMP(reg_null_d, op[0], fs_reg(0), BRW_CONDITIONAL_NZ));
      emit_percomp(BRW_OPCODE_SEL, result, op[1], op[2],
                   instr->dest.write_mask, false, BRW_PREDICATE_NORMAL);
      break;

   default:
      unreachable("unhandled instruction");
   }

   /* emit a predicated move if there was predication */
   if (instr->has_predicate) {
      fs_inst *inst = emit(MOV(reg_null_d,
                               retype(get_nir_src(instr->predicate),
                                      BRW_REGISTER_TYPE_UD)));
      inst->conditional_mod = BRW_CONDITIONAL_NZ;
      inst = MOV(dest, result);
      inst->predicate = BRW_PREDICATE_NORMAL;
      emit_percomp(inst, instr->dest.write_mask);
   }
}

fs_reg
fs_visitor::get_nir_src(nir_src src)
{
   struct hash_entry *entry =
      _mesa_hash_table_search(this->nir_reg_ht, src.reg.reg);
   fs_reg reg = *((fs_reg *) entry->data);
   /* to avoid floating-point denorm flushing problems, set the type by
    * default to D - instructions that need floating point semantics will set
    * this to F if they need to
    */
   reg.type = BRW_REGISTER_TYPE_D;
   reg.reg_offset = src.reg.base_offset;
   if (src.reg.indirect) {
      reg.reladdr = new(mem_ctx) fs_reg();
      *reg.reladdr = retype(get_nir_src(*src.reg.indirect),
                            BRW_REGISTER_TYPE_D);
   }

   return reg;
}

fs_reg
fs_visitor::get_nir_alu_src(nir_alu_instr *instr, unsigned src)
{
   fs_reg reg = get_nir_src(instr->src[src].src);

   reg.abs = instr->src[src].abs;
   reg.negate = instr->src[src].negate;

   bool needs_swizzle = false;
   unsigned num_components = 0;
   for (unsigned i = 0; i < 4; i++) {
      if (!nir_alu_instr_channel_used(instr, src, i))
         continue;

      if (instr->src[src].swizzle[i] != i)
         needs_swizzle = true;

      num_components = i + 1;
   }

   if (needs_swizzle) {
      /* resolve the swizzle through MOV's */
      fs_reg new_reg = fs_reg(GRF, virtual_grf_alloc(num_components));

      for (unsigned i = 0; i < 4; i++) {
         if (!nir_alu_instr_channel_used(instr, src, i))
            continue;

         fs_reg dest = new_reg;
         dest.type = reg.type;
         dest.reg_offset = i;

         fs_reg src0 = reg;
         src0.reg_offset += instr->src[src].swizzle[i];

         emit(MOV(dest, src0));
      }

      return new_reg;
   }

   return reg;
}

fs_reg
fs_visitor::get_nir_dest(nir_dest dest)
{
   struct hash_entry *entry =
      _mesa_hash_table_search(this->nir_reg_ht, dest.reg.reg);
   fs_reg reg = *((fs_reg *) entry->data);
   reg.reg_offset = dest.reg.base_offset;
   if (dest.reg.indirect) {
      reg.reladdr = new(mem_ctx) fs_reg();
      *reg.reladdr = retype(get_nir_src(*dest.reg.indirect),
                            BRW_REGISTER_TYPE_D);
   }

   return reg;
}

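/* The emit_percomp() helpers emit one scalar instruction per channel enabled
 * in a NIR write mask: NIR values are still vectors here while this backend
 * is scalar, so each enabled component gets its own copy of the instruction
 * with the destination and any GRF sources bumped by the component index.
 */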
void
fs_visitor::emit_percomp(fs_inst *inst, unsigned wr_mask)
{
   for (unsigned i = 0; i < 4; i++) {
      if (!((wr_mask >> i) & 1))
         continue;

      fs_inst *new_inst = new(mem_ctx) fs_inst(*inst);
      new_inst->dst.reg_offset += i;
      for (unsigned j = 0; j < new_inst->sources; j++)
         if (inst->src[j].file == GRF)
            new_inst->src[j].reg_offset += i;

      emit(new_inst);
   }
}

void
fs_visitor::emit_percomp(enum opcode op, fs_reg dest, fs_reg src0,
                         unsigned wr_mask, bool saturate,
                         enum brw_predicate predicate,
                         enum brw_conditional_mod mod)
{
   for (unsigned i = 0; i < 4; i++) {
      if (!((wr_mask >> i) & 1))
         continue;

      fs_inst *new_inst = new(mem_ctx) fs_inst(op, dest, src0);
      new_inst->dst.reg_offset += i;
      for (unsigned j = 0; j < new_inst->sources; j++)
         if (new_inst->src[j].file == GRF)
            new_inst->src[j].reg_offset += i;

      new_inst->predicate = predicate;
      new_inst->conditional_mod = mod;
      new_inst->saturate = saturate;
      emit(new_inst);
   }
}

void
fs_visitor::emit_percomp(enum opcode op, fs_reg dest, fs_reg src0, fs_reg src1,
                         unsigned wr_mask, bool saturate,
                         enum brw_predicate predicate,
                         enum brw_conditional_mod mod)
{
   for (unsigned i = 0; i < 4; i++) {
      if (!((wr_mask >> i) & 1))
         continue;

      fs_inst *new_inst = new(mem_ctx) fs_inst(op, dest, src0, src1);
      new_inst->dst.reg_offset += i;
      for (unsigned j = 0; j < new_inst->sources; j++)
         if (new_inst->src[j].file == GRF)
            new_inst->src[j].reg_offset += i;

      new_inst->predicate = predicate;
      new_inst->conditional_mod = mod;
      new_inst->saturate = saturate;
      emit(new_inst);
   }
}

void
fs_visitor::emit_math_percomp(enum opcode op, fs_reg dest, fs_reg src0,
                              unsigned wr_mask, bool saturate)
{
   for (unsigned i = 0; i < 4; i++) {
      if (!((wr_mask >> i) & 1))
         continue;

      fs_reg new_dest = dest;
      new_dest.reg_offset += i;
      fs_reg new_src0 = src0;
      if (src0.file == GRF)
         new_src0.reg_offset += i;

      fs_inst *new_inst = emit_math(op, new_dest, new_src0);
      new_inst->saturate = saturate;
   }
}

void
fs_visitor::emit_math_percomp(enum opcode op, fs_reg dest, fs_reg src0,
                              fs_reg src1, unsigned wr_mask,
                              bool saturate)
{
   for (unsigned i = 0; i < 4; i++) {
      if (!((wr_mask >> i) & 1))
         continue;

      fs_reg new_dest = dest;
      new_dest.reg_offset += i;
      fs_reg new_src0 = src0;
      if (src0.file == GRF)
         new_src0.reg_offset += i;
      fs_reg new_src1 = src1;
      if (src1.file == GRF)
         new_src1.reg_offset += i;

      fs_inst *new_inst = emit_math(op, new_dest, new_src0, new_src1);
      new_inst->saturate = saturate;
   }
}

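/* Combine num_components consecutive channels of src into a single value in
 * dest using a binary tree of the given op: (s0 op s1), then op s2 for three
 * components, or op (s2 op s3) for four.
 */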
void
fs_visitor::emit_reduction(enum opcode op, fs_reg dest, fs_reg src,
                           unsigned num_components)
{
   fs_reg src0 = src;
   fs_reg src1 = src;
   src1.reg_offset++;

   if (num_components == 2) {
      emit(op, dest, src0, src1);
      return;
   }

   fs_reg temp1 = fs_reg(GRF, virtual_grf_alloc(1));
   temp1.type = src.type;
   emit(op, temp1, src0, src1);

   fs_reg src2 = src;
   src2.reg_offset += 2;

   if (num_components == 3) {
      emit(op, dest, temp1, src2);
      return;
   }

   assert(num_components == 4);

   fs_reg src3 = src;
   src3.reg_offset += 3;
   fs_reg temp2 = fs_reg(GRF, virtual_grf_alloc(1));
   temp2.type = src.type;

   emit(op, temp2, src2, src3);
   emit(op, dest, temp1, temp2);
}

void
fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
{
   fs_reg dest;
   if (nir_intrinsic_infos[instr->intrinsic].has_dest)
      dest = get_nir_dest(instr->dest);

   if (instr->has_predicate) {
      fs_inst *inst = emit(MOV(reg_null_d,
                               retype(get_nir_src(instr->predicate),
                                      BRW_REGISTER_TYPE_UD)));
      inst->conditional_mod = BRW_CONDITIONAL_NZ;
   }

   switch (instr->intrinsic) {
   case nir_intrinsic_discard: {
      /* We track our discarded pixels in f0.1. By predicating on it, we can
       * update just the flag bits that aren't yet discarded. By emitting a
       * CMP of g0 != g0, all our currently executing channels will get turned
       * off.
       */
      fs_reg some_reg = fs_reg(retype(brw_vec8_grf(0, 0),
                                      BRW_REGISTER_TYPE_UW));
      fs_inst *cmp = emit(CMP(reg_null_f, some_reg, some_reg,
                              BRW_CONDITIONAL_NZ));
      cmp->predicate = BRW_PREDICATE_NORMAL;
      cmp->flag_subreg = 1;

      if (brw->gen >= 6) {
         /* For performance, after a discard, jump to the end of the shader.
          * Only jump if all relevant channels have been discarded.
          */
         fs_inst *discard_jump = emit(FS_OPCODE_DISCARD_JUMP);
         discard_jump->flag_subreg = 1;

         discard_jump->predicate = (dispatch_width == 8)
                                   ? BRW_PREDICATE_ALIGN1_ANY8H
                                   : BRW_PREDICATE_ALIGN1_ANY16H;
         discard_jump->predicate_inverse = true;
      }

      break;
   }

   case nir_intrinsic_atomic_counter_inc:
   case nir_intrinsic_atomic_counter_dec:
   case nir_intrinsic_atomic_counter_read:
      assert(!"TODO");

   case nir_intrinsic_load_front_face:
      assert(!"TODO");

   case nir_intrinsic_load_sample_mask_in: {
      assert(brw->gen >= 7);
      fs_reg reg = fs_reg(retype(brw_vec8_grf(payload.sample_mask_in_reg, 0),
                                 BRW_REGISTER_TYPE_D));
      dest.type = reg.type;
      fs_inst *inst = MOV(dest, reg);
      if (instr->has_predicate)
         inst->predicate = BRW_PREDICATE_NORMAL;
      emit(inst);
      break;
   }

   case nir_intrinsic_load_sample_pos:
   case nir_intrinsic_load_sample_id:
      assert(!"TODO");

   case nir_intrinsic_load_uniform_vec1:
   case nir_intrinsic_load_uniform_vec2:
   case nir_intrinsic_load_uniform_vec3:
   case nir_intrinsic_load_uniform_vec4: {
      unsigned index = 0;
      for (int i = 0; i < instr->const_index[1]; i++) {
         for (unsigned j = 0;
              j < nir_intrinsic_infos[instr->intrinsic].dest_components; j++) {
            fs_reg src = nir_uniforms;
            src.reg_offset = instr->const_index[0] + index;
            src.type = dest.type;
            index++;

            fs_inst *inst = MOV(dest, src);
            if (instr->has_predicate)
               inst->predicate = BRW_PREDICATE_NORMAL;
            emit(inst);
            dest.reg_offset++;
         }
      }
      break;
   }

   case nir_intrinsic_load_uniform_vec1_indirect:
   case nir_intrinsic_load_uniform_vec2_indirect:
   case nir_intrinsic_load_uniform_vec3_indirect:
   case nir_intrinsic_load_uniform_vec4_indirect: {
      unsigned index = 0;
      for (int i = 0; i < instr->const_index[1]; i++) {
         for (unsigned j = 0;
              j < nir_intrinsic_infos[instr->intrinsic].dest_components; j++) {
            fs_reg src = nir_uniforms;
            src.reg_offset = instr->const_index[0] + index;
            src.reladdr = new(mem_ctx) fs_reg(get_nir_src(instr->src[0]));
            src.reladdr->type = BRW_REGISTER_TYPE_D;
            src.type = dest.type;
            index++;

            fs_inst *inst = MOV(dest, src);
            if (instr->has_predicate)
               inst->predicate = BRW_PREDICATE_NORMAL;
            emit(inst);
            dest.reg_offset++;
         }
      }
      break;
   }

   case nir_intrinsic_load_ubo_vec1:
   case nir_intrinsic_load_ubo_vec2:
   case nir_intrinsic_load_ubo_vec3:
   case nir_intrinsic_load_ubo_vec4: {
      fs_reg surf_index = fs_reg(prog_data->binding_table.ubo_start +
                                 (unsigned) instr->const_index[0]);
      fs_reg packed_consts = fs_reg(this, glsl_type::float_type);
      packed_consts.type = dest.type;

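      /* The pull constant load fetches an aligned 16-byte block, so mask off
       * the low offset bits here and use set_smear() below to pick the right
       * dword out of the block for each component.
       */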
      fs_reg const_offset_reg = fs_reg((unsigned) instr->const_index[1] & ~15);
      emit(new(mem_ctx) fs_inst(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD,
                                packed_consts, surf_index, const_offset_reg));

      for (unsigned i = 0;
           i < nir_intrinsic_infos[instr->intrinsic].dest_components; i++) {
         packed_consts.set_smear(instr->const_index[1] % 16 / 4 + i);

         /* The std140 packing rules don't allow vectors to cross 16-byte
          * boundaries, and a reg is 32 bytes.
          */
         assert(packed_consts.subreg_offset < 32);

         fs_inst *inst = MOV(dest, packed_consts);
         if (instr->has_predicate)
            inst->predicate = BRW_PREDICATE_NORMAL;
         emit(inst);

         dest.reg_offset++;
      }
      break;
   }

   case nir_intrinsic_load_ubo_vec1_indirect:
   case nir_intrinsic_load_ubo_vec2_indirect:
   case nir_intrinsic_load_ubo_vec3_indirect:
   case nir_intrinsic_load_ubo_vec4_indirect: {
      fs_reg surf_index = fs_reg(prog_data->binding_table.ubo_start +
                                 instr->const_index[0]);
      /* Turn the byte offset into a dword offset. */
      unsigned base_offset = instr->const_index[1] / 4;
      fs_reg offset = fs_reg(this, glsl_type::int_type);
      emit(SHR(offset, retype(get_nir_src(instr->src[0]), BRW_REGISTER_TYPE_D),
               fs_reg(2)));

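      /* VARYING_PULL_CONSTANT_LOAD expands to a short list of instructions;
       * only the last one produces the result, so that's the one to
       * predicate.
       */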
      for (unsigned i = 0;
           i < nir_intrinsic_infos[instr->intrinsic].dest_components; i++) {
         exec_list list = VARYING_PULL_CONSTANT_LOAD(dest, surf_index,
                                                     offset, base_offset + i);
         fs_inst *last_inst = (fs_inst *) list.get_tail();
         if (instr->has_predicate)
            last_inst->predicate = BRW_PREDICATE_NORMAL;
         emit(list);

         dest.reg_offset++;
      }
      break;
   }

   case nir_intrinsic_load_input_vec1:
   case nir_intrinsic_load_input_vec2:
   case nir_intrinsic_load_input_vec3:
   case nir_intrinsic_load_input_vec4: {
      unsigned index = 0;
      for (int i = 0; i < instr->const_index[1]; i++) {
         for (unsigned j = 0;
              j < nir_intrinsic_infos[instr->intrinsic].dest_components; j++) {
            fs_reg src = nir_inputs;
            src.reg_offset = instr->const_index[0] + index;
            src.type = dest.type;
            index++;

            fs_inst *inst = MOV(dest, src);
            if (instr->has_predicate)
               inst->predicate = BRW_PREDICATE_NORMAL;
            emit(inst);
            dest.reg_offset++;
         }
      }
      break;
   }

   case nir_intrinsic_load_input_vec1_indirect:
   case nir_intrinsic_load_input_vec2_indirect:
   case nir_intrinsic_load_input_vec3_indirect:
   case nir_intrinsic_load_input_vec4_indirect: {
      unsigned index = 0;
      for (int i = 0; i < instr->const_index[1]; i++) {
         for (unsigned j = 0;
              j < nir_intrinsic_infos[instr->intrinsic].dest_components; j++) {
            fs_reg src = nir_inputs;
            src.reg_offset = instr->const_index[0] + index;
            src.reladdr = new(mem_ctx) fs_reg(get_nir_src(instr->src[0]));
            src.reladdr->type = BRW_REGISTER_TYPE_D;
            src.type = dest.type;
            index++;

            fs_inst *inst = MOV(dest, src);
            if (instr->has_predicate)
               inst->predicate = BRW_PREDICATE_NORMAL;
            emit(inst);
            dest.reg_offset++;
         }
      }
      break;
   }

   case nir_intrinsic_store_output_vec1:
   case nir_intrinsic_store_output_vec2:
   case nir_intrinsic_store_output_vec3:
   case nir_intrinsic_store_output_vec4: {
      fs_reg src = get_nir_src(instr->src[0]);
      unsigned index = 0;
      for (int i = 0; i < instr->const_index[1]; i++) {
         for (unsigned j = 0;
              j < nir_intrinsic_infos[instr->intrinsic].src_components[0]; j++) {
            fs_reg new_dest = nir_outputs;
            new_dest.reg_offset = instr->const_index[0] + index;
            new_dest.type = src.type;
            index++;
            fs_inst *inst = MOV(new_dest, src);
            if (instr->has_predicate)
               inst->predicate = BRW_PREDICATE_NORMAL;
            emit(inst);
            src.reg_offset++;
         }
      }
      break;
   }

   case nir_intrinsic_store_output_vec1_indirect:
   case nir_intrinsic_store_output_vec2_indirect:
   case nir_intrinsic_store_output_vec3_indirect:
   case nir_intrinsic_store_output_vec4_indirect: {
      fs_reg src = get_nir_src(instr->src[0]);
      fs_reg indirect = get_nir_src(instr->src[1]);
      unsigned index = 0;
      for (int i = 0; i < instr->const_index[1]; i++) {
         for (unsigned j = 0;
              j < nir_intrinsic_infos[instr->intrinsic].src_components[0]; j++) {
            fs_reg new_dest = nir_outputs;
            new_dest.reg_offset = instr->const_index[0] + index;
            new_dest.reladdr = new(mem_ctx) fs_reg(indirect);
            new_dest.type = src.type;
            index++;
            fs_inst *inst = MOV(new_dest, src);
            if (instr->has_predicate)
               inst->predicate = BRW_PREDICATE_NORMAL;
            emit(inst);
            src.reg_offset++;
         }
      }
      break;
   }

   default:
      unreachable("unknown intrinsic");
   }
}

void
fs_visitor::nir_emit_texture(nir_tex_instr *instr)
{
   brw_wm_prog_key *key = (brw_wm_prog_key*) this->key;
   unsigned sampler = instr->sampler_index;

   /* FINISHME: We're failing to recompile our programs when the sampler is
    * updated. This only matters for the texture rectangle scale parameters
    * (pre-gen6, or gen6+ with GL_CLAMP).
    */
   int texunit = prog->SamplerUnits[sampler];

   int gather_component = instr->component;

   bool is_rect = instr->sampler_dim == GLSL_SAMPLER_DIM_RECT;

   bool is_cube_array = instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE &&
                        instr->is_array;

   int lod_components = 0, offset_components = 0;

   fs_reg coordinate, shadow_comparitor, lod, lod2, sample_index, mcs, offset;

   for (unsigned i = 0; i < instr->num_srcs; i++) {
      fs_reg src = get_nir_src(instr->src[i]);
      switch (instr->src_type[i]) {
      case nir_tex_src_bias:
         lod = retype(src, BRW_REGISTER_TYPE_F);
         break;
      case nir_tex_src_comparitor:
         shadow_comparitor = retype(src, BRW_REGISTER_TYPE_F);
         break;
      case nir_tex_src_coord:
         switch (instr->op) {
         case nir_texop_txf:
         case nir_texop_txf_ms:
            coordinate = retype(src, BRW_REGISTER_TYPE_D);
            break;
         default:
            coordinate = retype(src, BRW_REGISTER_TYPE_F);
            break;
         }
         break;
      case nir_tex_src_ddx:
         lod = retype(src, BRW_REGISTER_TYPE_F);
         lod_components = nir_tex_instr_src_size(instr, i);
         break;
      case nir_tex_src_ddy:
         lod2 = retype(src, BRW_REGISTER_TYPE_F);
         break;
      case nir_tex_src_lod:
         switch (instr->op) {
         case nir_texop_txs:
            lod = retype(src, BRW_REGISTER_TYPE_UD);
            break;
         case nir_texop_txf:
            lod = retype(src, BRW_REGISTER_TYPE_D);
            break;
         default:
            lod = retype(src, BRW_REGISTER_TYPE_F);
            break;
         }
         break;
      case nir_tex_src_ms_index:
         sample_index = retype(src, BRW_REGISTER_TYPE_UD);
         break;
      case nir_tex_src_offset:
         offset = retype(src, BRW_REGISTER_TYPE_D);
         if (instr->is_array)
            offset_components = instr->coord_components - 1;
         else
            offset_components = instr->coord_components;
         break;
      case nir_tex_src_projector:
         unreachable("should be lowered");
      case nir_tex_src_sampler_index:
         unreachable("not yet supported");
      default:
         unreachable("unknown texture source");
      }
   }

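   /* For multisample fetches on Gen7+, first load the MCS (multisample
    * control surface) data when the surface uses a compressed multisample
    * layout; otherwise pass zero.
    */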
   if (instr->op == nir_texop_txf_ms) {
      if (brw->gen >= 7 && key->tex.compressed_multisample_layout_mask & (1<<sampler))
         mcs = emit_mcs_fetch(coordinate, instr->coord_components, fs_reg(sampler));
      else
         mcs = fs_reg(0u);
   }

   for (unsigned i = 0; i < 3; i++) {
      if (instr->const_offset[i] != 0) {
         assert(offset_components == 0);
         offset = fs_reg(brw_texture_offset(ctx, instr->const_offset, 3));
         break;
      }
   }

   enum glsl_base_type dest_base_type;
   switch (instr->dest_type) {
   case nir_type_float:
      dest_base_type = GLSL_TYPE_FLOAT;
      break;
   case nir_type_int:
      dest_base_type = GLSL_TYPE_INT;
      break;
   case nir_type_unsigned:
      dest_base_type = GLSL_TYPE_UINT;
      break;
   default:
      unreachable("bad type");
   }

   const glsl_type *dest_type =
      glsl_type::get_instance(dest_base_type, nir_tex_instr_dest_size(instr),
                              1);

   ir_texture_opcode op;
   switch (instr->op) {
   case nir_texop_lod: op = ir_lod; break;
   case nir_texop_query_levels: op = ir_query_levels; break;
   case nir_texop_tex: op = ir_tex; break;
   case nir_texop_tg4: op = ir_tg4; break;
   case nir_texop_txb: op = ir_txb; break;
   case nir_texop_txd: op = ir_txd; break;
   case nir_texop_txf: op = ir_txf; break;
   case nir_texop_txf_ms: op = ir_txf_ms; break;
   case nir_texop_txl: op = ir_txl; break;
   case nir_texop_txs: op = ir_txs; break;
   default:
      unreachable("unknown texture opcode");
   }

   emit_texture(op, dest_type, coordinate, instr->coord_components,
                shadow_comparitor, lod, lod2, lod_components, sample_index,
                offset, offset_components, mcs, gather_component,
                is_cube_array, is_rect, sampler, fs_reg(sampler), texunit);

   fs_reg dest = get_nir_dest(instr->dest);
   dest.type = this->result.type;
   unsigned num_components = nir_tex_instr_dest_size(instr);
   emit_percomp(MOV(dest, this->result), (1 << num_components) - 1);
}

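/* Materialize a load_const by MOVing the raw bit patterns; the destination
 * is typed UD so float and integer constants copy through unchanged.
 */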
void
fs_visitor::nir_emit_load_const(nir_load_const_instr *instr)
{
   fs_reg dest = get_nir_dest(instr->dest);
   dest.type = BRW_REGISTER_TYPE_UD;
   if (instr->array_elems == 0) {
      for (unsigned i = 0; i < instr->num_components; i++) {
         emit(MOV(dest, fs_reg(instr->value.u[i])));
         dest.reg_offset++;
      }
   } else {
      for (unsigned i = 0; i < instr->array_elems; i++) {
         for (unsigned j = 0; j < instr->num_components; j++) {
            emit(MOV(dest, fs_reg(instr->array[i].u[j])));
            dest.reg_offset++;
         }
      }
   }
}

void
fs_visitor::nir_emit_jump(nir_jump_instr *instr)
{
   switch (instr->type) {
   case nir_jump_break:
      emit(BRW_OPCODE_BREAK);
      break;
   case nir_jump_continue:
      emit(BRW_OPCODE_CONTINUE);
      break;
   case nir_jump_return:
   default:
      unreachable("unknown jump");
   }
}