i965/fs_nir: Handle SSA constants
[mesa.git] / src / mesa / drivers / dri / i965 / brw_fs_nir.cpp
1 /*
2 * Copyright © 2010 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "glsl/nir/glsl_to_nir.h"
25 #include "brw_fs.h"
26
27 void
28 fs_visitor::emit_nir_code()
29 {
30 /* first, lower the GLSL IR shader to NIR */
31 nir_shader *nir = glsl_to_nir(shader->base.ir, NULL, true);
32 nir_validate_shader(nir);
33
34 /* lower some of the GLSL-isms into NIR-isms - after this point, we no
35 * longer have to deal with variables inside the shader
36 */
37
38 nir_lower_variables_scalar(nir, true, true, true, true);
39 nir_validate_shader(nir);
40
41 nir_lower_samplers(nir, shader_prog, shader->base.Program);
42 nir_validate_shader(nir);
43
44 nir_lower_system_values(nir);
45 nir_validate_shader(nir);
46
47 nir_lower_atomics(nir);
48 nir_validate_shader(nir);
49
50 nir_remove_dead_variables(nir);
51 nir_opt_global_to_local(nir);
52 nir_validate_shader(nir);
53
54 if (unlikely(INTEL_DEBUG & DEBUG_WM))
55 nir_print_shader(nir, stderr);
56
57 nir_convert_to_ssa(nir);
58 nir_validate_shader(nir);
59
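/* Optimize to a fixed point: each pass returns whether it made progress,
 * and we keep looping until a whole round of passes changes nothing.
 */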
60 bool progress;
61 do {
62 progress = false;
63 progress |= nir_copy_prop(nir);
64 nir_validate_shader(nir);
65 progress |= nir_opt_dce(nir);
66 nir_validate_shader(nir);
67 progress |= nir_opt_cse(nir);
68 nir_validate_shader(nir);
69 progress |= nir_opt_peephole_select(nir);
70 nir_validate_shader(nir);
71 progress |= nir_opt_peephole_ffma(nir);
72 nir_validate_shader(nir);
73 } while (progress);
74 if (unlikely(INTEL_DEBUG & DEBUG_WM)) nir_print_shader(nir, stderr);
75
76 nir_convert_from_ssa(nir);
77 nir_validate_shader(nir);
78 if (unlikely(INTEL_DEBUG & DEBUG_WM)) nir_print_shader(nir, stderr);
79 nir_lower_vec_to_movs(nir);
80 nir_validate_shader(nir);
81
82 /* emit the arrays used for inputs and outputs - load/store intrinsics will
83 * be converted to reads/writes of these arrays
84 */
85
86 if (nir->num_inputs > 0) {
87 nir_inputs = fs_reg(GRF, virtual_grf_alloc(nir->num_inputs));
88 nir_setup_inputs(nir);
89 }
90
91 if (nir->num_outputs > 0) {
92 nir_outputs = fs_reg(GRF, virtual_grf_alloc(nir->num_outputs));
93 nir_setup_outputs(nir);
94 }
95
96 if (nir->num_uniforms > 0) {
97 nir_uniforms = fs_reg(UNIFORM, 0);
98 nir_setup_uniforms(nir);
99 }
100
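/* Allocate a GRF-file register for each NIR global register up front;
 * array registers get num_array_elems * num_components scalar slots.
 */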
101 nir_globals = ralloc_array(mem_ctx, fs_reg, nir->reg_alloc);
102 foreach_list_typed(nir_register, reg, node, &nir->registers) {
103 unsigned array_elems =
104 reg->num_array_elems == 0 ? 1 : reg->num_array_elems;
105 unsigned size = array_elems * reg->num_components;
106 nir_globals[reg->index] = fs_reg(GRF, virtual_grf_alloc(size));
107 }
108
109 /* get the main function and emit it */
110 nir_foreach_overload(nir, overload) {
111 assert(strcmp(overload->function->name, "main") == 0);
112 assert(overload->impl);
113 nir_emit_impl(overload->impl);
114 }
115
116 ralloc_free(nir);
117 }
118
119 void
120 fs_visitor::nir_setup_inputs(nir_shader *shader)
121 {
122 fs_reg varying = nir_inputs;
123
124 struct hash_entry *entry;
125 hash_table_foreach(shader->inputs, entry) {
126 nir_variable *var = (nir_variable *) entry->data;
127 varying.reg_offset = var->data.driver_location;
128
129 fs_reg reg;
130 if (!strcmp(var->name, "gl_FragCoord")) {
131 reg = *emit_fragcoord_interpolation(var->data.pixel_center_integer,
132 var->data.origin_upper_left);
133 emit_percomp(MOV(varying, reg), 0xF);
134 } else if (!strcmp(var->name, "gl_FrontFacing")) {
135 reg = *emit_frontfacing_interpolation();
136 emit(MOV(retype(varying, BRW_REGISTER_TYPE_UD), reg));
137 } else {
138 emit_general_interpolation(varying, var->name, var->type,
139 (glsl_interp_qualifier) var->data.interpolation,
140 var->data.location, var->data.centroid,
141 var->data.sample);
142 }
143 }
144 }
145
146 void
147 fs_visitor::nir_setup_outputs(nir_shader *shader)
148 {
149 brw_wm_prog_key *key = (brw_wm_prog_key*) this->key;
150 fs_reg reg = nir_outputs;
151
152 struct hash_entry *entry;
153 hash_table_foreach(shader->outputs, entry) {
154 nir_variable *var = (nir_variable *) entry->data;
155 reg.reg_offset = var->data.driver_location;
156
157 if (var->data.index > 0) {
158 assert(var->data.location == FRAG_RESULT_DATA0);
159 assert(var->data.index == 1);
160 this->dual_src_output = reg;
161 this->do_dual_src = true;
162 } else if (var->data.location == FRAG_RESULT_COLOR) {
163 /* Writing gl_FragColor outputs to all color regions. */
164 for (unsigned int i = 0; i < MAX2(key->nr_color_regions, 1); i++) {
165 this->outputs[i] = reg;
166 this->output_components[i] = 4;
167 }
168 } else if (var->data.location == FRAG_RESULT_DEPTH) {
169 this->frag_depth = reg;
170 } else if (var->data.location == FRAG_RESULT_SAMPLE_MASK) {
171 this->sample_mask = reg;
172 } else {
173 /* gl_FragData or a user-defined FS output */
174 assert(var->data.location >= FRAG_RESULT_DATA0 &&
175 var->data.location < FRAG_RESULT_DATA0 + BRW_MAX_DRAW_BUFFERS);
176
177 int vector_elements =
178 var->type->is_array() ? var->type->fields.array->vector_elements
179 : var->type->vector_elements;
180
181 /* General color output. */
182 for (unsigned int i = 0; i < MAX2(1, var->type->length); i++) {
183 int output = var->data.location - FRAG_RESULT_DATA0 + i;
184 this->outputs[output] = reg;
185 this->outputs[output].reg_offset += vector_elements * i;
186 this->output_components[output] = vector_elements;
187 }
188 }
189 }
190 }
191
192 void
193 fs_visitor::nir_setup_uniforms(nir_shader *shader)
194 {
195 uniforms = shader->num_uniforms;
196 param_size[0] = shader->num_uniforms;
197
198 if (dispatch_width != 8)
199 return;
200
201 struct hash_entry *entry;
202 hash_table_foreach(shader->uniforms, entry) {
203 nir_variable *var = (nir_variable *) entry->data;
204
205 /* UBOs and atomics don't take up space in the uniform file */
206
207 if (var->interface_type != NULL || var->type->contains_atomic())
208 continue;
209
210 if (strncmp(var->name, "gl_", 3) == 0)
211 nir_setup_builtin_uniform(var);
212 else
213 nir_setup_uniform(var);
214 }
215 }
216
217 void
218 fs_visitor::nir_setup_uniform(nir_variable *var)
219 {
220 int namelen = strlen(var->name);
221
222 /* The data for our (non-builtin) uniforms is stored in a series of
223 * gl_uniform_driver_storage structs for each subcomponent that
224 * glGetUniformLocation() could name. We know it's been set up in the
225 * same order we'd walk the type, so walk the list of storage and find
226 * anything with our name, or the prefix of a component that starts with
227 * our name.
228 */
229 unsigned index = var->data.driver_location;
230 for (unsigned u = 0; u < shader_prog->NumUserUniformStorage; u++) {
231 struct gl_uniform_storage *storage = &shader_prog->UniformStorage[u];
232
233 if (strncmp(var->name, storage->name, namelen) != 0 ||
234 (storage->name[namelen] != 0 &&
235 storage->name[namelen] != '.' &&
236 storage->name[namelen] != '[')) {
237 continue;
238 }
239
240 unsigned slots = storage->type->component_slots();
241 if (storage->array_elements)
242 slots *= storage->array_elements;
243
244 for (unsigned i = 0; i < slots; i++) {
245 stage_prog_data->param[index++] = &storage->storage[i];
246 }
247 }
248
249 /* Make sure we actually initialized the right amount of stuff here. */
250 assert(var->data.driver_location + var->type->component_slots() == index);
251 }
252
253 void
254 fs_visitor::nir_setup_builtin_uniform(nir_variable *var)
255 {
256 const nir_state_slot *const slots = var->state_slots;
257 assert(var->state_slots != NULL);
258
259 unsigned uniform_index = var->data.driver_location;
260 for (unsigned int i = 0; i < var->num_state_slots; i++) {
261 /* This state reference has already been setup by ir_to_mesa, but we'll
262 * get the same index back here.
263 */
264 int index = _mesa_add_state_reference(this->prog->Parameters,
265 (gl_state_index *)slots[i].tokens);
266
267 /* Add each of the unique swizzles of the element as a parameter.
268 * This'll end up matching the expected layout of the
269 * array/matrix/structure we're trying to fill in.
270 */
271 int last_swiz = -1;
272 for (unsigned int j = 0; j < 4; j++) {
273 int swiz = GET_SWZ(slots[i].swizzle, j);
274 if (swiz == last_swiz)
275 break;
276 last_swiz = swiz;
277
278 stage_prog_data->param[uniform_index++] =
279 &prog->Parameters->ParameterValues[index][swiz];
280 }
281 }
282 }
283
284 void
285 fs_visitor::nir_emit_impl(nir_function_impl *impl)
286 {
287 nir_locals = reralloc(mem_ctx, nir_locals, fs_reg, impl->reg_alloc);
288 foreach_list_typed(nir_register, reg, node, &impl->registers) {
289 unsigned array_elems =
290 reg->num_array_elems == 0 ? 1 : reg->num_array_elems;
291 unsigned size = array_elems * reg->num_components;
292 nir_locals[reg->index] = fs_reg(GRF, virtual_grf_alloc(size));
293 }
294
295 nir_emit_cf_list(&impl->body);
296 }
297
298 void
299 fs_visitor::nir_emit_cf_list(exec_list *list)
300 {
301 foreach_list_typed(nir_cf_node, node, node, list) {
302 switch (node->type) {
303 case nir_cf_node_if:
304 nir_emit_if(nir_cf_node_as_if(node));
305 break;
306
307 case nir_cf_node_loop:
308 nir_emit_loop(nir_cf_node_as_loop(node));
309 break;
310
311 case nir_cf_node_block:
312 nir_emit_block(nir_cf_node_as_block(node));
313 break;
314
315 default:
316 unreachable("Invalid CFG node block");
317 }
318 }
319 }
320
321 void
322 fs_visitor::nir_emit_if(nir_if *if_stmt)
323 {
324 if (brw->gen < 6) {
325 no16("Can't support (non-uniform) control flow on SIMD16\n");
326 }
327
328 /* first, put the condition into f0 */
329 fs_inst *inst = emit(MOV(reg_null_d,
330 retype(get_nir_src(if_stmt->condition),
331 BRW_REGISTER_TYPE_UD)));
332 inst->conditional_mod = BRW_CONDITIONAL_NZ;
333
334 emit(IF(BRW_PREDICATE_NORMAL));
335
336 nir_emit_cf_list(&if_stmt->then_list);
337
338 /* note: if the else is empty, dead CF elimination will remove it */
339 emit(BRW_OPCODE_ELSE);
340
341 nir_emit_cf_list(&if_stmt->else_list);
342
343 emit(BRW_OPCODE_ENDIF);
344
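/* If both branches of the if/else reduce to a single MOV each, the whole
 * construct can be collapsed into one predicated SEL.
 */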
345 try_replace_with_sel();
346 }
347
348 void
349 fs_visitor::nir_emit_loop(nir_loop *loop)
350 {
351 if (brw->gen < 6) {
352 no16("Can't support (non-uniform) control flow on SIMD16\n");
353 }
354
355 emit(BRW_OPCODE_DO);
356
357 nir_emit_cf_list(&loop->body);
358
359 emit(BRW_OPCODE_WHILE);
360 }
361
362 void
363 fs_visitor::nir_emit_block(nir_block *block)
364 {
365 nir_foreach_instr(block, instr) {
366 nir_emit_instr(instr);
367 }
368 }
369
370 void
371 fs_visitor::nir_emit_instr(nir_instr *instr)
372 {
373 switch (instr->type) {
374 case nir_instr_type_alu:
375 nir_emit_alu(nir_instr_as_alu(instr));
376 break;
377
378 case nir_instr_type_intrinsic:
379 nir_emit_intrinsic(nir_instr_as_intrinsic(instr));
380 break;
381
382 case nir_instr_type_texture:
383 nir_emit_texture(nir_instr_as_texture(instr));
384 break;
385
386 case nir_instr_type_load_const:
387 nir_emit_load_const(nir_instr_as_load_const(instr));
388 break;
389
390 case nir_instr_type_jump:
391 nir_emit_jump(nir_instr_as_jump(instr));
392 break;
393
394 default:
395 unreachable("unknown instruction type");
396 }
397 }
398
399 static brw_reg_type
400 brw_type_for_nir_type(nir_alu_type type)
401 {
402 switch (type) {
403 case nir_type_bool:
404 case nir_type_unsigned:
405 return BRW_REGISTER_TYPE_UD;
406 case nir_type_int:
407 return BRW_REGISTER_TYPE_D;
408 case nir_type_float:
409 return BRW_REGISTER_TYPE_F;
410 default:
411 unreachable("unknown type");
412 }
413
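/* Not reached; keeps compilers that can't see through unreachable() from
 * warning about a missing return.
 */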
414 return BRW_REGISTER_TYPE_F;
415 }
416
417 void
418 fs_visitor::nir_emit_alu(nir_alu_instr *instr)
419 {
420 struct brw_wm_prog_key *fs_key = (struct brw_wm_prog_key *) this->key;
421
422 fs_reg op[3];
423 fs_reg dest = get_nir_dest(instr->dest.dest);
424 dest.type = brw_type_for_nir_type(nir_op_infos[instr->op].output_type);
425
426 fs_reg result;
427 if (instr->has_predicate) {
428 result = fs_reg(GRF, virtual_grf_alloc(4));
429 result.type = dest.type;
430 } else {
431 result = dest;
432 }
433
434
435 for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++)
436 op[i] = get_nir_alu_src(instr, i);
437
438 switch (instr->op) {
439 case nir_op_fmov:
440 case nir_op_i2f:
441 case nir_op_u2f: {
442 fs_inst *inst = MOV(result, op[0]);
443 inst->saturate = instr->dest.saturate;
444 emit_percomp(inst, instr->dest.write_mask);
445 }
446 break;
447
448 case nir_op_imov:
449 case nir_op_f2i:
450 case nir_op_f2u:
451 emit_percomp(MOV(result, op[0]), instr->dest.write_mask);
452 break;
453
454 case nir_op_fsign: {
455 /* AND(val, 0x80000000) gives the sign bit.
456 *
457 * Predicated OR ORs 1.0 (0x3f800000) with the sign bit if val is not
458 * zero.
459 */
460 emit_percomp(CMP(reg_null_f, op[0], fs_reg(0.0f), BRW_CONDITIONAL_NZ),
461 instr->dest.write_mask);
462
463 fs_reg result_int = retype(result, BRW_REGISTER_TYPE_UD);
464 op[0].type = BRW_REGISTER_TYPE_UD;
465 result.type = BRW_REGISTER_TYPE_UD;
466 emit_percomp(AND(result_int, op[0], fs_reg(0x80000000u)),
467 instr->dest.write_mask);
468
469 fs_inst *inst = OR(result_int, result_int, fs_reg(0x3f800000u));
470 inst->predicate = BRW_PREDICATE_NORMAL;
471 emit_percomp(inst, instr->dest.write_mask);
472 if (instr->dest.saturate) {
473 fs_inst *inst = MOV(retype(result, BRW_REGISTER_TYPE_F), retype(result, BRW_REGISTER_TYPE_F)); /* saturate as a float, not UD */
474 inst->saturate = true;
475 emit_percomp(inst, instr->dest.write_mask);
476 }
477 break;
478 }
479
480 case nir_op_isign: {
481 /* ASR(val, 31) -> negative val generates 0xffffffff (signed -1).
482 * -> non-negative val generates 0x00000000.
483 * Predicated OR sets 1 if val is positive.
484 */
485 emit_percomp(CMP(reg_null_d, op[0], fs_reg(0), BRW_CONDITIONAL_G),
486 instr->dest.write_mask);
487
488 emit_percomp(ASR(result, op[0], fs_reg(31)), instr->dest.write_mask);
489
490 fs_inst *inst = OR(result, result, fs_reg(1));
491 inst->predicate = BRW_PREDICATE_NORMAL;
492 emit_percomp(inst, instr->dest.write_mask);
493 break;
494 }
495
496 case nir_op_frcp:
497 emit_math_percomp(SHADER_OPCODE_RCP, result, op[0],
498 instr->dest.write_mask, instr->dest.saturate);
499 break;
500
501 case nir_op_fexp2:
502 emit_math_percomp(SHADER_OPCODE_EXP2, result, op[0],
503 instr->dest.write_mask, instr->dest.saturate);
504 break;
505
506 case nir_op_flog2:
507 emit_math_percomp(SHADER_OPCODE_LOG2, result, op[0],
508 instr->dest.write_mask, instr->dest.saturate);
509 break;
510
511 case nir_op_fexp:
512 case nir_op_flog:
513 unreachable("not reached: should be handled by ir_explog_to_explog2");
514
515 case nir_op_fsin:
516 case nir_op_fsin_reduced:
517 emit_math_percomp(SHADER_OPCODE_SIN, result, op[0],
518 instr->dest.write_mask, instr->dest.saturate);
519 break;
520
521 case nir_op_fcos:
522 case nir_op_fcos_reduced:
523 emit_math_percomp(SHADER_OPCODE_COS, result, op[0],
524 instr->dest.write_mask, instr->dest.saturate);
525 break;
526
527 case nir_op_fddx:
528 if (fs_key->high_quality_derivatives)
529 emit_percomp(FS_OPCODE_DDX_FINE, result, op[0],
530 instr->dest.write_mask, instr->dest.saturate);
531 else
532 emit_percomp(FS_OPCODE_DDX_COARSE, result, op[0],
533 instr->dest.write_mask, instr->dest.saturate);
534 break;
535 case nir_op_fddx_fine:
536 emit_percomp(FS_OPCODE_DDX_FINE, result, op[0],
537 instr->dest.write_mask, instr->dest.saturate);
538 break;
539 case nir_op_fddx_coarse:
540 emit_percomp(FS_OPCODE_DDX_COARSE, result, op[0],
541 instr->dest.write_mask, instr->dest.saturate);
542 break;
543 case nir_op_fddy:
544 if (fs_key->high_quality_derivatives)
545 emit_percomp(FS_OPCODE_DDY_FINE, result, op[0],
546 fs_reg(fs_key->render_to_fbo),
547 instr->dest.write_mask, instr->dest.saturate);
548 else
549 emit_percomp(FS_OPCODE_DDY_COARSE, result, op[0],
550 fs_reg(fs_key->render_to_fbo),
551 instr->dest.write_mask, instr->dest.saturate);
552 break;
553 case nir_op_fddy_fine:
554 emit_percomp(FS_OPCODE_DDY_FINE, result, op[0],
555 fs_reg(fs_key->render_to_fbo),
556 instr->dest.write_mask, instr->dest.saturate);
557 break;
558 case nir_op_fddy_coarse:
559 emit_percomp(FS_OPCODE_DDY_COARSE, result, op[0],
560 fs_reg(fs_key->render_to_fbo),
561 instr->dest.write_mask, instr->dest.saturate);
562 break;
563
564 case nir_op_fadd:
565 case nir_op_iadd: {
566 fs_inst *inst = ADD(result, op[0], op[1]);
567 inst->saturate = instr->dest.saturate;
568 emit_percomp(inst, instr->dest.write_mask);
569 break;
570 }
571
572 case nir_op_fmul: {
573 fs_inst *inst = MUL(result, op[0], op[1]);
574 inst->saturate = instr->dest.saturate;
575 emit_percomp(inst, instr->dest.write_mask);
576 break;
577 }
578
579 case nir_op_imul: {
580 /* TODO put in the 16-bit constant optimization once we have SSA */
581
582 if (brw->gen >= 7)
583 no16("SIMD16 explicit accumulator operands unsupported\n");
584
585 struct brw_reg acc = retype(brw_acc_reg(dispatch_width), result.type);
586
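/* A full 32x32 multiply needs the MUL/MACH pair on this hardware: MUL
 * writes a partial product into the accumulator, MACH completes it, and
 * the low 32 bits of the result are then read back out of acc0.
 */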
587 emit_percomp(MUL(acc, op[0], op[1]), instr->dest.write_mask);
588 emit_percomp(MACH(reg_null_d, op[0], op[1]), instr->dest.write_mask);
589 emit_percomp(MOV(result, fs_reg(acc)), instr->dest.write_mask);
590 break;
591 }
592
593 case nir_op_imul_high:
594 case nir_op_umul_high: {
595 if (brw->gen >= 7)
596 no16("SIMD16 explicit accumulator operands unsupported\n");
597
598 struct brw_reg acc = retype(brw_acc_reg(dispatch_width), result.type);
599
600 emit_percomp(MUL(acc, op[0], op[1]), instr->dest.write_mask);
601 emit_percomp(MACH(result, op[0], op[1]), instr->dest.write_mask);
602 break;
603 }
604
605 case nir_op_idiv:
606 case nir_op_udiv:
607 emit_math_percomp(SHADER_OPCODE_INT_QUOTIENT, result, op[0], op[1],
608 instr->dest.write_mask);
609 break;
610
611 case nir_op_uadd_carry: {
612 if (brw->gen >= 7)
613 no16("SIMD16 explicit accumulator operands unsupported\n");
614
615 struct brw_reg acc = retype(brw_acc_reg(dispatch_width),
616 BRW_REGISTER_TYPE_UD);
617
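/* ADDC implicitly writes each channel's carry-out (0 or 1) to the
 * accumulator; copy it out of acc0 as the result.
 */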
618 emit_percomp(ADDC(reg_null_ud, op[0], op[1]), instr->dest.write_mask);
619 emit_percomp(MOV(result, fs_reg(acc)), instr->dest.write_mask);
620 break;
621 }
622
623 case nir_op_usub_borrow: {
624 if (brw->gen >= 7)
625 no16("SIMD16 explicit accumulator operands unsupported\n");
626
627 struct brw_reg acc = retype(brw_acc_reg(dispatch_width),
628 BRW_REGISTER_TYPE_UD);
629
630 emit_percomp(SUBB(reg_null_ud, op[0], op[1]), instr->dest.write_mask);
631 emit_percomp(MOV(result, fs_reg(acc)), instr->dest.write_mask);
632 break;
633 }
634
635 case nir_op_umod:
636 emit_math_percomp(SHADER_OPCODE_INT_REMAINDER, result, op[0],
637 op[1], instr->dest.write_mask);
638 break;
639
640 case nir_op_flt:
641 case nir_op_ilt:
642 case nir_op_ult:
643 emit_percomp(CMP(result, op[0], op[1], BRW_CONDITIONAL_L),
644 instr->dest.write_mask);
645 break;
646
647 case nir_op_fge:
648 case nir_op_ige:
649 case nir_op_uge:
650 emit_percomp(CMP(result, op[0], op[1], BRW_CONDITIONAL_GE),
651 instr->dest.write_mask);
652 break;
653
654 case nir_op_feq:
655 case nir_op_ieq:
656 emit_percomp(CMP(result, op[0], op[1], BRW_CONDITIONAL_Z),
657 instr->dest.write_mask);
658 break;
659
660 case nir_op_fne:
661 case nir_op_ine:
662 emit_percomp(CMP(result, op[0], op[1], BRW_CONDITIONAL_NZ),
663 instr->dest.write_mask);
664 break;
665
666 case nir_op_ball_fequal2:
667 case nir_op_ball_iequal2:
668 case nir_op_ball_fequal3:
669 case nir_op_ball_iequal3:
670 case nir_op_ball_fequal4:
671 case nir_op_ball_iequal4: {
672 unsigned num_components = nir_op_infos[instr->op].input_sizes[0];
673 fs_reg temp = fs_reg(GRF, virtual_grf_alloc(num_components));
674 emit_percomp(CMP(temp, op[0], op[1], BRW_CONDITIONAL_Z),
675 (1 << num_components) - 1);
676 emit_reduction(BRW_OPCODE_AND, result, temp, num_components);
677 break;
678 }
679
680 case nir_op_bany_fnequal2:
681 case nir_op_bany_inequal2:
682 case nir_op_bany_fnequal3:
683 case nir_op_bany_inequal3:
684 case nir_op_bany_fnequal4:
685 case nir_op_bany_inequal4: {
686 unsigned num_components = nir_op_infos[instr->op].input_sizes[0];
687 fs_reg temp = fs_reg(GRF, virtual_grf_alloc(num_components));
688 temp.type = BRW_REGISTER_TYPE_UD;
689 emit_percomp(CMP(temp, op[0], op[1], BRW_CONDITIONAL_NZ),
690 (1 << num_components) - 1);
691 emit_reduction(BRW_OPCODE_OR, result, temp, num_components);
692 break;
693 }
694
695 case nir_op_inot:
696 emit_percomp(NOT(result, op[0]), instr->dest.write_mask);
697 break;
698 case nir_op_ixor:
699 emit_percomp(XOR(result, op[0], op[1]), instr->dest.write_mask);
700 break;
701 case nir_op_ior:
702 emit_percomp(OR(result, op[0], op[1]), instr->dest.write_mask);
703 break;
704 case nir_op_iand:
705 emit_percomp(AND(result, op[0], op[1]), instr->dest.write_mask);
706 break;
707
708 case nir_op_fdot2:
709 case nir_op_fdot3:
710 case nir_op_fdot4: {
711 unsigned num_components = nir_op_infos[instr->op].input_sizes[0];
712 fs_reg temp = fs_reg(GRF, virtual_grf_alloc(num_components));
713 emit_percomp(MUL(temp, op[0], op[1]), (1 << num_components) - 1);
714 emit_reduction(BRW_OPCODE_ADD, result, temp, num_components);
715 if (instr->dest.saturate) {
716 fs_inst *inst = emit(MOV(result, result));
717 inst->saturate = true;
718 }
719 break;
720 }
721
722 case nir_op_bany2:
723 case nir_op_bany3:
724 case nir_op_bany4: {
725 unsigned num_components = nir_op_infos[instr->op].input_sizes[0];
726 emit_reduction(BRW_OPCODE_OR, result, op[0], num_components);
727 break;
728 }
729
730 case nir_op_ball2:
731 case nir_op_ball3:
732 case nir_op_ball4: {
733 unsigned num_components = nir_op_infos[instr->op].input_sizes[0];
734 emit_reduction(BRW_OPCODE_AND, result, op[0], num_components);
735 break;
736 }
737
738 case nir_op_fnoise1_1:
739 case nir_op_fnoise1_2:
740 case nir_op_fnoise1_3:
741 case nir_op_fnoise1_4:
742 case nir_op_fnoise2_1:
743 case nir_op_fnoise2_2:
744 case nir_op_fnoise2_3:
745 case nir_op_fnoise2_4:
746 case nir_op_fnoise3_1:
747 case nir_op_fnoise3_2:
748 case nir_op_fnoise3_3:
749 case nir_op_fnoise3_4:
750 case nir_op_fnoise4_1:
751 case nir_op_fnoise4_2:
752 case nir_op_fnoise4_3:
753 case nir_op_fnoise4_4:
754 unreachable("not reached: should be handled by lower_noise");
755
756 case nir_op_vec2:
757 case nir_op_vec3:
758 case nir_op_vec4:
759 unreachable("not reached: should be handled by lower_quadop_vector");
760
761 case nir_op_ldexp:
762 unreachable("not reached: should be handled by ldexp_to_arith()");
763
764 case nir_op_fsqrt:
765 emit_math_percomp(SHADER_OPCODE_SQRT, result, op[0],
766 instr->dest.write_mask, instr->dest.saturate);
767 break;
768
769 case nir_op_frsq:
770 emit_math_percomp(SHADER_OPCODE_RSQ, result, op[0],
771 instr->dest.write_mask, instr->dest.saturate);
772 break;
773
774 case nir_op_b2i:
775 emit_percomp(AND(result, op[0], fs_reg(1)), instr->dest.write_mask);
776 break;
777 case nir_op_b2f: {
778 emit_percomp(AND(retype(result, BRW_REGISTER_TYPE_UD), op[0],
779 fs_reg(0x3f800000u)),
780 instr->dest.write_mask);
781 break;
782 }
783
784 case nir_op_f2b:
785 emit_percomp(CMP(result, op[0], fs_reg(0.0f), BRW_CONDITIONAL_NZ),
786 instr->dest.write_mask);
787 break;
788 case nir_op_i2b:
789 emit_percomp(CMP(result, op[0], fs_reg(0), BRW_CONDITIONAL_NZ),
790 instr->dest.write_mask);
791 break;
792
793 case nir_op_ftrunc: {
794 fs_inst *inst = RNDZ(result, op[0]);
795 inst->saturate = instr->dest.saturate;
796 emit_percomp(inst, instr->dest.write_mask);
797 break;
798 }
799 case nir_op_fceil: {
800 op[0].negate = !op[0].negate;
801 fs_reg temp = fs_reg(this, glsl_type::vec4_type);
802 emit_percomp(RNDD(temp, op[0]), instr->dest.write_mask);
803 temp.negate = true;
804 fs_inst *inst = MOV(result, temp);
805 inst->saturate = instr->dest.saturate;
806 emit_percomp(inst, instr->dest.write_mask);
807 break;
808 }
809 case nir_op_ffloor: {
810 fs_inst *inst = RNDD(result, op[0]);
811 inst->saturate = instr->dest.saturate;
812 emit_percomp(inst, instr->dest.write_mask);
813 break;
814 }
815 case nir_op_ffract: {
816 fs_inst *inst = FRC(result, op[0]);
817 inst->saturate = instr->dest.saturate;
818 emit_percomp(inst, instr->dest.write_mask);
819 break;
820 }
821 case nir_op_fround_even: {
822 fs_inst *inst = RNDE(result, op[0]);
823 inst->saturate = instr->dest.saturate;
824 emit_percomp(inst, instr->dest.write_mask);
825 break;
826 }
827
828 case nir_op_fmin:
829 case nir_op_imin:
830 case nir_op_umin:
831 if (brw->gen >= 6) {
832 emit_percomp(BRW_OPCODE_SEL, result, op[0], op[1],
833 instr->dest.write_mask, instr->dest.saturate,
834 BRW_PREDICATE_NONE, BRW_CONDITIONAL_L);
835 } else {
836 emit_percomp(CMP(reg_null_d, op[0], op[1], BRW_CONDITIONAL_L),
837 instr->dest.write_mask);
838
839 emit_percomp(BRW_OPCODE_SEL, result, op[0], op[1],
840 instr->dest.write_mask, instr->dest.saturate,
841 BRW_PREDICATE_NORMAL);
842 }
843 break;
844
845 case nir_op_fmax:
846 case nir_op_imax:
847 case nir_op_umax:
848 if (brw->gen >= 6) {
849 emit_percomp(BRW_OPCODE_SEL, result, op[0], op[1],
850 instr->dest.write_mask, instr->dest.saturate,
851 BRW_PREDICATE_NONE, BRW_CONDITIONAL_GE);
852 } else {
853 emit_percomp(CMP(reg_null_d, op[0], op[1], BRW_CONDITIONAL_GE),
854 instr->dest.write_mask);
855
856 emit_percomp(BRW_OPCODE_SEL, result, op[0], op[1],
857 instr->dest.write_mask, instr->dest.saturate,
858 BRW_PREDICATE_NORMAL);
859 }
860 break;
861
862 case nir_op_pack_snorm_2x16:
863 case nir_op_pack_snorm_4x8:
864 case nir_op_pack_unorm_2x16:
865 case nir_op_pack_unorm_4x8:
866 case nir_op_unpack_snorm_2x16:
867 case nir_op_unpack_snorm_4x8:
868 case nir_op_unpack_unorm_2x16:
869 case nir_op_unpack_unorm_4x8:
870 case nir_op_unpack_half_2x16:
871 case nir_op_pack_half_2x16:
872 unreachable("not reached: should be handled by lower_packing_builtins");
873
874 case nir_op_unpack_half_2x16_split_x:
875 emit_percomp(FS_OPCODE_UNPACK_HALF_2x16_SPLIT_X, result, op[0],
876 instr->dest.write_mask, instr->dest.saturate);
877 break;
878 case nir_op_unpack_half_2x16_split_y:
879 emit_percomp(FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y, result, op[0],
880 instr->dest.write_mask, instr->dest.saturate);
881 break;
882
883 case nir_op_fpow:
884 emit_percomp(SHADER_OPCODE_POW, result, op[0], op[1],
885 instr->dest.write_mask, instr->dest.saturate);
886 break;
887
888 case nir_op_bitfield_reverse:
889 emit_percomp(BFREV(result, op[0]), instr->dest.write_mask);
890 break;
891
892 case nir_op_bit_count:
893 emit_percomp(CBIT(result, op[0]), instr->dest.write_mask);
894 break;
895
896 case nir_op_ufind_msb:
897 case nir_op_ifind_msb: {
898 emit_percomp(FBH(retype(result, BRW_REGISTER_TYPE_UD), op[0]),
899 instr->dest.write_mask);
900
901 /* FBH counts from the MSB side, while GLSL's findMSB() wants the count
902 * from the LSB side. If FBH didn't return an error (0xFFFFFFFF), then
903 * subtract the result from 31 to convert the MSB count into an LSB count.
904 */
905
906 emit_percomp(CMP(reg_null_d, result, fs_reg(-1), BRW_CONDITIONAL_NZ),
907 instr->dest.write_mask);
908 fs_reg neg_result(result);
909 neg_result.negate = true;
910 fs_inst *inst = ADD(result, neg_result, fs_reg(31));
911 inst->predicate = BRW_PREDICATE_NORMAL;
912 emit_percomp(inst, instr->dest.write_mask);
913 break;
914 }
915
916 case nir_op_find_lsb:
917 emit_percomp(FBL(result, op[0]), instr->dest.write_mask);
918 break;
919
920 case nir_op_ubitfield_extract:
921 case nir_op_ibitfield_extract:
922 emit_percomp(BFE(result, op[2], op[1], op[0]), instr->dest.write_mask);
923 break;
924 case nir_op_bfm:
925 emit_percomp(BFI1(result, op[0], op[1]), instr->dest.write_mask);
926 break;
927 case nir_op_bfi:
928 emit_percomp(BFI2(result, op[0], op[1], op[2]), instr->dest.write_mask);
929 break;
930
931 case nir_op_bitfield_insert:
932 unreachable("not reached: should be handled by "
933 "lower_instructions::bitfield_insert_to_bfm_bfi");
934
935 case nir_op_ishl:
936 emit_percomp(SHL(result, op[0], op[1]), instr->dest.write_mask);
937 break;
938 case nir_op_ishr:
939 emit_percomp(ASR(result, op[0], op[1]), instr->dest.write_mask);
940 break;
941 case nir_op_ushr:
942 emit_percomp(SHR(result, op[0], op[1]), instr->dest.write_mask);
943 break;
944
945 case nir_op_pack_half_2x16_split:
946 emit_percomp(FS_OPCODE_PACK_HALF_2x16_SPLIT, result, op[0], op[1],
947 instr->dest.write_mask);
948 break;
949
950 case nir_op_ffma:
951 emit_percomp(MAD(result, op[2], op[1], op[0]), instr->dest.write_mask);
952 break;
953
954 case nir_op_flrp:
955 /* TODO emulate for gen < 6 */
956 emit_percomp(LRP(result, op[2], op[1], op[0]), instr->dest.write_mask);
957 break;
958
959 case nir_op_bcsel:
960 emit(CMP(reg_null_d, op[0], fs_reg(0), BRW_CONDITIONAL_NZ));
961 emit_percomp(BRW_OPCODE_SEL, result, op[1], op[2],
962 instr->dest.write_mask, false, BRW_PREDICATE_NORMAL);
963 break;
964
965 default:
966 unreachable("unhandled instruction");
967 }
968
969 /* emit a predicated move if there was predication */
970 if (instr->has_predicate) {
971 fs_inst *inst = emit(MOV(reg_null_d,
972 retype(get_nir_src(instr->predicate),
973 BRW_REGISTER_TYPE_UD)));
974 inst->conditional_mod = BRW_CONDITIONAL_NZ;
975 inst = MOV(dest, result);
976 inst->predicate = BRW_PREDICATE_NORMAL;
977 emit_percomp(inst, instr->dest.write_mask);
978 }
979 }
980
981 fs_reg
982 fs_visitor::get_nir_src(nir_src src)
983 {
984 if (src.is_ssa) {
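/* The only SSA values we currently see are load_const results, which are
 * used for immediates: materialize the constant into a fresh GRF with one
 * MOV per component.
 */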
985 assert(src.ssa->parent_instr->type == nir_instr_type_load_const);
986 nir_load_const_instr *load = nir_instr_as_load_const(src.ssa->parent_instr);
987 fs_reg reg(GRF, virtual_grf_alloc(src.ssa->num_components),
988 BRW_REGISTER_TYPE_D);
989
990 for (unsigned i = 0; i < src.ssa->num_components; ++i)
991 emit(MOV(offset(reg, i), fs_reg(load->value.i[i])));
992
993 return reg;
994 } else {
995 fs_reg reg;
996 if (src.reg.reg->is_global)
997 reg = nir_globals[src.reg.reg->index];
998 else
999 reg = nir_locals[src.reg.reg->index];
1000
1001 /* To avoid floating-point denorm flushing problems, default the type
1002 * to D; instructions that need floating-point semantics will retype
1003 * to F themselves.
1004 */
1005 reg.type = BRW_REGISTER_TYPE_D;
1006 reg.reg_offset = src.reg.base_offset;
1007 if (src.reg.indirect) {
1008 reg.reladdr = new(mem_ctx) fs_reg();
1009 *reg.reladdr = retype(get_nir_src(*src.reg.indirect),
1010 BRW_REGISTER_TYPE_D);
1011 }
1012
1013 return reg;
1014 }
1015 }
1016
1017 fs_reg
1018 fs_visitor::get_nir_alu_src(nir_alu_instr *instr, unsigned src)
1019 {
1020 fs_reg reg = get_nir_src(instr->src[src].src);
1021
1022 reg.type = brw_type_for_nir_type(nir_op_infos[instr->op].input_types[src]);
1023 reg.abs = instr->src[src].abs;
1024 reg.negate = instr->src[src].negate;
1025
1026 bool needs_swizzle = false;
1027 unsigned num_components = 0;
1028 for (unsigned i = 0; i < 4; i++) {
1029 if (!nir_alu_instr_channel_used(instr, src, i))
1030 continue;
1031
1032 if (instr->src[src].swizzle[i] != i)
1033 needs_swizzle = true;
1034
1035 num_components = i + 1;
1036 }
1037
1038 if (needs_swizzle) {
1039 /* resolve the swizzle with a MOV per channel */
1040 fs_reg new_reg = fs_reg(GRF, virtual_grf_alloc(num_components), reg.type);
1041
1042 for (unsigned i = 0; i < 4; i++) {
1043 if (!nir_alu_instr_channel_used(instr, src, i))
1044 continue;
1045
1046 emit(MOV(offset(new_reg, i),
1047 offset(reg, instr->src[src].swizzle[i])));
1048 }
1049
1050 return new_reg;
1051 }
1052
1053 return reg;
1054 }
1055
1056 fs_reg
1057 fs_visitor::get_nir_dest(nir_dest dest)
1058 {
1059 fs_reg reg;
1060 if (dest.reg.reg->is_global)
1061 reg = nir_globals[dest.reg.reg->index];
1062 else
1063 reg = nir_locals[dest.reg.reg->index];
1064
1065 reg.reg_offset = dest.reg.base_offset;
1066 if (dest.reg.indirect) {
1067 reg.reladdr = new(mem_ctx) fs_reg();
1068 *reg.reladdr = retype(get_nir_src(*dest.reg.indirect),
1069 BRW_REGISTER_TYPE_D);
1070 }
1071
1072 return reg;
1073 }
1074
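/* Scalarize a vector operation: emit one copy of inst for each channel
 * enabled in wr_mask, bumping the destination (and any GRF source)
 * reg_offset to select the channel. E.g. with wr_mask 0x5, copies for
 * components x and z are emitted. The incoming inst is only a template
 * and is never emitted itself.
 */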
1075 void
1076 fs_visitor::emit_percomp(fs_inst *inst, unsigned wr_mask)
1077 {
1078 for (unsigned i = 0; i < 4; i++) {
1079 if (!((wr_mask >> i) & 1))
1080 continue;
1081
1082 fs_inst *new_inst = new(mem_ctx) fs_inst(*inst);
1083 new_inst->dst.reg_offset += i;
1084 for (unsigned j = 0; j < new_inst->sources; j++)
1085 if (inst->src[j].file == GRF)
1086 new_inst->src[j].reg_offset += i;
1087
1088 emit(new_inst);
1089 }
1090 }
1091
1092 void
1093 fs_visitor::emit_percomp(enum opcode op, fs_reg dest, fs_reg src0,
1094 unsigned wr_mask, bool saturate,
1095 enum brw_predicate predicate,
1096 enum brw_conditional_mod mod)
1097 {
1098 for (unsigned i = 0; i < 4; i++) {
1099 if (!((wr_mask >> i) & 1))
1100 continue;
1101
1102 fs_inst *new_inst = new(mem_ctx) fs_inst(op, dest, src0);
1103 new_inst->dst.reg_offset += i;
1104 for (unsigned j = 0; j < new_inst->sources; j++)
1105 if (new_inst->src[j].file == GRF)
1106 new_inst->src[j].reg_offset += i;
1107
1108 new_inst->predicate = predicate;
1109 new_inst->conditional_mod = mod;
1110 new_inst->saturate = saturate;
1111 emit(new_inst);
1112 }
1113 }
1114
1115 void
1116 fs_visitor::emit_percomp(enum opcode op, fs_reg dest, fs_reg src0, fs_reg src1,
1117 unsigned wr_mask, bool saturate,
1118 enum brw_predicate predicate,
1119 enum brw_conditional_mod mod)
1120 {
1121 for (unsigned i = 0; i < 4; i++) {
1122 if (!((wr_mask >> i) & 1))
1123 continue;
1124
1125 fs_inst *new_inst = new(mem_ctx) fs_inst(op, dest, src0, src1);
1126 new_inst->dst.reg_offset += i;
1127 for (unsigned j = 0; j < new_inst->sources; j++)
1128 if (new_inst->src[j].file == GRF)
1129 new_inst->src[j].reg_offset += i;
1130
1131 new_inst->predicate = predicate;
1132 new_inst->conditional_mod = mod;
1133 new_inst->saturate = saturate;
1134 emit(new_inst);
1135 }
1136 }
1137
1138 void
1139 fs_visitor::emit_math_percomp(enum opcode op, fs_reg dest, fs_reg src0,
1140 unsigned wr_mask, bool saturate)
1141 {
1142 for (unsigned i = 0; i < 4; i++) {
1143 if (!((wr_mask >> i) & 1))
1144 continue;
1145
1146 fs_reg new_dest = dest;
1147 new_dest.reg_offset += i;
1148 fs_reg new_src0 = src0;
1149 if (src0.file == GRF)
1150 new_src0.reg_offset += i;
1151
1152 fs_inst *new_inst = emit_math(op, new_dest, new_src0);
1153 new_inst->saturate = saturate;
1154 }
1155 }
1156
1157 void
1158 fs_visitor::emit_math_percomp(enum opcode op, fs_reg dest, fs_reg src0,
1159 fs_reg src1, unsigned wr_mask,
1160 bool saturate)
1161 {
1162 for (unsigned i = 0; i < 4; i++) {
1163 if (!((wr_mask >> i) & 1))
1164 continue;
1165
1166 fs_reg new_dest = dest;
1167 new_dest.reg_offset += i;
1168 fs_reg new_src0 = src0;
1169 if (src0.file == GRF)
1170 new_src0.reg_offset += i;
1171 fs_reg new_src1 = src1;
1172 if (src1.file == GRF)
1173 new_src1.reg_offset += i;
1174
1175 fs_inst *new_inst = emit_math(op, new_dest, new_src0, new_src1);
1176 new_inst->saturate = saturate;
1177 }
1178 }
1179
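/* Fold the first num_components channels of src into a single value with
 * the given binary op: (s0 op s1) for vec2, ((s0 op s1) op s2) for vec3,
 * and ((s0 op s1) op (s2 op s3)) for vec4.
 */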
1180 void
1181 fs_visitor::emit_reduction(enum opcode op, fs_reg dest, fs_reg src,
1182 unsigned num_components)
1183 {
1184 fs_reg src0 = src;
1185 fs_reg src1 = src;
1186 src1.reg_offset++;
1187
1188 if (num_components == 2) {
1189 emit(op, dest, src0, src1);
1190 return;
1191 }
1192
1193 fs_reg temp1 = fs_reg(GRF, virtual_grf_alloc(1));
1194 temp1.type = src.type;
1195 emit(op, temp1, src0, src1);
1196
1197 fs_reg src2 = src;
1198 src2.reg_offset += 2;
1199
1200 if (num_components == 3) {
1201 emit(op, dest, temp1, src2);
1202 return;
1203 }
1204
1205 assert(num_components == 4);
1206
1207 fs_reg src3 = src;
1208 src3.reg_offset += 3;
1209 fs_reg temp2 = fs_reg(GRF, virtual_grf_alloc(1));
1210 temp2.type = src.type;
1211
1212 emit(op, temp2, src2, src3);
1213 emit(op, dest, temp1, temp2);
1214 }
1215
1216 void
1217 fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
1218 {
1219 fs_reg dest;
1220 if (nir_intrinsic_infos[instr->intrinsic].has_dest)
1221 dest = get_nir_dest(instr->dest);
1222 if (instr->has_predicate) {
1223 fs_inst *inst = emit(MOV(reg_null_d,
1224 retype(get_nir_src(instr->predicate),
1225 BRW_REGISTER_TYPE_UD)));
1226 inst->conditional_mod = BRW_CONDITIONAL_NZ;
1227 }
1228
1229 switch (instr->intrinsic) {
1230 case nir_intrinsic_discard: {
1231 /* We track our discarded pixels in f0.1. By predicating on it, we can
1232 * update just the flag bits that aren't yet discarded. By emitting a
1233 * CMP of g0 != g0, all our currently executing channels will get turned
1234 * off.
1235 */
1236 fs_reg some_reg = fs_reg(retype(brw_vec8_grf(0, 0),
1237 BRW_REGISTER_TYPE_UW));
1238 fs_inst *cmp = emit(CMP(reg_null_f, some_reg, some_reg,
1239 BRW_CONDITIONAL_NZ));
1240 cmp->predicate = BRW_PREDICATE_NORMAL;
1241 cmp->flag_subreg = 1;
1242
1243 if (brw->gen >= 6) {
1244 /* For performance, after a discard, jump to the end of the shader.
1245 * Only jump if all relevant channels have been discarded.
1246 */
1247 fs_inst *discard_jump = emit(FS_OPCODE_DISCARD_JUMP);
1248 discard_jump->flag_subreg = 1;
1249
1250 discard_jump->predicate = (dispatch_width == 8)
1251 ? BRW_PREDICATE_ALIGN1_ANY8H
1252 : BRW_PREDICATE_ALIGN1_ANY16H;
1253 discard_jump->predicate_inverse = true;
1254 }
1255
1256 break;
1257 }
1258
1259 case nir_intrinsic_atomic_counter_inc:
1260 case nir_intrinsic_atomic_counter_dec:
1261 case nir_intrinsic_atomic_counter_read: {
1262 unsigned surf_index = prog_data->binding_table.abo_start +
1263 (unsigned) instr->const_index[0];
1264 fs_reg offset = fs_reg(get_nir_src(instr->src[0]));
1265
1266 switch (instr->intrinsic) {
1267 case nir_intrinsic_atomic_counter_inc:
1268 emit_untyped_atomic(BRW_AOP_INC, surf_index, dest, offset,
1269 fs_reg(), fs_reg());
1270 break;
1271 case nir_intrinsic_atomic_counter_dec:
1272 emit_untyped_atomic(BRW_AOP_PREDEC, surf_index, dest, offset,
1273 fs_reg(), fs_reg());
1274 break;
1275 case nir_intrinsic_atomic_counter_read:
1276 emit_untyped_surface_read(surf_index, dest, offset);
1277 break;
1278 default:
1279 unreachable("Unreachable");
1280 }
1281 break;
1282 }
1283
1284 case nir_intrinsic_load_front_face:
1285 assert(!"TODO");
1286
1287 case nir_intrinsic_load_sample_mask_in: {
1288 assert(brw->gen >= 7);
1289 fs_reg reg = fs_reg(retype(brw_vec8_grf(payload.sample_mask_in_reg, 0),
1290 BRW_REGISTER_TYPE_D));
1291 dest.type = reg.type;
1292 fs_inst *inst = MOV(dest, reg);
1293 if (instr->has_predicate)
1294 inst->predicate = BRW_PREDICATE_NORMAL;
1295 emit(inst);
1296 break;
1297 }
1298
1299 case nir_intrinsic_load_sample_pos: {
1300 fs_reg *reg = emit_samplepos_setup();
1301 dest.type = reg->type;
1302 emit(MOV(dest, *reg));
1303 emit(MOV(offset(dest, 1), offset(*reg, 1)));
1304 break;
1305 }
1306
1307 case nir_intrinsic_load_sample_id: {
1308 fs_reg *reg = emit_sampleid_setup();
1309 dest.type = reg->type;
1310 emit(MOV(dest, *reg));
1311 break;
1312 }
1313
1314 case nir_intrinsic_load_uniform_vec1:
1315 case nir_intrinsic_load_uniform_vec2:
1316 case nir_intrinsic_load_uniform_vec3:
1317 case nir_intrinsic_load_uniform_vec4: {
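/* const_index[0] is the base uniform slot and const_index[1] the number
 * of array elements; uniforms occupy consecutive scalar slots, so copy
 * them out one component at a time.
 */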
1318 unsigned index = 0;
1319 for (int i = 0; i < instr->const_index[1]; i++) {
1320 for (unsigned j = 0;
1321 j < nir_intrinsic_infos[instr->intrinsic].dest_components; j++) {
1322 fs_reg src = nir_uniforms;
1323 src.reg_offset = instr->const_index[0] + index;
1324 src.type = dest.type;
1325 index++;
1326
1327 fs_inst *inst = MOV(dest, src);
1328 if (instr->has_predicate)
1329 inst->predicate = BRW_PREDICATE_NORMAL;
1330 emit(inst);
1331 dest.reg_offset++;
1332 }
1333 }
1334 break;
1335 }
1336
1337 case nir_intrinsic_load_uniform_vec1_indirect:
1338 case nir_intrinsic_load_uniform_vec2_indirect:
1339 case nir_intrinsic_load_uniform_vec3_indirect:
1340 case nir_intrinsic_load_uniform_vec4_indirect: {
1341 unsigned index = 0;
1342 for (int i = 0; i < instr->const_index[1]; i++) {
1343 for (unsigned j = 0;
1344 j < nir_intrinsic_infos[instr->intrinsic].dest_components; j++) {
1345 fs_reg src = nir_uniforms;
1346 src.reg_offset = instr->const_index[0] + index;
1347 src.reladdr = new(mem_ctx) fs_reg(get_nir_src(instr->src[0]));
1348 src.reladdr->type = BRW_REGISTER_TYPE_D;
1349 src.type = dest.type;
1350 index++;
1351
1352 fs_inst *inst = MOV(dest, src);
1353 if (instr->has_predicate)
1354 inst->predicate = BRW_PREDICATE_NORMAL;
1355 emit(inst);
1356 dest.reg_offset++;
1357 }
1358 }
1359 break;
1360 }
1361
1362 case nir_intrinsic_load_ubo_vec1:
1363 case nir_intrinsic_load_ubo_vec2:
1364 case nir_intrinsic_load_ubo_vec3:
1365 case nir_intrinsic_load_ubo_vec4: {
1366 fs_reg surf_index = fs_reg(prog_data->binding_table.ubo_start +
1367 (unsigned) instr->const_index[0]);
1368 fs_reg packed_consts = fs_reg(this, glsl_type::float_type);
1369 packed_consts.type = dest.type;
1370
1371 fs_reg const_offset_reg = fs_reg((unsigned) instr->const_index[1] & ~15);
1372 emit(new(mem_ctx) fs_inst(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD,
1373 packed_consts, surf_index, const_offset_reg));
1374
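/* The pull load fetched constants starting at the aligned 16-byte
 * boundary; pick each component out of the returned register with a
 * smear. E.g. a vec2 at byte offset 8 within the block reads dword
 * components 2 and 3.
 */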
1375 for (unsigned i = 0;
1376 i < nir_intrinsic_infos[instr->intrinsic].dest_components; i++) {
1377 packed_consts.set_smear(instr->const_index[1] % 16 / 4 + i);
1378
1379 /* The std140 packing rules don't allow vectors to cross 16-byte
1380 * boundaries, and a reg is 32 bytes.
1381 */
1382 assert(packed_consts.subreg_offset < 32);
1383
1384 fs_inst *inst = MOV(dest, packed_consts);
1385 if (instr->has_predicate)
1386 inst->predicate = BRW_PREDICATE_NORMAL;
1387 emit(inst);
1388
1389 dest.reg_offset++;
1390 }
1391 break;
1392 }
1393
1394 case nir_intrinsic_load_ubo_vec1_indirect:
1395 case nir_intrinsic_load_ubo_vec2_indirect:
1396 case nir_intrinsic_load_ubo_vec3_indirect:
1397 case nir_intrinsic_load_ubo_vec4_indirect: {
1398 fs_reg surf_index = fs_reg(prog_data->binding_table.ubo_start +
1399 instr->const_index[0]);
1400 /* Turn the byte offset into a dword offset. */
1401 unsigned base_offset = instr->const_index[1] / 4;
1402 fs_reg offset = fs_reg(this, glsl_type::int_type);
1403 emit(SHR(offset, retype(get_nir_src(instr->src[0]), BRW_REGISTER_TYPE_D),
1404 fs_reg(2)));
1405
1406 for (unsigned i = 0;
1407 i < nir_intrinsic_infos[instr->intrinsic].dest_components; i++) {
1408 exec_list list = VARYING_PULL_CONSTANT_LOAD(dest, surf_index,
1409 offset, base_offset + i);
1410 fs_inst *last_inst = (fs_inst *) list.get_tail();
1411 if (instr->has_predicate)
1412 last_inst->predicate = BRW_PREDICATE_NORMAL;
1413 emit(list);
1414
1415 dest.reg_offset++;
1416 }
1417 break;
1418 }
1419
1420 case nir_intrinsic_load_input_vec1:
1421 case nir_intrinsic_load_input_vec2:
1422 case nir_intrinsic_load_input_vec3:
1423 case nir_intrinsic_load_input_vec4: {
1424 unsigned index = 0;
1425 for (int i = 0; i < instr->const_index[1]; i++) {
1426 for (unsigned j = 0;
1427 j < nir_intrinsic_infos[instr->intrinsic].dest_components; j++) {
1428 fs_reg src = nir_inputs;
1429 src.reg_offset = instr->const_index[0] + index;
1430 src.type = dest.type;
1431 index++;
1432
1433 fs_inst *inst = MOV(dest, src);
1434 if (instr->has_predicate)
1435 inst->predicate = BRW_PREDICATE_NORMAL;
1436 emit(inst);
1437 dest.reg_offset++;
1438 }
1439 }
1440 break;
1441 }
1442
1443 case nir_intrinsic_load_input_vec1_indirect:
1444 case nir_intrinsic_load_input_vec2_indirect:
1445 case nir_intrinsic_load_input_vec3_indirect:
1446 case nir_intrinsic_load_input_vec4_indirect: {
1447 unsigned index = 0;
1448 for (int i = 0; i < instr->const_index[1]; i++) {
1449 for (unsigned j = 0;
1450 j < nir_intrinsic_infos[instr->intrinsic].dest_components; j++) {
1451 fs_reg src = nir_inputs;
1452 src.reg_offset = instr->const_index[0] + index;
1453 src.reladdr = new(mem_ctx) fs_reg(get_nir_src(instr->src[0]));
1454 src.reladdr->type = BRW_REGISTER_TYPE_D;
1455 src.type = dest.type;
1456 index++;
1457
1458 fs_inst *inst = MOV(dest, src);
1459 if (instr->has_predicate)
1460 inst->predicate = BRW_PREDICATE_NORMAL;
1461 emit(inst);
1462 dest.reg_offset++;
1463 }
1464 }
1465 break;
1466 }
1467
1468 case nir_intrinsic_store_output_vec1:
1469 case nir_intrinsic_store_output_vec2:
1470 case nir_intrinsic_store_output_vec3:
1471 case nir_intrinsic_store_output_vec4: {
1472 fs_reg src = get_nir_src(instr->src[0]);
1473 unsigned index = 0;
1474 for (int i = 0; i < instr->const_index[1]; i++) {
1475 for (unsigned j = 0;
1476 j < nir_intrinsic_infos[instr->intrinsic].src_components[0]; j++) {
1477 fs_reg new_dest = nir_outputs;
1478 new_dest.reg_offset = instr->const_index[0] + index;
1479 new_dest.type = src.type;
1480 index++;
1481 fs_inst *inst = MOV(new_dest, src);
1482 if (instr->has_predicate)
1483 inst->predicate = BRW_PREDICATE_NORMAL;
1484 emit(inst);
1485 src.reg_offset++;
1486 }
1487 }
1488 break;
1489 }
1490
1491 case nir_intrinsic_store_output_vec1_indirect:
1492 case nir_intrinsic_store_output_vec2_indirect:
1493 case nir_intrinsic_store_output_vec3_indirect:
1494 case nir_intrinsic_store_output_vec4_indirect: {
1495 fs_reg src = get_nir_src(instr->src[0]);
1496 fs_reg indirect = get_nir_src(instr->src[1]);
1497 unsigned index = 0;
1498 for (int i = 0; i < instr->const_index[1]; i++) {
1499 for (unsigned j = 0;
1500 j < nir_intrinsic_infos[instr->intrinsic].src_components[0]; j++) {
1501 fs_reg new_dest = nir_outputs;
1502 new_dest.reg_offset = instr->const_index[0] + index;
1503 new_dest.reladdr = new(mem_ctx) fs_reg(indirect);
1504 new_dest.type = src.type;
1505 index++;
1506 fs_inst *inst = MOV(new_dest, src);
1507 if (instr->has_predicate)
1508 inst->predicate = BRW_PREDICATE_NORMAL;
1509 emit(inst);
1510 src.reg_offset++;
1511 }
1512 }
1513 break;
1514 }
1515
1516 default:
1517 unreachable("unknown intrinsic");
1518 }
1519 }
1520
1521 void
1522 fs_visitor::nir_emit_texture(nir_tex_instr *instr)
1523 {
1524 brw_wm_prog_key *key = (brw_wm_prog_key*) this->key;
1525 unsigned sampler = instr->sampler_index;
1526
1527 /* FINISHME: We're failing to recompile our programs when the sampler is
1528 * updated. This only matters for the texture rectangle scale parameters
1529 * (pre-gen6, or gen6+ with GL_CLAMP).
1530 */
1531 int texunit = prog->SamplerUnits[sampler];
1532
1533 int gather_component = instr->component;
1534
1535 bool is_rect = instr->sampler_dim == GLSL_SAMPLER_DIM_RECT;
1536
1537 bool is_cube_array = instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE &&
1538 instr->is_array;
1539
1540 int lod_components = 0, offset_components = 0;
1541
1542 fs_reg coordinate, shadow_comparitor, lod, lod2, sample_index, mcs, offset;
1543
1544 for (unsigned i = 0; i < instr->num_srcs; i++) {
1545 fs_reg src = get_nir_src(instr->src[i]);
1546 switch (instr->src_type[i]) {
1547 case nir_tex_src_bias:
1548 lod = retype(src, BRW_REGISTER_TYPE_F);
1549 break;
1550 case nir_tex_src_comparitor:
1551 shadow_comparitor = retype(src, BRW_REGISTER_TYPE_F);
1552 break;
1553 case nir_tex_src_coord:
1554 switch (instr->op) {
1555 case nir_texop_txf:
1556 case nir_texop_txf_ms:
1557 coordinate = retype(src, BRW_REGISTER_TYPE_D);
1558 break;
1559 default:
1560 coordinate = retype(src, BRW_REGISTER_TYPE_F);
1561 break;
1562 }
1563 break;
1564 case nir_tex_src_ddx:
1565 lod = retype(src, BRW_REGISTER_TYPE_F);
1566 lod_components = nir_tex_instr_src_size(instr, i);
1567 break;
1568 case nir_tex_src_ddy:
1569 lod2 = retype(src, BRW_REGISTER_TYPE_F);
1570 break;
1571 case nir_tex_src_lod:
1572 switch (instr->op) {
1573 case nir_texop_txs:
1574 lod = retype(src, BRW_REGISTER_TYPE_UD);
1575 break;
1576 case nir_texop_txf:
1577 lod = retype(src, BRW_REGISTER_TYPE_D);
1578 break;
1579 default:
1580 lod = retype(src, BRW_REGISTER_TYPE_F);
1581 break;
1582 }
1583 break;
1584 case nir_tex_src_ms_index:
1585 sample_index = retype(src, BRW_REGISTER_TYPE_UD);
1586 break;
1587 case nir_tex_src_offset:
1588 offset = retype(src, BRW_REGISTER_TYPE_D);
1589 if (instr->is_array)
1590 offset_components = instr->coord_components - 1;
1591 else
1592 offset_components = instr->coord_components;
1593 break;
1594 case nir_tex_src_projector:
1595 unreachable("should be lowered");
1596 case nir_tex_src_sampler_index:
1597 unreachable("not yet supported");
1598 default:
1599 unreachable("unknown texture source");
1600 }
1601 }
1602
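/* For compressed multisample surfaces (Gen7+), the MCS value has to be
 * fetched first and passed along to the txf_ms (ld2dms) message; zero
 * tells the sampler the surface isn't compressed.
 */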
1603 if (instr->op == nir_texop_txf_ms) {
1604 if (brw->gen >= 7 && key->tex.compressed_multisample_layout_mask & (1<<sampler))
1605 mcs = emit_mcs_fetch(coordinate, instr->coord_components, fs_reg(sampler));
1606 else
1607 mcs = fs_reg(0u);
1608 }
1609
1610 for (unsigned i = 0; i < 3; i++) {
1611 if (instr->const_offset[i] != 0) {
1612 assert(offset_components == 0);
1613 offset = fs_reg(brw_texture_offset(ctx, instr->const_offset, 3));
1614 break;
1615 }
1616 }
1617
1618 enum glsl_base_type dest_base_type;
1619 switch (instr->dest_type) {
1620 case nir_type_float:
1621 dest_base_type = GLSL_TYPE_FLOAT;
1622 break;
1623 case nir_type_int:
1624 dest_base_type = GLSL_TYPE_INT;
1625 break;
1626 case nir_type_unsigned:
1627 dest_base_type = GLSL_TYPE_UINT;
1628 break;
1629 default:
1630 unreachable("bad type");
1631 }
1632
1633 const glsl_type *dest_type =
1634 glsl_type::get_instance(dest_base_type, nir_tex_instr_dest_size(instr),
1635 1);
1636
1637 ir_texture_opcode op;
1638 switch (instr->op) {
1639 case nir_texop_lod: op = ir_lod; break;
1640 case nir_texop_query_levels: op = ir_query_levels; break;
1641 case nir_texop_tex: op = ir_tex; break;
1642 case nir_texop_tg4: op = ir_tg4; break;
1643 case nir_texop_txb: op = ir_txb; break;
1644 case nir_texop_txd: op = ir_txd; break;
1645 case nir_texop_txf: op = ir_txf; break;
1646 case nir_texop_txf_ms: op = ir_txf_ms; break;
1647 case nir_texop_txl: op = ir_txl; break;
1648 case nir_texop_txs: op = ir_txs; break;
1649 default:
1650 unreachable("unknown texture opcode");
1651 }
1652
1653 emit_texture(op, dest_type, coordinate, instr->coord_components,
1654 shadow_comparitor, lod, lod2, lod_components, sample_index,
1655 offset, offset_components, mcs, gather_component,
1656 is_cube_array, is_rect, sampler, fs_reg(sampler), texunit);
1657
1658 fs_reg dest = get_nir_dest(instr->dest);
1659 dest.type = this->result.type;
1660 unsigned num_components = nir_tex_instr_dest_size(instr);
1661 emit_percomp(MOV(dest, this->result), (1 << num_components) - 1);
1662 }
1663
1664 void
1665 fs_visitor::nir_emit_load_const(nir_load_const_instr *instr)
1666 {
1667 /* Bail on SSA constant loads. These are used for immediates. */
1668 if (instr->dest.is_ssa)
1669 return;
1670
1671 fs_reg dest = get_nir_dest(instr->dest);
1672 dest.type = BRW_REGISTER_TYPE_UD;
1673 if (instr->array_elems == 0) {
1674 for (unsigned i = 0; i < instr->num_components; i++) {
1675 emit(MOV(dest, fs_reg(instr->value.u[i])));
1676 dest.reg_offset++;
1677 }
1678 } else {
1679 for (unsigned i = 0; i < instr->array_elems; i++) {
1680 for (unsigned j = 0; j < instr->num_components; j++) {
1681 emit(MOV(dest, fs_reg(instr->array[i].u[j])));
1682 dest.reg_offset++;
1683 }
1684 }
1685 }
1686 }
1687
1688 void
1689 fs_visitor::nir_emit_jump(nir_jump_instr *instr)
1690 {
1691 switch (instr->type) {
1692 case nir_jump_break:
1693 emit(BRW_OPCODE_BREAK);
1694 break;
1695 case nir_jump_continue:
1696 emit(BRW_OPCODE_CONTINUE);
1697 break;
1698 case nir_jump_return:
1699 default:
1700 unreachable("unknown jump");
1701 }
1702 }