src/mesa/drivers/dri/i965/brw_nir.c

   1 /*
   2  * Copyright © 2014 Intel Corporation
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the next
  12  * paragraph) shall be included in all copies or substantial portions of the
  13  * Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21  * IN THE SOFTWARE.
  22  */
  23
  24 #include "brw_nir.h"
  25 #include "brw_shader.h"
  26 #include "glsl/nir/glsl_to_nir.h"
  27 #include "glsl/nir/nir_builder.h"
  28 #include "program/prog_to_nir.h"
  29
  30 static bool
  31 is_input(nir_intrinsic_instr *intrin)
  32 {
  33    return intrin->intrinsic == nir_intrinsic_load_input ||
  34           intrin->intrinsic == nir_intrinsic_load_per_vertex_input;
  35 }
  36
  37 static bool
  38 is_output(nir_intrinsic_instr *intrin)
  39 {
  40    return intrin->intrinsic == nir_intrinsic_load_output ||
  41           intrin->intrinsic == nir_intrinsic_load_per_vertex_output ||
  42           intrin->intrinsic == nir_intrinsic_store_output ||
  43           intrin->intrinsic == nir_intrinsic_store_per_vertex_output;
  44 }
  45
  46 /**
  47  * In many cases, we just add the base and offset together, so there's no
  48  * reason to keep them separate.  Sometimes, combining them is essential:
  49  * if a shader only accesses part of a compound variable (such as a matrix
  50  * or array), the variable's base may not actually exist in the VUE map.
  51  *
  52  * This pass adds constant offsets to instr->const_index[0], and resets
  53  * the offset source to 0.  Non-constant offsets remain unchanged - since
  54  * we don't know what part of a compound variable is accessed, we allocate
  55  * storage for the entire thing.
  56  */
  57 struct add_const_offset_to_base_params {
  58    nir_builder b;
  59    nir_variable_mode mode;
  60 };
  61
  62 static bool
  63 add_const_offset_to_base(nir_block *block, void *closure)
  64 {
  65    struct add_const_offset_to_base_params *params = closure;
  66    nir_builder *b = &params->b;
  67
  68    nir_foreach_instr_safe(block, instr) {
  69       if (instr->type != nir_instr_type_intrinsic)
  70          continue;
  71
  72       nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
  73
  74       if ((params->mode == nir_var_shader_in && is_input(intrin)) ||
  75           (params->mode == nir_var_shader_out && is_output(intrin))) {
  76          nir_src *offset = nir_get_io_offset_src(intrin);
  77          nir_const_value *const_offset = nir_src_as_const_value(*offset);
  78
  79          if (const_offset) {
  80             intrin->const_index[0] += const_offset->u[0];
  81             b->cursor = nir_before_instr(&intrin->instr);
  82             nir_instr_rewrite_src(&intrin->instr, offset,
  83                                   nir_src_for_ssa(nir_imm_int(b, 0)));
  84          }
  85       }
  86    }
  87    return true;
  88
  89 }
  90
  91 static bool
  92 remap_vs_attrs(nir_block *block, void *closure)
  93 {
  94    GLbitfield64 inputs_read = *((GLbitfield64 *) closure);
  95
  96    nir_foreach_instr(block, instr) {
  97       if (instr->type != nir_instr_type_intrinsic)
  98          continue;
  99
 100       nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
 101
 102       if (intrin->intrinsic == nir_intrinsic_load_input) {
 103          /* Attributes come in a contiguous block, ordered by their
 104           * gl_vert_attrib value.  That means we can compute the slot
 105           * number for an attribute by masking out the enabled attributes
 106           * before it and counting the bits.
 107           */
 108          int attr = intrin->const_index[0];
 109          int slot = _mesa_bitcount_64(inputs_read & BITFIELD64_MASK(attr));
 110
 111          intrin->const_index[0] = 4 * slot;
 112       }
 113    }
 114    return true;
 115 }
 116
 117 static bool
 118 remap_inputs_with_vue_map(nir_block *block, void *closure)
 119 {
 120    const struct brw_vue_map *vue_map = closure;
 121
 122    nir_foreach_instr(block, instr) {
 123       if (instr->type != nir_instr_type_intrinsic)
 124          continue;
 125
 126       nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
 127
 128       if (intrin->intrinsic == nir_intrinsic_load_input ||
 129           intrin->intrinsic == nir_intrinsic_load_per_vertex_input) {
 130          int vue_slot = vue_map->varying_to_slot[intrin->const_index[0]];
 131          assert(vue_slot != -1);
 132          intrin->const_index[0] = vue_slot;
 133       }
 134    }
 135    return true;
 136 }
 137
 138 struct remap_patch_urb_offsets_state {
 139    nir_builder b;
 140    struct brw_vue_map vue_map;
 141 };
 142
 143 static bool
 144 remap_patch_urb_offsets(nir_block *block, void *closure)
 145 {
 146    struct remap_patch_urb_offsets_state *state = closure;
 147
 148    nir_foreach_instr_safe(block, instr) {
 149       if (instr->type != nir_instr_type_intrinsic)
 150          continue;
 151
 152       nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
 153
 154       gl_shader_stage stage = state->b.shader->stage;
 155
 156       if ((stage == MESA_SHADER_TESS_CTRL && is_output(intrin)) ||
 157           (stage == MESA_SHADER_TESS_EVAL && is_input(intrin))) {
 158          int vue_slot = state->vue_map.varying_to_slot[intrin->const_index[0]];
 159          assert(vue_slot != -1);
 160          intrin->const_index[0] = vue_slot;
 161
 162          nir_src *vertex = nir_get_io_vertex_index_src(intrin);
 163          if (vertex) {
 164             nir_const_value *const_vertex = nir_src_as_const_value(*vertex);
 165             if (const_vertex) {
 166                intrin->const_index[0] += const_vertex->u[0] *
 167                                          state->vue_map.num_per_vertex_slots;
 168             } else {
 169                state->b.cursor = nir_before_instr(&intrin->instr);
 170
 171                /* Multiply by the number of per-vertex slots. */
 172                nir_ssa_def *vertex_offset =
 173                   nir_imul(&state->b,
 174                            nir_ssa_for_src(&state->b, *vertex, 1),
 175                            nir_imm_int(&state->b,
 176                                        state->vue_map.num_per_vertex_slots));
 177
 178                /* Add it to the existing offset */
 179                nir_src *offset = nir_get_io_offset_src(intrin);
 180                nir_ssa_def *total_offset =
 181                   nir_iadd(&state->b, vertex_offset,
 182                            nir_ssa_for_src(&state->b, *offset, 1));
 183
 184                nir_instr_rewrite_src(&intrin->instr, offset,
 185                                      nir_src_for_ssa(total_offset));
 186             }
 187          }
 188       }
 189    }
 190    return true;
 191 }
 192
 193 static void
 194 brw_nir_lower_inputs(nir_shader *nir,
 195                      const struct brw_device_info *devinfo,
 196                      bool is_scalar)
 197 {
 198    struct add_const_offset_to_base_params params = {
 199       .mode = nir_var_shader_in
 200    };
 201
 202    switch (nir->stage) {
 203    case MESA_SHADER_VERTEX:
 204       /* Start with the location of the variable's base. */
 205       foreach_list_typed(nir_variable, var, node, &nir->inputs) {
 206          var->data.driver_location = var->data.location;
 207       }
 208
 209       /* Now use nir_lower_io to walk dereference chains.  Attribute arrays
 210        * are loaded as one vec4 per element (or matrix column), so we use
 211        * type_size_vec4 here.
 212        */
 213       nir_lower_io(nir, nir_var_shader_in, type_size_vec4);
 214
 215       if (is_scalar) {
 216          /* Finally, translate VERT_ATTRIB_* values into the actual registers.
 217           *
 218           * Note that we can use nir->info.inputs_read instead of
 219           * key->inputs_read since the two are identical aside from Gen4-5
 220           * edge flag differences.
 221           */
 222          GLbitfield64 inputs_read = nir->info.inputs_read;
 223
 224          /* This pass needs actual constants */
 225          nir_opt_constant_folding(nir);
 226
 227          nir_foreach_function(nir, function) {
 228             if (function->impl) {
 229                nir_builder_init(&params.b, function->impl);
 230                nir_foreach_block(function->impl, add_const_offset_to_base, &params);
 231                nir_foreach_block(function->impl, remap_vs_attrs, &inputs_read);
 232             }
 233          }
 234       }
 235       break;
 236    case MESA_SHADER_TESS_CTRL:
 237    case MESA_SHADER_GEOMETRY: {
 238       if (!is_scalar && nir->stage == MESA_SHADER_GEOMETRY) {
 239          foreach_list_typed(nir_variable, var, node, &nir->inputs) {
 240             var->data.driver_location = var->data.location;
 241          }
 242       } else {
 243          /* The GLSL linker will have already matched up GS inputs and
 244           * the outputs of prior stages.  The driver does extend VS outputs
 245           * in some cases, but only for legacy OpenGL or Gen4-5 hardware,
 246           * neither of which offer geometry shader support.  So we can
 247           * safely ignore that.
 248           *
 249           * For SSO pipelines, we use a fixed VUE map layout based on variable
 250           * locations, so we can rely on rendezvous-by-location to make this
 251           * work.
 252           *
 253           * However, we need to ignore VARYING_SLOT_PRIMITIVE_ID, as it's not
 254           * written by previous stages and shows up via payload magic.
 255           */
 256          struct brw_vue_map input_vue_map;
 257          GLbitfield64 inputs_read =
 258             nir->info.inputs_read & ~VARYING_BIT_PRIMITIVE_ID;
 259          brw_compute_vue_map(devinfo, &input_vue_map, inputs_read,
 260                              nir->info.separate_shader ||
 261                              nir->stage == MESA_SHADER_TESS_CTRL);
 262
 263          foreach_list_typed(nir_variable, var, node, &nir->inputs) {
 264             var->data.driver_location = var->data.location;
 265          }
 266
 267          /* Inputs are stored in vec4 slots, so use type_size_vec4(). */
 268          nir_lower_io(nir, nir_var_shader_in, type_size_vec4);
 269
 270          /* This pass needs actual constants */
 271          nir_opt_constant_folding(nir);
 272
 273          nir_foreach_function(nir, function) {
 274             if (function->impl) {
 275                nir_builder_init(&params.b, function->impl);
 276                nir_foreach_block(function->impl, add_const_offset_to_base, &params);
 277                nir_foreach_block(function->impl, remap_inputs_with_vue_map,
 278                                  &input_vue_map);
 279             }
 280          }
 281       }
 282       break;
 283    }
 284    case MESA_SHADER_TESS_EVAL: {
 285       struct remap_patch_urb_offsets_state state;
 286       brw_compute_tess_vue_map(&state.vue_map,
 287                                nir->info.inputs_read & ~VARYING_BIT_PRIMITIVE_ID,
 288                                nir->info.patch_inputs_read);
 289
 290       foreach_list_typed(nir_variable, var, node, &nir->inputs) {
 291          var->data.driver_location = var->data.location;
 292       }
 293
 294       nir_lower_io(nir, nir_var_shader_in, type_size_vec4);
 295
 296       /* This pass needs actual constants */
 297       nir_opt_constant_folding(nir);
 298
 299       nir_foreach_function(nir, function) {
 300          if (function->impl) {
 301             nir_builder_init(&params.b, function->impl);
 302             nir_foreach_block(function->impl, add_const_offset_to_base, &params);
 303             nir_builder_init(&state.b, function->impl);
 304             nir_foreach_block(function->impl, remap_patch_urb_offsets, &state);
 305          }
 306       }
 307       break;
 308    }
 309    case MESA_SHADER_FRAGMENT:
 310       assert(is_scalar);
 311       nir_assign_var_locations(&nir->inputs, &nir->num_inputs,
 312                                type_size_scalar);
 313       break;
 314    case MESA_SHADER_COMPUTE:
 315       /* Compute shaders have no inputs. */
 316       assert(exec_list_is_empty(&nir->inputs));
 317       break;
 318    default:
 319       unreachable("unsupported shader stage");
 320    }
 321 }
 322
 323 static void
 324 brw_nir_lower_outputs(nir_shader *nir,
 325                       const struct brw_device_info *devinfo,
 326                       bool is_scalar)
 327 {
 328    switch (nir->stage) {
 329    case MESA_SHADER_VERTEX:
 330    case MESA_SHADER_TESS_EVAL:
 331    case MESA_SHADER_GEOMETRY:
 332       if (is_scalar) {
 333          nir_assign_var_locations(&nir->outputs, &nir->num_outputs,
 334                                   type_size_vec4_times_4);
 335          nir_lower_io(nir, nir_var_shader_out, type_size_vec4_times_4);
 336       } else {
 337          nir_foreach_variable(var, &nir->outputs)
 338             var->data.driver_location = var->data.location;
 339       }
 340       break;
 341    case MESA_SHADER_TESS_CTRL: {
 342       struct add_const_offset_to_base_params params = {
 343          .mode = nir_var_shader_out
 344       };
 345
 346       struct remap_patch_urb_offsets_state state;
 347       brw_compute_tess_vue_map(&state.vue_map, nir->info.outputs_written,
 348                                nir->info.patch_outputs_written);
 349
 350       nir_foreach_variable(var, &nir->outputs) {
 351          var->data.driver_location = var->data.location;
 352       }
 353
 354       nir_lower_io(nir, nir_var_shader_out, type_size_vec4);
 355
 356       /* This pass needs actual constants */
 357       nir_opt_constant_folding(nir);
 358
 359       nir_foreach_function(nir, function) {
 360          if (function->impl) {
 361             nir_builder_init(&params.b, function->impl);
 362             nir_foreach_block(function->impl, add_const_offset_to_base, &params);
 363             nir_builder_init(&state.b, function->impl);
 364             nir_foreach_block(function->impl, remap_patch_urb_offsets, &state);
 365          }
 366       }
 367       break;
 368    }
 369    case MESA_SHADER_FRAGMENT:
 370       nir_assign_var_locations(&nir->outputs, &nir->num_outputs,
 371                                type_size_scalar);
 372       break;
 373    case MESA_SHADER_COMPUTE:
 374       /* Compute shaders have no outputs. */
 375       assert(exec_list_is_empty(&nir->outputs));
 376       break;
 377    default:
 378       unreachable("unsupported shader stage");
 379    }
 380 }
 381
 382 static int
 383 type_size_scalar_bytes(const struct glsl_type *type)
 384 {
 385    return type_size_scalar(type) * 4;
 386 }
 387
 388 static int
 389 type_size_vec4_bytes(const struct glsl_type *type)
 390 {
 391    return type_size_vec4(type) * 16;
 392 }
 393
 394 static void
 395 brw_nir_lower_uniforms(nir_shader *nir, bool is_scalar)
 396 {
 397    if (is_scalar) {
 398       nir_assign_var_locations(&nir->uniforms, &nir->num_uniforms,
 399                                type_size_scalar_bytes);
 400       nir_lower_io(nir, nir_var_uniform, type_size_scalar_bytes);
 401    } else {
 402       nir_assign_var_locations(&nir->uniforms, &nir->num_uniforms,
 403                                type_size_vec4_bytes);
 404       nir_lower_io(nir, nir_var_uniform, type_size_vec4_bytes);
 405    }
 406 }
 407
 408 #include "util/debug.h"
 409
 410 static bool
 411 should_clone_nir()
 412 {
 413    static int should_clone = -1;
 414    if (should_clone < 1)
 415       should_clone = env_var_as_boolean("NIR_TEST_CLONE", false);
 416
 417    return should_clone;
 418 }
 419
 420 #define _OPT(do_pass) (({                                            \
 421    bool this_progress = true;                                        \
 422    do_pass                                                           \
 423    nir_validate_shader(nir);                                         \
 424    if (should_clone_nir()) {                                         \
 425       nir_shader *clone = nir_shader_clone(ralloc_parent(nir), nir); \
 426       ralloc_free(nir);                                              \
 427       nir = clone;                                                   \
 428    }                                                                 \
 429    this_progress;                                                    \
 430 }))
 431
 432 #define OPT(pass, ...) _OPT(                   \
 433    nir_metadata_set_validation_flag(nir);      \
 434    this_progress = pass(nir ,##__VA_ARGS__);   \
 435    if (this_progress) {                        \
 436       progress = true;                         \
 437       nir_metadata_check_validation_flag(nir); \
 438    }                                           \
 439 )
 440
 441 #define OPT_V(pass, ...) _OPT( \
 442    pass(nir, ##__VA_ARGS__);   \
 443 )
 444
 445 static nir_shader *
 446 nir_optimize(nir_shader *nir, bool is_scalar)
 447 {
 448    bool progress;
 449    do {
 450       progress = false;
 451       OPT_V(nir_lower_vars_to_ssa);
 452
 453       if (is_scalar) {
 454          OPT_V(nir_lower_alu_to_scalar);
 455       }
 456
 457       OPT(nir_copy_prop);
 458
 459       if (is_scalar) {
 460          OPT_V(nir_lower_phis_to_scalar);
 461       }
 462
 463       OPT(nir_copy_prop);
 464       OPT(nir_opt_dce);
 465       OPT(nir_opt_cse);
 466       OPT(nir_opt_peephole_select);
 467       OPT(nir_opt_algebraic);
 468       OPT(nir_opt_constant_folding);
 469       OPT(nir_opt_dead_cf);
 470       OPT(nir_opt_remove_phis);
 471       OPT(nir_opt_undef);
 472    } while (progress);
 473
 474    return nir;
 475 }
 476
 477 /* Does some simple lowering and runs the standard suite of optimizations
 478  *
 479  * This is intended to be called more-or-less directly after you get the
 480  * shader out of GLSL or some other source.  While it is geared towards i965,
 481  * it is not at all generator-specific except for the is_scalar flag.  Even
 482  * there, it is safe to call with is_scalar = false for a shader that is
 483  * intended for the FS backend as long as nir_optimize is called again with
 484  * is_scalar = true to scalarize everything prior to code gen.
 485  */
 486 nir_shader *
 487 brw_preprocess_nir(nir_shader *nir, bool is_scalar)
 488 {
 489    bool progress; /* Written by OPT and OPT_V */
 490    (void)progress;
 491
 492    if (nir->stage == MESA_SHADER_GEOMETRY)
 493       OPT(nir_lower_gs_intrinsics);
 494
 495    static const nir_lower_tex_options tex_options = {
 496       .lower_txp = ~0,
 497    };
 498
 499    OPT(nir_lower_tex, &tex_options);
 500    OPT(nir_normalize_cubemap_coords);
 501
 502    OPT(nir_lower_global_vars_to_local);
 503
 504    OPT(nir_split_var_copies);
 505
 506    nir = nir_optimize(nir, is_scalar);
 507
 508    /* Lower a bunch of stuff */
 509    OPT_V(nir_lower_var_copies);
 510
 511    /* Get rid of split copies */
 512    nir = nir_optimize(nir, is_scalar);
 513
 514    OPT(nir_remove_dead_variables);
 515
 516    return nir;
 517 }
 518
 519 /** Lower input and output loads and stores for i965. */
 520 nir_shader *
 521 brw_nir_lower_io(nir_shader *nir,
 522                  const struct brw_device_info *devinfo,
 523                  bool is_scalar)
 524 {
 525    bool progress; /* Written by OPT and OPT_V */
 526    (void)progress;
 527
 528    OPT_V(brw_nir_lower_inputs, devinfo, is_scalar);
 529    OPT_V(brw_nir_lower_outputs, devinfo, is_scalar);
 530    OPT_V(nir_lower_io, nir_var_all, is_scalar ? type_size_scalar : type_size_vec4);
 531
 532    return nir_optimize(nir, is_scalar);
 533 }
 534
 535 /* Prepare the given shader for codegen
 536  *
 537  * This function is intended to be called right before going into the actual
 538  * backend and is highly backend-specific.  Also, once this function has been
 539  * called on a shader, it will no longer be in SSA form so most optimizations
 540  * will not work.
 541  */
 542 nir_shader *
 543 brw_postprocess_nir(nir_shader *nir,
 544                     const struct brw_device_info *devinfo,
 545                     bool is_scalar)
 546 {
 547    bool debug_enabled =
 548       (INTEL_DEBUG & intel_debug_flag_for_shader_stage(nir->stage));
 549
 550    bool progress; /* Written by OPT and OPT_V */
 551    (void)progress;
 552
 553    if (devinfo->gen >= 6) {
 554       /* Try and fuse multiply-adds */
 555       OPT(brw_nir_opt_peephole_ffma);
 556    }
 557
 558    OPT(nir_opt_algebraic_late);
 559
 560    OPT(nir_lower_locals_to_regs);
 561
 562    OPT_V(nir_lower_to_source_mods);
 563    OPT(nir_copy_prop);
 564    OPT(nir_opt_dce);
 565
 566    if (unlikely(debug_enabled)) {
 567       /* Re-index SSA defs so we print more sensible numbers. */
 568       nir_foreach_function(nir, function) {
 569          if (function->impl)
 570             nir_index_ssa_defs(function->impl);
 571       }
 572
 573       fprintf(stderr, "NIR (SSA form) for %s shader:\n",
 574               _mesa_shader_stage_to_string(nir->stage));
 575       nir_print_shader(nir, stderr);
 576    }
 577
 578    OPT_V(nir_convert_from_ssa, true);
 579
 580    if (!is_scalar) {
 581       OPT_V(nir_move_vec_src_uses_to_dest);
 582       OPT(nir_lower_vec_to_movs);
 583    }
 584
 585    /* This is the last pass we run before we start emitting stuff.  It
 586     * determines when we need to insert boolean resolves on Gen <= 5.  We
 587     * run it last because it stashes data in instr->pass_flags and we don't
 588     * want that to be squashed by other NIR passes.
 589     */
 590    if (devinfo->gen <= 5)
 591       brw_nir_analyze_boolean_resolves(nir);
 592
 593    nir_sweep(nir);
 594
 595    if (unlikely(debug_enabled)) {
 596       fprintf(stderr, "NIR (final form) for %s shader:\n",
 597               _mesa_shader_stage_to_string(nir->stage));
 598       nir_print_shader(nir, stderr);
 599    }
 600
 601    return nir;
 602 }
 603
 604 nir_shader *
 605 brw_create_nir(struct brw_context *brw,
 606                const struct gl_shader_program *shader_prog,
 607                const struct gl_program *prog,
 608                gl_shader_stage stage,
 609                bool is_scalar)
 610 {
 611    struct gl_context *ctx = &brw->ctx;
 612    const struct brw_device_info *devinfo = brw->intelScreen->devinfo;
 613    const nir_shader_compiler_options *options =
 614       ctx->Const.ShaderCompilerOptions[stage].NirOptions;
 615    bool progress;
 616    nir_shader *nir;
 617
 618    /* First, lower the GLSL IR or Mesa IR to NIR */
 619    if (shader_prog) {
 620       nir = glsl_to_nir(shader_prog, stage, options);
 621    } else {
 622       nir = prog_to_nir(prog, options);
 623       OPT_V(nir_convert_to_ssa); /* turn registers into SSA */
 624    }
 625    nir_validate_shader(nir);
 626
 627    (void)progress;
 628
 629    nir = brw_preprocess_nir(nir, is_scalar);
 630
 631    OPT(nir_lower_system_values);
 632    OPT_V(brw_nir_lower_uniforms, is_scalar);
 633
 634    if (shader_prog) {
 635       OPT_V(nir_lower_samplers, shader_prog);
 636       OPT_V(nir_lower_atomics, shader_prog);
 637    }
 638
 639    if (nir->stage != MESA_SHADER_TESS_CTRL &&
 640        nir->stage != MESA_SHADER_TESS_EVAL) {
 641       nir = brw_nir_lower_io(nir, devinfo, is_scalar);
 642    }
 643
 644    return nir;
 645 }
 646
 647 nir_shader *
 648 brw_nir_apply_sampler_key(nir_shader *nir,
 649                           const struct brw_device_info *devinfo,
 650                           const struct brw_sampler_prog_key_data *key_tex,
 651                           bool is_scalar)
 652 {
 653    nir_lower_tex_options tex_options = { 0 };
 654
 655    /* Iron Lake and prior require lowering of all rectangle textures */
 656    if (devinfo->gen < 6)
 657       tex_options.lower_rect = true;
 658
 659    /* Prior to Broadwell, our hardware can't actually do GL_CLAMP */
 660    if (devinfo->gen < 8) {
 661       tex_options.saturate_s = key_tex->gl_clamp_mask[0];
 662       tex_options.saturate_t = key_tex->gl_clamp_mask[1];
 663       tex_options.saturate_r = key_tex->gl_clamp_mask[2];
 664    }
 665
 666    /* Prior to Haswell, we have to fake texture swizzle */
 667    for (unsigned s = 0; s < MAX_SAMPLERS; s++) {
 668       if (key_tex->swizzles[s] == SWIZZLE_NOOP)
 669          continue;
 670
 671       tex_options.swizzle_result |= (1 << s);
 672       for (unsigned c = 0; c < 4; c++)
 673          tex_options.swizzles[s][c] = GET_SWZ(key_tex->swizzles[s], c);
 674    }
 675
 676    if (nir_lower_tex(nir, &tex_options)) {
 677       nir_validate_shader(nir);
 678       nir = nir_optimize(nir, is_scalar);
 679    }
 680
 681    return nir;
 682 }
 683
 684 enum brw_reg_type
 685 brw_type_for_nir_type(nir_alu_type type)
 686 {
 687    switch (type) {
 688    case nir_type_uint:
 689       return BRW_REGISTER_TYPE_UD;
 690    case nir_type_bool:
 691    case nir_type_int:
 692       return BRW_REGISTER_TYPE_D;
 693    case nir_type_float:
 694       return BRW_REGISTER_TYPE_F;
 695    default:
 696       unreachable("unknown type");
 697    }
 698
 699    return BRW_REGISTER_TYPE_F;
 700 }
 701
 702 /* Returns the glsl_base_type corresponding to a nir_alu_type.
 703  * This is used by both brw_vec4_nir and brw_fs_nir.
 704  */
 705 enum glsl_base_type
 706 brw_glsl_base_type_for_nir_type(nir_alu_type type)
 707 {
 708    switch (type) {
 709    case nir_type_float:
 710       return GLSL_TYPE_FLOAT;
 711
 712    case nir_type_int:
 713       return GLSL_TYPE_INT;
 714
 715    case nir_type_uint:
 716       return GLSL_TYPE_UINT;
 717
 718    default:
 719       unreachable("bad type");
 720    }
 721 }