nir: rework nir_link_opt_varyings()
mesa.git: src/compiler/nir/nir_linking_helpers.c
/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "nir.h"
#include "nir_builder.h"
#include "util/set.h"
#include "util/hash_table.h"

/* This file contains various little helpers for doing simple linking in
 * NIR. Eventually, we'll probably want a full-blown varying packing
 * implementation in here. Right now, it just deletes unused things.
 */

/**
 * Returns the bits in the inputs_read, outputs_written, or
 * system_values_read bitfield corresponding to this variable.
 */
static uint64_t
get_variable_io_mask(nir_variable *var, gl_shader_stage stage)
{
   if (var->data.location < 0)
      return 0;

   unsigned location = var->data.patch ?
      var->data.location - VARYING_SLOT_PATCH0 : var->data.location;

   assert(var->data.mode == nir_var_shader_in ||
          var->data.mode == nir_var_shader_out ||
          var->data.mode == nir_var_system_value);
   assert(var->data.location >= 0);

   const struct glsl_type *type = var->type;
   if (nir_is_per_vertex_io(var, stage)) {
      assert(glsl_type_is_array(type));
      type = glsl_get_array_element(type);
   }

   unsigned slots = glsl_count_attribute_slots(type, false);
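   /* For example (illustrative): a two-slot float[2] output at
    * VARYING_SLOT_VAR3 gives slots == 2 and location == VARYING_SLOT_VAR3,
    * so the mask below covers the bits for VAR3 and VAR4.
    */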
   return ((1ull << slots) - 1) << location;
}

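/* Accumulates, into "read" and "patches_read", the output slots that the TCS
 * reads back itself.  TCS invocations may read outputs written by other
 * invocations, so such outputs must stay live even if the TES never reads
 * them.
 */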
static void
tcs_add_output_reads(nir_shader *shader, uint64_t *read, uint64_t *patches_read)
{
   nir_foreach_function(function, shader) {
      if (!function->impl)
         continue;

      nir_foreach_block(block, function->impl) {
         nir_foreach_instr(instr, block) {
            if (instr->type != nir_instr_type_intrinsic)
               continue;

            nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
            if (intrin->intrinsic != nir_intrinsic_load_deref)
               continue;

            nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
            if (deref->mode != nir_var_shader_out)
               continue;

            nir_variable *var = nir_deref_instr_get_variable(deref);
            if (var->data.patch) {
               patches_read[var->data.location_frac] |=
                  get_variable_io_mask(var, shader->info.stage);
            } else {
               read[var->data.location_frac] |=
                  get_variable_io_mask(var, shader->info.stage);
            }
         }
      }
   }
}

/**
 * Helper for removing unused shader I/O variables, by demoting them to global
 * variables (which may then be dead code eliminated).
 *
 * Example usage is:
 *
 * progress = nir_remove_unused_io_vars(producer,
 *                                      &producer->outputs,
 *                                      read, patches_read) ||
 *                                      progress;
 *
 * The "used" arguments should each be an array of 4 uint64_t bitmasks
 * (probably of VARYING_BIT_*), one per .location_frac.  Note that for vector
 * variables, only the first channel (.location_frac) is examined for deciding
 * if the variable is used!
 */
bool
nir_remove_unused_io_vars(nir_shader *shader, struct exec_list *var_list,
                          uint64_t *used_by_other_stage,
                          uint64_t *used_by_other_stage_patches)
{
   bool progress = false;
   uint64_t *used;

   nir_foreach_variable_safe(var, var_list) {
      if (var->data.patch)
         used = used_by_other_stage_patches;
      else
         used = used_by_other_stage;

      if (var->data.location < VARYING_SLOT_VAR0 && var->data.location >= 0)
         continue;

      if (var->data.always_active_io)
         continue;

      uint64_t other_stage = used[var->data.location_frac];

      if (!(other_stage & get_variable_io_mask(var, shader->info.stage))) {
         /* This one is invalid, make it a global variable instead */
         var->data.location = 0;
         var->data.mode = nir_var_global;

         exec_node_remove(&var->node);
         exec_list_push_tail(&shader->globals, &var->node);

         progress = true;
      }
   }

   if (progress)
      nir_fixup_deref_modes(shader);

   return progress;
}

bool
nir_remove_unused_varyings(nir_shader *producer, nir_shader *consumer)
{
   assert(producer->info.stage != MESA_SHADER_FRAGMENT);
   assert(consumer->info.stage != MESA_SHADER_VERTEX);

   uint64_t read[4] = { 0 }, written[4] = { 0 };
   uint64_t patches_read[4] = { 0 }, patches_written[4] = { 0 };

   nir_foreach_variable(var, &producer->outputs) {
      if (var->data.patch) {
         patches_written[var->data.location_frac] |=
            get_variable_io_mask(var, producer->info.stage);
      } else {
         written[var->data.location_frac] |=
            get_variable_io_mask(var, producer->info.stage);
      }
   }

   nir_foreach_variable(var, &consumer->inputs) {
      if (var->data.patch) {
         patches_read[var->data.location_frac] |=
            get_variable_io_mask(var, consumer->info.stage);
      } else {
         read[var->data.location_frac] |=
            get_variable_io_mask(var, consumer->info.stage);
      }
   }

   /* Each TCS invocation can read data written by other TCS invocations,
    * so even if the outputs are not used by the TES we must also make
    * sure they are not read by the TCS before demoting them to globals.
    */
   if (producer->info.stage == MESA_SHADER_TESS_CTRL)
      tcs_add_output_reads(producer, read, patches_read);

   bool progress = false;
   progress = nir_remove_unused_io_vars(producer, &producer->outputs, read,
                                        patches_read);

   progress = nir_remove_unused_io_vars(consumer, &consumer->inputs, written,
                                        patches_written) || progress;

   return progress;
}

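/* Returns the effective interpolation mode for a varying: integer types are
 * always flat, an explicit qualifier wins otherwise, and unqualified varyings
 * fall back to smooth or "none" depending on default_to_smooth_interp.
 */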
static uint8_t
get_interp_type(nir_variable *var, const struct glsl_type *type,
                bool default_to_smooth_interp)
{
   if (glsl_type_is_integer(type))
      return INTERP_MODE_FLAT;
   else if (var->data.interpolation != INTERP_MODE_NONE)
      return var->data.interpolation;
   else if (default_to_smooth_interp)
      return INTERP_MODE_SMOOTH;
   else
      return INTERP_MODE_NONE;
}

#define INTERPOLATE_LOC_SAMPLE 0
#define INTERPOLATE_LOC_CENTROID 1
#define INTERPOLATE_LOC_CENTER 2

static uint8_t
get_interp_loc(nir_variable *var)
{
   if (var->data.sample)
      return INTERPOLATE_LOC_SAMPLE;
   else if (var->data.centroid)
      return INTERPOLATE_LOC_CENTROID;
   else
      return INTERPOLATE_LOC_CENTER;
}

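/* Walks a variable list and records, for each generic varying slot
 * (VARYING_SLOT_VAR0..VAR31), which components are in use along with the
 * interpolation type and location required for that slot.
 */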
static void
get_slot_component_masks_and_interp_types(struct exec_list *var_list,
                                          uint8_t *comps,
                                          uint8_t *interp_type,
                                          uint8_t *interp_loc,
                                          gl_shader_stage stage,
                                          bool default_to_smooth_interp)
{
   nir_foreach_variable_safe(var, var_list) {
      assert(var->data.location >= 0);

      /* Only remap things that aren't built-ins.
       * TODO: add TES patch support.
       */
      if (var->data.location >= VARYING_SLOT_VAR0 &&
          var->data.location - VARYING_SLOT_VAR0 < 32) {

         const struct glsl_type *type = var->type;
         if (nir_is_per_vertex_io(var, stage)) {
            assert(glsl_type_is_array(type));
            type = glsl_get_array_element(type);
         }

         unsigned location = var->data.location - VARYING_SLOT_VAR0;
         unsigned elements =
            glsl_get_vector_elements(glsl_without_array(type));

         bool dual_slot = glsl_type_is_dual_slot(glsl_without_array(type));
         unsigned slots = glsl_count_attribute_slots(type, false);
         unsigned comps_slot2 = 0;
         for (unsigned i = 0; i < slots; i++) {
            interp_type[location + i] =
               get_interp_type(var, type, default_to_smooth_interp);
            interp_loc[location + i] = get_interp_loc(var);

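            /* Dual-slot (64-bit) types spill over into a second varying
             * slot; comps_slot2 tracks how many components land in that
             * second slot.  For example, a dvec3 at location_frac 0 fills
             * all 4 components of the first slot and 2 of the second.
             */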
            if (dual_slot) {
               if (i & 1) {
                  comps[location + i] |= ((1 << comps_slot2) - 1);
               } else {
                  unsigned num_comps = 4 - var->data.location_frac;
                  comps_slot2 = (elements * 2) - num_comps;

                  /* Assume ARB_enhanced_layouts packing rules for doubles */
                  assert(var->data.location_frac == 0 ||
                         var->data.location_frac == 2);
                  assert(comps_slot2 <= 4);

                  comps[location + i] |=
                     ((1 << num_comps) - 1) << var->data.location_frac;
               }
            } else {
               comps[location + i] |=
                  ((1 << elements) - 1) << var->data.location_frac;
            }
         }
      }
   }
}

struct varying_loc
{
   uint8_t component;
   uint32_t location;
};

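/* Applies a remap table (as built by compact_components()) to every generic
 * varying in the list and rebuilds the given slot bitmasks (e.g. the
 * shader_info inputs_read/outputs_written/outputs_read fields) to match the
 * new locations.
 */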
static void
remap_slots_and_components(struct exec_list *var_list, gl_shader_stage stage,
                           struct varying_loc (*remap)[4],
                           uint64_t *slots_used, uint64_t *out_slots_read)
{
   uint64_t out_slots_read_tmp = 0;

   /* We don't touch builtins so just copy the bitmask */
   uint64_t slots_used_tmp =
      *slots_used & (((uint64_t)1 << (VARYING_SLOT_VAR0 - 1)) - 1);

   nir_foreach_variable(var, var_list) {
      assert(var->data.location >= 0);

      /* Only remap things that aren't built-ins */
      if (var->data.location >= VARYING_SLOT_VAR0 &&
          var->data.location - VARYING_SLOT_VAR0 < 32) {
         assert(var->data.location - VARYING_SLOT_VAR0 < 32);

         const struct glsl_type *type = var->type;
         if (nir_is_per_vertex_io(var, stage)) {
            assert(glsl_type_is_array(type));
            type = glsl_get_array_element(type);
         }

         unsigned num_slots = glsl_count_attribute_slots(type, false);
         bool used_across_stages = false;
         bool outputs_read = false;

         unsigned location = var->data.location - VARYING_SLOT_VAR0;
         struct varying_loc *new_loc = &remap[location][var->data.location_frac];

         uint64_t slots = (((uint64_t)1 << num_slots) - 1) << var->data.location;
         if (slots & *slots_used)
            used_across_stages = true;

         if (slots & *out_slots_read)
            outputs_read = true;

         if (new_loc->location) {
            var->data.location = new_loc->location;
            var->data.location_frac = new_loc->component;
         }

         if (var->data.always_active_io) {
            /* We can't apply link time optimisations (specifically array
             * splitting) to these, so we need to copy the existing mask;
             * otherwise we will mess up the mask for things like partially
             * marked arrays.
             */
            if (used_across_stages) {
               slots_used_tmp |=
                  *slots_used & (((uint64_t)1 << num_slots) - 1) << var->data.location;
            }

            if (outputs_read) {
               out_slots_read_tmp |=
                  *out_slots_read & (((uint64_t)1 << num_slots) - 1) << var->data.location;
            }

         } else {
            for (unsigned i = 0; i < num_slots; i++) {
               if (used_across_stages)
                  slots_used_tmp |= (uint64_t)1 << (var->data.location + i);

               if (outputs_read)
                  out_slots_read_tmp |= (uint64_t)1 << (var->data.location + i);
            }
         }
      }
   }

   *slots_used = slots_used_tmp;
   *out_slots_read = out_slots_read_tmp;
}

/* If there are empty components in a slot, compact the remaining components
 * as close to component 0 as possible.  This will make it easier to fill the
 * empty components with components from a different slot in a following pass.
 */
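/* For example (illustrative): if only VAR0.x and VAR1.x are in use and both
 * have matching interpolation settings, the VAR1.x varying can be remapped to
 * VAR0.y, leaving VAR1 completely empty.
 */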
static void
compact_components(nir_shader *producer, nir_shader *consumer, uint8_t *comps,
                   uint8_t *interp_type, uint8_t *interp_loc,
                   bool default_to_smooth_interp)
{
   struct exec_list *input_list = &consumer->inputs;
   struct exec_list *output_list = &producer->outputs;
   struct varying_loc remap[32][4] = {{{0}, {0}}};

   /* Create a cursor for each interpolation type */
   unsigned cursor[4] = {0};

   /* We only need to pass over one stage and we choose the consumer as it
    * seems to cause a larger reduction in instruction counts (tested on i965).
    */
   nir_foreach_variable(var, input_list) {

      /* Only remap things that aren't builtins.
       * TODO: add TES patch support.
       */
      if (var->data.location >= VARYING_SLOT_VAR0 &&
          var->data.location - VARYING_SLOT_VAR0 < 32) {

         /* We can't repack xfb varyings. */
         if (var->data.always_active_io)
            continue;

         const struct glsl_type *type = var->type;
         if (nir_is_per_vertex_io(var, consumer->info.stage)) {
            assert(glsl_type_is_array(type));
            type = glsl_get_array_element(type);
         }

         /* Skip types that require more complex packing handling.
          * TODO: add support for these types.
          */
         if (glsl_type_is_array(type) ||
             glsl_type_is_dual_slot(type) ||
             glsl_type_is_matrix(type) ||
             glsl_type_is_struct(type) ||
             glsl_type_is_64bit(type))
            continue;

         /* We ignore complex types above and all other vector types should
          * have been split into scalar variables by the lower_io_to_scalar
          * pass.  The only exception should be OpenGL xfb varyings.
          */
         if (glsl_get_vector_elements(type) != 1)
            continue;

         unsigned location = var->data.location - VARYING_SLOT_VAR0;
         uint8_t used_comps = comps[location];

         /* If there are no empty components there is nothing more for us to
          * do.
          */
         if (used_comps == 0xf)
            continue;

         bool found_new_offset = false;
         uint8_t interp = get_interp_type(var, type, default_to_smooth_interp);
         for (; cursor[interp] < 32; cursor[interp]++) {
            uint8_t cursor_used_comps = comps[cursor[interp]];

            /* We couldn't find anywhere to pack this varying, so leave it
             * where it is.
             */
            if (cursor[interp] == location &&
                (var->data.location_frac == 0 ||
                 cursor_used_comps & ((1 << (var->data.location_frac)) - 1)))
               break;

            /* We can only pack varyings with matching interpolation types */
            if (interp_type[cursor[interp]] != interp)
               continue;

            /* Interpolation loc must match also.
             * TODO: i965 can handle these if they don't match, but the
             * radeonsi nir backend handles everything as vec4s and so expects
             * this to be the same for all components.  We could make this
             * check driver specific or drop it if NIR ever becomes the only
             * radeonsi backend.
             */
            if (interp_loc[cursor[interp]] != get_interp_loc(var))
               continue;

            /* If the slot is empty just skip it for now; compact_var_list()
             * can be called after this function to remove empty slots for us.
             * TODO: finishing compact_var_list() requires array and matrix
             * splitting.
             */
            if (!cursor_used_comps)
               continue;

            uint8_t unused_comps = ~cursor_used_comps;

            for (unsigned i = 0; i < 4; i++) {
               uint8_t new_var_comps = 1 << i;
               if (unused_comps & new_var_comps) {
                  remap[location][var->data.location_frac].component = i;
                  remap[location][var->data.location_frac].location =
                     cursor[interp] + VARYING_SLOT_VAR0;

                  found_new_offset = true;

                  /* Turn off the mask for the component we are remapping */
                  if (comps[location] & 1 << var->data.location_frac) {
                     comps[location] ^= 1 << var->data.location_frac;
                     comps[cursor[interp]] |= new_var_comps;
                  }
                  break;
               }
            }

            if (found_new_offset)
               break;
         }
      }
   }

   uint64_t zero = 0;
   remap_slots_and_components(input_list, consumer->info.stage, remap,
                              &consumer->info.inputs_read, &zero);
   remap_slots_and_components(output_list, producer->info.stage, remap,
                              &producer->info.outputs_written,
                              &producer->info.outputs_read);
}

/* We assume that this has been called more-or-less directly after
 * remove_unused_varyings.  At this point, all of the varyings that we
 * aren't going to be using have been completely removed and the
 * inputs_read and outputs_written fields in nir_shader_info reflect
 * this.  Therefore, the total set of valid slots is the OR of the two
 * sets of varyings; this accounts for varyings which one side may need
 * to read/write even if the other doesn't.  This can happen if, for
 * instance, an array is used indirectly from one side causing it to be
 * unsplittable but directly from the other.
 */
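/* A typical linker sequence (illustrative) would be:
 *
 *    nir_remove_unused_varyings(producer, consumer);
 *    nir_compact_varyings(producer, consumer, default_to_smooth_interp);
 */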
void
nir_compact_varyings(nir_shader *producer, nir_shader *consumer,
                     bool default_to_smooth_interp)
{
   assert(producer->info.stage != MESA_SHADER_FRAGMENT);
   assert(consumer->info.stage != MESA_SHADER_VERTEX);

   uint8_t comps[32] = {0};
   uint8_t interp_type[32] = {0};
   uint8_t interp_loc[32] = {0};

   get_slot_component_masks_and_interp_types(&producer->outputs, comps,
                                             interp_type, interp_loc,
                                             producer->info.stage,
                                             default_to_smooth_interp);
   get_slot_component_masks_and_interp_types(&consumer->inputs, comps,
                                             interp_type, interp_loc,
                                             consumer->info.stage,
                                             default_to_smooth_interp);

   compact_components(producer, consumer, comps, interp_type, interp_loc,
                      default_to_smooth_interp);
}

/*
 * Mark XFB varyings as always_active_io in the consumer so the linking opts
 * don't touch them.
 */
void
nir_link_xfb_varyings(nir_shader *producer, nir_shader *consumer)
{
   nir_variable *input_vars[MAX_VARYING] = { 0 };

   nir_foreach_variable(var, &consumer->inputs) {
      if (var->data.location >= VARYING_SLOT_VAR0 &&
          var->data.location - VARYING_SLOT_VAR0 < MAX_VARYING) {

         unsigned location = var->data.location - VARYING_SLOT_VAR0;
         input_vars[location] = var;
      }
   }

   nir_foreach_variable(var, &producer->outputs) {
      if (var->data.location >= VARYING_SLOT_VAR0 &&
          var->data.location - VARYING_SLOT_VAR0 < MAX_VARYING) {

         if (!var->data.always_active_io)
            continue;

         unsigned location = var->data.location - VARYING_SLOT_VAR0;
         if (input_vars[location]) {
            input_vars[location]->data.always_active_io = true;
         }
      }
   }
}

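/* Returns true if the output variable is simple enough for the link-time
 * replacement below: a scalar, generic (VARYING_SLOT_VAR*) varying.  More
 * complex types are skipped for now (see the TODO).
 */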
static bool
can_replace_varying(nir_variable *out_var)
{
   /* Skip types that require more complex handling.
    * TODO: add support for these types.
    */
   if (glsl_type_is_array(out_var->type) ||
       glsl_type_is_dual_slot(out_var->type) ||
       glsl_type_is_matrix(out_var->type) ||
       glsl_type_is_struct(out_var->type))
      return false;

   /* Limit this pass to scalars for now to keep things simple. Most varyings
    * should have been lowered to scalars at this point anyway.
    */
   if (!glsl_type_is_scalar(out_var->type))
      return false;

   if (out_var->data.location < VARYING_SLOT_VAR0 ||
       out_var->data.location - VARYING_SLOT_VAR0 >= MAX_VARYING)
      return false;

   return true;
}

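/* Given a store of a constant to an output in the producer, rewrite every
 * load of the matching input (same location and location_frac) in the
 * consumer to use that constant value directly.
 */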
static bool
replace_constant_input(nir_shader *shader, nir_intrinsic_instr *store_intr)
{
   nir_function_impl *impl = nir_shader_get_entrypoint(shader);

   nir_builder b;
   nir_builder_init(&b, impl);

   nir_variable *out_var =
      nir_deref_instr_get_variable(nir_src_as_deref(store_intr->src[0]));

   bool progress = false;
   nir_foreach_block(block, impl) {
      nir_foreach_instr(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
         if (intr->intrinsic != nir_intrinsic_load_deref)
            continue;

         nir_deref_instr *in_deref = nir_src_as_deref(intr->src[0]);
         if (in_deref->mode != nir_var_shader_in)
            continue;

         nir_variable *in_var = nir_deref_instr_get_variable(in_deref);

         if (in_var->data.location != out_var->data.location ||
             in_var->data.location_frac != out_var->data.location_frac)
            continue;

         b.cursor = nir_before_instr(instr);

         nir_load_const_instr *out_const =
            nir_instr_as_load_const(store_intr->src[1].ssa->parent_instr);

         /* Add new const to replace the input */
         nir_ssa_def *nconst = nir_build_imm(&b, store_intr->num_components,
                                             intr->dest.ssa.bit_size,
                                             out_const->value);

         nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(nconst));

         progress = true;
      }
   }

   return progress;
}

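/* Performs simple link-time optimisation of varyings: constant stores to
 * scalar outputs in the last block of the producer are forwarded to the
 * matching input loads in the consumer, which can let later passes remove
 * the varying entirely.
 */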
bool
nir_link_opt_varyings(nir_shader *producer, nir_shader *consumer)
{
   /* TODO: Add support for more shader stage combinations */
   if (consumer->info.stage != MESA_SHADER_FRAGMENT ||
       (producer->info.stage != MESA_SHADER_VERTEX &&
        producer->info.stage != MESA_SHADER_TESS_EVAL))
      return false;

   bool progress = false;

   nir_function_impl *impl = nir_shader_get_entrypoint(producer);

   /* If we find a store in the last block of the producer we can be sure this
    * is the only possible value for this output.
    */
   nir_block *last_block = nir_impl_last_block(impl);
   nir_foreach_instr_reverse(instr, last_block) {
      if (instr->type != nir_instr_type_intrinsic)
         continue;

      nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);

      if (intr->intrinsic != nir_intrinsic_store_deref)
         continue;

      nir_deref_instr *out_deref = nir_src_as_deref(intr->src[0]);
      if (out_deref->mode != nir_var_shader_out)
         continue;

      nir_variable *out_var = nir_deref_instr_get_variable(out_deref);
      if (!can_replace_varying(out_var))
         continue;

      if (intr->src[1].ssa->parent_instr->type == nir_instr_type_load_const) {
         progress |= replace_constant_input(consumer, intr);
      }
   }

   return progress;
}