/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "nir.h"
#include "nir_builder.h"
#include "util/set.h"
#include "util/hash_table.h"

/* This file contains various little helpers for doing simple linking in
 * NIR. Eventually, we'll probably want a full-blown varying packing
 * implementation in here. Right now, it removes unused varyings, compacts
 * the components that remain, and propagates constant or duplicated outputs
 * into the consumer stage.
 */

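/* A rough usage sketch (an assumption, not mandated by this file): a driver
 * linker might drive these helpers for one producer/consumer pair roughly as
 * below; the shader handles and the default_to_smooth_interp flag are
 * hypothetical caller state.
 *
 *    nir_link_xfb_varyings(producer, consumer);
 *
 *    if (nir_link_opt_varyings(producer, consumer))
 *       nir_opt_dce(consumer);
 *
 *    nir_remove_unused_varyings(producer, consumer);
 *    nir_compact_varyings(producer, consumer, default_to_smooth_interp);
 */
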
/**
 * Returns the bits in the inputs_read, outputs_written, or
 * system_values_read bitfield corresponding to this variable.
 */
static uint64_t
get_variable_io_mask(nir_variable *var, gl_shader_stage stage)
{
   if (var->data.location < 0)
      return 0;

   unsigned location = var->data.patch ?
      var->data.location - VARYING_SLOT_PATCH0 : var->data.location;

   assert(var->data.mode == nir_var_shader_in ||
          var->data.mode == nir_var_shader_out ||
          var->data.mode == nir_var_system_value);
   assert(var->data.location >= 0);

   const struct glsl_type *type = var->type;
   if (nir_is_per_vertex_io(var, stage)) {
      assert(glsl_type_is_array(type));
      type = glsl_get_array_element(type);
   }

   unsigned slots = glsl_count_attribute_slots(type, false);
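   /* E.g. (illustrative values): a non-per-vertex vec4[2] covers two slots,
    * so with var->data.location == VARYING_SLOT_VAR1 the mask below is
    * 0x3ull << VARYING_SLOT_VAR1.
    */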
   return ((1ull << slots) - 1) << location;
}

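/* Accumulate, per location_frac, the output slots that the TCS reads back
 * itself (via load_deref of a nir_var_shader_out variable); such outputs
 * must be kept even when the TES never reads them.
 */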
static void
tcs_add_output_reads(nir_shader *shader, uint64_t *read, uint64_t *patches_read)
{
   nir_foreach_function(function, shader) {
      if (!function->impl)
         continue;

      nir_foreach_block(block, function->impl) {
         nir_foreach_instr(instr, block) {
            if (instr->type != nir_instr_type_intrinsic)
               continue;

            nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
            if (intrin->intrinsic != nir_intrinsic_load_deref)
               continue;

            nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
            if (deref->mode != nir_var_shader_out)
               continue;

            nir_variable *var = nir_deref_instr_get_variable(deref);
            if (var->data.patch) {
               patches_read[var->data.location_frac] |=
                  get_variable_io_mask(var, shader->info.stage);
            } else {
               read[var->data.location_frac] |=
                  get_variable_io_mask(var, shader->info.stage);
            }
         }
      }
   }
}

/**
 * Helper for removing unused shader I/O variables, by demoting them to global
 * variables (which may then be dead-code eliminated).
 *
 * Example usage is:
 *
 * progress = nir_remove_unused_io_vars(producer,
 *                                      &producer->outputs,
 *                                      read, patches_read) ||
 *                                      progress;
 *
 * The "used" masks should be arrays of 4 uint64_t bitmasks (of VARYING_BIT_*
 * values), one per possible .location_frac.  Note that for vector variables,
 * only the first channel (.location_frac) is examined when deciding whether
 * the variable is used!
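 *
 * A sketch of building such a mask array from the other stage (this mirrors
 * what nir_remove_unused_varyings() below already does):
 *
 *    uint64_t read[4] = { 0 };
 *    nir_foreach_variable(var, &consumer->inputs) {
 *       read[var->data.location_frac] |=
 *          get_variable_io_mask(var, consumer->info.stage);
 *    }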
 */
bool
nir_remove_unused_io_vars(nir_shader *shader, struct exec_list *var_list,
                          uint64_t *used_by_other_stage,
                          uint64_t *used_by_other_stage_patches)
{
   bool progress = false;
   uint64_t *used;

   nir_foreach_variable_safe(var, var_list) {
      if (var->data.patch)
         used = used_by_other_stage_patches;
      else
         used = used_by_other_stage;

      if (var->data.location < VARYING_SLOT_VAR0 && var->data.location >= 0)
         continue;

      if (var->data.always_active_io)
         continue;

      if (var->data.explicit_xfb_buffer)
         continue;

      uint64_t other_stage = used[var->data.location_frac];

      if (!(other_stage & get_variable_io_mask(var, shader->info.stage))) {
         /* Not used by the other stage; make it an ordinary global instead */
         var->data.location = 0;
         var->data.mode = nir_var_shader_temp;

         exec_node_remove(&var->node);
         exec_list_push_tail(&shader->globals, &var->node);

         progress = true;
      }
   }

   if (progress)
      nir_fixup_deref_modes(shader);

   return progress;
}

bool
nir_remove_unused_varyings(nir_shader *producer, nir_shader *consumer)
{
   assert(producer->info.stage != MESA_SHADER_FRAGMENT);
   assert(consumer->info.stage != MESA_SHADER_VERTEX);

   uint64_t read[4] = { 0 }, written[4] = { 0 };
   uint64_t patches_read[4] = { 0 }, patches_written[4] = { 0 };

   nir_foreach_variable(var, &producer->outputs) {
      if (var->data.patch) {
         patches_written[var->data.location_frac] |=
            get_variable_io_mask(var, producer->info.stage);
      } else {
         written[var->data.location_frac] |=
            get_variable_io_mask(var, producer->info.stage);
      }
   }

   nir_foreach_variable(var, &consumer->inputs) {
      if (var->data.patch) {
         patches_read[var->data.location_frac] |=
            get_variable_io_mask(var, consumer->info.stage);
      } else {
         read[var->data.location_frac] |=
            get_variable_io_mask(var, consumer->info.stage);
      }
   }

   /* Each TCS invocation can read data written by other TCS invocations,
    * so even if the outputs are not used by the TES, we must also make
    * sure they are not read by the TCS before demoting them to globals.
    */
   if (producer->info.stage == MESA_SHADER_TESS_CTRL)
      tcs_add_output_reads(producer, read, patches_read);

   bool progress = false;
   progress = nir_remove_unused_io_vars(producer, &producer->outputs, read,
                                        patches_read);

   progress = nir_remove_unused_io_vars(consumer, &consumer->inputs, written,
                                        patches_written) || progress;

   return progress;
}

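/* Integer varyings must be flat-shaded (GLSL requires the flat qualifier on
 * integer fragment inputs), so integer types always count as flat here
 * regardless of the variable's declared interpolation.
 */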
static uint8_t
get_interp_type(nir_variable *var, const struct glsl_type *type,
                bool default_to_smooth_interp)
{
   if (glsl_type_is_integer(type))
      return INTERP_MODE_FLAT;
   else if (var->data.interpolation != INTERP_MODE_NONE)
      return var->data.interpolation;
   else if (default_to_smooth_interp)
      return INTERP_MODE_SMOOTH;
   else
      return INTERP_MODE_NONE;
}

#define INTERPOLATE_LOC_SAMPLE 0
#define INTERPOLATE_LOC_CENTROID 1
#define INTERPOLATE_LOC_CENTER 2

static uint8_t
get_interp_loc(nir_variable *var)
{
   if (var->data.sample)
      return INTERPOLATE_LOC_SAMPLE;
   else if (var->data.centroid)
      return INTERPOLATE_LOC_CENTROID;
   else
      return INTERPOLATE_LOC_CENTER;
}

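/* Gather, for each of the 32 generic varying slots, which components are in
 * use and with what interpolation type and location, so that
 * compact_components() can tell where free space is available.
 */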
static void
get_slot_component_masks_and_interp_types(struct exec_list *var_list,
                                          uint8_t *comps,
                                          uint8_t *interp_type,
                                          uint8_t *interp_loc,
                                          gl_shader_stage stage,
                                          bool default_to_smooth_interp)
{
   nir_foreach_variable_safe(var, var_list) {
      assert(var->data.location >= 0);

      /* Only remap things that aren't built-ins.
       * TODO: add TES patch support.
       */
      if (var->data.location >= VARYING_SLOT_VAR0 &&
          var->data.location - VARYING_SLOT_VAR0 < 32) {

         const struct glsl_type *type = var->type;
         if (nir_is_per_vertex_io(var, stage)) {
            assert(glsl_type_is_array(type));
            type = glsl_get_array_element(type);
         }

         unsigned location = var->data.location - VARYING_SLOT_VAR0;
         unsigned elements =
            glsl_get_vector_elements(glsl_without_array(type));

         bool dual_slot = glsl_type_is_dual_slot(glsl_without_array(type));
         unsigned slots = glsl_count_attribute_slots(type, false);
         unsigned comps_slot2 = 0;
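         /* Worked example (illustrative): a dvec3 at location_frac 0 has
          * elements == 3 and is dual-slot, so below the first slot gets the
          * full 0xf mask and the second slot gets comps_slot2 == 3 * 2 - 4
          * == 2 components, i.e. the mask 0x3.
          */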
         for (unsigned i = 0; i < slots; i++) {
            interp_type[location + i] =
               get_interp_type(var, type, default_to_smooth_interp);
            interp_loc[location + i] = get_interp_loc(var);

            if (dual_slot) {
               if (i & 1) {
                  comps[location + i] |= ((1 << comps_slot2) - 1);
               } else {
                  unsigned num_comps = 4 - var->data.location_frac;
                  comps_slot2 = (elements * 2) - num_comps;

                  /* Assume ARB_enhanced_layouts packing rules for doubles */
                  assert(var->data.location_frac == 0 ||
                         var->data.location_frac == 2);
                  assert(comps_slot2 <= 4);

                  comps[location + i] |=
                     ((1 << num_comps) - 1) << var->data.location_frac;
               }
            } else {
               comps[location + i] |=
                  ((1 << elements) - 1) << var->data.location_frac;
            }
         }
      }
   }
}

struct varying_loc
{
   uint8_t component;
   uint32_t location;
};

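/* Apply the (location, component) remap table built by compact_components()
 * to every variable in var_list, and rebuild the slots_used and
 * out_slots_read bitmasks so they match the new locations.
 */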
static void
remap_slots_and_components(struct exec_list *var_list, gl_shader_stage stage,
                           struct varying_loc (*remap)[4],
                           uint64_t *slots_used, uint64_t *out_slots_read)
{
   uint64_t out_slots_read_tmp = 0;

   /* We don't touch builtins, so just copy the bitmask */
   uint64_t slots_used_tmp =
      *slots_used & (((uint64_t)1 << (VARYING_SLOT_VAR0 - 1)) - 1);

   nir_foreach_variable(var, var_list) {
      assert(var->data.location >= 0);

      /* Only remap things that aren't built-ins */
      if (var->data.location >= VARYING_SLOT_VAR0 &&
          var->data.location - VARYING_SLOT_VAR0 < 32) {
         assert(var->data.location - VARYING_SLOT_VAR0 < 32);

         const struct glsl_type *type = var->type;
         if (nir_is_per_vertex_io(var, stage)) {
            assert(glsl_type_is_array(type));
            type = glsl_get_array_element(type);
         }

         unsigned num_slots = glsl_count_attribute_slots(type, false);
         bool used_across_stages = false;
         bool outputs_read = false;

         unsigned location = var->data.location - VARYING_SLOT_VAR0;
         struct varying_loc *new_loc = &remap[location][var->data.location_frac];

         uint64_t slots = (((uint64_t)1 << num_slots) - 1) << var->data.location;
         if (slots & *slots_used)
            used_across_stages = true;

         if (slots & *out_slots_read)
            outputs_read = true;

         if (new_loc->location) {
            var->data.location = new_loc->location;
            var->data.location_frac = new_loc->component;
         }

         if (var->data.always_active_io) {
            /* We can't apply link-time optimisations (specifically array
             * splitting) to these, so we need to copy the existing mask;
             * otherwise we will mess up the mask for things like partially
             * marked arrays.
             */
            if (used_across_stages) {
               slots_used_tmp |=
                  *slots_used & (((uint64_t)1 << num_slots) - 1) << var->data.location;
            }

            if (outputs_read) {
               out_slots_read_tmp |=
                  *out_slots_read & (((uint64_t)1 << num_slots) - 1) << var->data.location;
            }

         } else {
            for (unsigned i = 0; i < num_slots; i++) {
               if (used_across_stages)
                  slots_used_tmp |= (uint64_t)1 << (var->data.location + i);

               if (outputs_read)
                  out_slots_read_tmp |= (uint64_t)1 << (var->data.location + i);
            }
         }
      }
   }

   *slots_used = slots_used_tmp;
   *out_slots_read = out_slots_read_tmp;
}

/* If there are empty components in a slot, compact the remaining components
 * as close to component 0 as possible. This will make it easier to fill the
 * empty components with components from a different slot in a following pass.
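 *
 * For example (assuming the interpolation qualifiers match): if the consumer
 * reads two scalar varyings at VAR0.x and VAR1.x, the VAR1.x input/output
 * pair gets remapped to VAR0.y, leaving the VAR1 slot entirely free.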
 */
static void
compact_components(nir_shader *producer, nir_shader *consumer, uint8_t *comps,
                   uint8_t *interp_type, uint8_t *interp_loc,
                   bool default_to_smooth_interp)
{
   struct exec_list *input_list = &consumer->inputs;
   struct exec_list *output_list = &producer->outputs;
   struct varying_loc remap[32][4] = {{{0}, {0}}};

   /* Create a cursor for each interpolation type */
   unsigned cursor[4] = {0};

   /* We only need to pass over one stage, and we choose the consumer as it
    * seems to cause a larger reduction in instruction counts (tested on i965).
    */
   nir_foreach_variable(var, input_list) {

      /* Only remap things that aren't builtins.
       * TODO: add TES patch support.
       */
      if (var->data.location >= VARYING_SLOT_VAR0 &&
          var->data.location - VARYING_SLOT_VAR0 < 32) {

         /* We can't repack xfb varyings. */
         if (var->data.always_active_io)
            continue;

         const struct glsl_type *type = var->type;
         if (nir_is_per_vertex_io(var, consumer->info.stage)) {
            assert(glsl_type_is_array(type));
            type = glsl_get_array_element(type);
         }

         /* Skip types that require more complex packing handling.
          * TODO: add support for these types.
          */
         if (glsl_type_is_array(type) ||
             glsl_type_is_dual_slot(type) ||
             glsl_type_is_matrix(type) ||
             glsl_type_is_struct(type) ||
             glsl_type_is_64bit(type))
            continue;

         /* We ignore complex types above, and all other vector types should
          * have been split into scalar variables by the lower_io_to_scalar
          * pass. The only exception should be OpenGL xfb varyings.
          */
         if (glsl_get_vector_elements(type) != 1)
            continue;

         unsigned location = var->data.location - VARYING_SLOT_VAR0;
         uint8_t used_comps = comps[location];

         /* If there are no empty components, there is nothing more for us
          * to do.
          */
         if (used_comps == 0xf)
            continue;

         bool found_new_offset = false;
         uint8_t interp = get_interp_type(var, type, default_to_smooth_interp);
         for (; cursor[interp] < 32; cursor[interp]++) {
            uint8_t cursor_used_comps = comps[cursor[interp]];

            /* We couldn't find anywhere better to pack the varying; stop
             * looking.
             */
            if (cursor[interp] == location &&
                (var->data.location_frac == 0 ||
                 cursor_used_comps & ((1 << (var->data.location_frac)) - 1)))
               break;

            /* We can only pack varyings with matching interpolation types */
            if (interp_type[cursor[interp]] != interp)
               continue;

            /* The interpolation location must also match.
             * TODO: i965 can handle these if they don't match, but the
             * radeonsi nir backend handles everything as vec4s and so expects
             * this to be the same for all components. We could make this
             * check driver specific or drop it if NIR ever becomes the only
             * radeonsi backend.
             */
            if (interp_loc[cursor[interp]] != get_interp_loc(var))
               continue;

            /* If the slot is empty, just skip it for now; compact_var_list()
             * can be called after this function to remove empty slots for us.
             * TODO: finishing compact_var_list() requires array and matrix
             * splitting.
             */
            if (!cursor_used_comps)
               continue;

            uint8_t unused_comps = ~cursor_used_comps;

            for (unsigned i = 0; i < 4; i++) {
               uint8_t new_var_comps = 1 << i;
               if (unused_comps & new_var_comps) {
                  remap[location][var->data.location_frac].component = i;
                  remap[location][var->data.location_frac].location =
                     cursor[interp] + VARYING_SLOT_VAR0;

                  found_new_offset = true;

                  /* Turn off the mask for the component we are remapping */
                  if (comps[location] & 1 << var->data.location_frac) {
                     comps[location] ^= 1 << var->data.location_frac;
                     comps[cursor[interp]] |= new_var_comps;
                  }
                  break;
               }
            }

            if (found_new_offset)
               break;
         }
      }
   }

   uint64_t zero = 0;
   remap_slots_and_components(input_list, consumer->info.stage, remap,
                              &consumer->info.inputs_read, &zero);
   remap_slots_and_components(output_list, producer->info.stage, remap,
                              &producer->info.outputs_written,
                              &producer->info.outputs_read);
}

/* We assume that this has been called more-or-less directly after
 * remove_unused_varyings. At this point, all of the varyings that we
 * aren't going to be using have been completely removed and the
 * inputs_read and outputs_written fields in nir_shader_info reflect
 * this. Therefore, the total set of valid slots is the OR of the two
 * sets of varyings; this accounts for varyings which one side may need
 * to read/write even if the other doesn't. This can happen if, for
 * instance, an array is used indirectly from one side, causing it to be
 * unsplittable, but directly from the other.
 */
void
nir_compact_varyings(nir_shader *producer, nir_shader *consumer,
                     bool default_to_smooth_interp)
{
   assert(producer->info.stage != MESA_SHADER_FRAGMENT);
   assert(consumer->info.stage != MESA_SHADER_VERTEX);

   uint8_t comps[32] = {0};
   uint8_t interp_type[32] = {0};
   uint8_t interp_loc[32] = {0};

   get_slot_component_masks_and_interp_types(&producer->outputs, comps,
                                             interp_type, interp_loc,
                                             producer->info.stage,
                                             default_to_smooth_interp);
   get_slot_component_masks_and_interp_types(&consumer->inputs, comps,
                                             interp_type, interp_loc,
                                             consumer->info.stage,
                                             default_to_smooth_interp);

   compact_components(producer, consumer, comps, interp_type, interp_loc,
                      default_to_smooth_interp);
}

/*
 * Mark XFB varyings as always_active_io in the consumer so the linking opts
 * don't touch them.
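 *
 * E.g. (illustrative): if a vertex-shader output at VARYING_SLOT_VAR1 is
 * captured by transform feedback, the matching input in the consumer stage
 * is marked as well, so neither side of the interface gets remapped or
 * removed by the passes above.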
 */
void
nir_link_xfb_varyings(nir_shader *producer, nir_shader *consumer)
{
   nir_variable *input_vars[MAX_VARYING] = { 0 };

   nir_foreach_variable(var, &consumer->inputs) {
      if (var->data.location >= VARYING_SLOT_VAR0 &&
          var->data.location - VARYING_SLOT_VAR0 < MAX_VARYING) {

         unsigned location = var->data.location - VARYING_SLOT_VAR0;
         input_vars[location] = var;
      }
   }

   nir_foreach_variable(var, &producer->outputs) {
      if (var->data.location >= VARYING_SLOT_VAR0 &&
          var->data.location - VARYING_SLOT_VAR0 < MAX_VARYING) {

         if (!var->data.always_active_io)
            continue;

         unsigned location = var->data.location - VARYING_SLOT_VAR0;
         if (input_vars[location]) {
            input_vars[location]->data.always_active_io = true;
         }
      }
   }
}

static bool
does_varying_match(nir_variable *out_var, nir_variable *in_var)
{
   return in_var->data.location == out_var->data.location &&
          in_var->data.location_frac == out_var->data.location_frac;
}

static nir_variable *
get_matching_input_var(nir_shader *consumer, nir_variable *out_var)
{
   nir_foreach_variable(var, &consumer->inputs) {
      if (does_varying_match(out_var, var))
         return var;
   }

   return NULL;
}

static bool
can_replace_varying(nir_variable *out_var)
{
   /* Skip types that require more complex handling.
    * TODO: add support for these types.
    */
   if (glsl_type_is_array(out_var->type) ||
       glsl_type_is_dual_slot(out_var->type) ||
       glsl_type_is_matrix(out_var->type) ||
       glsl_type_is_struct(out_var->type))
      return false;

   /* Limit this pass to scalars for now to keep things simple. Most varyings
    * should have been lowered to scalars at this point anyway.
    */
   if (!glsl_type_is_scalar(out_var->type))
      return false;

   if (out_var->data.location < VARYING_SLOT_VAR0 ||
       out_var->data.location - VARYING_SLOT_VAR0 >= MAX_VARYING)
      return false;

   return true;
}

static bool
replace_constant_input(nir_shader *shader, nir_intrinsic_instr *store_intr)
{
   nir_function_impl *impl = nir_shader_get_entrypoint(shader);

   nir_builder b;
   nir_builder_init(&b, impl);

   nir_variable *out_var =
      nir_deref_instr_get_variable(nir_src_as_deref(store_intr->src[0]));

   bool progress = false;
   nir_foreach_block(block, impl) {
      nir_foreach_instr(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
         if (intr->intrinsic != nir_intrinsic_load_deref)
            continue;

         nir_deref_instr *in_deref = nir_src_as_deref(intr->src[0]);
         if (in_deref->mode != nir_var_shader_in)
            continue;

         nir_variable *in_var = nir_deref_instr_get_variable(in_deref);

         if (!does_varying_match(out_var, in_var))
            continue;

         b.cursor = nir_before_instr(instr);

         nir_load_const_instr *out_const =
            nir_instr_as_load_const(store_intr->src[1].ssa->parent_instr);

         /* Add new const to replace the input */
         nir_ssa_def *nconst = nir_build_imm(&b, store_intr->num_components,
                                             intr->dest.ssa.bit_size,
                                             out_const->value);

         nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(nconst));

         progress = true;
      }
   }

   return progress;
}

static bool
replace_duplicate_input(nir_shader *shader, nir_variable *input_var,
                        nir_intrinsic_instr *dup_store_intr)
{
   assert(input_var);

   nir_function_impl *impl = nir_shader_get_entrypoint(shader);

   nir_builder b;
   nir_builder_init(&b, impl);

   nir_variable *dup_out_var =
      nir_deref_instr_get_variable(nir_src_as_deref(dup_store_intr->src[0]));

   bool progress = false;
   nir_foreach_block(block, impl) {
      nir_foreach_instr(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
         if (intr->intrinsic != nir_intrinsic_load_deref)
            continue;

         nir_deref_instr *in_deref = nir_src_as_deref(intr->src[0]);
         if (in_deref->mode != nir_var_shader_in)
            continue;

         nir_variable *in_var = nir_deref_instr_get_variable(in_deref);

         if (!does_varying_match(dup_out_var, in_var) ||
             in_var->data.interpolation != input_var->data.interpolation ||
             get_interp_loc(in_var) != get_interp_loc(input_var))
            continue;

         b.cursor = nir_before_instr(instr);

         nir_ssa_def *load = nir_load_var(&b, input_var);
         nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(load));

         progress = true;
      }
   }

   return progress;
}

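/* A GLSL-level sketch of what the pass below catches (the variable name is
 * hypothetical): if the producer's last block stores a constant to an
 * output, e.g.
 *
 *    out float v_scale;
 *    ...
 *    v_scale = 2.0;
 *
 * then loads of the matching consumer input are replaced with the constant
 * 2.0 (replace_constant_input).  Likewise, if two outputs are fed from the
 * same SSA value, loads of the duplicate input are rewritten to load the
 * first matching input instead, provided the interpolation qualifiers match
 * (replace_duplicate_input).
 */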
bool
nir_link_opt_varyings(nir_shader *producer, nir_shader *consumer)
{
   /* TODO: Add support for more shader stage combinations */
   if (consumer->info.stage != MESA_SHADER_FRAGMENT ||
       (producer->info.stage != MESA_SHADER_VERTEX &&
        producer->info.stage != MESA_SHADER_TESS_EVAL))
      return false;

   bool progress = false;

   nir_function_impl *impl = nir_shader_get_entrypoint(producer);

   struct hash_table *varying_values = _mesa_pointer_hash_table_create(NULL);

   /* If we find a store in the last block of the producer, we can be sure
    * this is the only possible value for this output.
    */
   nir_block *last_block = nir_impl_last_block(impl);
   nir_foreach_instr_reverse(instr, last_block) {
      if (instr->type != nir_instr_type_intrinsic)
         continue;

      nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);

      if (intr->intrinsic != nir_intrinsic_store_deref)
         continue;

      nir_deref_instr *out_deref = nir_src_as_deref(intr->src[0]);
      if (out_deref->mode != nir_var_shader_out)
         continue;

      nir_variable *out_var = nir_deref_instr_get_variable(out_deref);
      if (!can_replace_varying(out_var))
         continue;

      if (intr->src[1].ssa->parent_instr->type == nir_instr_type_load_const) {
         progress |= replace_constant_input(consumer, intr);
      } else {
         struct hash_entry *entry =
            _mesa_hash_table_search(varying_values, intr->src[1].ssa);
         if (entry) {
            progress |= replace_duplicate_input(consumer,
                                                (nir_variable *) entry->data,
                                                intr);
         } else {
            nir_variable *in_var = get_matching_input_var(consumer, out_var);
            if (in_var) {
               _mesa_hash_table_insert(varying_values, intr->src[1].ssa,
                                       in_var);
            }
         }
      }
   }

   _mesa_hash_table_destroy(varying_values, NULL);

   return progress;
}