nir: rewrite varying component packing
src/compiler/nir/nir_linking_helpers.c (mesa.git)
/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "nir.h"
#include "nir_builder.h"
#include "util/set.h"
#include "util/hash_table.h"

/* This file contains various little helpers for doing simple linking in
 * NIR.  Besides deleting unused things, it packs varying components into
 * as few slots as possible and replaces consumer loads of varyings that
 * the producer writes as constants or as duplicates of other varyings.
 */

/**
 * Returns the bits in the inputs_read, outputs_written, or
 * system_values_read bitfield corresponding to this variable.
 */
static uint64_t
get_variable_io_mask(nir_variable *var, gl_shader_stage stage)
{
   if (var->data.location < 0)
      return 0;

   unsigned location = var->data.patch ?
      var->data.location - VARYING_SLOT_PATCH0 : var->data.location;

   assert(var->data.mode == nir_var_shader_in ||
          var->data.mode == nir_var_shader_out ||
          var->data.mode == nir_var_system_value);
   assert(var->data.location >= 0);

   const struct glsl_type *type = var->type;
   if (nir_is_per_vertex_io(var, stage)) {
      assert(glsl_type_is_array(type));
      type = glsl_get_array_element(type);
   }

   unsigned slots = glsl_count_attribute_slots(type, false);
   return ((1ull << slots) - 1) << location;
}

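/* Accumulates, per location_frac, bitmasks of the output slots that the TCS
 * reads back itself, so that such outputs are not treated as unused just
 * because the TES does not read them.
 */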
static void
tcs_add_output_reads(nir_shader *shader, uint64_t *read, uint64_t *patches_read)
{
   nir_foreach_function(function, shader) {
      if (!function->impl)
         continue;

      nir_foreach_block(block, function->impl) {
         nir_foreach_instr(instr, block) {
            if (instr->type != nir_instr_type_intrinsic)
               continue;

            nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
            if (intrin->intrinsic != nir_intrinsic_load_deref)
               continue;

            nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
            if (deref->mode != nir_var_shader_out)
               continue;

            nir_variable *var = nir_deref_instr_get_variable(deref);
            if (var->data.patch) {
               patches_read[var->data.location_frac] |=
                  get_variable_io_mask(var, shader->info.stage);
            } else {
               read[var->data.location_frac] |=
                  get_variable_io_mask(var, shader->info.stage);
            }
         }
      }
   }
}

/**
 * Helper for removing unused shader I/O variables, by demoting them to global
 * variables (which may then be dead code eliminated).
 *
 * Example usage is:
 *
 * progress = nir_remove_unused_io_vars(producer,
 *                                      &producer->outputs,
 *                                      read, patches_read) ||
 *            progress;
 *
 * The "used" masks should each be an array of 4 uint64_ts (of VARYING_BIT_*
 * bits), one per possible .location_frac.  Note that for vector variables,
 * only the first channel (.location_frac) is examined for deciding if the
 * variable is used!
 */
bool
nir_remove_unused_io_vars(nir_shader *shader, struct exec_list *var_list,
                          uint64_t *used_by_other_stage,
                          uint64_t *used_by_other_stage_patches)
{
   bool progress = false;
   uint64_t *used;

   nir_foreach_variable_safe(var, var_list) {
      if (var->data.patch)
         used = used_by_other_stage_patches;
      else
         used = used_by_other_stage;

      if (var->data.location < VARYING_SLOT_VAR0 && var->data.location >= 0)
         continue;

      if (var->data.always_active_io)
         continue;

      if (var->data.explicit_xfb_buffer)
         continue;

      uint64_t other_stage = used[var->data.location_frac];

      if (!(other_stage & get_variable_io_mask(var, shader->info.stage))) {
         /* This one is not used by the other stage, so make it a global
          * variable instead and let it be dead code eliminated.
          */
         var->data.location = 0;
         var->data.mode = nir_var_shader_temp;

         exec_node_remove(&var->node);
         exec_list_push_tail(&shader->globals, &var->node);

         progress = true;
      }
   }

   if (progress)
      nir_fixup_deref_modes(shader);

   return progress;
}

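/* Removes producer outputs and consumer inputs that are not used by the
 * other stage, taking care to keep TCS outputs that the TCS itself reads
 * back.
 */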
bool
nir_remove_unused_varyings(nir_shader *producer, nir_shader *consumer)
{
   assert(producer->info.stage != MESA_SHADER_FRAGMENT);
   assert(consumer->info.stage != MESA_SHADER_VERTEX);

   uint64_t read[4] = { 0 }, written[4] = { 0 };
   uint64_t patches_read[4] = { 0 }, patches_written[4] = { 0 };

   nir_foreach_variable(var, &producer->outputs) {
      if (var->data.patch) {
         patches_written[var->data.location_frac] |=
            get_variable_io_mask(var, producer->info.stage);
      } else {
         written[var->data.location_frac] |=
            get_variable_io_mask(var, producer->info.stage);
      }
   }

   nir_foreach_variable(var, &consumer->inputs) {
      if (var->data.patch) {
         patches_read[var->data.location_frac] |=
            get_variable_io_mask(var, consumer->info.stage);
      } else {
         read[var->data.location_frac] |=
            get_variable_io_mask(var, consumer->info.stage);
      }
   }

   /* Each TCS invocation can read data written by other TCS invocations,
    * so even if the outputs are not used by the TES we must also make
    * sure they are not read by the TCS before demoting them to globals.
    */
   if (producer->info.stage == MESA_SHADER_TESS_CTRL)
      tcs_add_output_reads(producer, read, patches_read);

   bool progress = false;
   progress = nir_remove_unused_io_vars(producer, &producer->outputs, read,
                                        patches_read);

   progress = nir_remove_unused_io_vars(consumer, &consumer->inputs, written,
                                        patches_written) || progress;

   return progress;
}

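/* Returns the interpolation mode to use for packing purposes: integers are
 * always flat, an explicit qualifier on the variable takes precedence, and
 * otherwise the result depends on default_to_smooth_interp.
 */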
static uint8_t
get_interp_type(nir_variable *var, const struct glsl_type *type,
                bool default_to_smooth_interp)
{
   if (glsl_type_is_integer(type))
      return INTERP_MODE_FLAT;
   else if (var->data.interpolation != INTERP_MODE_NONE)
      return var->data.interpolation;
   else if (default_to_smooth_interp)
      return INTERP_MODE_SMOOTH;
   else
      return INTERP_MODE_NONE;
}

#define INTERPOLATE_LOC_SAMPLE 0
#define INTERPOLATE_LOC_CENTROID 1
#define INTERPOLATE_LOC_CENTER 2

static uint8_t
get_interp_loc(nir_variable *var)
{
   if (var->data.sample)
      return INTERPOLATE_LOC_SAMPLE;
   else if (var->data.centroid)
      return INTERPOLATE_LOC_CENTROID;
   else
      return INTERPOLATE_LOC_CENTER;
}

static bool
is_packing_supported_for_type(const struct glsl_type *type)
{
   /* We ignore complex types such as arrays, matrices, structs and bitsizes
    * other than 32bit. All other vector types should have been split into
    * scalar variables by the lower_io_to_scalar pass. The only exception
    * should be OpenGL xfb varyings.
    * TODO: add support for more complex types?
    */
   return glsl_type_is_scalar(type) && glsl_type_is_32bit(type);
}

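/* Tracks which components of a location are already in use, along with the
 * interpolation type and location that anything else packed into that
 * location must match.
 */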
struct assigned_comps
{
   uint8_t comps;
   uint8_t interp_type;
   uint8_t interp_loc;
};

/* Packing arrays and dual slot varyings is difficult, so to avoid complex
 * algorithms this function just assigns them their existing location for now.
 * TODO: allow better packing of complex types.
 */
static void
get_unmoveable_components_masks(struct exec_list *var_list,
                                struct assigned_comps *comps,
                                gl_shader_stage stage,
                                bool default_to_smooth_interp)
{
   nir_foreach_variable_safe(var, var_list) {
      assert(var->data.location >= 0);

      /* Only remap things that aren't built-ins. */
      if (var->data.location >= VARYING_SLOT_VAR0 &&
          var->data.location - VARYING_SLOT_VAR0 < MAX_VARYINGS_INCL_PATCH) {

         const struct glsl_type *type = var->type;
         if (nir_is_per_vertex_io(var, stage)) {
            assert(glsl_type_is_array(type));
            type = glsl_get_array_element(type);
         }

         /* If we can pack this varying then don't mark the components as
          * used.
          */
         if (is_packing_supported_for_type(type))
            continue;

         unsigned location = var->data.location - VARYING_SLOT_VAR0;
         unsigned elements =
            glsl_get_vector_elements(glsl_without_array(type));

         bool dual_slot = glsl_type_is_dual_slot(glsl_without_array(type));
         unsigned slots = glsl_count_attribute_slots(type, false);
         unsigned dmul = glsl_type_is_64bit(glsl_without_array(type)) ? 2 : 1;
         unsigned comps_slot2 = 0;
         for (unsigned i = 0; i < slots; i++) {
            if (dual_slot) {
               if (i & 1) {
                  comps[location + i].comps |= ((1 << comps_slot2) - 1);
               } else {
                  unsigned num_comps = 4 - var->data.location_frac;
                  comps_slot2 = (elements * dmul) - num_comps;

                  /* Assume ARB_enhanced_layouts packing rules for doubles */
                  assert(var->data.location_frac == 0 ||
                         var->data.location_frac == 2);
                  assert(comps_slot2 <= 4);

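                  /* For example, a dvec3 at location_frac 0 has
                   * elements * dmul == 6 components in total: num_comps == 4
                   * of them land in the first slot and comps_slot2 == 2
                   * spill into the second slot.
                   */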
                  comps[location + i].comps |=
                     ((1 << num_comps) - 1) << var->data.location_frac;
               }
            } else {
               comps[location + i].comps |=
                  ((1 << (elements * dmul)) - 1) << var->data.location_frac;
            }

            comps[location + i].interp_type =
               get_interp_type(var, type, default_to_smooth_interp);
            comps[location + i].interp_loc = get_interp_loc(var);
         }
      }
   }
}

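/* The new location and component that a packed varying gets assigned. */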
struct varying_loc
{
   uint8_t component;
   uint32_t location;
};

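/* ORs into the used-slots bitmask (patch or non-patch, as appropriate) the
 * bits of slots_used_mask that cover this variable's slots.
 */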
static void
mark_all_used_slots(nir_variable *var, uint64_t *slots_used,
                    uint64_t slots_used_mask, unsigned num_slots)
{
   unsigned loc_offset = var->data.patch ? VARYING_SLOT_PATCH0 : 0;

   slots_used[var->data.patch ? 1 : 0] |= slots_used_mask &
      BITFIELD64_RANGE(var->data.location - loc_offset, num_slots);
}

static void
mark_used_slot(nir_variable *var, uint64_t *slots_used, unsigned offset)
{
   unsigned loc_offset = var->data.patch ? VARYING_SLOT_PATCH0 : 0;

   slots_used[var->data.patch ? 1 : 0] |=
      BITFIELD64_BIT(var->data.location - loc_offset + offset);
}

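/* Applies the remap table to every variable in var_list and rebuilds the
 * slots_used/out_slots_read bitmasks (and their patch equivalents) so they
 * match the new locations.
 */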
static void
remap_slots_and_components(struct exec_list *var_list, gl_shader_stage stage,
                           struct varying_loc (*remap)[4],
                           uint64_t *slots_used, uint64_t *out_slots_read,
                           uint32_t *p_slots_used, uint32_t *p_out_slots_read)
{
   uint64_t out_slots_read_tmp[2] = {0};
   uint64_t slots_used_tmp[2] = {0};

   /* We don't touch builtins, so just copy the bitmask. */
   slots_used_tmp[0] = *slots_used & BITFIELD64_RANGE(0, VARYING_SLOT_VAR0);

   nir_foreach_variable(var, var_list) {
      assert(var->data.location >= 0);

      /* Only remap things that aren't built-ins */
      if (var->data.location >= VARYING_SLOT_VAR0 &&
          var->data.location - VARYING_SLOT_VAR0 < MAX_VARYINGS_INCL_PATCH) {

         const struct glsl_type *type = var->type;
         if (nir_is_per_vertex_io(var, stage)) {
            assert(glsl_type_is_array(type));
            type = glsl_get_array_element(type);
         }

         unsigned num_slots = glsl_count_attribute_slots(type, false);
         bool used_across_stages = false;
         bool outputs_read = false;

         unsigned location = var->data.location - VARYING_SLOT_VAR0;
         struct varying_loc *new_loc = &remap[location][var->data.location_frac];

         unsigned loc_offset = var->data.patch ? VARYING_SLOT_PATCH0 : 0;
         uint64_t used = var->data.patch ? *p_slots_used : *slots_used;
         uint64_t outs_used =
            var->data.patch ? *p_out_slots_read : *out_slots_read;
         uint64_t slots =
            BITFIELD64_RANGE(var->data.location - loc_offset, num_slots);

         if (slots & used)
            used_across_stages = true;

         if (slots & outs_used)
            outputs_read = true;

         if (new_loc->location) {
            var->data.location = new_loc->location;
            var->data.location_frac = new_loc->component;
         }

         if (var->data.always_active_io) {
            /* We can't apply link time optimisations (specifically array
             * splitting) to these, so we need to copy the existing mask,
             * otherwise we will mess up the mask for things like partially
             * marked arrays.
             */
            if (used_across_stages)
               mark_all_used_slots(var, slots_used_tmp, used, num_slots);

            if (outputs_read) {
               mark_all_used_slots(var, out_slots_read_tmp, outs_used,
                                   num_slots);
            }
         } else {
            for (unsigned i = 0; i < num_slots; i++) {
               if (used_across_stages)
                  mark_used_slot(var, slots_used_tmp, i);

               if (outputs_read)
                  mark_used_slot(var, out_slots_read_tmp, i);
            }
         }
      }
   }

   *slots_used = slots_used_tmp[0];
   *out_slots_read = out_slots_read_tmp[0];
   *p_slots_used = slots_used_tmp[1];
   *p_out_slots_read = out_slots_read_tmp[1];
}

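/* Information about a single packable input component of the consumer,
 * gathered so the components can be sorted and then assigned new locations.
 */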
struct varying_component {
   nir_variable *var;
   uint8_t interp_type;
   uint8_t interp_loc;
   bool is_patch;
   bool initialised;
};

static int
cmp_varying_component(const void *comp1_v, const void *comp2_v)
{
   struct varying_component *comp1 = (struct varying_component *) comp1_v;
   struct varying_component *comp2 = (struct varying_component *) comp2_v;

   /* We want patches to be ordered at the end of the array */
   if (comp1->is_patch != comp2->is_patch)
      return comp1->is_patch ? 1 : -1;

   /* We can only pack varyings with matching interpolation types, so group
    * them together.
    */
   if (comp1->interp_type != comp2->interp_type)
      return comp1->interp_type - comp2->interp_type;

   /* The interpolation loc must also match. */
   if (comp1->interp_loc != comp2->interp_loc)
      return comp1->interp_loc - comp2->interp_loc;

   /* If everything else matches just use the original location to sort */
   return comp1->var->data.location - comp2->var->data.location;
}

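/* Builds an array of varying_component entries for the consumer's packable
 * input components, filling in each entry from the load/interpolation
 * intrinsics that reference it.
 */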
static void
gather_varying_component_info(nir_shader *consumer,
                              struct varying_component **varying_comp_info,
                              unsigned *varying_comp_info_size,
                              bool default_to_smooth_interp)
{
   unsigned store_varying_info_idx[MAX_VARYINGS_INCL_PATCH][4] = {0};
   unsigned num_of_comps_to_pack = 0;

   /* Count the number of varyings that can be packed and create a mapping
    * of those varyings to the array we will pass to qsort.
    */
   nir_foreach_variable(var, &consumer->inputs) {

      /* Only remap things that aren't builtins. */
      if (var->data.location >= VARYING_SLOT_VAR0 &&
          var->data.location - VARYING_SLOT_VAR0 < MAX_VARYINGS_INCL_PATCH) {

         /* We can't repack xfb varyings. */
         if (var->data.always_active_io)
            continue;

         const struct glsl_type *type = var->type;
         if (nir_is_per_vertex_io(var, consumer->info.stage)) {
            assert(glsl_type_is_array(type));
            type = glsl_get_array_element(type);
         }

         if (!is_packing_supported_for_type(type))
            continue;

         unsigned loc = var->data.location - VARYING_SLOT_VAR0;
         store_varying_info_idx[loc][var->data.location_frac] =
            ++num_of_comps_to_pack;
      }
   }

   *varying_comp_info_size = num_of_comps_to_pack;
   *varying_comp_info = rzalloc_array(NULL, struct varying_component,
                                      num_of_comps_to_pack);

   nir_function_impl *impl = nir_shader_get_entrypoint(consumer);

   /* Walk over the shader and populate the varying component info array */
   nir_foreach_block(block, impl) {
      nir_foreach_instr(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
         if (intr->intrinsic != nir_intrinsic_load_deref &&
             intr->intrinsic != nir_intrinsic_interp_deref_at_centroid &&
             intr->intrinsic != nir_intrinsic_interp_deref_at_sample &&
             intr->intrinsic != nir_intrinsic_interp_deref_at_offset)
            continue;

         nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
         if (deref->mode != nir_var_shader_in)
            continue;

         /* We only remap things that aren't builtins. */
         nir_variable *in_var = nir_deref_instr_get_variable(deref);
         if (in_var->data.location < VARYING_SLOT_VAR0)
            continue;

         unsigned location = in_var->data.location - VARYING_SLOT_VAR0;
         if (location >= MAX_VARYINGS_INCL_PATCH)
            continue;

         unsigned var_info_idx =
            store_varying_info_idx[location][in_var->data.location_frac];
         if (!var_info_idx)
            continue;

         struct varying_component *vc_info =
            &(*varying_comp_info)[var_info_idx - 1];

         if (!vc_info->initialised) {
            const struct glsl_type *type = in_var->type;
            if (nir_is_per_vertex_io(in_var, consumer->info.stage)) {
               assert(glsl_type_is_array(type));
               type = glsl_get_array_element(type);
            }

            vc_info->var = in_var;
            vc_info->interp_type =
               get_interp_type(in_var, type, default_to_smooth_interp);
            vc_info->interp_loc = get_interp_loc(in_var);
            vc_info->is_patch = in_var->data.patch;
         }
      }
   }
}

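/* Finds the first location at or after *cursor with a free component whose
 * interpolation settings match info, records the assignment in the remap
 * table and marks the component as used in assigned_comps.  If no location
 * is found before max_location, the remap entry is left untouched.
 */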
static void
assign_remap_locations(struct varying_loc (*remap)[4],
                       struct assigned_comps *assigned_comps,
                       struct varying_component *info,
                       unsigned *cursor, unsigned *comp,
                       unsigned max_location)
{
   unsigned tmp_cursor = *cursor;
   unsigned tmp_comp = *comp;

   for (; tmp_cursor < max_location; tmp_cursor++) {

      if (assigned_comps[tmp_cursor].comps) {
         /* We can only pack varyings with matching interpolation types,
          * and the interpolation loc must match as well.
          * TODO: i965 can handle interpolation locations that don't match,
          * but the radeonsi nir backend handles everything as vec4s and so
          * expects this to be the same for all components. We could make this
          * check driver specific or drop it if NIR ever becomes the only
          * radeonsi backend.
          */
         if (assigned_comps[tmp_cursor].interp_type != info->interp_type ||
             assigned_comps[tmp_cursor].interp_loc != info->interp_loc) {
            tmp_comp = 0;
            continue;
         }

         while (tmp_comp < 4 &&
                (assigned_comps[tmp_cursor].comps & (1 << tmp_comp))) {
            tmp_comp++;
         }
      }

      if (tmp_comp == 4) {
         tmp_comp = 0;
         continue;
      }

      unsigned location = info->var->data.location - VARYING_SLOT_VAR0;

      /* Once we have assigned a location mark it as used */
      assigned_comps[tmp_cursor].comps |= (1 << tmp_comp);
      assigned_comps[tmp_cursor].interp_type = info->interp_type;
      assigned_comps[tmp_cursor].interp_loc = info->interp_loc;

      /* Assign remap location */
      remap[location][info->var->data.location_frac].component = tmp_comp++;
      remap[location][info->var->data.location_frac].location =
         tmp_cursor + VARYING_SLOT_VAR0;

      break;
   }

   *cursor = tmp_cursor;
   *comp = tmp_comp;
}

/* If there are empty components in the slot, compact the remaining components
 * as close to component 0 as possible. This will make it easier to fill the
 * empty components with components from a different slot in a following pass.
 */
static void
compact_components(nir_shader *producer, nir_shader *consumer,
                   struct assigned_comps *assigned_comps,
                   bool default_to_smooth_interp)
{
   struct exec_list *input_list = &consumer->inputs;
   struct exec_list *output_list = &producer->outputs;
   struct varying_loc remap[MAX_VARYINGS_INCL_PATCH][4] = {{{0}, {0}}};
   struct varying_component *varying_comp_info;
   unsigned varying_comp_info_size;

   /* Gather varying component info */
   gather_varying_component_info(consumer, &varying_comp_info,
                                 &varying_comp_info_size,
                                 default_to_smooth_interp);

   /* Sort varying components. */
   qsort(varying_comp_info, varying_comp_info_size,
         sizeof(struct varying_component), cmp_varying_component);

   unsigned cursor = 0;
   unsigned comp = 0;

   /* Set the remap array based on the sorted components */
   for (unsigned i = 0; i < varying_comp_info_size; i++) {
      struct varying_component *info = &varying_comp_info[i];

      assert(info->is_patch || cursor < MAX_VARYING);
      if (info->is_patch) {
         /* The list should be sorted with all non-patch inputs first followed
          * by patch inputs. When we hit our first patch input, we need to
          * reset the cursor to MAX_VARYING so we put them in the right slot.
          */
         if (cursor < MAX_VARYING) {
            cursor = MAX_VARYING;
            comp = 0;
         }

         assign_remap_locations(remap, assigned_comps, info,
                                &cursor, &comp, MAX_VARYINGS_INCL_PATCH);
      } else {
         assign_remap_locations(remap, assigned_comps, info,
                                &cursor, &comp, MAX_VARYING);

         /* Check if we failed to assign a remap location. This can happen
          * if, for example, there are a bunch of unmovable components with
          * mismatching interpolation types causing us to skip over locations
          * that would have been useful for packing later components.
          * The solution is to iterate over the locations again (this should
          * happen very rarely in practice).
          */
         if (cursor == MAX_VARYING) {
            cursor = 0;
            comp = 0;
            assign_remap_locations(remap, assigned_comps, info,
                                   &cursor, &comp, MAX_VARYING);
         }
      }
   }

   ralloc_free(varying_comp_info);

   uint64_t zero = 0;
   uint32_t zero32 = 0;
   remap_slots_and_components(input_list, consumer->info.stage, remap,
                              &consumer->info.inputs_read, &zero,
                              &consumer->info.patch_inputs_read, &zero32);
   remap_slots_and_components(output_list, producer->info.stage, remap,
                              &producer->info.outputs_written,
                              &producer->info.outputs_read,
                              &producer->info.patch_outputs_written,
                              &producer->info.patch_outputs_read);
}

/* We assume that this has been called more-or-less directly after
 * remove_unused_varyings. At this point, all of the varyings that we
 * aren't going to be using have been completely removed and the
 * inputs_read and outputs_written fields in nir_shader_info reflect
 * this. Therefore, the total set of valid slots is the OR of the two
 * sets of varyings; this accounts for varyings which one side may need
 * to read/write even if the other doesn't. This can happen if, for
 * instance, an array is used indirectly from one side causing it to be
 * unsplittable but directly from the other.
 */
void
nir_compact_varyings(nir_shader *producer, nir_shader *consumer,
                     bool default_to_smooth_interp)
{
   assert(producer->info.stage != MESA_SHADER_FRAGMENT);
   assert(consumer->info.stage != MESA_SHADER_VERTEX);

   struct assigned_comps assigned_comps[MAX_VARYINGS_INCL_PATCH] = {0};

   get_unmoveable_components_masks(&producer->outputs, assigned_comps,
                                   producer->info.stage,
                                   default_to_smooth_interp);
   get_unmoveable_components_masks(&consumer->inputs, assigned_comps,
                                   consumer->info.stage,
                                   default_to_smooth_interp);

   compact_components(producer, consumer, assigned_comps,
                      default_to_smooth_interp);
}

/*
 * Mark XFB varyings as always_active_io in the consumer so the linking opts
 * don't touch them.
 */
void
nir_link_xfb_varyings(nir_shader *producer, nir_shader *consumer)
{
   nir_variable *input_vars[MAX_VARYING] = { 0 };

   nir_foreach_variable(var, &consumer->inputs) {
      if (var->data.location >= VARYING_SLOT_VAR0 &&
          var->data.location - VARYING_SLOT_VAR0 < MAX_VARYING) {

         unsigned location = var->data.location - VARYING_SLOT_VAR0;
         input_vars[location] = var;
      }
   }

   nir_foreach_variable(var, &producer->outputs) {
      if (var->data.location >= VARYING_SLOT_VAR0 &&
          var->data.location - VARYING_SLOT_VAR0 < MAX_VARYING) {

         if (!var->data.always_active_io)
            continue;

         unsigned location = var->data.location - VARYING_SLOT_VAR0;
         if (input_vars[location]) {
            input_vars[location]->data.always_active_io = true;
         }
      }
   }
}

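/* Returns true if the producer output and consumer input occupy the same
 * location and component.
 */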
static bool
does_varying_match(nir_variable *out_var, nir_variable *in_var)
{
   return in_var->data.location == out_var->data.location &&
          in_var->data.location_frac == out_var->data.location_frac;
}

static nir_variable *
get_matching_input_var(nir_shader *consumer, nir_variable *out_var)
{
   nir_foreach_variable(var, &consumer->inputs) {
      if (does_varying_match(out_var, var))
         return var;
   }

   return NULL;
}

static bool
can_replace_varying(nir_variable *out_var)
{
   /* Skip types that require more complex handling.
    * TODO: add support for these types.
    */
   if (glsl_type_is_array(out_var->type) ||
       glsl_type_is_dual_slot(out_var->type) ||
       glsl_type_is_matrix(out_var->type) ||
       glsl_type_is_struct(out_var->type))
      return false;

   /* Limit this pass to scalars for now to keep things simple. Most varyings
    * should have been lowered to scalars at this point anyway.
    */
   if (!glsl_type_is_scalar(out_var->type))
      return false;

   if (out_var->data.location < VARYING_SLOT_VAR0 ||
       out_var->data.location - VARYING_SLOT_VAR0 >= MAX_VARYING)
      return false;

   return true;
}

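/* Rewrites every load of the consumer input that matches the output written
 * by store_intr so it uses the constant value being stored, leaving the
 * input itself unused.
 */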
static bool
replace_constant_input(nir_shader *shader, nir_intrinsic_instr *store_intr)
{
   nir_function_impl *impl = nir_shader_get_entrypoint(shader);

   nir_builder b;
   nir_builder_init(&b, impl);

   nir_variable *out_var =
      nir_deref_instr_get_variable(nir_src_as_deref(store_intr->src[0]));

   bool progress = false;
   nir_foreach_block(block, impl) {
      nir_foreach_instr(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
         if (intr->intrinsic != nir_intrinsic_load_deref)
            continue;

         nir_deref_instr *in_deref = nir_src_as_deref(intr->src[0]);
         if (in_deref->mode != nir_var_shader_in)
            continue;

         nir_variable *in_var = nir_deref_instr_get_variable(in_deref);

         if (!does_varying_match(out_var, in_var))
            continue;

         b.cursor = nir_before_instr(instr);

         nir_load_const_instr *out_const =
            nir_instr_as_load_const(store_intr->src[1].ssa->parent_instr);

         /* Add new const to replace the input */
         nir_ssa_def *nconst = nir_build_imm(&b, store_intr->num_components,
                                             intr->dest.ssa.bit_size,
                                             out_const->value);

         nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(nconst));

         progress = true;
      }
   }

   return progress;
}

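/* Rewrites loads of the consumer input fed by dup_store_intr to load
 * input_var instead, provided the interpolation settings match, since both
 * inputs carry the same value from the producer.
 */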
static bool
replace_duplicate_input(nir_shader *shader, nir_variable *input_var,
                        nir_intrinsic_instr *dup_store_intr)
{
   assert(input_var);

   nir_function_impl *impl = nir_shader_get_entrypoint(shader);

   nir_builder b;
   nir_builder_init(&b, impl);

   nir_variable *dup_out_var =
      nir_deref_instr_get_variable(nir_src_as_deref(dup_store_intr->src[0]));

   bool progress = false;
   nir_foreach_block(block, impl) {
      nir_foreach_instr(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
         if (intr->intrinsic != nir_intrinsic_load_deref)
            continue;

         nir_deref_instr *in_deref = nir_src_as_deref(intr->src[0]);
         if (in_deref->mode != nir_var_shader_in)
            continue;

         nir_variable *in_var = nir_deref_instr_get_variable(in_deref);

         if (!does_varying_match(dup_out_var, in_var) ||
             in_var->data.interpolation != input_var->data.interpolation ||
             get_interp_loc(in_var) != get_interp_loc(input_var))
            continue;

         b.cursor = nir_before_instr(instr);

         nir_ssa_def *load = nir_load_var(&b, input_var);
         nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(load));

         progress = true;
      }
   }

   return progress;
}

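/* Replaces consumer loads of varyings whose producer value is provably a
 * constant, or a duplicate of another output, based on the stores found in
 * the producer's last block.
 */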
bool
nir_link_opt_varyings(nir_shader *producer, nir_shader *consumer)
{
   /* TODO: Add support for more shader stage combinations */
   if (consumer->info.stage != MESA_SHADER_FRAGMENT ||
       (producer->info.stage != MESA_SHADER_VERTEX &&
        producer->info.stage != MESA_SHADER_TESS_EVAL))
      return false;

   bool progress = false;

   nir_function_impl *impl = nir_shader_get_entrypoint(producer);

   struct hash_table *varying_values = _mesa_pointer_hash_table_create(NULL);

   /* If we find a store in the last block of the producer, we can be sure
    * this is the only possible value for this output.
    */
   nir_block *last_block = nir_impl_last_block(impl);
   nir_foreach_instr_reverse(instr, last_block) {
      if (instr->type != nir_instr_type_intrinsic)
         continue;

      nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);

      if (intr->intrinsic != nir_intrinsic_store_deref)
         continue;

      nir_deref_instr *out_deref = nir_src_as_deref(intr->src[0]);
      if (out_deref->mode != nir_var_shader_out)
         continue;

      nir_variable *out_var = nir_deref_instr_get_variable(out_deref);
      if (!can_replace_varying(out_var))
         continue;

      if (intr->src[1].ssa->parent_instr->type == nir_instr_type_load_const) {
         progress |= replace_constant_input(consumer, intr);
      } else {
         struct hash_entry *entry =
            _mesa_hash_table_search(varying_values, intr->src[1].ssa);
         if (entry) {
            progress |= replace_duplicate_input(consumer,
                                                (nir_variable *) entry->data,
                                                intr);
         } else {
            nir_variable *in_var = get_matching_input_var(consumer, out_var);
            if (in_var) {
               _mesa_hash_table_insert(varying_values, intr->src[1].ssa,
                                       in_var);
            }
         }
      }
   }

   _mesa_hash_table_destroy(varying_values, NULL);

   return progress;
}