nir: Record non-vector/scalar varyings as unmovable when compacting
[mesa.git] / src / compiler / nir / nir_linking_helpers.c
1 /*
2 * Copyright © 2015 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "nir.h"
25 #include "nir_builder.h"
26 #include "util/set.h"
27 #include "util/hash_table.h"
28
29 /* This file contains various little helpers for doing simple linking in
30 * NIR. Eventually, we'll probably want a full-blown varying packing
31 * implementation in here. Right now, it just deletes unused things.
32 */
33
34 /**
35 * Returns the bits in the inputs_read, outputs_written, or
36 * system_values_read bitfield corresponding to this variable.
37 */
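/* Illustrative example: a non-patch output of type mat2x4 counts as two
 * attribute slots, so for a variable at VARYING_SLOT_VAR1 this returns
 * ((1ull << 2) - 1) << VARYING_SLOT_VAR1, i.e. the bits for VAR1 and VAR2.
 * Patch variables are rebased relative to VARYING_SLOT_PATCH0 so the mask
 * lines up with the patch-specific bitfields.
 */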
38 static uint64_t
39 get_variable_io_mask(nir_variable *var, gl_shader_stage stage)
40 {
41 if (var->data.location < 0)
42 return 0;
43
44 unsigned location = var->data.patch ?
45 var->data.location - VARYING_SLOT_PATCH0 : var->data.location;
46
47 assert(var->data.mode == nir_var_shader_in ||
48 var->data.mode == nir_var_shader_out ||
49 var->data.mode == nir_var_system_value);
50 assert(var->data.location >= 0);
51
52 const struct glsl_type *type = var->type;
53 if (nir_is_per_vertex_io(var, stage)) {
54 assert(glsl_type_is_array(type));
55 type = glsl_get_array_element(type);
56 }
57
58 unsigned slots = glsl_count_attribute_slots(type, false);
59 return ((1ull << slots) - 1) << location;
60 }
61
62 static void
63 tcs_add_output_reads(nir_shader *shader, uint64_t *read, uint64_t *patches_read)
64 {
65 nir_foreach_function(function, shader) {
66 if (!function->impl)
67 continue;
68
69 nir_foreach_block(block, function->impl) {
70 nir_foreach_instr(instr, block) {
71 if (instr->type != nir_instr_type_intrinsic)
72 continue;
73
74 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
75 if (intrin->intrinsic != nir_intrinsic_load_deref)
76 continue;
77
78 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
79 if (deref->mode != nir_var_shader_out)
80 continue;
81
82 nir_variable *var = nir_deref_instr_get_variable(deref);
83 if (var->data.patch) {
84 patches_read[var->data.location_frac] |=
85 get_variable_io_mask(var, shader->info.stage);
86 } else {
87 read[var->data.location_frac] |=
88 get_variable_io_mask(var, shader->info.stage);
89 }
90 }
91 }
92 }
93 }
94
95 /**
96 * Helper for removing unused shader I/O variables, by demoting them to global
97 * variables (which may then be dead-code eliminated).
98 *
99 * Example usage is:
100 *
101 * progress = nir_remove_unused_io_vars(producer,
102 * &producer->outputs,
103 * read, patches_read) ||
104 * progress;
105 *
106 * The "used" should be an array of 4 uint64_ts (probably of VARYING_BIT_*)
107 * representing each .location_frac used. Note that for vector variables,
108 * only the first channel (.location_frac) is examined for deciding if the
109 * variable is used!
110 */
111 bool
112 nir_remove_unused_io_vars(nir_shader *shader, struct exec_list *var_list,
113 uint64_t *used_by_other_stage,
114 uint64_t *used_by_other_stage_patches)
115 {
116 bool progress = false;
117 uint64_t *used;
118
119 nir_foreach_variable_safe(var, var_list) {
120 if (var->data.patch)
121 used = used_by_other_stage_patches;
122 else
123 used = used_by_other_stage;
124
125 if (var->data.location < VARYING_SLOT_VAR0 && var->data.location >= 0)
126 continue;
127
128 if (var->data.always_active_io)
129 continue;
130
131 if (var->data.explicit_xfb_buffer)
132 continue;
133
134 uint64_t other_stage = used[var->data.location_frac];
135
136 if (!(other_stage & get_variable_io_mask(var, shader->info.stage))) {
137 /* This one is unused by the other stage, make it a global variable instead */
138 var->data.location = 0;
139 var->data.mode = nir_var_shader_temp;
140
141 exec_node_remove(&var->node);
142 exec_list_push_tail(&shader->globals, &var->node);
143
144 progress = true;
145 }
146 }
147
148 if (progress)
149 nir_fixup_deref_modes(shader);
150
151 return progress;
152 }
153
154 bool
155 nir_remove_unused_varyings(nir_shader *producer, nir_shader *consumer)
156 {
157 assert(producer->info.stage != MESA_SHADER_FRAGMENT);
158 assert(consumer->info.stage != MESA_SHADER_VERTEX);
159
160 uint64_t read[4] = { 0 }, written[4] = { 0 };
161 uint64_t patches_read[4] = { 0 }, patches_written[4] = { 0 };
162
163 nir_foreach_variable(var, &producer->outputs) {
164 if (var->data.patch) {
165 patches_written[var->data.location_frac] |=
166 get_variable_io_mask(var, producer->info.stage);
167 } else {
168 written[var->data.location_frac] |=
169 get_variable_io_mask(var, producer->info.stage);
170 }
171 }
172
173 nir_foreach_variable(var, &consumer->inputs) {
174 if (var->data.patch) {
175 patches_read[var->data.location_frac] |=
176 get_variable_io_mask(var, consumer->info.stage);
177 } else {
178 read[var->data.location_frac] |=
179 get_variable_io_mask(var, consumer->info.stage);
180 }
181 }
182
183 /* Each TCS invocation can read data written by other TCS invocations,
184 * so even if the outputs are not used by the TES we must also make
185 * sure they are not read by the TCS before demoting them to globals.
186 */
187 if (producer->info.stage == MESA_SHADER_TESS_CTRL)
188 tcs_add_output_reads(producer, read, patches_read);
189
190 bool progress = false;
191 progress = nir_remove_unused_io_vars(producer, &producer->outputs, read,
192 patches_read);
193
194 progress = nir_remove_unused_io_vars(consumer, &consumer->inputs, written,
195 patches_written) || progress;
196
197 return progress;
198 }
199
200 static uint8_t
201 get_interp_type(nir_variable *var, const struct glsl_type *type,
202 bool default_to_smooth_interp)
203 {
204 if (glsl_type_is_integer(type))
205 return INTERP_MODE_FLAT;
206 else if (var->data.interpolation != INTERP_MODE_NONE)
207 return var->data.interpolation;
208 else if (default_to_smooth_interp)
209 return INTERP_MODE_SMOOTH;
210 else
211 return INTERP_MODE_NONE;
212 }
213
214 #define INTERPOLATE_LOC_SAMPLE 0
215 #define INTERPOLATE_LOC_CENTROID 1
216 #define INTERPOLATE_LOC_CENTER 2
217
218 static uint8_t
219 get_interp_loc(nir_variable *var)
220 {
221 if (var->data.sample)
222 return INTERPOLATE_LOC_SAMPLE;
223 else if (var->data.centroid)
224 return INTERPOLATE_LOC_CENTROID;
225 else
226 return INTERPOLATE_LOC_CENTER;
227 }
228
229 static bool
230 is_packing_supported_for_type(const struct glsl_type *type)
231 {
232 /* We ignore complex types such as arrays, matrices, structs and bit sizes
233 * other than 32-bit. All other vector types should have been split into
234 * scalar variables by the lower_io_to_scalar pass. The only exception
235 * should be OpenGL xfb varyings.
236 * TODO: add support for more complex types?
237 */
238 return glsl_type_is_scalar(type) && glsl_type_is_32bit(type);
239 }
240
241 struct assigned_comps
242 {
243 uint8_t comps;
244 uint8_t interp_type;
245 uint8_t interp_loc;
246 };
247
248 /* Packing arrays and dual-slot varyings is difficult, so to avoid complex
249 * algorithms this function just assigns them their existing locations for now.
250 * TODO: allow better packing of complex types.
251 */
252 static void
253 get_unmoveable_components_masks(struct exec_list *var_list,
254 struct assigned_comps *comps,
255 gl_shader_stage stage,
256 bool default_to_smooth_interp)
257 {
258 nir_foreach_variable_safe(var, var_list) {
259 assert(var->data.location >= 0);
260
261 /* Only remap things that aren't built-ins. */
262 if (var->data.location >= VARYING_SLOT_VAR0 &&
263 var->data.location - VARYING_SLOT_VAR0 < MAX_VARYINGS_INCL_PATCH) {
264
265 const struct glsl_type *type = var->type;
266 if (nir_is_per_vertex_io(var, stage)) {
267 assert(glsl_type_is_array(type));
268 type = glsl_get_array_element(type);
269 }
270
271 /* If we can pack this varying then don't mark the components as
272 * used.
273 */
274 if (is_packing_supported_for_type(type))
275 continue;
276
277 unsigned location = var->data.location - VARYING_SLOT_VAR0;
278
279 unsigned elements =
280 glsl_type_is_vector_or_scalar(glsl_without_array(type)) ?
281 glsl_get_vector_elements(glsl_without_array(type)) : 4;
282
283 bool dual_slot = glsl_type_is_dual_slot(glsl_without_array(type));
284 unsigned slots = glsl_count_attribute_slots(type, false);
285 unsigned dmul = glsl_type_is_64bit(glsl_without_array(type)) ? 2 : 1;
286 unsigned comps_slot2 = 0;
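/* Illustrative example of the dual-slot case below: a dvec3 with
 * location_frac == 0 fills all 4 components of its first slot (the x and y
 * doubles) and 2 components of the second slot (the z double), so
 * num_comps == 4 and comps_slot2 == (3 * 2) - 4 == 2.
 */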
287 for (unsigned i = 0; i < slots; i++) {
288 if (dual_slot) {
289 if (i & 1) {
290 comps[location + i].comps |= ((1 << comps_slot2) - 1);
291 } else {
292 unsigned num_comps = 4 - var->data.location_frac;
293 comps_slot2 = (elements * dmul) - num_comps;
294
295 /* Assume ARB_enhanced_layouts packing rules for doubles */
296 assert(var->data.location_frac == 0 ||
297 var->data.location_frac == 2);
298 assert(comps_slot2 <= 4);
299
300 comps[location + i].comps |=
301 ((1 << num_comps) - 1) << var->data.location_frac;
302 }
303 } else {
304 comps[location + i].comps |=
305 ((1 << (elements * dmul)) - 1) << var->data.location_frac;
306 }
307
308 comps[location + i].interp_type =
309 get_interp_type(var, type, default_to_smooth_interp);
310 comps[location + i].interp_loc = get_interp_loc(var);
311 }
312 }
313 }
314 }
315
316 struct varying_loc
317 {
318 uint8_t component;
319 uint32_t location;
320 };
321
322 static void
323 mark_all_used_slots(nir_variable *var, uint64_t *slots_used,
324 uint64_t slots_used_mask, unsigned num_slots)
325 {
326 unsigned loc_offset = var->data.patch ? VARYING_SLOT_PATCH0 : 0;
327
328 slots_used[var->data.patch ? 1 : 0] |= slots_used_mask &
329 BITFIELD64_RANGE(var->data.location - loc_offset, num_slots);
330 }
331
332 static void
333 mark_used_slot(nir_variable *var, uint64_t *slots_used, unsigned offset)
334 {
335 unsigned loc_offset = var->data.patch ? VARYING_SLOT_PATCH0 : 0;
336
337 slots_used[var->data.patch ? 1 : 0] |=
338 BITFIELD64_BIT(var->data.location - loc_offset + offset);
339 }
340
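/* Apply a previously computed remap table to every variable in var_list and
 * rewrite the given slots_used/out_slots_read masks (and their per-patch
 * counterparts) to match. The remap table is indexed by
 * [location - VARYING_SLOT_VAR0][location_frac]; an entry with a non-zero
 * location holds the new slot and component for that varying component.
 */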
341 static void
342 remap_slots_and_components(struct exec_list *var_list, gl_shader_stage stage,
343 struct varying_loc (*remap)[4],
344 uint64_t *slots_used, uint64_t *out_slots_read,
345 uint32_t *p_slots_used, uint32_t *p_out_slots_read)
346 {
347 uint64_t out_slots_read_tmp[2] = {0};
348 uint64_t slots_used_tmp[2] = {0};
349
350 /* We don't touch builtins so just copy the bitmask */
351 slots_used_tmp[0] = *slots_used & BITFIELD64_RANGE(0, VARYING_SLOT_VAR0);
352
353 nir_foreach_variable(var, var_list) {
354 assert(var->data.location >= 0);
355
356 /* Only remap things that aren't built-ins */
357 if (var->data.location >= VARYING_SLOT_VAR0 &&
358 var->data.location - VARYING_SLOT_VAR0 < MAX_VARYINGS_INCL_PATCH) {
359
360 const struct glsl_type *type = var->type;
361 if (nir_is_per_vertex_io(var, stage)) {
362 assert(glsl_type_is_array(type));
363 type = glsl_get_array_element(type);
364 }
365
366 unsigned num_slots = glsl_count_attribute_slots(type, false);
367 bool used_across_stages = false;
368 bool outputs_read = false;
369
370 unsigned location = var->data.location - VARYING_SLOT_VAR0;
371 struct varying_loc *new_loc = &remap[location][var->data.location_frac];
372
373 unsigned loc_offset = var->data.patch ? VARYING_SLOT_PATCH0 : 0;
374 uint64_t used = var->data.patch ? *p_slots_used : *slots_used;
375 uint64_t outs_used =
376 var->data.patch ? *p_out_slots_read : *out_slots_read;
377 uint64_t slots =
378 BITFIELD64_RANGE(var->data.location - loc_offset, num_slots);
379
380 if (slots & used)
381 used_across_stages = true;
382
383 if (slots & outs_used)
384 outputs_read = true;
385
386 if (new_loc->location) {
387 var->data.location = new_loc->location;
388 var->data.location_frac = new_loc->component;
389 }
390
391 if (var->data.always_active_io) {
392 /* We can't apply link-time optimisations (specifically array
393 * splitting) to these, so we need to copy the existing mask;
394 * otherwise we will mess up the mask for things like partially
395 * marked arrays.
396 */
397 if (used_across_stages)
398 mark_all_used_slots(var, slots_used_tmp, used, num_slots);
399
400 if (outputs_read) {
401 mark_all_used_slots(var, out_slots_read_tmp, outs_used,
402 num_slots);
403 }
404 } else {
405 for (unsigned i = 0; i < num_slots; i++) {
406 if (used_across_stages)
407 mark_used_slot(var, slots_used_tmp, i);
408
409 if (outputs_read)
410 mark_used_slot(var, out_slots_read_tmp, i);
411 }
412 }
413 }
414 }
415
416 *slots_used = slots_used_tmp[0];
417 *out_slots_read = out_slots_read_tmp[0];
418 *p_slots_used = slots_used_tmp[1];
419 *p_out_slots_read = out_slots_read_tmp[1];
420 }
421
422 struct varying_component {
423 nir_variable *var;
424 uint8_t interp_type;
425 uint8_t interp_loc;
426 bool is_patch;
427 bool initialised;
428 };
429
430 static int
431 cmp_varying_component(const void *comp1_v, const void *comp2_v)
432 {
433 struct varying_component *comp1 = (struct varying_component *) comp1_v;
434 struct varying_component *comp2 = (struct varying_component *) comp2_v;
435
436 /* We want patches to be ordered at the end of the array */
437 if (comp1->is_patch != comp2->is_patch)
438 return comp1->is_patch ? 1 : -1;
439
440 /* We can only pack varyings with matching interpolation types, so group
441 * them together.
442 */
443 if (comp1->interp_type != comp2->interp_type)
444 return comp1->interp_type - comp2->interp_type;
445
446 /* Interpolation loc must match also. */
447 if (comp1->interp_loc != comp2->interp_loc)
448 return comp1->interp_loc - comp2->interp_loc;
449
450 /* If everything else matches just use the original location to sort */
451 return comp1->var->data.location - comp2->var->data.location;
452 }
453
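/* Build the array of consumer input components that are candidates for
 * packing. This happens in two passes: first each packable
 * (location, component) pair is assigned an index into the array, then the
 * shader's load/interpolate intrinsics are walked to record the
 * interpolation settings of the components that are actually used.
 */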
454 static void
455 gather_varying_component_info(nir_shader *consumer,
456 struct varying_component **varying_comp_info,
457 unsigned *varying_comp_info_size,
458 bool default_to_smooth_interp)
459 {
460 unsigned store_varying_info_idx[MAX_VARYINGS_INCL_PATCH][4] = {0};
461 unsigned num_of_comps_to_pack = 0;
462
463 /* Count the number of varyings that can be packed and create a mapping
464 * of those varyings to the array we will pass to qsort.
465 */
466 nir_foreach_variable(var, &consumer->inputs) {
467
468 /* Only remap things that aren't builtins. */
469 if (var->data.location >= VARYING_SLOT_VAR0 &&
470 var->data.location - VARYING_SLOT_VAR0 < MAX_VARYINGS_INCL_PATCH) {
471
472 /* We can't repack xfb varyings. */
473 if (var->data.always_active_io)
474 continue;
475
476 const struct glsl_type *type = var->type;
477 if (nir_is_per_vertex_io(var, consumer->info.stage)) {
478 assert(glsl_type_is_array(type));
479 type = glsl_get_array_element(type);
480 }
481
482 if (!is_packing_supported_for_type(type))
483 continue;
484
485 unsigned loc = var->data.location - VARYING_SLOT_VAR0;
486 store_varying_info_idx[loc][var->data.location_frac] =
487 ++num_of_comps_to_pack;
488 }
489 }
490
491 *varying_comp_info_size = num_of_comps_to_pack;
492 *varying_comp_info = rzalloc_array(NULL, struct varying_component,
493 num_of_comps_to_pack);
494
495 nir_function_impl *impl = nir_shader_get_entrypoint(consumer);
496
497 /* Walk over the shader and populate the varying component info array */
498 nir_foreach_block(block, impl) {
499 nir_foreach_instr(instr, block) {
500 if (instr->type != nir_instr_type_intrinsic)
501 continue;
502
503 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
504 if (intr->intrinsic != nir_intrinsic_load_deref &&
505 intr->intrinsic != nir_intrinsic_interp_deref_at_centroid &&
506 intr->intrinsic != nir_intrinsic_interp_deref_at_sample &&
507 intr->intrinsic != nir_intrinsic_interp_deref_at_offset)
508 continue;
509
510 nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
511 if (deref->mode != nir_var_shader_in)
512 continue;
513
514 /* We only remap things that aren't builtins. */
515 nir_variable *in_var = nir_deref_instr_get_variable(deref);
516 if (in_var->data.location < VARYING_SLOT_VAR0)
517 continue;
518
519 unsigned location = in_var->data.location - VARYING_SLOT_VAR0;
520 if (location >= MAX_VARYINGS_INCL_PATCH)
521 continue;
522
523 unsigned var_info_idx =
524 store_varying_info_idx[location][in_var->data.location_frac];
525 if (!var_info_idx)
526 continue;
527
528 struct varying_component *vc_info =
529 &(*varying_comp_info)[var_info_idx-1];
530
531 if (!vc_info->initialised) {
532 const struct glsl_type *type = in_var->type;
533 if (nir_is_per_vertex_io(in_var, consumer->info.stage)) {
534 assert(glsl_type_is_array(type));
535 type = glsl_get_array_element(type);
536 }
537
538 vc_info->var = in_var;
539 vc_info->interp_type =
540 get_interp_type(in_var, type, default_to_smooth_interp);
541 vc_info->interp_loc = get_interp_loc(in_var);
542 vc_info->is_patch = in_var->data.patch;
543 }
544 }
545 }
546 }
547
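/* Scan forward from *cursor for a free component with compatible
 * interpolation. For example, if the slot at *cursor already has components
 * x and y assigned with a matching interpolation type and location, the
 * varying component lands in z of the same slot; once all four components of
 * a slot are taken (or the interpolation doesn't match) we move on to the
 * next slot.
 */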
548 static void
549 assign_remap_locations(struct varying_loc (*remap)[4],
550 struct assigned_comps *assigned_comps,
551 struct varying_component *info,
552 unsigned *cursor, unsigned *comp,
553 unsigned max_location)
554 {
555 unsigned tmp_cursor = *cursor;
556 unsigned tmp_comp = *comp;
557
558 for (; tmp_cursor < max_location; tmp_cursor++) {
559
560 if (assigned_comps[tmp_cursor].comps) {
561 /* We can only pack varyings with matching interpolation types, and
562 * the interpolation location must also match.
563 * TODO: i965 can handle interpolation locations that don't match,
564 * but the radeonsi nir backend handles everything as vec4s and so
565 * expects this to be the same for all components. We could make this
566 * check driver specific or drop it if NIR ever becomes the only
567 * radeonsi backend.
568 */
569 if (assigned_comps[tmp_cursor].interp_type != info->interp_type ||
570 assigned_comps[tmp_cursor].interp_loc != info->interp_loc) {
571 tmp_comp = 0;
572 continue;
573 }
574
575 while (tmp_comp < 4 &&
576 (assigned_comps[tmp_cursor].comps & (1 << tmp_comp))) {
577 tmp_comp++;
578 }
579 }
580
581 if (tmp_comp == 4) {
582 tmp_comp = 0;
583 continue;
584 }
585
586 unsigned location = info->var->data.location - VARYING_SLOT_VAR0;
587
588 /* Once we have assigned a location mark it as used */
589 assigned_comps[tmp_cursor].comps |= (1 << tmp_comp);
590 assigned_comps[tmp_cursor].interp_type = info->interp_type;
591 assigned_comps[tmp_cursor].interp_loc = info->interp_loc;
592
593 /* Assign remap location */
594 remap[location][info->var->data.location_frac].component = tmp_comp++;
595 remap[location][info->var->data.location_frac].location =
596 tmp_cursor + VARYING_SLOT_VAR0;
597
598 break;
599 }
600
601 *cursor = tmp_cursor;
602 *comp = tmp_comp;
603 }
604
605 /* If there are empty components in a slot, compact the remaining components
606 * as close to component 0 as possible. This will make it easier to fill the
607 * empty components with components from a different slot in a following pass.
608 */
609 static void
610 compact_components(nir_shader *producer, nir_shader *consumer,
611 struct assigned_comps *assigned_comps,
612 bool default_to_smooth_interp)
613 {
614 struct exec_list *input_list = &consumer->inputs;
615 struct exec_list *output_list = &producer->outputs;
616 struct varying_loc remap[MAX_VARYINGS_INCL_PATCH][4] = {{{0}, {0}}};
617 struct varying_component *varying_comp_info;
618 unsigned varying_comp_info_size;
619
620 /* Gather varying component info */
621 gather_varying_component_info(consumer, &varying_comp_info,
622 &varying_comp_info_size,
623 default_to_smooth_interp);
624
625 /* Sort varying components. */
626 qsort(varying_comp_info, varying_comp_info_size,
627 sizeof(struct varying_component), cmp_varying_component);
628
629 unsigned cursor = 0;
630 unsigned comp = 0;
631
632 /* Set the remap array based on the sorted components */
633 for (unsigned i = 0; i < varying_comp_info_size; i++ ) {
634 struct varying_component *info = &varying_comp_info[i];
635
636 assert(info->is_patch || cursor < MAX_VARYING);
637 if (info->is_patch) {
638 /* The list should be sorted with all non-patch inputs first followed
639 * by patch inputs. When we hit our first patch input, we need to
640 * reset the cursor to MAX_VARYING so we put them in the right slot.
641 */
642 if (cursor < MAX_VARYING) {
643 cursor = MAX_VARYING;
644 comp = 0;
645 }
646
647 assign_remap_locations(remap, assigned_comps, info,
648 &cursor, &comp, MAX_VARYINGS_INCL_PATCH);
649 } else {
650 assign_remap_locations(remap, assigned_comps, info,
651 &cursor, &comp, MAX_VARYING);
652
653 /* Check if we failed to assign a remap location. This can happen if,
654 * for example, there are a bunch of unmovable components with
655 * mismatching interpolation types, causing us to skip over locations
656 * that would have been useful for packing later components.
657 * The solution is to iterate over the locations again (this should
658 * happen very rarely in practice).
659 */
660 if (cursor == MAX_VARYING) {
661 cursor = 0;
662 comp = 0;
663 assign_remap_locations(remap, assigned_comps, info,
664 &cursor, &comp, MAX_VARYING);
665 }
666 }
667 }
668
669 ralloc_free(varying_comp_info);
670
671 uint64_t zero = 0;
672 uint32_t zero32 = 0;
673 remap_slots_and_components(input_list, consumer->info.stage, remap,
674 &consumer->info.inputs_read, &zero,
675 &consumer->info.patch_inputs_read, &zero32);
676 remap_slots_and_components(output_list, producer->info.stage, remap,
677 &producer->info.outputs_written,
678 &producer->info.outputs_read,
679 &producer->info.patch_outputs_written,
680 &producer->info.patch_outputs_read);
681 }
682
683 /* We assume that this has been called more-or-less directly after
684 * remove_unused_varyings. At this point, all of the varyings that we
685 * aren't going to be using have been completely removed and the
686 * inputs_read and outputs_written fields in nir_shader_info reflect
687 * this. Therefore, the total set of valid slots is the OR of the two
688 * sets of varyings; this accounts for varyings which one side may need
689 * to read/write even if the other doesn't. This can happen if, for
690 * instance, an array is used indirectly from one side causing it to be
691 * unsplittable but directly from the other.
692 */
693 void
694 nir_compact_varyings(nir_shader *producer, nir_shader *consumer,
695 bool default_to_smooth_interp)
696 {
697 assert(producer->info.stage != MESA_SHADER_FRAGMENT);
698 assert(consumer->info.stage != MESA_SHADER_VERTEX);
699
700 struct assigned_comps assigned_comps[MAX_VARYINGS_INCL_PATCH] = {0};
701
702 get_unmoveable_components_masks(&producer->outputs, assigned_comps,
703 producer->info.stage,
704 default_to_smooth_interp);
705 get_unmoveable_components_masks(&consumer->inputs, assigned_comps,
706 consumer->info.stage,
707 default_to_smooth_interp);
708
709 compact_components(producer, consumer, assigned_comps,
710 default_to_smooth_interp);
711 }
712
713 /*
714 * Mark XFB varyings as always_active_io in the consumer so the linking opts
715 * don't touch them.
716 */
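/* For example, if the producer has an xfb-captured output at
 * VARYING_SLOT_VAR3, the consumer input occupying that same slot is flagged
 * as always_active_io so the linking optimisations (unused-varying removal,
 * compaction) leave it in place and it stays consistent with the producer's
 * xfb output.
 */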
717 void
718 nir_link_xfb_varyings(nir_shader *producer, nir_shader *consumer)
719 {
720 nir_variable *input_vars[MAX_VARYING] = { 0 };
721
722 nir_foreach_variable(var, &consumer->inputs) {
723 if (var->data.location >= VARYING_SLOT_VAR0 &&
724 var->data.location - VARYING_SLOT_VAR0 < MAX_VARYING) {
725
726 unsigned location = var->data.location - VARYING_SLOT_VAR0;
727 input_vars[location] = var;
728 }
729 }
730
731 nir_foreach_variable(var, &producer->outputs) {
732 if (var->data.location >= VARYING_SLOT_VAR0 &&
733 var->data.location - VARYING_SLOT_VAR0 < MAX_VARYING) {
734
735 if (!var->data.always_active_io)
736 continue;
737
738 unsigned location = var->data.location - VARYING_SLOT_VAR0;
739 if (input_vars[location]) {
740 input_vars[location]->data.always_active_io = true;
741 }
742 }
743 }
744 }
745
746 static bool
747 does_varying_match(nir_variable *out_var, nir_variable *in_var)
748 {
749 return in_var->data.location == out_var->data.location &&
750 in_var->data.location_frac == out_var->data.location_frac;
751 }
752
753 static nir_variable *
754 get_matching_input_var(nir_shader *consumer, nir_variable *out_var)
755 {
756 nir_foreach_variable(var, &consumer->inputs) {
757 if (does_varying_match(out_var, var))
758 return var;
759 }
760
761 return NULL;
762 }
763
764 static bool
765 can_replace_varying(nir_variable *out_var)
766 {
767 /* Skip types that require more complex handling.
768 * TODO: add support for these types.
769 */
770 if (glsl_type_is_array(out_var->type) ||
771 glsl_type_is_dual_slot(out_var->type) ||
772 glsl_type_is_matrix(out_var->type) ||
773 glsl_type_is_struct_or_ifc(out_var->type))
774 return false;
775
776 /* Limit this pass to scalars for now to keep things simple. Most varyings
777 * should have been lowered to scalars at this point anyway.
778 */
779 if (!glsl_type_is_scalar(out_var->type))
780 return false;
781
782 if (out_var->data.location < VARYING_SLOT_VAR0 ||
783 out_var->data.location - VARYING_SLOT_VAR0 >= MAX_VARYING)
784 return false;
785
786 return true;
787 }
788
789 static bool
790 replace_constant_input(nir_shader *shader, nir_intrinsic_instr *store_intr)
791 {
792 nir_function_impl *impl = nir_shader_get_entrypoint(shader);
793
794 nir_builder b;
795 nir_builder_init(&b, impl);
796
797 nir_variable *out_var =
798 nir_deref_instr_get_variable(nir_src_as_deref(store_intr->src[0]));
799
800 bool progress = false;
801 nir_foreach_block(block, impl) {
802 nir_foreach_instr(instr, block) {
803 if (instr->type != nir_instr_type_intrinsic)
804 continue;
805
806 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
807 if (intr->intrinsic != nir_intrinsic_load_deref)
808 continue;
809
810 nir_deref_instr *in_deref = nir_src_as_deref(intr->src[0]);
811 if (in_deref->mode != nir_var_shader_in)
812 continue;
813
814 nir_variable *in_var = nir_deref_instr_get_variable(in_deref);
815
816 if (!does_varying_match(out_var, in_var))
817 continue;
818
819 b.cursor = nir_before_instr(instr);
820
821 nir_load_const_instr *out_const =
822 nir_instr_as_load_const(store_intr->src[1].ssa->parent_instr);
823
824 /* Add new const to replace the input */
825 nir_ssa_def *nconst = nir_build_imm(&b, store_intr->num_components,
826 intr->dest.ssa.bit_size,
827 out_const->value);
828
829 nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(nconst));
830
831 progress = true;
832 }
833 }
834
835 return progress;
836 }
837
838 static bool
839 replace_duplicate_input(nir_shader *shader, nir_variable *input_var,
840 nir_intrinsic_instr *dup_store_intr)
841 {
842 assert(input_var);
843
844 nir_function_impl *impl = nir_shader_get_entrypoint(shader);
845
846 nir_builder b;
847 nir_builder_init(&b, impl);
848
849 nir_variable *dup_out_var =
850 nir_deref_instr_get_variable(nir_src_as_deref(dup_store_intr->src[0]));
851
852 bool progress = false;
853 nir_foreach_block(block, impl) {
854 nir_foreach_instr(instr, block) {
855 if (instr->type != nir_instr_type_intrinsic)
856 continue;
857
858 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
859 if (intr->intrinsic != nir_intrinsic_load_deref)
860 continue;
861
862 nir_deref_instr *in_deref = nir_src_as_deref(intr->src[0]);
863 if (in_deref->mode != nir_var_shader_in)
864 continue;
865
866 nir_variable *in_var = nir_deref_instr_get_variable(in_deref);
867
868 if (!does_varying_match(dup_out_var, in_var) ||
869 in_var->data.interpolation != input_var->data.interpolation ||
870 get_interp_loc(in_var) != get_interp_loc(input_var))
871 continue;
872
873 b.cursor = nir_before_instr(instr);
874
875 nir_ssa_def *load = nir_load_var(&b, input_var);
876 nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(load));
877
878 progress = true;
879 }
880 }
881
882 return progress;
883 }
884
885 bool
886 nir_link_opt_varyings(nir_shader *producer, nir_shader *consumer)
887 {
888 /* TODO: Add support for more shader stage combinations */
889 if (consumer->info.stage != MESA_SHADER_FRAGMENT ||
890 (producer->info.stage != MESA_SHADER_VERTEX &&
891 producer->info.stage != MESA_SHADER_TESS_EVAL))
892 return false;
893
894 bool progress = false;
895
896 nir_function_impl *impl = nir_shader_get_entrypoint(producer);
897
898 struct hash_table *varying_values = _mesa_pointer_hash_table_create(NULL);
899
900 /* If we find a store in the last block of the producer we can be sure this
901 * is the only possible value for this output.
902 */
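/* For example, if the producer's last block stores a load_const (say 1.0) to
 * an output, every load of the matching input in the fragment shader is
 * replaced with that constant. If an output stores an SSA value that another
 * output is already known to store, loads of its matching input are instead
 * rewritten to load the other output's matching input (provided the
 * interpolation settings match).
 */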
903 nir_block *last_block = nir_impl_last_block(impl);
904 nir_foreach_instr_reverse(instr, last_block) {
905 if (instr->type != nir_instr_type_intrinsic)
906 continue;
907
908 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
909
910 if (intr->intrinsic != nir_intrinsic_store_deref)
911 continue;
912
913 nir_deref_instr *out_deref = nir_src_as_deref(intr->src[0]);
914 if (out_deref->mode != nir_var_shader_out)
915 continue;
916
917 nir_variable *out_var = nir_deref_instr_get_variable(out_deref);
918 if (!can_replace_varying(out_var))
919 continue;
920
921 if (intr->src[1].ssa->parent_instr->type == nir_instr_type_load_const) {
922 progress |= replace_constant_input(consumer, intr);
923 } else {
924 struct hash_entry *entry =
925 _mesa_hash_table_search(varying_values, intr->src[1].ssa);
926 if (entry) {
927 progress |= replace_duplicate_input(consumer,
928 (nir_variable *) entry->data,
929 intr);
930 } else {
931 nir_variable *in_var = get_matching_input_var(consumer, out_var);
932 if (in_var) {
933 _mesa_hash_table_insert(varying_values, intr->src[1].ssa,
934 in_var);
935 }
936 }
937 }
938 }
939
940 _mesa_hash_table_destroy(varying_values, NULL);
941
942 return progress;
943 }