nir: fix packing components with arrays
[mesa.git] / src / compiler / nir / nir_linking_helpers.c
/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "nir.h"
#include "nir_builder.h"
#include "util/set.h"
#include "util/hash_table.h"

/* This file contains various little helpers for doing simple linking in
 * NIR. Eventually, we'll probably want a full-blown varying packing
 * implementation in here. Right now, it removes unused varyings, compacts
 * the varying components that remain and performs a few other simple
 * link-time optimisations.
 */

/**
 * Returns the bits in the inputs_read, outputs_written, or
 * system_values_read bitfield corresponding to this variable.
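 *
 * Illustrative example (not from the original comment): a non-patch vec4
 * at VARYING_SLOT_VAR2 occupies a single slot, so this returns
 * 1ull << VARYING_SLOT_VAR2, while a mat4 at the same location occupies
 * four slots and returns 0xfull << VARYING_SLOT_VAR2.  Patch varyings are
 * shifted relative to VARYING_SLOT_PATCH0 instead.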
 */
static uint64_t
get_variable_io_mask(nir_variable *var, gl_shader_stage stage)
{
   if (var->data.location < 0)
      return 0;

   unsigned location = var->data.patch ?
      var->data.location - VARYING_SLOT_PATCH0 : var->data.location;

   assert(var->data.mode == nir_var_shader_in ||
          var->data.mode == nir_var_shader_out ||
          var->data.mode == nir_var_system_value);
   assert(var->data.location >= 0);

   const struct glsl_type *type = var->type;
   if (nir_is_per_vertex_io(var, stage)) {
      assert(glsl_type_is_array(type));
      type = glsl_get_array_element(type);
   }

   unsigned slots = glsl_count_attribute_slots(type, false);
   return ((1ull << slots) - 1) << location;
}

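/* Accumulate, per location_frac, the output slots that the TCS reads back
 * itself via load_deref of a shader_out variable.  Such outputs must stay
 * live even if the TES never reads them.
 */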
static void
tcs_add_output_reads(nir_shader *shader, uint64_t *read, uint64_t *patches_read)
{
   nir_foreach_function(function, shader) {
      if (!function->impl)
         continue;

      nir_foreach_block(block, function->impl) {
         nir_foreach_instr(instr, block) {
            if (instr->type != nir_instr_type_intrinsic)
               continue;

            nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
            if (intrin->intrinsic != nir_intrinsic_load_deref)
               continue;

            nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
            if (deref->mode != nir_var_shader_out)
               continue;

            nir_variable *var = nir_deref_instr_get_variable(deref);
            if (var->data.patch) {
               patches_read[var->data.location_frac] |=
                  get_variable_io_mask(var, shader->info.stage);
            } else {
               read[var->data.location_frac] |=
                  get_variable_io_mask(var, shader->info.stage);
            }
         }
      }
   }
}

/**
 * Helper for removing unused shader I/O variables, by demoting them to global
 * variables (which may then be dead-code eliminated).
 *
 * Example usage is:
 *
 * progress = nir_remove_unused_io_vars(producer,
 *                                      &producer->outputs,
 *                                      read, patches_read) ||
 *            progress;
 *
 * The "used" arguments should be arrays of 4 uint64_ts (probably of
 * VARYING_BIT_*), indexed by .location_frac. Note that for vector variables,
 * only the first channel (.location_frac) is examined for deciding if the
 * variable is used!
 */
bool
nir_remove_unused_io_vars(nir_shader *shader, struct exec_list *var_list,
                          uint64_t *used_by_other_stage,
                          uint64_t *used_by_other_stage_patches)
{
   bool progress = false;
   uint64_t *used;

   nir_foreach_variable_safe(var, var_list) {
      if (var->data.patch)
         used = used_by_other_stage_patches;
      else
         used = used_by_other_stage;

      if (var->data.location < VARYING_SLOT_VAR0 && var->data.location >= 0)
         continue;

      if (var->data.always_active_io)
         continue;

      if (var->data.explicit_xfb_buffer)
         continue;

      uint64_t other_stage = used[var->data.location_frac];

      if (!(other_stage & get_variable_io_mask(var, shader->info.stage))) {
         /* This one is unused by the other stage, make it a global instead */
         var->data.location = 0;
         var->data.mode = nir_var_shader_temp;

         exec_node_remove(&var->node);
         exec_list_push_tail(&shader->globals, &var->node);

         progress = true;
      }
   }

   if (progress)
      nir_fixup_deref_modes(shader);

   return progress;
}

bool
nir_remove_unused_varyings(nir_shader *producer, nir_shader *consumer)
{
   assert(producer->info.stage != MESA_SHADER_FRAGMENT);
   assert(consumer->info.stage != MESA_SHADER_VERTEX);

   uint64_t read[4] = { 0 }, written[4] = { 0 };
   uint64_t patches_read[4] = { 0 }, patches_written[4] = { 0 };

   nir_foreach_variable(var, &producer->outputs) {
      if (var->data.patch) {
         patches_written[var->data.location_frac] |=
            get_variable_io_mask(var, producer->info.stage);
      } else {
         written[var->data.location_frac] |=
            get_variable_io_mask(var, producer->info.stage);
      }
   }

   nir_foreach_variable(var, &consumer->inputs) {
      if (var->data.patch) {
         patches_read[var->data.location_frac] |=
            get_variable_io_mask(var, consumer->info.stage);
      } else {
         read[var->data.location_frac] |=
            get_variable_io_mask(var, consumer->info.stage);
      }
   }

   /* Each TCS invocation can read data written by other TCS invocations,
    * so even if the outputs are not used by the TES we must also make
    * sure they are not read by the TCS before demoting them to globals.
    */
   if (producer->info.stage == MESA_SHADER_TESS_CTRL)
      tcs_add_output_reads(producer, read, patches_read);

   bool progress = false;
   progress = nir_remove_unused_io_vars(producer, &producer->outputs, read,
                                        patches_read);

   progress = nir_remove_unused_io_vars(consumer, &consumer->inputs, written,
                                        patches_written) || progress;

   return progress;
}

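/* Interpolation mode that will actually be used for a varying: integers are
 * always flat; otherwise use the declared qualifier, falling back to smooth
 * or "none" depending on what the driver asked for.
 */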
static uint8_t
get_interp_type(nir_variable *var, const struct glsl_type *type,
                bool default_to_smooth_interp)
{
   if (glsl_type_is_integer(type))
      return INTERP_MODE_FLAT;
   else if (var->data.interpolation != INTERP_MODE_NONE)
      return var->data.interpolation;
   else if (default_to_smooth_interp)
      return INTERP_MODE_SMOOTH;
   else
      return INTERP_MODE_NONE;
}

#define INTERPOLATE_LOC_SAMPLE 0
#define INTERPOLATE_LOC_CENTROID 1
#define INTERPOLATE_LOC_CENTER 2

static uint8_t
get_interp_loc(nir_variable *var)
{
   if (var->data.sample)
      return INTERPOLATE_LOC_SAMPLE;
   else if (var->data.centroid)
      return INTERPOLATE_LOC_CENTROID;
   else
      return INTERPOLATE_LOC_CENTER;
}

static bool
is_packing_supported_for_type(const struct glsl_type *type)
{
   /* We ignore complex types such as arrays, matrices, structs and bit sizes
    * other than 32 bits. All other vector types should have been split into
    * scalar variables by the lower_io_to_scalar pass. The only exception
    * should be OpenGL xfb varyings.
    * TODO: add support for more complex types?
    */
   return glsl_type_is_scalar(type) && glsl_type_is_32bit(type);
}

struct assigned_comps
{
   uint8_t comps;
   uint8_t interp_type;
   uint8_t interp_loc;
   bool is_32bit;
};

/* Packing arrays and dual-slot varyings is difficult, so to avoid complex
 * algorithms this function just assigns them their existing location for now.
 * TODO: allow better packing of complex types.
 */
static void
get_unmoveable_components_masks(struct exec_list *var_list,
                                struct assigned_comps *comps,
                                gl_shader_stage stage,
                                bool default_to_smooth_interp)
{
   nir_foreach_variable_safe(var, var_list) {
      assert(var->data.location >= 0);

      /* Only remap things that aren't built-ins. */
      if (var->data.location >= VARYING_SLOT_VAR0 &&
          var->data.location - VARYING_SLOT_VAR0 < MAX_VARYINGS_INCL_PATCH) {

         const struct glsl_type *type = var->type;
         if (nir_is_per_vertex_io(var, stage)) {
            assert(glsl_type_is_array(type));
            type = glsl_get_array_element(type);
         }

         /* If we can pack this varying then don't mark the components as
          * used.
          */
         if (is_packing_supported_for_type(type))
            continue;

         unsigned location = var->data.location - VARYING_SLOT_VAR0;

         unsigned elements =
            glsl_type_is_vector_or_scalar(glsl_without_array(type)) ?
            glsl_get_vector_elements(glsl_without_array(type)) : 4;

         bool dual_slot = glsl_type_is_dual_slot(glsl_without_array(type));
         unsigned slots = glsl_count_attribute_slots(type, false);
         unsigned dmul = glsl_type_is_64bit(glsl_without_array(type)) ? 2 : 1;
         unsigned comps_slot2 = 0;
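         /* Worked example (illustrative only): a dvec3 with
          * location_frac == 0 has elements == 3, dmul == 2 and
          * dual_slot == true.  The even slot gets num_comps == 4, so its
          * comps mask is 0xf, and comps_slot2 == 3 * 2 - 4 == 2, so the
          * following odd slot gets comps 0x3.  This matches the
          * ARB_enhanced_layouts rule that a double consumes two components.
          */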
         for (unsigned i = 0; i < slots; i++) {
            if (dual_slot) {
               if (i & 1) {
                  comps[location + i].comps |= ((1 << comps_slot2) - 1);
               } else {
                  unsigned num_comps = 4 - var->data.location_frac;
                  comps_slot2 = (elements * dmul) - num_comps;

                  /* Assume ARB_enhanced_layouts packing rules for doubles */
                  assert(var->data.location_frac == 0 ||
                         var->data.location_frac == 2);
                  assert(comps_slot2 <= 4);

                  comps[location + i].comps |=
                     ((1 << num_comps) - 1) << var->data.location_frac;
               }
            } else {
               comps[location + i].comps |=
                  ((1 << (elements * dmul)) - 1) << var->data.location_frac;
            }

            comps[location + i].interp_type =
               get_interp_type(var, type, default_to_smooth_interp);
            comps[location + i].interp_loc = get_interp_loc(var);
            comps[location + i].is_32bit =
               glsl_type_is_32bit(glsl_without_array(type));
         }
      }
   }
}

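/* New location/component assigned to a packed component.  A location of 0
 * (VARYING_SLOT_POS, never a remap target) means the component was not
 * remapped.
 */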
struct varying_loc
{
   uint8_t component;
   uint32_t location;
};

static void
mark_all_used_slots(nir_variable *var, uint64_t *slots_used,
                    uint64_t slots_used_mask, unsigned num_slots)
{
   unsigned loc_offset = var->data.patch ? VARYING_SLOT_PATCH0 : 0;

   slots_used[var->data.patch ? 1 : 0] |= slots_used_mask &
      BITFIELD64_RANGE(var->data.location - loc_offset, num_slots);
}

static void
mark_used_slot(nir_variable *var, uint64_t *slots_used, unsigned offset)
{
   unsigned loc_offset = var->data.patch ? VARYING_SLOT_PATCH0 : 0;

   slots_used[var->data.patch ? 1 : 0] |=
      BITFIELD64_BIT(var->data.location - loc_offset + offset);
}

static void
remap_slots_and_components(struct exec_list *var_list, gl_shader_stage stage,
                           struct varying_loc (*remap)[4],
                           uint64_t *slots_used, uint64_t *out_slots_read,
                           uint32_t *p_slots_used, uint32_t *p_out_slots_read)
{
   uint64_t out_slots_read_tmp[2] = {0};
   uint64_t slots_used_tmp[2] = {0};

   /* We don't touch builtins so just copy the bitmask */
   slots_used_tmp[0] = *slots_used & BITFIELD64_RANGE(0, VARYING_SLOT_VAR0);

   nir_foreach_variable(var, var_list) {
      assert(var->data.location >= 0);

      /* Only remap things that aren't built-ins */
      if (var->data.location >= VARYING_SLOT_VAR0 &&
          var->data.location - VARYING_SLOT_VAR0 < MAX_VARYINGS_INCL_PATCH) {

         const struct glsl_type *type = var->type;
         if (nir_is_per_vertex_io(var, stage)) {
            assert(glsl_type_is_array(type));
            type = glsl_get_array_element(type);
         }

         unsigned num_slots = glsl_count_attribute_slots(type, false);
         bool used_across_stages = false;
         bool outputs_read = false;

         unsigned location = var->data.location - VARYING_SLOT_VAR0;
         struct varying_loc *new_loc = &remap[location][var->data.location_frac];

         unsigned loc_offset = var->data.patch ? VARYING_SLOT_PATCH0 : 0;
         uint64_t used = var->data.patch ? *p_slots_used : *slots_used;
         uint64_t outs_used =
            var->data.patch ? *p_out_slots_read : *out_slots_read;
         uint64_t slots =
            BITFIELD64_RANGE(var->data.location - loc_offset, num_slots);

         if (slots & used)
            used_across_stages = true;

         if (slots & outs_used)
            outputs_read = true;

         if (new_loc->location) {
            var->data.location = new_loc->location;
            var->data.location_frac = new_loc->component;
         }

         if (var->data.always_active_io) {
            /* We can't apply link-time optimisations (specifically array
             * splitting) to these, so we need to copy the existing mask;
             * otherwise we will mess up the mask for things like partially
             * marked arrays.
             */
            if (used_across_stages)
               mark_all_used_slots(var, slots_used_tmp, used, num_slots);

            if (outputs_read) {
               mark_all_used_slots(var, out_slots_read_tmp, outs_used,
                                   num_slots);
            }
         } else {
            for (unsigned i = 0; i < num_slots; i++) {
               if (used_across_stages)
                  mark_used_slot(var, slots_used_tmp, i);

               if (outputs_read)
                  mark_used_slot(var, out_slots_read_tmp, i);
            }
         }
      }
   }

   *slots_used = slots_used_tmp[0];
   *out_slots_read = out_slots_read_tmp[0];
   *p_slots_used = slots_used_tmp[1];
   *p_out_slots_read = out_slots_read_tmp[1];
}

struct varying_component {
   nir_variable *var;
   uint8_t interp_type;
   uint8_t interp_loc;
   bool is_32bit;
   bool is_patch;
   bool initialised;
};

static int
cmp_varying_component(const void *comp1_v, const void *comp2_v)
{
   struct varying_component *comp1 = (struct varying_component *) comp1_v;
   struct varying_component *comp2 = (struct varying_component *) comp2_v;

   /* We want patches to be ordered at the end of the array */
   if (comp1->is_patch != comp2->is_patch)
      return comp1->is_patch ? 1 : -1;

   /* We can only pack varyings with matching interpolation types so group
    * them together.
    */
   if (comp1->interp_type != comp2->interp_type)
      return comp1->interp_type - comp2->interp_type;

   /* Interpolation loc must match also. */
   if (comp1->interp_loc != comp2->interp_loc)
      return comp1->interp_loc - comp2->interp_loc;

   /* If everything else matches just use the original location to sort */
   return comp1->var->data.location - comp2->var->data.location;
}

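/* Count the consumer input components that can be packed and fill in a
 * varying_component entry for each one with the interpolation information
 * needed to sort compatible components next to each other.
 */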
static void
gather_varying_component_info(nir_shader *consumer,
                              struct varying_component **varying_comp_info,
                              unsigned *varying_comp_info_size,
                              bool default_to_smooth_interp)
{
   unsigned store_varying_info_idx[MAX_VARYINGS_INCL_PATCH][4] = {0};
   unsigned num_of_comps_to_pack = 0;

   /* Count the number of varyings that can be packed and create a mapping
    * of those varyings to the array we will pass to qsort.
    */
   nir_foreach_variable(var, &consumer->inputs) {

      /* Only remap things that aren't builtins. */
      if (var->data.location >= VARYING_SLOT_VAR0 &&
          var->data.location - VARYING_SLOT_VAR0 < MAX_VARYINGS_INCL_PATCH) {

         /* We can't repack xfb varyings. */
         if (var->data.always_active_io)
            continue;

         const struct glsl_type *type = var->type;
         if (nir_is_per_vertex_io(var, consumer->info.stage)) {
            assert(glsl_type_is_array(type));
            type = glsl_get_array_element(type);
         }

         if (!is_packing_supported_for_type(type))
            continue;

         unsigned loc = var->data.location - VARYING_SLOT_VAR0;
         store_varying_info_idx[loc][var->data.location_frac] =
            ++num_of_comps_to_pack;
      }
   }

   *varying_comp_info_size = num_of_comps_to_pack;
   *varying_comp_info = rzalloc_array(NULL, struct varying_component,
                                      num_of_comps_to_pack);

   nir_function_impl *impl = nir_shader_get_entrypoint(consumer);

   /* Walk over the shader and populate the varying component info array */
   nir_foreach_block(block, impl) {
      nir_foreach_instr(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
         if (intr->intrinsic != nir_intrinsic_load_deref &&
             intr->intrinsic != nir_intrinsic_interp_deref_at_centroid &&
             intr->intrinsic != nir_intrinsic_interp_deref_at_sample &&
             intr->intrinsic != nir_intrinsic_interp_deref_at_offset)
            continue;

         nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
         if (deref->mode != nir_var_shader_in)
            continue;

         /* We only remap things that aren't builtins. */
         nir_variable *in_var = nir_deref_instr_get_variable(deref);
         if (in_var->data.location < VARYING_SLOT_VAR0)
            continue;

         unsigned location = in_var->data.location - VARYING_SLOT_VAR0;
         if (location >= MAX_VARYINGS_INCL_PATCH)
            continue;

         unsigned var_info_idx =
            store_varying_info_idx[location][in_var->data.location_frac];
         if (!var_info_idx)
            continue;

         struct varying_component *vc_info =
            &(*varying_comp_info)[var_info_idx - 1];

         if (!vc_info->initialised) {
            const struct glsl_type *type = in_var->type;
            if (nir_is_per_vertex_io(in_var, consumer->info.stage)) {
               assert(glsl_type_is_array(type));
               type = glsl_get_array_element(type);
            }

            vc_info->var = in_var;
            vc_info->interp_type =
               get_interp_type(in_var, type, default_to_smooth_interp);
            vc_info->interp_loc = get_interp_loc(in_var);
            vc_info->is_32bit = glsl_type_is_32bit(type);
            vc_info->is_patch = in_var->data.patch;
         }
      }
   }
}

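/* Greedily scan the assigned_comps array from the current cursor for the
 * first slot with a free component whose interpolation type/loc and bit
 * size are compatible with this component, then record the new
 * location/component in the remap table and mark it as used.
 */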
static void
assign_remap_locations(struct varying_loc (*remap)[4],
                       struct assigned_comps *assigned_comps,
                       struct varying_component *info,
                       unsigned *cursor, unsigned *comp,
                       unsigned max_location)
{
   unsigned tmp_cursor = *cursor;
   unsigned tmp_comp = *comp;

   for (; tmp_cursor < max_location; tmp_cursor++) {

      if (assigned_comps[tmp_cursor].comps) {
         /* We can only pack varyings with matching interpolation types,
          * and the interpolation loc must match as well.
          * TODO: i965 can handle interpolation locations that don't match,
          * but the radeonsi nir backend handles everything as vec4s and so
          * expects this to be the same for all components. We could make this
          * check driver specific or drop it if NIR ever becomes the only
          * radeonsi backend.
          */
         if (assigned_comps[tmp_cursor].interp_type != info->interp_type ||
             assigned_comps[tmp_cursor].interp_loc != info->interp_loc) {
            tmp_comp = 0;
            continue;
         }

         /* We can only pack varyings with matching types, and the current
          * algorithm only supports packing 32-bit.
          */
         if (!assigned_comps[tmp_cursor].is_32bit) {
            tmp_comp = 0;
            continue;
         }

         while (tmp_comp < 4 &&
                (assigned_comps[tmp_cursor].comps & (1 << tmp_comp))) {
            tmp_comp++;
         }
      }

      if (tmp_comp == 4) {
         tmp_comp = 0;
         continue;
      }

      unsigned location = info->var->data.location - VARYING_SLOT_VAR0;

      /* Once we have assigned a location mark it as used */
      assigned_comps[tmp_cursor].comps |= (1 << tmp_comp);
      assigned_comps[tmp_cursor].interp_type = info->interp_type;
      assigned_comps[tmp_cursor].interp_loc = info->interp_loc;
      assigned_comps[tmp_cursor].is_32bit = info->is_32bit;

      /* Assign remap location */
      remap[location][info->var->data.location_frac].component = tmp_comp++;
      remap[location][info->var->data.location_frac].location =
         tmp_cursor + VARYING_SLOT_VAR0;

      break;
   }

   *cursor = tmp_cursor;
   *comp = tmp_comp;
}

/* If there are empty components in the slot, compact the remaining components
 * as close to component 0 as possible. This will make it easier to fill the
 * empty components with components from a different slot in a following pass.
 */
static void
compact_components(nir_shader *producer, nir_shader *consumer,
                   struct assigned_comps *assigned_comps,
                   bool default_to_smooth_interp)
{
   struct exec_list *input_list = &consumer->inputs;
   struct exec_list *output_list = &producer->outputs;
   struct varying_loc remap[MAX_VARYINGS_INCL_PATCH][4] = {{{0}, {0}}};
   struct varying_component *varying_comp_info;
   unsigned varying_comp_info_size;

   /* Gather varying component info */
   gather_varying_component_info(consumer, &varying_comp_info,
                                 &varying_comp_info_size,
                                 default_to_smooth_interp);

   /* Sort varying components. */
   qsort(varying_comp_info, varying_comp_info_size,
         sizeof(struct varying_component), cmp_varying_component);

   unsigned cursor = 0;
   unsigned comp = 0;

   /* Set the remap array based on the sorted components */
   for (unsigned i = 0; i < varying_comp_info_size; i++) {
      struct varying_component *info = &varying_comp_info[i];

      assert(info->is_patch || cursor < MAX_VARYING);
      if (info->is_patch) {
         /* The list should be sorted with all non-patch inputs first followed
          * by patch inputs. When we hit our first patch input, we need to
          * reset the cursor to MAX_VARYING so we put them in the right slot.
          */
         if (cursor < MAX_VARYING) {
            cursor = MAX_VARYING;
            comp = 0;
         }

         assign_remap_locations(remap, assigned_comps, info,
                                &cursor, &comp, MAX_VARYINGS_INCL_PATCH);
      } else {
         assign_remap_locations(remap, assigned_comps, info,
                                &cursor, &comp, MAX_VARYING);

         /* Check if we failed to assign a remap location. This can happen if
          * for example there are a bunch of unmovable components with
          * mismatching interpolation types causing us to skip over locations
          * that would have been useful for packing later components.
          * The solution is to iterate over the locations again (this should
          * happen very rarely in practice).
          */
         if (cursor == MAX_VARYING) {
            cursor = 0;
            comp = 0;
            assign_remap_locations(remap, assigned_comps, info,
                                   &cursor, &comp, MAX_VARYING);
         }
      }
   }

   ralloc_free(varying_comp_info);

   uint64_t zero = 0;
   uint32_t zero32 = 0;
   remap_slots_and_components(input_list, consumer->info.stage, remap,
                              &consumer->info.inputs_read, &zero,
                              &consumer->info.patch_inputs_read, &zero32);
   remap_slots_and_components(output_list, producer->info.stage, remap,
                              &producer->info.outputs_written,
                              &producer->info.outputs_read,
                              &producer->info.patch_outputs_written,
                              &producer->info.patch_outputs_read);
}

/* We assume that this has been called more-or-less directly after
 * remove_unused_varyings. At this point, all of the varyings that we
 * aren't going to be using have been completely removed and the
 * inputs_read and outputs_written fields in nir_shader_info reflect
 * this. Therefore, the total set of valid slots is the OR of the two
 * sets of varyings; this accounts for varyings which one side may need
 * to read/write even if the other doesn't. This can happen if, for
 * instance, an array is used indirectly from one side causing it to be
 * unsplittable but directly from the other.
 */
void
nir_compact_varyings(nir_shader *producer, nir_shader *consumer,
                     bool default_to_smooth_interp)
{
   assert(producer->info.stage != MESA_SHADER_FRAGMENT);
   assert(consumer->info.stage != MESA_SHADER_VERTEX);

   struct assigned_comps assigned_comps[MAX_VARYINGS_INCL_PATCH] = {0};

   get_unmoveable_components_masks(&producer->outputs, assigned_comps,
                                   producer->info.stage,
                                   default_to_smooth_interp);
   get_unmoveable_components_masks(&consumer->inputs, assigned_comps,
                                   consumer->info.stage,
                                   default_to_smooth_interp);

   compact_components(producer, consumer, assigned_comps,
                      default_to_smooth_interp);
}
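
/* Illustrative sketch of how a driver linker might combine these helpers
 * (assumes the varyings were already split to scalars, e.g. by the
 * lower_io_to_scalar pass mentioned earlier; exact call sites and cleanup
 * passes vary per driver):
 *
 *    if (nir_remove_unused_varyings(producer, consumer)) {
 *       // demoted vars are now globals; lower them so normal cleanup
 *       // passes can delete them
 *       nir_lower_global_vars_to_local(producer);
 *       nir_lower_global_vars_to_local(consumer);
 *    }
 *    nir_compact_varyings(producer, consumer, default_to_smooth_interp);
 */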

/*
 * Mark XFB varyings as always_active_io in the consumer so the linking opts
 * don't touch them.
 */
void
nir_link_xfb_varyings(nir_shader *producer, nir_shader *consumer)
{
   nir_variable *input_vars[MAX_VARYING] = { 0 };

   nir_foreach_variable(var, &consumer->inputs) {
      if (var->data.location >= VARYING_SLOT_VAR0 &&
          var->data.location - VARYING_SLOT_VAR0 < MAX_VARYING) {

         unsigned location = var->data.location - VARYING_SLOT_VAR0;
         input_vars[location] = var;
      }
   }

   nir_foreach_variable(var, &producer->outputs) {
      if (var->data.location >= VARYING_SLOT_VAR0 &&
          var->data.location - VARYING_SLOT_VAR0 < MAX_VARYING) {

         if (!var->data.always_active_io)
            continue;

         unsigned location = var->data.location - VARYING_SLOT_VAR0;
         if (input_vars[location]) {
            input_vars[location]->data.always_active_io = true;
         }
      }
   }
}

static bool
does_varying_match(nir_variable *out_var, nir_variable *in_var)
{
   return in_var->data.location == out_var->data.location &&
          in_var->data.location_frac == out_var->data.location_frac;
}

static nir_variable *
get_matching_input_var(nir_shader *consumer, nir_variable *out_var)
{
   nir_foreach_variable(var, &consumer->inputs) {
      if (does_varying_match(out_var, var))
         return var;
   }

   return NULL;
}

static bool
can_replace_varying(nir_variable *out_var)
{
   /* Skip types that require more complex handling.
    * TODO: add support for these types.
    */
   if (glsl_type_is_array(out_var->type) ||
       glsl_type_is_dual_slot(out_var->type) ||
       glsl_type_is_matrix(out_var->type) ||
       glsl_type_is_struct_or_ifc(out_var->type))
      return false;

   /* Limit this pass to scalars for now to keep things simple. Most varyings
    * should have been lowered to scalars at this point anyway.
    */
   if (!glsl_type_is_scalar(out_var->type))
      return false;

   if (out_var->data.location < VARYING_SLOT_VAR0 ||
       out_var->data.location - VARYING_SLOT_VAR0 >= MAX_VARYING)
      return false;

   return true;
}

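/* Rewrite every load of the consumer input that matches the given output
 * store with the constant value being stored, so the consumer no longer
 * depends on the varying.
 */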
static bool
replace_constant_input(nir_shader *shader, nir_intrinsic_instr *store_intr)
{
   nir_function_impl *impl = nir_shader_get_entrypoint(shader);

   nir_builder b;
   nir_builder_init(&b, impl);

   nir_variable *out_var =
      nir_deref_instr_get_variable(nir_src_as_deref(store_intr->src[0]));

   bool progress = false;
   nir_foreach_block(block, impl) {
      nir_foreach_instr(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
         if (intr->intrinsic != nir_intrinsic_load_deref)
            continue;

         nir_deref_instr *in_deref = nir_src_as_deref(intr->src[0]);
         if (in_deref->mode != nir_var_shader_in)
            continue;

         nir_variable *in_var = nir_deref_instr_get_variable(in_deref);

         if (!does_varying_match(out_var, in_var))
            continue;

         b.cursor = nir_before_instr(instr);

         nir_load_const_instr *out_const =
            nir_instr_as_load_const(store_intr->src[1].ssa->parent_instr);

         /* Add new const to replace the input */
         nir_ssa_def *nconst = nir_build_imm(&b, store_intr->num_components,
                                             intr->dest.ssa.bit_size,
                                             out_const->value);

         nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(nconst));

         progress = true;
      }
   }

   return progress;
}

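/* The output being stored here duplicates a value that already reaches the
 * consumer through input_var.  Rewrite loads of the matching consumer input
 * to load input_var instead, provided the interpolation qualifiers agree.
 */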
static bool
replace_duplicate_input(nir_shader *shader, nir_variable *input_var,
                        nir_intrinsic_instr *dup_store_intr)
{
   assert(input_var);

   nir_function_impl *impl = nir_shader_get_entrypoint(shader);

   nir_builder b;
   nir_builder_init(&b, impl);

   nir_variable *dup_out_var =
      nir_deref_instr_get_variable(nir_src_as_deref(dup_store_intr->src[0]));

   bool progress = false;
   nir_foreach_block(block, impl) {
      nir_foreach_instr(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
         if (intr->intrinsic != nir_intrinsic_load_deref)
            continue;

         nir_deref_instr *in_deref = nir_src_as_deref(intr->src[0]);
         if (in_deref->mode != nir_var_shader_in)
            continue;

         nir_variable *in_var = nir_deref_instr_get_variable(in_deref);

         if (!does_varying_match(dup_out_var, in_var) ||
             in_var->data.interpolation != input_var->data.interpolation ||
             get_interp_loc(in_var) != get_interp_loc(input_var))
            continue;

         b.cursor = nir_before_instr(instr);

         nir_ssa_def *load = nir_load_var(&b, input_var);
         nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(load));

         progress = true;
      }
   }

   return progress;
}

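/* Forward constant and duplicate producer outputs straight into the
 * consumer: if the last block of the producer stores a load_const (or a
 * value already stored to another output with a matching consumer input)
 * to a scalar varying, rewrite the corresponding loads in the consumer,
 * leaving the varying to be cleaned up by later dead-variable and
 * unused-varying passes.
 */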
bool
nir_link_opt_varyings(nir_shader *producer, nir_shader *consumer)
{
   /* TODO: Add support for more shader stage combinations */
   if (consumer->info.stage != MESA_SHADER_FRAGMENT ||
       (producer->info.stage != MESA_SHADER_VERTEX &&
        producer->info.stage != MESA_SHADER_TESS_EVAL))
      return false;

   bool progress = false;

   nir_function_impl *impl = nir_shader_get_entrypoint(producer);

   struct hash_table *varying_values = _mesa_pointer_hash_table_create(NULL);

   /* If we find a store in the last block of the producer we can be sure this
    * is the only possible value for this output.
    */
   nir_block *last_block = nir_impl_last_block(impl);
   nir_foreach_instr_reverse(instr, last_block) {
      if (instr->type != nir_instr_type_intrinsic)
         continue;

      nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);

      if (intr->intrinsic != nir_intrinsic_store_deref)
         continue;

      nir_deref_instr *out_deref = nir_src_as_deref(intr->src[0]);
      if (out_deref->mode != nir_var_shader_out)
         continue;

      nir_variable *out_var = nir_deref_instr_get_variable(out_deref);
      if (!can_replace_varying(out_var))
         continue;

      if (intr->src[1].ssa->parent_instr->type == nir_instr_type_load_const) {
         progress |= replace_constant_input(consumer, intr);
      } else {
         struct hash_entry *entry =
            _mesa_hash_table_search(varying_values, intr->src[1].ssa);
         if (entry) {
            progress |= replace_duplicate_input(consumer,
                                                (nir_variable *) entry->data,
                                                intr);
         } else {
            nir_variable *in_var = get_matching_input_var(consumer, out_var);
            if (in_var) {
               _mesa_hash_table_insert(varying_values, intr->src[1].ssa,
                                       in_var);
            }
         }
      }
   }

   _mesa_hash_table_destroy(varying_values, NULL);

   return progress;
}