nir: do not pack varying with different types
[mesa.git] / src / compiler / nir / nir_linking_helpers.c
/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "nir.h"
#include "nir_builder.h"
#include "util/set.h"
#include "util/hash_table.h"

/* This file contains various little helpers for doing simple linking in
 * NIR. Eventually, we'll probably want a full-blown varying packing
 * implementation in here. Right now, it just deletes unused things.
 */

/**
 * Returns the bits in the inputs_read, outputs_written, or
 * system_values_read bitfield corresponding to this variable.
 */
static uint64_t
get_variable_io_mask(nir_variable *var, gl_shader_stage stage)
{
   if (var->data.location < 0)
      return 0;

   unsigned location = var->data.patch ?
      var->data.location - VARYING_SLOT_PATCH0 : var->data.location;

   assert(var->data.mode == nir_var_shader_in ||
          var->data.mode == nir_var_shader_out ||
          var->data.mode == nir_var_system_value);
   assert(var->data.location >= 0);

   const struct glsl_type *type = var->type;
   if (nir_is_per_vertex_io(var, stage)) {
      assert(glsl_type_is_array(type));
      type = glsl_get_array_element(type);
   }

   unsigned slots = glsl_count_attribute_slots(type, false);
   return ((1ull << slots) - 1) << location;
}

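/* Accumulate the output slots (and patch output slots) that the TCS reads
 * back itself, so that cross-invocation reads are not lost when outputs
 * unused by the TES are demoted to globals.
 */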
static void
tcs_add_output_reads(nir_shader *shader, uint64_t *read, uint64_t *patches_read)
{
   nir_foreach_function(function, shader) {
      if (!function->impl)
         continue;

      nir_foreach_block(block, function->impl) {
         nir_foreach_instr(instr, block) {
            if (instr->type != nir_instr_type_intrinsic)
               continue;

            nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
            if (intrin->intrinsic != nir_intrinsic_load_deref)
               continue;

            nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
            if (deref->mode != nir_var_shader_out)
               continue;

            nir_variable *var = nir_deref_instr_get_variable(deref);
            if (var->data.patch) {
               patches_read[var->data.location_frac] |=
                  get_variable_io_mask(var, shader->info.stage);
            } else {
               read[var->data.location_frac] |=
                  get_variable_io_mask(var, shader->info.stage);
            }
         }
      }
   }
}

/**
 * Helper for removing unused shader I/O variables, by demoting them to global
 * variables (which may then be dead code eliminated).
 *
 * Example usage is:
 *
 * progress = nir_remove_unused_io_vars(producer,
 *                                      &producer->outputs,
 *                                      read, patches_read) ||
 *            progress;
 *
 * The "used" should be an array of 4 uint64_ts (probably of VARYING_BIT_*)
 * representing each .location_frac used. Note that for vector variables,
 * only the first channel (.location_frac) is examined for deciding if the
 * variable is used!
 */
bool
nir_remove_unused_io_vars(nir_shader *shader, struct exec_list *var_list,
                          uint64_t *used_by_other_stage,
                          uint64_t *used_by_other_stage_patches)
{
   bool progress = false;
   uint64_t *used;

   nir_foreach_variable_safe(var, var_list) {
      if (var->data.patch)
         used = used_by_other_stage_patches;
      else
         used = used_by_other_stage;

      if (var->data.location < VARYING_SLOT_VAR0 && var->data.location >= 0)
         continue;

      if (var->data.always_active_io)
         continue;

      if (var->data.explicit_xfb_buffer)
         continue;

      uint64_t other_stage = used[var->data.location_frac];

      if (!(other_stage & get_variable_io_mask(var, shader->info.stage))) {
         /* This one is invalid, make it a global variable instead */
         var->data.location = 0;
         var->data.mode = nir_var_shader_temp;

         exec_node_remove(&var->node);
         exec_list_push_tail(&shader->globals, &var->node);

         progress = true;
      }
   }

   if (progress)
      nir_fixup_deref_modes(shader);

   return progress;
}

bool
nir_remove_unused_varyings(nir_shader *producer, nir_shader *consumer)
{
   assert(producer->info.stage != MESA_SHADER_FRAGMENT);
   assert(consumer->info.stage != MESA_SHADER_VERTEX);

   uint64_t read[4] = { 0 }, written[4] = { 0 };
   uint64_t patches_read[4] = { 0 }, patches_written[4] = { 0 };

   nir_foreach_variable(var, &producer->outputs) {
      if (var->data.patch) {
         patches_written[var->data.location_frac] |=
            get_variable_io_mask(var, producer->info.stage);
      } else {
         written[var->data.location_frac] |=
            get_variable_io_mask(var, producer->info.stage);
      }
   }

   nir_foreach_variable(var, &consumer->inputs) {
      if (var->data.patch) {
         patches_read[var->data.location_frac] |=
            get_variable_io_mask(var, consumer->info.stage);
      } else {
         read[var->data.location_frac] |=
            get_variable_io_mask(var, consumer->info.stage);
      }
   }

   /* Each TCS invocation can read data written by other TCS invocations,
    * so even if the outputs are not used by the TES we must also make
    * sure they are not read by the TCS before demoting them to globals.
    */
   if (producer->info.stage == MESA_SHADER_TESS_CTRL)
      tcs_add_output_reads(producer, read, patches_read);

   bool progress = false;
   progress = nir_remove_unused_io_vars(producer, &producer->outputs, read,
                                        patches_read);

   progress = nir_remove_unused_io_vars(consumer, &consumer->inputs, written,
                                        patches_written) || progress;

   return progress;
}

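/* Pick the interpolation type we key packing decisions on: integers must be
 * flat, otherwise use the variable's declared mode, falling back to smooth
 * (or "none") when nothing was declared.
 */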
static uint8_t
get_interp_type(nir_variable *var, const struct glsl_type *type,
                bool default_to_smooth_interp)
{
   if (glsl_type_is_integer(type))
      return INTERP_MODE_FLAT;
   else if (var->data.interpolation != INTERP_MODE_NONE)
      return var->data.interpolation;
   else if (default_to_smooth_interp)
      return INTERP_MODE_SMOOTH;
   else
      return INTERP_MODE_NONE;
}

#define INTERPOLATE_LOC_SAMPLE 0
#define INTERPOLATE_LOC_CENTROID 1
#define INTERPOLATE_LOC_CENTER 2

static uint8_t
get_interp_loc(nir_variable *var)
{
   if (var->data.sample)
      return INTERPOLATE_LOC_SAMPLE;
   else if (var->data.centroid)
      return INTERPOLATE_LOC_CENTROID;
   else
      return INTERPOLATE_LOC_CENTER;
}

static bool
is_packing_supported_for_type(const struct glsl_type *type)
{
   /* We ignore complex types such as arrays, matrices, structs and bit sizes
    * other than 32-bit. All other vector types should have been split into
    * scalar variables by the lower_io_to_scalar pass. The only exception
    * should be OpenGL xfb varyings.
    * TODO: add support for more complex types?
    */
   return glsl_type_is_scalar(type) && glsl_type_is_32bit(type);
}

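/* Per-slot record of which components are already claimed by varyings we
 * will not move, along with the interpolation settings and bit size any new
 * occupant of the slot would have to match.
 */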
struct assigned_comps
{
   uint8_t comps;
   uint8_t interp_type;
   uint8_t interp_loc;
   bool is_32bit;
};

/* Packing arrays and dual-slot varyings is difficult, so to avoid complex
 * algorithms this function just assigns them their existing locations for now.
 * TODO: allow better packing of complex types.
 */
static void
get_unmoveable_components_masks(struct exec_list *var_list,
                                struct assigned_comps *comps,
                                gl_shader_stage stage,
                                bool default_to_smooth_interp)
{
   nir_foreach_variable_safe(var, var_list) {
      assert(var->data.location >= 0);

      /* Only remap things that aren't built-ins. */
      if (var->data.location >= VARYING_SLOT_VAR0 &&
          var->data.location - VARYING_SLOT_VAR0 < MAX_VARYINGS_INCL_PATCH) {

         const struct glsl_type *type = var->type;
         if (nir_is_per_vertex_io(var, stage)) {
            assert(glsl_type_is_array(type));
            type = glsl_get_array_element(type);
         }

         /* If we can pack this varying then don't mark the components as
          * used.
          */
         if (is_packing_supported_for_type(type))
            continue;

         unsigned location = var->data.location - VARYING_SLOT_VAR0;

         unsigned elements =
            glsl_type_is_vector_or_scalar(glsl_without_array(type)) ?
            glsl_get_vector_elements(glsl_without_array(type)) : 4;

         bool dual_slot = glsl_type_is_dual_slot(glsl_without_array(type));
         unsigned slots = glsl_count_attribute_slots(type, false);
         unsigned dmul = glsl_type_is_64bit(glsl_without_array(type)) ? 2 : 1;
         unsigned comps_slot2 = 0;
         for (unsigned i = 0; i < slots; i++) {
            if (dual_slot) {
               if (i & 1) {
                  comps[location + i].comps |= ((1 << comps_slot2) - 1);
               } else {
                  unsigned num_comps = 4 - var->data.location_frac;
                  comps_slot2 = (elements * dmul) - num_comps;

                  /* Assume ARB_enhanced_layouts packing rules for doubles */
                  assert(var->data.location_frac == 0 ||
                         var->data.location_frac == 2);
                  assert(comps_slot2 <= 4);

                  comps[location + i].comps |=
                     ((1 << num_comps) - 1) << var->data.location_frac;
               }
            } else {
               comps[location + i].comps |=
                  ((1 << (elements * dmul)) - 1) << var->data.location_frac;
            }

            comps[location + i].interp_type =
               get_interp_type(var, type, default_to_smooth_interp);
            comps[location + i].interp_loc = get_interp_loc(var);
            comps[location + i].is_32bit = glsl_type_is_32bit(type);
         }
      }
   }
}

struct varying_loc
{
   uint8_t component;
   uint32_t location;
};

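/* slots_used is a two-element array: index 0 tracks regular varying slots
 * and index 1 tracks patch slots (relative to VARYING_SLOT_PATCH0).
 */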
static void
mark_all_used_slots(nir_variable *var, uint64_t *slots_used,
                    uint64_t slots_used_mask, unsigned num_slots)
{
   unsigned loc_offset = var->data.patch ? VARYING_SLOT_PATCH0 : 0;

   slots_used[var->data.patch ? 1 : 0] |= slots_used_mask &
      BITFIELD64_RANGE(var->data.location - loc_offset, num_slots);
}

static void
mark_used_slot(nir_variable *var, uint64_t *slots_used, unsigned offset)
{
   unsigned loc_offset = var->data.patch ? VARYING_SLOT_PATCH0 : 0;

   slots_used[var->data.patch ? 1 : 0] |=
      BITFIELD64_BIT(var->data.location - loc_offset + offset);
}

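/* Apply the remap table to every non-built-in variable in var_list and
 * rebuild the shader-info slot bitmasks (slots used across stages, outputs
 * read, and their patch equivalents) to match the new locations.
 */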
static void
remap_slots_and_components(struct exec_list *var_list, gl_shader_stage stage,
                           struct varying_loc (*remap)[4],
                           uint64_t *slots_used, uint64_t *out_slots_read,
                           uint32_t *p_slots_used, uint32_t *p_out_slots_read)
{
   uint64_t out_slots_read_tmp[2] = {0};
   uint64_t slots_used_tmp[2] = {0};

   /* We don't touch builtins so just copy the bitmask */
   slots_used_tmp[0] = *slots_used & BITFIELD64_RANGE(0, VARYING_SLOT_VAR0);

   nir_foreach_variable(var, var_list) {
      assert(var->data.location >= 0);

      /* Only remap things that aren't built-ins */
      if (var->data.location >= VARYING_SLOT_VAR0 &&
          var->data.location - VARYING_SLOT_VAR0 < MAX_VARYINGS_INCL_PATCH) {

         const struct glsl_type *type = var->type;
         if (nir_is_per_vertex_io(var, stage)) {
            assert(glsl_type_is_array(type));
            type = glsl_get_array_element(type);
         }

         unsigned num_slots = glsl_count_attribute_slots(type, false);
         bool used_across_stages = false;
         bool outputs_read = false;

         unsigned location = var->data.location - VARYING_SLOT_VAR0;
         struct varying_loc *new_loc = &remap[location][var->data.location_frac];

         unsigned loc_offset = var->data.patch ? VARYING_SLOT_PATCH0 : 0;
         uint64_t used = var->data.patch ? *p_slots_used : *slots_used;
         uint64_t outs_used =
            var->data.patch ? *p_out_slots_read : *out_slots_read;
         uint64_t slots =
            BITFIELD64_RANGE(var->data.location - loc_offset, num_slots);

         if (slots & used)
            used_across_stages = true;

         if (slots & outs_used)
            outputs_read = true;

         if (new_loc->location) {
            var->data.location = new_loc->location;
            var->data.location_frac = new_loc->component;
         }

         if (var->data.always_active_io) {
            /* We can't apply link time optimisations (specifically array
             * splitting) to these, so we need to copy the existing mask,
             * otherwise we will mess up the mask for things like partially
             * marked arrays.
             */
            if (used_across_stages)
               mark_all_used_slots(var, slots_used_tmp, used, num_slots);

            if (outputs_read) {
               mark_all_used_slots(var, out_slots_read_tmp, outs_used,
                                   num_slots);
            }
         } else {
            for (unsigned i = 0; i < num_slots; i++) {
               if (used_across_stages)
                  mark_used_slot(var, slots_used_tmp, i);

               if (outputs_read)
                  mark_used_slot(var, out_slots_read_tmp, i);
            }
         }
      }
   }

   *slots_used = slots_used_tmp[0];
   *out_slots_read = out_slots_read_tmp[0];
   *p_slots_used = slots_used_tmp[1];
   *p_out_slots_read = out_slots_read_tmp[1];
}

struct varying_component {
   nir_variable *var;
   uint8_t interp_type;
   uint8_t interp_loc;
   bool is_32bit;
   bool is_patch;
   bool initialised;
};

static int
cmp_varying_component(const void *comp1_v, const void *comp2_v)
{
   struct varying_component *comp1 = (struct varying_component *) comp1_v;
   struct varying_component *comp2 = (struct varying_component *) comp2_v;

   /* We want patches to be ordered at the end of the array */
   if (comp1->is_patch != comp2->is_patch)
      return comp1->is_patch ? 1 : -1;

   /* We can only pack varyings with matching interpolation types so group
    * them together.
    */
   if (comp1->interp_type != comp2->interp_type)
      return comp1->interp_type - comp2->interp_type;

   /* Interpolation loc must match also. */
   if (comp1->interp_loc != comp2->interp_loc)
      return comp1->interp_loc - comp2->interp_loc;

   /* If everything else matches just use the original location to sort */
   return comp1->var->data.location - comp2->var->data.location;
}

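/* Build an array describing every packable consumer input component (one
 * varying_component entry per location/component pair) by first scanning the
 * input variable list and then the load/interp intrinsics that use them.
 */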
static void
gather_varying_component_info(nir_shader *consumer,
                              struct varying_component **varying_comp_info,
                              unsigned *varying_comp_info_size,
                              bool default_to_smooth_interp)
{
   unsigned store_varying_info_idx[MAX_VARYINGS_INCL_PATCH][4] = {0};
   unsigned num_of_comps_to_pack = 0;

   /* Count the number of varyings that can be packed and create a mapping
    * of those varyings to the array we will pass to qsort.
    */
   nir_foreach_variable(var, &consumer->inputs) {

      /* Only remap things that aren't builtins. */
      if (var->data.location >= VARYING_SLOT_VAR0 &&
          var->data.location - VARYING_SLOT_VAR0 < MAX_VARYINGS_INCL_PATCH) {

         /* We can't repack xfb varyings. */
         if (var->data.always_active_io)
            continue;

         const struct glsl_type *type = var->type;
         if (nir_is_per_vertex_io(var, consumer->info.stage)) {
            assert(glsl_type_is_array(type));
            type = glsl_get_array_element(type);
         }

         if (!is_packing_supported_for_type(type))
            continue;

         unsigned loc = var->data.location - VARYING_SLOT_VAR0;
         store_varying_info_idx[loc][var->data.location_frac] =
            ++num_of_comps_to_pack;
      }
   }

   *varying_comp_info_size = num_of_comps_to_pack;
   *varying_comp_info = rzalloc_array(NULL, struct varying_component,
                                      num_of_comps_to_pack);

   nir_function_impl *impl = nir_shader_get_entrypoint(consumer);

   /* Walk over the shader and populate the varying component info array */
   nir_foreach_block(block, impl) {
      nir_foreach_instr(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
         if (intr->intrinsic != nir_intrinsic_load_deref &&
             intr->intrinsic != nir_intrinsic_interp_deref_at_centroid &&
             intr->intrinsic != nir_intrinsic_interp_deref_at_sample &&
             intr->intrinsic != nir_intrinsic_interp_deref_at_offset)
            continue;

         nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
         if (deref->mode != nir_var_shader_in)
            continue;

         /* We only remap things that aren't builtins. */
         nir_variable *in_var = nir_deref_instr_get_variable(deref);
         if (in_var->data.location < VARYING_SLOT_VAR0)
            continue;

         unsigned location = in_var->data.location - VARYING_SLOT_VAR0;
         if (location >= MAX_VARYINGS_INCL_PATCH)
            continue;

         unsigned var_info_idx =
            store_varying_info_idx[location][in_var->data.location_frac];
         if (!var_info_idx)
            continue;

         struct varying_component *vc_info =
            &(*varying_comp_info)[var_info_idx - 1];

         if (!vc_info->initialised) {
            const struct glsl_type *type = in_var->type;
            if (nir_is_per_vertex_io(in_var, consumer->info.stage)) {
               assert(glsl_type_is_array(type));
               type = glsl_get_array_element(type);
            }

            vc_info->var = in_var;
            vc_info->interp_type =
               get_interp_type(in_var, type, default_to_smooth_interp);
            vc_info->interp_loc = get_interp_loc(in_var);
            vc_info->is_32bit = glsl_type_is_32bit(type);
            vc_info->is_patch = in_var->data.patch;
         }
      }
   }
}

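/* Scan forward from *cursor / *comp for the first slot and component this
 * varying can legally share (matching interpolation type, interpolation
 * location and 32-bit-ness), record the choice in the remap table and in
 * assigned_comps, and advance the cursor/component for the next caller.
 */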
static void
assign_remap_locations(struct varying_loc (*remap)[4],
                       struct assigned_comps *assigned_comps,
                       struct varying_component *info,
                       unsigned *cursor, unsigned *comp,
                       unsigned max_location)
{
   unsigned tmp_cursor = *cursor;
   unsigned tmp_comp = *comp;

   for (; tmp_cursor < max_location; tmp_cursor++) {

      if (assigned_comps[tmp_cursor].comps) {
         /* We can only pack varyings with matching interpolation types,
          * interpolation loc must match also.
          * TODO: i965 can handle interpolation locations that don't match,
          * but the radeonsi nir backend handles everything as vec4s and so
          * expects this to be the same for all components. We could make this
          * check driver specific or drop it if NIR ever becomes the only
          * radeonsi backend.
          */
         if (assigned_comps[tmp_cursor].interp_type != info->interp_type ||
             assigned_comps[tmp_cursor].interp_loc != info->interp_loc) {
            tmp_comp = 0;
            continue;
         }

         /* We can only pack varyings with matching types, and the current
          * algorithm only supports packing 32-bit.
          */
         if (!assigned_comps[tmp_cursor].is_32bit) {
            tmp_comp = 0;
            continue;
         }

         while (tmp_comp < 4 &&
                (assigned_comps[tmp_cursor].comps & (1 << tmp_comp))) {
            tmp_comp++;
         }
      }

      if (tmp_comp == 4) {
         tmp_comp = 0;
         continue;
      }

      unsigned location = info->var->data.location - VARYING_SLOT_VAR0;

      /* Once we have assigned a location mark it as used */
      assigned_comps[tmp_cursor].comps |= (1 << tmp_comp);
      assigned_comps[tmp_cursor].interp_type = info->interp_type;
      assigned_comps[tmp_cursor].interp_loc = info->interp_loc;
      assigned_comps[tmp_cursor].is_32bit = info->is_32bit;

      /* Assign remap location */
      remap[location][info->var->data.location_frac].component = tmp_comp++;
      remap[location][info->var->data.location_frac].location =
         tmp_cursor + VARYING_SLOT_VAR0;

      break;
   }

   *cursor = tmp_cursor;
   *comp = tmp_comp;
}

/* If there are empty components in the slot compact the remaining components
 * as close to component 0 as possible. This will make it easier to fill the
 * empty components with components from a different slot in a following pass.
 */
static void
compact_components(nir_shader *producer, nir_shader *consumer,
                   struct assigned_comps *assigned_comps,
                   bool default_to_smooth_interp)
{
   struct exec_list *input_list = &consumer->inputs;
   struct exec_list *output_list = &producer->outputs;
   struct varying_loc remap[MAX_VARYINGS_INCL_PATCH][4] = {{{0}, {0}}};
   struct varying_component *varying_comp_info;
   unsigned varying_comp_info_size;

   /* Gather varying component info */
   gather_varying_component_info(consumer, &varying_comp_info,
                                 &varying_comp_info_size,
                                 default_to_smooth_interp);

   /* Sort varying components. */
   qsort(varying_comp_info, varying_comp_info_size,
         sizeof(struct varying_component), cmp_varying_component);

   unsigned cursor = 0;
   unsigned comp = 0;

   /* Set the remap array based on the sorted components */
   for (unsigned i = 0; i < varying_comp_info_size; i++) {
      struct varying_component *info = &varying_comp_info[i];

      assert(info->is_patch || cursor < MAX_VARYING);
      if (info->is_patch) {
         /* The list should be sorted with all non-patch inputs first followed
          * by patch inputs. When we hit our first patch input, we need to
          * reset the cursor to MAX_VARYING so we put them in the right slot.
          */
         if (cursor < MAX_VARYING) {
            cursor = MAX_VARYING;
            comp = 0;
         }

         assign_remap_locations(remap, assigned_comps, info,
                                &cursor, &comp, MAX_VARYINGS_INCL_PATCH);
      } else {
         assign_remap_locations(remap, assigned_comps, info,
                                &cursor, &comp, MAX_VARYING);

         /* Check if we failed to assign a remap location. This can happen
          * if, for example, there are a bunch of unmovable components with
          * mismatching interpolation types causing us to skip over locations
          * that would have been useful for packing later components.
          * The solution is to iterate over the locations again (this should
          * happen very rarely in practice).
          */
         if (cursor == MAX_VARYING) {
            cursor = 0;
            comp = 0;
            assign_remap_locations(remap, assigned_comps, info,
                                   &cursor, &comp, MAX_VARYING);
         }
      }
   }

   ralloc_free(varying_comp_info);

   uint64_t zero = 0;
   uint32_t zero32 = 0;
   remap_slots_and_components(input_list, consumer->info.stage, remap,
                              &consumer->info.inputs_read, &zero,
                              &consumer->info.patch_inputs_read, &zero32);
   remap_slots_and_components(output_list, producer->info.stage, remap,
                              &producer->info.outputs_written,
                              &producer->info.outputs_read,
                              &producer->info.patch_outputs_written,
                              &producer->info.patch_outputs_read);
}

/* We assume that this has been called more-or-less directly after
 * remove_unused_varyings. At this point, all of the varyings that we
 * aren't going to be using have been completely removed and the
 * inputs_read and outputs_written fields in nir_shader_info reflect
 * this. Therefore, the total set of valid slots is the OR of the two
 * sets of varyings; this accounts for varyings which one side may need
 * to read/write even if the other doesn't. This can happen if, for
 * instance, an array is used indirectly from one side causing it to be
 * unsplittable but directly from the other.
 */
void
nir_compact_varyings(nir_shader *producer, nir_shader *consumer,
                     bool default_to_smooth_interp)
{
   assert(producer->info.stage != MESA_SHADER_FRAGMENT);
   assert(consumer->info.stage != MESA_SHADER_VERTEX);

   struct assigned_comps assigned_comps[MAX_VARYINGS_INCL_PATCH] = {0};

   get_unmoveable_components_masks(&producer->outputs, assigned_comps,
                                   producer->info.stage,
                                   default_to_smooth_interp);
   get_unmoveable_components_masks(&consumer->inputs, assigned_comps,
                                   consumer->info.stage,
                                   default_to_smooth_interp);

   compact_components(producer, consumer, assigned_comps,
                      default_to_smooth_interp);
}

/*
 * Mark XFB varyings as always_active_io in the consumer so the linking opts
 * don't touch them.
 */
void
nir_link_xfb_varyings(nir_shader *producer, nir_shader *consumer)
{
   nir_variable *input_vars[MAX_VARYING] = { 0 };

   nir_foreach_variable(var, &consumer->inputs) {
      if (var->data.location >= VARYING_SLOT_VAR0 &&
          var->data.location - VARYING_SLOT_VAR0 < MAX_VARYING) {

         unsigned location = var->data.location - VARYING_SLOT_VAR0;
         input_vars[location] = var;
      }
   }

   nir_foreach_variable(var, &producer->outputs) {
      if (var->data.location >= VARYING_SLOT_VAR0 &&
          var->data.location - VARYING_SLOT_VAR0 < MAX_VARYING) {

         if (!var->data.always_active_io)
            continue;

         unsigned location = var->data.location - VARYING_SLOT_VAR0;
         if (input_vars[location]) {
            input_vars[location]->data.always_active_io = true;
         }
      }
   }
}

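/* Two varyings "match" when they occupy the same location and component. */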
static bool
does_varying_match(nir_variable *out_var, nir_variable *in_var)
{
   return in_var->data.location == out_var->data.location &&
          in_var->data.location_frac == out_var->data.location_frac;
}

static nir_variable *
get_matching_input_var(nir_shader *consumer, nir_variable *out_var)
{
   nir_foreach_variable(var, &consumer->inputs) {
      if (does_varying_match(out_var, var))
         return var;
   }

   return NULL;
}

static bool
can_replace_varying(nir_variable *out_var)
{
   /* Skip types that require more complex handling.
    * TODO: add support for these types.
    */
   if (glsl_type_is_array(out_var->type) ||
       glsl_type_is_dual_slot(out_var->type) ||
       glsl_type_is_matrix(out_var->type) ||
       glsl_type_is_struct_or_ifc(out_var->type))
      return false;

   /* Limit this pass to scalars for now to keep things simple. Most varyings
    * should have been lowered to scalars at this point anyway.
    */
   if (!glsl_type_is_scalar(out_var->type))
      return false;

   if (out_var->data.location < VARYING_SLOT_VAR0 ||
       out_var->data.location - VARYING_SLOT_VAR0 >= MAX_VARYING)
      return false;

   return true;
}

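/* The producer stores a constant to this output, so rewrite every load of
 * the matching input in the consumer to use that constant directly.
 */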
static bool
replace_constant_input(nir_shader *shader, nir_intrinsic_instr *store_intr)
{
   nir_function_impl *impl = nir_shader_get_entrypoint(shader);

   nir_builder b;
   nir_builder_init(&b, impl);

   nir_variable *out_var =
      nir_deref_instr_get_variable(nir_src_as_deref(store_intr->src[0]));

   bool progress = false;
   nir_foreach_block(block, impl) {
      nir_foreach_instr(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
         if (intr->intrinsic != nir_intrinsic_load_deref)
            continue;

         nir_deref_instr *in_deref = nir_src_as_deref(intr->src[0]);
         if (in_deref->mode != nir_var_shader_in)
            continue;

         nir_variable *in_var = nir_deref_instr_get_variable(in_deref);

         if (!does_varying_match(out_var, in_var))
            continue;

         b.cursor = nir_before_instr(instr);

         nir_load_const_instr *out_const =
            nir_instr_as_load_const(store_intr->src[1].ssa->parent_instr);

         /* Add new const to replace the input */
         nir_ssa_def *nconst = nir_build_imm(&b, store_intr->num_components,
                                             intr->dest.ssa.bit_size,
                                             out_const->value);

         nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(nconst));

         progress = true;
      }
   }

   return progress;
}

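/* The producer stores the same SSA value to this output as to an earlier
 * output, and input_var is the consumer input matching that earlier output.
 * Rewrite loads of the duplicate input to read input_var instead, provided
 * the interpolation settings agree.
 */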
static bool
replace_duplicate_input(nir_shader *shader, nir_variable *input_var,
                        nir_intrinsic_instr *dup_store_intr)
{
   assert(input_var);

   nir_function_impl *impl = nir_shader_get_entrypoint(shader);

   nir_builder b;
   nir_builder_init(&b, impl);

   nir_variable *dup_out_var =
      nir_deref_instr_get_variable(nir_src_as_deref(dup_store_intr->src[0]));

   bool progress = false;
   nir_foreach_block(block, impl) {
      nir_foreach_instr(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
         if (intr->intrinsic != nir_intrinsic_load_deref)
            continue;

         nir_deref_instr *in_deref = nir_src_as_deref(intr->src[0]);
         if (in_deref->mode != nir_var_shader_in)
            continue;

         nir_variable *in_var = nir_deref_instr_get_variable(in_deref);

         if (!does_varying_match(dup_out_var, in_var) ||
             in_var->data.interpolation != input_var->data.interpolation ||
             get_interp_loc(in_var) != get_interp_loc(input_var))
            continue;

         b.cursor = nir_before_instr(instr);

         nir_ssa_def *load = nir_load_var(&b, input_var);
         nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(load));

         progress = true;
      }
   }

   return progress;
}

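/* Replace varyings whose producer-side value is a constant, or a duplicate
 * of another output, with that constant or the other varying on the consumer
 * side. Only stores in the producer's last block are considered, since those
 * are the only ones guaranteed to provide the final output value.
 *
 * An illustrative (not prescriptive) linker call sequence might be:
 *
 *    progress |= nir_link_opt_varyings(producer, consumer);
 *    progress |= nir_remove_unused_varyings(producer, consumer);
 *    nir_compact_varyings(producer, consumer, true);
 */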
bool
nir_link_opt_varyings(nir_shader *producer, nir_shader *consumer)
{
   /* TODO: Add support for more shader stage combinations */
   if (consumer->info.stage != MESA_SHADER_FRAGMENT ||
       (producer->info.stage != MESA_SHADER_VERTEX &&
        producer->info.stage != MESA_SHADER_TESS_EVAL))
      return false;

   bool progress = false;

   nir_function_impl *impl = nir_shader_get_entrypoint(producer);

   struct hash_table *varying_values = _mesa_pointer_hash_table_create(NULL);

   /* If we find a store in the last block of the producer we can be sure this
    * is the only possible value for this output.
    */
   nir_block *last_block = nir_impl_last_block(impl);
   nir_foreach_instr_reverse(instr, last_block) {
      if (instr->type != nir_instr_type_intrinsic)
         continue;

      nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);

      if (intr->intrinsic != nir_intrinsic_store_deref)
         continue;

      nir_deref_instr *out_deref = nir_src_as_deref(intr->src[0]);
      if (out_deref->mode != nir_var_shader_out)
         continue;

      nir_variable *out_var = nir_deref_instr_get_variable(out_deref);
      if (!can_replace_varying(out_var))
         continue;

      if (intr->src[1].ssa->parent_instr->type == nir_instr_type_load_const) {
         progress |= replace_constant_input(consumer, intr);
      } else {
         struct hash_entry *entry =
            _mesa_hash_table_search(varying_values, intr->src[1].ssa);
         if (entry) {
            progress |= replace_duplicate_input(consumer,
                                                (nir_variable *) entry->data,
                                                intr);
         } else {
            nir_variable *in_var = get_matching_input_var(consumer, out_var);
            if (in_var) {
               _mesa_hash_table_insert(varying_values, intr->src[1].ssa,
                                       in_var);
            }
         }
      }
   }

   _mesa_hash_table_destroy(varying_values, NULL);

   return progress;
}