src/compiler/nir/nir_linking_helpers.c

   1 /*
   2  * Copyright © 2015 Intel Corporation
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the next
  12  * paragraph) shall be included in all copies or substantial portions of the
  13  * Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21  * IN THE SOFTWARE.
  22  */
  23
  24 #include "nir.h"
  25 #include "nir_builder.h"
  26 #include "util/set.h"
  27 #include "util/hash_table.h"
  28
/* This file contains various little helpers for doing simple linking in
 * NIR: removing unused varyings and compacting the components of the
 * remaining ones.  Eventually, we'll probably want a full-blown varying
 * packing implementation in here.
 */
  33
  34 /**
  35  * Returns the bits in the inputs_read, outputs_written, or
  36  * system_values_read bitfield corresponding to this variable.
  37  */
  38 static uint64_t
  39 get_variable_io_mask(nir_variable *var, gl_shader_stage stage)
  40 {
  41    if (var->data.location < 0)
  42       return 0;
  43
  44    unsigned location = var->data.patch ?
  45       var->data.location - VARYING_SLOT_PATCH0 : var->data.location;
  46
  47    assert(var->data.mode == nir_var_shader_in ||
  48           var->data.mode == nir_var_shader_out ||
  49           var->data.mode == nir_var_system_value);
  50    assert(var->data.location >= 0);
  51
  52    const struct glsl_type *type = var->type;
  53    if (nir_is_per_vertex_io(var, stage) || var->data.per_view) {
  54       assert(glsl_type_is_array(type));
  55       type = glsl_get_array_element(type);
  56    }
  57
  58    unsigned slots = glsl_count_attribute_slots(type, false);
  59    return ((1ull << slots) - 1) << location;
  60 }
  61
  62 static uint8_t
  63 get_num_components(nir_variable *var)
  64 {
  65    if (glsl_type_is_struct_or_ifc(glsl_without_array(var->type)))
  66       return 4;
  67
  68    return glsl_get_vector_elements(glsl_without_array(var->type));
  69 }
  70
  71 static void
  72 tcs_add_output_reads(nir_shader *shader, uint64_t *read, uint64_t *patches_read)
  73 {
  74    nir_foreach_function(function, shader) {
  75       if (!function->impl)
  76          continue;
  77
  78       nir_foreach_block(block, function->impl) {
  79          nir_foreach_instr(instr, block) {
  80             if (instr->type != nir_instr_type_intrinsic)
  81                continue;
  82
  83             nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
  84             if (intrin->intrinsic != nir_intrinsic_load_deref)
  85                continue;
  86
  87             nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
  88             if (deref->mode != nir_var_shader_out)
  89                continue;
  90
  91             nir_variable *var = nir_deref_instr_get_variable(deref);
  92             for (unsigned i = 0; i < get_num_components(var); i++) {
  93                if (var->data.patch) {
  94                   patches_read[var->data.location_frac + i] |=
  95                      get_variable_io_mask(var, shader->info.stage);
  96                } else {
  97                   read[var->data.location_frac + i] |=
  98                      get_variable_io_mask(var, shader->info.stage);
  99                }
 100             }
 101          }
 102       }
 103    }
 104 }
 105
 106 /**
 107  * Helper for removing unused shader I/O variables, by demoting them to global
 108  * variables (which may then by dead code eliminated).
 109  *
 110  * Example usage is:
 111  *
 112  * progress = nir_remove_unused_io_vars(producer, nir_var_shader_out,
 113  *                                      read, patches_read) ||
 114  *                                      progress;
 115  *
 116  * The "used" should be an array of 4 uint64_ts (probably of VARYING_BIT_*)
 117  * representing each .location_frac used.  Note that for vector variables,
 118  * only the first channel (.location_frac) is examined for deciding if the
 119  * variable is used!
 120  */
 121 bool
 122 nir_remove_unused_io_vars(nir_shader *shader,
 123                           nir_variable_mode mode,
 124                           uint64_t *used_by_other_stage,
 125                           uint64_t *used_by_other_stage_patches)
 126 {
 127    bool progress = false;
 128    uint64_t *used;
 129
 130    assert(mode == nir_var_shader_in || mode == nir_var_shader_out);
 131    struct exec_list *var_list = nir_variable_list_for_mode(shader, mode);
 132
 133    nir_foreach_variable_safe(var, var_list) {
 134       if (var->data.patch)
 135          used = used_by_other_stage_patches;
 136       else
 137          used = used_by_other_stage;
 138
 139       if (var->data.location < VARYING_SLOT_VAR0 && var->data.location >= 0)
 140          continue;
 141
 142       if (var->data.always_active_io)
 143          continue;
 144
 145       if (var->data.explicit_xfb_buffer)
 146          continue;
 147
 148       uint64_t other_stage = used[var->data.location_frac];
 149
 150       if (!(other_stage & get_variable_io_mask(var, shader->info.stage))) {
 151          /* This one is invalid, make it a global variable instead */
 152          var->data.location = 0;
 153          var->data.mode = nir_var_shader_temp;
 154
 155          exec_node_remove(&var->node);
 156          exec_list_push_tail(&shader->globals, &var->node);
 157
 158          progress = true;
 159       }
 160    }
 161
 162    if (progress)
 163       nir_fixup_deref_modes(shader);
 164
 165    return progress;
 166 }
 167
 168 bool
 169 nir_remove_unused_varyings(nir_shader *producer, nir_shader *consumer)
 170 {
 171    assert(producer->info.stage != MESA_SHADER_FRAGMENT);
 172    assert(consumer->info.stage != MESA_SHADER_VERTEX);
 173
 174    uint64_t read[4] = { 0 }, written[4] = { 0 };
 175    uint64_t patches_read[4] = { 0 }, patches_written[4] = { 0 };
 176
 177    nir_foreach_shader_out_variable(var, producer) {
 178       for (unsigned i = 0; i < get_num_components(var); i++) {
 179          if (var->data.patch) {
 180             patches_written[var->data.location_frac + i] |=
 181                get_variable_io_mask(var, producer->info.stage);
 182          } else {
 183             written[var->data.location_frac + i] |=
 184                get_variable_io_mask(var, producer->info.stage);
 185          }
 186       }
 187    }
 188
 189    nir_foreach_shader_in_variable(var, consumer) {
 190       for (unsigned i = 0; i < get_num_components(var); i++) {
 191          if (var->data.patch) {
 192             patches_read[var->data.location_frac + i] |=
 193                get_variable_io_mask(var, consumer->info.stage);
 194          } else {
 195             read[var->data.location_frac + i] |=
 196                get_variable_io_mask(var, consumer->info.stage);
 197          }
 198       }
 199    }
 200
 201    /* Each TCS invocation can read data written by other TCS invocations,
 202     * so even if the outputs are not used by the TES we must also make
 203     * sure they are not read by the TCS before demoting them to globals.
 204     */
 205    if (producer->info.stage == MESA_SHADER_TESS_CTRL)
 206       tcs_add_output_reads(producer, read, patches_read);
 207
 208    bool progress = false;
 209    progress = nir_remove_unused_io_vars(producer, nir_var_shader_out, read,
 210                                         patches_read);
 211
 212    progress = nir_remove_unused_io_vars(consumer, nir_var_shader_in, written,
 213                                         patches_written) || progress;
 214
 215    return progress;
 216 }
 217
 218 static uint8_t
 219 get_interp_type(nir_variable *var, const struct glsl_type *type,
 220                 bool default_to_smooth_interp)
 221 {
 222    if (glsl_type_is_integer(type))
 223       return INTERP_MODE_FLAT;
 224    else if (var->data.interpolation != INTERP_MODE_NONE)
 225       return var->data.interpolation;
 226    else if (default_to_smooth_interp)
 227       return INTERP_MODE_SMOOTH;
 228    else
 229       return INTERP_MODE_NONE;
 230 }
 231
 232 #define INTERPOLATE_LOC_SAMPLE 0
 233 #define INTERPOLATE_LOC_CENTROID 1
 234 #define INTERPOLATE_LOC_CENTER 2
 235
 236 static uint8_t
 237 get_interp_loc(nir_variable *var)
 238 {
 239    if (var->data.sample)
 240       return INTERPOLATE_LOC_SAMPLE;
 241    else if (var->data.centroid)
 242       return INTERPOLATE_LOC_CENTROID;
 243    else
 244       return INTERPOLATE_LOC_CENTER;
 245 }
 246
 247 static bool
 248 is_packing_supported_for_type(const struct glsl_type *type)
 249 {
 250    /* We ignore complex types such as arrays, matrices, structs and bitsizes
 251     * other then 32bit. All other vector types should have been split into
 252     * scalar variables by the lower_io_to_scalar pass. The only exception
 253     * should be OpenGL xfb varyings.
 254     * TODO: add support for more complex types?
 255     */
 256    return glsl_type_is_scalar(type) && glsl_type_is_32bit(type);
 257 }
 258
 259 struct assigned_comps
 260 {
 261    uint8_t comps;
 262    uint8_t interp_type;
 263    uint8_t interp_loc;
 264    bool is_32bit;
 265 };
 266
 267 /* Packing arrays and dual slot varyings is difficult so to avoid complex
 268  * algorithms this function just assigns them their existing location for now.
 269  * TODO: allow better packing of complex types.
 270  */
 271 static void
 272 get_unmoveable_components_masks(nir_shader *shader,
 273                                 nir_variable_mode mode,
 274                                 struct assigned_comps *comps,
 275                                 gl_shader_stage stage,
 276                                 bool default_to_smooth_interp)
 277 {
 278    nir_foreach_variable_with_modes_safe(var, shader, mode) {
 279       assert(var->data.location >= 0);
 280
 281       /* Only remap things that aren't built-ins. */
 282       if (var->data.location >= VARYING_SLOT_VAR0 &&
 283           var->data.location - VARYING_SLOT_VAR0 < MAX_VARYINGS_INCL_PATCH) {
 284
 285          const struct glsl_type *type = var->type;
 286          if (nir_is_per_vertex_io(var, stage) || var->data.per_view) {
 287             assert(glsl_type_is_array(type));
 288             type = glsl_get_array_element(type);
 289          }
 290
 291          /* If we can pack this varying then don't mark the components as
 292           * used.
 293           */
 294          if (is_packing_supported_for_type(type))
 295             continue;
 296
 297          unsigned location = var->data.location - VARYING_SLOT_VAR0;
 298
 299          unsigned elements =
 300             glsl_type_is_vector_or_scalar(glsl_without_array(type)) ?
 301             glsl_get_vector_elements(glsl_without_array(type)) : 4;
 302
 303          bool dual_slot = glsl_type_is_dual_slot(glsl_without_array(type));
 304          unsigned slots = glsl_count_attribute_slots(type, false);
 305          unsigned dmul = glsl_type_is_64bit(glsl_without_array(type)) ? 2 : 1;
 306          unsigned comps_slot2 = 0;
 307          for (unsigned i = 0; i < slots; i++) {
 308             if (dual_slot) {
 309                if (i & 1) {
 310                   comps[location + i].comps |= ((1 << comps_slot2) - 1);
 311                } else {
 312                   unsigned num_comps = 4 - var->data.location_frac;
 313                   comps_slot2 = (elements * dmul) - num_comps;
 314
 315                   /* Assume ARB_enhanced_layouts packing rules for doubles */
 316                   assert(var->data.location_frac == 0 ||
 317                          var->data.location_frac == 2);
 318                   assert(comps_slot2 <= 4);
 319
 320                   comps[location + i].comps |=
 321                      ((1 << num_comps) - 1) << var->data.location_frac;
 322                }
 323             } else {
 324                comps[location + i].comps |=
 325                   ((1 << (elements * dmul)) - 1) << var->data.location_frac;
 326             }
 327
 328             comps[location + i].interp_type =
 329                get_interp_type(var, type, default_to_smooth_interp);
 330             comps[location + i].interp_loc = get_interp_loc(var);
 331             comps[location + i].is_32bit =
 332                glsl_type_is_32bit(glsl_without_array(type));
 333          }
 334       }
 335    }
 336 }
 337
 338 struct varying_loc
 339 {
 340    uint8_t component;
 341    uint32_t location;
 342 };
 343
 344 static void
 345 mark_all_used_slots(nir_variable *var, uint64_t *slots_used,
 346                     uint64_t slots_used_mask, unsigned num_slots)
 347 {
 348    unsigned loc_offset = var->data.patch ? VARYING_SLOT_PATCH0 : 0;
 349
 350    slots_used[var->data.patch ? 1 : 0] |= slots_used_mask &
 351       BITFIELD64_RANGE(var->data.location - loc_offset, num_slots);
 352 }
 353
 354 static void
 355 mark_used_slot(nir_variable *var, uint64_t *slots_used, unsigned offset)
 356 {
 357    unsigned loc_offset = var->data.patch ? VARYING_SLOT_PATCH0 : 0;
 358
 359    slots_used[var->data.patch ? 1 : 0] |=
 360       BITFIELD64_BIT(var->data.location - loc_offset + offset);
 361 }
 362
 363 static void
 364 remap_slots_and_components(nir_shader *shader, nir_variable_mode mode,
 365                            struct varying_loc (*remap)[4],
 366                            uint64_t *slots_used, uint64_t *out_slots_read,
 367                            uint32_t *p_slots_used, uint32_t *p_out_slots_read)
 368  {
 369    const gl_shader_stage stage = shader->info.stage;
 370    uint64_t out_slots_read_tmp[2] = {0};
 371    uint64_t slots_used_tmp[2] = {0};
 372
 373    /* We don't touch builtins so just copy the bitmask */
 374    slots_used_tmp[0] = *slots_used & BITFIELD64_RANGE(0, VARYING_SLOT_VAR0);
 375
 376    nir_foreach_variable_with_modes(var, shader, mode) {
 377       assert(var->data.location >= 0);
 378
 379       /* Only remap things that aren't built-ins */
 380       if (var->data.location >= VARYING_SLOT_VAR0 &&
 381           var->data.location - VARYING_SLOT_VAR0 < MAX_VARYINGS_INCL_PATCH) {
 382
 383          const struct glsl_type *type = var->type;
 384          if (nir_is_per_vertex_io(var, stage) || var->data.per_view) {
 385             assert(glsl_type_is_array(type));
 386             type = glsl_get_array_element(type);
 387          }
 388
 389          unsigned num_slots = glsl_count_attribute_slots(type, false);
 390          bool used_across_stages = false;
 391          bool outputs_read = false;
 392
 393          unsigned location = var->data.location - VARYING_SLOT_VAR0;
 394          struct varying_loc *new_loc = &remap[location][var->data.location_frac];
 395
 396          unsigned loc_offset = var->data.patch ? VARYING_SLOT_PATCH0 : 0;
 397          uint64_t used = var->data.patch ? *p_slots_used : *slots_used;
 398          uint64_t outs_used =
 399             var->data.patch ? *p_out_slots_read : *out_slots_read;
 400          uint64_t slots =
 401             BITFIELD64_RANGE(var->data.location - loc_offset, num_slots);
 402
 403          if (slots & used)
 404             used_across_stages = true;
 405
 406          if (slots & outs_used)
 407             outputs_read = true;
 408
 409          if (new_loc->location) {
 410             var->data.location = new_loc->location;
 411             var->data.location_frac = new_loc->component;
 412          }
 413
 414          if (var->data.always_active_io) {
 415             /* We can't apply link time optimisations (specifically array
 416              * splitting) to these so we need to copy the existing mask
 417              * otherwise we will mess up the mask for things like partially
 418              * marked arrays.
 419              */
 420             if (used_across_stages)
 421                mark_all_used_slots(var, slots_used_tmp, used, num_slots);
 422
 423             if (outputs_read) {
 424                mark_all_used_slots(var, out_slots_read_tmp, outs_used,
 425                                    num_slots);
 426             }
 427          } else {
 428             for (unsigned i = 0; i < num_slots; i++) {
 429                if (used_across_stages)
 430                   mark_used_slot(var, slots_used_tmp, i);
 431
 432                if (outputs_read)
 433                   mark_used_slot(var, out_slots_read_tmp, i);
 434             }
 435          }
 436       }
 437    }
 438
 439    *slots_used = slots_used_tmp[0];
 440    *out_slots_read = out_slots_read_tmp[0];
 441    *p_slots_used = slots_used_tmp[1];
 442    *p_out_slots_read = out_slots_read_tmp[1];
 443 }
 444
 445 struct varying_component {
 446    nir_variable *var;
 447    uint8_t interp_type;
 448    uint8_t interp_loc;
 449    bool is_32bit;
 450    bool is_patch;
 451    bool is_intra_stage_only;
 452    bool initialised;
 453 };
 454
 455 static int
 456 cmp_varying_component(const void *comp1_v, const void *comp2_v)
 457 {
 458    struct varying_component *comp1 = (struct varying_component *) comp1_v;
 459    struct varying_component *comp2 = (struct varying_component *) comp2_v;
 460
 461    /* We want patches to be order at the end of the array */
 462    if (comp1->is_patch != comp2->is_patch)
 463       return comp1->is_patch ? 1 : -1;
 464
 465    /* We want to try to group together TCS outputs that are only read by other
 466     * TCS invocations and not consumed by the follow stage.
 467     */
 468    if (comp1->is_intra_stage_only != comp2->is_intra_stage_only)
 469       return comp1->is_intra_stage_only ? 1 : -1;
 470
 471    /* We can only pack varyings with matching interpolation types so group
 472     * them together.
 473     */
 474    if (comp1->interp_type != comp2->interp_type)
 475       return comp1->interp_type - comp2->interp_type;
 476
 477    /* Interpolation loc must match also. */
 478    if (comp1->interp_loc != comp2->interp_loc)
 479       return comp1->interp_loc - comp2->interp_loc;
 480
 481    /* If everything else matches just use the original location to sort */
 482    return comp1->var->data.location - comp2->var->data.location;
 483 }
 484
 485 static void
 486 gather_varying_component_info(nir_shader *producer, nir_shader *consumer,
 487                               struct varying_component **varying_comp_info,
 488                               unsigned *varying_comp_info_size,
 489                               bool default_to_smooth_interp)
 490 {
 491    unsigned store_varying_info_idx[MAX_VARYINGS_INCL_PATCH][4] = {{0}};
 492    unsigned num_of_comps_to_pack = 0;
 493
 494    /* Count the number of varying that can be packed and create a mapping
 495     * of those varyings to the array we will pass to qsort.
 496     */
 497    nir_foreach_shader_out_variable(var, producer) {
 498
 499       /* Only remap things that aren't builtins. */
 500       if (var->data.location >= VARYING_SLOT_VAR0 &&
 501           var->data.location - VARYING_SLOT_VAR0 < MAX_VARYINGS_INCL_PATCH) {
 502
 503          /* We can't repack xfb varyings. */
 504          if (var->data.always_active_io)
 505             continue;
 506
 507          const struct glsl_type *type = var->type;
 508          if (nir_is_per_vertex_io(var, producer->info.stage) || var->data.per_view) {
 509             assert(glsl_type_is_array(type));
 510             type = glsl_get_array_element(type);
 511          }
 512
 513          if (!is_packing_supported_for_type(type))
 514             continue;
 515
 516          unsigned loc = var->data.location - VARYING_SLOT_VAR0;
 517          store_varying_info_idx[loc][var->data.location_frac] =
 518             ++num_of_comps_to_pack;
 519       }
 520    }
 521
 522    *varying_comp_info_size = num_of_comps_to_pack;
 523    *varying_comp_info = rzalloc_array(NULL, struct varying_component,
 524                                       num_of_comps_to_pack);
 525
 526    nir_function_impl *impl = nir_shader_get_entrypoint(consumer);
 527
 528    /* Walk over the shader and populate the varying component info array */
 529    nir_foreach_block(block, impl) {
 530       nir_foreach_instr(instr, block) {
 531          if (instr->type != nir_instr_type_intrinsic)
 532             continue;
 533
 534          nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
 535          if (intr->intrinsic != nir_intrinsic_load_deref &&
 536              intr->intrinsic != nir_intrinsic_interp_deref_at_centroid &&
 537              intr->intrinsic != nir_intrinsic_interp_deref_at_sample &&
 538              intr->intrinsic != nir_intrinsic_interp_deref_at_offset &&
 539              intr->intrinsic != nir_intrinsic_interp_deref_at_vertex)
 540             continue;
 541
 542          nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
 543          if (deref->mode != nir_var_shader_in)
 544             continue;
 545
 546          /* We only remap things that aren't builtins. */
 547          nir_variable *in_var = nir_deref_instr_get_variable(deref);
 548          if (in_var->data.location < VARYING_SLOT_VAR0)
 549             continue;
 550
 551          unsigned location = in_var->data.location - VARYING_SLOT_VAR0;
 552          if (location >= MAX_VARYINGS_INCL_PATCH)
 553             continue;
 554
 555          unsigned var_info_idx =
 556             store_varying_info_idx[location][in_var->data.location_frac];
 557          if (!var_info_idx)
 558             continue;
 559
 560          struct varying_component *vc_info =
 561             &(*varying_comp_info)[var_info_idx-1];
 562
 563          if (!vc_info->initialised) {
 564             const struct glsl_type *type = in_var->type;
 565             if (nir_is_per_vertex_io(in_var, consumer->info.stage) ||
 566                 in_var->data.per_view) {
 567                assert(glsl_type_is_array(type));
 568                type = glsl_get_array_element(type);
 569             }
 570
 571             vc_info->var = in_var;
 572             vc_info->interp_type =
 573                get_interp_type(in_var, type, default_to_smooth_interp);
 574             vc_info->interp_loc = get_interp_loc(in_var);
 575             vc_info->is_32bit = glsl_type_is_32bit(type);
 576             vc_info->is_patch = in_var->data.patch;
 577             vc_info->is_intra_stage_only = false;
 578             vc_info->initialised = true;
 579          }
 580       }
 581    }
 582
 583    /* Walk over the shader and populate the varying component info array
 584     * for varyings which are read by other TCS instances but are not consumed
 585     * by the TES.
 586     */
 587    if (producer->info.stage == MESA_SHADER_TESS_CTRL) {
 588       impl = nir_shader_get_entrypoint(producer);
 589
 590       nir_foreach_block(block, impl) {
 591          nir_foreach_instr(instr, block) {
 592             if (instr->type != nir_instr_type_intrinsic)
 593                continue;
 594
 595             nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
 596             if (intr->intrinsic != nir_intrinsic_load_deref)
 597                continue;
 598
 599             nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
 600             if (deref->mode != nir_var_shader_out)
 601                continue;
 602
 603             /* We only remap things that aren't builtins. */
 604             nir_variable *out_var = nir_deref_instr_get_variable(deref);
 605             if (out_var->data.location < VARYING_SLOT_VAR0)
 606                continue;
 607
 608             unsigned location = out_var->data.location - VARYING_SLOT_VAR0;
 609             if (location >= MAX_VARYINGS_INCL_PATCH)
 610                continue;
 611
 612             unsigned var_info_idx =
 613                store_varying_info_idx[location][out_var->data.location_frac];
 614             if (!var_info_idx) {
 615                /* Something went wrong, the shader interfaces didn't match, so
 616                 * abandon packing. This can happen for example when the
 617                 * inputs are scalars but the outputs are struct members.
 618                 */
 619                *varying_comp_info_size = 0;
 620                break;
 621             }
 622
 623             struct varying_component *vc_info =
 624                &(*varying_comp_info)[var_info_idx-1];
 625
 626             if (!vc_info->initialised) {
 627                const struct glsl_type *type = out_var->type;
 628                if (nir_is_per_vertex_io(out_var, producer->info.stage)) {
 629                   assert(glsl_type_is_array(type));
 630                   type = glsl_get_array_element(type);
 631                }
 632
 633                vc_info->var = out_var;
 634                vc_info->interp_type =
 635                   get_interp_type(out_var, type, default_to_smooth_interp);
 636                vc_info->interp_loc = get_interp_loc(out_var);
 637                vc_info->is_32bit = glsl_type_is_32bit(type);
 638                vc_info->is_patch = out_var->data.patch;
 639                vc_info->is_intra_stage_only = true;
 640                vc_info->initialised = true;
 641             }
 642          }
 643       }
 644    }
 645
 646    for (unsigned i = 0; i < *varying_comp_info_size; i++ ) {
 647       struct varying_component *vc_info = &(*varying_comp_info)[i];
 648       if (!vc_info->initialised) {
 649          /* Something went wrong, the shader interfaces didn't match, so
 650           * abandon packing. This can happen for example when the outputs are
 651           * scalars but the inputs are struct members.
 652           */
 653          *varying_comp_info_size = 0;
 654          break;
 655       }
 656    }
 657 }
 658
 659 static void
 660 assign_remap_locations(struct varying_loc (*remap)[4],
 661                        struct assigned_comps *assigned_comps,
 662                        struct varying_component *info,
 663                        unsigned *cursor, unsigned *comp,
 664                        unsigned max_location)
 665 {
 666    unsigned tmp_cursor = *cursor;
 667    unsigned tmp_comp = *comp;
 668
 669    for (; tmp_cursor < max_location; tmp_cursor++) {
 670
 671       if (assigned_comps[tmp_cursor].comps) {
 672          /* We can only pack varyings with matching interpolation types,
 673           * interpolation loc must match also.
 674           * TODO: i965 can handle interpolation locations that don't match,
 675           * but the radeonsi nir backend handles everything as vec4s and so
 676           * expects this to be the same for all components. We could make this
 677           * check driver specfific or drop it if NIR ever become the only
 678           * radeonsi backend.
 679           */
 680          if (assigned_comps[tmp_cursor].interp_type != info->interp_type ||
 681              assigned_comps[tmp_cursor].interp_loc != info->interp_loc) {
 682             tmp_comp = 0;
 683             continue;
 684          }
 685
 686          /* We can only pack varyings with matching types, and the current
 687           * algorithm only supports packing 32-bit.
 688           */
 689          if (!assigned_comps[tmp_cursor].is_32bit) {
 690             tmp_comp = 0;
 691             continue;
 692          }
 693
 694          while (tmp_comp < 4 &&
 695                 (assigned_comps[tmp_cursor].comps & (1 << tmp_comp))) {
 696             tmp_comp++;
 697          }
 698       }
 699
 700       if (tmp_comp == 4) {
 701          tmp_comp = 0;
 702          continue;
 703       }
 704
 705       unsigned location = info->var->data.location - VARYING_SLOT_VAR0;
 706
 707       /* Once we have assigned a location mark it as used */
 708       assigned_comps[tmp_cursor].comps |= (1 << tmp_comp);
 709       assigned_comps[tmp_cursor].interp_type = info->interp_type;
 710       assigned_comps[tmp_cursor].interp_loc = info->interp_loc;
 711       assigned_comps[tmp_cursor].is_32bit = info->is_32bit;
 712
 713       /* Assign remap location */
 714       remap[location][info->var->data.location_frac].component = tmp_comp++;
 715       remap[location][info->var->data.location_frac].location =
 716          tmp_cursor + VARYING_SLOT_VAR0;
 717
 718       break;
 719    }
 720
 721    *cursor = tmp_cursor;
 722    *comp = tmp_comp;
 723 }
 724
 725 /* If there are empty components in the slot compact the remaining components
 726  * as close to component 0 as possible. This will make it easier to fill the
 727  * empty components with components from a different slot in a following pass.
 728  */
 729 static void
 730 compact_components(nir_shader *producer, nir_shader *consumer,
 731                    struct assigned_comps *assigned_comps,
 732                    bool default_to_smooth_interp)
 733 {
 734    struct varying_loc remap[MAX_VARYINGS_INCL_PATCH][4] = {{{0}, {0}}};
 735    struct varying_component *varying_comp_info;
 736    unsigned varying_comp_info_size;
 737
 738    /* Gather varying component info */
 739    gather_varying_component_info(producer, consumer, &varying_comp_info,
 740                                  &varying_comp_info_size,
 741                                  default_to_smooth_interp);
 742
 743    /* Sort varying components. */
 744    qsort(varying_comp_info, varying_comp_info_size,
 745          sizeof(struct varying_component), cmp_varying_component);
 746
 747    unsigned cursor = 0;
 748    unsigned comp = 0;
 749
 750    /* Set the remap array based on the sorted components */
 751    for (unsigned i = 0; i < varying_comp_info_size; i++ ) {
 752       struct varying_component *info = &varying_comp_info[i];
 753
 754       assert(info->is_patch || cursor < MAX_VARYING);
 755       if (info->is_patch) {
 756          /* The list should be sorted with all non-patch inputs first followed
 757           * by patch inputs.  When we hit our first patch input, we need to
 758           * reset the cursor to MAX_VARYING so we put them in the right slot.
 759           */
 760          if (cursor < MAX_VARYING) {
 761             cursor = MAX_VARYING;
 762             comp = 0;
 763          }
 764
 765          assign_remap_locations(remap, assigned_comps, info,
 766                                 &cursor, &comp, MAX_VARYINGS_INCL_PATCH);
 767       } else {
 768          assign_remap_locations(remap, assigned_comps, info,
 769                                 &cursor, &comp, MAX_VARYING);
 770
 771          /* Check if we failed to assign a remap location. This can happen if
 772           * for example there are a bunch of unmovable components with
 773           * mismatching interpolation types causing us to skip over locations
 774           * that would have been useful for packing later components.
 775           * The solution is to iterate over the locations again (this should
 776           * happen very rarely in practice).
 777           */
 778          if (cursor == MAX_VARYING) {
 779             cursor = 0;
 780             comp = 0;
 781             assign_remap_locations(remap, assigned_comps, info,
 782                                    &cursor, &comp, MAX_VARYING);
 783          }
 784       }
 785    }
 786
 787    ralloc_free(varying_comp_info);
 788
 789    uint64_t zero = 0;
 790    uint32_t zero32 = 0;
 791    remap_slots_and_components(consumer, nir_var_shader_in, remap,
 792                               &consumer->info.inputs_read, &zero,
 793                               &consumer->info.patch_inputs_read, &zero32);
 794    remap_slots_and_components(producer, nir_var_shader_out, remap,
 795                               &producer->info.outputs_written,
 796                               &producer->info.outputs_read,
 797                               &producer->info.patch_outputs_written,
 798                               &producer->info.patch_outputs_read);
 799 }
 800
 801 /* We assume that this has been called more-or-less directly after
 802  * remove_unused_varyings.  At this point, all of the varyings that we
 803  * aren't going to be using have been completely removed and the
 804  * inputs_read and outputs_written fields in nir_shader_info reflect
 805  * this.  Therefore, the total set of valid slots is the OR of the two
 806  * sets of varyings;  this accounts for varyings which one side may need
 807  * to read/write even if the other doesn't.  This can happen if, for
 808  * instance, an array is used indirectly from one side causing it to be
 809  * unsplittable but directly from the other.
 810  */
 811 void
 812 nir_compact_varyings(nir_shader *producer, nir_shader *consumer,
 813                      bool default_to_smooth_interp)
 814 {
 815    assert(producer->info.stage != MESA_SHADER_FRAGMENT);
 816    assert(consumer->info.stage != MESA_SHADER_VERTEX);
 817
 818    struct assigned_comps assigned_comps[MAX_VARYINGS_INCL_PATCH] = {{0}};
 819
 820    get_unmoveable_components_masks(producer, nir_var_shader_out,
 821                                    assigned_comps,
 822                                    producer->info.stage,
 823                                    default_to_smooth_interp);
 824    get_unmoveable_components_masks(consumer, nir_var_shader_in,
 825                                    assigned_comps,
 826                                    consumer->info.stage,
 827                                    default_to_smooth_interp);
 828
 829    compact_components(producer, consumer, assigned_comps,
 830                       default_to_smooth_interp);
 831 }
 832
 833 /*
 834  * Mark XFB varyings as always_active_io in the consumer so the linking opts
 835  * don't touch them.
 836  */
 837 void
 838 nir_link_xfb_varyings(nir_shader *producer, nir_shader *consumer)
 839 {
 840    nir_variable *input_vars[MAX_VARYING] = { 0 };
 841
 842    nir_foreach_shader_in_variable(var, consumer) {
 843       if (var->data.location >= VARYING_SLOT_VAR0 &&
 844           var->data.location - VARYING_SLOT_VAR0 < MAX_VARYING) {
 845
 846          unsigned location = var->data.location - VARYING_SLOT_VAR0;
 847          input_vars[location] = var;
 848       }
 849    }
 850
 851    nir_foreach_shader_out_variable(var, producer) {
 852       if (var->data.location >= VARYING_SLOT_VAR0 &&
 853           var->data.location - VARYING_SLOT_VAR0 < MAX_VARYING) {
 854
 855          if (!var->data.always_active_io)
 856             continue;
 857
 858          unsigned location = var->data.location - VARYING_SLOT_VAR0;
 859          if (input_vars[location]) {
 860             input_vars[location]->data.always_active_io = true;
 861          }
 862       }
 863    }
 864 }
 865
 866 static bool
 867 does_varying_match(nir_variable *out_var, nir_variable *in_var)
 868 {
 869    return in_var->data.location == out_var->data.location &&
 870           in_var->data.location_frac == out_var->data.location_frac;
 871 }
 872
 873 static nir_variable *
 874 get_matching_input_var(nir_shader *consumer, nir_variable *out_var)
 875 {
 876    nir_foreach_shader_in_variable(var, consumer) {
 877       if (does_varying_match(out_var, var))
 878          return var;
 879    }
 880
 881    return NULL;
 882 }
 883
 884 static bool
 885 can_replace_varying(nir_variable *out_var)
 886 {
 887    /* Skip types that require more complex handling.
 888     * TODO: add support for these types.
 889     */
 890    if (glsl_type_is_array(out_var->type) ||
 891        glsl_type_is_dual_slot(out_var->type) ||
 892        glsl_type_is_matrix(out_var->type) ||
 893        glsl_type_is_struct_or_ifc(out_var->type))
 894       return false;
 895
 896    /* Limit this pass to scalars for now to keep things simple. Most varyings
 897     * should have been lowered to scalars at this point anyway.
 898     */
 899    if (!glsl_type_is_scalar(out_var->type))
 900       return false;
 901
 902    if (out_var->data.location < VARYING_SLOT_VAR0 ||
 903        out_var->data.location - VARYING_SLOT_VAR0 >= MAX_VARYING)
 904       return false;
 905
 906    return true;
 907 }
 908
 909 static bool
 910 replace_constant_input(nir_shader *shader, nir_intrinsic_instr *store_intr)
 911 {
 912    nir_function_impl *impl = nir_shader_get_entrypoint(shader);
 913
 914    nir_builder b;
 915    nir_builder_init(&b, impl);
 916
 917    nir_variable *out_var =
 918       nir_deref_instr_get_variable(nir_src_as_deref(store_intr->src[0]));
 919
 920    bool progress = false;
 921    nir_foreach_block(block, impl) {
 922       nir_foreach_instr(instr, block) {
 923          if (instr->type != nir_instr_type_intrinsic)
 924             continue;
 925
 926          nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
 927          if (intr->intrinsic != nir_intrinsic_load_deref)
 928             continue;
 929
 930          nir_deref_instr *in_deref = nir_src_as_deref(intr->src[0]);
 931          if (in_deref->mode != nir_var_shader_in)
 932             continue;
 933
 934          nir_variable *in_var = nir_deref_instr_get_variable(in_deref);
 935
 936          if (!does_varying_match(out_var, in_var))
 937             continue;
 938
 939          b.cursor = nir_before_instr(instr);
 940
 941          nir_load_const_instr *out_const =
 942             nir_instr_as_load_const(store_intr->src[1].ssa->parent_instr);
 943
 944          /* Add new const to replace the input */
 945          nir_ssa_def *nconst = nir_build_imm(&b, store_intr->num_components,
 946                                              intr->dest.ssa.bit_size,
 947                                              out_const->value);
 948
 949          nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(nconst));
 950
 951          progress = true;
 952       }
 953    }
 954
 955    return progress;
 956 }
 957
 958 static bool
 959 replace_duplicate_input(nir_shader *shader, nir_variable *input_var,
 960                          nir_intrinsic_instr *dup_store_intr)
 961 {
 962    assert(input_var);
 963
 964    nir_function_impl *impl = nir_shader_get_entrypoint(shader);
 965
 966    nir_builder b;
 967    nir_builder_init(&b, impl);
 968
 969    nir_variable *dup_out_var =
 970       nir_deref_instr_get_variable(nir_src_as_deref(dup_store_intr->src[0]));
 971
 972    bool progress = false;
 973    nir_foreach_block(block, impl) {
 974       nir_foreach_instr(instr, block) {
 975          if (instr->type != nir_instr_type_intrinsic)
 976             continue;
 977
 978          nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
 979          if (intr->intrinsic != nir_intrinsic_load_deref)
 980             continue;
 981
 982          nir_deref_instr *in_deref = nir_src_as_deref(intr->src[0]);
 983          if (in_deref->mode != nir_var_shader_in)
 984             continue;
 985
 986          nir_variable *in_var = nir_deref_instr_get_variable(in_deref);
 987
 988          if (!does_varying_match(dup_out_var, in_var) ||
 989              in_var->data.interpolation != input_var->data.interpolation ||
 990              get_interp_loc(in_var) != get_interp_loc(input_var))
 991             continue;
 992
 993          b.cursor = nir_before_instr(instr);
 994
 995          nir_ssa_def *load = nir_load_var(&b, input_var);
 996          nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(load));
 997
 998          progress = true;
 999       }
1000    }
1001
1002    return progress;
1003 }
1004
1005 bool
1006 nir_link_opt_varyings(nir_shader *producer, nir_shader *consumer)
1007 {
1008    /* TODO: Add support for more shader stage combinations */
1009    if (consumer->info.stage != MESA_SHADER_FRAGMENT ||
1010        (producer->info.stage != MESA_SHADER_VERTEX &&
1011         producer->info.stage != MESA_SHADER_TESS_EVAL))
1012       return false;
1013
1014    bool progress = false;
1015
1016    nir_function_impl *impl = nir_shader_get_entrypoint(producer);
1017
1018    struct hash_table *varying_values = _mesa_pointer_hash_table_create(NULL);
1019
1020    /* If we find a store in the last block of the producer we can be sure this
1021     * is the only possible value for this output.
1022     */
1023    nir_block *last_block = nir_impl_last_block(impl);
1024    nir_foreach_instr_reverse(instr, last_block) {
1025       if (instr->type != nir_instr_type_intrinsic)
1026          continue;
1027
1028       nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
1029
1030       if (intr->intrinsic != nir_intrinsic_store_deref)
1031          continue;
1032
1033       nir_deref_instr *out_deref = nir_src_as_deref(intr->src[0]);
1034       if (out_deref->mode != nir_var_shader_out)
1035          continue;
1036
1037       nir_variable *out_var = nir_deref_instr_get_variable(out_deref);
1038       if (!can_replace_varying(out_var))
1039          continue;
1040
1041       if (intr->src[1].ssa->parent_instr->type == nir_instr_type_load_const) {
1042          progress |= replace_constant_input(consumer, intr);
1043       } else {
1044          struct hash_entry *entry =
1045                _mesa_hash_table_search(varying_values, intr->src[1].ssa);
1046          if (entry) {
1047             progress |= replace_duplicate_input(consumer,
1048                                                 (nir_variable *) entry->data,
1049                                                 intr);
1050          } else {
1051             nir_variable *in_var = get_matching_input_var(consumer, out_var);
1052             if (in_var) {
1053                _mesa_hash_table_insert(varying_values, intr->src[1].ssa,
1054                                        in_var);
1055             }
1056          }
1057       }
1058    }
1059
1060    _mesa_hash_table_destroy(varying_values, NULL);
1061
1062    return progress;
1063 }
1064
1065 /* TODO any better helper somewhere to sort a list? */
1066
1067 static void
1068 insert_sorted(struct exec_list *var_list, nir_variable *new_var)
1069 {
1070    nir_foreach_variable(var, var_list) {
1071       if (var->data.location > new_var->data.location) {
1072          exec_node_insert_node_before(&var->node, &new_var->node);
1073          return;
1074       }
1075    }
1076    exec_list_push_tail(var_list, &new_var->node);
1077 }
1078
1079 static void
1080 sort_varyings(nir_shader *shader, nir_variable_mode mode,
1081               struct exec_list *sorted_list)
1082 {
1083    exec_list_make_empty(sorted_list);
1084    nir_foreach_variable_with_modes_safe(var, shader, mode) {
1085       exec_node_remove(&var->node);
1086       insert_sorted(sorted_list, var);
1087    }
1088 }
1089
1090 void
1091 nir_assign_io_var_locations(nir_shader *shader, nir_variable_mode mode,
1092                             unsigned *size, gl_shader_stage stage)
1093 {
1094    unsigned location = 0;
1095    unsigned assigned_locations[VARYING_SLOT_TESS_MAX];
1096    uint64_t processed_locs[2] = {0};
1097
1098    struct exec_list io_vars;
1099    sort_varyings(shader, mode, &io_vars);
1100
1101    int UNUSED last_loc = 0;
1102    bool last_partial = false;
1103    nir_foreach_variable(var, &io_vars) {
1104       const struct glsl_type *type = var->type;
1105       if (nir_is_per_vertex_io(var, stage) || var->data.per_view) {
1106          assert(glsl_type_is_array(type));
1107          type = glsl_get_array_element(type);
1108       }
1109
1110       int base;
1111       if (var->data.mode == nir_var_shader_in && stage == MESA_SHADER_VERTEX)
1112          base = VERT_ATTRIB_GENERIC0;
1113       else if (var->data.mode == nir_var_shader_out &&
1114                stage == MESA_SHADER_FRAGMENT)
1115          base = FRAG_RESULT_DATA0;
1116       else
1117          base = VARYING_SLOT_VAR0;
1118
1119       unsigned var_size;
1120       if (var->data.compact) {
1121          /* If we are inside a partial compact,
1122           * don't allow another compact to be in this slot
1123           * if it starts at component 0.
1124           */
1125          if (last_partial && var->data.location_frac == 0) {
1126             location++;
1127          }
1128
1129          /* compact variables must be arrays of scalars */
1130          assert(glsl_type_is_array(type));
1131          assert(glsl_type_is_scalar(glsl_get_array_element(type)));
1132          unsigned start = 4 * location + var->data.location_frac;
1133          unsigned end = start + glsl_get_length(type);
1134          var_size = end / 4 - location;
1135          last_partial = end % 4 != 0;
1136       } else {
1137          /* Compact variables bypass the normal varying compacting pass,
1138           * which means they cannot be in the same vec4 slot as a normal
1139           * variable. If part of the current slot is taken up by a compact
1140           * variable, we need to go to the next one.
1141           */
1142          if (last_partial) {
1143             location++;
1144             last_partial = false;
1145          }
1146          var_size = glsl_count_attribute_slots(type, false);
1147       }
1148
1149       /* Builtins don't allow component packing so we only need to worry about
1150        * user defined varyings sharing the same location.
1151        */
1152       bool processed = false;
1153       if (var->data.location >= base) {
1154          unsigned glsl_location = var->data.location - base;
1155
1156          for (unsigned i = 0; i < var_size; i++) {
1157             if (processed_locs[var->data.index] &
1158                 ((uint64_t)1 << (glsl_location + i)))
1159                processed = true;
1160             else
1161                processed_locs[var->data.index] |=
1162                   ((uint64_t)1 << (glsl_location + i));
1163          }
1164       }
1165
1166       /* Because component packing allows varyings to share the same location
1167        * we may have already have processed this location.
1168        */
1169       if (processed) {
1170          unsigned driver_location = assigned_locations[var->data.location];
1171          var->data.driver_location = driver_location;
1172
1173          /* An array may be packed such that is crosses multiple other arrays
1174           * or variables, we need to make sure we have allocated the elements
1175           * consecutively if the previously proccessed var was shorter than
1176           * the current array we are processing.
1177           *
1178           * NOTE: The code below assumes the var list is ordered in ascending
1179           * location order.
1180           */
1181          assert(last_loc <= var->data.location);
1182          last_loc = var->data.location;
1183          unsigned last_slot_location = driver_location + var_size;
1184          if (last_slot_location > location) {
1185             unsigned num_unallocated_slots = last_slot_location - location;
1186             unsigned first_unallocated_slot = var_size - num_unallocated_slots;
1187             for (unsigned i = first_unallocated_slot; i < var_size; i++) {
1188                assigned_locations[var->data.location + i] = location;
1189                location++;
1190             }
1191          }
1192          continue;
1193       }
1194
1195       for (unsigned i = 0; i < var_size; i++) {
1196          assigned_locations[var->data.location + i] = location + i;
1197       }
1198
1199       var->data.driver_location = location;
1200       location += var_size;
1201    }
1202
1203    if (last_partial)
1204       location++;
1205
1206    struct exec_list *var_list = nir_variable_list_for_mode(shader, mode);
1207    exec_list_append(var_list, &io_vars);
1208    *size = location;
1209 }
1210
1211 static uint64_t
1212 get_linked_variable_location(unsigned location, bool patch)
1213 {
1214    if (!patch)
1215       return location;
1216
1217    /* Reserve locations 0...3 for special patch variables
1218     * like tess factors and bounding boxes, and the generic patch
1219     * variables will come after them.
1220     */
1221    if (location >= VARYING_SLOT_PATCH0)
1222       return location - VARYING_SLOT_PATCH0 + 4;
1223    else if (location >= VARYING_SLOT_TESS_LEVEL_OUTER &&
1224             location <= VARYING_SLOT_BOUNDING_BOX1)
1225       return location - VARYING_SLOT_TESS_LEVEL_OUTER;
1226    else
1227       unreachable("Unsupported variable in get_linked_variable_location.");
1228 }
1229
1230 static uint64_t
1231 get_linked_variable_io_mask(nir_variable *variable, gl_shader_stage stage)
1232 {
1233    const struct glsl_type *type = variable->type;
1234
1235    if (nir_is_per_vertex_io(variable, stage)) {
1236       assert(glsl_type_is_array(type));
1237       type = glsl_get_array_element(type);
1238    }
1239
1240    unsigned slots = glsl_count_attribute_slots(type, false);
1241    if (variable->data.compact) {
1242       unsigned component_count = variable->data.location_frac + glsl_get_length(type);
1243       slots = DIV_ROUND_UP(component_count, 4);
1244    }
1245
1246    uint64_t mask = u_bit_consecutive64(0, slots);
1247    return mask;
1248 }
1249
1250 nir_linked_io_var_info
1251 nir_assign_linked_io_var_locations(nir_shader *producer, nir_shader *consumer)
1252 {
1253    assert(producer);
1254    assert(consumer);
1255
1256    uint64_t producer_output_mask = 0;
1257    uint64_t producer_patch_output_mask = 0;
1258
1259    nir_foreach_shader_out_variable(variable, producer) {
1260       uint64_t mask = get_linked_variable_io_mask(variable, producer->info.stage);
1261       uint64_t loc = get_linked_variable_location(variable->data.location, variable->data.patch);
1262
1263       if (variable->data.patch)
1264          producer_patch_output_mask |= mask << loc;
1265       else
1266          producer_output_mask |= mask << loc;
1267    }
1268
1269    uint64_t consumer_input_mask = 0;
1270    uint64_t consumer_patch_input_mask = 0;
1271
1272    nir_foreach_shader_in_variable(variable, consumer) {
1273       uint64_t mask = get_linked_variable_io_mask(variable, consumer->info.stage);
1274       uint64_t loc = get_linked_variable_location(variable->data.location, variable->data.patch);
1275
1276       if (variable->data.patch)
1277          consumer_patch_input_mask |= mask << loc;
1278       else
1279          consumer_input_mask |= mask << loc;
1280    }
1281
1282    uint64_t io_mask = producer_output_mask | consumer_input_mask;
1283    uint64_t patch_io_mask = producer_patch_output_mask | consumer_patch_input_mask;
1284
1285    nir_foreach_shader_out_variable(variable, producer) {
1286       uint64_t loc = get_linked_variable_location(variable->data.location, variable->data.patch);
1287
1288       if (variable->data.patch)
1289          variable->data.driver_location = util_bitcount64(patch_io_mask & u_bit_consecutive64(0, loc)) * 4;
1290       else
1291          variable->data.driver_location = util_bitcount64(io_mask & u_bit_consecutive64(0, loc)) * 4;
1292    }
1293
1294    nir_foreach_shader_in_variable(variable, consumer) {
1295       uint64_t loc = get_linked_variable_location(variable->data.location, variable->data.patch);
1296
1297       if (variable->data.patch)
1298          variable->data.driver_location = util_bitcount64(patch_io_mask & u_bit_consecutive64(0, loc)) * 4;
1299       else
1300          variable->data.driver_location = util_bitcount64(io_mask & u_bit_consecutive64(0, loc)) * 4;
1301    }
1302
1303    nir_linked_io_var_info result = {
1304       .num_linked_io_vars = util_bitcount64(io_mask),
1305       .num_linked_patch_io_vars = util_bitcount64(patch_io_mask),
1306    };
1307
1308    return result;
1309 }