nir: Add nir_foreach_shader_in/out_variable helpers
[mesa.git] src/compiler/nir/nir_linking_helpers.c
1 /*
2 * Copyright © 2015 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "nir.h"
25 #include "nir_builder.h"
26 #include "util/set.h"
27 #include "util/hash_table.h"
28
29 /* This file contains various little helpers for doing simple linking in
30 * NIR. Eventually, we'll probably want a full-blown varying packing
31 * implementation in here. Right now, it just deletes unused things.
32 */
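
/* A rough sketch of how a driver's linker might chain these helpers when
 * linking two adjacent stages.  This is illustrative only; the exact ordering
 * and the lowering/dead-code passes run in between are driver specific:
 *
 *    nir_link_opt_varyings(producer, consumer);
 *    nir_remove_unused_varyings(producer, consumer);
 *    (run dead-variable/DCE passes over both shaders here)
 *    nir_compact_varyings(producer, consumer, default_to_smooth_interp);
 *    nir_link_xfb_varyings(producer, consumer);
 */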
33
34 /**
35 * Returns the bits in the inputs_read, outputs_written, or
36 * system_values_read bitfield corresponding to this variable.
37 */
38 static uint64_t
39 get_variable_io_mask(nir_variable *var, gl_shader_stage stage)
40 {
41 if (var->data.location < 0)
42 return 0;
43
44 unsigned location = var->data.patch ?
45 var->data.location - VARYING_SLOT_PATCH0 : var->data.location;
46
47 assert(var->data.mode == nir_var_shader_in ||
48 var->data.mode == nir_var_shader_out ||
49 var->data.mode == nir_var_system_value);
50 assert(var->data.location >= 0);
51
52 const struct glsl_type *type = var->type;
53 if (nir_is_per_vertex_io(var, stage) || var->data.per_view) {
54 assert(glsl_type_is_array(type));
55 type = glsl_get_array_element(type);
56 }
57
58 unsigned slots = glsl_count_attribute_slots(type, false);
59 return ((1ull << slots) - 1) << location;
60 }
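
/* For example, a non-patch mat4 output at VARYING_SLOT_VAR2 covers four
 * slots, so the helper above returns 0xf << VARYING_SLOT_VAR2 (a purely
 * illustrative case; patch locations are first made relative to
 * VARYING_SLOT_PATCH0).
 */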
61
62 static uint8_t
63 get_num_components(nir_variable *var)
64 {
65 if (glsl_type_is_struct_or_ifc(glsl_without_array(var->type)))
66 return 4;
67
68 return glsl_get_vector_elements(glsl_without_array(var->type));
69 }
70
71 static void
72 tcs_add_output_reads(nir_shader *shader, uint64_t *read, uint64_t *patches_read)
73 {
74 nir_foreach_function(function, shader) {
75 if (!function->impl)
76 continue;
77
78 nir_foreach_block(block, function->impl) {
79 nir_foreach_instr(instr, block) {
80 if (instr->type != nir_instr_type_intrinsic)
81 continue;
82
83 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
84 if (intrin->intrinsic != nir_intrinsic_load_deref)
85 continue;
86
87 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
88 if (deref->mode != nir_var_shader_out)
89 continue;
90
91 nir_variable *var = nir_deref_instr_get_variable(deref);
92 for (unsigned i = 0; i < get_num_components(var); i++) {
93 if (var->data.patch) {
94 patches_read[var->data.location_frac + i] |=
95 get_variable_io_mask(var, shader->info.stage);
96 } else {
97 read[var->data.location_frac + i] |=
98 get_variable_io_mask(var, shader->info.stage);
99 }
100 }
101 }
102 }
103 }
104 }
105
106 /**
107 * Helper for removing unused shader I/O variables, by demoting them to global
108 * variables (which may then be dead-code eliminated).
109 *
110 * Example usage is:
111 *
112 * progress = nir_remove_unused_io_vars(producer, nir_var_shader_out,
113 * read, patches_read) ||
114 * progress;
115 *
116 * The "used" should be an array of 4 uint64_ts (probably of VARYING_BIT_*)
117 * representing each .location_frac used. Note that for vector variables,
118 * only the first channel (.location_frac) is examined for deciding if the
119 * variable is used!
120 */
121 bool
122 nir_remove_unused_io_vars(nir_shader *shader,
123 nir_variable_mode mode,
124 uint64_t *used_by_other_stage,
125 uint64_t *used_by_other_stage_patches)
126 {
127 bool progress = false;
128 uint64_t *used;
129
130 assert(mode == nir_var_shader_in || mode == nir_var_shader_out);
131 struct exec_list *var_list =
132 mode == nir_var_shader_in ? &shader->inputs : &shader->outputs;
133
134 nir_foreach_variable_safe(var, var_list) {
135 if (var->data.patch)
136 used = used_by_other_stage_patches;
137 else
138 used = used_by_other_stage;
139
140 if (var->data.location < VARYING_SLOT_VAR0 && var->data.location >= 0)
141 continue;
142
143 if (var->data.always_active_io)
144 continue;
145
146 if (var->data.explicit_xfb_buffer)
147 continue;
148
149 uint64_t other_stage = used[var->data.location_frac];
150
151 if (!(other_stage & get_variable_io_mask(var, shader->info.stage))) {
152 /* This one is unused, so make it a global variable instead */
153 var->data.location = 0;
154 var->data.mode = nir_var_shader_temp;
155
156 exec_node_remove(&var->node);
157 exec_list_push_tail(&shader->globals, &var->node);
158
159 progress = true;
160 }
161 }
162
163 if (progress)
164 nir_fixup_deref_modes(shader);
165
166 return progress;
167 }
168
169 bool
170 nir_remove_unused_varyings(nir_shader *producer, nir_shader *consumer)
171 {
172 assert(producer->info.stage != MESA_SHADER_FRAGMENT);
173 assert(consumer->info.stage != MESA_SHADER_VERTEX);
174
175 uint64_t read[4] = { 0 }, written[4] = { 0 };
176 uint64_t patches_read[4] = { 0 }, patches_written[4] = { 0 };
177
178 nir_foreach_shader_out_variable(var, producer) {
179 for (unsigned i = 0; i < get_num_components(var); i++) {
180 if (var->data.patch) {
181 patches_written[var->data.location_frac + i] |=
182 get_variable_io_mask(var, producer->info.stage);
183 } else {
184 written[var->data.location_frac + i] |=
185 get_variable_io_mask(var, producer->info.stage);
186 }
187 }
188 }
189
190 nir_foreach_shader_in_variable(var, consumer) {
191 for (unsigned i = 0; i < get_num_components(var); i++) {
192 if (var->data.patch) {
193 patches_read[var->data.location_frac + i] |=
194 get_variable_io_mask(var, consumer->info.stage);
195 } else {
196 read[var->data.location_frac + i] |=
197 get_variable_io_mask(var, consumer->info.stage);
198 }
199 }
200 }
201
202 /* Each TCS invocation can read data written by other TCS invocations,
203 * so even if the outputs are not used by the TES we must also make
204 * sure they are not read by the TCS before demoting them to globals.
205 */
206 if (producer->info.stage == MESA_SHADER_TESS_CTRL)
207 tcs_add_output_reads(producer, read, patches_read);
208
209 bool progress = false;
210 progress = nir_remove_unused_io_vars(producer, nir_var_shader_out, read,
211 patches_read);
212
213 progress = nir_remove_unused_io_vars(consumer, nir_var_shader_in, written,
214 patches_written) || progress;
215
216 return progress;
217 }
218
219 static uint8_t
220 get_interp_type(nir_variable *var, const struct glsl_type *type,
221 bool default_to_smooth_interp)
222 {
223 if (glsl_type_is_integer(type))
224 return INTERP_MODE_FLAT;
225 else if (var->data.interpolation != INTERP_MODE_NONE)
226 return var->data.interpolation;
227 else if (default_to_smooth_interp)
228 return INTERP_MODE_SMOOTH;
229 else
230 return INTERP_MODE_NONE;
231 }
232
233 #define INTERPOLATE_LOC_SAMPLE 0
234 #define INTERPOLATE_LOC_CENTROID 1
235 #define INTERPOLATE_LOC_CENTER 2
236
237 static uint8_t
238 get_interp_loc(nir_variable *var)
239 {
240 if (var->data.sample)
241 return INTERPOLATE_LOC_SAMPLE;
242 else if (var->data.centroid)
243 return INTERPOLATE_LOC_CENTROID;
244 else
245 return INTERPOLATE_LOC_CENTER;
246 }
247
248 static bool
249 is_packing_supported_for_type(const struct glsl_type *type)
250 {
251 /* We ignore complex types such as arrays, matrices, structs and bit sizes
252 * other than 32-bit. All other vector types should have been split into
253 * scalar variables by the lower_io_to_scalar pass. The only exception
254 * should be OpenGL xfb varyings.
255 * TODO: add support for more complex types?
256 */
257 return glsl_type_is_scalar(type) && glsl_type_is_32bit(type);
258 }
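
/* For example, a lone "float" or "int" varying passes the check above, while
 * a vec2, a double, or a float[2] does not and will keep its original
 * location.
 */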
259
260 struct assigned_comps
261 {
262 uint8_t comps;
263 uint8_t interp_type;
264 uint8_t interp_loc;
265 bool is_32bit;
266 };
267
268 /* Packing arrays and dual-slot varyings is difficult, so to avoid complex
269 * algorithms this function just assigns them their existing locations for now.
270 * TODO: allow better packing of complex types.
271 */
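/* For example, under those rules a dvec3 with location_frac 0 is recorded
 * below as using all four components of its first slot and components x/y of
 * its second slot.
 */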
272 static void
273 get_unmoveable_components_masks(struct exec_list *var_list,
274 struct assigned_comps *comps,
275 gl_shader_stage stage,
276 bool default_to_smooth_interp)
277 {
278 nir_foreach_variable_safe(var, var_list) {
279 assert(var->data.location >= 0);
280
281 /* Only remap things that aren't built-ins. */
282 if (var->data.location >= VARYING_SLOT_VAR0 &&
283 var->data.location - VARYING_SLOT_VAR0 < MAX_VARYINGS_INCL_PATCH) {
284
285 const struct glsl_type *type = var->type;
286 if (nir_is_per_vertex_io(var, stage) || var->data.per_view) {
287 assert(glsl_type_is_array(type));
288 type = glsl_get_array_element(type);
289 }
290
291 /* If we can pack this varying then don't mark the components as
292 * used.
293 */
294 if (is_packing_supported_for_type(type))
295 continue;
296
297 unsigned location = var->data.location - VARYING_SLOT_VAR0;
298
299 unsigned elements =
300 glsl_type_is_vector_or_scalar(glsl_without_array(type)) ?
301 glsl_get_vector_elements(glsl_without_array(type)) : 4;
302
303 bool dual_slot = glsl_type_is_dual_slot(glsl_without_array(type));
304 unsigned slots = glsl_count_attribute_slots(type, false);
305 unsigned dmul = glsl_type_is_64bit(glsl_without_array(type)) ? 2 : 1;
306 unsigned comps_slot2 = 0;
307 for (unsigned i = 0; i < slots; i++) {
308 if (dual_slot) {
309 if (i & 1) {
310 comps[location + i].comps |= ((1 << comps_slot2) - 1);
311 } else {
312 unsigned num_comps = 4 - var->data.location_frac;
313 comps_slot2 = (elements * dmul) - num_comps;
314
315 /* Assume ARB_enhanced_layouts packing rules for doubles */
316 assert(var->data.location_frac == 0 ||
317 var->data.location_frac == 2);
318 assert(comps_slot2 <= 4);
319
320 comps[location + i].comps |=
321 ((1 << num_comps) - 1) << var->data.location_frac;
322 }
323 } else {
324 comps[location + i].comps |=
325 ((1 << (elements * dmul)) - 1) << var->data.location_frac;
326 }
327
328 comps[location + i].interp_type =
329 get_interp_type(var, type, default_to_smooth_interp);
330 comps[location + i].interp_loc = get_interp_loc(var);
331 comps[location + i].is_32bit =
332 glsl_type_is_32bit(glsl_without_array(type));
333 }
334 }
335 }
336 }
337
338 struct varying_loc
339 {
340 uint8_t component;
341 uint32_t location;
342 };
343
344 static void
345 mark_all_used_slots(nir_variable *var, uint64_t *slots_used,
346 uint64_t slots_used_mask, unsigned num_slots)
347 {
348 unsigned loc_offset = var->data.patch ? VARYING_SLOT_PATCH0 : 0;
349
350 slots_used[var->data.patch ? 1 : 0] |= slots_used_mask &
351 BITFIELD64_RANGE(var->data.location - loc_offset, num_slots);
352 }
353
354 static void
355 mark_used_slot(nir_variable *var, uint64_t *slots_used, unsigned offset)
356 {
357 unsigned loc_offset = var->data.patch ? VARYING_SLOT_PATCH0 : 0;
358
359 slots_used[var->data.patch ? 1 : 0] |=
360 BITFIELD64_BIT(var->data.location - loc_offset + offset);
361 }
362
363 static void
364 remap_slots_and_components(struct exec_list *var_list, gl_shader_stage stage,
365 struct varying_loc (*remap)[4],
366 uint64_t *slots_used, uint64_t *out_slots_read,
367 uint32_t *p_slots_used, uint32_t *p_out_slots_read)
368 {
369 uint64_t out_slots_read_tmp[2] = {0};
370 uint64_t slots_used_tmp[2] = {0};
371
372 /* We don't touch builtins so just copy the bitmask */
373 slots_used_tmp[0] = *slots_used & BITFIELD64_RANGE(0, VARYING_SLOT_VAR0);
374
375 nir_foreach_variable(var, var_list) {
376 assert(var->data.location >= 0);
377
378 /* Only remap things that aren't built-ins */
379 if (var->data.location >= VARYING_SLOT_VAR0 &&
380 var->data.location - VARYING_SLOT_VAR0 < MAX_VARYINGS_INCL_PATCH) {
381
382 const struct glsl_type *type = var->type;
383 if (nir_is_per_vertex_io(var, stage) || var->data.per_view) {
384 assert(glsl_type_is_array(type));
385 type = glsl_get_array_element(type);
386 }
387
388 unsigned num_slots = glsl_count_attribute_slots(type, false);
389 bool used_across_stages = false;
390 bool outputs_read = false;
391
392 unsigned location = var->data.location - VARYING_SLOT_VAR0;
393 struct varying_loc *new_loc = &remap[location][var->data.location_frac];
394
395 unsigned loc_offset = var->data.patch ? VARYING_SLOT_PATCH0 : 0;
396 uint64_t used = var->data.patch ? *p_slots_used : *slots_used;
397 uint64_t outs_used =
398 var->data.patch ? *p_out_slots_read : *out_slots_read;
399 uint64_t slots =
400 BITFIELD64_RANGE(var->data.location - loc_offset, num_slots);
401
402 if (slots & used)
403 used_across_stages = true;
404
405 if (slots & outs_used)
406 outputs_read = true;
407
408 if (new_loc->location) {
409 var->data.location = new_loc->location;
410 var->data.location_frac = new_loc->component;
411 }
412
413 if (var->data.always_active_io) {
414 /* We can't apply link-time optimisations (specifically array
415 * splitting) to these, so we need to copy the existing mask;
416 * otherwise we will mess up the mask for things like partially
417 * marked arrays.
418 */
419 if (used_across_stages)
420 mark_all_used_slots(var, slots_used_tmp, used, num_slots);
421
422 if (outputs_read) {
423 mark_all_used_slots(var, out_slots_read_tmp, outs_used,
424 num_slots);
425 }
426 } else {
427 for (unsigned i = 0; i < num_slots; i++) {
428 if (used_across_stages)
429 mark_used_slot(var, slots_used_tmp, i);
430
431 if (outputs_read)
432 mark_used_slot(var, out_slots_read_tmp, i);
433 }
434 }
435 }
436 }
437
438 *slots_used = slots_used_tmp[0];
439 *out_slots_read = out_slots_read_tmp[0];
440 *p_slots_used = slots_used_tmp[1];
441 *p_out_slots_read = out_slots_read_tmp[1];
442 }
443
444 struct varying_component {
445 nir_variable *var;
446 uint8_t interp_type;
447 uint8_t interp_loc;
448 bool is_32bit;
449 bool is_patch;
450 bool is_intra_stage_only;
451 bool initialised;
452 };
453
454 static int
455 cmp_varying_component(const void *comp1_v, const void *comp2_v)
456 {
457 struct varying_component *comp1 = (struct varying_component *) comp1_v;
458 struct varying_component *comp2 = (struct varying_component *) comp2_v;
459
460 /* We want patches to be ordered at the end of the array */
461 if (comp1->is_patch != comp2->is_patch)
462 return comp1->is_patch ? 1 : -1;
463
464 /* We want to try to group together TCS outputs that are only read by other
465 * TCS invocations and not consumed by the following stage.
466 */
467 if (comp1->is_intra_stage_only != comp2->is_intra_stage_only)
468 return comp1->is_intra_stage_only ? 1 : -1;
469
470 /* We can only pack varyings with matching interpolation types so group
471 * them together.
472 */
473 if (comp1->interp_type != comp2->interp_type)
474 return comp1->interp_type - comp2->interp_type;
475
476 /* Interpolation loc must match also. */
477 if (comp1->interp_loc != comp2->interp_loc)
478 return comp1->interp_loc - comp2->interp_loc;
479
480 /* If everything else matches just use the original location to sort */
481 return comp1->var->data.location - comp2->var->data.location;
482 }
483
484 static void
485 gather_varying_component_info(nir_shader *producer, nir_shader *consumer,
486 struct varying_component **varying_comp_info,
487 unsigned *varying_comp_info_size,
488 bool default_to_smooth_interp)
489 {
490 unsigned store_varying_info_idx[MAX_VARYINGS_INCL_PATCH][4] = {{0}};
491 unsigned num_of_comps_to_pack = 0;
492
493 /* Count the number of varyings that can be packed and create a mapping
494 * of those varyings to the array we will pass to qsort.
495 */
496 nir_foreach_shader_out_variable(var, producer) {
497
498 /* Only remap things that aren't builtins. */
499 if (var->data.location >= VARYING_SLOT_VAR0 &&
500 var->data.location - VARYING_SLOT_VAR0 < MAX_VARYINGS_INCL_PATCH) {
501
502 /* We can't repack xfb varyings. */
503 if (var->data.always_active_io)
504 continue;
505
506 const struct glsl_type *type = var->type;
507 if (nir_is_per_vertex_io(var, producer->info.stage) || var->data.per_view) {
508 assert(glsl_type_is_array(type));
509 type = glsl_get_array_element(type);
510 }
511
512 if (!is_packing_supported_for_type(type))
513 continue;
514
515 unsigned loc = var->data.location - VARYING_SLOT_VAR0;
516 store_varying_info_idx[loc][var->data.location_frac] =
517 ++num_of_comps_to_pack;
518 }
519 }
520
521 *varying_comp_info_size = num_of_comps_to_pack;
522 *varying_comp_info = rzalloc_array(NULL, struct varying_component,
523 num_of_comps_to_pack);
524
525 nir_function_impl *impl = nir_shader_get_entrypoint(consumer);
526
527 /* Walk over the shader and populate the varying component info array */
528 nir_foreach_block(block, impl) {
529 nir_foreach_instr(instr, block) {
530 if (instr->type != nir_instr_type_intrinsic)
531 continue;
532
533 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
534 if (intr->intrinsic != nir_intrinsic_load_deref &&
535 intr->intrinsic != nir_intrinsic_interp_deref_at_centroid &&
536 intr->intrinsic != nir_intrinsic_interp_deref_at_sample &&
537 intr->intrinsic != nir_intrinsic_interp_deref_at_offset &&
538 intr->intrinsic != nir_intrinsic_interp_deref_at_vertex)
539 continue;
540
541 nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
542 if (deref->mode != nir_var_shader_in)
543 continue;
544
545 /* We only remap things that aren't builtins. */
546 nir_variable *in_var = nir_deref_instr_get_variable(deref);
547 if (in_var->data.location < VARYING_SLOT_VAR0)
548 continue;
549
550 unsigned location = in_var->data.location - VARYING_SLOT_VAR0;
551 if (location >= MAX_VARYINGS_INCL_PATCH)
552 continue;
553
554 unsigned var_info_idx =
555 store_varying_info_idx[location][in_var->data.location_frac];
556 if (!var_info_idx)
557 continue;
558
559 struct varying_component *vc_info =
560 &(*varying_comp_info)[var_info_idx-1];
561
562 if (!vc_info->initialised) {
563 const struct glsl_type *type = in_var->type;
564 if (nir_is_per_vertex_io(in_var, consumer->info.stage) ||
565 in_var->data.per_view) {
566 assert(glsl_type_is_array(type));
567 type = glsl_get_array_element(type);
568 }
569
570 vc_info->var = in_var;
571 vc_info->interp_type =
572 get_interp_type(in_var, type, default_to_smooth_interp);
573 vc_info->interp_loc = get_interp_loc(in_var);
574 vc_info->is_32bit = glsl_type_is_32bit(type);
575 vc_info->is_patch = in_var->data.patch;
576 vc_info->is_intra_stage_only = false;
577 vc_info->initialised = true;
578 }
579 }
580 }
581
582 /* Walk over the shader and populate the varying component info array
583 * for varyings which are read by other TCS invocations but are not consumed
584 * by the TES.
585 */
586 if (producer->info.stage == MESA_SHADER_TESS_CTRL) {
587 impl = nir_shader_get_entrypoint(producer);
588
589 nir_foreach_block(block, impl) {
590 nir_foreach_instr(instr, block) {
591 if (instr->type != nir_instr_type_intrinsic)
592 continue;
593
594 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
595 if (intr->intrinsic != nir_intrinsic_load_deref)
596 continue;
597
598 nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
599 if (deref->mode != nir_var_shader_out)
600 continue;
601
602 /* We only remap things that aren't builtins. */
603 nir_variable *out_var = nir_deref_instr_get_variable(deref);
604 if (out_var->data.location < VARYING_SLOT_VAR0)
605 continue;
606
607 unsigned location = out_var->data.location - VARYING_SLOT_VAR0;
608 if (location >= MAX_VARYINGS_INCL_PATCH)
609 continue;
610
611 unsigned var_info_idx =
612 store_varying_info_idx[location][out_var->data.location_frac];
613 if (!var_info_idx) {
614 /* Something went wrong: the shader interfaces didn't match, so
615 * abandon packing. This can happen for example when the
616 * inputs are scalars but the outputs are struct members.
617 */
618 *varying_comp_info_size = 0;
619 break;
620 }
621
622 struct varying_component *vc_info =
623 &(*varying_comp_info)[var_info_idx-1];
624
625 if (!vc_info->initialised) {
626 const struct glsl_type *type = out_var->type;
627 if (nir_is_per_vertex_io(out_var, producer->info.stage)) {
628 assert(glsl_type_is_array(type));
629 type = glsl_get_array_element(type);
630 }
631
632 vc_info->var = out_var;
633 vc_info->interp_type =
634 get_interp_type(out_var, type, default_to_smooth_interp);
635 vc_info->interp_loc = get_interp_loc(out_var);
636 vc_info->is_32bit = glsl_type_is_32bit(type);
637 vc_info->is_patch = out_var->data.patch;
638 vc_info->is_intra_stage_only = true;
639 vc_info->initialised = true;
640 }
641 }
642 }
643 }
644
645 for (unsigned i = 0; i < *varying_comp_info_size; i++ ) {
646 struct varying_component *vc_info = &(*varying_comp_info)[i];
647 if (!vc_info->initialised) {
648 /* Something went wrong: the shader interfaces didn't match, so
649 * abandon packing. This can happen for example when the outputs are
650 * scalars but the inputs are struct members.
651 */
652 *varying_comp_info_size = 0;
653 break;
654 }
655 }
656 }
657
658 static void
659 assign_remap_locations(struct varying_loc (*remap)[4],
660 struct assigned_comps *assigned_comps,
661 struct varying_component *info,
662 unsigned *cursor, unsigned *comp,
663 unsigned max_location)
664 {
665 unsigned tmp_cursor = *cursor;
666 unsigned tmp_comp = *comp;
667
668 for (; tmp_cursor < max_location; tmp_cursor++) {
669
670 if (assigned_comps[tmp_cursor].comps) {
671 /* We can only pack varyings with matching interpolation types, and
672 * the interpolation loc must match as well.
673 * TODO: i965 can handle interpolation locations that don't match,
674 * but the radeonsi nir backend handles everything as vec4s and so
675 * expects this to be the same for all components. We could make this
676 * check driver specific or drop it if NIR ever becomes the only
677 * radeonsi backend.
678 */
679 if (assigned_comps[tmp_cursor].interp_type != info->interp_type ||
680 assigned_comps[tmp_cursor].interp_loc != info->interp_loc) {
681 tmp_comp = 0;
682 continue;
683 }
684
685 /* We can only pack varyings with matching types, and the current
686 * algorithm only supports packing 32-bit.
687 */
688 if (!assigned_comps[tmp_cursor].is_32bit) {
689 tmp_comp = 0;
690 continue;
691 }
692
693 while (tmp_comp < 4 &&
694 (assigned_comps[tmp_cursor].comps & (1 << tmp_comp))) {
695 tmp_comp++;
696 }
697 }
698
699 if (tmp_comp == 4) {
700 tmp_comp = 0;
701 continue;
702 }
703
704 unsigned location = info->var->data.location - VARYING_SLOT_VAR0;
705
706 /* Once we have assigned a location mark it as used */
707 assigned_comps[tmp_cursor].comps |= (1 << tmp_comp);
708 assigned_comps[tmp_cursor].interp_type = info->interp_type;
709 assigned_comps[tmp_cursor].interp_loc = info->interp_loc;
710 assigned_comps[tmp_cursor].is_32bit = info->is_32bit;
711
712 /* Assign remap location */
713 remap[location][info->var->data.location_frac].component = tmp_comp++;
714 remap[location][info->var->data.location_frac].location =
715 tmp_cursor + VARYING_SLOT_VAR0;
716
717 break;
718 }
719
720 *cursor = tmp_cursor;
721 *comp = tmp_comp;
722 }
723
724 /* If there are empty components in the slot, compact the remaining components
725 * as close to component 0 as possible. This will make it easier to fill the
726 * empty components with components from a different slot in a following pass.
727 */
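/* For example, assuming two packable scalar varyings originally living in
 * VARYING_SLOT_VAR1.x and VARYING_SLOT_VAR3.y with matching interpolation,
 * both would typically end up remapped into VARYING_SLOT_VAR0 as components
 * x and y.
 */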
728 static void
729 compact_components(nir_shader *producer, nir_shader *consumer,
730 struct assigned_comps *assigned_comps,
731 bool default_to_smooth_interp)
732 {
733 struct exec_list *input_list = &consumer->inputs;
734 struct exec_list *output_list = &producer->outputs;
735 struct varying_loc remap[MAX_VARYINGS_INCL_PATCH][4] = {{{0}, {0}}};
736 struct varying_component *varying_comp_info;
737 unsigned varying_comp_info_size;
738
739 /* Gather varying component info */
740 gather_varying_component_info(producer, consumer, &varying_comp_info,
741 &varying_comp_info_size,
742 default_to_smooth_interp);
743
744 /* Sort varying components. */
745 qsort(varying_comp_info, varying_comp_info_size,
746 sizeof(struct varying_component), cmp_varying_component);
747
748 unsigned cursor = 0;
749 unsigned comp = 0;
750
751 /* Set the remap array based on the sorted components */
752 for (unsigned i = 0; i < varying_comp_info_size; i++ ) {
753 struct varying_component *info = &varying_comp_info[i];
754
755 assert(info->is_patch || cursor < MAX_VARYING);
756 if (info->is_patch) {
757 /* The list should be sorted with all non-patch inputs first followed
758 * by patch inputs. When we hit our first patch input, we need to
759 * reset the cursor to MAX_VARYING so we put them in the right slot.
760 */
761 if (cursor < MAX_VARYING) {
762 cursor = MAX_VARYING;
763 comp = 0;
764 }
765
766 assign_remap_locations(remap, assigned_comps, info,
767 &cursor, &comp, MAX_VARYINGS_INCL_PATCH);
768 } else {
769 assign_remap_locations(remap, assigned_comps, info,
770 &cursor, &comp, MAX_VARYING);
771
772 /* Check if we failed to assign a remap location. This can happen if,
773 * for example, there are a bunch of unmovable components with
774 * mismatching interpolation types, causing us to skip over locations
775 * that would have been useful for packing later components.
776 * The solution is to iterate over the locations again (this should
777 * happen very rarely in practice).
778 */
779 if (cursor == MAX_VARYING) {
780 cursor = 0;
781 comp = 0;
782 assign_remap_locations(remap, assigned_comps, info,
783 &cursor, &comp, MAX_VARYING);
784 }
785 }
786 }
787
788 ralloc_free(varying_comp_info);
789
790 uint64_t zero = 0;
791 uint32_t zero32 = 0;
792 remap_slots_and_components(input_list, consumer->info.stage, remap,
793 &consumer->info.inputs_read, &zero,
794 &consumer->info.patch_inputs_read, &zero32);
795 remap_slots_and_components(output_list, producer->info.stage, remap,
796 &producer->info.outputs_written,
797 &producer->info.outputs_read,
798 &producer->info.patch_outputs_written,
799 &producer->info.patch_outputs_read);
800 }
801
802 /* We assume that this has been called more-or-less directly after
803 * remove_unused_varyings. At this point, all of the varyings that we
804 * aren't going to be using have been completely removed and the
805 * inputs_read and outputs_written fields in nir_shader_info reflect
806 * this. Therefore, the total set of valid slots is the OR of the two
807 * sets of varyings; this accounts for varyings which one side may need
808 * to read/write even if the other doesn't. This can happen if, for
809 * instance, an array is used indirectly from one side (making it
810 * unsplittable) but directly from the other.
811 */
812 void
813 nir_compact_varyings(nir_shader *producer, nir_shader *consumer,
814 bool default_to_smooth_interp)
815 {
816 assert(producer->info.stage != MESA_SHADER_FRAGMENT);
817 assert(consumer->info.stage != MESA_SHADER_VERTEX);
818
819 struct assigned_comps assigned_comps[MAX_VARYINGS_INCL_PATCH] = {{0}};
820
821 get_unmoveable_components_masks(&producer->outputs, assigned_comps,
822 producer->info.stage,
823 default_to_smooth_interp);
824 get_unmoveable_components_masks(&consumer->inputs, assigned_comps,
825 consumer->info.stage,
826 default_to_smooth_interp);
827
828 compact_components(producer, consumer, assigned_comps,
829 default_to_smooth_interp);
830 }
831
832 /*
833 * Mark XFB varyings as always_active_io in the consumer so the linking opts
834 * don't touch them.
835 */
836 void
837 nir_link_xfb_varyings(nir_shader *producer, nir_shader *consumer)
838 {
839 nir_variable *input_vars[MAX_VARYING] = { 0 };
840
841 nir_foreach_shader_in_variable(var, consumer) {
842 if (var->data.location >= VARYING_SLOT_VAR0 &&
843 var->data.location - VARYING_SLOT_VAR0 < MAX_VARYING) {
844
845 unsigned location = var->data.location - VARYING_SLOT_VAR0;
846 input_vars[location] = var;
847 }
848 }
849
850 nir_foreach_shader_out_variable(var, producer) {
851 if (var->data.location >= VARYING_SLOT_VAR0 &&
852 var->data.location - VARYING_SLOT_VAR0 < MAX_VARYING) {
853
854 if (!var->data.always_active_io)
855 continue;
856
857 unsigned location = var->data.location - VARYING_SLOT_VAR0;
858 if (input_vars[location]) {
859 input_vars[location]->data.always_active_io = true;
860 }
861 }
862 }
863 }
864
865 static bool
866 does_varying_match(nir_variable *out_var, nir_variable *in_var)
867 {
868 return in_var->data.location == out_var->data.location &&
869 in_var->data.location_frac == out_var->data.location_frac;
870 }
871
872 static nir_variable *
873 get_matching_input_var(nir_shader *consumer, nir_variable *out_var)
874 {
875 nir_foreach_shader_in_variable(var, consumer) {
876 if (does_varying_match(out_var, var))
877 return var;
878 }
879
880 return NULL;
881 }
882
883 static bool
884 can_replace_varying(nir_variable *out_var)
885 {
886 /* Skip types that require more complex handling.
887 * TODO: add support for these types.
888 */
889 if (glsl_type_is_array(out_var->type) ||
890 glsl_type_is_dual_slot(out_var->type) ||
891 glsl_type_is_matrix(out_var->type) ||
892 glsl_type_is_struct_or_ifc(out_var->type))
893 return false;
894
895 /* Limit this pass to scalars for now to keep things simple. Most varyings
896 * should have been lowered to scalars at this point anyway.
897 */
898 if (!glsl_type_is_scalar(out_var->type))
899 return false;
900
901 if (out_var->data.location < VARYING_SLOT_VAR0 ||
902 out_var->data.location - VARYING_SLOT_VAR0 >= MAX_VARYING)
903 return false;
904
905 return true;
906 }
907
908 static bool
909 replace_constant_input(nir_shader *shader, nir_intrinsic_instr *store_intr)
910 {
911 nir_function_impl *impl = nir_shader_get_entrypoint(shader);
912
913 nir_builder b;
914 nir_builder_init(&b, impl);
915
916 nir_variable *out_var =
917 nir_deref_instr_get_variable(nir_src_as_deref(store_intr->src[0]));
918
919 bool progress = false;
920 nir_foreach_block(block, impl) {
921 nir_foreach_instr(instr, block) {
922 if (instr->type != nir_instr_type_intrinsic)
923 continue;
924
925 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
926 if (intr->intrinsic != nir_intrinsic_load_deref)
927 continue;
928
929 nir_deref_instr *in_deref = nir_src_as_deref(intr->src[0]);
930 if (in_deref->mode != nir_var_shader_in)
931 continue;
932
933 nir_variable *in_var = nir_deref_instr_get_variable(in_deref);
934
935 if (!does_varying_match(out_var, in_var))
936 continue;
937
938 b.cursor = nir_before_instr(instr);
939
940 nir_load_const_instr *out_const =
941 nir_instr_as_load_const(store_intr->src[1].ssa->parent_instr);
942
943 /* Add new const to replace the input */
944 nir_ssa_def *nconst = nir_build_imm(&b, store_intr->num_components,
945 intr->dest.ssa.bit_size,
946 out_const->value);
947
948 nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(nconst));
949
950 progress = true;
951 }
952 }
953
954 return progress;
955 }
956
957 static bool
958 replace_duplicate_input(nir_shader *shader, nir_variable *input_var,
959 nir_intrinsic_instr *dup_store_intr)
960 {
961 assert(input_var);
962
963 nir_function_impl *impl = nir_shader_get_entrypoint(shader);
964
965 nir_builder b;
966 nir_builder_init(&b, impl);
967
968 nir_variable *dup_out_var =
969 nir_deref_instr_get_variable(nir_src_as_deref(dup_store_intr->src[0]));
970
971 bool progress = false;
972 nir_foreach_block(block, impl) {
973 nir_foreach_instr(instr, block) {
974 if (instr->type != nir_instr_type_intrinsic)
975 continue;
976
977 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
978 if (intr->intrinsic != nir_intrinsic_load_deref)
979 continue;
980
981 nir_deref_instr *in_deref = nir_src_as_deref(intr->src[0]);
982 if (in_deref->mode != nir_var_shader_in)
983 continue;
984
985 nir_variable *in_var = nir_deref_instr_get_variable(in_deref);
986
987 if (!does_varying_match(dup_out_var, in_var) ||
988 in_var->data.interpolation != input_var->data.interpolation ||
989 get_interp_loc(in_var) != get_interp_loc(input_var))
990 continue;
991
992 b.cursor = nir_before_instr(instr);
993
994 nir_ssa_def *load = nir_load_var(&b, input_var);
995 nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(load));
996
997 progress = true;
998 }
999 }
1000
1001 return progress;
1002 }
1003
1004 bool
1005 nir_link_opt_varyings(nir_shader *producer, nir_shader *consumer)
1006 {
1007 /* TODO: Add support for more shader stage combinations */
1008 if (consumer->info.stage != MESA_SHADER_FRAGMENT ||
1009 (producer->info.stage != MESA_SHADER_VERTEX &&
1010 producer->info.stage != MESA_SHADER_TESS_EVAL))
1011 return false;
1012
1013 bool progress = false;
1014
1015 nir_function_impl *impl = nir_shader_get_entrypoint(producer);
1016
1017 struct hash_table *varying_values = _mesa_pointer_hash_table_create(NULL);
1018
1019 /* If we find a store in the last block of the producer we can be sure this
1020 * is the only possible value for this output.
1021 */
1022 nir_block *last_block = nir_impl_last_block(impl);
1023 nir_foreach_instr_reverse(instr, last_block) {
1024 if (instr->type != nir_instr_type_intrinsic)
1025 continue;
1026
1027 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
1028
1029 if (intr->intrinsic != nir_intrinsic_store_deref)
1030 continue;
1031
1032 nir_deref_instr *out_deref = nir_src_as_deref(intr->src[0]);
1033 if (out_deref->mode != nir_var_shader_out)
1034 continue;
1035
1036 nir_variable *out_var = nir_deref_instr_get_variable(out_deref);
1037 if (!can_replace_varying(out_var))
1038 continue;
1039
1040 if (intr->src[1].ssa->parent_instr->type == nir_instr_type_load_const) {
1041 progress |= replace_constant_input(consumer, intr);
1042 } else {
1043 struct hash_entry *entry =
1044 _mesa_hash_table_search(varying_values, intr->src[1].ssa);
1045 if (entry) {
1046 progress |= replace_duplicate_input(consumer,
1047 (nir_variable *) entry->data,
1048 intr);
1049 } else {
1050 nir_variable *in_var = get_matching_input_var(consumer, out_var);
1051 if (in_var) {
1052 _mesa_hash_table_insert(varying_values, intr->src[1].ssa,
1053 in_var);
1054 }
1055 }
1056 }
1057 }
1058
1059 _mesa_hash_table_destroy(varying_values, NULL);
1060
1061 return progress;
1062 }
1063
1064 /* TODO: is there a better helper somewhere to sort a list? */
1065
1066 static void
1067 insert_sorted(struct exec_list *var_list, nir_variable *new_var)
1068 {
1069 nir_foreach_variable(var, var_list) {
1070 if (var->data.location > new_var->data.location) {
1071 exec_node_insert_node_before(&var->node, &new_var->node);
1072 return;
1073 }
1074 }
1075 exec_list_push_tail(var_list, &new_var->node);
1076 }
1077
1078 static void
1079 sort_varyings(struct exec_list *var_list)
1080 {
1081 struct exec_list new_list;
1082 exec_list_make_empty(&new_list);
1083 nir_foreach_variable_safe(var, var_list) {
1084 exec_node_remove(&var->node);
1085 insert_sorted(&new_list, var);
1086 }
1087 exec_list_move_nodes_to(&new_list, var_list);
1088 }
1089
1090 void
1091 nir_assign_io_var_locations(struct exec_list *var_list, unsigned *size,
1092 gl_shader_stage stage)
1093 {
1094 unsigned location = 0;
1095 unsigned assigned_locations[VARYING_SLOT_TESS_MAX];
1096 uint64_t processed_locs[2] = {0};
1097
1098 sort_varyings(var_list);
1099
1100 int UNUSED last_loc = 0;
1101 bool last_partial = false;
1102 nir_foreach_variable(var, var_list) {
1103 const struct glsl_type *type = var->type;
1104 if (nir_is_per_vertex_io(var, stage) || var->data.per_view) {
1105 assert(glsl_type_is_array(type));
1106 type = glsl_get_array_element(type);
1107 }
1108
1109 int base;
1110 if (var->data.mode == nir_var_shader_in && stage == MESA_SHADER_VERTEX)
1111 base = VERT_ATTRIB_GENERIC0;
1112 else if (var->data.mode == nir_var_shader_out &&
1113 stage == MESA_SHADER_FRAGMENT)
1114 base = FRAG_RESULT_DATA0;
1115 else
1116 base = VARYING_SLOT_VAR0;
1117
1118 unsigned var_size;
1119 if (var->data.compact) {
1120 /* If we are inside a partial compact,
1121 * don't allow another compact to be in this slot
1122 * if it starts at component 0.
1123 */
1124 if (last_partial && var->data.location_frac == 0) {
1125 location++;
1126 }
1127
1128 /* compact variables must be arrays of scalars */
1129 assert(glsl_type_is_array(type));
1130 assert(glsl_type_is_scalar(glsl_get_array_element(type)));
1131 unsigned start = 4 * location + var->data.location_frac;
1132 unsigned end = start + glsl_get_length(type);
1133 var_size = end / 4 - location;
1134 last_partial = end % 4 != 0;
1135 } else {
1136 /* Compact variables bypass the normal varying compacting pass,
1137 * which means they cannot be in the same vec4 slot as a normal
1138 * variable. If part of the current slot is taken up by a compact
1139 * variable, we need to go to the next one.
1140 */
1141 if (last_partial) {
1142 location++;
1143 last_partial = false;
1144 }
1145 var_size = glsl_count_attribute_slots(type, false);
1146 }
1147
1148 /* Builtins don't allow component packing, so we only need to worry about
1149 * user-defined varyings sharing the same location.
1150 */
1151 bool processed = false;
1152 if (var->data.location >= base) {
1153 unsigned glsl_location = var->data.location - base;
1154
1155 for (unsigned i = 0; i < var_size; i++) {
1156 if (processed_locs[var->data.index] &
1157 ((uint64_t)1 << (glsl_location + i)))
1158 processed = true;
1159 else
1160 processed_locs[var->data.index] |=
1161 ((uint64_t)1 << (glsl_location + i));
1162 }
1163 }
1164
1165 /* Because component packing allows varyings to share the same location,
1166 * we may already have processed this location.
1167 */
1168 if (processed) {
1169 unsigned driver_location = assigned_locations[var->data.location];
1170 var->data.driver_location = driver_location;
1171
1172 /* An array may be packed such that it crosses multiple other arrays
1173 * or variables, so we need to make sure we have allocated the elements
1174 * consecutively if the previously processed var was shorter than
1175 * the current array we are processing.
1176 *
1177 * NOTE: The code below assumes the var list is ordered in ascending
1178 * location order.
1179 */
1180 assert(last_loc <= var->data.location);
1181 last_loc = var->data.location;
1182 unsigned last_slot_location = driver_location + var_size;
1183 if (last_slot_location > location) {
1184 unsigned num_unallocated_slots = last_slot_location - location;
1185 unsigned first_unallocated_slot = var_size - num_unallocated_slots;
1186 for (unsigned i = first_unallocated_slot; i < var_size; i++) {
1187 assigned_locations[var->data.location + i] = location;
1188 location++;
1189 }
1190 }
1191 continue;
1192 }
1193
1194 for (unsigned i = 0; i < var_size; i++) {
1195 assigned_locations[var->data.location + i] = location + i;
1196 }
1197
1198 var->data.driver_location = location;
1199 location += var_size;
1200 }
1201
1202 if (last_partial)
1203 location++;
1204
1205 *size = location;
1206 }
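
/* A minimal usage sketch (illustrative only; which lists a driver assigns,
 * and when, varies):
 *
 *    nir_assign_io_var_locations(&nir->inputs, &nir->num_inputs,
 *                                nir->info.stage);
 *    nir_assign_io_var_locations(&nir->outputs, &nir->num_outputs,
 *                                nir->info.stage);
 */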
1207
1208 static uint64_t
1209 get_linked_variable_location(unsigned location, bool patch)
1210 {
1211 if (!patch)
1212 return location;
1213
1214 /* Reserve locations 0...3 for special patch variables
1215 * like tess factors and bounding boxes, and the generic patch
1216 * variables will come after them.
1217 */
1218 if (location >= VARYING_SLOT_PATCH0)
1219 return location - VARYING_SLOT_PATCH0 + 4;
1220 else if (location >= VARYING_SLOT_TESS_LEVEL_OUTER &&
1221 location <= VARYING_SLOT_BOUNDING_BOX1)
1222 return location - VARYING_SLOT_TESS_LEVEL_OUTER;
1223 else
1224 unreachable("Unsupported variable in get_linked_variable_location.");
1225 }
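
/* With the mapping above, VARYING_SLOT_TESS_LEVEL_OUTER and
 * VARYING_SLOT_TESS_LEVEL_INNER map to patch locations 0 and 1, the
 * bounding-box slots to 2 and 3, and VARYING_SLOT_PATCH0 onwards to 4, 5, ...
 */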
1226
1227 static uint64_t
1228 get_linked_variable_io_mask(nir_variable *variable, gl_shader_stage stage)
1229 {
1230 const struct glsl_type *type = variable->type;
1231
1232 if (nir_is_per_vertex_io(variable, stage)) {
1233 assert(glsl_type_is_array(type));
1234 type = glsl_get_array_element(type);
1235 }
1236
1237 unsigned slots = glsl_count_attribute_slots(type, false);
1238 if (variable->data.compact) {
1239 unsigned component_count = variable->data.location_frac + glsl_get_length(type);
1240 slots = DIV_ROUND_UP(component_count, 4);
1241 }
1242
1243 uint64_t mask = u_bit_consecutive64(0, slots);
1244 return mask;
1245 }
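
/* For example, a compact float[6] (clip-distance style) array with
 * location_frac 0 has a component_count of 6 above and therefore occupies
 * DIV_ROUND_UP(6, 4) = 2 slots.
 */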
1246
1247 nir_linked_io_var_info
1248 nir_assign_linked_io_var_locations(nir_shader *producer, nir_shader *consumer)
1249 {
1250 assert(producer);
1251 assert(consumer);
1252
1253 uint64_t producer_output_mask = 0;
1254 uint64_t producer_patch_output_mask = 0;
1255
1256 nir_foreach_shader_out_variable(variable, producer) {
1257 uint64_t mask = get_linked_variable_io_mask(variable, producer->info.stage);
1258 uint64_t loc = get_linked_variable_location(variable->data.location, variable->data.patch);
1259
1260 if (variable->data.patch)
1261 producer_patch_output_mask |= mask << loc;
1262 else
1263 producer_output_mask |= mask << loc;
1264 }
1265
1266 uint64_t consumer_input_mask = 0;
1267 uint64_t consumer_patch_input_mask = 0;
1268
1269 nir_foreach_shader_in_variable(variable, consumer) {
1270 uint64_t mask = get_linked_variable_io_mask(variable, consumer->info.stage);
1271 uint64_t loc = get_linked_variable_location(variable->data.location, variable->data.patch);
1272
1273 if (variable->data.patch)
1274 consumer_patch_input_mask |= mask << loc;
1275 else
1276 consumer_input_mask |= mask << loc;
1277 }
1278
1279 uint64_t io_mask = producer_output_mask | consumer_input_mask;
1280 uint64_t patch_io_mask = producer_patch_output_mask | consumer_patch_input_mask;
1281
1282 nir_foreach_shader_out_variable(variable, producer) {
1283 uint64_t loc = get_linked_variable_location(variable->data.location, variable->data.patch);
1284
1285 if (variable->data.patch)
1286 variable->data.driver_location = util_bitcount64(patch_io_mask & u_bit_consecutive64(0, loc)) * 4;
1287 else
1288 variable->data.driver_location = util_bitcount64(io_mask & u_bit_consecutive64(0, loc)) * 4;
1289 }
1290
1291 nir_foreach_shader_in_variable(variable, consumer) {
1292 uint64_t loc = get_linked_variable_location(variable->data.location, variable->data.patch);
1293
1294 if (variable->data.patch)
1295 variable->data.driver_location = util_bitcount64(patch_io_mask & u_bit_consecutive64(0, loc)) * 4;
1296 else
1297 variable->data.driver_location = util_bitcount64(io_mask & u_bit_consecutive64(0, loc)) * 4;
1298 }
1299
1300 nir_linked_io_var_info result = {
1301 .num_linked_io_vars = util_bitcount64(io_mask),
1302 .num_linked_patch_io_vars = util_bitcount64(patch_io_mask),
1303 };
1304
1305 return result;
1306 }